]> Shamusworld >> Repos - virtualjaguar/blob - src/objectp.cpp
Fixed memory leak
[virtualjaguar] / src / objectp.cpp
1 //
2 // Object Processor
3 //
4 // Original source by Cal2
5 // GCC/SDL port by Niels Wagenaar (Linux/WIN32) and Caz (BeOS)
6 // Extensive cleanups/fixes/rewrites by James L. Hammons
7 //
8
9 //#include <stdio.h>
10 //#include <stdlib.h>
11 #include <string.h>
12 #include "jaguar.h"
13
14 //#define OP_DEBUG
15 //#define OP_DEBUG_BMP
16
// Blend table lookups. The index is (destination << 8) | source, which is the
// layout op_init() uses when it fills the tables.
// Both arguments are fully parenthesized so that expression arguments are cast
// and shifted as a whole: with the cast/shift applied to a bare "dst", an
// argument such as "pix & 0xFF" would expand to "(uint16)pix & (0xFF << 8)".
#define BLEND_Y(dst, src)	op_blend_y[(((uint16)(dst)) << 8) | ((uint16)(src))]
#define BLEND_CR(dst, src)	op_blend_cr[(((uint16)(dst)) << 8) | ((uint16)(src))]
19
// Object types: the low three bits of an object's first phrase
#define OBJECT_TYPE_BITMAP		0x0		// 000
#define OBJECT_TYPE_SCALE		0x1		// 001
#define OBJECT_TYPE_GPU			0x2		// 010
#define OBJECT_TYPE_BRANCH		0x3		// 011
#define OBJECT_TYPE_STOP		0x4		// 100

// Condition codes carried by a BRANCH object
#define CONDITION_EQUAL					0x0
#define CONDITION_LESS_THAN				0x1
#define CONDITION_GREATER_THAN			0x2
#define CONDITION_OP_FLAG_SET			0x3
#define CONDITION_SECOND_HALF_LINE		0x4

// Flag bits of a bitmap object (tested against the 4-bit flags field)
#define OPFLAG_REFLECT			(1 << 0)	// Horizontal mirror bit
#define OPFLAG_RMW				(1 << 1)	// Read-Modify-Write bit
#define OPFLAG_TRANS			(1 << 2)	// Transparency bit
#define OPFLAG_RELEASE			(1 << 3)	// Bus release bit
36
37 // Private function prototypes
38
39 void OPProcessFixedBitmap(uint64 p0, uint64 p1, bool render);
40 void OPProcessScaledBitmap(uint64 p0, uint64 p1, uint64 p2, bool render);
41 void DumpScaledObject(uint64 p0, uint64 p1, uint64 p2);
42 void DumpFixedObject(uint64 p0, uint64 p1);
43 uint64 op_load_phrase(uint32 offset);
44
45 // Local global variables
46
47 static uint8 * op_blend_y;
48 static uint8 * op_blend_cr;
49 // There may be a problem with this "RAM" overlapping (and thus being independent of)
50 // some of the regular TOM RAM...
51 static uint8 objectp_ram[0x40];                 // This is based at $F00000
52 uint8 objectp_running;
53 //bool objectp_stop_reading_list;
54
55 static uint8 op_bitmap_bit_depth[8] = { 1, 2, 4, 8, 16, 24, 32, 0 };
56 //static uint32 op_bitmap_bit_size[8] =
57 //      { (uint32)(0.125*65536), (uint32)(0.25*65536), (uint32)(0.5*65536), (uint32)(1*65536),
58 //        (uint32)(2*65536),     (uint32)(1*65536),    (uint32)(1*65536),   (uint32)(1*65536) };
59 static uint32 op_pointer;
60
61 int32 phraseWidthToPixels[8] = { 64, 32, 16, 8, 4, 2, 0, 0 };
62
63
64 //
65 // Object Processor initialization
66 //
67 void op_init(void)
68 {
69         // Blend tables (64K each)
70         memory_malloc_secure((void **)&op_blend_y, 0x10000, "Jaguar Object processor Y blend lookup table");
71         memory_malloc_secure((void **)&op_blend_cr, 0x10000, "Jaguar Object processor CR blend lookup table");
72
73         // Here we calculate the saturating blend of a signed 4-bit value and an
74         // existing Cyan/Red value as well as a signed 8-bit value and an existing intensity...
75         // Note: CRY is 4 bits Cyan, 4 bits Red, 16 bits intensitY
76         for(int i=0; i<256*256; i++)
77         {
78                 int y = (i >> 8) & 0xFF;
79                 int dy = (INT8)i;                                       // Sign extend the Y index
80                 int c1 = (i >> 8) & 0x0F;
81                 int dc1 = (INT8)(i << 4) >> 4;          // Sign extend the R index
82                 int c2 = (i >> 12) & 0x0F;
83                 int dc2 = (INT8)(i & 0xF0) >> 4;        // Sign extend the C index
84
85                 y += dy;
86                 if (y < 0)
87                         y = 0;
88                 else if (y > 0xFF)
89                         y = 0xFF;
90                 op_blend_y[i] = y;
91
92                 c1 += dc1;
93                 if (c1 < 0)
94                         c1 = 0;
95                 else if (c1 > 0x0F)
96                         c1 = 0x0F;
97                 c2 += dc2;
98
99                 if (c2 < 0)
100                         c2 = 0;
101                 else if (c2 > 0x0F)
102                         c2 = 0x0F;
103                 op_blend_cr[i] = (c2 << 4) | c1;
104         }
105
106         op_reset();
107 }
108
109 //
110 // Object Processor reset
111 //
112 void op_reset(void)
113 {
114         memset(objectp_ram, 0x00, 0x40);
115         objectp_running = 0;
116 }
117
118 void op_done(void)
119 {
120         char * opType[8] =
121         { "(BITMAP)", "(SCALED BITMAP)", "(GPU INT)", "(BRANCH)", "(STOP)", "???", "???", "???" };
122         char * ccType[8] =
123                 { "\"==\"", "\"<\"", "\">\"", "(opflag set)", "(second half line)", "?", "?", "?" };
124
125         uint32 olp = op_get_list_pointer();
126         WriteLog("OP: OLP = %08X\n", olp);
127         WriteLog("OP: Phrase dump\n    ----------\n");
128         for(uint32 i=0; i<0x100; i+=8)
129         {
130                 uint32 hi = JaguarReadLong(olp + i, OP), lo = JaguarReadLong(olp + i + 4, OP);
131                 WriteLog("\t%08X: %08X %08X %s", olp + i, hi, lo, opType[lo & 0x07]);
132                 if ((lo & 0x07) == 3)
133                 {
134                         uint16 ypos = (lo >> 3) & 0x7FF;
135                         uint8  cc   = (lo >> 14) & 0x03;
136                         uint32 link = ((hi << 11) | (lo >> 21)) & 0x3FFFF8;
137                         WriteLog(" YPOS=%u, CC=%s, link=%08X", ypos, ccType[cc], link);
138                 }
139                 WriteLog("\n");
140                 if ((lo & 0x07) == 0)
141                         DumpFixedObject(op_load_phrase(olp+i), op_load_phrase(olp+i+8));
142                 if ((lo & 0x07) == 1)
143                         DumpScaledObject(op_load_phrase(olp+i), op_load_phrase(olp+i+8), op_load_phrase(olp+i+16));
144         }
145         WriteLog("\n");
146
147         memory_free(op_blend_y);
148         memory_free(op_blend_cr);
149 }
150
151 //
152 // Object Processor memory access
153 // Memory range: F00010 - F00027
154 //
155 //      F00010-F00017   R     xxxxxxxx xxxxxxxx   OB - current object code from the graphics processor
156 //      F00020-F00023     W   xxxxxxxx xxxxxxxx   OLP - start of the object list
157 //      F00026            W   -------- -------x   OBF - object processor flag
158 //
159
160 uint8 OPReadByte(uint32 offset, uint32 who/*=UNKNOWN*/)
161 {
162         offset &= 0x3F;
163         return objectp_ram[offset];
164 }
165
166 uint16 OPReadWord(uint32 offset, uint32 who/*=UNKNOWN*/)
167 {
168         offset &= 0x3F;
169         return GET16(objectp_ram, offset);
170 }
171
172 void OPWriteByte(uint32 offset, uint8 data, uint32 who/*=UNKNOWN*/)
173 {
174         offset &= 0x3F;
175         objectp_ram[offset] = data;
176 }
177
178 void OPWriteWord(uint32 offset, uint16 data, uint32 who/*=UNKNOWN*/)
179 {
180         offset &= 0x3F;
181         SET16(objectp_ram, offset, data);
182
183 /*if (offset == 0x20)
184 WriteLog("OP: Setting lo list pointer: %04X\n", data);
185 if (offset == 0x22)
186 WriteLog("OP: Setting hi list pointer: %04X\n", data);//*/
187 }
188
189 uint32 op_get_list_pointer(void)
190 {
191         // Note: This register is LO / HI WORD, hence the funky look of this...
192 //      return (objectp_ram[0x22] << 24) | (objectp_ram[0x23] << 16) | (objectp_ram[0x20] << 8) | objectp_ram[0x21];
193         return GET16(objectp_ram, 0x20) | (GET16(objectp_ram, 0x22) << 16);
194 }
195
196 // This is WRONG, since the OBF is only 16 bits wide!!! [FIXED]
197
198 uint32 op_get_status_register(void)
199 {
200 //      return (objectp_ram[0x26] << 24) | (objectp_ram[0x27] << 16) | (objectp_ram[0x28] << 8) | objectp_ram[0x29];
201 //      return GET32(objectp_ram, 0x26);
202         return GET16(objectp_ram, 0x26);
203 }
204
205 // This is WRONG, since the OBF is only 16 bits wide!!! [FIXED]
206
207 void op_set_status_register(uint32 data)
208 {
209 /*      objectp_ram[0x26] = (data & 0xFF000000) >> 24;
210         objectp_ram[0x27] = (data & 0x00FF0000) >> 16;
211         objectp_ram[0x28] = (data & 0x0000FF00) >> 8;
212         objectp_ram[0x29] |= (data & 0xFE);*/
213         objectp_ram[0x26] = (data & 0x0000FF00) >> 8;
214         objectp_ram[0x27] |= (data & 0xFE);
215 }
216
217 void op_set_current_object(uint64 object)
218 {
219 //Not sure this is right... Wouldn't it just be stored 64 bit BE?
220         // Stored as least significant 32 bits first, ms32 last in big endian
221 /*      objectp_ram[0x13] = object & 0xFF; object >>= 8;
222         objectp_ram[0x12] = object & 0xFF; object >>= 8;
223         objectp_ram[0x11] = object & 0xFF; object >>= 8;
224         objectp_ram[0x10] = object & 0xFF; object >>= 8;
225
226         objectp_ram[0x17] = object & 0xFF; object >>= 8;
227         objectp_ram[0x16] = object & 0xFF; object >>= 8;
228         objectp_ram[0x15] = object & 0xFF; object >>= 8;
229         objectp_ram[0x14] = object & 0xFF;*/
230 // Let's try regular good old big endian...
231         objectp_ram[0x17] = object & 0xFF; object >>= 8;
232         objectp_ram[0x16] = object & 0xFF; object >>= 8;
233         objectp_ram[0x15] = object & 0xFF; object >>= 8;
234         objectp_ram[0x14] = object & 0xFF; object >>= 8;
235
236         objectp_ram[0x13] = object & 0xFF; object >>= 8;
237         objectp_ram[0x12] = object & 0xFF; object >>= 8;
238         objectp_ram[0x11] = object & 0xFF; object >>= 8;
239         objectp_ram[0x10] = object & 0xFF;
240 }
241
242 uint64 op_load_phrase(uint32 offset)
243 {
244         offset &= ~0x07;                                                // 8 byte alignment
245         return ((uint64)JaguarReadLong(offset, OP) << 32) | (uint64)JaguarReadLong(offset+4, OP);
246 }
247
248 void OPStorePhrase(uint32 offset, uint64 p)
249 {
250         offset &= ~0x07;                                                // 8 byte alignment
251         JaguarWriteLong(offset, p >> 32, OP);
252         JaguarWriteLong(offset + 4, p & 0xFFFFFFFF, OP);
253 }
254
255 //
256 // Debugging routines
257 //
258 void DumpScaledObject(uint64 p0, uint64 p1, uint64 p2)
259 {
260         WriteLog(" (SCALED BITMAP)");
261         WriteLog(" %08X --> phrase %08X %08X\n", op_pointer, (uint32)(p1>>32), (uint32)(p1&0xFFFFFFFF));
262         WriteLog("                 %08X --> phrase %08X %08X ", op_pointer+8, (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
263         uint8 bitdepth = (p1 >> 12) & 0x07;
264 //WAS:  int16 ypos = ((p0 >> 3) & 0x3FF);                       // ??? What if not interlaced (/2)?
265         int16 ypos = ((p0 >> 3) & 0x7FF);                       // ??? What if not interlaced (/2)?
266         int32 xpos = p1 & 0xFFF;
267         xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);
268         uint32 iwidth = ((p1 >> 28) & 0x3FF);
269         uint32 dwidth = ((p1 >> 18) & 0x3FF);           // Unsigned!
270         uint16 height = ((p0 >> 14) & 0x3FF);
271         uint32 link = ((p0 >> 24) & 0x7FFFF) << 3;
272         uint32 ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
273         uint32 firstPix = (p1 >> 49) & 0x3F;
274         uint8 flags = (p1 >> 45) & 0x0F;
275         uint8 idx = (p1 >> 38) & 0x7F;
276         uint32 pitch = (p1 >> 15) & 0x07;
277         WriteLog("\n    [%u (%u) x %u @ (%i, %u) (%u bpp), l: %08X, p: %08X fp: %02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
278                 iwidth, dwidth, height, xpos, ypos, op_bitmap_bit_depth[bitdepth], link, ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""), (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""), (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
279         uint32 hscale = p2 & 0xFF;
280         uint32 vscale = (p2 >> 8) & 0xFF;
281         uint32 remainder = (p2 >> 16) & 0xFF;
282         WriteLog("    [hsc: %02X, vsc: %02X, rem: %02X]\n", hscale, vscale, remainder);
283 }
284
285 void DumpFixedObject(uint64 p0, uint64 p1)
286 {
287         WriteLog(" (BITMAP)");
288         WriteLog(" %08X --> phrase %08X %08X\n", op_pointer, (uint32)(p1>>32), (uint32)(p1&0xFFFFFFFF));
289         uint8 bitdepth = (p1 >> 12) & 0x07;
290 //WAS:  int16 ypos = ((p0 >> 3) & 0x3FF);                       // ??? What if not interlaced (/2)?
291         int16 ypos = ((p0 >> 3) & 0x7FF);                       // ??? What if not interlaced (/2)?
292         int32 xpos = p1 & 0xFFF;
293         xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);
294         uint32 iwidth = ((p1 >> 28) & 0x3FF);
295         uint32 dwidth = ((p1 >> 18) & 0x3FF);           // Unsigned!
296         uint16 height = ((p0 >> 14) & 0x3FF);
297         uint32 link = ((p0 >> 24) & 0x7FFFF) << 3;
298         uint32 ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
299         uint32 firstPix = (p1 >> 49) & 0x3F;
300         uint8 flags = (p1 >> 45) & 0x0F;
301         uint8 idx = (p1 >> 38) & 0x7F;
302         uint32 pitch = (p1 >> 15) & 0x07;
303         WriteLog("    [%u (%u) x %u @ (%i, %u) (%u bpp), l: %08X, p: %08X fp: %02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
304                 iwidth, dwidth, height, xpos, ypos, op_bitmap_bit_depth[bitdepth], link, ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""), (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""), (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
305 }
306
307 //
308 // Object Processor main routine
309 //
310 //Need to fix this so that when an GPU object IRQ happens, we can pick up OP processing
311 //where we left off. !!! FIX !!!
312 void OPProcessList(int scanline, bool render)
313 {
314 extern int op_start_log;
315 //      char * condition_to_str[8] =
316 //              { "==", "<", ">", "(opflag set)", "(second half line)", "?", "?", "?" };
317
318         op_pointer = op_get_list_pointer();
319
320 //      objectp_stop_reading_list = false;
321
322 //WriteLog("OP: Processing line #%u (OLP=%08X)...\n", scanline, op_pointer);
323 //op_done();
324
325 // *** BEGIN OP PROCESSOR TESTING ONLY ***
326 extern bool interactiveMode;
327 extern bool iToggle;
328 extern int objectPtr;
329 bool inhibit;
330 int bitmapCounter = 0;
331 // *** END OP PROCESSOR TESTING ONLY ***
332
333         uint32 opCyclesToRun = 10000;                                   // This is a pulled-out-of-the-air value (will need to be fixed, obviously!)
334
335 //      if (op_pointer) WriteLog(" new op list at 0x%.8x scanline %i\n",op_pointer,scanline);
336         while (op_pointer)
337         {
338 // *** BEGIN OP PROCESSOR TESTING ONLY ***
339 if (interactiveMode && bitmapCounter == objectPtr)
340         inhibit = iToggle;
341 else
342         inhibit = false;
343 // *** END OP PROCESSOR TESTING ONLY ***
344 //              if (objectp_stop_reading_list)
345 //                      return;
346                         
347                 uint64 p0 = op_load_phrase(op_pointer);
348 //WriteLog("\t%08X type %i\n", op_pointer, (uint8)p0 & 0x07);
349                 op_pointer += 8;
350 if (scanline == tom_get_vdb() && op_start_log)
351 //if (scanline == 215 && op_start_log)
352 //if (scanline == 28 && op_start_log)
353 {
354 WriteLog("%08X --> phrase %08X %08X", op_pointer - 8, (int)(p0>>32), (int)(p0&0xFFFFFFFF));
355 if ((p0 & 0x07) == OBJECT_TYPE_BITMAP)
356 {
357 WriteLog(" (BITMAP) ");
358 uint64 p1 = op_load_phrase(op_pointer);
359 WriteLog("\n%08X --> phrase %08X %08X ", op_pointer, (int)(p1>>32), (int)(p1&0xFFFFFFFF));
360         uint8 bitdepth = (p1 >> 12) & 0x07;
361 //WAS:  int16 ypos = ((p0 >> 3) & 0x3FF);                       // ??? What if not interlaced (/2)?
362         int16 ypos = ((p0 >> 3) & 0x7FF);                       // ??? What if not interlaced (/2)?
363 int32 xpos = p1 & 0xFFF;
364 xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);
365         uint32 iwidth = ((p1 >> 28) & 0x3FF);
366         uint32 dwidth = ((p1 >> 18) & 0x3FF);           // Unsigned!
367         uint16 height = ((p0 >> 14) & 0x3FF);
368         uint32 link = ((p0 >> 24) & 0x7FFFF) << 3;
369         uint32 ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
370         uint32 firstPix = (p1 >> 49) & 0x3F;
371         uint8 flags = (p1 >> 45) & 0x0F;
372         uint8 idx = (p1 >> 38) & 0x7F;
373         uint32 pitch = (p1 >> 15) & 0x07;
374 WriteLog("\n    [%u (%u) x %u @ (%i, %u) (%u bpp), l: %08X, p: %08X fp: %02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
375         iwidth, dwidth, height, xpos, ypos, op_bitmap_bit_depth[bitdepth], link, ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""), (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""), (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
376 }
377 if ((p0 & 0x07) == OBJECT_TYPE_SCALE)
378 {
379 WriteLog(" (SCALED BITMAP)");
380 uint64 p1 = op_load_phrase(op_pointer), p2 = op_load_phrase(op_pointer+8);
381 WriteLog("\n%08X --> phrase %08X %08X ", op_pointer, (int)(p1>>32), (int)(p1&0xFFFFFFFF));
382 WriteLog("\n%08X --> phrase %08X %08X ", op_pointer+8, (int)(p2>>32), (int)(p2&0xFFFFFFFF));
383         uint8 bitdepth = (p1 >> 12) & 0x07;
384 //WAS:  int16 ypos = ((p0 >> 3) & 0x3FF);                       // ??? What if not interlaced (/2)?
385         int16 ypos = ((p0 >> 3) & 0x7FF);                       // ??? What if not interlaced (/2)?
386 int32 xpos = p1 & 0xFFF;
387 xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);
388         uint32 iwidth = ((p1 >> 28) & 0x3FF);
389         uint32 dwidth = ((p1 >> 18) & 0x3FF);           // Unsigned!
390         uint16 height = ((p0 >> 14) & 0x3FF);
391         uint32 link = ((p0 >> 24) & 0x7FFFF) << 3;
392         uint32 ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
393         uint32 firstPix = (p1 >> 49) & 0x3F;
394         uint8 flags = (p1 >> 45) & 0x0F;
395         uint8 idx = (p1 >> 38) & 0x7F;
396         uint32 pitch = (p1 >> 15) & 0x07;
397 WriteLog("\n    [%u (%u) x %u @ (%i, %u) (%u bpp), l: %08X, p: %08X fp: %02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
398         iwidth, dwidth, height, xpos, ypos, op_bitmap_bit_depth[bitdepth], link, ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""), (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""), (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
399         uint32 hscale = p2 & 0xFF;
400         uint32 vscale = (p2 >> 8) & 0xFF;
401         uint32 remainder = (p2 >> 16) & 0xFF;
402 WriteLog("    [hsc: %02X, vsc: %02X, rem: %02X]\n", hscale, vscale, remainder);
403 }
404 if ((p0 & 0x07) == OBJECT_TYPE_GPU)
405 WriteLog(" (GPU)\n");
406 if ((p0 & 0x07) == OBJECT_TYPE_BRANCH)
407 {
408 WriteLog(" (BRANCH)\n");
409 uint8 * jaguar_mainRam = GetRamPtr();
410 WriteLog("[RAM] --> ");
411 for(int k=0; k<8; k++)
412         WriteLog("%02X ", jaguar_mainRam[op_pointer-8 + k]);
413 WriteLog("\n");
414 }
415 if ((p0 & 0x07) == OBJECT_TYPE_STOP)
416 WriteLog("    --> List end\n");
417 }//*/
418                 
419                 switch ((uint8)p0 & 0x07)
420                 {
421                 case OBJECT_TYPE_BITMAP:
422                 {
423 //WAS:                  uint16 ypos = (p0 >> 3) & 0x3FF;
424                         uint16 ypos = (p0 >> 3) & 0x7FF;
425 // This is only theory implied by Rayman...!
426 // It seems that if the YPOS is zero, then bump the YPOS value so that it coincides with
427 // the VDB value. With interlacing, this would be slightly more tricky.
428 // There's probably another bit somewhere that enables this mode--but so far, doesn't seem
429 // to affect any other game in a negative way (that I've seen).
430 // Either that, or it's an undocumented bug...
431
432 //No, the reason this was needed is that the OP code before was wrong. Any value
433 //less than VDB will get written to the top line of the display!
434 //                      if (ypos == 0)
435 //                              ypos = TOMReadWord(0xF00046, OP) / 2;                   // Get the VDB value
436                         uint32 height = (p0 & 0xFFC000) >> 14;
437                         uint32 oldOPP = op_pointer - 8;
438 // *** BEGIN OP PROCESSOR TESTING ONLY ***
439 if (inhibit && op_start_log)
440         WriteLog("!!! ^^^ This object is INHIBITED! ^^^ !!!\n");
441 bitmapCounter++;
442 if (!inhibit)   // For OP testing only!
443 // *** END OP PROCESSOR TESTING ONLY ***
444                         if (scanline >= ypos && height > 0)
445                         {
446                                 uint64 p1 = op_load_phrase(op_pointer);
447                                 op_pointer += 8;
448 //WriteLog("OP: Writing scanline %d with ypos == %d...\n", scanline, ypos);
449 //WriteLog("--> Writing %u BPP bitmap...\n", op_bitmap_bit_depth[(p1 >> 12) & 0x07]);
450 //                              OPProcessFixedBitmap(scanline, p0, p1, render);
451                                 OPProcessFixedBitmap(p0, p1, render);
452
453                                 // OP write-backs
454
455 //???Does this really happen??? Doesn't seem to work if you do this...!
456 //Probably not. Must be a bug in the documentation...!
457 //                              uint32 link = (p0 & 0x7FFFF000000) >> 21;
458 //                              SET16(objectp_ram, 0x20, link & 0xFFFF);        // OLP
459 //                              SET16(objectp_ram, 0x22, link >> 16);
460 /*                              uint32 height = (p0 & 0xFFC000) >> 14;
461                                 if (height - 1 > 0)
462                                         height--;*/
463                                 // NOTE: Would subtract 2 if in interlaced mode...!
464 //                              uint64 height = ((p0 & 0xFFC000) - 0x4000) & 0xFFC000;
465 //                              if (height)
466                                 height--;
467
468                                 uint64 data = (p0 & 0xFFFFF80000000000LL) >> 40;
469                                 uint64 dwidth = (p1 & 0xFFC0000) >> 15;
470                                 data += dwidth;
471
472                                 p0 &= ~0xFFFFF80000FFC000LL;            // Mask out old data...
473                                 p0 |= (uint64)height << 14;
474                                 p0 |= data << 40;
475                                 OPStorePhrase(oldOPP, p0);
476                         }
477 //WriteLog("\t\tOld OP: %08X -> ", op_pointer);
478 //Temp, for testing...
479 //No doubt, this type of check will break all kinds of stuff... !!! FIX !!!
480 //And it does! !!! FIX !!!
481 //Let's remove this "fix" since it screws up more than it fixes.
482 /*      if (op_pointer > ((p0 & 0x000007FFFF000000LL) >> 21))
483                 return;*/
484
485                         op_pointer = (p0 & 0x000007FFFF000000LL) >> 21;
486 //WriteLog("New OP: %08X\n", op_pointer);
487                         break;
488                 }
489                 case OBJECT_TYPE_SCALE:
490                 {
491 //WAS:                  uint16 ypos = (p0 >> 3) & 0x3FF;
492                         uint16 ypos = (p0 >> 3) & 0x7FF;
493                         uint32 height = (p0 & 0xFFC000) >> 14;
494                         uint32 oldOPP = op_pointer - 8;
495 // *** BEGIN OP PROCESSOR TESTING ONLY ***
496 if (inhibit && op_start_log)
497 {
498         WriteLog("!!! ^^^ This object is INHIBITED! ^^^ !!! (scanline=%u, ypos=%u, height=%u)\n", scanline, ypos, height);
499         DumpScaledObject(p0, op_load_phrase(op_pointer), op_load_phrase(op_pointer+8));
500 }
501 bitmapCounter++;
502 if (!inhibit)   // For OP testing only!
503 // *** END OP PROCESSOR TESTING ONLY ***
504                         if (scanline >= ypos && height > 0)
505                         {
506                                 uint64 p1 = op_load_phrase(op_pointer);
507                                 op_pointer += 8;
508                                 uint64 p2 = op_load_phrase(op_pointer);
509                                 op_pointer += 8;
510 //WriteLog("OP: %08X (%d) %08X%08X %08X%08X %08X%08X\n", oldOPP, scanline, (uint32)(p0>>32), (uint32)(p0&0xFFFFFFFF), (uint32)(p1>>32), (uint32)(p1&0xFFFFFFFF), (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
511                                 OPProcessScaledBitmap(p0, p1, p2, render);
512
513                                 // OP write-backs
514
515                                 uint8 remainder = p2 >> 16, vscale = p2 >> 8;
516 //Actually, we should skip this object if it has a vscale of zero.
517 //Or do we? Not sure... Atari Karts has a few lines that look like:
518 // (SCALED BITMAP)
519 //000E8268 --> phrase 00010000 7000B00D 
520 //    [7 (0) x 1 @ (13, 0) (8 bpp), l: 000E82A0, p: 000E0FC0 fp: 00, fl:RELEASE, idx:00, pt:01]
521 //    [hsc: 9A, vsc: 00, rem: 00]
522 // Could it be the vscale is overridden if the DWIDTH is zero? Hmm...
523
524                                 if (vscale == 0)
525                                         vscale = 0x20;                                  // OP bug??? Nope, it isn't...! Or is it?
526
527 //extern int start_logging;
528 //if (start_logging)
529 //      WriteLog("--> Returned from scaled bitmap processing (rem=%02X, vscale=%02X)...\n", remainder, vscale);//*/
530 //Locks up here:
531 //--> Returned from scaled bitmap processing (rem=20, vscale=80)...
532 //There are other problems here, it looks like...
533 //Another lock up:
534 //About to execute OP (508)...
535 /*
536 OP: Scaled bitmap 4x? 4bpp at 38,? hscale=7C fpix=0 data=00075E28 pitch 1 hflipped=no dwidth=? (linked to 00071118) Transluency=no
537 --> Returned from scaled bitmap processing (rem=50, vscale=7C)...
538 OP: Scaled bitmap 4x? 4bpp at 38,? hscale=7C fpix=0 data=00075E28 pitch 1 hflipped=no dwidth=? (linked to 00071118) Transluency=no
539 --> Returned from scaled bitmap processing (rem=30, vscale=7C)...
540 OP: Scaled bitmap 4x? 4bpp at 38,? hscale=7C fpix=0 data=00075E28 pitch 1 hflipped=no dwidth=? (linked to 00071118) Transluency=no
541 --> Returned from scaled bitmap processing (rem=10, vscale=7C)...
542 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756A8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
543 --> Returned from scaled bitmap processing (rem=00, vscale=7E)...
544 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
545 --> Returned from scaled bitmap processing (rem=00, vscale=80)...
546 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
547 --> Returned from scaled bitmap processing (rem=5E, vscale=7E)...
548 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756E8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
549 --> Returned from scaled bitmap processing (rem=60, vscale=80)...
550 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
551 --> Returned from scaled bitmap processing (rem=3E, vscale=7E)...
552 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756E8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
553 --> Returned from scaled bitmap processing (rem=40, vscale=80)...
554 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
555 --> Returned from scaled bitmap processing (rem=1E, vscale=7E)...
556 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756E8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
557 --> Returned from scaled bitmap processing (rem=20, vscale=80)...
558 */
559 //Here's another problem:
560 //    [hsc: 20, vsc: 20, rem: 00]
561 // Since we're not checking for $E0 (but that's what we get from the above), we end
562 // up repeating this scanline unnecessarily... !!! FIX !!! [DONE, but... still not quite
563 // right. Either that, or the Accolade team that wrote Bubsy screwed up royal.]
564 //Also note: $E0 = 7.0 which IS a legal vscale value...
565
566 //                              if (remainder & 0x80)                           // I.e., it's negative
567 //                              if ((remainder & 0x80) || remainder == 0)       // I.e., it's <= 0
568 //                              if ((remainder - 1) >= 0xE0)            // I.e., it's <= 0
569 //                              if ((remainder >= 0xE1) || remainder == 0)// I.e., it's <= 0
570 //                              if ((remainder >= 0xE1 && remainder <= 0xFF) || remainder == 0)// I.e., it's <= 0
571                                 if (remainder <= 0x20)                          // I.e., it's <= 0
572                                 {
573                                         uint64 data = (p0 & 0xFFFFF80000000000LL) >> 40;
574                                         uint64 dwidth = (p1 & 0xFFC0000) >> 15;
575
576 //                                      while (remainder & 0x80)
577 //                                      while ((remainder & 0x80) || remainder == 0)
578 //                                      while ((remainder - 1) >= 0xE0)
579 //                                      while ((remainder >= 0xE1) || remainder == 0)
580 //                                      while ((remainder >= 0xE1 && remainder <= 0xFF) || remainder == 0)
581                                         while (remainder <= 0x20)
582                                         {
583                                                 remainder += vscale;
584
585                                                 if (height)
586                                                         height--;
587
588                                                 data += dwidth;
589                                         }
590
591                                         p0 &= ~0xFFFFF80000FFC000LL;    // Mask out old data...
592                                         p0 |= (uint64)height << 14;
593                                         p0 |= data << 40;
594                                         OPStorePhrase(oldOPP, p0);
595                                 }
596
597                                 remainder -= 0x20;                                      // 1.0f in [3.5] fixed point format
598
599 //if (start_logging)
600 //      WriteLog("--> Finished writebacks...\n");//*/
601
602 //WriteLog(" [%08X%08X -> ", (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
603                                 p2 &= ~0x0000000000FF0000LL;
604                                 p2 |= (uint64)remainder << 16;
605 //WriteLog("%08X%08X]\n", (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
606                                 OPStorePhrase(oldOPP+16, p2);
607 //remainder = (uint8)(p2 >> 16), vscale = (uint8)(p2 >> 8);
608 //WriteLog(" [after]: rem=%02X, vscale=%02X\n", remainder, vscale);
609                         }
610                         op_pointer = (p0 & 0x000007FFFF000000LL) >> 21;
611                         break;
612                 }
613                 case OBJECT_TYPE_GPU:
614                 {
615 //WriteLog("OP: Asserting GPU IRQ #3...\n");
616                         op_set_current_object(p0);
617                         GPUSetIRQLine(3, ASSERT_LINE);
618 //Also, OP processing is suspended from this point until OBF (F00026) is written to...
619 // !!! FIX !!!
620 //Do something like:
621 //OPSuspendedByGPU = true;
622 //Dunno if the OP keeps processing from where it was interrupted, or if it just continues
623 //on the next scanline...
624 // --> It continues from where it was interrupted! !!! FIX !!!
625                         break;
626                 }
627                 case OBJECT_TYPE_BRANCH:
628                 {
629                         uint16 ypos = (p0 >> 3) & 0x7FF;
630                         uint8  cc   = (p0 >> 14) & 0x03;
631                         uint32 link = (p0 >> 21) & 0x3FFFF8;
632                         
633 //                      if ((ypos!=507)&&(ypos!=25))
634 //                              WriteLog("\t%i%s%i link=0x%.8x\n",scanline,condition_to_str[cc],ypos>>1,link);
635                         switch (cc)
636                         {
637                         case CONDITION_EQUAL:
638                                 if (TOMReadWord(0xF00006, OP) == ypos || ypos == 0x7FF)
639                                         op_pointer = link;
640                                 break;
641                         case CONDITION_LESS_THAN:
642                                 if (TOMReadWord(0xF00006, OP) < ypos)
643                                         op_pointer = link;
644                                 break;
645                         case CONDITION_GREATER_THAN:
646                                 if (TOMReadWord(0xF00006, OP) > ypos)
647                                         op_pointer = link;
648                                 break;
649                         case CONDITION_OP_FLAG_SET:
650                                 if (op_get_status_register() & 0x01)
651                                         op_pointer = link;
652                                 break;
653                         case CONDITION_SECOND_HALF_LINE:
654                                 // This basically means branch if bit 10 of HC is set
655                                 WriteLog("OP: Unexpected CONDITION_SECOND_HALF_LINE in BRANCH object\nOP: shuting down\n");
656                                 fclose(log_get());
657                                 exit(0);
658                                 break;
659                         default:
660                                 WriteLog("OP: Unimplemented branch condition %i\n", cc);
661                         }
662                         break;
663                 }
664                 case OBJECT_TYPE_STOP:
665                 {
666 //op_start_log = 0;
667                         // unsure
668 //WriteLog("OP: --> STOP\n");
669 //                      op_set_status_register(((p0>>3) & 0xFFFFFFFF));
670 //This seems more likely...
671                         op_set_current_object(p0);
672                         
673                         if (p0 & 0x08)
674                         {
675                                 tom_set_pending_object_int();
676                                 if (tom_irq_enabled(IRQ_OPFLAG))// && jaguar_interrupt_handler_is_valid(64))
677                                         m68k_set_irq(7);                                // Cause an NMI to occur...
678                         }
679
680                         return;
681 //                      break;
682                 }
683                 default:
684                         WriteLog("op: unknown object type %i\n", ((uint8)p0 & 0x07)); 
685                         return;
686                 }
687
688                 // Here is a little sanity check to keep the OP from locking up the machine
689                 // when fed bad data. Better would be to count how many actual cycles it used
690                 // and bail out/reenter to properly simulate an overloaded OP... !!! FIX !!!
691                 opCyclesToRun--;
692                 if (!opCyclesToRun)
693                         return;
694         }
695 }
696
697 //
698 // Store fixed size bitmap in line buffer
699 //
700 void OPProcessFixedBitmap(uint64 p0, uint64 p1, bool render)
701 {
702 // Need to make sure that when writing that it stays within the line buffer...
703 // LBUF ($F01800 - $F01D9E) 360 x 32-bit RAM
704         uint8 depth = (p1 >> 12) & 0x07;                                // Color depth of image
705         int32 xpos = ((int16)((p1 << 4) & 0xFFFF)) >> 4;// Image xpos in LBUF
706         uint32 iwidth = (p1 >> 28) & 0x3FF;                             // Image width in *phrases*
707         uint32 data = (p0 >> 40) & 0xFFFFF8;                    // Pixel data address
708 //#ifdef OP_DEBUG_BMP
709         uint32  firstPix = (p1 >> 49) & 0x3F;
710         // "The LSB is significant only for scaled objects..." -JTRM
711         // "In 1 BPP mode, all five bits are significant. In 2 BPP mode, the top four are significant..."
712         firstPix &= 0x3E;
713 //#endif
714 // We can ignore the RELEASE (high order) bit for now--probably forever...!
715 //      uint8 flags = (p1 >> 45) & 0x0F;        // REFLECT, RMW, TRANS, RELEASE
716 //Optimize: break these out to their own BOOL values
717         uint8 flags = (p1 >> 45) & 0x07;                                // REFLECT (0), RMW (1), TRANS (2)
718         bool flagREFLECT = (flags & OPFLAG_REFLECT ? true : false),
719                 flagRMW = (flags & OPFLAG_RMW ? true : false),
720                 flagTRANS = (flags & OPFLAG_TRANS ? true : false);
721 // "For images with 1 to 4 bits/pixel the top 7 to 4 bits of the index
722 //  provide the most significant bits of the palette address."
723         uint8 index = (p1 >> 37) & 0xFE;                                // CLUT index offset (upper pix, 1-4 bpp)
724         uint32 pitch = (p1 >> 15) & 0x07;                               // Phrase pitch
725         pitch <<= 3;                                                                    // Optimization: Multiply pitch by 8
726
727 //      int16 scanlineWidth = tom_getVideoModeWidth();
728         uint8 * tom_ram_8 = tom_get_ram_pointer();
729         uint8 * paletteRAM = &tom_ram_8[0x400];
730         // This is OK as long as it's used correctly: For 16-bit RAM to RAM direct copies--NOT
731         // for use when using endian-corrected data (i.e., any of the *_word_read functions!)
732         uint16 * paletteRAM16 = (uint16 *)paletteRAM;
733
734 //      WriteLog("bitmap %ix? %ibpp at %i,? firstpix=? data=0x%.8x pitch %i hflipped=%s dwidth=? (linked to ?) RMW=%s Tranparent=%s\n",
735 //              iwidth, op_bitmap_bit_depth[bitdepth], xpos, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), (flags&OPFLAG_RMW ? "yes" : "no"), (flags&OPFLAG_TRANS ? "yes" : "no"));
736
737 // Is it OK to have a 0 for the data width??? (i.e., undocumented?)
738 // Seems to be... Seems that dwidth *can* be zero (i.e., reuse same line) as well.
739 // Pitch == 0 is OK too...
740 //      if (!render || op_pointer == 0 || ptr == 0 || pitch == 0)
741 //I'm not convinced that we need to concern ourselves with data & op_pointer here either!
742         if (!render || iwidth == 0)
743                 return;
744
745 //#define OP_DEBUG_BMP
746 //#ifdef OP_DEBUG_BMP
747 //      WriteLog("bitmap %ix%i %ibpp at %i,%i firstpix=%i data=0x%.8x pitch %i hflipped=%s dwidth=%i (linked to 0x%.8x) Transluency=%s\n",
748 //              iwidth, height, op_bitmap_bit_depth[bitdepth], xpos, ypos, firstPix, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), dwidth, op_pointer, (flags&OPFLAG_RMW ? "yes" : "no"));
749 //#endif
750
751 //      int32 leftMargin = xpos, rightMargin = (xpos + (phraseWidthToPixels[depth] * iwidth)) - 1;
752         int32 startPos = xpos, endPos = xpos +
753                 (!flagREFLECT ? (phraseWidthToPixels[depth] * iwidth) - 1
754                 : -((phraseWidthToPixels[depth] * iwidth) + 1));
755         uint32 clippedWidth = 0, phraseClippedWidth = 0, dataClippedWidth = 0;//, phrasePixel = 0;
756         bool in24BPPMode = (((GET16(tom_ram_8, 0x0028) >> 1) & 0x03) == 1 ? true : false);      // VMODE
757         // Not sure if this is Jaguar Two only location or what...
758         // From the docs, it is... If we want to limit here we should think of something else.
759 //      int32 limit = GET16(tom_ram_8, 0x0008);                 // LIMIT
760         int32 limit = 720;
761         int32 lbufWidth = (!in24BPPMode ? limit - 1 : (limit / 2) - 1); // Zero based limit...
762
763         // If the image is completely to the left or right of the line buffer, then bail.
764 //If in REFLECT mode, then these values are swapped! !!! FIX !!! [DONE]
765 //There are four possibilities:
766 //  1. image sits on left edge and no REFLECT; starts out of bounds but ends in bounds.
767 //  2. image sits on left edge and REFLECT; starts in bounds but ends out of bounds.
768 //  3. image sits on right edge and REFLECT; starts out of bounds but ends in bounds.
769 //  4. image sits on right edge and no REFLECT; starts in bounds but ends out of bounds.
770 //Numbers 2 & 4 can be caught by checking the LBUF clip while in the inner loop,
771 // numbers 1 & 3 are of concern.
772 // This *indirectly* handles only cases 2 & 4! And is WRONG is REFLECT is set...!
773 //      if (rightMargin < 0 || leftMargin > lbufWidth)
774
775 // It might be easier to swap these (if REFLECTed) and just use XPOS down below...
776 // That way, you could simply set XPOS to leftMargin if !REFLECT and to rightMargin otherwise.
777 // Still have to be careful with the DATA and IWIDTH values though...
778
779 //      if ((!flagREFLECT && (rightMargin < 0 || leftMargin > lbufWidth))
780 //              || (flagREFLECT && (leftMargin < 0 || rightMargin > lbufWidth)))
781 //              return;
782         if ((!flagREFLECT && (endPos < 0 || startPos > lbufWidth))
783                 || (flagREFLECT && (startPos < 0 || endPos > lbufWidth)))
784                 return;
785
786         // Otherwise, find the clip limits and clip the phrase as well...
787         // NOTE: I'm fudging here by letting the actual blit overstep the bounds of the
788         //       line buffer, but it shouldn't matter since there are two unused line
789         //       buffers below and nothing above and I'll at most write 8 bytes outside
790         //       the line buffer... I could use a fractional clip begin/end value, but
791         //       this makes the blit a *lot* more hairy. I might fix this in the future
792         //       if it becomes necessary. (JLH)
793         //       Probably wouldn't be *that* hairy. Just use a delta that tells the inner loop
794         //       which pixel in the phrase is being written, and quit when either end of phrases
795         //       is reached or line buffer extents are surpassed.
796
797 //This stuff is probably wrong as well... !!! FIX !!!
798 //The strange thing is that it seems to work, but that's no guarantee that it's bulletproof!
799 //Yup. Seems that JagMania doesn't work correctly with this...
800 //Dunno if this is the problem, but Atari Karts is showing *some* of the road now...
801 //      if (!flagREFLECT)
802
803 /*
804         if (leftMargin < 0)
805                 clippedWidth = 0 - leftMargin,
806                 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth],
807                 leftMargin = 0 - (clippedWidth % phraseWidthToPixels[depth]);
808 //              leftMargin = 0;
809
810         if (rightMargin > lbufWidth)
811                 clippedWidth = rightMargin - lbufWidth,
812                 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];//,
813 //              rightMargin = lbufWidth + (clippedWidth % phraseWidthToPixels[depth]);
814 //              rightMargin = lbufWidth;
815 */
816 if (depth > 5)
817         WriteLog("OP: We're about to encounter a divide by zero error!\n");
818         // NOTE: We're just using endPos to figure out how much, if any, to clip by.
819         // ALSO: There may be another case where we start out of bounds and end out of bounds...!
820         // !!! FIX !!!
821         if (startPos < 0)                       // Case #1: Begin out, end in, L to R
822                 clippedWidth = 0 - startPos,
823                 dataClippedWidth = phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth],
824                 startPos = 0 - (clippedWidth % phraseWidthToPixels[depth]);
825
826         if (endPos < 0)                         // Case #2: Begin in, end out, R to L
827                 clippedWidth = 0 - endPos,
828                 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];
829
830         if (endPos > lbufWidth)         // Case #3: Begin in, end out, L to R
831                 clippedWidth = endPos - lbufWidth,
832                 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];
833
834         if (startPos > lbufWidth)       // Case #4: Begin out, end in, R to L
835                 clippedWidth = startPos - lbufWidth,
836                 dataClippedWidth = phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth],
837                 startPos = lbufWidth + (clippedWidth % phraseWidthToPixels[depth]);
838
839         // If the image is sitting on the line buffer left or right edge, we need to compensate
840         // by decreasing the image phrase width accordingly.
841         iwidth -= phraseClippedWidth;
842
843         // Also, if we're clipping the phrase we need to make sure we're in the correct part of
844         // the pixel data.
845 //      data += phraseClippedWidth * (pitch << 3);
846         data += dataClippedWidth * pitch;
847
848         // NOTE: When the bitmap is in REFLECT mode, the XPOS marks the *right* side of the
849         //       bitmap! This makes clipping & etc. MUCH, much easier...!
850 //      uint32 lbufAddress = 0x1800 + (!in24BPPMode ? leftMargin * 2 : leftMargin * 4);
851 //Why does this work right when multiplying startPos by 2 (instead of 4) for 24 BPP mode?
852 //Is this a bug in the OP?
853         uint32 lbufAddress = 0x1800 + (!in24BPPMode ? startPos * 2 : startPos * 2);
854         uint8 * currentLineBuffer = &tom_ram_8[lbufAddress];
855
856         // Render.
857
858 // Hmm. We check above for 24 BPP mode, but don't do anything about it below...
859 // If we *were* in 24 BPP mode, how would you convert CRY to RGB24? Seems to me
860 // that if you're in CRY mode then you wouldn't be able to use 24 BPP bitmaps
861 // anyway.
862 // This seems to be the case (at least according to the Midsummer docs)...!
863
864         if (depth == 0)                                                                 // 1 BPP
865         {
866                 // The LSB of flags is OPFLAG_REFLECT, so sign extend it and or 2 into it.
867                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
868
869                 // Fetch 1st phrase...
870                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
871 //Note that firstPix should only be honored *if* we start with the 1st phrase of the bitmap
872 //i.e., we didn't clip on the margin... !!! FIX !!!
873                 pixels <<= firstPix;                                            // Skip first N pixels (N=firstPix)...
874                 int i = firstPix;                                                       // Start counter at right spot...
875
876                 while (iwidth--)
877                 {
878                         while (i++ < 64)
879                         {
880                                 uint8 bit = pixels >> 63;
881                                 if (flagTRANS && bit == 0)
882                                         ;       // Do nothing...
883                                 else
884                                 {
885                                         if (!flagRMW)
886 //Optimize: Set palleteRAM16 to beginning of palette RAM + index*2 and use only [bit] as index...
887 //Won't optimize RMW case though...
888                                                 // This is the *only* correct use of endian-dependent code
889                                                 // (i.e., mem-to-mem direct copying)!
890                                                 *(uint16 *)currentLineBuffer = paletteRAM16[index | bit];
891                                         else
892                                                 *currentLineBuffer = 
893                                                         BLEND_CR(*currentLineBuffer, paletteRAM[(index | bit) << 1]),
894                                                 *(currentLineBuffer + 1) = 
895                                                         BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bit) << 1) + 1]);
896                                 }
897
898                                 currentLineBuffer += lbufDelta;
899                                 pixels <<= 1;
900                         }
901                         i = 0;
902                         // Fetch next phrase...
903                         data += pitch;
904                         pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
905                 }
906         }
907         else if (depth == 1)                                                    // 2 BPP
908         {
909 if (firstPix)
910         WriteLog("OP: Fixed bitmap @ 2 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
911                 index &= 0xFC;                                                          // Top six bits form CLUT index
912                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
913                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
914
915                 while (iwidth--)
916                 {
917                         // Fetch phrase...
918                         uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
919                         data += pitch;
920
921                         for(int i=0; i<32; i++)
922                         {
923                                 uint8 bits = pixels >> 62;
924 // Seems to me that both of these are in the same endian, so we could cast it as
925 // uint16 * and do straight across copies (what about 24 bpp? Treat it differently...)
926 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
927 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
928 // No, it isn't because we read the memory in an endian safe way--this *won't* work...
929                                 if (flagTRANS && bits == 0)
930                                         ;       // Do nothing...
931                                 else
932                                 {
933                                         if (!flagRMW)
934                                                 *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
935                                         else
936                                                 *currentLineBuffer = 
937                                                         BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
938                                                 *(currentLineBuffer + 1) = 
939                                                         BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
940                                 }
941
942                                 currentLineBuffer += lbufDelta;
943                                 pixels <<= 2;
944                         }
945                 }
946         }
947         else if (depth == 2)                                                    // 4 BPP
948         {
949 if (firstPix)
950         WriteLog("OP: Fixed bitmap @ 4 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
951                 index &= 0xF0;                                                          // Top four bits form CLUT index
952                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
953                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
954
955                 while (iwidth--)
956                 {
957                         // Fetch phrase...
958                         uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
959                         data += pitch;
960
961                         for(int i=0; i<16; i++)
962                         {
963                                 uint8 bits = pixels >> 60;
964 // Seems to me that both of these are in the same endian, so we could cast it as
965 // uint16 * and do straight across copies (what about 24 bpp? Treat it differently...)
966 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
967 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
968 // No, it isn't because we read the memory in an endian safe way--this *won't* work...
969                                 if (flagTRANS && bits == 0)
970                                         ;       // Do nothing...
971                                 else
972                                 {
973                                         if (!flagRMW)
974                                                 *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
975                                         else
976                                                 *currentLineBuffer = 
977                                                         BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
978                                                 *(currentLineBuffer + 1) = 
979                                                         BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
980                                 }
981
982                                 currentLineBuffer += lbufDelta;
983                                 pixels <<= 4;
984                         }
985                 }
986         }
987         else if (depth == 3)                                                    // 8 BPP
988         {
989                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
990                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
991
992                 // Fetch 1st phrase...
993                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
994 //Note that firstPix should only be honored *if* we start with the 1st phrase of the bitmap
995 //i.e., we didn't clip on the margin... !!! FIX !!!
996                 firstPix &= 0x30;                                                       // Only top two bits are valid for 8 BPP
997                 pixels <<= firstPix;                                            // Skip first N pixels (N=firstPix)...
998                 int i = firstPix >> 3;                                          // Start counter at right spot...
999
1000                 while (iwidth--)
1001                 {
1002                         while (i++ < 8)
1003                         {
1004                                 uint8 bits = pixels >> 56;
1005 // Seems to me that both of these are in the same endian, so we could cast it as
1006 // uint16 * and do straight across copies (what about 24 bpp? Treat it differently...)
1007 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
1008 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
1009 // No, it isn't because we read the memory in an endian safe way--this *won't* work...
1010                                 if (flagTRANS && bits == 0)
1011                                         ;       // Do nothing...
1012                                 else
1013                                 {
1014                                         if (!flagRMW)
1015                                                 *(uint16 *)currentLineBuffer = paletteRAM16[bits];
1016                                         else
1017                                                 *currentLineBuffer = 
1018                                                         BLEND_CR(*currentLineBuffer, paletteRAM[bits << 1]),
1019                                                 *(currentLineBuffer + 1) = 
1020                                                         BLEND_Y(*(currentLineBuffer + 1), paletteRAM[(bits << 1) + 1]);
1021                                 }
1022
1023                                 currentLineBuffer += lbufDelta;
1024                                 pixels <<= 8;
1025                         }
1026                         i = 0;
1027                         // Fetch next phrase...
1028                         data += pitch;
1029                         pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1030                 }
1031         }
1032         else if (depth == 4)                                                    // 16 BPP
1033         {
1034 if (firstPix)
1035         WriteLog("OP: Fixed bitmap @ 16 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1036                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1037                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1038
1039                 while (iwidth--)
1040                 {
1041                         // Fetch phrase...
1042                         uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1043                         data += pitch;
1044
1045                         for(int i=0; i<4; i++)
1046                         {
1047                                 uint8 bitsHi = pixels >> 56, bitsLo = pixels >> 48;
1048 // Seems to me that both of these are in the same endian, so we could cast it as
1049 // uint16 * and do straight across copies (what about 24 bpp? Treat it differently...)
1050 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
1051 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
1052 // No, it isn't because we read the memory in an endian safe way--it *won't* work...
1053                                 if (flagTRANS && (bitsLo | bitsHi) == 0)
1054                                         ;       // Do nothing...
1055                                 else
1056                                 {
1057                                         if (!flagRMW)
1058                                                 *currentLineBuffer = bitsHi,
1059                                                 *(currentLineBuffer + 1) = bitsLo;
1060                                         else
1061                                                 *currentLineBuffer = 
1062                                                         BLEND_CR(*currentLineBuffer, bitsHi),
1063                                                 *(currentLineBuffer + 1) = 
1064                                                         BLEND_Y(*(currentLineBuffer + 1), bitsLo);
1065                                 }
1066
1067                                 currentLineBuffer += lbufDelta;
1068                                 pixels <<= 16;
1069                         }
1070                 }
1071         }
1072         else if (depth == 5)                                                    // 24 BPP
1073         {
1074 //Looks like Iron Soldier is the only game that uses 24BPP mode...
1075 //There *might* be others...
1076 //WriteLog("OP: Writing 24 BPP bitmap!\n");
1077 if (firstPix)
1078         WriteLog("OP: Fixed bitmap @ 24 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1079                 // Not sure, but I think RMW only works with 16 BPP and below, and only in CRY mode...
1080                 // The LSB of flags is OPFLAG_REFLECT, so sign extend it and OR 4 into it.
1081                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 4) | 0x04;
1082
1083                 while (iwidth--)
1084                 {
1085                         // Fetch phrase...
1086                         uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1087                         data += pitch;
1088
1089                         for(int i=0; i<2; i++)
1090                         {
1091                                 // We don't use a 32-bit var here because of endian issues...!
1092                                 uint8 bits3 = pixels >> 56, bits2 = pixels >> 48,
1093                                         bits1 = pixels >> 40, bits0 = pixels >> 32;
1094
1095                                 if (flagTRANS && (bits3 | bits2 | bits1 | bits0) == 0)
1096                                         ;       // Do nothing...
1097                                 else
1098                                         *currentLineBuffer = bits3,
1099                                         *(currentLineBuffer + 1) = bits2,
1100                                         *(currentLineBuffer + 2) = bits1,
1101                                         *(currentLineBuffer + 3) = bits0;
1102
1103                                 currentLineBuffer += lbufDelta;
1104                                 pixels <<= 32;
1105                         }
1106                 }
1107         }
1108 }
1109
1110 //
1111 // Store scaled bitmap in line buffer
1112 //
1113 void OPProcessScaledBitmap(uint64 p0, uint64 p1, uint64 p2, bool render)
1114 {
1115 // Need to make sure that when writing that it stays within the line buffer...
1116 // LBUF ($F01800 - $F01D9E) 360 x 32-bit RAM
1117         uint8 depth = (p1 >> 12) & 0x07;                                // Color depth of image
1118         int32 xpos = ((int16)((p1 << 4) & 0xFFFF)) >> 4;// Image xpos in LBUF
1119         uint32 iwidth = (p1 >> 28) & 0x3FF;                             // Image width in *phrases*
1120         uint32 data = (p0 >> 40) & 0xFFFFF8;                    // Pixel data address
1121 //#ifdef OP_DEBUG_BMP
1122 // Prolly should use this... Though not sure exactly how.
1123 //Use the upper bits as an offset into the phrase depending on the BPP. That's how!
1124         uint32 firstPix = (p1 >> 49) & 0x3F;
1125 //This is WEIRD! I'm sure I saw Atari Karts request 8 BPP FIRSTPIX! What happened???
1126 if (firstPix)
1127         WriteLog("OP: FIRSTPIX != 0! (Scaled BM)\n");
1128 //#endif
1129 // We can ignore the RELEASE (high order) bit for now--probably forever...!
1130 //      uint8 flags = (p1 >> 45) & 0x0F;        // REFLECT, RMW, TRANS, RELEASE
1131 //Optimize: break these out to their own BOOL values [DONE]
1132         uint8 flags = (p1 >> 45) & 0x07;                                // REFLECT (0), RMW (1), TRANS (2)
1133         bool flagREFLECT = (flags & OPFLAG_REFLECT ? true : false),
1134                 flagRMW = (flags & OPFLAG_RMW ? true : false),
1135                 flagTRANS = (flags & OPFLAG_TRANS ? true : false);
1136         uint8 index = (p1 >> 37) & 0xFE;                                // CLUT index offset (upper pix, 1-4 bpp)
1137         uint32 pitch = (p1 >> 15) & 0x07;                               // Phrase pitch
1138
1139         uint8 * tom_ram_8 = tom_get_ram_pointer();
1140         uint8 * paletteRAM = &tom_ram_8[0x400];
1141         // This is OK as long as it's used correctly: For 16-bit RAM to RAM direct copies--NOT
1142         // for use when using endian-corrected data (i.e., any of the *ReadWord functions!)
1143         uint16 * paletteRAM16 = (uint16 *)paletteRAM;
1144
1145         uint8 hscale = p2 & 0xFF;
1146 // Hmm. It seems that fixing the horizontal scale necessitated re-fixing this. Not sure why,
1147 // but seems to be consistent with the vertical scaling now (and it may turn out to be wrong!)...
1148         uint8 horizontalRemainder = hscale;                             // Not sure if it starts full, but seems reasonable [It's not!]
1149 //      uint8 horizontalRemainder = 0;                                  // Let's try zero! Seems to work! Yay! [No, it doesn't!]
1150         int32 scaledWidthInPixels = (iwidth * phraseWidthToPixels[depth] * hscale) >> 5;
1151         uint32 scaledPhrasePixels = (phraseWidthToPixels[depth] * hscale) >> 5;
1152
1153 //      WriteLog("bitmap %ix? %ibpp at %i,? firstpix=? data=0x%.8x pitch %i hflipped=%s dwidth=? (linked to ?) RMW=%s Tranparent=%s\n",
1154 //              iwidth, op_bitmap_bit_depth[bitdepth], xpos, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), (flags&OPFLAG_RMW ? "yes" : "no"), (flags&OPFLAG_TRANS ? "yes" : "no"));
1155
1156 // Looks like an hscale of zero means don't draw!
1157         if (!render || iwidth == 0 || hscale == 0)
1158                 return;
1159
1160 /*extern int start_logging;
1161 if (start_logging)
1162         WriteLog("OP: Scaled bitmap %ix? %ibpp at %i,? hscale=%02X fpix=%i data=%08X pitch %i hflipped=%s dwidth=? (linked to %08X) Transluency=%s\n",
1163                 iwidth, op_bitmap_bit_depth[depth], xpos, hscale, firstPix, data, pitch, (flagREFLECT ? "yes" : "no"), op_pointer, (flagRMW ? "yes" : "no"));*/
1164 //#define OP_DEBUG_BMP
1165 //#ifdef OP_DEBUG_BMP
1166 //      WriteLog("OP: Scaled bitmap %ix%i %ibpp at %i,%i firstpix=%i data=0x%.8x pitch %i hflipped=%s dwidth=%i (linked to 0x%.8x) Transluency=%s\n",
1167 //              iwidth, height, op_bitmap_bit_depth[bitdepth], xpos, ypos, firstPix, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), dwidth, op_pointer, (flags&OPFLAG_RMW ? "yes" : "no"));
1168 //#endif
1169
1170         int32 startPos = xpos, endPos = xpos +
1171                 (!flagREFLECT ? scaledWidthInPixels - 1 : -(scaledWidthInPixels + 1));
1172         uint32 clippedWidth = 0, phraseClippedWidth = 0, dataClippedWidth = 0;
1173         bool in24BPPMode = (((GET16(tom_ram_8, 0x0028) >> 1) & 0x03) == 1 ? true : false);      // VMODE
1174         // Not sure if this is Jaguar Two only location or what...
1175         // From the docs, it is... If we want to limit here we should think of something else.
1176 //      int32 limit = GET16(tom_ram_8, 0x0008);                 // LIMIT
1177         int32 limit = 720;
1178         int32 lbufWidth = (!in24BPPMode ? limit - 1 : (limit / 2) - 1); // Zero based limit...
1179
1180         // If the image is completely to the left or right of the line buffer, then bail.
1181 //If in REFLECT mode, then these values are swapped! !!! FIX !!! [DONE]
1182 //There are four possibilities:
1183 //  1. image sits on left edge and no REFLECT; starts out of bounds but ends in bounds.
1184 //  2. image sits on left edge and REFLECT; starts in bounds but ends out of bounds.
1185 //  3. image sits on right edge and REFLECT; starts out of bounds but ends in bounds.
1186 //  4. image sits on right edge and no REFLECT; starts in bounds but ends out of bounds.
1187 //Numbers 2 & 4 can be caught by checking the LBUF clip while in the inner loop,
1188 // numbers 1 & 3 are of concern.
1189 // This *indirectly* handles only cases 2 & 4! And is WRONG if REFLECT is set...!
1190 //      if (rightMargin < 0 || leftMargin > lbufWidth)
1191
1192 // It might be easier to swap these (if REFLECTed) and just use XPOS down below...
1193 // That way, you could simply set XPOS to leftMargin if !REFLECT and to rightMargin otherwise.
1194 // Still have to be careful with the DATA and IWIDTH values though...
1195
1196         if ((!flagREFLECT && (endPos < 0 || startPos > lbufWidth))
1197                 || (flagREFLECT && (startPos < 0 || endPos > lbufWidth)))
1198                 return;
1199
1200         // Otherwise, find the clip limits and clip the phrase as well...
1201         // NOTE: I'm fudging here by letting the actual blit overstep the bounds of the
1202         //       line buffer, but it shouldn't matter since there are two unused line
1203         //       buffers below and nothing above and I'll at most write 40 bytes outside
1204         //       the line buffer... I could use a fractional clip begin/end value, but
1205         //       this makes the blit a *lot* more hairy. I might fix this in the future
1206         //       if it becomes necessary. (JLH)
1207         //       Probably wouldn't be *that* hairy. Just use a delta that tells the inner loop
1208         //       which pixel in the phrase is being written, and quit when either end of phrases
1209         //       is reached or line buffer extents are surpassed.
1210
1211 //This stuff is probably wrong as well... !!! FIX !!!
1212 //The strange thing is that it seems to work, but that's no guarantee that it's bulletproof!
1213 //Yup. Seems that JagMania doesn't work correctly with this...
1214 //Dunno if this is the problem, but Atari Karts is showing *some* of the road now...
1215 //Actually, it is! Or, it was. It doesn't seem to be clipping here, so the problem lies
1216 //elsewhere! Hmm. Putting the scaling code into the 1/2/8 BPP cases seems to draw the ground
1217 // a bit more accurately... Strange!
1218 //It's probably a case of the REFLECT flag being set and the background being written
1219 //from the right side of the screen...
1220 //But no, it isn't... At least if the diagnostics are telling the truth!
1221
1222         // NOTE: We're just using endPos to figure out how much, if any, to clip by.
1223         // ALSO: There may be another case where we start out of bounds and end out of bounds...!
1224         // !!! FIX !!!
1225
1226 //There's a problem here with scaledPhrasePixels in that it can be forced to zero when
1227 //the scaling factor is small. So fix it already! !!! FIX !!!
1228 /*if (scaledPhrasePixels == 0)
1229 {
1230         WriteLog("OP: [Scaled] We're about to encounter a divide by zero error!\n");
1231         DumpScaledObject(p0, p1, p2);
1232 }//*/
1233 //NOTE: I'm almost 100% sure that this is wrong... And it is! :-p
1234
1235 //Try a simple example...
1236 // Let's say we have an 8 BPP scanline with an hscale of $80 (4). Our xpos is -10,
1237 // non-flipped. Pixels in the bitmap are XYZXYZXYZXYZXYZ.
1238 // Scaled up, they would be XXXXYYYYZZZZXXXXYYYYZZZZXXXXYYYYZZZZ...
1239 //
1240 // Normally, we would expect this in the line buffer:
1241 // ZZXXXXYYYYZZZZXXXXYYYYZZZZ...
1242 //
1243 // But instead we're getting:
1244 // XXXXYYYYZZZZXXXXYYYYZZZZ...
1245 //
1246 // or are we??? It would seem so, simply by virtue of the fact that we're NOT starting
1247 // on a negative boundary--or are we? Hmm...
1248 // cw = 10, dcw = pcw = 10 / ([8 * 4 = 32] 32) = 0, sp = -10
1249 //
1250 // Let's try a real world example:
1251 //
1252 //OP: Scaled bitmap (70, 8 BPP, spp=28) sp (-400) < 0... [new sp=-8, cw=400, dcw=pcw=14]
1253 //OP: Scaled bitmap (6F, 8 BPP, spp=27) sp (-395) < 0... [new sp=-17, cw=395, dcw=pcw=14]
1254 //
1255 // Really, spp is 27.75 in the second case...
1256 // So... If we do 395 / 27.75, we get 14. Ok so far... If we scale that against the
1257 // start position (14 * 27.75), we get -6.5... NOT -17!
1258
1259 //Now it seems we're working OK, at least for the first case...
1260 uint32 scaledPhrasePixelsUS = phraseWidthToPixels[depth] * hscale;
1261
1262         if (startPos < 0)                       // Case #1: Begin out, end in, L to R
1263 {
1264 extern int start_logging;
1265 if (start_logging)
1266         WriteLog("OP: Scaled bitmap (%02X, %u BPP, spp=%u) start pos (%i) < 0...", hscale, op_bitmap_bit_depth[depth], scaledPhrasePixels, startPos);
1267 //              clippedWidth = 0 - startPos,
1268                 clippedWidth = (0 - startPos) << 5,
1269 //              dataClippedWidth = phraseClippedWidth = clippedWidth / scaledPhrasePixels,
1270                 dataClippedWidth = phraseClippedWidth = (clippedWidth / scaledPhrasePixelsUS) >> 5,
1271 //              startPos = 0 - (clippedWidth % scaledPhrasePixels);
1272                 startPos += (dataClippedWidth * scaledPhrasePixelsUS) >> 5;
1273 if (start_logging)
1274         WriteLog(" [new sp=%i, cw=%i, dcw=pcw=%i]\n", startPos, clippedWidth, dataClippedWidth);
1275 }
1276
1277         if (endPos < 0)                         // Case #2: Begin in, end out, R to L
1278                 clippedWidth = 0 - endPos,
1279                 phraseClippedWidth = clippedWidth / scaledPhrasePixels;
1280
1281         if (endPos > lbufWidth)         // Case #3: Begin in, end out, L to R
1282                 clippedWidth = endPos - lbufWidth,
1283                 phraseClippedWidth = clippedWidth / scaledPhrasePixels;
1284
1285         if (startPos > lbufWidth)       // Case #4: Begin out, end in, R to L
1286                 clippedWidth = startPos - lbufWidth,
1287                 dataClippedWidth = phraseClippedWidth = clippedWidth / scaledPhrasePixels,
1288                 startPos = lbufWidth + (clippedWidth % scaledPhrasePixels);
1289
1290 extern int op_start_log;
1291 if (op_start_log && clippedWidth != 0)
1292         WriteLog("OP: Clipped line. SP=%i, EP=%i, clip=%u, iwidth=%u, hscale=%02X\n", startPos, endPos, clippedWidth, iwidth, hscale);
1293 if (op_start_log && startPos == 13)
1294 {
1295         WriteLog("OP: Scaled line. SP=%i, EP=%i, clip=%u, iwidth=%u, hscale=%02X, depth=%u, firstPix=%u\n", startPos, endPos, clippedWidth, iwidth, hscale, depth, firstPix);
1296         DumpScaledObject(p0, p1, p2);
1297         if (iwidth == 7)
1298         {
1299                 WriteLog("    %08X: ", data);
1300                 for(int i=0; i<7*8; i++)
1301                         WriteLog("%02X ", JaguarReadByte(data+i));
1302                 WriteLog("\n");
1303         }
1304 }
1305         // If the image is sitting on the line buffer left or right edge, we need to compensate
1306         // by decreasing the image phrase width accordingly.
1307         iwidth -= phraseClippedWidth;
1308
1309         // Also, if we're clipping the phrase we need to make sure we're in the correct part of
1310         // the pixel data.
1311 //      data += phraseClippedWidth * (pitch << 3);
1312         data += dataClippedWidth * (pitch << 3);
1313
1314         // NOTE: When the bitmap is in REFLECT mode, the XPOS marks the *right* side of the
1315         //       bitmap! This makes clipping & etc. MUCH, much easier...!
1316 //      uint32 lbufAddress = 0x1800 + (!in24BPPMode ? leftMargin * 2 : leftMargin * 4);
1317 //      uint32 lbufAddress = 0x1800 + (!in24BPPMode ? startPos * 2 : startPos * 4);
1318         uint32 lbufAddress = 0x1800 + startPos * 2;
1319         uint8 * currentLineBuffer = &tom_ram_8[lbufAddress];
1320 //uint8 * lineBufferLowerLimit = &tom_ram_8[0x1800],
1321 //      * lineBufferUpperLimit = &tom_ram_8[0x1800 + 719];
1322
1323         // Render.
1324
1325 // Hmm. We check above for 24 BPP mode, but don't do anything about it below...
1326 // If we *were* in 24 BPP mode, how would you convert CRY to RGB24? Seems to me
1327 // that if you're in CRY mode then you wouldn't be able to use 24 BPP bitmaps
1328 // anyway.
1329 // This seems to be the case (at least according to the Midsummer docs)...!
1330
1331         if (depth == 0)                                                                 // 1 BPP
1332         {
1333 if (firstPix != 0)
1334         WriteLog("OP: Scaled bitmap @ 1 BPP requesting FIRSTPIX!\n");
1335                 // The LSB of flags is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1336                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1337
1338                 int pixCount = 0;
1339                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1340
1341                 while ((int32)iwidth > 0)
1342                 {
1343                         uint8 bits = pixels >> 63;
1344
1345                         if (flagTRANS && bits == 0)
1346                                 ;       // Do nothing...
1347                         else
1348                         {
1349                                 if (!flagRMW)
1350                                         // This is the *only* correct use of endian-dependent code
1351                                         // (i.e., mem-to-mem direct copying)!
1352                                         *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1353                                 else
1354                                         *currentLineBuffer = 
1355                                                 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1356                                         *(currentLineBuffer + 1) = 
1357                                                 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1358                         }
1359
1360                         currentLineBuffer += lbufDelta;
1361
1362 /*                      horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1363                         while (horizontalRemainder & 0x80)
1364                         {
1365                                 horizontalRemainder += hscale;
1366                                 pixCount++;
1367                                 pixels <<= 1;
1368                         }//*/
1369                         while (horizontalRemainder <= 0x20)             // I.e., it's <= 0 (*before* subtraction)
1370                         {
1371                                 horizontalRemainder += hscale;
1372                                 pixCount++;
1373                                 pixels <<= 1;
1374                         }
1375                         horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1376
1377                         if (pixCount > 63)
1378                         {
1379                                 int phrasesToSkip = pixCount / 64, pixelShift = pixCount % 64;
1380
1381                                 data += (pitch << 3) * phrasesToSkip;
1382                                 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1383                                 pixels <<= 1 * pixelShift;
1384                                 iwidth -= phrasesToSkip;
1385                                 pixCount = pixelShift;
1386                         }
1387                 }
1388         }
1389         else if (depth == 1)                                                    // 2 BPP
1390         {
1391 if (firstPix != 0)
1392         WriteLog("OP: Scaled bitmap @ 2 BPP requesting FIRSTPIX!\n");
1393                 index &= 0xFC;                                                          // Top six bits form CLUT index
1394                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1395                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1396
1397                 int pixCount = 0;
1398                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1399
1400                 while ((int32)iwidth > 0)
1401                 {
1402                         uint8 bits = pixels >> 62;
1403
1404                         if (flagTRANS && bits == 0)
1405                                 ;       // Do nothing...
1406                         else
1407                         {
1408                                 if (!flagRMW)
1409                                         // This is the *only* correct use of endian-dependent code
1410                                         // (i.e., mem-to-mem direct copying)!
1411                                         *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1412                                 else
1413                                         *currentLineBuffer = 
1414                                                 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1415                                         *(currentLineBuffer + 1) = 
1416                                                 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1417                         }
1418
1419                         currentLineBuffer += lbufDelta;
1420
1421 /*                      horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1422                         while (horizontalRemainder & 0x80)
1423                         {
1424                                 horizontalRemainder += hscale;
1425                                 pixCount++;
1426                                 pixels <<= 2;
1427                         }//*/
1428                         while (horizontalRemainder <= 0x20)             // I.e., it's <= 0 (*before* subtraction)
1429                         {
1430                                 horizontalRemainder += hscale;
1431                                 pixCount++;
1432                                 pixels <<= 2;
1433                         }
1434                         horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1435
1436                         if (pixCount > 31)
1437                         {
1438                                 int phrasesToSkip = pixCount / 32, pixelShift = pixCount % 32;
1439
1440                                 data += (pitch << 3) * phrasesToSkip;
1441                                 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1442                                 pixels <<= 2 * pixelShift;
1443                                 iwidth -= phrasesToSkip;
1444                                 pixCount = pixelShift;
1445                         }
1446                 }
1447         }
1448         else if (depth == 2)                                                    // 4 BPP
1449         {
1450 if (firstPix != 0)
1451         WriteLog("OP: Scaled bitmap @ 4 BPP requesting FIRSTPIX!\n");
1452                 index &= 0xF0;                                                          // Top four bits form CLUT index
1453                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1454                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1455
1456                 int pixCount = 0;
1457                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1458
1459                 while ((int32)iwidth > 0)
1460                 {
1461                         uint8 bits = pixels >> 60;
1462
1463                         if (flagTRANS && bits == 0)
1464                                 ;       // Do nothing...
1465                         else
1466                         {
1467                                 if (!flagRMW)
1468                                         // This is the *only* correct use of endian-dependent code
1469                                         // (i.e., mem-to-mem direct copying)!
1470                                         *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1471                                 else
1472                                         *currentLineBuffer = 
1473                                                 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1474                                         *(currentLineBuffer + 1) = 
1475                                                 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1476                         }
1477
1478                         currentLineBuffer += lbufDelta;
1479
1480 /*                      horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1481                         while (horizontalRemainder & 0x80)
1482                         {
1483                                 horizontalRemainder += hscale;
1484                                 pixCount++;
1485                                 pixels <<= 4;
1486                         }//*/
1487                         while (horizontalRemainder <= 0x20)             // I.e., it's <= 0 (*before* subtraction)
1488                         {
1489                                 horizontalRemainder += hscale;
1490                                 pixCount++;
1491                                 pixels <<= 4;
1492                         }
1493                         horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1494
1495                         if (pixCount > 15)
1496                         {
1497                                 int phrasesToSkip = pixCount / 16, pixelShift = pixCount % 16;
1498
1499                                 data += (pitch << 3) * phrasesToSkip;
1500                                 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1501                                 pixels <<= 4 * pixelShift;
1502                                 iwidth -= phrasesToSkip;
1503                                 pixCount = pixelShift;
1504                         }
1505                 }
1506         }
1507         else if (depth == 3)                                                    // 8 BPP
1508         {
1509 if (firstPix)
1510         WriteLog("OP: Scaled bitmap @ 8 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1511                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1512                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1513
1514                 int pixCount = 0;
1515                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1516
1517                 while ((int32)iwidth > 0)
1518                 {
1519                         uint8 bits = pixels >> 56;
1520
1521                         if (flagTRANS && bits == 0)
1522                                 ;       // Do nothing...
1523                         else
1524                         {
1525                                 if (!flagRMW)
1526                                         // This is the *only* correct use of endian-dependent code
1527                                         // (i.e., mem-to-mem direct copying)!
1528                                         *(uint16 *)currentLineBuffer = paletteRAM16[bits];
1529 /*                              {
1530                                         if (currentLineBuffer >= lineBufferLowerLimit && currentLineBuffer <= lineBufferUpperLimit)
1531                                                 *(uint16 *)currentLineBuffer = paletteRAM16[bits];
1532                                 }*/
1533                                 else
1534                                         *currentLineBuffer = 
1535                                                 BLEND_CR(*currentLineBuffer, paletteRAM[bits << 1]),
1536                                         *(currentLineBuffer + 1) = 
1537                                                 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[(bits << 1) + 1]);
1538                         }
1539
1540                         currentLineBuffer += lbufDelta;
1541
1542                         while (horizontalRemainder <= 0x20)             // I.e., it's <= 0 (*before* subtraction)
1543                         {
1544                                 horizontalRemainder += hscale;
1545                                 pixCount++;
1546                                 pixels <<= 8;
1547                         }
1548                         horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1549
1550                         if (pixCount > 7)
1551                         {
1552                                 int phrasesToSkip = pixCount / 8, pixelShift = pixCount % 8;
1553
1554                                 data += (pitch << 3) * phrasesToSkip;
1555                                 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1556                                 pixels <<= 8 * pixelShift;
1557                                 iwidth -= phrasesToSkip;
1558                                 pixCount = pixelShift;
1559                         }
1560                 }
1561         }
1562         else if (depth == 4)                                                    // 16 BPP
1563         {
1564 if (firstPix != 0)
1565         WriteLog("OP: Scaled bitmap @ 16 BPP requesting FIRSTPIX!\n");
1566                 // The LSB is OPFLAG_REFLECT, so sign extend it and OR 2 into it.
1567                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1568
1569                 int pixCount = 0;
1570                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1571
1572                 while ((int32)iwidth > 0)
1573                 {
1574                         uint8 bitsHi = pixels >> 56, bitsLo = pixels >> 48;
1575
1576                         if (flagTRANS && (bitsLo | bitsHi) == 0)
1577                                 ;       // Do nothing...
1578                         else
1579                         {
1580                                 if (!flagRMW)
1581                                         *currentLineBuffer = bitsHi,
1582                                         *(currentLineBuffer + 1) = bitsLo;
1583                                 else
1584                                         *currentLineBuffer = 
1585                                                 BLEND_CR(*currentLineBuffer, bitsHi),
1586                                         *(currentLineBuffer + 1) = 
1587                                                 BLEND_Y(*(currentLineBuffer + 1), bitsLo);
1588                         }
1589
1590                         currentLineBuffer += lbufDelta;
1591
1592 /*                      horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1593                         while (horizontalRemainder & 0x80)
1594                         {
1595                                 horizontalRemainder += hscale;
1596                                 pixCount++;
1597                                 pixels <<= 16;
1598                         }//*/
1599                         while (horizontalRemainder <= 0x20)             // I.e., it's <= 0 (*before* subtraction)
1600                         {
1601                                 horizontalRemainder += hscale;
1602                                 pixCount++;
1603                                 pixels <<= 16;
1604                         }
1605                         horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1606 //*/
1607                         if (pixCount > 3)
1608                         {
1609                                 int phrasesToSkip = pixCount / 4, pixelShift = pixCount % 4;
1610
1611                                 data += (pitch << 3) * phrasesToSkip;
1612                                 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1613                                 pixels <<= 16 * pixelShift;
1614
1615                                 iwidth -= phrasesToSkip;
1616
1617                                 pixCount = pixelShift;
1618                         }
1619                 }
1620         }
1621         else if (depth == 5)                                                    // 24 BPP
1622         {
1623 //I'm not sure that you can scale a 24 BPP bitmap properly--the JTRM seems to indicate as much.
1624 WriteLog("OP: Writing 24 BPP scaled bitmap!\n");
1625 if (firstPix != 0)
1626         WriteLog("OP: Scaled bitmap @ 24 BPP requesting FIRSTPIX!\n");
1627                 // Not sure, but I think RMW only works with 16 BPP and below, and only in CRY mode...
1628                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 4 into it.
1629                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 4) | 0x04;
1630
1631                 while (iwidth--)
1632                 {
1633                         // Fetch phrase...
1634                         uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1635                         data += pitch << 3;                                             // Multiply pitch * 8 (optimize: precompute this value)
1636
1637                         for(int i=0; i<2; i++)
1638                         {
1639                                 uint8 bits3 = pixels >> 56, bits2 = pixels >> 48,
1640                                         bits1 = pixels >> 40, bits0 = pixels >> 32;
1641
1642                                 if (flagTRANS && (bits3 | bits2 | bits1 | bits0) == 0)
1643                                         ;       // Do nothing...
1644                                 else
1645                                         *currentLineBuffer = bits3,
1646                                         *(currentLineBuffer + 1) = bits2,
1647                                         *(currentLineBuffer + 2) = bits1,
1648                                         *(currentLineBuffer + 3) = bits0;
1649
1650                                 currentLineBuffer += lbufDelta;
1651                                 pixels <<= 32;
1652                         }
1653                 }
1654         }
1655 }