]> Shamusworld >> Repos - virtualjaguar/blob - src/op.cpp
62be06681901ac49a6f311e5298a3cb20bcc7645
[virtualjaguar] / src / op.cpp
1 //
2 // Object Processor
3 //
4 // Original source by David Raingeard (Cal2)
5 // GCC/SDL port by Niels Wagenaar (Linux/WIN32) and Caz (BeOS)
6 // Extensive cleanups/fixes/rewrites by James L. Hammons
7 // (C) 2010 Underground Software
8 //
9 // JLH = James L. Hammons <jlhamm@acm.org>
10 //
11 // Who  When        What
12 // ---  ----------  -------------------------------------------------------------
13 // JLH  01/16/2010  Created this log ;-)
14 //
15
16 #include "op.h"
17
18 #include <stdlib.h>
19 #include <string.h>
20 #include "gpu.h"
21 #include "jaguar.h"
22 #include "log.h"
23 #include "m68k.h"
24 #include "memory.h"
25 #include "tom.h"
26
27 //#define OP_DEBUG
28 //#define OP_DEBUG_BMP
29
// Blend table lookups: the index is (existing value << 8) | delta value.
// Both arguments are fully parenthesized so that expression arguments (e.g.
// "a | b") are cast and shifted as a whole rather than operand by operand.
#define BLEND_Y(dst, src)	op_blend_y[(((uint16)(dst)) << 8) | ((uint16)(src))]
#define BLEND_CR(dst, src)	op_blend_cr[(((uint16)(dst)) << 8) | ((uint16)(src))]
32
33 #define OBJECT_TYPE_BITMAP      0                                       // 000
34 #define OBJECT_TYPE_SCALE       1                                       // 001
35 #define OBJECT_TYPE_GPU         2                                       // 010
36 #define OBJECT_TYPE_BRANCH      3                                       // 011
37 #define OBJECT_TYPE_STOP        4                                       // 100
38
39 #define CONDITION_EQUAL                         0
40 #define CONDITION_LESS_THAN                     1
41 #define CONDITION_GREATER_THAN          2
42 #define CONDITION_OP_FLAG_SET           3
43 #define CONDITION_SECOND_HALF_LINE      4
44
45 #define OPFLAG_RELEASE          8                                       // Bus release bit
46 #define OPFLAG_TRANS            4                                       // Transparency bit
47 #define OPFLAG_RMW                      2                                       // Read-Modify-Write bit
48 #define OPFLAG_REFLECT          1                                       // Horizontal mirror bit
49
50 // Private function prototypes
51
52 void OPProcessFixedBitmap(uint64 p0, uint64 p1, bool render);
53 void OPProcessScaledBitmap(uint64 p0, uint64 p1, uint64 p2, bool render);
54 void OPDiscoverObjects(uint32 address);
55 void OPDumpObjectList(void);
56 void DumpScaledObject(uint64 p0, uint64 p1, uint64 p2);
57 void DumpFixedObject(uint64 p0, uint64 p1);
58 void DumpBitmapCore(uint64 p0, uint64 p1);
59 uint64 OPLoadPhrase(uint32 offset);
60
61 // Local global variables
62
63 // Blend tables (64K each)
64 static uint8 op_blend_y[0x10000];
65 static uint8 op_blend_cr[0x10000];
66 // There may be a problem with this "RAM" overlapping (and thus being independent of)
67 // some of the regular TOM RAM...
68 //#warning objectp_ram is separated from TOM RAM--need to fix that!
69 //static uint8 objectp_ram[0x40];                       // This is based at $F00000
70 uint8 objectp_running = 0;
71 //bool objectp_stop_reading_list;
72
73 static uint8 op_bitmap_bit_depth[8] = { 1, 2, 4, 8, 16, 24, 32, 0 };
74 //static uint32 op_bitmap_bit_size[8] =
75 //      { (uint32)(0.125*65536), (uint32)(0.25*65536), (uint32)(0.5*65536), (uint32)(1*65536),
76 //        (uint32)(2*65536),     (uint32)(1*65536),    (uint32)(1*65536),   (uint32)(1*65536) };
77 static uint32 op_pointer;
78
79 int32 phraseWidthToPixels[8] = { 64, 32, 16, 8, 4, 2, 0, 0 };
80
81
82 //
83 // Object Processor initialization
84 //
85 void OPInit(void)
86 {
87         // Here we calculate the saturating blend of a signed 4-bit value and an
88         // existing Cyan/Red value as well as a signed 8-bit value and an existing intensity...
89         // Note: CRY is 4 bits Cyan, 4 bits Red, 16 bits intensitY
90         for(int i=0; i<256*256; i++)
91         {
92                 int y = (i >> 8) & 0xFF;
93                 int dy = (int8)i;                                       // Sign extend the Y index
94                 int c1 = (i >> 8) & 0x0F;
95                 int dc1 = (int8)(i << 4) >> 4;          // Sign extend the R index
96                 int c2 = (i >> 12) & 0x0F;
97                 int dc2 = (int8)(i & 0xF0) >> 4;        // Sign extend the C index
98
99                 y += dy;
100
101                 if (y < 0)
102                         y = 0;
103                 else if (y > 0xFF)
104                         y = 0xFF;
105
106                 op_blend_y[i] = y;
107
108                 c1 += dc1;
109
110                 if (c1 < 0)
111                         c1 = 0;
112                 else if (c1 > 0x0F)
113                         c1 = 0x0F;
114
115                 c2 += dc2;
116
117                 if (c2 < 0)
118                         c2 = 0;
119                 else if (c2 > 0x0F)
120                         c2 = 0x0F;
121
122                 op_blend_cr[i] = (c2 << 4) | c1;
123         }
124
125         OPReset();
126 }
127
128 //
129 // Object Processor reset
130 //
131 void OPReset(void)
132 {
133 //      memset(objectp_ram, 0x00, 0x40);
134         objectp_running = 0;
135 }
136
137 static const char * opType[8] =
138 { "(BITMAP)", "(SCALED BITMAP)", "(GPU INT)", "(BRANCH)", "(STOP)", "???", "???", "???" };
139 static const char * ccType[8] =
140         { "\"==\"", "\"<\"", "\">\"", "(opflag set)", "(second half line)", "?", "?", "?" };
141 static uint32 object[8192];
142 static uint32 numberOfObjects;
143 //static uint32 objectLink[8192];
144 //static uint32 numberOfLinks;
145
146 void OPDone(void)
147 {
148 //#warning "!!! Fix OL dump so that it follows links !!!"
149 //      const char * opType[8] =
150 //      { "(BITMAP)", "(SCALED BITMAP)", "(GPU INT)", "(BRANCH)", "(STOP)", "???", "???", "???" };
151 //      const char * ccType[8] =
152 //              { "\"==\"", "\"<\"", "\">\"", "(opflag set)", "(second half line)", "?", "?", "?" };
153
154         uint32 olp = OPGetListPointer();
155         WriteLog("\nOP: OLP = $%08X\n", olp);
156         WriteLog("OP: Phrase dump\n    ----------\n");
157
158 #if 0
159         for(uint32 i=0; i<0x100; i+=8)
160         {
161                 uint32 hi = JaguarReadLong(olp + i, OP), lo = JaguarReadLong(olp + i + 4, OP);
162                 WriteLog("\t%08X: %08X %08X %s", olp + i, hi, lo, opType[lo & 0x07]);
163
164                 if ((lo & 0x07) == 3)
165                 {
166                         uint16 ypos = (lo >> 3) & 0x7FF;
167                         uint8  cc   = (lo >> 14) & 0x03;
168                         uint32 link = ((hi << 11) | (lo >> 21)) & 0x3FFFF8;
169                         WriteLog(" YPOS=%u, CC=%s, link=%08X", ypos, ccType[cc], link);
170                 }
171
172                 WriteLog("\n");
173
174                 if ((lo & 0x07) == 0)
175                         DumpFixedObject(OPLoadPhrase(olp+i), OPLoadPhrase(olp+i+8));
176
177                 if ((lo & 0x07) == 1)
178                         DumpScaledObject(OPLoadPhrase(olp+i), OPLoadPhrase(olp+i+8), OPLoadPhrase(olp+i+16));
179         }
180
181         WriteLog("\n");
182 #else
183         numberOfObjects = 0;
184         OPDiscoverObjects(olp);
185         OPDumpObjectList();
186 #endif
187 }
188
189 void OPDiscoverObjects(uint32 address)
190 {
191         // Check to see if we've already seen this object
192         for(uint32 i=0; i<numberOfObjects; i++)
193         {
194                 if (address == object[i])
195                         return;
196         }
197
198         // Store the object...
199         object[numberOfObjects++] = address;
200         uint8 objectType = 0;
201
202         do
203         {
204                 uint32 hi = JaguarReadLong(address + 0, OP);
205                 uint32 lo = JaguarReadLong(address + 4, OP);
206                 objectType = lo & 0x07;
207                 uint32 link = ((hi << 11) | (lo >> 21)) & 0x3FFFF8;
208
209                 if (objectType == 3)
210                 {
211                         uint16 ypos = (lo >> 3) & 0x7FF;
212                         uint8  cc   = (lo >> 14) & 0x07;        // Proper # of bits == 3
213
214                         // Recursion needed to follow all links!
215                         OPDiscoverObjects(address + 8);
216                 }
217
218                 if (address == link)    // Ruh roh...
219                 {
220                         // Runaway recursive link is bad!
221                         return;
222                 }
223
224                 address = link;
225
226                 // Check to see if we've already seen this object, and add it if not
227                 bool seenObject = false;
228
229                 for(uint32 i=0; i<numberOfObjects; i++)
230                 {
231                         if (address == object[i])
232                         {
233                                 seenObject = true;
234                                 break;
235                         }
236                 }
237
238                 if (!seenObject)
239                         object[numberOfObjects++] = address;
240         }
241         while (objectType != 4);
242 }
243
244 void OPDumpObjectList(void)
245 {
246         for(uint32 i=0; i<numberOfObjects; i++)
247         {
248                 uint32 address = object[i];
249
250                 uint32 hi = JaguarReadLong(address + 0, OP);
251                 uint32 lo = JaguarReadLong(address + 4, OP);
252                 uint8 objectType = lo & 0x07;
253                 uint32 link = ((hi << 11) | (lo >> 21)) & 0x3FFFF8;
254                 WriteLog("%08X: %08X %08X %s", address, hi, lo, opType[objectType]);
255
256                 if (objectType == 3)
257                 {
258                         uint16 ypos = (lo >> 3) & 0x7FF;
259                         uint8  cc   = (lo >> 14) & 0x07;        // Proper # of bits == 3
260                         WriteLog(" YPOS=%u, CC=%s, link=$%08X", ypos, ccType[cc], link);
261                 }
262
263                 WriteLog("\n");
264
265                 if (objectType == 0)
266                         DumpFixedObject(OPLoadPhrase(address + 0), OPLoadPhrase(address + 8));
267
268                 if (objectType == 1)
269                         DumpScaledObject(OPLoadPhrase(address + 0), OPLoadPhrase(address + 8),
270                                 OPLoadPhrase(address + 16));
271
272                 if (address == link)    // Ruh roh...
273                 {
274                         // Runaway recursive link is bad!
275                         WriteLog("***** SELF REFERENTIAL LINK *****\n\n");
276                 }
277         }
278
279         WriteLog("\n");
280 }
281
282 //
283 // Object Processor memory access
284 // Memory range: F00010 - F00027
285 //
286 //      F00010-F00017   R     xxxxxxxx xxxxxxxx   OB - current object code from the graphics processor
287 //      F00020-F00023     W   xxxxxxxx xxxxxxxx   OLP - start of the object list
288 //      F00026            W   -------- -------x   OBF - object processor flag
289 //
290
291 #if 0
292 uint8 OPReadByte(uint32 offset, uint32 who/*=UNKNOWN*/)
293 {
294         offset &= 0x3F;
295         return objectp_ram[offset];
296 }
297
298 uint16 OPReadWord(uint32 offset, uint32 who/*=UNKNOWN*/)
299 {
300         offset &= 0x3F;
301         return GET16(objectp_ram, offset);
302 }
303
304 void OPWriteByte(uint32 offset, uint8 data, uint32 who/*=UNKNOWN*/)
305 {
306         offset &= 0x3F;
307         objectp_ram[offset] = data;
308 }
309
310 void OPWriteWord(uint32 offset, uint16 data, uint32 who/*=UNKNOWN*/)
311 {
312         offset &= 0x3F;
313         SET16(objectp_ram, offset, data);
314
315 /*if (offset == 0x20)
316 WriteLog("OP: Setting lo list pointer: %04X\n", data);
317 if (offset == 0x22)
318 WriteLog("OP: Setting hi list pointer: %04X\n", data);//*/
319 }
320 #endif
321
322 uint32 OPGetListPointer(void)
323 {
324         // Note: This register is LO / HI WORD, hence the funky look of this...
325         return GET16(tomRam8, 0x20) | (GET16(tomRam8, 0x22) << 16);
326 }
327
328 // This is WRONG, since the OBF is only 16 bits wide!!! [FIXED]
329
330 uint32 OPGetStatusRegister(void)
331 {
332         return GET16(tomRam8, 0x26);
333 }
334
335 // This is WRONG, since the OBF is only 16 bits wide!!! [FIXED]
336
337 void OPSetStatusRegister(uint32 data)
338 {
339         tomRam8[0x26] = (data & 0x0000FF00) >> 8;
340         tomRam8[0x27] |= (data & 0xFE);
341 }
342
343 void OPSetCurrentObject(uint64 object)
344 {
345 //Not sure this is right... Wouldn't it just be stored 64 bit BE?
346         // Stored as least significant 32 bits first, ms32 last in big endian
347 /*      objectp_ram[0x13] = object & 0xFF; object >>= 8;
348         objectp_ram[0x12] = object & 0xFF; object >>= 8;
349         objectp_ram[0x11] = object & 0xFF; object >>= 8;
350         objectp_ram[0x10] = object & 0xFF; object >>= 8;
351
352         objectp_ram[0x17] = object & 0xFF; object >>= 8;
353         objectp_ram[0x16] = object & 0xFF; object >>= 8;
354         objectp_ram[0x15] = object & 0xFF; object >>= 8;
355         objectp_ram[0x14] = object & 0xFF;*/
356 // Let's try regular good old big endian...
357         tomRam8[0x17] = object & 0xFF; object >>= 8;
358         tomRam8[0x16] = object & 0xFF; object >>= 8;
359         tomRam8[0x15] = object & 0xFF; object >>= 8;
360         tomRam8[0x14] = object & 0xFF; object >>= 8;
361
362         tomRam8[0x13] = object & 0xFF; object >>= 8;
363         tomRam8[0x12] = object & 0xFF; object >>= 8;
364         tomRam8[0x11] = object & 0xFF; object >>= 8;
365         tomRam8[0x10] = object & 0xFF;
366 }
367
368 uint64 OPLoadPhrase(uint32 offset)
369 {
370         offset &= ~0x07;                                                // 8 byte alignment
371         return ((uint64)JaguarReadLong(offset, OP) << 32) | (uint64)JaguarReadLong(offset+4, OP);
372 }
373
374 void OPStorePhrase(uint32 offset, uint64 p)
375 {
376         offset &= ~0x07;                                                // 8 byte alignment
377         JaguarWriteLong(offset, p >> 32, OP);
378         JaguarWriteLong(offset + 4, p & 0xFFFFFFFF, OP);
379 }
380
381 //
382 // Debugging routines
383 //
384 void DumpScaledObject(uint64 p0, uint64 p1, uint64 p2)
385 {
386         WriteLog("          %08X %08X\n", (uint32)(p1>>32), (uint32)(p1&0xFFFFFFFF));
387         WriteLog("          %08X %08X\n", (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
388         DumpBitmapCore(p0, p1);
389         uint32 hscale = p2 & 0xFF;
390         uint32 vscale = (p2 >> 8) & 0xFF;
391         uint32 remainder = (p2 >> 16) & 0xFF;
392         WriteLog("    [hsc: %02X, vsc: %02X, rem: %02X]\n", hscale, vscale, remainder);
393 }
394
395 void DumpFixedObject(uint64 p0, uint64 p1)
396 {
397         WriteLog("          %08X %08X\n", (uint32)(p1>>32), (uint32)(p1&0xFFFFFFFF));
398         DumpBitmapCore(p0, p1);
399 }
400
401 void DumpBitmapCore(uint64 p0, uint64 p1)
402 {
403         uint32 bdMultiplier[8] = { 64, 32, 16, 8, 4, 2, 1, 1 };
404         uint8 bitdepth = (p1 >> 12) & 0x07;
405 //WAS:  int16 ypos = ((p0 >> 3) & 0x3FF);                       // ??? What if not interlaced (/2)?
406         int16 ypos = ((p0 >> 3) & 0x7FF);                       // ??? What if not interlaced (/2)?
407         int32 xpos = p1 & 0xFFF;
408         xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);       // Sign extend that mutha!
409         uint32 iwidth = ((p1 >> 28) & 0x3FF);
410         uint32 dwidth = ((p1 >> 18) & 0x3FF);           // Unsigned!
411         uint16 height = ((p0 >> 14) & 0x3FF);
412         uint32 link = ((p0 >> 24) & 0x7FFFF) << 3;
413         uint32 ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
414         uint32 firstPix = (p1 >> 49) & 0x3F;
415         uint8 flags = (p1 >> 45) & 0x0F;
416         uint8 idx = (p1 >> 38) & 0x7F;
417         uint32 pitch = (p1 >> 15) & 0x07;
418         WriteLog("    [%u x %u @ (%i, %u) (iw:%u, dw:%u) (%u bpp), l:%08X, p:%08X fp:%02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
419                 iwidth * bdMultiplier[bitdepth],
420                 height, xpos, ypos, iwidth, dwidth, op_bitmap_bit_depth[bitdepth], link,
421                 ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""),
422                 (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""),
423                 (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
424 }
425
426 //
427 // Object Processor main routine
428 //
429 #warning "Need to fix this so that when an GPU object IRQ happens, we can pick up OP processing where we left off. !!! FIX !!!"
430 void OPProcessList(int halfline, bool render)
431 {
432 extern int op_start_log;
433 //      char * condition_to_str[8] =
434 //              { "==", "<", ">", "(opflag set)", "(second half line)", "?", "?", "?" };
435
436         op_pointer = OPGetListPointer();
437
438 //      objectp_stop_reading_list = false;
439
440 //WriteLog("OP: Processing line #%u (OLP=%08X)...\n", halfline, op_pointer);
441 //op_done();
442
443 // *** BEGIN OP PROCESSOR TESTING ONLY ***
444 extern bool interactiveMode;
445 extern bool iToggle;
446 extern int objectPtr;
447 bool inhibit;
448 int bitmapCounter = 0;
449 // *** END OP PROCESSOR TESTING ONLY ***
450
451         uint32 opCyclesToRun = 30000;                                   // This is a pulled-out-of-the-air value (will need to be fixed, obviously!)
452
453 //      if (op_pointer) WriteLog(" new op list at 0x%.8x halfline %i\n",op_pointer,halfline);
454         while (op_pointer)
455         {
456 // *** BEGIN OP PROCESSOR TESTING ONLY ***
457 if (interactiveMode && bitmapCounter == objectPtr)
458         inhibit = iToggle;
459 else
460         inhibit = false;
461 // *** END OP PROCESSOR TESTING ONLY ***
462 //              if (objectp_stop_reading_list)
463 //                      return;
464
465                 uint64 p0 = OPLoadPhrase(op_pointer);
466                 op_pointer += 8;
467 //WriteLog("\t%08X type %i\n", op_pointer, (uint8)p0 & 0x07);
468
469 #if 1
470 if (halfline == TOMGetVDB() && op_start_log)
471 //if (halfline == 215 && op_start_log)
472 //if (halfline == 28 && op_start_log)
473 //if (halfline == 0)
474 {
475 WriteLog("%08X --> phrase %08X %08X", op_pointer - 8, (int)(p0>>32), (int)(p0&0xFFFFFFFF));
476 if ((p0 & 0x07) == OBJECT_TYPE_BITMAP)
477 {
478 WriteLog(" (BITMAP) ");
479 uint64 p1 = OPLoadPhrase(op_pointer);
480 WriteLog("\n%08X --> phrase %08X %08X ", op_pointer, (int)(p1>>32), (int)(p1&0xFFFFFFFF));
481         uint8 bitdepth = (p1 >> 12) & 0x07;
482 //WAS:  int16 ypos = ((p0 >> 3) & 0x3FF);                       // ??? What if not interlaced (/2)?
483         int16 ypos = ((p0 >> 3) & 0x7FF);                       // ??? What if not interlaced (/2)?
484 int32 xpos = p1 & 0xFFF;
485 xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);
486         uint32 iwidth = ((p1 >> 28) & 0x3FF);
487         uint32 dwidth = ((p1 >> 18) & 0x3FF);           // Unsigned!
488         uint16 height = ((p0 >> 14) & 0x3FF);
489         uint32 link = ((p0 >> 24) & 0x7FFFF) << 3;
490         uint32 ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
491         uint32 firstPix = (p1 >> 49) & 0x3F;
492         uint8 flags = (p1 >> 45) & 0x0F;
493         uint8 idx = (p1 >> 38) & 0x7F;
494         uint32 pitch = (p1 >> 15) & 0x07;
495 WriteLog("\n    [%u (%u) x %u @ (%i, %u) (%u bpp), l: %08X, p: %08X fp: %02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
496         iwidth, dwidth, height, xpos, ypos, op_bitmap_bit_depth[bitdepth], link, ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""), (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""), (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
497 }
498 if ((p0 & 0x07) == OBJECT_TYPE_SCALE)
499 {
500 WriteLog(" (SCALED BITMAP)");
501 uint64 p1 = OPLoadPhrase(op_pointer), p2 = OPLoadPhrase(op_pointer+8);
502 WriteLog("\n%08X --> phrase %08X %08X ", op_pointer, (int)(p1>>32), (int)(p1&0xFFFFFFFF));
503 WriteLog("\n%08X --> phrase %08X %08X ", op_pointer+8, (int)(p2>>32), (int)(p2&0xFFFFFFFF));
504         uint8 bitdepth = (p1 >> 12) & 0x07;
505 //WAS:  int16 ypos = ((p0 >> 3) & 0x3FF);                       // ??? What if not interlaced (/2)?
506         int16 ypos = ((p0 >> 3) & 0x7FF);                       // ??? What if not interlaced (/2)?
507 int32 xpos = p1 & 0xFFF;
508 xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);
509         uint32 iwidth = ((p1 >> 28) & 0x3FF);
510         uint32 dwidth = ((p1 >> 18) & 0x3FF);           // Unsigned!
511         uint16 height = ((p0 >> 14) & 0x3FF);
512         uint32 link = ((p0 >> 24) & 0x7FFFF) << 3;
513         uint32 ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
514         uint32 firstPix = (p1 >> 49) & 0x3F;
515         uint8 flags = (p1 >> 45) & 0x0F;
516         uint8 idx = (p1 >> 38) & 0x7F;
517         uint32 pitch = (p1 >> 15) & 0x07;
518 WriteLog("\n    [%u (%u) x %u @ (%i, %u) (%u bpp), l: %08X, p: %08X fp: %02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
519         iwidth, dwidth, height, xpos, ypos, op_bitmap_bit_depth[bitdepth], link, ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""), (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""), (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
520         uint32 hscale = p2 & 0xFF;
521         uint32 vscale = (p2 >> 8) & 0xFF;
522         uint32 remainder = (p2 >> 16) & 0xFF;
523 WriteLog("    [hsc: %02X, vsc: %02X, rem: %02X]\n", hscale, vscale, remainder);
524 }
525 if ((p0 & 0x07) == OBJECT_TYPE_GPU)
526 WriteLog(" (GPU)\n");
527 if ((p0 & 0x07) == OBJECT_TYPE_BRANCH)
528 {
529 WriteLog(" (BRANCH)\n");
530 uint8 * jaguarMainRam = GetRamPtr();
531 WriteLog("[RAM] --> ");
532 for(int k=0; k<8; k++)
533         WriteLog("%02X ", jaguarMainRam[op_pointer-8 + k]);
534 WriteLog("\n");
535 }
536 if ((p0 & 0x07) == OBJECT_TYPE_STOP)
537 WriteLog("    --> List end\n\n");
538 }
539 #endif
540
541                 switch ((uint8)p0 & 0x07)
542                 {
543                 case OBJECT_TYPE_BITMAP:
544                 {
545 //WAS:                  uint16 ypos = (p0 >> 3) & 0x3FF;
546                         uint16 ypos = (p0 >> 3) & 0x7FF;
547 // This is only theory implied by Rayman...!
548 // It seems that if the YPOS is zero, then bump the YPOS value so that it coincides with
549 // the VDB value. With interlacing, this would be slightly more tricky.
550 // There's probably another bit somewhere that enables this mode--but so far, doesn't seem
551 // to affect any other game in a negative way (that I've seen).
552 // Either that, or it's an undocumented bug...
553
554 //No, the reason this was needed is that the OP code before was wrong. Any value
555 //less than VDB will get written to the top line of the display!
556 #if 0
557 // Not so sure... Let's see what happens here...
558 // No change...
559                         if (ypos == 0)
560                                 ypos = TOMReadWord(0xF00046, OP) / 2;                   // Get the VDB value
561 #endif
562 // Actually, no. Any item less than VDB will get only the lines that hang over VDB displayed.
563 // Actually, this is incorrect. It seems that VDB value is wrong somewhere and that's
564 // what's causing things to fuck up. Still no idea why.
565
566                         uint32 height = (p0 & 0xFFC000) >> 14;
567                         uint32 oldOPP = op_pointer - 8;
568 // *** BEGIN OP PROCESSOR TESTING ONLY ***
569 if (inhibit && op_start_log)
570         WriteLog("!!! ^^^ This object is INHIBITED! ^^^ !!!\n");
571 bitmapCounter++;
572 if (!inhibit)   // For OP testing only!
573 // *** END OP PROCESSOR TESTING ONLY ***
574                         if (halfline >= ypos && height > 0)
575                         {
576                                 uint64 p1 = OPLoadPhrase(op_pointer);
577                                 op_pointer += 8;
578 //WriteLog("OP: Writing halfline %d with ypos == %d...\n", halfline, ypos);
579 //WriteLog("--> Writing %u BPP bitmap...\n", op_bitmap_bit_depth[(p1 >> 12) & 0x07]);
580 //                              OPProcessFixedBitmap(halfline, p0, p1, render);
581                                 OPProcessFixedBitmap(p0, p1, render);
582
583                                 // OP write-backs
584
585 //???Does this really happen??? Doesn't seem to work if you do this...!
586 //Probably not. Must be a bug in the documentation...!
587 //                              uint32 link = (p0 & 0x7FFFF000000) >> 21;
588 //                              SET16(tom_ram_8, 0x20, link & 0xFFFF);  // OLP
589 //                              SET16(tom_ram_8, 0x22, link >> 16);
590 /*                              uint32 height = (p0 & 0xFFC000) >> 14;
591                                 if (height - 1 > 0)
592                                         height--;*/
593                                 // NOTE: Would subtract 2 if in interlaced mode...!
594 //                              uint64 height = ((p0 & 0xFFC000) - 0x4000) & 0xFFC000;
595 //                              if (height)
596                                 height--;
597
598                                 uint64 data = (p0 & 0xFFFFF80000000000LL) >> 40;
599                                 uint64 dwidth = (p1 & 0xFFC0000) >> 15;
600                                 data += dwidth;
601
602                                 p0 &= ~0xFFFFF80000FFC000LL;            // Mask out old data...
603                                 p0 |= (uint64)height << 14;
604                                 p0 |= data << 40;
605                                 OPStorePhrase(oldOPP, p0);
606                         }
607 //WriteLog("\t\tOld OP: %08X -> ", op_pointer);
608 //Temp, for testing...
609 //No doubt, this type of check will break all kinds of stuff... !!! FIX !!!
610 //And it does! !!! FIX !!!
611 //Let's remove this "fix" since it screws up more than it fixes.
612 /*      if (op_pointer > ((p0 & 0x000007FFFF000000LL) >> 21))
613                 return;*/
614
615                         op_pointer = (p0 & 0x000007FFFF000000LL) >> 21;
616 //WriteLog("New OP: %08X\n", op_pointer);
617                         break;
618                 }
619                 case OBJECT_TYPE_SCALE:
620                 {
621 //WAS:                  uint16 ypos = (p0 >> 3) & 0x3FF;
622                         uint16 ypos = (p0 >> 3) & 0x7FF;
623                         uint32 height = (p0 & 0xFFC000) >> 14;
624                         uint32 oldOPP = op_pointer - 8;
625 //WriteLog("OP: Scaled Object (ypos=%04X, height=%04X", ypos, height);
626 // *** BEGIN OP PROCESSOR TESTING ONLY ***
627 if (inhibit && op_start_log)
628 {
629         WriteLog("!!! ^^^ This object is INHIBITED! ^^^ !!! (halfline=%u, ypos=%u, height=%u)\n", halfline, ypos, height);
630         DumpScaledObject(p0, OPLoadPhrase(op_pointer), OPLoadPhrase(op_pointer+8));
631 }
632 bitmapCounter++;
633 if (!inhibit)   // For OP testing only!
634 // *** END OP PROCESSOR TESTING ONLY ***
635                         if (halfline >= ypos && height > 0)
636                         {
637                                 uint64 p1 = OPLoadPhrase(op_pointer);
638                                 op_pointer += 8;
639                                 uint64 p2 = OPLoadPhrase(op_pointer);
640                                 op_pointer += 8;
641 //WriteLog("OP: %08X (%d) %08X%08X %08X%08X %08X%08X\n", oldOPP, halfline, (uint32)(p0>>32), (uint32)(p0&0xFFFFFFFF), (uint32)(p1>>32), (uint32)(p1&0xFFFFFFFF), (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
642                                 OPProcessScaledBitmap(p0, p1, p2, render);
643
644                                 // OP write-backs
645
646                                 uint16 remainder = (p2 >> 16) & 0xFF;//, vscale = p2 >> 8;
647                                 uint8 /*remainder = p2 >> 16,*/ vscale = p2 >> 8;
648 //Actually, we should skip this object if it has a vscale of zero.
649 //Or do we? Not sure... Atari Karts has a few lines that look like:
650 // (SCALED BITMAP)
651 //000E8268 --> phrase 00010000 7000B00D
652 //    [7 (0) x 1 @ (13, 0) (8 bpp), l: 000E82A0, p: 000E0FC0 fp: 00, fl:RELEASE, idx:00, pt:01]
653 //    [hsc: 9A, vsc: 00, rem: 00]
654 // Could it be the vscale is overridden if the DWIDTH is zero? Hmm...
655 //WriteLog("OP: Scaled bitmap processing (rem=%02X, vscale=%02X)...\n", remainder, vscale);//*/
656
657                                 if (vscale == 0)
658                                         vscale = 0x20;                                  // OP bug??? Nope, it isn't...! Or is it?
659
660 //extern int start_logging;
661 //if (start_logging)
662 //      WriteLog("--> Returned from scaled bitmap processing (rem=%02X, vscale=%02X)...\n", remainder, vscale);//*/
663 //Locks up here:
664 //--> Returned from scaled bitmap processing (rem=20, vscale=80)...
665 //There are other problems here, it looks like...
666 //Another lock up:
667 //About to execute OP (508)...
668 /*
669 OP: Scaled bitmap 4x? 4bpp at 38,? hscale=7C fpix=0 data=00075E28 pitch 1 hflipped=no dwidth=? (linked to 00071118) Transluency=no
670 --> Returned from scaled bitmap processing (rem=50, vscale=7C)...
671 OP: Scaled bitmap 4x? 4bpp at 38,? hscale=7C fpix=0 data=00075E28 pitch 1 hflipped=no dwidth=? (linked to 00071118) Transluency=no
672 --> Returned from scaled bitmap processing (rem=30, vscale=7C)...
673 OP: Scaled bitmap 4x? 4bpp at 38,? hscale=7C fpix=0 data=00075E28 pitch 1 hflipped=no dwidth=? (linked to 00071118) Transluency=no
674 --> Returned from scaled bitmap processing (rem=10, vscale=7C)...
675 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756A8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
676 --> Returned from scaled bitmap processing (rem=00, vscale=7E)...
677 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
678 --> Returned from scaled bitmap processing (rem=00, vscale=80)...
679 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
680 --> Returned from scaled bitmap processing (rem=5E, vscale=7E)...
681 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756E8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
682 --> Returned from scaled bitmap processing (rem=60, vscale=80)...
683 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
684 --> Returned from scaled bitmap processing (rem=3E, vscale=7E)...
685 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756E8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
686 --> Returned from scaled bitmap processing (rem=40, vscale=80)...
687 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
688 --> Returned from scaled bitmap processing (rem=1E, vscale=7E)...
689 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756E8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
690 --> Returned from scaled bitmap processing (rem=20, vscale=80)...
691 */
692 //Here's another problem:
693 //    [hsc: 20, vsc: 20, rem: 00]
694 // Since we're not checking for $E0 (but that's what we get from the above), we end
695 // up repeating this halfline unnecessarily... !!! FIX !!! [DONE, but... still not quite
696 // right. Either that, or the Accolade team that wrote Bubsy screwed up royal.]
697 //Also note: $E0 = 7.0 which IS a legal vscale value...
698
699 //                              if (remainder & 0x80)                           // I.e., it's negative
700 //                              if ((remainder & 0x80) || remainder == 0)       // I.e., it's <= 0
701 //                              if ((remainder - 1) >= 0xE0)            // I.e., it's <= 0
702 //                              if ((remainder >= 0xE1) || remainder == 0)// I.e., it's <= 0
703 //                              if ((remainder >= 0xE1 && remainder <= 0xFF) || remainder == 0)// I.e., it's <= 0
704 //                              if (remainder <= 0x20)                          // I.e., it's <= 1.0
705                                 // I.e., it's < 1.0f -> means it'll go negative when we subtract 1.0f.
706                                 if (remainder < 0x20)
707                                 {
708                                         uint64 data = (p0 & 0xFFFFF80000000000LL) >> 40;
709                                         uint64 dwidth = (p1 & 0xFFC0000) >> 15;
710
711 //                                      while (remainder & 0x80)
712 //                                      while ((remainder & 0x80) || remainder == 0)
713 //                                      while ((remainder - 1) >= 0xE0)
714 //                                      while ((remainder >= 0xE1) || remainder == 0)
715 //                                      while ((remainder >= 0xE1 && remainder <= 0xFF) || remainder == 0)
716 //                                      while (remainder <= 0x20)
717                                         while (remainder < 0x20)
718                                         {
719                                                 remainder += vscale;
720
721                                                 if (height)
722                                                         height--;
723
724                                                 data += dwidth;
725                                         }
726
727                                         p0 &= ~0xFFFFF80000FFC000LL;    // Mask out old data...
728                                         p0 |= (uint64)height << 14;
729                                         p0 |= data << 40;
730                                         OPStorePhrase(oldOPP, p0);
731                                 }
732
733                                 remainder -= 0x20;                                      // 1.0f in [3.5] fixed point format
734
735 //if (start_logging)
736 //      WriteLog("--> Finished writebacks...\n");//*/
737
738 //WriteLog(" [%08X%08X -> ", (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
739                                 p2 &= ~0x0000000000FF0000LL;
740                                 p2 |= (uint64)remainder << 16;
741 //WriteLog("%08X%08X]\n", (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
742                                 OPStorePhrase(oldOPP + 16, p2);
743 //remainder = (uint8)(p2 >> 16), vscale = (uint8)(p2 >> 8);
744 //WriteLog(" [after]: rem=%02X, vscale=%02X\n", remainder, vscale);
745                         }
746
747                         op_pointer = (p0 & 0x000007FFFF000000LL) >> 21;
748                         break;
749                 }
750                 case OBJECT_TYPE_GPU:
751                 {
752 //WriteLog("OP: Asserting GPU IRQ #3...\n");
753 #warning "Need to fix OP GPU IRQ handling! !!! FIX !!!"
754                         OPSetCurrentObject(p0);
755                         GPUSetIRQLine(3, ASSERT_LINE);
756 //Also, OP processing is suspended from this point until OBF (F00026) is written to...
757 // !!! FIX !!!
758 //Do something like:
759 //OPSuspendedByGPU = true;
760 //Dunno if the OP keeps processing from where it was interrupted, or if it just continues
761 //on the next halfline...
762 // --> It continues from where it was interrupted! !!! FIX !!!
763                         break;
764                 }
765                 case OBJECT_TYPE_BRANCH:
766                 {
767                         uint16 ypos = (p0 >> 3) & 0x7FF;
768 // NOTE: The JTRM sez there are only 2 bits used for the CC, but lists *five*
769 //       conditions! Need at least one more bit for that! :-P
770 #warning "!!! Possibly bad CC handling in OP (missing 1 bit) !!!"
771                         uint8  cc   = (p0 >> 14) & 0x03;
772                         uint32 link = (p0 >> 21) & 0x3FFFF8;
773
774 //                      if ((ypos!=507)&&(ypos!=25))
775 //                              WriteLog("\t%i%s%i link=0x%.8x\n",halfline,condition_to_str[cc],ypos>>1,link);
776                         switch (cc)
777                         {
778                         case CONDITION_EQUAL:
779                                 if (TOMReadWord(0xF00006, OP) == ypos || ypos == 0x7FF)
780                                         op_pointer = link;
781                                 break;
782                         case CONDITION_LESS_THAN:
783                                 if (TOMReadWord(0xF00006, OP) < ypos)
784                                         op_pointer = link;
785                                 break;
786                         case CONDITION_GREATER_THAN:
787                                 if (TOMReadWord(0xF00006, OP) > ypos)
788                                         op_pointer = link;
789                                 break;
790                         case CONDITION_OP_FLAG_SET:
791                                 if (OPGetStatusRegister() & 0x01)
792                                         op_pointer = link;
793                                 break;
794                         case CONDITION_SECOND_HALF_LINE:
795 //Here's the ASIC code:
796 //  ND4(cctrue5, newheight[2], heightl[1], heightl[0], hcb[10]);
797 //which means, do the link if bit 10 of HC is set...
798
799                                 // This basically means branch if bit 10 of HC is set
800 #warning "Unhandled condition code causes emulator to crash... !!! FIX !!!"
801                                 WriteLog("OP: Unexpected CONDITION_SECOND_HALF_LINE in BRANCH object\nOP: shutting down!\n");
802                                 LogDone();
803                                 exit(0);
804                                 break;
805                         default:
806                                 // Basically, if you do this, the OP does nothing. :-)
807                                 WriteLog("OP: Unimplemented branch condition %i\n", cc);
808                         }
809                         break;
810                 }
811                 case OBJECT_TYPE_STOP:
812                 {
813 //op_start_log = 0;
814                         // unsure
815 //WriteLog("OP: --> STOP\n");
816 //                      op_set_status_register(((p0>>3) & 0xFFFFFFFF));
817 //This seems more likely...
818                         OPSetCurrentObject(p0);
819
820                         if (p0 & 0x08)
821                         {
822                                 // We need to check whether these interrupts are enabled or not, THEN
823                                 // set an IRQ + pending flag if necessary...
824                                 if (TOMIRQEnabled(IRQ_OPFLAG))
825                                 {
826                                         TOMSetPendingObjectInt();
827                                         m68k_set_irq(2);                                // Cause a 68K IPL 2 to occur...
828                                 }
829                         }
830
831                         return;
832 //                      break;
833                 }
834                 default:
835                         WriteLog("op: unknown object type %i\n", ((uint8)p0 & 0x07));
836                         return;
837                 }
838
839                 // Here is a little sanity check to keep the OP from locking up the machine
840                 // when fed bad data. Better would be to count how many actual cycles it used
841                 // and bail out/reenter to properly simulate an overloaded OP... !!! FIX !!!
842 #warning "Better would be to count how many actual cycles it used and bail out/reenter to properly simulate an overloaded OP... !!! FIX !!!"
843                 opCyclesToRun--;
844
845                 if (!opCyclesToRun)
846                         return;
847         }
848 }
849
850 //
851 // Store fixed size bitmap in line buffer
852 //
853 void OPProcessFixedBitmap(uint64 p0, uint64 p1, bool render)
854 {
855 // Need to make sure that when writing that it stays within the line buffer...
856 // LBUF ($F01800 - $F01D9E) 360 x 32-bit RAM
857         uint8 depth = (p1 >> 12) & 0x07;                                // Color depth of image
858         int32 xpos = ((int16)((p1 << 4) & 0xFFFF)) >> 4;// Image xpos in LBUF
859         uint32 iwidth = (p1 >> 28) & 0x3FF;                             // Image width in *phrases*
860         uint32 data = (p0 >> 40) & 0xFFFFF8;                    // Pixel data address
861 //#ifdef OP_DEBUG_BMP
862         uint32  firstPix = (p1 >> 49) & 0x3F;
863         // "The LSB is significant only for scaled objects..." -JTRM
864         // "In 1 BPP mode, all five bits are significant. In 2 BPP mode, the top four are significant..."
865         firstPix &= 0x3E;
866 //#endif
867 // We can ignore the RELEASE (high order) bit for now--probably forever...!
868 //      uint8 flags = (p1 >> 45) & 0x0F;        // REFLECT, RMW, TRANS, RELEASE
869 //Optimize: break these out to their own BOOL values
870         uint8 flags = (p1 >> 45) & 0x07;                                // REFLECT (0), RMW (1), TRANS (2)
871         bool flagREFLECT = (flags & OPFLAG_REFLECT ? true : false),
872                 flagRMW = (flags & OPFLAG_RMW ? true : false),
873                 flagTRANS = (flags & OPFLAG_TRANS ? true : false);
874 // "For images with 1 to 4 bits/pixel the top 7 to 4 bits of the index
875 //  provide the most significant bits of the palette address."
876         uint8 index = (p1 >> 37) & 0xFE;                                // CLUT index offset (upper pix, 1-4 bpp)
877         uint32 pitch = (p1 >> 15) & 0x07;                               // Phrase pitch
878         pitch <<= 3;                                                                    // Optimization: Multiply pitch by 8
879
880 //      int16 scanlineWidth = tom_getVideoModeWidth();
881         uint8 * tomRam8 = TOMGetRamPointer();
882         uint8 * paletteRAM = &tomRam8[0x400];
883         // This is OK as long as it's used correctly: For 16-bit RAM to RAM direct copies--NOT
884         // for use when using endian-corrected data (i.e., any of the *_word_read functions!)
885         uint16 * paletteRAM16 = (uint16 *)paletteRAM;
886
887 //      WriteLog("bitmap %ix? %ibpp at %i,? firstpix=? data=0x%.8x pitch %i hflipped=%s dwidth=? (linked to ?) RMW=%s Tranparent=%s\n",
888 //              iwidth, op_bitmap_bit_depth[bitdepth], xpos, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), (flags&OPFLAG_RMW ? "yes" : "no"), (flags&OPFLAG_TRANS ? "yes" : "no"));
889
890 // Is it OK to have a 0 for the data width??? (i.e., undocumented?)
891 // Seems to be... Seems that dwidth *can* be zero (i.e., reuse same line) as well.
892 // Pitch == 0 is OK too...
893 //      if (!render || op_pointer == 0 || ptr == 0 || pitch == 0)
894 //I'm not convinced that we need to concern ourselves with data & op_pointer here either!
895         if (!render || iwidth == 0)
896                 return;
897
898 //OK, so we know the position in the line buffer is correct. It's the clipping in
899 //24bpp mode that's wrong!
900 #if 0
901 //This is a total kludge, based upon the fact that 24BPP mode puts *4* bytes
902 //into the line buffer for each pixel.
903 if (depth == 5) // i.e., 24bpp mode...
904         xpos >>= 1;     // Cut it in half...
905 #endif
906
907 //#define OP_DEBUG_BMP
908 //#ifdef OP_DEBUG_BMP
909 //      WriteLog("bitmap %ix%i %ibpp at %i,%i firstpix=%i data=0x%.8x pitch %i hflipped=%s dwidth=%i (linked to 0x%.8x) Transluency=%s\n",
910 //              iwidth, height, op_bitmap_bit_depth[bitdepth], xpos, ypos, firstPix, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), dwidth, op_pointer, (flags&OPFLAG_RMW ? "yes" : "no"));
911 //#endif
912
913 //      int32 leftMargin = xpos, rightMargin = (xpos + (phraseWidthToPixels[depth] * iwidth)) - 1;
914         int32 startPos = xpos, endPos = xpos +
915                 (!flagREFLECT ? (phraseWidthToPixels[depth] * iwidth) - 1
916                 : -((phraseWidthToPixels[depth] * iwidth) + 1));
917         uint32 clippedWidth = 0, phraseClippedWidth = 0, dataClippedWidth = 0;//, phrasePixel = 0;
918         bool in24BPPMode = (((GET16(tomRam8, 0x0028) >> 1) & 0x03) == 1 ? true : false);        // VMODE
919         // Not sure if this is Jaguar Two only location or what...
920         // From the docs, it is... If we want to limit here we should think of something else.
921 //      int32 limit = GET16(tom_ram_8, 0x0008);                 // LIMIT
922 //      int32 limit = 720;
923 //      int32 lbufWidth = (!in24BPPMode ? limit - 1 : (limit / 2) - 1); // Zero based limit...
924 //printf("[OP:xpos=%i,spos=%i,epos=%i>", xpos, startPos, endPos);
925         // This is correct, the OP line buffer is a constant size... 
926         int32 limit = 720;
927         int32 lbufWidth = 719;
928
929         // If the image is completely to the left or right of the line buffer, then bail.
930 //If in REFLECT mode, then these values are swapped! !!! FIX !!! [DONE]
931 //There are four possibilities:
932 //  1. image sits on left edge and no REFLECT; starts out of bounds but ends in bounds.
933 //  2. image sits on left edge and REFLECT; starts in bounds but ends out of bounds.
934 //  3. image sits on right edge and REFLECT; starts out of bounds but ends in bounds.
935 //  4. image sits on right edge and no REFLECT; starts in bounds but ends out of bounds.
936 //Numbers 2 & 4 can be caught by checking the LBUF clip while in the inner loop,
937 // numbers 1 & 3 are of concern.
938 // This *indirectly* handles only cases 2 & 4! And is WRONG if REFLECT is set...!
939 //      if (rightMargin < 0 || leftMargin > lbufWidth)
940
941 // It might be easier to swap these (if REFLECTed) and just use XPOS down below...
942 // That way, you could simply set XPOS to leftMargin if !REFLECT and to rightMargin otherwise.
943 // Still have to be careful with the DATA and IWIDTH values though...
944
945 //      if ((!flagREFLECT && (rightMargin < 0 || leftMargin > lbufWidth))
946 //              || (flagREFLECT && (leftMargin < 0 || rightMargin > lbufWidth)))
947 //              return;
948         if ((!flagREFLECT && (endPos < 0 || startPos > lbufWidth))
949                 || (flagREFLECT && (startPos < 0 || endPos > lbufWidth)))
950                 return;
951
952         // Otherwise, find the clip limits and clip the phrase as well...
953         // NOTE: I'm fudging here by letting the actual blit overstep the bounds of the
954         //       line buffer, but it shouldn't matter since there are two unused line
955         //       buffers below and nothing above and I'll at most write 8 bytes outside
956         //       the line buffer... I could use a fractional clip begin/end value, but
957         //       this makes the blit a *lot* more hairy. I might fix this in the future
958         //       if it becomes necessary. (JLH)
959         //       Probably wouldn't be *that* hairy. Just use a delta that tells the inner loop
960         //       which pixel in the phrase is being written, and quit when either end of phrases
961         //       is reached or line buffer extents are surpassed.
962
963 //This stuff is probably wrong as well... !!! FIX !!!
964 //The strange thing is that it seems to work, but that's no guarantee that it's bulletproof!
965 //Yup. Seems that JagMania doesn't work correctly with this...
966 //Dunno if this is the problem, but Atari Karts is showing *some* of the road now...
967 //      if (!flagREFLECT)
968
969 /*
970         if (leftMargin < 0)
971                 clippedWidth = 0 - leftMargin,
972                 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth],
973                 leftMargin = 0 - (clippedWidth % phraseWidthToPixels[depth]);
974 //              leftMargin = 0;
975
976         if (rightMargin > lbufWidth)
977                 clippedWidth = rightMargin - lbufWidth,
978                 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];//,
979 //              rightMargin = lbufWidth + (clippedWidth % phraseWidthToPixels[depth]);
980 //              rightMargin = lbufWidth;
981 */
982 if (depth > 5)
983         WriteLog("OP: We're about to encounter a divide by zero error!\n");
984         // NOTE: We're just using endPos to figure out how much, if any, to clip by.
985         // ALSO: There may be another case where we start out of bounds and end out of bounds...!
986         // !!! FIX !!!
987         if (startPos < 0)                       // Case #1: Begin out, end in, L to R
988                 clippedWidth = 0 - startPos,
989                 dataClippedWidth = phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth],
990                 startPos = 0 - (clippedWidth % phraseWidthToPixels[depth]);
991
992         if (endPos < 0)                         // Case #2: Begin in, end out, R to L
993                 clippedWidth = 0 - endPos,
994                 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];
995
996         if (endPos > lbufWidth)         // Case #3: Begin in, end out, L to R
997                 clippedWidth = endPos - lbufWidth,
998                 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];
999
1000         if (startPos > lbufWidth)       // Case #4: Begin out, end in, R to L
1001                 clippedWidth = startPos - lbufWidth,
1002                 dataClippedWidth = phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth],
1003                 startPos = lbufWidth + (clippedWidth % phraseWidthToPixels[depth]);
1004 //printf("<OP:spos=%i,epos=%i]", startPos, endPos);
1005
1006         // If the image is sitting on the line buffer left or right edge, we need to compensate
1007         // by decreasing the image phrase width accordingly.
1008         iwidth -= phraseClippedWidth;
1009
1010         // Also, if we're clipping the phrase we need to make sure we're in the correct part of
1011         // the pixel data.
1012 //      data += phraseClippedWidth * (pitch << 3);
1013         data += dataClippedWidth * pitch;
1014
1015         // NOTE: When the bitmap is in REFLECT mode, the XPOS marks the *right* side of the
1016         //       bitmap! This makes clipping & etc. MUCH, much easier...!
1017 //      uint32 lbufAddress = 0x1800 + (!in24BPPMode ? leftMargin * 2 : leftMargin * 4);
1018 //Why does this work right when multiplying startPos by 2 (instead of 4) for 24 BPP mode?
1019 //Is this a bug in the OP?
1020 //It's because in 24bpp mode, each pixel takes *4* bytes, instead of the usual 2.
1021 //Though it looks like we're doing it here no matter what...
1022 //      uint32 lbufAddress = 0x1800 + (!in24BPPMode ? startPos * 2 : startPos * 2);
1023 //Let's try this:
1024         uint32 lbufAddress = 0x1800 + (startPos * 2);
1025         uint8 * currentLineBuffer = &tomRam8[lbufAddress];
1026
1027         // Render.
1028
1029 // Hmm. We check above for 24 BPP mode, but don't do anything about it below...
1030 // If we *were* in 24 BPP mode, how would you convert CRY to RGB24? Seems to me
1031 // that if you're in CRY mode then you wouldn't be able to use 24 BPP bitmaps
1032 // anyway.
1033 // This seems to be the case (at least according to the Midsummer docs)...!
1034
1035 // This is to test using palette zeroes instead of bit zeroes...
1036 // And it seems that this is wrong, index == 0 is transparent apparently... :-/
1037 //#define OP_USES_PALETTE_ZERO
1038
1039         if (depth == 0)                                                                 // 1 BPP
1040         {
1041                 // The LSB of flags is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1042                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1043
1044                 // Fetch 1st phrase...
1045                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1046 //Note that firstPix should only be honored *if* we start with the 1st phrase of the bitmap
1047 //i.e., we didn't clip on the margin... !!! FIX !!!
1048                 pixels <<= firstPix;                                            // Skip first N pixels (N=firstPix)...
1049                 int i = firstPix;                                                       // Start counter at right spot...
1050
1051                 while (iwidth--)
1052                 {
1053                         while (i++ < 64)
1054                         {
1055                                 uint8 bit = pixels >> 63;
1056 #ifndef OP_USES_PALETTE_ZERO
1057                                 if (flagTRANS && bit == 0)
1058 #else
1059                                 if (flagTRANS && (paletteRAM16[index | bit] == 0))
1060 #endif
1061                                         ;       // Do nothing...
1062                                 else
1063                                 {
1064                                         if (!flagRMW)
1065 //Optimize: Set paletteRAM16 to beginning of palette RAM + index*2 and use only [bit] as index...
1066 //Won't optimize RMW case though...
1067                                                 // This is the *only* correct use of endian-dependent code
1068                                                 // (i.e., mem-to-mem direct copying)!
1069                                                 *(uint16 *)currentLineBuffer = paletteRAM16[index | bit];
1070                                         else
1071                                                 *currentLineBuffer =
1072                                                         BLEND_CR(*currentLineBuffer, paletteRAM[(index | bit) << 1]),
1073                                                 *(currentLineBuffer + 1) =
1074                                                         BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bit) << 1) + 1]);
1075                                 }
1076
1077                                 currentLineBuffer += lbufDelta;
1078                                 pixels <<= 1;
1079                         }
1080                         i = 0;
1081                         // Fetch next phrase...
1082                         data += pitch;
1083                         pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1084                 }
1085         }
1086         else if (depth == 1)                                                    // 2 BPP
1087         {
1088 if (firstPix)
1089         WriteLog("OP: Fixed bitmap @ 2 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1090                 index &= 0xFC;                                                          // Top six bits form CLUT index
1091                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1092                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1093
1094                 while (iwidth--)
1095                 {
1096                         // Fetch phrase...
1097                         uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1098                         data += pitch;
1099
1100                         for(int i=0; i<32; i++)
1101                         {
1102                                 uint8 bits = pixels >> 62;
1103 // Seems to me that both of these are in the same endian, so we could cast it as
1104 // uint16 * and do straight across copies (what about 24 bpp? Treat it differently...)
1105 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
1106 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
1107 // No, it isn't because we read the memory in an endian safe way--this *won't* work...
1108 #ifndef OP_USES_PALETTE_ZERO
1109                                 if (flagTRANS && bits == 0)
1110 #else
1111                                 if (flagTRANS && (paletteRAM16[index | bits] == 0))
1112 #endif
1113                                         ;       // Do nothing...
1114                                 else
1115                                 {
1116                                         if (!flagRMW)
1117                                                 *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1118                                         else
1119                                                 *currentLineBuffer =
1120                                                         BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1121                                                 *(currentLineBuffer + 1) =
1122                                                         BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1123                                 }
1124
1125                                 currentLineBuffer += lbufDelta;
1126                                 pixels <<= 2;
1127                         }
1128                 }
1129         }
1130         else if (depth == 2)                                                    // 4 BPP
1131         {
1132 if (firstPix)
1133         WriteLog("OP: Fixed bitmap @ 4 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1134                 index &= 0xF0;                                                          // Top four bits form CLUT index
1135                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1136                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1137
1138                 while (iwidth--)
1139                 {
1140                         // Fetch phrase...
1141                         uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1142                         data += pitch;
1143
1144                         for(int i=0; i<16; i++)
1145                         {
1146                                 uint8 bits = pixels >> 60;
1147 // Seems to me that both of these are in the same endian, so we could cast it as
1148 // uint16 * and do straight across copies (what about 24 bpp? Treat it differently...)
1149 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
1150 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
1151 // No, it isn't because we read the memory in an endian safe way--this *won't* work...
1152 #ifndef OP_USES_PALETTE_ZERO
1153                                 if (flagTRANS && bits == 0)
1154 #else
1155                                 if (flagTRANS && (paletteRAM16[index | bits] == 0))
1156 #endif
1157                                         ;       // Do nothing...
1158                                 else
1159                                 {
1160                                         if (!flagRMW)
1161                                                 *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1162                                         else
1163                                                 *currentLineBuffer =
1164                                                         BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1165                                                 *(currentLineBuffer + 1) =
1166                                                         BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1167                                 }
1168
1169                                 currentLineBuffer += lbufDelta;
1170                                 pixels <<= 4;
1171                         }
1172                 }
1173         }
1174         else if (depth == 3)                                                    // 8 BPP
1175         {
1176                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1177                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1178
1179                 // Fetch 1st phrase...
1180                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1181 //Note that firstPix should only be honored *if* we start with the 1st phrase of the bitmap
1182 //i.e., we didn't clip on the margin... !!! FIX !!!
1183                 firstPix &= 0x30;                                                       // Only top two bits are valid for 8 BPP
1184                 pixels <<= firstPix;                                            // Skip first N pixels (N=firstPix)...
1185                 int i = firstPix >> 3;                                          // Start counter at right spot...
1186
1187                 while (iwidth--)
1188                 {
1189                         while (i++ < 8)
1190                         {
1191                                 uint8 bits = pixels >> 56;
1192 // Seems to me that both of these are in the same endian, so we could cast it as
1193 // uint16 * and do straight across copies (what about 24 bpp? Treat it differently...)
1194 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
1195 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
1196 // No, it isn't because we read the memory in an endian safe way--this *won't* work...
1197 //This would seem to be problematic...
1198 //Because it's the palette entry being zero that makes the pixel transparent...
1199 //Let's try it and see.
1200 #ifndef OP_USES_PALETTE_ZERO
1201                                 if (flagTRANS && bits == 0)
1202 #else
1203                                 if (flagTRANS && (paletteRAM16[bits] == 0))
1204 #endif
1205                                         ;       // Do nothing...
1206                                 else
1207                                 {
1208                                         if (!flagRMW)
1209                                                 *(uint16 *)currentLineBuffer = paletteRAM16[bits];
1210                                         else
1211                                                 *currentLineBuffer =
1212                                                         BLEND_CR(*currentLineBuffer, paletteRAM[bits << 1]),
1213                                                 *(currentLineBuffer + 1) =
1214                                                         BLEND_Y(*(currentLineBuffer + 1), paletteRAM[(bits << 1) + 1]);
1215                                 }
1216
1217                                 currentLineBuffer += lbufDelta;
1218                                 pixels <<= 8;
1219                         }
1220                         i = 0;
1221                         // Fetch next phrase...
1222                         data += pitch;
1223                         pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1224                 }
1225         }
1226         else if (depth == 4)                                                    // 16 BPP
1227         {
1228 if (firstPix)
1229         WriteLog("OP: Fixed bitmap @ 16 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1230                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1231                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1232
1233                 while (iwidth--)
1234                 {
1235                         // Fetch phrase...
1236                         uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1237                         data += pitch;
1238
1239                         for(int i=0; i<4; i++)
1240                         {
1241                                 uint8 bitsHi = pixels >> 56, bitsLo = pixels >> 48;
1242 // Seems to me that both of these are in the same endian, so we could cast it as
1243 // uint16 * and do straight across copies (what about 24 bpp? Treat it differently...)
1244 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
1245 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
1246 // No, it isn't because we read the memory in an endian safe way--it *won't* work...
1247 //This doesn't seem right... Let's try the encoded black value ($8800):
1248 //Apparently, CRY 0 maps to $8800...
1249                                 if (flagTRANS && ((bitsLo | bitsHi) == 0))
1250 //                              if (flagTRANS && (bitsHi == 0x88) && (bitsLo == 0x00))
1251                                         ;       // Do nothing...
1252                                 else
1253                                 {
1254                                         if (!flagRMW)
1255                                                 *currentLineBuffer = bitsHi,
1256                                                 *(currentLineBuffer + 1) = bitsLo;
1257                                         else
1258                                                 *currentLineBuffer =
1259                                                         BLEND_CR(*currentLineBuffer, bitsHi),
1260                                                 *(currentLineBuffer + 1) =
1261                                                         BLEND_Y(*(currentLineBuffer + 1), bitsLo);
1262                                 }
1263
1264                                 currentLineBuffer += lbufDelta;
1265                                 pixels <<= 16;
1266                         }
1267                 }
1268         }
1269         else if (depth == 5)                                                    // 24 BPP
1270         {
1271 //Looks like Iron Soldier is the only game that uses 24BPP mode...
1272 //There *might* be others...
1273 //WriteLog("OP: Writing 24 BPP bitmap!\n");
1274 if (firstPix)
1275         WriteLog("OP: Fixed bitmap @ 24 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1276                 // Not sure, but I think RMW only works with 16 BPP and below, and only in CRY mode...
1277                 // The LSB of flags is OPFLAG_REFLECT, so sign extend it and OR 4 into it.
1278                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 4) | 0x04;
1279
1280                 while (iwidth--)
1281                 {
1282                         // Fetch phrase...
1283                         uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1284                         data += pitch;
1285
1286                         for(int i=0; i<2; i++)
1287                         {
1288                                 // We don't use a 32-bit var here because of endian issues...!
1289                                 uint8 bits3 = pixels >> 56, bits2 = pixels >> 48,
1290                                         bits1 = pixels >> 40, bits0 = pixels >> 32;
1291
1292                                 if (flagTRANS && (bits3 | bits2 | bits1 | bits0) == 0)
1293                                         ;       // Do nothing...
1294                                 else
1295                                         *currentLineBuffer = bits3,
1296                                         *(currentLineBuffer + 1) = bits2,
1297                                         *(currentLineBuffer + 2) = bits1,
1298                                         *(currentLineBuffer + 3) = bits0;
1299
1300                                 currentLineBuffer += lbufDelta;
1301                                 pixels <<= 32;
1302                         }
1303                 }
1304         }
1305 }
1306
1307 //
1308 // Store scaled bitmap in line buffer
1309 //
1310 void OPProcessScaledBitmap(uint64 p0, uint64 p1, uint64 p2, bool render)
1311 {
1312 // Need to make sure that when writing that it stays within the line buffer...
1313 // LBUF ($F01800 - $F01D9E) 360 x 32-bit RAM
1314         uint8 depth = (p1 >> 12) & 0x07;                                // Color depth of image
1315         int32 xpos = ((int16)((p1 << 4) & 0xFFFF)) >> 4;// Image xpos in LBUF
1316         uint32 iwidth = (p1 >> 28) & 0x3FF;                             // Image width in *phrases*
1317         uint32 data = (p0 >> 40) & 0xFFFFF8;                    // Pixel data address
1318 //#ifdef OP_DEBUG_BMP
1319 // Prolly should use this... Though not sure exactly how.
1320 //Use the upper bits as an offset into the phrase depending on the BPP. That's how!
1321         uint32 firstPix = (p1 >> 49) & 0x3F;
1322 //This is WEIRD! I'm sure I saw Atari Karts request 8 BPP FIRSTPIX! What happened???
1323 if (firstPix)
1324         WriteLog("OP: FIRSTPIX != 0! (Scaled BM)\n");
1325 //#endif
1326 // We can ignore the RELEASE (high order) bit for now--probably forever...!
1327 //      uint8 flags = (p1 >> 45) & 0x0F;        // REFLECT, RMW, TRANS, RELEASE
1328 //Optimize: break these out to their own BOOL values [DONE]
1329         uint8 flags = (p1 >> 45) & 0x07;                                // REFLECT (0), RMW (1), TRANS (2)
1330         bool flagREFLECT = (flags & OPFLAG_REFLECT ? true : false),
1331                 flagRMW = (flags & OPFLAG_RMW ? true : false),
1332                 flagTRANS = (flags & OPFLAG_TRANS ? true : false);
1333         uint8 index = (p1 >> 37) & 0xFE;                                // CLUT index offset (upper pix, 1-4 bpp)
1334         uint32 pitch = (p1 >> 15) & 0x07;                               // Phrase pitch
1335
1336         uint8 * tomRam8 = TOMGetRamPointer();
1337         uint8 * paletteRAM = &tomRam8[0x400];
1338         // This is OK as long as it's used correctly: For 16-bit RAM to RAM direct copies--NOT
1339         // for use when using endian-corrected data (i.e., any of the *ReadWord functions!)
1340         uint16 * paletteRAM16 = (uint16 *)paletteRAM;
1341
1342         uint16 hscale = p2 & 0xFF;
1343 // Hmm. It seems that fixing the horizontal scale necessitated re-fixing this. Not sure why,
1344 // but seems to be consistent with the vertical scaling now (and it may turn out to be wrong!)...
1345         uint16 horizontalRemainder = hscale;                            // Not sure if it starts full, but seems reasonable [It's not!]
1346 //      uint8 horizontalRemainder = 0;                                  // Let's try zero! Seems to work! Yay! [No, it doesn't!]
1347         int32 scaledWidthInPixels = (iwidth * phraseWidthToPixels[depth] * hscale) >> 5;
1348         uint32 scaledPhrasePixels = (phraseWidthToPixels[depth] * hscale) >> 5;
1349
1350 //      WriteLog("bitmap %ix? %ibpp at %i,? firstpix=? data=0x%.8x pitch %i hflipped=%s dwidth=? (linked to ?) RMW=%s Tranparent=%s\n",
1351 //              iwidth, op_bitmap_bit_depth[bitdepth], xpos, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), (flags&OPFLAG_RMW ? "yes" : "no"), (flags&OPFLAG_TRANS ? "yes" : "no"));
1352
1353 // Looks like an hscale of zero means don't draw!
1354         if (!render || iwidth == 0 || hscale == 0)
1355                 return;
1356
1357 /*extern int start_logging;
1358 if (start_logging)
1359         WriteLog("OP: Scaled bitmap %ix? %ibpp at %i,? hscale=%02X fpix=%i data=%08X pitch %i hflipped=%s dwidth=? (linked to %08X) Transluency=%s\n",
1360                 iwidth, op_bitmap_bit_depth[depth], xpos, hscale, firstPix, data, pitch, (flagREFLECT ? "yes" : "no"), op_pointer, (flagRMW ? "yes" : "no"));*/
1361 //#define OP_DEBUG_BMP
1362 //#ifdef OP_DEBUG_BMP
1363 //      WriteLog("OP: Scaled bitmap %ix%i %ibpp at %i,%i firstpix=%i data=0x%.8x pitch %i hflipped=%s dwidth=%i (linked to 0x%.8x) Transluency=%s\n",
1364 //              iwidth, height, op_bitmap_bit_depth[bitdepth], xpos, ypos, firstPix, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), dwidth, op_pointer, (flags&OPFLAG_RMW ? "yes" : "no"));
1365 //#endif
1366
1367         int32 startPos = xpos, endPos = xpos +
1368                 (!flagREFLECT ? scaledWidthInPixels - 1 : -(scaledWidthInPixels + 1));
1369         uint32 clippedWidth = 0, phraseClippedWidth = 0, dataClippedWidth = 0;
1370         bool in24BPPMode = (((GET16(tomRam8, 0x0028) >> 1) & 0x03) == 1 ? true : false);        // VMODE
1371         // Not sure if this is Jaguar Two only location or what...
1372         // From the docs, it is... If we want to limit here we should think of something else.
1373 //      int32 limit = GET16(tom_ram_8, 0x0008);                 // LIMIT
1374         int32 limit = 720;
1375 //      int32 lbufWidth = (!in24BPPMode ? limit - 1 : (limit / 2) - 1); // Zero based limit...
1376         int32 lbufWidth = 719;  // Zero based limit...
1377
1378         // If the image is completely to the left or right of the line buffer, then bail.
1379 //If in REFLECT mode, then these values are swapped! !!! FIX !!! [DONE]
1380 //There are four possibilities:
1381 //  1. image sits on left edge and no REFLECT; starts out of bounds but ends in bounds.
1382 //  2. image sits on left edge and REFLECT; starts in bounds but ends out of bounds.
1383 //  3. image sits on right edge and REFLECT; starts out of bounds but ends in bounds.
1384 //  4. image sits on right edge and no REFLECT; starts in bounds but ends out of bounds.
1385 //Numbers 2 & 4 can be caught by checking the LBUF clip while in the inner loop,
1386 // numbers 1 & 3 are of concern.
1387 // This *indirectly* handles only cases 2 & 4! And is WRONG if REFLECT is set...!
1388 //      if (rightMargin < 0 || leftMargin > lbufWidth)
1389
1390 // It might be easier to swap these (if REFLECTed) and just use XPOS down below...
1391 // That way, you could simply set XPOS to leftMargin if !REFLECT and to rightMargin otherwise.
1392 // Still have to be careful with the DATA and IWIDTH values though...
1393
1394         if ((!flagREFLECT && (endPos < 0 || startPos > lbufWidth))
1395                 || (flagREFLECT && (startPos < 0 || endPos > lbufWidth)))
1396                 return;
1397
1398         // Otherwise, find the clip limits and clip the phrase as well...
1399         // NOTE: I'm fudging here by letting the actual blit overstep the bounds of the
1400         //       line buffer, but it shouldn't matter since there are two unused line
1401         //       buffers below and nothing above and I'll at most write 40 bytes outside
1402         //       the line buffer... I could use a fractional clip begin/end value, but
1403         //       this makes the blit a *lot* more hairy. I might fix this in the future
1404         //       if it becomes necessary. (JLH)
1405         //       Probably wouldn't be *that* hairy. Just use a delta that tells the inner loop
1406         //       which pixel in the phrase is being written, and quit when either end of phrases
1407         //       is reached or line buffer extents are surpassed.
1408
1409 //This stuff is probably wrong as well... !!! FIX !!!
1410 //The strange thing is that it seems to work, but that's no guarantee that it's bulletproof!
1411 //Yup. Seems that JagMania doesn't work correctly with this...
1412 //Dunno if this is the problem, but Atari Karts is showing *some* of the road now...
1413 //Actually, it is! Or, it was. It doesn't seem to be clipping here, so the problem lies
1414 //elsewhere! Hmm. Putting the scaling code into the 1/2/8 BPP cases seems to draw the ground
1415 // a bit more accurately... Strange!
1416 //It's probably a case of the REFLECT flag being set and the background being written
1417 //from the right side of the screen...
1418 //But no, it isn't... At least if the diagnostics are telling the truth!
1419
1420         // NOTE: We're just using endPos to figure out how much, if any, to clip by.
1421         // ALSO: There may be another case where we start out of bounds and end out of bounds...!
1422         // !!! FIX !!!
1423
1424 //There's a problem here with scaledPhrasePixels in that it can be forced to zero when
1425 //the scaling factor is small. So fix it already! !!! FIX !!!
1426 /*if (scaledPhrasePixels == 0)
1427 {
1428         WriteLog("OP: [Scaled] We're about to encounter a divide by zero error!\n");
1429         DumpScaledObject(p0, p1, p2);
1430 }//*/
1431 //NOTE: I'm almost 100% sure that this is wrong... And it is! :-p
1432
1433 //Try a simple example...
1434 // Let's say we have a 8 BPP scanline with an hscale of $80 (4). Our xpos is -10,
1435 // non-flipped. Pixels in the bitmap are XYZXYZXYZXYZXYZ.
1436 // Scaled up, they would be XXXXYYYYZZZZXXXXYYYYZZZZXXXXYYYYZZZZ...
1437 //
1438 // Normally, we would expect this in the line buffer:
1439 // ZZXXXXYYYYZZZZXXXXYYYYZZZZ...
1440 //
1441 // But instead we're getting:
1442 // XXXXYYYYZZZZXXXXYYYYZZZZ...
1443 //
1444 // or are we??? It would seem so, simply by virtue of the fact that we're NOT starting
1445 // on negative boundary--or are we? Hmm...
1446 // cw = 10, dcw = pcw = 10 / ([8 * 4 = 32] 32) = 0, sp = -10
1447 //
1448 // Let's try a real world example:
1449 //
1450 //OP: Scaled bitmap (70, 8 BPP, spp=28) sp (-400) < 0... [new sp=-8, cw=400, dcw=pcw=14]
1451 //OP: Scaled bitmap (6F, 8 BPP, spp=27) sp (-395) < 0... [new sp=-17, cw=395, dcw=pcw=14]
1452 //
1453 // Really, spp is 27.75 in the second case...
1454 // So... If we do 395 / 27.75, we get 14. Ok so far... If we scale that against the
1455 // start position (14 * 27.75), we get -6.5... NOT -17!
1456
1457 //Now it seems we're working OK, at least for the first case...
1458 uint32 scaledPhrasePixelsUS = phraseWidthToPixels[depth] * hscale;
1459
1460         if (startPos < 0)                       // Case #1: Begin out, end in, L to R
1461 {
1462 extern int start_logging;
1463 if (start_logging)
1464         WriteLog("OP: Scaled bitmap (%02X, %u BPP, spp=%u) start pos (%i) < 0...", hscale, op_bitmap_bit_depth[depth], scaledPhrasePixels, startPos);
1465 //              clippedWidth = 0 - startPos,
1466                 clippedWidth = (0 - startPos) << 5,
1467 //              dataClippedWidth = phraseClippedWidth = clippedWidth / scaledPhrasePixels,
1468                 dataClippedWidth = phraseClippedWidth = (clippedWidth / scaledPhrasePixelsUS) >> 5,
1469 //              startPos = 0 - (clippedWidth % scaledPhrasePixels);
1470                 startPos += (dataClippedWidth * scaledPhrasePixelsUS) >> 5;
1471 if (start_logging)
1472         WriteLog(" [new sp=%i, cw=%i, dcw=pcw=%i]\n", startPos, clippedWidth, dataClippedWidth);
1473 }
1474
1475         if (endPos < 0)                         // Case #2: Begin in, end out, R to L
1476                 clippedWidth = 0 - endPos,
1477                 phraseClippedWidth = clippedWidth / scaledPhrasePixels;
1478
1479         if (endPos > lbufWidth)         // Case #3: Begin in, end out, L to R
1480                 clippedWidth = endPos - lbufWidth,
1481                 phraseClippedWidth = clippedWidth / scaledPhrasePixels;
1482
1483         if (startPos > lbufWidth)       // Case #4: Begin out, end in, R to L
1484                 clippedWidth = startPos - lbufWidth,
1485                 dataClippedWidth = phraseClippedWidth = clippedWidth / scaledPhrasePixels,
1486                 startPos = lbufWidth + (clippedWidth % scaledPhrasePixels);
1487
1488 extern int op_start_log;
1489 if (op_start_log && clippedWidth != 0)
1490         WriteLog("OP: Clipped line. SP=%i, EP=%i, clip=%u, iwidth=%u, hscale=%02X\n", startPos, endPos, clippedWidth, iwidth, hscale);
1491 if (op_start_log && startPos == 13)
1492 {
1493         WriteLog("OP: Scaled line. SP=%i, EP=%i, clip=%u, iwidth=%u, hscale=%02X, depth=%u, firstPix=%u\n", startPos, endPos, clippedWidth, iwidth, hscale, depth, firstPix);
1494         DumpScaledObject(p0, p1, p2);
1495         if (iwidth == 7)
1496         {
1497                 WriteLog("    %08X: ", data);
1498                 for(int i=0; i<7*8; i++)
1499                         WriteLog("%02X ", JaguarReadByte(data+i));
1500                 WriteLog("\n");
1501         }
1502 }
1503         // If the image is sitting on the line buffer left or right edge, we need to compensate
1504         // by decreasing the image phrase width accordingly.
1505         iwidth -= phraseClippedWidth;
1506
1507         // Also, if we're clipping the phrase we need to make sure we're in the correct part of
1508         // the pixel data.
1509 //      data += phraseClippedWidth * (pitch << 3);
1510         data += dataClippedWidth * (pitch << 3);
1511
1512         // NOTE: When the bitmap is in REFLECT mode, the XPOS marks the *right* side of the
1513         //       bitmap! This makes clipping & etc. MUCH, much easier...!
1514 //      uint32 lbufAddress = 0x1800 + (!in24BPPMode ? leftMargin * 2 : leftMargin * 4);
1515 //      uint32 lbufAddress = 0x1800 + (!in24BPPMode ? startPos * 2 : startPos * 4);
1516         uint32 lbufAddress = 0x1800 + startPos * 2;
1517         uint8 * currentLineBuffer = &tomRam8[lbufAddress];
1518 //uint8 * lineBufferLowerLimit = &tom_ram_8[0x1800],
1519 //      * lineBufferUpperLimit = &tom_ram_8[0x1800 + 719];
1520
1521         // Render.
1522
1523 // Hmm. We check above for 24 BPP mode, but don't do anything about it below...
1524 // If we *were* in 24 BPP mode, how would you convert CRY to RGB24? Seems to me
1525 // that if you're in CRY mode then you wouldn't be able to use 24 BPP bitmaps
1526 // anyway.
1527 // This seems to be the case (at least according to the Midsummer docs)...!
1528
1529         if (depth == 0)                                                                 // 1 BPP
1530         {
1531 if (firstPix != 0)
1532         WriteLog("OP: Scaled bitmap @ 1 BPP requesting FIRSTPIX!\n");
1533                 // The LSB of flags is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1534                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1535
1536                 int pixCount = 0;
1537                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1538
1539                 while ((int32)iwidth > 0)
1540                 {
1541                         uint8 bits = pixels >> 63;
1542
1543 #ifndef OP_USES_PALETTE_ZERO
1544                         if (flagTRANS && bits == 0)
1545 #else
1546                         if (flagTRANS && (paletteRAM16[index | bits] == 0))
1547 #endif
1548                                 ;       // Do nothing...
1549                         else
1550                         {
1551                                 if (!flagRMW)
1552                                         // This is the *only* correct use of endian-dependent code
1553                                         // (i.e., mem-to-mem direct copying)!
1554                                         *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1555                                 else
1556                                         *currentLineBuffer =
1557                                                 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1558                                         *(currentLineBuffer + 1) =
1559                                                 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1560                         }
1561
1562                         currentLineBuffer += lbufDelta;
1563
1564 /*
1565 The reason we subtract the horizontalRemainder *after* the test is because we had too few
1566 bytes for horizontalRemainder to properly recognize a negative number. But now it's 16 bits
1567 wide, so we could probably go back to that (as long as we make it an int16 and not a uint16!)
1568 */
1569 /*                      horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1570                         while (horizontalRemainder & 0x80)
1571                         {
1572                                 horizontalRemainder += hscale;
1573                                 pixCount++;
1574                                 pixels <<= 1;
1575                         }//*/
1576 //                      while (horizontalRemainder <= 0x20)             // I.e., it's <= 1.0 (*before* subtraction)
1577                         while (horizontalRemainder < 0x20)              // I.e., it's <= 1.0 (*before* subtraction)
1578                         {
1579                                 horizontalRemainder += hscale;
1580                                 pixCount++;
1581                                 pixels <<= 1;
1582                         }
1583                         horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1584
1585                         if (pixCount > 63)
1586                         {
1587                                 int phrasesToSkip = pixCount / 64, pixelShift = pixCount % 64;
1588
1589                                 data += (pitch << 3) * phrasesToSkip;
1590                                 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1591                                 pixels <<= 1 * pixelShift;
1592                                 iwidth -= phrasesToSkip;
1593                                 pixCount = pixelShift;
1594                         }
1595                 }
1596         }
1597         else if (depth == 1)                                                    // 2 BPP
1598         {
1599 if (firstPix != 0)
1600         WriteLog("OP: Scaled bitmap @ 2 BPP requesting FIRSTPIX!\n");
1601                 index &= 0xFC;                                                          // Top six bits form CLUT index
1602                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1603                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1604
1605                 int pixCount = 0;
1606                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1607
1608                 while ((int32)iwidth > 0)
1609                 {
1610                         uint8 bits = pixels >> 62;
1611
1612 #ifndef OP_USES_PALETTE_ZERO
1613                         if (flagTRANS && bits == 0)
1614 #else
1615                         if (flagTRANS && (paletteRAM16[index | bits] == 0))
1616 #endif
1617                                 ;       // Do nothing...
1618                         else
1619                         {
1620                                 if (!flagRMW)
1621                                         // This is the *only* correct use of endian-dependent code
1622                                         // (i.e., mem-to-mem direct copying)!
1623                                         *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1624                                 else
1625                                         *currentLineBuffer =
1626                                                 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1627                                         *(currentLineBuffer + 1) =
1628                                                 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1629                         }
1630
1631                         currentLineBuffer += lbufDelta;
1632
1633 /*                      horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1634                         while (horizontalRemainder & 0x80)
1635                         {
1636                                 horizontalRemainder += hscale;
1637                                 pixCount++;
1638                                 pixels <<= 2;
1639                         }//*/
1640 //                      while (horizontalRemainder <= 0x20)             // I.e., it's <= 0 (*before* subtraction)
1641                         while (horizontalRemainder < 0x20)              // I.e., it's <= 1.0 (*before* subtraction)
1642                         {
1643                                 horizontalRemainder += hscale;
1644                                 pixCount++;
1645                                 pixels <<= 2;
1646                         }
1647                         horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1648
1649                         if (pixCount > 31)
1650                         {
1651                                 int phrasesToSkip = pixCount / 32, pixelShift = pixCount % 32;
1652
1653                                 data += (pitch << 3) * phrasesToSkip;
1654                                 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1655                                 pixels <<= 2 * pixelShift;
1656                                 iwidth -= phrasesToSkip;
1657                                 pixCount = pixelShift;
1658                         }
1659                 }
1660         }
1661         else if (depth == 2)                                                    // 4 BPP
1662         {
1663 if (firstPix != 0)
1664         WriteLog("OP: Scaled bitmap @ 4 BPP requesting FIRSTPIX!\n");
1665                 index &= 0xF0;                                                          // Top four bits form CLUT index
1666                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1667                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1668
1669                 int pixCount = 0;
1670                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1671
1672                 while ((int32)iwidth > 0)
1673                 {
1674                         uint8 bits = pixels >> 60;
1675
1676 #ifndef OP_USES_PALETTE_ZERO
1677                         if (flagTRANS && bits == 0)
1678 #else
1679                         if (flagTRANS && (paletteRAM16[index | bits] == 0))
1680 #endif
1681                                 ;       // Do nothing...
1682                         else
1683                         {
1684                                 if (!flagRMW)
1685                                         // This is the *only* correct use of endian-dependent code
1686                                         // (i.e., mem-to-mem direct copying)!
1687                                         *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1688                                 else
1689                                         *currentLineBuffer =
1690                                                 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1691                                         *(currentLineBuffer + 1) =
1692                                                 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1693                         }
1694
1695                         currentLineBuffer += lbufDelta;
1696
1697 /*                      horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1698                         while (horizontalRemainder & 0x80)
1699                         {
1700                                 horizontalRemainder += hscale;
1701                                 pixCount++;
1702                                 pixels <<= 4;
1703                         }//*/
1704 //                      while (horizontalRemainder <= 0x20)             // I.e., it's <= 0 (*before* subtraction)
1705                         while (horizontalRemainder < 0x20)              // I.e., it's <= 0 (*before* subtraction)
1706                         {
1707                                 horizontalRemainder += hscale;
1708                                 pixCount++;
1709                                 pixels <<= 4;
1710                         }
1711                         horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1712
1713                         if (pixCount > 15)
1714                         {
1715                                 int phrasesToSkip = pixCount / 16, pixelShift = pixCount % 16;
1716
1717                                 data += (pitch << 3) * phrasesToSkip;
1718                                 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1719                                 pixels <<= 4 * pixelShift;
1720                                 iwidth -= phrasesToSkip;
1721                                 pixCount = pixelShift;
1722                         }
1723                 }
1724         }
1725         else if (depth == 3)                                                    // 8 BPP
1726         {
1727 if (firstPix)
1728         WriteLog("OP: Scaled bitmap @ 8 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1729                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1730                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1731
1732                 int pixCount = 0;
1733                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1734
1735                 while ((int32)iwidth > 0)
1736                 {
1737                         uint8 bits = pixels >> 56;
1738
1739 #ifndef OP_USES_PALETTE_ZERO
1740                         if (flagTRANS && bits == 0)
1741 #else
1742                         if (flagTRANS && (paletteRAM16[bits] == 0))
1743 #endif
1744                                 ;       // Do nothing...
1745                         else
1746                         {
1747                                 if (!flagRMW)
1748                                         // This is the *only* correct use of endian-dependent code
1749                                         // (i.e., mem-to-mem direct copying)!
1750                                         *(uint16 *)currentLineBuffer = paletteRAM16[bits];
1751 /*                              {
1752                                         if (currentLineBuffer >= lineBufferLowerLimit && currentLineBuffer <= lineBufferUpperLimit)
1753                                                 *(uint16 *)currentLineBuffer = paletteRAM16[bits];
1754                                 }*/
1755                                 else
1756                                         *currentLineBuffer =
1757                                                 BLEND_CR(*currentLineBuffer, paletteRAM[bits << 1]),
1758                                         *(currentLineBuffer + 1) =
1759                                                 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[(bits << 1) + 1]);
1760                         }
1761
1762                         currentLineBuffer += lbufDelta;
1763
1764 //                      while (horizontalRemainder <= 0x20)             // I.e., it's <= 0 (*before* subtraction)
1765                         while (horizontalRemainder < 0x20)              // I.e., it's <= 1.0 (*before* subtraction)
1766                         {
1767                                 horizontalRemainder += hscale;
1768                                 pixCount++;
1769                                 pixels <<= 8;
1770                         }
1771                         horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1772
1773                         if (pixCount > 7)
1774                         {
1775                                 int phrasesToSkip = pixCount / 8, pixelShift = pixCount % 8;
1776
1777                                 data += (pitch << 3) * phrasesToSkip;
1778                                 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1779                                 pixels <<= 8 * pixelShift;
1780                                 iwidth -= phrasesToSkip;
1781                                 pixCount = pixelShift;
1782                         }
1783                 }
1784         }
1785         else if (depth == 4)                                                    // 16 BPP
1786         {
1787 if (firstPix != 0)
1788         WriteLog("OP: Scaled bitmap @ 16 BPP requesting FIRSTPIX!\n");
1789                 // The LSB is OPFLAG_REFLECT, so sign extend it and OR 2 into it.
1790                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1791
1792                 int pixCount = 0;
1793                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1794
1795                 while ((int32)iwidth > 0)
1796                 {
1797                         uint8 bitsHi = pixels >> 56, bitsLo = pixels >> 48;
1798
1799 //This doesn't seem right... Let's try the encoded black value ($8800):
1800 //Apparently, CRY 0 maps to $8800...
1801                                 if (flagTRANS && ((bitsLo | bitsHi) == 0))
1802 //                              if (flagTRANS && (bitsHi == 0x88) && (bitsLo == 0x00))
1803                                 ;       // Do nothing...
1804                         else
1805                         {
1806                                 if (!flagRMW)
1807                                         *currentLineBuffer = bitsHi,
1808                                         *(currentLineBuffer + 1) = bitsLo;
1809                                 else
1810                                         *currentLineBuffer =
1811                                                 BLEND_CR(*currentLineBuffer, bitsHi),
1812                                         *(currentLineBuffer + 1) =
1813                                                 BLEND_Y(*(currentLineBuffer + 1), bitsLo);
1814                         }
1815
1816                         currentLineBuffer += lbufDelta;
1817
1818 /*                      horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1819                         while (horizontalRemainder & 0x80)
1820                         {
1821                                 horizontalRemainder += hscale;
1822                                 pixCount++;
1823                                 pixels <<= 16;
1824                         }//*/
1825 //                      while (horizontalRemainder <= 0x20)             // I.e., it's <= 0 (*before* subtraction)
1826                         while (horizontalRemainder < 0x20)              // I.e., it's <= 1.0 (*before* subtraction)
1827                         {
1828                                 horizontalRemainder += hscale;
1829                                 pixCount++;
1830                                 pixels <<= 16;
1831                         }
1832                         horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1833 //*/
1834                         if (pixCount > 3)
1835                         {
1836                                 int phrasesToSkip = pixCount / 4, pixelShift = pixCount % 4;
1837
1838                                 data += (pitch << 3) * phrasesToSkip;
1839                                 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1840                                 pixels <<= 16 * pixelShift;
1841
1842                                 iwidth -= phrasesToSkip;
1843
1844                                 pixCount = pixelShift;
1845                         }
1846                 }
1847         }
1848         else if (depth == 5)                                                    // 24 BPP
1849         {
1850 //I'm not sure that you can scale a 24 BPP bitmap properly--the JTRM seems to indicate as much.
1851 WriteLog("OP: Writing 24 BPP scaled bitmap!\n");
1852 if (firstPix != 0)
1853         WriteLog("OP: Scaled bitmap @ 24 BPP requesting FIRSTPIX!\n");
1854                 // Not sure, but I think RMW only works with 16 BPP and below, and only in CRY mode...
1855                 // The LSB is OPFLAG_REFLECT, so sign extend it and OR 4 into it.
1856                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 4) | 0x04;
1857
1858                 while (iwidth--)
1859                 {
1860                         // Fetch phrase...
1861                         uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1862                         data += pitch << 3;                                             // Multiply pitch * 8 (optimize: precompute this value)
1863
1864                         for(int i=0; i<2; i++)
1865                         {
1866                                 uint8 bits3 = pixels >> 56, bits2 = pixels >> 48,
1867                                         bits1 = pixels >> 40, bits0 = pixels >> 32;
1868
1869                                 if (flagTRANS && (bits3 | bits2 | bits1 | bits0) == 0)
1870                                         ;       // Do nothing...
1871                                 else
1872                                         *currentLineBuffer = bits3,
1873                                         *(currentLineBuffer + 1) = bits2,
1874                                         *(currentLineBuffer + 2) = bits1,
1875                                         *(currentLineBuffer + 3) = bits0;
1876
1877                                 currentLineBuffer += lbufDelta;
1878                                 pixels <<= 32;
1879                         }
1880                 }
1881         }
1882 }