]> Shamusworld >> Repos - virtualjaguar/blob - src/op.cpp
Fixed VC to act like a real Jaguar. :-)
[virtualjaguar] / src / op.cpp
1 //
2 // Object Processor
3 //
4 // Original source by David Raingeard (Cal2)
5 // GCC/SDL port by Niels Wagenaar (Linux/WIN32) and Caz (BeOS)
6 // Extensive cleanups/fixes/rewrites by James L. Hammons
7 // (C) 2010 Underground Software
8 //
9 // JLH = James L. Hammons <jlhamm@acm.org>
10 //
11 // Who  When        What
12 // ---  ----------  -------------------------------------------------------------
13 // JLH  01/16/2010  Created this log ;-)
14 //
15
16 #include "op.h"
17
18 #include <stdlib.h>
19 #include <string.h>
20 #include "gpu.h"
21 #include "jaguar.h"
22 #include "log.h"
23 #include "m68k.h"
24 #include "memory.h"
25 #include "tom.h"
26
27 //#define OP_DEBUG
28 //#define OP_DEBUG_BMP
29
// Blend table lookups. The table index is (destination byte << 8) | source byte.
// Every macro argument is fully parenthesized so that compound expressions
// (e.g. "x & 0xFF") are cast and shifted as a whole rather than token by token.
#define BLEND_Y(dst, src)	op_blend_y[(((uint16)(dst)) << 8) | ((uint16)(src))]
#define BLEND_CR(dst, src)	op_blend_cr[(((uint16)(dst)) << 8) | ((uint16)(src))]
32
33 #define OBJECT_TYPE_BITMAP      0                                       // 000
34 #define OBJECT_TYPE_SCALE       1                                       // 001
35 #define OBJECT_TYPE_GPU         2                                       // 010
36 #define OBJECT_TYPE_BRANCH      3                                       // 011
37 #define OBJECT_TYPE_STOP        4                                       // 100
38
39 #define CONDITION_EQUAL                         0
40 #define CONDITION_LESS_THAN                     1
41 #define CONDITION_GREATER_THAN          2
42 #define CONDITION_OP_FLAG_SET           3
43 #define CONDITION_SECOND_HALF_LINE      4
44
45 #define OPFLAG_RELEASE          8                                       // Bus release bit
46 #define OPFLAG_TRANS            4                                       // Transparency bit
47 #define OPFLAG_RMW                      2                                       // Read-Modify-Write bit
48 #define OPFLAG_REFLECT          1                                       // Horizontal mirror bit
49
50 // Private function prototypes
51
52 void OPProcessFixedBitmap(uint64 p0, uint64 p1, bool render);
53 void OPProcessScaledBitmap(uint64 p0, uint64 p1, uint64 p2, bool render);
54 void OPDiscoverObjects(uint32 address);
55 void OPDumpObjectList(void);
56 void DumpScaledObject(uint64 p0, uint64 p1, uint64 p2);
57 void DumpFixedObject(uint64 p0, uint64 p1);
58 void DumpBitmapCore(uint64 p0, uint64 p1);
59 uint64 OPLoadPhrase(uint32 offset);
60
61 // Local global variables
62
63 // Blend tables (64K each)
64 static uint8 op_blend_y[0x10000];
65 static uint8 op_blend_cr[0x10000];
66 // There may be a problem with this "RAM" overlapping (and thus being independent of)
67 // some of the regular TOM RAM...
68 //#warning objectp_ram is separated from TOM RAM--need to fix that!
69 //static uint8 objectp_ram[0x40];                       // This is based at $F00000
70 uint8 objectp_running = 0;
71 //bool objectp_stop_reading_list;
72
73 static uint8 op_bitmap_bit_depth[8] = { 1, 2, 4, 8, 16, 24, 32, 0 };
74 //static uint32 op_bitmap_bit_size[8] =
75 //      { (uint32)(0.125*65536), (uint32)(0.25*65536), (uint32)(0.5*65536), (uint32)(1*65536),
76 //        (uint32)(2*65536),     (uint32)(1*65536),    (uint32)(1*65536),   (uint32)(1*65536) };
77 static uint32 op_pointer;
78
79 int32 phraseWidthToPixels[8] = { 64, 32, 16, 8, 4, 2, 0, 0 };
80
81
82 //
83 // Object Processor initialization
84 //
85 void OPInit(void)
86 {
87         // Here we calculate the saturating blend of a signed 4-bit value and an
88         // existing Cyan/Red value as well as a signed 8-bit value and an existing intensity...
89         // Note: CRY is 4 bits Cyan, 4 bits Red, 16 bits intensitY
90         for(int i=0; i<256*256; i++)
91         {
92                 int y = (i >> 8) & 0xFF;
93                 int dy = (int8)i;                                       // Sign extend the Y index
94                 int c1 = (i >> 8) & 0x0F;
95                 int dc1 = (int8)(i << 4) >> 4;          // Sign extend the R index
96                 int c2 = (i >> 12) & 0x0F;
97                 int dc2 = (int8)(i & 0xF0) >> 4;        // Sign extend the C index
98
99                 y += dy;
100
101                 if (y < 0)
102                         y = 0;
103                 else if (y > 0xFF)
104                         y = 0xFF;
105
106                 op_blend_y[i] = y;
107
108                 c1 += dc1;
109
110                 if (c1 < 0)
111                         c1 = 0;
112                 else if (c1 > 0x0F)
113                         c1 = 0x0F;
114
115                 c2 += dc2;
116
117                 if (c2 < 0)
118                         c2 = 0;
119                 else if (c2 > 0x0F)
120                         c2 = 0x0F;
121
122                 op_blend_cr[i] = (c2 << 4) | c1;
123         }
124
125         OPReset();
126 }
127
128 //
129 // Object Processor reset
130 //
131 void OPReset(void)
132 {
133 //      memset(objectp_ram, 0x00, 0x40);
134         objectp_running = 0;
135 }
136
137 static const char * opType[8] =
138 { "(BITMAP)", "(SCALED BITMAP)", "(GPU INT)", "(BRANCH)", "(STOP)", "???", "???", "???" };
139 static const char * ccType[8] =
140         { "\"==\"", "\"<\"", "\">\"", "(opflag set)", "(second half line)", "?", "?", "?" };
141 static uint32 object[8192];
142 static uint32 numberOfObjects;
143 //static uint32 objectLink[8192];
144 //static uint32 numberOfLinks;
145
146 void OPDone(void)
147 {
148 //#warning "!!! Fix OL dump so that it follows links !!!"
149 //      const char * opType[8] =
150 //      { "(BITMAP)", "(SCALED BITMAP)", "(GPU INT)", "(BRANCH)", "(STOP)", "???", "???", "???" };
151 //      const char * ccType[8] =
152 //              { "\"==\"", "\"<\"", "\">\"", "(opflag set)", "(second half line)", "?", "?", "?" };
153
154         uint32 olp = OPGetListPointer();
155         WriteLog("\nOP: OLP = $%08X\n", olp);
156         WriteLog("OP: Phrase dump\n    ----------\n");
157
158 #if 0
159         for(uint32 i=0; i<0x100; i+=8)
160         {
161                 uint32 hi = JaguarReadLong(olp + i, OP), lo = JaguarReadLong(olp + i + 4, OP);
162                 WriteLog("\t%08X: %08X %08X %s", olp + i, hi, lo, opType[lo & 0x07]);
163
164                 if ((lo & 0x07) == 3)
165                 {
166                         uint16 ypos = (lo >> 3) & 0x7FF;
167                         uint8  cc   = (lo >> 14) & 0x03;
168                         uint32 link = ((hi << 11) | (lo >> 21)) & 0x3FFFF8;
169                         WriteLog(" YPOS=%u, CC=%s, link=%08X", ypos, ccType[cc], link);
170                 }
171
172                 WriteLog("\n");
173
174                 if ((lo & 0x07) == 0)
175                         DumpFixedObject(OPLoadPhrase(olp+i), OPLoadPhrase(olp+i+8));
176
177                 if ((lo & 0x07) == 1)
178                         DumpScaledObject(OPLoadPhrase(olp+i), OPLoadPhrase(olp+i+8), OPLoadPhrase(olp+i+16));
179         }
180
181         WriteLog("\n");
182 #else
183         numberOfObjects = 0;
184         OPDiscoverObjects(olp);
185         OPDumpObjectList();
186 #endif
187 }
188
189 void OPDiscoverObjects(uint32 address)
190 {
191         // Check to see if we've already seen this object
192         for(uint32 i=0; i<numberOfObjects; i++)
193         {
194                 if (address == object[i])
195                         return;
196         }
197
198         // Store the object...
199         object[numberOfObjects++] = address;
200         uint8 objectType = 0;
201
202         do
203         {
204                 uint32 hi = JaguarReadLong(address + 0, OP);
205                 uint32 lo = JaguarReadLong(address + 4, OP);
206                 objectType = lo & 0x07;
207                 uint32 link = ((hi << 11) | (lo >> 21)) & 0x3FFFF8;
208
209                 if (objectType == 3)
210                 {
211                         uint16 ypos = (lo >> 3) & 0x7FF;
212                         uint8  cc   = (lo >> 14) & 0x07;        // Proper # of bits == 3
213
214                         // Recursion needed to follow all links!
215                         OPDiscoverObjects(address + 8);
216                 }
217
218                 if (address == link)    // Ruh roh...
219                 {
220                         // Runaway recursive link is bad!
221                         return;
222                 }
223
224                 address = link;
225
226                 // Check to see if we've already seen this object, and add it if not
227                 bool seenObject = false;
228
229                 for(uint32 i=0; i<numberOfObjects; i++)
230                 {
231                         if (address == object[i])
232                         {
233                                 seenObject = true;
234                                 break;
235                         }
236                 }
237
238                 if (!seenObject)
239                         object[numberOfObjects++] = address;
240         }
241         while (objectType != 4);
242 }
243
244 void OPDumpObjectList(void)
245 {
246         for(uint32 i=0; i<numberOfObjects; i++)
247         {
248                 uint32 address = object[i];
249
250                 uint32 hi = JaguarReadLong(address + 0, OP);
251                 uint32 lo = JaguarReadLong(address + 4, OP);
252                 uint8 objectType = lo & 0x07;
253                 uint32 link = ((hi << 11) | (lo >> 21)) & 0x3FFFF8;
254                 WriteLog("%08X: %08X %08X %s", address, hi, lo, opType[objectType]);
255
256                 if (objectType == 3)
257                 {
258                         uint16 ypos = (lo >> 3) & 0x7FF;
259                         uint8  cc   = (lo >> 14) & 0x07;        // Proper # of bits == 3
260                         WriteLog(" YPOS=%u, CC=%s, link=$%08X", ypos, ccType[cc], link);
261                 }
262
263                 WriteLog("\n");
264
265                 if (objectType == 0)
266                         DumpFixedObject(OPLoadPhrase(address + 0), OPLoadPhrase(address + 8));
267
268                 if (objectType == 1)
269                         DumpScaledObject(OPLoadPhrase(address + 0), OPLoadPhrase(address + 8),
270                                 OPLoadPhrase(address + 16));
271
272                 if (address == link)    // Ruh roh...
273                 {
274                         // Runaway recursive link is bad!
275                         WriteLog("***** SELF REFERENTIAL LINK *****\n\n");
276                 }
277         }
278
279         WriteLog("\n");
280 }
281
282 //
283 // Object Processor memory access
284 // Memory range: F00010 - F00027
285 //
286 //      F00010-F00017   R     xxxxxxxx xxxxxxxx   OB - current object code from the graphics processor
287 //      F00020-F00023     W   xxxxxxxx xxxxxxxx   OLP - start of the object list
288 //      F00026            W   -------- -------x   OBF - object processor flag
289 //
290
291 #if 0
292 uint8 OPReadByte(uint32 offset, uint32 who/*=UNKNOWN*/)
293 {
294         offset &= 0x3F;
295         return objectp_ram[offset];
296 }
297
298 uint16 OPReadWord(uint32 offset, uint32 who/*=UNKNOWN*/)
299 {
300         offset &= 0x3F;
301         return GET16(objectp_ram, offset);
302 }
303
304 void OPWriteByte(uint32 offset, uint8 data, uint32 who/*=UNKNOWN*/)
305 {
306         offset &= 0x3F;
307         objectp_ram[offset] = data;
308 }
309
310 void OPWriteWord(uint32 offset, uint16 data, uint32 who/*=UNKNOWN*/)
311 {
312         offset &= 0x3F;
313         SET16(objectp_ram, offset, data);
314
315 /*if (offset == 0x20)
316 WriteLog("OP: Setting lo list pointer: %04X\n", data);
317 if (offset == 0x22)
318 WriteLog("OP: Setting hi list pointer: %04X\n", data);//*/
319 }
320 #endif
321
322 uint32 OPGetListPointer(void)
323 {
324         // Note: This register is LO / HI WORD, hence the funky look of this...
325         return GET16(tomRam8, 0x20) | (GET16(tomRam8, 0x22) << 16);
326 }
327
328 // This is WRONG, since the OBF is only 16 bits wide!!! [FIXED]
329
330 uint32 OPGetStatusRegister(void)
331 {
332         return GET16(tomRam8, 0x26);
333 }
334
335 // This is WRONG, since the OBF is only 16 bits wide!!! [FIXED]
336
337 void OPSetStatusRegister(uint32 data)
338 {
339         tomRam8[0x26] = (data & 0x0000FF00) >> 8;
340         tomRam8[0x27] |= (data & 0xFE);
341 }
342
343 void OPSetCurrentObject(uint64 object)
344 {
345 //Not sure this is right... Wouldn't it just be stored 64 bit BE?
346         // Stored as least significant 32 bits first, ms32 last in big endian
347 /*      objectp_ram[0x13] = object & 0xFF; object >>= 8;
348         objectp_ram[0x12] = object & 0xFF; object >>= 8;
349         objectp_ram[0x11] = object & 0xFF; object >>= 8;
350         objectp_ram[0x10] = object & 0xFF; object >>= 8;
351
352         objectp_ram[0x17] = object & 0xFF; object >>= 8;
353         objectp_ram[0x16] = object & 0xFF; object >>= 8;
354         objectp_ram[0x15] = object & 0xFF; object >>= 8;
355         objectp_ram[0x14] = object & 0xFF;*/
356 // Let's try regular good old big endian...
357         tomRam8[0x17] = object & 0xFF; object >>= 8;
358         tomRam8[0x16] = object & 0xFF; object >>= 8;
359         tomRam8[0x15] = object & 0xFF; object >>= 8;
360         tomRam8[0x14] = object & 0xFF; object >>= 8;
361
362         tomRam8[0x13] = object & 0xFF; object >>= 8;
363         tomRam8[0x12] = object & 0xFF; object >>= 8;
364         tomRam8[0x11] = object & 0xFF; object >>= 8;
365         tomRam8[0x10] = object & 0xFF;
366 }
367
368 uint64 OPLoadPhrase(uint32 offset)
369 {
370         offset &= ~0x07;                                                // 8 byte alignment
371         return ((uint64)JaguarReadLong(offset, OP) << 32) | (uint64)JaguarReadLong(offset+4, OP);
372 }
373
374 void OPStorePhrase(uint32 offset, uint64 p)
375 {
376         offset &= ~0x07;                                                // 8 byte alignment
377         JaguarWriteLong(offset, p >> 32, OP);
378         JaguarWriteLong(offset + 4, p & 0xFFFFFFFF, OP);
379 }
380
381 //
382 // Debugging routines
383 //
384 void DumpScaledObject(uint64 p0, uint64 p1, uint64 p2)
385 {
386         WriteLog("          %08X %08X\n", (uint32)(p1>>32), (uint32)(p1&0xFFFFFFFF));
387         WriteLog("          %08X %08X\n", (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
388         DumpBitmapCore(p0, p1);
389         uint32 hscale = p2 & 0xFF;
390         uint32 vscale = (p2 >> 8) & 0xFF;
391         uint32 remainder = (p2 >> 16) & 0xFF;
392         WriteLog("    [hsc: %02X, vsc: %02X, rem: %02X]\n", hscale, vscale, remainder);
393 }
394
395 void DumpFixedObject(uint64 p0, uint64 p1)
396 {
397         WriteLog("          %08X %08X\n", (uint32)(p1>>32), (uint32)(p1&0xFFFFFFFF));
398         DumpBitmapCore(p0, p1);
399 }
400
401 void DumpBitmapCore(uint64 p0, uint64 p1)
402 {
403         uint32 bdMultiplier[8] = { 64, 32, 16, 8, 4, 2, 1, 1 };
404         uint8 bitdepth = (p1 >> 12) & 0x07;
405 //WAS:  int16 ypos = ((p0 >> 3) & 0x3FF);                       // ??? What if not interlaced (/2)?
406         int16 ypos = ((p0 >> 3) & 0x7FF);                       // ??? What if not interlaced (/2)?
407         int32 xpos = p1 & 0xFFF;
408         xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);       // Sign extend that mutha!
409         uint32 iwidth = ((p1 >> 28) & 0x3FF);
410         uint32 dwidth = ((p1 >> 18) & 0x3FF);           // Unsigned!
411         uint16 height = ((p0 >> 14) & 0x3FF);
412         uint32 link = ((p0 >> 24) & 0x7FFFF) << 3;
413         uint32 ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
414         uint32 firstPix = (p1 >> 49) & 0x3F;
415         uint8 flags = (p1 >> 45) & 0x0F;
416         uint8 idx = (p1 >> 38) & 0x7F;
417         uint32 pitch = (p1 >> 15) & 0x07;
418         WriteLog("    [%u x %u @ (%i, %u) (iw:%u, dw:%u) (%u bpp), l:%08X, p:%08X fp:%02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
419                 iwidth * bdMultiplier[bitdepth],
420                 height, xpos, ypos, iwidth, dwidth, op_bitmap_bit_depth[bitdepth], link,
421                 ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""),
422                 (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""),
423                 (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
424 }
425
426 //
427 // Object Processor main routine
428 //
429 #warning "Need to fix this so that when an GPU object IRQ happens, we can pick up OP processing where we left off. !!! FIX !!!"
430 void OPProcessList(int halfline, bool render)
431 {
432 #warning "!!! NEED TO HANDLE MULTIPLE FIELDS PROPERLY !!!
433 // We ignore them, for now; not good
434         halfline &= 0x7FF;
435
436 extern int op_start_log;
437 //      char * condition_to_str[8] =
438 //              { "==", "<", ">", "(opflag set)", "(second half line)", "?", "?", "?" };
439
440         op_pointer = OPGetListPointer();
441
442 //      objectp_stop_reading_list = false;
443
444 //WriteLog("OP: Processing line #%u (OLP=%08X)...\n", halfline, op_pointer);
445 //op_done();
446
447 // *** BEGIN OP PROCESSOR TESTING ONLY ***
448 extern bool interactiveMode;
449 extern bool iToggle;
450 extern int objectPtr;
451 bool inhibit;
452 int bitmapCounter = 0;
453 // *** END OP PROCESSOR TESTING ONLY ***
454
455         uint32 opCyclesToRun = 30000;                                   // This is a pulled-out-of-the-air value (will need to be fixed, obviously!)
456
457 //      if (op_pointer) WriteLog(" new op list at 0x%.8x halfline %i\n",op_pointer,halfline);
458         while (op_pointer)
459         {
460 // *** BEGIN OP PROCESSOR TESTING ONLY ***
461 if (interactiveMode && bitmapCounter == objectPtr)
462         inhibit = iToggle;
463 else
464         inhibit = false;
465 // *** END OP PROCESSOR TESTING ONLY ***
466 //              if (objectp_stop_reading_list)
467 //                      return;
468
469                 uint64 p0 = OPLoadPhrase(op_pointer);
470                 op_pointer += 8;
471 //WriteLog("\t%08X type %i\n", op_pointer, (uint8)p0 & 0x07);
472
473 #if 1
474 if (halfline == TOMGetVDB() && op_start_log)
475 //if (halfline == 215 && op_start_log)
476 //if (halfline == 28 && op_start_log)
477 //if (halfline == 0)
478 {
479 WriteLog("%08X --> phrase %08X %08X", op_pointer - 8, (int)(p0>>32), (int)(p0&0xFFFFFFFF));
480 if ((p0 & 0x07) == OBJECT_TYPE_BITMAP)
481 {
482 WriteLog(" (BITMAP) ");
483 uint64 p1 = OPLoadPhrase(op_pointer);
484 WriteLog("\n%08X --> phrase %08X %08X ", op_pointer, (int)(p1>>32), (int)(p1&0xFFFFFFFF));
485         uint8 bitdepth = (p1 >> 12) & 0x07;
486 //WAS:  int16 ypos = ((p0 >> 3) & 0x3FF);                       // ??? What if not interlaced (/2)?
487         int16 ypos = ((p0 >> 3) & 0x7FF);                       // ??? What if not interlaced (/2)?
488 int32 xpos = p1 & 0xFFF;
489 xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);
490         uint32 iwidth = ((p1 >> 28) & 0x3FF);
491         uint32 dwidth = ((p1 >> 18) & 0x3FF);           // Unsigned!
492         uint16 height = ((p0 >> 14) & 0x3FF);
493         uint32 link = ((p0 >> 24) & 0x7FFFF) << 3;
494         uint32 ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
495         uint32 firstPix = (p1 >> 49) & 0x3F;
496         uint8 flags = (p1 >> 45) & 0x0F;
497         uint8 idx = (p1 >> 38) & 0x7F;
498         uint32 pitch = (p1 >> 15) & 0x07;
499 WriteLog("\n    [%u (%u) x %u @ (%i, %u) (%u bpp), l: %08X, p: %08X fp: %02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
500         iwidth, dwidth, height, xpos, ypos, op_bitmap_bit_depth[bitdepth], link, ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""), (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""), (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
501 }
502 if ((p0 & 0x07) == OBJECT_TYPE_SCALE)
503 {
504 WriteLog(" (SCALED BITMAP)");
505 uint64 p1 = OPLoadPhrase(op_pointer), p2 = OPLoadPhrase(op_pointer+8);
506 WriteLog("\n%08X --> phrase %08X %08X ", op_pointer, (int)(p1>>32), (int)(p1&0xFFFFFFFF));
507 WriteLog("\n%08X --> phrase %08X %08X ", op_pointer+8, (int)(p2>>32), (int)(p2&0xFFFFFFFF));
508         uint8 bitdepth = (p1 >> 12) & 0x07;
509 //WAS:  int16 ypos = ((p0 >> 3) & 0x3FF);                       // ??? What if not interlaced (/2)?
510         int16 ypos = ((p0 >> 3) & 0x7FF);                       // ??? What if not interlaced (/2)?
511 int32 xpos = p1 & 0xFFF;
512 xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);
513         uint32 iwidth = ((p1 >> 28) & 0x3FF);
514         uint32 dwidth = ((p1 >> 18) & 0x3FF);           // Unsigned!
515         uint16 height = ((p0 >> 14) & 0x3FF);
516         uint32 link = ((p0 >> 24) & 0x7FFFF) << 3;
517         uint32 ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
518         uint32 firstPix = (p1 >> 49) & 0x3F;
519         uint8 flags = (p1 >> 45) & 0x0F;
520         uint8 idx = (p1 >> 38) & 0x7F;
521         uint32 pitch = (p1 >> 15) & 0x07;
522 WriteLog("\n    [%u (%u) x %u @ (%i, %u) (%u bpp), l: %08X, p: %08X fp: %02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
523         iwidth, dwidth, height, xpos, ypos, op_bitmap_bit_depth[bitdepth], link, ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""), (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""), (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
524         uint32 hscale = p2 & 0xFF;
525         uint32 vscale = (p2 >> 8) & 0xFF;
526         uint32 remainder = (p2 >> 16) & 0xFF;
527 WriteLog("    [hsc: %02X, vsc: %02X, rem: %02X]\n", hscale, vscale, remainder);
528 }
529 if ((p0 & 0x07) == OBJECT_TYPE_GPU)
530 WriteLog(" (GPU)\n");
531 if ((p0 & 0x07) == OBJECT_TYPE_BRANCH)
532 {
533 WriteLog(" (BRANCH)\n");
534 uint8 * jaguarMainRam = GetRamPtr();
535 WriteLog("[RAM] --> ");
536 for(int k=0; k<8; k++)
537         WriteLog("%02X ", jaguarMainRam[op_pointer-8 + k]);
538 WriteLog("\n");
539 }
540 if ((p0 & 0x07) == OBJECT_TYPE_STOP)
541 WriteLog("    --> List end\n\n");
542 }
543 #endif
544
545                 switch ((uint8)p0 & 0x07)
546                 {
547                 case OBJECT_TYPE_BITMAP:
548                 {
549 //WAS:                  uint16 ypos = (p0 >> 3) & 0x3FF;
550                         uint16 ypos = (p0 >> 3) & 0x7FF;
551 // This is only theory implied by Rayman...!
552 // It seems that if the YPOS is zero, then bump the YPOS value so that it coincides with
553 // the VDB value. With interlacing, this would be slightly more tricky.
554 // There's probably another bit somewhere that enables this mode--but so far, doesn't seem
555 // to affect any other game in a negative way (that I've seen).
556 // Either that, or it's an undocumented bug...
557
558 //No, the reason this was needed is that the OP code before was wrong. Any value
559 //less than VDB will get written to the top line of the display!
560 #if 0
561 // Not so sure... Let's see what happens here...
562 // No change...
563                         if (ypos == 0)
564                                 ypos = TOMReadWord(0xF00046, OP) / 2;                   // Get the VDB value
565 #endif
566 // Actually, no. Any item less than VDB will get only the lines that hang over VDB displayed.
567 // Actually, this is incorrect. It seems that VDB value is wrong somewhere and that's
568 // what's causing things to fuck up. Still no idea why.
569
570                         uint32 height = (p0 & 0xFFC000) >> 14;
571                         uint32 oldOPP = op_pointer - 8;
572 // *** BEGIN OP PROCESSOR TESTING ONLY ***
573 if (inhibit && op_start_log)
574         WriteLog("!!! ^^^ This object is INHIBITED! ^^^ !!!\n");
575 bitmapCounter++;
576 if (!inhibit)   // For OP testing only!
577 // *** END OP PROCESSOR TESTING ONLY ***
578                         if (halfline >= ypos && height > 0)
579                         {
580                                 uint64 p1 = OPLoadPhrase(op_pointer);
581                                 op_pointer += 8;
582 //WriteLog("OP: Writing halfline %d with ypos == %d...\n", halfline, ypos);
583 //WriteLog("--> Writing %u BPP bitmap...\n", op_bitmap_bit_depth[(p1 >> 12) & 0x07]);
584 //                              OPProcessFixedBitmap(halfline, p0, p1, render);
585                                 OPProcessFixedBitmap(p0, p1, render);
586
587                                 // OP write-backs
588
589 //???Does this really happen??? Doesn't seem to work if you do this...!
590 //Probably not. Must be a bug in the documentation...!
591 //                              uint32 link = (p0 & 0x7FFFF000000) >> 21;
592 //                              SET16(tom_ram_8, 0x20, link & 0xFFFF);  // OLP
593 //                              SET16(tom_ram_8, 0x22, link >> 16);
594 /*                              uint32 height = (p0 & 0xFFC000) >> 14;
595                                 if (height - 1 > 0)
596                                         height--;*/
597                                 // NOTE: Would subtract 2 if in interlaced mode...!
598 //                              uint64 height = ((p0 & 0xFFC000) - 0x4000) & 0xFFC000;
599 //                              if (height)
600                                 height--;
601
602                                 uint64 data = (p0 & 0xFFFFF80000000000LL) >> 40;
603                                 uint64 dwidth = (p1 & 0xFFC0000) >> 15;
604                                 data += dwidth;
605
606                                 p0 &= ~0xFFFFF80000FFC000LL;            // Mask out old data...
607                                 p0 |= (uint64)height << 14;
608                                 p0 |= data << 40;
609                                 OPStorePhrase(oldOPP, p0);
610                         }
611 //WriteLog("\t\tOld OP: %08X -> ", op_pointer);
612 //Temp, for testing...
613 //No doubt, this type of check will break all kinds of stuff... !!! FIX !!!
614 //And it does! !!! FIX !!!
615 //Let's remove this "fix" since it screws up more than it fixes.
616 /*      if (op_pointer > ((p0 & 0x000007FFFF000000LL) >> 21))
617                 return;*/
618
619                         op_pointer = (p0 & 0x000007FFFF000000LL) >> 21;
620 //WriteLog("New OP: %08X\n", op_pointer);
621                         break;
622                 }
623                 case OBJECT_TYPE_SCALE:
624                 {
625 //WAS:                  uint16 ypos = (p0 >> 3) & 0x3FF;
626                         uint16 ypos = (p0 >> 3) & 0x7FF;
627                         uint32 height = (p0 & 0xFFC000) >> 14;
628                         uint32 oldOPP = op_pointer - 8;
629 //WriteLog("OP: Scaled Object (ypos=%04X, height=%04X", ypos, height);
630 // *** BEGIN OP PROCESSOR TESTING ONLY ***
631 if (inhibit && op_start_log)
632 {
633         WriteLog("!!! ^^^ This object is INHIBITED! ^^^ !!! (halfline=%u, ypos=%u, height=%u)\n", halfline, ypos, height);
634         DumpScaledObject(p0, OPLoadPhrase(op_pointer), OPLoadPhrase(op_pointer+8));
635 }
636 bitmapCounter++;
637 if (!inhibit)   // For OP testing only!
638 // *** END OP PROCESSOR TESTING ONLY ***
639                         if (halfline >= ypos && height > 0)
640                         {
641                                 uint64 p1 = OPLoadPhrase(op_pointer);
642                                 op_pointer += 8;
643                                 uint64 p2 = OPLoadPhrase(op_pointer);
644                                 op_pointer += 8;
645 //WriteLog("OP: %08X (%d) %08X%08X %08X%08X %08X%08X\n", oldOPP, halfline, (uint32)(p0>>32), (uint32)(p0&0xFFFFFFFF), (uint32)(p1>>32), (uint32)(p1&0xFFFFFFFF), (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
646                                 OPProcessScaledBitmap(p0, p1, p2, render);
647
648                                 // OP write-backs
649
650                                 uint16 remainder = (p2 >> 16) & 0xFF;//, vscale = p2 >> 8;
651                                 uint8 /*remainder = p2 >> 16,*/ vscale = p2 >> 8;
652 //Actually, we should skip this object if it has a vscale of zero.
653 //Or do we? Not sure... Atari Karts has a few lines that look like:
654 // (SCALED BITMAP)
655 //000E8268 --> phrase 00010000 7000B00D
656 //    [7 (0) x 1 @ (13, 0) (8 bpp), l: 000E82A0, p: 000E0FC0 fp: 00, fl:RELEASE, idx:00, pt:01]
657 //    [hsc: 9A, vsc: 00, rem: 00]
658 // Could it be the vscale is overridden if the DWIDTH is zero? Hmm...
659 //WriteLog("OP: Scaled bitmap processing (rem=%02X, vscale=%02X)...\n", remainder, vscale);//*/
660
661                                 if (vscale == 0)
662                                         vscale = 0x20;                                  // OP bug??? Nope, it isn't...! Or is it?
663
664 //extern int start_logging;
665 //if (start_logging)
666 //      WriteLog("--> Returned from scaled bitmap processing (rem=%02X, vscale=%02X)...\n", remainder, vscale);//*/
667 //Locks up here:
668 //--> Returned from scaled bitmap processing (rem=20, vscale=80)...
669 //There are other problems here, it looks like...
670 //Another lock up:
671 //About to execute OP (508)...
672 /*
673 OP: Scaled bitmap 4x? 4bpp at 38,? hscale=7C fpix=0 data=00075E28 pitch 1 hflipped=no dwidth=? (linked to 00071118) Transluency=no
674 --> Returned from scaled bitmap processing (rem=50, vscale=7C)...
675 OP: Scaled bitmap 4x? 4bpp at 38,? hscale=7C fpix=0 data=00075E28 pitch 1 hflipped=no dwidth=? (linked to 00071118) Transluency=no
676 --> Returned from scaled bitmap processing (rem=30, vscale=7C)...
677 OP: Scaled bitmap 4x? 4bpp at 38,? hscale=7C fpix=0 data=00075E28 pitch 1 hflipped=no dwidth=? (linked to 00071118) Transluency=no
678 --> Returned from scaled bitmap processing (rem=10, vscale=7C)...
679 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756A8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
680 --> Returned from scaled bitmap processing (rem=00, vscale=7E)...
681 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
682 --> Returned from scaled bitmap processing (rem=00, vscale=80)...
683 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
684 --> Returned from scaled bitmap processing (rem=5E, vscale=7E)...
685 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756E8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
686 --> Returned from scaled bitmap processing (rem=60, vscale=80)...
687 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
688 --> Returned from scaled bitmap processing (rem=3E, vscale=7E)...
689 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756E8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
690 --> Returned from scaled bitmap processing (rem=40, vscale=80)...
691 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
692 --> Returned from scaled bitmap processing (rem=1E, vscale=7E)...
693 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756E8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
694 --> Returned from scaled bitmap processing (rem=20, vscale=80)...
695 */
696 //Here's another problem:
697 //    [hsc: 20, vsc: 20, rem: 00]
698 // Since we're not checking for $E0 (but that's what we get from the above), we end
699 // up repeating this halfline unnecessarily... !!! FIX !!! [DONE, but... still not quite
700 // right. Either that, or the Accolade team that wrote Bubsy screwed up royal.]
701 //Also note: $E0 = 7.0 which IS a legal vscale value...
702
703 //                              if (remainder & 0x80)                           // I.e., it's negative
704 //                              if ((remainder & 0x80) || remainder == 0)       // I.e., it's <= 0
705 //                              if ((remainder - 1) >= 0xE0)            // I.e., it's <= 0
706 //                              if ((remainder >= 0xE1) || remainder == 0)// I.e., it's <= 0
707 //                              if ((remainder >= 0xE1 && remainder <= 0xFF) || remainder == 0)// I.e., it's <= 0
708 //                              if (remainder <= 0x20)                          // I.e., it's <= 1.0
709                                 // I.e., it's < 1.0f -> means it'll go negative when we subtract 1.0f.
710                                 if (remainder < 0x20)
711                                 {
712                                         uint64 data = (p0 & 0xFFFFF80000000000LL) >> 40;
713                                         uint64 dwidth = (p1 & 0xFFC0000) >> 15;
714
715 //                                      while (remainder & 0x80)
716 //                                      while ((remainder & 0x80) || remainder == 0)
717 //                                      while ((remainder - 1) >= 0xE0)
718 //                                      while ((remainder >= 0xE1) || remainder == 0)
719 //                                      while ((remainder >= 0xE1 && remainder <= 0xFF) || remainder == 0)
720 //                                      while (remainder <= 0x20)
721                                         while (remainder < 0x20)
722                                         {
723                                                 remainder += vscale;
724
725                                                 if (height)
726                                                         height--;
727
728                                                 data += dwidth;
729                                         }
730
731                                         p0 &= ~0xFFFFF80000FFC000LL;    // Mask out old data...
732                                         p0 |= (uint64)height << 14;
733                                         p0 |= data << 40;
734                                         OPStorePhrase(oldOPP, p0);
735                                 }
736
737                                 remainder -= 0x20;                                      // 1.0f in [3.5] fixed point format
738
739 //if (start_logging)
740 //      WriteLog("--> Finished writebacks...\n");//*/
741
742 //WriteLog(" [%08X%08X -> ", (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
743                                 p2 &= ~0x0000000000FF0000LL;
744                                 p2 |= (uint64)remainder << 16;
745 //WriteLog("%08X%08X]\n", (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
746                                 OPStorePhrase(oldOPP + 16, p2);
747 //remainder = (uint8)(p2 >> 16), vscale = (uint8)(p2 >> 8);
748 //WriteLog(" [after]: rem=%02X, vscale=%02X\n", remainder, vscale);
749                         }
750
751                         op_pointer = (p0 & 0x000007FFFF000000LL) >> 21;
752                         break;
753                 }
754                 case OBJECT_TYPE_GPU:
755                 {
756 //WriteLog("OP: Asserting GPU IRQ #3...\n");
757 #warning "Need to fix OP GPU IRQ handling! !!! FIX !!!"
758                         OPSetCurrentObject(p0);
759                         GPUSetIRQLine(3, ASSERT_LINE);
760 //Also, OP processing is suspended from this point until OBF (F00026) is written to...
761 // !!! FIX !!!
762 //Do something like:
763 //OPSuspendedByGPU = true;
764 //Dunno if the OP keeps processing from where it was interrupted, or if it just continues
765 //on the next halfline...
766 // --> It continues from where it was interrupted! !!! FIX !!!
767                         break;
768                 }
769                 case OBJECT_TYPE_BRANCH:
770                 {
771                         uint16 ypos = (p0 >> 3) & 0x7FF;
772 // NOTE: The JTRM sez there are only 2 bits used for the CC, but lists *five*
773 //       conditions! Need at least one more bit for that! :-P
774 #warning "!!! Possibly bad CC handling in OP (missing 1 bit) !!!"
775                         uint8  cc   = (p0 >> 14) & 0x03;
776                         uint32 link = (p0 >> 21) & 0x3FFFF8;
777
778 //                      if ((ypos!=507)&&(ypos!=25))
779 //                              WriteLog("\t%i%s%i link=0x%.8x\n",halfline,condition_to_str[cc],ypos>>1,link);
780                         switch (cc)
781                         {
782                         case CONDITION_EQUAL:
783                                 if (TOMReadWord(0xF00006, OP) == ypos || ypos == 0x7FF)
784                                         op_pointer = link;
785                                 break;
786                         case CONDITION_LESS_THAN:
787                                 if (TOMReadWord(0xF00006, OP) < ypos)
788                                         op_pointer = link;
789                                 break;
790                         case CONDITION_GREATER_THAN:
791                                 if (TOMReadWord(0xF00006, OP) > ypos)
792                                         op_pointer = link;
793                                 break;
794                         case CONDITION_OP_FLAG_SET:
795                                 if (OPGetStatusRegister() & 0x01)
796                                         op_pointer = link;
797                                 break;
798                         case CONDITION_SECOND_HALF_LINE:
799 //Here's the ASIC code:
800 //  ND4(cctrue5, newheight[2], heightl[1], heightl[0], hcb[10]);
801 //which means, do the link if bit 10 of HC is set...
802
803                                 // This basically means branch if bit 10 of HC is set
804 #warning "Unhandled condition code causes emulator to crash... !!! FIX !!!"
805                                 WriteLog("OP: Unexpected CONDITION_SECOND_HALF_LINE in BRANCH object\nOP: shutting down!\n");
806                                 LogDone();
807                                 exit(0);
808                                 break;
809                         default:
810                                 // Basically, if you do this, the OP does nothing. :-)
811                                 WriteLog("OP: Unimplemented branch condition %i\n", cc);
812                         }
813                         break;
814                 }
815                 case OBJECT_TYPE_STOP:
816                 {
817 //op_start_log = 0;
818                         // unsure
819 //WriteLog("OP: --> STOP\n");
820 //                      op_set_status_register(((p0>>3) & 0xFFFFFFFF));
821 //This seems more likely...
822                         OPSetCurrentObject(p0);
823
824                         if (p0 & 0x08)
825                         {
826                                 // We need to check whether these interrupts are enabled or not, THEN
827                                 // set an IRQ + pending flag if necessary...
828                                 if (TOMIRQEnabled(IRQ_OPFLAG))
829                                 {
830                                         TOMSetPendingObjectInt();
831                                         m68k_set_irq(2);                                // Cause a 68K IPL 2 to occur...
832                                 }
833                         }
834
835                         return;
836 //                      break;
837                 }
838                 default:
839                         WriteLog("op: unknown object type %i\n", ((uint8)p0 & 0x07));
840                         return;
841                 }
842
843                 // Here is a little sanity check to keep the OP from locking up the machine
844                 // when fed bad data. Better would be to count how many actual cycles it used
845                 // and bail out/reenter to properly simulate an overloaded OP... !!! FIX !!!
846 #warning "Better would be to count how many actual cycles it used and bail out/reenter to properly simulate an overloaded OP... !!! FIX !!!"
847                 opCyclesToRun--;
848
849                 if (!opCyclesToRun)
850                         return;
851         }
852 }
853
854 //
855 // Store fixed size bitmap in line buffer
856 //
857 void OPProcessFixedBitmap(uint64 p0, uint64 p1, bool render)
858 {
859 // Need to make sure that when writing that it stays within the line buffer...
860 // LBUF ($F01800 - $F01D9E) 360 x 32-bit RAM
861         uint8 depth = (p1 >> 12) & 0x07;                                // Color depth of image
862         int32 xpos = ((int16)((p1 << 4) & 0xFFFF)) >> 4;// Image xpos in LBUF
863         uint32 iwidth = (p1 >> 28) & 0x3FF;                             // Image width in *phrases*
864         uint32 data = (p0 >> 40) & 0xFFFFF8;                    // Pixel data address
865 //#ifdef OP_DEBUG_BMP
866         uint32  firstPix = (p1 >> 49) & 0x3F;
867         // "The LSB is significant only for scaled objects..." -JTRM
868         // "In 1 BPP mode, all five bits are significant. In 2 BPP mode, the top four are significant..."
869         firstPix &= 0x3E;
870 //#endif
871 // We can ignore the RELEASE (high order) bit for now--probably forever...!
872 //      uint8 flags = (p1 >> 45) & 0x0F;        // REFLECT, RMW, TRANS, RELEASE
873 //Optimize: break these out to their own BOOL values
874         uint8 flags = (p1 >> 45) & 0x07;                                // REFLECT (0), RMW (1), TRANS (2)
875         bool flagREFLECT = (flags & OPFLAG_REFLECT ? true : false),
876                 flagRMW = (flags & OPFLAG_RMW ? true : false),
877                 flagTRANS = (flags & OPFLAG_TRANS ? true : false);
878 // "For images with 1 to 4 bits/pixel the top 7 to 4 bits of the index
879 //  provide the most significant bits of the palette address."
880         uint8 index = (p1 >> 37) & 0xFE;                                // CLUT index offset (upper pix, 1-4 bpp)
881         uint32 pitch = (p1 >> 15) & 0x07;                               // Phrase pitch
882         pitch <<= 3;                                                                    // Optimization: Multiply pitch by 8
883
884 //      int16 scanlineWidth = tom_getVideoModeWidth();
885         uint8 * tomRam8 = TOMGetRamPointer();
886         uint8 * paletteRAM = &tomRam8[0x400];
887         // This is OK as long as it's used correctly: For 16-bit RAM to RAM direct copies--NOT
888         // for use when using endian-corrected data (i.e., any of the *_word_read functions!)
889         uint16 * paletteRAM16 = (uint16 *)paletteRAM;
890
891 //      WriteLog("bitmap %ix? %ibpp at %i,? firstpix=? data=0x%.8x pitch %i hflipped=%s dwidth=? (linked to ?) RMW=%s Tranparent=%s\n",
892 //              iwidth, op_bitmap_bit_depth[bitdepth], xpos, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), (flags&OPFLAG_RMW ? "yes" : "no"), (flags&OPFLAG_TRANS ? "yes" : "no"));
893
894 // Is it OK to have a 0 for the data width??? (i.e., undocumented?)
895 // Seems to be... Seems that dwidth *can* be zero (i.e., reuse same line) as well.
896 // Pitch == 0 is OK too...
897 //      if (!render || op_pointer == 0 || ptr == 0 || pitch == 0)
898 //I'm not convinced that we need to concern ourselves with data & op_pointer here either!
899         if (!render || iwidth == 0)
900                 return;
901
902 //OK, so we know the position in the line buffer is correct. It's the clipping in
903 //24bpp mode that's wrong!
904 #if 0
905 //This is a total kludge, based upon the fact that 24BPP mode puts *4* bytes
906 //into the line buffer for each pixel.
907 if (depth == 5) // i.e., 24bpp mode...
908         xpos >>= 1;     // Cut it in half...
909 #endif
910
911 //#define OP_DEBUG_BMP
912 //#ifdef OP_DEBUG_BMP
913 //      WriteLog("bitmap %ix%i %ibpp at %i,%i firstpix=%i data=0x%.8x pitch %i hflipped=%s dwidth=%i (linked to 0x%.8x) Transluency=%s\n",
914 //              iwidth, height, op_bitmap_bit_depth[bitdepth], xpos, ypos, firstPix, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), dwidth, op_pointer, (flags&OPFLAG_RMW ? "yes" : "no"));
915 //#endif
916
917 //      int32 leftMargin = xpos, rightMargin = (xpos + (phraseWidthToPixels[depth] * iwidth)) - 1;
918         int32 startPos = xpos, endPos = xpos +
919                 (!flagREFLECT ? (phraseWidthToPixels[depth] * iwidth) - 1
920                 : -((phraseWidthToPixels[depth] * iwidth) + 1));
921         uint32 clippedWidth = 0, phraseClippedWidth = 0, dataClippedWidth = 0;//, phrasePixel = 0;
922         bool in24BPPMode = (((GET16(tomRam8, 0x0028) >> 1) & 0x03) == 1 ? true : false);        // VMODE
923         // Not sure if this is Jaguar Two only location or what...
924         // From the docs, it is... If we want to limit here we should think of something else.
925 //      int32 limit = GET16(tom_ram_8, 0x0008);                 // LIMIT
926 //      int32 limit = 720;
927 //      int32 lbufWidth = (!in24BPPMode ? limit - 1 : (limit / 2) - 1); // Zero based limit...
928 //printf("[OP:xpos=%i,spos=%i,epos=%i>", xpos, startPos, endPos);
929         // This is correct, the OP line buffer is a constant size... 
930         int32 limit = 720;
931         int32 lbufWidth = 719;
932
933         // If the image is completely to the left or right of the line buffer, then bail.
934 //If in REFLECT mode, then these values are swapped! !!! FIX !!! [DONE]
935 //There are four possibilities:
936 //  1. image sits on left edge and no REFLECT; starts out of bounds but ends in bounds.
937 //  2. image sits on left edge and REFLECT; starts in bounds but ends out of bounds.
938 //  3. image sits on right edge and REFLECT; starts out of bounds but ends in bounds.
939 //  4. image sits on right edge and no REFLECT; starts in bounds but ends out of bounds.
940 //Numbers 2 & 4 can be caught by checking the LBUF clip while in the inner loop,
941 // numbers 1 & 3 are of concern.
942 // This *indirectly* handles only cases 2 & 4! And is WRONG if REFLECT is set...!
943 //      if (rightMargin < 0 || leftMargin > lbufWidth)
944
945 // It might be easier to swap these (if REFLECTed) and just use XPOS down below...
946 // That way, you could simply set XPOS to leftMargin if !REFLECT and to rightMargin otherwise.
947 // Still have to be careful with the DATA and IWIDTH values though...
948
949 //      if ((!flagREFLECT && (rightMargin < 0 || leftMargin > lbufWidth))
950 //              || (flagREFLECT && (leftMargin < 0 || rightMargin > lbufWidth)))
951 //              return;
952         if ((!flagREFLECT && (endPos < 0 || startPos > lbufWidth))
953                 || (flagREFLECT && (startPos < 0 || endPos > lbufWidth)))
954                 return;
955
956         // Otherwise, find the clip limits and clip the phrase as well...
957         // NOTE: I'm fudging here by letting the actual blit overstep the bounds of the
958         //       line buffer, but it shouldn't matter since there are two unused line
959         //       buffers below and nothing above and I'll at most write 8 bytes outside
960         //       the line buffer... I could use a fractional clip begin/end value, but
961         //       this makes the blit a *lot* more hairy. I might fix this in the future
962         //       if it becomes necessary. (JLH)
963         //       Probably wouldn't be *that* hairy. Just use a delta that tells the inner loop
964         //       which pixel in the phrase is being written, and quit when either end of phrases
965         //       is reached or line buffer extents are surpassed.
966
967 //This stuff is probably wrong as well... !!! FIX !!!
968 //The strange thing is that it seems to work, but that's no guarantee that it's bulletproof!
969 //Yup. Seems that JagMania doesn't work correctly with this...
970 //Dunno if this is the problem, but Atari Karts is showing *some* of the road now...
971 //      if (!flagREFLECT)
972
973 /*
974         if (leftMargin < 0)
975                 clippedWidth = 0 - leftMargin,
976                 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth],
977                 leftMargin = 0 - (clippedWidth % phraseWidthToPixels[depth]);
978 //              leftMargin = 0;
979
980         if (rightMargin > lbufWidth)
981                 clippedWidth = rightMargin - lbufWidth,
982                 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];//,
983 //              rightMargin = lbufWidth + (clippedWidth % phraseWidthToPixels[depth]);
984 //              rightMargin = lbufWidth;
985 */
986 if (depth > 5)
987         WriteLog("OP: We're about to encounter a divide by zero error!\n");
988         // NOTE: We're just using endPos to figure out how much, if any, to clip by.
989         // ALSO: There may be another case where we start out of bounds and end out of bounds...!
990         // !!! FIX !!!
991         if (startPos < 0)                       // Case #1: Begin out, end in, L to R
992                 clippedWidth = 0 - startPos,
993                 dataClippedWidth = phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth],
994                 startPos = 0 - (clippedWidth % phraseWidthToPixels[depth]);
995
996         if (endPos < 0)                         // Case #2: Begin in, end out, R to L
997                 clippedWidth = 0 - endPos,
998                 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];
999
1000         if (endPos > lbufWidth)         // Case #3: Begin in, end out, L to R
1001                 clippedWidth = endPos - lbufWidth,
1002                 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];
1003
1004         if (startPos > lbufWidth)       // Case #4: Begin out, end in, R to L
1005                 clippedWidth = startPos - lbufWidth,
1006                 dataClippedWidth = phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth],
1007                 startPos = lbufWidth + (clippedWidth % phraseWidthToPixels[depth]);
1008 //printf("<OP:spos=%i,epos=%i]", startPos, endPos);
1009
1010         // If the image is sitting on the line buffer left or right edge, we need to compensate
1011         // by decreasing the image phrase width accordingly.
1012         iwidth -= phraseClippedWidth;
1013
1014         // Also, if we're clipping the phrase we need to make sure we're in the correct part of
1015         // the pixel data.
1016 //      data += phraseClippedWidth * (pitch << 3);
1017         data += dataClippedWidth * pitch;
1018
1019         // NOTE: When the bitmap is in REFLECT mode, the XPOS marks the *right* side of the
1020         //       bitmap! This makes clipping & etc. MUCH, much easier...!
1021 //      uint32 lbufAddress = 0x1800 + (!in24BPPMode ? leftMargin * 2 : leftMargin * 4);
1022 //Why does this work right when multiplying startPos by 2 (instead of 4) for 24 BPP mode?
1023 //Is this a bug in the OP?
1024 //It's because in 24bpp mode, each pixel takes *4* bytes, instead of the usual 2.
1025 //Though it looks like we're doing it here no matter what...
1026 //      uint32 lbufAddress = 0x1800 + (!in24BPPMode ? startPos * 2 : startPos * 2);
1027 //Let's try this:
1028         uint32 lbufAddress = 0x1800 + (startPos * 2);
1029         uint8 * currentLineBuffer = &tomRam8[lbufAddress];
1030
1031         // Render.
1032
1033 // Hmm. We check above for 24 BPP mode, but don't do anything about it below...
1034 // If we *were* in 24 BPP mode, how would you convert CRY to RGB24? Seems to me
1035 // that if you're in CRY mode then you wouldn't be able to use 24 BPP bitmaps
1036 // anyway.
1037 // This seems to be the case (at least according to the Midsummer docs)...!
1038
1039 // This is to test using palette zeroes instead of bit zeroes...
1040 // And it seems that this is wrong, index == 0 is transparent apparently... :-/
1041 //#define OP_USES_PALETTE_ZERO
1042
1043         if (depth == 0)                                                                 // 1 BPP
1044         {
1045                 // The LSB of flags is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1046                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1047
1048                 // Fetch 1st phrase...
1049                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1050 //Note that firstPix should only be honored *if* we start with the 1st phrase of the bitmap
1051 //i.e., we didn't clip on the margin... !!! FIX !!!
1052                 pixels <<= firstPix;                                            // Skip first N pixels (N=firstPix)...
1053                 int i = firstPix;                                                       // Start counter at right spot...
1054
1055                 while (iwidth--)
1056                 {
1057                         while (i++ < 64)
1058                         {
1059                                 uint8 bit = pixels >> 63;
1060 #ifndef OP_USES_PALETTE_ZERO
1061                                 if (flagTRANS && bit == 0)
1062 #else
1063                                 if (flagTRANS && (paletteRAM16[index | bit] == 0))
1064 #endif
1065                                         ;       // Do nothing...
1066                                 else
1067                                 {
1068                                         if (!flagRMW)
1069 //Optimize: Set paletteRAM16 to beginning of palette RAM + index*2 and use only [bit] as index...
1070 //Won't optimize RMW case though...
1071                                                 // This is the *only* correct use of endian-dependent code
1072                                                 // (i.e., mem-to-mem direct copying)!
1073                                                 *(uint16 *)currentLineBuffer = paletteRAM16[index | bit];
1074                                         else
1075                                                 *currentLineBuffer =
1076                                                         BLEND_CR(*currentLineBuffer, paletteRAM[(index | bit) << 1]),
1077                                                 *(currentLineBuffer + 1) =
1078                                                         BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bit) << 1) + 1]);
1079                                 }
1080
1081                                 currentLineBuffer += lbufDelta;
1082                                 pixels <<= 1;
1083                         }
1084                         i = 0;
1085                         // Fetch next phrase...
1086                         data += pitch;
1087                         pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1088                 }
1089         }
1090         else if (depth == 1)                                                    // 2 BPP
1091         {
1092 if (firstPix)
1093         WriteLog("OP: Fixed bitmap @ 2 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1094                 index &= 0xFC;                                                          // Top six bits form CLUT index
1095                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1096                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1097
1098                 while (iwidth--)
1099                 {
1100                         // Fetch phrase...
1101                         uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1102                         data += pitch;
1103
1104                         for(int i=0; i<32; i++)
1105                         {
1106                                 uint8 bits = pixels >> 62;
1107 // Seems to me that both of these are in the same endian, so we could cast it as
1108 // uint16 * and do straight across copies (what about 24 bpp? Treat it differently...)
1109 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
1110 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
1111 // No, it isn't because we read the memory in an endian safe way--this *won't* work...
1112 #ifndef OP_USES_PALETTE_ZERO
1113                                 if (flagTRANS && bits == 0)
1114 #else
1115                                 if (flagTRANS && (paletteRAM16[index | bits] == 0))
1116 #endif
1117                                         ;       // Do nothing...
1118                                 else
1119                                 {
1120                                         if (!flagRMW)
1121                                                 *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1122                                         else
1123                                                 *currentLineBuffer =
1124                                                         BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1125                                                 *(currentLineBuffer + 1) =
1126                                                         BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1127                                 }
1128
1129                                 currentLineBuffer += lbufDelta;
1130                                 pixels <<= 2;
1131                         }
1132                 }
1133         }
1134         else if (depth == 2)                                                    // 4 BPP
1135         {
1136 if (firstPix)
1137         WriteLog("OP: Fixed bitmap @ 4 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1138                 index &= 0xF0;                                                          // Top four bits form CLUT index
1139                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1140                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1141
1142                 while (iwidth--)
1143                 {
1144                         // Fetch phrase...
1145                         uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1146                         data += pitch;
1147
1148                         for(int i=0; i<16; i++)
1149                         {
1150                                 uint8 bits = pixels >> 60;
1151 // Seems to me that both of these are in the same endian, so we could cast it as
1152 // uint16 * and do straight across copies (what about 24 bpp? Treat it differently...)
1153 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
1154 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
1155 // No, it isn't because we read the memory in an endian safe way--this *won't* work...
1156 #ifndef OP_USES_PALETTE_ZERO
1157                                 if (flagTRANS && bits == 0)
1158 #else
1159                                 if (flagTRANS && (paletteRAM16[index | bits] == 0))
1160 #endif
1161                                         ;       // Do nothing...
1162                                 else
1163                                 {
1164                                         if (!flagRMW)
1165                                                 *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1166                                         else
1167                                                 *currentLineBuffer =
1168                                                         BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1169                                                 *(currentLineBuffer + 1) =
1170                                                         BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1171                                 }
1172
1173                                 currentLineBuffer += lbufDelta;
1174                                 pixels <<= 4;
1175                         }
1176                 }
1177         }
1178         else if (depth == 3)                                                    // 8 BPP
1179         {
1180                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1181                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1182
1183                 // Fetch 1st phrase...
1184                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1185 //Note that firstPix should only be honored *if* we start with the 1st phrase of the bitmap
1186 //i.e., we didn't clip on the margin... !!! FIX !!!
1187                 firstPix &= 0x30;                                                       // Only top two bits are valid for 8 BPP
1188                 pixels <<= firstPix;                                            // Skip first N pixels (N=firstPix)...
1189                 int i = firstPix >> 3;                                          // Start counter at right spot...
1190
1191                 while (iwidth--)
1192                 {
1193                         while (i++ < 8)
1194                         {
1195                                 uint8 bits = pixels >> 56;
1196 // Seems to me that both of these are in the same endian, so we could cast it as
1197 // uint16 * and do straight across copies (what about 24 bpp? Treat it differently...)
1198 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
1199 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
1200 // No, it isn't because we read the memory in an endian safe way--this *won't* work...
1201 //This would seem to be problematic...
1202 //Because it's the palette entry being zero that makes the pixel transparent...
1203 //Let's try it and see.
1204 #ifndef OP_USES_PALETTE_ZERO
1205                                 if (flagTRANS && bits == 0)
1206 #else
1207                                 if (flagTRANS && (paletteRAM16[bits] == 0))
1208 #endif
1209                                         ;       // Do nothing...
1210                                 else
1211                                 {
1212                                         if (!flagRMW)
1213                                                 *(uint16 *)currentLineBuffer = paletteRAM16[bits];
1214                                         else
1215                                                 *currentLineBuffer =
1216                                                         BLEND_CR(*currentLineBuffer, paletteRAM[bits << 1]),
1217                                                 *(currentLineBuffer + 1) =
1218                                                         BLEND_Y(*(currentLineBuffer + 1), paletteRAM[(bits << 1) + 1]);
1219                                 }
1220
1221                                 currentLineBuffer += lbufDelta;
1222                                 pixels <<= 8;
1223                         }
1224                         i = 0;
1225                         // Fetch next phrase...
1226                         data += pitch;
1227                         pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1228                 }
1229         }
1230         else if (depth == 4)                                                    // 16 BPP
1231         {
1232 if (firstPix)
1233         WriteLog("OP: Fixed bitmap @ 16 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1234                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1235                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1236
1237                 while (iwidth--)
1238                 {
1239                         // Fetch phrase...
1240                         uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1241                         data += pitch;
1242
1243                         for(int i=0; i<4; i++)
1244                         {
1245                                 uint8 bitsHi = pixels >> 56, bitsLo = pixels >> 48;
1246 // Seems to me that both of these are in the same endian, so we could cast it as
1247 // uint16 * and do straight across copies (what about 24 bpp? Treat it differently...)
1248 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
1249 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
1250 // No, it isn't because we read the memory in an endian safe way--it *won't* work...
1251 //This doesn't seem right... Let's try the encoded black value ($8800):
1252 //Apparently, CRY 0 maps to $8800...
1253                                 if (flagTRANS && ((bitsLo | bitsHi) == 0))
1254 //                              if (flagTRANS && (bitsHi == 0x88) && (bitsLo == 0x00))
1255                                         ;       // Do nothing...
1256                                 else
1257                                 {
1258                                         if (!flagRMW)
1259                                                 *currentLineBuffer = bitsHi,
1260                                                 *(currentLineBuffer + 1) = bitsLo;
1261                                         else
1262                                                 *currentLineBuffer =
1263                                                         BLEND_CR(*currentLineBuffer, bitsHi),
1264                                                 *(currentLineBuffer + 1) =
1265                                                         BLEND_Y(*(currentLineBuffer + 1), bitsLo);
1266                                 }
1267
1268                                 currentLineBuffer += lbufDelta;
1269                                 pixels <<= 16;
1270                         }
1271                 }
1272         }
1273         else if (depth == 5)                                                    // 24 BPP
1274         {
1275 //Looks like Iron Soldier is the only game that uses 24BPP mode...
1276 //There *might* be others...
1277 //WriteLog("OP: Writing 24 BPP bitmap!\n");
1278 if (firstPix)
1279         WriteLog("OP: Fixed bitmap @ 24 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1280                 // Not sure, but I think RMW only works with 16 BPP and below, and only in CRY mode...
1281                 // The LSB of flags is OPFLAG_REFLECT, so sign extend it and OR 4 into it.
1282                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 4) | 0x04;
1283
1284                 while (iwidth--)
1285                 {
1286                         // Fetch phrase...
1287                         uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1288                         data += pitch;
1289
1290                         for(int i=0; i<2; i++)
1291                         {
1292                                 // We don't use a 32-bit var here because of endian issues...!
1293                                 uint8 bits3 = pixels >> 56, bits2 = pixels >> 48,
1294                                         bits1 = pixels >> 40, bits0 = pixels >> 32;
1295
1296                                 if (flagTRANS && (bits3 | bits2 | bits1 | bits0) == 0)
1297                                         ;       // Do nothing...
1298                                 else
1299                                         *currentLineBuffer = bits3,
1300                                         *(currentLineBuffer + 1) = bits2,
1301                                         *(currentLineBuffer + 2) = bits1,
1302                                         *(currentLineBuffer + 3) = bits0;
1303
1304                                 currentLineBuffer += lbufDelta;
1305                                 pixels <<= 32;
1306                         }
1307                 }
1308         }
1309 }
1310
1311 //
1312 // Store scaled bitmap in line buffer
1313 //
1314 void OPProcessScaledBitmap(uint64 p0, uint64 p1, uint64 p2, bool render)
1315 {
1316 // Need to make sure that when writing that it stays within the line buffer...
1317 // LBUF ($F01800 - $F01D9E) 360 x 32-bit RAM
1318         uint8 depth = (p1 >> 12) & 0x07;                                // Color depth of image
1319         int32 xpos = ((int16)((p1 << 4) & 0xFFFF)) >> 4;// Image xpos in LBUF
1320         uint32 iwidth = (p1 >> 28) & 0x3FF;                             // Image width in *phrases*
1321         uint32 data = (p0 >> 40) & 0xFFFFF8;                    // Pixel data address
1322 //#ifdef OP_DEBUG_BMP
1323 // Prolly should use this... Though not sure exactly how.
1324 //Use the upper bits as an offset into the phrase depending on the BPP. That's how!
1325         uint32 firstPix = (p1 >> 49) & 0x3F;
1326 //This is WEIRD! I'm sure I saw Atari Karts request 8 BPP FIRSTPIX! What happened???
1327 if (firstPix)
1328         WriteLog("OP: FIRSTPIX != 0! (Scaled BM)\n");
1329 //#endif
1330 // We can ignore the RELEASE (high order) bit for now--probably forever...!
1331 //      uint8 flags = (p1 >> 45) & 0x0F;        // REFLECT, RMW, TRANS, RELEASE
1332 //Optimize: break these out to their own BOOL values [DONE]
1333         uint8 flags = (p1 >> 45) & 0x07;                                // REFLECT (0), RMW (1), TRANS (2)
1334         bool flagREFLECT = (flags & OPFLAG_REFLECT ? true : false),
1335                 flagRMW = (flags & OPFLAG_RMW ? true : false),
1336                 flagTRANS = (flags & OPFLAG_TRANS ? true : false);
1337         uint8 index = (p1 >> 37) & 0xFE;                                // CLUT index offset (upper pix, 1-4 bpp)
1338         uint32 pitch = (p1 >> 15) & 0x07;                               // Phrase pitch
1339
1340         uint8 * tomRam8 = TOMGetRamPointer();
1341         uint8 * paletteRAM = &tomRam8[0x400];
1342         // This is OK as long as it's used correctly: For 16-bit RAM to RAM direct copies--NOT
1343         // for use when using endian-corrected data (i.e., any of the *ReadWord functions!)
1344         uint16 * paletteRAM16 = (uint16 *)paletteRAM;
1345
1346         uint16 hscale = p2 & 0xFF;
1347 // Hmm. It seems that fixing the horizontal scale necessitated re-fixing this. Not sure why,
1348 // but seems to be consistent with the vertical scaling now (and it may turn out to be wrong!)...
1349         uint16 horizontalRemainder = hscale;                            // Not sure if it starts full, but seems reasonable [It's not!]
1350 //      uint8 horizontalRemainder = 0;                                  // Let's try zero! Seems to work! Yay! [No, it doesn't!]
1351         int32 scaledWidthInPixels = (iwidth * phraseWidthToPixels[depth] * hscale) >> 5;
1352         uint32 scaledPhrasePixels = (phraseWidthToPixels[depth] * hscale) >> 5;
1353
1354 //      WriteLog("bitmap %ix? %ibpp at %i,? firstpix=? data=0x%.8x pitch %i hflipped=%s dwidth=? (linked to ?) RMW=%s Tranparent=%s\n",
1355 //              iwidth, op_bitmap_bit_depth[bitdepth], xpos, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), (flags&OPFLAG_RMW ? "yes" : "no"), (flags&OPFLAG_TRANS ? "yes" : "no"));
1356
1357 // Looks like an hscale of zero means don't draw!
1358         if (!render || iwidth == 0 || hscale == 0)
1359                 return;
1360
1361 /*extern int start_logging;
1362 if (start_logging)
1363         WriteLog("OP: Scaled bitmap %ix? %ibpp at %i,? hscale=%02X fpix=%i data=%08X pitch %i hflipped=%s dwidth=? (linked to %08X) Transluency=%s\n",
1364                 iwidth, op_bitmap_bit_depth[depth], xpos, hscale, firstPix, data, pitch, (flagREFLECT ? "yes" : "no"), op_pointer, (flagRMW ? "yes" : "no"));*/
1365 //#define OP_DEBUG_BMP
1366 //#ifdef OP_DEBUG_BMP
1367 //      WriteLog("OP: Scaled bitmap %ix%i %ibpp at %i,%i firstpix=%i data=0x%.8x pitch %i hflipped=%s dwidth=%i (linked to 0x%.8x) Transluency=%s\n",
1368 //              iwidth, height, op_bitmap_bit_depth[bitdepth], xpos, ypos, firstPix, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), dwidth, op_pointer, (flags&OPFLAG_RMW ? "yes" : "no"));
1369 //#endif
1370
1371         int32 startPos = xpos, endPos = xpos +
1372                 (!flagREFLECT ? scaledWidthInPixels - 1 : -(scaledWidthInPixels + 1));
1373         uint32 clippedWidth = 0, phraseClippedWidth = 0, dataClippedWidth = 0;
1374         bool in24BPPMode = (((GET16(tomRam8, 0x0028) >> 1) & 0x03) == 1 ? true : false);        // VMODE
1375         // Not sure if this is Jaguar Two only location or what...
1376         // From the docs, it is... If we want to limit here we should think of something else.
1377 //      int32 limit = GET16(tom_ram_8, 0x0008);                 // LIMIT
1378         int32 limit = 720;
1379 //      int32 lbufWidth = (!in24BPPMode ? limit - 1 : (limit / 2) - 1); // Zero based limit...
1380         int32 lbufWidth = 719;  // Zero based limit...
1381
1382         // If the image is completely to the left or right of the line buffer, then bail.
1383 //If in REFLECT mode, then these values are swapped! !!! FIX !!! [DONE]
1384 //There are four possibilities:
1385 //  1. image sits on left edge and no REFLECT; starts out of bounds but ends in bounds.
1386 //  2. image sits on left edge and REFLECT; starts in bounds but ends out of bounds.
1387 //  3. image sits on right edge and REFLECT; starts out of bounds but ends in bounds.
1388 //  4. image sits on right edge and no REFLECT; starts in bounds but ends out of bounds.
1389 //Numbers 2 & 4 can be caught by checking the LBUF clip while in the inner loop,
1390 // numbers 1 & 3 are of concern.
1391 // This *indirectly* handles only cases 2 & 4! And is WRONG if REFLECT is set...!
1392 //      if (rightMargin < 0 || leftMargin > lbufWidth)
1393
1394 // It might be easier to swap these (if REFLECTed) and just use XPOS down below...
1395 // That way, you could simply set XPOS to leftMargin if !REFLECT and to rightMargin otherwise.
1396 // Still have to be careful with the DATA and IWIDTH values though...
1397
1398         if ((!flagREFLECT && (endPos < 0 || startPos > lbufWidth))
1399                 || (flagREFLECT && (startPos < 0 || endPos > lbufWidth)))
1400                 return;
1401
1402         // Otherwise, find the clip limits and clip the phrase as well...
1403         // NOTE: I'm fudging here by letting the actual blit overstep the bounds of the
1404         //       line buffer, but it shouldn't matter since there are two unused line
1405         //       buffers below and nothing above and I'll at most write 40 bytes outside
1406         //       the line buffer... I could use a fractional clip begin/end value, but
1407         //       this makes the blit a *lot* more hairy. I might fix this in the future
1408         //       if it becomes necessary. (JLH)
1409         //       Probably wouldn't be *that* hairy. Just use a delta that tells the inner loop
1410         //       which pixel in the phrase is being written, and quit when either end of phrases
1411         //       is reached or line buffer extents are surpassed.
1412
1413 //This stuff is probably wrong as well... !!! FIX !!!
1414 //The strange thing is that it seems to work, but that's no guarantee that it's bulletproof!
1415 //Yup. Seems that JagMania doesn't work correctly with this...
1416 //Dunno if this is the problem, but Atari Karts is showing *some* of the road now...
1417 //Actually, it is! Or, it was. It doesn't seem to be clipping here, so the problem lies
1418 //elsewhere! Hmm. Putting the scaling code into the 1/2/8 BPP cases seems to draw the ground
1419 // a bit more accurately... Strange!
1420 //It's probably a case of the REFLECT flag being set and the background being written
1421 //from the right side of the screen...
1422 //But no, it isn't... At least if the diagnostics are telling the truth!
1423
1424         // NOTE: We're just using endPos to figure out how much, if any, to clip by.
1425         // ALSO: There may be another case where we start out of bounds and end out of bounds...!
1426         // !!! FIX !!!
1427
1428 //There's a problem here with scaledPhrasePixels in that it can be forced to zero when
1429 //the scaling factor is small. So fix it already! !!! FIX !!!
1430 /*if (scaledPhrasePixels == 0)
1431 {
1432         WriteLog("OP: [Scaled] We're about to encounter a divide by zero error!\n");
1433         DumpScaledObject(p0, p1, p2);
1434 }//*/
1435 //NOTE: I'm almost 100% sure that this is wrong... And it is! :-p
1436
1437 //Try a simple example...
1438 // Let's say we have a 8 BPP scanline with an hscale of $80 (4). Our xpos is -10,
1439 // non-flipped. Pixels in the bitmap are XYZXYZXYZXYZXYZ.
1440 // Scaled up, they would be XXXXYYYYZZZZXXXXYYYYZZZZXXXXYYYYZZZZ...
1441 //
1442 // Normally, we would expect this in the line buffer:
1443 // ZZXXXXYYYYZZZZXXXXYYYYZZZZ...
1444 //
1445 // But instead we're getting:
1446 // XXXXYYYYZZZZXXXXYYYYZZZZ...
1447 //
1448 // or are we??? It would seem so, simply by virtue of the fact that we're NOT starting
1449 // on negative boundary--or are we? Hmm...
1450 // cw = 10, dcw = pcw = 10 / ([8 * 4 = 32] 32) = 0, sp = -10
1451 //
1452 // Let's try a real world example:
1453 //
1454 //OP: Scaled bitmap (70, 8 BPP, spp=28) sp (-400) < 0... [new sp=-8, cw=400, dcw=pcw=14]
1455 //OP: Scaled bitmap (6F, 8 BPP, spp=27) sp (-395) < 0... [new sp=-17, cw=395, dcw=pcw=14]
1456 //
1457 // Really, spp is 27.75 in the second case...
1458 // So... If we do 395 / 27.75, we get 14. Ok so far... If we scale that against the
1459 // start position (14 * 27.75), we get -6.5... NOT -17!
1460
1461 //Now it seems we're working OK, at least for the first case...
1462 uint32 scaledPhrasePixelsUS = phraseWidthToPixels[depth] * hscale;
1463
1464         if (startPos < 0)                       // Case #1: Begin out, end in, L to R
1465 {
1466 extern int start_logging;
1467 if (start_logging)
1468         WriteLog("OP: Scaled bitmap (%02X, %u BPP, spp=%u) start pos (%i) < 0...", hscale, op_bitmap_bit_depth[depth], scaledPhrasePixels, startPos);
1469 //              clippedWidth = 0 - startPos,
1470                 clippedWidth = (0 - startPos) << 5,
1471 //              dataClippedWidth = phraseClippedWidth = clippedWidth / scaledPhrasePixels,
1472                 dataClippedWidth = phraseClippedWidth = (clippedWidth / scaledPhrasePixelsUS) >> 5,
1473 //              startPos = 0 - (clippedWidth % scaledPhrasePixels);
1474                 startPos += (dataClippedWidth * scaledPhrasePixelsUS) >> 5;
1475 if (start_logging)
1476         WriteLog(" [new sp=%i, cw=%i, dcw=pcw=%i]\n", startPos, clippedWidth, dataClippedWidth);
1477 }
1478
1479         if (endPos < 0)                         // Case #2: Begin in, end out, R to L
1480                 clippedWidth = 0 - endPos,
1481                 phraseClippedWidth = clippedWidth / scaledPhrasePixels;
1482
1483         if (endPos > lbufWidth)         // Case #3: Begin in, end out, L to R
1484                 clippedWidth = endPos - lbufWidth,
1485                 phraseClippedWidth = clippedWidth / scaledPhrasePixels;
1486
1487         if (startPos > lbufWidth)       // Case #4: Begin out, end in, R to L
1488                 clippedWidth = startPos - lbufWidth,
1489                 dataClippedWidth = phraseClippedWidth = clippedWidth / scaledPhrasePixels,
1490                 startPos = lbufWidth + (clippedWidth % scaledPhrasePixels);
1491
1492 extern int op_start_log;
1493 if (op_start_log && clippedWidth != 0)
1494         WriteLog("OP: Clipped line. SP=%i, EP=%i, clip=%u, iwidth=%u, hscale=%02X\n", startPos, endPos, clippedWidth, iwidth, hscale);
1495 if (op_start_log && startPos == 13)
1496 {
1497         WriteLog("OP: Scaled line. SP=%i, EP=%i, clip=%u, iwidth=%u, hscale=%02X, depth=%u, firstPix=%u\n", startPos, endPos, clippedWidth, iwidth, hscale, depth, firstPix);
1498         DumpScaledObject(p0, p1, p2);
1499         if (iwidth == 7)
1500         {
1501                 WriteLog("    %08X: ", data);
1502                 for(int i=0; i<7*8; i++)
1503                         WriteLog("%02X ", JaguarReadByte(data+i));
1504                 WriteLog("\n");
1505         }
1506 }
1507         // If the image is sitting on the line buffer left or right edge, we need to compensate
1508         // by decreasing the image phrase width accordingly.
1509         iwidth -= phraseClippedWidth;
1510
1511         // Also, if we're clipping the phrase we need to make sure we're in the correct part of
1512         // the pixel data.
1513 //      data += phraseClippedWidth * (pitch << 3);
1514         data += dataClippedWidth * (pitch << 3);
1515
1516         // NOTE: When the bitmap is in REFLECT mode, the XPOS marks the *right* side of the
1517         //       bitmap! This makes clipping & etc. MUCH, much easier...!
1518 //      uint32 lbufAddress = 0x1800 + (!in24BPPMode ? leftMargin * 2 : leftMargin * 4);
1519 //      uint32 lbufAddress = 0x1800 + (!in24BPPMode ? startPos * 2 : startPos * 4);
1520         uint32 lbufAddress = 0x1800 + startPos * 2;
1521         uint8 * currentLineBuffer = &tomRam8[lbufAddress];
1522 //uint8 * lineBufferLowerLimit = &tom_ram_8[0x1800],
1523 //      * lineBufferUpperLimit = &tom_ram_8[0x1800 + 719];
1524
1525         // Render.
1526
1527 // Hmm. We check above for 24 BPP mode, but don't do anything about it below...
1528 // If we *were* in 24 BPP mode, how would you convert CRY to RGB24? Seems to me
1529 // that if you're in CRY mode then you wouldn't be able to use 24 BPP bitmaps
1530 // anyway.
1531 // This seems to be the case (at least according to the Midsummer docs)...!
1532
1533         if (depth == 0)                                                                 // 1 BPP
1534         {
1535 if (firstPix != 0)
1536         WriteLog("OP: Scaled bitmap @ 1 BPP requesting FIRSTPIX!\n");
1537                 // The LSB of flags is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1538                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1539
1540                 int pixCount = 0;
1541                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1542
1543                 while ((int32)iwidth > 0)
1544                 {
1545                         uint8 bits = pixels >> 63;
1546
1547 #ifndef OP_USES_PALETTE_ZERO
1548                         if (flagTRANS && bits == 0)
1549 #else
1550                         if (flagTRANS && (paletteRAM16[index | bits] == 0))
1551 #endif
1552                                 ;       // Do nothing...
1553                         else
1554                         {
1555                                 if (!flagRMW)
1556                                         // This is the *only* correct use of endian-dependent code
1557                                         // (i.e., mem-to-mem direct copying)!
1558                                         *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1559                                 else
1560                                         *currentLineBuffer =
1561                                                 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1562                                         *(currentLineBuffer + 1) =
1563                                                 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1564                         }
1565
1566                         currentLineBuffer += lbufDelta;
1567
1568 /*
1569 The reason we subtract the horizontalRemainder *after* the test is because we had too few
1570 bits in horizontalRemainder to properly recognize a negative number. But now it's 16 bits
1571 wide, so we could probably go back to that (as long as we make it an int16 and not a uint16!)
1572 */
1573 /*                      horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1574                         while (horizontalRemainder & 0x80)
1575                         {
1576                                 horizontalRemainder += hscale;
1577                                 pixCount++;
1578                                 pixels <<= 1;
1579                         }//*/
1580 //                      while (horizontalRemainder <= 0x20)             // I.e., it's <= 1.0 (*before* subtraction)
1581                         while (horizontalRemainder < 0x20)              // I.e., it's <= 1.0 (*before* subtraction)
1582                         {
1583                                 horizontalRemainder += hscale;
1584                                 pixCount++;
1585                                 pixels <<= 1;
1586                         }
1587                         horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1588
1589                         if (pixCount > 63)
1590                         {
1591                                 int phrasesToSkip = pixCount / 64, pixelShift = pixCount % 64;
1592
1593                                 data += (pitch << 3) * phrasesToSkip;
1594                                 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1595                                 pixels <<= 1 * pixelShift;
1596                                 iwidth -= phrasesToSkip;
1597                                 pixCount = pixelShift;
1598                         }
1599                 }
1600         }
1601         else if (depth == 1)                                                    // 2 BPP
1602         {
1603 if (firstPix != 0)
1604         WriteLog("OP: Scaled bitmap @ 2 BPP requesting FIRSTPIX!\n");
1605                 index &= 0xFC;                                                          // Top six bits form CLUT index
1606                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1607                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1608
1609                 int pixCount = 0;
1610                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1611
1612                 while ((int32)iwidth > 0)
1613                 {
1614                         uint8 bits = pixels >> 62;
1615
1616 #ifndef OP_USES_PALETTE_ZERO
1617                         if (flagTRANS && bits == 0)
1618 #else
1619                         if (flagTRANS && (paletteRAM16[index | bits] == 0))
1620 #endif
1621                                 ;       // Do nothing...
1622                         else
1623                         {
1624                                 if (!flagRMW)
1625                                         // This is the *only* correct use of endian-dependent code
1626                                         // (i.e., mem-to-mem direct copying)!
1627                                         *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1628                                 else
1629                                         *currentLineBuffer =
1630                                                 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1631                                         *(currentLineBuffer + 1) =
1632                                                 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1633                         }
1634
1635                         currentLineBuffer += lbufDelta;
1636
1637 /*                      horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1638                         while (horizontalRemainder & 0x80)
1639                         {
1640                                 horizontalRemainder += hscale;
1641                                 pixCount++;
1642                                 pixels <<= 2;
1643                         }//*/
1644 //                      while (horizontalRemainder <= 0x20)             // I.e., it's <= 0 (*before* subtraction)
1645                         while (horizontalRemainder < 0x20)              // I.e., it's <= 1.0 (*before* subtraction)
1646                         {
1647                                 horizontalRemainder += hscale;
1648                                 pixCount++;
1649                                 pixels <<= 2;
1650                         }
1651                         horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1652
1653                         if (pixCount > 31)
1654                         {
1655                                 int phrasesToSkip = pixCount / 32, pixelShift = pixCount % 32;
1656
1657                                 data += (pitch << 3) * phrasesToSkip;
1658                                 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1659                                 pixels <<= 2 * pixelShift;
1660                                 iwidth -= phrasesToSkip;
1661                                 pixCount = pixelShift;
1662                         }
1663                 }
1664         }
1665         else if (depth == 2)                                                    // 4 BPP
1666         {
1667 if (firstPix != 0)
1668         WriteLog("OP: Scaled bitmap @ 4 BPP requesting FIRSTPIX!\n");
1669                 index &= 0xF0;                                                          // Top four bits form CLUT index
1670                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1671                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1672
1673                 int pixCount = 0;
1674                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1675
1676                 while ((int32)iwidth > 0)
1677                 {
1678                         uint8 bits = pixels >> 60;
1679
1680 #ifndef OP_USES_PALETTE_ZERO
1681                         if (flagTRANS && bits == 0)
1682 #else
1683                         if (flagTRANS && (paletteRAM16[index | bits] == 0))
1684 #endif
1685                                 ;       // Do nothing...
1686                         else
1687                         {
1688                                 if (!flagRMW)
1689                                         // This is the *only* correct use of endian-dependent code
1690                                         // (i.e., mem-to-mem direct copying)!
1691                                         *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1692                                 else
1693                                         *currentLineBuffer =
1694                                                 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1695                                         *(currentLineBuffer + 1) =
1696                                                 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1697                         }
1698
1699                         currentLineBuffer += lbufDelta;
1700
1701 /*                      horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1702                         while (horizontalRemainder & 0x80)
1703                         {
1704                                 horizontalRemainder += hscale;
1705                                 pixCount++;
1706                                 pixels <<= 4;
1707                         }//*/
1708 //                      while (horizontalRemainder <= 0x20)             // I.e., it's <= 0 (*before* subtraction)
1709                         while (horizontalRemainder < 0x20)              // I.e., it's <= 0 (*before* subtraction)
1710                         {
1711                                 horizontalRemainder += hscale;
1712                                 pixCount++;
1713                                 pixels <<= 4;
1714                         }
1715                         horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1716
1717                         if (pixCount > 15)
1718                         {
1719                                 int phrasesToSkip = pixCount / 16, pixelShift = pixCount % 16;
1720
1721                                 data += (pitch << 3) * phrasesToSkip;
1722                                 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1723                                 pixels <<= 4 * pixelShift;
1724                                 iwidth -= phrasesToSkip;
1725                                 pixCount = pixelShift;
1726                         }
1727                 }
1728         }
1729         else if (depth == 3)                                                    // 8 BPP
1730         {
1731 if (firstPix)
1732         WriteLog("OP: Scaled bitmap @ 8 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1733                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1734                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1735
1736                 int pixCount = 0;
1737                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1738
1739                 while ((int32)iwidth > 0)
1740                 {
1741                         uint8 bits = pixels >> 56;
1742
1743 #ifndef OP_USES_PALETTE_ZERO
1744                         if (flagTRANS && bits == 0)
1745 #else
1746                         if (flagTRANS && (paletteRAM16[bits] == 0))
1747 #endif
1748                                 ;       // Do nothing...
1749                         else
1750                         {
1751                                 if (!flagRMW)
1752                                         // This is the *only* correct use of endian-dependent code
1753                                         // (i.e., mem-to-mem direct copying)!
1754                                         *(uint16 *)currentLineBuffer = paletteRAM16[bits];
1755 /*                              {
1756                                         if (currentLineBuffer >= lineBufferLowerLimit && currentLineBuffer <= lineBufferUpperLimit)
1757                                                 *(uint16 *)currentLineBuffer = paletteRAM16[bits];
1758                                 }*/
1759                                 else
1760                                         *currentLineBuffer =
1761                                                 BLEND_CR(*currentLineBuffer, paletteRAM[bits << 1]),
1762                                         *(currentLineBuffer + 1) =
1763                                                 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[(bits << 1) + 1]);
1764                         }
1765
1766                         currentLineBuffer += lbufDelta;
1767
1768 //                      while (horizontalRemainder <= 0x20)             // I.e., it's <= 0 (*before* subtraction)
1769                         while (horizontalRemainder < 0x20)              // I.e., it's <= 1.0 (*before* subtraction)
1770                         {
1771                                 horizontalRemainder += hscale;
1772                                 pixCount++;
1773                                 pixels <<= 8;
1774                         }
1775                         horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1776
1777                         if (pixCount > 7)
1778                         {
1779                                 int phrasesToSkip = pixCount / 8, pixelShift = pixCount % 8;
1780
1781                                 data += (pitch << 3) * phrasesToSkip;
1782                                 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1783                                 pixels <<= 8 * pixelShift;
1784                                 iwidth -= phrasesToSkip;
1785                                 pixCount = pixelShift;
1786                         }
1787                 }
1788         }
1789         else if (depth == 4)                                                    // 16 BPP
1790         {
1791 if (firstPix != 0)
1792         WriteLog("OP: Scaled bitmap @ 16 BPP requesting FIRSTPIX!\n");
1793                 // The LSB is OPFLAG_REFLECT, so sign extend it and OR 2 into it.
1794                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1795
1796                 int pixCount = 0;
1797                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1798
1799                 while ((int32)iwidth > 0)
1800                 {
1801                         uint8 bitsHi = pixels >> 56, bitsLo = pixels >> 48;
1802
1803 //This doesn't seem right... Let's try the encoded black value ($8800):
1804 //Apparently, CRY 0 maps to $8800...
1805                                 if (flagTRANS && ((bitsLo | bitsHi) == 0))
1806 //                              if (flagTRANS && (bitsHi == 0x88) && (bitsLo == 0x00))
1807                                 ;       // Do nothing...
1808                         else
1809                         {
1810                                 if (!flagRMW)
1811                                         *currentLineBuffer = bitsHi,
1812                                         *(currentLineBuffer + 1) = bitsLo;
1813                                 else
1814                                         *currentLineBuffer =
1815                                                 BLEND_CR(*currentLineBuffer, bitsHi),
1816                                         *(currentLineBuffer + 1) =
1817                                                 BLEND_Y(*(currentLineBuffer + 1), bitsLo);
1818                         }
1819
1820                         currentLineBuffer += lbufDelta;
1821
1822 /*                      horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1823                         while (horizontalRemainder & 0x80)
1824                         {
1825                                 horizontalRemainder += hscale;
1826                                 pixCount++;
1827                                 pixels <<= 16;
1828                         }//*/
1829 //                      while (horizontalRemainder <= 0x20)             // I.e., it's <= 0 (*before* subtraction)
1830                         while (horizontalRemainder < 0x20)              // I.e., it's <= 1.0 (*before* subtraction)
1831                         {
1832                                 horizontalRemainder += hscale;
1833                                 pixCount++;
1834                                 pixels <<= 16;
1835                         }
1836                         horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1837 //*/
1838                         if (pixCount > 3)
1839                         {
1840                                 int phrasesToSkip = pixCount / 4, pixelShift = pixCount % 4;
1841
1842                                 data += (pitch << 3) * phrasesToSkip;
1843                                 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1844                                 pixels <<= 16 * pixelShift;
1845
1846                                 iwidth -= phrasesToSkip;
1847
1848                                 pixCount = pixelShift;
1849                         }
1850                 }
1851         }
1852         else if (depth == 5)                                                    // 24 BPP
1853         {
1854 //I'm not sure that you can scale a 24 BPP bitmap properly--the JTRM seems to indicate as much.
1855 WriteLog("OP: Writing 24 BPP scaled bitmap!\n");
1856 if (firstPix != 0)
1857         WriteLog("OP: Scaled bitmap @ 24 BPP requesting FIRSTPIX!\n");
1858                 // Not sure, but I think RMW only works with 16 BPP and below, and only in CRY mode...
1859                 // The LSB is OPFLAG_REFLECT, so sign extend it and OR 4 into it.
1860                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 4) | 0x04;
1861
1862                 while (iwidth--)
1863                 {
1864                         // Fetch phrase...
1865                         uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1866                         data += pitch << 3;                                             // Multiply pitch * 8 (optimize: precompute this value)
1867
1868                         for(int i=0; i<2; i++)
1869                         {
1870                                 uint8 bits3 = pixels >> 56, bits2 = pixels >> 48,
1871                                         bits1 = pixels >> 40, bits0 = pixels >> 32;
1872
1873                                 if (flagTRANS && (bits3 | bits2 | bits1 | bits0) == 0)
1874                                         ;       // Do nothing...
1875                                 else
1876                                         *currentLineBuffer = bits3,
1877                                         *(currentLineBuffer + 1) = bits2,
1878                                         *(currentLineBuffer + 2) = bits1,
1879                                         *(currentLineBuffer + 3) = bits0;
1880
1881                                 currentLineBuffer += lbufDelta;
1882                                 pixels <<= 32;
1883                         }
1884                 }
1885         }
1886 }