]> Shamusworld >> Repos - virtualjaguar/blob - src/op.cpp
8ae2eb5a437ac33dee0f417b0bee42aef77bb067
[virtualjaguar] / src / op.cpp
1 //
2 // Object Processor
3 //
4 // Original source by David Raingeard (Cal2)
5 // GCC/SDL port by Niels Wagenaar (Linux/WIN32) and Caz (BeOS)
6 // Extensive cleanups/fixes/rewrites by James Hammons
7 // (C) 2010 Underground Software
8 //
9 // JLH = James Hammons <jlhamm@acm.org>
10 //
11 // Who  When        What
12 // ---  ----------  -------------------------------------------------------------
13 // JLH  01/16/2010  Created this log ;-)
14 //
15
16 #include "op.h"
17
18 #include <stdlib.h>
19 #include <string.h>
20 #include "gpu.h"
21 #include "jaguar.h"
22 #include "log.h"
23 #include "m68000/m68kinterface.h"
24 #include "memory.h"
25 #include "tom.h"
26
27 //#define OP_DEBUG
28 //#define OP_DEBUG_BMP
29
// Saturating-blend lookups into the tables built by OPInit(). The table index is
// (dst << 8) | src: "dst" is the existing byte, "src" is the signed delta byte.
// Both arguments are fully parenthesised so that expression arguments (for
// example "value & 0xFF") are cast and shifted as a whole.
#define BLEND_Y(dst, src)	op_blend_y[(((uint16)(dst)) << 8) | ((uint16)(src))]
#define BLEND_CR(dst, src)	op_blend_cr[(((uint16)(dst)) << 8) | ((uint16)(src))]
32
// Object types: the low three bits of an object's first phrase
#define OBJECT_TYPE_BITMAP			0x00	// %000
#define OBJECT_TYPE_SCALE			0x01	// %001
#define OBJECT_TYPE_GPU				0x02	// %010
#define OBJECT_TYPE_BRANCH			0x03	// %011
#define OBJECT_TYPE_STOP			0x04	// %100

// Branch object condition codes
#define CONDITION_EQUAL				0x00	// VC == YPOS
#define CONDITION_LESS_THAN			0x01	// VC < YPOS
#define CONDITION_GREATER_THAN		0x02	// VC > YPOS
#define CONDITION_OP_FLAG_SET		0x03
#define CONDITION_SECOND_HALF_LINE	0x04

// Bitmap object flags (the 4-bit flags field of the second phrase)
#define OPFLAG_REFLECT				0x01	// Horizontal mirror bit
#define OPFLAG_RMW					0x02	// Read-Modify-Write bit
#define OPFLAG_TRANS				0x04	// Transparency bit
#define OPFLAG_RELEASE				0x08	// Bus release bit
49
50 // Private function prototypes
51
52 void OPProcessFixedBitmap(uint64 p0, uint64 p1, bool render);	// Called by OPProcessList() for BITMAP objects (phrases 0-1)
53 void OPProcessScaledBitmap(uint64 p0, uint64 p1, uint64 p2, bool render);	// Called by OPProcessList() for SCALE objects (phrases 0-2)
54 void OPDiscoverObjects(uint32 address);	// Collects object addresses reachable from 'address' into object[]
55 void OPDumpObjectList(void);	// Logs every object collected in object[]
56 void DumpScaledObject(uint64 p0, uint64 p1, uint64 p2);	// Logs a scaled bitmap: raw phrases 1-2, decoded fields, scale values
57 void DumpFixedObject(uint64 p0, uint64 p1);	// Logs a fixed bitmap: raw phrase 1 plus decoded fields
58 void DumpBitmapCore(uint64 p0, uint64 p1);	// Logs the decoded fields common to both bitmap object types
59 uint64 OPLoadPhrase(uint32 offset);	// Reads one 64-bit phrase from an 8-byte aligned address
60
61 // Local global variables
62
63 // Blend tables (64K each)
64 static uint8 op_blend_y[0x10000];	// Saturated intensity sums, filled by OPInit(); index = (dst << 8) | src
65 static uint8 op_blend_cr[0x10000];	// Saturated cyan/red nibble sums, filled by OPInit(); same indexing
66 // There may be a problem with this "RAM" overlapping (and thus being independent of)
67 // some of the regular TOM RAM...
68 //#warning objectp_ram is separated from TOM RAM--need to fix that!
69 //static uint8 objectp_ram[0x40];                       // This is based at $F00000
70 uint8 objectp_running = 0;	// Cleared by OPReset(); NOTE(review): no other use is visible in this part of the file
71 //bool objectp_stop_reading_list;
72
73 static uint8 op_bitmap_bit_depth[8] = { 1, 2, 4, 8, 16, 24, 32, 0 };	// Bits per pixel for each 3-bit depth code (used when logging objects)
74 //static uint32 op_bitmap_bit_size[8] =
75 //      { (uint32)(0.125*65536), (uint32)(0.25*65536), (uint32)(0.5*65536), (uint32)(1*65536),
76 //        (uint32)(2*65536),     (uint32)(1*65536),    (uint32)(1*65536),   (uint32)(1*65536) };
77 static uint32 op_pointer;	// Address of the next phrase OPProcessList() will read
78
79 int32 phraseWidthToPixels[8] = { 64, 32, 16, 8, 4, 2, 0, 0 };	// NOTE(review): presumably pixels per phrase for each depth code -- confirm against its users
80
81
82 //
83 // Object Processor initialization
84 //
85 void OPInit(void)
86 {
87         // Here we calculate the saturating blend of a signed 4-bit value and an
88         // existing Cyan/Red value as well as a signed 8-bit value and an existing intensity...
89         // Note: CRY is 4 bits Cyan, 4 bits Red, 16 bits intensitY
90         for(int i=0; i<256*256; i++)
91         {
92                 int y = (i >> 8) & 0xFF;
93                 int dy = (int8)i;                                       // Sign extend the Y index
94                 int c1 = (i >> 8) & 0x0F;
95                 int dc1 = (int8)(i << 4) >> 4;          // Sign extend the R index
96                 int c2 = (i >> 12) & 0x0F;
97                 int dc2 = (int8)(i & 0xF0) >> 4;        // Sign extend the C index
98
99                 y += dy;
100
101                 if (y < 0)
102                         y = 0;
103                 else if (y > 0xFF)
104                         y = 0xFF;
105
106                 op_blend_y[i] = y;
107
108                 c1 += dc1;
109
110                 if (c1 < 0)
111                         c1 = 0;
112                 else if (c1 > 0x0F)
113                         c1 = 0x0F;
114
115                 c2 += dc2;
116
117                 if (c2 < 0)
118                         c2 = 0;
119                 else if (c2 > 0x0F)
120                         c2 = 0x0F;
121
122                 op_blend_cr[i] = (c2 << 4) | c1;
123         }
124
125         OPReset();
126 }
127
128 //
129 // Object Processor reset
130 //
131 void OPReset(void)	// Resets OP state; also invoked at the end of OPInit()
132 {
133 //      memset(objectp_ram, 0x00, 0x40);	(disabled along with the private objectp_ram buffer)
134         objectp_running = 0;	// The only state reset here is the running flag
135 }
136
137 static const char * opType[8] =	// Log labels indexed by the 3-bit object type
138 { "(BITMAP)", "(SCALED BITMAP)", "(GPU INT)", "(BRANCH)", "(STOP)", "???", "???", "???" };
139 static const char * ccType[8] =	// Log labels indexed by the 3-bit branch condition code
140         { "\"==\"", "\"<\"", "\">\"", "(opflag set)", "(second half line)", "?", "?", "?" };
141 static uint32 object[8192];	// Object addresses gathered by OPDiscoverObjects()
142 static uint32 numberOfObjects;	// Number of valid entries in object[]
143 //static uint32 objectLink[8192];
144 //static uint32 numberOfLinks;
143 //static uint32 objectLink[8192];
144 //static uint32 numberOfLinks;
145
146 void OPDone(void)
147 {
148 //#warning "!!! Fix OL dump so that it follows links !!!"
149 //      const char * opType[8] =
150 //      { "(BITMAP)", "(SCALED BITMAP)", "(GPU INT)", "(BRANCH)", "(STOP)", "???", "???", "???" };
151 //      const char * ccType[8] =
152 //              { "\"==\"", "\"<\"", "\">\"", "(opflag set)", "(second half line)", "?", "?", "?" };
153
154         uint32 olp = OPGetListPointer();
155         WriteLog("\nOP: OLP = $%08X\n", olp);
156         WriteLog("OP: Phrase dump\n    ----------\n");
157
158 #if 0
159         for(uint32 i=0; i<0x100; i+=8)
160         {
161                 uint32 hi = JaguarReadLong(olp + i, OP), lo = JaguarReadLong(olp + i + 4, OP);
162                 WriteLog("\t%08X: %08X %08X %s", olp + i, hi, lo, opType[lo & 0x07]);
163
164                 if ((lo & 0x07) == 3)
165                 {
166                         uint16 ypos = (lo >> 3) & 0x7FF;
167                         uint8  cc   = (lo >> 14) & 0x03;
168                         uint32 link = ((hi << 11) | (lo >> 21)) & 0x3FFFF8;
169                         WriteLog(" YPOS=%u, CC=%s, link=%08X", ypos, ccType[cc], link);
170                 }
171
172                 WriteLog("\n");
173
174                 if ((lo & 0x07) == 0)
175                         DumpFixedObject(OPLoadPhrase(olp+i), OPLoadPhrase(olp+i+8));
176
177                 if ((lo & 0x07) == 1)
178                         DumpScaledObject(OPLoadPhrase(olp+i), OPLoadPhrase(olp+i+8), OPLoadPhrase(olp+i+16));
179         }
180
181         WriteLog("\n");
182 #else
183 //temp, to keep the following function from locking up on bad/weird OLs
184 return;
185
186         numberOfObjects = 0;
187         OPDiscoverObjects(olp);
188         OPDumpObjectList();
189 #endif
190 }
191
192 void OPDiscoverObjects(uint32 address)
193 {
194         // Check to see if we've already seen this object
195         for(uint32 i=0; i<numberOfObjects; i++)
196         {
197                 if (address == object[i])
198                         return;
199         }
200
201         // Store the object...
202         object[numberOfObjects++] = address;
203         uint8 objectType = 0;
204
205         do
206         {
207                 uint32 hi = JaguarReadLong(address + 0, OP);
208                 uint32 lo = JaguarReadLong(address + 4, OP);
209                 objectType = lo & 0x07;
210                 uint32 link = ((hi << 11) | (lo >> 21)) & 0x3FFFF8;
211
212                 if (objectType == 3)
213                 {
214                         uint16 ypos = (lo >> 3) & 0x7FF;
215                         uint8  cc   = (lo >> 14) & 0x07;        // Proper # of bits == 3
216
217                         // Recursion needed to follow all links!
218                         OPDiscoverObjects(address + 8);
219                 }
220
221                 if (address == link)    // Ruh roh...
222                 {
223                         // Runaway recursive link is bad!
224                         return;
225                 }
226
227                 address = link;
228
229                 // Check to see if we've already seen this object, and add it if not
230                 bool seenObject = false;
231
232                 for(uint32 i=0; i<numberOfObjects; i++)
233                 {
234                         if (address == object[i])
235                         {
236                                 seenObject = true;
237                                 break;
238                         }
239                 }
240
241                 if (!seenObject)
242                         object[numberOfObjects++] = address;
243         }
244         while (objectType != 4);
245 }
246
247 void OPDumpObjectList(void)
248 {
249         for(uint32 i=0; i<numberOfObjects; i++)
250         {
251                 uint32 address = object[i];
252
253                 uint32 hi = JaguarReadLong(address + 0, OP);
254                 uint32 lo = JaguarReadLong(address + 4, OP);
255                 uint8 objectType = lo & 0x07;
256                 uint32 link = ((hi << 11) | (lo >> 21)) & 0x3FFFF8;
257                 WriteLog("%08X: %08X %08X %s", address, hi, lo, opType[objectType]);
258
259                 if (objectType == 3)
260                 {
261                         uint16 ypos = (lo >> 3) & 0x7FF;
262                         uint8  cc   = (lo >> 14) & 0x07;        // Proper # of bits == 3
263                         WriteLog(" YPOS=%u, CC=%s, link=$%08X", ypos, ccType[cc], link);
264                 }
265
266                 WriteLog("\n");
267
268                 if (objectType == 0)
269                         DumpFixedObject(OPLoadPhrase(address + 0), OPLoadPhrase(address + 8));
270
271                 if (objectType == 1)
272                         DumpScaledObject(OPLoadPhrase(address + 0), OPLoadPhrase(address + 8),
273                                 OPLoadPhrase(address + 16));
274
275                 if (address == link)    // Ruh roh...
276                 {
277                         // Runaway recursive link is bad!
278                         WriteLog("***** SELF REFERENTIAL LINK *****\n\n");
279                 }
280         }
281
282         WriteLog("\n");
283 }
284
285 //
286 // Object Processor memory access
287 // Memory range: F00010 - F00027
288 //
289 //      F00010-F00017   R     xxxxxxxx xxxxxxxx   OB - current object code from the graphics processor
290 //      F00020-F00023     W   xxxxxxxx xxxxxxxx   OLP - start of the object list
291 //      F00026            W   -------- -------x   OBF - object processor flag
292 //
293
294 #if 0	// Disabled: byte/word accessors for the private objectp_ram buffer, which is itself commented out
295 uint8 OPReadByte(uint32 offset, uint32 who/*=UNKNOWN*/)
296 {
297         offset &= 0x3F;	// Wrap into the 64-byte ($40) buffer
298         return objectp_ram[offset];
299 }
300
301 uint16 OPReadWord(uint32 offset, uint32 who/*=UNKNOWN*/)
302 {
303         offset &= 0x3F;	// Wrap into the 64-byte ($40) buffer
304         return GET16(objectp_ram, offset);
305 }
306
307 void OPWriteByte(uint32 offset, uint8 data, uint32 who/*=UNKNOWN*/)
308 {
309         offset &= 0x3F;	// Wrap into the 64-byte ($40) buffer
310         objectp_ram[offset] = data;
311 }
312
313 void OPWriteWord(uint32 offset, uint16 data, uint32 who/*=UNKNOWN*/)
314 {
315         offset &= 0x3F;	// Wrap into the 64-byte ($40) buffer
316         SET16(objectp_ram, offset, data);
317
318 /*if (offset == 0x20)
319 WriteLog("OP: Setting lo list pointer: %04X\n", data);
320 if (offset == 0x22)
321 WriteLog("OP: Setting hi list pointer: %04X\n", data);//*/
322 }
323 #endif
324
325 uint32 OPGetListPointer(void)
326 {
327         // Note: This register is LO / HI WORD, hence the funky look of this...
328         return GET16(tomRam8, 0x20) | (GET16(tomRam8, 0x22) << 16);
329 }
330
331 // This is WRONG, since the OBF is only 16 bits wide!!! [FIXED]
332
333 uint32 OPGetStatusRegister(void)
334 {
335         return GET16(tomRam8, 0x26);
336 }
337
338 // This is WRONG, since the OBF is only 16 bits wide!!! [FIXED]
339
340 void OPSetStatusRegister(uint32 data)
341 {
342         tomRam8[0x26] = (data & 0x0000FF00) >> 8;
343         tomRam8[0x27] |= (data & 0xFE);
344 }
345
346 void OPSetCurrentObject(uint64 object)
347 {
348 //Not sure this is right... Wouldn't it just be stored 64 bit BE?
349         // Stored as least significant 32 bits first, ms32 last in big endian
350 /*      objectp_ram[0x13] = object & 0xFF; object >>= 8;
351         objectp_ram[0x12] = object & 0xFF; object >>= 8;
352         objectp_ram[0x11] = object & 0xFF; object >>= 8;
353         objectp_ram[0x10] = object & 0xFF; object >>= 8;
354
355         objectp_ram[0x17] = object & 0xFF; object >>= 8;
356         objectp_ram[0x16] = object & 0xFF; object >>= 8;
357         objectp_ram[0x15] = object & 0xFF; object >>= 8;
358         objectp_ram[0x14] = object & 0xFF;*/
359 // Let's try regular good old big endian...
360         tomRam8[0x17] = object & 0xFF; object >>= 8;
361         tomRam8[0x16] = object & 0xFF; object >>= 8;
362         tomRam8[0x15] = object & 0xFF; object >>= 8;
363         tomRam8[0x14] = object & 0xFF; object >>= 8;
364
365         tomRam8[0x13] = object & 0xFF; object >>= 8;
366         tomRam8[0x12] = object & 0xFF; object >>= 8;
367         tomRam8[0x11] = object & 0xFF; object >>= 8;
368         tomRam8[0x10] = object & 0xFF;
369 }
370
371 uint64 OPLoadPhrase(uint32 offset)
372 {
373         offset &= ~0x07;                                                // 8 byte alignment
374         return ((uint64)JaguarReadLong(offset, OP) << 32) | (uint64)JaguarReadLong(offset+4, OP);
375 }
376
377 void OPStorePhrase(uint32 offset, uint64 p)
378 {
379         offset &= ~0x07;                                                // 8 byte alignment
380         JaguarWriteLong(offset, p >> 32, OP);
381         JaguarWriteLong(offset + 4, p & 0xFFFFFFFF, OP);
382 }
383
384 //
385 // Debugging routines
386 //
387 void DumpScaledObject(uint64 p0, uint64 p1, uint64 p2)
388 {
389         WriteLog("          %08X %08X\n", (uint32)(p1>>32), (uint32)(p1&0xFFFFFFFF));
390         WriteLog("          %08X %08X\n", (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
391         DumpBitmapCore(p0, p1);
392         uint32 hscale = p2 & 0xFF;
393         uint32 vscale = (p2 >> 8) & 0xFF;
394         uint32 remainder = (p2 >> 16) & 0xFF;
395         WriteLog("    [hsc: %02X, vsc: %02X, rem: %02X]\n", hscale, vscale, remainder);
396 }
397
398 void DumpFixedObject(uint64 p0, uint64 p1)
399 {
400         WriteLog("          %08X %08X\n", (uint32)(p1>>32), (uint32)(p1&0xFFFFFFFF));
401         DumpBitmapCore(p0, p1);
402 }
403
404 void DumpBitmapCore(uint64 p0, uint64 p1)
405 {
406         uint32 bdMultiplier[8] = { 64, 32, 16, 8, 4, 2, 1, 1 };
407         uint8 bitdepth = (p1 >> 12) & 0x07;
408 //WAS:  int16 ypos = ((p0 >> 3) & 0x3FF);                       // ??? What if not interlaced (/2)?
409         int16 ypos = ((p0 >> 3) & 0x7FF);                       // ??? What if not interlaced (/2)?
410         int32 xpos = p1 & 0xFFF;
411         xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);       // Sign extend that mutha!
412         uint32 iwidth = ((p1 >> 28) & 0x3FF);
413         uint32 dwidth = ((p1 >> 18) & 0x3FF);           // Unsigned!
414         uint16 height = ((p0 >> 14) & 0x3FF);
415         uint32 link = ((p0 >> 24) & 0x7FFFF) << 3;
416         uint32 ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
417         uint32 firstPix = (p1 >> 49) & 0x3F;
418         uint8 flags = (p1 >> 45) & 0x0F;
419         uint8 idx = (p1 >> 38) & 0x7F;
420         uint32 pitch = (p1 >> 15) & 0x07;
421         WriteLog("    [%u x %u @ (%i, %u) (iw:%u, dw:%u) (%u bpp), l:%08X, p:%08X fp:%02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
422                 iwidth * bdMultiplier[bitdepth],
423                 height, xpos, ypos, iwidth, dwidth, op_bitmap_bit_depth[bitdepth], link,
424                 ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""),
425                 (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""),
426                 (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
427 }
428
429 //
430 // Object Processor main routine
431 //
432 #warning "Need to fix this so that when an GPU object IRQ happens, we can pick up OP processing where we left off. !!! FIX !!!"
433 void OPProcessList(int halfline, bool render)
434 {
435 #warning "!!! NEED TO HANDLE MULTIPLE FIELDS PROPERLY !!!
436 // We ignore them, for now; not good
437         halfline &= 0x7FF;
438
439 extern int op_start_log;
440 //      char * condition_to_str[8] =
441 //              { "==", "<", ">", "(opflag set)", "(second half line)", "?", "?", "?" };
442
443         op_pointer = OPGetListPointer();
444
445 //      objectp_stop_reading_list = false;
446
447 //WriteLog("OP: Processing line #%u (OLP=%08X)...\n", halfline, op_pointer);
448 //op_done();
449
450 // *** BEGIN OP PROCESSOR TESTING ONLY ***
451 extern bool interactiveMode;
452 extern bool iToggle;
453 extern int objectPtr;
454 bool inhibit;
455 int bitmapCounter = 0;
456 // *** END OP PROCESSOR TESTING ONLY ***
457
458         uint32 opCyclesToRun = 30000;                                   // This is a pulled-out-of-the-air value (will need to be fixed, obviously!)
459
460 //      if (op_pointer) WriteLog(" new op list at 0x%.8x halfline %i\n",op_pointer,halfline);
461         while (op_pointer)
462         {
463 // *** BEGIN OP PROCESSOR TESTING ONLY ***
464 if (interactiveMode && bitmapCounter == objectPtr)
465         inhibit = iToggle;
466 else
467         inhibit = false;
468 // *** END OP PROCESSOR TESTING ONLY ***
469 //              if (objectp_stop_reading_list)
470 //                      return;
471
472                 uint64 p0 = OPLoadPhrase(op_pointer);
473                 op_pointer += 8;
474 //WriteLog("\t%08X type %i\n", op_pointer, (uint8)p0 & 0x07);
475
476 #if 1
477 if (halfline == TOMGetVDB() && op_start_log)
478 //if (halfline == 215 && op_start_log)
479 //if (halfline == 28 && op_start_log)
480 //if (halfline == 0)
481 {
482 WriteLog("%08X --> phrase %08X %08X", op_pointer - 8, (int)(p0>>32), (int)(p0&0xFFFFFFFF));
483 if ((p0 & 0x07) == OBJECT_TYPE_BITMAP)
484 {
485 WriteLog(" (BITMAP) ");
486 uint64 p1 = OPLoadPhrase(op_pointer);
487 WriteLog("\n%08X --> phrase %08X %08X ", op_pointer, (int)(p1>>32), (int)(p1&0xFFFFFFFF));
488         uint8 bitdepth = (p1 >> 12) & 0x07;
489 //WAS:  int16 ypos = ((p0 >> 3) & 0x3FF);                       // ??? What if not interlaced (/2)?
490         int16 ypos = ((p0 >> 3) & 0x7FF);                       // ??? What if not interlaced (/2)?
491 int32 xpos = p1 & 0xFFF;
492 xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);
493         uint32 iwidth = ((p1 >> 28) & 0x3FF);
494         uint32 dwidth = ((p1 >> 18) & 0x3FF);           // Unsigned!
495         uint16 height = ((p0 >> 14) & 0x3FF);
496         uint32 link = ((p0 >> 24) & 0x7FFFF) << 3;
497         uint32 ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
498         uint32 firstPix = (p1 >> 49) & 0x3F;
499         uint8 flags = (p1 >> 45) & 0x0F;
500         uint8 idx = (p1 >> 38) & 0x7F;
501         uint32 pitch = (p1 >> 15) & 0x07;
502 WriteLog("\n    [%u (%u) x %u @ (%i, %u) (%u bpp), l: %08X, p: %08X fp: %02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
503         iwidth, dwidth, height, xpos, ypos, op_bitmap_bit_depth[bitdepth], link, ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""), (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""), (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
504 }
505 if ((p0 & 0x07) == OBJECT_TYPE_SCALE)
506 {
507 WriteLog(" (SCALED BITMAP)");
508 uint64 p1 = OPLoadPhrase(op_pointer), p2 = OPLoadPhrase(op_pointer+8);
509 WriteLog("\n%08X --> phrase %08X %08X ", op_pointer, (int)(p1>>32), (int)(p1&0xFFFFFFFF));
510 WriteLog("\n%08X --> phrase %08X %08X ", op_pointer+8, (int)(p2>>32), (int)(p2&0xFFFFFFFF));
511         uint8 bitdepth = (p1 >> 12) & 0x07;
512 //WAS:  int16 ypos = ((p0 >> 3) & 0x3FF);                       // ??? What if not interlaced (/2)?
513         int16 ypos = ((p0 >> 3) & 0x7FF);                       // ??? What if not interlaced (/2)?
514 int32 xpos = p1 & 0xFFF;
515 xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);
516         uint32 iwidth = ((p1 >> 28) & 0x3FF);
517         uint32 dwidth = ((p1 >> 18) & 0x3FF);           // Unsigned!
518         uint16 height = ((p0 >> 14) & 0x3FF);
519         uint32 link = ((p0 >> 24) & 0x7FFFF) << 3;
520         uint32 ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
521         uint32 firstPix = (p1 >> 49) & 0x3F;
522         uint8 flags = (p1 >> 45) & 0x0F;
523         uint8 idx = (p1 >> 38) & 0x7F;
524         uint32 pitch = (p1 >> 15) & 0x07;
525 WriteLog("\n    [%u (%u) x %u @ (%i, %u) (%u bpp), l: %08X, p: %08X fp: %02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
526         iwidth, dwidth, height, xpos, ypos, op_bitmap_bit_depth[bitdepth], link, ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""), (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""), (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
527         uint32 hscale = p2 & 0xFF;
528         uint32 vscale = (p2 >> 8) & 0xFF;
529         uint32 remainder = (p2 >> 16) & 0xFF;
530 WriteLog("    [hsc: %02X, vsc: %02X, rem: %02X]\n", hscale, vscale, remainder);
531 }
532 if ((p0 & 0x07) == OBJECT_TYPE_GPU)
533 WriteLog(" (GPU)\n");
534 if ((p0 & 0x07) == OBJECT_TYPE_BRANCH)
535 {
536 WriteLog(" (BRANCH)\n");
537 uint8 * jaguarMainRam = GetRamPtr();
538 WriteLog("[RAM] --> ");
539 for(int k=0; k<8; k++)
540         WriteLog("%02X ", jaguarMainRam[op_pointer-8 + k]);
541 WriteLog("\n");
542 }
543 if ((p0 & 0x07) == OBJECT_TYPE_STOP)
544 WriteLog("    --> List end\n\n");
545 }
546 #endif
547
548                 switch ((uint8)p0 & 0x07)
549                 {
550                 case OBJECT_TYPE_BITMAP:
551                 {
552 //WAS:                  uint16 ypos = (p0 >> 3) & 0x3FF;
553                         uint16 ypos = (p0 >> 3) & 0x7FF;
554 // This is only theory implied by Rayman...!
555 // It seems that if the YPOS is zero, then bump the YPOS value so that it coincides with
556 // the VDB value. With interlacing, this would be slightly more tricky.
557 // There's probably another bit somewhere that enables this mode--but so far, doesn't seem
558 // to affect any other game in a negative way (that I've seen).
559 // Either that, or it's an undocumented bug...
560
561 //No, the reason this was needed is that the OP code before was wrong. Any value
562 //less than VDB will get written to the top line of the display!
563 #if 0
564 // Not so sure... Let's see what happens here...
565 // No change...
566                         if (ypos == 0)
567                                 ypos = TOMReadWord(0xF00046, OP) / 2;                   // Get the VDB value
568 #endif
569 // Actually, no. Any item less than VDB will get only the lines that hang over VDB displayed.
570 // Actually, this is incorrect. It seems that VDB value is wrong somewhere and that's
571 // what's causing things to fuck up. Still no idea why.
572
573                         uint32 height = (p0 & 0xFFC000) >> 14;
574                         uint32 oldOPP = op_pointer - 8;
575 // *** BEGIN OP PROCESSOR TESTING ONLY ***
576 if (inhibit && op_start_log)
577         WriteLog("!!! ^^^ This object is INHIBITED! ^^^ !!!\n");
578 bitmapCounter++;
579 if (!inhibit)   // For OP testing only!
580 // *** END OP PROCESSOR TESTING ONLY ***
581                         if (halfline >= ypos && height > 0)
582                         {
583                                 uint64 p1 = OPLoadPhrase(op_pointer);
584                                 op_pointer += 8;
585 //WriteLog("OP: Writing halfline %d with ypos == %d...\n", halfline, ypos);
586 //WriteLog("--> Writing %u BPP bitmap...\n", op_bitmap_bit_depth[(p1 >> 12) & 0x07]);
587 //                              OPProcessFixedBitmap(halfline, p0, p1, render);
588                                 OPProcessFixedBitmap(p0, p1, render);
589
590                                 // OP write-backs
591
592 //???Does this really happen??? Doesn't seem to work if you do this...!
593 //Probably not. Must be a bug in the documentation...!
594 //                              uint32 link = (p0 & 0x7FFFF000000) >> 21;
595 //                              SET16(tom_ram_8, 0x20, link & 0xFFFF);  // OLP
596 //                              SET16(tom_ram_8, 0x22, link >> 16);
597 /*                              uint32 height = (p0 & 0xFFC000) >> 14;
598                                 if (height - 1 > 0)
599                                         height--;*/
600                                 // NOTE: Would subtract 2 if in interlaced mode...!
601 //                              uint64 height = ((p0 & 0xFFC000) - 0x4000) & 0xFFC000;
602 //                              if (height)
603                                 height--;
604
605                                 uint64 data = (p0 & 0xFFFFF80000000000LL) >> 40;
606                                 uint64 dwidth = (p1 & 0xFFC0000) >> 15;
607                                 data += dwidth;
608
609                                 p0 &= ~0xFFFFF80000FFC000LL;            // Mask out old data...
610                                 p0 |= (uint64)height << 14;
611                                 p0 |= data << 40;
612                                 OPStorePhrase(oldOPP, p0);
613                         }
614 //WriteLog("\t\tOld OP: %08X -> ", op_pointer);
615 //Temp, for testing...
616 //No doubt, this type of check will break all kinds of stuff... !!! FIX !!!
617 //And it does! !!! FIX !!!
618 //Let's remove this "fix" since it screws up more than it fixes.
619 /*      if (op_pointer > ((p0 & 0x000007FFFF000000LL) >> 21))
620                 return;*/
621
622 // NOTE: The link address only replaces bits 3-21 in the OLP, and this replaces
623 //       EVERYTHING. !!! FIX !!! [DONE]
624 #warning "!!! Link address is not linked properly for all object types !!!"
625 #warning "!!! Only BITMAP is properly handled !!!"
626                         op_pointer &= 0xFFC00007;
627                         op_pointer |= (p0 & 0x000007FFFF000000LL) >> 21;
628 //WriteLog("New OP: %08X\n", op_pointer);
629 //kludge: Seems that memory access is mirrored in the first 8MB of memory...
630 if (op_pointer > 0x1FFFFF && op_pointer < 0x800000)
631         op_pointer &= 0xFF1FFFFF;       // Knock out bits 21-23
632
633                         break;
634                 }
635                 case OBJECT_TYPE_SCALE:
636                 {
637 //WAS:                  uint16 ypos = (p0 >> 3) & 0x3FF;
638                         uint16 ypos = (p0 >> 3) & 0x7FF;
639                         uint32 height = (p0 & 0xFFC000) >> 14;
640                         uint32 oldOPP = op_pointer - 8;
641 //WriteLog("OP: Scaled Object (ypos=%04X, height=%04X", ypos, height);
642 // *** BEGIN OP PROCESSOR TESTING ONLY ***
643 if (inhibit && op_start_log)
644 {
645         WriteLog("!!! ^^^ This object is INHIBITED! ^^^ !!! (halfline=%u, ypos=%u, height=%u)\n", halfline, ypos, height);
646         DumpScaledObject(p0, OPLoadPhrase(op_pointer), OPLoadPhrase(op_pointer+8));
647 }
648 bitmapCounter++;
649 if (!inhibit)   // For OP testing only!
650 // *** END OP PROCESSOR TESTING ONLY ***
651                         if (halfline >= ypos && height > 0)
652                         {
653                                 uint64 p1 = OPLoadPhrase(op_pointer);
654                                 op_pointer += 8;
655                                 uint64 p2 = OPLoadPhrase(op_pointer);
656                                 op_pointer += 8;
657 //WriteLog("OP: %08X (%d) %08X%08X %08X%08X %08X%08X\n", oldOPP, halfline, (uint32)(p0>>32), (uint32)(p0&0xFFFFFFFF), (uint32)(p1>>32), (uint32)(p1&0xFFFFFFFF), (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
658                                 OPProcessScaledBitmap(p0, p1, p2, render);
659
660                                 // OP write-backs
661
662                                 uint16 remainder = (p2 >> 16) & 0xFF;//, vscale = p2 >> 8;
663                                 uint8 /*remainder = p2 >> 16,*/ vscale = p2 >> 8;
664 //Actually, we should skip this object if it has a vscale of zero.
665 //Or do we? Not sure... Atari Karts has a few lines that look like:
666 // (SCALED BITMAP)
667 //000E8268 --> phrase 00010000 7000B00D
668 //    [7 (0) x 1 @ (13, 0) (8 bpp), l: 000E82A0, p: 000E0FC0 fp: 00, fl:RELEASE, idx:00, pt:01]
669 //    [hsc: 9A, vsc: 00, rem: 00]
670 // Could it be the vscale is overridden if the DWIDTH is zero? Hmm...
671 //WriteLog("OP: Scaled bitmap processing (rem=%02X, vscale=%02X)...\n", remainder, vscale);//*/
672
673                                 if (vscale == 0)
674                                         vscale = 0x20;                                  // OP bug??? Nope, it isn't...! Or is it?
675
676 //extern int start_logging;
677 //if (start_logging)
678 //      WriteLog("--> Returned from scaled bitmap processing (rem=%02X, vscale=%02X)...\n", remainder, vscale);//*/
679 //Locks up here:
680 //--> Returned from scaled bitmap processing (rem=20, vscale=80)...
681 //There are other problems here, it looks like...
682 //Another lock up:
683 //About to execute OP (508)...
684 /*
685 OP: Scaled bitmap 4x? 4bpp at 38,? hscale=7C fpix=0 data=00075E28 pitch 1 hflipped=no dwidth=? (linked to 00071118) Transluency=no
686 --> Returned from scaled bitmap processing (rem=50, vscale=7C)...
687 OP: Scaled bitmap 4x? 4bpp at 38,? hscale=7C fpix=0 data=00075E28 pitch 1 hflipped=no dwidth=? (linked to 00071118) Transluency=no
688 --> Returned from scaled bitmap processing (rem=30, vscale=7C)...
689 OP: Scaled bitmap 4x? 4bpp at 38,? hscale=7C fpix=0 data=00075E28 pitch 1 hflipped=no dwidth=? (linked to 00071118) Transluency=no
690 --> Returned from scaled bitmap processing (rem=10, vscale=7C)...
691 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756A8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
692 --> Returned from scaled bitmap processing (rem=00, vscale=7E)...
693 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
694 --> Returned from scaled bitmap processing (rem=00, vscale=80)...
695 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
696 --> Returned from scaled bitmap processing (rem=5E, vscale=7E)...
697 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756E8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
698 --> Returned from scaled bitmap processing (rem=60, vscale=80)...
699 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
700 --> Returned from scaled bitmap processing (rem=3E, vscale=7E)...
701 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756E8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
702 --> Returned from scaled bitmap processing (rem=40, vscale=80)...
703 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
704 --> Returned from scaled bitmap processing (rem=1E, vscale=7E)...
705 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756E8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
706 --> Returned from scaled bitmap processing (rem=20, vscale=80)...
707 */
708 //Here's another problem:
709 //    [hsc: 20, vsc: 20, rem: 00]
710 // Since we're not checking for $E0 (but that's what we get from the above), we end
711 // up repeating this halfline unnecessarily... !!! FIX !!! [DONE, but... still not quite
712 // right. Either that, or the Accolade team that wrote Bubsy screwed up royal.]
713 //Also note: $E0 = 7.0 which IS a legal vscale value...
714
715 //                              if (remainder & 0x80)                           // I.e., it's negative
716 //                              if ((remainder & 0x80) || remainder == 0)       // I.e., it's <= 0
717 //                              if ((remainder - 1) >= 0xE0)            // I.e., it's <= 0
718 //                              if ((remainder >= 0xE1) || remainder == 0)// I.e., it's <= 0
719 //                              if ((remainder >= 0xE1 && remainder <= 0xFF) || remainder == 0)// I.e., it's <= 0
720 //                              if (remainder <= 0x20)                          // I.e., it's <= 1.0
721                                 // I.e., it's < 1.0f -> means it'll go negative when we subtract 1.0f.
722                                 if (remainder < 0x20)
723                                 {
724                                         uint64 data = (p0 & 0xFFFFF80000000000LL) >> 40;
725                                         uint64 dwidth = (p1 & 0xFFC0000) >> 15;
726
727 //                                      while (remainder & 0x80)
728 //                                      while ((remainder & 0x80) || remainder == 0)
729 //                                      while ((remainder - 1) >= 0xE0)
730 //                                      while ((remainder >= 0xE1) || remainder == 0)
731 //                                      while ((remainder >= 0xE1 && remainder <= 0xFF) || remainder == 0)
732 //                                      while (remainder <= 0x20)
733                                         while (remainder < 0x20)
734                                         {
735                                                 remainder += vscale;
736
737                                                 if (height)
738                                                         height--;
739
740                                                 data += dwidth;
741                                         }
742
743                                         p0 &= ~0xFFFFF80000FFC000LL;    // Mask out old data...
744                                         p0 |= (uint64)height << 14;
745                                         p0 |= data << 40;
746                                         OPStorePhrase(oldOPP, p0);
747                                 }
748
749                                 remainder -= 0x20;                                      // 1.0f in [3.5] fixed point format
750
751 //if (start_logging)
752 //      WriteLog("--> Finished writebacks...\n");//*/
753
754 //WriteLog(" [%08X%08X -> ", (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
755                                 p2 &= ~0x0000000000FF0000LL;
756                                 p2 |= (uint64)remainder << 16;
757 //WriteLog("%08X%08X]\n", (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
758                                 OPStorePhrase(oldOPP + 16, p2);
759 //remainder = (uint8)(p2 >> 16), vscale = (uint8)(p2 >> 8);
760 //WriteLog(" [after]: rem=%02X, vscale=%02X\n", remainder, vscale);
761                         }
762
763                         op_pointer = (p0 & 0x000007FFFF000000LL) >> 21;
764                         break;
765                 }
766                 case OBJECT_TYPE_GPU:
767                 {
768 //WriteLog("OP: Asserting GPU IRQ #3...\n");
769 #warning "Need to fix OP GPU IRQ handling! !!! FIX !!!"
770                         OPSetCurrentObject(p0);
771                         GPUSetIRQLine(3, ASSERT_LINE);
772 //Also, OP processing is suspended from this point until OBF (F00026) is written to...
773 // !!! FIX !!!
774 //Do something like:
775 //OPSuspendedByGPU = true;
776 //Dunno if the OP keeps processing from where it was interrupted, or if it just continues
777 //on the next halfline...
778 // --> It continues from where it was interrupted! !!! FIX !!!
779                         break;
780                 }
781                 case OBJECT_TYPE_BRANCH:
782                 {
783                         uint16 ypos = (p0 >> 3) & 0x7FF;
784 // NOTE: The JTRM sez there are only 2 bits used for the CC, but lists *five*
785 //       conditions! Need at least one more bit for that! :-P
786 // Also, the ASIC nets imply that it uses bits 14-16 (height in BM & SBM objects)
787 #warning "!!! Possibly bad CC handling in OP (missing 1 bit) !!!"
788                         uint8  cc   = (p0 >> 14) & 0x03;
789                         uint32 link = (p0 >> 21) & 0x3FFFF8;
790
791 //                      if ((ypos!=507)&&(ypos!=25))
792 //                              WriteLog("\t%i%s%i link=0x%.8x\n",halfline,condition_to_str[cc],ypos>>1,link);
793                         switch (cc)
794                         {
795                         case CONDITION_EQUAL:
796                                 if (TOMReadWord(0xF00006, OP) == ypos || ypos == 0x7FF)
797                                         op_pointer = link;
798                                 break;
799                         case CONDITION_LESS_THAN:
800                                 if (TOMReadWord(0xF00006, OP) < ypos)
801                                         op_pointer = link;
802                                 break;
803                         case CONDITION_GREATER_THAN:
804                                 if (TOMReadWord(0xF00006, OP) > ypos)
805                                         op_pointer = link;
806                                 break;
807                         case CONDITION_OP_FLAG_SET:
808                                 if (OPGetStatusRegister() & 0x01)
809                                         op_pointer = link;
810                                 break;
811                         case CONDITION_SECOND_HALF_LINE:
812 //Here's the ASIC code:
813 //  ND4(cctrue5, newheight[2], heightl[1], heightl[0], hcb[10]);
814 //which means, do the link if bit 10 of HC is set...
815
816                                 // This basically means branch if bit 10 of HC is set
817 #warning "Unhandled condition code causes emulator to crash... !!! FIX !!!"
818                                 WriteLog("OP: Unexpected CONDITION_SECOND_HALF_LINE in BRANCH object\nOP: shutting down!\n");
819                                 LogDone();
820                                 exit(0);
821                                 break;
822                         default:
823                                 // Basically, if you do this, the OP does nothing. :-)
824                                 WriteLog("OP: Unimplemented branch condition %i\n", cc);
825                         }
826                         break;
827                 }
828                 case OBJECT_TYPE_STOP:
829                 {
830 //op_start_log = 0;
831                         // unsure
832 //WriteLog("OP: --> STOP\n");
833 //                      op_set_status_register(((p0>>3) & 0xFFFFFFFF));
834 //This seems more likely...
835                         OPSetCurrentObject(p0);
836
837                         if (p0 & 0x08)
838                         {
839                                 // We need to check whether these interrupts are enabled or not, THEN
840                                 // set an IRQ + pending flag if necessary...
841                                 if (TOMIRQEnabled(IRQ_OPFLAG))
842                                 {
843                                         TOMSetPendingObjectInt();
844                                         m68k_set_irq(2);                                // Cause a 68K IPL 2 to occur...
845                                 }
846                         }
847
848                         return;
849 //                      break;
850                 }
851                 default:
852 //                      WriteLog("op: unknown object type %i\n", ((uint8)p0 & 0x07));
853                         return;
854                 }
855
856                 // Here is a little sanity check to keep the OP from locking up the machine
857                 // when fed bad data. Better would be to count how many actual cycles it used
858                 // and bail out/reenter to properly simulate an overloaded OP... !!! FIX !!!
859 #warning "Better would be to count how many actual cycles it used and bail out/reenter to properly simulate an overloaded OP... !!! FIX !!!"
860                 opCyclesToRun--;
861
862                 if (!opCyclesToRun)
863                         return;
864         }
865 }
866
867 //
868 // Store fixed size bitmap in line buffer
869 //
870 void OPProcessFixedBitmap(uint64 p0, uint64 p1, bool render)
871 {
872 // Need to make sure that when writing that it stays within the line buffer...
873 // LBUF ($F01800 - $F01D9E) 360 x 32-bit RAM
874         uint8 depth = (p1 >> 12) & 0x07;                                // Color depth of image
875         int32 xpos = ((int16)((p1 << 4) & 0xFFFF)) >> 4;// Image xpos in LBUF
876         uint32 iwidth = (p1 >> 28) & 0x3FF;                             // Image width in *phrases*
877         uint32 data = (p0 >> 40) & 0xFFFFF8;                    // Pixel data address
878 //#ifdef OP_DEBUG_BMP
879         uint32  firstPix = (p1 >> 49) & 0x3F;
880         // "The LSB is significant only for scaled objects..." -JTRM
881         // "In 1 BPP mode, all five bits are significant. In 2 BPP mode, the top four are significant..."
882         firstPix &= 0x3E;
883 //#endif
884 // We can ignore the RELEASE (high order) bit for now--probably forever...!
885 //      uint8 flags = (p1 >> 45) & 0x0F;        // REFLECT, RMW, TRANS, RELEASE
886 //Optimize: break these out to their own BOOL values
887         uint8 flags = (p1 >> 45) & 0x07;                                // REFLECT (0), RMW (1), TRANS (2)
888         bool flagREFLECT = (flags & OPFLAG_REFLECT ? true : false),
889                 flagRMW = (flags & OPFLAG_RMW ? true : false),
890                 flagTRANS = (flags & OPFLAG_TRANS ? true : false);
891 // "For images with 1 to 4 bits/pixel the top 7 to 4 bits of the index
892 //  provide the most significant bits of the palette address."
893         uint8 index = (p1 >> 37) & 0xFE;                                // CLUT index offset (upper pix, 1-4 bpp)
894         uint32 pitch = (p1 >> 15) & 0x07;                               // Phrase pitch
895         pitch <<= 3;                                                                    // Optimization: Multiply pitch by 8
896
897 //      int16 scanlineWidth = tom_getVideoModeWidth();
898         uint8 * tomRam8 = TOMGetRamPointer();
899         uint8 * paletteRAM = &tomRam8[0x400];
900         // This is OK as long as it's used correctly: For 16-bit RAM to RAM direct copies--NOT
901         // for use when using endian-corrected data (i.e., any of the *_word_read functions!)
902         uint16 * paletteRAM16 = (uint16 *)paletteRAM;
903
904 //      WriteLog("bitmap %ix? %ibpp at %i,? firstpix=? data=0x%.8x pitch %i hflipped=%s dwidth=? (linked to ?) RMW=%s Tranparent=%s\n",
905 //              iwidth, op_bitmap_bit_depth[bitdepth], xpos, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), (flags&OPFLAG_RMW ? "yes" : "no"), (flags&OPFLAG_TRANS ? "yes" : "no"));
906
907 // Is it OK to have a 0 for the data width??? (i.e., undocumented?)
908 // Seems to be... Seems that dwidth *can* be zero (i.e., reuse same line) as well.
909 // Pitch == 0 is OK too...
910
911 //kludge: Seems that the OP treats iwidth == 0 as iwidth == 1... Need to investigate
912 //        on real hardware...
913 #warning "!!! Need to investigate iwidth == 0 behavior on real hardware !!!"
914 if (iwidth == 0)
915         iwidth = 1;
916
917 //      if (!render || op_pointer == 0 || ptr == 0 || pitch == 0)
918 //I'm not convinced that we need to concern ourselves with data & op_pointer here either!
919         if (!render || iwidth == 0)
920                 return;
921
922 //OK, so we know the position in the line buffer is correct. It's the clipping in
923 //24bpp mode that's wrong!
924 #if 0
925 //This is a total kludge, based upon the fact that 24BPP mode puts *4* bytes
926 //into the line buffer for each pixel.
927 if (depth == 5) // i.e., 24bpp mode...
928         xpos >>= 1;     // Cut it in half...
929 #endif
930
931 //#define OP_DEBUG_BMP
932 //#ifdef OP_DEBUG_BMP
933 //      WriteLog("bitmap %ix%i %ibpp at %i,%i firstpix=%i data=0x%.8x pitch %i hflipped=%s dwidth=%i (linked to 0x%.8x) Transluency=%s\n",
934 //              iwidth, height, op_bitmap_bit_depth[bitdepth], xpos, ypos, firstPix, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), dwidth, op_pointer, (flags&OPFLAG_RMW ? "yes" : "no"));
935 //#endif
936
937 //      int32 leftMargin = xpos, rightMargin = (xpos + (phraseWidthToPixels[depth] * iwidth)) - 1;
938         int32 startPos = xpos, endPos = xpos +
939                 (!flagREFLECT ? (phraseWidthToPixels[depth] * iwidth) - 1
940                 : -((phraseWidthToPixels[depth] * iwidth) + 1));
941         uint32 clippedWidth = 0, phraseClippedWidth = 0, dataClippedWidth = 0;//, phrasePixel = 0;
942         bool in24BPPMode = (((GET16(tomRam8, 0x0028) >> 1) & 0x03) == 1 ? true : false);        // VMODE
943         // Not sure if this is Jaguar Two only location or what...
944         // From the docs, it is... If we want to limit here we should think of something else.
945 //      int32 limit = GET16(tom_ram_8, 0x0008);                 // LIMIT
946 //      int32 limit = 720;
947 //      int32 lbufWidth = (!in24BPPMode ? limit - 1 : (limit / 2) - 1); // Zero based limit...
948 //printf("[OP:xpos=%i,spos=%i,epos=%i>", xpos, startPos, endPos);
949         // This is correct, the OP line buffer is a constant size... 
950         int32 limit = 720;
951         int32 lbufWidth = 719;
952
953         // If the image is completely to the left or right of the line buffer, then bail.
954 //If in REFLECT mode, then these values are swapped! !!! FIX !!! [DONE]
955 //There are four possibilities:
956 //  1. image sits on left edge and no REFLECT; starts out of bounds but ends in bounds.
957 //  2. image sits on left edge and REFLECT; starts in bounds but ends out of bounds.
958 //  3. image sits on right edge and REFLECT; starts out of bounds but ends in bounds.
959 //  4. image sits on right edge and no REFLECT; starts in bounds but ends out of bounds.
960 //Numbers 2 & 4 can be caught by checking the LBUF clip while in the inner loop,
961 // numbers 1 & 3 are of concern.
962 // This *indirectly* handles only cases 2 & 4! And is WRONG if REFLECT is set...!
963 //      if (rightMargin < 0 || leftMargin > lbufWidth)
964
965 // It might be easier to swap these (if REFLECTed) and just use XPOS down below...
966 // That way, you could simply set XPOS to leftMargin if !REFLECT and to rightMargin otherwise.
967 // Still have to be careful with the DATA and IWIDTH values though...
968
969 //      if ((!flagREFLECT && (rightMargin < 0 || leftMargin > lbufWidth))
970 //              || (flagREFLECT && (leftMargin < 0 || rightMargin > lbufWidth)))
971 //              return;
972         if ((!flagREFLECT && (endPos < 0 || startPos > lbufWidth))
973                 || (flagREFLECT && (startPos < 0 || endPos > lbufWidth)))
974                 return;
975
976         // Otherwise, find the clip limits and clip the phrase as well...
977         // NOTE: I'm fudging here by letting the actual blit overstep the bounds of the
978         //       line buffer, but it shouldn't matter since there are two unused line
979         //       buffers below and nothing above and I'll at most write 8 bytes outside
980         //       the line buffer... I could use a fractional clip begin/end value, but
981         //       this makes the blit a *lot* more hairy. I might fix this in the future
982         //       if it becomes necessary. (JLH)
983         //       Probably wouldn't be *that* hairy. Just use a delta that tells the inner loop
984         //       which pixel in the phrase is being written, and quit when either end of phrases
985         //       is reached or line buffer extents are surpassed.
986
987 //This stuff is probably wrong as well... !!! FIX !!!
988 //The strange thing is that it seems to work, but that's no guarantee that it's bulletproof!
989 //Yup. Seems that JagMania doesn't work correctly with this...
990 //Dunno if this is the problem, but Atari Karts is showing *some* of the road now...
991 //      if (!flagREFLECT)
992
993 /*
994         if (leftMargin < 0)
995                 clippedWidth = 0 - leftMargin,
996                 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth],
997                 leftMargin = 0 - (clippedWidth % phraseWidthToPixels[depth]);
998 //              leftMargin = 0;
999
1000         if (rightMargin > lbufWidth)
1001                 clippedWidth = rightMargin - lbufWidth,
1002                 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];//,
1003 //              rightMargin = lbufWidth + (clippedWidth % phraseWidthToPixels[depth]);
1004 //              rightMargin = lbufWidth;
1005 */
1006 if (depth > 5)
1007         WriteLog("OP: We're about to encounter a divide by zero error!\n");
1008         // NOTE: We're just using endPos to figure out how much, if any, to clip by.
1009         // ALSO: There may be another case where we start out of bounds and end out of bounds...!
1010         // !!! FIX !!!
1011         if (startPos < 0)                       // Case #1: Begin out, end in, L to R
1012                 clippedWidth = 0 - startPos,
1013                 dataClippedWidth = phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth],
1014                 startPos = 0 - (clippedWidth % phraseWidthToPixels[depth]);
1015
1016         if (endPos < 0)                         // Case #2: Begin in, end out, R to L
1017                 clippedWidth = 0 - endPos,
1018                 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];
1019
1020         if (endPos > lbufWidth)         // Case #3: Begin in, end out, L to R
1021                 clippedWidth = endPos - lbufWidth,
1022                 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];
1023
1024         if (startPos > lbufWidth)       // Case #4: Begin out, end in, R to L
1025                 clippedWidth = startPos - lbufWidth,
1026                 dataClippedWidth = phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth],
1027                 startPos = lbufWidth + (clippedWidth % phraseWidthToPixels[depth]);
1028 //printf("<OP:spos=%i,epos=%i]", startPos, endPos);
1029
1030         // If the image is sitting on the line buffer left or right edge, we need to compensate
1031         // by decreasing the image phrase width accordingly.
1032         iwidth -= phraseClippedWidth;
1033
1034         // Also, if we're clipping the phrase we need to make sure we're in the correct part of
1035         // the pixel data.
1036 //      data += phraseClippedWidth * (pitch << 3);
1037         data += dataClippedWidth * pitch;
1038
1039         // NOTE: When the bitmap is in REFLECT mode, the XPOS marks the *right* side of the
1040         //       bitmap! This makes clipping & etc. MUCH, much easier...!
1041 //      uint32 lbufAddress = 0x1800 + (!in24BPPMode ? leftMargin * 2 : leftMargin * 4);
1042 //Why does this work right when multiplying startPos by 2 (instead of 4) for 24 BPP mode?
1043 //Is this a bug in the OP?
1044 //It's because in 24bpp mode, each pixel takes *4* bytes, instead of the usual 2.
1045 //Though it looks like we're doing it here no matter what...
1046 //      uint32 lbufAddress = 0x1800 + (!in24BPPMode ? startPos * 2 : startPos * 2);
1047 //Let's try this:
1048         uint32 lbufAddress = 0x1800 + (startPos * 2);
1049         uint8 * currentLineBuffer = &tomRam8[lbufAddress];
1050
1051         // Render.
1052
1053 // Hmm. We check above for 24 BPP mode, but don't do anything about it below...
1054 // If we *were* in 24 BPP mode, how would you convert CRY to RGB24? Seems to me
1055 // that if you're in CRY mode then you wouldn't be able to use 24 BPP bitmaps
1056 // anyway.
1057 // This seems to be the case (at least according to the Midsummer docs)...!
1058
1059 // This is to test using palette zeroes instead of bit zeroes...
1060 // And it seems that this is wrong, index == 0 is transparent apparently... :-/
1061 //#define OP_USES_PALETTE_ZERO
1062
1063         if (depth == 0)                                                                 // 1 BPP
1064         {
1065                 // The LSB of flags is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1066                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1067
1068                 // Fetch 1st phrase...
1069                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1070 //Note that firstPix should only be honored *if* we start with the 1st phrase of the bitmap
1071 //i.e., we didn't clip on the margin... !!! FIX !!!
1072                 pixels <<= firstPix;                                            // Skip first N pixels (N=firstPix)...
1073                 int i = firstPix;                                                       // Start counter at right spot...
1074
1075                 while (iwidth--)
1076                 {
1077                         while (i++ < 64)
1078                         {
1079                                 uint8 bit = pixels >> 63;
1080 #ifndef OP_USES_PALETTE_ZERO
1081                                 if (flagTRANS && bit == 0)
1082 #else
1083                                 if (flagTRANS && (paletteRAM16[index | bit] == 0))
1084 #endif
1085                                         ;       // Do nothing...
1086                                 else
1087                                 {
1088                                         if (!flagRMW)
1089 //Optimize: Set paletteRAM16 to beginning of palette RAM + index*2 and use only [bit] as index...
1090 //Won't optimize RMW case though...
1091                                                 // This is the *only* correct use of endian-dependent code
1092                                                 // (i.e., mem-to-mem direct copying)!
1093                                                 *(uint16 *)currentLineBuffer = paletteRAM16[index | bit];
1094                                         else
1095                                                 *currentLineBuffer =
1096                                                         BLEND_CR(*currentLineBuffer, paletteRAM[(index | bit) << 1]),
1097                                                 *(currentLineBuffer + 1) =
1098                                                         BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bit) << 1) + 1]);
1099                                 }
1100
1101                                 currentLineBuffer += lbufDelta;
1102                                 pixels <<= 1;
1103                         }
1104                         i = 0;
1105                         // Fetch next phrase...
1106                         data += pitch;
1107                         pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1108                 }
1109         }
1110         else if (depth == 1)                                                    // 2 BPP
1111         {
1112 if (firstPix)
1113         WriteLog("OP: Fixed bitmap @ 2 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1114                 index &= 0xFC;                                                          // Top six bits form CLUT index
1115                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1116                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1117
1118                 while (iwidth--)
1119                 {
1120                         // Fetch phrase...
1121                         uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1122                         data += pitch;
1123
1124                         for(int i=0; i<32; i++)
1125                         {
1126                                 uint8 bits = pixels >> 62;
1127 // Seems to me that both of these are in the same endian, so we could cast it as
1128 // uint16 * and do straight across copies (what about 24 bpp? Treat it differently...)
1129 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
1130 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
1131 // No, it isn't because we read the memory in an endian safe way--this *won't* work...
1132 #ifndef OP_USES_PALETTE_ZERO
1133                                 if (flagTRANS && bits == 0)
1134 #else
1135                                 if (flagTRANS && (paletteRAM16[index | bits] == 0))
1136 #endif
1137                                         ;       // Do nothing...
1138                                 else
1139                                 {
1140                                         if (!flagRMW)
1141                                                 *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1142                                         else
1143                                                 *currentLineBuffer =
1144                                                         BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1145                                                 *(currentLineBuffer + 1) =
1146                                                         BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1147                                 }
1148
1149                                 currentLineBuffer += lbufDelta;
1150                                 pixels <<= 2;
1151                         }
1152                 }
1153         }
1154         else if (depth == 2)                                                    // 4 BPP
1155         {
1156 if (firstPix)
1157         WriteLog("OP: Fixed bitmap @ 4 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1158                 index &= 0xF0;                                                          // Top four bits form CLUT index
1159                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1160                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1161
1162                 while (iwidth--)
1163                 {
1164                         // Fetch phrase...
1165                         uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1166                         data += pitch;
1167
1168                         for(int i=0; i<16; i++)
1169                         {
1170                                 uint8 bits = pixels >> 60;
1171 // Seems to me that both of these are in the same endian, so we could cast it as
1172 // uint16 * and do straight across copies (what about 24 bpp? Treat it differently...)
1173 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
1174 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
1175 // No, it isn't because we read the memory in an endian safe way--this *won't* work...
1176 #ifndef OP_USES_PALETTE_ZERO
1177                                 if (flagTRANS && bits == 0)
1178 #else
1179                                 if (flagTRANS && (paletteRAM16[index | bits] == 0))
1180 #endif
1181                                         ;       // Do nothing...
1182                                 else
1183                                 {
1184                                         if (!flagRMW)
1185                                                 *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1186                                         else
1187                                                 *currentLineBuffer =
1188                                                         BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1189                                                 *(currentLineBuffer + 1) =
1190                                                         BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1191                                 }
1192
1193                                 currentLineBuffer += lbufDelta;
1194                                 pixels <<= 4;
1195                         }
1196                 }
1197         }
1198         else if (depth == 3)                                                    // 8 BPP
1199         {
1200                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1201                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1202
1203                 // Fetch 1st phrase...
1204                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1205 //Note that firstPix should only be honored *if* we start with the 1st phrase of the bitmap
1206 //i.e., we didn't clip on the margin... !!! FIX !!!
1207                 firstPix &= 0x30;                                                       // Only top two bits are valid for 8 BPP
1208                 pixels <<= firstPix;                                            // Skip first N pixels (N=firstPix)...
1209                 int i = firstPix >> 3;                                          // Start counter at right spot...
1210
1211                 while (iwidth--)
1212                 {
1213                         while (i++ < 8)
1214                         {
1215                                 uint8 bits = pixels >> 56;
1216 // Seems to me that both of these are in the same endian, so we could cast it as
1217 // uint16 * and do straight across copies (what about 24 bpp? Treat it differently...)
1218 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
1219 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
1220 // No, it isn't because we read the memory in an endian safe way--this *won't* work...
1221 //This would seem to be problematic...
1222 //Because it's the palette entry being zero that makes the pixel transparent...
1223 //Let's try it and see.
1224 #ifndef OP_USES_PALETTE_ZERO
1225                                 if (flagTRANS && bits == 0)
1226 #else
1227                                 if (flagTRANS && (paletteRAM16[bits] == 0))
1228 #endif
1229                                         ;       // Do nothing...
1230                                 else
1231                                 {
1232                                         if (!flagRMW)
1233                                                 *(uint16 *)currentLineBuffer = paletteRAM16[bits];
1234                                         else
1235                                                 *currentLineBuffer =
1236                                                         BLEND_CR(*currentLineBuffer, paletteRAM[bits << 1]),
1237                                                 *(currentLineBuffer + 1) =
1238                                                         BLEND_Y(*(currentLineBuffer + 1), paletteRAM[(bits << 1) + 1]);
1239                                 }
1240
1241                                 currentLineBuffer += lbufDelta;
1242                                 pixels <<= 8;
1243                         }
1244                         i = 0;
1245                         // Fetch next phrase...
1246                         data += pitch;
1247                         pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1248                 }
1249         }
1250         else if (depth == 4)                                                    // 16 BPP
1251         {
1252 if (firstPix)
1253         WriteLog("OP: Fixed bitmap @ 16 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1254                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1255                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1256
1257                 while (iwidth--)
1258                 {
1259                         // Fetch phrase...
1260                         uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1261                         data += pitch;
1262
1263                         for(int i=0; i<4; i++)
1264                         {
1265                                 uint8 bitsHi = pixels >> 56, bitsLo = pixels >> 48;
1266 // Seems to me that both of these are in the same endian, so we could cast it as
1267 // uint16 * and do straight across copies (what about 24 bpp? Treat it differently...)
1268 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
1269 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
1270 // No, it isn't because we read the memory in an endian safe way--it *won't* work...
1271 //This doesn't seem right... Let's try the encoded black value ($8800):
1272 //Apparently, CRY 0 maps to $8800...
1273                                 if (flagTRANS && ((bitsLo | bitsHi) == 0))
1274 //                              if (flagTRANS && (bitsHi == 0x88) && (bitsLo == 0x00))
1275                                         ;       // Do nothing...
1276                                 else
1277                                 {
1278                                         if (!flagRMW)
1279                                                 *currentLineBuffer = bitsHi,
1280                                                 *(currentLineBuffer + 1) = bitsLo;
1281                                         else
1282                                                 *currentLineBuffer =
1283                                                         BLEND_CR(*currentLineBuffer, bitsHi),
1284                                                 *(currentLineBuffer + 1) =
1285                                                         BLEND_Y(*(currentLineBuffer + 1), bitsLo);
1286                                 }
1287
1288                                 currentLineBuffer += lbufDelta;
1289                                 pixels <<= 16;
1290                         }
1291                 }
1292         }
1293         else if (depth == 5)                                                    // 24 BPP
1294         {
1295 //Looks like Iron Soldier is the only game that uses 24BPP mode...
1296 //There *might* be others...
1297 //WriteLog("OP: Writing 24 BPP bitmap!\n");
1298 if (firstPix)
1299         WriteLog("OP: Fixed bitmap @ 24 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1300                 // Not sure, but I think RMW only works with 16 BPP and below, and only in CRY mode...
1301                 // The LSB of flags is OPFLAG_REFLECT, so sign extend it and OR 4 into it.
1302                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 4) | 0x04;
1303
1304                 while (iwidth--)
1305                 {
1306                         // Fetch phrase...
1307                         uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1308                         data += pitch;
1309
1310                         for(int i=0; i<2; i++)
1311                         {
1312                                 // We don't use a 32-bit var here because of endian issues...!
1313                                 uint8 bits3 = pixels >> 56, bits2 = pixels >> 48,
1314                                         bits1 = pixels >> 40, bits0 = pixels >> 32;
1315
1316                                 if (flagTRANS && (bits3 | bits2 | bits1 | bits0) == 0)
1317                                         ;       // Do nothing...
1318                                 else
1319                                         *currentLineBuffer = bits3,
1320                                         *(currentLineBuffer + 1) = bits2,
1321                                         *(currentLineBuffer + 2) = bits1,
1322                                         *(currentLineBuffer + 3) = bits0;
1323
1324                                 currentLineBuffer += lbufDelta;
1325                                 pixels <<= 32;
1326                         }
1327                 }
1328         }
1329 }
1330
1331 //
1332 // Store scaled bitmap in line buffer
1333 //
1334 void OPProcessScaledBitmap(uint64 p0, uint64 p1, uint64 p2, bool render)
1335 {
1336 // Need to make sure that writes stay within the line buffer...
1337 // LBUF ($F01800 - $F01D9E) 360 x 32-bit RAM
1338         uint8 depth = (p1 >> 12) & 0x07;                                // Color depth of image
1339         int32 xpos = ((int16)((p1 << 4) & 0xFFFF)) >> 4;// Image xpos in LBUF
1340         uint32 iwidth = (p1 >> 28) & 0x3FF;                             // Image width in *phrases*
1341         uint32 data = (p0 >> 40) & 0xFFFFF8;                    // Pixel data address
1342 //#ifdef OP_DEBUG_BMP
1343 // Prolly should use this... Though not sure exactly how.
1344 //Use the upper bits as an offset into the phrase depending on the BPP. That's how!
1345         uint32 firstPix = (p1 >> 49) & 0x3F;
1346 //This is WEIRD! I'm sure I saw Atari Karts request 8 BPP FIRSTPIX! What happened???
1347 if (firstPix)
1348         WriteLog("OP: FIRSTPIX != 0! (Scaled BM)\n");
1349 //#endif
1350 // We can ignore the RELEASE (high order) bit for now--probably forever...!
1351 //      uint8 flags = (p1 >> 45) & 0x0F;        // REFLECT, RMW, TRANS, RELEASE
1352 //Optimize: break these out to their own BOOL values [DONE]
1353         uint8 flags = (p1 >> 45) & 0x07;                                // REFLECT (0), RMW (1), TRANS (2)
1354         bool flagREFLECT = (flags & OPFLAG_REFLECT ? true : false),
1355                 flagRMW = (flags & OPFLAG_RMW ? true : false),
1356                 flagTRANS = (flags & OPFLAG_TRANS ? true : false);
1357         uint8 index = (p1 >> 37) & 0xFE;                                // CLUT index offset (upper pix, 1-4 bpp)
1358         uint32 pitch = (p1 >> 15) & 0x07;                               // Phrase pitch
1359
1360         uint8 * tomRam8 = TOMGetRamPointer();
1361         uint8 * paletteRAM = &tomRam8[0x400];
1362         // This is OK as long as it's used correctly: For 16-bit RAM to RAM direct copies--NOT
1363         // for use when using endian-corrected data (i.e., any of the *ReadWord functions!)
1364         uint16 * paletteRAM16 = (uint16 *)paletteRAM;
1365
1366         uint16 hscale = p2 & 0xFF;
1367 // Hmm. It seems that fixing the horizontal scale necessitated re-fixing this. Not sure why,
1368 // but seems to be consistent with the vertical scaling now (and it may turn out to be wrong!)...
1369         uint16 horizontalRemainder = hscale;                            // Not sure if it starts full, but seems reasonable [It's not!]
1370 //      uint8 horizontalRemainder = 0;                                  // Let's try zero! Seems to work! Yay! [No, it doesn't!]
1371         int32 scaledWidthInPixels = (iwidth * phraseWidthToPixels[depth] * hscale) >> 5;
1372         uint32 scaledPhrasePixels = (phraseWidthToPixels[depth] * hscale) >> 5;
1373
1374 //      WriteLog("bitmap %ix? %ibpp at %i,? firstpix=? data=0x%.8x pitch %i hflipped=%s dwidth=? (linked to ?) RMW=%s Tranparent=%s\n",
1375 //              iwidth, op_bitmap_bit_depth[bitdepth], xpos, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), (flags&OPFLAG_RMW ? "yes" : "no"), (flags&OPFLAG_TRANS ? "yes" : "no"));
1376
1377 // Looks like an hscale of zero means don't draw!
1378         if (!render || iwidth == 0 || hscale == 0)
1379                 return;
1380
1381 /*extern int start_logging;
1382 if (start_logging)
1383         WriteLog("OP: Scaled bitmap %ix? %ibpp at %i,? hscale=%02X fpix=%i data=%08X pitch %i hflipped=%s dwidth=? (linked to %08X) Transluency=%s\n",
1384                 iwidth, op_bitmap_bit_depth[depth], xpos, hscale, firstPix, data, pitch, (flagREFLECT ? "yes" : "no"), op_pointer, (flagRMW ? "yes" : "no"));*/
1385 //#define OP_DEBUG_BMP
1386 //#ifdef OP_DEBUG_BMP
1387 //      WriteLog("OP: Scaled bitmap %ix%i %ibpp at %i,%i firstpix=%i data=0x%.8x pitch %i hflipped=%s dwidth=%i (linked to 0x%.8x) Transluency=%s\n",
1388 //              iwidth, height, op_bitmap_bit_depth[bitdepth], xpos, ypos, firstPix, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), dwidth, op_pointer, (flags&OPFLAG_RMW ? "yes" : "no"));
1389 //#endif
1390
1391         int32 startPos = xpos, endPos = xpos +
1392                 (!flagREFLECT ? scaledWidthInPixels - 1 : -(scaledWidthInPixels + 1));
1393         uint32 clippedWidth = 0, phraseClippedWidth = 0, dataClippedWidth = 0;
1394         bool in24BPPMode = (((GET16(tomRam8, 0x0028) >> 1) & 0x03) == 1 ? true : false);        // VMODE
1395         // Not sure if this is Jaguar Two only location or what...
1396         // From the docs, it is... If we want to limit here we should think of something else.
1397 //      int32 limit = GET16(tom_ram_8, 0x0008);                 // LIMIT
1398         int32 limit = 720;
1399 //      int32 lbufWidth = (!in24BPPMode ? limit - 1 : (limit / 2) - 1); // Zero based limit...
1400         int32 lbufWidth = 719;  // Zero based limit...
1401
1402         // If the image is completely to the left or right of the line buffer, then bail.
1403 //If in REFLECT mode, then these values are swapped! !!! FIX !!! [DONE]
1404 //There are four possibilities:
1405 //  1. image sits on left edge and no REFLECT; starts out of bounds but ends in bounds.
1406 //  2. image sits on left edge and REFLECT; starts in bounds but ends out of bounds.
1407 //  3. image sits on right edge and REFLECT; starts out of bounds but ends in bounds.
1408 //  4. image sits on right edge and no REFLECT; starts in bounds but ends out of bounds.
1409 //Numbers 2 & 4 can be caught by checking the LBUF clip while in the inner loop,
1410 // numbers 1 & 3 are of concern.
1411 // This *indirectly* handles only cases 2 & 4! And is WRONG if REFLECT is set...!
1412 //      if (rightMargin < 0 || leftMargin > lbufWidth)
1413
1414 // It might be easier to swap these (if REFLECTed) and just use XPOS down below...
1415 // That way, you could simply set XPOS to leftMargin if !REFLECT and to rightMargin otherwise.
1416 // Still have to be careful with the DATA and IWIDTH values though...
1417
1418         if ((!flagREFLECT && (endPos < 0 || startPos > lbufWidth))
1419                 || (flagREFLECT && (startPos < 0 || endPos > lbufWidth)))
1420                 return;
1421
1422         // Otherwise, find the clip limits and clip the phrase as well...
1423         // NOTE: I'm fudging here by letting the actual blit overstep the bounds of the
1424         //       line buffer, but it shouldn't matter since there are two unused line
1425         //       buffers below and nothing above and I'll at most write 40 bytes outside
1426         //       the line buffer... I could use a fractional clip begin/end value, but
1427         //       this makes the blit a *lot* more hairy. I might fix this in the future
1428         //       if it becomes necessary. (JLH)
1429         //       Probably wouldn't be *that* hairy. Just use a delta that tells the inner loop
1430         //       which pixel in the phrase is being written, and quit when either end of phrases
1431         //       is reached or line buffer extents are surpassed.
1432
1433 //This stuff is probably wrong as well... !!! FIX !!!
1434 //The strange thing is that it seems to work, but that's no guarantee that it's bulletproof!
1435 //Yup. Seems that JagMania doesn't work correctly with this...
1436 //Dunno if this is the problem, but Atari Karts is showing *some* of the road now...
1437 //Actually, it is! Or, it was. It doesn't seem to be clipping here, so the problem lies
1438 //elsewhere! Hmm. Putting the scaling code into the 1/2/8 BPP cases seems to draw the ground
1439 // a bit more accurately... Strange!
1440 //It's probably a case of the REFLECT flag being set and the background being written
1441 //from the right side of the screen...
1442 //But no, it isn't... At least if the diagnostics are telling the truth!
1443
1444         // NOTE: We're just using endPos to figure out how much, if any, to clip by.
1445         // ALSO: There may be another case where we start out of bounds and end out of bounds...!
1446         // !!! FIX !!!
1447
1448 //There's a problem here with scaledPhrasePixels in that it can be forced to zero when
1449 //the scaling factor is small. So fix it already! !!! FIX !!!
1450 /*if (scaledPhrasePixels == 0)
1451 {
1452         WriteLog("OP: [Scaled] We're about to encounter a divide by zero error!\n");
1453         DumpScaledObject(p0, p1, p2);
1454 }//*/
1455 //NOTE: I'm almost 100% sure that this is wrong... And it is! :-p
1456
1457 //Try a simple example...
1458 // Let's say we have a 8 BPP scanline with an hscale of $80 (4). Our xpos is -10,
1459 // non-flipped. Pixels in the bitmap are XYZXYZXYZXYZXYZ.
1460 // Scaled up, they would be XXXXYYYYZZZZXXXXYYYYZZZZXXXXYYYYZZZZ...
1461 //
1462 // Normally, we would expect this in the line buffer:
1463 // ZZXXXXYYYYZZZZXXXXYYYYZZZZ...
1464 //
1465 // But instead we're getting:
1466 // XXXXYYYYZZZZXXXXYYYYZZZZ...
1467 //
1468 // or are we??? It would seem so, simply by virtue of the fact that we're NOT starting
1469 // on negative boundary--or are we? Hmm...
1470 // cw = 10, dcw = pcw = 10 / ([8 * 4 = 32] 32) = 0, sp = -10
1471 //
1472 // Let's try a real world example:
1473 //
1474 //OP: Scaled bitmap (70, 8 BPP, spp=28) sp (-400) < 0... [new sp=-8, cw=400, dcw=pcw=14]
1475 //OP: Scaled bitmap (6F, 8 BPP, spp=27) sp (-395) < 0... [new sp=-17, cw=395, dcw=pcw=14]
1476 //
1477 // Really, spp is 27.75 in the second case...
1478 // So... If we do 395 / 27.75, we get 14 (rounded down). Ok so far... If we scale that
1479 // against the start position (-395 + 14 * 27.75), we get -6.5... NOT -17!
1480
1481 //Now it seems we're working OK, at least for the first case...
1482 uint32 scaledPhrasePixelsUS = phraseWidthToPixels[depth] * hscale;
1483
1484         if (startPos < 0)                       // Case #1: Begin out, end in, L to R
1485 {
1486 extern int start_logging;
1487 if (start_logging)
1488         WriteLog("OP: Scaled bitmap (%02X, %u BPP, spp=%u) start pos (%i) < 0...", hscale, op_bitmap_bit_depth[depth], scaledPhrasePixels, startPos);
1489 //              clippedWidth = 0 - startPos,
1490                 clippedWidth = (0 - startPos) << 5,
1491 //              dataClippedWidth = phraseClippedWidth = clippedWidth / scaledPhrasePixels,
1492                 dataClippedWidth = phraseClippedWidth = (clippedWidth / scaledPhrasePixelsUS) >> 5,
1493 //              startPos = 0 - (clippedWidth % scaledPhrasePixels);
1494                 startPos += (dataClippedWidth * scaledPhrasePixelsUS) >> 5;
1495 if (start_logging)
1496         WriteLog(" [new sp=%i, cw=%i, dcw=pcw=%i]\n", startPos, clippedWidth, dataClippedWidth);
1497 }
1498
1499         if (endPos < 0)                         // Case #2: Begin in, end out, R to L
1500                 clippedWidth = 0 - endPos,
1501                 phraseClippedWidth = clippedWidth / scaledPhrasePixels;
1502
1503         if (endPos > lbufWidth)         // Case #3: Begin in, end out, L to R
1504                 clippedWidth = endPos - lbufWidth,
1505                 phraseClippedWidth = clippedWidth / scaledPhrasePixels;
1506
1507         if (startPos > lbufWidth)       // Case #4: Begin out, end in, R to L
1508                 clippedWidth = startPos - lbufWidth,
1509                 dataClippedWidth = phraseClippedWidth = clippedWidth / scaledPhrasePixels,
1510                 startPos = lbufWidth + (clippedWidth % scaledPhrasePixels);
1511
1512 extern int op_start_log;
1513 if (op_start_log && clippedWidth != 0)
1514         WriteLog("OP: Clipped line. SP=%i, EP=%i, clip=%u, iwidth=%u, hscale=%02X\n", startPos, endPos, clippedWidth, iwidth, hscale);
1515 if (op_start_log && startPos == 13)
1516 {
1517         WriteLog("OP: Scaled line. SP=%i, EP=%i, clip=%u, iwidth=%u, hscale=%02X, depth=%u, firstPix=%u\n", startPos, endPos, clippedWidth, iwidth, hscale, depth, firstPix);
1518         DumpScaledObject(p0, p1, p2);
1519         if (iwidth == 7)
1520         {
1521                 WriteLog("    %08X: ", data);
1522                 for(int i=0; i<7*8; i++)
1523                         WriteLog("%02X ", JaguarReadByte(data+i));
1524                 WriteLog("\n");
1525         }
1526 }
1527         // If the image is sitting on the line buffer left or right edge, we need to compensate
1528         // by decreasing the image phrase width accordingly.
1529         iwidth -= phraseClippedWidth;
1530
1531         // Also, if we're clipping the phrase we need to make sure we're in the correct part of
1532         // the pixel data.
1533 //      data += phraseClippedWidth * (pitch << 3);
1534         data += dataClippedWidth * (pitch << 3);
1535
1536         // NOTE: When the bitmap is in REFLECT mode, the XPOS marks the *right* side of the
1537         //       bitmap! This makes clipping & etc. MUCH, much easier...!
1538 //      uint32 lbufAddress = 0x1800 + (!in24BPPMode ? leftMargin * 2 : leftMargin * 4);
1539 //      uint32 lbufAddress = 0x1800 + (!in24BPPMode ? startPos * 2 : startPos * 4);
1540         uint32 lbufAddress = 0x1800 + startPos * 2;
1541         uint8 * currentLineBuffer = &tomRam8[lbufAddress];
1542 //uint8 * lineBufferLowerLimit = &tom_ram_8[0x1800],
1543 //      * lineBufferUpperLimit = &tom_ram_8[0x1800 + 719];
1544
1545         // Render.
1546
1547 // Hmm. We check above for 24 BPP mode, but don't do anything about it below...
1548 // If we *were* in 24 BPP mode, how would you convert CRY to RGB24? Seems to me
1549 // that if you're in CRY mode then you wouldn't be able to use 24 BPP bitmaps
1550 // anyway.
1551 // This seems to be the case (at least according to the Midsummer docs)...!
1552
1553         if (depth == 0)                                                                 // 1 BPP
1554         {
1555 if (firstPix != 0)
1556         WriteLog("OP: Scaled bitmap @ 1 BPP requesting FIRSTPIX!\n");
1557                 // The LSB of flags is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1558                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1559
1560                 int pixCount = 0;
1561                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1562
1563                 while ((int32)iwidth > 0)
1564                 {
1565                         uint8 bits = pixels >> 63;
1566
1567 #ifndef OP_USES_PALETTE_ZERO
1568                         if (flagTRANS && bits == 0)
1569 #else
1570                         if (flagTRANS && (paletteRAM16[index | bits] == 0))
1571 #endif
1572                                 ;       // Do nothing...
1573                         else
1574                         {
1575                                 if (!flagRMW)
1576                                         // This is the *only* correct use of endian-dependent code
1577                                         // (i.e., mem-to-mem direct copying)!
1578                                         *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1579                                 else
1580                                         *currentLineBuffer =
1581                                                 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1582                                         *(currentLineBuffer + 1) =
1583                                                 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1584                         }
1585
1586                         currentLineBuffer += lbufDelta;
1587
1588 /*
1589 The reason we subtract the horizontalRemainder *after* the test is because we had too few
1590 bytes for horizontalRemainder to properly recognize a negative number. But now it's 16 bits
1591 wide, so we could probably go back to that (as long as we make it an int16 and not a uint16!)
1592 */
1593 /*                      horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1594                         while (horizontalRemainder & 0x80)
1595                         {
1596                                 horizontalRemainder += hscale;
1597                                 pixCount++;
1598                                 pixels <<= 1;
1599                         }//*/
1600 //                      while (horizontalRemainder <= 0x20)             // I.e., it's <= 1.0 (*before* subtraction)
1601                         while (horizontalRemainder < 0x20)              // I.e., it's <= 1.0 (*before* subtraction)
1602                         {
1603                                 horizontalRemainder += hscale;
1604                                 pixCount++;
1605                                 pixels <<= 1;
1606                         }
1607                         horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1608
1609                         if (pixCount > 63)
1610                         {
1611                                 int phrasesToSkip = pixCount / 64, pixelShift = pixCount % 64;
1612
1613                                 data += (pitch << 3) * phrasesToSkip;
1614                                 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1615                                 pixels <<= 1 * pixelShift;
1616                                 iwidth -= phrasesToSkip;
1617                                 pixCount = pixelShift;
1618                         }
1619                 }
1620         }
1621         else if (depth == 1)                                                    // 2 BPP
1622         {
1623 if (firstPix != 0)
1624         WriteLog("OP: Scaled bitmap @ 2 BPP requesting FIRSTPIX!\n");
1625                 index &= 0xFC;                                                          // Top six bits form CLUT index
1626                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1627                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1628
1629                 int pixCount = 0;
1630                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1631
1632                 while ((int32)iwidth > 0)
1633                 {
1634                         uint8 bits = pixels >> 62;
1635
1636 #ifndef OP_USES_PALETTE_ZERO
1637                         if (flagTRANS && bits == 0)
1638 #else
1639                         if (flagTRANS && (paletteRAM16[index | bits] == 0))
1640 #endif
1641                                 ;       // Do nothing...
1642                         else
1643                         {
1644                                 if (!flagRMW)
1645                                         // This is the *only* correct use of endian-dependent code
1646                                         // (i.e., mem-to-mem direct copying)!
1647                                         *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1648                                 else
1649                                         *currentLineBuffer =
1650                                                 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1651                                         *(currentLineBuffer + 1) =
1652                                                 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1653                         }
1654
1655                         currentLineBuffer += lbufDelta;
1656
1657 /*                      horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1658                         while (horizontalRemainder & 0x80)
1659                         {
1660                                 horizontalRemainder += hscale;
1661                                 pixCount++;
1662                                 pixels <<= 2;
1663                         }//*/
1664 //                      while (horizontalRemainder <= 0x20)             // I.e., it's <= 0 (*before* subtraction)
1665                         while (horizontalRemainder < 0x20)              // I.e., it's <= 1.0 (*before* subtraction)
1666                         {
1667                                 horizontalRemainder += hscale;
1668                                 pixCount++;
1669                                 pixels <<= 2;
1670                         }
1671                         horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1672
1673                         if (pixCount > 31)
1674                         {
1675                                 int phrasesToSkip = pixCount / 32, pixelShift = pixCount % 32;
1676
1677                                 data += (pitch << 3) * phrasesToSkip;
1678                                 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1679                                 pixels <<= 2 * pixelShift;
1680                                 iwidth -= phrasesToSkip;
1681                                 pixCount = pixelShift;
1682                         }
1683                 }
1684         }
1685         else if (depth == 2)                                                    // 4 BPP
1686         {
1687 if (firstPix != 0)
1688         WriteLog("OP: Scaled bitmap @ 4 BPP requesting FIRSTPIX!\n");
1689                 index &= 0xF0;                                                          // Top four bits form CLUT index
1690                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1691                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1692
1693                 int pixCount = 0;
1694                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1695
1696                 while ((int32)iwidth > 0)
1697                 {
1698                         uint8 bits = pixels >> 60;
1699
1700 #ifndef OP_USES_PALETTE_ZERO
1701                         if (flagTRANS && bits == 0)
1702 #else
1703                         if (flagTRANS && (paletteRAM16[index | bits] == 0))
1704 #endif
1705                                 ;       // Do nothing...
1706                         else
1707                         {
1708                                 if (!flagRMW)
1709                                         // This is the *only* correct use of endian-dependent code
1710                                         // (i.e., mem-to-mem direct copying)!
1711                                         *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1712                                 else
1713                                         *currentLineBuffer =
1714                                                 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1715                                         *(currentLineBuffer + 1) =
1716                                                 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1717                         }
1718
1719                         currentLineBuffer += lbufDelta;
1720
1721 /*                      horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1722                         while (horizontalRemainder & 0x80)
1723                         {
1724                                 horizontalRemainder += hscale;
1725                                 pixCount++;
1726                                 pixels <<= 4;
1727                         }//*/
1728 //                      while (horizontalRemainder <= 0x20)             // I.e., it's <= 0 (*before* subtraction)
1729                         while (horizontalRemainder < 0x20)              // I.e., it's <= 0 (*before* subtraction)
1730                         {
1731                                 horizontalRemainder += hscale;
1732                                 pixCount++;
1733                                 pixels <<= 4;
1734                         }
1735                         horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1736
1737                         if (pixCount > 15)
1738                         {
1739                                 int phrasesToSkip = pixCount / 16, pixelShift = pixCount % 16;
1740
1741                                 data += (pitch << 3) * phrasesToSkip;
1742                                 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1743                                 pixels <<= 4 * pixelShift;
1744                                 iwidth -= phrasesToSkip;
1745                                 pixCount = pixelShift;
1746                         }
1747                 }
1748         }
1749         else if (depth == 3)                                                    // 8 BPP
1750         {
1751 if (firstPix)
1752         WriteLog("OP: Scaled bitmap @ 8 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1753                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1754                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1755
1756                 int pixCount = 0;
1757                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1758
1759                 while ((int32)iwidth > 0)
1760                 {
1761                         uint8 bits = pixels >> 56;
1762
1763 #ifndef OP_USES_PALETTE_ZERO
1764                         if (flagTRANS && bits == 0)
1765 #else
1766                         if (flagTRANS && (paletteRAM16[bits] == 0))
1767 #endif
1768                                 ;       // Do nothing...
1769                         else
1770                         {
1771                                 if (!flagRMW)
1772                                         // This is the *only* correct use of endian-dependent code
1773                                         // (i.e., mem-to-mem direct copying)!
1774                                         *(uint16 *)currentLineBuffer = paletteRAM16[bits];
1775 /*                              {
1776                                         if (currentLineBuffer >= lineBufferLowerLimit && currentLineBuffer <= lineBufferUpperLimit)
1777                                                 *(uint16 *)currentLineBuffer = paletteRAM16[bits];
1778                                 }*/
1779                                 else
1780                                         *currentLineBuffer =
1781                                                 BLEND_CR(*currentLineBuffer, paletteRAM[bits << 1]),
1782                                         *(currentLineBuffer + 1) =
1783                                                 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[(bits << 1) + 1]);
1784                         }
1785
1786                         currentLineBuffer += lbufDelta;
1787
1788 //                      while (horizontalRemainder <= 0x20)             // I.e., it's <= 0 (*before* subtraction)
1789                         while (horizontalRemainder < 0x20)              // I.e., it's <= 1.0 (*before* subtraction)
1790                         {
1791                                 horizontalRemainder += hscale;
1792                                 pixCount++;
1793                                 pixels <<= 8;
1794                         }
1795                         horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1796
1797                         if (pixCount > 7)
1798                         {
1799                                 int phrasesToSkip = pixCount / 8, pixelShift = pixCount % 8;
1800
1801                                 data += (pitch << 3) * phrasesToSkip;
1802                                 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1803                                 pixels <<= 8 * pixelShift;
1804                                 iwidth -= phrasesToSkip;
1805                                 pixCount = pixelShift;
1806                         }
1807                 }
1808         }
1809         else if (depth == 4)                                                    // 16 BPP
1810         {
1811 if (firstPix != 0)
1812         WriteLog("OP: Scaled bitmap @ 16 BPP requesting FIRSTPIX!\n");
1813                 // The LSB is OPFLAG_REFLECT, so sign extend it and OR 2 into it.
1814                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1815
1816                 int pixCount = 0;
1817                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1818
1819                 while ((int32)iwidth > 0)
1820                 {
1821                         uint8 bitsHi = pixels >> 56, bitsLo = pixels >> 48;
1822
1823 //This doesn't seem right... Let's try the encoded black value ($8800):
1824 //Apparently, CRY 0 maps to $8800...
1825                                 if (flagTRANS && ((bitsLo | bitsHi) == 0))
1826 //                              if (flagTRANS && (bitsHi == 0x88) && (bitsLo == 0x00))
1827                                 ;       // Do nothing...
1828                         else
1829                         {
1830                                 if (!flagRMW)
1831                                         *currentLineBuffer = bitsHi,
1832                                         *(currentLineBuffer + 1) = bitsLo;
1833                                 else
1834                                         *currentLineBuffer =
1835                                                 BLEND_CR(*currentLineBuffer, bitsHi),
1836                                         *(currentLineBuffer + 1) =
1837                                                 BLEND_Y(*(currentLineBuffer + 1), bitsLo);
1838                         }
1839
1840                         currentLineBuffer += lbufDelta;
1841
1842 /*                      horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1843                         while (horizontalRemainder & 0x80)
1844                         {
1845                                 horizontalRemainder += hscale;
1846                                 pixCount++;
1847                                 pixels <<= 16;
1848                         }//*/
1849 //                      while (horizontalRemainder <= 0x20)             // I.e., it's <= 0 (*before* subtraction)
1850                         while (horizontalRemainder < 0x20)              // I.e., it's <= 1.0 (*before* subtraction)
1851                         {
1852                                 horizontalRemainder += hscale;
1853                                 pixCount++;
1854                                 pixels <<= 16;
1855                         }
1856                         horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1857 //*/
1858                         if (pixCount > 3)
1859                         {
1860                                 int phrasesToSkip = pixCount / 4, pixelShift = pixCount % 4;
1861
1862                                 data += (pitch << 3) * phrasesToSkip;
1863                                 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1864                                 pixels <<= 16 * pixelShift;
1865
1866                                 iwidth -= phrasesToSkip;
1867
1868                                 pixCount = pixelShift;
1869                         }
1870                 }
1871         }
1872         else if (depth == 5)                                                    // 24 BPP
1873         {
1874 //I'm not sure that you can scale a 24 BPP bitmap properly--the JTRM seems to indicate as much.
1875 WriteLog("OP: Writing 24 BPP scaled bitmap!\n");
1876 if (firstPix != 0)
1877         WriteLog("OP: Scaled bitmap @ 24 BPP requesting FIRSTPIX!\n");
1878                 // Not sure, but I think RMW only works with 16 BPP and below, and only in CRY mode...
1879                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 4 into it.
1880                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 4) | 0x04;
1881
1882                 while (iwidth--)
1883                 {
1884                         // Fetch phrase...
1885                         uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1886                         data += pitch << 3;                                             // Multiply pitch * 8 (optimize: precompute this value)
1887
1888                         for(int i=0; i<2; i++)
1889                         {
1890                                 uint8 bits3 = pixels >> 56, bits2 = pixels >> 48,
1891                                         bits1 = pixels >> 40, bits0 = pixels >> 32;
1892
1893                                 if (flagTRANS && (bits3 | bits2 | bits1 | bits0) == 0)
1894                                         ;       // Do nothing...
1895                                 else
1896                                         *currentLineBuffer = bits3,
1897                                         *(currentLineBuffer + 1) = bits2,
1898                                         *(currentLineBuffer + 2) = bits1,
1899                                         *(currentLineBuffer + 3) = bits0;
1900
1901                                 currentLineBuffer += lbufDelta;
1902                                 pixels <<= 32;
1903                         }
1904                 }
1905         }
1906 }