]> Shamusworld >> Repos - virtualjaguar/blob - src/op.cpp
Added Object Processor browser window for debug mode.
[virtualjaguar] / src / op.cpp
1 //
2 // Object Processor
3 //
4 // Original source by David Raingeard (Cal2)
5 // GCC/SDL port by Niels Wagenaar (Linux/WIN32) and Caz (BeOS)
6 // Extensive cleanups/fixes/rewrites by James Hammons
7 // (C) 2010 Underground Software
8 //
9 // JLH = James Hammons <jlhamm@acm.org>
10 //
11 // Who  When        What
12 // ---  ----------  -------------------------------------------------------------
13 // JLH  01/16/2010  Created this log ;-)
14 //
15
16 #include "op.h"
17
18 #include <stdlib.h>
19 #include <string.h>
20 #include "gpu.h"
21 #include "jaguar.h"
22 #include "log.h"
23 #include "m68000/m68kinterface.h"
24 #include "memory.h"
25 #include "tom.h"
26
27 //#define OP_DEBUG
28 //#define OP_DEBUG_BMP
29
// Saturating-blend table lookups. The destination byte selects the 256-entry
// row and the source (delta) byte selects the entry within it. Each argument is
// parenthesized before the cast so that compound expressions such as
// `pixel & 0xFF` are cast and shifted as a whole.
#define BLEND_Y(dst, src)       op_blend_y[(((uint16)(dst)) << 8) | ((uint16)(src))]
#define BLEND_CR(dst, src)      op_blend_cr[(((uint16)(dst)) << 8) | ((uint16)(src))]
32
33 #define OBJECT_TYPE_BITMAP      0                                       // 000
34 #define OBJECT_TYPE_SCALE       1                                       // 001
35 #define OBJECT_TYPE_GPU         2                                       // 010
36 #define OBJECT_TYPE_BRANCH      3                                       // 011
37 #define OBJECT_TYPE_STOP        4                                       // 100
38
39 #define CONDITION_EQUAL                         0                       // VC == YPOS
40 #define CONDITION_LESS_THAN                     1                       // VC < YPOS
41 #define CONDITION_GREATER_THAN          2                       // VC > YPOS
42 #define CONDITION_OP_FLAG_SET           3
43 #define CONDITION_SECOND_HALF_LINE      4
44
45 #if 0
46 #define OPFLAG_RELEASE          8                                       // Bus release bit
47 #define OPFLAG_TRANS            4                                       // Transparency bit
48 #define OPFLAG_RMW                      2                                       // Read-Modify-Write bit
49 #define OPFLAG_REFLECT          1                                       // Horizontal mirror bit
50 #endif
51
52 // Private function prototypes
53
54 void OPProcessFixedBitmap(uint64 p0, uint64 p1, bool render);
55 void OPProcessScaledBitmap(uint64 p0, uint64 p1, uint64 p2, bool render);
56 void OPDiscoverObjects(uint32 address);
57 void OPDumpObjectList(void);
58 void DumpScaledObject(uint64 p0, uint64 p1, uint64 p2);
59 void DumpFixedObject(uint64 p0, uint64 p1);
60 void DumpBitmapCore(uint64 p0, uint64 p1);
61 uint64 OPLoadPhrase(uint32 offset);
62
63 // Local global variables
64
// Blend tables (64K each). Both are filled in by OPInit() and are indexed with
// (existing value << 8) | delta through the BLEND_Y / BLEND_CR macros.
static uint8 op_blend_y[0x10000];
static uint8 op_blend_cr[0x10000];
// There may be a problem with this "RAM" overlapping (and thus being independent of)
// some of the regular TOM RAM...
//#warning objectp_ram is separated from TOM RAM--need to fix that!
//static uint8 objectp_ram[0x40];                       // This is based at $F00000
// Cleared by OPReset(); nothing else in this part of the file writes it.
uint8 objectp_running = 0;
//bool objectp_stop_reading_list;

// Bits per pixel for each value of the 3-bit depth field ((p1 >> 12) & 0x07).
static uint8 op_bitmap_bit_depth[8] = { 1, 2, 4, 8, 16, 24, 32, 0 };
//static uint32 op_bitmap_bit_size[8] =
//      { (uint32)(0.125*65536), (uint32)(0.25*65536), (uint32)(0.5*65536), (uint32)(1*65536),
//        (uint32)(2*65536),     (uint32)(1*65536),    (uint32)(1*65536),   (uint32)(1*65536) };
// Address of the next phrase to fetch while OPProcessList() walks the object list.
static uint32 op_pointer;

// NOTE(review): presumably pixels per 64-bit phrase for each depth code (64 / bpp);
// not referenced in this part of the file -- confirm against its users.
int32 phraseWidthToPixels[8] = { 64, 32, 16, 8, 4, 2, 0, 0 };
82
83
84 //
85 // Object Processor initialization
86 //
87 void OPInit(void)
88 {
89         // Here we calculate the saturating blend of a signed 4-bit value and an
90         // existing Cyan/Red value as well as a signed 8-bit value and an existing intensity...
91         // Note: CRY is 4 bits Cyan, 4 bits Red, 16 bits intensitY
92         for(int i=0; i<256*256; i++)
93         {
94                 int y = (i >> 8) & 0xFF;
95                 int dy = (int8)i;                                       // Sign extend the Y index
96                 int c1 = (i >> 8) & 0x0F;
97                 int dc1 = (int8)(i << 4) >> 4;          // Sign extend the R index
98                 int c2 = (i >> 12) & 0x0F;
99                 int dc2 = (int8)(i & 0xF0) >> 4;        // Sign extend the C index
100
101                 y += dy;
102
103                 if (y < 0)
104                         y = 0;
105                 else if (y > 0xFF)
106                         y = 0xFF;
107
108                 op_blend_y[i] = y;
109
110                 c1 += dc1;
111
112                 if (c1 < 0)
113                         c1 = 0;
114                 else if (c1 > 0x0F)
115                         c1 = 0x0F;
116
117                 c2 += dc2;
118
119                 if (c2 < 0)
120                         c2 = 0;
121                 else if (c2 > 0x0F)
122                         c2 = 0x0F;
123
124                 op_blend_cr[i] = (c2 << 4) | c1;
125         }
126
127         OPReset();
128 }
129
130
131 //
132 // Object Processor reset
133 //
134 void OPReset(void)
135 {
136 //      memset(objectp_ram, 0x00, 0x40);
137         objectp_running = 0;
138 }
139
140
// Printable names for the 3-bit object type field (low three bits of an
// object's first phrase); used by the object list dump.
static const char * opType[8] =
{ "(BITMAP)", "(SCALED BITMAP)", "(GPU INT)", "(BRANCH)", "(STOP)", "???", "???", "???" };
// Printable names for the 3-bit condition code of a BRANCH object.
static const char * ccType[8] =
	{ "\"==\"", "\"<\"", "\">\"", "(opflag set)", "(second half line)", "?", "?", "?" };
// Addresses of the objects found by OPDiscoverObjects(), in discovery order,
// and the number of valid entries. OPDone() zeroes the count before each walk.
static uint32 object[8192];
static uint32 numberOfObjects;
//static uint32 objectLink[8192];
//static uint32 numberOfLinks;
149
150
151 void OPDone(void)
152 {
153 //#warning "!!! Fix OL dump so that it follows links !!!"
154 //      const char * opType[8] =
155 //      { "(BITMAP)", "(SCALED BITMAP)", "(GPU INT)", "(BRANCH)", "(STOP)", "???", "???", "???" };
156 //      const char * ccType[8] =
157 //              { "\"==\"", "\"<\"", "\">\"", "(opflag set)", "(second half line)", "?", "?", "?" };
158
159         uint32 olp = OPGetListPointer();
160         WriteLog("\nOP: OLP = $%08X\n", olp);
161         WriteLog("OP: Phrase dump\n    ----------\n");
162
163 #if 0
164         for(uint32 i=0; i<0x100; i+=8)
165         {
166                 uint32 hi = JaguarReadLong(olp + i, OP), lo = JaguarReadLong(olp + i + 4, OP);
167                 WriteLog("\t%08X: %08X %08X %s", olp + i, hi, lo, opType[lo & 0x07]);
168
169                 if ((lo & 0x07) == 3)
170                 {
171                         uint16 ypos = (lo >> 3) & 0x7FF;
172                         uint8  cc   = (lo >> 14) & 0x03;
173                         uint32 link = ((hi << 11) | (lo >> 21)) & 0x3FFFF8;
174                         WriteLog(" YPOS=%u, CC=%s, link=%08X", ypos, ccType[cc], link);
175                 }
176
177                 WriteLog("\n");
178
179                 if ((lo & 0x07) == 0)
180                         DumpFixedObject(OPLoadPhrase(olp+i), OPLoadPhrase(olp+i+8));
181
182                 if ((lo & 0x07) == 1)
183                         DumpScaledObject(OPLoadPhrase(olp+i), OPLoadPhrase(olp+i+8), OPLoadPhrase(olp+i+16));
184         }
185
186         WriteLog("\n");
187 #else
188 //#warning "!!! Fix lockup in OPDiscoverObjects() !!!"
189 //temp, to keep the following function from locking up on bad/weird OLs
190 //return;
191
192         numberOfObjects = 0;
193 //printf("OPDiscoverObjects...\n");
194         OPDiscoverObjects(olp);
195 //printf("OPDumpObjectList...\n");
196         OPDumpObjectList();
197 #endif
198 }
199
200
201 bool OPObjectExists(uint32 address)
202 {
203         // Yes, we really do a linear search, every time. :-/
204         for(uint32 i=0; i<numberOfObjects; i++)
205         {
206                 if (address == object[i])
207                         return true;
208         }
209
210         return false;
211 }
212
213
214 void OPDiscoverObjects(uint32 address)
215 {
216         uint8 objectType = 0;
217
218         do
219         {
220                 // If we've seen this object already, bail out!
221                 // Otherwise, add it to the list
222                 if (OPObjectExists(address))
223                         return;
224
225                 object[numberOfObjects++] = address;
226
227                 // Get the object & decode its type, link address
228                 uint32 hi = JaguarReadLong(address + 0, OP);
229                 uint32 lo = JaguarReadLong(address + 4, OP);
230                 objectType = lo & 0x07;
231                 uint32 link = ((hi << 11) | (lo >> 21)) & 0x3FFFF8;
232
233                 if (objectType == 3)
234                 {
235                         // Recursion needed to follow all links! This does depth-first recursion
236                         // on the not-taken objects
237                         OPDiscoverObjects(address + 8);
238                 }
239
240                 // Get the next object...
241                 address = link;
242         }
243         while (objectType != 4);
244 }
245
246
247 void OPDumpObjectList(void)
248 {
249         for(uint32 i=0; i<numberOfObjects; i++)
250         {
251                 uint32 address = object[i];
252
253                 uint32 hi = JaguarReadLong(address + 0, OP);
254                 uint32 lo = JaguarReadLong(address + 4, OP);
255                 uint8 objectType = lo & 0x07;
256                 uint32 link = ((hi << 11) | (lo >> 21)) & 0x3FFFF8;
257                 WriteLog("%08X: %08X %08X %s", address, hi, lo, opType[objectType]);
258
259                 if (objectType == 3)
260                 {
261                         uint16 ypos = (lo >> 3) & 0x7FF;
262                         uint8  cc   = (lo >> 14) & 0x07;        // Proper # of bits == 3
263                         WriteLog(" YPOS=%u, CC=%s, link=$%08X", ypos, ccType[cc], link);
264                 }
265
266                 WriteLog("\n");
267
268                 if (objectType == 0)
269                         DumpFixedObject(OPLoadPhrase(address + 0), OPLoadPhrase(address + 8));
270
271                 if (objectType == 1)
272                         DumpScaledObject(OPLoadPhrase(address + 0), OPLoadPhrase(address + 8),
273                                 OPLoadPhrase(address + 16));
274
275                 if (address == link)    // Ruh roh...
276                 {
277                         // Runaway recursive link is bad!
278                         WriteLog("***** SELF REFERENTIAL LINK *****\n\n");
279                 }
280         }
281
282         WriteLog("\n");
283 }
284
285
286 //
287 // Object Processor memory access
288 // Memory range: F00010 - F00027
289 //
290 //      F00010-F00017   R     xxxxxxxx xxxxxxxx   OB - current object code from the graphics processor
291 //      F00020-F00023     W   xxxxxxxx xxxxxxxx   OLP - start of the object list
292 //      F00026            W   -------- -------x   OBF - object processor flag
293 //
294
295 #if 0
296 uint8 OPReadByte(uint32 offset, uint32 who/*=UNKNOWN*/)
297 {
298         offset &= 0x3F;
299         return objectp_ram[offset];
300 }
301
302 uint16 OPReadWord(uint32 offset, uint32 who/*=UNKNOWN*/)
303 {
304         offset &= 0x3F;
305         return GET16(objectp_ram, offset);
306 }
307
308 void OPWriteByte(uint32 offset, uint8 data, uint32 who/*=UNKNOWN*/)
309 {
310         offset &= 0x3F;
311         objectp_ram[offset] = data;
312 }
313
314 void OPWriteWord(uint32 offset, uint16 data, uint32 who/*=UNKNOWN*/)
315 {
316         offset &= 0x3F;
317         SET16(objectp_ram, offset, data);
318
319 /*if (offset == 0x20)
320 WriteLog("OP: Setting lo list pointer: %04X\n", data);
321 if (offset == 0x22)
322 WriteLog("OP: Setting hi list pointer: %04X\n", data);//*/
323 }
324 #endif
325
326
327 uint32 OPGetListPointer(void)
328 {
329         // Note: This register is LO / HI WORD, hence the funky look of this...
330         return GET16(tomRam8, 0x20) | (GET16(tomRam8, 0x22) << 16);
331 }
332
333
334 // This is WRONG, since the OBF is only 16 bits wide!!! [FIXED]
335
336 uint32 OPGetStatusRegister(void)
337 {
338         return GET16(tomRam8, 0x26);
339 }
340
341
342 // This is WRONG, since the OBF is only 16 bits wide!!! [FIXED]
343
344 void OPSetStatusRegister(uint32 data)
345 {
346         tomRam8[0x26] = (data & 0x0000FF00) >> 8;
347         tomRam8[0x27] |= (data & 0xFE);
348 }
349
350
351 void OPSetCurrentObject(uint64 object)
352 {
353 //Not sure this is right... Wouldn't it just be stored 64 bit BE?
354         // Stored as least significant 32 bits first, ms32 last in big endian
355 /*      objectp_ram[0x13] = object & 0xFF; object >>= 8;
356         objectp_ram[0x12] = object & 0xFF; object >>= 8;
357         objectp_ram[0x11] = object & 0xFF; object >>= 8;
358         objectp_ram[0x10] = object & 0xFF; object >>= 8;
359
360         objectp_ram[0x17] = object & 0xFF; object >>= 8;
361         objectp_ram[0x16] = object & 0xFF; object >>= 8;
362         objectp_ram[0x15] = object & 0xFF; object >>= 8;
363         objectp_ram[0x14] = object & 0xFF;*/
364 // Let's try regular good old big endian...
365         tomRam8[0x17] = object & 0xFF; object >>= 8;
366         tomRam8[0x16] = object & 0xFF; object >>= 8;
367         tomRam8[0x15] = object & 0xFF; object >>= 8;
368         tomRam8[0x14] = object & 0xFF; object >>= 8;
369
370         tomRam8[0x13] = object & 0xFF; object >>= 8;
371         tomRam8[0x12] = object & 0xFF; object >>= 8;
372         tomRam8[0x11] = object & 0xFF; object >>= 8;
373         tomRam8[0x10] = object & 0xFF;
374 }
375
376
377 uint64 OPLoadPhrase(uint32 offset)
378 {
379         offset &= ~0x07;                                                // 8 byte alignment
380         return ((uint64)JaguarReadLong(offset, OP) << 32) | (uint64)JaguarReadLong(offset+4, OP);
381 }
382
383
384 void OPStorePhrase(uint32 offset, uint64 p)
385 {
386         offset &= ~0x07;                                                // 8 byte alignment
387         JaguarWriteLong(offset, p >> 32, OP);
388         JaguarWriteLong(offset + 4, p & 0xFFFFFFFF, OP);
389 }
390
391
392 //
393 // Debugging routines
394 //
395 void DumpScaledObject(uint64 p0, uint64 p1, uint64 p2)
396 {
397         WriteLog("          %08X %08X\n", (uint32)(p1>>32), (uint32)(p1&0xFFFFFFFF));
398         WriteLog("          %08X %08X\n", (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
399         DumpBitmapCore(p0, p1);
400         uint32 hscale = p2 & 0xFF;
401         uint32 vscale = (p2 >> 8) & 0xFF;
402         uint32 remainder = (p2 >> 16) & 0xFF;
403         WriteLog("    [hsc: %02X, vsc: %02X, rem: %02X]\n", hscale, vscale, remainder);
404 }
405
406
407 void DumpFixedObject(uint64 p0, uint64 p1)
408 {
409         WriteLog("          %08X %08X\n", (uint32)(p1>>32), (uint32)(p1&0xFFFFFFFF));
410         DumpBitmapCore(p0, p1);
411 }
412
413
414 void DumpBitmapCore(uint64 p0, uint64 p1)
415 {
416         uint32 bdMultiplier[8] = { 64, 32, 16, 8, 4, 2, 1, 1 };
417         uint8 bitdepth = (p1 >> 12) & 0x07;
418 //WAS:  int16 ypos = ((p0 >> 3) & 0x3FF);                       // ??? What if not interlaced (/2)?
419         int16 ypos = ((p0 >> 3) & 0x7FF);                       // ??? What if not interlaced (/2)?
420         int32 xpos = p1 & 0xFFF;
421         xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);       // Sign extend that mutha!
422         uint32 iwidth = ((p1 >> 28) & 0x3FF);
423         uint32 dwidth = ((p1 >> 18) & 0x3FF);           // Unsigned!
424         uint16 height = ((p0 >> 14) & 0x3FF);
425         uint32 link = ((p0 >> 24) & 0x7FFFF) << 3;
426         uint32 ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
427         uint32 firstPix = (p1 >> 49) & 0x3F;
428         uint8 flags = (p1 >> 45) & 0x0F;
429         uint8 idx = (p1 >> 38) & 0x7F;
430         uint32 pitch = (p1 >> 15) & 0x07;
431         WriteLog("    [%u x %u @ (%i, %u) (iw:%u, dw:%u) (%u bpp), l:%08X, p:%08X fp:%02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
432                 iwidth * bdMultiplier[bitdepth],
433                 height, xpos, ypos, iwidth, dwidth, op_bitmap_bit_depth[bitdepth], link,
434                 ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""),
435                 (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""),
436                 (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
437 }
438
439
440 //
441 // Object Processor main routine
442 //
443 #warning "Need to fix this so that when an GPU object IRQ happens, we can pick up OP processing where we left off. !!! FIX !!!"
444 void OPProcessList(int halfline, bool render)
445 {
446 #warning "!!! NEED TO HANDLE MULTIPLE FIELDS PROPERLY !!!
447 // We ignore them, for now; not good
448         halfline &= 0x7FF;
449
450 extern int op_start_log;
451 //      char * condition_to_str[8] =
452 //              { "==", "<", ">", "(opflag set)", "(second half line)", "?", "?", "?" };
453
454         op_pointer = OPGetListPointer();
455
456 //      objectp_stop_reading_list = false;
457
458 //WriteLog("OP: Processing line #%u (OLP=%08X)...\n", halfline, op_pointer);
459 //op_done();
460
461 // *** BEGIN OP PROCESSOR TESTING ONLY ***
462 extern bool interactiveMode;
463 extern bool iToggle;
464 extern int objectPtr;
465 bool inhibit;
466 int bitmapCounter = 0;
467 // *** END OP PROCESSOR TESTING ONLY ***
468
469         uint32 opCyclesToRun = 30000;                                   // This is a pulled-out-of-the-air value (will need to be fixed, obviously!)
470
471 //      if (op_pointer) WriteLog(" new op list at 0x%.8x halfline %i\n",op_pointer,halfline);
472         while (op_pointer)
473         {
474 // *** BEGIN OP PROCESSOR TESTING ONLY ***
475 if (interactiveMode && bitmapCounter == objectPtr)
476         inhibit = iToggle;
477 else
478         inhibit = false;
479 // *** END OP PROCESSOR TESTING ONLY ***
480 //              if (objectp_stop_reading_list)
481 //                      return;
482
483                 uint64 p0 = OPLoadPhrase(op_pointer);
484                 op_pointer += 8;
485 //WriteLog("\t%08X type %i\n", op_pointer, (uint8)p0 & 0x07);
486
487 #if 1
488 if (halfline == TOMGetVDB() && op_start_log)
489 //if (halfline == 215 && op_start_log)
490 //if (halfline == 28 && op_start_log)
491 //if (halfline == 0)
492 {
493 WriteLog("%08X --> phrase %08X %08X", op_pointer - 8, (int)(p0>>32), (int)(p0&0xFFFFFFFF));
494 if ((p0 & 0x07) == OBJECT_TYPE_BITMAP)
495 {
496 WriteLog(" (BITMAP) ");
497 uint64 p1 = OPLoadPhrase(op_pointer);
498 WriteLog("\n%08X --> phrase %08X %08X ", op_pointer, (int)(p1>>32), (int)(p1&0xFFFFFFFF));
499         uint8 bitdepth = (p1 >> 12) & 0x07;
500 //WAS:  int16 ypos = ((p0 >> 3) & 0x3FF);                       // ??? What if not interlaced (/2)?
501         int16 ypos = ((p0 >> 3) & 0x7FF);                       // ??? What if not interlaced (/2)?
502 int32 xpos = p1 & 0xFFF;
503 xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);
504         uint32 iwidth = ((p1 >> 28) & 0x3FF);
505         uint32 dwidth = ((p1 >> 18) & 0x3FF);           // Unsigned!
506         uint16 height = ((p0 >> 14) & 0x3FF);
507         uint32 link = ((p0 >> 24) & 0x7FFFF) << 3;
508         uint32 ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
509         uint32 firstPix = (p1 >> 49) & 0x3F;
510         uint8 flags = (p1 >> 45) & 0x0F;
511         uint8 idx = (p1 >> 38) & 0x7F;
512         uint32 pitch = (p1 >> 15) & 0x07;
513 WriteLog("\n    [%u (%u) x %u @ (%i, %u) (%u bpp), l: %08X, p: %08X fp: %02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
514         iwidth, dwidth, height, xpos, ypos, op_bitmap_bit_depth[bitdepth], link, ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""), (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""), (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
515 }
516 if ((p0 & 0x07) == OBJECT_TYPE_SCALE)
517 {
518 WriteLog(" (SCALED BITMAP)");
519 uint64 p1 = OPLoadPhrase(op_pointer), p2 = OPLoadPhrase(op_pointer+8);
520 WriteLog("\n%08X --> phrase %08X %08X ", op_pointer, (int)(p1>>32), (int)(p1&0xFFFFFFFF));
521 WriteLog("\n%08X --> phrase %08X %08X ", op_pointer+8, (int)(p2>>32), (int)(p2&0xFFFFFFFF));
522         uint8 bitdepth = (p1 >> 12) & 0x07;
523 //WAS:  int16 ypos = ((p0 >> 3) & 0x3FF);                       // ??? What if not interlaced (/2)?
524         int16 ypos = ((p0 >> 3) & 0x7FF);                       // ??? What if not interlaced (/2)?
525 int32 xpos = p1 & 0xFFF;
526 xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);
527         uint32 iwidth = ((p1 >> 28) & 0x3FF);
528         uint32 dwidth = ((p1 >> 18) & 0x3FF);           // Unsigned!
529         uint16 height = ((p0 >> 14) & 0x3FF);
530         uint32 link = ((p0 >> 24) & 0x7FFFF) << 3;
531         uint32 ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
532         uint32 firstPix = (p1 >> 49) & 0x3F;
533         uint8 flags = (p1 >> 45) & 0x0F;
534         uint8 idx = (p1 >> 38) & 0x7F;
535         uint32 pitch = (p1 >> 15) & 0x07;
536 WriteLog("\n    [%u (%u) x %u @ (%i, %u) (%u bpp), l: %08X, p: %08X fp: %02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
537         iwidth, dwidth, height, xpos, ypos, op_bitmap_bit_depth[bitdepth], link, ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""), (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""), (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
538         uint32 hscale = p2 & 0xFF;
539         uint32 vscale = (p2 >> 8) & 0xFF;
540         uint32 remainder = (p2 >> 16) & 0xFF;
541 WriteLog("    [hsc: %02X, vsc: %02X, rem: %02X]\n", hscale, vscale, remainder);
542 }
543 if ((p0 & 0x07) == OBJECT_TYPE_GPU)
544 WriteLog(" (GPU)\n");
545 if ((p0 & 0x07) == OBJECT_TYPE_BRANCH)
546 {
547 WriteLog(" (BRANCH)\n");
548 uint8 * jaguarMainRam = GetRamPtr();
549 WriteLog("[RAM] --> ");
550 for(int k=0; k<8; k++)
551         WriteLog("%02X ", jaguarMainRam[op_pointer-8 + k]);
552 WriteLog("\n");
553 }
554 if ((p0 & 0x07) == OBJECT_TYPE_STOP)
555 WriteLog("    --> List end\n\n");
556 }
557 #endif
558
559                 switch ((uint8)p0 & 0x07)
560                 {
561                 case OBJECT_TYPE_BITMAP:
562                 {
563 //WAS:                  uint16 ypos = (p0 >> 3) & 0x3FF;
564                         uint16 ypos = (p0 >> 3) & 0x7FF;
565 // This is only theory implied by Rayman...!
566 // It seems that if the YPOS is zero, then bump the YPOS value so that it coincides with
567 // the VDB value. With interlacing, this would be slightly more tricky.
568 // There's probably another bit somewhere that enables this mode--but so far, doesn't seem
569 // to affect any other game in a negative way (that I've seen).
570 // Either that, or it's an undocumented bug...
571
572 //No, the reason this was needed is that the OP code before was wrong. Any value
573 //less than VDB will get written to the top line of the display!
574 #if 0
575 // Not so sure... Let's see what happens here...
576 // No change...
577                         if (ypos == 0)
578                                 ypos = TOMReadWord(0xF00046, OP) / 2;                   // Get the VDB value
579 #endif
580 // Actually, no. Any item less than VDB will get only the lines that hang over VDB displayed.
581 // Actually, this is incorrect. It seems that VDB value is wrong somewhere and that's
582 // what's causing things to fuck up. Still no idea why.
583
584                         uint32 height = (p0 & 0xFFC000) >> 14;
585                         uint32 oldOPP = op_pointer - 8;
586 // *** BEGIN OP PROCESSOR TESTING ONLY ***
587 if (inhibit && op_start_log)
588         WriteLog("!!! ^^^ This object is INHIBITED! ^^^ !!!\n");
589 bitmapCounter++;
590 if (!inhibit)   // For OP testing only!
591 // *** END OP PROCESSOR TESTING ONLY ***
592                         if (halfline >= ypos && height > 0)
593                         {
594                                 uint64 p1 = OPLoadPhrase(op_pointer);
595                                 op_pointer += 8;
596 //WriteLog("OP: Writing halfline %d with ypos == %d...\n", halfline, ypos);
597 //WriteLog("--> Writing %u BPP bitmap...\n", op_bitmap_bit_depth[(p1 >> 12) & 0x07]);
598 //                              OPProcessFixedBitmap(halfline, p0, p1, render);
599                                 OPProcessFixedBitmap(p0, p1, render);
600
601                                 // OP write-backs
602
603 //???Does this really happen??? Doesn't seem to work if you do this...!
604 //Probably not. Must be a bug in the documentation...!
605 //                              uint32 link = (p0 & 0x7FFFF000000) >> 21;
606 //                              SET16(tom_ram_8, 0x20, link & 0xFFFF);  // OLP
607 //                              SET16(tom_ram_8, 0x22, link >> 16);
608 /*                              uint32 height = (p0 & 0xFFC000) >> 14;
609                                 if (height - 1 > 0)
610                                         height--;*/
611                                 // NOTE: Would subtract 2 if in interlaced mode...!
612 //                              uint64 height = ((p0 & 0xFFC000) - 0x4000) & 0xFFC000;
613 //                              if (height)
614                                 height--;
615
616                                 uint64 data = (p0 & 0xFFFFF80000000000LL) >> 40;
617                                 uint64 dwidth = (p1 & 0xFFC0000) >> 15;
618                                 data += dwidth;
619
620                                 p0 &= ~0xFFFFF80000FFC000LL;            // Mask out old data...
621                                 p0 |= (uint64)height << 14;
622                                 p0 |= data << 40;
623                                 OPStorePhrase(oldOPP, p0);
624                         }
625 //WriteLog("\t\tOld OP: %08X -> ", op_pointer);
626 //Temp, for testing...
627 //No doubt, this type of check will break all kinds of stuff... !!! FIX !!!
628 //And it does! !!! FIX !!!
629 //Let's remove this "fix" since it screws up more than it fixes.
630 /*      if (op_pointer > ((p0 & 0x000007FFFF000000LL) >> 21))
631                 return;*/
632
633 // NOTE: The link address only replaces bits 3-21 in the OLP, and this replaces
634 //       EVERYTHING. !!! FIX !!! [DONE]
635 #warning "!!! Link address is not linked properly for all object types !!!"
636 #warning "!!! Only BITMAP is properly handled !!!"
637                         op_pointer &= 0xFFC00007;
638                         op_pointer |= (p0 & 0x000007FFFF000000LL) >> 21;
639 //WriteLog("New OP: %08X\n", op_pointer);
640 //kludge: Seems that memory access is mirrored in the first 8MB of memory...
641 if (op_pointer > 0x1FFFFF && op_pointer < 0x800000)
642         op_pointer &= 0xFF1FFFFF;       // Knock out bits 21-23
643
644                         break;
645                 }
646                 case OBJECT_TYPE_SCALE:
647                 {
648 //WAS:                  uint16 ypos = (p0 >> 3) & 0x3FF;
649                         uint16 ypos = (p0 >> 3) & 0x7FF;
650                         uint32 height = (p0 & 0xFFC000) >> 14;
651                         uint32 oldOPP = op_pointer - 8;
652 //WriteLog("OP: Scaled Object (ypos=%04X, height=%04X", ypos, height);
653 // *** BEGIN OP PROCESSOR TESTING ONLY ***
654 if (inhibit && op_start_log)
655 {
656         WriteLog("!!! ^^^ This object is INHIBITED! ^^^ !!! (halfline=%u, ypos=%u, height=%u)\n", halfline, ypos, height);
657         DumpScaledObject(p0, OPLoadPhrase(op_pointer), OPLoadPhrase(op_pointer+8));
658 }
659 bitmapCounter++;
660 if (!inhibit)   // For OP testing only!
661 // *** END OP PROCESSOR TESTING ONLY ***
662                         if (halfline >= ypos && height > 0)
663                         {
664                                 uint64 p1 = OPLoadPhrase(op_pointer);
665                                 op_pointer += 8;
666                                 uint64 p2 = OPLoadPhrase(op_pointer);
667                                 op_pointer += 8;
668 //WriteLog("OP: %08X (%d) %08X%08X %08X%08X %08X%08X\n", oldOPP, halfline, (uint32)(p0>>32), (uint32)(p0&0xFFFFFFFF), (uint32)(p1>>32), (uint32)(p1&0xFFFFFFFF), (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
669                                 OPProcessScaledBitmap(p0, p1, p2, render);
670
671                                 // OP write-backs
672
673                                 uint16 remainder = (p2 >> 16) & 0xFF;//, vscale = p2 >> 8;
674                                 uint8 /*remainder = p2 >> 16,*/ vscale = p2 >> 8;
675 //Actually, we should skip this object if it has a vscale of zero.
676 //Or do we? Not sure... Atari Karts has a few lines that look like:
677 // (SCALED BITMAP)
678 //000E8268 --> phrase 00010000 7000B00D
679 //    [7 (0) x 1 @ (13, 0) (8 bpp), l: 000E82A0, p: 000E0FC0 fp: 00, fl:RELEASE, idx:00, pt:01]
680 //    [hsc: 9A, vsc: 00, rem: 00]
681 // Could it be the vscale is overridden if the DWIDTH is zero? Hmm...
682 //WriteLog("OP: Scaled bitmap processing (rem=%02X, vscale=%02X)...\n", remainder, vscale);//*/
683
684                                 if (vscale == 0)
685                                         vscale = 0x20;                                  // OP bug??? Nope, it isn't...! Or is it?
686
687 //extern int start_logging;
688 //if (start_logging)
689 //      WriteLog("--> Returned from scaled bitmap processing (rem=%02X, vscale=%02X)...\n", remainder, vscale);//*/
690 //Locks up here:
691 //--> Returned from scaled bitmap processing (rem=20, vscale=80)...
692 //There are other problems here, it looks like...
693 //Another lock up:
694 //About to execute OP (508)...
695 /*
696 OP: Scaled bitmap 4x? 4bpp at 38,? hscale=7C fpix=0 data=00075E28 pitch 1 hflipped=no dwidth=? (linked to 00071118) Transluency=no
697 --> Returned from scaled bitmap processing (rem=50, vscale=7C)...
698 OP: Scaled bitmap 4x? 4bpp at 38,? hscale=7C fpix=0 data=00075E28 pitch 1 hflipped=no dwidth=? (linked to 00071118) Transluency=no
699 --> Returned from scaled bitmap processing (rem=30, vscale=7C)...
700 OP: Scaled bitmap 4x? 4bpp at 38,? hscale=7C fpix=0 data=00075E28 pitch 1 hflipped=no dwidth=? (linked to 00071118) Transluency=no
701 --> Returned from scaled bitmap processing (rem=10, vscale=7C)...
702 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756A8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
703 --> Returned from scaled bitmap processing (rem=00, vscale=7E)...
704 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
705 --> Returned from scaled bitmap processing (rem=00, vscale=80)...
706 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
707 --> Returned from scaled bitmap processing (rem=5E, vscale=7E)...
708 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756E8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
709 --> Returned from scaled bitmap processing (rem=60, vscale=80)...
710 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
711 --> Returned from scaled bitmap processing (rem=3E, vscale=7E)...
712 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756E8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
713 --> Returned from scaled bitmap processing (rem=40, vscale=80)...
714 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
715 --> Returned from scaled bitmap processing (rem=1E, vscale=7E)...
716 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756E8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
717 --> Returned from scaled bitmap processing (rem=20, vscale=80)...
718 */
719 //Here's another problem:
720 //    [hsc: 20, vsc: 20, rem: 00]
721 // Since we're not checking for $E0 (but that's what we get from the above), we end
722 // up repeating this halfline unnecessarily... !!! FIX !!! [DONE, but... still not quite
723 // right. Either that, or the Accolade team that wrote Bubsy screwed up royal.]
724 //Also note: $E0 = 7.0 which IS a legal vscale value...
725
726 //                              if (remainder & 0x80)                           // I.e., it's negative
727 //                              if ((remainder & 0x80) || remainder == 0)       // I.e., it's <= 0
728 //                              if ((remainder - 1) >= 0xE0)            // I.e., it's <= 0
729 //                              if ((remainder >= 0xE1) || remainder == 0)// I.e., it's <= 0
730 //                              if ((remainder >= 0xE1 && remainder <= 0xFF) || remainder == 0)// I.e., it's <= 0
731 //                              if (remainder <= 0x20)                          // I.e., it's <= 1.0
732                                 // I.e., it's < 1.0f -> means it'll go negative when we subtract 1.0f.
733                                 if (remainder < 0x20)
734                                 {
735                                         uint64 data = (p0 & 0xFFFFF80000000000LL) >> 40;
736                                         uint64 dwidth = (p1 & 0xFFC0000) >> 15;
737
738 //                                      while (remainder & 0x80)
739 //                                      while ((remainder & 0x80) || remainder == 0)
740 //                                      while ((remainder - 1) >= 0xE0)
741 //                                      while ((remainder >= 0xE1) || remainder == 0)
742 //                                      while ((remainder >= 0xE1 && remainder <= 0xFF) || remainder == 0)
743 //                                      while (remainder <= 0x20)
744                                         while (remainder < 0x20)
745                                         {
746                                                 remainder += vscale;
747
748                                                 if (height)
749                                                         height--;
750
751                                                 data += dwidth;
752                                         }
753
754                                         p0 &= ~0xFFFFF80000FFC000LL;    // Mask out old data...
755                                         p0 |= (uint64)height << 14;
756                                         p0 |= data << 40;
757                                         OPStorePhrase(oldOPP, p0);
758                                 }
759
760                                 remainder -= 0x20;                                      // 1.0f in [3.5] fixed point format
761
762 //if (start_logging)
763 //      WriteLog("--> Finished writebacks...\n");//*/
764
765 //WriteLog(" [%08X%08X -> ", (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
766                                 p2 &= ~0x0000000000FF0000LL;
767                                 p2 |= (uint64)remainder << 16;
768 //WriteLog("%08X%08X]\n", (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
769                                 OPStorePhrase(oldOPP + 16, p2);
770 //remainder = (uint8)(p2 >> 16), vscale = (uint8)(p2 >> 8);
771 //WriteLog(" [after]: rem=%02X, vscale=%02X\n", remainder, vscale);
772                         }
773
774                         op_pointer = (p0 & 0x000007FFFF000000LL) >> 21;
775                         break;
776                 }
777                 case OBJECT_TYPE_GPU:
778                 {
779 //WriteLog("OP: Asserting GPU IRQ #3...\n");
780 #warning "Need to fix OP GPU IRQ handling! !!! FIX !!!"
781                         OPSetCurrentObject(p0);
782                         GPUSetIRQLine(3, ASSERT_LINE);
783 //Also, OP processing is suspended from this point until OBF (F00026) is written to...
784 // !!! FIX !!!
785 //Do something like:
786 //OPSuspendedByGPU = true;
787 //Dunno if the OP keeps processing from where it was interrupted, or if it just continues
788 //on the next halfline...
789 // --> It continues from where it was interrupted! !!! FIX !!!
790                         break;
791                 }
792                 case OBJECT_TYPE_BRANCH:
793                 {
794                         uint16 ypos = (p0 >> 3) & 0x7FF;
795 // NOTE: The JTRM sez there are only 2 bits used for the CC, but lists *five*
796 //       conditions! Need at least one more bit for that! :-P
797 // Also, the ASIC nets imply that it uses bits 14-16 (height in BM & SBM objects)
798 #warning "!!! Possibly bad CC handling in OP (missing 1 bit) !!!"
799                         uint8  cc   = (p0 >> 14) & 0x03;
800                         uint32 link = (p0 >> 21) & 0x3FFFF8;
801
802 //                      if ((ypos!=507)&&(ypos!=25))
803 //                              WriteLog("\t%i%s%i link=0x%.8x\n",halfline,condition_to_str[cc],ypos>>1,link);
804                         switch (cc)
805                         {
806                         case CONDITION_EQUAL:
807                                 if (TOMReadWord(0xF00006, OP) == ypos || ypos == 0x7FF)
808                                         op_pointer = link;
809                                 break;
810                         case CONDITION_LESS_THAN:
811                                 if (TOMReadWord(0xF00006, OP) < ypos)
812                                         op_pointer = link;
813                                 break;
814                         case CONDITION_GREATER_THAN:
815                                 if (TOMReadWord(0xF00006, OP) > ypos)
816                                         op_pointer = link;
817                                 break;
818                         case CONDITION_OP_FLAG_SET:
819                                 if (OPGetStatusRegister() & 0x01)
820                                         op_pointer = link;
821                                 break;
822                         case CONDITION_SECOND_HALF_LINE:
823 //Here's the ASIC code:
824 //  ND4(cctrue5, newheight[2], heightl[1], heightl[0], hcb[10]);
825 //which means, do the link if bit 10 of HC is set...
826
827                                 // This basically means branch if bit 10 of HC is set
828 #warning "Unhandled condition code causes emulator to crash... !!! FIX !!!"
829                                 WriteLog("OP: Unexpected CONDITION_SECOND_HALF_LINE in BRANCH object\nOP: shutting down!\n");
830                                 LogDone();
831                                 exit(0);
832                                 break;
833                         default:
834                                 // Basically, if you do this, the OP does nothing. :-)
835                                 WriteLog("OP: Unimplemented branch condition %i\n", cc);
836                         }
837                         break;
838                 }
839                 case OBJECT_TYPE_STOP:
840                 {
841 //op_start_log = 0;
842                         // unsure
843 //WriteLog("OP: --> STOP\n");
844 //                      op_set_status_register(((p0>>3) & 0xFFFFFFFF));
845 //This seems more likely...
846                         OPSetCurrentObject(p0);
847
848                         if (p0 & 0x08)
849                         {
850                                 // We need to check whether these interrupts are enabled or not, THEN
851                                 // set an IRQ + pending flag if necessary...
852                                 if (TOMIRQEnabled(IRQ_OPFLAG))
853                                 {
854                                         TOMSetPendingObjectInt();
855                                         m68k_set_irq(2);                                // Cause a 68K IPL 2 to occur...
856                                 }
857                         }
858
859                         return;
860 //                      break;
861                 }
862                 default:
863 //                      WriteLog("op: unknown object type %i\n", ((uint8)p0 & 0x07));
864                         return;
865                 }
866
867                 // Here is a little sanity check to keep the OP from locking up the machine
868                 // when fed bad data. Better would be to count how many actual cycles it used
869                 // and bail out/reenter to properly simulate an overloaded OP... !!! FIX !!!
870 #warning "Better would be to count how many actual cycles it used and bail out/reenter to properly simulate an overloaded OP... !!! FIX !!!"
871                 opCyclesToRun--;
872
873                 if (!opCyclesToRun)
874                         return;
875         }
876 }
877
878
879 //
880 // Store fixed size bitmap in line buffer
881 //
882 void OPProcessFixedBitmap(uint64 p0, uint64 p1, bool render)
883 {
884 // Need to make sure that when writing that it stays within the line buffer...
885 // LBUF ($F01800 - $F01D9E) 360 x 32-bit RAM
886         uint8 depth = (p1 >> 12) & 0x07;                                // Color depth of image
887         int32 xpos = ((int16)((p1 << 4) & 0xFFFF)) >> 4;// Image xpos in LBUF
888         uint32 iwidth = (p1 >> 28) & 0x3FF;                             // Image width in *phrases*
889         uint32 data = (p0 >> 40) & 0xFFFFF8;                    // Pixel data address
890 //#ifdef OP_DEBUG_BMP
891         uint32  firstPix = (p1 >> 49) & 0x3F;
892         // "The LSB is significant only for scaled objects..." -JTRM
893         // "In 1 BPP mode, all five bits are significant. In 2 BPP mode, the top four are significant..."
894         firstPix &= 0x3E;
895 //#endif
896 // We can ignore the RELEASE (high order) bit for now--probably forever...!
897 //      uint8 flags = (p1 >> 45) & 0x0F;        // REFLECT, RMW, TRANS, RELEASE
898 //Optimize: break these out to their own BOOL values
899         uint8 flags = (p1 >> 45) & 0x07;                                // REFLECT (0), RMW (1), TRANS (2)
900         bool flagREFLECT = (flags & OPFLAG_REFLECT ? true : false),
901                 flagRMW = (flags & OPFLAG_RMW ? true : false),
902                 flagTRANS = (flags & OPFLAG_TRANS ? true : false);
903 // "For images with 1 to 4 bits/pixel the top 7 to 4 bits of the index
904 //  provide the most significant bits of the palette address."
905         uint8 index = (p1 >> 37) & 0xFE;                                // CLUT index offset (upper pix, 1-4 bpp)
906         uint32 pitch = (p1 >> 15) & 0x07;                               // Phrase pitch
907         pitch <<= 3;                                                                    // Optimization: Multiply pitch by 8
908
909 //      int16 scanlineWidth = tom_getVideoModeWidth();
910         uint8 * tomRam8 = TOMGetRamPointer();
911         uint8 * paletteRAM = &tomRam8[0x400];
912         // This is OK as long as it's used correctly: For 16-bit RAM to RAM direct copies--NOT
913         // for use when using endian-corrected data (i.e., any of the *_word_read functions!)
914         uint16 * paletteRAM16 = (uint16 *)paletteRAM;
915
916 //      WriteLog("bitmap %ix? %ibpp at %i,? firstpix=? data=0x%.8x pitch %i hflipped=%s dwidth=? (linked to ?) RMW=%s Tranparent=%s\n",
917 //              iwidth, op_bitmap_bit_depth[bitdepth], xpos, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), (flags&OPFLAG_RMW ? "yes" : "no"), (flags&OPFLAG_TRANS ? "yes" : "no"));
918
919 // Is it OK to have a 0 for the data width??? (i.e., undocumented?)
920 // Seems to be... Seems that dwidth *can* be zero (i.e., reuse same line) as well.
921 // Pitch == 0 is OK too...
922
923 //kludge: Seems that the OP treats iwidth == 0 as iwidth == 1... Need to investigate
924 //        on real hardware...
925 #warning "!!! Need to investigate iwidth == 0 behavior on real hardware !!!"
926 if (iwidth == 0)
927         iwidth = 1;
928
929 //      if (!render || op_pointer == 0 || ptr == 0 || pitch == 0)
930 //I'm not convinced that we need to concern ourselves with data & op_pointer here either!
931         if (!render || iwidth == 0)
932                 return;
933
934 //OK, so we know the position in the line buffer is correct. It's the clipping in
935 //24bpp mode that's wrong!
936 #if 0
937 //This is a total kludge, based upon the fact that 24BPP mode puts *4* bytes
938 //into the line buffer for each pixel.
939 if (depth == 5) // i.e., 24bpp mode...
940         xpos >>= 1;     // Cut it in half...
941 #endif
942
943 //#define OP_DEBUG_BMP
944 //#ifdef OP_DEBUG_BMP
945 //      WriteLog("bitmap %ix%i %ibpp at %i,%i firstpix=%i data=0x%.8x pitch %i hflipped=%s dwidth=%i (linked to 0x%.8x) Transluency=%s\n",
946 //              iwidth, height, op_bitmap_bit_depth[bitdepth], xpos, ypos, firstPix, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), dwidth, op_pointer, (flags&OPFLAG_RMW ? "yes" : "no"));
947 //#endif
948
949 //      int32 leftMargin = xpos, rightMargin = (xpos + (phraseWidthToPixels[depth] * iwidth)) - 1;
950         int32 startPos = xpos, endPos = xpos +
951                 (!flagREFLECT ? (phraseWidthToPixels[depth] * iwidth) - 1
952                 : -((phraseWidthToPixels[depth] * iwidth) + 1));
953         uint32 clippedWidth = 0, phraseClippedWidth = 0, dataClippedWidth = 0;//, phrasePixel = 0;
954         bool in24BPPMode = (((GET16(tomRam8, 0x0028) >> 1) & 0x03) == 1 ? true : false);        // VMODE
955         // Not sure if this is Jaguar Two only location or what...
956         // From the docs, it is... If we want to limit here we should think of something else.
957 //      int32 limit = GET16(tom_ram_8, 0x0008);                 // LIMIT
958 //      int32 limit = 720;
959 //      int32 lbufWidth = (!in24BPPMode ? limit - 1 : (limit / 2) - 1); // Zero based limit...
960 //printf("[OP:xpos=%i,spos=%i,epos=%i>", xpos, startPos, endPos);
961         // This is correct, the OP line buffer is a constant size... 
962         int32 limit = 720;
963         int32 lbufWidth = 719;
964
965         // If the image is completely to the left or right of the line buffer, then bail.
966 //If in REFLECT mode, then these values are swapped! !!! FIX !!! [DONE]
967 //There are four possibilities:
968 //  1. image sits on left edge and no REFLECT; starts out of bounds but ends in bounds.
969 //  2. image sits on left edge and REFLECT; starts in bounds but ends out of bounds.
970 //  3. image sits on right edge and REFLECT; starts out of bounds but ends in bounds.
971 //  4. image sits on right edge and no REFLECT; starts in bounds but ends out of bounds.
972 //Numbers 2 & 4 can be caught by checking the LBUF clip while in the inner loop,
973 // numbers 1 & 3 are of concern.
974 // This *indirectly* handles only cases 2 & 4! And is WRONG if REFLECT is set...!
975 //      if (rightMargin < 0 || leftMargin > lbufWidth)
976
977 // It might be easier to swap these (if REFLECTed) and just use XPOS down below...
978 // That way, you could simply set XPOS to leftMargin if !REFLECT and to rightMargin otherwise.
979 // Still have to be careful with the DATA and IWIDTH values though...
980
981 //      if ((!flagREFLECT && (rightMargin < 0 || leftMargin > lbufWidth))
982 //              || (flagREFLECT && (leftMargin < 0 || rightMargin > lbufWidth)))
983 //              return;
984         if ((!flagREFLECT && (endPos < 0 || startPos > lbufWidth))
985                 || (flagREFLECT && (startPos < 0 || endPos > lbufWidth)))
986                 return;
987
988         // Otherwise, find the clip limits and clip the phrase as well...
989         // NOTE: I'm fudging here by letting the actual blit overstep the bounds of the
990         //       line buffer, but it shouldn't matter since there are two unused line
991         //       buffers below and nothing above and I'll at most write 8 bytes outside
992         //       the line buffer... I could use a fractional clip begin/end value, but
993         //       this makes the blit a *lot* more hairy. I might fix this in the future
994         //       if it becomes necessary. (JLH)
995         //       Probably wouldn't be *that* hairy. Just use a delta that tells the inner loop
996         //       which pixel in the phrase is being written, and quit when either end of phrases
997         //       is reached or line buffer extents are surpassed.
998
999 //This stuff is probably wrong as well... !!! FIX !!!
1000 //The strange thing is that it seems to work, but that's no guarantee that it's bulletproof!
1001 //Yup. Seems that JagMania doesn't work correctly with this...
1002 //Dunno if this is the problem, but Atari Karts is showing *some* of the road now...
1003 //      if (!flagREFLECT)
1004
1005 /*
1006         if (leftMargin < 0)
1007                 clippedWidth = 0 - leftMargin,
1008                 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth],
1009                 leftMargin = 0 - (clippedWidth % phraseWidthToPixels[depth]);
1010 //              leftMargin = 0;
1011
1012         if (rightMargin > lbufWidth)
1013                 clippedWidth = rightMargin - lbufWidth,
1014                 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];//,
1015 //              rightMargin = lbufWidth + (clippedWidth % phraseWidthToPixels[depth]);
1016 //              rightMargin = lbufWidth;
1017 */
1018 if (depth > 5)
1019         WriteLog("OP: We're about to encounter a divide by zero error!\n");
1020         // NOTE: We're just using endPos to figure out how much, if any, to clip by.
1021         // ALSO: There may be another case where we start out of bounds and end out of bounds...!
1022         // !!! FIX !!!
1023         if (startPos < 0)                       // Case #1: Begin out, end in, L to R
1024                 clippedWidth = 0 - startPos,
1025                 dataClippedWidth = phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth],
1026                 startPos = 0 - (clippedWidth % phraseWidthToPixels[depth]);
1027
1028         if (endPos < 0)                         // Case #2: Begin in, end out, R to L
1029                 clippedWidth = 0 - endPos,
1030                 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];
1031
1032         if (endPos > lbufWidth)         // Case #3: Begin in, end out, L to R
1033                 clippedWidth = endPos - lbufWidth,
1034                 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];
1035
1036         if (startPos > lbufWidth)       // Case #4: Begin out, end in, R to L
1037                 clippedWidth = startPos - lbufWidth,
1038                 dataClippedWidth = phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth],
1039                 startPos = lbufWidth + (clippedWidth % phraseWidthToPixels[depth]);
1040 //printf("<OP:spos=%i,epos=%i]", startPos, endPos);
1041
1042         // If the image is sitting on the line buffer left or right edge, we need to compensate
1043         // by decreasing the image phrase width accordingly.
1044         iwidth -= phraseClippedWidth;
1045
1046         // Also, if we're clipping the phrase we need to make sure we're in the correct part of
1047         // the pixel data.
1048 //      data += phraseClippedWidth * (pitch << 3);
1049         data += dataClippedWidth * pitch;
1050
1051         // NOTE: When the bitmap is in REFLECT mode, the XPOS marks the *right* side of the
1052         //       bitmap! This makes clipping & etc. MUCH, much easier...!
1053 //      uint32 lbufAddress = 0x1800 + (!in24BPPMode ? leftMargin * 2 : leftMargin * 4);
1054 //Why does this work right when multiplying startPos by 2 (instead of 4) for 24 BPP mode?
1055 //Is this a bug in the OP?
1056 //It's because in 24bpp mode, each pixel takes *4* bytes, instead of the usual 2.
1057 //Though it looks like we're doing it here no matter what...
1058 //      uint32 lbufAddress = 0x1800 + (!in24BPPMode ? startPos * 2 : startPos * 2);
1059 //Let's try this:
1060         uint32 lbufAddress = 0x1800 + (startPos * 2);
1061         uint8 * currentLineBuffer = &tomRam8[lbufAddress];
1062
1063         // Render.
1064
1065 // Hmm. We check above for 24 BPP mode, but don't do anything about it below...
1066 // If we *were* in 24 BPP mode, how would you convert CRY to RGB24? Seems to me
1067 // that if you're in CRY mode then you wouldn't be able to use 24 BPP bitmaps
1068 // anyway.
1069 // This seems to be the case (at least according to the Midsummer docs)...!
1070
1071 // This is to test using palette zeroes instead of bit zeroes...
1072 // And it seems that this is wrong, index == 0 is transparent apparently... :-/
1073 //#define OP_USES_PALETTE_ZERO
1074
1075         if (depth == 0)                                                                 // 1 BPP
1076         {
1077                 // The LSB of flags is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1078                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1079
1080                 // Fetch 1st phrase...
1081                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1082 //Note that firstPix should only be honored *if* we start with the 1st phrase of the bitmap
1083 //i.e., we didn't clip on the margin... !!! FIX !!!
1084                 pixels <<= firstPix;                                            // Skip first N pixels (N=firstPix)...
1085                 int i = firstPix;                                                       // Start counter at right spot...
1086
1087                 while (iwidth--)
1088                 {
1089                         while (i++ < 64)
1090                         {
1091                                 uint8 bit = pixels >> 63;
1092 #ifndef OP_USES_PALETTE_ZERO
1093                                 if (flagTRANS && bit == 0)
1094 #else
1095                                 if (flagTRANS && (paletteRAM16[index | bit] == 0))
1096 #endif
1097                                         ;       // Do nothing...
1098                                 else
1099                                 {
1100                                         if (!flagRMW)
1101 //Optimize: Set paletteRAM16 to beginning of palette RAM + index*2 and use only [bit] as index...
1102 //Won't optimize RMW case though...
1103                                                 // This is the *only* correct use of endian-dependent code
1104                                                 // (i.e., mem-to-mem direct copying)!
1105                                                 *(uint16 *)currentLineBuffer = paletteRAM16[index | bit];
1106                                         else
1107                                                 *currentLineBuffer =
1108                                                         BLEND_CR(*currentLineBuffer, paletteRAM[(index | bit) << 1]),
1109                                                 *(currentLineBuffer + 1) =
1110                                                         BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bit) << 1) + 1]);
1111                                 }
1112
1113                                 currentLineBuffer += lbufDelta;
1114                                 pixels <<= 1;
1115                         }
1116                         i = 0;
1117                         // Fetch next phrase...
1118                         data += pitch;
1119                         pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1120                 }
1121         }
1122         else if (depth == 1)                                                    // 2 BPP
1123         {
1124 if (firstPix)
1125         WriteLog("OP: Fixed bitmap @ 2 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1126                 index &= 0xFC;                                                          // Top six bits form CLUT index
1127                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1128                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1129
1130                 while (iwidth--)
1131                 {
1132                         // Fetch phrase...
1133                         uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1134                         data += pitch;
1135
1136                         for(int i=0; i<32; i++)
1137                         {
1138                                 uint8 bits = pixels >> 62;
1139 // Seems to me that both of these are in the same endian, so we could cast it as
1140 // uint16 * and do straight across copies (what about 24 bpp? Treat it differently...)
1141 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
1142 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
1143 // No, it isn't because we read the memory in an endian safe way--this *won't* work...
1144 #ifndef OP_USES_PALETTE_ZERO
1145                                 if (flagTRANS && bits == 0)
1146 #else
1147                                 if (flagTRANS && (paletteRAM16[index | bits] == 0))
1148 #endif
1149                                         ;       // Do nothing...
1150                                 else
1151                                 {
1152                                         if (!flagRMW)
1153                                                 *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1154                                         else
1155                                                 *currentLineBuffer =
1156                                                         BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1157                                                 *(currentLineBuffer + 1) =
1158                                                         BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1159                                 }
1160
1161                                 currentLineBuffer += lbufDelta;
1162                                 pixels <<= 2;
1163                         }
1164                 }
1165         }
1166         else if (depth == 2)                                                    // 4 BPP
1167         {
1168 if (firstPix)
1169         WriteLog("OP: Fixed bitmap @ 4 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1170                 index &= 0xF0;                                                          // Top four bits form CLUT index
1171                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1172                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1173
1174                 while (iwidth--)
1175                 {
1176                         // Fetch phrase...
1177                         uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1178                         data += pitch;
1179
1180                         for(int i=0; i<16; i++)
1181                         {
1182                                 uint8 bits = pixels >> 60;
1183 // Seems to me that both of these are in the same endian, so we could cast it as
1184 // uint16 * and do straight across copies (what about 24 bpp? Treat it differently...)
1185 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
1186 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
1187 // No, it isn't because we read the memory in an endian safe way--this *won't* work...
1188 #ifndef OP_USES_PALETTE_ZERO
1189                                 if (flagTRANS && bits == 0)
1190 #else
1191                                 if (flagTRANS && (paletteRAM16[index | bits] == 0))
1192 #endif
1193                                         ;       // Do nothing...
1194                                 else
1195                                 {
1196                                         if (!flagRMW)
1197                                                 *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1198                                         else
1199                                                 *currentLineBuffer =
1200                                                         BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1201                                                 *(currentLineBuffer + 1) =
1202                                                         BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1203                                 }
1204
1205                                 currentLineBuffer += lbufDelta;
1206                                 pixels <<= 4;
1207                         }
1208                 }
1209         }
1210         else if (depth == 3)                                                    // 8 BPP
1211         {
1212                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1213                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1214
1215                 // Fetch 1st phrase...
1216                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1217 //Note that firstPix should only be honored *if* we start with the 1st phrase of the bitmap
1218 //i.e., we didn't clip on the margin... !!! FIX !!!
1219                 firstPix &= 0x30;                                                       // Only top two bits are valid for 8 BPP
1220                 pixels <<= firstPix;                                            // Skip first N pixels (N=firstPix)...
1221                 int i = firstPix >> 3;                                          // Start counter at right spot...
1222
1223                 while (iwidth--)
1224                 {
1225                         while (i++ < 8)
1226                         {
1227                                 uint8 bits = pixels >> 56;
1228 // Seems to me that both of these are in the same endian, so we could cast it as
1229 // uint16 * and do straight across copies (what about 24 bpp? Treat it differently...)
1230 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
1231 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
1232 // No, it isn't because we read the memory in an endian safe way--this *won't* work...
1233 //This would seem to be problematic...
1234 //Because it's the palette entry being zero that makes the pixel transparent...
1235 //Let's try it and see.
1236 #ifndef OP_USES_PALETTE_ZERO
1237                                 if (flagTRANS && bits == 0)
1238 #else
1239                                 if (flagTRANS && (paletteRAM16[bits] == 0))
1240 #endif
1241                                         ;       // Do nothing...
1242                                 else
1243                                 {
1244                                         if (!flagRMW)
1245                                                 *(uint16 *)currentLineBuffer = paletteRAM16[bits];
1246                                         else
1247                                                 *currentLineBuffer =
1248                                                         BLEND_CR(*currentLineBuffer, paletteRAM[bits << 1]),
1249                                                 *(currentLineBuffer + 1) =
1250                                                         BLEND_Y(*(currentLineBuffer + 1), paletteRAM[(bits << 1) + 1]);
1251                                 }
1252
1253                                 currentLineBuffer += lbufDelta;
1254                                 pixels <<= 8;
1255                         }
1256                         i = 0;
1257                         // Fetch next phrase...
1258                         data += pitch;
1259                         pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1260                 }
1261         }
1262         else if (depth == 4)                                                    // 16 BPP
1263         {
1264 if (firstPix)
1265         WriteLog("OP: Fixed bitmap @ 16 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1266                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1267                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1268
1269                 while (iwidth--)
1270                 {
1271                         // Fetch phrase...
1272                         uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1273                         data += pitch;
1274
1275                         for(int i=0; i<4; i++)
1276                         {
1277                                 uint8 bitsHi = pixels >> 56, bitsLo = pixels >> 48;
1278 // Seems to me that both of these are in the same endian, so we could cast it as
1279 // uint16 * and do straight across copies (what about 24 bpp? Treat it differently...)
1280 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
1281 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
1282 // No, it isn't because we read the memory in an endian safe way--it *won't* work...
1283 //This doesn't seem right... Let's try the encoded black value ($8800):
1284 //Apparently, CRY 0 maps to $8800...
1285                                 if (flagTRANS && ((bitsLo | bitsHi) == 0))
1286 //                              if (flagTRANS && (bitsHi == 0x88) && (bitsLo == 0x00))
1287                                         ;       // Do nothing...
1288                                 else
1289                                 {
1290                                         if (!flagRMW)
1291                                                 *currentLineBuffer = bitsHi,
1292                                                 *(currentLineBuffer + 1) = bitsLo;
1293                                         else
1294                                                 *currentLineBuffer =
1295                                                         BLEND_CR(*currentLineBuffer, bitsHi),
1296                                                 *(currentLineBuffer + 1) =
1297                                                         BLEND_Y(*(currentLineBuffer + 1), bitsLo);
1298                                 }
1299
1300                                 currentLineBuffer += lbufDelta;
1301                                 pixels <<= 16;
1302                         }
1303                 }
1304         }
1305         else if (depth == 5)                                                    // 24 BPP
1306         {
1307 //Looks like Iron Soldier is the only game that uses 24BPP mode...
1308 //There *might* be others...
1309 //WriteLog("OP: Writing 24 BPP bitmap!\n");
1310 if (firstPix)
1311         WriteLog("OP: Fixed bitmap @ 24 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1312                 // Not sure, but I think RMW only works with 16 BPP and below, and only in CRY mode...
1313                 // The LSB of flags is OPFLAG_REFLECT, so sign extend it and OR 4 into it.
1314                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 4) | 0x04;
1315
1316                 while (iwidth--)
1317                 {
1318                         // Fetch phrase...
1319                         uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1320                         data += pitch;
1321
1322                         for(int i=0; i<2; i++)
1323                         {
1324                                 // We don't use a 32-bit var here because of endian issues...!
1325                                 uint8 bits3 = pixels >> 56, bits2 = pixels >> 48,
1326                                         bits1 = pixels >> 40, bits0 = pixels >> 32;
1327
1328                                 if (flagTRANS && (bits3 | bits2 | bits1 | bits0) == 0)
1329                                         ;       // Do nothing...
1330                                 else
1331                                         *currentLineBuffer = bits3,
1332                                         *(currentLineBuffer + 1) = bits2,
1333                                         *(currentLineBuffer + 2) = bits1,
1334                                         *(currentLineBuffer + 3) = bits0;
1335
1336                                 currentLineBuffer += lbufDelta;
1337                                 pixels <<= 32;
1338                         }
1339                 }
1340         }
1341 }
1342
1343
1344 //
1345 // Store scaled bitmap in line buffer
1346 //
1347 void OPProcessScaledBitmap(uint64 p0, uint64 p1, uint64 p2, bool render)
1348 {
1349 // Need to make sure that writes stay within the line buffer...
1350 // LBUF ($F01800 - $F01D9E) 360 x 32-bit RAM
1351         uint8 depth = (p1 >> 12) & 0x07;                                // Color depth of image
1352         int32 xpos = ((int16)((p1 << 4) & 0xFFFF)) >> 4;// Image xpos in LBUF
1353         uint32 iwidth = (p1 >> 28) & 0x3FF;                             // Image width in *phrases*
1354         uint32 data = (p0 >> 40) & 0xFFFFF8;                    // Pixel data address
1355 //#ifdef OP_DEBUG_BMP
1356 // Prolly should use this... Though not sure exactly how.
1357 //Use the upper bits as an offset into the phrase depending on the BPP. That's how!
1358         uint32 firstPix = (p1 >> 49) & 0x3F;
1359 //This is WEIRD! I'm sure I saw Atari Karts request 8 BPP FIRSTPIX! What happened???
1360 if (firstPix)
1361         WriteLog("OP: FIRSTPIX != 0! (Scaled BM)\n");
1362 //#endif
1363 // We can ignore the RELEASE (high order) bit for now--probably forever...!
1364 //      uint8 flags = (p1 >> 45) & 0x0F;        // REFLECT, RMW, TRANS, RELEASE
1365 //Optimize: break these out to their own BOOL values [DONE]
1366         uint8 flags = (p1 >> 45) & 0x07;                                // REFLECT (0), RMW (1), TRANS (2)
1367         bool flagREFLECT = (flags & OPFLAG_REFLECT ? true : false),
1368                 flagRMW = (flags & OPFLAG_RMW ? true : false),
1369                 flagTRANS = (flags & OPFLAG_TRANS ? true : false);
1370         uint8 index = (p1 >> 37) & 0xFE;                                // CLUT index offset (upper pix, 1-4 bpp)
1371         uint32 pitch = (p1 >> 15) & 0x07;                               // Phrase pitch
1372
1373         uint8 * tomRam8 = TOMGetRamPointer();
1374         uint8 * paletteRAM = &tomRam8[0x400];
1375         // This is OK as long as it's used correctly: For 16-bit RAM to RAM direct copies--NOT
1376         // for use when using endian-corrected data (i.e., any of the *ReadWord functions!)
1377         uint16 * paletteRAM16 = (uint16 *)paletteRAM;
1378
1379         uint16 hscale = p2 & 0xFF;
1380 // Hmm. It seems that fixing the horizontal scale necessitated re-fixing this. Not sure why,
1381 // but seems to be consistent with the vertical scaling now (and it may turn out to be wrong!)...
1382         uint16 horizontalRemainder = hscale;                            // Not sure if it starts full, but seems reasonable [It's not!]
1383 //      uint8 horizontalRemainder = 0;                                  // Let's try zero! Seems to work! Yay! [No, it doesn't!]
1384         int32 scaledWidthInPixels = (iwidth * phraseWidthToPixels[depth] * hscale) >> 5;
1385         uint32 scaledPhrasePixels = (phraseWidthToPixels[depth] * hscale) >> 5;
1386
1387 //      WriteLog("bitmap %ix? %ibpp at %i,? firstpix=? data=0x%.8x pitch %i hflipped=%s dwidth=? (linked to ?) RMW=%s Tranparent=%s\n",
1388 //              iwidth, op_bitmap_bit_depth[bitdepth], xpos, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), (flags&OPFLAG_RMW ? "yes" : "no"), (flags&OPFLAG_TRANS ? "yes" : "no"));
1389
1390 // Looks like an hscale of zero means don't draw!
1391         if (!render || iwidth == 0 || hscale == 0)
1392                 return;
1393
1394 /*extern int start_logging;
1395 if (start_logging)
1396         WriteLog("OP: Scaled bitmap %ix? %ibpp at %i,? hscale=%02X fpix=%i data=%08X pitch %i hflipped=%s dwidth=? (linked to %08X) Transluency=%s\n",
1397                 iwidth, op_bitmap_bit_depth[depth], xpos, hscale, firstPix, data, pitch, (flagREFLECT ? "yes" : "no"), op_pointer, (flagRMW ? "yes" : "no"));*/
1398 //#define OP_DEBUG_BMP
1399 //#ifdef OP_DEBUG_BMP
1400 //      WriteLog("OP: Scaled bitmap %ix%i %ibpp at %i,%i firstpix=%i data=0x%.8x pitch %i hflipped=%s dwidth=%i (linked to 0x%.8x) Transluency=%s\n",
1401 //              iwidth, height, op_bitmap_bit_depth[bitdepth], xpos, ypos, firstPix, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), dwidth, op_pointer, (flags&OPFLAG_RMW ? "yes" : "no"));
1402 //#endif
1403
1404         int32 startPos = xpos, endPos = xpos +
1405                 (!flagREFLECT ? scaledWidthInPixels - 1 : -(scaledWidthInPixels + 1));
1406         uint32 clippedWidth = 0, phraseClippedWidth = 0, dataClippedWidth = 0;
1407         bool in24BPPMode = (((GET16(tomRam8, 0x0028) >> 1) & 0x03) == 1 ? true : false);        // VMODE
1408         // Not sure if this is Jaguar Two only location or what...
1409         // From the docs, it is... If we want to limit here we should think of something else.
1410 //      int32 limit = GET16(tom_ram_8, 0x0008);                 // LIMIT
1411         int32 limit = 720;
1412 //      int32 lbufWidth = (!in24BPPMode ? limit - 1 : (limit / 2) - 1); // Zero based limit...
1413         int32 lbufWidth = 719;  // Zero based limit...
1414
1415         // If the image is completely to the left or right of the line buffer, then bail.
1416 //If in REFLECT mode, then these values are swapped! !!! FIX !!! [DONE]
1417 //There are four possibilities:
1418 //  1. image sits on left edge and no REFLECT; starts out of bounds but ends in bounds.
1419 //  2. image sits on left edge and REFLECT; starts in bounds but ends out of bounds.
1420 //  3. image sits on right edge and REFLECT; starts out of bounds but ends in bounds.
1421 //  4. image sits on right edge and no REFLECT; starts in bounds but ends out of bounds.
1422 //Numbers 2 & 4 can be caught by checking the LBUF clip while in the inner loop,
1423 // numbers 1 & 3 are of concern.
1424 // This *indirectly* handles only cases 2 & 4! And is WRONG if REFLECT is set...!
1425 //      if (rightMargin < 0 || leftMargin > lbufWidth)
1426
1427 // It might be easier to swap these (if REFLECTed) and just use XPOS down below...
1428 // That way, you could simply set XPOS to leftMargin if !REFLECT and to rightMargin otherwise.
1429 // Still have to be careful with the DATA and IWIDTH values though...
1430
1431         if ((!flagREFLECT && (endPos < 0 || startPos > lbufWidth))
1432                 || (flagREFLECT && (startPos < 0 || endPos > lbufWidth)))
1433                 return;
1434
1435         // Otherwise, find the clip limits and clip the phrase as well...
1436         // NOTE: I'm fudging here by letting the actual blit overstep the bounds of the
1437         //       line buffer, but it shouldn't matter since there are two unused line
1438         //       buffers below and nothing above and I'll at most write 40 bytes outside
1439         //       the line buffer... I could use a fractional clip begin/end value, but
1440         //       this makes the blit a *lot* more hairy. I might fix this in the future
1441         //       if it becomes necessary. (JLH)
1442         //       Probably wouldn't be *that* hairy. Just use a delta that tells the inner loop
1443         //       which pixel in the phrase is being written, and quit when either end of phrases
1444         //       is reached or line buffer extents are surpassed.
1445
1446 //This stuff is probably wrong as well... !!! FIX !!!
1447 //The strange thing is that it seems to work, but that's no guarantee that it's bulletproof!
1448 //Yup. Seems that JagMania doesn't work correctly with this...
1449 //Dunno if this is the problem, but Atari Karts is showing *some* of the road now...
1450 //Actually, it is! Or, it was. It doesn't seem to be clipping here, so the problem lies
1451 //elsewhere! Hmm. Putting the scaling code into the 1/2/8 BPP cases seems to draw the ground
1452 // a bit more accurately... Strange!
1453 //It's probably a case of the REFLECT flag being set and the background being written
1454 //from the right side of the screen...
1455 //But no, it isn't... At least if the diagnostics are telling the truth!
1456
1457         // NOTE: We're just using endPos to figure out how much, if any, to clip by.
1458         // ALSO: There may be another case where we start out of bounds and end out of bounds...!
1459         // !!! FIX !!!
1460
1461 //There's a problem here with scaledPhrasePixels in that it can be forced to zero when
1462 //the scaling factor is small. So fix it already! !!! FIX !!!
1463 /*if (scaledPhrasePixels == 0)
1464 {
1465         WriteLog("OP: [Scaled] We're about to encounter a divide by zero error!\n");
1466         DumpScaledObject(p0, p1, p2);
1467 }//*/
1468 //NOTE: I'm almost 100% sure that this is wrong... And it is! :-p
1469
1470 //Try a simple example...
1471 // Let's say we have an 8 BPP scanline with an hscale of $80 (4). Our xpos is -10,
1472 // non-flipped. Pixels in the bitmap are XYZXYZXYZXYZXYZ.
1473 // Scaled up, they would be XXXXYYYYZZZZXXXXYYYYZZZZXXXXYYYYZZZZ...
1474 //
1475 // Normally, we would expect this in the line buffer:
1476 // ZZXXXXYYYYZZZZXXXXYYYYZZZZ...
1477 //
1478 // But instead we're getting:
1479 // XXXXYYYYZZZZXXXXYYYYZZZZ...
1480 //
1481 // or are we??? It would seem so, simply by virtue of the fact that we're NOT starting
1482 // on a negative boundary--or are we? Hmm...
1483 // cw = 10, dcw = pcw = 10 / ([8 * 4 = 32] 32) = 0, sp = -10
1484 //
1485 // Let's try a real world example:
1486 //
1487 //OP: Scaled bitmap (70, 8 BPP, spp=28) sp (-400) < 0... [new sp=-8, cw=400, dcw=pcw=14]
1488 //OP: Scaled bitmap (6F, 8 BPP, spp=27) sp (-395) < 0... [new sp=-17, cw=395, dcw=pcw=14]
1489 //
1490 // Really, spp is 27.75 in the second case...
1491 // So... If we do 395 / 27.75, we get 14. Ok so far... If we scale that against the
1492 // start position (-395 + 14 * 27.75), we get -6.5... NOT -17!
1493
1494 //Now it seems we're working OK, at least for the first case...
1495 uint32 scaledPhrasePixelsUS = phraseWidthToPixels[depth] * hscale;
1496
1497         if (startPos < 0)                       // Case #1: Begin out, end in, L to R
1498 {
1499 extern int start_logging;
1500 if (start_logging)
1501         WriteLog("OP: Scaled bitmap (%02X, %u BPP, spp=%u) start pos (%i) < 0...", hscale, op_bitmap_bit_depth[depth], scaledPhrasePixels, startPos);
1502 //              clippedWidth = 0 - startPos,
1503                 clippedWidth = (0 - startPos) << 5,
1504 //              dataClippedWidth = phraseClippedWidth = clippedWidth / scaledPhrasePixels,
1505                 dataClippedWidth = phraseClippedWidth = (clippedWidth / scaledPhrasePixelsUS) >> 5,
1506 //              startPos = 0 - (clippedWidth % scaledPhrasePixels);
1507                 startPos += (dataClippedWidth * scaledPhrasePixelsUS) >> 5;
1508 if (start_logging)
1509         WriteLog(" [new sp=%i, cw=%i, dcw=pcw=%i]\n", startPos, clippedWidth, dataClippedWidth);
1510 }
1511
1512         if (endPos < 0)                         // Case #2: Begin in, end out, R to L
1513                 clippedWidth = 0 - endPos,
1514                 phraseClippedWidth = clippedWidth / scaledPhrasePixels;
1515
1516         if (endPos > lbufWidth)         // Case #3: Begin in, end out, L to R
1517                 clippedWidth = endPos - lbufWidth,
1518                 phraseClippedWidth = clippedWidth / scaledPhrasePixels;
1519
1520         if (startPos > lbufWidth)       // Case #4: Begin out, end in, R to L
1521                 clippedWidth = startPos - lbufWidth,
1522                 dataClippedWidth = phraseClippedWidth = clippedWidth / scaledPhrasePixels,
1523                 startPos = lbufWidth + (clippedWidth % scaledPhrasePixels);
1524
1525 extern int op_start_log;
1526 if (op_start_log && clippedWidth != 0)
1527         WriteLog("OP: Clipped line. SP=%i, EP=%i, clip=%u, iwidth=%u, hscale=%02X\n", startPos, endPos, clippedWidth, iwidth, hscale);
1528 if (op_start_log && startPos == 13)
1529 {
1530         WriteLog("OP: Scaled line. SP=%i, EP=%i, clip=%u, iwidth=%u, hscale=%02X, depth=%u, firstPix=%u\n", startPos, endPos, clippedWidth, iwidth, hscale, depth, firstPix);
1531         DumpScaledObject(p0, p1, p2);
1532         if (iwidth == 7)
1533         {
1534                 WriteLog("    %08X: ", data);
1535                 for(int i=0; i<7*8; i++)
1536                         WriteLog("%02X ", JaguarReadByte(data+i));
1537                 WriteLog("\n");
1538         }
1539 }
1540         // If the image is sitting on the line buffer left or right edge, we need to compensate
1541         // by decreasing the image phrase width accordingly.
1542         iwidth -= phraseClippedWidth;
1543
1544         // Also, if we're clipping the phrase we need to make sure we're in the correct part of
1545         // the pixel data.
1546 //      data += phraseClippedWidth * (pitch << 3);
1547         data += dataClippedWidth * (pitch << 3);
1548
1549         // NOTE: When the bitmap is in REFLECT mode, the XPOS marks the *right* side of the
1550         //       bitmap! This makes clipping & etc. MUCH, much easier...!
1551 //      uint32 lbufAddress = 0x1800 + (!in24BPPMode ? leftMargin * 2 : leftMargin * 4);
1552 //      uint32 lbufAddress = 0x1800 + (!in24BPPMode ? startPos * 2 : startPos * 4);
1553         uint32 lbufAddress = 0x1800 + startPos * 2;
1554         uint8 * currentLineBuffer = &tomRam8[lbufAddress];
1555 //uint8 * lineBufferLowerLimit = &tom_ram_8[0x1800],
1556 //      * lineBufferUpperLimit = &tom_ram_8[0x1800 + 719];
1557
1558         // Render.
1559
1560 // Hmm. We check above for 24 BPP mode, but don't do anything about it below...
1561 // If we *were* in 24 BPP mode, how would you convert CRY to RGB24? Seems to me
1562 // that if you're in CRY mode then you wouldn't be able to use 24 BPP bitmaps
1563 // anyway.
1564 // This seems to be the case (at least according to the Midsummer docs)...!
1565
1566         if (depth == 0)                                                                 // 1 BPP
1567         {
1568 if (firstPix != 0)
1569         WriteLog("OP: Scaled bitmap @ 1 BPP requesting FIRSTPIX!\n");
1570                 // The LSB of flags is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1571                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1572
1573                 int pixCount = 0;
1574                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1575
1576                 while ((int32)iwidth > 0)
1577                 {
1578                         uint8 bits = pixels >> 63;
1579
1580 #ifndef OP_USES_PALETTE_ZERO
1581                         if (flagTRANS && bits == 0)
1582 #else
1583                         if (flagTRANS && (paletteRAM16[index | bits] == 0))
1584 #endif
1585                                 ;       // Do nothing...
1586                         else
1587                         {
1588                                 if (!flagRMW)
1589                                         // This is the *only* correct use of endian-dependent code
1590                                         // (i.e., mem-to-mem direct copying)!
1591                                         *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1592                                 else
1593                                         *currentLineBuffer =
1594                                                 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1595                                         *(currentLineBuffer + 1) =
1596                                                 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1597                         }
1598
1599                         currentLineBuffer += lbufDelta;
1600
1601 /*
1602 The reason we subtract the horizontalRemainder *after* the test is because we had too few
1603 bits for horizontalRemainder to properly recognize a negative number. But now it's 16 bits
1604 wide, so we could probably go back to that (as long as we make it an int16 and not a uint16!)
1605 */
1606 /*                      horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1607                         while (horizontalRemainder & 0x80)
1608                         {
1609                                 horizontalRemainder += hscale;
1610                                 pixCount++;
1611                                 pixels <<= 1;
1612                         }//*/
1613 //                      while (horizontalRemainder <= 0x20)             // I.e., it's <= 1.0 (*before* subtraction)
1614                         while (horizontalRemainder < 0x20)              // I.e., it's <= 1.0 (*before* subtraction)
1615                         {
1616                                 horizontalRemainder += hscale;
1617                                 pixCount++;
1618                                 pixels <<= 1;
1619                         }
1620                         horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1621
1622                         if (pixCount > 63)
1623                         {
1624                                 int phrasesToSkip = pixCount / 64, pixelShift = pixCount % 64;
1625
1626                                 data += (pitch << 3) * phrasesToSkip;
1627                                 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1628                                 pixels <<= 1 * pixelShift;
1629                                 iwidth -= phrasesToSkip;
1630                                 pixCount = pixelShift;
1631                         }
1632                 }
1633         }
1634         else if (depth == 1)                                                    // 2 BPP
1635         {
1636 if (firstPix != 0)
1637         WriteLog("OP: Scaled bitmap @ 2 BPP requesting FIRSTPIX!\n");
1638                 index &= 0xFC;                                                          // Top six bits form CLUT index
1639                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1640                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1641
1642                 int pixCount = 0;
1643                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1644
1645                 while ((int32)iwidth > 0)
1646                 {
1647                         uint8 bits = pixels >> 62;
1648
1649 #ifndef OP_USES_PALETTE_ZERO
1650                         if (flagTRANS && bits == 0)
1651 #else
1652                         if (flagTRANS && (paletteRAM16[index | bits] == 0))
1653 #endif
1654                                 ;       // Do nothing...
1655                         else
1656                         {
1657                                 if (!flagRMW)
1658                                         // This is the *only* correct use of endian-dependent code
1659                                         // (i.e., mem-to-mem direct copying)!
1660                                         *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1661                                 else
1662                                         *currentLineBuffer =
1663                                                 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1664                                         *(currentLineBuffer + 1) =
1665                                                 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1666                         }
1667
1668                         currentLineBuffer += lbufDelta;
1669
1670 /*                      horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1671                         while (horizontalRemainder & 0x80)
1672                         {
1673                                 horizontalRemainder += hscale;
1674                                 pixCount++;
1675                                 pixels <<= 2;
1676                         }//*/
1677 //                      while (horizontalRemainder <= 0x20)             // I.e., it's <= 0 (*before* subtraction)
1678                         while (horizontalRemainder < 0x20)              // I.e., it's <= 1.0 (*before* subtraction)
1679                         {
1680                                 horizontalRemainder += hscale;
1681                                 pixCount++;
1682                                 pixels <<= 2;
1683                         }
1684                         horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1685
1686                         if (pixCount > 31)
1687                         {
1688                                 int phrasesToSkip = pixCount / 32, pixelShift = pixCount % 32;
1689
1690                                 data += (pitch << 3) * phrasesToSkip;
1691                                 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1692                                 pixels <<= 2 * pixelShift;
1693                                 iwidth -= phrasesToSkip;
1694                                 pixCount = pixelShift;
1695                         }
1696                 }
1697         }
1698         else if (depth == 2)                                                    // 4 BPP
1699         {
1700 if (firstPix != 0)
1701         WriteLog("OP: Scaled bitmap @ 4 BPP requesting FIRSTPIX!\n");
1702                 index &= 0xF0;                                                          // Top four bits form CLUT index
1703                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1704                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1705
1706                 int pixCount = 0;
1707                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1708
1709                 while ((int32)iwidth > 0)
1710                 {
1711                         uint8 bits = pixels >> 60;
1712
1713 #ifndef OP_USES_PALETTE_ZERO
1714                         if (flagTRANS && bits == 0)
1715 #else
1716                         if (flagTRANS && (paletteRAM16[index | bits] == 0))
1717 #endif
1718                                 ;       // Do nothing...
1719                         else
1720                         {
1721                                 if (!flagRMW)
1722                                         // This is the *only* correct use of endian-dependent code
1723                                         // (i.e., mem-to-mem direct copying)!
1724                                         *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1725                                 else
1726                                         *currentLineBuffer =
1727                                                 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1728                                         *(currentLineBuffer + 1) =
1729                                                 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1730                         }
1731
1732                         currentLineBuffer += lbufDelta;
1733
1734 /*                      horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1735                         while (horizontalRemainder & 0x80)
1736                         {
1737                                 horizontalRemainder += hscale;
1738                                 pixCount++;
1739                                 pixels <<= 4;
1740                         }//*/
1741 //                      while (horizontalRemainder <= 0x20)             // I.e., it's <= 0 (*before* subtraction)
1742                         while (horizontalRemainder < 0x20)              // I.e., it's <= 0 (*before* subtraction)
1743                         {
1744                                 horizontalRemainder += hscale;
1745                                 pixCount++;
1746                                 pixels <<= 4;
1747                         }
1748                         horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1749
1750                         if (pixCount > 15)
1751                         {
1752                                 int phrasesToSkip = pixCount / 16, pixelShift = pixCount % 16;
1753
1754                                 data += (pitch << 3) * phrasesToSkip;
1755                                 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1756                                 pixels <<= 4 * pixelShift;
1757                                 iwidth -= phrasesToSkip;
1758                                 pixCount = pixelShift;
1759                         }
1760                 }
1761         }
1762         else if (depth == 3)                                                    // 8 BPP
1763         {
1764 if (firstPix)
1765         WriteLog("OP: Scaled bitmap @ 8 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1766                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1767                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1768
1769                 int pixCount = 0;
1770                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1771
1772                 while ((int32)iwidth > 0)
1773                 {
1774                         uint8 bits = pixels >> 56;
1775
1776 #ifndef OP_USES_PALETTE_ZERO
1777                         if (flagTRANS && bits == 0)
1778 #else
1779                         if (flagTRANS && (paletteRAM16[bits] == 0))
1780 #endif
1781                                 ;       // Do nothing...
1782                         else
1783                         {
1784                                 if (!flagRMW)
1785                                         // This is the *only* correct use of endian-dependent code
1786                                         // (i.e., mem-to-mem direct copying)!
1787                                         *(uint16 *)currentLineBuffer = paletteRAM16[bits];
1788 /*                              {
1789                                         if (currentLineBuffer >= lineBufferLowerLimit && currentLineBuffer <= lineBufferUpperLimit)
1790                                                 *(uint16 *)currentLineBuffer = paletteRAM16[bits];
1791                                 }*/
1792                                 else
1793                                         *currentLineBuffer =
1794                                                 BLEND_CR(*currentLineBuffer, paletteRAM[bits << 1]),
1795                                         *(currentLineBuffer + 1) =
1796                                                 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[(bits << 1) + 1]);
1797                         }
1798
1799                         currentLineBuffer += lbufDelta;
1800
1801 //                      while (horizontalRemainder <= 0x20)             // I.e., it's <= 0 (*before* subtraction)
1802                         while (horizontalRemainder < 0x20)              // I.e., it's <= 1.0 (*before* subtraction)
1803                         {
1804                                 horizontalRemainder += hscale;
1805                                 pixCount++;
1806                                 pixels <<= 8;
1807                         }
1808                         horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1809
1810                         if (pixCount > 7)
1811                         {
1812                                 int phrasesToSkip = pixCount / 8, pixelShift = pixCount % 8;
1813
1814                                 data += (pitch << 3) * phrasesToSkip;
1815                                 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1816                                 pixels <<= 8 * pixelShift;
1817                                 iwidth -= phrasesToSkip;
1818                                 pixCount = pixelShift;
1819                         }
1820                 }
1821         }
1822         else if (depth == 4)                                                    // 16 BPP
1823         {
1824 if (firstPix != 0)
1825         WriteLog("OP: Scaled bitmap @ 16 BPP requesting FIRSTPIX!\n");
1826                 // The LSB is OPFLAG_REFLECT, so sign extend it and OR 2 into it.
1827                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1828
1829                 int pixCount = 0;
1830                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1831
1832                 while ((int32)iwidth > 0)
1833                 {
1834                         uint8 bitsHi = pixels >> 56, bitsLo = pixels >> 48;
1835
1836 //This doesn't seem right... Let's try the encoded black value ($8800):
1837 //Apparently, CRY 0 maps to $8800...
1838                                 if (flagTRANS && ((bitsLo | bitsHi) == 0))
1839 //                              if (flagTRANS && (bitsHi == 0x88) && (bitsLo == 0x00))
1840                                 ;       // Do nothing...
1841                         else
1842                         {
1843                                 if (!flagRMW)
1844                                         *currentLineBuffer = bitsHi,
1845                                         *(currentLineBuffer + 1) = bitsLo;
1846                                 else
1847                                         *currentLineBuffer =
1848                                                 BLEND_CR(*currentLineBuffer, bitsHi),
1849                                         *(currentLineBuffer + 1) =
1850                                                 BLEND_Y(*(currentLineBuffer + 1), bitsLo);
1851                         }
1852
1853                         currentLineBuffer += lbufDelta;
1854
1855 /*                      horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1856                         while (horizontalRemainder & 0x80)
1857                         {
1858                                 horizontalRemainder += hscale;
1859                                 pixCount++;
1860                                 pixels <<= 16;
1861                         }//*/
1862 //                      while (horizontalRemainder <= 0x20)             // I.e., it's <= 0 (*before* subtraction)
1863                         while (horizontalRemainder < 0x20)              // I.e., it's < 1.0 (*before* subtraction)
1864                         {
1865                                 horizontalRemainder += hscale;
1866                                 pixCount++;
1867                                 pixels <<= 16;
1868                         }
1869                         horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1870 //*/
1871                         if (pixCount > 3)
1872                         {
1873                                 int phrasesToSkip = pixCount / 4, pixelShift = pixCount % 4;
1874
1875                                 data += (pitch << 3) * phrasesToSkip;
1876                                 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1877                                 pixels <<= 16 * pixelShift;
1878
1879                                 iwidth -= phrasesToSkip;
1880
1881                                 pixCount = pixelShift;
1882                         }
1883                 }
1884         }
1885         else if (depth == 5)                                                    // 24 BPP
1886         {
1887 //I'm not sure that you can scale a 24 BPP bitmap properly--the JTRM seems to indicate as much.
1888 WriteLog("OP: Writing 24 BPP scaled bitmap!\n");
1889 if (firstPix != 0)
1890         WriteLog("OP: Scaled bitmap @ 24 BPP requesting FIRSTPIX!\n");
1891                 // Not sure, but I think RMW only works with 16 BPP and below, and only in CRY mode...
1892                 // The LSB is OPFLAG_REFLECT, so sign extend it and OR 4 into it.
1893                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 4) | 0x04;
1894
1895                 while (iwidth--)
1896                 {
1897                         // Fetch phrase...
1898                         uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1899                         data += pitch << 3;                                             // Multiply pitch * 8 (optimize: precompute this value)
1900
1901                         for(int i=0; i<2; i++)
1902                         {
1903                                 uint8 bits3 = pixels >> 56, bits2 = pixels >> 48,
1904                                         bits1 = pixels >> 40, bits0 = pixels >> 32;
1905
1906                                 if (flagTRANS && (bits3 | bits2 | bits1 | bits0) == 0)
1907                                         ;       // Do nothing...
1908                                 else
1909                                         *currentLineBuffer = bits3,
1910                                         *(currentLineBuffer + 1) = bits2,
1911                                         *(currentLineBuffer + 2) = bits1,
1912                                         *(currentLineBuffer + 3) = bits0;
1913
1914                                 currentLineBuffer += lbufDelta;
1915                                 pixels <<= 32;
1916                         }
1917                 }
1918         }
1919 }