]> Shamusworld >> Repos - virtualjaguar/blob - src/op.cpp
e951d5f855c43b92469f2c663c770366a6e0a2e6
[virtualjaguar] / src / op.cpp
1 //
2 // Object Processor
3 //
4 // Original source by David Raingeard (Cal2)
5 // GCC/SDL port by Niels Wagenaar (Linux/WIN32) and Caz (BeOS)
6 // Extensive cleanups/fixes/rewrites by James Hammons
7 // (C) 2010 Underground Software
8 //
9 // JLH = James Hammons <jlhamm@acm.org>
10 //
11 // Who  When        What
12 // ---  ----------  -------------------------------------------------------------
13 // JLH  01/16/2010  Created this log ;-)
14 //
15
16 #include "op.h"
17
18 #include <stdlib.h>
19 #include <string.h>
20 #include "gpu.h"
21 #include "jaguar.h"
22 #include "log.h"
23 #include "m68000/m68kinterface.h"
24 #include "memory.h"
25 #include "tom.h"
26
27 //#define OP_DEBUG
28 //#define OP_DEBUG_BMP
29
// Blend table lookups. The table index is (dst << 8) | src: the existing
// (destination) byte selects the high half of the index and the incoming
// (source) delta byte selects the low half, matching how OPInit() fills the
// tables. Both arguments are fully parenthesized so that compound expressions
// such as BLEND_Y(a + b, c) index the intended entry.
#define BLEND_Y(dst, src)       op_blend_y[(((uint16)(dst)) << 8) | ((uint16)(src))]
#define BLEND_CR(dst, src)      op_blend_cr[(((uint16)(dst)) << 8) | ((uint16)(src))]
32
// Object type codes, as found in the low three bits of an object's first phrase
#define OBJECT_TYPE_BITMAP      0x00    // %000: fixed bitmap
#define OBJECT_TYPE_SCALE       0x01    // %001: scaled bitmap
#define OBJECT_TYPE_GPU         0x02    // %010: GPU interrupt object
#define OBJECT_TYPE_BRANCH      0x03    // %011: branch object
#define OBJECT_TYPE_STOP        0x04    // %100: stop object
38
// Branch object condition codes
#define CONDITION_EQUAL                 0x00    // Taken when VC == YPOS
#define CONDITION_LESS_THAN             0x01    // Taken when VC < YPOS
#define CONDITION_GREATER_THAN          0x02    // Taken when VC > YPOS
#define CONDITION_OP_FLAG_SET           0x03    // Taken when the OP flag is set
#define CONDITION_SECOND_HALF_LINE      0x04    // Taken on the second half of a line
44
// Bitmap object flag bits (single-bit masks, tested against the 4-bit flags field)
#define OPFLAG_RELEASE          0x08    // Bus release bit
#define OPFLAG_TRANS            0x04    // Transparency bit
#define OPFLAG_RMW              0x02    // Read-Modify-Write bit
#define OPFLAG_REFLECT          0x01    // Horizontal mirror bit
49
50 // Private function prototypes
51
52 void OPProcessFixedBitmap(uint64 p0, uint64 p1, bool render);
53 void OPProcessScaledBitmap(uint64 p0, uint64 p1, uint64 p2, bool render);
54 void OPDiscoverObjects(uint32 address);
55 void OPDumpObjectList(void);
56 void DumpScaledObject(uint64 p0, uint64 p1, uint64 p2);
57 void DumpFixedObject(uint64 p0, uint64 p1);
58 void DumpBitmapCore(uint64 p0, uint64 p1);
59 uint64 OPLoadPhrase(uint32 offset);
60
61 // Local global variables
62
63 // Blend tables (64K each)
64 static uint8 op_blend_y[0x10000];
65 static uint8 op_blend_cr[0x10000];
66 // There may be a problem with this "RAM" overlapping (and thus being independent of)
67 // some of the regular TOM RAM...
68 //#warning objectp_ram is separated from TOM RAM--need to fix that!
69 //static uint8 objectp_ram[0x40];                       // This is based at $F00000
70 uint8 objectp_running = 0;
71 //bool objectp_stop_reading_list;
72
73 static uint8 op_bitmap_bit_depth[8] = { 1, 2, 4, 8, 16, 24, 32, 0 };
74 //static uint32 op_bitmap_bit_size[8] =
75 //      { (uint32)(0.125*65536), (uint32)(0.25*65536), (uint32)(0.5*65536), (uint32)(1*65536),
76 //        (uint32)(2*65536),     (uint32)(1*65536),    (uint32)(1*65536),   (uint32)(1*65536) };
77 static uint32 op_pointer;
78
79 int32 phraseWidthToPixels[8] = { 64, 32, 16, 8, 4, 2, 0, 0 };
80
81
82 //
83 // Object Processor initialization
84 //
85 void OPInit(void)
86 {
87         // Here we calculate the saturating blend of a signed 4-bit value and an
88         // existing Cyan/Red value as well as a signed 8-bit value and an existing intensity...
89         // Note: CRY is 4 bits Cyan, 4 bits Red, 16 bits intensitY
90         for(int i=0; i<256*256; i++)
91         {
92                 int y = (i >> 8) & 0xFF;
93                 int dy = (int8)i;                                       // Sign extend the Y index
94                 int c1 = (i >> 8) & 0x0F;
95                 int dc1 = (int8)(i << 4) >> 4;          // Sign extend the R index
96                 int c2 = (i >> 12) & 0x0F;
97                 int dc2 = (int8)(i & 0xF0) >> 4;        // Sign extend the C index
98
99                 y += dy;
100
101                 if (y < 0)
102                         y = 0;
103                 else if (y > 0xFF)
104                         y = 0xFF;
105
106                 op_blend_y[i] = y;
107
108                 c1 += dc1;
109
110                 if (c1 < 0)
111                         c1 = 0;
112                 else if (c1 > 0x0F)
113                         c1 = 0x0F;
114
115                 c2 += dc2;
116
117                 if (c2 < 0)
118                         c2 = 0;
119                 else if (c2 > 0x0F)
120                         c2 = 0x0F;
121
122                 op_blend_cr[i] = (c2 << 4) | c1;
123         }
124
125         OPReset();
126 }
127
128 //
129 // Object Processor reset
130 //
131 void OPReset(void)
132 {
133 //      memset(objectp_ram, 0x00, 0x40);
134         objectp_running = 0;
135 }
136
137 static const char * opType[8] =
138 { "(BITMAP)", "(SCALED BITMAP)", "(GPU INT)", "(BRANCH)", "(STOP)", "???", "???", "???" };
139 static const char * ccType[8] =
140         { "\"==\"", "\"<\"", "\">\"", "(opflag set)", "(second half line)", "?", "?", "?" };
141 static uint32 object[8192];
142 static uint32 numberOfObjects;
143 //static uint32 objectLink[8192];
144 //static uint32 numberOfLinks;
145
146 void OPDone(void)
147 {
148 //#warning "!!! Fix OL dump so that it follows links !!!"
149 //      const char * opType[8] =
150 //      { "(BITMAP)", "(SCALED BITMAP)", "(GPU INT)", "(BRANCH)", "(STOP)", "???", "???", "???" };
151 //      const char * ccType[8] =
152 //              { "\"==\"", "\"<\"", "\">\"", "(opflag set)", "(second half line)", "?", "?", "?" };
153
154         uint32 olp = OPGetListPointer();
155         WriteLog("\nOP: OLP = $%08X\n", olp);
156         WriteLog("OP: Phrase dump\n    ----------\n");
157
158 #if 0
159         for(uint32 i=0; i<0x100; i+=8)
160         {
161                 uint32 hi = JaguarReadLong(olp + i, OP), lo = JaguarReadLong(olp + i + 4, OP);
162                 WriteLog("\t%08X: %08X %08X %s", olp + i, hi, lo, opType[lo & 0x07]);
163
164                 if ((lo & 0x07) == 3)
165                 {
166                         uint16 ypos = (lo >> 3) & 0x7FF;
167                         uint8  cc   = (lo >> 14) & 0x03;
168                         uint32 link = ((hi << 11) | (lo >> 21)) & 0x3FFFF8;
169                         WriteLog(" YPOS=%u, CC=%s, link=%08X", ypos, ccType[cc], link);
170                 }
171
172                 WriteLog("\n");
173
174                 if ((lo & 0x07) == 0)
175                         DumpFixedObject(OPLoadPhrase(olp+i), OPLoadPhrase(olp+i+8));
176
177                 if ((lo & 0x07) == 1)
178                         DumpScaledObject(OPLoadPhrase(olp+i), OPLoadPhrase(olp+i+8), OPLoadPhrase(olp+i+16));
179         }
180
181         WriteLog("\n");
182 #else
183         numberOfObjects = 0;
184         OPDiscoverObjects(olp);
185         OPDumpObjectList();
186 #endif
187 }
188
189 void OPDiscoverObjects(uint32 address)
190 {
191         // Check to see if we've already seen this object
192         for(uint32 i=0; i<numberOfObjects; i++)
193         {
194                 if (address == object[i])
195                         return;
196         }
197
198         // Store the object...
199         object[numberOfObjects++] = address;
200         uint8 objectType = 0;
201
202         do
203         {
204                 uint32 hi = JaguarReadLong(address + 0, OP);
205                 uint32 lo = JaguarReadLong(address + 4, OP);
206                 objectType = lo & 0x07;
207                 uint32 link = ((hi << 11) | (lo >> 21)) & 0x3FFFF8;
208
209                 if (objectType == 3)
210                 {
211                         uint16 ypos = (lo >> 3) & 0x7FF;
212                         uint8  cc   = (lo >> 14) & 0x07;        // Proper # of bits == 3
213
214                         // Recursion needed to follow all links!
215                         OPDiscoverObjects(address + 8);
216                 }
217
218                 if (address == link)    // Ruh roh...
219                 {
220                         // Runaway recursive link is bad!
221                         return;
222                 }
223
224                 address = link;
225
226                 // Check to see if we've already seen this object, and add it if not
227                 bool seenObject = false;
228
229                 for(uint32 i=0; i<numberOfObjects; i++)
230                 {
231                         if (address == object[i])
232                         {
233                                 seenObject = true;
234                                 break;
235                         }
236                 }
237
238                 if (!seenObject)
239                         object[numberOfObjects++] = address;
240         }
241         while (objectType != 4);
242 }
243
244 void OPDumpObjectList(void)
245 {
246         for(uint32 i=0; i<numberOfObjects; i++)
247         {
248                 uint32 address = object[i];
249
250                 uint32 hi = JaguarReadLong(address + 0, OP);
251                 uint32 lo = JaguarReadLong(address + 4, OP);
252                 uint8 objectType = lo & 0x07;
253                 uint32 link = ((hi << 11) | (lo >> 21)) & 0x3FFFF8;
254                 WriteLog("%08X: %08X %08X %s", address, hi, lo, opType[objectType]);
255
256                 if (objectType == 3)
257                 {
258                         uint16 ypos = (lo >> 3) & 0x7FF;
259                         uint8  cc   = (lo >> 14) & 0x07;        // Proper # of bits == 3
260                         WriteLog(" YPOS=%u, CC=%s, link=$%08X", ypos, ccType[cc], link);
261                 }
262
263                 WriteLog("\n");
264
265                 if (objectType == 0)
266                         DumpFixedObject(OPLoadPhrase(address + 0), OPLoadPhrase(address + 8));
267
268                 if (objectType == 1)
269                         DumpScaledObject(OPLoadPhrase(address + 0), OPLoadPhrase(address + 8),
270                                 OPLoadPhrase(address + 16));
271
272                 if (address == link)    // Ruh roh...
273                 {
274                         // Runaway recursive link is bad!
275                         WriteLog("***** SELF REFERENTIAL LINK *****\n\n");
276                 }
277         }
278
279         WriteLog("\n");
280 }
281
282 //
283 // Object Processor memory access
284 // Memory range: F00010 - F00027
285 //
286 //      F00010-F00017   R     xxxxxxxx xxxxxxxx   OB - current object code from the graphics processor
287 //      F00020-F00023     W   xxxxxxxx xxxxxxxx   OLP - start of the object list
288 //      F00026            W   -------- -------x   OBF - object processor flag
289 //
290
291 #if 0
292 uint8 OPReadByte(uint32 offset, uint32 who/*=UNKNOWN*/)
293 {
294         offset &= 0x3F;
295         return objectp_ram[offset];
296 }
297
298 uint16 OPReadWord(uint32 offset, uint32 who/*=UNKNOWN*/)
299 {
300         offset &= 0x3F;
301         return GET16(objectp_ram, offset);
302 }
303
304 void OPWriteByte(uint32 offset, uint8 data, uint32 who/*=UNKNOWN*/)
305 {
306         offset &= 0x3F;
307         objectp_ram[offset] = data;
308 }
309
310 void OPWriteWord(uint32 offset, uint16 data, uint32 who/*=UNKNOWN*/)
311 {
312         offset &= 0x3F;
313         SET16(objectp_ram, offset, data);
314
315 /*if (offset == 0x20)
316 WriteLog("OP: Setting lo list pointer: %04X\n", data);
317 if (offset == 0x22)
318 WriteLog("OP: Setting hi list pointer: %04X\n", data);//*/
319 }
320 #endif
321
322 uint32 OPGetListPointer(void)
323 {
324         // Note: This register is LO / HI WORD, hence the funky look of this...
325         return GET16(tomRam8, 0x20) | (GET16(tomRam8, 0x22) << 16);
326 }
327
328 // This is WRONG, since the OBF is only 16 bits wide!!! [FIXED]
329
330 uint32 OPGetStatusRegister(void)
331 {
332         return GET16(tomRam8, 0x26);
333 }
334
335 // This is WRONG, since the OBF is only 16 bits wide!!! [FIXED]
336
337 void OPSetStatusRegister(uint32 data)
338 {
339         tomRam8[0x26] = (data & 0x0000FF00) >> 8;
340         tomRam8[0x27] |= (data & 0xFE);
341 }
342
343 void OPSetCurrentObject(uint64 object)
344 {
345 //Not sure this is right... Wouldn't it just be stored 64 bit BE?
346         // Stored as least significant 32 bits first, ms32 last in big endian
347 /*      objectp_ram[0x13] = object & 0xFF; object >>= 8;
348         objectp_ram[0x12] = object & 0xFF; object >>= 8;
349         objectp_ram[0x11] = object & 0xFF; object >>= 8;
350         objectp_ram[0x10] = object & 0xFF; object >>= 8;
351
352         objectp_ram[0x17] = object & 0xFF; object >>= 8;
353         objectp_ram[0x16] = object & 0xFF; object >>= 8;
354         objectp_ram[0x15] = object & 0xFF; object >>= 8;
355         objectp_ram[0x14] = object & 0xFF;*/
356 // Let's try regular good old big endian...
357         tomRam8[0x17] = object & 0xFF; object >>= 8;
358         tomRam8[0x16] = object & 0xFF; object >>= 8;
359         tomRam8[0x15] = object & 0xFF; object >>= 8;
360         tomRam8[0x14] = object & 0xFF; object >>= 8;
361
362         tomRam8[0x13] = object & 0xFF; object >>= 8;
363         tomRam8[0x12] = object & 0xFF; object >>= 8;
364         tomRam8[0x11] = object & 0xFF; object >>= 8;
365         tomRam8[0x10] = object & 0xFF;
366 }
367
368 uint64 OPLoadPhrase(uint32 offset)
369 {
370         offset &= ~0x07;                                                // 8 byte alignment
371         return ((uint64)JaguarReadLong(offset, OP) << 32) | (uint64)JaguarReadLong(offset+4, OP);
372 }
373
374 void OPStorePhrase(uint32 offset, uint64 p)
375 {
376         offset &= ~0x07;                                                // 8 byte alignment
377         JaguarWriteLong(offset, p >> 32, OP);
378         JaguarWriteLong(offset + 4, p & 0xFFFFFFFF, OP);
379 }
380
381 //
382 // Debugging routines
383 //
384 void DumpScaledObject(uint64 p0, uint64 p1, uint64 p2)
385 {
386         WriteLog("          %08X %08X\n", (uint32)(p1>>32), (uint32)(p1&0xFFFFFFFF));
387         WriteLog("          %08X %08X\n", (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
388         DumpBitmapCore(p0, p1);
389         uint32 hscale = p2 & 0xFF;
390         uint32 vscale = (p2 >> 8) & 0xFF;
391         uint32 remainder = (p2 >> 16) & 0xFF;
392         WriteLog("    [hsc: %02X, vsc: %02X, rem: %02X]\n", hscale, vscale, remainder);
393 }
394
395 void DumpFixedObject(uint64 p0, uint64 p1)
396 {
397         WriteLog("          %08X %08X\n", (uint32)(p1>>32), (uint32)(p1&0xFFFFFFFF));
398         DumpBitmapCore(p0, p1);
399 }
400
401 void DumpBitmapCore(uint64 p0, uint64 p1)
402 {
403         uint32 bdMultiplier[8] = { 64, 32, 16, 8, 4, 2, 1, 1 };
404         uint8 bitdepth = (p1 >> 12) & 0x07;
405 //WAS:  int16 ypos = ((p0 >> 3) & 0x3FF);                       // ??? What if not interlaced (/2)?
406         int16 ypos = ((p0 >> 3) & 0x7FF);                       // ??? What if not interlaced (/2)?
407         int32 xpos = p1 & 0xFFF;
408         xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);       // Sign extend that mutha!
409         uint32 iwidth = ((p1 >> 28) & 0x3FF);
410         uint32 dwidth = ((p1 >> 18) & 0x3FF);           // Unsigned!
411         uint16 height = ((p0 >> 14) & 0x3FF);
412         uint32 link = ((p0 >> 24) & 0x7FFFF) << 3;
413         uint32 ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
414         uint32 firstPix = (p1 >> 49) & 0x3F;
415         uint8 flags = (p1 >> 45) & 0x0F;
416         uint8 idx = (p1 >> 38) & 0x7F;
417         uint32 pitch = (p1 >> 15) & 0x07;
418         WriteLog("    [%u x %u @ (%i, %u) (iw:%u, dw:%u) (%u bpp), l:%08X, p:%08X fp:%02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
419                 iwidth * bdMultiplier[bitdepth],
420                 height, xpos, ypos, iwidth, dwidth, op_bitmap_bit_depth[bitdepth], link,
421                 ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""),
422                 (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""),
423                 (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
424 }
425
426 //
427 // Object Processor main routine
428 //
429 #warning "Need to fix this so that when an GPU object IRQ happens, we can pick up OP processing where we left off. !!! FIX !!!"
430 void OPProcessList(int halfline, bool render)
431 {
432 #warning "!!! NEED TO HANDLE MULTIPLE FIELDS PROPERLY !!!
433 // We ignore them, for now; not good
434         halfline &= 0x7FF;
435
436 extern int op_start_log;
437 //      char * condition_to_str[8] =
438 //              { "==", "<", ">", "(opflag set)", "(second half line)", "?", "?", "?" };
439
440         op_pointer = OPGetListPointer();
441
442 //      objectp_stop_reading_list = false;
443
444 //WriteLog("OP: Processing line #%u (OLP=%08X)...\n", halfline, op_pointer);
445 //op_done();
446
447 // *** BEGIN OP PROCESSOR TESTING ONLY ***
448 extern bool interactiveMode;
449 extern bool iToggle;
450 extern int objectPtr;
451 bool inhibit;
452 int bitmapCounter = 0;
453 // *** END OP PROCESSOR TESTING ONLY ***
454
455         uint32 opCyclesToRun = 30000;                                   // This is a pulled-out-of-the-air value (will need to be fixed, obviously!)
456
457 //      if (op_pointer) WriteLog(" new op list at 0x%.8x halfline %i\n",op_pointer,halfline);
458         while (op_pointer)
459         {
460 // *** BEGIN OP PROCESSOR TESTING ONLY ***
461 if (interactiveMode && bitmapCounter == objectPtr)
462         inhibit = iToggle;
463 else
464         inhibit = false;
465 // *** END OP PROCESSOR TESTING ONLY ***
466 //              if (objectp_stop_reading_list)
467 //                      return;
468
469                 uint64 p0 = OPLoadPhrase(op_pointer);
470                 op_pointer += 8;
471 //WriteLog("\t%08X type %i\n", op_pointer, (uint8)p0 & 0x07);
472
473 #if 1
474 if (halfline == TOMGetVDB() && op_start_log)
475 //if (halfline == 215 && op_start_log)
476 //if (halfline == 28 && op_start_log)
477 //if (halfline == 0)
478 {
479 WriteLog("%08X --> phrase %08X %08X", op_pointer - 8, (int)(p0>>32), (int)(p0&0xFFFFFFFF));
480 if ((p0 & 0x07) == OBJECT_TYPE_BITMAP)
481 {
482 WriteLog(" (BITMAP) ");
483 uint64 p1 = OPLoadPhrase(op_pointer);
484 WriteLog("\n%08X --> phrase %08X %08X ", op_pointer, (int)(p1>>32), (int)(p1&0xFFFFFFFF));
485         uint8 bitdepth = (p1 >> 12) & 0x07;
486 //WAS:  int16 ypos = ((p0 >> 3) & 0x3FF);                       // ??? What if not interlaced (/2)?
487         int16 ypos = ((p0 >> 3) & 0x7FF);                       // ??? What if not interlaced (/2)?
488 int32 xpos = p1 & 0xFFF;
489 xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);
490         uint32 iwidth = ((p1 >> 28) & 0x3FF);
491         uint32 dwidth = ((p1 >> 18) & 0x3FF);           // Unsigned!
492         uint16 height = ((p0 >> 14) & 0x3FF);
493         uint32 link = ((p0 >> 24) & 0x7FFFF) << 3;
494         uint32 ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
495         uint32 firstPix = (p1 >> 49) & 0x3F;
496         uint8 flags = (p1 >> 45) & 0x0F;
497         uint8 idx = (p1 >> 38) & 0x7F;
498         uint32 pitch = (p1 >> 15) & 0x07;
499 WriteLog("\n    [%u (%u) x %u @ (%i, %u) (%u bpp), l: %08X, p: %08X fp: %02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
500         iwidth, dwidth, height, xpos, ypos, op_bitmap_bit_depth[bitdepth], link, ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""), (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""), (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
501 }
502 if ((p0 & 0x07) == OBJECT_TYPE_SCALE)
503 {
504 WriteLog(" (SCALED BITMAP)");
505 uint64 p1 = OPLoadPhrase(op_pointer), p2 = OPLoadPhrase(op_pointer+8);
506 WriteLog("\n%08X --> phrase %08X %08X ", op_pointer, (int)(p1>>32), (int)(p1&0xFFFFFFFF));
507 WriteLog("\n%08X --> phrase %08X %08X ", op_pointer+8, (int)(p2>>32), (int)(p2&0xFFFFFFFF));
508         uint8 bitdepth = (p1 >> 12) & 0x07;
509 //WAS:  int16 ypos = ((p0 >> 3) & 0x3FF);                       // ??? What if not interlaced (/2)?
510         int16 ypos = ((p0 >> 3) & 0x7FF);                       // ??? What if not interlaced (/2)?
511 int32 xpos = p1 & 0xFFF;
512 xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);
513         uint32 iwidth = ((p1 >> 28) & 0x3FF);
514         uint32 dwidth = ((p1 >> 18) & 0x3FF);           // Unsigned!
515         uint16 height = ((p0 >> 14) & 0x3FF);
516         uint32 link = ((p0 >> 24) & 0x7FFFF) << 3;
517         uint32 ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
518         uint32 firstPix = (p1 >> 49) & 0x3F;
519         uint8 flags = (p1 >> 45) & 0x0F;
520         uint8 idx = (p1 >> 38) & 0x7F;
521         uint32 pitch = (p1 >> 15) & 0x07;
522 WriteLog("\n    [%u (%u) x %u @ (%i, %u) (%u bpp), l: %08X, p: %08X fp: %02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
523         iwidth, dwidth, height, xpos, ypos, op_bitmap_bit_depth[bitdepth], link, ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""), (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""), (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
524         uint32 hscale = p2 & 0xFF;
525         uint32 vscale = (p2 >> 8) & 0xFF;
526         uint32 remainder = (p2 >> 16) & 0xFF;
527 WriteLog("    [hsc: %02X, vsc: %02X, rem: %02X]\n", hscale, vscale, remainder);
528 }
529 if ((p0 & 0x07) == OBJECT_TYPE_GPU)
530 WriteLog(" (GPU)\n");
531 if ((p0 & 0x07) == OBJECT_TYPE_BRANCH)
532 {
533 WriteLog(" (BRANCH)\n");
534 uint8 * jaguarMainRam = GetRamPtr();
535 WriteLog("[RAM] --> ");
536 for(int k=0; k<8; k++)
537         WriteLog("%02X ", jaguarMainRam[op_pointer-8 + k]);
538 WriteLog("\n");
539 }
540 if ((p0 & 0x07) == OBJECT_TYPE_STOP)
541 WriteLog("    --> List end\n\n");
542 }
543 #endif
544
545                 switch ((uint8)p0 & 0x07)
546                 {
547                 case OBJECT_TYPE_BITMAP:
548                 {
549 //WAS:                  uint16 ypos = (p0 >> 3) & 0x3FF;
550                         uint16 ypos = (p0 >> 3) & 0x7FF;
551 // This is only theory implied by Rayman...!
552 // It seems that if the YPOS is zero, then bump the YPOS value so that it coincides with
553 // the VDB value. With interlacing, this would be slightly more tricky.
554 // There's probably another bit somewhere that enables this mode--but so far, doesn't seem
555 // to affect any other game in a negative way (that I've seen).
556 // Either that, or it's an undocumented bug...
557
558 //No, the reason this was needed is that the OP code before was wrong. Any value
559 //less than VDB will get written to the top line of the display!
560 #if 0
561 // Not so sure... Let's see what happens here...
562 // No change...
563                         if (ypos == 0)
564                                 ypos = TOMReadWord(0xF00046, OP) / 2;                   // Get the VDB value
565 #endif
566 // Actually, no. Any item less than VDB will get only the lines that hang over VDB displayed.
567 // Actually, this is incorrect. It seems that VDB value is wrong somewhere and that's
568 // what's causing things to fuck up. Still no idea why.
569
570                         uint32 height = (p0 & 0xFFC000) >> 14;
571                         uint32 oldOPP = op_pointer - 8;
572 // *** BEGIN OP PROCESSOR TESTING ONLY ***
573 if (inhibit && op_start_log)
574         WriteLog("!!! ^^^ This object is INHIBITED! ^^^ !!!\n");
575 bitmapCounter++;
576 if (!inhibit)   // For OP testing only!
577 // *** END OP PROCESSOR TESTING ONLY ***
578                         if (halfline >= ypos && height > 0)
579                         {
580                                 uint64 p1 = OPLoadPhrase(op_pointer);
581                                 op_pointer += 8;
582 //WriteLog("OP: Writing halfline %d with ypos == %d...\n", halfline, ypos);
583 //WriteLog("--> Writing %u BPP bitmap...\n", op_bitmap_bit_depth[(p1 >> 12) & 0x07]);
584 //                              OPProcessFixedBitmap(halfline, p0, p1, render);
585                                 OPProcessFixedBitmap(p0, p1, render);
586
587                                 // OP write-backs
588
589 //???Does this really happen??? Doesn't seem to work if you do this...!
590 //Probably not. Must be a bug in the documentation...!
591 //                              uint32 link = (p0 & 0x7FFFF000000) >> 21;
592 //                              SET16(tom_ram_8, 0x20, link & 0xFFFF);  // OLP
593 //                              SET16(tom_ram_8, 0x22, link >> 16);
594 /*                              uint32 height = (p0 & 0xFFC000) >> 14;
595                                 if (height - 1 > 0)
596                                         height--;*/
597                                 // NOTE: Would subtract 2 if in interlaced mode...!
598 //                              uint64 height = ((p0 & 0xFFC000) - 0x4000) & 0xFFC000;
599 //                              if (height)
600                                 height--;
601
602                                 uint64 data = (p0 & 0xFFFFF80000000000LL) >> 40;
603                                 uint64 dwidth = (p1 & 0xFFC0000) >> 15;
604                                 data += dwidth;
605
606                                 p0 &= ~0xFFFFF80000FFC000LL;            // Mask out old data...
607                                 p0 |= (uint64)height << 14;
608                                 p0 |= data << 40;
609                                 OPStorePhrase(oldOPP, p0);
610                         }
611 //WriteLog("\t\tOld OP: %08X -> ", op_pointer);
612 //Temp, for testing...
613 //No doubt, this type of check will break all kinds of stuff... !!! FIX !!!
614 //And it does! !!! FIX !!!
615 //Let's remove this "fix" since it screws up more than it fixes.
616 /*      if (op_pointer > ((p0 & 0x000007FFFF000000LL) >> 21))
617                 return;*/
618
619 // NOTE: The link address only replaces bits 3-21 in the OLP, and this replaces
620 //       EVERYTHING. !!! FIX !!! [DONE]
621 #warning "!!! Link address is not linked properly for all object types !!!"
622 #warning "!!! Only BITMAP is properly handled !!!"
623                         op_pointer &= 0xFFC00007;
624                         op_pointer |= (p0 & 0x000007FFFF000000LL) >> 21;
625 //WriteLog("New OP: %08X\n", op_pointer);
626 //kludge: Seems that memory access is mirrored in the first 8MB of memory...
627 if (op_pointer > 0x1FFFFF && op_pointer < 0x800000)
628         op_pointer &= 0xFF1FFFFF;       // Knock out bits 21-23
629
630                         break;
631                 }
632                 case OBJECT_TYPE_SCALE:
633                 {
634 //WAS:                  uint16 ypos = (p0 >> 3) & 0x3FF;
635                         uint16 ypos = (p0 >> 3) & 0x7FF;
636                         uint32 height = (p0 & 0xFFC000) >> 14;
637                         uint32 oldOPP = op_pointer - 8;
638 //WriteLog("OP: Scaled Object (ypos=%04X, height=%04X", ypos, height);
639 // *** BEGIN OP PROCESSOR TESTING ONLY ***
640 if (inhibit && op_start_log)
641 {
642         WriteLog("!!! ^^^ This object is INHIBITED! ^^^ !!! (halfline=%u, ypos=%u, height=%u)\n", halfline, ypos, height);
643         DumpScaledObject(p0, OPLoadPhrase(op_pointer), OPLoadPhrase(op_pointer+8));
644 }
645 bitmapCounter++;
646 if (!inhibit)   // For OP testing only!
647 // *** END OP PROCESSOR TESTING ONLY ***
648                         if (halfline >= ypos && height > 0)
649                         {
650                                 uint64 p1 = OPLoadPhrase(op_pointer);
651                                 op_pointer += 8;
652                                 uint64 p2 = OPLoadPhrase(op_pointer);
653                                 op_pointer += 8;
654 //WriteLog("OP: %08X (%d) %08X%08X %08X%08X %08X%08X\n", oldOPP, halfline, (uint32)(p0>>32), (uint32)(p0&0xFFFFFFFF), (uint32)(p1>>32), (uint32)(p1&0xFFFFFFFF), (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
655                                 OPProcessScaledBitmap(p0, p1, p2, render);
656
657                                 // OP write-backs
658
659                                 uint16 remainder = (p2 >> 16) & 0xFF;//, vscale = p2 >> 8;
660                                 uint8 /*remainder = p2 >> 16,*/ vscale = p2 >> 8;
661 //Actually, we should skip this object if it has a vscale of zero.
662 //Or do we? Not sure... Atari Karts has a few lines that look like:
663 // (SCALED BITMAP)
664 //000E8268 --> phrase 00010000 7000B00D
665 //    [7 (0) x 1 @ (13, 0) (8 bpp), l: 000E82A0, p: 000E0FC0 fp: 00, fl:RELEASE, idx:00, pt:01]
666 //    [hsc: 9A, vsc: 00, rem: 00]
667 // Could it be the vscale is overridden if the DWIDTH is zero? Hmm...
668 //WriteLog("OP: Scaled bitmap processing (rem=%02X, vscale=%02X)...\n", remainder, vscale);//*/
669
670                                 if (vscale == 0)
671                                         vscale = 0x20;                                  // OP bug??? Nope, it isn't...! Or is it?
672
673 //extern int start_logging;
674 //if (start_logging)
675 //      WriteLog("--> Returned from scaled bitmap processing (rem=%02X, vscale=%02X)...\n", remainder, vscale);//*/
676 //Locks up here:
677 //--> Returned from scaled bitmap processing (rem=20, vscale=80)...
678 //There are other problems here, it looks like...
679 //Another lock up:
680 //About to execute OP (508)...
681 /*
682 OP: Scaled bitmap 4x? 4bpp at 38,? hscale=7C fpix=0 data=00075E28 pitch 1 hflipped=no dwidth=? (linked to 00071118) Transluency=no
683 --> Returned from scaled bitmap processing (rem=50, vscale=7C)...
684 OP: Scaled bitmap 4x? 4bpp at 38,? hscale=7C fpix=0 data=00075E28 pitch 1 hflipped=no dwidth=? (linked to 00071118) Transluency=no
685 --> Returned from scaled bitmap processing (rem=30, vscale=7C)...
686 OP: Scaled bitmap 4x? 4bpp at 38,? hscale=7C fpix=0 data=00075E28 pitch 1 hflipped=no dwidth=? (linked to 00071118) Transluency=no
687 --> Returned from scaled bitmap processing (rem=10, vscale=7C)...
688 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756A8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
689 --> Returned from scaled bitmap processing (rem=00, vscale=7E)...
690 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
691 --> Returned from scaled bitmap processing (rem=00, vscale=80)...
692 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
693 --> Returned from scaled bitmap processing (rem=5E, vscale=7E)...
694 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756E8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
695 --> Returned from scaled bitmap processing (rem=60, vscale=80)...
696 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
697 --> Returned from scaled bitmap processing (rem=3E, vscale=7E)...
698 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756E8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
699 --> Returned from scaled bitmap processing (rem=40, vscale=80)...
700 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
701 --> Returned from scaled bitmap processing (rem=1E, vscale=7E)...
702 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756E8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
703 --> Returned from scaled bitmap processing (rem=20, vscale=80)...
704 */
705 //Here's another problem:
706 //    [hsc: 20, vsc: 20, rem: 00]
707 // Since we're not checking for $E0 (but that's what we get from the above), we end
708 // up repeating this halfline unnecessarily... !!! FIX !!! [DONE, but... still not quite
709 // right. Either that, or the Accolade team that wrote Bubsy screwed up royal.]
710 //Also note: $E0 = 7.0 which IS a legal vscale value...
711
712 //                              if (remainder & 0x80)                           // I.e., it's negative
713 //                              if ((remainder & 0x80) || remainder == 0)       // I.e., it's <= 0
714 //                              if ((remainder - 1) >= 0xE0)            // I.e., it's <= 0
715 //                              if ((remainder >= 0xE1) || remainder == 0)// I.e., it's <= 0
716 //                              if ((remainder >= 0xE1 && remainder <= 0xFF) || remainder == 0)// I.e., it's <= 0
717 //                              if (remainder <= 0x20)                          // I.e., it's <= 1.0
718                                 // I.e., it's < 1.0f -> means it'll go negative when we subtract 1.0f.
719                                 if (remainder < 0x20)
720                                 {
721                                         uint64 data = (p0 & 0xFFFFF80000000000LL) >> 40;
722                                         uint64 dwidth = (p1 & 0xFFC0000) >> 15;
723
724 //                                      while (remainder & 0x80)
725 //                                      while ((remainder & 0x80) || remainder == 0)
726 //                                      while ((remainder - 1) >= 0xE0)
727 //                                      while ((remainder >= 0xE1) || remainder == 0)
728 //                                      while ((remainder >= 0xE1 && remainder <= 0xFF) || remainder == 0)
729 //                                      while (remainder <= 0x20)
730                                         while (remainder < 0x20)
731                                         {
732                                                 remainder += vscale;
733
734                                                 if (height)
735                                                         height--;
736
737                                                 data += dwidth;
738                                         }
739
740                                         p0 &= ~0xFFFFF80000FFC000LL;    // Mask out old data...
741                                         p0 |= (uint64)height << 14;
742                                         p0 |= data << 40;
743                                         OPStorePhrase(oldOPP, p0);
744                                 }
745
746                                 remainder -= 0x20;                                      // 1.0f in [3.5] fixed point format
747
748 //if (start_logging)
749 //      WriteLog("--> Finished writebacks...\n");//*/
750
751 //WriteLog(" [%08X%08X -> ", (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
752                                 p2 &= ~0x0000000000FF0000LL;
753                                 p2 |= (uint64)remainder << 16;
754 //WriteLog("%08X%08X]\n", (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
755                                 OPStorePhrase(oldOPP + 16, p2);
756 //remainder = (uint8)(p2 >> 16), vscale = (uint8)(p2 >> 8);
757 //WriteLog(" [after]: rem=%02X, vscale=%02X\n", remainder, vscale);
758                         }
759
760                         op_pointer = (p0 & 0x000007FFFF000000LL) >> 21;
761                         break;
762                 }
763                 case OBJECT_TYPE_GPU:
764                 {
765 //WriteLog("OP: Asserting GPU IRQ #3...\n");
766 #warning "Need to fix OP GPU IRQ handling! !!! FIX !!!"
767                         OPSetCurrentObject(p0);
768                         GPUSetIRQLine(3, ASSERT_LINE);
769 //Also, OP processing is suspended from this point until OBF (F00026) is written to...
770 // !!! FIX !!!
771 //Do something like:
772 //OPSuspendedByGPU = true;
773 //Dunno if the OP keeps processing from where it was interrupted, or if it just continues
774 //on the next halfline...
775 // --> It continues from where it was interrupted! !!! FIX !!!
776                         break;
777                 }
778                 case OBJECT_TYPE_BRANCH:
779                 {
780                         uint16 ypos = (p0 >> 3) & 0x7FF;
781 // NOTE: The JTRM sez there are only 2 bits used for the CC, but lists *five*
782 //       conditions! Need at least one more bit for that! :-P
783 // Also, the ASIC nets imply that it uses bits 14-16 (height in BM & SBM objects)
784 #warning "!!! Possibly bad CC handling in OP (missing 1 bit) !!!"
785                         uint8  cc   = (p0 >> 14) & 0x03;
786                         uint32 link = (p0 >> 21) & 0x3FFFF8;
787
788 //                      if ((ypos!=507)&&(ypos!=25))
789 //                              WriteLog("\t%i%s%i link=0x%.8x\n",halfline,condition_to_str[cc],ypos>>1,link);
790                         switch (cc)
791                         {
792                         case CONDITION_EQUAL:
793                                 if (TOMReadWord(0xF00006, OP) == ypos || ypos == 0x7FF)
794                                         op_pointer = link;
795                                 break;
796                         case CONDITION_LESS_THAN:
797                                 if (TOMReadWord(0xF00006, OP) < ypos)
798                                         op_pointer = link;
799                                 break;
800                         case CONDITION_GREATER_THAN:
801                                 if (TOMReadWord(0xF00006, OP) > ypos)
802                                         op_pointer = link;
803                                 break;
804                         case CONDITION_OP_FLAG_SET:
805                                 if (OPGetStatusRegister() & 0x01)
806                                         op_pointer = link;
807                                 break;
808                         case CONDITION_SECOND_HALF_LINE:
809 //Here's the ASIC code:
810 //  ND4(cctrue5, newheight[2], heightl[1], heightl[0], hcb[10]);
811 //which means, do the link if bit 10 of HC is set...
812
813                                 // This basically means branch if bit 10 of HC is set
814 #warning "Unhandled condition code causes emulator to crash... !!! FIX !!!"
815                                 WriteLog("OP: Unexpected CONDITION_SECOND_HALF_LINE in BRANCH object\nOP: shutting down!\n");
816                                 LogDone();
817                                 exit(0);
818                                 break;
819                         default:
820                                 // Basically, if you do this, the OP does nothing. :-)
821                                 WriteLog("OP: Unimplemented branch condition %i\n", cc);
822                         }
823                         break;
824                 }
825                 case OBJECT_TYPE_STOP:
826                 {
827 //op_start_log = 0;
828                         // unsure
829 //WriteLog("OP: --> STOP\n");
830 //                      op_set_status_register(((p0>>3) & 0xFFFFFFFF));
831 //This seems more likely...
832                         OPSetCurrentObject(p0);
833
834                         if (p0 & 0x08)
835                         {
836                                 // We need to check whether these interrupts are enabled or not, THEN
837                                 // set an IRQ + pending flag if necessary...
838                                 if (TOMIRQEnabled(IRQ_OPFLAG))
839                                 {
840                                         TOMSetPendingObjectInt();
841                                         m68k_set_irq(2);                                // Cause a 68K IPL 2 to occur...
842                                 }
843                         }
844
845                         return;
846 //                      break;
847                 }
848                 default:
849 //                      WriteLog("op: unknown object type %i\n", ((uint8)p0 & 0x07));
850                         return;
851                 }
852
853                 // Here is a little sanity check to keep the OP from locking up the machine
854                 // when fed bad data. Better would be to count how many actual cycles it used
855                 // and bail out/reenter to properly simulate an overloaded OP... !!! FIX !!!
856 #warning "Better would be to count how many actual cycles it used and bail out/reenter to properly simulate an overloaded OP... !!! FIX !!!"
857                 opCyclesToRun--;
858
859                 if (!opCyclesToRun)
860                         return;
861         }
862 }
863
864 //
865 // Store fixed size bitmap in line buffer
866 //
867 void OPProcessFixedBitmap(uint64 p0, uint64 p1, bool render)
868 {
869 // Need to make sure that, when writing, it stays within the line buffer...
870 // LBUF ($F01800 - $F01D9E) 360 x 32-bit RAM
871         uint8 depth = (p1 >> 12) & 0x07;                                // Color depth of image
872         int32 xpos = ((int16)((p1 << 4) & 0xFFFF)) >> 4;// Image xpos in LBUF
873         uint32 iwidth = (p1 >> 28) & 0x3FF;                             // Image width in *phrases*
874         uint32 data = (p0 >> 40) & 0xFFFFF8;                    // Pixel data address
875 //#ifdef OP_DEBUG_BMP
876         uint32  firstPix = (p1 >> 49) & 0x3F;
877         // "The LSB is significant only for scaled objects..." -JTRM
878         // "In 1 BPP mode, all five bits are significant. In 2 BPP mode, the top four are significant..."
879         firstPix &= 0x3E;
880 //#endif
881 // We can ignore the RELEASE (high order) bit for now--probably forever...!
882 //      uint8 flags = (p1 >> 45) & 0x0F;        // REFLECT, RMW, TRANS, RELEASE
883 //Optimize: break these out to their own BOOL values
884         uint8 flags = (p1 >> 45) & 0x07;                                // REFLECT (0), RMW (1), TRANS (2)
885         bool flagREFLECT = (flags & OPFLAG_REFLECT ? true : false),
886                 flagRMW = (flags & OPFLAG_RMW ? true : false),
887                 flagTRANS = (flags & OPFLAG_TRANS ? true : false);
888 // "For images with 1 to 4 bits/pixel the top 7 to 4 bits of the index
889 //  provide the most significant bits of the palette address."
890         uint8 index = (p1 >> 37) & 0xFE;                                // CLUT index offset (upper pix, 1-4 bpp)
891         uint32 pitch = (p1 >> 15) & 0x07;                               // Phrase pitch
892         pitch <<= 3;                                                                    // Optimization: Multiply pitch by 8
893
894 //      int16 scanlineWidth = tom_getVideoModeWidth();
895         uint8 * tomRam8 = TOMGetRamPointer();
896         uint8 * paletteRAM = &tomRam8[0x400];
897         // This is OK as long as it's used correctly: For 16-bit RAM to RAM direct copies--NOT
898         // for use when using endian-corrected data (i.e., any of the *_word_read functions!)
899         uint16 * paletteRAM16 = (uint16 *)paletteRAM;
900
901 //      WriteLog("bitmap %ix? %ibpp at %i,? firstpix=? data=0x%.8x pitch %i hflipped=%s dwidth=? (linked to ?) RMW=%s Tranparent=%s\n",
902 //              iwidth, op_bitmap_bit_depth[bitdepth], xpos, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), (flags&OPFLAG_RMW ? "yes" : "no"), (flags&OPFLAG_TRANS ? "yes" : "no"));
903
904 // Is it OK to have a 0 for the data width??? (i.e., undocumented?)
905 // Seems to be... Seems that dwidth *can* be zero (i.e., reuse same line) as well.
906 // Pitch == 0 is OK too...
907
908 //kludge: Seems that the OP treats iwidth == 0 as iwidth == 1... Need to investigate
909 //        on real hardware...
910 #warning "!!! Need to investigate iwidth == 0 behavior on real hardware !!!"
911 if (iwidth == 0)
912         iwidth = 1;
913
914 //      if (!render || op_pointer == 0 || ptr == 0 || pitch == 0)
915 //I'm not convinced that we need to concern ourselves with data & op_pointer here either!
916         if (!render || iwidth == 0)
917                 return;
918
919 //OK, so we know the position in the line buffer is correct. It's the clipping in
920 //24bpp mode that's wrong!
921 #if 0
922 //This is a total kludge, based upon the fact that 24BPP mode puts *4* bytes
923 //into the line buffer for each pixel.
924 if (depth == 5) // i.e., 24bpp mode...
925         xpos >>= 1;     // Cut it in half...
926 #endif
927
928 //#define OP_DEBUG_BMP
929 //#ifdef OP_DEBUG_BMP
930 //      WriteLog("bitmap %ix%i %ibpp at %i,%i firstpix=%i data=0x%.8x pitch %i hflipped=%s dwidth=%i (linked to 0x%.8x) Transluency=%s\n",
931 //              iwidth, height, op_bitmap_bit_depth[bitdepth], xpos, ypos, firstPix, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), dwidth, op_pointer, (flags&OPFLAG_RMW ? "yes" : "no"));
932 //#endif
933
934 //      int32 leftMargin = xpos, rightMargin = (xpos + (phraseWidthToPixels[depth] * iwidth)) - 1;
935         int32 startPos = xpos, endPos = xpos +
936                 (!flagREFLECT ? (phraseWidthToPixels[depth] * iwidth) - 1
937                 : -((phraseWidthToPixels[depth] * iwidth) + 1));
938         uint32 clippedWidth = 0, phraseClippedWidth = 0, dataClippedWidth = 0;//, phrasePixel = 0;
939         bool in24BPPMode = (((GET16(tomRam8, 0x0028) >> 1) & 0x03) == 1 ? true : false);        // VMODE
940         // Not sure if this is Jaguar Two only location or what...
941         // From the docs, it is... If we want to limit here we should think of something else.
942 //      int32 limit = GET16(tom_ram_8, 0x0008);                 // LIMIT
943 //      int32 limit = 720;
944 //      int32 lbufWidth = (!in24BPPMode ? limit - 1 : (limit / 2) - 1); // Zero based limit...
945 //printf("[OP:xpos=%i,spos=%i,epos=%i>", xpos, startPos, endPos);
946         // This is correct, the OP line buffer is a constant size... 
947         int32 limit = 720;
948         int32 lbufWidth = 719;
949
950         // If the image is completely to the left or right of the line buffer, then bail.
951 //If in REFLECT mode, then these values are swapped! !!! FIX !!! [DONE]
952 //There are four possibilities:
953 //  1. image sits on left edge and no REFLECT; starts out of bounds but ends in bounds.
954 //  2. image sits on left edge and REFLECT; starts in bounds but ends out of bounds.
955 //  3. image sits on right edge and REFLECT; starts out of bounds but ends in bounds.
956 //  4. image sits on right edge and no REFLECT; starts in bounds but ends out of bounds.
957 //Numbers 2 & 4 can be caught by checking the LBUF clip while in the inner loop,
958 // numbers 1 & 3 are of concern.
959 // This *indirectly* handles only cases 2 & 4! And is WRONG if REFLECT is set...!
960 //      if (rightMargin < 0 || leftMargin > lbufWidth)
961
962 // It might be easier to swap these (if REFLECTed) and just use XPOS down below...
963 // That way, you could simply set XPOS to leftMargin if !REFLECT and to rightMargin otherwise.
964 // Still have to be careful with the DATA and IWIDTH values though...
965
966 //      if ((!flagREFLECT && (rightMargin < 0 || leftMargin > lbufWidth))
967 //              || (flagREFLECT && (leftMargin < 0 || rightMargin > lbufWidth)))
968 //              return;
969         if ((!flagREFLECT && (endPos < 0 || startPos > lbufWidth))
970                 || (flagREFLECT && (startPos < 0 || endPos > lbufWidth)))
971                 return;
972
973         // Otherwise, find the clip limits and clip the phrase as well...
974         // NOTE: I'm fudging here by letting the actual blit overstep the bounds of the
975         //       line buffer, but it shouldn't matter since there are two unused line
976         //       buffers below and nothing above and I'll at most write 8 bytes outside
977         //       the line buffer... I could use a fractional clip begin/end value, but
978         //       this makes the blit a *lot* more hairy. I might fix this in the future
979         //       if it becomes necessary. (JLH)
980         //       Probably wouldn't be *that* hairy. Just use a delta that tells the inner loop
981         //       which pixel in the phrase is being written, and quit when either end of phrases
982         //       is reached or line buffer extents are surpassed.
983
984 //This stuff is probably wrong as well... !!! FIX !!!
985 //The strange thing is that it seems to work, but that's no guarantee that it's bulletproof!
986 //Yup. Seems that JagMania doesn't work correctly with this...
987 //Dunno if this is the problem, but Atari Karts is showing *some* of the road now...
988 //      if (!flagREFLECT)
989
990 /*
991         if (leftMargin < 0)
992                 clippedWidth = 0 - leftMargin,
993                 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth],
994                 leftMargin = 0 - (clippedWidth % phraseWidthToPixels[depth]);
995 //              leftMargin = 0;
996
997         if (rightMargin > lbufWidth)
998                 clippedWidth = rightMargin - lbufWidth,
999                 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];//,
1000 //              rightMargin = lbufWidth + (clippedWidth % phraseWidthToPixels[depth]);
1001 //              rightMargin = lbufWidth;
1002 */
1003 if (depth > 5)
1004         WriteLog("OP: We're about to encounter a divide by zero error!\n");
1005         // NOTE: We're just using endPos to figure out how much, if any, to clip by.
1006         // ALSO: There may be another case where we start out of bounds and end out of bounds...!
1007         // !!! FIX !!!
1008         if (startPos < 0)                       // Case #1: Begin out, end in, L to R
1009                 clippedWidth = 0 - startPos,
1010                 dataClippedWidth = phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth],
1011                 startPos = 0 - (clippedWidth % phraseWidthToPixels[depth]);
1012
1013         if (endPos < 0)                         // Case #2: Begin in, end out, R to L
1014                 clippedWidth = 0 - endPos,
1015                 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];
1016
1017         if (endPos > lbufWidth)         // Case #3: Begin in, end out, L to R
1018                 clippedWidth = endPos - lbufWidth,
1019                 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];
1020
1021         if (startPos > lbufWidth)       // Case #4: Begin out, end in, R to L
1022                 clippedWidth = startPos - lbufWidth,
1023                 dataClippedWidth = phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth],
1024                 startPos = lbufWidth + (clippedWidth % phraseWidthToPixels[depth]);
1025 //printf("<OP:spos=%i,epos=%i]", startPos, endPos);
1026
1027         // If the image is sitting on the line buffer left or right edge, we need to compensate
1028         // by decreasing the image phrase width accordingly.
1029         iwidth -= phraseClippedWidth;
1030
1031         // Also, if we're clipping the phrase we need to make sure we're in the correct part of
1032         // the pixel data.
1033 //      data += phraseClippedWidth * (pitch << 3);
1034         data += dataClippedWidth * pitch;
1035
1036         // NOTE: When the bitmap is in REFLECT mode, the XPOS marks the *right* side of the
1037         //       bitmap! This makes clipping & etc. MUCH, much easier...!
1038 //      uint32 lbufAddress = 0x1800 + (!in24BPPMode ? leftMargin * 2 : leftMargin * 4);
1039 //Why does this work right when multiplying startPos by 2 (instead of 4) for 24 BPP mode?
1040 //Is this a bug in the OP?
1041 //It's because in 24bpp mode, each pixel takes *4* bytes, instead of the usual 2.
1042 //Though it looks like we're doing it here no matter what...
1043 //      uint32 lbufAddress = 0x1800 + (!in24BPPMode ? startPos * 2 : startPos * 2);
1044 //Let's try this:
1045         uint32 lbufAddress = 0x1800 + (startPos * 2);
1046         uint8 * currentLineBuffer = &tomRam8[lbufAddress];
1047
1048         // Render.
1049
1050 // Hmm. We check above for 24 BPP mode, but don't do anything about it below...
1051 // If we *were* in 24 BPP mode, how would you convert CRY to RGB24? Seems to me
1052 // that if you're in CRY mode then you wouldn't be able to use 24 BPP bitmaps
1053 // anyway.
1054 // This seems to be the case (at least according to the Midsummer docs)...!
1055
1056 // This is to test using palette zeroes instead of bit zeroes...
1057 // And it seems that this is wrong, index == 0 is transparent apparently... :-/
1058 //#define OP_USES_PALETTE_ZERO
1059
1060         if (depth == 0)                                                                 // 1 BPP
1061         {
1062                 // The LSB of flags is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1063                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1064
1065                 // Fetch 1st phrase...
1066                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1067 //Note that firstPix should only be honored *if* we start with the 1st phrase of the bitmap
1068 //i.e., we didn't clip on the margin... !!! FIX !!!
1069                 pixels <<= firstPix;                                            // Skip first N pixels (N=firstPix)...
1070                 int i = firstPix;                                                       // Start counter at right spot...
1071
1072                 while (iwidth--)
1073                 {
1074                         while (i++ < 64)
1075                         {
1076                                 uint8 bit = pixels >> 63;
1077 #ifndef OP_USES_PALETTE_ZERO
1078                                 if (flagTRANS && bit == 0)
1079 #else
1080                                 if (flagTRANS && (paletteRAM16[index | bit] == 0))
1081 #endif
1082                                         ;       // Do nothing...
1083                                 else
1084                                 {
1085                                         if (!flagRMW)
1086 //Optimize: Set paletteRAM16 to beginning of palette RAM + index*2 and use only [bit] as index...
1087 //Won't optimize RMW case though...
1088                                                 // This is the *only* correct use of endian-dependent code
1089                                                 // (i.e., mem-to-mem direct copying)!
1090                                                 *(uint16 *)currentLineBuffer = paletteRAM16[index | bit];
1091                                         else
1092                                                 *currentLineBuffer =
1093                                                         BLEND_CR(*currentLineBuffer, paletteRAM[(index | bit) << 1]),
1094                                                 *(currentLineBuffer + 1) =
1095                                                         BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bit) << 1) + 1]);
1096                                 }
1097
1098                                 currentLineBuffer += lbufDelta;
1099                                 pixels <<= 1;
1100                         }
1101                         i = 0;
1102                         // Fetch next phrase...
1103                         data += pitch;
1104                         pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1105                 }
1106         }
1107         else if (depth == 1)                                                    // 2 BPP
1108         {
1109 if (firstPix)
1110         WriteLog("OP: Fixed bitmap @ 2 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1111                 index &= 0xFC;                                                          // Top six bits form CLUT index
1112                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1113                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1114
1115                 while (iwidth--)
1116                 {
1117                         // Fetch phrase...
1118                         uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1119                         data += pitch;
1120
1121                         for(int i=0; i<32; i++)
1122                         {
1123                                 uint8 bits = pixels >> 62;
1124 // Seems to me that both of these are in the same endian, so we could cast it as
1125 // uint16 * and do straight across copies (what about 24 bpp? Treat it differently...)
1126 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
1127 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
1128 // No, it isn't because we read the memory in an endian safe way--this *won't* work...
1129 #ifndef OP_USES_PALETTE_ZERO
1130                                 if (flagTRANS && bits == 0)
1131 #else
1132                                 if (flagTRANS && (paletteRAM16[index | bits] == 0))
1133 #endif
1134                                         ;       // Do nothing...
1135                                 else
1136                                 {
1137                                         if (!flagRMW)
1138                                                 *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1139                                         else
1140                                                 *currentLineBuffer =
1141                                                         BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1142                                                 *(currentLineBuffer + 1) =
1143                                                         BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1144                                 }
1145
1146                                 currentLineBuffer += lbufDelta;
1147                                 pixels <<= 2;
1148                         }
1149                 }
1150         }
1151         else if (depth == 2)                                                    // 4 BPP
1152         {
1153 if (firstPix)
1154         WriteLog("OP: Fixed bitmap @ 4 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1155                 index &= 0xF0;                                                          // Top four bits form CLUT index
1156                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1157                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1158
1159                 while (iwidth--)
1160                 {
1161                         // Fetch phrase...
1162                         uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1163                         data += pitch;
1164
1165                         for(int i=0; i<16; i++)
1166                         {
1167                                 uint8 bits = pixels >> 60;
1168 // Seems to me that both of these are in the same endian, so we could cast it as
1169 // uint16 * and do straight across copies (what about 24 bpp? Treat it differently...)
1170 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
1171 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
1172 // No, it isn't because we read the memory in an endian safe way--this *won't* work...
1173 #ifndef OP_USES_PALETTE_ZERO
1174                                 if (flagTRANS && bits == 0)
1175 #else
1176                                 if (flagTRANS && (paletteRAM16[index | bits] == 0))
1177 #endif
1178                                         ;       // Do nothing...
1179                                 else
1180                                 {
1181                                         if (!flagRMW)
1182                                                 *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1183                                         else
1184                                                 *currentLineBuffer =
1185                                                         BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1186                                                 *(currentLineBuffer + 1) =
1187                                                         BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1188                                 }
1189
1190                                 currentLineBuffer += lbufDelta;
1191                                 pixels <<= 4;
1192                         }
1193                 }
1194         }
1195         else if (depth == 3)                                                    // 8 BPP
1196         {
1197                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1198                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1199
1200                 // Fetch 1st phrase...
1201                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1202 //Note that firstPix should only be honored *if* we start with the 1st phrase of the bitmap
1203 //i.e., we didn't clip on the margin... !!! FIX !!!
1204                 firstPix &= 0x30;                                                       // Only top two bits are valid for 8 BPP
1205                 pixels <<= firstPix;                                            // Skip first N pixels (N=firstPix)...
1206                 int i = firstPix >> 3;                                          // Start counter at right spot...
1207
1208                 while (iwidth--)
1209                 {
1210                         while (i++ < 8)
1211                         {
1212                                 uint8 bits = pixels >> 56;
1213 // Seems to me that both of these are in the same endian, so we could cast it as
1214 // uint16 * and do straight across copies (what about 24 bpp? Treat it differently...)
1215 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
1216 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
1217 // No, it isn't because we read the memory in an endian safe way--this *won't* work...
1218 //This would seem to be problematic...
1219 //Because it's the palette entry being zero that makes the pixel transparent...
1220 //Let's try it and see.
1221 #ifndef OP_USES_PALETTE_ZERO
1222                                 if (flagTRANS && bits == 0)
1223 #else
1224                                 if (flagTRANS && (paletteRAM16[bits] == 0))
1225 #endif
1226                                         ;       // Do nothing...
1227                                 else
1228                                 {
1229                                         if (!flagRMW)
1230                                                 *(uint16 *)currentLineBuffer = paletteRAM16[bits];
1231                                         else
1232                                                 *currentLineBuffer =
1233                                                         BLEND_CR(*currentLineBuffer, paletteRAM[bits << 1]),
1234                                                 *(currentLineBuffer + 1) =
1235                                                         BLEND_Y(*(currentLineBuffer + 1), paletteRAM[(bits << 1) + 1]);
1236                                 }
1237
1238                                 currentLineBuffer += lbufDelta;
1239                                 pixels <<= 8;
1240                         }
1241                         i = 0;
1242                         // Fetch next phrase...
1243                         data += pitch;
1244                         pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1245                 }
1246         }
1247         else if (depth == 4)                                                    // 16 BPP
1248         {
1249 if (firstPix)
1250         WriteLog("OP: Fixed bitmap @ 16 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1251                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1252                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1253
1254                 while (iwidth--)
1255                 {
1256                         // Fetch phrase...
1257                         uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1258                         data += pitch;
1259
1260                         for(int i=0; i<4; i++)
1261                         {
1262                                 uint8 bitsHi = pixels >> 56, bitsLo = pixels >> 48;
1263 // Seems to me that both of these are in the same endian, so we could cast it as
1264 // uint16 * and do straight across copies (what about 24 bpp? Treat it differently...)
1265 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
1266 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
1267 // No, it isn't because we read the memory in an endian safe way--it *won't* work...
1268 //This doesn't seem right... Let's try the encoded black value ($8800):
1269 //Apparently, CRY 0 maps to $8800...
1270                                 if (flagTRANS && ((bitsLo | bitsHi) == 0))
1271 //                              if (flagTRANS && (bitsHi == 0x88) && (bitsLo == 0x00))
1272                                         ;       // Do nothing...
1273                                 else
1274                                 {
1275                                         if (!flagRMW)
1276                                                 *currentLineBuffer = bitsHi,
1277                                                 *(currentLineBuffer + 1) = bitsLo;
1278                                         else
1279                                                 *currentLineBuffer =
1280                                                         BLEND_CR(*currentLineBuffer, bitsHi),
1281                                                 *(currentLineBuffer + 1) =
1282                                                         BLEND_Y(*(currentLineBuffer + 1), bitsLo);
1283                                 }
1284
1285                                 currentLineBuffer += lbufDelta;
1286                                 pixels <<= 16;
1287                         }
1288                 }
1289         }
1290         else if (depth == 5)                                                    // 24 BPP
1291         {
1292 //Looks like Iron Soldier is the only game that uses 24BPP mode...
1293 //There *might* be others...
1294 //WriteLog("OP: Writing 24 BPP bitmap!\n");
1295 if (firstPix)
1296         WriteLog("OP: Fixed bitmap @ 24 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1297                 // Not sure, but I think RMW only works with 16 BPP and below, and only in CRY mode...
1298                 // The LSB of flags is OPFLAG_REFLECT, so sign extend it and OR 4 into it.
1299                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 4) | 0x04;
1300
1301                 while (iwidth--)
1302                 {
1303                         // Fetch phrase...
1304                         uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1305                         data += pitch;
1306
1307                         for(int i=0; i<2; i++)
1308                         {
1309                                 // We don't use a 32-bit var here because of endian issues...!
1310                                 uint8 bits3 = pixels >> 56, bits2 = pixels >> 48,
1311                                         bits1 = pixels >> 40, bits0 = pixels >> 32;
1312
1313                                 if (flagTRANS && (bits3 | bits2 | bits1 | bits0) == 0)
1314                                         ;       // Do nothing...
1315                                 else
1316                                         *currentLineBuffer = bits3,
1317                                         *(currentLineBuffer + 1) = bits2,
1318                                         *(currentLineBuffer + 2) = bits1,
1319                                         *(currentLineBuffer + 3) = bits0;
1320
1321                                 currentLineBuffer += lbufDelta;
1322                                 pixels <<= 32;
1323                         }
1324                 }
1325         }
1326 }
1327
1328 //
1329 // Store scaled bitmap in line buffer
1330 //
1331 void OPProcessScaledBitmap(uint64 p0, uint64 p1, uint64 p2, bool render)
1332 {
1333 // Need to make sure that, when writing, the output stays within the line buffer...
1334 // LBUF ($F01800 - $F01D9E) 360 x 32-bit RAM
1335         uint8 depth = (p1 >> 12) & 0x07;                                // Color depth of image
1336         int32 xpos = ((int16)((p1 << 4) & 0xFFFF)) >> 4;// Image xpos in LBUF
1337         uint32 iwidth = (p1 >> 28) & 0x3FF;                             // Image width in *phrases*
1338         uint32 data = (p0 >> 40) & 0xFFFFF8;                    // Pixel data address
1339 //#ifdef OP_DEBUG_BMP
1340 // Probably should use this... Though not sure exactly how.
1341 // Use the upper bits as an offset into the phrase depending on the BPP. That's how!
1342         uint32 firstPix = (p1 >> 49) & 0x3F;
1343 //This is WEIRD! I'm sure I saw Atari Karts request 8 BPP FIRSTPIX! What happened???
1344 if (firstPix)
1345         WriteLog("OP: FIRSTPIX != 0! (Scaled BM)\n");
1346 //#endif
1347 // We can ignore the RELEASE (high order) bit for now--probably forever...!
1348 //      uint8 flags = (p1 >> 45) & 0x0F;        // REFLECT, RMW, TRANS, RELEASE
1349 //Optimize: break these out to their own BOOL values [DONE]
1350         uint8 flags = (p1 >> 45) & 0x07;                                // REFLECT (0), RMW (1), TRANS (2)
1351         bool flagREFLECT = (flags & OPFLAG_REFLECT ? true : false),
1352                 flagRMW = (flags & OPFLAG_RMW ? true : false),
1353                 flagTRANS = (flags & OPFLAG_TRANS ? true : false);
1354         uint8 index = (p1 >> 37) & 0xFE;                                // CLUT index offset (upper pix, 1-4 bpp)
1355         uint32 pitch = (p1 >> 15) & 0x07;                               // Phrase pitch
1356
1357         uint8 * tomRam8 = TOMGetRamPointer();
1358         uint8 * paletteRAM = &tomRam8[0x400];
1359         // This is OK as long as it's used correctly: For 16-bit RAM to RAM direct copies--NOT
1360         // for use when using endian-corrected data (i.e., any of the *ReadWord functions!)
1361         uint16 * paletteRAM16 = (uint16 *)paletteRAM;
1362
1363         uint16 hscale = p2 & 0xFF;
1364 // Hmm. It seems that fixing the horizontal scale necessitated re-fixing this. Not sure why,
1365 // but seems to be consistent with the vertical scaling now (and it may turn out to be wrong!)...
1366         uint16 horizontalRemainder = hscale;                            // Not sure if it starts full, but seems reasonable [It's not!]
1367 //      uint8 horizontalRemainder = 0;                                  // Let's try zero! Seems to work! Yay! [No, it doesn't!]
1368         int32 scaledWidthInPixels = (iwidth * phraseWidthToPixels[depth] * hscale) >> 5;
1369         uint32 scaledPhrasePixels = (phraseWidthToPixels[depth] * hscale) >> 5;
1370
1371 //      WriteLog("bitmap %ix? %ibpp at %i,? firstpix=? data=0x%.8x pitch %i hflipped=%s dwidth=? (linked to ?) RMW=%s Tranparent=%s\n",
1372 //              iwidth, op_bitmap_bit_depth[bitdepth], xpos, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), (flags&OPFLAG_RMW ? "yes" : "no"), (flags&OPFLAG_TRANS ? "yes" : "no"));
1373
1374 // Looks like an hscale of zero means don't draw!
1375         if (!render || iwidth == 0 || hscale == 0)
1376                 return;
1377
1378 /*extern int start_logging;
1379 if (start_logging)
1380         WriteLog("OP: Scaled bitmap %ix? %ibpp at %i,? hscale=%02X fpix=%i data=%08X pitch %i hflipped=%s dwidth=? (linked to %08X) Transluency=%s\n",
1381                 iwidth, op_bitmap_bit_depth[depth], xpos, hscale, firstPix, data, pitch, (flagREFLECT ? "yes" : "no"), op_pointer, (flagRMW ? "yes" : "no"));*/
1382 //#define OP_DEBUG_BMP
1383 //#ifdef OP_DEBUG_BMP
1384 //      WriteLog("OP: Scaled bitmap %ix%i %ibpp at %i,%i firstpix=%i data=0x%.8x pitch %i hflipped=%s dwidth=%i (linked to 0x%.8x) Transluency=%s\n",
1385 //              iwidth, height, op_bitmap_bit_depth[bitdepth], xpos, ypos, firstPix, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), dwidth, op_pointer, (flags&OPFLAG_RMW ? "yes" : "no"));
1386 //#endif
1387
1388         int32 startPos = xpos, endPos = xpos +
1389                 (!flagREFLECT ? scaledWidthInPixels - 1 : -(scaledWidthInPixels + 1));
1390         uint32 clippedWidth = 0, phraseClippedWidth = 0, dataClippedWidth = 0;
1391         bool in24BPPMode = (((GET16(tomRam8, 0x0028) >> 1) & 0x03) == 1 ? true : false);        // VMODE
1392         // Not sure if this is Jaguar Two only location or what...
1393         // From the docs, it is... If we want to limit here we should think of something else.
1394 //      int32 limit = GET16(tom_ram_8, 0x0008);                 // LIMIT
1395         int32 limit = 720;
1396 //      int32 lbufWidth = (!in24BPPMode ? limit - 1 : (limit / 2) - 1); // Zero based limit...
1397         int32 lbufWidth = 719;  // Zero based limit...
1398
1399         // If the image is completely to the left or right of the line buffer, then bail.
1400 //If in REFLECT mode, then these values are swapped! !!! FIX !!! [DONE]
1401 //There are four possibilities:
1402 //  1. image sits on left edge and no REFLECT; starts out of bounds but ends in bounds.
1403 //  2. image sits on left edge and REFLECT; starts in bounds but ends out of bounds.
1404 //  3. image sits on right edge and REFLECT; starts out of bounds but ends in bounds.
1405 //  4. image sits on right edge and no REFLECT; starts in bounds but ends out of bounds.
1406 //Numbers 2 & 4 can be caught by checking the LBUF clip while in the inner loop,
1407 // numbers 1 & 3 are of concern.
1408 // This *indirectly* handles only cases 2 & 4! And is WRONG if REFLECT is set...!
1409 //      if (rightMargin < 0 || leftMargin > lbufWidth)
1410
1411 // It might be easier to swap these (if REFLECTed) and just use XPOS down below...
1412 // That way, you could simply set XPOS to leftMargin if !REFLECT and to rightMargin otherwise.
1413 // Still have to be careful with the DATA and IWIDTH values though...
1414
1415         if ((!flagREFLECT && (endPos < 0 || startPos > lbufWidth))
1416                 || (flagREFLECT && (startPos < 0 || endPos > lbufWidth)))
1417                 return;
1418
1419         // Otherwise, find the clip limits and clip the phrase as well...
1420         // NOTE: I'm fudging here by letting the actual blit overstep the bounds of the
1421         //       line buffer, but it shouldn't matter since there are two unused line
1422         //       buffers below and nothing above and I'll at most write 40 bytes outside
1423         //       the line buffer... I could use a fractional clip begin/end value, but
1424         //       this makes the blit a *lot* more hairy. I might fix this in the future
1425         //       if it becomes necessary. (JLH)
1426         //       Probably wouldn't be *that* hairy. Just use a delta that tells the inner loop
1427         //       which pixel in the phrase is being written, and quit when either end of phrases
1428         //       is reached or line buffer extents are surpassed.
1429
1430 //This stuff is probably wrong as well... !!! FIX !!!
1431 //The strange thing is that it seems to work, but that's no guarantee that it's bulletproof!
1432 //Yup. Seems that JagMania doesn't work correctly with this...
1433 //Dunno if this is the problem, but Atari Karts is showing *some* of the road now...
1434 //Actually, it is! Or, it was. It doesn't seem to be clipping here, so the problem lies
1435 //elsewhere! Hmm. Putting the scaling code into the 1/2/8 BPP cases seems to draw the ground
1436 // a bit more accurately... Strange!
1437 //It's probably a case of the REFLECT flag being set and the background being written
1438 //from the right side of the screen...
1439 //But no, it isn't... At least if the diagnostics are telling the truth!
1440
1441         // NOTE: We're just using endPos to figure out how much, if any, to clip by.
1442         // ALSO: There may be another case where we start out of bounds and end out of bounds...!
1443         // !!! FIX !!!
1444
1445 //There's a problem here with scaledPhrasePixels in that it can be forced to zero when
1446 //the scaling factor is small. So fix it already! !!! FIX !!!
1447 /*if (scaledPhrasePixels == 0)
1448 {
1449         WriteLog("OP: [Scaled] We're about to encounter a divide by zero error!\n");
1450         DumpScaledObject(p0, p1, p2);
1451 }//*/
1452 //NOTE: I'm almost 100% sure that this is wrong... And it is! :-p
1453
1454 //Try a simple example...
1455 // Let's say we have a 8 BPP scanline with an hscale of $80 (4). Our xpos is -10,
1456 // non-flipped. Pixels in the bitmap are XYZXYZXYZXYZXYZ.
1457 // Scaled up, they would be XXXXYYYYZZZZXXXXYYYYZZZZXXXXYYYYZZZZ...
1458 //
1459 // Normally, we would expect this in the line buffer:
1460 // ZZXXXXYYYYZZZZXXXXYYYYZZZZ...
1461 //
1462 // But instead we're getting:
1463 // XXXXYYYYZZZZXXXXYYYYZZZZ...
1464 //
1465 // or are we??? It would seem so, simply by virtue of the fact that we're NOT starting
1466 // on negative boundary--or are we? Hmm...
1467 // cw = 10, dcw = pcw = 10 / ([8 * 4 = 32] 32) = 0, sp = -10
1468 //
1469 // Let's try a real world example:
1470 //
1471 //OP: Scaled bitmap (70, 8 BPP, spp=28) sp (-400) < 0... [new sp=-8, cw=400, dcw=pcw=14]
1472 //OP: Scaled bitmap (6F, 8 BPP, spp=27) sp (-395) < 0... [new sp=-17, cw=395, dcw=pcw=14]
1473 //
1474 // Really, spp is 27.75 in the second case...
1475 // So... If we do 395 / 27.75, we get 14. Ok so far... If we scale that against the
1476 // start position (14 * 27.75), we get -6.5... NOT -17!
1477
1478 //Now it seems we're working OK, at least for the first case...
1479 uint32 scaledPhrasePixelsUS = phraseWidthToPixels[depth] * hscale;
1480
1481         if (startPos < 0)                       // Case #1: Begin out, end in, L to R
1482 {
1483 extern int start_logging;
1484 if (start_logging)
1485         WriteLog("OP: Scaled bitmap (%02X, %u BPP, spp=%u) start pos (%i) < 0...", hscale, op_bitmap_bit_depth[depth], scaledPhrasePixels, startPos);
1486 //              clippedWidth = 0 - startPos,
1487                 clippedWidth = (0 - startPos) << 5,
1488 //              dataClippedWidth = phraseClippedWidth = clippedWidth / scaledPhrasePixels,
1489                 dataClippedWidth = phraseClippedWidth = (clippedWidth / scaledPhrasePixelsUS) >> 5,
1490 //              startPos = 0 - (clippedWidth % scaledPhrasePixels);
1491                 startPos += (dataClippedWidth * scaledPhrasePixelsUS) >> 5;
1492 if (start_logging)
1493         WriteLog(" [new sp=%i, cw=%i, dcw=pcw=%i]\n", startPos, clippedWidth, dataClippedWidth);
1494 }
1495
1496         if (endPos < 0)                         // Case #2: Begin in, end out, R to L
1497                 clippedWidth = 0 - endPos,
1498                 phraseClippedWidth = clippedWidth / scaledPhrasePixels;
1499
1500         if (endPos > lbufWidth)         // Case #3: Begin in, end out, L to R
1501                 clippedWidth = endPos - lbufWidth,
1502                 phraseClippedWidth = clippedWidth / scaledPhrasePixels;
1503
1504         if (startPos > lbufWidth)       // Case #4: Begin out, end in, R to L
1505                 clippedWidth = startPos - lbufWidth,
1506                 dataClippedWidth = phraseClippedWidth = clippedWidth / scaledPhrasePixels,
1507                 startPos = lbufWidth + (clippedWidth % scaledPhrasePixels);
1508
1509 extern int op_start_log;
1510 if (op_start_log && clippedWidth != 0)
1511         WriteLog("OP: Clipped line. SP=%i, EP=%i, clip=%u, iwidth=%u, hscale=%02X\n", startPos, endPos, clippedWidth, iwidth, hscale);
1512 if (op_start_log && startPos == 13)
1513 {
1514         WriteLog("OP: Scaled line. SP=%i, EP=%i, clip=%u, iwidth=%u, hscale=%02X, depth=%u, firstPix=%u\n", startPos, endPos, clippedWidth, iwidth, hscale, depth, firstPix);
1515         DumpScaledObject(p0, p1, p2);
1516         if (iwidth == 7)
1517         {
1518                 WriteLog("    %08X: ", data);
1519                 for(int i=0; i<7*8; i++)
1520                         WriteLog("%02X ", JaguarReadByte(data+i));
1521                 WriteLog("\n");
1522         }
1523 }
1524         // If the image is sitting on the line buffer left or right edge, we need to compensate
1525         // by decreasing the image phrase width accordingly.
1526         iwidth -= phraseClippedWidth;
1527
1528         // Also, if we're clipping the phrase we need to make sure we're in the correct part of
1529         // the pixel data.
1530 //      data += phraseClippedWidth * (pitch << 3);
1531         data += dataClippedWidth * (pitch << 3);
1532
1533         // NOTE: When the bitmap is in REFLECT mode, the XPOS marks the *right* side of the
1534         //       bitmap! This makes clipping & etc. MUCH, much easier...!
1535 //      uint32 lbufAddress = 0x1800 + (!in24BPPMode ? leftMargin * 2 : leftMargin * 4);
1536 //      uint32 lbufAddress = 0x1800 + (!in24BPPMode ? startPos * 2 : startPos * 4);
1537         uint32 lbufAddress = 0x1800 + startPos * 2;
1538         uint8 * currentLineBuffer = &tomRam8[lbufAddress];
1539 //uint8 * lineBufferLowerLimit = &tom_ram_8[0x1800],
1540 //      * lineBufferUpperLimit = &tom_ram_8[0x1800 + 719];
1541
1542         // Render.
1543
1544 // Hmm. We check above for 24 BPP mode, but don't do anything about it below...
1545 // If we *were* in 24 BPP mode, how would you convert CRY to RGB24? Seems to me
1546 // that if you're in CRY mode then you wouldn't be able to use 24 BPP bitmaps
1547 // anyway.
1548 // This seems to be the case (at least according to the Midsummer docs)...!
1549
1550         if (depth == 0)                                                                 // 1 BPP
1551         {
1552 if (firstPix != 0)
1553         WriteLog("OP: Scaled bitmap @ 1 BPP requesting FIRSTPIX!\n");
1554                 // The LSB of flags is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1555                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1556
1557                 int pixCount = 0;
1558                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1559
1560                 while ((int32)iwidth > 0)
1561                 {
1562                         uint8 bits = pixels >> 63;
1563
1564 #ifndef OP_USES_PALETTE_ZERO
1565                         if (flagTRANS && bits == 0)
1566 #else
1567                         if (flagTRANS && (paletteRAM16[index | bits] == 0))
1568 #endif
1569                                 ;       // Do nothing...
1570                         else
1571                         {
1572                                 if (!flagRMW)
1573                                         // This is the *only* correct use of endian-dependent code
1574                                         // (i.e., mem-to-mem direct copying)!
1575                                         *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1576                                 else
1577                                         *currentLineBuffer =
1578                                                 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1579                                         *(currentLineBuffer + 1) =
1580                                                 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1581                         }
1582
1583                         currentLineBuffer += lbufDelta;
1584
1585 /*
1586 The reason we subtract from horizontalRemainder *after* the test is that we had too few
1587 bits in horizontalRemainder to properly recognize a negative number. But now it's 16 bits
1588 wide, so we could probably go back to that (as long as we make it an int16 and not a uint16!)
1589 */
1590 /*                      horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1591                         while (horizontalRemainder & 0x80)
1592                         {
1593                                 horizontalRemainder += hscale;
1594                                 pixCount++;
1595                                 pixels <<= 1;
1596                         }//*/
1597 //                      while (horizontalRemainder <= 0x20)             // I.e., it's <= 1.0 (*before* subtraction)
1598                         while (horizontalRemainder < 0x20)              // I.e., it's < 1.0 (*before* subtraction)
1599                         {
1600                                 horizontalRemainder += hscale;
1601                                 pixCount++;
1602                                 pixels <<= 1;
1603                         }
1604                         horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1605
1606                         if (pixCount > 63)
1607                         {
1608                                 int phrasesToSkip = pixCount / 64, pixelShift = pixCount % 64;
1609
1610                                 data += (pitch << 3) * phrasesToSkip;
1611                                 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1612                                 pixels <<= 1 * pixelShift;
1613                                 iwidth -= phrasesToSkip;
1614                                 pixCount = pixelShift;
1615                         }
1616                 }
1617         }
1618         else if (depth == 1)                                                    // 2 BPP
1619         {
1620 if (firstPix != 0)
1621         WriteLog("OP: Scaled bitmap @ 2 BPP requesting FIRSTPIX!\n");
1622                 index &= 0xFC;                                                          // Top six bits form CLUT index
1623                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1624                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1625
1626                 int pixCount = 0;
1627                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1628
1629                 while ((int32)iwidth > 0)
1630                 {
1631                         uint8 bits = pixels >> 62;
1632
1633 #ifndef OP_USES_PALETTE_ZERO
1634                         if (flagTRANS && bits == 0)
1635 #else
1636                         if (flagTRANS && (paletteRAM16[index | bits] == 0))
1637 #endif
1638                                 ;       // Do nothing...
1639                         else
1640                         {
1641                                 if (!flagRMW)
1642                                         // This is the *only* correct use of endian-dependent code
1643                                         // (i.e., mem-to-mem direct copying)!
1644                                         *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1645                                 else
1646                                         *currentLineBuffer =
1647                                                 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1648                                         *(currentLineBuffer + 1) =
1649                                                 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1650                         }
1651
1652                         currentLineBuffer += lbufDelta;
1653
1654 /*                      horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1655                         while (horizontalRemainder & 0x80)
1656                         {
1657                                 horizontalRemainder += hscale;
1658                                 pixCount++;
1659                                 pixels <<= 2;
1660                         }//*/
1661 //                      while (horizontalRemainder <= 0x20)             // I.e., it's <= 0 (*before* subtraction)
1662                         while (horizontalRemainder < 0x20)              // I.e., it's < 1.0 (*before* subtraction)
1663                         {
1664                                 horizontalRemainder += hscale;
1665                                 pixCount++;
1666                                 pixels <<= 2;
1667                         }
1668                         horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1669
1670                         if (pixCount > 31)
1671                         {
1672                                 int phrasesToSkip = pixCount / 32, pixelShift = pixCount % 32;
1673
1674                                 data += (pitch << 3) * phrasesToSkip;
1675                                 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1676                                 pixels <<= 2 * pixelShift;
1677                                 iwidth -= phrasesToSkip;
1678                                 pixCount = pixelShift;
1679                         }
1680                 }
1681         }
1682         else if (depth == 2)                                                    // 4 BPP
1683         {
1684 if (firstPix != 0)
1685         WriteLog("OP: Scaled bitmap @ 4 BPP requesting FIRSTPIX!\n");
1686                 index &= 0xF0;                                                          // Top four bits form CLUT index
1687                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1688                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1689
1690                 int pixCount = 0;
1691                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1692
1693                 while ((int32)iwidth > 0)
1694                 {
1695                         uint8 bits = pixels >> 60;
1696
1697 #ifndef OP_USES_PALETTE_ZERO
1698                         if (flagTRANS && bits == 0)
1699 #else
1700                         if (flagTRANS && (paletteRAM16[index | bits] == 0))
1701 #endif
1702                                 ;       // Do nothing...
1703                         else
1704                         {
1705                                 if (!flagRMW)
1706                                         // This is the *only* correct use of endian-dependent code
1707                                         // (i.e., mem-to-mem direct copying)!
1708                                         *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1709                                 else
1710                                         *currentLineBuffer =
1711                                                 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1712                                         *(currentLineBuffer + 1) =
1713                                                 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1714                         }
1715
1716                         currentLineBuffer += lbufDelta;
1717
1718 /*                      horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1719                         while (horizontalRemainder & 0x80)
1720                         {
1721                                 horizontalRemainder += hscale;
1722                                 pixCount++;
1723                                 pixels <<= 4;
1724                         }//*/
1725 //                      while (horizontalRemainder <= 0x20)             // I.e., it's <= 0 (*before* subtraction)
1726                         while (horizontalRemainder < 0x20)              // I.e., it's < 1.0 (*before* subtraction)
1727                         {
1728                                 horizontalRemainder += hscale;
1729                                 pixCount++;
1730                                 pixels <<= 4;
1731                         }
1732                         horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1733
1734                         if (pixCount > 15)
1735                         {
1736                                 int phrasesToSkip = pixCount / 16, pixelShift = pixCount % 16;
1737
1738                                 data += (pitch << 3) * phrasesToSkip;
1739                                 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1740                                 pixels <<= 4 * pixelShift;
1741                                 iwidth -= phrasesToSkip;
1742                                 pixCount = pixelShift;
1743                         }
1744                 }
1745         }
1746         else if (depth == 3)                                                    // 8 BPP
1747         {
1748 if (firstPix)
1749         WriteLog("OP: Scaled bitmap @ 8 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1750                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1751                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1752
1753                 int pixCount = 0;
1754                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1755
1756                 while ((int32)iwidth > 0)
1757                 {
1758                         uint8 bits = pixels >> 56;
1759
1760 #ifndef OP_USES_PALETTE_ZERO
1761                         if (flagTRANS && bits == 0)
1762 #else
1763                         if (flagTRANS && (paletteRAM16[bits] == 0))
1764 #endif
1765                                 ;       // Do nothing...
1766                         else
1767                         {
1768                                 if (!flagRMW)
1769                                         // This is the *only* correct use of endian-dependent code
1770                                         // (i.e., mem-to-mem direct copying)!
1771                                         *(uint16 *)currentLineBuffer = paletteRAM16[bits];
1772 /*                              {
1773                                         if (currentLineBuffer >= lineBufferLowerLimit && currentLineBuffer <= lineBufferUpperLimit)
1774                                                 *(uint16 *)currentLineBuffer = paletteRAM16[bits];
1775                                 }*/
1776                                 else
1777                                         *currentLineBuffer =
1778                                                 BLEND_CR(*currentLineBuffer, paletteRAM[bits << 1]),
1779                                         *(currentLineBuffer + 1) =
1780                                                 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[(bits << 1) + 1]);
1781                         }
1782
1783                         currentLineBuffer += lbufDelta;
1784
1785 //                      while (horizontalRemainder <= 0x20)             // I.e., it's <= 0 (*before* subtraction)
1786                         while (horizontalRemainder < 0x20)              // I.e., it's < 1.0 (*before* subtraction)
1787                         {
1788                                 horizontalRemainder += hscale;
1789                                 pixCount++;
1790                                 pixels <<= 8;
1791                         }
1792                         horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1793
1794                         if (pixCount > 7)
1795                         {
1796                                 int phrasesToSkip = pixCount / 8, pixelShift = pixCount % 8;
1797
1798                                 data += (pitch << 3) * phrasesToSkip;
1799                                 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1800                                 pixels <<= 8 * pixelShift;
1801                                 iwidth -= phrasesToSkip;
1802                                 pixCount = pixelShift;
1803                         }
1804                 }
1805         }
1806         else if (depth == 4)                                                    // 16 BPP
1807         {
1808 if (firstPix != 0)
1809         WriteLog("OP: Scaled bitmap @ 16 BPP requesting FIRSTPIX!\n");
1810                 // The LSB is OPFLAG_REFLECT, so sign extend it and OR 2 into it.
1811                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1812
1813                 int pixCount = 0;
1814                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1815
1816                 while ((int32)iwidth > 0)
1817                 {
1818                         uint8 bitsHi = pixels >> 56, bitsLo = pixels >> 48;
1819
1820 //This doesn't seem right... Let's try the encoded black value ($8800):
1821 //Apparently, CRY 0 maps to $8800...
1822                                 if (flagTRANS && ((bitsLo | bitsHi) == 0))
1823 //                              if (flagTRANS && (bitsHi == 0x88) && (bitsLo == 0x00))
1824                                 ;       // Do nothing...
1825                         else
1826                         {
1827                                 if (!flagRMW)
1828                                         *currentLineBuffer = bitsHi,
1829                                         *(currentLineBuffer + 1) = bitsLo;
1830                                 else
1831                                         *currentLineBuffer =
1832                                                 BLEND_CR(*currentLineBuffer, bitsHi),
1833                                         *(currentLineBuffer + 1) =
1834                                                 BLEND_Y(*(currentLineBuffer + 1), bitsLo);
1835                         }
1836
1837                         currentLineBuffer += lbufDelta;
1838
1839 /*                      horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1840                         while (horizontalRemainder & 0x80)
1841                         {
1842                                 horizontalRemainder += hscale;
1843                                 pixCount++;
1844                                 pixels <<= 16;
1845                         }//*/
1846 //                      while (horizontalRemainder <= 0x20)             // I.e., it's <= 0 (*before* subtraction)
1847                         while (horizontalRemainder < 0x20)              // I.e., it's <= 1.0 (*before* subtraction)
1848                         {
1849                                 horizontalRemainder += hscale;
1850                                 pixCount++;
1851                                 pixels <<= 16;
1852                         }
1853                         horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1854 //*/
1855                         if (pixCount > 3)
1856                         {
1857                                 int phrasesToSkip = pixCount / 4, pixelShift = pixCount % 4;
1858
1859                                 data += (pitch << 3) * phrasesToSkip;
1860                                 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1861                                 pixels <<= 16 * pixelShift;
1862
1863                                 iwidth -= phrasesToSkip;
1864
1865                                 pixCount = pixelShift;
1866                         }
1867                 }
1868         }
1869         else if (depth == 5)                                                    // 24 BPP
1870         {
1871 //I'm not sure that you can scale a 24 BPP bitmap properly--the JTRM seems to indicate as much.
1872 WriteLog("OP: Writing 24 BPP scaled bitmap!\n");
1873 if (firstPix != 0)
1874         WriteLog("OP: Scaled bitmap @ 24 BPP requesting FIRSTPIX!\n");
1875                 // Not sure, but I think RMW only works with 16 BPP and below, and only in CRY mode...
1876                 // The LSB is OPFLAG_REFLECT, so sign extend it and OR 4 into it.
1877                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 4) | 0x04;
1878
1879                 while (iwidth--)
1880                 {
1881                         // Fetch phrase...
1882                         uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1883                         data += pitch << 3;                                             // Multiply pitch * 8 (optimize: precompute this value)
1884
1885                         for(int i=0; i<2; i++)
1886                         {
1887                                 uint8 bits3 = pixels >> 56, bits2 = pixels >> 48,
1888                                         bits1 = pixels >> 40, bits0 = pixels >> 32;
1889
1890                                 if (flagTRANS && (bits3 | bits2 | bits1 | bits0) == 0)
1891                                         ;       // Do nothing...
1892                                 else
1893                                         *currentLineBuffer = bits3,
1894                                         *(currentLineBuffer + 1) = bits2,
1895                                         *(currentLineBuffer + 2) = bits1,
1896                                         *(currentLineBuffer + 3) = bits0;
1897
1898                                 currentLineBuffer += lbufDelta;
1899                                 pixels <<= 32;
1900                         }
1901                 }
1902         }
1903 }