]> Shamusworld >> Repos - virtualjaguar/blob - src/op.cpp
Fixed DSP/audio options to be unambiguous and consistent.
[virtualjaguar] / src / op.cpp
1 //
2 // Object Processor
3 //
4 // Original source by David Raingeard (Cal2)
5 // GCC/SDL port by Niels Wagenaar (Linux/WIN32) and Caz (BeOS)
6 // Extensive cleanups/fixes/rewrites by James Hammons
7 // (C) 2010 Underground Software
8 //
9 // JLH = James Hammons <jlhamm@acm.org>
10 //
11 // Who  When        What
12 // ---  ----------  -------------------------------------------------------------
13 // JLH  01/16/2010  Created this log ;-)
14 //
15
16 #include "op.h"
17
18 #include <stdlib.h>
19 #include <string.h>
20 #include "gpu.h"
21 #include "jaguar.h"
22 #include "log.h"
23 #include "m68000/m68kinterface.h"
24 #include "memory.h"
25 #include "tom.h"
26
27 //#define OP_DEBUG
28 //#define OP_DEBUG_BMP
29
// Saturating-blend table lookups. The table index is (dst << 8) | src, i.e.
// the existing (destination) byte selects the high half of the index and the
// delta (source) byte the low half. Both arguments are fully parenthesised so
// that expression arguments (e.g. `pixel & 0xFF`) expand as the caller intends.
// Callers are expected to pass byte-sized values; nothing here range-checks them.
#define BLEND_Y(dst, src)	op_blend_y[(((uint16)(dst)) << 8) | ((uint16)(src))]
#define BLEND_CR(dst, src)	op_blend_cr[(((uint16)(dst)) << 8) | ((uint16)(src))]
32
33 #define OBJECT_TYPE_BITMAP      0                                       // 000
34 #define OBJECT_TYPE_SCALE       1                                       // 001
35 #define OBJECT_TYPE_GPU         2                                       // 010
36 #define OBJECT_TYPE_BRANCH      3                                       // 011
37 #define OBJECT_TYPE_STOP        4                                       // 100
38
39 #define CONDITION_EQUAL                         0                       // VC == YPOS
40 #define CONDITION_LESS_THAN                     1                       // VC < YPOS
41 #define CONDITION_GREATER_THAN          2                       // VC > YPOS
42 #define CONDITION_OP_FLAG_SET           3
43 #define CONDITION_SECOND_HALF_LINE      4
44
45 #define OPFLAG_RELEASE          8                                       // Bus release bit
46 #define OPFLAG_TRANS            4                                       // Transparency bit
47 #define OPFLAG_RMW                      2                                       // Read-Modify-Write bit
48 #define OPFLAG_REFLECT          1                                       // Horizontal mirror bit
49
50 // Private function prototypes
51
52 void OPProcessFixedBitmap(uint64 p0, uint64 p1, bool render);
53 void OPProcessScaledBitmap(uint64 p0, uint64 p1, uint64 p2, bool render);
54 void OPDiscoverObjects(uint32 address);
55 void OPDumpObjectList(void);
56 void DumpScaledObject(uint64 p0, uint64 p1, uint64 p2);
57 void DumpFixedObject(uint64 p0, uint64 p1);
58 void DumpBitmapCore(uint64 p0, uint64 p1);
59 uint64 OPLoadPhrase(uint32 offset);
60
61 // Local global variables
62
63 // Blend tables (64K each)
64 static uint8 op_blend_y[0x10000];
65 static uint8 op_blend_cr[0x10000];
66 // There may be a problem with this "RAM" overlapping (and thus being independent of)
67 // some of the regular TOM RAM...
68 //#warning objectp_ram is separated from TOM RAM--need to fix that!
69 //static uint8 objectp_ram[0x40];                       // This is based at $F00000
70 uint8 objectp_running = 0;
71 //bool objectp_stop_reading_list;
72
73 static uint8 op_bitmap_bit_depth[8] = { 1, 2, 4, 8, 16, 24, 32, 0 };
74 //static uint32 op_bitmap_bit_size[8] =
75 //      { (uint32)(0.125*65536), (uint32)(0.25*65536), (uint32)(0.5*65536), (uint32)(1*65536),
76 //        (uint32)(2*65536),     (uint32)(1*65536),    (uint32)(1*65536),   (uint32)(1*65536) };
77 static uint32 op_pointer;
78
79 int32 phraseWidthToPixels[8] = { 64, 32, 16, 8, 4, 2, 0, 0 };
80
81
82 //
83 // Object Processor initialization
84 //
85 void OPInit(void)
86 {
87         // Here we calculate the saturating blend of a signed 4-bit value and an
88         // existing Cyan/Red value as well as a signed 8-bit value and an existing intensity...
89         // Note: CRY is 4 bits Cyan, 4 bits Red, 16 bits intensitY
90         for(int i=0; i<256*256; i++)
91         {
92                 int y = (i >> 8) & 0xFF;
93                 int dy = (int8)i;                                       // Sign extend the Y index
94                 int c1 = (i >> 8) & 0x0F;
95                 int dc1 = (int8)(i << 4) >> 4;          // Sign extend the R index
96                 int c2 = (i >> 12) & 0x0F;
97                 int dc2 = (int8)(i & 0xF0) >> 4;        // Sign extend the C index
98
99                 y += dy;
100
101                 if (y < 0)
102                         y = 0;
103                 else if (y > 0xFF)
104                         y = 0xFF;
105
106                 op_blend_y[i] = y;
107
108                 c1 += dc1;
109
110                 if (c1 < 0)
111                         c1 = 0;
112                 else if (c1 > 0x0F)
113                         c1 = 0x0F;
114
115                 c2 += dc2;
116
117                 if (c2 < 0)
118                         c2 = 0;
119                 else if (c2 > 0x0F)
120                         c2 = 0x0F;
121
122                 op_blend_cr[i] = (c2 << 4) | c1;
123         }
124
125         OPReset();
126 }
127
128 //
129 // Object Processor reset
130 //
131 void OPReset(void)
132 {
133 //      memset(objectp_ram, 0x00, 0x40);
134         objectp_running = 0;
135 }
136
137 static const char * opType[8] =
138 { "(BITMAP)", "(SCALED BITMAP)", "(GPU INT)", "(BRANCH)", "(STOP)", "???", "???", "???" };
139 static const char * ccType[8] =
140         { "\"==\"", "\"<\"", "\">\"", "(opflag set)", "(second half line)", "?", "?", "?" };
141 static uint32 object[8192];
142 static uint32 numberOfObjects;
143 //static uint32 objectLink[8192];
144 //static uint32 numberOfLinks;
145
146 void OPDone(void)
147 {
148 //#warning "!!! Fix OL dump so that it follows links !!!"
149 //      const char * opType[8] =
150 //      { "(BITMAP)", "(SCALED BITMAP)", "(GPU INT)", "(BRANCH)", "(STOP)", "???", "???", "???" };
151 //      const char * ccType[8] =
152 //              { "\"==\"", "\"<\"", "\">\"", "(opflag set)", "(second half line)", "?", "?", "?" };
153
154         uint32 olp = OPGetListPointer();
155         WriteLog("\nOP: OLP = $%08X\n", olp);
156         WriteLog("OP: Phrase dump\n    ----------\n");
157
158 #if 0
159         for(uint32 i=0; i<0x100; i+=8)
160         {
161                 uint32 hi = JaguarReadLong(olp + i, OP), lo = JaguarReadLong(olp + i + 4, OP);
162                 WriteLog("\t%08X: %08X %08X %s", olp + i, hi, lo, opType[lo & 0x07]);
163
164                 if ((lo & 0x07) == 3)
165                 {
166                         uint16 ypos = (lo >> 3) & 0x7FF;
167                         uint8  cc   = (lo >> 14) & 0x03;
168                         uint32 link = ((hi << 11) | (lo >> 21)) & 0x3FFFF8;
169                         WriteLog(" YPOS=%u, CC=%s, link=%08X", ypos, ccType[cc], link);
170                 }
171
172                 WriteLog("\n");
173
174                 if ((lo & 0x07) == 0)
175                         DumpFixedObject(OPLoadPhrase(olp+i), OPLoadPhrase(olp+i+8));
176
177                 if ((lo & 0x07) == 1)
178                         DumpScaledObject(OPLoadPhrase(olp+i), OPLoadPhrase(olp+i+8), OPLoadPhrase(olp+i+16));
179         }
180
181         WriteLog("\n");
182 #else
183 #warning "!!! Fix lockup in OPDiscoverObjects() !!!"
184 //temp, to keep the following function from locking up on bad/weird OLs
185 return;
186
187         numberOfObjects = 0;
188         OPDiscoverObjects(olp);
189         OPDumpObjectList();
190 #endif
191 }
192
193 void OPDiscoverObjects(uint32 address)
194 {
195         // Check to see if we've already seen this object
196         for(uint32 i=0; i<numberOfObjects; i++)
197         {
198                 if (address == object[i])
199                         return;
200         }
201
202         // Store the object...
203         object[numberOfObjects++] = address;
204         uint8 objectType = 0;
205
206         do
207         {
208                 uint32 hi = JaguarReadLong(address + 0, OP);
209                 uint32 lo = JaguarReadLong(address + 4, OP);
210                 objectType = lo & 0x07;
211                 uint32 link = ((hi << 11) | (lo >> 21)) & 0x3FFFF8;
212
213                 if (objectType == 3)
214                 {
215                         uint16 ypos = (lo >> 3) & 0x7FF;
216                         uint8  cc   = (lo >> 14) & 0x07;        // Proper # of bits == 3
217
218                         // Recursion needed to follow all links!
219                         OPDiscoverObjects(address + 8);
220                 }
221
222                 if (address == link)    // Ruh roh...
223                 {
224                         // Runaway recursive link is bad!
225                         return;
226                 }
227
228                 address = link;
229
230                 // Check to see if we've already seen this object, and add it if not
231                 bool seenObject = false;
232
233                 for(uint32 i=0; i<numberOfObjects; i++)
234                 {
235                         if (address == object[i])
236                         {
237                                 seenObject = true;
238                                 break;
239                         }
240                 }
241
242                 if (!seenObject)
243                         object[numberOfObjects++] = address;
244         }
245         while (objectType != 4);
246 }
247
248 void OPDumpObjectList(void)
249 {
250         for(uint32 i=0; i<numberOfObjects; i++)
251         {
252                 uint32 address = object[i];
253
254                 uint32 hi = JaguarReadLong(address + 0, OP);
255                 uint32 lo = JaguarReadLong(address + 4, OP);
256                 uint8 objectType = lo & 0x07;
257                 uint32 link = ((hi << 11) | (lo >> 21)) & 0x3FFFF8;
258                 WriteLog("%08X: %08X %08X %s", address, hi, lo, opType[objectType]);
259
260                 if (objectType == 3)
261                 {
262                         uint16 ypos = (lo >> 3) & 0x7FF;
263                         uint8  cc   = (lo >> 14) & 0x07;        // Proper # of bits == 3
264                         WriteLog(" YPOS=%u, CC=%s, link=$%08X", ypos, ccType[cc], link);
265                 }
266
267                 WriteLog("\n");
268
269                 if (objectType == 0)
270                         DumpFixedObject(OPLoadPhrase(address + 0), OPLoadPhrase(address + 8));
271
272                 if (objectType == 1)
273                         DumpScaledObject(OPLoadPhrase(address + 0), OPLoadPhrase(address + 8),
274                                 OPLoadPhrase(address + 16));
275
276                 if (address == link)    // Ruh roh...
277                 {
278                         // Runaway recursive link is bad!
279                         WriteLog("***** SELF REFERENTIAL LINK *****\n\n");
280                 }
281         }
282
283         WriteLog("\n");
284 }
285
286 //
287 // Object Processor memory access
288 // Memory range: F00010 - F00027
289 //
290 //      F00010-F00017   R     xxxxxxxx xxxxxxxx   OB - current object code from the graphics processor
291 //      F00020-F00023     W   xxxxxxxx xxxxxxxx   OLP - start of the object list
292 //      F00026            W   -------- -------x   OBF - object processor flag
293 //
294
295 #if 0
296 uint8 OPReadByte(uint32 offset, uint32 who/*=UNKNOWN*/)
297 {
298         offset &= 0x3F;
299         return objectp_ram[offset];
300 }
301
302 uint16 OPReadWord(uint32 offset, uint32 who/*=UNKNOWN*/)
303 {
304         offset &= 0x3F;
305         return GET16(objectp_ram, offset);
306 }
307
308 void OPWriteByte(uint32 offset, uint8 data, uint32 who/*=UNKNOWN*/)
309 {
310         offset &= 0x3F;
311         objectp_ram[offset] = data;
312 }
313
314 void OPWriteWord(uint32 offset, uint16 data, uint32 who/*=UNKNOWN*/)
315 {
316         offset &= 0x3F;
317         SET16(objectp_ram, offset, data);
318
319 /*if (offset == 0x20)
320 WriteLog("OP: Setting lo list pointer: %04X\n", data);
321 if (offset == 0x22)
322 WriteLog("OP: Setting hi list pointer: %04X\n", data);//*/
323 }
324 #endif
325
326 uint32 OPGetListPointer(void)
327 {
328         // Note: This register is LO / HI WORD, hence the funky look of this...
329         return GET16(tomRam8, 0x20) | (GET16(tomRam8, 0x22) << 16);
330 }
331
332 // This is WRONG, since the OBF is only 16 bits wide!!! [FIXED]
333
334 uint32 OPGetStatusRegister(void)
335 {
336         return GET16(tomRam8, 0x26);
337 }
338
339 // This is WRONG, since the OBF is only 16 bits wide!!! [FIXED]
340
341 void OPSetStatusRegister(uint32 data)
342 {
343         tomRam8[0x26] = (data & 0x0000FF00) >> 8;
344         tomRam8[0x27] |= (data & 0xFE);
345 }
346
347 void OPSetCurrentObject(uint64 object)
348 {
349 //Not sure this is right... Wouldn't it just be stored 64 bit BE?
350         // Stored as least significant 32 bits first, ms32 last in big endian
351 /*      objectp_ram[0x13] = object & 0xFF; object >>= 8;
352         objectp_ram[0x12] = object & 0xFF; object >>= 8;
353         objectp_ram[0x11] = object & 0xFF; object >>= 8;
354         objectp_ram[0x10] = object & 0xFF; object >>= 8;
355
356         objectp_ram[0x17] = object & 0xFF; object >>= 8;
357         objectp_ram[0x16] = object & 0xFF; object >>= 8;
358         objectp_ram[0x15] = object & 0xFF; object >>= 8;
359         objectp_ram[0x14] = object & 0xFF;*/
360 // Let's try regular good old big endian...
361         tomRam8[0x17] = object & 0xFF; object >>= 8;
362         tomRam8[0x16] = object & 0xFF; object >>= 8;
363         tomRam8[0x15] = object & 0xFF; object >>= 8;
364         tomRam8[0x14] = object & 0xFF; object >>= 8;
365
366         tomRam8[0x13] = object & 0xFF; object >>= 8;
367         tomRam8[0x12] = object & 0xFF; object >>= 8;
368         tomRam8[0x11] = object & 0xFF; object >>= 8;
369         tomRam8[0x10] = object & 0xFF;
370 }
371
372 uint64 OPLoadPhrase(uint32 offset)
373 {
374         offset &= ~0x07;                                                // 8 byte alignment
375         return ((uint64)JaguarReadLong(offset, OP) << 32) | (uint64)JaguarReadLong(offset+4, OP);
376 }
377
378 void OPStorePhrase(uint32 offset, uint64 p)
379 {
380         offset &= ~0x07;                                                // 8 byte alignment
381         JaguarWriteLong(offset, p >> 32, OP);
382         JaguarWriteLong(offset + 4, p & 0xFFFFFFFF, OP);
383 }
384
385 //
386 // Debugging routines
387 //
388 void DumpScaledObject(uint64 p0, uint64 p1, uint64 p2)
389 {
390         WriteLog("          %08X %08X\n", (uint32)(p1>>32), (uint32)(p1&0xFFFFFFFF));
391         WriteLog("          %08X %08X\n", (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
392         DumpBitmapCore(p0, p1);
393         uint32 hscale = p2 & 0xFF;
394         uint32 vscale = (p2 >> 8) & 0xFF;
395         uint32 remainder = (p2 >> 16) & 0xFF;
396         WriteLog("    [hsc: %02X, vsc: %02X, rem: %02X]\n", hscale, vscale, remainder);
397 }
398
399 void DumpFixedObject(uint64 p0, uint64 p1)
400 {
401         WriteLog("          %08X %08X\n", (uint32)(p1>>32), (uint32)(p1&0xFFFFFFFF));
402         DumpBitmapCore(p0, p1);
403 }
404
405 void DumpBitmapCore(uint64 p0, uint64 p1)
406 {
407         uint32 bdMultiplier[8] = { 64, 32, 16, 8, 4, 2, 1, 1 };
408         uint8 bitdepth = (p1 >> 12) & 0x07;
409 //WAS:  int16 ypos = ((p0 >> 3) & 0x3FF);                       // ??? What if not interlaced (/2)?
410         int16 ypos = ((p0 >> 3) & 0x7FF);                       // ??? What if not interlaced (/2)?
411         int32 xpos = p1 & 0xFFF;
412         xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);       // Sign extend that mutha!
413         uint32 iwidth = ((p1 >> 28) & 0x3FF);
414         uint32 dwidth = ((p1 >> 18) & 0x3FF);           // Unsigned!
415         uint16 height = ((p0 >> 14) & 0x3FF);
416         uint32 link = ((p0 >> 24) & 0x7FFFF) << 3;
417         uint32 ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
418         uint32 firstPix = (p1 >> 49) & 0x3F;
419         uint8 flags = (p1 >> 45) & 0x0F;
420         uint8 idx = (p1 >> 38) & 0x7F;
421         uint32 pitch = (p1 >> 15) & 0x07;
422         WriteLog("    [%u x %u @ (%i, %u) (iw:%u, dw:%u) (%u bpp), l:%08X, p:%08X fp:%02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
423                 iwidth * bdMultiplier[bitdepth],
424                 height, xpos, ypos, iwidth, dwidth, op_bitmap_bit_depth[bitdepth], link,
425                 ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""),
426                 (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""),
427                 (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
428 }
429
430 //
431 // Object Processor main routine
432 //
433 #warning "Need to fix this so that when an GPU object IRQ happens, we can pick up OP processing where we left off. !!! FIX !!!"
434 void OPProcessList(int halfline, bool render)
435 {
436 #warning "!!! NEED TO HANDLE MULTIPLE FIELDS PROPERLY !!!
437 // We ignore them, for now; not good
438         halfline &= 0x7FF;
439
440 extern int op_start_log;
441 //      char * condition_to_str[8] =
442 //              { "==", "<", ">", "(opflag set)", "(second half line)", "?", "?", "?" };
443
444         op_pointer = OPGetListPointer();
445
446 //      objectp_stop_reading_list = false;
447
448 //WriteLog("OP: Processing line #%u (OLP=%08X)...\n", halfline, op_pointer);
449 //op_done();
450
451 // *** BEGIN OP PROCESSOR TESTING ONLY ***
452 extern bool interactiveMode;
453 extern bool iToggle;
454 extern int objectPtr;
455 bool inhibit;
456 int bitmapCounter = 0;
457 // *** END OP PROCESSOR TESTING ONLY ***
458
459         uint32 opCyclesToRun = 30000;                                   // This is a pulled-out-of-the-air value (will need to be fixed, obviously!)
460
461 //      if (op_pointer) WriteLog(" new op list at 0x%.8x halfline %i\n",op_pointer,halfline);
462         while (op_pointer)
463         {
464 // *** BEGIN OP PROCESSOR TESTING ONLY ***
465 if (interactiveMode && bitmapCounter == objectPtr)
466         inhibit = iToggle;
467 else
468         inhibit = false;
469 // *** END OP PROCESSOR TESTING ONLY ***
470 //              if (objectp_stop_reading_list)
471 //                      return;
472
473                 uint64 p0 = OPLoadPhrase(op_pointer);
474                 op_pointer += 8;
475 //WriteLog("\t%08X type %i\n", op_pointer, (uint8)p0 & 0x07);
476
477 #if 1
478 if (halfline == TOMGetVDB() && op_start_log)
479 //if (halfline == 215 && op_start_log)
480 //if (halfline == 28 && op_start_log)
481 //if (halfline == 0)
482 {
483 WriteLog("%08X --> phrase %08X %08X", op_pointer - 8, (int)(p0>>32), (int)(p0&0xFFFFFFFF));
484 if ((p0 & 0x07) == OBJECT_TYPE_BITMAP)
485 {
486 WriteLog(" (BITMAP) ");
487 uint64 p1 = OPLoadPhrase(op_pointer);
488 WriteLog("\n%08X --> phrase %08X %08X ", op_pointer, (int)(p1>>32), (int)(p1&0xFFFFFFFF));
489         uint8 bitdepth = (p1 >> 12) & 0x07;
490 //WAS:  int16 ypos = ((p0 >> 3) & 0x3FF);                       // ??? What if not interlaced (/2)?
491         int16 ypos = ((p0 >> 3) & 0x7FF);                       // ??? What if not interlaced (/2)?
492 int32 xpos = p1 & 0xFFF;
493 xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);
494         uint32 iwidth = ((p1 >> 28) & 0x3FF);
495         uint32 dwidth = ((p1 >> 18) & 0x3FF);           // Unsigned!
496         uint16 height = ((p0 >> 14) & 0x3FF);
497         uint32 link = ((p0 >> 24) & 0x7FFFF) << 3;
498         uint32 ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
499         uint32 firstPix = (p1 >> 49) & 0x3F;
500         uint8 flags = (p1 >> 45) & 0x0F;
501         uint8 idx = (p1 >> 38) & 0x7F;
502         uint32 pitch = (p1 >> 15) & 0x07;
503 WriteLog("\n    [%u (%u) x %u @ (%i, %u) (%u bpp), l: %08X, p: %08X fp: %02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
504         iwidth, dwidth, height, xpos, ypos, op_bitmap_bit_depth[bitdepth], link, ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""), (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""), (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
505 }
506 if ((p0 & 0x07) == OBJECT_TYPE_SCALE)
507 {
508 WriteLog(" (SCALED BITMAP)");
509 uint64 p1 = OPLoadPhrase(op_pointer), p2 = OPLoadPhrase(op_pointer+8);
510 WriteLog("\n%08X --> phrase %08X %08X ", op_pointer, (int)(p1>>32), (int)(p1&0xFFFFFFFF));
511 WriteLog("\n%08X --> phrase %08X %08X ", op_pointer+8, (int)(p2>>32), (int)(p2&0xFFFFFFFF));
512         uint8 bitdepth = (p1 >> 12) & 0x07;
513 //WAS:  int16 ypos = ((p0 >> 3) & 0x3FF);                       // ??? What if not interlaced (/2)?
514         int16 ypos = ((p0 >> 3) & 0x7FF);                       // ??? What if not interlaced (/2)?
515 int32 xpos = p1 & 0xFFF;
516 xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);
517         uint32 iwidth = ((p1 >> 28) & 0x3FF);
518         uint32 dwidth = ((p1 >> 18) & 0x3FF);           // Unsigned!
519         uint16 height = ((p0 >> 14) & 0x3FF);
520         uint32 link = ((p0 >> 24) & 0x7FFFF) << 3;
521         uint32 ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
522         uint32 firstPix = (p1 >> 49) & 0x3F;
523         uint8 flags = (p1 >> 45) & 0x0F;
524         uint8 idx = (p1 >> 38) & 0x7F;
525         uint32 pitch = (p1 >> 15) & 0x07;
526 WriteLog("\n    [%u (%u) x %u @ (%i, %u) (%u bpp), l: %08X, p: %08X fp: %02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
527         iwidth, dwidth, height, xpos, ypos, op_bitmap_bit_depth[bitdepth], link, ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""), (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""), (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
528         uint32 hscale = p2 & 0xFF;
529         uint32 vscale = (p2 >> 8) & 0xFF;
530         uint32 remainder = (p2 >> 16) & 0xFF;
531 WriteLog("    [hsc: %02X, vsc: %02X, rem: %02X]\n", hscale, vscale, remainder);
532 }
533 if ((p0 & 0x07) == OBJECT_TYPE_GPU)
534 WriteLog(" (GPU)\n");
535 if ((p0 & 0x07) == OBJECT_TYPE_BRANCH)
536 {
537 WriteLog(" (BRANCH)\n");
538 uint8 * jaguarMainRam = GetRamPtr();
539 WriteLog("[RAM] --> ");
540 for(int k=0; k<8; k++)
541         WriteLog("%02X ", jaguarMainRam[op_pointer-8 + k]);
542 WriteLog("\n");
543 }
544 if ((p0 & 0x07) == OBJECT_TYPE_STOP)
545 WriteLog("    --> List end\n\n");
546 }
547 #endif
548
549                 switch ((uint8)p0 & 0x07)
550                 {
551                 case OBJECT_TYPE_BITMAP:
552                 {
553 //WAS:                  uint16 ypos = (p0 >> 3) & 0x3FF;
554                         uint16 ypos = (p0 >> 3) & 0x7FF;
555 // This is only theory implied by Rayman...!
556 // It seems that if the YPOS is zero, then bump the YPOS value so that it coincides with
557 // the VDB value. With interlacing, this would be slightly more tricky.
558 // There's probably another bit somewhere that enables this mode--but so far, doesn't seem
559 // to affect any other game in a negative way (that I've seen).
560 // Either that, or it's an undocumented bug...
561
562 //No, the reason this was needed is that the OP code before was wrong. Any value
563 //less than VDB will get written to the top line of the display!
564 #if 0
565 // Not so sure... Let's see what happens here...
566 // No change...
567                         if (ypos == 0)
568                                 ypos = TOMReadWord(0xF00046, OP) / 2;                   // Get the VDB value
569 #endif
570 // Actually, no. Any item less than VDB will get only the lines that hang over VDB displayed.
571 // Actually, this is incorrect. It seems that VDB value is wrong somewhere and that's
572 // what's causing things to fuck up. Still no idea why.
573
574                         uint32 height = (p0 & 0xFFC000) >> 14;
575                         uint32 oldOPP = op_pointer - 8;
576 // *** BEGIN OP PROCESSOR TESTING ONLY ***
577 if (inhibit && op_start_log)
578         WriteLog("!!! ^^^ This object is INHIBITED! ^^^ !!!\n");
579 bitmapCounter++;
580 if (!inhibit)   // For OP testing only!
581 // *** END OP PROCESSOR TESTING ONLY ***
582                         if (halfline >= ypos && height > 0)
583                         {
584                                 uint64 p1 = OPLoadPhrase(op_pointer);
585                                 op_pointer += 8;
586 //WriteLog("OP: Writing halfline %d with ypos == %d...\n", halfline, ypos);
587 //WriteLog("--> Writing %u BPP bitmap...\n", op_bitmap_bit_depth[(p1 >> 12) & 0x07]);
588 //                              OPProcessFixedBitmap(halfline, p0, p1, render);
589                                 OPProcessFixedBitmap(p0, p1, render);
590
591                                 // OP write-backs
592
593 //???Does this really happen??? Doesn't seem to work if you do this...!
594 //Probably not. Must be a bug in the documentation...!
595 //                              uint32 link = (p0 & 0x7FFFF000000) >> 21;
596 //                              SET16(tom_ram_8, 0x20, link & 0xFFFF);  // OLP
597 //                              SET16(tom_ram_8, 0x22, link >> 16);
598 /*                              uint32 height = (p0 & 0xFFC000) >> 14;
599                                 if (height - 1 > 0)
600                                         height--;*/
601                                 // NOTE: Would subtract 2 if in interlaced mode...!
602 //                              uint64 height = ((p0 & 0xFFC000) - 0x4000) & 0xFFC000;
603 //                              if (height)
604                                 height--;
605
606                                 uint64 data = (p0 & 0xFFFFF80000000000LL) >> 40;
607                                 uint64 dwidth = (p1 & 0xFFC0000) >> 15;
608                                 data += dwidth;
609
610                                 p0 &= ~0xFFFFF80000FFC000LL;            // Mask out old data...
611                                 p0 |= (uint64)height << 14;
612                                 p0 |= data << 40;
613                                 OPStorePhrase(oldOPP, p0);
614                         }
615 //WriteLog("\t\tOld OP: %08X -> ", op_pointer);
616 //Temp, for testing...
617 //No doubt, this type of check will break all kinds of stuff... !!! FIX !!!
618 //And it does! !!! FIX !!!
619 //Let's remove this "fix" since it screws up more than it fixes.
620 /*      if (op_pointer > ((p0 & 0x000007FFFF000000LL) >> 21))
621                 return;*/
622
623 // NOTE: The link address only replaces bits 3-21 in the OLP, and this replaces
624 //       EVERYTHING. !!! FIX !!! [DONE]
625 #warning "!!! Link address is not linked properly for all object types !!!"
626 #warning "!!! Only BITMAP is properly handled !!!"
627                         op_pointer &= 0xFFC00007;
628                         op_pointer |= (p0 & 0x000007FFFF000000LL) >> 21;
629 //WriteLog("New OP: %08X\n", op_pointer);
630 //kludge: Seems that memory access is mirrored in the first 8MB of memory...
631 if (op_pointer > 0x1FFFFF && op_pointer < 0x800000)
632         op_pointer &= 0xFF1FFFFF;       // Knock out bits 21-23
633
634                         break;
635                 }
636                 case OBJECT_TYPE_SCALE:
637                 {
638 //WAS:                  uint16 ypos = (p0 >> 3) & 0x3FF;
639                         uint16 ypos = (p0 >> 3) & 0x7FF;
640                         uint32 height = (p0 & 0xFFC000) >> 14;
641                         uint32 oldOPP = op_pointer - 8;
642 //WriteLog("OP: Scaled Object (ypos=%04X, height=%04X", ypos, height);
643 // *** BEGIN OP PROCESSOR TESTING ONLY ***
644 if (inhibit && op_start_log)
645 {
646         WriteLog("!!! ^^^ This object is INHIBITED! ^^^ !!! (halfline=%u, ypos=%u, height=%u)\n", halfline, ypos, height);
647         DumpScaledObject(p0, OPLoadPhrase(op_pointer), OPLoadPhrase(op_pointer+8));
648 }
649 bitmapCounter++;
650 if (!inhibit)   // For OP testing only!
651 // *** END OP PROCESSOR TESTING ONLY ***
652                         if (halfline >= ypos && height > 0)
653                         {
654                                 uint64 p1 = OPLoadPhrase(op_pointer);
655                                 op_pointer += 8;
656                                 uint64 p2 = OPLoadPhrase(op_pointer);
657                                 op_pointer += 8;
658 //WriteLog("OP: %08X (%d) %08X%08X %08X%08X %08X%08X\n", oldOPP, halfline, (uint32)(p0>>32), (uint32)(p0&0xFFFFFFFF), (uint32)(p1>>32), (uint32)(p1&0xFFFFFFFF), (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
659                                 OPProcessScaledBitmap(p0, p1, p2, render);
660
661                                 // OP write-backs
662
663                                 uint16 remainder = (p2 >> 16) & 0xFF;//, vscale = p2 >> 8;
664                                 uint8 /*remainder = p2 >> 16,*/ vscale = p2 >> 8;
665 //Actually, we should skip this object if it has a vscale of zero.
666 //Or do we? Not sure... Atari Karts has a few lines that look like:
667 // (SCALED BITMAP)
668 //000E8268 --> phrase 00010000 7000B00D
669 //    [7 (0) x 1 @ (13, 0) (8 bpp), l: 000E82A0, p: 000E0FC0 fp: 00, fl:RELEASE, idx:00, pt:01]
670 //    [hsc: 9A, vsc: 00, rem: 00]
671 // Could it be the vscale is overridden if the DWIDTH is zero? Hmm...
672 //WriteLog("OP: Scaled bitmap processing (rem=%02X, vscale=%02X)...\n", remainder, vscale);//*/
673
674                                 if (vscale == 0)
675                                         vscale = 0x20;                                  // OP bug??? Nope, it isn't...! Or is it?
676
677 //extern int start_logging;
678 //if (start_logging)
679 //      WriteLog("--> Returned from scaled bitmap processing (rem=%02X, vscale=%02X)...\n", remainder, vscale);//*/
680 //Locks up here:
681 //--> Returned from scaled bitmap processing (rem=20, vscale=80)...
682 //There are other problems here, it looks like...
683 //Another lock up:
684 //About to execute OP (508)...
685 /*
686 OP: Scaled bitmap 4x? 4bpp at 38,? hscale=7C fpix=0 data=00075E28 pitch 1 hflipped=no dwidth=? (linked to 00071118) Transluency=no
687 --> Returned from scaled bitmap processing (rem=50, vscale=7C)...
688 OP: Scaled bitmap 4x? 4bpp at 38,? hscale=7C fpix=0 data=00075E28 pitch 1 hflipped=no dwidth=? (linked to 00071118) Transluency=no
689 --> Returned from scaled bitmap processing (rem=30, vscale=7C)...
690 OP: Scaled bitmap 4x? 4bpp at 38,? hscale=7C fpix=0 data=00075E28 pitch 1 hflipped=no dwidth=? (linked to 00071118) Transluency=no
691 --> Returned from scaled bitmap processing (rem=10, vscale=7C)...
692 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756A8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
693 --> Returned from scaled bitmap processing (rem=00, vscale=7E)...
694 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
695 --> Returned from scaled bitmap processing (rem=00, vscale=80)...
696 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
697 --> Returned from scaled bitmap processing (rem=5E, vscale=7E)...
698 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756E8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
699 --> Returned from scaled bitmap processing (rem=60, vscale=80)...
700 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
701 --> Returned from scaled bitmap processing (rem=3E, vscale=7E)...
702 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756E8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
703 --> Returned from scaled bitmap processing (rem=40, vscale=80)...
704 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
705 --> Returned from scaled bitmap processing (rem=1E, vscale=7E)...
706 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756E8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
707 --> Returned from scaled bitmap processing (rem=20, vscale=80)...
708 */
709 //Here's another problem:
710 //    [hsc: 20, vsc: 20, rem: 00]
711 // Since we're not checking for $E0 (but that's what we get from the above), we end
712 // up repeating this halfline unnecessarily... !!! FIX !!! [DONE, but... still not quite
713 // right. Either that, or the Accolade team that wrote Bubsy screwed up royal.]
714 //Also note: $E0 = 7.0 which IS a legal vscale value...
715
716 //                              if (remainder & 0x80)                           // I.e., it's negative
717 //                              if ((remainder & 0x80) || remainder == 0)       // I.e., it's <= 0
718 //                              if ((remainder - 1) >= 0xE0)            // I.e., it's <= 0
719 //                              if ((remainder >= 0xE1) || remainder == 0)// I.e., it's <= 0
720 //                              if ((remainder >= 0xE1 && remainder <= 0xFF) || remainder == 0)// I.e., it's <= 0
721 //                              if (remainder <= 0x20)                          // I.e., it's <= 1.0
722                                 // I.e., it's < 1.0f -> means it'll go negative when we subtract 1.0f.
723                                 if (remainder < 0x20)
724                                 {
725                                         uint64 data = (p0 & 0xFFFFF80000000000LL) >> 40;
726                                         uint64 dwidth = (p1 & 0xFFC0000) >> 15;
727
728 //                                      while (remainder & 0x80)
729 //                                      while ((remainder & 0x80) || remainder == 0)
730 //                                      while ((remainder - 1) >= 0xE0)
731 //                                      while ((remainder >= 0xE1) || remainder == 0)
732 //                                      while ((remainder >= 0xE1 && remainder <= 0xFF) || remainder == 0)
733 //                                      while (remainder <= 0x20)
734                                         while (remainder < 0x20)
735                                         {
736                                                 remainder += vscale;
737
738                                                 if (height)
739                                                         height--;
740
741                                                 data += dwidth;
742                                         }
743
744                                         p0 &= ~0xFFFFF80000FFC000LL;    // Mask out old data...
745                                         p0 |= (uint64)height << 14;
746                                         p0 |= data << 40;
747                                         OPStorePhrase(oldOPP, p0);
748                                 }
749
750                                 remainder -= 0x20;                                      // 1.0f in [3.5] fixed point format
751
752 //if (start_logging)
753 //      WriteLog("--> Finished writebacks...\n");//*/
754
755 //WriteLog(" [%08X%08X -> ", (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
756                                 p2 &= ~0x0000000000FF0000LL;
757                                 p2 |= (uint64)remainder << 16;
758 //WriteLog("%08X%08X]\n", (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
759                                 OPStorePhrase(oldOPP + 16, p2);
760 //remainder = (uint8)(p2 >> 16), vscale = (uint8)(p2 >> 8);
761 //WriteLog(" [after]: rem=%02X, vscale=%02X\n", remainder, vscale);
762                         }
763
764                         op_pointer = (p0 & 0x000007FFFF000000LL) >> 21;
765                         break;
766                 }
767                 case OBJECT_TYPE_GPU:
768                 {
769 //WriteLog("OP: Asserting GPU IRQ #3...\n");
770 #warning "Need to fix OP GPU IRQ handling! !!! FIX !!!"
771                         OPSetCurrentObject(p0);
772                         GPUSetIRQLine(3, ASSERT_LINE);
773 //Also, OP processing is suspended from this point until OBF (F00026) is written to...
774 // !!! FIX !!!
775 //Do something like:
776 //OPSuspendedByGPU = true;
777 //Dunno if the OP keeps processing from where it was interrupted, or if it just continues
778 //on the next halfline...
779 // --> It continues from where it was interrupted! !!! FIX !!!
780                         break;
781                 }
782                 case OBJECT_TYPE_BRANCH:
783                 {
784                         uint16 ypos = (p0 >> 3) & 0x7FF;
785 // NOTE: The JTRM sez there are only 2 bits used for the CC, but lists *five*
786 //       conditions! Need at least one more bit for that! :-P
787 // Also, the ASIC nets imply that it uses bits 14-16 (height in BM & SBM objects)
788 #warning "!!! Possibly bad CC handling in OP (missing 1 bit) !!!"
789                         uint8  cc   = (p0 >> 14) & 0x03;
790                         uint32 link = (p0 >> 21) & 0x3FFFF8;
791
792 //                      if ((ypos!=507)&&(ypos!=25))
793 //                              WriteLog("\t%i%s%i link=0x%.8x\n",halfline,condition_to_str[cc],ypos>>1,link);
794                         switch (cc)
795                         {
796                         case CONDITION_EQUAL:
797                                 if (TOMReadWord(0xF00006, OP) == ypos || ypos == 0x7FF)
798                                         op_pointer = link;
799                                 break;
800                         case CONDITION_LESS_THAN:
801                                 if (TOMReadWord(0xF00006, OP) < ypos)
802                                         op_pointer = link;
803                                 break;
804                         case CONDITION_GREATER_THAN:
805                                 if (TOMReadWord(0xF00006, OP) > ypos)
806                                         op_pointer = link;
807                                 break;
808                         case CONDITION_OP_FLAG_SET:
809                                 if (OPGetStatusRegister() & 0x01)
810                                         op_pointer = link;
811                                 break;
812                         case CONDITION_SECOND_HALF_LINE:
813 //Here's the ASIC code:
814 //  ND4(cctrue5, newheight[2], heightl[1], heightl[0], hcb[10]);
815 //which means, do the link if bit 10 of HC is set...
816
817                                 // This basically means branch if bit 10 of HC is set
818 #warning "Unhandled condition code causes emulator to crash... !!! FIX !!!"
819                                 WriteLog("OP: Unexpected CONDITION_SECOND_HALF_LINE in BRANCH object\nOP: shutting down!\n");
820                                 LogDone();
821                                 exit(0);
822                                 break;
823                         default:
824                                 // Basically, if you do this, the OP does nothing. :-)
825                                 WriteLog("OP: Unimplemented branch condition %i\n", cc);
826                         }
827                         break;
828                 }
829                 case OBJECT_TYPE_STOP:
830                 {
831 //op_start_log = 0;
832                         // unsure
833 //WriteLog("OP: --> STOP\n");
834 //                      op_set_status_register(((p0>>3) & 0xFFFFFFFF));
835 //This seems more likely...
836                         OPSetCurrentObject(p0);
837
838                         if (p0 & 0x08)
839                         {
840                                 // We need to check whether these interrupts are enabled or not, THEN
841                                 // set an IRQ + pending flag if necessary...
842                                 if (TOMIRQEnabled(IRQ_OPFLAG))
843                                 {
844                                         TOMSetPendingObjectInt();
845                                         m68k_set_irq(2);                                // Cause a 68K IPL 2 to occur...
846                                 }
847                         }
848
849                         return;
850 //                      break;
851                 }
852                 default:
853 //                      WriteLog("op: unknown object type %i\n", ((uint8)p0 & 0x07));
854                         return;
855                 }
856
857                 // Here is a little sanity check to keep the OP from locking up the machine
858                 // when fed bad data. Better would be to count how many actual cycles it used
859                 // and bail out/reenter to properly simulate an overloaded OP... !!! FIX !!!
860 #warning "Better would be to count how many actual cycles it used and bail out/reenter to properly simulate an overloaded OP... !!! FIX !!!"
861                 opCyclesToRun--;
862
863                 if (!opCyclesToRun)
864                         return;
865         }
866 }
867
868 //
869 // Store fixed size bitmap in line buffer
870 //
871 void OPProcessFixedBitmap(uint64 p0, uint64 p1, bool render)
872 {
873 // Need to make sure that when writing that it stays within the line buffer...
874 // LBUF ($F01800 - $F01D9E) 360 x 32-bit RAM
875         uint8 depth = (p1 >> 12) & 0x07;                                // Color depth of image
876         int32 xpos = ((int16)((p1 << 4) & 0xFFFF)) >> 4;// Image xpos in LBUF
877         uint32 iwidth = (p1 >> 28) & 0x3FF;                             // Image width in *phrases*
878         uint32 data = (p0 >> 40) & 0xFFFFF8;                    // Pixel data address
879 //#ifdef OP_DEBUG_BMP
880         uint32  firstPix = (p1 >> 49) & 0x3F;
881         // "The LSB is significant only for scaled objects..." -JTRM
882         // "In 1 BPP mode, all five bits are significant. In 2 BPP mode, the top four are significant..."
883         firstPix &= 0x3E;
884 //#endif
885 // We can ignore the RELEASE (high order) bit for now--probably forever...!
886 //      uint8 flags = (p1 >> 45) & 0x0F;        // REFLECT, RMW, TRANS, RELEASE
887 //Optimize: break these out to their own BOOL values
888         uint8 flags = (p1 >> 45) & 0x07;                                // REFLECT (0), RMW (1), TRANS (2)
889         bool flagREFLECT = (flags & OPFLAG_REFLECT ? true : false),
890                 flagRMW = (flags & OPFLAG_RMW ? true : false),
891                 flagTRANS = (flags & OPFLAG_TRANS ? true : false);
892 // "For images with 1 to 4 bits/pixel the top 7 to 4 bits of the index
893 //  provide the most significant bits of the palette address."
894         uint8 index = (p1 >> 37) & 0xFE;                                // CLUT index offset (upper pix, 1-4 bpp)
895         uint32 pitch = (p1 >> 15) & 0x07;                               // Phrase pitch
896         pitch <<= 3;                                                                    // Optimization: Multiply pitch by 8
897
898 //      int16 scanlineWidth = tom_getVideoModeWidth();
899         uint8 * tomRam8 = TOMGetRamPointer();
900         uint8 * paletteRAM = &tomRam8[0x400];
901         // This is OK as long as it's used correctly: For 16-bit RAM to RAM direct copies--NOT
902         // for use when using endian-corrected data (i.e., any of the *_word_read functions!)
903         uint16 * paletteRAM16 = (uint16 *)paletteRAM;
904
905 //      WriteLog("bitmap %ix? %ibpp at %i,? firstpix=? data=0x%.8x pitch %i hflipped=%s dwidth=? (linked to ?) RMW=%s Tranparent=%s\n",
906 //              iwidth, op_bitmap_bit_depth[bitdepth], xpos, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), (flags&OPFLAG_RMW ? "yes" : "no"), (flags&OPFLAG_TRANS ? "yes" : "no"));
907
908 // Is it OK to have a 0 for the data width??? (i.e., undocumented?)
909 // Seems to be... Seems that dwidth *can* be zero (i.e., reuse same line) as well.
910 // Pitch == 0 is OK too...
911
912 //kludge: Seems that the OP treats iwidth == 0 as iwidth == 1... Need to investigate
913 //        on real hardware...
914 #warning "!!! Need to investigate iwidth == 0 behavior on real hardware !!!"
915 if (iwidth == 0)
916         iwidth = 1;
917
918 //      if (!render || op_pointer == 0 || ptr == 0 || pitch == 0)
919 //I'm not convinced that we need to concern ourselves with data & op_pointer here either!
920         if (!render || iwidth == 0)
921                 return;
922
923 //OK, so we know the position in the line buffer is correct. It's the clipping in
924 //24bpp mode that's wrong!
925 #if 0
926 //This is a total kludge, based upon the fact that 24BPP mode puts *4* bytes
927 //into the line buffer for each pixel.
928 if (depth == 5) // i.e., 24bpp mode...
929         xpos >>= 1;     // Cut it in half...
930 #endif
931
932 //#define OP_DEBUG_BMP
933 //#ifdef OP_DEBUG_BMP
934 //      WriteLog("bitmap %ix%i %ibpp at %i,%i firstpix=%i data=0x%.8x pitch %i hflipped=%s dwidth=%i (linked to 0x%.8x) Transluency=%s\n",
935 //              iwidth, height, op_bitmap_bit_depth[bitdepth], xpos, ypos, firstPix, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), dwidth, op_pointer, (flags&OPFLAG_RMW ? "yes" : "no"));
936 //#endif
937
938 //      int32 leftMargin = xpos, rightMargin = (xpos + (phraseWidthToPixels[depth] * iwidth)) - 1;
939         int32 startPos = xpos, endPos = xpos +
940                 (!flagREFLECT ? (phraseWidthToPixels[depth] * iwidth) - 1
941                 : -((phraseWidthToPixels[depth] * iwidth) + 1));
942         uint32 clippedWidth = 0, phraseClippedWidth = 0, dataClippedWidth = 0;//, phrasePixel = 0;
943         bool in24BPPMode = (((GET16(tomRam8, 0x0028) >> 1) & 0x03) == 1 ? true : false);        // VMODE
944         // Not sure if this is Jaguar Two only location or what...
945         // From the docs, it is... If we want to limit here we should think of something else.
946 //      int32 limit = GET16(tom_ram_8, 0x0008);                 // LIMIT
947 //      int32 limit = 720;
948 //      int32 lbufWidth = (!in24BPPMode ? limit - 1 : (limit / 2) - 1); // Zero based limit...
949 //printf("[OP:xpos=%i,spos=%i,epos=%i>", xpos, startPos, endPos);
950         // This is correct, the OP line buffer is a constant size... 
951         int32 limit = 720;
952         int32 lbufWidth = 719;
953
954         // If the image is completely to the left or right of the line buffer, then bail.
955 //If in REFLECT mode, then these values are swapped! !!! FIX !!! [DONE]
956 //There are four possibilities:
957 //  1. image sits on left edge and no REFLECT; starts out of bounds but ends in bounds.
958 //  2. image sits on left edge and REFLECT; starts in bounds but ends out of bounds.
959 //  3. image sits on right edge and REFLECT; starts out of bounds but ends in bounds.
960 //  4. image sits on right edge and no REFLECT; starts in bounds but ends out of bounds.
961 //Numbers 2 & 4 can be caught by checking the LBUF clip while in the inner loop,
962 // numbers 1 & 3 are of concern.
963 // This *indirectly* handles only cases 2 & 4! And is WRONG if REFLECT is set...!
964 //      if (rightMargin < 0 || leftMargin > lbufWidth)
965
966 // It might be easier to swap these (if REFLECTed) and just use XPOS down below...
967 // That way, you could simply set XPOS to leftMargin if !REFLECT and to rightMargin otherwise.
968 // Still have to be careful with the DATA and IWIDTH values though...
969
970 //      if ((!flagREFLECT && (rightMargin < 0 || leftMargin > lbufWidth))
971 //              || (flagREFLECT && (leftMargin < 0 || rightMargin > lbufWidth)))
972 //              return;
973         if ((!flagREFLECT && (endPos < 0 || startPos > lbufWidth))
974                 || (flagREFLECT && (startPos < 0 || endPos > lbufWidth)))
975                 return;
976
977         // Otherwise, find the clip limits and clip the phrase as well...
978         // NOTE: I'm fudging here by letting the actual blit overstep the bounds of the
979         //       line buffer, but it shouldn't matter since there are two unused line
980         //       buffers below and nothing above and I'll at most write 8 bytes outside
981         //       the line buffer... I could use a fractional clip begin/end value, but
982         //       this makes the blit a *lot* more hairy. I might fix this in the future
983         //       if it becomes necessary. (JLH)
984         //       Probably wouldn't be *that* hairy. Just use a delta that tells the inner loop
985         //       which pixel in the phrase is being written, and quit when either end of phrases
986         //       is reached or line buffer extents are surpassed.
987
988 //This stuff is probably wrong as well... !!! FIX !!!
989 //The strange thing is that it seems to work, but that's no guarantee that it's bulletproof!
990 //Yup. Seems that JagMania doesn't work correctly with this...
991 //Dunno if this is the problem, but Atari Karts is showing *some* of the road now...
992 //      if (!flagREFLECT)
993
994 /*
995         if (leftMargin < 0)
996                 clippedWidth = 0 - leftMargin,
997                 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth],
998                 leftMargin = 0 - (clippedWidth % phraseWidthToPixels[depth]);
999 //              leftMargin = 0;
1000
1001         if (rightMargin > lbufWidth)
1002                 clippedWidth = rightMargin - lbufWidth,
1003                 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];//,
1004 //              rightMargin = lbufWidth + (clippedWidth % phraseWidthToPixels[depth]);
1005 //              rightMargin = lbufWidth;
1006 */
1007 if (depth > 5)
1008         WriteLog("OP: We're about to encounter a divide by zero error!\n");
1009         // NOTE: We're just using endPos to figure out how much, if any, to clip by.
1010         // ALSO: There may be another case where we start out of bounds and end out of bounds...!
1011         // !!! FIX !!!
1012         if (startPos < 0)                       // Case #1: Begin out, end in, L to R
1013                 clippedWidth = 0 - startPos,
1014                 dataClippedWidth = phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth],
1015                 startPos = 0 - (clippedWidth % phraseWidthToPixels[depth]);
1016
1017         if (endPos < 0)                         // Case #2: Begin in, end out, R to L
1018                 clippedWidth = 0 - endPos,
1019                 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];
1020
1021         if (endPos > lbufWidth)         // Case #3: Begin in, end out, L to R
1022                 clippedWidth = endPos - lbufWidth,
1023                 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];
1024
1025         if (startPos > lbufWidth)       // Case #4: Begin out, end in, R to L
1026                 clippedWidth = startPos - lbufWidth,
1027                 dataClippedWidth = phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth],
1028                 startPos = lbufWidth + (clippedWidth % phraseWidthToPixels[depth]);
1029 //printf("<OP:spos=%i,epos=%i]", startPos, endPos);
1030
1031         // If the image is sitting on the line buffer left or right edge, we need to compensate
1032         // by decreasing the image phrase width accordingly.
1033         iwidth -= phraseClippedWidth;
1034
1035         // Also, if we're clipping the phrase we need to make sure we're in the correct part of
1036         // the pixel data.
1037 //      data += phraseClippedWidth * (pitch << 3);
1038         data += dataClippedWidth * pitch;
1039
1040         // NOTE: When the bitmap is in REFLECT mode, the XPOS marks the *right* side of the
1041         //       bitmap! This makes clipping & etc. MUCH, much easier...!
1042 //      uint32 lbufAddress = 0x1800 + (!in24BPPMode ? leftMargin * 2 : leftMargin * 4);
1043 //Why does this work right when multiplying startPos by 2 (instead of 4) for 24 BPP mode?
1044 //Is this a bug in the OP?
1045 //It's because in 24bpp mode, each pixel takes *4* bytes, instead of the usual 2.
1046 //Though it looks like we're doing it here no matter what...
1047 //      uint32 lbufAddress = 0x1800 + (!in24BPPMode ? startPos * 2 : startPos * 2);
1048 //Let's try this:
1049         uint32 lbufAddress = 0x1800 + (startPos * 2);
1050         uint8 * currentLineBuffer = &tomRam8[lbufAddress];
1051
1052         // Render.
1053
1054 // Hmm. We check above for 24 BPP mode, but don't do anything about it below...
1055 // If we *were* in 24 BPP mode, how would you convert CRY to RGB24? Seems to me
1056 // that if you're in CRY mode then you wouldn't be able to use 24 BPP bitmaps
1057 // anyway.
1058 // This seems to be the case (at least according to the Midsummer docs)...!
1059
1060 // This is to test using palette zeroes instead of bit zeroes...
1061 // And it seems that this is wrong, index == 0 is transparent apparently... :-/
1062 //#define OP_USES_PALETTE_ZERO
1063
1064         if (depth == 0)                                                                 // 1 BPP
1065         {
1066                 // The LSB of flags is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1067                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1068
1069                 // Fetch 1st phrase...
1070                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1071 //Note that firstPix should only be honored *if* we start with the 1st phrase of the bitmap
1072 //i.e., we didn't clip on the margin... !!! FIX !!!
1073                 pixels <<= firstPix;                                            // Skip first N pixels (N=firstPix)...
1074                 int i = firstPix;                                                       // Start counter at right spot...
1075
1076                 while (iwidth--)
1077                 {
1078                         while (i++ < 64)
1079                         {
1080                                 uint8 bit = pixels >> 63;
1081 #ifndef OP_USES_PALETTE_ZERO
1082                                 if (flagTRANS && bit == 0)
1083 #else
1084                                 if (flagTRANS && (paletteRAM16[index | bit] == 0))
1085 #endif
1086                                         ;       // Do nothing...
1087                                 else
1088                                 {
1089                                         if (!flagRMW)
1090 //Optimize: Set paletteRAM16 to beginning of palette RAM + index*2 and use only [bit] as index...
1091 //Won't optimize RMW case though...
1092                                                 // This is the *only* correct use of endian-dependent code
1093                                                 // (i.e., mem-to-mem direct copying)!
1094                                                 *(uint16 *)currentLineBuffer = paletteRAM16[index | bit];
1095                                         else
1096                                                 *currentLineBuffer =
1097                                                         BLEND_CR(*currentLineBuffer, paletteRAM[(index | bit) << 1]),
1098                                                 *(currentLineBuffer + 1) =
1099                                                         BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bit) << 1) + 1]);
1100                                 }
1101
1102                                 currentLineBuffer += lbufDelta;
1103                                 pixels <<= 1;
1104                         }
1105                         i = 0;
1106                         // Fetch next phrase...
1107                         data += pitch;
1108                         pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1109                 }
1110         }
1111         else if (depth == 1)                                                    // 2 BPP
1112         {
1113 if (firstPix)
1114         WriteLog("OP: Fixed bitmap @ 2 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1115                 index &= 0xFC;                                                          // Top six bits form CLUT index
1116                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1117                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1118
1119                 while (iwidth--)
1120                 {
1121                         // Fetch phrase...
1122                         uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1123                         data += pitch;
1124
1125                         for(int i=0; i<32; i++)
1126                         {
1127                                 uint8 bits = pixels >> 62;
1128 // Seems to me that both of these are in the same endian, so we could cast it as
1129 // uint16 * and do straight across copies (what about 24 bpp? Treat it differently...)
1130 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
1131 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
1132 // No, it isn't because we read the memory in an endian safe way--this *won't* work...
1133 #ifndef OP_USES_PALETTE_ZERO
1134                                 if (flagTRANS && bits == 0)
1135 #else
1136                                 if (flagTRANS && (paletteRAM16[index | bits] == 0))
1137 #endif
1138                                         ;       // Do nothing...
1139                                 else
1140                                 {
1141                                         if (!flagRMW)
1142                                                 *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1143                                         else
1144                                                 *currentLineBuffer =
1145                                                         BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1146                                                 *(currentLineBuffer + 1) =
1147                                                         BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1148                                 }
1149
1150                                 currentLineBuffer += lbufDelta;
1151                                 pixels <<= 2;
1152                         }
1153                 }
1154         }
1155         else if (depth == 2)                                                    // 4 BPP
1156         {
1157 if (firstPix)
1158         WriteLog("OP: Fixed bitmap @ 4 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1159                 index &= 0xF0;                                                          // Top four bits form CLUT index
1160                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1161                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1162
1163                 while (iwidth--)
1164                 {
1165                         // Fetch phrase...
1166                         uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1167                         data += pitch;
1168
1169                         for(int i=0; i<16; i++)
1170                         {
1171                                 uint8 bits = pixels >> 60;
1172 // Seems to me that both of these are in the same endian, so we could cast it as
1173 // uint16 * and do straight across copies (what about 24 bpp? Treat it differently...)
1174 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
1175 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
1176 // No, it isn't because we read the memory in an endian safe way--this *won't* work...
1177 #ifndef OP_USES_PALETTE_ZERO
1178                                 if (flagTRANS && bits == 0)
1179 #else
1180                                 if (flagTRANS && (paletteRAM16[index | bits] == 0))
1181 #endif
1182                                         ;       // Do nothing...
1183                                 else
1184                                 {
1185                                         if (!flagRMW)
1186                                                 *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1187                                         else
1188                                                 *currentLineBuffer =
1189                                                         BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1190                                                 *(currentLineBuffer + 1) =
1191                                                         BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1192                                 }
1193
1194                                 currentLineBuffer += lbufDelta;
1195                                 pixels <<= 4;
1196                         }
1197                 }
1198         }
1199         else if (depth == 3)                                                    // 8 BPP
1200         {
1201                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1202                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1203
1204                 // Fetch 1st phrase...
1205                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1206 //Note that firstPix should only be honored *if* we start with the 1st phrase of the bitmap
1207 //i.e., we didn't clip on the margin... !!! FIX !!!
1208                 firstPix &= 0x30;                                                       // Only top two bits are valid for 8 BPP
1209                 pixels <<= firstPix;                                            // Skip first N pixels (N=firstPix)...
1210                 int i = firstPix >> 3;                                          // Start counter at right spot...
1211
1212                 while (iwidth--)
1213                 {
1214                         while (i++ < 8)
1215                         {
1216                                 uint8 bits = pixels >> 56;
1217 // Seems to me that both of these are in the same endian, so we could cast it as
1218 // uint16 * and do straight across copies (what about 24 bpp? Treat it differently...)
1219 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
1220 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
1221 // No, it isn't because we read the memory in an endian safe way--this *won't* work...
1222 //This would seem to be problematic...
1223 //Because it's the palette entry being zero that makes the pixel transparent...
1224 //Let's try it and see.
1225 #ifndef OP_USES_PALETTE_ZERO
1226                                 if (flagTRANS && bits == 0)
1227 #else
1228                                 if (flagTRANS && (paletteRAM16[bits] == 0))
1229 #endif
1230                                         ;       // Do nothing...
1231                                 else
1232                                 {
1233                                         if (!flagRMW)
1234                                                 *(uint16 *)currentLineBuffer = paletteRAM16[bits];
1235                                         else
1236                                                 *currentLineBuffer =
1237                                                         BLEND_CR(*currentLineBuffer, paletteRAM[bits << 1]),
1238                                                 *(currentLineBuffer + 1) =
1239                                                         BLEND_Y(*(currentLineBuffer + 1), paletteRAM[(bits << 1) + 1]);
1240                                 }
1241
1242                                 currentLineBuffer += lbufDelta;
1243                                 pixels <<= 8;
1244                         }
1245                         i = 0;
1246                         // Fetch next phrase...
1247                         data += pitch;
1248                         pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1249                 }
1250         }
1251         else if (depth == 4)                                                    // 16 BPP
1252         {
1253 if (firstPix)
1254         WriteLog("OP: Fixed bitmap @ 16 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1255                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1256                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1257
1258                 while (iwidth--)
1259                 {
1260                         // Fetch phrase...
1261                         uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1262                         data += pitch;
1263
1264                         for(int i=0; i<4; i++)
1265                         {
1266                                 uint8 bitsHi = pixels >> 56, bitsLo = pixels >> 48;
1267 // Seems to me that both of these are in the same endian, so we could cast it as
1268 // uint16 * and do straight across copies (what about 24 bpp? Treat it differently...)
1269 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
1270 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
1271 // No, it isn't because we read the memory in an endian safe way--it *won't* work...
1272 //This doesn't seem right... Let's try the encoded black value ($8800):
1273 //Apparently, CRY 0 maps to $8800...
1274                                 if (flagTRANS && ((bitsLo | bitsHi) == 0))
1275 //                              if (flagTRANS && (bitsHi == 0x88) && (bitsLo == 0x00))
1276                                         ;       // Do nothing...
1277                                 else
1278                                 {
1279                                         if (!flagRMW)
1280                                                 *currentLineBuffer = bitsHi,
1281                                                 *(currentLineBuffer + 1) = bitsLo;
1282                                         else
1283                                                 *currentLineBuffer =
1284                                                         BLEND_CR(*currentLineBuffer, bitsHi),
1285                                                 *(currentLineBuffer + 1) =
1286                                                         BLEND_Y(*(currentLineBuffer + 1), bitsLo);
1287                                 }
1288
1289                                 currentLineBuffer += lbufDelta;
1290                                 pixels <<= 16;
1291                         }
1292                 }
1293         }
1294         else if (depth == 5)                                                    // 24 BPP
1295         {
1296 //Looks like Iron Soldier is the only game that uses 24BPP mode...
1297 //There *might* be others...
1298 //WriteLog("OP: Writing 24 BPP bitmap!\n");
1299 if (firstPix)
1300         WriteLog("OP: Fixed bitmap @ 24 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1301                 // Not sure, but I think RMW only works with 16 BPP and below, and only in CRY mode...
1302                 // The LSB of flags is OPFLAG_REFLECT, so sign extend it and OR 4 into it.
1303                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 4) | 0x04;
1304
1305                 while (iwidth--)
1306                 {
1307                         // Fetch phrase...
1308                         uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1309                         data += pitch;
1310
1311                         for(int i=0; i<2; i++)
1312                         {
1313                                 // We don't use a 32-bit var here because of endian issues...!
1314                                 uint8 bits3 = pixels >> 56, bits2 = pixels >> 48,
1315                                         bits1 = pixels >> 40, bits0 = pixels >> 32;
1316
1317                                 if (flagTRANS && (bits3 | bits2 | bits1 | bits0) == 0)
1318                                         ;       // Do nothing...
1319                                 else
1320                                         *currentLineBuffer = bits3,
1321                                         *(currentLineBuffer + 1) = bits2,
1322                                         *(currentLineBuffer + 2) = bits1,
1323                                         *(currentLineBuffer + 3) = bits0;
1324
1325                                 currentLineBuffer += lbufDelta;
1326                                 pixels <<= 32;
1327                         }
1328                 }
1329         }
1330 }
1331
1332 //
1333 // Store scaled bitmap in line buffer
1334 //
1335 void OPProcessScaledBitmap(uint64 p0, uint64 p1, uint64 p2, bool render)
1336 {
1337 // Need to make sure that writes stay within the line buffer...
1338 // LBUF ($F01800 - $F01D9E) 360 x 32-bit RAM
1339         uint8 depth = (p1 >> 12) & 0x07;                                // Color depth of image
1340         int32 xpos = ((int16)((p1 << 4) & 0xFFFF)) >> 4;// Image xpos in LBUF
1341         uint32 iwidth = (p1 >> 28) & 0x3FF;                             // Image width in *phrases*
1342         uint32 data = (p0 >> 40) & 0xFFFFF8;                    // Pixel data address
1343 //#ifdef OP_DEBUG_BMP
1344 // Prolly should use this... Though not sure exactly how.
1345 //Use the upper bits as an offset into the phrase depending on the BPP. That's how!
1346         uint32 firstPix = (p1 >> 49) & 0x3F;
1347 //This is WEIRD! I'm sure I saw Atari Karts request 8 BPP FIRSTPIX! What happened???
1348 if (firstPix)
1349         WriteLog("OP: FIRSTPIX != 0! (Scaled BM)\n");
1350 //#endif
1351 // We can ignore the RELEASE (high order) bit for now--probably forever...!
1352 //      uint8 flags = (p1 >> 45) & 0x0F;        // REFLECT, RMW, TRANS, RELEASE
1353 //Optimize: break these out to their own BOOL values [DONE]
1354         uint8 flags = (p1 >> 45) & 0x07;                                // REFLECT (0), RMW (1), TRANS (2)
1355         bool flagREFLECT = (flags & OPFLAG_REFLECT ? true : false),
1356                 flagRMW = (flags & OPFLAG_RMW ? true : false),
1357                 flagTRANS = (flags & OPFLAG_TRANS ? true : false);
1358         uint8 index = (p1 >> 37) & 0xFE;                                // CLUT index offset (upper pix, 1-4 bpp)
1359         uint32 pitch = (p1 >> 15) & 0x07;                               // Phrase pitch
1360
1361         uint8 * tomRam8 = TOMGetRamPointer();
1362         uint8 * paletteRAM = &tomRam8[0x400];
1363         // This is OK as long as it's used correctly: For 16-bit RAM to RAM direct copies--NOT
1364         // for use when using endian-corrected data (i.e., any of the *ReadWord functions!)
1365         uint16 * paletteRAM16 = (uint16 *)paletteRAM;
1366
1367         uint16 hscale = p2 & 0xFF;
1368 // Hmm. It seems that fixing the horizontal scale necessitated re-fixing this. Not sure why,
1369 // but seems to be consistent with the vertical scaling now (and it may turn out to be wrong!)...
1370         uint16 horizontalRemainder = hscale;                            // Not sure if it starts full, but seems reasonable [It's not!]
1371 //      uint8 horizontalRemainder = 0;                                  // Let's try zero! Seems to work! Yay! [No, it doesn't!]
1372         int32 scaledWidthInPixels = (iwidth * phraseWidthToPixels[depth] * hscale) >> 5;
1373         uint32 scaledPhrasePixels = (phraseWidthToPixels[depth] * hscale) >> 5;
1374
1375 //      WriteLog("bitmap %ix? %ibpp at %i,? firstpix=? data=0x%.8x pitch %i hflipped=%s dwidth=? (linked to ?) RMW=%s Tranparent=%s\n",
1376 //              iwidth, op_bitmap_bit_depth[bitdepth], xpos, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), (flags&OPFLAG_RMW ? "yes" : "no"), (flags&OPFLAG_TRANS ? "yes" : "no"));
1377
1378 // Looks like an hscale of zero means don't draw!
1379         if (!render || iwidth == 0 || hscale == 0)
1380                 return;
1381
1382 /*extern int start_logging;
1383 if (start_logging)
1384         WriteLog("OP: Scaled bitmap %ix? %ibpp at %i,? hscale=%02X fpix=%i data=%08X pitch %i hflipped=%s dwidth=? (linked to %08X) Transluency=%s\n",
1385                 iwidth, op_bitmap_bit_depth[depth], xpos, hscale, firstPix, data, pitch, (flagREFLECT ? "yes" : "no"), op_pointer, (flagRMW ? "yes" : "no"));*/
1386 //#define OP_DEBUG_BMP
1387 //#ifdef OP_DEBUG_BMP
1388 //      WriteLog("OP: Scaled bitmap %ix%i %ibpp at %i,%i firstpix=%i data=0x%.8x pitch %i hflipped=%s dwidth=%i (linked to 0x%.8x) Transluency=%s\n",
1389 //              iwidth, height, op_bitmap_bit_depth[bitdepth], xpos, ypos, firstPix, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), dwidth, op_pointer, (flags&OPFLAG_RMW ? "yes" : "no"));
1390 //#endif
1391
1392         int32 startPos = xpos, endPos = xpos +
1393                 (!flagREFLECT ? scaledWidthInPixels - 1 : -(scaledWidthInPixels + 1));
1394         uint32 clippedWidth = 0, phraseClippedWidth = 0, dataClippedWidth = 0;
1395         bool in24BPPMode = (((GET16(tomRam8, 0x0028) >> 1) & 0x03) == 1 ? true : false);        // VMODE
1396         // Not sure if this is Jaguar Two only location or what...
1397         // From the docs, it is... If we want to limit here we should think of something else.
1398 //      int32 limit = GET16(tom_ram_8, 0x0008);                 // LIMIT
1399         int32 limit = 720;
1400 //      int32 lbufWidth = (!in24BPPMode ? limit - 1 : (limit / 2) - 1); // Zero based limit...
1401         int32 lbufWidth = 719;  // Zero based limit...
1402
1403         // If the image is completely to the left or right of the line buffer, then bail.
1404 //If in REFLECT mode, then these values are swapped! !!! FIX !!! [DONE]
1405 //There are four possibilities:
1406 //  1. image sits on left edge and no REFLECT; starts out of bounds but ends in bounds.
1407 //  2. image sits on left edge and REFLECT; starts in bounds but ends out of bounds.
1408 //  3. image sits on right edge and REFLECT; starts out of bounds but ends in bounds.
1409 //  4. image sits on right edge and no REFLECT; starts in bounds but ends out of bounds.
1410 //Numbers 2 & 4 can be caught by checking the LBUF clip while in the inner loop,
1411 // numbers 1 & 3 are of concern.
1412 // This *indirectly* handles only cases 2 & 4! And is WRONG if REFLECT is set...!
1413 //      if (rightMargin < 0 || leftMargin > lbufWidth)
1414
1415 // It might be easier to swap these (if REFLECTed) and just use XPOS down below...
1416 // That way, you could simply set XPOS to leftMargin if !REFLECT and to rightMargin otherwise.
1417 // Still have to be careful with the DATA and IWIDTH values though...
1418
1419         if ((!flagREFLECT && (endPos < 0 || startPos > lbufWidth))
1420                 || (flagREFLECT && (startPos < 0 || endPos > lbufWidth)))
1421                 return;
1422
1423         // Otherwise, find the clip limits and clip the phrase as well...
1424         // NOTE: I'm fudging here by letting the actual blit overstep the bounds of the
1425         //       line buffer, but it shouldn't matter since there are two unused line
1426         //       buffers below and nothing above and I'll at most write 40 bytes outside
1427         //       the line buffer... I could use a fractional clip begin/end value, but
1428         //       this makes the blit a *lot* more hairy. I might fix this in the future
1429         //       if it becomes necessary. (JLH)
1430         //       Probably wouldn't be *that* hairy. Just use a delta that tells the inner loop
1431         //       which pixel in the phrase is being written, and quit when either end of phrases
1432         //       is reached or line buffer extents are surpassed.
1433
1434 //This stuff is probably wrong as well... !!! FIX !!!
1435 //The strange thing is that it seems to work, but that's no guarantee that it's bulletproof!
1436 //Yup. Seems that JagMania doesn't work correctly with this...
1437 //Dunno if this is the problem, but Atari Karts is showing *some* of the road now...
1438 //Actually, it is! Or, it was. It doesn't seem to be clipping here, so the problem lies
1439 //elsewhere! Hmm. Putting the scaling code into the 1/2/8 BPP cases seems to draw the ground
1440 // a bit more accurately... Strange!
1441 //It's probably a case of the REFLECT flag being set and the background being written
1442 //from the right side of the screen...
1443 //But no, it isn't... At least if the diagnostics are telling the truth!
1444
1445         // NOTE: We're just using endPos to figure out how much, if any, to clip by.
1446         // ALSO: There may be another case where we start out of bounds and end out of bounds...!
1447         // !!! FIX !!!
1448
1449 //There's a problem here with scaledPhrasePixels in that it can be forced to zero when
1450 //the scaling factor is small. So fix it already! !!! FIX !!!
1451 /*if (scaledPhrasePixels == 0)
1452 {
1453         WriteLog("OP: [Scaled] We're about to encounter a divide by zero error!\n");
1454         DumpScaledObject(p0, p1, p2);
1455 }//*/
1456 //NOTE: I'm almost 100% sure that this is wrong... And it is! :-p
1457
1458 //Try a simple example...
1459 // Let's say we have an 8 BPP scanline with an hscale of $80 (4). Our xpos is -10,
1460 // non-flipped. Pixels in the bitmap are XYZXYZXYZXYZXYZ.
1461 // Scaled up, they would be XXXXYYYYZZZZXXXXYYYYZZZZXXXXYYYYZZZZ...
1462 //
1463 // Normally, we would expect this in the line buffer:
1464 // ZZXXXXYYYYZZZZXXXXYYYYZZZZ...
1465 //
1466 // But instead we're getting:
1467 // XXXXYYYYZZZZXXXXYYYYZZZZ...
1468 //
1469 // or are we??? It would seem so, simply by virtue of the fact that we're NOT starting
1470 // on a negative boundary--or are we? Hmm...
1471 // cw = 10, dcw = pcw = 10 / ([8 * 4 = 32] 32) = 0, sp = -10
1472 //
1473 // Let's try a real world example:
1474 //
1475 //OP: Scaled bitmap (70, 8 BPP, spp=28) sp (-400) < 0... [new sp=-8, cw=400, dcw=pcw=14]
1476 //OP: Scaled bitmap (6F, 8 BPP, spp=27) sp (-395) < 0... [new sp=-17, cw=395, dcw=pcw=14]
1477 //
1478 // Really, spp is 27.75 in the second case...
1479 // So... If we do 395 / 27.75, we get 14. Ok so far... If we scale that against the
1480 // start position (14 * 27.75), we get -6.5... NOT -17!
1481
1482 //Now it seems we're working OK, at least for the first case...
1483 uint32 scaledPhrasePixelsUS = phraseWidthToPixels[depth] * hscale;
1484
1485         if (startPos < 0)                       // Case #1: Begin out, end in, L to R
1486 {
1487 extern int start_logging;
1488 if (start_logging)
1489         WriteLog("OP: Scaled bitmap (%02X, %u BPP, spp=%u) start pos (%i) < 0...", hscale, op_bitmap_bit_depth[depth], scaledPhrasePixels, startPos);
1490 //              clippedWidth = 0 - startPos,
1491                 clippedWidth = (0 - startPos) << 5,
1492 //              dataClippedWidth = phraseClippedWidth = clippedWidth / scaledPhrasePixels,
1493                 dataClippedWidth = phraseClippedWidth = (clippedWidth / scaledPhrasePixelsUS) >> 5,
1494 //              startPos = 0 - (clippedWidth % scaledPhrasePixels);
1495                 startPos += (dataClippedWidth * scaledPhrasePixelsUS) >> 5;
1496 if (start_logging)
1497         WriteLog(" [new sp=%i, cw=%i, dcw=pcw=%i]\n", startPos, clippedWidth, dataClippedWidth);
1498 }
1499
1500         if (endPos < 0)                         // Case #2: Begin in, end out, R to L
1501                 clippedWidth = 0 - endPos,
1502                 phraseClippedWidth = clippedWidth / scaledPhrasePixels;
1503
1504         if (endPos > lbufWidth)         // Case #3: Begin in, end out, L to R
1505                 clippedWidth = endPos - lbufWidth,
1506                 phraseClippedWidth = clippedWidth / scaledPhrasePixels;
1507
1508         if (startPos > lbufWidth)       // Case #4: Begin out, end in, R to L
1509                 clippedWidth = startPos - lbufWidth,
1510                 dataClippedWidth = phraseClippedWidth = clippedWidth / scaledPhrasePixels,
1511                 startPos = lbufWidth + (clippedWidth % scaledPhrasePixels);
1512
1513 extern int op_start_log;
1514 if (op_start_log && clippedWidth != 0)
1515         WriteLog("OP: Clipped line. SP=%i, EP=%i, clip=%u, iwidth=%u, hscale=%02X\n", startPos, endPos, clippedWidth, iwidth, hscale);
1516 if (op_start_log && startPos == 13)
1517 {
1518         WriteLog("OP: Scaled line. SP=%i, EP=%i, clip=%u, iwidth=%u, hscale=%02X, depth=%u, firstPix=%u\n", startPos, endPos, clippedWidth, iwidth, hscale, depth, firstPix);
1519         DumpScaledObject(p0, p1, p2);
1520         if (iwidth == 7)
1521         {
1522                 WriteLog("    %08X: ", data);
1523                 for(int i=0; i<7*8; i++)
1524                         WriteLog("%02X ", JaguarReadByte(data+i));
1525                 WriteLog("\n");
1526         }
1527 }
1528         // If the image is sitting on the line buffer left or right edge, we need to compensate
1529         // by decreasing the image phrase width accordingly.
1530         iwidth -= phraseClippedWidth;
1531
1532         // Also, if we're clipping the phrase we need to make sure we're in the correct part of
1533         // the pixel data.
1534 //      data += phraseClippedWidth * (pitch << 3);
1535         data += dataClippedWidth * (pitch << 3);
1536
1537         // NOTE: When the bitmap is in REFLECT mode, the XPOS marks the *right* side of the
1538         //       bitmap! This makes clipping & etc. MUCH, much easier...!
1539 //      uint32 lbufAddress = 0x1800 + (!in24BPPMode ? leftMargin * 2 : leftMargin * 4);
1540 //      uint32 lbufAddress = 0x1800 + (!in24BPPMode ? startPos * 2 : startPos * 4);
1541         uint32 lbufAddress = 0x1800 + startPos * 2;
1542         uint8 * currentLineBuffer = &tomRam8[lbufAddress];
1543 //uint8 * lineBufferLowerLimit = &tom_ram_8[0x1800],
1544 //      * lineBufferUpperLimit = &tom_ram_8[0x1800 + 719];
1545
1546         // Render.
1547
1548 // Hmm. We check above for 24 BPP mode, but don't do anything about it below...
1549 // If we *were* in 24 BPP mode, how would you convert CRY to RGB24? Seems to me
1550 // that if you're in CRY mode then you wouldn't be able to use 24 BPP bitmaps
1551 // anyway.
1552 // This seems to be the case (at least according to the Midsummer docs)...!
1553
1554         if (depth == 0)                                                                 // 1 BPP
1555         {
1556 if (firstPix != 0)
1557         WriteLog("OP: Scaled bitmap @ 1 BPP requesting FIRSTPIX!\n");
1558                 // The LSB of flags is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1559                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1560
1561                 int pixCount = 0;
1562                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1563
1564                 while ((int32)iwidth > 0)
1565                 {
1566                         uint8 bits = pixels >> 63;
1567
1568 #ifndef OP_USES_PALETTE_ZERO
1569                         if (flagTRANS && bits == 0)
1570 #else
1571                         if (flagTRANS && (paletteRAM16[index | bits] == 0))
1572 #endif
1573                                 ;       // Do nothing...
1574                         else
1575                         {
1576                                 if (!flagRMW)
1577                                         // This is the *only* correct use of endian-dependent code
1578                                         // (i.e., mem-to-mem direct copying)!
1579                                         *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1580                                 else
1581                                         *currentLineBuffer =
1582                                                 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1583                                         *(currentLineBuffer + 1) =
1584                                                 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1585                         }
1586
1587                         currentLineBuffer += lbufDelta;
1588
1589 /*
1590 The reason we subtract the horizontalRemainder *after* the test is because we had too few
1591 bits for horizontalRemainder to properly recognize a negative number. But now it's 16 bits
1592 wide, so we could probably go back to that (as long as we make it an int16 and not a uint16!)
1593 */
1594 /*                      horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1595                         while (horizontalRemainder & 0x80)
1596                         {
1597                                 horizontalRemainder += hscale;
1598                                 pixCount++;
1599                                 pixels <<= 1;
1600                         }//*/
1601 //                      while (horizontalRemainder <= 0x20)             // I.e., it's <= 1.0 (*before* subtraction)
1602                         while (horizontalRemainder < 0x20)              // I.e., it's < 1.0 (*before* subtraction)
1603                         {
1604                                 horizontalRemainder += hscale;
1605                                 pixCount++;
1606                                 pixels <<= 1;
1607                         }
1608                         horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1609
1610                         if (pixCount > 63)
1611                         {
1612                                 int phrasesToSkip = pixCount / 64, pixelShift = pixCount % 64;
1613
1614                                 data += (pitch << 3) * phrasesToSkip;
1615                                 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1616                                 pixels <<= 1 * pixelShift;
1617                                 iwidth -= phrasesToSkip;
1618                                 pixCount = pixelShift;
1619                         }
1620                 }
1621         }
1622         else if (depth == 1)                                                    // 2 BPP
1623         {
1624 if (firstPix != 0)
1625         WriteLog("OP: Scaled bitmap @ 2 BPP requesting FIRSTPIX!\n");
1626                 index &= 0xFC;                                                          // Top six bits form CLUT index
1627                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1628                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1629
1630                 int pixCount = 0;
1631                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1632
1633                 while ((int32)iwidth > 0)
1634                 {
1635                         uint8 bits = pixels >> 62;
1636
1637 #ifndef OP_USES_PALETTE_ZERO
1638                         if (flagTRANS && bits == 0)
1639 #else
1640                         if (flagTRANS && (paletteRAM16[index | bits] == 0))
1641 #endif
1642                                 ;       // Do nothing...
1643                         else
1644                         {
1645                                 if (!flagRMW)
1646                                         // This is the *only* correct use of endian-dependent code
1647                                         // (i.e., mem-to-mem direct copying)!
1648                                         *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1649                                 else
1650                                         *currentLineBuffer =
1651                                                 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1652                                         *(currentLineBuffer + 1) =
1653                                                 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1654                         }
1655
1656                         currentLineBuffer += lbufDelta;
1657
1658 /*                      horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1659                         while (horizontalRemainder & 0x80)
1660                         {
1661                                 horizontalRemainder += hscale;
1662                                 pixCount++;
1663                                 pixels <<= 2;
1664                         }//*/
1665 //                      while (horizontalRemainder <= 0x20)             // I.e., it's <= 0 (*before* subtraction)
1666                         while (horizontalRemainder < 0x20)              // I.e., it's < 1.0 (*before* subtraction)
1667                         {
1668                                 horizontalRemainder += hscale;
1669                                 pixCount++;
1670                                 pixels <<= 2;
1671                         }
1672                         horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1673
1674                         if (pixCount > 31)
1675                         {
1676                                 int phrasesToSkip = pixCount / 32, pixelShift = pixCount % 32;
1677
1678                                 data += (pitch << 3) * phrasesToSkip;
1679                                 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1680                                 pixels <<= 2 * pixelShift;
1681                                 iwidth -= phrasesToSkip;
1682                                 pixCount = pixelShift;
1683                         }
1684                 }
1685         }
1686         else if (depth == 2)                                                    // 4 BPP
1687         {
1688 if (firstPix != 0)
1689         WriteLog("OP: Scaled bitmap @ 4 BPP requesting FIRSTPIX!\n");
1690                 index &= 0xF0;                                                          // Top four bits form CLUT index
1691                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1692                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1693
1694                 int pixCount = 0;
1695                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1696
1697                 while ((int32)iwidth > 0)
1698                 {
1699                         uint8 bits = pixels >> 60;
1700
1701 #ifndef OP_USES_PALETTE_ZERO
1702                         if (flagTRANS && bits == 0)
1703 #else
1704                         if (flagTRANS && (paletteRAM16[index | bits] == 0))
1705 #endif
1706                                 ;       // Do nothing...
1707                         else
1708                         {
1709                                 if (!flagRMW)
1710                                         // This is the *only* correct use of endian-dependent code
1711                                         // (i.e., mem-to-mem direct copying)!
1712                                         *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1713                                 else
1714                                         *currentLineBuffer =
1715                                                 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1716                                         *(currentLineBuffer + 1) =
1717                                                 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1718                         }
1719
1720                         currentLineBuffer += lbufDelta;
1721
1722 /*                      horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1723                         while (horizontalRemainder & 0x80)
1724                         {
1725                                 horizontalRemainder += hscale;
1726                                 pixCount++;
1727                                 pixels <<= 4;
1728                         }//*/
1729 //                      while (horizontalRemainder <= 0x20)             // I.e., it's <= 0 (*before* subtraction)
1730                         while (horizontalRemainder < 0x20)              // I.e., it's < 1.0 (*before* subtraction)
1731                         {
1732                                 horizontalRemainder += hscale;
1733                                 pixCount++;
1734                                 pixels <<= 4;
1735                         }
1736                         horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1737
1738                         if (pixCount > 15)
1739                         {
1740                                 int phrasesToSkip = pixCount / 16, pixelShift = pixCount % 16;
1741
1742                                 data += (pitch << 3) * phrasesToSkip;
1743                                 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1744                                 pixels <<= 4 * pixelShift;
1745                                 iwidth -= phrasesToSkip;
1746                                 pixCount = pixelShift;
1747                         }
1748                 }
1749         }
1750         else if (depth == 3)                                                    // 8 BPP
1751         {
1752 if (firstPix)
1753         WriteLog("OP: Scaled bitmap @ 8 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1754                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1755                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1756
1757                 int pixCount = 0;
1758                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1759
1760                 while ((int32)iwidth > 0)
1761                 {
1762                         uint8 bits = pixels >> 56;
1763
1764 #ifndef OP_USES_PALETTE_ZERO
1765                         if (flagTRANS && bits == 0)
1766 #else
1767                         if (flagTRANS && (paletteRAM16[bits] == 0))
1768 #endif
1769                                 ;       // Do nothing...
1770                         else
1771                         {
1772                                 if (!flagRMW)
1773                                         // This is the *only* correct use of endian-dependent code
1774                                         // (i.e., mem-to-mem direct copying)!
1775                                         *(uint16 *)currentLineBuffer = paletteRAM16[bits];
1776 /*                              {
1777                                         if (currentLineBuffer >= lineBufferLowerLimit && currentLineBuffer <= lineBufferUpperLimit)
1778                                                 *(uint16 *)currentLineBuffer = paletteRAM16[bits];
1779                                 }*/
1780                                 else
1781                                         *currentLineBuffer =
1782                                                 BLEND_CR(*currentLineBuffer, paletteRAM[bits << 1]),
1783                                         *(currentLineBuffer + 1) =
1784                                                 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[(bits << 1) + 1]);
1785                         }
1786
1787                         currentLineBuffer += lbufDelta;
1788
1789 //                      while (horizontalRemainder <= 0x20)             // I.e., it's <= 0 (*before* subtraction)
1790                         while (horizontalRemainder < 0x20)              // I.e., it's < 1.0 (*before* subtraction)
1791                         {
1792                                 horizontalRemainder += hscale;
1793                                 pixCount++;
1794                                 pixels <<= 8;
1795                         }
1796                         horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1797
1798                         if (pixCount > 7)
1799                         {
1800                                 int phrasesToSkip = pixCount / 8, pixelShift = pixCount % 8;
1801
1802                                 data += (pitch << 3) * phrasesToSkip;
1803                                 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1804                                 pixels <<= 8 * pixelShift;
1805                                 iwidth -= phrasesToSkip;
1806                                 pixCount = pixelShift;
1807                         }
1808                 }
1809         }
1810         else if (depth == 4)                                                    // 16 BPP
1811         {
1812 if (firstPix != 0)
1813         WriteLog("OP: Scaled bitmap @ 16 BPP requesting FIRSTPIX!\n");
1814                 // The LSB is OPFLAG_REFLECT, so sign extend it and OR 2 into it.
1815                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1816
1817                 int pixCount = 0;
1818                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1819
1820                 while ((int32)iwidth > 0)
1821                 {
1822                         uint8 bitsHi = pixels >> 56, bitsLo = pixels >> 48;
1823
1824 //This doesn't seem right... Let's try the encoded black value ($8800):
1825 //Apparently, CRY 0 maps to $8800...
1826                                 if (flagTRANS && ((bitsLo | bitsHi) == 0))
1827 //                              if (flagTRANS && (bitsHi == 0x88) && (bitsLo == 0x00))
1828                                 ;       // Do nothing...
1829                         else
1830                         {
1831                                 if (!flagRMW)
1832                                         *currentLineBuffer = bitsHi,
1833                                         *(currentLineBuffer + 1) = bitsLo;
1834                                 else
1835                                         *currentLineBuffer =
1836                                                 BLEND_CR(*currentLineBuffer, bitsHi),
1837                                         *(currentLineBuffer + 1) =
1838                                                 BLEND_Y(*(currentLineBuffer + 1), bitsLo);
1839                         }
1840
1841                         currentLineBuffer += lbufDelta;
1842
1843 /*                      horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1844                         while (horizontalRemainder & 0x80)
1845                         {
1846                                 horizontalRemainder += hscale;
1847                                 pixCount++;
1848                                 pixels <<= 16;
1849                         }//*/
1850 //                      while (horizontalRemainder <= 0x20)             // I.e., it's <= 0 (*before* subtraction)
1851                         while (horizontalRemainder < 0x20)              // I.e., it's <= 1.0 (*before* subtraction)
1852                         {
1853                                 horizontalRemainder += hscale;
1854                                 pixCount++;
1855                                 pixels <<= 16;
1856                         }
1857                         horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1858 //*/
1859                         if (pixCount > 3)
1860                         {
1861                                 int phrasesToSkip = pixCount / 4, pixelShift = pixCount % 4;
1862
1863                                 data += (pitch << 3) * phrasesToSkip;
1864                                 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1865                                 pixels <<= 16 * pixelShift;
1866
1867                                 iwidth -= phrasesToSkip;
1868
1869                                 pixCount = pixelShift;
1870                         }
1871                 }
1872         }
1873         else if (depth == 5)                                                    // 24 BPP
1874         {
1875 //I'm not sure that you can scale a 24 BPP bitmap properly--the JTRM seems to indicate as much.
1876 WriteLog("OP: Writing 24 BPP scaled bitmap!\n");
1877 if (firstPix != 0)
1878         WriteLog("OP: Scaled bitmap @ 24 BPP requesting FIRSTPIX!\n");
1879                 // Not sure, but I think RMW only works with 16 BPP and below, and only in CRY mode...
1880                 // The LSB is OPFLAG_REFLECT, so sign extend it and OR 4 into it.
1881                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 4) | 0x04;
1882
1883                 while (iwidth--)
1884                 {
1885                         // Fetch phrase...
1886                         uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1887                         data += pitch << 3;                                             // Multiply pitch * 8 (optimize: precompute this value)
1888
1889                         for(int i=0; i<2; i++)
1890                         {
1891                                 uint8 bits3 = pixels >> 56, bits2 = pixels >> 48,
1892                                         bits1 = pixels >> 40, bits0 = pixels >> 32;
1893
1894                                 if (flagTRANS && (bits3 | bits2 | bits1 | bits0) == 0)
1895                                         ;       // Do nothing...
1896                                 else
1897                                         *currentLineBuffer = bits3,
1898                                         *(currentLineBuffer + 1) = bits2,
1899                                         *(currentLineBuffer + 2) = bits1,
1900                                         *(currentLineBuffer + 3) = bits0;
1901
1902                                 currentLineBuffer += lbufDelta;
1903                                 pixels <<= 32;
1904                         }
1905                 }
1906         }
1907 }