]> Shamusworld >> Repos - virtualjaguar/blob - src/op.cpp
Changed some formatting on the stack trace and OP list dump.
[virtualjaguar] / src / op.cpp
1 //
2 // Object Processor
3 //
4 // Original source by David Raingeard (Cal2)
5 // GCC/SDL port by Niels Wagenaar (Linux/WIN32) and Caz (BeOS)
6 // Extensive cleanups/fixes/rewrites by James Hammons
7 // (C) 2010 Underground Software
8 //
9 // JLH = James Hammons <jlhamm@acm.org>
10 //
11 // Who  When        What
12 // ---  ----------  -------------------------------------------------------------
13 // JLH  01/16/2010  Created this log ;-)
14 //
15
16 #include "op.h"
17
18 #include <stdlib.h>
19 #include <string.h>
20 #include "gpu.h"
21 #include "jaguar.h"
22 #include "log.h"
23 #include "m68000/m68kinterface.h"
24 #include "memory.h"
25 #include "tom.h"
26
27 //#define OP_DEBUG
28 //#define OP_DEBUG_BMP
29
// Blend table lookups. The destination component selects the high byte of the
// table index and the source (delta) component selects the low byte.
// Both arguments are fully parenthesized before being cast, so that compound
// expressions such as BLEND_Y(a + b, c) index the table as intended.
#define BLEND_Y(dst, src)       op_blend_y[(((uint16)(dst)) << 8) | ((uint16)(src))]
#define BLEND_CR(dst, src)      op_blend_cr[(((uint16)(dst)) << 8) | ((uint16)(src))]
32
33 #define OBJECT_TYPE_BITMAP      0                                       // 000
34 #define OBJECT_TYPE_SCALE       1                                       // 001
35 #define OBJECT_TYPE_GPU         2                                       // 010
36 #define OBJECT_TYPE_BRANCH      3                                       // 011
37 #define OBJECT_TYPE_STOP        4                                       // 100
38
39 #define CONDITION_EQUAL                         0                       // VC == YPOS
40 #define CONDITION_LESS_THAN                     1                       // VC < YPOS
41 #define CONDITION_GREATER_THAN          2                       // VC > YPOS
42 #define CONDITION_OP_FLAG_SET           3
43 #define CONDITION_SECOND_HALF_LINE      4
44
45 #if 0
46 #define OPFLAG_RELEASE          8                                       // Bus release bit
47 #define OPFLAG_TRANS            4                                       // Transparency bit
48 #define OPFLAG_RMW                      2                                       // Read-Modify-Write bit
49 #define OPFLAG_REFLECT          1                                       // Horizontal mirror bit
50 #endif
51
52 // Private function prototypes
53
54 void OPProcessFixedBitmap(uint64 p0, uint64 p1, bool render);
55 void OPProcessScaledBitmap(uint64 p0, uint64 p1, uint64 p2, bool render);
56 void OPDiscoverObjects(uint32 address);
57 void OPDumpObjectList(void);
58 void DumpScaledObject(uint64 p0, uint64 p1, uint64 p2);
59 void DumpFixedObject(uint64 p0, uint64 p1);
60 void DumpBitmapCore(uint64 p0, uint64 p1);
61 uint64 OPLoadPhrase(uint32 offset);
62
63 // Local global variables
64
65 // Blend tables (64K each)
66 static uint8 op_blend_y[0x10000];
67 static uint8 op_blend_cr[0x10000];
68 // There may be a problem with this "RAM" overlapping (and thus being independent of)
69 // some of the regular TOM RAM...
70 //#warning objectp_ram is separated from TOM RAM--need to fix that!
71 //static uint8 objectp_ram[0x40];                       // This is based at $F00000
72 uint8 objectp_running = 0;
73 //bool objectp_stop_reading_list;
74
75 static uint8 op_bitmap_bit_depth[8] = { 1, 2, 4, 8, 16, 24, 32, 0 };
76 //static uint32 op_bitmap_bit_size[8] =
77 //      { (uint32)(0.125*65536), (uint32)(0.25*65536), (uint32)(0.5*65536), (uint32)(1*65536),
78 //        (uint32)(2*65536),     (uint32)(1*65536),    (uint32)(1*65536),   (uint32)(1*65536) };
79 static uint32 op_pointer;
80
81 int32 phraseWidthToPixels[8] = { 64, 32, 16, 8, 4, 2, 0, 0 };
82
83
84 //
85 // Object Processor initialization
86 //
87 void OPInit(void)
88 {
89         // Here we calculate the saturating blend of a signed 4-bit value and an
90         // existing Cyan/Red value as well as a signed 8-bit value and an existing intensity...
91         // Note: CRY is 4 bits Cyan, 4 bits Red, 16 bits intensitY
92         for(int i=0; i<256*256; i++)
93         {
94                 int y = (i >> 8) & 0xFF;
95                 int dy = (int8)i;                                       // Sign extend the Y index
96                 int c1 = (i >> 8) & 0x0F;
97                 int dc1 = (int8)(i << 4) >> 4;          // Sign extend the R index
98                 int c2 = (i >> 12) & 0x0F;
99                 int dc2 = (int8)(i & 0xF0) >> 4;        // Sign extend the C index
100
101                 y += dy;
102
103                 if (y < 0)
104                         y = 0;
105                 else if (y > 0xFF)
106                         y = 0xFF;
107
108                 op_blend_y[i] = y;
109
110                 c1 += dc1;
111
112                 if (c1 < 0)
113                         c1 = 0;
114                 else if (c1 > 0x0F)
115                         c1 = 0x0F;
116
117                 c2 += dc2;
118
119                 if (c2 < 0)
120                         c2 = 0;
121                 else if (c2 > 0x0F)
122                         c2 = 0x0F;
123
124                 op_blend_cr[i] = (c2 << 4) | c1;
125         }
126
127         OPReset();
128 }
129
130
131 //
132 // Object Processor reset
133 //
134 void OPReset(void)
135 {
136 //      memset(objectp_ram, 0x00, 0x40);
137         objectp_running = 0;
138 }
139
140
141 static const char * opType[8] =
142 { "(BITMAP)", "(SCALED BITMAP)", "(GPU INT)", "(BRANCH)", "(STOP)", "???", "???", "???" };
143 static const char * ccType[8] =
144         { "==", "<", ">", "(opflag set)", "(second half line)", "?", "?", "?" };
145 static uint32 object[8192];
146 static uint32 numberOfObjects;
147 //static uint32 objectLink[8192];
148 //static uint32 numberOfLinks;
149
150
151 void OPDone(void)
152 {
153 //#warning "!!! Fix OL dump so that it follows links !!!"
154 //      const char * opType[8] =
155 //      { "(BITMAP)", "(SCALED BITMAP)", "(GPU INT)", "(BRANCH)", "(STOP)", "???", "???", "???" };
156 //      const char * ccType[8] =
157 //              { "\"==\"", "\"<\"", "\">\"", "(opflag set)", "(second half line)", "?", "?", "?" };
158
159         uint32 olp = OPGetListPointer();
160         WriteLog("\nOP: OLP = $%08X\n", olp);
161         WriteLog("OP: Phrase dump\n    ----------\n");
162
163 #if 0
164         for(uint32 i=0; i<0x100; i+=8)
165         {
166                 uint32 hi = JaguarReadLong(olp + i, OP), lo = JaguarReadLong(olp + i + 4, OP);
167                 WriteLog("\t%08X: %08X %08X %s", olp + i, hi, lo, opType[lo & 0x07]);
168
169                 if ((lo & 0x07) == 3)
170                 {
171                         uint16 ypos = (lo >> 3) & 0x7FF;
172                         uint8  cc   = (lo >> 14) & 0x03;
173                         uint32 link = ((hi << 11) | (lo >> 21)) & 0x3FFFF8;
174                         WriteLog(" YPOS=%u, CC=%s, link=%08X", ypos, ccType[cc], link);
175                 }
176
177                 WriteLog("\n");
178
179                 if ((lo & 0x07) == 0)
180                         DumpFixedObject(OPLoadPhrase(olp+i), OPLoadPhrase(olp+i+8));
181
182                 if ((lo & 0x07) == 1)
183                         DumpScaledObject(OPLoadPhrase(olp+i), OPLoadPhrase(olp+i+8), OPLoadPhrase(olp+i+16));
184         }
185
186         WriteLog("\n");
187 #else
188 //#warning "!!! Fix lockup in OPDiscoverObjects() !!!"
189 //temp, to keep the following function from locking up on bad/weird OLs
190 //return;
191
192         numberOfObjects = 0;
193         OPDiscoverObjects(olp);
194         OPDumpObjectList();
195 #endif
196 }
197
198
199 bool OPObjectExists(uint32 address)
200 {
201         // Yes, we really do a linear search, every time. :-/
202         for(uint32 i=0; i<numberOfObjects; i++)
203         {
204                 if (address == object[i])
205                         return true;
206         }
207
208         return false;
209 }
210
211
212 void OPDiscoverObjects(uint32 address)
213 {
214         uint8 objectType = 0;
215
216         do
217         {
218                 // If we've seen this object already, bail out!
219                 // Otherwise, add it to the list
220                 if (OPObjectExists(address))
221                         return;
222
223                 object[numberOfObjects++] = address;
224
225                 // Get the object & decode its type, link address
226                 uint32 hi = JaguarReadLong(address + 0, OP);
227                 uint32 lo = JaguarReadLong(address + 4, OP);
228                 objectType = lo & 0x07;
229                 uint32 link = ((hi << 11) | (lo >> 21)) & 0x3FFFF8;
230
231                 if (objectType == 3)
232                 {
233                         // Recursion needed to follow all links! This does depth-first recursion
234                         // on the not-taken objects
235                         OPDiscoverObjects(address + 8);
236                 }
237
238                 // Get the next object...
239                 address = link;
240         }
241         while (objectType != 4);
242 }
243
244
245 void OPDumpObjectList(void)
246 {
247         for(uint32 i=0; i<numberOfObjects; i++)
248         {
249                 uint32 address = object[i];
250
251                 uint32 hi = JaguarReadLong(address + 0, OP);
252                 uint32 lo = JaguarReadLong(address + 4, OP);
253                 uint8 objectType = lo & 0x07;
254                 uint32 link = ((hi << 11) | (lo >> 21)) & 0x3FFFF8;
255                 WriteLog("%08X: %08X %08X %s -> $08X", address, hi, lo, opType[objectType], link);
256
257                 if (objectType == 3)
258                 {
259                         uint16 ypos = (lo >> 3) & 0x7FF;
260                         uint8  cc   = (lo >> 14) & 0x07;        // Proper # of bits == 3
261                         WriteLog(" YPOS %s %u", ccType[cc], ypos);
262                 }
263
264                 WriteLog("\n");
265
266                 if (objectType == 0)
267                         DumpFixedObject(OPLoadPhrase(address + 0), OPLoadPhrase(address + 8));
268
269                 if (objectType == 1)
270                         DumpScaledObject(OPLoadPhrase(address + 0), OPLoadPhrase(address + 8),
271                                 OPLoadPhrase(address + 16));
272
273                 if (address == link)    // Ruh roh...
274                 {
275                         // Runaway recursive link is bad!
276                         WriteLog("***** SELF REFERENTIAL LINK *****\n\n");
277                 }
278         }
279
280         WriteLog("\n");
281 }
282
283
284 //
285 // Object Processor memory access
286 // Memory range: F00010 - F00027
287 //
288 //      F00010-F00017   R     xxxxxxxx xxxxxxxx   OB - current object code from the graphics processor
289 //      F00020-F00023     W   xxxxxxxx xxxxxxxx   OLP - start of the object list
290 //      F00026            W   -------- -------x   OBF - object processor flag
291 //
292
293 #if 0
294 uint8 OPReadByte(uint32 offset, uint32 who/*=UNKNOWN*/)
295 {
296         offset &= 0x3F;
297         return objectp_ram[offset];
298 }
299
300 uint16 OPReadWord(uint32 offset, uint32 who/*=UNKNOWN*/)
301 {
302         offset &= 0x3F;
303         return GET16(objectp_ram, offset);
304 }
305
306 void OPWriteByte(uint32 offset, uint8 data, uint32 who/*=UNKNOWN*/)
307 {
308         offset &= 0x3F;
309         objectp_ram[offset] = data;
310 }
311
312 void OPWriteWord(uint32 offset, uint16 data, uint32 who/*=UNKNOWN*/)
313 {
314         offset &= 0x3F;
315         SET16(objectp_ram, offset, data);
316
317 /*if (offset == 0x20)
318 WriteLog("OP: Setting lo list pointer: %04X\n", data);
319 if (offset == 0x22)
320 WriteLog("OP: Setting hi list pointer: %04X\n", data);//*/
321 }
322 #endif
323
324
325 uint32 OPGetListPointer(void)
326 {
327         // Note: This register is LO / HI WORD, hence the funky look of this...
328         return GET16(tomRam8, 0x20) | (GET16(tomRam8, 0x22) << 16);
329 }
330
331
332 // This is WRONG, since the OBF is only 16 bits wide!!! [FIXED]
333
334 uint32 OPGetStatusRegister(void)
335 {
336         return GET16(tomRam8, 0x26);
337 }
338
339
340 // This is WRONG, since the OBF is only 16 bits wide!!! [FIXED]
341
342 void OPSetStatusRegister(uint32 data)
343 {
344         tomRam8[0x26] = (data & 0x0000FF00) >> 8;
345         tomRam8[0x27] |= (data & 0xFE);
346 }
347
348
349 void OPSetCurrentObject(uint64 object)
350 {
351 //Not sure this is right... Wouldn't it just be stored 64 bit BE?
352         // Stored as least significant 32 bits first, ms32 last in big endian
353 /*      objectp_ram[0x13] = object & 0xFF; object >>= 8;
354         objectp_ram[0x12] = object & 0xFF; object >>= 8;
355         objectp_ram[0x11] = object & 0xFF; object >>= 8;
356         objectp_ram[0x10] = object & 0xFF; object >>= 8;
357
358         objectp_ram[0x17] = object & 0xFF; object >>= 8;
359         objectp_ram[0x16] = object & 0xFF; object >>= 8;
360         objectp_ram[0x15] = object & 0xFF; object >>= 8;
361         objectp_ram[0x14] = object & 0xFF;*/
362 // Let's try regular good old big endian...
363         tomRam8[0x17] = object & 0xFF; object >>= 8;
364         tomRam8[0x16] = object & 0xFF; object >>= 8;
365         tomRam8[0x15] = object & 0xFF; object >>= 8;
366         tomRam8[0x14] = object & 0xFF; object >>= 8;
367
368         tomRam8[0x13] = object & 0xFF; object >>= 8;
369         tomRam8[0x12] = object & 0xFF; object >>= 8;
370         tomRam8[0x11] = object & 0xFF; object >>= 8;
371         tomRam8[0x10] = object & 0xFF;
372 }
373
374
375 uint64 OPLoadPhrase(uint32 offset)
376 {
377         offset &= ~0x07;                                                // 8 byte alignment
378         return ((uint64)JaguarReadLong(offset, OP) << 32) | (uint64)JaguarReadLong(offset+4, OP);
379 }
380
381
382 void OPStorePhrase(uint32 offset, uint64 p)
383 {
384         offset &= ~0x07;                                                // 8 byte alignment
385         JaguarWriteLong(offset, p >> 32, OP);
386         JaguarWriteLong(offset + 4, p & 0xFFFFFFFF, OP);
387 }
388
389
390 //
391 // Debugging routines
392 //
393 void DumpScaledObject(uint64 p0, uint64 p1, uint64 p2)
394 {
395         WriteLog("          %08X %08X\n", (uint32)(p1>>32), (uint32)(p1&0xFFFFFFFF));
396         WriteLog("          %08X %08X\n", (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
397         DumpBitmapCore(p0, p1);
398         uint32 hscale = p2 & 0xFF;
399         uint32 vscale = (p2 >> 8) & 0xFF;
400         uint32 remainder = (p2 >> 16) & 0xFF;
401         WriteLog("    [hsc: %02X, vsc: %02X, rem: %02X]\n", hscale, vscale, remainder);
402 }
403
404
405 void DumpFixedObject(uint64 p0, uint64 p1)
406 {
407         WriteLog("          %08X %08X\n", (uint32)(p1>>32), (uint32)(p1&0xFFFFFFFF));
408         DumpBitmapCore(p0, p1);
409 }
410
411
412 void DumpBitmapCore(uint64 p0, uint64 p1)
413 {
414         uint32 bdMultiplier[8] = { 64, 32, 16, 8, 4, 2, 1, 1 };
415         uint8 bitdepth = (p1 >> 12) & 0x07;
416 //WAS:  int16 ypos = ((p0 >> 3) & 0x3FF);                       // ??? What if not interlaced (/2)?
417         int16 ypos = ((p0 >> 3) & 0x7FF);                       // ??? What if not interlaced (/2)?
418         int32 xpos = p1 & 0xFFF;
419         xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);       // Sign extend that mutha!
420         uint32 iwidth = ((p1 >> 28) & 0x3FF);
421         uint32 dwidth = ((p1 >> 18) & 0x3FF);           // Unsigned!
422         uint16 height = ((p0 >> 14) & 0x3FF);
423         uint32 link = ((p0 >> 24) & 0x7FFFF) << 3;
424         uint32 ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
425         uint32 firstPix = (p1 >> 49) & 0x3F;
426         uint8 flags = (p1 >> 45) & 0x0F;
427         uint8 idx = (p1 >> 38) & 0x7F;
428         uint32 pitch = (p1 >> 15) & 0x07;
429         WriteLog("    [%u x %u @ (%i, %u) (iw:%u, dw:%u) (%u bpp), p:%08X fp:%02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
430                 iwidth * bdMultiplier[bitdepth],
431                 height, xpos, ypos, iwidth, dwidth, op_bitmap_bit_depth[bitdepth],
432                 ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""),
433                 (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""),
434                 (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
435 }
436
437
438 //
439 // Object Processor main routine
440 //
441 #warning "Need to fix this so that when an GPU object IRQ happens, we can pick up OP processing where we left off. !!! FIX !!!"
442 void OPProcessList(int halfline, bool render)
443 {
444 #warning "!!! NEED TO HANDLE MULTIPLE FIELDS PROPERLY !!!"
445 // We ignore them, for now; not good
446         halfline &= 0x7FF;
447
448 extern int op_start_log;
449 //      char * condition_to_str[8] =
450 //              { "==", "<", ">", "(opflag set)", "(second half line)", "?", "?", "?" };
451
452         op_pointer = OPGetListPointer();
453
454 //      objectp_stop_reading_list = false;
455
456 //WriteLog("OP: Processing line #%u (OLP=%08X)...\n", halfline, op_pointer);
457 //op_done();
458
459 // *** BEGIN OP PROCESSOR TESTING ONLY ***
460 extern bool interactiveMode;
461 extern bool iToggle;
462 extern int objectPtr;
463 bool inhibit;
464 int bitmapCounter = 0;
465 // *** END OP PROCESSOR TESTING ONLY ***
466
467         uint32 opCyclesToRun = 30000;                                   // This is a pulled-out-of-the-air value (will need to be fixed, obviously!)
468
469 //      if (op_pointer) WriteLog(" new op list at 0x%.8x halfline %i\n",op_pointer,halfline);
470         while (op_pointer)
471         {
472 // *** BEGIN OP PROCESSOR TESTING ONLY ***
473 if (interactiveMode && bitmapCounter == objectPtr)
474         inhibit = iToggle;
475 else
476         inhibit = false;
477 // *** END OP PROCESSOR TESTING ONLY ***
478 //              if (objectp_stop_reading_list)
479 //                      return;
480
481                 uint64 p0 = OPLoadPhrase(op_pointer);
482                 op_pointer += 8;
483 //WriteLog("\t%08X type %i\n", op_pointer, (uint8)p0 & 0x07);
484
485 #if 1
486 if (halfline == TOMGetVDB() && op_start_log)
487 //if (halfline == 215 && op_start_log)
488 //if (halfline == 28 && op_start_log)
489 //if (halfline == 0)
490 {
491 WriteLog("%08X --> phrase %08X %08X", op_pointer - 8, (int)(p0>>32), (int)(p0&0xFFFFFFFF));
492 if ((p0 & 0x07) == OBJECT_TYPE_BITMAP)
493 {
494 WriteLog(" (BITMAP) ");
495 uint64 p1 = OPLoadPhrase(op_pointer);
496 WriteLog("\n%08X --> phrase %08X %08X ", op_pointer, (int)(p1>>32), (int)(p1&0xFFFFFFFF));
497         uint8 bitdepth = (p1 >> 12) & 0x07;
498 //WAS:  int16 ypos = ((p0 >> 3) & 0x3FF);                       // ??? What if not interlaced (/2)?
499         int16 ypos = ((p0 >> 3) & 0x7FF);                       // ??? What if not interlaced (/2)?
500 int32 xpos = p1 & 0xFFF;
501 xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);
502         uint32 iwidth = ((p1 >> 28) & 0x3FF);
503         uint32 dwidth = ((p1 >> 18) & 0x3FF);           // Unsigned!
504         uint16 height = ((p0 >> 14) & 0x3FF);
505         uint32 link = ((p0 >> 24) & 0x7FFFF) << 3;
506         uint32 ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
507         uint32 firstPix = (p1 >> 49) & 0x3F;
508         uint8 flags = (p1 >> 45) & 0x0F;
509         uint8 idx = (p1 >> 38) & 0x7F;
510         uint32 pitch = (p1 >> 15) & 0x07;
511 WriteLog("\n    [%u (%u) x %u @ (%i, %u) (%u bpp), l: %08X, p: %08X fp: %02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
512         iwidth, dwidth, height, xpos, ypos, op_bitmap_bit_depth[bitdepth], link, ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""), (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""), (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
513 }
514 if ((p0 & 0x07) == OBJECT_TYPE_SCALE)
515 {
516 WriteLog(" (SCALED BITMAP)");
517 uint64 p1 = OPLoadPhrase(op_pointer), p2 = OPLoadPhrase(op_pointer+8);
518 WriteLog("\n%08X --> phrase %08X %08X ", op_pointer, (int)(p1>>32), (int)(p1&0xFFFFFFFF));
519 WriteLog("\n%08X --> phrase %08X %08X ", op_pointer+8, (int)(p2>>32), (int)(p2&0xFFFFFFFF));
520         uint8 bitdepth = (p1 >> 12) & 0x07;
521 //WAS:  int16 ypos = ((p0 >> 3) & 0x3FF);                       // ??? What if not interlaced (/2)?
522         int16 ypos = ((p0 >> 3) & 0x7FF);                       // ??? What if not interlaced (/2)?
523 int32 xpos = p1 & 0xFFF;
524 xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);
525         uint32 iwidth = ((p1 >> 28) & 0x3FF);
526         uint32 dwidth = ((p1 >> 18) & 0x3FF);           // Unsigned!
527         uint16 height = ((p0 >> 14) & 0x3FF);
528         uint32 link = ((p0 >> 24) & 0x7FFFF) << 3;
529         uint32 ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
530         uint32 firstPix = (p1 >> 49) & 0x3F;
531         uint8 flags = (p1 >> 45) & 0x0F;
532         uint8 idx = (p1 >> 38) & 0x7F;
533         uint32 pitch = (p1 >> 15) & 0x07;
534 WriteLog("\n    [%u (%u) x %u @ (%i, %u) (%u bpp), l: %08X, p: %08X fp: %02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
535         iwidth, dwidth, height, xpos, ypos, op_bitmap_bit_depth[bitdepth], link, ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""), (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""), (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
536         uint32 hscale = p2 & 0xFF;
537         uint32 vscale = (p2 >> 8) & 0xFF;
538         uint32 remainder = (p2 >> 16) & 0xFF;
539 WriteLog("    [hsc: %02X, vsc: %02X, rem: %02X]\n", hscale, vscale, remainder);
540 }
541 if ((p0 & 0x07) == OBJECT_TYPE_GPU)
542 WriteLog(" (GPU)\n");
543 if ((p0 & 0x07) == OBJECT_TYPE_BRANCH)
544 {
545 WriteLog(" (BRANCH)\n");
546 uint8 * jaguarMainRam = GetRamPtr();
547 WriteLog("[RAM] --> ");
548 for(int k=0; k<8; k++)
549         WriteLog("%02X ", jaguarMainRam[op_pointer-8 + k]);
550 WriteLog("\n");
551 }
552 if ((p0 & 0x07) == OBJECT_TYPE_STOP)
553 WriteLog("    --> List end\n\n");
554 }
555 #endif
556
557                 switch ((uint8)p0 & 0x07)
558                 {
559                 case OBJECT_TYPE_BITMAP:
560                 {
561 //WAS:                  uint16 ypos = (p0 >> 3) & 0x3FF;
562                         uint16 ypos = (p0 >> 3) & 0x7FF;
563 // This is only theory implied by Rayman...!
564 // It seems that if the YPOS is zero, then bump the YPOS value so that it coincides with
565 // the VDB value. With interlacing, this would be slightly more tricky.
566 // There's probably another bit somewhere that enables this mode--but so far, doesn't seem
567 // to affect any other game in a negative way (that I've seen).
568 // Either that, or it's an undocumented bug...
569
570 //No, the reason this was needed is that the OP code before was wrong. Any value
571 //less than VDB will get written to the top line of the display!
572 #if 0
573 // Not so sure... Let's see what happens here...
574 // No change...
575                         if (ypos == 0)
576                                 ypos = TOMReadWord(0xF00046, OP) / 2;                   // Get the VDB value
577 #endif
578 // Actually, no. Any item less than VDB will get only the lines that hang over VDB displayed.
579 // Actually, this is incorrect. It seems that VDB value is wrong somewhere and that's
580 // what's causing things to fuck up. Still no idea why.
581
582                         uint32 height = (p0 & 0xFFC000) >> 14;
583                         uint32 oldOPP = op_pointer - 8;
584 // *** BEGIN OP PROCESSOR TESTING ONLY ***
585 if (inhibit && op_start_log)
586         WriteLog("!!! ^^^ This object is INHIBITED! ^^^ !!!\n");
587 bitmapCounter++;
588 if (!inhibit)   // For OP testing only!
589 // *** END OP PROCESSOR TESTING ONLY ***
590                         if (halfline >= ypos && height > 0)
591                         {
592                                 uint64 p1 = OPLoadPhrase(op_pointer);
593                                 op_pointer += 8;
594 //WriteLog("OP: Writing halfline %d with ypos == %d...\n", halfline, ypos);
595 //WriteLog("--> Writing %u BPP bitmap...\n", op_bitmap_bit_depth[(p1 >> 12) & 0x07]);
596 //                              OPProcessFixedBitmap(halfline, p0, p1, render);
597                                 OPProcessFixedBitmap(p0, p1, render);
598
599                                 // OP write-backs
600
601 //???Does this really happen??? Doesn't seem to work if you do this...!
602 //Probably not. Must be a bug in the documentation...!
603 //                              uint32 link = (p0 & 0x7FFFF000000) >> 21;
604 //                              SET16(tom_ram_8, 0x20, link & 0xFFFF);  // OLP
605 //                              SET16(tom_ram_8, 0x22, link >> 16);
606 /*                              uint32 height = (p0 & 0xFFC000) >> 14;
607                                 if (height - 1 > 0)
608                                         height--;*/
609                                 // NOTE: Would subtract 2 if in interlaced mode...!
610 //                              uint64 height = ((p0 & 0xFFC000) - 0x4000) & 0xFFC000;
611 //                              if (height)
612                                 height--;
613
614                                 uint64 data = (p0 & 0xFFFFF80000000000LL) >> 40;
615                                 uint64 dwidth = (p1 & 0xFFC0000) >> 15;
616                                 data += dwidth;
617
618                                 p0 &= ~0xFFFFF80000FFC000LL;            // Mask out old data...
619                                 p0 |= (uint64)height << 14;
620                                 p0 |= data << 40;
621                                 OPStorePhrase(oldOPP, p0);
622                         }
623 //WriteLog("\t\tOld OP: %08X -> ", op_pointer);
624 //Temp, for testing...
625 //No doubt, this type of check will break all kinds of stuff... !!! FIX !!!
626 //And it does! !!! FIX !!!
627 //Let's remove this "fix" since it screws up more than it fixes.
628 /*      if (op_pointer > ((p0 & 0x000007FFFF000000LL) >> 21))
629                 return;*/
630
631 // NOTE: The link address only replaces bits 3-21 in the OLP, and this replaces
632 //       EVERYTHING. !!! FIX !!! [DONE]
633 #warning "!!! Link address is not linked properly for all object types !!!"
634 #warning "!!! Only BITMAP is properly handled !!!"
635                         op_pointer &= 0xFFC00007;
636                         op_pointer |= (p0 & 0x000007FFFF000000LL) >> 21;
637 //WriteLog("New OP: %08X\n", op_pointer);
638 //kludge: Seems that memory access is mirrored in the first 8MB of memory...
639 if (op_pointer > 0x1FFFFF && op_pointer < 0x800000)
640         op_pointer &= 0xFF1FFFFF;       // Knock out bits 21-23
641
642                         break;
643                 }
644                 case OBJECT_TYPE_SCALE:
645                 {
646 //WAS:                  uint16 ypos = (p0 >> 3) & 0x3FF;
647                         uint16 ypos = (p0 >> 3) & 0x7FF;
648                         uint32 height = (p0 & 0xFFC000) >> 14;
649                         uint32 oldOPP = op_pointer - 8;
650 //WriteLog("OP: Scaled Object (ypos=%04X, height=%04X", ypos, height);
651 // *** BEGIN OP PROCESSOR TESTING ONLY ***
652 if (inhibit && op_start_log)
653 {
654         WriteLog("!!! ^^^ This object is INHIBITED! ^^^ !!! (halfline=%u, ypos=%u, height=%u)\n", halfline, ypos, height);
655         DumpScaledObject(p0, OPLoadPhrase(op_pointer), OPLoadPhrase(op_pointer+8));
656 }
657 bitmapCounter++;
658 if (!inhibit)   // For OP testing only!
659 // *** END OP PROCESSOR TESTING ONLY ***
660                         if (halfline >= ypos && height > 0)
661                         {
662                                 uint64 p1 = OPLoadPhrase(op_pointer);
663                                 op_pointer += 8;
664                                 uint64 p2 = OPLoadPhrase(op_pointer);
665                                 op_pointer += 8;
666 //WriteLog("OP: %08X (%d) %08X%08X %08X%08X %08X%08X\n", oldOPP, halfline, (uint32)(p0>>32), (uint32)(p0&0xFFFFFFFF), (uint32)(p1>>32), (uint32)(p1&0xFFFFFFFF), (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
667                                 OPProcessScaledBitmap(p0, p1, p2, render);
668
669                                 // OP write-backs
670
671                                 uint16 remainder = (p2 >> 16) & 0xFF;//, vscale = p2 >> 8;
672                                 uint8 /*remainder = p2 >> 16,*/ vscale = p2 >> 8;
673 //Actually, we should skip this object if it has a vscale of zero.
674 //Or do we? Not sure... Atari Karts has a few lines that look like:
675 // (SCALED BITMAP)
676 //000E8268 --> phrase 00010000 7000B00D
677 //    [7 (0) x 1 @ (13, 0) (8 bpp), l: 000E82A0, p: 000E0FC0 fp: 00, fl:RELEASE, idx:00, pt:01]
678 //    [hsc: 9A, vsc: 00, rem: 00]
679 // Could it be the vscale is overridden if the DWIDTH is zero? Hmm...
680 //WriteLog("OP: Scaled bitmap processing (rem=%02X, vscale=%02X)...\n", remainder, vscale);//*/
681
682                                 if (vscale == 0)
683                                         vscale = 0x20;                                  // OP bug??? Nope, it isn't...! Or is it?
684
685 //extern int start_logging;
686 //if (start_logging)
687 //      WriteLog("--> Returned from scaled bitmap processing (rem=%02X, vscale=%02X)...\n", remainder, vscale);//*/
688 //Locks up here:
689 //--> Returned from scaled bitmap processing (rem=20, vscale=80)...
690 //There are other problems here, it looks like...
691 //Another lock up:
692 //About to execute OP (508)...
693 /*
694 OP: Scaled bitmap 4x? 4bpp at 38,? hscale=7C fpix=0 data=00075E28 pitch 1 hflipped=no dwidth=? (linked to 00071118) Transluency=no
695 --> Returned from scaled bitmap processing (rem=50, vscale=7C)...
696 OP: Scaled bitmap 4x? 4bpp at 38,? hscale=7C fpix=0 data=00075E28 pitch 1 hflipped=no dwidth=? (linked to 00071118) Transluency=no
697 --> Returned from scaled bitmap processing (rem=30, vscale=7C)...
698 OP: Scaled bitmap 4x? 4bpp at 38,? hscale=7C fpix=0 data=00075E28 pitch 1 hflipped=no dwidth=? (linked to 00071118) Transluency=no
699 --> Returned from scaled bitmap processing (rem=10, vscale=7C)...
700 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756A8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
701 --> Returned from scaled bitmap processing (rem=00, vscale=7E)...
702 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
703 --> Returned from scaled bitmap processing (rem=00, vscale=80)...
704 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
705 --> Returned from scaled bitmap processing (rem=5E, vscale=7E)...
706 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756E8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
707 --> Returned from scaled bitmap processing (rem=60, vscale=80)...
708 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
709 --> Returned from scaled bitmap processing (rem=3E, vscale=7E)...
710 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756E8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
711 --> Returned from scaled bitmap processing (rem=40, vscale=80)...
712 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
713 --> Returned from scaled bitmap processing (rem=1E, vscale=7E)...
714 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756E8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
715 --> Returned from scaled bitmap processing (rem=20, vscale=80)...
716 */
717 //Here's another problem:
718 //    [hsc: 20, vsc: 20, rem: 00]
719 // Since we're not checking for $E0 (but that's what we get from the above), we end
720 // up repeating this halfline unnecessarily... !!! FIX !!! [DONE, but... still not quite
721 // right. Either that, or the Accolade team that wrote Bubsy screwed up royal.]
722 //Also note: $E0 = 7.0 which IS a legal vscale value...
723
724 //                              if (remainder & 0x80)                           // I.e., it's negative
725 //                              if ((remainder & 0x80) || remainder == 0)       // I.e., it's <= 0
726 //                              if ((remainder - 1) >= 0xE0)            // I.e., it's <= 0
727 //                              if ((remainder >= 0xE1) || remainder == 0)// I.e., it's <= 0
728 //                              if ((remainder >= 0xE1 && remainder <= 0xFF) || remainder == 0)// I.e., it's <= 0
729 //                              if (remainder <= 0x20)                          // I.e., it's <= 1.0
730                                 // I.e., it's < 1.0f -> means it'll go negative when we subtract 1.0f.
731                                 if (remainder < 0x20)
732                                 {
733                                         uint64 data = (p0 & 0xFFFFF80000000000LL) >> 40;
734                                         uint64 dwidth = (p1 & 0xFFC0000) >> 15;
735
736 //                                      while (remainder & 0x80)
737 //                                      while ((remainder & 0x80) || remainder == 0)
738 //                                      while ((remainder - 1) >= 0xE0)
739 //                                      while ((remainder >= 0xE1) || remainder == 0)
740 //                                      while ((remainder >= 0xE1 && remainder <= 0xFF) || remainder == 0)
741 //                                      while (remainder <= 0x20)
742                                         while (remainder < 0x20)
743                                         {
744                                                 remainder += vscale;
745
746                                                 if (height)
747                                                         height--;
748
749                                                 data += dwidth;
750                                         }
751
752                                         p0 &= ~0xFFFFF80000FFC000LL;    // Mask out old data...
753                                         p0 |= (uint64)height << 14;
754                                         p0 |= data << 40;
755                                         OPStorePhrase(oldOPP, p0);
756                                 }
757
758                                 remainder -= 0x20;                                      // 1.0f in [3.5] fixed point format
759
760 //if (start_logging)
761 //      WriteLog("--> Finished writebacks...\n");//*/
762
763 //WriteLog(" [%08X%08X -> ", (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
764                                 p2 &= ~0x0000000000FF0000LL;
765                                 p2 |= (uint64)remainder << 16;
766 //WriteLog("%08X%08X]\n", (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
767                                 OPStorePhrase(oldOPP + 16, p2);
768 //remainder = (uint8)(p2 >> 16), vscale = (uint8)(p2 >> 8);
769 //WriteLog(" [after]: rem=%02X, vscale=%02X\n", remainder, vscale);
770                         }
771
772                         op_pointer = (p0 & 0x000007FFFF000000LL) >> 21;
773                         break;
774                 }
775                 case OBJECT_TYPE_GPU:
776                 {
777 //WriteLog("OP: Asserting GPU IRQ #3...\n");
778 #warning "Need to fix OP GPU IRQ handling! !!! FIX !!!"
779                         OPSetCurrentObject(p0);
780                         GPUSetIRQLine(3, ASSERT_LINE);
781 //Also, OP processing is suspended from this point until OBF (F00026) is written to...
782 // !!! FIX !!!
783 //Do something like:
784 //OPSuspendedByGPU = true;
785 //Dunno if the OP keeps processing from where it was interrupted, or if it just continues
786 //on the next halfline...
787 // --> It continues from where it was interrupted! !!! FIX !!!
788                         break;
789                 }
790                 case OBJECT_TYPE_BRANCH:
791                 {
792                         uint16 ypos = (p0 >> 3) & 0x7FF;
793 // NOTE: The JTRM sez there are only 2 bits used for the CC, but lists *five*
794 //       conditions! Need at least one more bit for that! :-P
795 // Also, the ASIC nets imply that it uses bits 14-16 (height in BM & SBM objects)
796 #warning "!!! Possibly bad CC handling in OP (missing 1 bit) !!!"
797                         uint8  cc   = (p0 >> 14) & 0x03;
798                         uint32 link = (p0 >> 21) & 0x3FFFF8;
799
800 //                      if ((ypos!=507)&&(ypos!=25))
801 //                              WriteLog("\t%i%s%i link=0x%.8x\n",halfline,condition_to_str[cc],ypos>>1,link);
802                         switch (cc)
803                         {
804                         case CONDITION_EQUAL:
805                                 if (TOMReadWord(0xF00006, OP) == ypos || ypos == 0x7FF)
806                                         op_pointer = link;
807                                 break;
808                         case CONDITION_LESS_THAN:
809                                 if (TOMReadWord(0xF00006, OP) < ypos)
810                                         op_pointer = link;
811                                 break;
812                         case CONDITION_GREATER_THAN:
813                                 if (TOMReadWord(0xF00006, OP) > ypos)
814                                         op_pointer = link;
815                                 break;
816                         case CONDITION_OP_FLAG_SET:
817                                 if (OPGetStatusRegister() & 0x01)
818                                         op_pointer = link;
819                                 break;
820                         case CONDITION_SECOND_HALF_LINE:
821 //Here's the ASIC code:
822 //  ND4(cctrue5, newheight[2], heightl[1], heightl[0], hcb[10]);
823 //which means, do the link if bit 10 of HC is set...
824
825                                 // This basically means branch if bit 10 of HC is set
826 #warning "Unhandled condition code causes emulator to crash... !!! FIX !!!"
827                                 WriteLog("OP: Unexpected CONDITION_SECOND_HALF_LINE in BRANCH object\nOP: shutting down!\n");
828                                 LogDone();
829                                 exit(0);
830                                 break;
831                         default:
832                                 // Basically, if you do this, the OP does nothing. :-)
833                                 WriteLog("OP: Unimplemented branch condition %i\n", cc);
834                         }
835                         break;
836                 }
837                 case OBJECT_TYPE_STOP:
838                 {
839 //op_start_log = 0;
840                         // unsure
841 //WriteLog("OP: --> STOP\n");
842 //                      op_set_status_register(((p0>>3) & 0xFFFFFFFF));
843 //This seems more likely...
844                         OPSetCurrentObject(p0);
845
846                         if (p0 & 0x08)
847                         {
848                                 // We need to check whether these interrupts are enabled or not, THEN
849                                 // set an IRQ + pending flag if necessary...
850                                 if (TOMIRQEnabled(IRQ_OPFLAG))
851                                 {
852                                         TOMSetPendingObjectInt();
853                                         m68k_set_irq(2);                                // Cause a 68K IPL 2 to occur...
854                                 }
855                         }
856
857                         return;
858 //                      break;
859                 }
860                 default:
861 //                      WriteLog("op: unknown object type %i\n", ((uint8)p0 & 0x07));
862                         return;
863                 }
864
865                 // Here is a little sanity check to keep the OP from locking up the machine
866                 // when fed bad data. Better would be to count how many actual cycles it used
867                 // and bail out/reenter to properly simulate an overloaded OP... !!! FIX !!!
868 #warning "Better would be to count how many actual cycles it used and bail out/reenter to properly simulate an overloaded OP... !!! FIX !!!"
869                 opCyclesToRun--;
870
871                 if (!opCyclesToRun)
872                         return;
873         }
874 }
875
876
877 //
878 // Store fixed size bitmap in line buffer
879 //
880 void OPProcessFixedBitmap(uint64 p0, uint64 p1, bool render)
881 {
882 // Need to make sure that when writing that it stays within the line buffer...
883 // LBUF ($F01800 - $F01D9E) 360 x 32-bit RAM
884         uint8 depth = (p1 >> 12) & 0x07;                                // Color depth of image
885         int32 xpos = ((int16)((p1 << 4) & 0xFFFF)) >> 4;// Image xpos in LBUF
886         uint32 iwidth = (p1 >> 28) & 0x3FF;                             // Image width in *phrases*
887         uint32 data = (p0 >> 40) & 0xFFFFF8;                    // Pixel data address
888 //#ifdef OP_DEBUG_BMP
889         uint32  firstPix = (p1 >> 49) & 0x3F;
890         // "The LSB is significant only for scaled objects..." -JTRM
891         // "In 1 BPP mode, all five bits are significant. In 2 BPP mode, the top four are significant..."
892         firstPix &= 0x3E;
893 //#endif
894 // We can ignore the RELEASE (high order) bit for now--probably forever...!
895 //      uint8 flags = (p1 >> 45) & 0x0F;        // REFLECT, RMW, TRANS, RELEASE
896 //Optimize: break these out to their own BOOL values
897         uint8 flags = (p1 >> 45) & 0x07;                                // REFLECT (0), RMW (1), TRANS (2)
898         bool flagREFLECT = (flags & OPFLAG_REFLECT ? true : false),
899                 flagRMW = (flags & OPFLAG_RMW ? true : false),
900                 flagTRANS = (flags & OPFLAG_TRANS ? true : false);
901 // "For images with 1 to 4 bits/pixel the top 7 to 4 bits of the index
902 //  provide the most significant bits of the palette address."
903         uint8 index = (p1 >> 37) & 0xFE;                                // CLUT index offset (upper pix, 1-4 bpp)
904         uint32 pitch = (p1 >> 15) & 0x07;                               // Phrase pitch
905         pitch <<= 3;                                                                    // Optimization: Multiply pitch by 8
906
907 //      int16 scanlineWidth = tom_getVideoModeWidth();
908         uint8 * tomRam8 = TOMGetRamPointer();
909         uint8 * paletteRAM = &tomRam8[0x400];
910         // This is OK as long as it's used correctly: For 16-bit RAM to RAM direct copies--NOT
911         // for use when using endian-corrected data (i.e., any of the *_word_read functions!)
912         uint16 * paletteRAM16 = (uint16 *)paletteRAM;
913
914 //      WriteLog("bitmap %ix? %ibpp at %i,? firstpix=? data=0x%.8x pitch %i hflipped=%s dwidth=? (linked to ?) RMW=%s Tranparent=%s\n",
915 //              iwidth, op_bitmap_bit_depth[bitdepth], xpos, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), (flags&OPFLAG_RMW ? "yes" : "no"), (flags&OPFLAG_TRANS ? "yes" : "no"));
916
917 // Is it OK to have a 0 for the data width??? (i.e., undocumented?)
918 // Seems to be... Seems that dwidth *can* be zero (i.e., reuse same line) as well.
919 // Pitch == 0 is OK too...
920
921 //kludge: Seems that the OP treats iwidth == 0 as iwidth == 1... Need to investigate
922 //        on real hardware...
923 #warning "!!! Need to investigate iwidth == 0 behavior on real hardware !!!"
924 if (iwidth == 0)
925         iwidth = 1;
926
927 //      if (!render || op_pointer == 0 || ptr == 0 || pitch == 0)
928 //I'm not convinced that we need to concern ourselves with data & op_pointer here either!
929         if (!render || iwidth == 0)
930                 return;
931
932 //OK, so we know the position in the line buffer is correct. It's the clipping in
933 //24bpp mode that's wrong!
934 #if 0
935 //This is a total kludge, based upon the fact that 24BPP mode puts *4* bytes
936 //into the line buffer for each pixel.
937 if (depth == 5) // i.e., 24bpp mode...
938         xpos >>= 1;     // Cut it in half...
939 #endif
940
941 //#define OP_DEBUG_BMP
942 //#ifdef OP_DEBUG_BMP
943 //      WriteLog("bitmap %ix%i %ibpp at %i,%i firstpix=%i data=0x%.8x pitch %i hflipped=%s dwidth=%i (linked to 0x%.8x) Transluency=%s\n",
944 //              iwidth, height, op_bitmap_bit_depth[bitdepth], xpos, ypos, firstPix, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), dwidth, op_pointer, (flags&OPFLAG_RMW ? "yes" : "no"));
945 //#endif
946
947 //      int32 leftMargin = xpos, rightMargin = (xpos + (phraseWidthToPixels[depth] * iwidth)) - 1;
948         int32 startPos = xpos, endPos = xpos +
949                 (!flagREFLECT ? (phraseWidthToPixels[depth] * iwidth) - 1
950                 : -((phraseWidthToPixels[depth] * iwidth) + 1));
951         uint32 clippedWidth = 0, phraseClippedWidth = 0, dataClippedWidth = 0;//, phrasePixel = 0;
952         bool in24BPPMode = (((GET16(tomRam8, 0x0028) >> 1) & 0x03) == 1 ? true : false);        // VMODE
953         // Not sure if this is Jaguar Two only location or what...
954         // From the docs, it is... If we want to limit here we should think of something else.
955 //      int32 limit = GET16(tom_ram_8, 0x0008);                 // LIMIT
956 //      int32 limit = 720;
957 //      int32 lbufWidth = (!in24BPPMode ? limit - 1 : (limit / 2) - 1); // Zero based limit...
958 //printf("[OP:xpos=%i,spos=%i,epos=%i>", xpos, startPos, endPos);
959         // This is correct, the OP line buffer is a constant size... 
960         int32 limit = 720;
961         int32 lbufWidth = 719;
962
963         // If the image is completely to the left or right of the line buffer, then bail.
964 //If in REFLECT mode, then these values are swapped! !!! FIX !!! [DONE]
965 //There are four possibilities:
966 //  1. image sits on left edge and no REFLECT; starts out of bounds but ends in bounds.
967 //  2. image sits on left edge and REFLECT; starts in bounds but ends out of bounds.
968 //  3. image sits on right edge and REFLECT; starts out of bounds but ends in bounds.
969 //  4. image sits on right edge and no REFLECT; starts in bounds but ends out of bounds.
970 //Numbers 2 & 4 can be caught by checking the LBUF clip while in the inner loop,
971 // numbers 1 & 3 are of concern.
972 // This *indirectly* handles only cases 2 & 4! And is WRONG if REFLECT is set...!
973 //      if (rightMargin < 0 || leftMargin > lbufWidth)
974
975 // It might be easier to swap these (if REFLECTed) and just use XPOS down below...
976 // That way, you could simply set XPOS to leftMargin if !REFLECT and to rightMargin otherwise.
977 // Still have to be careful with the DATA and IWIDTH values though...
978
979 //      if ((!flagREFLECT && (rightMargin < 0 || leftMargin > lbufWidth))
980 //              || (flagREFLECT && (leftMargin < 0 || rightMargin > lbufWidth)))
981 //              return;
982         if ((!flagREFLECT && (endPos < 0 || startPos > lbufWidth))
983                 || (flagREFLECT && (startPos < 0 || endPos > lbufWidth)))
984                 return;
985
986         // Otherwise, find the clip limits and clip the phrase as well...
987         // NOTE: I'm fudging here by letting the actual blit overstep the bounds of the
988         //       line buffer, but it shouldn't matter since there are two unused line
989         //       buffers below and nothing above and I'll at most write 8 bytes outside
990         //       the line buffer... I could use a fractional clip begin/end value, but
991         //       this makes the blit a *lot* more hairy. I might fix this in the future
992         //       if it becomes necessary. (JLH)
993         //       Probably wouldn't be *that* hairy. Just use a delta that tells the inner loop
994         //       which pixel in the phrase is being written, and quit when either end of phrases
995         //       is reached or line buffer extents are surpassed.
996
997 //This stuff is probably wrong as well... !!! FIX !!!
998 //The strange thing is that it seems to work, but that's no guarantee that it's bulletproof!
999 //Yup. Seems that JagMania doesn't work correctly with this...
1000 //Dunno if this is the problem, but Atari Karts is showing *some* of the road now...
1001 //      if (!flagREFLECT)
1002
1003 /*
1004         if (leftMargin < 0)
1005                 clippedWidth = 0 - leftMargin,
1006                 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth],
1007                 leftMargin = 0 - (clippedWidth % phraseWidthToPixels[depth]);
1008 //              leftMargin = 0;
1009
1010         if (rightMargin > lbufWidth)
1011                 clippedWidth = rightMargin - lbufWidth,
1012                 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];//,
1013 //              rightMargin = lbufWidth + (clippedWidth % phraseWidthToPixels[depth]);
1014 //              rightMargin = lbufWidth;
1015 */
1016 if (depth > 5)
1017         WriteLog("OP: We're about to encounter a divide by zero error!\n");
1018         // NOTE: We're just using endPos to figure out how much, if any, to clip by.
1019         // ALSO: There may be another case where we start out of bounds and end out of bounds...!
1020         // !!! FIX !!!
1021         if (startPos < 0)                       // Case #1: Begin out, end in, L to R
1022                 clippedWidth = 0 - startPos,
1023                 dataClippedWidth = phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth],
1024                 startPos = 0 - (clippedWidth % phraseWidthToPixels[depth]);
1025
1026         if (endPos < 0)                         // Case #2: Begin in, end out, R to L
1027                 clippedWidth = 0 - endPos,
1028                 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];
1029
1030         if (endPos > lbufWidth)         // Case #3: Begin in, end out, L to R
1031                 clippedWidth = endPos - lbufWidth,
1032                 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];
1033
1034         if (startPos > lbufWidth)       // Case #4: Begin out, end in, R to L
1035                 clippedWidth = startPos - lbufWidth,
1036                 dataClippedWidth = phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth],
1037                 startPos = lbufWidth + (clippedWidth % phraseWidthToPixels[depth]);
1038 //printf("<OP:spos=%i,epos=%i]", startPos, endPos);
1039
1040         // If the image is sitting on the line buffer left or right edge, we need to compensate
1041         // by decreasing the image phrase width accordingly.
1042         iwidth -= phraseClippedWidth;
1043
1044         // Also, if we're clipping the phrase we need to make sure we're in the correct part of
1045         // the pixel data.
1046 //      data += phraseClippedWidth * (pitch << 3);
1047         data += dataClippedWidth * pitch;
1048
1049         // NOTE: When the bitmap is in REFLECT mode, the XPOS marks the *right* side of the
1050         //       bitmap! This makes clipping & etc. MUCH, much easier...!
1051 //      uint32 lbufAddress = 0x1800 + (!in24BPPMode ? leftMargin * 2 : leftMargin * 4);
1052 //Why does this work right when multiplying startPos by 2 (instead of 4) for 24 BPP mode?
1053 //Is this a bug in the OP?
1054 //It's because in 24bpp mode, each pixel takes *4* bytes, instead of the usual 2.
1055 //Though it looks like we're doing it here no matter what...
1056 //      uint32 lbufAddress = 0x1800 + (!in24BPPMode ? startPos * 2 : startPos * 2);
1057 //Let's try this:
1058         uint32 lbufAddress = 0x1800 + (startPos * 2);
1059         uint8 * currentLineBuffer = &tomRam8[lbufAddress];
1060
1061         // Render.
1062
1063 // Hmm. We check above for 24 BPP mode, but don't do anything about it below...
1064 // If we *were* in 24 BPP mode, how would you convert CRY to RGB24? Seems to me
1065 // that if you're in CRY mode then you wouldn't be able to use 24 BPP bitmaps
1066 // anyway.
1067 // This seems to be the case (at least according to the Midsummer docs)...!
1068
1069 // This is to test using palette zeroes instead of bit zeroes...
1070 // And it seems that this is wrong, index == 0 is transparent apparently... :-/
1071 //#define OP_USES_PALETTE_ZERO
1072
1073         if (depth == 0)                                                                 // 1 BPP
1074         {
1075                 // The LSB of flags is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1076                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1077
1078                 // Fetch 1st phrase...
1079                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1080 //Note that firstPix should only be honored *if* we start with the 1st phrase of the bitmap
1081 //i.e., we didn't clip on the margin... !!! FIX !!!
1082                 pixels <<= firstPix;                                            // Skip first N pixels (N=firstPix)...
1083                 int i = firstPix;                                                       // Start counter at right spot...
1084
1085                 while (iwidth--)
1086                 {
1087                         while (i++ < 64)
1088                         {
1089                                 uint8 bit = pixels >> 63;
1090 #ifndef OP_USES_PALETTE_ZERO
1091                                 if (flagTRANS && bit == 0)
1092 #else
1093                                 if (flagTRANS && (paletteRAM16[index | bit] == 0))
1094 #endif
1095                                         ;       // Do nothing...
1096                                 else
1097                                 {
1098                                         if (!flagRMW)
1099 //Optimize: Set paletteRAM16 to beginning of palette RAM + index*2 and use only [bit] as index...
1100 //Won't optimize RMW case though...
1101                                                 // This is the *only* correct use of endian-dependent code
1102                                                 // (i.e., mem-to-mem direct copying)!
1103                                                 *(uint16 *)currentLineBuffer = paletteRAM16[index | bit];
1104                                         else
1105                                                 *currentLineBuffer =
1106                                                         BLEND_CR(*currentLineBuffer, paletteRAM[(index | bit) << 1]),
1107                                                 *(currentLineBuffer + 1) =
1108                                                         BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bit) << 1) + 1]);
1109                                 }
1110
1111                                 currentLineBuffer += lbufDelta;
1112                                 pixels <<= 1;
1113                         }
1114                         i = 0;
1115                         // Fetch next phrase...
1116                         data += pitch;
1117                         pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1118                 }
1119         }
1120         else if (depth == 1)                                                    // 2 BPP
1121         {
1122 if (firstPix)
1123         WriteLog("OP: Fixed bitmap @ 2 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1124                 index &= 0xFC;                                                          // Top six bits form CLUT index
1125                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1126                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1127
1128                 while (iwidth--)
1129                 {
1130                         // Fetch phrase...
1131                         uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1132                         data += pitch;
1133
1134                         for(int i=0; i<32; i++)
1135                         {
1136                                 uint8 bits = pixels >> 62;
1137 // Seems to me that both of these are in the same endian, so we could cast it as
1138 // uint16 * and do straight across copies (what about 24 bpp? Treat it differently...)
1139 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
1140 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
1141 // No, it isn't because we read the memory in an endian safe way--this *won't* work...
1142 #ifndef OP_USES_PALETTE_ZERO
1143                                 if (flagTRANS && bits == 0)
1144 #else
1145                                 if (flagTRANS && (paletteRAM16[index | bits] == 0))
1146 #endif
1147                                         ;       // Do nothing...
1148                                 else
1149                                 {
1150                                         if (!flagRMW)
1151                                                 *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1152                                         else
1153                                                 *currentLineBuffer =
1154                                                         BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1155                                                 *(currentLineBuffer + 1) =
1156                                                         BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1157                                 }
1158
1159                                 currentLineBuffer += lbufDelta;
1160                                 pixels <<= 2;
1161                         }
1162                 }
1163         }
1164         else if (depth == 2)                                                    // 4 BPP
1165         {
1166 if (firstPix)
1167         WriteLog("OP: Fixed bitmap @ 4 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1168                 index &= 0xF0;                                                          // Top four bits form CLUT index
1169                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1170                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1171
1172                 while (iwidth--)
1173                 {
1174                         // Fetch phrase...
1175                         uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1176                         data += pitch;
1177
1178                         for(int i=0; i<16; i++)
1179                         {
1180                                 uint8 bits = pixels >> 60;
1181 // Seems to me that both of these are in the same endian, so we could cast it as
1182 // uint16 * and do straight across copies (what about 24 bpp? Treat it differently...)
1183 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
1184 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
1185 // No, it isn't because we read the memory in an endian safe way--this *won't* work...
1186 #ifndef OP_USES_PALETTE_ZERO
1187                                 if (flagTRANS && bits == 0)
1188 #else
1189                                 if (flagTRANS && (paletteRAM16[index | bits] == 0))
1190 #endif
1191                                         ;       // Do nothing...
1192                                 else
1193                                 {
1194                                         if (!flagRMW)
1195                                                 *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1196                                         else
1197                                                 *currentLineBuffer =
1198                                                         BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1199                                                 *(currentLineBuffer + 1) =
1200                                                         BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1201                                 }
1202
1203                                 currentLineBuffer += lbufDelta;
1204                                 pixels <<= 4;
1205                         }
1206                 }
1207         }
1208         else if (depth == 3)                                                    // 8 BPP
1209         {
1210                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1211                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1212
1213                 // Fetch 1st phrase...
1214                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1215 //Note that firstPix should only be honored *if* we start with the 1st phrase of the bitmap
1216 //i.e., we didn't clip on the margin... !!! FIX !!!
1217                 firstPix &= 0x30;                                                       // Only top two bits are valid for 8 BPP
1218                 pixels <<= firstPix;                                            // Skip first N pixels (N=firstPix)...
1219                 int i = firstPix >> 3;                                          // Start counter at right spot...
1220
1221                 while (iwidth--)
1222                 {
1223                         while (i++ < 8)
1224                         {
1225                                 uint8 bits = pixels >> 56;
1226 // Seems to me that both of these are in the same endian, so we could cast it as
1227 // uint16 * and do straight across copies (what about 24 bpp? Treat it differently...)
1228 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
1229 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
1230 // No, it isn't because we read the memory in an endian safe way--this *won't* work...
1231 //This would seem to be problematic...
1232 //Because it's the palette entry being zero that makes the pixel transparent...
1233 //Let's try it and see.
1234 #ifndef OP_USES_PALETTE_ZERO
1235                                 if (flagTRANS && bits == 0)
1236 #else
1237                                 if (flagTRANS && (paletteRAM16[bits] == 0))
1238 #endif
1239                                         ;       // Do nothing...
1240                                 else
1241                                 {
1242                                         if (!flagRMW)
1243                                                 *(uint16 *)currentLineBuffer = paletteRAM16[bits];
1244                                         else
1245                                                 *currentLineBuffer =
1246                                                         BLEND_CR(*currentLineBuffer, paletteRAM[bits << 1]),
1247                                                 *(currentLineBuffer + 1) =
1248                                                         BLEND_Y(*(currentLineBuffer + 1), paletteRAM[(bits << 1) + 1]);
1249                                 }
1250
1251                                 currentLineBuffer += lbufDelta;
1252                                 pixels <<= 8;
1253                         }
1254                         i = 0;
1255                         // Fetch next phrase...
1256                         data += pitch;
1257                         pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1258                 }
1259         }
1260         else if (depth == 4)                                                    // 16 BPP
1261         {
1262 if (firstPix)
1263         WriteLog("OP: Fixed bitmap @ 16 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1264                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1265                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1266
1267                 while (iwidth--)
1268                 {
1269                         // Fetch phrase...
1270                         uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1271                         data += pitch;
1272
1273                         for(int i=0; i<4; i++)
1274                         {
1275                                 uint8 bitsHi = pixels >> 56, bitsLo = pixels >> 48;
1276 // Seems to me that both of these are in the same endian, so we could cast it as
1277 // uint16 * and do straight across copies (what about 24 bpp? Treat it differently...)
1278 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
1279 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
1280 // No, it isn't because we read the memory in an endian safe way--it *won't* work...
1281 //This doesn't seem right... Let's try the encoded black value ($8800):
1282 //Apparently, CRY 0 maps to $8800...
1283                                 if (flagTRANS && ((bitsLo | bitsHi) == 0))
1284 //                              if (flagTRANS && (bitsHi == 0x88) && (bitsLo == 0x00))
1285                                         ;       // Do nothing...
1286                                 else
1287                                 {
1288                                         if (!flagRMW)
1289                                                 *currentLineBuffer = bitsHi,
1290                                                 *(currentLineBuffer + 1) = bitsLo;
1291                                         else
1292                                                 *currentLineBuffer =
1293                                                         BLEND_CR(*currentLineBuffer, bitsHi),
1294                                                 *(currentLineBuffer + 1) =
1295                                                         BLEND_Y(*(currentLineBuffer + 1), bitsLo);
1296                                 }
1297
1298                                 currentLineBuffer += lbufDelta;
1299                                 pixels <<= 16;
1300                         }
1301                 }
1302         }
1303         else if (depth == 5)                                                    // 24 BPP
1304         {
1305 //Looks like Iron Soldier is the only game that uses 24BPP mode...
1306 //There *might* be others...
1307 //WriteLog("OP: Writing 24 BPP bitmap!\n");
1308 if (firstPix)
1309         WriteLog("OP: Fixed bitmap @ 24 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1310                 // Not sure, but I think RMW only works with 16 BPP and below, and only in CRY mode...
1311                 // The LSB of flags is OPFLAG_REFLECT, so sign extend it and OR 4 into it.
1312                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 4) | 0x04;
1313
1314                 while (iwidth--)
1315                 {
1316                         // Fetch phrase...
1317                         uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1318                         data += pitch;
1319
1320                         for(int i=0; i<2; i++)
1321                         {
1322                                 // We don't use a 32-bit var here because of endian issues...!
1323                                 uint8 bits3 = pixels >> 56, bits2 = pixels >> 48,
1324                                         bits1 = pixels >> 40, bits0 = pixels >> 32;
1325
1326                                 if (flagTRANS && (bits3 | bits2 | bits1 | bits0) == 0)
1327                                         ;       // Do nothing...
1328                                 else
1329                                         *currentLineBuffer = bits3,
1330                                         *(currentLineBuffer + 1) = bits2,
1331                                         *(currentLineBuffer + 2) = bits1,
1332                                         *(currentLineBuffer + 3) = bits0;
1333
1334                                 currentLineBuffer += lbufDelta;
1335                                 pixels <<= 32;
1336                         }
1337                 }
1338         }
1339 }
1340
1341
1342 //
1343 // Store scaled bitmap in line buffer
1344 //
1345 void OPProcessScaledBitmap(uint64 p0, uint64 p1, uint64 p2, bool render)
1346 {
1347 // Need to make sure that writes stay within the line buffer...
1348 // LBUF ($F01800 - $F01D9E) 360 x 32-bit RAM
1349         uint8 depth = (p1 >> 12) & 0x07;                                // Color depth of image
1350         int32 xpos = ((int16)((p1 << 4) & 0xFFFF)) >> 4;// Image xpos in LBUF
1351         uint32 iwidth = (p1 >> 28) & 0x3FF;                             // Image width in *phrases*
1352         uint32 data = (p0 >> 40) & 0xFFFFF8;                    // Pixel data address
1353 //#ifdef OP_DEBUG_BMP
1354 // Prolly should use this... Though not sure exactly how.
1355 //Use the upper bits as an offset into the phrase depending on the BPP. That's how!
1356         uint32 firstPix = (p1 >> 49) & 0x3F;
1357 //This is WEIRD! I'm sure I saw Atari Karts request 8 BPP FIRSTPIX! What happened???
1358 if (firstPix)
1359         WriteLog("OP: FIRSTPIX != 0! (Scaled BM)\n");
1360 //#endif
1361 // We can ignore the RELEASE (high order) bit for now--probably forever...!
1362 //      uint8 flags = (p1 >> 45) & 0x0F;        // REFLECT, RMW, TRANS, RELEASE
1363 //Optimize: break these out to their own BOOL values [DONE]
1364         uint8 flags = (p1 >> 45) & 0x07;                                // REFLECT (0), RMW (1), TRANS (2)
1365         bool flagREFLECT = (flags & OPFLAG_REFLECT ? true : false),
1366                 flagRMW = (flags & OPFLAG_RMW ? true : false),
1367                 flagTRANS = (flags & OPFLAG_TRANS ? true : false);
1368         uint8 index = (p1 >> 37) & 0xFE;                                // CLUT index offset (upper pix, 1-4 bpp)
1369         uint32 pitch = (p1 >> 15) & 0x07;                               // Phrase pitch
1370
1371         uint8 * tomRam8 = TOMGetRamPointer();
1372         uint8 * paletteRAM = &tomRam8[0x400];
1373         // This is OK as long as it's used correctly: For 16-bit RAM to RAM direct copies--NOT
1374         // for use when using endian-corrected data (i.e., any of the *ReadWord functions!)
1375         uint16 * paletteRAM16 = (uint16 *)paletteRAM;
1376
1377         uint16 hscale = p2 & 0xFF;
1378 // Hmm. It seems that fixing the horizontal scale necessitated re-fixing this. Not sure why,
1379 // but seems to be consistent with the vertical scaling now (and it may turn out to be wrong!)...
1380         uint16 horizontalRemainder = hscale;                            // Not sure if it starts full, but seems reasonable [It's not!]
1381 //      uint8 horizontalRemainder = 0;                                  // Let's try zero! Seems to work! Yay! [No, it doesn't!]
1382         int32 scaledWidthInPixels = (iwidth * phraseWidthToPixels[depth] * hscale) >> 5;
1383         uint32 scaledPhrasePixels = (phraseWidthToPixels[depth] * hscale) >> 5;
1384
1385 //      WriteLog("bitmap %ix? %ibpp at %i,? firstpix=? data=0x%.8x pitch %i hflipped=%s dwidth=? (linked to ?) RMW=%s Tranparent=%s\n",
1386 //              iwidth, op_bitmap_bit_depth[bitdepth], xpos, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), (flags&OPFLAG_RMW ? "yes" : "no"), (flags&OPFLAG_TRANS ? "yes" : "no"));
1387
1388 // Looks like an hscale of zero means don't draw!
1389         if (!render || iwidth == 0 || hscale == 0)
1390                 return;
1391
1392 /*extern int start_logging;
1393 if (start_logging)
1394         WriteLog("OP: Scaled bitmap %ix? %ibpp at %i,? hscale=%02X fpix=%i data=%08X pitch %i hflipped=%s dwidth=? (linked to %08X) Transluency=%s\n",
1395                 iwidth, op_bitmap_bit_depth[depth], xpos, hscale, firstPix, data, pitch, (flagREFLECT ? "yes" : "no"), op_pointer, (flagRMW ? "yes" : "no"));*/
1396 //#define OP_DEBUG_BMP
1397 //#ifdef OP_DEBUG_BMP
1398 //      WriteLog("OP: Scaled bitmap %ix%i %ibpp at %i,%i firstpix=%i data=0x%.8x pitch %i hflipped=%s dwidth=%i (linked to 0x%.8x) Transluency=%s\n",
1399 //              iwidth, height, op_bitmap_bit_depth[bitdepth], xpos, ypos, firstPix, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), dwidth, op_pointer, (flags&OPFLAG_RMW ? "yes" : "no"));
1400 //#endif
1401
1402         int32 startPos = xpos, endPos = xpos +
1403                 (!flagREFLECT ? scaledWidthInPixels - 1 : -(scaledWidthInPixels + 1));
1404         uint32 clippedWidth = 0, phraseClippedWidth = 0, dataClippedWidth = 0;
1405         bool in24BPPMode = (((GET16(tomRam8, 0x0028) >> 1) & 0x03) == 1 ? true : false);        // VMODE
1406         // Not sure if this is Jaguar Two only location or what...
1407         // From the docs, it is... If we want to limit here we should think of something else.
1408 //      int32 limit = GET16(tom_ram_8, 0x0008);                 // LIMIT
1409         int32 limit = 720;
1410 //      int32 lbufWidth = (!in24BPPMode ? limit - 1 : (limit / 2) - 1); // Zero based limit...
1411         int32 lbufWidth = 719;  // Zero based limit...
1412
1413         // If the image is completely to the left or right of the line buffer, then bail.
1414 //If in REFLECT mode, then these values are swapped! !!! FIX !!! [DONE]
1415 //There are four possibilities:
1416 //  1. image sits on left edge and no REFLECT; starts out of bounds but ends in bounds.
1417 //  2. image sits on left edge and REFLECT; starts in bounds but ends out of bounds.
1418 //  3. image sits on right edge and REFLECT; starts out of bounds but ends in bounds.
1419 //  4. image sits on right edge and no REFLECT; starts in bounds but ends out of bounds.
1420 //Numbers 2 & 4 can be caught by checking the LBUF clip while in the inner loop,
1421 // numbers 1 & 3 are of concern.
1422 // This *indirectly* handles only cases 2 & 4! And is WRONG if REFLECT is set...!
1423 //      if (rightMargin < 0 || leftMargin > lbufWidth)
1424
1425 // It might be easier to swap these (if REFLECTed) and just use XPOS down below...
1426 // That way, you could simply set XPOS to leftMargin if !REFLECT and to rightMargin otherwise.
1427 // Still have to be careful with the DATA and IWIDTH values though...
1428
1429         if ((!flagREFLECT && (endPos < 0 || startPos > lbufWidth))
1430                 || (flagREFLECT && (startPos < 0 || endPos > lbufWidth)))
1431                 return;
1432
1433         // Otherwise, find the clip limits and clip the phrase as well...
1434         // NOTE: I'm fudging here by letting the actual blit overstep the bounds of the
1435         //       line buffer, but it shouldn't matter since there are two unused line
1436         //       buffers below and nothing above and I'll at most write 40 bytes outside
1437         //       the line buffer... I could use a fractional clip begin/end value, but
1438         //       this makes the blit a *lot* more hairy. I might fix this in the future
1439         //       if it becomes necessary. (JLH)
1440         //       Probably wouldn't be *that* hairy. Just use a delta that tells the inner loop
1441         //       which pixel in the phrase is being written, and quit when either end of phrases
1442         //       is reached or line buffer extents are surpassed.
1443
1444 //This stuff is probably wrong as well... !!! FIX !!!
1445 //The strange thing is that it seems to work, but that's no guarantee that it's bulletproof!
1446 //Yup. Seems that JagMania doesn't work correctly with this...
1447 //Dunno if this is the problem, but Atari Karts is showing *some* of the road now...
1448 //Actually, it is! Or, it was. It doesn't seem to be clipping here, so the problem lies
1449 //elsewhere! Hmm. Putting the scaling code into the 1/2/8 BPP cases seems to draw the ground
1450 // a bit more accurately... Strange!
1451 //It's probably a case of the REFLECT flag being set and the background being written
1452 //from the right side of the screen...
1453 //But no, it isn't... At least if the diagnostics are telling the truth!
1454
1455         // NOTE: We're just using endPos to figure out how much, if any, to clip by.
1456         // ALSO: There may be another case where we start out of bounds and end out of bounds...!
1457         // !!! FIX !!!
1458
1459 //There's a problem here with scaledPhrasePixels in that it can be forced to zero when
1460 //the scaling factor is small. So fix it already! !!! FIX !!!
1461 /*if (scaledPhrasePixels == 0)
1462 {
1463         WriteLog("OP: [Scaled] We're about to encounter a divide by zero error!\n");
1464         DumpScaledObject(p0, p1, p2);
1465 }//*/
1466 //NOTE: I'm almost 100% sure that this is wrong... And it is! :-p
1467
1468 //Try a simple example...
1469 // Let's say we have an 8 BPP scanline with an hscale of $80 (4). Our xpos is -10,
1470 // non-flipped. Pixels in the bitmap are XYZXYZXYZXYZXYZ.
1471 // Scaled up, they would be XXXXYYYYZZZZXXXXYYYYZZZZXXXXYYYYZZZZ...
1472 //
1473 // Normally, we would expect this in the line buffer:
1474 // ZZXXXXYYYYZZZZXXXXYYYYZZZZ...
1475 //
1476 // But instead we're getting:
1477 // XXXXYYYYZZZZXXXXYYYYZZZZ...
1478 //
1479 // or are we??? It would seem so, simply by virtue of the fact that we're NOT starting
1480 // on negative boundary--or are we? Hmm...
1481 // cw = 10, dcw = pcw = 10 / ([8 * 4 = 32] 32) = 0, sp = -10
1482 //
1483 // Let's try a real world example:
1484 //
1485 //OP: Scaled bitmap (70, 8 BPP, spp=28) sp (-400) < 0... [new sp=-8, cw=400, dcw=pcw=14]
1486 //OP: Scaled bitmap (6F, 8 BPP, spp=27) sp (-395) < 0... [new sp=-17, cw=395, dcw=pcw=14]
1487 //
1488 // Really, spp is 27.75 in the second case...
1489 // So... If we do 395 / 27.75, we get 14 (whole phrases). Ok so far... If we scale that back
1490 // up (14 * 27.75 = 388.5) and add it to the start position (-395), we get -6.5... NOT -17!
1491
1492 //Now it seems we're working OK, at least for the first case...
1493 uint32 scaledPhrasePixelsUS = phraseWidthToPixels[depth] * hscale;
1494
1495         if (startPos < 0)                       // Case #1: Begin out, end in, L to R
1496 {
1497 extern int start_logging;
1498 if (start_logging)
1499         WriteLog("OP: Scaled bitmap (%02X, %u BPP, spp=%u) start pos (%i) < 0...", hscale, op_bitmap_bit_depth[depth], scaledPhrasePixels, startPos);
1500 //              clippedWidth = 0 - startPos,
1501                 clippedWidth = (0 - startPos) << 5,
1502 //              dataClippedWidth = phraseClippedWidth = clippedWidth / scaledPhrasePixels,
1503                 dataClippedWidth = phraseClippedWidth = (clippedWidth / scaledPhrasePixelsUS) >> 5,
1504 //              startPos = 0 - (clippedWidth % scaledPhrasePixels);
1505                 startPos += (dataClippedWidth * scaledPhrasePixelsUS) >> 5;
1506 if (start_logging)
1507         WriteLog(" [new sp=%i, cw=%i, dcw=pcw=%i]\n", startPos, clippedWidth, dataClippedWidth);
1508 }
1509
1510         if (endPos < 0)                         // Case #2: Begin in, end out, R to L
1511                 clippedWidth = 0 - endPos,
1512                 phraseClippedWidth = clippedWidth / scaledPhrasePixels;
1513
1514         if (endPos > lbufWidth)         // Case #3: Begin in, end out, L to R
1515                 clippedWidth = endPos - lbufWidth,
1516                 phraseClippedWidth = clippedWidth / scaledPhrasePixels;
1517
1518         if (startPos > lbufWidth)       // Case #4: Begin out, end in, R to L
1519                 clippedWidth = startPos - lbufWidth,
1520                 dataClippedWidth = phraseClippedWidth = clippedWidth / scaledPhrasePixels,
1521                 startPos = lbufWidth + (clippedWidth % scaledPhrasePixels);
1522
1523 extern int op_start_log;
1524 if (op_start_log && clippedWidth != 0)
1525         WriteLog("OP: Clipped line. SP=%i, EP=%i, clip=%u, iwidth=%u, hscale=%02X\n", startPos, endPos, clippedWidth, iwidth, hscale);
1526 if (op_start_log && startPos == 13)
1527 {
1528         WriteLog("OP: Scaled line. SP=%i, EP=%i, clip=%u, iwidth=%u, hscale=%02X, depth=%u, firstPix=%u\n", startPos, endPos, clippedWidth, iwidth, hscale, depth, firstPix);
1529         DumpScaledObject(p0, p1, p2);
1530         if (iwidth == 7)
1531         {
1532                 WriteLog("    %08X: ", data);
1533                 for(int i=0; i<7*8; i++)
1534                         WriteLog("%02X ", JaguarReadByte(data+i));
1535                 WriteLog("\n");
1536         }
1537 }
1538         // If the image is sitting on the line buffer left or right edge, we need to compensate
1539         // by decreasing the image phrase width accordingly.
1540         iwidth -= phraseClippedWidth;
1541
1542         // Also, if we're clipping the phrase we need to make sure we're in the correct part of
1543         // the pixel data.
1544 //      data += phraseClippedWidth * (pitch << 3);
1545         data += dataClippedWidth * (pitch << 3);
1546
1547         // NOTE: When the bitmap is in REFLECT mode, the XPOS marks the *right* side of the
1548         //       bitmap! This makes clipping & etc. MUCH, much easier...!
1549 //      uint32 lbufAddress = 0x1800 + (!in24BPPMode ? leftMargin * 2 : leftMargin * 4);
1550 //      uint32 lbufAddress = 0x1800 + (!in24BPPMode ? startPos * 2 : startPos * 4);
1551         uint32 lbufAddress = 0x1800 + startPos * 2;
1552         uint8 * currentLineBuffer = &tomRam8[lbufAddress];
1553 //uint8 * lineBufferLowerLimit = &tom_ram_8[0x1800],
1554 //      * lineBufferUpperLimit = &tom_ram_8[0x1800 + 719];
1555
1556         // Render.
1557
1558 // Hmm. We check above for 24 BPP mode, but don't do anything about it below...
1559 // If we *were* in 24 BPP mode, how would you convert CRY to RGB24? Seems to me
1560 // that if you're in CRY mode then you wouldn't be able to use 24 BPP bitmaps
1561 // anyway.
1562 // This seems to be the case (at least according to the Midsummer docs)...!
1563
1564         if (depth == 0)                                                                 // 1 BPP
1565         {
1566 if (firstPix != 0)
1567         WriteLog("OP: Scaled bitmap @ 1 BPP requesting FIRSTPIX!\n");
1568                 // The LSB of flags is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1569                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1570
1571                 int pixCount = 0;
1572                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1573
1574                 while ((int32)iwidth > 0)
1575                 {
1576                         uint8 bits = pixels >> 63;
1577
1578 #ifndef OP_USES_PALETTE_ZERO
1579                         if (flagTRANS && bits == 0)
1580 #else
1581                         if (flagTRANS && (paletteRAM16[index | bits] == 0))
1582 #endif
1583                                 ;       // Do nothing...
1584                         else
1585                         {
1586                                 if (!flagRMW)
1587                                         // This is the *only* correct use of endian-dependent code
1588                                         // (i.e., mem-to-mem direct copying)!
1589                                         *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1590                                 else
1591                                         *currentLineBuffer =
1592                                                 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1593                                         *(currentLineBuffer + 1) =
1594                                                 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1595                         }
1596
1597                         currentLineBuffer += lbufDelta;
1598
1599 /*
1600 The reason we subtract 1.0 from horizontalRemainder *after* the test is that it used to have
1601 too few bits to properly recognize a negative number. But now it's 16 bits
1602 wide, so we could probably go back to that (as long as we make it an int16 and not a uint16!)
1603 */
1604 /*                      horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1605                         while (horizontalRemainder & 0x80)
1606                         {
1607                                 horizontalRemainder += hscale;
1608                                 pixCount++;
1609                                 pixels <<= 1;
1610                         }//*/
1611 //                      while (horizontalRemainder <= 0x20)             // I.e., it's <= 1.0 (*before* subtraction)
1612                         while (horizontalRemainder < 0x20)              // I.e., it's <= 1.0 (*before* subtraction)
1613                         {
1614                                 horizontalRemainder += hscale;
1615                                 pixCount++;
1616                                 pixels <<= 1;
1617                         }
1618                         horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1619
1620                         if (pixCount > 63)
1621                         {
1622                                 int phrasesToSkip = pixCount / 64, pixelShift = pixCount % 64;
1623
1624                                 data += (pitch << 3) * phrasesToSkip;
1625                                 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1626                                 pixels <<= 1 * pixelShift;
1627                                 iwidth -= phrasesToSkip;
1628                                 pixCount = pixelShift;
1629                         }
1630                 }
1631         }
1632         else if (depth == 1)                                                    // 2 BPP
1633         {
1634 if (firstPix != 0)
1635         WriteLog("OP: Scaled bitmap @ 2 BPP requesting FIRSTPIX!\n");
1636                 index &= 0xFC;                                                          // Top six bits form CLUT index
1637                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1638                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1639
1640                 int pixCount = 0;
1641                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1642
1643                 while ((int32)iwidth > 0)
1644                 {
1645                         uint8 bits = pixels >> 62;
1646
1647 #ifndef OP_USES_PALETTE_ZERO
1648                         if (flagTRANS && bits == 0)
1649 #else
1650                         if (flagTRANS && (paletteRAM16[index | bits] == 0))
1651 #endif
1652                                 ;       // Do nothing...
1653                         else
1654                         {
1655                                 if (!flagRMW)
1656                                         // This is the *only* correct use of endian-dependent code
1657                                         // (i.e., mem-to-mem direct copying)!
1658                                         *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1659                                 else
1660                                         *currentLineBuffer =
1661                                                 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1662                                         *(currentLineBuffer + 1) =
1663                                                 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1664                         }
1665
1666                         currentLineBuffer += lbufDelta;
1667
1668 /*                      horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1669                         while (horizontalRemainder & 0x80)
1670                         {
1671                                 horizontalRemainder += hscale;
1672                                 pixCount++;
1673                                 pixels <<= 2;
1674                         }//*/
1675 //                      while (horizontalRemainder <= 0x20)             // I.e., it's <= 0 (*before* subtraction)
1676                         while (horizontalRemainder < 0x20)              // I.e., it's <= 1.0 (*before* subtraction)
1677                         {
1678                                 horizontalRemainder += hscale;
1679                                 pixCount++;
1680                                 pixels <<= 2;
1681                         }
1682                         horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1683
1684                         if (pixCount > 31)
1685                         {
1686                                 int phrasesToSkip = pixCount / 32, pixelShift = pixCount % 32;
1687
1688                                 data += (pitch << 3) * phrasesToSkip;
1689                                 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1690                                 pixels <<= 2 * pixelShift;
1691                                 iwidth -= phrasesToSkip;
1692                                 pixCount = pixelShift;
1693                         }
1694                 }
1695         }
1696         else if (depth == 2)                                                    // 4 BPP
1697         {
1698 if (firstPix != 0)
1699         WriteLog("OP: Scaled bitmap @ 4 BPP requesting FIRSTPIX!\n");
1700                 index &= 0xF0;                                                          // Top four bits form CLUT index
1701                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1702                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1703
1704                 int pixCount = 0;
1705                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1706
1707                 while ((int32)iwidth > 0)
1708                 {
1709                         uint8 bits = pixels >> 60;
1710
1711 #ifndef OP_USES_PALETTE_ZERO
1712                         if (flagTRANS && bits == 0)
1713 #else
1714                         if (flagTRANS && (paletteRAM16[index | bits] == 0))
1715 #endif
1716                                 ;       // Do nothing...
1717                         else
1718                         {
1719                                 if (!flagRMW)
1720                                         // This is the *only* correct use of endian-dependent code
1721                                         // (i.e., mem-to-mem direct copying)!
1722                                         *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1723                                 else
1724                                         *currentLineBuffer =
1725                                                 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1726                                         *(currentLineBuffer + 1) =
1727                                                 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1728                         }
1729
1730                         currentLineBuffer += lbufDelta;
1731
1732 /*                      horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1733                         while (horizontalRemainder & 0x80)
1734                         {
1735                                 horizontalRemainder += hscale;
1736                                 pixCount++;
1737                                 pixels <<= 4;
1738                         }//*/
1739 //                      while (horizontalRemainder <= 0x20)             // I.e., it's <= 0 (*before* subtraction)
1740                         while (horizontalRemainder < 0x20)              // I.e., it's <= 0 (*before* subtraction)
1741                         {
1742                                 horizontalRemainder += hscale;
1743                                 pixCount++;
1744                                 pixels <<= 4;
1745                         }
1746                         horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1747
1748                         if (pixCount > 15)
1749                         {
1750                                 int phrasesToSkip = pixCount / 16, pixelShift = pixCount % 16;
1751
1752                                 data += (pitch << 3) * phrasesToSkip;
1753                                 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1754                                 pixels <<= 4 * pixelShift;
1755                                 iwidth -= phrasesToSkip;
1756                                 pixCount = pixelShift;
1757                         }
1758                 }
1759         }
1760         else if (depth == 3)                                                    // 8 BPP
1761         {
1762 if (firstPix)
1763         WriteLog("OP: Scaled bitmap @ 8 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1764                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1765                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1766
1767                 int pixCount = 0;
1768                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1769
1770                 while ((int32)iwidth > 0)
1771                 {
1772                         uint8 bits = pixels >> 56;
1773
1774 #ifndef OP_USES_PALETTE_ZERO
1775                         if (flagTRANS && bits == 0)
1776 #else
1777                         if (flagTRANS && (paletteRAM16[bits] == 0))
1778 #endif
1779                                 ;       // Do nothing...
1780                         else
1781                         {
1782                                 if (!flagRMW)
1783                                         // This is the *only* correct use of endian-dependent code
1784                                         // (i.e., mem-to-mem direct copying)!
1785                                         *(uint16 *)currentLineBuffer = paletteRAM16[bits];
1786 /*                              {
1787                                         if (currentLineBuffer >= lineBufferLowerLimit && currentLineBuffer <= lineBufferUpperLimit)
1788                                                 *(uint16 *)currentLineBuffer = paletteRAM16[bits];
1789                                 }*/
1790                                 else
1791                                         *currentLineBuffer =
1792                                                 BLEND_CR(*currentLineBuffer, paletteRAM[bits << 1]),
1793                                         *(currentLineBuffer + 1) =
1794                                                 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[(bits << 1) + 1]);
1795                         }
1796
1797                         currentLineBuffer += lbufDelta;
1798
1799 //                      while (horizontalRemainder <= 0x20)             // I.e., it's <= 0 (*before* subtraction)
1800                         while (horizontalRemainder < 0x20)              // I.e., it's <= 1.0 (*before* subtraction)
1801                         {
1802                                 horizontalRemainder += hscale;
1803                                 pixCount++;
1804                                 pixels <<= 8;
1805                         }
1806                         horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1807
1808                         if (pixCount > 7)
1809                         {
1810                                 int phrasesToSkip = pixCount / 8, pixelShift = pixCount % 8;
1811
1812                                 data += (pitch << 3) * phrasesToSkip;
1813                                 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1814                                 pixels <<= 8 * pixelShift;
1815                                 iwidth -= phrasesToSkip;
1816                                 pixCount = pixelShift;
1817                         }
1818                 }
1819         }
1820         else if (depth == 4)                                                    // 16 BPP
1821         {
1822 if (firstPix != 0)
1823         WriteLog("OP: Scaled bitmap @ 16 BPP requesting FIRSTPIX!\n");
1824                 // The LSB is OPFLAG_REFLECT, so sign extend it and OR 2 into it.
1825                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1826
1827                 int pixCount = 0;
1828                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1829
1830                 while ((int32)iwidth > 0)
1831                 {
1832                         uint8 bitsHi = pixels >> 56, bitsLo = pixels >> 48;
1833
1834 //This doesn't seem right... Let's try the encoded black value ($8800):
1835 //Apparently, CRY 0 maps to $8800...
1836                                 if (flagTRANS && ((bitsLo | bitsHi) == 0))
1837 //                              if (flagTRANS && (bitsHi == 0x88) && (bitsLo == 0x00))
1838                                 ;       // Do nothing...
1839                         else
1840                         {
1841                                 if (!flagRMW)
1842                                         *currentLineBuffer = bitsHi,
1843                                         *(currentLineBuffer + 1) = bitsLo;
1844                                 else
1845                                         *currentLineBuffer =
1846                                                 BLEND_CR(*currentLineBuffer, bitsHi),
1847                                         *(currentLineBuffer + 1) =
1848                                                 BLEND_Y(*(currentLineBuffer + 1), bitsLo);
1849                         }
1850
1851                         currentLineBuffer += lbufDelta;
1852
1853 /*                      horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1854                         while (horizontalRemainder & 0x80)
1855                         {
1856                                 horizontalRemainder += hscale;
1857                                 pixCount++;
1858                                 pixels <<= 16;
1859                         }//*/
1860 //                      while (horizontalRemainder <= 0x20)             // I.e., it's <= 0 (*before* subtraction)
1861                         while (horizontalRemainder < 0x20)              // I.e., it's <= 1.0 (*before* subtraction)
1862                         {
1863                                 horizontalRemainder += hscale;
1864                                 pixCount++;
1865                                 pixels <<= 16;
1866                         }
1867                         horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1868 //*/
1869                         if (pixCount > 3)
1870                         {
1871                                 int phrasesToSkip = pixCount / 4, pixelShift = pixCount % 4;
1872
1873                                 data += (pitch << 3) * phrasesToSkip;
1874                                 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1875                                 pixels <<= 16 * pixelShift;
1876
1877                                 iwidth -= phrasesToSkip;
1878
1879                                 pixCount = pixelShift;
1880                         }
1881                 }
1882         }
1883         else if (depth == 5)                                                    // 24 BPP
1884         {
1885 //I'm not sure that you can scale a 24 BPP bitmap properly--the JTRM seems to indicate as much.
1886 WriteLog("OP: Writing 24 BPP scaled bitmap!\n");
1887 if (firstPix != 0)
1888         WriteLog("OP: Scaled bitmap @ 24 BPP requesting FIRSTPIX!\n");
1889                 // Not sure, but I think RMW only works with 16 BPP and below, and only in CRY mode...
1890                 // The LSB is OPFLAG_REFLECT, so sign extend it and OR 4 into it.
1891                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 4) | 0x04;
1892
1893                 while (iwidth--)
1894                 {
1895                         // Fetch phrase...
1896                         uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1897                         data += pitch << 3;                                             // Multiply pitch * 8 (optimize: precompute this value)
1898
1899                         for(int i=0; i<2; i++)
1900                         {
1901                                 uint8 bits3 = pixels >> 56, bits2 = pixels >> 48,
1902                                         bits1 = pixels >> 40, bits0 = pixels >> 32;
1903
1904                                 if (flagTRANS && (bits3 | bits2 | bits1 | bits0) == 0)
1905                                         ;       // Do nothing...
1906                                 else
1907                                         *currentLineBuffer = bits3,
1908                                         *(currentLineBuffer + 1) = bits2,
1909                                         *(currentLineBuffer + 2) = bits1,
1910                                         *(currentLineBuffer + 3) = bits0;
1911
1912                                 currentLineBuffer += lbufDelta;
1913                                 pixels <<= 32;
1914                         }
1915                 }
1916         }
1917 }