]> Shamusworld >> Repos - virtualjaguar/blob - src/op.cpp
Fix for bad branch handling in OP.
[virtualjaguar] / src / op.cpp
1 //
2 // Object Processor
3 //
4 // Original source by David Raingeard (Cal2)
5 // GCC/SDL port by Niels Wagenaar (Linux/WIN32) and Caz (BeOS)
6 // Extensive cleanups/fixes/rewrites by James Hammons
7 // (C) 2010 Underground Software
8 //
9 // JLH = James Hammons <jlhamm@acm.org>
10 //
11 // Who  When        What
12 // ---  ----------  -------------------------------------------------------------
13 // JLH  01/16/2010  Created this log ;-)
14 //
15
16 #include "op.h"
17
18 #include <stdlib.h>
19 #include <string.h>
20 #include "gpu.h"
21 #include "jaguar.h"
22 #include "log.h"
23 #include "m68000/m68kinterface.h"
24 #include "memory.h"
25 #include "tom.h"
26
27 //#define OP_DEBUG
28 //#define OP_DEBUG_BMP
29
// Blend table lookups. The table index is (dst << 8) | src: dst selects the
// high 8 bits of the index and src the low 8 bits.
// Every macro argument is individually parenthesized so that compound
// expressions (e.g. BLEND_Y(a + b, c)) are cast and shifted as a whole.
#define BLEND_Y(dst, src)	op_blend_y[(((uint16_t)(dst)) << 8) | ((uint16_t)(src))]
#define BLEND_CR(dst, src)	op_blend_cr[(((uint16_t)(dst)) << 8) | ((uint16_t)(src))]
32
33 #define OBJECT_TYPE_BITMAP      0                                       // 000
34 #define OBJECT_TYPE_SCALE       1                                       // 001
35 #define OBJECT_TYPE_GPU         2                                       // 010
36 #define OBJECT_TYPE_BRANCH      3                                       // 011
37 #define OBJECT_TYPE_STOP        4                                       // 100
38
39 #define CONDITION_EQUAL                         0                       // VC == YPOS
40 #define CONDITION_LESS_THAN                     1                       // VC < YPOS
41 #define CONDITION_GREATER_THAN          2                       // VC > YPOS
42 #define CONDITION_OP_FLAG_SET           3
43 #define CONDITION_SECOND_HALF_LINE      4
44
45 #if 0
46 #define OPFLAG_RELEASE          8                                       // Bus release bit
47 #define OPFLAG_TRANS            4                                       // Transparency bit
48 #define OPFLAG_RMW                      2                                       // Read-Modify-Write bit
49 #define OPFLAG_REFLECT          1                                       // Horizontal mirror bit
50 #endif
51
52 // Private function prototypes
53
54 void OPProcessFixedBitmap(uint64_t p0, uint64_t p1, bool render);
55 void OPProcessScaledBitmap(uint64_t p0, uint64_t p1, uint64_t p2, bool render);
56 void OPDiscoverObjects(uint32_t address);
57 void OPDumpObjectList(void);
58 void DumpScaledObject(uint64_t p0, uint64_t p1, uint64_t p2);
59 void DumpFixedObject(uint64_t p0, uint64_t p1);
60 void DumpBitmapCore(uint64_t p0, uint64_t p1);
61 uint64_t OPLoadPhrase(uint32_t offset);
62
63 // Local global variables
64
65 // Blend tables (64K each)
66 static uint8_t op_blend_y[0x10000];
67 static uint8_t op_blend_cr[0x10000];
68 // There may be a problem with this "RAM" overlapping (and thus being independent of)
69 // some of the regular TOM RAM...
70 //#warning objectp_ram is separated from TOM RAM--need to fix that!
71 //static uint8_t objectp_ram[0x40];                     // This is based at $F00000
72 uint8_t objectp_running = 0;
73 //bool objectp_stop_reading_list;
74
75 static uint8_t op_bitmap_bit_depth[8] = { 1, 2, 4, 8, 16, 24, 32, 0 };
76 //static uint32_t op_bitmap_bit_size[8] =
77 //      { (uint32_t)(0.125*65536), (uint32_t)(0.25*65536), (uint32_t)(0.5*65536), (uint32_t)(1*65536),
78 //        (uint32_t)(2*65536),     (uint32_t)(1*65536),    (uint32_t)(1*65536),   (uint32_t)(1*65536) };
79 static uint32_t op_pointer;
80
81 int32_t phraseWidthToPixels[8] = { 64, 32, 16, 8, 4, 2, 0, 0 };
82
83
84 //
85 // Object Processor initialization
86 //
87 void OPInit(void)
88 {
89         // Here we calculate the saturating blend of a signed 4-bit value and an
90         // existing Cyan/Red value as well as a signed 8-bit value and an existing intensity...
91         // Note: CRY is 4 bits Cyan, 4 bits Red, 16 bits intensitY
92         for(int i=0; i<256*256; i++)
93         {
94                 int y = (i >> 8) & 0xFF;
95                 int dy = (int8_t)i;                                     // Sign extend the Y index
96                 int c1 = (i >> 8) & 0x0F;
97                 int dc1 = (int8_t)(i << 4) >> 4;                // Sign extend the R index
98                 int c2 = (i >> 12) & 0x0F;
99                 int dc2 = (int8_t)(i & 0xF0) >> 4;      // Sign extend the C index
100
101                 y += dy;
102
103                 if (y < 0)
104                         y = 0;
105                 else if (y > 0xFF)
106                         y = 0xFF;
107
108                 op_blend_y[i] = y;
109
110                 c1 += dc1;
111
112                 if (c1 < 0)
113                         c1 = 0;
114                 else if (c1 > 0x0F)
115                         c1 = 0x0F;
116
117                 c2 += dc2;
118
119                 if (c2 < 0)
120                         c2 = 0;
121                 else if (c2 > 0x0F)
122                         c2 = 0x0F;
123
124                 op_blend_cr[i] = (c2 << 4) | c1;
125         }
126
127         OPReset();
128 }
129
130
131 //
132 // Object Processor reset
133 //
134 void OPReset(void)
135 {
136 //      memset(objectp_ram, 0x00, 0x40);
137         objectp_running = 0;
138 }
139
140
141 static const char * opType[8] =
142 { "(BITMAP)", "(SCALED BITMAP)", "(GPU INT)", "(BRANCH)", "(STOP)", "???", "???", "???" };
143 static const char * ccType[8] =
144         { "==", "<", ">", "(opflag set)", "(second half line)", "?", "?", "?" };
145 static uint32_t object[8192];
146 static uint32_t numberOfObjects;
147 //static uint32_t objectLink[8192];
148 //static uint32_t numberOfLinks;
149
150
151 void OPDone(void)
152 {
153 //#warning "!!! Fix OL dump so that it follows links !!!"
154 //      const char * opType[8] =
155 //      { "(BITMAP)", "(SCALED BITMAP)", "(GPU INT)", "(BRANCH)", "(STOP)", "???", "???", "???" };
156 //      const char * ccType[8] =
157 //              { "\"==\"", "\"<\"", "\">\"", "(opflag set)", "(second half line)", "?", "?", "?" };
158
159         uint32_t olp = OPGetListPointer();
160         WriteLog("\nOP: OLP = $%08X\n", olp);
161         WriteLog("OP: Phrase dump\n    ----------\n");
162
163 #if 0
164         for(uint32_t i=0; i<0x100; i+=8)
165         {
166                 uint32_t hi = JaguarReadLong(olp + i, OP), lo = JaguarReadLong(olp + i + 4, OP);
167                 WriteLog("\t%08X: %08X %08X %s", olp + i, hi, lo, opType[lo & 0x07]);
168
169                 if ((lo & 0x07) == 3)
170                 {
171                         uint16_t ypos = (lo >> 3) & 0x7FF;
172                         uint8_t  cc   = (lo >> 14) & 0x03;
173                         uint32_t link = ((hi << 11) | (lo >> 21)) & 0x3FFFF8;
174                         WriteLog(" YPOS=%u, CC=%s, link=%08X", ypos, ccType[cc], link);
175                 }
176
177                 WriteLog("\n");
178
179                 if ((lo & 0x07) == 0)
180                         DumpFixedObject(OPLoadPhrase(olp+i), OPLoadPhrase(olp+i+8));
181
182                 if ((lo & 0x07) == 1)
183                         DumpScaledObject(OPLoadPhrase(olp+i), OPLoadPhrase(olp+i+8), OPLoadPhrase(olp+i+16));
184         }
185
186         WriteLog("\n");
187 #else
188 //#warning "!!! Fix lockup in OPDiscoverObjects() !!!"
189 //temp, to keep the following function from locking up on bad/weird OLs
190 //return;
191
192         numberOfObjects = 0;
193         OPDiscoverObjects(olp);
194         OPDumpObjectList();
195 #endif
196 }
197
198
199 bool OPObjectExists(uint32_t address)
200 {
201         // Yes, we really do a linear search, every time. :-/
202         for(uint32_t i=0; i<numberOfObjects; i++)
203         {
204                 if (address == object[i])
205                         return true;
206         }
207
208         return false;
209 }
210
211
212 void OPDiscoverObjects(uint32_t address)
213 {
214         uint8_t objectType = 0;
215
216         do
217         {
218                 // If we've seen this object already, bail out!
219                 // Otherwise, add it to the list
220                 if (OPObjectExists(address))
221                         return;
222
223                 object[numberOfObjects++] = address;
224
225                 // Get the object & decode its type, link address
226                 uint32_t hi = JaguarReadLong(address + 0, OP);
227                 uint32_t lo = JaguarReadLong(address + 4, OP);
228                 objectType = lo & 0x07;
229                 uint32_t link = ((hi << 11) | (lo >> 21)) & 0x3FFFF8;
230
231                 if (objectType == 3)
232                 {
233                         // Recursion needed to follow all links! This does depth-first recursion
234                         // on the not-taken objects
235                         OPDiscoverObjects(address + 8);
236                 }
237
238                 // Get the next object...
239                 address = link;
240         }
241         while (objectType != 4);
242 }
243
244
245 void OPDumpObjectList(void)
246 {
247         for(uint32_t i=0; i<numberOfObjects; i++)
248         {
249                 uint32_t address = object[i];
250
251                 uint32_t hi = JaguarReadLong(address + 0, OP);
252                 uint32_t lo = JaguarReadLong(address + 4, OP);
253                 uint8_t objectType = lo & 0x07;
254                 uint32_t link = ((hi << 11) | (lo >> 21)) & 0x3FFFF8;
255                 WriteLog("%08X: %08X %08X %s -> $08X", address, hi, lo, opType[objectType], link);
256
257                 if (objectType == 3)
258                 {
259                         uint16_t ypos = (lo >> 3) & 0x7FF;
260                         uint8_t  cc   = (lo >> 14) & 0x07;      // Proper # of bits == 3
261                         WriteLog(" YPOS %s %u", ccType[cc], ypos);
262                 }
263
264                 WriteLog("\n");
265
266                 if (objectType == 0)
267                         DumpFixedObject(OPLoadPhrase(address + 0), OPLoadPhrase(address + 8));
268
269                 if (objectType == 1)
270                         DumpScaledObject(OPLoadPhrase(address + 0), OPLoadPhrase(address + 8),
271                                 OPLoadPhrase(address + 16));
272
273                 if (address == link)    // Ruh roh...
274                 {
275                         // Runaway recursive link is bad!
276                         WriteLog("***** SELF REFERENTIAL LINK *****\n\n");
277                 }
278         }
279
280         WriteLog("\n");
281 }
282
283
284 //
285 // Object Processor memory access
286 // Memory range: F00010 - F00027
287 //
288 //      F00010-F00017   R     xxxxxxxx xxxxxxxx   OB - current object code from the graphics processor
289 //      F00020-F00023     W   xxxxxxxx xxxxxxxx   OLP - start of the object list
290 //      F00026            W   -------- -------x   OBF - object processor flag
291 //
292
293 #if 0
294 uint8_t OPReadByte(uint32_t offset, uint32_t who/*=UNKNOWN*/)
295 {
296         offset &= 0x3F;
297         return objectp_ram[offset];
298 }
299
300 uint16_t OPReadWord(uint32_t offset, uint32_t who/*=UNKNOWN*/)
301 {
302         offset &= 0x3F;
303         return GET16(objectp_ram, offset);
304 }
305
306 void OPWriteByte(uint32_t offset, uint8_t data, uint32_t who/*=UNKNOWN*/)
307 {
308         offset &= 0x3F;
309         objectp_ram[offset] = data;
310 }
311
312 void OPWriteWord(uint32_t offset, uint16_t data, uint32_t who/*=UNKNOWN*/)
313 {
314         offset &= 0x3F;
315         SET16(objectp_ram, offset, data);
316
317 /*if (offset == 0x20)
318 WriteLog("OP: Setting lo list pointer: %04X\n", data);
319 if (offset == 0x22)
320 WriteLog("OP: Setting hi list pointer: %04X\n", data);//*/
321 }
322 #endif
323
324
325 uint32_t OPGetListPointer(void)
326 {
327         // Note: This register is LO / HI WORD, hence the funky look of this...
328         return GET16(tomRam8, 0x20) | (GET16(tomRam8, 0x22) << 16);
329 }
330
331
332 // This is WRONG, since the OBF is only 16 bits wide!!! [FIXED]
333
334 uint32_t OPGetStatusRegister(void)
335 {
336         return GET16(tomRam8, 0x26);
337 }
338
339
340 // This is WRONG, since the OBF is only 16 bits wide!!! [FIXED]
341
342 void OPSetStatusRegister(uint32_t data)
343 {
344         tomRam8[0x26] = (data & 0x0000FF00) >> 8;
345         tomRam8[0x27] |= (data & 0xFE);
346 }
347
348
349 void OPSetCurrentObject(uint64_t object)
350 {
351 //Not sure this is right... Wouldn't it just be stored 64 bit BE?
352         // Stored as least significant 32 bits first, ms32 last in big endian
353 /*      objectp_ram[0x13] = object & 0xFF; object >>= 8;
354         objectp_ram[0x12] = object & 0xFF; object >>= 8;
355         objectp_ram[0x11] = object & 0xFF; object >>= 8;
356         objectp_ram[0x10] = object & 0xFF; object >>= 8;
357
358         objectp_ram[0x17] = object & 0xFF; object >>= 8;
359         objectp_ram[0x16] = object & 0xFF; object >>= 8;
360         objectp_ram[0x15] = object & 0xFF; object >>= 8;
361         objectp_ram[0x14] = object & 0xFF;*/
362 // Let's try regular good old big endian...
363         tomRam8[0x17] = object & 0xFF; object >>= 8;
364         tomRam8[0x16] = object & 0xFF; object >>= 8;
365         tomRam8[0x15] = object & 0xFF; object >>= 8;
366         tomRam8[0x14] = object & 0xFF; object >>= 8;
367
368         tomRam8[0x13] = object & 0xFF; object >>= 8;
369         tomRam8[0x12] = object & 0xFF; object >>= 8;
370         tomRam8[0x11] = object & 0xFF; object >>= 8;
371         tomRam8[0x10] = object & 0xFF;
372 }
373
374
375 uint64_t OPLoadPhrase(uint32_t offset)
376 {
377         offset &= ~0x07;                                                // 8 byte alignment
378         return ((uint64_t)JaguarReadLong(offset, OP) << 32) | (uint64_t)JaguarReadLong(offset+4, OP);
379 }
380
381
382 void OPStorePhrase(uint32_t offset, uint64_t p)
383 {
384         offset &= ~0x07;                                                // 8 byte alignment
385         JaguarWriteLong(offset, p >> 32, OP);
386         JaguarWriteLong(offset + 4, p & 0xFFFFFFFF, OP);
387 }
388
389
390 //
391 // Debugging routines
392 //
393 void DumpScaledObject(uint64_t p0, uint64_t p1, uint64_t p2)
394 {
395         WriteLog("          %08X %08X\n", (uint32_t)(p1>>32), (uint32_t)(p1&0xFFFFFFFF));
396         WriteLog("          %08X %08X\n", (uint32_t)(p2>>32), (uint32_t)(p2&0xFFFFFFFF));
397         DumpBitmapCore(p0, p1);
398         uint32_t hscale = p2 & 0xFF;
399         uint32_t vscale = (p2 >> 8) & 0xFF;
400         uint32_t remainder = (p2 >> 16) & 0xFF;
401         WriteLog("    [hsc: %02X, vsc: %02X, rem: %02X]\n", hscale, vscale, remainder);
402 }
403
404
405 void DumpFixedObject(uint64_t p0, uint64_t p1)
406 {
407         WriteLog("          %08X %08X\n", (uint32_t)(p1>>32), (uint32_t)(p1&0xFFFFFFFF));
408         DumpBitmapCore(p0, p1);
409 }
410
411
412 void DumpBitmapCore(uint64_t p0, uint64_t p1)
413 {
414         uint32_t bdMultiplier[8] = { 64, 32, 16, 8, 4, 2, 1, 1 };
415         uint8_t bitdepth = (p1 >> 12) & 0x07;
416 //WAS:  int16_t ypos = ((p0 >> 3) & 0x3FF);                     // ??? What if not interlaced (/2)?
417         int16_t ypos = ((p0 >> 3) & 0x7FF);                     // ??? What if not interlaced (/2)?
418         int32_t xpos = p1 & 0xFFF;
419         xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);       // Sign extend that mutha!
420         uint32_t iwidth = ((p1 >> 28) & 0x3FF);
421         uint32_t dwidth = ((p1 >> 18) & 0x3FF);         // Unsigned!
422         uint16_t height = ((p0 >> 14) & 0x3FF);
423         uint32_t link = ((p0 >> 24) & 0x7FFFF) << 3;
424         uint32_t ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
425         uint32_t firstPix = (p1 >> 49) & 0x3F;
426         uint8_t flags = (p1 >> 45) & 0x0F;
427         uint8_t idx = (p1 >> 38) & 0x7F;
428         uint32_t pitch = (p1 >> 15) & 0x07;
429         WriteLog("    [%u x %u @ (%i, %u) (iw:%u, dw:%u) (%u bpp), p:%08X fp:%02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
430                 iwidth * bdMultiplier[bitdepth],
431                 height, xpos, ypos, iwidth, dwidth, op_bitmap_bit_depth[bitdepth],
432                 ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""),
433                 (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""),
434                 (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
435 }
436
437
438 //
439 // Object Processor main routine
440 //
441 #warning "Need to fix this so that when an GPU object IRQ happens, we can pick up OP processing where we left off. !!! FIX !!!"
442 void OPProcessList(int halfline, bool render)
443 {
444 #warning "!!! NEED TO HANDLE MULTIPLE FIELDS PROPERLY !!!"
445 // We ignore them, for now; not good
446         halfline &= 0x7FF;
447
448 extern int op_start_log;
449 //      char * condition_to_str[8] =
450 //              { "==", "<", ">", "(opflag set)", "(second half line)", "?", "?", "?" };
451
452         op_pointer = OPGetListPointer();
453
454 //      objectp_stop_reading_list = false;
455
456 //WriteLog("OP: Processing line #%u (OLP=%08X)...\n", halfline, op_pointer);
457 //op_done();
458
459 // *** BEGIN OP PROCESSOR TESTING ONLY ***
460 extern bool interactiveMode;
461 extern bool iToggle;
462 extern int objectPtr;
463 bool inhibit;
464 int bitmapCounter = 0;
465 // *** END OP PROCESSOR TESTING ONLY ***
466
467         uint32_t opCyclesToRun = 30000;                                 // This is a pulled-out-of-the-air value (will need to be fixed, obviously!)
468
469 //      if (op_pointer) WriteLog(" new op list at 0x%.8x halfline %i\n",op_pointer,halfline);
470         while (op_pointer)
471         {
472 // *** BEGIN OP PROCESSOR TESTING ONLY ***
473 if (interactiveMode && bitmapCounter == objectPtr)
474         inhibit = iToggle;
475 else
476         inhibit = false;
477 // *** END OP PROCESSOR TESTING ONLY ***
478 //              if (objectp_stop_reading_list)
479 //                      return;
480
481                 uint64_t p0 = OPLoadPhrase(op_pointer);
482                 op_pointer += 8;
483 //WriteLog("\t%08X type %i\n", op_pointer, (uint8_t)p0 & 0x07);
484
485 #if 1
486 if (halfline == TOMGetVDB() && op_start_log)
487 //if (halfline == 215 && op_start_log)
488 //if (halfline == 28 && op_start_log)
489 //if (halfline == 0)
490 {
491 WriteLog("%08X --> phrase %08X %08X", op_pointer - 8, (int)(p0>>32), (int)(p0&0xFFFFFFFF));
492 if ((p0 & 0x07) == OBJECT_TYPE_BITMAP)
493 {
494 WriteLog(" (BITMAP) ");
495 uint64_t p1 = OPLoadPhrase(op_pointer);
496 WriteLog("\n%08X --> phrase %08X %08X ", op_pointer, (int)(p1>>32), (int)(p1&0xFFFFFFFF));
497         uint8_t bitdepth = (p1 >> 12) & 0x07;
498 //WAS:  int16_t ypos = ((p0 >> 3) & 0x3FF);                     // ??? What if not interlaced (/2)?
499         int16_t ypos = ((p0 >> 3) & 0x7FF);                     // ??? What if not interlaced (/2)?
500 int32_t xpos = p1 & 0xFFF;
501 xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);
502         uint32_t iwidth = ((p1 >> 28) & 0x3FF);
503         uint32_t dwidth = ((p1 >> 18) & 0x3FF);         // Unsigned!
504         uint16_t height = ((p0 >> 14) & 0x3FF);
505         uint32_t link = ((p0 >> 24) & 0x7FFFF) << 3;
506         uint32_t ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
507         uint32_t firstPix = (p1 >> 49) & 0x3F;
508         uint8_t flags = (p1 >> 45) & 0x0F;
509         uint8_t idx = (p1 >> 38) & 0x7F;
510         uint32_t pitch = (p1 >> 15) & 0x07;
511 WriteLog("\n    [%u (%u) x %u @ (%i, %u) (%u bpp), l: %08X, p: %08X fp: %02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
512         iwidth, dwidth, height, xpos, ypos, op_bitmap_bit_depth[bitdepth], link, ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""), (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""), (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
513 }
514 if ((p0 & 0x07) == OBJECT_TYPE_SCALE)
515 {
516 WriteLog(" (SCALED BITMAP)");
517 uint64_t p1 = OPLoadPhrase(op_pointer), p2 = OPLoadPhrase(op_pointer+8);
518 WriteLog("\n%08X --> phrase %08X %08X ", op_pointer, (int)(p1>>32), (int)(p1&0xFFFFFFFF));
519 WriteLog("\n%08X --> phrase %08X %08X ", op_pointer+8, (int)(p2>>32), (int)(p2&0xFFFFFFFF));
520         uint8_t bitdepth = (p1 >> 12) & 0x07;
521 //WAS:  int16_t ypos = ((p0 >> 3) & 0x3FF);                     // ??? What if not interlaced (/2)?
522         int16_t ypos = ((p0 >> 3) & 0x7FF);                     // ??? What if not interlaced (/2)?
523 int32_t xpos = p1 & 0xFFF;
524 xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);
525         uint32_t iwidth = ((p1 >> 28) & 0x3FF);
526         uint32_t dwidth = ((p1 >> 18) & 0x3FF);         // Unsigned!
527         uint16_t height = ((p0 >> 14) & 0x3FF);
528         uint32_t link = ((p0 >> 24) & 0x7FFFF) << 3;
529         uint32_t ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
530         uint32_t firstPix = (p1 >> 49) & 0x3F;
531         uint8_t flags = (p1 >> 45) & 0x0F;
532         uint8_t idx = (p1 >> 38) & 0x7F;
533         uint32_t pitch = (p1 >> 15) & 0x07;
534 WriteLog("\n    [%u (%u) x %u @ (%i, %u) (%u bpp), l: %08X, p: %08X fp: %02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
535         iwidth, dwidth, height, xpos, ypos, op_bitmap_bit_depth[bitdepth], link, ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""), (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""), (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
536         uint32_t hscale = p2 & 0xFF;
537         uint32_t vscale = (p2 >> 8) & 0xFF;
538         uint32_t remainder = (p2 >> 16) & 0xFF;
539 WriteLog("    [hsc: %02X, vsc: %02X, rem: %02X]\n", hscale, vscale, remainder);
540 }
541 if ((p0 & 0x07) == OBJECT_TYPE_GPU)
542 WriteLog(" (GPU)\n");
543 if ((p0 & 0x07) == OBJECT_TYPE_BRANCH)
544 {
545 WriteLog(" (BRANCH)\n");
546 uint8_t * jaguarMainRam = GetRamPtr();
547 WriteLog("[RAM] --> ");
548 for(int k=0; k<8; k++)
549         WriteLog("%02X ", jaguarMainRam[op_pointer-8 + k]);
550 WriteLog("\n");
551 }
552 if ((p0 & 0x07) == OBJECT_TYPE_STOP)
553 WriteLog("    --> List end\n\n");
554 }
555 #endif
556
557                 switch ((uint8_t)p0 & 0x07)
558                 {
559                 case OBJECT_TYPE_BITMAP:
560                 {
561 //WAS:                  uint16_t ypos = (p0 >> 3) & 0x3FF;
562                         uint16_t ypos = (p0 >> 3) & 0x7FF;
563 // This is only theory implied by Rayman...!
564 // It seems that if the YPOS is zero, then bump the YPOS value so that it coincides with
565 // the VDB value. With interlacing, this would be slightly more tricky.
566 // There's probably another bit somewhere that enables this mode--but so far, doesn't seem
567 // to affect any other game in a negative way (that I've seen).
568 // Either that, or it's an undocumented bug...
569
570 //No, the reason this was needed is that the OP code before was wrong. Any value
571 //less than VDB will get written to the top line of the display!
572 #if 0
573 // Not so sure... Let's see what happens here...
574 // No change...
575                         if (ypos == 0)
576                                 ypos = TOMReadWord(0xF00046, OP) / 2;                   // Get the VDB value
577 #endif
578 // Actually, no. Any item less than VDB will get only the lines that hang over VDB displayed.
579 // Actually, this is incorrect. It seems that VDB value is wrong somewhere and that's
580 // what's causing things to fuck up. Still no idea why.
581
582                         uint32_t height = (p0 & 0xFFC000) >> 14;
583                         uint32_t oldOPP = op_pointer - 8;
584 // *** BEGIN OP PROCESSOR TESTING ONLY ***
585 if (inhibit && op_start_log)
586         WriteLog("!!! ^^^ This object is INHIBITED! ^^^ !!!\n");
587 bitmapCounter++;
588 if (!inhibit)   // For OP testing only!
589 // *** END OP PROCESSOR TESTING ONLY ***
590                         if (halfline >= ypos && height > 0)
591                         {
592                                 uint64_t p1 = OPLoadPhrase(op_pointer);
593                                 op_pointer += 8;
594 //WriteLog("OP: Writing halfline %d with ypos == %d...\n", halfline, ypos);
595 //WriteLog("--> Writing %u BPP bitmap...\n", op_bitmap_bit_depth[(p1 >> 12) & 0x07]);
596 //                              OPProcessFixedBitmap(halfline, p0, p1, render);
597                                 OPProcessFixedBitmap(p0, p1, render);
598
599                                 // OP write-backs
600
601 //???Does this really happen??? Doesn't seem to work if you do this...!
602 //Probably not. Must be a bug in the documentation...!
603 //                              uint32_t link = (p0 & 0x7FFFF000000) >> 21;
604 //                              SET16(tom_ram_8, 0x20, link & 0xFFFF);  // OLP
605 //                              SET16(tom_ram_8, 0x22, link >> 16);
606 /*                              uint32_t height = (p0 & 0xFFC000) >> 14;
607                                 if (height - 1 > 0)
608                                         height--;*/
609                                 // NOTE: Would subtract 2 if in interlaced mode...!
610 //                              uint64_t height = ((p0 & 0xFFC000) - 0x4000) & 0xFFC000;
611 //                              if (height)
612                                 height--;
613
614                                 uint64_t data = (p0 & 0xFFFFF80000000000LL) >> 40;
615                                 uint64_t dwidth = (p1 & 0xFFC0000) >> 15;
616                                 data += dwidth;
617
618                                 p0 &= ~0xFFFFF80000FFC000LL;            // Mask out old data...
619                                 p0 |= (uint64_t)height << 14;
620                                 p0 |= data << 40;
621                                 OPStorePhrase(oldOPP, p0);
622                         }
623 //WriteLog("\t\tOld OP: %08X -> ", op_pointer);
624 //Temp, for testing...
625 //No doubt, this type of check will break all kinds of stuff... !!! FIX !!!
626 //And it does! !!! FIX !!!
627 //Let's remove this "fix" since it screws up more than it fixes.
628 /*      if (op_pointer > ((p0 & 0x000007FFFF000000LL) >> 21))
629                 return;*/
630
631 // NOTE: The link address only replaces bits 3-21 in the OLP, and this replaces
632 //       EVERYTHING. !!! FIX !!! [DONE]
633 #warning "!!! Link address is not linked properly for all object types !!!"
634 #warning "!!! Only BITMAP is properly handled !!!"
635                         op_pointer &= 0xFFC00007;
636                         op_pointer |= (p0 & 0x000007FFFF000000LL) >> 21;
637 //WriteLog("New OP: %08X\n", op_pointer);
638 //kludge: Seems that memory access is mirrored in the first 8MB of memory...
639 if (op_pointer > 0x1FFFFF && op_pointer < 0x800000)
640         op_pointer &= 0xFF1FFFFF;       // Knock out bits 21-23
641
642                         break;
643                 }
644                 case OBJECT_TYPE_SCALE:
645                 {
646 //WAS:                  uint16_t ypos = (p0 >> 3) & 0x3FF;
647                         uint16_t ypos = (p0 >> 3) & 0x7FF;
648                         uint32_t height = (p0 & 0xFFC000) >> 14;
649                         uint32_t oldOPP = op_pointer - 8;
650 //WriteLog("OP: Scaled Object (ypos=%04X, height=%04X", ypos, height);
651 // *** BEGIN OP PROCESSOR TESTING ONLY ***
652 if (inhibit && op_start_log)
653 {
654         WriteLog("!!! ^^^ This object is INHIBITED! ^^^ !!! (halfline=%u, ypos=%u, height=%u)\n", halfline, ypos, height);
655         DumpScaledObject(p0, OPLoadPhrase(op_pointer), OPLoadPhrase(op_pointer+8));
656 }
657 bitmapCounter++;
658 if (!inhibit)   // For OP testing only!
659 // *** END OP PROCESSOR TESTING ONLY ***
660                         if (halfline >= ypos && height > 0)
661                         {
662                                 uint64_t p1 = OPLoadPhrase(op_pointer);
663                                 op_pointer += 8;
664                                 uint64_t p2 = OPLoadPhrase(op_pointer);
665                                 op_pointer += 8;
666 //WriteLog("OP: %08X (%d) %08X%08X %08X%08X %08X%08X\n", oldOPP, halfline, (uint32_t)(p0>>32), (uint32_t)(p0&0xFFFFFFFF), (uint32_t)(p1>>32), (uint32_t)(p1&0xFFFFFFFF), (uint32_t)(p2>>32), (uint32_t)(p2&0xFFFFFFFF));
667                                 OPProcessScaledBitmap(p0, p1, p2, render);
668
669                                 // OP write-backs
670
671                                 uint16_t remainder = (p2 >> 16) & 0xFF;//, vscale = p2 >> 8;
672                                 uint8_t /*remainder = p2 >> 16,*/ vscale = p2 >> 8;
673 //Actually, we should skip this object if it has a vscale of zero.
674 //Or do we? Not sure... Atari Karts has a few lines that look like:
675 // (SCALED BITMAP)
676 //000E8268 --> phrase 00010000 7000B00D
677 //    [7 (0) x 1 @ (13, 0) (8 bpp), l: 000E82A0, p: 000E0FC0 fp: 00, fl:RELEASE, idx:00, pt:01]
678 //    [hsc: 9A, vsc: 00, rem: 00]
679 // Could it be the vscale is overridden if the DWIDTH is zero? Hmm...
680 //WriteLog("OP: Scaled bitmap processing (rem=%02X, vscale=%02X)...\n", remainder, vscale);//*/
681
682                                 if (vscale == 0)
683                                         vscale = 0x20;                                  // OP bug??? Nope, it isn't...! Or is it?
684
685 //extern int start_logging;
686 //if (start_logging)
687 //      WriteLog("--> Returned from scaled bitmap processing (rem=%02X, vscale=%02X)...\n", remainder, vscale);//*/
688 //Locks up here:
689 //--> Returned from scaled bitmap processing (rem=20, vscale=80)...
690 //There are other problems here, it looks like...
691 //Another lock up:
692 //About to execute OP (508)...
693 /*
694 OP: Scaled bitmap 4x? 4bpp at 38,? hscale=7C fpix=0 data=00075E28 pitch 1 hflipped=no dwidth=? (linked to 00071118) Transluency=no
695 --> Returned from scaled bitmap processing (rem=50, vscale=7C)...
696 OP: Scaled bitmap 4x? 4bpp at 38,? hscale=7C fpix=0 data=00075E28 pitch 1 hflipped=no dwidth=? (linked to 00071118) Transluency=no
697 --> Returned from scaled bitmap processing (rem=30, vscale=7C)...
698 OP: Scaled bitmap 4x? 4bpp at 38,? hscale=7C fpix=0 data=00075E28 pitch 1 hflipped=no dwidth=? (linked to 00071118) Transluency=no
699 --> Returned from scaled bitmap processing (rem=10, vscale=7C)...
700 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756A8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
701 --> Returned from scaled bitmap processing (rem=00, vscale=7E)...
702 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
703 --> Returned from scaled bitmap processing (rem=00, vscale=80)...
704 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
705 --> Returned from scaled bitmap processing (rem=5E, vscale=7E)...
706 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756E8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
707 --> Returned from scaled bitmap processing (rem=60, vscale=80)...
708 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
709 --> Returned from scaled bitmap processing (rem=3E, vscale=7E)...
710 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756E8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
711 --> Returned from scaled bitmap processing (rem=40, vscale=80)...
712 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
713 --> Returned from scaled bitmap processing (rem=1E, vscale=7E)...
714 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756E8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
715 --> Returned from scaled bitmap processing (rem=20, vscale=80)...
716 */
717 //Here's another problem:
718 //    [hsc: 20, vsc: 20, rem: 00]
719 // Since we're not checking for $E0 (but that's what we get from the above), we end
720 // up repeating this halfline unnecessarily... !!! FIX !!! [DONE, but... still not quite
721 // right. Either that, or the Accolade team that wrote Bubsy screwed up royal.]
722 //Also note: $E0 = 7.0 which IS a legal vscale value...
723
724 //                              if (remainder & 0x80)                           // I.e., it's negative
725 //                              if ((remainder & 0x80) || remainder == 0)       // I.e., it's <= 0
726 //                              if ((remainder - 1) >= 0xE0)            // I.e., it's <= 0
727 //                              if ((remainder >= 0xE1) || remainder == 0)// I.e., it's <= 0
728 //                              if ((remainder >= 0xE1 && remainder <= 0xFF) || remainder == 0)// I.e., it's <= 0
729 //                              if (remainder <= 0x20)                          // I.e., it's <= 1.0
730                                 // I.e., it's < 1.0f -> means it'll go negative when we subtract 1.0f.
731                                 if (remainder < 0x20)
732                                 {
733                                         uint64_t data = (p0 & 0xFFFFF80000000000LL) >> 40;
734                                         uint64_t dwidth = (p1 & 0xFFC0000) >> 15;
735
736 //                                      while (remainder & 0x80)
737 //                                      while ((remainder & 0x80) || remainder == 0)
738 //                                      while ((remainder - 1) >= 0xE0)
739 //                                      while ((remainder >= 0xE1) || remainder == 0)
740 //                                      while ((remainder >= 0xE1 && remainder <= 0xFF) || remainder == 0)
741 //                                      while (remainder <= 0x20)
742                                         while (remainder < 0x20)
743                                         {
744                                                 remainder += vscale;
745
746                                                 if (height)
747                                                         height--;
748
749                                                 data += dwidth;
750                                         }
751
752                                         p0 &= ~0xFFFFF80000FFC000LL;    // Mask out old data...
753                                         p0 |= (uint64_t)height << 14;
754                                         p0 |= data << 40;
755                                         OPStorePhrase(oldOPP, p0);
756                                 }
757
758                                 remainder -= 0x20;                                      // 1.0f in [3.5] fixed point format
759
760 //if (start_logging)
761 //      WriteLog("--> Finished writebacks...\n");//*/
762
763 //WriteLog(" [%08X%08X -> ", (uint32_t)(p2>>32), (uint32_t)(p2&0xFFFFFFFF));
764                                 p2 &= ~0x0000000000FF0000LL;
765                                 p2 |= (uint64_t)remainder << 16;
766 //WriteLog("%08X%08X]\n", (uint32_t)(p2>>32), (uint32_t)(p2&0xFFFFFFFF));
767                                 OPStorePhrase(oldOPP + 16, p2);
768 //remainder = (uint8_t)(p2 >> 16), vscale = (uint8_t)(p2 >> 8);
769 //WriteLog(" [after]: rem=%02X, vscale=%02X\n", remainder, vscale);
770                         }
771
772                         op_pointer = (p0 & 0x000007FFFF000000LL) >> 21;
773                         break;
774                 }
775                 case OBJECT_TYPE_GPU:
776                 {
777 //WriteLog("OP: Asserting GPU IRQ #3...\n");
778 #warning "Need to fix OP GPU IRQ handling! !!! FIX !!!"
779                         OPSetCurrentObject(p0);
780                         GPUSetIRQLine(3, ASSERT_LINE);
781 //Also, OP processing is suspended from this point until OBF (F00026) is written to...
782 // !!! FIX !!!
783 //Do something like:
784 //OPSuspendedByGPU = true;
785 //Dunno if the OP keeps processing from where it was interrupted, or if it just continues
786 //on the next halfline...
787 // --> It continues from where it was interrupted! !!! FIX !!!
788                         break;
789                 }
790                 case OBJECT_TYPE_BRANCH:
791                 {
792                         uint16_t ypos = (p0 >> 3) & 0x7FF;
793 // NOTE: The JTRM sez there are only 2 bits used for the CC, but lists *five*
794 //       conditions! Need at least one more bit for that! :-P
795 // Also, the ASIC nets imply that it uses bits 14-16 (height in BM & SBM objects)
796 #warning "!!! Possibly bad CC handling in OP (missing 1 bit) !!!"
797                         uint8_t  cc   = (p0 >> 14) & 0x03;
798                         uint32_t link = (p0 >> 21) & 0x3FFFF8;
799
800                         // If no branch is taken, we need to ensure that it goes to the
801                         // next object (it doesn't go +8, but +16 to following object)
802                         op_pointer += 8;
803
804 //                      if ((ypos!=507)&&(ypos!=25))
805 //                              WriteLog("\t%i%s%i link=0x%.8x\n",halfline,condition_to_str[cc],ypos>>1,link);
806                         switch (cc)
807                         {
808                         case CONDITION_EQUAL:
809                                 if (TOMReadWord(0xF00006, OP) == ypos || ypos == 0x7FF)
810                                         op_pointer = link;
811                                 break;
812                         case CONDITION_LESS_THAN:
813                                 if (TOMReadWord(0xF00006, OP) < ypos)
814                                         op_pointer = link;
815                                 break;
816                         case CONDITION_GREATER_THAN:
817                                 if (TOMReadWord(0xF00006, OP) > ypos)
818                                         op_pointer = link;
819                                 break;
820                         case CONDITION_OP_FLAG_SET:
821                                 if (OPGetStatusRegister() & 0x01)
822                                         op_pointer = link;
823                                 break;
824                         case CONDITION_SECOND_HALF_LINE:
825 //Here's the ASIC code:
826 //  ND4(cctrue5, newheight[2], heightl[1], heightl[0], hcb[10]);
827 //which means, do the link if bit 10 of HC is set...
828
829                                 // This basically means branch if bit 10 of HC is set
830 #warning "Unhandled condition code causes emulator to crash... !!! FIX !!!"
831                                 WriteLog("OP: Unexpected CONDITION_SECOND_HALF_LINE in BRANCH object\nOP: shutting down!\n");
832                                 LogDone();
833                                 exit(0);
834                                 break;
835                         default:
836                                 // Basically, if you do this, the OP does nothing. :-)
837                                 WriteLog("OP: Unimplemented branch condition %i\n", cc);
838                         }
839                         break;
840                 }
841                 case OBJECT_TYPE_STOP:
842                 {
843 //op_start_log = 0;
844                         // unsure
845 //WriteLog("OP: --> STOP\n");
846 //                      op_set_status_register(((p0>>3) & 0xFFFFFFFF));
847 //This seems more likely...
848                         OPSetCurrentObject(p0);
849
850                         if (p0 & 0x08)
851                         {
852                                 // We need to check whether these interrupts are enabled or not, THEN
853                                 // set an IRQ + pending flag if necessary...
854                                 if (TOMIRQEnabled(IRQ_OPFLAG))
855                                 {
856                                         TOMSetPendingObjectInt();
857                                         m68k_set_irq(2);                                // Cause a 68K IPL 2 to occur...
858                                 }
859                         }
860
861                         return;
862 //                      break;
863                 }
864                 default:
865 //                      WriteLog("op: unknown object type %i\n", ((uint8_t)p0 & 0x07));
866                         return;
867                 }
868
869                 // Here is a little sanity check to keep the OP from locking up the machine
870                 // when fed bad data. Better would be to count how many actual cycles it used
871                 // and bail out/reenter to properly simulate an overloaded OP... !!! FIX !!!
872 #warning "Better would be to count how many actual cycles it used and bail out/reenter to properly simulate an overloaded OP... !!! FIX !!!"
873                 opCyclesToRun--;
874
875                 if (!opCyclesToRun)
876                         return;
877         }
878 }
879
880
881 //
882 // Store fixed size bitmap in line buffer
883 //
884 void OPProcessFixedBitmap(uint64_t p0, uint64_t p1, bool render)
885 {
886 // Need to make sure that when writing that it stays within the line buffer...
887 // LBUF ($F01800 - $F01D9E) 360 x 32-bit RAM
888         uint8_t depth = (p1 >> 12) & 0x07;                              // Color depth of image
889         int32_t xpos = ((int16_t)((p1 << 4) & 0xFFFF)) >> 4;// Image xpos in LBUF
890         uint32_t iwidth = (p1 >> 28) & 0x3FF;                           // Image width in *phrases*
891         uint32_t data = (p0 >> 40) & 0xFFFFF8;                  // Pixel data address
892 //#ifdef OP_DEBUG_BMP
893         uint32_t firstPix = (p1 >> 49) & 0x3F;
894         // "The LSB is significant only for scaled objects..." -JTRM
895         // "In 1 BPP mode, all five bits are significant. In 2 BPP mode, the top four are significant..."
896         firstPix &= 0x3E;
897 //#endif
898 // We can ignore the RELEASE (high order) bit for now--probably forever...!
899 //      uint8_t flags = (p1 >> 45) & 0x0F;      // REFLECT, RMW, TRANS, RELEASE
900 //Optimize: break these out to their own BOOL values
901         uint8_t flags = (p1 >> 45) & 0x07;                              // REFLECT (0), RMW (1), TRANS (2)
902         bool flagREFLECT = (flags & OPFLAG_REFLECT ? true : false),
903                 flagRMW = (flags & OPFLAG_RMW ? true : false),
904                 flagTRANS = (flags & OPFLAG_TRANS ? true : false);
905 // "For images with 1 to 4 bits/pixel the top 7 to 4 bits of the index
906 //  provide the most significant bits of the palette address."
907         uint8_t index = (p1 >> 37) & 0xFE;                              // CLUT index offset (upper pix, 1-4 bpp)
908         uint32_t pitch = (p1 >> 15) & 0x07;                             // Phrase pitch
909         pitch <<= 3;                                                                    // Optimization: Multiply pitch by 8
910
911 //      int16_t scanlineWidth = tom_getVideoModeWidth();
912         uint8_t * tomRam8 = TOMGetRamPointer();
913         uint8_t * paletteRAM = &tomRam8[0x400];
914         // This is OK as long as it's used correctly: For 16-bit RAM to RAM direct copies--NOT
915         // for use when using endian-corrected data (i.e., any of the *_word_read functions!)
916         uint16_t * paletteRAM16 = (uint16_t *)paletteRAM;
917
918 //      WriteLog("bitmap %ix? %ibpp at %i,? firstpix=? data=0x%.8x pitch %i hflipped=%s dwidth=? (linked to ?) RMW=%s Tranparent=%s\n",
919 //              iwidth, op_bitmap_bit_depth[bitdepth], xpos, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), (flags&OPFLAG_RMW ? "yes" : "no"), (flags&OPFLAG_TRANS ? "yes" : "no"));
920
921 // Is it OK to have a 0 for the data width??? (i.e., undocumented?)
922 // Seems to be... Seems that dwidth *can* be zero (i.e., reuse same line) as well.
923 // Pitch == 0 is OK too...
924
925 //kludge: Seems that the OP treats iwidth == 0 as iwidth == 1... Need to investigate
926 //        on real hardware...
927 #warning "!!! Need to investigate iwidth == 0 behavior on real hardware !!!"
928 if (iwidth == 0)
929         iwidth = 1;
930
931 //      if (!render || op_pointer == 0 || ptr == 0 || pitch == 0)
932 //I'm not convinced that we need to concern ourselves with data & op_pointer here either!
933         if (!render || iwidth == 0)
934                 return;
935
936 //OK, so we know the position in the line buffer is correct. It's the clipping in
937 //24bpp mode that's wrong!
938 #if 0
939 //This is a total kludge, based upon the fact that 24BPP mode puts *4* bytes
940 //into the line buffer for each pixel.
941 if (depth == 5) // i.e., 24bpp mode...
942         xpos >>= 1;     // Cut it in half...
943 #endif
944
945 //#define OP_DEBUG_BMP
946 //#ifdef OP_DEBUG_BMP
947 //      WriteLog("bitmap %ix%i %ibpp at %i,%i firstpix=%i data=0x%.8x pitch %i hflipped=%s dwidth=%i (linked to 0x%.8x) Transluency=%s\n",
948 //              iwidth, height, op_bitmap_bit_depth[bitdepth], xpos, ypos, firstPix, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), dwidth, op_pointer, (flags&OPFLAG_RMW ? "yes" : "no"));
949 //#endif
950
951 //      int32_t leftMargin = xpos, rightMargin = (xpos + (phraseWidthToPixels[depth] * iwidth)) - 1;
952         int32_t startPos = xpos, endPos = xpos +
953                 (!flagREFLECT ? (phraseWidthToPixels[depth] * iwidth) - 1
954                 : -((phraseWidthToPixels[depth] * iwidth) + 1));
955         uint32_t clippedWidth = 0, phraseClippedWidth = 0, dataClippedWidth = 0;//, phrasePixel = 0;
956         bool in24BPPMode = (((GET16(tomRam8, 0x0028) >> 1) & 0x03) == 1 ? true : false);        // VMODE
957         // Not sure if this is Jaguar Two only location or what...
958         // From the docs, it is... If we want to limit here we should think of something else.
959 //      int32_t limit = GET16(tom_ram_8, 0x0008);                       // LIMIT
960 //      int32_t limit = 720;
961 //      int32_t lbufWidth = (!in24BPPMode ? limit - 1 : (limit / 2) - 1);       // Zero based limit...
962 //printf("[OP:xpos=%i,spos=%i,epos=%i>", xpos, startPos, endPos);
963         // This is correct, the OP line buffer is a constant size... 
964         int32_t limit = 720;
965         int32_t lbufWidth = 719;
966
967         // If the image is completely to the left or right of the line buffer, then bail.
968 //If in REFLECT mode, then these values are swapped! !!! FIX !!! [DONE]
969 //There are four possibilities:
970 //  1. image sits on left edge and no REFLECT; starts out of bounds but ends in bounds.
971 //  2. image sits on left edge and REFLECT; starts in bounds but ends out of bounds.
972 //  3. image sits on right edge and REFLECT; starts out of bounds but ends in bounds.
973 //  4. image sits on right edge and no REFLECT; starts in bounds but ends out of bounds.
974 //Numbers 2 & 4 can be caught by checking the LBUF clip while in the inner loop,
975 // numbers 1 & 3 are of concern.
976 // This *indirectly* handles only cases 2 & 4! And is WRONG if REFLECT is set...!
977 //      if (rightMargin < 0 || leftMargin > lbufWidth)
978
979 // It might be easier to swap these (if REFLECTed) and just use XPOS down below...
980 // That way, you could simply set XPOS to leftMargin if !REFLECT and to rightMargin otherwise.
981 // Still have to be careful with the DATA and IWIDTH values though...
982
983 //      if ((!flagREFLECT && (rightMargin < 0 || leftMargin > lbufWidth))
984 //              || (flagREFLECT && (leftMargin < 0 || rightMargin > lbufWidth)))
985 //              return;
986         if ((!flagREFLECT && (endPos < 0 || startPos > lbufWidth))
987                 || (flagREFLECT && (startPos < 0 || endPos > lbufWidth)))
988                 return;
989
990         // Otherwise, find the clip limits and clip the phrase as well...
991         // NOTE: I'm fudging here by letting the actual blit overstep the bounds of the
992         //       line buffer, but it shouldn't matter since there are two unused line
993         //       buffers below and nothing above and I'll at most write 8 bytes outside
994         //       the line buffer... I could use a fractional clip begin/end value, but
995         //       this makes the blit a *lot* more hairy. I might fix this in the future
996         //       if it becomes necessary. (JLH)
997         //       Probably wouldn't be *that* hairy. Just use a delta that tells the inner loop
998         //       which pixel in the phrase is being written, and quit when either end of phrases
999         //       is reached or line buffer extents are surpassed.
1000
1001 //This stuff is probably wrong as well... !!! FIX !!!
1002 //The strange thing is that it seems to work, but that's no guarantee that it's bulletproof!
1003 //Yup. Seems that JagMania doesn't work correctly with this...
1004 //Dunno if this is the problem, but Atari Karts is showing *some* of the road now...
1005 //      if (!flagREFLECT)
1006
1007 /*
1008         if (leftMargin < 0)
1009                 clippedWidth = 0 - leftMargin,
1010                 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth],
1011                 leftMargin = 0 - (clippedWidth % phraseWidthToPixels[depth]);
1012 //              leftMargin = 0;
1013
1014         if (rightMargin > lbufWidth)
1015                 clippedWidth = rightMargin - lbufWidth,
1016                 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];//,
1017 //              rightMargin = lbufWidth + (clippedWidth % phraseWidthToPixels[depth]);
1018 //              rightMargin = lbufWidth;
1019 */
1020 if (depth > 5)
1021         WriteLog("OP: We're about to encounter a divide by zero error!\n");
1022         // NOTE: We're just using endPos to figure out how much, if any, to clip by.
1023         // ALSO: There may be another case where we start out of bounds and end out of bounds...!
1024         // !!! FIX !!!
1025         if (startPos < 0)                       // Case #1: Begin out, end in, L to R
1026                 clippedWidth = 0 - startPos,
1027                 dataClippedWidth = phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth],
1028                 startPos = 0 - (clippedWidth % phraseWidthToPixels[depth]);
1029
1030         if (endPos < 0)                         // Case #2: Begin in, end out, R to L
1031                 clippedWidth = 0 - endPos,
1032                 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];
1033
1034         if (endPos > lbufWidth)         // Case #3: Begin in, end out, L to R
1035                 clippedWidth = endPos - lbufWidth,
1036                 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];
1037
1038         if (startPos > lbufWidth)       // Case #4: Begin out, end in, R to L
1039                 clippedWidth = startPos - lbufWidth,
1040                 dataClippedWidth = phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth],
1041                 startPos = lbufWidth + (clippedWidth % phraseWidthToPixels[depth]);
1042 //printf("<OP:spos=%i,epos=%i]", startPos, endPos);
1043
1044         // If the image is sitting on the line buffer left or right edge, we need to compensate
1045         // by decreasing the image phrase width accordingly.
1046         iwidth -= phraseClippedWidth;
1047
1048         // Also, if we're clipping the phrase we need to make sure we're in the correct part of
1049         // the pixel data.
1050 //      data += phraseClippedWidth * (pitch << 3);
1051         data += dataClippedWidth * pitch;
1052
1053         // NOTE: When the bitmap is in REFLECT mode, the XPOS marks the *right* side of the
1054         //       bitmap! This makes clipping & etc. MUCH, much easier...!
1055 //      uint32_t lbufAddress = 0x1800 + (!in24BPPMode ? leftMargin * 2 : leftMargin * 4);
1056 //Why does this work right when multiplying startPos by 2 (instead of 4) for 24 BPP mode?
1057 //Is this a bug in the OP?
1058 //It's because in 24bpp mode, each pixel takes *4* bytes, instead of the usual 2.
1059 //Though it looks like we're doing it here no matter what...
1060 //      uint32_t lbufAddress = 0x1800 + (!in24BPPMode ? startPos * 2 : startPos * 2);
1061 //Let's try this:
1062         uint32_t lbufAddress = 0x1800 + (startPos * 2);
1063         uint8_t * currentLineBuffer = &tomRam8[lbufAddress];
1064
1065         // Render.
1066
1067 // Hmm. We check above for 24 BPP mode, but don't do anything about it below...
1068 // If we *were* in 24 BPP mode, how would you convert CRY to RGB24? Seems to me
1069 // that if you're in CRY mode then you wouldn't be able to use 24 BPP bitmaps
1070 // anyway.
1071 // This seems to be the case (at least according to the Midsummer docs)...!
1072
1073 // This is to test using palette zeroes instead of bit zeroes...
1074 // And it seems that this is wrong, index == 0 is transparent apparently... :-/
1075 //#define OP_USES_PALETTE_ZERO
1076
1077         if (depth == 0)                                                                 // 1 BPP
1078         {
1079                 // The LSB of flags is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1080                 int32_t lbufDelta = ((int8_t)((flags << 7) & 0xFF) >> 5) | 0x02;
1081
1082                 // Fetch 1st phrase...
1083                 uint64_t pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1084 //Note that firstPix should only be honored *if* we start with the 1st phrase of the bitmap
1085 //i.e., we didn't clip on the margin... !!! FIX !!!
1086                 pixels <<= firstPix;                                            // Skip first N pixels (N=firstPix)...
1087                 int i = firstPix;                                                       // Start counter at right spot...
1088
1089                 while (iwidth--)
1090                 {
1091                         while (i++ < 64)
1092                         {
1093                                 uint8_t bit = pixels >> 63;
1094 #ifndef OP_USES_PALETTE_ZERO
1095                                 if (flagTRANS && bit == 0)
1096 #else
1097                                 if (flagTRANS && (paletteRAM16[index | bit] == 0))
1098 #endif
1099                                         ;       // Do nothing...
1100                                 else
1101                                 {
1102                                         if (!flagRMW)
1103 //Optimize: Set paletteRAM16 to beginning of palette RAM + index*2 and use only [bit] as index...
1104 //Won't optimize RMW case though...
1105                                                 // This is the *only* correct use of endian-dependent code
1106                                                 // (i.e., mem-to-mem direct copying)!
1107                                                 *(uint16_t *)currentLineBuffer = paletteRAM16[index | bit];
1108                                         else
1109                                                 *currentLineBuffer =
1110                                                         BLEND_CR(*currentLineBuffer, paletteRAM[(index | bit) << 1]),
1111                                                 *(currentLineBuffer + 1) =
1112                                                         BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bit) << 1) + 1]);
1113                                 }
1114
1115                                 currentLineBuffer += lbufDelta;
1116                                 pixels <<= 1;
1117                         }
1118                         i = 0;
1119                         // Fetch next phrase...
1120                         data += pitch;
1121                         pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1122                 }
1123         }
1124         else if (depth == 1)                                                    // 2 BPP
1125         {
1126 if (firstPix)
1127         WriteLog("OP: Fixed bitmap @ 2 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1128                 index &= 0xFC;                                                          // Top six bits form CLUT index
1129                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1130                 int32_t lbufDelta = ((int8_t)((flags << 7) & 0xFF) >> 5) | 0x02;
1131
1132                 while (iwidth--)
1133                 {
1134                         // Fetch phrase...
1135                         uint64_t pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1136                         data += pitch;
1137
1138                         for(int i=0; i<32; i++)
1139                         {
1140                                 uint8_t bits = pixels >> 62;
1141 // Seems to me that both of these are in the same endian, so we could cast it as
1142 // uint16_t * and do straight across copies (what about 24 bpp? Treat it differently...)
1143 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
1144 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
1145 // No, it isn't because we read the memory in an endian safe way--this *won't* work...
1146 #ifndef OP_USES_PALETTE_ZERO
1147                                 if (flagTRANS && bits == 0)
1148 #else
1149                                 if (flagTRANS && (paletteRAM16[index | bits] == 0))
1150 #endif
1151                                         ;       // Do nothing...
1152                                 else
1153                                 {
1154                                         if (!flagRMW)
1155                                                 *(uint16_t *)currentLineBuffer = paletteRAM16[index | bits];
1156                                         else
1157                                                 *currentLineBuffer =
1158                                                         BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1159                                                 *(currentLineBuffer + 1) =
1160                                                         BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1161                                 }
1162
1163                                 currentLineBuffer += lbufDelta;
1164                                 pixels <<= 2;
1165                         }
1166                 }
1167         }
1168         else if (depth == 2)                                                    // 4 BPP
1169         {
1170 if (firstPix)
1171         WriteLog("OP: Fixed bitmap @ 4 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1172                 index &= 0xF0;                                                          // Top four bits form CLUT index
1173                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1174                 int32_t lbufDelta = ((int8_t)((flags << 7) & 0xFF) >> 5) | 0x02;
1175
1176                 while (iwidth--)
1177                 {
1178                         // Fetch phrase...
1179                         uint64_t pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1180                         data += pitch;
1181
1182                         for(int i=0; i<16; i++)
1183                         {
1184                                 uint8_t bits = pixels >> 60;
1185 // Seems to me that both of these are in the same endian, so we could cast it as
1186 // uint16_t * and do straight across copies (what about 24 bpp? Treat it differently...)
1187 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
1188 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
1189 // No, it isn't because we read the memory in an endian safe way--this *won't* work...
1190 #ifndef OP_USES_PALETTE_ZERO
1191                                 if (flagTRANS && bits == 0)
1192 #else
1193                                 if (flagTRANS && (paletteRAM16[index | bits] == 0))
1194 #endif
1195                                         ;       // Do nothing...
1196                                 else
1197                                 {
1198                                         if (!flagRMW)
1199                                                 *(uint16_t *)currentLineBuffer = paletteRAM16[index | bits];
1200                                         else
1201                                                 *currentLineBuffer =
1202                                                         BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1203                                                 *(currentLineBuffer + 1) =
1204                                                         BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1205                                 }
1206
1207                                 currentLineBuffer += lbufDelta;
1208                                 pixels <<= 4;
1209                         }
1210                 }
1211         }
1212         else if (depth == 3)                                                    // 8 BPP
1213         {
1214                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1215                 int32_t lbufDelta = ((int8_t)((flags << 7) & 0xFF) >> 5) | 0x02;
1216
1217                 // Fetch 1st phrase...
1218                 uint64_t pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1219 //Note that firstPix should only be honored *if* we start with the 1st phrase of the bitmap
1220 //i.e., we didn't clip on the margin... !!! FIX !!!
1221                 firstPix &= 0x30;                                                       // Only top two bits are valid for 8 BPP
1222                 pixels <<= firstPix;                                            // Skip first N pixels (N=firstPix)...
1223                 int i = firstPix >> 3;                                          // Start counter at right spot...
1224
1225                 while (iwidth--)
1226                 {
1227                         while (i++ < 8)
1228                         {
1229                                 uint8_t bits = pixels >> 56;
1230 // Seems to me that both of these are in the same endian, so we could cast it as
1231 // uint16_t * and do straight across copies (what about 24 bpp? Treat it differently...)
1232 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
1233 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
1234 // No, it isn't because we read the memory in an endian safe way--this *won't* work...
1235 //This would seem to be problematic...
1236 //Because it's the palette entry being zero that makes the pixel transparent...
1237 //Let's try it and see.
1238 #ifndef OP_USES_PALETTE_ZERO
1239                                 if (flagTRANS && bits == 0)
1240 #else
1241                                 if (flagTRANS && (paletteRAM16[bits] == 0))
1242 #endif
1243                                         ;       // Do nothing...
1244                                 else
1245                                 {
1246                                         if (!flagRMW)
1247                                                 *(uint16_t *)currentLineBuffer = paletteRAM16[bits];
1248                                         else
1249                                                 *currentLineBuffer =
1250                                                         BLEND_CR(*currentLineBuffer, paletteRAM[bits << 1]),
1251                                                 *(currentLineBuffer + 1) =
1252                                                         BLEND_Y(*(currentLineBuffer + 1), paletteRAM[(bits << 1) + 1]);
1253                                 }
1254
1255                                 currentLineBuffer += lbufDelta;
1256                                 pixels <<= 8;
1257                         }
1258                         i = 0;
1259                         // Fetch next phrase...
1260                         data += pitch;
1261                         pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1262                 }
1263         }
1264         else if (depth == 4)                                                    // 16 BPP
1265         {
1266 if (firstPix)
1267         WriteLog("OP: Fixed bitmap @ 16 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1268                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1269                 int32_t lbufDelta = ((int8_t)((flags << 7) & 0xFF) >> 5) | 0x02;
1270
1271                 while (iwidth--)
1272                 {
1273                         // Fetch phrase...
1274                         uint64_t pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1275                         data += pitch;
1276
1277                         for(int i=0; i<4; i++)
1278                         {
1279                                 uint8_t bitsHi = pixels >> 56, bitsLo = pixels >> 48;
1280 // Seems to me that both of these are in the same endian, so we could cast it as
1281 // uint16_t * and do straight across copies (what about 24 bpp? Treat it differently...)
1282 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
1283 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
1284 // No, it isn't because we read the memory in an endian safe way--it *won't* work...
1285 //This doesn't seem right... Let's try the encoded black value ($8800):
1286 //Apparently, CRY 0 maps to $8800...
1287                                 if (flagTRANS && ((bitsLo | bitsHi) == 0))
1288 //                              if (flagTRANS && (bitsHi == 0x88) && (bitsLo == 0x00))
1289                                         ;       // Do nothing...
1290                                 else
1291                                 {
1292                                         if (!flagRMW)
1293                                                 *currentLineBuffer = bitsHi,
1294                                                 *(currentLineBuffer + 1) = bitsLo;
1295                                         else
1296                                                 *currentLineBuffer =
1297                                                         BLEND_CR(*currentLineBuffer, bitsHi),
1298                                                 *(currentLineBuffer + 1) =
1299                                                         BLEND_Y(*(currentLineBuffer + 1), bitsLo);
1300                                 }
1301
1302                                 currentLineBuffer += lbufDelta;
1303                                 pixels <<= 16;
1304                         }
1305                 }
1306         }
1307         else if (depth == 5)                                                    // 24 BPP
1308         {
1309 //Looks like Iron Soldier is the only game that uses 24BPP mode...
1310 //There *might* be others...
1311 //WriteLog("OP: Writing 24 BPP bitmap!\n");
1312 if (firstPix)
1313         WriteLog("OP: Fixed bitmap @ 24 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1314                 // Not sure, but I think RMW only works with 16 BPP and below, and only in CRY mode...
1315                 // The LSB of flags is OPFLAG_REFLECT, so sign extend it and OR 4 into it.
1316                 int32_t lbufDelta = ((int8_t)((flags << 7) & 0xFF) >> 4) | 0x04;
1317
1318                 while (iwidth--)
1319                 {
1320                         // Fetch phrase...
1321                         uint64_t pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1322                         data += pitch;
1323
1324                         for(int i=0; i<2; i++)
1325                         {
1326                                 // We don't use a 32-bit var here because of endian issues...!
1327                                 uint8_t bits3 = pixels >> 56, bits2 = pixels >> 48,
1328                                         bits1 = pixels >> 40, bits0 = pixels >> 32;
1329
1330                                 if (flagTRANS && (bits3 | bits2 | bits1 | bits0) == 0)
1331                                         ;       // Do nothing...
1332                                 else
1333                                         *currentLineBuffer = bits3,
1334                                         *(currentLineBuffer + 1) = bits2,
1335                                         *(currentLineBuffer + 2) = bits1,
1336                                         *(currentLineBuffer + 3) = bits0;
1337
1338                                 currentLineBuffer += lbufDelta;
1339                                 pixels <<= 32;
1340                         }
1341                 }
1342         }
1343 }
1344
1345
1346 //
1347 // Store scaled bitmap in line buffer
1348 //
1349 void OPProcessScaledBitmap(uint64_t p0, uint64_t p1, uint64_t p2, bool render)
1350 {
1351 // Need to make sure that when writing that it stays within the line buffer...
1352 // LBUF ($F01800 - $F01D9E) 360 x 32-bit RAM
1353         uint8_t depth = (p1 >> 12) & 0x07;                              // Color depth of image
1354         int32_t xpos = ((int16_t)((p1 << 4) & 0xFFFF)) >> 4;// Image xpos in LBUF
1355         uint32_t iwidth = (p1 >> 28) & 0x3FF;                           // Image width in *phrases*
1356         uint32_t data = (p0 >> 40) & 0xFFFFF8;                  // Pixel data address
1357 //#ifdef OP_DEBUG_BMP
1358 // Prolly should use this... Though not sure exactly how.
1359 //Use the upper bits as an offset into the phrase depending on the BPP. That's how!
1360         uint32_t firstPix = (p1 >> 49) & 0x3F;
1361 //This is WEIRD! I'm sure I saw Atari Karts request 8 BPP FIRSTPIX! What happened???
1362 if (firstPix)
1363         WriteLog("OP: FIRSTPIX != 0! (Scaled BM)\n");
1364 //#endif
1365 // We can ignore the RELEASE (high order) bit for now--probably forever...!
1366 //      uint8_t flags = (p1 >> 45) & 0x0F;      // REFLECT, RMW, TRANS, RELEASE
1367 //Optimize: break these out to their own BOOL values [DONE]
1368         uint8_t flags = (p1 >> 45) & 0x07;                              // REFLECT (0), RMW (1), TRANS (2)
1369         bool flagREFLECT = (flags & OPFLAG_REFLECT ? true : false),
1370                 flagRMW = (flags & OPFLAG_RMW ? true : false),
1371                 flagTRANS = (flags & OPFLAG_TRANS ? true : false);
1372         uint8_t index = (p1 >> 37) & 0xFE;                              // CLUT index offset (upper pix, 1-4 bpp)
1373         uint32_t pitch = (p1 >> 15) & 0x07;                             // Phrase pitch
1374
1375         uint8_t * tomRam8 = TOMGetRamPointer();
1376         uint8_t * paletteRAM = &tomRam8[0x400];
1377         // This is OK as long as it's used correctly: For 16-bit RAM to RAM direct copies--NOT
1378         // for use when using endian-corrected data (i.e., any of the *ReadWord functions!)
1379         uint16_t * paletteRAM16 = (uint16_t *)paletteRAM;
1380
1381         uint16_t hscale = p2 & 0xFF;
1382 // Hmm. It seems that fixing the horizontal scale necessitated re-fixing this. Not sure why,
1383 // but seems to be consistent with the vertical scaling now (and it may turn out to be wrong!)...
1384         uint16_t horizontalRemainder = hscale;                          // Not sure if it starts full, but seems reasonable [It's not!]
1385 //      uint8_t horizontalRemainder = 0;                                        // Let's try zero! Seems to work! Yay! [No, it doesn't!]
1386         int32_t scaledWidthInPixels = (iwidth * phraseWidthToPixels[depth] * hscale) >> 5;
1387         uint32_t scaledPhrasePixels = (phraseWidthToPixels[depth] * hscale) >> 5;
1388
1389 //      WriteLog("bitmap %ix? %ibpp at %i,? firstpix=? data=0x%.8x pitch %i hflipped=%s dwidth=? (linked to ?) RMW=%s Tranparent=%s\n",
1390 //              iwidth, op_bitmap_bit_depth[bitdepth], xpos, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), (flags&OPFLAG_RMW ? "yes" : "no"), (flags&OPFLAG_TRANS ? "yes" : "no"));
1391
1392 // Looks like an hscale of zero means don't draw!
1393         if (!render || iwidth == 0 || hscale == 0)
1394                 return;
1395
1396 /*extern int start_logging;
1397 if (start_logging)
1398         WriteLog("OP: Scaled bitmap %ix? %ibpp at %i,? hscale=%02X fpix=%i data=%08X pitch %i hflipped=%s dwidth=? (linked to %08X) Transluency=%s\n",
1399                 iwidth, op_bitmap_bit_depth[depth], xpos, hscale, firstPix, data, pitch, (flagREFLECT ? "yes" : "no"), op_pointer, (flagRMW ? "yes" : "no"));*/
1400 //#define OP_DEBUG_BMP
1401 //#ifdef OP_DEBUG_BMP
1402 //      WriteLog("OP: Scaled bitmap %ix%i %ibpp at %i,%i firstpix=%i data=0x%.8x pitch %i hflipped=%s dwidth=%i (linked to 0x%.8x) Transluency=%s\n",
1403 //              iwidth, height, op_bitmap_bit_depth[bitdepth], xpos, ypos, firstPix, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), dwidth, op_pointer, (flags&OPFLAG_RMW ? "yes" : "no"));
1404 //#endif
1405
1406         int32_t startPos = xpos, endPos = xpos +
1407                 (!flagREFLECT ? scaledWidthInPixels - 1 : -(scaledWidthInPixels + 1));
1408         uint32_t clippedWidth = 0, phraseClippedWidth = 0, dataClippedWidth = 0;
1409         bool in24BPPMode = (((GET16(tomRam8, 0x0028) >> 1) & 0x03) == 1 ? true : false);        // VMODE
1410         // Not sure if this is Jaguar Two only location or what...
1411         // From the docs, it is... If we want to limit here we should think of something else.
1412 //      int32_t limit = GET16(tom_ram_8, 0x0008);                       // LIMIT
1413         int32_t limit = 720;
1414 //      int32_t lbufWidth = (!in24BPPMode ? limit - 1 : (limit / 2) - 1);       // Zero based limit...
1415         int32_t lbufWidth = 719;        // Zero based limit...
1416
1417         // If the image is completely to the left or right of the line buffer, then bail.
1418 //If in REFLECT mode, then these values are swapped! !!! FIX !!! [DONE]
1419 //There are four possibilities:
1420 //  1. image sits on left edge and no REFLECT; starts out of bounds but ends in bounds.
1421 //  2. image sits on left edge and REFLECT; starts in bounds but ends out of bounds.
1422 //  3. image sits on right edge and REFLECT; starts out of bounds but ends in bounds.
1423 //  4. image sits on right edge and no REFLECT; starts in bounds but ends out of bounds.
1424 //Numbers 2 & 4 can be caught by checking the LBUF clip while in the inner loop,
1425 // numbers 1 & 3 are of concern.
1426 // This *indirectly* handles only cases 2 & 4! And is WRONG if REFLECT is set...!
1427 //      if (rightMargin < 0 || leftMargin > lbufWidth)
1428
1429 // It might be easier to swap these (if REFLECTed) and just use XPOS down below...
1430 // That way, you could simply set XPOS to leftMargin if !REFLECT and to rightMargin otherwise.
1431 // Still have to be careful with the DATA and IWIDTH values though...
1432
1433         if ((!flagREFLECT && (endPos < 0 || startPos > lbufWidth))
1434                 || (flagREFLECT && (startPos < 0 || endPos > lbufWidth)))
1435                 return;
1436
1437         // Otherwise, find the clip limits and clip the phrase as well...
1438         // NOTE: I'm fudging here by letting the actual blit overstep the bounds of the
1439         //       line buffer, but it shouldn't matter since there are two unused line
1440         //       buffers below and nothing above and I'll at most write 40 bytes outside
1441         //       the line buffer... I could use a fractional clip begin/end value, but
1442         //       this makes the blit a *lot* more hairy. I might fix this in the future
1443         //       if it becomes necessary. (JLH)
1444         //       Probably wouldn't be *that* hairy. Just use a delta that tells the inner loop
1445         //       which pixel in the phrase is being written, and quit when either end of phrases
1446         //       is reached or line buffer extents are surpassed.
1447
1448 //This stuff is probably wrong as well... !!! FIX !!!
1449 //The strange thing is that it seems to work, but that's no guarantee that it's bulletproof!
1450 //Yup. Seems that JagMania doesn't work correctly with this...
1451 //Dunno if this is the problem, but Atari Karts is showing *some* of the road now...
1452 //Actually, it is! Or, it was. It doesn't seem to be clipping here, so the problem lies
1453 //elsewhere! Hmm. Putting the scaling code into the 1/2/8 BPP cases seems to draw the ground
1454 // a bit more accurately... Strange!
1455 //It's probably a case of the REFLECT flag being set and the background being written
1456 //from the right side of the screen...
1457 //But no, it isn't... At least if the diagnostics are telling the truth!
1458
1459         // NOTE: We're just using endPos to figure out how much, if any, to clip by.
1460         // ALSO: There may be another case where we start out of bounds and end out of bounds...!
1461         // !!! FIX !!!
1462
1463 //There's a problem here with scaledPhrasePixels in that it can be forced to zero when
1464 //the scaling factor is small. So fix it already! !!! FIX !!!
1465 /*if (scaledPhrasePixels == 0)
1466 {
1467         WriteLog("OP: [Scaled] We're about to encounter a divide by zero error!\n");
1468         DumpScaledObject(p0, p1, p2);
1469 }//*/
1470 //NOTE: I'm almost 100% sure that this is wrong... And it is! :-p
1471
1472 //Try a simple example...
1473 // Let's say we have a 8 BPP scanline with an hscale of $80 (4). Our xpos is -10,
1474 // non-flipped. Pixels in the bitmap are XYZXYZXYZXYZXYZ.
1475 // Scaled up, they would be XXXXYYYYZZZZXXXXYYYYZZZZXXXXYYYYZZZZ...
1476 //
1477 // Normally, we would expect this in the line buffer:
1478 // ZZXXXXYYYYZZZZXXXXYYYYZZZZ...
1479 //
1480 // But instead we're getting:
1481 // XXXXYYYYZZZZXXXXYYYYZZZZ...
1482 //
1483 // or are we??? It would seem so, simply by virtue of the fact that we're NOT starting
1484 // on negative boundary--or are we? Hmm...
1485 // cw = 10, dcw = pcw = 10 / ([8 * 4 = 32] 32) = 0, sp = -10
1486 //
1487 // Let's try a real world example:
1488 //
1489 //OP: Scaled bitmap (70, 8 BPP, spp=28) sp (-400) < 0... [new sp=-8, cw=400, dcw=pcw=14]
1490 //OP: Scaled bitmap (6F, 8 BPP, spp=27) sp (-395) < 0... [new sp=-17, cw=395, dcw=pcw=14]
1491 //
1492 // Really, spp is 27.75 in the second case...
1493 // So... If we do 395 / 27.75, we get 14. Ok so far... If we scale that against the
1494 // start position (14 * 27.75), we get -6.5... NOT -17!
1495
1496 //Now it seems we're working OK, at least for the first case...
1497 uint32_t scaledPhrasePixelsUS = phraseWidthToPixels[depth] * hscale;
1498
1499         if (startPos < 0)                       // Case #1: Begin out, end in, L to R
1500 {
1501 extern int start_logging;
1502 if (start_logging)
1503         WriteLog("OP: Scaled bitmap (%02X, %u BPP, spp=%u) start pos (%i) < 0...", hscale, op_bitmap_bit_depth[depth], scaledPhrasePixels, startPos);
1504 //              clippedWidth = 0 - startPos,
1505                 clippedWidth = (0 - startPos) << 5,
1506 //              dataClippedWidth = phraseClippedWidth = clippedWidth / scaledPhrasePixels,
1507                 dataClippedWidth = phraseClippedWidth = (clippedWidth / scaledPhrasePixelsUS) >> 5,
1508 //              startPos = 0 - (clippedWidth % scaledPhrasePixels);
1509                 startPos += (dataClippedWidth * scaledPhrasePixelsUS) >> 5;
1510 if (start_logging)
1511         WriteLog(" [new sp=%i, cw=%i, dcw=pcw=%i]\n", startPos, clippedWidth, dataClippedWidth);
1512 }
1513
1514         if (endPos < 0)                         // Case #2: Begin in, end out, R to L
1515                 clippedWidth = 0 - endPos,
1516                 phraseClippedWidth = clippedWidth / scaledPhrasePixels;
1517
1518         if (endPos > lbufWidth)         // Case #3: Begin in, end out, L to R
1519                 clippedWidth = endPos - lbufWidth,
1520                 phraseClippedWidth = clippedWidth / scaledPhrasePixels;
1521
1522         if (startPos > lbufWidth)       // Case #4: Begin out, end in, R to L
1523                 clippedWidth = startPos - lbufWidth,
1524                 dataClippedWidth = phraseClippedWidth = clippedWidth / scaledPhrasePixels,
1525                 startPos = lbufWidth + (clippedWidth % scaledPhrasePixels);
1526
1527 extern int op_start_log;
1528 if (op_start_log && clippedWidth != 0)
1529         WriteLog("OP: Clipped line. SP=%i, EP=%i, clip=%u, iwidth=%u, hscale=%02X\n", startPos, endPos, clippedWidth, iwidth, hscale);
1530 if (op_start_log && startPos == 13)
1531 {
1532         WriteLog("OP: Scaled line. SP=%i, EP=%i, clip=%u, iwidth=%u, hscale=%02X, depth=%u, firstPix=%u\n", startPos, endPos, clippedWidth, iwidth, hscale, depth, firstPix);
1533         DumpScaledObject(p0, p1, p2);
1534         if (iwidth == 7)
1535         {
1536                 WriteLog("    %08X: ", data);
1537                 for(int i=0; i<7*8; i++)
1538                         WriteLog("%02X ", JaguarReadByte(data+i));
1539                 WriteLog("\n");
1540         }
1541 }
1542         // If the image is sitting on the line buffer left or right edge, we need to compensate
1543         // by decreasing the image phrase width accordingly.
1544         iwidth -= phraseClippedWidth;
1545
1546         // Also, if we're clipping the phrase we need to make sure we're in the correct part of
1547         // the pixel data.
1548 //      data += phraseClippedWidth * (pitch << 3);
1549         data += dataClippedWidth * (pitch << 3);
1550
1551         // NOTE: When the bitmap is in REFLECT mode, the XPOS marks the *right* side of the
1552         //       bitmap! This makes clipping & etc. MUCH, much easier...!
1553 //      uint32_t lbufAddress = 0x1800 + (!in24BPPMode ? leftMargin * 2 : leftMargin * 4);
1554 //      uint32_t lbufAddress = 0x1800 + (!in24BPPMode ? startPos * 2 : startPos * 4);
1555         uint32_t lbufAddress = 0x1800 + startPos * 2;
1556         uint8_t * currentLineBuffer = &tomRam8[lbufAddress];
1557 //uint8_t * lineBufferLowerLimit = &tom_ram_8[0x1800],
1558 //      * lineBufferUpperLimit = &tom_ram_8[0x1800 + 719];
1559
1560         // Render.
1561
1562 // Hmm. We check above for 24 BPP mode, but don't do anything about it below...
1563 // If we *were* in 24 BPP mode, how would you convert CRY to RGB24? Seems to me
1564 // that if you're in CRY mode then you wouldn't be able to use 24 BPP bitmaps
1565 // anyway.
1566 // This seems to be the case (at least according to the Midsummer docs)...!
1567
1568         if (depth == 0)                                                                 // 1 BPP
1569         {
1570 if (firstPix != 0)
1571         WriteLog("OP: Scaled bitmap @ 1 BPP requesting FIRSTPIX!\n");
1572                 // The LSB of flags is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1573                 int32_t lbufDelta = ((int8_t)((flags << 7) & 0xFF) >> 5) | 0x02;
1574
1575                 int pixCount = 0;
1576                 uint64_t pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1577
1578                 while ((int32_t)iwidth > 0)
1579                 {
1580                         uint8_t bits = pixels >> 63;
1581
1582 #ifndef OP_USES_PALETTE_ZERO
1583                         if (flagTRANS && bits == 0)
1584 #else
1585                         if (flagTRANS && (paletteRAM16[index | bits] == 0))
1586 #endif
1587                                 ;       // Do nothing...
1588                         else
1589                         {
1590                                 if (!flagRMW)
1591                                         // This is the *only* correct use of endian-dependent code
1592                                         // (i.e., mem-to-mem direct copying)!
1593                                         *(uint16_t *)currentLineBuffer = paletteRAM16[index | bits];
1594                                 else
1595                                         *currentLineBuffer =
1596                                                 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1597                                         *(currentLineBuffer + 1) =
1598                                                 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1599                         }
1600
1601                         currentLineBuffer += lbufDelta;
1602
1603 /*
1604 The reason we subtract from horizontalRemainder *after* the test is because we had too few
1605 bits in horizontalRemainder to properly recognize a negative number. But now it's 16 bits
1606 wide, so we could probably go back to that (as long as we make it an int16_t and not a uint16_t!)
1607 */
1608 /*                      horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1609                         while (horizontalRemainder & 0x80)
1610                         {
1611                                 horizontalRemainder += hscale;
1612                                 pixCount++;
1613                                 pixels <<= 1;
1614                         }//*/
1615 //                      while (horizontalRemainder <= 0x20)             // I.e., it's <= 1.0 (*before* subtraction)
1616                         while (horizontalRemainder < 0x20)              // I.e., it's <= 1.0 (*before* subtraction)
1617                         {
1618                                 horizontalRemainder += hscale;
1619                                 pixCount++;
1620                                 pixels <<= 1;
1621                         }
1622                         horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1623
1624                         if (pixCount > 63)
1625                         {
1626                                 int phrasesToSkip = pixCount / 64, pixelShift = pixCount % 64;
1627
1628                                 data += (pitch << 3) * phrasesToSkip;
1629                                 pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1630                                 pixels <<= 1 * pixelShift;
1631                                 iwidth -= phrasesToSkip;
1632                                 pixCount = pixelShift;
1633                         }
1634                 }
1635         }
1636         else if (depth == 1)                                                    // 2 BPP
1637         {
1638 if (firstPix != 0)
1639         WriteLog("OP: Scaled bitmap @ 2 BPP requesting FIRSTPIX!\n");
1640                 index &= 0xFC;                                                          // Top six bits form CLUT index
1641                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1642                 int32_t lbufDelta = ((int8_t)((flags << 7) & 0xFF) >> 5) | 0x02;
1643
1644                 int pixCount = 0;
1645                 uint64_t pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1646
1647                 while ((int32_t)iwidth > 0)
1648                 {
1649                         uint8_t bits = pixels >> 62;
1650
1651 #ifndef OP_USES_PALETTE_ZERO
1652                         if (flagTRANS && bits == 0)
1653 #else
1654                         if (flagTRANS && (paletteRAM16[index | bits] == 0))
1655 #endif
1656                                 ;       // Do nothing...
1657                         else
1658                         {
1659                                 if (!flagRMW)
1660                                         // This is the *only* correct use of endian-dependent code
1661                                         // (i.e., mem-to-mem direct copying)!
1662                                         *(uint16_t *)currentLineBuffer = paletteRAM16[index | bits];
1663                                 else
1664                                         *currentLineBuffer =
1665                                                 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1666                                         *(currentLineBuffer + 1) =
1667                                                 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1668                         }
1669
1670                         currentLineBuffer += lbufDelta;
1671
1672 /*                      horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1673                         while (horizontalRemainder & 0x80)
1674                         {
1675                                 horizontalRemainder += hscale;
1676                                 pixCount++;
1677                                 pixels <<= 2;
1678                         }//*/
1679 //                      while (horizontalRemainder <= 0x20)             // I.e., it's <= 0 (*before* subtraction)
1680                         while (horizontalRemainder < 0x20)              // I.e., it's <= 1.0 (*before* subtraction)
1681                         {
1682                                 horizontalRemainder += hscale;
1683                                 pixCount++;
1684                                 pixels <<= 2;
1685                         }
1686                         horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1687
1688                         if (pixCount > 31)
1689                         {
1690                                 int phrasesToSkip = pixCount / 32, pixelShift = pixCount % 32;
1691
1692                                 data += (pitch << 3) * phrasesToSkip;
1693                                 pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1694                                 pixels <<= 2 * pixelShift;
1695                                 iwidth -= phrasesToSkip;
1696                                 pixCount = pixelShift;
1697                         }
1698                 }
1699         }
1700         else if (depth == 2)                                                    // 4 BPP
1701         {
1702 if (firstPix != 0)
1703         WriteLog("OP: Scaled bitmap @ 4 BPP requesting FIRSTPIX!\n");
1704                 index &= 0xF0;                                                          // Top four bits form CLUT index
1705                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1706                 int32_t lbufDelta = ((int8_t)((flags << 7) & 0xFF) >> 5) | 0x02;
1707
1708                 int pixCount = 0;
1709                 uint64_t pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1710
1711                 while ((int32_t)iwidth > 0)
1712                 {
1713                         uint8_t bits = pixels >> 60;
1714
1715 #ifndef OP_USES_PALETTE_ZERO
1716                         if (flagTRANS && bits == 0)
1717 #else
1718                         if (flagTRANS && (paletteRAM16[index | bits] == 0))
1719 #endif
1720                                 ;       // Do nothing...
1721                         else
1722                         {
1723                                 if (!flagRMW)
1724                                         // This is the *only* correct use of endian-dependent code
1725                                         // (i.e., mem-to-mem direct copying)!
1726                                         *(uint16_t *)currentLineBuffer = paletteRAM16[index | bits];
1727                                 else
1728                                         *currentLineBuffer =
1729                                                 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1730                                         *(currentLineBuffer + 1) =
1731                                                 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1732                         }
1733
1734                         currentLineBuffer += lbufDelta;
1735
1736 /*                      horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1737                         while (horizontalRemainder & 0x80)
1738                         {
1739                                 horizontalRemainder += hscale;
1740                                 pixCount++;
1741                                 pixels <<= 4;
1742                         }//*/
1743 //                      while (horizontalRemainder <= 0x20)             // I.e., it's <= 0 (*before* subtraction)
1744                         while (horizontalRemainder < 0x20)              // I.e., it's <= 0 (*before* subtraction)
1745                         {
1746                                 horizontalRemainder += hscale;
1747                                 pixCount++;
1748                                 pixels <<= 4;
1749                         }
1750                         horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1751
1752                         if (pixCount > 15)
1753                         {
1754                                 int phrasesToSkip = pixCount / 16, pixelShift = pixCount % 16;
1755
1756                                 data += (pitch << 3) * phrasesToSkip;
1757                                 pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1758                                 pixels <<= 4 * pixelShift;
1759                                 iwidth -= phrasesToSkip;
1760                                 pixCount = pixelShift;
1761                         }
1762                 }
1763         }
1764         else if (depth == 3)                                                    // 8 BPP
1765         {
1766 if (firstPix)
1767         WriteLog("OP: Scaled bitmap @ 8 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1768                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1769                 int32_t lbufDelta = ((int8_t)((flags << 7) & 0xFF) >> 5) | 0x02;
1770
1771                 int pixCount = 0;
1772                 uint64_t pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1773
1774                 while ((int32_t)iwidth > 0)
1775                 {
1776                         uint8_t bits = pixels >> 56;
1777
1778 #ifndef OP_USES_PALETTE_ZERO
1779                         if (flagTRANS && bits == 0)
1780 #else
1781                         if (flagTRANS && (paletteRAM16[bits] == 0))
1782 #endif
1783                                 ;       // Do nothing...
1784                         else
1785                         {
1786                                 if (!flagRMW)
1787                                         // This is the *only* correct use of endian-dependent code
1788                                         // (i.e., mem-to-mem direct copying)!
1789                                         *(uint16_t *)currentLineBuffer = paletteRAM16[bits];
1790 /*                              {
1791                                         if (currentLineBuffer >= lineBufferLowerLimit && currentLineBuffer <= lineBufferUpperLimit)
1792                                                 *(uint16_t *)currentLineBuffer = paletteRAM16[bits];
1793                                 }*/
1794                                 else
1795                                         *currentLineBuffer =
1796                                                 BLEND_CR(*currentLineBuffer, paletteRAM[bits << 1]),
1797                                         *(currentLineBuffer + 1) =
1798                                                 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[(bits << 1) + 1]);
1799                         }
1800
1801                         currentLineBuffer += lbufDelta;
1802
1803 //                      while (horizontalRemainder <= 0x20)             // I.e., it's <= 0 (*before* subtraction)
1804                         while (horizontalRemainder < 0x20)              // I.e., it's <= 1.0 (*before* subtraction)
1805                         {
1806                                 horizontalRemainder += hscale;
1807                                 pixCount++;
1808                                 pixels <<= 8;
1809                         }
1810                         horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1811
1812                         if (pixCount > 7)
1813                         {
1814                                 int phrasesToSkip = pixCount / 8, pixelShift = pixCount % 8;
1815
1816                                 data += (pitch << 3) * phrasesToSkip;
1817                                 pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1818                                 pixels <<= 8 * pixelShift;
1819                                 iwidth -= phrasesToSkip;
1820                                 pixCount = pixelShift;
1821                         }
1822                 }
1823         }
1824         else if (depth == 4)                                                    // 16 BPP
1825         {
1826 if (firstPix != 0)
1827         WriteLog("OP: Scaled bitmap @ 16 BPP requesting FIRSTPIX!\n");
1828                 // The LSB is OPFLAG_REFLECT, so sign extend it and OR 2 into it.
1829                 int32_t lbufDelta = ((int8_t)((flags << 7) & 0xFF) >> 5) | 0x02;
1830
1831                 int pixCount = 0;
1832                 uint64_t pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1833
1834                 while ((int32_t)iwidth > 0)
1835                 {
1836                         uint8_t bitsHi = pixels >> 56, bitsLo = pixels >> 48;
1837
1838 //This doesn't seem right... Let's try the encoded black value ($8800):
1839 //Apparently, CRY 0 maps to $8800...
1840                                 if (flagTRANS && ((bitsLo | bitsHi) == 0))
1841 //                              if (flagTRANS && (bitsHi == 0x88) && (bitsLo == 0x00))
1842                                 ;       // Do nothing...
1843                         else
1844                         {
1845                                 if (!flagRMW)
1846                                         *currentLineBuffer = bitsHi,
1847                                         *(currentLineBuffer + 1) = bitsLo;
1848                                 else
1849                                         *currentLineBuffer =
1850                                                 BLEND_CR(*currentLineBuffer, bitsHi),
1851                                         *(currentLineBuffer + 1) =
1852                                                 BLEND_Y(*(currentLineBuffer + 1), bitsLo);
1853                         }
1854
1855                         currentLineBuffer += lbufDelta;
1856
1857 /*                      horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1858                         while (horizontalRemainder & 0x80)
1859                         {
1860                                 horizontalRemainder += hscale;
1861                                 pixCount++;
1862                                 pixels <<= 16;
1863                         }//*/
1864 //                      while (horizontalRemainder <= 0x20)             // I.e., it's <= 0 (*before* subtraction)
1865                         while (horizontalRemainder < 0x20)              // I.e., it's <= 1.0 (*before* subtraction)
1866                         {
1867                                 horizontalRemainder += hscale;
1868                                 pixCount++;
1869                                 pixels <<= 16;
1870                         }
1871                         horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1872 //*/
1873                         if (pixCount > 3)
1874                         {
1875                                 int phrasesToSkip = pixCount / 4, pixelShift = pixCount % 4;
1876
1877                                 data += (pitch << 3) * phrasesToSkip;
1878                                 pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1879                                 pixels <<= 16 * pixelShift;
1880
1881                                 iwidth -= phrasesToSkip;
1882
1883                                 pixCount = pixelShift;
1884                         }
1885                 }
1886         }
1887         else if (depth == 5)                                                    // 24 BPP
1888         {
1889 //I'm not sure that you can scale a 24 BPP bitmap properly--the JTRM seems to indicate as much.
1890 WriteLog("OP: Writing 24 BPP scaled bitmap!\n");
1891 if (firstPix != 0)
1892         WriteLog("OP: Scaled bitmap @ 24 BPP requesting FIRSTPIX!\n");
1893                 // Not sure, but I think RMW only works with 16 BPP and below, and only in CRY mode...
1894                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 4 into it.
1895                 int32_t lbufDelta = ((int8_t)((flags << 7) & 0xFF) >> 4) | 0x04;
1896
1897                 while (iwidth--)
1898                 {
1899                         // Fetch phrase...
1900                         uint64_t pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1901                         data += pitch << 3;                                             // Multiply pitch * 8 (optimize: precompute this value)
1902
1903                         for(int i=0; i<2; i++)
1904                         {
1905                                 uint8_t bits3 = pixels >> 56, bits2 = pixels >> 48,
1906                                         bits1 = pixels >> 40, bits0 = pixels >> 32;
1907
1908                                 if (flagTRANS && (bits3 | bits2 | bits1 | bits0) == 0)
1909                                         ;       // Do nothing...
1910                                 else
1911                                         *currentLineBuffer = bits3,
1912                                         *(currentLineBuffer + 1) = bits2,
1913                                         *(currentLineBuffer + 2) = bits1,
1914                                         *(currentLineBuffer + 3) = bits0;
1915
1916                                 currentLineBuffer += lbufDelta;
1917                                 pixels <<= 32;
1918                         }
1919                 }
1920         }
1921 }