4 // Original source by David Raingeard (Cal2)
5 // GCC/SDL port by Niels Wagenaar (Linux/WIN32) and Caz (BeOS)
6 // Extensive cleanups/fixes/rewrites by James Hammons
7 // (C) 2010 Underground Software
9 // JLH = James Hammons <jlhamm@acm.org>
12 // --- ---------- -----------------------------------------------------------
13 // JLH 01/16/2010 Created this log ;-)
23 #include "m68000/m68kinterface.h"
28 //#define OP_DEBUG_BMP
// Index the 64K blend lookup tables: the existing component (dst) selects the
// high byte of the index and the delta (src) the low byte. Both arguments are
// fully parenthesized so compound expressions expand with the intended
// precedence (the cast used to bind only to the first token of dst).
30 #define BLEND_Y(dst, src) op_blend_y[(((uint16_t)(dst)) << 8) | ((uint16_t)(src))]
31 #define BLEND_CR(dst, src) op_blend_cr[(((uint16_t)(dst)) << 8) | ((uint16_t)(src))]
// Object type codes: the low 3 bits of an object's first phrase.
33 #define OBJECT_TYPE_BITMAP 0 // 000
34 #define OBJECT_TYPE_SCALE 1 // 001
35 #define OBJECT_TYPE_GPU 2 // 010
36 #define OBJECT_TYPE_BRANCH 3 // 011
37 #define OBJECT_TYPE_STOP 4 // 100
// Branch object condition codes (the 3-bit CC field starting at bit 14 of
// the first phrase).
39 #define CONDITION_EQUAL 0 // VC == YPOS
40 #define CONDITION_LESS_THAN 1 // VC < YPOS
41 #define CONDITION_GREATER_THAN 2 // VC > YPOS
42 #define CONDITION_OP_FLAG_SET 3
43 #define CONDITION_SECOND_HALF_LINE 4
// Bitmap object flag bits (the 4-bit field extracted with (p1 >> 45) & 0x0F).
46 #define OPFLAG_RELEASE 8 // Bus release bit
47 #define OPFLAG_TRANS 4 // Transparency bit
48 #define OPFLAG_RMW 2 // Read-Modify-Write bit
49 #define OPFLAG_REFLECT 1 // Horizontal mirror bit
52 // Private function prototypes
54 void OPProcessFixedBitmap(uint64_t p0, uint64_t p1, bool render);
55 void OPProcessScaledBitmap(uint64_t p0, uint64_t p1, uint64_t p2, bool render);
56 void OPDiscoverObjects(uint32_t address);
57 void OPDumpObjectList(void);
58 void DumpScaledObject(uint64_t p0, uint64_t p1, uint64_t p2);
59 void DumpFixedObject(uint64_t p0, uint64_t p1);
60 void DumpBitmapCore(uint64_t p0, uint64_t p1);
61 uint64_t OPLoadPhrase(uint32_t offset);
63 // Local global variables
65 // Blend tables (64K each)
// Indexed through the BLEND_Y / BLEND_CR macros with (dst << 8) | src.
66 static uint8_t op_blend_y[0x10000];
67 static uint8_t op_blend_cr[0x10000];
68 // There may be a problem with this "RAM" overlapping (and thus being independent of)
69 // some of the regular TOM RAM...
70 //#warning objectp_ram is separated from TOM RAM--need to fix that!
71 //static uint8_t objectp_ram[0x40]; // This is based at $F00000
// NOTE(review): objectp_running is not referenced in the visible part of this
// file; presumably a "processor active" flag used elsewhere -- confirm.
72 uint8_t objectp_running = 0;
73 //bool objectp_stop_reading_list;
// Bits per pixel for each value of the 3-bit depth field, (p1 >> 12) & 0x07.
75 static uint8_t op_bitmap_bit_depth[8] = { 1, 2, 4, 8, 16, 24, 32, 0 };
76 //static uint32_t op_bitmap_bit_size[8] =
77 // { (uint32_t)(0.125*65536), (uint32_t)(0.25*65536), (uint32_t)(0.5*65536), (uint32_t)(1*65536),
78 // (uint32_t)(2*65536), (uint32_t)(1*65536), (uint32_t)(1*65536), (uint32_t)(1*65536) };
// Address of the object currently being processed; loaded from the list
// pointer at the start of list processing and replaced by link addresses.
79 static uint32_t op_pointer;
// Multiplier from image width in phrases to pixels, indexed by the depth field.
81 int32_t phraseWidthToPixels[8] = { 64, 32, 16, 8, 4, 2, 0, 0 };
85 // Object Processor initialization
89 // Here we calculate the saturating blend of a signed 4-bit value and an
90 // existing Cyan/Red value as well as a signed 8-bit value and an existing intensity...
91 // Note: CRY is 4 bits Cyan, 4 bits Red, 8 bits intensitY
92 for(int i=0; i<256*256; i++)
94 int y = (i >> 8) & 0xFF;
95 int dy = (int8_t)i; // Sign extend the Y index
96 int c1 = (i >> 8) & 0x0F;
97 int dc1 = (int8_t)(i << 4) >> 4; // Sign extend the R index
98 int c2 = (i >> 12) & 0x0F;
99 int dc2 = (int8_t)(i & 0xF0) >> 4; // Sign extend the C index
124 op_blend_cr[i] = (c2 << 4) | c1;
132 // Object Processor reset
136 // memset(objectp_ram, 0x00, 0x40);
// Printable names for the 3-bit object type field (index: lo & 0x07).
141 static const char * opType[8] =
142 { "(BITMAP)", "(SCALED BITMAP)", "(GPU INT)", "(BRANCH)", "(STOP)", "???", "???", "???" };
// Printable names for the branch condition code (index: cc).
143 static const char * ccType[8] =
144 { "==", "<", ">", "(opflag set)", "(second half line)", "?", "?", "?" };
// Addresses of the objects recorded by OPDiscoverObjects(), and how many of
// the slots are in use.
145 static uint32_t object[8192];
146 static uint32_t numberOfObjects;
147 //static uint32_t objectLink[8192];
148 //static uint32_t numberOfLinks;
153 //#warning "!!! Fix OL dump so that it follows links !!!"
154 // const char * opType[8] =
155 // { "(BITMAP)", "(SCALED BITMAP)", "(GPU INT)", "(BRANCH)", "(STOP)", "???", "???", "???" };
156 // const char * ccType[8] =
157 // { "\"==\"", "\"<\"", "\">\"", "(opflag set)", "(second half line)", "?", "?", "?" };
// Log the list pointer, then dump the first 0x100 bytes (32 phrases) that
// follow it, decoding each phrase by its 3-bit type field.
159 uint32_t olp = OPGetListPointer();
160 WriteLog("\nOP: OLP = $%08X\n", olp);
161 WriteLog("OP: Phrase dump\n ----------\n");
164 for(uint32_t i=0; i<0x100; i+=8)
166 uint32_t hi = JaguarReadLong(olp + i, OP), lo = JaguarReadLong(olp + i + 4, OP);
167 WriteLog("\t%08X: %08X %08X %s", olp + i, hi, lo, opType[lo & 0x07]);
// Branch object: show its YPOS, condition code and link target.
169 if ((lo & 0x07) == 3)
171 uint16_t ypos = (lo >> 3) & 0x7FF;
// CC is a 3-bit field (bits 14-16); a 2-bit mask reported condition 4
// ("second half line") as "==".
172 uint8_t cc = (lo >> 14) & 0x07;
173 uint32_t link = ((hi << 11) | (lo >> 21)) & 0x3FFFF8;
174 WriteLog(" YPOS=%u, CC=%s, link=%08X", ypos, ccType[cc], link);
// Fixed bitmap: dump together with its second phrase.
179 if ((lo & 0x07) == 0)
180 DumpFixedObject(OPLoadPhrase(olp+i), OPLoadPhrase(olp+i+8));
// Scaled bitmap: dump together with its second and third phrases.
182 if ((lo & 0x07) == 1)
183 DumpScaledObject(OPLoadPhrase(olp+i), OPLoadPhrase(olp+i+8), OPLoadPhrase(olp+i+16));
188 //#warning "!!! Fix lockup in OPDiscoverObjects() !!!"
189 //temp, to keep the following function from locking up on bad/weird OLs
// Walk the list from the list pointer, recording every reachable object.
193 OPDiscoverObjects(olp);
// Checks whether `address` is already recorded in the discovered-object table
// (object[0 .. numberOfObjects-1]).
// NOTE(review): the return statements are not visible in this listing;
// presumably true on a match and false otherwise -- confirm.
199 bool OPObjectExists(uint32_t address)
201 // Yes, we really do a linear search, every time. :-/
202 for(uint32_t i=0; i<numberOfObjects; i++)
204 if (address == object[i])
// Records the object at `address` in object[] and follows the list from
// there, recursing into the phrase after an object (address + 8) for branches
// that are not unconditional. Iteration continues until a STOP object
// (type 4) is decoded.
// NOTE(review): object[] holds 8192 entries and no capacity check is visible
// before numberOfObjects is incremented -- confirm one exists in the elided
// lines.
212 void OPDiscoverObjects(uint32_t address)
214 uint8_t objectType = 0;
218 // If we've seen this object already, bail out!
219 // Otherwise, add it to the list
220 if (OPObjectExists(address))
223 object[numberOfObjects++] = address;
225 // Get the object & decode its type, link address
226 uint32_t hi = JaguarReadLong(address + 0, OP);
227 uint32_t lo = JaguarReadLong(address + 4, OP);
228 objectType = lo & 0x07;
229 uint32_t link = ((hi << 11) | (lo >> 21)) & 0x3FFFF8;
233 // Branch if YPOS < 2047 (or YPOS > 0) can be treated as a GOTO, so
234 // don't do any discovery in that case. Otherwise, have at it:
// $7FFB = branch, CC "<", YPOS 2047; $8003 = branch, CC ">", YPOS 0.
235 if (((lo & 0xFFFF) != 0x7FFB) && ((lo & 0xFFFF) != 0x8003))
236 // Recursion needed to follow all links! This does depth-first
237 // recursion on the not-taken objects
238 OPDiscoverObjects(address + 8);
241 // Get the next object...
244 while (objectType != 4);
// Logs every object recorded in object[]: its address, both 32-bit halves of
// its first phrase, its type name and its link address, followed by
// type-specific detail.
248 void OPDumpObjectList(void)
250 for(uint32_t i=0; i<numberOfObjects; i++)
252 uint32_t address = object[i];
254 uint32_t hi = JaguarReadLong(address + 0, OP);
255 uint32_t lo = JaguarReadLong(address + 4, OP);
256 uint8_t objectType = lo & 0x07;
// Link field of the phrase, already scaled to a byte address (multiple of 8).
257 uint32_t link = ((hi << 11) | (lo >> 21)) & 0x3FFFF8;
258 WriteLog("%08X: %08X %08X %s -> $%08X", address, hi, lo, opType[objectType], link);
262 uint16_t ypos = (lo >> 3) & 0x7FF;
263 uint8_t cc = (lo >> 14) & 0x07; // Proper # of bits == 3
264 WriteLog(" YPOS %s %u", ccType[cc], ypos);
269 // Yes, this is how the OP finds follow-on phrases for bitmap/scaled
270 // bitmap objects...!
272 DumpFixedObject(OPLoadPhrase(address + 0),
273 OPLoadPhrase(address | 0x08));
276 DumpScaledObject(OPLoadPhrase(address + 0),
277 OPLoadPhrase(address | 0x08), OPLoadPhrase(address | 0x10));
279 if (address == link) // Ruh roh...
281 // Runaway recursive link is bad!
282 WriteLog("***** SELF REFERENTIAL LINK *****\n\n");
291 // Object Processor memory access
292 // Memory range: F00010 - F00027
294 // F00010-F00017 R xxxxxxxx xxxxxxxx OB - current object code from the graphics processor
295 // F00020-F00023 W xxxxxxxx xxxxxxxx OLP - start of the object list
296 // F00026 W -------- -------x OBF - object processor flag
// Returns the byte stored at `offset` in objectp_ram; `who` is not used in
// the visible lines.
// NOTE(review): the only objectp_ram declaration visible in this file is
// commented out, so this accessor is presumably compiled out -- confirm.
300 uint8_t OPReadByte(uint32_t offset, uint32_t who/*=UNKNOWN*/)
303 return objectp_ram[offset];
// Returns the 16-bit value read with GET16 at `offset` in objectp_ram; `who`
// is not used in the visible lines.
// NOTE(review): the only objectp_ram declaration visible in this file is
// commented out, so this accessor is presumably compiled out -- confirm.
306 uint16_t OPReadWord(uint32_t offset, uint32_t who/*=UNKNOWN*/)
309 return GET16(objectp_ram, offset);
// Stores `data` at `offset` in objectp_ram; `who` is not used in the visible
// lines.
// NOTE(review): the only objectp_ram declaration visible in this file is
// commented out, so this accessor is presumably compiled out -- confirm.
312 void OPWriteByte(uint32_t offset, uint8_t data, uint32_t who/*=UNKNOWN*/)
315 objectp_ram[offset] = data;
// Stores the 16-bit `data` with SET16 at `offset` in objectp_ram; `who` is
// not used in the visible lines. The trailing block is disabled logging of
// list-pointer writes.
// NOTE(review): the only objectp_ram declaration visible in this file is
// commented out, so this accessor is presumably compiled out -- confirm.
318 void OPWriteWord(uint32_t offset, uint16_t data, uint32_t who/*=UNKNOWN*/)
321 SET16(objectp_ram, offset, data);
323 /*if (offset == 0x20)
324 WriteLog("OP: Setting lo list pointer: %04X\n", data);
326 WriteLog("OP: Setting hi list pointer: %04X\n", data);//*/
// Returns the 32-bit object list pointer assembled from the two 16-bit words
// at offsets 0x20 (low word) and 0x22 (high word) of tomRam8.
331 uint32_t OPGetListPointer(void)
333 // Note: This register is LO / HI WORD, hence the funky look of this...
// Widen before shifting so a high word >= 0x8000 is never shifted into the
// sign bit of a promoted int.
334 return (uint32_t)GET16(tomRam8, 0x20) | ((uint32_t)GET16(tomRam8, 0x22) << 16);
338 // This is WRONG, since the OBF is only 16 bits wide!!! [FIXED]
// Returns the 16-bit value read with GET16 at offset 0x26 of tomRam8 (the
// object processor flag register, OBF, per the register map in this file).
340 uint32_t OPGetStatusRegister(void)
342 return GET16(tomRam8, 0x26);
346 // This is WRONG, since the OBF is only 16 bits wide!!! [FIXED]
// Writes the low 16 bits of `data` into the two bytes at offsets 0x26/0x27 of
// tomRam8. The high byte is overwritten; for the low byte, bits 1-7 of `data`
// are OR-ed in and bit 0 of `data` is ignored.
// NOTE(review): because the low byte is only OR-ed, bits set there are never
// cleared by this function -- confirm that is intended.
348 void OPSetStatusRegister(uint32_t data)
350 tomRam8[0x26] = (data & 0x0000FF00) >> 8;
351 tomRam8[0x27] |= (data & 0xFE);
// Stores the 64-bit `object` phrase into tomRam8 bytes 0x10-0x17 in plain
// big-endian order (most significant byte at 0x10). The commented-out block
// is an earlier layout that stored the low 32 bits first.
355 void OPSetCurrentObject(uint64_t object)
357 //Not sure this is right... Wouldn't it just be stored 64 bit BE?
358 // Stored as least significant 32 bits first, ms32 last in big endian
359 /* objectp_ram[0x13] = object & 0xFF; object >>= 8;
360 objectp_ram[0x12] = object & 0xFF; object >>= 8;
361 objectp_ram[0x11] = object & 0xFF; object >>= 8;
362 objectp_ram[0x10] = object & 0xFF; object >>= 8;
364 objectp_ram[0x17] = object & 0xFF; object >>= 8;
365 objectp_ram[0x16] = object & 0xFF; object >>= 8;
366 objectp_ram[0x15] = object & 0xFF; object >>= 8;
367 objectp_ram[0x14] = object & 0xFF;*/
368 // Let's try regular good old big endian...
// Peel bytes off the low end of `object`, filling from 0x17 down to 0x10.
369 tomRam8[0x17] = object & 0xFF; object >>= 8;
370 tomRam8[0x16] = object & 0xFF; object >>= 8;
371 tomRam8[0x15] = object & 0xFF; object >>= 8;
372 tomRam8[0x14] = object & 0xFF; object >>= 8;
374 tomRam8[0x13] = object & 0xFF; object >>= 8;
375 tomRam8[0x12] = object & 0xFF; object >>= 8;
376 tomRam8[0x11] = object & 0xFF; object >>= 8;
377 tomRam8[0x10] = object & 0xFF;
// Reads one 64-bit phrase: `offset` is rounded down to a multiple of 8, then
// two 32-bit reads are combined with the first long in the upper 32 bits.
381 uint64_t OPLoadPhrase(uint32_t offset)
383 offset &= ~0x07; // 8 byte alignment
384 return ((uint64_t)JaguarReadLong(offset, OP) << 32) | (uint64_t)JaguarReadLong(offset+4, OP);
// Writes the 64-bit phrase `p`: `offset` is rounded down to a multiple of 8,
// then the upper 32 bits are written first and the lower 32 bits at offset + 4.
388 void OPStorePhrase(uint32_t offset, uint64_t p)
390 offset &= ~0x07; // 8 byte alignment
391 JaguarWriteLong(offset, p >> 32, OP);
392 JaguarWriteLong(offset + 4, p & 0xFFFFFFFF, OP);
397 // Debugging routines
// Logs a scaled bitmap object: the raw second and third phrases, the shared
// bitmap fields (via DumpBitmapCore), then the three scaling bytes taken
// from the low 24 bits of p2.
399 void DumpScaledObject(uint64_t p0, uint64_t p1, uint64_t p2)
401 WriteLog(" %08X %08X\n", (uint32_t)(p1>>32), (uint32_t)(p1&0xFFFFFFFF));
402 WriteLog(" %08X %08X\n", (uint32_t)(p2>>32), (uint32_t)(p2&0xFFFFFFFF));
403 DumpBitmapCore(p0, p1);
// p2 bits 0-7: hscale, bits 8-15: vscale, bits 16-23: remainder.
404 uint32_t hscale = p2 & 0xFF;
405 uint32_t vscale = (p2 >> 8) & 0xFF;
406 uint32_t remainder = (p2 >> 16) & 0xFF;
407 WriteLog(" [hsc: %02X, vsc: %02X, rem: %02X]\n", hscale, vscale, remainder);
// Logs a fixed bitmap object: the raw second phrase, then the decoded bitmap
// fields via DumpBitmapCore.
411 void DumpFixedObject(uint64_t p0, uint64_t p1)
413 WriteLog(" %08X %08X\n", (uint32_t)(p1>>32), (uint32_t)(p1&0xFFFFFFFF));
414 DumpBitmapCore(p0, p1);
// Decodes the fields shared by fixed and scaled bitmap objects from their
// first two phrases (p0, p1) and logs them on a single line.
418 void DumpBitmapCore(uint64_t p0, uint64_t p1)
// Multiplier from width in phrases to pixels, per depth code, for the log.
// NOTE(review): the last two entries (1, 1) differ from the file-level
// phraseWidthToPixels table (0, 0) -- confirm which is intended.
420 uint32_t bdMultiplier[8] = { 64, 32, 16, 8, 4, 2, 1, 1 };
421 uint8_t bitdepth = (p1 >> 12) & 0x07;
422 //WAS: int16_t ypos = ((p0 >> 3) & 0x3FF); // ??? What if not interlaced (/2)?
423 int16_t ypos = ((p0 >> 3) & 0x7FF); // ??? What if not interlaced (/2)?
// XPOS is a 12-bit two's-complement field in the low bits of p1.
424 int32_t xpos = p1 & 0xFFF;
425 xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos); // Sign extend that mutha!
426 uint32_t iwidth = ((p1 >> 28) & 0x3FF);
427 uint32_t dwidth = ((p1 >> 18) & 0x3FF); // Unsigned!
428 uint16_t height = ((p0 >> 14) & 0x3FF);
// NOTE(review): `link` is computed but not passed to the WriteLog call below.
429 uint32_t link = ((p0 >> 24) & 0x7FFFF) << 3;
430 uint32_t ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
431 uint32_t firstPix = (p1 >> 49) & 0x3F;
432 uint8_t flags = (p1 >> 45) & 0x0F;
433 uint8_t idx = (p1 >> 38) & 0x7F;
434 uint32_t pitch = (p1 >> 15) & 0x07;
435 WriteLog(" [%u x %u @ (%i, %u) (iw:%u, dw:%u) (%u bpp), p:%08X fp:%02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
436 iwidth * bdMultiplier[bitdepth],
437 height, xpos, ypos, iwidth, dwidth, op_bitmap_bit_depth[bitdepth],
438 ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""),
439 (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""),
440 (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
445 // Object Processor main routine
447 #warning "Need to fix this so that when an GPU object IRQ happens, we can pick up OP processing where we left off. !!! FIX !!!"
448 void OPProcessList(int halfline, bool render)
450 #warning "!!! NEED TO HANDLE MULTIPLE FIELDS PROPERLY !!!"
451 // We ignore them, for now; not good D-:
452 // N.B.: Half-lines are exactly that, half-lines. When in interlaced mode, it
453 // draws the screen exactly the same way as it does in non, one line at a
454 // time. The only way you know you're in field #2 is that the topmost bit
455 // of VC is set. Half-line mode is so you can draw higher horizontal
456 // resolutions than you normally could, as the line buffer is only 720
460 extern int op_start_log;
462 op_pointer = OPGetListPointer();
464 // objectp_stop_reading_list = false;
466 //WriteLog("OP: Processing line #%u (OLP=%08X)...\n", halfline, op_pointer);
469 // *** BEGIN OP PROCESSOR TESTING ONLY ***
470 extern bool interactiveMode;
472 extern int objectPtr;
474 int bitmapCounter = 0;
475 // *** END OP PROCESSOR TESTING ONLY ***
477 uint32_t opCyclesToRun = 30000; // This is a pulled-out-of-the-air value (will need to be fixed, obviously!)
479 // if (op_pointer) WriteLog(" new op list at 0x%.8x halfline %i\n",op_pointer,halfline);
482 // *** BEGIN OP PROCESSOR TESTING ONLY ***
483 if (interactiveMode && bitmapCounter == objectPtr)
487 // *** END OP PROCESSOR TESTING ONLY ***
488 // if (objectp_stop_reading_list)
491 uint64_t p0 = OPLoadPhrase(op_pointer);
493 //WriteLog("\t%08X type %i\n", op_pointer, (uint8_t)p0 & 0x07);
496 if (halfline == TOMGetVDB() && op_start_log)
497 //if (halfline == 215 && op_start_log)
498 //if (halfline == 28 && op_start_log)
501 WriteLog("%08X --> phrase %08X %08X", op_pointer - 8, (int)(p0>>32), (int)(p0&0xFFFFFFFF));
502 if ((p0 & 0x07) == OBJECT_TYPE_BITMAP)
504 WriteLog(" (BITMAP) ");
505 uint64_t p1 = OPLoadPhrase(op_pointer);
506 WriteLog("\n%08X --> phrase %08X %08X ", op_pointer, (int)(p1>>32), (int)(p1&0xFFFFFFFF));
507 uint8_t bitdepth = (p1 >> 12) & 0x07;
508 //WAS: int16_t ypos = ((p0 >> 3) & 0x3FF); // ??? What if not interlaced (/2)?
509 int16_t ypos = ((p0 >> 3) & 0x7FF); // ??? What if not interlaced (/2)?
510 int32_t xpos = p1 & 0xFFF;
511 xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);
512 uint32_t iwidth = ((p1 >> 28) & 0x3FF);
513 uint32_t dwidth = ((p1 >> 18) & 0x3FF); // Unsigned!
514 uint16_t height = ((p0 >> 14) & 0x3FF);
515 uint32_t link = ((p0 >> 24) & 0x7FFFF) << 3;
516 uint32_t ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
517 uint32_t firstPix = (p1 >> 49) & 0x3F;
518 uint8_t flags = (p1 >> 45) & 0x0F;
519 uint8_t idx = (p1 >> 38) & 0x7F;
520 uint32_t pitch = (p1 >> 15) & 0x07;
521 WriteLog("\n [%u (%u) x %u @ (%i, %u) (%u bpp), l: %08X, p: %08X fp: %02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
522 iwidth, dwidth, height, xpos, ypos, op_bitmap_bit_depth[bitdepth], link, ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""), (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""), (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
524 if ((p0 & 0x07) == OBJECT_TYPE_SCALE)
526 WriteLog(" (SCALED BITMAP)");
527 uint64_t p1 = OPLoadPhrase(op_pointer), p2 = OPLoadPhrase(op_pointer+8);
528 WriteLog("\n%08X --> phrase %08X %08X ", op_pointer, (int)(p1>>32), (int)(p1&0xFFFFFFFF));
529 WriteLog("\n%08X --> phrase %08X %08X ", op_pointer+8, (int)(p2>>32), (int)(p2&0xFFFFFFFF));
530 uint8_t bitdepth = (p1 >> 12) & 0x07;
531 //WAS: int16_t ypos = ((p0 >> 3) & 0x3FF); // ??? What if not interlaced (/2)?
532 int16_t ypos = ((p0 >> 3) & 0x7FF); // ??? What if not interlaced (/2)?
533 int32_t xpos = p1 & 0xFFF;
534 xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);
535 uint32_t iwidth = ((p1 >> 28) & 0x3FF);
536 uint32_t dwidth = ((p1 >> 18) & 0x3FF); // Unsigned!
537 uint16_t height = ((p0 >> 14) & 0x3FF);
538 uint32_t link = ((p0 >> 24) & 0x7FFFF) << 3;
539 uint32_t ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
540 uint32_t firstPix = (p1 >> 49) & 0x3F;
541 uint8_t flags = (p1 >> 45) & 0x0F;
542 uint8_t idx = (p1 >> 38) & 0x7F;
543 uint32_t pitch = (p1 >> 15) & 0x07;
544 WriteLog("\n [%u (%u) x %u @ (%i, %u) (%u bpp), l: %08X, p: %08X fp: %02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
545 iwidth, dwidth, height, xpos, ypos, op_bitmap_bit_depth[bitdepth], link, ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""), (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""), (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
546 uint32_t hscale = p2 & 0xFF;
547 uint32_t vscale = (p2 >> 8) & 0xFF;
548 uint32_t remainder = (p2 >> 16) & 0xFF;
549 WriteLog(" [hsc: %02X, vsc: %02X, rem: %02X]\n", hscale, vscale, remainder);
551 if ((p0 & 0x07) == OBJECT_TYPE_GPU)
552 WriteLog(" (GPU)\n");
553 if ((p0 & 0x07) == OBJECT_TYPE_BRANCH)
555 WriteLog(" (BRANCH)\n");
556 uint8_t * jaguarMainRam = GetRamPtr();
557 WriteLog("[RAM] --> ");
558 for(int k=0; k<8; k++)
559 WriteLog("%02X ", jaguarMainRam[op_pointer-8 + k]);
562 if ((p0 & 0x07) == OBJECT_TYPE_STOP)
563 WriteLog(" --> List end\n\n");
567 switch ((uint8_t)p0 & 0x07)
569 case OBJECT_TYPE_BITMAP:
571 uint16_t ypos = (p0 >> 3) & 0x7FF;
572 // This is only theory implied by Rayman...!
573 // It seems that if the YPOS is zero, then bump the YPOS value so that it
574 // coincides with the VDB value. With interlacing, this would be slightly more
575 // tricky. There's probably another bit somewhere that enables this mode--but
576 // so far, doesn't seem to affect any other game in a negative way (that I've
577 // seen). Either that, or it's an undocumented bug...
579 //No, the reason this was needed is that the OP code before was wrong. Any value
580 //less than VDB will get written to the top line of the display!
582 // Not so sure... Let's see what happens here...
585 ypos = TOMReadWord(0xF00046, OP) / 2; // Get the VDB value
587 // Actually, no. Any item less than VDB will get only the lines that hang over
588 // VDB displayed. Actually, this is incorrect. It seems that VDB value is wrong
589 // somewhere and that's what's causing things to fuck up. Still no idea why.
591 uint32_t height = (p0 & 0xFFC000) >> 14;
592 uint32_t oldOPP = op_pointer - 8;
593 // *** BEGIN OP PROCESSOR TESTING ONLY ***
594 if (inhibit && op_start_log)
595 WriteLog("!!! ^^^ This object is INHIBITED! ^^^ !!!\n");
597 if (!inhibit) // For OP testing only!
598 // *** END OP PROCESSOR TESTING ONLY ***
599 if (halfline >= ypos && height > 0)
601 // Believe it or not, this is what the OP actually does...
602 // which is why they're required to be on a dphrase boundary!
603 uint64_t p1 = OPLoadPhrase(oldOPP | 0x08);
604 //unneeded op_pointer += 8;
605 //WriteLog("OP: Writing halfline %d with ypos == %d...\n", halfline, ypos);
606 //WriteLog("--> Writing %u BPP bitmap...\n", op_bitmap_bit_depth[(p1 >> 12) & 0x07]);
607 // OPProcessFixedBitmap(halfline, p0, p1, render);
608 OPProcessFixedBitmap(p0, p1, render);
614 uint64_t data = (p0 & 0xFFFFF80000000000LL) >> 40;
615 uint64_t dwidth = (p1 & 0xFFC0000) >> 15;
618 p0 &= ~0xFFFFF80000FFC000LL; // Mask out old data...
619 p0 |= (uint64_t)height << 14;
621 OPStorePhrase(oldOPP, p0);
624 // OP bottom 3 bits are hardwired to zero. The link address
625 // reflects this, so we only need the top 19 bits of the address
626 // (which is why we only shift 21, and not 24).
627 op_pointer = (p0 & 0x000007FFFF000000LL) >> 21;
629 // KLUDGE: Seems that memory access is mirrored in the first 8MB of
631 if (op_pointer > 0x1FFFFF && op_pointer < 0x800000)
632 op_pointer &= 0xFF1FFFFF; // Knock out bits 21-23
636 case OBJECT_TYPE_SCALE:
638 //WAS: uint16_t ypos = (p0 >> 3) & 0x3FF;
639 uint16_t ypos = (p0 >> 3) & 0x7FF;
640 uint32_t height = (p0 & 0xFFC000) >> 14;
641 uint32_t oldOPP = op_pointer - 8;
642 //WriteLog("OP: Scaled Object (ypos=%04X, height=%04X", ypos, height);
643 // *** BEGIN OP PROCESSOR TESTING ONLY ***
644 if (inhibit && op_start_log)
646 WriteLog("!!! ^^^ This object is INHIBITED! ^^^ !!! (halfline=%u, ypos=%u, height=%u)\n", halfline, ypos, height);
647 DumpScaledObject(p0, OPLoadPhrase(op_pointer), OPLoadPhrase(op_pointer+8));
650 if (!inhibit) // For OP testing only!
651 // *** END OP PROCESSOR TESTING ONLY ***
652 if (halfline >= ypos && height > 0)
654 // Believe it or not, this is what the OP actually does...
655 // which is why they're required to be on a qphrase boundary!
656 uint64_t p1 = OPLoadPhrase(oldOPP | 0x08);
657 uint64_t p2 = OPLoadPhrase(oldOPP | 0x10);
658 //unneeded op_pointer += 16;
659 OPProcessScaledBitmap(p0, p1, p2, render);
663 uint16_t remainder = (p2 >> 16) & 0xFF;//, vscale = p2 >> 8;
664 uint8_t /*remainder = p2 >> 16,*/ vscale = p2 >> 8;
665 //Actually, we should skip this object if it has a vscale of zero.
666 //Or do we? Not sure... Atari Karts has a few lines that look like:
668 //000E8268 --> phrase 00010000 7000B00D
669 // [7 (0) x 1 @ (13, 0) (8 bpp), l: 000E82A0, p: 000E0FC0 fp: 00, fl:RELEASE, idx:00, pt:01]
670 // [hsc: 9A, vsc: 00, rem: 00]
671 // Could it be the vscale is overridden if the DWIDTH is zero? Hmm...
672 //WriteLog("OP: Scaled bitmap processing (rem=%02X, vscale=%02X)...\n", remainder, vscale);//*/
675 vscale = 0x20; // OP bug??? Nope, it isn't...! Or is it?
677 //extern int start_logging;
679 // WriteLog("--> Returned from scaled bitmap processing (rem=%02X, vscale=%02X)...\n", remainder, vscale);//*/
681 //--> Returned from scaled bitmap processing (rem=20, vscale=80)...
682 //There are other problems here, it looks like...
684 //About to execute OP (508)...
686 OP: Scaled bitmap 4x? 4bpp at 38,? hscale=7C fpix=0 data=00075E28 pitch 1 hflipped=no dwidth=? (linked to 00071118) Transluency=no
687 --> Returned from scaled bitmap processing (rem=50, vscale=7C)...
688 OP: Scaled bitmap 4x? 4bpp at 38,? hscale=7C fpix=0 data=00075E28 pitch 1 hflipped=no dwidth=? (linked to 00071118) Transluency=no
689 --> Returned from scaled bitmap processing (rem=30, vscale=7C)...
690 OP: Scaled bitmap 4x? 4bpp at 38,? hscale=7C fpix=0 data=00075E28 pitch 1 hflipped=no dwidth=? (linked to 00071118) Transluency=no
691 --> Returned from scaled bitmap processing (rem=10, vscale=7C)...
692 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756A8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
693 --> Returned from scaled bitmap processing (rem=00, vscale=7E)...
694 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
695 --> Returned from scaled bitmap processing (rem=00, vscale=80)...
696 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
697 --> Returned from scaled bitmap processing (rem=5E, vscale=7E)...
698 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756E8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
699 --> Returned from scaled bitmap processing (rem=60, vscale=80)...
700 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
701 --> Returned from scaled bitmap processing (rem=3E, vscale=7E)...
702 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756E8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
703 --> Returned from scaled bitmap processing (rem=40, vscale=80)...
704 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
705 --> Returned from scaled bitmap processing (rem=1E, vscale=7E)...
706 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756E8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
707 --> Returned from scaled bitmap processing (rem=20, vscale=80)...
709 //Here's another problem:
710 // [hsc: 20, vsc: 20, rem: 00]
711 // Since we're not checking for $E0 (but that's what we get from the above), we
712 // end up repeating this halfline unnecessarily... !!! FIX !!! [DONE, but...
713 // still not quite right. Either that, or the Accolade team that wrote Bubsy
714 // screwed up royal.]
715 //Also note: $E0 = 7.0 which IS a legal vscale value...
717 // if (remainder & 0x80) // I.e., it's negative
718 // if ((remainder & 0x80) || remainder == 0) // I.e., it's <= 0
719 // if ((remainder - 1) >= 0xE0) // I.e., it's <= 0
720 // if ((remainder >= 0xE1) || remainder == 0)// I.e., it's <= 0
721 // if ((remainder >= 0xE1 && remainder <= 0xFF) || remainder == 0)// I.e., it's <= 0
722 // if (remainder <= 0x20) // I.e., it's <= 1.0
723 // I.e., it's < 1.0f -> means it'll go negative when we subtract 1.0f.
724 if (remainder < 0x20)
726 uint64_t data = (p0 & 0xFFFFF80000000000LL) >> 40;
727 uint64_t dwidth = (p1 & 0xFFC0000) >> 15;
729 // while (remainder & 0x80)
730 // while ((remainder & 0x80) || remainder == 0)
731 // while ((remainder - 1) >= 0xE0)
732 // while ((remainder >= 0xE1) || remainder == 0)
733 // while ((remainder >= 0xE1 && remainder <= 0xFF) || remainder == 0)
734 // while (remainder <= 0x20)
735 while (remainder < 0x20)
745 p0 &= ~0xFFFFF80000FFC000LL; // Mask out old data...
746 p0 |= (uint64_t)height << 14;
748 OPStorePhrase(oldOPP, p0);
751 remainder -= 0x20; // 1.0f in [3.5] fixed point format
754 // WriteLog("--> Finished writebacks...\n");//*/
756 //WriteLog(" [%08X%08X -> ", (uint32_t)(p2>>32), (uint32_t)(p2&0xFFFFFFFF));
757 p2 &= ~0x0000000000FF0000LL;
758 p2 |= (uint64_t)remainder << 16;
759 //WriteLog("%08X%08X]\n", (uint32_t)(p2>>32), (uint32_t)(p2&0xFFFFFFFF));
760 OPStorePhrase(oldOPP + 16, p2);
761 //remainder = (uint8_t)(p2 >> 16), vscale = (uint8_t)(p2 >> 8);
762 //WriteLog(" [after]: rem=%02X, vscale=%02X\n", remainder, vscale);
765 // OP bottom 3 bits are hardwired to zero. The link address
766 // reflects this, so we only need the top 19 bits of the address
767 // (which is why we only shift 21, and not 24).
768 op_pointer = (p0 & 0x000007FFFF000000LL) >> 21;
770 // KLUDGE: Seems that memory access is mirrored in the first 8MB of
772 if (op_pointer > 0x1FFFFF && op_pointer < 0x800000)
773 op_pointer &= 0xFF1FFFFF; // Knock out bits 21-23
777 case OBJECT_TYPE_GPU:
779 //WriteLog("OP: Asserting GPU IRQ #3...\n");
780 #warning "Need to fix OP GPU IRQ handling! !!! FIX !!!"
781 OPSetCurrentObject(p0);
782 GPUSetIRQLine(3, ASSERT_LINE);
783 //Also, OP processing is suspended from this point until OBF (F00026) is written to...
786 //OPSuspendedByGPU = true;
787 //Dunno if the OP keeps processing from where it was interrupted, or if it just continues
788 //on the next halfline...
789 // --> It continues from where it was interrupted! !!! FIX !!!
792 case OBJECT_TYPE_BRANCH:
794 uint16_t ypos = (p0 >> 3) & 0x7FF;
795 // JTRM is wrong: CC is bits 14-16 (3 bits, *not* 2)
796 uint8_t cc = (p0 >> 14) & 0x07;
797 uint32_t link = (p0 >> 21) & 0x3FFFF8;
801 case CONDITION_EQUAL:
802 if (halfline == ypos || ypos == 0x7FF)
805 case CONDITION_LESS_THAN:
809 case CONDITION_GREATER_THAN:
813 case CONDITION_OP_FLAG_SET:
814 if (OPGetStatusRegister() & 0x01)
817 case CONDITION_SECOND_HALF_LINE:
818 // Branch if bit 10 of HC is set...
819 if (TOMGetHC() & 0x0400)
823 // Basically, if you do this, the OP does nothing. :-)
824 WriteLog("OP: Unimplemented branch condition %i\n", cc);
828 case OBJECT_TYPE_STOP:
830 OPSetCurrentObject(p0);
832 if ((p0 & 0x08) && TOMIRQEnabled(IRQ_OPFLAG))
834 TOMSetPendingObjectInt();
835 m68k_set_irq(2); // Cause a 68K IPL 2 to occur...
838 // Bail out, we're done...
842 WriteLog("OP: Unknown object type %i\n", (uint8_t)p0 & 0x07);
845 // Here is a little sanity check to keep the OP from locking up the
846 // machine when fed bad data. Better would be to count how many actual
847 // cycles it used and bail out/reenter to properly simulate an
848 // overloaded OP... !!! FIX !!!
849 #warning "Better would be to count how many actual cycles it used and bail out/reenter to properly simulate an overloaded OP... !!! FIX !!!"
859 // Store fixed size bitmap in line buffer
861 void OPProcessFixedBitmap(uint64_t p0, uint64_t p1, bool render)
863 // Need to make sure that when writing that it stays within the line buffer...
864 // LBUF ($F01800 - $F01D9E) 360 x 32-bit RAM
865 uint8_t depth = (p1 >> 12) & 0x07; // Color depth of image
866 int32_t xpos = ((int16_t)((p1 << 4) & 0xFFFF)) >> 4;// Image xpos in LBUF
867 uint32_t iwidth = (p1 >> 28) & 0x3FF; // Image width in *phrases*
868 uint32_t data = (p0 >> 40) & 0xFFFFF8; // Pixel data address
869 uint32_t firstPix = (p1 >> 49) & 0x3F;
870 // "The LSB is significant only for scaled objects..." -JTRM
871 // "In 1 BPP mode, all five bits are significant. In 2 BPP mode, the top
872 // four are significant..."
875 // We can ignore the RELEASE (high order) bit for now--probably forever...!
876 // uint8_t flags = (p1 >> 45) & 0x0F; // REFLECT, RMW, TRANS, RELEASE
877 //Optimize: break these out to their own BOOL values
878 uint8_t flags = (p1 >> 45) & 0x07; // REFLECT (0), RMW (1), TRANS (2)
879 bool flagREFLECT = (flags & OPFLAG_REFLECT ? true : false),
880 flagRMW = (flags & OPFLAG_RMW ? true : false),
881 flagTRANS = (flags & OPFLAG_TRANS ? true : false);
882 // "For images with 1 to 4 bits/pixel the top 7 to 4 bits of the index
883 // provide the most significant bits of the palette address."
884 uint8_t index = (p1 >> 37) & 0xFE; // CLUT index offset (upper pix, 1-4 bpp)
885 uint32_t pitch = (p1 >> 15) & 0x07; // Phrase pitch
886 pitch <<= 3; // Optimization: Multiply pitch by 8
888 // int16_t scanlineWidth = tom_getVideoModeWidth();
889 uint8_t * tomRam8 = TOMGetRamPointer();
890 uint8_t * paletteRAM = &tomRam8[0x400];
891 // This is OK as long as it's used correctly: For 16-bit RAM to RAM direct
892 // copies--NOT for use when using endian-corrected data (i.e., any of the
893 // *_word_read functions!)
894 uint16_t * paletteRAM16 = (uint16_t *)paletteRAM;
896 // WriteLog("bitmap %ix? %ibpp at %i,? firstpix=? data=0x%.8x pitch %i hflipped=%s dwidth=? (linked to ?) RMW=%s Tranparent=%s\n",
897 // iwidth, op_bitmap_bit_depth[bitdepth], xpos, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), (flags&OPFLAG_RMW ? "yes" : "no"), (flags&OPFLAG_TRANS ? "yes" : "no"));
899 // Is it OK to have a 0 for the data width??? (i.e., undocumented?)
900 // Seems to be... Seems that dwidth *can* be zero (i.e., reuse same line) as
902 // Pitch == 0 is OK too...
904 //kludge: Seems that the OP treats iwidth == 0 as iwidth == 1... Need to
905 // investigate on real hardware...
906 #warning "!!! Need to investigate iwidth == 0 behavior on real hardware !!!"
910 // if (!render || op_pointer == 0 || ptr == 0 || pitch == 0)
911 //I'm not convinced that we need to concern ourselves with data & op_pointer
913 if (!render || iwidth == 0)
916 //OK, so we know the position in the line buffer is correct. It's the clipping
917 //in 24bpp mode that's wrong!
919 //This is a total kludge, based upon the fact that 24BPP mode puts *4* bytes
920 //into the line buffer for each pixel.
921 if (depth == 5) // i.e., 24bpp mode...
922 xpos >>= 1; // Cut it in half...
925 //#define OP_DEBUG_BMP
926 //#ifdef OP_DEBUG_BMP
927 // WriteLog("bitmap %ix%i %ibpp at %i,%i firstpix=%i data=0x%.8x pitch %i hflipped=%s dwidth=%i (linked to 0x%.8x) Transluency=%s\n",
928 // iwidth, height, op_bitmap_bit_depth[bitdepth], xpos, ypos, firstPix, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), dwidth, op_pointer, (flags&OPFLAG_RMW ? "yes" : "no"));
931 // int32_t leftMargin = xpos, rightMargin = (xpos + (phraseWidthToPixels[depth] * iwidth)) - 1;
932 int32_t startPos = xpos, endPos = xpos +
933 (!flagREFLECT ? (phraseWidthToPixels[depth] * iwidth) - 1
934 : -((phraseWidthToPixels[depth] * iwidth) + 1));
935 uint32_t clippedWidth = 0, phraseClippedWidth = 0, dataClippedWidth = 0;//, phrasePixel = 0;
936 bool in24BPPMode = (((GET16(tomRam8, 0x0028) >> 1) & 0x03) == 1 ? true : false); // VMODE
937 // This is correct, the OP line buffer is a constant size...
939 int32_t lbufWidth = 719;
941 // If the image is completely to the left or right of the line buffer, then
943 //If in REFLECT mode, then these values are swapped! !!! FIX !!! [DONE]
944 //There are four possibilities:
945 // 1. image sits on left edge and no REFLECT; starts out of bounds but ends in bounds.
946 // 2. image sits on left edge and REFLECT; starts in bounds but ends out of bounds.
947 // 3. image sits on right edge and REFLECT; starts out of bounds but ends in bounds.
948 // 4. image sits on right edge and no REFLECT; starts in bounds but ends out of bounds.
949 //Numbers 2 & 4 can be caught by checking the LBUF clip while in the inner loop,
950 // numbers 1 & 3 are of concern.
951 // This *indirectly* handles only cases 2 & 4! And is WRONG if REFLECT is set...!
952 // if (rightMargin < 0 || leftMargin > lbufWidth)
954 // It might be easier to swap these (if REFLECTed) and just use XPOS down below...
955 // That way, you could simply set XPOS to leftMargin if !REFLECT and to rightMargin otherwise.
956 // Still have to be careful with the DATA and IWIDTH values though...
958 // if ((!flagREFLECT && (rightMargin < 0 || leftMargin > lbufWidth))
959 // || (flagREFLECT && (leftMargin < 0 || rightMargin > lbufWidth)))
961 if ((!flagREFLECT && (endPos < 0 || startPos > lbufWidth))
962 || (flagREFLECT && (startPos < 0 || endPos > lbufWidth)))
965 // Otherwise, find the clip limits and clip the phrase as well...
966 // NOTE: I'm fudging here by letting the actual blit overstep the bounds of the
967 // line buffer, but it shouldn't matter since there are two unused line
968 // buffers below and nothing above and I'll at most write 8 bytes outside
969 // the line buffer... I could use a fractional clip begin/end value, but
970 // this makes the blit a *lot* more hairy. I might fix this in the future
971 // if it becomes necessary. (JLH)
972 // Probably wouldn't be *that* hairy. Just use a delta that tells the inner loop
973 // which pixel in the phrase is being written, and quit when either end of phrases
974 // is reached or line buffer extents are surpassed.
976 //This stuff is probably wrong as well... !!! FIX !!!
977 //The strange thing is that it seems to work, but that's no guarantee that it's bulletproof!
978 //Yup. Seems that JagMania doesn't work correctly with this...
979 //Dunno if this is the problem, but Atari Karts is showing *some* of the road now...
984 clippedWidth = 0 - leftMargin,
985 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth],
986 leftMargin = 0 - (clippedWidth % phraseWidthToPixels[depth]);
989 if (rightMargin > lbufWidth)
990 clippedWidth = rightMargin - lbufWidth,
991 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];//,
992 // rightMargin = lbufWidth + (clippedWidth % phraseWidthToPixels[depth]);
993 // rightMargin = lbufWidth;
996 WriteLog("OP: We're about to encounter a divide by zero error!\n");
997 // NOTE: We're just using endPos to figure out how much, if any, to clip by.
998 // ALSO: There may be another case where we start out of bounds and end out
1001 if (startPos < 0) // Case #1: Begin out, end in, L to R
1002 clippedWidth = 0 - startPos,
1003 dataClippedWidth = phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth],
1004 startPos = 0 - (clippedWidth % phraseWidthToPixels[depth]);
1006 if (endPos < 0) // Case #2: Begin in, end out, R to L
1007 clippedWidth = 0 - endPos,
1008 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];
1010 if (endPos > lbufWidth) // Case #3: Begin in, end out, L to R
1011 clippedWidth = endPos - lbufWidth,
1012 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];
1014 if (startPos > lbufWidth) // Case #4: Begin out, end in, R to L
1015 clippedWidth = startPos - lbufWidth,
1016 dataClippedWidth = phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth],
1017 startPos = lbufWidth + (clippedWidth % phraseWidthToPixels[depth]);
1018 //printf("<OP:spos=%i,epos=%i]", startPos, endPos);
1020 // If the image is sitting on the line buffer left or right edge, we need to compensate
1021 // by decreasing the image phrase width accordingly.
1022 iwidth -= phraseClippedWidth;
1024 // Also, if we're clipping the phrase we need to make sure we're in the correct part of
1026 // data += phraseClippedWidth * (pitch << 3);
1027 data += dataClippedWidth * pitch;
1029 // NOTE: When the bitmap is in REFLECT mode, the XPOS marks the *right* side of the
1030 // bitmap! This makes clipping & etc. MUCH, much easier...!
1031 // uint32_t lbufAddress = 0x1800 + (!in24BPPMode ? leftMargin * 2 : leftMargin * 4);
1032 //Why does this work right when multiplying startPos by 2 (instead of 4) for 24 BPP mode?
1033 //Is this a bug in the OP?
1034 //It's because in 24bpp mode, each pixel takes *4* bytes, instead of the usual 2.
1035 //Though it looks like we're doing it here no matter what...
1036 // uint32_t lbufAddress = 0x1800 + (!in24BPPMode ? startPos * 2 : startPos * 2);
1038 uint32_t lbufAddress = 0x1800 + (startPos * 2);
1039 uint8_t * currentLineBuffer = &tomRam8[lbufAddress];
1043 // Hmm. We check above for 24 BPP mode, but don't do anything about it below...
1044 // If we *were* in 24 BPP mode, how would you convert CRY to RGB24? Seems to me
1045 // that if you're in CRY mode then you wouldn't be able to use 24 BPP bitmaps
1047 // This seems to be the case (at least according to the Midsummer docs)...!
1049 // This is to test using palette zeroes instead of bit zeroes...
1050 // And it seems that this is wrong, index == 0 is transparent apparently... :-/
1051 //#define OP_USES_PALETTE_ZERO
1053 if (depth == 0) // 1 BPP
1055 // The LSB of flags is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1056 int32_t lbufDelta = ((int8_t)((flags << 7) & 0xFF) >> 5) | 0x02;
1058 // Fetch 1st phrase...
1059 uint64_t pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1060 //Note that firstPix should only be honored *if* we start with the 1st phrase of the bitmap
1061 //i.e., we didn't clip on the margin... !!! FIX !!!
1062 pixels <<= firstPix; // Skip first N pixels (N=firstPix)...
1063 int i = firstPix; // Start counter at right spot...
1069 uint8_t bit = pixels >> 63;
1070 #ifndef OP_USES_PALETTE_ZERO
1071 if (flagTRANS && bit == 0)
1073 if (flagTRANS && (paletteRAM16[index | bit] == 0))
1079 //Optimize: Set paletteRAM16 to beginning of palette RAM + index*2 and use only [bit] as index...
1080 //Won't optimize RMW case though...
1081 // This is the *only* correct use of endian-dependent code
1082 // (i.e., mem-to-mem direct copying)!
1083 *(uint16_t *)currentLineBuffer = paletteRAM16[index | bit];
1085 *currentLineBuffer =
1086 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bit) << 1]),
1087 *(currentLineBuffer + 1) =
1088 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bit) << 1) + 1]);
1091 currentLineBuffer += lbufDelta;
1095 // Fetch next phrase...
1097 pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1100 else if (depth == 1) // 2 BPP
1103 WriteLog("OP: Fixed bitmap @ 2 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1104 index &= 0xFC; // Top six bits form CLUT index
1105 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1106 int32_t lbufDelta = ((int8_t)((flags << 7) & 0xFF) >> 5) | 0x02;
1111 uint64_t pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1114 for(int i=0; i<32; i++)
1116 uint8_t bits = pixels >> 62;
1117 // Seems to me that both of these are in the same endian, so we could cast it as
1118 // uint16_t * and do straight across copies (what about 24 bpp? Treat it differently...)
1119 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
1120 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
1121 // No, it isn't because we read the memory in an endian safe way--this *won't* work...
1122 #ifndef OP_USES_PALETTE_ZERO
1123 if (flagTRANS && bits == 0)
1125 if (flagTRANS && (paletteRAM16[index | bits] == 0))
1131 *(uint16_t *)currentLineBuffer = paletteRAM16[index | bits];
1133 *currentLineBuffer =
1134 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1135 *(currentLineBuffer + 1) =
1136 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1139 currentLineBuffer += lbufDelta;
1144 else if (depth == 2) // 4 BPP
1147 WriteLog("OP: Fixed bitmap @ 4 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1148 index &= 0xF0; // Top four bits form CLUT index
1149 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1150 int32_t lbufDelta = ((int8_t)((flags << 7) & 0xFF) >> 5) | 0x02;
1155 uint64_t pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1158 for(int i=0; i<16; i++)
1160 uint8_t bits = pixels >> 60;
1161 // Seems to me that both of these are in the same endian, so we could cast it as
1162 // uint16_t * and do straight across copies (what about 24 bpp? Treat it differently...)
1163 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
1164 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
1165 // No, it isn't because we read the memory in an endian safe way--this *won't* work...
1166 #ifndef OP_USES_PALETTE_ZERO
1167 if (flagTRANS && bits == 0)
1169 if (flagTRANS && (paletteRAM16[index | bits] == 0))
1175 *(uint16_t *)currentLineBuffer = paletteRAM16[index | bits];
1177 *currentLineBuffer =
1178 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1179 *(currentLineBuffer + 1) =
1180 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1183 currentLineBuffer += lbufDelta;
1188 else if (depth == 3) // 8 BPP
1190 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1191 int32_t lbufDelta = ((int8_t)((flags << 7) & 0xFF) >> 5) | 0x02;
1193 // Fetch 1st phrase...
1194 uint64_t pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1195 //Note that firstPix should only be honored *if* we start with the 1st phrase of the bitmap
1196 //i.e., we didn't clip on the margin... !!! FIX !!!
1197 firstPix &= 0x30; // Only top two bits are valid for 8 BPP
1198 pixels <<= firstPix; // Skip first N pixels (N=firstPix)...
1199 int i = firstPix >> 3; // Start counter at right spot...
1205 uint8_t bits = pixels >> 56;
1206 // Seems to me that both of these are in the same endian, so we could cast it as
1207 // uint16_t * and do straight across copies (what about 24 bpp? Treat it differently...)
1208 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
1209 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
1210 // No, it isn't because we read the memory in an endian safe way--this *won't* work...
1211 //This would seem to be problematic...
1212 //Because it's the palette entry being zero that makes the pixel transparent...
1213 //Let's try it and see.
1214 #ifndef OP_USES_PALETTE_ZERO
1215 if (flagTRANS && bits == 0)
1217 if (flagTRANS && (paletteRAM16[bits] == 0))
1223 *(uint16_t *)currentLineBuffer = paletteRAM16[bits];
1225 *currentLineBuffer =
1226 BLEND_CR(*currentLineBuffer, paletteRAM[bits << 1]),
1227 *(currentLineBuffer + 1) =
1228 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[(bits << 1) + 1]);
1231 currentLineBuffer += lbufDelta;
1235 // Fetch next phrase...
1237 pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1240 else if (depth == 4) // 16 BPP
1243 WriteLog("OP: Fixed bitmap @ 16 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1244 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1245 int32_t lbufDelta = ((int8_t)((flags << 7) & 0xFF) >> 5) | 0x02;
1250 uint64_t pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1253 for(int i=0; i<4; i++)
1255 uint8_t bitsHi = pixels >> 56, bitsLo = pixels >> 48;
1256 // Seems to me that both of these are in the same endian, so we could cast it
1257 // as uint16_t * and do straight across copies (what about 24 bpp? Treat it
1258 // differently...) This only works for the palettized modes (1 - 8 BPP), since
1259 // we actually have to copy data from memory in 16 BPP mode (or does it? Isn't
1260 // this the same as the CLUT case?) No, it isn't because we read the memory in
1261 // an endian safe way--it *won't* work...
1262 //This doesn't seem right... Let's try the encoded black value ($8800):
1263 //Apparently, CRY 0 maps to $8800...
1264 if (flagTRANS && ((bitsLo | bitsHi) == 0))
1265 // if (flagTRANS && (bitsHi == 0x88) && (bitsLo == 0x00))
1270 *currentLineBuffer = bitsHi,
1271 *(currentLineBuffer + 1) = bitsLo;
1273 *currentLineBuffer =
1274 BLEND_CR(*currentLineBuffer, bitsHi),
1275 *(currentLineBuffer + 1) =
1276 BLEND_Y(*(currentLineBuffer + 1), bitsLo);
1279 currentLineBuffer += lbufDelta;
1284 else if (depth == 5) // 24 BPP
1286 //Looks like Iron Soldier is the only game that uses 24BPP mode...
1287 //There *might* be others...
1288 //WriteLog("OP: Writing 24 BPP bitmap!\n");
1290 WriteLog("OP: Fixed bitmap @ 24 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1291 // Not sure, but I think RMW only works with 16 BPP and below, and only in CRY mode...
1292 // The LSB of flags is OPFLAG_REFLECT, so sign extend it and OR 4 into it.
1293 int32_t lbufDelta = ((int8_t)((flags << 7) & 0xFF) >> 4) | 0x04;
1298 uint64_t pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1301 for(int i=0; i<2; i++)
1303 // We don't use a 32-bit var here because of endian issues...!
1304 uint8_t bits3 = pixels >> 56, bits2 = pixels >> 48,
1305 bits1 = pixels >> 40, bits0 = pixels >> 32;
1307 if (flagTRANS && (bits3 | bits2 | bits1 | bits0) == 0)
1310 *currentLineBuffer = bits3,
1311 *(currentLineBuffer + 1) = bits2,
1312 *(currentLineBuffer + 2) = bits1,
1313 *(currentLineBuffer + 3) = bits0;
1315 currentLineBuffer += lbufDelta;
1324 // Store scaled bitmap in line buffer
1326 void OPProcessScaledBitmap(uint64_t p0, uint64_t p1, uint64_t p2, bool render)
1328 // Need to make sure that when writing that it stays within the line buffer...
1329 // LBUF ($F01800 - $F01D9E) 360 x 32-bit RAM
1330 uint8_t depth = (p1 >> 12) & 0x07; // Color depth of image
1331 int32_t xpos = ((int16_t)((p1 << 4) & 0xFFFF)) >> 4;// Image xpos in LBUF
1332 uint32_t iwidth = (p1 >> 28) & 0x3FF; // Image width in *phrases*
1333 uint32_t data = (p0 >> 40) & 0xFFFFF8; // Pixel data address
1334 //#ifdef OP_DEBUG_BMP
1335 // Prolly should use this... Though not sure exactly how.
1336 //Use the upper bits as an offset into the phrase depending on the BPP. That's how!
1337 uint32_t firstPix = (p1 >> 49) & 0x3F;
1338 //This is WEIRD! I'm sure I saw Atari Karts request 8 BPP FIRSTPIX! What happened???
1340 WriteLog("OP: FIRSTPIX != 0! (Scaled BM)\n");
1342 // We can ignore the RELEASE (high order) bit for now--probably forever...!
1343 // uint8_t flags = (p1 >> 45) & 0x0F; // REFLECT, RMW, TRANS, RELEASE
1344 //Optimize: break these out to their own BOOL values [DONE]
1345 uint8_t flags = (p1 >> 45) & 0x07; // REFLECT (0), RMW (1), TRANS (2)
1346 bool flagREFLECT = (flags & OPFLAG_REFLECT ? true : false),
1347 flagRMW = (flags & OPFLAG_RMW ? true : false),
1348 flagTRANS = (flags & OPFLAG_TRANS ? true : false);
1349 uint8_t index = (p1 >> 37) & 0xFE; // CLUT index offset (upper pix, 1-4 bpp)
1350 uint32_t pitch = (p1 >> 15) & 0x07; // Phrase pitch
1352 uint8_t * tomRam8 = TOMGetRamPointer();
1353 uint8_t * paletteRAM = &tomRam8[0x400];
1354 // This is OK as long as it's used correctly: For 16-bit RAM to RAM direct
1355 // copies--NOT for use when using endian-corrected data (i.e., any of the
1356 // *ReadWord functions!)
1357 uint16_t * paletteRAM16 = (uint16_t *)paletteRAM;
1359 uint16_t hscale = p2 & 0xFF;
1360 // Hmm. It seems that fixing the horizontal scale necessitated re-fixing this.
1361 // Not sure why, but seems to be consistent with the vertical scaling now (and
1362 // it may turn out to be wrong!)...
1363 uint16_t horizontalRemainder = hscale; // Not sure if it starts full, but seems reasonable [It's not!]
1364 // uint8_t horizontalRemainder = 0; // Let's try zero! Seems to work! Yay! [No, it doesn't!]
1365 int32_t scaledWidthInPixels = (iwidth * phraseWidthToPixels[depth] * hscale) >> 5;
1366 uint32_t scaledPhrasePixels = (phraseWidthToPixels[depth] * hscale) >> 5;
1368 // WriteLog("bitmap %ix? %ibpp at %i,? firstpix=? data=0x%.8x pitch %i hflipped=%s dwidth=? (linked to ?) RMW=%s Tranparent=%s\n",
1369 // iwidth, op_bitmap_bit_depth[bitdepth], xpos, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), (flags&OPFLAG_RMW ? "yes" : "no"), (flags&OPFLAG_TRANS ? "yes" : "no"));
1371 // Looks like an hscale of zero means don't draw!
1372 if (!render || iwidth == 0 || hscale == 0)
1375 /*extern int start_logging;
1377 WriteLog("OP: Scaled bitmap %ix? %ibpp at %i,? hscale=%02X fpix=%i data=%08X pitch %i hflipped=%s dwidth=? (linked to %08X) Transluency=%s\n",
1378 iwidth, op_bitmap_bit_depth[depth], xpos, hscale, firstPix, data, pitch, (flagREFLECT ? "yes" : "no"), op_pointer, (flagRMW ? "yes" : "no"));*/
1379 //#define OP_DEBUG_BMP
1380 //#ifdef OP_DEBUG_BMP
1381 // WriteLog("OP: Scaled bitmap %ix%i %ibpp at %i,%i firstpix=%i data=0x%.8x pitch %i hflipped=%s dwidth=%i (linked to 0x%.8x) Transluency=%s\n",
1382 // iwidth, height, op_bitmap_bit_depth[bitdepth], xpos, ypos, firstPix, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), dwidth, op_pointer, (flags&OPFLAG_RMW ? "yes" : "no"));
1385 int32_t startPos = xpos, endPos = xpos +
1386 (!flagREFLECT ? scaledWidthInPixels - 1 : -(scaledWidthInPixels + 1));
1387 uint32_t clippedWidth = 0, phraseClippedWidth = 0, dataClippedWidth = 0;
1388 bool in24BPPMode = (((GET16(tomRam8, 0x0028) >> 1) & 0x03) == 1 ? true : false); // VMODE
1389 // Not sure if this is Jaguar Two only location or what...
1390 // From the docs, it is... If we want to limit here we should think of something else.
1391 // int32_t limit = GET16(tom_ram_8, 0x0008); // LIMIT
1392 int32_t limit = 720;
1393 // int32_t lbufWidth = (!in24BPPMode ? limit - 1 : (limit / 2) - 1); // Zero based limit...
1394 int32_t lbufWidth = 719; // Zero based limit...
1396 // If the image is completely to the left or right of the line buffer, then bail.
1397 //If in REFLECT mode, then these values are swapped! !!! FIX !!! [DONE]
1398 //There are four possibilities:
1399 // 1. image sits on left edge and no REFLECT; starts out of bounds but ends in bounds.
1400 // 2. image sits on left edge and REFLECT; starts in bounds but ends out of bounds.
1401 // 3. image sits on right edge and REFLECT; starts out of bounds but ends in bounds.
1402 // 4. image sits on right edge and no REFLECT; starts in bounds but ends out of bounds.
1403 //Numbers 2 & 4 can be caught by checking the LBUF clip while in the inner loop,
1404 // numbers 1 & 3 are of concern.
1405 // This *indirectly* handles only cases 2 & 4! And is WRONG if REFLECT is set...!
1406 // if (rightMargin < 0 || leftMargin > lbufWidth)
1408 // It might be easier to swap these (if REFLECTed) and just use XPOS down below...
1409 // That way, you could simply set XPOS to leftMargin if !REFLECT and to rightMargin otherwise.
1410 // Still have to be careful with the DATA and IWIDTH values though...
1412 if ((!flagREFLECT && (endPos < 0 || startPos > lbufWidth))
1413 || (flagREFLECT && (startPos < 0 || endPos > lbufWidth)))
1416 // Otherwise, find the clip limits and clip the phrase as well...
1417 // NOTE: I'm fudging here by letting the actual blit overstep the bounds of
1418 // the line buffer, but it shouldn't matter since there are two
1419 // unused line buffers below and nothing above and I'll at most write
1420 // 40 bytes outside the line buffer... I could use a fractional clip
1421 // begin/end value, but this makes the blit a *lot* more hairy. I
1422 // might fix this in the future if it becomes necessary. (JLH)
1423 // Probably wouldn't be *that* hairy. Just use a delta that tells the
1424 // inner loop which pixel in the phrase is being written, and quit
1425 // when either end of phrases is reached or line buffer extents are
1428 //This stuff is probably wrong as well... !!! FIX !!!
1429 //The strange thing is that it seems to work, but that's no guarantee that it's
1431 //Yup. Seems that JagMania doesn't work correctly with this...
1432 //Dunno if this is the problem, but Atari Karts is showing *some* of the road
1434 //Actually, it is! Or, it was. It doesn't seem to be clipping here, so the
1435 //problem lies elsewhere! Hmm. Putting the scaling code into the 1/2/8 BPP cases
1436 //seems to draw the ground a bit more accurately... Strange!
1437 //It's probably a case of the REFLECT flag being set and the background being
1438 //written from the right side of the screen...
1439 //But no, it isn't... At least if the diagnostics are telling the truth!
1441 // NOTE: We're just using endPos to figure out how much, if any, to clip by.
1442 // ALSO: There may be another case where we start out of bounds and end out
1446 //There's a problem here with scaledPhrasePixels in that it can be forced to
1447 //zero when the scaling factor is small. So fix it already! !!! FIX !!!
1448 /*if (scaledPhrasePixels == 0)
1450 WriteLog("OP: [Scaled] We're about to encounter a divide by zero error!\n");
1451 DumpScaledObject(p0, p1, p2);
1453 //NOTE: I'm almost 100% sure that this is wrong... And it is! :-p
1455 //Try a simple example...
1456 // Let's say we have an 8 BPP scanline with an hscale of $80 (4). Our xpos is -10,
1457 // non-flipped. Pixels in the bitmap are XYZXYZXYZXYZXYZ.
1458 // Scaled up, they would be XXXXYYYYZZZZXXXXYYYYZZZZXXXXYYYYZZZZ...
1460 // Normally, we would expect this in the line buffer:
1461 // ZZXXXXYYYYZZZZXXXXYYYYZZZZ...
1463 // But instead we're getting:
1464 // XXXXYYYYZZZZXXXXYYYYZZZZ...
1466 // or are we??? It would seem so, simply by virtue of the fact that we're NOT starting
1467 // on negative boundary--or are we? Hmm...
1468 // cw = 10, dcw = pcw = 10 / ([8 * 4 = 32] 32) = 0, sp = -10
1470 // Let's try a real world example:
1472 //OP: Scaled bitmap (70, 8 BPP, spp=28) sp (-400) < 0... [new sp=-8, cw=400, dcw=pcw=14]
1473 //OP: Scaled bitmap (6F, 8 BPP, spp=27) sp (-395) < 0... [new sp=-17, cw=395, dcw=pcw=14]
1475 // Really, spp is 27.75 in the second case...
1476 // So... If we do 395 / 27.75, we get 14. Ok so far... If we scale that against the
1477 // start position (14 * 27.75), we get -6.5... NOT -17!
1479 //Now it seems we're working OK, at least for the first case...
1480 uint32_t scaledPhrasePixelsUS = phraseWidthToPixels[depth] * hscale;
1482 if (startPos < 0) // Case #1: Begin out, end in, L to R
1484 extern int start_logging;
1486 WriteLog("OP: Scaled bitmap (%02X, %u BPP, spp=%u) start pos (%i) < 0...", hscale, op_bitmap_bit_depth[depth], scaledPhrasePixels, startPos);
1487 // clippedWidth = 0 - startPos,
1488 clippedWidth = (0 - startPos) << 5,
1489 // dataClippedWidth = phraseClippedWidth = clippedWidth / scaledPhrasePixels,
1490 dataClippedWidth = phraseClippedWidth = (clippedWidth / scaledPhrasePixelsUS) >> 5,
1491 // startPos = 0 - (clippedWidth % scaledPhrasePixels);
1492 startPos += (dataClippedWidth * scaledPhrasePixelsUS) >> 5;
1494 WriteLog(" [new sp=%i, cw=%i, dcw=pcw=%i]\n", startPos, clippedWidth, dataClippedWidth);
1497 if (endPos < 0) // Case #2: Begin in, end out, R to L
1498 clippedWidth = 0 - endPos,
1499 phraseClippedWidth = clippedWidth / scaledPhrasePixels;
1501 if (endPos > lbufWidth) // Case #3: Begin in, end out, L to R
1502 clippedWidth = endPos - lbufWidth,
1503 phraseClippedWidth = clippedWidth / scaledPhrasePixels;
1505 if (startPos > lbufWidth) // Case #4: Begin out, end in, R to L
1506 clippedWidth = startPos - lbufWidth,
1507 dataClippedWidth = phraseClippedWidth = clippedWidth / scaledPhrasePixels,
1508 startPos = lbufWidth + (clippedWidth % scaledPhrasePixels);
1510 extern int op_start_log;
1511 if (op_start_log && clippedWidth != 0)
1512 WriteLog("OP: Clipped line. SP=%i, EP=%i, clip=%u, iwidth=%u, hscale=%02X\n", startPos, endPos, clippedWidth, iwidth, hscale);
1513 if (op_start_log && startPos == 13)
1515 WriteLog("OP: Scaled line. SP=%i, EP=%i, clip=%u, iwidth=%u, hscale=%02X, depth=%u, firstPix=%u\n", startPos, endPos, clippedWidth, iwidth, hscale, depth, firstPix);
1516 DumpScaledObject(p0, p1, p2);
1519 WriteLog(" %08X: ", data);
1520 for(int i=0; i<7*8; i++)
1521 WriteLog("%02X ", JaguarReadByte(data+i));
1525 // If the image is sitting on the line buffer left or right edge, we need to compensate
1526 // by decreasing the image phrase width accordingly.
1527 iwidth -= phraseClippedWidth;
1529 // Also, if we're clipping the phrase we need to make sure we're in the correct part of
1531 // data += phraseClippedWidth * (pitch << 3);
1532 data += dataClippedWidth * (pitch << 3);
1534 // NOTE: When the bitmap is in REFLECT mode, the XPOS marks the *right* side of the
1535 // bitmap! This makes clipping & etc. MUCH, much easier...!
1536 // uint32_t lbufAddress = 0x1800 + (!in24BPPMode ? leftMargin * 2 : leftMargin * 4);
1537 // uint32_t lbufAddress = 0x1800 + (!in24BPPMode ? startPos * 2 : startPos * 4);
1538 uint32_t lbufAddress = 0x1800 + startPos * 2;
1539 uint8_t * currentLineBuffer = &tomRam8[lbufAddress];
1540 //uint8_t * lineBufferLowerLimit = &tom_ram_8[0x1800],
1541 // * lineBufferUpperLimit = &tom_ram_8[0x1800 + 719];
1545 // Hmm. We check above for 24 BPP mode, but don't do anything about it below...
1546 // If we *were* in 24 BPP mode, how would you convert CRY to RGB24? Seems to me
1547 // that if you're in CRY mode then you wouldn't be able to use 24 BPP bitmaps
1549 // This seems to be the case (at least according to the Midsummer docs)...!
1551 if (depth == 0) // 1 BPP
1554 WriteLog("OP: Scaled bitmap @ 1 BPP requesting FIRSTPIX!\n");
1555 // The LSB of flags is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1556 int32_t lbufDelta = ((int8_t)((flags << 7) & 0xFF) >> 5) | 0x02;
1559 uint64_t pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1561 while ((int32_t)iwidth > 0)
1563 uint8_t bits = pixels >> 63;
1565 #ifndef OP_USES_PALETTE_ZERO
1566 if (flagTRANS && bits == 0)
1568 if (flagTRANS && (paletteRAM16[index | bits] == 0))
1574 // This is the *only* correct use of endian-dependent code
1575 // (i.e., mem-to-mem direct copying)!
1576 *(uint16_t *)currentLineBuffer = paletteRAM16[index | bits];
1578 *currentLineBuffer =
1579 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1580 *(currentLineBuffer + 1) =
1581 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1584 currentLineBuffer += lbufDelta;
1587 The reason we subtract the horizontalRemainder *after* the test is because we had too few
1588 bytes for horizontalRemainder to properly recognize a negative number. But now it's 16 bits
1589 wide, so we could probably go back to that (as long as we make it an int16_t and not a uint16!)
1591 /* horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
1592 while (horizontalRemainder & 0x80)
1594 horizontalRemainder += hscale;
1598 // while (horizontalRemainder <= 0x20) // I.e., it's <= 1.0 (*before* subtraction)
1599 while (horizontalRemainder < 0x20) // I.e., it's <= 1.0 (*before* subtraction)
1601 horizontalRemainder += hscale;
1605 horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
1609 int phrasesToSkip = pixCount / 64, pixelShift = pixCount % 64;
1611 data += (pitch << 3) * phrasesToSkip;
1612 pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1613 pixels <<= 1 * pixelShift;
1614 iwidth -= phrasesToSkip;
1615 pixCount = pixelShift;
1619 else if (depth == 1) // 2 BPP
1622 WriteLog("OP: Scaled bitmap @ 2 BPP requesting FIRSTPIX!\n");
1623 index &= 0xFC; // Top six bits form CLUT index
1624 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1625 int32_t lbufDelta = ((int8_t)((flags << 7) & 0xFF) >> 5) | 0x02;
1628 uint64_t pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1630 while ((int32_t)iwidth > 0)
1632 uint8_t bits = pixels >> 62;
1634 #ifndef OP_USES_PALETTE_ZERO
1635 if (flagTRANS && bits == 0)
1637 if (flagTRANS && (paletteRAM16[index | bits] == 0))
1643 // This is the *only* correct use of endian-dependent code
1644 // (i.e., mem-to-mem direct copying)!
1645 *(uint16_t *)currentLineBuffer = paletteRAM16[index | bits];
1647 *currentLineBuffer =
1648 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1649 *(currentLineBuffer + 1) =
1650 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1653 currentLineBuffer += lbufDelta;
1655 /* horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
1656 while (horizontalRemainder & 0x80)
1658 horizontalRemainder += hscale;
1662 // while (horizontalRemainder <= 0x20) // I.e., it's <= 0 (*before* subtraction)
1663 while (horizontalRemainder < 0x20) // I.e., it's <= 1.0 (*before* subtraction)
1665 horizontalRemainder += hscale;
1669 horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
1673 int phrasesToSkip = pixCount / 32, pixelShift = pixCount % 32;
1675 data += (pitch << 3) * phrasesToSkip;
1676 pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1677 pixels <<= 2 * pixelShift;
1678 iwidth -= phrasesToSkip;
1679 pixCount = pixelShift;
1683 else if (depth == 2) // 4 BPP
1686 WriteLog("OP: Scaled bitmap @ 4 BPP requesting FIRSTPIX!\n");
1687 index &= 0xF0; // Top four bits form CLUT index
1688 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1689 int32_t lbufDelta = ((int8_t)((flags << 7) & 0xFF) >> 5) | 0x02;
1692 uint64_t pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1694 while ((int32_t)iwidth > 0)
1696 uint8_t bits = pixels >> 60;
1698 #ifndef OP_USES_PALETTE_ZERO
1699 if (flagTRANS && bits == 0)
1701 if (flagTRANS && (paletteRAM16[index | bits] == 0))
1707 // This is the *only* correct use of endian-dependent code
1708 // (i.e., mem-to-mem direct copying)!
1709 *(uint16_t *)currentLineBuffer = paletteRAM16[index | bits];
1711 *currentLineBuffer =
1712 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1713 *(currentLineBuffer + 1) =
1714 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1717 currentLineBuffer += lbufDelta;
1719 /* horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
1720 while (horizontalRemainder & 0x80)
1722 horizontalRemainder += hscale;
1726 // while (horizontalRemainder <= 0x20) // I.e., it's <= 0 (*before* subtraction)
1727 while (horizontalRemainder < 0x20) // I.e., it's <= 0 (*before* subtraction)
1729 horizontalRemainder += hscale;
1733 horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
1737 int phrasesToSkip = pixCount / 16, pixelShift = pixCount % 16;
1739 data += (pitch << 3) * phrasesToSkip;
1740 pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1741 pixels <<= 4 * pixelShift;
1742 iwidth -= phrasesToSkip;
1743 pixCount = pixelShift;
1747 else if (depth == 3) // 8 BPP
1750 WriteLog("OP: Scaled bitmap @ 8 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1751 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1752 int32_t lbufDelta = ((int8_t)((flags << 7) & 0xFF) >> 5) | 0x02;
1755 uint64_t pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1757 while ((int32_t)iwidth > 0)
1759 uint8_t bits = pixels >> 56;
1761 #ifndef OP_USES_PALETTE_ZERO
1762 if (flagTRANS && bits == 0)
1764 if (flagTRANS && (paletteRAM16[bits] == 0))
1770 // This is the *only* correct use of endian-dependent code
1771 // (i.e., mem-to-mem direct copying)!
1772 *(uint16_t *)currentLineBuffer = paletteRAM16[bits];
1774 if (currentLineBuffer >= lineBufferLowerLimit && currentLineBuffer <= lineBufferUpperLimit)
1775 *(uint16_t *)currentLineBuffer = paletteRAM16[bits];
1778 *currentLineBuffer =
1779 BLEND_CR(*currentLineBuffer, paletteRAM[bits << 1]),
1780 *(currentLineBuffer + 1) =
1781 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[(bits << 1) + 1]);
1784 currentLineBuffer += lbufDelta;
1786 // while (horizontalRemainder <= 0x20) // I.e., it's <= 0 (*before* subtraction)
1787 while (horizontalRemainder < 0x20) // I.e., it's <= 1.0 (*before* subtraction)
1789 horizontalRemainder += hscale;
1793 horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
1797 int phrasesToSkip = pixCount / 8, pixelShift = pixCount % 8;
1799 data += (pitch << 3) * phrasesToSkip;
1800 pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1801 pixels <<= 8 * pixelShift;
1802 iwidth -= phrasesToSkip;
1803 pixCount = pixelShift;
1807 else if (depth == 4) // 16 BPP
1810 WriteLog("OP: Scaled bitmap @ 16 BPP requesting FIRSTPIX!\n");
1811 // The LSB is OPFLAG_REFLECT, so sign extend it and OR 2 into it.
1812 int32_t lbufDelta = ((int8_t)((flags << 7) & 0xFF) >> 5) | 0x02;
1815 uint64_t pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1817 while ((int32_t)iwidth > 0)
1819 uint8_t bitsHi = pixels >> 56, bitsLo = pixels >> 48;
1821 //This doesn't seem right... Let's try the encoded black value ($8800):
1822 //Apparently, CRY 0 maps to $8800...
1823 if (flagTRANS && ((bitsLo | bitsHi) == 0))
1824 // if (flagTRANS && (bitsHi == 0x88) && (bitsLo == 0x00))
1829 *currentLineBuffer = bitsHi,
1830 *(currentLineBuffer + 1) = bitsLo;
1832 *currentLineBuffer =
1833 BLEND_CR(*currentLineBuffer, bitsHi),
1834 *(currentLineBuffer + 1) =
1835 BLEND_Y(*(currentLineBuffer + 1), bitsLo);
1838 currentLineBuffer += lbufDelta;
1840 /* horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
1841 while (horizontalRemainder & 0x80)
1843 horizontalRemainder += hscale;
1847 // while (horizontalRemainder <= 0x20) // I.e., it's <= 0 (*before* subtraction)
1848 while (horizontalRemainder < 0x20) // I.e., it's <= 1.0 (*before* subtraction)
1850 horizontalRemainder += hscale;
1854 horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
1858 int phrasesToSkip = pixCount / 4, pixelShift = pixCount % 4;
1860 data += (pitch << 3) * phrasesToSkip;
1861 pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1862 pixels <<= 16 * pixelShift;
1864 iwidth -= phrasesToSkip;
1866 pixCount = pixelShift;
1870 else if (depth == 5) // 24 BPP
1872 //I'm not sure that you can scale a 24 BPP bitmap properly--the JTRM seems to indicate as much.
1873 WriteLog("OP: Writing 24 BPP scaled bitmap!\n");
1875 WriteLog("OP: Scaled bitmap @ 24 BPP requesting FIRSTPIX!\n");
1876 // Not sure, but I think RMW only works with 16 BPP and below, and only in CRY mode...
1877 // The LSB is OPFLAG_REFLECT, so sign extend it and OR 4 into it.
1878 int32_t lbufDelta = ((int8_t)((flags << 7) & 0xFF) >> 4) | 0x04;
1883 uint64_t pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1884 data += pitch << 3; // Multiply pitch * 8 (optimize: precompute this value)
1886 for(int i=0; i<2; i++)
1888 uint8_t bits3 = pixels >> 56, bits2 = pixels >> 48,
1889 bits1 = pixels >> 40, bits0 = pixels >> 32;
1891 if (flagTRANS && (bits3 | bits2 | bits1 | bits0) == 0)
1894 *currentLineBuffer = bits3,
1895 *(currentLineBuffer + 1) = bits2,
1896 *(currentLineBuffer + 2) = bits1,
1897 *(currentLineBuffer + 3) = bits0;
1899 currentLineBuffer += lbufDelta;