4 // Original source by David Raingeard (Cal2)
5 // GCC/SDL port by Niels Wagenaar (Linux/WIN32) and Caz (BeOS)
6 // Extensive cleanups/fixes/rewrites by James Hammons
7 // (C) 2010 Underground Software
9 // JLH = James Hammons <jlhamm@acm.org>
12 // --- ---------- -------------------------------------------------------------
13 // JLH 01/16/2010 Created this log ;-)
23 #include "m68000/m68kinterface.h"
28 //#define OP_DEBUG_BMP
// Blend-table lookups. The destination value selects the upper 8 bits of the
// table index and the source value the lower 8 bits.
// Both arguments are fully parenthesized so that expression arguments
// (e.g. `a | b` or `cond ? x : y`) are cast and shifted as a whole.
#define BLEND_Y(dst, src)	op_blend_y[(((uint16_t)(dst)) << 8) | ((uint16_t)(src))]
#define BLEND_CR(dst, src)	op_blend_cr[(((uint16_t)(dst)) << 8) | ((uint16_t)(src))]
// Object type codes: the low three bits of an object's first phrase.
// OPProcessList dispatches on (p0 & 0x07) using these values.
33 #define OBJECT_TYPE_BITMAP 0 // 000
34 #define OBJECT_TYPE_SCALE 1 // 001
35 #define OBJECT_TYPE_GPU 2 // 010
36 #define OBJECT_TYPE_BRANCH 3 // 011
37 #define OBJECT_TYPE_STOP 4 // 100
// Condition codes carried by a BRANCH object (its CC field).
39 #define CONDITION_EQUAL 0 // VC == YPOS
40 #define CONDITION_LESS_THAN 1 // VC < YPOS
41 #define CONDITION_GREATER_THAN 2 // VC > YPOS
42 #define CONDITION_OP_FLAG_SET 3 // Bit 0 of the OP status register is set
43 #define CONDITION_SECOND_HALF_LINE 4 // Bit 10 of HC is set (per the ASIC note in OPProcessList)
// Bitmap object flag bits, as extracted with (p1 >> 45) & 0x0F.
46 #define OPFLAG_RELEASE 8 // Bus release bit
47 #define OPFLAG_TRANS 4 // Transparency bit
48 #define OPFLAG_RMW 2 // Read-Modify-Write bit
49 #define OPFLAG_REFLECT 1 // Horizontal mirror bit
52 // Private function prototypes
// Store a fixed-size bitmap object (phrases p0 and p1) in the line buffer.
54 void OPProcessFixedBitmap(uint64_t p0, uint64_t p1, bool render);
// Scaled-bitmap counterpart; p2 is the object's third phrase, which holds the
// hscale / vscale / remainder bytes.
55 void OPProcessScaledBitmap(uint64_t p0, uint64_t p1, uint64_t p2, bool render);
// Walk the object list from `address`, recording each object address that has
// not been seen before.
56 void OPDiscoverObjects(uint32_t address);
// Log every object address recorded by OPDiscoverObjects().
57 void OPDumpObjectList(void);
// Log the raw second/third phrases plus the decoded fields of a scaled bitmap.
58 void DumpScaledObject(uint64_t p0, uint64_t p1, uint64_t p2);
// Log the raw second phrase plus the decoded fields of a fixed bitmap.
59 void DumpFixedObject(uint64_t p0, uint64_t p1);
// Decode and log the fields common to fixed and scaled bitmap objects.
60 void DumpBitmapCore(uint64_t p0, uint64_t p1);
// Read one 64-bit phrase from the 8-byte aligned address at or below `offset`.
61 uint64_t OPLoadPhrase(uint32_t offset);
63 // Local global variables
65 // Blend tables (64K each)
// Both tables are indexed with (dst << 8) | src; see BLEND_Y / BLEND_CR.
66 static uint8_t op_blend_y[0x10000];
67 static uint8_t op_blend_cr[0x10000];
68 // There may be a problem with this "RAM" overlapping (and thus being independent of)
69 // some of the regular TOM RAM...
70 //#warning objectp_ram is separated from TOM RAM--need to fix that!
// NOTE(review): objectp_ram is commented out here, yet OPReadByte / OPReadWord /
// OPWriteByte / OPWriteWord further down still reference it -- confirm those
// accessors are compiled out.
71 //static uint8_t objectp_ram[0x40]; // This is based at $F00000
72 uint8_t objectp_running = 0;
73 //bool objectp_stop_reading_list;
// Bits per pixel for each 3-bit depth code; used for the "%u bpp" log output.
75 static uint8_t op_bitmap_bit_depth[8] = { 1, 2, 4, 8, 16, 24, 32, 0 };
76 //static uint32_t op_bitmap_bit_size[8] =
77 // { (uint32_t)(0.125*65536), (uint32_t)(0.25*65536), (uint32_t)(0.5*65536), (uint32_t)(1*65536),
78 // (uint32_t)(2*65536), (uint32_t)(1*65536), (uint32_t)(1*65536), (uint32_t)(1*65536) };
// Address of the object being processed; OPProcessList seeds it from
// OPGetListPointer().
79 static uint32_t op_pointer;
// Pixels per phrase for each depth code. OPProcessFixedBitmap multiplies this
// by IWIDTH (a width in phrases) to get a pixel count; codes 6 and 7 map to 0.
81 int32_t phraseWidthToPixels[8] = { 64, 32, 16, 8, 4, 2, 0, 0 };
85 // Object Processor initialization
89 // Here we calculate the saturating blend of a signed 4-bit value and an
90 // existing Cyan/Red value as well as a signed 8-bit value and an existing intensity...
91 // Note: CRY is 4 bits Cyan, 4 bits Red, 8 bits intensitY (16 bits in total)
92 for(int i=0; i<256*256; i++)
94 int y = (i >> 8) & 0xFF;
95 int dy = (int8_t)i; // Sign extend the Y index
96 int c1 = (i >> 8) & 0x0F;
97 int dc1 = (int8_t)(i << 4) >> 4; // Sign extend the R index
98 int c2 = (i >> 12) & 0x0F;
99 int dc2 = (int8_t)(i & 0xF0) >> 4; // Sign extend the C index
124 op_blend_cr[i] = (c2 << 4) | c1;
132 // Object Processor reset
136 // memset(objectp_ram, 0x00, 0x40);
// Printable name for each 3-bit object type code (indexed with lo & 0x07).
141 static const char * opType[8] =
142 { "(BITMAP)", "(SCALED BITMAP)", "(GPU INT)", "(BRANCH)", "(STOP)", "???", "???", "???" };
// Printable name for each BRANCH condition code.
143 static const char * ccType[8] =
144 { "==", "<", ">", "(opflag set)", "(second half line)", "?", "?", "?" };
// Object addresses collected by OPDiscoverObjects(), searched linearly by
// OPObjectExists().
// NOTE(review): no capacity check against 8192 is visible where
// OPDiscoverObjects() appends to this array -- confirm.
145 static uint32_t object[8192];
// Number of valid entries in object[].
146 static uint32_t numberOfObjects;
147 //static uint32_t objectLink[8192];
148 //static uint32_t numberOfLinks;
153 //#warning "!!! Fix OL dump so that it follows links !!!"
154 // const char * opType[8] =
155 // { "(BITMAP)", "(SCALED BITMAP)", "(GPU INT)", "(BRANCH)", "(STOP)", "???", "???", "???" };
156 // const char * ccType[8] =
157 // { "\"==\"", "\"<\"", "\">\"", "(opflag set)", "(second half line)", "?", "?", "?" };
159 uint32_t olp = OPGetListPointer();
160 WriteLog("\nOP: OLP = $%08X\n", olp);
161 WriteLog("OP: Phrase dump\n ----------\n");
164 for(uint32_t i=0; i<0x100; i+=8)
166 uint32_t hi = JaguarReadLong(olp + i, OP), lo = JaguarReadLong(olp + i + 4, OP);
167 WriteLog("\t%08X: %08X %08X %s", olp + i, hi, lo, opType[lo & 0x07]);
169 if ((lo & 0x07) == 3)
171 uint16_t ypos = (lo >> 3) & 0x7FF;
172 uint8_t cc = (lo >> 14) & 0x03;
173 uint32_t link = ((hi << 11) | (lo >> 21)) & 0x3FFFF8;
174 WriteLog(" YPOS=%u, CC=%s, link=%08X", ypos, ccType[cc], link);
179 if ((lo & 0x07) == 0)
180 DumpFixedObject(OPLoadPhrase(olp+i), OPLoadPhrase(olp+i+8));
182 if ((lo & 0x07) == 1)
183 DumpScaledObject(OPLoadPhrase(olp+i), OPLoadPhrase(olp+i+8), OPLoadPhrase(olp+i+16));
188 //#warning "!!! Fix lockup in OPDiscoverObjects() !!!"
189 //temp, to keep the following function from locking up on bad/weird OLs
193 OPDiscoverObjects(olp);
199 bool OPObjectExists(uint32_t address)
201 // Yes, we really do a linear search, every time. :-/
202 for(uint32_t i=0; i<numberOfObjects; i++)
204 if (address == object[i])
212 void OPDiscoverObjects(uint32_t address)
214 uint8_t objectType = 0;
218 // If we've seen this object already, bail out!
219 // Otherwise, add it to the list
220 if (OPObjectExists(address))
223 object[numberOfObjects++] = address;
225 // Get the object & decode its type, link address
226 uint32_t hi = JaguarReadLong(address + 0, OP);
227 uint32_t lo = JaguarReadLong(address + 4, OP);
228 objectType = lo & 0x07;
229 uint32_t link = ((hi << 11) | (lo >> 21)) & 0x3FFFF8;
233 // Recursion needed to follow all links! This does depth-first recursion
234 // on the not-taken objects
235 OPDiscoverObjects(address + 8);
238 // Get the next object...
241 while (objectType != 4);
245 void OPDumpObjectList(void)
247 for(uint32_t i=0; i<numberOfObjects; i++)
249 uint32_t address = object[i];
251 uint32_t hi = JaguarReadLong(address + 0, OP);
252 uint32_t lo = JaguarReadLong(address + 4, OP);
253 uint8_t objectType = lo & 0x07;
254 uint32_t link = ((hi << 11) | (lo >> 21)) & 0x3FFFF8;
255 WriteLog("%08X: %08X %08X %s -> $08X", address, hi, lo, opType[objectType], link);
259 uint16_t ypos = (lo >> 3) & 0x7FF;
260 uint8_t cc = (lo >> 14) & 0x07; // Proper # of bits == 3
261 WriteLog(" YPOS %s %u", ccType[cc], ypos);
267 DumpFixedObject(OPLoadPhrase(address + 0), OPLoadPhrase(address + 8));
270 DumpScaledObject(OPLoadPhrase(address + 0), OPLoadPhrase(address + 8),
271 OPLoadPhrase(address + 16));
273 if (address == link) // Ruh roh...
275 // Runaway recursive link is bad!
276 WriteLog("***** SELF REFERENTIAL LINK *****\n\n");
285 // Object Processor memory access
286 // Memory range: F00010 - F00027
288 // F00010-F00017 R xxxxxxxx xxxxxxxx OB - current object code from the graphics processor
289 // F00020-F00023 W xxxxxxxx xxxxxxxx OLP - start of the object list
290 // F00026 W -------- -------x OBF - object processor flag
294 uint8_t OPReadByte(uint32_t offset, uint32_t who/*=UNKNOWN*/)
297 return objectp_ram[offset];
300 uint16_t OPReadWord(uint32_t offset, uint32_t who/*=UNKNOWN*/)
303 return GET16(objectp_ram, offset);
306 void OPWriteByte(uint32_t offset, uint8_t data, uint32_t who/*=UNKNOWN*/)
309 objectp_ram[offset] = data;
312 void OPWriteWord(uint32_t offset, uint16_t data, uint32_t who/*=UNKNOWN*/)
315 SET16(objectp_ram, offset, data);
317 /*if (offset == 0x20)
318 WriteLog("OP: Setting lo list pointer: %04X\n", data);
320 WriteLog("OP: Setting hi list pointer: %04X\n", data);//*/
325 uint32_t OPGetListPointer(void)
327 // Note: This register is LO / HI WORD, hence the funky look of this...
328 return GET16(tomRam8, 0x20) | (GET16(tomRam8, 0x22) << 16);
332 // This is WRONG, since the OBF is only 16 bits wide!!! [FIXED]
334 uint32_t OPGetStatusRegister(void)
336 return GET16(tomRam8, 0x26);
340 // This is WRONG, since the OBF is only 16 bits wide!!! [FIXED]
342 void OPSetStatusRegister(uint32_t data)
344 tomRam8[0x26] = (data & 0x0000FF00) >> 8;
345 tomRam8[0x27] |= (data & 0xFE);
349 void OPSetCurrentObject(uint64_t object)
351 //Not sure this is right... Wouldn't it just be stored 64 bit BE?
352 // Stored as least significant 32 bits first, ms32 last in big endian
353 /* objectp_ram[0x13] = object & 0xFF; object >>= 8;
354 objectp_ram[0x12] = object & 0xFF; object >>= 8;
355 objectp_ram[0x11] = object & 0xFF; object >>= 8;
356 objectp_ram[0x10] = object & 0xFF; object >>= 8;
358 objectp_ram[0x17] = object & 0xFF; object >>= 8;
359 objectp_ram[0x16] = object & 0xFF; object >>= 8;
360 objectp_ram[0x15] = object & 0xFF; object >>= 8;
361 objectp_ram[0x14] = object & 0xFF;*/
362 // Let's try regular good old big endian...
363 tomRam8[0x17] = object & 0xFF; object >>= 8;
364 tomRam8[0x16] = object & 0xFF; object >>= 8;
365 tomRam8[0x15] = object & 0xFF; object >>= 8;
366 tomRam8[0x14] = object & 0xFF; object >>= 8;
368 tomRam8[0x13] = object & 0xFF; object >>= 8;
369 tomRam8[0x12] = object & 0xFF; object >>= 8;
370 tomRam8[0x11] = object & 0xFF; object >>= 8;
371 tomRam8[0x10] = object & 0xFF;
375 uint64_t OPLoadPhrase(uint32_t offset)
377 offset &= ~0x07; // 8 byte alignment
378 return ((uint64_t)JaguarReadLong(offset, OP) << 32) | (uint64_t)JaguarReadLong(offset+4, OP);
382 void OPStorePhrase(uint32_t offset, uint64_t p)
384 offset &= ~0x07; // 8 byte alignment
385 JaguarWriteLong(offset, p >> 32, OP);
386 JaguarWriteLong(offset + 4, p & 0xFFFFFFFF, OP);
391 // Debugging routines
//
// Log a scaled bitmap object.
//
// p0, p1 - first and second phrases (decoded by DumpBitmapCore)
// p2     - third phrase; its low three bytes are hscale, vscale and remainder
//
// Writes the raw second and third phrases, then the common bitmap fields,
// then the three scaling bytes.
//
void DumpScaledObject(uint64_t p0, uint64_t p1, uint64_t p2)
{
	uint32_t p1Hi = (uint32_t)(p1 >> 32), p1Lo = (uint32_t)(p1 & 0xFFFFFFFF);
	uint32_t p2Hi = (uint32_t)(p2 >> 32), p2Lo = (uint32_t)(p2 & 0xFFFFFFFF);

	WriteLog(" %08X %08X\n", p1Hi, p1Lo);
	WriteLog(" %08X %08X\n", p2Hi, p2Lo);
	DumpBitmapCore(p0, p1);

	uint32_t hscale = (uint32_t)(p2 & 0xFF);
	uint32_t vscale = (uint32_t)((p2 >> 8) & 0xFF);
	uint32_t remainder = (uint32_t)((p2 >> 16) & 0xFF);
	WriteLog(" [hsc: %02X, vsc: %02X, rem: %02X]\n", hscale, vscale, remainder);
}
//
// Log a fixed (unscaled) bitmap object.
//
// p0, p1 - first and second phrases of the object
//
// Writes the raw second phrase, then the decoded fields via DumpBitmapCore().
//
void DumpFixedObject(uint64_t p0, uint64_t p1)
{
	uint32_t p1Hi = (uint32_t)(p1 >> 32);
	uint32_t p1Lo = (uint32_t)(p1 & 0xFFFFFFFF);

	WriteLog(" %08X %08X\n", p1Hi, p1Lo);
	DumpBitmapCore(p0, p1);
}
412 void DumpBitmapCore(uint64_t p0, uint64_t p1)
414 uint32_t bdMultiplier[8] = { 64, 32, 16, 8, 4, 2, 1, 1 };
415 uint8_t bitdepth = (p1 >> 12) & 0x07;
416 //WAS: int16_t ypos = ((p0 >> 3) & 0x3FF); // ??? What if not interlaced (/2)?
417 int16_t ypos = ((p0 >> 3) & 0x7FF); // ??? What if not interlaced (/2)?
418 int32_t xpos = p1 & 0xFFF;
419 xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos); // Sign extend that mutha!
420 uint32_t iwidth = ((p1 >> 28) & 0x3FF);
421 uint32_t dwidth = ((p1 >> 18) & 0x3FF); // Unsigned!
422 uint16_t height = ((p0 >> 14) & 0x3FF);
423 uint32_t link = ((p0 >> 24) & 0x7FFFF) << 3;
424 uint32_t ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
425 uint32_t firstPix = (p1 >> 49) & 0x3F;
426 uint8_t flags = (p1 >> 45) & 0x0F;
427 uint8_t idx = (p1 >> 38) & 0x7F;
428 uint32_t pitch = (p1 >> 15) & 0x07;
429 WriteLog(" [%u x %u @ (%i, %u) (iw:%u, dw:%u) (%u bpp), p:%08X fp:%02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
430 iwidth * bdMultiplier[bitdepth],
431 height, xpos, ypos, iwidth, dwidth, op_bitmap_bit_depth[bitdepth],
432 ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""),
433 (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""),
434 (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
439 // Object Processor main routine
441 #warning "Need to fix this so that when an GPU object IRQ happens, we can pick up OP processing where we left off. !!! FIX !!!"
442 void OPProcessList(int halfline, bool render)
444 #warning "!!! NEED TO HANDLE MULTIPLE FIELDS PROPERLY !!!"
445 // We ignore them, for now; not good
448 extern int op_start_log;
449 // char * condition_to_str[8] =
450 // { "==", "<", ">", "(opflag set)", "(second half line)", "?", "?", "?" };
452 op_pointer = OPGetListPointer();
454 // objectp_stop_reading_list = false;
456 //WriteLog("OP: Processing line #%u (OLP=%08X)...\n", halfline, op_pointer);
459 // *** BEGIN OP PROCESSOR TESTING ONLY ***
460 extern bool interactiveMode;
462 extern int objectPtr;
464 int bitmapCounter = 0;
465 // *** END OP PROCESSOR TESTING ONLY ***
467 uint32_t opCyclesToRun = 30000; // This is a pulled-out-of-the-air value (will need to be fixed, obviously!)
469 // if (op_pointer) WriteLog(" new op list at 0x%.8x halfline %i\n",op_pointer,halfline);
472 // *** BEGIN OP PROCESSOR TESTING ONLY ***
473 if (interactiveMode && bitmapCounter == objectPtr)
477 // *** END OP PROCESSOR TESTING ONLY ***
478 // if (objectp_stop_reading_list)
481 uint64_t p0 = OPLoadPhrase(op_pointer);
483 //WriteLog("\t%08X type %i\n", op_pointer, (uint8_t)p0 & 0x07);
486 if (halfline == TOMGetVDB() && op_start_log)
487 //if (halfline == 215 && op_start_log)
488 //if (halfline == 28 && op_start_log)
491 WriteLog("%08X --> phrase %08X %08X", op_pointer - 8, (int)(p0>>32), (int)(p0&0xFFFFFFFF));
492 if ((p0 & 0x07) == OBJECT_TYPE_BITMAP)
494 WriteLog(" (BITMAP) ");
495 uint64_t p1 = OPLoadPhrase(op_pointer);
496 WriteLog("\n%08X --> phrase %08X %08X ", op_pointer, (int)(p1>>32), (int)(p1&0xFFFFFFFF));
497 uint8_t bitdepth = (p1 >> 12) & 0x07;
498 //WAS: int16_t ypos = ((p0 >> 3) & 0x3FF); // ??? What if not interlaced (/2)?
499 int16_t ypos = ((p0 >> 3) & 0x7FF); // ??? What if not interlaced (/2)?
500 int32_t xpos = p1 & 0xFFF;
501 xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);
502 uint32_t iwidth = ((p1 >> 28) & 0x3FF);
503 uint32_t dwidth = ((p1 >> 18) & 0x3FF); // Unsigned!
504 uint16_t height = ((p0 >> 14) & 0x3FF);
505 uint32_t link = ((p0 >> 24) & 0x7FFFF) << 3;
506 uint32_t ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
507 uint32_t firstPix = (p1 >> 49) & 0x3F;
508 uint8_t flags = (p1 >> 45) & 0x0F;
509 uint8_t idx = (p1 >> 38) & 0x7F;
510 uint32_t pitch = (p1 >> 15) & 0x07;
511 WriteLog("\n [%u (%u) x %u @ (%i, %u) (%u bpp), l: %08X, p: %08X fp: %02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
512 iwidth, dwidth, height, xpos, ypos, op_bitmap_bit_depth[bitdepth], link, ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""), (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""), (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
514 if ((p0 & 0x07) == OBJECT_TYPE_SCALE)
516 WriteLog(" (SCALED BITMAP)");
517 uint64_t p1 = OPLoadPhrase(op_pointer), p2 = OPLoadPhrase(op_pointer+8);
518 WriteLog("\n%08X --> phrase %08X %08X ", op_pointer, (int)(p1>>32), (int)(p1&0xFFFFFFFF));
519 WriteLog("\n%08X --> phrase %08X %08X ", op_pointer+8, (int)(p2>>32), (int)(p2&0xFFFFFFFF));
520 uint8_t bitdepth = (p1 >> 12) & 0x07;
521 //WAS: int16_t ypos = ((p0 >> 3) & 0x3FF); // ??? What if not interlaced (/2)?
522 int16_t ypos = ((p0 >> 3) & 0x7FF); // ??? What if not interlaced (/2)?
523 int32_t xpos = p1 & 0xFFF;
524 xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);
525 uint32_t iwidth = ((p1 >> 28) & 0x3FF);
526 uint32_t dwidth = ((p1 >> 18) & 0x3FF); // Unsigned!
527 uint16_t height = ((p0 >> 14) & 0x3FF);
528 uint32_t link = ((p0 >> 24) & 0x7FFFF) << 3;
529 uint32_t ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
530 uint32_t firstPix = (p1 >> 49) & 0x3F;
531 uint8_t flags = (p1 >> 45) & 0x0F;
532 uint8_t idx = (p1 >> 38) & 0x7F;
533 uint32_t pitch = (p1 >> 15) & 0x07;
534 WriteLog("\n [%u (%u) x %u @ (%i, %u) (%u bpp), l: %08X, p: %08X fp: %02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
535 iwidth, dwidth, height, xpos, ypos, op_bitmap_bit_depth[bitdepth], link, ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""), (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""), (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
536 uint32_t hscale = p2 & 0xFF;
537 uint32_t vscale = (p2 >> 8) & 0xFF;
538 uint32_t remainder = (p2 >> 16) & 0xFF;
539 WriteLog(" [hsc: %02X, vsc: %02X, rem: %02X]\n", hscale, vscale, remainder);
541 if ((p0 & 0x07) == OBJECT_TYPE_GPU)
542 WriteLog(" (GPU)\n");
543 if ((p0 & 0x07) == OBJECT_TYPE_BRANCH)
545 WriteLog(" (BRANCH)\n");
546 uint8_t * jaguarMainRam = GetRamPtr();
547 WriteLog("[RAM] --> ");
548 for(int k=0; k<8; k++)
549 WriteLog("%02X ", jaguarMainRam[op_pointer-8 + k]);
552 if ((p0 & 0x07) == OBJECT_TYPE_STOP)
553 WriteLog(" --> List end\n\n");
557 switch ((uint8_t)p0 & 0x07)
559 case OBJECT_TYPE_BITMAP:
561 //WAS: uint16_t ypos = (p0 >> 3) & 0x3FF;
562 uint16_t ypos = (p0 >> 3) & 0x7FF;
563 // This is only theory implied by Rayman...!
564 // It seems that if the YPOS is zero, then bump the YPOS value so that it coincides with
565 // the VDB value. With interlacing, this would be slightly more tricky.
566 // There's probably another bit somewhere that enables this mode--but so far, doesn't seem
567 // to affect any other game in a negative way (that I've seen).
568 // Either that, or it's an undocumented bug...
570 //No, the reason this was needed is that the OP code before was wrong. Any value
571 //less than VDB will get written to the top line of the display!
573 // Not so sure... Let's see what happens here...
576 ypos = TOMReadWord(0xF00046, OP) / 2; // Get the VDB value
578 // Actually, no. Any item less than VDB will get only the lines that hang over VDB displayed.
579 // Actually, this is incorrect. It seems that VDB value is wrong somewhere and that's
580 // what's causing things to fuck up. Still no idea why.
582 uint32_t height = (p0 & 0xFFC000) >> 14;
583 uint32_t oldOPP = op_pointer - 8;
584 // *** BEGIN OP PROCESSOR TESTING ONLY ***
585 if (inhibit && op_start_log)
586 WriteLog("!!! ^^^ This object is INHIBITED! ^^^ !!!\n");
588 if (!inhibit) // For OP testing only!
589 // *** END OP PROCESSOR TESTING ONLY ***
590 if (halfline >= ypos && height > 0)
592 uint64_t p1 = OPLoadPhrase(op_pointer);
594 //WriteLog("OP: Writing halfline %d with ypos == %d...\n", halfline, ypos);
595 //WriteLog("--> Writing %u BPP bitmap...\n", op_bitmap_bit_depth[(p1 >> 12) & 0x07]);
596 // OPProcessFixedBitmap(halfline, p0, p1, render);
597 OPProcessFixedBitmap(p0, p1, render);
601 //???Does this really happen??? Doesn't seem to work if you do this...!
602 //Probably not. Must be a bug in the documentation...!
603 // uint32_t link = (p0 & 0x7FFFF000000) >> 21;
604 // SET16(tom_ram_8, 0x20, link & 0xFFFF); // OLP
605 // SET16(tom_ram_8, 0x22, link >> 16);
606 /* uint32_t height = (p0 & 0xFFC000) >> 14;
609 // NOTE: Would subtract 2 if in interlaced mode...!
610 // uint64_t height = ((p0 & 0xFFC000) - 0x4000) & 0xFFC000;
614 uint64_t data = (p0 & 0xFFFFF80000000000LL) >> 40;
615 uint64_t dwidth = (p1 & 0xFFC0000) >> 15;
618 p0 &= ~0xFFFFF80000FFC000LL; // Mask out old data...
619 p0 |= (uint64_t)height << 14;
621 OPStorePhrase(oldOPP, p0);
623 //WriteLog("\t\tOld OP: %08X -> ", op_pointer);
624 //Temp, for testing...
625 //No doubt, this type of check will break all kinds of stuff... !!! FIX !!!
626 //And it does! !!! FIX !!!
627 //Let's remove this "fix" since it screws up more than it fixes.
628 /* if (op_pointer > ((p0 & 0x000007FFFF000000LL) >> 21))
631 // NOTE: The link address only replaces bits 3-21 in the OLP, and this replaces
632 // EVERYTHING. !!! FIX !!! [DONE]
633 #warning "!!! Link address is not linked properly for all object types !!!"
634 #warning "!!! Only BITMAP is properly handled !!!"
635 op_pointer &= 0xFFC00007;
636 op_pointer |= (p0 & 0x000007FFFF000000LL) >> 21;
637 //WriteLog("New OP: %08X\n", op_pointer);
638 //kludge: Seems that memory access is mirrored in the first 8MB of memory...
639 if (op_pointer > 0x1FFFFF && op_pointer < 0x800000)
640 op_pointer &= 0xFF1FFFFF; // Knock out bits 21-23
644 case OBJECT_TYPE_SCALE:
646 //WAS: uint16_t ypos = (p0 >> 3) & 0x3FF;
647 uint16_t ypos = (p0 >> 3) & 0x7FF;
648 uint32_t height = (p0 & 0xFFC000) >> 14;
649 uint32_t oldOPP = op_pointer - 8;
650 //WriteLog("OP: Scaled Object (ypos=%04X, height=%04X", ypos, height);
651 // *** BEGIN OP PROCESSOR TESTING ONLY ***
652 if (inhibit && op_start_log)
654 WriteLog("!!! ^^^ This object is INHIBITED! ^^^ !!! (halfline=%u, ypos=%u, height=%u)\n", halfline, ypos, height);
655 DumpScaledObject(p0, OPLoadPhrase(op_pointer), OPLoadPhrase(op_pointer+8));
658 if (!inhibit) // For OP testing only!
659 // *** END OP PROCESSOR TESTING ONLY ***
660 if (halfline >= ypos && height > 0)
662 uint64_t p1 = OPLoadPhrase(op_pointer);
664 uint64_t p2 = OPLoadPhrase(op_pointer);
666 //WriteLog("OP: %08X (%d) %08X%08X %08X%08X %08X%08X\n", oldOPP, halfline, (uint32_t)(p0>>32), (uint32_t)(p0&0xFFFFFFFF), (uint32_t)(p1>>32), (uint32_t)(p1&0xFFFFFFFF), (uint32_t)(p2>>32), (uint32_t)(p2&0xFFFFFFFF));
667 OPProcessScaledBitmap(p0, p1, p2, render);
671 uint16_t remainder = (p2 >> 16) & 0xFF;//, vscale = p2 >> 8;
672 uint8_t /*remainder = p2 >> 16,*/ vscale = p2 >> 8;
673 //Actually, we should skip this object if it has a vscale of zero.
674 //Or do we? Not sure... Atari Karts has a few lines that look like:
676 //000E8268 --> phrase 00010000 7000B00D
677 // [7 (0) x 1 @ (13, 0) (8 bpp), l: 000E82A0, p: 000E0FC0 fp: 00, fl:RELEASE, idx:00, pt:01]
678 // [hsc: 9A, vsc: 00, rem: 00]
679 // Could it be the vscale is overridden if the DWIDTH is zero? Hmm...
680 //WriteLog("OP: Scaled bitmap processing (rem=%02X, vscale=%02X)...\n", remainder, vscale);//*/
683 vscale = 0x20; // OP bug??? Nope, it isn't...! Or is it?
685 //extern int start_logging;
687 // WriteLog("--> Returned from scaled bitmap processing (rem=%02X, vscale=%02X)...\n", remainder, vscale);//*/
689 //--> Returned from scaled bitmap processing (rem=20, vscale=80)...
690 //There are other problems here, it looks like...
692 //About to execute OP (508)...
694 OP: Scaled bitmap 4x? 4bpp at 38,? hscale=7C fpix=0 data=00075E28 pitch 1 hflipped=no dwidth=? (linked to 00071118) Transluency=no
695 --> Returned from scaled bitmap processing (rem=50, vscale=7C)...
696 OP: Scaled bitmap 4x? 4bpp at 38,? hscale=7C fpix=0 data=00075E28 pitch 1 hflipped=no dwidth=? (linked to 00071118) Transluency=no
697 --> Returned from scaled bitmap processing (rem=30, vscale=7C)...
698 OP: Scaled bitmap 4x? 4bpp at 38,? hscale=7C fpix=0 data=00075E28 pitch 1 hflipped=no dwidth=? (linked to 00071118) Transluency=no
699 --> Returned from scaled bitmap processing (rem=10, vscale=7C)...
700 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756A8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
701 --> Returned from scaled bitmap processing (rem=00, vscale=7E)...
702 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
703 --> Returned from scaled bitmap processing (rem=00, vscale=80)...
704 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
705 --> Returned from scaled bitmap processing (rem=5E, vscale=7E)...
706 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756E8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
707 --> Returned from scaled bitmap processing (rem=60, vscale=80)...
708 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
709 --> Returned from scaled bitmap processing (rem=3E, vscale=7E)...
710 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756E8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
711 --> Returned from scaled bitmap processing (rem=40, vscale=80)...
712 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
713 --> Returned from scaled bitmap processing (rem=1E, vscale=7E)...
714 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756E8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
715 --> Returned from scaled bitmap processing (rem=20, vscale=80)...
717 //Here's another problem:
718 // [hsc: 20, vsc: 20, rem: 00]
719 // Since we're not checking for $E0 (but that's what we get from the above), we end
720 // up repeating this halfline unnecessarily... !!! FIX !!! [DONE, but... still not quite
721 // right. Either that, or the Accolade team that wrote Bubsy screwed up royal.]
722 //Also note: $E0 = 7.0 which IS a legal vscale value...
724 // if (remainder & 0x80) // I.e., it's negative
725 // if ((remainder & 0x80) || remainder == 0) // I.e., it's <= 0
726 // if ((remainder - 1) >= 0xE0) // I.e., it's <= 0
727 // if ((remainder >= 0xE1) || remainder == 0)// I.e., it's <= 0
728 // if ((remainder >= 0xE1 && remainder <= 0xFF) || remainder == 0)// I.e., it's <= 0
729 // if (remainder <= 0x20) // I.e., it's <= 1.0
730 // I.e., it's < 1.0f -> means it'll go negative when we subtract 1.0f.
731 if (remainder < 0x20)
733 uint64_t data = (p0 & 0xFFFFF80000000000LL) >> 40;
734 uint64_t dwidth = (p1 & 0xFFC0000) >> 15;
736 // while (remainder & 0x80)
737 // while ((remainder & 0x80) || remainder == 0)
738 // while ((remainder - 1) >= 0xE0)
739 // while ((remainder >= 0xE1) || remainder == 0)
740 // while ((remainder >= 0xE1 && remainder <= 0xFF) || remainder == 0)
741 // while (remainder <= 0x20)
742 while (remainder < 0x20)
752 p0 &= ~0xFFFFF80000FFC000LL; // Mask out old data...
753 p0 |= (uint64_t)height << 14;
755 OPStorePhrase(oldOPP, p0);
758 remainder -= 0x20; // 1.0f in [3.5] fixed point format
761 // WriteLog("--> Finished writebacks...\n");//*/
763 //WriteLog(" [%08X%08X -> ", (uint32_t)(p2>>32), (uint32_t)(p2&0xFFFFFFFF));
764 p2 &= ~0x0000000000FF0000LL;
765 p2 |= (uint64_t)remainder << 16;
766 //WriteLog("%08X%08X]\n", (uint32_t)(p2>>32), (uint32_t)(p2&0xFFFFFFFF));
767 OPStorePhrase(oldOPP + 16, p2);
768 //remainder = (uint8_t)(p2 >> 16), vscale = (uint8_t)(p2 >> 8);
769 //WriteLog(" [after]: rem=%02X, vscale=%02X\n", remainder, vscale);
772 op_pointer = (p0 & 0x000007FFFF000000LL) >> 21;
775 case OBJECT_TYPE_GPU:
777 //WriteLog("OP: Asserting GPU IRQ #3...\n");
778 #warning "Need to fix OP GPU IRQ handling! !!! FIX !!!"
779 OPSetCurrentObject(p0);
780 GPUSetIRQLine(3, ASSERT_LINE);
781 //Also, OP processing is suspended from this point until OBF (F00026) is written to...
784 //OPSuspendedByGPU = true;
785 //Dunno if the OP keeps processing from where it was interrupted, or if it just continues
786 //on the next halfline...
787 // --> It continues from where it was interrupted! !!! FIX !!!
790 case OBJECT_TYPE_BRANCH:
792 uint16_t ypos = (p0 >> 3) & 0x7FF;
793 // NOTE: The JTRM sez there are only 2 bits used for the CC, but lists *five*
794 // conditions! Need at least one more bit for that! :-P
795 // Also, the ASIC nets imply that it uses bits 14-16 (height in BM & SBM objects)
796 #warning "!!! Possibly bad CC handling in OP (missing 1 bit) !!!"
797 uint8_t cc = (p0 >> 14) & 0x03;
798 uint32_t link = (p0 >> 21) & 0x3FFFF8;
800 // if ((ypos!=507)&&(ypos!=25))
801 // WriteLog("\t%i%s%i link=0x%.8x\n",halfline,condition_to_str[cc],ypos>>1,link);
804 case CONDITION_EQUAL:
805 if (TOMReadWord(0xF00006, OP) == ypos || ypos == 0x7FF)
808 case CONDITION_LESS_THAN:
809 if (TOMReadWord(0xF00006, OP) < ypos)
812 case CONDITION_GREATER_THAN:
813 if (TOMReadWord(0xF00006, OP) > ypos)
816 case CONDITION_OP_FLAG_SET:
817 if (OPGetStatusRegister() & 0x01)
820 case CONDITION_SECOND_HALF_LINE:
821 //Here's the ASIC code:
822 // ND4(cctrue5, newheight[2], heightl[1], heightl[0], hcb[10]);
823 //which means, do the link if bit 10 of HC is set...
825 // This basically means branch if bit 10 of HC is set
826 #warning "Unhandled condition code causes emulator to crash... !!! FIX !!!"
827 WriteLog("OP: Unexpected CONDITION_SECOND_HALF_LINE in BRANCH object\nOP: shutting down!\n");
832 // Basically, if you do this, the OP does nothing. :-)
833 WriteLog("OP: Unimplemented branch condition %i\n", cc);
837 case OBJECT_TYPE_STOP:
841 //WriteLog("OP: --> STOP\n");
842 // op_set_status_register(((p0>>3) & 0xFFFFFFFF));
843 //This seems more likely...
844 OPSetCurrentObject(p0);
848 // We need to check whether these interrupts are enabled or not, THEN
849 // set an IRQ + pending flag if necessary...
850 if (TOMIRQEnabled(IRQ_OPFLAG))
852 TOMSetPendingObjectInt();
853 m68k_set_irq(2); // Cause a 68K IPL 2 to occur...
861 // WriteLog("op: unknown object type %i\n", ((uint8_t)p0 & 0x07));
865 // Here is a little sanity check to keep the OP from locking up the machine
866 // when fed bad data. Better would be to count how many actual cycles it used
867 // and bail out/reenter to properly simulate an overloaded OP... !!! FIX !!!
868 #warning "Better would be to count how many actual cycles it used and bail out/reenter to properly simulate an overloaded OP... !!! FIX !!!"
878 // Store fixed size bitmap in line buffer
880 void OPProcessFixedBitmap(uint64_t p0, uint64_t p1, bool render)
882 // Need to make sure that when writing that it stays within the line buffer...
883 // LBUF ($F01800 - $F01D9E) 360 x 32-bit RAM
884 uint8_t depth = (p1 >> 12) & 0x07; // Color depth of image
885 int32_t xpos = ((int16_t)((p1 << 4) & 0xFFFF)) >> 4;// Image xpos in LBUF
886 uint32_t iwidth = (p1 >> 28) & 0x3FF; // Image width in *phrases*
887 uint32_t data = (p0 >> 40) & 0xFFFFF8; // Pixel data address
888 //#ifdef OP_DEBUG_BMP
889 uint32_t firstPix = (p1 >> 49) & 0x3F;
890 // "The LSB is significant only for scaled objects..." -JTRM
891 // "In 1 BPP mode, all five bits are significant. In 2 BPP mode, the top four are significant..."
894 // We can ignore the RELEASE (high order) bit for now--probably forever...!
895 // uint8_t flags = (p1 >> 45) & 0x0F; // REFLECT, RMW, TRANS, RELEASE
896 //Optimize: break these out to their own BOOL values
897 uint8_t flags = (p1 >> 45) & 0x07; // REFLECT (0), RMW (1), TRANS (2)
898 bool flagREFLECT = (flags & OPFLAG_REFLECT ? true : false),
899 flagRMW = (flags & OPFLAG_RMW ? true : false),
900 flagTRANS = (flags & OPFLAG_TRANS ? true : false);
901 // "For images with 1 to 4 bits/pixel the top 7 to 4 bits of the index
902 // provide the most significant bits of the palette address."
903 uint8_t index = (p1 >> 37) & 0xFE; // CLUT index offset (upper pix, 1-4 bpp)
904 uint32_t pitch = (p1 >> 15) & 0x07; // Phrase pitch
905 pitch <<= 3; // Optimization: Multiply pitch by 8
907 // int16_t scanlineWidth = tom_getVideoModeWidth();
908 uint8_t * tomRam8 = TOMGetRamPointer();
909 uint8_t * paletteRAM = &tomRam8[0x400];
910 // This is OK as long as it's used correctly: For 16-bit RAM to RAM direct copies--NOT
911 // for use when using endian-corrected data (i.e., any of the *_word_read functions!)
912 uint16_t * paletteRAM16 = (uint16_t *)paletteRAM;
914 // WriteLog("bitmap %ix? %ibpp at %i,? firstpix=? data=0x%.8x pitch %i hflipped=%s dwidth=? (linked to ?) RMW=%s Tranparent=%s\n",
915 // iwidth, op_bitmap_bit_depth[bitdepth], xpos, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), (flags&OPFLAG_RMW ? "yes" : "no"), (flags&OPFLAG_TRANS ? "yes" : "no"));
917 // Is it OK to have a 0 for the data width??? (i.e., undocumented?)
918 // Seems to be... Seems that dwidth *can* be zero (i.e., reuse same line) as well.
919 // Pitch == 0 is OK too...
921 //kludge: Seems that the OP treats iwidth == 0 as iwidth == 1... Need to investigate
922 // on real hardware...
923 #warning "!!! Need to investigate iwidth == 0 behavior on real hardware !!!"
927 // if (!render || op_pointer == 0 || ptr == 0 || pitch == 0)
928 //I'm not convinced that we need to concern ourselves with data & op_pointer here either!
929 if (!render || iwidth == 0)
932 //OK, so we know the position in the line buffer is correct. It's the clipping in
933 //24bpp mode that's wrong!
935 //This is a total kludge, based upon the fact that 24BPP mode puts *4* bytes
936 //into the line buffer for each pixel.
937 if (depth == 5) // i.e., 24bpp mode...
938 xpos >>= 1; // Cut it in half...
941 //#define OP_DEBUG_BMP
942 //#ifdef OP_DEBUG_BMP
943 // WriteLog("bitmap %ix%i %ibpp at %i,%i firstpix=%i data=0x%.8x pitch %i hflipped=%s dwidth=%i (linked to 0x%.8x) Transluency=%s\n",
944 // iwidth, height, op_bitmap_bit_depth[bitdepth], xpos, ypos, firstPix, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), dwidth, op_pointer, (flags&OPFLAG_RMW ? "yes" : "no"));
947 // int32_t leftMargin = xpos, rightMargin = (xpos + (phraseWidthToPixels[depth] * iwidth)) - 1;
948 int32_t startPos = xpos, endPos = xpos +
949 (!flagREFLECT ? (phraseWidthToPixels[depth] * iwidth) - 1
950 : -((phraseWidthToPixels[depth] * iwidth) + 1));
951 uint32_t clippedWidth = 0, phraseClippedWidth = 0, dataClippedWidth = 0;//, phrasePixel = 0;
952 bool in24BPPMode = (((GET16(tomRam8, 0x0028) >> 1) & 0x03) == 1 ? true : false); // VMODE
953 // Not sure if this is Jaguar Two only location or what...
954 // From the docs, it is... If we want to limit here we should think of something else.
955 // int32_t limit = GET16(tom_ram_8, 0x0008); // LIMIT
956 // int32_t limit = 720;
957 // int32_t lbufWidth = (!in24BPPMode ? limit - 1 : (limit / 2) - 1); // Zero based limit...
958 //printf("[OP:xpos=%i,spos=%i,epos=%i>", xpos, startPos, endPos);
959 // This is correct, the OP line buffer is a constant size...
961 int32_t lbufWidth = 719;
963 // If the image is completely to the left or right of the line buffer, then bail.
964 //If in REFLECT mode, then these values are swapped! !!! FIX !!! [DONE]
965 //There are four possibilities:
966 // 1. image sits on left edge and no REFLECT; starts out of bounds but ends in bounds.
967 // 2. image sits on left edge and REFLECT; starts in bounds but ends out of bounds.
968 // 3. image sits on right edge and REFLECT; starts out of bounds but ends in bounds.
969 // 4. image sits on right edge and no REFLECT; starts in bounds but ends out of bounds.
970 //Numbers 2 & 4 can be caught by checking the LBUF clip while in the inner loop,
971 // numbers 1 & 3 are of concern.
972 // This *indirectly* handles only cases 2 & 4! And is WRONG if REFLECT is set...!
973 // if (rightMargin < 0 || leftMargin > lbufWidth)
975 // It might be easier to swap these (if REFLECTed) and just use XPOS down below...
976 // That way, you could simply set XPOS to leftMargin if !REFLECT and to rightMargin otherwise.
977 // Still have to be careful with the DATA and IWIDTH values though...
979 // if ((!flagREFLECT && (rightMargin < 0 || leftMargin > lbufWidth))
980 // || (flagREFLECT && (leftMargin < 0 || rightMargin > lbufWidth)))
982 if ((!flagREFLECT && (endPos < 0 || startPos > lbufWidth))
983 || (flagREFLECT && (startPos < 0 || endPos > lbufWidth)))
986 // Otherwise, find the clip limits and clip the phrase as well...
987 // NOTE: I'm fudging here by letting the actual blit overstep the bounds of the
988 // line buffer, but it shouldn't matter since there are two unused line
989 // buffers below and nothing above and I'll at most write 8 bytes outside
990 // the line buffer... I could use a fractional clip begin/end value, but
991 // this makes the blit a *lot* more hairy. I might fix this in the future
992 // if it becomes necessary. (JLH)
993 // Probably wouldn't be *that* hairy. Just use a delta that tells the inner loop
994 // which pixel in the phrase is being written, and quit when either end of phrases
995 // is reached or line buffer extents are surpassed.
997 //This stuff is probably wrong as well... !!! FIX !!!
998 //The strange thing is that it seems to work, but that's no guarantee that it's bulletproof!
999 //Yup. Seems that JagMania doesn't work correctly with this...
1000 //Dunno if this is the problem, but Atari Karts is showing *some* of the road now...
1001 // if (!flagREFLECT)
1005 clippedWidth = 0 - leftMargin,
1006 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth],
1007 leftMargin = 0 - (clippedWidth % phraseWidthToPixels[depth]);
1010 if (rightMargin > lbufWidth)
1011 clippedWidth = rightMargin - lbufWidth,
1012 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];//,
1013 // rightMargin = lbufWidth + (clippedWidth % phraseWidthToPixels[depth]);
1014 // rightMargin = lbufWidth;
1017 WriteLog("OP: We're about to encounter a divide by zero error!\n");
1018 // NOTE: We're just using endPos to figure out how much, if any, to clip by.
1019 // ALSO: There may be another case where we start out of bounds and end out of bounds...!
1021 if (startPos < 0) // Case #1: Begin out, end in, L to R
1022 clippedWidth = 0 - startPos,
1023 dataClippedWidth = phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth],
1024 startPos = 0 - (clippedWidth % phraseWidthToPixels[depth]);
1026 if (endPos < 0) // Case #2: Begin in, end out, R to L
1027 clippedWidth = 0 - endPos,
1028 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];
1030 if (endPos > lbufWidth) // Case #3: Begin in, end out, L to R
1031 clippedWidth = endPos - lbufWidth,
1032 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];
1034 if (startPos > lbufWidth) // Case #4: Begin out, end in, R to L
1035 clippedWidth = startPos - lbufWidth,
1036 dataClippedWidth = phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth],
1037 startPos = lbufWidth + (clippedWidth % phraseWidthToPixels[depth]);
1038 //printf("<OP:spos=%i,epos=%i]", startPos, endPos);
1040 // If the image is sitting on the line buffer left or right edge, we need to compensate
1041 // by decreasing the image phrase width accordingly.
1042 iwidth -= phraseClippedWidth;
1044 // Also, if we're clipping the phrase we need to make sure we're in the correct part of
1046 // data += phraseClippedWidth * (pitch << 3);
1047 data += dataClippedWidth * pitch;
1049 // NOTE: When the bitmap is in REFLECT mode, the XPOS marks the *right* side of the
1050 // bitmap! This makes clipping & etc. MUCH, much easier...!
1051 // uint32_t lbufAddress = 0x1800 + (!in24BPPMode ? leftMargin * 2 : leftMargin * 4);
1052 //Why does this work right when multiplying startPos by 2 (instead of 4) for 24 BPP mode?
1053 //Is this a bug in the OP?
1054 //It's because in 24bpp mode, each pixel takes *4* bytes, instead of the usual 2.
1055 //Though it looks like we're doing it here no matter what...
1056 // uint32_t lbufAddress = 0x1800 + (!in24BPPMode ? startPos * 2 : startPos * 2);
1058 uint32_t lbufAddress = 0x1800 + (startPos * 2);
1059 uint8_t * currentLineBuffer = &tomRam8[lbufAddress];
1063 // Hmm. We check above for 24 BPP mode, but don't do anything about it below...
1064 // If we *were* in 24 BPP mode, how would you convert CRY to RGB24? Seems to me
1065 // that if you're in CRY mode then you wouldn't be able to use 24 BPP bitmaps
1067 // This seems to be the case (at least according to the Midsummer docs)...!
1069 // This is to test using palette zeroes instead of bit zeroes...
1070 // And it seems that this is wrong, index == 0 is transparent apparently... :-/
1071 //#define OP_USES_PALETTE_ZERO
1073 if (depth == 0) // 1 BPP
1075 // The LSB of flags is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1076 int32_t lbufDelta = ((int8_t)((flags << 7) & 0xFF) >> 5) | 0x02;
1078 // Fetch 1st phrase...
1079 uint64_t pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1080 //Note that firstPix should only be honored *if* we start with the 1st phrase of the bitmap
1081 //i.e., we didn't clip on the margin... !!! FIX !!!
1082 pixels <<= firstPix; // Skip first N pixels (N=firstPix)...
1083 int i = firstPix; // Start counter at right spot...
1089 uint8_t bit = pixels >> 63;
1090 #ifndef OP_USES_PALETTE_ZERO
1091 if (flagTRANS && bit == 0)
1093 if (flagTRANS && (paletteRAM16[index | bit] == 0))
1099 //Optimize: Set paletteRAM16 to beginning of palette RAM + index*2 and use only [bit] as index...
1100 //Won't optimize RMW case though...
1101 // This is the *only* correct use of endian-dependent code
1102 // (i.e., mem-to-mem direct copying)!
1103 *(uint16_t *)currentLineBuffer = paletteRAM16[index | bit];
1105 *currentLineBuffer =
1106 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bit) << 1]),
1107 *(currentLineBuffer + 1) =
1108 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bit) << 1) + 1]);
1111 currentLineBuffer += lbufDelta;
1115 // Fetch next phrase...
1117 pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1120 else if (depth == 1) // 2 BPP
1123 WriteLog("OP: Fixed bitmap @ 2 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1124 index &= 0xFC; // Top six bits form CLUT index
1125 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1126 int32_t lbufDelta = ((int8_t)((flags << 7) & 0xFF) >> 5) | 0x02;
1131 uint64_t pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1134 for(int i=0; i<32; i++)
1136 uint8_t bits = pixels >> 62;
1137 // Seems to me that both of these are in the same endian, so we could cast it as
1138 // uint16_t * and do straight across copies (what about 24 bpp? Treat it differently...)
1139 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
1140 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
1141 // No, it isn't because we read the memory in an endian safe way--this *won't* work...
1142 #ifndef OP_USES_PALETTE_ZERO
1143 if (flagTRANS && bits == 0)
1145 if (flagTRANS && (paletteRAM16[index | bits] == 0))
1151 *(uint16_t *)currentLineBuffer = paletteRAM16[index | bits];
1153 *currentLineBuffer =
1154 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1155 *(currentLineBuffer + 1) =
1156 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1159 currentLineBuffer += lbufDelta;
1164 else if (depth == 2) // 4 BPP
1167 WriteLog("OP: Fixed bitmap @ 4 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1168 index &= 0xF0; // Top four bits form CLUT index
1169 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1170 int32_t lbufDelta = ((int8_t)((flags << 7) & 0xFF) >> 5) | 0x02;
1175 uint64_t pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1178 for(int i=0; i<16; i++)
1180 uint8_t bits = pixels >> 60;
1181 // Seems to me that both of these are in the same endian, so we could cast it as
1182 // uint16_t * and do straight across copies (what about 24 bpp? Treat it differently...)
1183 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
1184 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
1185 // No, it isn't because we read the memory in an endian safe way--this *won't* work...
1186 #ifndef OP_USES_PALETTE_ZERO
1187 if (flagTRANS && bits == 0)
1189 if (flagTRANS && (paletteRAM16[index | bits] == 0))
1195 *(uint16_t *)currentLineBuffer = paletteRAM16[index | bits];
1197 *currentLineBuffer =
1198 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1199 *(currentLineBuffer + 1) =
1200 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1203 currentLineBuffer += lbufDelta;
1208 else if (depth == 3) // 8 BPP
1210 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1211 int32_t lbufDelta = ((int8_t)((flags << 7) & 0xFF) >> 5) | 0x02;
1213 // Fetch 1st phrase...
1214 uint64_t pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1215 //Note that firstPix should only be honored *if* we start with the 1st phrase of the bitmap
1216 //i.e., we didn't clip on the margin... !!! FIX !!!
1217 firstPix &= 0x30; // Only top two bits are valid for 8 BPP
1218 pixels <<= firstPix; // Skip first N pixels (N=firstPix)...
1219 int i = firstPix >> 3; // Start counter at right spot...
1225 uint8_t bits = pixels >> 56;
1226 // Seems to me that both of these are in the same endian, so we could cast it as
1227 // uint16_t * and do straight across copies (what about 24 bpp? Treat it differently...)
1228 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
1229 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
1230 // No, it isn't because we read the memory in an endian safe way--this *won't* work...
1231 //This would seem to be problematic...
1232 //Because it's the palette entry being zero that makes the pixel transparent...
1233 //Let's try it and see.
1234 #ifndef OP_USES_PALETTE_ZERO
1235 if (flagTRANS && bits == 0)
1237 if (flagTRANS && (paletteRAM16[bits] == 0))
1243 *(uint16_t *)currentLineBuffer = paletteRAM16[bits];
1245 *currentLineBuffer =
1246 BLEND_CR(*currentLineBuffer, paletteRAM[bits << 1]),
1247 *(currentLineBuffer + 1) =
1248 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[(bits << 1) + 1]);
1251 currentLineBuffer += lbufDelta;
1255 // Fetch next phrase...
1257 pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1260 else if (depth == 4) // 16 BPP
1263 WriteLog("OP: Fixed bitmap @ 16 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1264 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1265 int32_t lbufDelta = ((int8_t)((flags << 7) & 0xFF) >> 5) | 0x02;
1270 uint64_t pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1273 for(int i=0; i<4; i++)
1275 uint8_t bitsHi = pixels >> 56, bitsLo = pixels >> 48;
1276 // Seems to me that both of these are in the same endian, so we could cast it as
1277 // uint16_t * and do straight across copies (what about 24 bpp? Treat it differently...)
1278 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
1279 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
1280 // No, it isn't because we read the memory in an endian safe way--it *won't* work...
1281 //This doesn't seem right... Let's try the encoded black value ($8800):
1282 //Apparently, CRY 0 maps to $8800...
1283 if (flagTRANS && ((bitsLo | bitsHi) == 0))
1284 // if (flagTRANS && (bitsHi == 0x88) && (bitsLo == 0x00))
1289 *currentLineBuffer = bitsHi,
1290 *(currentLineBuffer + 1) = bitsLo;
1292 *currentLineBuffer =
1293 BLEND_CR(*currentLineBuffer, bitsHi),
1294 *(currentLineBuffer + 1) =
1295 BLEND_Y(*(currentLineBuffer + 1), bitsLo);
1298 currentLineBuffer += lbufDelta;
1303 else if (depth == 5) // 24 BPP
1305 //Looks like Iron Soldier is the only game that uses 24BPP mode...
1306 //There *might* be others...
1307 //WriteLog("OP: Writing 24 BPP bitmap!\n");
1309 WriteLog("OP: Fixed bitmap @ 24 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1310 // Not sure, but I think RMW only works with 16 BPP and below, and only in CRY mode...
1311 // The LSB of flags is OPFLAG_REFLECT, so sign extend it and OR 4 into it.
1312 int32_t lbufDelta = ((int8_t)((flags << 7) & 0xFF) >> 4) | 0x04;
1317 uint64_t pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1320 for(int i=0; i<2; i++)
1322 // We don't use a 32-bit var here because of endian issues...!
1323 uint8_t bits3 = pixels >> 56, bits2 = pixels >> 48,
1324 bits1 = pixels >> 40, bits0 = pixels >> 32;
1326 if (flagTRANS && (bits3 | bits2 | bits1 | bits0) == 0)
1329 *currentLineBuffer = bits3,
1330 *(currentLineBuffer + 1) = bits2,
1331 *(currentLineBuffer + 2) = bits1,
1332 *(currentLineBuffer + 3) = bits0;
1334 currentLineBuffer += lbufDelta;
1343 // Store scaled bitmap in line buffer
1345 void OPProcessScaledBitmap(uint64_t p0, uint64_t p1, uint64_t p2, bool render)
1347 // Need to make sure that when writing that it stays within the line buffer...
1348 // LBUF ($F01800 - $F01D9E) 360 x 32-bit RAM
1349 uint8_t depth = (p1 >> 12) & 0x07; // Color depth of image
1350 int32_t xpos = ((int16_t)((p1 << 4) & 0xFFFF)) >> 4;// Image xpos in LBUF
1351 uint32_t iwidth = (p1 >> 28) & 0x3FF; // Image width in *phrases*
1352 uint32_t data = (p0 >> 40) & 0xFFFFF8; // Pixel data address
1353 //#ifdef OP_DEBUG_BMP
1354 // Prolly should use this... Though not sure exactly how.
1355 //Use the upper bits as an offset into the phrase depending on the BPP. That's how!
1356 uint32_t firstPix = (p1 >> 49) & 0x3F;
1357 //This is WEIRD! I'm sure I saw Atari Karts request 8 BPP FIRSTPIX! What happened???
1359 WriteLog("OP: FIRSTPIX != 0! (Scaled BM)\n");
1361 // We can ignore the RELEASE (high order) bit for now--probably forever...!
1362 // uint8_t flags = (p1 >> 45) & 0x0F; // REFLECT, RMW, TRANS, RELEASE
1363 //Optimize: break these out to their own BOOL values [DONE]
1364 uint8_t flags = (p1 >> 45) & 0x07; // REFLECT (0), RMW (1), TRANS (2)
1365 bool flagREFLECT = (flags & OPFLAG_REFLECT ? true : false),
1366 flagRMW = (flags & OPFLAG_RMW ? true : false),
1367 flagTRANS = (flags & OPFLAG_TRANS ? true : false);
1368 uint8_t index = (p1 >> 37) & 0xFE; // CLUT index offset (upper pix, 1-4 bpp)
1369 uint32_t pitch = (p1 >> 15) & 0x07; // Phrase pitch
1371 uint8_t * tomRam8 = TOMGetRamPointer();
1372 uint8_t * paletteRAM = &tomRam8[0x400];
1373 // This is OK as long as it's used correctly: For 16-bit RAM to RAM direct copies--NOT
1374 // for use when using endian-corrected data (i.e., any of the *ReadWord functions!)
1375 uint16_t * paletteRAM16 = (uint16_t *)paletteRAM;
1377 uint16_t hscale = p2 & 0xFF;
1378 // Hmm. It seems that fixing the horizontal scale necessitated re-fixing this. Not sure why,
1379 // but seems to be consistent with the vertical scaling now (and it may turn out to be wrong!)...
1380 uint16_t horizontalRemainder = hscale; // Not sure if it starts full, but seems reasonable [It's not!]
1381 // uint8_t horizontalRemainder = 0; // Let's try zero! Seems to work! Yay! [No, it doesn't!]
1382 int32_t scaledWidthInPixels = (iwidth * phraseWidthToPixels[depth] * hscale) >> 5;
1383 uint32_t scaledPhrasePixels = (phraseWidthToPixels[depth] * hscale) >> 5;
1385 // WriteLog("bitmap %ix? %ibpp at %i,? firstpix=? data=0x%.8x pitch %i hflipped=%s dwidth=? (linked to ?) RMW=%s Tranparent=%s\n",
1386 // iwidth, op_bitmap_bit_depth[bitdepth], xpos, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), (flags&OPFLAG_RMW ? "yes" : "no"), (flags&OPFLAG_TRANS ? "yes" : "no"));
1388 // Looks like an hscale of zero means don't draw!
1389 if (!render || iwidth == 0 || hscale == 0)
1392 /*extern int start_logging;
1394 WriteLog("OP: Scaled bitmap %ix? %ibpp at %i,? hscale=%02X fpix=%i data=%08X pitch %i hflipped=%s dwidth=? (linked to %08X) Transluency=%s\n",
1395 iwidth, op_bitmap_bit_depth[depth], xpos, hscale, firstPix, data, pitch, (flagREFLECT ? "yes" : "no"), op_pointer, (flagRMW ? "yes" : "no"));*/
1396 //#define OP_DEBUG_BMP
1397 //#ifdef OP_DEBUG_BMP
1398 // WriteLog("OP: Scaled bitmap %ix%i %ibpp at %i,%i firstpix=%i data=0x%.8x pitch %i hflipped=%s dwidth=%i (linked to 0x%.8x) Transluency=%s\n",
1399 // iwidth, height, op_bitmap_bit_depth[bitdepth], xpos, ypos, firstPix, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), dwidth, op_pointer, (flags&OPFLAG_RMW ? "yes" : "no"));
1402 int32_t startPos = xpos, endPos = xpos +
1403 (!flagREFLECT ? scaledWidthInPixels - 1 : -(scaledWidthInPixels + 1));
1404 uint32_t clippedWidth = 0, phraseClippedWidth = 0, dataClippedWidth = 0;
1405 bool in24BPPMode = (((GET16(tomRam8, 0x0028) >> 1) & 0x03) == 1 ? true : false); // VMODE
1406 // Not sure if this is Jaguar Two only location or what...
1407 // From the docs, it is... If we want to limit here we should think of something else.
1408 // int32_t limit = GET16(tom_ram_8, 0x0008); // LIMIT
1409 int32_t limit = 720;
1410 // int32_t lbufWidth = (!in24BPPMode ? limit - 1 : (limit / 2) - 1); // Zero based limit...
1411 int32_t lbufWidth = 719; // Zero based limit...
1413 // If the image is completely to the left or right of the line buffer, then bail.
1414 //If in REFLECT mode, then these values are swapped! !!! FIX !!! [DONE]
1415 //There are four possibilities:
1416 // 1. image sits on left edge and no REFLECT; starts out of bounds but ends in bounds.
1417 // 2. image sits on left edge and REFLECT; starts in bounds but ends out of bounds.
1418 // 3. image sits on right edge and REFLECT; starts out of bounds but ends in bounds.
1419 // 4. image sits on right edge and no REFLECT; starts in bounds but ends out of bounds.
1420 //Numbers 2 & 4 can be caught by checking the LBUF clip while in the inner loop,
1421 // numbers 1 & 3 are of concern.
1422 // This *indirectly* handles only cases 2 & 4! And is WRONG if REFLECT is set...!
1423 // if (rightMargin < 0 || leftMargin > lbufWidth)
1425 // It might be easier to swap these (if REFLECTed) and just use XPOS down below...
1426 // That way, you could simply set XPOS to leftMargin if !REFLECT and to rightMargin otherwise.
1427 // Still have to be careful with the DATA and IWIDTH values though...
1429 if ((!flagREFLECT && (endPos < 0 || startPos > lbufWidth))
1430 || (flagREFLECT && (startPos < 0 || endPos > lbufWidth)))
1433 // Otherwise, find the clip limits and clip the phrase as well...
1434 // NOTE: I'm fudging here by letting the actual blit overstep the bounds of the
1435 // line buffer, but it shouldn't matter since there are two unused line
1436 // buffers below and nothing above and I'll at most write 40 bytes outside
1437 // the line buffer... I could use a fractional clip begin/end value, but
1438 // this makes the blit a *lot* more hairy. I might fix this in the future
1439 // if it becomes necessary. (JLH)
1440 // Probably wouldn't be *that* hairy. Just use a delta that tells the inner loop
1441 // which pixel in the phrase is being written, and quit when either end of phrases
1442 // is reached or line buffer extents are surpassed.
1444 //This stuff is probably wrong as well... !!! FIX !!!
1445 //The strange thing is that it seems to work, but that's no guarantee that it's bulletproof!
1446 //Yup. Seems that JagMania doesn't work correctly with this...
1447 //Dunno if this is the problem, but Atari Karts is showing *some* of the road now...
1448 //Actually, it is! Or, it was. It doesn't seem to be clipping here, so the problem lies
1449 //elsewhere! Hmm. Putting the scaling code into the 1/2/8 BPP cases seems to draw the ground
1450 // a bit more accurately... Strange!
1451 //It's probably a case of the REFLECT flag being set and the background being written
1452 //from the right side of the screen...
1453 //But no, it isn't... At least if the diagnostics are telling the truth!
1455 // NOTE: We're just using endPos to figure out how much, if any, to clip by.
1456 // ALSO: There may be another case where we start out of bounds and end out of bounds...!
1459 //There's a problem here with scaledPhrasePixels in that it can be forced to zero when
1460 //the scaling factor is small. So fix it already! !!! FIX !!!
1461 /*if (scaledPhrasePixels == 0)
1463 WriteLog("OP: [Scaled] We're about to encounter a divide by zero error!\n");
1464 DumpScaledObject(p0, p1, p2);
1466 //NOTE: I'm almost 100% sure that this is wrong... And it is! :-p
1468 //Try a simple example...
1469 // Let's say we have a 8 BPP scanline with an hscale of $80 (4). Our xpos is -10,
1470 // non-flipped. Pixels in the bitmap are XYZXYZXYZXYZXYZ.
1471 // Scaled up, they would be XXXXYYYYZZZZXXXXYYYYZZZZXXXXYYYYZZZZ...
1473 // Normally, we would expect this in the line buffer:
1474 // ZZXXXXYYYYZZZZXXXXYYYYZZZZ...
1476 // But instead we're getting:
1477 // XXXXYYYYZZZZXXXXYYYYZZZZ...
1479 // or are we??? It would seem so, simply by virtue of the fact that we're NOT starting
1480 // on negative boundary--or are we? Hmm...
1481 // cw = 10, dcw = pcw = 10 / ([8 * 4 = 32] 32) = 0, sp = -10
1483 // Let's try a real world example:
1485 //OP: Scaled bitmap (70, 8 BPP, spp=28) sp (-400) < 0... [new sp=-8, cw=400, dcw=pcw=14]
1486 //OP: Scaled bitmap (6F, 8 BPP, spp=27) sp (-395) < 0... [new sp=-17, cw=395, dcw=pcw=14]
1488 // Really, spp is 27.75 in the second case...
1489 // So... If we do 395 / 27.75, we get 14. Ok so far... If we scale that against the
1490 // start position (14 * 27.75), we get -6.5... NOT -17!
1492 //Now it seems we're working OK, at least for the first case...
1493 uint32_t scaledPhrasePixelsUS = phraseWidthToPixels[depth] * hscale;
1495 if (startPos < 0) // Case #1: Begin out, end in, L to R
1497 extern int start_logging;
1499 WriteLog("OP: Scaled bitmap (%02X, %u BPP, spp=%u) start pos (%i) < 0...", hscale, op_bitmap_bit_depth[depth], scaledPhrasePixels, startPos);
1500 // clippedWidth = 0 - startPos,
1501 clippedWidth = (0 - startPos) << 5,
1502 // dataClippedWidth = phraseClippedWidth = clippedWidth / scaledPhrasePixels,
1503 dataClippedWidth = phraseClippedWidth = (clippedWidth / scaledPhrasePixelsUS) >> 5,
1504 // startPos = 0 - (clippedWidth % scaledPhrasePixels);
1505 startPos += (dataClippedWidth * scaledPhrasePixelsUS) >> 5;
1507 WriteLog(" [new sp=%i, cw=%i, dcw=pcw=%i]\n", startPos, clippedWidth, dataClippedWidth);
1510 if (endPos < 0) // Case #2: Begin in, end out, R to L
1511 clippedWidth = 0 - endPos,
1512 phraseClippedWidth = clippedWidth / scaledPhrasePixels;
1514 if (endPos > lbufWidth) // Case #3: Begin in, end out, L to R
1515 clippedWidth = endPos - lbufWidth,
1516 phraseClippedWidth = clippedWidth / scaledPhrasePixels;
1518 if (startPos > lbufWidth) // Case #4: Begin out, end in, R to L
1519 clippedWidth = startPos - lbufWidth,
1520 dataClippedWidth = phraseClippedWidth = clippedWidth / scaledPhrasePixels,
1521 startPos = lbufWidth + (clippedWidth % scaledPhrasePixels);
1523 extern int op_start_log;
1524 if (op_start_log && clippedWidth != 0)
1525 WriteLog("OP: Clipped line. SP=%i, EP=%i, clip=%u, iwidth=%u, hscale=%02X\n", startPos, endPos, clippedWidth, iwidth, hscale);
1526 if (op_start_log && startPos == 13)
1528 WriteLog("OP: Scaled line. SP=%i, EP=%i, clip=%u, iwidth=%u, hscale=%02X, depth=%u, firstPix=%u\n", startPos, endPos, clippedWidth, iwidth, hscale, depth, firstPix);
1529 DumpScaledObject(p0, p1, p2);
1532 WriteLog(" %08X: ", data);
1533 for(int i=0; i<7*8; i++)
1534 WriteLog("%02X ", JaguarReadByte(data+i));
1538 // If the image is sitting on the line buffer left or right edge, we need to compensate
1539 // by decreasing the image phrase width accordingly.
1540 iwidth -= phraseClippedWidth;
1542 // Also, if we're clipping the phrase we need to make sure we're in the correct part of
1544 // data += phraseClippedWidth * (pitch << 3);
1545 data += dataClippedWidth * (pitch << 3);
1547 // NOTE: When the bitmap is in REFLECT mode, the XPOS marks the *right* side of the
1548 // bitmap! This makes clipping & etc. MUCH, much easier...!
1549 // uint32_t lbufAddress = 0x1800 + (!in24BPPMode ? leftMargin * 2 : leftMargin * 4);
1550 // uint32_t lbufAddress = 0x1800 + (!in24BPPMode ? startPos * 2 : startPos * 4);
1551 uint32_t lbufAddress = 0x1800 + startPos * 2;
1552 uint8_t * currentLineBuffer = &tomRam8[lbufAddress];
1553 //uint8_t * lineBufferLowerLimit = &tom_ram_8[0x1800],
1554 // * lineBufferUpperLimit = &tom_ram_8[0x1800 + 719];
1558 // Hmm. We check above for 24 BPP mode, but don't do anything about it below...
1559 // If we *were* in 24 BPP mode, how would you convert CRY to RGB24? Seems to me
1560 // that if you're in CRY mode then you wouldn't be able to use 24 BPP bitmaps
1562 // This seems to be the case (at least according to the Midsummer docs)...!
1564 if (depth == 0) // 1 BPP
1567 WriteLog("OP: Scaled bitmap @ 1 BPP requesting FIRSTPIX!\n");
1568 // The LSB of flags is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1569 int32_t lbufDelta = ((int8_t)((flags << 7) & 0xFF) >> 5) | 0x02;
1572 uint64_t pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1574 while ((int32_t)iwidth > 0)
1576 uint8_t bits = pixels >> 63;
1578 #ifndef OP_USES_PALETTE_ZERO
1579 if (flagTRANS && bits == 0)
1581 if (flagTRANS && (paletteRAM16[index | bits] == 0))
1587 // This is the *only* correct use of endian-dependent code
1588 // (i.e., mem-to-mem direct copying)!
1589 *(uint16_t *)currentLineBuffer = paletteRAM16[index | bits];
1591 *currentLineBuffer =
1592 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1593 *(currentLineBuffer + 1) =
1594 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1597 currentLineBuffer += lbufDelta;
1600 The reason we subtract the horizontalRemainder *after* the test is because we had too few
1601 bytes for horizontalRemainder to properly recognize a negative number. But now it's 16 bits
1602 wide, so we could probably go back to that (as long as we make it an int16_t and not a uint16!)
1604 /* horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
1605 while (horizontalRemainder & 0x80)
1607 horizontalRemainder += hscale;
1611 // while (horizontalRemainder <= 0x20) // I.e., it's <= 1.0 (*before* subtraction)
1612 while (horizontalRemainder < 0x20) // I.e., it's <= 1.0 (*before* subtraction)
1614 horizontalRemainder += hscale;
1618 horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
1622 int phrasesToSkip = pixCount / 64, pixelShift = pixCount % 64;
1624 data += (pitch << 3) * phrasesToSkip;
1625 pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1626 pixels <<= 1 * pixelShift;
1627 iwidth -= phrasesToSkip;
1628 pixCount = pixelShift;
1632 else if (depth == 1) // 2 BPP
1635 WriteLog("OP: Scaled bitmap @ 2 BPP requesting FIRSTPIX!\n");
1636 index &= 0xFC; // Top six bits form CLUT index
1637 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1638 int32_t lbufDelta = ((int8_t)((flags << 7) & 0xFF) >> 5) | 0x02;
1641 uint64_t pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1643 while ((int32_t)iwidth > 0)
1645 uint8_t bits = pixels >> 62;
1647 #ifndef OP_USES_PALETTE_ZERO
1648 if (flagTRANS && bits == 0)
1650 if (flagTRANS && (paletteRAM16[index | bits] == 0))
1656 // This is the *only* correct use of endian-dependent code
1657 // (i.e., mem-to-mem direct copying)!
1658 *(uint16_t *)currentLineBuffer = paletteRAM16[index | bits];
1660 *currentLineBuffer =
1661 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1662 *(currentLineBuffer + 1) =
1663 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1666 currentLineBuffer += lbufDelta;
1668 /* horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
1669 while (horizontalRemainder & 0x80)
1671 horizontalRemainder += hscale;
1675 // while (horizontalRemainder <= 0x20) // I.e., it's <= 0 (*before* subtraction)
1676 while (horizontalRemainder < 0x20) // I.e., it's <= 1.0 (*before* subtraction)
1678 horizontalRemainder += hscale;
1682 horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
1686 int phrasesToSkip = pixCount / 32, pixelShift = pixCount % 32;
1688 data += (pitch << 3) * phrasesToSkip;
1689 pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1690 pixels <<= 2 * pixelShift;
1691 iwidth -= phrasesToSkip;
1692 pixCount = pixelShift;
1696 else if (depth == 2) // 4 BPP
1699 WriteLog("OP: Scaled bitmap @ 4 BPP requesting FIRSTPIX!\n");
1700 index &= 0xF0; // Top four bits form CLUT index
1701 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1702 int32_t lbufDelta = ((int8_t)((flags << 7) & 0xFF) >> 5) | 0x02;
1705 uint64_t pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1707 while ((int32_t)iwidth > 0)
1709 uint8_t bits = pixels >> 60;
1711 #ifndef OP_USES_PALETTE_ZERO
1712 if (flagTRANS && bits == 0)
1714 if (flagTRANS && (paletteRAM16[index | bits] == 0))
1720 // This is the *only* correct use of endian-dependent code
1721 // (i.e., mem-to-mem direct copying)!
1722 *(uint16_t *)currentLineBuffer = paletteRAM16[index | bits];
1724 *currentLineBuffer =
1725 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1726 *(currentLineBuffer + 1) =
1727 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1730 currentLineBuffer += lbufDelta;
1732 /* horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
1733 while (horizontalRemainder & 0x80)
1735 horizontalRemainder += hscale;
1739 // while (horizontalRemainder <= 0x20) // I.e., it's <= 0 (*before* subtraction)
1740 while (horizontalRemainder < 0x20) // I.e., it's <= 0 (*before* subtraction)
1742 horizontalRemainder += hscale;
1746 horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
1750 int phrasesToSkip = pixCount / 16, pixelShift = pixCount % 16;
1752 data += (pitch << 3) * phrasesToSkip;
1753 pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1754 pixels <<= 4 * pixelShift;
1755 iwidth -= phrasesToSkip;
1756 pixCount = pixelShift;
1760 else if (depth == 3) // 8 BPP
1763 WriteLog("OP: Scaled bitmap @ 8 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1764 // The LSB is OPFLAG_REFLECT, so sign-extend it and OR 2 into it.
1765 int32_t lbufDelta = ((int8_t)((flags << 7) & 0xFF) >> 5) | 0x02;
1768 uint64_t pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1770 while ((int32_t)iwidth > 0)
1772 uint8_t bits = pixels >> 56;
1774 #ifndef OP_USES_PALETTE_ZERO
1775 if (flagTRANS && bits == 0)
1777 if (flagTRANS && (paletteRAM16[bits] == 0))
1783 // This is the *only* correct use of endian-dependent code
1784 // (i.e., mem-to-mem direct copying)!
1785 *(uint16_t *)currentLineBuffer = paletteRAM16[bits];
1787 if (currentLineBuffer >= lineBufferLowerLimit && currentLineBuffer <= lineBufferUpperLimit)
1788 *(uint16_t *)currentLineBuffer = paletteRAM16[bits];
1791 *currentLineBuffer =
1792 BLEND_CR(*currentLineBuffer, paletteRAM[bits << 1]),
1793 *(currentLineBuffer + 1) =
1794 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[(bits << 1) + 1]);
1797 currentLineBuffer += lbufDelta;
1799 // while (horizontalRemainder <= 0x20) // I.e., it's <= 0 (*before* subtraction)
1800 while (horizontalRemainder < 0x20) // I.e., it's <= 1.0 (*before* subtraction)
1802 horizontalRemainder += hscale;
1806 horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
1810 int phrasesToSkip = pixCount / 8, pixelShift = pixCount % 8;
1812 data += (pitch << 3) * phrasesToSkip;
1813 pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1814 pixels <<= 8 * pixelShift;
1815 iwidth -= phrasesToSkip;
1816 pixCount = pixelShift;
1820 else if (depth == 4) // 16 BPP
1823 WriteLog("OP: Scaled bitmap @ 16 BPP requesting FIRSTPIX!\n");
1824 // The LSB is OPFLAG_REFLECT, so sign extend it and OR 2 into it.
1825 int32_t lbufDelta = ((int8_t)((flags << 7) & 0xFF) >> 5) | 0x02;
1828 uint64_t pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1830 while ((int32_t)iwidth > 0)
1832 uint8_t bitsHi = pixels >> 56, bitsLo = pixels >> 48;
1834 //This doesn't seem right... Let's try the encoded black value ($8800):
1835 //Apparently, CRY 0 maps to $8800...
1836 if (flagTRANS && ((bitsLo | bitsHi) == 0))
1837 // if (flagTRANS && (bitsHi == 0x88) && (bitsLo == 0x00))
1842 *currentLineBuffer = bitsHi,
1843 *(currentLineBuffer + 1) = bitsLo;
1845 *currentLineBuffer =
1846 BLEND_CR(*currentLineBuffer, bitsHi),
1847 *(currentLineBuffer + 1) =
1848 BLEND_Y(*(currentLineBuffer + 1), bitsLo);
1851 currentLineBuffer += lbufDelta;
1853 /* horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
1854 while (horizontalRemainder & 0x80)
1856 horizontalRemainder += hscale;
1860 // while (horizontalRemainder <= 0x20) // I.e., it's <= 0 (*before* subtraction)
1861 while (horizontalRemainder < 0x20) // I.e., it's <= 1.0 (*before* subtraction)
1863 horizontalRemainder += hscale;
1867 horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
1871 int phrasesToSkip = pixCount / 4, pixelShift = pixCount % 4;
1873 data += (pitch << 3) * phrasesToSkip;
1874 pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1875 pixels <<= 16 * pixelShift;
1877 iwidth -= phrasesToSkip;
1879 pixCount = pixelShift;
1883 else if (depth == 5) // 24 BPP
1885 // I'm not sure that you can scale a 24 BPP bitmap properly--the JTRM seems to indicate as much.
1886 WriteLog("OP: Writing 24 BPP scaled bitmap!\n");
1888 WriteLog("OP: Scaled bitmap @ 24 BPP requesting FIRSTPIX!\n");
1889 // Not sure, but I think RMW only works with 16 BPP and below, and only in CRY mode...
1890 // The LSB is OPFLAG_REFLECT, so sign-extend it and OR 4 into it.
1891 int32_t lbufDelta = ((int8_t)((flags << 7) & 0xFF) >> 4) | 0x04;
1896 uint64_t pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1897 data += pitch << 3; // Multiply pitch * 8 (optimize: precompute this value)
1899 for(int i=0; i<2; i++)
1901 uint8_t bits3 = pixels >> 56, bits2 = pixels >> 48,
1902 bits1 = pixels >> 40, bits0 = pixels >> 32;
1904 if (flagTRANS && (bits3 | bits2 | bits1 | bits0) == 0)
1907 *currentLineBuffer = bits3,
1908 *(currentLineBuffer + 1) = bits2,
1909 *(currentLineBuffer + 2) = bits1,
1910 *(currentLineBuffer + 3) = bits0;
1912 currentLineBuffer += lbufDelta;