4 // Original source by David Raingeard (Cal2)
5 // GCC/SDL port by Niels Wagenaar (Linux/WIN32) and Caz (BeOS)
6 // Extensive cleanups/fixes/rewrites by James L. Hammons
7 // (C) 2010 Underground Software
9 // JLH = James L. Hammons <jlhamm@acm.org>
12 // --- ---------- -------------------------------------------------------------
13 // JLH 01/16/2010 Created this log ;-)
28 //#define OP_DEBUG_BMP
// Saturating-blend table lookups. Each macro builds a 16-bit table index with
// the existing (destination) component in the high byte and the incoming
// (source) component in the low byte, then reads the precomputed result from
// the matching 64K table.
//
// Both arguments are fully parenthesized before being cast, so an expression
// argument such as BLEND_Y(a | b, c) is cast and shifted as a whole instead of
// having the cast and the shift bind to only part of it.
#define BLEND_Y(dst, src) op_blend_y[(((uint16)(dst)) << 8) | ((uint16)(src))]
#define BLEND_CR(dst, src) op_blend_cr[(((uint16)(dst)) << 8) | ((uint16)(src))]
// Object type codes: the low three bits of an object's first phrase
// (the list processor dispatches on `p0 & 0x07`).
33 #define OBJECT_TYPE_BITMAP 0 // 000
34 #define OBJECT_TYPE_SCALE 1 // 001
35 #define OBJECT_TYPE_GPU 2 // 010
36 #define OBJECT_TYPE_BRANCH 3 // 011
37 #define OBJECT_TYPE_STOP 4 // 100
// Condition codes carried in the CC field of a BRANCH object
39 #define CONDITION_EQUAL 0
40 #define CONDITION_LESS_THAN 1
41 #define CONDITION_GREATER_THAN 2
42 #define CONDITION_OP_FLAG_SET 3
43 #define CONDITION_SECOND_HALF_LINE 4
// Bitmap object flag bits, as found in the 4-bit field `(p1 >> 45) & 0x0F`
// of the second phrase
45 #define OPFLAG_RELEASE 8 // Bus release bit
46 #define OPFLAG_TRANS 4 // Transparency bit
47 #define OPFLAG_RMW 2 // Read-Modify-Write bit
48 #define OPFLAG_REFLECT 1 // Horizontal mirror bit
50 // Private function prototypes
// Stores a fixed size bitmap object (phrases p0, p1) in the line buffer
52 void OPProcessFixedBitmap(uint64 p0, uint64 p1, bool render);
// Scaled-bitmap counterpart taking the extra scaling phrase p2
// (definition not visible in this part of the file)
53 void OPProcessScaledBitmap(uint64 p0, uint64 p1, uint64 p2, bool render);
// Records the addresses of objects reachable from `address` for later dumping
54 void OPDiscoverObjects(uint32 address);
// Writes every recorded object to the log
55 void OPDumpObjectList(void);
// Log helpers: print the raw phrases and the decoded fields of an object
56 void DumpScaledObject(uint64 p0, uint64 p1, uint64 p2);
57 void DumpFixedObject(uint64 p0, uint64 p1);
58 void DumpBitmapCore(uint64 p0, uint64 p1);
// Reads one 64-bit phrase from the 8-byte aligned address containing `offset`
59 uint64 OPLoadPhrase(uint32 offset);
61 // Local global variables
63 // Blend tables (64K each)
// Indexed by (existing component << 8) | incoming component; see the
// BLEND_Y / BLEND_CR macros.
64 static uint8 op_blend_y[0x10000];
65 static uint8 op_blend_cr[0x10000];
66 // There may be a problem with this "RAM" overlapping (and thus being independent of)
67 // some of the regular TOM RAM...
68 //#warning objectp_ram is separated from TOM RAM--need to fix that!
69 //static uint8 objectp_ram[0x40]; // This is based at $F00000
// NOTE(review): not referenced anywhere in the visible part of this file;
// presumably a "processor is running" flag used elsewhere -- confirm.
70 uint8 objectp_running = 0;
71 //bool objectp_stop_reading_list;
// Bits per pixel for each 3-bit depth code taken from `(p1 >> 12) & 0x07`
73 static uint8 op_bitmap_bit_depth[8] = { 1, 2, 4, 8, 16, 24, 32, 0 };
74 //static uint32 op_bitmap_bit_size[8] =
75 // { (uint32)(0.125*65536), (uint32)(0.25*65536), (uint32)(0.5*65536), (uint32)(1*65536),
76 // (uint32)(2*65536), (uint32)(1*65536), (uint32)(1*65536), (uint32)(1*65536) };
// Address of the object being processed; reloaded from the object list
// pointer register at the start of each OPProcessList() call.
77 static uint32 op_pointer;
// Number of pixels held in one 64-bit phrase for each depth code
// (64 / bits-per-pixel for codes 0-4; code 5, 24 bpp, is listed as 2)
79 int32 phraseWidthToPixels[8] = { 64, 32, 16, 8, 4, 2, 0, 0 };
83 // Object Processor initialization
87 // Here we calculate the saturating blend of a signed 4-bit value and an
88 // existing Cyan/Red value as well as a signed 8-bit value and an existing intensity...
89 // Note: CRY is 4 bits Cyan, 4 bits Red, 8 bits intensitY
90 for(int i=0; i<256*256; i++)
92 int y = (i >> 8) & 0xFF;
93 int dy = (int8)i; // Sign extend the Y index
94 int c1 = (i >> 8) & 0x0F;
95 int dc1 = (int8)(i << 4) >> 4; // Sign extend the R index
96 int c2 = (i >> 12) & 0x0F;
97 int dc2 = (int8)(i & 0xF0) >> 4; // Sign extend the C index
122 op_blend_cr[i] = (c2 << 4) | c1;
129 // Object Processor reset
133 // memset(objectp_ram, 0x00, 0x40);
// Printable names for the eight possible 3-bit object type codes
137 static const char * opType[8] =
138 { "(BITMAP)", "(SCALED BITMAP)", "(GPU INT)", "(BRANCH)", "(STOP)", "???", "???", "???" };
// Printable names for the BRANCH object condition codes
139 static const char * ccType[8] =
140 { "\"==\"", "\"<\"", "\">\"", "(opflag set)", "(second half line)", "?", "?", "?" };
// Addresses of the objects recorded by OPDiscoverObjects(), and how many
// entries of the array are in use.
// NOTE(review): the visible code appends with object[numberOfObjects++] and
// shows no bounds check -- confirm a list can never exceed 8192 entries.
141 static uint32 object[8192];
142 static uint32 numberOfObjects;
143 //static uint32 objectLink[8192];
144 //static uint32 numberOfLinks;
148 //#warning "!!! Fix OL dump so that it follows links !!!"
149 // const char * opType[8] =
150 // { "(BITMAP)", "(SCALED BITMAP)", "(GPU INT)", "(BRANCH)", "(STOP)", "???", "???", "???" };
151 // const char * ccType[8] =
152 // { "\"==\"", "\"<\"", "\">\"", "(opflag set)", "(second half line)", "?", "?", "?" };
154 uint32 olp = OPGetListPointer();
155 WriteLog("\nOP: OLP = $%08X\n", olp);
156 WriteLog("OP: Phrase dump\n ----------\n");
159 for(uint32 i=0; i<0x100; i+=8)
161 uint32 hi = JaguarReadLong(olp + i, OP), lo = JaguarReadLong(olp + i + 4, OP);
162 WriteLog("\t%08X: %08X %08X %s", olp + i, hi, lo, opType[lo & 0x07]);
164 if ((lo & 0x07) == 3)
166 uint16 ypos = (lo >> 3) & 0x7FF;
167 uint8 cc = (lo >> 14) & 0x03;
168 uint32 link = ((hi << 11) | (lo >> 21)) & 0x3FFFF8;
169 WriteLog(" YPOS=%u, CC=%s, link=%08X", ypos, ccType[cc], link);
174 if ((lo & 0x07) == 0)
175 DumpFixedObject(OPLoadPhrase(olp+i), OPLoadPhrase(olp+i+8));
177 if ((lo & 0x07) == 1)
178 DumpScaledObject(OPLoadPhrase(olp+i), OPLoadPhrase(olp+i+8), OPLoadPhrase(olp+i+16));
184 OPDiscoverObjects(olp);
189 void OPDiscoverObjects(uint32 address)
191 // Check to see if we've already seen this object
192 for(uint32 i=0; i<numberOfObjects; i++)
194 if (address == object[i])
198 // Store the object...
199 object[numberOfObjects++] = address;
200 uint8 objectType = 0;
204 uint32 hi = JaguarReadLong(address + 0, OP);
205 uint32 lo = JaguarReadLong(address + 4, OP);
206 objectType = lo & 0x07;
207 uint32 link = ((hi << 11) | (lo >> 21)) & 0x3FFFF8;
211 uint16 ypos = (lo >> 3) & 0x7FF;
212 uint8 cc = (lo >> 14) & 0x07; // Proper # of bits == 3
214 // Recursion needed to follow all links!
215 OPDiscoverObjects(address + 8);
218 if (address == link) // Ruh roh...
220 // Runaway recursive link is bad!
226 // Check to see if we've already seen this object, and add it if not
227 bool seenObject = false;
229 for(uint32 i=0; i<numberOfObjects; i++)
231 if (address == object[i])
239 object[numberOfObjects++] = address;
241 while (objectType != 4);
244 void OPDumpObjectList(void)
246 for(uint32 i=0; i<numberOfObjects; i++)
248 uint32 address = object[i];
250 uint32 hi = JaguarReadLong(address + 0, OP);
251 uint32 lo = JaguarReadLong(address + 4, OP);
252 uint8 objectType = lo & 0x07;
253 uint32 link = ((hi << 11) | (lo >> 21)) & 0x3FFFF8;
254 WriteLog("%08X: %08X %08X %s", address, hi, lo, opType[objectType]);
258 uint16 ypos = (lo >> 3) & 0x7FF;
259 uint8 cc = (lo >> 14) & 0x07; // Proper # of bits == 3
260 WriteLog(" YPOS=%u, CC=%s, link=$%08X", ypos, ccType[cc], link);
266 DumpFixedObject(OPLoadPhrase(address + 0), OPLoadPhrase(address + 8));
269 DumpScaledObject(OPLoadPhrase(address + 0), OPLoadPhrase(address + 8),
270 OPLoadPhrase(address + 16));
272 if (address == link) // Ruh roh...
274 // Runaway recursive link is bad!
275 WriteLog("***** SELF REFERENTIAL LINK *****\n\n");
283 // Object Processor memory access
284 // Memory range: F00010 - F00027
286 // F00010-F00017 R xxxxxxxx xxxxxxxx OB - current object code from the graphics processor
287 // F00020-F00023 W xxxxxxxx xxxxxxxx OLP - start of the object list
288 // F00026 W -------- -------x OBF - object processor flag
292 uint8 OPReadByte(uint32 offset, uint32 who/*=UNKNOWN*/)
295 return objectp_ram[offset];
298 uint16 OPReadWord(uint32 offset, uint32 who/*=UNKNOWN*/)
301 return GET16(objectp_ram, offset);
304 void OPWriteByte(uint32 offset, uint8 data, uint32 who/*=UNKNOWN*/)
307 objectp_ram[offset] = data;
310 void OPWriteWord(uint32 offset, uint16 data, uint32 who/*=UNKNOWN*/)
313 SET16(objectp_ram, offset, data);
315 /*if (offset == 0x20)
316 WriteLog("OP: Setting lo list pointer: %04X\n", data);
318 WriteLog("OP: Setting hi list pointer: %04X\n", data);//*/
322 uint32 OPGetListPointer(void)
324 // Note: This register is LO / HI WORD, hence the funky look of this...
325 return GET16(tomRam8, 0x20) | (GET16(tomRam8, 0x22) << 16);
328 // This is WRONG, since the OBF is only 16 bits wide!!! [FIXED]
330 uint32 OPGetStatusRegister(void)
332 return GET16(tomRam8, 0x26);
335 // This is WRONG, since the OBF is only 16 bits wide!!! [FIXED]
337 void OPSetStatusRegister(uint32 data)
339 tomRam8[0x26] = (data & 0x0000FF00) >> 8;
340 tomRam8[0x27] |= (data & 0xFE);
343 void OPSetCurrentObject(uint64 object)
345 //Not sure this is right... Wouldn't it just be stored 64 bit BE?
346 // Stored as least significant 32 bits first, ms32 last in big endian
347 /* objectp_ram[0x13] = object & 0xFF; object >>= 8;
348 objectp_ram[0x12] = object & 0xFF; object >>= 8;
349 objectp_ram[0x11] = object & 0xFF; object >>= 8;
350 objectp_ram[0x10] = object & 0xFF; object >>= 8;
352 objectp_ram[0x17] = object & 0xFF; object >>= 8;
353 objectp_ram[0x16] = object & 0xFF; object >>= 8;
354 objectp_ram[0x15] = object & 0xFF; object >>= 8;
355 objectp_ram[0x14] = object & 0xFF;*/
356 // Let's try regular good old big endian...
357 tomRam8[0x17] = object & 0xFF; object >>= 8;
358 tomRam8[0x16] = object & 0xFF; object >>= 8;
359 tomRam8[0x15] = object & 0xFF; object >>= 8;
360 tomRam8[0x14] = object & 0xFF; object >>= 8;
362 tomRam8[0x13] = object & 0xFF; object >>= 8;
363 tomRam8[0x12] = object & 0xFF; object >>= 8;
364 tomRam8[0x11] = object & 0xFF; object >>= 8;
365 tomRam8[0x10] = object & 0xFF;
368 uint64 OPLoadPhrase(uint32 offset)
370 offset &= ~0x07; // 8 byte alignment
371 return ((uint64)JaguarReadLong(offset, OP) << 32) | (uint64)JaguarReadLong(offset+4, OP);
374 void OPStorePhrase(uint32 offset, uint64 p)
376 offset &= ~0x07; // 8 byte alignment
377 JaguarWriteLong(offset, p >> 32, OP);
378 JaguarWriteLong(offset + 4, p & 0xFFFFFFFF, OP);
382 // Debugging routines
384 void DumpScaledObject(uint64 p0, uint64 p1, uint64 p2)
386 WriteLog(" %08X %08X\n", (uint32)(p1>>32), (uint32)(p1&0xFFFFFFFF));
387 WriteLog(" %08X %08X\n", (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
388 DumpBitmapCore(p0, p1);
389 uint32 hscale = p2 & 0xFF;
390 uint32 vscale = (p2 >> 8) & 0xFF;
391 uint32 remainder = (p2 >> 16) & 0xFF;
392 WriteLog(" [hsc: %02X, vsc: %02X, rem: %02X]\n", hscale, vscale, remainder);
395 void DumpFixedObject(uint64 p0, uint64 p1)
397 WriteLog(" %08X %08X\n", (uint32)(p1>>32), (uint32)(p1&0xFFFFFFFF));
398 DumpBitmapCore(p0, p1);
401 void DumpBitmapCore(uint64 p0, uint64 p1)
403 uint8 bitdepth = (p1 >> 12) & 0x07;
404 //WAS: int16 ypos = ((p0 >> 3) & 0x3FF); // ??? What if not interlaced (/2)?
405 int16 ypos = ((p0 >> 3) & 0x7FF); // ??? What if not interlaced (/2)?
406 int32 xpos = p1 & 0xFFF;
407 xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos); // Sign extend that mutha!
408 uint32 iwidth = ((p1 >> 28) & 0x3FF);
409 uint32 dwidth = ((p1 >> 18) & 0x3FF); // Unsigned!
410 uint16 height = ((p0 >> 14) & 0x3FF);
411 uint32 link = ((p0 >> 24) & 0x7FFFF) << 3;
412 uint32 ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
413 uint32 firstPix = (p1 >> 49) & 0x3F;
414 uint8 flags = (p1 >> 45) & 0x0F;
415 uint8 idx = (p1 >> 38) & 0x7F;
416 uint32 pitch = (p1 >> 15) & 0x07;
417 WriteLog(" [%u (%u) x %u @ (%i, %u) (%u bpp), l: %08X, p: %08X fp: %02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
418 iwidth, dwidth, height, xpos, ypos, op_bitmap_bit_depth[bitdepth], link,
419 ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""),
420 (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""),
421 (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
425 // Object Processor main routine
427 #warning "Need to fix this so that when an GPU object IRQ happens, we can pick up OP processing where we left off. !!! FIX !!!"
428 void OPProcessList(int halfline, bool render)
430 extern int op_start_log;
431 // char * condition_to_str[8] =
432 // { "==", "<", ">", "(opflag set)", "(second half line)", "?", "?", "?" };
434 op_pointer = OPGetListPointer();
436 // objectp_stop_reading_list = false;
438 //WriteLog("OP: Processing line #%u (OLP=%08X)...\n", halfline, op_pointer);
441 // *** BEGIN OP PROCESSOR TESTING ONLY ***
442 extern bool interactiveMode;
444 extern int objectPtr;
446 int bitmapCounter = 0;
447 // *** END OP PROCESSOR TESTING ONLY ***
449 uint32 opCyclesToRun = 30000; // This is a pulled-out-of-the-air value (will need to be fixed, obviously!)
451 // if (op_pointer) WriteLog(" new op list at 0x%.8x halfline %i\n",op_pointer,halfline);
454 // *** BEGIN OP PROCESSOR TESTING ONLY ***
455 if (interactiveMode && bitmapCounter == objectPtr)
459 // *** END OP PROCESSOR TESTING ONLY ***
460 // if (objectp_stop_reading_list)
463 uint64 p0 = OPLoadPhrase(op_pointer);
465 //WriteLog("\t%08X type %i\n", op_pointer, (uint8)p0 & 0x07);
468 if (halfline == TOMGetVDB() && op_start_log)
469 //if (halfline == 215 && op_start_log)
470 //if (halfline == 28 && op_start_log)
473 WriteLog("%08X --> phrase %08X %08X", op_pointer - 8, (int)(p0>>32), (int)(p0&0xFFFFFFFF));
474 if ((p0 & 0x07) == OBJECT_TYPE_BITMAP)
476 WriteLog(" (BITMAP) ");
477 uint64 p1 = OPLoadPhrase(op_pointer);
478 WriteLog("\n%08X --> phrase %08X %08X ", op_pointer, (int)(p1>>32), (int)(p1&0xFFFFFFFF));
479 uint8 bitdepth = (p1 >> 12) & 0x07;
480 //WAS: int16 ypos = ((p0 >> 3) & 0x3FF); // ??? What if not interlaced (/2)?
481 int16 ypos = ((p0 >> 3) & 0x7FF); // ??? What if not interlaced (/2)?
482 int32 xpos = p1 & 0xFFF;
483 xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);
484 uint32 iwidth = ((p1 >> 28) & 0x3FF);
485 uint32 dwidth = ((p1 >> 18) & 0x3FF); // Unsigned!
486 uint16 height = ((p0 >> 14) & 0x3FF);
487 uint32 link = ((p0 >> 24) & 0x7FFFF) << 3;
488 uint32 ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
489 uint32 firstPix = (p1 >> 49) & 0x3F;
490 uint8 flags = (p1 >> 45) & 0x0F;
491 uint8 idx = (p1 >> 38) & 0x7F;
492 uint32 pitch = (p1 >> 15) & 0x07;
493 WriteLog("\n [%u (%u) x %u @ (%i, %u) (%u bpp), l: %08X, p: %08X fp: %02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
494 iwidth, dwidth, height, xpos, ypos, op_bitmap_bit_depth[bitdepth], link, ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""), (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""), (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
496 if ((p0 & 0x07) == OBJECT_TYPE_SCALE)
498 WriteLog(" (SCALED BITMAP)");
499 uint64 p1 = OPLoadPhrase(op_pointer), p2 = OPLoadPhrase(op_pointer+8);
500 WriteLog("\n%08X --> phrase %08X %08X ", op_pointer, (int)(p1>>32), (int)(p1&0xFFFFFFFF));
501 WriteLog("\n%08X --> phrase %08X %08X ", op_pointer+8, (int)(p2>>32), (int)(p2&0xFFFFFFFF));
502 uint8 bitdepth = (p1 >> 12) & 0x07;
503 //WAS: int16 ypos = ((p0 >> 3) & 0x3FF); // ??? What if not interlaced (/2)?
504 int16 ypos = ((p0 >> 3) & 0x7FF); // ??? What if not interlaced (/2)?
505 int32 xpos = p1 & 0xFFF;
506 xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);
507 uint32 iwidth = ((p1 >> 28) & 0x3FF);
508 uint32 dwidth = ((p1 >> 18) & 0x3FF); // Unsigned!
509 uint16 height = ((p0 >> 14) & 0x3FF);
510 uint32 link = ((p0 >> 24) & 0x7FFFF) << 3;
511 uint32 ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
512 uint32 firstPix = (p1 >> 49) & 0x3F;
513 uint8 flags = (p1 >> 45) & 0x0F;
514 uint8 idx = (p1 >> 38) & 0x7F;
515 uint32 pitch = (p1 >> 15) & 0x07;
516 WriteLog("\n [%u (%u) x %u @ (%i, %u) (%u bpp), l: %08X, p: %08X fp: %02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
517 iwidth, dwidth, height, xpos, ypos, op_bitmap_bit_depth[bitdepth], link, ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""), (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""), (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
518 uint32 hscale = p2 & 0xFF;
519 uint32 vscale = (p2 >> 8) & 0xFF;
520 uint32 remainder = (p2 >> 16) & 0xFF;
521 WriteLog(" [hsc: %02X, vsc: %02X, rem: %02X]\n", hscale, vscale, remainder);
523 if ((p0 & 0x07) == OBJECT_TYPE_GPU)
524 WriteLog(" (GPU)\n");
525 if ((p0 & 0x07) == OBJECT_TYPE_BRANCH)
527 WriteLog(" (BRANCH)\n");
528 uint8 * jaguarMainRam = GetRamPtr();
529 WriteLog("[RAM] --> ");
530 for(int k=0; k<8; k++)
531 WriteLog("%02X ", jaguarMainRam[op_pointer-8 + k]);
534 if ((p0 & 0x07) == OBJECT_TYPE_STOP)
535 WriteLog(" --> List end\n\n");
539 switch ((uint8)p0 & 0x07)
541 case OBJECT_TYPE_BITMAP:
543 //WAS: uint16 ypos = (p0 >> 3) & 0x3FF;
544 uint16 ypos = (p0 >> 3) & 0x7FF;
545 // This is only theory implied by Rayman...!
546 // It seems that if the YPOS is zero, then bump the YPOS value so that it coincides with
547 // the VDB value. With interlacing, this would be slightly more tricky.
548 // There's probably another bit somewhere that enables this mode--but so far, doesn't seem
549 // to affect any other game in a negative way (that I've seen).
550 // Either that, or it's an undocumented bug...
552 //No, the reason this was needed is that the OP code before was wrong. Any value
553 //less than VDB will get written to the top line of the display!
555 // Not so sure... Let's see what happens here...
558 ypos = TOMReadWord(0xF00046, OP) / 2; // Get the VDB value
560 // Actually, no. Any item less than VDB will get only the lines that hang over VDB displayed.
561 // Actually, this is incorrect. It seems that VDB value is wrong somewhere and that's
562 // what's causing things to break. Still no idea why.
564 uint32 height = (p0 & 0xFFC000) >> 14;
565 uint32 oldOPP = op_pointer - 8;
566 // *** BEGIN OP PROCESSOR TESTING ONLY ***
567 if (inhibit && op_start_log)
568 WriteLog("!!! ^^^ This object is INHIBITED! ^^^ !!!\n");
570 if (!inhibit) // For OP testing only!
571 // *** END OP PROCESSOR TESTING ONLY ***
572 if (halfline >= ypos && height > 0)
574 uint64 p1 = OPLoadPhrase(op_pointer);
576 //WriteLog("OP: Writing halfline %d with ypos == %d...\n", halfline, ypos);
577 //WriteLog("--> Writing %u BPP bitmap...\n", op_bitmap_bit_depth[(p1 >> 12) & 0x07]);
578 // OPProcessFixedBitmap(halfline, p0, p1, render);
579 OPProcessFixedBitmap(p0, p1, render);
583 //???Does this really happen??? Doesn't seem to work if you do this...!
584 //Probably not. Must be a bug in the documentation...!
585 // uint32 link = (p0 & 0x7FFFF000000) >> 21;
586 // SET16(tom_ram_8, 0x20, link & 0xFFFF); // OLP
587 // SET16(tom_ram_8, 0x22, link >> 16);
588 /* uint32 height = (p0 & 0xFFC000) >> 14;
591 // NOTE: Would subtract 2 if in interlaced mode...!
592 // uint64 height = ((p0 & 0xFFC000) - 0x4000) & 0xFFC000;
596 uint64 data = (p0 & 0xFFFFF80000000000LL) >> 40;
597 uint64 dwidth = (p1 & 0xFFC0000) >> 15;
600 p0 &= ~0xFFFFF80000FFC000LL; // Mask out old data...
601 p0 |= (uint64)height << 14;
603 OPStorePhrase(oldOPP, p0);
605 //WriteLog("\t\tOld OP: %08X -> ", op_pointer);
606 //Temp, for testing...
607 //No doubt, this type of check will break all kinds of stuff... !!! FIX !!!
608 //And it does! !!! FIX !!!
609 //Let's remove this "fix" since it screws up more than it fixes.
610 /* if (op_pointer > ((p0 & 0x000007FFFF000000LL) >> 21))
613 op_pointer = (p0 & 0x000007FFFF000000LL) >> 21;
614 //WriteLog("New OP: %08X\n", op_pointer);
617 case OBJECT_TYPE_SCALE:
619 //WAS: uint16 ypos = (p0 >> 3) & 0x3FF;
620 uint16 ypos = (p0 >> 3) & 0x7FF;
621 uint32 height = (p0 & 0xFFC000) >> 14;
622 uint32 oldOPP = op_pointer - 8;
623 //WriteLog("OP: Scaled Object (ypos=%04X, height=%04X", ypos, height);
624 // *** BEGIN OP PROCESSOR TESTING ONLY ***
625 if (inhibit && op_start_log)
627 WriteLog("!!! ^^^ This object is INHIBITED! ^^^ !!! (halfline=%u, ypos=%u, height=%u)\n", halfline, ypos, height);
628 DumpScaledObject(p0, OPLoadPhrase(op_pointer), OPLoadPhrase(op_pointer+8));
631 if (!inhibit) // For OP testing only!
632 // *** END OP PROCESSOR TESTING ONLY ***
633 if (halfline >= ypos && height > 0)
635 uint64 p1 = OPLoadPhrase(op_pointer);
637 uint64 p2 = OPLoadPhrase(op_pointer);
639 //WriteLog("OP: %08X (%d) %08X%08X %08X%08X %08X%08X\n", oldOPP, halfline, (uint32)(p0>>32), (uint32)(p0&0xFFFFFFFF), (uint32)(p1>>32), (uint32)(p1&0xFFFFFFFF), (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
640 OPProcessScaledBitmap(p0, p1, p2, render);
644 uint16 remainder = (p2 >> 16) & 0xFF;//, vscale = p2 >> 8;
645 uint8 /*remainder = p2 >> 16,*/ vscale = p2 >> 8;
646 //Actually, we should skip this object if it has a vscale of zero.
647 //Or do we? Not sure... Atari Karts has a few lines that look like:
649 //000E8268 --> phrase 00010000 7000B00D
650 // [7 (0) x 1 @ (13, 0) (8 bpp), l: 000E82A0, p: 000E0FC0 fp: 00, fl:RELEASE, idx:00, pt:01]
651 // [hsc: 9A, vsc: 00, rem: 00]
652 // Could it be the vscale is overridden if the DWIDTH is zero? Hmm...
653 //WriteLog("OP: Scaled bitmap processing (rem=%02X, vscale=%02X)...\n", remainder, vscale);//*/
656 vscale = 0x20; // OP bug??? Nope, it isn't...! Or is it?
658 //extern int start_logging;
660 // WriteLog("--> Returned from scaled bitmap processing (rem=%02X, vscale=%02X)...\n", remainder, vscale);//*/
662 //--> Returned from scaled bitmap processing (rem=20, vscale=80)...
663 //There are other problems here, it looks like...
665 //About to execute OP (508)...
667 OP: Scaled bitmap 4x? 4bpp at 38,? hscale=7C fpix=0 data=00075E28 pitch 1 hflipped=no dwidth=? (linked to 00071118) Transluency=no
668 --> Returned from scaled bitmap processing (rem=50, vscale=7C)...
669 OP: Scaled bitmap 4x? 4bpp at 38,? hscale=7C fpix=0 data=00075E28 pitch 1 hflipped=no dwidth=? (linked to 00071118) Transluency=no
670 --> Returned from scaled bitmap processing (rem=30, vscale=7C)...
671 OP: Scaled bitmap 4x? 4bpp at 38,? hscale=7C fpix=0 data=00075E28 pitch 1 hflipped=no dwidth=? (linked to 00071118) Transluency=no
672 --> Returned from scaled bitmap processing (rem=10, vscale=7C)...
673 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756A8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
674 --> Returned from scaled bitmap processing (rem=00, vscale=7E)...
675 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
676 --> Returned from scaled bitmap processing (rem=00, vscale=80)...
677 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
678 --> Returned from scaled bitmap processing (rem=5E, vscale=7E)...
679 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756E8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
680 --> Returned from scaled bitmap processing (rem=60, vscale=80)...
681 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
682 --> Returned from scaled bitmap processing (rem=3E, vscale=7E)...
683 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756E8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
684 --> Returned from scaled bitmap processing (rem=40, vscale=80)...
685 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
686 --> Returned from scaled bitmap processing (rem=1E, vscale=7E)...
687 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756E8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
688 --> Returned from scaled bitmap processing (rem=20, vscale=80)...
690 //Here's another problem:
691 // [hsc: 20, vsc: 20, rem: 00]
692 // Since we're not checking for $E0 (but that's what we get from the above), we end
693 // up repeating this halfline unnecessarily... !!! FIX !!! [DONE, but... still not quite
694 // right. Either that, or the Accolade team that wrote Bubsy screwed up royal.]
695 //Also note: $E0 = 7.0 which IS a legal vscale value...
697 // if (remainder & 0x80) // I.e., it's negative
698 // if ((remainder & 0x80) || remainder == 0) // I.e., it's <= 0
699 // if ((remainder - 1) >= 0xE0) // I.e., it's <= 0
700 // if ((remainder >= 0xE1) || remainder == 0)// I.e., it's <= 0
701 // if ((remainder >= 0xE1 && remainder <= 0xFF) || remainder == 0)// I.e., it's <= 0
702 // if (remainder <= 0x20) // I.e., it's <= 1.0
703 // I.e., it's < 1.0f -> means it'll go negative when we subtract 1.0f.
704 if (remainder < 0x20)
706 uint64 data = (p0 & 0xFFFFF80000000000LL) >> 40;
707 uint64 dwidth = (p1 & 0xFFC0000) >> 15;
709 // while (remainder & 0x80)
710 // while ((remainder & 0x80) || remainder == 0)
711 // while ((remainder - 1) >= 0xE0)
712 // while ((remainder >= 0xE1) || remainder == 0)
713 // while ((remainder >= 0xE1 && remainder <= 0xFF) || remainder == 0)
714 // while (remainder <= 0x20)
715 while (remainder < 0x20)
725 p0 &= ~0xFFFFF80000FFC000LL; // Mask out old data...
726 p0 |= (uint64)height << 14;
728 OPStorePhrase(oldOPP, p0);
731 remainder -= 0x20; // 1.0f in [3.5] fixed point format
734 // WriteLog("--> Finished writebacks...\n");//*/
736 //WriteLog(" [%08X%08X -> ", (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
737 p2 &= ~0x0000000000FF0000LL;
738 p2 |= (uint64)remainder << 16;
739 //WriteLog("%08X%08X]\n", (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
740 OPStorePhrase(oldOPP + 16, p2);
741 //remainder = (uint8)(p2 >> 16), vscale = (uint8)(p2 >> 8);
742 //WriteLog(" [after]: rem=%02X, vscale=%02X\n", remainder, vscale);
745 op_pointer = (p0 & 0x000007FFFF000000LL) >> 21;
748 case OBJECT_TYPE_GPU:
750 //WriteLog("OP: Asserting GPU IRQ #3...\n");
751 #warning "Need to fix OP GPU IRQ handling! !!! FIX !!!"
752 OPSetCurrentObject(p0);
753 GPUSetIRQLine(3, ASSERT_LINE);
754 //Also, OP processing is suspended from this point until OBF (F00026) is written to...
757 //OPSuspendedByGPU = true;
758 //Dunno if the OP keeps processing from where it was interrupted, or if it just continues
759 //on the next halfline...
760 // --> It continues from where it was interrupted! !!! FIX !!!
763 case OBJECT_TYPE_BRANCH:
765 uint16 ypos = (p0 >> 3) & 0x7FF;
766 // NOTE: The JTRM sez there are only 2 bits used for the CC, but lists *five*
767 // conditions! Need at least one more bit for that! :-P
768 #warning "!!! Possibly bad CC handling in OP (missing 1 bit) !!!"
769 uint8 cc = (p0 >> 14) & 0x03;
770 uint32 link = (p0 >> 21) & 0x3FFFF8;
772 // if ((ypos!=507)&&(ypos!=25))
773 // WriteLog("\t%i%s%i link=0x%.8x\n",halfline,condition_to_str[cc],ypos>>1,link);
776 case CONDITION_EQUAL:
777 if (TOMReadWord(0xF00006, OP) == ypos || ypos == 0x7FF)
780 case CONDITION_LESS_THAN:
781 if (TOMReadWord(0xF00006, OP) < ypos)
784 case CONDITION_GREATER_THAN:
785 if (TOMReadWord(0xF00006, OP) > ypos)
788 case CONDITION_OP_FLAG_SET:
789 if (OPGetStatusRegister() & 0x01)
792 case CONDITION_SECOND_HALF_LINE:
793 //Here's the ASIC code:
794 // ND4(cctrue5, newheight[2], heightl[1], heightl[0], hcb[10]);
795 //which means, do the link if bit 10 of HC is set...
797 // This basically means branch if bit 10 of HC is set
798 #warning "Unhandled condition code causes emulator to crash... !!! FIX !!!"
799 WriteLog("OP: Unexpected CONDITION_SECOND_HALF_LINE in BRANCH object\nOP: shutting down!\n");
804 // Basically, if you do this, the OP does nothing. :-)
805 WriteLog("OP: Unimplemented branch condition %i\n", cc);
809 case OBJECT_TYPE_STOP:
813 //WriteLog("OP: --> STOP\n");
814 // op_set_status_register(((p0>>3) & 0xFFFFFFFF));
815 //This seems more likely...
816 OPSetCurrentObject(p0);
820 // We need to check whether these interrupts are enabled or not, THEN
821 // set an IRQ + pending flag if necessary...
822 if (TOMIRQEnabled(IRQ_OPFLAG))
824 TOMSetPendingObjectInt();
825 m68k_set_irq(2); // Cause a 68K IPL 2 to occur...
833 WriteLog("op: unknown object type %i\n", ((uint8)p0 & 0x07));
837 // Here is a little sanity check to keep the OP from locking up the machine
838 // when fed bad data. Better would be to count how many actual cycles it used
839 // and bail out/reenter to properly simulate an overloaded OP... !!! FIX !!!
840 #warning "Better would be to count how many actual cycles it used and bail out/reenter to properly simulate an overloaded OP... !!! FIX !!!"
849 // Store fixed size bitmap in line buffer
851 void OPProcessFixedBitmap(uint64 p0, uint64 p1, bool render)
853 // Need to make sure that when writing that it stays within the line buffer...
854 // LBUF ($F01800 - $F01D9E) 360 x 32-bit RAM
855 uint8 depth = (p1 >> 12) & 0x07; // Color depth of image
856 int32 xpos = ((int16)((p1 << 4) & 0xFFFF)) >> 4;// Image xpos in LBUF
857 uint32 iwidth = (p1 >> 28) & 0x3FF; // Image width in *phrases*
858 uint32 data = (p0 >> 40) & 0xFFFFF8; // Pixel data address
859 //#ifdef OP_DEBUG_BMP
860 uint32 firstPix = (p1 >> 49) & 0x3F;
861 // "The LSB is significant only for scaled objects..." -JTRM
862 // "In 1 BPP mode, all five bits are significant. In 2 BPP mode, the top four are significant..."
865 // We can ignore the RELEASE (high order) bit for now--probably forever...!
866 // uint8 flags = (p1 >> 45) & 0x0F; // REFLECT, RMW, TRANS, RELEASE
867 //Optimize: break these out to their own BOOL values
868 uint8 flags = (p1 >> 45) & 0x07; // REFLECT (0), RMW (1), TRANS (2)
869 bool flagREFLECT = (flags & OPFLAG_REFLECT ? true : false),
870 flagRMW = (flags & OPFLAG_RMW ? true : false),
871 flagTRANS = (flags & OPFLAG_TRANS ? true : false);
872 // "For images with 1 to 4 bits/pixel the top 7 to 4 bits of the index
873 // provide the most significant bits of the palette address."
874 uint8 index = (p1 >> 37) & 0xFE; // CLUT index offset (upper pix, 1-4 bpp)
875 uint32 pitch = (p1 >> 15) & 0x07; // Phrase pitch
876 pitch <<= 3; // Optimization: Multiply pitch by 8
878 // int16 scanlineWidth = tom_getVideoModeWidth();
879 uint8 * tomRam8 = TOMGetRamPointer();
880 uint8 * paletteRAM = &tomRam8[0x400];
881 // This is OK as long as it's used correctly: For 16-bit RAM to RAM direct copies--NOT
882 // for use when using endian-corrected data (i.e., any of the *_word_read functions!)
883 uint16 * paletteRAM16 = (uint16 *)paletteRAM;
885 // WriteLog("bitmap %ix? %ibpp at %i,? firstpix=? data=0x%.8x pitch %i hflipped=%s dwidth=? (linked to ?) RMW=%s Tranparent=%s\n",
886 // iwidth, op_bitmap_bit_depth[bitdepth], xpos, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), (flags&OPFLAG_RMW ? "yes" : "no"), (flags&OPFLAG_TRANS ? "yes" : "no"));
888 // Is it OK to have a 0 for the data width??? (i.e., undocumented?)
889 // Seems to be... Seems that dwidth *can* be zero (i.e., reuse same line) as well.
890 // Pitch == 0 is OK too...
891 // if (!render || op_pointer == 0 || ptr == 0 || pitch == 0)
892 //I'm not convinced that we need to concern ourselves with data & op_pointer here either!
893 if (!render || iwidth == 0)
896 //OK, so we know the position in the line buffer is correct. It's the clipping in
897 //24bpp mode that's wrong!
899 //This is a total kludge, based upon the fact that 24BPP mode puts *4* bytes
900 //into the line buffer for each pixel.
901 if (depth == 5) // i.e., 24bpp mode...
902 xpos >>= 1; // Cut it in half...
905 //#define OP_DEBUG_BMP
906 //#ifdef OP_DEBUG_BMP
907 // WriteLog("bitmap %ix%i %ibpp at %i,%i firstpix=%i data=0x%.8x pitch %i hflipped=%s dwidth=%i (linked to 0x%.8x) Transluency=%s\n",
908 // iwidth, height, op_bitmap_bit_depth[bitdepth], xpos, ypos, firstPix, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), dwidth, op_pointer, (flags&OPFLAG_RMW ? "yes" : "no"));
911 // int32 leftMargin = xpos, rightMargin = (xpos + (phraseWidthToPixels[depth] * iwidth)) - 1;
912 int32 startPos = xpos, endPos = xpos +
913 (!flagREFLECT ? (phraseWidthToPixels[depth] * iwidth) - 1
914 : -((phraseWidthToPixels[depth] * iwidth) + 1));
915 uint32 clippedWidth = 0, phraseClippedWidth = 0, dataClippedWidth = 0;//, phrasePixel = 0;
916 bool in24BPPMode = (((GET16(tomRam8, 0x0028) >> 1) & 0x03) == 1 ? true : false); // VMODE
917 // Not sure if this is Jaguar Two only location or what...
918 // From the docs, it is... If we want to limit here we should think of something else.
919 // int32 limit = GET16(tom_ram_8, 0x0008); // LIMIT
920 // int32 limit = 720;
921 // int32 lbufWidth = (!in24BPPMode ? limit - 1 : (limit / 2) - 1); // Zero based limit...
922 //printf("[OP:xpos=%i,spos=%i,epos=%i>", xpos, startPos, endPos);
923 // This is correct, the OP line buffer is a constant size...
925 int32 lbufWidth = 719;
927 // If the image is completely to the left or right of the line buffer, then bail.
928 //If in REFLECT mode, then these values are swapped! !!! FIX !!! [DONE]
929 //There are four possibilities:
930 // 1. image sits on left edge and no REFLECT; starts out of bounds but ends in bounds.
931 // 2. image sits on left edge and REFLECT; starts in bounds but ends out of bounds.
932 // 3. image sits on right edge and REFLECT; starts out of bounds but ends in bounds.
933 // 4. image sits on right edge and no REFLECT; starts in bounds but ends out of bounds.
934 //Numbers 2 & 4 can be caught by checking the LBUF clip while in the inner loop,
935 // numbers 1 & 3 are of concern.
936 // This *indirectly* handles only cases 2 & 4! And is WRONG if REFLECT is set...!
937 // if (rightMargin < 0 || leftMargin > lbufWidth)
939 // It might be easier to swap these (if REFLECTed) and just use XPOS down below...
940 // That way, you could simply set XPOS to leftMargin if !REFLECT and to rightMargin otherwise.
941 // Still have to be careful with the DATA and IWIDTH values though...
943 // if ((!flagREFLECT && (rightMargin < 0 || leftMargin > lbufWidth))
944 // || (flagREFLECT && (leftMargin < 0 || rightMargin > lbufWidth)))
946 if ((!flagREFLECT && (endPos < 0 || startPos > lbufWidth))
947 || (flagREFLECT && (startPos < 0 || endPos > lbufWidth)))
950 // Otherwise, find the clip limits and clip the phrase as well...
951 // NOTE: I'm fudging here by letting the actual blit overstep the bounds of the
952 // line buffer, but it shouldn't matter since there are two unused line
953 // buffers below and nothing above and I'll at most write 8 bytes outside
954 // the line buffer... I could use a fractional clip begin/end value, but
955 // this makes the blit a *lot* more hairy. I might fix this in the future
956 // if it becomes necessary. (JLH)
957 // Probably wouldn't be *that* hairy. Just use a delta that tells the inner loop
958 // which pixel in the phrase is being written, and quit when either end of phrases
959 // is reached or line buffer extents are surpassed.
961 //This stuff is probably wrong as well... !!! FIX !!!
962 //The strange thing is that it seems to work, but that's no guarantee that it's bulletproof!
963 //Yup. Seems that JagMania doesn't work correctly with this...
964 //Dunno if this is the problem, but Atari Karts is showing *some* of the road now...
969 clippedWidth = 0 - leftMargin,
970 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth],
971 leftMargin = 0 - (clippedWidth % phraseWidthToPixels[depth]);
974 if (rightMargin > lbufWidth)
975 clippedWidth = rightMargin - lbufWidth,
976 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];//,
977 // rightMargin = lbufWidth + (clippedWidth % phraseWidthToPixels[depth]);
978 // rightMargin = lbufWidth;
981 WriteLog("OP: We're about to encounter a divide by zero error!\n");
982 // NOTE: We're just using endPos to figure out how much, if any, to clip by.
983 // ALSO: There may be another case where we start out of bounds and end out of bounds...!
985 if (startPos < 0) // Case #1: Begin out, end in, L to R
986 clippedWidth = 0 - startPos,
987 dataClippedWidth = phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth],
988 startPos = 0 - (clippedWidth % phraseWidthToPixels[depth]);
990 if (endPos < 0) // Case #2: Begin in, end out, R to L
991 clippedWidth = 0 - endPos,
992 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];
994 if (endPos > lbufWidth) // Case #3: Begin in, end out, L to R
995 clippedWidth = endPos - lbufWidth,
996 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];
998 if (startPos > lbufWidth) // Case #4: Begin out, end in, R to L
999 clippedWidth = startPos - lbufWidth,
1000 dataClippedWidth = phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth],
1001 startPos = lbufWidth + (clippedWidth % phraseWidthToPixels[depth]);
1002 //printf("<OP:spos=%i,epos=%i]", startPos, endPos);
1004 // If the image is sitting on the line buffer left or right edge, we need to compensate
1005 // by decreasing the image phrase width accordingly.
1006 iwidth -= phraseClippedWidth;
1008 // Also, if we're clipping the phrase we need to make sure we're in the correct part of
1010 // data += phraseClippedWidth * (pitch << 3);
1011 data += dataClippedWidth * pitch;
1013 // NOTE: When the bitmap is in REFLECT mode, the XPOS marks the *right* side of the
1014 // bitmap! This makes clipping & etc. MUCH, much easier...!
1015 // uint32 lbufAddress = 0x1800 + (!in24BPPMode ? leftMargin * 2 : leftMargin * 4);
1016 //Why does this work right when multiplying startPos by 2 (instead of 4) for 24 BPP mode?
1017 //Is this a bug in the OP?
1018 //It's because in 24bpp mode, each pixel takes *4* bytes, instead of the usual 2.
1019 //Though it looks like we're doing it here no matter what...
1020 // uint32 lbufAddress = 0x1800 + (!in24BPPMode ? startPos * 2 : startPos * 2);
1022 uint32 lbufAddress = 0x1800 + (startPos * 2);
1023 uint8 * currentLineBuffer = &tomRam8[lbufAddress];
1027 // Hmm. We check above for 24 BPP mode, but don't do anything about it below...
1028 // If we *were* in 24 BPP mode, how would you convert CRY to RGB24? Seems to me
1029 // that if you're in CRY mode then you wouldn't be able to use 24 BPP bitmaps
1031 // This seems to be the case (at least according to the Midsummer docs)...!
1033 // This is to test using palette zeroes instead of bit zeroes...
1034 // And it seems that this is wrong, index == 0 is transparent apparently... :-/
1035 //#define OP_USES_PALETTE_ZERO
1037 if (depth == 0) // 1 BPP
1039 // The LSB of flags is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1040 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1042 // Fetch 1st phrase...
1043 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1044 //Note that firstPix should only be honored *if* we start with the 1st phrase of the bitmap
1045 //i.e., we didn't clip on the margin... !!! FIX !!!
1046 pixels <<= firstPix; // Skip first N pixels (N=firstPix)...
1047 int i = firstPix; // Start counter at right spot...
1053 uint8 bit = pixels >> 63;
1054 #ifndef OP_USES_PALETTE_ZERO
1055 if (flagTRANS && bit == 0)
1057 if (flagTRANS && (paletteRAM16[index | bit] == 0))
1063 //Optimize: Set paletteRAM16 to beginning of palette RAM + index*2 and use only [bit] as index...
1064 //Won't optimize RMW case though...
1065 // This is the *only* correct use of endian-dependent code
1066 // (i.e., mem-to-mem direct copying)!
1067 *(uint16 *)currentLineBuffer = paletteRAM16[index | bit];
1069 *currentLineBuffer =
1070 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bit) << 1]),
1071 *(currentLineBuffer + 1) =
1072 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bit) << 1) + 1]);
1075 currentLineBuffer += lbufDelta;
1079 // Fetch next phrase...
1081 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1084 else if (depth == 1) // 2 BPP
1087 WriteLog("OP: Fixed bitmap @ 2 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1088 index &= 0xFC; // Top six bits form CLUT index
1089 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1090 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1095 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1098 for(int i=0; i<32; i++)
1100 uint8 bits = pixels >> 62;
1101 // Seems to me that both of these are in the same endian, so we could cast it as
1102 // uint16 * and do straight across copies (what about 24 bpp? Treat it differently...)
1103 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
1104 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
1105 // No, it isn't because we read the memory in an endian safe way--this *won't* work...
1106 #ifndef OP_USES_PALETTE_ZERO
1107 if (flagTRANS && bits == 0)
1109 if (flagTRANS && (paletteRAM16[index | bits] == 0))
1115 *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1117 *currentLineBuffer =
1118 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1119 *(currentLineBuffer + 1) =
1120 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1123 currentLineBuffer += lbufDelta;
1128 else if (depth == 2) // 4 BPP
1131 WriteLog("OP: Fixed bitmap @ 4 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1132 index &= 0xF0; // Top four bits form CLUT index
1133 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1134 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1139 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1142 for(int i=0; i<16; i++)
1144 uint8 bits = pixels >> 60;
1145 // Seems to me that both of these are in the same endian, so we could cast it as
1146 // uint16 * and do straight across copies (what about 24 bpp? Treat it differently...)
1147 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
1148 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
1149 // No, it isn't because we read the memory in an endian safe way--this *won't* work...
1150 #ifndef OP_USES_PALETTE_ZERO
1151 if (flagTRANS && bits == 0)
1153 if (flagTRANS && (paletteRAM16[index | bits] == 0))
1159 *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1161 *currentLineBuffer =
1162 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1163 *(currentLineBuffer + 1) =
1164 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1167 currentLineBuffer += lbufDelta;
1172 else if (depth == 3) // 8 BPP
1174 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1175 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1177 // Fetch 1st phrase...
1178 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1179 //Note that firstPix should only be honored *if* we start with the 1st phrase of the bitmap
1180 //i.e., we didn't clip on the margin... !!! FIX !!!
1181 firstPix &= 0x30; // Only top two bits are valid for 8 BPP
1182 pixels <<= firstPix; // Skip first N pixels (N=firstPix)...
1183 int i = firstPix >> 3; // Start counter at right spot...
1189 uint8 bits = pixels >> 56;
1190 // Seems to me that both of these are in the same endian, so we could cast it as
1191 // uint16 * and do straight across copies (what about 24 bpp? Treat it differently...)
1192 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
1193 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
1194 // No, it isn't because we read the memory in an endian safe way--this *won't* work...
1195 //This would seem to be problematic...
1196 //Because it's the palette entry being zero that makes the pixel transparent...
1197 //Let's try it and see.
1198 #ifndef OP_USES_PALETTE_ZERO
1199 if (flagTRANS && bits == 0)
1201 if (flagTRANS && (paletteRAM16[bits] == 0))
1207 *(uint16 *)currentLineBuffer = paletteRAM16[bits];
1209 *currentLineBuffer =
1210 BLEND_CR(*currentLineBuffer, paletteRAM[bits << 1]),
1211 *(currentLineBuffer + 1) =
1212 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[(bits << 1) + 1]);
1215 currentLineBuffer += lbufDelta;
1219 // Fetch next phrase...
1221 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1224 else if (depth == 4) // 16 BPP
1227 WriteLog("OP: Fixed bitmap @ 16 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1228 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1229 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1234 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1237 for(int i=0; i<4; i++)
1239 uint8 bitsHi = pixels >> 56, bitsLo = pixels >> 48;
1240 // Seems to me that both of these are in the same endian, so we could cast it as
1241 // uint16 * and do straight across copies (what about 24 bpp? Treat it differently...)
1242 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
1243 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
1244 // No, it isn't because we read the memory in an endian safe way--it *won't* work...
1245 //This doesn't seem right... Let's try the encoded black value ($8800):
1246 //Apparently, CRY 0 maps to $8800...
1247 if (flagTRANS && ((bitsLo | bitsHi) == 0))
1248 // if (flagTRANS && (bitsHi == 0x88) && (bitsLo == 0x00))
1253 *currentLineBuffer = bitsHi,
1254 *(currentLineBuffer + 1) = bitsLo;
1256 *currentLineBuffer =
1257 BLEND_CR(*currentLineBuffer, bitsHi),
1258 *(currentLineBuffer + 1) =
1259 BLEND_Y(*(currentLineBuffer + 1), bitsLo);
1262 currentLineBuffer += lbufDelta;
1267 else if (depth == 5) // 24 BPP
1269 //Looks like Iron Soldier is the only game that uses 24BPP mode...
1270 //There *might* be others...
1271 //WriteLog("OP: Writing 24 BPP bitmap!\n");
1273 WriteLog("OP: Fixed bitmap @ 24 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1274 // Not sure, but I think RMW only works with 16 BPP and below, and only in CRY mode...
1275 // The LSB of flags is OPFLAG_REFLECT, so sign extend it and OR 4 into it.
1276 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 4) | 0x04;
1281 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1284 for(int i=0; i<2; i++)
1286 // We don't use a 32-bit var here because of endian issues...!
1287 uint8 bits3 = pixels >> 56, bits2 = pixels >> 48,
1288 bits1 = pixels >> 40, bits0 = pixels >> 32;
1290 if (flagTRANS && (bits3 | bits2 | bits1 | bits0) == 0)
1293 *currentLineBuffer = bits3,
1294 *(currentLineBuffer + 1) = bits2,
1295 *(currentLineBuffer + 2) = bits1,
1296 *(currentLineBuffer + 3) = bits0;
1298 currentLineBuffer += lbufDelta;
1306 // Store scaled bitmap in line buffer
1308 void OPProcessScaledBitmap(uint64 p0, uint64 p1, uint64 p2, bool render)
1310 // Need to make sure that when writing that it stays within the line buffer...
1311 // LBUF ($F01800 - $F01D9E) 360 x 32-bit RAM
1312 uint8 depth = (p1 >> 12) & 0x07; // Color depth of image
1313 int32 xpos = ((int16)((p1 << 4) & 0xFFFF)) >> 4;// Image xpos in LBUF
1314 uint32 iwidth = (p1 >> 28) & 0x3FF; // Image width in *phrases*
1315 uint32 data = (p0 >> 40) & 0xFFFFF8; // Pixel data address
1316 //#ifdef OP_DEBUG_BMP
1317 // Prolly should use this... Though not sure exactly how.
1318 //Use the upper bits as an offset into the phrase depending on the BPP. That's how!
1319 uint32 firstPix = (p1 >> 49) & 0x3F;
1320 //This is WEIRD! I'm sure I saw Atari Karts request 8 BPP FIRSTPIX! What happened???
1322 WriteLog("OP: FIRSTPIX != 0! (Scaled BM)\n");
1324 // We can ignore the RELEASE (high order) bit for now--probably forever...!
1325 // uint8 flags = (p1 >> 45) & 0x0F; // REFLECT, RMW, TRANS, RELEASE
1326 //Optimize: break these out to their own BOOL values [DONE]
1327 uint8 flags = (p1 >> 45) & 0x07; // REFLECT (0), RMW (1), TRANS (2)
1328 bool flagREFLECT = (flags & OPFLAG_REFLECT ? true : false),
1329 flagRMW = (flags & OPFLAG_RMW ? true : false),
1330 flagTRANS = (flags & OPFLAG_TRANS ? true : false);
1331 uint8 index = (p1 >> 37) & 0xFE; // CLUT index offset (upper pix, 1-4 bpp)
1332 uint32 pitch = (p1 >> 15) & 0x07; // Phrase pitch
1334 uint8 * tomRam8 = TOMGetRamPointer();
1335 uint8 * paletteRAM = &tomRam8[0x400];
1336 // This is OK as long as it's used correctly: For 16-bit RAM to RAM direct copies--NOT
1337 // for use when using endian-corrected data (i.e., any of the *ReadWord functions!)
1338 uint16 * paletteRAM16 = (uint16 *)paletteRAM;
1340 uint16 hscale = p2 & 0xFF;
1341 // Hmm. It seems that fixing the horizontal scale necessitated re-fixing this. Not sure why,
1342 // but seems to be consistent with the vertical scaling now (and it may turn out to be wrong!)...
1343 uint16 horizontalRemainder = hscale; // Not sure if it starts full, but seems reasonable [It's not!]
1344 // uint8 horizontalRemainder = 0; // Let's try zero! Seems to work! Yay! [No, it doesn't!]
1345 int32 scaledWidthInPixels = (iwidth * phraseWidthToPixels[depth] * hscale) >> 5;
1346 uint32 scaledPhrasePixels = (phraseWidthToPixels[depth] * hscale) >> 5;
1348 // WriteLog("bitmap %ix? %ibpp at %i,? firstpix=? data=0x%.8x pitch %i hflipped=%s dwidth=? (linked to ?) RMW=%s Tranparent=%s\n",
1349 // iwidth, op_bitmap_bit_depth[bitdepth], xpos, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), (flags&OPFLAG_RMW ? "yes" : "no"), (flags&OPFLAG_TRANS ? "yes" : "no"));
1351 // Looks like an hscale of zero means don't draw!
1352 if (!render || iwidth == 0 || hscale == 0)
1355 /*extern int start_logging;
1357 WriteLog("OP: Scaled bitmap %ix? %ibpp at %i,? hscale=%02X fpix=%i data=%08X pitch %i hflipped=%s dwidth=? (linked to %08X) Transluency=%s\n",
1358 iwidth, op_bitmap_bit_depth[depth], xpos, hscale, firstPix, data, pitch, (flagREFLECT ? "yes" : "no"), op_pointer, (flagRMW ? "yes" : "no"));*/
1359 //#define OP_DEBUG_BMP
1360 //#ifdef OP_DEBUG_BMP
1361 // WriteLog("OP: Scaled bitmap %ix%i %ibpp at %i,%i firstpix=%i data=0x%.8x pitch %i hflipped=%s dwidth=%i (linked to 0x%.8x) Transluency=%s\n",
1362 // iwidth, height, op_bitmap_bit_depth[bitdepth], xpos, ypos, firstPix, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), dwidth, op_pointer, (flags&OPFLAG_RMW ? "yes" : "no"));
1365 int32 startPos = xpos, endPos = xpos +
1366 (!flagREFLECT ? scaledWidthInPixels - 1 : -(scaledWidthInPixels + 1));
1367 uint32 clippedWidth = 0, phraseClippedWidth = 0, dataClippedWidth = 0;
1368 bool in24BPPMode = (((GET16(tomRam8, 0x0028) >> 1) & 0x03) == 1 ? true : false); // VMODE
1369 // Not sure if this is Jaguar Two only location or what...
1370 // From the docs, it is... If we want to limit here we should think of something else.
1371 // int32 limit = GET16(tom_ram_8, 0x0008); // LIMIT
1373 // int32 lbufWidth = (!in24BPPMode ? limit - 1 : (limit / 2) - 1); // Zero based limit...
1374 int32 lbufWidth = 719; // Zero based limit...
1376 // If the image is completely to the left or right of the line buffer, then bail.
1377 //If in REFLECT mode, then these values are swapped! !!! FIX !!! [DONE]
1378 //There are four possibilities:
1379 // 1. image sits on left edge and no REFLECT; starts out of bounds but ends in bounds.
1380 // 2. image sits on left edge and REFLECT; starts in bounds but ends out of bounds.
1381 // 3. image sits on right edge and REFLECT; starts out of bounds but ends in bounds.
1382 // 4. image sits on right edge and no REFLECT; starts in bounds but ends out of bounds.
1383 //Numbers 2 & 4 can be caught by checking the LBUF clip while in the inner loop,
1384 // numbers 1 & 3 are of concern.
1385 // This *indirectly* handles only cases 2 & 4! And is WRONG if REFLECT is set...!
1386 // if (rightMargin < 0 || leftMargin > lbufWidth)
1388 // It might be easier to swap these (if REFLECTed) and just use XPOS down below...
1389 // That way, you could simply set XPOS to leftMargin if !REFLECT and to rightMargin otherwise.
1390 // Still have to be careful with the DATA and IWIDTH values though...
1392 if ((!flagREFLECT && (endPos < 0 || startPos > lbufWidth))
1393 || (flagREFLECT && (startPos < 0 || endPos > lbufWidth)))
1396 // Otherwise, find the clip limits and clip the phrase as well...
1397 // NOTE: I'm fudging here by letting the actual blit overstep the bounds of the
1398 // line buffer, but it shouldn't matter since there are two unused line
1399 // buffers below and nothing above and I'll at most write 40 bytes outside
1400 // the line buffer... I could use a fractional clip begin/end value, but
1401 // this makes the blit a *lot* more hairy. I might fix this in the future
1402 // if it becomes necessary. (JLH)
1403 // Probably wouldn't be *that* hairy. Just use a delta that tells the inner loop
1404 // which pixel in the phrase is being written, and quit when either end of phrases
1405 // is reached or line buffer extents are surpassed.
1407 //This stuff is probably wrong as well... !!! FIX !!!
1408 //The strange thing is that it seems to work, but that's no guarantee that it's bulletproof!
1409 //Yup. Seems that JagMania doesn't work correctly with this...
1410 //Dunno if this is the problem, but Atari Karts is showing *some* of the road now...
1411 //Actually, it is! Or, it was. It doesn't seem to be clipping here, so the problem lies
1412 //elsewhere! Hmm. Putting the scaling code into the 1/2/8 BPP cases seems to draw the ground
1413 // a bit more accurately... Strange!
1414 //It's probably a case of the REFLECT flag being set and the background being written
1415 //from the right side of the screen...
1416 //But no, it isn't... At least if the diagnostics are telling the truth!
1418 // NOTE: We're just using endPos to figure out how much, if any, to clip by.
1419 // ALSO: There may be another case where we start out of bounds and end out of bounds...!
1422 //There's a problem here with scaledPhrasePixels in that it can be forced to zero when
1423 //the scaling factor is small. So fix it already! !!! FIX !!!
1424 /*if (scaledPhrasePixels == 0)
1426 WriteLog("OP: [Scaled] We're about to encounter a divide by zero error!\n");
1427 DumpScaledObject(p0, p1, p2);
1429 //NOTE: I'm almost 100% sure that this is wrong... And it is! :-p
1431 //Try a simple example...
1432 // Let's say we have a 8 BPP scanline with an hscale of $80 (4). Our xpos is -10,
1433 // non-flipped. Pixels in the bitmap are XYZXYZXYZXYZXYZ.
1434 // Scaled up, they would be XXXXYYYYZZZZXXXXYYYYZZZZXXXXYYYYZZZZ...
1436 // Normally, we would expect this in the line buffer:
1437 // ZZXXXXYYYYZZZZXXXXYYYYZZZZ...
1439 // But instead we're getting:
1440 // XXXXYYYYZZZZXXXXYYYYZZZZ...
1442 // or are we??? It would seem so, simply by virtue of the fact that we're NOT starting
1443 // on negative boundary--or are we? Hmm...
1444 // cw = 10, dcw = pcw = 10 / ([8 * 4 = 32] 32) = 0, sp = -10
1446 // Let's try a real world example:
1448 //OP: Scaled bitmap (70, 8 BPP, spp=28) sp (-400) < 0... [new sp=-8, cw=400, dcw=pcw=14]
1449 //OP: Scaled bitmap (6F, 8 BPP, spp=27) sp (-395) < 0... [new sp=-17, cw=395, dcw=pcw=14]
1451 // Really, spp is 27.75 in the second case...
1452 // So... If we do 395 / 27.75, we get 14. Ok so far... If we scale that against the
1453 // start position (14 * 27.75), we get -6.5... NOT -17!
1455 //Now it seems we're working OK, at least for the first case...
1456 uint32 scaledPhrasePixelsUS = phraseWidthToPixels[depth] * hscale;
1458 if (startPos < 0) // Case #1: Begin out, end in, L to R
1460 extern int start_logging;
1462 WriteLog("OP: Scaled bitmap (%02X, %u BPP, spp=%u) start pos (%i) < 0...", hscale, op_bitmap_bit_depth[depth], scaledPhrasePixels, startPos);
1463 // clippedWidth = 0 - startPos,
1464 clippedWidth = (0 - startPos) << 5,
1465 // dataClippedWidth = phraseClippedWidth = clippedWidth / scaledPhrasePixels,
1466 dataClippedWidth = phraseClippedWidth = (clippedWidth / scaledPhrasePixelsUS) >> 5,
1467 // startPos = 0 - (clippedWidth % scaledPhrasePixels);
1468 startPos += (dataClippedWidth * scaledPhrasePixelsUS) >> 5;
1470 WriteLog(" [new sp=%i, cw=%i, dcw=pcw=%i]\n", startPos, clippedWidth, dataClippedWidth);
1473 if (endPos < 0) // Case #2: Begin in, end out, R to L
1474 clippedWidth = 0 - endPos,
1475 phraseClippedWidth = clippedWidth / scaledPhrasePixels;
1477 if (endPos > lbufWidth) // Case #3: Begin in, end out, L to R
1478 clippedWidth = endPos - lbufWidth,
1479 phraseClippedWidth = clippedWidth / scaledPhrasePixels;
1481 if (startPos > lbufWidth) // Case #4: Begin out, end in, R to L
1482 clippedWidth = startPos - lbufWidth,
1483 dataClippedWidth = phraseClippedWidth = clippedWidth / scaledPhrasePixels,
1484 startPos = lbufWidth + (clippedWidth % scaledPhrasePixels);
1486 extern int op_start_log;
1487 if (op_start_log && clippedWidth != 0)
1488 WriteLog("OP: Clipped line. SP=%i, EP=%i, clip=%u, iwidth=%u, hscale=%02X\n", startPos, endPos, clippedWidth, iwidth, hscale);
1489 if (op_start_log && startPos == 13)
1491 WriteLog("OP: Scaled line. SP=%i, EP=%i, clip=%u, iwidth=%u, hscale=%02X, depth=%u, firstPix=%u\n", startPos, endPos, clippedWidth, iwidth, hscale, depth, firstPix);
1492 DumpScaledObject(p0, p1, p2);
1495 WriteLog(" %08X: ", data);
1496 for(int i=0; i<7*8; i++)
1497 WriteLog("%02X ", JaguarReadByte(data+i));
1501 // If the image is sitting on the line buffer left or right edge, we need to compensate
1502 // by decreasing the image phrase width accordingly.
1503 iwidth -= phraseClippedWidth;
1505 // Also, if we're clipping the phrase we need to make sure we're in the correct part of
1507 // data += phraseClippedWidth * (pitch << 3);
1508 data += dataClippedWidth * (pitch << 3);
1510 // NOTE: When the bitmap is in REFLECT mode, the XPOS marks the *right* side of the
1511 // bitmap! This makes clipping & etc. MUCH, much easier...!
1512 // uint32 lbufAddress = 0x1800 + (!in24BPPMode ? leftMargin * 2 : leftMargin * 4);
1513 // uint32 lbufAddress = 0x1800 + (!in24BPPMode ? startPos * 2 : startPos * 4);
1514 uint32 lbufAddress = 0x1800 + startPos * 2;
1515 uint8 * currentLineBuffer = &tomRam8[lbufAddress];
1516 //uint8 * lineBufferLowerLimit = &tom_ram_8[0x1800],
1517 // * lineBufferUpperLimit = &tom_ram_8[0x1800 + 719];
1521 // Hmm. We check above for 24 BPP mode, but don't do anything about it below...
1522 // If we *were* in 24 BPP mode, how would you convert CRY to RGB24? Seems to me
1523 // that if you're in CRY mode then you wouldn't be able to use 24 BPP bitmaps
1525 // This seems to be the case (at least according to the Midsummer docs)...!
1527 if (depth == 0) // 1 BPP
1530 WriteLog("OP: Scaled bitmap @ 1 BPP requesting FIRSTPIX!\n");
1531 // The LSB of flags is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1532 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1535 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1537 while ((int32)iwidth > 0)
1539 uint8 bits = pixels >> 63;
1541 #ifndef OP_USES_PALETTE_ZERO
1542 if (flagTRANS && bits == 0)
1544 if (flagTRANS && (paletteRAM16[index | bits] == 0))
1550 // This is the *only* correct use of endian-dependent code
1551 // (i.e., mem-to-mem direct copying)!
1552 *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1554 *currentLineBuffer =
1555 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1556 *(currentLineBuffer + 1) =
1557 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1560 currentLineBuffer += lbufDelta;
1563 The reason we subtract the horizontalRemainder *after* the test is because we had too few
1564 bytes for horizontalRemainder to properly recognize a negative number. But now it's 16 bits
1565 wide, so we could probably go back to that (as long as we make it an int16 and not a uint16!)
1567 /* horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
1568 while (horizontalRemainder & 0x80)
1570 horizontalRemainder += hscale;
1574 // while (horizontalRemainder <= 0x20) // I.e., it's <= 1.0 (*before* subtraction)
1575 while (horizontalRemainder < 0x20) // I.e., it's <= 1.0 (*before* subtraction)
1577 horizontalRemainder += hscale;
1581 horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
1585 int phrasesToSkip = pixCount / 64, pixelShift = pixCount % 64;
1587 data += (pitch << 3) * phrasesToSkip;
1588 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1589 pixels <<= 1 * pixelShift;
1590 iwidth -= phrasesToSkip;
1591 pixCount = pixelShift;
1595 else if (depth == 1) // 2 BPP
1598 WriteLog("OP: Scaled bitmap @ 2 BPP requesting FIRSTPIX!\n");
1599 index &= 0xFC; // Top six bits form CLUT index
1600 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1601 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1604 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1606 while ((int32)iwidth > 0)
1608 uint8 bits = pixels >> 62;
1610 #ifndef OP_USES_PALETTE_ZERO
1611 if (flagTRANS && bits == 0)
1613 if (flagTRANS && (paletteRAM16[index | bits] == 0))
1619 // This is the *only* correct use of endian-dependent code
1620 // (i.e., mem-to-mem direct copying)!
1621 *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1623 *currentLineBuffer =
1624 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1625 *(currentLineBuffer + 1) =
1626 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1629 currentLineBuffer += lbufDelta;
1631 /* horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
1632 while (horizontalRemainder & 0x80)
1634 horizontalRemainder += hscale;
1638 // while (horizontalRemainder <= 0x20) // I.e., it's <= 0 (*before* subtraction)
1639 while (horizontalRemainder < 0x20) // I.e., it's <= 1.0 (*before* subtraction)
1641 horizontalRemainder += hscale;
1645 horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
1649 int phrasesToSkip = pixCount / 32, pixelShift = pixCount % 32;
1651 data += (pitch << 3) * phrasesToSkip;
1652 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1653 pixels <<= 2 * pixelShift;
1654 iwidth -= phrasesToSkip;
1655 pixCount = pixelShift;
1659 else if (depth == 2) // 4 BPP
1662 WriteLog("OP: Scaled bitmap @ 4 BPP requesting FIRSTPIX!\n");
1663 index &= 0xF0; // Top four bits form CLUT index
1664 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1665 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1668 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1670 while ((int32)iwidth > 0)
1672 uint8 bits = pixels >> 60;
1674 #ifndef OP_USES_PALETTE_ZERO
1675 if (flagTRANS && bits == 0)
1677 if (flagTRANS && (paletteRAM16[index | bits] == 0))
1683 // This is the *only* correct use of endian-dependent code
1684 // (i.e., mem-to-mem direct copying)!
1685 *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1687 *currentLineBuffer =
1688 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1689 *(currentLineBuffer + 1) =
1690 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1693 currentLineBuffer += lbufDelta;
1695 /* horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
1696 while (horizontalRemainder & 0x80)
1698 horizontalRemainder += hscale;
1702 // while (horizontalRemainder <= 0x20) // I.e., it's <= 0 (*before* subtraction)
1703 while (horizontalRemainder < 0x20) // I.e., it's <= 0 (*before* subtraction)
1705 horizontalRemainder += hscale;
1709 horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
1713 int phrasesToSkip = pixCount / 16, pixelShift = pixCount % 16;
1715 data += (pitch << 3) * phrasesToSkip;
1716 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1717 pixels <<= 4 * pixelShift;
1718 iwidth -= phrasesToSkip;
1719 pixCount = pixelShift;
1723 else if (depth == 3) // 8 BPP
1726 WriteLog("OP: Scaled bitmap @ 8 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1727 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1728 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1731 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1733 while ((int32)iwidth > 0)
1735 uint8 bits = pixels >> 56;
1737 #ifndef OP_USES_PALETTE_ZERO
1738 if (flagTRANS && bits == 0)
1740 if (flagTRANS && (paletteRAM16[bits] == 0))
1746 // This is the *only* correct use of endian-dependent code
1747 // (i.e., mem-to-mem direct copying)!
1748 *(uint16 *)currentLineBuffer = paletteRAM16[bits];
1750 if (currentLineBuffer >= lineBufferLowerLimit && currentLineBuffer <= lineBufferUpperLimit)
1751 *(uint16 *)currentLineBuffer = paletteRAM16[bits];
1754 *currentLineBuffer =
1755 BLEND_CR(*currentLineBuffer, paletteRAM[bits << 1]),
1756 *(currentLineBuffer + 1) =
1757 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[(bits << 1) + 1]);
1760 currentLineBuffer += lbufDelta;
1762 // while (horizontalRemainder <= 0x20) // I.e., it's <= 0 (*before* subtraction)
1763 while (horizontalRemainder < 0x20) // I.e., it's <= 1.0 (*before* subtraction)
1765 horizontalRemainder += hscale;
1769 horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
1773 int phrasesToSkip = pixCount / 8, pixelShift = pixCount % 8;
1775 data += (pitch << 3) * phrasesToSkip;
1776 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1777 pixels <<= 8 * pixelShift;
1778 iwidth -= phrasesToSkip;
1779 pixCount = pixelShift;
1783 else if (depth == 4) // 16 BPP
1786 WriteLog("OP: Scaled bitmap @ 16 BPP requesting FIRSTPIX!\n");
1787 // The LSB is OPFLAG_REFLECT, so sign extend it and OR 2 into it.
1788 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1791 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1793 while ((int32)iwidth > 0)
1795 uint8 bitsHi = pixels >> 56, bitsLo = pixels >> 48;
1797 //This doesn't seem right... Let's try the encoded black value ($8800):
1798 //Apparently, CRY 0 maps to $8800...
1799 if (flagTRANS && ((bitsLo | bitsHi) == 0))
1800 // if (flagTRANS && (bitsHi == 0x88) && (bitsLo == 0x00))
1805 *currentLineBuffer = bitsHi,
1806 *(currentLineBuffer + 1) = bitsLo;
1808 *currentLineBuffer =
1809 BLEND_CR(*currentLineBuffer, bitsHi),
1810 *(currentLineBuffer + 1) =
1811 BLEND_Y(*(currentLineBuffer + 1), bitsLo);
1814 currentLineBuffer += lbufDelta;
1816 /* horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
1817 while (horizontalRemainder & 0x80)
1819 horizontalRemainder += hscale;
1823 // while (horizontalRemainder <= 0x20) // I.e., it's <= 0 (*before* subtraction)
1824 while (horizontalRemainder < 0x20) // I.e., it's <= 1.0 (*before* subtraction)
1826 horizontalRemainder += hscale;
1830 horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
1834 int phrasesToSkip = pixCount / 4, pixelShift = pixCount % 4;
1836 data += (pitch << 3) * phrasesToSkip;
1837 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1838 pixels <<= 16 * pixelShift;
1840 iwidth -= phrasesToSkip;
1842 pixCount = pixelShift;
1846 else if (depth == 5) // 24 BPP
1848 //I'm not sure that you can scale a 24 BPP bitmap properly--the JTRM seems to indicate as much.
1849 WriteLog("OP: Writing 24 BPP scaled bitmap!\n");
1851 WriteLog("OP: Scaled bitmap @ 24 BPP requesting FIRSTPIX!\n");
1852 // Not sure, but I think RMW only works with 16 BPP and below, and only in CRY mode...
1853 // The LSB is OPFLAG_REFLECT, so sign extend it and or 4 into it.
1854 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 4) | 0x04;
1859 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1860 data += pitch << 3; // Multiply pitch * 8 (optimize: precompute this value)
1862 for(int i=0; i<2; i++)
1864 uint8 bits3 = pixels >> 56, bits2 = pixels >> 48,
1865 bits1 = pixels >> 40, bits0 = pixels >> 32;
1867 if (flagTRANS && (bits3 | bits2 | bits1 | bits0) == 0)
1870 *currentLineBuffer = bits3,
1871 *(currentLineBuffer + 1) = bits2,
1872 *(currentLineBuffer + 2) = bits1,
1873 *(currentLineBuffer + 3) = bits0;
1875 currentLineBuffer += lbufDelta;