4 // Original source by David Raingeard (Cal2)
5 // GCC/SDL port by Niels Wagenaar (Linux/WIN32) and Caz (BeOS)
6 // Extensive cleanups/fixes/rewrites by James Hammons
7 // (C) 2010 Underground Software
9 // JLH = James Hammons <jlhamm@acm.org>
12 // --- ---------- -------------------------------------------------------------
13 // JLH 01/16/2010 Created this log ;-)
28 //#define OP_DEBUG_BMP
// Blend table lookups: the table index is (dst << 8) | src.
// Both arguments are fully parenthesized so that a compound expression such as
// "a | b" is cast and shifted as a unit instead of being split up by operator
// precedence. The tables hold 0x10000 entries, so the index only stays in
// range when both arguments fit in 8 bits.
#define BLEND_Y(dst, src)	op_blend_y[(((uint16)(dst)) << 8) | ((uint16)(src))]
#define BLEND_CR(dst, src)	op_blend_cr[(((uint16)(dst)) << 8) | ((uint16)(src))]
// Object type codes: the low 3 bits of an object's first phrase
33 #define OBJECT_TYPE_BITMAP 0 // 000
34 #define OBJECT_TYPE_SCALE 1 // 001
35 #define OBJECT_TYPE_GPU 2 // 010
36 #define OBJECT_TYPE_BRANCH 3 // 011
37 #define OBJECT_TYPE_STOP 4 // 100
// Condition codes evaluated by BRANCH objects
39 #define CONDITION_EQUAL 0 // VC == YPOS
40 #define CONDITION_LESS_THAN 1 // VC < YPOS
41 #define CONDITION_GREATER_THAN 2 // VC > YPOS
42 #define CONDITION_OP_FLAG_SET 3
43 #define CONDITION_SECOND_HALF_LINE 4
// Bitmap object flag bits, as found in the 4-bit field extracted with
// (p1 >> 45) & 0x0F from an object's second phrase
45 #define OPFLAG_RELEASE 8 // Bus release bit
46 #define OPFLAG_TRANS 4 // Transparency bit
47 #define OPFLAG_RMW 2 // Read-Modify-Write bit
48 #define OPFLAG_REFLECT 1 // Horizontal mirror bit
50 // Private function prototypes
// Bitmap renderers; p0/p1/p2 are the object's phrases and "render" is the flag
// handed down from OPProcessList()
52 void OPProcessFixedBitmap(uint64 p0, uint64 p1, bool render);
53 void OPProcessScaledBitmap(uint64 p0, uint64 p1, uint64 p2, bool render);
// Records object addresses, starting from the given address, in the object[] table
54 void OPDiscoverObjects(uint32 address);
// Logs every object recorded in the object[] table
55 void OPDumpObjectList(void);
// Logging helpers that decode the phrases of a scaled / fixed bitmap object
56 void DumpScaledObject(uint64 p0, uint64 p1, uint64 p2);
57 void DumpFixedObject(uint64 p0, uint64 p1);
58 void DumpBitmapCore(uint64 p0, uint64 p1);
// Reads the 64-bit phrase at the given address (rounded down to an 8 byte boundary)
59 uint64 OPLoadPhrase(uint32 offset);
61 // Local global variables
63 // Blend tables (64K each)
// Both are indexed with (dst << 8) | src through the BLEND_Y / BLEND_CR macros
64 static uint8 op_blend_y[0x10000];
65 static uint8 op_blend_cr[0x10000];
66 // There may be a problem with this "RAM" overlapping (and thus being independent of)
67 // some of the regular TOM RAM...
68 //#warning objectp_ram is separated from TOM RAM--need to fix that!
69 //static uint8 objectp_ram[0x40]; // This is based at $F00000
70 uint8 objectp_running = 0;
71 //bool objectp_stop_reading_list;
// Bits per pixel for each value of the 3-bit depth field of a bitmap object
73 static uint8 op_bitmap_bit_depth[8] = { 1, 2, 4, 8, 16, 24, 32, 0 };
74 //static uint32 op_bitmap_bit_size[8] =
75 // { (uint32)(0.125*65536), (uint32)(0.25*65536), (uint32)(0.5*65536), (uint32)(1*65536),
76 // (uint32)(2*65536), (uint32)(1*65536), (uint32)(1*65536), (uint32)(1*65536) };
// Address of the object currently being processed; loaded from OPGetListPointer()
// at the start of OPProcessList()
77 static uint32 op_pointer;
// Pixels covered by one phrase for each depth field value; multiplied by the
// image width (in phrases) to get a bitmap's extent in pixels
79 int32 phraseWidthToPixels[8] = { 64, 32, 16, 8, 4, 2, 0, 0 };
83 // Object Processor initialization
87 // Here we calculate the saturating blend of a signed 4-bit value and an
88 // existing Cyan/Red value as well as a signed 8-bit value and an existing intensity...
89 // Note: CRY is 4 bits Cyan, 4 bits Red, 8 bits intensitY (16 bits in total)
90 for(int i=0; i<256*256; i++)
92 int y = (i >> 8) & 0xFF;
93 int dy = (int8)i; // Sign extend the Y index
94 int c1 = (i >> 8) & 0x0F;
95 int dc1 = (int8)(i << 4) >> 4; // Sign extend the R index
96 int c2 = (i >> 12) & 0x0F;
97 int dc2 = (int8)(i & 0xF0) >> 4; // Sign extend the C index
122 op_blend_cr[i] = (c2 << 4) | c1;
129 // Object Processor reset
133 // memset(objectp_ram, 0x00, 0x40);
// Printable name for each 3-bit object type code (used by the dump routines)
137 static const char * opType[8] =
138 { "(BITMAP)", "(SCALED BITMAP)", "(GPU INT)", "(BRANCH)", "(STOP)", "???", "???", "???" };
// Printable name for each BRANCH condition code
139 static const char * ccType[8] =
140 { "\"==\"", "\"<\"", "\">\"", "(opflag set)", "(second half line)", "?", "?", "?" };
// Addresses of the objects found by OPDiscoverObjects(), and how many are stored
141 static uint32 object[8192];
142 static uint32 numberOfObjects;
143 //static uint32 objectLink[8192];
144 //static uint32 numberOfLinks;
148 //#warning "!!! Fix OL dump so that it follows links !!!"
149 // const char * opType[8] =
150 // { "(BITMAP)", "(SCALED BITMAP)", "(GPU INT)", "(BRANCH)", "(STOP)", "???", "???", "???" };
151 // const char * ccType[8] =
152 // { "\"==\"", "\"<\"", "\">\"", "(opflag set)", "(second half line)", "?", "?", "?" };
// Log the object list pointer, followed by a raw dump of the phrases stored there
154 uint32 olp = OPGetListPointer();
155 WriteLog("\nOP: OLP = $%08X\n", olp);
156 WriteLog("OP: Phrase dump\n ----------\n");
// Covers the $100 bytes starting at OLP, one 8 byte phrase per iteration
159 for(uint32 i=0; i<0x100; i+=8)
161 uint32 hi = JaguarReadLong(olp + i, OP), lo = JaguarReadLong(olp + i + 4, OP);
162 WriteLog("\t%08X: %08X %08X %s", olp + i, hi, lo, opType[lo & 0x07]);
// Type 3 (BRANCH): decode and log YPOS, the condition code and the link address
164 if ((lo & 0x07) == 3)
166 uint16 ypos = (lo >> 3) & 0x7FF;
// NOTE(review): CC is masked to 2 bits here while OPDiscoverObjects() and
// OPDumpObjectList() use 0x07 ("Proper # of bits == 3"); with this mask
// ccType[4] can never be selected -- confirm which width is intended
167 uint8 cc = (lo >> 14) & 0x03;
168 uint32 link = ((hi << 11) | (lo >> 21)) & 0x3FFFF8;
169 WriteLog(" YPOS=%u, CC=%s, link=%08X", ypos, ccType[cc], link);
// Type 0 (BITMAP): also decode the phrase that follows
174 if ((lo & 0x07) == 0)
175 DumpFixedObject(OPLoadPhrase(olp+i), OPLoadPhrase(olp+i+8));
// Type 1 (SCALED BITMAP): also decode the two phrases that follow
177 if ((lo & 0x07) == 1)
178 DumpScaledObject(OPLoadPhrase(olp+i), OPLoadPhrase(olp+i+8), OPLoadPhrase(olp+i+16));
// Record the address of the objects reachable from OLP
184 OPDiscoverObjects(olp);
//
// Records "address" in the object[] table and then examines the object stored
// there: its type comes from the low 3 bits of the second long word and its
// link address from bits spread over both long words. The function calls
// itself on address + 8 ("Recursion needed to follow all links!").
//   address - address of the object to start from
// NOTE(review): neither store into object[] (8192 entries) has a visible
// bound check on numberOfObjects -- confirm the list can never grow past it
//
189 void OPDiscoverObjects(uint32 address)
191 // Check to see if we've already seen this object
192 for(uint32 i=0; i<numberOfObjects; i++)
194 if (address == object[i])
198 // Store the object...
199 object[numberOfObjects++] = address;
200 uint8 objectType = 0;
// The object is read as two big 32-bit halves: hi = first long, lo = second long
204 uint32 hi = JaguarReadLong(address + 0, OP);
205 uint32 lo = JaguarReadLong(address + 4, OP);
206 objectType = lo & 0x07;
// Link address: assembled from both halves and kept phrase (8 byte) aligned
207 uint32 link = ((hi << 11) | (lo >> 21)) & 0x3FFFF8;
// ypos and cc are decoded here but not used by any statement visible in this function
211 uint16 ypos = (lo >> 3) & 0x7FF;
212 uint8 cc = (lo >> 14) & 0x07; // Proper # of bits == 3
214 // Recursion needed to follow all links!
215 OPDiscoverObjects(address + 8);
218 if (address == link) // Ruh roh...
220 // Runaway recursive link is bad!
226 // Check to see if we've already seen this object, and add it if not
227 bool seenObject = false;
229 for(uint32 i=0; i<numberOfObjects; i++)
231 if (address == object[i])
239 object[numberOfObjects++] = address;
// Type 4 is STOP, which ends the walk
241 while (objectType != 4);
//
// Writes one log entry for every address stored in object[]: the raw phrase,
// the object type name and, depending on the object, its decoded fields.
//
244 void OPDumpObjectList(void)
246 for(uint32 i=0; i<numberOfObjects; i++)
248 uint32 address = object[i];
// First phrase of the object, read as two 32-bit halves
250 uint32 hi = JaguarReadLong(address + 0, OP);
251 uint32 lo = JaguarReadLong(address + 4, OP);
252 uint8 objectType = lo & 0x07;
253 uint32 link = ((hi << 11) | (lo >> 21)) & 0x3FFFF8;
254 WriteLog("%08X: %08X %08X %s", address, hi, lo, opType[objectType]);
// YPOS / condition code / link decode (the fields a BRANCH object carries)
258 uint16 ypos = (lo >> 3) & 0x7FF;
259 uint8 cc = (lo >> 14) & 0x07; // Proper # of bits == 3
260 WriteLog(" YPOS=%u, CC=%s, link=$%08X", ypos, ccType[cc], link);
// Two phrase decode for fixed bitmaps, three phrase decode for scaled bitmaps
266 DumpFixedObject(OPLoadPhrase(address + 0), OPLoadPhrase(address + 8));
269 DumpScaledObject(OPLoadPhrase(address + 0), OPLoadPhrase(address + 8),
270 OPLoadPhrase(address + 16));
// An object whose link points back at itself is called out in the log
272 if (address == link) // Ruh roh...
274 // Runaway recursive link is bad!
275 WriteLog("***** SELF REFERENTIAL LINK *****\n\n");
283 // Object Processor memory access
284 // Memory range: F00010 - F00027
286 // F00010-F00017 R xxxxxxxx xxxxxxxx OB - current object code from the graphics processor
287 // F00020-F00023 W xxxxxxxx xxxxxxxx OLP - start of the object list
288 // F00026 W -------- -------x OBF - object processor flag
// Returns the byte stored at "offset" in objectp_ram; "who" is not used by the
// statement visible here.
// NOTE(review): the only declaration of objectp_ram in view is commented out,
// so this accessor presumably sits in a disabled region -- confirm
292 uint8 OPReadByte(uint32 offset, uint32 who/*=UNKNOWN*/)
295 return objectp_ram[offset];
// Returns the 16-bit value that GET16 reads from objectp_ram at "offset";
// "who" is not used by the statement visible here.
// NOTE(review): the only declaration of objectp_ram in view is commented out,
// so this accessor presumably sits in a disabled region -- confirm
298 uint16 OPReadWord(uint32 offset, uint32 who/*=UNKNOWN*/)
301 return GET16(objectp_ram, offset);
// Stores "data" at "offset" in objectp_ram; "who" is not used by the statement
// visible here.
// NOTE(review): the only declaration of objectp_ram in view is commented out,
// so this accessor presumably sits in a disabled region -- confirm
304 void OPWriteByte(uint32 offset, uint8 data, uint32 who/*=UNKNOWN*/)
307 objectp_ram[offset] = data;
// Stores the 16-bit "data" at "offset" in objectp_ram through SET16; "who" is
// not used by the statement visible here.
// NOTE(review): the only declaration of objectp_ram in view is commented out,
// so this accessor presumably sits in a disabled region -- confirm
310 void OPWriteWord(uint32 offset, uint16 data, uint32 who/*=UNKNOWN*/)
313 SET16(objectp_ram, offset, data);
// Disabled tracing of writes to the list pointer words
315 /*if (offset == 0x20)
316 WriteLog("OP: Setting lo list pointer: %04X\n", data);
318 WriteLog("OP: Setting hi list pointer: %04X\n", data);//*/
322 uint32 OPGetListPointer(void)
324 // Note: This register is LO / HI WORD, hence the funky look of this...
325 return GET16(tomRam8, 0x20) | (GET16(tomRam8, 0x22) << 16);
328 // This is WRONG, since the OBF is only 16 bits wide!!! [FIXED]
330 uint32 OPGetStatusRegister(void)
332 return GET16(tomRam8, 0x26);
335 // This is WRONG, since the OBF is only 16 bits wide!!! [FIXED]
337 void OPSetStatusRegister(uint32 data)
339 tomRam8[0x26] = (data & 0x0000FF00) >> 8;
340 tomRam8[0x27] |= (data & 0xFE);
343 void OPSetCurrentObject(uint64 object)
345 //Not sure this is right... Wouldn't it just be stored 64 bit BE?
346 // Stored as least significant 32 bits first, ms32 last in big endian
347 /* objectp_ram[0x13] = object & 0xFF; object >>= 8;
348 objectp_ram[0x12] = object & 0xFF; object >>= 8;
349 objectp_ram[0x11] = object & 0xFF; object >>= 8;
350 objectp_ram[0x10] = object & 0xFF; object >>= 8;
352 objectp_ram[0x17] = object & 0xFF; object >>= 8;
353 objectp_ram[0x16] = object & 0xFF; object >>= 8;
354 objectp_ram[0x15] = object & 0xFF; object >>= 8;
355 objectp_ram[0x14] = object & 0xFF;*/
356 // Let's try regular good old big endian...
357 tomRam8[0x17] = object & 0xFF; object >>= 8;
358 tomRam8[0x16] = object & 0xFF; object >>= 8;
359 tomRam8[0x15] = object & 0xFF; object >>= 8;
360 tomRam8[0x14] = object & 0xFF; object >>= 8;
362 tomRam8[0x13] = object & 0xFF; object >>= 8;
363 tomRam8[0x12] = object & 0xFF; object >>= 8;
364 tomRam8[0x11] = object & 0xFF; object >>= 8;
365 tomRam8[0x10] = object & 0xFF;
368 uint64 OPLoadPhrase(uint32 offset)
370 offset &= ~0x07; // 8 byte alignment
371 return ((uint64)JaguarReadLong(offset, OP) << 32) | (uint64)JaguarReadLong(offset+4, OP);
374 void OPStorePhrase(uint32 offset, uint64 p)
376 offset &= ~0x07; // 8 byte alignment
377 JaguarWriteLong(offset, p >> 32, OP);
378 JaguarWriteLong(offset + 4, p & 0xFFFFFFFF, OP);
382 // Debugging routines
384 void DumpScaledObject(uint64 p0, uint64 p1, uint64 p2)
386 WriteLog(" %08X %08X\n", (uint32)(p1>>32), (uint32)(p1&0xFFFFFFFF));
387 WriteLog(" %08X %08X\n", (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
388 DumpBitmapCore(p0, p1);
389 uint32 hscale = p2 & 0xFF;
390 uint32 vscale = (p2 >> 8) & 0xFF;
391 uint32 remainder = (p2 >> 16) & 0xFF;
392 WriteLog(" [hsc: %02X, vsc: %02X, rem: %02X]\n", hscale, vscale, remainder);
395 void DumpFixedObject(uint64 p0, uint64 p1)
397 WriteLog(" %08X %08X\n", (uint32)(p1>>32), (uint32)(p1&0xFFFFFFFF));
398 DumpBitmapCore(p0, p1);
401 void DumpBitmapCore(uint64 p0, uint64 p1)
403 uint32 bdMultiplier[8] = { 64, 32, 16, 8, 4, 2, 1, 1 };
404 uint8 bitdepth = (p1 >> 12) & 0x07;
405 //WAS: int16 ypos = ((p0 >> 3) & 0x3FF); // ??? What if not interlaced (/2)?
406 int16 ypos = ((p0 >> 3) & 0x7FF); // ??? What if not interlaced (/2)?
407 int32 xpos = p1 & 0xFFF;
408 xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos); // Sign extend that mutha!
409 uint32 iwidth = ((p1 >> 28) & 0x3FF);
410 uint32 dwidth = ((p1 >> 18) & 0x3FF); // Unsigned!
411 uint16 height = ((p0 >> 14) & 0x3FF);
412 uint32 link = ((p0 >> 24) & 0x7FFFF) << 3;
413 uint32 ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
414 uint32 firstPix = (p1 >> 49) & 0x3F;
415 uint8 flags = (p1 >> 45) & 0x0F;
416 uint8 idx = (p1 >> 38) & 0x7F;
417 uint32 pitch = (p1 >> 15) & 0x07;
418 WriteLog(" [%u x %u @ (%i, %u) (iw:%u, dw:%u) (%u bpp), l:%08X, p:%08X fp:%02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
419 iwidth * bdMultiplier[bitdepth],
420 height, xpos, ypos, iwidth, dwidth, op_bitmap_bit_depth[bitdepth], link,
421 ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""),
422 (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""),
423 (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
427 // Object Processor main routine
429 #warning "Need to fix this so that when an GPU object IRQ happens, we can pick up OP processing where we left off. !!! FIX !!!"
//
// Object Processor main routine. From the statements visible here: processing
// starts at the address returned by OPGetListPointer(), the first phrase of
// each object is fetched with OPLoadPhrase(), and the low three bits of that
// phrase select the handling (BITMAP, SCALE, GPU, BRANCH or STOP).
//   halfline - compared against the YPOS field of bitmap objects
//   render   - handed through to OPProcessFixedBitmap() / OPProcessScaledBitmap()
// NOTE(review): the first #warning below has no closing quote on its message
//
430 void OPProcessList(int halfline, bool render)
432 #warning "!!! NEED TO HANDLE MULTIPLE FIELDS PROPERLY !!!
433 // We ignore them, for now; not good
436 extern int op_start_log;
437 // char * condition_to_str[8] =
438 // { "==", "<", ">", "(opflag set)", "(second half line)", "?", "?", "?" };
440 op_pointer = OPGetListPointer();
442 // objectp_stop_reading_list = false;
444 //WriteLog("OP: Processing line #%u (OLP=%08X)...\n", halfline, op_pointer);
447 // *** BEGIN OP PROCESSOR TESTING ONLY ***
448 extern bool interactiveMode;
450 extern int objectPtr;
452 int bitmapCounter = 0;
453 // *** END OP PROCESSOR TESTING ONLY ***
455 uint32 opCyclesToRun = 30000; // This is a pulled-out-of-the-air value (will need to be fixed, obviously!)
457 // if (op_pointer) WriteLog(" new op list at 0x%.8x halfline %i\n",op_pointer,halfline);
460 // *** BEGIN OP PROCESSOR TESTING ONLY ***
461 if (interactiveMode && bitmapCounter == objectPtr)
465 // *** END OP PROCESSOR TESTING ONLY ***
466 // if (objectp_stop_reading_list)
469 uint64 p0 = OPLoadPhrase(op_pointer);
471 //WriteLog("\t%08X type %i\n", op_pointer, (uint8)p0 & 0x07);
// Optional trace: when logging is switched on and this is the VDB line, decode
// the current object and write its fields to the log
474 if (halfline == TOMGetVDB() && op_start_log)
475 //if (halfline == 215 && op_start_log)
476 //if (halfline == 28 && op_start_log)
479 WriteLog("%08X --> phrase %08X %08X", op_pointer - 8, (int)(p0>>32), (int)(p0&0xFFFFFFFF));
480 if ((p0 & 0x07) == OBJECT_TYPE_BITMAP)
482 WriteLog(" (BITMAP) ");
483 uint64 p1 = OPLoadPhrase(op_pointer);
484 WriteLog("\n%08X --> phrase %08X %08X ", op_pointer, (int)(p1>>32), (int)(p1&0xFFFFFFFF));
485 uint8 bitdepth = (p1 >> 12) & 0x07;
486 //WAS: int16 ypos = ((p0 >> 3) & 0x3FF); // ??? What if not interlaced (/2)?
487 int16 ypos = ((p0 >> 3) & 0x7FF); // ??? What if not interlaced (/2)?
488 int32 xpos = p1 & 0xFFF;
489 xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);
490 uint32 iwidth = ((p1 >> 28) & 0x3FF);
491 uint32 dwidth = ((p1 >> 18) & 0x3FF); // Unsigned!
492 uint16 height = ((p0 >> 14) & 0x3FF);
493 uint32 link = ((p0 >> 24) & 0x7FFFF) << 3;
494 uint32 ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
495 uint32 firstPix = (p1 >> 49) & 0x3F;
496 uint8 flags = (p1 >> 45) & 0x0F;
497 uint8 idx = (p1 >> 38) & 0x7F;
498 uint32 pitch = (p1 >> 15) & 0x07;
499 WriteLog("\n [%u (%u) x %u @ (%i, %u) (%u bpp), l: %08X, p: %08X fp: %02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
500 iwidth, dwidth, height, xpos, ypos, op_bitmap_bit_depth[bitdepth], link, ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""), (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""), (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
502 if ((p0 & 0x07) == OBJECT_TYPE_SCALE)
504 WriteLog(" (SCALED BITMAP)");
505 uint64 p1 = OPLoadPhrase(op_pointer), p2 = OPLoadPhrase(op_pointer+8);
506 WriteLog("\n%08X --> phrase %08X %08X ", op_pointer, (int)(p1>>32), (int)(p1&0xFFFFFFFF));
507 WriteLog("\n%08X --> phrase %08X %08X ", op_pointer+8, (int)(p2>>32), (int)(p2&0xFFFFFFFF));
508 uint8 bitdepth = (p1 >> 12) & 0x07;
509 //WAS: int16 ypos = ((p0 >> 3) & 0x3FF); // ??? What if not interlaced (/2)?
510 int16 ypos = ((p0 >> 3) & 0x7FF); // ??? What if not interlaced (/2)?
511 int32 xpos = p1 & 0xFFF;
512 xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);
513 uint32 iwidth = ((p1 >> 28) & 0x3FF);
514 uint32 dwidth = ((p1 >> 18) & 0x3FF); // Unsigned!
515 uint16 height = ((p0 >> 14) & 0x3FF);
516 uint32 link = ((p0 >> 24) & 0x7FFFF) << 3;
517 uint32 ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
518 uint32 firstPix = (p1 >> 49) & 0x3F;
519 uint8 flags = (p1 >> 45) & 0x0F;
520 uint8 idx = (p1 >> 38) & 0x7F;
521 uint32 pitch = (p1 >> 15) & 0x07;
522 WriteLog("\n [%u (%u) x %u @ (%i, %u) (%u bpp), l: %08X, p: %08X fp: %02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
523 iwidth, dwidth, height, xpos, ypos, op_bitmap_bit_depth[bitdepth], link, ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""), (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""), (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
524 uint32 hscale = p2 & 0xFF;
525 uint32 vscale = (p2 >> 8) & 0xFF;
526 uint32 remainder = (p2 >> 16) & 0xFF;
527 WriteLog(" [hsc: %02X, vsc: %02X, rem: %02X]\n", hscale, vscale, remainder);
529 if ((p0 & 0x07) == OBJECT_TYPE_GPU)
530 WriteLog(" (GPU)\n");
531 if ((p0 & 0x07) == OBJECT_TYPE_BRANCH)
533 WriteLog(" (BRANCH)\n");
534 uint8 * jaguarMainRam = GetRamPtr();
535 WriteLog("[RAM] --> ");
536 for(int k=0; k<8; k++)
537 WriteLog("%02X ", jaguarMainRam[op_pointer-8 + k]);
540 if ((p0 & 0x07) == OBJECT_TYPE_STOP)
541 WriteLog(" --> List end\n\n");
// Dispatch on the object type held in the low 3 bits of the first phrase
545 switch ((uint8)p0 & 0x07)
547 case OBJECT_TYPE_BITMAP:
549 //WAS: uint16 ypos = (p0 >> 3) & 0x3FF;
550 uint16 ypos = (p0 >> 3) & 0x7FF;
551 // This is only theory implied by Rayman...!
552 // It seems that if the YPOS is zero, then bump the YPOS value so that it coincides with
553 // the VDB value. With interlacing, this would be slightly more tricky.
554 // There's probably another bit somewhere that enables this mode--but so far, doesn't seem
555 // to affect any other game in a negative way (that I've seen).
556 // Either that, or it's an undocumented bug...
558 //No, the reason this was needed is that the OP code before was wrong. Any value
559 //less than VDB will get written to the top line of the display!
561 // Not so sure... Let's see what happens here...
564 ypos = TOMReadWord(0xF00046, OP) / 2; // Get the VDB value
566 // Actually, no. Any item less than VDB will get only the lines that hang over VDB displayed.
567 // Actually, this is incorrect. It seems that VDB value is wrong somewhere and that's
568 // what's causing things to fuck up. Still no idea why.
570 uint32 height = (p0 & 0xFFC000) >> 14;
571 uint32 oldOPP = op_pointer - 8;
572 // *** BEGIN OP PROCESSOR TESTING ONLY ***
573 if (inhibit && op_start_log)
574 WriteLog("!!! ^^^ This object is INHIBITED! ^^^ !!!\n");
576 if (!inhibit) // For OP testing only!
577 // *** END OP PROCESSOR TESTING ONLY ***
578 if (halfline >= ypos && height > 0)
580 uint64 p1 = OPLoadPhrase(op_pointer);
582 //WriteLog("OP: Writing halfline %d with ypos == %d...\n", halfline, ypos);
583 //WriteLog("--> Writing %u BPP bitmap...\n", op_bitmap_bit_depth[(p1 >> 12) & 0x07]);
584 // OPProcessFixedBitmap(halfline, p0, p1, render);
585 OPProcessFixedBitmap(p0, p1, render);
589 //???Does this really happen??? Doesn't seem to work if you do this...!
590 //Probably not. Must be a bug in the documentation...!
591 // uint32 link = (p0 & 0x7FFFF000000) >> 21;
592 // SET16(tom_ram_8, 0x20, link & 0xFFFF); // OLP
593 // SET16(tom_ram_8, 0x22, link >> 16);
594 /* uint32 height = (p0 & 0xFFC000) >> 14;
597 // NOTE: Would subtract 2 if in interlaced mode...!
598 // uint64 height = ((p0 & 0xFFC000) - 0x4000) & 0xFFC000;
602 uint64 data = (p0 & 0xFFFFF80000000000LL) >> 40;
603 uint64 dwidth = (p1 & 0xFFC0000) >> 15;
606 p0 &= ~0xFFFFF80000FFC000LL; // Mask out old data...
607 p0 |= (uint64)height << 14;
// Write the updated first phrase back to the object's own address
609 OPStorePhrase(oldOPP, p0);
611 //WriteLog("\t\tOld OP: %08X -> ", op_pointer);
612 //Temp, for testing...
613 //No doubt, this type of check will break all kinds of stuff... !!! FIX !!!
614 //And it does! !!! FIX !!!
615 //Let's remove this "fix" since it screws up more than it fixes.
616 /* if (op_pointer > ((p0 & 0x000007FFFF000000LL) >> 21))
619 // NOTE: The link address only replaces bits 3-21 in the OLP, and this replaces
620 // EVERYTHING. !!! FIX !!! [DONE]
621 #warning "!!! Link address is not linked properly for all object types !!!"
622 #warning "!!! Only BITMAP is properly handled !!!"
// Follow the link: bits 3-21 of the pointer are replaced, the rest is kept
623 op_pointer &= 0xFFC00007;
624 op_pointer |= (p0 & 0x000007FFFF000000LL) >> 21;
625 //WriteLog("New OP: %08X\n", op_pointer);
626 //kludge: Seems that memory access is mirrored in the first 8MB of memory...
627 if (op_pointer > 0x1FFFFF && op_pointer < 0x800000)
628 op_pointer &= 0xFF1FFFFF; // Knock out bits 21-23
632 case OBJECT_TYPE_SCALE:
634 //WAS: uint16 ypos = (p0 >> 3) & 0x3FF;
635 uint16 ypos = (p0 >> 3) & 0x7FF;
636 uint32 height = (p0 & 0xFFC000) >> 14;
637 uint32 oldOPP = op_pointer - 8;
638 //WriteLog("OP: Scaled Object (ypos=%04X, height=%04X", ypos, height);
639 // *** BEGIN OP PROCESSOR TESTING ONLY ***
640 if (inhibit && op_start_log)
642 WriteLog("!!! ^^^ This object is INHIBITED! ^^^ !!! (halfline=%u, ypos=%u, height=%u)\n", halfline, ypos, height);
643 DumpScaledObject(p0, OPLoadPhrase(op_pointer), OPLoadPhrase(op_pointer+8));
646 if (!inhibit) // For OP testing only!
647 // *** END OP PROCESSOR TESTING ONLY ***
648 if (halfline >= ypos && height > 0)
650 uint64 p1 = OPLoadPhrase(op_pointer);
652 uint64 p2 = OPLoadPhrase(op_pointer);
654 //WriteLog("OP: %08X (%d) %08X%08X %08X%08X %08X%08X\n", oldOPP, halfline, (uint32)(p0>>32), (uint32)(p0&0xFFFFFFFF), (uint32)(p1>>32), (uint32)(p1&0xFFFFFFFF), (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
655 OPProcessScaledBitmap(p0, p1, p2, render);
// REMAINDER and VSCALE are the second and first bytes above the low byte of p2
659 uint16 remainder = (p2 >> 16) & 0xFF;//, vscale = p2 >> 8;
660 uint8 /*remainder = p2 >> 16,*/ vscale = p2 >> 8;
661 //Actually, we should skip this object if it has a vscale of zero.
662 //Or do we? Not sure... Atari Karts has a few lines that look like:
664 //000E8268 --> phrase 00010000 7000B00D
665 // [7 (0) x 1 @ (13, 0) (8 bpp), l: 000E82A0, p: 000E0FC0 fp: 00, fl:RELEASE, idx:00, pt:01]
666 // [hsc: 9A, vsc: 00, rem: 00]
667 // Could it be the vscale is overridden if the DWIDTH is zero? Hmm...
668 //WriteLog("OP: Scaled bitmap processing (rem=%02X, vscale=%02X)...\n", remainder, vscale);//*/
671 vscale = 0x20; // OP bug??? Nope, it isn't...! Or is it?
673 //extern int start_logging;
675 // WriteLog("--> Returned from scaled bitmap processing (rem=%02X, vscale=%02X)...\n", remainder, vscale);//*/
677 //--> Returned from scaled bitmap processing (rem=20, vscale=80)...
678 //There are other problems here, it looks like...
680 //About to execute OP (508)...
682 OP: Scaled bitmap 4x? 4bpp at 38,? hscale=7C fpix=0 data=00075E28 pitch 1 hflipped=no dwidth=? (linked to 00071118) Transluency=no
683 --> Returned from scaled bitmap processing (rem=50, vscale=7C)...
684 OP: Scaled bitmap 4x? 4bpp at 38,? hscale=7C fpix=0 data=00075E28 pitch 1 hflipped=no dwidth=? (linked to 00071118) Transluency=no
685 --> Returned from scaled bitmap processing (rem=30, vscale=7C)...
686 OP: Scaled bitmap 4x? 4bpp at 38,? hscale=7C fpix=0 data=00075E28 pitch 1 hflipped=no dwidth=? (linked to 00071118) Transluency=no
687 --> Returned from scaled bitmap processing (rem=10, vscale=7C)...
688 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756A8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
689 --> Returned from scaled bitmap processing (rem=00, vscale=7E)...
690 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
691 --> Returned from scaled bitmap processing (rem=00, vscale=80)...
692 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
693 --> Returned from scaled bitmap processing (rem=5E, vscale=7E)...
694 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756E8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
695 --> Returned from scaled bitmap processing (rem=60, vscale=80)...
696 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
697 --> Returned from scaled bitmap processing (rem=3E, vscale=7E)...
698 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756E8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
699 --> Returned from scaled bitmap processing (rem=40, vscale=80)...
700 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
701 --> Returned from scaled bitmap processing (rem=1E, vscale=7E)...
702 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756E8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
703 --> Returned from scaled bitmap processing (rem=20, vscale=80)...
705 //Here's another problem:
706 // [hsc: 20, vsc: 20, rem: 00]
707 // Since we're not checking for $E0 (but that's what we get from the above), we end
708 // up repeating this halfline unnecessarily... !!! FIX !!! [DONE, but... still not quite
709 // right. Either that, or the Accolade team that wrote Bubsy screwed up royal.]
710 //Also note: $E0 = 7.0 which IS a legal vscale value...
712 // if (remainder & 0x80) // I.e., it's negative
713 // if ((remainder & 0x80) || remainder == 0) // I.e., it's <= 0
714 // if ((remainder - 1) >= 0xE0) // I.e., it's <= 0
715 // if ((remainder >= 0xE1) || remainder == 0)// I.e., it's <= 0
716 // if ((remainder >= 0xE1 && remainder <= 0xFF) || remainder == 0)// I.e., it's <= 0
717 // if (remainder <= 0x20) // I.e., it's <= 1.0
718 // I.e., it's < 1.0f -> means it'll go negative when we subtract 1.0f.
719 if (remainder < 0x20)
721 uint64 data = (p0 & 0xFFFFF80000000000LL) >> 40;
722 uint64 dwidth = (p1 & 0xFFC0000) >> 15;
724 // while (remainder & 0x80)
725 // while ((remainder & 0x80) || remainder == 0)
726 // while ((remainder - 1) >= 0xE0)
727 // while ((remainder >= 0xE1) || remainder == 0)
728 // while ((remainder >= 0xE1 && remainder <= 0xFF) || remainder == 0)
729 // while (remainder <= 0x20)
730 while (remainder < 0x20)
740 p0 &= ~0xFFFFF80000FFC000LL; // Mask out old data...
741 p0 |= (uint64)height << 14;
// Write the updated first phrase back to the object's own address
743 OPStorePhrase(oldOPP, p0);
746 remainder -= 0x20; // 1.0f in [3.5] fixed point format
749 // WriteLog("--> Finished writebacks...\n");//*/
751 //WriteLog(" [%08X%08X -> ", (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
// Put the new remainder back into the third phrase and store it (two phrases
// after the object's first phrase)
752 p2 &= ~0x0000000000FF0000LL;
753 p2 |= (uint64)remainder << 16;
754 //WriteLog("%08X%08X]\n", (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
755 OPStorePhrase(oldOPP + 16, p2);
756 //remainder = (uint8)(p2 >> 16), vscale = (uint8)(p2 >> 8);
757 //WriteLog(" [after]: rem=%02X, vscale=%02X\n", remainder, vscale);
// Follow the link; unlike the BITMAP case above, the whole pointer is replaced here
760 op_pointer = (p0 & 0x000007FFFF000000LL) >> 21;
763 case OBJECT_TYPE_GPU:
765 //WriteLog("OP: Asserting GPU IRQ #3...\n");
766 #warning "Need to fix OP GPU IRQ handling! !!! FIX !!!"
767 OPSetCurrentObject(p0);
768 GPUSetIRQLine(3, ASSERT_LINE);
769 //Also, OP processing is suspended from this point until OBF (F00026) is written to...
772 //OPSuspendedByGPU = true;
773 //Dunno if the OP keeps processing from where it was interrupted, or if it just continues
774 //on the next halfline...
775 // --> It continues from where it was interrupted! !!! FIX !!!
778 case OBJECT_TYPE_BRANCH:
780 uint16 ypos = (p0 >> 3) & 0x7FF;
781 // NOTE: The JTRM sez there are only 2 bits used for the CC, but lists *five*
782 // conditions! Need at least one more bit for that! :-P
783 // Also, the ASIC nets imply that it uses bits 14-16 (height in BM & SBM objects)
784 #warning "!!! Possibly bad CC handling in OP (missing 1 bit) !!!"
// NOTE(review): with this 2-bit mask cc can never equal
// CONDITION_SECOND_HALF_LINE (4), although a case for it appears below; the
// dump routines in this file mask with 0x07 -- confirm the intended width
785 uint8 cc = (p0 >> 14) & 0x03;
786 uint32 link = (p0 >> 21) & 0x3FFFF8;
788 // if ((ypos!=507)&&(ypos!=25))
789 // WriteLog("\t%i%s%i link=0x%.8x\n",halfline,condition_to_str[cc],ypos>>1,link);
792 case CONDITION_EQUAL:
793 if (TOMReadWord(0xF00006, OP) == ypos || ypos == 0x7FF)
796 case CONDITION_LESS_THAN:
797 if (TOMReadWord(0xF00006, OP) < ypos)
800 case CONDITION_GREATER_THAN:
801 if (TOMReadWord(0xF00006, OP) > ypos)
804 case CONDITION_OP_FLAG_SET:
805 if (OPGetStatusRegister() & 0x01)
808 case CONDITION_SECOND_HALF_LINE:
809 //Here's the ASIC code:
810 // ND4(cctrue5, newheight[2], heightl[1], heightl[0], hcb[10]);
811 //which means, do the link if bit 10 of HC is set...
813 // This basically means branch if bit 10 of HC is set
814 #warning "Unhandled condition code causes emulator to crash... !!! FIX !!!"
815 WriteLog("OP: Unexpected CONDITION_SECOND_HALF_LINE in BRANCH object\nOP: shutting down!\n");
820 // Basically, if you do this, the OP does nothing. :-)
821 WriteLog("OP: Unimplemented branch condition %i\n", cc);
825 case OBJECT_TYPE_STOP:
829 //WriteLog("OP: --> STOP\n");
830 // op_set_status_register(((p0>>3) & 0xFFFFFFFF));
831 //This seems more likely...
832 OPSetCurrentObject(p0);
836 // We need to check whether these interrupts are enabled or not, THEN
837 // set an IRQ + pending flag if necessary...
838 if (TOMIRQEnabled(IRQ_OPFLAG))
840 TOMSetPendingObjectInt();
841 m68k_set_irq(2); // Cause a 68K IPL 2 to occur...
849 // WriteLog("op: unknown object type %i\n", ((uint8)p0 & 0x07));
853 // Here is a little sanity check to keep the OP from locking up the machine
854 // when fed bad data. Better would be to count how many actual cycles it used
855 // and bail out/reenter to properly simulate an overloaded OP... !!! FIX !!!
856 #warning "Better would be to count how many actual cycles it used and bail out/reenter to properly simulate an overloaded OP... !!! FIX !!!"
865 // Store fixed size bitmap in line buffer
867 void OPProcessFixedBitmap(uint64 p0, uint64 p1, bool render)
869 // Need to make sure that when writing that it stays within the line buffer...
870 // LBUF ($F01800 - $F01D9E) 360 x 32-bit RAM
871 uint8 depth = (p1 >> 12) & 0x07; // Color depth of image
872 int32 xpos = ((int16)((p1 << 4) & 0xFFFF)) >> 4;// Image xpos in LBUF
873 uint32 iwidth = (p1 >> 28) & 0x3FF; // Image width in *phrases*
874 uint32 data = (p0 >> 40) & 0xFFFFF8; // Pixel data address
875 //#ifdef OP_DEBUG_BMP
876 uint32 firstPix = (p1 >> 49) & 0x3F;
877 // "The LSB is significant only for scaled objects..." -JTRM
878 // "In 1 BPP mode, all five bits are significant. In 2 BPP mode, the top four are significant..."
881 // We can ignore the RELEASE (high order) bit for now--probably forever...!
882 // uint8 flags = (p1 >> 45) & 0x0F; // REFLECT, RMW, TRANS, RELEASE
883 //Optimize: break these out to their own BOOL values
884 uint8 flags = (p1 >> 45) & 0x07; // REFLECT (0), RMW (1), TRANS (2)
885 bool flagREFLECT = (flags & OPFLAG_REFLECT ? true : false),
886 flagRMW = (flags & OPFLAG_RMW ? true : false),
887 flagTRANS = (flags & OPFLAG_TRANS ? true : false);
888 // "For images with 1 to 4 bits/pixel the top 7 to 4 bits of the index
889 // provide the most significant bits of the palette address."
890 uint8 index = (p1 >> 37) & 0xFE; // CLUT index offset (upper pix, 1-4 bpp)
891 uint32 pitch = (p1 >> 15) & 0x07; // Phrase pitch
892 pitch <<= 3; // Optimization: Multiply pitch by 8
894 // int16 scanlineWidth = tom_getVideoModeWidth();
895 uint8 * tomRam8 = TOMGetRamPointer();
896 uint8 * paletteRAM = &tomRam8[0x400];
897 // This is OK as long as it's used correctly: For 16-bit RAM to RAM direct copies--NOT
898 // for use when using endian-corrected data (i.e., any of the *_word_read functions!)
899 uint16 * paletteRAM16 = (uint16 *)paletteRAM;
901 // WriteLog("bitmap %ix? %ibpp at %i,? firstpix=? data=0x%.8x pitch %i hflipped=%s dwidth=? (linked to ?) RMW=%s Tranparent=%s\n",
902 // iwidth, op_bitmap_bit_depth[bitdepth], xpos, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), (flags&OPFLAG_RMW ? "yes" : "no"), (flags&OPFLAG_TRANS ? "yes" : "no"));
904 // Is it OK to have a 0 for the data width??? (i.e., undocumented?)
905 // Seems to be... Seems that dwidth *can* be zero (i.e., reuse same line) as well.
906 // Pitch == 0 is OK too...
908 //kludge: Seems that the OP treats iwidth == 0 as iwidth == 1... Need to investigate
909 // on real hardware...
910 #warning "!!! Need to investigate iwidth == 0 behavior on real hardware !!!"
914 // if (!render || op_pointer == 0 || ptr == 0 || pitch == 0)
915 //I'm not convinced that we need to concern ourselves with data & op_pointer here either!
916 if (!render || iwidth == 0)
919 //OK, so we know the position in the line buffer is correct. It's the clipping in
920 //24bpp mode that's wrong!
922 //This is a total kludge, based upon the fact that 24BPP mode puts *4* bytes
923 //into the line buffer for each pixel.
924 if (depth == 5) // i.e., 24bpp mode...
925 xpos >>= 1; // Cut it in half...
928 //#define OP_DEBUG_BMP
929 //#ifdef OP_DEBUG_BMP
930 // WriteLog("bitmap %ix%i %ibpp at %i,%i firstpix=%i data=0x%.8x pitch %i hflipped=%s dwidth=%i (linked to 0x%.8x) Transluency=%s\n",
931 // iwidth, height, op_bitmap_bit_depth[bitdepth], xpos, ypos, firstPix, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), dwidth, op_pointer, (flags&OPFLAG_RMW ? "yes" : "no"));
934 // int32 leftMargin = xpos, rightMargin = (xpos + (phraseWidthToPixels[depth] * iwidth)) - 1;
935 int32 startPos = xpos, endPos = xpos +
936 (!flagREFLECT ? (phraseWidthToPixels[depth] * iwidth) - 1
937 : -((phraseWidthToPixels[depth] * iwidth) + 1));
938 uint32 clippedWidth = 0, phraseClippedWidth = 0, dataClippedWidth = 0;//, phrasePixel = 0;
939 bool in24BPPMode = (((GET16(tomRam8, 0x0028) >> 1) & 0x03) == 1 ? true : false); // VMODE
940 // Not sure if this is Jaguar Two only location or what...
941 // From the docs, it is... If we want to limit here we should think of something else.
942 // int32 limit = GET16(tom_ram_8, 0x0008); // LIMIT
943 // int32 limit = 720;
944 // int32 lbufWidth = (!in24BPPMode ? limit - 1 : (limit / 2) - 1); // Zero based limit...
945 //printf("[OP:xpos=%i,spos=%i,epos=%i>", xpos, startPos, endPos);
946 // This is correct, the OP line buffer is a constant size...
948 int32 lbufWidth = 719;
950 // If the image is completely to the left or right of the line buffer, then bail.
951 //If in REFLECT mode, then these values are swapped! !!! FIX !!! [DONE]
952 //There are four possibilities:
953 // 1. image sits on left edge and no REFLECT; starts out of bounds but ends in bounds.
954 // 2. image sits on left edge and REFLECT; starts in bounds but ends out of bounds.
955 // 3. image sits on right edge and REFLECT; starts out of bounds but ends in bounds.
956 // 4. image sits on right edge and no REFLECT; starts in bounds but ends out of bounds.
957 //Numbers 2 & 4 can be caught by checking the LBUF clip while in the inner loop,
958 // numbers 1 & 3 are of concern.
959 // This *indirectly* handles only cases 2 & 4! And is WRONG if REFLECT is set...!
960 // if (rightMargin < 0 || leftMargin > lbufWidth)
962 // It might be easier to swap these (if REFLECTed) and just use XPOS down below...
963 // That way, you could simply set XPOS to leftMargin if !REFLECT and to rightMargin otherwise.
964 // Still have to be careful with the DATA and IWIDTH values though...
966 // if ((!flagREFLECT && (rightMargin < 0 || leftMargin > lbufWidth))
967 // || (flagREFLECT && (leftMargin < 0 || rightMargin > lbufWidth)))
969 if ((!flagREFLECT && (endPos < 0 || startPos > lbufWidth))
970 || (flagREFLECT && (startPos < 0 || endPos > lbufWidth)))
973 // Otherwise, find the clip limits and clip the phrase as well...
974 // NOTE: I'm fudging here by letting the actual blit overstep the bounds of the
975 // line buffer, but it shouldn't matter since there are two unused line
976 // buffers below and nothing above and I'll at most write 8 bytes outside
977 // the line buffer... I could use a fractional clip begin/end value, but
978 // this makes the blit a *lot* more hairy. I might fix this in the future
979 // if it becomes necessary. (JLH)
980 // Probably wouldn't be *that* hairy. Just use a delta that tells the inner loop
981 // which pixel in the phrase is being written, and quit when either end of phrases
982 // is reached or line buffer extents are surpassed.
984 //This stuff is probably wrong as well... !!! FIX !!!
985 //The strange thing is that it seems to work, but that's no guarantee that it's bulletproof!
986 //Yup. Seems that JagMania doesn't work correctly with this...
987 //Dunno if this is the problem, but Atari Karts is showing *some* of the road now...
992 clippedWidth = 0 - leftMargin,
993 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth],
994 leftMargin = 0 - (clippedWidth % phraseWidthToPixels[depth]);
997 if (rightMargin > lbufWidth)
998 clippedWidth = rightMargin - lbufWidth,
999 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];//,
1000 // rightMargin = lbufWidth + (clippedWidth % phraseWidthToPixels[depth]);
1001 // rightMargin = lbufWidth;
1004 WriteLog("OP: We're about to encounter a divide by zero error!\n");
1005 // NOTE: We're just using endPos to figure out how much, if any, to clip by.
1006 // ALSO: There may be another case where we start out of bounds and end out of bounds...!
1008 if (startPos < 0) // Case #1: Begin out, end in, L to R
1009 clippedWidth = 0 - startPos,
1010 dataClippedWidth = phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth],
1011 startPos = 0 - (clippedWidth % phraseWidthToPixels[depth]);
1013 if (endPos < 0) // Case #2: Begin in, end out, R to L
1014 clippedWidth = 0 - endPos,
1015 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];
1017 if (endPos > lbufWidth) // Case #3: Begin in, end out, L to R
1018 clippedWidth = endPos - lbufWidth,
1019 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];
1021 if (startPos > lbufWidth) // Case #4: Begin out, end in, R to L
1022 clippedWidth = startPos - lbufWidth,
1023 dataClippedWidth = phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth],
1024 startPos = lbufWidth + (clippedWidth % phraseWidthToPixels[depth]);
1025 //printf("<OP:spos=%i,epos=%i]", startPos, endPos);
1027 // If the image is sitting on the line buffer left or right edge, we need to compensate
1028 // by decreasing the image phrase width accordingly.
1029 iwidth -= phraseClippedWidth;
1031 // Also, if we're clipping the phrase we need to make sure we're in the correct part of
1033 // data += phraseClippedWidth * (pitch << 3);
1034 data += dataClippedWidth * pitch;
1036 // NOTE: When the bitmap is in REFLECT mode, the XPOS marks the *right* side of the
1037 // bitmap! This makes clipping & etc. MUCH, much easier...!
1038 // uint32 lbufAddress = 0x1800 + (!in24BPPMode ? leftMargin * 2 : leftMargin * 4);
1039 //Why does this work right when multiplying startPos by 2 (instead of 4) for 24 BPP mode?
1040 //Is this a bug in the OP?
1041 //It's because in 24bpp mode, each pixel takes *4* bytes, instead of the usual 2.
1042 //Though it looks like we're doing it here no matter what...
1043 // uint32 lbufAddress = 0x1800 + (!in24BPPMode ? startPos * 2 : startPos * 2);
1045 uint32 lbufAddress = 0x1800 + (startPos * 2);
1046 uint8 * currentLineBuffer = &tomRam8[lbufAddress];
1050 // Hmm. We check above for 24 BPP mode, but don't do anything about it below...
1051 // If we *were* in 24 BPP mode, how would you convert CRY to RGB24? Seems to me
1052 // that if you're in CRY mode then you wouldn't be able to use 24 BPP bitmaps
1054 // This seems to be the case (at least according to the Midsummer docs)...!
1056 // This is to test using palette zeroes instead of bit zeroes...
1057 // And it seems that this is wrong, index == 0 is transparent apparently... :-/
1058 //#define OP_USES_PALETTE_ZERO
1060 if (depth == 0) // 1 BPP
1062 // The LSB of flags is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1063 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1065 // Fetch 1st phrase...
1066 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1067 //Note that firstPix should only be honored *if* we start with the 1st phrase of the bitmap
1068 //i.e., we didn't clip on the margin... !!! FIX !!!
1069 pixels <<= firstPix; // Skip first N pixels (N=firstPix)...
1070 int i = firstPix; // Start counter at right spot...
1076 uint8 bit = pixels >> 63;
1077 #ifndef OP_USES_PALETTE_ZERO
1078 if (flagTRANS && bit == 0)
1080 if (flagTRANS && (paletteRAM16[index | bit] == 0))
1086 //Optimize: Set paletteRAM16 to beginning of palette RAM + index*2 and use only [bit] as index...
1087 //Won't optimize RMW case though...
1088 // This is the *only* correct use of endian-dependent code
1089 // (i.e., mem-to-mem direct copying)!
1090 *(uint16 *)currentLineBuffer = paletteRAM16[index | bit];
1092 *currentLineBuffer =
1093 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bit) << 1]),
1094 *(currentLineBuffer + 1) =
1095 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bit) << 1) + 1]);
1098 currentLineBuffer += lbufDelta;
1102 // Fetch next phrase...
1104 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1107 else if (depth == 1) // 2 BPP
1110 WriteLog("OP: Fixed bitmap @ 2 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1111 index &= 0xFC; // Top six bits form CLUT index
1112 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1113 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1118 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1121 for(int i=0; i<32; i++)
1123 uint8 bits = pixels >> 62;
1124 // Seems to me that both of these are in the same endian, so we could cast it as
1125 // uint16 * and do straight across copies (what about 24 bpp? Treat it differently...)
1126 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
1127 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
1128 // No, it isn't because we read the memory in an endian safe way--this *won't* work...
1129 #ifndef OP_USES_PALETTE_ZERO
1130 if (flagTRANS && bits == 0)
1132 if (flagTRANS && (paletteRAM16[index | bits] == 0))
1138 *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1140 *currentLineBuffer =
1141 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1142 *(currentLineBuffer + 1) =
1143 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1146 currentLineBuffer += lbufDelta;
1151 else if (depth == 2) // 4 BPP
1154 WriteLog("OP: Fixed bitmap @ 4 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1155 index &= 0xF0; // Top four bits form CLUT index
1156 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1157 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1162 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1165 for(int i=0; i<16; i++)
1167 uint8 bits = pixels >> 60;
1168 // Seems to me that both of these are in the same endian, so we could cast it as
1169 // uint16 * and do straight across copies (what about 24 bpp? Treat it differently...)
1170 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
1171 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
1172 // No, it isn't because we read the memory in an endian safe way--this *won't* work...
1173 #ifndef OP_USES_PALETTE_ZERO
1174 if (flagTRANS && bits == 0)
1176 if (flagTRANS && (paletteRAM16[index | bits] == 0))
1182 *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1184 *currentLineBuffer =
1185 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1186 *(currentLineBuffer + 1) =
1187 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1190 currentLineBuffer += lbufDelta;
1195 else if (depth == 3) // 8 BPP
1197 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1198 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1200 // Fetch 1st phrase...
1201 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1202 //Note that firstPix should only be honored *if* we start with the 1st phrase of the bitmap
1203 //i.e., we didn't clip on the margin... !!! FIX !!!
1204 firstPix &= 0x30; // Only top two bits are valid for 8 BPP
1205 pixels <<= firstPix; // Skip first N pixels (N=firstPix)...
1206 int i = firstPix >> 3; // Start counter at right spot...
1212 uint8 bits = pixels >> 56;
1213 // Seems to me that both of these are in the same endian, so we could cast it as
1214 // uint16 * and do straight across copies (what about 24 bpp? Treat it differently...)
1215 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
1216 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
1217 // No, it isn't because we read the memory in an endian safe way--this *won't* work...
1218 //This would seem to be problematic...
1219 //Because it's the palette entry being zero that makes the pixel transparent...
1220 //Let's try it and see.
1221 #ifndef OP_USES_PALETTE_ZERO
1222 if (flagTRANS && bits == 0)
1224 if (flagTRANS && (paletteRAM16[bits] == 0))
1230 *(uint16 *)currentLineBuffer = paletteRAM16[bits];
1232 *currentLineBuffer =
1233 BLEND_CR(*currentLineBuffer, paletteRAM[bits << 1]),
1234 *(currentLineBuffer + 1) =
1235 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[(bits << 1) + 1]);
1238 currentLineBuffer += lbufDelta;
1242 // Fetch next phrase...
1244 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1247 else if (depth == 4) // 16 BPP
1250 WriteLog("OP: Fixed bitmap @ 16 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1251 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1252 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1257 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1260 for(int i=0; i<4; i++)
1262 uint8 bitsHi = pixels >> 56, bitsLo = pixels >> 48;
1263 // Seems to me that both of these are in the same endian, so we could cast it as
1264 // uint16 * and do straight across copies (what about 24 bpp? Treat it differently...)
1265 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
1266 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
1267 // No, it isn't because we read the memory in an endian safe way--it *won't* work...
1268 //This doesn't seem right... Let's try the encoded black value ($8800):
1269 //Apparently, CRY 0 maps to $8800...
1270 if (flagTRANS && ((bitsLo | bitsHi) == 0))
1271 // if (flagTRANS && (bitsHi == 0x88) && (bitsLo == 0x00))
1276 *currentLineBuffer = bitsHi,
1277 *(currentLineBuffer + 1) = bitsLo;
1279 *currentLineBuffer =
1280 BLEND_CR(*currentLineBuffer, bitsHi),
1281 *(currentLineBuffer + 1) =
1282 BLEND_Y(*(currentLineBuffer + 1), bitsLo);
1285 currentLineBuffer += lbufDelta;
1290 else if (depth == 5) // 24 BPP
1292 //Looks like Iron Soldier is the only game that uses 24BPP mode...
1293 //There *might* be others...
1294 //WriteLog("OP: Writing 24 BPP bitmap!\n");
1296 WriteLog("OP: Fixed bitmap @ 24 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1297 // Not sure, but I think RMW only works with 16 BPP and below, and only in CRY mode...
1298 // The LSB of flags is OPFLAG_REFLECT, so sign extend it and OR 4 into it.
1299 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 4) | 0x04;
1304 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1307 for(int i=0; i<2; i++)
1309 // We don't use a 32-bit var here because of endian issues...!
1310 uint8 bits3 = pixels >> 56, bits2 = pixels >> 48,
1311 bits1 = pixels >> 40, bits0 = pixels >> 32;
1313 if (flagTRANS && (bits3 | bits2 | bits1 | bits0) == 0)
1316 *currentLineBuffer = bits3,
1317 *(currentLineBuffer + 1) = bits2,
1318 *(currentLineBuffer + 2) = bits1,
1319 *(currentLineBuffer + 3) = bits0;
1321 currentLineBuffer += lbufDelta;
1329 // Store scaled bitmap in line buffer
1331 void OPProcessScaledBitmap(uint64 p0, uint64 p1, uint64 p2, bool render)
1333 // Need to make sure that when writing that it stays within the line buffer...
1334 // LBUF ($F01800 - $F01D9E) 360 x 32-bit RAM
1335 uint8 depth = (p1 >> 12) & 0x07; // Color depth of image
1336 int32 xpos = ((int16)((p1 << 4) & 0xFFFF)) >> 4;// Image xpos in LBUF
1337 uint32 iwidth = (p1 >> 28) & 0x3FF; // Image width in *phrases*
1338 uint32 data = (p0 >> 40) & 0xFFFFF8; // Pixel data address
1339 //#ifdef OP_DEBUG_BMP
1340 // Prolly should use this... Though not sure exactly how.
1341 //Use the upper bits as an offset into the phrase depending on the BPP. That's how!
1342 uint32 firstPix = (p1 >> 49) & 0x3F;
1343 //This is WEIRD! I'm sure I saw Atari Karts request 8 BPP FIRSTPIX! What happened???
1345 WriteLog("OP: FIRSTPIX != 0! (Scaled BM)\n");
1347 // We can ignore the RELEASE (high order) bit for now--probably forever...!
1348 // uint8 flags = (p1 >> 45) & 0x0F; // REFLECT, RMW, TRANS, RELEASE
1349 //Optimize: break these out to their own BOOL values [DONE]
1350 uint8 flags = (p1 >> 45) & 0x07; // REFLECT (0), RMW (1), TRANS (2)
1351 bool flagREFLECT = (flags & OPFLAG_REFLECT ? true : false),
1352 flagRMW = (flags & OPFLAG_RMW ? true : false),
1353 flagTRANS = (flags & OPFLAG_TRANS ? true : false);
1354 uint8 index = (p1 >> 37) & 0xFE; // CLUT index offset (upper pix, 1-4 bpp)
1355 uint32 pitch = (p1 >> 15) & 0x07; // Phrase pitch
1357 uint8 * tomRam8 = TOMGetRamPointer();
1358 uint8 * paletteRAM = &tomRam8[0x400];
1359 // This is OK as long as it's used correctly: For 16-bit RAM to RAM direct copies--NOT
1360 // for use when using endian-corrected data (i.e., any of the *ReadWord functions!)
1361 uint16 * paletteRAM16 = (uint16 *)paletteRAM;
1363 uint16 hscale = p2 & 0xFF;
1364 // Hmm. It seems that fixing the horizontal scale necessitated re-fixing this. Not sure why,
1365 // but seems to be consistent with the vertical scaling now (and it may turn out to be wrong!)...
1366 uint16 horizontalRemainder = hscale; // Not sure if it starts full, but seems reasonable [It's not!]
1367 // uint8 horizontalRemainder = 0; // Let's try zero! Seems to work! Yay! [No, it doesn't!]
1368 int32 scaledWidthInPixels = (iwidth * phraseWidthToPixels[depth] * hscale) >> 5;
1369 uint32 scaledPhrasePixels = (phraseWidthToPixels[depth] * hscale) >> 5;
1371 // WriteLog("bitmap %ix? %ibpp at %i,? firstpix=? data=0x%.8x pitch %i hflipped=%s dwidth=? (linked to ?) RMW=%s Tranparent=%s\n",
1372 // iwidth, op_bitmap_bit_depth[bitdepth], xpos, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), (flags&OPFLAG_RMW ? "yes" : "no"), (flags&OPFLAG_TRANS ? "yes" : "no"));
1374 // Looks like an hscale of zero means don't draw!
1375 if (!render || iwidth == 0 || hscale == 0)
1378 /*extern int start_logging;
1380 WriteLog("OP: Scaled bitmap %ix? %ibpp at %i,? hscale=%02X fpix=%i data=%08X pitch %i hflipped=%s dwidth=? (linked to %08X) Transluency=%s\n",
1381 iwidth, op_bitmap_bit_depth[depth], xpos, hscale, firstPix, data, pitch, (flagREFLECT ? "yes" : "no"), op_pointer, (flagRMW ? "yes" : "no"));*/
1382 //#define OP_DEBUG_BMP
1383 //#ifdef OP_DEBUG_BMP
1384 // WriteLog("OP: Scaled bitmap %ix%i %ibpp at %i,%i firstpix=%i data=0x%.8x pitch %i hflipped=%s dwidth=%i (linked to 0x%.8x) Transluency=%s\n",
1385 // iwidth, height, op_bitmap_bit_depth[bitdepth], xpos, ypos, firstPix, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), dwidth, op_pointer, (flags&OPFLAG_RMW ? "yes" : "no"));
1388 int32 startPos = xpos, endPos = xpos +
1389 (!flagREFLECT ? scaledWidthInPixels - 1 : -(scaledWidthInPixels + 1));
1390 uint32 clippedWidth = 0, phraseClippedWidth = 0, dataClippedWidth = 0;
1391 bool in24BPPMode = (((GET16(tomRam8, 0x0028) >> 1) & 0x03) == 1 ? true : false); // VMODE
1392 // Not sure if this is Jaguar Two only location or what...
1393 // From the docs, it is... If we want to limit here we should think of something else.
1394 // int32 limit = GET16(tom_ram_8, 0x0008); // LIMIT
1396 // int32 lbufWidth = (!in24BPPMode ? limit - 1 : (limit / 2) - 1); // Zero based limit...
1397 int32 lbufWidth = 719; // Zero based limit...
1399 // If the image is completely to the left or right of the line buffer, then bail.
1400 //If in REFLECT mode, then these values are swapped! !!! FIX !!! [DONE]
1401 //There are four possibilities:
1402 // 1. image sits on left edge and no REFLECT; starts out of bounds but ends in bounds.
1403 // 2. image sits on left edge and REFLECT; starts in bounds but ends out of bounds.
1404 // 3. image sits on right edge and REFLECT; starts out of bounds but ends in bounds.
1405 // 4. image sits on right edge and no REFLECT; starts in bounds but ends out of bounds.
1406 //Numbers 2 & 4 can be caught by checking the LBUF clip while in the inner loop,
1407 // numbers 1 & 3 are of concern.
1408 // This *indirectly* handles only cases 2 & 4! And is WRONG if REFLECT is set...!
1409 // if (rightMargin < 0 || leftMargin > lbufWidth)
1411 // It might be easier to swap these (if REFLECTed) and just use XPOS down below...
1412 // That way, you could simply set XPOS to leftMargin if !REFLECT and to rightMargin otherwise.
1413 // Still have to be careful with the DATA and IWIDTH values though...
1415 if ((!flagREFLECT && (endPos < 0 || startPos > lbufWidth))
1416 || (flagREFLECT && (startPos < 0 || endPos > lbufWidth)))
1419 // Otherwise, find the clip limits and clip the phrase as well...
1420 // NOTE: I'm fudging here by letting the actual blit overstep the bounds of the
1421 // line buffer, but it shouldn't matter since there are two unused line
1422 // buffers below and nothing above and I'll at most write 40 bytes outside
1423 // the line buffer... I could use a fractional clip begin/end value, but
1424 // this makes the blit a *lot* more hairy. I might fix this in the future
1425 // if it becomes necessary. (JLH)
1426 // Probably wouldn't be *that* hairy. Just use a delta that tells the inner loop
1427 // which pixel in the phrase is being written, and quit when either end of phrases
1428 // is reached or line buffer extents are surpassed.
1430 //This stuff is probably wrong as well... !!! FIX !!!
1431 //The strange thing is that it seems to work, but that's no guarantee that it's bulletproof!
1432 //Yup. Seems that JagMania doesn't work correctly with this...
1433 //Dunno if this is the problem, but Atari Karts is showing *some* of the road now...
1434 //Actually, it is! Or, it was. It doesn't seem to be clipping here, so the problem lies
1435 //elsewhere! Hmm. Putting the scaling code into the 1/2/8 BPP cases seems to draw the ground
1436 // a bit more accurately... Strange!
1437 //It's probably a case of the REFLECT flag being set and the background being written
1438 //from the right side of the screen...
1439 //But no, it isn't... At least if the diagnostics are telling the truth!
1441 // NOTE: We're just using endPos to figure out how much, if any, to clip by.
1442 // ALSO: There may be another case where we start out of bounds and end out of bounds...!
1445 //There's a problem here with scaledPhrasePixels in that it can be forced to zero when
1446 //the scaling factor is small. So fix it already! !!! FIX !!!
1447 /*if (scaledPhrasePixels == 0)
1449 WriteLog("OP: [Scaled] We're about to encounter a divide by zero error!\n");
1450 DumpScaledObject(p0, p1, p2);
1452 //NOTE: I'm almost 100% sure that this is wrong... And it is! :-p
1454 //Try a simple example...
1455 // Let's say we have a 8 BPP scanline with an hscale of $80 (4). Our xpos is -10,
1456 // non-flipped. Pixels in the bitmap are XYZXYZXYZXYZXYZ.
1457 // Scaled up, they would be XXXXYYYYZZZZXXXXYYYYZZZZXXXXYYYYZZZZ...
1459 // Normally, we would expect this in the line buffer:
1460 // ZZXXXXYYYYZZZZXXXXYYYYZZZZ...
1462 // But instead we're getting:
1463 // XXXXYYYYZZZZXXXXYYYYZZZZ...
1465 // or are we??? It would seem so, simply by virtue of the fact that we're NOT starting
1466 // on negative boundary--or are we? Hmm...
1467 // cw = 10, dcw = pcw = 10 / ([8 * 4 = 32] 32) = 0, sp = -10
1469 // Let's try a real world example:
1471 //OP: Scaled bitmap (70, 8 BPP, spp=28) sp (-400) < 0... [new sp=-8, cw=400, dcw=pcw=14]
1472 //OP: Scaled bitmap (6F, 8 BPP, spp=27) sp (-395) < 0... [new sp=-17, cw=395, dcw=pcw=14]
1474 // Really, spp is 27.75 in the second case...
1475 // So... If we do 395 / 27.75, we get 14. Ok so far... If we scale that against the
1476 // start position (14 * 27.75), we get -6.5... NOT -17!
1478 //Now it seems we're working OK, at least for the first case...
1479 uint32 scaledPhrasePixelsUS = phraseWidthToPixels[depth] * hscale;
1481 if (startPos < 0) // Case #1: Begin out, end in, L to R
1483 extern int start_logging;
1485 WriteLog("OP: Scaled bitmap (%02X, %u BPP, spp=%u) start pos (%i) < 0...", hscale, op_bitmap_bit_depth[depth], scaledPhrasePixels, startPos);
1486 // clippedWidth = 0 - startPos,
1487 clippedWidth = (0 - startPos) << 5,
1488 // dataClippedWidth = phraseClippedWidth = clippedWidth / scaledPhrasePixels,
1489 dataClippedWidth = phraseClippedWidth = (clippedWidth / scaledPhrasePixelsUS) >> 5,
1490 // startPos = 0 - (clippedWidth % scaledPhrasePixels);
1491 startPos += (dataClippedWidth * scaledPhrasePixelsUS) >> 5;
1493 WriteLog(" [new sp=%i, cw=%i, dcw=pcw=%i]\n", startPos, clippedWidth, dataClippedWidth);
1496 if (endPos < 0) // Case #2: Begin in, end out, R to L
1497 clippedWidth = 0 - endPos,
1498 phraseClippedWidth = clippedWidth / scaledPhrasePixels;
1500 if (endPos > lbufWidth) // Case #3: Begin in, end out, L to R
1501 clippedWidth = endPos - lbufWidth,
1502 phraseClippedWidth = clippedWidth / scaledPhrasePixels;
1504 if (startPos > lbufWidth) // Case #4: Begin out, end in, R to L
1505 clippedWidth = startPos - lbufWidth,
1506 dataClippedWidth = phraseClippedWidth = clippedWidth / scaledPhrasePixels,
1507 startPos = lbufWidth + (clippedWidth % scaledPhrasePixels);
1509 extern int op_start_log;
1510 if (op_start_log && clippedWidth != 0)
1511 WriteLog("OP: Clipped line. SP=%i, EP=%i, clip=%u, iwidth=%u, hscale=%02X\n", startPos, endPos, clippedWidth, iwidth, hscale);
1512 if (op_start_log && startPos == 13)
1514 WriteLog("OP: Scaled line. SP=%i, EP=%i, clip=%u, iwidth=%u, hscale=%02X, depth=%u, firstPix=%u\n", startPos, endPos, clippedWidth, iwidth, hscale, depth, firstPix);
1515 DumpScaledObject(p0, p1, p2);
1518 WriteLog(" %08X: ", data);
1519 for(int i=0; i<7*8; i++)
1520 WriteLog("%02X ", JaguarReadByte(data+i));
1524 // If the image is sitting on the line buffer left or right edge, we need to compensate
1525 // by decreasing the image phrase width accordingly.
1526 iwidth -= phraseClippedWidth;
1528 // Also, if we're clipping the phrase we need to make sure we're in the correct part of
1530 // data += phraseClippedWidth * (pitch << 3);
1531 data += dataClippedWidth * (pitch << 3);
1533 // NOTE: When the bitmap is in REFLECT mode, the XPOS marks the *right* side of the
1534 // bitmap! This makes clipping & etc. MUCH, much easier...!
1535 // uint32 lbufAddress = 0x1800 + (!in24BPPMode ? leftMargin * 2 : leftMargin * 4);
1536 // uint32 lbufAddress = 0x1800 + (!in24BPPMode ? startPos * 2 : startPos * 4);
1537 uint32 lbufAddress = 0x1800 + startPos * 2;
1538 uint8 * currentLineBuffer = &tomRam8[lbufAddress];
1539 //uint8 * lineBufferLowerLimit = &tom_ram_8[0x1800],
1540 // * lineBufferUpperLimit = &tom_ram_8[0x1800 + 719];
1544 // Hmm. We check above for 24 BPP mode, but don't do anything about it below...
1545 // If we *were* in 24 BPP mode, how would you convert CRY to RGB24? Seems to me
1546 // that if you're in CRY mode then you wouldn't be able to use 24 BPP bitmaps
1548 // This seems to be the case (at least according to the Midsummer docs)...!
1550 if (depth == 0) // 1 BPP
1553 WriteLog("OP: Scaled bitmap @ 1 BPP requesting FIRSTPIX!\n");
1554 // The LSB of flags is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1555 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1558 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1560 while ((int32)iwidth > 0)
1562 uint8 bits = pixels >> 63;
1564 #ifndef OP_USES_PALETTE_ZERO
1565 if (flagTRANS && bits == 0)
1567 if (flagTRANS && (paletteRAM16[index | bits] == 0))
1573 // This is the *only* correct use of endian-dependent code
1574 // (i.e., mem-to-mem direct copying)!
1575 *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1577 *currentLineBuffer =
1578 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1579 *(currentLineBuffer + 1) =
1580 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1583 currentLineBuffer += lbufDelta;
1586 The reason we subtract the horizontalRemainder *after* the test is because we had too few
1587 bytes for horizontalRemainder to properly recognize a negative number. But now it's 16 bits
1588 wide, so we could probably go back to that (as long as we make it an int16 and not a uint16!)
1590 /* horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
1591 while (horizontalRemainder & 0x80)
1593 horizontalRemainder += hscale;
1597 // while (horizontalRemainder <= 0x20) // I.e., it's <= 1.0 (*before* subtraction)
1598 while (horizontalRemainder < 0x20) // I.e., it's <= 1.0 (*before* subtraction)
1600 horizontalRemainder += hscale;
1604 horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
1608 int phrasesToSkip = pixCount / 64, pixelShift = pixCount % 64;
1610 data += (pitch << 3) * phrasesToSkip;
1611 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1612 pixels <<= 1 * pixelShift;
1613 iwidth -= phrasesToSkip;
1614 pixCount = pixelShift;
1618 else if (depth == 1) // 2 BPP
1621 WriteLog("OP: Scaled bitmap @ 2 BPP requesting FIRSTPIX!\n");
1622 index &= 0xFC; // Top six bits form CLUT index
1623 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1624 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1627 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1629 while ((int32)iwidth > 0)
1631 uint8 bits = pixels >> 62;
1633 #ifndef OP_USES_PALETTE_ZERO
1634 if (flagTRANS && bits == 0)
1636 if (flagTRANS && (paletteRAM16[index | bits] == 0))
1642 // This is the *only* correct use of endian-dependent code
1643 // (i.e., mem-to-mem direct copying)!
1644 *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1646 *currentLineBuffer =
1647 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1648 *(currentLineBuffer + 1) =
1649 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1652 currentLineBuffer += lbufDelta;
1654 /* horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
1655 while (horizontalRemainder & 0x80)
1657 horizontalRemainder += hscale;
1661 // while (horizontalRemainder <= 0x20) // I.e., it's <= 0 (*before* subtraction)
1662 while (horizontalRemainder < 0x20) // I.e., it's <= 1.0 (*before* subtraction)
1664 horizontalRemainder += hscale;
1668 horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
1672 int phrasesToSkip = pixCount / 32, pixelShift = pixCount % 32;
1674 data += (pitch << 3) * phrasesToSkip;
1675 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1676 pixels <<= 2 * pixelShift;
1677 iwidth -= phrasesToSkip;
1678 pixCount = pixelShift;
1682 else if (depth == 2) // 4 BPP
1685 WriteLog("OP: Scaled bitmap @ 4 BPP requesting FIRSTPIX!\n");
1686 index &= 0xF0; // Top four bits form CLUT index
1687 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1688 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1691 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1693 while ((int32)iwidth > 0)
1695 uint8 bits = pixels >> 60;
1697 #ifndef OP_USES_PALETTE_ZERO
1698 if (flagTRANS && bits == 0)
1700 if (flagTRANS && (paletteRAM16[index | bits] == 0))
1706 // This is the *only* correct use of endian-dependent code
1707 // (i.e., mem-to-mem direct copying)!
1708 *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1710 *currentLineBuffer =
1711 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1712 *(currentLineBuffer + 1) =
1713 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1716 currentLineBuffer += lbufDelta;
1718 /* horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
1719 while (horizontalRemainder & 0x80)
1721 horizontalRemainder += hscale;
1725 // while (horizontalRemainder <= 0x20) // I.e., it's <= 0 (*before* subtraction)
1726 while (horizontalRemainder < 0x20) // I.e., it's <= 0 (*before* subtraction)
1728 horizontalRemainder += hscale;
1732 horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
1736 int phrasesToSkip = pixCount / 16, pixelShift = pixCount % 16;
1738 data += (pitch << 3) * phrasesToSkip;
1739 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1740 pixels <<= 4 * pixelShift;
1741 iwidth -= phrasesToSkip;
1742 pixCount = pixelShift;
1746 else if (depth == 3) // 8 BPP
1749 WriteLog("OP: Scaled bitmap @ 8 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1750 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1751 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1754 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1756 while ((int32)iwidth > 0)
1758 uint8 bits = pixels >> 56;
1760 #ifndef OP_USES_PALETTE_ZERO
1761 if (flagTRANS && bits == 0)
1763 if (flagTRANS && (paletteRAM16[bits] == 0))
1769 // This is the *only* correct use of endian-dependent code
1770 // (i.e., mem-to-mem direct copying)!
1771 *(uint16 *)currentLineBuffer = paletteRAM16[bits];
1773 if (currentLineBuffer >= lineBufferLowerLimit && currentLineBuffer <= lineBufferUpperLimit)
1774 *(uint16 *)currentLineBuffer = paletteRAM16[bits];
1777 *currentLineBuffer =
1778 BLEND_CR(*currentLineBuffer, paletteRAM[bits << 1]),
1779 *(currentLineBuffer + 1) =
1780 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[(bits << 1) + 1]);
1783 currentLineBuffer += lbufDelta;
1785 // while (horizontalRemainder <= 0x20) // I.e., it's <= 0 (*before* subtraction)
1786 while (horizontalRemainder < 0x20) // I.e., it's <= 1.0 (*before* subtraction)
1788 horizontalRemainder += hscale;
1792 horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
1796 int phrasesToSkip = pixCount / 8, pixelShift = pixCount % 8;
1798 data += (pitch << 3) * phrasesToSkip;
1799 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1800 pixels <<= 8 * pixelShift;
1801 iwidth -= phrasesToSkip;
1802 pixCount = pixelShift;
1806 else if (depth == 4) // 16 BPP
1809 WriteLog("OP: Scaled bitmap @ 16 BPP requesting FIRSTPIX!\n");
1810 // The LSB is OPFLAG_REFLECT, so sign extend it and OR 2 into it.
1811 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1814 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1816 while ((int32)iwidth > 0)
1818 uint8 bitsHi = pixels >> 56, bitsLo = pixels >> 48;
1820 //This doesn't seem right... Let's try the encoded black value ($8800):
1821 //Apparently, CRY 0 maps to $8800...
1822 if (flagTRANS && ((bitsLo | bitsHi) == 0))
1823 // if (flagTRANS && (bitsHi == 0x88) && (bitsLo == 0x00))
1828 *currentLineBuffer = bitsHi,
1829 *(currentLineBuffer + 1) = bitsLo;
1831 *currentLineBuffer =
1832 BLEND_CR(*currentLineBuffer, bitsHi),
1833 *(currentLineBuffer + 1) =
1834 BLEND_Y(*(currentLineBuffer + 1), bitsLo);
1837 currentLineBuffer += lbufDelta;
1839 /* horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
1840 while (horizontalRemainder & 0x80)
1842 horizontalRemainder += hscale;
1846 // while (horizontalRemainder <= 0x20) // I.e., it's <= 0 (*before* subtraction)
1847 while (horizontalRemainder < 0x20) // I.e., it's <= 1.0 (*before* subtraction)
1849 horizontalRemainder += hscale;
1853 horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
1857 int phrasesToSkip = pixCount / 4, pixelShift = pixCount % 4;
1859 data += (pitch << 3) * phrasesToSkip;
1860 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1861 pixels <<= 16 * pixelShift;
1863 iwidth -= phrasesToSkip;
1865 pixCount = pixelShift;
1869 else if (depth == 5) // 24 BPP
1871 //I'm not sure that you can scale a 24 BPP bitmap properly--the JTRM seems to indicate as much.
1872 WriteLog("OP: Writing 24 BPP scaled bitmap!\n");
1874 WriteLog("OP: Scaled bitmap @ 24 BPP requesting FIRSTPIX!\n");
1875 // Not sure, but I think RMW only works with 16 BPP and below, and only in CRY mode...
1876 // The LSB is OPFLAG_REFLECT, so sign extend it and OR 4 into it.
1877 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 4) | 0x04;
1882 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1883 data += pitch << 3; // Multiply pitch * 8 (optimize: precompute this value)
1885 for(int i=0; i<2; i++)
1887 uint8 bits3 = pixels >> 56, bits2 = pixels >> 48,
1888 bits1 = pixels >> 40, bits0 = pixels >> 32;
1890 if (flagTRANS && (bits3 | bits2 | bits1 | bits0) == 0)
1893 *currentLineBuffer = bits3,
1894 *(currentLineBuffer + 1) = bits2,
1895 *(currentLineBuffer + 2) = bits1,
1896 *(currentLineBuffer + 3) = bits0;
1898 currentLineBuffer += lbufDelta;