4 // Original source by David Raingeard (Cal2)
5 // GCC/SDL port by Niels Wagenaar (Linux/WIN32) and Caz (BeOS)
6 // Extensive cleanups/fixes/rewrites by James Hammons
7 // (C) 2010 Underground Software
9 // JLH = James Hammons <jlhamm@acm.org>
12 // --- ---------- -------------------------------------------------------------
13 // JLH 01/16/2010 Created this log ;-)
23 #include "m68000/m68kinterface.h"
28 //#define OP_DEBUG_BMP
// Blend table lookups: the destination value forms the high byte of the table
// index and the source value forms the low byte. Both arguments are fully
// parenthesized so that compound expressions (e.g. BLEND_Y(a + b, c)) are
// converted and shifted as a whole rather than token by token. The blend
// tables hold 0x10000 entries, so each argument is expected to fit in 8 bits.
#define BLEND_Y(dst, src)	op_blend_y[(((uint16)(dst)) << 8) | ((uint16)(src))]
#define BLEND_CR(dst, src)	op_blend_cr[(((uint16)(dst)) << 8) | ((uint16)(src))]
// Object type codes, taken from the low three bits of an object's first phrase
33 #define OBJECT_TYPE_BITMAP 0 // 000 - fixed size bitmap
34 #define OBJECT_TYPE_SCALE 1 // 001 - scaled bitmap
35 #define OBJECT_TYPE_GPU 2 // 010 - GPU interrupt object
36 #define OBJECT_TYPE_BRANCH 3 // 011 - conditional branch
37 #define OBJECT_TYPE_STOP 4 // 100 - end of the object list
// Condition codes evaluated by BRANCH objects (YPOS comes from the object)
39 #define CONDITION_EQUAL 0 // VC == YPOS
40 #define CONDITION_LESS_THAN 1 // VC < YPOS
41 #define CONDITION_GREATER_THAN 2 // VC > YPOS
42 #define CONDITION_OP_FLAG_SET 3 // Object processor flag is set
43 #define CONDITION_SECOND_HALF_LINE 4 // Second half of the scanline
// Bitmap object flag bits (extracted from bits 45-48 of the second phrase)
46 #define OPFLAG_RELEASE 8 // Bus release bit
47 #define OPFLAG_TRANS 4 // Transparency bit
48 #define OPFLAG_RMW 2 // Read-Modify-Write bit
49 #define OPFLAG_REFLECT 1 // Horizontal mirror bit
52 // Private function prototypes
54 void OPProcessFixedBitmap(uint64 p0, uint64 p1, bool render);
55 void OPProcessScaledBitmap(uint64 p0, uint64 p1, uint64 p2, bool render);
56 void OPDiscoverObjects(uint32 address);
57 void OPDumpObjectList(void);
58 void DumpScaledObject(uint64 p0, uint64 p1, uint64 p2);
59 void DumpFixedObject(uint64 p0, uint64 p1);
60 void DumpBitmapCore(uint64 p0, uint64 p1);
61 uint64 OPLoadPhrase(uint32 offset);
63 // Local global variables
65 // Blend tables (64K each); looked up through BLEND_Y / BLEND_CR with an
// index of (dst << 8) | src
66 static uint8 op_blend_y[0x10000];
67 static uint8 op_blend_cr[0x10000];
68 // There may be a problem with this "RAM" overlapping (and thus being independent of)
69 // some of the regular TOM RAM...
70 //#warning objectp_ram is separated from TOM RAM--need to fix that!
71 //static uint8 objectp_ram[0x40]; // This is based at $F00000
// NOTE(review): nothing in this part of the file reads or writes
// objectp_running after its initialization -- confirm whether it is still used
72 uint8 objectp_running = 0;
73 //bool objectp_stop_reading_list;
// Bits per pixel for each 3-bit depth code found in an object's second phrase
75 static uint8 op_bitmap_bit_depth[8] = { 1, 2, 4, 8, 16, 24, 32, 0 };
76 //static uint32 op_bitmap_bit_size[8] =
77 // { (uint32)(0.125*65536), (uint32)(0.25*65536), (uint32)(0.5*65536), (uint32)(1*65536),
78 // (uint32)(2*65536), (uint32)(1*65536), (uint32)(1*65536), (uint32)(1*65536) };
// Address of the object currently being processed; reloaded from
// OPGetListPointer() when OPProcessList() starts
79 static uint32 op_pointer;
// Pixels per phrase for each depth code; multiplied by an object's width in
// phrases to find how far it extends in the line buffer
81 int32 phraseWidthToPixels[8] = { 64, 32, 16, 8, 4, 2, 0, 0 };
85 // Object Processor initialization
89 // Here we calculate the saturating blend of a signed 4-bit value and an
90 // existing Cyan/Red value as well as a signed 8-bit value and an existing intensity...
91 // Note: CRY is 4 bits Cyan, 4 bits Red, 8 bits intensitY
92 for(int i=0; i<256*256; i++)
94 int y = (i >> 8) & 0xFF;
95 int dy = (int8)i; // Sign extend the Y index
96 int c1 = (i >> 8) & 0x0F;
97 int dc1 = (int8)(i << 4) >> 4; // Sign extend the R index
98 int c2 = (i >> 12) & 0x0F;
99 int dc2 = (int8)(i & 0xF0) >> 4; // Sign extend the C index
124 op_blend_cr[i] = (c2 << 4) | c1;
132 // Object Processor reset
136 // memset(objectp_ram, 0x00, 0x40);
// Printable names for the eight possible object type codes (indexed by the
// low three bits of an object's first phrase)
141 static const char * opType[8] =
142 { "(BITMAP)", "(SCALED BITMAP)", "(GPU INT)", "(BRANCH)", "(STOP)", "???", "???", "???" };
// Printable names for BRANCH object condition codes
143 static const char * ccType[8] =
144 { "\"==\"", "\"<\"", "\">\"", "(opflag set)", "(second half line)", "?", "?", "?" };
// Addresses of the objects already visited by OPDiscoverObjects(), and how
// many entries of the list are in use.
// NOTE(review): no bounds check against the 8192 entry capacity is visible
// where entries are appended -- confirm that one exists.
145 static uint32 object[8192];
146 static uint32 numberOfObjects;
147 //static uint32 objectLink[8192];
148 //static uint32 numberOfLinks;
153 //#warning "!!! Fix OL dump so that it follows links !!!"
154 // const char * opType[8] =
155 // { "(BITMAP)", "(SCALED BITMAP)", "(GPU INT)", "(BRANCH)", "(STOP)", "???", "???", "???" };
156 // const char * ccType[8] =
157 // { "\"==\"", "\"<\"", "\">\"", "(opflag set)", "(second half line)", "?", "?", "?" };
159 uint32 olp = OPGetListPointer();
160 WriteLog("\nOP: OLP = $%08X\n", olp);
161 WriteLog("OP: Phrase dump\n ----------\n");
164 for(uint32 i=0; i<0x100; i+=8)
166 uint32 hi = JaguarReadLong(olp + i, OP), lo = JaguarReadLong(olp + i + 4, OP);
167 WriteLog("\t%08X: %08X %08X %s", olp + i, hi, lo, opType[lo & 0x07]);
169 if ((lo & 0x07) == 3)
171 uint16 ypos = (lo >> 3) & 0x7FF;
172 uint8 cc = (lo >> 14) & 0x03;
173 uint32 link = ((hi << 11) | (lo >> 21)) & 0x3FFFF8;
174 WriteLog(" YPOS=%u, CC=%s, link=%08X", ypos, ccType[cc], link);
179 if ((lo & 0x07) == 0)
180 DumpFixedObject(OPLoadPhrase(olp+i), OPLoadPhrase(olp+i+8));
182 if ((lo & 0x07) == 1)
183 DumpScaledObject(OPLoadPhrase(olp+i), OPLoadPhrase(olp+i+8), OPLoadPhrase(olp+i+16));
188 //#warning "!!! Fix lockup in OPDiscoverObjects() !!!"
189 //temp, to keep the following function from locking up on bad/weird OLs
193 //printf("OPDiscoverObjects...\n");
194 OPDiscoverObjects(olp);
195 //printf("OPDumpObjectList...\n");
201 bool OPObjectExists(uint32 address)
203 // Yes, we really do a linear search, every time. :-/
204 for(uint32 i=0; i<numberOfObjects; i++)
206 if (address == object[i])
214 void OPDiscoverObjects(uint32 address)
216 uint8 objectType = 0;
220 // If we've seen this object already, bail out!
221 // Otherwise, add it to the list
222 if (OPObjectExists(address))
225 object[numberOfObjects++] = address;
227 // Get the object & decode its type, link address
228 uint32 hi = JaguarReadLong(address + 0, OP);
229 uint32 lo = JaguarReadLong(address + 4, OP);
230 objectType = lo & 0x07;
231 uint32 link = ((hi << 11) | (lo >> 21)) & 0x3FFFF8;
235 // Recursion needed to follow all links! This does depth-first recursion
236 // on the not-taken objects
237 OPDiscoverObjects(address + 8);
240 // Get the next object...
243 while (objectType != 4);
247 void OPDumpObjectList(void)
249 for(uint32 i=0; i<numberOfObjects; i++)
251 uint32 address = object[i];
253 uint32 hi = JaguarReadLong(address + 0, OP);
254 uint32 lo = JaguarReadLong(address + 4, OP);
255 uint8 objectType = lo & 0x07;
256 uint32 link = ((hi << 11) | (lo >> 21)) & 0x3FFFF8;
257 WriteLog("%08X: %08X %08X %s", address, hi, lo, opType[objectType]);
261 uint16 ypos = (lo >> 3) & 0x7FF;
262 uint8 cc = (lo >> 14) & 0x07; // Proper # of bits == 3
263 WriteLog(" YPOS=%u, CC=%s, link=$%08X", ypos, ccType[cc], link);
269 DumpFixedObject(OPLoadPhrase(address + 0), OPLoadPhrase(address + 8));
272 DumpScaledObject(OPLoadPhrase(address + 0), OPLoadPhrase(address + 8),
273 OPLoadPhrase(address + 16));
275 if (address == link) // Ruh roh...
277 // Runaway recursive link is bad!
278 WriteLog("***** SELF REFERENTIAL LINK *****\n\n");
287 // Object Processor memory access
288 // Memory range: F00010 - F00027
290 // F00010-F00017 R xxxxxxxx xxxxxxxx OB - current object code from the graphics processor
291 // F00020-F00023 W xxxxxxxx xxxxxxxx OLP - start of the object list
292 // F00026 W -------- -------x OBF - object processor flag
296 uint8 OPReadByte(uint32 offset, uint32 who/*=UNKNOWN*/)
299 return objectp_ram[offset];
302 uint16 OPReadWord(uint32 offset, uint32 who/*=UNKNOWN*/)
305 return GET16(objectp_ram, offset);
308 void OPWriteByte(uint32 offset, uint8 data, uint32 who/*=UNKNOWN*/)
311 objectp_ram[offset] = data;
314 void OPWriteWord(uint32 offset, uint16 data, uint32 who/*=UNKNOWN*/)
317 SET16(objectp_ram, offset, data);
319 /*if (offset == 0x20)
320 WriteLog("OP: Setting lo list pointer: %04X\n", data);
322 WriteLog("OP: Setting hi list pointer: %04X\n", data);//*/
327 uint32 OPGetListPointer(void)
329 // Note: This register is LO / HI WORD, hence the funky look of this...
330 return GET16(tomRam8, 0x20) | (GET16(tomRam8, 0x22) << 16);
334 // This is WRONG, since the OBF is only 16 bits wide!!! [FIXED]
336 uint32 OPGetStatusRegister(void)
338 return GET16(tomRam8, 0x26);
342 // This is WRONG, since the OBF is only 16 bits wide!!! [FIXED]
344 void OPSetStatusRegister(uint32 data)
346 tomRam8[0x26] = (data & 0x0000FF00) >> 8;
347 tomRam8[0x27] |= (data & 0xFE);
351 void OPSetCurrentObject(uint64 object)
353 //Not sure this is right... Wouldn't it just be stored 64 bit BE?
354 // Stored as least significant 32 bits first, ms32 last in big endian
355 /* objectp_ram[0x13] = object & 0xFF; object >>= 8;
356 objectp_ram[0x12] = object & 0xFF; object >>= 8;
357 objectp_ram[0x11] = object & 0xFF; object >>= 8;
358 objectp_ram[0x10] = object & 0xFF; object >>= 8;
360 objectp_ram[0x17] = object & 0xFF; object >>= 8;
361 objectp_ram[0x16] = object & 0xFF; object >>= 8;
362 objectp_ram[0x15] = object & 0xFF; object >>= 8;
363 objectp_ram[0x14] = object & 0xFF;*/
364 // Let's try regular good old big endian...
365 tomRam8[0x17] = object & 0xFF; object >>= 8;
366 tomRam8[0x16] = object & 0xFF; object >>= 8;
367 tomRam8[0x15] = object & 0xFF; object >>= 8;
368 tomRam8[0x14] = object & 0xFF; object >>= 8;
370 tomRam8[0x13] = object & 0xFF; object >>= 8;
371 tomRam8[0x12] = object & 0xFF; object >>= 8;
372 tomRam8[0x11] = object & 0xFF; object >>= 8;
373 tomRam8[0x10] = object & 0xFF;
377 uint64 OPLoadPhrase(uint32 offset)
379 offset &= ~0x07; // 8 byte alignment
380 return ((uint64)JaguarReadLong(offset, OP) << 32) | (uint64)JaguarReadLong(offset+4, OP);
384 void OPStorePhrase(uint32 offset, uint64 p)
386 offset &= ~0x07; // 8 byte alignment
387 JaguarWriteLong(offset, p >> 32, OP);
388 JaguarWriteLong(offset + 4, p & 0xFFFFFFFF, OP);
393 // Debugging routines
395 void DumpScaledObject(uint64 p0, uint64 p1, uint64 p2)
397 WriteLog(" %08X %08X\n", (uint32)(p1>>32), (uint32)(p1&0xFFFFFFFF));
398 WriteLog(" %08X %08X\n", (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
399 DumpBitmapCore(p0, p1);
400 uint32 hscale = p2 & 0xFF;
401 uint32 vscale = (p2 >> 8) & 0xFF;
402 uint32 remainder = (p2 >> 16) & 0xFF;
403 WriteLog(" [hsc: %02X, vsc: %02X, rem: %02X]\n", hscale, vscale, remainder);
407 void DumpFixedObject(uint64 p0, uint64 p1)
409 WriteLog(" %08X %08X\n", (uint32)(p1>>32), (uint32)(p1&0xFFFFFFFF));
410 DumpBitmapCore(p0, p1);
414 void DumpBitmapCore(uint64 p0, uint64 p1)
416 uint32 bdMultiplier[8] = { 64, 32, 16, 8, 4, 2, 1, 1 };
417 uint8 bitdepth = (p1 >> 12) & 0x07;
418 //WAS: int16 ypos = ((p0 >> 3) & 0x3FF); // ??? What if not interlaced (/2)?
419 int16 ypos = ((p0 >> 3) & 0x7FF); // ??? What if not interlaced (/2)?
420 int32 xpos = p1 & 0xFFF;
421 xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos); // Sign extend that mutha!
422 uint32 iwidth = ((p1 >> 28) & 0x3FF);
423 uint32 dwidth = ((p1 >> 18) & 0x3FF); // Unsigned!
424 uint16 height = ((p0 >> 14) & 0x3FF);
425 uint32 link = ((p0 >> 24) & 0x7FFFF) << 3;
426 uint32 ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
427 uint32 firstPix = (p1 >> 49) & 0x3F;
428 uint8 flags = (p1 >> 45) & 0x0F;
429 uint8 idx = (p1 >> 38) & 0x7F;
430 uint32 pitch = (p1 >> 15) & 0x07;
431 WriteLog(" [%u x %u @ (%i, %u) (iw:%u, dw:%u) (%u bpp), l:%08X, p:%08X fp:%02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
432 iwidth * bdMultiplier[bitdepth],
433 height, xpos, ypos, iwidth, dwidth, op_bitmap_bit_depth[bitdepth], link,
434 ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""),
435 (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""),
436 (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
441 // Object Processor main routine
443 #warning "Need to fix this so that when an GPU object IRQ happens, we can pick up OP processing where we left off. !!! FIX !!!"
444 void OPProcessList(int halfline, bool render)
446 #warning "!!! NEED TO HANDLE MULTIPLE FIELDS PROPERLY !!!"
447 // We ignore them, for now; not good
450 extern int op_start_log;
451 // char * condition_to_str[8] =
452 // { "==", "<", ">", "(opflag set)", "(second half line)", "?", "?", "?" };
454 op_pointer = OPGetListPointer();
456 // objectp_stop_reading_list = false;
458 //WriteLog("OP: Processing line #%u (OLP=%08X)...\n", halfline, op_pointer);
461 // *** BEGIN OP PROCESSOR TESTING ONLY ***
462 extern bool interactiveMode;
464 extern int objectPtr;
466 int bitmapCounter = 0;
467 // *** END OP PROCESSOR TESTING ONLY ***
469 uint32 opCyclesToRun = 30000; // This is a pulled-out-of-the-air value (will need to be fixed, obviously!)
471 // if (op_pointer) WriteLog(" new op list at 0x%.8x halfline %i\n",op_pointer,halfline);
474 // *** BEGIN OP PROCESSOR TESTING ONLY ***
475 if (interactiveMode && bitmapCounter == objectPtr)
479 // *** END OP PROCESSOR TESTING ONLY ***
480 // if (objectp_stop_reading_list)
483 uint64 p0 = OPLoadPhrase(op_pointer);
485 //WriteLog("\t%08X type %i\n", op_pointer, (uint8)p0 & 0x07);
488 if (halfline == TOMGetVDB() && op_start_log)
489 //if (halfline == 215 && op_start_log)
490 //if (halfline == 28 && op_start_log)
493 WriteLog("%08X --> phrase %08X %08X", op_pointer - 8, (int)(p0>>32), (int)(p0&0xFFFFFFFF));
494 if ((p0 & 0x07) == OBJECT_TYPE_BITMAP)
496 WriteLog(" (BITMAP) ");
497 uint64 p1 = OPLoadPhrase(op_pointer);
498 WriteLog("\n%08X --> phrase %08X %08X ", op_pointer, (int)(p1>>32), (int)(p1&0xFFFFFFFF));
499 uint8 bitdepth = (p1 >> 12) & 0x07;
500 //WAS: int16 ypos = ((p0 >> 3) & 0x3FF); // ??? What if not interlaced (/2)?
501 int16 ypos = ((p0 >> 3) & 0x7FF); // ??? What if not interlaced (/2)?
502 int32 xpos = p1 & 0xFFF;
503 xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);
504 uint32 iwidth = ((p1 >> 28) & 0x3FF);
505 uint32 dwidth = ((p1 >> 18) & 0x3FF); // Unsigned!
506 uint16 height = ((p0 >> 14) & 0x3FF);
507 uint32 link = ((p0 >> 24) & 0x7FFFF) << 3;
508 uint32 ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
509 uint32 firstPix = (p1 >> 49) & 0x3F;
510 uint8 flags = (p1 >> 45) & 0x0F;
511 uint8 idx = (p1 >> 38) & 0x7F;
512 uint32 pitch = (p1 >> 15) & 0x07;
513 WriteLog("\n [%u (%u) x %u @ (%i, %u) (%u bpp), l: %08X, p: %08X fp: %02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
514 iwidth, dwidth, height, xpos, ypos, op_bitmap_bit_depth[bitdepth], link, ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""), (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""), (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
516 if ((p0 & 0x07) == OBJECT_TYPE_SCALE)
518 WriteLog(" (SCALED BITMAP)");
519 uint64 p1 = OPLoadPhrase(op_pointer), p2 = OPLoadPhrase(op_pointer+8);
520 WriteLog("\n%08X --> phrase %08X %08X ", op_pointer, (int)(p1>>32), (int)(p1&0xFFFFFFFF));
521 WriteLog("\n%08X --> phrase %08X %08X ", op_pointer+8, (int)(p2>>32), (int)(p2&0xFFFFFFFF));
522 uint8 bitdepth = (p1 >> 12) & 0x07;
523 //WAS: int16 ypos = ((p0 >> 3) & 0x3FF); // ??? What if not interlaced (/2)?
524 int16 ypos = ((p0 >> 3) & 0x7FF); // ??? What if not interlaced (/2)?
525 int32 xpos = p1 & 0xFFF;
526 xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);
527 uint32 iwidth = ((p1 >> 28) & 0x3FF);
528 uint32 dwidth = ((p1 >> 18) & 0x3FF); // Unsigned!
529 uint16 height = ((p0 >> 14) & 0x3FF);
530 uint32 link = ((p0 >> 24) & 0x7FFFF) << 3;
531 uint32 ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
532 uint32 firstPix = (p1 >> 49) & 0x3F;
533 uint8 flags = (p1 >> 45) & 0x0F;
534 uint8 idx = (p1 >> 38) & 0x7F;
535 uint32 pitch = (p1 >> 15) & 0x07;
536 WriteLog("\n [%u (%u) x %u @ (%i, %u) (%u bpp), l: %08X, p: %08X fp: %02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
537 iwidth, dwidth, height, xpos, ypos, op_bitmap_bit_depth[bitdepth], link, ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""), (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""), (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
538 uint32 hscale = p2 & 0xFF;
539 uint32 vscale = (p2 >> 8) & 0xFF;
540 uint32 remainder = (p2 >> 16) & 0xFF;
541 WriteLog(" [hsc: %02X, vsc: %02X, rem: %02X]\n", hscale, vscale, remainder);
543 if ((p0 & 0x07) == OBJECT_TYPE_GPU)
544 WriteLog(" (GPU)\n");
545 if ((p0 & 0x07) == OBJECT_TYPE_BRANCH)
547 WriteLog(" (BRANCH)\n");
548 uint8 * jaguarMainRam = GetRamPtr();
549 WriteLog("[RAM] --> ");
550 for(int k=0; k<8; k++)
551 WriteLog("%02X ", jaguarMainRam[op_pointer-8 + k]);
554 if ((p0 & 0x07) == OBJECT_TYPE_STOP)
555 WriteLog(" --> List end\n\n");
559 switch ((uint8)p0 & 0x07)
561 case OBJECT_TYPE_BITMAP:
563 //WAS: uint16 ypos = (p0 >> 3) & 0x3FF;
564 uint16 ypos = (p0 >> 3) & 0x7FF;
565 // This is only theory implied by Rayman...!
566 // It seems that if the YPOS is zero, then bump the YPOS value so that it coincides with
567 // the VDB value. With interlacing, this would be slightly more tricky.
568 // There's probably another bit somewhere that enables this mode--but so far, doesn't seem
569 // to affect any other game in a negative way (that I've seen).
570 // Either that, or it's an undocumented bug...
572 //No, the reason this was needed is that the OP code before was wrong. Any value
573 //less than VDB will get written to the top line of the display!
575 // Not so sure... Let's see what happens here...
578 ypos = TOMReadWord(0xF00046, OP) / 2; // Get the VDB value
580 // Actually, no. Any item less than VDB will get only the lines that hang over VDB displayed.
581 // Actually, this is incorrect. It seems that VDB value is wrong somewhere and that's
582 // what's causing things to fuck up. Still no idea why.
584 uint32 height = (p0 & 0xFFC000) >> 14;
585 uint32 oldOPP = op_pointer - 8;
586 // *** BEGIN OP PROCESSOR TESTING ONLY ***
587 if (inhibit && op_start_log)
588 WriteLog("!!! ^^^ This object is INHIBITED! ^^^ !!!\n");
590 if (!inhibit) // For OP testing only!
591 // *** END OP PROCESSOR TESTING ONLY ***
592 if (halfline >= ypos && height > 0)
594 uint64 p1 = OPLoadPhrase(op_pointer);
596 //WriteLog("OP: Writing halfline %d with ypos == %d...\n", halfline, ypos);
597 //WriteLog("--> Writing %u BPP bitmap...\n", op_bitmap_bit_depth[(p1 >> 12) & 0x07]);
598 // OPProcessFixedBitmap(halfline, p0, p1, render);
599 OPProcessFixedBitmap(p0, p1, render);
603 //???Does this really happen??? Doesn't seem to work if you do this...!
604 //Probably not. Must be a bug in the documentation...!
605 // uint32 link = (p0 & 0x7FFFF000000) >> 21;
606 // SET16(tom_ram_8, 0x20, link & 0xFFFF); // OLP
607 // SET16(tom_ram_8, 0x22, link >> 16);
608 /* uint32 height = (p0 & 0xFFC000) >> 14;
611 // NOTE: Would subtract 2 if in interlaced mode...!
612 // uint64 height = ((p0 & 0xFFC000) - 0x4000) & 0xFFC000;
616 uint64 data = (p0 & 0xFFFFF80000000000LL) >> 40;
617 uint64 dwidth = (p1 & 0xFFC0000) >> 15;
620 p0 &= ~0xFFFFF80000FFC000LL; // Mask out old data...
621 p0 |= (uint64)height << 14;
623 OPStorePhrase(oldOPP, p0);
625 //WriteLog("\t\tOld OP: %08X -> ", op_pointer);
626 //Temp, for testing...
627 //No doubt, this type of check will break all kinds of stuff... !!! FIX !!!
628 //And it does! !!! FIX !!!
629 //Let's remove this "fix" since it screws up more than it fixes.
630 /* if (op_pointer > ((p0 & 0x000007FFFF000000LL) >> 21))
633 // NOTE: The link address only replaces bits 3-21 in the OLP, and this replaces
634 // EVERYTHING. !!! FIX !!! [DONE]
635 #warning "!!! Link address is not linked properly for all object types !!!"
636 #warning "!!! Only BITMAP is properly handled !!!"
637 op_pointer &= 0xFFC00007;
638 op_pointer |= (p0 & 0x000007FFFF000000LL) >> 21;
639 //WriteLog("New OP: %08X\n", op_pointer);
640 //kludge: Seems that memory access is mirrored in the first 8MB of memory...
641 if (op_pointer > 0x1FFFFF && op_pointer < 0x800000)
642 op_pointer &= 0xFF1FFFFF; // Knock out bits 21-23
646 case OBJECT_TYPE_SCALE:
648 //WAS: uint16 ypos = (p0 >> 3) & 0x3FF;
649 uint16 ypos = (p0 >> 3) & 0x7FF;
650 uint32 height = (p0 & 0xFFC000) >> 14;
651 uint32 oldOPP = op_pointer - 8;
652 //WriteLog("OP: Scaled Object (ypos=%04X, height=%04X", ypos, height);
653 // *** BEGIN OP PROCESSOR TESTING ONLY ***
654 if (inhibit && op_start_log)
656 WriteLog("!!! ^^^ This object is INHIBITED! ^^^ !!! (halfline=%u, ypos=%u, height=%u)\n", halfline, ypos, height);
657 DumpScaledObject(p0, OPLoadPhrase(op_pointer), OPLoadPhrase(op_pointer+8));
660 if (!inhibit) // For OP testing only!
661 // *** END OP PROCESSOR TESTING ONLY ***
662 if (halfline >= ypos && height > 0)
664 uint64 p1 = OPLoadPhrase(op_pointer);
666 uint64 p2 = OPLoadPhrase(op_pointer);
668 //WriteLog("OP: %08X (%d) %08X%08X %08X%08X %08X%08X\n", oldOPP, halfline, (uint32)(p0>>32), (uint32)(p0&0xFFFFFFFF), (uint32)(p1>>32), (uint32)(p1&0xFFFFFFFF), (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
669 OPProcessScaledBitmap(p0, p1, p2, render);
673 uint16 remainder = (p2 >> 16) & 0xFF;//, vscale = p2 >> 8;
674 uint8 /*remainder = p2 >> 16,*/ vscale = p2 >> 8;
675 //Actually, we should skip this object if it has a vscale of zero.
676 //Or do we? Not sure... Atari Karts has a few lines that look like:
678 //000E8268 --> phrase 00010000 7000B00D
679 // [7 (0) x 1 @ (13, 0) (8 bpp), l: 000E82A0, p: 000E0FC0 fp: 00, fl:RELEASE, idx:00, pt:01]
680 // [hsc: 9A, vsc: 00, rem: 00]
681 // Could it be the vscale is overridden if the DWIDTH is zero? Hmm...
682 //WriteLog("OP: Scaled bitmap processing (rem=%02X, vscale=%02X)...\n", remainder, vscale);//*/
685 vscale = 0x20; // OP bug??? Nope, it isn't...! Or is it?
687 //extern int start_logging;
689 // WriteLog("--> Returned from scaled bitmap processing (rem=%02X, vscale=%02X)...\n", remainder, vscale);//*/
691 //--> Returned from scaled bitmap processing (rem=20, vscale=80)...
692 //There are other problems here, it looks like...
694 //About to execute OP (508)...
696 OP: Scaled bitmap 4x? 4bpp at 38,? hscale=7C fpix=0 data=00075E28 pitch 1 hflipped=no dwidth=? (linked to 00071118) Transluency=no
697 --> Returned from scaled bitmap processing (rem=50, vscale=7C)...
698 OP: Scaled bitmap 4x? 4bpp at 38,? hscale=7C fpix=0 data=00075E28 pitch 1 hflipped=no dwidth=? (linked to 00071118) Transluency=no
699 --> Returned from scaled bitmap processing (rem=30, vscale=7C)...
700 OP: Scaled bitmap 4x? 4bpp at 38,? hscale=7C fpix=0 data=00075E28 pitch 1 hflipped=no dwidth=? (linked to 00071118) Transluency=no
701 --> Returned from scaled bitmap processing (rem=10, vscale=7C)...
702 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756A8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
703 --> Returned from scaled bitmap processing (rem=00, vscale=7E)...
704 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
705 --> Returned from scaled bitmap processing (rem=00, vscale=80)...
706 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
707 --> Returned from scaled bitmap processing (rem=5E, vscale=7E)...
708 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756E8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
709 --> Returned from scaled bitmap processing (rem=60, vscale=80)...
710 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
711 --> Returned from scaled bitmap processing (rem=3E, vscale=7E)...
712 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756E8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
713 --> Returned from scaled bitmap processing (rem=40, vscale=80)...
714 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
715 --> Returned from scaled bitmap processing (rem=1E, vscale=7E)...
716 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756E8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
717 --> Returned from scaled bitmap processing (rem=20, vscale=80)...
719 //Here's another problem:
720 // [hsc: 20, vsc: 20, rem: 00]
721 // Since we're not checking for $E0 (but that's what we get from the above), we end
722 // up repeating this halfline unnecessarily... !!! FIX !!! [DONE, but... still not quite
723 // right. Either that, or the Accolade team that wrote Bubsy screwed up royal.]
724 //Also note: $E0 = 7.0 which IS a legal vscale value...
726 // if (remainder & 0x80) // I.e., it's negative
727 // if ((remainder & 0x80) || remainder == 0) // I.e., it's <= 0
728 // if ((remainder - 1) >= 0xE0) // I.e., it's <= 0
729 // if ((remainder >= 0xE1) || remainder == 0)// I.e., it's <= 0
730 // if ((remainder >= 0xE1 && remainder <= 0xFF) || remainder == 0)// I.e., it's <= 0
731 // if (remainder <= 0x20) // I.e., it's <= 1.0
732 // I.e., it's < 1.0f -> means it'll go negative when we subtract 1.0f.
733 if (remainder < 0x20)
735 uint64 data = (p0 & 0xFFFFF80000000000LL) >> 40;
736 uint64 dwidth = (p1 & 0xFFC0000) >> 15;
738 // while (remainder & 0x80)
739 // while ((remainder & 0x80) || remainder == 0)
740 // while ((remainder - 1) >= 0xE0)
741 // while ((remainder >= 0xE1) || remainder == 0)
742 // while ((remainder >= 0xE1 && remainder <= 0xFF) || remainder == 0)
743 // while (remainder <= 0x20)
744 while (remainder < 0x20)
754 p0 &= ~0xFFFFF80000FFC000LL; // Mask out old data...
755 p0 |= (uint64)height << 14;
757 OPStorePhrase(oldOPP, p0);
760 remainder -= 0x20; // 1.0f in [3.5] fixed point format
763 // WriteLog("--> Finished writebacks...\n");//*/
765 //WriteLog(" [%08X%08X -> ", (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
766 p2 &= ~0x0000000000FF0000LL;
767 p2 |= (uint64)remainder << 16;
768 //WriteLog("%08X%08X]\n", (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
769 OPStorePhrase(oldOPP + 16, p2);
770 //remainder = (uint8)(p2 >> 16), vscale = (uint8)(p2 >> 8);
771 //WriteLog(" [after]: rem=%02X, vscale=%02X\n", remainder, vscale);
774 op_pointer = (p0 & 0x000007FFFF000000LL) >> 21;
777 case OBJECT_TYPE_GPU:
779 //WriteLog("OP: Asserting GPU IRQ #3...\n");
780 #warning "Need to fix OP GPU IRQ handling! !!! FIX !!!"
781 OPSetCurrentObject(p0);
782 GPUSetIRQLine(3, ASSERT_LINE);
783 //Also, OP processing is suspended from this point until OBF (F00026) is written to...
786 //OPSuspendedByGPU = true;
787 //Dunno if the OP keeps processing from where it was interrupted, or if it just continues
788 //on the next halfline...
789 // --> It continues from where it was interrupted! !!! FIX !!!
792 case OBJECT_TYPE_BRANCH:
794 uint16 ypos = (p0 >> 3) & 0x7FF;
795 // NOTE: The JTRM sez there are only 2 bits used for the CC, but lists *five*
796 // conditions! Need at least one more bit for that! :-P
797 // Also, the ASIC nets imply that it uses bits 14-16 (height in BM & SBM objects)
798 #warning "!!! Possibly bad CC handling in OP (missing 1 bit) !!!"
799 uint8 cc = (p0 >> 14) & 0x03;
800 uint32 link = (p0 >> 21) & 0x3FFFF8;
802 // if ((ypos!=507)&&(ypos!=25))
803 // WriteLog("\t%i%s%i link=0x%.8x\n",halfline,condition_to_str[cc],ypos>>1,link);
806 case CONDITION_EQUAL:
807 if (TOMReadWord(0xF00006, OP) == ypos || ypos == 0x7FF)
810 case CONDITION_LESS_THAN:
811 if (TOMReadWord(0xF00006, OP) < ypos)
814 case CONDITION_GREATER_THAN:
815 if (TOMReadWord(0xF00006, OP) > ypos)
818 case CONDITION_OP_FLAG_SET:
819 if (OPGetStatusRegister() & 0x01)
822 case CONDITION_SECOND_HALF_LINE:
823 //Here's the ASIC code:
824 // ND4(cctrue5, newheight[2], heightl[1], heightl[0], hcb[10]);
825 //which means, do the link if bit 10 of HC is set...
827 // This basically means branch if bit 10 of HC is set
828 #warning "Unhandled condition code causes emulator to crash... !!! FIX !!!"
829 WriteLog("OP: Unexpected CONDITION_SECOND_HALF_LINE in BRANCH object\nOP: shutting down!\n");
834 // Basically, if you do this, the OP does nothing. :-)
835 WriteLog("OP: Unimplemented branch condition %i\n", cc);
839 case OBJECT_TYPE_STOP:
843 //WriteLog("OP: --> STOP\n");
844 // op_set_status_register(((p0>>3) & 0xFFFFFFFF));
845 //This seems more likely...
846 OPSetCurrentObject(p0);
850 // We need to check whether these interrupts are enabled or not, THEN
851 // set an IRQ + pending flag if necessary...
852 if (TOMIRQEnabled(IRQ_OPFLAG))
854 TOMSetPendingObjectInt();
855 m68k_set_irq(2); // Cause a 68K IPL 2 to occur...
863 // WriteLog("op: unknown object type %i\n", ((uint8)p0 & 0x07));
867 // Here is a little sanity check to keep the OP from locking up the machine
868 // when fed bad data. Better would be to count how many actual cycles it used
869 // and bail out/reenter to properly simulate an overloaded OP... !!! FIX !!!
870 #warning "Better would be to count how many actual cycles it used and bail out/reenter to properly simulate an overloaded OP... !!! FIX !!!"
880 // Store fixed size bitmap in line buffer
882 void OPProcessFixedBitmap(uint64 p0, uint64 p1, bool render)
884 // Need to make sure that when writing that it stays within the line buffer...
885 // LBUF ($F01800 - $F01D9E) 360 x 32-bit RAM
886 uint8 depth = (p1 >> 12) & 0x07; // Color depth of image
887 int32 xpos = ((int16)((p1 << 4) & 0xFFFF)) >> 4;// Image xpos in LBUF
888 uint32 iwidth = (p1 >> 28) & 0x3FF; // Image width in *phrases*
889 uint32 data = (p0 >> 40) & 0xFFFFF8; // Pixel data address
890 //#ifdef OP_DEBUG_BMP
891 uint32 firstPix = (p1 >> 49) & 0x3F;
892 // "The LSB is significant only for scaled objects..." -JTRM
893 // "In 1 BPP mode, all five bits are significant. In 2 BPP mode, the top four are significant..."
896 // We can ignore the RELEASE (high order) bit for now--probably forever...!
897 // uint8 flags = (p1 >> 45) & 0x0F; // REFLECT, RMW, TRANS, RELEASE
898 //Optimize: break these out to their own BOOL values
899 uint8 flags = (p1 >> 45) & 0x07; // REFLECT (0), RMW (1), TRANS (2)
900 bool flagREFLECT = (flags & OPFLAG_REFLECT ? true : false),
901 flagRMW = (flags & OPFLAG_RMW ? true : false),
902 flagTRANS = (flags & OPFLAG_TRANS ? true : false);
903 // "For images with 1 to 4 bits/pixel the top 7 to 4 bits of the index
904 // provide the most significant bits of the palette address."
905 uint8 index = (p1 >> 37) & 0xFE; // CLUT index offset (upper pix, 1-4 bpp)
906 uint32 pitch = (p1 >> 15) & 0x07; // Phrase pitch
907 pitch <<= 3; // Optimization: Multiply pitch by 8
909 // int16 scanlineWidth = tom_getVideoModeWidth();
910 uint8 * tomRam8 = TOMGetRamPointer();
911 uint8 * paletteRAM = &tomRam8[0x400];
912 // This is OK as long as it's used correctly: For 16-bit RAM to RAM direct copies--NOT
913 // for use when using endian-corrected data (i.e., any of the *_word_read functions!)
914 uint16 * paletteRAM16 = (uint16 *)paletteRAM;
916 // WriteLog("bitmap %ix? %ibpp at %i,? firstpix=? data=0x%.8x pitch %i hflipped=%s dwidth=? (linked to ?) RMW=%s Tranparent=%s\n",
917 // iwidth, op_bitmap_bit_depth[bitdepth], xpos, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), (flags&OPFLAG_RMW ? "yes" : "no"), (flags&OPFLAG_TRANS ? "yes" : "no"));
919 // Is it OK to have a 0 for the data width??? (i.e., undocumented?)
920 // Seems to be... Seems that dwidth *can* be zero (i.e., reuse same line) as well.
921 // Pitch == 0 is OK too...
923 //kludge: Seems that the OP treats iwidth == 0 as iwidth == 1... Need to investigate
924 // on real hardware...
925 #warning "!!! Need to investigate iwidth == 0 behavior on real hardware !!!"
929 // if (!render || op_pointer == 0 || ptr == 0 || pitch == 0)
930 //I'm not convinced that we need to concern ourselves with data & op_pointer here either!
931 if (!render || iwidth == 0)
934 //OK, so we know the position in the line buffer is correct. It's the clipping in
935 //24bpp mode that's wrong!
937 //This is a total kludge, based upon the fact that 24BPP mode puts *4* bytes
938 //into the line buffer for each pixel.
939 if (depth == 5) // i.e., 24bpp mode...
940 xpos >>= 1; // Cut it in half...
943 //#define OP_DEBUG_BMP
944 //#ifdef OP_DEBUG_BMP
945 // WriteLog("bitmap %ix%i %ibpp at %i,%i firstpix=%i data=0x%.8x pitch %i hflipped=%s dwidth=%i (linked to 0x%.8x) Transluency=%s\n",
946 // iwidth, height, op_bitmap_bit_depth[bitdepth], xpos, ypos, firstPix, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), dwidth, op_pointer, (flags&OPFLAG_RMW ? "yes" : "no"));
949 // int32 leftMargin = xpos, rightMargin = (xpos + (phraseWidthToPixels[depth] * iwidth)) - 1;
950 int32 startPos = xpos, endPos = xpos +
951 (!flagREFLECT ? (phraseWidthToPixels[depth] * iwidth) - 1
952 : -((phraseWidthToPixels[depth] * iwidth) + 1));
953 uint32 clippedWidth = 0, phraseClippedWidth = 0, dataClippedWidth = 0;//, phrasePixel = 0;
954 bool in24BPPMode = (((GET16(tomRam8, 0x0028) >> 1) & 0x03) == 1 ? true : false); // VMODE
955 // Not sure if this is Jaguar Two only location or what...
956 // From the docs, it is... If we want to limit here we should think of something else.
957 // int32 limit = GET16(tom_ram_8, 0x0008); // LIMIT
958 // int32 limit = 720;
959 // int32 lbufWidth = (!in24BPPMode ? limit - 1 : (limit / 2) - 1); // Zero based limit...
960 //printf("[OP:xpos=%i,spos=%i,epos=%i>", xpos, startPos, endPos);
961 // This is correct, the OP line buffer is a constant size...
963 int32 lbufWidth = 719;
965 // If the image is completely to the left or right of the line buffer, then bail.
966 //If in REFLECT mode, then these values are swapped! !!! FIX !!! [DONE]
967 //There are four possibilities:
968 // 1. image sits on left edge and no REFLECT; starts out of bounds but ends in bounds.
969 // 2. image sits on left edge and REFLECT; starts in bounds but ends out of bounds.
970 // 3. image sits on right edge and REFLECT; starts out of bounds but ends in bounds.
971 // 4. image sits on right edge and no REFLECT; starts in bounds but ends out of bounds.
972 //Numbers 2 & 4 can be caught by checking the LBUF clip while in the inner loop,
973 // numbers 1 & 3 are of concern.
974 // This *indirectly* handles only cases 2 & 4! And is WRONG if REFLECT is set...!
975 // if (rightMargin < 0 || leftMargin > lbufWidth)
977 // It might be easier to swap these (if REFLECTed) and just use XPOS down below...
978 // That way, you could simply set XPOS to leftMargin if !REFLECT and to rightMargin otherwise.
979 // Still have to be careful with the DATA and IWIDTH values though...
981 // if ((!flagREFLECT && (rightMargin < 0 || leftMargin > lbufWidth))
982 // || (flagREFLECT && (leftMargin < 0 || rightMargin > lbufWidth)))
984 if ((!flagREFLECT && (endPos < 0 || startPos > lbufWidth))
985 || (flagREFLECT && (startPos < 0 || endPos > lbufWidth)))
988 // Otherwise, find the clip limits and clip the phrase as well...
989 // NOTE: I'm fudging here by letting the actual blit overstep the bounds of the
990 // line buffer, but it shouldn't matter since there are two unused line
991 // buffers below and nothing above and I'll at most write 8 bytes outside
992 // the line buffer... I could use a fractional clip begin/end value, but
993 // this makes the blit a *lot* more hairy. I might fix this in the future
994 // if it becomes necessary. (JLH)
995 // Probably wouldn't be *that* hairy. Just use a delta that tells the inner loop
996 // which pixel in the phrase is being written, and quit when either end of phrases
997 // is reached or line buffer extents are surpassed.
999 //This stuff is probably wrong as well... !!! FIX !!!
1000 //The strange thing is that it seems to work, but that's no guarantee that it's bulletproof!
1001 //Yup. Seems that JagMania doesn't work correctly with this...
1002 //Dunno if this is the problem, but Atari Karts is showing *some* of the road now...
1003 // if (!flagREFLECT)
1007 clippedWidth = 0 - leftMargin,
1008 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth],
1009 leftMargin = 0 - (clippedWidth % phraseWidthToPixels[depth]);
1012 if (rightMargin > lbufWidth)
1013 clippedWidth = rightMargin - lbufWidth,
1014 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];//,
1015 // rightMargin = lbufWidth + (clippedWidth % phraseWidthToPixels[depth]);
1016 // rightMargin = lbufWidth;
1019 WriteLog("OP: We're about to encounter a divide by zero error!\n");
1020 // NOTE: We're just using endPos to figure out how much, if any, to clip by.
1021 // ALSO: There may be another case where we start out of bounds and end out of bounds...!
1023 if (startPos < 0) // Case #1: Begin out, end in, L to R
1024 clippedWidth = 0 - startPos,
1025 dataClippedWidth = phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth],
1026 startPos = 0 - (clippedWidth % phraseWidthToPixels[depth]);
1028 if (endPos < 0) // Case #2: Begin in, end out, R to L
1029 clippedWidth = 0 - endPos,
1030 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];
1032 if (endPos > lbufWidth) // Case #3: Begin in, end out, L to R
1033 clippedWidth = endPos - lbufWidth,
1034 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];
1036 if (startPos > lbufWidth) // Case #4: Begin out, end in, R to L
1037 clippedWidth = startPos - lbufWidth,
1038 dataClippedWidth = phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth],
1039 startPos = lbufWidth + (clippedWidth % phraseWidthToPixels[depth]);
1040 //printf("<OP:spos=%i,epos=%i]", startPos, endPos);
1042 // If the image is sitting on the line buffer left or right edge, we need to compensate
1043 // by decreasing the image phrase width accordingly.
1044 iwidth -= phraseClippedWidth;
1046 // Also, if we're clipping the phrase we need to make sure we're in the correct part of
1048 // data += phraseClippedWidth * (pitch << 3);
1049 data += dataClippedWidth * pitch;
1051 // NOTE: When the bitmap is in REFLECT mode, the XPOS marks the *right* side of the
1052 // bitmap! This makes clipping & etc. MUCH, much easier...!
1053 // uint32 lbufAddress = 0x1800 + (!in24BPPMode ? leftMargin * 2 : leftMargin * 4);
1054 //Why does this work right when multiplying startPos by 2 (instead of 4) for 24 BPP mode?
1055 //Is this a bug in the OP?
1056 //It's because in 24bpp mode, each pixel takes *4* bytes, instead of the usual 2.
1057 //Though it looks like we're doing it here no matter what...
1058 // uint32 lbufAddress = 0x1800 + (!in24BPPMode ? startPos * 2 : startPos * 2);
1060 uint32 lbufAddress = 0x1800 + (startPos * 2);
1061 uint8 * currentLineBuffer = &tomRam8[lbufAddress];
1065 // Hmm. We check above for 24 BPP mode, but don't do anything about it below...
1066 // If we *were* in 24 BPP mode, how would you convert CRY to RGB24? Seems to me
1067 // that if you're in CRY mode then you wouldn't be able to use 24 BPP bitmaps
1069 // This seems to be the case (at least according to the Midsummer docs)...!
1071 // This is to test using palette zeroes instead of bit zeroes...
1072 // And it seems that this is wrong, index == 0 is transparent apparently... :-/
1073 //#define OP_USES_PALETTE_ZERO
1075 if (depth == 0) // 1 BPP
1077 // The LSB of flags is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1078 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1080 // Fetch 1st phrase...
1081 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1082 //Note that firstPix should only be honored *if* we start with the 1st phrase of the bitmap
1083 //i.e., we didn't clip on the margin... !!! FIX !!!
1084 pixels <<= firstPix; // Skip first N pixels (N=firstPix)...
1085 int i = firstPix; // Start counter at right spot...
1091 uint8 bit = pixels >> 63;
1092 #ifndef OP_USES_PALETTE_ZERO
1093 if (flagTRANS && bit == 0)
1095 if (flagTRANS && (paletteRAM16[index | bit] == 0))
1101 //Optimize: Set paletteRAM16 to beginning of palette RAM + index*2 and use only [bit] as index...
1102 //Won't optimize RMW case though...
1103 // This is the *only* correct use of endian-dependent code
1104 // (i.e., mem-to-mem direct copying)!
1105 *(uint16 *)currentLineBuffer = paletteRAM16[index | bit];
1107 *currentLineBuffer =
1108 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bit) << 1]),
1109 *(currentLineBuffer + 1) =
1110 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bit) << 1) + 1]);
1113 currentLineBuffer += lbufDelta;
1117 // Fetch next phrase...
1119 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1122 else if (depth == 1) // 2 BPP
1125 WriteLog("OP: Fixed bitmap @ 2 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1126 index &= 0xFC; // Top six bits form CLUT index
1127 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1128 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1133 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1136 for(int i=0; i<32; i++)
1138 uint8 bits = pixels >> 62;
1139 // Seems to me that both of these are in the same endian, so we could cast it as
1140 // uint16 * and do straight across copies (what about 24 bpp? Treat it differently...)
1141 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
1142 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
1143 // No, it isn't because we read the memory in an endian safe way--this *won't* work...
1144 #ifndef OP_USES_PALETTE_ZERO
1145 if (flagTRANS && bits == 0)
1147 if (flagTRANS && (paletteRAM16[index | bits] == 0))
1153 *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1155 *currentLineBuffer =
1156 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1157 *(currentLineBuffer + 1) =
1158 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1161 currentLineBuffer += lbufDelta;
1166 else if (depth == 2) // 4 BPP
1169 WriteLog("OP: Fixed bitmap @ 4 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1170 index &= 0xF0; // Top four bits form CLUT index
1171 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1172 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1177 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1180 for(int i=0; i<16; i++)
1182 uint8 bits = pixels >> 60;
1183 // Seems to me that both of these are in the same endian, so we could cast it as
1184 // uint16 * and do straight across copies (what about 24 bpp? Treat it differently...)
1185 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
1186 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
1187 // No, it isn't because we read the memory in an endian safe way--this *won't* work...
1188 #ifndef OP_USES_PALETTE_ZERO
1189 if (flagTRANS && bits == 0)
1191 if (flagTRANS && (paletteRAM16[index | bits] == 0))
1197 *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1199 *currentLineBuffer =
1200 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1201 *(currentLineBuffer + 1) =
1202 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1205 currentLineBuffer += lbufDelta;
1210 else if (depth == 3) // 8 BPP
1212 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1213 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1215 // Fetch 1st phrase...
1216 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1217 //Note that firstPix should only be honored *if* we start with the 1st phrase of the bitmap
1218 //i.e., we didn't clip on the margin... !!! FIX !!!
1219 firstPix &= 0x30; // Only top two bits are valid for 8 BPP
1220 pixels <<= firstPix; // Skip first N pixels (N=firstPix)...
1221 int i = firstPix >> 3; // Start counter at right spot...
1227 uint8 bits = pixels >> 56;
1228 // Seems to me that both of these are in the same endian, so we could cast it as
1229 // uint16 * and do straight across copies (what about 24 bpp? Treat it differently...)
1230 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
1231 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
1232 // No, it isn't because we read the memory in an endian safe way--this *won't* work...
1233 //This would seem to be problematic...
1234 //Because it's the palette entry being zero that makes the pixel transparent...
1235 //Let's try it and see.
1236 #ifndef OP_USES_PALETTE_ZERO
1237 if (flagTRANS && bits == 0)
1239 if (flagTRANS && (paletteRAM16[bits] == 0))
1245 *(uint16 *)currentLineBuffer = paletteRAM16[bits];
1247 *currentLineBuffer =
1248 BLEND_CR(*currentLineBuffer, paletteRAM[bits << 1]),
1249 *(currentLineBuffer + 1) =
1250 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[(bits << 1) + 1]);
1253 currentLineBuffer += lbufDelta;
1257 // Fetch next phrase...
1259 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1262 else if (depth == 4) // 16 BPP
1265 WriteLog("OP: Fixed bitmap @ 16 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1266 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1267 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1272 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1275 for(int i=0; i<4; i++)
1277 uint8 bitsHi = pixels >> 56, bitsLo = pixels >> 48;
1278 // Seems to me that both of these are in the same endian, so we could cast it as
1279 // uint16 * and do straight across copies (what about 24 bpp? Treat it differently...)
1280 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
1281 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
1282 // No, it isn't because we read the memory in an endian safe way--it *won't* work...
1283 //This doesn't seem right... Let's try the encoded black value ($8800):
1284 //Apparently, CRY 0 maps to $8800...
1285 if (flagTRANS && ((bitsLo | bitsHi) == 0))
1286 // if (flagTRANS && (bitsHi == 0x88) && (bitsLo == 0x00))
1291 *currentLineBuffer = bitsHi,
1292 *(currentLineBuffer + 1) = bitsLo;
1294 *currentLineBuffer =
1295 BLEND_CR(*currentLineBuffer, bitsHi),
1296 *(currentLineBuffer + 1) =
1297 BLEND_Y(*(currentLineBuffer + 1), bitsLo);
1300 currentLineBuffer += lbufDelta;
1305 else if (depth == 5) // 24 BPP
1307 //Looks like Iron Soldier is the only game that uses 24BPP mode...
1308 //There *might* be others...
1309 //WriteLog("OP: Writing 24 BPP bitmap!\n");
1311 WriteLog("OP: Fixed bitmap @ 24 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1312 // Not sure, but I think RMW only works with 16 BPP and below, and only in CRY mode...
1313 // The LSB of flags is OPFLAG_REFLECT, so sign extend it and OR 4 into it.
1314 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 4) | 0x04;
1319 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1322 for(int i=0; i<2; i++)
1324 // We don't use a 32-bit var here because of endian issues...!
1325 uint8 bits3 = pixels >> 56, bits2 = pixels >> 48,
1326 bits1 = pixels >> 40, bits0 = pixels >> 32;
1328 if (flagTRANS && (bits3 | bits2 | bits1 | bits0) == 0)
1331 *currentLineBuffer = bits3,
1332 *(currentLineBuffer + 1) = bits2,
1333 *(currentLineBuffer + 2) = bits1,
1334 *(currentLineBuffer + 3) = bits0;
1336 currentLineBuffer += lbufDelta;
1345 // Store scaled bitmap in line buffer
1347 void OPProcessScaledBitmap(uint64 p0, uint64 p1, uint64 p2, bool render)
1349 // Need to make sure that when writing that it stays within the line buffer...
1350 // LBUF ($F01800 - $F01D9E) 360 x 32-bit RAM
1351 uint8 depth = (p1 >> 12) & 0x07; // Color depth of image
1352 int32 xpos = ((int16)((p1 << 4) & 0xFFFF)) >> 4;// Image xpos in LBUF
1353 uint32 iwidth = (p1 >> 28) & 0x3FF; // Image width in *phrases*
1354 uint32 data = (p0 >> 40) & 0xFFFFF8; // Pixel data address
1355 //#ifdef OP_DEBUG_BMP
1356 // Prolly should use this... Though not sure exactly how.
1357 //Use the upper bits as an offset into the phrase depending on the BPP. That's how!
1358 uint32 firstPix = (p1 >> 49) & 0x3F;
1359 //This is WEIRD! I'm sure I saw Atari Karts request 8 BPP FIRSTPIX! What happened???
1361 WriteLog("OP: FIRSTPIX != 0! (Scaled BM)\n");
1363 // We can ignore the RELEASE (high order) bit for now--probably forever...!
1364 // uint8 flags = (p1 >> 45) & 0x0F; // REFLECT, RMW, TRANS, RELEASE
1365 //Optimize: break these out to their own BOOL values [DONE]
1366 uint8 flags = (p1 >> 45) & 0x07; // REFLECT (0), RMW (1), TRANS (2)
1367 bool flagREFLECT = (flags & OPFLAG_REFLECT ? true : false),
1368 flagRMW = (flags & OPFLAG_RMW ? true : false),
1369 flagTRANS = (flags & OPFLAG_TRANS ? true : false);
1370 uint8 index = (p1 >> 37) & 0xFE; // CLUT index offset (upper pix, 1-4 bpp)
1371 uint32 pitch = (p1 >> 15) & 0x07; // Phrase pitch
1373 uint8 * tomRam8 = TOMGetRamPointer();
1374 uint8 * paletteRAM = &tomRam8[0x400];
1375 // This is OK as long as it's used correctly: For 16-bit RAM to RAM direct copies--NOT
1376 // for use when using endian-corrected data (i.e., any of the *ReadWord functions!)
1377 uint16 * paletteRAM16 = (uint16 *)paletteRAM;
1379 uint16 hscale = p2 & 0xFF;
1380 // Hmm. It seems that fixing the horizontal scale necessitated re-fixing this. Not sure why,
1381 // but seems to be consistent with the vertical scaling now (and it may turn out to be wrong!)...
1382 uint16 horizontalRemainder = hscale; // Not sure if it starts full, but seems reasonable [It's not!]
1383 // uint8 horizontalRemainder = 0; // Let's try zero! Seems to work! Yay! [No, it doesn't!]
1384 int32 scaledWidthInPixels = (iwidth * phraseWidthToPixels[depth] * hscale) >> 5;
1385 uint32 scaledPhrasePixels = (phraseWidthToPixels[depth] * hscale) >> 5;
1387 // WriteLog("bitmap %ix? %ibpp at %i,? firstpix=? data=0x%.8x pitch %i hflipped=%s dwidth=? (linked to ?) RMW=%s Tranparent=%s\n",
1388 // iwidth, op_bitmap_bit_depth[bitdepth], xpos, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), (flags&OPFLAG_RMW ? "yes" : "no"), (flags&OPFLAG_TRANS ? "yes" : "no"));
1390 // Looks like an hscale of zero means don't draw!
1391 if (!render || iwidth == 0 || hscale == 0)
1394 /*extern int start_logging;
1396 WriteLog("OP: Scaled bitmap %ix? %ibpp at %i,? hscale=%02X fpix=%i data=%08X pitch %i hflipped=%s dwidth=? (linked to %08X) Transluency=%s\n",
1397 iwidth, op_bitmap_bit_depth[depth], xpos, hscale, firstPix, data, pitch, (flagREFLECT ? "yes" : "no"), op_pointer, (flagRMW ? "yes" : "no"));*/
1398 //#define OP_DEBUG_BMP
1399 //#ifdef OP_DEBUG_BMP
1400 // WriteLog("OP: Scaled bitmap %ix%i %ibpp at %i,%i firstpix=%i data=0x%.8x pitch %i hflipped=%s dwidth=%i (linked to 0x%.8x) Transluency=%s\n",
1401 // iwidth, height, op_bitmap_bit_depth[bitdepth], xpos, ypos, firstPix, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), dwidth, op_pointer, (flags&OPFLAG_RMW ? "yes" : "no"));
1404 int32 startPos = xpos, endPos = xpos +
1405 (!flagREFLECT ? scaledWidthInPixels - 1 : -(scaledWidthInPixels + 1));
1406 uint32 clippedWidth = 0, phraseClippedWidth = 0, dataClippedWidth = 0;
1407 bool in24BPPMode = (((GET16(tomRam8, 0x0028) >> 1) & 0x03) == 1 ? true : false); // VMODE
1408 // Not sure if this is Jaguar Two only location or what...
1409 // From the docs, it is... If we want to limit here we should think of something else.
1410 // int32 limit = GET16(tom_ram_8, 0x0008); // LIMIT
1412 // int32 lbufWidth = (!in24BPPMode ? limit - 1 : (limit / 2) - 1); // Zero based limit...
1413 int32 lbufWidth = 719; // Zero based limit...
1415 // If the image is completely to the left or right of the line buffer, then bail.
1416 //If in REFLECT mode, then these values are swapped! !!! FIX !!! [DONE]
1417 //There are four possibilities:
1418 // 1. image sits on left edge and no REFLECT; starts out of bounds but ends in bounds.
1419 // 2. image sits on left edge and REFLECT; starts in bounds but ends out of bounds.
1420 // 3. image sits on right edge and REFLECT; starts out of bounds but ends in bounds.
1421 // 4. image sits on right edge and no REFLECT; starts in bounds but ends out of bounds.
1422 //Numbers 2 & 4 can be caught by checking the LBUF clip while in the inner loop,
1423 // numbers 1 & 3 are of concern.
1424 // This *indirectly* handles only cases 2 & 4! And is WRONG if REFLECT is set...!
1425 // if (rightMargin < 0 || leftMargin > lbufWidth)
1427 // It might be easier to swap these (if REFLECTed) and just use XPOS down below...
1428 // That way, you could simply set XPOS to leftMargin if !REFLECT and to rightMargin otherwise.
1429 // Still have to be careful with the DATA and IWIDTH values though...
1431 if ((!flagREFLECT && (endPos < 0 || startPos > lbufWidth))
1432 || (flagREFLECT && (startPos < 0 || endPos > lbufWidth)))
1435 // Otherwise, find the clip limits and clip the phrase as well...
1436 // NOTE: I'm fudging here by letting the actual blit overstep the bounds of the
1437 // line buffer, but it shouldn't matter since there are two unused line
1438 // buffers below and nothing above and I'll at most write 40 bytes outside
1439 // the line buffer... I could use a fractional clip begin/end value, but
1440 // this makes the blit a *lot* more hairy. I might fix this in the future
1441 // if it becomes necessary. (JLH)
1442 // Probably wouldn't be *that* hairy. Just use a delta that tells the inner loop
1443 // which pixel in the phrase is being written, and quit when either end of phrases
1444 // is reached or line buffer extents are surpassed.
1446 //This stuff is probably wrong as well... !!! FIX !!!
1447 //The strange thing is that it seems to work, but that's no guarantee that it's bulletproof!
1448 //Yup. Seems that JagMania doesn't work correctly with this...
1449 //Dunno if this is the problem, but Atari Karts is showing *some* of the road now...
1450 //Actually, it is! Or, it was. It doesn't seem to be clipping here, so the problem lies
1451 //elsewhere! Hmm. Putting the scaling code into the 1/2/8 BPP cases seems to draw the ground
1452 // a bit more accurately... Strange!
1453 //It's probably a case of the REFLECT flag being set and the background being written
1454 //from the right side of the screen...
1455 //But no, it isn't... At least if the diagnostics are telling the truth!
1457 // NOTE: We're just using endPos to figure out how much, if any, to clip by.
1458 // ALSO: There may be another case where we start out of bounds and end out of bounds...!
1461 //There's a problem here with scaledPhrasePixels in that it can be forced to zero when
1462 //the scaling factor is small. So fix it already! !!! FIX !!!
1463 /*if (scaledPhrasePixels == 0)
1465 WriteLog("OP: [Scaled] We're about to encounter a divide by zero error!\n");
1466 DumpScaledObject(p0, p1, p2);
1468 //NOTE: I'm almost 100% sure that this is wrong... And it is! :-p
1470 //Try a simple example...
1471 // Let's say we have a 8 BPP scanline with an hscale of $80 (4). Our xpos is -10,
1472 // non-flipped. Pixels in the bitmap are XYZXYZXYZXYZXYZ.
1473 // Scaled up, they would be XXXXYYYYZZZZXXXXYYYYZZZZXXXXYYYYZZZZ...
1475 // Normally, we would expect this in the line buffer:
1476 // ZZXXXXYYYYZZZZXXXXYYYYZZZZ...
1478 // But instead we're getting:
1479 // XXXXYYYYZZZZXXXXYYYYZZZZ...
1481 // or are we??? It would seem so, simply by virtue of the fact that we're NOT starting
1482 // on negative boundary--or are we? Hmm...
1483 // cw = 10, dcw = pcw = 10 / ([8 * 4 = 32] 32) = 0, sp = -10
1485 // Let's try a real world example:
1487 //OP: Scaled bitmap (70, 8 BPP, spp=28) sp (-400) < 0... [new sp=-8, cw=400, dcw=pcw=14]
1488 //OP: Scaled bitmap (6F, 8 BPP, spp=27) sp (-395) < 0... [new sp=-17, cw=395, dcw=pcw=14]
1490 // Really, spp is 27.75 in the second case...
1491 // So... If we do 395 / 27.75, we get 14. Ok so far... If we scale that against the
1492 // start position (14 * 27.75), we get -6.5... NOT -17!
1494 //Now it seems we're working OK, at least for the first case...
1495 uint32 scaledPhrasePixelsUS = phraseWidthToPixels[depth] * hscale;
1497 if (startPos < 0) // Case #1: Begin out, end in, L to R
1499 extern int start_logging;
1501 WriteLog("OP: Scaled bitmap (%02X, %u BPP, spp=%u) start pos (%i) < 0...", hscale, op_bitmap_bit_depth[depth], scaledPhrasePixels, startPos);
1502 // clippedWidth = 0 - startPos,
1503 clippedWidth = (0 - startPos) << 5,
1504 // dataClippedWidth = phraseClippedWidth = clippedWidth / scaledPhrasePixels,
1505 dataClippedWidth = phraseClippedWidth = (clippedWidth / scaledPhrasePixelsUS) >> 5,
1506 // startPos = 0 - (clippedWidth % scaledPhrasePixels);
1507 startPos += (dataClippedWidth * scaledPhrasePixelsUS) >> 5;
1509 WriteLog(" [new sp=%i, cw=%i, dcw=pcw=%i]\n", startPos, clippedWidth, dataClippedWidth);
1512 if (endPos < 0) // Case #2: Begin in, end out, R to L
1513 clippedWidth = 0 - endPos,
1514 phraseClippedWidth = clippedWidth / scaledPhrasePixels;
1516 if (endPos > lbufWidth) // Case #3: Begin in, end out, L to R
1517 clippedWidth = endPos - lbufWidth,
1518 phraseClippedWidth = clippedWidth / scaledPhrasePixels;
1520 if (startPos > lbufWidth) // Case #4: Begin out, end in, R to L
1521 clippedWidth = startPos - lbufWidth,
1522 dataClippedWidth = phraseClippedWidth = clippedWidth / scaledPhrasePixels,
1523 startPos = lbufWidth + (clippedWidth % scaledPhrasePixels);
1525 extern int op_start_log;
1526 if (op_start_log && clippedWidth != 0)
1527 WriteLog("OP: Clipped line. SP=%i, EP=%i, clip=%u, iwidth=%u, hscale=%02X\n", startPos, endPos, clippedWidth, iwidth, hscale);
1528 if (op_start_log && startPos == 13)
1530 WriteLog("OP: Scaled line. SP=%i, EP=%i, clip=%u, iwidth=%u, hscale=%02X, depth=%u, firstPix=%u\n", startPos, endPos, clippedWidth, iwidth, hscale, depth, firstPix);
1531 DumpScaledObject(p0, p1, p2);
1534 WriteLog(" %08X: ", data);
1535 for(int i=0; i<7*8; i++)
1536 WriteLog("%02X ", JaguarReadByte(data+i));
1540 // If the image is sitting on the line buffer left or right edge, we need to compensate
1541 // by decreasing the image phrase width accordingly.
1542 iwidth -= phraseClippedWidth;
1544 // Also, if we're clipping the phrase we need to make sure we're in the correct part of
1546 // data += phraseClippedWidth * (pitch << 3);
1547 data += dataClippedWidth * (pitch << 3);
1549 // NOTE: When the bitmap is in REFLECT mode, the XPOS marks the *right* side of the
1550 // bitmap! This makes clipping & etc. MUCH, much easier...!
1551 // uint32 lbufAddress = 0x1800 + (!in24BPPMode ? leftMargin * 2 : leftMargin * 4);
1552 // uint32 lbufAddress = 0x1800 + (!in24BPPMode ? startPos * 2 : startPos * 4);
1553 uint32 lbufAddress = 0x1800 + startPos * 2;
1554 uint8 * currentLineBuffer = &tomRam8[lbufAddress];
1555 //uint8 * lineBufferLowerLimit = &tom_ram_8[0x1800],
1556 // * lineBufferUpperLimit = &tom_ram_8[0x1800 + 719];
1560 // Hmm. We check above for 24 BPP mode, but don't do anything about it below...
1561 // If we *were* in 24 BPP mode, how would you convert CRY to RGB24? Seems to me
1562 // that if you're in CRY mode then you wouldn't be able to use 24 BPP bitmaps
1564 // This seems to be the case (at least according to the Midsummer docs)...!
1566 if (depth == 0) // 1 BPP
1569 WriteLog("OP: Scaled bitmap @ 1 BPP requesting FIRSTPIX!\n");
1570 // The LSB of flags is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1571 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1574 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1576 while ((int32)iwidth > 0)
1578 uint8 bits = pixels >> 63;
1580 #ifndef OP_USES_PALETTE_ZERO
1581 if (flagTRANS && bits == 0)
1583 if (flagTRANS && (paletteRAM16[index | bits] == 0))
1589 // This is the *only* correct use of endian-dependent code
1590 // (i.e., mem-to-mem direct copying)!
1591 *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1593 *currentLineBuffer =
1594 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1595 *(currentLineBuffer + 1) =
1596 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1599 currentLineBuffer += lbufDelta;
1602 The reason we subtract the horizontalRemainder *after* the test is because we had too few
1603 bytes for horizontalRemainder to properly recognize a negative number. But now it's 16 bits
1604 wide, so we could probably go back to that (as long as we make it an int16 and not a uint16!)
1606 /* horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
1607 while (horizontalRemainder & 0x80)
1609 horizontalRemainder += hscale;
1613 // while (horizontalRemainder <= 0x20) // I.e., it's <= 1.0 (*before* subtraction)
1614 while (horizontalRemainder < 0x20) // I.e., it's <= 1.0 (*before* subtraction)
1616 horizontalRemainder += hscale;
1620 horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
1624 int phrasesToSkip = pixCount / 64, pixelShift = pixCount % 64;
1626 data += (pitch << 3) * phrasesToSkip;
1627 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1628 pixels <<= 1 * pixelShift;
1629 iwidth -= phrasesToSkip;
1630 pixCount = pixelShift;
1634 else if (depth == 1) // 2 BPP
1637 WriteLog("OP: Scaled bitmap @ 2 BPP requesting FIRSTPIX!\n");
1638 index &= 0xFC; // Top six bits form CLUT index
1639 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1640 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1643 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1645 while ((int32)iwidth > 0)
1647 uint8 bits = pixels >> 62;
1649 #ifndef OP_USES_PALETTE_ZERO
1650 if (flagTRANS && bits == 0)
1652 if (flagTRANS && (paletteRAM16[index | bits] == 0))
1658 // This is the *only* correct use of endian-dependent code
1659 // (i.e., mem-to-mem direct copying)!
1660 *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1662 *currentLineBuffer =
1663 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1664 *(currentLineBuffer + 1) =
1665 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1668 currentLineBuffer += lbufDelta;
1670 /* horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
1671 while (horizontalRemainder & 0x80)
1673 horizontalRemainder += hscale;
1677 // while (horizontalRemainder <= 0x20) // I.e., it's <= 0 (*before* subtraction)
1678 while (horizontalRemainder < 0x20) // I.e., it's <= 1.0 (*before* subtraction)
1680 horizontalRemainder += hscale;
1684 horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
1688 int phrasesToSkip = pixCount / 32, pixelShift = pixCount % 32;
1690 data += (pitch << 3) * phrasesToSkip;
1691 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1692 pixels <<= 2 * pixelShift;
1693 iwidth -= phrasesToSkip;
1694 pixCount = pixelShift;
1698 else if (depth == 2) // 4 BPP
1701 WriteLog("OP: Scaled bitmap @ 4 BPP requesting FIRSTPIX!\n");
1702 index &= 0xF0; // Top four bits form CLUT index
1703 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1704 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1707 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1709 while ((int32)iwidth > 0)
1711 uint8 bits = pixels >> 60;
1713 #ifndef OP_USES_PALETTE_ZERO
1714 if (flagTRANS && bits == 0)
1716 if (flagTRANS && (paletteRAM16[index | bits] == 0))
1722 // This is the *only* correct use of endian-dependent code
1723 // (i.e., mem-to-mem direct copying)!
1724 *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1726 *currentLineBuffer =
1727 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1728 *(currentLineBuffer + 1) =
1729 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1732 currentLineBuffer += lbufDelta;
1734 /* horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
1735 while (horizontalRemainder & 0x80)
1737 horizontalRemainder += hscale;
1741 // while (horizontalRemainder <= 0x20) // I.e., it's <= 0 (*before* subtraction)
1742 while (horizontalRemainder < 0x20) // I.e., it's <= 0 (*before* subtraction)
1744 horizontalRemainder += hscale;
1748 horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
1752 int phrasesToSkip = pixCount / 16, pixelShift = pixCount % 16;
1754 data += (pitch << 3) * phrasesToSkip;
1755 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1756 pixels <<= 4 * pixelShift;
1757 iwidth -= phrasesToSkip;
1758 pixCount = pixelShift;
1762 else if (depth == 3) // 8 BPP
1765 WriteLog("OP: Scaled bitmap @ 8 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1766 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1767 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1770 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1772 while ((int32)iwidth > 0)
1774 uint8 bits = pixels >> 56;
1776 #ifndef OP_USES_PALETTE_ZERO
1777 if (flagTRANS && bits == 0)
1779 if (flagTRANS && (paletteRAM16[bits] == 0))
1785 // This is the *only* correct use of endian-dependent code
1786 // (i.e., mem-to-mem direct copying)!
1787 *(uint16 *)currentLineBuffer = paletteRAM16[bits];
1789 if (currentLineBuffer >= lineBufferLowerLimit && currentLineBuffer <= lineBufferUpperLimit)
1790 *(uint16 *)currentLineBuffer = paletteRAM16[bits];
1793 *currentLineBuffer =
1794 BLEND_CR(*currentLineBuffer, paletteRAM[bits << 1]),
1795 *(currentLineBuffer + 1) =
1796 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[(bits << 1) + 1]);
1799 currentLineBuffer += lbufDelta;
1801 // while (horizontalRemainder <= 0x20) // I.e., it's <= 0 (*before* subtraction)
1802 while (horizontalRemainder < 0x20) // I.e., it's <= 1.0 (*before* subtraction)
1804 horizontalRemainder += hscale;
1808 horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
1812 int phrasesToSkip = pixCount / 8, pixelShift = pixCount % 8;
1814 data += (pitch << 3) * phrasesToSkip;
1815 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1816 pixels <<= 8 * pixelShift;
1817 iwidth -= phrasesToSkip;
1818 pixCount = pixelShift;
1822 else if (depth == 4) // 16 BPP
1825 WriteLog("OP: Scaled bitmap @ 16 BPP requesting FIRSTPIX!\n");
1826 // The LSB is OPFLAG_REFLECT, so sign extend it and OR 2 into it.
1827 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1830 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1832 while ((int32)iwidth > 0)
1834 uint8 bitsHi = pixels >> 56, bitsLo = pixels >> 48;
1836 //This doesn't seem right... Let's try the encoded black value ($8800):
1837 //Apparently, CRY 0 maps to $8800...
1838 if (flagTRANS && ((bitsLo | bitsHi) == 0))
1839 // if (flagTRANS && (bitsHi == 0x88) && (bitsLo == 0x00))
1844 *currentLineBuffer = bitsHi,
1845 *(currentLineBuffer + 1) = bitsLo;
1847 *currentLineBuffer =
1848 BLEND_CR(*currentLineBuffer, bitsHi),
1849 *(currentLineBuffer + 1) =
1850 BLEND_Y(*(currentLineBuffer + 1), bitsLo);
1853 currentLineBuffer += lbufDelta;
1855 /* horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
1856 while (horizontalRemainder & 0x80)
1858 horizontalRemainder += hscale;
1862 // while (horizontalRemainder <= 0x20) // I.e., it's <= 0 (*before* subtraction)
1863 while (horizontalRemainder < 0x20) // I.e., it's <= 1.0 (*before* subtraction)
1865 horizontalRemainder += hscale;
1869 horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
1873 int phrasesToSkip = pixCount / 4, pixelShift = pixCount % 4;
1875 data += (pitch << 3) * phrasesToSkip;
1876 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1877 pixels <<= 16 * pixelShift;
1879 iwidth -= phrasesToSkip;
1881 pixCount = pixelShift;
1885 else if (depth == 5) // 24 BPP
1887 // I'm not sure that you can scale a 24 BPP bitmap properly--the JTRM seems to indicate as much.
1888 WriteLog("OP: Writing 24 BPP scaled bitmap!\n");
1890 WriteLog("OP: Scaled bitmap @ 24 BPP requesting FIRSTPIX!\n");
1891 // Not sure, but I think RMW only works with 16 BPP and below, and only in CRY mode...
1892 // The LSB is OPFLAG_REFLECT, so sign extend it and or 4 into it.
1893 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 4) | 0x04;
1898 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1899 data += pitch << 3; // Multiply pitch * 8 (optimize: precompute this value)
1901 for(int i=0; i<2; i++)
1903 uint8 bits3 = pixels >> 56, bits2 = pixels >> 48,
1904 bits1 = pixels >> 40, bits0 = pixels >> 32;
1906 if (flagTRANS && (bits3 | bits2 | bits1 | bits0) == 0)
1909 *currentLineBuffer = bits3,
1910 *(currentLineBuffer + 1) = bits2,
1911 *(currentLineBuffer + 2) = bits1,
1912 *(currentLineBuffer + 3) = bits0;
1914 currentLineBuffer += lbufDelta;