4 // Original source by David Raingeard (Cal2)
5 // GCC/SDL port by Niels Wagenaar (Linux/WIN32) and Caz (BeOS)
6 // Extensive cleanups/fixes/rewrites by James L. Hammons
7 // (C) 2010 Underground Software
9 // JLH = James L. Hammons <jlhamm@acm.org>
12 // --- ---------- -------------------------------------------------------------
13 // JLH 01/16/2010 Created this log ;-)
28 //#define OP_DEBUG_BMP
// Blend-table lookups. The destination component forms the high byte of the
// table index and the source delta forms the low byte. Every macro argument is
// fully parenthesized so that compound expressions (e.g. "a | b") are cast and
// shifted as a whole rather than only their first operand.
#define BLEND_Y(dst, src)	op_blend_y[(((uint16)(dst)) << 8) | ((uint16)(src))]
#define BLEND_CR(dst, src)	op_blend_cr[(((uint16)(dst)) << 8) | ((uint16)(src))]
33 #define OBJECT_TYPE_BITMAP 0 // 000
34 #define OBJECT_TYPE_SCALE 1 // 001
35 #define OBJECT_TYPE_GPU 2 // 010
36 #define OBJECT_TYPE_BRANCH 3 // 011
37 #define OBJECT_TYPE_STOP 4 // 100
39 #define CONDITION_EQUAL 0
40 #define CONDITION_LESS_THAN 1
41 #define CONDITION_GREATER_THAN 2
42 #define CONDITION_OP_FLAG_SET 3
43 #define CONDITION_SECOND_HALF_LINE 4
45 #define OPFLAG_RELEASE 8 // Bus release bit
46 #define OPFLAG_TRANS 4 // Transparency bit
47 #define OPFLAG_RMW 2 // Read-Modify-Write bit
48 #define OPFLAG_REFLECT 1 // Horizontal mirror bit
50 // Private function prototypes
52 void OPProcessFixedBitmap(uint64 p0, uint64 p1, bool render);
53 void OPProcessScaledBitmap(uint64 p0, uint64 p1, uint64 p2, bool render);
54 void DumpScaledObject(uint64 p0, uint64 p1, uint64 p2);
55 void DumpFixedObject(uint64 p0, uint64 p1);
56 uint64 OPLoadPhrase(uint32 offset);
58 // Local global variables
60 // Blend tables (64K each)
61 static uint8 op_blend_y[0x10000];
62 static uint8 op_blend_cr[0x10000];
63 // There may be a problem with this "RAM" overlapping (and thus being independent of)
64 // some of the regular TOM RAM...
65 //#warning objectp_ram is separated from TOM RAM--need to fix that!
66 //static uint8 objectp_ram[0x40]; // This is based at $F00000
67 uint8 objectp_running = 0;
68 //bool objectp_stop_reading_list;
70 static uint8 op_bitmap_bit_depth[8] = { 1, 2, 4, 8, 16, 24, 32, 0 };
71 //static uint32 op_bitmap_bit_size[8] =
72 // { (uint32)(0.125*65536), (uint32)(0.25*65536), (uint32)(0.5*65536), (uint32)(1*65536),
73 // (uint32)(2*65536), (uint32)(1*65536), (uint32)(1*65536), (uint32)(1*65536) };
74 static uint32 op_pointer;
76 int32 phraseWidthToPixels[8] = { 64, 32, 16, 8, 4, 2, 0, 0 };
80 // Object Processor initialization
84 // Here we calculate the saturating blend of a signed 4-bit value and an
85 // existing Cyan/Red value as well as a signed 8-bit value and an existing intensity...
86 // Note: CRY is 4 bits Cyan, 4 bits Red, 8 bits intensitY
87 for(int i=0; i<256*256; i++)
89 int y = (i >> 8) & 0xFF;
90 int dy = (int8)i; // Sign extend the Y index
91 int c1 = (i >> 8) & 0x0F;
92 int dc1 = (int8)(i << 4) >> 4; // Sign extend the R index
93 int c2 = (i >> 12) & 0x0F;
94 int dc2 = (int8)(i & 0xF0) >> 4; // Sign extend the C index
119 op_blend_cr[i] = (c2 << 4) | c1;
126 // Object Processor reset
130 // memset(objectp_ram, 0x00, 0x40);
136 const char * opType[8] =
137 { "(BITMAP)", "(SCALED BITMAP)", "(GPU INT)", "(BRANCH)", "(STOP)", "???", "???", "???" };
138 const char * ccType[8] =
139 { "\"==\"", "\"<\"", "\">\"", "(opflag set)", "(second half line)", "?", "?", "?" };
141 uint32 olp = OPGetListPointer();
142 WriteLog("OP: OLP = %08X\n", olp);
143 WriteLog("OP: Phrase dump\n ----------\n");
144 for(uint32 i=0; i<0x100; i+=8)
146 uint32 hi = JaguarReadLong(olp + i, OP), lo = JaguarReadLong(olp + i + 4, OP);
147 WriteLog("\t%08X: %08X %08X %s", olp + i, hi, lo, opType[lo & 0x07]);
148 if ((lo & 0x07) == 3)
150 uint16 ypos = (lo >> 3) & 0x7FF;
151 uint8 cc = (lo >> 14) & 0x03;
152 uint32 link = ((hi << 11) | (lo >> 21)) & 0x3FFFF8;
153 WriteLog(" YPOS=%u, CC=%s, link=%08X", ypos, ccType[cc], link);
156 if ((lo & 0x07) == 0)
157 DumpFixedObject(OPLoadPhrase(olp+i), OPLoadPhrase(olp+i+8));
158 if ((lo & 0x07) == 1)
159 DumpScaledObject(OPLoadPhrase(olp+i), OPLoadPhrase(olp+i+8), OPLoadPhrase(olp+i+16));
163 // memory_free(op_blend_y);
164 // memory_free(op_blend_cr);
168 // Object Processor memory access
169 // Memory range: F00010 - F00027
171 // F00010-F00017 R xxxxxxxx xxxxxxxx OB - current object code from the graphics processor
172 // F00020-F00023 W xxxxxxxx xxxxxxxx OLP - start of the object list
173 // F00026 W -------- -------x OBF - object processor flag
177 uint8 OPReadByte(uint32 offset, uint32 who/*=UNKNOWN*/)
180 return objectp_ram[offset];
183 uint16 OPReadWord(uint32 offset, uint32 who/*=UNKNOWN*/)
186 return GET16(objectp_ram, offset);
189 void OPWriteByte(uint32 offset, uint8 data, uint32 who/*=UNKNOWN*/)
192 objectp_ram[offset] = data;
195 void OPWriteWord(uint32 offset, uint16 data, uint32 who/*=UNKNOWN*/)
198 SET16(objectp_ram, offset, data);
200 /*if (offset == 0x20)
201 WriteLog("OP: Setting lo list pointer: %04X\n", data);
203 WriteLog("OP: Setting hi list pointer: %04X\n", data);//*/
207 uint32 OPGetListPointer(void)
209 // Note: This register is LO / HI WORD, hence the funky look of this...
210 return GET16(tomRam8, 0x20) | (GET16(tomRam8, 0x22) << 16);
213 // This is WRONG, since the OBF is only 16 bits wide!!! [FIXED]
215 uint32 OPGetStatusRegister(void)
217 return GET16(tomRam8, 0x26);
220 // This is WRONG, since the OBF is only 16 bits wide!!! [FIXED]
222 void OPSetStatusRegister(uint32 data)
224 tomRam8[0x26] = (data & 0x0000FF00) >> 8;
225 tomRam8[0x27] |= (data & 0xFE);
228 void OPSetCurrentObject(uint64 object)
230 //Not sure this is right... Wouldn't it just be stored 64 bit BE?
231 // Stored as least significant 32 bits first, ms32 last in big endian
232 /* objectp_ram[0x13] = object & 0xFF; object >>= 8;
233 objectp_ram[0x12] = object & 0xFF; object >>= 8;
234 objectp_ram[0x11] = object & 0xFF; object >>= 8;
235 objectp_ram[0x10] = object & 0xFF; object >>= 8;
237 objectp_ram[0x17] = object & 0xFF; object >>= 8;
238 objectp_ram[0x16] = object & 0xFF; object >>= 8;
239 objectp_ram[0x15] = object & 0xFF; object >>= 8;
240 objectp_ram[0x14] = object & 0xFF;*/
241 // Let's try regular good old big endian...
242 tomRam8[0x17] = object & 0xFF; object >>= 8;
243 tomRam8[0x16] = object & 0xFF; object >>= 8;
244 tomRam8[0x15] = object & 0xFF; object >>= 8;
245 tomRam8[0x14] = object & 0xFF; object >>= 8;
247 tomRam8[0x13] = object & 0xFF; object >>= 8;
248 tomRam8[0x12] = object & 0xFF; object >>= 8;
249 tomRam8[0x11] = object & 0xFF; object >>= 8;
250 tomRam8[0x10] = object & 0xFF;
253 uint64 OPLoadPhrase(uint32 offset)
255 offset &= ~0x07; // 8 byte alignment
256 return ((uint64)JaguarReadLong(offset, OP) << 32) | (uint64)JaguarReadLong(offset+4, OP);
259 void OPStorePhrase(uint32 offset, uint64 p)
261 offset &= ~0x07; // 8 byte alignment
262 JaguarWriteLong(offset, p >> 32, OP);
263 JaguarWriteLong(offset + 4, p & 0xFFFFFFFF, OP);
267 // Debugging routines
269 void DumpScaledObject(uint64 p0, uint64 p1, uint64 p2)
271 WriteLog(" (SCALED BITMAP)");
272 WriteLog(" %08X --> phrase %08X %08X\n", op_pointer, (uint32)(p1>>32), (uint32)(p1&0xFFFFFFFF));
273 WriteLog(" %08X --> phrase %08X %08X ", op_pointer+8, (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
274 uint8 bitdepth = (p1 >> 12) & 0x07;
275 //WAS: int16 ypos = ((p0 >> 3) & 0x3FF); // ??? What if not interlaced (/2)?
276 int16 ypos = ((p0 >> 3) & 0x7FF); // ??? What if not interlaced (/2)?
277 int32 xpos = p1 & 0xFFF;
278 xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);
279 uint32 iwidth = ((p1 >> 28) & 0x3FF);
280 uint32 dwidth = ((p1 >> 18) & 0x3FF); // Unsigned!
281 uint16 height = ((p0 >> 14) & 0x3FF);
282 uint32 link = ((p0 >> 24) & 0x7FFFF) << 3;
283 uint32 ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
284 uint32 firstPix = (p1 >> 49) & 0x3F;
285 uint8 flags = (p1 >> 45) & 0x0F;
286 uint8 idx = (p1 >> 38) & 0x7F;
287 uint32 pitch = (p1 >> 15) & 0x07;
288 WriteLog("\n [%u (%u) x %u @ (%i, %u) (%u bpp), l: %08X, p: %08X fp: %02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
289 iwidth, dwidth, height, xpos, ypos, op_bitmap_bit_depth[bitdepth], link, ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""), (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""), (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
290 uint32 hscale = p2 & 0xFF;
291 uint32 vscale = (p2 >> 8) & 0xFF;
292 uint32 remainder = (p2 >> 16) & 0xFF;
293 WriteLog(" [hsc: %02X, vsc: %02X, rem: %02X]\n", hscale, vscale, remainder);
296 void DumpFixedObject(uint64 p0, uint64 p1)
298 WriteLog(" (BITMAP)");
299 WriteLog(" %08X --> phrase %08X %08X\n", op_pointer, (uint32)(p1>>32), (uint32)(p1&0xFFFFFFFF));
300 uint8 bitdepth = (p1 >> 12) & 0x07;
301 //WAS: int16 ypos = ((p0 >> 3) & 0x3FF); // ??? What if not interlaced (/2)?
302 int16 ypos = ((p0 >> 3) & 0x7FF); // ??? What if not interlaced (/2)?
303 int32 xpos = p1 & 0xFFF;
304 xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);
305 uint32 iwidth = ((p1 >> 28) & 0x3FF);
306 uint32 dwidth = ((p1 >> 18) & 0x3FF); // Unsigned!
307 uint16 height = ((p0 >> 14) & 0x3FF);
308 uint32 link = ((p0 >> 24) & 0x7FFFF) << 3;
309 uint32 ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
310 uint32 firstPix = (p1 >> 49) & 0x3F;
311 uint8 flags = (p1 >> 45) & 0x0F;
312 uint8 idx = (p1 >> 38) & 0x7F;
313 uint32 pitch = (p1 >> 15) & 0x07;
314 WriteLog(" [%u (%u) x %u @ (%i, %u) (%u bpp), l: %08X, p: %08X fp: %02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
315 iwidth, dwidth, height, xpos, ypos, op_bitmap_bit_depth[bitdepth], link, ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""), (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""), (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
319 // Object Processor main routine
321 #warning "Need to fix this so that when an GPU object IRQ happens, we can pick up OP processing where we left off. !!! FIX !!!"
322 void OPProcessList(int scanline, bool render)
324 extern int op_start_log;
325 // char * condition_to_str[8] =
326 // { "==", "<", ">", "(opflag set)", "(second half line)", "?", "?", "?" };
328 op_pointer = OPGetListPointer();
330 // objectp_stop_reading_list = false;
332 //WriteLog("OP: Processing line #%u (OLP=%08X)...\n", scanline, op_pointer);
335 // *** BEGIN OP PROCESSOR TESTING ONLY ***
336 extern bool interactiveMode;
338 extern int objectPtr;
340 int bitmapCounter = 0;
341 // *** END OP PROCESSOR TESTING ONLY ***
343 uint32 opCyclesToRun = 30000; // This is a pulled-out-of-the-air value (will need to be fixed, obviously!)
345 // if (op_pointer) WriteLog(" new op list at 0x%.8x scanline %i\n",op_pointer,scanline);
348 // *** BEGIN OP PROCESSOR TESTING ONLY ***
349 if (interactiveMode && bitmapCounter == objectPtr)
353 // *** END OP PROCESSOR TESTING ONLY ***
354 // if (objectp_stop_reading_list)
357 uint64 p0 = OPLoadPhrase(op_pointer);
358 //WriteLog("\t%08X type %i\n", op_pointer, (uint8)p0 & 0x07);
362 if (scanline == TOMGetVDB() && op_start_log)
363 //if (scanline == 215 && op_start_log)
364 //if (scanline == 28 && op_start_log)
367 WriteLog("%08X --> phrase %08X %08X", op_pointer - 8, (int)(p0>>32), (int)(p0&0xFFFFFFFF));
368 if ((p0 & 0x07) == OBJECT_TYPE_BITMAP)
370 WriteLog(" (BITMAP) ");
371 uint64 p1 = OPLoadPhrase(op_pointer);
372 WriteLog("\n%08X --> phrase %08X %08X ", op_pointer, (int)(p1>>32), (int)(p1&0xFFFFFFFF));
373 uint8 bitdepth = (p1 >> 12) & 0x07;
374 //WAS: int16 ypos = ((p0 >> 3) & 0x3FF); // ??? What if not interlaced (/2)?
375 int16 ypos = ((p0 >> 3) & 0x7FF); // ??? What if not interlaced (/2)?
376 int32 xpos = p1 & 0xFFF;
377 xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);
378 uint32 iwidth = ((p1 >> 28) & 0x3FF);
379 uint32 dwidth = ((p1 >> 18) & 0x3FF); // Unsigned!
380 uint16 height = ((p0 >> 14) & 0x3FF);
381 uint32 link = ((p0 >> 24) & 0x7FFFF) << 3;
382 uint32 ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
383 uint32 firstPix = (p1 >> 49) & 0x3F;
384 uint8 flags = (p1 >> 45) & 0x0F;
385 uint8 idx = (p1 >> 38) & 0x7F;
386 uint32 pitch = (p1 >> 15) & 0x07;
387 WriteLog("\n [%u (%u) x %u @ (%i, %u) (%u bpp), l: %08X, p: %08X fp: %02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
388 iwidth, dwidth, height, xpos, ypos, op_bitmap_bit_depth[bitdepth], link, ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""), (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""), (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
390 if ((p0 & 0x07) == OBJECT_TYPE_SCALE)
392 WriteLog(" (SCALED BITMAP)");
393 uint64 p1 = OPLoadPhrase(op_pointer), p2 = OPLoadPhrase(op_pointer+8);
394 WriteLog("\n%08X --> phrase %08X %08X ", op_pointer, (int)(p1>>32), (int)(p1&0xFFFFFFFF));
395 WriteLog("\n%08X --> phrase %08X %08X ", op_pointer+8, (int)(p2>>32), (int)(p2&0xFFFFFFFF));
396 uint8 bitdepth = (p1 >> 12) & 0x07;
397 //WAS: int16 ypos = ((p0 >> 3) & 0x3FF); // ??? What if not interlaced (/2)?
398 int16 ypos = ((p0 >> 3) & 0x7FF); // ??? What if not interlaced (/2)?
399 int32 xpos = p1 & 0xFFF;
400 xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);
401 uint32 iwidth = ((p1 >> 28) & 0x3FF);
402 uint32 dwidth = ((p1 >> 18) & 0x3FF); // Unsigned!
403 uint16 height = ((p0 >> 14) & 0x3FF);
404 uint32 link = ((p0 >> 24) & 0x7FFFF) << 3;
405 uint32 ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
406 uint32 firstPix = (p1 >> 49) & 0x3F;
407 uint8 flags = (p1 >> 45) & 0x0F;
408 uint8 idx = (p1 >> 38) & 0x7F;
409 uint32 pitch = (p1 >> 15) & 0x07;
410 WriteLog("\n [%u (%u) x %u @ (%i, %u) (%u bpp), l: %08X, p: %08X fp: %02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
411 iwidth, dwidth, height, xpos, ypos, op_bitmap_bit_depth[bitdepth], link, ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""), (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""), (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
412 uint32 hscale = p2 & 0xFF;
413 uint32 vscale = (p2 >> 8) & 0xFF;
414 uint32 remainder = (p2 >> 16) & 0xFF;
415 WriteLog(" [hsc: %02X, vsc: %02X, rem: %02X]\n", hscale, vscale, remainder);
417 if ((p0 & 0x07) == OBJECT_TYPE_GPU)
418 WriteLog(" (GPU)\n");
419 if ((p0 & 0x07) == OBJECT_TYPE_BRANCH)
421 WriteLog(" (BRANCH)\n");
422 uint8 * jaguarMainRam = GetRamPtr();
423 WriteLog("[RAM] --> ");
424 for(int k=0; k<8; k++)
425 WriteLog("%02X ", jaguarMainRam[op_pointer-8 + k]);
428 if ((p0 & 0x07) == OBJECT_TYPE_STOP)
429 WriteLog(" --> List end\n\n");
433 switch ((uint8)p0 & 0x07)
435 case OBJECT_TYPE_BITMAP:
437 //WAS: uint16 ypos = (p0 >> 3) & 0x3FF;
438 uint16 ypos = (p0 >> 3) & 0x7FF;
439 // This is only theory implied by Rayman...!
440 // It seems that if the YPOS is zero, then bump the YPOS value so that it coincides with
441 // the VDB value. With interlacing, this would be slightly more tricky.
442 // There's probably another bit somewhere that enables this mode--but so far, doesn't seem
443 // to affect any other game in a negative way (that I've seen).
444 // Either that, or it's an undocumented bug...
446 //No, the reason this was needed is that the OP code before was wrong. Any value
447 //less than VDB will get written to the top line of the display!
449 // Not so sure... Let's see what happens here...
452 ypos = TOMReadWord(0xF00046, OP) / 2; // Get the VDB value
454 // Actually, no. Any item less than VDB will get only the lines that hang over VDB displayed.
455 // Actually, this is incorrect. It seems that VDB value is wrong somewhere and that's
456 // what's causing things to fuck up. Still no idea why.
458 uint32 height = (p0 & 0xFFC000) >> 14;
459 uint32 oldOPP = op_pointer - 8;
460 // *** BEGIN OP PROCESSOR TESTING ONLY ***
461 if (inhibit && op_start_log)
462 WriteLog("!!! ^^^ This object is INHIBITED! ^^^ !!!\n");
464 if (!inhibit) // For OP testing only!
465 // *** END OP PROCESSOR TESTING ONLY ***
466 if (scanline >= ypos && height > 0)
468 uint64 p1 = OPLoadPhrase(op_pointer);
470 //WriteLog("OP: Writing scanline %d with ypos == %d...\n", scanline, ypos);
471 //WriteLog("--> Writing %u BPP bitmap...\n", op_bitmap_bit_depth[(p1 >> 12) & 0x07]);
472 // OPProcessFixedBitmap(scanline, p0, p1, render);
473 OPProcessFixedBitmap(p0, p1, render);
477 //???Does this really happen??? Doesn't seem to work if you do this...!
478 //Probably not. Must be a bug in the documentation...!
479 // uint32 link = (p0 & 0x7FFFF000000) >> 21;
480 // SET16(tom_ram_8, 0x20, link & 0xFFFF); // OLP
481 // SET16(tom_ram_8, 0x22, link >> 16);
482 /* uint32 height = (p0 & 0xFFC000) >> 14;
485 // NOTE: Would subtract 2 if in interlaced mode...!
486 // uint64 height = ((p0 & 0xFFC000) - 0x4000) & 0xFFC000;
490 uint64 data = (p0 & 0xFFFFF80000000000LL) >> 40;
491 uint64 dwidth = (p1 & 0xFFC0000) >> 15;
494 p0 &= ~0xFFFFF80000FFC000LL; // Mask out old data...
495 p0 |= (uint64)height << 14;
497 OPStorePhrase(oldOPP, p0);
499 //WriteLog("\t\tOld OP: %08X -> ", op_pointer);
500 //Temp, for testing...
501 //No doubt, this type of check will break all kinds of stuff... !!! FIX !!!
502 //And it does! !!! FIX !!!
503 //Let's remove this "fix" since it screws up more than it fixes.
504 /* if (op_pointer > ((p0 & 0x000007FFFF000000LL) >> 21))
507 op_pointer = (p0 & 0x000007FFFF000000LL) >> 21;
508 //WriteLog("New OP: %08X\n", op_pointer);
511 case OBJECT_TYPE_SCALE:
513 //WAS: uint16 ypos = (p0 >> 3) & 0x3FF;
514 uint16 ypos = (p0 >> 3) & 0x7FF;
515 uint32 height = (p0 & 0xFFC000) >> 14;
516 uint32 oldOPP = op_pointer - 8;
517 //WriteLog("OP: Scaled Object (ypos=%04X, height=%04X", ypos, height);
518 // *** BEGIN OP PROCESSOR TESTING ONLY ***
519 if (inhibit && op_start_log)
521 WriteLog("!!! ^^^ This object is INHIBITED! ^^^ !!! (scanline=%u, ypos=%u, height=%u)\n", scanline, ypos, height);
522 DumpScaledObject(p0, OPLoadPhrase(op_pointer), OPLoadPhrase(op_pointer+8));
525 if (!inhibit) // For OP testing only!
526 // *** END OP PROCESSOR TESTING ONLY ***
527 if (scanline >= ypos && height > 0)
529 uint64 p1 = OPLoadPhrase(op_pointer);
531 uint64 p2 = OPLoadPhrase(op_pointer);
533 //WriteLog("OP: %08X (%d) %08X%08X %08X%08X %08X%08X\n", oldOPP, scanline, (uint32)(p0>>32), (uint32)(p0&0xFFFFFFFF), (uint32)(p1>>32), (uint32)(p1&0xFFFFFFFF), (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
534 OPProcessScaledBitmap(p0, p1, p2, render);
538 uint16 remainder = (p2 >> 16) & 0xFF;//, vscale = p2 >> 8;
539 uint8 /*remainder = p2 >> 16,*/ vscale = p2 >> 8;
540 //Actually, we should skip this object if it has a vscale of zero.
541 //Or do we? Not sure... Atari Karts has a few lines that look like:
543 //000E8268 --> phrase 00010000 7000B00D
544 // [7 (0) x 1 @ (13, 0) (8 bpp), l: 000E82A0, p: 000E0FC0 fp: 00, fl:RELEASE, idx:00, pt:01]
545 // [hsc: 9A, vsc: 00, rem: 00]
546 // Could it be the vscale is overridden if the DWIDTH is zero? Hmm...
547 //WriteLog("OP: Scaled bitmap processing (rem=%02X, vscale=%02X)...\n", remainder, vscale);//*/
550 vscale = 0x20; // OP bug??? Nope, it isn't...! Or is it?
552 //extern int start_logging;
554 // WriteLog("--> Returned from scaled bitmap processing (rem=%02X, vscale=%02X)...\n", remainder, vscale);//*/
556 //--> Returned from scaled bitmap processing (rem=20, vscale=80)...
557 //There are other problems here, it looks like...
559 //About to execute OP (508)...
561 OP: Scaled bitmap 4x? 4bpp at 38,? hscale=7C fpix=0 data=00075E28 pitch 1 hflipped=no dwidth=? (linked to 00071118) Transluency=no
562 --> Returned from scaled bitmap processing (rem=50, vscale=7C)...
563 OP: Scaled bitmap 4x? 4bpp at 38,? hscale=7C fpix=0 data=00075E28 pitch 1 hflipped=no dwidth=? (linked to 00071118) Transluency=no
564 --> Returned from scaled bitmap processing (rem=30, vscale=7C)...
565 OP: Scaled bitmap 4x? 4bpp at 38,? hscale=7C fpix=0 data=00075E28 pitch 1 hflipped=no dwidth=? (linked to 00071118) Transluency=no
566 --> Returned from scaled bitmap processing (rem=10, vscale=7C)...
567 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756A8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
568 --> Returned from scaled bitmap processing (rem=00, vscale=7E)...
569 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
570 --> Returned from scaled bitmap processing (rem=00, vscale=80)...
571 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
572 --> Returned from scaled bitmap processing (rem=5E, vscale=7E)...
573 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756E8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
574 --> Returned from scaled bitmap processing (rem=60, vscale=80)...
575 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
576 --> Returned from scaled bitmap processing (rem=3E, vscale=7E)...
577 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756E8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
578 --> Returned from scaled bitmap processing (rem=40, vscale=80)...
579 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
580 --> Returned from scaled bitmap processing (rem=1E, vscale=7E)...
581 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756E8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
582 --> Returned from scaled bitmap processing (rem=20, vscale=80)...
584 //Here's another problem:
585 // [hsc: 20, vsc: 20, rem: 00]
586 // Since we're not checking for $E0 (but that's what we get from the above), we end
587 // up repeating this scanline unnecessarily... !!! FIX !!! [DONE, but... still not quite
588 // right. Either that, or the Accolade team that wrote Bubsy screwed up royal.]
589 //Also note: $E0 = 7.0 which IS a legal vscale value...
591 // if (remainder & 0x80) // I.e., it's negative
592 // if ((remainder & 0x80) || remainder == 0) // I.e., it's <= 0
593 // if ((remainder - 1) >= 0xE0) // I.e., it's <= 0
594 // if ((remainder >= 0xE1) || remainder == 0)// I.e., it's <= 0
595 // if ((remainder >= 0xE1 && remainder <= 0xFF) || remainder == 0)// I.e., it's <= 0
596 // if (remainder <= 0x20) // I.e., it's <= 1.0
597 // I.e., it's < 1.0f -> means it'll go negative when we subtract 1.0f.
598 if (remainder < 0x20)
600 uint64 data = (p0 & 0xFFFFF80000000000LL) >> 40;
601 uint64 dwidth = (p1 & 0xFFC0000) >> 15;
603 // while (remainder & 0x80)
604 // while ((remainder & 0x80) || remainder == 0)
605 // while ((remainder - 1) >= 0xE0)
606 // while ((remainder >= 0xE1) || remainder == 0)
607 // while ((remainder >= 0xE1 && remainder <= 0xFF) || remainder == 0)
608 // while (remainder <= 0x20)
609 while (remainder < 0x20)
619 p0 &= ~0xFFFFF80000FFC000LL; // Mask out old data...
620 p0 |= (uint64)height << 14;
622 OPStorePhrase(oldOPP, p0);
625 remainder -= 0x20; // 1.0f in [3.5] fixed point format
628 // WriteLog("--> Finished writebacks...\n");//*/
630 //WriteLog(" [%08X%08X -> ", (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
631 p2 &= ~0x0000000000FF0000LL;
632 p2 |= (uint64)remainder << 16;
633 //WriteLog("%08X%08X]\n", (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
634 OPStorePhrase(oldOPP + 16, p2);
635 //remainder = (uint8)(p2 >> 16), vscale = (uint8)(p2 >> 8);
636 //WriteLog(" [after]: rem=%02X, vscale=%02X\n", remainder, vscale);
639 op_pointer = (p0 & 0x000007FFFF000000LL) >> 21;
642 case OBJECT_TYPE_GPU:
644 //WriteLog("OP: Asserting GPU IRQ #3...\n");
645 #warning "Need to fix OP GPU IRQ handling! !!! FIX !!!"
646 OPSetCurrentObject(p0);
647 GPUSetIRQLine(3, ASSERT_LINE);
648 //Also, OP processing is suspended from this point until OBF (F00026) is written to...
651 //OPSuspendedByGPU = true;
652 //Dunno if the OP keeps processing from where it was interrupted, or if it just continues
653 //on the next scanline...
654 // --> It continues from where it was interrupted! !!! FIX !!!
657 case OBJECT_TYPE_BRANCH:
659 uint16 ypos = (p0 >> 3) & 0x7FF;
660 uint8 cc = (p0 >> 14) & 0x03;
661 uint32 link = (p0 >> 21) & 0x3FFFF8;
663 // if ((ypos!=507)&&(ypos!=25))
664 // WriteLog("\t%i%s%i link=0x%.8x\n",scanline,condition_to_str[cc],ypos>>1,link);
667 case CONDITION_EQUAL:
668 if (TOMReadWord(0xF00006, OP) == ypos || ypos == 0x7FF)
671 case CONDITION_LESS_THAN:
672 if (TOMReadWord(0xF00006, OP) < ypos)
675 case CONDITION_GREATER_THAN:
676 if (TOMReadWord(0xF00006, OP) > ypos)
679 case CONDITION_OP_FLAG_SET:
680 if (OPGetStatusRegister() & 0x01)
683 case CONDITION_SECOND_HALF_LINE:
684 //Here's the ASIC code:
685 // ND4(cctrue5, newheight[2], heightl[1], heightl[0], hcb[10]);
686 //which means, do the link if bit 10 of HC is set...
688 // This basically means branch if bit 10 of HC is set
689 #warning "Unhandled condition code causes emulator to crash... !!! FIX !!!"
690 WriteLog("OP: Unexpected CONDITION_SECOND_HALF_LINE in BRANCH object\nOP: shutting down!\n");
695 WriteLog("OP: Unimplemented branch condition %i\n", cc);
699 case OBJECT_TYPE_STOP:
703 //WriteLog("OP: --> STOP\n");
704 // op_set_status_register(((p0>>3) & 0xFFFFFFFF));
705 //This seems more likely...
706 OPSetCurrentObject(p0);
710 // We need to check whether these interrupts are enabled or not, THEN
711 // set an IRQ + pending flag if necessary...
712 if (TOMIRQEnabled(IRQ_OPFLAG))
714 TOMSetPendingObjectInt();
715 m68k_set_irq(2); // Cause a 68K IPL 2 to occur...
723 WriteLog("op: unknown object type %i\n", ((uint8)p0 & 0x07));
727 // Here is a little sanity check to keep the OP from locking up the machine
728 // when fed bad data. Better would be to count how many actual cycles it used
729 // and bail out/reenter to properly simulate an overloaded OP... !!! FIX !!!
730 #warning "Better would be to count how many actual cycles it used and bail out/reenter to properly simulate an overloaded OP... !!! FIX !!!"
738 // Store fixed size bitmap in line buffer
740 void OPProcessFixedBitmap(uint64 p0, uint64 p1, bool render)
742 // Need to make sure that when writing that it stays within the line buffer...
743 // LBUF ($F01800 - $F01D9E) 360 x 32-bit RAM
744 uint8 depth = (p1 >> 12) & 0x07; // Color depth of image
745 int32 xpos = ((int16)((p1 << 4) & 0xFFFF)) >> 4;// Image xpos in LBUF
746 uint32 iwidth = (p1 >> 28) & 0x3FF; // Image width in *phrases*
747 uint32 data = (p0 >> 40) & 0xFFFFF8; // Pixel data address
748 //#ifdef OP_DEBUG_BMP
749 uint32 firstPix = (p1 >> 49) & 0x3F;
750 // "The LSB is significant only for scaled objects..." -JTRM
751 // "In 1 BPP mode, all five bits are significant. In 2 BPP mode, the top four are significant..."
754 // We can ignore the RELEASE (high order) bit for now--probably forever...!
755 // uint8 flags = (p1 >> 45) & 0x0F; // REFLECT, RMW, TRANS, RELEASE
756 //Optimize: break these out to their own BOOL values
757 uint8 flags = (p1 >> 45) & 0x07; // REFLECT (0), RMW (1), TRANS (2)
758 bool flagREFLECT = (flags & OPFLAG_REFLECT ? true : false),
759 flagRMW = (flags & OPFLAG_RMW ? true : false),
760 flagTRANS = (flags & OPFLAG_TRANS ? true : false);
761 // "For images with 1 to 4 bits/pixel the top 7 to 4 bits of the index
762 // provide the most significant bits of the palette address."
763 uint8 index = (p1 >> 37) & 0xFE; // CLUT index offset (upper pix, 1-4 bpp)
764 uint32 pitch = (p1 >> 15) & 0x07; // Phrase pitch
765 pitch <<= 3; // Optimization: Multiply pitch by 8
767 // int16 scanlineWidth = tom_getVideoModeWidth();
768 uint8 * tomRam8 = TOMGetRamPointer();
769 uint8 * paletteRAM = &tomRam8[0x400];
770 // This is OK as long as it's used correctly: For 16-bit RAM to RAM direct copies--NOT
771 // for use when using endian-corrected data (i.e., any of the *_word_read functions!)
772 uint16 * paletteRAM16 = (uint16 *)paletteRAM;
774 // WriteLog("bitmap %ix? %ibpp at %i,? firstpix=? data=0x%.8x pitch %i hflipped=%s dwidth=? (linked to ?) RMW=%s Tranparent=%s\n",
775 // iwidth, op_bitmap_bit_depth[bitdepth], xpos, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), (flags&OPFLAG_RMW ? "yes" : "no"), (flags&OPFLAG_TRANS ? "yes" : "no"));
777 // Is it OK to have a 0 for the data width??? (i.e., undocumented?)
778 // Seems to be... Seems that dwidth *can* be zero (i.e., reuse same line) as well.
779 // Pitch == 0 is OK too...
780 // if (!render || op_pointer == 0 || ptr == 0 || pitch == 0)
781 //I'm not convinced that we need to concern ourselves with data & op_pointer here either!
782 if (!render || iwidth == 0)
785 //OK, so we know the position in the line buffer is correct. It's the clipping in
786 //24bpp mode that's wrong!
788 //This is a total kludge, based upon the fact that 24BPP mode puts *4* bytes
789 //into the line buffer for each pixel.
790 if (depth == 5) // i.e., 24bpp mode...
791 xpos >>= 1; // Cut it in half...
794 //#define OP_DEBUG_BMP
795 //#ifdef OP_DEBUG_BMP
796 // WriteLog("bitmap %ix%i %ibpp at %i,%i firstpix=%i data=0x%.8x pitch %i hflipped=%s dwidth=%i (linked to 0x%.8x) Transluency=%s\n",
797 // iwidth, height, op_bitmap_bit_depth[bitdepth], xpos, ypos, firstPix, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), dwidth, op_pointer, (flags&OPFLAG_RMW ? "yes" : "no"));
800 // int32 leftMargin = xpos, rightMargin = (xpos + (phraseWidthToPixels[depth] * iwidth)) - 1;
801 int32 startPos = xpos, endPos = xpos +
802 (!flagREFLECT ? (phraseWidthToPixels[depth] * iwidth) - 1
803 : -((phraseWidthToPixels[depth] * iwidth) + 1));
804 uint32 clippedWidth = 0, phraseClippedWidth = 0, dataClippedWidth = 0;//, phrasePixel = 0;
805 bool in24BPPMode = (((GET16(tomRam8, 0x0028) >> 1) & 0x03) == 1 ? true : false); // VMODE
806 // Not sure if this is Jaguar Two only location or what...
807 // From the docs, it is... If we want to limit here we should think of something else.
808 // int32 limit = GET16(tom_ram_8, 0x0008); // LIMIT
809 // int32 limit = 720;
810 // int32 lbufWidth = (!in24BPPMode ? limit - 1 : (limit / 2) - 1); // Zero based limit...
811 //printf("[OP:xpos=%i,spos=%i,epos=%i>", xpos, startPos, endPos);
812 // This is correct, the OP line buffer is a constant size...
814 int32 lbufWidth = 719;
816 // If the image is completely to the left or right of the line buffer, then bail.
817 //If in REFLECT mode, then these values are swapped! !!! FIX !!! [DONE]
818 //There are four possibilities:
819 // 1. image sits on left edge and no REFLECT; starts out of bounds but ends in bounds.
820 // 2. image sits on left edge and REFLECT; starts in bounds but ends out of bounds.
821 // 3. image sits on right edge and REFLECT; starts out of bounds but ends in bounds.
822 // 4. image sits on right edge and no REFLECT; starts in bounds but ends out of bounds.
823 //Numbers 2 & 4 can be caught by checking the LBUF clip while in the inner loop,
824 // numbers 1 & 3 are of concern.
825 // This *indirectly* handles only cases 2 & 4! And is WRONG if REFLECT is set...!
826 // if (rightMargin < 0 || leftMargin > lbufWidth)
828 // It might be easier to swap these (if REFLECTed) and just use XPOS down below...
829 // That way, you could simply set XPOS to leftMargin if !REFLECT and to rightMargin otherwise.
830 // Still have to be careful with the DATA and IWIDTH values though...
832 // if ((!flagREFLECT && (rightMargin < 0 || leftMargin > lbufWidth))
833 // || (flagREFLECT && (leftMargin < 0 || rightMargin > lbufWidth)))
835 if ((!flagREFLECT && (endPos < 0 || startPos > lbufWidth))
836 || (flagREFLECT && (startPos < 0 || endPos > lbufWidth)))
839 // Otherwise, find the clip limits and clip the phrase as well...
840 // NOTE: I'm fudging here by letting the actual blit overstep the bounds of the
841 // line buffer, but it shouldn't matter since there are two unused line
842 // buffers below and nothing above and I'll at most write 8 bytes outside
843 // the line buffer... I could use a fractional clip begin/end value, but
844 // this makes the blit a *lot* more hairy. I might fix this in the future
845 // if it becomes necessary. (JLH)
846 // Probably wouldn't be *that* hairy. Just use a delta that tells the inner loop
847 // which pixel in the phrase is being written, and quit when either end of phrases
848 // is reached or line buffer extents are surpassed.
850 //This stuff is probably wrong as well... !!! FIX !!!
851 //The strange thing is that it seems to work, but that's no guarantee that it's bulletproof!
852 //Yup. Seems that JagMania doesn't work correctly with this...
853 //Dunno if this is the problem, but Atari Karts is showing *some* of the road now...
858 clippedWidth = 0 - leftMargin,
859 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth],
860 leftMargin = 0 - (clippedWidth % phraseWidthToPixels[depth]);
863 if (rightMargin > lbufWidth)
864 clippedWidth = rightMargin - lbufWidth,
865 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];//,
866 // rightMargin = lbufWidth + (clippedWidth % phraseWidthToPixels[depth]);
867 // rightMargin = lbufWidth;
870 WriteLog("OP: We're about to encounter a divide by zero error!\n");
871 // NOTE: We're just using endPos to figure out how much, if any, to clip by.
872 // ALSO: There may be another case where we start out of bounds and end out of bounds...!
874 if (startPos < 0) // Case #1: Begin out, end in, L to R
875 clippedWidth = 0 - startPos,
876 dataClippedWidth = phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth],
877 startPos = 0 - (clippedWidth % phraseWidthToPixels[depth]);
879 if (endPos < 0) // Case #2: Begin in, end out, R to L
880 clippedWidth = 0 - endPos,
881 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];
883 if (endPos > lbufWidth) // Case #3: Begin in, end out, L to R
884 clippedWidth = endPos - lbufWidth,
885 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];
887 if (startPos > lbufWidth) // Case #4: Begin out, end in, R to L
888 clippedWidth = startPos - lbufWidth,
889 dataClippedWidth = phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth],
890 startPos = lbufWidth + (clippedWidth % phraseWidthToPixels[depth]);
891 //printf("<OP:spos=%i,epos=%i]", startPos, endPos);
893 // If the image is sitting on the line buffer left or right edge, we need to compensate
894 // by decreasing the image phrase width accordingly.
895 iwidth -= phraseClippedWidth;
897 // Also, if we're clipping the phrase we need to make sure we're in the correct part of
899 // data += phraseClippedWidth * (pitch << 3);
900 data += dataClippedWidth * pitch;
902 // NOTE: When the bitmap is in REFLECT mode, the XPOS marks the *right* side of the
903 // bitmap! This makes clipping & etc. MUCH, much easier...!
904 // uint32 lbufAddress = 0x1800 + (!in24BPPMode ? leftMargin * 2 : leftMargin * 4);
905 //Why does this work right when multiplying startPos by 2 (instead of 4) for 24 BPP mode?
906 //Is this a bug in the OP?
907 //It's because in 24bpp mode, each pixel takes *4* bytes, instead of the usual 2.
908 //Though it looks like we're doing it here no matter what...
909 // uint32 lbufAddress = 0x1800 + (!in24BPPMode ? startPos * 2 : startPos * 2);
911 uint32 lbufAddress = 0x1800 + (startPos * 2);
912 uint8 * currentLineBuffer = &tomRam8[lbufAddress];
916 // Hmm. We check above for 24 BPP mode, but don't do anything about it below...
917 // If we *were* in 24 BPP mode, how would you convert CRY to RGB24? Seems to me
918 // that if you're in CRY mode then you wouldn't be able to use 24 BPP bitmaps
920 // This seems to be the case (at least according to the Midsummer docs)...!
922 // This is to test using palette zeroes instead of bit zeroes...
923 // And it seems that this is wrong, index == 0 is transparent apparently... :-/
924 //#define OP_USES_PALETTE_ZERO
926 if (depth == 0) // 1 BPP
928 // The LSB of flags is OPFLAG_REFLECT, so sign extend it and or 2 into it.
929 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
931 // Fetch 1st phrase...
932 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
933 //Note that firstPix should only be honored *if* we start with the 1st phrase of the bitmap
934 //i.e., we didn't clip on the margin... !!! FIX !!!
935 pixels <<= firstPix; // Skip first N pixels (N=firstPix)...
936 int i = firstPix; // Start counter at right spot...
942 uint8 bit = pixels >> 63;
943 #ifndef OP_USES_PALETTE_ZERO
944 if (flagTRANS && bit == 0)
946 if (flagTRANS && (paletteRAM16[index | bit] == 0))
952 //Optimize: Set paletteRAM16 to beginning of palette RAM + index*2 and use only [bit] as index...
953 //Won't optimize RMW case though...
954 // This is the *only* correct use of endian-dependent code
955 // (i.e., mem-to-mem direct copying)!
956 *(uint16 *)currentLineBuffer = paletteRAM16[index | bit];
959 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bit) << 1]),
960 *(currentLineBuffer + 1) =
961 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bit) << 1) + 1]);
964 currentLineBuffer += lbufDelta;
968 // Fetch next phrase...
970 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
973 else if (depth == 1) // 2 BPP
976 WriteLog("OP: Fixed bitmap @ 2 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
977 index &= 0xFC; // Top six bits form CLUT index
978 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
979 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
984 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
987 for(int i=0; i<32; i++)
989 uint8 bits = pixels >> 62;
990 // Seems to me that both of these are in the same endian, so we could cast it as
991 // uint16 * and do straight across copies (what about 24 bpp? Treat it differently...)
992 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
993 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
994 // No, it isn't because we read the memory in an endian safe way--this *won't* work...
995 #ifndef OP_USES_PALETTE_ZERO
996 if (flagTRANS && bits == 0)
998 if (flagTRANS && (paletteRAM16[index | bits] == 0))
1004 *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1006 *currentLineBuffer =
1007 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1008 *(currentLineBuffer + 1) =
1009 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1012 currentLineBuffer += lbufDelta;
1017 else if (depth == 2) // 4 BPP
1020 WriteLog("OP: Fixed bitmap @ 4 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1021 index &= 0xF0; // Top four bits form CLUT index
1022 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1023 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1028 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1031 for(int i=0; i<16; i++)
1033 uint8 bits = pixels >> 60;
1034 // Seems to me that both of these are in the same endian, so we could cast it as
1035 // uint16 * and do straight across copies (what about 24 bpp? Treat it differently...)
1036 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
1037 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
1038 // No, it isn't because we read the memory in an endian safe way--this *won't* work...
1039 #ifndef OP_USES_PALETTE_ZERO
1040 if (flagTRANS && bits == 0)
1042 if (flagTRANS && (paletteRAM16[index | bits] == 0))
1048 *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1050 *currentLineBuffer =
1051 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1052 *(currentLineBuffer + 1) =
1053 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1056 currentLineBuffer += lbufDelta;
1061 else if (depth == 3) // 8 BPP
1063 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1064 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1066 // Fetch 1st phrase...
1067 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1068 //Note that firstPix should only be honored *if* we start with the 1st phrase of the bitmap
1069 //i.e., we didn't clip on the margin... !!! FIX !!!
1070 firstPix &= 0x30; // Only top two bits are valid for 8 BPP
1071 pixels <<= firstPix; // Skip first N pixels (N=firstPix)...
1072 int i = firstPix >> 3; // Start counter at right spot...
1078 uint8 bits = pixels >> 56;
1079 // Seems to me that both of these are in the same endian, so we could cast it as
1080 // uint16 * and do straight across copies (what about 24 bpp? Treat it differently...)
1081 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
1082 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
1083 // No, it isn't because we read the memory in an endian safe way--this *won't* work...
1084 //This would seem to be problematic...
1085 //Because it's the palette entry being zero that makes the pixel transparent...
1086 //Let's try it and see.
1087 #ifndef OP_USES_PALETTE_ZERO
1088 if (flagTRANS && bits == 0)
1090 if (flagTRANS && (paletteRAM16[bits] == 0))
1096 *(uint16 *)currentLineBuffer = paletteRAM16[bits];
1098 *currentLineBuffer =
1099 BLEND_CR(*currentLineBuffer, paletteRAM[bits << 1]),
1100 *(currentLineBuffer + 1) =
1101 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[(bits << 1) + 1]);
1104 currentLineBuffer += lbufDelta;
1108 // Fetch next phrase...
1110 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1113 else if (depth == 4) // 16 BPP
1116 WriteLog("OP: Fixed bitmap @ 16 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1117 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1118 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1123 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1126 for(int i=0; i<4; i++)
1128 uint8 bitsHi = pixels >> 56, bitsLo = pixels >> 48;
1129 // Seems to me that both of these are in the same endian, so we could cast it as
1130 // uint16 * and do straight across copies (what about 24 bpp? Treat it differently...)
1131 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
1132 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
1133 // No, it isn't because we read the memory in an endian safe way--it *won't* work...
1134 //This doesn't seem right... Let's try the encoded black value ($8800):
1135 //Apparently, CRY 0 maps to $8800...
1136 if (flagTRANS && ((bitsLo | bitsHi) == 0))
1137 // if (flagTRANS && (bitsHi == 0x88) && (bitsLo == 0x00))
1142 *currentLineBuffer = bitsHi,
1143 *(currentLineBuffer + 1) = bitsLo;
1145 *currentLineBuffer =
1146 BLEND_CR(*currentLineBuffer, bitsHi),
1147 *(currentLineBuffer + 1) =
1148 BLEND_Y(*(currentLineBuffer + 1), bitsLo);
1151 currentLineBuffer += lbufDelta;
1156 else if (depth == 5) // 24 BPP
1158 //Looks like Iron Soldier is the only game that uses 24BPP mode...
1159 //There *might* be others...
1160 //WriteLog("OP: Writing 24 BPP bitmap!\n");
1162 WriteLog("OP: Fixed bitmap @ 24 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1163 // Not sure, but I think RMW only works with 16 BPP and below, and only in CRY mode...
1164 // The LSB of flags is OPFLAG_REFLECT, so sign extend it and OR 4 into it.
1165 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 4) | 0x04;
1170 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1173 for(int i=0; i<2; i++)
1175 // We don't use a 32-bit var here because of endian issues...!
1176 uint8 bits3 = pixels >> 56, bits2 = pixels >> 48,
1177 bits1 = pixels >> 40, bits0 = pixels >> 32;
1179 if (flagTRANS && (bits3 | bits2 | bits1 | bits0) == 0)
1182 *currentLineBuffer = bits3,
1183 *(currentLineBuffer + 1) = bits2,
1184 *(currentLineBuffer + 2) = bits1,
1185 *(currentLineBuffer + 3) = bits0;
1187 currentLineBuffer += lbufDelta;
1195 // Store scaled bitmap in line buffer
1197 void OPProcessScaledBitmap(uint64 p0, uint64 p1, uint64 p2, bool render)
1199 // Need to make sure that when writing that it stays within the line buffer...
1200 // LBUF ($F01800 - $F01D9E) 360 x 32-bit RAM
1201 uint8 depth = (p1 >> 12) & 0x07; // Color depth of image
1202 int32 xpos = ((int16)((p1 << 4) & 0xFFFF)) >> 4;// Image xpos in LBUF
1203 uint32 iwidth = (p1 >> 28) & 0x3FF; // Image width in *phrases*
1204 uint32 data = (p0 >> 40) & 0xFFFFF8; // Pixel data address
1205 //#ifdef OP_DEBUG_BMP
1206 // Prolly should use this... Though not sure exactly how.
1207 //Use the upper bits as an offset into the phrase depending on the BPP. That's how!
1208 uint32 firstPix = (p1 >> 49) & 0x3F;
1209 //This is WEIRD! I'm sure I saw Atari Karts request 8 BPP FIRSTPIX! What happened???
1211 WriteLog("OP: FIRSTPIX != 0! (Scaled BM)\n");
1213 // We can ignore the RELEASE (high order) bit for now--probably forever...!
1214 // uint8 flags = (p1 >> 45) & 0x0F; // REFLECT, RMW, TRANS, RELEASE
1215 //Optimize: break these out to their own BOOL values [DONE]
1216 uint8 flags = (p1 >> 45) & 0x07; // REFLECT (0), RMW (1), TRANS (2)
1217 bool flagREFLECT = (flags & OPFLAG_REFLECT ? true : false),
1218 flagRMW = (flags & OPFLAG_RMW ? true : false),
1219 flagTRANS = (flags & OPFLAG_TRANS ? true : false);
1220 uint8 index = (p1 >> 37) & 0xFE; // CLUT index offset (upper pix, 1-4 bpp)
1221 uint32 pitch = (p1 >> 15) & 0x07; // Phrase pitch
1223 uint8 * tomRam8 = TOMGetRamPointer();
1224 uint8 * paletteRAM = &tomRam8[0x400];
1225 // This is OK as long as it's used correctly: For 16-bit RAM to RAM direct copies--NOT
1226 // for use when using endian-corrected data (i.e., any of the *ReadWord functions!)
1227 uint16 * paletteRAM16 = (uint16 *)paletteRAM;
1229 uint16 hscale = p2 & 0xFF;
1230 // Hmm. It seems that fixing the horizontal scale necessitated re-fixing this. Not sure why,
1231 // but seems to be consistent with the vertical scaling now (and it may turn out to be wrong!)...
1232 uint16 horizontalRemainder = hscale; // Not sure if it starts full, but seems reasonable [It's not!]
1233 // uint8 horizontalRemainder = 0; // Let's try zero! Seems to work! Yay! [No, it doesn't!]
1234 int32 scaledWidthInPixels = (iwidth * phraseWidthToPixels[depth] * hscale) >> 5;
1235 uint32 scaledPhrasePixels = (phraseWidthToPixels[depth] * hscale) >> 5;
1237 // WriteLog("bitmap %ix? %ibpp at %i,? firstpix=? data=0x%.8x pitch %i hflipped=%s dwidth=? (linked to ?) RMW=%s Tranparent=%s\n",
1238 // iwidth, op_bitmap_bit_depth[bitdepth], xpos, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), (flags&OPFLAG_RMW ? "yes" : "no"), (flags&OPFLAG_TRANS ? "yes" : "no"));
1240 // Looks like an hscale of zero means don't draw!
1241 if (!render || iwidth == 0 || hscale == 0)
1244 /*extern int start_logging;
1246 WriteLog("OP: Scaled bitmap %ix? %ibpp at %i,? hscale=%02X fpix=%i data=%08X pitch %i hflipped=%s dwidth=? (linked to %08X) Transluency=%s\n",
1247 iwidth, op_bitmap_bit_depth[depth], xpos, hscale, firstPix, data, pitch, (flagREFLECT ? "yes" : "no"), op_pointer, (flagRMW ? "yes" : "no"));*/
1248 //#define OP_DEBUG_BMP
1249 //#ifdef OP_DEBUG_BMP
1250 // WriteLog("OP: Scaled bitmap %ix%i %ibpp at %i,%i firstpix=%i data=0x%.8x pitch %i hflipped=%s dwidth=%i (linked to 0x%.8x) Transluency=%s\n",
1251 // iwidth, height, op_bitmap_bit_depth[bitdepth], xpos, ypos, firstPix, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), dwidth, op_pointer, (flags&OPFLAG_RMW ? "yes" : "no"));
1254 int32 startPos = xpos, endPos = xpos +
1255 (!flagREFLECT ? scaledWidthInPixels - 1 : -(scaledWidthInPixels + 1));
1256 uint32 clippedWidth = 0, phraseClippedWidth = 0, dataClippedWidth = 0;
1257 bool in24BPPMode = (((GET16(tomRam8, 0x0028) >> 1) & 0x03) == 1 ? true : false); // VMODE
1258 // Not sure if this is Jaguar Two only location or what...
1259 // From the docs, it is... If we want to limit here we should think of something else.
1260 // int32 limit = GET16(tom_ram_8, 0x0008); // LIMIT
1262 // int32 lbufWidth = (!in24BPPMode ? limit - 1 : (limit / 2) - 1); // Zero based limit...
1263 int32 lbufWidth = 719; // Zero based limit...
1265 // If the image is completely to the left or right of the line buffer, then bail.
1266 //If in REFLECT mode, then these values are swapped! !!! FIX !!! [DONE]
1267 //There are four possibilities:
1268 // 1. image sits on left edge and no REFLECT; starts out of bounds but ends in bounds.
1269 // 2. image sits on left edge and REFLECT; starts in bounds but ends out of bounds.
1270 // 3. image sits on right edge and REFLECT; starts out of bounds but ends in bounds.
1271 // 4. image sits on right edge and no REFLECT; starts in bounds but ends out of bounds.
1272 //Numbers 2 & 4 can be caught by checking the LBUF clip while in the inner loop,
1273 // numbers 1 & 3 are of concern.
1274 // This *indirectly* handles only cases 2 & 4! And is WRONG if REFLECT is set...!
1275 // if (rightMargin < 0 || leftMargin > lbufWidth)
1277 // It might be easier to swap these (if REFLECTed) and just use XPOS down below...
1278 // That way, you could simply set XPOS to leftMargin if !REFLECT and to rightMargin otherwise.
1279 // Still have to be careful with the DATA and IWIDTH values though...
1281 if ((!flagREFLECT && (endPos < 0 || startPos > lbufWidth))
1282 || (flagREFLECT && (startPos < 0 || endPos > lbufWidth)))
1285 // Otherwise, find the clip limits and clip the phrase as well...
1286 // NOTE: I'm fudging here by letting the actual blit overstep the bounds of the
1287 // line buffer, but it shouldn't matter since there are two unused line
1288 // buffers below and nothing above and I'll at most write 40 bytes outside
1289 // the line buffer... I could use a fractional clip begin/end value, but
1290 // this makes the blit a *lot* more hairy. I might fix this in the future
1291 // if it becomes necessary. (JLH)
1292 // Probably wouldn't be *that* hairy. Just use a delta that tells the inner loop
1293 // which pixel in the phrase is being written, and quit when either end of phrases
1294 // is reached or line buffer extents are surpassed.
1296 //This stuff is probably wrong as well... !!! FIX !!!
1297 //The strange thing is that it seems to work, but that's no guarantee that it's bulletproof!
1298 //Yup. Seems that JagMania doesn't work correctly with this...
1299 //Dunno if this is the problem, but Atari Karts is showing *some* of the road now...
1300 //Actually, it is! Or, it was. It doesn't seem to be clipping here, so the problem lies
1301 //elsewhere! Hmm. Putting the scaling code into the 1/2/8 BPP cases seems to draw the ground
1302 // a bit more accurately... Strange!
1303 //It's probably a case of the REFLECT flag being set and the background being written
1304 //from the right side of the screen...
1305 //But no, it isn't... At least if the diagnostics are telling the truth!
1307 // NOTE: We're just using endPos to figure out how much, if any, to clip by.
1308 // ALSO: There may be another case where we start out of bounds and end out of bounds...!
1311 //There's a problem here with scaledPhrasePixels in that it can be forced to zero when
1312 //the scaling factor is small. So fix it already! !!! FIX !!!
1313 /*if (scaledPhrasePixels == 0)
1315 WriteLog("OP: [Scaled] We're about to encounter a divide by zero error!\n");
1316 DumpScaledObject(p0, p1, p2);
1318 //NOTE: I'm almost 100% sure that this is wrong... And it is! :-p
1320 //Try a simple example...
1321 // Let's say we have an 8 BPP scanline with an hscale of $80 (4). Our xpos is -10,
1322 // non-flipped. Pixels in the bitmap are XYZXYZXYZXYZXYZ.
1323 // Scaled up, they would be XXXXYYYYZZZZXXXXYYYYZZZZXXXXYYYYZZZZ...
1325 // Normally, we would expect this in the line buffer:
1326 // ZZXXXXYYYYZZZZXXXXYYYYZZZZ...
1328 // But instead we're getting:
1329 // XXXXYYYYZZZZXXXXYYYYZZZZ...
1331 // or are we??? It would seem so, simply by virtue of the fact that we're NOT starting
1332 // on negative boundary--or are we? Hmm...
1333 // cw = 10, dcw = pcw = 10 / ([8 * 4 = 32] 32) = 0, sp = -10
1335 // Let's try a real world example:
1337 //OP: Scaled bitmap (70, 8 BPP, spp=28) sp (-400) < 0... [new sp=-8, cw=400, dcw=pcw=14]
1338 //OP: Scaled bitmap (6F, 8 BPP, spp=27) sp (-395) < 0... [new sp=-17, cw=395, dcw=pcw=14]
1340 // Really, spp is 27.75 in the second case...
1341 // So... If we do 395 / 27.75, we get 14. Ok so far... If we scale that against the
1342 // start position (14 * 27.75), we get -6.5... NOT -17!
1344 //Now it seems we're working OK, at least for the first case...
1345 uint32 scaledPhrasePixelsUS = phraseWidthToPixels[depth] * hscale;
1347 if (startPos < 0) // Case #1: Begin out, end in, L to R
1349 extern int start_logging;
1351 WriteLog("OP: Scaled bitmap (%02X, %u BPP, spp=%u) start pos (%i) < 0...", hscale, op_bitmap_bit_depth[depth], scaledPhrasePixels, startPos);
1352 // clippedWidth = 0 - startPos,
1353 clippedWidth = (0 - startPos) << 5,
1354 // dataClippedWidth = phraseClippedWidth = clippedWidth / scaledPhrasePixels,
1355 dataClippedWidth = phraseClippedWidth = (clippedWidth / scaledPhrasePixelsUS) >> 5,
1356 // startPos = 0 - (clippedWidth % scaledPhrasePixels);
1357 startPos += (dataClippedWidth * scaledPhrasePixelsUS) >> 5;
1359 WriteLog(" [new sp=%i, cw=%i, dcw=pcw=%i]\n", startPos, clippedWidth, dataClippedWidth);
1362 if (endPos < 0) // Case #2: Begin in, end out, R to L
1363 clippedWidth = 0 - endPos,
1364 phraseClippedWidth = clippedWidth / scaledPhrasePixels;
1366 if (endPos > lbufWidth) // Case #3: Begin in, end out, L to R
1367 clippedWidth = endPos - lbufWidth,
1368 phraseClippedWidth = clippedWidth / scaledPhrasePixels;
1370 if (startPos > lbufWidth) // Case #4: Begin out, end in, R to L
1371 clippedWidth = startPos - lbufWidth,
1372 dataClippedWidth = phraseClippedWidth = clippedWidth / scaledPhrasePixels,
1373 startPos = lbufWidth + (clippedWidth % scaledPhrasePixels);
1375 extern int op_start_log;
1376 if (op_start_log && clippedWidth != 0)
1377 WriteLog("OP: Clipped line. SP=%i, EP=%i, clip=%u, iwidth=%u, hscale=%02X\n", startPos, endPos, clippedWidth, iwidth, hscale);
1378 if (op_start_log && startPos == 13)
1380 WriteLog("OP: Scaled line. SP=%i, EP=%i, clip=%u, iwidth=%u, hscale=%02X, depth=%u, firstPix=%u\n", startPos, endPos, clippedWidth, iwidth, hscale, depth, firstPix);
1381 DumpScaledObject(p0, p1, p2);
1384 WriteLog(" %08X: ", data);
1385 for(int i=0; i<7*8; i++)
1386 WriteLog("%02X ", JaguarReadByte(data+i));
1390 // If the image is sitting on the line buffer left or right edge, we need to compensate
1391 // by decreasing the image phrase width accordingly.
1392 iwidth -= phraseClippedWidth;
1394 // Also, if we're clipping the phrase we need to make sure we're in the correct part of
1396 // data += phraseClippedWidth * (pitch << 3);
1397 data += dataClippedWidth * (pitch << 3);
1399 // NOTE: When the bitmap is in REFLECT mode, the XPOS marks the *right* side of the
1400 // bitmap! This makes clipping & etc. MUCH, much easier...!
1401 // uint32 lbufAddress = 0x1800 + (!in24BPPMode ? leftMargin * 2 : leftMargin * 4);
1402 // uint32 lbufAddress = 0x1800 + (!in24BPPMode ? startPos * 2 : startPos * 4);
1403 uint32 lbufAddress = 0x1800 + startPos * 2;
1404 uint8 * currentLineBuffer = &tomRam8[lbufAddress];
1405 //uint8 * lineBufferLowerLimit = &tom_ram_8[0x1800],
1406 // * lineBufferUpperLimit = &tom_ram_8[0x1800 + 719];
1410 // Hmm. We check above for 24 BPP mode, but don't do anything about it below...
1411 // If we *were* in 24 BPP mode, how would you convert CRY to RGB24? Seems to me
1412 // that if you're in CRY mode then you wouldn't be able to use 24 BPP bitmaps
1414 // This seems to be the case (at least according to the Midsummer docs)...!
1416 if (depth == 0) // 1 BPP
1419 WriteLog("OP: Scaled bitmap @ 1 BPP requesting FIRSTPIX!\n");
1420 // The LSB of flags is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1421 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1424 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1426 while ((int32)iwidth > 0)
1428 uint8 bits = pixels >> 63;
1430 #ifndef OP_USES_PALETTE_ZERO
1431 if (flagTRANS && bits == 0)
1433 if (flagTRANS && (paletteRAM16[index | bits] == 0))
1439 // This is the *only* correct use of endian-dependent code
1440 // (i.e., mem-to-mem direct copying)!
1441 *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1443 *currentLineBuffer =
1444 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1445 *(currentLineBuffer + 1) =
1446 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1449 currentLineBuffer += lbufDelta;
1452 The reason we subtract the horizontalRemainder *after* the test is because we had too few
1453 bytes for horizontalRemainder to properly recognize a negative number. But now it's 16 bits
1454 wide, so we could probably go back to that (as long as we make it an int16 and not a uint16!)
1456 /* horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
1457 while (horizontalRemainder & 0x80)
1459 horizontalRemainder += hscale;
1463 // while (horizontalRemainder <= 0x20) // I.e., it's <= 1.0 (*before* subtraction)
1464 while (horizontalRemainder < 0x20) // I.e., it's <= 1.0 (*before* subtraction)
1466 horizontalRemainder += hscale;
1470 horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
1474 int phrasesToSkip = pixCount / 64, pixelShift = pixCount % 64;
1476 data += (pitch << 3) * phrasesToSkip;
1477 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1478 pixels <<= 1 * pixelShift;
1479 iwidth -= phrasesToSkip;
1480 pixCount = pixelShift;
1484 else if (depth == 1) // 2 BPP
1487 WriteLog("OP: Scaled bitmap @ 2 BPP requesting FIRSTPIX!\n");
1488 index &= 0xFC; // Top six bits form CLUT index
1489 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1490 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1493 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1495 while ((int32)iwidth > 0)
1497 uint8 bits = pixels >> 62;
1499 #ifndef OP_USES_PALETTE_ZERO
1500 if (flagTRANS && bits == 0)
1502 if (flagTRANS && (paletteRAM16[index | bits] == 0))
1508 // This is the *only* correct use of endian-dependent code
1509 // (i.e., mem-to-mem direct copying)!
1510 *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1512 *currentLineBuffer =
1513 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1514 *(currentLineBuffer + 1) =
1515 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1518 currentLineBuffer += lbufDelta;
1520 /* horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
1521 while (horizontalRemainder & 0x80)
1523 horizontalRemainder += hscale;
1527 // while (horizontalRemainder <= 0x20) // I.e., it's <= 0 (*before* subtraction)
1528 while (horizontalRemainder < 0x20) // I.e., it's <= 1.0 (*before* subtraction)
1530 horizontalRemainder += hscale;
1534 horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
1538 int phrasesToSkip = pixCount / 32, pixelShift = pixCount % 32;
1540 data += (pitch << 3) * phrasesToSkip;
1541 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1542 pixels <<= 2 * pixelShift;
1543 iwidth -= phrasesToSkip;
1544 pixCount = pixelShift;
1548 else if (depth == 2) // 4 BPP
1551 WriteLog("OP: Scaled bitmap @ 4 BPP requesting FIRSTPIX!\n");
1552 index &= 0xF0; // Top four bits form CLUT index
1553 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1554 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1557 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1559 while ((int32)iwidth > 0)
1561 uint8 bits = pixels >> 60;
1563 #ifndef OP_USES_PALETTE_ZERO
1564 if (flagTRANS && bits == 0)
1566 if (flagTRANS && (paletteRAM16[index | bits] == 0))
1572 // This is the *only* correct use of endian-dependent code
1573 // (i.e., mem-to-mem direct copying)!
1574 *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1576 *currentLineBuffer =
1577 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1578 *(currentLineBuffer + 1) =
1579 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1582 currentLineBuffer += lbufDelta;
1584 /* horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
1585 while (horizontalRemainder & 0x80)
1587 horizontalRemainder += hscale;
1591 // while (horizontalRemainder <= 0x20) // I.e., it's <= 0 (*before* subtraction)
1592 while (horizontalRemainder < 0x20) // I.e., it's <= 0 (*before* subtraction)
1594 horizontalRemainder += hscale;
1598 horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
1602 int phrasesToSkip = pixCount / 16, pixelShift = pixCount % 16;
1604 data += (pitch << 3) * phrasesToSkip;
1605 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1606 pixels <<= 4 * pixelShift;
1607 iwidth -= phrasesToSkip;
1608 pixCount = pixelShift;
1612 else if (depth == 3) // 8 BPP
1615 WriteLog("OP: Scaled bitmap @ 8 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1616 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1617 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1620 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1622 while ((int32)iwidth > 0)
1624 uint8 bits = pixels >> 56;
1626 #ifndef OP_USES_PALETTE_ZERO
1627 if (flagTRANS && bits == 0)
1629 if (flagTRANS && (paletteRAM16[bits] == 0))
1635 // This is the *only* correct use of endian-dependent code
1636 // (i.e., mem-to-mem direct copying)!
1637 *(uint16 *)currentLineBuffer = paletteRAM16[bits];
1639 if (currentLineBuffer >= lineBufferLowerLimit && currentLineBuffer <= lineBufferUpperLimit)
1640 *(uint16 *)currentLineBuffer = paletteRAM16[bits];
1643 *currentLineBuffer =
1644 BLEND_CR(*currentLineBuffer, paletteRAM[bits << 1]),
1645 *(currentLineBuffer + 1) =
1646 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[(bits << 1) + 1]);
1649 currentLineBuffer += lbufDelta;
1651 // while (horizontalRemainder <= 0x20) // I.e., it's <= 0 (*before* subtraction)
1652 while (horizontalRemainder < 0x20) // I.e., it's <= 1.0 (*before* subtraction)
1654 horizontalRemainder += hscale;
1658 horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
1662 int phrasesToSkip = pixCount / 8, pixelShift = pixCount % 8;
1664 data += (pitch << 3) * phrasesToSkip;
1665 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1666 pixels <<= 8 * pixelShift;
1667 iwidth -= phrasesToSkip;
1668 pixCount = pixelShift;
1672 else if (depth == 4) // 16 BPP
1675 WriteLog("OP: Scaled bitmap @ 16 BPP requesting FIRSTPIX!\n");
1676 // The LSB is OPFLAG_REFLECT, so sign extend it and OR 2 into it.
1677 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1680 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1682 while ((int32)iwidth > 0)
1684 uint8 bitsHi = pixels >> 56, bitsLo = pixels >> 48;
1686 //This doesn't seem right... Let's try the encoded black value ($8800):
1687 //Apparently, CRY 0 maps to $8800...
1688 if (flagTRANS && ((bitsLo | bitsHi) == 0))
1689 // if (flagTRANS && (bitsHi == 0x88) && (bitsLo == 0x00))
1694 *currentLineBuffer = bitsHi,
1695 *(currentLineBuffer + 1) = bitsLo;
1697 *currentLineBuffer =
1698 BLEND_CR(*currentLineBuffer, bitsHi),
1699 *(currentLineBuffer + 1) =
1700 BLEND_Y(*(currentLineBuffer + 1), bitsLo);
1703 currentLineBuffer += lbufDelta;
1705 /* horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
1706 while (horizontalRemainder & 0x80)
1708 horizontalRemainder += hscale;
1712 // while (horizontalRemainder <= 0x20) // I.e., it's <= 0 (*before* subtraction)
1713 while (horizontalRemainder < 0x20) // I.e., it's <= 1.0 (*before* subtraction)
1715 horizontalRemainder += hscale;
1719 horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
1723 int phrasesToSkip = pixCount / 4, pixelShift = pixCount % 4;
1725 data += (pitch << 3) * phrasesToSkip;
1726 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1727 pixels <<= 16 * pixelShift;
1729 iwidth -= phrasesToSkip;
1731 pixCount = pixelShift;
1735 else if (depth == 5) // 24 BPP
1737 //I'm not sure that you can scale a 24 BPP bitmap properly--the JTRM seems to indicate as much.
1738 WriteLog("OP: Writing 24 BPP scaled bitmap!\n");
1740 WriteLog("OP: Scaled bitmap @ 24 BPP requesting FIRSTPIX!\n");
1741 // Not sure, but I think RMW only works with 16 BPP and below, and only in CRY mode...
1742 // The LSB is OPFLAG_REFLECT, so sign extend it and or 4 into it.
1743 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 4) | 0x04;
1748 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1749 data += pitch << 3; // Multiply pitch * 8 (optimize: precompute this value)
1751 for(int i=0; i<2; i++)
1753 uint8 bits3 = pixels >> 56, bits2 = pixels >> 48,
1754 bits1 = pixels >> 40, bits0 = pixels >> 32;
1756 if (flagTRANS && (bits3 | bits2 | bits1 | bits0) == 0)
1759 *currentLineBuffer = bits3,
1760 *(currentLineBuffer + 1) = bits2,
1761 *(currentLineBuffer + 2) = bits1,
1762 *(currentLineBuffer + 3) = bits0;
1764 currentLineBuffer += lbufDelta;