4 // Original source by David Raingeard (Cal2)
5 // GCC/SDL port by Niels Wagenaar (Linux/WIN32) and Caz (BeOS)
6 // Extensive cleanups/fixes/rewrites by James L. Hammons
7 // (C) 2010 Underground Software
9 // JLH = James L. Hammons <jlhamm@acm.org>
12 // --- ---------- -------------------------------------------------------------
13 // JLH 01/16/2010 Created this log ;-)
28 //#define OP_DEBUG_BMP
// Look up the saturating-blend result for a (dst, src) byte pair in the
// tables filled by the initialization loop. Both tables are indexed by
// (dst << 8) | src. Each macro argument is parenthesized before the cast so
// that an expression argument (e.g. `a | b`) is cast and shifted as a whole
// instead of having the cast/shift bind to only one of its operands.
#define BLEND_Y(dst, src) op_blend_y[(((uint16)(dst)) << 8) | ((uint16)(src))]
#define BLEND_CR(dst, src) op_blend_cr[(((uint16)(dst)) << 8) | ((uint16)(src))]
// Object type codes. The list walker in this file dispatches on the low three
// bits of an object's first phrase (p0 & 0x07) against these values; the
// trailing comment on each line is the same code written in binary.
33 #define OBJECT_TYPE_BITMAP 0 // 000
34 #define OBJECT_TYPE_SCALE 1 // 001
35 #define OBJECT_TYPE_GPU 2 // 010
36 #define OBJECT_TYPE_BRANCH 3 // 011
37 #define OBJECT_TYPE_STOP 4 // 100
// Condition codes compared against the `cc` field of a BRANCH object.
// NOTE(review): the BRANCH decoders visible in this file extract the field as
// (x >> 14) & 0x03, which can only yield 0..3, so CONDITION_SECOND_HALF_LINE (4)
// can never be produced by them -- confirm the intended field width.
39 #define CONDITION_EQUAL 0
40 #define CONDITION_LESS_THAN 1
41 #define CONDITION_GREATER_THAN 2
42 #define CONDITION_OP_FLAG_SET 3
43 #define CONDITION_SECOND_HALF_LINE 4
// Bit masks tested against the bitmap object flags field, which this file
// extracts from the second phrase as (p1 >> 45) & 0x0F.
45 #define OPFLAG_RELEASE 8 // Bus release bit
46 #define OPFLAG_TRANS 4 // Transparency bit
47 #define OPFLAG_RMW 2 // Read-Modify-Write bit
48 #define OPFLAG_REFLECT 1 // Horizontal mirror bit
50 // Private function prototypes
52 void OPProcessFixedBitmap(uint64 p0, uint64 p1, bool render);
53 void OPProcessScaledBitmap(uint64 p0, uint64 p1, uint64 p2, bool render);
54 void DumpScaledObject(uint64 p0, uint64 p1, uint64 p2);
55 void DumpFixedObject(uint64 p0, uint64 p1);
56 uint64 OPLoadPhrase(uint32 offset);
58 // Local global variables
60 // Blend tables (64K each)
// Indexed by (dst << 8) | src through the BLEND_Y / BLEND_CR macros.
61 static uint8 op_blend_y[0x10000];
62 static uint8 op_blend_cr[0x10000];
63 // There may be a problem with this "RAM" overlapping (and thus being independent of)
64 // some of the regular TOM RAM...
65 //#warning objectp_ram is separated from TOM RAM--need to fix that!
66 //static uint8 objectp_ram[0x40]; // This is based at $F00000
// NOTE(review): objectp_running is not referenced anywhere in the visible part of this file.
67 uint8 objectp_running = 0;
68 //bool objectp_stop_reading_list;
// Bits per pixel for each 3-bit depth code; used by the logging/dump code in this file.
70 static uint8 op_bitmap_bit_depth[8] = { 1, 2, 4, 8, 16, 24, 32, 0 };
71 //static uint32 op_bitmap_bit_size[8] =
72 // { (uint32)(0.125*65536), (uint32)(0.25*65536), (uint32)(0.5*65536), (uint32)(1*65536),
73 // (uint32)(2*65536), (uint32)(1*65536), (uint32)(1*65536), (uint32)(1*65536) };
// Address of the object currently being processed; loaded from OPGetListPointer()
// at the start of list processing and updated from object link fields.
74 static uint32 op_pointer;
// Pixels per phrase for each 3-bit depth code. Entries 6 and 7 are zero, and the
// clipping code in this file divides by these entries, so those depth codes
// would cause a division by zero there.
76 int32 phraseWidthToPixels[8] = { 64, 32, 16, 8, 4, 2, 0, 0 };
80 // Object Processor initialization
84 // Here we calculate the saturating blend of a signed 4-bit value and an
85 // existing Cyan/Red value as well as a signed 8-bit value and an existing intensity...
86 // Note: CRY is 4 bits Cyan, 4 bits Red, 8 bits intensitY (16 bits total)
87 for(int i=0; i<256*256; i++)
89 int y = (i >> 8) & 0xFF;
90 int dy = (int8)i; // Sign extend the Y index
91 int c1 = (i >> 8) & 0x0F;
92 int dc1 = (int8)(i << 4) >> 4; // Sign extend the R index
93 int c2 = (i >> 12) & 0x0F;
94 int dc2 = (int8)(i & 0xF0) >> 4; // Sign extend the C index
119 op_blend_cr[i] = (c2 << 4) | c1;
126 // Object Processor reset
130 // memset(objectp_ram, 0x00, 0x40);
136 const char * opType[8] =
137 { "(BITMAP)", "(SCALED BITMAP)", "(GPU INT)", "(BRANCH)", "(STOP)", "???", "???", "???" };
138 const char * ccType[8] =
139 { "\"==\"", "\"<\"", "\">\"", "(opflag set)", "(second half line)", "?", "?", "?" };
141 uint32 olp = OPGetListPointer();
142 WriteLog("OP: OLP = %08X\n", olp);
143 WriteLog("OP: Phrase dump\n ----------\n");
144 for(uint32 i=0; i<0x100; i+=8)
146 uint32 hi = JaguarReadLong(olp + i, OP), lo = JaguarReadLong(olp + i + 4, OP);
147 WriteLog("\t%08X: %08X %08X %s", olp + i, hi, lo, opType[lo & 0x07]);
148 if ((lo & 0x07) == 3)
150 uint16 ypos = (lo >> 3) & 0x7FF;
151 uint8 cc = (lo >> 14) & 0x03;
152 uint32 link = ((hi << 11) | (lo >> 21)) & 0x3FFFF8;
153 WriteLog(" YPOS=%u, CC=%s, link=%08X", ypos, ccType[cc], link);
156 if ((lo & 0x07) == 0)
157 DumpFixedObject(OPLoadPhrase(olp+i), OPLoadPhrase(olp+i+8));
158 if ((lo & 0x07) == 1)
159 DumpScaledObject(OPLoadPhrase(olp+i), OPLoadPhrase(olp+i+8), OPLoadPhrase(olp+i+16));
163 // memory_free(op_blend_y);
164 // memory_free(op_blend_cr);
168 // Object Processor memory access
169 // Memory range: F00010 - F00027
171 // F00010-F00017 R xxxxxxxx xxxxxxxx OB - current object code from the graphics processor
172 // F00020-F00023 W xxxxxxxx xxxxxxxx OLP - start of the object list
173 // F00026 W -------- -------x OBF - object processor flag
// Returns the byte stored at `offset` in objectp_ram; `who` is not used by the
// visible body.
// NOTE(review): the only objectp_ram definition visible in this file is
// commented out -- confirm whether this accessor is still compiled.
177 uint8 OPReadByte(uint32 offset, uint32 who/*=UNKNOWN*/)
180 return objectp_ram[offset];
// Returns the 16-bit value that GET16 reads from objectp_ram at `offset`;
// `who` is not used by the visible body.
// NOTE(review): the only objectp_ram definition visible in this file is
// commented out -- confirm whether this accessor is still compiled.
183 uint16 OPReadWord(uint32 offset, uint32 who/*=UNKNOWN*/)
186 return GET16(objectp_ram, offset);
// Stores `data` at `offset` in objectp_ram; `who` is not used by the visible body.
// NOTE(review): the only objectp_ram definition visible in this file is
// commented out -- confirm whether this accessor is still compiled.
189 void OPWriteByte(uint32 offset, uint8 data, uint32 who/*=UNKNOWN*/)
192 objectp_ram[offset] = data;
// Stores the 16-bit `data` into objectp_ram at `offset` via SET16; `who` is not
// used by the visible body.
// NOTE(review): the only objectp_ram definition visible in this file is
// commented out -- confirm whether this accessor is still compiled.
195 void OPWriteWord(uint32 offset, uint16 data, uint32 who/*=UNKNOWN*/)
198 SET16(objectp_ram, offset, data);
// Disabled debug logging of writes to the list pointer words:
200 /*if (offset == 0x20)
201 WriteLog("OP: Setting lo list pointer: %04X\n", data);
203 WriteLog("OP: Setting hi list pointer: %04X\n", data);//*/
// Returns the 32-bit object list pointer (OLP) assembled from two 16-bit reads
// of tomRam8: the word at 0x20 supplies the low half and the word at 0x22 the
// high half.
207 uint32 OPGetListPointer(void)
209 // Note: This register is LO / HI WORD, hence the funky look of this...
210 return GET16(tomRam8, 0x20) | (GET16(tomRam8, 0x22) << 16);
213 // This is WRONG, since the OBF is only 16 bits wide!!! [FIXED]
// Returns the 16-bit value read from tomRam8 at 0x26, widened to uint32.
215 uint32 OPGetStatusRegister(void)
217 return GET16(tomRam8, 0x26);
220 // This is WRONG, since the OBF is only 16 bits wide!!! [FIXED]
// Writes bits 8-15 of `data` to tomRam8[0x26] and ORs bits 1-7 of `data` into
// tomRam8[0x27]. Bit 0 of `data` is ignored, and because the low byte is OR-ed
// rather than assigned, bits already set in tomRam8[0x27] are never cleared here.
222 void OPSetStatusRegister(uint32 data)
224 tomRam8[0x26] = (data & 0x0000FF00) >> 8;
225 tomRam8[0x27] |= (data & 0xFE);
// Stores the 64-bit `object` into tomRam8[0x10..0x17], most significant byte at
// 0x10 and least significant byte at 0x17. The bytes are written from the low
// end upward, shifting `object` right by 8 after each store.
228 void OPSetCurrentObject(uint64 object)
230 //Not sure this is right... Wouldn't it just be stored 64 bit BE?
231 // Stored as least significant 32 bits first, ms32 last in big endian
232 /* objectp_ram[0x13] = object & 0xFF; object >>= 8;
233 objectp_ram[0x12] = object & 0xFF; object >>= 8;
234 objectp_ram[0x11] = object & 0xFF; object >>= 8;
235 objectp_ram[0x10] = object & 0xFF; object >>= 8;
237 objectp_ram[0x17] = object & 0xFF; object >>= 8;
238 objectp_ram[0x16] = object & 0xFF; object >>= 8;
239 objectp_ram[0x15] = object & 0xFF; object >>= 8;
240 objectp_ram[0x14] = object & 0xFF;*/
241 // Let's try regular good old big endian...
242 tomRam8[0x17] = object & 0xFF; object >>= 8;
243 tomRam8[0x16] = object & 0xFF; object >>= 8;
244 tomRam8[0x15] = object & 0xFF; object >>= 8;
245 tomRam8[0x14] = object & 0xFF; object >>= 8;
247 tomRam8[0x13] = object & 0xFF; object >>= 8;
248 tomRam8[0x12] = object & 0xFF; object >>= 8;
249 tomRam8[0x11] = object & 0xFF; object >>= 8;
250 tomRam8[0x10] = object & 0xFF;
// Reads one 64-bit phrase: `offset` is rounded down to a multiple of 8, then two
// JaguarReadLong calls (at offset and offset+4) are combined with the first
// long in the upper 32 bits of the result.
253 uint64 OPLoadPhrase(uint32 offset)
255 offset &= ~0x07; // 8 byte alignment
256 return ((uint64)JaguarReadLong(offset, OP) << 32) | (uint64)JaguarReadLong(offset+4, OP);
// Writes the 64-bit phrase `p`: `offset` is rounded down to a multiple of 8, the
// upper 32 bits of `p` go to offset and the lower 32 bits to offset+4, each via
// JaguarWriteLong.
259 void OPStorePhrase(uint32 offset, uint64 p)
261 offset &= ~0x07; // 8 byte alignment
262 JaguarWriteLong(offset, p >> 32, OP);
263 JaguarWriteLong(offset + 4, p & 0xFFFFFFFF, OP);
267 // Debugging routines
// Logs a decoded view of a scaled bitmap object whose three phrases are p0, p1
// and p2. The raw p1 and p2 words are printed against op_pointer and
// op_pointer+8, then the individual fields are extracted with the shifts and
// masks below and written with WriteLog. Nothing is modified.
269 void DumpScaledObject(uint64 p0, uint64 p1, uint64 p2)
271 WriteLog(" (SCALED BITMAP)");
272 WriteLog(" %08X --> phrase %08X %08X\n", op_pointer, (uint32)(p1>>32), (uint32)(p1&0xFFFFFFFF));
273 WriteLog(" %08X --> phrase %08X %08X ", op_pointer+8, (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
274 uint8 bitdepth = (p1 >> 12) & 0x07;
275 //WAS: int16 ypos = ((p0 >> 3) & 0x3FF); // ??? What if not interlaced (/2)?
276 int16 ypos = ((p0 >> 3) & 0x7FF); // ??? What if not interlaced (/2)?
277 int32 xpos = p1 & 0xFFF;
// Sign-extend the 12-bit xpos field to 32 bits.
278 xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);
279 uint32 iwidth = ((p1 >> 28) & 0x3FF);
280 uint32 dwidth = ((p1 >> 18) & 0x3FF); // Unsigned!
281 uint16 height = ((p0 >> 14) & 0x3FF);
// link and ptr are stored without their low three bits; shift left by 3 to get addresses.
282 uint32 link = ((p0 >> 24) & 0x7FFFF) << 3;
283 uint32 ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
284 uint32 firstPix = (p1 >> 49) & 0x3F;
285 uint8 flags = (p1 >> 45) & 0x0F;
286 uint8 idx = (p1 >> 38) & 0x7F;
287 uint32 pitch = (p1 >> 15) & 0x07;
288 WriteLog("\n [%u (%u) x %u @ (%i, %u) (%u bpp), l: %08X, p: %08X fp: %02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
289 iwidth, dwidth, height, xpos, ypos, op_bitmap_bit_depth[bitdepth], link, ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""), (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""), (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
// The three low bytes of p2 carry the scaling fields.
290 uint32 hscale = p2 & 0xFF;
291 uint32 vscale = (p2 >> 8) & 0xFF;
292 uint32 remainder = (p2 >> 16) & 0xFF;
293 WriteLog(" [hsc: %02X, vsc: %02X, rem: %02X]\n", hscale, vscale, remainder);
// Logs a decoded view of a fixed (unscaled) bitmap object whose two phrases are
// p0 and p1. The raw p1 word is printed against op_pointer, then the individual
// fields are extracted with the shifts and masks below and written with
// WriteLog. Nothing is modified.
296 void DumpFixedObject(uint64 p0, uint64 p1)
298 WriteLog(" (BITMAP)");
299 WriteLog(" %08X --> phrase %08X %08X\n", op_pointer, (uint32)(p1>>32), (uint32)(p1&0xFFFFFFFF));
300 uint8 bitdepth = (p1 >> 12) & 0x07;
301 //WAS: int16 ypos = ((p0 >> 3) & 0x3FF); // ??? What if not interlaced (/2)?
302 int16 ypos = ((p0 >> 3) & 0x7FF); // ??? What if not interlaced (/2)?
303 int32 xpos = p1 & 0xFFF;
// Sign-extend the 12-bit xpos field to 32 bits.
304 xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);
305 uint32 iwidth = ((p1 >> 28) & 0x3FF);
306 uint32 dwidth = ((p1 >> 18) & 0x3FF); // Unsigned!
307 uint16 height = ((p0 >> 14) & 0x3FF);
// link and ptr are stored without their low three bits; shift left by 3 to get addresses.
308 uint32 link = ((p0 >> 24) & 0x7FFFF) << 3;
309 uint32 ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
310 uint32 firstPix = (p1 >> 49) & 0x3F;
311 uint8 flags = (p1 >> 45) & 0x0F;
312 uint8 idx = (p1 >> 38) & 0x7F;
313 uint32 pitch = (p1 >> 15) & 0x07;
314 WriteLog(" [%u (%u) x %u @ (%i, %u) (%u bpp), l: %08X, p: %08X fp: %02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
315 iwidth, dwidth, height, xpos, ypos, op_bitmap_bit_depth[bitdepth], link, ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""), (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""), (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
319 // Object Processor main routine
321 //Need to fix this so that when a GPU object IRQ happens, we can pick up OP processing
322 //where we left off. !!! FIX !!!
323 #warning "Need to fix this so that when an GPU object IRQ happens, we can pick up OP processing where we left off. !!! FIX !!!"
324 void OPProcessList(int scanline, bool render)
326 extern int op_start_log;
327 // char * condition_to_str[8] =
328 // { "==", "<", ">", "(opflag set)", "(second half line)", "?", "?", "?" };
330 op_pointer = OPGetListPointer();
332 // objectp_stop_reading_list = false;
334 //WriteLog("OP: Processing line #%u (OLP=%08X)...\n", scanline, op_pointer);
337 // *** BEGIN OP PROCESSOR TESTING ONLY ***
338 extern bool interactiveMode;
340 extern int objectPtr;
342 int bitmapCounter = 0;
343 // *** END OP PROCESSOR TESTING ONLY ***
345 uint32 opCyclesToRun = 10000; // This is a pulled-out-of-the-air value (will need to be fixed, obviously!)
347 // if (op_pointer) WriteLog(" new op list at 0x%.8x scanline %i\n",op_pointer,scanline);
350 // *** BEGIN OP PROCESSOR TESTING ONLY ***
351 if (interactiveMode && bitmapCounter == objectPtr)
355 // *** END OP PROCESSOR TESTING ONLY ***
356 // if (objectp_stop_reading_list)
359 uint64 p0 = OPLoadPhrase(op_pointer);
360 //WriteLog("\t%08X type %i\n", op_pointer, (uint8)p0 & 0x07);
364 if (scanline == TOMGetVDB() && op_start_log)
365 //if (scanline == 215 && op_start_log)
366 //if (scanline == 28 && op_start_log)
369 WriteLog("%08X --> phrase %08X %08X", op_pointer - 8, (int)(p0>>32), (int)(p0&0xFFFFFFFF));
370 if ((p0 & 0x07) == OBJECT_TYPE_BITMAP)
372 WriteLog(" (BITMAP) ");
373 uint64 p1 = OPLoadPhrase(op_pointer);
374 WriteLog("\n%08X --> phrase %08X %08X ", op_pointer, (int)(p1>>32), (int)(p1&0xFFFFFFFF));
375 uint8 bitdepth = (p1 >> 12) & 0x07;
376 //WAS: int16 ypos = ((p0 >> 3) & 0x3FF); // ??? What if not interlaced (/2)?
377 int16 ypos = ((p0 >> 3) & 0x7FF); // ??? What if not interlaced (/2)?
378 int32 xpos = p1 & 0xFFF;
379 xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);
380 uint32 iwidth = ((p1 >> 28) & 0x3FF);
381 uint32 dwidth = ((p1 >> 18) & 0x3FF); // Unsigned!
382 uint16 height = ((p0 >> 14) & 0x3FF);
383 uint32 link = ((p0 >> 24) & 0x7FFFF) << 3;
384 uint32 ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
385 uint32 firstPix = (p1 >> 49) & 0x3F;
386 uint8 flags = (p1 >> 45) & 0x0F;
387 uint8 idx = (p1 >> 38) & 0x7F;
388 uint32 pitch = (p1 >> 15) & 0x07;
389 WriteLog("\n [%u (%u) x %u @ (%i, %u) (%u bpp), l: %08X, p: %08X fp: %02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
390 iwidth, dwidth, height, xpos, ypos, op_bitmap_bit_depth[bitdepth], link, ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""), (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""), (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
392 if ((p0 & 0x07) == OBJECT_TYPE_SCALE)
394 WriteLog(" (SCALED BITMAP)");
395 uint64 p1 = OPLoadPhrase(op_pointer), p2 = OPLoadPhrase(op_pointer+8);
396 WriteLog("\n%08X --> phrase %08X %08X ", op_pointer, (int)(p1>>32), (int)(p1&0xFFFFFFFF));
397 WriteLog("\n%08X --> phrase %08X %08X ", op_pointer+8, (int)(p2>>32), (int)(p2&0xFFFFFFFF));
398 uint8 bitdepth = (p1 >> 12) & 0x07;
399 //WAS: int16 ypos = ((p0 >> 3) & 0x3FF); // ??? What if not interlaced (/2)?
400 int16 ypos = ((p0 >> 3) & 0x7FF); // ??? What if not interlaced (/2)?
401 int32 xpos = p1 & 0xFFF;
402 xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);
403 uint32 iwidth = ((p1 >> 28) & 0x3FF);
404 uint32 dwidth = ((p1 >> 18) & 0x3FF); // Unsigned!
405 uint16 height = ((p0 >> 14) & 0x3FF);
406 uint32 link = ((p0 >> 24) & 0x7FFFF) << 3;
407 uint32 ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
408 uint32 firstPix = (p1 >> 49) & 0x3F;
409 uint8 flags = (p1 >> 45) & 0x0F;
410 uint8 idx = (p1 >> 38) & 0x7F;
411 uint32 pitch = (p1 >> 15) & 0x07;
412 WriteLog("\n [%u (%u) x %u @ (%i, %u) (%u bpp), l: %08X, p: %08X fp: %02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
413 iwidth, dwidth, height, xpos, ypos, op_bitmap_bit_depth[bitdepth], link, ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""), (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""), (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
414 uint32 hscale = p2 & 0xFF;
415 uint32 vscale = (p2 >> 8) & 0xFF;
416 uint32 remainder = (p2 >> 16) & 0xFF;
417 WriteLog(" [hsc: %02X, vsc: %02X, rem: %02X]\n", hscale, vscale, remainder);
419 if ((p0 & 0x07) == OBJECT_TYPE_GPU)
420 WriteLog(" (GPU)\n");
421 if ((p0 & 0x07) == OBJECT_TYPE_BRANCH)
423 WriteLog(" (BRANCH)\n");
424 uint8 * jaguarMainRam = GetRamPtr();
425 WriteLog("[RAM] --> ");
426 for(int k=0; k<8; k++)
427 WriteLog("%02X ", jaguarMainRam[op_pointer-8 + k]);
430 if ((p0 & 0x07) == OBJECT_TYPE_STOP)
431 WriteLog(" --> List end\n\n");
435 switch ((uint8)p0 & 0x07)
437 case OBJECT_TYPE_BITMAP:
439 //WAS: uint16 ypos = (p0 >> 3) & 0x3FF;
440 uint16 ypos = (p0 >> 3) & 0x7FF;
441 // This is only theory implied by Rayman...!
442 // It seems that if the YPOS is zero, then bump the YPOS value so that it coincides with
443 // the VDB value. With interlacing, this would be slightly more tricky.
444 // There's probably another bit somewhere that enables this mode--but so far, doesn't seem
445 // to affect any other game in a negative way (that I've seen).
446 // Either that, or it's an undocumented bug...
448 //No, the reason this was needed is that the OP code before was wrong. Any value
449 //less than VDB will get written to the top line of the display!
451 // Not so sure... Let's see what happens here...
454 ypos = TOMReadWord(0xF00046, OP) / 2; // Get the VDB value
456 // Actually, no. Any item less than VDB will get only the lines that hang over VDB displayed.
457 // So we need to fix this somehow... (and it has... in tom.cpp :-P)
459 uint32 height = (p0 & 0xFFC000) >> 14;
460 uint32 oldOPP = op_pointer - 8;
461 // *** BEGIN OP PROCESSOR TESTING ONLY ***
462 if (inhibit && op_start_log)
463 WriteLog("!!! ^^^ This object is INHIBITED! ^^^ !!!\n");
465 if (!inhibit) // For OP testing only!
466 // *** END OP PROCESSOR TESTING ONLY ***
467 if (scanline >= ypos && height > 0)
469 uint64 p1 = OPLoadPhrase(op_pointer);
471 //WriteLog("OP: Writing scanline %d with ypos == %d...\n", scanline, ypos);
472 //WriteLog("--> Writing %u BPP bitmap...\n", op_bitmap_bit_depth[(p1 >> 12) & 0x07]);
473 // OPProcessFixedBitmap(scanline, p0, p1, render);
474 OPProcessFixedBitmap(p0, p1, render);
478 //???Does this really happen??? Doesn't seem to work if you do this...!
479 //Probably not. Must be a bug in the documentation...!
480 // uint32 link = (p0 & 0x7FFFF000000) >> 21;
481 // SET16(tom_ram_8, 0x20, link & 0xFFFF); // OLP
482 // SET16(tom_ram_8, 0x22, link >> 16);
483 /* uint32 height = (p0 & 0xFFC000) >> 14;
486 // NOTE: Would subtract 2 if in interlaced mode...!
487 // uint64 height = ((p0 & 0xFFC000) - 0x4000) & 0xFFC000;
491 uint64 data = (p0 & 0xFFFFF80000000000LL) >> 40;
492 uint64 dwidth = (p1 & 0xFFC0000) >> 15;
495 p0 &= ~0xFFFFF80000FFC000LL; // Mask out old data...
496 p0 |= (uint64)height << 14;
498 OPStorePhrase(oldOPP, p0);
500 //WriteLog("\t\tOld OP: %08X -> ", op_pointer);
501 //Temp, for testing...
502 //No doubt, this type of check will break all kinds of stuff... !!! FIX !!!
503 //And it does! !!! FIX !!!
504 //Let's remove this "fix" since it screws up more than it fixes.
505 /* if (op_pointer > ((p0 & 0x000007FFFF000000LL) >> 21))
508 op_pointer = (p0 & 0x000007FFFF000000LL) >> 21;
509 //WriteLog("New OP: %08X\n", op_pointer);
512 case OBJECT_TYPE_SCALE:
514 //WAS: uint16 ypos = (p0 >> 3) & 0x3FF;
515 uint16 ypos = (p0 >> 3) & 0x7FF;
516 uint32 height = (p0 & 0xFFC000) >> 14;
517 uint32 oldOPP = op_pointer - 8;
518 //WriteLog("OP: Scaled Object (ypos=%04X, height=%04X", ypos, height);
519 // *** BEGIN OP PROCESSOR TESTING ONLY ***
520 if (inhibit && op_start_log)
522 WriteLog("!!! ^^^ This object is INHIBITED! ^^^ !!! (scanline=%u, ypos=%u, height=%u)\n", scanline, ypos, height);
523 DumpScaledObject(p0, OPLoadPhrase(op_pointer), OPLoadPhrase(op_pointer+8));
526 if (!inhibit) // For OP testing only!
527 // *** END OP PROCESSOR TESTING ONLY ***
528 if (scanline >= ypos && height > 0)
530 uint64 p1 = OPLoadPhrase(op_pointer);
532 uint64 p2 = OPLoadPhrase(op_pointer);
534 //WriteLog("OP: %08X (%d) %08X%08X %08X%08X %08X%08X\n", oldOPP, scanline, (uint32)(p0>>32), (uint32)(p0&0xFFFFFFFF), (uint32)(p1>>32), (uint32)(p1&0xFFFFFFFF), (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
535 OPProcessScaledBitmap(p0, p1, p2, render);
539 uint16 remainder = (p2 >> 16) & 0xFF;//, vscale = p2 >> 8;
540 uint8 /*remainder = p2 >> 16,*/ vscale = p2 >> 8;
541 //Actually, we should skip this object if it has a vscale of zero.
542 //Or do we? Not sure... Atari Karts has a few lines that look like:
544 //000E8268 --> phrase 00010000 7000B00D
545 // [7 (0) x 1 @ (13, 0) (8 bpp), l: 000E82A0, p: 000E0FC0 fp: 00, fl:RELEASE, idx:00, pt:01]
546 // [hsc: 9A, vsc: 00, rem: 00]
547 // Could it be the vscale is overridden if the DWIDTH is zero? Hmm...
548 //WriteLog("OP: Scaled bitmap processing (rem=%02X, vscale=%02X)...\n", remainder, vscale);//*/
551 vscale = 0x20; // OP bug??? Nope, it isn't...! Or is it?
553 //extern int start_logging;
555 // WriteLog("--> Returned from scaled bitmap processing (rem=%02X, vscale=%02X)...\n", remainder, vscale);//*/
557 //--> Returned from scaled bitmap processing (rem=20, vscale=80)...
558 //There are other problems here, it looks like...
560 //About to execute OP (508)...
562 OP: Scaled bitmap 4x? 4bpp at 38,? hscale=7C fpix=0 data=00075E28 pitch 1 hflipped=no dwidth=? (linked to 00071118) Transluency=no
563 --> Returned from scaled bitmap processing (rem=50, vscale=7C)...
564 OP: Scaled bitmap 4x? 4bpp at 38,? hscale=7C fpix=0 data=00075E28 pitch 1 hflipped=no dwidth=? (linked to 00071118) Transluency=no
565 --> Returned from scaled bitmap processing (rem=30, vscale=7C)...
566 OP: Scaled bitmap 4x? 4bpp at 38,? hscale=7C fpix=0 data=00075E28 pitch 1 hflipped=no dwidth=? (linked to 00071118) Transluency=no
567 --> Returned from scaled bitmap processing (rem=10, vscale=7C)...
568 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756A8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
569 --> Returned from scaled bitmap processing (rem=00, vscale=7E)...
570 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
571 --> Returned from scaled bitmap processing (rem=00, vscale=80)...
572 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
573 --> Returned from scaled bitmap processing (rem=5E, vscale=7E)...
574 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756E8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
575 --> Returned from scaled bitmap processing (rem=60, vscale=80)...
576 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
577 --> Returned from scaled bitmap processing (rem=3E, vscale=7E)...
578 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756E8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
579 --> Returned from scaled bitmap processing (rem=40, vscale=80)...
580 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
581 --> Returned from scaled bitmap processing (rem=1E, vscale=7E)...
582 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756E8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
583 --> Returned from scaled bitmap processing (rem=20, vscale=80)...
585 //Here's another problem:
586 // [hsc: 20, vsc: 20, rem: 00]
587 // Since we're not checking for $E0 (but that's what we get from the above), we end
588 // up repeating this scanline unnecessarily... !!! FIX !!! [DONE, but... still not quite
589 // right. Either that, or the Accolade team that wrote Bubsy screwed up royal.]
590 //Also note: $E0 = 7.0 which IS a legal vscale value...
592 // if (remainder & 0x80) // I.e., it's negative
593 // if ((remainder & 0x80) || remainder == 0) // I.e., it's <= 0
594 // if ((remainder - 1) >= 0xE0) // I.e., it's <= 0
595 // if ((remainder >= 0xE1) || remainder == 0)// I.e., it's <= 0
596 // if ((remainder >= 0xE1 && remainder <= 0xFF) || remainder == 0)// I.e., it's <= 0
597 // if (remainder <= 0x20) // I.e., it's <= 1.0
598 // I.e., it's < 1.0f -> means it'll go negative when we subtract 1.0f.
599 if (remainder < 0x20)
601 uint64 data = (p0 & 0xFFFFF80000000000LL) >> 40;
602 uint64 dwidth = (p1 & 0xFFC0000) >> 15;
604 // while (remainder & 0x80)
605 // while ((remainder & 0x80) || remainder == 0)
606 // while ((remainder - 1) >= 0xE0)
607 // while ((remainder >= 0xE1) || remainder == 0)
608 // while ((remainder >= 0xE1 && remainder <= 0xFF) || remainder == 0)
609 // while (remainder <= 0x20)
610 while (remainder < 0x20)
620 p0 &= ~0xFFFFF80000FFC000LL; // Mask out old data...
621 p0 |= (uint64)height << 14;
623 OPStorePhrase(oldOPP, p0);
626 remainder -= 0x20; // 1.0f in [3.5] fixed point format
629 // WriteLog("--> Finished writebacks...\n");//*/
631 //WriteLog(" [%08X%08X -> ", (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
632 p2 &= ~0x0000000000FF0000LL;
633 p2 |= (uint64)remainder << 16;
634 //WriteLog("%08X%08X]\n", (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
635 OPStorePhrase(oldOPP + 16, p2);
636 //remainder = (uint8)(p2 >> 16), vscale = (uint8)(p2 >> 8);
637 //WriteLog(" [after]: rem=%02X, vscale=%02X\n", remainder, vscale);
640 op_pointer = (p0 & 0x000007FFFF000000LL) >> 21;
643 case OBJECT_TYPE_GPU:
645 //WriteLog("OP: Asserting GPU IRQ #3...\n");
646 #warning "Need to fix OP GPU IRQ handling! !!! FIX !!!"
647 OPSetCurrentObject(p0);
648 GPUSetIRQLine(3, ASSERT_LINE);
649 //Also, OP processing is suspended from this point until OBF (F00026) is written to...
652 //OPSuspendedByGPU = true;
653 //Dunno if the OP keeps processing from where it was interrupted, or if it just continues
654 //on the next scanline...
655 // --> It continues from where it was interrupted! !!! FIX !!!
658 case OBJECT_TYPE_BRANCH:
660 uint16 ypos = (p0 >> 3) & 0x7FF;
661 uint8 cc = (p0 >> 14) & 0x03;
662 uint32 link = (p0 >> 21) & 0x3FFFF8;
664 // if ((ypos!=507)&&(ypos!=25))
665 // WriteLog("\t%i%s%i link=0x%.8x\n",scanline,condition_to_str[cc],ypos>>1,link);
668 case CONDITION_EQUAL:
669 if (TOMReadWord(0xF00006, OP) == ypos || ypos == 0x7FF)
672 case CONDITION_LESS_THAN:
673 if (TOMReadWord(0xF00006, OP) < ypos)
676 case CONDITION_GREATER_THAN:
677 if (TOMReadWord(0xF00006, OP) > ypos)
680 case CONDITION_OP_FLAG_SET:
681 if (OPGetStatusRegister() & 0x01)
684 case CONDITION_SECOND_HALF_LINE:
685 //Here's the ASIC code:
686 // ND4(cctrue5, newheight[2], heightl[1], heightl[0], hcb[10]);
687 //which means, do the link if bit 10 of HC is set...
689 // This basically means branch if bit 10 of HC is set
690 #warning "Unhandled condition code causes emulator to crash... !!! FIX !!!"
691 WriteLog("OP: Unexpected CONDITION_SECOND_HALF_LINE in BRANCH object\nOP: shutting down!\n");
696 WriteLog("OP: Unimplemented branch condition %i\n", cc);
700 case OBJECT_TYPE_STOP:
704 //WriteLog("OP: --> STOP\n");
705 // op_set_status_register(((p0>>3) & 0xFFFFFFFF));
706 //This seems more likely...
707 OPSetCurrentObject(p0);
711 // We need to check whether these interrupts are enabled or not, THEN
712 // set an IRQ + pending flag if necessary...
713 if (TOMIRQEnabled(IRQ_OPFLAG))
715 TOMSetPendingObjectInt();
716 m68k_set_irq(2); // Cause a 68K IPL 2 to occur...
724 WriteLog("op: unknown object type %i\n", ((uint8)p0 & 0x07));
728 // Here is a little sanity check to keep the OP from locking up the machine
729 // when fed bad data. Better would be to count how many actual cycles it used
730 // and bail out/reenter to properly simulate an overloaded OP... !!! FIX !!!
731 #warning "Better would be to count how many actual cycles it used and bail out/reenter to properly simulate an overloaded OP... !!! FIX !!!"
739 // Store fixed size bitmap in line buffer
741 void OPProcessFixedBitmap(uint64 p0, uint64 p1, bool render)
743 // Need to make sure that when writing that it stays within the line buffer...
744 // LBUF ($F01800 - $F01D9E) 360 x 32-bit RAM
745 uint8 depth = (p1 >> 12) & 0x07; // Color depth of image
746 int32 xpos = ((int16)((p1 << 4) & 0xFFFF)) >> 4;// Image xpos in LBUF
747 uint32 iwidth = (p1 >> 28) & 0x3FF; // Image width in *phrases*
748 uint32 data = (p0 >> 40) & 0xFFFFF8; // Pixel data address
749 //#ifdef OP_DEBUG_BMP
750 uint32 firstPix = (p1 >> 49) & 0x3F;
751 // "The LSB is significant only for scaled objects..." -JTRM
752 // "In 1 BPP mode, all five bits are significant. In 2 BPP mode, the top four are significant..."
755 // We can ignore the RELEASE (high order) bit for now--probably forever...!
756 // uint8 flags = (p1 >> 45) & 0x0F; // REFLECT, RMW, TRANS, RELEASE
757 //Optimize: break these out to their own BOOL values
758 uint8 flags = (p1 >> 45) & 0x07; // REFLECT (0), RMW (1), TRANS (2)
759 bool flagREFLECT = (flags & OPFLAG_REFLECT ? true : false),
760 flagRMW = (flags & OPFLAG_RMW ? true : false),
761 flagTRANS = (flags & OPFLAG_TRANS ? true : false);
762 // "For images with 1 to 4 bits/pixel the top 7 to 4 bits of the index
763 // provide the most significant bits of the palette address."
764 uint8 index = (p1 >> 37) & 0xFE; // CLUT index offset (upper pix, 1-4 bpp)
765 uint32 pitch = (p1 >> 15) & 0x07; // Phrase pitch
766 pitch <<= 3; // Optimization: Multiply pitch by 8
768 // int16 scanlineWidth = tom_getVideoModeWidth();
769 uint8 * tomRam8 = TOMGetRamPointer();
770 uint8 * paletteRAM = &tomRam8[0x400];
771 // This is OK as long as it's used correctly: For 16-bit RAM to RAM direct copies--NOT
772 // for use when using endian-corrected data (i.e., any of the *_word_read functions!)
773 uint16 * paletteRAM16 = (uint16 *)paletteRAM;
775 // WriteLog("bitmap %ix? %ibpp at %i,? firstpix=? data=0x%.8x pitch %i hflipped=%s dwidth=? (linked to ?) RMW=%s Tranparent=%s\n",
776 // iwidth, op_bitmap_bit_depth[bitdepth], xpos, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), (flags&OPFLAG_RMW ? "yes" : "no"), (flags&OPFLAG_TRANS ? "yes" : "no"));
778 // Is it OK to have a 0 for the data width??? (i.e., undocumented?)
779 // Seems to be... Seems that dwidth *can* be zero (i.e., reuse same line) as well.
780 // Pitch == 0 is OK too...
781 // if (!render || op_pointer == 0 || ptr == 0 || pitch == 0)
782 //I'm not convinced that we need to concern ourselves with data & op_pointer here either!
783 if (!render || iwidth == 0)
786 //#define OP_DEBUG_BMP
787 //#ifdef OP_DEBUG_BMP
788 // WriteLog("bitmap %ix%i %ibpp at %i,%i firstpix=%i data=0x%.8x pitch %i hflipped=%s dwidth=%i (linked to 0x%.8x) Transluency=%s\n",
789 // iwidth, height, op_bitmap_bit_depth[bitdepth], xpos, ypos, firstPix, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), dwidth, op_pointer, (flags&OPFLAG_RMW ? "yes" : "no"));
792 // int32 leftMargin = xpos, rightMargin = (xpos + (phraseWidthToPixels[depth] * iwidth)) - 1;
793 int32 startPos = xpos, endPos = xpos +
794 (!flagREFLECT ? (phraseWidthToPixels[depth] * iwidth) - 1
795 : -((phraseWidthToPixels[depth] * iwidth) + 1));
796 uint32 clippedWidth = 0, phraseClippedWidth = 0, dataClippedWidth = 0;//, phrasePixel = 0;
797 bool in24BPPMode = (((GET16(tomRam8, 0x0028) >> 1) & 0x03) == 1 ? true : false); // VMODE
798 // Not sure if this is Jaguar Two only location or what...
799 // From the docs, it is... If we want to limit here we should think of something else.
800 // int32 limit = GET16(tom_ram_8, 0x0008); // LIMIT
802 int32 lbufWidth = (!in24BPPMode ? limit - 1 : (limit / 2) - 1); // Zero based limit...
804 // If the image is completely to the left or right of the line buffer, then bail.
805 //If in REFLECT mode, then these values are swapped! !!! FIX !!! [DONE]
806 //There are four possibilities:
807 // 1. image sits on left edge and no REFLECT; starts out of bounds but ends in bounds.
808 // 2. image sits on left edge and REFLECT; starts in bounds but ends out of bounds.
809 // 3. image sits on right edge and REFLECT; starts out of bounds but ends in bounds.
810 // 4. image sits on right edge and no REFLECT; starts in bounds but ends out of bounds.
811 //Numbers 2 & 4 can be caught by checking the LBUF clip while in the inner loop,
812 // numbers 1 & 3 are of concern.
813 // This *indirectly* handles only cases 2 & 4! And is WRONG if REFLECT is set...!
814 // if (rightMargin < 0 || leftMargin > lbufWidth)
816 // It might be easier to swap these (if REFLECTed) and just use XPOS down below...
817 // That way, you could simply set XPOS to leftMargin if !REFLECT and to rightMargin otherwise.
818 // Still have to be careful with the DATA and IWIDTH values though...
820 // if ((!flagREFLECT && (rightMargin < 0 || leftMargin > lbufWidth))
821 // || (flagREFLECT && (leftMargin < 0 || rightMargin > lbufWidth)))
823 if ((!flagREFLECT && (endPos < 0 || startPos > lbufWidth))
824 || (flagREFLECT && (startPos < 0 || endPos > lbufWidth)))
827 // Otherwise, find the clip limits and clip the phrase as well...
828 // NOTE: I'm fudging here by letting the actual blit overstep the bounds of the
829 // line buffer, but it shouldn't matter since there are two unused line
830 // buffers below and nothing above and I'll at most write 8 bytes outside
831 // the line buffer... I could use a fractional clip begin/end value, but
832 // this makes the blit a *lot* more hairy. I might fix this in the future
833 // if it becomes necessary. (JLH)
834 // Probably wouldn't be *that* hairy. Just use a delta that tells the inner loop
835 // which pixel in the phrase is being written, and quit when either end of phrases
836 // is reached or line buffer extents are surpassed.
838 //This stuff is probably wrong as well... !!! FIX !!!
839 //The strange thing is that it seems to work, but that's no guarantee that it's bulletproof!
840 //Yup. Seems that JagMania doesn't work correctly with this...
841 //Dunno if this is the problem, but Atari Karts is showing *some* of the road now...
846 clippedWidth = 0 - leftMargin,
847 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth],
848 leftMargin = 0 - (clippedWidth % phraseWidthToPixels[depth]);
851 if (rightMargin > lbufWidth)
852 clippedWidth = rightMargin - lbufWidth,
853 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];//,
854 // rightMargin = lbufWidth + (clippedWidth % phraseWidthToPixels[depth]);
855 // rightMargin = lbufWidth;
858 WriteLog("OP: We're about to encounter a divide by zero error!\n");
859 // NOTE: We're just using endPos to figure out how much, if any, to clip by.
860 // ALSO: There may be another case where we start out of bounds and end out of bounds...!
862 if (startPos < 0) // Case #1: Begin out, end in, L to R
863 clippedWidth = 0 - startPos,
864 dataClippedWidth = phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth],
865 startPos = 0 - (clippedWidth % phraseWidthToPixels[depth]);
867 if (endPos < 0) // Case #2: Begin in, end out, R to L
868 clippedWidth = 0 - endPos,
869 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];
871 if (endPos > lbufWidth) // Case #3: Begin in, end out, L to R
872 clippedWidth = endPos - lbufWidth,
873 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];
875 if (startPos > lbufWidth) // Case #4: Begin out, end in, R to L
876 clippedWidth = startPos - lbufWidth,
877 dataClippedWidth = phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth],
878 startPos = lbufWidth + (clippedWidth % phraseWidthToPixels[depth]);
880 // If the image is sitting on the line buffer left or right edge, we need to compensate
881 // by decreasing the image phrase width accordingly.
882 iwidth -= phraseClippedWidth;
884 // Also, if we're clipping the phrase we need to make sure we're in the correct part of
886 // data += phraseClippedWidth * (pitch << 3);
887 data += dataClippedWidth * pitch;
889 // NOTE: When the bitmap is in REFLECT mode, the XPOS marks the *right* side of the
890 // bitmap! This makes clipping & etc. MUCH, much easier...!
891 // uint32 lbufAddress = 0x1800 + (!in24BPPMode ? leftMargin * 2 : leftMargin * 4);
892 //Why does this work right when multiplying startPos by 2 (instead of 4) for 24 BPP mode?
893 //Is this a bug in the OP?
894 uint32 lbufAddress = 0x1800 + (!in24BPPMode ? startPos * 2 : startPos * 2);
895 uint8 * currentLineBuffer = &tomRam8[lbufAddress];
899 // Hmm. We check above for 24 BPP mode, but don't do anything about it below...
900 // If we *were* in 24 BPP mode, how would you convert CRY to RGB24? Seems to me
901 // that if you're in CRY mode then you wouldn't be able to use 24 BPP bitmaps
903 // This seems to be the case (at least according to the Midsummer docs)...!
905 // This is to test using palette zeroes instead of bit zeroes...
906 // And it seems that this is wrong, index == 0 is transparent apparently... :-/
907 //#define OP_USES_PALETTE_ZERO
909 if (depth == 0) // 1 BPP
911 // The LSB of flags is OPFLAG_REFLECT, so sign extend it and or 2 into it.
912 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
914 // Fetch 1st phrase...
915 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
916 //Note that firstPix should only be honored *if* we start with the 1st phrase of the bitmap
917 //i.e., we didn't clip on the margin... !!! FIX !!!
918 pixels <<= firstPix; // Skip first N pixels (N=firstPix)...
919 int i = firstPix; // Start counter at right spot...
925 uint8 bit = pixels >> 63;
926 #ifndef OP_USES_PALETTE_ZERO
927 if (flagTRANS && bit == 0)
929 if (flagTRANS && (paletteRAM16[index | bit] == 0))
935 //Optimize: Set paletteRAM16 to beginning of palette RAM + index*2 and use only [bit] as index...
936 //Won't optimize RMW case though...
937 // This is the *only* correct use of endian-dependent code
938 // (i.e., mem-to-mem direct copying)!
939 *(uint16 *)currentLineBuffer = paletteRAM16[index | bit];
942 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bit) << 1]),
943 *(currentLineBuffer + 1) =
944 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bit) << 1) + 1]);
947 currentLineBuffer += lbufDelta;
951 // Fetch next phrase...
953 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
956 else if (depth == 1) // 2 BPP
959 WriteLog("OP: Fixed bitmap @ 2 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
960 index &= 0xFC; // Top six bits form CLUT index
961 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
962 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
967 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
970 for(int i=0; i<32; i++)
972 uint8 bits = pixels >> 62;
973 // Seems to me that both of these are in the same endian, so we could cast it as
974 // uint16 * and do straight across copies (what about 24 bpp? Treat it differently...)
975 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
976 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
977 // No, it isn't because we read the memory in an endian safe way--this *won't* work...
978 #ifndef OP_USES_PALETTE_ZERO
979 if (flagTRANS && bits == 0)
981 if (flagTRANS && (paletteRAM16[index | bits] == 0))
987 *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
990 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
991 *(currentLineBuffer + 1) =
992 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
995 currentLineBuffer += lbufDelta;
1000 else if (depth == 2) // 4 BPP
1003 WriteLog("OP: Fixed bitmap @ 4 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1004 index &= 0xF0; // Top four bits form CLUT index
1005 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1006 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1011 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1014 for(int i=0; i<16; i++)
1016 uint8 bits = pixels >> 60;
1017 // Seems to me that both of these are in the same endian, so we could cast it as
1018 // uint16 * and do straight across copies (what about 24 bpp? Treat it differently...)
1019 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
1020 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
1021 // No, it isn't because we read the memory in an endian safe way--this *won't* work...
1022 #ifndef OP_USES_PALETTE_ZERO
1023 if (flagTRANS && bits == 0)
1025 if (flagTRANS && (paletteRAM16[index | bits] == 0))
1031 *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1033 *currentLineBuffer =
1034 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1035 *(currentLineBuffer + 1) =
1036 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1039 currentLineBuffer += lbufDelta;
1044 else if (depth == 3) // 8 BPP
1046 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1047 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1049 // Fetch 1st phrase...
1050 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1051 //Note that firstPix should only be honored *if* we start with the 1st phrase of the bitmap
1052 //i.e., we didn't clip on the margin... !!! FIX !!!
1053 firstPix &= 0x30; // Only top two bits are valid for 8 BPP
1054 pixels <<= firstPix; // Skip first N pixels (N=firstPix)...
1055 int i = firstPix >> 3; // Start counter at right spot...
1061 uint8 bits = pixels >> 56;
1062 // Seems to me that both of these are in the same endian, so we could cast it as
1063 // uint16 * and do straight across copies (what about 24 bpp? Treat it differently...)
1064 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
1065 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
1066 // No, it isn't because we read the memory in an endian safe way--this *won't* work...
1067 //This would seem to be problematic...
1068 //Because it's the palette entry being zero that makes the pixel transparent...
1069 //Let's try it and see.
1070 #ifndef OP_USES_PALETTE_ZERO
1071 if (flagTRANS && bits == 0)
1073 if (flagTRANS && (paletteRAM16[bits] == 0))
1079 *(uint16 *)currentLineBuffer = paletteRAM16[bits];
1081 *currentLineBuffer =
1082 BLEND_CR(*currentLineBuffer, paletteRAM[bits << 1]),
1083 *(currentLineBuffer + 1) =
1084 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[(bits << 1) + 1]);
1087 currentLineBuffer += lbufDelta;
1091 // Fetch next phrase...
1093 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1096 else if (depth == 4) // 16 BPP
1099 WriteLog("OP: Fixed bitmap @ 16 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1100 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1101 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1106 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1109 for(int i=0; i<4; i++)
1111 uint8 bitsHi = pixels >> 56, bitsLo = pixels >> 48;
1112 // Seems to me that both of these are in the same endian, so we could cast it as
1113 // uint16 * and do straight across copies (what about 24 bpp? Treat it differently...)
1114 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
1115 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
1116 // No, it isn't because we read the memory in an endian safe way--it *won't* work...
1117 //This doesn't seem right... Let's try the encoded black value ($8800):
1118 //Apparently, CRY 0 maps to $8800...
1119 if (flagTRANS && ((bitsLo | bitsHi) == 0))
1120 // if (flagTRANS && (bitsHi == 0x88) && (bitsLo == 0x00))
1125 *currentLineBuffer = bitsHi,
1126 *(currentLineBuffer + 1) = bitsLo;
1128 *currentLineBuffer =
1129 BLEND_CR(*currentLineBuffer, bitsHi),
1130 *(currentLineBuffer + 1) =
1131 BLEND_Y(*(currentLineBuffer + 1), bitsLo);
1134 currentLineBuffer += lbufDelta;
1139 else if (depth == 5) // 24 BPP
1141 //Looks like Iron Soldier is the only game that uses 24BPP mode...
1142 //There *might* be others...
1143 //WriteLog("OP: Writing 24 BPP bitmap!\n");
1145 WriteLog("OP: Fixed bitmap @ 24 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1146 // Not sure, but I think RMW only works with 16 BPP and below, and only in CRY mode...
1147 // The LSB of flags is OPFLAG_REFLECT, so sign extend it and OR 4 into it.
1148 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 4) | 0x04;
1153 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1156 for(int i=0; i<2; i++)
1158 // We don't use a 32-bit var here because of endian issues...!
1159 uint8 bits3 = pixels >> 56, bits2 = pixels >> 48,
1160 bits1 = pixels >> 40, bits0 = pixels >> 32;
1162 if (flagTRANS && (bits3 | bits2 | bits1 | bits0) == 0)
1165 *currentLineBuffer = bits3,
1166 *(currentLineBuffer + 1) = bits2,
1167 *(currentLineBuffer + 2) = bits1,
1168 *(currentLineBuffer + 3) = bits0;
1170 currentLineBuffer += lbufDelta;
1178 // Store scaled bitmap in line buffer
1180 void OPProcessScaledBitmap(uint64 p0, uint64 p1, uint64 p2, bool render)
1182 // Need to make sure that when writing that it stays within the line buffer...
1183 // LBUF ($F01800 - $F01D9E) 360 x 32-bit RAM
1184 uint8 depth = (p1 >> 12) & 0x07; // Color depth of image
1185 int32 xpos = ((int16)((p1 << 4) & 0xFFFF)) >> 4;// Image xpos in LBUF
1186 uint32 iwidth = (p1 >> 28) & 0x3FF; // Image width in *phrases*
1187 uint32 data = (p0 >> 40) & 0xFFFFF8; // Pixel data address
1188 //#ifdef OP_DEBUG_BMP
1189 // Prolly should use this... Though not sure exactly how.
1190 //Use the upper bits as an offset into the phrase depending on the BPP. That's how!
1191 uint32 firstPix = (p1 >> 49) & 0x3F;
1192 //This is WEIRD! I'm sure I saw Atari Karts request 8 BPP FIRSTPIX! What happened???
1194 WriteLog("OP: FIRSTPIX != 0! (Scaled BM)\n");
1196 // We can ignore the RELEASE (high order) bit for now--probably forever...!
1197 // uint8 flags = (p1 >> 45) & 0x0F; // REFLECT, RMW, TRANS, RELEASE
1198 //Optimize: break these out to their own BOOL values [DONE]
1199 uint8 flags = (p1 >> 45) & 0x07; // REFLECT (0), RMW (1), TRANS (2)
1200 bool flagREFLECT = (flags & OPFLAG_REFLECT ? true : false),
1201 flagRMW = (flags & OPFLAG_RMW ? true : false),
1202 flagTRANS = (flags & OPFLAG_TRANS ? true : false);
1203 uint8 index = (p1 >> 37) & 0xFE; // CLUT index offset (upper pix, 1-4 bpp)
1204 uint32 pitch = (p1 >> 15) & 0x07; // Phrase pitch
1206 uint8 * tomRam8 = TOMGetRamPointer();
1207 uint8 * paletteRAM = &tomRam8[0x400];
1208 // This is OK as long as it's used correctly: For 16-bit RAM to RAM direct copies--NOT
1209 // for use when using endian-corrected data (i.e., any of the *ReadWord functions!)
1210 uint16 * paletteRAM16 = (uint16 *)paletteRAM;
1212 uint16 hscale = p2 & 0xFF;
1213 // Hmm. It seems that fixing the horizontal scale necessitated re-fixing this. Not sure why,
1214 // but seems to be consistent with the vertical scaling now (and it may turn out to be wrong!)...
1215 uint16 horizontalRemainder = hscale; // Not sure if it starts full, but seems reasonable [It's not!]
1216 // uint8 horizontalRemainder = 0; // Let's try zero! Seems to work! Yay! [No, it doesn't!]
1217 int32 scaledWidthInPixels = (iwidth * phraseWidthToPixels[depth] * hscale) >> 5;
1218 uint32 scaledPhrasePixels = (phraseWidthToPixels[depth] * hscale) >> 5;
1220 // WriteLog("bitmap %ix? %ibpp at %i,? firstpix=? data=0x%.8x pitch %i hflipped=%s dwidth=? (linked to ?) RMW=%s Tranparent=%s\n",
1221 // iwidth, op_bitmap_bit_depth[bitdepth], xpos, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), (flags&OPFLAG_RMW ? "yes" : "no"), (flags&OPFLAG_TRANS ? "yes" : "no"));
1223 // Looks like an hscale of zero means don't draw!
1224 if (!render || iwidth == 0 || hscale == 0)
1227 /*extern int start_logging;
1229 WriteLog("OP: Scaled bitmap %ix? %ibpp at %i,? hscale=%02X fpix=%i data=%08X pitch %i hflipped=%s dwidth=? (linked to %08X) Transluency=%s\n",
1230 iwidth, op_bitmap_bit_depth[depth], xpos, hscale, firstPix, data, pitch, (flagREFLECT ? "yes" : "no"), op_pointer, (flagRMW ? "yes" : "no"));*/
1231 //#define OP_DEBUG_BMP
1232 //#ifdef OP_DEBUG_BMP
1233 // WriteLog("OP: Scaled bitmap %ix%i %ibpp at %i,%i firstpix=%i data=0x%.8x pitch %i hflipped=%s dwidth=%i (linked to 0x%.8x) Transluency=%s\n",
1234 // iwidth, height, op_bitmap_bit_depth[bitdepth], xpos, ypos, firstPix, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), dwidth, op_pointer, (flags&OPFLAG_RMW ? "yes" : "no"));
1237 int32 startPos = xpos, endPos = xpos +
1238 (!flagREFLECT ? scaledWidthInPixels - 1 : -(scaledWidthInPixels + 1));
1239 uint32 clippedWidth = 0, phraseClippedWidth = 0, dataClippedWidth = 0;
1240 bool in24BPPMode = (((GET16(tomRam8, 0x0028) >> 1) & 0x03) == 1 ? true : false); // VMODE
1241 // Not sure if this is Jaguar Two only location or what...
1242 // From the docs, it is... If we want to limit here we should think of something else.
1243 // int32 limit = GET16(tom_ram_8, 0x0008); // LIMIT
1245 int32 lbufWidth = (!in24BPPMode ? limit - 1 : (limit / 2) - 1); // Zero based limit...
1247 // If the image is completely to the left or right of the line buffer, then bail.
1248 //If in REFLECT mode, then these values are swapped! !!! FIX !!! [DONE]
1249 //There are four possibilities:
1250 // 1. image sits on left edge and no REFLECT; starts out of bounds but ends in bounds.
1251 // 2. image sits on left edge and REFLECT; starts in bounds but ends out of bounds.
1252 // 3. image sits on right edge and REFLECT; starts out of bounds but ends in bounds.
1253 // 4. image sits on right edge and no REFLECT; starts in bounds but ends out of bounds.
1254 //Numbers 2 & 4 can be caught by checking the LBUF clip while in the inner loop,
1255 // numbers 1 & 3 are of concern.
1256 // This *indirectly* handles only cases 2 & 4! And is WRONG if REFLECT is set...!
1257 // if (rightMargin < 0 || leftMargin > lbufWidth)
1259 // It might be easier to swap these (if REFLECTed) and just use XPOS down below...
1260 // That way, you could simply set XPOS to leftMargin if !REFLECT and to rightMargin otherwise.
1261 // Still have to be careful with the DATA and IWIDTH values though...
1263 if ((!flagREFLECT && (endPos < 0 || startPos > lbufWidth))
1264 || (flagREFLECT && (startPos < 0 || endPos > lbufWidth)))
1267 // Otherwise, find the clip limits and clip the phrase as well...
1268 // NOTE: I'm fudging here by letting the actual blit overstep the bounds of the
1269 // line buffer, but it shouldn't matter since there are two unused line
1270 // buffers below and nothing above and I'll at most write 40 bytes outside
1271 // the line buffer... I could use a fractional clip begin/end value, but
1272 // this makes the blit a *lot* more hairy. I might fix this in the future
1273 // if it becomes necessary. (JLH)
1274 // Probably wouldn't be *that* hairy. Just use a delta that tells the inner loop
1275 // which pixel in the phrase is being written, and quit when either end of phrases
1276 // is reached or line buffer extents are surpassed.
1278 //This stuff is probably wrong as well... !!! FIX !!!
1279 //The strange thing is that it seems to work, but that's no guarantee that it's bulletproof!
1280 //Yup. Seems that JagMania doesn't work correctly with this...
1281 //Dunno if this is the problem, but Atari Karts is showing *some* of the road now...
1282 //Actually, it is! Or, it was. It doesn't seem to be clipping here, so the problem lies
1283 //elsewhere! Hmm. Putting the scaling code into the 1/2/8 BPP cases seems to draw the ground
1284 // a bit more accurately... Strange!
1285 //It's probably a case of the REFLECT flag being set and the background being written
1286 //from the right side of the screen...
1287 //But no, it isn't... At least if the diagnostics are telling the truth!
1289 // NOTE: We're just using endPos to figure out how much, if any, to clip by.
1290 // ALSO: There may be another case where we start out of bounds and end out of bounds...!
1293 //There's a problem here with scaledPhrasePixels in that it can be forced to zero when
1294 //the scaling factor is small. So fix it already! !!! FIX !!!
1295 /*if (scaledPhrasePixels == 0)
1297 WriteLog("OP: [Scaled] We're about to encounter a divide by zero error!\n");
1298 DumpScaledObject(p0, p1, p2);
1300 //NOTE: I'm almost 100% sure that this is wrong... And it is! :-p
1302 //Try a simple example...
1303 // Let's say we have an 8 BPP scanline with an hscale of $80 (4). Our xpos is -10,
1304 // non-flipped. Pixels in the bitmap are XYZXYZXYZXYZXYZ.
1305 // Scaled up, they would be XXXXYYYYZZZZXXXXYYYYZZZZXXXXYYYYZZZZ...
1307 // Normally, we would expect this in the line buffer:
1308 // ZZXXXXYYYYZZZZXXXXYYYYZZZZ...
1310 // But instead we're getting:
1311 // XXXXYYYYZZZZXXXXYYYYZZZZ...
1313 // or are we??? It would seem so, simply by virtue of the fact that we're NOT starting
1314 // on a negative boundary--or are we? Hmm...
1315 // cw = 10, dcw = pcw = 10 / ([8 * 4 = 32] 32) = 0, sp = -10
1317 // Let's try a real world example:
1319 //OP: Scaled bitmap (70, 8 BPP, spp=28) sp (-400) < 0... [new sp=-8, cw=400, dcw=pcw=14]
1320 //OP: Scaled bitmap (6F, 8 BPP, spp=27) sp (-395) < 0... [new sp=-17, cw=395, dcw=pcw=14]
1322 // Really, spp is 27.75 in the second case...
1323 // So... If we do 395 / 27.75, we get 14. Ok so far... If we scale that against the
1324 // start position (14 * 27.75), we get -6.5... NOT -17!
1326 //Now it seems we're working OK, at least for the first case...
1327 uint32 scaledPhrasePixelsUS = phraseWidthToPixels[depth] * hscale;
1329 if (startPos < 0) // Case #1: Begin out, end in, L to R
1331 extern int start_logging;
1333 WriteLog("OP: Scaled bitmap (%02X, %u BPP, spp=%u) start pos (%i) < 0...", hscale, op_bitmap_bit_depth[depth], scaledPhrasePixels, startPos);
1334 // clippedWidth = 0 - startPos,
1335 clippedWidth = (0 - startPos) << 5,
1336 // dataClippedWidth = phraseClippedWidth = clippedWidth / scaledPhrasePixels,
1337 dataClippedWidth = phraseClippedWidth = (clippedWidth / scaledPhrasePixelsUS) >> 5,
1338 // startPos = 0 - (clippedWidth % scaledPhrasePixels);
1339 startPos += (dataClippedWidth * scaledPhrasePixelsUS) >> 5;
1341 WriteLog(" [new sp=%i, cw=%i, dcw=pcw=%i]\n", startPos, clippedWidth, dataClippedWidth);
1344 if (endPos < 0) // Case #2: Begin in, end out, R to L
1345 clippedWidth = 0 - endPos,
1346 phraseClippedWidth = clippedWidth / scaledPhrasePixels;
1348 if (endPos > lbufWidth) // Case #3: Begin in, end out, L to R
1349 clippedWidth = endPos - lbufWidth,
1350 phraseClippedWidth = clippedWidth / scaledPhrasePixels;
1352 if (startPos > lbufWidth) // Case #4: Begin out, end in, R to L
1353 clippedWidth = startPos - lbufWidth,
1354 dataClippedWidth = phraseClippedWidth = clippedWidth / scaledPhrasePixels,
1355 startPos = lbufWidth + (clippedWidth % scaledPhrasePixels);
1357 extern int op_start_log;
1358 if (op_start_log && clippedWidth != 0)
1359 WriteLog("OP: Clipped line. SP=%i, EP=%i, clip=%u, iwidth=%u, hscale=%02X\n", startPos, endPos, clippedWidth, iwidth, hscale);
1360 if (op_start_log && startPos == 13)
1362 WriteLog("OP: Scaled line. SP=%i, EP=%i, clip=%u, iwidth=%u, hscale=%02X, depth=%u, firstPix=%u\n", startPos, endPos, clippedWidth, iwidth, hscale, depth, firstPix);
1363 DumpScaledObject(p0, p1, p2);
1366 WriteLog(" %08X: ", data);
1367 for(int i=0; i<7*8; i++)
1368 WriteLog("%02X ", JaguarReadByte(data+i));
1372 // If the image is sitting on the line buffer left or right edge, we need to compensate
1373 // by decreasing the image phrase width accordingly.
1374 iwidth -= phraseClippedWidth;
1376 // Also, if we're clipping the phrase we need to make sure we're in the correct part of
1378 // data += phraseClippedWidth * (pitch << 3);
1379 data += dataClippedWidth * (pitch << 3);
1381 // NOTE: When the bitmap is in REFLECT mode, the XPOS marks the *right* side of the
1382 // bitmap! This makes clipping & etc. MUCH, much easier...!
1383 // uint32 lbufAddress = 0x1800 + (!in24BPPMode ? leftMargin * 2 : leftMargin * 4);
1384 // uint32 lbufAddress = 0x1800 + (!in24BPPMode ? startPos * 2 : startPos * 4);
1385 uint32 lbufAddress = 0x1800 + startPos * 2;
1386 uint8 * currentLineBuffer = &tomRam8[lbufAddress];
1387 //uint8 * lineBufferLowerLimit = &tom_ram_8[0x1800],
1388 // * lineBufferUpperLimit = &tom_ram_8[0x1800 + 719];
1392 // Hmm. We check above for 24 BPP mode, but don't do anything about it below...
1393 // If we *were* in 24 BPP mode, how would you convert CRY to RGB24? Seems to me
1394 // that if you're in CRY mode then you wouldn't be able to use 24 BPP bitmaps
1396 // This seems to be the case (at least according to the Midsummer docs)...!
1398 if (depth == 0) // 1 BPP
1401 WriteLog("OP: Scaled bitmap @ 1 BPP requesting FIRSTPIX!\n");
1402 // The LSB of flags is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1403 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1406 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1408 while ((int32)iwidth > 0)
1410 uint8 bits = pixels >> 63;
1412 #ifndef OP_USES_PALETTE_ZERO
1413 if (flagTRANS && bits == 0)
1415 if (flagTRANS && (paletteRAM16[index | bits] == 0))
1421 // This is the *only* correct use of endian-dependent code
1422 // (i.e., mem-to-mem direct copying)!
1423 *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1425 *currentLineBuffer =
1426 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1427 *(currentLineBuffer + 1) =
1428 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1431 currentLineBuffer += lbufDelta;
1434 The reason we subtract the horizontalRemainder *after* the test is because we had too few
1435 bits for horizontalRemainder to properly recognize a negative number. But now it's 16 bits
1436 wide, so we could probably go back to that (as long as we make it an int16 and not a uint16!)
1438 /* horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
1439 while (horizontalRemainder & 0x80)
1441 horizontalRemainder += hscale;
1445 // while (horizontalRemainder <= 0x20) // I.e., it's <= 1.0 (*before* subtraction)
1446 while (horizontalRemainder < 0x20) // I.e., it's < 1.0 (*before* subtraction)
1448 horizontalRemainder += hscale;
1452 horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
1456 int phrasesToSkip = pixCount / 64, pixelShift = pixCount % 64;
1458 data += (pitch << 3) * phrasesToSkip;
1459 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1460 pixels <<= 1 * pixelShift;
1461 iwidth -= phrasesToSkip;
1462 pixCount = pixelShift;
1466 else if (depth == 1) // 2 BPP
1469 WriteLog("OP: Scaled bitmap @ 2 BPP requesting FIRSTPIX!\n");
1470 index &= 0xFC; // Top six bits form CLUT index
1471 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1472 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1475 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1477 while ((int32)iwidth > 0)
1479 uint8 bits = pixels >> 62;
1481 #ifndef OP_USES_PALETTE_ZERO
1482 if (flagTRANS && bits == 0)
1484 if (flagTRANS && (paletteRAM16[index | bits] == 0))
1490 // This is the *only* correct use of endian-dependent code
1491 // (i.e., mem-to-mem direct copying)!
1492 *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1494 *currentLineBuffer =
1495 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1496 *(currentLineBuffer + 1) =
1497 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1500 currentLineBuffer += lbufDelta;
1502 /* horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
1503 while (horizontalRemainder & 0x80)
1505 horizontalRemainder += hscale;
1509 // while (horizontalRemainder <= 0x20) // I.e., it's <= 0 (*before* subtraction)
1510 while (horizontalRemainder < 0x20) // I.e., it's < 1.0 (*before* subtraction)
1512 horizontalRemainder += hscale;
1516 horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
1520 int phrasesToSkip = pixCount / 32, pixelShift = pixCount % 32;
1522 data += (pitch << 3) * phrasesToSkip;
1523 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1524 pixels <<= 2 * pixelShift;
1525 iwidth -= phrasesToSkip;
1526 pixCount = pixelShift;
1530 else if (depth == 2) // 4 BPP
1533 WriteLog("OP: Scaled bitmap @ 4 BPP requesting FIRSTPIX!\n");
1534 index &= 0xF0; // Top four bits form CLUT index
1535 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1536 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1539 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1541 while ((int32)iwidth > 0)
1543 uint8 bits = pixels >> 60;
1545 #ifndef OP_USES_PALETTE_ZERO
1546 if (flagTRANS && bits == 0)
1548 if (flagTRANS && (paletteRAM16[index | bits] == 0))
1554 // This is the *only* correct use of endian-dependent code
1555 // (i.e., mem-to-mem direct copying)!
1556 *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1558 *currentLineBuffer =
1559 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1560 *(currentLineBuffer + 1) =
1561 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1564 currentLineBuffer += lbufDelta;
1566 /* horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
1567 while (horizontalRemainder & 0x80)
1569 horizontalRemainder += hscale;
1573 // while (horizontalRemainder <= 0x20) // I.e., it's <= 0 (*before* subtraction)
1574 while (horizontalRemainder < 0x20) // I.e., it's < 1.0 (*before* subtraction)
1576 horizontalRemainder += hscale;
1580 horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
1584 int phrasesToSkip = pixCount / 16, pixelShift = pixCount % 16;
1586 data += (pitch << 3) * phrasesToSkip;
1587 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1588 pixels <<= 4 * pixelShift;
1589 iwidth -= phrasesToSkip;
1590 pixCount = pixelShift;
1594 else if (depth == 3) // 8 BPP
1597 WriteLog("OP: Scaled bitmap @ 8 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1598 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1599 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1602 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1604 while ((int32)iwidth > 0)
1606 uint8 bits = pixels >> 56;
1608 #ifndef OP_USES_PALETTE_ZERO
1609 if (flagTRANS && bits == 0)
1611 if (flagTRANS && (paletteRAM16[bits] == 0))
1617 // This is the *only* correct use of endian-dependent code
1618 // (i.e., mem-to-mem direct copying)!
1619 *(uint16 *)currentLineBuffer = paletteRAM16[bits];
1621 if (currentLineBuffer >= lineBufferLowerLimit && currentLineBuffer <= lineBufferUpperLimit)
1622 *(uint16 *)currentLineBuffer = paletteRAM16[bits];
1625 *currentLineBuffer =
1626 BLEND_CR(*currentLineBuffer, paletteRAM[bits << 1]),
1627 *(currentLineBuffer + 1) =
1628 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[(bits << 1) + 1]);
1631 currentLineBuffer += lbufDelta;
1633 // while (horizontalRemainder <= 0x20) // I.e., it's <= 0 (*before* subtraction)
1634 while (horizontalRemainder < 0x20) // I.e., it's < 1.0 (*before* subtraction)
1636 horizontalRemainder += hscale;
1640 horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
1644 int phrasesToSkip = pixCount / 8, pixelShift = pixCount % 8;
1646 data += (pitch << 3) * phrasesToSkip;
1647 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1648 pixels <<= 8 * pixelShift;
1649 iwidth -= phrasesToSkip;
1650 pixCount = pixelShift;
1654 else if (depth == 4) // 16 BPP
1657 WriteLog("OP: Scaled bitmap @ 16 BPP requesting FIRSTPIX!\n");
1658 // The LSB is OPFLAG_REFLECT, so sign extend it and OR 2 into it.
1659 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1662 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1664 while ((int32)iwidth > 0)
1666 uint8 bitsHi = pixels >> 56, bitsLo = pixels >> 48;
1668 //This doesn't seem right... Let's try the encoded black value ($8800):
1669 //Apparently, CRY 0 maps to $8800...
1670 if (flagTRANS && ((bitsLo | bitsHi) == 0))
1671 // if (flagTRANS && (bitsHi == 0x88) && (bitsLo == 0x00))
1676 *currentLineBuffer = bitsHi,
1677 *(currentLineBuffer + 1) = bitsLo;
1679 *currentLineBuffer =
1680 BLEND_CR(*currentLineBuffer, bitsHi),
1681 *(currentLineBuffer + 1) =
1682 BLEND_Y(*(currentLineBuffer + 1), bitsLo);
1685 currentLineBuffer += lbufDelta;
1687 /* horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
1688 while (horizontalRemainder & 0x80)
1690 horizontalRemainder += hscale;
1694 // while (horizontalRemainder <= 0x20) // I.e., it's <= 0 (*before* subtraction)
1695 while (horizontalRemainder < 0x20) // I.e., it's < 1.0 (*before* subtraction)
1697 horizontalRemainder += hscale;
1701 horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
1705 int phrasesToSkip = pixCount / 4, pixelShift = pixCount % 4;
1707 data += (pitch << 3) * phrasesToSkip;
1708 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1709 pixels <<= 16 * pixelShift;
1711 iwidth -= phrasesToSkip;
1713 pixCount = pixelShift;
1717 else if (depth == 5) // 24 BPP
1719 //I'm not sure that you can scale a 24 BPP bitmap properly--the JTRM seems to indicate as much.
1720 WriteLog("OP: Writing 24 BPP scaled bitmap!\n");
1722 WriteLog("OP: Scaled bitmap @ 24 BPP requesting FIRSTPIX!\n");
1723 // Not sure, but I think RMW only works with 16 BPP and below, and only in CRY mode...
1724 // The LSB is OPFLAG_REFLECT, so sign extend it and or 4 into it.
1725 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 4) | 0x04;
1730 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1731 data += pitch << 3; // Multiply pitch * 8 (optimize: precompute this value)
1733 for(int i=0; i<2; i++)
1735 uint8 bits3 = pixels >> 56, bits2 = pixels >> 48,
1736 bits1 = pixels >> 40, bits0 = pixels >> 32;
1738 if (flagTRANS && (bits3 | bits2 | bits1 | bits0) == 0)
1741 *currentLineBuffer = bits3,
1742 *(currentLineBuffer + 1) = bits2,
1743 *(currentLineBuffer + 2) = bits1,
1744 *(currentLineBuffer + 3) = bits0;
1746 currentLineBuffer += lbufDelta;