4 // Original source by David Raingeard (Cal2)
5 // GCC/SDL port by Niels Wagenaar (Linux/WIN32) and Caz (BeOS)
6 // Extensive cleanups/fixes/rewrites by James L. Hammons
7 // (C) 2010 Underground Software
9 // JLH = James L. Hammons <jlhamm@acm.org>
12 // --- ---------- -------------------------------------------------------------
13 // JLH 01/16/2010 Created this log ;-)
28 //#define OP_DEBUG_BMP
// Blend table lookups. The table index is (destination byte << 8) | source byte.
// Every macro argument is parenthesized before it is cast, so that a compound
// argument (e.g. "a | b") is converted and shifted as a whole instead of only
// having its first operand cast/shifted.
#define BLEND_Y(dst, src)	op_blend_y[((uint16)(dst) << 8) | ((uint16)(src))]
#define BLEND_CR(dst, src)	op_blend_cr[((uint16)(dst) << 8) | ((uint16)(src))]

// Object type codes (compared against the low three bits of an object's first phrase)
#define OBJECT_TYPE_BITMAP	0			// 000
#define OBJECT_TYPE_SCALE	1			// 001
#define OBJECT_TYPE_GPU		2			// 010
#define OBJECT_TYPE_BRANCH	3			// 011
#define OBJECT_TYPE_STOP	4			// 100

// Condition codes tested by BRANCH objects
#define CONDITION_EQUAL				0
#define CONDITION_LESS_THAN			1
#define CONDITION_GREATER_THAN		2
#define CONDITION_OP_FLAG_SET		3
#define CONDITION_SECOND_HALF_LINE	4

// Bit masks for the four-bit flags field of a bitmap object's second phrase
#define OPFLAG_RELEASE		8			// Bus release bit
#define OPFLAG_TRANS		4			// Transparency bit
#define OPFLAG_RMW			2			// Read-Modify-Write bit
#define OPFLAG_REFLECT		1			// Horizontal mirror bit
50 // Private function prototypes
// Forward declarations for helpers used within this file (the dump routines,
// OPLoadPhrase and OPProcessFixedBitmap are defined further down).
52 void OPProcessFixedBitmap(uint64 p0, uint64 p1, bool render);
53 void OPProcessScaledBitmap(uint64 p0, uint64 p1, uint64 p2, bool render);
54 void DumpScaledObject(uint64 p0, uint64 p1, uint64 p2);
55 void DumpFixedObject(uint64 p0, uint64 p1);
56 uint64 OPLoadPhrase(uint32 offset);
58 // Local global variables
60 // Blend tables (64K each)
// Indexed through the BLEND_Y / BLEND_CR macros with (dst << 8) | src.
61 static uint8 op_blend_y[0x10000];
62 static uint8 op_blend_cr[0x10000];
63 // There may be a problem with this "RAM" overlapping (and thus being independent of)
64 // some of the regular TOM RAM...
65 //#warning objectp_ram is separated from TOM RAM--need to fix that!
66 //static uint8 objectp_ram[0x40]; // This is based at $F00000
67 uint8 objectp_running = 0;
68 //bool objectp_stop_reading_list;
// Bits per pixel for each value of the 3-bit depth field ((p1 >> 12) & 0x07);
// used by the dump/log routines when printing "(%u bpp)".
70 static uint8 op_bitmap_bit_depth[8] = { 1, 2, 4, 8, 16, 24, 32, 0 };
71 //static uint32 op_bitmap_bit_size[8] =
72 // { (uint32)(0.125*65536), (uint32)(0.25*65536), (uint32)(0.5*65536), (uint32)(1*65536),
73 // (uint32)(2*65536), (uint32)(1*65536), (uint32)(1*65536), (uint32)(1*65536) };
// Current position in the object list; loaded from OPGetListPointer() at the
// start of OPProcessList and printed by the dump routines.
74 static uint32 op_pointer;
// Pixels per phrase for each depth code. Entries 6 and 7 are zero, and this
// table is used as a divisor when clipping in OPProcessFixedBitmap.
76 int32 phraseWidthToPixels[8] = { 64, 32, 16, 8, 4, 2, 0, 0 };
80 // Object Processor initialization
84 // Here we calculate the saturating blend of a signed 4-bit value and an
85 // existing Cyan/Red value as well as a signed 8-bit value and an existing intensity...
86 // Note: CRY is 4 bits Cyan, 4 bits Red, 8 bits intensitY (16 bits total)
87 for(int i=0; i<256*256; i++)
89 int y = (i >> 8) & 0xFF;
90 int dy = (int8)i; // Sign extend the Y index
91 int c1 = (i >> 8) & 0x0F;
92 int dc1 = (int8)(i << 4) >> 4; // Sign extend the R index
93 int c2 = (i >> 12) & 0x0F;
94 int dc2 = (int8)(i & 0xF0) >> 4; // Sign extend the C index
119 op_blend_cr[i] = (c2 << 4) | c1;
126 // Object Processor reset
130 // memset(objectp_ram, 0x00, 0x40);
136 #warning "!!! Fix OL dump so that it follows links !!!"
137 const char * opType[8] =
138 { "(BITMAP)", "(SCALED BITMAP)", "(GPU INT)", "(BRANCH)", "(STOP)", "???", "???", "???" };
139 const char * ccType[8] =
140 { "\"==\"", "\"<\"", "\">\"", "(opflag set)", "(second half line)", "?", "?", "?" };
142 uint32 olp = OPGetListPointer();
143 WriteLog("OP: OLP = %08X\n", olp);
144 WriteLog("OP: Phrase dump\n ----------\n");
145 for(uint32 i=0; i<0x100; i+=8)
147 uint32 hi = JaguarReadLong(olp + i, OP), lo = JaguarReadLong(olp + i + 4, OP);
148 WriteLog("\t%08X: %08X %08X %s", olp + i, hi, lo, opType[lo & 0x07]);
149 if ((lo & 0x07) == 3)
151 uint16 ypos = (lo >> 3) & 0x7FF;
152 uint8 cc = (lo >> 14) & 0x03;
153 uint32 link = ((hi << 11) | (lo >> 21)) & 0x3FFFF8;
154 WriteLog(" YPOS=%u, CC=%s, link=%08X", ypos, ccType[cc], link);
157 if ((lo & 0x07) == 0)
158 DumpFixedObject(OPLoadPhrase(olp+i), OPLoadPhrase(olp+i+8));
159 if ((lo & 0x07) == 1)
160 DumpScaledObject(OPLoadPhrase(olp+i), OPLoadPhrase(olp+i+8), OPLoadPhrase(olp+i+16));
164 // memory_free(op_blend_y);
165 // memory_free(op_blend_cr);
169 // Object Processor memory access
170 // Memory range: F00010 - F00027
172 // F00010-F00017 R xxxxxxxx xxxxxxxx OB - current object code from the graphics processor
173 // F00020-F00023 W xxxxxxxx xxxxxxxx OLP - start of the object list
174 // F00026 W -------- -------x OBF - object processor flag
// Read one byte from objectp_ram at 'offset' and return it.
// 'who' is not referenced by the visible body.
// NOTE(review): the only objectp_ram declaration visible in this file is
// commented out -- confirm whether this accessor is still compiled.
178 uint8 OPReadByte(uint32 offset, uint32 who/*=UNKNOWN*/)
181 return objectp_ram[offset];
// Read a 16-bit value from objectp_ram at 'offset' via GET16 and return it.
// 'who' is not referenced by the visible body.
// NOTE(review): GET16 is defined outside this file view (presumably a
// big-endian 16-bit fetch -- confirm), and the only visible objectp_ram
// declaration is commented out.
184 uint16 OPReadWord(uint32 offset, uint32 who/*=UNKNOWN*/)
187 return GET16(objectp_ram, offset);
// Store the byte 'data' into objectp_ram at 'offset'.
// 'who' is not referenced by the visible body.
// NOTE(review): the only objectp_ram declaration visible in this file is
// commented out -- confirm whether this accessor is still compiled.
190 void OPWriteByte(uint32 offset, uint8 data, uint32 who/*=UNKNOWN*/)
193 objectp_ram[offset] = data;
// Store the 16-bit value 'data' into objectp_ram at 'offset' via SET16.
// 'who' is not referenced by the visible body.
// NOTE(review): SET16 is defined outside this file view (presumably a
// big-endian 16-bit store -- confirm), and the only visible objectp_ram
// declaration is commented out.
196 void OPWriteWord(uint32 offset, uint16 data, uint32 who/*=UNKNOWN*/)
199 SET16(objectp_ram, offset, data);
// Disabled logging of writes to the list pointer words follows.
201 /*if (offset == 0x20)
202 WriteLog("OP: Setting lo list pointer: %04X\n", data);
204 WriteLog("OP: Setting hi list pointer: %04X\n", data);//*/
208 uint32 OPGetListPointer(void)
210 // Note: This register is LO / HI WORD, hence the funky look of this...
211 return GET16(tomRam8, 0x20) | (GET16(tomRam8, 0x22) << 16);
214 // This is WRONG, since the OBF is only 16 bits wide!!! [FIXED]
216 uint32 OPGetStatusRegister(void)
218 return GET16(tomRam8, 0x26);
221 // This is WRONG, since the OBF is only 16 bits wide!!! [FIXED]
223 void OPSetStatusRegister(uint32 data)
225 tomRam8[0x26] = (data & 0x0000FF00) >> 8;
226 tomRam8[0x27] |= (data & 0xFE);
229 void OPSetCurrentObject(uint64 object)
231 //Not sure this is right... Wouldn't it just be stored 64 bit BE?
232 // Stored as least significant 32 bits first, ms32 last in big endian
233 /* objectp_ram[0x13] = object & 0xFF; object >>= 8;
234 objectp_ram[0x12] = object & 0xFF; object >>= 8;
235 objectp_ram[0x11] = object & 0xFF; object >>= 8;
236 objectp_ram[0x10] = object & 0xFF; object >>= 8;
238 objectp_ram[0x17] = object & 0xFF; object >>= 8;
239 objectp_ram[0x16] = object & 0xFF; object >>= 8;
240 objectp_ram[0x15] = object & 0xFF; object >>= 8;
241 objectp_ram[0x14] = object & 0xFF;*/
242 // Let's try regular good old big endian...
243 tomRam8[0x17] = object & 0xFF; object >>= 8;
244 tomRam8[0x16] = object & 0xFF; object >>= 8;
245 tomRam8[0x15] = object & 0xFF; object >>= 8;
246 tomRam8[0x14] = object & 0xFF; object >>= 8;
248 tomRam8[0x13] = object & 0xFF; object >>= 8;
249 tomRam8[0x12] = object & 0xFF; object >>= 8;
250 tomRam8[0x11] = object & 0xFF; object >>= 8;
251 tomRam8[0x10] = object & 0xFF;
254 uint64 OPLoadPhrase(uint32 offset)
256 offset &= ~0x07; // 8 byte alignment
257 return ((uint64)JaguarReadLong(offset, OP) << 32) | (uint64)JaguarReadLong(offset+4, OP);
260 void OPStorePhrase(uint32 offset, uint64 p)
262 offset &= ~0x07; // 8 byte alignment
263 JaguarWriteLong(offset, p >> 32, OP);
264 JaguarWriteLong(offset + 4, p & 0xFFFFFFFF, OP);
268 // Debugging routines
270 void DumpScaledObject(uint64 p0, uint64 p1, uint64 p2)
272 WriteLog(" (SCALED BITMAP)");
273 WriteLog(" %08X --> phrase %08X %08X\n", op_pointer, (uint32)(p1>>32), (uint32)(p1&0xFFFFFFFF));
274 WriteLog(" %08X --> phrase %08X %08X ", op_pointer+8, (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
275 uint8 bitdepth = (p1 >> 12) & 0x07;
276 //WAS: int16 ypos = ((p0 >> 3) & 0x3FF); // ??? What if not interlaced (/2)?
277 int16 ypos = ((p0 >> 3) & 0x7FF); // ??? What if not interlaced (/2)?
278 int32 xpos = p1 & 0xFFF;
279 xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);
280 uint32 iwidth = ((p1 >> 28) & 0x3FF);
281 uint32 dwidth = ((p1 >> 18) & 0x3FF); // Unsigned!
282 uint16 height = ((p0 >> 14) & 0x3FF);
283 uint32 link = ((p0 >> 24) & 0x7FFFF) << 3;
284 uint32 ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
285 uint32 firstPix = (p1 >> 49) & 0x3F;
286 uint8 flags = (p1 >> 45) & 0x0F;
287 uint8 idx = (p1 >> 38) & 0x7F;
288 uint32 pitch = (p1 >> 15) & 0x07;
289 WriteLog("\n [%u (%u) x %u @ (%i, %u) (%u bpp), l: %08X, p: %08X fp: %02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
290 iwidth, dwidth, height, xpos, ypos, op_bitmap_bit_depth[bitdepth], link, ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""), (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""), (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
291 uint32 hscale = p2 & 0xFF;
292 uint32 vscale = (p2 >> 8) & 0xFF;
293 uint32 remainder = (p2 >> 16) & 0xFF;
294 WriteLog(" [hsc: %02X, vsc: %02X, rem: %02X]\n", hscale, vscale, remainder);
297 void DumpFixedObject(uint64 p0, uint64 p1)
299 WriteLog(" (BITMAP)");
300 WriteLog(" %08X --> phrase %08X %08X\n", op_pointer, (uint32)(p1>>32), (uint32)(p1&0xFFFFFFFF));
301 uint8 bitdepth = (p1 >> 12) & 0x07;
302 //WAS: int16 ypos = ((p0 >> 3) & 0x3FF); // ??? What if not interlaced (/2)?
303 int16 ypos = ((p0 >> 3) & 0x7FF); // ??? What if not interlaced (/2)?
304 int32 xpos = p1 & 0xFFF;
305 xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);
306 uint32 iwidth = ((p1 >> 28) & 0x3FF);
307 uint32 dwidth = ((p1 >> 18) & 0x3FF); // Unsigned!
308 uint16 height = ((p0 >> 14) & 0x3FF);
309 uint32 link = ((p0 >> 24) & 0x7FFFF) << 3;
310 uint32 ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
311 uint32 firstPix = (p1 >> 49) & 0x3F;
312 uint8 flags = (p1 >> 45) & 0x0F;
313 uint8 idx = (p1 >> 38) & 0x7F;
314 uint32 pitch = (p1 >> 15) & 0x07;
315 WriteLog(" [%u (%u) x %u @ (%i, %u) (%u bpp), l: %08X, p: %08X fp: %02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
316 iwidth, dwidth, height, xpos, ypos, op_bitmap_bit_depth[bitdepth], link, ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""), (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""), (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
320 // Object Processor main routine
322 #warning "Need to fix this so that when an GPU object IRQ happens, we can pick up OP processing where we left off. !!! FIX !!!"
// Process the object list for one half line.
//   halfline - the half line being processed; compared against each bitmap
//              object's YPOS field and against TOMGetVDB() for logging.
//   render   - passed through unchanged to OPProcessFixedBitmap and
//              OPProcessScaledBitmap.
// Processing starts at the address returned by OPGetListPointer(). Each phrase
// is dispatched on its low three bits (BITMAP, SCALE, GPU, BRANCH, STOP).
// Bitmap and scaled objects have their updated first phrase (and, for scaled
// objects, the remainder in the third phrase) written back with OPStorePhrase.
323 void OPProcessList(int halfline, bool render)
325 extern int op_start_log;
326 // char * condition_to_str[8] =
327 // { "==", "<", ">", "(opflag set)", "(second half line)", "?", "?", "?" };
// Start from the list pointer currently held in the OLP register words
329 op_pointer = OPGetListPointer();
331 // objectp_stop_reading_list = false;
333 //WriteLog("OP: Processing line #%u (OLP=%08X)...\n", halfline, op_pointer);
336 // *** BEGIN OP PROCESSOR TESTING ONLY ***
337 extern bool interactiveMode;
339 extern int objectPtr;
341 int bitmapCounter = 0;
342 // *** END OP PROCESSOR TESTING ONLY ***
344 uint32 opCyclesToRun = 30000; // This is a pulled-out-of-the-air value (will need to be fixed, obviously!)
346 // if (op_pointer) WriteLog(" new op list at 0x%.8x halfline %i\n",op_pointer,halfline);
349 // *** BEGIN OP PROCESSOR TESTING ONLY ***
350 if (interactiveMode && bitmapCounter == objectPtr)
354 // *** END OP PROCESSOR TESTING ONLY ***
355 // if (objectp_stop_reading_list)
// Fetch the phrase at the current list position
358 uint64 p0 = OPLoadPhrase(op_pointer);
360 //WriteLog("\t%08X type %i\n", op_pointer, (uint8)p0 & 0x07);
// Diagnostic dump of the current object, only on the VDB half line and only
// when op_start_log is set
363 if (halfline == TOMGetVDB() && op_start_log)
364 //if (halfline == 215 && op_start_log)
365 //if (halfline == 28 && op_start_log)
368 WriteLog("%08X --> phrase %08X %08X", op_pointer - 8, (int)(p0>>32), (int)(p0&0xFFFFFFFF));
369 if ((p0 & 0x07) == OBJECT_TYPE_BITMAP)
371 WriteLog(" (BITMAP) ");
372 uint64 p1 = OPLoadPhrase(op_pointer);
373 WriteLog("\n%08X --> phrase %08X %08X ", op_pointer, (int)(p1>>32), (int)(p1&0xFFFFFFFF));
374 uint8 bitdepth = (p1 >> 12) & 0x07;
375 //WAS: int16 ypos = ((p0 >> 3) & 0x3FF); // ??? What if not interlaced (/2)?
376 int16 ypos = ((p0 >> 3) & 0x7FF); // ??? What if not interlaced (/2)?
377 int32 xpos = p1 & 0xFFF;
378 xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);
379 uint32 iwidth = ((p1 >> 28) & 0x3FF);
380 uint32 dwidth = ((p1 >> 18) & 0x3FF); // Unsigned!
381 uint16 height = ((p0 >> 14) & 0x3FF);
382 uint32 link = ((p0 >> 24) & 0x7FFFF) << 3;
383 uint32 ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
384 uint32 firstPix = (p1 >> 49) & 0x3F;
385 uint8 flags = (p1 >> 45) & 0x0F;
386 uint8 idx = (p1 >> 38) & 0x7F;
387 uint32 pitch = (p1 >> 15) & 0x07;
388 WriteLog("\n [%u (%u) x %u @ (%i, %u) (%u bpp), l: %08X, p: %08X fp: %02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
389 iwidth, dwidth, height, xpos, ypos, op_bitmap_bit_depth[bitdepth], link, ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""), (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""), (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
391 if ((p0 & 0x07) == OBJECT_TYPE_SCALE)
393 WriteLog(" (SCALED BITMAP)");
394 uint64 p1 = OPLoadPhrase(op_pointer), p2 = OPLoadPhrase(op_pointer+8);
395 WriteLog("\n%08X --> phrase %08X %08X ", op_pointer, (int)(p1>>32), (int)(p1&0xFFFFFFFF));
396 WriteLog("\n%08X --> phrase %08X %08X ", op_pointer+8, (int)(p2>>32), (int)(p2&0xFFFFFFFF));
397 uint8 bitdepth = (p1 >> 12) & 0x07;
398 //WAS: int16 ypos = ((p0 >> 3) & 0x3FF); // ??? What if not interlaced (/2)?
399 int16 ypos = ((p0 >> 3) & 0x7FF); // ??? What if not interlaced (/2)?
400 int32 xpos = p1 & 0xFFF;
401 xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);
402 uint32 iwidth = ((p1 >> 28) & 0x3FF);
403 uint32 dwidth = ((p1 >> 18) & 0x3FF); // Unsigned!
404 uint16 height = ((p0 >> 14) & 0x3FF);
405 uint32 link = ((p0 >> 24) & 0x7FFFF) << 3;
406 uint32 ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
407 uint32 firstPix = (p1 >> 49) & 0x3F;
408 uint8 flags = (p1 >> 45) & 0x0F;
409 uint8 idx = (p1 >> 38) & 0x7F;
410 uint32 pitch = (p1 >> 15) & 0x07;
411 WriteLog("\n [%u (%u) x %u @ (%i, %u) (%u bpp), l: %08X, p: %08X fp: %02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
412 iwidth, dwidth, height, xpos, ypos, op_bitmap_bit_depth[bitdepth], link, ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""), (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""), (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
413 uint32 hscale = p2 & 0xFF;
414 uint32 vscale = (p2 >> 8) & 0xFF;
415 uint32 remainder = (p2 >> 16) & 0xFF;
416 WriteLog(" [hsc: %02X, vsc: %02X, rem: %02X]\n", hscale, vscale, remainder);
418 if ((p0 & 0x07) == OBJECT_TYPE_GPU)
419 WriteLog(" (GPU)\n");
420 if ((p0 & 0x07) == OBJECT_TYPE_BRANCH)
422 WriteLog(" (BRANCH)\n");
423 uint8 * jaguarMainRam = GetRamPtr();
424 WriteLog("[RAM] --> ");
425 for(int k=0; k<8; k++)
426 WriteLog("%02X ", jaguarMainRam[op_pointer-8 + k]);
429 if ((p0 & 0x07) == OBJECT_TYPE_STOP)
430 WriteLog(" --> List end\n\n");
// Dispatch on the object type held in the low three bits of the first phrase
434 switch ((uint8)p0 & 0x07)
436 case OBJECT_TYPE_BITMAP:
438 //WAS: uint16 ypos = (p0 >> 3) & 0x3FF;
439 uint16 ypos = (p0 >> 3) & 0x7FF;
440 // This is only theory implied by Rayman...!
441 // It seems that if the YPOS is zero, then bump the YPOS value so that it coincides with
442 // the VDB value. With interlacing, this would be slightly more tricky.
443 // There's probably another bit somewhere that enables this mode--but so far, doesn't seem
444 // to affect any other game in a negative way (that I've seen).
445 // Either that, or it's an undocumented bug...
447 //No, the reason this was needed is that the OP code before was wrong. Any value
448 //less than VDB will get written to the top line of the display!
450 // Not so sure... Let's see what happens here...
453 ypos = TOMReadWord(0xF00046, OP) / 2; // Get the VDB value
455 // Actually, no. Any item less than VDB will get only the lines that hang over VDB displayed.
456 // Actually, this is incorrect. It seems that VDB value is wrong somewhere and that's
457 // what's causing things to break. Still no idea why.
459 uint32 height = (p0 & 0xFFC000) >> 14;
460 uint32 oldOPP = op_pointer - 8;
461 // *** BEGIN OP PROCESSOR TESTING ONLY ***
462 if (inhibit && op_start_log)
463 WriteLog("!!! ^^^ This object is INHIBITED! ^^^ !!!\n");
465 if (!inhibit) // For OP testing only!
466 // *** END OP PROCESSOR TESTING ONLY ***
467 if (halfline >= ypos && height > 0)
469 uint64 p1 = OPLoadPhrase(op_pointer);
471 //WriteLog("OP: Writing halfline %d with ypos == %d...\n", halfline, ypos);
472 //WriteLog("--> Writing %u BPP bitmap...\n", op_bitmap_bit_depth[(p1 >> 12) & 0x07]);
473 // OPProcessFixedBitmap(halfline, p0, p1, render);
474 OPProcessFixedBitmap(p0, p1, render);
478 //???Does this really happen??? Doesn't seem to work if you do this...!
479 //Probably not. Must be a bug in the documentation...!
480 // uint32 link = (p0 & 0x7FFFF000000) >> 21;
481 // SET16(tom_ram_8, 0x20, link & 0xFFFF); // OLP
482 // SET16(tom_ram_8, 0x22, link >> 16);
483 /* uint32 height = (p0 & 0xFFC000) >> 14;
486 // NOTE: Would subtract 2 if in interlaced mode...!
487 // uint64 height = ((p0 & 0xFFC000) - 0x4000) & 0xFFC000;
491 uint64 data = (p0 & 0xFFFFF80000000000LL) >> 40;
492 uint64 dwidth = (p1 & 0xFFC0000) >> 15;
495 p0 &= ~0xFFFFF80000FFC000LL; // Mask out old data...
496 p0 |= (uint64)height << 14;
498 OPStorePhrase(oldOPP, p0);
500 //WriteLog("\t\tOld OP: %08X -> ", op_pointer);
501 //Temp, for testing...
502 //No doubt, this type of check will break all kinds of stuff... !!! FIX !!!
503 //And it does! !!! FIX !!!
504 //Let's remove this "fix" since it screws up more than it fixes.
505 /* if (op_pointer > ((p0 & 0x000007FFFF000000LL) >> 21))
508 op_pointer = (p0 & 0x000007FFFF000000LL) >> 21;
509 //WriteLog("New OP: %08X\n", op_pointer);
512 case OBJECT_TYPE_SCALE:
514 //WAS: uint16 ypos = (p0 >> 3) & 0x3FF;
515 uint16 ypos = (p0 >> 3) & 0x7FF;
516 uint32 height = (p0 & 0xFFC000) >> 14;
517 uint32 oldOPP = op_pointer - 8;
518 //WriteLog("OP: Scaled Object (ypos=%04X, height=%04X", ypos, height);
519 // *** BEGIN OP PROCESSOR TESTING ONLY ***
520 if (inhibit && op_start_log)
522 WriteLog("!!! ^^^ This object is INHIBITED! ^^^ !!! (halfline=%u, ypos=%u, height=%u)\n", halfline, ypos, height);
523 DumpScaledObject(p0, OPLoadPhrase(op_pointer), OPLoadPhrase(op_pointer+8));
526 if (!inhibit) // For OP testing only!
527 // *** END OP PROCESSOR TESTING ONLY ***
528 if (halfline >= ypos && height > 0)
530 uint64 p1 = OPLoadPhrase(op_pointer);
532 uint64 p2 = OPLoadPhrase(op_pointer);
534 //WriteLog("OP: %08X (%d) %08X%08X %08X%08X %08X%08X\n", oldOPP, halfline, (uint32)(p0>>32), (uint32)(p0&0xFFFFFFFF), (uint32)(p1>>32), (uint32)(p1&0xFFFFFFFF), (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
535 OPProcessScaledBitmap(p0, p1, p2, render);
539 uint16 remainder = (p2 >> 16) & 0xFF;//, vscale = p2 >> 8;
540 uint8 /*remainder = p2 >> 16,*/ vscale = p2 >> 8;
541 //Actually, we should skip this object if it has a vscale of zero.
542 //Or do we? Not sure... Atari Karts has a few lines that look like:
544 //000E8268 --> phrase 00010000 7000B00D
545 // [7 (0) x 1 @ (13, 0) (8 bpp), l: 000E82A0, p: 000E0FC0 fp: 00, fl:RELEASE, idx:00, pt:01]
546 // [hsc: 9A, vsc: 00, rem: 00]
547 // Could it be the vscale is overridden if the DWIDTH is zero? Hmm...
548 //WriteLog("OP: Scaled bitmap processing (rem=%02X, vscale=%02X)...\n", remainder, vscale);//*/
551 vscale = 0x20; // OP bug??? Nope, it isn't...! Or is it?
553 //extern int start_logging;
555 // WriteLog("--> Returned from scaled bitmap processing (rem=%02X, vscale=%02X)...\n", remainder, vscale);//*/
557 //--> Returned from scaled bitmap processing (rem=20, vscale=80)...
558 //There are other problems here, it looks like...
560 //About to execute OP (508)...
562 OP: Scaled bitmap 4x? 4bpp at 38,? hscale=7C fpix=0 data=00075E28 pitch 1 hflipped=no dwidth=? (linked to 00071118) Transluency=no
563 --> Returned from scaled bitmap processing (rem=50, vscale=7C)...
564 OP: Scaled bitmap 4x? 4bpp at 38,? hscale=7C fpix=0 data=00075E28 pitch 1 hflipped=no dwidth=? (linked to 00071118) Transluency=no
565 --> Returned from scaled bitmap processing (rem=30, vscale=7C)...
566 OP: Scaled bitmap 4x? 4bpp at 38,? hscale=7C fpix=0 data=00075E28 pitch 1 hflipped=no dwidth=? (linked to 00071118) Transluency=no
567 --> Returned from scaled bitmap processing (rem=10, vscale=7C)...
568 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756A8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
569 --> Returned from scaled bitmap processing (rem=00, vscale=7E)...
570 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
571 --> Returned from scaled bitmap processing (rem=00, vscale=80)...
572 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
573 --> Returned from scaled bitmap processing (rem=5E, vscale=7E)...
574 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756E8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
575 --> Returned from scaled bitmap processing (rem=60, vscale=80)...
576 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
577 --> Returned from scaled bitmap processing (rem=3E, vscale=7E)...
578 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756E8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
579 --> Returned from scaled bitmap processing (rem=40, vscale=80)...
580 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
581 --> Returned from scaled bitmap processing (rem=1E, vscale=7E)...
582 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756E8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
583 --> Returned from scaled bitmap processing (rem=20, vscale=80)...
585 //Here's another problem:
586 // [hsc: 20, vsc: 20, rem: 00]
587 // Since we're not checking for $E0 (but that's what we get from the above), we end
588 // up repeating this halfline unnecessarily... !!! FIX !!! [DONE, but... still not quite
589 // right. Either that, or the Accolade team that wrote Bubsy screwed up royal.]
590 //Also note: $E0 = 7.0 which IS a legal vscale value...
592 // if (remainder & 0x80) // I.e., it's negative
593 // if ((remainder & 0x80) || remainder == 0) // I.e., it's <= 0
594 // if ((remainder - 1) >= 0xE0) // I.e., it's <= 0
595 // if ((remainder >= 0xE1) || remainder == 0)// I.e., it's <= 0
596 // if ((remainder >= 0xE1 && remainder <= 0xFF) || remainder == 0)// I.e., it's <= 0
597 // if (remainder <= 0x20) // I.e., it's <= 1.0
598 // I.e., it's < 1.0f -> means it'll go negative when we subtract 1.0f.
599 if (remainder < 0x20)
601 uint64 data = (p0 & 0xFFFFF80000000000LL) >> 40;
602 uint64 dwidth = (p1 & 0xFFC0000) >> 15;
604 // while (remainder & 0x80)
605 // while ((remainder & 0x80) || remainder == 0)
606 // while ((remainder - 1) >= 0xE0)
607 // while ((remainder >= 0xE1) || remainder == 0)
608 // while ((remainder >= 0xE1 && remainder <= 0xFF) || remainder == 0)
609 // while (remainder <= 0x20)
610 while (remainder < 0x20)
620 p0 &= ~0xFFFFF80000FFC000LL; // Mask out old data...
621 p0 |= (uint64)height << 14;
623 OPStorePhrase(oldOPP, p0);
626 remainder -= 0x20; // 1.0f in [3.5] fixed point format
629 // WriteLog("--> Finished writebacks...\n");//*/
631 //WriteLog(" [%08X%08X -> ", (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
632 p2 &= ~0x0000000000FF0000LL;
633 p2 |= (uint64)remainder << 16;
634 //WriteLog("%08X%08X]\n", (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
635 OPStorePhrase(oldOPP + 16, p2);
636 //remainder = (uint8)(p2 >> 16), vscale = (uint8)(p2 >> 8);
637 //WriteLog(" [after]: rem=%02X, vscale=%02X\n", remainder, vscale);
640 op_pointer = (p0 & 0x000007FFFF000000LL) >> 21;
643 case OBJECT_TYPE_GPU:
645 //WriteLog("OP: Asserting GPU IRQ #3...\n");
646 #warning "Need to fix OP GPU IRQ handling! !!! FIX !!!"
647 OPSetCurrentObject(p0);
648 GPUSetIRQLine(3, ASSERT_LINE);
649 //Also, OP processing is suspended from this point until OBF (F00026) is written to...
652 //OPSuspendedByGPU = true;
653 //Dunno if the OP keeps processing from where it was interrupted, or if it just continues
654 //on the next halfline...
655 // --> It continues from where it was interrupted! !!! FIX !!!
658 case OBJECT_TYPE_BRANCH:
// NOTE(review): cc is masked to two bits (values 0..3), so the
// CONDITION_SECOND_HALF_LINE (4) case below looks unreachable -- confirm the
// intended width of the condition code field.
660 uint16 ypos = (p0 >> 3) & 0x7FF;
661 uint8 cc = (p0 >> 14) & 0x03;
662 uint32 link = (p0 >> 21) & 0x3FFFF8;
664 // if ((ypos!=507)&&(ypos!=25))
665 // WriteLog("\t%i%s%i link=0x%.8x\n",halfline,condition_to_str[cc],ypos>>1,link);
668 case CONDITION_EQUAL:
669 if (TOMReadWord(0xF00006, OP) == ypos || ypos == 0x7FF)
672 case CONDITION_LESS_THAN:
673 if (TOMReadWord(0xF00006, OP) < ypos)
676 case CONDITION_GREATER_THAN:
677 if (TOMReadWord(0xF00006, OP) > ypos)
680 case CONDITION_OP_FLAG_SET:
681 if (OPGetStatusRegister() & 0x01)
684 case CONDITION_SECOND_HALF_LINE:
685 //Here's the ASIC code:
686 // ND4(cctrue5, newheight[2], heightl[1], heightl[0], hcb[10]);
687 //which means, do the link if bit 10 of HC is set...
689 // This basically means branch if bit 10 of HC is set
690 #warning "Unhandled condition code causes emulator to crash... !!! FIX !!!"
691 WriteLog("OP: Unexpected CONDITION_SECOND_HALF_LINE in BRANCH object\nOP: shutting down!\n");
696 // Basically, if you do this, the OP does nothing. :-)
697 WriteLog("OP: Unimplemented branch condition %i\n", cc);
701 case OBJECT_TYPE_STOP:
705 //WriteLog("OP: --> STOP\n");
706 // op_set_status_register(((p0>>3) & 0xFFFFFFFF));
707 //This seems more likely...
708 OPSetCurrentObject(p0);
712 // We need to check whether these interrupts are enabled or not, THEN
713 // set an IRQ + pending flag if necessary...
714 if (TOMIRQEnabled(IRQ_OPFLAG))
716 TOMSetPendingObjectInt();
717 m68k_set_irq(2); // Cause a 68K IPL 2 to occur...
725 WriteLog("op: unknown object type %i\n", ((uint8)p0 & 0x07));
729 // Here is a little sanity check to keep the OP from locking up the machine
730 // when fed bad data. Better would be to count how many actual cycles it used
731 // and bail out/reenter to properly simulate an overloaded OP... !!! FIX !!!
732 #warning "Better would be to count how many actual cycles it used and bail out/reenter to properly simulate an overloaded OP... !!! FIX !!!"
741 // Store fixed size bitmap in line buffer
743 void OPProcessFixedBitmap(uint64 p0, uint64 p1, bool render)
745 // Need to make sure that when writing that it stays within the line buffer...
746 // LBUF ($F01800 - $F01D9E) 360 x 32-bit RAM
747 uint8 depth = (p1 >> 12) & 0x07; // Color depth of image
748 int32 xpos = ((int16)((p1 << 4) & 0xFFFF)) >> 4;// Image xpos in LBUF
749 uint32 iwidth = (p1 >> 28) & 0x3FF; // Image width in *phrases*
750 uint32 data = (p0 >> 40) & 0xFFFFF8; // Pixel data address
751 //#ifdef OP_DEBUG_BMP
752 uint32 firstPix = (p1 >> 49) & 0x3F;
753 // "The LSB is significant only for scaled objects..." -JTRM
754 // "In 1 BPP mode, all five bits are significant. In 2 BPP mode, the top four are significant..."
757 // We can ignore the RELEASE (high order) bit for now--probably forever...!
758 // uint8 flags = (p1 >> 45) & 0x0F; // REFLECT, RMW, TRANS, RELEASE
759 //Optimize: break these out to their own BOOL values
760 uint8 flags = (p1 >> 45) & 0x07; // REFLECT (0), RMW (1), TRANS (2)
761 bool flagREFLECT = (flags & OPFLAG_REFLECT ? true : false),
762 flagRMW = (flags & OPFLAG_RMW ? true : false),
763 flagTRANS = (flags & OPFLAG_TRANS ? true : false);
764 // "For images with 1 to 4 bits/pixel the top 7 to 4 bits of the index
765 // provide the most significant bits of the palette address."
766 uint8 index = (p1 >> 37) & 0xFE; // CLUT index offset (upper pix, 1-4 bpp)
767 uint32 pitch = (p1 >> 15) & 0x07; // Phrase pitch
768 pitch <<= 3; // Optimization: Multiply pitch by 8
770 // int16 scanlineWidth = tom_getVideoModeWidth();
771 uint8 * tomRam8 = TOMGetRamPointer();
772 uint8 * paletteRAM = &tomRam8[0x400];
773 // This is OK as long as it's used correctly: For 16-bit RAM to RAM direct copies--NOT
774 // for use when using endian-corrected data (i.e., any of the *_word_read functions!)
775 uint16 * paletteRAM16 = (uint16 *)paletteRAM;
777 // WriteLog("bitmap %ix? %ibpp at %i,? firstpix=? data=0x%.8x pitch %i hflipped=%s dwidth=? (linked to ?) RMW=%s Tranparent=%s\n",
778 // iwidth, op_bitmap_bit_depth[bitdepth], xpos, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), (flags&OPFLAG_RMW ? "yes" : "no"), (flags&OPFLAG_TRANS ? "yes" : "no"));
780 // Is it OK to have a 0 for the data width??? (i.e., undocumented?)
781 // Seems to be... Seems that dwidth *can* be zero (i.e., reuse same line) as well.
782 // Pitch == 0 is OK too...
783 // if (!render || op_pointer == 0 || ptr == 0 || pitch == 0)
784 //I'm not convinced that we need to concern ourselves with data & op_pointer here either!
785 if (!render || iwidth == 0)
788 //OK, so we know the position in the line buffer is correct. It's the clipping in
789 //24bpp mode that's wrong!
791 //This is a total kludge, based upon the fact that 24BPP mode puts *4* bytes
792 //into the line buffer for each pixel.
793 if (depth == 5) // i.e., 24bpp mode...
794 xpos >>= 1; // Cut it in half...
797 //#define OP_DEBUG_BMP
798 //#ifdef OP_DEBUG_BMP
799 // WriteLog("bitmap %ix%i %ibpp at %i,%i firstpix=%i data=0x%.8x pitch %i hflipped=%s dwidth=%i (linked to 0x%.8x) Transluency=%s\n",
800 // iwidth, height, op_bitmap_bit_depth[bitdepth], xpos, ypos, firstPix, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), dwidth, op_pointer, (flags&OPFLAG_RMW ? "yes" : "no"));
803 // int32 leftMargin = xpos, rightMargin = (xpos + (phraseWidthToPixels[depth] * iwidth)) - 1;
804 int32 startPos = xpos, endPos = xpos +
805 (!flagREFLECT ? (phraseWidthToPixels[depth] * iwidth) - 1
806 : -((phraseWidthToPixels[depth] * iwidth) + 1));
807 uint32 clippedWidth = 0, phraseClippedWidth = 0, dataClippedWidth = 0;//, phrasePixel = 0;
808 bool in24BPPMode = (((GET16(tomRam8, 0x0028) >> 1) & 0x03) == 1 ? true : false); // VMODE
809 // Not sure if this is Jaguar Two only location or what...
810 // From the docs, it is... If we want to limit here we should think of something else.
811 // int32 limit = GET16(tom_ram_8, 0x0008); // LIMIT
812 // int32 limit = 720;
813 // int32 lbufWidth = (!in24BPPMode ? limit - 1 : (limit / 2) - 1); // Zero based limit...
814 //printf("[OP:xpos=%i,spos=%i,epos=%i>", xpos, startPos, endPos);
815 // This is correct, the OP line buffer is a constant size...
817 int32 lbufWidth = 719;
819 // If the image is completely to the left or right of the line buffer, then bail.
820 //If in REFLECT mode, then these values are swapped! !!! FIX !!! [DONE]
821 //There are four possibilities:
822 // 1. image sits on left edge and no REFLECT; starts out of bounds but ends in bounds.
823 // 2. image sits on left edge and REFLECT; starts in bounds but ends out of bounds.
824 // 3. image sits on right edge and REFLECT; starts out of bounds but ends in bounds.
825 // 4. image sits on right edge and no REFLECT; starts in bounds but ends out of bounds.
826 //Numbers 2 & 4 can be caught by checking the LBUF clip while in the inner loop,
827 // numbers 1 & 3 are of concern.
828 // This *indirectly* handles only cases 2 & 4! And is WRONG if REFLECT is set...!
829 // if (rightMargin < 0 || leftMargin > lbufWidth)
831 // It might be easier to swap these (if REFLECTed) and just use XPOS down below...
832 // That way, you could simply set XPOS to leftMargin if !REFLECT and to rightMargin otherwise.
833 // Still have to be careful with the DATA and IWIDTH values though...
835 // if ((!flagREFLECT && (rightMargin < 0 || leftMargin > lbufWidth))
836 // || (flagREFLECT && (leftMargin < 0 || rightMargin > lbufWidth)))
838 if ((!flagREFLECT && (endPos < 0 || startPos > lbufWidth))
839 || (flagREFLECT && (startPos < 0 || endPos > lbufWidth)))
842 // Otherwise, find the clip limits and clip the phrase as well...
843 // NOTE: I'm fudging here by letting the actual blit overstep the bounds of the
844 // line buffer, but it shouldn't matter since there are two unused line
845 // buffers below and nothing above and I'll at most write 8 bytes outside
846 // the line buffer... I could use a fractional clip begin/end value, but
847 // this makes the blit a *lot* more hairy. I might fix this in the future
848 // if it becomes necessary. (JLH)
849 // Probably wouldn't be *that* hairy. Just use a delta that tells the inner loop
850 // which pixel in the phrase is being written, and quit when either end of phrases
851 // is reached or line buffer extents are surpassed.
853 //This stuff is probably wrong as well... !!! FIX !!!
854 //The strange thing is that it seems to work, but that's no guarantee that it's bulletproof!
855 //Yup. Seems that JagMania doesn't work correctly with this...
856 //Dunno if this is the problem, but Atari Karts is showing *some* of the road now...
861 clippedWidth = 0 - leftMargin,
862 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth],
863 leftMargin = 0 - (clippedWidth % phraseWidthToPixels[depth]);
866 if (rightMargin > lbufWidth)
867 clippedWidth = rightMargin - lbufWidth,
868 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];//,
869 // rightMargin = lbufWidth + (clippedWidth % phraseWidthToPixels[depth]);
870 // rightMargin = lbufWidth;
873 WriteLog("OP: We're about to encounter a divide by zero error!\n");
874 // NOTE: We're just using endPos to figure out how much, if any, to clip by.
875 // ALSO: There may be another case where we start out of bounds and end out of bounds...!
877 if (startPos < 0) // Case #1: Begin out, end in, L to R
878 clippedWidth = 0 - startPos,
879 dataClippedWidth = phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth],
880 startPos = 0 - (clippedWidth % phraseWidthToPixels[depth]);
882 if (endPos < 0) // Case #2: Begin in, end out, R to L
883 clippedWidth = 0 - endPos,
884 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];
886 if (endPos > lbufWidth) // Case #3: Begin in, end out, L to R
887 clippedWidth = endPos - lbufWidth,
888 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];
890 if (startPos > lbufWidth) // Case #4: Begin out, end in, R to L
891 clippedWidth = startPos - lbufWidth,
892 dataClippedWidth = phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth],
893 startPos = lbufWidth + (clippedWidth % phraseWidthToPixels[depth]);
894 //printf("<OP:spos=%i,epos=%i]", startPos, endPos);
896 // If the image is sitting on the line buffer left or right edge, we need to compensate
897 // by decreasing the image phrase width accordingly.
898 iwidth -= phraseClippedWidth;
900 // Also, if we're clipping the phrase we need to make sure we're in the correct part of
902 // data += phraseClippedWidth * (pitch << 3);
903 data += dataClippedWidth * pitch;
905 // NOTE: When the bitmap is in REFLECT mode, the XPOS marks the *right* side of the
906 // bitmap! This makes clipping & etc. MUCH, much easier...!
907 // uint32 lbufAddress = 0x1800 + (!in24BPPMode ? leftMargin * 2 : leftMargin * 4);
908 //Why does this work right when multiplying startPos by 2 (instead of 4) for 24 BPP mode?
909 //Is this a bug in the OP?
910 //It's because in 24bpp mode, each pixel takes *4* bytes, instead of the usual 2.
911 //Though it looks like we're doing it here no matter what...
912 // uint32 lbufAddress = 0x1800 + (!in24BPPMode ? startPos * 2 : startPos * 2);
914 uint32 lbufAddress = 0x1800 + (startPos * 2);
915 uint8 * currentLineBuffer = &tomRam8[lbufAddress];
919 // Hmm. We check above for 24 BPP mode, but don't do anything about it below...
920 // If we *were* in 24 BPP mode, how would you convert CRY to RGB24? Seems to me
921 // that if you're in CRY mode then you wouldn't be able to use 24 BPP bitmaps
923 // This seems to be the case (at least according to the Midsummer docs)...!
925 // This is to test using palette zeroes instead of bit zeroes...
926 // And it seems that this is wrong, index == 0 is transparent apparently... :-/
927 //#define OP_USES_PALETTE_ZERO
929 if (depth == 0) // 1 BPP
931 // The LSB of flags is OPFLAG_REFLECT, so sign extend it and or 2 into it.
932 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
934 // Fetch 1st phrase...
935 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
936 //Note that firstPix should only be honored *if* we start with the 1st phrase of the bitmap
937 //i.e., we didn't clip on the margin... !!! FIX !!!
938 pixels <<= firstPix; // Skip first N pixels (N=firstPix)...
939 int i = firstPix; // Start counter at right spot...
945 uint8 bit = pixels >> 63;
946 #ifndef OP_USES_PALETTE_ZERO
947 if (flagTRANS && bit == 0)
949 if (flagTRANS && (paletteRAM16[index | bit] == 0))
955 //Optimize: Set paletteRAM16 to beginning of palette RAM + index*2 and use only [bit] as index...
956 //Won't optimize RMW case though...
957 // This is the *only* correct use of endian-dependent code
958 // (i.e., mem-to-mem direct copying)!
959 *(uint16 *)currentLineBuffer = paletteRAM16[index | bit];
962 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bit) << 1]),
963 *(currentLineBuffer + 1) =
964 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bit) << 1) + 1]);
967 currentLineBuffer += lbufDelta;
971 // Fetch next phrase...
973 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
976 else if (depth == 1) // 2 BPP
979 WriteLog("OP: Fixed bitmap @ 2 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
980 index &= 0xFC; // Top six bits form CLUT index
981 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
982 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
987 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
990 for(int i=0; i<32; i++)
992 uint8 bits = pixels >> 62;
993 // Seems to me that both of these are in the same endian, so we could cast it as
994 // uint16 * and do straight across copies (what about 24 bpp? Treat it differently...)
995 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
996 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
997 // No, it isn't because we read the memory in an endian safe way--this *won't* work...
998 #ifndef OP_USES_PALETTE_ZERO
999 if (flagTRANS && bits == 0)
1001 if (flagTRANS && (paletteRAM16[index | bits] == 0))
1007 *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1009 *currentLineBuffer =
1010 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1011 *(currentLineBuffer + 1) =
1012 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1015 currentLineBuffer += lbufDelta;
1020 else if (depth == 2) // 4 BPP
1023 WriteLog("OP: Fixed bitmap @ 4 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1024 index &= 0xF0; // Top four bits form CLUT index
1025 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1026 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1031 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1034 for(int i=0; i<16; i++)
1036 uint8 bits = pixels >> 60;
1037 // Seems to me that both of these are in the same endian, so we could cast it as
1038 // uint16 * and do straight across copies (what about 24 bpp? Treat it differently...)
1039 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
1040 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
1041 // No, it isn't because we read the memory in an endian safe way--this *won't* work...
1042 #ifndef OP_USES_PALETTE_ZERO
1043 if (flagTRANS && bits == 0)
1045 if (flagTRANS && (paletteRAM16[index | bits] == 0))
1051 *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1053 *currentLineBuffer =
1054 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1055 *(currentLineBuffer + 1) =
1056 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1059 currentLineBuffer += lbufDelta;
1064 else if (depth == 3) // 8 BPP
1066 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1067 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1069 // Fetch 1st phrase...
1070 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1071 //Note that firstPix should only be honored *if* we start with the 1st phrase of the bitmap
1072 //i.e., we didn't clip on the margin... !!! FIX !!!
1073 firstPix &= 0x30; // Only top two bits are valid for 8 BPP
1074 pixels <<= firstPix; // Skip first N pixels (N=firstPix)...
1075 int i = firstPix >> 3; // Start counter at right spot...
1081 uint8 bits = pixels >> 56;
1082 // Seems to me that both of these are in the same endian, so we could cast it as
1083 // uint16 * and do straight across copies (what about 24 bpp? Treat it differently...)
1084 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
1085 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
1086 // No, it isn't because we read the memory in an endian safe way--this *won't* work...
1087 //This would seem to be problematic...
1088 //Because it's the palette entry being zero that makes the pixel transparent...
1089 //Let's try it and see.
1090 #ifndef OP_USES_PALETTE_ZERO
1091 if (flagTRANS && bits == 0)
1093 if (flagTRANS && (paletteRAM16[bits] == 0))
1099 *(uint16 *)currentLineBuffer = paletteRAM16[bits];
1101 *currentLineBuffer =
1102 BLEND_CR(*currentLineBuffer, paletteRAM[bits << 1]),
1103 *(currentLineBuffer + 1) =
1104 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[(bits << 1) + 1]);
1107 currentLineBuffer += lbufDelta;
1111 // Fetch next phrase...
1113 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1116 else if (depth == 4) // 16 BPP
1119 WriteLog("OP: Fixed bitmap @ 16 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1120 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1121 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1126 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1129 for(int i=0; i<4; i++)
1131 uint8 bitsHi = pixels >> 56, bitsLo = pixels >> 48;
1132 // Seems to me that both of these are in the same endian, so we could cast it as
1133 // uint16 * and do straight across copies (what about 24 bpp? Treat it differently...)
1134 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
1135 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
1136 // No, it isn't because we read the memory in an endian safe way--it *won't* work...
1137 //This doesn't seem right... Let's try the encoded black value ($8800):
1138 //Apparently, CRY 0 maps to $8800...
1139 if (flagTRANS && ((bitsLo | bitsHi) == 0))
1140 // if (flagTRANS && (bitsHi == 0x88) && (bitsLo == 0x00))
1145 *currentLineBuffer = bitsHi,
1146 *(currentLineBuffer + 1) = bitsLo;
1148 *currentLineBuffer =
1149 BLEND_CR(*currentLineBuffer, bitsHi),
1150 *(currentLineBuffer + 1) =
1151 BLEND_Y(*(currentLineBuffer + 1), bitsLo);
1154 currentLineBuffer += lbufDelta;
1159 else if (depth == 5) // 24 BPP
1161 //Looks like Iron Soldier is the only game that uses 24BPP mode...
1162 //There *might* be others...
1163 //WriteLog("OP: Writing 24 BPP bitmap!\n");
1165 WriteLog("OP: Fixed bitmap @ 24 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1166 // Not sure, but I think RMW only works with 16 BPP and below, and only in CRY mode...
1167 // The LSB of flags is OPFLAG_REFLECT, so sign extend it and OR 4 into it.
1168 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 4) | 0x04;
1173 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1176 for(int i=0; i<2; i++)
1178 // We don't use a 32-bit var here because of endian issues...!
1179 uint8 bits3 = pixels >> 56, bits2 = pixels >> 48,
1180 bits1 = pixels >> 40, bits0 = pixels >> 32;
1182 if (flagTRANS && (bits3 | bits2 | bits1 | bits0) == 0)
1185 *currentLineBuffer = bits3,
1186 *(currentLineBuffer + 1) = bits2,
1187 *(currentLineBuffer + 2) = bits1,
1188 *(currentLineBuffer + 3) = bits0;
1190 currentLineBuffer += lbufDelta;
1198 // Store scaled bitmap in line buffer
1200 void OPProcessScaledBitmap(uint64 p0, uint64 p1, uint64 p2, bool render)
1202 // Need to make sure that when writing that it stays within the line buffer...
1203 // LBUF ($F01800 - $F01D9E) 360 x 32-bit RAM
1204 uint8 depth = (p1 >> 12) & 0x07; // Color depth of image
1205 int32 xpos = ((int16)((p1 << 4) & 0xFFFF)) >> 4;// Image xpos in LBUF
1206 uint32 iwidth = (p1 >> 28) & 0x3FF; // Image width in *phrases*
1207 uint32 data = (p0 >> 40) & 0xFFFFF8; // Pixel data address
1208 //#ifdef OP_DEBUG_BMP
1209 // Prolly should use this... Though not sure exactly how.
1210 //Use the upper bits as an offset into the phrase depending on the BPP. That's how!
1211 uint32 firstPix = (p1 >> 49) & 0x3F;
1212 //This is WEIRD! I'm sure I saw Atari Karts request 8 BPP FIRSTPIX! What happened???
1214 WriteLog("OP: FIRSTPIX != 0! (Scaled BM)\n");
1216 // We can ignore the RELEASE (high order) bit for now--probably forever...!
1217 // uint8 flags = (p1 >> 45) & 0x0F; // REFLECT, RMW, TRANS, RELEASE
1218 //Optimize: break these out to their own BOOL values [DONE]
1219 uint8 flags = (p1 >> 45) & 0x07; // REFLECT (0), RMW (1), TRANS (2)
1220 bool flagREFLECT = (flags & OPFLAG_REFLECT ? true : false),
1221 flagRMW = (flags & OPFLAG_RMW ? true : false),
1222 flagTRANS = (flags & OPFLAG_TRANS ? true : false);
1223 uint8 index = (p1 >> 37) & 0xFE; // CLUT index offset (upper pix, 1-4 bpp)
1224 uint32 pitch = (p1 >> 15) & 0x07; // Phrase pitch
1226 uint8 * tomRam8 = TOMGetRamPointer();
1227 uint8 * paletteRAM = &tomRam8[0x400];
1228 // This is OK as long as it's used correctly: For 16-bit RAM to RAM direct copies--NOT
1229 // for use when using endian-corrected data (i.e., any of the *ReadWord functions!)
1230 uint16 * paletteRAM16 = (uint16 *)paletteRAM;
1232 uint16 hscale = p2 & 0xFF;
1233 // Hmm. It seems that fixing the horizontal scale necessitated re-fixing this. Not sure why,
1234 // but seems to be consistent with the vertical scaling now (and it may turn out to be wrong!)...
1235 uint16 horizontalRemainder = hscale; // Not sure if it starts full, but seems reasonable [It's not!]
1236 // uint8 horizontalRemainder = 0; // Let's try zero! Seems to work! Yay! [No, it doesn't!]
1237 int32 scaledWidthInPixels = (iwidth * phraseWidthToPixels[depth] * hscale) >> 5;
1238 uint32 scaledPhrasePixels = (phraseWidthToPixels[depth] * hscale) >> 5;
1240 // WriteLog("bitmap %ix? %ibpp at %i,? firstpix=? data=0x%.8x pitch %i hflipped=%s dwidth=? (linked to ?) RMW=%s Tranparent=%s\n",
1241 // iwidth, op_bitmap_bit_depth[bitdepth], xpos, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), (flags&OPFLAG_RMW ? "yes" : "no"), (flags&OPFLAG_TRANS ? "yes" : "no"));
1243 // Looks like an hscale of zero means don't draw!
1244 if (!render || iwidth == 0 || hscale == 0)
1247 /*extern int start_logging;
1249 WriteLog("OP: Scaled bitmap %ix? %ibpp at %i,? hscale=%02X fpix=%i data=%08X pitch %i hflipped=%s dwidth=? (linked to %08X) Transluency=%s\n",
1250 iwidth, op_bitmap_bit_depth[depth], xpos, hscale, firstPix, data, pitch, (flagREFLECT ? "yes" : "no"), op_pointer, (flagRMW ? "yes" : "no"));*/
1251 //#define OP_DEBUG_BMP
1252 //#ifdef OP_DEBUG_BMP
1253 // WriteLog("OP: Scaled bitmap %ix%i %ibpp at %i,%i firstpix=%i data=0x%.8x pitch %i hflipped=%s dwidth=%i (linked to 0x%.8x) Transluency=%s\n",
1254 // iwidth, height, op_bitmap_bit_depth[bitdepth], xpos, ypos, firstPix, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), dwidth, op_pointer, (flags&OPFLAG_RMW ? "yes" : "no"));
1257 int32 startPos = xpos, endPos = xpos +
1258 (!flagREFLECT ? scaledWidthInPixels - 1 : -(scaledWidthInPixels + 1));
1259 uint32 clippedWidth = 0, phraseClippedWidth = 0, dataClippedWidth = 0;
1260 bool in24BPPMode = (((GET16(tomRam8, 0x0028) >> 1) & 0x03) == 1 ? true : false); // VMODE
1261 // Not sure if this is Jaguar Two only location or what...
1262 // From the docs, it is... If we want to limit here we should think of something else.
1263 // int32 limit = GET16(tom_ram_8, 0x0008); // LIMIT
1265 // int32 lbufWidth = (!in24BPPMode ? limit - 1 : (limit / 2) - 1); // Zero based limit...
1266 int32 lbufWidth = 719; // Zero based limit...
1268 // If the image is completely to the left or right of the line buffer, then bail.
1269 //If in REFLECT mode, then these values are swapped! !!! FIX !!! [DONE]
1270 //There are four possibilities:
1271 // 1. image sits on left edge and no REFLECT; starts out of bounds but ends in bounds.
1272 // 2. image sits on left edge and REFLECT; starts in bounds but ends out of bounds.
1273 // 3. image sits on right edge and REFLECT; starts out of bounds but ends in bounds.
1274 // 4. image sits on right edge and no REFLECT; starts in bounds but ends out of bounds.
1275 //Numbers 2 & 4 can be caught by checking the LBUF clip while in the inner loop,
1276 // numbers 1 & 3 are of concern.
1277 // This *indirectly* handles only cases 2 & 4! And is WRONG if REFLECT is set...!
1278 // if (rightMargin < 0 || leftMargin > lbufWidth)
1280 // It might be easier to swap these (if REFLECTed) and just use XPOS down below...
1281 // That way, you could simply set XPOS to leftMargin if !REFLECT and to rightMargin otherwise.
1282 // Still have to be careful with the DATA and IWIDTH values though...
1284 if ((!flagREFLECT && (endPos < 0 || startPos > lbufWidth))
1285 || (flagREFLECT && (startPos < 0 || endPos > lbufWidth)))
1288 // Otherwise, find the clip limits and clip the phrase as well...
1289 // NOTE: I'm fudging here by letting the actual blit overstep the bounds of the
1290 // line buffer, but it shouldn't matter since there are two unused line
1291 // buffers below and nothing above and I'll at most write 40 bytes outside
1292 // the line buffer... I could use a fractional clip begin/end value, but
1293 // this makes the blit a *lot* more hairy. I might fix this in the future
1294 // if it becomes necessary. (JLH)
1295 // Probably wouldn't be *that* hairy. Just use a delta that tells the inner loop
1296 // which pixel in the phrase is being written, and quit when either end of phrases
1297 // is reached or line buffer extents are surpassed.
1299 //This stuff is probably wrong as well... !!! FIX !!!
1300 //The strange thing is that it seems to work, but that's no guarantee that it's bulletproof!
1301 //Yup. Seems that JagMania doesn't work correctly with this...
1302 //Dunno if this is the problem, but Atari Karts is showing *some* of the road now...
1303 //Actually, it is! Or, it was. It doesn't seem to be clipping here, so the problem lies
1304 //elsewhere! Hmm. Putting the scaling code into the 1/2/8 BPP cases seems to draw the ground
1305 // a bit more accurately... Strange!
1306 //It's probably a case of the REFLECT flag being set and the background being written
1307 //from the right side of the screen...
1308 //But no, it isn't... At least if the diagnostics are telling the truth!
1310 // NOTE: We're just using endPos to figure out how much, if any, to clip by.
1311 // ALSO: There may be another case where we start out of bounds and end out of bounds...!
1314 //There's a problem here with scaledPhrasePixels in that it can be forced to zero when
1315 //the scaling factor is small. So fix it already! !!! FIX !!!
1316 /*if (scaledPhrasePixels == 0)
1318 WriteLog("OP: [Scaled] We're about to encounter a divide by zero error!\n");
1319 DumpScaledObject(p0, p1, p2);
1321 //NOTE: I'm almost 100% sure that this is wrong... And it is! :-p
1323 //Try a simple example...
1324 // Let's say we have a 8 BPP scanline with an hscale of $80 (4). Our xpos is -10,
1325 // non-flipped. Pixels in the bitmap are XYZXYZXYZXYZXYZ.
1326 // Scaled up, they would be XXXXYYYYZZZZXXXXYYYYZZZZXXXXYYYYZZZZ...
1328 // Normally, we would expect this in the line buffer:
1329 // ZZXXXXYYYYZZZZXXXXYYYYZZZZ...
1331 // But instead we're getting:
1332 // XXXXYYYYZZZZXXXXYYYYZZZZ...
1334 // or are we??? It would seem so, simply by virtue of the fact that we're NOT starting
1335 // on negative boundary--or are we? Hmm...
1336 // cw = 10, dcw = pcw = 10 / ([8 * 4 = 32] 32) = 0, sp = -10
1338 // Let's try a real world example:
1340 //OP: Scaled bitmap (70, 8 BPP, spp=28) sp (-400) < 0... [new sp=-8, cw=400, dcw=pcw=14]
1341 //OP: Scaled bitmap (6F, 8 BPP, spp=27) sp (-395) < 0... [new sp=-17, cw=395, dcw=pcw=14]
1343 // Really, spp is 27.75 in the second case...
1344 // So... If we do 395 / 27.75, we get 14. Ok so far... If we scale that against the
1345 // start position (14 * 27.75), we get -6.5... NOT -17!
1347 //Now it seems we're working OK, at least for the first case...
1348 uint32 scaledPhrasePixelsUS = phraseWidthToPixels[depth] * hscale;
1350 if (startPos < 0) // Case #1: Begin out, end in, L to R
1352 extern int start_logging;
1354 WriteLog("OP: Scaled bitmap (%02X, %u BPP, spp=%u) start pos (%i) < 0...", hscale, op_bitmap_bit_depth[depth], scaledPhrasePixels, startPos);
1355 // clippedWidth = 0 - startPos,
1356 clippedWidth = (0 - startPos) << 5,
1357 // dataClippedWidth = phraseClippedWidth = clippedWidth / scaledPhrasePixels,
1358 dataClippedWidth = phraseClippedWidth = (clippedWidth / scaledPhrasePixelsUS) >> 5,
1359 // startPos = 0 - (clippedWidth % scaledPhrasePixels);
1360 startPos += (dataClippedWidth * scaledPhrasePixelsUS) >> 5;
1362 WriteLog(" [new sp=%i, cw=%i, dcw=pcw=%i]\n", startPos, clippedWidth, dataClippedWidth);
1365 if (endPos < 0) // Case #2: Begin in, end out, R to L
1366 clippedWidth = 0 - endPos,
1367 phraseClippedWidth = clippedWidth / scaledPhrasePixels;
1369 if (endPos > lbufWidth) // Case #3: Begin in, end out, L to R
1370 clippedWidth = endPos - lbufWidth,
1371 phraseClippedWidth = clippedWidth / scaledPhrasePixels;
1373 if (startPos > lbufWidth) // Case #4: Begin out, end in, R to L
1374 clippedWidth = startPos - lbufWidth,
1375 dataClippedWidth = phraseClippedWidth = clippedWidth / scaledPhrasePixels,
1376 startPos = lbufWidth + (clippedWidth % scaledPhrasePixels);
1378 extern int op_start_log;
1379 if (op_start_log && clippedWidth != 0)
1380 WriteLog("OP: Clipped line. SP=%i, EP=%i, clip=%u, iwidth=%u, hscale=%02X\n", startPos, endPos, clippedWidth, iwidth, hscale);
1381 if (op_start_log && startPos == 13)
1383 WriteLog("OP: Scaled line. SP=%i, EP=%i, clip=%u, iwidth=%u, hscale=%02X, depth=%u, firstPix=%u\n", startPos, endPos, clippedWidth, iwidth, hscale, depth, firstPix);
1384 DumpScaledObject(p0, p1, p2);
1387 WriteLog(" %08X: ", data);
1388 for(int i=0; i<7*8; i++)
1389 WriteLog("%02X ", JaguarReadByte(data+i));
1393 // If the image is sitting on the line buffer left or right edge, we need to compensate
1394 // by decreasing the image phrase width accordingly.
1395 iwidth -= phraseClippedWidth;
1397 // Also, if we're clipping the phrase we need to make sure we're in the correct part of
1399 // data += phraseClippedWidth * (pitch << 3);
1400 data += dataClippedWidth * (pitch << 3);
1402 // NOTE: When the bitmap is in REFLECT mode, the XPOS marks the *right* side of the
1403 // bitmap! This makes clipping & etc. MUCH, much easier...!
1404 // uint32 lbufAddress = 0x1800 + (!in24BPPMode ? leftMargin * 2 : leftMargin * 4);
1405 // uint32 lbufAddress = 0x1800 + (!in24BPPMode ? startPos * 2 : startPos * 4);
1406 uint32 lbufAddress = 0x1800 + startPos * 2;
1407 uint8 * currentLineBuffer = &tomRam8[lbufAddress];
1408 //uint8 * lineBufferLowerLimit = &tom_ram_8[0x1800],
1409 // * lineBufferUpperLimit = &tom_ram_8[0x1800 + 719];
1413 // Hmm. We check above for 24 BPP mode, but don't do anything about it below...
1414 // If we *were* in 24 BPP mode, how would you convert CRY to RGB24? Seems to me
1415 // that if you're in CRY mode then you wouldn't be able to use 24 BPP bitmaps
1417 // This seems to be the case (at least according to the Midsummer docs)...!
1419 if (depth == 0) // 1 BPP
1422 WriteLog("OP: Scaled bitmap @ 1 BPP requesting FIRSTPIX!\n");
1423 // The LSB of flags is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1424 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1427 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1429 while ((int32)iwidth > 0)
1431 uint8 bits = pixels >> 63;
1433 #ifndef OP_USES_PALETTE_ZERO
1434 if (flagTRANS && bits == 0)
1436 if (flagTRANS && (paletteRAM16[index | bits] == 0))
1442 // This is the *only* correct use of endian-dependent code
1443 // (i.e., mem-to-mem direct copying)!
1444 *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1446 *currentLineBuffer =
1447 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1448 *(currentLineBuffer + 1) =
1449 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1452 currentLineBuffer += lbufDelta;
1455 The reason we subtract the horizontalRemainder *after* the test is because we had too few
1456 bytes for horizontalRemainder to properly recognize a negative number. But now it's 16 bits
1457 wide, so we could probably go back to that (as long as we make it an int16 and not a uint16!)
1459 /* horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
1460 while (horizontalRemainder & 0x80)
1462 horizontalRemainder += hscale;
1466 // while (horizontalRemainder <= 0x20) // I.e., it's <= 1.0 (*before* subtraction)
1467 while (horizontalRemainder < 0x20) // I.e., it's <= 1.0 (*before* subtraction)
1469 horizontalRemainder += hscale;
1473 horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
1477 int phrasesToSkip = pixCount / 64, pixelShift = pixCount % 64;
1479 data += (pitch << 3) * phrasesToSkip;
1480 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1481 pixels <<= 1 * pixelShift;
1482 iwidth -= phrasesToSkip;
1483 pixCount = pixelShift;
1487 else if (depth == 1) // 2 BPP
1490 WriteLog("OP: Scaled bitmap @ 2 BPP requesting FIRSTPIX!\n");
1491 index &= 0xFC; // Top six bits form CLUT index
1492 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1493 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1496 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1498 while ((int32)iwidth > 0)
1500 uint8 bits = pixels >> 62;
1502 #ifndef OP_USES_PALETTE_ZERO
1503 if (flagTRANS && bits == 0)
1505 if (flagTRANS && (paletteRAM16[index | bits] == 0))
1511 // This is the *only* correct use of endian-dependent code
1512 // (i.e., mem-to-mem direct copying)!
1513 *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1515 *currentLineBuffer =
1516 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1517 *(currentLineBuffer + 1) =
1518 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1521 currentLineBuffer += lbufDelta;
1523 /* horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
1524 while (horizontalRemainder & 0x80)
1526 horizontalRemainder += hscale;
1530 // while (horizontalRemainder <= 0x20) // I.e., it's <= 0 (*before* subtraction)
1531 while (horizontalRemainder < 0x20) // I.e., it's <= 1.0 (*before* subtraction)
1533 horizontalRemainder += hscale;
1537 horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
1541 int phrasesToSkip = pixCount / 32, pixelShift = pixCount % 32;
1543 data += (pitch << 3) * phrasesToSkip;
1544 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1545 pixels <<= 2 * pixelShift;
1546 iwidth -= phrasesToSkip;
1547 pixCount = pixelShift;
1551 else if (depth == 2) // 4 BPP
1554 WriteLog("OP: Scaled bitmap @ 4 BPP requesting FIRSTPIX!\n");
1555 index &= 0xF0; // Top four bits form CLUT index
1556 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1557 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1560 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1562 while ((int32)iwidth > 0)
1564 uint8 bits = pixels >> 60;
1566 #ifndef OP_USES_PALETTE_ZERO
1567 if (flagTRANS && bits == 0)
1569 if (flagTRANS && (paletteRAM16[index | bits] == 0))
1575 // This is the *only* correct use of endian-dependent code
1576 // (i.e., mem-to-mem direct copying)!
1577 *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1579 *currentLineBuffer =
1580 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1581 *(currentLineBuffer + 1) =
1582 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1585 currentLineBuffer += lbufDelta;
1587 /* horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
1588 while (horizontalRemainder & 0x80)
1590 horizontalRemainder += hscale;
1594 // while (horizontalRemainder <= 0x20) // I.e., it's <= 0 (*before* subtraction)
1595 while (horizontalRemainder < 0x20) // I.e., it's <= 0 (*before* subtraction)
1597 horizontalRemainder += hscale;
1601 horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
1605 int phrasesToSkip = pixCount / 16, pixelShift = pixCount % 16;
1607 data += (pitch << 3) * phrasesToSkip;
1608 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1609 pixels <<= 4 * pixelShift;
1610 iwidth -= phrasesToSkip;
1611 pixCount = pixelShift;
1615 else if (depth == 3) // 8 BPP
1618 WriteLog("OP: Scaled bitmap @ 8 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1619 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1620 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1623 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1625 while ((int32)iwidth > 0)
1627 uint8 bits = pixels >> 56;
1629 #ifndef OP_USES_PALETTE_ZERO
1630 if (flagTRANS && bits == 0)
1632 if (flagTRANS && (paletteRAM16[bits] == 0))
1638 // This is the *only* correct use of endian-dependent code
1639 // (i.e., mem-to-mem direct copying)!
1640 *(uint16 *)currentLineBuffer = paletteRAM16[bits];
1642 if (currentLineBuffer >= lineBufferLowerLimit && currentLineBuffer <= lineBufferUpperLimit)
1643 *(uint16 *)currentLineBuffer = paletteRAM16[bits];
1646 *currentLineBuffer =
1647 BLEND_CR(*currentLineBuffer, paletteRAM[bits << 1]),
1648 *(currentLineBuffer + 1) =
1649 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[(bits << 1) + 1]);
1652 currentLineBuffer += lbufDelta;
1654 // while (horizontalRemainder <= 0x20) // I.e., it's <= 0 (*before* subtraction)
1655 while (horizontalRemainder < 0x20) // I.e., it's <= 1.0 (*before* subtraction)
1657 horizontalRemainder += hscale;
1661 horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
1665 int phrasesToSkip = pixCount / 8, pixelShift = pixCount % 8;
1667 data += (pitch << 3) * phrasesToSkip;
1668 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1669 pixels <<= 8 * pixelShift;
1670 iwidth -= phrasesToSkip;
1671 pixCount = pixelShift;
1675 else if (depth == 4) // 16 BPP
1678 WriteLog("OP: Scaled bitmap @ 16 BPP requesting FIRSTPIX!\n");
1679 // The LSB is OPFLAG_REFLECT, so sign extend it and OR 2 into it.
1680 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1683 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1685 while ((int32)iwidth > 0)
1687 uint8 bitsHi = pixels >> 56, bitsLo = pixels >> 48;
1689 //This doesn't seem right... Let's try the encoded black value ($8800):
1690 //Apparently, CRY 0 maps to $8800...
1691 if (flagTRANS && ((bitsLo | bitsHi) == 0))
1692 // if (flagTRANS && (bitsHi == 0x88) && (bitsLo == 0x00))
1697 *currentLineBuffer = bitsHi,
1698 *(currentLineBuffer + 1) = bitsLo;
1700 *currentLineBuffer =
1701 BLEND_CR(*currentLineBuffer, bitsHi),
1702 *(currentLineBuffer + 1) =
1703 BLEND_Y(*(currentLineBuffer + 1), bitsLo);
1706 currentLineBuffer += lbufDelta;
1708 /* horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
1709 while (horizontalRemainder & 0x80)
1711 horizontalRemainder += hscale;
1715 // while (horizontalRemainder <= 0x20) // I.e., it's <= 0 (*before* subtraction)
1716 while (horizontalRemainder < 0x20) // I.e., it's <= 1.0 (*before* subtraction)
1718 horizontalRemainder += hscale;
1722 horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
1726 int phrasesToSkip = pixCount / 4, pixelShift = pixCount % 4;
1728 data += (pitch << 3) * phrasesToSkip;
1729 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1730 pixels <<= 16 * pixelShift;
1732 iwidth -= phrasesToSkip;
1734 pixCount = pixelShift;
1738 else if (depth == 5) // 24 BPP
1740 //I'm not sure that you can scale a 24 BPP bitmap properly--the JTRM seems to indicate as much.
1741 WriteLog("OP: Writing 24 BPP scaled bitmap!\n");
1743 WriteLog("OP: Scaled bitmap @ 24 BPP requesting FIRSTPIX!\n");
1744 // Not sure, but I think RMW only works with 16 BPP and below, and only in CRY mode...
1745 // The LSB is OPFLAG_REFLECT, so sign extend it and or 4 into it.
1746 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 4) | 0x04;
1751 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1752 data += pitch << 3; // Multiply pitch * 8 (optimize: precompute this value)
1754 for(int i=0; i<2; i++)
1756 uint8 bits3 = pixels >> 56, bits2 = pixels >> 48,
1757 bits1 = pixels >> 40, bits0 = pixels >> 32;
1759 if (flagTRANS && (bits3 | bits2 | bits1 | bits0) == 0)
1762 *currentLineBuffer = bits3,
1763 *(currentLineBuffer + 1) = bits2,
1764 *(currentLineBuffer + 2) = bits1,
1765 *(currentLineBuffer + 3) = bits0;
1767 currentLineBuffer += lbufDelta;