4 // Original source by David Raingeard (Cal2)
5 // GCC/SDL port by Niels Wagenaar (Linux/WIN32) and Caz (BeOS)
6 // Extensive cleanups/fixes/rewrites by James L. Hammons
15 //#define OP_DEBUG_BMP
// Blend-table lookups. Both macros index a 0x10000-byte table with
// (dst << 8) | src, i.e. dst selects the high byte of the index and src the
// low byte.
// NOTE(review): src is parenthesized before its (uint16) cast but dst is not,
// so a compound dst argument such as `a + b` would have only `a` cast.
// Presumably callers pass simple 8-bit values -- confirm at the call sites.
17 #define BLEND_Y(dst, src) op_blend_y[(((uint16)dst<<8)) | ((uint16)(src))]
18 #define BLEND_CR(dst, src) op_blend_cr[(((uint16)dst)<<8) | ((uint16)(src))]
// Object type codes: the low three bits of an object's first phrase
// (p0 & 0x07) are compared against these values.
20 #define OBJECT_TYPE_BITMAP 0 // 000
21 #define OBJECT_TYPE_SCALE 1 // 001
22 #define OBJECT_TYPE_GPU 2 // 010
23 #define OBJECT_TYPE_BRANCH 3 // 011
24 #define OBJECT_TYPE_STOP 4 // 100
// Condition codes for BRANCH objects, matched against the cc field decoded
// from the first phrase.
// NOTE(review): the decoders visible in this file extract cc with
// (p0 >> 14) & 0x03, which can only yield 0-3, so CONDITION_SECOND_HALF_LINE
// (4) can never be selected by them -- confirm the intended field width.
26 #define CONDITION_EQUAL 0
27 #define CONDITION_LESS_THAN 1
28 #define CONDITION_GREATER_THAN 2
29 #define CONDITION_OP_FLAG_SET 3
30 #define CONDITION_SECOND_HALF_LINE 4
// Bit masks for the 4-bit flags field read from bits 45-48 of an object's
// second phrase ((p1 >> 45) & 0x0F).
32 #define OPFLAG_RELEASE 8 // Bus release bit
33 #define OPFLAG_TRANS 4 // Transparency bit
34 #define OPFLAG_RMW 2 // Read-Modify-Write bit
35 #define OPFLAG_REFLECT 1 // Horizontal mirror bit
37 // Private function prototypes
// NOTE(review): these are labelled private but are declared without `static`,
// so they have external linkage.
// OPProcessFixedBitmap / OPProcessScaledBitmap are called from OPProcessList
// for BITMAP and SCALE objects respectively; `render` is passed through from
// OPProcessList's own `render` argument.
39 void OPProcessFixedBitmap(uint64 p0, uint64 p1, bool render);
40 void OPProcessScaledBitmap(uint64 p0, uint64 p1, uint64 p2, bool render);
// Logging helpers that decode an object's phrases and print the fields.
41 void DumpScaledObject(uint64 p0, uint64 p1, uint64 p2);
42 void DumpFixedObject(uint64 p0, uint64 p1);
// Reads one 64-bit phrase from an 8-byte-aligned address.
43 uint64 op_load_phrase(uint32 offset);
45 // Local global variables
// Blend lookup tables, allocated as 0x10000 bytes each and indexed through
// the BLEND_Y / BLEND_CR macros.
47 static uint8 * op_blend_y;
48 static uint8 * op_blend_cr;
49 // There may be a problem with this "RAM" overlapping (and thus being independent of)
50 // some of the regular TOM RAM...
// Backing store for the OP registers. The code visible in this file touches
// offsets 0x10-0x17 (current object), 0x20-0x23 (list pointer) and
// 0x26-0x27 (status/flag word).
51 static uint8 objectp_ram[0x40]; // This is based at $F00000
// Not referenced by any other line visible in this listing.
52 uint8 objectp_running;
53 //bool objectp_stop_reading_list;
// Maps the 3-bit depth field ((p1 >> 12) & 0x07) to bits per pixel; code 7
// maps to 0.
55 static uint8 op_bitmap_bit_depth[8] = { 1, 2, 4, 8, 16, 24, 32, 0 };
56 //static uint32 op_bitmap_bit_size[8] =
57 // { (uint32)(0.125*65536), (uint32)(0.25*65536), (uint32)(0.5*65536), (uint32)(1*65536),
58 // (uint32)(2*65536), (uint32)(1*65536), (uint32)(1*65536), (uint32)(1*65536) };
// Address used to fetch phrases while walking the object list: loaded from
// the list pointer register at the start of OPProcessList and later replaced
// by an object's link field.
59 static uint32 op_pointer;
// Pixels per 64-bit phrase, indexed by the same 3-bit depth code; for codes
// 0-4 the entry equals 64 divided by the bits per pixel.
// NOTE(review): entries 6 and 7 are 0 and OPProcessFixedBitmap divides by
// phraseWidthToPixels[depth], so those depth codes would divide by zero.
61 int32 phraseWidthToPixels[8] = { 64, 32, 16, 8, 4, 2, 0, 0 };
65 // Object Processor initialization
69 // Blend tables (64K each)
70 memory_malloc_secure((void **)&op_blend_y, 0x10000, "Jaguar Object processor Y blend lookup table");
71 memory_malloc_secure((void **)&op_blend_cr, 0x10000, "Jaguar Object processor CR blend lookup table");
73 // Here we calculate the saturating blend of a signed 4-bit value and an
74 // existing Cyan/Red value as well as a signed 8-bit value and an existing intensity...
75 // Note: CRY is 4 bits Cyan, 4 bits Red, 8 bits intensitY
76 for(int i=0; i<256*256; i++)
78 int y = (i >> 8) & 0xFF;
79 int dy = (int8)i; // Sign extend the Y index
80 int c1 = (i >> 8) & 0x0F;
81 int dc1 = (int8)(i << 4) >> 4; // Sign extend the R index
82 int c2 = (i >> 12) & 0x0F;
83 int dc2 = (int8)(i & 0xF0) >> 4; // Sign extend the C index
108 op_blend_cr[i] = (c2 << 4) | c1;
115 // Object Processor reset
119 memset(objectp_ram, 0x00, 0x40);
126 { "(BITMAP)", "(SCALED BITMAP)", "(GPU INT)", "(BRANCH)", "(STOP)", "???", "???", "???" };
128 { "\"==\"", "\"<\"", "\">\"", "(opflag set)", "(second half line)", "?", "?", "?" };
130 uint32 olp = op_get_list_pointer();
131 WriteLog("OP: OLP = %08X\n", olp);
132 WriteLog("OP: Phrase dump\n ----------\n");
133 for(uint32 i=0; i<0x100; i+=8)
135 uint32 hi = JaguarReadLong(olp + i, OP), lo = JaguarReadLong(olp + i + 4, OP);
136 WriteLog("\t%08X: %08X %08X %s", olp + i, hi, lo, opType[lo & 0x07]);
137 if ((lo & 0x07) == 3)
139 uint16 ypos = (lo >> 3) & 0x7FF;
140 uint8 cc = (lo >> 14) & 0x03;
141 uint32 link = ((hi << 11) | (lo >> 21)) & 0x3FFFF8;
142 WriteLog(" YPOS=%u, CC=%s, link=%08X", ypos, ccType[cc], link);
145 if ((lo & 0x07) == 0)
146 DumpFixedObject(op_load_phrase(olp+i), op_load_phrase(olp+i+8));
147 if ((lo & 0x07) == 1)
148 DumpScaledObject(op_load_phrase(olp+i), op_load_phrase(olp+i+8), op_load_phrase(olp+i+16));
152 memory_free(op_blend_y);
153 memory_free(op_blend_cr);
157 // Object Processor memory access
158 // Memory range: F00010 - F00027
160 // F00010-F00017 R xxxxxxxx xxxxxxxx OB - current object code from the graphics processor
161 // F00020-F00023 W xxxxxxxx xxxxxxxx OLP - start of the object list
162 // F00026 W -------- -------x OBF - object processor flag
// Returns the byte stored at `offset` in the OP register store objectp_ram.
// `who` is not referenced by the lines visible here.
// NOTE(review): objectp_ram holds 0x40 bytes and no bounds handling for
// `offset` is visible in this listing -- confirm it is masked before use.
165 uint8 OPReadByte(uint32 offset, uint32 who/*=UNKNOWN*/)
168 return objectp_ram[offset];
// Returns the 16-bit value that GET16 reads from objectp_ram at `offset`.
// GET16 is defined outside this listing; presumably it assembles a big-endian
// word from two bytes -- confirm against its definition.
// `who` is not referenced by the lines visible here.
// NOTE(review): no bounds handling for `offset` is visible in this listing.
171 uint16 OPReadWord(uint32 offset, uint32 who/*=UNKNOWN*/)
174 return GET16(objectp_ram, offset);
// Stores `data` at `offset` in the OP register store objectp_ram.
// `who` is not referenced by the lines visible here.
// NOTE(review): objectp_ram holds 0x40 bytes and no bounds handling for
// `offset` is visible in this listing -- confirm it is masked before use.
177 void OPWriteByte(uint32 offset, uint8 data, uint32 who/*=UNKNOWN*/)
180 objectp_ram[offset] = data;
// Stores the 16-bit `data` into objectp_ram at `offset` via SET16 (defined
// outside this listing). `who` is not referenced by the lines visible here.
// NOTE(review): no bounds handling for `offset` is visible in this listing.
183 void OPWriteWord(uint32 offset, uint16 data, uint32 who/*=UNKNOWN*/)
186 SET16(objectp_ram, offset, data);
// Disabled debug logging of list-pointer writes follows.
188 /*if (offset == 0x20)
189 WriteLog("OP: Setting lo list pointer: %04X\n", data);
191 WriteLog("OP: Setting hi list pointer: %04X\n", data);//*/
// Returns the 32-bit object list pointer assembled from two register words:
// the word at offset 0x20 supplies the low 16 bits and the word at offset
// 0x22 supplies the high 16 bits.
194 uint32 op_get_list_pointer(void)
196 // Note: This register is LO / HI WORD, hence the funky look of this...
197 // return (objectp_ram[0x22] << 24) | (objectp_ram[0x23] << 16) | (objectp_ram[0x20] << 8) | objectp_ram[0x21];
198 return GET16(objectp_ram, 0x20) | (GET16(objectp_ram, 0x22) << 16);
201 // This is WRONG, since the OBF is only 16 bits wide!!! [FIXED]
// Returns the 16-bit word at offset 0x26 of objectp_ram, widened to uint32.
// The commented-out lines are the earlier 32-bit reads this replaced.
203 uint32 op_get_status_register(void)
205 // return (objectp_ram[0x26] << 24) | (objectp_ram[0x27] << 16) | (objectp_ram[0x28] << 8) | objectp_ram[0x29];
206 // return GET32(objectp_ram, 0x26);
207 return GET16(objectp_ram, 0x26);
210 // This is WRONG, since the OBF is only 16 bits wide!!! [FIXED]
// Updates the 16-bit word at offsets 0x26-0x27 of objectp_ram from the low
// 16 bits of `data`; the upper 16 bits of `data` are ignored.
// The block comment holds the earlier 32-bit version.
212 void op_set_status_register(uint32 data)
214 /* objectp_ram[0x26] = (data & 0xFF000000) >> 24;
215 objectp_ram[0x27] = (data & 0x00FF0000) >> 16;
216 objectp_ram[0x28] = (data & 0x0000FF00) >> 8;
217 objectp_ram[0x29] |= (data & 0xFE);*/
// High byte is overwritten with bits 8-15 of data.
218 objectp_ram[0x26] = (data & 0x0000FF00) >> 8;
// Low byte is OR-ed, not assigned: bit 0 is never modified here and bits 1-7
// can be set but not cleared by this function.
// NOTE(review): confirm that the sticky behaviour of bits 1-7 is intended.
219 objectp_ram[0x27] |= (data & 0xFE);
// Stores the 64-bit `object` into objectp_ram[0x10..0x17] in big-endian byte
// order: the most significant byte ends up at 0x10, the least significant at
// 0x17. The block comment holds an earlier layout that swapped the two
// 32-bit halves.
222 void op_set_current_object(uint64 object)
224 //Not sure this is right... Wouldn't it just be stored 64 bit BE?
225 // Stored as least significant 32 bits first, ms32 last in big endian
226 /* objectp_ram[0x13] = object & 0xFF; object >>= 8;
227 objectp_ram[0x12] = object & 0xFF; object >>= 8;
228 objectp_ram[0x11] = object & 0xFF; object >>= 8;
229 objectp_ram[0x10] = object & 0xFF; object >>= 8;
231 objectp_ram[0x17] = object & 0xFF; object >>= 8;
232 objectp_ram[0x16] = object & 0xFF; object >>= 8;
233 objectp_ram[0x15] = object & 0xFF; object >>= 8;
234 objectp_ram[0x14] = object & 0xFF;*/
235 // Let's try regular good old big endian...
// Peel bytes off the low end of `object`, writing from 0x17 down to 0x10.
236 objectp_ram[0x17] = object & 0xFF; object >>= 8;
237 objectp_ram[0x16] = object & 0xFF; object >>= 8;
238 objectp_ram[0x15] = object & 0xFF; object >>= 8;
239 objectp_ram[0x14] = object & 0xFF; object >>= 8;
241 objectp_ram[0x13] = object & 0xFF; object >>= 8;
242 objectp_ram[0x12] = object & 0xFF; object >>= 8;
243 objectp_ram[0x11] = object & 0xFF; object >>= 8;
244 objectp_ram[0x10] = object & 0xFF;
// Reads one 64-bit phrase. `offset` is first rounded down to a multiple of 8;
// the long read at that address becomes the upper 32 bits of the result and
// the long read at address + 4 becomes the lower 32 bits.
247 uint64 op_load_phrase(uint32 offset)
249 offset &= ~0x07; // 8 byte alignment
250 return ((uint64)JaguarReadLong(offset, OP) << 32) | (uint64)JaguarReadLong(offset+4, OP);
// Writes the 64-bit phrase `p` as two longs. `offset` is first rounded down
// to a multiple of 8; the upper 32 bits of `p` go to that address and the
// lower 32 bits to address + 4 (the mirror of op_load_phrase).
253 void OPStorePhrase(uint32 offset, uint64 p)
255 offset &= ~0x07; // 8 byte alignment
256 JaguarWriteLong(offset, p >> 32, OP);
257 JaguarWriteLong(offset + 4, p & 0xFFFFFFFF, OP);
261 // Debugging routines
// Logs a decoded scaled-bitmap object via WriteLog.
//   p0, p1, p2 - the object's first, second and third phrases.
// The addresses printed next to p1 and p2 come from the file-scope op_pointer
// (op_pointer and op_pointer + 8), not from the arguments.
// NOTE(review): the object-list dump calls this with phrases loaded from
// olp + i without updating op_pointer, so the printed addresses may not match
// the phrases in that case -- confirm.
263 void DumpScaledObject(uint64 p0, uint64 p1, uint64 p2)
265 WriteLog(" (SCALED BITMAP)");
266 WriteLog(" %08X --> phrase %08X %08X\n", op_pointer, (uint32)(p1>>32), (uint32)(p1&0xFFFFFFFF));
267 WriteLog(" %08X --> phrase %08X %08X ", op_pointer+8, (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
// Field extraction: each value below is a bit range of p0 or p1.
268 uint8 bitdepth = (p1 >> 12) & 0x07;
269 //WAS: int16 ypos = ((p0 >> 3) & 0x3FF); // ??? What if not interlaced (/2)?
270 int16 ypos = ((p0 >> 3) & 0x7FF); // ??? What if not interlaced (/2)?
271 int32 xpos = p1 & 0xFFF;
// Sign-extend the 12-bit xpos: if bit 11 is set, fill the upper bits with 1s.
272 xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);
273 uint32 iwidth = ((p1 >> 28) & 0x3FF);
274 uint32 dwidth = ((p1 >> 18) & 0x3FF); // Unsigned!
275 uint16 height = ((p0 >> 14) & 0x3FF);
// link and ptr are stored without their low three bits, hence the << 3.
276 uint32 link = ((p0 >> 24) & 0x7FFFF) << 3;
277 uint32 ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
278 uint32 firstPix = (p1 >> 49) & 0x3F;
279 uint8 flags = (p1 >> 45) & 0x0F;
280 uint8 idx = (p1 >> 38) & 0x7F;
281 uint32 pitch = (p1 >> 15) & 0x07;
282 WriteLog("\n [%u (%u) x %u @ (%i, %u) (%u bpp), l: %08X, p: %08X fp: %02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
283 iwidth, dwidth, height, xpos, ypos, op_bitmap_bit_depth[bitdepth], link, ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""), (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""), (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
// Scaling fields are the three low bytes of the third phrase.
284 uint32 hscale = p2 & 0xFF;
285 uint32 vscale = (p2 >> 8) & 0xFF;
286 uint32 remainder = (p2 >> 16) & 0xFF;
287 WriteLog(" [hsc: %02X, vsc: %02X, rem: %02X]\n", hscale, vscale, remainder);
// Logs a decoded fixed (unscaled) bitmap object via WriteLog.
//   p0, p1 - the object's first and second phrases.
// The address printed next to p1 comes from the file-scope op_pointer, not
// from the arguments.
// NOTE(review): the object-list dump calls this with phrases loaded from
// olp + i without updating op_pointer, so the printed address may not match
// the phrase in that case -- confirm.
290 void DumpFixedObject(uint64 p0, uint64 p1)
292 WriteLog(" (BITMAP)");
293 WriteLog(" %08X --> phrase %08X %08X\n", op_pointer, (uint32)(p1>>32), (uint32)(p1&0xFFFFFFFF));
// Field extraction: each value below is a bit range of p0 or p1.
294 uint8 bitdepth = (p1 >> 12) & 0x07;
295 //WAS: int16 ypos = ((p0 >> 3) & 0x3FF); // ??? What if not interlaced (/2)?
296 int16 ypos = ((p0 >> 3) & 0x7FF); // ??? What if not interlaced (/2)?
297 int32 xpos = p1 & 0xFFF;
// Sign-extend the 12-bit xpos: if bit 11 is set, fill the upper bits with 1s.
298 xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);
299 uint32 iwidth = ((p1 >> 28) & 0x3FF);
300 uint32 dwidth = ((p1 >> 18) & 0x3FF); // Unsigned!
301 uint16 height = ((p0 >> 14) & 0x3FF);
// link and ptr are stored without their low three bits, hence the << 3.
302 uint32 link = ((p0 >> 24) & 0x7FFFF) << 3;
303 uint32 ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
304 uint32 firstPix = (p1 >> 49) & 0x3F;
305 uint8 flags = (p1 >> 45) & 0x0F;
306 uint8 idx = (p1 >> 38) & 0x7F;
307 uint32 pitch = (p1 >> 15) & 0x07;
308 WriteLog(" [%u (%u) x %u @ (%i, %u) (%u bpp), l: %08X, p: %08X fp: %02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
309 iwidth, dwidth, height, xpos, ypos, op_bitmap_bit_depth[bitdepth], link, ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""), (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""), (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
313 // Object Processor main routine
315 //Need to fix this so that when a GPU object IRQ happens, we can pick up OP processing
316 //where we left off. !!! FIX !!!
317 void OPProcessList(int scanline, bool render)
319 extern int op_start_log;
320 // char * condition_to_str[8] =
321 // { "==", "<", ">", "(opflag set)", "(second half line)", "?", "?", "?" };
323 op_pointer = op_get_list_pointer();
325 // objectp_stop_reading_list = false;
327 //WriteLog("OP: Processing line #%u (OLP=%08X)...\n", scanline, op_pointer);
330 // *** BEGIN OP PROCESSOR TESTING ONLY ***
331 extern bool interactiveMode;
333 extern int objectPtr;
335 int bitmapCounter = 0;
336 // *** END OP PROCESSOR TESTING ONLY ***
338 uint32 opCyclesToRun = 10000; // This is a pulled-out-of-the-air value (will need to be fixed, obviously!)
340 // if (op_pointer) WriteLog(" new op list at 0x%.8x scanline %i\n",op_pointer,scanline);
343 // *** BEGIN OP PROCESSOR TESTING ONLY ***
344 if (interactiveMode && bitmapCounter == objectPtr)
348 // *** END OP PROCESSOR TESTING ONLY ***
349 // if (objectp_stop_reading_list)
352 uint64 p0 = op_load_phrase(op_pointer);
353 //WriteLog("\t%08X type %i\n", op_pointer, (uint8)p0 & 0x07);
355 if (scanline == tom_get_vdb() && op_start_log)
356 //if (scanline == 215 && op_start_log)
357 //if (scanline == 28 && op_start_log)
359 WriteLog("%08X --> phrase %08X %08X", op_pointer - 8, (int)(p0>>32), (int)(p0&0xFFFFFFFF));
360 if ((p0 & 0x07) == OBJECT_TYPE_BITMAP)
362 WriteLog(" (BITMAP) ");
363 uint64 p1 = op_load_phrase(op_pointer);
364 WriteLog("\n%08X --> phrase %08X %08X ", op_pointer, (int)(p1>>32), (int)(p1&0xFFFFFFFF));
365 uint8 bitdepth = (p1 >> 12) & 0x07;
366 //WAS: int16 ypos = ((p0 >> 3) & 0x3FF); // ??? What if not interlaced (/2)?
367 int16 ypos = ((p0 >> 3) & 0x7FF); // ??? What if not interlaced (/2)?
368 int32 xpos = p1 & 0xFFF;
369 xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);
370 uint32 iwidth = ((p1 >> 28) & 0x3FF);
371 uint32 dwidth = ((p1 >> 18) & 0x3FF); // Unsigned!
372 uint16 height = ((p0 >> 14) & 0x3FF);
373 uint32 link = ((p0 >> 24) & 0x7FFFF) << 3;
374 uint32 ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
375 uint32 firstPix = (p1 >> 49) & 0x3F;
376 uint8 flags = (p1 >> 45) & 0x0F;
377 uint8 idx = (p1 >> 38) & 0x7F;
378 uint32 pitch = (p1 >> 15) & 0x07;
379 WriteLog("\n [%u (%u) x %u @ (%i, %u) (%u bpp), l: %08X, p: %08X fp: %02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
380 iwidth, dwidth, height, xpos, ypos, op_bitmap_bit_depth[bitdepth], link, ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""), (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""), (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
382 if ((p0 & 0x07) == OBJECT_TYPE_SCALE)
384 WriteLog(" (SCALED BITMAP)");
385 uint64 p1 = op_load_phrase(op_pointer), p2 = op_load_phrase(op_pointer+8);
386 WriteLog("\n%08X --> phrase %08X %08X ", op_pointer, (int)(p1>>32), (int)(p1&0xFFFFFFFF));
387 WriteLog("\n%08X --> phrase %08X %08X ", op_pointer+8, (int)(p2>>32), (int)(p2&0xFFFFFFFF));
388 uint8 bitdepth = (p1 >> 12) & 0x07;
389 //WAS: int16 ypos = ((p0 >> 3) & 0x3FF); // ??? What if not interlaced (/2)?
390 int16 ypos = ((p0 >> 3) & 0x7FF); // ??? What if not interlaced (/2)?
391 int32 xpos = p1 & 0xFFF;
392 xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);
393 uint32 iwidth = ((p1 >> 28) & 0x3FF);
394 uint32 dwidth = ((p1 >> 18) & 0x3FF); // Unsigned!
395 uint16 height = ((p0 >> 14) & 0x3FF);
396 uint32 link = ((p0 >> 24) & 0x7FFFF) << 3;
397 uint32 ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
398 uint32 firstPix = (p1 >> 49) & 0x3F;
399 uint8 flags = (p1 >> 45) & 0x0F;
400 uint8 idx = (p1 >> 38) & 0x7F;
401 uint32 pitch = (p1 >> 15) & 0x07;
402 WriteLog("\n [%u (%u) x %u @ (%i, %u) (%u bpp), l: %08X, p: %08X fp: %02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
403 iwidth, dwidth, height, xpos, ypos, op_bitmap_bit_depth[bitdepth], link, ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""), (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""), (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
404 uint32 hscale = p2 & 0xFF;
405 uint32 vscale = (p2 >> 8) & 0xFF;
406 uint32 remainder = (p2 >> 16) & 0xFF;
407 WriteLog(" [hsc: %02X, vsc: %02X, rem: %02X]\n", hscale, vscale, remainder);
409 if ((p0 & 0x07) == OBJECT_TYPE_GPU)
410 WriteLog(" (GPU)\n");
411 if ((p0 & 0x07) == OBJECT_TYPE_BRANCH)
413 WriteLog(" (BRANCH)\n");
414 uint8 * jaguar_mainRam = GetRamPtr();
415 WriteLog("[RAM] --> ");
416 for(int k=0; k<8; k++)
417 WriteLog("%02X ", jaguar_mainRam[op_pointer-8 + k]);
420 if ((p0 & 0x07) == OBJECT_TYPE_STOP)
421 WriteLog(" --> List end\n");
424 switch ((uint8)p0 & 0x07)
426 case OBJECT_TYPE_BITMAP:
428 //WAS: uint16 ypos = (p0 >> 3) & 0x3FF;
429 uint16 ypos = (p0 >> 3) & 0x7FF;
430 // This is only theory implied by Rayman...!
431 // It seems that if the YPOS is zero, then bump the YPOS value so that it coincides with
432 // the VDB value. With interlacing, this would be slightly more tricky.
433 // There's probably another bit somewhere that enables this mode--but so far, doesn't seem
434 // to affect any other game in a negative way (that I've seen).
435 // Either that, or it's an undocumented bug...
437 //No, the reason this was needed is that the OP code before was wrong. Any value
438 //less than VDB will get written to the top line of the display!
440 // ypos = TOMReadWord(0xF00046, OP) / 2; // Get the VDB value
441 uint32 height = (p0 & 0xFFC000) >> 14;
442 uint32 oldOPP = op_pointer - 8;
443 // *** BEGIN OP PROCESSOR TESTING ONLY ***
444 if (inhibit && op_start_log)
445 WriteLog("!!! ^^^ This object is INHIBITED! ^^^ !!!\n");
447 if (!inhibit) // For OP testing only!
448 // *** END OP PROCESSOR TESTING ONLY ***
449 if (scanline >= ypos && height > 0)
451 uint64 p1 = op_load_phrase(op_pointer);
453 //WriteLog("OP: Writing scanline %d with ypos == %d...\n", scanline, ypos);
454 //WriteLog("--> Writing %u BPP bitmap...\n", op_bitmap_bit_depth[(p1 >> 12) & 0x07]);
455 // OPProcessFixedBitmap(scanline, p0, p1, render);
456 OPProcessFixedBitmap(p0, p1, render);
460 //???Does this really happen??? Doesn't seem to work if you do this...!
461 //Probably not. Must be a bug in the documentation...!
462 // uint32 link = (p0 & 0x7FFFF000000) >> 21;
463 // SET16(objectp_ram, 0x20, link & 0xFFFF); // OLP
464 // SET16(objectp_ram, 0x22, link >> 16);
465 /* uint32 height = (p0 & 0xFFC000) >> 14;
468 // NOTE: Would subtract 2 if in interlaced mode...!
469 // uint64 height = ((p0 & 0xFFC000) - 0x4000) & 0xFFC000;
473 uint64 data = (p0 & 0xFFFFF80000000000LL) >> 40;
474 uint64 dwidth = (p1 & 0xFFC0000) >> 15;
477 p0 &= ~0xFFFFF80000FFC000LL; // Mask out old data...
478 p0 |= (uint64)height << 14;
480 OPStorePhrase(oldOPP, p0);
482 //WriteLog("\t\tOld OP: %08X -> ", op_pointer);
483 //Temp, for testing...
484 //No doubt, this type of check will break all kinds of stuff... !!! FIX !!!
485 //And it does! !!! FIX !!!
486 //Let's remove this "fix" since it screws up more than it fixes.
487 /* if (op_pointer > ((p0 & 0x000007FFFF000000LL) >> 21))
490 op_pointer = (p0 & 0x000007FFFF000000LL) >> 21;
491 //WriteLog("New OP: %08X\n", op_pointer);
494 case OBJECT_TYPE_SCALE:
496 //WAS: uint16 ypos = (p0 >> 3) & 0x3FF;
497 uint16 ypos = (p0 >> 3) & 0x7FF;
498 uint32 height = (p0 & 0xFFC000) >> 14;
499 uint32 oldOPP = op_pointer - 8;
500 // *** BEGIN OP PROCESSOR TESTING ONLY ***
501 if (inhibit && op_start_log)
503 WriteLog("!!! ^^^ This object is INHIBITED! ^^^ !!! (scanline=%u, ypos=%u, height=%u)\n", scanline, ypos, height);
504 DumpScaledObject(p0, op_load_phrase(op_pointer), op_load_phrase(op_pointer+8));
507 if (!inhibit) // For OP testing only!
508 // *** END OP PROCESSOR TESTING ONLY ***
509 if (scanline >= ypos && height > 0)
511 uint64 p1 = op_load_phrase(op_pointer);
513 uint64 p2 = op_load_phrase(op_pointer);
515 //WriteLog("OP: %08X (%d) %08X%08X %08X%08X %08X%08X\n", oldOPP, scanline, (uint32)(p0>>32), (uint32)(p0&0xFFFFFFFF), (uint32)(p1>>32), (uint32)(p1&0xFFFFFFFF), (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
516 OPProcessScaledBitmap(p0, p1, p2, render);
520 uint8 remainder = p2 >> 16, vscale = p2 >> 8;
521 //Actually, we should skip this object if it has a vscale of zero.
522 //Or do we? Not sure... Atari Karts has a few lines that look like:
524 //000E8268 --> phrase 00010000 7000B00D
525 // [7 (0) x 1 @ (13, 0) (8 bpp), l: 000E82A0, p: 000E0FC0 fp: 00, fl:RELEASE, idx:00, pt:01]
526 // [hsc: 9A, vsc: 00, rem: 00]
527 // Could it be the vscale is overridden if the DWIDTH is zero? Hmm...
530 vscale = 0x20; // OP bug??? Nope, it isn't...! Or is it?
532 //extern int start_logging;
534 // WriteLog("--> Returned from scaled bitmap processing (rem=%02X, vscale=%02X)...\n", remainder, vscale);//*/
536 //--> Returned from scaled bitmap processing (rem=20, vscale=80)...
537 //There are other problems here, it looks like...
539 //About to execute OP (508)...
541 OP: Scaled bitmap 4x? 4bpp at 38,? hscale=7C fpix=0 data=00075E28 pitch 1 hflipped=no dwidth=? (linked to 00071118) Transluency=no
542 --> Returned from scaled bitmap processing (rem=50, vscale=7C)...
543 OP: Scaled bitmap 4x? 4bpp at 38,? hscale=7C fpix=0 data=00075E28 pitch 1 hflipped=no dwidth=? (linked to 00071118) Transluency=no
544 --> Returned from scaled bitmap processing (rem=30, vscale=7C)...
545 OP: Scaled bitmap 4x? 4bpp at 38,? hscale=7C fpix=0 data=00075E28 pitch 1 hflipped=no dwidth=? (linked to 00071118) Transluency=no
546 --> Returned from scaled bitmap processing (rem=10, vscale=7C)...
547 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756A8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
548 --> Returned from scaled bitmap processing (rem=00, vscale=7E)...
549 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
550 --> Returned from scaled bitmap processing (rem=00, vscale=80)...
551 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
552 --> Returned from scaled bitmap processing (rem=5E, vscale=7E)...
553 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756E8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
554 --> Returned from scaled bitmap processing (rem=60, vscale=80)...
555 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
556 --> Returned from scaled bitmap processing (rem=3E, vscale=7E)...
557 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756E8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
558 --> Returned from scaled bitmap processing (rem=40, vscale=80)...
559 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
560 --> Returned from scaled bitmap processing (rem=1E, vscale=7E)...
561 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756E8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
562 --> Returned from scaled bitmap processing (rem=20, vscale=80)...
564 //Here's another problem:
565 // [hsc: 20, vsc: 20, rem: 00]
566 // Since we're not checking for $E0 (but that's what we get from the above), we end
567 // up repeating this scanline unnecessarily... !!! FIX !!! [DONE, but... still not quite
568 // right. Either that, or the Accolade team that wrote Bubsy screwed up royal.]
569 //Also note: $E0 = 7.0 which IS a legal vscale value...
571 // if (remainder & 0x80) // I.e., it's negative
572 // if ((remainder & 0x80) || remainder == 0) // I.e., it's <= 0
573 // if ((remainder - 1) >= 0xE0) // I.e., it's <= 0
574 // if ((remainder >= 0xE1) || remainder == 0)// I.e., it's <= 0
575 // if ((remainder >= 0xE1 && remainder <= 0xFF) || remainder == 0)// I.e., it's <= 0
576 if (remainder <= 0x20) // I.e., it's <= 0
578 uint64 data = (p0 & 0xFFFFF80000000000LL) >> 40;
579 uint64 dwidth = (p1 & 0xFFC0000) >> 15;
581 // while (remainder & 0x80)
582 // while ((remainder & 0x80) || remainder == 0)
583 // while ((remainder - 1) >= 0xE0)
584 // while ((remainder >= 0xE1) || remainder == 0)
585 // while ((remainder >= 0xE1 && remainder <= 0xFF) || remainder == 0)
586 while (remainder <= 0x20)
596 p0 &= ~0xFFFFF80000FFC000LL; // Mask out old data...
597 p0 |= (uint64)height << 14;
599 OPStorePhrase(oldOPP, p0);
602 remainder -= 0x20; // 1.0f in [3.5] fixed point format
605 // WriteLog("--> Finished writebacks...\n");//*/
607 //WriteLog(" [%08X%08X -> ", (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
608 p2 &= ~0x0000000000FF0000LL;
609 p2 |= (uint64)remainder << 16;
610 //WriteLog("%08X%08X]\n", (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
611 OPStorePhrase(oldOPP+16, p2);
612 //remainder = (uint8)(p2 >> 16), vscale = (uint8)(p2 >> 8);
613 //WriteLog(" [after]: rem=%02X, vscale=%02X\n", remainder, vscale);
615 op_pointer = (p0 & 0x000007FFFF000000LL) >> 21;
618 case OBJECT_TYPE_GPU:
620 //WriteLog("OP: Asserting GPU IRQ #3...\n");
621 op_set_current_object(p0);
622 GPUSetIRQLine(3, ASSERT_LINE);
623 //Also, OP processing is suspended from this point until OBF (F00026) is written to...
626 //OPSuspendedByGPU = true;
627 //Dunno if the OP keeps processing from where it was interrupted, or if it just continues
628 //on the next scanline...
629 // --> It continues from where it was interrupted! !!! FIX !!!
632 case OBJECT_TYPE_BRANCH:
634 uint16 ypos = (p0 >> 3) & 0x7FF;
635 uint8 cc = (p0 >> 14) & 0x03;
636 uint32 link = (p0 >> 21) & 0x3FFFF8;
638 // if ((ypos!=507)&&(ypos!=25))
639 // WriteLog("\t%i%s%i link=0x%.8x\n",scanline,condition_to_str[cc],ypos>>1,link);
642 case CONDITION_EQUAL:
643 if (TOMReadWord(0xF00006, OP) == ypos || ypos == 0x7FF)
646 case CONDITION_LESS_THAN:
647 if (TOMReadWord(0xF00006, OP) < ypos)
650 case CONDITION_GREATER_THAN:
651 if (TOMReadWord(0xF00006, OP) > ypos)
654 case CONDITION_OP_FLAG_SET:
655 if (op_get_status_register() & 0x01)
658 case CONDITION_SECOND_HALF_LINE:
659 // This basically means branch if bit 10 of HC is set
660 WriteLog("OP: Unexpected CONDITION_SECOND_HALF_LINE in BRANCH object\nOP: shuting down\n");
665 WriteLog("OP: Unimplemented branch condition %i\n", cc);
669 case OBJECT_TYPE_STOP:
673 //WriteLog("OP: --> STOP\n");
674 // op_set_status_register(((p0>>3) & 0xFFFFFFFF));
675 //This seems more likely...
676 op_set_current_object(p0);
680 tom_set_pending_object_int();
681 if (tom_irq_enabled(IRQ_OPFLAG))// && jaguar_interrupt_handler_is_valid(64))
682 m68k_set_irq(7); // Cause an NMI to occur...
689 WriteLog("op: unknown object type %i\n", ((uint8)p0 & 0x07));
693 // Here is a little sanity check to keep the OP from locking up the machine
694 // when fed bad data. Better would be to count how many actual cycles it used
695 // and bail out/reenter to properly simulate an overloaded OP... !!! FIX !!!
703 // Store fixed size bitmap in line buffer
705 void OPProcessFixedBitmap(uint64 p0, uint64 p1, bool render)
707 // Need to make sure that when writing that it stays within the line buffer...
708 // LBUF ($F01800 - $F01D9E) 360 x 32-bit RAM
709 uint8 depth = (p1 >> 12) & 0x07; // Color depth of image
710 int32 xpos = ((int16)((p1 << 4) & 0xFFFF)) >> 4;// Image xpos in LBUF
711 uint32 iwidth = (p1 >> 28) & 0x3FF; // Image width in *phrases*
712 uint32 data = (p0 >> 40) & 0xFFFFF8; // Pixel data address
713 //#ifdef OP_DEBUG_BMP
714 uint32 firstPix = (p1 >> 49) & 0x3F;
715 // "The LSB is significant only for scaled objects..." -JTRM
716 // "In 1 BPP mode, all five bits are significant. In 2 BPP mode, the top four are significant..."
719 // We can ignore the RELEASE (high order) bit for now--probably forever...!
720 // uint8 flags = (p1 >> 45) & 0x0F; // REFLECT, RMW, TRANS, RELEASE
721 //Optimize: break these out to their own BOOL values
722 uint8 flags = (p1 >> 45) & 0x07; // REFLECT (0), RMW (1), TRANS (2)
723 bool flagREFLECT = (flags & OPFLAG_REFLECT ? true : false),
724 flagRMW = (flags & OPFLAG_RMW ? true : false),
725 flagTRANS = (flags & OPFLAG_TRANS ? true : false);
726 // "For images with 1 to 4 bits/pixel the top 7 to 4 bits of the index
727 // provide the most significant bits of the palette address."
728 uint8 index = (p1 >> 37) & 0xFE; // CLUT index offset (upper pix, 1-4 bpp)
729 uint32 pitch = (p1 >> 15) & 0x07; // Phrase pitch
730 pitch <<= 3; // Optimization: Multiply pitch by 8
732 // int16 scanlineWidth = tom_getVideoModeWidth();
733 uint8 * tom_ram_8 = tom_get_ram_pointer();
734 uint8 * paletteRAM = &tom_ram_8[0x400];
735 // This is OK as long as it's used correctly: For 16-bit RAM to RAM direct copies--NOT
736 // for use when using endian-corrected data (i.e., any of the *_word_read functions!)
737 uint16 * paletteRAM16 = (uint16 *)paletteRAM;
739 // WriteLog("bitmap %ix? %ibpp at %i,? firstpix=? data=0x%.8x pitch %i hflipped=%s dwidth=? (linked to ?) RMW=%s Tranparent=%s\n",
740 // iwidth, op_bitmap_bit_depth[bitdepth], xpos, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), (flags&OPFLAG_RMW ? "yes" : "no"), (flags&OPFLAG_TRANS ? "yes" : "no"));
742 // Is it OK to have a 0 for the data width??? (i.e., undocumented?)
743 // Seems to be... Seems that dwidth *can* be zero (i.e., reuse same line) as well.
744 // Pitch == 0 is OK too...
745 // if (!render || op_pointer == 0 || ptr == 0 || pitch == 0)
746 //I'm not convinced that we need to concern ourselves with data & op_pointer here either!
747 if (!render || iwidth == 0)
750 //#define OP_DEBUG_BMP
751 //#ifdef OP_DEBUG_BMP
752 // WriteLog("bitmap %ix%i %ibpp at %i,%i firstpix=%i data=0x%.8x pitch %i hflipped=%s dwidth=%i (linked to 0x%.8x) Transluency=%s\n",
753 // iwidth, height, op_bitmap_bit_depth[bitdepth], xpos, ypos, firstPix, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), dwidth, op_pointer, (flags&OPFLAG_RMW ? "yes" : "no"));
756 // int32 leftMargin = xpos, rightMargin = (xpos + (phraseWidthToPixels[depth] * iwidth)) - 1;
757 int32 startPos = xpos, endPos = xpos +
758 (!flagREFLECT ? (phraseWidthToPixels[depth] * iwidth) - 1
759 : -((phraseWidthToPixels[depth] * iwidth) + 1));
760 uint32 clippedWidth = 0, phraseClippedWidth = 0, dataClippedWidth = 0;//, phrasePixel = 0;
761 bool in24BPPMode = (((GET16(tom_ram_8, 0x0028) >> 1) & 0x03) == 1 ? true : false); // VMODE
762 // Not sure if this is Jaguar Two only location or what...
763 // From the docs, it is... If we want to limit here we should think of something else.
764 // int32 limit = GET16(tom_ram_8, 0x0008); // LIMIT
766 int32 lbufWidth = (!in24BPPMode ? limit - 1 : (limit / 2) - 1); // Zero based limit...
768 // If the image is completely to the left or right of the line buffer, then bail.
769 //If in REFLECT mode, then these values are swapped! !!! FIX !!! [DONE]
770 //There are four possibilities:
771 // 1. image sits on left edge and no REFLECT; starts out of bounds but ends in bounds.
772 // 2. image sits on left edge and REFLECT; starts in bounds but ends out of bounds.
773 // 3. image sits on right edge and REFLECT; starts out of bounds but ends in bounds.
774 // 4. image sits on right edge and no REFLECT; starts in bounds but ends out of bounds.
775 //Numbers 2 & 4 can be caught by checking the LBUF clip while in the inner loop,
776 // numbers 1 & 3 are of concern.
777 // This *indirectly* handles only cases 2 & 4! And is WRONG if REFLECT is set...!
778 // if (rightMargin < 0 || leftMargin > lbufWidth)
780 // It might be easier to swap these (if REFLECTed) and just use XPOS down below...
781 // That way, you could simply set XPOS to leftMargin if !REFLECT and to rightMargin otherwise.
782 // Still have to be careful with the DATA and IWIDTH values though...
784 // if ((!flagREFLECT && (rightMargin < 0 || leftMargin > lbufWidth))
785 // || (flagREFLECT && (leftMargin < 0 || rightMargin > lbufWidth)))
787 if ((!flagREFLECT && (endPos < 0 || startPos > lbufWidth))
788 || (flagREFLECT && (startPos < 0 || endPos > lbufWidth)))
791 // Otherwise, find the clip limits and clip the phrase as well...
792 // NOTE: I'm fudging here by letting the actual blit overstep the bounds of the
793 // line buffer, but it shouldn't matter since there are two unused line
794 // buffers below and nothing above and I'll at most write 8 bytes outside
795 // the line buffer... I could use a fractional clip begin/end value, but
796 // this makes the blit a *lot* more hairy. I might fix this in the future
797 // if it becomes necessary. (JLH)
798 // Probably wouldn't be *that* hairy. Just use a delta that tells the inner loop
799 // which pixel in the phrase is being written, and quit when either end of phrases
800 // is reached or line buffer extents are surpassed.
802 //This stuff is probably wrong as well... !!! FIX !!!
803 //The strange thing is that it seems to work, but that's no guarantee that it's bulletproof!
804 //Yup. Seems that JagMania doesn't work correctly with this...
805 //Dunno if this is the problem, but Atari Karts is showing *some* of the road now...
810 clippedWidth = 0 - leftMargin,
811 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth],
812 leftMargin = 0 - (clippedWidth % phraseWidthToPixels[depth]);
815 if (rightMargin > lbufWidth)
816 clippedWidth = rightMargin - lbufWidth,
817 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];//,
818 // rightMargin = lbufWidth + (clippedWidth % phraseWidthToPixels[depth]);
819 // rightMargin = lbufWidth;
822 WriteLog("OP: We're about to encounter a divide by zero error!\n");
823 // NOTE: We're just using endPos to figure out how much, if any, to clip by.
824 // ALSO: There may be another case where we start out of bounds and end out of bounds...!
826 if (startPos < 0) // Case #1: Begin out, end in, L to R
827 clippedWidth = 0 - startPos,
828 dataClippedWidth = phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth],
829 startPos = 0 - (clippedWidth % phraseWidthToPixels[depth]);
831 if (endPos < 0) // Case #2: Begin in, end out, R to L
832 clippedWidth = 0 - endPos,
833 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];
835 if (endPos > lbufWidth) // Case #3: Begin in, end out, L to R
836 clippedWidth = endPos - lbufWidth,
837 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];
839 if (startPos > lbufWidth) // Case #4: Begin out, end in, R to L
840 clippedWidth = startPos - lbufWidth,
841 dataClippedWidth = phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth],
842 startPos = lbufWidth + (clippedWidth % phraseWidthToPixels[depth]);
844 // If the image is sitting on the line buffer left or right edge, we need to compensate
845 // by decreasing the image phrase width accordingly.
846 iwidth -= phraseClippedWidth;
848 // Also, if we're clipping the phrase we need to make sure we're in the correct part of
850 // data += phraseClippedWidth * (pitch << 3);
851 data += dataClippedWidth * pitch;
853 // NOTE: When the bitmap is in REFLECT mode, the XPOS marks the *right* side of the
854 // bitmap! This makes clipping & etc. MUCH, much easier...!
855 // uint32 lbufAddress = 0x1800 + (!in24BPPMode ? leftMargin * 2 : leftMargin * 4);
856 //Why does this work right when multiplying startPos by 2 (instead of 4) for 24 BPP mode?
857 //Is this a bug in the OP?
858 uint32 lbufAddress = 0x1800 + (!in24BPPMode ? startPos * 2 : startPos * 2);
859 uint8 * currentLineBuffer = &tom_ram_8[lbufAddress];
863 // Hmm. We check above for 24 BPP mode, but don't do anything about it below...
864 // If we *were* in 24 BPP mode, how would you convert CRY to RGB24? Seems to me
865 // that if you're in CRY mode then you wouldn't be able to use 24 BPP bitmaps
867 // This seems to be the case (at least according to the Midsummer docs)...!
869 if (depth == 0) // 1 BPP
871 // The LSB of flags is OPFLAG_REFLECT, so sign extend it and or 2 into it.
872 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
874 // Fetch 1st phrase...
875 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
876 //Note that firstPix should only be honored *if* we start with the 1st phrase of the bitmap
877 //i.e., we didn't clip on the margin... !!! FIX !!!
878 pixels <<= firstPix; // Skip first N pixels (N=firstPix)...
879 int i = firstPix; // Start counter at right spot...
885 uint8 bit = pixels >> 63;
886 if (flagTRANS && bit == 0)
891 //Optimize: Set paletteRAM16 to beginning of palette RAM + index*2 and use only [bit] as index...
892 //Won't optimize RMW case though...
893 // This is the *only* correct use of endian-dependent code
894 // (i.e., mem-to-mem direct copying)!
895 *(uint16 *)currentLineBuffer = paletteRAM16[index | bit];
898 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bit) << 1]),
899 *(currentLineBuffer + 1) =
900 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bit) << 1) + 1]);
903 currentLineBuffer += lbufDelta;
907 // Fetch next phrase...
909 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
912 else if (depth == 1) // 2 BPP
915 WriteLog("OP: Fixed bitmap @ 2 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
916 index &= 0xFC; // Top six bits form CLUT index
917 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
918 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
923 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
926 for(int i=0; i<32; i++)
928 uint8 bits = pixels >> 62;
929 // Seems to me that both of these are in the same endian, so we could cast it as
930 // uint16 * and do straight across copies (what about 24 bpp? Treat it differently...)
931 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
932 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
933 // No, it isn't because we read the memory in an endian safe way--this *won't* work...
934 if (flagTRANS && bits == 0)
939 *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
942 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
943 *(currentLineBuffer + 1) =
944 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
947 currentLineBuffer += lbufDelta;
952 else if (depth == 2) // 4 BPP
955 WriteLog("OP: Fixed bitmap @ 4 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
956 index &= 0xF0; // Top four bits form CLUT index
957 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
958 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
963 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
966 for(int i=0; i<16; i++)
968 uint8 bits = pixels >> 60;
969 // Seems to me that both of these are in the same endian, so we could cast it as
970 // uint16 * and do straight across copies (what about 24 bpp? Treat it differently...)
971 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
972 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
973 // No, it isn't because we read the memory in an endian safe way--this *won't* work...
974 if (flagTRANS && bits == 0)
979 *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
982 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
983 *(currentLineBuffer + 1) =
984 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
987 currentLineBuffer += lbufDelta;
992 else if (depth == 3) // 8 BPP
994 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
995 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
997 // Fetch 1st phrase...
998 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
999 //Note that firstPix should only be honored *if* we start with the 1st phrase of the bitmap
1000 //i.e., we didn't clip on the margin... !!! FIX !!!
1001 firstPix &= 0x30; // Only top two bits are valid for 8 BPP
1002 pixels <<= firstPix; // Skip first N pixels (N=firstPix)...
1003 int i = firstPix >> 3; // Start counter at right spot...
1009 uint8 bits = pixels >> 56;
1010 // Seems to me that both of these are in the same endian, so we could cast it as
1011 // uint16 * and do straight across copies (what about 24 bpp? Treat it differently...)
1012 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
1013 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
1014 // No, it isn't because we read the memory in an endian safe way--this *won't* work...
1015 if (flagTRANS && bits == 0)
1020 *(uint16 *)currentLineBuffer = paletteRAM16[bits];
1022 *currentLineBuffer =
1023 BLEND_CR(*currentLineBuffer, paletteRAM[bits << 1]),
1024 *(currentLineBuffer + 1) =
1025 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[(bits << 1) + 1]);
1028 currentLineBuffer += lbufDelta;
1032 // Fetch next phrase...
1034 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1037 else if (depth == 4) // 16 BPP
1040 WriteLog("OP: Fixed bitmap @ 16 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1041 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1042 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1047 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1050 for(int i=0; i<4; i++)
1052 uint8 bitsHi = pixels >> 56, bitsLo = pixels >> 48;
1053 // Seems to me that both of these are in the same endian, so we could cast it as
1054 // uint16 * and do straight across copies (what about 24 bpp? Treat it differently...)
1055 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
1056 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
1057 // No, it isn't because we read the memory in an endian safe way--it *won't* work...
1058 if (flagTRANS && (bitsLo | bitsHi) == 0)
1063 *currentLineBuffer = bitsHi,
1064 *(currentLineBuffer + 1) = bitsLo;
1066 *currentLineBuffer =
1067 BLEND_CR(*currentLineBuffer, bitsHi),
1068 *(currentLineBuffer + 1) =
1069 BLEND_Y(*(currentLineBuffer + 1), bitsLo);
1072 currentLineBuffer += lbufDelta;
1077 else if (depth == 5) // 24 BPP
1079 //Looks like Iron Soldier is the only game that uses 24BPP mode...
1080 //There *might* be others...
1081 //WriteLog("OP: Writing 24 BPP bitmap!\n");
1083 WriteLog("OP: Fixed bitmap @ 24 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1084 // Not sure, but I think RMW only works with 16 BPP and below, and only in CRY mode...
1085 // The LSB of flags is OPFLAG_REFLECT, so sign extend it and OR 4 into it.
1086 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 4) | 0x04;
1091 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1094 for(int i=0; i<2; i++)
1096 // We don't use a 32-bit var here because of endian issues...!
1097 uint8 bits3 = pixels >> 56, bits2 = pixels >> 48,
1098 bits1 = pixels >> 40, bits0 = pixels >> 32;
1100 if (flagTRANS && (bits3 | bits2 | bits1 | bits0) == 0)
1103 *currentLineBuffer = bits3,
1104 *(currentLineBuffer + 1) = bits2,
1105 *(currentLineBuffer + 2) = bits1,
1106 *(currentLineBuffer + 3) = bits0;
1108 currentLineBuffer += lbufDelta;
1116 // Store scaled bitmap in line buffer
1118 void OPProcessScaledBitmap(uint64 p0, uint64 p1, uint64 p2, bool render)
1120 // Need to make sure that writes stay within the line buffer...
1121 // LBUF ($F01800 - $F01D9E) 360 x 32-bit RAM
1122 uint8 depth = (p1 >> 12) & 0x07; // Color depth of image
1123 int32 xpos = ((int16)((p1 << 4) & 0xFFFF)) >> 4;// Image xpos in LBUF
1124 uint32 iwidth = (p1 >> 28) & 0x3FF; // Image width in *phrases*
1125 uint32 data = (p0 >> 40) & 0xFFFFF8; // Pixel data address
1126 //#ifdef OP_DEBUG_BMP
1127 // Prolly should use this... Though not sure exactly how.
1128 //Use the upper bits as an offset into the phrase depending on the BPP. That's how!
1129 uint32 firstPix = (p1 >> 49) & 0x3F;
1130 //This is WEIRD! I'm sure I saw Atari Karts request 8 BPP FIRSTPIX! What happened???
1132 WriteLog("OP: FIRSTPIX != 0! (Scaled BM)\n");
1134 // We can ignore the RELEASE (high order) bit for now--probably forever...!
1135 // uint8 flags = (p1 >> 45) & 0x0F; // REFLECT, RMW, TRANS, RELEASE
1136 //Optimize: break these out to their own BOOL values [DONE]
1137 uint8 flags = (p1 >> 45) & 0x07; // REFLECT (0), RMW (1), TRANS (2)
1138 bool flagREFLECT = (flags & OPFLAG_REFLECT ? true : false),
1139 flagRMW = (flags & OPFLAG_RMW ? true : false),
1140 flagTRANS = (flags & OPFLAG_TRANS ? true : false);
1141 uint8 index = (p1 >> 37) & 0xFE; // CLUT index offset (upper pix, 1-4 bpp)
1142 uint32 pitch = (p1 >> 15) & 0x07; // Phrase pitch
1144 uint8 * tom_ram_8 = tom_get_ram_pointer();
1145 uint8 * paletteRAM = &tom_ram_8[0x400];
1146 // This is OK as long as it's used correctly: For 16-bit RAM to RAM direct copies--NOT
1147 // for use when using endian-corrected data (i.e., any of the *ReadWord functions!)
1148 uint16 * paletteRAM16 = (uint16 *)paletteRAM;
1150 uint8 hscale = p2 & 0xFF;
1151 // Hmm. It seems that fixing the horizontal scale necessitated re-fixing this. Not sure why,
1152 // but seems to be consistent with the vertical scaling now (and it may turn out to be wrong!)...
1153 uint8 horizontalRemainder = hscale; // Not sure if it starts full, but seems reasonable [It's not!]
1154 // uint8 horizontalRemainder = 0; // Let's try zero! Seems to work! Yay! [No, it doesn't!]
1155 int32 scaledWidthInPixels = (iwidth * phraseWidthToPixels[depth] * hscale) >> 5;
1156 uint32 scaledPhrasePixels = (phraseWidthToPixels[depth] * hscale) >> 5;
1158 // WriteLog("bitmap %ix? %ibpp at %i,? firstpix=? data=0x%.8x pitch %i hflipped=%s dwidth=? (linked to ?) RMW=%s Tranparent=%s\n",
1159 // iwidth, op_bitmap_bit_depth[bitdepth], xpos, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), (flags&OPFLAG_RMW ? "yes" : "no"), (flags&OPFLAG_TRANS ? "yes" : "no"));
1161 // Looks like an hscale of zero means don't draw!
1162 if (!render || iwidth == 0 || hscale == 0)
1165 /*extern int start_logging;
1167 WriteLog("OP: Scaled bitmap %ix? %ibpp at %i,? hscale=%02X fpix=%i data=%08X pitch %i hflipped=%s dwidth=? (linked to %08X) Transluency=%s\n",
1168 iwidth, op_bitmap_bit_depth[depth], xpos, hscale, firstPix, data, pitch, (flagREFLECT ? "yes" : "no"), op_pointer, (flagRMW ? "yes" : "no"));*/
1169 //#define OP_DEBUG_BMP
1170 //#ifdef OP_DEBUG_BMP
1171 // WriteLog("OP: Scaled bitmap %ix%i %ibpp at %i,%i firstpix=%i data=0x%.8x pitch %i hflipped=%s dwidth=%i (linked to 0x%.8x) Transluency=%s\n",
1172 // iwidth, height, op_bitmap_bit_depth[bitdepth], xpos, ypos, firstPix, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), dwidth, op_pointer, (flags&OPFLAG_RMW ? "yes" : "no"));
1175 int32 startPos = xpos, endPos = xpos +
1176 (!flagREFLECT ? scaledWidthInPixels - 1 : -(scaledWidthInPixels + 1));
1177 uint32 clippedWidth = 0, phraseClippedWidth = 0, dataClippedWidth = 0;
1178 bool in24BPPMode = (((GET16(tom_ram_8, 0x0028) >> 1) & 0x03) == 1 ? true : false); // VMODE
1179 // Not sure if this is Jaguar Two only location or what...
1180 // From the docs, it is... If we want to limit here we should think of something else.
1181 // int32 limit = GET16(tom_ram_8, 0x0008); // LIMIT
1183 int32 lbufWidth = (!in24BPPMode ? limit - 1 : (limit / 2) - 1); // Zero based limit...
1185 // If the image is completely to the left or right of the line buffer, then bail.
1186 //If in REFLECT mode, then these values are swapped! !!! FIX !!! [DONE]
1187 //There are four possibilities:
1188 // 1. image sits on left edge and no REFLECT; starts out of bounds but ends in bounds.
1189 // 2. image sits on left edge and REFLECT; starts in bounds but ends out of bounds.
1190 // 3. image sits on right edge and REFLECT; starts out of bounds but ends in bounds.
1191 // 4. image sits on right edge and no REFLECT; starts in bounds but ends out of bounds.
1192 //Numbers 2 & 4 can be caught by checking the LBUF clip while in the inner loop,
1193 // numbers 1 & 3 are of concern.
1194 // This *indirectly* handles only cases 2 & 4! And is WRONG if REFLECT is set...!
1195 // if (rightMargin < 0 || leftMargin > lbufWidth)
1197 // It might be easier to swap these (if REFLECTed) and just use XPOS down below...
1198 // That way, you could simply set XPOS to leftMargin if !REFLECT and to rightMargin otherwise.
1199 // Still have to be careful with the DATA and IWIDTH values though...
1201 if ((!flagREFLECT && (endPos < 0 || startPos > lbufWidth))
1202 || (flagREFLECT && (startPos < 0 || endPos > lbufWidth)))
1205 // Otherwise, find the clip limits and clip the phrase as well...
1206 // NOTE: I'm fudging here by letting the actual blit overstep the bounds of the
1207 // line buffer, but it shouldn't matter since there are two unused line
1208 // buffers below and nothing above and I'll at most write 40 bytes outside
1209 // the line buffer... I could use a fractional clip begin/end value, but
1210 // this makes the blit a *lot* more hairy. I might fix this in the future
1211 // if it becomes necessary. (JLH)
1212 // Probably wouldn't be *that* hairy. Just use a delta that tells the inner loop
1213 // which pixel in the phrase is being written, and quit when either end of phrases
1214 // is reached or line buffer extents are surpassed.
1216 //This stuff is probably wrong as well... !!! FIX !!!
1217 //The strange thing is that it seems to work, but that's no guarantee that it's bulletproof!
1218 //Yup. Seems that JagMania doesn't work correctly with this...
1219 //Dunno if this is the problem, but Atari Karts is showing *some* of the road now...
1220 //Actually, it is! Or, it was. It doesn't seem to be clipping here, so the problem lies
1221 //elsewhere! Hmm. Putting the scaling code into the 1/2/8 BPP cases seems to draw the ground
1222 // a bit more accurately... Strange!
1223 //It's probably a case of the REFLECT flag being set and the background being written
1224 //from the right side of the screen...
1225 //But no, it isn't... At least if the diagnostics are telling the truth!
1227 // NOTE: We're just using endPos to figure out how much, if any, to clip by.
1228 // ALSO: There may be another case where we start out of bounds and end out of bounds...!
1231 //There's a problem here with scaledPhrasePixels in that it can be forced to zero when
1232 //the scaling factor is small. So fix it already! !!! FIX !!!
1233 /*if (scaledPhrasePixels == 0)
1235 WriteLog("OP: [Scaled] We're about to encounter a divide by zero error!\n");
1236 DumpScaledObject(p0, p1, p2);
1238 //NOTE: I'm almost 100% sure that this is wrong... And it is! :-p
1240 //Try a simple example...
1241 // Let's say we have a 8 BPP scanline with an hscale of $80 (4). Our xpos is -10,
1242 // non-flipped. Pixels in the bitmap are XYZXYZXYZXYZXYZ.
1243 // Scaled up, they would be XXXXYYYYZZZZXXXXYYYYZZZZXXXXYYYYZZZZ...
1245 // Normally, we would expect this in the line buffer:
1246 // ZZXXXXYYYYZZZZXXXXYYYYZZZZ...
1248 // But instead we're getting:
1249 // XXXXYYYYZZZZXXXXYYYYZZZZ...
1251 // or are we??? It would seem so, simply by virtue of the fact that we're NOT starting
1252 // on negative boundary--or are we? Hmm...
1253 // cw = 10, dcw = pcw = 10 / ([8 * 4 = 32] 32) = 0, sp = -10
1255 // Let's try a real world example:
1257 //OP: Scaled bitmap (70, 8 BPP, spp=28) sp (-400) < 0... [new sp=-8, cw=400, dcw=pcw=14]
1258 //OP: Scaled bitmap (6F, 8 BPP, spp=27) sp (-395) < 0... [new sp=-17, cw=395, dcw=pcw=14]
1260 // Really, spp is 27.75 in the second case...
1261 // So... If we do 395 / 27.75, we get 14. Ok so far... If we scale that against the
1262 // start position (14 * 27.75), we get -6.5... NOT -17!
1264 //Now it seems we're working OK, at least for the first case...
1265 uint32 scaledPhrasePixelsUS = phraseWidthToPixels[depth] * hscale;
1267 if (startPos < 0) // Case #1: Begin out, end in, L to R
1269 extern int start_logging;
1271 WriteLog("OP: Scaled bitmap (%02X, %u BPP, spp=%u) start pos (%i) < 0...", hscale, op_bitmap_bit_depth[depth], scaledPhrasePixels, startPos);
1272 // clippedWidth = 0 - startPos,
1273 clippedWidth = (0 - startPos) << 5,
1274 // dataClippedWidth = phraseClippedWidth = clippedWidth / scaledPhrasePixels,
1275 dataClippedWidth = phraseClippedWidth = (clippedWidth / scaledPhrasePixelsUS) >> 5,
1276 // startPos = 0 - (clippedWidth % scaledPhrasePixels);
1277 startPos += (dataClippedWidth * scaledPhrasePixelsUS) >> 5;
1279 WriteLog(" [new sp=%i, cw=%i, dcw=pcw=%i]\n", startPos, clippedWidth, dataClippedWidth);
1282 if (endPos < 0) // Case #2: Begin in, end out, R to L
1283 clippedWidth = 0 - endPos,
1284 phraseClippedWidth = clippedWidth / scaledPhrasePixels;
1286 if (endPos > lbufWidth) // Case #3: Begin in, end out, L to R
1287 clippedWidth = endPos - lbufWidth,
1288 phraseClippedWidth = clippedWidth / scaledPhrasePixels;
1290 if (startPos > lbufWidth) // Case #4: Begin out, end in, R to L
1291 clippedWidth = startPos - lbufWidth,
1292 dataClippedWidth = phraseClippedWidth = clippedWidth / scaledPhrasePixels,
1293 startPos = lbufWidth + (clippedWidth % scaledPhrasePixels);
1295 extern int op_start_log;
1296 if (op_start_log && clippedWidth != 0)
1297 WriteLog("OP: Clipped line. SP=%i, EP=%i, clip=%u, iwidth=%u, hscale=%02X\n", startPos, endPos, clippedWidth, iwidth, hscale);
1298 if (op_start_log && startPos == 13)
1300 WriteLog("OP: Scaled line. SP=%i, EP=%i, clip=%u, iwidth=%u, hscale=%02X, depth=%u, firstPix=%u\n", startPos, endPos, clippedWidth, iwidth, hscale, depth, firstPix);
1301 DumpScaledObject(p0, p1, p2);
1304 WriteLog(" %08X: ", data);
1305 for(int i=0; i<7*8; i++)
1306 WriteLog("%02X ", JaguarReadByte(data+i));
1310 // If the image is sitting on the line buffer left or right edge, we need to compensate
1311 // by decreasing the image phrase width accordingly.
1312 iwidth -= phraseClippedWidth;
1314 // Also, if we're clipping the phrase we need to make sure we're in the correct part of
1316 // data += phraseClippedWidth * (pitch << 3);
1317 data += dataClippedWidth * (pitch << 3);
1319 // NOTE: When the bitmap is in REFLECT mode, the XPOS marks the *right* side of the
1320 // bitmap! This makes clipping & etc. MUCH, much easier...!
1321 // uint32 lbufAddress = 0x1800 + (!in24BPPMode ? leftMargin * 2 : leftMargin * 4);
1322 // uint32 lbufAddress = 0x1800 + (!in24BPPMode ? startPos * 2 : startPos * 4);
1323 uint32 lbufAddress = 0x1800 + startPos * 2;
1324 uint8 * currentLineBuffer = &tom_ram_8[lbufAddress];
1325 //uint8 * lineBufferLowerLimit = &tom_ram_8[0x1800],
1326 // * lineBufferUpperLimit = &tom_ram_8[0x1800 + 719];
1330 // Hmm. We check above for 24 BPP mode, but don't do anything about it below...
1331 // If we *were* in 24 BPP mode, how would you convert CRY to RGB24? Seems to me
1332 // that if you're in CRY mode then you wouldn't be able to use 24 BPP bitmaps
1334 // This seems to be the case (at least according to the Midsummer docs)...!
1336 if (depth == 0) // 1 BPP
1339 WriteLog("OP: Scaled bitmap @ 1 BPP requesting FIRSTPIX!\n");
1340 // The LSB of flags is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1341 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1344 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1346 while ((int32)iwidth > 0)
1348 uint8 bits = pixels >> 63;
1350 if (flagTRANS && bits == 0)
1355 // This is the *only* correct use of endian-dependent code
1356 // (i.e., mem-to-mem direct copying)!
1357 *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1359 *currentLineBuffer =
1360 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1361 *(currentLineBuffer + 1) =
1362 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1365 currentLineBuffer += lbufDelta;
1367 /* horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
1368 while (horizontalRemainder & 0x80)
1370 horizontalRemainder += hscale;
1374 while (horizontalRemainder <= 0x20) // I.e., it's <= 0 (*before* subtraction)
1376 horizontalRemainder += hscale;
1380 horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
1384 int phrasesToSkip = pixCount / 64, pixelShift = pixCount % 64;
1386 data += (pitch << 3) * phrasesToSkip;
1387 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1388 pixels <<= 1 * pixelShift;
1389 iwidth -= phrasesToSkip;
1390 pixCount = pixelShift;
1394 else if (depth == 1) // 2 BPP
1397 WriteLog("OP: Scaled bitmap @ 2 BPP requesting FIRSTPIX!\n");
1398 index &= 0xFC; // Top six bits form CLUT index
1399 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1400 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1403 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1405 while ((int32)iwidth > 0)
1407 uint8 bits = pixels >> 62;
1409 if (flagTRANS && bits == 0)
1414 // This is the *only* correct use of endian-dependent code
1415 // (i.e., mem-to-mem direct copying)!
1416 *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1418 *currentLineBuffer =
1419 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1420 *(currentLineBuffer + 1) =
1421 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1424 currentLineBuffer += lbufDelta;
1426 /* horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
1427 while (horizontalRemainder & 0x80)
1429 horizontalRemainder += hscale;
1433 while (horizontalRemainder <= 0x20) // I.e., it's <= 0 (*before* subtraction)
1435 horizontalRemainder += hscale;
1439 horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
1443 int phrasesToSkip = pixCount / 32, pixelShift = pixCount % 32;
1445 data += (pitch << 3) * phrasesToSkip;
1446 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1447 pixels <<= 2 * pixelShift;
1448 iwidth -= phrasesToSkip;
1449 pixCount = pixelShift;
1453 else if (depth == 2) // 4 BPP
1456 WriteLog("OP: Scaled bitmap @ 4 BPP requesting FIRSTPIX!\n");
1457 index &= 0xF0; // Top four bits form CLUT index
1458 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1459 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1462 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1464 while ((int32)iwidth > 0)
1466 uint8 bits = pixels >> 60;
1468 if (flagTRANS && bits == 0)
1473 // This is the *only* correct use of endian-dependent code
1474 // (i.e., mem-to-mem direct copying)!
1475 *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1477 *currentLineBuffer =
1478 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1479 *(currentLineBuffer + 1) =
1480 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1483 currentLineBuffer += lbufDelta;
1485 /* horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
1486 while (horizontalRemainder & 0x80)
1488 horizontalRemainder += hscale;
1492 while (horizontalRemainder <= 0x20) // I.e., it's <= 0 (*before* subtraction)
1494 horizontalRemainder += hscale;
1498 horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
1502 int phrasesToSkip = pixCount / 16, pixelShift = pixCount % 16;
1504 data += (pitch << 3) * phrasesToSkip;
1505 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1506 pixels <<= 4 * pixelShift;
1507 iwidth -= phrasesToSkip;
1508 pixCount = pixelShift;
1512 else if (depth == 3) // 8 BPP
1515 WriteLog("OP: Scaled bitmap @ 8 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1516 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1517 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1520 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1522 while ((int32)iwidth > 0)
1524 uint8 bits = pixels >> 56;
1526 if (flagTRANS && bits == 0)
1531 // This is the *only* correct use of endian-dependent code
1532 // (i.e., mem-to-mem direct copying)!
1533 *(uint16 *)currentLineBuffer = paletteRAM16[bits];
1535 if (currentLineBuffer >= lineBufferLowerLimit && currentLineBuffer <= lineBufferUpperLimit)
1536 *(uint16 *)currentLineBuffer = paletteRAM16[bits];
1539 *currentLineBuffer =
1540 BLEND_CR(*currentLineBuffer, paletteRAM[bits << 1]),
1541 *(currentLineBuffer + 1) =
1542 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[(bits << 1) + 1]);
1545 currentLineBuffer += lbufDelta;
1547 while (horizontalRemainder <= 0x20) // I.e., it's <= 0 (*before* subtraction)
1549 horizontalRemainder += hscale;
1553 horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
1557 int phrasesToSkip = pixCount / 8, pixelShift = pixCount % 8;
1559 data += (pitch << 3) * phrasesToSkip;
1560 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1561 pixels <<= 8 * pixelShift;
1562 iwidth -= phrasesToSkip;
1563 pixCount = pixelShift;
1567 else if (depth == 4) // 16 BPP
1570 WriteLog("OP: Scaled bitmap @ 16 BPP requesting FIRSTPIX!\n");
1571 // The LSB is OPFLAG_REFLECT, so sign extend it and OR 2 into it.
1572 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1575 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1577 while ((int32)iwidth > 0)
1579 uint8 bitsHi = pixels >> 56, bitsLo = pixels >> 48;
1581 if (flagTRANS && (bitsLo | bitsHi) == 0)
1586 *currentLineBuffer = bitsHi,
1587 *(currentLineBuffer + 1) = bitsLo;
1589 *currentLineBuffer =
1590 BLEND_CR(*currentLineBuffer, bitsHi),
1591 *(currentLineBuffer + 1) =
1592 BLEND_Y(*(currentLineBuffer + 1), bitsLo);
1595 currentLineBuffer += lbufDelta;
1597 /* horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
1598 while (horizontalRemainder & 0x80)
1600 horizontalRemainder += hscale;
1604 while (horizontalRemainder <= 0x20) // I.e., it's <= 0 (*before* subtraction)
1606 horizontalRemainder += hscale;
1610 horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
1614 int phrasesToSkip = pixCount / 4, pixelShift = pixCount % 4;
1616 data += (pitch << 3) * phrasesToSkip;
1617 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1618 pixels <<= 16 * pixelShift;
1620 iwidth -= phrasesToSkip;
1622 pixCount = pixelShift;
1626 else if (depth == 5) // 24 BPP
1628 //I'm not sure that you can scale a 24 BPP bitmap properly--the JTRM seems to indicate as much.
1629 WriteLog("OP: Writing 24 BPP scaled bitmap!\n");
1631 WriteLog("OP: Scaled bitmap @ 24 BPP requesting FIRSTPIX!\n");
1632 // Not sure, but I think RMW only works with 16 BPP and below, and only in CRY mode...
1633 // The LSB is OPFLAG_REFLECT, so sign extend it and or 4 into it.
1634 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 4) | 0x04;
1639 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1640 data += pitch << 3; // Multiply pitch * 8 (optimize: precompute this value)
1642 for(int i=0; i<2; i++)
1644 uint8 bits3 = pixels >> 56, bits2 = pixels >> 48,
1645 bits1 = pixels >> 40, bits0 = pixels >> 32;
1647 if (flagTRANS && (bits3 | bits2 | bits1 | bits0) == 0)
1650 *currentLineBuffer = bits3,
1651 *(currentLineBuffer + 1) = bits2,
1652 *(currentLineBuffer + 2) = bits1,
1653 *(currentLineBuffer + 3) = bits0;
1655 currentLineBuffer += lbufDelta;