5 // GCC/SDL port by Niels Wagenaar (Linux/WIN32) and Caz (BeOS)
6 // Cleanups/fixes/rewrites by James L. Hammons
15 //#define OP_DEBUG_BMP
// Saturating-blend table lookups used by the bitmap renderers. The table index
// is formed from the existing line buffer byte (dst, high 8 bits) and the
// incoming source byte (src, low 8 bits). Both arguments are parenthesized
// before the cast so that an expression argument (e.g. "a & 0x0F") is cast and
// shifted as a whole, and both macros build the index the same way.
#define BLEND_Y(dst, src)	op_blend_y[(((uint16)(dst)) << 8) | ((uint16)(src))]
#define BLEND_CR(dst, src)	op_blend_cr[(((uint16)(dst)) << 8) | ((uint16)(src))]
// Object type codes, held in the low three bits of an object's first phrase
enum
{
	OBJECT_TYPE_BITMAP = 0,		// 000
	OBJECT_TYPE_SCALE  = 1,		// 001
	OBJECT_TYPE_GPU    = 2,		// 010
	OBJECT_TYPE_BRANCH = 3,		// 011
	OBJECT_TYPE_STOP   = 4		// 100
};

// Condition codes evaluated for BRANCH objects
enum
{
	CONDITION_EQUAL            = 0,
	CONDITION_LESS_THAN        = 1,
	CONDITION_GREATER_THAN     = 2,
	CONDITION_OP_FLAG_SET      = 3,
	CONDITION_SECOND_HALF_LINE = 4
};

// Bitmap object flag bits (a four bit field taken from the second phrase)
enum
{
	OPFLAG_RELEASE = 8,			// Bus release bit
	OPFLAG_TRANS   = 4,			// Transparency bit
	OPFLAG_RMW     = 2,			// Read-Modify-Write bit
	OPFLAG_REFLECT = 1			// Horizontal mirror bit
};
37 // Private function prototypes
39 void OPProcessFixedBitmap(int scanline, uint64 p0, uint64 p1, bool render);
40 void OPProcessScaledBitmap(int scanline, uint64 p0, uint64 p1, uint64 p2, bool render);
41 void DumpScaledObject(uint64 p0, uint64 p1, uint64 p2);
42 void DumpFixedObject(uint64 p0, uint64 p1);
43 uint64 op_load_phrase(uint32 offset);
45 // External global variables
47 extern uint32 jaguar_mainRom_crc32;
49 // Local global variables
51 static uint8 * op_blend_y;
52 static uint8 * op_blend_cr;
53 // There may be a problem with this "RAM" overlapping (and thus being independent of)
54 // some of the regular TOM RAM...
55 static uint8 objectp_ram[0x40]; // This is based at $F00000
56 uint8 objectp_running;
57 bool objectp_stop_reading_list;
59 static uint8 op_bitmap_bit_depth[8] = { 1, 2, 4, 8, 16, 24, 32, 0 };
60 //static uint32 op_bitmap_bit_size[8] =
61 // { (uint32)(0.125*65536), (uint32)(0.25*65536), (uint32)(0.5*65536), (uint32)(1*65536),
62 // (uint32)(2*65536), (uint32)(1*65536), (uint32)(1*65536), (uint32)(1*65536) };
63 static uint32 op_pointer;
65 int32 phraseWidthToPixels[8] = { 64, 32, 16, 8, 4, 2, 0, 0 };
69 // Object Processor initialization
73 // Blend tables (64K each)
74 memory_malloc_secure((void **)&op_blend_y, 0x10000, "Jaguar Object processor Y blend lookup table");
75 memory_malloc_secure((void **)&op_blend_cr, 0x10000, "Jaguar Object processor CR blend lookup table");
77 // Here we calculate the saturating blend of a signed 4-bit value and an
78 // existing Cyan/Red value as well as a signed 8-bit value and an existing intensity...
79 // Note: CRY is 4 bits Cyan, 4 bits Red, 8 bits intensitY
80 for(int i=0; i<256*256; i++)
82 int y = (i >> 8) & 0xFF;
83 int dy = (INT8)i; // Sign extend the Y index
84 int c1 = (i >> 8) & 0x0F;
85 int dc1 = (INT8)(i << 4) >> 4; // Sign extend the R index
86 int c2 = (i >> 12) & 0x0F;
87 int dc2 = (INT8)(i & 0xF0) >> 4; // Sign extend the C index
107 op_blend_cr[i] = (c2 << 4) | c1;
114 // Object Processor reset
118 memset(objectp_ram, 0x00, 0x40);
125 { "(BITMAP)", "(SCALED BITMAP)", "(GPU INT)", "(BRANCH)", "(STOP)", "???", "???", "???" };
127 { "\"==\"", "\"<\"", "\">\"", "(opflag set)", "(second half line)", "?", "?", "?" };
129 uint32 olp = op_get_list_pointer();
130 WriteLog("OP: OLP = %08X\n", olp);
131 WriteLog("OP: Phrase dump\n ----------\n");
132 for(uint32 i=0; i<0x100; i+=8)
134 uint32 hi = JaguarReadLong(olp + i, OP), lo = JaguarReadLong(olp + i + 4, OP);
135 WriteLog("\t%08X: %08X %08X %s", olp + i, hi, lo, opType[lo & 0x07]);
136 if ((lo & 0x07) == 3)
138 uint16 ypos = (lo >> 3) & 0x7FF;
139 uint8 cc = (lo >> 14) & 0x03;
140 uint32 link = ((hi << 11) | (lo >> 21)) & 0x3FFFF8;
141 WriteLog(" YPOS=%u, CC=%s, link=%08X", ypos, ccType[cc], link);
144 if ((lo & 0x07) == 0)
145 DumpFixedObject(op_load_phrase(olp+i), op_load_phrase(olp+i+8));
146 if ((lo & 0x07) == 1)
147 DumpScaledObject(op_load_phrase(olp+i), op_load_phrase(olp+i+8), op_load_phrase(olp+i+16));
153 // Object Processor memory access
154 // Memory range: F00010 - F00027
156 uint8 OPReadByte(uint32 offset, uint32 who/*=UNKNOWN*/)
159 return objectp_ram[offset];
162 uint16 OPReadWord(uint32 offset, uint32 who/*=UNKNOWN*/)
165 return GET16(objectp_ram, offset);
168 // F00010-F00017 R xxxxxxxx xxxxxxxx OB - current object code from the graphics processor
169 // F00020-F00023 W xxxxxxxx xxxxxxxx OLP - start of the object list
170 // F00026 W -------- -------x OBF - object processor flag
172 void OPWriteByte(uint32 offset, uint8 data, uint32 who/*=UNKNOWN*/)
175 objectp_ram[offset] = data;
178 void OPWriteWord(uint32 offset, uint16 data, uint32 who/*=UNKNOWN*/)
181 SET16(objectp_ram, offset, data);
183 /*if (offset == 0x20)
184 WriteLog("OP: Setting lo list pointer: %04X\n", data);
186 WriteLog("OP: Setting hi list pointer: %04X\n", data);//*/
189 uint32 op_get_list_pointer(void)
191 // Note: This register is LO / HI WORD, hence the funky look of this...
192 // return (objectp_ram[0x22] << 24) | (objectp_ram[0x23] << 16) | (objectp_ram[0x20] << 8) | objectp_ram[0x21];
193 return GET16(objectp_ram, 0x20) | (GET16(objectp_ram, 0x22) << 16);
196 // This is WRONG, since the OBF is only 16 bits wide!!! [FIXED]
198 uint32 op_get_status_register(void)
200 // return (objectp_ram[0x26] << 24) | (objectp_ram[0x27] << 16) | (objectp_ram[0x28] << 8) | objectp_ram[0x29];
201 // return GET32(objectp_ram, 0x26);
202 return GET16(objectp_ram, 0x26);
205 // This is WRONG, since the OBF is only 16 bits wide!!! [FIXED]
207 void op_set_status_register(uint32 data)
209 /* objectp_ram[0x26] = (data & 0xFF000000) >> 24;
210 objectp_ram[0x27] = (data & 0x00FF0000) >> 16;
211 objectp_ram[0x28] = (data & 0x0000FF00) >> 8;
212 objectp_ram[0x29] |= (data & 0xFE);*/
213 objectp_ram[0x26] = (data & 0x0000FF00) >> 8;
214 objectp_ram[0x27] |= (data & 0xFE);
217 void op_set_current_object(uint64 object)
219 //Not sure this is right... Wouldn't it just be stored 64 bit BE?
220 // Stored as least significant 32 bits first, ms32 last in big endian
221 /* objectp_ram[0x13] = object & 0xFF; object >>= 8;
222 objectp_ram[0x12] = object & 0xFF; object >>= 8;
223 objectp_ram[0x11] = object & 0xFF; object >>= 8;
224 objectp_ram[0x10] = object & 0xFF; object >>= 8;
226 objectp_ram[0x17] = object & 0xFF; object >>= 8;
227 objectp_ram[0x16] = object & 0xFF; object >>= 8;
228 objectp_ram[0x15] = object & 0xFF; object >>= 8;
229 objectp_ram[0x14] = object & 0xFF;*/
230 // Let's try regular good old big endian...
231 objectp_ram[0x17] = object & 0xFF; object >>= 8;
232 objectp_ram[0x16] = object & 0xFF; object >>= 8;
233 objectp_ram[0x15] = object & 0xFF; object >>= 8;
234 objectp_ram[0x14] = object & 0xFF; object >>= 8;
236 objectp_ram[0x13] = object & 0xFF; object >>= 8;
237 objectp_ram[0x12] = object & 0xFF; object >>= 8;
238 objectp_ram[0x11] = object & 0xFF; object >>= 8;
239 objectp_ram[0x10] = object & 0xFF;
242 uint64 op_load_phrase(uint32 offset)
244 offset &= ~0x07; // 8 byte alignment
245 return ((uint64)JaguarReadLong(offset, OP) << 32) | (uint64)JaguarReadLong(offset+4, OP);
248 void OPStorePhrase(uint32 offset, uint64 p)
250 offset &= ~0x07; // 8 byte alignment
251 JaguarWriteLong(offset, p >> 32, OP);
252 JaguarWriteLong(offset + 4, p & 0xFFFFFFFF, OP);
256 // Debugging routines
258 void DumpScaledObject(uint64 p0, uint64 p1, uint64 p2)
260 WriteLog(" (SCALED BITMAP)");
261 WriteLog(" %08X --> phrase %08X %08X\n", op_pointer, (uint32)(p1>>32), (uint32)(p1&0xFFFFFFFF));
262 WriteLog(" %08X --> phrase %08X %08X ", op_pointer+8, (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
263 uint8 bitdepth = (p1 >> 12) & 0x07;
264 int16 ypos = ((p0 >> 3) & 0x3FF); // ??? What if not interlaced (/2)?
265 int32 xpos = p1 & 0xFFF;
266 xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);
267 uint32 iwidth = ((p1 >> 28) & 0x3FF);
268 uint32 dwidth = ((p1 >> 18) & 0x3FF); // Unsigned!
269 uint16 height = ((p0 >> 14) & 0x3FF);
270 uint32 link = ((p0 >> 24) & 0x7FFFF) << 3;
271 uint32 ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
272 uint32 firstPix = (p1 >> 49) & 0x3F;
273 uint8 flags = (p1 >> 45) & 0x0F;
274 uint8 idx = (p1 >> 38) & 0x7F;
275 uint32 pitch = (p1 >> 15) & 0x07;
276 WriteLog("\n [%u (%u) x %u @ (%i, %u) (%u bpp), l: %08X, p: %08X fp: %02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
277 iwidth, dwidth, height, xpos, ypos, op_bitmap_bit_depth[bitdepth], link, ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""), (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""), (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
278 uint32 hscale = p2 & 0xFF;
279 uint32 vscale = (p2 >> 8) & 0xFF;
280 uint32 remainder = (p2 >> 16) & 0xFF;
281 WriteLog(" [hsc: %02X, vsc: %02X, rem: %02X]\n", hscale, vscale, remainder);
284 void DumpFixedObject(uint64 p0, uint64 p1)
286 WriteLog(" (BITMAP)");
287 WriteLog(" %08X --> phrase %08X %08X\n", op_pointer, (uint32)(p1>>32), (uint32)(p1&0xFFFFFFFF));
288 uint8 bitdepth = (p1 >> 12) & 0x07;
289 int16 ypos = ((p0 >> 3) & 0x3FF); // ??? What if not interlaced (/2)?
290 int32 xpos = p1 & 0xFFF;
291 xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);
292 uint32 iwidth = ((p1 >> 28) & 0x3FF);
293 uint32 dwidth = ((p1 >> 18) & 0x3FF); // Unsigned!
294 uint16 height = ((p0 >> 14) & 0x3FF);
295 uint32 link = ((p0 >> 24) & 0x7FFFF) << 3;
296 uint32 ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
297 uint32 firstPix = (p1 >> 49) & 0x3F;
298 uint8 flags = (p1 >> 45) & 0x0F;
299 uint8 idx = (p1 >> 38) & 0x7F;
300 uint32 pitch = (p1 >> 15) & 0x07;
301 WriteLog(" [%u (%u) x %u @ (%i, %u) (%u bpp), l: %08X, p: %08X fp: %02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
302 iwidth, dwidth, height, xpos, ypos, op_bitmap_bit_depth[bitdepth], link, ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""), (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""), (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
306 // Object Processor main routine
// OPProcessList: walk the object list for one scanline.
//
// scanline - line being processed; bitmap objects are considered when
//            scanline >= (YPOS / 2), and the trace below is written only when
//            scanline equals tom_get_vdb() and op_start_log is set
// render   - forwarded unchanged to OPProcessFixedBitmap / OPProcessScaledBitmap
//
// Processing starts at op_get_list_pointer(). Each first phrase (p0) is loaded
// with op_load_phrase() and dispatched on its low three bits (OBJECT_TYPE_*):
//   BITMAP / SCALE - rendered through the matching OPProcess*Bitmap routine,
//                    the updated phrase(s) are written back with OPStorePhrase,
//                    and op_pointer follows the LINK field
//   GPU            - p0 becomes the current object and GPU IRQ line 3 is asserted
//   BRANCH         - YPOS is compared with TOMReadWord(0xF00006, OP), or the OP
//                    flag is tested, according to the condition code
//   STOP           - p0 becomes the current object, the pending object interrupt
//                    is flagged, and m68k_set_irq(7) is raised when enabled
//
// NOTE(review): this listing has lines elided (braces, the loop header, short
// statements such as pointer increments and break/return), so the exact loop
// structure and pointer stepping cannot be confirmed from here.
308 void OPProcessList(int scanline, bool render)
310 extern int op_start_log;
311 // char * condition_to_str[8] =
312 // { "==", "<", ">", "(opflag set)", "(second half line)", "?", "?", "?" };
// Start from the head of the list held in the OLP register
314 op_pointer = op_get_list_pointer();
316 objectp_stop_reading_list = false;
318 // *** BEGIN OP PROCESSOR TESTING ONLY ***
319 extern bool interactiveMode;
321 extern int objectPtr;
323 int bitmapCounter = 0;
324 // *** END OP PROCESSOR TESTING ONLY ***
326 // if (op_pointer) WriteLog(" new op list at 0x%.8x scanline %i\n",op_pointer,scanline);
329 // *** BEGIN OP PROCESSOR TESTING ONLY ***
330 if (interactiveMode && bitmapCounter == objectPtr)
334 // *** END OP PROCESSOR TESTING ONLY ***
335 if (objectp_stop_reading_list)
// Fetch the first phrase of the next object
338 uint64 p0 = op_load_phrase(op_pointer);
// Optional trace of every object, written only on the scanline equal to VDB.
// NOTE(review): the trace prints op_pointer - 8 as the address of p0, which
// suggests op_pointer is advanced past p0 on an elided line -- confirm.
340 if (scanline == tom_get_vdb() && op_start_log)
341 //if (scanline == 215 && op_start_log)
343 WriteLog("%08X --> phrase %08X %08X", op_pointer - 8, (int)(p0>>32), (int)(p0&0xFFFFFFFF));
344 if ((p0 & 0x07) == OBJECT_TYPE_BITMAP)
346 WriteLog(" (BITMAP) ");
347 uint64 p1 = op_load_phrase(op_pointer);
348 WriteLog("\n%08X --> phrase %08X %08X ", op_pointer, (int)(p1>>32), (int)(p1&0xFFFFFFFF));
349 uint8 bitdepth = (p1 >> 12) & 0x07;
350 int16 ypos = ((p0 >> 3) & 0x3FF); // ??? What if not interlaced (/2)?
351 int32 xpos = p1 & 0xFFF;
352 xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);
353 uint32 iwidth = ((p1 >> 28) & 0x3FF);
354 uint32 dwidth = ((p1 >> 18) & 0x3FF); // Unsigned!
355 uint16 height = ((p0 >> 14) & 0x3FF);
356 uint32 link = ((p0 >> 24) & 0x7FFFF) << 3;
357 uint32 ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
358 uint32 firstPix = (p1 >> 49) & 0x3F;
359 uint8 flags = (p1 >> 45) & 0x0F;
360 uint8 idx = (p1 >> 38) & 0x7F;
361 uint32 pitch = (p1 >> 15) & 0x07;
362 WriteLog("\n [%u (%u) x %u @ (%i, %u) (%u bpp), l: %08X, p: %08X fp: %02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
363 iwidth, dwidth, height, xpos, ypos, op_bitmap_bit_depth[bitdepth], link, ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""), (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""), (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
365 if ((p0 & 0x07) == OBJECT_TYPE_SCALE)
367 WriteLog(" (SCALED BITMAP)");
368 uint64 p1 = op_load_phrase(op_pointer), p2 = op_load_phrase(op_pointer+8);
369 WriteLog("\n%08X --> phrase %08X %08X ", op_pointer, (int)(p1>>32), (int)(p1&0xFFFFFFFF));
370 WriteLog("\n%08X --> phrase %08X %08X ", op_pointer+8, (int)(p2>>32), (int)(p2&0xFFFFFFFF));
371 uint8 bitdepth = (p1 >> 12) & 0x07;
372 int16 ypos = ((p0 >> 3) & 0x3FF); // ??? What if not interlaced (/2)?
373 int32 xpos = p1 & 0xFFF;
374 xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);
375 uint32 iwidth = ((p1 >> 28) & 0x3FF);
376 uint32 dwidth = ((p1 >> 18) & 0x3FF); // Unsigned!
377 uint16 height = ((p0 >> 14) & 0x3FF);
378 uint32 link = ((p0 >> 24) & 0x7FFFF) << 3;
379 uint32 ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
380 uint32 firstPix = (p1 >> 49) & 0x3F;
381 uint8 flags = (p1 >> 45) & 0x0F;
382 uint8 idx = (p1 >> 38) & 0x7F;
383 uint32 pitch = (p1 >> 15) & 0x07;
384 WriteLog("\n [%u (%u) x %u @ (%i, %u) (%u bpp), l: %08X, p: %08X fp: %02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
385 iwidth, dwidth, height, xpos, ypos, op_bitmap_bit_depth[bitdepth], link, ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""), (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""), (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
386 uint32 hscale = p2 & 0xFF;
387 uint32 vscale = (p2 >> 8) & 0xFF;
388 uint32 remainder = (p2 >> 16) & 0xFF;
389 WriteLog(" [hsc: %02X, vsc: %02X, rem: %02X]\n", hscale, vscale, remainder);
391 if ((p0 & 0x07) == OBJECT_TYPE_GPU)
392 WriteLog(" (GPU)\n");
393 if ((p0 & 0x07) == OBJECT_TYPE_BRANCH)
395 WriteLog(" (BRANCH)\n");
396 uint8 * jaguar_mainRam = GetRamPtr();
397 WriteLog("[RAM] --> ");
398 for(int k=0; k<8; k++)
399 WriteLog("%02X ", jaguar_mainRam[op_pointer-8 + k]);
402 if ((p0 & 0x07) == OBJECT_TYPE_STOP)
403 WriteLog(" --> List end\n");
406 // WriteLog("%08X type %i\n", op_pointer, (uint8)p0 & 0x07);
// Dispatch on the object type held in the low three bits of p0
407 switch ((uint8)p0 & 0x07)
409 case OBJECT_TYPE_BITMAP:
411 // Would *not* be /2 if interlaced...!
// NOTE(review): YPOS is masked to 10 bits here but to 11 bits (0x7FF) in the
// BRANCH case below, and HEIGHT starts at bit 14 -- confirm the field width.
412 uint16 ypos = ((p0 >> 3) & 0x3FF) / 2;
413 // This is only theory implied by Rayman...!
414 // It seems that if the YPOS is zero, then bump the YPOS value so that it coincides with
415 // the VDB value. With interlacing, this would be slightly more tricky.
416 // There's probably another bit somewhere that enables this mode--but so far, doesn't seem
417 // to affect any other game in a negative way (that I've seen).
418 // Either that, or it's an undocumented bug...
420 //No, the reason this was needed is that the OP code before was wrong. Any value
421 //less than VDB will get written to the top line of the display!
423 // ypos = TOMReadWord(0xF00046, OP) / 2; // Get the VDB value
424 uint32 height = (p0 & 0xFFC000) >> 14;
425 uint32 oldOPP = op_pointer - 8;
426 // *** BEGIN OP PROCESSOR TESTING ONLY ***
427 if (inhibit && op_start_log)
428 WriteLog("!!! ^^^ This object is INHIBITED! ^^^ !!!\n");
430 if (!inhibit) // For OP testing only!
431 // *** END OP PROCESSOR TESTING ONLY ***
432 if (scanline >= ypos && height > 0)
434 uint64 p1 = op_load_phrase(op_pointer);
436 //WriteLog("OP: Writing scanline %d with ypos == %d...\n", scanline, ypos);
437 //WriteLog("--> Writing %u BPP bitmap...\n", op_bitmap_bit_depth[(p1 >> 12) & 0x07]);
438 OPProcessFixedBitmap(scanline, p0, p1, render);
442 //???Does this really happen??? Doesn't seem to work if you do this...!
443 // uint32 link = (p0 & 0x7FFFF000000) >> 21;
444 // SET16(objectp_ram, 0x20, link & 0xFFFF); // OLP
445 // SET16(objectp_ram, 0x22, link >> 16);
446 /* uint32 height = (p0 & 0xFFC000) >> 14;
449 // NOTE: Would subtract 2 if in interlaced mode...!
450 // uint64 height = ((p0 & 0xFFC000) - 0x4000) & 0xFFC000;
454 uint64 data = (p0 & 0xFFFFF80000000000) >> 40;
455 uint64 dwidth = (p1 & 0xFFC0000) >> 15;
458 p0 &= ~0xFFFFF80000FFC000; // Mask out old data...
459 p0 |= (uint64)height << 14;
461 OPStorePhrase(oldOPP, p0);
463 op_pointer = (p0 & 0x000007FFFF000000) >> 21;
466 case OBJECT_TYPE_SCALE:
468 // Would *not* be /2 if interlaced...!
469 uint16 ypos = ((p0 >> 3) & 0x3FF) / 2;
470 // This is only theory implied by Rayman...!
471 // It seems that if the YPOS is zero, then bump the YPOS value so that it coincides with
472 // the VDB value. With interlacing, this would be slightly more tricky.
473 // There's probably another bit somewhere that enables this mode--but so far, doesn't seem
474 // to affect any other game in a negative way (that I've seen).
475 // Either that, or it's an undocumented bug...
477 //No, the reason this was needed is that the OP code before was wrong. Any value
478 //less than VDB will get written to the top line of the display!
480 // ypos = TOMReadWord(0xF00046, OP) / 2; // Get the VDB value
481 uint32 height = (p0 & 0xFFC000) >> 14;
482 uint32 oldOPP = op_pointer - 8;
483 // *** BEGIN OP PROCESSOR TESTING ONLY ***
484 if (inhibit && op_start_log)
486 WriteLog("!!! ^^^ This object is INHIBITED! ^^^ !!! (scanline=%u, ypos=%u, height=%u)\n", scanline, ypos, height);
487 DumpScaledObject(p0, op_load_phrase(op_pointer), op_load_phrase(op_pointer+8));
490 if (!inhibit) // For OP testing only!
491 // *** END OP PROCESSOR TESTING ONLY ***
492 if (scanline >= ypos && height > 0)
494 uint64 p1 = op_load_phrase(op_pointer);
496 uint64 p2 = op_load_phrase(op_pointer);
498 //WriteLog("OP: %08X (%d) %08X%08X %08X%08X %08X%08X\n", oldOPP, scanline, (uint32)(p0>>32), (uint32)(p0&0xFFFFFFFF), (uint32)(p1>>32), (uint32)(p1&0xFFFFFFFF), (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
499 OPProcessScaledBitmap(scanline, p0, p1, p2, render);
503 //???Does this really happen??? Doesn't seem to work if you do this...!
504 // uint32 link = (p0 & 0x7FFFF000000) >> 21;
505 // SET16(objectp_ram, 0x20, link & 0xFFFF); // OLP
506 // SET16(objectp_ram, 0x22, link >> 16);
507 /* uint32 height = (p0 & 0xFFC000) >> 14;
510 // NOTE: Would subtract 2 if in interlaced mode...!
511 // uint64 height = ((p0 & 0xFFC000) - 0x4000) & 0xFFC000;
513 uint8 remainder = p2 >> 16, vscale = p2 >> 8;
514 //Actually, we should skip this object if it has a vscale of zero.
515 //Or do we? Not sure... Atari Karts has a few lines that look like:
517 //000E8268 --> phrase 00010000 7000B00D
518 // [7 (0) x 1 @ (13, 0) (8 bpp), l: 000E82A0, p: 000E0FC0 fp: 00, fl:RELEASE, idx:00, pt:01]
519 // [hsc: 9A, vsc: 00, rem: 00]
520 // Could it be the vscale is overridden if the DWIDTH is zero? Hmm...
523 vscale = 0x20; // OP bug??? Nope, it isn't...! Or is it?
525 remainder -= 0x20; // 1.0f in [3.5] fixed point format
526 if (remainder & 0x80) // I.e., it's negative
528 uint64 data = (p0 & 0xFFFFF80000000000) >> 40;
529 uint64 dwidth = (p1 & 0xFFC0000) >> 15;
531 while (remainder & 0x80)
539 p0 &= ~0xFFFFF80000FFC000; // Mask out old data...
540 p0 |= (uint64)height << 14;
542 OPStorePhrase(oldOPP, p0);
545 //WriteLog(" [%08X%08X -> ", (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
546 p2 &= ~0x0000000000FF0000;
547 p2 |= (uint64)remainder << 16;
548 //WriteLog("%08X%08X]\n", (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
549 OPStorePhrase(oldOPP+16, p2);
550 //remainder = (uint8)(p2 >> 16), vscale = (uint8)(p2 >> 8);
551 //WriteLog(" [after]: rem=%02X, vscale=%02X\n", remainder, vscale);
553 op_pointer = (p0 & 0x000007FFFF000000) >> 21;
556 case OBJECT_TYPE_GPU:
558 //WriteLog("OP: Asserting GPU IRQ #3...\n");
559 op_set_current_object(p0);
560 GPUSetIRQLine(3, ASSERT_LINE);
561 //Also, OP processing is suspended from this point until OBF (F00026) is written to...
564 //OPSuspendedByGPU = true;
565 //Dunno if the OP keeps processing from where it was interrupted, or if it just continues
566 //on the next scanline...
569 case OBJECT_TYPE_BRANCH:
571 uint16 ypos = (p0 >> 3) & 0x7FF;
// NOTE(review): cc is masked to two bits, so it can only be 0-3 and the
// CONDITION_SECOND_HALF_LINE (4) case below is unreachable as written --
// confirm the width of the condition code field.
572 uint8 cc = (p0 >> 14) & 0x03;
573 uint32 link = (p0 >> 21) & 0x3FFFF8;
575 // if ((ypos!=507)&&(ypos!=25))
576 // WriteLog("\t%i%s%i link=0x%.8x\n",scanline,condition_to_str[cc],ypos>>1,link);
579 case CONDITION_EQUAL:
580 //Why do this for the equal case? If they wrote an odd YPOS, then it wouldn't be detected!
581 // if (ypos != 0x7FF && (ypos & 0x01))
583 // if ((2 * tom_get_scanline()) == ypos || ypos == 0x7FF)
584 //Here we're using VC instead of the bogus tom_get_scanline() value...
585 if (TOMReadWord(0xF00006, OP) == ypos || ypos == 0x7FF)
588 case CONDITION_LESS_THAN:
589 // if ((2 * tom_get_scanline()) < ypos)
590 if (TOMReadWord(0xF00006, OP) < ypos)
593 case CONDITION_GREATER_THAN:
594 // if ((2 * tom_get_scanline()) > ypos)
595 if (TOMReadWord(0xF00006, OP) > ypos)
598 case CONDITION_OP_FLAG_SET:
599 if (op_get_status_register() & 0x01)
602 case CONDITION_SECOND_HALF_LINE:
603 // This basically means branch if bit 10 of HC is set
604 WriteLog("OP: Unexpected CONDITION_SECOND_HALF_LINE in BRANCH object\nOP: shuting down\n");
609 WriteLog("OP: Unimplemented branch condition %i\n", cc);
613 case OBJECT_TYPE_STOP:
617 //WriteLog("OP: --> STOP\n");
618 // op_set_status_register(((p0>>3) & 0xFFFFFFFF));
619 //This seems more likely...
620 op_set_current_object(p0);
624 tom_set_pending_object_int();
625 if (tom_irq_enabled(IRQ_OPFLAG) && jaguar_interrupt_handler_is_valid(64))
626 m68k_set_irq(7); // Cause an NMI to occur...
633 WriteLog("op: unknown object type %i\n", ((uint8)p0 & 0x07));
640 // Store fixed size bitmap in line buffer
643 // Interesting thing about Rayman: There seems to be a transparent bitmap (1/8/16 bpp--which?)
644 // being rendered under his feet--doesn't align when walking... Check it out!
646 void OPProcessFixedBitmap(int scanline, uint64 p0, uint64 p1, bool render)
648 // Need to make sure that when writing that it stays within the line buffer...
649 // LBUF ($F01800 - $F01D9E) 360 x 32-bit RAM
650 uint8 depth = (p1 >> 12) & 0x07; // Color depth of image
651 int32 xpos = ((int16)((p1 << 4) & 0xFFFF)) >> 4;// Image xpos in LBUF
652 uint32 iwidth = (p1 >> 28) & 0x3FF; // Image width in *phrases*
653 uint32 data = (p0 >> 40) & 0xFFFFF8; // Pixel data address
654 //#ifdef OP_DEBUG_BMP
655 // Prolly should use this... Though not sure exactly how.
656 uint32 firstPix = (p1 >> 49) & 0x3F;
657 // "The LSB is significant only for scaled objects..." -JTRM
658 // "In 1 BPP mode, all five bits are significant. In 2 BPP mode, the top four are significant..."
661 // We can ignore the RELEASE (high order) bit for now--probably forever...!
662 // uint8 flags = (p1 >> 45) & 0x0F; // REFLECT, RMW, TRANS, RELEASE
663 //Optimize: break these out to their own BOOL values
664 uint8 flags = (p1 >> 45) & 0x07; // REFLECT (0), RMW (1), TRANS (2)
665 bool flagREFLECT = (flags & OPFLAG_REFLECT ? true : false),
666 flagRMW = (flags & OPFLAG_RMW ? true : false),
667 flagTRANS = (flags & OPFLAG_TRANS ? true : false);
668 // "For images with 1 to 4 bits/pixel the top 7 to 4 bits of the index
669 // provide the most significant bits of the palette address."
670 uint8 index = (p1 >> 37) & 0xFE; // CLUT index offset (upper pix, 1-4 bpp)
671 uint32 pitch = (p1 >> 15) & 0x07; // Phrase pitch
673 // int16 scanlineWidth = tom_getVideoModeWidth();
674 uint8 * tom_ram_8 = tom_get_ram_pointer();
675 uint8 * paletteRAM = &tom_ram_8[0x400];
676 // This is OK as long as it's used correctly: For 16-bit RAM to RAM direct copies--NOT
677 // for use when using endian-corrected data (i.e., any of the *_word_read functions!)
678 uint16 * paletteRAM16 = (uint16 *)paletteRAM;
680 // WriteLog("bitmap %ix? %ibpp at %i,? firstpix=? data=0x%.8x pitch %i hflipped=%s dwidth=? (linked to ?) RMW=%s Tranparent=%s\n",
681 // iwidth, op_bitmap_bit_depth[bitdepth], xpos, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), (flags&OPFLAG_RMW ? "yes" : "no"), (flags&OPFLAG_TRANS ? "yes" : "no"));
683 // Is it OK to have a 0 for the data width??? (i.e., undocumented?)
684 // Seems to be... Seems that dwidth *can* be zero (i.e., reuse same line) as well.
685 // Pitch == 0 is OK too...
686 // if (!render || op_pointer == 0 || dwidth == 0 || ptr == 0 || pitch == 0)
687 //I'm not convinced that we need to concern ourselves with data & op_pointer here either!
688 if (!render || iwidth == 0) // || data == 0 || op_pointer == 0)
691 //#define OP_DEBUG_BMP
692 //#ifdef OP_DEBUG_BMP
693 // WriteLog("bitmap %ix%i %ibpp at %i,%i firstpix=%i data=0x%.8x pitch %i hflipped=%s dwidth=%i (linked to 0x%.8x) Transluency=%s\n",
694 // iwidth, height, op_bitmap_bit_depth[bitdepth], xpos, ypos, firstPix, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), dwidth, op_pointer, (flags&OPFLAG_RMW ? "yes" : "no"));
697 // int32 leftMargin = xpos, rightMargin = (xpos + (phraseWidthToPixels[depth] * iwidth)) - 1;
698 int32 startPos = xpos, endPos = xpos +
699 (!flagREFLECT ? (phraseWidthToPixels[depth] * iwidth) - 1
700 : -((phraseWidthToPixels[depth] * iwidth) + 1));
701 uint32 clippedWidth = 0, phraseClippedWidth = 0, dataClippedWidth = 0;//, phrasePixel = 0;
702 bool in24BPPMode = (((GET16(tom_ram_8, 0x0028) >> 1) & 0x03) == 1 ? true : false); // VMODE
703 // Not sure if this is Jaguar Two only location or what...
704 // From the docs, it is... If we want to limit here we should think of something else.
705 // int32 limit = GET16(tom_ram_8, 0x0008); // LIMIT
707 int32 lbufWidth = (!in24BPPMode ? limit - 1 : (limit / 2) - 1); // Zero based limit...
709 // If the image is completely to the left or right of the line buffer, then bail.
710 //If in REFLECT mode, then these values are swapped! !!! FIX !!! [DONE]
711 //There are four possibilities:
712 // 1. image sits on left edge and no REFLECT; starts out of bounds but ends in bounds.
713 // 2. image sits on left edge and REFLECT; starts in bounds but ends out of bounds.
714 // 3. image sits on right edge and REFLECT; starts out of bounds but ends in bounds.
715 // 4. image sits on right edge and no REFLECT; starts in bounds but ends out of bounds.
716 //Numbers 2 & 4 can be caught by checking the LBUF clip while in the inner loop,
717 // numbers 1 & 3 are of concern.
718 // This *indirectly* handles only cases 2 & 4! And is WRONG if REFLECT is set...!
719 // if (rightMargin < 0 || leftMargin > lbufWidth)
721 // It might be easier to swap these (if REFLECTed) and just use XPOS down below...
722 // That way, you could simply set XPOS to leftMargin if !REFLECT and to rightMargin otherwise.
723 // Still have to be careful with the DATA and IWIDTH values though...
725 // if ((!flagREFLECT && (rightMargin < 0 || leftMargin > lbufWidth))
726 // || (flagREFLECT && (leftMargin < 0 || rightMargin > lbufWidth)))
728 if ((!flagREFLECT && (endPos < 0 || startPos > lbufWidth))
729 || (flagREFLECT && (startPos < 0 || endPos > lbufWidth)))
732 // Otherwise, find the clip limits and clip the phrase as well...
733 // NOTE: I'm fudging here by letting the actual blit overstep the bounds of the
734 // line buffer, but it shouldn't matter since there are two unused line
735 // buffers below and nothing above and I'll at most write 8 bytes outside
736 // the line buffer... I could use a fractional clip begin/end value, but
737 // this makes the blit a *lot* more hairy. I might fix this in the future
738 // if it becomes necessary. (JLH)
739 // Probably wouldn't be *that* hairy. Just use a delta that tells the inner loop
740 // which pixel in the phrase is being written, and quit when either end of phrases
741 // is reached or line buffer extents are surpassed.
743 //This stuff is probably wrong as well... !!! FIX !!!
744 //The strange thing is that it seems to work, but that's no guarantee that it's bulletproof!
745 //Yup. Seems that JagMania doesn't work correctly with this...
746 //Dunno if this is the problem, but Atari Karts is showing *some* of the road now...
751 clippedWidth = 0 - leftMargin,
752 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth],
753 leftMargin = 0 - (clippedWidth % phraseWidthToPixels[depth]);
756 if (rightMargin > lbufWidth)
757 clippedWidth = rightMargin - lbufWidth,
758 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];//,
759 // rightMargin = lbufWidth + (clippedWidth % phraseWidthToPixels[depth]);
760 // rightMargin = lbufWidth;
763 WriteLog("We're about to encounter a divide by zero error!\n");
764 // NOTE: We're just using endPos to figure out how much, if any, to clip by.
765 // ALSO: There may be another case where we start out of bounds and end out of bounds...!
766 if (startPos < 0) // Case #1: Begin out, end in, L to R
767 clippedWidth = 0 - startPos,
768 dataClippedWidth = phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth],
769 startPos = 0 - (clippedWidth % phraseWidthToPixels[depth]);
771 if (endPos < 0) // Case #2: Begin in, end out, R to L
772 clippedWidth = 0 - endPos,
773 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];
775 if (endPos > lbufWidth) // Case #3: Begin in, end out, L to R
776 clippedWidth = endPos - lbufWidth,
777 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];
779 if (startPos > lbufWidth) // Case #4: Begin out, end in, R to L
780 clippedWidth = startPos - lbufWidth,
781 dataClippedWidth = phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth],
782 startPos = lbufWidth + (clippedWidth % phraseWidthToPixels[depth]);
784 // If the image is sitting on the line buffer left or right edge, we need to compensate
785 // by decreasing the image phrase width accordingly.
786 iwidth -= phraseClippedWidth;
788 // Also, if we're clipping the phrase we need to make sure we're in the correct part of
790 // data += phraseClippedWidth * (pitch << 3);
791 data += dataClippedWidth * (pitch << 3);
793 // NOTE: When the bitmap is in REFLECT mode, the XPOS marks the *right* side of the
794 // bitmap! This makes clipping & etc. MUCH, much easier...!
795 // uint32 lbufAddress = 0x1800 + (!in24BPPMode ? leftMargin * 2 : leftMargin * 4);
796 uint32 lbufAddress = 0x1800 + (!in24BPPMode ? startPos * 2 : startPos * 4);
797 uint8 * currentLineBuffer = &tom_ram_8[lbufAddress];
801 // Hmm. We check above for 24 BPP mode, but don't do anything about it below...
802 // If we *were* in 24 BPP mode, how would you convert CRY to RGB24? Seems to me
803 // that if you're in CRY mode then you wouldn't be able to use 24 BPP bitmaps
806 if (depth == 0) // 1 BPP
808 // The LSB of flags is OPFLAG_REFLECT, so sign extend it and or 2 into it.
809 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
811 // Fetch 1st phrase...
812 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
813 //Note that firstPix should only be honored *if* we start with the 1st phrase of the bitmap
814 //i.e., we didn't clip on the margin...
815 pixels <<= firstPix; // Skip first N pixels (N=firstPix)...
816 int i = firstPix; // Start counter at right spot...
822 uint8 bit = pixels >> 63;
823 if (flagTRANS && bit == 0)
828 //Optimize: Set palleteRAM16 to beginning of palette RAM + index*2 and use only [bit] as index...
829 //Won't optimize RMW case though...
830 // This is the *only* correct use of endian-dependent code
831 // (i.e., mem-to-mem direct copying)!
832 *(uint16 *)currentLineBuffer = paletteRAM16[index | bit];
835 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bit) << 1]),
836 *(currentLineBuffer + 1) =
837 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bit) << 1) + 1]);
840 currentLineBuffer += lbufDelta;
844 // Fetch next phrase...
845 data += pitch << 3; // Multiply pitch * 8 (optimize: precompute this value)
846 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
849 else if (depth == 1) // 2 BPP
852 WriteLog("OP: Fixed bitmap @ 2 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
853 index &= 0xFC; // Top six bits form CLUT index
854 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
855 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
860 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
861 data += pitch << 3; // Multiply pitch * 8 (optimize: precompute this value)
863 for(int i=0; i<32; i++)
865 uint8 bits = pixels >> 62;
866 // Seems to me that both of these are in the same endian, so we could cast it as
867 // uint16 * and do straight across copies (what about 24 bpp? Treat it differently...)
868 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
869 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
870 // No, it isn't because we read the memory in an endian safe way--this *won't* work...
871 if (flagTRANS && bits == 0)
876 *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
879 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
880 *(currentLineBuffer + 1) =
881 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
884 currentLineBuffer += lbufDelta;
889 else if (depth == 2) // 4 BPP
892 WriteLog("OP: Fixed bitmap @ 4 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
893 index &= 0xF0; // Top four bits form CLUT index
894 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
895 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
900 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
901 data += pitch << 3; // Multiply pitch * 8 (optimize: precompute this value)
903 for(int i=0; i<16; i++)
905 uint8 bits = pixels >> 60;
906 // Seems to me that both of these are in the same endian, so we could cast it as
907 // uint16 * and do straight across copies (what about 24 bpp? Treat it differently...)
908 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
909 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
910 // No, it isn't because we read the memory in an endian safe way--this *won't* work...
911 if (flagTRANS && bits == 0)
916 *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
919 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
920 *(currentLineBuffer + 1) =
921 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
924 currentLineBuffer += lbufDelta;
929 else if (depth == 3) // 8 BPP
932 WriteLog("OP: Fixed bitmap @ 8 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
933 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
934 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
939 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
940 data += pitch << 3; // Multiply pitch * 8 (optimize: precompute this value)
942 for(int i=0; i<8; i++)
944 uint8 bits = pixels >> 56;
945 // Seems to me that both of these are in the same endian, so we could cast it as
946 // uint16 * and do straight across copies (what about 24 bpp? Treat it differently...)
947 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
948 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
949 // No, it isn't because we read the memory in an endian safe way--this *won't* work...
950 if (flagTRANS && bits == 0)
955 *(uint16 *)currentLineBuffer = paletteRAM16[bits];
958 BLEND_CR(*currentLineBuffer, paletteRAM[bits << 1]),
959 *(currentLineBuffer + 1) =
960 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[(bits << 1) + 1]);
963 currentLineBuffer += lbufDelta;
968 else if (depth == 4) // 16 BPP
971 WriteLog("OP: Fixed bitmap @ 16 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
972 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
973 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
978 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
979 data += pitch << 3; // Multiply pitch * 8 (optimize: precompute this value)
981 for(int i=0; i<4; i++)
983 uint8 bitsHi = pixels >> 56, bitsLo = pixels >> 48;
984 // Seems to me that both of these are in the same endian, so we could cast it as
985 // uint16 * and do straight across copies (what about 24 bpp? Treat it differently...)
986 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
987 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
988 // No, it isn't because we read the memory in an endian safe way--it *won't* work...
989 if (flagTRANS && (bitsLo | bitsHi) == 0)
994 *currentLineBuffer = bitsHi,
995 *(currentLineBuffer + 1) = bitsLo;
998 BLEND_CR(*currentLineBuffer, bitsHi),
999 *(currentLineBuffer + 1) =
1000 BLEND_Y(*(currentLineBuffer + 1), bitsLo);
1003 currentLineBuffer += lbufDelta;
1008 else if (depth == 5) // 24 BPP
1010 //Looks like Iron Soldier is the only game that uses 24BPP mode...
1011 //There *might* be others...
1012 //WriteLog("OP: Writing 24 BPP bitmap!\n");
1014 WriteLog("OP: Fixed bitmap @ 24 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1015 // Not sure, but I think RMW only works with 16 BPP and below, and only in CRY mode...
1016 // The LSB is OPFLAG_REFLECT, so sign extend it and or 4 into it.
1017 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 4) | 0x04;
1022 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1023 data += pitch << 3; // Multiply pitch * 8 (optimize: precompute this value)
1025 for(int i=0; i<2; i++)
1027 uint8 bits3 = pixels >> 56, bits2 = pixels >> 48,
1028 bits1 = pixels >> 40, bits0 = pixels >> 32;
1029 // Seems to me that both of these are in the same endian, so we could cast it as
1030 // uint16 * and do straight across copies (what about 24 bpp? Treat it differently...)
1031 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
1032 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
1033 // No, it isn't because we read the memory in an endian safe way--it *won't* work...
1034 if (flagTRANS && (bits3 | bits2 | bits1 | bits0) == 0)
1037 *currentLineBuffer = bits3,
1038 *(currentLineBuffer + 1) = bits2,
1039 *(currentLineBuffer + 2) = bits1,
1040 *(currentLineBuffer + 3) = bits0;
1042 currentLineBuffer += lbufDelta;
1050 // Store scaled bitmap in line buffer
1052 void OPProcessScaledBitmap(int scanline, uint64 p0, uint64 p1, uint64 p2, bool render)
1054 // Need to make sure that when writing that it stays within the line buffer...
1055 // LBUF ($F01800 - $F01D9E) 360 x 32-bit RAM
1056 uint8 depth = (p1 >> 12) & 0x07; // Color depth of image
1057 int32 xpos = ((int16)((p1 << 4) & 0xFFFF)) >> 4;// Image xpos in LBUF
1058 uint32 iwidth = (p1 >> 28) & 0x3FF; // Image width in *phrases*
1059 uint32 data = (p0 >> 40) & 0xFFFFF8; // Pixel data address
1060 //#ifdef OP_DEBUG_BMP
1061 // Prolly should use this... Though not sure exactly how.
1062 //Use the upper bits as an offset into the phrase depending on the BPP. That's how!
1063 uint32 firstPix = (p1 >> 49) & 0x3F;
1064 //This is WEIRD! I'm sure I saw Atari Karts request 8 BPP FIRSTPIX! What happened???
1066 WriteLog("OP: FIRSTPIX != 0!\n");
1068 // We can ignore the RELEASE (high order) bit for now--probably forever...!
1069 // uint8 flags = (p1 >> 45) & 0x0F; // REFLECT, RMW, TRANS, RELEASE
1070 //Optimize: break these out to their own BOOL values
1071 uint8 flags = (p1 >> 45) & 0x07; // REFLECT (0), RMW (1), TRANS (2)
1072 bool flagREFLECT = (flags & OPFLAG_REFLECT ? true : false),
1073 flagRMW = (flags & OPFLAG_RMW ? true : false),
1074 flagTRANS = (flags & OPFLAG_TRANS ? true : false);
1075 uint8 index = (p1 >> 37) & 0xFE; // CLUT index offset (upper pix, 1-4 bpp)
1076 uint32 pitch = (p1 >> 15) & 0x07; // Phrase pitch
1078 // int16 scanlineWidth = tom_getVideoModeWidth();
1079 uint8 * tom_ram_8 = tom_get_ram_pointer();
1080 uint8 * paletteRAM = &tom_ram_8[0x400];
1081 // This is OK as long as it's used correctly: For 16-bit RAM to RAM direct copies--NOT
1082 // for use when using endian-corrected data (i.e., any of the *_word_read functions!)
1083 uint16 * paletteRAM16 = (uint16 *)paletteRAM;
1085 uint8 hscale = p2 & 0xFF;
1086 uint8 horizontalRemainder = hscale; // Not sure if it starts full, but seems reasonable
1087 int32 scaledWidthInPixels = (iwidth * phraseWidthToPixels[depth] * hscale) >> 5;
1088 uint32 scaledPhrasePixels = (phraseWidthToPixels[depth] * hscale) >> 5;
1090 // WriteLog("bitmap %ix? %ibpp at %i,? firstpix=? data=0x%.8x pitch %i hflipped=%s dwidth=? (linked to ?) RMW=%s Tranparent=%s\n",
1091 // iwidth, op_bitmap_bit_depth[bitdepth], xpos, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), (flags&OPFLAG_RMW ? "yes" : "no"), (flags&OPFLAG_TRANS ? "yes" : "no"));
1093 //Looks like an hscale of zero means don't draw!
1094 if (!render || iwidth == 0 || hscale == 0)
1097 //#define OP_DEBUG_BMP
1098 //#ifdef OP_DEBUG_BMP
1099 // WriteLog("bitmap %ix%i %ibpp at %i,%i firstpix=%i data=0x%.8x pitch %i hflipped=%s dwidth=%i (linked to 0x%.8x) Transluency=%s\n",
1100 // iwidth, height, op_bitmap_bit_depth[bitdepth], xpos, ypos, firstPix, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), dwidth, op_pointer, (flags&OPFLAG_RMW ? "yes" : "no"));
1103 int32 startPos = xpos, endPos = xpos +
1104 (!flagREFLECT ? scaledWidthInPixels - 1 : -(scaledWidthInPixels + 1));
1105 uint32 clippedWidth = 0, phraseClippedWidth = 0, dataClippedWidth = 0;
1106 bool in24BPPMode = (((GET16(tom_ram_8, 0x0028) >> 1) & 0x03) == 1 ? true : false); // VMODE
1107 // Not sure if this is Jaguar Two only location or what...
1108 // From the docs, it is... If we want to limit here we should think of something else.
1109 // int32 limit = GET16(tom_ram_8, 0x0008); // LIMIT
1111 int32 lbufWidth = (!in24BPPMode ? limit - 1 : (limit / 2) - 1); // Zero based limit...
1113 // If the image is completely to the left or right of the line buffer, then bail.
1114 //If in REFLECT mode, then these values are swapped! !!! FIX !!! [DONE]
1115 //There are four possibilities:
1116 // 1. image sits on left edge and no REFLECT; starts out of bounds but ends in bounds.
1117 // 2. image sits on left edge and REFLECT; starts in bounds but ends out of bounds.
1118 // 3. image sits on right edge and REFLECT; starts out of bounds but ends in bounds.
1119 // 4. image sits on right edge and no REFLECT; starts in bounds but ends out of bounds.
1120 //Numbers 2 & 4 can be caught by checking the LBUF clip while in the inner loop,
1121 // numbers 1 & 3 are of concern.
1122 // This *indirectly* handles only cases 2 & 4! And is WRONG if REFLECT is set...!
1123 // if (rightMargin < 0 || leftMargin > lbufWidth)
1125 // It might be easier to swap these (if REFLECTed) and just use XPOS down below...
1126 // That way, you could simply set XPOS to leftMargin if !REFLECT and to rightMargin otherwise.
1127 // Still have to be careful with the DATA and IWIDTH values though...
1129 if ((!flagREFLECT && (endPos < 0 || startPos > lbufWidth))
1130 || (flagREFLECT && (startPos < 0 || endPos > lbufWidth)))
1133 // Otherwise, find the clip limits and clip the phrase as well...
1134 // NOTE: I'm fudging here by letting the actual blit overstep the bounds of the
1135 // line buffer, but it shouldn't matter since there are two unused line
1136 // buffers below and nothing above and I'll at most write 40 bytes outside
1137 // the line buffer... I could use a fractional clip begin/end value, but
1138 // this makes the blit a *lot* more hairy. I might fix this in the future
1139 // if it becomes necessary. (JLH)
1140 // Probably wouldn't be *that* hairy. Just use a delta that tells the inner loop
1141 // which pixel in the phrase is being written, and quit when either end of phrases
1142 // is reached or line buffer extents are surpassed.
1144 //This stuff is probably wrong as well... !!! FIX !!!
1145 //The strange thing is that it seems to work, but that's no guarantee that it's bulletproof!
1146 //Yup. Seems that JagMania doesn't work correctly with this...
1147 //Dunno if this is the problem, but Atari Karts is showing *some* of the road now...
1148 //Actually, it is! Or, it was. It doesn't seem to be clipping here, so the problem lies
1149 //elsewhere! Hmm. Putting the scaling code into the 1/2/8 BPP cases seems to draw the ground
1150 // a bit more accurately... Strange!
1151 //It's probably a case of the REFLECT flag being set and the background being written
1152 //from the right side of the screen...
1153 //But no, it isn't... At least if the diagnostics are telling the truth!
1155 // NOTE: We're just using endPos to figure out how much, if any, to clip by.
1156 // ALSO: There may be another case where we start out of bounds and end out of bounds...!
1158 //There's a problem here with scaledPhrasePixels in that it can be forced to zero when
1159 //the scaling factor is small. So fix it already! !!! FIX !!!
1160 /*if (scaledPhrasePixels == 0)
1162 WriteLog("OP: [Scaled] We're about to encounter a divide by zero error!\n");
1163 DumpScaledObject(p0, p1, p2);
1165 //NOTE: I'm almost 100% sure that this is wrong... And it is! :-p
1166 if (startPos < 0) // Case #1: Begin out, end in, L to R
1167 /* clippedWidth = 0 - startPos,
1168 dataClippedWidth = phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth],
1169 startPos = 0 - (clippedWidth % phraseWidthToPixels[depth]);*/
1170 clippedWidth = 0 - startPos,
1171 dataClippedWidth = phraseClippedWidth = clippedWidth / scaledPhrasePixels,
1172 startPos = 0 - (clippedWidth % scaledPhrasePixels);
1174 if (endPos < 0) // Case #2: Begin in, end out, R to L
1175 /* clippedWidth = 0 - endPos,
1176 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];*/
1177 clippedWidth = 0 - endPos,
1178 phraseClippedWidth = clippedWidth / scaledPhrasePixels;
1180 if (endPos > lbufWidth) // Case #3: Begin in, end out, L to R
1181 /* clippedWidth = endPos - lbufWidth,
1182 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];*/
1183 clippedWidth = endPos - lbufWidth,
1184 phraseClippedWidth = clippedWidth / scaledPhrasePixels;
1186 if (startPos > lbufWidth) // Case #4: Begin out, end in, R to L
1187 /* clippedWidth = startPos - lbufWidth,
1188 dataClippedWidth = phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth],
1189 startPos = lbufWidth + (clippedWidth % phraseWidthToPixels[depth]);*/
1190 clippedWidth = startPos - lbufWidth,
1191 dataClippedWidth = phraseClippedWidth = clippedWidth / scaledPhrasePixels,
1192 startPos = lbufWidth + (clippedWidth % scaledPhrasePixels);
1194 extern int op_start_log;
1195 if (op_start_log && clippedWidth != 0)
1196 WriteLog("OP: Clipped line. SP=%i, EP=%i, clip=%u, iwidth=%u, hscale=%02X\n", startPos, endPos, clippedWidth, iwidth, hscale);
1197 if (op_start_log && startPos == 13)
1199 WriteLog("OP: Scaled line. SP=%i, EP=%i, clip=%u, iwidth=%u, hscale=%02X, depth=%u, firstPix=%u\n", startPos, endPos, clippedWidth, iwidth, hscale, depth, firstPix);
1200 DumpScaledObject(p0, p1, p2);
1202 // If the image is sitting on the line buffer left or right edge, we need to compensate
1203 // by decreasing the image phrase width accordingly.
1204 iwidth -= phraseClippedWidth;
1206 // Also, if we're clipping the phrase we need to make sure we're in the correct part of the pixel data.
1208 // data += phraseClippedWidth * (pitch << 3);
1209 data += dataClippedWidth * (pitch << 3);
1211 // NOTE: When the bitmap is in REFLECT mode, the XPOS marks the *right* side of the
1212 // bitmap! This makes clipping & etc. MUCH, much easier...!
1213 // uint32 lbufAddress = 0x1800 + (!in24BPPMode ? leftMargin * 2 : leftMargin * 4);
1214 uint32 lbufAddress = 0x1800 + (!in24BPPMode ? startPos * 2 : startPos * 4);
1215 uint8 * currentLineBuffer = &tom_ram_8[lbufAddress];
1219 // Hmm. We check above for 24 BPP mode, but don't do anything about it below...
1220 // If we *were* in 24 BPP mode, how would you convert CRY to RGB24? Seems to me
1221 // that if you're in CRY mode then you wouldn't be able to use 24 BPP bitmaps
1224 if (depth == 0) // 1 BPP
1227 WriteLog("OP: Scaled bitmap @ 1 BPP requesting FIRSTPIX!\n");
1228 // The LSB of flags is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1229 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1232 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1234 while ((int32)iwidth > 0)
1236 uint8 bits = pixels >> 63;
1238 if (flagTRANS && bits == 0)
1243 // This is the *only* correct use of endian-dependent code
1244 // (i.e., mem-to-mem direct copying)!
1245 *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1247 *currentLineBuffer =
1248 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1249 *(currentLineBuffer + 1) =
1250 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1253 currentLineBuffer += lbufDelta;
1255 horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
1256 while (horizontalRemainder & 0x80)
1258 horizontalRemainder += hscale;
1265 int phrasesToSkip = pixCount / 64, pixelShift = pixCount % 64;
1267 data += (pitch << 3) * phrasesToSkip;
1268 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1269 pixels <<= 1 * pixelShift;
1270 iwidth -= phrasesToSkip;
1271 pixCount = pixelShift;
1275 else if (depth == 1) // 2 BPP
1278 WriteLog("OP: Scaled bitmap @ 2 BPP requesting FIRSTPIX!\n");
1279 index &= 0xFC; // Top six bits form CLUT index
1280 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1281 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1284 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1286 while ((int32)iwidth > 0)
1288 uint8 bits = pixels >> 62;
1290 if (flagTRANS && bits == 0)
1295 // This is the *only* correct use of endian-dependent code
1296 // (i.e., mem-to-mem direct copying)!
1297 *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1299 *currentLineBuffer =
1300 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1301 *(currentLineBuffer + 1) =
1302 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1305 currentLineBuffer += lbufDelta;
1307 horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
1308 while (horizontalRemainder & 0x80)
1310 horizontalRemainder += hscale;
1317 int phrasesToSkip = pixCount / 32, pixelShift = pixCount % 32;
1319 data += (pitch << 3) * phrasesToSkip;
1320 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1321 pixels <<= 2 * pixelShift;
1322 iwidth -= phrasesToSkip;
1323 pixCount = pixelShift;
1327 else if (depth == 2) // 4 BPP
1330 WriteLog("OP: Scaled bitmap @ 4 BPP requesting FIRSTPIX!\n");
1331 index &= 0xF0; // Top four bits form CLUT index
1332 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1333 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1336 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1338 while ((int32)iwidth > 0)
1340 uint8 bits = pixels >> 60;
1342 if (flagTRANS && bits == 0)
1347 // This is the *only* correct use of endian-dependent code
1348 // (i.e., mem-to-mem direct copying)!
1349 *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1351 *currentLineBuffer =
1352 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1353 *(currentLineBuffer + 1) =
1354 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1357 currentLineBuffer += lbufDelta;
1359 horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
1360 while (horizontalRemainder & 0x80)
1362 horizontalRemainder += hscale;
1369 int phrasesToSkip = pixCount / 16, pixelShift = pixCount % 16;
1371 data += (pitch << 3) * phrasesToSkip;
1372 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1373 pixels <<= 4 * pixelShift;
1374 iwidth -= phrasesToSkip;
1375 pixCount = pixelShift;
1379 else if (depth == 3) // 8 BPP
1382 WriteLog("OP: Scaled bitmap @ 8 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1383 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1384 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1387 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1389 while ((int32)iwidth > 0)
1391 uint8 bits = pixels >> 56;
1393 if (flagTRANS && bits == 0)
1398 // This is the *only* correct use of endian-dependent code
1399 // (i.e., mem-to-mem direct copying)!
1400 *(uint16 *)currentLineBuffer = paletteRAM16[bits];
1402 *currentLineBuffer =
1403 BLEND_CR(*currentLineBuffer, paletteRAM[bits << 1]),
1404 *(currentLineBuffer + 1) =
1405 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[(bits << 1) + 1]);
1408 currentLineBuffer += lbufDelta;
1410 horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
1411 while (horizontalRemainder & 0x80)
1413 horizontalRemainder += hscale;
1420 int phrasesToSkip = pixCount / 8, pixelShift = pixCount % 8;
1422 data += (pitch << 3) * phrasesToSkip;
1423 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1424 pixels <<= 8 * pixelShift;
1425 iwidth -= phrasesToSkip;
1426 pixCount = pixelShift;
1430 else if (depth == 4) // 16 BPP
1433 WriteLog("OP: Scaled bitmap @ 16 BPP requesting FIRSTPIX!\n");
1434 // The LSB is OPFLAG_REFLECT, so sign extend it and OR 2 into it.
1435 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1438 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1440 while ((int32)iwidth > 0)
1442 uint8 bitsHi = pixels >> 56, bitsLo = pixels >> 48;
1444 if (flagTRANS && (bitsLo | bitsHi) == 0)
1449 *currentLineBuffer = bitsHi,
1450 *(currentLineBuffer + 1) = bitsLo;
1452 *currentLineBuffer =
1453 BLEND_CR(*currentLineBuffer, bitsHi),
1454 *(currentLineBuffer + 1) =
1455 BLEND_Y(*(currentLineBuffer + 1), bitsLo);
1458 currentLineBuffer += lbufDelta;
1460 horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
1461 while (horizontalRemainder & 0x80)
1463 horizontalRemainder += hscale;
1470 int phrasesToSkip = pixCount / 4, pixelShift = pixCount % 4;
1472 data += (pitch << 3) * phrasesToSkip;
1473 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1474 pixels <<= 16 * pixelShift;
1476 iwidth -= phrasesToSkip;
1478 pixCount = pixelShift;
1482 else if (depth == 5) // 24 BPP
1484 //I'm not sure that you can scale a 24 BPP bitmap properly--the JTRM seems to indicate as much.
1485 WriteLog("OP: Writing 24 BPP scaled bitmap!\n");
1487 WriteLog("OP: Scaled bitmap @ 24 BPP requesting FIRSTPIX!\n");
1488 // Not sure, but I think RMW only works with 16 BPP and below, and only in CRY mode...
1489 // The LSB is OPFLAG_REFLECT, so sign extend it and or 4 into it.
1490 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 4) | 0x04;
1495 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1496 data += pitch << 3; // Multiply pitch * 8 (optimize: precompute this value)
1498 for(int i=0; i<2; i++)
1500 uint8 bits3 = pixels >> 56, bits2 = pixels >> 48,
1501 bits1 = pixels >> 40, bits0 = pixels >> 32;
1502 // Seems to me that both of these are in the same endian, so we could cast it as
1503 // uint16 * and do straight across copies (what about 24 bpp? Treat it differently...)
1504 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
1505 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
1506 // No, it isn't because we read the memory in an endian safe way--it *won't* work...
1507 if (flagTRANS && (bits3 | bits2 | bits1 | bits0) == 0)
1510 *currentLineBuffer = bits3,
1511 *(currentLineBuffer + 1) = bits2,
1512 *(currentLineBuffer + 2) = bits1,
1513 *(currentLineBuffer + 3) = bits0;
1515 currentLineBuffer += lbufDelta;
1520 /*if (depth == 3 && startPos == 13)
1523 WriteLog("OP: Writing in the margins...\n");
1524 for(int i=0; i<100*2; i+=2)
1525 // for(int i=0; i<14*2; i+=2)
1526 tom_ram_8[0x1800 + i] = 0xFF,
1527 tom_ram_8[0x1800 + i + 1] = 0xFF;
1529 // uint32 lbufAddress = 0x1800 + (!in24BPPMode ? startPos * 2 : startPos * 4);
1530 // uint8 * currentLineBuffer = &tom_ram_8[lbufAddress];