X-Git-Url: http://shamusworld.gotdns.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fobjectp.cpp;h=43b575eb57297371f5dc6b79801c63574f76c20e;hb=376e1f69fb9e9360d1f796b35a00227111e63fab;hp=4934892aa99596e559a3b9b46e42e7b25016f020;hpb=fa566a2c8ec532eb5325b4d5a663fb2a7d72adc6;p=virtualjaguar diff --git a/src/objectp.cpp b/src/objectp.cpp index 4934892..43b575e 100644 --- a/src/objectp.cpp +++ b/src/objectp.cpp @@ -16,8 +16,6 @@ #define BLEND_Y(dst, src) op_blend_y[(((uint16)dst<<8)) | ((uint16)(src))] #define BLEND_CR(dst, src) op_blend_cr[(((uint16)dst)<<8) | ((uint16)(src))] -//Delete this once we're rid of zbmpop*.h... -#define BLEND_CC(dst, src) op_blend_cr[(((uint16)dst)<<8) | ((uint16)(src))] #define OBJECT_TYPE_BITMAP 0 // 000 #define OBJECT_TYPE_SCALE 1 // 001 @@ -31,12 +29,6 @@ #define CONDITION_OP_FLAG_SET 3 #define CONDITION_SECOND_HALF_LINE 4 -//Delete this once we're rid of zbmpop*.h... -#define FLAGS_RELEASE 8 -#define FLAGS_TRANSPARENT 4 -#define FLAGS_READMODIFY 2 -#define FLAGS_HFLIP 1 - #define OPFLAG_RELEASE 8 // Bus release bit #define OPFLAG_TRANS 4 // Transparency bit #define OPFLAG_RMW 2 // Read-Modify-Write bit @@ -46,6 +38,9 @@ void OPProcessFixedBitmap(int scanline, uint64 p0, uint64 p1, bool render); void OPProcessScaledBitmap(int scanline, uint64 p0, uint64 p1, uint64 p2, bool render); +void DumpScaledObject(uint64 p0, uint64 p1, uint64 p2); +void DumpFixedObject(uint64 p0, uint64 p1); +uint64 op_load_phrase(uint32 offset); // External global variables @@ -55,18 +50,20 @@ extern uint32 jaguar_mainRom_crc32; static uint8 * op_blend_y; static uint8 * op_blend_cr; -// There may be a problem with this "RAM" overlapping some of the -// regular TOM RAM... +// There may be a problem with this "RAM" overlapping (and thus being independent of) +// some of the regular TOM RAM... 
static uint8 objectp_ram[0x40]; // This is based at $F00000
 uint8 objectp_running;
 bool objectp_stop_reading_list;

 static uint8 op_bitmap_bit_depth[8] = { 1, 2, 4, 8, 16, 24, 32, 0 };
-static uint32 op_bitmap_bit_size[8] =
-	{ (uint32)(0.125*65536), (uint32)(0.25*65536), (uint32)(0.5*65536), (uint32)(1*65536),
-	  (uint32)(2*65536), (uint32)(1*65536), (uint32)(1*65536), (uint32)(1*65536) };
+//static uint32 op_bitmap_bit_size[8] =
+//	{ (uint32)(0.125*65536), (uint32)(0.25*65536), (uint32)(0.5*65536), (uint32)(1*65536),
+//	  (uint32)(2*65536), (uint32)(1*65536), (uint32)(1*65536), (uint32)(1*65536) };
 static uint32 op_pointer;

+int32 phraseWidthToPixels[8] = { 64, 32, 16, 8, 4, 2, 0, 0 };
+

 //
 // Object Processor initialization
@@ -124,11 +121,37 @@ void op_reset(void)

 void op_done(void)
 {
+	const char * opType[8] =	// Display names indexed by the 3-bit object TYPE field
+	{ "(BITMAP)", "(SCALED BITMAP)", "(GPU INT)", "(BRANCH)", "(STOP)", "???", "???", "???" };
+	const char * ccType[8] =	// Display names indexed by the 3-bit branch CC field
+	{ "\"==\"", "\"<\"", "\">\"", "(opflag set)", "(second half line)", "?", "?", "?" };
+	// Shutdown diagnostic: log the OLP, then the 32 phrases (0x100 bytes) starting there.
+	uint32 olp = op_get_list_pointer();
+	WriteLog("OP: OLP = %08X\n", olp);
+	WriteLog("OP: Phrase dump\n ----------\n");
+	for(uint32 i=0; i<0x100; i+=8)
+	{
+		uint32 hi = jaguar_long_read(olp + i), lo = jaguar_long_read(olp + i + 4);
+		WriteLog("\t%08X: %08X %08X %s", olp + i, hi, lo, opType[lo & 0x07]);
+		if ((lo & 0x07) == 3)	// Branch object: decode YPOS, CC and LINK as well
+		{
+			uint16 ypos = (lo >> 3) & 0x7FF;
+			uint8 cc = (lo >> 14) & 0x07;	// Full 3-bit CC; a 0x03 mask reported CONDITION_SECOND_HALF_LINE (4) as "=="
+			uint32 link = ((hi << 11) | (lo >> 21)) & 0x3FFFF8;	// Same as ((phrase >> 24) & 0x7FFFF) << 3
+			WriteLog(" YPOS=%u, CC=%s, link=%08X", ypos, ccType[cc], link);
+		}
+		WriteLog("\n");
+		if ((lo & 0x07) == 0)	// Fixed bitmap: object is two phrases long
+			DumpFixedObject(op_load_phrase(olp+i), op_load_phrase(olp+i+8));
+		if ((lo & 0x07) == 1)	// Scaled bitmap: object is three phrases long
+			DumpScaledObject(op_load_phrase(olp+i), op_load_phrase(olp+i+8), op_load_phrase(olp+i+16));
+	}
+	WriteLog("\n");
 }

 //
 // Object Processor memory access
-// Memory range: F00010 (F00008?) 
- F00027 +// Memory range: F00010 - F00027 // void op_byte_write(uint32 offset, uint8 data) { @@ -198,7 +221,7 @@ void op_set_current_object(uint64 object) { //Not sure this is right... Wouldn't it just be stored 64 bit BE? // Stored as least significant 32 bits first, ms32 last in big endian - objectp_ram[0x13] = object & 0xFF; object >>= 8; +/* objectp_ram[0x13] = object & 0xFF; object >>= 8; objectp_ram[0x12] = object & 0xFF; object >>= 8; objectp_ram[0x11] = object & 0xFF; object >>= 8; objectp_ram[0x10] = object & 0xFF; object >>= 8; @@ -206,7 +229,17 @@ void op_set_current_object(uint64 object) objectp_ram[0x17] = object & 0xFF; object >>= 8; objectp_ram[0x16] = object & 0xFF; object >>= 8; objectp_ram[0x15] = object & 0xFF; object >>= 8; - objectp_ram[0x14] = object & 0xFF; + objectp_ram[0x14] = object & 0xFF;*/ +// Let's try regular good old big endian... + objectp_ram[0x17] = object & 0xFF; object >>= 8; + objectp_ram[0x16] = object & 0xFF; object >>= 8; + objectp_ram[0x15] = object & 0xFF; object >>= 8; + objectp_ram[0x14] = object & 0xFF; object >>= 8; + + objectp_ram[0x13] = object & 0xFF; object >>= 8; + objectp_ram[0x12] = object & 0xFF; object >>= 8; + objectp_ram[0x11] = object & 0xFF; object >>= 8; + objectp_ram[0x10] = object & 0xFF; } uint64 op_load_phrase(uint32 offset) @@ -215,10 +248,6 @@ uint64 op_load_phrase(uint32 offset) return ((uint64)jaguar_long_read(offset) << 32) | (uint64)jaguar_long_read(offset+4); } -// -// OP replacement functions -// - void OPStorePhrase(uint32 offset, uint64 p) { offset &= ~0x07; // 8 byte alignment @@ -227,7 +256,56 @@ void OPStorePhrase(uint32 offset, uint64 p) } // -// *** NEW *** +// Debugging routines +// +void DumpScaledObject(uint64 p0, uint64 p1, uint64 p2) +{ + WriteLog(" (SCALED BITMAP)"); + WriteLog(" %08X --> phrase %08X %08X\n", op_pointer, (uint32)(p1>>32), (uint32)(p1&0xFFFFFFFF)); + WriteLog(" %08X --> phrase %08X %08X ", op_pointer+8, (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF)); + uint8 bitdepth 
= (p1 >> 12) & 0x07; + int16 ypos = ((p0 >> 3) & 0x3FF); // ??? What if not interlaced (/2)? + int32 xpos = p1 & 0xFFF; + xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos); + uint32 iwidth = ((p1 >> 28) & 0x3FF); + uint32 dwidth = ((p1 >> 18) & 0x3FF); // Unsigned! + uint16 height = ((p0 >> 14) & 0x3FF); + uint32 link = ((p0 >> 24) & 0x7FFFF) << 3; + uint32 ptr = ((p0 >> 43) & 0x1FFFFF) << 3; + uint32 firstPix = (p1 >> 49) & 0x3F; + uint8 flags = (p1 >> 45) & 0x0F; + uint8 idx = (p1 >> 38) & 0x7F; + uint32 pitch = (p1 >> 15) & 0x07; + WriteLog("\n [%u (%u) x %u @ (%i, %u) (%u bpp), l: %08X, p: %08X fp: %02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n", + iwidth, dwidth, height, xpos, ypos, op_bitmap_bit_depth[bitdepth], link, ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""), (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""), (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch); + uint32 hscale = p2 & 0xFF; + uint32 vscale = (p2 >> 8) & 0xFF; + uint32 remainder = (p2 >> 16) & 0xFF; + WriteLog(" [hsc: %02X, vsc: %02X, rem: %02X]\n", hscale, vscale, remainder); +} + +void DumpFixedObject(uint64 p0, uint64 p1) +{ + WriteLog(" (BITMAP)"); + WriteLog(" %08X --> phrase %08X %08X\n", op_pointer, (uint32)(p1>>32), (uint32)(p1&0xFFFFFFFF)); + uint8 bitdepth = (p1 >> 12) & 0x07; + int16 ypos = ((p0 >> 3) & 0x3FF); // ??? What if not interlaced (/2)? + int32 xpos = p1 & 0xFFF; + xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos); + uint32 iwidth = ((p1 >> 28) & 0x3FF); + uint32 dwidth = ((p1 >> 18) & 0x3FF); // Unsigned! 
+ uint16 height = ((p0 >> 14) & 0x3FF); + uint32 link = ((p0 >> 24) & 0x7FFFF) << 3; + uint32 ptr = ((p0 >> 43) & 0x1FFFFF) << 3; + uint32 firstPix = (p1 >> 49) & 0x3F; + uint8 flags = (p1 >> 45) & 0x0F; + uint8 idx = (p1 >> 38) & 0x7F; + uint32 pitch = (p1 >> 15) & 0x07; + WriteLog(" [%u (%u) x %u @ (%i, %u) (%u bpp), l: %08X, p: %08X fp: %02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n", + iwidth, dwidth, height, xpos, ypos, op_bitmap_bit_depth[bitdepth], link, ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""), (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""), (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch); +} + +// // Object Processor main routine // void OPProcessList(int scanline, bool render) @@ -238,11 +316,11 @@ extern int op_start_log; // If jaguar_exec() is working right, we should *never* have to check for this // condition... - if (scanline < tom_get_vdb()) +/* if (scanline < tom_get_vdb()) return; if (scanline >= 525)//tom_getVideoModeHeight()+tom_get_vdb()) - return; + return;//*/ op_pointer = op_get_list_pointer(); @@ -271,6 +349,7 @@ else uint64 p0 = op_load_phrase(op_pointer); op_pointer += 8; if (scanline == tom_get_vdb() && op_start_log) +//if (scanline == 215 && op_start_log) { WriteLog("%08X --> phrase %08X %08X", op_pointer - 8, (int)(p0>>32), (int)(p0&0xFFFFFFFF)); if ((p0 & 0x07) == OBJECT_TYPE_BITMAP) @@ -284,36 +363,37 @@ int32 xpos = p1 & 0xFFF; xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos); uint32 iwidth = ((p1 >> 28) & 0x3FF); uint32 dwidth = ((p1 >> 18) & 0x3FF); // Unsigned! 
- uint16 height = ((p0 >> 14) & 0x3FF) - 1; + uint16 height = ((p0 >> 14) & 0x3FF); uint32 link = ((p0 >> 24) & 0x7FFFF) << 3; uint32 ptr = ((p0 >> 43) & 0x1FFFFF) << 3; uint32 firstPix = (p1 >> 49) & 0x3F; uint8 flags = (p1 >> 45) & 0x0F; uint8 idx = (p1 >> 38) & 0x7F; uint32 pitch = (p1 >> 15) & 0x07; -WriteLog("\n [%u (%u) x %u @ %i, %u (%u bpp), l: %08X, p: %08X fp: %02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n", - iwidth, dwidth, height, xpos, ypos, op_bitmap_bit_depth[bitdepth], link, ptr, firstPix, (flags&FLAGS_HFLIP ? "REFLECT " : ""), (flags&FLAGS_READMODIFY ? "RMW " : ""), (flags&FLAGS_TRANSPARENT ? "TRANS " : ""), (flags&FLAGS_RELEASE ? "RELEASE" : ""), idx, pitch); +WriteLog("\n [%u (%u) x %u @ (%i, %u) (%u bpp), l: %08X, p: %08X fp: %02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n", + iwidth, dwidth, height, xpos, ypos, op_bitmap_bit_depth[bitdepth], link, ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""), (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""), (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch); } if ((p0 & 0x07) == OBJECT_TYPE_SCALE) { WriteLog(" (SCALED BITMAP)"); uint64 p1 = op_load_phrase(op_pointer), p2 = op_load_phrase(op_pointer+8); WriteLog("\n%08X --> phrase %08X %08X ", op_pointer, (int)(p1>>32), (int)(p1&0xFFFFFFFF)); +WriteLog("\n%08X --> phrase %08X %08X ", op_pointer+8, (int)(p2>>32), (int)(p2&0xFFFFFFFF)); uint8 bitdepth = (p1 >> 12) & 0x07; int16 ypos = ((p0 >> 3) & 0x3FF); // ??? What if not interlaced (/2)? int32 xpos = p1 & 0xFFF; xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos); uint32 iwidth = ((p1 >> 28) & 0x3FF); uint32 dwidth = ((p1 >> 18) & 0x3FF); // Unsigned! 
- uint16 height = ((p0 >> 14) & 0x3FF) - 1; + uint16 height = ((p0 >> 14) & 0x3FF); uint32 link = ((p0 >> 24) & 0x7FFFF) << 3; uint32 ptr = ((p0 >> 43) & 0x1FFFFF) << 3; uint32 firstPix = (p1 >> 49) & 0x3F; uint8 flags = (p1 >> 45) & 0x0F; uint8 idx = (p1 >> 38) & 0x7F; uint32 pitch = (p1 >> 15) & 0x07; -WriteLog("\n [%u (%u) x %u @ %i, %u (%u bpp), l: %08X, p: %08X fp: %02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n", - iwidth, dwidth, height, xpos, ypos, op_bitmap_bit_depth[bitdepth], link, ptr, firstPix, (flags&FLAGS_HFLIP ? "REFLECT " : ""), (flags&FLAGS_READMODIFY ? "RMW " : ""), (flags&FLAGS_TRANSPARENT ? "TRANS " : ""), (flags&FLAGS_RELEASE ? "RELEASE" : ""), idx, pitch); +WriteLog("\n [%u (%u) x %u @ (%i, %u) (%u bpp), l: %08X, p: %08X fp: %02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n", + iwidth, dwidth, height, xpos, ypos, op_bitmap_bit_depth[bitdepth], link, ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""), (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""), (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch); uint32 hscale = p2 & 0xFF; uint32 vscale = (p2 >> 8) & 0xFF; uint32 remainder = (p2 >> 16) & 0xFF; @@ -347,8 +427,11 @@ WriteLog(" --> List end\n"); // There's probably another bit somewhere that enables this mode--but so far, doesn't seem // to affect any other game in a negative way (that I've seen). // Either that, or it's an undocumented bug... - if (ypos == 0) - ypos = tom_word_read(0xF00046) / 2; // Get the VDB value + +//No, the reason this was needed is that the OP code before was wrong. Any value +//less than VDB will get written to the top line of the display! +// if (ypos == 0) +// ypos = tom_word_read(0xF00046) / 2; // Get the VDB value uint32 height = (p0 & 0xFFC000) >> 14; uint32 oldOPP = op_pointer - 8; // *** BEGIN OP PROCESSOR TESTING ONLY *** @@ -401,13 +484,19 @@ if (!inhibit) // For OP testing only! 
// There's probably another bit somewhere that enables this mode--but so far, doesn't seem // to affect any other game in a negative way (that I've seen). // Either that, or it's an undocumented bug... - if (ypos == 0) - ypos = tom_word_read(0xF00046) / 2; // Get the VDB value + +//No, the reason this was needed is that the OP code before was wrong. Any value +//less than VDB will get written to the top line of the display! +// if (ypos == 0) +// ypos = tom_word_read(0xF00046) / 2; // Get the VDB value uint32 height = (p0 & 0xFFC000) >> 14; uint32 oldOPP = op_pointer - 8; // *** BEGIN OP PROCESSOR TESTING ONLY *** if (inhibit && op_start_log) - WriteLog("!!! ^^^ This object is INHIBITED! ^^^ !!!\n"); +{ + WriteLog("!!! ^^^ This object is INHIBITED! ^^^ !!! (scanline=%u, ypos=%u, height=%u)\n", scanline, ypos, height); + DumpScaledObject(p0, op_load_phrase(op_pointer), op_load_phrase(op_pointer+8)); +} bitmapCounter++; if (!inhibit) // For OP testing only! // *** END OP PROCESSOR TESTING ONLY *** @@ -433,8 +522,16 @@ if (!inhibit) // For OP testing only! // uint64 height = ((p0 & 0xFFC000) - 0x4000) & 0xFFC000; uint8 remainder = p2 >> 16, vscale = p2 >> 8; +//Actually, we should skip this object if it has a vscale of zero. +//Or do we? Not sure... Atari Karts has a few lines that look like: +// (SCALED BITMAP) +//000E8268 --> phrase 00010000 7000B00D +// [7 (0) x 1 @ (13, 0) (8 bpp), l: 000E82A0, p: 000E0FC0 fp: 00, fl:RELEASE, idx:00, pt:01] +// [hsc: 9A, vsc: 00, rem: 00] +// Could it be the vscale is overridden if the DWIDTH is zero? Hmm... + if (vscale == 0) - vscale = 0x20; // OP bug??? + vscale = 0x20; // OP bug??? Nope, it isn't...! Or is it? remainder -= 0x20; // 1.0f in [3.5] fixed point format if (remainder & 0x80) // I.e., it's negative @@ -469,8 +566,15 @@ if (!inhibit) // For OP testing only! 
} case OBJECT_TYPE_GPU: { +//WriteLog("OP: Asserting GPU IRQ #3...\n"); op_set_current_object(p0); - gpu_set_irq_line(3, 1); + GPUSetIRQLine(3, ASSERT_LINE); +//Also, OP processing is suspended from this point until OBF (F00026) is written to... +// !!! FIX !!! +//Do something like: +//OPSuspendedByGPU = true; +//Dunno if the OP keeps processing from where it was interrupted, or if it just continues +//on the next scanline... break; } case OBJECT_TYPE_BRANCH: @@ -484,17 +588,22 @@ if (!inhibit) // For OP testing only! switch (cc) { case CONDITION_EQUAL: - if (ypos != 0x7FF && (ypos & 0x01)) - ypos ^= 0x01; - if ((2 * tom_get_scanline()) == ypos || ypos == 0x7FF) +//Why do this for the equal case? If they wrote an odd YPOS, then it wouldn't be detected! +// if (ypos != 0x7FF && (ypos & 0x01)) +// ypos ^= 0x01; +// if ((2 * tom_get_scanline()) == ypos || ypos == 0x7FF) +//Here we're using VC instead of the bogus tom_get_scanline() value... + if (tom_word_read(0xF00006) == ypos || ypos == 0x7FF) op_pointer = link; break; case CONDITION_LESS_THAN: - if ((2 * tom_get_scanline()) < ypos) +// if ((2 * tom_get_scanline()) < ypos) + if (tom_word_read(0xF00006) < ypos) op_pointer = link; break; case CONDITION_GREATER_THAN: - if ((2 * tom_get_scanline()) > ypos) +// if ((2 * tom_get_scanline()) > ypos) + if (tom_word_read(0xF00006) > ypos) op_pointer = link; break; case CONDITION_OP_FLAG_SET: @@ -503,7 +612,7 @@ if (!inhibit) // For OP testing only! break; case CONDITION_SECOND_HALF_LINE: // This basically means branch if bit 10 of HC is set - WriteLog("OP: Unexpected CONDITION_SECOND_HALF_LINE in BRANCH object\nop: shuting down\n"); + WriteLog("OP: Unexpected CONDITION_SECOND_HALF_LINE in BRANCH object\nOP: shuting down\n"); fclose(log_get()); exit(0); break; @@ -517,17 +626,19 @@ if (!inhibit) // For OP testing only! 
//op_start_log = 0; // unsure //WriteLog("OP: --> STOP\n"); - op_set_status_register(((p0>>3) & 0xFFFFFFFF)); +// op_set_status_register(((p0>>3) & 0xFFFFFFFF)); +//This seems more likely... + op_set_current_object(p0); - if (p0 & 0x8) + if (p0 & 0x08) { tom_set_pending_object_int(); - if (tom_irq_enabled(2) && jaguar_interrupt_handler_is_valid(64)) + if (tom_irq_enabled(IRQ_OPFLAG) && jaguar_interrupt_handler_is_valid(64)) m68k_set_irq(7); // Cause an NMI to occur... } return; - break; +// break; } default: WriteLog("op: unknown object type %i\n", ((uint8)p0 & 0x07)); @@ -537,7 +648,6 @@ if (!inhibit) // For OP testing only! } // -// *** NEW *** // Store fixed size bitmap in line buffer // @@ -549,19 +659,23 @@ void OPProcessFixedBitmap(int scanline, uint64 p0, uint64 p1, bool render) // Need to make sure that when writing that it stays within the line buffer... // LBUF ($F01800 - $F01D9E) 360 x 32-bit RAM uint8 depth = (p1 >> 12) & 0x07; // Color depth of image -//Why is HBlankWidthInPixels subtracted from this??? -// int32 xpos = (((int32)((p1 << 20) & 0xFFFFFFFF)) >> 20) - tom_getHBlankWidthInPixels(); int32 xpos = ((int16)((p1 << 4) & 0xFFFF)) >> 4;// Image xpos in LBUF uint32 iwidth = (p1 >> 28) & 0x3FF; // Image width in *phrases* uint32 data = (p0 >> 40) & 0xFFFFF8; // Pixel data address -#ifdef OP_DEBUG_BMP +//#ifdef OP_DEBUG_BMP // Prolly should use this... Though not sure exactly how. uint32 firstPix = (p1 >> 49) & 0x3F; -#endif + // "The LSB is significant only for scaled objects..." -JTRM + // "In 1 BPP mode, all five bits are significant. In 2 BPP mode, the top four are significant..." + firstPix &= 0x3E; +//#endif // We can ignore the RELEASE (high order) bit for now--probably forever...! // uint8 flags = (p1 >> 45) & 0x0F; // REFLECT, RMW, TRANS, RELEASE //Optimize: break these out to their own BOOL values uint8 flags = (p1 >> 45) & 0x07; // REFLECT (0), RMW (1), TRANS (2) + bool flagREFLECT = (flags & OPFLAG_REFLECT ? 
true : false), + flagRMW = (flags & OPFLAG_RMW ? true : false), + flagTRANS = (flags & OPFLAG_TRANS ? true : false); // "For images with 1 to 4 bits/pixel the top 7 to 4 bits of the index // provide the most significant bits of the palette address." uint8 index = (p1 >> 37) & 0xFE; // CLUT index offset (upper pix, 1-4 bpp) @@ -575,7 +689,7 @@ void OPProcessFixedBitmap(int scanline, uint64 p0, uint64 p1, bool render) uint16 * paletteRAM16 = (uint16 *)paletteRAM; // WriteLog("bitmap %ix? %ibpp at %i,? firstpix=? data=0x%.8x pitch %i hflipped=%s dwidth=? (linked to ?) RMW=%s Tranparent=%s\n", -// iwidth, op_bitmap_bit_depth[bitdepth], xpos, ptr, pitch, (flags&FLAGS_HFLIP ? "yes" : "no"), (flags&FLAGS_READMODIFY ? "yes" : "no"), (flags&FLAGS_TRANSPARENT ? "yes" : "no")); +// iwidth, op_bitmap_bit_depth[bitdepth], xpos, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), (flags&OPFLAG_RMW ? "yes" : "no"), (flags&OPFLAG_TRANS ? "yes" : "no")); // Is it OK to have a 0 for the data width??? (i.e., undocumented?) // Seems to be... Seems that dwidth *can* be zero (i.e., reuse same line) as well. @@ -588,12 +702,14 @@ void OPProcessFixedBitmap(int scanline, uint64 p0, uint64 p1, bool render) //#define OP_DEBUG_BMP //#ifdef OP_DEBUG_BMP // WriteLog("bitmap %ix%i %ibpp at %i,%i firstpix=%i data=0x%.8x pitch %i hflipped=%s dwidth=%i (linked to 0x%.8x) Transluency=%s\n", -// iwidth, height, op_bitmap_bit_depth[bitdepth], xpos, ypos, firstPix, ptr, pitch, (flags&FLAGS_HFLIP ? "yes" : "no"), dwidth, op_pointer, (flags&FLAGS_READMODIFY ? "yes" : "no")); +// iwidth, height, op_bitmap_bit_depth[bitdepth], xpos, ypos, firstPix, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), dwidth, op_pointer, (flags&OPFLAG_RMW ? 
"yes" : "no")); //#endif - int32 phraseWidthToPixels[8] = { 64, 32, 16, 8, 4, 2, 0, 0 }; - int32 leftMargin = xpos, rightMargin = (xpos + (phraseWidthToPixels[depth] * iwidth)) - 1; - uint32 clippedWidth = 0, phraseClippedWidth = 0;//, phrasePixel = 0; +// int32 leftMargin = xpos, rightMargin = (xpos + (phraseWidthToPixels[depth] * iwidth)) - 1; + int32 startPos = xpos, endPos = xpos + + (!flagREFLECT ? (phraseWidthToPixels[depth] * iwidth) - 1 + : -((phraseWidthToPixels[depth] * iwidth) + 1)); + uint32 clippedWidth = 0, phraseClippedWidth = 0, dataClippedWidth = 0;//, phrasePixel = 0; bool in24BPPMode = (((GET16(tom_ram_8, 0x0028) >> 1) & 0x03) == 1 ? true : false); // VMODE // Not sure if this is Jaguar Two only location or what... // From the docs, it is... If we want to limit here we should think of something else. @@ -617,8 +733,11 @@ void OPProcessFixedBitmap(int scanline, uint64 p0, uint64 p1, bool render) // That way, you could simply set XPOS to leftMargin if !REFLECT and to rightMargin otherwise. // Still have to be careful with the DATA and IWIDTH values though... - if ((!(flags & OPFLAG_REFLECT) && (rightMargin < 0 || leftMargin > lbufWidth)) - || ((flags & OPFLAG_REFLECT) && (leftMargin < 0 || rightMargin > lbufWidth))) +// if ((!flagREFLECT && (rightMargin < 0 || leftMargin > lbufWidth)) +// || (flagREFLECT && (leftMargin < 0 || rightMargin > lbufWidth))) +// return; + if ((!flagREFLECT && (endPos < 0 || startPos > lbufWidth)) + || (flagREFLECT && (startPos < 0 || endPos > lbufWidth))) return; // Otherwise, find the clip limits and clip the phrase as well... @@ -636,6 +755,9 @@ void OPProcessFixedBitmap(int scanline, uint64 p0, uint64 p1, bool render) //The strange thing is that it seems to work, but that's no guarantee that it's bulletproof! //Yup. Seems that JagMania doesn't work correctly with this... //Dunno if this is the problem, but Atari Karts is showing *some* of the road now... 
+// if (!flagREFLECT) + +/* if (leftMargin < 0) clippedWidth = 0 - leftMargin, phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth], @@ -644,9 +766,31 @@ void OPProcessFixedBitmap(int scanline, uint64 p0, uint64 p1, bool render) if (rightMargin > lbufWidth) clippedWidth = rightMargin - lbufWidth, - phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth], - rightMargin = lbufWidth + (clippedWidth % phraseWidthToPixels[depth]); + phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];//, +// rightMargin = lbufWidth + (clippedWidth % phraseWidthToPixels[depth]); // rightMargin = lbufWidth; +*/ +if (depth > 5) + WriteLog("We're about to encounter a divide by zero error!\n"); + // NOTE: We're just using endPos to figure out how much, if any, to clip by. + // ALSO: There may be another case where we start out of bounds and end out of bounds...! + if (startPos < 0) // Case #1: Begin out, end in, L to R + clippedWidth = 0 - startPos, + dataClippedWidth = phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth], + startPos = 0 - (clippedWidth % phraseWidthToPixels[depth]); + + if (endPos < 0) // Case #2: Begin in, end out, R to L + clippedWidth = 0 - endPos, + phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth]; + + if (endPos > lbufWidth) // Case #3: Begin in, end out, L to R + clippedWidth = endPos - lbufWidth, + phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth]; + + if (startPos > lbufWidth) // Case #4: Begin out, end in, R to L + clippedWidth = startPos - lbufWidth, + dataClippedWidth = phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth], + startPos = lbufWidth + (clippedWidth % phraseWidthToPixels[depth]); // If the image is sitting on the line buffer left or right edge, we need to compensate // by decreasing the image phrase width accordingly. 
@@ -654,11 +798,13 @@ void OPProcessFixedBitmap(int scanline, uint64 p0, uint64 p1, bool render) // Also, if we're clipping the phrase we need to make sure we're in the correct part of // the pixel data. - data += phraseClippedWidth * (pitch << 3); +// data += phraseClippedWidth * (pitch << 3); + data += dataClippedWidth * (pitch << 3); // NOTE: When the bitmap is in REFLECT mode, the XPOS marks the *right* side of the // bitmap! This makes clipping & etc. MUCH, much easier...! - uint32 lbufAddress = 0x1800 + (!in24BPPMode ? leftMargin * 2 : leftMargin * 4); +// uint32 lbufAddress = 0x1800 + (!in24BPPMode ? leftMargin * 2 : leftMargin * 4); + uint32 lbufAddress = 0x1800 + (!in24BPPMode ? startPos * 2 : startPos * 4); uint8 * currentLineBuffer = &tom_ram_8[lbufAddress]; // Render. @@ -670,30 +816,30 @@ void OPProcessFixedBitmap(int scanline, uint64 p0, uint64 p1, bool render) if (depth == 0) // 1 BPP { -// uint32 paletteIndex = index << 1; - // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it. + // The LSB of flags is OPFLAG_REFLECT, so sign extend it and or 2 into it. int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02; + // Fetch 1st phrase... + uint64 pixels = ((uint64)jaguar_long_read(data) << 32) | jaguar_long_read(data + 4); +//Note that firstPix should only be honored *if* we start with the 1st phrase of the bitmap +//i.e., we didn't clip on the margin... + pixels <<= firstPix; // Skip first N pixels (N=firstPix)... + int i = firstPix; // Start counter at right spot... + while (iwidth--) { - // Fetch phrase... - uint64 pixels = ((uint64)jaguar_long_read(data) << 32) | jaguar_long_read(data + 4); - data += pitch << 3; // Multiply pitch * 8 (optimize: precompute this value) - - for(int i=0; i<64; i++) + while (i++ < 64) { uint8 bit = pixels >> 63; -// Seems to me that both of these are in the same endian, so we could cast it as -// uint16 * and do straight across copies (what about 24 bpp? Treat it differently...) 
-// This only works for the palettized modes (1 - 8 BPP), since we actually have to -// copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?) -// No, it isn't because we read the memory in an endian safe way--this *won't* work... - if ((flags & OPFLAG_TRANS) && bit == 0) + if (flagTRANS && bit == 0) ; // Do nothing... else { - if (!(flags & OPFLAG_RMW)) + if (!flagRMW) //Optimize: Set palleteRAM16 to beginning of palette RAM + index*2 and use only [bit] as index... +//Won't optimize RMW case though... + // This is the *only* correct use of endian-dependent code + // (i.e., mem-to-mem direct copying)! *(uint16 *)currentLineBuffer = paletteRAM16[index | bit]; else *currentLineBuffer = @@ -705,10 +851,16 @@ void OPProcessFixedBitmap(int scanline, uint64 p0, uint64 p1, bool render) currentLineBuffer += lbufDelta; pixels <<= 1; } + i = 0; + // Fetch next phrase... + data += pitch << 3; // Multiply pitch * 8 (optimize: precompute this value) + pixels = ((uint64)jaguar_long_read(data) << 32) | jaguar_long_read(data + 4); } } else if (depth == 1) // 2 BPP { +if (firstPix) + WriteLog("OP: Fixed bitmap @ 2 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix); index &= 0xFC; // Top six bits form CLUT index // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it. int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02; @@ -727,11 +879,11 @@ void OPProcessFixedBitmap(int scanline, uint64 p0, uint64 p1, bool render) // This only works for the palettized modes (1 - 8 BPP), since we actually have to // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?) // No, it isn't because we read the memory in an endian safe way--this *won't* work... - if ((flags & OPFLAG_TRANS) && bits == 0) + if (flagTRANS && bits == 0) ; // Do nothing... 
else { - if (!(flags & OPFLAG_RMW)) + if (!flagRMW) *(uint16 *)currentLineBuffer = paletteRAM16[index | bits]; else *currentLineBuffer = @@ -747,6 +899,8 @@ void OPProcessFixedBitmap(int scanline, uint64 p0, uint64 p1, bool render) } else if (depth == 2) // 4 BPP { +if (firstPix) + WriteLog("OP: Fixed bitmap @ 4 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix); index &= 0xF0; // Top four bits form CLUT index // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it. int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02; @@ -765,11 +919,11 @@ void OPProcessFixedBitmap(int scanline, uint64 p0, uint64 p1, bool render) // This only works for the palettized modes (1 - 8 BPP), since we actually have to // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?) // No, it isn't because we read the memory in an endian safe way--this *won't* work... - if ((flags & OPFLAG_TRANS) && bits == 0) + if (flagTRANS && bits == 0) ; // Do nothing... else { - if (!(flags & OPFLAG_RMW)) + if (!flagRMW) *(uint16 *)currentLineBuffer = paletteRAM16[index | bits]; else *currentLineBuffer = @@ -785,6 +939,8 @@ void OPProcessFixedBitmap(int scanline, uint64 p0, uint64 p1, bool render) } else if (depth == 3) // 8 BPP { +if (firstPix) + WriteLog("OP: Fixed bitmap @ 8 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix); // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it. int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02; @@ -802,11 +958,11 @@ void OPProcessFixedBitmap(int scanline, uint64 p0, uint64 p1, bool render) // This only works for the palettized modes (1 - 8 BPP), since we actually have to // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?) // No, it isn't because we read the memory in an endian safe way--this *won't* work... - if ((flags & OPFLAG_TRANS) && bits == 0) + if (flagTRANS && bits == 0) ; // Do nothing... 
else { - if (!(flags & OPFLAG_RMW)) + if (!flagRMW) *(uint16 *)currentLineBuffer = paletteRAM16[bits]; else *currentLineBuffer = @@ -822,6 +978,8 @@ void OPProcessFixedBitmap(int scanline, uint64 p0, uint64 p1, bool render) } else if (depth == 4) // 16 BPP { +if (firstPix) + WriteLog("OP: Fixed bitmap @ 16 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix); // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it. int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02; @@ -839,11 +997,11 @@ void OPProcessFixedBitmap(int scanline, uint64 p0, uint64 p1, bool render) // This only works for the palettized modes (1 - 8 BPP), since we actually have to // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?) // No, it isn't because we read the memory in an endian safe way--it *won't* work... - if ((flags & OPFLAG_TRANS) && (bitsLo | bitsHi) == 0) + if (flagTRANS && (bitsLo | bitsHi) == 0) ; // Do nothing... else { - if (!(flags & OPFLAG_RMW)) + if (!flagRMW) *currentLineBuffer = bitsHi, *(currentLineBuffer + 1) = bitsLo; else @@ -860,7 +1018,11 @@ void OPProcessFixedBitmap(int scanline, uint64 p0, uint64 p1, bool render) } else if (depth == 5) // 24 BPP { -WriteLog("OP: Writing 24 BPP bitmap!\n"); +//Looks like Iron Soldier is the only game that uses 24BPP mode... +//There *might* be others... +//WriteLog("OP: Writing 24 BPP bitmap!\n"); +if (firstPix) + WriteLog("OP: Fixed bitmap @ 24 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix); // Not sure, but I think RMW only works with 16 BPP and below, and only in CRY mode... // The LSB is OPFLAG_REFLECT, so sign extend it and or 4 into it. int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 4) | 0x04; @@ -880,7 +1042,7 @@ WriteLog("OP: Writing 24 BPP bitmap!\n"); // This only works for the palettized modes (1 - 8 BPP), since we actually have to // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?) 
// No, it isn't because we read the memory in an endian safe way--it *won't* work... - if ((flags & OPFLAG_TRANS) && (bits3 | bits2 | bits1 | bits0) == 0) + if (flagTRANS && (bits3 | bits2 | bits1 | bits0) == 0) ; // Do nothing... else *currentLineBuffer = bits3, @@ -896,161 +1058,485 @@ WriteLog("OP: Writing 24 BPP bitmap!\n"); } // -// *** NEW *** // Store scaled bitmap in line buffer // void OPProcessScaledBitmap(int scanline, uint64 p0, uint64 p1, uint64 p2, bool render) { - int32 xpos = (((int32)((p1 << 20) & 0xFFFFFFFF)) >> 20) - tom_getHBlankWidthInPixels(); -// uint16 ypos = ((p0 & 0x3FF8) >> 3) / 2; - uint16 iwidth = ((p1 >> 28) & 0x3FF) * 4; - uint16 dwidth = ((p1 >> 18) & 0x3FF) * 4; // Unsigned! -// uint16 height = (p0 >> 14) & 0x3FF; // Unsigned! -// uint32 link = ((p0 >> 24) & 0x7FFFF) << 3; - uint32 ptr = (p0 >> 40) & 0xFFFFF8; -//unused uint32 firstPix = (p1 >> 49) & 0x3F; - uint8 flags = (p1 >> 45) & 0x0F; - uint8 idx = (p1 >> 38) & 0x7F; - uint8 pitch = (p1 >> 15) & 0x07; - uint8 bitdepth = (p1 >> 12) & 0x07; +// Need to make sure that when writing that it stays within the line buffer... +// LBUF ($F01800 - $F01D9E) 360 x 32-bit RAM + uint8 depth = (p1 >> 12) & 0x07; // Color depth of image + int32 xpos = ((int16)((p1 << 4) & 0xFFFF)) >> 4;// Image xpos in LBUF + uint32 iwidth = (p1 >> 28) & 0x3FF; // Image width in *phrases* + uint32 data = (p0 >> 40) & 0xFFFFF8; // Pixel data address +//#ifdef OP_DEBUG_BMP +// Prolly should use this... Though not sure exactly how. +//Use the upper bits as an offset into the phrase depending on the BPP. That's how! + uint32 firstPix = (p1 >> 49) & 0x3F; +//This is WEIRD! I'm sure I saw Atari Karts request 8 BPP FIRSTPIX! What happened??? +if (firstPix) + WriteLog("OP: FIRSTPIX != 0!\n"); +//#endif +// We can ignore the RELEASE (high order) bit for now--probably forever...! 
+// uint8 flags = (p1 >> 45) & 0x0F; // REFLECT, RMW, TRANS, RELEASE +//Optimize: break these out to their own BOOL values + uint8 flags = (p1 >> 45) & 0x07; // REFLECT (0), RMW (1), TRANS (2) + bool flagREFLECT = (flags & OPFLAG_REFLECT ? true : false), + flagRMW = (flags & OPFLAG_RMW ? true : false), + flagTRANS = (flags & OPFLAG_TRANS ? true : false); + uint8 index = (p1 >> 37) & 0xFE; // CLUT index offset (upper pix, 1-4 bpp) + uint32 pitch = (p1 >> 15) & 0x07; // Phrase pitch - int16 scanline_width = tom_getVideoModeWidth(); +// int16 scanlineWidth = tom_getVideoModeWidth(); uint8 * tom_ram_8 = tom_get_ram_pointer(); - uint8 * current_line_buffer = &tom_ram_8[0x1800]; + uint8 * paletteRAM = &tom_ram_8[0x400]; + // This is OK as long as it's used correctly: For 16-bit RAM to RAM direct copies--NOT + // for use when using endian-corrected data (i.e., any of the *_word_read functions!) + uint16 * paletteRAM16 = (uint16 *)paletteRAM; - uint32 vscale_fixed3p5 = (p2 >> 8) & 0xFF; - uint32 hscale_fixed3p5 = p2 & 0xFF; - float vscale = (float)vscale_fixed3p5 / 32.0f, hscale = (float)hscale_fixed3p5 / 32.0f; + uint8 hscale = p2 & 0xFF; + uint8 horizontalRemainder = hscale; // Not sure if it starts full, but seems reasonable + int32 scaledWidthInPixels = (iwidth * phraseWidthToPixels[depth] * hscale) >> 5; + uint32 scaledPhrasePixels = (phraseWidthToPixels[depth] * hscale) >> 5; -//No hacks! -/* if (jaguar_mainRom_crc32==0x5a5b9c68) // atari karts - { - if (vscale == 0.0f) - vscale = 1.0f; - - if (ypos == 0) - ypos = scanline; - }*/ -#ifdef OP_DEBUG_BMP - if (xpos == -3) - WriteLog("[scanline %i] %ix%i scaled to %ix%i scale (%f, %f)%i bpp pitch %i at (%i,%i) @ 0x%.8x Transluency=%s\n", - scanline, iwidth,height, (int)(iwidth*hscale), (int)(height*vscale), hscale, vscale, - op_bitmap_bit_depth[bitdepth], pitch, xpos, ypos, ptr, (flags&FLAGS_READMODIFY) ? "yes" : "no"); -#endif -//No hacks! 
-/* if (jaguar_mainRom_crc32==0x2f032271) - ypos += 8;*/ - - if (!render || dwidth == 0 || ptr == 0 || pitch == 0) - return; +// WriteLog("bitmap %ix? %ibpp at %i,? firstpix=? data=0x%.8x pitch %i hflipped=%s dwidth=? (linked to ?) RMW=%s Tranparent=%s\n", +// iwidth, op_bitmap_bit_depth[bitdepth], xpos, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), (flags&OPFLAG_RMW ? "yes" : "no"), (flags&OPFLAG_TRANS ? "yes" : "no")); - if (bitdepth <= 3) // 1, 2, 4, 8 BPP - iwidth *= 2, dwidth *= 2; +//Looks like an hscale of zero means don't draw! + if (!render || iwidth == 0 || hscale == 0) + return; - uint16 scaled_width = (uint16)((float)iwidth * hscale); +//#define OP_DEBUG_BMP +//#ifdef OP_DEBUG_BMP +// WriteLog("bitmap %ix%i %ibpp at %i,%i firstpix=%i data=0x%.8x pitch %i hflipped=%s dwidth=%i (linked to 0x%.8x) Transluency=%s\n", +// iwidth, height, op_bitmap_bit_depth[bitdepth], xpos, ypos, firstPix, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), dwidth, op_pointer, (flags&OPFLAG_RMW ? "yes" : "no")); +//#endif - if (op_bitmap_bit_depth[bitdepth] == 4) // why ? - scaled_width *= 2; + int32 startPos = xpos, endPos = xpos + + (!flagREFLECT ? scaledWidthInPixels - 1 : -(scaledWidthInPixels + 1)); + uint32 clippedWidth = 0, phraseClippedWidth = 0, dataClippedWidth = 0; + bool in24BPPMode = (((GET16(tom_ram_8, 0x0028) >> 1) & 0x03) == 1 ? true : false); // VMODE + // Not sure if this is Jaguar Two only location or what... + // From the docs, it is... If we want to limit here we should think of something else. +// int32 limit = GET16(tom_ram_8, 0x0008); // LIMIT + int32 limit = 720; + int32 lbufWidth = (!in24BPPMode ? limit - 1 : (limit / 2) - 1); // Zero based limit... - if (op_bitmap_bit_depth[bitdepth] == 2) // why ? - scaled_width *= 4; + // If the image is completely to the left or right of the line buffer, then bail. +//If in REFLECT mode, then these values are swapped! !!! FIX !!! [DONE] +//There are four possibilities: +// 1. 
image sits on left edge and no REFLECT; starts out of bounds but ends in bounds. +// 2. image sits on left edge and REFLECT; starts in bounds but ends out of bounds. +// 3. image sits on right edge and REFLECT; starts out of bounds but ends in bounds. +// 4. image sits on right edge and no REFLECT; starts in bounds but ends out of bounds. +//Numbers 2 & 4 can be caught by checking the LBUF clip while in the inner loop, +// numbers 1 & 3 are of concern. +// This *indirectly* handles only cases 2 & 4! And is WRONG is REFLECT is set...! +// if (rightMargin < 0 || leftMargin > lbufWidth) - if (op_bitmap_bit_depth[bitdepth] == 1) // why ? - scaled_width *= 8; +// It might be easier to swap these (if REFLECTed) and just use XPOS down below... +// That way, you could simply set XPOS to leftMargin if !REFLECT and to rightMargin otherwise. +// Still have to be careful with the DATA and IWIDTH values though... - // visible ? -/* if ((scanline < ypos) || (scanline > (ypos + scaled_height)) || ((xpos + scaled_width) < 0) - || (xpos >= scanline_width))*/ - if ((xpos + scaled_width) < 0 || xpos >= scanline_width) + if ((!flagREFLECT && (endPos < 0 || startPos > lbufWidth)) + || (flagREFLECT && (startPos < 0 || endPos > lbufWidth))) return; - if (xpos < 0) - { - scaled_width += xpos; - ptr += (pitch * op_bitmap_bit_size[bitdepth] * ((uint32)((-xpos) / hscale))) >> 16; - xpos = 0; - } + // Otherwise, find the clip limits and clip the phrase as well... + // NOTE: I'm fudging here by letting the actual blit overstep the bounds of the + // line buffer, but it shouldn't matter since there are two unused line + // buffers below and nothing above and I'll at most write 40 bytes outside + // the line buffer... I could use a fractional clip begin/end value, but + // this makes the blit a *lot* more hairy. I might fix this in the future + // if it becomes necessary. (JLH) + // Probably wouldn't be *that* hairy. 
Just use a delta that tells the inner loop + // which pixel in the phrase is being written, and quit when either end of phrases + // is reached or line buffer extents are surpassed. - if (iwidth <= 0) - return; +//This stuff is probably wrong as well... !!! FIX !!! +//The strange thing is that it seems to work, but that's no guarantee that it's bulletproof! +//Yup. Seems that JagMania doesn't work correctly with this... +//Dunno if this is the problem, but Atari Karts is showing *some* of the road now... +//Actually, it is! Or, it was. It doesn't seem to be clipping here, so the problem lies +//elsewhere! Hmm. Putting the scaling code into the 1/2/8 BPP cases seems to draw the ground +// a bit more accurately... Strange! +//It's probably a case of the REFLECT flag being set and the background being written +//from the right side of the screen... +//But no, it isn't... At least if the diagnostics are telling the truth! + + // NOTE: We're just using endPos to figure out how much, if any, to clip by. + // ALSO: There may be another case where we start out of bounds and end out of bounds...! + +//There's a problem here with scaledPhrasePixels in that it can be forced to zero when +//the scaling factor is small. So fix it already! !!! FIX !!! +/*if (scaledPhrasePixels == 0) +{ + WriteLog("OP: [Scaled] We're about to encounter a divide by zero error!\n"); + DumpScaledObject(p0, p1, p2); +}//*/ +//NOTE: I'm almost 100% sure that this is wrong... And it is! 
:-p + if (startPos < 0) // Case #1: Begin out, end in, L to R +/* clippedWidth = 0 - startPos, + dataClippedWidth = phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth], + startPos = 0 - (clippedWidth % phraseWidthToPixels[depth]);*/ + clippedWidth = 0 - startPos, + dataClippedWidth = phraseClippedWidth = clippedWidth / scaledPhrasePixels, + startPos = 0 - (clippedWidth % scaledPhrasePixels); + + if (endPos < 0) // Case #2: Begin in, end out, R to L +/* clippedWidth = 0 - endPos, + phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];*/ + clippedWidth = 0 - endPos, + phraseClippedWidth = clippedWidth / scaledPhrasePixels; + + if (endPos > lbufWidth) // Case #3: Begin in, end out, L to R +/* clippedWidth = endPos - lbufWidth, + phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];*/ + clippedWidth = endPos - lbufWidth, + phraseClippedWidth = clippedWidth / scaledPhrasePixels; + + if (startPos > lbufWidth) // Case #4: Begin out, end in, R to L +/* clippedWidth = startPos - lbufWidth, + dataClippedWidth = phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth], + startPos = lbufWidth + (clippedWidth % phraseWidthToPixels[depth]);*/ + clippedWidth = startPos - lbufWidth, + dataClippedWidth = phraseClippedWidth = clippedWidth / scaledPhrasePixels, + startPos = lbufWidth + (clippedWidth % scaledPhrasePixels); - if (flags & FLAGS_HFLIP) - { - if (xpos < 0 || (xpos-scaled_width) >= scanline_width) - return; +extern int op_start_log; +if (op_start_log && clippedWidth != 0) + WriteLog("OP: Clipped line. SP=%i, EP=%i, clip=%u, iwidth=%u, hscale=%02X\n", startPos, endPos, clippedWidth, iwidth, hscale); +if (op_start_log && startPos == 13) +{ + WriteLog("OP: Scaled line. 
SP=%i, EP=%i, clip=%u, iwidth=%u, hscale=%02X, depth=%u, firstPix=%u\n", startPos, endPos, clippedWidth, iwidth, hscale, depth, firstPix); + DumpScaledObject(p0, p1, p2); +} + // If the image is sitting on the line buffer left or right edge, we need to compensate + // by decreasing the image phrase width accordingly. + iwidth -= phraseClippedWidth; - if ((xpos - scaled_width) < 0) - scaled_width = xpos; - } - else - { - if ((xpos + scaled_width) < 0 || xpos >= scanline_width) - return; + // Also, if we're clipping the phrase we need to make sure we're in the correct part of + // the pixel data. +// data += phraseClippedWidth * (pitch << 3); + data += dataClippedWidth * (pitch << 3); - if ((xpos + scaled_width) > scanline_width) - scaled_width = scanline_width-xpos; - } - - current_line_buffer += xpos * 2; + // NOTE: When the bitmap is in REFLECT mode, the XPOS marks the *right* side of the + // bitmap! This makes clipping & etc. MUCH, much easier...! +// uint32 lbufAddress = 0x1800 + (!in24BPPMode ? leftMargin * 2 : leftMargin * 4); + uint32 lbufAddress = 0x1800 + (!in24BPPMode ? startPos * 2 : startPos * 4); + uint8 * currentLineBuffer = &tom_ram_8[lbufAddress]; - int32 hscale_fixed = (int32)(65536.0f / hscale); - int32 cnt = 0; + // Render. - if (op_bitmap_bit_depth[bitdepth] == 1) +// Hmm. We check above for 24 BPP mode, but don't do anything about it below... +// If we *were* in 24 BPP mode, how would you convert CRY to RGB24? Seems to me +// that if you're in CRY mode then you wouldn't be able to use 24 BPP bitmaps +// anyway. + + if (depth == 0) // 1 BPP { - if (pitch == 1) - { -#include "zbmpop1.h" - } - else +if (firstPix != 0) + WriteLog("OP: Scaled bitmap @ 1 BPP requesting FIRSTPIX!\n"); + // The LSB of flags is OPFLAG_REFLECT, so sign extend it and or 2 into it. 
+ int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02; + + int pixCount = 0; + uint64 pixels = ((uint64)jaguar_long_read(data) << 32) | jaguar_long_read(data + 4); + + while ((int32)iwidth > 0) { -#include "zbmpop1p.h" + uint8 bits = pixels >> 63; + + if (flagTRANS && bits == 0) + ; // Do nothing... + else + { + if (!flagRMW) + // This is the *only* correct use of endian-dependent code + // (i.e., mem-to-mem direct copying)! + *(uint16 *)currentLineBuffer = paletteRAM16[index | bits]; + else + *currentLineBuffer = + BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]), + *(currentLineBuffer + 1) = + BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]); + } + + currentLineBuffer += lbufDelta; + + horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format + while (horizontalRemainder & 0x80) + { + horizontalRemainder += hscale; + pixCount++; + pixels <<= 1; + } + + if (pixCount > 63) + { + int phrasesToSkip = pixCount / 64, pixelShift = pixCount % 64; + + data += (pitch << 3) * phrasesToSkip; + pixels = ((uint64)jaguar_long_read(data) << 32) | jaguar_long_read(data + 4); + pixels <<= 1 * pixelShift; + iwidth -= phrasesToSkip; + pixCount = pixelShift; + } } } - else if (op_bitmap_bit_depth[bitdepth] == 2) + else if (depth == 1) // 2 BPP { - if (pitch == 1) - { -#include "zbmpop2.h" - } - else +if (firstPix != 0) + WriteLog("OP: Scaled bitmap @ 2 BPP requesting FIRSTPIX!\n"); + index &= 0xFC; // Top six bits form CLUT index + // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it. + int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02; + + int pixCount = 0; + uint64 pixels = ((uint64)jaguar_long_read(data) << 32) | jaguar_long_read(data + 4); + + while ((int32)iwidth > 0) { -#include "zbmpop2p.h" + uint8 bits = pixels >> 62; + + if (flagTRANS && bits == 0) + ; // Do nothing... 
+ else + { + if (!flagRMW) + // This is the *only* correct use of endian-dependent code + // (i.e., mem-to-mem direct copying)! + *(uint16 *)currentLineBuffer = paletteRAM16[index | bits]; + else + *currentLineBuffer = + BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]), + *(currentLineBuffer + 1) = + BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]); + } + + currentLineBuffer += lbufDelta; + + horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format + while (horizontalRemainder & 0x80) + { + horizontalRemainder += hscale; + pixCount++; + pixels <<= 2; + } + + if (pixCount > 31) + { + int phrasesToSkip = pixCount / 32, pixelShift = pixCount % 32; + + data += (pitch << 3) * phrasesToSkip; + pixels = ((uint64)jaguar_long_read(data) << 32) | jaguar_long_read(data + 4); + pixels <<= 2 * pixelShift; + iwidth -= phrasesToSkip; + pixCount = pixelShift; + } } } - else if (op_bitmap_bit_depth[bitdepth] == 4) + else if (depth == 2) // 4 BPP { - if (pitch == 1) - { -#include "zbmpop4.h" - } - else +if (firstPix != 0) + WriteLog("OP: Scaled bitmap @ 4 BPP requesting FIRSTPIX!\n"); + index &= 0xF0; // Top four bits form CLUT index + // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it. + int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02; + + int pixCount = 0; + uint64 pixels = ((uint64)jaguar_long_read(data) << 32) | jaguar_long_read(data + 4); + + while ((int32)iwidth > 0) { -#include "zbmpop4p.h" + uint8 bits = pixels >> 60; + + if (flagTRANS && bits == 0) + ; // Do nothing... + else + { + if (!flagRMW) + // This is the *only* correct use of endian-dependent code + // (i.e., mem-to-mem direct copying)! 
+ *(uint16 *)currentLineBuffer = paletteRAM16[index | bits]; + else + *currentLineBuffer = + BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]), + *(currentLineBuffer + 1) = + BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]); + } + + currentLineBuffer += lbufDelta; + + horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format + while (horizontalRemainder & 0x80) + { + horizontalRemainder += hscale; + pixCount++; + pixels <<= 4; + } + + if (pixCount > 15) + { + int phrasesToSkip = pixCount / 16, pixelShift = pixCount % 16; + + data += (pitch << 3) * phrasesToSkip; + pixels = ((uint64)jaguar_long_read(data) << 32) | jaguar_long_read(data + 4); + pixels <<= 4 * pixelShift; + iwidth -= phrasesToSkip; + pixCount = pixelShift; + } } } - else if (op_bitmap_bit_depth[bitdepth] == 8) + else if (depth == 3) // 8 BPP { - if (pitch == 1) - { -#include "zbmpop8.h" - } - else +if (firstPix) + WriteLog("OP: Scaled bitmap @ 8 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix); + // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it. + int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02; + + int pixCount = 0; + uint64 pixels = ((uint64)jaguar_long_read(data) << 32) | jaguar_long_read(data + 4); + + while ((int32)iwidth > 0) { -#include "zbmpop8p.h" + uint8 bits = pixels >> 56; + + if (flagTRANS && bits == 0) + ; // Do nothing... + else + { + if (!flagRMW) + // This is the *only* correct use of endian-dependent code + // (i.e., mem-to-mem direct copying)! 
+ *(uint16 *)currentLineBuffer = paletteRAM16[bits]; + else + *currentLineBuffer = + BLEND_CR(*currentLineBuffer, paletteRAM[bits << 1]), + *(currentLineBuffer + 1) = + BLEND_Y(*(currentLineBuffer + 1), paletteRAM[(bits << 1) + 1]); + } + + currentLineBuffer += lbufDelta; + + horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format + while (horizontalRemainder & 0x80) + { + horizontalRemainder += hscale; + pixCount++; + pixels <<= 8; + } + + if (pixCount > 7) + { + int phrasesToSkip = pixCount / 8, pixelShift = pixCount % 8; + + data += (pitch << 3) * phrasesToSkip; + pixels = ((uint64)jaguar_long_read(data) << 32) | jaguar_long_read(data + 4); + pixels <<= 8 * pixelShift; + iwidth -= phrasesToSkip; + pixCount = pixelShift; + } } } - else if (op_bitmap_bit_depth[bitdepth] == 16) + else if (depth == 4) // 16 BPP { - if (pitch == 1) +if (firstPix != 0) + WriteLog("OP: Scaled bitmap @ 16 BPP requesting FIRSTPIX!\n"); + // The LSB is OPFLAG_REFLECT, so sign extend it and OR 2 into it. + int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02; + + int pixCount = 0; + uint64 pixels = ((uint64)jaguar_long_read(data) << 32) | jaguar_long_read(data + 4); + + while ((int32)iwidth > 0) { -#include "zbmpop16.h" + uint8 bitsHi = pixels >> 56, bitsLo = pixels >> 48; + + if (flagTRANS && (bitsLo | bitsHi) == 0) + ; // Do nothing... 
+ else + { + if (!flagRMW) + *currentLineBuffer = bitsHi, + *(currentLineBuffer + 1) = bitsLo; + else + *currentLineBuffer = + BLEND_CR(*currentLineBuffer, bitsHi), + *(currentLineBuffer + 1) = + BLEND_Y(*(currentLineBuffer + 1), bitsLo); + } + + currentLineBuffer += lbufDelta; + + horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format + while (horizontalRemainder & 0x80) + { + horizontalRemainder += hscale; + pixCount++; + pixels <<= 16; + } + + if (pixCount > 3) + { + int phrasesToSkip = pixCount / 4, pixelShift = pixCount % 4; + + data += (pitch << 3) * phrasesToSkip; + pixels = ((uint64)jaguar_long_read(data) << 32) | jaguar_long_read(data + 4); + pixels <<= 16 * pixelShift; + + iwidth -= phrasesToSkip; + + pixCount = pixelShift; + } } - else + } + else if (depth == 5) // 24 BPP + { +//I'm not sure that you can scale a 24 BPP bitmap properly--the JTRM seem to indicate as much. +WriteLog("OP: Writing 24 BPP scaled bitmap!\n"); +if (firstPix != 0) + WriteLog("OP: Scaled bitmap @ 24 BPP requesting FIRSTPIX!\n"); + // Not sure, but I think RMW only works with 16 BPP and below, and only in CRY mode... + // The LSB is OPFLAG_REFLECT, so sign extend it and or 4 into it. + int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 4) | 0x04; + + while (iwidth--) { -#include "zbmpop16p.h" + // Fetch phrase... + uint64 pixels = ((uint64)jaguar_long_read(data) << 32) | jaguar_long_read(data + 4); + data += pitch << 3; // Multiply pitch * 8 (optimize: precompute this value) + + for(int i=0; i<2; i++) + { + uint8 bits3 = pixels >> 56, bits2 = pixels >> 48, + bits1 = pixels >> 40, bits0 = pixels >> 32; +// Seems to me that both of these are in the same endian, so we could cast it as +// uint16 * and do straight across copies (what about 24 bpp? Treat it differently...) +// This only works for the palettized modes (1 - 8 BPP), since we actually have to +// copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?) 
+// No, it isn't because we read the memory in an endian safe way--it *won't* work... + if (flagTRANS && (bits3 | bits2 | bits1 | bits0) == 0) + ; // Do nothing... + else + *currentLineBuffer = bits3, + *(currentLineBuffer + 1) = bits2, + *(currentLineBuffer + 2) = bits1, + *(currentLineBuffer + 3) = bits0; + + currentLineBuffer += lbufDelta; + pixels <<= 32; + } } } - else - WriteLog("(unimplemented) %i bpp scaled bitmap\n",op_bitmap_bit_depth[bitdepth]); +/*if (depth == 3 && startPos == 13) +{ +if (op_start_log) +WriteLog("OP: Writing in the margins...\n"); + for(int i=0; i<100*2; i+=2) +// for(int i=0; i<14*2; i+=2) + tom_ram_8[0x1800 + i] = 0xFF, + tom_ram_8[0x1800 + i + 1] = 0xFF; +}*/ +// uint32 lbufAddress = 0x1800 + (!in24BPPMode ? startPos * 2 : startPos * 4); +// uint8 * currentLineBuffer = &tom_ram_8[lbufAddress]; }