X-Git-Url: http://shamusworld.gotdns.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fobjectp.cpp;h=160f7c7a8141dbc613a45ff67142570a36d03dcd;hb=0d440b68b0a35a3ee200607a9f3988334bd0998c;hp=43b575eb57297371f5dc6b79801c63574f76c20e;hpb=135a0c52a2bcbcc37192c61801de6e9c80aeebff;p=virtualjaguar diff --git a/src/objectp.cpp b/src/objectp.cpp index 43b575e..160f7c7 100644 --- a/src/objectp.cpp +++ b/src/objectp.cpp @@ -1,9 +1,9 @@ // // Object Processor // -// by cal2 +// Original source by Cal2 // GCC/SDL port by Niels Wagenaar (Linux/WIN32) and Caz (BeOS) -// Cleanups/fixes/rewrites by James L. Hammons +// Extensive cleanups/fixes/rewrites by James L. Hammons // #include @@ -36,16 +36,12 @@ // Private function prototypes -void OPProcessFixedBitmap(int scanline, uint64 p0, uint64 p1, bool render); -void OPProcessScaledBitmap(int scanline, uint64 p0, uint64 p1, uint64 p2, bool render); +void OPProcessFixedBitmap(uint64 p0, uint64 p1, bool render); +void OPProcessScaledBitmap(uint64 p0, uint64 p1, uint64 p2, bool render); void DumpScaledObject(uint64 p0, uint64 p1, uint64 p2); void DumpFixedObject(uint64 p0, uint64 p1); uint64 op_load_phrase(uint32 offset); -// External global variables - -extern uint32 jaguar_mainRom_crc32; - // Local global variables static uint8 * op_blend_y; @@ -54,7 +50,7 @@ static uint8 * op_blend_cr; // some of the regular TOM RAM... static uint8 objectp_ram[0x40]; // This is based at $F00000 uint8 objectp_running; -bool objectp_stop_reading_list; +//bool objectp_stop_reading_list; static uint8 op_bitmap_bit_depth[8] = { 1, 2, 4, 8, 16, 24, 32, 0 }; //static uint32 op_bitmap_bit_size[8] = @@ -131,7 +127,7 @@ void op_done(void) WriteLog("OP: Phrase dump\n ----------\n"); for(uint32 i=0; i<0x100; i+=8) { - uint32 hi = jaguar_long_read(olp + i), lo = jaguar_long_read(olp + i + 4); + uint32 hi = JaguarReadLong(olp + i, OP), lo = JaguarReadLong(olp + i + 4, OP); WriteLog("\t%08X: %08X %08X %s", olp + i, hi, lo, opType[lo & 0x07]); if ((lo & 0x07) == 3) { @@ -153,41 +149,39 @@ void op_done(void) // Object Processor memory access // Memory range: F00010 - F00027 // -void op_byte_write(uint32 offset, uint8 data) +// F00010-F00017 R xxxxxxxx xxxxxxxx OB - current object code from the graphics processor +// F00020-F00023 W xxxxxxxx xxxxxxxx OLP - start of the object list +// F00026 W -------- -------x OBF - object processor flag +// + +uint8 OPReadByte(uint32 offset, uint32 who/*=UNKNOWN*/) { offset &= 0x3F; - objectp_ram[offset] = data; + return objectp_ram[offset]; } -void op_word_write(uint32 offset, uint16 data) +uint16 OPReadWord(uint32 offset, uint32 who/*=UNKNOWN*/) { offset &= 0x3F; -// objectp_ram[offset] = (data >> 8) & 0xFF; -// objectp_ram[offset+1] = data & 0xFF; - SET16(objectp_ram, offset, data); - -/*if (offset == 0x20) -WriteLog("OP: Setting lo list pointer: %04X\n", data); -if (offset == 0x22) -WriteLog("OP: Setting hi list pointer: %04X\n", data);//*/ + return GET16(objectp_ram, offset); } -uint8 op_byte_read(uint32 offset) +void OPWriteByte(uint32 offset, uint8 data, uint32 who/*=UNKNOWN*/) { offset &= 0x3F; - return objectp_ram[offset]; + objectp_ram[offset] = data; } -uint16 op_word_read(uint32 offset) +void OPWriteWord(uint32 offset, uint16 data, uint32 who/*=UNKNOWN*/) { -// return (objectp_ram[offset & 0x3F] << 8) | objectp_ram[(offset+1) & 0x3F]; offset &= 0x3F; - return GET16(objectp_ram, offset); -} + SET16(objectp_ram, offset, data); -// F00010-F00017 R xxxxxxxx xxxxxxxx OB - current object code from the graphics processor -// F00020-F00023 W xxxxxxxx xxxxxxxx OLP - start of the object list -// F00026 W -------- -------x OBF - object processor flag +/*if (offset == 0x20) +WriteLog("OP: Setting lo list pointer: %04X\n", data); +if (offset == 0x22) +WriteLog("OP: Setting hi list pointer: %04X\n", data);//*/ +} uint32 op_get_list_pointer(void) { @@ -245,14 +239,14 @@ void op_set_current_object(uint64 object) uint64 op_load_phrase(uint32 offset) { offset &= ~0x07; // 8 byte alignment - return ((uint64)jaguar_long_read(offset) << 32) | (uint64)jaguar_long_read(offset+4); + return ((uint64)JaguarReadLong(offset, OP) << 32) | (uint64)JaguarReadLong(offset+4, OP); } void OPStorePhrase(uint32 offset, uint64 p) { offset &= ~0x07; // 8 byte alignment - jaguar_long_write(offset, p >> 32); - jaguar_long_write(offset + 4, p & 0xFFFFFFFF); + JaguarWriteLong(offset, p >> 32, OP); + JaguarWriteLong(offset + 4, p & 0xFFFFFFFF, OP); } // @@ -264,7 +258,8 @@ void DumpScaledObject(uint64 p0, uint64 p1, uint64 p2) WriteLog(" %08X --> phrase %08X %08X\n", op_pointer, (uint32)(p1>>32), (uint32)(p1&0xFFFFFFFF)); WriteLog(" %08X --> phrase %08X %08X ", op_pointer+8, (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF)); uint8 bitdepth = (p1 >> 12) & 0x07; - int16 ypos = ((p0 >> 3) & 0x3FF); // ??? What if not interlaced (/2)? +//WAS: int16 ypos = ((p0 >> 3) & 0x3FF); // ??? What if not interlaced (/2)? + int16 ypos = ((p0 >> 3) & 0x7FF); // ??? What if not interlaced (/2)? int32 xpos = p1 & 0xFFF; xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos); uint32 iwidth = ((p1 >> 28) & 0x3FF); @@ -289,7 +284,8 @@ void DumpFixedObject(uint64 p0, uint64 p1) WriteLog(" (BITMAP)"); WriteLog(" %08X --> phrase %08X %08X\n", op_pointer, (uint32)(p1>>32), (uint32)(p1&0xFFFFFFFF)); uint8 bitdepth = (p1 >> 12) & 0x07; - int16 ypos = ((p0 >> 3) & 0x3FF); // ??? What if not interlaced (/2)? +//WAS: int16 ypos = ((p0 >> 3) & 0x3FF); // ??? What if not interlaced (/2)? + int16 ypos = ((p0 >> 3) & 0x7FF); // ??? What if not interlaced (/2)? int32 xpos = p1 & 0xFFF; xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos); uint32 iwidth = ((p1 >> 28) & 0x3FF); @@ -308,23 +304,20 @@ void DumpFixedObject(uint64 p0, uint64 p1) // // Object Processor main routine // +//Need to fix this so that when an GPU object IRQ happens, we can pick up OP processing +//where we left off. !!! FIX !!! void OPProcessList(int scanline, bool render) { extern int op_start_log; // char * condition_to_str[8] = // { "==", "<", ">", "(opflag set)", "(second half line)", "?", "?", "?" }; -// If jaguar_exec() is working right, we should *never* have to check for this -// condition... -/* if (scanline < tom_get_vdb()) - return; - - if (scanline >= 525)//tom_getVideoModeHeight()+tom_get_vdb()) - return;//*/ - op_pointer = op_get_list_pointer(); - objectp_stop_reading_list = false; +// objectp_stop_reading_list = false; + +//WriteLog("OP: Processing line #%u (OLP=%08X)...\n", scanline, op_pointer); +//op_done(); // *** BEGIN OP PROCESSOR TESTING ONLY *** extern bool interactiveMode; @@ -334,6 +327,8 @@ bool inhibit; int bitmapCounter = 0; // *** END OP PROCESSOR TESTING ONLY *** + uint32 opCyclesToRun = 10000; // This is a pulled-out-of-the-air value (will need to be fixed, obviously!) + // if (op_pointer) WriteLog(" new op list at 0x%.8x scanline %i\n",op_pointer,scanline); while (op_pointer) { @@ -343,10 +338,11 @@ if (interactiveMode && bitmapCounter == objectPtr) else inhibit = false; // *** END OP PROCESSOR TESTING ONLY *** - if (objectp_stop_reading_list) - return; +// if (objectp_stop_reading_list) +// return; uint64 p0 = op_load_phrase(op_pointer); +//WriteLog("\t%08X type %i\n", op_pointer, (uint8)p0 & 0x07); op_pointer += 8; if (scanline == tom_get_vdb() && op_start_log) //if (scanline == 215 && op_start_log) @@ -358,7 +354,8 @@ WriteLog(" (BITMAP) "); uint64 p1 = op_load_phrase(op_pointer); WriteLog("\n%08X --> phrase %08X %08X ", op_pointer, (int)(p1>>32), (int)(p1&0xFFFFFFFF)); uint8 bitdepth = (p1 >> 12) & 0x07; - int16 ypos = ((p0 >> 3) & 0x3FF); // ??? What if not interlaced (/2)? +//WAS: int16 ypos = ((p0 >> 3) & 0x3FF); // ??? What if not interlaced (/2)? + int16 ypos = ((p0 >> 3) & 0x7FF); // ??? What if not interlaced (/2)? int32 xpos = p1 & 0xFFF; xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos); uint32 iwidth = ((p1 >> 28) & 0x3FF); @@ -380,7 +377,8 @@ uint64 p1 = op_load_phrase(op_pointer), p2 = op_load_phrase(op_pointer+8); WriteLog("\n%08X --> phrase %08X %08X ", op_pointer, (int)(p1>>32), (int)(p1&0xFFFFFFFF)); WriteLog("\n%08X --> phrase %08X %08X ", op_pointer+8, (int)(p2>>32), (int)(p2&0xFFFFFFFF)); uint8 bitdepth = (p1 >> 12) & 0x07; - int16 ypos = ((p0 >> 3) & 0x3FF); // ??? What if not interlaced (/2)? +//WAS: int16 ypos = ((p0 >> 3) & 0x3FF); // ??? What if not interlaced (/2)? + int16 ypos = ((p0 >> 3) & 0x7FF); // ??? What if not interlaced (/2)? int32 xpos = p1 & 0xFFF; xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos); uint32 iwidth = ((p1 >> 28) & 0x3FF); @@ -414,13 +412,12 @@ if ((p0 & 0x07) == OBJECT_TYPE_STOP) WriteLog(" --> List end\n"); }//*/ -// WriteLog("%08X type %i\n", op_pointer, (uint8)p0 & 0x07); switch ((uint8)p0 & 0x07) { case OBJECT_TYPE_BITMAP: { - // Would *not* be /2 if interlaced...! - uint16 ypos = ((p0 >> 3) & 0x3FF) / 2; +//WAS: uint16 ypos = (p0 >> 3) & 0x3FF; + uint16 ypos = (p0 >> 3) & 0x7FF; // This is only theory implied by Rayman...! // It seems that if the YPOS is zero, then bump the YPOS value so that it coincides with // the VDB value. With interlacing, this would be slightly more tricky. @@ -431,7 +428,7 @@ WriteLog(" --> List end\n"); //No, the reason this was needed is that the OP code before was wrong. Any value //less than VDB will get written to the top line of the display! // if (ypos == 0) -// ypos = tom_word_read(0xF00046) / 2; // Get the VDB value +// ypos = TOMReadWord(0xF00046, OP) / 2; // Get the VDB value uint32 height = (p0 & 0xFFC000) >> 14; uint32 oldOPP = op_pointer - 8; // *** BEGIN OP PROCESSOR TESTING ONLY *** @@ -446,11 +443,13 @@ if (!inhibit) // For OP testing only! op_pointer += 8; //WriteLog("OP: Writing scanline %d with ypos == %d...\n", scanline, ypos); //WriteLog("--> Writing %u BPP bitmap...\n", op_bitmap_bit_depth[(p1 >> 12) & 0x07]); - OPProcessFixedBitmap(scanline, p0, p1, render); +// OPProcessFixedBitmap(scanline, p0, p1, render); + OPProcessFixedBitmap(p0, p1, render); // OP write-backs //???Does this really happen??? Doesn't seem to work if you do this...! +//Probably not. Must be a bug in the documentation...! // uint32 link = (p0 & 0x7FFFF000000) >> 21; // SET16(objectp_ram, 0x20, link & 0xFFFF); // OLP // SET16(objectp_ram, 0x22, link >> 16); @@ -460,35 +459,33 @@ if (!inhibit) // For OP testing only! // NOTE: Would subtract 2 if in interlaced mode...! // uint64 height = ((p0 & 0xFFC000) - 0x4000) & 0xFFC000; // if (height) - height--; + height--; - uint64 data = (p0 & 0xFFFFF80000000000) >> 40; + uint64 data = (p0 & 0xFFFFF80000000000LL) >> 40; uint64 dwidth = (p1 & 0xFFC0000) >> 15; data += dwidth; - p0 &= ~0xFFFFF80000FFC000; // Mask out old data... + p0 &= ~0xFFFFF80000FFC000LL; // Mask out old data... p0 |= (uint64)height << 14; p0 |= data << 40; OPStorePhrase(oldOPP, p0); } - op_pointer = (p0 & 0x000007FFFF000000) >> 21; +//WriteLog("\t\tOld OP: %08X -> ", op_pointer); +//Temp, for testing... +//No doubt, this type of check will break all kinds of stuff... !!! FIX !!! +//And it does! !!! FIX !!! +//Let's remove this "fix" since it screws up more than it fixes. +/* if (op_pointer > ((p0 & 0x000007FFFF000000LL) >> 21)) + return;*/ + + op_pointer = (p0 & 0x000007FFFF000000LL) >> 21; +//WriteLog("New OP: %08X\n", op_pointer); break; } case OBJECT_TYPE_SCALE: { - // Would *not* be /2 if interlaced...! - uint16 ypos = ((p0 >> 3) & 0x3FF) / 2; -// This is only theory implied by Rayman...! -// It seems that if the YPOS is zero, then bump the YPOS value so that it coincides with -// the VDB value. With interlacing, this would be slightly more tricky. -// There's probably another bit somewhere that enables this mode--but so far, doesn't seem -// to affect any other game in a negative way (that I've seen). -// Either that, or it's an undocumented bug... - -//No, the reason this was needed is that the OP code before was wrong. Any value -//less than VDB will get written to the top line of the display! -// if (ypos == 0) -// ypos = tom_word_read(0xF00046) / 2; // Get the VDB value +//WAS: uint16 ypos = (p0 >> 3) & 0x3FF; + uint16 ypos = (p0 >> 3) & 0x7FF; uint32 height = (p0 & 0xFFC000) >> 14; uint32 oldOPP = op_pointer - 8; // *** BEGIN OP PROCESSOR TESTING ONLY *** @@ -507,20 +504,10 @@ if (!inhibit) // For OP testing only! uint64 p2 = op_load_phrase(op_pointer); op_pointer += 8; //WriteLog("OP: %08X (%d) %08X%08X %08X%08X %08X%08X\n", oldOPP, scanline, (uint32)(p0>>32), (uint32)(p0&0xFFFFFFFF), (uint32)(p1>>32), (uint32)(p1&0xFFFFFFFF), (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF)); - OPProcessScaledBitmap(scanline, p0, p1, p2, render); + OPProcessScaledBitmap(p0, p1, p2, render); // OP write-backs -//???Does this really happen??? Doesn't seem to work if you do this...! -// uint32 link = (p0 & 0x7FFFF000000) >> 21; -// SET16(objectp_ram, 0x20, link & 0xFFFF); // OLP -// SET16(objectp_ram, 0x22, link >> 16); -/* uint32 height = (p0 & 0xFFC000) >> 14; - if (height - 1 > 0) - height--;*/ - // NOTE: Would subtract 2 if in interlaced mode...! -// uint64 height = ((p0 & 0xFFC000) - 0x4000) & 0xFFC000; - uint8 remainder = p2 >> 16, vscale = p2 >> 8; //Actually, we should skip this object if it has a vscale of zero. //Or do we? Not sure... Atari Karts has a few lines that look like: @@ -533,35 +520,90 @@ if (!inhibit) // For OP testing only! if (vscale == 0) vscale = 0x20; // OP bug??? Nope, it isn't...! Or is it? - remainder -= 0x20; // 1.0f in [3.5] fixed point format - if (remainder & 0x80) // I.e., it's negative +//extern int start_logging; +//if (start_logging) +// WriteLog("--> Returned from scaled bitmap processing (rem=%02X, vscale=%02X)...\n", remainder, vscale);//*/ +//Locks up here: +//--> Returned from scaled bitmap processing (rem=20, vscale=80)... +//There are other problems here, it looks like... +//Another lock up: +//About to execute OP (508)... +/* +OP: Scaled bitmap 4x? 4bpp at 38,? hscale=7C fpix=0 data=00075E28 pitch 1 hflipped=no dwidth=? (linked to 00071118) Transluency=no +--> Returned from scaled bitmap processing (rem=50, vscale=7C)... +OP: Scaled bitmap 4x? 4bpp at 38,? hscale=7C fpix=0 data=00075E28 pitch 1 hflipped=no dwidth=? (linked to 00071118) Transluency=no +--> Returned from scaled bitmap processing (rem=30, vscale=7C)... +OP: Scaled bitmap 4x? 4bpp at 38,? hscale=7C fpix=0 data=00075E28 pitch 1 hflipped=no dwidth=? (linked to 00071118) Transluency=no +--> Returned from scaled bitmap processing (rem=10, vscale=7C)... +OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756A8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no +--> Returned from scaled bitmap processing (rem=00, vscale=7E)... +OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no +--> Returned from scaled bitmap processing (rem=00, vscale=80)... +OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no +--> Returned from scaled bitmap processing (rem=5E, vscale=7E)... +OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756E8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no +--> Returned from scaled bitmap processing (rem=60, vscale=80)... +OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no +--> Returned from scaled bitmap processing (rem=3E, vscale=7E)... +OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756E8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no +--> Returned from scaled bitmap processing (rem=40, vscale=80)... +OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no +--> Returned from scaled bitmap processing (rem=1E, vscale=7E)... +OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756E8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no +--> Returned from scaled bitmap processing (rem=20, vscale=80)... +*/ +//Here's another problem: +// [hsc: 20, vsc: 20, rem: 00] +// Since we're not checking for $E0 (but that's what we get from the above), we end +// up repeating this scanline unnecessarily... !!! FIX !!! [DONE, but... still not quite +// right. Either that, or the Accolade team that wrote Bubsy screwed up royal.] +//Also note: $E0 = 7.0 which IS a legal vscale value... + +// if (remainder & 0x80) // I.e., it's negative +// if ((remainder & 0x80) || remainder == 0) // I.e., it's <= 0 +// if ((remainder - 1) >= 0xE0) // I.e., it's <= 0 +// if ((remainder >= 0xE1) || remainder == 0)// I.e., it's <= 0 +// if ((remainder >= 0xE1 && remainder <= 0xFF) || remainder == 0)// I.e., it's <= 0 + if (remainder <= 0x20) // I.e., it's <= 0 { - uint64 data = (p0 & 0xFFFFF80000000000) >> 40; + uint64 data = (p0 & 0xFFFFF80000000000LL) >> 40; uint64 dwidth = (p1 & 0xFFC0000) >> 15; - while (remainder & 0x80) +// while (remainder & 0x80) +// while ((remainder & 0x80) || remainder == 0) +// while ((remainder - 1) >= 0xE0) +// while ((remainder >= 0xE1) || remainder == 0) +// while ((remainder >= 0xE1 && remainder <= 0xFF) || remainder == 0) + while (remainder <= 0x20) { remainder += vscale; + if (height) height--; data += dwidth; } - p0 &= ~0xFFFFF80000FFC000; // Mask out old data... + + p0 &= ~0xFFFFF80000FFC000LL; // Mask out old data... p0 |= (uint64)height << 14; p0 |= data << 40; OPStorePhrase(oldOPP, p0); } + remainder -= 0x20; // 1.0f in [3.5] fixed point format + +//if (start_logging) +// WriteLog("--> Finished writebacks...\n");//*/ + //WriteLog(" [%08X%08X -> ", (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF)); - p2 &= ~0x0000000000FF0000; + p2 &= ~0x0000000000FF0000LL; p2 |= (uint64)remainder << 16; //WriteLog("%08X%08X]\n", (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF)); OPStorePhrase(oldOPP+16, p2); //remainder = (uint8)(p2 >> 16), vscale = (uint8)(p2 >> 8); //WriteLog(" [after]: rem=%02X, vscale=%02X\n", remainder, vscale); } - op_pointer = (p0 & 0x000007FFFF000000) >> 21; + op_pointer = (p0 & 0x000007FFFF000000LL) >> 21; break; } case OBJECT_TYPE_GPU: @@ -575,6 +617,7 @@ if (!inhibit) // For OP testing only! //OPSuspendedByGPU = true; //Dunno if the OP keeps processing from where it was interrupted, or if it just continues //on the next scanline... +// --> It continues from where it was interrupted! !!! FIX !!! break; } case OBJECT_TYPE_BRANCH: @@ -588,22 +631,15 @@ if (!inhibit) // For OP testing only! switch (cc) { case CONDITION_EQUAL: -//Why do this for the equal case? If they wrote an odd YPOS, then it wouldn't be detected! -// if (ypos != 0x7FF && (ypos & 0x01)) -// ypos ^= 0x01; -// if ((2 * tom_get_scanline()) == ypos || ypos == 0x7FF) -//Here we're using VC instead of the bogus tom_get_scanline() value... - if (tom_word_read(0xF00006) == ypos || ypos == 0x7FF) + if (TOMReadWord(0xF00006, OP) == ypos || ypos == 0x7FF) op_pointer = link; break; case CONDITION_LESS_THAN: -// if ((2 * tom_get_scanline()) < ypos) - if (tom_word_read(0xF00006) < ypos) + if (TOMReadWord(0xF00006, OP) < ypos) op_pointer = link; break; case CONDITION_GREATER_THAN: -// if ((2 * tom_get_scanline()) > ypos) - if (tom_word_read(0xF00006) > ypos) + if (TOMReadWord(0xF00006, OP) > ypos) op_pointer = link; break; case CONDITION_OP_FLAG_SET: @@ -633,7 +669,7 @@ if (!inhibit) // For OP testing only! if (p0 & 0x08) { tom_set_pending_object_int(); - if (tom_irq_enabled(IRQ_OPFLAG) && jaguar_interrupt_handler_is_valid(64)) + if (tom_irq_enabled(IRQ_OPFLAG))// && jaguar_interrupt_handler_is_valid(64)) m68k_set_irq(7); // Cause an NMI to occur... } @@ -644,17 +680,20 @@ if (!inhibit) // For OP testing only! WriteLog("op: unknown object type %i\n", ((uint8)p0 & 0x07)); return; } + + // Here is a little sanity check to keep the OP from locking up the machine + // when fed bad data. Better would be to count how many actual cycles it used + // and bail out/reenter to properly simulate an overloaded OP... !!! FIX !!! + opCyclesToRun--; + if (!opCyclesToRun) + return; } } // // Store fixed size bitmap in line buffer // - -// Interesting thing about Rayman: There seems to be a transparent bitmap (1/8/16 bpp--which?) -// being rendered under his feet--doesn't align when walking... Check it out! - -void OPProcessFixedBitmap(int scanline, uint64 p0, uint64 p1, bool render) +void OPProcessFixedBitmap(uint64 p0, uint64 p1, bool render) { // Need to make sure that when writing that it stays within the line buffer... // LBUF ($F01800 - $F01D9E) 360 x 32-bit RAM @@ -663,7 +702,6 @@ void OPProcessFixedBitmap(int scanline, uint64 p0, uint64 p1, bool render) uint32 iwidth = (p1 >> 28) & 0x3FF; // Image width in *phrases* uint32 data = (p0 >> 40) & 0xFFFFF8; // Pixel data address //#ifdef OP_DEBUG_BMP -// Prolly should use this... Though not sure exactly how. uint32 firstPix = (p1 >> 49) & 0x3F; // "The LSB is significant only for scaled objects..." -JTRM // "In 1 BPP mode, all five bits are significant. In 2 BPP mode, the top four are significant..." @@ -680,6 +718,7 @@ void OPProcessFixedBitmap(int scanline, uint64 p0, uint64 p1, bool render) // provide the most significant bits of the palette address." uint8 index = (p1 >> 37) & 0xFE; // CLUT index offset (upper pix, 1-4 bpp) uint32 pitch = (p1 >> 15) & 0x07; // Phrase pitch + pitch <<= 3; // Optimization: Multiply pitch by 8 // int16 scanlineWidth = tom_getVideoModeWidth(); uint8 * tom_ram_8 = tom_get_ram_pointer(); @@ -694,9 +733,9 @@ void OPProcessFixedBitmap(int scanline, uint64 p0, uint64 p1, bool render) // Is it OK to have a 0 for the data width??? (i.e., undocumented?) // Seems to be... Seems that dwidth *can* be zero (i.e., reuse same line) as well. // Pitch == 0 is OK too... -// if (!render || op_pointer == 0 || dwidth == 0 || ptr == 0 || pitch == 0) +// if (!render || op_pointer == 0 || ptr == 0 || pitch == 0) //I'm not convinced that we need to concern ourselves with data & op_pointer here either! - if (!render || iwidth == 0) // || data == 0 || op_pointer == 0) + if (!render || iwidth == 0) return; //#define OP_DEBUG_BMP @@ -771,9 +810,10 @@ void OPProcessFixedBitmap(int scanline, uint64 p0, uint64 p1, bool render) // rightMargin = lbufWidth; */ if (depth > 5) - WriteLog("We're about to encounter a divide by zero error!\n"); + WriteLog("OP: We're about to encounter a divide by zero error!\n"); // NOTE: We're just using endPos to figure out how much, if any, to clip by. // ALSO: There may be another case where we start out of bounds and end out of bounds...! + // !!! FIX !!! if (startPos < 0) // Case #1: Begin out, end in, L to R clippedWidth = 0 - startPos, dataClippedWidth = phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth], @@ -799,12 +839,14 @@ if (depth > 5) // Also, if we're clipping the phrase we need to make sure we're in the correct part of // the pixel data. // data += phraseClippedWidth * (pitch << 3); - data += dataClippedWidth * (pitch << 3); + data += dataClippedWidth * pitch; // NOTE: When the bitmap is in REFLECT mode, the XPOS marks the *right* side of the // bitmap! This makes clipping & etc. MUCH, much easier...! // uint32 lbufAddress = 0x1800 + (!in24BPPMode ? leftMargin * 2 : leftMargin * 4); - uint32 lbufAddress = 0x1800 + (!in24BPPMode ? startPos * 2 : startPos * 4); +//Why does this work right when multiplying startPos by 2 (instead of 4) for 24 BPP mode? +//Is this a bug in the OP? + uint32 lbufAddress = 0x1800 + (!in24BPPMode ? startPos * 2 : startPos * 2); uint8 * currentLineBuffer = &tom_ram_8[lbufAddress]; // Render. @@ -813,6 +855,7 @@ if (depth > 5) // If we *were* in 24 BPP mode, how would you convert CRY to RGB24? Seems to me // that if you're in CRY mode then you wouldn't be able to use 24 BPP bitmaps // anyway. +// This seems to be the case (at least according to the Midsummer docs)...! if (depth == 0) // 1 BPP { @@ -820,9 +863,9 @@ if (depth > 5) int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02; // Fetch 1st phrase... - uint64 pixels = ((uint64)jaguar_long_read(data) << 32) | jaguar_long_read(data + 4); + uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP); //Note that firstPix should only be honored *if* we start with the 1st phrase of the bitmap -//i.e., we didn't clip on the margin... +//i.e., we didn't clip on the margin... !!! FIX !!! pixels <<= firstPix; // Skip first N pixels (N=firstPix)... int i = firstPix; // Start counter at right spot... @@ -853,8 +896,8 @@ if (depth > 5) } i = 0; // Fetch next phrase... - data += pitch << 3; // Multiply pitch * 8 (optimize: precompute this value) - pixels = ((uint64)jaguar_long_read(data) << 32) | jaguar_long_read(data + 4); + data += pitch; + pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP); } } else if (depth == 1) // 2 BPP @@ -868,8 +911,8 @@ if (firstPix) while (iwidth--) { // Fetch phrase... - uint64 pixels = ((uint64)jaguar_long_read(data) << 32) | jaguar_long_read(data + 4); - data += pitch << 3; // Multiply pitch * 8 (optimize: precompute this value) + uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP); + data += pitch; for(int i=0; i<32; i++) { @@ -908,8 +951,8 @@ if (firstPix) while (iwidth--) { // Fetch phrase... - uint64 pixels = ((uint64)jaguar_long_read(data) << 32) | jaguar_long_read(data + 4); - data += pitch << 3; // Multiply pitch * 8 (optimize: precompute this value) + uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP); + data += pitch; for(int i=0; i<16; i++) { @@ -939,18 +982,20 @@ if (firstPix) } else if (depth == 3) // 8 BPP { -if (firstPix) - WriteLog("OP: Fixed bitmap @ 8 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix); // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it. int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02; + // Fetch 1st phrase... + uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP); +//Note that firstPix should only be honored *if* we start with the 1st phrase of the bitmap +//i.e., we didn't clip on the margin... !!! FIX !!! + firstPix &= 0x30; // Only top two bits are valid for 8 BPP + pixels <<= firstPix; // Skip first N pixels (N=firstPix)... + int i = firstPix >> 3; // Start counter at right spot... + while (iwidth--) { - // Fetch phrase... - uint64 pixels = ((uint64)jaguar_long_read(data) << 32) | jaguar_long_read(data + 4); - data += pitch << 3; // Multiply pitch * 8 (optimize: precompute this value) - - for(int i=0; i<8; i++) + while (i++ < 8) { uint8 bits = pixels >> 56; // Seems to me that both of these are in the same endian, so we could cast it as @@ -974,6 +1019,10 @@ if (firstPix) currentLineBuffer += lbufDelta; pixels <<= 8; } + i = 0; + // Fetch next phrase... + data += pitch; + pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP); } } else if (depth == 4) // 16 BPP @@ -986,8 +1035,8 @@ if (firstPix) while (iwidth--) { // Fetch phrase... - uint64 pixels = ((uint64)jaguar_long_read(data) << 32) | jaguar_long_read(data + 4); - data += pitch << 3; // Multiply pitch * 8 (optimize: precompute this value) + uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP); + data += pitch; for(int i=0; i<4; i++) { @@ -1024,24 +1073,21 @@ if (firstPix) if (firstPix) WriteLog("OP: Fixed bitmap @ 24 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix); // Not sure, but I think RMW only works with 16 BPP and below, and only in CRY mode... - // The LSB is OPFLAG_REFLECT, so sign extend it and or 4 into it. + // The LSB of flags is OPFLAG_REFLECT, so sign extend it and OR 4 into it. int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 4) | 0x04; while (iwidth--) { // Fetch phrase... - uint64 pixels = ((uint64)jaguar_long_read(data) << 32) | jaguar_long_read(data + 4); - data += pitch << 3; // Multiply pitch * 8 (optimize: precompute this value) + uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP); + data += pitch; for(int i=0; i<2; i++) { + // We don't use a 32-bit var here because of endian issues...! uint8 bits3 = pixels >> 56, bits2 = pixels >> 48, bits1 = pixels >> 40, bits0 = pixels >> 32; -// Seems to me that both of these are in the same endian, so we could cast it as -// uint16 * and do straight across copies (what about 24 bpp? Treat it differently...) -// This only works for the palettized modes (1 - 8 BPP), since we actually have to -// copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?) -// No, it isn't because we read the memory in an endian safe way--it *won't* work... + if (flagTRANS && (bits3 | bits2 | bits1 | bits0) == 0) ; // Do nothing... else @@ -1060,7 +1106,7 @@ if (firstPix) // // Store scaled bitmap in line buffer // -void OPProcessScaledBitmap(int scanline, uint64 p0, uint64 p1, uint64 p2, bool render) +void OPProcessScaledBitmap(uint64 p0, uint64 p1, uint64 p2, bool render) { // Need to make sure that when writing that it stays within the line buffer... // LBUF ($F01800 - $F01D9E) 360 x 32-bit RAM @@ -1074,11 +1120,11 @@ void OPProcessScaledBitmap(int scanline, uint64 p0, uint64 p1, uint64 p2, bool r uint32 firstPix = (p1 >> 49) & 0x3F; //This is WEIRD! I'm sure I saw Atari Karts request 8 BPP FIRSTPIX! What happened??? if (firstPix) - WriteLog("OP: FIRSTPIX != 0!\n"); + WriteLog("OP: FIRSTPIX != 0! (Scaled BM)\n"); //#endif // We can ignore the RELEASE (high order) bit for now--probably forever...! // uint8 flags = (p1 >> 45) & 0x0F; // REFLECT, RMW, TRANS, RELEASE -//Optimize: break these out to their own BOOL values +//Optimize: break these out to their own BOOL values [DONE] uint8 flags = (p1 >> 45) & 0x07; // REFLECT (0), RMW (1), TRANS (2) bool flagREFLECT = (flags & OPFLAG_REFLECT ? true : false), flagRMW = (flags & OPFLAG_RMW ? true : false), @@ -1086,28 +1132,34 @@ if (firstPix) uint8 index = (p1 >> 37) & 0xFE; // CLUT index offset (upper pix, 1-4 bpp) uint32 pitch = (p1 >> 15) & 0x07; // Phrase pitch -// int16 scanlineWidth = tom_getVideoModeWidth(); uint8 * tom_ram_8 = tom_get_ram_pointer(); uint8 * paletteRAM = &tom_ram_8[0x400]; // This is OK as long as it's used correctly: For 16-bit RAM to RAM direct copies--NOT - // for use when using endian-corrected data (i.e., any of the *_word_read functions!) + // for use when using endian-corrected data (i.e., any of the *ReadWord functions!) uint16 * paletteRAM16 = (uint16 *)paletteRAM; uint8 hscale = p2 & 0xFF; - uint8 horizontalRemainder = hscale; // Not sure if it starts full, but seems reasonable +// Hmm. It seems that fixing the horizontal scale necessitated re-fixing this. Not sure why, +// but seems to be consistent with the vertical scaling now (and it may turn out to be wrong!)... + uint8 horizontalRemainder = hscale; // Not sure if it starts full, but seems reasonable [It's not!] +// uint8 horizontalRemainder = 0; // Let's try zero! Seems to work! Yay! [No, it doesn't!] int32 scaledWidthInPixels = (iwidth * phraseWidthToPixels[depth] * hscale) >> 5; uint32 scaledPhrasePixels = (phraseWidthToPixels[depth] * hscale) >> 5; // WriteLog("bitmap %ix? %ibpp at %i,? firstpix=? data=0x%.8x pitch %i hflipped=%s dwidth=? (linked to ?) RMW=%s Tranparent=%s\n", // iwidth, op_bitmap_bit_depth[bitdepth], xpos, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), (flags&OPFLAG_RMW ? "yes" : "no"), (flags&OPFLAG_TRANS ? "yes" : "no")); -//Looks like an hscale of zero means don't draw! +// Looks like an hscale of zero means don't draw! if (!render || iwidth == 0 || hscale == 0) return; +/*extern int start_logging; +if (start_logging) + WriteLog("OP: Scaled bitmap %ix? %ibpp at %i,? hscale=%02X fpix=%i data=%08X pitch %i hflipped=%s dwidth=? (linked to %08X) Transluency=%s\n", + iwidth, op_bitmap_bit_depth[depth], xpos, hscale, firstPix, data, pitch, (flagREFLECT ? "yes" : "no"), op_pointer, (flagRMW ? "yes" : "no"));*/ //#define OP_DEBUG_BMP //#ifdef OP_DEBUG_BMP -// WriteLog("bitmap %ix%i %ibpp at %i,%i firstpix=%i data=0x%.8x pitch %i hflipped=%s dwidth=%i (linked to 0x%.8x) Transluency=%s\n", +// WriteLog("OP: Scaled bitmap %ix%i %ibpp at %i,%i firstpix=%i data=0x%.8x pitch %i hflipped=%s dwidth=%i (linked to 0x%.8x) Transluency=%s\n", // iwidth, height, op_bitmap_bit_depth[bitdepth], xpos, ypos, firstPix, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), dwidth, op_pointer, (flags&OPFLAG_RMW ? "yes" : "no")); //#endif @@ -1130,7 +1182,7 @@ if (firstPix) // 4. image sits on right edge and no REFLECT; starts in bounds but ends out of bounds. //Numbers 2 & 4 can be caught by checking the LBUF clip while in the inner loop, // numbers 1 & 3 are of concern. -// This *indirectly* handles only cases 2 & 4! And is WRONG is REFLECT is set...! +// This *indirectly* handles only cases 2 & 4! And is WRONG if REFLECT is set...! // if (rightMargin < 0 || leftMargin > lbufWidth) // It might be easier to swap these (if REFLECTed) and just use XPOS down below... @@ -1165,6 +1217,7 @@ if (firstPix) // NOTE: We're just using endPos to figure out how much, if any, to clip by. // ALSO: There may be another case where we start out of bounds and end out of bounds...! + // !!! FIX !!! //There's a problem here with scaledPhrasePixels in that it can be forced to zero when //the scaling factor is small. So fix it already! !!! FIX !!! @@ -1174,30 +1227,58 @@ if (firstPix) DumpScaledObject(p0, p1, p2); }//*/ //NOTE: I'm almost 100% sure that this is wrong... And it is! :-p + +//Try a simple example... +// Let's say we have a 8 BPP scanline with an hscale of $80 (4). Our xpos is -10, +// non-flipped. Pixels in the bitmap are XYZXYZXYZXYZXYZ. +// Scaled up, they would be XXXXYYYYZZZZXXXXYYYYZZZZXXXXYYYYZZZZ... +// +// Normally, we would expect this in the line buffer: +// ZZXXXXYYYYZZZZXXXXYYYYZZZZ... +// +// But instead we're getting: +// XXXXYYYYZZZZXXXXYYYYZZZZ... +// +// or are we??? It would seem so, simply by virtue of the fact that we're NOT starting +// on negative boundary--or are we? Hmm... +// cw = 10, dcw = pcw = 10 / ([8 * 4 = 32] 32) = 0, sp = -10 +// +// Let's try a real world example: +// +//OP: Scaled bitmap (70, 8 BPP, spp=28) sp (-400) < 0... [new sp=-8, cw=400, dcw=pcw=14] +//OP: Scaled bitmap (6F, 8 BPP, spp=27) sp (-395) < 0... [new sp=-17, cw=395, dcw=pcw=14] +// +// Really, spp is 27.75 in the second case... +// So... If we do 395 / 27.75, we get 14. Ok so far... If we scale that against the +// start position (14 * 27.75), we get -6.5... NOT -17! + +//Now it seems we're working OK, at least for the first case... +uint32 scaledPhrasePixelsUS = phraseWidthToPixels[depth] * hscale; + if (startPos < 0) // Case #1: Begin out, end in, L to R -/* clippedWidth = 0 - startPos, - dataClippedWidth = phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth], - startPos = 0 - (clippedWidth % phraseWidthToPixels[depth]);*/ - clippedWidth = 0 - startPos, - dataClippedWidth = phraseClippedWidth = clippedWidth / scaledPhrasePixels, - startPos = 0 - (clippedWidth % scaledPhrasePixels); +{ +extern int start_logging; +if (start_logging) + WriteLog("OP: Scaled bitmap (%02X, %u BPP, spp=%u) start pos (%i) < 0...", hscale, op_bitmap_bit_depth[depth], scaledPhrasePixels, startPos); +// clippedWidth = 0 - startPos, + clippedWidth = (0 - startPos) << 5, +// dataClippedWidth = phraseClippedWidth = clippedWidth / scaledPhrasePixels, + dataClippedWidth = phraseClippedWidth = (clippedWidth / scaledPhrasePixelsUS) >> 5, +// startPos = 0 - (clippedWidth % scaledPhrasePixels); + startPos += (dataClippedWidth * scaledPhrasePixelsUS) >> 5; +if (start_logging) + WriteLog(" [new sp=%i, cw=%i, dcw=pcw=%i]\n", startPos, clippedWidth, dataClippedWidth); +} if (endPos < 0) // Case #2: Begin in, end out, R to L -/* clippedWidth = 0 - endPos, - phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];*/ clippedWidth = 0 - endPos, phraseClippedWidth = clippedWidth / scaledPhrasePixels; if (endPos > lbufWidth) // Case #3: Begin in, end out, L to R -/* clippedWidth = endPos - lbufWidth, - phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];*/ clippedWidth = endPos - lbufWidth, phraseClippedWidth = clippedWidth / scaledPhrasePixels; if (startPos > lbufWidth) // Case #4: Begin out, end in, R to L -/* clippedWidth = startPos - lbufWidth, - dataClippedWidth = phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth], - startPos = lbufWidth + (clippedWidth % phraseWidthToPixels[depth]);*/ clippedWidth = startPos - lbufWidth, dataClippedWidth = phraseClippedWidth = clippedWidth / scaledPhrasePixels, startPos = lbufWidth + (clippedWidth % scaledPhrasePixels); @@ -1209,6 +1290,13 @@ if (op_start_log && startPos == 13) { WriteLog("OP: Scaled line. SP=%i, EP=%i, clip=%u, iwidth=%u, hscale=%02X, depth=%u, firstPix=%u\n", startPos, endPos, clippedWidth, iwidth, hscale, depth, firstPix); DumpScaledObject(p0, p1, p2); + if (iwidth == 7) + { + WriteLog(" %08X: ", data); + for(int i=0; i<7*8; i++) + WriteLog("%02X ", JaguarReadByte(data+i)); + WriteLog("\n"); + } } // If the image is sitting on the line buffer left or right edge, we need to compensate // by decreasing the image phrase width accordingly. @@ -1222,8 +1310,11 @@ if (op_start_log && startPos == 13) // NOTE: When the bitmap is in REFLECT mode, the XPOS marks the *right* side of the // bitmap! This makes clipping & etc. MUCH, much easier...! // uint32 lbufAddress = 0x1800 + (!in24BPPMode ? leftMargin * 2 : leftMargin * 4); - uint32 lbufAddress = 0x1800 + (!in24BPPMode ? startPos * 2 : startPos * 4); +// uint32 lbufAddress = 0x1800 + (!in24BPPMode ? startPos * 2 : startPos * 4); + uint32 lbufAddress = 0x1800 + startPos * 2; uint8 * currentLineBuffer = &tom_ram_8[lbufAddress]; +//uint8 * lineBufferLowerLimit = &tom_ram_8[0x1800], +// * lineBufferUpperLimit = &tom_ram_8[0x1800 + 719]; // Render. @@ -1231,6 +1322,7 @@ if (op_start_log && startPos == 13) // If we *were* in 24 BPP mode, how would you convert CRY to RGB24? Seems to me // that if you're in CRY mode then you wouldn't be able to use 24 BPP bitmaps // anyway. +// This seems to be the case (at least according to the Midsummer docs)...! if (depth == 0) // 1 BPP { @@ -1240,7 +1332,7 @@ if (firstPix != 0) int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02; int pixCount = 0; - uint64 pixels = ((uint64)jaguar_long_read(data) << 32) | jaguar_long_read(data + 4); + uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP); while ((int32)iwidth > 0) { @@ -1263,20 +1355,27 @@ if (firstPix != 0) currentLineBuffer += lbufDelta; - horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format +/* horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format while (horizontalRemainder & 0x80) + { + horizontalRemainder += hscale; + pixCount++; + pixels <<= 1; + }//*/ + while (horizontalRemainder <= 0x20) // I.e., it's <= 0 (*before* subtraction) { horizontalRemainder += hscale; pixCount++; pixels <<= 1; } + horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format if (pixCount > 63) { int phrasesToSkip = pixCount / 64, pixelShift = pixCount % 64; data += (pitch << 3) * phrasesToSkip; - pixels = ((uint64)jaguar_long_read(data) << 32) | jaguar_long_read(data + 4); + pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP); pixels <<= 1 * pixelShift; iwidth -= phrasesToSkip; pixCount = pixelShift; @@ -1292,7 +1391,7 @@ if (firstPix != 0) int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02; int pixCount = 0; - uint64 pixels = ((uint64)jaguar_long_read(data) << 32) | jaguar_long_read(data + 4); + uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP); while ((int32)iwidth > 0) { @@ -1315,20 +1414,27 @@ if (firstPix != 0) currentLineBuffer += lbufDelta; - horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format +/* horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format while (horizontalRemainder & 0x80) + { + horizontalRemainder += hscale; + pixCount++; + pixels <<= 2; + }//*/ + while (horizontalRemainder <= 0x20) // I.e., it's <= 0 (*before* subtraction) { horizontalRemainder += hscale; pixCount++; pixels <<= 2; } + horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format if (pixCount > 31) { int phrasesToSkip = pixCount / 32, pixelShift = pixCount % 32; data += (pitch << 3) * phrasesToSkip; - pixels = ((uint64)jaguar_long_read(data) << 32) | jaguar_long_read(data + 4); + pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP); pixels <<= 2 * pixelShift; iwidth -= phrasesToSkip; pixCount = pixelShift; @@ -1344,7 +1450,7 @@ if (firstPix != 0) int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02; int pixCount = 0; - uint64 pixels = ((uint64)jaguar_long_read(data) << 32) | jaguar_long_read(data + 4); + uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP); while ((int32)iwidth > 0) { @@ -1367,20 +1473,27 @@ if (firstPix != 0) currentLineBuffer += lbufDelta; - horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format +/* horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format while (horizontalRemainder & 0x80) + { + horizontalRemainder += hscale; + pixCount++; + pixels <<= 4; + }//*/ + while (horizontalRemainder <= 0x20) // I.e., it's <= 0 (*before* subtraction) { horizontalRemainder += hscale; pixCount++; pixels <<= 4; } + horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format if (pixCount > 15) { int phrasesToSkip = pixCount / 16, pixelShift = pixCount % 16; data += (pitch << 3) * phrasesToSkip; - pixels = ((uint64)jaguar_long_read(data) << 32) | jaguar_long_read(data + 4); + pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP); pixels <<= 4 * pixelShift; iwidth -= phrasesToSkip; pixCount = pixelShift; @@ -1395,7 +1508,7 @@ if (firstPix) int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02; int pixCount = 0; - uint64 pixels = ((uint64)jaguar_long_read(data) << 32) | jaguar_long_read(data + 4); + uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP); while ((int32)iwidth > 0) { @@ -1409,6 +1522,10 @@ if (firstPix) // This is the *only* correct use of endian-dependent code // (i.e., mem-to-mem direct copying)! *(uint16 *)currentLineBuffer = paletteRAM16[bits]; +/* { + if (currentLineBuffer >= lineBufferLowerLimit && currentLineBuffer <= lineBufferUpperLimit) + *(uint16 *)currentLineBuffer = paletteRAM16[bits]; + }*/ else *currentLineBuffer = BLEND_CR(*currentLineBuffer, paletteRAM[bits << 1]), @@ -1418,20 +1535,20 @@ if (firstPix) currentLineBuffer += lbufDelta; - horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format - while (horizontalRemainder & 0x80) + while (horizontalRemainder <= 0x20) // I.e., it's <= 0 (*before* subtraction) { horizontalRemainder += hscale; pixCount++; pixels <<= 8; } + horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format if (pixCount > 7) { int phrasesToSkip = pixCount / 8, pixelShift = pixCount % 8; data += (pitch << 3) * phrasesToSkip; - pixels = ((uint64)jaguar_long_read(data) << 32) | jaguar_long_read(data + 4); + pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP); pixels <<= 8 * pixelShift; iwidth -= phrasesToSkip; pixCount = pixelShift; @@ -1446,7 +1563,7 @@ if (firstPix != 0) int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02; int pixCount = 0; - uint64 pixels = ((uint64)jaguar_long_read(data) << 32) | jaguar_long_read(data + 4); + uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP); while ((int32)iwidth > 0) { @@ -1468,20 +1585,27 @@ if (firstPix != 0) currentLineBuffer += lbufDelta; - horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format +/* horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format while (horizontalRemainder & 0x80) + { + horizontalRemainder += hscale; + pixCount++; + pixels <<= 16; + }//*/ + while (horizontalRemainder <= 0x20) // I.e., it's <= 0 (*before* subtraction) { horizontalRemainder += hscale; pixCount++; pixels <<= 16; } - + horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format +//*/ if (pixCount > 3) { int phrasesToSkip = pixCount / 4, pixelShift = pixCount % 4; data += (pitch << 3) * phrasesToSkip; - pixels = ((uint64)jaguar_long_read(data) << 32) | jaguar_long_read(data + 4); + pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP); pixels <<= 16 * pixelShift; iwidth -= phrasesToSkip; @@ -1503,18 +1627,14 @@ if (firstPix != 0) while (iwidth--) { // Fetch phrase... - uint64 pixels = ((uint64)jaguar_long_read(data) << 32) | jaguar_long_read(data + 4); + uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP); data += pitch << 3; // Multiply pitch * 8 (optimize: precompute this value) for(int i=0; i<2; i++) { uint8 bits3 = pixels >> 56, bits2 = pixels >> 48, bits1 = pixels >> 40, bits0 = pixels >> 32; -// Seems to me that both of these are in the same endian, so we could cast it as -// uint16 * and do straight across copies (what about 24 bpp? Treat it differently...) -// This only works for the palettized modes (1 - 8 BPP), since we actually have to -// copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?) -// No, it isn't because we read the memory in an endian safe way--it *won't* work... + if (flagTRANS && (bits3 | bits2 | bits1 | bits0) == 0) ; // Do nothing... else @@ -1528,15 +1648,4 @@ if (firstPix != 0) } } } -/*if (depth == 3 && startPos == 13) -{ -if (op_start_log) -WriteLog("OP: Writing in the margins...\n"); - for(int i=0; i<100*2; i+=2) -// for(int i=0; i<14*2; i+=2) - tom_ram_8[0x1800 + i] = 0xFF, - tom_ram_8[0x1800 + i + 1] = 0xFF; -}*/ -// uint32 lbufAddress = 0x1800 + (!in24BPPMode ? startPos * 2 : startPos * 4); -// uint8 * currentLineBuffer = &tom_ram_8[lbufAddress]; }