X-Git-Url: http://shamusworld.gotdns.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fobjectp.cpp;h=44bb34dd3e6f91601387d23af7aba80e011df105;hb=a1ad40785ac6d954051e4e5882436da9a58cc3a6;hp=9c7f4e659b88725eb246b1cbc5117d05482b29c3;hpb=cd61d997688c71e8eeecf39e4ce9d77a08872d7b;p=virtualjaguar diff --git a/src/objectp.cpp b/src/objectp.cpp index 9c7f4e6..44bb34d 100644 --- a/src/objectp.cpp +++ b/src/objectp.cpp @@ -1,9 +1,9 @@ // // Object Processor // -// by cal2 +// Original source by Cal2 // GCC/SDL port by Niels Wagenaar (Linux/WIN32) and Caz (BeOS) -// Cleanups/fixes/rewrites by James L. Hammons +// Extensive cleanups/fixes/rewrites by James L. Hammons // #include @@ -36,16 +36,12 @@ // Private function prototypes -void OPProcessFixedBitmap(int scanline, uint64 p0, uint64 p1, bool render); -void OPProcessScaledBitmap(int scanline, uint64 p0, uint64 p1, uint64 p2, bool render); +void OPProcessFixedBitmap(uint64 p0, uint64 p1, bool render); +void OPProcessScaledBitmap(uint64 p0, uint64 p1, uint64 p2, bool render); void DumpScaledObject(uint64 p0, uint64 p1, uint64 p2); void DumpFixedObject(uint64 p0, uint64 p1); uint64 op_load_phrase(uint32 offset); -// External global variables - -extern uint32 jaguar_mainRom_crc32; - // Local global variables static uint8 * op_blend_y; @@ -54,7 +50,7 @@ static uint8 * op_blend_cr; // some of the regular TOM RAM... static uint8 objectp_ram[0x40]; // This is based at $F00000 uint8 objectp_running; -bool objectp_stop_reading_list; +//bool objectp_stop_reading_list; static uint8 op_bitmap_bit_depth[8] = { 1, 2, 4, 8, 16, 24, 32, 0 }; //static uint32 op_bitmap_bit_size[8] = @@ -153,6 +149,11 @@ void op_done(void) // Object Processor memory access // Memory range: F00010 - F00027 // +// F00010-F00017 R xxxxxxxx xxxxxxxx OB - current object code from the graphics processor +// F00020-F00023 W xxxxxxxx xxxxxxxx OLP - start of the object list +// F00026 W -------- -------x OBF - object processor flag +// + uint8 OPReadByte(uint32 offset, uint32 who/*=UNKNOWN*/) { offset &= 0x3F; @@ -165,10 +166,6 @@ uint16 OPReadWord(uint32 offset, uint32 who/*=UNKNOWN*/) return GET16(objectp_ram, offset); } -// F00010-F00017 R xxxxxxxx xxxxxxxx OB - current object code from the graphics processor -// F00020-F00023 W xxxxxxxx xxxxxxxx OLP - start of the object list -// F00026 W -------- -------x OBF - object processor flag - void OPWriteByte(uint32 offset, uint8 data, uint32 who/*=UNKNOWN*/) { offset &= 0x3F; @@ -305,6 +302,8 @@ void DumpFixedObject(uint64 p0, uint64 p1) // // Object Processor main routine // +//Need to fix this so that when an GPU object IRQ happens, we can pick up OP processing +//where we left off. !!! FIX !!! void OPProcessList(int scanline, bool render) { extern int op_start_log; @@ -313,7 +312,7 @@ extern int op_start_log; op_pointer = op_get_list_pointer(); - objectp_stop_reading_list = false; +// objectp_stop_reading_list = false; // *** BEGIN OP PROCESSOR TESTING ONLY *** extern bool interactiveMode; @@ -332,8 +331,8 @@ if (interactiveMode && bitmapCounter == objectPtr) else inhibit = false; // *** END OP PROCESSOR TESTING ONLY *** - if (objectp_stop_reading_list) - return; +// if (objectp_stop_reading_list) +// return; uint64 p0 = op_load_phrase(op_pointer); op_pointer += 8; @@ -408,8 +407,7 @@ WriteLog(" --> List end\n"); { case OBJECT_TYPE_BITMAP: { - // Would *not* be /2 if interlaced...! - uint16 ypos = ((p0 >> 3) & 0x3FF) / 2; + uint16 ypos = (p0 >> 3) & 0x3FF; // This is only theory implied by Rayman...! // It seems that if the YPOS is zero, then bump the YPOS value so that it coincides with // the VDB value. With interlacing, this would be slightly more tricky. @@ -435,11 +433,13 @@ if (!inhibit) // For OP testing only! op_pointer += 8; //WriteLog("OP: Writing scanline %d with ypos == %d...\n", scanline, ypos); //WriteLog("--> Writing %u BPP bitmap...\n", op_bitmap_bit_depth[(p1 >> 12) & 0x07]); - OPProcessFixedBitmap(scanline, p0, p1, render); +// OPProcessFixedBitmap(scanline, p0, p1, render); + OPProcessFixedBitmap(p0, p1, render); // OP write-backs //???Does this really happen??? Doesn't seem to work if you do this...! +//Probably not. Must be a bug in the documentation...! // uint32 link = (p0 & 0x7FFFF000000) >> 21; // SET16(objectp_ram, 0x20, link & 0xFFFF); // OLP // SET16(objectp_ram, 0x22, link >> 16); @@ -451,33 +451,21 @@ if (!inhibit) // For OP testing only! // if (height) height--; - uint64 data = (p0 & 0xFFFFF80000000000) >> 40; + uint64 data = (p0 & 0xFFFFF80000000000LL) >> 40; uint64 dwidth = (p1 & 0xFFC0000) >> 15; data += dwidth; - p0 &= ~0xFFFFF80000FFC000; // Mask out old data... + p0 &= ~0xFFFFF80000FFC000LL; // Mask out old data... p0 |= (uint64)height << 14; p0 |= data << 40; OPStorePhrase(oldOPP, p0); } - op_pointer = (p0 & 0x000007FFFF000000) >> 21; + op_pointer = (p0 & 0x000007FFFF000000LL) >> 21; break; } case OBJECT_TYPE_SCALE: { - // Would *not* be /2 if interlaced...! - uint16 ypos = ((p0 >> 3) & 0x3FF) / 2; -// This is only theory implied by Rayman...! -// It seems that if the YPOS is zero, then bump the YPOS value so that it coincides with -// the VDB value. With interlacing, this would be slightly more tricky. -// There's probably another bit somewhere that enables this mode--but so far, doesn't seem -// to affect any other game in a negative way (that I've seen). -// Either that, or it's an undocumented bug... - -//No, the reason this was needed is that the OP code before was wrong. Any value -//less than VDB will get written to the top line of the display! -// if (ypos == 0) -// ypos = TOMReadWord(0xF00046, OP) / 2; // Get the VDB value + uint16 ypos = (p0 >> 3) & 0x3FF; uint32 height = (p0 & 0xFFC000) >> 14; uint32 oldOPP = op_pointer - 8; // *** BEGIN OP PROCESSOR TESTING ONLY *** @@ -496,20 +484,10 @@ if (!inhibit) // For OP testing only! uint64 p2 = op_load_phrase(op_pointer); op_pointer += 8; //WriteLog("OP: %08X (%d) %08X%08X %08X%08X %08X%08X\n", oldOPP, scanline, (uint32)(p0>>32), (uint32)(p0&0xFFFFFFFF), (uint32)(p1>>32), (uint32)(p1&0xFFFFFFFF), (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF)); - OPProcessScaledBitmap(scanline, p0, p1, p2, render); + OPProcessScaledBitmap(p0, p1, p2, render); // OP write-backs -//???Does this really happen??? Doesn't seem to work if you do this...! -// uint32 link = (p0 & 0x7FFFF000000) >> 21; -// SET16(objectp_ram, 0x20, link & 0xFFFF); // OLP -// SET16(objectp_ram, 0x22, link >> 16); -/* uint32 height = (p0 & 0xFFC000) >> 14; - if (height - 1 > 0) - height--;*/ - // NOTE: Would subtract 2 if in interlaced mode...! -// uint64 height = ((p0 & 0xFFC000) - 0x4000) & 0xFFC000; - uint8 remainder = p2 >> 16, vscale = p2 >> 8; //Actually, we should skip this object if it has a vscale of zero. //Or do we? Not sure... Atari Karts has a few lines that look like: @@ -522,35 +500,88 @@ if (!inhibit) // For OP testing only! if (vscale == 0) vscale = 0x20; // OP bug??? Nope, it isn't...! Or is it? - remainder -= 0x20; // 1.0f in [3.5] fixed point format - if (remainder & 0x80) // I.e., it's negative +/*extern int start_logging; +if (start_logging) + WriteLog("--> Returned from scaled bitmap processing (rem=%02X, vscale=%02X)...\n", remainder, vscale);*/ +//Locks up here: +//--> Returned from scaled bitmap processing (rem=20, vscale=80)... +//There are other problems here, it looks like... +/* +OP: Scaled bitmap 4x? 4bpp at 38,? hscale=7C fpix=0 data=00075E28 pitch 1 hflipped=no dwidth=? (linked to 00071118) Transluency=no +--> Returned from scaled bitmap processing (rem=50, vscale=7C)... +OP: Scaled bitmap 4x? 4bpp at 38,? hscale=7C fpix=0 data=00075E28 pitch 1 hflipped=no dwidth=? (linked to 00071118) Transluency=no +--> Returned from scaled bitmap processing (rem=30, vscale=7C)... +OP: Scaled bitmap 4x? 4bpp at 38,? hscale=7C fpix=0 data=00075E28 pitch 1 hflipped=no dwidth=? (linked to 00071118) Transluency=no +--> Returned from scaled bitmap processing (rem=10, vscale=7C)... +OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756A8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no +--> Returned from scaled bitmap processing (rem=00, vscale=7E)... +OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no +--> Returned from scaled bitmap processing (rem=00, vscale=80)... +OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no +--> Returned from scaled bitmap processing (rem=5E, vscale=7E)... +OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756E8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no +--> Returned from scaled bitmap processing (rem=60, vscale=80)... +OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no +--> Returned from scaled bitmap processing (rem=3E, vscale=7E)... +OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756E8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no +--> Returned from scaled bitmap processing (rem=40, vscale=80)... +OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no +--> Returned from scaled bitmap processing (rem=1E, vscale=7E)... +OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756E8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no +--> Returned from scaled bitmap processing (rem=20, vscale=80)... +*/ +//Here's another problem: +// [hsc: 20, vsc: 20, rem: 00] +// Since we're not checking for $E0 (but that's what we get from the above), we end +// up repeating this scanline unnecessarily... !!! FIX !!! [DONE, but... still not quite +// right. Either that, or the Accolade team that wrote Bubsy screwed up royal.] +//Also note: $E0 = 7.0 which IS a legal vscale value... + +// if (remainder & 0x80) // I.e., it's negative +// if ((remainder & 0x80) || remainder == 0) // I.e., it's <= 0 +// if ((remainder - 1) >= 0xE0) // I.e., it's <= 0 +// if ((remainder >= 0xE1) || remainder == 0)// I.e., it's <= 0 +// if ((remainder >= 0xE1 && remainder <= 0xFF) || remainder == 0)// I.e., it's <= 0 + if (remainder <= 0x20) // I.e., it's <= 0 { - uint64 data = (p0 & 0xFFFFF80000000000) >> 40; + uint64 data = (p0 & 0xFFFFF80000000000LL) >> 40; uint64 dwidth = (p1 & 0xFFC0000) >> 15; - while (remainder & 0x80) +// while (remainder & 0x80) +// while ((remainder & 0x80) || remainder == 0) +// while ((remainder - 1) >= 0xE0) +// while ((remainder >= 0xE1) || remainder == 0) +// while ((remainder >= 0xE1 && remainder <= 0xFF) || remainder == 0) + while (remainder <= 0x20) { remainder += vscale; + if (height) height--; data += dwidth; } - p0 &= ~0xFFFFF80000FFC000; // Mask out old data... + + p0 &= ~0xFFFFF80000FFC000LL; // Mask out old data... p0 |= (uint64)height << 14; p0 |= data << 40; OPStorePhrase(oldOPP, p0); } + remainder -= 0x20; // 1.0f in [3.5] fixed point format + +/*if (start_logging) + WriteLog("--> Finished writebacks...\n");*/ + //WriteLog(" [%08X%08X -> ", (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF)); - p2 &= ~0x0000000000FF0000; + p2 &= ~0x0000000000FF0000LL; p2 |= (uint64)remainder << 16; //WriteLog("%08X%08X]\n", (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF)); OPStorePhrase(oldOPP+16, p2); //remainder = (uint8)(p2 >> 16), vscale = (uint8)(p2 >> 8); //WriteLog(" [after]: rem=%02X, vscale=%02X\n", remainder, vscale); } - op_pointer = (p0 & 0x000007FFFF000000) >> 21; + op_pointer = (p0 & 0x000007FFFF000000LL) >> 21; break; } case OBJECT_TYPE_GPU: @@ -564,6 +595,7 @@ if (!inhibit) // For OP testing only! //OPSuspendedByGPU = true; //Dunno if the OP keeps processing from where it was interrupted, or if it just continues //on the next scanline... +// --> It continues from where it was interrupted! !!! FIX !!! break; } case OBJECT_TYPE_BRANCH: @@ -577,21 +609,14 @@ if (!inhibit) // For OP testing only! switch (cc) { case CONDITION_EQUAL: -//Why do this for the equal case? If they wrote an odd YPOS, then it wouldn't be detected! -// if (ypos != 0x7FF && (ypos & 0x01)) -// ypos ^= 0x01; -// if ((2 * tom_get_scanline()) == ypos || ypos == 0x7FF) -//Here we're using VC instead of the bogus tom_get_scanline() value... if (TOMReadWord(0xF00006, OP) == ypos || ypos == 0x7FF) op_pointer = link; break; case CONDITION_LESS_THAN: -// if ((2 * tom_get_scanline()) < ypos) if (TOMReadWord(0xF00006, OP) < ypos) op_pointer = link; break; case CONDITION_GREATER_THAN: -// if ((2 * tom_get_scanline()) > ypos) if (TOMReadWord(0xF00006, OP) > ypos) op_pointer = link; break; @@ -622,7 +647,7 @@ if (!inhibit) // For OP testing only! if (p0 & 0x08) { tom_set_pending_object_int(); - if (tom_irq_enabled(IRQ_OPFLAG) && jaguar_interrupt_handler_is_valid(64)) + if (tom_irq_enabled(IRQ_OPFLAG))// && jaguar_interrupt_handler_is_valid(64)) m68k_set_irq(7); // Cause an NMI to occur... } @@ -639,11 +664,7 @@ if (!inhibit) // For OP testing only! // // Store fixed size bitmap in line buffer // - -// Interesting thing about Rayman: There seems to be a transparent bitmap (1/8/16 bpp--which?) -// being rendered under his feet--doesn't align when walking... Check it out! - -void OPProcessFixedBitmap(int scanline, uint64 p0, uint64 p1, bool render) +void OPProcessFixedBitmap(uint64 p0, uint64 p1, bool render) { // Need to make sure that when writing that it stays within the line buffer... // LBUF ($F01800 - $F01D9E) 360 x 32-bit RAM @@ -652,7 +673,6 @@ void OPProcessFixedBitmap(int scanline, uint64 p0, uint64 p1, bool render) uint32 iwidth = (p1 >> 28) & 0x3FF; // Image width in *phrases* uint32 data = (p0 >> 40) & 0xFFFFF8; // Pixel data address //#ifdef OP_DEBUG_BMP -// Prolly should use this... Though not sure exactly how. uint32 firstPix = (p1 >> 49) & 0x3F; // "The LSB is significant only for scaled objects..." -JTRM // "In 1 BPP mode, all five bits are significant. In 2 BPP mode, the top four are significant..." @@ -669,6 +689,7 @@ void OPProcessFixedBitmap(int scanline, uint64 p0, uint64 p1, bool render) // provide the most significant bits of the palette address." uint8 index = (p1 >> 37) & 0xFE; // CLUT index offset (upper pix, 1-4 bpp) uint32 pitch = (p1 >> 15) & 0x07; // Phrase pitch + pitch <<= 3; // Optimization: Multiply pitch by 8 // int16 scanlineWidth = tom_getVideoModeWidth(); uint8 * tom_ram_8 = tom_get_ram_pointer(); @@ -683,9 +704,9 @@ void OPProcessFixedBitmap(int scanline, uint64 p0, uint64 p1, bool render) // Is it OK to have a 0 for the data width??? (i.e., undocumented?) // Seems to be... Seems that dwidth *can* be zero (i.e., reuse same line) as well. // Pitch == 0 is OK too... -// if (!render || op_pointer == 0 || dwidth == 0 || ptr == 0 || pitch == 0) +// if (!render || op_pointer == 0 || ptr == 0 || pitch == 0) //I'm not convinced that we need to concern ourselves with data & op_pointer here either! - if (!render || iwidth == 0) // || data == 0 || op_pointer == 0) + if (!render || iwidth == 0) return; //#define OP_DEBUG_BMP @@ -760,9 +781,10 @@ void OPProcessFixedBitmap(int scanline, uint64 p0, uint64 p1, bool render) // rightMargin = lbufWidth; */ if (depth > 5) - WriteLog("We're about to encounter a divide by zero error!\n"); + WriteLog("OP: We're about to encounter a divide by zero error!\n"); // NOTE: We're just using endPos to figure out how much, if any, to clip by. // ALSO: There may be another case where we start out of bounds and end out of bounds...! + // !!! FIX !!! if (startPos < 0) // Case #1: Begin out, end in, L to R clippedWidth = 0 - startPos, dataClippedWidth = phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth], @@ -788,12 +810,14 @@ if (depth > 5) // Also, if we're clipping the phrase we need to make sure we're in the correct part of // the pixel data. // data += phraseClippedWidth * (pitch << 3); - data += dataClippedWidth * (pitch << 3); + data += dataClippedWidth * pitch; // NOTE: When the bitmap is in REFLECT mode, the XPOS marks the *right* side of the // bitmap! This makes clipping & etc. MUCH, much easier...! // uint32 lbufAddress = 0x1800 + (!in24BPPMode ? leftMargin * 2 : leftMargin * 4); - uint32 lbufAddress = 0x1800 + (!in24BPPMode ? startPos * 2 : startPos * 4); +//Why does this work right when multiplying startPos by 2 (instead of 4) for 24 BPP mode? +//Is this a bug in the OP? + uint32 lbufAddress = 0x1800 + (!in24BPPMode ? startPos * 2 : startPos * 2); uint8 * currentLineBuffer = &tom_ram_8[lbufAddress]; // Render. @@ -802,6 +826,7 @@ if (depth > 5) // If we *were* in 24 BPP mode, how would you convert CRY to RGB24? Seems to me // that if you're in CRY mode then you wouldn't be able to use 24 BPP bitmaps // anyway. +// This seems to be the case (at least according to the Midsummer docs)...! if (depth == 0) // 1 BPP { @@ -811,7 +836,7 @@ if (depth > 5) // Fetch 1st phrase... uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP); //Note that firstPix should only be honored *if* we start with the 1st phrase of the bitmap -//i.e., we didn't clip on the margin... +//i.e., we didn't clip on the margin... !!! FIX !!! pixels <<= firstPix; // Skip first N pixels (N=firstPix)... int i = firstPix; // Start counter at right spot... @@ -842,7 +867,7 @@ if (depth > 5) } i = 0; // Fetch next phrase... - data += pitch << 3; // Multiply pitch * 8 (optimize: precompute this value) + data += pitch; pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP); } } @@ -858,7 +883,7 @@ if (firstPix) { // Fetch phrase... uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP); - data += pitch << 3; // Multiply pitch * 8 (optimize: precompute this value) + data += pitch; for(int i=0; i<32; i++) { @@ -898,7 +923,7 @@ if (firstPix) { // Fetch phrase... uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP); - data += pitch << 3; // Multiply pitch * 8 (optimize: precompute this value) + data += pitch; for(int i=0; i<16; i++) { @@ -928,18 +953,20 @@ if (firstPix) } else if (depth == 3) // 8 BPP { -if (firstPix) - WriteLog("OP: Fixed bitmap @ 8 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix); // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it. int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02; + // Fetch 1st phrase... + uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP); +//Note that firstPix should only be honored *if* we start with the 1st phrase of the bitmap +//i.e., we didn't clip on the margin... !!! FIX !!! + firstPix &= 0x30; // Only top two bits are valid for 8 BPP + pixels <<= firstPix; // Skip first N pixels (N=firstPix)... + int i = firstPix >> 3; // Start counter at right spot... + while (iwidth--) { - // Fetch phrase... - uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP); - data += pitch << 3; // Multiply pitch * 8 (optimize: precompute this value) - - for(int i=0; i<8; i++) + while (i++ < 8) { uint8 bits = pixels >> 56; // Seems to me that both of these are in the same endian, so we could cast it as @@ -963,6 +990,10 @@ if (firstPix) currentLineBuffer += lbufDelta; pixels <<= 8; } + i = 0; + // Fetch next phrase... + data += pitch; + pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP); } } else if (depth == 4) // 16 BPP @@ -976,7 +1007,7 @@ if (firstPix) { // Fetch phrase... uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP); - data += pitch << 3; // Multiply pitch * 8 (optimize: precompute this value) + data += pitch; for(int i=0; i<4; i++) { @@ -1013,24 +1044,21 @@ if (firstPix) if (firstPix) WriteLog("OP: Fixed bitmap @ 24 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix); // Not sure, but I think RMW only works with 16 BPP and below, and only in CRY mode... - // The LSB is OPFLAG_REFLECT, so sign extend it and or 4 into it. + // The LSB of flags is OPFLAG_REFLECT, so sign extend it and OR 4 into it. int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 4) | 0x04; while (iwidth--) { // Fetch phrase... uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP); - data += pitch << 3; // Multiply pitch * 8 (optimize: precompute this value) + data += pitch; for(int i=0; i<2; i++) { + // We don't use a 32-bit var here because of endian issues...! uint8 bits3 = pixels >> 56, bits2 = pixels >> 48, bits1 = pixels >> 40, bits0 = pixels >> 32; -// Seems to me that both of these are in the same endian, so we could cast it as -// uint16 * and do straight across copies (what about 24 bpp? Treat it differently...) -// This only works for the palettized modes (1 - 8 BPP), since we actually have to -// copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?) -// No, it isn't because we read the memory in an endian safe way--it *won't* work... + if (flagTRANS && (bits3 | bits2 | bits1 | bits0) == 0) ; // Do nothing... else @@ -1049,7 +1077,7 @@ if (firstPix) // // Store scaled bitmap in line buffer // -void OPProcessScaledBitmap(int scanline, uint64 p0, uint64 p1, uint64 p2, bool render) +void OPProcessScaledBitmap(uint64 p0, uint64 p1, uint64 p2, bool render) { // Need to make sure that when writing that it stays within the line buffer... // LBUF ($F01800 - $F01D9E) 360 x 32-bit RAM @@ -1063,11 +1091,11 @@ void OPProcessScaledBitmap(int scanline, uint64 p0, uint64 p1, uint64 p2, bool r uint32 firstPix = (p1 >> 49) & 0x3F; //This is WEIRD! I'm sure I saw Atari Karts request 8 BPP FIRSTPIX! What happened??? if (firstPix) - WriteLog("OP: FIRSTPIX != 0!\n"); + WriteLog("OP: FIRSTPIX != 0! (Scaled BM)\n"); //#endif // We can ignore the RELEASE (high order) bit for now--probably forever...! // uint8 flags = (p1 >> 45) & 0x0F; // REFLECT, RMW, TRANS, RELEASE -//Optimize: break these out to their own BOOL values +//Optimize: break these out to their own BOOL values [DONE] uint8 flags = (p1 >> 45) & 0x07; // REFLECT (0), RMW (1), TRANS (2) bool flagREFLECT = (flags & OPFLAG_REFLECT ? true : false), flagRMW = (flags & OPFLAG_RMW ? true : false), @@ -1075,28 +1103,32 @@ if (firstPix) uint8 index = (p1 >> 37) & 0xFE; // CLUT index offset (upper pix, 1-4 bpp) uint32 pitch = (p1 >> 15) & 0x07; // Phrase pitch -// int16 scanlineWidth = tom_getVideoModeWidth(); uint8 * tom_ram_8 = tom_get_ram_pointer(); uint8 * paletteRAM = &tom_ram_8[0x400]; // This is OK as long as it's used correctly: For 16-bit RAM to RAM direct copies--NOT - // for use when using endian-corrected data (i.e., any of the *_word_read functions!) + // for use when using endian-corrected data (i.e., any of the *ReadWord functions!) uint16 * paletteRAM16 = (uint16 *)paletteRAM; uint8 hscale = p2 & 0xFF; - uint8 horizontalRemainder = hscale; // Not sure if it starts full, but seems reasonable +// uint8 horizontalRemainder = hscale; // Not sure if it starts full, but seems reasonable [It's not!] + uint8 horizontalRemainder = 0; // Let's try zero! Seems to work! Yay! int32 scaledWidthInPixels = (iwidth * phraseWidthToPixels[depth] * hscale) >> 5; uint32 scaledPhrasePixels = (phraseWidthToPixels[depth] * hscale) >> 5; // WriteLog("bitmap %ix? %ibpp at %i,? firstpix=? data=0x%.8x pitch %i hflipped=%s dwidth=? (linked to ?) RMW=%s Tranparent=%s\n", // iwidth, op_bitmap_bit_depth[bitdepth], xpos, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), (flags&OPFLAG_RMW ? "yes" : "no"), (flags&OPFLAG_TRANS ? "yes" : "no")); -//Looks like an hscale of zero means don't draw! +// Looks like an hscale of zero means don't draw! if (!render || iwidth == 0 || hscale == 0) return; +/*extern int start_logging; +if (start_logging) + WriteLog("OP: Scaled bitmap %ix? %ibpp at %i,? hscale=%02X fpix=%i data=%08X pitch %i hflipped=%s dwidth=? (linked to %08X) Transluency=%s\n", + iwidth, op_bitmap_bit_depth[depth], xpos, hscale, firstPix, data, pitch, (flagREFLECT ? "yes" : "no"), op_pointer, (flagRMW ? "yes" : "no"));*/ //#define OP_DEBUG_BMP //#ifdef OP_DEBUG_BMP -// WriteLog("bitmap %ix%i %ibpp at %i,%i firstpix=%i data=0x%.8x pitch %i hflipped=%s dwidth=%i (linked to 0x%.8x) Transluency=%s\n", +// WriteLog("OP: Scaled bitmap %ix%i %ibpp at %i,%i firstpix=%i data=0x%.8x pitch %i hflipped=%s dwidth=%i (linked to 0x%.8x) Transluency=%s\n", // iwidth, height, op_bitmap_bit_depth[bitdepth], xpos, ypos, firstPix, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), dwidth, op_pointer, (flags&OPFLAG_RMW ? "yes" : "no")); //#endif @@ -1119,7 +1151,7 @@ if (firstPix) // 4. image sits on right edge and no REFLECT; starts in bounds but ends out of bounds. //Numbers 2 & 4 can be caught by checking the LBUF clip while in the inner loop, // numbers 1 & 3 are of concern. -// This *indirectly* handles only cases 2 & 4! And is WRONG is REFLECT is set...! +// This *indirectly* handles only cases 2 & 4! And is WRONG if REFLECT is set...! // if (rightMargin < 0 || leftMargin > lbufWidth) // It might be easier to swap these (if REFLECTed) and just use XPOS down below... @@ -1154,6 +1186,7 @@ if (firstPix) // NOTE: We're just using endPos to figure out how much, if any, to clip by. // ALSO: There may be another case where we start out of bounds and end out of bounds...! + // !!! FIX !!! //There's a problem here with scaledPhrasePixels in that it can be forced to zero when //the scaling factor is small. So fix it already! !!! FIX !!! @@ -1163,30 +1196,58 @@ if (firstPix) DumpScaledObject(p0, p1, p2); }//*/ //NOTE: I'm almost 100% sure that this is wrong... And it is! :-p + +//Try a simple example... +// Let's say we have a 8 BPP scanline with an hscale of $80 (4). Our xpos is -10, +// non-flipped. Pixels in the bitmap are XYZXYZXYZXYZXYZ. +// Scaled up, they would be XXXXYYYYZZZZXXXXYYYYZZZZXXXXYYYYZZZZ... +// +// Normally, we would expect this in the line buffer: +// ZZXXXXYYYYZZZZXXXXYYYYZZZZ... +// +// But instead we're getting: +// XXXXYYYYZZZZXXXXYYYYZZZZ... +// +// or are we??? It would seem so, simply by virtue of the fact that we're NOT starting +// on negative boundary--or are we? Hmm... +// cw = 10, dcw = pcw = 10 / ([8 * 4 = 32] 32) = 0, sp = -10 +// +// Let's try a real world example: +// +//OP: Scaled bitmap (70, 8 BPP, spp=28) sp (-400) < 0... [new sp=-8, cw=400, dcw=pcw=14] +//OP: Scaled bitmap (6F, 8 BPP, spp=27) sp (-395) < 0... [new sp=-17, cw=395, dcw=pcw=14] +// +// Really, spp is 27.75 in the second case... +// So... If we do 395 / 27.75, we get 14. Ok so far... If we scale that against the +// start position (14 * 27.75), we get -6.5... NOT -17! + +//Now it seems we're working OK, at least for the first case... +uint32 scaledPhrasePixelsUS = phraseWidthToPixels[depth] * hscale; + if (startPos < 0) // Case #1: Begin out, end in, L to R -/* clippedWidth = 0 - startPos, - dataClippedWidth = phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth], - startPos = 0 - (clippedWidth % phraseWidthToPixels[depth]);*/ - clippedWidth = 0 - startPos, - dataClippedWidth = phraseClippedWidth = clippedWidth / scaledPhrasePixels, - startPos = 0 - (clippedWidth % scaledPhrasePixels); +{ +extern int start_logging; +if (start_logging) + WriteLog("OP: Scaled bitmap (%02X, %u BPP, spp=%u) start pos (%i) < 0...", hscale, op_bitmap_bit_depth[depth], scaledPhrasePixels, startPos); +// clippedWidth = 0 - startPos, + clippedWidth = (0 - startPos) << 5, +// dataClippedWidth = phraseClippedWidth = clippedWidth / scaledPhrasePixels, + dataClippedWidth = phraseClippedWidth = (clippedWidth / scaledPhrasePixelsUS) >> 5, +// startPos = 0 - (clippedWidth % scaledPhrasePixels); + startPos += (dataClippedWidth * scaledPhrasePixelsUS) >> 5; +if (start_logging) + WriteLog(" [new sp=%i, cw=%i, dcw=pcw=%i]\n", startPos, clippedWidth, dataClippedWidth); +} if (endPos < 0) // Case #2: Begin in, end out, R to L -/* clippedWidth = 0 - endPos, - phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];*/ clippedWidth = 0 - endPos, phraseClippedWidth = clippedWidth / scaledPhrasePixels; if (endPos > lbufWidth) // Case #3: Begin in, end out, L to R -/* clippedWidth = endPos - lbufWidth, - phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];*/ clippedWidth = endPos - lbufWidth, phraseClippedWidth = clippedWidth / scaledPhrasePixels; if (startPos > lbufWidth) // Case #4: Begin out, end in, R to L -/* clippedWidth = startPos - lbufWidth, - dataClippedWidth = phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth], - startPos = lbufWidth + (clippedWidth % phraseWidthToPixels[depth]);*/ clippedWidth = startPos - lbufWidth, dataClippedWidth = phraseClippedWidth = clippedWidth / scaledPhrasePixels, startPos = lbufWidth + (clippedWidth % scaledPhrasePixels); @@ -1211,8 +1272,11 @@ if (op_start_log && startPos == 13) // NOTE: When the bitmap is in REFLECT mode, the XPOS marks the *right* side of the // bitmap! This makes clipping & etc. MUCH, much easier...! // uint32 lbufAddress = 0x1800 + (!in24BPPMode ? leftMargin * 2 : leftMargin * 4); - uint32 lbufAddress = 0x1800 + (!in24BPPMode ? startPos * 2 : startPos * 4); +// uint32 lbufAddress = 0x1800 + (!in24BPPMode ? startPos * 2 : startPos * 4); + uint32 lbufAddress = 0x1800 + startPos * 2; uint8 * currentLineBuffer = &tom_ram_8[lbufAddress]; +uint8 * lineBufferLowerLimit = &tom_ram_8[0x1800], + * lineBufferUpperLimit = &tom_ram_8[0x1800 + 719]; // Render. @@ -1220,6 +1284,7 @@ if (op_start_log && startPos == 13) // If we *were* in 24 BPP mode, how would you convert CRY to RGB24? Seems to me // that if you're in CRY mode then you wouldn't be able to use 24 BPP bitmaps // anyway. +// This seems to be the case (at least according to the Midsummer docs)...! if (depth == 0) // 1 BPP { @@ -1398,6 +1463,10 @@ if (firstPix) // This is the *only* correct use of endian-dependent code // (i.e., mem-to-mem direct copying)! *(uint16 *)currentLineBuffer = paletteRAM16[bits]; +/* { + if (currentLineBuffer >= lineBufferLowerLimit && currentLineBuffer <= lineBufferUpperLimit) + *(uint16 *)currentLineBuffer = paletteRAM16[bits]; + }*/ else *currentLineBuffer = BLEND_CR(*currentLineBuffer, paletteRAM[bits << 1]), @@ -1499,11 +1568,7 @@ if (firstPix != 0) { uint8 bits3 = pixels >> 56, bits2 = pixels >> 48, bits1 = pixels >> 40, bits0 = pixels >> 32; -// Seems to me that both of these are in the same endian, so we could cast it as -// uint16 * and do straight across copies (what about 24 bpp? Treat it differently...) -// This only works for the palettized modes (1 - 8 BPP), since we actually have to -// copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?) -// No, it isn't because we read the memory in an endian safe way--it *won't* work... + if (flagTRANS && (bits3 | bits2 | bits1 | bits0) == 0) ; // Do nothing... else