//
// Object Processor
//
-// by cal2
+// Original source by Cal2
// GCC/SDL port by Niels Wagenaar (Linux/WIN32) and Caz (BeOS)
-// Cleanups/fixes/rewrites by James L. Hammons
+// Extensive cleanups/fixes/rewrites by James L. Hammons
//
#include <stdio.h>
// Private function prototypes
-void OPProcessFixedBitmap(int scanline, uint64 p0, uint64 p1, bool render);
-void OPProcessScaledBitmap(int scanline, uint64 p0, uint64 p1, uint64 p2, bool render);
+void OPProcessFixedBitmap(uint64 p0, uint64 p1, bool render);
+void OPProcessScaledBitmap(uint64 p0, uint64 p1, uint64 p2, bool render);
void DumpScaledObject(uint64 p0, uint64 p1, uint64 p2);
void DumpFixedObject(uint64 p0, uint64 p1);
uint64 op_load_phrase(uint32 offset);
-// External global variables
-
-extern uint32 jaguar_mainRom_crc32;
-
// Local global variables
static uint8 * op_blend_y;
// some of the regular TOM RAM...
static uint8 objectp_ram[0x40]; // This is based at $F00000
uint8 objectp_running;
-bool objectp_stop_reading_list;
+//bool objectp_stop_reading_list;
static uint8 op_bitmap_bit_depth[8] = { 1, 2, 4, 8, 16, 24, 32, 0 };
//static uint32 op_bitmap_bit_size[8] =
// Object Processor memory access
// Memory range: F00010 - F00027
//
+// F00010-F00017 R xxxxxxxx xxxxxxxx OB - current object code from the graphics processor
+// F00020-F00023 W xxxxxxxx xxxxxxxx OLP - start of the object list
+// F00026 W -------- -------x OBF - object processor flag
+//
+
uint8 OPReadByte(uint32 offset, uint32 who/*=UNKNOWN*/)
{
offset &= 0x3F;
return GET16(objectp_ram, offset);
}
-// F00010-F00017 R xxxxxxxx xxxxxxxx OB - current object code from the graphics processor
-// F00020-F00023 W xxxxxxxx xxxxxxxx OLP - start of the object list
-// F00026 W -------- -------x OBF - object processor flag
-
void OPWriteByte(uint32 offset, uint8 data, uint32 who/*=UNKNOWN*/)
{
offset &= 0x3F;
WriteLog(" %08X --> phrase %08X %08X\n", op_pointer, (uint32)(p1>>32), (uint32)(p1&0xFFFFFFFF));
WriteLog(" %08X --> phrase %08X %08X ", op_pointer+8, (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
uint8 bitdepth = (p1 >> 12) & 0x07;
- int16 ypos = ((p0 >> 3) & 0x3FF); // ??? What if not interlaced (/2)?
+//WAS: int16 ypos = ((p0 >> 3) & 0x3FF); // ??? What if not interlaced (/2)?
+ int16 ypos = ((p0 >> 3) & 0x7FF); // ??? What if not interlaced (/2)?
int32 xpos = p1 & 0xFFF;
xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);
uint32 iwidth = ((p1 >> 28) & 0x3FF);
WriteLog(" (BITMAP)");
WriteLog(" %08X --> phrase %08X %08X\n", op_pointer, (uint32)(p1>>32), (uint32)(p1&0xFFFFFFFF));
uint8 bitdepth = (p1 >> 12) & 0x07;
- int16 ypos = ((p0 >> 3) & 0x3FF); // ??? What if not interlaced (/2)?
+//WAS: int16 ypos = ((p0 >> 3) & 0x3FF); // ??? What if not interlaced (/2)?
+ int16 ypos = ((p0 >> 3) & 0x7FF); // ??? What if not interlaced (/2)?
int32 xpos = p1 & 0xFFF;
xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);
uint32 iwidth = ((p1 >> 28) & 0x3FF);
//
// Object Processor main routine
//
+//Need to fix this so that when a GPU object IRQ happens, we can pick up OP processing
+//where we left off. !!! FIX !!!
void OPProcessList(int scanline, bool render)
{
extern int op_start_log;
op_pointer = op_get_list_pointer();
- objectp_stop_reading_list = false;
+// objectp_stop_reading_list = false;
+
+//WriteLog("OP: Processing line #%u (OLP=%08X)...\n", scanline, op_pointer);
+//op_done();
// *** BEGIN OP PROCESSOR TESTING ONLY ***
extern bool interactiveMode;
int bitmapCounter = 0;
// *** END OP PROCESSOR TESTING ONLY ***
+ uint32 opCyclesToRun = 10000; // This is a pulled-out-of-the-air value (will need to be fixed, obviously!)
+
// if (op_pointer) WriteLog(" new op list at 0x%.8x scanline %i\n",op_pointer,scanline);
while (op_pointer)
{
else
inhibit = false;
// *** END OP PROCESSOR TESTING ONLY ***
- if (objectp_stop_reading_list)
- return;
+// if (objectp_stop_reading_list)
+// return;
uint64 p0 = op_load_phrase(op_pointer);
+//WriteLog("\t%08X type %i\n", op_pointer, (uint8)p0 & 0x07);
op_pointer += 8;
if (scanline == tom_get_vdb() && op_start_log)
//if (scanline == 215 && op_start_log)
uint64 p1 = op_load_phrase(op_pointer);
WriteLog("\n%08X --> phrase %08X %08X ", op_pointer, (int)(p1>>32), (int)(p1&0xFFFFFFFF));
uint8 bitdepth = (p1 >> 12) & 0x07;
- int16 ypos = ((p0 >> 3) & 0x3FF); // ??? What if not interlaced (/2)?
+//WAS: int16 ypos = ((p0 >> 3) & 0x3FF); // ??? What if not interlaced (/2)?
+ int16 ypos = ((p0 >> 3) & 0x7FF); // ??? What if not interlaced (/2)?
int32 xpos = p1 & 0xFFF;
xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);
uint32 iwidth = ((p1 >> 28) & 0x3FF);
WriteLog("\n%08X --> phrase %08X %08X ", op_pointer, (int)(p1>>32), (int)(p1&0xFFFFFFFF));
WriteLog("\n%08X --> phrase %08X %08X ", op_pointer+8, (int)(p2>>32), (int)(p2&0xFFFFFFFF));
uint8 bitdepth = (p1 >> 12) & 0x07;
- int16 ypos = ((p0 >> 3) & 0x3FF); // ??? What if not interlaced (/2)?
+//WAS: int16 ypos = ((p0 >> 3) & 0x3FF); // ??? What if not interlaced (/2)?
+ int16 ypos = ((p0 >> 3) & 0x7FF); // ??? What if not interlaced (/2)?
int32 xpos = p1 & 0xFFF;
xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);
uint32 iwidth = ((p1 >> 28) & 0x3FF);
WriteLog(" --> List end\n");
}//*/
-// WriteLog("%08X type %i\n", op_pointer, (uint8)p0 & 0x07);
switch ((uint8)p0 & 0x07)
{
case OBJECT_TYPE_BITMAP:
{
- // Would *not* be /2 if interlaced...!
- uint16 ypos = ((p0 >> 3) & 0x3FF) / 2;
+//WAS: uint16 ypos = (p0 >> 3) & 0x3FF;
+ uint16 ypos = (p0 >> 3) & 0x7FF;
// This is only theory implied by Rayman...!
// It seems that if the YPOS is zero, then bump the YPOS value so that it coincides with
// the VDB value. With interlacing, this would be slightly more tricky.
op_pointer += 8;
//WriteLog("OP: Writing scanline %d with ypos == %d...\n", scanline, ypos);
//WriteLog("--> Writing %u BPP bitmap...\n", op_bitmap_bit_depth[(p1 >> 12) & 0x07]);
- OPProcessFixedBitmap(scanline, p0, p1, render);
+// OPProcessFixedBitmap(scanline, p0, p1, render);
+ OPProcessFixedBitmap(p0, p1, render);
// OP write-backs
//???Does this really happen??? Doesn't seem to work if you do this...!
+//Probably not. Must be a bug in the documentation...!
// uint32 link = (p0 & 0x7FFFF000000) >> 21;
// SET16(objectp_ram, 0x20, link & 0xFFFF); // OLP
// SET16(objectp_ram, 0x22, link >> 16);
// NOTE: Would subtract 2 if in interlaced mode...!
// uint64 height = ((p0 & 0xFFC000) - 0x4000) & 0xFFC000;
// if (height)
- height--;
+ height--;
- uint64 data = (p0 & 0xFFFFF80000000000) >> 40;
+ uint64 data = (p0 & 0xFFFFF80000000000LL) >> 40;
uint64 dwidth = (p1 & 0xFFC0000) >> 15;
data += dwidth;
- p0 &= ~0xFFFFF80000FFC000; // Mask out old data...
+ p0 &= ~0xFFFFF80000FFC000LL; // Mask out old data...
p0 |= (uint64)height << 14;
p0 |= data << 40;
OPStorePhrase(oldOPP, p0);
}
- op_pointer = (p0 & 0x000007FFFF000000) >> 21;
+//WriteLog("\t\tOld OP: %08X -> ", op_pointer);
+//Temp, for testing...
+//No doubt, this type of check will break all kinds of stuff... !!! FIX !!!
+//And it does! !!! FIX !!!
+//Let's remove this "fix" since it screws up more than it fixes.
+/* if (op_pointer > ((p0 & 0x000007FFFF000000LL) >> 21))
+ return;*/
+
+ op_pointer = (p0 & 0x000007FFFF000000LL) >> 21;
+//WriteLog("New OP: %08X\n", op_pointer);
break;
}
case OBJECT_TYPE_SCALE:
{
- // Would *not* be /2 if interlaced...!
- uint16 ypos = ((p0 >> 3) & 0x3FF) / 2;
-// This is only theory implied by Rayman...!
-// It seems that if the YPOS is zero, then bump the YPOS value so that it coincides with
-// the VDB value. With interlacing, this would be slightly more tricky.
-// There's probably another bit somewhere that enables this mode--but so far, doesn't seem
-// to affect any other game in a negative way (that I've seen).
-// Either that, or it's an undocumented bug...
-
-//No, the reason this was needed is that the OP code before was wrong. Any value
-//less than VDB will get written to the top line of the display!
-// if (ypos == 0)
-// ypos = TOMReadWord(0xF00046, OP) / 2; // Get the VDB value
+//WAS: uint16 ypos = (p0 >> 3) & 0x3FF;
+ uint16 ypos = (p0 >> 3) & 0x7FF;
uint32 height = (p0 & 0xFFC000) >> 14;
uint32 oldOPP = op_pointer - 8;
// *** BEGIN OP PROCESSOR TESTING ONLY ***
uint64 p2 = op_load_phrase(op_pointer);
op_pointer += 8;
//WriteLog("OP: %08X (%d) %08X%08X %08X%08X %08X%08X\n", oldOPP, scanline, (uint32)(p0>>32), (uint32)(p0&0xFFFFFFFF), (uint32)(p1>>32), (uint32)(p1&0xFFFFFFFF), (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
- OPProcessScaledBitmap(scanline, p0, p1, p2, render);
+ OPProcessScaledBitmap(p0, p1, p2, render);
// OP write-backs
-//???Does this really happen??? Doesn't seem to work if you do this...!
-// uint32 link = (p0 & 0x7FFFF000000) >> 21;
-// SET16(objectp_ram, 0x20, link & 0xFFFF); // OLP
-// SET16(objectp_ram, 0x22, link >> 16);
-/* uint32 height = (p0 & 0xFFC000) >> 14;
- if (height - 1 > 0)
- height--;*/
- // NOTE: Would subtract 2 if in interlaced mode...!
-// uint64 height = ((p0 & 0xFFC000) - 0x4000) & 0xFFC000;
-
uint8 remainder = p2 >> 16, vscale = p2 >> 8;
//Actually, we should skip this object if it has a vscale of zero.
//Or do we? Not sure... Atari Karts has a few lines that look like:
if (vscale == 0)
vscale = 0x20; // OP bug??? Nope, it isn't...! Or is it?
- remainder -= 0x20; // 1.0f in [3.5] fixed point format
- if (remainder & 0x80) // I.e., it's negative
+//extern int start_logging;
+//if (start_logging)
+// WriteLog("--> Returned from scaled bitmap processing (rem=%02X, vscale=%02X)...\n", remainder, vscale);//*/
+//Locks up here:
+//--> Returned from scaled bitmap processing (rem=20, vscale=80)...
+//There are other problems here, it looks like...
+//Another lock up:
+//About to execute OP (508)...
+/*
+OP: Scaled bitmap 4x? 4bpp at 38,? hscale=7C fpix=0 data=00075E28 pitch 1 hflipped=no dwidth=? (linked to 00071118) Transluency=no
+--> Returned from scaled bitmap processing (rem=50, vscale=7C)...
+OP: Scaled bitmap 4x? 4bpp at 38,? hscale=7C fpix=0 data=00075E28 pitch 1 hflipped=no dwidth=? (linked to 00071118) Transluency=no
+--> Returned from scaled bitmap processing (rem=30, vscale=7C)...
+OP: Scaled bitmap 4x? 4bpp at 38,? hscale=7C fpix=0 data=00075E28 pitch 1 hflipped=no dwidth=? (linked to 00071118) Transluency=no
+--> Returned from scaled bitmap processing (rem=10, vscale=7C)...
+OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756A8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
+--> Returned from scaled bitmap processing (rem=00, vscale=7E)...
+OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
+--> Returned from scaled bitmap processing (rem=00, vscale=80)...
+OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
+--> Returned from scaled bitmap processing (rem=5E, vscale=7E)...
+OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756E8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
+--> Returned from scaled bitmap processing (rem=60, vscale=80)...
+OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
+--> Returned from scaled bitmap processing (rem=3E, vscale=7E)...
+OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756E8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
+--> Returned from scaled bitmap processing (rem=40, vscale=80)...
+OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
+--> Returned from scaled bitmap processing (rem=1E, vscale=7E)...
+OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756E8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
+--> Returned from scaled bitmap processing (rem=20, vscale=80)...
+*/
+//Here's another problem:
+// [hsc: 20, vsc: 20, rem: 00]
+// Since we're not checking for $E0 (but that's what we get from the above), we end
+// up repeating this scanline unnecessarily... !!! FIX !!! [DONE, but... still not quite
+// right. Either that, or the Accolade team that wrote Bubsy screwed up royal.]
+//Also note: $E0 = 7.0 which IS a legal vscale value...
+
+// if (remainder & 0x80) // I.e., it's negative
+// if ((remainder & 0x80) || remainder == 0) // I.e., it's <= 0
+// if ((remainder - 1) >= 0xE0) // I.e., it's <= 0
+// if ((remainder >= 0xE1) || remainder == 0)// I.e., it's <= 0
+// if ((remainder >= 0xE1 && remainder <= 0xFF) || remainder == 0)// I.e., it's <= 0
+ if (remainder <= 0x20) // I.e., it's <= 0
{
- uint64 data = (p0 & 0xFFFFF80000000000) >> 40;
+ uint64 data = (p0 & 0xFFFFF80000000000LL) >> 40;
uint64 dwidth = (p1 & 0xFFC0000) >> 15;
- while (remainder & 0x80)
+// while (remainder & 0x80)
+// while ((remainder & 0x80) || remainder == 0)
+// while ((remainder - 1) >= 0xE0)
+// while ((remainder >= 0xE1) || remainder == 0)
+// while ((remainder >= 0xE1 && remainder <= 0xFF) || remainder == 0)
+ while (remainder <= 0x20)
{
remainder += vscale;
+
if (height)
height--;
data += dwidth;
}
- p0 &= ~0xFFFFF80000FFC000; // Mask out old data...
+
+ p0 &= ~0xFFFFF80000FFC000LL; // Mask out old data...
p0 |= (uint64)height << 14;
p0 |= data << 40;
OPStorePhrase(oldOPP, p0);
}
+ remainder -= 0x20; // 1.0f in [3.5] fixed point format
+
+//if (start_logging)
+// WriteLog("--> Finished writebacks...\n");//*/
+
//WriteLog(" [%08X%08X -> ", (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
- p2 &= ~0x0000000000FF0000;
+ p2 &= ~0x0000000000FF0000LL;
p2 |= (uint64)remainder << 16;
//WriteLog("%08X%08X]\n", (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
OPStorePhrase(oldOPP+16, p2);
//remainder = (uint8)(p2 >> 16), vscale = (uint8)(p2 >> 8);
//WriteLog(" [after]: rem=%02X, vscale=%02X\n", remainder, vscale);
}
- op_pointer = (p0 & 0x000007FFFF000000) >> 21;
+ op_pointer = (p0 & 0x000007FFFF000000LL) >> 21;
break;
}
case OBJECT_TYPE_GPU:
//OPSuspendedByGPU = true;
//Dunno if the OP keeps processing from where it was interrupted, or if it just continues
//on the next scanline...
+// --> It continues from where it was interrupted! !!! FIX !!!
break;
}
case OBJECT_TYPE_BRANCH:
switch (cc)
{
case CONDITION_EQUAL:
-//Why do this for the equal case? If they wrote an odd YPOS, then it wouldn't be detected!
-// if (ypos != 0x7FF && (ypos & 0x01))
-// ypos ^= 0x01;
-// if ((2 * tom_get_scanline()) == ypos || ypos == 0x7FF)
-//Here we're using VC instead of the bogus tom_get_scanline() value...
if (TOMReadWord(0xF00006, OP) == ypos || ypos == 0x7FF)
op_pointer = link;
break;
case CONDITION_LESS_THAN:
-// if ((2 * tom_get_scanline()) < ypos)
if (TOMReadWord(0xF00006, OP) < ypos)
op_pointer = link;
break;
case CONDITION_GREATER_THAN:
-// if ((2 * tom_get_scanline()) > ypos)
if (TOMReadWord(0xF00006, OP) > ypos)
op_pointer = link;
break;
if (p0 & 0x08)
{
tom_set_pending_object_int();
- if (tom_irq_enabled(IRQ_OPFLAG) && jaguar_interrupt_handler_is_valid(64))
+ if (tom_irq_enabled(IRQ_OPFLAG))// && jaguar_interrupt_handler_is_valid(64))
m68k_set_irq(7); // Cause an NMI to occur...
}
WriteLog("op: unknown object type %i\n", ((uint8)p0 & 0x07));
return;
}
+
+ // Here is a little sanity check to keep the OP from locking up the machine
+ // when fed bad data. Better would be to count how many actual cycles it used
+ // and bail out/reenter to properly simulate an overloaded OP... !!! FIX !!!
+ opCyclesToRun--;
+ if (!opCyclesToRun)
+ return;
}
}
//
// Store fixed size bitmap in line buffer
//
-
-// Interesting thing about Rayman: There seems to be a transparent bitmap (1/8/16 bpp--which?)
-// being rendered under his feet--doesn't align when walking... Check it out!
-
-void OPProcessFixedBitmap(int scanline, uint64 p0, uint64 p1, bool render)
+void OPProcessFixedBitmap(uint64 p0, uint64 p1, bool render)
{
// Need to make sure that, when writing, it stays within the line buffer...
// LBUF ($F01800 - $F01D9E) 360 x 32-bit RAM
uint32 iwidth = (p1 >> 28) & 0x3FF; // Image width in *phrases*
uint32 data = (p0 >> 40) & 0xFFFFF8; // Pixel data address
//#ifdef OP_DEBUG_BMP
-// Prolly should use this... Though not sure exactly how.
uint32 firstPix = (p1 >> 49) & 0x3F;
// "The LSB is significant only for scaled objects..." -JTRM
// "In 1 BPP mode, all five bits are significant. In 2 BPP mode, the top four are significant..."
// rightMargin = lbufWidth;
*/
if (depth > 5)
- WriteLog("We're about to encounter a divide by zero error!\n");
+ WriteLog("OP: We're about to encounter a divide by zero error!\n");
// NOTE: We're just using endPos to figure out how much, if any, to clip by.
// ALSO: There may be another case where we start out of bounds and end out of bounds...!
// !!! FIX !!!
}
else if (depth == 3) // 8 BPP
{
-if (firstPix)
- WriteLog("OP: Fixed bitmap @ 8 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
// The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
+ // Fetch 1st phrase...
+ uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
+//Note that firstPix should only be honored *if* we start with the 1st phrase of the bitmap
+//i.e., we didn't clip on the margin... !!! FIX !!!
+ firstPix &= 0x30; // Only top two bits are valid for 8 BPP
+ pixels <<= firstPix; // Skip first N pixels (N=firstPix)...
+ int i = firstPix >> 3; // Start counter at right spot...
+
while (iwidth--)
{
- // Fetch phrase...
- uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
- data += pitch;
-
- for(int i=0; i<8; i++)
+ while (i++ < 8)
{
uint8 bits = pixels >> 56;
// Seems to me that both of these are in the same endian, so we could cast it as
currentLineBuffer += lbufDelta;
pixels <<= 8;
}
+ i = 0;
+ // Fetch next phrase...
+ data += pitch;
+ pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
}
}
else if (depth == 4) // 16 BPP
//
// Store scaled bitmap in line buffer
//
-void OPProcessScaledBitmap(int scanline, uint64 p0, uint64 p1, uint64 p2, bool render)
+void OPProcessScaledBitmap(uint64 p0, uint64 p1, uint64 p2, bool render)
{
// Need to make sure that, when writing, it stays within the line buffer...
// LBUF ($F01800 - $F01D9E) 360 x 32-bit RAM
//#endif
// We can ignore the RELEASE (high order) bit for now--probably forever...!
// uint8 flags = (p1 >> 45) & 0x0F; // REFLECT, RMW, TRANS, RELEASE
-//Optimize: break these out to their own BOOL values
+//Optimize: break these out to their own BOOL values [DONE]
uint8 flags = (p1 >> 45) & 0x07; // REFLECT (0), RMW (1), TRANS (2)
bool flagREFLECT = (flags & OPFLAG_REFLECT ? true : false),
flagRMW = (flags & OPFLAG_RMW ? true : false),
uint8 index = (p1 >> 37) & 0xFE; // CLUT index offset (upper pix, 1-4 bpp)
uint32 pitch = (p1 >> 15) & 0x07; // Phrase pitch
-// int16 scanlineWidth = tom_getVideoModeWidth();
uint8 * tom_ram_8 = tom_get_ram_pointer();
uint8 * paletteRAM = &tom_ram_8[0x400];
// This is OK as long as it's used correctly: For 16-bit RAM to RAM direct copies--NOT
- // for use when using endian-corrected data (i.e., any of the *_word_read functions!)
+ // for use when using endian-corrected data (i.e., any of the *ReadWord functions!)
uint16 * paletteRAM16 = (uint16 *)paletteRAM;
uint8 hscale = p2 & 0xFF;
- uint8 horizontalRemainder = hscale; // Not sure if it starts full, but seems reasonable
+// Hmm. It seems that fixing the horizontal scale necessitated re-fixing this. Not sure why,
+// but seems to be consistent with the vertical scaling now (and it may turn out to be wrong!)...
+ uint8 horizontalRemainder = hscale; // Not sure if it starts full, but seems reasonable [It's not!]
+// uint8 horizontalRemainder = 0; // Let's try zero! Seems to work! Yay! [No, it doesn't!]
int32 scaledWidthInPixels = (iwidth * phraseWidthToPixels[depth] * hscale) >> 5;
uint32 scaledPhrasePixels = (phraseWidthToPixels[depth] * hscale) >> 5;
// WriteLog("bitmap %ix? %ibpp at %i,? firstpix=? data=0x%.8x pitch %i hflipped=%s dwidth=? (linked to ?) RMW=%s Tranparent=%s\n",
// iwidth, op_bitmap_bit_depth[bitdepth], xpos, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), (flags&OPFLAG_RMW ? "yes" : "no"), (flags&OPFLAG_TRANS ? "yes" : "no"));
-//Looks like an hscale of zero means don't draw!
+// Looks like an hscale of zero means don't draw!
if (!render || iwidth == 0 || hscale == 0)
return;
+/*extern int start_logging;
+if (start_logging)
+ WriteLog("OP: Scaled bitmap %ix? %ibpp at %i,? hscale=%02X fpix=%i data=%08X pitch %i hflipped=%s dwidth=? (linked to %08X) Transluency=%s\n",
+ iwidth, op_bitmap_bit_depth[depth], xpos, hscale, firstPix, data, pitch, (flagREFLECT ? "yes" : "no"), op_pointer, (flagRMW ? "yes" : "no"));*/
//#define OP_DEBUG_BMP
//#ifdef OP_DEBUG_BMP
-// WriteLog("bitmap %ix%i %ibpp at %i,%i firstpix=%i data=0x%.8x pitch %i hflipped=%s dwidth=%i (linked to 0x%.8x) Transluency=%s\n",
+// WriteLog("OP: Scaled bitmap %ix%i %ibpp at %i,%i firstpix=%i data=0x%.8x pitch %i hflipped=%s dwidth=%i (linked to 0x%.8x) Transluency=%s\n",
// iwidth, height, op_bitmap_bit_depth[bitdepth], xpos, ypos, firstPix, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), dwidth, op_pointer, (flags&OPFLAG_RMW ? "yes" : "no"));
//#endif
// 4. image sits on right edge and no REFLECT; starts in bounds but ends out of bounds.
//Numbers 2 & 4 can be caught by checking the LBUF clip while in the inner loop,
// numbers 1 & 3 are of concern.
-// This *indirectly* handles only cases 2 & 4! And is WRONG is REFLECT is set...!
+// This *indirectly* handles only cases 2 & 4! And is WRONG if REFLECT is set...!
// if (rightMargin < 0 || leftMargin > lbufWidth)
// It might be easier to swap these (if REFLECTed) and just use XPOS down below...
DumpScaledObject(p0, p1, p2);
}//*/
//NOTE: I'm almost 100% sure that this is wrong... And it is! :-p
+
+//Try a simple example...
+// Let's say we have an 8 BPP scanline with an hscale of $80 (4). Our xpos is -10,
+// non-flipped. Pixels in the bitmap are XYZXYZXYZXYZXYZ.
+// Scaled up, they would be XXXXYYYYZZZZXXXXYYYYZZZZXXXXYYYYZZZZ...
+//
+// Normally, we would expect this in the line buffer:
+// ZZXXXXYYYYZZZZXXXXYYYYZZZZ...
+//
+// But instead we're getting:
+// XXXXYYYYZZZZXXXXYYYYZZZZ...
+//
+// or are we??? It would seem so, simply by virtue of the fact that we're NOT starting
+// on a negative boundary--or are we? Hmm...
+// cw = 10, dcw = pcw = 10 / ([8 * 4 = 32] 32) = 0, sp = -10
+//
+// Let's try a real world example:
+//
+//OP: Scaled bitmap (70, 8 BPP, spp=28) sp (-400) < 0... [new sp=-8, cw=400, dcw=pcw=14]
+//OP: Scaled bitmap (6F, 8 BPP, spp=27) sp (-395) < 0... [new sp=-17, cw=395, dcw=pcw=14]
+//
+// Really, spp is 27.75 in the second case...
+// So... If we do 395 / 27.75, we get 14. Ok so far... If we scale that against the
+// start position (14 * 27.75), we get -6.5... NOT -17!
+
+//Now it seems we're working OK, at least for the first case...
+uint32 scaledPhrasePixelsUS = phraseWidthToPixels[depth] * hscale;
+
if (startPos < 0) // Case #1: Begin out, end in, L to R
-/* clippedWidth = 0 - startPos,
- dataClippedWidth = phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth],
- startPos = 0 - (clippedWidth % phraseWidthToPixels[depth]);*/
- clippedWidth = 0 - startPos,
- dataClippedWidth = phraseClippedWidth = clippedWidth / scaledPhrasePixels,
- startPos = 0 - (clippedWidth % scaledPhrasePixels);
+{
+extern int start_logging;
+if (start_logging)
+ WriteLog("OP: Scaled bitmap (%02X, %u BPP, spp=%u) start pos (%i) < 0...", hscale, op_bitmap_bit_depth[depth], scaledPhrasePixels, startPos);
+// clippedWidth = 0 - startPos,
+ clippedWidth = (0 - startPos) << 5,
+// dataClippedWidth = phraseClippedWidth = clippedWidth / scaledPhrasePixels,
+ dataClippedWidth = phraseClippedWidth = (clippedWidth / scaledPhrasePixelsUS) >> 5,
+// startPos = 0 - (clippedWidth % scaledPhrasePixels);
+ startPos += (dataClippedWidth * scaledPhrasePixelsUS) >> 5;
+if (start_logging)
+ WriteLog(" [new sp=%i, cw=%i, dcw=pcw=%i]\n", startPos, clippedWidth, dataClippedWidth);
+}
if (endPos < 0) // Case #2: Begin in, end out, R to L
-/* clippedWidth = 0 - endPos,
- phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];*/
clippedWidth = 0 - endPos,
phraseClippedWidth = clippedWidth / scaledPhrasePixels;
if (endPos > lbufWidth) // Case #3: Begin in, end out, L to R
-/* clippedWidth = endPos - lbufWidth,
- phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];*/
clippedWidth = endPos - lbufWidth,
phraseClippedWidth = clippedWidth / scaledPhrasePixels;
if (startPos > lbufWidth) // Case #4: Begin out, end in, R to L
-/* clippedWidth = startPos - lbufWidth,
- dataClippedWidth = phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth],
- startPos = lbufWidth + (clippedWidth % phraseWidthToPixels[depth]);*/
clippedWidth = startPos - lbufWidth,
dataClippedWidth = phraseClippedWidth = clippedWidth / scaledPhrasePixels,
startPos = lbufWidth + (clippedWidth % scaledPhrasePixels);
{
WriteLog("OP: Scaled line. SP=%i, EP=%i, clip=%u, iwidth=%u, hscale=%02X, depth=%u, firstPix=%u\n", startPos, endPos, clippedWidth, iwidth, hscale, depth, firstPix);
DumpScaledObject(p0, p1, p2);
+ if (iwidth == 7)
+ {
+ WriteLog(" %08X: ", data);
+ for(int i=0; i<7*8; i++)
+ WriteLog("%02X ", JaguarReadByte(data+i));
+ WriteLog("\n");
+ }
}
// If the image is sitting on the line buffer left or right edge, we need to compensate
// by decreasing the image phrase width accordingly.
// bitmap! This makes clipping & etc. MUCH, much easier...!
// uint32 lbufAddress = 0x1800 + (!in24BPPMode ? leftMargin * 2 : leftMargin * 4);
// uint32 lbufAddress = 0x1800 + (!in24BPPMode ? startPos * 2 : startPos * 4);
- uint32 lbufAddress = 0x1800 + (!in24BPPMode ? startPos * 2 : startPos * 2);
+ uint32 lbufAddress = 0x1800 + startPos * 2;
uint8 * currentLineBuffer = &tom_ram_8[lbufAddress];
+//uint8 * lineBufferLowerLimit = &tom_ram_8[0x1800],
+// * lineBufferUpperLimit = &tom_ram_8[0x1800 + 719];
// Render.
currentLineBuffer += lbufDelta;
- horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
+/* horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
while (horizontalRemainder & 0x80)
+ {
+ horizontalRemainder += hscale;
+ pixCount++;
+ pixels <<= 1;
+ }//*/
+ while (horizontalRemainder <= 0x20) // I.e., it's <= 0 (*before* subtraction)
{
horizontalRemainder += hscale;
pixCount++;
pixels <<= 1;
}
+ horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
if (pixCount > 63)
{
currentLineBuffer += lbufDelta;
- horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
+/* horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
while (horizontalRemainder & 0x80)
+ {
+ horizontalRemainder += hscale;
+ pixCount++;
+ pixels <<= 2;
+ }//*/
+ while (horizontalRemainder <= 0x20) // I.e., it's <= 0 (*before* subtraction)
{
horizontalRemainder += hscale;
pixCount++;
pixels <<= 2;
}
+ horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
if (pixCount > 31)
{
currentLineBuffer += lbufDelta;
- horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
+/* horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
while (horizontalRemainder & 0x80)
+ {
+ horizontalRemainder += hscale;
+ pixCount++;
+ pixels <<= 4;
+ }//*/
+ while (horizontalRemainder <= 0x20) // I.e., it's <= 0 (*before* subtraction)
{
horizontalRemainder += hscale;
pixCount++;
pixels <<= 4;
}
+ horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
if (pixCount > 15)
{
// This is the *only* correct use of endian-dependent code
// (i.e., mem-to-mem direct copying)!
*(uint16 *)currentLineBuffer = paletteRAM16[bits];
+/* {
+ if (currentLineBuffer >= lineBufferLowerLimit && currentLineBuffer <= lineBufferUpperLimit)
+ *(uint16 *)currentLineBuffer = paletteRAM16[bits];
+ }*/
else
*currentLineBuffer =
BLEND_CR(*currentLineBuffer, paletteRAM[bits << 1]),
currentLineBuffer += lbufDelta;
- horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
- while (horizontalRemainder & 0x80)
+ while (horizontalRemainder <= 0x20) // I.e., it's <= 0 (*before* subtraction)
{
horizontalRemainder += hscale;
pixCount++;
pixels <<= 8;
}
+ horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
if (pixCount > 7)
{
currentLineBuffer += lbufDelta;
- horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
+/* horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
while (horizontalRemainder & 0x80)
+ {
+ horizontalRemainder += hscale;
+ pixCount++;
+ pixels <<= 16;
+ }//*/
+ while (horizontalRemainder <= 0x20) // I.e., it's <= 0 (*before* subtraction)
{
horizontalRemainder += hscale;
pixCount++;
pixels <<= 16;
}
-
+ horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
+//*/
if (pixCount > 3)
{
int phrasesToSkip = pixCount / 4, pixelShift = pixCount % 4;
}
}
}
-/*if (depth == 3 && startPos == 13)
-{
-if (op_start_log)
-WriteLog("OP: Writing in the margins...\n");
- for(int i=0; i<100*2; i+=2)
-// for(int i=0; i<14*2; i+=2)
- tom_ram_8[0x1800 + i] = 0xFF,
- tom_ram_8[0x1800 + i + 1] = 0xFF;
-}*/
-// uint32 lbufAddress = 0x1800 + (!in24BPPMode ? startPos * 2 : startPos * 4);
-// uint8 * currentLineBuffer = &tom_ram_8[lbufAddress];
}