Shamusworld >> Repos - virtualjaguar/blobdiff - src/objectp.cpp
Changes for 1.0.7 update
[virtualjaguar] / src / objectp.cpp
index 4934892aa99596e559a3b9b46e42e7b25016f020..44bb34dd3e6f91601387d23af7aba80e011df105 100644 (file)
@@ -1,9 +1,9 @@
 //
 // Object Processor
 //
-// by cal2
+// Original source by Cal2
 // GCC/SDL port by Niels Wagenaar (Linux/WIN32) and Caz (BeOS)
-// Cleanups/fixes/rewrites by James L. Hammons
+// Extensive cleanups/fixes/rewrites by James L. Hammons
 //
 
 #include <stdio.h>
@@ -16,8 +16,6 @@
 
 #define BLEND_Y(dst, src)      op_blend_y[(((uint16)dst<<8)) | ((uint16)(src))]
 #define BLEND_CR(dst, src)     op_blend_cr[(((uint16)dst)<<8) | ((uint16)(src))]
-//Delete this once we're rid of zbmpop*.h...
-#define BLEND_CC(dst, src)     op_blend_cr[(((uint16)dst)<<8) | ((uint16)(src))]
 
 #define OBJECT_TYPE_BITMAP     0                       // 000
 #define OBJECT_TYPE_SCALE      1                       // 001
 #define CONDITION_OP_FLAG_SET          3
 #define CONDITION_SECOND_HALF_LINE     4
 
-//Delete this once we're rid of zbmpop*.h...
-#define FLAGS_RELEASE          8
-#define FLAGS_TRANSPARENT      4
-#define FLAGS_READMODIFY       2
-#define FLAGS_HFLIP                    1
-
 #define OPFLAG_RELEASE         8                       // Bus release bit
 #define OPFLAG_TRANS           4                       // Transparency bit
 #define OPFLAG_RMW                     2                       // Read-Modify-Write bit
 
 // Private function prototypes
 
-void OPProcessFixedBitmap(int scanline, uint64 p0, uint64 p1, bool render);
-void OPProcessScaledBitmap(int scanline, uint64 p0, uint64 p1, uint64 p2, bool render);
-
-// External global variables
-
-extern uint32 jaguar_mainRom_crc32;
+void OPProcessFixedBitmap(uint64 p0, uint64 p1, bool render);
+void OPProcessScaledBitmap(uint64 p0, uint64 p1, uint64 p2, bool render);
+void DumpScaledObject(uint64 p0, uint64 p1, uint64 p2);
+void DumpFixedObject(uint64 p0, uint64 p1);
+uint64 op_load_phrase(uint32 offset);
 
 // Local global variables
 
 static uint8 * op_blend_y;
 static uint8 * op_blend_cr;
-// There may be a problem with this "RAM" overlapping some of the
-// regular TOM RAM...
+// There may be a problem with this "RAM" overlapping (and thus being independent of)
+// some of the regular TOM RAM...
 static uint8 objectp_ram[0x40];                        // This is based at $F00000
 uint8 objectp_running;
-bool objectp_stop_reading_list;
+//bool objectp_stop_reading_list;
 
 static uint8 op_bitmap_bit_depth[8] = { 1, 2, 4, 8, 16, 24, 32, 0 };
-static uint32 op_bitmap_bit_size[8] =
-       { (uint32)(0.125*65536), (uint32)(0.25*65536), (uint32)(0.5*65536), (uint32)(1*65536),
-         (uint32)(2*65536),     (uint32)(1*65536),    (uint32)(1*65536),   (uint32)(1*65536) };
+//static uint32 op_bitmap_bit_size[8] =
+//     { (uint32)(0.125*65536), (uint32)(0.25*65536), (uint32)(0.5*65536), (uint32)(1*65536),
+//       (uint32)(2*65536),     (uint32)(1*65536),    (uint32)(1*65536),   (uint32)(1*65536) };
 static uint32 op_pointer;
 
+int32 phraseWidthToPixels[8] = { 64, 32, 16, 8, 4, 2, 0, 0 };
+
 
 //
 // Object Processor initialization
@@ -124,47 +117,71 @@ void op_reset(void)
 
 void op_done(void)
 {
+       char * opType[8] =
+       { "(BITMAP)", "(SCALED BITMAP)", "(GPU INT)", "(BRANCH)", "(STOP)", "???", "???", "???" };
+       char * ccType[8] =
+               { "\"==\"", "\"<\"", "\">\"", "(opflag set)", "(second half line)", "?", "?", "?" };
+
+       uint32 olp = op_get_list_pointer();
+       WriteLog("OP: OLP = %08X\n", olp);
+       WriteLog("OP: Phrase dump\n    ----------\n");
+       for(uint32 i=0; i<0x100; i+=8)
+       {
+               uint32 hi = JaguarReadLong(olp + i, OP), lo = JaguarReadLong(olp + i + 4, OP);
+               WriteLog("\t%08X: %08X %08X %s", olp + i, hi, lo, opType[lo & 0x07]);
+               if ((lo & 0x07) == 3)
+               {
+                       uint16 ypos = (lo >> 3) & 0x7FF;
+                       uint8  cc   = (lo >> 14) & 0x03;
+                       uint32 link = ((hi << 11) | (lo >> 21)) & 0x3FFFF8;
+                       WriteLog(" YPOS=%u, CC=%s, link=%08X", ypos, ccType[cc], link);
+               }
+               WriteLog("\n");
+               if ((lo & 0x07) == 0)
+                       DumpFixedObject(op_load_phrase(olp+i), op_load_phrase(olp+i+8));
+               if ((lo & 0x07) == 1)
+                       DumpScaledObject(op_load_phrase(olp+i), op_load_phrase(olp+i+8), op_load_phrase(olp+i+16));
+       }
+       WriteLog("\n");
 }
 
 //
 // Object Processor memory access
-// Memory range: F00010 (F00008?) - F00027
+// Memory range: F00010 - F00027
+//
+//     F00010-F00017   R     xxxxxxxx xxxxxxxx   OB - current object code from the graphics processor
+//     F00020-F00023     W   xxxxxxxx xxxxxxxx   OLP - start of the object list
+//     F00026            W   -------- -------x   OBF - object processor flag
 //
-void op_byte_write(uint32 offset, uint8 data)
+
+uint8 OPReadByte(uint32 offset, uint32 who/*=UNKNOWN*/)
 {
        offset &= 0x3F;
-       objectp_ram[offset] = data;
+       return objectp_ram[offset];
 }
 
-void op_word_write(uint32 offset, uint16 data)
+uint16 OPReadWord(uint32 offset, uint32 who/*=UNKNOWN*/)
 {
        offset &= 0x3F;
-//     objectp_ram[offset] = (data >> 8) & 0xFF;
-//     objectp_ram[offset+1] = data & 0xFF;
-       SET16(objectp_ram, offset, data);
-
-/*if (offset == 0x20)
-WriteLog("OP: Setting lo list pointer: %04X\n", data);
-if (offset == 0x22)
-WriteLog("OP: Setting hi list pointer: %04X\n", data);//*/
+       return GET16(objectp_ram, offset);
 }
 
-uint8 op_byte_read(uint32 offset)
+void OPWriteByte(uint32 offset, uint8 data, uint32 who/*=UNKNOWN*/)
 {
        offset &= 0x3F;
-       return objectp_ram[offset];
+       objectp_ram[offset] = data;
 }
 
-uint16 op_word_read(uint32 offset)
+void OPWriteWord(uint32 offset, uint16 data, uint32 who/*=UNKNOWN*/)
 {
-//     return (objectp_ram[offset & 0x3F] << 8) | objectp_ram[(offset+1) & 0x3F];
        offset &= 0x3F;
-       return GET16(objectp_ram, offset);
-}
+       SET16(objectp_ram, offset, data);
 
-//     F00010-F00017   R     xxxxxxxx xxxxxxxx   OB - current object code from the graphics processor
-//     F00020-F00023     W   xxxxxxxx xxxxxxxx   OLP - start of the object list
-//     F00026            W   -------- -------x   OBF - object processor flag
+/*if (offset == 0x20)
+WriteLog("OP: Setting lo list pointer: %04X\n", data);
+if (offset == 0x22)
+WriteLog("OP: Setting hi list pointer: %04X\n", data);//*/
+}
 
 uint32 op_get_list_pointer(void)
 {
@@ -198,7 +215,7 @@ void op_set_current_object(uint64 object)
 {
 //Not sure this is right... Wouldn't it just be stored 64 bit BE?
        // Stored as least significant 32 bits first, ms32 last in big endian
-       objectp_ram[0x13] = object & 0xFF; object >>= 8;
+/*     objectp_ram[0x13] = object & 0xFF; object >>= 8;
        objectp_ram[0x12] = object & 0xFF; object >>= 8;
        objectp_ram[0x11] = object & 0xFF; object >>= 8;
        objectp_ram[0x10] = object & 0xFF; object >>= 8;
@@ -206,47 +223,96 @@ void op_set_current_object(uint64 object)
        objectp_ram[0x17] = object & 0xFF; object >>= 8;
        objectp_ram[0x16] = object & 0xFF; object >>= 8;
        objectp_ram[0x15] = object & 0xFF; object >>= 8;
-       objectp_ram[0x14] = object & 0xFF; 
+       objectp_ram[0x14] = object & 0xFF;*/
+// Let's try regular good old big endian...
+       objectp_ram[0x17] = object & 0xFF; object >>= 8;
+       objectp_ram[0x16] = object & 0xFF; object >>= 8;
+       objectp_ram[0x15] = object & 0xFF; object >>= 8;
+       objectp_ram[0x14] = object & 0xFF; object >>= 8;
+
+       objectp_ram[0x13] = object & 0xFF; object >>= 8;
+       objectp_ram[0x12] = object & 0xFF; object >>= 8;
+       objectp_ram[0x11] = object & 0xFF; object >>= 8;
+       objectp_ram[0x10] = object & 0xFF;
 }
 
 uint64 op_load_phrase(uint32 offset)
 {
        offset &= ~0x07;                                                // 8 byte alignment
-       return ((uint64)jaguar_long_read(offset) << 32) | (uint64)jaguar_long_read(offset+4);
+       return ((uint64)JaguarReadLong(offset, OP) << 32) | (uint64)JaguarReadLong(offset+4, OP);
+}
+
+void OPStorePhrase(uint32 offset, uint64 p)
+{
+       offset &= ~0x07;                                                // 8 byte alignment
+       JaguarWriteLong(offset, p >> 32, OP);
+       JaguarWriteLong(offset + 4, p & 0xFFFFFFFF, OP);
 }
 
 //
-// OP replacement functions
+// Debugging routines
 //
+void DumpScaledObject(uint64 p0, uint64 p1, uint64 p2) // Debug aid: log the decoded fields of a scaled bitmap object (phrases p0..p2)
+{
+       WriteLog(" (SCALED BITMAP)");
+       WriteLog(" %08X --> phrase %08X %08X\n", op_pointer, (uint32)(p1>>32), (uint32)(p1&0xFFFFFFFF));
+       WriteLog("                 %08X --> phrase %08X %08X ", op_pointer+8, (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
+       uint8 bitdepth = (p1 >> 12) & 0x07;
+       int16 ypos = ((p0 >> 3) & 0x7FF);                       // YPOS spans bits 3-13 (11 bits; HEIGHT starts at bit 14). ??? What if not interlaced (/2)?
+       int32 xpos = p1 & 0xFFF;
+       xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);       // Sign-extend the 12-bit XPOS
+       uint32 iwidth = ((p1 >> 28) & 0x3FF);
+       uint32 dwidth = ((p1 >> 18) & 0x3FF);           // Unsigned!
+       uint16 height = ((p0 >> 14) & 0x3FF);
+       uint32 link = ((p0 >> 24) & 0x7FFFF) << 3;      // Stored in phrase units; << 3 turns it into a byte address
+       uint32 ptr = ((p0 >> 43) & 0x1FFFFF) << 3;      // Likewise for the data pointer
+       uint32 firstPix = (p1 >> 49) & 0x3F;
+       uint8 flags = (p1 >> 45) & 0x0F;
+       uint8 idx = (p1 >> 38) & 0x7F;
+       uint32 pitch = (p1 >> 15) & 0x07;
+       WriteLog("\n    [%u (%u) x %u @ (%i, %u) (%u bpp), l: %08X, p: %08X fp: %02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
+               iwidth, dwidth, height, xpos, ypos, op_bitmap_bit_depth[bitdepth], link, ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""), (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""), (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
+       uint32 hscale = p2 & 0xFF;                      // Third phrase: HSCALE / VSCALE / REMAINDER, one byte each
+       uint32 vscale = (p2 >> 8) & 0xFF;
+       uint32 remainder = (p2 >> 16) & 0xFF;
+       WriteLog("    [hsc: %02X, vsc: %02X, rem: %02X]\n", hscale, vscale, remainder);
+}
 
-void OPStorePhrase(uint32 offset, uint64 p)
+void DumpFixedObject(uint64 p0, uint64 p1)     // Debug aid: log the decoded fields of a fixed (unscaled) bitmap object (phrases p0, p1)
 {
-       offset &= ~0x07;                                                // 8 byte alignment
-       jaguar_long_write(offset, p >> 32);
-       jaguar_long_write(offset + 4, p & 0xFFFFFFFF);
+       WriteLog(" (BITMAP)");
+       WriteLog(" %08X --> phrase %08X %08X\n", op_pointer, (uint32)(p1>>32), (uint32)(p1&0xFFFFFFFF));
+       uint8 bitdepth = (p1 >> 12) & 0x07;
+       int16 ypos = ((p0 >> 3) & 0x7FF);                       // YPOS spans bits 3-13 (11 bits; HEIGHT starts at bit 14). ??? What if not interlaced (/2)?
+       int32 xpos = p1 & 0xFFF;
+       xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);       // Sign-extend the 12-bit XPOS
+       uint32 iwidth = ((p1 >> 28) & 0x3FF);
+       uint32 dwidth = ((p1 >> 18) & 0x3FF);           // Unsigned!
+       uint16 height = ((p0 >> 14) & 0x3FF);
+       uint32 link = ((p0 >> 24) & 0x7FFFF) << 3;      // Stored in phrase units; << 3 turns it into a byte address
+       uint32 ptr = ((p0 >> 43) & 0x1FFFFF) << 3;      // Likewise for the data pointer
+       uint32 firstPix = (p1 >> 49) & 0x3F;
+       uint8 flags = (p1 >> 45) & 0x0F;
+       uint8 idx = (p1 >> 38) & 0x7F;
+       uint32 pitch = (p1 >> 15) & 0x07;
+       WriteLog("    [%u (%u) x %u @ (%i, %u) (%u bpp), l: %08X, p: %08X fp: %02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
+               iwidth, dwidth, height, xpos, ypos, op_bitmap_bit_depth[bitdepth], link, ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""), (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""), (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
 }
 
 //
-// *** NEW ***
 // Object Processor main routine
 //
+//Need to fix this so that when a GPU object IRQ happens, we can pick up OP processing
+//where we left off. !!! FIX !!!
 void OPProcessList(int scanline, bool render)
 {
 extern int op_start_log;
 //     char * condition_to_str[8] =
 //             { "==", "<", ">", "(opflag set)", "(second half line)", "?", "?", "?" };
 
-// If jaguar_exec() is working right, we should *never* have to check for this
-// condition...
-       if (scanline < tom_get_vdb())
-               return;
-
-       if (scanline >= 525)//tom_getVideoModeHeight()+tom_get_vdb())
-               return;
-
        op_pointer = op_get_list_pointer();
 
-       objectp_stop_reading_list = false;
+//     objectp_stop_reading_list = false;
 
 // *** BEGIN OP PROCESSOR TESTING ONLY ***
 extern bool interactiveMode;
@@ -265,12 +331,13 @@ if (interactiveMode && bitmapCounter == objectPtr)
 else
        inhibit = false;
 // *** END OP PROCESSOR TESTING ONLY ***
-               if (objectp_stop_reading_list)
-                       return;
+//             if (objectp_stop_reading_list)
+//                     return;
                        
                uint64 p0 = op_load_phrase(op_pointer);
                op_pointer += 8;
 if (scanline == tom_get_vdb() && op_start_log)
+//if (scanline == 215 && op_start_log)
 {
 WriteLog("%08X --> phrase %08X %08X", op_pointer - 8, (int)(p0>>32), (int)(p0&0xFFFFFFFF));
 if ((p0 & 0x07) == OBJECT_TYPE_BITMAP)
@@ -284,36 +351,37 @@ int32 xpos = p1 & 0xFFF;
 xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);
        uint32 iwidth = ((p1 >> 28) & 0x3FF);
        uint32 dwidth = ((p1 >> 18) & 0x3FF);           // Unsigned!
-       uint16 height = ((p0 >> 14) & 0x3FF) - 1;
+       uint16 height = ((p0 >> 14) & 0x3FF);
        uint32 link = ((p0 >> 24) & 0x7FFFF) << 3;
        uint32 ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
        uint32 firstPix = (p1 >> 49) & 0x3F;
        uint8 flags = (p1 >> 45) & 0x0F;
        uint8 idx = (p1 >> 38) & 0x7F;
        uint32 pitch = (p1 >> 15) & 0x07;
-WriteLog("\n    [%u (%u) x %u @ %i, %u (%u bpp), l: %08X, p: %08X fp: %02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
-       iwidth, dwidth, height, xpos, ypos, op_bitmap_bit_depth[bitdepth], link, ptr, firstPix, (flags&FLAGS_HFLIP ? "REFLECT " : ""), (flags&FLAGS_READMODIFY ? "RMW " : ""), (flags&FLAGS_TRANSPARENT ? "TRANS " : ""), (flags&FLAGS_RELEASE ? "RELEASE" : ""), idx, pitch);
+WriteLog("\n    [%u (%u) x %u @ (%i, %u) (%u bpp), l: %08X, p: %08X fp: %02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
+       iwidth, dwidth, height, xpos, ypos, op_bitmap_bit_depth[bitdepth], link, ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""), (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""), (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
 }
 if ((p0 & 0x07) == OBJECT_TYPE_SCALE)
 {
 WriteLog(" (SCALED BITMAP)");
 uint64 p1 = op_load_phrase(op_pointer), p2 = op_load_phrase(op_pointer+8);
 WriteLog("\n%08X --> phrase %08X %08X ", op_pointer, (int)(p1>>32), (int)(p1&0xFFFFFFFF));
+WriteLog("\n%08X --> phrase %08X %08X ", op_pointer+8, (int)(p2>>32), (int)(p2&0xFFFFFFFF));
        uint8 bitdepth = (p1 >> 12) & 0x07;
        int16 ypos = ((p0 >> 3) & 0x3FF);                       // ??? What if not interlaced (/2)?
 int32 xpos = p1 & 0xFFF;
 xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);
        uint32 iwidth = ((p1 >> 28) & 0x3FF);
        uint32 dwidth = ((p1 >> 18) & 0x3FF);           // Unsigned!
-       uint16 height = ((p0 >> 14) & 0x3FF) - 1;
+       uint16 height = ((p0 >> 14) & 0x3FF);
        uint32 link = ((p0 >> 24) & 0x7FFFF) << 3;
        uint32 ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
        uint32 firstPix = (p1 >> 49) & 0x3F;
        uint8 flags = (p1 >> 45) & 0x0F;
        uint8 idx = (p1 >> 38) & 0x7F;
        uint32 pitch = (p1 >> 15) & 0x07;
-WriteLog("\n    [%u (%u) x %u @ %i, %u (%u bpp), l: %08X, p: %08X fp: %02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
-       iwidth, dwidth, height, xpos, ypos, op_bitmap_bit_depth[bitdepth], link, ptr, firstPix, (flags&FLAGS_HFLIP ? "REFLECT " : ""), (flags&FLAGS_READMODIFY ? "RMW " : ""), (flags&FLAGS_TRANSPARENT ? "TRANS " : ""), (flags&FLAGS_RELEASE ? "RELEASE" : ""), idx, pitch);
+WriteLog("\n    [%u (%u) x %u @ (%i, %u) (%u bpp), l: %08X, p: %08X fp: %02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
+       iwidth, dwidth, height, xpos, ypos, op_bitmap_bit_depth[bitdepth], link, ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""), (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""), (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
        uint32 hscale = p2 & 0xFF;
        uint32 vscale = (p2 >> 8) & 0xFF;
        uint32 remainder = (p2 >> 16) & 0xFF;
@@ -339,16 +407,18 @@ WriteLog("    --> List end\n");
                {
                case OBJECT_TYPE_BITMAP:
                {
-                       // Would *not* be /2 if interlaced...!
-                       uint16 ypos = ((p0 >> 3) & 0x3FF) / 2;
+                       uint16 ypos = (p0 >> 3) & 0x3FF;
 // This is only theory implied by Rayman...!
 // It seems that if the YPOS is zero, then bump the YPOS value so that it coincides with
 // the VDB value. With interlacing, this would be slightly more tricky.
 // There's probably another bit somewhere that enables this mode--but so far, doesn't seem
 // to affect any other game in a negative way (that I've seen).
 // Either that, or it's an undocumented bug...
-                       if (ypos == 0)
-                               ypos = tom_word_read(0xF00046) / 2;                     // Get the VDB value
+
+//No, the reason this was needed is that the OP code before was wrong. Any value
+//less than VDB will get written to the top line of the display!
+//                     if (ypos == 0)
+//                             ypos = TOMReadWord(0xF00046, OP) / 2;                   // Get the VDB value
                        uint32 height = (p0 & 0xFFC000) >> 14;
                        uint32 oldOPP = op_pointer - 8;
 // *** BEGIN OP PROCESSOR TESTING ONLY ***
@@ -363,11 +433,13 @@ if (!inhibit)     // For OP testing only!
                                op_pointer += 8;
 //WriteLog("OP: Writing scanline %d with ypos == %d...\n", scanline, ypos);
 //WriteLog("--> Writing %u BPP bitmap...\n", op_bitmap_bit_depth[(p1 >> 12) & 0x07]);
-                               OPProcessFixedBitmap(scanline, p0, p1, render);
+//                             OPProcessFixedBitmap(scanline, p0, p1, render);
+                               OPProcessFixedBitmap(p0, p1, render);
 
                                // OP write-backs
 
 //???Does this really happen??? Doesn't seem to work if you do this...!
+//Probably not. Must be a bug in the documentation...!
 //                             uint32 link = (p0 & 0x7FFFF000000) >> 21;
 //                             SET16(objectp_ram, 0x20, link & 0xFFFF);        // OLP
 //                             SET16(objectp_ram, 0x22, link >> 16);
@@ -379,35 +451,29 @@ if (!inhibit)     // For OP testing only!
 //                             if (height)
                                        height--;
 
-                               uint64 data = (p0 & 0xFFFFF80000000000) >> 40;
+                               uint64 data = (p0 & 0xFFFFF80000000000LL) >> 40;
                                uint64 dwidth = (p1 & 0xFFC0000) >> 15;
                                data += dwidth;
 
-                               p0 &= ~0xFFFFF80000FFC000;                      // Mask out old data...
+                               p0 &= ~0xFFFFF80000FFC000LL;            // Mask out old data...
                                p0 |= (uint64)height << 14;
                                p0 |= data << 40;
                                OPStorePhrase(oldOPP, p0);
                        }
-                       op_pointer = (p0 & 0x000007FFFF000000) >> 21;
+                       op_pointer = (p0 & 0x000007FFFF000000LL) >> 21;
                        break;
                }
                case OBJECT_TYPE_SCALE:
                {
-                       // Would *not* be /2 if interlaced...!
-                       uint16 ypos = ((p0 >> 3) & 0x3FF) / 2;
-// This is only theory implied by Rayman...!
-// It seems that if the YPOS is zero, then bump the YPOS value so that it coincides with
-// the VDB value. With interlacing, this would be slightly more tricky.
-// There's probably another bit somewhere that enables this mode--but so far, doesn't seem
-// to affect any other game in a negative way (that I've seen).
-// Either that, or it's an undocumented bug...
-                       if (ypos == 0)
-                               ypos = tom_word_read(0xF00046) / 2;                     // Get the VDB value
+                       uint16 ypos = (p0 >> 3) & 0x3FF;
                        uint32 height = (p0 & 0xFFC000) >> 14;
                        uint32 oldOPP = op_pointer - 8;
 // *** BEGIN OP PROCESSOR TESTING ONLY ***
 if (inhibit && op_start_log)
-       WriteLog("!!! ^^^ This object is INHIBITED! ^^^ !!!\n");
+{
+       WriteLog("!!! ^^^ This object is INHIBITED! ^^^ !!! (scanline=%u, ypos=%u, height=%u)\n", scanline, ypos, height);
+       DumpScaledObject(p0, op_load_phrase(op_pointer), op_load_phrase(op_pointer+8));
+}
 bitmapCounter++;
 if (!inhibit)  // For OP testing only!
 // *** END OP PROCESSOR TESTING ONLY ***
@@ -418,59 +484,118 @@ if (!inhibit)    // For OP testing only!
                                uint64 p2 = op_load_phrase(op_pointer);
                                op_pointer += 8;
 //WriteLog("OP: %08X (%d) %08X%08X %08X%08X %08X%08X\n", oldOPP, scanline, (uint32)(p0>>32), (uint32)(p0&0xFFFFFFFF), (uint32)(p1>>32), (uint32)(p1&0xFFFFFFFF), (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
-                               OPProcessScaledBitmap(scanline, p0, p1, p2, render);
+                               OPProcessScaledBitmap(p0, p1, p2, render);
 
                                // OP write-backs
 
-//???Does this really happen??? Doesn't seem to work if you do this...!
-//                             uint32 link = (p0 & 0x7FFFF000000) >> 21;
-//                             SET16(objectp_ram, 0x20, link & 0xFFFF);        // OLP
-//                             SET16(objectp_ram, 0x22, link >> 16);
-/*                             uint32 height = (p0 & 0xFFC000) >> 14;
-                               if (height - 1 > 0)
-                                       height--;*/
-                               // NOTE: Would subtract 2 if in interlaced mode...!
-//                             uint64 height = ((p0 & 0xFFC000) - 0x4000) & 0xFFC000;
-
                                uint8 remainder = p2 >> 16, vscale = p2 >> 8;
-                               if (vscale == 0)
-                                       vscale = 0x20;                                  // OP bug???
+//Actually, we should skip this object if it has a vscale of zero.
+//Or do we? Not sure... Atari Karts has a few lines that look like:
+// (SCALED BITMAP)
+//000E8268 --> phrase 00010000 7000B00D 
+//    [7 (0) x 1 @ (13, 0) (8 bpp), l: 000E82A0, p: 000E0FC0 fp: 00, fl:RELEASE, idx:00, pt:01]
+//    [hsc: 9A, vsc: 00, rem: 00]
+// Could it be the vscale is overridden if the DWIDTH is zero? Hmm...
 
-                               remainder -= 0x20;                                      // 1.0f in [3.5] fixed point format
-                               if (remainder & 0x80)                           // I.e., it's negative
+                               if (vscale == 0)
+                                       vscale = 0x20;                                  // OP bug??? Nope, it isn't...! Or is it?
+
+/*extern int start_logging;
+if (start_logging)
+       WriteLog("--> Returned from scaled bitmap processing (rem=%02X, vscale=%02X)...\n", remainder, vscale);*/
+//Locks up here:
+//--> Returned from scaled bitmap processing (rem=20, vscale=80)...
+//There are other problems here, it looks like...
+/*
+OP: Scaled bitmap 4x? 4bpp at 38,? hscale=7C fpix=0 data=00075E28 pitch 1 hflipped=no dwidth=? (linked to 00071118) Transluency=no
+--> Returned from scaled bitmap processing (rem=50, vscale=7C)...
+OP: Scaled bitmap 4x? 4bpp at 38,? hscale=7C fpix=0 data=00075E28 pitch 1 hflipped=no dwidth=? (linked to 00071118) Transluency=no
+--> Returned from scaled bitmap processing (rem=30, vscale=7C)...
+OP: Scaled bitmap 4x? 4bpp at 38,? hscale=7C fpix=0 data=00075E28 pitch 1 hflipped=no dwidth=? (linked to 00071118) Transluency=no
+--> Returned from scaled bitmap processing (rem=10, vscale=7C)...
+OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756A8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
+--> Returned from scaled bitmap processing (rem=00, vscale=7E)...
+OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
+--> Returned from scaled bitmap processing (rem=00, vscale=80)...
+OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
+--> Returned from scaled bitmap processing (rem=5E, vscale=7E)...
+OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756E8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
+--> Returned from scaled bitmap processing (rem=60, vscale=80)...
+OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
+--> Returned from scaled bitmap processing (rem=3E, vscale=7E)...
+OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756E8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
+--> Returned from scaled bitmap processing (rem=40, vscale=80)...
+OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
+--> Returned from scaled bitmap processing (rem=1E, vscale=7E)...
+OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756E8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
+--> Returned from scaled bitmap processing (rem=20, vscale=80)...
+*/
+//Here's another problem:
+//    [hsc: 20, vsc: 20, rem: 00]
+// Since we're not checking for $E0 (but that's what we get from the above), we end
+// up repeating this scanline unnecessarily... !!! FIX !!! [DONE, but... still not quite
+// right. Either that, or the Accolade team that wrote Bubsy screwed up royal.]
+//Also note: $E0 = 7.0 which IS a legal vscale value...
+
+//                             if (remainder & 0x80)                           // I.e., it's negative
+//                             if ((remainder & 0x80) || remainder == 0)       // I.e., it's <= 0
+//                             if ((remainder - 1) >= 0xE0)            // I.e., it's <= 0
+//                             if ((remainder >= 0xE1) || remainder == 0)// I.e., it's <= 0
+//                             if ((remainder >= 0xE1 && remainder <= 0xFF) || remainder == 0)// I.e., it's <= 0
+                               if (remainder <= 0x20)                          // I.e., it's <= 0
                                {
-                                       uint64 data = (p0 & 0xFFFFF80000000000) >> 40;
+                                       uint64 data = (p0 & 0xFFFFF80000000000LL) >> 40;
                                        uint64 dwidth = (p1 & 0xFFC0000) >> 15;
 
-                                       while (remainder & 0x80)
+//                                     while (remainder & 0x80)
+//                                     while ((remainder & 0x80) || remainder == 0)
+//                                     while ((remainder - 1) >= 0xE0)
+//                                     while ((remainder >= 0xE1) || remainder == 0)
+//                                     while ((remainder >= 0xE1 && remainder <= 0xFF) || remainder == 0)
+                                       while (remainder <= 0x20)
                                        {
                                                remainder += vscale;
+
                                                if (height)
                                                        height--;
 
                                                data += dwidth;
                                        }
-                                       p0 &= ~0xFFFFF80000FFC000;              // Mask out old data...
+
+                                       p0 &= ~0xFFFFF80000FFC000LL;    // Mask out old data...
                                        p0 |= (uint64)height << 14;
                                        p0 |= data << 40;
                                        OPStorePhrase(oldOPP, p0);
                                }
 
+                               remainder -= 0x20;                                      // 1.0f in [3.5] fixed point format
+
+/*if (start_logging)
+       WriteLog("--> Finished writebacks...\n");*/
+
 //WriteLog(" [%08X%08X -> ", (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
-                               p2 &= ~0x0000000000FF0000;
+                               p2 &= ~0x0000000000FF0000LL;
                                p2 |= (uint64)remainder << 16;
 //WriteLog("%08X%08X]\n", (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
                                OPStorePhrase(oldOPP+16, p2);
 //remainder = (uint8)(p2 >> 16), vscale = (uint8)(p2 >> 8);
 //WriteLog(" [after]: rem=%02X, vscale=%02X\n", remainder, vscale);
                        }
-                       op_pointer = (p0 & 0x000007FFFF000000) >> 21;
+                       op_pointer = (p0 & 0x000007FFFF000000LL) >> 21;
                        break;
                }
                case OBJECT_TYPE_GPU:
                {
+//WriteLog("OP: Asserting GPU IRQ #3...\n");
                        op_set_current_object(p0);
-                       gpu_set_irq_line(3, 1);
+                       GPUSetIRQLine(3, ASSERT_LINE);
+//Also, OP processing is suspended from this point until OBF (F00026) is written to...
+// !!! FIX !!!
+//Do something like:
+//OPSuspendedByGPU = true;
+//Dunno if the OP keeps processing from where it was interrupted, or if it just continues
+//on the next scanline...
+// --> It continues from where it was interrupted! !!! FIX !!!
                        break;
                }
                case OBJECT_TYPE_BRANCH:
@@ -484,17 +609,15 @@ if (!inhibit)     // For OP testing only!
                        switch (cc)
                        {
                        case CONDITION_EQUAL:
-                               if (ypos != 0x7FF && (ypos & 0x01))
-                                       ypos ^= 0x01;
-                               if ((2 * tom_get_scanline()) == ypos || ypos == 0x7FF)
+                               if (TOMReadWord(0xF00006, OP) == ypos || ypos == 0x7FF)
                                        op_pointer = link;
                                break;
                        case CONDITION_LESS_THAN:
-                               if ((2 * tom_get_scanline()) < ypos)
+                               if (TOMReadWord(0xF00006, OP) < ypos)
                                        op_pointer = link;
                                break;
                        case CONDITION_GREATER_THAN:
-                               if ((2 * tom_get_scanline()) > ypos)
+                               if (TOMReadWord(0xF00006, OP) > ypos)
                                        op_pointer = link;
                                break;
                        case CONDITION_OP_FLAG_SET:
@@ -503,7 +626,7 @@ if (!inhibit)       // For OP testing only!
                                break;
                        case CONDITION_SECOND_HALF_LINE:
                                // This basically means branch if bit 10 of HC is set
-                               WriteLog("OP: Unexpected CONDITION_SECOND_HALF_LINE in BRANCH object\nop: shuting down\n");
+                               WriteLog("OP: Unexpected CONDITION_SECOND_HALF_LINE in BRANCH object\nOP: shuting down\n");
                                fclose(log_get());
                                exit(0);
                                break;
@@ -517,17 +640,19 @@ if (!inhibit)     // For OP testing only!
 //op_start_log = 0;
                        // unsure
 //WriteLog("OP: --> STOP\n");
-                       op_set_status_register(((p0>>3) & 0xFFFFFFFF));
+//                     op_set_status_register(((p0>>3) & 0xFFFFFFFF));
+//This seems more likely...
+                       op_set_current_object(p0);
                        
-                       if (p0 & 0x8)
+                       if (p0 & 0x08)
                        {
                                tom_set_pending_object_int();
-                               if (tom_irq_enabled(2) && jaguar_interrupt_handler_is_valid(64))
+                               if (tom_irq_enabled(IRQ_OPFLAG))// && jaguar_interrupt_handler_is_valid(64))
                                        m68k_set_irq(7);                                // Cause an NMI to occur...
                        }
 
                        return;
-                       break;
+//                     break;
                }
                default:
                        WriteLog("op: unknown object type %i\n", ((uint8)p0 & 0x07)); 
@@ -537,35 +662,34 @@ if (!inhibit)     // For OP testing only!
 }
 
 //
-// *** NEW ***
 // Store fixed size bitmap in line buffer
 //
-
-// Interesting thing about Rayman: There seems to be a transparent bitmap (1/8/16 bpp--which?)
-// being rendered under his feet--doesn't align when walking... Check it out!
-
-void OPProcessFixedBitmap(int scanline, uint64 p0, uint64 p1, bool render)
+void OPProcessFixedBitmap(uint64 p0, uint64 p1, bool render)
 {
 // Need to make sure that when writing that it stays within the line buffer...
 // LBUF ($F01800 - $F01D9E) 360 x 32-bit RAM
        uint8 depth = (p1 >> 12) & 0x07;                                // Color depth of image
-//Why is HBlankWidthInPixels subtracted from this???
-//     int32 xpos = (((int32)((p1 << 20) & 0xFFFFFFFF)) >> 20) - tom_getHBlankWidthInPixels();
        int32 xpos = ((int16)((p1 << 4) & 0xFFFF)) >> 4;// Image xpos in LBUF
        uint32 iwidth = (p1 >> 28) & 0x3FF;                             // Image width in *phrases*
        uint32 data = (p0 >> 40) & 0xFFFFF8;                    // Pixel data address
-#ifdef OP_DEBUG_BMP
-// Prolly should use this... Though not sure exactly how.
+//#ifdef OP_DEBUG_BMP
        uint32  firstPix = (p1 >> 49) & 0x3F;
-#endif
+       // "The LSB is significant only for scaled objects..." -JTRM
+       // "In 1 BPP mode, all five bits are significant. In 2 BPP mode, the top four are significant..."
+       firstPix &= 0x3E;
+//#endif
 // We can ignore the RELEASE (high order) bit for now--probably forever...!
 //     uint8 flags = (p1 >> 45) & 0x0F;        // REFLECT, RMW, TRANS, RELEASE
 //Optimize: break these out to their own BOOL values
        uint8 flags = (p1 >> 45) & 0x07;                                // REFLECT (0), RMW (1), TRANS (2)
+       bool flagREFLECT = (flags & OPFLAG_REFLECT ? true : false),
+               flagRMW = (flags & OPFLAG_RMW ? true : false),
+               flagTRANS = (flags & OPFLAG_TRANS ? true : false);
 // "For images with 1 to 4 bits/pixel the top 7 to 4 bits of the index
 //  provide the most significant bits of the palette address."
        uint8 index = (p1 >> 37) & 0xFE;                                // CLUT index offset (upper pix, 1-4 bpp)
        uint32 pitch = (p1 >> 15) & 0x07;                               // Phrase pitch
+       pitch <<= 3;                                                                    // Optimization: Multiply pitch by 8
 
 //     int16 scanlineWidth = tom_getVideoModeWidth();
        uint8 * tom_ram_8 = tom_get_ram_pointer();
@@ -575,25 +699,27 @@ void OPProcessFixedBitmap(int scanline, uint64 p0, uint64 p1, bool render)
        uint16 * paletteRAM16 = (uint16 *)paletteRAM;
 
 //     WriteLog("bitmap %ix? %ibpp at %i,? firstpix=? data=0x%.8x pitch %i hflipped=%s dwidth=? (linked to ?) RMW=%s Tranparent=%s\n",
-//             iwidth, op_bitmap_bit_depth[bitdepth], xpos, ptr, pitch, (flags&FLAGS_HFLIP ? "yes" : "no"), (flags&FLAGS_READMODIFY ? "yes" : "no"), (flags&FLAGS_TRANSPARENT ? "yes" : "no"));
+//             iwidth, op_bitmap_bit_depth[bitdepth], xpos, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), (flags&OPFLAG_RMW ? "yes" : "no"), (flags&OPFLAG_TRANS ? "yes" : "no"));
 
 // Is it OK to have a 0 for the data width??? (i.e., undocumented?)
 // Seems to be... Seems that dwidth *can* be zero (i.e., reuse same line) as well.
 // Pitch == 0 is OK too...
-//     if (!render || op_pointer == 0 || dwidth == 0 || ptr == 0 || pitch == 0)
+//     if (!render || op_pointer == 0 || ptr == 0 || pitch == 0)
 //I'm not convinced that we need to concern ourselves with data & op_pointer here either!
-       if (!render || iwidth == 0) // || data == 0 || op_pointer == 0)
+       if (!render || iwidth == 0)
                return;
 
 //#define OP_DEBUG_BMP
 //#ifdef OP_DEBUG_BMP
 //     WriteLog("bitmap %ix%i %ibpp at %i,%i firstpix=%i data=0x%.8x pitch %i hflipped=%s dwidth=%i (linked to 0x%.8x) Transluency=%s\n",
-//             iwidth, height, op_bitmap_bit_depth[bitdepth], xpos, ypos, firstPix, ptr, pitch, (flags&FLAGS_HFLIP ? "yes" : "no"), dwidth, op_pointer, (flags&FLAGS_READMODIFY ? "yes" : "no"));
+//             iwidth, height, op_bitmap_bit_depth[bitdepth], xpos, ypos, firstPix, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), dwidth, op_pointer, (flags&OPFLAG_RMW ? "yes" : "no"));
 //#endif
 
-       int32 phraseWidthToPixels[8] = { 64, 32, 16, 8, 4, 2, 0, 0 };
-       int32 leftMargin = xpos, rightMargin = (xpos + (phraseWidthToPixels[depth] * iwidth)) - 1;
-       uint32 clippedWidth = 0, phraseClippedWidth = 0;//, phrasePixel = 0;
+//     int32 leftMargin = xpos, rightMargin = (xpos + (phraseWidthToPixels[depth] * iwidth)) - 1;
+       int32 startPos = xpos, endPos = xpos +
+               (!flagREFLECT ? (phraseWidthToPixels[depth] * iwidth) - 1
+               : -((phraseWidthToPixels[depth] * iwidth) + 1));
+       uint32 clippedWidth = 0, phraseClippedWidth = 0, dataClippedWidth = 0;//, phrasePixel = 0;
        bool in24BPPMode = (((GET16(tom_ram_8, 0x0028) >> 1) & 0x03) == 1 ? true : false);      // VMODE
        // Not sure if this is Jaguar Two only location or what...
        // From the docs, it is... If we want to limit here we should think of something else.
@@ -617,8 +743,11 @@ void OPProcessFixedBitmap(int scanline, uint64 p0, uint64 p1, bool render)
 // That way, you could simply set XPOS to leftMargin if !REFLECT and to rightMargin otherwise.
 // Still have to be careful with the DATA and IWIDTH values though...
 
-       if ((!(flags & OPFLAG_REFLECT) && (rightMargin < 0 || leftMargin > lbufWidth))
-               || ((flags & OPFLAG_REFLECT) && (leftMargin < 0 || rightMargin > lbufWidth)))
+//     if ((!flagREFLECT && (rightMargin < 0 || leftMargin > lbufWidth))
+//             || (flagREFLECT && (leftMargin < 0 || rightMargin > lbufWidth)))
+//             return;
+       if ((!flagREFLECT && (endPos < 0 || startPos > lbufWidth))
+               || (flagREFLECT && (startPos < 0 || endPos > lbufWidth)))
                return;
 
        // Otherwise, find the clip limits and clip the phrase as well...
@@ -636,6 +765,9 @@ void OPProcessFixedBitmap(int scanline, uint64 p0, uint64 p1, bool render)
 //The strange thing is that it seems to work, but that's no guarantee that it's bulletproof!
 //Yup. Seems that JagMania doesn't work correctly with this...
 //Dunno if this is the problem, but Atari Karts is showing *some* of the road now...
+//     if (!flagREFLECT)
+
+/*
        if (leftMargin < 0)
                clippedWidth = 0 - leftMargin,
                phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth],
@@ -644,9 +776,32 @@ void OPProcessFixedBitmap(int scanline, uint64 p0, uint64 p1, bool render)
 
        if (rightMargin > lbufWidth)
                clippedWidth = rightMargin - lbufWidth,
-               phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth],
-               rightMargin = lbufWidth + (clippedWidth % phraseWidthToPixels[depth]);
+               phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];//,
+//             rightMargin = lbufWidth + (clippedWidth % phraseWidthToPixels[depth]);
 //             rightMargin = lbufWidth;
+*/
+if (depth > 5)
+       WriteLog("OP: We're about to encounter a divide by zero error!\n");
+       // NOTE: We're just using endPos to figure out how much, if any, to clip by.
+       // ALSO: There may be another case where we start out of bounds and end out of bounds...!
+       // !!! FIX !!!
+       if (startPos < 0)                       // Case #1: Begin out, end in, L to R
+               clippedWidth = 0 - startPos,
+               dataClippedWidth = phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth],
+               startPos = 0 - (clippedWidth % phraseWidthToPixels[depth]);
+
+       if (endPos < 0)                         // Case #2: Begin in, end out, R to L
+               clippedWidth = 0 - endPos,
+               phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];
+
+       if (endPos > lbufWidth)         // Case #3: Begin in, end out, L to R
+               clippedWidth = endPos - lbufWidth,
+               phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];
+
+       if (startPos > lbufWidth)       // Case #4: Begin out, end in, R to L
+               clippedWidth = startPos - lbufWidth,
+               dataClippedWidth = phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth],
+               startPos = lbufWidth + (clippedWidth % phraseWidthToPixels[depth]);
 
        // If the image is sitting on the line buffer left or right edge, we need to compensate
        // by decreasing the image phrase width accordingly.
@@ -654,11 +809,15 @@ void OPProcessFixedBitmap(int scanline, uint64 p0, uint64 p1, bool render)
 
        // Also, if we're clipping the phrase we need to make sure we're in the correct part of
        // the pixel data.
-       data += phraseClippedWidth * (pitch << 3);
+//     data += phraseClippedWidth * (pitch << 3);
+       data += dataClippedWidth * pitch;
 
        // NOTE: When the bitmap is in REFLECT mode, the XPOS marks the *right* side of the
        //       bitmap! This makes clipping & etc. MUCH, much easier...!
-       uint32 lbufAddress = 0x1800 + (!in24BPPMode ? leftMargin * 2 : leftMargin * 4);
+//     uint32 lbufAddress = 0x1800 + (!in24BPPMode ? leftMargin * 2 : leftMargin * 4);
+//Why does this work right when multiplying startPos by 2 (instead of 4) for 24 BPP mode?
+//Is this a bug in the OP?
+       uint32 lbufAddress = 0x1800 + (!in24BPPMode ? startPos * 2 : startPos * 2);
        uint8 * currentLineBuffer = &tom_ram_8[lbufAddress];
 
        // Render.
@@ -667,33 +826,34 @@ void OPProcessFixedBitmap(int scanline, uint64 p0, uint64 p1, bool render)
 // If we *were* in 24 BPP mode, how would you convert CRY to RGB24? Seems to me
 // that if you're in CRY mode then you wouldn't be able to use 24 BPP bitmaps
 // anyway.
+// This seems to be the case (at least according to the Midsummer docs)...!
 
        if (depth == 0)                                                                 // 1 BPP
        {
-//             uint32 paletteIndex = index << 1;
-               // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
+               // The LSB of flags is OPFLAG_REFLECT, so sign extend it and OR 2 into it.
                int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
 
+               // Fetch 1st phrase...
+               uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
+//Note that firstPix should only be honored *if* we start with the 1st phrase of the bitmap
+//i.e., we didn't clip on the margin... !!! FIX !!!
+               pixels <<= firstPix;                                            // Skip first N pixels (N=firstPix)...
+               int i = firstPix;                                                       // Start counter at right spot...
+
                while (iwidth--)
                {
-                       // Fetch phrase...
-                       uint64 pixels = ((uint64)jaguar_long_read(data) << 32) | jaguar_long_read(data + 4);
-                       data += pitch << 3;                                             // Multiply pitch * 8 (optimize: precompute this value)
-
-                       for(int i=0; i<64; i++)
+                       while (i++ < 64)
                        {
                                uint8 bit = pixels >> 63;
-// Seems to me that both of these are in the same endian, so we could cast it as
-// uint16 * and do straight across copies (what about 24 bpp? Treat it differently...)
-// This only works for the palettized modes (1 - 8 BPP), since we actually have to
-// copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
-// No, it isn't because we read the memory in an endian safe way--this *won't* work...
-                               if ((flags & OPFLAG_TRANS) && bit == 0)
+                               if (flagTRANS && bit == 0)
                                        ;       // Do nothing...
                                else
                                {
-                                       if (!(flags & OPFLAG_RMW))
+                                       if (!flagRMW)
 //Optimize: Set palleteRAM16 to beginning of palette RAM + index*2 and use only [bit] as index...
+//Won't optimize RMW case though...
+                                               // This is the *only* correct use of endian-dependent code
+                                               // (i.e., mem-to-mem direct copying)!
                                                *(uint16 *)currentLineBuffer = paletteRAM16[index | bit];
                                        else
                                                *currentLineBuffer = 
@@ -705,10 +865,16 @@ void OPProcessFixedBitmap(int scanline, uint64 p0, uint64 p1, bool render)
                                currentLineBuffer += lbufDelta;
                                pixels <<= 1;
                        }
+                       i = 0;
+                       // Fetch next phrase...
+                       data += pitch;
+                       pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
                }
        }
        else if (depth == 1)                                                    // 2 BPP
        {
+if (firstPix)
+       WriteLog("OP: Fixed bitmap @ 2 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
                index &= 0xFC;                                                          // Top six bits form CLUT index
                // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
                int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
@@ -716,8 +882,8 @@ void OPProcessFixedBitmap(int scanline, uint64 p0, uint64 p1, bool render)
                while (iwidth--)
                {
                        // Fetch phrase...
-                       uint64 pixels = ((uint64)jaguar_long_read(data) << 32) | jaguar_long_read(data + 4);
-                       data += pitch << 3;                                             // Multiply pitch * 8 (optimize: precompute this value)
+                       uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
+                       data += pitch;
 
                        for(int i=0; i<32; i++)
                        {
@@ -727,11 +893,11 @@ void OPProcessFixedBitmap(int scanline, uint64 p0, uint64 p1, bool render)
 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
 // No, it isn't because we read the memory in an endian safe way--this *won't* work...
-                               if ((flags & OPFLAG_TRANS) && bits == 0)
+                               if (flagTRANS && bits == 0)
                                        ;       // Do nothing...
                                else
                                {
-                                       if (!(flags & OPFLAG_RMW))
+                                       if (!flagRMW)
                                                *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
                                        else
                                                *currentLineBuffer = 
@@ -747,6 +913,8 @@ void OPProcessFixedBitmap(int scanline, uint64 p0, uint64 p1, bool render)
        }
        else if (depth == 2)                                                    // 4 BPP
        {
+if (firstPix)
+       WriteLog("OP: Fixed bitmap @ 4 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
                index &= 0xF0;                                                          // Top four bits form CLUT index
                // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
                int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
@@ -754,8 +922,8 @@ void OPProcessFixedBitmap(int scanline, uint64 p0, uint64 p1, bool render)
                while (iwidth--)
                {
                        // Fetch phrase...
-                       uint64 pixels = ((uint64)jaguar_long_read(data) << 32) | jaguar_long_read(data + 4);
-                       data += pitch << 3;                                             // Multiply pitch * 8 (optimize: precompute this value)
+                       uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
+                       data += pitch;
 
                        for(int i=0; i<16; i++)
                        {
@@ -765,11 +933,11 @@ void OPProcessFixedBitmap(int scanline, uint64 p0, uint64 p1, bool render)
 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
 // No, it isn't because we read the memory in an endian safe way--this *won't* work...
-                               if ((flags & OPFLAG_TRANS) && bits == 0)
+                               if (flagTRANS && bits == 0)
                                        ;       // Do nothing...
                                else
                                {
-                                       if (!(flags & OPFLAG_RMW))
+                                       if (!flagRMW)
                                                *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
                                        else
                                                *currentLineBuffer = 
@@ -788,13 +956,17 @@ void OPProcessFixedBitmap(int scanline, uint64 p0, uint64 p1, bool render)
                // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
                int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
 
+               // Fetch 1st phrase...
+               uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
+//Note that firstPix should only be honored *if* we start with the 1st phrase of the bitmap
+//i.e., we didn't clip on the margin... !!! FIX !!!
+               firstPix &= 0x30;                                                       // Only top two bits are valid for 8 BPP
+               pixels <<= firstPix;                                            // Skip first N pixels (N=firstPix)...
+               int i = firstPix >> 3;                                          // Start counter at right spot...
+
                while (iwidth--)
                {
-                       // Fetch phrase...
-                       uint64 pixels = ((uint64)jaguar_long_read(data) << 32) | jaguar_long_read(data + 4);
-                       data += pitch << 3;                                             // Multiply pitch * 8 (optimize: precompute this value)
-
-                       for(int i=0; i<8; i++)
+                       while (i++ < 8)
                        {
                                uint8 bits = pixels >> 56;
 // Seems to me that both of these are in the same endian, so we could cast it as
@@ -802,11 +974,11 @@ void OPProcessFixedBitmap(int scanline, uint64 p0, uint64 p1, bool render)
 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
 // No, it isn't because we read the memory in an endian safe way--this *won't* work...
-                               if ((flags & OPFLAG_TRANS) && bits == 0)
+                               if (flagTRANS && bits == 0)
                                        ;       // Do nothing...
                                else
                                {
-                                       if (!(flags & OPFLAG_RMW))
+                                       if (!flagRMW)
                                                *(uint16 *)currentLineBuffer = paletteRAM16[bits];
                                        else
                                                *currentLineBuffer = 
@@ -818,18 +990,24 @@ void OPProcessFixedBitmap(int scanline, uint64 p0, uint64 p1, bool render)
                                currentLineBuffer += lbufDelta;
                                pixels <<= 8;
                        }
+                       i = 0;
+                       // Fetch next phrase...
+                       data += pitch;
+                       pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
                }
        }
        else if (depth == 4)                                                    // 16 BPP
        {
+if (firstPix)
+       WriteLog("OP: Fixed bitmap @ 16 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
                // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
                int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
 
                while (iwidth--)
                {
                        // Fetch phrase...
-                       uint64 pixels = ((uint64)jaguar_long_read(data) << 32) | jaguar_long_read(data + 4);
-                       data += pitch << 3;                                             // Multiply pitch * 8 (optimize: precompute this value)
+                       uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
+                       data += pitch;
 
                        for(int i=0; i<4; i++)
                        {
@@ -839,11 +1017,11 @@ void OPProcessFixedBitmap(int scanline, uint64 p0, uint64 p1, bool render)
 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
 // No, it isn't because we read the memory in an endian safe way--it *won't* work...
-                               if ((flags & OPFLAG_TRANS) && (bitsLo | bitsHi) == 0)
+                               if (flagTRANS && (bitsLo | bitsHi) == 0)
                                        ;       // Do nothing...
                                else
                                {
-                                       if (!(flags & OPFLAG_RMW))
+                                       if (!flagRMW)
                                                *currentLineBuffer = bitsHi,
                                                *(currentLineBuffer + 1) = bitsLo;
                                        else
@@ -860,27 +1038,28 @@ void OPProcessFixedBitmap(int scanline, uint64 p0, uint64 p1, bool render)
        }
        else if (depth == 5)                                                    // 24 BPP
        {
-WriteLog("OP: Writing 24 BPP bitmap!\n");
+//Looks like Iron Soldier is the only game that uses 24BPP mode...
+//There *might* be others...
+//WriteLog("OP: Writing 24 BPP bitmap!\n");
+if (firstPix)
+       WriteLog("OP: Fixed bitmap @ 24 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
                // Not sure, but I think RMW only works with 16 BPP and below, and only in CRY mode...
-               // The LSB is OPFLAG_REFLECT, so sign extend it and or 4 into it.
+               // The LSB of flags is OPFLAG_REFLECT, so sign extend it and OR 4 into it.
                int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 4) | 0x04;
 
                while (iwidth--)
                {
                        // Fetch phrase...
-                       uint64 pixels = ((uint64)jaguar_long_read(data) << 32) | jaguar_long_read(data + 4);
-                       data += pitch << 3;                                             // Multiply pitch * 8 (optimize: precompute this value)
+                       uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
+                       data += pitch;
 
                        for(int i=0; i<2; i++)
                        {
+                               // We don't use a 32-bit var here because of endian issues...!
                                uint8 bits3 = pixels >> 56, bits2 = pixels >> 48,
                                        bits1 = pixels >> 40, bits0 = pixels >> 32;
-// Seems to me that both of these are in the same endian, so we could cast it as
-// uint16 * and do straight across copies (what about 24 bpp? Treat it differently...)
-// This only works for the palettized modes (1 - 8 BPP), since we actually have to
-// copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
-// No, it isn't because we read the memory in an endian safe way--it *won't* work...
-                               if ((flags & OPFLAG_TRANS) && (bits3 | bits2 | bits1 | bits0) == 0)
+
+                               if (flagTRANS && (bits3 | bits2 | bits1 | bits0) == 0)
                                        ;       // Do nothing...
                                else
                                        *currentLineBuffer = bits3,
@@ -896,161 +1075,522 @@ WriteLog("OP: Writing 24 BPP bitmap!\n");
 }
 
 //
-// *** NEW ***
 // Store scaled bitmap in line buffer
 //
-void OPProcessScaledBitmap(int scanline, uint64 p0, uint64 p1, uint64 p2, bool render)
+void OPProcessScaledBitmap(uint64 p0, uint64 p1, uint64 p2, bool render)
 {
-       int32 xpos = (((int32)((p1 << 20) & 0xFFFFFFFF)) >> 20) - tom_getHBlankWidthInPixels();
-//     uint16 ypos = ((p0 & 0x3FF8) >> 3) / 2;
-       uint16 iwidth = ((p1 >> 28) & 0x3FF) * 4;
-       uint16 dwidth = ((p1 >> 18) & 0x3FF) * 4;               // Unsigned!
-//     uint16 height = (p0 >> 14) & 0x3FF;                             // Unsigned!
-//     uint32 link = ((p0 >> 24) & 0x7FFFF) << 3;
-       uint32 ptr = (p0 >> 40) & 0xFFFFF8;
-//unused       uint32 firstPix = (p1 >> 49) & 0x3F;
-       uint8 flags = (p1 >> 45) & 0x0F;
-       uint8 idx = (p1 >> 38) & 0x7F;
-       uint8 pitch = (p1 >> 15) & 0x07;
-       uint8 bitdepth = (p1 >> 12) & 0x07;
+// Need to make sure that writes stay within the line buffer...
+// LBUF ($F01800 - $F01D9E) 360 x 32-bit RAM
+       uint8 depth = (p1 >> 12) & 0x07;                                // Color depth of image
+       int32 xpos = ((int16)((p1 << 4) & 0xFFFF)) >> 4;// Image xpos in LBUF
+       uint32 iwidth = (p1 >> 28) & 0x3FF;                             // Image width in *phrases*
+       uint32 data = (p0 >> 40) & 0xFFFFF8;                    // Pixel data address
+//#ifdef OP_DEBUG_BMP
+// Probably should use this... Though not sure exactly how.
+//Use the upper bits as an offset into the phrase depending on the BPP. That's how!
+       uint32 firstPix = (p1 >> 49) & 0x3F;
+//This is WEIRD! I'm sure I saw Atari Karts request 8 BPP FIRSTPIX! What happened???
+if (firstPix)
+       WriteLog("OP: FIRSTPIX != 0! (Scaled BM)\n");
+//#endif
+// We can ignore the RELEASE (high order) bit for now--probably forever...!
+//     uint8 flags = (p1 >> 45) & 0x0F;        // REFLECT, RMW, TRANS, RELEASE
+//Optimize: break these out to their own BOOL values [DONE]
+       uint8 flags = (p1 >> 45) & 0x07;                                // REFLECT (0), RMW (1), TRANS (2)
+       bool flagREFLECT = (flags & OPFLAG_REFLECT ? true : false),
+               flagRMW = (flags & OPFLAG_RMW ? true : false),
+               flagTRANS = (flags & OPFLAG_TRANS ? true : false);
+       uint8 index = (p1 >> 37) & 0xFE;                                // CLUT index offset (upper pix, 1-4 bpp)
+       uint32 pitch = (p1 >> 15) & 0x07;                               // Phrase pitch
 
-       int16 scanline_width = tom_getVideoModeWidth();
        uint8 * tom_ram_8 = tom_get_ram_pointer();
-       uint8 * current_line_buffer = &tom_ram_8[0x1800];
+       uint8 * paletteRAM = &tom_ram_8[0x400];
+       // This is OK as long as it's used correctly: For 16-bit RAM to RAM direct copies--NOT
+       // for use when using endian-corrected data (i.e., any of the *ReadWord functions!)
+       uint16 * paletteRAM16 = (uint16 *)paletteRAM;
 
-       uint32 vscale_fixed3p5 = (p2 >> 8) & 0xFF;
-       uint32 hscale_fixed3p5 = p2 & 0xFF;
-       float vscale = (float)vscale_fixed3p5 / 32.0f, hscale = (float)hscale_fixed3p5 / 32.0f;
+       uint8 hscale = p2 & 0xFF;
+//     uint8 horizontalRemainder = hscale;                             // Not sure if it starts full, but seems reasonable [It's not!]
+       uint8 horizontalRemainder = 0;                                  // Let's try zero! Seems to work! Yay!
+       int32 scaledWidthInPixels = (iwidth * phraseWidthToPixels[depth] * hscale) >> 5;
+       uint32 scaledPhrasePixels = (phraseWidthToPixels[depth] * hscale) >> 5;
 
-//No hacks!
-/*     if (jaguar_mainRom_crc32==0x5a5b9c68) // atari karts
-       {
-               if (vscale == 0.0f) 
-                       vscale = 1.0f;
-
-               if (ypos == 0)
-                       ypos = scanline;
-       }*/
-#ifdef OP_DEBUG_BMP
-       if (xpos == -3)
-               WriteLog("[scanline %i] %ix%i scaled to %ix%i scale (%f, %f)%i bpp pitch %i at (%i,%i) @ 0x%.8x Transluency=%s\n",
-                       scanline, iwidth,height, (int)(iwidth*hscale), (int)(height*vscale), hscale, vscale,
-                       op_bitmap_bit_depth[bitdepth], pitch, xpos, ypos, ptr, (flags&FLAGS_READMODIFY) ? "yes" : "no");
-#endif
-//No hacks!
-/*     if (jaguar_mainRom_crc32==0x2f032271)
-               ypos += 8;*/
-
-       if (!render || dwidth == 0 || ptr == 0 || pitch == 0)
-               return;
+//     WriteLog("bitmap %ix? %ibpp at %i,? firstpix=? data=0x%.8x pitch %i hflipped=%s dwidth=? (linked to ?) RMW=%s Tranparent=%s\n",
+//             iwidth, op_bitmap_bit_depth[bitdepth], xpos, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), (flags&OPFLAG_RMW ? "yes" : "no"), (flags&OPFLAG_TRANS ? "yes" : "no"));
 
-       if (bitdepth <= 3)                                      // 1, 2, 4, 8 BPP
-               iwidth *= 2, dwidth *= 2;
+// Looks like an hscale of zero means don't draw!
+       if (!render || iwidth == 0 || hscale == 0)
+               return;
 
-       uint16 scaled_width = (uint16)((float)iwidth * hscale);
+/*extern int start_logging;
+if (start_logging)
+       WriteLog("OP: Scaled bitmap %ix? %ibpp at %i,? hscale=%02X fpix=%i data=%08X pitch %i hflipped=%s dwidth=? (linked to %08X) Transluency=%s\n",
+               iwidth, op_bitmap_bit_depth[depth], xpos, hscale, firstPix, data, pitch, (flagREFLECT ? "yes" : "no"), op_pointer, (flagRMW ? "yes" : "no"));*/
+//#define OP_DEBUG_BMP
+//#ifdef OP_DEBUG_BMP
+//     WriteLog("OP: Scaled bitmap %ix%i %ibpp at %i,%i firstpix=%i data=0x%.8x pitch %i hflipped=%s dwidth=%i (linked to 0x%.8x) Transluency=%s\n",
+//             iwidth, height, op_bitmap_bit_depth[bitdepth], xpos, ypos, firstPix, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), dwidth, op_pointer, (flags&OPFLAG_RMW ? "yes" : "no"));
+//#endif
 
-       if (op_bitmap_bit_depth[bitdepth] == 4) // why ?
-               scaled_width *= 2;
+       int32 startPos = xpos, endPos = xpos +
+               (!flagREFLECT ? scaledWidthInPixels - 1 : -(scaledWidthInPixels + 1));
+       uint32 clippedWidth = 0, phraseClippedWidth = 0, dataClippedWidth = 0;
+       bool in24BPPMode = (((GET16(tom_ram_8, 0x0028) >> 1) & 0x03) == 1 ? true : false);      // VMODE
+       // Not sure if this is Jaguar Two only location or what...
+       // From the docs, it is... If we want to limit here we should think of something else.
+//     int32 limit = GET16(tom_ram_8, 0x0008);                 // LIMIT
+       int32 limit = 720;
+       int32 lbufWidth = (!in24BPPMode ? limit - 1 : (limit / 2) - 1); // Zero based limit...
 
-       if (op_bitmap_bit_depth[bitdepth] == 2) // why ?
-               scaled_width *= 4;
+       // If the image is completely to the left or right of the line buffer, then bail.
+//If in REFLECT mode, then these values are swapped! !!! FIX !!! [DONE]
+//There are four possibilities:
+//  1. image sits on left edge and no REFLECT; starts out of bounds but ends in bounds.
+//  2. image sits on left edge and REFLECT; starts in bounds but ends out of bounds.
+//  3. image sits on right edge and REFLECT; starts out of bounds but ends in bounds.
+//  4. image sits on right edge and no REFLECT; starts in bounds but ends out of bounds.
+//Numbers 2 & 4 can be caught by checking the LBUF clip while in the inner loop,
+// numbers 1 & 3 are of concern. (NB: the clip code below labels 3 & 4 the other way around.)
+// This *indirectly* handles only cases 2 & 4! And is WRONG if REFLECT is set...!
+//     if (rightMargin < 0 || leftMargin > lbufWidth)
 
-       if (op_bitmap_bit_depth[bitdepth] == 1) // why ?
-               scaled_width *= 8;
+// It might be easier to swap these (if REFLECTed) and just use XPOS down below...
+// That way, you could simply set XPOS to leftMargin if !REFLECT and to rightMargin otherwise.
+// Still have to be careful with the DATA and IWIDTH values though...
 
-       // visible ?
-/*     if ((scanline < ypos) || (scanline > (ypos + scaled_height)) || ((xpos + scaled_width) < 0)
-               || (xpos >= scanline_width))*/
-       if ((xpos + scaled_width) < 0 || xpos >= scanline_width)
+       if ((!flagREFLECT && (endPos < 0 || startPos > lbufWidth))
+               || (flagREFLECT && (startPos < 0 || endPos > lbufWidth)))
                return;
 
-       if (xpos < 0)
-       {
-               scaled_width += xpos;
-               ptr += (pitch * op_bitmap_bit_size[bitdepth] * ((uint32)((-xpos) / hscale))) >> 16;
-               xpos = 0;
-       }
+       // Otherwise, find the clip limits and clip the phrase as well...
+       // NOTE: I'm fudging here by letting the actual blit overstep the bounds of the
+       //       line buffer, but it shouldn't matter since there are two unused line
+       //       buffers below and nothing above and I'll at most write 40 bytes outside
+       //       the line buffer... I could use a fractional clip begin/end value, but
+       //       this makes the blit a *lot* more hairy. I might fix this in the future
+       //       if it becomes necessary. (JLH)
+       //       Probably wouldn't be *that* hairy. Just use a delta that tells the inner loop
+       //       which pixel in the phrase is being written, and quit when either end of phrases
+       //       is reached or line buffer extents are surpassed.
 
-       if (iwidth <= 0)
-               return;
+//This stuff is probably wrong as well... !!! FIX !!!
+//The strange thing is that it seems to work, but that's no guarantee that it's bulletproof!
+//Yup. Seems that JagMania doesn't work correctly with this...
+//Dunno if this is the problem, but Atari Karts is showing *some* of the road now...
+//Actually, it is! Or, it was. It doesn't seem to be clipping here, so the problem lies
+//elsewhere! Hmm. Putting the scaling code into the 1/2/8 BPP cases seems to draw the ground
+// a bit more accurately... Strange!
+//It's probably a case of the REFLECT flag being set and the background being written
+//from the right side of the screen...
+//But no, it isn't... At least if the diagnostics are telling the truth!
+
+       // NOTE: We're just using endPos to figure out how much, if any, to clip by.
+       // ALSO: There may be another case where we start out of bounds and end out of bounds...!
+       // !!! FIX !!!
+
+//There's a problem here with scaledPhrasePixels in that it can be forced to zero when
+//the scaling factor is small. So fix it already! !!! FIX !!!
+/*if (scaledPhrasePixels == 0)
+{
+       WriteLog("OP: [Scaled] We're about to encounter a divide by zero error!\n");
+       DumpScaledObject(p0, p1, p2);
+}//*/
+//NOTE: I'm almost 100% sure that this is wrong... And it is! :-p
 
-       if (flags & FLAGS_HFLIP)
-       {
-               if (xpos < 0 || (xpos-scaled_width) >= scanline_width)
-                       return;
+//Try a simple example...
+// Let's say we have an 8 BPP scanline with an hscale of $80 (4.0 in 3.5 fixed point). Our xpos is -10,
+// non-flipped. Pixels in the bitmap are XYZXYZXYZXYZXYZ.
+// Scaled up, they would be XXXXYYYYZZZZXXXXYYYYZZZZXXXXYYYYZZZZ...
+//
+// Normally, we would expect this in the line buffer:
+// ZZXXXXYYYYZZZZXXXXYYYYZZZZ...
+//
+// But instead we're getting:
+// XXXXYYYYZZZZXXXXYYYYZZZZ...
+//
+// or are we??? It would seem so, simply by virtue of the fact that we're NOT starting
+// on a negative boundary--or are we? Hmm...
+// cw = 10, dcw = pcw = 10 / ([8 * 4 = 32] 32) = 0, sp = -10
+//
+// Let's try a real world example:
+//
+//OP: Scaled bitmap (70, 8 BPP, spp=28) sp (-400) < 0... [new sp=-8, cw=400, dcw=pcw=14]
+//OP: Scaled bitmap (6F, 8 BPP, spp=27) sp (-395) < 0... [new sp=-17, cw=395, dcw=pcw=14]
+//
+// Really, spp is 27.75 in the second case...
+// So... If we do 395 / 27.75, we get 14. Ok so far... If we scale that back up
+// (14 * 27.75 = 388.5) and add it to the start position (-395), we get -6.5... NOT -17!
 
-               if ((xpos - scaled_width) < 0)
-                       scaled_width = xpos;
-       }
-       else
-       {
-               if ((xpos + scaled_width) < 0 || xpos >= scanline_width)
-                       return;
+//Now it seems we're working OK, at least for the first case...
+uint32 scaledPhrasePixelsUS = phraseWidthToPixels[depth] * hscale;
 
-               if ((xpos + scaled_width) > scanline_width)
-                       scaled_width = scanline_width-xpos;
-       }
-       
-       current_line_buffer += xpos * 2;
+       if (startPos < 0)                       // Case #1: Begin out, end in, L to R
+{
+extern int start_logging;
+if (start_logging)
+       WriteLog("OP: Scaled bitmap (%02X, %u BPP, spp=%u) start pos (%i) < 0...", hscale, op_bitmap_bit_depth[depth], scaledPhrasePixels, startPos);
+//             clippedWidth = 0 - startPos,
+               clippedWidth = (0 - startPos) << 5,
+//             dataClippedWidth = phraseClippedWidth = clippedWidth / scaledPhrasePixels,
+               dataClippedWidth = phraseClippedWidth = (clippedWidth / scaledPhrasePixelsUS) >> 5,
+//             startPos = 0 - (clippedWidth % scaledPhrasePixels);
+               startPos += (dataClippedWidth * scaledPhrasePixelsUS) >> 5;
+if (start_logging)
+       WriteLog(" [new sp=%i, cw=%i, dcw=pcw=%i]\n", startPos, clippedWidth, dataClippedWidth);
+}
+
+       if (endPos < 0)                         // Case #2: Begin in, end out, R to L
+               clippedWidth = 0 - endPos,
+               phraseClippedWidth = clippedWidth / scaledPhrasePixels;
 
-       int32 hscale_fixed = (int32)(65536.0f / hscale);
-       int32 cnt = 0;
+       if (endPos > lbufWidth)         // Case #3: Begin in, end out, L to R
+               clippedWidth = endPos - lbufWidth,
+               phraseClippedWidth = clippedWidth / scaledPhrasePixels;
+
+       if (startPos > lbufWidth)       // Case #4: Begin out, end in, R to L
+               clippedWidth = startPos - lbufWidth,
+               dataClippedWidth = phraseClippedWidth = clippedWidth / scaledPhrasePixels,
+               startPos = lbufWidth + (clippedWidth % scaledPhrasePixels);
+
+extern int op_start_log;
+if (op_start_log && clippedWidth != 0)
+       WriteLog("OP: Clipped line. SP=%i, EP=%i, clip=%u, iwidth=%u, hscale=%02X\n", startPos, endPos, clippedWidth, iwidth, hscale);
+if (op_start_log && startPos == 13)
+{
+       WriteLog("OP: Scaled line. SP=%i, EP=%i, clip=%u, iwidth=%u, hscale=%02X, depth=%u, firstPix=%u\n", startPos, endPos, clippedWidth, iwidth, hscale, depth, firstPix);
+       DumpScaledObject(p0, p1, p2);
+}
+       // If the image is sitting on the line buffer left or right edge, we need to compensate
+       // by decreasing the image phrase width accordingly.
+       iwidth -= phraseClippedWidth;
+
+       // Also, if we're clipping the phrase we need to make sure we're in the correct part of
+       // the pixel data.
+//     data += phraseClippedWidth * (pitch << 3);
+       data += dataClippedWidth * (pitch << 3);
 
-       if (op_bitmap_bit_depth[bitdepth] == 1)
+       // NOTE: When the bitmap is in REFLECT mode, the XPOS marks the *right* side of the
+       //       bitmap! This makes clipping & etc. MUCH, much easier...!
+//     uint32 lbufAddress = 0x1800 + (!in24BPPMode ? leftMargin * 2 : leftMargin * 4);
+//     uint32 lbufAddress = 0x1800 + (!in24BPPMode ? startPos * 2 : startPos * 4);
+       uint32 lbufAddress = 0x1800 + startPos * 2;
+       uint8 * currentLineBuffer = &tom_ram_8[lbufAddress];
+uint8 * lineBufferLowerLimit = &tom_ram_8[0x1800],
+       * lineBufferUpperLimit = &tom_ram_8[0x1800 + 719];
+
+       // Render.
+
+// Hmm. We check above for 24 BPP mode, but don't do anything about it below...
+// If we *were* in 24 BPP mode, how would you convert CRY to RGB24? Seems to me
+// that if you're in CRY mode then you wouldn't be able to use 24 BPP bitmaps
+// anyway.
+// This seems to be the case (at least according to the Midsummer docs)...!
+
+       if (depth == 0)                                                                 // 1 BPP
        {
-               if (pitch == 1)
-               {
-#include "zbmpop1.h"
-               }
-               else
+if (firstPix != 0)
+       WriteLog("OP: Scaled bitmap @ 1 BPP requesting FIRSTPIX!\n");
+               // The LSB of flags is OPFLAG_REFLECT, so sign extend it and or 2 into it.
+               int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
+
+               int pixCount = 0;
+               uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
+
+               while ((int32)iwidth > 0)
                {
-#include "zbmpop1p.h"
+                       uint8 bits = pixels >> 63;
+
+                       if (flagTRANS && bits == 0)
+                               ;       // Do nothing...
+                       else
+                       {
+                               if (!flagRMW)
+                                       // This is the *only* correct use of endian-dependent code
+                                       // (i.e., mem-to-mem direct copying)!
+                                       *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
+                               else
+                                       *currentLineBuffer = 
+                                               BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
+                                       *(currentLineBuffer + 1) = 
+                                               BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
+                       }
+
+                       currentLineBuffer += lbufDelta;
+
+                       horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
+                       while (horizontalRemainder & 0x80)
+                       {
+                               horizontalRemainder += hscale;
+                               pixCount++;
+                               pixels <<= 1;
+                       }
+
+                       if (pixCount > 63)
+                       {
+                               int phrasesToSkip = pixCount / 64, pixelShift = pixCount % 64;
+
+                               data += (pitch << 3) * phrasesToSkip;
+                               pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
+                               pixels <<= 1 * pixelShift;
+                               iwidth -= phrasesToSkip;
+                               pixCount = pixelShift;
+                       }
                }
        }
-       else if (op_bitmap_bit_depth[bitdepth] == 2)
+       else if (depth == 1)                                                    // 2 BPP
        {
-               if (pitch == 1)
-               {
-#include "zbmpop2.h"
-               }
-               else
+if (firstPix != 0)
+       WriteLog("OP: Scaled bitmap @ 2 BPP requesting FIRSTPIX!\n");
+               index &= 0xFC;                                                          // Top six bits form CLUT index
+               // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
+               int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
+
+               int pixCount = 0;
+               uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
+
+               while ((int32)iwidth > 0)
                {
-#include "zbmpop2p.h"
+                       uint8 bits = pixels >> 62;
+
+                       if (flagTRANS && bits == 0)
+                               ;       // Do nothing...
+                       else
+                       {
+                               if (!flagRMW)
+                                       // This is the *only* correct use of endian-dependent code
+                                       // (i.e., mem-to-mem direct copying)!
+                                       *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
+                               else
+                                       *currentLineBuffer = 
+                                               BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
+                                       *(currentLineBuffer + 1) = 
+                                               BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
+                       }
+
+                       currentLineBuffer += lbufDelta;
+
+                       horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
+                       while (horizontalRemainder & 0x80)
+                       {
+                               horizontalRemainder += hscale;
+                               pixCount++;
+                               pixels <<= 2;
+                       }
+
+                       if (pixCount > 31)
+                       {
+                               int phrasesToSkip = pixCount / 32, pixelShift = pixCount % 32;
+
+                               data += (pitch << 3) * phrasesToSkip;
+                               pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
+                               pixels <<= 2 * pixelShift;
+                               iwidth -= phrasesToSkip;
+                               pixCount = pixelShift;
+                       }
                }
        }
-       else if (op_bitmap_bit_depth[bitdepth] == 4)
+       else if (depth == 2)                                                    // 4 BPP
        {
-               if (pitch == 1)
-               {
-#include "zbmpop4.h"
-               }
-               else
+if (firstPix != 0)
+       WriteLog("OP: Scaled bitmap @ 4 BPP requesting FIRSTPIX!\n");
+               index &= 0xF0;                                                          // Top four bits form CLUT index
+               // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
+               int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
+
+               int pixCount = 0;
+               uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
+
+               while ((int32)iwidth > 0)
                {
-#include "zbmpop4p.h"
+                       uint8 bits = pixels >> 60;
+
+                       if (flagTRANS && bits == 0)
+                               ;       // Do nothing...
+                       else
+                       {
+                               if (!flagRMW)
+                                       // This is the *only* correct use of endian-dependent code
+                                       // (i.e., mem-to-mem direct copying)!
+                                       *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
+                               else
+                                       *currentLineBuffer = 
+                                               BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
+                                       *(currentLineBuffer + 1) = 
+                                               BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
+                       }
+
+                       currentLineBuffer += lbufDelta;
+
+                       horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
+                       while (horizontalRemainder & 0x80)
+                       {
+                               horizontalRemainder += hscale;
+                               pixCount++;
+                               pixels <<= 4;
+                       }
+
+                       if (pixCount > 15)
+                       {
+                               int phrasesToSkip = pixCount / 16, pixelShift = pixCount % 16;
+
+                               data += (pitch << 3) * phrasesToSkip;
+                               pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
+                               pixels <<= 4 * pixelShift;
+                               iwidth -= phrasesToSkip;
+                               pixCount = pixelShift;
+                       }
                }
        }
-       else if (op_bitmap_bit_depth[bitdepth] == 8)
+       else if (depth == 3)                                                    // 8 BPP
        {
-               if (pitch == 1)
-               {
-#include "zbmpop8.h"
-               }
-               else
+if (firstPix)
+       WriteLog("OP: Scaled bitmap @ 8 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
+               // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
+               int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
+
+               int pixCount = 0;
+               uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
+
+               while ((int32)iwidth > 0)
                {
-#include "zbmpop8p.h"
+                       uint8 bits = pixels >> 56;
+
+                       if (flagTRANS && bits == 0)
+                               ;       // Do nothing...
+                       else
+                       {
+                               if (!flagRMW)
+                                       // This is the *only* correct use of endian-dependent code
+                                       // (i.e., mem-to-mem direct copying)!
+                                       *(uint16 *)currentLineBuffer = paletteRAM16[bits];
+/*                             {
+                                       if (currentLineBuffer >= lineBufferLowerLimit && currentLineBuffer <= lineBufferUpperLimit)
+                                               *(uint16 *)currentLineBuffer = paletteRAM16[bits];
+                               }*/
+                               else
+                                       *currentLineBuffer = 
+                                               BLEND_CR(*currentLineBuffer, paletteRAM[bits << 1]),
+                                       *(currentLineBuffer + 1) = 
+                                               BLEND_Y(*(currentLineBuffer + 1), paletteRAM[(bits << 1) + 1]);
+                       }
+
+                       currentLineBuffer += lbufDelta;
+
+                       horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
+                       while (horizontalRemainder & 0x80)
+                       {
+                               horizontalRemainder += hscale;
+                               pixCount++;
+                               pixels <<= 8;
+                       }
+
+                       if (pixCount > 7)
+                       {
+                               int phrasesToSkip = pixCount / 8, pixelShift = pixCount % 8;
+
+                               data += (pitch << 3) * phrasesToSkip;
+                               pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
+                               pixels <<= 8 * pixelShift;
+                               iwidth -= phrasesToSkip;
+                               pixCount = pixelShift;
+                       }
                }
        }
-       else if (op_bitmap_bit_depth[bitdepth] == 16)
+       else if (depth == 4)                                                    // 16 BPP
        {
-               if (pitch == 1)
+if (firstPix != 0)
+       WriteLog("OP: Scaled bitmap @ 16 BPP requesting FIRSTPIX!\n");
+               // The LSB is OPFLAG_REFLECT, so sign extend it and OR 2 into it.
+               int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
+
+               int pixCount = 0;
+               uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
+
+               while ((int32)iwidth > 0)
                {
-#include "zbmpop16.h"
+                       uint8 bitsHi = pixels >> 56, bitsLo = pixels >> 48;
+
+                       if (flagTRANS && (bitsLo | bitsHi) == 0)
+                               ;       // Do nothing...
+                       else
+                       {
+                               if (!flagRMW)
+                                       *currentLineBuffer = bitsHi,
+                                       *(currentLineBuffer + 1) = bitsLo;
+                               else
+                                       *currentLineBuffer = 
+                                               BLEND_CR(*currentLineBuffer, bitsHi),
+                                       *(currentLineBuffer + 1) = 
+                                               BLEND_Y(*(currentLineBuffer + 1), bitsLo);
+                       }
+
+                       currentLineBuffer += lbufDelta;
+
+                       horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
+                       while (horizontalRemainder & 0x80)
+                       {
+                               horizontalRemainder += hscale;
+                               pixCount++;
+                               pixels <<= 16;
+                       }
+
+                       if (pixCount > 3)
+                       {
+                               int phrasesToSkip = pixCount / 4, pixelShift = pixCount % 4;
+
+                               data += (pitch << 3) * phrasesToSkip;
+                               pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
+                               pixels <<= 16 * pixelShift;
+
+                               iwidth -= phrasesToSkip;
+
+                               pixCount = pixelShift;
+                       }
                }
-               else
+       }
+       else if (depth == 5)                                                    // 24 BPP
+       {
+//I'm not sure that you can scale a 24 BPP bitmap properly--the JTRM seems to indicate as much.
+WriteLog("OP: Writing 24 BPP scaled bitmap!\n");
+if (firstPix != 0)
+       WriteLog("OP: Scaled bitmap @ 24 BPP requesting FIRSTPIX!\n");
+               // Not sure, but I think RMW only works with 16 BPP and below, and only in CRY mode...
+               // The LSB is OPFLAG_REFLECT, so sign extend it and or 4 into it.
+               int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 4) | 0x04;
+
+               while (iwidth--)
                {
-#include "zbmpop16p.h"
+                       // Fetch phrase...
+                       uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
+                       data += pitch << 3;                                             // Multiply pitch * 8 (optimize: precompute this value)
+
+                       for(int i=0; i<2; i++)
+                       {
+                               uint8 bits3 = pixels >> 56, bits2 = pixels >> 48,
+                                       bits1 = pixels >> 40, bits0 = pixels >> 32;
+
+                               if (flagTRANS && (bits3 | bits2 | bits1 | bits0) == 0)
+                                       ;       // Do nothing...
+                               else
+                                       *currentLineBuffer = bits3,
+                                       *(currentLineBuffer + 1) = bits2,
+                                       *(currentLineBuffer + 2) = bits1,
+                                       *(currentLineBuffer + 3) = bits0;
+
+                               currentLineBuffer += lbufDelta;
+                               pixels <<= 32;
+                       }
                }
        }
-       else
-               WriteLog("(unimplemented) %i bpp scaled bitmap\n",op_bitmap_bit_depth[bitdepth]);
+/*if (depth == 3 && startPos == 13)
+{
+if (op_start_log)
+WriteLog("OP: Writing in the margins...\n");
+       for(int i=0; i<100*2; i+=2)
+//     for(int i=0; i<14*2; i+=2)
+               tom_ram_8[0x1800 + i] = 0xFF,
+               tom_ram_8[0x1800 + i + 1] = 0xFF;
+}*/
+//     uint32 lbufAddress = 0x1800 + (!in24BPPMode ? startPos * 2 : startPos * 4);
+//     uint8 * currentLineBuffer = &tom_ram_8[lbufAddress];
 }