Shamusworld >> Repos - virtualjaguar/blobdiff - src/objectp.cpp
Extensive changes to remove gcc 4.x warnings, general code cleanup
[virtualjaguar] / src / objectp.cpp
index 44bb34dd3e6f91601387d23af7aba80e011df105..946714530a5c392a56d17882194ea63de423e971 100644 (file)
@@ -1,15 +1,20 @@
 //
 // Object Processor
 //
-// Original source by Cal2
+// Original source by David Raingeard (Cal2)
 // GCC/SDL port by Niels Wagenaar (Linux/WIN32) and Caz (BeOS)
 // Extensive cleanups/fixes/rewrites by James L. Hammons
 //
 
-#include <stdio.h>
+#include "objectp.h"
+
 #include <stdlib.h>
 #include <string.h>
+#include "tom.h"
 #include "jaguar.h"
+#include "log.h"
+#include "gpu.h"
+#include "m68k.h"
 
 //#define OP_DEBUG
 //#define OP_DEBUG_BMP
 #define BLEND_Y(dst, src)      op_blend_y[(((uint16)dst<<8)) | ((uint16)(src))]
 #define BLEND_CR(dst, src)     op_blend_cr[(((uint16)dst)<<8) | ((uint16)(src))]
 
-#define OBJECT_TYPE_BITMAP     0                       // 000
-#define OBJECT_TYPE_SCALE      1                       // 001
-#define OBJECT_TYPE_GPU                2                       // 010
-#define OBJECT_TYPE_BRANCH     3                       // 011
-#define OBJECT_TYPE_STOP       4                       // 100
+#define OBJECT_TYPE_BITMAP     0                                       // 000
+#define OBJECT_TYPE_SCALE      1                                       // 001
+#define OBJECT_TYPE_GPU                2                                       // 010
+#define OBJECT_TYPE_BRANCH     3                                       // 011
+#define OBJECT_TYPE_STOP       4                                       // 100
 
 #define CONDITION_EQUAL                                0
 #define CONDITION_LESS_THAN                    1
 #define CONDITION_OP_FLAG_SET          3
 #define CONDITION_SECOND_HALF_LINE     4
 
-#define OPFLAG_RELEASE         8                       // Bus release bit
-#define OPFLAG_TRANS           4                       // Transparency bit
-#define OPFLAG_RMW                     2                       // Read-Modify-Write bit
-#define OPFLAG_REFLECT         1                       // Horizontal mirror bit
+#define OPFLAG_RELEASE         8                                       // Bus release bit
+#define OPFLAG_TRANS           4                                       // Transparency bit
+#define OPFLAG_RMW                     2                                       // Read-Modify-Write bit
+#define OPFLAG_REFLECT         1                                       // Horizontal mirror bit
 
 // Private function prototypes
 
@@ -44,12 +49,14 @@ uint64 op_load_phrase(uint32 offset);
 
 // Local global variables
 
-static uint8 * op_blend_y;
-static uint8 * op_blend_cr;
+// Blend tables (64K each)
+static uint8 op_blend_y[0x10000];
+static uint8 op_blend_cr[0x10000];
 // There may be a problem with this "RAM" overlapping (and thus being independent of)
 // some of the regular TOM RAM...
-static uint8 objectp_ram[0x40];                        // This is based at $F00000
-uint8 objectp_running;
+//#warning objectp_ram is separated from TOM RAM--need to fix that!
+//static uint8 objectp_ram[0x40];                      // This is based at $F00000
+uint8 objectp_running = 0;
 //bool objectp_stop_reading_list;
 
 static uint8 op_bitmap_bit_depth[8] = { 1, 2, 4, 8, 16, 24, 32, 0 };
@@ -66,40 +73,41 @@ int32 phraseWidthToPixels[8] = { 64, 32, 16, 8, 4, 2, 0, 0 };
 //
 void op_init(void)
 {
-       // Blend tables (64K each)
-       memory_malloc_secure((void **)&op_blend_y, 0x10000, "Jaguar Object processor Y blend lookup table");
-       memory_malloc_secure((void **)&op_blend_cr, 0x10000, "Jaguar Object processor CR blend lookup table");
-
        // Here we calculate the saturating blend of a signed 4-bit value and an
        // existing Cyan/Red value as well as a signed 8-bit value and an existing intensity...
        // Note: CRY is 4 bits Cyan, 4 bits Red, 8 bits intensitY (16 bits per pixel in total)
        for(int i=0; i<256*256; i++)
        {
                int y = (i >> 8) & 0xFF;
-               int dy = (INT8)i;                                       // Sign extend the Y index
+               int dy = (int8)i;                                       // Sign extend the Y index
                int c1 = (i >> 8) & 0x0F;
-               int dc1 = (INT8)(i << 4) >> 4;          // Sign extend the R index
+               int dc1 = (int8)(i << 4) >> 4;          // Sign extend the R index
                int c2 = (i >> 12) & 0x0F;
-               int dc2 = (INT8)(i & 0xF0) >> 4;        // Sign extend the C index
+               int dc2 = (int8)(i & 0xF0) >> 4;        // Sign extend the C index
 
                y += dy;
+
                if (y < 0)
                        y = 0;
                else if (y > 0xFF)
                        y = 0xFF;
+
                op_blend_y[i] = y;
 
                c1 += dc1;
+
                if (c1 < 0)
                        c1 = 0;
                else if (c1 > 0x0F)
                        c1 = 0x0F;
+
                c2 += dc2;
 
                if (c2 < 0)
                        c2 = 0;
                else if (c2 > 0x0F)
                        c2 = 0x0F;
+
                op_blend_cr[i] = (c2 << 4) | c1;
        }
 
@@ -111,15 +119,15 @@ void op_init(void)
 //
 void op_reset(void)
 {
-       memset(objectp_ram, 0x00, 0x40);
+//     memset(objectp_ram, 0x00, 0x40);
        objectp_running = 0;
 }
 
 void op_done(void)
 {
-       char * opType[8] =
+       const char * opType[8] =
        { "(BITMAP)", "(SCALED BITMAP)", "(GPU INT)", "(BRANCH)", "(STOP)", "???", "???", "???" };
-       char * ccType[8] =
+       const char * ccType[8] =
                { "\"==\"", "\"<\"", "\">\"", "(opflag set)", "(second half line)", "?", "?", "?" };
 
        uint32 olp = op_get_list_pointer();
@@ -143,6 +151,9 @@ void op_done(void)
                        DumpScaledObject(op_load_phrase(olp+i), op_load_phrase(olp+i+8), op_load_phrase(olp+i+16));
        }
        WriteLog("\n");
+
+//     memory_free(op_blend_y);
+//     memory_free(op_blend_cr);
 }
 
 //
@@ -154,6 +165,7 @@ void op_done(void)
 //     F00026            W   -------- -------x   OBF - object processor flag
 //
 
+#if 0
 uint8 OPReadByte(uint32 offset, uint32 who/*=UNKNOWN*/)
 {
        offset &= 0x3F;
@@ -182,33 +194,27 @@ WriteLog("OP: Setting lo list pointer: %04X\n", data);
 if (offset == 0x22)
 WriteLog("OP: Setting hi list pointer: %04X\n", data);//*/
 }
+#endif
 
 uint32 op_get_list_pointer(void)
 {
        // Note: This register is LO / HI WORD, hence the funky look of this...
-//     return (objectp_ram[0x22] << 24) | (objectp_ram[0x23] << 16) | (objectp_ram[0x20] << 8) | objectp_ram[0x21];
-       return GET16(objectp_ram, 0x20) | (GET16(objectp_ram, 0x22) << 16);
+       return GET16(tom_ram_8, 0x20) | (GET16(tom_ram_8, 0x22) << 16);
 }
 
 // This is WRONG, since the OBF is only 16 bits wide!!! [FIXED]
 
 uint32 op_get_status_register(void)
 {
-//     return (objectp_ram[0x26] << 24) | (objectp_ram[0x27] << 16) | (objectp_ram[0x28] << 8) | objectp_ram[0x29];
-//     return GET32(objectp_ram, 0x26);
-       return GET16(objectp_ram, 0x26);
+       return GET16(tom_ram_8, 0x26);
 }
 
 // This is WRONG, since the OBF is only 16 bits wide!!! [FIXED]
 
 void op_set_status_register(uint32 data)
 {
-/*     objectp_ram[0x26] = (data & 0xFF000000) >> 24;
-       objectp_ram[0x27] = (data & 0x00FF0000) >> 16;
-       objectp_ram[0x28] = (data & 0x0000FF00) >> 8;
-       objectp_ram[0x29] |= (data & 0xFE);*/
-       objectp_ram[0x26] = (data & 0x0000FF00) >> 8;
-       objectp_ram[0x27] |= (data & 0xFE);
+       tom_ram_8[0x26] = (data & 0x0000FF00) >> 8;
+       tom_ram_8[0x27] |= (data & 0xFE);
 }
 
 void op_set_current_object(uint64 object)
@@ -225,15 +231,15 @@ void op_set_current_object(uint64 object)
        objectp_ram[0x15] = object & 0xFF; object >>= 8;
        objectp_ram[0x14] = object & 0xFF;*/
 // Let's try regular good old big endian...
-       objectp_ram[0x17] = object & 0xFF; object >>= 8;
-       objectp_ram[0x16] = object & 0xFF; object >>= 8;
-       objectp_ram[0x15] = object & 0xFF; object >>= 8;
-       objectp_ram[0x14] = object & 0xFF; object >>= 8;
-
-       objectp_ram[0x13] = object & 0xFF; object >>= 8;
-       objectp_ram[0x12] = object & 0xFF; object >>= 8;
-       objectp_ram[0x11] = object & 0xFF; object >>= 8;
-       objectp_ram[0x10] = object & 0xFF;
+       tom_ram_8[0x17] = object & 0xFF; object >>= 8;
+       tom_ram_8[0x16] = object & 0xFF; object >>= 8;
+       tom_ram_8[0x15] = object & 0xFF; object >>= 8;
+       tom_ram_8[0x14] = object & 0xFF; object >>= 8;
+
+       tom_ram_8[0x13] = object & 0xFF; object >>= 8;
+       tom_ram_8[0x12] = object & 0xFF; object >>= 8;
+       tom_ram_8[0x11] = object & 0xFF; object >>= 8;
+       tom_ram_8[0x10] = object & 0xFF;
 }
 
 uint64 op_load_phrase(uint32 offset)
@@ -258,7 +264,8 @@ void DumpScaledObject(uint64 p0, uint64 p1, uint64 p2)
        WriteLog(" %08X --> phrase %08X %08X\n", op_pointer, (uint32)(p1>>32), (uint32)(p1&0xFFFFFFFF));
        WriteLog("                 %08X --> phrase %08X %08X ", op_pointer+8, (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
        uint8 bitdepth = (p1 >> 12) & 0x07;
-       int16 ypos = ((p0 >> 3) & 0x3FF);                       // ??? What if not interlaced (/2)?
+//WAS: int16 ypos = ((p0 >> 3) & 0x3FF);                       // ??? What if not interlaced (/2)?
+       int16 ypos = ((p0 >> 3) & 0x7FF);                       // ??? What if not interlaced (/2)?
        int32 xpos = p1 & 0xFFF;
        xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);
        uint32 iwidth = ((p1 >> 28) & 0x3FF);
@@ -283,7 +290,8 @@ void DumpFixedObject(uint64 p0, uint64 p1)
        WriteLog(" (BITMAP)");
        WriteLog(" %08X --> phrase %08X %08X\n", op_pointer, (uint32)(p1>>32), (uint32)(p1&0xFFFFFFFF));
        uint8 bitdepth = (p1 >> 12) & 0x07;
-       int16 ypos = ((p0 >> 3) & 0x3FF);                       // ??? What if not interlaced (/2)?
+//WAS: int16 ypos = ((p0 >> 3) & 0x3FF);                       // ??? What if not interlaced (/2)?
+       int16 ypos = ((p0 >> 3) & 0x7FF);                       // ??? What if not interlaced (/2)?
        int32 xpos = p1 & 0xFFF;
        xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);
        uint32 iwidth = ((p1 >> 28) & 0x3FF);
@@ -304,6 +312,7 @@ void DumpFixedObject(uint64 p0, uint64 p1)
 //
 //Need to fix this so that when a GPU object IRQ happens, we can pick up OP processing
 //where we left off. !!! FIX !!!
+#warning Need to fix this so that when an GPU object IRQ happens, we can pick up OP processing where we left off. !!! FIX !!!
 void OPProcessList(int scanline, bool render)
 {
 extern int op_start_log;
@@ -314,6 +323,9 @@ extern int op_start_log;
 
 //     objectp_stop_reading_list = false;
 
+//WriteLog("OP: Processing line #%u (OLP=%08X)...\n", scanline, op_pointer);
+//op_done();
+
 // *** BEGIN OP PROCESSOR TESTING ONLY ***
 extern bool interactiveMode;
 extern bool iToggle;
@@ -322,6 +334,8 @@ bool inhibit;
 int bitmapCounter = 0;
 // *** END OP PROCESSOR TESTING ONLY ***
 
+       uint32 opCyclesToRun = 10000;                                   // This is a pulled-out-of-the-air value (will need to be fixed, obviously!)
+
 //     if (op_pointer) WriteLog(" new op list at 0x%.8x scanline %i\n",op_pointer,scanline);
        while (op_pointer)
        {
@@ -335,9 +349,11 @@ else
 //                     return;
                        
                uint64 p0 = op_load_phrase(op_pointer);
+//WriteLog("\t%08X type %i\n", op_pointer, (uint8)p0 & 0x07);
                op_pointer += 8;
 if (scanline == tom_get_vdb() && op_start_log)
 //if (scanline == 215 && op_start_log)
+//if (scanline == 28 && op_start_log)
 {
 WriteLog("%08X --> phrase %08X %08X", op_pointer - 8, (int)(p0>>32), (int)(p0&0xFFFFFFFF));
 if ((p0 & 0x07) == OBJECT_TYPE_BITMAP)
@@ -346,7 +362,8 @@ WriteLog(" (BITMAP) ");
 uint64 p1 = op_load_phrase(op_pointer);
 WriteLog("\n%08X --> phrase %08X %08X ", op_pointer, (int)(p1>>32), (int)(p1&0xFFFFFFFF));
        uint8 bitdepth = (p1 >> 12) & 0x07;
-       int16 ypos = ((p0 >> 3) & 0x3FF);                       // ??? What if not interlaced (/2)?
+//WAS: int16 ypos = ((p0 >> 3) & 0x3FF);                       // ??? What if not interlaced (/2)?
+       int16 ypos = ((p0 >> 3) & 0x7FF);                       // ??? What if not interlaced (/2)?
 int32 xpos = p1 & 0xFFF;
 xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);
        uint32 iwidth = ((p1 >> 28) & 0x3FF);
@@ -368,7 +385,8 @@ uint64 p1 = op_load_phrase(op_pointer), p2 = op_load_phrase(op_pointer+8);
 WriteLog("\n%08X --> phrase %08X %08X ", op_pointer, (int)(p1>>32), (int)(p1&0xFFFFFFFF));
 WriteLog("\n%08X --> phrase %08X %08X ", op_pointer+8, (int)(p2>>32), (int)(p2&0xFFFFFFFF));
        uint8 bitdepth = (p1 >> 12) & 0x07;
-       int16 ypos = ((p0 >> 3) & 0x3FF);                       // ??? What if not interlaced (/2)?
+//WAS: int16 ypos = ((p0 >> 3) & 0x3FF);                       // ??? What if not interlaced (/2)?
+       int16 ypos = ((p0 >> 3) & 0x7FF);                       // ??? What if not interlaced (/2)?
 int32 xpos = p1 & 0xFFF;
 xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);
        uint32 iwidth = ((p1 >> 28) & 0x3FF);
@@ -402,12 +420,12 @@ if ((p0 & 0x07) == OBJECT_TYPE_STOP)
 WriteLog("    --> List end\n");
 }//*/
                
-//             WriteLog("%08X type %i\n", op_pointer, (uint8)p0 & 0x07);               
                switch ((uint8)p0 & 0x07)
                {
                case OBJECT_TYPE_BITMAP:
                {
-                       uint16 ypos = (p0 >> 3) & 0x3FF;
+//WAS:                 uint16 ypos = (p0 >> 3) & 0x3FF;
+                       uint16 ypos = (p0 >> 3) & 0x7FF;
 // This is only theory implied by Rayman...!
 // It seems that if the YPOS is zero, then bump the YPOS value so that it coincides with
 // the VDB value. With interlacing, this would be slightly more tricky.
@@ -441,15 +459,15 @@ if (!inhibit)     // For OP testing only!
 //???Does this really happen??? Doesn't seem to work if you do this...!
 //Probably not. Must be a bug in the documentation...!
 //                             uint32 link = (p0 & 0x7FFFF000000) >> 21;
-//                             SET16(objectp_ram, 0x20, link & 0xFFFF);        // OLP
-//                             SET16(objectp_ram, 0x22, link >> 16);
+//                             SET16(tom_ram_8, 0x20, link & 0xFFFF);  // OLP
+//                             SET16(tom_ram_8, 0x22, link >> 16);
 /*                             uint32 height = (p0 & 0xFFC000) >> 14;
                                if (height - 1 > 0)
                                        height--;*/
                                // NOTE: Would subtract 2 if in interlaced mode...!
 //                             uint64 height = ((p0 & 0xFFC000) - 0x4000) & 0xFFC000;
 //                             if (height)
-                                       height--;
+                               height--;
 
                                uint64 data = (p0 & 0xFFFFF80000000000LL) >> 40;
                                uint64 dwidth = (p1 & 0xFFC0000) >> 15;
@@ -460,12 +478,22 @@ if (!inhibit)     // For OP testing only!
                                p0 |= data << 40;
                                OPStorePhrase(oldOPP, p0);
                        }
+//WriteLog("\t\tOld OP: %08X -> ", op_pointer);
+//Temp, for testing...
+//No doubt, this type of check will break all kinds of stuff... !!! FIX !!!
+//And it does! !!! FIX !!!
+//Let's remove this "fix" since it screws up more than it fixes.
+/*     if (op_pointer > ((p0 & 0x000007FFFF000000LL) >> 21))
+               return;*/
+
                        op_pointer = (p0 & 0x000007FFFF000000LL) >> 21;
+//WriteLog("New OP: %08X\n", op_pointer);
                        break;
                }
                case OBJECT_TYPE_SCALE:
                {
-                       uint16 ypos = (p0 >> 3) & 0x3FF;
+//WAS:                 uint16 ypos = (p0 >> 3) & 0x3FF;
+                       uint16 ypos = (p0 >> 3) & 0x7FF;
                        uint32 height = (p0 & 0xFFC000) >> 14;
                        uint32 oldOPP = op_pointer - 8;
 // *** BEGIN OP PROCESSOR TESTING ONLY ***
@@ -500,12 +528,14 @@ if (!inhibit)     // For OP testing only!
                                if (vscale == 0)
                                        vscale = 0x20;                                  // OP bug??? Nope, it isn't...! Or is it?
 
-/*extern int start_logging;
-if (start_logging)
-       WriteLog("--> Returned from scaled bitmap processing (rem=%02X, vscale=%02X)...\n", remainder, vscale);*/
+//extern int start_logging;
+//if (start_logging)
+//     WriteLog("--> Returned from scaled bitmap processing (rem=%02X, vscale=%02X)...\n", remainder, vscale);//*/
 //Locks up here:
 //--> Returned from scaled bitmap processing (rem=20, vscale=80)...
 //There are other problems here, it looks like...
+//Another lock up:
+//About to execute OP (508)...
 /*
 OP: Scaled bitmap 4x? 4bpp at 38,? hscale=7C fpix=0 data=00075E28 pitch 1 hflipped=no dwidth=? (linked to 00071118) Transluency=no
 --> Returned from scaled bitmap processing (rem=50, vscale=7C)...
@@ -570,8 +600,8 @@ OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756E8 pitch 1 hflipp
 
                                remainder -= 0x20;                                      // 1.0f in [3.5] fixed point format
 
-/*if (start_logging)
-       WriteLog("--> Finished writebacks...\n");*/
+//if (start_logging)
+//     WriteLog("--> Finished writebacks...\n");//*/
 
 //WriteLog(" [%08X%08X -> ", (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
                                p2 &= ~0x0000000000FF0000LL;
@@ -658,6 +688,13 @@ OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756E8 pitch 1 hflipp
                        WriteLog("op: unknown object type %i\n", ((uint8)p0 & 0x07)); 
                        return;
                }
+
+               // Here is a little sanity check to keep the OP from locking up the machine
+               // when fed bad data. Better would be to count how many actual cycles it used
+               // and bail out/reenter to properly simulate an overloaded OP... !!! FIX !!!
+               opCyclesToRun--;
+               if (!opCyclesToRun)
+                       return;
        }
 }
 
@@ -1110,8 +1147,10 @@ if (firstPix)
        uint16 * paletteRAM16 = (uint16 *)paletteRAM;
 
        uint8 hscale = p2 & 0xFF;
-//     uint8 horizontalRemainder = hscale;                             // Not sure if it starts full, but seems reasonable [It's not!]
-       uint8 horizontalRemainder = 0;                                  // Let's try zero! Seems to work! Yay!
+// Hmm. It seems that fixing the horizontal scale necessitated re-fixing this. Not sure why,
+// but seems to be consistent with the vertical scaling now (and it may turn out to be wrong!)...
+       uint8 horizontalRemainder = hscale;                             // Not sure if it starts full, but seems reasonable [It's not!]
+//     uint8 horizontalRemainder = 0;                                  // Let's try zero! Seems to work! Yay! [No, it doesn't!]
        int32 scaledWidthInPixels = (iwidth * phraseWidthToPixels[depth] * hscale) >> 5;
        uint32 scaledPhrasePixels = (phraseWidthToPixels[depth] * hscale) >> 5;
 
@@ -1259,6 +1298,13 @@ if (op_start_log && startPos == 13)
 {
        WriteLog("OP: Scaled line. SP=%i, EP=%i, clip=%u, iwidth=%u, hscale=%02X, depth=%u, firstPix=%u\n", startPos, endPos, clippedWidth, iwidth, hscale, depth, firstPix);
        DumpScaledObject(p0, p1, p2);
+       if (iwidth == 7)
+       {
+               WriteLog("    %08X: ", data);
+               for(int i=0; i<7*8; i++)
+                       WriteLog("%02X ", JaguarReadByte(data+i));
+               WriteLog("\n");
+       }
 }
        // If the image is sitting on the line buffer left or right edge, we need to compensate
        // by decreasing the image phrase width accordingly.
@@ -1275,8 +1321,8 @@ if (op_start_log && startPos == 13)
 //     uint32 lbufAddress = 0x1800 + (!in24BPPMode ? startPos * 2 : startPos * 4);
        uint32 lbufAddress = 0x1800 + startPos * 2;
        uint8 * currentLineBuffer = &tom_ram_8[lbufAddress];
-uint8 * lineBufferLowerLimit = &tom_ram_8[0x1800],
-       * lineBufferUpperLimit = &tom_ram_8[0x1800 + 719];
+//uint8 * lineBufferLowerLimit = &tom_ram_8[0x1800],
+//     * lineBufferUpperLimit = &tom_ram_8[0x1800 + 719];
 
        // Render.
 
@@ -1317,13 +1363,20 @@ if (firstPix != 0)
 
                        currentLineBuffer += lbufDelta;
 
-                       horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
+/*                     horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
                        while (horizontalRemainder & 0x80)
+                       {
+                               horizontalRemainder += hscale;
+                               pixCount++;
+                               pixels <<= 1;
+                       }//*/
+                       while (horizontalRemainder <= 0x20)             // I.e., it's <= 0 (*before* subtraction)
                        {
                                horizontalRemainder += hscale;
                                pixCount++;
                                pixels <<= 1;
                        }
+                       horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
 
                        if (pixCount > 63)
                        {
@@ -1369,13 +1422,20 @@ if (firstPix != 0)
 
                        currentLineBuffer += lbufDelta;
 
-                       horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
+/*                     horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
                        while (horizontalRemainder & 0x80)
+                       {
+                               horizontalRemainder += hscale;
+                               pixCount++;
+                               pixels <<= 2;
+                       }//*/
+                       while (horizontalRemainder <= 0x20)             // I.e., it's <= 0 (*before* subtraction)
                        {
                                horizontalRemainder += hscale;
                                pixCount++;
                                pixels <<= 2;
                        }
+                       horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
 
                        if (pixCount > 31)
                        {
@@ -1421,13 +1481,20 @@ if (firstPix != 0)
 
                        currentLineBuffer += lbufDelta;
 
-                       horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
+/*                     horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
                        while (horizontalRemainder & 0x80)
+                       {
+                               horizontalRemainder += hscale;
+                               pixCount++;
+                               pixels <<= 4;
+                       }//*/
+                       while (horizontalRemainder <= 0x20)             // I.e., it's <= 0 (*before* subtraction)
                        {
                                horizontalRemainder += hscale;
                                pixCount++;
                                pixels <<= 4;
                        }
+                       horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
 
                        if (pixCount > 15)
                        {
@@ -1476,13 +1543,13 @@ if (firstPix)
 
                        currentLineBuffer += lbufDelta;
 
-                       horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
-                       while (horizontalRemainder & 0x80)
+                       while (horizontalRemainder <= 0x20)             // I.e., it's <= 0 (*before* subtraction)
                        {
                                horizontalRemainder += hscale;
                                pixCount++;
                                pixels <<= 8;
                        }
+                       horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
 
                        if (pixCount > 7)
                        {
@@ -1526,14 +1593,21 @@ if (firstPix != 0)
 
                        currentLineBuffer += lbufDelta;
 
-                       horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
+/*                     horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
                        while (horizontalRemainder & 0x80)
+                       {
+                               horizontalRemainder += hscale;
+                               pixCount++;
+                               pixels <<= 16;
+                       }//*/
+                       while (horizontalRemainder <= 0x20)             // I.e., it's <= 0 (*before* subtraction)
                        {
                                horizontalRemainder += hscale;
                                pixCount++;
                                pixels <<= 16;
                        }
-
+                       horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
+//*/
                        if (pixCount > 3)
                        {
                                int phrasesToSkip = pixCount / 4, pixelShift = pixCount % 4;
@@ -1582,15 +1656,4 @@ if (firstPix != 0)
                        }
                }
        }
-/*if (depth == 3 && startPos == 13)
-{
-if (op_start_log)
-WriteLog("OP: Writing in the margins...\n");
-       for(int i=0; i<100*2; i+=2)
-//     for(int i=0; i<14*2; i+=2)
-               tom_ram_8[0x1800 + i] = 0xFF,
-               tom_ram_8[0x1800 + i + 1] = 0xFF;
-}*/
-//     uint32 lbufAddress = 0x1800 + (!in24BPPMode ? startPos * 2 : startPos * 4);
-//     uint8 * currentLineBuffer = &tom_ram_8[lbufAddress];
 }