+ data += (pitch << 3) * phrasesToSkip;
+ pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
+ pixels <<= 1 * pixelShift;
+ iwidth -= phrasesToSkip;
+ pixCount = pixelShift;
+ }
+ }
+ }
+ else if (depth == 1) // 2 BPP
+ {
+if (firstPix != 0)
+ WriteLog("OP: Scaled bitmap @ 2 BPP requesting FIRSTPIX!\n");
+ index &= 0xFC; // Top six bits form CLUT index
+ // The LSB is OPFLAG_REFLECT, so sign extend it and OR 2 into it.
+ int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
+
+ int pixCount = 0;
+ uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
+
+ while ((int32)iwidth > 0)
+ {
+ uint8 bits = pixels >> 62;
+
+ if (flagTRANS && bits == 0)
+ ; // Do nothing...
+ else
+ {
+ if (!flagRMW)
+ // This is the *only* correct use of endian-dependent code
+ // (i.e., mem-to-mem direct copying)!
+ *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
+ else
+ *currentLineBuffer =
+ BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
+ *(currentLineBuffer + 1) =
+ BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
+ }
+
+ currentLineBuffer += lbufDelta;
+
+/* horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
+ while (horizontalRemainder & 0x80)
+ {
+ horizontalRemainder += hscale;
+ pixCount++;
+ pixels <<= 2;
+ }//*/
+ while (horizontalRemainder <= 0x20) // I.e., it's <= 0 (*before* subtraction)
+ {
+ horizontalRemainder += hscale;
+ pixCount++;
+ pixels <<= 2;
+ }
+ horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
+
+ if (pixCount > 31)
+ {
+ int phrasesToSkip = pixCount / 32, pixelShift = pixCount % 32;
+
+ data += (pitch << 3) * phrasesToSkip;
+ pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
+ pixels <<= 2 * pixelShift;
+ iwidth -= phrasesToSkip;
+ pixCount = pixelShift;
+ }
+ }
+ }
+ else if (depth == 2) // 4 BPP
+ {
+if (firstPix != 0)
+ WriteLog("OP: Scaled bitmap @ 4 BPP requesting FIRSTPIX!\n");
+ index &= 0xF0; // Top four bits form CLUT index
+ // The LSB is OPFLAG_REFLECT, so sign extend it and OR 2 into it.
+ int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
+
+ int pixCount = 0;
+ uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
+
+ while ((int32)iwidth > 0)
+ {
+ uint8 bits = pixels >> 60;
+
+ if (flagTRANS && bits == 0)
+ ; // Do nothing...
+ else
+ {
+ if (!flagRMW)
+ // This is the *only* correct use of endian-dependent code
+ // (i.e., mem-to-mem direct copying)!
+ *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
+ else
+ *currentLineBuffer =
+ BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
+ *(currentLineBuffer + 1) =
+ BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
+ }
+
+ currentLineBuffer += lbufDelta;
+
+/* horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
+ while (horizontalRemainder & 0x80)
+ {
+ horizontalRemainder += hscale;
+ pixCount++;
+ pixels <<= 4;
+ }//*/
+ while (horizontalRemainder <= 0x20) // I.e., it's <= 0 (*before* subtraction)
+ {
+ horizontalRemainder += hscale;
+ pixCount++;
+ pixels <<= 4;
+ }
+ horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
+
+ if (pixCount > 15)
+ {
+ int phrasesToSkip = pixCount / 16, pixelShift = pixCount % 16;
+
+ data += (pitch << 3) * phrasesToSkip;
+ pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
+ pixels <<= 4 * pixelShift;
+ iwidth -= phrasesToSkip;
+ pixCount = pixelShift;
+ }
+ }
+ }
+ else if (depth == 3) // 8 BPP
+ {
+if (firstPix)
+ WriteLog("OP: Scaled bitmap @ 8 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
+ // The LSB is OPFLAG_REFLECT, so sign extend it and OR 2 into it.
+ int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
+
+ int pixCount = 0;
+ uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
+
+ while ((int32)iwidth > 0)
+ {
+ uint8 bits = pixels >> 56;
+
+ if (flagTRANS && bits == 0)
+ ; // Do nothing...
+ else
+ {
+ if (!flagRMW)
+ // This is the *only* correct use of endian-dependent code
+ // (i.e., mem-to-mem direct copying)!
+ *(uint16 *)currentLineBuffer = paletteRAM16[bits];
+/* {
+ if (currentLineBuffer >= lineBufferLowerLimit && currentLineBuffer <= lineBufferUpperLimit)
+ *(uint16 *)currentLineBuffer = paletteRAM16[bits];
+ }*/
+ else
+ *currentLineBuffer =
+ BLEND_CR(*currentLineBuffer, paletteRAM[bits << 1]),
+ *(currentLineBuffer + 1) =
+ BLEND_Y(*(currentLineBuffer + 1), paletteRAM[(bits << 1) + 1]);
+ }
+
+ currentLineBuffer += lbufDelta;
+
+ while (horizontalRemainder <= 0x20) // I.e., it's <= 0 (*before* subtraction)
+ {
+ horizontalRemainder += hscale;
+ pixCount++;
+ pixels <<= 8;
+ }
+ horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
+
+ if (pixCount > 7)
+ {
+ int phrasesToSkip = pixCount / 8, pixelShift = pixCount % 8;
+
+ data += (pitch << 3) * phrasesToSkip;
+ pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
+ pixels <<= 8 * pixelShift;
+ iwidth -= phrasesToSkip;
+ pixCount = pixelShift;
+ }
+ }
+ }
+ else if (depth == 4) // 16 BPP
+ {
+if (firstPix != 0)
+ WriteLog("OP: Scaled bitmap @ 16 BPP requesting FIRSTPIX!\n");
+ // The LSB is OPFLAG_REFLECT, so sign extend it and OR 2 into it.
+ int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
+
+ int pixCount = 0;
+ uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
+
+ while ((int32)iwidth > 0)
+ {
+ uint8 bitsHi = pixels >> 56, bitsLo = pixels >> 48;
+
+ if (flagTRANS && (bitsLo | bitsHi) == 0)
+ ; // Do nothing...
+ else
+ {
+ if (!flagRMW)
+ *currentLineBuffer = bitsHi,
+ *(currentLineBuffer + 1) = bitsLo;
+ else
+ *currentLineBuffer =
+ BLEND_CR(*currentLineBuffer, bitsHi),
+ *(currentLineBuffer + 1) =
+ BLEND_Y(*(currentLineBuffer + 1), bitsLo);
+ }
+
+ currentLineBuffer += lbufDelta;
+
+/* horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
+ while (horizontalRemainder & 0x80)
+ {
+ horizontalRemainder += hscale;
+ pixCount++;
+ pixels <<= 16;
+ }//*/
+ while (horizontalRemainder <= 0x20) // I.e., it's <= 0 (*before* subtraction)
+ {
+ horizontalRemainder += hscale;
+ pixCount++;
+ pixels <<= 16;
+ }
+ horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
+//*/
+ if (pixCount > 3)
+ {
+ int phrasesToSkip = pixCount / 4, pixelShift = pixCount % 4;
+
+ data += (pitch << 3) * phrasesToSkip;
+ pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
+ pixels <<= 16 * pixelShift;
+
+ iwidth -= phrasesToSkip;
+
+ pixCount = pixelShift;
+ }
+ }
+ }
+ else if (depth == 5) // 24 BPP
+ {
+//I'm not sure that you can scale a 24 BPP bitmap properly--the JTRM seems to indicate as much.
+WriteLog("OP: Writing 24 BPP scaled bitmap!\n");
+if (firstPix != 0)
+ WriteLog("OP: Scaled bitmap @ 24 BPP requesting FIRSTPIX!\n");
+ // Not sure, but I think RMW only works with 16 BPP and below, and only in CRY mode...
+ // The LSB is OPFLAG_REFLECT, so sign extend it and OR 4 into it.
+ int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 4) | 0x04;
+
+ while (iwidth--)
+ {
+ // Fetch phrase...
+ uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
+ data += pitch << 3; // Multiply pitch * 8 (optimize: precompute this value)
+
+ for(int i=0; i<2; i++)
+ {
+ uint8 bits3 = pixels >> 56, bits2 = pixels >> 48,
+ bits1 = pixels >> 40, bits0 = pixels >> 32;
+
+ if (flagTRANS && (bits3 | bits2 | bits1 | bits0) == 0)
+ ; // Do nothing...
+ else
+ *currentLineBuffer = bits3,
+ *(currentLineBuffer + 1) = bits2,
+ *(currentLineBuffer + 2) = bits1,
+ *(currentLineBuffer + 3) = bits0;
+
+ currentLineBuffer += lbufDelta;
+ pixels <<= 32;
+ }
+ }
+ }