X-Git-Url: http://shamusworld.gotdns.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fblitter.cpp;h=13960996e6e9acf7afe55f786ac1a2ef6bfede3a;hb=4906545f5d77d6d7a006fa0d765dc0ae522e477f;hp=046177106a2235b25eb28bd5e8fdc91b54652c12;hpb=fedd6b08fd48114f735fc7d37e350a09a306e22c;p=virtualjaguar

diff --git a/src/blitter.cpp b/src/blitter.cpp
index 0461771..1396099 100644
--- a/src/blitter.cpp
+++ b/src/blitter.cpp
@@ -1,20 +1,38 @@
 //
 // Blitter core
 //
-// by James L. Hammons
+// by James Hammons
+// (C) 2010 Underground Software
+//
+// JLH = James Hammons <jlhamm@acm.org>
+//
+// Who  When        What
+// ---  ----------  -------------------------------------------------------------
+// JLH  01/16/2010  Created this log ;-)
+//
+
 //
 // I owe a debt of gratitude to Curt Vendel and to John Mathieson--to Curt
 // for supplying the Oberon ASIC nets and to John for making them available
 // to Curt. ;-) Without that excellent documentation which shows *exactly*
 // what's going on inside the TOM chip, we'd all still be guessing as to how
 // the wily blitter and other pieces of the Jaguar puzzle actually work.
+// Now, how about those JERRY ASIC nets, gentlemen...? [We have those now!] ;-)
 //
 
-#include "jaguar.h"
 #include "blitter.h"
 
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include "jaguar.h"
+#include "log.h"
+//#include "memory.h"
+
 // Various conditional compilation goodies...
 
+//#define LOG_BLITS
+
 //#define USE_ORIGINAL_BLITTER
 //#define USE_MIDSUMMER_BLITTER
 #define USE_MIDSUMMER_BLITTER_MKII
@@ -47,39 +65,39 @@ void BlitterMidsummer2(void);
 
 // Blitter registers (offsets from F02200)
 
-#define A1_BASE			((UINT32)0x00)
-#define A1_FLAGS		((UINT32)0x04)
-#define A1_CLIP			((UINT32)0x08)	// Height and width values for clipping
-#define A1_PIXEL		((UINT32)0x0C)	// Integer part of the pixel (Y.i and X.i)
-#define A1_STEP			((UINT32)0x10)	// Integer part of the step
-#define A1_FSTEP		((UINT32)0x14)	// Fractional part of the step
-#define A1_FPIXEL		((UINT32)0x18)	// Fractional part of the pixel (Y.f and X.f)
-#define A1_INC			((UINT32)0x1C)	// Integer part of the increment
-#define A1_FINC			((UINT32)0x20)	// Fractional part of the increment
-#define A2_BASE			((UINT32)0x24)
-#define A2_FLAGS		((UINT32)0x28)
-#define A2_MASK			((UINT32)0x2C)	// Modulo values for x and y (M.y  and M.x)
-#define A2_PIXEL		((UINT32)0x30)	// Integer part of the pixel (no fractional part for A2)
-#define A2_STEP			((UINT32)0x34)	// Integer part of the step (no fractional part for A2)
-#define COMMAND			((UINT32)0x38)
-#define PIXLINECOUNTER	((UINT32)0x3C)	// Inner & outer loop values
-#define SRCDATA			((UINT32)0x40)
-#define DSTDATA			((UINT32)0x48)
-#define DSTZ			((UINT32)0x50)
-#define SRCZINT			((UINT32)0x58)
-#define SRCZFRAC		((UINT32)0x60)
-#define PATTERNDATA		((UINT32)0x68)
-#define INTENSITYINC	((UINT32)0x70)
-#define ZINC			((UINT32)0x74)
-#define COLLISIONCTRL	((UINT32)0x78)
-#define PHRASEINT0		((UINT32)0x7C)
-#define PHRASEINT1		((UINT32)0x80)
-#define PHRASEINT2		((UINT32)0x84)
-#define PHRASEINT3		((UINT32)0x88)
-#define PHRASEZ0		((UINT32)0x8C)
-#define PHRASEZ1		((UINT32)0x90)
-#define PHRASEZ2		((UINT32)0x94)
-#define PHRASEZ3		((UINT32)0x98)
+#define A1_BASE			((uint32)0x00)
+#define A1_FLAGS		((uint32)0x04)	// Packed fields incl. depth (bits 3-5), Z offset (6-8), width (9-14), X add control (16-17)
+#define A1_CLIP			((uint32)0x08)	// Height and width values for clipping
+#define A1_PIXEL		((uint32)0x0C)	// Integer part of the pixel (Y.i and X.i)
+#define A1_STEP			((uint32)0x10)	// Integer part of the step
+#define A1_FSTEP		((uint32)0x14)	// Fractional part of the step
+#define A1_FPIXEL		((uint32)0x18)	// Fractional part of the pixel (Y.f and X.f)
+#define A1_INC			((uint32)0x1C)	// Integer part of the increment
+#define A1_FINC			((uint32)0x20)	// Fractional part of the increment
+#define A2_BASE			((uint32)0x24)
+#define A2_FLAGS		((uint32)0x28)	// Decoded with the same shifts/masks as A1_FLAGS (Z offset, X add control, ...)
+#define A2_MASK			((uint32)0x2C)	// Modulo values for x and y (M.y  and M.x)
+#define A2_PIXEL		((uint32)0x30)	// Integer part of the pixel (no fractional part for A2)
+#define A2_STEP			((uint32)0x34)	// Integer part of the step (no fractional part for A2)
+#define COMMAND			((uint32)0x38)
+#define PIXLINECOUNTER	((uint32)0x3C)	// Inner & outer loop values
+#define SRCDATA			((uint32)0x40)
+#define DSTDATA			((uint32)0x48)
+#define DSTZ			((uint32)0x50)	// Destination Z data (fetched with READ_RDATA into dstzdata)
+#define SRCZINT			((uint32)0x58)	// Source Z data (fetched with READ_RDATA into srczdata)
+#define SRCZFRAC		((uint32)0x60)
+#define PATTERNDATA		((uint32)0x68)
+#define INTENSITYINC	((uint32)0x70)	// Gouraud add value; its low 24 bits are sign-extended into gd_ia
+#define ZINC			((uint32)0x74)
+#define COLLISIONCTRL	((uint32)0x78)
+#define PHRASEINT0		((uint32)0x7C)
+#define PHRASEINT1		((uint32)0x80)
+#define PHRASEINT2		((uint32)0x84)
+#define PHRASEINT3		((uint32)0x88)
+#define PHRASEZ0		((uint32)0x8C)
+#define PHRASEZ1		((uint32)0x90)
+#define PHRASEZ2		((uint32)0x94)
+#define PHRASEZ3		((uint32)0x98)	// Highest offset listed; the reset routine clears 0xA0 bytes of blitter_ram, which spans all of these
 
 // Blitter command bits
 
@@ -142,34 +160,34 @@ void BlitterMidsummer2(void);
 //Put 'em back, once we fix the problem!!! [KO]
 // 1 bpp pixel read
 #define PIXEL_SHIFT_1(a)      (((~a##_x) >> 16) & 7)
-#define PIXEL_OFFSET_1(a)     (((((UINT32)a##_y >> 16) * a##_width / 8) + (((UINT32)a##_x >> 19) & ~7)) * (1 + a##_pitch) + (((UINT32)a##_x >> 19) & 7))
+#define PIXEL_OFFSET_1(a)     (((((uint32)a##_y >> 16) * a##_width / 8) + (((uint32)a##_x >> 19) & ~7)) * (1 + a##_pitch) + (((uint32)a##_x >> 19) & 7))
 #define READ_PIXEL_1(a)       ((JaguarReadByte(a##_addr+PIXEL_OFFSET_1(a), BLITTER) >> PIXEL_SHIFT_1(a)) & 0x01)
 //#define READ_PIXEL_1(a)       ((JaguarReadByte(a##_addr+PIXEL_OFFSET_1(a)) >> PIXEL_SHIFT_1(a)) & 0x01)
 
 // 2 bpp pixel read
 #define PIXEL_SHIFT_2(a)      (((~a##_x) >> 15) & 6)
-#define PIXEL_OFFSET_2(a)     (((((UINT32)a##_y >> 16) * a##_width / 4) + (((UINT32)a##_x >> 18) & ~7)) * (1 + a##_pitch) + (((UINT32)a##_x >> 18) & 7))
+#define PIXEL_OFFSET_2(a)     (((((uint32)a##_y >> 16) * a##_width / 4) + (((uint32)a##_x >> 18) & ~7)) * (1 + a##_pitch) + (((uint32)a##_x >> 18) & 7))
 #define READ_PIXEL_2(a)       ((JaguarReadByte(a##_addr+PIXEL_OFFSET_2(a), BLITTER) >> PIXEL_SHIFT_2(a)) & 0x03)
 //#define READ_PIXEL_2(a)       ((JaguarReadByte(a##_addr+PIXEL_OFFSET_2(a)) >> PIXEL_SHIFT_2(a)) & 0x03)
 
 // 4 bpp pixel read
 #define PIXEL_SHIFT_4(a)      (((~a##_x) >> 14) & 4)
-#define PIXEL_OFFSET_4(a)     (((((UINT32)a##_y >> 16) * (a##_width/2)) + (((UINT32)a##_x >> 17) & ~7)) * (1 + a##_pitch) + (((UINT32)a##_x >> 17) & 7))
+#define PIXEL_OFFSET_4(a)     (((((uint32)a##_y >> 16) * (a##_width/2)) + (((uint32)a##_x >> 17) & ~7)) * (1 + a##_pitch) + (((uint32)a##_x >> 17) & 7))
 #define READ_PIXEL_4(a)       ((JaguarReadByte(a##_addr+PIXEL_OFFSET_4(a), BLITTER) >> PIXEL_SHIFT_4(a)) & 0x0f)
 //#define READ_PIXEL_4(a)       ((JaguarReadByte(a##_addr+PIXEL_OFFSET_4(a)) >> PIXEL_SHIFT_4(a)) & 0x0f)
 
 // 8 bpp pixel read
-#define PIXEL_OFFSET_8(a)     (((((UINT32)a##_y >> 16) * a##_width) + (((UINT32)a##_x >> 16) & ~7)) * (1 + a##_pitch) + (((UINT32)a##_x >> 16) & 7))
+#define PIXEL_OFFSET_8(a)     (((((uint32)a##_y >> 16) * a##_width) + (((uint32)a##_x >> 16) & ~7)) * (1 + a##_pitch) + (((uint32)a##_x >> 16) & 7))
 #define READ_PIXEL_8(a)       (JaguarReadByte(a##_addr+PIXEL_OFFSET_8(a), BLITTER))
 //#define READ_PIXEL_8(a)       (JaguarReadByte(a##_addr+PIXEL_OFFSET_8(a)))
 
 // 16 bpp pixel read
-#define PIXEL_OFFSET_16(a)    (((((UINT32)a##_y >> 16) * a##_width) + (((UINT32)a##_x >> 16) & ~3)) * (1 + a##_pitch) + (((UINT32)a##_x >> 16) & 3))
+#define PIXEL_OFFSET_16(a)    (((((uint32)a##_y >> 16) * a##_width) + (((uint32)a##_x >> 16) & ~3)) * (1 + a##_pitch) + (((uint32)a##_x >> 16) & 3))
 #define READ_PIXEL_16(a)       (JaguarReadWord(a##_addr+(PIXEL_OFFSET_16(a)<<1), BLITTER))
 //#define READ_PIXEL_16(a)       (JaguarReadWord(a##_addr+(PIXEL_OFFSET_16(a)<<1)))
 
 // 32 bpp pixel read
-#define PIXEL_OFFSET_32(a)    (((((UINT32)a##_y >> 16) * a##_width) + (((UINT32)a##_x >> 16) & ~1)) * (1 + a##_pitch) + (((UINT32)a##_x >> 16) & 1))
+#define PIXEL_OFFSET_32(a)    (((((uint32)a##_y >> 16) * a##_width) + (((uint32)a##_x >> 16) & ~1)) * (1 + a##_pitch) + (((uint32)a##_x >> 16) & 1))
 #define READ_PIXEL_32(a)      (JaguarReadLong(a##_addr+(PIXEL_OFFSET_32(a)<<2), BLITTER))
 //#define READ_PIXEL_32(a)      (JaguarReadLong(a##_addr+(PIXEL_OFFSET_32(a)<<2)))
 
@@ -195,25 +213,25 @@ void BlitterMidsummer2(void);
 //#define WRITE_ZDATA_16(a,d)     {  JaguarWriteWord(a##_addr+(ZDATA_OFFSET_16(a)<<1), d); }
 
 // z data write
-#define WRITE_ZDATA(a,f,d) WRITE_ZDATA_16(a,d); 
+#define WRITE_ZDATA(a,f,d) WRITE_ZDATA_16(a,d);	// 'f' is accepted but unused; note the expansion already ends in ';'
 
 // 1 bpp r data read
-#define READ_RDATA_1(r,a,p)  ((p) ?  ((REG(r+(((UINT32)a##_x >> 19) & 0x04))) >> (((UINT32)a##_x >> 16) & 0x1F)) & 0x0001 : (REG(r) & 0x0001))
+#define READ_RDATA_1(r,a,p)  ((p) ?  ((REG(r+(((uint32)a##_x >> 19) & 0x04))) >> (((uint32)a##_x >> 16) & 0x1F)) & 0x0001 : (REG(r) & 0x0001))
 
 // 2 bpp r data read
-#define READ_RDATA_2(r,a,p)  ((p) ?  ((REG(r+(((UINT32)a##_x >> 18) & 0x04))) >> (((UINT32)a##_x >> 15) & 0x3E)) & 0x0003 : (REG(r) & 0x0003))
+#define READ_RDATA_2(r,a,p)  ((p) ?  ((REG(r+(((uint32)a##_x >> 18) & 0x04))) >> (((uint32)a##_x >> 15) & 0x3E)) & 0x0003 : (REG(r) & 0x0003))	// NOTE(review): mask 0x3E permits shift counts up to 62 (1 bpp uses 0x1F, max 31) -- confirm against REG's width
 
 // 4 bpp r data read
-#define READ_RDATA_4(r,a,p)  ((p) ?  ((REG(r+(((UINT32)a##_x >> 17) & 0x04))) >> (((UINT32)a##_x >> 14) & 0x28)) & 0x000F : (REG(r) & 0x000F))
+#define READ_RDATA_4(r,a,p)  ((p) ?  ((REG(r+(((uint32)a##_x >> 17) & 0x04))) >> (((uint32)a##_x >> 14) & 0x28)) & 0x000F : (REG(r) & 0x000F))	// NOTE(review): mask 0x28 keeps only bits 3 and 5 (shift counts 0/8/32/40), unlike the contiguous masks of the other depths -- confirm intent
 
 // 8 bpp r data read
-#define READ_RDATA_8(r,a,p)  ((p) ?  ((REG(r+(((UINT32)a##_x >> 16) & 0x04))) >> (((UINT32)a##_x >> 13) & 0x18)) & 0x00FF : (REG(r) & 0x00FF))
+#define READ_RDATA_8(r,a,p)  ((p) ?  ((REG(r+(((uint32)a##_x >> 16) & 0x04))) >> (((uint32)a##_x >> 13) & 0x18)) & 0x00FF : (REG(r) & 0x00FF))
 
 // 16 bpp r data read
-#define READ_RDATA_16(r,a,p)  ((p) ? ((REG(r+(((UINT32)a##_x >> 15) & 0x04))) >> (((UINT32)a##_x >> 12) & 0x10)) & 0xFFFF : (REG(r) & 0xFFFF))
+#define READ_RDATA_16(r,a,p)  ((p) ? ((REG(r+(((uint32)a##_x >> 15) & 0x04))) >> (((uint32)a##_x >> 12) & 0x10)) & 0xFFFF : (REG(r) & 0xFFFF))
 
 // 32 bpp r data read
-#define READ_RDATA_32(r,a,p)  ((p) ? REG(r+(((UINT32)a##_x >> 14) & 0x04)) : REG(r))
+#define READ_RDATA_32(r,a,p)  ((p) ? REG(r+(((uint32)a##_x >> 14) & 0x04)) : REG(r))
 
 // register data read
 #define READ_RDATA(r,a,f,p) (\
@@ -246,8 +264,8 @@ void BlitterMidsummer2(void);
 //#define WRITE_PIXEL_16(a,d)     {  JaguarWriteWord(a##_addr+(PIXEL_OFFSET_16(a)<<1), d); if (specialLog) WriteLog("Pixel write address: %08X\n", a##_addr+(PIXEL_OFFSET_16(a)<<1)); }
 
 // 32 bpp pixel write
-#define WRITE_PIXEL_32(a,d)		{ JaguarWriteLong(a##_addr+(PIXEL_OFFSET_32(a)<<2), d, BLITTER); } 
-//#define WRITE_PIXEL_32(a,d)		{ JaguarWriteLong(a##_addr+(PIXEL_OFFSET_32(a)<<2), d); } 
+#define WRITE_PIXEL_32(a,d)		{ JaguarWriteLong(a##_addr+(PIXEL_OFFSET_32(a)<<2), d, BLITTER); }
+//#define WRITE_PIXEL_32(a,d)		{ JaguarWriteLong(a##_addr+(PIXEL_OFFSET_32(a)<<2), d); }
 
 // pixel write
 #define WRITE_PIXEL(a,f,d) {\
@@ -267,8 +285,8 @@ void BlitterMidsummer2(void);
 // as a floating point bit pattern being followed by a number of zeroes. So, e.g., 001101 translates to
 // 1.01 (the "1." being implied) x (2 ^ 3) or 1010 -> 10 in base 10 (i.e., 1.01 with the decimal place
 // being shifted to the right 3 places).
-/*static uint32 blitter_scanline_width[48] = 
-{             
+/*static uint32 blitter_scanline_width[48] =
+{
      0,    0,    0,    0,					// Note: This would really translate to 1, 1, 1, 1
      2,    0,    0,    0,
      4,    0,    6,    0,
@@ -351,7 +369,7 @@ void blitter_generic(uint32 cmd)
 {
 /*
 Blit! (0018FA70 <- 008DDC40) count: 2 x 13, A1/2_FLAGS: 00014218/00013C18 [cmd: 1401060C]
- CMD -> src: SRCENX dst: DSTEN  misc:  a1ctl: UPDA1 UPDA2 mode:  ity: PATDSEL z-op:  op: LFU_CLEAR ctrl: BCOMPEN BKGWREN 
+ CMD -> src: SRCENX dst: DSTEN  misc:  a1ctl: UPDA1 UPDA2 mode:  ity: PATDSEL z-op:  op: LFU_CLEAR ctrl: BCOMPEN BKGWREN
   A1 step values: -2 (X), 1 (Y)
   A2 step values: -1 (X), 1 (Y) [mask (unused): 00000000 - FFFFFFFF/FFFFFFFF]
   A1 -> pitch: 1 phrases, depth: 8bpp, z-off: 0, width: 320 (21), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
@@ -427,7 +445,7 @@ if (specialLog)
 						srczdata = READ_RDATA(SRCZINT, a2, REG(A2_FLAGS), a2_phrase_mode);
 				}
 
-				// load dst data and Z 
+				// load dst data and Z
 				if (DSTEN)
 				{
 					dstdata = READ_PIXEL(a1, REG(A1_FLAGS));
@@ -453,14 +471,14 @@ if (specialLog)
 						inhibit = 1;
 				}//*/
 
-				if (GOURZ) 
+				if (GOURZ)
 					srczdata = z_i[colour_index] >> 16;
 
 				// apply z comparator
 				if (Z_OP_INF && srczdata <  dstzdata)	inhibit = 1;
 				if (Z_OP_EQU && srczdata == dstzdata)	inhibit = 1;
 				if (Z_OP_SUP && srczdata >  dstzdata)	inhibit = 1;
-				
+
 				// apply data comparator
 // Note: DCOMPEN only works in 8/16 bpp modes! !!! FIX !!!
 // Does BCOMPEN only work in 1 bpp mode???
@@ -490,7 +508,7 @@ if (specialLog)
 Interesting (Hover Strike--large letter):
 
 Blit! (0018FA70 <- 008DDC40) count: 2 x 13, A1/2_FLAGS: 00014218/00013C18 [cmd: 1401060C]
- CMD -> src: SRCENX dst: DSTEN  misc:  a1ctl: UPDA1 UPDA2 mode:  ity: PATDSEL z-op:  op: LFU_CLEAR ctrl: BCOMPEN BKGWREN 
+ CMD -> src: SRCENX dst: DSTEN  misc:  a1ctl: UPDA1 UPDA2 mode:  ity: PATDSEL z-op:  op: LFU_CLEAR ctrl: BCOMPEN BKGWREN
   A1 step values: -2 (X), 1 (Y)
   A2 step values: -1 (X), 1 (Y) [mask (unused): 00000000 - FFFFFFFF/FFFFFFFF]
   A1 -> pitch: 1 phrases, depth: 8bpp, z-off: 0, width: 320 (21), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
@@ -498,7 +516,7 @@ Blit! (0018FA70 <- 008DDC40) count: 2 x 13, A1/2_FLAGS: 00014218/00013C18 [cmd:
         A1 x/y: 100/12, A2 x/y: 106/0 Pattern: 000000F300000000
 
 Blit! (0018FA70 <- 008DDC40) count: 8 x 13, A1/2_FLAGS: 00014218/00013C18 [cmd: 1401060C]
- CMD -> src: SRCENX dst: DSTEN  misc:  a1ctl: UPDA1 UPDA2 mode:  ity: PATDSEL z-op:  op: LFU_CLEAR ctrl: BCOMPEN BKGWREN 
+ CMD -> src: SRCENX dst: DSTEN  misc:  a1ctl: UPDA1 UPDA2 mode:  ity: PATDSEL z-op:  op: LFU_CLEAR ctrl: BCOMPEN BKGWREN
   A1 step values: -8 (X), 1 (Y)
   A2 step values: -1 (X), 1 (Y) [mask (unused): 00000000 - FFFFFFFF/FFFFFFFF]
   A1 -> pitch: 1 phrases, depth: 8bpp, z-off: 0, width: 320 (21), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
@@ -506,7 +524,7 @@ Blit! (0018FA70 <- 008DDC40) count: 8 x 13, A1/2_FLAGS: 00014218/00013C18 [cmd:
         A1 x/y: 102/12, A2 x/y: 107/0 Pattern: 000000F300000000
 
 Blit! (0018FA70 <- 008DDC40) count: 1 x 13, A1/2_FLAGS: 00014218/00013C18 [cmd: 1401060C]
- CMD -> src: SRCENX dst: DSTEN  misc:  a1ctl: UPDA1 UPDA2 mode:  ity: PATDSEL z-op:  op: LFU_CLEAR ctrl: BCOMPEN BKGWREN 
+ CMD -> src: SRCENX dst: DSTEN  misc:  a1ctl: UPDA1 UPDA2 mode:  ity: PATDSEL z-op:  op: LFU_CLEAR ctrl: BCOMPEN BKGWREN
   A1 step values: -1 (X), 1 (Y)
   A2 step values: -1 (X), 1 (Y) [mask (unused): 00000000 - FFFFFFFF/FFFFFFFF]
   A1 -> pitch: 1 phrases, depth: 8bpp, z-off: 0, width: 320 (21), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
@@ -514,7 +532,7 @@ Blit! (0018FA70 <- 008DDC40) count: 1 x 13, A1/2_FLAGS: 00014218/00013C18 [cmd:
         A1 x/y: 118/12, A2 x/y: 70/0 Pattern: 000000F300000000
 
 Blit! (0018FA70 <- 008DDC40) count: 8 x 13, A1/2_FLAGS: 00014218/00013C18 [cmd: 1401060C]
- CMD -> src: SRCENX dst: DSTEN  misc:  a1ctl: UPDA1 UPDA2 mode:  ity: PATDSEL z-op:  op: LFU_CLEAR ctrl: BCOMPEN BKGWREN 
+ CMD -> src: SRCENX dst: DSTEN  misc:  a1ctl: UPDA1 UPDA2 mode:  ity: PATDSEL z-op:  op: LFU_CLEAR ctrl: BCOMPEN BKGWREN
   A1 step values: -8 (X), 1 (Y)
   A2 step values: -1 (X), 1 (Y) [mask (unused): 00000000 - FFFFFFFF/FFFFFFFF]
   A1 -> pitch: 1 phrases, depth: 8bpp, z-off: 0, width: 320 (21), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
@@ -522,7 +540,7 @@ Blit! (0018FA70 <- 008DDC40) count: 8 x 13, A1/2_FLAGS: 00014218/00013C18 [cmd:
         A1 x/y: 119/12, A2 x/y: 71/0 Pattern: 000000F300000000
 
 Blit! (0018FA70 <- 008DDC40) count: 1 x 13, A1/2_FLAGS: 00014218/00013C18 [cmd: 1401060C]
- CMD -> src: SRCENX dst: DSTEN  misc:  a1ctl: UPDA1 UPDA2 mode:  ity: PATDSEL z-op:  op: LFU_CLEAR ctrl: BCOMPEN BKGWREN 
+ CMD -> src: SRCENX dst: DSTEN  misc:  a1ctl: UPDA1 UPDA2 mode:  ity: PATDSEL z-op:  op: LFU_CLEAR ctrl: BCOMPEN BKGWREN
   A1 step values: -1 (X), 1 (Y)
   A2 step values: -1 (X), 1 (Y) [mask (unused): 00000000 - FFFFFFFF/FFFFFFFF]
   A1 -> pitch: 1 phrases, depth: 8bpp, z-off: 0, width: 320 (21), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
@@ -530,7 +548,7 @@ Blit! (0018FA70 <- 008DDC40) count: 1 x 13, A1/2_FLAGS: 00014218/00013C18 [cmd:
         A1 x/y: 127/12, A2 x/y: 66/0 Pattern: 000000F300000000
 
 Blit! (0018FA70 <- 008DDC40) count: 8 x 13, A1/2_FLAGS: 00014218/00013C18 [cmd: 1401060C]
- CMD -> src: SRCENX dst: DSTEN  misc:  a1ctl: UPDA1 UPDA2 mode:  ity: PATDSEL z-op:  op: LFU_CLEAR ctrl: BCOMPEN BKGWREN 
+ CMD -> src: SRCENX dst: DSTEN  misc:  a1ctl: UPDA1 UPDA2 mode:  ity: PATDSEL z-op:  op: LFU_CLEAR ctrl: BCOMPEN BKGWREN
   A1 step values: -8 (X), 1 (Y)
   A2 step values: -1 (X), 1 (Y) [mask (unused): 00000000 - FFFFFFFF/FFFFFFFF]
   A1 -> pitch: 1 phrases, depth: 8bpp, z-off: 0, width: 320 (21), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
@@ -545,7 +563,7 @@ Blit! (0018FA70 <- 008DDC40) count: 8 x 13, A1/2_FLAGS: 00014218/00013C18 [cmd:
 						// compare source pixel with pattern pixel
 /*
 Blit! (000B8250 <- 0012C3A0) count: 16 x 1, A1/2_FLAGS: 00014420/00012000 [cmd: 05810001]
- CMD -> src: SRCEN  dst:  misc:  a1ctl:  mode:  ity: PATDSEL z-op:  op: LFU_REPLACE ctrl: BCOMPEN 
+ CMD -> src: SRCEN  dst:  misc:  a1ctl:  mode:  ity: PATDSEL z-op:  op: LFU_REPLACE ctrl: BCOMPEN
   A1 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 384 (22), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
   A2 -> pitch: 1 phrases, depth: 1bpp, z-off: 0, width: 16 (10), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
         x/y: 0/20
@@ -592,12 +610,12 @@ Blit! (000B8250 <- 0012C3A0) count: 16 x 1, A1/2_FLAGS: 00014420/00012000 [cmd:
 
 				// compute the write data and store
 				if (!inhibit)
-				{			
+				{
 // Houston, we have a problem...
 // Look here, at PATDSEL and GOURD. If both are active (as they are on the BIOS intro), then there's
 // a conflict! E.g.:
 //Blit! (00100000 <- 000095D0) count: 3 x 1, A1/2_FLAGS: 00014220/00004020 [cmd: 00011008]
-// CMD -> src:  dst: DSTEN  misc:  a1ctl:  mode: GOURD  ity: PATDSEL z-op:  op: LFU_CLEAR ctrl: 
+// CMD -> src:  dst: DSTEN  misc:  a1ctl:  mode: GOURD  ity: PATDSEL z-op:  op: LFU_CLEAR ctrl:
 //  A1 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 320 (21), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
 //  A2 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 256 (20), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
 //        A1 x/y: 90/171, A2 x/y: 808/0 Pattern: 776D770077007700
@@ -630,7 +648,7 @@ Blit! (000B8250 <- 0012C3A0) count: 16 x 1, A1/2_FLAGS: 00014420/00012000 [cmd:
 Hover Strike ADDDSEL blit:
 
 Blit! (00098D90 <- 0081DDC0) count: 320 x 287, A1/2_FLAGS: 00004220/00004020 [cmd: 00020208]
- CMD -> src:  dst: DSTEN  misc:  a1ctl: UPDA1  mode:  ity: ADDDSEL z-op:  op: LFU_CLEAR ctrl: 
+ CMD -> src:  dst: DSTEN  misc:  a1ctl: UPDA1  mode:  ity: ADDDSEL z-op:  op: LFU_CLEAR ctrl:
   A1 step values: -320 (X), 1 (Y)
   A1 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 320 (21), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
   A2 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 256 (20), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
@@ -675,10 +693,10 @@ Blit! (00098D90 <- 0081DDC0) count: 320 x 287, A1/2_FLAGS: 00004220/00004020 [cm
 //According to JTRM, this is part of the four things the blitter does with the write data (the other
 //three being PATDSEL, ADDDSEL, and LFU (default). I'm not sure which gets precedence, this or PATDSEL
 //(see above blit example)...
-					if (GOURD) 
+					if (GOURD)
 						writedata = ((gd_c[colour_index]) << 8) | (gd_i[colour_index] >> 16);
 
-					if (SRCSHADE) 
+					if (SRCSHADE)
 					{
 						int intensity = srcdata & 0xFF;
 						int ia = gd_ia >> 16;
@@ -705,7 +723,7 @@ Blit! (00098D90 <- 0081DDC0) count: 320 x 287, A1/2_FLAGS: 00004220/00004020 [cm
 /*if (((REG(A1_FLAGS) >> 3) & 0x07) == 5)
 {
 	uint32 offset = a1_addr+(PIXEL_OFFSET_32(a1)<<2);
-// (((((UINT32)a##_y >> 16) * a##_width) + (((UINT32)a##_x >> 16) & ~1)) * (1 + a##_pitch) + (((UINT32)a##_x >> 16) & 1))
+// (((((uint32)a##_y >> 16) * a##_width) + (((uint32)a##_x >> 16) & ~1)) * (1 + a##_pitch) + (((uint32)a##_x >> 16) & 1))
 	if ((offset >= 0x1FF020 && offset <= 0x1FF03F) || (offset >= 0x1FF820 && offset <= 0x1FF83F))
 		WriteLog("32bpp pixel write: A1 Phrase mode --> ");
 }//*/
@@ -733,7 +751,7 @@ Blit! (00098D90 <- 0081DDC0) count: 320 x 287, A1/2_FLAGS: 00004220/00004020 [cm
 						srczdata = READ_RDATA(SRCZINT, a1, REG(A1_FLAGS), a1_phrase_mode);
 				}
 
-				// load dst data and Z 
+				// load dst data and Z
 				if (DSTEN)
 				{
 					dstdata = READ_PIXEL(a2, REG(A2_FLAGS));
@@ -749,14 +767,14 @@ Blit! (00098D90 <- 0081DDC0) count: 320 x 287, A1/2_FLAGS: 00004220/00004020 [cm
 						dstzdata = READ_RDATA(DSTZ, a2, REG(A2_FLAGS), a2_phrase_mode);
 				}
 
-				if (GOURZ) 
+				if (GOURZ)
 					srczdata = z_i[colour_index] >> 16;
 
 				// apply z comparator
 				if (Z_OP_INF && srczdata < dstzdata)	inhibit = 1;
 				if (Z_OP_EQU && srczdata == dstzdata)	inhibit = 1;
 				if (Z_OP_SUP && srczdata > dstzdata)	inhibit = 1;
-				
+
 				// apply data comparator
 //NOTE: The bit comparator (BCOMPEN) is NOT the same at the data comparator!
 				if (DCOMPEN | BCOMPEN)
@@ -795,7 +813,7 @@ Blit! (00098D90 <- 0081DDC0) count: 320 x 287, A1/2_FLAGS: 00004220/00004020 [cm
 //					if (a1_phrase_mode || a2_phrase_mode)
 //						inhibit = !inhibit;
 				}
-				
+
 				if (CLIPA1)
 				{
 					inhibit |= (((a1_x >> 16) < a1_clip_x && (a1_x >> 16) >= 0
@@ -804,7 +822,7 @@ Blit! (00098D90 <- 0081DDC0) count: 320 x 287, A1/2_FLAGS: 00004220/00004020 [cm
 
 				// compute the write data and store
 				if (!inhibit)
-				{			
+				{
 					if (PATDSEL)
 					{
 						// use pattern data for write data
@@ -833,10 +851,10 @@ Blit! (00098D90 <- 0081DDC0) count: 320 x 287, A1/2_FLAGS: 00004220/00004020 [cm
 							writedata |= srcdata & dstdata;
 					}
 
-					if (GOURD) 
+					if (GOURD)
 						writedata = ((gd_c[colour_index]) << 8) | (gd_i[colour_index] >> 16);
 
-					if (SRCSHADE) 
+					if (SRCSHADE)
 					{
 						int intensity = srcdata & 0xFF;
 						int ia = gd_ia >> 16;
@@ -861,7 +879,7 @@ Blit! (00098D90 <- 0081DDC0) count: 320 x 287, A1/2_FLAGS: 00004220/00004020 [cm
 /*if (logGo)
 {
 	uint32 offset = a2_addr+(PIXEL_OFFSET_16(a2)<<1);
-// (((((UINT32)a##_y >> 16) * a##_width) + (((UINT32)a##_x >> 16) & ~1)) * (1 + a##_pitch) + (((UINT32)a##_x >> 16) & 1))
+// (((((uint32)a##_y >> 16) * a##_width) + (((uint32)a##_x >> 16) & ~1)) * (1 + a##_pitch) + (((uint32)a##_x >> 16) & 1))
 	WriteLog("[%08X:%04X] ", offset, writedata);
 }//*/
 					// write to the destination
@@ -940,7 +958,7 @@ Below fixes it, but then borks:
 ; O
 
 Blit! (00110000 <- 0010B2A8) count: 12 x 12, A1/2_FLAGS: 000042E2/00000020 [cmd: 09800609]
- CMD -> src: SRCEN  dst: DSTEN  misc:  a1ctl: UPDA1 UPDA2 mode:  ity:  z-op:  op: LFU_REPLACE ctrl: DCOMPEN 
+ CMD -> src: SRCEN  dst: DSTEN  misc:  a1ctl: UPDA1 UPDA2 mode:  ity:  z-op:  op: LFU_REPLACE ctrl: DCOMPEN
   A1 step values: -15 (X), 1 (Y)
   A2 step values: -4 (X), 0 (Y) [mask (unused): 00000000 - FFFFFFFF/FFFFFFFF]
   A1 -> pitch: 4 phrases, depth: 16bpp, z-off: 3, width: 320 (21), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
@@ -1048,8 +1066,8 @@ Lesse, with pre-add we'd have:
 		a2_y += a2_step_y;//*/
 #endif
 	}
-	
-	// write values back to registers 
+
+	// write values back to registers
 	WREG(A1_PIXEL,  (a1_y & 0xFFFF0000) | ((a1_x >> 16) & 0xFFFF));
 	WREG(A1_FPIXEL, (a1_y << 16) | (a1_x & 0xFFFF));
 	WREG(A2_PIXEL,  (a2_y & 0xFFFF0000) | ((a2_x >> 16) & 0xFFFF));
@@ -1085,7 +1103,7 @@ void blitter_blit(uint32 cmd)
 
 	a1_zoffs = (REG(A1_FLAGS) >> 6) & 7;
 	a2_zoffs = (REG(A2_FLAGS) >> 6) & 7;
-	
+
 	xadd_a1_control = (REG(A1_FLAGS) >> 16) & 0x03;
 	xadd_a2_control = (REG(A2_FLAGS) >> 16) & 0x03;
 
@@ -1107,7 +1125,7 @@ void blitter_blit(uint32 cmd)
 //	a1_width = blitter_scanline_width[((REG(A1_FLAGS) & 0x00007E00) >> 9)];
 // According to JTRM, this must give a *whole number* of phrases in the current
 // pixel size (this means the lookup above is WRONG)... !!! FIX !!!
-	UINT32 m = (REG(A1_FLAGS) >> 9) & 0x03, e = (REG(A1_FLAGS) >> 11) & 0x0F;
+	uint32 m = (REG(A1_FLAGS) >> 9) & 0x03, e = (REG(A1_FLAGS) >> 11) & 0x0F;
 	a1_width = ((0x04 | m) << e) >> 2;//*/
 
 	a2_x = (REG(A2_PIXEL) & 0x0000FFFF) << 16;
@@ -1154,7 +1172,7 @@ void blitter_blit(uint32 cmd)
 		// add pixelsize (1) to X
 		a1_xadd = 1 << 16;
 		break;
-	case XADD0:	
+	case XADD0:
 		// add zero (for those nice vertical lines)
 		a1_xadd = 0;
 		break;
@@ -1170,7 +1188,7 @@ void blitter_blit(uint32 cmd)
 //  A1 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 128 (1C), addctl: XADDINC YADD1 XSIGNADD YSIGNADD
 //  A2 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 320 (21), addctl: XADD0 YADD1 XSIGNADD YSIGNADD
 //if (YADD1_A1 && YADD1_A2 && xadd_a2_control == XADD0 && xadd_a1_control == XADDINC)// &&
-//	UINT32 a1f = REG(A1_FLAGS), a2f = REG(A2_FLAGS);
+//	uint32 a1f = REG(A1_FLAGS), a2f = REG(A2_FLAGS);
 //Ok, so this ISN'T it... Prolly the XADDPHR code above that's doing it...
 //if (REG(A1_FLAGS) == 0x00073820 && REG(A2_FLAGS) == 0x00064220 && cmd == 0x41802801)
 //        A1 x/y: 14368/7, A2 x/y: 150/36
@@ -1198,7 +1216,7 @@ void blitter_blit(uint32 cmd)
 		// add pixelsize (1) to X
 		a2_xadd = 1 << 16;
 		break;
-	case XADD0:	
+	case XADD0:
 		// add zero (for those nice vertical lines)
 		a2_xadd = 0;
 		break;
@@ -1276,7 +1294,7 @@ WriteLog("BLIT: Asked to use invalid bit combo (XADDINC) for A2...\n");
 			| ((uint32)blitter_ram[SRCDATA + 0] << 8) | blitter_ram[SRCDATA + 1];
 
 		gouraud_add = REG(INTENSITYINC);
-		
+
 		gd_ia = gouraud_add & 0x00FFFFFF;
 		if (gd_ia & 0x00800000)
 			gd_ia = 0xFF000000 | gd_ia;
@@ -1346,7 +1364,7 @@ WriteLog("BLIT: Asked to use invalid bit combo (XADDINC) for A2...\n");
 		WriteLog("  GOURZ   = %i\n",GOURZ);
 		WriteLog("  GOURD   = %i\n",GOURD);
 		WriteLog("  SRCSHADE= %i\n",SRCSHADE);
-	}	
+	}
 #endif
 
 //NOTE: Pitch is ignored!
@@ -1356,24 +1374,24 @@ WriteLog("BLIT: Asked to use invalid bit combo (XADDINC) for A2...\n");
 //Black is short by 3, pink is short by 1...
 /*
 Blit! (00110000 <- 000BF010) count: 9 x 31, A1/2_FLAGS: 000042E2/00010020 [cmd: 00010200]
- CMD -> src:  dst:  misc:  a1ctl: UPDA1  mode:  ity: PATDSEL z-op:  op: LFU_CLEAR ctrl: 
+ CMD -> src:  dst:  misc:  a1ctl: UPDA1  mode:  ity: PATDSEL z-op:  op: LFU_CLEAR ctrl:
   A1 -> pitch: 4 phrases, depth: 16bpp, z-off: 3, width: 320 (21), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
   A2 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 1 (00), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
         A1 x/y: 262/124, A2 x/y: 128/0
 Blit! (00110000 <- 000BF010) count: 5 x 38, A1/2_FLAGS: 000042E2/00010020 [cmd: 00010200]
- CMD -> src:  dst:  misc:  a1ctl: UPDA1  mode:  ity: PATDSEL z-op:  op: LFU_CLEAR ctrl: 
+ CMD -> src:  dst:  misc:  a1ctl: UPDA1  mode:  ity: PATDSEL z-op:  op: LFU_CLEAR ctrl:
   A1 -> pitch: 4 phrases, depth: 16bpp, z-off: 3, width: 320 (21), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
   A2 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 1 (00), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
         A1 x/y: 264/117, A2 x/y: 407/0
 
 Blit! (00110000 <- 000BF010) count: 9 x 23, A1/2_FLAGS: 000042E2/00010020 [cmd: 00010200]
- CMD -> src:  dst:  misc:  a1ctl: UPDA1  mode:  ity: PATDSEL z-op:  op: LFU_CLEAR ctrl: 
+ CMD -> src:  dst:  misc:  a1ctl: UPDA1  mode:  ity: PATDSEL z-op:  op: LFU_CLEAR ctrl:
   A1 step values: -10 (X), 1 (Y)
   A1 -> pitch: 4(2) phrases, depth: 16bpp, z-off: 3, width: 320 (21), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
   A2 -> pitch: 1(0) phrases, depth: 16bpp, z-off: 0, width: 1 (00), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
         A1 x/y: 262/132, A2 x/y: 129/0
 Blit! (00110000 <- 000BF010) count: 5 x 27, A1/2_FLAGS: 000042E2/00010020 [cmd: 00010200]
- CMD -> src:  dst:  misc:  a1ctl: UPDA1  mode:  ity: PATDSEL z-op:  op: LFU_CLEAR ctrl: 
+ CMD -> src:  dst:  misc:  a1ctl: UPDA1  mode:  ity: PATDSEL z-op:  op: LFU_CLEAR ctrl:
   A1 step values: -8 (X), 1 (Y)
   A1 -> pitch: 4(2) phrases, depth: 16bpp, z-off: 3, width: 320 (21), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
   A2 -> pitch: 1(0) phrases, depth: 16bpp, z-off: 0, width: 1 (00), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
@@ -1392,7 +1410,7 @@ Fixed! Now for more:
 ; This looks like the ship icon in the upper left corner...
 
 Blit! (00110000 <- 0010B2A8) count: 11 x 12, A1/2_FLAGS: 000042E2/00000020 [cmd: 09800609]
- CMD -> src: SRCEN  dst: DSTEN  misc:  a1ctl: UPDA1 UPDA2 mode:  ity:  z-op:  op: LFU_REPLACE ctrl: DCOMPEN 
+ CMD -> src: SRCEN  dst: DSTEN  misc:  a1ctl: UPDA1 UPDA2 mode:  ity:  z-op:  op: LFU_REPLACE ctrl: DCOMPEN
   A1 step values: -12 (X), 1 (Y)
   A2 step values: 0 (X), 0 (Y) [mask (unused): 00000000 - FFFFFFFF/FFFFFFFF]
   A1 -> pitch: 4 phrases, depth: 16bpp, z-off: 3, width: 320 (21), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
@@ -1409,7 +1427,7 @@ Actually, if you look at the A1 step values, there IS a discrepancy!
 ; D
 
 Blit! (00110000 <- 0010B2A8) count: 12 x 12, A1/2_FLAGS: 000042E2/00000020 [cmd: 09800609]
- CMD -> src: SRCEN  dst: DSTEN  misc:  a1ctl: UPDA1 UPDA2 mode:  ity:  z-op:  op: LFU_REPLACE ctrl: DCOMPEN 
+ CMD -> src: SRCEN  dst: DSTEN  misc:  a1ctl: UPDA1 UPDA2 mode:  ity:  z-op:  op: LFU_REPLACE ctrl: DCOMPEN
   A1 step values: -14 (X), 1 (Y)
   A2 step values: -4 (X), 0 (Y) [mask (unused): 00000000 - FFFFFFFF/FFFFFFFF]
   A1 -> pitch: 4 phrases, depth: 16bpp, z-off: 3, width: 320 (21), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
@@ -1420,7 +1438,7 @@ Blit! (00110000 <- 0010B2A8) count: 12 x 12, A1/2_FLAGS: 000042E2/00000020 [cmd:
 ; E
 
 Blit! (00110000 <- 0010B2A8) count: 12 x 12, A1/2_FLAGS: 000042E2/00000020 [cmd: 09800609]
- CMD -> src: SRCEN  dst: DSTEN  misc:  a1ctl: UPDA1 UPDA2 mode:  ity:  z-op:  op: LFU_REPLACE ctrl: DCOMPEN 
+ CMD -> src: SRCEN  dst: DSTEN  misc:  a1ctl: UPDA1 UPDA2 mode:  ity:  z-op:  op: LFU_REPLACE ctrl: DCOMPEN
   A1 step values: -13 (X), 1 (Y)
   A2 step values: -4 (X), 0 (Y) [mask (unused): 00000000 - FFFFFFFF/FFFFFFFF]
   A1 -> pitch: 4 phrases, depth: 16bpp, z-off: 3, width: 320 (21), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
@@ -1430,7 +1448,7 @@ Blit! (00110000 <- 0010B2A8) count: 12 x 12, A1/2_FLAGS: 000042E2/00000020 [cmd:
 ; M
 
 Blit! (00110000 <- 0010B2A8) count: 12 x 12, A1/2_FLAGS: 000042E2/00000020 [cmd: 09800609]
- CMD -> src: SRCEN  dst: DSTEN  misc:  a1ctl: UPDA1 UPDA2 mode:  ity:  z-op:  op: LFU_REPLACE ctrl: DCOMPEN 
+ CMD -> src: SRCEN  dst: DSTEN  misc:  a1ctl: UPDA1 UPDA2 mode:  ity:  z-op:  op: LFU_REPLACE ctrl: DCOMPEN
   A1 step values: -12 (X), 1 (Y)
   A2 step values: 0 (X), 0 (Y) [mask (unused): 00000000 - FFFFFFFF/FFFFFFFF]
   A1 -> pitch: 4 phrases, depth: 16bpp, z-off: 3, width: 320 (21), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
@@ -1440,7 +1458,7 @@ Blit! (00110000 <- 0010B2A8) count: 12 x 12, A1/2_FLAGS: 000042E2/00000020 [cmd:
 ; O
 
 Blit! (00110000 <- 0010B2A8) count: 12 x 12, A1/2_FLAGS: 000042E2/00000020 [cmd: 09800609]
- CMD -> src: SRCEN  dst: DSTEN  misc:  a1ctl: UPDA1 UPDA2 mode:  ity:  z-op:  op: LFU_REPLACE ctrl: DCOMPEN 
+ CMD -> src: SRCEN  dst: DSTEN  misc:  a1ctl: UPDA1 UPDA2 mode:  ity:  z-op:  op: LFU_REPLACE ctrl: DCOMPEN
   A1 step values: -15 (X), 1 (Y)
   A2 step values: -4 (X), 0 (Y) [mask (unused): 00000000 - FFFFFFFF/FFFFFFFF]
   A1 -> pitch: 4 phrases, depth: 16bpp, z-off: 3, width: 320 (21), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
@@ -1458,14 +1476,14 @@ if (blit_start_log)
 	uint32 /*src = cmd & 0x07, dst = (cmd >> 3) & 0x07, misc = (cmd >> 6) & 0x03,
 		a1ctl = (cmd >> 8) & 0x07,*/ mode = (cmd >> 11) & 0x07/*, ity = (cmd >> 14) & 0x0F,
 		zop = (cmd >> 18) & 0x07, op = (cmd >> 21) & 0x0F, ctrl = (cmd >> 25) & 0x3F*/;
-	UINT32 a1f = REG(A1_FLAGS), a2f = REG(A2_FLAGS);
+	uint32 a1f = REG(A1_FLAGS), a2f = REG(A2_FLAGS);
 	uint32 p1 = a1f & 0x07, p2 = a2f & 0x07,
 		d1 = (a1f >> 3) & 0x07, d2 = (a2f >> 3) & 0x07,
 		zo1 = (a1f >> 6) & 0x07, zo2 = (a2f >> 6) & 0x07,
 		w1 = (a1f >> 9) & 0x3F, w2 = (a2f >> 9) & 0x3F,
 		ac1 = (a1f >> 16) & 0x1F, ac2 = (a2f >> 16) & 0x1F;
-	UINT32 iw1 = ((0x04 | (w1 & 0x03)) << ((w1 & 0x3C) >> 2)) >> 2;
-	UINT32 iw2 = ((0x04 | (w2 & 0x03)) << ((w2 & 0x3C) >> 2)) >> 2;
+	uint32 iw1 = ((0x04 | (w1 & 0x03)) << ((w1 & 0x3C) >> 2)) >> 2;
+	uint32 iw2 = ((0x04 | (w2 & 0x03)) << ((w2 & 0x3C) >> 2)) >> 2;
 	WriteLog("Blit! (%08X %s %08X) count: %d x %d, A1/2_FLAGS: %08X/%08X [cmd: %08X]\n", a1_addr, (mode&0x01 ? "->" : "<-"), a2_addr, n_pixels, n_lines, a1f, a2f, cmd);
 //	WriteLog(" CMD -> src: %d, dst: %d, misc: %d, a1ctl: %d, mode: %d, ity: %1X, z-op: %d, op: %1X, ctrl: %02X\n", src, dst, misc, a1ctl, mode, ity, zop, op, ctrl);
 
@@ -1519,17 +1537,17 @@ if (blit_start_log)
 ********************** STUFF CUT ABOVE THIS LINE! ******************************
 *******************************************************************************/
 
-void blitter_init(void)
+void BlitterInit(void)
 {
-	blitter_reset();
+	BlitterReset();
 }
 
-void blitter_reset(void)
+void BlitterReset(void)
 {
 	memset(blitter_ram, 0x00, 0xA0);
 }
 
-void blitter_done(void)
+void BlitterDone(void)
 {
 	WriteLog("BLIT: Done.\n");
 }
@@ -1541,10 +1559,17 @@ uint8 BlitterReadByte(uint32 offset, uint32 who/*=UNKNOWN*/)
 	// status register
 //This isn't cycle accurate--how to fix? !!! FIX !!!
 //Probably have to do some multi-threaded implementation or at least a reentrant safe implementation...
+//Real hardware returns $00000805, just like the JTRM says.
+	if (offset == (0x38 + 0))
+		return 0x00;
+	if (offset == (0x38 + 1))
+		return 0x00;
+	if (offset == (0x38 + 2))
+		return 0x08;
 	if (offset == (0x38 + 3))
-		return 0x01;	// always idle
+		return 0x05;	// always idle/never stopped (collision detection ignored!)
 
-// CHECK HERE ONCE THIS FIX HAS BEEN TESTED: [ ]
+// CHECK HERE ONCE THIS FIX HAS BEEN TESTED: [X]
 //Fix for AvP:
 	if (offset >= 0x04 && offset <= 0x07)
 //This is it. I wonder if it just ignores the lower three bits?
@@ -1596,12 +1621,12 @@ void BlitterWriteByte(uint32 offset, uint8 data, uint32 who/*=UNKNOWN*/)
 		case 0x81: blitter_ram[PATTERNDATA + 5] = data; break;
 		case 0x82: blitter_ram[SRCDATA + 4] = data; break;
 		case 0x83: blitter_ram[SRCDATA + 5] = data; break;
-		
+
 		case 0x84: break;
 		case 0x85: blitter_ram[PATTERNDATA + 3] = data; break;
 		case 0x86: blitter_ram[SRCDATA + 2] = data; break;
 		case 0x87: blitter_ram[SRCDATA + 3] = data; break;
-		
+
 		case 0x88: break;
 		case 0x89: blitter_ram[PATTERNDATA + 1] = data; break;
 		case 0x8A: blitter_ram[SRCDATA + 0] = data; break;
@@ -1618,12 +1643,12 @@ void BlitterWriteByte(uint32 offset, uint8 data, uint32 who/*=UNKNOWN*/)
 		case 0x91: blitter_ram[SRCZINT + 5] = data; break;
 		case 0x92: blitter_ram[SRCZFRAC + 4] = data; break;
 		case 0x93: blitter_ram[SRCZFRAC + 5] = data; break;
-		
+
 		case 0x94: blitter_ram[SRCZINT + 2] = data; break;
 		case 0x95: blitter_ram[SRCZINT + 3] = data; break;
 		case 0x96: blitter_ram[SRCZFRAC + 2] = data; break;
 		case 0x97: blitter_ram[SRCZFRAC + 3] = data; break;
-		
+
 		case 0x98: blitter_ram[SRCZINT + 0] = data; break;
 		case 0x99: blitter_ram[SRCZINT + 1] = data; break;
 		case 0x9A: blitter_ram[SRCZFRAC + 0] = data; break;
@@ -1724,13 +1749,13 @@ doGPUDis = true;
 
 void LogBlit(void)
 {
-	char * opStr[16] = { "LFU_CLEAR", "LFU_NSAND", "LFU_NSAD", "LFU_NOTS", "LFU_SAND", "LFU_NOTD", "LFU_N_SXORD", "LFU_NSORND",
+	const char * opStr[16] = { "LFU_CLEAR", "LFU_NSAND", "LFU_NSAD", "LFU_NOTS", "LFU_SAND", "LFU_NOTD", "LFU_N_SXORD", "LFU_NSORND",
 		"LFU_SAD", "LFU_XOR", "LFU_D", "LFU_NSORD", "LFU_REPLACE", "LFU_SORND", "LFU_SORD", "LFU_ONE" };
 	uint32 cmd = GET32(blitter_ram, 0x38);
-	UINT32 m = (REG(A1_FLAGS) >> 9) & 0x03, e = (REG(A1_FLAGS) >> 11) & 0x0F;
-	UINT32 a1_width = ((0x04 | m) << e) >> 2;
+	uint32 m = (REG(A1_FLAGS) >> 9) & 0x03, e = (REG(A1_FLAGS) >> 11) & 0x0F;
+	uint32 a1_width = ((0x04 | m) << e) >> 2;
 	m = (REG(A2_FLAGS) >> 9) & 0x03, e = (REG(A2_FLAGS) >> 11) & 0x0F;
-	UINT32 a2_width = ((0x04 | m) << e) >> 2;
+	uint32 a2_width = ((0x04 | m) << e) >> 2;
 
 	WriteLog("Blit!\n");
 	WriteLog("  COMMAND  = %08X\n", cmd);
@@ -1809,9 +1834,9 @@ void LogBlit(void)
 	WriteLog("  UPDA2    = %s\n", (UPDA2 ? "1" : "0"));
 	WriteLog("  DSTA2    = %s\n", (DSTA2 ? "1" : "0"));
 	WriteLog("  ZOP      = %s %s %s\n", (Z_OP_INF ? "<" : ""), (Z_OP_EQU ? "=" : ""), (Z_OP_SUP ? ">" : ""));
-	WriteLog("--LFUFUNC  = %s\n", opStr[(cmd >> 21) & 0x0F]);
+	WriteLog("+-LFUFUNC  = %s\n", opStr[(cmd >> 21) & 0x0F]);
 	WriteLog("| PATDSEL  = %s (PD=%08X%08X)\n", (PATDSEL ? "1" : "0"), REG(PATTERNDATA), REG(PATTERNDATA + 4));
-	WriteLog("--ADDDSEL  = %s\n", (ADDDSEL ? "1" : "0"));
+	WriteLog("+-ADDDSEL  = %s\n", (ADDDSEL ? "1" : "0"));
 	WriteLog("  CMPDST   = %s\n", (CMPDST ? "1" : "0"));
 	WriteLog("  BCOMPEN  = %s\n", (BCOMPEN ? "1" : "0"));
 	WriteLog("  DCOMPEN  = %s\n", (DCOMPEN ? "1" : "0"));
@@ -1838,6 +1863,9 @@ void LogBlit(void)
 
 void BlitterMidsummer(uint32 cmd)
 {
+#ifdef LOG_BLITS
+	LogBlit();
+#endif
 uint32 outer_loop, inner_loop, a1_addr, a2_addr;
 int32 a1_x, a1_y, a2_x, a2_y, a1_width, a2_width;
 uint8 a1_phrase_mode, a2_phrase_mode;
@@ -1846,7 +1874,7 @@ uint8 a1_phrase_mode, a2_phrase_mode;
 	a2_addr = REG(A2_BASE) & 0xFFFFFFF8;
 	a1_x = (REG(A1_PIXEL) << 16) | (REG(A1_FPIXEL) & 0xFFFF);
 	a1_y = (REG(A1_PIXEL) & 0xFFFF0000) | (REG(A1_FPIXEL) >> 16);
-	UINT32 m = (REG(A1_FLAGS) >> 9) & 0x03, e = (REG(A1_FLAGS) >> 11) & 0x0F;
+	uint32 m = (REG(A1_FLAGS) >> 9) & 0x03, e = (REG(A1_FLAGS) >> 11) & 0x0F;
 	a1_width = ((0x04 | m) << e) >> 2;//*/
 	a2_x = (REG(A2_PIXEL) & 0x0000FFFF) << 16;
 	a2_y = (REG(A2_PIXEL) & 0xFFFF0000);
@@ -2009,7 +2037,7 @@ sread:							// Source data read
 //pointing at. Likewise, the pixel (if in BPP 1, 2 & 4, chopped) otherwise. It probably still
 //transfers an entire phrase even in pixel mode.
 //Odd thought: Does it expand, e.g., 1 BPP pixels into 32 BPP internally? Hmm...
-//No. 
+//No.
 /*
 	a1_addr = REG(A1_BASE) & 0xFFFFFFF8;
 	a2_addr = REG(A2_BASE) & 0xFFFFFFF8;
@@ -2030,7 +2058,7 @@ sread:							// Source data read
 	a1_width = ((0x04 | m) << e) >> 2;
 	a2_width = ((0x04 | m) << e) >> 2;
 
-	// write values back to registers 
+	// write values back to registers
 	WREG(A1_PIXEL,  (a1_y & 0xFFFF0000) | ((a1_x >> 16) & 0xFFFF));
 	WREG(A1_FPIXEL, (a1_y << 16) | (a1_x & 0xFFFF));
 	WREG(A2_PIXEL,  (a2_y & 0xFFFF0000) | ((a2_x >> 16) & 0xFFFF));
@@ -2245,7 +2273,7 @@ Blit!
 	}
 
 	// Figure out what gets written...
-	
+
 	if (PATDSEL)
 	{
 		writeData = GET64(blitter_ram, PATTERNDATA);
@@ -2268,7 +2296,7 @@ Blit!
 	else	// LFUFUNC is the default...
 	{
 		writeData = 0;
-		
+
 		if (LFU_NAN)
 			writeData |= ~srcData & ~dstData;
 		if (LFU_NA)
@@ -2370,7 +2398,7 @@ inhibitWrite://Should this go here? or on the other side of the X/Y incrementing
 		a2_x += (blitter_ram[A2_FLAGS + 1] & 0x08 ? -1 << 16 : 1 << 16);
 /*	else if ((blitter_ram[A2_FLAGS + 1] & 0x03) == 2)
 		a2_x += 0 << 16;                              */
-	
+
 	if (blitter_ram[A2_FLAGS + 1] & 0x04)
 		a2_y += (blitter_ram[A2_FLAGS + 1] & 0x10 ? -1 << 16 : 1 << 16);
 
@@ -2475,10 +2503,10 @@ a1fupdate    A1 step fraction is added to A1 pointer fraction
 goto a1update
 */
 /*
-#define A1_PIXEL		((UINT32)0x0C)	// Integer part of the pixel (Y.i and X.i)
-#define A1_STEP			((UINT32)0x10)	// Integer part of the step
-#define A1_FSTEP		((UINT32)0x14)	// Fractional part of the step
-#define A1_FPIXEL		((UINT32)0x18)	// Fractional part of the pixel (Y.f and X.f)
+#define A1_PIXEL		((uint32)0x0C)	// Integer part of the pixel (Y.i and X.i)
+#define A1_STEP			((uint32)0x10)	// Integer part of the step
+#define A1_FSTEP		((uint32)0x14)	// Fractional part of the step
+#define A1_FPIXEL		((uint32)0x18)	// Fractional part of the pixel (Y.f and X.f)
 */
 
 // This is all kinda murky. All we have are the Midsummer docs to give us any guidance,
@@ -2656,6 +2684,9 @@ bool logBlit = false;
 
 void BlitterMidsummer2(void)
 {
+#ifdef LOG_BLITS
+	LogBlit();
+#endif
 	// Here's what the specs say the state machine does. Note that this can probably be
 	// greatly simplified (also, it's different from what John has in his Oberon docs):
 //Will remove stuff that isn't in Jaguar I once fully described (stuff like texture won't
@@ -2663,10 +2694,10 @@ void BlitterMidsummer2(void)
 
 	uint32 cmd = GET32(blitter_ram, COMMAND);
 
-/*logBlit = false;
+logBlit = false;
 if (
 	cmd != 0x00010200 &&	// PATDSEL
-	cmd != 0x01800001
+	cmd != 0x01800001		// SRCEN LFUFUNC=C
 	&& cmd != 0x01800005
 //Boot ROM ATARI letters:
 	&& cmd != 0x00011008	// DSTEN GOURD PATDSEL
@@ -2681,7 +2712,7 @@ if (
 //Static pic on title screen:
 	&& cmd != 0x01800601	// SRCEN UPDA1 UPDA2 LFUFUNC=C
 //Turning letters on Cybermorph intro screen:
-	&& cmd != 0x09800F41	// SRCEN CLIP_A1 UPDA1 UPDA1F UPDA2 DSTA2 LFUFUNC=C DCOMPEN
+//	&& cmd != 0x09800F41	// SRCEN CLIP_A1 UPDA1 UPDA1F UPDA2 DSTA2 LFUFUNC=C DCOMPEN
 	&& cmd != 0x00113078	// DSTEN DSTENZ DSTWRZ CLIP_A1 GOURD GOURZ PATDSEL ZMODE=4
 	&& cmd != 0x09900F39	// SRCEN DSTEN DSTENZ DSTWRZ UPDA1 UPDA1F UPDA2 DSTA2 ZMODE=4 LFUFUNC=C DCOMPEN
 	&& cmd != 0x09800209	// SRCEN DSTEN UPDA1 LFUFUNC=C DCOMPEN
@@ -2691,7 +2722,7 @@ if (
 //Hover Strike text:
 	&& cmd != 0x1401060C	// SRCENX DSTEN UPDA1 UPDA2 PATDSEL BCOMPEN BKGWREN
 //Hover Strike 3D stuff
-//	&& cmd != 0x01902839	// SRCEN DSTEN DSTENZ DSTWRZ DSTA2 GOURZ ZMODE=4 LFUFUNC=C
+	&& cmd != 0x01902839	// SRCEN DSTEN DSTENZ DSTWRZ DSTA2 GOURZ ZMODE=4 LFUFUNC=C
 //Hover Strike darkening on intro to play (briefing) screen
 	&& cmd != 0x00020208	// DSTEN UPDA1 ADDDSEL
 //Trevor McFur stuff:
@@ -2700,9 +2731,15 @@ if (
 //T2K:
 	&& cmd != 0x00011000	// GOURD PATDSEL
 	&& cmd != 0x00011040	// CLIP_A1 GOURD PATDSEL
+//Checkered flag:
+	&& cmd != 0x01800000	// LFUFUNC=C
+	&& cmd != 0x01800401	// SRCEN UPDA2 LFUFUNC=C
+	&& cmd != 0x01800040	// CLIP_A1 LFUFUNC=C
+	&& cmd != 0x00020008	// DSTEN ADDDSEL
+//	&& cmd != 0x09800F41	// SRCEN CLIP_A1 UPDA1 UPDA1F UPDA2 DSTA2 LFUFUNC=C DCOMPEN
 	)
 	logBlit = true;//*/
-logBlit = true;
+//logBlit = true;
 if (blit_start_log == 0)	// Wait for the signal...
 	logBlit = false;//*/
 /*
@@ -2795,7 +2832,7 @@ fflush(stdout);
 #endif
 
 	// Lines that don't exist in Jaguar I (and will never be asserted)
-	
+
 	bool polygon = false, datinit = false, a1_stepld = false, a2_stepld = false, ext_int = false;
 	bool istepadd = false, istepfadd = false, finneradd = false, inneradd = false;
 	bool zstepfadd = false, zstepadd = false;
@@ -2920,7 +2957,7 @@ if ((cmd == 0x00010200) && (GET16(blitter_ram, PIXLINECOUNTER + 2) == 9))
 ; Pink altimeter bar
 
 Blit! (00110000 <- 000BF010) count: 9 x 23, A1/2_FLAGS: 000042E2/00010020 [cmd: 00010200]
- CMD -> src:  dst:  misc:  a1ctl: UPDA1  mode:  ity: PATDSEL z-op:  op: LFU_CLEAR ctrl: 
+ CMD -> src:  dst:  misc:  a1ctl: UPDA1  mode:  ity: PATDSEL z-op:  op: LFU_CLEAR ctrl:
   A1 step values: -10 (X), 1 (Y)
   A1 -> pitch: 4 phrases, depth: 16bpp, z-off: 3, width: 320 (21), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
   A2 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 1 (00), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
@@ -2931,7 +2968,7 @@ Blit! (00110000 <- 000BF010) count: 9 x 23, A1/2_FLAGS: 000042E2/00010020 [cmd:
 ; Black altimeter bar
 
 Blit! (00110000 <- 000BF010) count: 5 x 29, A1/2_FLAGS: 000042E2/00010020 [cmd: 00010200]
- CMD -> src:  dst:  misc:  a1ctl: UPDA1  mode:  ity: PATDSEL z-op:  op: LFU_CLEAR ctrl: 
+ CMD -> src:  dst:  misc:  a1ctl: UPDA1  mode:  ity: PATDSEL z-op:  op: LFU_CLEAR ctrl:
   A1 step values: -8 (X), 1 (Y)
   A1 -> pitch: 4 phrases, depth: 16bpp, z-off: 3, width: 320 (21), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
   A2 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 1 (00), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
@@ -2975,7 +3012,7 @@ Flags: UPDA1 PATDSEL
 */
 
 	// Bugs in Jaguar I
-	
+
 	a2addy = a1addy;							// A2 channel Y add bit is tied to A1's
 
 //if (logBlit && (ocount > 20)) logBlit = false;
@@ -3001,7 +3038,7 @@ printf("  srcz1=%08X%08X srcz2=%08X%08X dstz=%08X%08X zinc=%08X, coll=%X\n",
 	(uint32)(srcz2 >> 32), (uint32)(srcz2 & 0xFFFFFFFF),
 	(uint32)(dstz >> 32), (uint32)(dstz & 0xFFFFFFFF), zinc, collision);
 }
-#endif	
+#endif
 
 	// Various state lines set up by user
 
@@ -3022,7 +3059,7 @@ fflush(stdout);
 	while (true)
 	{
 		// IDLE
-	
+
 		if ((idle && !go) || (inner && outer0 && indone))
 		{
 #ifdef VERBOSE_BLITTER_LOGGING
@@ -3040,7 +3077,7 @@ break;
 		}
 		else
 			idlei = false;
-	
+
 		// INNER LOOP ACTIVE
 /*
   Entering DWRITE state... (icount=0000, inc=4)
@@ -3051,7 +3088,7 @@ break;
 Now:
   [in=F a1f=F a1=F zf=F z=F a2=F iif=F iii=F izf=F izi=F]
 */
-	
+
 		if ((idle && go && !datinit)
 			|| (inner && !indone)
 			|| (inner && indone && !outer0 && !upda1f && !upda1 && notgzandp && !upda2 && !datinit)
@@ -3065,9 +3102,9 @@ Now:
 		}
 		else
 			inneri = false;
-	
+
 		// A1 FRACTION UPDATE
-	
+
 		if (inner && indone && !outer0 && upda1f)
 		{
 			a1fupdatei = true;
@@ -3114,9 +3151,9 @@ Now:
 		}
 		else
 			a2updatei = false;
-	
+
 		// INITIALIZE INTENSITY FRACTION
-	
+
 		if ((zupdate && !upda2 && datinit)
 			|| (a1update && !upda2 && datinit && notgzandp)
 			|| (inner && indone && !outer0 && !upda1f && !upda1 && notgzandp && !upda2 && datinit)
@@ -3127,34 +3164,34 @@ Now:
 		}
 		else
 			init_ifi = false;
-	
+
 		// INITIALIZE INTENSITY INTEGER
-	
+
 		if (init_if)
 		{
 			init_iii = true;
 		}
 		else
 			init_iii = false;
-	
+
 		// INITIALIZE Z FRACTION
-	
+
 		if (init_ii && gourz)
 		{
 			init_zfi = true;
 		}
 		else
 			init_zfi = false;
-	
+
 		// INITIALIZE Z INTEGER
-	
+
 		if (init_zf)
 		{
 			init_zii = true;
 		}
 		else
 			init_zii = false;
-	
+
 // Here we move the fooi into their foo counterparts in order to simulate the moving
 // of data into the various FDSYNCs... Each time we loop we simulate one clock cycle...
 
@@ -3592,14 +3629,14 @@ daddbsel |= (istepadd && istepfadd && zstepadd && zstepfadd ? 0x08 : 0x00);
 /* Data adder mode control
 000	16-bit normal add
 001	16-bit saturating add with carry
-010	8-bit saturating add with carry, carry into top byte is 
+010	8-bit saturating add with carry, carry into top byte is
 	inhibited (YCrCb)
-011	8-bit saturating add with carry, carry into top byte and 
+011	8-bit saturating add with carry, carry into top byte and
 	between top nybbles is inhibited (CRY)
 100	16-bit normal add with carry
 101	16-bit saturating add
 110	8-bit saturating add, carry into top byte is inhibited
-111	8-bit saturating add, carry into top byte and between top 
+111	8-bit saturating add, carry into top byte and between top
 	nybbles is inhibited
 
 The first five are used for Gouraud calculations, the latter three
@@ -3618,7 +3655,7 @@ Bit 0 =   dzwrite . gourz . atick[1]
 	+ init_ii . /topnen . /topben . /ext_int
 	+ init_ii .  topnen .  topben . /ext_int
 	+ init_zi
-		
+
 Bit 1 =   dwrite . gourd . atick[1] . /topben . /ext_int
 	+ istepadd . /topben . /ext_int
 	+ /gourd . /gourz .  /topben
@@ -3643,8 +3680,8 @@ daddmode |= ((dwrite && gourd && !topben && !ext_int) || (istepadd && !topben &&
 	|| (init_ii && !topben && !ext_int) ? 0x02 : 0x00);
 daddmode |= ((!gourd && !gourz) || shadeadd || (dwrite && gourd && ext_int)
 	|| (istepadd && ext_int) || (init_ii && ext_int) ? 0x04 : 0x00);
-/* Data add load controls 
-Pattern fraction (dest data) is loaded on 
+/* Data add load controls
+Pattern fraction (dest data) is loaded on
 	  dwrite . gourd . atick[0]
 	+ istepfadd . /datinit
 	+ init_if
@@ -3652,11 +3689,11 @@ Pattern data is loaded on
 	  dwrite . gourd . atick[1]
 	+ istepadd . /datinit . /datinit
 	+ init_ii
-Source z1 is loaded on 
+Source z1 is loaded on
 	  dzwrite . gourz . atick[1]
 	+ zstepadd . /datinit . /datinit
 	+ init_zi
-Source z2 is loaded on 
+Source z2 is loaded on
 	  dzwrite . gourz . atick[0]
 	+ zstepfadd
 	+ init_zf
@@ -3697,17 +3734,17 @@ if (!justify)
 
 /* Generate source alignment shift
    -------------------------------
-The source alignment shift for data move is the difference between 
-the source and destination X pointers, multiplied by the pixel 
-size.  Only the low six bits of the pointers are of interest, as 
-pixel sizes are always a power of 2 and window rows are always 
-phrase aligned.  
+The source alignment shift for data move is the difference between
+the source and destination X pointers, multiplied by the pixel
+size.  Only the low six bits of the pointers are of interest, as
+pixel sizes are always a power of 2 and window rows are always
+phrase aligned.
 
 When not in phrase mode, the top 3 bits of the shift value are
 set to zero (2/26).
 
 Source shifting is also used to extract bits for bit-to-byte
-expansion in phrase mode.  This involves only the bottom three 
+expansion in phrase mode.  This involves only the bottom three
 bits of the shift value, and is based on the offset within the
 phrase of the destination X pointer, in pixels.
 
@@ -3734,39 +3771,6 @@ uint8 shfti = (srcen || pobbsel ? (sshftld ? loshd : srcshift & 0x07) : 0);
 shfti |= (srcen && phrase_mode ? (sshftld ? shftv & 0x38 : srcshift & 0x38) : 0);
 srcshift = shfti;
 
-/*
-Note that there's a problem here--even though it's NOT in phrase mode, it's still calculating
-a source shift... !!! FIX !!!
-Actually, the problem is the code that utilizes the source shift even when it's not needed... I think.
-
-Blit! (CMD = 01800609)
-Flags: SRCEN DSTEN UPDA1 UPDA2 LFUFUNC=C
-  count = 10 x 12
-  a1_base = 001F8300, a2_base = 00812F80
-  a1_x = 0007, a1_y = 0000, a1_frac_x = 0000, a1_frac_y = 0000, a2_x = 0000, a2_y = 0000
-  a1_step_x = FFF6, a1_step_y = 0001, a1_stepf_x = 0000, a1_stepf_y = 0000, a2_step_x = FFF6, a2_step_y = 0001
-  a1_inc_x = 0000, a1_inc_y = 0000, a1_incf_x = 0000, a1_incf_y = 0000
-  a1_win_x = 0000, a1_win_y = 0000, a2_mask_x = 0000, a2_mask_y = 0000
-  a2_mask=F a1add=+1/+0 a2add=+1/+0
-  a1_pixsize = 2, a2_pixsize = 2
-   srcd=0000000000000000  dstd=0000000000000000 patd=0000000000000000 iinc=00000000
-  srcz1=0000000000000000 srcz2=0000000000000000 dstz=0000000000000000 zinc=00000000, coll=0
-  Phrase mode is off
-  [in=T a1f=F a1=F zf=F z=F a2=F iif=F iii=F izf=F izi=F]
-  Entering INNER state...
-  Entering SREAD state...     Source read address/pix address: 00812F80/0 [0000000000000000]
-  Entering A2_ADD state [a2_x=0000, a2_y=0000, addasel=0, addbsel=1, modx=0, addareg=F, adda_xconst=0, adda_yconst=0]...
-  Entering DREAD state...       Dest read address/pix address: 001F8303/4 [0000000000000000]
-  Entering DWRITE state...     Dest write address/pix address: 001F8303/4 srcz=0000000000000000]
-
-[dcomp=FF zcomp=00 dbinh=00]
-
-[srcz=0000000000000000 dstz=0000000000000000 zwdata=0000000000000000 mask=000F]
- [0000000000000000] (icount=0009, inc=1)
-    [dstart=4 dend=8 pwidth=4 srcshift=4][daas=0 dabs=0 dam=7 ds=1 daq=F]
-  Entering A1_ADD state [a1_x=0007, a1_y=0000, addasel=0, addbsel=0, modx=0, addareg=F, adda_xconst=0, adda_yconst=0]...
-
-*/
 				if (sreadx)
 				{
 #ifdef VERBOSE_BLITTER_LOGGING
@@ -4109,6 +4113,11 @@ uint64 srcd = (srcd2 << (64 - srcshift)) | (srcd1 >> srcshift);
 if (srcshift == 0)
 	srcd = srcd1;
 
+//NOTE: This only works with pixel sizes less than 8BPP...
+//DOUBLE NOTE: Still need to do regression testing to ensure that this doesn't break other stuff... !!! CHECK !!!
+if (!phrase_mode && srcshift != 0)
+	srcd = ((srcd2 & 0xFF) << (8 - srcshift)) | ((srcd1 & 0xFF) >> srcshift);
+
 //Z DATA() stuff done here... And it has to be done before any Z shifting...
 //Note that we need to have phrase mode start/end support here... (Not since we moved it from dzwrite...!)
 /*
@@ -4134,7 +4143,7 @@ void ADDARRAY(uint16 * addq, uint8 daddasel, uint8 daddbsel, uint8 daddmode,
 	ADDARRAY(addq, 6/*daddasel*/, 7/*daddbsel*/, 1/*daddmode*/, 0, 0, initcin, 0, 0, 0, 0, 0, srcz1, srcz2, zinc, 0);
 	srcz1 = ((uint64)addq[3] << 48) | ((uint64)addq[2] << 32) | ((uint64)addq[1] << 16) | (uint64)addq[0];
 
-#ifdef VERBOSE_BLITTER_LOGGING
+#if 0//def VERBOSE_BLITTER_LOGGING
 if (logBlit)
 {
 	printf("\n[srcz1=%08X%08X, srcz2=%08X%08X, zinc=%08X",
@@ -4151,7 +4160,7 @@ srcz = (srcz2 << (64 - zSrcShift)) | (srcz1 >> zSrcShift);
 if (zSrcShift == 0)
 	srcz = srcz1;
 
-#ifdef VERBOSE_BLITTER_LOGGING
+#if 0//def VERBOSE_BLITTER_LOGGING
 if (logBlit)
 {
 	printf(" srcz=%08X%08X]\n", (uint32)(srcz >> 32), (uint32)(srcz & 0xFFFFFFFF));
@@ -4308,7 +4317,7 @@ if (!winhibit)
 #ifdef VERBOSE_BLITTER_LOGGING
 if (logBlit)
 {
-	printf(" [%08X%08X]", (uint32)(wdata >> 32), (uint32)(wdata & 0xFFFFFFFF));
+	printf(" [%08X%08X]%s", (uint32)(wdata >> 32), (uint32)(wdata & 0xFFFFFFFF), (winhibit ? "[X]" : ""));
 	printf(" (icount=%04X, inc=%u)\n", icount, (uint16)inc);
 	printf("    [dstart=%X dend=%X pwidth=%X srcshift=%X]", dstart, dend, pwidth, srcshift);
 	printf("[daas=%X dabs=%X dam=%X ds=%X daq=%s]\n", daddasel, daddbsel, daddmode, data_sel, (daddq_sel ? "T" : "F"));
@@ -4585,7 +4594,7 @@ cause the inner state to go active */
 //Since we don't get here until the inner loop is finished (indone = true) we can get
 //away with doing it here...!
 			ocount--;
-		
+
 			if (ocount == 0)
 				outer0 = true;
 #ifdef VERBOSE_BLITTER_LOGGING
@@ -4633,7 +4642,7 @@ fflush(stdout);
 }
 #endif
 		}
-		
+
 		if (a2update)
 		{
 #ifdef VERBOSE_BLITTER_LOGGING
@@ -4660,7 +4669,7 @@ fflush(stdout);
 #ifdef VERBOSE_BLITTER_LOGGING
 if (logBlit)
 {
-	printf("Done!\na1_x=%04X a1_y=%04X a1_frac_x=%04X a1_frac_y=%04X a2_x=%04X a2_y%04X\n", 
+	printf("Done!\na1_x=%04X a1_y=%04X a1_frac_x=%04X a1_frac_y=%04X a2_x=%04X a2_y%04X\n",
 		GET16(blitter_ram, A1_PIXEL + 2),
 		GET16(blitter_ram, A1_PIXEL + 0),
 		GET16(blitter_ram, A1_FPIXEL + 2),
@@ -4682,7 +4691,7 @@ if (logBlit)
 #ifdef VERBOSE_BLITTER_LOGGING
 if (logBlit)
 {
-	printf("Writeback!\na1_x=%04X a1_y=%04X a1_frac_x=%04X a1_frac_y=%04X a2_x=%04X a2_y%04X\n", 
+	printf("Writeback!\na1_x=%04X a1_y=%04X a1_frac_x=%04X a1_frac_y=%04X a2_x=%04X a2_y%04X\n",
 		GET16(blitter_ram, A1_PIXEL + 2),
 		GET16(blitter_ram, A1_PIXEL + 0),
 		GET16(blitter_ram, A1_FPIXEL + 2),
@@ -5135,7 +5144,7 @@ addasel[0..2] select the register to add
 adda_xconst[0..2] generate a power of 2 in the range 1-64 or all zeroes when
 they are all 1.
 
-addareg selects register value to be added as opposed to constant 
+addareg selects register value to be added as opposed to constant
 value.
 
 suba_x, suba_y complement the X and Y values
@@ -5170,7 +5179,7 @@ void ADDAMUX(int16 &adda_x, int16 &adda_y, uint8 addasel, int16 a1_step_x, int16
 	bool adda_yconst, bool addareg, bool suba_x, bool suba_y)
 {
 
-/*INT16/	addac_x, addac_y, addar_x, addar_y, addart_x, addart_y, 
+/*INT16/	addac_x, addac_y, addar_x, addar_y, addart_x, addart_y,
 INT16/	addas_x, addas_y, suba_x16, suba_y16
 :LOCAL;
 BEGIN
@@ -5193,7 +5202,7 @@ Addar_y		:= MX2 (addar_y, addart_y, a2_step_y, addaselb[2]);*/
 	int16 addar_y = (addasel & 0x04 ? a2_step_y : yterm[addasel & 0x03]);
 //////////////////////////////////////////////////////////////////////////////////////
 
-/* Generate a constant value - this is a power of 2 in the range 
+/* Generate a constant value - this is a power of 2 in the range
 0-64, or zero.  The control bits are adda_xconst[0..2], when they
 are all 1  the result is 0.
 Constants for Y can only be 0 or 1 */
@@ -5202,7 +5211,7 @@ Constants for Y can only be 0 or 1 */
 Unused[0]	:= DUMMY (unused[0]);
 
 Addac_x		:= JOIN (addac_x, addac_x[0..6], zero, zero, zero, zero, zero, zero, zero, zero, zero);
-Addac_y		:= JOIN (addac_y, adda_yconst, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, 
+Addac_y		:= JOIN (addac_y, adda_yconst, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero,
 			zero, zero, zero, zero, zero);*/
 ////////////////////////////////////// C++ CODE //////////////////////////////////////
 	int16 addac_x = (adda_xconst == 0x07 ? 0 : 1 << adda_xconst);
@@ -5220,9 +5229,9 @@ Addas_y		:= MX2 (addas_y, addac_y, addar_y, addareg);*/
 
 /* Complement these values (complement flag gives adder carry in)*/
 
-/*Suba_x16	:= JOIN (suba_x16, suba_x, suba_x, suba_x, suba_x, suba_x, suba_x, suba_x, suba_x, suba_x, 
+/*Suba_x16	:= JOIN (suba_x16, suba_x, suba_x, suba_x, suba_x, suba_x, suba_x, suba_x, suba_x, suba_x,
 			suba_x, suba_x, suba_x, suba_x, suba_x, suba_x, suba_x);
-Suba_y16	:= JOIN (suba_y16, suba_y, suba_y, suba_y, suba_y, suba_y, suba_y, suba_y, suba_y, suba_y, 
+Suba_y16	:= JOIN (suba_y16, suba_y, suba_y, suba_y, suba_y, suba_y, suba_y, suba_y, suba_y, suba_y,
 			suba_y, suba_y, suba_y, suba_y, suba_y, suba_y, suba_y);
 Adda_x		:= EO (adda_x, suba_x16, addas_x);
 Adda_y		:= EO (adda_y, suba_y16, addas_y);*/
@@ -5236,9 +5245,9 @@ Adda_y		:= EO (adda_y, suba_y16, addas_y);*/
 
 /**  ADDBMUX - Address adder input B selection  *******************
 
-This module selects the register to be updated by the address 
-adder.  This can be one of three registers, the A1 and A2 
-pointers, or the A1 fractional part. It can also be zero, so that the step 
+This module selects the register to be updated by the address
+adder.  This can be one of three registers, the A1 and A2
+pointers, or the A1 fractional part. It can also be zero, so that the step
 registers load directly into the pointers.
 */
 
@@ -5261,7 +5270,7 @@ void ADDBMUX(int16 &addb_x, int16 &addb_y, uint8 addbsel, int16 a1_x, int16 a1_y
 {
 
 /*Zero		:= TIE0 (zero);
-Zero16		:= JOIN (zero16, zero, zero, zero, zero, zero, zero, zero, 
+Zero16		:= JOIN (zero16, zero, zero, zero, zero, zero, zero, zero,
 			zero, zero, zero, zero, zero, zero, zero, zero, zero);
 Addbselb[0-1]	:= BUF8 (addbselb[0-1], addbsel[0-1]);
 Addb_x		:= MX4 (addb_x, a1_x, a2_x, a1_frac_x, zero16, addbselb[0..1]);
@@ -5317,10 +5326,10 @@ addradd
 
 Blitter Address Adder
 ---------------------
-The blitter address adder is a pair of sixteen bit adders, one 
-each for X and Y.  The multiplexing of the input terms is 
-performed elsewhere, but this adder can also perform modulo 
-arithmetic to align X-addresses onto phrase boundaries. 
+The blitter address adder is a pair of sixteen bit adders, one
+each for X and Y.  The multiplexing of the input terms is
+performed elsewhere, but this adder can also perform modulo
+arithmetic to align X-addresses onto phrase boundaries.
 
 modx[0..2] take values
 000	no mask
@@ -5346,7 +5355,7 @@ INT16/	addb_y
 		modx[0..2]
 		suba_x
 		suba_y
-		:IN); 
+		:IN);
 
 BEGIN
 
@@ -5410,7 +5419,7 @@ DEF DATA (
 		dcomp[0..7]		// data byte equal flags
 		srcd[0..7]		// bits to use for bit to byte expansion
 		zcomp[0..3]		// output from Z comparators
-		:OUT; 
+		:OUT;
 		a1_x[0..1]		// low two bits of A1 X pointer
 		big_pix			// pixel organisation is big-endian
 		blitter_active	// blitter is active
@@ -5493,13 +5502,13 @@ Srcd[0-7]	:= JOIN (srcd[0-7], srcdlo{0-7});
 Srcd[8-31]	:= JOIN (srcd[8-31], srcdlo{8-31});
 Srcd[32-63]	:= JOIN (srcd[32-63], srcdhi{0-31});*/
 
-// Destination data registers 
+// Destination data registers
 
 /*Data_dst	:= DATA_DST (dstd[0..63], dstz[0..1], clk, dstdld[0..1], dstzld[0..1], load_data[0..1]);
 Dstdlo		:= JOIN (dstdlo, dstd[0..31]);
 Dstdhi		:= JOIN (dstdhi, dstd[32..63]);*/
 
-// Pattern and Color data registers 
+// Pattern and Color data registers
 
 // Looks like this is simply another register file for the pattern data registers. No adding or anything funky
 // going on. Note that patd & patdv will output the same info.
@@ -5513,7 +5522,7 @@ Patdhi		:= JOIN (patdhi, patd[32..63]);*/
 
 // Multiplying data Mixer (NOT IN JAGUAR I)
 
-/*Datamix		:= DATAMIX (patdo[0..1], clk, colord[0..15], dpipe[1], dstd[0..63], int0dp[8..10], int1dp[8..10], 
+/*Datamix		:= DATAMIX (patdo[0..1], clk, colord[0..15], dpipe[1], dstd[0..63], int0dp[8..10], int1dp[8..10],
 			int2dp[8..10], int3dp[8..10], mixsel[0..2], patd[0..63], pdsel[0..1], srcd[0..63], textrgb, txtd[0..63]);*/
 
 // Logic function unit
@@ -5529,7 +5538,7 @@ Patdhi		:= JOIN (patdhi, patd[32..63]);*/
 //////////////////////////////////////////////////////////////////////////////////////
 
 // Increment and Step Registers
-   
+
 // Does it do anything without the step add lines? Check it!
 // No. This is pretty much just a register file without the Jaguar II lines...
 /*Inc_step	:= INC_STEP (iinc, istep[0..31], zinc, zstep[0..31], clk, ext_int, gpu_din, iincld, iincldx, istepadd,
@@ -5625,7 +5634,7 @@ if (logBlit)
 // 22 Mar 94
 // The data initializer - allows all four initial values to be computed from one (NOT IN JAGUAR I)
 
-/*Datinit		:= DATINIT (initcin[0..3], initinc[0..63], initpix[0..15], a1_x[0..1], big_pix, clk, iinc, init_if, init_ii, 
+/*Datinit		:= DATINIT (initcin[0..3], initinc[0..63], initpix[0..15], a1_x[0..1], big_pix, clk, iinc, init_if, init_ii,
 			init_zf, istep[0..31], zinc, zstep[0..31]);*/
 
 // Adder array for Z and intensity increments
@@ -5754,7 +5763,7 @@ Maskt[9-14]	:= OAN1P (maskt[9-14], maskt[8-13], s_coarse[2-7], e_coarse\[2-7]);*
 //////////////////////////////////////////////////////////////////////////////////////
 
 /* The bit terms are mirrored for big-endian pixels outside phrase
-mode.  The byte terms are mirrored for big-endian pixels in phrase 
+mode.  The byte terms are mirrored for big-endian pixels in phrase
 mode.  */
 
 /*Mirror_bit	:= AN2M (mir_bit, phrase_mode\, big_pix);
@@ -5903,7 +5912,7 @@ END;*/
 /**  COMP_CTRL - Comparator output control logic  *****************
 
 This block is responsible for taking the comparator outputs and
-using them as appropriate to inhibit writes.  Two methods are 
+using them as appropriate to inhibit writes.  Two methods are
 supported for inhibiting write data:
 
 -	suppression of the inner loop controlled write operation
@@ -5916,13 +5925,13 @@ and sixteen bit pixel modes.
 Writes can be suppressed by data being equal, by the Z comparator
 conditions being met, or by the bit to pixel expansion scheme.
 
-Pipe-lining issues: the data derived comparator outputs are stable 
+Pipe-lining issues: the data derived comparator outputs are stable
 until the next data read, well after the affected write from this
 operation.  However, the inner counter bits can count immediately
-before the ack for the last write.  Therefore, it is necessary to 
+before the ack for the last write.  Therefore, it is necessary to
 delay bcompbit select terms by one inner loop pipe-line stage,
 when generating the select for the data control - the output is
-delayed one further tick to give it write data timing (2/34). 
+delayed one further tick to give it write data timing (2/34).
 
 There is also a problem with computed data - the new values are
 calculated before the write associated with the old value has been
@@ -5971,7 +5980,7 @@ In phrase mode, the eight bits are used directly, and this mode is
 only applicable to 8-bit pixel mode (2/34) */
 
 /*Bcompselt[0-2]	:= EO (bcompselt[0-2], icount[0-2], big_pix);
-Bcompbit	:= MX8 (bcompbit, srcd[7], srcd[6], srcd[5], 
+Bcompbit	:= MX8 (bcompbit, srcd[7], srcd[6], srcd[5],
 			srcd[4], srcd[3], srcd[2], srcd[1], srcd[0], bcompselt[0..2]);
 Bcompbit\	:= INV1 (bcompbit\, bcompbit);*/
 ////////////////////////////////////// C++ CODE //////////////////////////////////////
@@ -5990,14 +5999,14 @@ if (logBlit)
 
 /* pipe-line the count */
 /*Bcompsel[0-2]	:= FDSYNC (bcompsel[0-2], bcompselt[0-2], step_inner, clk);
-Bcompbt		:= MX8 (bcompbitpt, srcd[7], srcd[6], srcd[5], 
+Bcompbt		:= MX8 (bcompbitpt, srcd[7], srcd[6], srcd[5],
 			srcd[4], srcd[3], srcd[2], srcd[1], srcd[0], bcompsel[0..2]);
 Bcompbitp	:= FD1Q (bcompbitp, bcompbitpt, clk);
 Bcompbitp\	:= INV1 (bcompbitp\, bcompbitp);*/
 
 /* For pixel mode, generate the write inhibit signal for all modes
 on bit inhibit, for 8 and 16 bit modes on comparator inhibit, and
-for 16 bit mode on Z inhibit 
+for 16 bit mode on Z inhibit
 
 Nowrite = bcompen . /bcompbit . /phrase_mode
 	+ dcompen . dcomp[0] . /phrase_mode . pixsize = 011
@@ -6039,7 +6048,7 @@ if (logBlit)
 //////////////////////////////////////////////////////////////////////////////////////
 
 /* For phrase mode, generate the byte inhibit signals for eight bit
-mode 011, or sixteen bit mode 100 
+mode 011, or sixteen bit mode 100
 dbinh\[0] =  pixsize[2] . zcomp[0]
 	 +  pixsize[2] . dcomp[0] . dcomp[1] . dcompen
 	 + /pixsize[2] . dcomp[0] . dcompen
@@ -6231,4 +6240,9 @@ if (logBlit)
 ////////////////////////////////////// C++ CODE //////////////////////////////////////
 //////////////////////////////////////////////////////////////////////////////////////
 
+// !!! TESTING !!! TESTING !!! TESTING !!! TESTING !!! TESTING !!! TESTING !!! TESTING !!!
+// !!! TESTING !!! TESTING !!! TESTING !!! TESTING !!! TESTING !!! TESTING !!! TESTING !!!
+// !!! TESTING !!! TESTING !!! TESTING !!! TESTING !!! TESTING !!! TESTING !!! TESTING !!!
+
 #endif
+