Removed some cruft and nonstandard int/uint types, added M series BIOS.

[virtualjaguar] / src / blitter.cpp
diff --git a/src/blitter.cpp b/src/blitter.cpp

index 5baf7a551a20d97425079e233317ef40ee16c697..59880a08e9f5030d4f6292b9a56797e830378af1 100644 (file)
--- a/src/blitter.cpp
+++ b/src/blitter.cpp
@@ -1,20 +1,38 @@
  //
  // Blitter core
  //
-// by James L. Hammons
+// by James Hammons
+// (C) 2010 Underground Software
+//
+// JLH = James Hammons <jlhamm@acm.org>
+//
+// Who  When        What
+// ---  ----------  -------------------------------------------------------------
+// JLH  01/16/2010  Created this log ;-)
+//
+
  //
  // I owe a debt of gratitude to Curt Vendel and to John Mathieson--to Curt
  // for supplying the Oberon ASIC nets and to John for making them available
  // to Curt. ;-) Without that excellent documentation which shows *exactly*
  // what's going on inside the TOM chip, we'd all still be guessing as to how
  // the wily blitter and other pieces of the Jaguar puzzle actually work.
+// Now how about those JERRY ASIC nets, gentlemen...? [We have those now!] ;-)
  //
  
-#include "jaguar.h"
  #include "blitter.h"
  
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include "jaguar.h"
+#include "log.h"
+//#include "memory.h"
+
  // Various conditional compilation goodies...
  
+//#define LOG_BLITS
+
  //#define USE_ORIGINAL_BLITTER
  //#define USE_MIDSUMMER_BLITTER
  #define USE_MIDSUMMER_BLITTER_MKII
@@ -26,60 +44,60 @@ extern int jaguar_active_memory_dumps;
  // Local global variables
  
  int start_logging = 0;
-uint8 blitter_working = 0;
+uint8_t blitter_working = 0;
  
  // Blitter register RAM (most of it is hidden from the user)
  
-static uint8 blitter_ram[0x100];
+static uint8_t blitter_ram[0x100];
  
  // Other crapola
  
  bool specialLog = false;
  extern int effect_start;
  extern int blit_start_log;
-void BlitterMidsummer(uint32 cmd);
+void BlitterMidsummer(uint32_t cmd);
  void BlitterMidsummer2(void);
  
-#define REG(A) (((uint32)blitter_ram[(A)] << 24) | ((uint32)blitter_ram[(A)+1] << 16) \
-                               | ((uint32)blitter_ram[(A)+2] << 8) | (uint32)blitter_ram[(A)+3])
+#define REG(A) (((uint32_t)blitter_ram[(A)] << 24) | ((uint32_t)blitter_ram[(A)+1] << 16) \
+                               | ((uint32_t)blitter_ram[(A)+2] << 8) | (uint32_t)blitter_ram[(A)+3])
  #define WREG(A,D)      (blitter_ram[(A)] = ((D)>>24)&0xFF, blitter_ram[(A)+1] = ((D)>>16)&0xFF, \
                                         blitter_ram[(A)+2] = ((D)>>8)&0xFF, blitter_ram[(A)+3] = (D)&0xFF)
  
  // Blitter registers (offsets from F02200)
  
-#define A1_BASE                        ((UINT32)0x00)
-#define A1_FLAGS               ((UINT32)0x04)
-#define A1_CLIP                        ((UINT32)0x08)  // Height and width values for clipping
-#define A1_PIXEL               ((UINT32)0x0C)  // Integer part of the pixel (Y.i and X.i)
-#define A1_STEP                        ((UINT32)0x10)  // Integer part of the step
-#define A1_FSTEP               ((UINT32)0x14)  // Fractional part of the step
-#define A1_FPIXEL              ((UINT32)0x18)  // Fractional part of the pixel (Y.f and X.f)
-#define A1_INC                 ((UINT32)0x1C)  // Integer part of the increment
-#define A1_FINC                        ((UINT32)0x20)  // Fractional part of the increment
-#define A2_BASE                        ((UINT32)0x24)
-#define A2_FLAGS               ((UINT32)0x28)
-#define A2_MASK                        ((UINT32)0x2C)  // Modulo values for x and y (M.y  and M.x)
-#define A2_PIXEL               ((UINT32)0x30)  // Integer part of the pixel (no fractional part for A2)
-#define A2_STEP                        ((UINT32)0x34)  // Integer part of the step (no fractional part for A2)
-#define COMMAND                        ((UINT32)0x38)
-#define PIXLINECOUNTER ((UINT32)0x3C)  // Inner & outer loop values
-#define SRCDATA                        ((UINT32)0x40)
-#define DSTDATA                        ((UINT32)0x48)
-#define DSTZ                   ((UINT32)0x50)
-#define SRCZINT                        ((UINT32)0x58)
-#define SRCZFRAC               ((UINT32)0x60)
-#define PATTERNDATA            ((UINT32)0x68)
-#define INTENSITYINC   ((UINT32)0x70)
-#define ZINC                   ((UINT32)0x74)
-#define COLLISIONCTRL  ((UINT32)0x78)
-#define PHRASEINT0             ((UINT32)0x7C)
-#define PHRASEINT1             ((UINT32)0x80)
-#define PHRASEINT2             ((UINT32)0x84)
-#define PHRASEINT3             ((UINT32)0x88)
-#define PHRASEZ0               ((UINT32)0x8C)
-#define PHRASEZ1               ((UINT32)0x90)
-#define PHRASEZ2               ((UINT32)0x94)
-#define PHRASEZ3               ((UINT32)0x98)
+#define A1_BASE                        ((uint32_t)0x00)
+#define A1_FLAGS               ((uint32_t)0x04)
+#define A1_CLIP                        ((uint32_t)0x08)        // Height and width values for clipping
+#define A1_PIXEL               ((uint32_t)0x0C)        // Integer part of the pixel (Y.i and X.i)
+#define A1_STEP                        ((uint32_t)0x10)        // Integer part of the step
+#define A1_FSTEP               ((uint32_t)0x14)        // Fractional part of the step
+#define A1_FPIXEL              ((uint32_t)0x18)        // Fractional part of the pixel (Y.f and X.f)
+#define A1_INC                 ((uint32_t)0x1C)        // Integer part of the increment
+#define A1_FINC                        ((uint32_t)0x20)        // Fractional part of the increment
+#define A2_BASE                        ((uint32_t)0x24)
+#define A2_FLAGS               ((uint32_t)0x28)
+#define A2_MASK                        ((uint32_t)0x2C)        // Modulo values for x and y (M.y  and M.x)
+#define A2_PIXEL               ((uint32_t)0x30)        // Integer part of the pixel (no fractional part for A2)
+#define A2_STEP                        ((uint32_t)0x34)        // Integer part of the step (no fractional part for A2)
+#define COMMAND                        ((uint32_t)0x38)
+#define PIXLINECOUNTER ((uint32_t)0x3C)        // Inner & outer loop values
+#define SRCDATA                        ((uint32_t)0x40)
+#define DSTDATA                        ((uint32_t)0x48)
+#define DSTZ                   ((uint32_t)0x50)
+#define SRCZINT                        ((uint32_t)0x58)
+#define SRCZFRAC               ((uint32_t)0x60)
+#define PATTERNDATA            ((uint32_t)0x68)
+#define INTENSITYINC   ((uint32_t)0x70)
+#define ZINC                   ((uint32_t)0x74)
+#define COLLISIONCTRL  ((uint32_t)0x78)
+#define PHRASEINT0             ((uint32_t)0x7C)
+#define PHRASEINT1             ((uint32_t)0x80)
+#define PHRASEINT2             ((uint32_t)0x84)
+#define PHRASEINT3             ((uint32_t)0x88)
+#define PHRASEZ0               ((uint32_t)0x8C)
+#define PHRASEZ1               ((uint32_t)0x90)
+#define PHRASEZ2               ((uint32_t)0x94)
+#define PHRASEZ3               ((uint32_t)0x98)
  
  // Blitter command bits
  
@@ -142,34 +160,34 @@ void BlitterMidsummer2(void);
  //Put 'em back, once we fix the problem!!! [KO]
  // 1 bpp pixel read
  #define PIXEL_SHIFT_1(a)      (((~a##_x) >> 16) & 7)
-#define PIXEL_OFFSET_1(a)     (((((UINT32)a##_y >> 16) * a##_width / 8) + (((UINT32)a##_x >> 19) & ~7)) * (1 + a##_pitch) + (((UINT32)a##_x >> 19) & 7))
+#define PIXEL_OFFSET_1(a)     (((((uint32_t)a##_y >> 16) * a##_width / 8) + (((uint32_t)a##_x >> 19) & ~7)) * (1 + a##_pitch) + (((uint32_t)a##_x >> 19) & 7))
  #define READ_PIXEL_1(a)       ((JaguarReadByte(a##_addr+PIXEL_OFFSET_1(a), BLITTER) >> PIXEL_SHIFT_1(a)) & 0x01)
  //#define READ_PIXEL_1(a)       ((JaguarReadByte(a##_addr+PIXEL_OFFSET_1(a)) >> PIXEL_SHIFT_1(a)) & 0x01)
  
  // 2 bpp pixel read
  #define PIXEL_SHIFT_2(a)      (((~a##_x) >> 15) & 6)
-#define PIXEL_OFFSET_2(a)     (((((UINT32)a##_y >> 16) * a##_width / 4) + (((UINT32)a##_x >> 18) & ~7)) * (1 + a##_pitch) + (((UINT32)a##_x >> 18) & 7))
+#define PIXEL_OFFSET_2(a)     (((((uint32_t)a##_y >> 16) * a##_width / 4) + (((uint32_t)a##_x >> 18) & ~7)) * (1 + a##_pitch) + (((uint32_t)a##_x >> 18) & 7))
  #define READ_PIXEL_2(a)       ((JaguarReadByte(a##_addr+PIXEL_OFFSET_2(a), BLITTER) >> PIXEL_SHIFT_2(a)) & 0x03)
  //#define READ_PIXEL_2(a)       ((JaguarReadByte(a##_addr+PIXEL_OFFSET_2(a)) >> PIXEL_SHIFT_2(a)) & 0x03)
  
  // 4 bpp pixel read
  #define PIXEL_SHIFT_4(a)      (((~a##_x) >> 14) & 4)
-#define PIXEL_OFFSET_4(a)     (((((UINT32)a##_y >> 16) * (a##_width/2)) + (((UINT32)a##_x >> 17) & ~7)) * (1 + a##_pitch) + (((UINT32)a##_x >> 17) & 7))
+#define PIXEL_OFFSET_4(a)     (((((uint32_t)a##_y >> 16) * (a##_width/2)) + (((uint32_t)a##_x >> 17) & ~7)) * (1 + a##_pitch) + (((uint32_t)a##_x >> 17) & 7))
  #define READ_PIXEL_4(a)       ((JaguarReadByte(a##_addr+PIXEL_OFFSET_4(a), BLITTER) >> PIXEL_SHIFT_4(a)) & 0x0f)
  //#define READ_PIXEL_4(a)       ((JaguarReadByte(a##_addr+PIXEL_OFFSET_4(a)) >> PIXEL_SHIFT_4(a)) & 0x0f)
  
  // 8 bpp pixel read
-#define PIXEL_OFFSET_8(a)     (((((UINT32)a##_y >> 16) * a##_width) + (((UINT32)a##_x >> 16) & ~7)) * (1 + a##_pitch) + (((UINT32)a##_x >> 16) & 7))
+#define PIXEL_OFFSET_8(a)     (((((uint32_t)a##_y >> 16) * a##_width) + (((uint32_t)a##_x >> 16) & ~7)) * (1 + a##_pitch) + (((uint32_t)a##_x >> 16) & 7))
  #define READ_PIXEL_8(a)       (JaguarReadByte(a##_addr+PIXEL_OFFSET_8(a), BLITTER))
  //#define READ_PIXEL_8(a)       (JaguarReadByte(a##_addr+PIXEL_OFFSET_8(a)))
  
  // 16 bpp pixel read
-#define PIXEL_OFFSET_16(a)    (((((UINT32)a##_y >> 16) * a##_width) + (((UINT32)a##_x >> 16) & ~3)) * (1 + a##_pitch) + (((UINT32)a##_x >> 16) & 3))
+#define PIXEL_OFFSET_16(a)    (((((uint32_t)a##_y >> 16) * a##_width) + (((uint32_t)a##_x >> 16) & ~3)) * (1 + a##_pitch) + (((uint32_t)a##_x >> 16) & 3))
  #define READ_PIXEL_16(a)       (JaguarReadWord(a##_addr+(PIXEL_OFFSET_16(a)<<1), BLITTER))
  //#define READ_PIXEL_16(a)       (JaguarReadWord(a##_addr+(PIXEL_OFFSET_16(a)<<1)))
  
  // 32 bpp pixel read
-#define PIXEL_OFFSET_32(a)    (((((UINT32)a##_y >> 16) * a##_width) + (((UINT32)a##_x >> 16) & ~1)) * (1 + a##_pitch) + (((UINT32)a##_x >> 16) & 1))
+#define PIXEL_OFFSET_32(a)    (((((uint32_t)a##_y >> 16) * a##_width) + (((uint32_t)a##_x >> 16) & ~1)) * (1 + a##_pitch) + (((uint32_t)a##_x >> 16) & 1))
  #define READ_PIXEL_32(a)      (JaguarReadLong(a##_addr+(PIXEL_OFFSET_32(a)<<2), BLITTER))
  //#define READ_PIXEL_32(a)      (JaguarReadLong(a##_addr+(PIXEL_OFFSET_32(a)<<2)))
  
@@ -195,25 +213,25 @@ void BlitterMidsummer2(void);
  //#define WRITE_ZDATA_16(a,d)     {  JaguarWriteWord(a##_addr+(ZDATA_OFFSET_16(a)<<1), d); }
  
  // z data write
-#define WRITE_ZDATA(a,f,d) WRITE_ZDATA_16(a,d); 
+#define WRITE_ZDATA(a,f,d) WRITE_ZDATA_16(a,d);
  
  // 1 bpp r data read
-#define READ_RDATA_1(r,a,p)  ((p) ?  ((REG(r+(((UINT32)a##_x >> 19) & 0x04))) >> (((UINT32)a##_x >> 16) & 0x1F)) & 0x0001 : (REG(r) & 0x0001))
+#define READ_RDATA_1(r,a,p)  ((p) ?  ((REG(r+(((uint32_t)a##_x >> 19) & 0x04))) >> (((uint32_t)a##_x >> 16) & 0x1F)) & 0x0001 : (REG(r) & 0x0001))
  
  // 2 bpp r data read
-#define READ_RDATA_2(r,a,p)  ((p) ?  ((REG(r+(((UINT32)a##_x >> 18) & 0x04))) >> (((UINT32)a##_x >> 15) & 0x3E)) & 0x0003 : (REG(r) & 0x0003))
+#define READ_RDATA_2(r,a,p)  ((p) ?  ((REG(r+(((uint32_t)a##_x >> 18) & 0x04))) >> (((uint32_t)a##_x >> 15) & 0x3E)) & 0x0003 : (REG(r) & 0x0003))
  
  // 4 bpp r data read
-#define READ_RDATA_4(r,a,p)  ((p) ?  ((REG(r+(((UINT32)a##_x >> 17) & 0x04))) >> (((UINT32)a##_x >> 14) & 0x28)) & 0x000F : (REG(r) & 0x000F))
+#define READ_RDATA_4(r,a,p)  ((p) ?  ((REG(r+(((uint32_t)a##_x >> 17) & 0x04))) >> (((uint32_t)a##_x >> 14) & 0x28)) & 0x000F : (REG(r) & 0x000F))
  
  // 8 bpp r data read
-#define READ_RDATA_8(r,a,p)  ((p) ?  ((REG(r+(((UINT32)a##_x >> 16) & 0x04))) >> (((UINT32)a##_x >> 13) & 0x18)) & 0x00FF : (REG(r) & 0x00FF))
+#define READ_RDATA_8(r,a,p)  ((p) ?  ((REG(r+(((uint32_t)a##_x >> 16) & 0x04))) >> (((uint32_t)a##_x >> 13) & 0x18)) & 0x00FF : (REG(r) & 0x00FF))
  
  // 16 bpp r data read
-#define READ_RDATA_16(r,a,p)  ((p) ? ((REG(r+(((UINT32)a##_x >> 15) & 0x04))) >> (((UINT32)a##_x >> 12) & 0x10)) & 0xFFFF : (REG(r) & 0xFFFF))
+#define READ_RDATA_16(r,a,p)  ((p) ? ((REG(r+(((uint32_t)a##_x >> 15) & 0x04))) >> (((uint32_t)a##_x >> 12) & 0x10)) & 0xFFFF : (REG(r) & 0xFFFF))
  
  // 32 bpp r data read
-#define READ_RDATA_32(r,a,p)  ((p) ? REG(r+(((UINT32)a##_x >> 14) & 0x04)) : REG(r))
+#define READ_RDATA_32(r,a,p)  ((p) ? REG(r+(((uint32_t)a##_x >> 14) & 0x04)) : REG(r))
  
  // register data read
  #define READ_RDATA(r,a,f,p) (\
@@ -246,8 +264,8 @@ void BlitterMidsummer2(void);
  //#define WRITE_PIXEL_16(a,d)     {  JaguarWriteWord(a##_addr+(PIXEL_OFFSET_16(a)<<1), d); if (specialLog) WriteLog("Pixel write address: %08X\n", a##_addr+(PIXEL_OFFSET_16(a)<<1)); }
  
  // 32 bpp pixel write
-#define WRITE_PIXEL_32(a,d)            { JaguarWriteLong(a##_addr+(PIXEL_OFFSET_32(a)<<2), d, BLITTER); } 
-//#define WRITE_PIXEL_32(a,d)          { JaguarWriteLong(a##_addr+(PIXEL_OFFSET_32(a)<<2), d); } 
+#define WRITE_PIXEL_32(a,d)            { JaguarWriteLong(a##_addr+(PIXEL_OFFSET_32(a)<<2), d, BLITTER); }
+//#define WRITE_PIXEL_32(a,d)          { JaguarWriteLong(a##_addr+(PIXEL_OFFSET_32(a)<<2), d); }
  
  // pixel write
  #define WRITE_PIXEL(a,f,d) {\
@@ -267,8 +285,8 @@ void BlitterMidsummer2(void);
  // as a floating point bit pattern being followed by a number of zeroes. So, e.g., 001101 translates to
  // 1.01 (the "1." being implied) x (2 ^ 3) or 1010 -> 10 in base 10 (i.e., 1.01 with the decimal place
  // being shifted to the right 3 places).
-/*static uint32 blitter_scanline_width[48] = 
-{             
+/*static uint32_t blitter_scanline_width[48] =
+{
       0,    0,    0,    0,                                      // Note: This would really translate to 1, 1, 1, 1
       2,    0,    0,    0,
       4,    0,    6,    0,
@@ -283,62 +301,62 @@ void BlitterMidsummer2(void);
    2048, 2560, 3072, 3584
  };//*/
  
-//static uint8 * tom_ram_8;
-//static uint8 * paletteRam;
-static uint8 src;
-static uint8 dst;
-static uint8 misc;
-static uint8 a1ctl;
-static uint8 mode;
-static uint8 ity;
-static uint8 zop;
-static uint8 op;
-static uint8 ctrl;
-static uint32 a1_addr;
-static uint32 a2_addr;
-static int32 a1_zoffs;
-static int32 a2_zoffs;
-static uint32 xadd_a1_control;
-static uint32 xadd_a2_control;
-static int32 a1_pitch;
-static int32 a2_pitch;
-static uint32 n_pixels;
-static uint32 n_lines;
-static int32 a1_x;
-static int32 a1_y;
-static int32 a1_width;
-static int32 a2_x;
-static int32 a2_y;
-static int32 a2_width;
-static int32 a2_mask_x;
-static int32 a2_mask_y;
-static int32 a1_xadd;
-static int32 a1_yadd;
-static int32 a2_xadd;
-static int32 a2_yadd;
-static uint8 a1_phrase_mode;
-static uint8 a2_phrase_mode;
-static int32 a1_step_x = 0;
-static int32 a1_step_y = 0;
-static int32 a2_step_x = 0;
-static int32 a2_step_y = 0;
-static uint32 outer_loop;
-static uint32 inner_loop;
-static uint32 a2_psize;
-static uint32 a1_psize;
-static uint32 gouraud_add;
-//static uint32 gouraud_data;
-//static uint16 gint[4];
-//static uint16 gfrac[4];
-//static uint8  gcolour[4];
+//static uint8_t * tom_ram_8;
+//static uint8_t * paletteRam;
+static uint8_t src;
+static uint8_t dst;
+static uint8_t misc;
+static uint8_t a1ctl;
+static uint8_t mode;
+static uint8_t ity;
+static uint8_t zop;
+static uint8_t op;
+static uint8_t ctrl;
+static uint32_t a1_addr;
+static uint32_t a2_addr;
+static int32_t a1_zoffs;
+static int32_t a2_zoffs;
+static uint32_t xadd_a1_control;
+static uint32_t xadd_a2_control;
+static int32_t a1_pitch;
+static int32_t a2_pitch;
+static uint32_t n_pixels;
+static uint32_t n_lines;
+static int32_t a1_x;
+static int32_t a1_y;
+static int32_t a1_width;
+static int32_t a2_x;
+static int32_t a2_y;
+static int32_t a2_width;
+static int32_t a2_mask_x;
+static int32_t a2_mask_y;
+static int32_t a1_xadd;
+static int32_t a1_yadd;
+static int32_t a2_xadd;
+static int32_t a2_yadd;
+static uint8_t a1_phrase_mode;
+static uint8_t a2_phrase_mode;
+static int32_t a1_step_x = 0;
+static int32_t a1_step_y = 0;
+static int32_t a2_step_x = 0;
+static int32_t a2_step_y = 0;
+static uint32_t outer_loop;
+static uint32_t inner_loop;
+static uint32_t a2_psize;
+static uint32_t a1_psize;
+static uint32_t gouraud_add;
+//static uint32_t gouraud_data;
+//static uint16_t gint[4];
+//static uint16_t gfrac[4];
+//static uint8_t  gcolour[4];
  static int gd_i[4];
  static int gd_c[4];
  static int gd_ia, gd_ca;
  static int colour_index = 0;
-static int32 zadd;
-static uint32 z_i[4];
+static int32_t zadd;
+static uint32_t z_i[4];
  
-static int32 a1_clip_x, a1_clip_y;
+static int32_t a1_clip_x, a1_clip_y;
  
  // In the spirit of "get it right first, *then* optimize" I've taken the liberty
  // of removing all the unnecessary code caching. If it turns out to be a good way
@@ -347,11 +365,11 @@ static int32 a1_clip_x, a1_clip_y;
  //
  // Generic blit handler
  //
-void blitter_generic(uint32 cmd)
+void blitter_generic(uint32_t cmd)
  {
  /*
  Blit! (0018FA70 <- 008DDC40) count: 2 x 13, A1/2_FLAGS: 00014218/00013C18 [cmd: 1401060C]
- CMD -> src: SRCENX dst: DSTEN  misc:  a1ctl: UPDA1 UPDA2 mode:  ity: PATDSEL z-op:  op: LFU_CLEAR ctrl: BCOMPEN BKGWREN 
+ CMD -> src: SRCENX dst: DSTEN  misc:  a1ctl: UPDA1 UPDA2 mode:  ity: PATDSEL z-op:  op: LFU_CLEAR ctrl: BCOMPEN BKGWREN
    A1 step values: -2 (X), 1 (Y)
    A2 step values: -1 (X), 1 (Y) [mask (unused): 00000000 - FFFFFFFF/FFFFFFFF]
    A1 -> pitch: 1 phrases, depth: 8bpp, z-off: 0, width: 320 (21), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
@@ -363,9 +381,9 @@ Blit! (0018FA70 <- 008DDC40) count: 2 x 13, A1/2_FLAGS: 00014218/00013C18 [cmd:
  /*if (cmd == 0x1401060C && blit_start_log)
         specialLog = true;//*/
  //Testing only!
-//uint32 logGo = ((cmd == 0x01800E01 && REG(A1_BASE) == 0x898000) ? 1 : 0);
-       uint32 srcdata, srczdata, dstdata, dstzdata, writedata, inhibit;
-       uint32 bppSrc = (DSTA2 ? 1 << ((REG(A1_FLAGS) >> 3) & 0x07) : 1 << ((REG(A2_FLAGS) >> 3) & 0x07));
+//uint32_t logGo = ((cmd == 0x01800E01 && REG(A1_BASE) == 0x898000) ? 1 : 0);
+       uint32_t srcdata, srczdata, dstdata, dstzdata, writedata, inhibit;
+       uint32_t bppSrc = (DSTA2 ? 1 << ((REG(A1_FLAGS) >> 3) & 0x07) : 1 << ((REG(A2_FLAGS) >> 3) & 0x07));
  
  if (specialLog)
  {
@@ -386,7 +404,7 @@ if (specialLog)
  {
         WriteLog("  A1_X/Y = %08X/%08X, A2_X/Y = %08X/%08X\n", a1_x, a1_y, a2_x, a2_y);
  }
-               uint32 a1_start = a1_x, a2_start = a2_x, bitPos = 0;
+               uint32_t a1_start = a1_x, a2_start = a2_x, bitPos = 0;
  
                 //Kludge for Hover Strike...
                 //I wonder if this kludge is in conjunction with the SRCENX down below...
@@ -427,7 +445,7 @@ if (specialLog)
                                                 srczdata = READ_RDATA(SRCZINT, a2, REG(A2_FLAGS), a2_phrase_mode);
                                 }
  
-                               // load dst data and Z 
+                               // load dst data and Z
                                 if (DSTEN)
                                 {
                                         dstdata = READ_PIXEL(a1, REG(A1_FLAGS));
@@ -453,14 +471,14 @@ if (specialLog)
                                                 inhibit = 1;
                                 }//*/
  
-                               if (GOURZ) 
+                               if (GOURZ)
                                         srczdata = z_i[colour_index] >> 16;
  
                                 // apply z comparator
                                 if (Z_OP_INF && srczdata <  dstzdata)   inhibit = 1;
                                 if (Z_OP_EQU && srczdata == dstzdata)   inhibit = 1;
                                 if (Z_OP_SUP && srczdata >  dstzdata)   inhibit = 1;
-                               
+
                                 // apply data comparator
  // Note: DCOMPEN only works in 8/16 bpp modes! !!! FIX !!!
  // Does BCOMPEN only work in 1 bpp mode???
@@ -479,7 +497,7 @@ if (specialLog)
  //and *then* we can do the bit stepping from there the way it's *supposed* to be done... !!! FIX !!!
  //[DONE]
         {
-               uint32 pixShift = (~bitPos) & (bppSrc - 1);
+               uint32_t pixShift = (~bitPos) & (bppSrc - 1);
                 srcdata = (srcdata >> pixShift) & 0x01;
  
                 bitPos++;
@@ -490,7 +508,7 @@ if (specialLog)
  Interesting (Hover Strike--large letter):
  
  Blit! (0018FA70 <- 008DDC40) count: 2 x 13, A1/2_FLAGS: 00014218/00013C18 [cmd: 1401060C]
- CMD -> src: SRCENX dst: DSTEN  misc:  a1ctl: UPDA1 UPDA2 mode:  ity: PATDSEL z-op:  op: LFU_CLEAR ctrl: BCOMPEN BKGWREN 
+ CMD -> src: SRCENX dst: DSTEN  misc:  a1ctl: UPDA1 UPDA2 mode:  ity: PATDSEL z-op:  op: LFU_CLEAR ctrl: BCOMPEN BKGWREN
    A1 step values: -2 (X), 1 (Y)
    A2 step values: -1 (X), 1 (Y) [mask (unused): 00000000 - FFFFFFFF/FFFFFFFF]
    A1 -> pitch: 1 phrases, depth: 8bpp, z-off: 0, width: 320 (21), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
@@ -498,7 +516,7 @@ Blit! (0018FA70 <- 008DDC40) count: 2 x 13, A1/2_FLAGS: 00014218/00013C18 [cmd:
          A1 x/y: 100/12, A2 x/y: 106/0 Pattern: 000000F300000000
  
  Blit! (0018FA70 <- 008DDC40) count: 8 x 13, A1/2_FLAGS: 00014218/00013C18 [cmd: 1401060C]
- CMD -> src: SRCENX dst: DSTEN  misc:  a1ctl: UPDA1 UPDA2 mode:  ity: PATDSEL z-op:  op: LFU_CLEAR ctrl: BCOMPEN BKGWREN 
+ CMD -> src: SRCENX dst: DSTEN  misc:  a1ctl: UPDA1 UPDA2 mode:  ity: PATDSEL z-op:  op: LFU_CLEAR ctrl: BCOMPEN BKGWREN
    A1 step values: -8 (X), 1 (Y)
    A2 step values: -1 (X), 1 (Y) [mask (unused): 00000000 - FFFFFFFF/FFFFFFFF]
    A1 -> pitch: 1 phrases, depth: 8bpp, z-off: 0, width: 320 (21), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
@@ -506,7 +524,7 @@ Blit! (0018FA70 <- 008DDC40) count: 8 x 13, A1/2_FLAGS: 00014218/00013C18 [cmd:
          A1 x/y: 102/12, A2 x/y: 107/0 Pattern: 000000F300000000
  
  Blit! (0018FA70 <- 008DDC40) count: 1 x 13, A1/2_FLAGS: 00014218/00013C18 [cmd: 1401060C]
- CMD -> src: SRCENX dst: DSTEN  misc:  a1ctl: UPDA1 UPDA2 mode:  ity: PATDSEL z-op:  op: LFU_CLEAR ctrl: BCOMPEN BKGWREN 
+ CMD -> src: SRCENX dst: DSTEN  misc:  a1ctl: UPDA1 UPDA2 mode:  ity: PATDSEL z-op:  op: LFU_CLEAR ctrl: BCOMPEN BKGWREN
    A1 step values: -1 (X), 1 (Y)
    A2 step values: -1 (X), 1 (Y) [mask (unused): 00000000 - FFFFFFFF/FFFFFFFF]
    A1 -> pitch: 1 phrases, depth: 8bpp, z-off: 0, width: 320 (21), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
@@ -514,7 +532,7 @@ Blit! (0018FA70 <- 008DDC40) count: 1 x 13, A1/2_FLAGS: 00014218/00013C18 [cmd:
          A1 x/y: 118/12, A2 x/y: 70/0 Pattern: 000000F300000000
  
  Blit! (0018FA70 <- 008DDC40) count: 8 x 13, A1/2_FLAGS: 00014218/00013C18 [cmd: 1401060C]
- CMD -> src: SRCENX dst: DSTEN  misc:  a1ctl: UPDA1 UPDA2 mode:  ity: PATDSEL z-op:  op: LFU_CLEAR ctrl: BCOMPEN BKGWREN 
+ CMD -> src: SRCENX dst: DSTEN  misc:  a1ctl: UPDA1 UPDA2 mode:  ity: PATDSEL z-op:  op: LFU_CLEAR ctrl: BCOMPEN BKGWREN
    A1 step values: -8 (X), 1 (Y)
    A2 step values: -1 (X), 1 (Y) [mask (unused): 00000000 - FFFFFFFF/FFFFFFFF]
    A1 -> pitch: 1 phrases, depth: 8bpp, z-off: 0, width: 320 (21), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
@@ -522,7 +540,7 @@ Blit! (0018FA70 <- 008DDC40) count: 8 x 13, A1/2_FLAGS: 00014218/00013C18 [cmd:
          A1 x/y: 119/12, A2 x/y: 71/0 Pattern: 000000F300000000
  
  Blit! (0018FA70 <- 008DDC40) count: 1 x 13, A1/2_FLAGS: 00014218/00013C18 [cmd: 1401060C]
- CMD -> src: SRCENX dst: DSTEN  misc:  a1ctl: UPDA1 UPDA2 mode:  ity: PATDSEL z-op:  op: LFU_CLEAR ctrl: BCOMPEN BKGWREN 
+ CMD -> src: SRCENX dst: DSTEN  misc:  a1ctl: UPDA1 UPDA2 mode:  ity: PATDSEL z-op:  op: LFU_CLEAR ctrl: BCOMPEN BKGWREN
    A1 step values: -1 (X), 1 (Y)
    A2 step values: -1 (X), 1 (Y) [mask (unused): 00000000 - FFFFFFFF/FFFFFFFF]
    A1 -> pitch: 1 phrases, depth: 8bpp, z-off: 0, width: 320 (21), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
@@ -530,7 +548,7 @@ Blit! (0018FA70 <- 008DDC40) count: 1 x 13, A1/2_FLAGS: 00014218/00013C18 [cmd:
          A1 x/y: 127/12, A2 x/y: 66/0 Pattern: 000000F300000000
  
  Blit! (0018FA70 <- 008DDC40) count: 8 x 13, A1/2_FLAGS: 00014218/00013C18 [cmd: 1401060C]
- CMD -> src: SRCENX dst: DSTEN  misc:  a1ctl: UPDA1 UPDA2 mode:  ity: PATDSEL z-op:  op: LFU_CLEAR ctrl: BCOMPEN BKGWREN 
+ CMD -> src: SRCENX dst: DSTEN  misc:  a1ctl: UPDA1 UPDA2 mode:  ity: PATDSEL z-op:  op: LFU_CLEAR ctrl: BCOMPEN BKGWREN
    A1 step values: -8 (X), 1 (Y)
    A2 step values: -1 (X), 1 (Y) [mask (unused): 00000000 - FFFFFFFF/FFFFFFFF]
    A1 -> pitch: 1 phrases, depth: 8bpp, z-off: 0, width: 320 (21), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
@@ -545,7 +563,7 @@ Blit! (0018FA70 <- 008DDC40) count: 8 x 13, A1/2_FLAGS: 00014218/00013C18 [cmd:
                                                 // compare source pixel with pattern pixel
  /*
  Blit! (000B8250 <- 0012C3A0) count: 16 x 1, A1/2_FLAGS: 00014420/00012000 [cmd: 05810001]
- CMD -> src: SRCEN  dst:  misc:  a1ctl:  mode:  ity: PATDSEL z-op:  op: LFU_REPLACE ctrl: BCOMPEN 
+ CMD -> src: SRCEN  dst:  misc:  a1ctl:  mode:  ity: PATDSEL z-op:  op: LFU_REPLACE ctrl: BCOMPEN
    A1 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 384 (22), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
    A2 -> pitch: 1 phrases, depth: 1bpp, z-off: 0, width: 16 (10), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
          x/y: 0/20
@@ -560,7 +578,7 @@ Blit! (000B8250 <- 0012C3A0) count: 16 x 1, A1/2_FLAGS: 00014420/00012000 [cmd:
  /*                                             if (srcdata == READ_RDATA(PATTERNDATA, a2, REG(A2_FLAGS), a2_phrase_mode))
  //                                             if (srcdata != READ_RDATA(PATTERNDATA, a2, REG(A2_FLAGS), a2_phrase_mode))
                                                         inhibit = 1;//*/
-/*                                             uint32 A2bpp = 1 << ((REG(A2_FLAGS) >> 3) & 0x07);
+/*                                             uint32_t A2bpp = 1 << ((REG(A2_FLAGS) >> 3) & 0x07);
                                                 if (A2bpp == 1 || A2bpp == 16 || A2bpp == 8)
                                                         inhibit = (srcdata == 0 ? 1: 0);
  //                                                     inhibit = !srcdata;
@@ -592,12 +610,12 @@ Blit! (000B8250 <- 0012C3A0) count: 16 x 1, A1/2_FLAGS: 00014420/00012000 [cmd:
  
                                 // compute the write data and store
                                 if (!inhibit)
-                               {                       
+                               {
  // Houston, we have a problem...
  // Look here, at PATDSEL and GOURD. If both are active (as they are on the BIOS intro), then there's
  // a conflict! E.g.:
  //Blit! (00100000 <- 000095D0) count: 3 x 1, A1/2_FLAGS: 00014220/00004020 [cmd: 00011008]
-// CMD -> src:  dst: DSTEN  misc:  a1ctl:  mode: GOURD  ity: PATDSEL z-op:  op: LFU_CLEAR ctrl: 
+// CMD -> src:  dst: DSTEN  misc:  a1ctl:  mode: GOURD  ity: PATDSEL z-op:  op: LFU_CLEAR ctrl:
  //  A1 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 320 (21), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
  //  A2 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 256 (20), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
  //        A1 x/y: 90/171, A2 x/y: 808/0 Pattern: 776D770077007700
@@ -630,7 +648,7 @@ Blit! (000B8250 <- 0012C3A0) count: 16 x 1, A1/2_FLAGS: 00014420/00012000 [cmd:
  Hover Strike ADDDSEL blit:
  
  Blit! (00098D90 <- 0081DDC0) count: 320 x 287, A1/2_FLAGS: 00004220/00004020 [cmd: 00020208]
- CMD -> src:  dst: DSTEN  misc:  a1ctl: UPDA1  mode:  ity: ADDDSEL z-op:  op: LFU_CLEAR ctrl: 
+ CMD -> src:  dst: DSTEN  misc:  a1ctl: UPDA1  mode:  ity: ADDDSEL z-op:  op: LFU_CLEAR ctrl:
    A1 step values: -320 (X), 1 (Y)
    A1 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 320 (21), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
    A2 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 256 (20), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
@@ -641,16 +659,16 @@ Blit! (00098D90 <- 0081DDC0) count: 320 x 287, A1/2_FLAGS: 00004220/00004020 [cm
                                                 if (!TOPBEN)
                                                 {
  //This is correct now, but slow...
-                                                       int16 s = (srcdata & 0xFF) | (srcdata & 0x80 ? 0xFF00 : 0x0000),
+                                                       int16_t s = (srcdata & 0xFF) | (srcdata & 0x80 ? 0xFF00 : 0x0000),
                                                                 d = dstdata & 0xFF;
-                                                       int16 sum = s + d;
+                                                       int16_t sum = s + d;
  
                                                         if (sum < 0)
                                                                 writedata = 0x00;
                                                         else if (sum > 0xFF)
                                                                 writedata = 0xFF;
                                                         else
-                                                               writedata = (uint32)sum;
+                                                               writedata = (uint32_t)sum;
                                                 }
  
  //This doesn't seem right... Looks like it would muck up the low byte... !!! FIX !!!
@@ -675,10 +693,10 @@ Blit! (00098D90 <- 0081DDC0) count: 320 x 287, A1/2_FLAGS: 00004220/00004020 [cm
  //According to JTRM, this is part of the four things the blitter does with the write data (the other
  //three being PATDSEL, ADDDSEL, and LFU (default). I'm not sure which gets precedence, this or PATDSEL
  //(see above blit example)...
-                                       if (GOURD) 
+                                       if (GOURD)
                                                 writedata = ((gd_c[colour_index]) << 8) | (gd_i[colour_index] >> 16);
  
-                                       if (SRCSHADE) 
+                                       if (SRCSHADE)
                                         {
                                                 int intensity = srcdata & 0xFF;
                                                 int ia = gd_ia >> 16;
@@ -704,8 +722,8 @@ Blit! (00098D90 <- 0081DDC0) count: 320 x 287, A1/2_FLAGS: 00004220/00004020 [cm
                                 {
  /*if (((REG(A1_FLAGS) >> 3) & 0x07) == 5)
  {
-       uint32 offset = a1_addr+(PIXEL_OFFSET_32(a1)<<2);
-// (((((UINT32)a##_y >> 16) * a##_width) + (((UINT32)a##_x >> 16) & ~1)) * (1 + a##_pitch) + (((UINT32)a##_x >> 16) & 1))
+       uint32_t offset = a1_addr+(PIXEL_OFFSET_32(a1)<<2);
+// (((((uint32_t)a##_y >> 16) * a##_width) + (((uint32_t)a##_x >> 16) & ~1)) * (1 + a##_pitch) + (((uint32_t)a##_x >> 16) & 1))
         if ((offset >= 0x1FF020 && offset <= 0x1FF03F) || (offset >= 0x1FF820 && offset <= 0x1FF83F))
                 WriteLog("32bpp pixel write: A1 Phrase mode --> ");
  }//*/
@@ -733,7 +751,7 @@ Blit! (00098D90 <- 0081DDC0) count: 320 x 287, A1/2_FLAGS: 00004220/00004020 [cm
                                                 srczdata = READ_RDATA(SRCZINT, a1, REG(A1_FLAGS), a1_phrase_mode);
                                 }
  
-                               // load dst data and Z 
+                               // load dst data and Z
                                 if (DSTEN)
                                 {
                                         dstdata = READ_PIXEL(a2, REG(A2_FLAGS));
@@ -749,14 +767,14 @@ Blit! (00098D90 <- 0081DDC0) count: 320 x 287, A1/2_FLAGS: 00004220/00004020 [cm
                                                 dstzdata = READ_RDATA(DSTZ, a2, REG(A2_FLAGS), a2_phrase_mode);
                                 }
  
-                               if (GOURZ) 
+                               if (GOURZ)
                                         srczdata = z_i[colour_index] >> 16;
  
                                 // apply z comparator
                                 if (Z_OP_INF && srczdata < dstzdata)    inhibit = 1;
                                 if (Z_OP_EQU && srczdata == dstzdata)   inhibit = 1;
                                 if (Z_OP_SUP && srczdata > dstzdata)    inhibit = 1;
-                               
+
                                 // apply data comparator
  //NOTE: The bit comparator (BCOMPEN) is NOT the same at the data comparator!
                                 if (DCOMPEN | BCOMPEN)
@@ -774,7 +792,7 @@ Blit! (00098D90 <- 0081DDC0) count: 320 x 287, A1/2_FLAGS: 00004220/00004020 [cm
  // AvP: This is causing blocks to be written instead of bit patterns...
  // Works now...
  // NOTE: We really should separate out the BCOMPEN & DCOMPEN stuff!
-/*                                             uint32 A1bpp = 1 << ((REG(A1_FLAGS) >> 3) & 0x07);
+/*                                             uint32_t A1bpp = 1 << ((REG(A1_FLAGS) >> 3) & 0x07);
                                                 if (A1bpp == 1 || A1bpp == 16 || A1bpp == 8)
                                                         inhibit = (srcdata == 0 ? 1: 0);
                                                 else
@@ -795,7 +813,7 @@ Blit! (00098D90 <- 0081DDC0) count: 320 x 287, A1/2_FLAGS: 00004220/00004020 [cm
  //                                     if (a1_phrase_mode || a2_phrase_mode)
  //                                             inhibit = !inhibit;
                                 }
-                               
+
                                 if (CLIPA1)
                                 {
                                         inhibit |= (((a1_x >> 16) < a1_clip_x && (a1_x >> 16) >= 0
@@ -804,7 +822,7 @@ Blit! (00098D90 <- 0081DDC0) count: 320 x 287, A1/2_FLAGS: 00004220/00004020 [cm
  
                                 // compute the write data and store
                                 if (!inhibit)
-                               {                       
+                               {
                                         if (PATDSEL)
                                         {
                                                 // use pattern data for write data
@@ -833,10 +851,10 @@ Blit! (00098D90 <- 0081DDC0) count: 320 x 287, A1/2_FLAGS: 00004220/00004020 [cm
                                                         writedata |= srcdata & dstdata;
                                         }
  
-                                       if (GOURD) 
+                                       if (GOURD)
                                                 writedata = ((gd_c[colour_index]) << 8) | (gd_i[colour_index] >> 16);
  
-                                       if (SRCSHADE) 
+                                       if (SRCSHADE)
                                         {
                                                 int intensity = srcdata & 0xFF;
                                                 int ia = gd_ia >> 16;
@@ -860,8 +878,8 @@ Blit! (00098D90 <- 0081DDC0) count: 320 x 287, A1/2_FLAGS: 00004220/00004020 [cm
                                 {
  /*if (logGo)
  {
-       uint32 offset = a2_addr+(PIXEL_OFFSET_16(a2)<<1);
-// (((((UINT32)a##_y >> 16) * a##_width) + (((UINT32)a##_x >> 16) & ~1)) * (1 + a##_pitch) + (((UINT32)a##_x >> 16) & 1))
+       uint32_t offset = a2_addr+(PIXEL_OFFSET_16(a2)<<1);
+// (((((uint32_t)a##_y >> 16) * a##_width) + (((uint32_t)a##_x >> 16) & ~1)) * (1 + a##_pitch) + (((uint32_t)a##_x >> 16) & 1))
         WriteLog("[%08X:%04X] ", offset, writedata);
  }//*/
                                         // write to the destination
@@ -905,13 +923,13 @@ Blit! (00098D90 <- 0081DDC0) count: 320 x 287, A1/2_FLAGS: 00004220/00004020 [cm
                                 gd_i[colour_index] += gd_ia;
  //Hmm, this doesn't seem to do anything...
  //But it is correct according to the JTRM...!
-if ((int32)gd_i[colour_index] < 0)
+if ((int32_t)gd_i[colour_index] < 0)
         gd_i[colour_index] = 0;
  if (gd_i[colour_index] > 0x00FFFFFF)
         gd_i[colour_index] = 0x00FFFFFF;//*/
  
                                 gd_c[colour_index] += gd_ca;
-if ((int32)gd_c[colour_index] < 0)
+if ((int32_t)gd_c[colour_index] < 0)
         gd_c[colour_index] = 0;
  if (gd_c[colour_index] > 0x000000FF)
         gd_c[colour_index] = 0x000000FF;//*/
@@ -940,7 +958,7 @@ Below fixes it, but then borks:
  ; O
  
  Blit! (00110000 <- 0010B2A8) count: 12 x 12, A1/2_FLAGS: 000042E2/00000020 [cmd: 09800609]
- CMD -> src: SRCEN  dst: DSTEN  misc:  a1ctl: UPDA1 UPDA2 mode:  ity:  z-op:  op: LFU_REPLACE ctrl: DCOMPEN 
+ CMD -> src: SRCEN  dst: DSTEN  misc:  a1ctl: UPDA1 UPDA2 mode:  ity:  z-op:  op: LFU_REPLACE ctrl: DCOMPEN
    A1 step values: -15 (X), 1 (Y)
    A2 step values: -4 (X), 0 (Y) [mask (unused): 00000000 - FFFFFFFF/FFFFFFFF]
    A1 -> pitch: 4 phrases, depth: 16bpp, z-off: 3, width: 320 (21), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
@@ -958,7 +976,7 @@ Lesse, with pre-add we'd have:
  */
  //NOTE: The way to fix the CD BIOS is to uncomment below and comment the stuff after
  //      the phrase mode mucking around. But it fucks up everything else...
-#define SCREWY_CD_DEPENDENT
+//#define SCREWY_CD_DEPENDENT
  #ifdef SCREWY_CD_DEPENDENT
                 a1_x += a1_step_x;
                 a1_y += a1_step_y;
@@ -971,17 +989,17 @@ Lesse, with pre-add we'd have:
                 {
                         // Bump the pointer to the next phrase boundary
                         // Even though it works, this is crappy... Clean it up!
-                       uint32 size = 64 / a1_psize;
+                       uint32_t size = 64 / a1_psize;
  
                         // Crappy kludge... ('aligning' source to destination)
                         if (a2_phrase_mode && DSTA2)
                         {
-                               uint32 extra = (a2_start >> 16) % size;
+                               uint32_t extra = (a2_start >> 16) % size;
                                 a1_x += extra << 16;
                         }
  
-                       uint32 newx = (a1_x >> 16) / size;
-                       uint32 newxrem = (a1_x >> 16) % size;
+                       uint32_t newx = (a1_x >> 16) / size;
+                       uint32_t newxrem = (a1_x >> 16) % size;
                         a1_x &= 0x0000FFFF;
                         a1_x |= (((newx + (newxrem == 0 ? 0 : 1)) * size) & 0xFFFF) << 16;
                 }//*/
@@ -989,16 +1007,16 @@ Lesse, with pre-add we'd have:
                 {
                         // Bump the pointer to the next phrase boundary
                         // Even though it works, this is crappy... Clean it up!
-                       uint32 size = 64 / a1_psize;
+                       uint32_t size = 64 / a1_psize;
  
                         // Crappy kludge... ('aligning' source to destination)
                         if (a2_phrase_mode && DSTA2)
                         {
-                               uint32 extra = (a2_start >> 16) % size;
+                               uint32_t extra = (a2_start >> 16) % size;
                                 a1_x += extra << 16;
                         }
  
-                       uint32 pixelSize = (size - 1) << 16;
+                       uint32_t pixelSize = (size - 1) << 16;
                         a1_x = (a1_x + pixelSize) & ~pixelSize;
                 }
  
@@ -1006,18 +1024,18 @@ Lesse, with pre-add we'd have:
                 {
                         // Bump the pointer to the next phrase boundary
                         // Even though it works, this is crappy... Clean it up!
-                       uint32 size = 64 / a2_psize;
+                       uint32_t size = 64 / a2_psize;
  
                         // Crappy kludge... ('aligning' source to destination)
                         // Prolly should do this for A1 channel as well... [DONE]
                         if (a1_phrase_mode && !DSTA2)
                         {
-                               uint32 extra = (a1_start >> 16) % size;
+                               uint32_t extra = (a1_start >> 16) % size;
                                 a2_x += extra << 16;
                         }
  
-                       uint32 newx = (a2_x >> 16) / size;
-                       uint32 newxrem = (a2_x >> 16) % size;
+                       uint32_t newx = (a2_x >> 16) / size;
+                       uint32_t newxrem = (a2_x >> 16) % size;
                         a2_x &= 0x0000FFFF;
                         a2_x |= (((newx + (newxrem == 0 ? 0 : 1)) * size) & 0xFFFF) << 16;
                 }//*/
@@ -1025,17 +1043,17 @@ Lesse, with pre-add we'd have:
                 {
                         // Bump the pointer to the next phrase boundary
                         // Even though it works, this is crappy... Clean it up!
-                       uint32 size = 64 / a2_psize;
+                       uint32_t size = 64 / a2_psize;
  
                         // Crappy kludge... ('aligning' source to destination)
                         // Prolly should do this for A1 channel as well... [DONE]
                         if (a1_phrase_mode && !DSTA2)
                         {
-                               uint32 extra = (a1_start >> 16) % size;
+                               uint32_t extra = (a1_start >> 16) % size;
                                 a2_x += extra << 16;
                         }
  
-                       uint32 pixelSize = (size - 1) << 16;
+                       uint32_t pixelSize = (size - 1) << 16;
                         a2_x = (a2_x + pixelSize) & ~pixelSize;
                 }
  
@@ -1048,15 +1066,15 @@ Lesse, with pre-add we'd have:
                 a2_y += a2_step_y;//*/
  #endif
         }
-       
-       // write values back to registers 
+
+       // write values back to registers
         WREG(A1_PIXEL,  (a1_y & 0xFFFF0000) | ((a1_x >> 16) & 0xFFFF));
         WREG(A1_FPIXEL, (a1_y << 16) | (a1_x & 0xFFFF));
         WREG(A2_PIXEL,  (a2_y & 0xFFFF0000) | ((a2_x >> 16) & 0xFFFF));
  specialLog = false;
  }
  
-void blitter_blit(uint32 cmd)
+void blitter_blit(uint32_t cmd)
  {
  //Apparently this is doing *something*, just not sure exactly what...
  /*if (cmd == 0x41802E01)
@@ -1066,7 +1084,7 @@ void blitter_blit(uint32 cmd)
         WriteLog("Is: %08X\n", cmd);
  }//*/
  
-       uint32 pitchValue[4] = { 0, 1, 3, 2 };
+       uint32_t pitchValue[4] = { 0, 1, 3, 2 };
         colour_index = 0;
         src = cmd & 0x07;
         dst = (cmd >> 3) & 0x07;
@@ -1085,7 +1103,7 @@ void blitter_blit(uint32 cmd)
  
         a1_zoffs = (REG(A1_FLAGS) >> 6) & 7;
         a2_zoffs = (REG(A2_FLAGS) >> 6) & 7;
-       
+
         xadd_a1_control = (REG(A1_FLAGS) >> 16) & 0x03;
         xadd_a2_control = (REG(A2_FLAGS) >> 16) & 0x03;
  
@@ -1107,7 +1125,7 @@ void blitter_blit(uint32 cmd)
  //     a1_width = blitter_scanline_width[((REG(A1_FLAGS) & 0x00007E00) >> 9)];
  // According to JTRM, this must give a *whole number* of phrases in the current
  // pixel size (this means the lookup above is WRONG)... !!! FIX !!!
-       UINT32 m = (REG(A1_FLAGS) >> 9) & 0x03, e = (REG(A1_FLAGS) >> 11) & 0x0F;
+       uint32_t m = (REG(A1_FLAGS) >> 9) & 0x03, e = (REG(A1_FLAGS) >> 11) & 0x0F;
         a1_width = ((0x04 | m) << e) >> 2;//*/
  
         a2_x = (REG(A2_PIXEL) & 0x0000FFFF) << 16;
@@ -1154,7 +1172,7 @@ void blitter_blit(uint32 cmd)
                 // add pixelsize (1) to X
                 a1_xadd = 1 << 16;
                 break;
-       case XADD0:     
+       case XADD0:
                 // add zero (for those nice vertical lines)
                 a1_xadd = 0;
                 break;
@@ -1170,7 +1188,7 @@ void blitter_blit(uint32 cmd)
  //  A1 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 128 (1C), addctl: XADDINC YADD1 XSIGNADD YSIGNADD
  //  A2 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 320 (21), addctl: XADD0 YADD1 XSIGNADD YSIGNADD
  //if (YADD1_A1 && YADD1_A2 && xadd_a2_control == XADD0 && xadd_a1_control == XADDINC)// &&
-//     UINT32 a1f = REG(A1_FLAGS), a2f = REG(A2_FLAGS);
+//     uint32_t a1f = REG(A1_FLAGS), a2f = REG(A2_FLAGS);
  //Ok, so this ISN'T it... Prolly the XADDPHR code above that's doing it...
  //if (REG(A1_FLAGS) == 0x00073820 && REG(A2_FLAGS) == 0x00064220 && cmd == 0x41802801)
  //        A1 x/y: 14368/7, A2 x/y: 150/36
@@ -1198,7 +1216,7 @@ void blitter_blit(uint32 cmd)
                 // add pixelsize (1) to X
                 a2_xadd = 1 << 16;
                 break;
-       case XADD0:     
+       case XADD0:
                 // add zero (for those nice vertical lines)
                 a2_xadd = 0;
                 break;
@@ -1260,23 +1278,23 @@ WriteLog("BLIT: Asked to use invalid bit combo (XADDINC) for A2...\n");
         if (GOURD || GOURZ || SRCSHADE)
         {
                 gd_c[0] = blitter_ram[PATTERNDATA + 6];
-               gd_i[0] = ((uint32)blitter_ram[PATTERNDATA + 7] << 16)
-                       | ((uint32)blitter_ram[SRCDATA + 6] << 8) | blitter_ram[SRCDATA + 1];
+               gd_i[0] = ((uint32_t)blitter_ram[PATTERNDATA + 7] << 16)
+                       | ((uint32_t)blitter_ram[SRCDATA + 6] << 8) | blitter_ram[SRCDATA + 7];
  
                 gd_c[1] = blitter_ram[PATTERNDATA + 4];
-               gd_i[1] = ((uint32)blitter_ram[PATTERNDATA + 5] << 16)
-                       | ((uint32)blitter_ram[SRCDATA + 4] << 8) | blitter_ram[SRCDATA + 3];
+               gd_i[1] = ((uint32_t)blitter_ram[PATTERNDATA + 5] << 16)
+                       | ((uint32_t)blitter_ram[SRCDATA + 4] << 8) | blitter_ram[SRCDATA + 5];
  
                 gd_c[2] = blitter_ram[PATTERNDATA + 2];
-               gd_i[2] = ((uint32)blitter_ram[PATTERNDATA + 3] << 16)
-                       | ((uint32)blitter_ram[SRCDATA + 2] << 8) | blitter_ram[SRCDATA + 5];
+               gd_i[2] = ((uint32_t)blitter_ram[PATTERNDATA + 3] << 16)
+                       | ((uint32_t)blitter_ram[SRCDATA + 2] << 8) | blitter_ram[SRCDATA + 3];
  
                 gd_c[3] = blitter_ram[PATTERNDATA + 0];
-               gd_i[3] = ((uint32)blitter_ram[PATTERNDATA + 1] << 16)
-                       | ((uint32)blitter_ram[SRCDATA + 0] << 8) | blitter_ram[SRCDATA + 7];
+               gd_i[3] = ((uint32_t)blitter_ram[PATTERNDATA + 1] << 16)
+                       | ((uint32_t)blitter_ram[SRCDATA + 0] << 8) | blitter_ram[SRCDATA + 1];
  
                 gouraud_add = REG(INTENSITYINC);
-               
+
                 gd_ia = gouraud_add & 0x00FFFFFF;
                 if (gd_ia & 0x00800000)
                         gd_ia = 0xFF000000 | gd_ia;
@@ -1346,7 +1364,7 @@ WriteLog("BLIT: Asked to use invalid bit combo (XADDINC) for A2...\n");
                 WriteLog("  GOURZ   = %i\n",GOURZ);
                 WriteLog("  GOURD   = %i\n",GOURD);
                 WriteLog("  SRCSHADE= %i\n",SRCSHADE);
-       }       
+       }
  #endif
  
  //NOTE: Pitch is ignored!
@@ -1356,24 +1374,24 @@ WriteLog("BLIT: Asked to use invalid bit combo (XADDINC) for A2...\n");
  //Black is short by 3, pink is short by 1...
  /*
  Blit! (00110000 <- 000BF010) count: 9 x 31, A1/2_FLAGS: 000042E2/00010020 [cmd: 00010200]
- CMD -> src:  dst:  misc:  a1ctl: UPDA1  mode:  ity: PATDSEL z-op:  op: LFU_CLEAR ctrl: 
+ CMD -> src:  dst:  misc:  a1ctl: UPDA1  mode:  ity: PATDSEL z-op:  op: LFU_CLEAR ctrl:
    A1 -> pitch: 4 phrases, depth: 16bpp, z-off: 3, width: 320 (21), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
    A2 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 1 (00), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
          A1 x/y: 262/124, A2 x/y: 128/0
  Blit! (00110000 <- 000BF010) count: 5 x 38, A1/2_FLAGS: 000042E2/00010020 [cmd: 00010200]
- CMD -> src:  dst:  misc:  a1ctl: UPDA1  mode:  ity: PATDSEL z-op:  op: LFU_CLEAR ctrl: 
+ CMD -> src:  dst:  misc:  a1ctl: UPDA1  mode:  ity: PATDSEL z-op:  op: LFU_CLEAR ctrl:
    A1 -> pitch: 4 phrases, depth: 16bpp, z-off: 3, width: 320 (21), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
    A2 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 1 (00), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
          A1 x/y: 264/117, A2 x/y: 407/0
  
  Blit! (00110000 <- 000BF010) count: 9 x 23, A1/2_FLAGS: 000042E2/00010020 [cmd: 00010200]
- CMD -> src:  dst:  misc:  a1ctl: UPDA1  mode:  ity: PATDSEL z-op:  op: LFU_CLEAR ctrl: 
+ CMD -> src:  dst:  misc:  a1ctl: UPDA1  mode:  ity: PATDSEL z-op:  op: LFU_CLEAR ctrl:
    A1 step values: -10 (X), 1 (Y)
    A1 -> pitch: 4(2) phrases, depth: 16bpp, z-off: 3, width: 320 (21), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
    A2 -> pitch: 1(0) phrases, depth: 16bpp, z-off: 0, width: 1 (00), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
          A1 x/y: 262/132, A2 x/y: 129/0
  Blit! (00110000 <- 000BF010) count: 5 x 27, A1/2_FLAGS: 000042E2/00010020 [cmd: 00010200]
- CMD -> src:  dst:  misc:  a1ctl: UPDA1  mode:  ity: PATDSEL z-op:  op: LFU_CLEAR ctrl: 
+ CMD -> src:  dst:  misc:  a1ctl: UPDA1  mode:  ity: PATDSEL z-op:  op: LFU_CLEAR ctrl:
    A1 step values: -8 (X), 1 (Y)
    A1 -> pitch: 4(2) phrases, depth: 16bpp, z-off: 3, width: 320 (21), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
    A2 -> pitch: 1(0) phrases, depth: 16bpp, z-off: 0, width: 1 (00), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
@@ -1392,7 +1410,7 @@ Fixed! Now for more:
  ; This looks like the ship icon in the upper left corner...
  
  Blit! (00110000 <- 0010B2A8) count: 11 x 12, A1/2_FLAGS: 000042E2/00000020 [cmd: 09800609]
- CMD -> src: SRCEN  dst: DSTEN  misc:  a1ctl: UPDA1 UPDA2 mode:  ity:  z-op:  op: LFU_REPLACE ctrl: DCOMPEN 
+ CMD -> src: SRCEN  dst: DSTEN  misc:  a1ctl: UPDA1 UPDA2 mode:  ity:  z-op:  op: LFU_REPLACE ctrl: DCOMPEN
    A1 step values: -12 (X), 1 (Y)
    A2 step values: 0 (X), 0 (Y) [mask (unused): 00000000 - FFFFFFFF/FFFFFFFF]
    A1 -> pitch: 4 phrases, depth: 16bpp, z-off: 3, width: 320 (21), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
@@ -1409,7 +1427,7 @@ Actually, if you look at the A1 step values, there IS a discrepancy!
  ; D
  
  Blit! (00110000 <- 0010B2A8) count: 12 x 12, A1/2_FLAGS: 000042E2/00000020 [cmd: 09800609]
- CMD -> src: SRCEN  dst: DSTEN  misc:  a1ctl: UPDA1 UPDA2 mode:  ity:  z-op:  op: LFU_REPLACE ctrl: DCOMPEN 
+ CMD -> src: SRCEN  dst: DSTEN  misc:  a1ctl: UPDA1 UPDA2 mode:  ity:  z-op:  op: LFU_REPLACE ctrl: DCOMPEN
    A1 step values: -14 (X), 1 (Y)
    A2 step values: -4 (X), 0 (Y) [mask (unused): 00000000 - FFFFFFFF/FFFFFFFF]
    A1 -> pitch: 4 phrases, depth: 16bpp, z-off: 3, width: 320 (21), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
@@ -1420,7 +1438,7 @@ Blit! (00110000 <- 0010B2A8) count: 12 x 12, A1/2_FLAGS: 000042E2/00000020 [cmd:
  ; E
  
  Blit! (00110000 <- 0010B2A8) count: 12 x 12, A1/2_FLAGS: 000042E2/00000020 [cmd: 09800609]
- CMD -> src: SRCEN  dst: DSTEN  misc:  a1ctl: UPDA1 UPDA2 mode:  ity:  z-op:  op: LFU_REPLACE ctrl: DCOMPEN 
+ CMD -> src: SRCEN  dst: DSTEN  misc:  a1ctl: UPDA1 UPDA2 mode:  ity:  z-op:  op: LFU_REPLACE ctrl: DCOMPEN
    A1 step values: -13 (X), 1 (Y)
    A2 step values: -4 (X), 0 (Y) [mask (unused): 00000000 - FFFFFFFF/FFFFFFFF]
    A1 -> pitch: 4 phrases, depth: 16bpp, z-off: 3, width: 320 (21), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
@@ -1430,7 +1448,7 @@ Blit! (00110000 <- 0010B2A8) count: 12 x 12, A1/2_FLAGS: 000042E2/00000020 [cmd:
  ; M
  
  Blit! (00110000 <- 0010B2A8) count: 12 x 12, A1/2_FLAGS: 000042E2/00000020 [cmd: 09800609]
- CMD -> src: SRCEN  dst: DSTEN  misc:  a1ctl: UPDA1 UPDA2 mode:  ity:  z-op:  op: LFU_REPLACE ctrl: DCOMPEN 
+ CMD -> src: SRCEN  dst: DSTEN  misc:  a1ctl: UPDA1 UPDA2 mode:  ity:  z-op:  op: LFU_REPLACE ctrl: DCOMPEN
    A1 step values: -12 (X), 1 (Y)
    A2 step values: 0 (X), 0 (Y) [mask (unused): 00000000 - FFFFFFFF/FFFFFFFF]
    A1 -> pitch: 4 phrases, depth: 16bpp, z-off: 3, width: 320 (21), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
@@ -1440,7 +1458,7 @@ Blit! (00110000 <- 0010B2A8) count: 12 x 12, A1/2_FLAGS: 000042E2/00000020 [cmd:
  ; O
  
  Blit! (00110000 <- 0010B2A8) count: 12 x 12, A1/2_FLAGS: 000042E2/00000020 [cmd: 09800609]
- CMD -> src: SRCEN  dst: DSTEN  misc:  a1ctl: UPDA1 UPDA2 mode:  ity:  z-op:  op: LFU_REPLACE ctrl: DCOMPEN 
+ CMD -> src: SRCEN  dst: DSTEN  misc:  a1ctl: UPDA1 UPDA2 mode:  ity:  z-op:  op: LFU_REPLACE ctrl: DCOMPEN
    A1 step values: -15 (X), 1 (Y)
    A2 step values: -4 (X), 0 (Y) [mask (unused): 00000000 - FFFFFFFF/FFFFFFFF]
    A1 -> pitch: 4 phrases, depth: 16bpp, z-off: 3, width: 320 (21), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
@@ -1455,17 +1473,17 @@ if (blit_start_log)
         char * bppStr[8] = { "1bpp\0", "2bpp\0", "4bpp\0", "8bpp\0", "16bpp\0", "32bpp\0", "???\0", "!!!\0" };
         char * opStr[16] = { "LFU_CLEAR", "LFU_NSAND", "LFU_NSAD", "LFU_NOTS", "LFU_SAND", "LFU_NOTD", "LFU_N_SXORD", "LFU_NSORND",
                 "LFU_SAD", "LFU_XOR", "LFU_D", "LFU_NSORD", "LFU_REPLACE", "LFU_SORND", "LFU_SORD", "LFU_ONE" };
-       uint32 /*src = cmd & 0x07, dst = (cmd >> 3) & 0x07, misc = (cmd >> 6) & 0x03,
+       uint32_t /*src = cmd & 0x07, dst = (cmd >> 3) & 0x07, misc = (cmd >> 6) & 0x03,
                 a1ctl = (cmd >> 8) & 0x07,*/ mode = (cmd >> 11) & 0x07/*, ity = (cmd >> 14) & 0x0F,
                 zop = (cmd >> 18) & 0x07, op = (cmd >> 21) & 0x0F, ctrl = (cmd >> 25) & 0x3F*/;
-       UINT32 a1f = REG(A1_FLAGS), a2f = REG(A2_FLAGS);
-       uint32 p1 = a1f & 0x07, p2 = a2f & 0x07,
+       uint32_t a1f = REG(A1_FLAGS), a2f = REG(A2_FLAGS);
+       uint32_t p1 = a1f & 0x07, p2 = a2f & 0x07,
                 d1 = (a1f >> 3) & 0x07, d2 = (a2f >> 3) & 0x07,
                 zo1 = (a1f >> 6) & 0x07, zo2 = (a2f >> 6) & 0x07,
                 w1 = (a1f >> 9) & 0x3F, w2 = (a2f >> 9) & 0x3F,
                 ac1 = (a1f >> 16) & 0x1F, ac2 = (a2f >> 16) & 0x1F;
-       UINT32 iw1 = ((0x04 | (w1 & 0x03)) << ((w1 & 0x3C) >> 2)) >> 2;
-       UINT32 iw2 = ((0x04 | (w2 & 0x03)) << ((w2 & 0x3C) >> 2)) >> 2;
+       uint32_t iw1 = ((0x04 | (w1 & 0x03)) << ((w1 & 0x3C) >> 2)) >> 2;
+       uint32_t iw2 = ((0x04 | (w2 & 0x03)) << ((w2 & 0x3C) >> 2)) >> 2;
         WriteLog("Blit! (%08X %s %08X) count: %d x %d, A1/2_FLAGS: %08X/%08X [cmd: %08X]\n", a1_addr, (mode&0x01 ? "->" : "<-"), a2_addr, n_pixels, n_lines, a1f, a2f, cmd);
  //     WriteLog(" CMD -> src: %d, dst: %d, misc: %d, a1ctl: %d, mode: %d, ity: %1X, z-op: %d, op: %1X, ctrl: %02X\n", src, dst, misc, a1ctl, mode, ity, zop, op, ctrl);
  
@@ -1519,32 +1537,39 @@ if (blit_start_log)
  ********************** STUFF CUT ABOVE THIS LINE! ******************************
  *******************************************************************************/
  
-void blitter_init(void)
+void BlitterInit(void)
  {
-       blitter_reset();
+       BlitterReset();
  }
  
-void blitter_reset(void)
+void BlitterReset(void)
  {
         memset(blitter_ram, 0x00, 0xA0);
  }
  
-void blitter_done(void)
+void BlitterDone(void)
  {
         WriteLog("BLIT: Done.\n");
  }
  
-uint8 BlitterReadByte(uint32 offset, uint32 who/*=UNKNOWN*/)
+uint8_t BlitterReadByte(uint32_t offset, uint32_t who/*=UNKNOWN*/)
  {
         offset &= 0xFF;
  
         // status register
  //This isn't cycle accurate--how to fix? !!! FIX !!!
  //Probably have to do some multi-threaded implementation or at least a reentrant safe implementation...
+//Real hardware returns $00000805, just like the JTRM says.
+       if (offset == (0x38 + 0))
+               return 0x00;
+       if (offset == (0x38 + 1))
+               return 0x00;
+       if (offset == (0x38 + 2))
+               return 0x08;
         if (offset == (0x38 + 3))
-               return 0x01;    // always idle
+               return 0x05;    // always idle/never stopped (collision detection ignored!)
  
-// CHECK HERE ONCE THIS FIX HAS BEEN TESTED: [ ]
+// CHECK HERE ONCE THIS FIX HAS BEEN TESTED: [X]
  //Fix for AvP:
         if (offset >= 0x04 && offset <= 0x07)
  //This is it. I wonder if it just ignores the lower three bits?
@@ -1558,18 +1583,18 @@ uint8 BlitterReadByte(uint32 offset, uint32 who/*=UNKNOWN*/)
  }
  
  //Crappy!
-uint16 BlitterReadWord(uint32 offset, uint32 who/*=UNKNOWN*/)
+uint16_t BlitterReadWord(uint32_t offset, uint32_t who/*=UNKNOWN*/)
  {
-       return ((uint16)BlitterReadByte(offset, who) << 8) | (uint16)BlitterReadByte(offset+1, who);
+       return ((uint16_t)BlitterReadByte(offset, who) << 8) | (uint16_t)BlitterReadByte(offset+1, who);
  }
  
  //Crappy!
-uint32 BlitterReadLong(uint32 offset, uint32 who/*=UNKNOWN*/)
+uint32_t BlitterReadLong(uint32_t offset, uint32_t who/*=UNKNOWN*/)
  {
         return (BlitterReadWord(offset, who) << 16) | BlitterReadWord(offset+2, who);
  }
  
-void BlitterWriteByte(uint32 offset, uint8 data, uint32 who/*=UNKNOWN*/)
+void BlitterWriteByte(uint32_t offset, uint8_t data, uint32_t who/*=UNKNOWN*/)
  {
  /*if (offset & 0xFF == 0x7B)
         WriteLog("--> Wrote to B_STOP: value -> %02X\n", data);*/
@@ -1596,12 +1621,12 @@ void BlitterWriteByte(uint32 offset, uint8 data, uint32 who/*=UNKNOWN*/)
                 case 0x81: blitter_ram[PATTERNDATA + 5] = data; break;
                 case 0x82: blitter_ram[SRCDATA + 4] = data; break;
                 case 0x83: blitter_ram[SRCDATA + 5] = data; break;
-               
+
                 case 0x84: break;
                 case 0x85: blitter_ram[PATTERNDATA + 3] = data; break;
                 case 0x86: blitter_ram[SRCDATA + 2] = data; break;
                 case 0x87: blitter_ram[SRCDATA + 3] = data; break;
-               
+
                 case 0x88: break;
                 case 0x89: blitter_ram[PATTERNDATA + 1] = data; break;
                 case 0x8A: blitter_ram[SRCDATA + 0] = data; break;
@@ -1618,12 +1643,12 @@ void BlitterWriteByte(uint32 offset, uint8 data, uint32 who/*=UNKNOWN*/)
                 case 0x91: blitter_ram[SRCZINT + 5] = data; break;
                 case 0x92: blitter_ram[SRCZFRAC + 4] = data; break;
                 case 0x93: blitter_ram[SRCZFRAC + 5] = data; break;
-               
+
                 case 0x94: blitter_ram[SRCZINT + 2] = data; break;
                 case 0x95: blitter_ram[SRCZINT + 3] = data; break;
                 case 0x96: blitter_ram[SRCZFRAC + 2] = data; break;
                 case 0x97: blitter_ram[SRCZFRAC + 3] = data; break;
-               
+
                 case 0x98: blitter_ram[SRCZINT + 0] = data; break;
                 case 0x99: blitter_ram[SRCZINT + 1] = data; break;
                 case 0x9A: blitter_ram[SRCZFRAC + 0] = data; break;
@@ -1655,7 +1680,7 @@ void BlitterWriteByte(uint32 offset, uint8 data, uint32 who/*=UNKNOWN*/)
                 blitter_ram[offset] = data;
  }
  
-void BlitterWriteWord(uint32 offset, uint16 data, uint32 who/*=UNKNOWN*/)
+void BlitterWriteWord(uint32_t offset, uint16_t data, uint32_t who/*=UNKNOWN*/)
  {
  /*if (((offset & 0xFF) >= PATTERNDATA) && ((offset & 0xFF) < PATTERNDATA + 8))
  {
@@ -1702,7 +1727,7 @@ if (blit_start_log)
  }
  //F02278,9,A,B
  
-void BlitterWriteLong(uint32 offset, uint32 data, uint32 who/*=UNKNOWN*/)
+void BlitterWriteLong(uint32_t offset, uint32_t data, uint32_t who/*=UNKNOWN*/)
  {
  /*if (((offset & 0xFF) >= PATTERNDATA) && ((offset & 0xFF) < PATTERNDATA + 8))
  {
@@ -1724,13 +1749,13 @@ doGPUDis = true;
  
  void LogBlit(void)
  {
-       char * opStr[16] = { "LFU_CLEAR", "LFU_NSAND", "LFU_NSAD", "LFU_NOTS", "LFU_SAND", "LFU_NOTD", "LFU_N_SXORD", "LFU_NSORND",
+       const char * opStr[16] = { "LFU_CLEAR", "LFU_NSAND", "LFU_NSAD", "LFU_NOTS", "LFU_SAND", "LFU_NOTD", "LFU_N_SXORD", "LFU_NSORND",
                 "LFU_SAD", "LFU_XOR", "LFU_D", "LFU_NSORD", "LFU_REPLACE", "LFU_SORND", "LFU_SORD", "LFU_ONE" };
-       uint32 cmd = GET32(blitter_ram, 0x38);
-       UINT32 m = (REG(A1_FLAGS) >> 9) & 0x03, e = (REG(A1_FLAGS) >> 11) & 0x0F;
-       UINT32 a1_width = ((0x04 | m) << e) >> 2;
+       uint32_t cmd = GET32(blitter_ram, 0x38);
+       uint32_t m = (REG(A1_FLAGS) >> 9) & 0x03, e = (REG(A1_FLAGS) >> 11) & 0x0F;
+       uint32_t a1_width = ((0x04 | m) << e) >> 2;
         m = (REG(A2_FLAGS) >> 9) & 0x03, e = (REG(A2_FLAGS) >> 11) & 0x0F;
-       UINT32 a2_width = ((0x04 | m) << e) >> 2;
+       uint32_t a2_width = ((0x04 | m) << e) >> 2;
  
         WriteLog("Blit!\n");
         WriteLog("  COMMAND  = %08X\n", cmd);
@@ -1759,11 +1784,11 @@ void LogBlit(void)
                 REG(A1_FLAGS) & 0x00003, (REG(A1_FLAGS) & 0x00038) >> 3,
                 (REG(A1_FLAGS) & 0x001C0) >> 6,  a1_width, (REG(A1_FLAGS) & 0x30000) >> 16);
         WriteLog("  a1_clip  = %u, %u (%08X)\n", GET16(blitter_ram, A1_CLIP + 2), GET16(blitter_ram, A1_CLIP + 0), GET32(blitter_ram, A1_CLIP));
-       WriteLog("  a1_pixel = %d, %d (%08X)\n", (int16)GET16(blitter_ram, A1_PIXEL + 2), (int16)GET16(blitter_ram, A1_PIXEL + 0), GET32(blitter_ram, A1_PIXEL));
-       WriteLog("  a1_step  = %d, %d (%08X)\n", (int16)GET16(blitter_ram, A1_STEP + 2), (int16)GET16(blitter_ram, A1_STEP + 0), GET32(blitter_ram, A1_STEP));
+       WriteLog("  a1_pixel = %d, %d (%08X)\n", (int16_t)GET16(blitter_ram, A1_PIXEL + 2), (int16_t)GET16(blitter_ram, A1_PIXEL + 0), GET32(blitter_ram, A1_PIXEL));
+       WriteLog("  a1_step  = %d, %d (%08X)\n", (int16_t)GET16(blitter_ram, A1_STEP + 2), (int16_t)GET16(blitter_ram, A1_STEP + 0), GET32(blitter_ram, A1_STEP));
         WriteLog("  a1_fstep = %u, %u (%08X)\n", GET16(blitter_ram, A1_FSTEP + 2), GET16(blitter_ram, A1_FSTEP + 0), GET32(blitter_ram, A1_FSTEP));
         WriteLog("  a1_fpixel= %u, %u (%08X)\n", GET16(blitter_ram, A1_FPIXEL + 2), GET16(blitter_ram, A1_FPIXEL + 0), GET32(blitter_ram, A1_FPIXEL));
-       WriteLog("  a1_inc   = %d, %d (%08X)\n", (int16)GET16(blitter_ram, A1_INC + 2), (int16)GET16(blitter_ram, A1_INC + 0), GET32(blitter_ram, A1_INC));
+       WriteLog("  a1_inc   = %d, %d (%08X)\n", (int16_t)GET16(blitter_ram, A1_INC + 2), (int16_t)GET16(blitter_ram, A1_INC + 0), GET32(blitter_ram, A1_INC));
         WriteLog("  a1_finc  = %u, %u (%08X)\n", GET16(blitter_ram, A1_FINC + 2), GET16(blitter_ram, A1_FINC + 0), GET32(blitter_ram, A1_FINC));
  
         WriteLog("  a2_base  = %08X\n", REG(A2_BASE));
@@ -1792,8 +1817,8 @@ void LogBlit(void)
                 REG(A2_FLAGS) & 0x00003, (REG(A2_FLAGS) & 0x00038) >> 3,
                 (REG(A2_FLAGS) & 0x001C0) >> 6,  a2_width, (REG(A2_FLAGS) & 0x30000) >> 16);
         WriteLog("  a2_mask  = %u, %u (%08X)\n", GET16(blitter_ram, A2_MASK + 2), GET16(blitter_ram, A2_MASK + 0), GET32(blitter_ram, A2_MASK));
-       WriteLog("  a2_pixel = %d, %d (%08X)\n", (int16)GET16(blitter_ram, A2_PIXEL + 2), (int16)GET16(blitter_ram, A2_PIXEL + 0), GET32(blitter_ram, A2_PIXEL));
-       WriteLog("  a2_step  = %d, %d (%08X)\n", (int16)GET16(blitter_ram, A2_STEP + 2), (int16)GET16(blitter_ram, A2_STEP + 0), GET32(blitter_ram, A2_STEP));
+       WriteLog("  a2_pixel = %d, %d (%08X)\n", (int16_t)GET16(blitter_ram, A2_PIXEL + 2), (int16_t)GET16(blitter_ram, A2_PIXEL + 0), GET32(blitter_ram, A2_PIXEL));
+       WriteLog("  a2_step  = %d, %d (%08X)\n", (int16_t)GET16(blitter_ram, A2_STEP + 2), (int16_t)GET16(blitter_ram, A2_STEP + 0), GET32(blitter_ram, A2_STEP));
  
         WriteLog("  count    = %d x %d\n", GET16(blitter_ram, PIXLINECOUNTER + 2), GET16(blitter_ram, PIXLINECOUNTER));
  
@@ -1809,9 +1834,9 @@ void LogBlit(void)
         WriteLog("  UPDA2    = %s\n", (UPDA2 ? "1" : "0"));
         WriteLog("  DSTA2    = %s\n", (DSTA2 ? "1" : "0"));
         WriteLog("  ZOP      = %s %s %s\n", (Z_OP_INF ? "<" : ""), (Z_OP_EQU ? "=" : ""), (Z_OP_SUP ? ">" : ""));
-       WriteLog("--LFUFUNC  = %s\n", opStr[(cmd >> 21) & 0x0F]);
+       WriteLog("+-LFUFUNC  = %s\n", opStr[(cmd >> 21) & 0x0F]);
         WriteLog("| PATDSEL  = %s (PD=%08X%08X)\n", (PATDSEL ? "1" : "0"), REG(PATTERNDATA), REG(PATTERNDATA + 4));
-       WriteLog("--ADDDSEL  = %s\n", (ADDDSEL ? "1" : "0"));
+       WriteLog("+-ADDDSEL  = %s\n", (ADDDSEL ? "1" : "0"));
         WriteLog("  CMPDST   = %s\n", (CMPDST ? "1" : "0"));
         WriteLog("  BCOMPEN  = %s\n", (BCOMPEN ? "1" : "0"));
         WriteLog("  DCOMPEN  = %s\n", (DCOMPEN ? "1" : "0"));
@@ -1836,17 +1861,20 @@ void LogBlit(void)
  #define TXTEXT  (false)
  #define POLYGON (false)
  
-void BlitterMidsummer(uint32 cmd)
+void BlitterMidsummer(uint32_t cmd)
  {
-uint32 outer_loop, inner_loop, a1_addr, a2_addr;
-int32 a1_x, a1_y, a2_x, a2_y, a1_width, a2_width;
-uint8 a1_phrase_mode, a2_phrase_mode;
+#ifdef LOG_BLITS
+       LogBlit();
+#endif
+uint32_t outer_loop, inner_loop, a1_addr, a2_addr;
+int32_t a1_x, a1_y, a2_x, a2_y, a1_width, a2_width;
+uint8_t a1_phrase_mode, a2_phrase_mode;
  
         a1_addr = REG(A1_BASE) & 0xFFFFFFF8;
         a2_addr = REG(A2_BASE) & 0xFFFFFFF8;
         a1_x = (REG(A1_PIXEL) << 16) | (REG(A1_FPIXEL) & 0xFFFF);
         a1_y = (REG(A1_PIXEL) & 0xFFFF0000) | (REG(A1_FPIXEL) >> 16);
-       UINT32 m = (REG(A1_FLAGS) >> 9) & 0x03, e = (REG(A1_FLAGS) >> 11) & 0x0F;
+       uint32_t m = (REG(A1_FLAGS) >> 9) & 0x03, e = (REG(A1_FLAGS) >> 11) & 0x0F;
         a1_width = ((0x04 | m) << e) >> 2;//*/
         a2_x = (REG(A2_PIXEL) & 0x0000FFFF) << 16;
         a2_y = (REG(A2_PIXEL) & 0xFFFF0000);
@@ -1875,14 +1903,14 @@ if (cmd != 0x00010200 && cmd != 0x01800001 && cmd != 0x01800005
         LogBlit();
  }//*/
  
-       uint64 srcData = GET64(blitter_ram, SRCDATA), srcXtraData,
+       uint64_t srcData = GET64(blitter_ram, SRCDATA), srcXtraData,
                 dstData = GET64(blitter_ram, DSTDATA), writeData;
-       uint32 srcAddr, dstAddr;
-       uint8 bitCount, a1PixelSize, a2PixelSize;
+       uint32_t srcAddr, dstAddr;
+       uint8_t bitCount, a1PixelSize, a2PixelSize;
  
         // JTRM says phrase mode only works for 8BPP or higher, so let's try this...
-       uint32 phraseOffset[8] = { 8, 8, 8, 8, 4, 2, 0, 0 };
-       uint8 pixelShift[8] = { 3, 2, 1, 0, 1, 2, 0, 0 };
+       uint32_t phraseOffset[8] = { 8, 8, 8, 8, 4, 2, 0, 0 };
+       uint8_t pixelShift[8] = { 3, 2, 1, 0, 1, 2, 0, 0 };
  
         a1PixelSize = (blitter_ram[A1_FLAGS + 3] >> 3) & 0x07;
         a2PixelSize = (blitter_ram[A2_FLAGS + 3] >> 3) & 0x07;
@@ -2009,7 +2037,7 @@ sread:                                                    // Source data read
  //pointing at. Likewise, the pixel (if in BPP 1, 2 & 4, chopped) otherwise. It probably still
  //transfers an entire phrase even in pixel mode.
  //Odd thought: Does it expand, e.g., 1 BPP pixels into 32 BPP internally? Hmm...
-//No. 
+//No.
  /*
         a1_addr = REG(A1_BASE) & 0xFFFFFFF8;
         a2_addr = REG(A2_BASE) & 0xFFFFFFF8;
@@ -2030,7 +2058,7 @@ sread:                                                    // Source data read
         a1_width = ((0x04 | m) << e) >> 2;
         a2_width = ((0x04 | m) << e) >> 2;
  
-       // write values back to registers 
+       // write values back to registers
         WREG(A1_PIXEL,  (a1_y & 0xFFFF0000) | ((a1_x >> 16) & 0xFFFF));
         WREG(A1_FPIXEL, (a1_y << 16) | (a1_x & 0xFFFF));
         WREG(A2_PIXEL,  (a2_y & 0xFFFF0000) | ((a2_x >> 16) & 0xFFFF));
@@ -2049,10 +2077,10 @@ sread:                                                  // Source data read
         }
         else*/
         {
-//             uint32 pixAddr = ((DSTA2 ? a1_x : a2_x) >> 16)
+//             uint32_t pixAddr = ((DSTA2 ? a1_x : a2_x) >> 16)
  //                     + (((DSTA2 ? a1_y : a2_y) >> 16) * (DSTA2 ? a1_width : a2_width));
-               int32 pixAddr = (int16)((DSTA2 ? a1_x : a2_x) >> 16)
-                       + ((int16)((DSTA2 ? a1_y : a2_y) >> 16) * (DSTA2 ? a1_width : a2_width));
+               int32_t pixAddr = (int16_t)((DSTA2 ? a1_x : a2_x) >> 16)
+                       + ((int16_t)((DSTA2 ? a1_y : a2_y) >> 16) * (DSTA2 ? a1_width : a2_width));
  
                 if ((DSTA2 ? a1PixelSize : a2PixelSize) < 3)
                         pixAddr >>= pixelShift[(DSTA2 ? a1PixelSize : a2PixelSize)];
@@ -2066,8 +2094,8 @@ sread:                                                    // Source data read
  
         if ((DSTA2 ? a1_phrase_mode : a2_phrase_mode) == 1)
         {
-               srcData = ((uint64)JaguarReadLong(srcAddr, BLITTER) << 32)
-                       | (uint64)JaguarReadLong(srcAddr + 4, BLITTER);
+               srcData = ((uint64_t)JaguarReadLong(srcAddr, BLITTER) << 32)
+                       | (uint64_t)JaguarReadLong(srcAddr + 4, BLITTER);
         }
         else
         {
@@ -2088,7 +2116,7 @@ sread:                                                    // Source data read
  
  #ifdef LOG_BLITTER_MEMORY_ACCESSES
  if (logBlit)
-       WriteLog("BLITTER: srcAddr=%08X,   srcData=%08X %08X\n", srcAddr, (uint32)(srcData >> 32), (uint32)(srcData & 0xFFFFFFFF));
+       WriteLog("BLITTER: srcAddr=%08X,   srcData=%08X %08X\n", srcAddr, (uint32_t)(srcData >> 32), (uint32_t)(srcData & 0xFFFFFFFF));
  #endif
  
         if (SRCENZ)
@@ -2130,10 +2158,10 @@ if STEP
         dstAddr = (DSTA2 ? a2_addr : a1_addr);
  
         {
-//     uint32 pixAddr = ((DSTA2 ? a2_x : a1_x) >> 16)
+//     uint32_t pixAddr = ((DSTA2 ? a2_x : a1_x) >> 16)
  //             + (((DSTA2 ? a2_y : a1_y) >> 16) * (DSTA2 ? a2_width : a1_width));
-       int32 pixAddr = (int16)((DSTA2 ? a2_x : a1_x) >> 16)
-               + ((int16)((DSTA2 ? a2_y : a1_y) >> 16) * (DSTA2 ? a2_width : a1_width));
+       int32_t pixAddr = (int16_t)((DSTA2 ? a2_x : a1_x) >> 16)
+               + ((int16_t)((DSTA2 ? a2_y : a1_y) >> 16) * (DSTA2 ? a2_width : a1_width));
  
         if ((DSTA2 ? a2PixelSize : a1PixelSize) < 3)
                 pixAddr >>= pixelShift[(DSTA2 ? a2PixelSize : a1PixelSize)];
@@ -2147,8 +2175,8 @@ if STEP
  
         if ((DSTA2 ? a2_phrase_mode : a1_phrase_mode) == 1)
         {
-               dstData = ((uint64)JaguarReadLong(srcAddr, BLITTER) << 32)
-                       | (uint64)JaguarReadLong(srcAddr + 4, BLITTER);
+               dstData = ((uint64_t)JaguarReadLong(srcAddr, BLITTER) << 32)
+                       | (uint64_t)JaguarReadLong(srcAddr + 4, BLITTER);
         }
         else
         {
@@ -2169,7 +2197,7 @@ if STEP
  
  #ifdef LOG_BLITTER_MEMORY_ACCESSES
  if (logBlit)
-       WriteLog("BLITTER (dread): dstAddr=%08X,   dstData=%08X %08X\n", dstAddr, (uint32)(dstData >> 32), (uint32)(dstData & 0xFFFFFFFF));
+       WriteLog("BLITTER (dread): dstAddr=%08X,   dstData=%08X %08X\n", dstAddr, (uint32_t)(dstData >> 32), (uint32_t)(dstData & 0xFFFFFFFF));
  #endif
  
         if (DSTENZ)
@@ -2238,14 +2266,14 @@ Blit!
  
         if (CLIPA1)
         {
-               uint16 x = a1_x >> 16, y = a1_y >> 16;
+               uint16_t x = a1_x >> 16, y = a1_y >> 16;
  
                 if (x >= GET16(blitter_ram, A1_CLIP + 2) || y >= GET16(blitter_ram, A1_CLIP))
                         goto inhibitWrite;
         }
  
         // Figure out what gets written...
-       
+
         if (PATDSEL)
         {
                 writeData = GET64(blitter_ram, PATTERNDATA);
@@ -2268,7 +2296,7 @@ Blit!
         else    // LFUFUNC is the default...
         {
                 writeData = 0;
-               
+
                 if (LFU_NAN)
                         writeData |= ~srcData & ~dstData;
                 if (LFU_NA)
@@ -2294,10 +2322,10 @@ Blit!
         {
  /*             dstAddr += ((DSTA2 ? a2_x : a1_x) >> 16)
                         + (((DSTA2 ? a2_y : a1_y) >> 16) * (DSTA2 ? a2_width : a1_width));*/
-//             uint32 pixAddr = ((DSTA2 ? a2_x : a1_x) >> 16)
+//             uint32_t pixAddr = ((DSTA2 ? a2_x : a1_x) >> 16)
  //                     + (((DSTA2 ? a2_y : a1_y) >> 16) * (DSTA2 ? a2_width : a1_width));
-               int32 pixAddr = (int16)((DSTA2 ? a2_x : a1_x) >> 16)
-                       + ((int16)((DSTA2 ? a2_y : a1_y) >> 16) * (DSTA2 ? a2_width : a1_width));
+               int32_t pixAddr = (int16_t)((DSTA2 ? a2_x : a1_x) >> 16)
+                       + ((int16_t)((DSTA2 ? a2_y : a1_y) >> 16) * (DSTA2 ? a2_width : a1_width));
  
                 if ((DSTA2 ? a2PixelSize : a1PixelSize) < 3)
                         pixAddr >>= pixelShift[(DSTA2 ? a2PixelSize : a1PixelSize)];
@@ -2333,7 +2361,7 @@ Blit!
  
  #ifdef LOG_BLITTER_MEMORY_ACCESSES
  if (logBlit)
-       WriteLog("BLITTER: dstAddr=%08X, writeData=%08X %08X\n", dstAddr, (uint32)(writeData >> 32), (uint32)(writeData & 0xFFFFFFFF));
+       WriteLog("BLITTER: dstAddr=%08X, writeData=%08X %08X\n", dstAddr, (uint32_t)(writeData >> 32), (uint32_t)(writeData & 0xFFFFFFFF));
  #endif
  
  inhibitWrite://Should this go here? or on the other side of the X/Y incrementing?
@@ -2370,7 +2398,7 @@ inhibitWrite://Should this go here? or on the other side of the X/Y incrementing
                 a2_x += (blitter_ram[A2_FLAGS + 1] & 0x08 ? -1 << 16 : 1 << 16);
  /*     else if ((blitter_ram[A2_FLAGS + 1] & 0x03) == 2)
                 a2_x += 0 << 16;                              */
-       
+
         if (blitter_ram[A2_FLAGS + 1] & 0x04)
                 a2_y += (blitter_ram[A2_FLAGS + 1] & 0x10 ? -1 << 16 : 1 << 16);
  
@@ -2475,18 +2503,18 @@ a1fupdate    A1 step fraction is added to A1 pointer fraction
  goto a1update
  */
  /*
-#define A1_PIXEL               ((UINT32)0x0C)  // Integer part of the pixel (Y.i and X.i)
-#define A1_STEP                        ((UINT32)0x10)  // Integer part of the step
-#define A1_FSTEP               ((UINT32)0x14)  // Fractional part of the step
-#define A1_FPIXEL              ((UINT32)0x18)  // Fractional part of the pixel (Y.f and X.f)
+#define A1_PIXEL               ((uint32_t)0x0C)        // Integer part of the pixel (Y.i and X.i)
+#define A1_STEP                        ((uint32_t)0x10)        // Integer part of the step
+#define A1_FSTEP               ((uint32_t)0x14)        // Fractional part of the step
+#define A1_FPIXEL              ((uint32_t)0x18)        // Fractional part of the pixel (Y.f and X.f)
  */
  
  // This is all kinda murky. All we have are the Midsummer docs to give us any guidance,
  // and it's incomplete or filled with errors (like above). Aarrrgggghhhhh!
  
  //This isn't right. Is it? I don't think the fractional parts are signed...
-//     a1_x += (int32)((int16)GET16(blitter_ram, A1_FSTEP + 2));
-//     a1_y += (int32)((int16)GET16(blitter_ram, A1_FSTEP + 0));
+//     a1_x += (int32_t)((int16_t)GET16(blitter_ram, A1_FSTEP + 2));
+//     a1_y += (int32_t)((int16_t)GET16(blitter_ram, A1_FSTEP + 0));
         a1_x += GET16(blitter_ram, A1_FSTEP + 2);
         a1_y += GET16(blitter_ram, A1_FSTEP + 0);
  
@@ -2510,8 +2538,8 @@ else if UPDA2 goto a2update
  else if DATINIT goto init_if
  else restart inner
  */
-       a1_x += (int32)(GET16(blitter_ram, A1_STEP + 2) << 16);
-       a1_y += (int32)(GET16(blitter_ram, A1_STEP + 0) << 16);
+       a1_x += (int32_t)(GET16(blitter_ram, A1_STEP + 2) << 16);
+       a1_y += (int32_t)(GET16(blitter_ram, A1_STEP + 0) << 16);
  
  
  //kill this, for now...
@@ -2554,8 +2582,8 @@ a2update     A2 step is added to the A2 pointer
  if DATINIT goto init_if
  else restart inner
  */
-       a2_x += (int32)(GET16(blitter_ram, A2_STEP + 2) << 16);
-       a2_y += (int32)(GET16(blitter_ram, A2_STEP + 0) << 16);
+       a2_x += (int32_t)(GET16(blitter_ram, A2_STEP + 2) << 16);
+       a2_y += (int32_t)(GET16(blitter_ram, A2_STEP + 0) << 16);
  
  
         if (DATINIT)
@@ -2618,103 +2646,58 @@ blitter_done:
  }
  #endif
  
-/*
-Here's a collection of various blits that aren't handled correctly yet...
-
-Spinning cube in Jaguar BIOS opening:
-
-  COMMAND  = 41802F41
-  a1_base  = 0015B000
-  a1_flags = 00033020 (0 0 0 11 . 011000 000 100 . 00)
-             pitch=0, pixSz=4, zOff=0, width=64, xCtrl=3
-  a1_clip  = 64, 64 (00400040)
-  a1_pixel = 0, 65534 (FFFE0000)
-  a1_step  = -64, -1 (FFFFFFC0)
-  a1_fstep = 0, 18161 (46F10000)
-  a1_fpixel= 32768, 42497 (A6018000)
-  a1_inc   = 1, 0 (00000001)
-  a1_finc  = 0, 1926 (07860000)
-  a2_base  = 0014B000
-  a2_flags = 00013820 (0 0 0 01 0 011100 000 100 . 00)
-             pitch=0, pixSz=4, zOff=0, width=128, xCtrl=1
-  a2_mask  = 0, 0 (00000000)
-  a2_pixel = 33, 55 (00370021)
-  a2_step  = -64, 1 (0001FFC0)
-  count    = 64 x 57
-  SRCEN    = 1
-  SRCENZ   = 0
-  SRCENX   = 0
-  DSTEN    = 0
-  DSTENZ   = 0
-  DSTWRZ   = 0
-  CLIPA1   = 1
-  UPDA1F   = 1
-  UPDA1    = 1
-  UPDA2    = 1
-  DSTA2    = 1
-  ZOP      =
---LFUFUNC  = LFU_REPLACE
-| PATDSEL  = 0 (PD=0000000000000000)
---ADDDSEL  = 0
-  CMPDST   = 0
-  BCOMPEN  = 0
-  DCOMPEN  = 0
-  TOPBEN   = 0
-  TOPNEN   = 0
-  BKGWREN  = 0
-  GOURD    = 0 (II=00FCDC80, SD=FF00FF00FF00FF00)
-  GOURZ    = 1 (ZI=00000000, ZD=0000000000000000, SZ1=0000000000000000, SZ2=0000000000000000)
-  SRCSHADE = 1
  
-*/
+//
+// Here's attempt #2--taken from the Oberon chip specs!
+//
  
  #ifdef USE_MIDSUMMER_BLITTER_MKII
-// Here's attempt #2--taken from the Oberon chip specs!
  
-void ADDRGEN(uint32 &, uint32 &, bool, bool,
-       uint16, uint16, uint32, uint8, uint8, uint8, uint8,
-       uint16, uint16, uint32, uint8, uint8, uint8, uint8);
-void ADDARRAY(uint16 * addq, uint8 daddasel, uint8 daddbsel, uint8 daddmode,
-       uint64 dstd, uint32 iinc, uint8 initcin[], uint64 initinc, uint16 initpix,
-       uint32 istep, uint64 patd, uint64 srcd, uint64 srcz1, uint64 srcz2,
-       uint32 zinc, uint32 zstep);
-void ADD16SAT(uint16 &r, uint8 &co, uint16 a, uint16 b, uint8 cin, bool sat, bool eightbit, bool hicinh);
-void ADDAMUX(int16 &adda_x, int16 &adda_y, uint8 addasel, int16 a1_step_x, int16 a1_step_y,
-       int16 a1_stepf_x, int16 a1_stepf_y, int16 a2_step_x, int16 a2_step_y,
-       int16 a1_inc_x, int16 a1_inc_y, int16 a1_incf_x, int16 a1_incf_y, uint8 adda_xconst,
+void ADDRGEN(uint32_t &, uint32_t &, bool, bool,
+       uint16_t, uint16_t, uint32_t, uint8_t, uint8_t, uint8_t, uint8_t,
+       uint16_t, uint16_t, uint32_t, uint8_t, uint8_t, uint8_t, uint8_t);
+void ADDARRAY(uint16_t * addq, uint8_t daddasel, uint8_t daddbsel, uint8_t daddmode,
+       uint64_t dstd, uint32_t iinc, uint8_t initcin[], uint64_t initinc, uint16_t initpix,
+       uint32_t istep, uint64_t patd, uint64_t srcd, uint64_t srcz1, uint64_t srcz2,
+       uint32_t zinc, uint32_t zstep);
+void ADD16SAT(uint16_t &r, uint8_t &co, uint16_t a, uint16_t b, uint8_t cin, bool sat, bool eightbit, bool hicinh);
+void ADDAMUX(int16_t &adda_x, int16_t &adda_y, uint8_t addasel, int16_t a1_step_x, int16_t a1_step_y,
+       int16_t a1_stepf_x, int16_t a1_stepf_y, int16_t a2_step_x, int16_t a2_step_y,
+       int16_t a1_inc_x, int16_t a1_inc_y, int16_t a1_incf_x, int16_t a1_incf_y, uint8_t adda_xconst,
         bool adda_yconst, bool addareg, bool suba_x, bool suba_y);
-void ADDBMUX(int16 &addb_x, int16 &addb_y, uint8 addbsel, int16 a1_x, int16 a1_y,
-       int16 a2_x, int16 a2_y, int16 a1_frac_x, int16 a1_frac_y);
-void DATAMUX(int16 &data_x, int16 &data_y, uint32 gpu_din, int16 addq_x, int16 addq_y, bool addqsel);
-void ADDRADD(int16 &addq_x, int16 &addq_y, bool a1fracldi,
-       uint16 adda_x, uint16 adda_y, uint16 addb_x, uint16 addb_y, uint8 modx, bool suba_x, bool suba_y);
-void DATA (uint64 &wdata, uint8 &dcomp, uint8 &zcomp, bool &nowrite,
-       bool big_pix, bool cmpdst, uint8 daddasel, uint8 daddbsel, uint8 daddmode, bool daddq_sel, uint8 data_sel,
-       uint8 dbinh, uint8 dend, uint8 dstart, uint64 dstd, uint32 iinc, uint8 lfu_func, uint64 patd, bool patdadd,
-       bool phrase_mode, uint64 srcd, bool srcdread, bool srczread, bool srcz2add, uint8 zmode,
-       bool bcompen, bool bkgwren, bool dcompen, uint8 icount, uint8 pixsize);
-void COMP_CTRL(uint8 &dbinh, bool &nowrite,
-       bool bcompen, bool big_pix, bool bkgwren, uint8 dcomp, bool dcompen, uint8 icount,
-       uint8 pixsize, bool phrase_mode, uint8 srcd, uint8 zcomp);
+void ADDBMUX(int16_t &addb_x, int16_t &addb_y, uint8_t addbsel, int16_t a1_x, int16_t a1_y,
+       int16_t a2_x, int16_t a2_y, int16_t a1_frac_x, int16_t a1_frac_y);
+void DATAMUX(int16_t &data_x, int16_t &data_y, uint32_t gpu_din, int16_t addq_x, int16_t addq_y, bool addqsel);
+void ADDRADD(int16_t &addq_x, int16_t &addq_y, bool a1fracldi,
+       uint16_t adda_x, uint16_t adda_y, uint16_t addb_x, uint16_t addb_y, uint8_t modx, bool suba_x, bool suba_y);
+void DATA(uint64_t &wdata, uint8_t &dcomp, uint8_t &zcomp, bool &nowrite,
+       bool big_pix, bool cmpdst, uint8_t daddasel, uint8_t daddbsel, uint8_t daddmode, bool daddq_sel, uint8_t data_sel,
+       uint8_t dbinh, uint8_t dend, uint8_t dstart, uint64_t dstd, uint32_t iinc, uint8_t lfu_func, uint64_t &patd, bool patdadd,
+       bool phrase_mode, uint64_t srcd, bool srcdread, bool srczread, bool srcz2add, uint8_t zmode,
+       bool bcompen, bool bkgwren, bool dcompen, uint8_t icount, uint8_t pixsize,
+       uint64_t &srcz, uint64_t dstz, uint32_t zinc);
+void COMP_CTRL(uint8_t &dbinh, bool &nowrite,
+       bool bcompen, bool big_pix, bool bkgwren, uint8_t dcomp, bool dcompen, uint8_t icount,
+       uint8_t pixsize, bool phrase_mode, uint8_t srcd, uint8_t zcomp);
  #define VERBOSE_BLITTER_LOGGING
  bool logBlit = false;
  
  void BlitterMidsummer2(void)
  {
+#ifdef LOG_BLITS
+       LogBlit();
+#endif
         // Here's what the specs say the state machine does. Note that this can probably be
         // greatly simplified (also, it's different from what John has in his Oberon docs):
  //Will remove stuff that isn't in Jaguar I once fully described (stuff like texture won't
  //be described here at all)...
  
-       uint32 cmd = GET32(blitter_ram, COMMAND);
+       uint32_t cmd = GET32(blitter_ram, COMMAND);
  
-// $01800005 has SRCENX, may have to investigate further...
-// $00011008 has GOURD & DSTEN.
-// $41802F41 has SRCSHADE, CLIPA1
  logBlit = false;
-/*if (
-       cmd != 0x00010200
-       && cmd != 0x01800001
+if (
+       cmd != 0x00010200 &&    // PATDSEL
+       cmd != 0x01800001               // SRCEN LFUFUNC=C
         && cmd != 0x01800005
  //Boot ROM ATARI letters:
         && cmd != 0x00011008    // DSTEN GOURD PATDSEL
@@ -2725,11 +2708,11 @@ logBlit = false;
  //T2K TEMPEST letters:
         && cmd != 0x09800741    // SRCEN CLIP_A1 UPDA1 UPDA1F UPDA2 LFUFUNC=C DCOMPEN
  //Static letters on Cybermorph intro screen:
-       && cmd != 0x09800609    // DCOMPEN
+       && cmd != 0x09800609    // SRCEN DSTEN UPDA1 UPDA2 LFUFUNC=C DCOMPEN
  //Static pic on title screen:
         && cmd != 0x01800601    // SRCEN UPDA1 UPDA2 LFUFUNC=C
  //Turning letters on Cybermorph intro screen:
-       && cmd != 0x09800F41    // SRCEN CLIP_A1 UPDA1 UPDA1F UPDA2 DSTA2 LFUFUNC=C DCOMPEN
+//     && cmd != 0x09800F41    // SRCEN CLIP_A1 UPDA1 UPDA1F UPDA2 DSTA2 LFUFUNC=C DCOMPEN
         && cmd != 0x00113078    // DSTEN DSTENZ DSTWRZ CLIP_A1 GOURD GOURZ PATDSEL ZMODE=4
         && cmd != 0x09900F39    // SRCEN DSTEN DSTENZ DSTWRZ UPDA1 UPDA1F UPDA2 DSTA2 ZMODE=4 LFUFUNC=C DCOMPEN
         && cmd != 0x09800209    // SRCEN DSTEN UPDA1 LFUFUNC=C DCOMPEN
@@ -2738,11 +2721,70 @@ logBlit = false;
         && cmd != 0x00010000    // PATDSEL
  //Hover Strike text:
         && cmd != 0x1401060C    // SRCENX DSTEN UPDA1 UPDA2 PATDSEL BCOMPEN BKGWREN
+//Hover Strike 3D stuff
+       && cmd != 0x01902839    // SRCEN DSTEN DSTENZ DSTWRZ DSTA2 GOURZ ZMODE=4 LFUFUNC=C
+//Hover Strike darkening on intro to play (briefing) screen
+       && cmd != 0x00020208    // DSTEN UPDA1 ADDDSEL
+//Trevor McFur stuff:
+       && cmd != 0x05810601    // SRCEN UPDA1 UPDA2 PATDSEL LFUFUNC=C BCOMPEN
+       && cmd != 0x01800201    // SRCEN UPDA1 LFUFUNC=C
+//T2K:
+       && cmd != 0x00011000    // GOURD PATDSEL
+       && cmd != 0x00011040    // CLIP_A1 GOURD PATDSEL
+//Checkered flag:
+       && cmd != 0x01800000    // LFUFUNC=C
+       && cmd != 0x01800401    // SRCEN UPDA2 LFUFUNC=C
+       && cmd != 0x01800040    // CLIP_A1 LFUFUNC=C
+       && cmd != 0x00020008    // DSTEN ADDDSEL
+//     && cmd != 0x09800F41    // SRCEN CLIP_A1 UPDA1 UPDA1F UPDA2 DSTA2 LFUFUNC=C DCOMPEN
         )
         logBlit = true;//*/
+//logBlit = true;
+if (blit_start_log == 0)       // Wait for the signal...
+       logBlit = false;//*/
+/*
+Some T2K unique blits:
+logBlit = F, cmd = 00010200 *
+logBlit = F, cmd = 00011000
+logBlit = F, cmd = 00011040
+logBlit = F, cmd = 01800005 *
+logBlit = F, cmd = 09800741 *
+
+Hover Strike mission selection screen:
+Blit! (CMD = 01902839) // SRCEN DSTEN DSTENZ DSTWRZ DSTA2 GOURZ ZMODE=4 LFUFUNC=C
+
+Checkered Flag blits in the screw up zone:
+Blit! (CMD = 01800001) // SRCEN LFUFUNC=C
+Blit! (CMD = 01800000) // LFUFUNC=C
+Blit! (CMD = 00010000) // PATDSEL
+
+Wolfenstein 3D in the fuckup zone:
+Blit! (CMD = 01800000) // LFUFUNC=C
+*/
  
  //printf("logBlit = %s, cmd = %08X\n", (logBlit ? "T" : "F"), cmd);
  //fflush(stdout);
+//logBlit = true;
+
+/*
+Blit! (CMD = 00011040)
+Flags: CLIP_A1 GOURD PATDSEL
+  count = 18 x 1
+  a1_base = 00100000, a2_base = 0081F6A8
+  a1_x = 00A7, a1_y = 0014, a1_frac_x = 0000, a1_frac_y = 0000, a2_x = 0001, a2_y = 0000
+  a1_step_x = FE80, a1_step_y = 0001, a1_stepf_x = 0000, a1_stepf_y = 0000, a2_step_x = FFF8, a2_step_y = 0001
+  a1_inc_x = 0001, a1_inc_y = 0000, a1_incf_x = 0000, a1_incf_y = 0000
+  a1_win_x = 0180, a1_win_y = 0118, a2_mask_x = 0000, a2_mask_y = 0000
+  a2_mask=F a1add=+phr/+0 a2add=+phr/+0
+  a1_pixsize = 4, a2_pixsize = 4
+*/
+//Testing T2K...
+/*logBlit = false;
+if (cmd == 0x00011040
+       && (GET16(blitter_ram, A1_PIXEL + 2) == 0x00A7) && (GET16(blitter_ram, A1_PIXEL + 0) == 0x0014)
+       && (GET16(blitter_ram, A2_PIXEL + 2) == 0x0001) && (GET16(blitter_ram, A2_PIXEL + 0) == 0x0000)
+       && (GET16(blitter_ram, PIXLINECOUNTER + 2) == 18))
+       logBlit = true;*/
  
         // Line states passed in via the command register
  
@@ -2753,8 +2795,8 @@ logBlit = false;
                 patdsel = (PATDSEL), adddsel = (ADDDSEL), cmpdst = (CMPDST), bcompen = (BCOMPEN),
                 dcompen = (DCOMPEN), bkgwren = (BKGWREN), srcshade = (SRCSHADE);
  
-       uint8 zmode = (cmd & 0x01C0000) >> 18, lfufunc = (cmd & 0x1E00000) >> 21;
-//Missing: ZMODE, LFUFUNC, BUSHI
+       uint8_t zmode = (cmd & 0x01C0000) >> 18, lfufunc = (cmd & 0x1E00000) >> 21;
+//Missing: BUSHI
  //Where to find various lines:
  // clip_a1  -> inner
  // gourd    -> dcontrol, inner, outer, state
@@ -2790,7 +2832,7 @@ fflush(stdout);
  #endif
  
         // Lines that don't exist in Jaguar I (and will never be asserted)
-       
+
         bool polygon = false, datinit = false, a1_stepld = false, a2_stepld = false, ext_int = false;
         bool istepadd = false, istepfadd = false, finneradd = false, inneradd = false;
         bool zstepfadd = false, zstepadd = false;
@@ -2810,58 +2852,63 @@ fflush(stdout);
  
         // Various registers set up by user
  
-       uint16 ocount = GET16(blitter_ram, PIXLINECOUNTER);
-       uint8 a1_pitch = blitter_ram[A1_FLAGS + 3] & 0x03;
-       uint8 a2_pitch = blitter_ram[A2_FLAGS + 3] & 0x03;
-       uint8 a1_pixsize = (blitter_ram[A1_FLAGS + 3] & 0x38) >> 3;
-       uint8 a2_pixsize = (blitter_ram[A2_FLAGS + 3] & 0x38) >> 3;
-       uint8 a1_zoffset = (GET16(blitter_ram, A1_FLAGS + 2) >> 6) & 0x07;
-       uint8 a2_zoffset = (GET16(blitter_ram, A2_FLAGS + 2) >> 6) & 0x07;
-       uint8 a1_width = (blitter_ram[A1_FLAGS + 2] >> 1) & 0x3F;
-       uint8 a2_width = (blitter_ram[A2_FLAGS + 2] >> 1) & 0x3F;
+       uint16_t ocount = GET16(blitter_ram, PIXLINECOUNTER);
+       uint8_t a1_pitch = blitter_ram[A1_FLAGS + 3] & 0x03;
+       uint8_t a2_pitch = blitter_ram[A2_FLAGS + 3] & 0x03;
+       uint8_t a1_pixsize = (blitter_ram[A1_FLAGS + 3] & 0x38) >> 3;
+       uint8_t a2_pixsize = (blitter_ram[A2_FLAGS + 3] & 0x38) >> 3;
+       uint8_t a1_zoffset = (GET16(blitter_ram, A1_FLAGS + 2) >> 6) & 0x07;
+       uint8_t a2_zoffset = (GET16(blitter_ram, A2_FLAGS + 2) >> 6) & 0x07;
+       uint8_t a1_width = (blitter_ram[A1_FLAGS + 2] >> 1) & 0x3F;
+       uint8_t a2_width = (blitter_ram[A2_FLAGS + 2] >> 1) & 0x3F;
         bool a2_mask = blitter_ram[A2_FLAGS + 2] & 0x80;
-       uint8 a1addx = blitter_ram[A1_FLAGS + 1] & 0x03, a2addx = blitter_ram[A2_FLAGS + 1] & 0x03;
+       uint8_t a1addx = blitter_ram[A1_FLAGS + 1] & 0x03, a2addx = blitter_ram[A2_FLAGS + 1] & 0x03;
         bool a1addy = blitter_ram[A1_FLAGS + 1] & 0x04, a2addy = blitter_ram[A2_FLAGS + 1] & 0x04;
         bool a1xsign = blitter_ram[A1_FLAGS + 1] & 0x08, a2xsign = blitter_ram[A2_FLAGS + 1] & 0x08;
         bool a1ysign = blitter_ram[A1_FLAGS + 1] & 0x10, a2ysign = blitter_ram[A2_FLAGS + 1] & 0x10;
-       uint32 a1_base = GET32(blitter_ram, A1_BASE) & 0xFFFFFFF8;      // Phrase aligned by ignoring bottom 3 bits
-       uint32 a2_base = GET32(blitter_ram, A2_BASE) & 0xFFFFFFF8;
-
-       uint16 a1_win_x = GET16(blitter_ram, A1_CLIP + 2) & 0x7FFF;
-       uint16 a1_win_y = GET16(blitter_ram, A1_CLIP + 0) & 0x7FFF;
-       int16 a1_x = (int16)GET16(blitter_ram, A1_PIXEL + 2);
-       int16 a1_y = (int16)GET16(blitter_ram, A1_PIXEL + 0);
-       int16 a1_step_x = (int16)GET16(blitter_ram, A1_STEP + 2);
-       int16 a1_step_y = (int16)GET16(blitter_ram, A1_STEP + 0);
-       uint16 a1_stepf_x = GET16(blitter_ram, A1_FSTEP + 2);
-       uint16 a1_stepf_y = GET16(blitter_ram, A1_FSTEP + 0);
-       uint16 a1_frac_x = GET16(blitter_ram, A1_FPIXEL + 2);
-       uint16 a1_frac_y = GET16(blitter_ram, A1_FPIXEL + 0);
-       int16 a1_inc_x = (int16)GET16(blitter_ram, A1_INC + 2);
-       int16 a1_inc_y = (int16)GET16(blitter_ram, A1_INC + 0);
-       uint16 a1_incf_x = GET16(blitter_ram, A1_FINC + 2);
-       uint16 a1_incf_y = GET16(blitter_ram, A1_FINC + 0);
-
-       int16 a2_x = (int16)GET16(blitter_ram, A2_PIXEL + 2);
-       int16 a2_y = (int16)GET16(blitter_ram, A2_PIXEL + 0);
-       uint16 a2_mask_x = GET16(blitter_ram, A2_MASK + 2);
-       uint16 a2_mask_y = GET16(blitter_ram, A2_MASK + 0);
-       int16 a2_step_x = (int16)GET16(blitter_ram, A2_STEP + 2);
-       int16 a2_step_y = (int16)GET16(blitter_ram, A2_STEP + 0);
-
-       uint64 srcd1 = GET64(blitter_ram, SRCDATA);
-       uint64 srcd2 = 0;
-       uint64 dstd = GET64(blitter_ram, DSTDATA);
-       uint64 patd = GET64(blitter_ram, PATTERNDATA);
-       uint32 iinc = GET32(blitter_ram, INTENSITYINC);
-       uint64 srcz1 = GET64(blitter_ram, SRCZINT);
-       uint64 srcz2 = GET64(blitter_ram, SRCZFRAC);
-       uint64 dstz = GET64(blitter_ram, DSTZ);
-       uint32 zinc = GET32(blitter_ram, ZINC);
-       uint32 collision = GET32(blitter_ram, COLLISIONCTRL);// 0=RESUME, 1=ABORT, 2=STOPEN
-
-       uint8 pixsize = (dsta2 ? a2_pixsize : a1_pixsize);      // From ACONTROL
-
+       uint32_t a1_base = GET32(blitter_ram, A1_BASE) & 0xFFFFFFF8;    // Phrase aligned by ignoring bottom 3 bits
+       uint32_t a2_base = GET32(blitter_ram, A2_BASE) & 0xFFFFFFF8;
+
+       uint16_t a1_win_x = GET16(blitter_ram, A1_CLIP + 2) & 0x7FFF;
+       uint16_t a1_win_y = GET16(blitter_ram, A1_CLIP + 0) & 0x7FFF;
+       int16_t a1_x = (int16_t)GET16(blitter_ram, A1_PIXEL + 2);
+       int16_t a1_y = (int16_t)GET16(blitter_ram, A1_PIXEL + 0);
+       int16_t a1_step_x = (int16_t)GET16(blitter_ram, A1_STEP + 2);
+       int16_t a1_step_y = (int16_t)GET16(blitter_ram, A1_STEP + 0);
+       uint16_t a1_stepf_x = GET16(blitter_ram, A1_FSTEP + 2);
+       uint16_t a1_stepf_y = GET16(blitter_ram, A1_FSTEP + 0);
+       uint16_t a1_frac_x = GET16(blitter_ram, A1_FPIXEL + 2);
+       uint16_t a1_frac_y = GET16(blitter_ram, A1_FPIXEL + 0);
+       int16_t a1_inc_x = (int16_t)GET16(blitter_ram, A1_INC + 2);
+       int16_t a1_inc_y = (int16_t)GET16(blitter_ram, A1_INC + 0);
+       uint16_t a1_incf_x = GET16(blitter_ram, A1_FINC + 2);
+       uint16_t a1_incf_y = GET16(blitter_ram, A1_FINC + 0);
+
+       int16_t a2_x = (int16_t)GET16(blitter_ram, A2_PIXEL + 2);
+       int16_t a2_y = (int16_t)GET16(blitter_ram, A2_PIXEL + 0);
+       uint16_t a2_mask_x = GET16(blitter_ram, A2_MASK + 2);
+       uint16_t a2_mask_y = GET16(blitter_ram, A2_MASK + 0);
+       int16_t a2_step_x = (int16_t)GET16(blitter_ram, A2_STEP + 2);
+       int16_t a2_step_y = (int16_t)GET16(blitter_ram, A2_STEP + 0);
+
+       uint64_t srcd1 = GET64(blitter_ram, SRCDATA);
+       uint64_t srcd2 = 0;
+       uint64_t dstd = GET64(blitter_ram, DSTDATA);
+       uint64_t patd = GET64(blitter_ram, PATTERNDATA);
+       uint32_t iinc = GET32(blitter_ram, INTENSITYINC);
+       uint64_t srcz1 = GET64(blitter_ram, SRCZINT);
+       uint64_t srcz2 = GET64(blitter_ram, SRCZFRAC);
+       uint64_t dstz = GET64(blitter_ram, DSTZ);
+       uint32_t zinc = GET32(blitter_ram, ZINC);
+       uint32_t collision = GET32(blitter_ram, COLLISIONCTRL);// 0=RESUME, 1=ABORT, 2=STOPEN
+
+       uint8_t pixsize = (dsta2 ? a2_pixsize : a1_pixsize);    // From ACONTROL
+
+//Testing Trevor McFur--I *think* it's the circle on the lower RHS of the screen...
+/*logBlit = false;
+if (cmd == 0x05810601 && (GET16(blitter_ram, PIXLINECOUNTER + 2) == 96)
+       && (GET16(blitter_ram, PIXLINECOUNTER + 0) == 72))
+       logBlit = true;//*/
  //Testing...
  //if (cmd == 0x1401060C) patd = 0xFFFFFFFFFFFFFFFFLL;
  //if (cmd == 0x1401060C) patd = 0x00000000000000FFLL;
@@ -2898,14 +2945,74 @@ patdld[1] -> 0 0110 1100 -> $F0226C (hi 32 bits)
  So... It's reversed! The data organization of the patd register is [low 32][high 32]! !!! FIX !!! [DONE]
  And fix all the other 64 bit registers [DONE]
  */
-
-if (cmd == 0x1401060C)
+/*if (cmd == 0x1401060C)
  {
         printf("logBlit = %s, cmd = %08X\n", (logBlit ? "T" : "F"), cmd);
         fflush(stdout);
-}
+}*/
+/*logBlit = false;
+if ((cmd == 0x00010200) && (GET16(blitter_ram, PIXLINECOUNTER + 2) == 9))
+       logBlit = true;
+
+; Pink altimeter bar
+
+Blit! (00110000 <- 000BF010) count: 9 x 23, A1/2_FLAGS: 000042E2/00010020 [cmd: 00010200]
+ CMD -> src:  dst:  misc:  a1ctl: UPDA1  mode:  ity: PATDSEL z-op:  op: LFU_CLEAR ctrl:
+  A1 step values: -10 (X), 1 (Y)
+  A1 -> pitch: 4 phrases, depth: 16bpp, z-off: 3, width: 320 (21), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
+  A2 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 1 (00), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
+        A1 x/y: 262/132, A2 x/y: 129/0
+;x-coord is 257 in pic, so add 5
+;20 for ship, 33 for #... Let's see if we can find 'em!
+
+; Black altimeter bar
+
+Blit! (00110000 <- 000BF010) count: 5 x 29, A1/2_FLAGS: 000042E2/00010020 [cmd: 00010200]
+ CMD -> src:  dst:  misc:  a1ctl: UPDA1  mode:  ity: PATDSEL z-op:  op: LFU_CLEAR ctrl:
+  A1 step values: -8 (X), 1 (Y)
+  A1 -> pitch: 4 phrases, depth: 16bpp, z-off: 3, width: 320 (21), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
+  A2 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 1 (00), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
+        A1 x/y: 264/126, A2 x/y: 336/0
+
+Here's the pink bar--note that it's phrase mode without dread, so how does this work???
+Not sure, but I *think* that, when masked in phrase mode, it somehow MUXes the data already at
+the write site into the left or right side of the write data. I'll have to do some tracing to
+see if this is the mechanism it uses or not...
+
+Blit! (CMD = 00010200)
+Flags: UPDA1 PATDSEL
+  count = 9 x 11
+  a1_base = 00110010, a2_base = 000BD7E0
+  a1_x = 0106, a1_y = 0090, a1_frac_x = 0000, a1_frac_y = 8000, a2_x = 025A, a2_y = 0000
+  a1_step_x = FFF6, a1_step_y = 0001, a1_stepf_x = 5E00, a1_stepf_y = D100, a2_step_x = FFF7, a2_step_y = 0001
+  a1_inc_x = 0001, a1_inc_y = FFFF, a1_incf_x = 0000, a1_incf_y = E000
+  a1_win_x = 0000, a1_win_y = 0000, a2_mask_x = 0000, a2_mask_y = 0000
+  a2_mask=F a1add=+phr/+0 a2add=+1/+0
+  a1_pixsize = 4, a2_pixsize = 4
+   srcd=BAC673AC2C92E578  dstd=0000000000000000 patd=74C074C074C074C0 iinc=0002E398
+  srcz1=7E127E12000088DA srcz2=DBE06DF000000000 dstz=0000000000000000 zinc=FFFE4840, coll=0
+  Phrase mode is ON
+  [in=T a1f=F a1=F zf=F z=F a2=F iif=F iii=F izf=F izi=F]
+  Entering INNER state...
+  Entering DWRITE state...
+     Dest write address/pix address: 0016A830/0 [dstart=20 dend=40 pwidth=8 srcshift=0][daas=0 dabs=0 dam=7 ds=0 daq=F] [7400000074C074C0] (icount=0007, inc=2)
+  Entering A1_ADD state [a1_x=0106, a1_y=0090, addasel=0, addbsel=0, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
+  Entering DWRITE state...
+     Dest write address/pix address: 0016A850/0 [dstart=0 dend=40 pwidth=8 srcshift=0][daas=0 dabs=0 dam=7 ds=0 daq=F] [74C074C074C074C0] (icount=0003, inc=4)
+  Entering A1_ADD state [a1_x=0108, a1_y=0090, addasel=0, addbsel=0, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
+  Entering DWRITE state...
+     Dest write address/pix address: 0016A870/0 [dstart=0 dend=30 pwidth=8 srcshift=0][daas=0 dabs=0 dam=7 ds=0 daq=F] [74C074C074C00000] (icount=FFFF, inc=4)
+  Entering A1_ADD state [a1_x=010C, a1_y=0090, addasel=0, addbsel=0, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
+  Entering IDLE_INNER state...
+  Leaving INNER state... (ocount=000A)
+  [in=F a1f=F a1=T zf=F z=F a2=F iif=F iii=F izf=F izi=F]
+  Entering A1UPDATE state... (272/144 -> 262/145)
+  [in=T a1f=F a1=F zf=F z=F a2=F iif=F iii=F izf=F izi=F]
+  Entering INNER state...
+*/
+
         // Bugs in Jaguar I
-       
+
         a2addy = a1addy;                                                        // A2 channel Y add bit is tied to A1's
  
  //if (logBlit && (ocount > 20)) logBlit = false;
@@ -2913,9 +3020,9 @@ if (cmd == 0x1401060C)
  if (logBlit)
  {
  printf("  a1_base = %08X, a2_base = %08X\n", a1_base, a2_base);
-printf("  a1_x = %04X, a1_y = %04X, a1_frac_x = %04X, a1_frac_y = %04X, a2_x = %04X, a2_y = %04X\n", (uint16)a1_x, (uint16)a1_y, a1_frac_x, a1_frac_y, (uint16)a2_x, (uint16)a2_y);
-printf("  a1_step_x = %04X, a1_step_y = %04X, a1_stepf_x = %04X, a1_stepf_y = %04X, a2_step_x = %04X, a2_step_y = %04X\n", (uint16)a1_step_x, (uint16)a1_step_y, a1_stepf_x, a1_stepf_y, (uint16)a2_step_x, (uint16)a2_step_y);
-printf("  a1_inc_x = %04X, a1_inc_y = %04X, a1_incf_x = %04X, a1_incf_y = %04X\n", (uint16)a1_inc_x, (uint16)a1_inc_y, a1_incf_x, a1_incf_y);
+printf("  a1_x = %04X, a1_y = %04X, a1_frac_x = %04X, a1_frac_y = %04X, a2_x = %04X, a2_y = %04X\n", (uint16_t)a1_x, (uint16_t)a1_y, a1_frac_x, a1_frac_y, (uint16_t)a2_x, (uint16_t)a2_y);
+printf("  a1_step_x = %04X, a1_step_y = %04X, a1_stepf_x = %04X, a1_stepf_y = %04X, a2_step_x = %04X, a2_step_y = %04X\n", (uint16_t)a1_step_x, (uint16_t)a1_step_y, a1_stepf_x, a1_stepf_y, (uint16_t)a2_step_x, (uint16_t)a2_step_y);
+printf("  a1_inc_x = %04X, a1_inc_y = %04X, a1_incf_x = %04X, a1_incf_y = %04X\n", (uint16_t)a1_inc_x, (uint16_t)a1_inc_y, a1_incf_x, a1_incf_y);
  printf("  a1_win_x = %04X, a1_win_y = %04X, a2_mask_x = %04X, a2_mask_y = %04X\n", a1_win_x, a1_win_y, a2_mask_x, a2_mask_y);
  char x_add_str[4][4] = { "phr", "1", "0", "inc" };
  printf("  a2_mask=%s a1add=%s%s/%s%s a2add=%s%s/%s%s\n", (a2_mask ? "T" : "F"), (a1xsign ? "-" : "+"), x_add_str[a1addx],
@@ -2923,15 +3030,15 @@ printf("  a2_mask=%s a1add=%s%s/%s%s a2add=%s%s/%s%s\n", (a2_mask ? "T" : "F"),
         (a2ysign ? "-" : "+"), (a2addy ? "1" : "0"));
  printf("  a1_pixsize = %u, a2_pixsize = %u\n", a1_pixsize, a2_pixsize);
  printf("   srcd=%08X%08X  dstd=%08X%08X patd=%08X%08X iinc=%08X\n",
-       (uint32)(srcd1 >> 32), (uint32)(srcd1 & 0xFFFFFFFF),
-       (uint32)(dstd >> 32), (uint32)(dstd & 0xFFFFFFFF),
-       (uint32)(patd >> 32), (uint32)(patd & 0xFFFFFFFF), iinc);
+       (uint32_t)(srcd1 >> 32), (uint32_t)(srcd1 & 0xFFFFFFFF),
+       (uint32_t)(dstd >> 32), (uint32_t)(dstd & 0xFFFFFFFF),
+       (uint32_t)(patd >> 32), (uint32_t)(patd & 0xFFFFFFFF), iinc);
  printf("  srcz1=%08X%08X srcz2=%08X%08X dstz=%08X%08X zinc=%08X, coll=%X\n",
-       (uint32)(srcz1 >> 32), (uint32)(srcz1 & 0xFFFFFFFF),
-       (uint32)(srcz2 >> 32), (uint32)(srcz2 & 0xFFFFFFFF),
-       (uint32)(dstz >> 32), (uint32)(dstz & 0xFFFFFFFF), zinc, collision);
+       (uint32_t)(srcz1 >> 32), (uint32_t)(srcz1 & 0xFFFFFFFF),
+       (uint32_t)(srcz2 >> 32), (uint32_t)(srcz2 & 0xFFFFFFFF),
+       (uint32_t)(dstz >> 32), (uint32_t)(dstz & 0xFFFFFFFF), zinc, collision);
  }
-#endif 
+#endif
  
         // Various state lines set up by user
  
@@ -2943,15 +3050,16 @@ printf("  Phrase mode is %s\n", (phrase_mode ? "ON" : "off"));
  fflush(stdout);
  }
  #endif
+//logBlit = false;
  
         // Stopgap vars to simulate various lines
  
-       uint16 a1FracCInX = 0, a1FracCInY = 0;
+       uint16_t a1FracCInX = 0, a1FracCInY = 0;
  
         while (true)
         {
                 // IDLE
-       
+
                 if ((idle && !go) || (inner && outer0 && indone))
                 {
  #ifdef VERBOSE_BLITTER_LOGGING
@@ -2963,11 +3071,13 @@ fflush(stdout);
  #endif
                         idlei = true;
  
-                       return;
+//Instead of a return, let's try breaking out of the loop...
+break;
+//                     return;
                 }
                 else
                         idlei = false;
-       
+
                 // INNER LOOP ACTIVE
  /*
    Entering DWRITE state... (icount=0000, inc=4)
@@ -2978,7 +3088,7 @@ fflush(stdout);
  Now:
    [in=F a1f=F a1=F zf=F z=F a2=F iif=F iii=F izf=F izi=F]
  */
-       
+
                 if ((idle && go && !datinit)
                         || (inner && !indone)
                         || (inner && indone && !outer0 && !upda1f && !upda1 && notgzandp && !upda2 && !datinit)
@@ -2992,9 +3102,9 @@ Now:
                 }
                 else
                         inneri = false;
-       
+
                 // A1 FRACTION UPDATE
-       
+
                 if (inner && indone && !outer0 && upda1f)
                 {
                         a1fupdatei = true;
@@ -3041,9 +3151,9 @@ Now:
                 }
                 else
                         a2updatei = false;
-       
+
                 // INITIALIZE INTENSITY FRACTION
-       
+
                 if ((zupdate && !upda2 && datinit)
                         || (a1update && !upda2 && datinit && notgzandp)
                         || (inner && indone && !outer0 && !upda1f && !upda1 && notgzandp && !upda2 && datinit)
@@ -3054,34 +3164,34 @@ Now:
                 }
                 else
                         init_ifi = false;
-       
+
                 // INITIALIZE INTENSITY INTEGER
-       
+
                 if (init_if)
                 {
                         init_iii = true;
                 }
                 else
                         init_iii = false;
-       
+
                 // INITIALIZE Z FRACTION
-       
+
                 if (init_ii && gourz)
                 {
                         init_zfi = true;
                 }
                 else
                         init_zfi = false;
-       
+
                 // INITIALIZE Z INTEGER
-       
+
                 if (init_zf)
                 {
                         init_zii = true;
                 }
                 else
                         init_zii = false;
-       
+
  // Here we move the fooi into their foo counterparts in order to simulate the moving
  // of data into the various FDSYNCs... Each time we loop we simulate one clock cycle...
  
@@ -3123,7 +3233,7 @@ printf("  Entering INNER state...\n");
  fflush(stdout);
  }
  #endif
-                       uint16 icount = GET16(blitter_ram, PIXLINECOUNTER + 2);
+                       uint16_t icount = GET16(blitter_ram, PIXLINECOUNTER + 2);
                         bool idle_inner = true, step = true, sreadx = false, szreadx = false, sread = false,
                                 szread = false, dread = false, dzread = false, dwrite = false, dzwrite = false;
                         bool inner0 = false;
@@ -3134,7 +3244,7 @@ fflush(stdout);
                         bool textext = false, txtread = false;
  
  //other stuff
-uint8 srcshift = 0;
+uint8_t srcshift = 0;
  bool sshftld = true; // D flipflop (D -> Q): instart -> sshftld
  //NOTE: sshftld probably is only asserted at the beginning of the inner loop. !!! FIX !!!
  /*
@@ -3319,7 +3429,7 @@ sshftld = idle_inner;
                                 bool dsta_addi = (dwritei && !dstwrz) || dzwritei;
  
                                 bool gensrc = sreadxi || szreadxi || sreadi || szreadi;
-                               bool gendst = dreadi || szreadi || dwritei || dzwritei;
+                               bool gendst = dreadi || dzreadi || dwritei || dzwritei;
                                 bool gena2i = (gensrc && !dsta2) || (gendst && dsta2);
  
                                 bool zaddr = szreadx || szread || dzread || dzwrite;
@@ -3357,7 +3467,7 @@ bit 0 = /a2update . ( a1fupdate
  The /a2update term on bits 0 and 1 is redundant.
  Now look-ahead based
  */
-                               uint8 addasel = (a1fupdate || (a1_add && a1addx == 3) ? 0x01 : 0x00);
+                               uint8_t addasel = (a1fupdate || (a1_add && a1addx == 3) ? 0x01 : 0x00);
                                 addasel |= (a1_add && a1addx == 3 ? 0x02 : 0x00);
                                 addasel |= (a2update ? 0x04 : 0x00);
  /* Address adder input A X constant selection
@@ -3372,7 +3482,7 @@ for A1
  similarly for A2
  JLH: Also, 11 will likewise set the value to 111
  */
-                               uint8 a1_xconst = 6 - a1_pixsize, a2_xconst = 6 - a2_pixsize;
+                               uint8_t a1_xconst = 6 - a1_pixsize, a2_xconst = 6 - a2_pixsize;
  
                                 if (a1addx == 1)
                                     a1_xconst = 0;
@@ -3384,7 +3494,7 @@ JLH: Also, 11 will likewise set the value to 111
                                 else if (a2addx & 0x02)
                                     a2_xconst = 7;
  
-                               uint8 adda_xconst = (a2_add ? a2_xconst : a1_xconst);
+                               uint8_t adda_xconst = (a2_add ? a2_xconst : a1_xconst);
  /* Address adder input A Y constant selection
  22 June 94 - This was erroneous, because only the a1addy bit was reflected here.
  Therefore, the selection has to be controlled by a bug fix bit.
@@ -3420,7 +3530,7 @@ Bit 0 =   a2update + a2_add
                 + a1update . a1_stepld
                 + a2update . a2_stepld
  */
-                               uint8 addbsel = (a2update || a2_add || (a1fupdate && a1_stepld)
+                               uint8_t addbsel = (a2update || a2_add || (a1fupdate && a1_stepld)
                                     || (a1update && a1_stepld) || (a2update && a2_stepld) ? 0x01 : 0x00);
                                 addbsel |= (a1fupdate || (a1_add && a1addx == 3) || (a1fupdate && a1_stepld)
                                     || (a1update && a1_stepld) || (a2update && a2_stepld) ? 0x02 : 0x00);
@@ -3436,9 +3546,9 @@ it is being updated by one phrase
  Masking is enabled for a1 when a1addx[0..1] is 00, and the value
  is 6 - the pixel size (again!)
  */
-                               uint8 maska1 = (a1_add && a1addx == 0 ? 6 - a1_pixsize : 0);
-                               uint8 maska2 = (a2_add && a2addx == 0 ? 6 - a2_pixsize : 0);
-                               uint8 modx = (a2_add ? maska2 : maska1);
+                               uint8_t maska1 = (a1_add && a1addx == 0 ? 6 - a1_pixsize : 0);
+                               uint8_t maska2 = (a2_add && a2addx == 0 ? 6 - a2_pixsize : 0);
+                               uint8_t modx = (a2_add ? maska2 : maska1);
  /* Generate load strobes for the increment updates */
  
  /*A1pldt               := NAN2 (a1pldt, atick[1], a1_add);
@@ -3478,7 +3588,7 @@ Bit 1 =   dzwrite . gourz . (atick[0] + atick[1])
  Bit 2 =   (gourd + gourz) . /(init_if + init_ii + init_zf + init_zi)
         + dwrite  . srcshade
  */
-uint8 daddasel = ((dwrite && gourd) || (dzwrite && gourz) || istepadd || zstepfadd
+uint8_t daddasel = ((dwrite && gourd) || (dzwrite && gourz) || istepadd || zstepfadd
         || init_if || init_ii || init_zf || init_zi ? 0x01 : 0x00);
  daddasel |= ((dzwrite && gourz) || zstepadd || zstepfadd ? 0x02 : 0x00);
  daddasel |= (((gourd || gourz) && !(init_if || init_ii || init_zf || init_zi))
@@ -3510,7 +3620,7 @@ Bit 2 =   dwrite  . gourd . (atick[0] + atick[1])
         + istepadd + istepfadd + zstepadd + zstepfadd
  Bit 3 =   istepadd + istepfadd + zstepadd + zstepfadd
  */
-uint8 daddbsel = ((dwrite && gourd) || (dzwrite && gourz) || (dwrite && srcshade)
+uint8_t daddbsel = ((dwrite && gourd) || (dzwrite && gourz) || (dwrite && srcshade)
         || istepadd || zstepadd || init_if || init_ii || init_zf || init_zi ? 0x01 : 0x00);
  daddbsel |= ((dzwrite && gourz) || zstepadd || zstepfadd ? 0x02 : 0x00);
  daddbsel |= ((dwrite && gourd) || (dzwrite && gourz) || (dwrite && srcshade)
@@ -3519,14 +3629,14 @@ daddbsel |= (istepadd && istepfadd && zstepadd && zstepfadd ? 0x08 : 0x00);
  /* Data adder mode control
  000    16-bit normal add
  001    16-bit saturating add with carry
-010    8-bit saturating add with carry, carry into top byte is 
+010    8-bit saturating add with carry, carry into top byte is
         inhibited (YCrCb)
-011    8-bit saturating add with carry, carry into top byte and 
+011    8-bit saturating add with carry, carry into top byte and
         between top nybbles is inhibited (CRY)
  100    16-bit normal add with carry
  101    16-bit saturating add
  110    8-bit saturating add, carry into top byte is inhibited
-111    8-bit saturating add, carry into top byte and between top 
+111    8-bit saturating add, carry into top byte and between top
         nybbles is inhibited
  
  The first five are used for Gouraud calculations, the latter three
@@ -3545,7 +3655,7 @@ Bit 0 =   dzwrite . gourz . atick[1]
         + init_ii . /topnen . /topben . /ext_int
         + init_ii .  topnen .  topben . /ext_int
         + init_zi
-               
+
  Bit 1 =   dwrite . gourd . atick[1] . /topben . /ext_int
         + istepadd . /topben . /ext_int
         + /gourd . /gourz .  /topben
@@ -3558,7 +3668,7 @@ Bit 2 =   /gourd . /gourz
         + istepadd . ext_int
         + init_ii . ext_int
  */
-uint8 daddmode = ((dzwrite && gourz) || (dwrite && gourd && !topnen && !topben && !ext_int)
+uint8_t daddmode = ((dzwrite && gourz) || (dwrite && gourd && !topnen && !topben && !ext_int)
         || (dwrite && gourd && topnen && topben && !ext_int) || zstepadd
         || (istepadd && !topnen && !topben && !ext_int)
         || (istepadd && topnen && topben && !ext_int) || (!gourd && !gourz && !topnen && !topben)
@@ -3570,8 +3680,8 @@ daddmode |= ((dwrite && gourd && !topben && !ext_int) || (istepadd && !topben &&
         || (init_ii && !topben && !ext_int) ? 0x02 : 0x00);
  daddmode |= ((!gourd && !gourz) || shadeadd || (dwrite && gourd && ext_int)
         || (istepadd && ext_int) || (init_ii && ext_int) ? 0x04 : 0x00);
-/* Data add load controls 
-Pattern fraction (dest data) is loaded on 
+/* Data add load controls
+Pattern fraction (dest data) is loaded on
           dwrite . gourd . atick[0]
         + istepfadd . /datinit
         + init_if
@@ -3579,11 +3689,11 @@ Pattern data is loaded on
           dwrite . gourd . atick[1]
         + istepadd . /datinit . /datinit
         + init_ii
-Source z1 is loaded on 
+Source z1 is loaded on
           dzwrite . gourz . atick[1]
         + zstepadd . /datinit . /datinit
         + init_zi
-Source z2 is loaded on 
+Source z2 is loaded on
           dzwrite . gourz . atick[0]
         + zstepfadd
         + init_zf
@@ -3610,10 +3720,10 @@ Bit 0 =  /patdsel . /adddsel
  Bit 1 =   adddsel
         + dzwrite1d
  */
-uint8 data_sel = ((!patdsel && !adddsel) || dzwrite ? 0x01 : 0x00)
+uint8_t data_sel = ((!patdsel && !adddsel) || dzwrite ? 0x01 : 0x00)
         | (adddsel || dzwrite ? 0x02 : 0x00);
  
-uint32 address, pixAddr;
+uint32_t address, pixAddr;
  ADDRGEN(address, pixAddr, gena2i, zaddr,
         a1_x, a1_y, a1_base, a1_pitch, a1_pixsize, a1_width, a1_zoffset,
         a2_x, a2_y, a2_base, a2_pitch, a2_pixsize, a2_width, a2_zoffset);
@@ -3624,28 +3734,28 @@ if (!justify)
  
  /* Generate source alignment shift
     -------------------------------
-The source alignment shift for data move is the difference between 
-the source and destination X pointers, multiplied by the pixel 
-size.  Only the low six bits of the pointers are of interest, as 
-pixel sizes are always a power of 2 and window rows are always 
-phrase aligned.  
+The source alignment shift for data move is the difference between
+the source and destination X pointers, multiplied by the pixel
+size.  Only the low six bits of the pointers are of interest, as
+pixel sizes are always a power of 2 and window rows are always
+phrase aligned.
  
  When not in phrase mode, the top 3 bits of the shift value are
  set to zero (2/26).
  
  Source shifting is also used to extract bits for bit-to-byte
-expansion in phrase mode.  This involves only the bottom three 
+expansion in phrase mode.  This involves only the bottom three
  bits of the shift value, and is based on the offset within the
  phrase of the destination X pointer, in pixels.
  
  Source shifting is disabled when srcen is not set.
  */
-uint8 dstxp = (dsta2 ? a2_x : a1_x) & 0x3F;
-uint8 srcxp = (dsta2 ? a1_x : a2_x) & 0x3F;
-uint8 shftv = ((dstxp - srcxp) << pixsize) & 0x3F;
+uint8_t dstxp = (dsta2 ? a2_x : a1_x) & 0x3F;
+uint8_t srcxp = (dsta2 ? a1_x : a2_x) & 0x3F;
+uint8_t shftv = ((dstxp - srcxp) << pixsize) & 0x3F;
  /* The phrase mode alignment count is given by the phrase offset
  of the first pixel, for bit to byte expansion */
-uint8 pobb = 0;
+uint8_t pobb = 0;
  
  if (pixsize == 3)
         pobb = dstxp & 0x07;
@@ -3655,8 +3765,8 @@ if (pixsize == 5)
         pobb = dstxp & 0x01;
  
  bool pobbsel = phrase_mode && bcompen;
-uint8 loshd = (pobbsel ? pobb : shftv) & 0x07;
-uint8 shfti = (srcen || pobbsel ? (sshftld ? loshd : srcshift & 0x07) : 0);
+uint8_t loshd = (pobbsel ? pobb : shftv) & 0x07;
+uint8_t shfti = (srcen || pobbsel ? (sshftld ? loshd : srcshift & 0x07) : 0);
  /* Enable for high bits is srcen . phrase_mode */
  shfti |= (srcen && phrase_mode ? (sshftld ? shftv & 0x38 : srcshift & 0x38) : 0);
  srcshift = shfti;
@@ -3671,27 +3781,35 @@ printf("  Entering SREADX state...");
  fflush(stdout);
  }
  #endif
-//uint32 srcAddr, pixAddr;
+//uint32_t srcAddr, pixAddr;
  //ADDRGEN(srcAddr, pixAddr, gena2i, zaddr,
  //     a1_x, a1_y, a1_base, a1_pitch, a1_pixsize, a1_width, a1_zoffset,
  //     a2_x, a2_y, a2_base, a2_pitch, a2_pixsize, a2_width, a2_zoffset);
-srcd2 = srcd1;
-srcd1 = ((uint64)JaguarReadLong(address, BLITTER) << 32) | (uint64)JaguarReadLong(address + 4, BLITTER);
+                                       srcd2 = srcd1;
+                                       srcd1 = ((uint64_t)JaguarReadLong(address + 0, BLITTER) << 32)
+                                               | (uint64_t)JaguarReadLong(address + 4, BLITTER);
  //Kludge to take pixel size into account...
+//Hmm. If we're not in phrase mode, this is most likely NOT going to be used...
+//Actually, it would be--because of BCOMPEN expansion, for example...
  if (!phrase_mode)
  {
-       if (pixsize == 5)
-               srcd1 >>= 32;
-       else if (pixsize == 4)
-               srcd1 >>= 48;
-       else
+       if (bcompen)
                 srcd1 >>= 56;
-}
+       else
+       {
+               if (pixsize == 5)
+                       srcd1 >>= 32;
+               else if (pixsize == 4)
+                       srcd1 >>= 48;
+               else
+                       srcd1 >>= 56;
+       }
+}//*/
  #ifdef VERBOSE_BLITTER_LOGGING
  if (logBlit)
  {
  printf("    Source extra read address/pix address: %08X/%1X [%08X%08X]\n", address, pixAddr,
-       (uint32)(srcd1 >> 32), (uint32)(srcd1 & 0xFFFFFFFF));
+       (uint32_t)(srcd1 >> 32), (uint32_t)(srcd1 & 0xFFFFFFFF));
  fflush(stdout);
  }
  #endif
@@ -3702,9 +3820,19 @@ fflush(stdout);
  #ifdef VERBOSE_BLITTER_LOGGING
  if (logBlit)
  {
-printf("  Entering SZREADX state...\n");
+printf("  Entering SZREADX state...");
  fflush(stdout);
  }
+#endif
+                                       srcz2 = srcz1;
+                                       srcz1 = ((uint64_t)JaguarReadLong(address, BLITTER) << 32) | (uint64_t)JaguarReadLong(address + 4, BLITTER);
+#ifdef VERBOSE_BLITTER_LOGGING
+if (logBlit)
+{
+       printf(" Src Z extra read address/pix address: %08X/%1X [%08X%08X]\n", address, pixAddr,
+               (uint32_t)(srcz1 >> 32), (uint32_t)(srcz1 & 0xFFFFFFFF));       // Log the source Z phrase just fetched (srcz1), not dstz
+       fflush(stdout);
+}
 #endif
                                 }
  
@@ -3718,27 +3846,32 @@ printf("  Entering SREAD state...");
  fflush(stdout);
  }
  #endif
-//uint32 srcAddr, pixAddr;
+//uint32_t srcAddr, pixAddr;
  //ADDRGEN(srcAddr, pixAddr, gena2i, zaddr,
  //     a1_x, a1_y, a1_base, a1_pitch, a1_pixsize, a1_width, a1_zoffset,
  //     a2_x, a2_y, a2_base, a2_pitch, a2_pixsize, a2_width, a2_zoffset);
  srcd2 = srcd1;
-srcd1 = ((uint64)JaguarReadLong(address, BLITTER) << 32) | (uint64)JaguarReadLong(address + 4, BLITTER);
+srcd1 = ((uint64_t)JaguarReadLong(address, BLITTER) << 32) | (uint64_t)JaguarReadLong(address + 4, BLITTER);
  //Kludge to take pixel size into account...
  if (!phrase_mode)
  {
-       if (pixsize == 5)
-               srcd1 >>= 32;
-       else if (pixsize == 4)
-               srcd1 >>= 48;
-       else
+       if (bcompen)
                 srcd1 >>= 56;
+       else
+       {
+               if (pixsize == 5)
+                       srcd1 >>= 32;
+               else if (pixsize == 4)
+                       srcd1 >>= 48;
+               else
+                       srcd1 >>= 56;
+       }
  }
  #ifdef VERBOSE_BLITTER_LOGGING
  if (logBlit)
  {
-printf("    Source read address/pix address: %08X/%1X [%08X%08X]\n", address, pixAddr,
-       (uint32)(srcd1 >> 32), (uint32)(srcd1 & 0xFFFFFFFF));
+printf("     Source read address/pix address: %08X/%1X [%08X%08X]\n", address, pixAddr,
+       (uint32_t)(srcd1 >> 32), (uint32_t)(srcd1 & 0xFFFFFFFF));
  fflush(stdout);
  }
  #endif
@@ -3749,9 +3882,23 @@ fflush(stdout);
  #ifdef VERBOSE_BLITTER_LOGGING
  if (logBlit)
  {
-printf("  Entering SZREADX state...\n");
+printf("  Entering SZREAD state...");
  fflush(stdout);
  }
+#endif
+                                       srcz2 = srcz1;
+                                       srcz1 = ((uint64_t)JaguarReadLong(address, BLITTER) << 32) | (uint64_t)JaguarReadLong(address + 4, BLITTER);
+//Kludge to take pixel size into account... I believe that it only has to take 16BPP mode into account. Not sure tho.
+if (!phrase_mode && pixsize == 4)
+       srcz1 >>= 48;
+
+#ifdef VERBOSE_BLITTER_LOGGING
+if (logBlit)
+{
+       printf("     Src Z read address/pix address: %08X/%1X [%08X%08X]\n", address, pixAddr,
+               (uint32_t)(srcz1 >> 32), (uint32_t)(srcz1 & 0xFFFFFFFF));       // Log the source Z data just read (srcz1), not dstz
+       fflush(stdout);
+}
 #endif
                                 }
  
@@ -3760,15 +3907,15 @@ fflush(stdout);
  #ifdef VERBOSE_BLITTER_LOGGING
  if (logBlit)
  {
-printf("  Entering DREAD state...\n");
+printf("  Entering DREAD state...");
  fflush(stdout);
  }
  #endif
-//uint32 dstAddr, pixAddr;
+//uint32_t dstAddr, pixAddr;
  //ADDRGEN(dstAddr, pixAddr, gena2i, zaddr,
  //     a1_x, a1_y, a1_base, a1_pitch, a1_pixsize, a1_width, a1_zoffset,
  //     a2_x, a2_y, a2_base, a2_pitch, a2_pixsize, a2_width, a2_zoffset);
-dstd = ((uint64)JaguarReadLong(address, BLITTER) << 32) | (uint64)JaguarReadLong(address + 4, BLITTER);
+dstd = ((uint64_t)JaguarReadLong(address, BLITTER) << 32) | (uint64_t)JaguarReadLong(address + 4, BLITTER);
  //Kludge to take pixel size into account...
  if (!phrase_mode)
  {
@@ -3782,8 +3929,8 @@ if (!phrase_mode)
  #ifdef VERBOSE_BLITTER_LOGGING
  if (logBlit)
  {
-printf("      Dest read address/pix address: %08X/%1X [%08X%08X]\n", address, pixAddr,
-       (uint32)(dstd >> 32), (uint32)(dstd & 0xFFFFFFFF));
+printf("       Dest read address/pix address: %08X/%1X [%08X%08X]\n", address, pixAddr,
+       (uint32_t)(dstd >> 32), (uint32_t)(dstd & 0xFFFFFFFF));
  fflush(stdout);
  }
  #endif
@@ -3799,37 +3946,47 @@ if (logBlit)
         fflush(stdout);
  }
  #endif
-                                       dstz = ((uint64)JaguarReadLong(address, BLITTER) << 32) | (uint64)JaguarReadLong(address + 4, BLITTER);
+                                       dstz = ((uint64_t)JaguarReadLong(address, BLITTER) << 32) | (uint64_t)JaguarReadLong(address + 4, BLITTER);
+//Kludge to take pixel size into account... I believe that it only has to take 16BPP mode into account. Not sure tho.
+if (!phrase_mode && pixsize == 4)
+       dstz >>= 48;
+
  #ifdef VERBOSE_BLITTER_LOGGING
  if (logBlit)
  {
         printf("    Dest Z read address/pix address: %08X/%1X [%08X%08X]\n", address, pixAddr,
-               (uint32)(dstz >> 32), (uint32)(dstz & 0xFFFFFFFF));
+               (uint32_t)(dstz >> 32), (uint32_t)(dstz & 0xFFFFFFFF));
         fflush(stdout);
  }
  #endif
                                 }
  
+// These vars should probably go further up in the code... !!! FIX !!!
+// We can't preassign these unless they're static...
+//uint64_t srcz = 0;                   // These are assigned to shut up stupid compiler warnings--dwrite is ALWAYS asserted
+//bool winhibit = false;
+uint64_t srcz;
+bool winhibit;
  //NOTE: SRCSHADE requires GOURZ to be set to work properly--another Jaguar I bug
                                 if (dwrite)
                                 {
  #ifdef VERBOSE_BLITTER_LOGGING
  if (logBlit)
  {
-printf("  Entering DWRITE state...\n");
+printf("  Entering DWRITE state...");
  fflush(stdout);
  }
  #endif
  //Counter is done on the dwrite state...! (We'll do it first, since it affects dstart/dend calculations.)
  //Here's the voodoo for figuring the correct amount of pixels in phrase mode (or not):
-                                       int8 inct = -((dsta2 ? a2_x : a1_x) & 0x07);    // From INNER_CNT
-                                       uint8 inc = 0;
+                                       int8_t inct = -((dsta2 ? a2_x : a1_x) & 0x07);  // From INNER_CNT
+                                       uint8_t inc = 0;
                                         inc = (!phrase_mode || (phrase_mode && (inct & 0x01)) ? 0x01 : 0x00);
                                         inc |= (phrase_mode && (((pixsize == 3 || pixsize == 4) && (inct & 0x02)) || pixsize == 5 && !(inct & 0x01)) ? 0x02 : 0x00);
                                         inc |= (phrase_mode && ((pixsize == 3 && (inct & 0x04)) || (pixsize == 4 && !(inct & 0x03))) ? 0x04 : 0x00);
                                         inc |= (phrase_mode && pixsize == 3 && !(inct & 0x07) ? 0x08 : 0x00);
  
-                                       uint16 oldicount = icount;      // Save icount to detect underflow...
+                                       uint16_t oldicount = icount;    // Save icount to detect underflow...
                                         icount -= inc;
  
                                         if (icount == 0 || ((icount & 0x8000) && !(oldicount & 0x8000)))
@@ -3840,7 +3997,7 @@ fflush(stdout);
  //Start & end write mask computations...
  //*********************************************************************************
  
-uint8 dstart = 0;
+uint8_t dstart = 0;
  
  if (pixsize == 3)
         dstart = (dstxp & 0x07) << 3;
@@ -3852,12 +4009,12 @@ if (pixsize == 5)
  dstart = (phrase_mode ? dstart : pixAddr & 0x07);
  
  //This is the other Jaguar I bug... Normally, should ALWAYS select a1_x here.
-uint16 dstxwr = (dsta2 ? a2_x : a1_x) & 0x7FFE;
-uint16 pseq = dstxwr ^ (a1_win_x & 0x7FFE);
+uint16_t dstxwr = (dsta2 ? a2_x : a1_x) & 0x7FFE;
+uint16_t pseq = dstxwr ^ (a1_win_x & 0x7FFE);
  pseq = (pixsize == 5 ? pseq : pseq & 0x7FFC);
  pseq = ((pixsize & 0x06) == 4 ? pseq : pseq & 0x7FF8);
  bool penden = clip_a1 && (pseq == 0);
-uint8 window_mask = 0;
+uint8_t window_mask = 0;
  
  if (pixsize == 3)
         window_mask = (a1_win_x & 0x07) << 3;
@@ -3893,7 +4050,7 @@ NOTE! This doesn't fix the problem because inner0 is asserted too late to help h
  
  /* The mask to be used if within one phrase of the end of the inner
  loop, similarly */
-uint8 inner_mask = 0;
+uint8_t inner_mask = 0;
  
  if (pixsize == 3)
         inner_mask = (icount & 0x07) << 3;
@@ -3907,12 +4064,12 @@ if (!inner0)
  the inner mask, where is all cases 000 means 1000. */
  window_mask = (window_mask == 0 ? 0x40 : window_mask);
  inner_mask = (inner_mask == 0 ? 0x40 : inner_mask);
-uint8 emask = (window_mask > inner_mask ? inner_mask : window_mask);
+uint8_t emask = (window_mask > inner_mask ? inner_mask : window_mask);
  /* The mask to be used for the pixel size, to which must be added
  the bit offset */
-uint8 pma = pixAddr + (1 << pixsize);
+uint8_t pma = pixAddr + (1 << pixsize);
  /* Select the mask */
-uint8 dend = (phrase_mode ? emask : pma);
+uint8_t dend = (phrase_mode ? emask : pma);
  
  /* The cycle width in phrase mode is normally one phrase.  However,
  at the start and end it may be narrower.  The start and end masks
@@ -3924,48 +4081,92 @@ are used to generate this.  The width is given by:
  This is only used for writes in phrase mode.
  Start and end from the address level of the pipeline are used.
  */
-uint8 pwidth = (((dend | dstart) & 0x07) == 0 ? 0x08 : (dend - dstart) & 0x07);
+uint8_t pwidth = (((dend | dstart) & 0x07) == 0 ? 0x08 : (dend - dstart) & 0x07);
  
-//uint32 dstAddr, pixAddr;
+//uint32_t dstAddr, pixAddr;
  //ADDRGEN(dstAddr, pixAddr, gena2i, zaddr,
  //     a1_x, a1_y, a1_base, a1_pitch, a1_pixsize, a1_width, a1_zoffset,
  //     a2_x, a2_y, a2_base, a2_pitch, a2_pixsize, a2_width, a2_zoffset);
  #ifdef VERBOSE_BLITTER_LOGGING
  if (logBlit)
  {
-printf("     Dest write address/pix address: %08X/%1X", address, pixAddr);
-printf(" [dstart=%X dend=%X pwidth=%X srcshift=%X]", dstart, dend, pwidth, srcshift);
-fflush(stdout);
-printf("[daas=%X dabs=%X dam=%X ds=%X daq=%s]", daddasel, daddbsel, daddmode, data_sel, (daddq_sel ? "T" : "F"));
-fflush(stdout);
+       printf("     Dest write address/pix address: %08X/%1X", address, pixAddr);
+       fflush(stdout);
  }
  #endif
  
+//More testing... This is almost certainly wrong, but how else does this work???
+//Seems to kinda work... But still, this doesn't seem to make any sense!
+if (phrase_mode && !dsten)
+       dstd = ((uint64_t)JaguarReadLong(address, BLITTER) << 32) | (uint64_t)JaguarReadLong(address + 4, BLITTER);
+
  //Testing only... for now...
  //This is wrong because the write data is a combination of srcd and dstd--either run
-//thru the LFU or in PATDSEL or ADDDSEL mode.
+//thru the LFU or in PATDSEL or ADDDSEL mode. [DONE now, thru DATA module]
  // Precedence is ADDDSEL > PATDSEL > LFU.
  //Also, doesn't take into account the start & end masks, or the phrase width...
  //Now it does!
  
  // srcd2 = xxxx xxxx 0123 4567, srcd = 8901 2345 xxxx xxxx, srcshift = $20 (32)
-uint64 srcd = (srcd2 << (64 - srcshift)) | (srcd1 >> srcshift);
+uint64_t srcd = (srcd2 << (64 - srcshift)) | (srcd1 >> srcshift);
  //bleh, ugly ugly ugly
  if (srcshift == 0)
         srcd = srcd1;
  
-//Temporary kludge, to see if the fractional pattern does anything...
-//This works, BTW
-if (patfadd)
+//NOTE: This only works with pixel sizes less than 8BPP...
+//DOUBLE NOTE: Still need to do regression testing to ensure that this doesn't break other stuff... !!! CHECK !!!
+if (!phrase_mode && srcshift != 0)
+       srcd = ((srcd2 & 0xFF) << (8 - srcshift)) | ((srcd1 & 0xFF) >> srcshift);
+
+//Z DATA() stuff done here... And it has to be done before any Z shifting...
+//Note that we need to have phrase mode start/end support here... (Not since we moved it from dzwrite...!)
+/*
+Here are a couple of Cybermorph blits with Z:
+$00113078      // DSTEN DSTENZ DSTWRZ CLIP_A1 GOURD GOURZ PATDSEL ZMODE=4
+$09900F39      // SRCEN DSTEN DSTENZ DSTWRZ UPDA1 UPDA1F UPDA2 DSTA2 ZMODE=4 LFUFUNC=C DCOMPEN
+
+We're having the same phrase mode overwrite problem we had with the pixels... !!! FIX !!!
+Odd. It's equating 0 with 0... Even though ZMODE is $04 (less than)!
+*/
+if (gourz)
  {
-       uint16 addq[4];
-       uint8 initcin[4] = { 0, 0, 0, 0 };
-       ADDARRAY(addq, 4/*daddasel*/, 4/*daddbsel*/, 0/*daddmode*/, dstd, iinc, initcin, 0, 0, 0, patd, srcd, 0, 0, 0, 0);
-       srcd1 = ((uint64)addq[3] << 48) | ((uint64)addq[2] << 32) | ((uint64)addq[1] << 16) | (uint64)addq[0];
+/*
+void ADDARRAY(uint16_t * addq, uint8_t daddasel, uint8_t daddbsel, uint8_t daddmode,
+       uint64_t dstd, uint32_t iinc, uint8_t initcin[], uint64_t initinc, uint16_t initpix,
+       uint32_t istep, uint64_t patd, uint64_t srcd, uint64_t srcz1, uint64_t srcz2,
+       uint32_t zinc, uint32_t zstep)
+*/
+       uint16_t addq[4];
+       uint8_t initcin[4] = { 0, 0, 0, 0 };
+       ADDARRAY(addq, 7/*daddasel*/, 6/*daddbsel*/, 0/*daddmode*/, 0, 0, initcin, 0, 0, 0, 0, 0, srcz1, srcz2, zinc, 0);
+       srcz2 = ((uint64_t)addq[3] << 48) | ((uint64_t)addq[2] << 32) | ((uint64_t)addq[1] << 16) | (uint64_t)addq[0];
+       ADDARRAY(addq, 6/*daddasel*/, 7/*daddbsel*/, 1/*daddmode*/, 0, 0, initcin, 0, 0, 0, 0, 0, srcz1, srcz2, zinc, 0);
+       srcz1 = ((uint64_t)addq[3] << 48) | ((uint64_t)addq[2] << 32) | ((uint64_t)addq[1] << 16) | (uint64_t)addq[0];
+
+#if 0//def VERBOSE_BLITTER_LOGGING
+if (logBlit)
+{
+       printf("\n[srcz1=%08X%08X, srcz2=%08X%08X, zinc=%08X",
+               (uint32_t)(srcz1 >> 32), (uint32_t)(srcz1 & 0xFFFFFFFF),
+               (uint32_t)(srcz2 >> 32), (uint32_t)(srcz2 & 0xFFFFFFFF), zinc);
+       fflush(stdout);
+}
+#endif
  }
  
-//Note that we still don't take atick[0] & [1] into account here, so this will skip half of the data needed... !!! FIX !!!
-//Not yet enumerated: dbinh, srcdread, srczread
+uint8_t zSrcShift = srcshift & 0x30;
+// Only splice srcz2 & srcz1 together when there is a real shift: a 64-bit
+// shift by 64 (the zSrcShift == 0 case) is undefined, so just take srcz1 then
+srcz = (zSrcShift == 0 ? srcz1
+       : (srcz2 << (64 - zSrcShift)) | (srcz1 >> zSrcShift));
+
+#if 0//def VERBOSE_BLITTER_LOGGING
+if (logBlit)
+{
+       printf(" srcz=%08X%08X]\n", (uint32_t)(srcz >> 32), (uint32_t)(srcz & 0xFFFFFFFF));
+       fflush(stdout);
+}
+#endif
  
  //When in SRCSHADE mode, it adds the IINC to the read source (from LFU???)
  //According to following line, it gets LFU mode. But does it feed the source into the LFU
@@ -3977,26 +4178,96 @@ if (srcshade)
  //NOTE: This is basically doubling the work done by DATA--since this is what
  //      ADDARRAY is loaded with when srcshade is enabled... !!! FIX !!!
  //      Also note that it doesn't work properly unless GOURZ is set--there's the clue!
-       uint16 addq[4];
-       uint8 initcin[4] = { 0, 0, 0, 0 };
+       uint16_t addq[4];
+       uint8_t initcin[4] = { 0, 0, 0, 0 };
         ADDARRAY(addq, 4/*daddasel*/, 5/*daddbsel*/, 7/*daddmode*/, dstd, iinc, initcin, 0, 0, 0, patd, srcd, 0, 0, 0, 0);
-       srcd = ((uint64)addq[3] << 48) | ((uint64)addq[2] << 32) | ((uint64)addq[1] << 16) | (uint64)addq[0];
+       srcd = ((uint64_t)addq[3] << 48) | ((uint64_t)addq[2] << 32) | ((uint64_t)addq[1] << 16) | (uint64_t)addq[0];
  }
  //Seems to work... Not 100% sure tho.
  //end try this
  
-bool winhibit;// = false;
-uint64 wdata;
-uint8 dcomp, zcomp;
+//Temporary kludge, to see if the fractional pattern does anything...
+//This works, BTW
+//But it seems to mess up in Cybermorph... the shading should be smooth but it isn't...
+//Seems the carry out is lost again... !!! FIX !!! [DONE--see below]
+if (patfadd)
+{
+       uint16_t addq[4];
+       uint8_t initcin[4] = { 0, 0, 0, 0 };
+       ADDARRAY(addq, 4/*daddasel*/, 4/*daddbsel*/, 0/*daddmode*/, dstd, iinc, initcin, 0, 0, 0, patd, srcd, 0, 0, 0, 0);
+       srcd1 = ((uint64_t)addq[3] << 48) | ((uint64_t)addq[2] << 32) | ((uint64_t)addq[1] << 16) | (uint64_t)addq[0];
+}
+
+//Note that we still don't take atick[0] & [1] into account here, so this will skip half of the data needed... !!! FIX !!!
+//Not yet enumerated: dbinh, srcdread, srczread
+//Also, should do srcshift on the z value in phrase mode... !!! FIX !!! [DONE]
+//As well as add a srcz variable we can set external to this state... !!! FIX !!! [DONE]
+
+uint64_t wdata;
+uint8_t dcomp, zcomp;
  DATA(wdata, dcomp, zcomp, winhibit,
         true, cmpdst, daddasel, daddbsel, daddmode, daddq_sel, data_sel, 0/*dbinh*/,
         dend, dstart, dstd, iinc, lfufunc, patd, patdadd,
         phrase_mode, srcd, false/*srcdread*/, false/*srczread*/, srcz2add, zmode,
-       bcompen, bkgwren, dcompen, icount & 0x07, pixsize);
-//     bool bcompen, bool bkgwren, bool dcompen, uint8 icount, uint8 pixsize)
+       bcompen, bkgwren, dcompen, icount & 0x07, pixsize,
+       srcz, dstz, zinc);
+/*
+Seems that the phrase mode writes with DCOMPEN and DSTEN are corrupting inside of DATA: !!! FIX !!!
+It's fairly random as well. 7CFE -> 7DFE, 7FCA -> 78CA, 7FA4 -> 78A4, 7F88 -> 8F88
+It could be related to an uninitialized variable, like the zmode bug...
+[DONE]
+It was a bug in the dech38el data--it returned $FF for ungated instead of $00...
+
+Blit! (CMD = 09800609)
+Flags: SRCEN DSTEN UPDA1 UPDA2 LFUFUNC=C DCOMPEN
+  count = 10 x 12
+  a1_base = 00110000, a2_base = 0010B2A8
+  a1_x = 004B, a1_y = 00D8, a1_frac_x = 0000, a1_frac_y = 0000, a2_x = 0704, a2_y = 0000
+  a1_step_x = FFF3, a1_step_y = 0001, a1_stepf_x = 0000, a1_stepf_y = 0000, a2_step_x = FFFC, a2_step_y = 0000
+  a1_inc_x = 0000, a1_inc_y = 0000, a1_incf_x = 0000, a1_incf_y = 0000
+  a1_win_x = 0000, a1_win_y = 0000, a2_mask_x = 0000, a2_mask_y = 0000
+  a2_mask=F a1add=+phr/+0 a2add=+phr/+0
+  a1_pixsize = 4, a2_pixsize = 4
+   srcd=0000000000000000  dstd=0000000000000000 patd=0000000000000000 iinc=00000000
+  srcz1=0000000000000000 srcz2=0000000000000000 dstz=0000000000000000 zinc=00000000, coll=0
+  Phrase mode is ON
+  [in=T a1f=F a1=F zf=F z=F a2=F iif=F iii=F izf=F izi=F]
+  Entering INNER state...
+  Entering SREAD state...    Source read address/pix address: 0010C0B0/0 [0000000078047804]
+  Entering A2_ADD state [a2_x=0704, a2_y=0000, addasel=0, addbsel=1, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
+  Entering DREAD state...
+      Dest read address/pix address: 00197240/0 [0000000000000028]
+  Entering DWRITE state...
+     Dest write address/pix address: 00197240/0 [dstart=30 dend=40 pwidth=8 srcshift=30][daas=0 dabs=0 dam=7 ds=1 daq=F] [0000000000000028] (icount=0009, inc=1)
+  Entering A1_ADD state [a1_x=004B, a1_y=00D8, addasel=0, addbsel=0, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
+  Entering SREAD state...    Source read address/pix address: 0010C0B8/0 [7804780478047804]
+  Entering A2_ADD state [a2_x=0708, a2_y=0000, addasel=0, addbsel=1, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
+  Entering DREAD state...
+      Dest read address/pix address: 00197260/0 [0028000000200008]
+  Entering DWRITE state...
+     Dest write address/pix address: 00197260/0 [dstart=0 dend=40 pwidth=8 srcshift=30][daas=0 dabs=0 dam=7 ds=1 daq=F] [0028780478047804] (icount=0005, inc=4)
+  Entering A1_ADD state [a1_x=004C, a1_y=00D8, addasel=0, addbsel=0, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
+  Entering SREAD state...    Source read address/pix address: 0010C0C0/0 [0000000000000000]
+  Entering A2_ADD state [a2_x=070C, a2_y=0000, addasel=0, addbsel=1, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
+  Entering DREAD state...
+      Dest read address/pix address: 00197280/0 [0008001800180018]
+  Entering DWRITE state...
+     Dest write address/pix address: 00197280/0 [dstart=0 dend=40 pwidth=8 srcshift=30][daas=0 dabs=0 dam=7 ds=1 daq=F] [7804780478040018] (icount=0001, inc=4)
+  Entering A1_ADD state [a1_x=0050, a1_y=00D8, addasel=0, addbsel=0, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
+  Entering SREAD state...    Source read address/pix address: 0010C0C8/0 [000078047BFE7BFE]
+  Entering A2_ADD state [a2_x=0710, a2_y=0000, addasel=0, addbsel=1, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
+  Entering DREAD state...
+      Dest read address/pix address: 001972A0/0 [0008002000000000]
+  Entering DWRITE state...
+     Dest write address/pix address: 001972A0/0 [dstart=0 dend=10 pwidth=8 srcshift=30][daas=0 dabs=0 dam=7 ds=1 daq=F] [0008002000000000] (icount=FFFD, inc=4)
+  Entering A1_ADD state [a1_x=0054, a1_y=00D8, addasel=0, addbsel=0, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
+  Entering IDLE_INNER state...
+*/
  
-if (patdadd)
-       patd = wdata;
+//Why isn't this taken care of in DATA? Because DATA is modifying its local copy instead of the one used here.
+//!!! FIX !!! [DONE]
+//if (patdadd)
+//     patd = wdata;
  
  //if (patfadd)
  //     srcd1 = wdata;
@@ -4020,17 +4291,11 @@ A1_ycomp        := MAG_15 (a1ygr, a1yeq, a1ylt, a1_y{0..14}, a1_win_y{0..14});
  A1_outside     := OR6 (a1_outside, a1_x{15}, a1xgr, a1xeq, a1_y{15}, a1ygr, a1yeq);
  */
  //NOTE: There seems to be an off-by-one bug here in the clip_a1 section... !!! FIX !!!
+//      Actually, seems to be related to phrase mode writes...
+//      Or is it? Could be related to non-15-bit compares as above?
  if (clip_a1 && ((a1_x & 0x8000) || (a1_y & 0x8000) || (a1_x >= a1_win_x) || (a1_y >= a1_win_y)))
         winhibit = true;
  
-/*if (dcompen)
-{
-//This is currently not correct for phrase mode. !!! FIX !!!
-       if ((pixsize == 3 && (dcomp & 0x01))
-               || (pixsize == 4 && (dcomp & 0x03)))
-               winhibit = true;
-}*/
-
  if (!winhibit)
  {
         if (phrase_mode)
@@ -4047,36 +4312,129 @@ if (!winhibit)
                 else
                         JaguarWriteByte(address, wdata & 0x000000FF, BLITTER);
         }
+}
  
  #ifdef VERBOSE_BLITTER_LOGGING
  if (logBlit)
  {
-       printf(" [%08X%08X]", (uint32)(wdata >> 32), (uint32)(wdata & 0xFFFFFFFF));
+       printf(" [%08X%08X]%s", (uint32_t)(wdata >> 32), (uint32_t)(wdata & 0xFFFFFFFF), (winhibit ? "[X]" : ""));
+       printf(" (icount=%04X, inc=%u)\n", icount, (uint16_t)inc);
+       printf("    [dstart=%X dend=%X pwidth=%X srcshift=%X]", dstart, dend, pwidth, srcshift);
+       printf("[daas=%X dabs=%X dam=%X ds=%X daq=%s]\n", daddasel, daddbsel, daddmode, data_sel, (daddq_sel ? "T" : "F"));
         fflush(stdout);
  }
  #endif
-}
+                               }
  
+                               if (dzwrite)
+                               {
+// OK, here's the big insight: When NOT in GOURZ mode, srcz1 & 2 function EXACTLY the same way that
+// srcd1 & 2 work--there's an implicit shift from srcz1 to srcz2 whenever srcz1 is read.
+// OTHERWISE, srcz1 is the integer for the computed Z and srcz2 is the fractional part.
+// Writes to srcz1 & 2 follow the same pattern as the other 64-bit registers--low 32 at the low address,
+// high 32 at the high address (little endian!).
+// NOTE: GOURZ is still not properly supported. Check patd/patf handling...
+//       Phrase mode start/end masks are not properly supported either...
  #ifdef VERBOSE_BLITTER_LOGGING
  if (logBlit)
  {
-printf(" (icount=%04X, inc=%u)\n", icount, (uint16)inc);
-fflush(stdout);
+       printf("  Entering DZWRITE state...");
+       printf("  Dest Z write address/pix address: %08X/%1X [%08X%08X]\n", address, pixAddr,
+               (uint32_t)(srcz >> 32), (uint32_t)(srcz & 0xFFFFFFFF));
+       fflush(stdout);
  }
  #endif
-                               }
-
-                               if (dzwrite)
-                               {
+//This is not correct... !!! FIX !!!
+//Should be OK now... We'll see...
+//Nope. Having the same stairstep write problems in phrase mode as we had with pixels... !!! FIX !!!
+//This is not causing the problem in Hover Strike... :-/
+//The problem was with the SREADX not shifting. Still problems with Z comparisons & other text in pregame screen...
+if (!winhibit)
+{
+       if (phrase_mode)
+       {
+               JaguarWriteLong(address + 0, srcz >> 32, BLITTER);
+               JaguarWriteLong(address + 4, srcz & 0xFFFFFFFF, BLITTER);
+       }
+       else
+       {
+               if (pixsize == 4)
+                       JaguarWriteWord(address, srcz & 0x0000FFFF, BLITTER);
+       }
+}//*/
  #ifdef VERBOSE_BLITTER_LOGGING
  if (logBlit)
  {
-printf("  Entering DZWRITE state...\n");
-fflush(stdout);
+//     printf(" [%08X%08X]\n", (uint32_t)(srcz >> 32), (uint32_t)(srcz & 0xFFFFFFFF));
+//     fflush(stdout);
+//printf(" [dstart=%X dend=%X pwidth=%X srcshift=%X]", dstart, dend, pwidth, srcshift);
+       printf("    [dstart=? dend=? pwidth=? srcshift=%X]", srcshift);
+       printf("[daas=%X dabs=%X dam=%X ds=%X daq=%s]\n", daddasel, daddbsel, daddmode, data_sel, (daddq_sel ? "T" : "F"));
+       fflush(stdout);
  }
  #endif
                                 }
  
+/*
+This is because the address generator was using only 15 bits of the X when it should have
+used 16!
+
+There's a slight problem here: The X pointer isn't wrapping like it should when it hits
+the edge of the window... Notice how the X isn't reset at the edge of the window:
+
+Blit! (CMD = 00010000)
+Flags: PATDSEL
+  count = 160 x 261
+  a1_base = 000E8008, a2_base = 0001FA68
+  a1_x = 0000, a1_y = 0000, a1_frac_x = 0000, a1_frac_y = 0000, a2_x = 0000, a2_y = 0000
+  a1_step_x = 0000, a1_step_y = 0000, a1_stepf_x = 0000, a1_stepf_y = 0000, a2_step_x = 0000, a2_step_y = 0000
+  a1_inc_x = 0000, a1_inc_y = 0000, a1_incf_x = 0000, a1_incf_y = 0000
+  a1_win_x = 0000, a1_win_y = 0000, a2_mask_x = 0000, a2_mask_y = 0000
+  a2_mask=F a1add=+phr/+0 a2add=+phr/+0
+  a1_pixsize = 5, a2_pixsize = 5
+   srcd=7717771777177717  dstd=0000000000000000 patd=7730773077307730 iinc=00000000
+  srcz1=0000000000000000 srcz2=0000000000000000 dstz=0000000000000000 zinc=00000000, coll=0
+  Phrase mode is ON
+  [in=T a1f=F a1=F zf=F z=F a2=F iif=F iii=F izf=F izi=F]
+  Entering INNER state...
+  Entering DWRITE state...     Dest write address/pix address: 000E8008/0 [7730773077307730] (icount=009E, inc=2)
+ srcz=0000000000000000][dcomp=AA zcomp=00 dbinh=00]
+[srcz=0000000000000000 dstz=0000000000000000 zwdata=0000000000000000 mask=7FFF]
+    [dstart=0 dend=40 pwidth=8 srcshift=0][daas=0 dabs=0 dam=7 ds=0 daq=F]
+  Entering A1_ADD state [a1_x=0000, a1_y=0000, addasel=0, addbsel=0, modx=1, addareg=F, adda_xconst=1, adda_yconst=0]...
+  Entering DWRITE state...     Dest write address/pix address: 000E8018/0 [7730773077307730] (icount=009C, inc=2)
+ srcz=0000000000000000][dcomp=AA zcomp=00 dbinh=00]
+[srcz=0000000000000000 dstz=0000000000000000 zwdata=0000000000000000 mask=7FFF]
+    [dstart=0 dend=40 pwidth=8 srcshift=0][daas=0 dabs=0 dam=7 ds=0 daq=F]
+  Entering A1_ADD state [a1_x=0002, a1_y=0000, addasel=0, addbsel=0, modx=1, addareg=F, adda_xconst=1, adda_yconst=0]...
+
+...
+
+  Entering A1_ADD state [a1_x=009C, a1_y=0000, addasel=0, addbsel=0, modx=1, addareg=F, adda_xconst=1, adda_yconst=0]...
+  Entering DWRITE state...     Dest write address/pix address: 000E84F8/0 [7730773077307730] (icount=0000, inc=2)
+ srcz=0000000000000000][dcomp=AA zcomp=00 dbinh=00]
+[srcz=0000000000000000 dstz=0000000000000000 zwdata=0000000000000000 mask=7FFF]
+    [dstart=0 dend=40 pwidth=8 srcshift=0][daas=0 dabs=0 dam=7 ds=0 daq=F]
+  Entering A1_ADD state [a1_x=009E, a1_y=0000, addasel=0, addbsel=0, modx=1, addareg=F, adda_xconst=1, adda_yconst=0]...
+  Entering IDLE_INNER state...
+
+  Leaving INNER state... (ocount=0104)
+  [in=T a1f=F a1=F zf=F z=F a2=F iif=F iii=F izf=F izi=F]
+
+  Entering INNER state...
+  Entering DWRITE state...     Dest write address/pix address: 000E8508/0 [7730773077307730] (icount=009E, inc=2)
+ srcz=0000000000000000][dcomp=AA zcomp=00 dbinh=00]
+[srcz=0000000000000000 dstz=0000000000000000 zwdata=0000000000000000 mask=7FFF]
+    [dstart=0 dend=40 pwidth=8 srcshift=0][daas=0 dabs=0 dam=7 ds=0 daq=F]
+  Entering A1_ADD state [a1_x=00A0, a1_y=0000, addasel=0, addbsel=0, modx=1, addareg=F, adda_xconst=1, adda_yconst=0]...
+  Entering DWRITE state...     Dest write address/pix address: 000E8518/0 [7730773077307730] (icount=009C, inc=2)
+ srcz=0000000000000000][dcomp=AA zcomp=00 dbinh=00]
+[srcz=0000000000000000 dstz=0000000000000000 zwdata=0000000000000000 mask=7FFF]
+    [dstart=0 dend=40 pwidth=8 srcshift=0][daas=0 dabs=0 dam=7 ds=0 daq=F]
+  Entering A1_ADD state [a1_x=00A2, a1_y=0000, addasel=0, addbsel=0, modx=1, addareg=F, adda_xconst=1, adda_yconst=0]...
+
+*/
+
                                 if (a1_add)
                                 {
  #ifdef VERBOSE_BLITTER_LOGGING
@@ -4087,7 +4445,7 @@ printf("  Entering A1_ADD state [a1_x=%04X, a1_y=%04X, addasel=%X, addbsel=%X, m
  fflush(stdout);
  }
  #endif
-int16 adda_x, adda_y, addb_x, addb_y, data_x, data_y, addq_x, addq_y;
+int16_t adda_x, adda_y, addb_x, addb_y, data_x, data_y, addq_x, addq_y;
  ADDAMUX(adda_x, adda_y, addasel, a1_step_x, a1_step_y, a1_stepf_x, a1_stepf_y, a2_step_x, a2_step_y,
         a1_inc_x, a1_inc_y, a1_incf_x, a1_incf_y, adda_xconst, adda_yconst, addareg, suba_x, suba_y);
  ADDBMUX(addb_x, addb_y, addbsel, a1_x, a1_y, a2_x, a2_y, a1_frac_x, a1_frac_y);
@@ -4134,16 +4492,16 @@ printf("  Entering A2_ADD state [a2_x=%04X, a2_y=%04X, addasel=%X, addbsel=%X, m
  fflush(stdout);
  }
  #endif
-//void ADDAMUX(int16 &adda_x, int16 &adda_y, uint8 addasel, int16 a1_step_x, int16 a1_step_y,
-//     int16 a1_stepf_x, int16 a1_stepf_y, int16 a2_step_x, int16 a2_step_y,
-//     int16 a1_inc_x, int16 a1_inc_y, int16 a1_incf_x, int16 a1_incf_y, uint8 adda_xconst,
+//void ADDAMUX(int16_t &adda_x, int16_t &adda_y, uint8_t addasel, int16_t a1_step_x, int16_t a1_step_y,
+//     int16_t a1_stepf_x, int16_t a1_stepf_y, int16_t a2_step_x, int16_t a2_step_y,
+//     int16_t a1_inc_x, int16_t a1_inc_y, int16_t a1_incf_x, int16_t a1_incf_y, uint8_t adda_xconst,
  //     bool adda_yconst, bool addareg, bool suba_x, bool suba_y)
-//void ADDBMUX(int16 &addb_x, int16 &addb_y, uint8 addbsel, int16 a1_x, int16 a1_y,
-//     int16 a2_x, int16 a2_y, int16 a1_frac_x, int16 a1_frac_y)
-//void ADDRADD(int16 &addq_x, int16 &addq_y, bool a1fracldi,
-//     int16 adda_x, int16 adda_y, int16 addb_x, int16 addb_y, uint8 modx, bool suba_x, bool suba_y)
-//void DATAMUX(int16 &data_x, int16 &data_y, uint32 gpu_din, int16 addq_x, int16 addq_y, bool addqsel)
-int16 adda_x, adda_y, addb_x, addb_y, data_x, data_y, addq_x, addq_y;
+//void ADDBMUX(int16_t &addb_x, int16_t &addb_y, uint8_t addbsel, int16_t a1_x, int16_t a1_y,
+//     int16_t a2_x, int16_t a2_y, int16_t a1_frac_x, int16_t a1_frac_y)
+//void ADDRADD(int16_t &addq_x, int16_t &addq_y, bool a1fracldi,
+//     int16_t adda_x, int16_t adda_y, int16_t addb_x, int16_t addb_y, uint8_t modx, bool suba_x, bool suba_y)
+//void DATAMUX(int16_t &data_x, int16_t &data_y, uint32_t gpu_din, int16_t addq_x, int16_t addq_y, bool addqsel)
+int16_t adda_x, adda_y, addb_x, addb_y, data_x, data_y, addq_x, addq_y;
  ADDAMUX(adda_x, adda_y, addasel, a1_step_x, a1_step_y, a1_stepf_x, a1_stepf_y, a2_step_x, a2_step_y,
         a1_inc_x, a1_inc_y, a1_incf_x, a1_incf_y, adda_xconst, adda_yconst, addareg, suba_x, suba_y);
  ADDBMUX(addb_x, addb_y, addbsel, a1_x, a1_y, a2_x, a2_y, a1_frac_x, a1_frac_y);
@@ -4236,7 +4594,7 @@ cause the inner state to go active */
  //Since we don't get here until the inner loop is finished (indone = true) we can get
  //away with doing it here...!
                         ocount--;
-               
+
                         if (ocount == 0)
                                 outer0 = true;
  #ifdef VERBOSE_BLITTER_LOGGING
@@ -4257,12 +4615,12 @@ printf("  Entering A1FUPDATE state...\n");
  fflush(stdout);
  }
  #endif
-                       uint32 a1_frac_xt = (uint32)a1_frac_x + (uint32)a1_stepf_x;
-                       uint32 a1_frac_yt = (uint32)a1_frac_y + (uint32)a1_stepf_y;
+                       uint32_t a1_frac_xt = (uint32_t)a1_frac_x + (uint32_t)a1_stepf_x;
+                       uint32_t a1_frac_yt = (uint32_t)a1_frac_y + (uint32_t)a1_stepf_y;
                         a1FracCInX = a1_frac_xt >> 16;
                         a1FracCInY = a1_frac_yt >> 16;
-                       a1_frac_x = (uint16)(a1_frac_xt & 0xFFFF);
-                       a1_frac_y = (uint16)(a1_frac_yt & 0xFFFF);
+                       a1_frac_x = (uint16_t)(a1_frac_xt & 0xFFFF);
+                       a1_frac_y = (uint16_t)(a1_frac_yt & 0xFFFF);
                 }
  
                 if (a1update)
@@ -4284,7 +4642,7 @@ fflush(stdout);
  }
  #endif
                 }
-               
+
                 if (a2update)
                 {
  #ifdef VERBOSE_BLITTER_LOGGING
@@ -4306,122 +4664,86 @@ fflush(stdout);
                 }
         }
  
+// We never get here! !!! FIX !!!
+
+#ifdef VERBOSE_BLITTER_LOGGING
+if (logBlit)
+{
+       printf("Done!\na1_x=%04X a1_y=%04X a1_frac_x=%04X a1_frac_y=%04X a2_x=%04X a2_y%04X\n",
+               GET16(blitter_ram, A1_PIXEL + 2),
+               GET16(blitter_ram, A1_PIXEL + 0),
+               GET16(blitter_ram, A1_FPIXEL + 2),
+               GET16(blitter_ram, A1_FPIXEL + 0),
+               GET16(blitter_ram, A2_PIXEL + 2),
+               GET16(blitter_ram, A2_PIXEL + 0));
+       fflush(stdout);
+}
+#endif
+
         // Write values back to registers (in real blitter, these are continuously updated)
-       SET16(blitter_ram, A1_PIXEL + 0, a1_y);
         SET16(blitter_ram, A1_PIXEL + 2, a1_x);
-       SET16(blitter_ram, A1_FPIXEL + 0, a1_frac_y);
+       SET16(blitter_ram, A1_PIXEL + 0, a1_y);
         SET16(blitter_ram, A1_FPIXEL + 2, a1_frac_x);
-       SET16(blitter_ram, A2_PIXEL + 0, a2_y);
+       SET16(blitter_ram, A1_FPIXEL + 0, a1_frac_y);
         SET16(blitter_ram, A2_PIXEL + 2, a2_x);
-}
+       SET16(blitter_ram, A2_PIXEL + 0, a2_y);
  
+#ifdef VERBOSE_BLITTER_LOGGING
+if (logBlit)
+{
+       printf("Writeback!\na1_x=%04X a1_y=%04X a1_frac_x=%04X a1_frac_y=%04X a2_x=%04X a2_y%04X\n",
+               GET16(blitter_ram, A1_PIXEL + 2),
+               GET16(blitter_ram, A1_PIXEL + 0),
+               GET16(blitter_ram, A1_FPIXEL + 2),
+               GET16(blitter_ram, A1_FPIXEL + 0),
+               GET16(blitter_ram, A2_PIXEL + 2),
+               GET16(blitter_ram, A2_PIXEL + 0));
+       fflush(stdout);
+}
+#endif
+}
  
  /*
-The latest that doesn't work properly:
-
-Blit! (CMD = 09800741)
-Flags: SRCEN CLIP_A1 UPDA1 UPDA1F UPDA2 LFUFUNC=C DCOMPEN
-  count = 15 x 18
-  a1_base = 00050000, a2_base = 0083F400
-  a1_x = 003D, a1_y = 00AD, a1_frac_x = 8000, a1_frac_y = 0000, a2_x = 0027, a2_y = 00A4
-  a1_step_x = FFF1, a1_step_y = 0001, a1_stepf_x = 0000, a1_stepf_y = 0000, a2_step_x = FFF1, a2_step_y = 0001
-  a1_inc_x = 0001, a1_inc_y = 0000, a1_incf_x = 0000, a1_incf_y = 0000
-  a1_win_x = 0180, a1_win_y = 0118, a2_mask_x = 0000, a2_mask_y = 0000
-  a2_mask=F a1add=+inc/+0 a2add=+1/+0
-  a1_pixsize = 4, a2_pixsize = 4
-   srcd=0000000000000000  dstd=0000000000000000 patd=0000000000000000 iinc=00FFF000
-  srcz1=0000000000000000 srcz2=0000000000000000 dstz=0000000000000000 zinc=00000000, col=2
-  Phrase mode is off
-  [in=T a1f=F a1=F zf=F z=F a2=F iif=F iii=F izf=F izi=F]
-  Entering INNER state...
-  Entering SREAD state...    Source read address/pix address: 00858E4E/0 [0000000000000000]
-  Entering A2_ADD state [a2_x=0027, a2_y=00A4, addasel=0, addbsel=1, modx=0, addareg=F, adda_xconst=0, adda_yconst=0]...
-  Entering DWRITE state...
-     Dest write address/pix address: 0007077A/0 [dstart=0 dend=10 pwidth=8 srcshift=0][daas=0 dabs=0 dam=7 ds=1 daq=F] [0000000000000000] (icount=000E, inc=1)
-  Entering A1_ADD state [a1_x=003D, a1_y=00AD, addasel=3, addbsel=2, modx=0, addareg=T, adda_xconst=7, adda_yconst=0]...
-  Entering SREAD state...    Source read address/pix address: 00858E50/0 [0000000000000000]
-  Entering A2_ADD state [a2_x=0028, a2_y=00A4, addasel=0, addbsel=1, modx=0, addareg=F, adda_xconst=0, adda_yconst=0]...
-  Entering DWRITE state...
-     Dest write address/pix address: 0007077C/0 [dstart=0 dend=10 pwidth=8 srcshift=0][daas=0 dabs=0 dam=7 ds=1 daq=F] [0000000000000000] (icount=000D, inc=1)
-  Entering A1_ADD state [a1_x=003E, a1_y=00AD, addasel=3, addbsel=2, modx=0, addareg=T, adda_xconst=7, adda_yconst=0]...
-  Entering SREAD state...    Source read address/pix address: 00858E52/0 [0000000000000000]
-  Entering A2_ADD state [a2_x=0029, a2_y=00A4, addasel=0, addbsel=1, modx=0, addareg=F, adda_xconst=0, adda_yconst=0]...
-  Entering DWRITE state...
-     Dest write address/pix address: 0007077E/0 [dstart=0 dend=10 pwidth=8 srcshift=0][daas=0 dabs=0 dam=7 ds=1 daq=F] [0000000000000000] (icount=000C, inc=1)
-  Entering A1_ADD state [a1_x=003F, a1_y=00AD, addasel=3, addbsel=2, modx=0, addareg=T, adda_xconst=7, adda_yconst=0]...
-  Entering SREAD state...    Source read address/pix address: 00858E54/0 [000000000000014A]
-  Entering A2_ADD state [a2_x=002A, a2_y=00A4, addasel=0, addbsel=1, modx=0, addareg=F, adda_xconst=0, adda_yconst=0]...
-  Entering DWRITE state...
-     Dest write address/pix address: 00070780/0 [dstart=0 dend=10 pwidth=8 srcshift=0][daas=0 dabs=0 dam=7 ds=1 daq=F] [000000000000014A] (icount=000B, inc=1)
-  Entering A1_ADD state [a1_x=0040, a1_y=00AD, addasel=3, addbsel=2, modx=0, addareg=T, adda_xconst=7, adda_yconst=0]...
-  Entering SREAD state...    Source read address/pix address: 00858E56/0 [000000000000014A]
-  Entering A2_ADD state [a2_x=002B, a2_y=00A4, addasel=0, addbsel=1, modx=0, addareg=F, adda_xconst=0, adda_yconst=0]...
-  Entering DWRITE state...
-     Dest write address/pix address: 00070782/0 [dstart=0 dend=10 pwidth=8 srcshift=0][daas=0 dabs=0 dam=7 ds=1 daq=F] [000000000000014A] (icount=000A, inc=1)
-  Entering A1_ADD state [a1_x=0041, a1_y=00AD, addasel=3, addbsel=2, modx=0, addareg=T, adda_xconst=7, adda_yconst=0]...
-  Entering SREAD state...    Source read address/pix address: 00858E58/0 [000000000000014A]
-  Entering A2_ADD state [a2_x=002C, a2_y=00A4, addasel=0, addbsel=1, modx=0, addareg=F, adda_xconst=0, adda_yconst=0]...
-  Entering DWRITE state...
-     Dest write address/pix address: 00070784/0 [dstart=0 dend=10 pwidth=8 srcshift=0][daas=0 dabs=0 dam=7 ds=1 daq=F] [000000000000014A] (icount=0009, inc=1)
-  Entering A1_ADD state [a1_x=0042, a1_y=00AD, addasel=3, addbsel=2, modx=0, addareg=T, adda_xconst=7, adda_yconst=0]...
-  Entering SREAD state...    Source read address/pix address: 00858E5A/0 [000000000000014A]
-  Entering A2_ADD state [a2_x=002D, a2_y=00A4, addasel=0, addbsel=1, modx=0, addareg=F, adda_xconst=0, adda_yconst=0]...
-  Entering DWRITE state...
-     Dest write address/pix address: 00070786/0 [dstart=0 dend=10 pwidth=8 srcshift=0][daas=0 dabs=0 dam=7 ds=1 daq=F] [000000000000014A] (icount=0008, inc=1)
-  Entering A1_ADD state [a1_x=0043, a1_y=00AD, addasel=3, addbsel=2, modx=0, addareg=T, adda_xconst=7, adda_yconst=0]...
-  Entering SREAD state...    Source read address/pix address: 00858E5C/0 [000000000000014A]
-  Entering A2_ADD state [a2_x=002E, a2_y=00A4, addasel=0, addbsel=1, modx=0, addareg=F, adda_xconst=0, adda_yconst=0]...
-  Entering DWRITE state...
-     Dest write address/pix address: 00070788/0 [dstart=0 dend=10 pwidth=8 srcshift=0][daas=0 dabs=0 dam=7 ds=1 daq=F] [000000000000014A] (icount=0007, inc=1)
-  Entering A1_ADD state [a1_x=0044, a1_y=00AD, addasel=3, addbsel=2, modx=0, addareg=T, adda_xconst=7, adda_yconst=0]...
-  Entering SREAD state...    Source read address/pix address: 00858E5E/0 [000000000000014A]
-  Entering A2_ADD state [a2_x=002F, a2_y=00A4, addasel=0, addbsel=1, modx=0, addareg=F, adda_xconst=0, adda_yconst=0]...
-  Entering DWRITE state...
-     Dest write address/pix address: 0007078A/0 [dstart=0 dend=10 pwidth=8 srcshift=0][daas=0 dabs=0 dam=7 ds=1 daq=F] [000000000000014A] (icount=0006, inc=1)
-  Entering A1_ADD state [a1_x=0045, a1_y=00AD, addasel=3, addbsel=2, modx=0, addareg=T, adda_xconst=7, adda_yconst=0]...
-  Entering SREAD state...    Source read address/pix address: 00858E60/0 [000000000000014A]
-  Entering A2_ADD state [a2_x=0030, a2_y=00A4, addasel=0, addbsel=1, modx=0, addareg=F, adda_xconst=0, adda_yconst=0]...
-  Entering DWRITE state...
-     Dest write address/pix address: 0007078C/0 [dstart=0 dend=10 pwidth=8 srcshift=0][daas=0 dabs=0 dam=7 ds=1 daq=F] [000000000000014A] (icount=0005, inc=1)
-  Entering A1_ADD state [a1_x=0046, a1_y=00AD, addasel=3, addbsel=2, modx=0, addareg=T, adda_xconst=7, adda_yconst=0]...
-  Entering SREAD state...    Source read address/pix address: 00858E62/0 [000000000000014A]
-  Entering A2_ADD state [a2_x=0031, a2_y=00A4, addasel=0, addbsel=1, modx=0, addareg=F, adda_xconst=0, adda_yconst=0]...
-  Entering DWRITE state...
-     Dest write address/pix address: 0007078E/0 [dstart=0 dend=10 pwidth=8 srcshift=0][daas=0 dabs=0 dam=7 ds=1 daq=F] [000000000000014A] (icount=0004, inc=1)
-  Entering A1_ADD state [a1_x=0047, a1_y=00AD, addasel=3, addbsel=2, modx=0, addareg=T, adda_xconst=7, adda_yconst=0]...
-  Entering SREAD state...    Source read address/pix address: 00858E64/0 [000000000000014A]
-  Entering A2_ADD state [a2_x=0032, a2_y=00A4, addasel=0, addbsel=1, modx=0, addareg=F, adda_xconst=0, adda_yconst=0]...
-  Entering DWRITE state...
-     Dest write address/pix address: 00070790/0 [dstart=0 dend=10 pwidth=8 srcshift=0][daas=0 dabs=0 dam=7 ds=1 daq=F] [000000000000014A] (icount=0003, inc=1)
-  Entering A1_ADD state [a1_x=0048, a1_y=00AD, addasel=3, addbsel=2, modx=0, addareg=T, adda_xconst=7, adda_yconst=0]...
-  Entering SREAD state...    Source read address/pix address: 00858E66/0 [0000000000000000]
-  Entering A2_ADD state [a2_x=0033, a2_y=00A4, addasel=0, addbsel=1, modx=0, addareg=F, adda_xconst=0, adda_yconst=0]...
-  Entering DWRITE state...
-     Dest write address/pix address: 00070792/0 [dstart=0 dend=10 pwidth=8 srcshift=0][daas=0 dabs=0 dam=7 ds=1 daq=F] [0000000000000000] (icount=0002, inc=1)
-  Entering A1_ADD state [a1_x=0049, a1_y=00AD, addasel=3, addbsel=2, modx=0, addareg=T, adda_xconst=7, adda_yconst=0]...
-  Entering SREAD state...    Source read address/pix address: 00858E68/0 [0000000000000000]
-  Entering A2_ADD state [a2_x=0034, a2_y=00A4, addasel=0, addbsel=1, modx=0, addareg=F, adda_xconst=0, adda_yconst=0]...
-  Entering DWRITE state...
-     Dest write address/pix address: 00070794/0 [dstart=0 dend=10 pwidth=8 srcshift=0][daas=0 dabs=0 dam=7 ds=1 daq=F] [0000000000000000] (icount=0001, inc=1)
-  Entering A1_ADD state [a1_x=004A, a1_y=00AD, addasel=3, addbsel=2, modx=0, addareg=T, adda_xconst=7, adda_yconst=0]...
-  Entering SREAD state...    Source read address/pix address: 00858E6A/0 [0000000000000000]
-  Entering A2_ADD state [a2_x=0035, a2_y=00A4, addasel=0, addbsel=1, modx=0, addareg=F, adda_xconst=0, adda_yconst=0]...
-  Entering DWRITE state...
-     Dest write address/pix address: 00070796/0 [dstart=0 dend=10 pwidth=8 srcshift=0][daas=0 dabs=0 dam=7 ds=1 daq=F] [0000000000000000] (icount=0000, inc=1)
-  Entering A1_ADD state [a1_x=004B, a1_y=00AD, addasel=3, addbsel=2, modx=0, addareg=T, adda_xconst=7, adda_yconst=0]...
-  Entering IDLE_INNER state...
-  Leaving INNER state... (ocount=0011)
-  [in=F a1f=T a1=F zf=F z=F a2=F iif=F iii=F izf=F izi=F]
-  Entering A1FUPDATE state...
-  [in=F a1f=F a1=T zf=F z=F a2=F iif=F iii=F izf=F izi=F]
-  Entering A1UPDATE state... (76/173 -> 61/174)
-  [in=F a1f=F a1=F zf=F z=F a2=T iif=F iii=F izf=F izi=F]
-  Entering A2UPDATE state... (54/164 -> 39/165)
-  [in=T a1f=F a1=F zf=F z=F a2=F iif=F iii=F izf=F izi=F]
-  Entering INNER state...
+       int16_t a1_x = (int16_t)GET16(blitter_ram, A1_PIXEL + 2);
+       int16_t a1_y = (int16_t)GET16(blitter_ram, A1_PIXEL + 0);
+       uint16_t a1_frac_x = GET16(blitter_ram, A1_FPIXEL + 2);
+       uint16_t a1_frac_y = GET16(blitter_ram, A1_FPIXEL + 0);
+       int16_t a2_x = (int16_t)GET16(blitter_ram, A2_PIXEL + 2);
+       int16_t a2_y = (int16_t)GET16(blitter_ram, A2_PIXEL + 0);
+
+Seems that the ending a1_x should be written between blits, but it doesn't seem to be...
+
+Blit! (CMD = 01800000)
+Flags: LFUFUNC=C
+  count = 28672 x 1
+  a1_base = 00050000, a2_base = 00070000
+  a1_x = 0000, a1_y = 0000, a1_frac_x = 49CD, a1_frac_y = 0000, a2_x = 0033, a2_y = 0001
+  a1_step_x = 0000, a1_step_y = 0000, a1_stepf_x = 939A, a1_stepf_y = 0000, a2_step_x = 0000, a2_step_y = 0000
+  a1_inc_x = 0000, a1_inc_y = 0000, a1_incf_x = 0000, a1_incf_y = 0000
+  a1_win_x = 0100, a1_win_y = 0020, a2_mask_x = 0000, a2_mask_y = 0000
+  a2_mask=F a1add=+phr/+0 a2add=+phr/+0
+  a1_pixsize = 4, a2_pixsize = 3
+   srcd=DEDEDEDEDEDEDEDE  dstd=0000000000000000 patd=0000000000000000 iinc=00000000
+  srcz1=0000000000000000 srcz2=0000000000000000 dstz=0000000000000000 zinc=00000000, coll=0
+  Phrase mode is ON
+
+Blit! (CMD = 01800000)
+Flags: LFUFUNC=C
+  count = 28672 x 1
+  a1_base = 00050000, a2_base = 00070000
+  a1_x = 0000, a1_y = 0000, a1_frac_x = 49CD, a1_frac_y = 0000, a2_x = 0033, a2_y = 0001
+  a1_step_x = 0000, a1_step_y = 0000, a1_stepf_x = 939A, a1_stepf_y = 0000, a2_step_x = 0000, a2_step_y = 0000
+  a1_inc_x = 0000, a1_inc_y = 0000, a1_incf_x = 0000, a1_incf_y = 0000
+  a1_win_x = 0100, a1_win_y = 0020, a2_mask_x = 0000, a2_mask_y = 0000
+  a2_mask=F a1add=+phr/+0 a2add=+phr/+0
+  a1_pixsize = 4, a2_pixsize = 3
+   srcd=D6D6D6D6D6D6D6D6  dstd=0000000000000000 patd=0000000000000000 iinc=00000000
+  srcz1=0000000000000000 srcz2=0000000000000000 dstz=0000000000000000 zinc=00000000, coll=0
+  Phrase mode is ON
  */
  
  
+
  // Various pieces of the blitter puzzle are teased out here...
  
  
@@ -4452,35 +4774,36 @@ INT21/  a2_base
                 :IN);
  */
  
-void ADDRGEN(uint32 &address, uint32 &pixa, bool gena2, bool zaddr,
-       uint16 a1_x, uint16 a1_y, uint32 a1_base, uint8 a1_pitch, uint8 a1_pixsize, uint8 a1_width, uint8 a1_zoffset,
-       uint16 a2_x, uint16 a2_y, uint32 a2_base, uint8 a2_pitch, uint8 a2_pixsize, uint8 a2_width, uint8 a2_zoffset)
+void ADDRGEN(uint32_t &address, uint32_t &pixa, bool gena2, bool zaddr,
+       uint16_t a1_x, uint16_t a1_y, uint32_t a1_base, uint8_t a1_pitch, uint8_t a1_pixsize, uint8_t a1_width, uint8_t a1_zoffset,
+       uint16_t a2_x, uint16_t a2_y, uint32_t a2_base, uint8_t a2_pitch, uint8_t a2_pixsize, uint8_t a2_width, uint8_t a2_zoffset)
  {
-       uint16 x = (gena2 ? a2_x : a1_x) & 0x7FFF;
-       uint16 y = (gena2 ? a2_y : a1_y) & 0x0FFF;
-       uint8 width = (gena2 ? a2_width : a1_width);
-       uint8 pixsize = (gena2 ? a2_pixsize : a1_pixsize);
-       uint8 pitch = (gena2 ? a2_pitch : a1_pitch);
-       uint32 base = (gena2 ? a2_base : a1_base) >> 3;//Only upper 21 bits are passed around the bus? Seems like it...
-       uint8 zoffset = (gena2 ? a2_zoffset : a1_zoffset);
+//     uint16_t x = (gena2 ? a2_x : a1_x) & 0x7FFF;
+       uint16_t x = (gena2 ? a2_x : a1_x) & 0xFFFF;    // Actually uses all 16 bits to generate address...!
+       uint16_t y = (gena2 ? a2_y : a1_y) & 0x0FFF;
+       uint8_t width = (gena2 ? a2_width : a1_width);
+       uint8_t pixsize = (gena2 ? a2_pixsize : a1_pixsize);
+       uint8_t pitch = (gena2 ? a2_pitch : a1_pitch);
+       uint32_t base = (gena2 ? a2_base : a1_base) >> 3;//Only upper 21 bits are passed around the bus? Seems like it...
+       uint8_t zoffset = (gena2 ? a2_zoffset : a1_zoffset);
  
-       uint32 ytm = ((uint32)y << 2) + (width & 0x02 ? (uint32)y << 1 : 0) + (width & 0x01 ? (uint32)y : 0);
+       uint32_t ytm = ((uint32_t)y << 2) + (width & 0x02 ? (uint32_t)y << 1 : 0) + (width & 0x01 ? (uint32_t)y : 0);
  
-       uint32 ya = (ytm << (width >> 2)) >> 2;
+       uint32_t ya = (ytm << (width >> 2)) >> 2;
  
-       uint32 pa = ya + x;
+       uint32_t pa = ya + x;
  
         /*uint32*/ pixa = pa << pixsize;
  
-       uint8 pt = ((pitch & 0x01) && !(pitch & 0x02) ? 0x01 : 0x00)
+       uint8_t pt = ((pitch & 0x01) && !(pitch & 0x02) ? 0x01 : 0x00)
                 | (!(pitch & 0x01) && (pitch & 0x02) ? 0x02 : 0x00);
-//     uint32 phradr = pixa << pt;
-       uint32 phradr = (pixa >> 6) << pt;
-       uint32 shup = (pitch == 0x03 ? (pixa >> 6) : 0);
+//     uint32_t phradr = pixa << pt;
+       uint32_t phradr = (pixa >> 6) << pt;
+       uint32_t shup = (pitch == 0x03 ? (pixa >> 6) : 0);
  
-       uint8 za = (zaddr ? zoffset : 0) & 0x03;
-//     uint32 addr = za + (phradr & 0x07) + (shup << 1) + base;
-       uint32 addr = za + phradr + (shup << 1) + base;
+       uint8_t za = (zaddr ? zoffset : 0) & 0x03;
+//     uint32_t addr = za + (phradr & 0x07) + (shup << 1) + base;
+       uint32_t addr = za + phradr + (shup << 1) + base;
         /*uint32*/ address = ((pixa & 0x38) >> 3) | ((addr & 0x1FFFFF) << 3);
  #if 0//def VERBOSE_BLITTER_LOGGING
  if (logBlit)
@@ -4674,13 +4997,13 @@ INT32/  zinc
  INT32/  zstep
          :IN);
  */
-void ADDARRAY(uint16 * addq, uint8 daddasel, uint8 daddbsel, uint8 daddmode,
-       uint64 dstd, uint32 iinc, uint8 initcin[], uint64 initinc, uint16 initpix,
-       uint32 istep, uint64 patd, uint64 srcd, uint64 srcz1, uint64 srcz2,
-       uint32 zinc, uint32 zstep)
+void ADDARRAY(uint16_t * addq, uint8_t daddasel, uint8_t daddbsel, uint8_t daddmode,
+       uint64_t dstd, uint32_t iinc, uint8_t initcin[], uint64_t initinc, uint16_t initpix,
+       uint32_t istep, uint64_t patd, uint64_t srcd, uint64_t srcz1, uint64_t srcz2,
+       uint32_t zinc, uint32_t zstep)
  {
-       uint32 initpix2 = ((uint32)initpix << 16) | initpix;
-       uint32 addalo[8], addahi[8];
+       uint32_t initpix2 = ((uint32_t)initpix << 16) | initpix;
+       uint32_t addalo[8], addahi[8];
         addalo[0] = dstd & 0xFFFFFFFF;
         addalo[1] = initpix2;
         addalo[2] = 0;
@@ -4697,13 +5020,13 @@ void ADDARRAY(uint16 * addq, uint8 daddasel, uint8 daddbsel, uint8 daddmode,
         addahi[5] = patd >> 32;
         addahi[6] = srcz1 >> 32;
         addahi[7] = srcz2 >> 32;
-       uint16 adda[4];
+       uint16_t adda[4];
         adda[0] = addalo[daddasel] & 0xFFFF;
         adda[1] = addalo[daddasel] >> 16;
         adda[2] = addahi[daddasel] & 0xFFFF;
         adda[3] = addahi[daddasel] >> 16;
  
-       uint16 wordmux[8];
+       uint16_t wordmux[8];
         wordmux[0] = iinc & 0xFFFF;
         wordmux[1] = iinc >> 16;
         wordmux[2] = zinc & 0xFFFF;
@@ -4712,8 +5035,8 @@ void ADDARRAY(uint16 * addq, uint8 daddasel, uint8 daddbsel, uint8 daddmode,
         wordmux[5] = istep >> 16;;
         wordmux[6] = zstep & 0xFFFF;
         wordmux[7] = zstep >> 16;;
-       uint16 word = wordmux[((daddbsel & 0x08) >> 1) | (daddbsel & 0x03)];
-       uint16 addb[4];
+       uint16_t word = wordmux[((daddbsel & 0x08) >> 1) | (daddbsel & 0x03)];
+       uint16_t addb[4];
         bool dbsel2 = daddbsel & 0x04;
         bool iincsel = (daddbsel & 0x01) && !(daddbsel & 0x04);
  
@@ -4732,10 +5055,10 @@ void ADDARRAY(uint16 * addq, uint8 daddasel, uint8 daddbsel, uint8 daddmode,
         else
                 addb[0] = addb[1] = addb[2] = addb[3] = 0;
  
-       uint8 cinsel = (daddmode >= 1 && daddmode <= 4 ? 1 : 0);
+       uint8_t cinsel = (daddmode >= 1 && daddmode <= 4 ? 1 : 0);
  
-static uint8 co[4];//These are preserved between calls...
-       uint8 cin[4];
+static uint8_t co[4];//These are preserved between calls...
+       uint8_t cin[4];
  
         for(int i=0; i<4; i++)
                 cin[i] = initcin[i] | (co[i] & cinsel);
@@ -4762,12 +5085,17 @@ INT16/  b
          hicinh
          :IN);
  */
-void ADD16SAT(uint16 &r, uint8 &co, uint16 a, uint16 b, uint8 cin, bool sat, bool eightbit, bool hicinh)
+void ADD16SAT(uint16_t &r, uint8_t &co, uint16_t a, uint16_t b, uint8_t cin, bool sat, bool eightbit, bool hicinh)
+{
+/*if (logBlit)
  {
-       uint8 carry[4];
-       uint32 qt = (a & 0xFF) + (b & 0xFF) + cin;
+       printf("--> [sat=%s 8b=%s hicinh=%s] %04X + %04X (+ %u) = ", (sat ? "T" : "F"), (eightbit ? "T" : "F"), (hicinh ? "T" : "F"), a, b, cin);
+       fflush(stdout);
+}*/
+       uint8_t carry[4];
+       uint32_t qt = (a & 0xFF) + (b & 0xFF) + cin;
         carry[0] = (qt & 0x0100 ? 1 : 0);
-       uint16 q = qt & 0x00FF;
+       uint16_t q = qt & 0x00FF;
         carry[1] = (carry[0] && !eightbit ? carry[0] : 0);
         qt = (a & 0x0F00) + (b & 0x0F00) + (carry[1] << 8);
         carry[2] = (qt & 0x1000 ? 1 : 0);
@@ -4777,14 +5105,24 @@ void ADD16SAT(uint16 &r, uint8 &co, uint16 a, uint16 b, uint8 cin, bool sat, boo
         co = (qt & 0x10000 ? 1 : 0);
         q |= qt & 0xF000;
  
-       uint8 btop = (eightbit ? (b & 0x0080) >> 7 : (b & 0x8000) >> 15);
-       uint8 ctop = (eightbit ? carry[0] : co);
+       uint8_t btop = (eightbit ? (b & 0x0080) >> 7 : (b & 0x8000) >> 15);
+       uint8_t ctop = (eightbit ? carry[0] : co);
  
         bool saturate = sat && (btop ^ ctop);
         bool hisaturate = saturate && !eightbit;
+/*if (logBlit)
+{
+       printf("bt=%u ct=%u s=%u hs=%u] ", btop, ctop, saturate, hisaturate);
+       fflush(stdout);
+}*/
  
         r = (saturate ? (ctop ? 0x00FF : 0x0000) : q & 0x00FF);
         r |= (hisaturate ? (ctop ? 0xFF00 : 0x0000) : q & 0xFF00);
+/*if (logBlit)
+{
+       printf("%04X (co=%u)\n", r, co);
+       fflush(stdout);
+}*/
  }
  
  /**  ADDAMUX - Address adder input A selection  *******************
@@ -4806,7 +5144,7 @@ addasel[0..2] select the register to add
  adda_xconst[0..2] generate a power of 2 in the range 1-64 or all zeroes when
  they are all 1.
  
-addareg selects register value to be added as opposed to constant 
+addareg selects register value to be added as opposed to constant
  value.
  
  suba_x, suba_y complement the X and Y values
@@ -4835,13 +5173,13 @@ INT16/  a1_incf_y
         suba_x
         suba_y :IN);
  */
-void ADDAMUX(int16 &adda_x, int16 &adda_y, uint8 addasel, int16 a1_step_x, int16 a1_step_y,
-       int16 a1_stepf_x, int16 a1_stepf_y, int16 a2_step_x, int16 a2_step_y,
-       int16 a1_inc_x, int16 a1_inc_y, int16 a1_incf_x, int16 a1_incf_y, uint8 adda_xconst,
+void ADDAMUX(int16_t &adda_x, int16_t &adda_y, uint8_t addasel, int16_t a1_step_x, int16_t a1_step_y,
+       int16_t a1_stepf_x, int16_t a1_stepf_y, int16_t a2_step_x, int16_t a2_step_y,
+       int16_t a1_inc_x, int16_t a1_inc_y, int16_t a1_incf_x, int16_t a1_incf_y, uint8_t adda_xconst,
         bool adda_yconst, bool addareg, bool suba_x, bool suba_y)
  {
  
-/*INT16/       addac_x, addac_y, addar_x, addar_y, addart_x, addart_y, 
+/*INT16/       addac_x, addac_y, addar_x, addar_y, addart_x, addart_y,
  INT16/ addas_x, addas_y, suba_x16, suba_y16
  :LOCAL;
  BEGIN
@@ -4857,14 +5195,14 @@ Addart_y        := MX4 (addart_y, a1_step_y, a1_stepf_y, a1_inc_y, a1_incf_y, addaselb[
  Addar_y                := MX2 (addar_y, addart_y, a2_step_y, addaselb[2]);*/
  
  ////////////////////////////////////// C++ CODE //////////////////////////////////////
-       int16 xterm[4], yterm[4];
+       int16_t xterm[4], yterm[4];
         xterm[0] = a1_step_x, xterm[1] = a1_stepf_x, xterm[2] = a1_inc_x, xterm[3] = a1_incf_x;
         yterm[0] = a1_step_y, yterm[1] = a1_stepf_y, yterm[2] = a1_inc_y, yterm[3] = a1_incf_y;
-       int16 addar_x = (addasel & 0x04 ? a2_step_x : xterm[addasel & 0x03]);
-       int16 addar_y = (addasel & 0x04 ? a2_step_y : yterm[addasel & 0x03]);
+       int16_t addar_x = (addasel & 0x04 ? a2_step_x : xterm[addasel & 0x03]);
+       int16_t addar_y = (addasel & 0x04 ? a2_step_y : yterm[addasel & 0x03]);
  //////////////////////////////////////////////////////////////////////////////////////
  
-/* Generate a constant value - this is a power of 2 in the range 
+/* Generate a constant value - this is a power of 2 in the range
  0-64, or zero.  The control bits are adda_xconst[0..2], when they
  are all 1  the result is 0.
  Constants for Y can only be 0 or 1 */
@@ -4873,11 +5211,11 @@ Constants for Y can only be 0 or 1 */
  Unused[0]      := DUMMY (unused[0]);
  
  Addac_x                := JOIN (addac_x, addac_x[0..6], zero, zero, zero, zero, zero, zero, zero, zero, zero);
-Addac_y                := JOIN (addac_y, adda_yconst, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, 
+Addac_y                := JOIN (addac_y, adda_yconst, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero,
                         zero, zero, zero, zero, zero);*/
  ////////////////////////////////////// C++ CODE //////////////////////////////////////
-       int16 addac_x = (adda_xconst == 0x07 ? 0 : 1 << adda_xconst);
-       int16 addac_y = (adda_yconst ? 0x01 : 0);
+       int16_t addac_x = (adda_xconst == 0x07 ? 0 : 1 << adda_xconst);
+       int16_t addac_y = (adda_yconst ? 0x01 : 0);
  //////////////////////////////////////////////////////////////////////////////////////
  
  /* Select between constant value and register value */
@@ -4885,15 +5223,15 @@ Addac_y         := JOIN (addac_y, adda_yconst, zero, zero, zero, zero, zero, zero, zero
  /*Addas_x              := MX2 (addas_x, addac_x, addar_x, addareg);
  Addas_y                := MX2 (addas_y, addac_y, addar_y, addareg);*/
  ////////////////////////////////////// C++ CODE //////////////////////////////////////
-       int16 addas_x = (addareg ? addar_x : addac_x);
-       int16 addas_y = (addareg ? addar_y : addac_y);
+       int16_t addas_x = (addareg ? addar_x : addac_x);
+       int16_t addas_y = (addareg ? addar_y : addac_y);
  //////////////////////////////////////////////////////////////////////////////////////
  
  /* Complement these values (complement flag gives adder carry in)*/
  
-/*Suba_x16     := JOIN (suba_x16, suba_x, suba_x, suba_x, suba_x, suba_x, suba_x, suba_x, suba_x, suba_x, 
+/*Suba_x16     := JOIN (suba_x16, suba_x, suba_x, suba_x, suba_x, suba_x, suba_x, suba_x, suba_x, suba_x,
                         suba_x, suba_x, suba_x, suba_x, suba_x, suba_x, suba_x);
-Suba_y16       := JOIN (suba_y16, suba_y, suba_y, suba_y, suba_y, suba_y, suba_y, suba_y, suba_y, suba_y, 
+Suba_y16       := JOIN (suba_y16, suba_y, suba_y, suba_y, suba_y, suba_y, suba_y, suba_y, suba_y, suba_y,
                         suba_y, suba_y, suba_y, suba_y, suba_y, suba_y, suba_y);
  Adda_x         := EO (adda_x, suba_x16, addas_x);
  Adda_y         := EO (adda_y, suba_y16, addas_y);*/
@@ -4907,9 +5245,9 @@ Adda_y            := EO (adda_y, suba_y16, addas_y);*/
  
  /**  ADDBMUX - Address adder input B selection  *******************
  
-This module selects the register to be updated by the address 
-adder.  This can be one of three registers, the A1 and A2 
-pointers, or the A1 fractional part. It can also be zero, so that the step 
+This module selects the register to be updated by the address
+adder.  This can be one of three registers, the A1 and A2
+pointers, or the A1 fractional part. It can also be zero, so that the step
  registers load directly into the pointers.
  */
  
@@ -4927,18 +5265,18 @@ INT16/  a1_frac_y
         :IN);
  INT16/ zero16 :LOCAL;
  BEGIN*/
-void ADDBMUX(int16 &addb_x, int16 &addb_y, uint8 addbsel, int16 a1_x, int16 a1_y,
-       int16 a2_x, int16 a2_y, int16 a1_frac_x, int16 a1_frac_y)
+void ADDBMUX(int16_t &addb_x, int16_t &addb_y, uint8_t addbsel, int16_t a1_x, int16_t a1_y,
+       int16_t a2_x, int16_t a2_y, int16_t a1_frac_x, int16_t a1_frac_y)
  {
  
  /*Zero         := TIE0 (zero);
-Zero16         := JOIN (zero16, zero, zero, zero, zero, zero, zero, zero, 
+Zero16         := JOIN (zero16, zero, zero, zero, zero, zero, zero, zero,
                         zero, zero, zero, zero, zero, zero, zero, zero, zero);
  Addbselb[0-1]  := BUF8 (addbselb[0-1], addbsel[0-1]);
  Addb_x         := MX4 (addb_x, a1_x, a2_x, a1_frac_x, zero16, addbselb[0..1]);
  Addb_y         := MX4 (addb_y, a1_y, a2_y, a1_frac_y, zero16, addbselb[0..1]);*/
  ////////////////////////////////////// C++ CODE //////////////////////////////////////
-       int16 xterm[4], yterm[4];
+       int16_t xterm[4], yterm[4];
         xterm[0] = a1_x, xterm[1] = a2_x, xterm[2] = a1_frac_x, xterm[3] = 0;
         yterm[0] = a1_y, yterm[1] = a2_y, yterm[2] = a1_frac_y, yterm[3] = 0;
         addb_x = xterm[addbsel & 0x03];
@@ -4966,7 +5304,7 @@ INT16/    addq_y
  INT16/ gpu_lo, gpu_hi
  :LOCAL;
  BEGIN*/
-void DATAMUX(int16 &data_x, int16 &data_y, uint32 gpu_din, int16 addq_x, int16 addq_y, bool addqsel)
+void DATAMUX(int16_t &data_x, int16_t &data_y, uint32_t gpu_din, int16_t addq_x, int16_t addq_y, bool addqsel)
  {
  /*Gpu_lo               := JOIN (gpu_lo, gpu_din{0..15});
  Gpu_hi         := JOIN (gpu_hi, gpu_din{16..31});
@@ -4975,8 +5313,8 @@ Addqselb  := BUF8 (addqselb, addqsel);
  Data_x         := MX2 (data_x, gpu_lo, addq_x, addqselb);
  Data_y         := MX2 (data_y, gpu_hi, addq_y, addqselb);*/
  ////////////////////////////////////// C++ CODE //////////////////////////////////////
-       data_x = (addqsel ? addq_x : (int16)(gpu_din & 0xFFFF));
-       data_y = (addqsel ? addq_y : (int16)(gpu_din >> 16));
+       data_x = (addqsel ? addq_x : (int16_t)(gpu_din & 0xFFFF));
+       data_y = (addqsel ? addq_y : (int16_t)(gpu_din >> 16));
  //////////////////////////////////////////////////////////////////////////////////////
  
  //END;
@@ -4988,10 +5326,10 @@ addradd
  
  Blitter Address Adder
  ---------------------
-The blitter address adder is a pair of sixteen bit adders, one 
-each for X and Y.  The multiplexing of the input terms is 
-performed elsewhere, but this adder can also perform modulo 
-arithmetic to align X-addresses onto phrase boundaries. 
+The blitter address adder is a pair of sixteen bit adders, one
+each for X and Y.  The multiplexing of the input terms is
+performed elsewhere, but this adder can also perform modulo
+arithmetic to align X-addresses onto phrase boundaries.
  
  modx[0..2] take values
  000    no mask
@@ -5017,13 +5355,13 @@ INT16/  addb_y
                 modx[0..2]
                 suba_x
                 suba_y
-               :IN); 
+               :IN);
  
  BEGIN
  
  Zero           := TIE0 (zero);*/
-void ADDRADD(int16 &addq_x, int16 &addq_y, bool a1fracldi,
-       uint16 adda_x, uint16 adda_y, uint16 addb_x, uint16 addb_y, uint8 modx, bool suba_x, bool suba_y)
+void ADDRADD(int16_t &addq_x, int16_t &addq_y, bool a1fracldi,
+       uint16_t adda_x, uint16_t adda_y, uint16_t addb_x, uint16_t addb_y, uint8_t modx, bool suba_x, bool suba_y)
  {
  
  /* Perform the addition */
@@ -5043,11 +5381,11 @@ Ci_y            := EO (ci_y, cyt[1], suba_y);*/
  
  ////////////////////////////////////// C++ CODE //////////////////////////////////////
  //I'm sure the following will generate a bunch of warnings, but will have to do for now.
-       static uint16 co_x = 0, co_y = 0;       // Carry out has to propogate between function calls...
-       uint16 ci_x = co_x ^ (suba_x ? 1 : 0);
-       uint16 ci_y = co_y ^ (suba_y ? 1 : 0);
-       uint32 addqt_x = adda_x + addb_x + ci_x;
-       uint32 addqt_y = adda_y + addb_y + ci_y;
+       static uint16_t co_x = 0, co_y = 0;     // Carry out has to propagate between function calls...
+       uint16_t ci_x = co_x ^ (suba_x ? 1 : 0);
+       uint16_t ci_y = co_y ^ (suba_y ? 1 : 0);
+       uint32_t addqt_x = adda_x + addb_x + ci_x;
+       uint32_t addqt_y = adda_y + addb_y + ci_y;
         co_x = ((addqt_x & 0x10000) && a1fracldi ? 1 : 0);
         co_y = ((addqt_y & 0x10000) && a1fracldi ? 1 : 0);
  //////////////////////////////////////////////////////////////////////////////////////
@@ -5064,7 +5402,7 @@ Addq_x            := JOIN (addq_x, addq_x[0..5], addqt_x[6..15]);
  Addq_y         := JOIN (addq_y, addq_y[0..15]);*/
  
  ////////////////////////////////////// C++ CODE //////////////////////////////////////
-       int16 mask[8] = { 0xFFFF, 0xFFFE, 0xFFFC, 0xFFF8, 0xFFF0, 0xFFE0, 0xFFC0, 0x0000 };
+       int16_t mask[8] = { 0xFFFF, 0xFFFE, 0xFFFC, 0xFFF8, 0xFFF0, 0xFFE0, 0xFFC0, 0x0000 };
         addq_x = addqt_x & mask[modx];
         addq_y = addqt_y & 0xFFFF;
  //////////////////////////////////////////////////////////////////////////////////////
@@ -5081,7 +5419,7 @@ DEF DATA (
                 dcomp[0..7]             // data byte equal flags
                 srcd[0..7]              // bits to use for bit to byte expansion
                 zcomp[0..3]             // output from Z comparators
-               :OUT; 
+               :OUT;
                 a1_x[0..1]              // low two bits of A1 X pointer
                 big_pix                 // pixel organisation is big-endian
                 blitter_active  // blitter is active
@@ -5140,18 +5478,19 @@ INT32/  gpu_din                 // GPU data bus
                 :IN);
  */
  
-void DATA (uint64 &wdata, uint8 &dcomp, uint8 &zcomp, bool &nowrite,
-       bool big_pix, bool cmpdst, uint8 daddasel, uint8 daddbsel, uint8 daddmode, bool daddq_sel, uint8 data_sel,
-       uint8 dbinh, uint8 dend, uint8 dstart, uint64 dstd, uint32 iinc, uint8 lfu_func, uint64 patd, bool patdadd,
-       bool phrase_mode, uint64 srcd, bool srcdread, bool srczread, bool srcz2add, uint8 zmode,
-       bool bcompen, bool bkgwren, bool dcompen, uint8 icount, uint8 pixsize)
+void DATA(uint64_t &wdata, uint8_t &dcomp, uint8_t &zcomp, bool &nowrite,
+       bool big_pix, bool cmpdst, uint8_t daddasel, uint8_t daddbsel, uint8_t daddmode, bool daddq_sel, uint8_t data_sel,
+       uint8_t dbinh, uint8_t dend, uint8_t dstart, uint64_t dstd, uint32_t iinc, uint8_t lfu_func, uint64_t &patd, bool patdadd,
+       bool phrase_mode, uint64_t srcd, bool srcdread, bool srczread, bool srcz2add, uint8_t zmode,
+       bool bcompen, bool bkgwren, bool dcompen, uint8_t icount, uint8_t pixsize,
+       uint64_t &srcz, uint64_t dstz, uint32_t zinc)
  {
  /*
    Stuff we absolutely *need* to have passed in/out:
  IN:
    patdadd, dstd, srcd, patd, daddasel, daddbsel, daddmode, iinc, srcz1, srcz2, big_pix, phrase_mode, cmpdst
  OUT:
-  changed patd (wdata I guess...)
+  changed patd (wdata I guess...) (Nope. We pass it back directly now...)
  */
  
  // Source data registers
@@ -5163,13 +5502,13 @@ Srcd[0-7]       := JOIN (srcd[0-7], srcdlo{0-7});
  Srcd[8-31]     := JOIN (srcd[8-31], srcdlo{8-31});
  Srcd[32-63]    := JOIN (srcd[32-63], srcdhi{0-31});*/
  
-// Destination data registers 
+// Destination data registers
  
  /*Data_dst     := DATA_DST (dstd[0..63], dstz[0..1], clk, dstdld[0..1], dstzld[0..1], load_data[0..1]);
  Dstdlo         := JOIN (dstdlo, dstd[0..31]);
  Dstdhi         := JOIN (dstdhi, dstd[32..63]);*/
  
-// Pattern and Color data registers 
+// Pattern and Color data registers
  
  // Looks like this is simply another register file for the pattern data registers. No adding or anything funky
  // going on. Note that patd & patdv will output the same info.
@@ -5183,23 +5522,23 @@ Patdhi          := JOIN (patdhi, patd[32..63]);*/
  
  // Multiplying data Mixer (NOT IN JAGUAR I)
  
-/*Datamix              := DATAMIX (patdo[0..1], clk, colord[0..15], dpipe[1], dstd[0..63], int0dp[8..10], int1dp[8..10], 
+/*Datamix              := DATAMIX (patdo[0..1], clk, colord[0..15], dpipe[1], dstd[0..63], int0dp[8..10], int1dp[8..10],
                         int2dp[8..10], int3dp[8..10], mixsel[0..2], patd[0..63], pdsel[0..1], srcd[0..63], textrgb, txtd[0..63]);*/
  
  // Logic function unit
  
  /*Lfu          := LFU (lfu[0..1], srcdlo, srcdhi, dstdlo, dstdhi, lfu_func[0..3]);*/
  ////////////////////////////////////// C++ CODE //////////////////////////////////////
-       uint64 funcmask[2] = { 0, 0xFFFFFFFFFFFFFFFFLL };
-       uint64 func0 = funcmask[lfu_func & 0x01];
-       uint64 func1 = funcmask[(lfu_func >> 1) & 0x01];
-       uint64 func2 = funcmask[(lfu_func >> 2) & 0x01];
-       uint64 func3 = funcmask[(lfu_func >> 3) & 0x01];
-       uint64 lfu = (~srcd & ~dstd & func0) | (~srcd & dstd & func1) | (srcd & ~dstd & func2) | (srcd & dstd & func3);
+       uint64_t funcmask[2] = { 0, 0xFFFFFFFFFFFFFFFFLL };
+       uint64_t func0 = funcmask[lfu_func & 0x01];
+       uint64_t func1 = funcmask[(lfu_func >> 1) & 0x01];
+       uint64_t func2 = funcmask[(lfu_func >> 2) & 0x01];
+       uint64_t func3 = funcmask[(lfu_func >> 3) & 0x01];
+       uint64_t lfu = (~srcd & ~dstd & func0) | (~srcd & dstd & func1) | (srcd & ~dstd & func2) | (srcd & dstd & func3);
  //////////////////////////////////////////////////////////////////////////////////////
  
  // Increment and Step Registers
-   
+
  // Does it do anything without the step add lines? Check it!
  // No. This is pretty much just a register file without the Jaguar II lines...
  /*Inc_step     := INC_STEP (iinc, istep[0..31], zinc, zstep[0..31], clk, ext_int, gpu_din, iincld, iincldx, istepadd,
@@ -5212,7 +5551,7 @@ Zstep             := JOIN (zstep, zstep[0..31]);*/
  /*Datacomp     := DATACOMP (dcomp[0..7], cmpdst, dstdlo, dstdhi, patdlo, patdhi, srcdlo, srcdhi);*/
  ////////////////////////////////////// C++ CODE //////////////////////////////////////
         dcomp = 0;
-       uint64 cmpd = patd ^ (cmpdst ? dstd : srcd);
+       uint64_t cmpd = patd ^ (cmpdst ? dstd : srcd);
  
         if ((cmpd & 0x00000000000000FFLL) == 0)
                 dcomp |= 0x01;
@@ -5230,29 +5569,72 @@ Zstep           := JOIN (zstep, zstep[0..31]);*/
                 dcomp |= 0x40;
         if ((cmpd & 0xFF00000000000000LL) == 0)
                 dcomp |= 0x80;
+//////////////////////////////////////////////////////////////////////////////////////
+
+// Zed comparator for Z-buffer operations
+
+/*Zedcomp              := ZEDCOMP (zcomp[0..3], srczp[0..1], dstz[0..1], zmode[0..2]);*/
+////////////////////////////////////// C++ CODE //////////////////////////////////////
+//srczp is srcz pipelined, also it goes through a source shift as well...
+/*The shift is basically like so (each piece is 16 bits long):
+
+       0         1         2         3         4         5         6
+       srcz1lolo srcz1lohi srcz1hilo srcz1hihi srcz2lolo srcz2lohi srcz2hilo
+
+with srcshift bits 4 & 5 selecting the start position
+*/
+//So... basically what we have here is:
+       zcomp = 0;
+
+       if ((((srcz & 0x000000000000FFFFLL) < (dstz & 0x000000000000FFFFLL)) && (zmode & 0x01))
+               || (((srcz & 0x000000000000FFFFLL) == (dstz & 0x000000000000FFFFLL)) && (zmode & 0x02))
+               || (((srcz & 0x000000000000FFFFLL) > (dstz & 0x000000000000FFFFLL)) && (zmode & 0x04)))
+               zcomp |= 0x01;
+
+       if ((((srcz & 0x00000000FFFF0000LL) < (dstz & 0x00000000FFFF0000LL)) && (zmode & 0x01))
+               || (((srcz & 0x00000000FFFF0000LL) == (dstz & 0x00000000FFFF0000LL)) && (zmode & 0x02))
+               || (((srcz & 0x00000000FFFF0000LL) > (dstz & 0x00000000FFFF0000LL)) && (zmode & 0x04)))
+               zcomp |= 0x02;
+
+       if ((((srcz & 0x0000FFFF00000000LL) < (dstz & 0x0000FFFF00000000LL)) && (zmode & 0x01))
+               || (((srcz & 0x0000FFFF00000000LL) == (dstz & 0x0000FFFF00000000LL)) && (zmode & 0x02))
+               || (((srcz & 0x0000FFFF00000000LL) > (dstz & 0x0000FFFF00000000LL)) && (zmode & 0x04)))
+               zcomp |= 0x04;
+
+       if ((((srcz & 0xFFFF000000000000LL) < (dstz & 0xFFFF000000000000LL)) && (zmode & 0x01))
+               || (((srcz & 0xFFFF000000000000LL) == (dstz & 0xFFFF000000000000LL)) && (zmode & 0x02))
+               || (((srcz & 0xFFFF000000000000LL) > (dstz & 0xFFFF000000000000LL)) && (zmode & 0x04)))
+               zcomp |= 0x08;
+
+//TEMP, TO TEST IF ZCOMP IS THE CULPRIT...
+//Nope, this is NOT the problem...
+//zcomp=0;
  // We'll do the comparison/bit/byte inhibits here, since that's the way it happens
  // in the real thing (dcomp goes out to COMP_CTRL and back into DATA through dbinh)...
  #if 1
-       uint8 dbinht;
+       uint8_t dbinht;
  //     bool nowrite;
         COMP_CTRL(dbinht, nowrite,
-               bcompen, true/*big_pix*/, bkgwren, dcomp, dcompen, icount, pixsize, phrase_mode, srcd & 0xFF, 0);//zcomp);
+               bcompen, true/*big_pix*/, bkgwren, dcomp, dcompen, icount, pixsize, phrase_mode, srcd & 0xFF, zcomp);
         dbinh = dbinht;
  //     dbinh = 0x00;
  #endif
-//////////////////////////////////////////////////////////////////////////////////////
-
-// Zed comparator for Z-buffer operations
-
-/*Zedcomp              := ZEDCOMP (zcomp[0..3], srczp[0..1], dstz[0..1], zmode[0..2]);*/
-////////////////////////////////////// C++ CODE //////////////////////////////////////
  
+#if 1
+#ifdef VERBOSE_BLITTER_LOGGING
+if (logBlit)
+{
+       printf("\n[dcomp=%02X zcomp=%02X dbinh=%02X]\n", dcomp, zcomp, dbinh);
+       fflush(stdout);
+}//*/
+#endif
+#endif
  //////////////////////////////////////////////////////////////////////////////////////
  
  // 22 Mar 94
  // The data initializer - allows all four initial values to be computed from one (NOT IN JAGUAR I)
  
-/*Datinit              := DATINIT (initcin[0..3], initinc[0..63], initpix[0..15], a1_x[0..1], big_pix, clk, iinc, init_if, init_ii, 
+/*Datinit              := DATINIT (initcin[0..3], initinc[0..63], initpix[0..15], a1_x[0..1], big_pix, clk, iinc, init_if, init_ii,
                         init_zf, istep[0..31], zinc, zstep[0..31]);*/
  
  // Adder array for Z and intensity increments
@@ -5260,18 +5642,21 @@ Zstep           := JOIN (zstep, zstep[0..31]);*/
  /*Addarray     := ADDARRAY (addq[0..3], clk, daddasel[0..2], daddbsel[0..3], daddmode[0..2], dstdlo, dstdhi, iinc,
                         initcin[0..3], initinc[0..63], initpix[0..15], istep, patdv[0..1], srcdlo, srcdhi, srcz1[0..1],
                         srcz2[0..1], reset\, zinc, zstep);*/
-/*void ADDARRAY(uint16 * addq, uint8 daddasel, uint8 daddbsel, uint8 daddmode,
-       uint64 dstd, uint32 iinc, uint8 initcin[], uint64 initinc, uint16 initpix,
-       uint32 istep, uint64 patd, uint64 srcd, uint64 srcz1, uint64 srcz2,
-       uint32 zinc, uint32 zstep)*/
+/*void ADDARRAY(uint16_t * addq, uint8_t daddasel, uint8_t daddbsel, uint8_t daddmode,
+       uint64_t dstd, uint32_t iinc, uint8_t initcin[], uint64_t initinc, uint16_t initpix,
+       uint32_t istep, uint64_t patd, uint64_t srcd, uint64_t srcz1, uint64_t srcz2,
+       uint32_t zinc, uint32_t zstep)*/
  ////////////////////////////////////// C++ CODE //////////////////////////////////////
-       uint16 addq[4];
-       uint8 initcin[4] = { 0, 0, 0, 0 };
+       uint16_t addq[4];
+       uint8_t initcin[4] = { 0, 0, 0, 0 };
         ADDARRAY(addq, daddasel, daddbsel, daddmode, dstd, iinc, initcin, 0, 0, 0, patd, srcd, 0, 0, 0, 0);
  
         //This is normally done asynchronously above (thru local_data) when in patdadd mode...
+//And now it's passed back to the caller to be persistent between calls...!
+//But it's causing some serious fuck-ups in T2K now... !!! FIX !!! [DONE--???]
+//Weird! It doesn't anymore...!
         if (patdadd)
-               patd = ((uint64)addq[3] << 48) | ((uint64)addq[2] << 32) | ((uint64)addq[1] << 16) | (uint64)addq[0];
+               patd = ((uint64_t)addq[3] << 48) | ((uint64_t)addq[2] << 32) | ((uint64_t)addq[1] << 16) | (uint64_t)addq[0];
  //////////////////////////////////////////////////////////////////////////////////////
  
  // Local data bus multiplexer
@@ -5329,15 +5714,15 @@ Ecoarse         := DECL38E (e_coarse\[0..7], dend[3..5], edis\);
  E_coarse[0]    := INV1 (e_coarse[0], e_coarse\[0]);
  Efine          := DECL38E (unused[0], e_fine\[1..7], dend[0..2], e_coarse[0]);*/
  ////////////////////////////////////// C++ CODE //////////////////////////////////////
-       uint8 decl38e[2][8] = { { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF },
+       uint8_t decl38e[2][8] = { { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF },
                 { 0xFE, 0xFD, 0xFB, 0xF7, 0xEF, 0xDF, 0xBF, 0x7F } };
-       uint8 dech38[8] = { 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80 };
-       uint8 dech38el[2][8] = { { 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80 },
-               { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF } };
+       uint8_t dech38[8] = { 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80 };
+       uint8_t dech38el[2][8] = { { 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80 },
+               { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } };
  
                         int en = (dend & 0x3F ? 1 : 0);
-       uint8 e_coarse = decl38e[en][(dend & 0x38) >> 3];               // Actually, this is e_coarse inverted...
-       uint8 e_fine = decl38e[(e_coarse & 0x01) ^ 0x01][dend & 0x07];
+       uint8_t e_coarse = decl38e[en][(dend & 0x38) >> 3];             // Actually, this is e_coarse inverted...
+       uint8_t e_fine = decl38e[(e_coarse & 0x01) ^ 0x01][dend & 0x07];
         e_fine &= 0xFE;
  //////////////////////////////////////////////////////////////////////////////////////
  
@@ -5345,14 +5730,14 @@ Efine           := DECL38E (unused[0], e_fine\[1..7], dend[0..2], e_coarse[0]);*/
  Sfen\          := INV1 (sfen\, s_coarse[0]);
  Sfine          := DECH38EL (s_fine[0..7], dstart[0..2], sfen\);*/
  ////////////////////////////////////// C++ CODE //////////////////////////////////////
-       uint8 s_coarse = dech38[(dstart & 0x38) >> 3];
-       uint8 s_fine = dech38el[(s_coarse & 0x01) ^ 0x01][dstart & 0x07];
+       uint8_t s_coarse = dech38[(dstart & 0x38) >> 3];
+       uint8_t s_fine = dech38el[(s_coarse & 0x01) ^ 0x01][dstart & 0x07];
  //////////////////////////////////////////////////////////////////////////////////////
  
  /*Maskt[0]     := BUF1 (maskt[0], s_fine[0]);
  Maskt[1-7]     := OAN1P (maskt[1-7], maskt[0-6], s_fine[1-7], e_fine\[1-7]);*/
  ////////////////////////////////////// C++ CODE //////////////////////////////////////
-       uint16 maskt = s_fine & 0x0001;
+       uint16_t maskt = s_fine & 0x0001;
         maskt |= (((maskt & 0x0001) || (s_fine & 0x02)) && (e_fine & 0x02) ? 0x0002 : 0x0000);
         maskt |= (((maskt & 0x0002) || (s_fine & 0x04)) && (e_fine & 0x04) ? 0x0004 : 0x0000);
         maskt |= (((maskt & 0x0004) || (s_fine & 0x08)) && (e_fine & 0x08) ? 0x0008 : 0x0000);
@@ -5378,7 +5763,7 @@ Maskt[9-14]       := OAN1P (maskt[9-14], maskt[8-13], s_coarse[2-7], e_coarse\[2-7]);*
  //////////////////////////////////////////////////////////////////////////////////////
  
  /* The bit terms are mirrored for big-endian pixels outside phrase
-mode.  The byte terms are mirrored for big-endian pixels in phrase 
+mode.  The byte terms are mirrored for big-endian pixels in phrase
  mode.  */
  
  /*Mirror_bit   := AN2M (mir_bit, phrase_mode\, big_pix);
@@ -5403,7 +5788,7 @@ Masku[14] := MX2 (masku[14], maskt[14], maskt[0],  mir_byte);*/
  ////////////////////////////////////// C++ CODE //////////////////////////////////////
         bool mir_bit = true/*big_pix*/ && !phrase_mode;
         bool mir_byte = true/*big_pix*/ && phrase_mode;
-       uint16 masku = maskt;
+       uint16_t masku = maskt;
  
         if (mir_bit)
         {
@@ -5446,8 +5831,8 @@ inhibit terms can override these */
  /*Mask[0-7]    := AN2 (mask[0-7], masku[0-7], dbinh\[0]);
  Mask[8-14]     := AN2H (mask[8-14], masku[8-14], dbinh\[1-7]);*/
  ////////////////////////////////////// C++ CODE //////////////////////////////////////
-       uint16 mask = masku & (!(dbinh & 0x01) ? 0xFFFF : 0xFF00);
-       mask &= ~(((uint16)dbinh & 0x00FE) << 7);
+       uint16_t mask = masku & (!(dbinh & 0x01) ? 0xFFFF : 0xFF00);
+       mask &= ~(((uint16_t)dbinh & 0x00FE) << 7);
  //////////////////////////////////////////////////////////////////////////////////////
  
  /*Addql[0]     := JOIN (addql[0], addq[0..1]);
@@ -5458,12 +5843,12 @@ Dsel1b[0-1]     := BUF8 (dsel1b[0-1], data_sel[1]);
  Ddatlo         := MX4 (ddatlo, patd[0], lfu[0], addql[0], zero32, dsel0b[0], dsel1b[0]);
  Ddathi         := MX4 (ddathi, patd[1], lfu[1], addql[1], zero32, dsel0b[1], dsel1b[1]);*/
  ////////////////////////////////////// C++ CODE //////////////////////////////////////
-       uint64 dmux[4];
+       uint64_t dmux[4];
         dmux[0] = patd;
         dmux[1] = lfu;
-       dmux[2] = ((uint64)addq[3] << 48) | ((uint64)addq[2] << 32) | ((uint64)addq[1] << 16) | (uint64)addq[0];
+       dmux[2] = ((uint64_t)addq[3] << 48) | ((uint64_t)addq[2] << 32) | ((uint64_t)addq[1] << 16) | (uint64_t)addq[0];
         dmux[3] = 0;
-       uint64 ddat = dmux[data_sel];
+       uint64_t ddat = dmux[data_sel];
  //////////////////////////////////////////////////////////////////////////////////////
  
  /*Zed_sel              := AN2 (zed_sel, data_sel[0..1]);
@@ -5486,6 +5871,33 @@ Dat[56-63]       := MX4 (dat[56-63], dstdhi{24-31}, ddathi{24-31}, dstzhi{24-31}, srcz
         wdata |= (mask & 0x1000 ? ddat : dstd) & 0x0000FF0000000000LL;
         wdata |= (mask & 0x2000 ? ddat : dstd) & 0x00FF000000000000LL;
         wdata |= (mask & 0x4000 ? ddat : dstd) & 0xFF00000000000000LL;
+/*if (logBlit)
+{
+       printf("\n[ddat=%08X%08X dstd=%08X%08X wdata=%08X%08X mask=%04X]\n",
+               (uint32_t)(ddat >> 32), (uint32_t)(ddat & 0xFFFFFFFF),
+               (uint32_t)(dstd >> 32), (uint32_t)(dstd & 0xFFFFFFFF),
+               (uint32_t)(wdata >> 32), (uint32_t)(wdata & 0xFFFFFFFF), mask);
+       fflush(stdout);
+}//*/
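+//Crude but workable for now: merge srcz/dstz into zwdata under the same write mask used for wdata above (mask bits 0-7 select bitwise within byte 0, mask bits 8-14 select whole bytes 1-7)...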
+       uint64_t zwdata;
+       zwdata = ((srcz & mask) | (dstz & ~mask)) & 0x00000000000000FFLL;
+       zwdata |= (mask & 0x0100 ? srcz : dstz) & 0x000000000000FF00LL;
+       zwdata |= (mask & 0x0200 ? srcz : dstz) & 0x0000000000FF0000LL;
+       zwdata |= (mask & 0x0400 ? srcz : dstz) & 0x00000000FF000000LL;
+       zwdata |= (mask & 0x0800 ? srcz : dstz) & 0x000000FF00000000LL;
+       zwdata |= (mask & 0x1000 ? srcz : dstz) & 0x0000FF0000000000LL;
+       zwdata |= (mask & 0x2000 ? srcz : dstz) & 0x00FF000000000000LL;
+       zwdata |= (mask & 0x4000 ? srcz : dstz) & 0xFF00000000000000LL;
+if (logBlit)
+{
+       printf("\n[srcz=%08X%08X dstz=%08X%08X zwdata=%08X%08X mask=%04X]\n",
+               (uint32_t)(srcz >> 32), (uint32_t)(srcz & 0xFFFFFFFF),
+               (uint32_t)(dstz >> 32), (uint32_t)(dstz & 0xFFFFFFFF),
+               (uint32_t)(zwdata >> 32), (uint32_t)(zwdata & 0xFFFFFFFF), mask);
+       fflush(stdout);
+}//*/
+       srcz = zwdata;
  //////////////////////////////////////////////////////////////////////////////////////
  
  /*Data_enab[0-1]       := BUF8 (data_enab[0-1], data_ena);
@@ -5500,7 +5912,7 @@ END;*/
  /**  COMP_CTRL - Comparator output control logic  *****************
  
  This block is responsible for taking the comparator outputs and
-using them as appropriate to inhibit writes.  Two methods are 
+using them as appropriate to inhibit writes.  Two methods are
  supported for inhibiting write data:
  
  -      suppression of the inner loop controlled write operation
@@ -5513,13 +5925,13 @@ and sixteen bit pixel modes.
  Writes can be suppressed by data being equal, by the Z comparator
  conditions being met, or by the bit to pixel expansion scheme.
  
-Pipe-lining issues: the data derived comparator outputs are stable 
+Pipe-lining issues: the data derived comparator outputs are stable
  until the next data read, well after the affected write from this
  operation.  However, the inner counter bits can count immediately
-before the ack for the last write.  Therefore, it is necessary to 
+before the ack for the last write.  Therefore, it is necessary to
  delay bcompbit select terms by one inner loop pipe-line stage,
  when generating the select for the data control - the output is
-delayed one further tick to give it write data timing (2/34). 
+delayed one further tick to give it write data timing (2/34).
  
  There is also a problem with computed data - the new values are
  calculated before the write associated with the old value has been
@@ -5545,9 +5957,9 @@ pipe-lining the comparator inputs where appropriate.
         step_inner      // inner loop advance
         zcomp[0..3]     // output of word zed comparators
         :IN);*/
-void COMP_CTRL(uint8 &dbinh, bool &nowrite,
-       bool bcompen, bool big_pix, bool bkgwren, uint8 dcomp, bool dcompen, uint8 icount,
-       uint8 pixsize, bool phrase_mode, uint8 srcd, uint8 zcomp)
+void COMP_CTRL(uint8_t &dbinh, bool &nowrite,
+       bool bcompen, bool big_pix, bool bkgwren, uint8_t dcomp, bool dcompen, uint8_t icount,
+       uint8_t pixsize, bool phrase_mode, uint8_t srcd, uint8_t zcomp)
  {
  //BEGIN
  
@@ -5568,7 +5980,7 @@ In phrase mode, the eight bits are used directly, and this mode is
  only applicable to 8-bit pixel mode (2/34) */
  
  /*Bcompselt[0-2]       := EO (bcompselt[0-2], icount[0-2], big_pix);
-Bcompbit       := MX8 (bcompbit, srcd[7], srcd[6], srcd[5], 
+Bcompbit       := MX8 (bcompbit, srcd[7], srcd[6], srcd[5],
                         srcd[4], srcd[3], srcd[2], srcd[1], srcd[0], bcompselt[0..2]);
  Bcompbit\      := INV1 (bcompbit\, bcompbit);*/
  ////////////////////////////////////// C++ CODE //////////////////////////////////////
@@ -5580,21 +5992,21 @@ if (logBlit)
         fflush(stdout);
  }
  #endif
-       uint8 bcompselt = (big_pix ? ~icount : icount) & 0x07;
-       uint8 bitmask[8] = { 0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02, 0x01 };
+       uint8_t bcompselt = (big_pix ? ~icount : icount) & 0x07;
+       uint8_t bitmask[8] = { 0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02, 0x01 };
         bool bcompbit = srcd & bitmask[bcompselt];
  //////////////////////////////////////////////////////////////////////////////////////
  
  /* pipe-line the count */
  /*Bcompsel[0-2]        := FDSYNC (bcompsel[0-2], bcompselt[0-2], step_inner, clk);
-Bcompbt                := MX8 (bcompbitpt, srcd[7], srcd[6], srcd[5], 
+Bcompbt                := MX8 (bcompbitpt, srcd[7], srcd[6], srcd[5],
                         srcd[4], srcd[3], srcd[2], srcd[1], srcd[0], bcompsel[0..2]);
  Bcompbitp      := FD1Q (bcompbitp, bcompbitpt, clk);
  Bcompbitp\     := INV1 (bcompbitp\, bcompbitp);*/
  
  /* For pixel mode, generate the write inhibit signal for all modes
  on bit inhibit, for 8 and 16 bit modes on comparator inhibit, and
-for 16 bit mode on Z inhibit 
+for 16 bit mode on Z inhibit
  
  Nowrite = bcompen . /bcompbit . /phrase_mode
         + dcompen . dcomp[0] . /phrase_mode . pixsize = 011
@@ -5636,7 +6048,7 @@ if (logBlit)
  //////////////////////////////////////////////////////////////////////////////////////
  
  /* For phrase mode, generate the byte inhibit signals for eight bit
-mode 011, or sixteen bit mode 100 
+mode 011, or sixteen bit mode 100
  dbinh\[0] =  pixsize[2] . zcomp[0]
          +  pixsize[2] . dcomp[0] . dcomp[1] . dcompen
          + /pixsize[2] . dcomp[0] . dcompen
@@ -5828,4 +6240,9 @@ if (logBlit)
  ////////////////////////////////////// C++ CODE //////////////////////////////////////
  //////////////////////////////////////////////////////////////////////////////////////
  
+// !!! TESTING !!! TESTING !!! TESTING !!! TESTING !!! TESTING !!! TESTING !!! TESTING !!!
+// !!! TESTING !!! TESTING !!! TESTING !!! TESTING !!! TESTING !!! TESTING !!! TESTING !!!
+// !!! TESTING !!! TESTING !!! TESTING !!! TESTING !!! TESTING !!! TESTING !!! TESTING !!!
+
  #endif
+