Shamusworld >> Repos - virtualjaguar/blob - src/blitter.cpp

   1 //
   2 // Blitter core
   3 //
   4 // by James L. Hammons
   5 //
   6 // I owe a debt of gratitude to Curt Vendel and to John Mathieson--to Curt
   7 // for supplying the Oberon ASIC nets and to John for making them available
   8 // to Curt. ;-) Without that excellent documentation which shows *exactly*
   9 // what's going on inside the TOM chip, we'd all still be guessing as to how
  10 // the wily blitter and other pieces of the Jaguar puzzle actually work.
  11 //
  12
  13 #include "jaguar.h"
  14 #include "blitter.h"
  15
  16 // Conditional compilation switches selecting the blitter implementation (only the MKII symbol is defined).
  17 // USE_ORIGINAL_BLITTER guards the legacy macro-based blitter that appears further down in this file.
  18 //#define USE_ORIGINAL_BLITTER
  19 //#define USE_MIDSUMMER_BLITTER
  20 #define USE_MIDSUMMER_BLITTER_MKII
  21
  22 // External global variables (declared here, defined elsewhere)
  23
  24 extern int jaguar_active_memory_dumps;
  25
  26 // Local global variables (not static, so they have external linkage)
  27
  28 int start_logging = 0;
  29 uint8 blitter_working = 0;
  30
  31 // Blitter register RAM (most of it is hidden from the user).
  32 // 0x100 bytes, read and written as big-endian 32-bit values through REG()/WREG().
  33 static uint8 blitter_ram[0x100];
  34
  35 // Debug/logging state and forward declarations.
  36 // specialLog turns on the verbose WriteLog() tracing inside blitter_generic().
  37 bool specialLog = false;
  38 extern int effect_start;
  39 extern int blit_start_log;
  40 void BlitterMidsummer(uint32 cmd);              // body is not in this part of the file
  41 void BlitterMidsummer2(void);                   // body is not in this part of the file
  42 // REG(A) reads and WREG(A,D) writes a 32-bit value at byte offset A of blitter_ram, most significant byte first.
  43 #define REG(A)  (((uint32)blitter_ram[(A)] << 24) | ((uint32)blitter_ram[(A)+1] << 16) \
  44                                 | ((uint32)blitter_ram[(A)+2] << 8) | (uint32)blitter_ram[(A)+3])
  45 #define WREG(A,D)       (blitter_ram[(A)] = ((D)>>24)&0xFF, blitter_ram[(A)+1] = ((D)>>16)&0xFF, \
  46                                         blitter_ram[(A)+2] = ((D)>>8)&0xFF, blitter_ram[(A)+3] = (D)&0xFF)    // NOTE: A and D are each evaluated four times; avoid arguments with side effects
  47
  48 // Blitter registers (offsets from F02200)
  49 // Each value is a byte offset into blitter_ram[], usable directly with REG()/WREG().
  50 #define A1_BASE                 ((UINT32)0x00)
  51 #define A1_FLAGS                ((UINT32)0x04)  // Bits 3-5 are the pixel depth code used by READ_PIXEL/WRITE_PIXEL
  52 #define A1_CLIP                 ((UINT32)0x08)  // Height and width values for clipping
  53 #define A1_PIXEL                ((UINT32)0x0C)  // Integer part of the pixel (Y.i and X.i)
  54 #define A1_STEP                 ((UINT32)0x10)  // Integer part of the step
  55 #define A1_FSTEP                ((UINT32)0x14)  // Fractional part of the step
  56 #define A1_FPIXEL               ((UINT32)0x18)  // Fractional part of the pixel (Y.f and X.f)
  57 #define A1_INC                  ((UINT32)0x1C)  // Integer part of the increment
  58 #define A1_FINC                 ((UINT32)0x20)  // Fractional part of the increment
  59 #define A2_BASE                 ((UINT32)0x24)
  60 #define A2_FLAGS                ((UINT32)0x28)  // Same depth code layout as A1_FLAGS (bits 3-5)
  61 #define A2_MASK                 ((UINT32)0x2C)  // Modulo values for x and y (M.y  and M.x)
  62 #define A2_PIXEL                ((UINT32)0x30)  // Integer part of the pixel (no fractional part for A2)
  63 #define A2_STEP                 ((UINT32)0x34)  // Integer part of the step (no fractional part for A2)
  64 #define COMMAND                 ((UINT32)0x38)
  65 #define PIXLINECOUNTER  ((UINT32)0x3C)  // Inner & outer loop values
  66 #define SRCDATA                 ((UINT32)0x40)  // From here on the offsets advance by 8; READ_RDATA reads such a register at +0 or +4
  67 #define DSTDATA                 ((UINT32)0x48)
  68 #define DSTZ                    ((UINT32)0x50)
  69 #define SRCZINT                 ((UINT32)0x58)
  70 #define SRCZFRAC                ((UINT32)0x60)
  71 #define PATTERNDATA             ((UINT32)0x68)
  72 #define INTENSITYINC    ((UINT32)0x70)
  73 #define ZINC                    ((UINT32)0x74)
  74 #define COLLISIONCTRL   ((UINT32)0x78)
  75 #define PHRASEINT0              ((UINT32)0x7C)
  76 #define PHRASEINT1              ((UINT32)0x80)
  77 #define PHRASEINT2              ((UINT32)0x84)
  78 #define PHRASEINT3              ((UINT32)0x88)
  79 #define PHRASEZ0                ((UINT32)0x8C)
  80 #define PHRASEZ1                ((UINT32)0x90)
  81 #define PHRASEZ2                ((UINT32)0x94)
  82 #define PHRASEZ3                ((UINT32)0x98)
  83
  84 // Blitter command bits
  85 // Each macro masks one bit out of a variable named `cmd` that must be in scope at the point of use; the result is zero or the raw bit value (not 0/1).
  86 #define SRCEN                   (cmd & 0x00000001)
  87 #define SRCENZ                  (cmd & 0x00000002)
  88 #define SRCENX                  (cmd & 0x00000004)
  89 #define DSTEN                   (cmd & 0x00000008)
  90 #define DSTENZ                  (cmd & 0x00000010)
  91 #define DSTWRZ                  (cmd & 0x00000020)
  92 #define CLIPA1                  (cmd & 0x00000040)
  93
  94 #define UPDA1F                  (cmd & 0x00000100)
  95 #define UPDA1                   (cmd & 0x00000200)
  96 #define UPDA2                   (cmd & 0x00000400)
  97
  98 #define DSTA2                   (cmd & 0x00000800)
  99
 100 #define Z_OP_INF                (cmd & 0x00040000)
 101 #define Z_OP_EQU                (cmd & 0x00080000)
 102 #define Z_OP_SUP                (cmd & 0x00100000)
 103
 104 #define LFU_NAN                 (cmd & 0x00200000)
 105 #define LFU_NA                  (cmd & 0x00400000)
 106 #define LFU_AN                  (cmd & 0x00800000)
 107 #define LFU_A                   (cmd & 0x01000000)
 108
 109 #define CMPDST                  (cmd & 0x02000000)
 110 #define BCOMPEN                 (cmd & 0x04000000)
 111 #define DCOMPEN                 (cmd & 0x08000000)
 112
 113 #define PATDSEL                 (cmd & 0x00010000)
 114 #define ADDDSEL                 (cmd & 0x00020000)
 115 #define TOPBEN                  (cmd & 0x00004000)
 116 #define TOPNEN                  (cmd & 0x00008000)
 117 #define BKGWREN                 (cmd & 0x10000000)
 118 #define GOURD                   (cmd & 0x00001000)
 119 #define GOURZ                   (cmd & 0x00002000)
 120 #define SRCSHADE                (cmd & 0x40000000)
 121
 122 // X add control codes; the same names show up in the "addctl:" field of the blit logs quoted in this file.
 123 #define XADDPHR  0
 124 #define XADDPIX  1
 125 #define XADD0    2
 126 #define XADDINC  3
 127 // Address flag bits (bits 18-20); unlike the cmd macros these re-read A1_FLAGS/A2_FLAGS from blitter_ram on every use.
 128 #define XSIGNSUB_A1             (REG(A1_FLAGS)&0x080000)
 129 #define XSIGNSUB_A2             (REG(A2_FLAGS)&0x080000)
 130
 131 #define YSIGNSUB_A1             (REG(A1_FLAGS)&0x100000)
 132 #define YSIGNSUB_A2             (REG(A2_FLAGS)&0x100000)
 133
 134 #define YADD1_A1                (REG(A1_FLAGS)&0x040000)
 135 #define YADD1_A2                (REG(A2_FLAGS)&0x040000)
 136
 137 /*******************************************************************************
 138 ********************** STUFF CUT BELOW THIS LINE! ******************************
 139 *******************************************************************************/
 140 #ifdef USE_ORIGINAL_BLITTER                                                                             // We're ditching this crap for now...
 141 // Pixel read macros. `a` is an address generator prefix (a1 or a2) that is token-pasted onto _x, _y, _width, _pitch and _addr; the integer pixel coordinate sits in bits 16 and up of a##_x / a##_y.
 142 //Put 'em back, once we fix the problem!!! [KO]
 143 // 1 bpp pixel read (the shift comes from the inverted X, so pixel 0 is the most significant bit of its byte)
 144 #define PIXEL_SHIFT_1(a)      (((~a##_x) >> 16) & 7)
 145 #define PIXEL_OFFSET_1(a)     (((((UINT32)a##_y >> 16) * a##_width / 8) + (((UINT32)a##_x >> 19) & ~7)) * (1 + a##_pitch) + (((UINT32)a##_x >> 19) & 7))
 146 #define READ_PIXEL_1(a)       ((JaguarReadByte(a##_addr+PIXEL_OFFSET_1(a), BLITTER) >> PIXEL_SHIFT_1(a)) & 0x01)
 147 //#define READ_PIXEL_1(a)       ((JaguarReadByte(a##_addr+PIXEL_OFFSET_1(a)) >> PIXEL_SHIFT_1(a)) & 0x01)
 148 // In every PIXEL_OFFSET_n the phrase-aligned part of the offset is scaled by (1 + pitch); the position inside the phrase is added unscaled.
 149 // 2 bpp pixel read
 150 #define PIXEL_SHIFT_2(a)      (((~a##_x) >> 15) & 6)
 151 #define PIXEL_OFFSET_2(a)     (((((UINT32)a##_y >> 16) * a##_width / 4) + (((UINT32)a##_x >> 18) & ~7)) * (1 + a##_pitch) + (((UINT32)a##_x >> 18) & 7))
 152 #define READ_PIXEL_2(a)       ((JaguarReadByte(a##_addr+PIXEL_OFFSET_2(a), BLITTER) >> PIXEL_SHIFT_2(a)) & 0x03)
 153 //#define READ_PIXEL_2(a)       ((JaguarReadByte(a##_addr+PIXEL_OFFSET_2(a)) >> PIXEL_SHIFT_2(a)) & 0x03)
 154
 155 // 4 bpp pixel read
 156 #define PIXEL_SHIFT_4(a)      (((~a##_x) >> 14) & 4)
 157 #define PIXEL_OFFSET_4(a)     (((((UINT32)a##_y >> 16) * (a##_width/2)) + (((UINT32)a##_x >> 17) & ~7)) * (1 + a##_pitch) + (((UINT32)a##_x >> 17) & 7))
 158 #define READ_PIXEL_4(a)       ((JaguarReadByte(a##_addr+PIXEL_OFFSET_4(a), BLITTER) >> PIXEL_SHIFT_4(a)) & 0x0f)
 159 //#define READ_PIXEL_4(a)       ((JaguarReadByte(a##_addr+PIXEL_OFFSET_4(a)) >> PIXEL_SHIFT_4(a)) & 0x0f)
 160
 161 // 8 bpp pixel read (offset is in bytes)
 162 #define PIXEL_OFFSET_8(a)     (((((UINT32)a##_y >> 16) * a##_width) + (((UINT32)a##_x >> 16) & ~7)) * (1 + a##_pitch) + (((UINT32)a##_x >> 16) & 7))
 163 #define READ_PIXEL_8(a)       (JaguarReadByte(a##_addr+PIXEL_OFFSET_8(a), BLITTER))
 164 //#define READ_PIXEL_8(a)       (JaguarReadByte(a##_addr+PIXEL_OFFSET_8(a)))
 165
 166 // 16 bpp pixel read (offset is in 16-bit units; the reader shifts it left by 1 to get bytes)
 167 #define PIXEL_OFFSET_16(a)    (((((UINT32)a##_y >> 16) * a##_width) + (((UINT32)a##_x >> 16) & ~3)) * (1 + a##_pitch) + (((UINT32)a##_x >> 16) & 3))
 168 #define READ_PIXEL_16(a)       (JaguarReadWord(a##_addr+(PIXEL_OFFSET_16(a)<<1), BLITTER))
 169 //#define READ_PIXEL_16(a)       (JaguarReadWord(a##_addr+(PIXEL_OFFSET_16(a)<<1)))
 170
 171 // 32 bpp pixel read (offset is in 32-bit units; the reader shifts it left by 2 to get bytes)
 172 #define PIXEL_OFFSET_32(a)    (((((UINT32)a##_y >> 16) * a##_width) + (((UINT32)a##_x >> 16) & ~1)) * (1 + a##_pitch) + (((UINT32)a##_x >> 16) & 1))
 173 #define READ_PIXEL_32(a)      (JaguarReadLong(a##_addr+(PIXEL_OFFSET_32(a)<<2), BLITTER))
 174 //#define READ_PIXEL_32(a)      (JaguarReadLong(a##_addr+(PIXEL_OFFSET_32(a)<<2)))
 175
 176 // pixel read: dispatches on bits 3-5 of the flags value f (0..5 = 1/2/4/8/16/32 bpp); codes 6 and 7 yield 0. f is expanded up to six times.
 177 #define READ_PIXEL(a,f) (\
 178          (((f>>3)&0x07) == 0) ? (READ_PIXEL_1(a)) : \
 179          (((f>>3)&0x07) == 1) ? (READ_PIXEL_2(a)) : \
 180          (((f>>3)&0x07) == 2) ? (READ_PIXEL_4(a)) : \
 181          (((f>>3)&0x07) == 3) ? (READ_PIXEL_8(a)) : \
 182          (((f>>3)&0x07) == 4) ? (READ_PIXEL_16(a)) : \
 183          (((f>>3)&0x07) == 5) ? (READ_PIXEL_32(a)) : 0)
 184 // Z data access. ZDATA_OFFSET_16 is in 16-bit units: the 16 bpp pixel offset plus a##_zoffs * 4; the accessors shift it left by 1 to get a byte offset from a##_addr.
 185 // 16 bpp z data read
 186 #define ZDATA_OFFSET_16(a)     (PIXEL_OFFSET_16(a) + a##_zoffs * 4)
 187 #define READ_ZDATA_16(a)       (JaguarReadWord(a##_addr+(ZDATA_OFFSET_16(a)<<1), BLITTER))
 188 //#define READ_ZDATA_16(a)       (JaguarReadWord(a##_addr+(ZDATA_OFFSET_16(a)<<1)))
 189
 190 // z data read (the flags argument f is ignored; Z is always read as a 16-bit word)
 191 #define READ_ZDATA(a,f) (READ_ZDATA_16(a))
 192
 193 // 16 bpp z data write
 194 #define WRITE_ZDATA_16(a,d)     {  JaguarWriteWord(a##_addr+(ZDATA_OFFSET_16(a)<<1), d, BLITTER); }
 195 //#define WRITE_ZDATA_16(a,d)     {  JaguarWriteWord(a##_addr+(ZDATA_OFFSET_16(a)<<1), d); }
 196
 197 // z data write (f is ignored). NOTE: the expansion already ends in ';', so it cannot be the unbraced body of an if that has an else.
 198 #define WRITE_ZDATA(a,f,d) WRITE_ZDATA_16(a,d);
 199
 200 // 1 bpp r data read
 201 #define READ_RDATA_1(r,a,p)  ((p) ?  ((REG(r+(((UINT32)a##_x >> 19) & 0x04))) >> (((UINT32)a##_x >> 16) & 0x1F)) & 0x0001 : (REG(r) & 0x0001))
 202
 203 // 2 bpp r data read
 204 #define READ_RDATA_2(r,a,p)  ((p) ?  ((REG(r+(((UINT32)a##_x >> 18) & 0x04))) >> (((UINT32)a##_x >> 15) & 0x3E)) & 0x0003 : (REG(r) & 0x0003))
 205
 206 // 4 bpp r data read
 207 #define READ_RDATA_4(r,a,p)  ((p) ?  ((REG(r+(((UINT32)a##_x >> 17) & 0x04))) >> (((UINT32)a##_x >> 14) & 0x28)) & 0x000F : (REG(r) & 0x000F))
 208
 209 // 8 bpp r data read
 210 #define READ_RDATA_8(r,a,p)  ((p) ?  ((REG(r+(((UINT32)a##_x >> 16) & 0x04))) >> (((UINT32)a##_x >> 13) & 0x18)) & 0x00FF : (REG(r) & 0x00FF))
 211
 212 // 16 bpp r data read
 213 #define READ_RDATA_16(r,a,p)  ((p) ? ((REG(r+(((UINT32)a##_x >> 15) & 0x04))) >> (((UINT32)a##_x >> 12) & 0x10)) & 0xFFFF : (REG(r) & 0xFFFF))
 214
 215 // 32 bpp r data read
 216 #define READ_RDATA_32(r,a,p)  ((p) ? REG(r+(((UINT32)a##_x >> 14) & 0x04)) : REG(r))
 217
 218 // register data read
 219 #define READ_RDATA(r,a,f,p) (\
 220          (((f>>3)&0x07) == 0) ? (READ_RDATA_1(r,a,p)) : \
 221          (((f>>3)&0x07) == 1) ? (READ_RDATA_2(r,a,p)) : \
 222          (((f>>3)&0x07) == 2) ? (READ_RDATA_4(r,a,p)) : \
 223          (((f>>3)&0x07) == 3) ? (READ_RDATA_8(r,a,p)) : \
 224          (((f>>3)&0x07) == 4) ? (READ_RDATA_16(r,a,p)) : \
 225          (((f>>3)&0x07) == 5) ? (READ_RDATA_32(r,a,p)) : 0)
 226 // Pixel write macros. Sub-byte depths read the containing byte, clear the target field and OR in `d` shifted into place; `d` is not masked, so the caller must pass a value that fits the depth.
 227 // 1 bpp pixel write
 228 #define WRITE_PIXEL_1(a,d)       { JaguarWriteByte(a##_addr+PIXEL_OFFSET_1(a), (JaguarReadByte(a##_addr+PIXEL_OFFSET_1(a), BLITTER)&(~(0x01 << PIXEL_SHIFT_1(a))))|(d<<PIXEL_SHIFT_1(a)), BLITTER); }
 229 //#define WRITE_PIXEL_1(a,d)       { JaguarWriteByte(a##_addr+PIXEL_OFFSET_1(a), (JaguarReadByte(a##_addr+PIXEL_OFFSET_1(a))&(~(0x01 << PIXEL_SHIFT_1(a))))|(d<<PIXEL_SHIFT_1(a))); }
 230
 231 // 2 bpp pixel write
 232 #define WRITE_PIXEL_2(a,d)       { JaguarWriteByte(a##_addr+PIXEL_OFFSET_2(a), (JaguarReadByte(a##_addr+PIXEL_OFFSET_2(a), BLITTER)&(~(0x03 << PIXEL_SHIFT_2(a))))|(d<<PIXEL_SHIFT_2(a)), BLITTER); }
 233 //#define WRITE_PIXEL_2(a,d)       { JaguarWriteByte(a##_addr+PIXEL_OFFSET_2(a), (JaguarReadByte(a##_addr+PIXEL_OFFSET_2(a))&(~(0x03 << PIXEL_SHIFT_2(a))))|(d<<PIXEL_SHIFT_2(a))); }
 234
 235 // 4 bpp pixel write
 236 #define WRITE_PIXEL_4(a,d)       { JaguarWriteByte(a##_addr+PIXEL_OFFSET_4(a), (JaguarReadByte(a##_addr+PIXEL_OFFSET_4(a), BLITTER)&(~(0x0f << PIXEL_SHIFT_4(a))))|(d<<PIXEL_SHIFT_4(a)), BLITTER); }
 237 //#define WRITE_PIXEL_4(a,d)       { JaguarWriteByte(a##_addr+PIXEL_OFFSET_4(a), (JaguarReadByte(a##_addr+PIXEL_OFFSET_4(a))&(~(0x0f << PIXEL_SHIFT_4(a))))|(d<<PIXEL_SHIFT_4(a))); }
 238
 239 // 8 bpp pixel write
 240 #define WRITE_PIXEL_8(a,d)       { JaguarWriteByte(a##_addr+PIXEL_OFFSET_8(a), d, BLITTER); }
 241 //#define WRITE_PIXEL_8(a,d)       { JaguarWriteByte(a##_addr+PIXEL_OFFSET_8(a), d); }
 242
 243 // 16 bpp pixel write (also logs the target address through WriteLog when specialLog is set)
 244 //#define WRITE_PIXEL_16(a,d)     {  JaguarWriteWord(a##_addr+(PIXEL_OFFSET_16(a)<<1),d); }
 245 #define WRITE_PIXEL_16(a,d)     {  JaguarWriteWord(a##_addr+(PIXEL_OFFSET_16(a)<<1), d, BLITTER); if (specialLog) WriteLog("Pixel write address: %08X\n", a##_addr+(PIXEL_OFFSET_16(a)<<1)); }
 246 //#define WRITE_PIXEL_16(a,d)     {  JaguarWriteWord(a##_addr+(PIXEL_OFFSET_16(a)<<1), d); if (specialLog) WriteLog("Pixel write address: %08X\n", a##_addr+(PIXEL_OFFSET_16(a)<<1)); }
 247
 248 // 32 bpp pixel write
 249 #define WRITE_PIXEL_32(a,d)             { JaguarWriteLong(a##_addr+(PIXEL_OFFSET_32(a)<<2), d, BLITTER); }
 250 //#define WRITE_PIXEL_32(a,d)           { JaguarWriteLong(a##_addr+(PIXEL_OFFSET_32(a)<<2), d); }
 251
 252 // pixel write: dispatches on bits 3-5 of the flags value f; there is no default case, so depth codes 6 and 7 write nothing
 253 #define WRITE_PIXEL(a,f,d) {\
 254         switch ((f>>3)&0x07) { \
 255         case 0: WRITE_PIXEL_1(a,d);  break;  \
 256         case 1: WRITE_PIXEL_2(a,d);  break;  \
 257         case 2: WRITE_PIXEL_4(a,d);  break;  \
 258         case 3: WRITE_PIXEL_8(a,d);  break;  \
 259         case 4: WRITE_PIXEL_16(a,d); break;  \
 260         case 5: WRITE_PIXEL_32(a,d); break;  \
 261         }}
 262
 263 // Width in Pixels of a Scanline
 264 // This is a pretranslation of the value found in the A1 & A2 flags: It's really a floating point value
 265 // of the form EEEEMM where MM is the mantissa with an implied "1." in front of it and the EEEE value is
 266 // the exponent. Valid values for the exponent range from 0 to 11 (decimal). It's easiest to think of it
 267 // as a floating point bit pattern being followed by a number of zeroes. So, e.g., 001101 translates to
 268 // binary 1.01 (the "1." being implied) x (2 ^ 3) or binary 1010 -> 10 in base 10 (i.e., 1.01 with the binary point
 269 // being shifted to the right 3 places).
 270 /*static uint32 blitter_scanline_width[48] =
 271 {
 272      0,    0,    0,    0,                                       // Note: This would really translate to 1, 1, 1, 1
 273      2,    0,    0,    0,
 274      4,    0,    6,    0,
 275      8,   10,   12,   14,
 276     16,   20,   24,   28,
 277     32,   40,   48,   56,
 278     64,   80,   96,  112,
 279    128,  160,  192,  224,
 280    256,  320,  384,  448,
 281    512,  640,  768,  896,
 282   1024, 1280, 1536, 1792,
 283   2048, 2560, 3072, 3584
 284 };//*/
 285 // File-scope working state of the original blitter (this section sits after the #ifdef USE_ORIGINAL_BLITTER above). The a1_*/a2_* names are reached by the pixel macros through token pasting.
 286 //static uint8 * tom_ram_8;
 287 //static uint8 * paletteRam;
 288 static uint8 src;                       // src..ctrl are not referenced in the visible part of blitter_generic; the names match the fields of the blit log line -- presumably decoded command fields, TODO confirm
 289 static uint8 dst;
 290 static uint8 misc;
 291 static uint8 a1ctl;
 292 static uint8 mode;
 293 static uint8 ity;
 294 static uint8 zop;
 295 static uint8 op;
 296 static uint8 ctrl;
 297 static uint32 a1_addr;                  // Base address that the pixel/Z macros add their offsets to
 298 static uint32 a2_addr;
 299 static int32 a1_zoffs;                  // Multiplied by 4 and added to the pixel offset in ZDATA_OFFSET_16
 300 static int32 a2_zoffs;
 301 static uint32 xadd_a1_control;
 302 static uint32 xadd_a2_control;
 303 static int32 a1_pitch;                  // Used as (1 + pitch) scale factor in the PIXEL_OFFSET_n macros
 304 static int32 a2_pitch;
 305 static uint32 n_pixels;                 // Copied into inner_loop at the start of every line
 306 static uint32 n_lines;
 307 static int32 a1_x;                      // Integer pixel position is in bits 16 and up (code uses a1_x >> 16)
 308 static int32 a1_y;
 309 static int32 a1_width;                  // Row width used by the PIXEL_OFFSET_n macros
 310 static int32 a2_x;
 311 static int32 a2_y;
 312 static int32 a2_width;
 313 static int32 a2_mask_x;
 314 static int32 a2_mask_y;
 315 static int32 a1_xadd;
 316 static int32 a1_yadd;
 317 static int32 a2_xadd;
 318 static int32 a2_yadd;
 319 static uint8 a1_phrase_mode;            // Passed as the `p` argument of READ_RDATA
 320 static uint8 a2_phrase_mode;
 321 static int32 a1_step_x = 0;             // The four step values are printed by the specialLog trace
 322 static int32 a1_step_y = 0;
 323 static int32 a2_step_x = 0;
 324 static int32 a2_step_y = 0;
 325 static uint32 outer_loop;               // Counted down by the line loop in blitter_generic
 326 static uint32 inner_loop;               // Counted down by the per-pixel loop in blitter_generic
 327 static uint32 a2_psize;
 328 static uint32 a1_psize;
 329 static uint32 gouraud_add;
 330 //static uint32 gouraud_data;
 331 //static uint16 gint[4];
 332 //static uint16 gfrac[4];
 333 //static uint8  gcolour[4];
 334 static int gd_i[4];
 335 static int gd_c[4];
 336 static int gd_ia, gd_ca;
 337 static int colour_index = 0;            // Index into z_i[]
 338 static int32 zadd;
 339 static uint32 z_i[4];                   // z_i[colour_index] >> 16 replaces the source Z when GOURZ is set
 340
 341 static int32 a1_clip_x, a1_clip_y;      // Exclusive upper bounds for (a1_x >> 16) and (a1_y >> 16) when CLIPA1 is set
 342
 343 // In the spirit of "get it right first, *then* optimize" I've taken the liberty
 344 // of removing all the unnecessary code caching. If it turns out to be a good way
 345 // to optimize the blitter, then we may revisit it in the future...
 346
 347 //
 348 // Generic blit handler
 349 //
 350 void blitter_generic(uint32 cmd)
 351 {
 352 /*
 353 Blit! (0018FA70 <- 008DDC40) count: 2 x 13, A1/2_FLAGS: 00014218/00013C18 [cmd: 1401060C]
 354  CMD -> src: SRCENX dst: DSTEN  misc:  a1ctl: UPDA1 UPDA2 mode:  ity: PATDSEL z-op:  op: LFU_CLEAR ctrl: BCOMPEN BKGWREN
 355   A1 step values: -2 (X), 1 (Y)
 356   A2 step values: -1 (X), 1 (Y) [mask (unused): 00000000 - FFFFFFFF/FFFFFFFF]
 357   A1 -> pitch: 1 phrases, depth: 8bpp, z-off: 0, width: 320 (21), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
 358   A2 -> pitch: 1 phrases, depth: 8bpp, z-off: 0, width: 192 (1E), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
 359         A1 x/y: 100/12, A2 x/y: 106/0 Pattern: 000000F300000000
 360 */
 361 //if (effect_start)
 362 //      specialLog = true;
 363 /*if (cmd == 0x1401060C && blit_start_log)
 364         specialLog = true;//*/
 365 //Testing only!
 366 //uint32 logGo = ((cmd == 0x01800E01 && REG(A1_BASE) == 0x898000) ? 1 : 0);
 367         uint32 srcdata, srczdata, dstdata, dstzdata, writedata, inhibit;
 368         uint32 bppSrc = (DSTA2 ? 1 << ((REG(A1_FLAGS) >> 3) & 0x07) : 1 << ((REG(A2_FLAGS) >> 3) & 0x07));
 369
 370 if (specialLog)
 371 {
 372         WriteLog("About to do n x m blit (BM width is ? pixels)...\n");
 373         WriteLog("A1_STEP_X/Y = %08X/%08X, A2_STEP_X/Y = %08X/%08X\n", a1_step_x, a1_step_y, a2_step_x, a2_step_y);
 374 }
 375 /*      if (BCOMPEN)
 376         {
 377                 if (DSTA2)
 378                         a1_xadd = 0;
 379                 else
 380                         a2_xadd = 0;
 381         }//*/
 382
 383         while (outer_loop--)
 384         {
 385 if (specialLog)
 386 {
 387         WriteLog("  A1_X/Y = %08X/%08X, A2_X/Y = %08X/%08X\n", a1_x, a1_y, a2_x, a2_y);
 388 }
 389                 uint32 a1_start = a1_x, a2_start = a2_x, bitPos = 0;
 390
 391                 //Kludge for Hover Strike...
 392                 //I wonder if this kludge is in conjunction with the SRCENX down below...
 393                 // This isn't so much a kludge but the way things work in BCOMPEN mode...!
 394                 if (BCOMPEN && SRCENX)
 395                 {
 396                         if (n_pixels < bppSrc)
 397                                 bitPos = bppSrc - n_pixels;
 398                 }
 399
 400                 inner_loop = n_pixels;
 401                 while (inner_loop--)
 402                 {
 403 if (specialLog)
 404 {
 405         WriteLog("    A1_X/Y = %08X/%08X, A2_X/Y = %08X/%08X\n", a1_x, a1_y, a2_x, a2_y);
 406 }
 407                         srcdata = srczdata = dstdata = dstzdata = writedata = inhibit = 0;
 408
 409                         if (!DSTA2)                                                     // Data movement: A1 <- A2
 410                         {
 411                                 // load src data and Z
 412 //                              if (SRCEN)
 413                                 if (SRCEN || SRCENX)    // Not sure if this is correct... (seems to be...!)
 414                                 {
 415                                         srcdata = READ_PIXEL(a2, REG(A2_FLAGS));
 416
 417                                         if (SRCENZ)
 418                                                 srczdata = READ_ZDATA(a2, REG(A2_FLAGS));
 419                                         else if (cmd & 0x0001C020)      // PATDSEL | TOPBEN | TOPNEN | DSTWRZ
 420                                                 srczdata = READ_RDATA(SRCZINT, a2, REG(A2_FLAGS), a2_phrase_mode);
 421                                 }
 422                                 else    // Use SRCDATA register...
 423                                 {
 424                                         srcdata = READ_RDATA(SRCDATA, a2, REG(A2_FLAGS), a2_phrase_mode);
 425
 426                                         if (cmd & 0x0001C020)           // PATDSEL | TOPBEN | TOPNEN | DSTWRZ
 427                                                 srczdata = READ_RDATA(SRCZINT, a2, REG(A2_FLAGS), a2_phrase_mode);
 428                                 }
 429
 430                                 // load dst data and Z
 431                                 if (DSTEN)
 432                                 {
 433                                         dstdata = READ_PIXEL(a1, REG(A1_FLAGS));
 434
 435                                         if (DSTENZ)
 436                                                 dstzdata = READ_ZDATA(a1, REG(A1_FLAGS));
 437                                         else
 438                                                 dstzdata = READ_RDATA(DSTZ, a1, REG(A1_FLAGS), a1_phrase_mode);
 439                                 }
 440                                 else
 441                                 {
 442                                         dstdata = READ_RDATA(DSTDATA, a1, REG(A1_FLAGS), a1_phrase_mode);
 443
 444                                         if (DSTENZ)
 445                                                 dstzdata = READ_RDATA(DSTZ, a1, REG(A1_FLAGS), a1_phrase_mode);
 446                                 }
 447
 448 /*This wasn't working...                                // a1 clipping
 449                                 if (cmd & 0x00000040)
 450                                 {
 451                                         if (a1_x < 0 || a1_y < 0 || (a1_x >> 16) >= (REG(A1_CLIP) & 0x7FFF)
 452                                                 || (a1_y >> 16) >= ((REG(A1_CLIP) >> 16) & 0x7FFF))
 453                                                 inhibit = 1;
 454                                 }//*/
 455
 456                                 if (GOURZ)
 457                                         srczdata = z_i[colour_index] >> 16;
 458
 459                                 // apply z comparator
 460                                 if (Z_OP_INF && srczdata <  dstzdata)   inhibit = 1;
 461                                 if (Z_OP_EQU && srczdata == dstzdata)   inhibit = 1;
 462                                 if (Z_OP_SUP && srczdata >  dstzdata)   inhibit = 1;
 463
 464                                 // apply data comparator
 465 // Note: DCOMPEN only works in 8/16 bpp modes! !!! FIX !!!
 466 // Does BCOMPEN only work in 1 bpp mode???
 467 //   No, but it always does a 1 bit expansion no matter what the BPP of the channel is set to. !!! FIX !!!
 468 //   This is bit tricky... We need to fix the XADD value so that it acts like a 1BPP value while inside
 469 //   an 8BPP space.
 470                                 if (DCOMPEN | BCOMPEN)
 471                                 {
 472 //Temp, for testing Hover Strike
 473 //Doesn't seem to do it... Why?
 474 //What needs to happen here is twofold. First, the address generator in the outer loop has
 475 //to honor the BPP when calculating the start address (which it kinda does already). Second,
 476 //it has to step bit by bit when using BCOMPEN. How to do this???
 477         if (BCOMPEN)
 478 //small problem with this approach: it's not accurate... We need a proper address to begin with
 479 //and *then* we can do the bit stepping from there the way it's *supposed* to be done... !!! FIX !!!
 480 //[DONE]
 481         {
 482                 uint32 pixShift = (~bitPos) & (bppSrc - 1);
 483                 srcdata = (srcdata >> pixShift) & 0x01;
 484
 485                 bitPos++;
 486 //              if (bitPos % bppSrc == 0)
 487 //                      a2_x += 0x00010000;
 488         }
 489 /*
 490 Interesting (Hover Strike--large letter):
 491
 492 Blit! (0018FA70 <- 008DDC40) count: 2 x 13, A1/2_FLAGS: 00014218/00013C18 [cmd: 1401060C]
 493  CMD -> src: SRCENX dst: DSTEN  misc:  a1ctl: UPDA1 UPDA2 mode:  ity: PATDSEL z-op:  op: LFU_CLEAR ctrl: BCOMPEN BKGWREN
 494   A1 step values: -2 (X), 1 (Y)
 495   A2 step values: -1 (X), 1 (Y) [mask (unused): 00000000 - FFFFFFFF/FFFFFFFF]
 496   A1 -> pitch: 1 phrases, depth: 8bpp, z-off: 0, width: 320 (21), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
 497   A2 -> pitch: 1 phrases, depth: 8bpp, z-off: 0, width: 192 (1E), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
 498         A1 x/y: 100/12, A2 x/y: 106/0 Pattern: 000000F300000000
 499
 500 Blit! (0018FA70 <- 008DDC40) count: 8 x 13, A1/2_FLAGS: 00014218/00013C18 [cmd: 1401060C]
 501  CMD -> src: SRCENX dst: DSTEN  misc:  a1ctl: UPDA1 UPDA2 mode:  ity: PATDSEL z-op:  op: LFU_CLEAR ctrl: BCOMPEN BKGWREN
 502   A1 step values: -8 (X), 1 (Y)
 503   A2 step values: -1 (X), 1 (Y) [mask (unused): 00000000 - FFFFFFFF/FFFFFFFF]
 504   A1 -> pitch: 1 phrases, depth: 8bpp, z-off: 0, width: 320 (21), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
 505   A2 -> pitch: 1 phrases, depth: 8bpp, z-off: 0, width: 192 (1E), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
 506         A1 x/y: 102/12, A2 x/y: 107/0 Pattern: 000000F300000000
 507
 508 Blit! (0018FA70 <- 008DDC40) count: 1 x 13, A1/2_FLAGS: 00014218/00013C18 [cmd: 1401060C]
 509  CMD -> src: SRCENX dst: DSTEN  misc:  a1ctl: UPDA1 UPDA2 mode:  ity: PATDSEL z-op:  op: LFU_CLEAR ctrl: BCOMPEN BKGWREN
 510   A1 step values: -1 (X), 1 (Y)
 511   A2 step values: -1 (X), 1 (Y) [mask (unused): 00000000 - FFFFFFFF/FFFFFFFF]
 512   A1 -> pitch: 1 phrases, depth: 8bpp, z-off: 0, width: 320 (21), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
 513   A2 -> pitch: 1 phrases, depth: 8bpp, z-off: 0, width: 192 (1E), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
 514         A1 x/y: 118/12, A2 x/y: 70/0 Pattern: 000000F300000000
 515
 516 Blit! (0018FA70 <- 008DDC40) count: 8 x 13, A1/2_FLAGS: 00014218/00013C18 [cmd: 1401060C]
 517  CMD -> src: SRCENX dst: DSTEN  misc:  a1ctl: UPDA1 UPDA2 mode:  ity: PATDSEL z-op:  op: LFU_CLEAR ctrl: BCOMPEN BKGWREN
 518   A1 step values: -8 (X), 1 (Y)
 519   A2 step values: -1 (X), 1 (Y) [mask (unused): 00000000 - FFFFFFFF/FFFFFFFF]
 520   A1 -> pitch: 1 phrases, depth: 8bpp, z-off: 0, width: 320 (21), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
 521   A2 -> pitch: 1 phrases, depth: 8bpp, z-off: 0, width: 192 (1E), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
 522         A1 x/y: 119/12, A2 x/y: 71/0 Pattern: 000000F300000000
 523
 524 Blit! (0018FA70 <- 008DDC40) count: 1 x 13, A1/2_FLAGS: 00014218/00013C18 [cmd: 1401060C]
 525  CMD -> src: SRCENX dst: DSTEN  misc:  a1ctl: UPDA1 UPDA2 mode:  ity: PATDSEL z-op:  op: LFU_CLEAR ctrl: BCOMPEN BKGWREN
 526   A1 step values: -1 (X), 1 (Y)
 527   A2 step values: -1 (X), 1 (Y) [mask (unused): 00000000 - FFFFFFFF/FFFFFFFF]
 528   A1 -> pitch: 1 phrases, depth: 8bpp, z-off: 0, width: 320 (21), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
 529   A2 -> pitch: 1 phrases, depth: 8bpp, z-off: 0, width: 192 (1E), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
 530         A1 x/y: 127/12, A2 x/y: 66/0 Pattern: 000000F300000000
 531
 532 Blit! (0018FA70 <- 008DDC40) count: 8 x 13, A1/2_FLAGS: 00014218/00013C18 [cmd: 1401060C]
 533  CMD -> src: SRCENX dst: DSTEN  misc:  a1ctl: UPDA1 UPDA2 mode:  ity: PATDSEL z-op:  op: LFU_CLEAR ctrl: BCOMPEN BKGWREN
 534   A1 step values: -8 (X), 1 (Y)
 535   A2 step values: -1 (X), 1 (Y) [mask (unused): 00000000 - FFFFFFFF/FFFFFFFF]
 536   A1 -> pitch: 1 phrases, depth: 8bpp, z-off: 0, width: 320 (21), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
 537   A2 -> pitch: 1 phrases, depth: 8bpp, z-off: 0, width: 192 (1E), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
 538         A1 x/y: 128/12, A2 x/y: 67/0 Pattern: 000000F300000000
 539 */
 540
 541
 542                                         if (!CMPDST)
 543                                         {
 544 //WriteLog("Blitter: BCOMPEN set on command %08X inhibit prev:%u, now:", cmd, inhibit);
 545                                                 // compare source pixel with pattern pixel
 546 /*
 547 Blit! (000B8250 <- 0012C3A0) count: 16 x 1, A1/2_FLAGS: 00014420/00012000 [cmd: 05810001]
 548  CMD -> src: SRCEN  dst:  misc:  a1ctl:  mode:  ity: PATDSEL z-op:  op: LFU_REPLACE ctrl: BCOMPEN
 549   A1 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 384 (22), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
 550   A2 -> pitch: 1 phrases, depth: 1bpp, z-off: 0, width: 16 (10), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
 551         x/y: 0/20
 552 ...
 553 */
 554 // AvP is still wrong, could be cuz it's doing A1 -> A2...
 555
 556 // Src is the 1bpp bitmap... DST is the PATTERN!!!
 557 // This seems to solve at least ONE of the problems with MC3D...
 558 // Why should this be inverted???
 559 // Bcuz it is. This is supposed to be used only for a bit -> pixel expansion...
 560 /*                                              if (srcdata == READ_RDATA(PATTERNDATA, a2, REG(A2_FLAGS), a2_phrase_mode))
 561 //                                              if (srcdata != READ_RDATA(PATTERNDATA, a2, REG(A2_FLAGS), a2_phrase_mode))
 562                                                         inhibit = 1;//*/
 563 /*                                              uint32 A2bpp = 1 << ((REG(A2_FLAGS) >> 3) & 0x07);
 564                                                 if (A2bpp == 1 || A2bpp == 16 || A2bpp == 8)
 565                                                         inhibit = (srcdata == 0 ? 1: 0);
 566 //                                                      inhibit = !srcdata;
 567                                                 else
 568                                                         WriteLog("Blitter: Bad BPP (%u) selected for BCOMPEN mode!\n", A2bpp);//*/
 569 // What it boils down to is this:
 570
 571                                                 if (srcdata == 0)
 572                                                         inhibit = 1;//*/
 573                                         }
 574                                         else
 575                                         {
 576                                                 // compare destination pixel with pattern pixel
 577                                                 if (dstdata == READ_RDATA(PATTERNDATA, a1, REG(A1_FLAGS), a1_phrase_mode))
 578 //                                              if (dstdata != READ_RDATA(PATTERNDATA, a1, REG(A1_FLAGS), a1_phrase_mode))
 579                                                         inhibit = 1;
 580                                         }
 581
 582 // This is DEFINITELY WRONG
 583 //                                      if (a1_phrase_mode || a2_phrase_mode)
 584 //                                              inhibit = !inhibit;
 585                                 }
 586
 587                                 if (CLIPA1)
 588                                 {
 589                                         inhibit |= (((a1_x >> 16) < a1_clip_x && (a1_x >> 16) >= 0
 590                                                 && (a1_y >> 16) < a1_clip_y && (a1_y >> 16) >= 0) ? 0 : 1);
 591                                 }
 592
 593                                 // compute the write data and store
 594                                 if (!inhibit)
 595                                 {
 596 // Houston, we have a problem...
 597 // Look here, at PATDSEL and GOURD. If both are active (as they are on the BIOS intro), then there's
 598 // a conflict! E.g.:
 599 //Blit! (00100000 <- 000095D0) count: 3 x 1, A1/2_FLAGS: 00014220/00004020 [cmd: 00011008]
 600 // CMD -> src:  dst: DSTEN  misc:  a1ctl:  mode: GOURD  ity: PATDSEL z-op:  op: LFU_CLEAR ctrl:
 601 //  A1 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 320 (21), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
 602 //  A2 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 256 (20), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
 603 //        A1 x/y: 90/171, A2 x/y: 808/0 Pattern: 776D770077007700
 604
 605                                         if (PATDSEL)
 606                                         {
 607                                                 // use pattern data for write data
 608                                                 writedata = READ_RDATA(PATTERNDATA, a1, REG(A1_FLAGS), a1_phrase_mode);
 609                                         }
 610                                         else if (ADDDSEL)
 611                                         {
 612 /*if (blit_start_log)
 613         WriteLog("BLIT: ADDDSEL srcdata: %08X\, dstdata: %08X, ", srcdata, dstdata);//*/
 614
 615                                                 // intensity addition
 616 //Ok, this is wrong... Or is it? Yes, it's wrong! !!! FIX !!!
 617 /*                                              writedata = (srcdata & 0xFF) + (dstdata & 0xFF);
 618                                                 if (!(TOPBEN) && writedata > 0xFF)
 619 //                                                      writedata = 0xFF;
 620                                                         writedata &= 0xFF;
 621                                                 writedata |= (srcdata & 0xF00) + (dstdata & 0xF00);
 622                                                 if (!(TOPNEN) && writedata > 0xFFF)
 623 //                                                      writedata = 0xFFF;
 624                                                         writedata &= 0xFFF;
 625                                                 writedata |= (srcdata & 0xF000) + (dstdata & 0xF000);//*/
 626 //notneeded--writedata &= 0xFFFF;
 627 /*if (blit_start_log)
 628         WriteLog("writedata: %08X\n", writedata);//*/
 629 /*
 630 Hover Strike ADDDSEL blit:
 631
 632 Blit! (00098D90 <- 0081DDC0) count: 320 x 287, A1/2_FLAGS: 00004220/00004020 [cmd: 00020208]
 633  CMD -> src:  dst: DSTEN  misc:  a1ctl: UPDA1  mode:  ity: ADDDSEL z-op:  op: LFU_CLEAR ctrl:
 634   A1 step values: -320 (X), 1 (Y)
 635   A1 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 320 (21), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
 636   A2 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 256 (20), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
 637         A1 x/y: 0/0, A2 x/y: 3288/0 Pattern: 0000000000000000 SRCDATA: 00FD00FD00FD00FD
 638 */
 639                                                 writedata = (srcdata & 0xFF) + (dstdata & 0xFF);
 640
 641                                                 if (!TOPBEN)
 642                                                 {
 643 //This is correct now, but slow...
 644                                                         int16 s = (srcdata & 0xFF) | (srcdata & 0x80 ? 0xFF00 : 0x0000),
 645                                                                 d = dstdata & 0xFF;
 646                                                         int16 sum = s + d;
 647
 648                                                         if (sum < 0)
 649                                                                 writedata = 0x00;
 650                                                         else if (sum > 0xFF)
 651                                                                 writedata = 0xFF;
 652                                                         else
 653                                                                 writedata = (uint32)sum;
 654                                                 }
 655
 656 //This doesn't seem right... Looks like it would muck up the low byte... !!! FIX !!!
 657                                                 writedata |= (srcdata & 0xF00) + (dstdata & 0xF00);
 658
 659                                                 if (!TOPNEN && writedata > 0xFFF)
 660                                                 {
 661                                                         writedata &= 0xFFF;
 662                                                 }
 663
 664                                                 writedata |= (srcdata & 0xF000) + (dstdata & 0xF000);
 665                                         }
 666                                         else
 667                                         {
 668                                                 if (LFU_NAN) writedata |= ~srcdata & ~dstdata;
 669                                                 if (LFU_NA)  writedata |= ~srcdata & dstdata;
 670                                                 if (LFU_AN)  writedata |= srcdata  & ~dstdata;
 671                                                 if (LFU_A)       writedata |= srcdata  & dstdata;
 672                                         }
 673
 674 //Although, this looks like it's OK... (even if it is shitty!)
 675 //According to JTRM, this is part of the four things the blitter does with the write data (the other
 676 //three being PATDSEL, ADDDSEL, and LFU (default)). I'm not sure which gets precedence, this or PATDSEL
 677 //(see above blit example)...
 678                                         if (GOURD)
 679                                                 writedata = ((gd_c[colour_index]) << 8) | (gd_i[colour_index] >> 16);
 680
 681                                         if (SRCSHADE)
 682                                         {
 683                                                 int intensity = srcdata & 0xFF;
 684                                                 int ia = gd_ia >> 16;
 685                                                 if (ia & 0x80)
 686                                                         ia = 0xFFFFFF00 | ia;
 687                                                 intensity += ia;
 688                                                 if (intensity < 0)
 689                                                         intensity = 0;
 690                                                 if (intensity > 0xFF)
 691                                                         intensity = 0xFF;
 692                                                 writedata = (srcdata & 0xFF00) | intensity;
 693                                         }
 694                                 }
 695                                 else
 696                                 {
 697                                         writedata = dstdata;
 698                                         srczdata = dstzdata;
 699                                 }
 700
 701 //Tried 2nd below for Hover Strike: No dice.
 702                                 if (/*a1_phrase_mode || */BKGWREN || !inhibit)
 703 //                              if (/*a1_phrase_mode || BKGWREN ||*/ !inhibit)
 704                                 {
 705 /*if (((REG(A1_FLAGS) >> 3) & 0x07) == 5)
 706 {
 707         uint32 offset = a1_addr+(PIXEL_OFFSET_32(a1)<<2);
 708 // (((((UINT32)a##_y >> 16) * a##_width) + (((UINT32)a##_x >> 16) & ~1)) * (1 + a##_pitch) + (((UINT32)a##_x >> 16) & 1))
 709         if ((offset >= 0x1FF020 && offset <= 0x1FF03F) || (offset >= 0x1FF820 && offset <= 0x1FF83F))
 710                 WriteLog("32bpp pixel write: A1 Phrase mode --> ");
 711 }//*/
 712                                         // write to the destination
 713                                         WRITE_PIXEL(a1, REG(A1_FLAGS), writedata);
 714                                         if (DSTWRZ)
 715                                                 WRITE_ZDATA(a1, REG(A1_FLAGS), srczdata);
 716                                 }
 717                         }
 718                         else    // if (DSTA2)                                                   // Data movement: A1 -> A2
 719                         {
 720                                 // load src data and Z
 721                                 if (SRCEN)
 722                                 {
 723                                         srcdata = READ_PIXEL(a1, REG(A1_FLAGS));
 724                                         if (SRCENZ)
 725                                                 srczdata = READ_ZDATA(a1, REG(A1_FLAGS));
 726                                         else if (cmd & 0x0001C020)      // PATDSEL | TOPBEN | TOPNEN | DSTWRZ
 727                                                 srczdata = READ_RDATA(SRCZINT, a1, REG(A1_FLAGS), a1_phrase_mode);
 728                                 }
 729                                 else
 730                                 {
 731                                         srcdata = READ_RDATA(SRCDATA, a1, REG(A1_FLAGS), a1_phrase_mode);
 732                                         if (cmd & 0x001C020)    // PATDSEL | TOPBEN | TOPNEN | DSTWRZ
 733                                                 srczdata = READ_RDATA(SRCZINT, a1, REG(A1_FLAGS), a1_phrase_mode);
 734                                 }
 735
 736                                 // load dst data and Z
 737                                 if (DSTEN)
 738                                 {
 739                                         dstdata = READ_PIXEL(a2, REG(A2_FLAGS));
 740                                         if (DSTENZ)
 741                                                 dstzdata = READ_ZDATA(a2, REG(A2_FLAGS));
 742                                         else
 743                                                 dstzdata = READ_RDATA(DSTZ, a2, REG(A2_FLAGS), a2_phrase_mode);
 744                                 }
 745                                 else
 746                                 {
 747                                         dstdata = READ_RDATA(DSTDATA, a2, REG(A2_FLAGS), a2_phrase_mode);
 748                                         if (DSTENZ)
 749                                                 dstzdata = READ_RDATA(DSTZ, a2, REG(A2_FLAGS), a2_phrase_mode);
 750                                 }
 751
 752                                 if (GOURZ)
 753                                         srczdata = z_i[colour_index] >> 16;
 754
 755                                 // apply z comparator
 756                                 if (Z_OP_INF && srczdata < dstzdata)    inhibit = 1;
 757                                 if (Z_OP_EQU && srczdata == dstzdata)   inhibit = 1;
 758                                 if (Z_OP_SUP && srczdata > dstzdata)    inhibit = 1;
 759
 760                                 // apply data comparator
 761 //NOTE: The bit comparator (BCOMPEN) is NOT the same as the data comparator!
 762                                 if (DCOMPEN | BCOMPEN)
 763                                 {
 764                                         if (!CMPDST)
 765                                         {
 766                                                 // compare source pixel with pattern pixel
 767 // AvP: Numbers are correct, but sprites are not!
 768 //This doesn't seem to be a problem... But could still be wrong...
 769 /*                                              if (srcdata == READ_RDATA(PATTERNDATA, a1, REG(A1_FLAGS), a1_phrase_mode))
 770 //                                              if (srcdata != READ_RDATA(PATTERNDATA, a1, REG(A1_FLAGS), a1_phrase_mode))
 771                                                         inhibit = 1;//*/
 772 // This is probably not 100% correct... It works in the 1bpp case
 773 // (in A1 <- A2 mode, that is...)
 774 // AvP: This is causing blocks to be written instead of bit patterns...
 775 // Works now...
 776 // NOTE: We really should separate out the BCOMPEN & DCOMPEN stuff!
 777 /*                                              uint32 A1bpp = 1 << ((REG(A1_FLAGS) >> 3) & 0x07);
 778                                                 if (A1bpp == 1 || A1bpp == 16 || A1bpp == 8)
 779                                                         inhibit = (srcdata == 0 ? 1: 0);
 780                                                 else
 781                                                         WriteLog("Blitter: Bad BPP (%u) selected for BCOMPEN mode!\n", A1bpp);//*/
 782 // What it boils down to is this:
 783                                                 if (srcdata == 0)
 784                                                         inhibit = 1;//*/
 785                                         }
 786                                         else
 787                                         {
 788                                                 // compare destination pixel with pattern pixel
 789                                                 if (dstdata == READ_RDATA(PATTERNDATA, a2, REG(A2_FLAGS), a2_phrase_mode))
 790 //                                              if (dstdata != READ_RDATA(PATTERNDATA, a2, REG(A2_FLAGS), a2_phrase_mode))
 791                                                         inhibit = 1;
 792                                         }
 793
 794 // This is DEFINITELY WRONG
 795 //                                      if (a1_phrase_mode || a2_phrase_mode)
 796 //                                              inhibit = !inhibit;
 797                                 }
 798
 799                                 if (CLIPA1)
 800                                 {
 801                                         inhibit |= (((a1_x >> 16) < a1_clip_x && (a1_x >> 16) >= 0
 802                                                 && (a1_y >> 16) < a1_clip_y && (a1_y >> 16) >= 0) ? 0 : 1);
 803                                 }
 804
 805                                 // compute the write data and store
 806                                 if (!inhibit)
 807                                 {
 808                                         if (PATDSEL)
 809                                         {
 810                                                 // use pattern data for write data
 811                                                 writedata = READ_RDATA(PATTERNDATA, a2, REG(A2_FLAGS), a2_phrase_mode);
 812                                         }
 813                                         else if (ADDDSEL)
 814                                         {
 815                                                 // intensity addition
 816                                                 writedata = (srcdata & 0xFF) + (dstdata & 0xFF);
 817                                                 if (!(TOPBEN) && writedata > 0xFF)
 818                                                         writedata = 0xFF;
 819                                                 writedata |= (srcdata & 0xF00) + (dstdata & 0xF00);
 820                                                 if (!(TOPNEN) && writedata > 0xFFF)
 821                                                         writedata = 0xFFF;
 822                                                 writedata |= (srcdata & 0xF000) + (dstdata & 0xF000);
 823                                         }
 824                                         else
 825                                         {
 826                                                 if (LFU_NAN)
 827                                                         writedata |= ~srcdata & ~dstdata;
 828                                                 if (LFU_NA)
 829                                                         writedata |= ~srcdata & dstdata;
 830                                                 if (LFU_AN)
 831                                                         writedata |= srcdata & ~dstdata;
 832                                                 if (LFU_A)
 833                                                         writedata |= srcdata & dstdata;
 834                                         }
 835
 836                                         if (GOURD)
 837                                                 writedata = ((gd_c[colour_index]) << 8) | (gd_i[colour_index] >> 16);
 838
 839                                         if (SRCSHADE)
 840                                         {
 841                                                 int intensity = srcdata & 0xFF;
 842                                                 int ia = gd_ia >> 16;
 843                                                 if (ia & 0x80)
 844                                                         ia = 0xFFFFFF00 | ia;
 845                                                 intensity += ia;
 846                                                 if (intensity < 0)
 847                                                         intensity = 0;
 848                                                 if (intensity > 0xFF)
 849                                                         intensity = 0xFF;
 850                                                 writedata = (srcdata & 0xFF00) | intensity;
 851                                         }
 852                                 }
 853                                 else
 854                                 {
 855                                         writedata = dstdata;
 856                                         srczdata = dstzdata;
 857                                 }
 858
 859                                 if (/*a2_phrase_mode || */BKGWREN || !inhibit)
 860                                 {
 861 /*if (logGo)
 862 {
 863         uint32 offset = a2_addr+(PIXEL_OFFSET_16(a2)<<1);
 864 // (((((UINT32)a##_y >> 16) * a##_width) + (((UINT32)a##_x >> 16) & ~1)) * (1 + a##_pitch) + (((UINT32)a##_x >> 16) & 1))
 865         WriteLog("[%08X:%04X] ", offset, writedata);
 866 }//*/
 867                                         // write to the destination
 868                                         WRITE_PIXEL(a2, REG(A2_FLAGS), writedata);
 869
 870                                         if (DSTWRZ)
 871                                                 WRITE_ZDATA(a2, REG(A2_FLAGS), srczdata);
 872                                 }
 873                         }
 874
 875                         // Update x and y (inner loop)
 876 //Now it does! But crappy, crappy, crappy! !!! FIX !!! [DONE]
 877 //This is less than ideal, but it works...
 878                         if (!BCOMPEN)
 879                         {//*/
 880                                 a1_x += a1_xadd, a1_y += a1_yadd;
 881                                 a2_x = (a2_x + a2_xadd) & a2_mask_x, a2_y = (a2_y + a2_yadd) & a2_mask_y;
 882                         }
 883                         else
 884                         {
 885                                 a1_y += a1_yadd, a2_y = (a2_y + a2_yadd) & a2_mask_y;
 886                                 if (!DSTA2)
 887                                 {
 888                                         a1_x += a1_xadd;
 889                                         if (bitPos % bppSrc == 0)
 890                                                 a2_x = (a2_x + a2_xadd) & a2_mask_x;
 891                                 }
 892                                 else
 893                                 {
 894                                         a2_x = (a2_x + a2_xadd) & a2_mask_x;
 895                                         if (bitPos % bppSrc == 0)
 896                                                 a1_x += a1_xadd;
 897                                 }
 898                         }//*/
 899
 900                         if (GOURZ)
 901                                 z_i[colour_index] += zadd;
 902
 903                         if (GOURD || SRCSHADE)
 904                         {
 905                                 gd_i[colour_index] += gd_ia;
 906 //Hmm, this doesn't seem to do anything...
 907 //But it is correct according to the JTRM...!
 908 if ((int32)gd_i[colour_index] < 0)
 909         gd_i[colour_index] = 0;
 910 if (gd_i[colour_index] > 0x00FFFFFF)
 911         gd_i[colour_index] = 0x00FFFFFF;//*/
 912
 913                                 gd_c[colour_index] += gd_ca;
 914 if ((int32)gd_c[colour_index] < 0)
 915         gd_c[colour_index] = 0;
 916 if (gd_c[colour_index] > 0x000000FF)
 917         gd_c[colour_index] = 0x000000FF;//*/
 918                         }
 919
 920                         if (GOURD || SRCSHADE || GOURZ)
 921                         {
 922                                 if (a1_phrase_mode)
 923 //This screws things up WORSE (for the BIOS opening screen)
 924 //                              if (a1_phrase_mode || a2_phrase_mode)
 925                                         colour_index = (colour_index + 1) & 0x03;
 926                         }
 927                 }
 928
 929 /*
 930 Here's the problem... The phrase mode code!
 931 Blit! (00100000 -> 00148000) count: 327 x 267, A1/2_FLAGS: 00004420/00004420 [cmd: 41802E01]
 932  CMD -> src: SRCEN  dst:  misc:  a1ctl: UPDA1 UPDA2 mode: DSTA2 GOURZ ity:  z-op:  op: LFU_REPLACE ctrl: SRCSHADE
 933   A1 step values: -327 (X), 1 (Y)
 934   A2 step values: -327 (X), 1 (Y) [mask (unused): 00000000 - FFFFFFFF/FFFFFFFF]
 935   A1 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 384 (22), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
 936   A2 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 384 (22), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
 937         A1 x/y: 28/58, A2 x/y: 28/58 Pattern: 00EA7BEA77EA77EA SRCDATA: 7BFF7BFF7BFF7BFF
 938
 939 Below fixes it, but then borks:
 940 ; O
 941
 942 Blit! (00110000 <- 0010B2A8) count: 12 x 12, A1/2_FLAGS: 000042E2/00000020 [cmd: 09800609]
 943  CMD -> src: SRCEN  dst: DSTEN  misc:  a1ctl: UPDA1 UPDA2 mode:  ity:  z-op:  op: LFU_REPLACE ctrl: DCOMPEN
 944   A1 step values: -15 (X), 1 (Y)
 945   A2 step values: -4 (X), 0 (Y) [mask (unused): 00000000 - FFFFFFFF/FFFFFFFF]
 946   A1 -> pitch: 4 phrases, depth: 16bpp, z-off: 3, width: 320 (21), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
 947   A2 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 1 (00), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
 948         A1 x/y: 173/144, A2 x/y: 4052/0
 949
 950 Lesse, with pre-add we'd have:
 951
 952      oooooooooooo
 953 00001111222233334444555566667777
 954   ^  ^starts here...
 955   |             ^ends here.
 956   |rolls back to here. Hmm.
 957
 958 */
 959 //NOTE: The way to fix the CD BIOS is to uncomment below and comment the stuff after
 960 //      the phrase mode mucking around. But it fucks up everything else...
 961 //#define SCREWY_CD_DEPENDENT
 962 #ifdef SCREWY_CD_DEPENDENT
 963                 a1_x += a1_step_x;
 964                 a1_y += a1_step_y;
 965                 a2_x += a2_step_x;
 966                 a2_y += a2_step_y;//*/
 967 #endif
 968
 969                 //New: Phrase mode taken into account! :-p
 970 /*              if (a1_phrase_mode)                     // v1
 971                 {
 972                         // Bump the pointer to the next phrase boundary
 973                         // Even though it works, this is crappy... Clean it up!
 974                         uint32 size = 64 / a1_psize;
 975
 976                         // Crappy kludge... ('aligning' source to destination)
 977                         if (a2_phrase_mode && DSTA2)
 978                         {
 979                                 uint32 extra = (a2_start >> 16) % size;
 980                                 a1_x += extra << 16;
 981                         }
 982
 983                         uint32 newx = (a1_x >> 16) / size;
 984                         uint32 newxrem = (a1_x >> 16) % size;
 985                         a1_x &= 0x0000FFFF;
 986                         a1_x |= (((newx + (newxrem == 0 ? 0 : 1)) * size) & 0xFFFF) << 16;
 987                 }//*/
 988                 if (a1_phrase_mode)                     // v2
 989                 {
 990                         // Bump the pointer to the next phrase boundary
 991                         // Even though it works, this is crappy... Clean it up!
 992                         uint32 size = 64 / a1_psize;
 993
 994                         // Crappy kludge... ('aligning' source to destination)
 995                         if (a2_phrase_mode && DSTA2)
 996                         {
 997                                 uint32 extra = (a2_start >> 16) % size;
 998                                 a1_x += extra << 16;
 999                         }
1000
1001                         uint32 pixelSize = (size - 1) << 16;
1002                         a1_x = (a1_x + pixelSize) & ~pixelSize;
1003                 }
1004
1005 /*              if (a2_phrase_mode)                     // v1
1006                 {
1007                         // Bump the pointer to the next phrase boundary
1008                         // Even though it works, this is crappy... Clean it up!
1009                         uint32 size = 64 / a2_psize;
1010
1011                         // Crappy kludge... ('aligning' source to destination)
1012                         // Prolly should do this for A1 channel as well... [DONE]
1013                         if (a1_phrase_mode && !DSTA2)
1014                         {
1015                                 uint32 extra = (a1_start >> 16) % size;
1016                                 a2_x += extra << 16;
1017                         }
1018
1019                         uint32 newx = (a2_x >> 16) / size;
1020                         uint32 newxrem = (a2_x >> 16) % size;
1021                         a2_x &= 0x0000FFFF;
1022                         a2_x |= (((newx + (newxrem == 0 ? 0 : 1)) * size) & 0xFFFF) << 16;
1023                 }//*/
1024                 if (a2_phrase_mode)                     // v1
1025                 {
1026                         // Bump the pointer to the next phrase boundary
1027                         // Even though it works, this is crappy... Clean it up!
1028                         uint32 size = 64 / a2_psize;
1029
1030                         // Crappy kludge... ('aligning' source to destination)
1031                         // Prolly should do this for A1 channel as well... [DONE]
1032                         if (a1_phrase_mode && !DSTA2)
1033                         {
1034                                 uint32 extra = (a1_start >> 16) % size;
1035                                 a2_x += extra << 16;
1036                         }
1037
1038                         uint32 pixelSize = (size - 1) << 16;
1039                         a2_x = (a2_x + pixelSize) & ~pixelSize;
1040                 }
1041
1042                 //Not entirely: This still mucks things up... !!! FIX !!!
1043                 //Should this go before or after the phrase mode mucking around?
1044 #ifndef SCREWY_CD_DEPENDENT
1045                 a1_x += a1_step_x;
1046                 a1_y += a1_step_y;
1047                 a2_x += a2_step_x;
1048                 a2_y += a2_step_y;//*/
1049 #endif
1050         }
1051
1052         // write values back to registers
1053         WREG(A1_PIXEL,  (a1_y & 0xFFFF0000) | ((a1_x >> 16) & 0xFFFF));
1054         WREG(A1_FPIXEL, (a1_y << 16) | (a1_x & 0xFFFF));
1055         WREG(A2_PIXEL,  (a2_y & 0xFFFF0000) | ((a2_x >> 16) & 0xFFFF));
1056 specialLog = false;
1057 }
1058
1059 void blitter_blit(uint32 cmd)
1060 {
1061 //Apparently this is doing *something*, just not sure exactly what...
1062 /*if (cmd == 0x41802E01)
1063 {
1064         WriteLog("BLIT: Found our blit. Was: %08X ", cmd);
1065         cmd = 0x01800E01;
1066         WriteLog("Is: %08X\n", cmd);
1067 }//*/
1068
1069         uint32 pitchValue[4] = { 0, 1, 3, 2 };
1070         colour_index = 0;
1071         src = cmd & 0x07;
1072         dst = (cmd >> 3) & 0x07;
1073         misc = (cmd >> 6) & 0x03;
1074         a1ctl = (cmd >> 8) & 0x7;
1075         mode = (cmd >> 11) & 0x07;
1076         ity = (cmd >> 14) & 0x0F;
1077         zop = (cmd >> 18) & 0x07;
1078         op = (cmd >> 21) & 0x0F;
1079         ctrl = (cmd >> 25) & 0x3F;
1080
1081         // Addresses in A1/2_BASE are *phrase* aligned, i.e., bottom three bits are ignored!
1082         // NOTE: This fixes Rayman's bad collision detection AND keeps T2K working!
1083         a1_addr = REG(A1_BASE) & 0xFFFFFFF8;
1084         a2_addr = REG(A2_BASE) & 0xFFFFFFF8;
1085
1086         a1_zoffs = (REG(A1_FLAGS) >> 6) & 7;
1087         a2_zoffs = (REG(A2_FLAGS) >> 6) & 7;
1088
1089         xadd_a1_control = (REG(A1_FLAGS) >> 16) & 0x03;
1090         xadd_a2_control = (REG(A2_FLAGS) >> 16) & 0x03;
1091
1092         a1_pitch = pitchValue[(REG(A1_FLAGS) & 0x03)];
1093         a2_pitch = pitchValue[(REG(A2_FLAGS) & 0x03)];
1094
1095         n_pixels = REG(PIXLINECOUNTER) & 0xFFFF;
1096         n_lines = (REG(PIXLINECOUNTER) >> 16) & 0xFFFF;
1097
1098         a1_x = (REG(A1_PIXEL) << 16) | (REG(A1_FPIXEL) & 0xFFFF);
1099         a1_y = (REG(A1_PIXEL) & 0xFFFF0000) | (REG(A1_FPIXEL) >> 16);
1100 //According to the JTRM, X is restricted to 15 bits and Y is restricted to 12.
1101 //But it seems to fuck up T2K! !!! FIX !!!
1102 //Could it be sign extended??? Doesn't seem to be so according to JTRM
1103 //      a1_x &= 0x7FFFFFFF, a1_y &= 0x0FFFFFFF;
1104 //Actually, it says that the X is 16 bits. But it still seems to mess with the Y when restricted to 12...
1105 //      a1_y &= 0x0FFFFFFF;
1106
1107 //      a1_width = blitter_scanline_width[((REG(A1_FLAGS) & 0x00007E00) >> 9)];
1108 // According to JTRM, this must give a *whole number* of phrases in the current
1109 // pixel size (this means the lookup above is WRONG)... !!! FIX !!!
1110         UINT32 m = (REG(A1_FLAGS) >> 9) & 0x03, e = (REG(A1_FLAGS) >> 11) & 0x0F;
1111         a1_width = ((0x04 | m) << e) >> 2;//*/
1112
1113         a2_x = (REG(A2_PIXEL) & 0x0000FFFF) << 16;
1114         a2_y = (REG(A2_PIXEL) & 0xFFFF0000);
1115 //According to the JTRM, X is restricted to 15 bits and Y is restricted to 12.
1116 //But it seems to fuck up T2K! !!! FIX !!!
1117 //      a2_x &= 0x7FFFFFFF, a2_y &= 0x0FFFFFFF;
1118 //Actually, it says that the X is 16 bits. But it still seems to mess with the Y when restricted to 12...
1119 //      a2_y &= 0x0FFFFFFF;
1120
1121 //      a2_width = blitter_scanline_width[((REG(A2_FLAGS) & 0x00007E00) >> 9)];
1122 // According to JTRM, this must give a *whole number* of phrases in the current
1123 // pixel size (this means the lookup above is WRONG)... !!! FIX !!!
1124         m = (REG(A2_FLAGS) >> 9) & 0x03, e = (REG(A2_FLAGS) >> 11) & 0x0F;
1125         a2_width = ((0x04 | m) << e) >> 2;//*/
1126         a2_mask_x = ((REG(A2_MASK) & 0x0000FFFF) << 16) | 0xFFFF;
1127         a2_mask_y = (REG(A2_MASK) & 0xFFFF0000) | 0xFFFF;
1128
1129         // Check for "use mask" flag
1130         if (!(REG(A2_FLAGS) & 0x8000))
1131         {
1132                 a2_mask_x = 0xFFFFFFFF; // must be 16.16
1133                 a2_mask_y = 0xFFFFFFFF; // must be 16.16
1134         }
1135
1136         a1_phrase_mode = 0;
1137
1138         // According to the official documentation, a hardware bug ties A2's yadd bit to A1's...
1139         a2_yadd = a1_yadd = (YADD1_A1 ? 1 << 16 : 0);
1140
1141         if (YSIGNSUB_A1)
1142                 a1_yadd = -a1_yadd;
1143
1144         // determine a1_xadd
1145         switch (xadd_a1_control)
1146         {
1147         case XADDPHR:
1148 // This is a documented Jaguar bug relating to phrase mode and truncation... Look into it!
1149                 // add phrase offset to X and truncate
1150                 a1_xadd = 1 << 16;
1151                 a1_phrase_mode = 1;
1152                 break;
1153         case XADDPIX:
1154                 // add pixelsize (1) to X
1155                 a1_xadd = 1 << 16;
1156                 break;
1157         case XADD0:
1158                 // add zero (for those nice vertical lines)
1159                 a1_xadd = 0;
1160                 break;
1161         case XADDINC:
1162                 // add the contents of the increment register
1163                 a1_xadd = (REG(A1_INC) << 16)            | (REG(A1_FINC) & 0x0000FFFF);
1164                 a1_yadd = (REG(A1_INC) & 0xFFFF0000) | (REG(A1_FINC) >> 16);
1165                 break;
1166         }
1167
1168
1169 //Blit! (0011D000 -> 000B9600) count: 228 x 1, A1/2_FLAGS: 00073820/00064220 [cmd: 41802801]
1170 //  A1 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 128 (1C), addctl: XADDINC YADD1 XSIGNADD YSIGNADD
1171 //  A2 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 320 (21), addctl: XADD0 YADD1 XSIGNADD YSIGNADD
1172 //if (YADD1_A1 && YADD1_A2 && xadd_a2_control == XADD0 && xadd_a1_control == XADDINC)// &&
1173 //      UINT32 a1f = REG(A1_FLAGS), a2f = REG(A2_FLAGS);
1174 //Ok, so this ISN'T it... Prolly the XADDPHR code above that's doing it...
1175 //if (REG(A1_FLAGS) == 0x00073820 && REG(A2_FLAGS) == 0x00064220 && cmd == 0x41802801)
1176 //        A1 x/y: 14368/7, A2 x/y: 150/36
1177 //This is it... The problem...
1178 //if ((a1_x >> 16) == 14368) // 14368 = $3820
1179 //      return; //Lesse what we got...
1180
1181         if (XSIGNSUB_A1)
1182                 a1_xadd = -a1_xadd;
1183
1184         if (YSIGNSUB_A2)
1185                 a2_yadd = -a2_yadd;
1186
1187         a2_phrase_mode = 0;
1188
1189         // determine a2_xadd
1190         switch (xadd_a2_control)
1191         {
1192         case XADDPHR:
1193                 // add phrase offset to X and truncate
1194                 a2_xadd = 1 << 16;
1195                 a2_phrase_mode = 1;
1196                 break;
1197         case XADDPIX:
1198                 // add pixelsize (1) to X
1199                 a2_xadd = 1 << 16;
1200                 break;
1201         case XADD0:
1202                 // add zero (for those nice vertical lines)
1203                 a2_xadd = 0;
1204                 break;
1205 //This really isn't a valid bit combo for A2... Shouldn't this cause the blitter to just say no?
1206         case XADDINC:
1207 WriteLog("BLIT: Asked to use invalid bit combo (XADDINC) for A2...\n");
1208                 // add the contents of the increment register
1209                 // since there is no register for a2 we just add 1
1210 //Let's do nothing, since it's not listed as a valid bit combo...
1211 //              a2_xadd = 1 << 16;
1212                 break;
1213         }
1214
1215         if (XSIGNSUB_A2)
1216                 a2_xadd = -a2_xadd;
1217
1218         // Modify outer loop steps based on blitter command
1219
1220         a1_step_x = 0;
1221         a1_step_y = 0;
1222         a2_step_x = 0;
1223         a2_step_y = 0;
1224
1225         if (UPDA1F)
1226                 a1_step_x = (REG(A1_FSTEP) & 0xFFFF),
1227                 a1_step_y = (REG(A1_FSTEP) >> 16);
1228
1229         if (UPDA1)
1230                 a1_step_x |= ((REG(A1_STEP) & 0x0000FFFF) << 16),
1231                 a1_step_y |= ((REG(A1_STEP) & 0xFFFF0000));
1232
1233         if (UPDA2)
1234                 a2_step_x = (REG(A2_STEP) & 0x0000FFFF) << 16,
1235                 a2_step_y = (REG(A2_STEP) & 0xFFFF0000);
1236
1237         outer_loop = n_lines;
1238
1239         // Clipping...
1240
1241         if (CLIPA1)
1242                 a1_clip_x = REG(A1_CLIP) & 0x7FFF,
1243                 a1_clip_y = (REG(A1_CLIP) >> 16) & 0x7FFF;
1244
1245 // This phrase sizing is incorrect as well... !!! FIX !!! [NOTHING TO FIX]
1246 // Err, this is pixel size... (and it's OK)
1247         a2_psize = 1 << ((REG(A2_FLAGS) >> 3) & 0x07);
1248         a1_psize = 1 << ((REG(A1_FLAGS) >> 3) & 0x07);
1249
1250         // Z-buffering
1251         if (GOURZ)
1252         {
1253                 zadd = REG(ZINC);
1254
1255                 for(int v=0; v<4; v++)
1256                         z_i[v] = REG(PHRASEZ0 + v*4);
1257         }
1258
1259         // Gouraud shading
1260         if (GOURD || GOURZ || SRCSHADE)
1261         {
1262                 gd_c[0] = blitter_ram[PATTERNDATA + 6];
1263                 gd_i[0] = ((uint32)blitter_ram[PATTERNDATA + 7] << 16)
1264                         | ((uint32)blitter_ram[SRCDATA + 6] << 8) | blitter_ram[SRCDATA + 7];
1265
1266                 gd_c[1] = blitter_ram[PATTERNDATA + 4];
1267                 gd_i[1] = ((uint32)blitter_ram[PATTERNDATA + 5] << 16)
1268                         | ((uint32)blitter_ram[SRCDATA + 4] << 8) | blitter_ram[SRCDATA + 5];
1269
1270                 gd_c[2] = blitter_ram[PATTERNDATA + 2];
1271                 gd_i[2] = ((uint32)blitter_ram[PATTERNDATA + 3] << 16)
1272                         | ((uint32)blitter_ram[SRCDATA + 2] << 8) | blitter_ram[SRCDATA + 3];
1273
1274                 gd_c[3] = blitter_ram[PATTERNDATA + 0];
1275                 gd_i[3] = ((uint32)blitter_ram[PATTERNDATA + 1] << 16)
1276                         | ((uint32)blitter_ram[SRCDATA + 0] << 8) | blitter_ram[SRCDATA + 1];
1277
1278                 gouraud_add = REG(INTENSITYINC);
1279
1280                 gd_ia = gouraud_add & 0x00FFFFFF;
1281                 if (gd_ia & 0x00800000)
1282                         gd_ia = 0xFF000000 | gd_ia;
1283
1284                 gd_ca = (gouraud_add >> 24) & 0xFF;
1285                 if (gd_ca & 0x00000080)
1286                         gd_ca = 0xFFFFFF00 | gd_ca;
1287         }
1288
1289         // Bit comparitor fixing...
1290 /*      if (BCOMPEN)
1291         {
1292                 // Determine the data flow direction...
1293                 if (!DSTA2)
1294                         a2_step_x /= (1 << ((REG(A2_FLAGS) >> 3) & 0x07));
1295                 else
1296                         ;//add this later
1297         }//*/
1298 /*      if (BCOMPEN)//Kludge for Hover Strike... !!! FIX !!!
1299         {
1300                 // Determine the data flow direction...
1301                 if (!DSTA2)
1302                         a2_x <<= 3;
1303         }//*/
1304
1305 #ifdef LOG_BLITS
1306         if (start_logging)
1307         {
1308                 WriteLog("Blit!\n");
1309                 WriteLog("  cmd      = 0x%.8x\n",cmd);
1310                 WriteLog("  a1_base  = %08X\n", a1_addr);
1311                 WriteLog("  a1_pitch = %d\n", a1_pitch);
1312                 WriteLog("  a1_psize = %d\n", a1_psize);
1313                 WriteLog("  a1_width = %d\n", a1_width);
1314                 WriteLog("  a1_xadd  = %f (phrase=%d)\n", (float)a1_xadd / 65536.0, a1_phrase_mode);
1315                 WriteLog("  a1_yadd  = %f\n", (float)a1_yadd / 65536.0);
1316                 WriteLog("  a1_xstep = %f\n", (float)a1_step_x / 65536.0);
1317                 WriteLog("  a1_ystep = %f\n", (float)a1_step_y / 65536.0);
1318                 WriteLog("  a1_x     = %f\n", (float)a1_x / 65536.0);
1319                 WriteLog("  a1_y     = %f\n", (float)a1_y / 65536.0);
1320                 WriteLog("  a1_zoffs = %i\n",a1_zoffs);
1321
1322                 WriteLog("  a2_base  = %08X\n", a2_addr);
1323                 WriteLog("  a2_pitch = %d\n", a2_pitch);
1324                 WriteLog("  a2_psize = %d\n", a2_psize);
1325                 WriteLog("  a2_width = %d\n", a2_width);
1326                 WriteLog("  a2_xadd  = %f (phrase=%d)\n", (float)a2_xadd / 65536.0, a2_phrase_mode);
1327                 WriteLog("  a2_yadd  = %f\n", (float)a2_yadd / 65536.0);
1328                 WriteLog("  a2_xstep = %f\n", (float)a2_step_x / 65536.0);
1329                 WriteLog("  a2_ystep = %f\n", (float)a2_step_y / 65536.0);
1330                 WriteLog("  a2_x     = %f\n", (float)a2_x / 65536.0);
1331                 WriteLog("  a2_y     = %f\n", (float)a2_y / 65536.0);
1332                 WriteLog("  a2_mask_x= 0x%.4x\n",a2_mask_x);
1333                 WriteLog("  a2_mask_y= 0x%.4x\n",a2_mask_y);
1334                 WriteLog("  a2_zoffs = %i\n",a2_zoffs);
1335
1336                 WriteLog("  count    = %d x %d\n", n_pixels, n_lines);
1337
1338                 WriteLog("  command  = %08X\n", cmd);
1339                 WriteLog("  dsten    = %i\n",DSTEN);
1340                 WriteLog("  srcen    = %i\n",SRCEN);
1341                 WriteLog("  patdsel  = %i\n",PATDSEL);
1342                 WriteLog("  color    = 0x%.8x\n",REG(PATTERNDATA));
1343                 WriteLog("  dcompen  = %i\n",DCOMPEN);
1344                 WriteLog("  bcompen  = %i\n",BCOMPEN);
1345                 WriteLog("  cmpdst   = %i\n",CMPDST);
1346                 WriteLog("  GOURZ   = %i\n",GOURZ);
1347                 WriteLog("  GOURD   = %i\n",GOURD);
1348                 WriteLog("  SRCSHADE= %i\n",SRCSHADE);
1349         }
1350 #endif
1351
1352 //NOTE: Pitch is ignored!
1353
1354 //This *might* be the altimeter blits (they are)...
1355 //On captured screen, x-pos for black (inner) is 259, for pink is 257
1356 //Black is short by 3, pink is short by 1...
1357 /*
1358 Blit! (00110000 <- 000BF010) count: 9 x 31, A1/2_FLAGS: 000042E2/00010020 [cmd: 00010200]
1359  CMD -> src:  dst:  misc:  a1ctl: UPDA1  mode:  ity: PATDSEL z-op:  op: LFU_CLEAR ctrl:
1360   A1 -> pitch: 4 phrases, depth: 16bpp, z-off: 3, width: 320 (21), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
1361   A2 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 1 (00), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
1362         A1 x/y: 262/124, A2 x/y: 128/0
1363 Blit! (00110000 <- 000BF010) count: 5 x 38, A1/2_FLAGS: 000042E2/00010020 [cmd: 00010200]
1364  CMD -> src:  dst:  misc:  a1ctl: UPDA1  mode:  ity: PATDSEL z-op:  op: LFU_CLEAR ctrl:
1365   A1 -> pitch: 4 phrases, depth: 16bpp, z-off: 3, width: 320 (21), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
1366   A2 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 1 (00), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
1367         A1 x/y: 264/117, A2 x/y: 407/0
1368
1369 Blit! (00110000 <- 000BF010) count: 9 x 23, A1/2_FLAGS: 000042E2/00010020 [cmd: 00010200]
1370  CMD -> src:  dst:  misc:  a1ctl: UPDA1  mode:  ity: PATDSEL z-op:  op: LFU_CLEAR ctrl:
1371   A1 step values: -10 (X), 1 (Y)
1372   A1 -> pitch: 4(2) phrases, depth: 16bpp, z-off: 3, width: 320 (21), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
1373   A2 -> pitch: 1(0) phrases, depth: 16bpp, z-off: 0, width: 1 (00), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
1374         A1 x/y: 262/132, A2 x/y: 129/0
1375 Blit! (00110000 <- 000BF010) count: 5 x 27, A1/2_FLAGS: 000042E2/00010020 [cmd: 00010200]
1376  CMD -> src:  dst:  misc:  a1ctl: UPDA1  mode:  ity: PATDSEL z-op:  op: LFU_CLEAR ctrl:
1377   A1 step values: -8 (X), 1 (Y)
1378   A1 -> pitch: 4(2) phrases, depth: 16bpp, z-off: 3, width: 320 (21), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
1379   A2 -> pitch: 1(0) phrases, depth: 16bpp, z-off: 0, width: 1 (00), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
1380         A1 x/y: 264/128, A2 x/y: 336/0
1381
1382   264v       vCursor ends up here...
1383      xxxxx...`
1384      111122223333
1385
1386 262v         vCursor ends up here...
1387    xxxxxxxxx.'
1388  1111222233334444
1389
1390 Fixed! Now for more:
1391
1392 ; This looks like the ship icon in the upper left corner...
1393
1394 Blit! (00110000 <- 0010B2A8) count: 11 x 12, A1/2_FLAGS: 000042E2/00000020 [cmd: 09800609]
1395  CMD -> src: SRCEN  dst: DSTEN  misc:  a1ctl: UPDA1 UPDA2 mode:  ity:  z-op:  op: LFU_REPLACE ctrl: DCOMPEN
1396   A1 step values: -12 (X), 1 (Y)
1397   A2 step values: 0 (X), 0 (Y) [mask (unused): 00000000 - FFFFFFFF/FFFFFFFF]
1398   A1 -> pitch: 4 phrases, depth: 16bpp, z-off: 3, width: 320 (21), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
1399   A2 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 1 (00), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
1400         A1 x/y: 20/24, A2 x/y: 5780/0
1401
1402 Also fixed!
1403
1404 More (not sure this is a blitter problem as much as it's a GPU problem):
1405 All but the "M" are trashed...
1406 This does *NOT* look like a blitter problem, as it's rendering properly...
1407 Actually, if you look at the A1 step values, there IS a discrepancy!
1408
1409 ; D
1410
1411 Blit! (00110000 <- 0010B2A8) count: 12 x 12, A1/2_FLAGS: 000042E2/00000020 [cmd: 09800609]
1412  CMD -> src: SRCEN  dst: DSTEN  misc:  a1ctl: UPDA1 UPDA2 mode:  ity:  z-op:  op: LFU_REPLACE ctrl: DCOMPEN
1413   A1 step values: -14 (X), 1 (Y)
1414   A2 step values: -4 (X), 0 (Y) [mask (unused): 00000000 - FFFFFFFF/FFFFFFFF]
1415   A1 -> pitch: 4 phrases, depth: 16bpp, z-off: 3, width: 320 (21), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
1416   A2 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 1 (00), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
1417         A1 x/y: 134/144, A2 x/y: 2516/0
1418 ;129,146: +5,-2
1419
1420 ; E
1421
1422 Blit! (00110000 <- 0010B2A8) count: 12 x 12, A1/2_FLAGS: 000042E2/00000020 [cmd: 09800609]
1423  CMD -> src: SRCEN  dst: DSTEN  misc:  a1ctl: UPDA1 UPDA2 mode:  ity:  z-op:  op: LFU_REPLACE ctrl: DCOMPEN
1424   A1 step values: -13 (X), 1 (Y)
1425   A2 step values: -4 (X), 0 (Y) [mask (unused): 00000000 - FFFFFFFF/FFFFFFFF]
1426   A1 -> pitch: 4 phrases, depth: 16bpp, z-off: 3, width: 320 (21), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
1427   A2 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 1 (00), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
1428         A1 x/y: 147/144, A2 x/y: 2660/0
1429
1430 ; M
1431
1432 Blit! (00110000 <- 0010B2A8) count: 12 x 12, A1/2_FLAGS: 000042E2/00000020 [cmd: 09800609]
1433  CMD -> src: SRCEN  dst: DSTEN  misc:  a1ctl: UPDA1 UPDA2 mode:  ity:  z-op:  op: LFU_REPLACE ctrl: DCOMPEN
1434   A1 step values: -12 (X), 1 (Y)
1435   A2 step values: 0 (X), 0 (Y) [mask (unused): 00000000 - FFFFFFFF/FFFFFFFF]
1436   A1 -> pitch: 4 phrases, depth: 16bpp, z-off: 3, width: 320 (21), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
1437   A2 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 1 (00), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
1438         A1 x/y: 160/144, A2 x/y: 3764/0
1439
1440 ; O
1441
1442 Blit! (00110000 <- 0010B2A8) count: 12 x 12, A1/2_FLAGS: 000042E2/00000020 [cmd: 09800609]
1443  CMD -> src: SRCEN  dst: DSTEN  misc:  a1ctl: UPDA1 UPDA2 mode:  ity:  z-op:  op: LFU_REPLACE ctrl: DCOMPEN
1444   A1 step values: -15 (X), 1 (Y)
1445   A2 step values: -4 (X), 0 (Y) [mask (unused): 00000000 - FFFFFFFF/FFFFFFFF]
1446   A1 -> pitch: 4 phrases, depth: 16bpp, z-off: 3, width: 320 (21), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
1447   A2 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 1 (00), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
1448         A1 x/y: 173/144, A2 x/y: 4052/0
1449
1450 */
1451 //extern int op_start_log;
1452 if (blit_start_log)
1453 {
1454         char * ctrlStr[4] = { "XADDPHR\0", "XADDPIX\0", "XADD0\0", "XADDINC\0" };
1455         char * bppStr[8] = { "1bpp\0", "2bpp\0", "4bpp\0", "8bpp\0", "16bpp\0", "32bpp\0", "???\0", "!!!\0" };
1456         char * opStr[16] = { "LFU_CLEAR", "LFU_NSAND", "LFU_NSAD", "LFU_NOTS", "LFU_SAND", "LFU_NOTD", "LFU_N_SXORD", "LFU_NSORND",
1457                 "LFU_SAD", "LFU_XOR", "LFU_D", "LFU_NSORD", "LFU_REPLACE", "LFU_SORND", "LFU_SORD", "LFU_ONE" };
1458         uint32 /*src = cmd & 0x07, dst = (cmd >> 3) & 0x07, misc = (cmd >> 6) & 0x03,
1459                 a1ctl = (cmd >> 8) & 0x07,*/ mode = (cmd >> 11) & 0x07/*, ity = (cmd >> 14) & 0x0F,
1460                 zop = (cmd >> 18) & 0x07, op = (cmd >> 21) & 0x0F, ctrl = (cmd >> 25) & 0x3F*/;
1461         UINT32 a1f = REG(A1_FLAGS), a2f = REG(A2_FLAGS);
1462         uint32 p1 = a1f & 0x07, p2 = a2f & 0x07,
1463                 d1 = (a1f >> 3) & 0x07, d2 = (a2f >> 3) & 0x07,
1464                 zo1 = (a1f >> 6) & 0x07, zo2 = (a2f >> 6) & 0x07,
1465                 w1 = (a1f >> 9) & 0x3F, w2 = (a2f >> 9) & 0x3F,
1466                 ac1 = (a1f >> 16) & 0x1F, ac2 = (a2f >> 16) & 0x1F;
1467         UINT32 iw1 = ((0x04 | (w1 & 0x03)) << ((w1 & 0x3C) >> 2)) >> 2;
1468         UINT32 iw2 = ((0x04 | (w2 & 0x03)) << ((w2 & 0x3C) >> 2)) >> 2;
1469         WriteLog("Blit! (%08X %s %08X) count: %d x %d, A1/2_FLAGS: %08X/%08X [cmd: %08X]\n", a1_addr, (mode&0x01 ? "->" : "<-"), a2_addr, n_pixels, n_lines, a1f, a2f, cmd);
1470 //      WriteLog(" CMD -> src: %d, dst: %d, misc: %d, a1ctl: %d, mode: %d, ity: %1X, z-op: %d, op: %1X, ctrl: %02X\n", src, dst, misc, a1ctl, mode, ity, zop, op, ctrl);
1471
1472         WriteLog(" CMD -> src: %s%s%s ", (cmd & 0x0001 ? "SRCEN " : ""), (cmd & 0x0002 ? "SRCENZ " : ""), (cmd & 0x0004 ? "SRCENX" : ""));
1473         WriteLog("dst: %s%s%s ", (cmd & 0x0008 ? "DSTEN " : ""), (cmd & 0x0010 ? "DSTENZ " : ""), (cmd & 0x0020 ? "DSTWRZ" : ""));
1474         WriteLog("misc: %s%s ", (cmd & 0x0040 ? "CLIP_A1 " : ""), (cmd & 0x0080 ? "???" : ""));
1475         WriteLog("a1ctl: %s%s%s ", (cmd & 0x0100 ? "UPDA1F " : ""), (cmd & 0x0200 ? "UPDA1 " : ""), (cmd & 0x0400 ? "UPDA2" : ""));
1476         WriteLog("mode: %s%s%s ", (cmd & 0x0800 ? "DSTA2 " : ""), (cmd & 0x1000 ? "GOURD " : ""), (cmd & 0x2000 ? "GOURZ" : ""));
1477         WriteLog("ity: %s%s%s%s ", (cmd & 0x4000 ? "TOPBEN " : ""), (cmd & 0x8000 ? "TOPNEN " : ""), (cmd & 0x00010000 ? "PATDSEL" : ""), (cmd & 0x00020000 ? "ADDDSEL" : ""));
1478         WriteLog("z-op: %s%s%s ", (cmd & 0x00040000 ? "ZMODELT " : ""), (cmd & 0x00080000 ? "ZMODEEQ " : ""), (cmd & 0x00100000 ? "ZMODEGT" : ""));
1479         WriteLog("op: %s ", opStr[(cmd >> 21) & 0x0F]);
1480         WriteLog("ctrl: %s%s%s%s%s%s\n", (cmd & 0x02000000 ? "CMPDST " : ""), (cmd & 0x04000000 ? "BCOMPEN " : ""), (cmd & 0x08000000 ? "DCOMPEN " : ""), (cmd & 0x10000000 ? "BKGWREN " : ""), (cmd & 0x20000000 ? "BUSHI " : ""), (cmd & 0x40000000 ? "SRCSHADE" : ""));
1481
1482         if (UPDA1)
1483                 WriteLog("  A1 step values: %d (X), %d (Y)\n", a1_step_x >> 16, a1_step_y >> 16);
1484
1485         if (UPDA2)
1486                 WriteLog("  A2 step values: %d (X), %d (Y) [mask (%sused): %08X - %08X/%08X]\n", a2_step_x >> 16, a2_step_y >> 16, (a2f & 0x8000 ? "" : "un"), REG(A2_MASK), a2_mask_x, a2_mask_y);
1487
1488         WriteLog("  A1 -> pitch: %d phrases, depth: %s, z-off: %d, width: %d (%02X), addctl: %s %s %s %s\n", 1 << p1, bppStr[d1], zo1, iw1, w1, ctrlStr[ac1&0x03], (ac1&0x04 ? "YADD1" : "YADD0"), (ac1&0x08 ? "XSIGNSUB" : "XSIGNADD"), (ac1&0x10 ? "YSIGNSUB" : "YSIGNADD"));
1489         WriteLog("  A2 -> pitch: %d phrases, depth: %s, z-off: %d, width: %d (%02X), addctl: %s %s %s %s\n", 1 << p2, bppStr[d2], zo2, iw2, w2, ctrlStr[ac2&0x03], (ac2&0x04 ? "YADD1" : "YADD0"), (ac2&0x08 ? "XSIGNSUB" : "XSIGNADD"), (ac2&0x10 ? "YSIGNSUB" : "YSIGNADD"));
1490         WriteLog("        A1 x/y: %d/%d, A2 x/y: %d/%d Pattern: %08X%08X SRCDATA: %08X%08X\n", a1_x >> 16, a1_y >> 16, a2_x >> 16, a2_y >> 16, REG(PATTERNDATA), REG(PATTERNDATA + 4), REG(SRCDATA), REG(SRCDATA + 4));
1491 //      blit_start_log = 0;
1492 //      op_start_log = 1;
1493 }
1494
1495         blitter_working = 1;
1496 //#ifndef USE_GENERIC_BLITTER
1497 //      if (!blitter_execute_cached_code(blitter_in_cache(cmd)))
1498 //#endif
1499         blitter_generic(cmd);
1500
1501 /*if (blit_start_log)
1502 {
1503         if (a1_addr == 0xF03000 && a2_addr == 0x004D58)
1504         {
1505                 WriteLog("\nBytes at 004D58:\n");
1506                 for(int i=0x004D58; i<0x004D58+(10*127*4); i++)
1507                         WriteLog("%02X ", JaguarReadByte(i));
1508                 WriteLog("\nBytes at F03000:\n");
1509                 for(int i=0xF03000; i<0xF03000+(6*127*4); i++)
1510                         WriteLog("%02X ", JaguarReadByte(i));
1511                 WriteLog("\n\n");
1512         }
1513 }//*/
1514
1515         blitter_working = 0;
1516 }
1517 #endif                                                                                  // of the #if 0 near the top...
1518 /*******************************************************************************
1519 ********************** STUFF CUT ABOVE THIS LINE! ******************************
1520 *******************************************************************************/
1521
1522 void blitter_init(void)
1523 {
1524         blitter_reset();
1525 }
1526
1527 void blitter_reset(void)
1528 {
1529         memset(blitter_ram, 0x00, 0xA0);
1530 }
1531
1532 void blitter_done(void)
1533 {
1534         WriteLog("BLIT: Done.\n");
1535 }
1536
1537 uint8 BlitterReadByte(uint32 offset, uint32 who/*=UNKNOWN*/)
1538 {
1539         offset &= 0xFF;
1540
1541         // status register
1542 //This isn't cycle accurate--how to fix? !!! FIX !!!
1543 //Probably have to do some multi-threaded implementation or at least a reentrant safe implementation...
1544         if (offset == (0x38 + 3))
1545                 return 0x01;    // always idle
1546
1547 // CHECK HERE ONCE THIS FIX HAS BEEN TESTED: [ ]
1548 //Fix for AvP:
1549         if (offset >= 0x04 && offset <= 0x07)
1550 //This is it. I wonder if it just ignores the lower three bits?
1551 //No, this is a documented Jaguar I bug. It also bites the read at $F02230 as well...
1552                 return blitter_ram[offset + 0x08];              // A1_PIXEL ($F0220C) read at $F02204
1553
1554         if (offset >= 0x2C && offset <= 0x2F)
1555                 return blitter_ram[offset + 0x04];              // A2_PIXEL ($F02230) read at $F0222C
1556
1557         return blitter_ram[offset];
1558 }
1559
1560 //Crappy!
1561 uint16 BlitterReadWord(uint32 offset, uint32 who/*=UNKNOWN*/)
1562 {
1563         return ((uint16)BlitterReadByte(offset, who) << 8) | (uint16)BlitterReadByte(offset+1, who);
1564 }
1565
1566 //Crappy!
1567 uint32 BlitterReadLong(uint32 offset, uint32 who/*=UNKNOWN*/)
1568 {
1569         return (BlitterReadWord(offset, who) << 16) | BlitterReadWord(offset+2, who);
1570 }
1571
1572 void BlitterWriteByte(uint32 offset, uint8 data, uint32 who/*=UNKNOWN*/)
1573 {
1574 /*if (offset & 0xFF == 0x7B)
1575         WriteLog("--> Wrote to B_STOP: value -> %02X\n", data);*/
1576         offset &= 0xFF;
1577 /*if ((offset >= PATTERNDATA) && (offset < PATTERNDATA + 8))
1578 {
1579         printf("--> %s wrote %02X to byte %u of PATTERNDATA...\n", whoName[who], data, offset - PATTERNDATA);
1580         fflush(stdout);
1581 }//*/
1582
1583         // This handles writes to INTENSITY0-3 by also writing them to their proper places in
1584         // PATTERNDATA & SOURCEDATA (should do the same for the Z registers! !!! FIX !!! [DONE])
1585         if ((offset >= 0x7C) && (offset <= 0x9B))
1586         {
1587                 switch (offset)
1588                 {
1589                 // INTENSITY registers 0-3
1590                 case 0x7C: break;
1591                 case 0x7D: blitter_ram[PATTERNDATA + 7] = data; break;
1592                 case 0x7E: blitter_ram[SRCDATA + 6] = data; break;
1593                 case 0x7F: blitter_ram[SRCDATA + 7] = data; break;
1594
1595                 case 0x80: break;
1596                 case 0x81: blitter_ram[PATTERNDATA + 5] = data; break;
1597                 case 0x82: blitter_ram[SRCDATA + 4] = data; break;
1598                 case 0x83: blitter_ram[SRCDATA + 5] = data; break;
1599
1600                 case 0x84: break;
1601                 case 0x85: blitter_ram[PATTERNDATA + 3] = data; break;
1602                 case 0x86: blitter_ram[SRCDATA + 2] = data; break;
1603                 case 0x87: blitter_ram[SRCDATA + 3] = data; break;
1604
1605                 case 0x88: break;
1606                 case 0x89: blitter_ram[PATTERNDATA + 1] = data; break;
1607                 case 0x8A: blitter_ram[SRCDATA + 0] = data; break;
1608                 case 0x8B: blitter_ram[SRCDATA + 1] = data; break;
1609
1610
1611                 // Z registers 0-3
1612                 case 0x8C: blitter_ram[SRCZINT + 6] = data; break;
1613                 case 0x8D: blitter_ram[SRCZINT + 7] = data; break;
1614                 case 0x8E: blitter_ram[SRCZFRAC + 6] = data; break;
1615                 case 0x8F: blitter_ram[SRCZFRAC + 7] = data; break;
1616
1617                 case 0x90: blitter_ram[SRCZINT + 4] = data; break;
1618                 case 0x91: blitter_ram[SRCZINT + 5] = data; break;
1619                 case 0x92: blitter_ram[SRCZFRAC + 4] = data; break;
1620                 case 0x93: blitter_ram[SRCZFRAC + 5] = data; break;
1621
1622                 case 0x94: blitter_ram[SRCZINT + 2] = data; break;
1623                 case 0x95: blitter_ram[SRCZINT + 3] = data; break;
1624                 case 0x96: blitter_ram[SRCZFRAC + 2] = data; break;
1625                 case 0x97: blitter_ram[SRCZFRAC + 3] = data; break;
1626
1627                 case 0x98: blitter_ram[SRCZINT + 0] = data; break;
1628                 case 0x99: blitter_ram[SRCZINT + 1] = data; break;
1629                 case 0x9A: blitter_ram[SRCZFRAC + 0] = data; break;
1630                 case 0x9B: blitter_ram[SRCZFRAC + 1] = data; break;
1631                 }
1632         }
1633
1634         // It looks weird, but this is how the 64 bit registers are actually handled...!
1635
1636         else if ((offset >= SRCDATA + 0) && (offset <= SRCDATA + 3)
1637                 || (offset >= DSTDATA + 0) && (offset <= DSTDATA + 3)
1638                 || (offset >= DSTZ + 0) && (offset <= DSTZ + 3)
1639                 || (offset >= SRCZINT + 0) && (offset <= SRCZINT + 3)
1640                 || (offset >= SRCZFRAC + 0) && (offset <= SRCZFRAC + 3)
1641                 || (offset >= PATTERNDATA + 0) && (offset <= PATTERNDATA + 3))
1642         {
1643                 blitter_ram[offset + 4] = data;
1644         }
1645         else if ((offset >= SRCDATA + 4) && (offset <= SRCDATA + 7)
1646                 || (offset >= DSTDATA + 4) && (offset <= DSTDATA + 7)
1647                 || (offset >= DSTZ + 4) && (offset <= DSTZ + 7)
1648                 || (offset >= SRCZINT + 4) && (offset <= SRCZINT + 7)
1649                 || (offset >= SRCZFRAC + 4) && (offset <= SRCZFRAC + 7)
1650                 || (offset >= PATTERNDATA + 4) && (offset <= PATTERNDATA + 7))
1651         {
1652                 blitter_ram[offset - 4] = data;
1653         }
1654         else
1655                 blitter_ram[offset] = data;
1656 }
1657
1658 void BlitterWriteWord(uint32 offset, uint16 data, uint32 who/*=UNKNOWN*/)
1659 {
1660 /*if (((offset & 0xFF) >= PATTERNDATA) && ((offset & 0xFF) < PATTERNDATA + 8))
1661 {
1662         printf("----> %s wrote %04X to byte %u of PATTERNDATA...\n", whoName[who], data, offset - (0xF02200 + PATTERNDATA));
1663         fflush(stdout);
1664 }*/
1665 //#if 1
1666 /*      if (offset & 0xFF == A1_PIXEL && data == 14368)
1667         {
1668                 WriteLog("\n1\nA1_PIXEL written by %s (%u)...\n\n\n", whoName[who], data);
1669 extern bool doGPUDis;
1670 doGPUDis = true;
1671         }
1672         if ((offset & 0xFF) == (A1_PIXEL + 2) && data == 14368)
1673         {
1674                 WriteLog("\n2\nA1_PIXEL written by %s (%u)...\n\n\n", whoName[who], data);
1675 extern bool doGPUDis;
1676 doGPUDis = true;
1677         }//*/
1678 //#endif
1679
1680         BlitterWriteByte(offset + 0, data >> 8, who);
1681         BlitterWriteByte(offset + 1, data & 0xFF, who);
1682
1683         if ((offset & 0xFF) == 0x3A)
1684         // I.e., the second write of 32-bit value--not convinced this is the best way to do this!
1685         // But then again, according to the Jaguar docs, this is correct...!
1686 /*extern int blit_start_log;
1687 extern bool doGPUDis;
1688 if (blit_start_log)
1689 {
1690         WriteLog("BLIT: Blitter started by %s...\n", whoName[who]);
1691         doGPUDis = true;
1692 }//*/
1693 #ifdef USE_ORIGINAL_BLITTER
1694                 blitter_blit(GET32(blitter_ram, 0x38));
1695 #endif
1696 #ifdef USE_MIDSUMMER_BLITTER
1697                 BlitterMidsummer(GET32(blitter_ram, 0x38));
1698 #endif
1699 #ifdef USE_MIDSUMMER_BLITTER_MKII
1700                 BlitterMidsummer2();
1701 #endif
1702 }
1703 //F02278,9,A,B
1704
1705 void BlitterWriteLong(uint32 offset, uint32 data, uint32 who/*=UNKNOWN*/)
1706 {
1707 /*if (((offset & 0xFF) >= PATTERNDATA) && ((offset & 0xFF) < PATTERNDATA + 8))
1708 {
1709         printf("------> %s wrote %08X to byte %u of PATTERNDATA...\n", whoName[who], data, offset - (0xF02200 + PATTERNDATA));
1710         fflush(stdout);
1711 }//*/
1712 //#if 1
1713 /*      if ((offset & 0xFF) == A1_PIXEL && (data & 0xFFFF) == 14368)
1714         {
1715                 WriteLog("\n3\nA1_PIXEL written by %s (%u)...\n\n\n", whoName[who], data);
1716 extern bool doGPUDis;
1717 doGPUDis = true;
1718         }//*/
1719 //#endif
1720
1721         BlitterWriteWord(offset + 0, data >> 16, who);
1722         BlitterWriteWord(offset + 2, data & 0xFFFF, who);
1723 }
1724
1725 void LogBlit(void)
1726 {
1727         char * opStr[16] = { "LFU_CLEAR", "LFU_NSAND", "LFU_NSAD", "LFU_NOTS", "LFU_SAND", "LFU_NOTD", "LFU_N_SXORD", "LFU_NSORND",
1728                 "LFU_SAD", "LFU_XOR", "LFU_D", "LFU_NSORD", "LFU_REPLACE", "LFU_SORND", "LFU_SORD", "LFU_ONE" };
1729         uint32 cmd = GET32(blitter_ram, 0x38);
1730         UINT32 m = (REG(A1_FLAGS) >> 9) & 0x03, e = (REG(A1_FLAGS) >> 11) & 0x0F;
1731         UINT32 a1_width = ((0x04 | m) << e) >> 2;
1732         m = (REG(A2_FLAGS) >> 9) & 0x03, e = (REG(A2_FLAGS) >> 11) & 0x0F;
1733         UINT32 a2_width = ((0x04 | m) << e) >> 2;
1734
1735         WriteLog("Blit!\n");
1736         WriteLog("  COMMAND  = %08X\n", cmd);
1737         WriteLog("  a1_base  = %08X\n", REG(A1_BASE));
1738         WriteLog("  a1_flags = %08X (%c %c %c %c%c . %c%c%c%c%c%c %c%c%c %c%c%c . %c%c)\n", REG(A1_FLAGS),
1739                 (REG(A1_FLAGS) & 0x100000 ? '1' : '0'),
1740                 (REG(A1_FLAGS) & 0x080000 ? '1' : '0'),
1741                 (REG(A1_FLAGS) & 0x040000 ? '1' : '0'),
1742                 (REG(A1_FLAGS) & 0x020000 ? '1' : '0'),
1743                 (REG(A1_FLAGS) & 0x010000 ? '1' : '0'),
1744                 (REG(A1_FLAGS) & 0x004000 ? '1' : '0'),
1745                 (REG(A1_FLAGS) & 0x002000 ? '1' : '0'),
1746                 (REG(A1_FLAGS) & 0x001000 ? '1' : '0'),
1747                 (REG(A1_FLAGS) & 0x000800 ? '1' : '0'),
1748                 (REG(A1_FLAGS) & 0x000400 ? '1' : '0'),
1749                 (REG(A1_FLAGS) & 0x000200 ? '1' : '0'),
1750                 (REG(A1_FLAGS) & 0x000100 ? '1' : '0'),
1751                 (REG(A1_FLAGS) & 0x000080 ? '1' : '0'),
1752                 (REG(A1_FLAGS) & 0x000040 ? '1' : '0'),
1753                 (REG(A1_FLAGS) & 0x000020 ? '1' : '0'),
1754                 (REG(A1_FLAGS) & 0x000010 ? '1' : '0'),
1755                 (REG(A1_FLAGS) & 0x000008 ? '1' : '0'),
1756                 (REG(A1_FLAGS) & 0x000002 ? '1' : '0'),
1757                 (REG(A1_FLAGS) & 0x000001 ? '1' : '0'));
1758         WriteLog("             pitch=%u, pixSz=%u, zOff=%u, width=%u, xCtrl=%u\n",
1759                 REG(A1_FLAGS) & 0x00003, (REG(A1_FLAGS) & 0x00038) >> 3,
1760                 (REG(A1_FLAGS) & 0x001C0) >> 6,  a1_width, (REG(A1_FLAGS) & 0x30000) >> 16);
1761         WriteLog("  a1_clip  = %u, %u (%08X)\n", GET16(blitter_ram, A1_CLIP + 2), GET16(blitter_ram, A1_CLIP + 0), GET32(blitter_ram, A1_CLIP));
1762         WriteLog("  a1_pixel = %d, %d (%08X)\n", (int16)GET16(blitter_ram, A1_PIXEL + 2), (int16)GET16(blitter_ram, A1_PIXEL + 0), GET32(blitter_ram, A1_PIXEL));
1763         WriteLog("  a1_step  = %d, %d (%08X)\n", (int16)GET16(blitter_ram, A1_STEP + 2), (int16)GET16(blitter_ram, A1_STEP + 0), GET32(blitter_ram, A1_STEP));
1764         WriteLog("  a1_fstep = %u, %u (%08X)\n", GET16(blitter_ram, A1_FSTEP + 2), GET16(blitter_ram, A1_FSTEP + 0), GET32(blitter_ram, A1_FSTEP));
1765         WriteLog("  a1_fpixel= %u, %u (%08X)\n", GET16(blitter_ram, A1_FPIXEL + 2), GET16(blitter_ram, A1_FPIXEL + 0), GET32(blitter_ram, A1_FPIXEL));
1766         WriteLog("  a1_inc   = %d, %d (%08X)\n", (int16)GET16(blitter_ram, A1_INC + 2), (int16)GET16(blitter_ram, A1_INC + 0), GET32(blitter_ram, A1_INC));
1767         WriteLog("  a1_finc  = %u, %u (%08X)\n", GET16(blitter_ram, A1_FINC + 2), GET16(blitter_ram, A1_FINC + 0), GET32(blitter_ram, A1_FINC));
1768
1769         WriteLog("  a2_base  = %08X\n", REG(A2_BASE));
1770         WriteLog("  a2_flags = %08X (%c %c %c %c%c %c %c%c%c%c%c%c %c%c%c %c%c%c . %c%c)\n", REG(A2_FLAGS),
1771                 (REG(A2_FLAGS) & 0x100000 ? '1' : '0'),
1772                 (REG(A2_FLAGS) & 0x080000 ? '1' : '0'),
1773                 (REG(A2_FLAGS) & 0x040000 ? '1' : '0'),
1774                 (REG(A2_FLAGS) & 0x020000 ? '1' : '0'),
1775                 (REG(A2_FLAGS) & 0x010000 ? '1' : '0'),
1776                 (REG(A2_FLAGS) & 0x008000 ? '1' : '0'),
1777                 (REG(A2_FLAGS) & 0x004000 ? '1' : '0'),
1778                 (REG(A2_FLAGS) & 0x002000 ? '1' : '0'),
1779                 (REG(A2_FLAGS) & 0x001000 ? '1' : '0'),
1780                 (REG(A2_FLAGS) & 0x000800 ? '1' : '0'),
1781                 (REG(A2_FLAGS) & 0x000400 ? '1' : '0'),
1782                 (REG(A2_FLAGS) & 0x000200 ? '1' : '0'),
1783                 (REG(A2_FLAGS) & 0x000100 ? '1' : '0'),
1784                 (REG(A2_FLAGS) & 0x000080 ? '1' : '0'),
1785                 (REG(A2_FLAGS) & 0x000040 ? '1' : '0'),
1786                 (REG(A2_FLAGS) & 0x000020 ? '1' : '0'),
1787                 (REG(A2_FLAGS) & 0x000010 ? '1' : '0'),
1788                 (REG(A2_FLAGS) & 0x000008 ? '1' : '0'),
1789                 (REG(A2_FLAGS) & 0x000002 ? '1' : '0'),
1790                 (REG(A2_FLAGS) & 0x000001 ? '1' : '0'));
1791         WriteLog("             pitch=%u, pixSz=%u, zOff=%u, width=%u, xCtrl=%u\n",
1792                 REG(A2_FLAGS) & 0x00003, (REG(A2_FLAGS) & 0x00038) >> 3,
1793                 (REG(A2_FLAGS) & 0x001C0) >> 6,  a2_width, (REG(A2_FLAGS) & 0x30000) >> 16);
1794         WriteLog("  a2_mask  = %u, %u (%08X)\n", GET16(blitter_ram, A2_MASK + 2), GET16(blitter_ram, A2_MASK + 0), GET32(blitter_ram, A2_MASK));
1795         WriteLog("  a2_pixel = %d, %d (%08X)\n", (int16)GET16(blitter_ram, A2_PIXEL + 2), (int16)GET16(blitter_ram, A2_PIXEL + 0), GET32(blitter_ram, A2_PIXEL));
1796         WriteLog("  a2_step  = %d, %d (%08X)\n", (int16)GET16(blitter_ram, A2_STEP + 2), (int16)GET16(blitter_ram, A2_STEP + 0), GET32(blitter_ram, A2_STEP));
1797
1798         WriteLog("  count    = %d x %d\n", GET16(blitter_ram, PIXLINECOUNTER + 2), GET16(blitter_ram, PIXLINECOUNTER));
1799
1800         WriteLog("  SRCEN    = %s\n", (SRCEN ? "1" : "0"));
1801         WriteLog("  SRCENZ   = %s\n", (SRCENZ ? "1" : "0"));
1802         WriteLog("  SRCENX   = %s\n", (SRCENX ? "1" : "0"));
1803         WriteLog("  DSTEN    = %s\n", (DSTEN ? "1" : "0"));
1804         WriteLog("  DSTENZ   = %s\n", (DSTENZ ? "1" : "0"));
1805         WriteLog("  DSTWRZ   = %s\n", (DSTWRZ ? "1" : "0"));
1806         WriteLog("  CLIPA1   = %s\n", (CLIPA1 ? "1" : "0"));
1807         WriteLog("  UPDA1F   = %s\n", (UPDA1F ? "1" : "0"));
1808         WriteLog("  UPDA1    = %s\n", (UPDA1 ? "1" : "0"));
1809         WriteLog("  UPDA2    = %s\n", (UPDA2 ? "1" : "0"));
1810         WriteLog("  DSTA2    = %s\n", (DSTA2 ? "1" : "0"));
1811         WriteLog("  ZOP      = %s %s %s\n", (Z_OP_INF ? "<" : ""), (Z_OP_EQU ? "=" : ""), (Z_OP_SUP ? ">" : ""));
1812         WriteLog("--LFUFUNC  = %s\n", opStr[(cmd >> 21) & 0x0F]);
1813         WriteLog("| PATDSEL  = %s (PD=%08X%08X)\n", (PATDSEL ? "1" : "0"), REG(PATTERNDATA), REG(PATTERNDATA + 4));
1814         WriteLog("--ADDDSEL  = %s\n", (ADDDSEL ? "1" : "0"));
1815         WriteLog("  CMPDST   = %s\n", (CMPDST ? "1" : "0"));
1816         WriteLog("  BCOMPEN  = %s\n", (BCOMPEN ? "1" : "0"));
1817         WriteLog("  DCOMPEN  = %s\n", (DCOMPEN ? "1" : "0"));
1818         WriteLog("  TOPBEN   = %s\n", (TOPBEN ? "1" : "0"));
1819         WriteLog("  TOPNEN   = %s\n", (TOPNEN ? "1" : "0"));
1820         WriteLog("  BKGWREN  = %s\n", (BKGWREN ? "1" : "0"));
1821         WriteLog("  GOURD    = %s (II=%08X, SD=%08X%08X)\n", (GOURD ? "1" : "0"), REG(INTENSITYINC), REG(SRCDATA), REG(SRCDATA + 4));
1822         WriteLog("  GOURZ    = %s (ZI=%08X, ZD=%08X%08X, SZ1=%08X%08X, SZ2=%08X%08X)\n", (GOURZ ? "1" : "0"), REG(ZINC), REG(DSTZ), REG(DSTZ + 4),
1823                 REG(SRCZINT), REG(SRCZINT + 4), REG(SRCZFRAC), REG(SRCZFRAC + 4));
1824         WriteLog("  SRCSHADE = %s\n", (SRCSHADE ? "1" : "0"));
1825 }
1826
1827
1828 #ifdef USE_MIDSUMMER_BLITTER
1829 //
1830 // Here's an attempt to write a blitter that conforms to the Midsummer specs--since
1831 // it's supposedly backwards compatible, it should work well...
1832 //
1833 //#define LOG_BLITTER_MEMORY_ACCESSES
1834
1835 #define DATINIT (false)
1836 #define TXTEXT  (false)
1837 #define POLYGON (false)
1838
1839 void BlitterMidsummer(uint32 cmd)
1840 {
1841 uint32 outer_loop, inner_loop, a1_addr, a2_addr;
1842 int32 a1_x, a1_y, a2_x, a2_y, a1_width, a2_width;
1843 uint8 a1_phrase_mode, a2_phrase_mode;
1844
1845         a1_addr = REG(A1_BASE) & 0xFFFFFFF8;
1846         a2_addr = REG(A2_BASE) & 0xFFFFFFF8;
1847         a1_x = (REG(A1_PIXEL) << 16) | (REG(A1_FPIXEL) & 0xFFFF);
1848         a1_y = (REG(A1_PIXEL) & 0xFFFF0000) | (REG(A1_FPIXEL) >> 16);
1849         UINT32 m = (REG(A1_FLAGS) >> 9) & 0x03, e = (REG(A1_FLAGS) >> 11) & 0x0F;
1850         a1_width = ((0x04 | m) << e) >> 2;//*/
1851         a2_x = (REG(A2_PIXEL) & 0x0000FFFF) << 16;
1852         a2_y = (REG(A2_PIXEL) & 0xFFFF0000);
1853         m = (REG(A2_FLAGS) >> 9) & 0x03, e = (REG(A2_FLAGS) >> 11) & 0x0F;
1854         a2_width = ((0x04 | m) << e) >> 2;//*/
1855
1856         a1_phrase_mode = a2_phrase_mode = 0;
1857
1858         if ((blitter_ram[A1_FLAGS + 1] & 0x03) == 0)
1859                 a1_phrase_mode = 1;
1860
1861         if ((blitter_ram[A2_FLAGS + 1] & 0x03) == 0)
1862                 a2_phrase_mode = 1;
1863
1864 #define INNER0  (inner_loop == 0)
1865 #define OUTER0  (outer_loop == 0)
1866
1867 // $01800005 has SRCENX, may have to investigate further...
1868 // $00011008 has GOURD & DSTEN.
1869 // $41802F41 has SRCSHADE, CLIPA1
1870 /*bool logBlit = false;
1871 if (cmd != 0x00010200 && cmd != 0x01800001 && cmd != 0x01800005
1872         && cmd != 0x00011008 && cmd !=0x41802F41)
1873 {
1874         logBlit = true;
1875         LogBlit();
1876 }//*/
1877
1878         uint64 srcData = GET64(blitter_ram, SRCDATA), srcXtraData,
1879                 dstData = GET64(blitter_ram, DSTDATA), writeData;
1880         uint32 srcAddr, dstAddr;
1881         uint8 bitCount, a1PixelSize, a2PixelSize;
1882
1883         // JTRM says phrase mode only works for 8BPP or higher, so let's try this...
1884         uint32 phraseOffset[8] = { 8, 8, 8, 8, 4, 2, 0, 0 };
1885         uint8 pixelShift[8] = { 3, 2, 1, 0, 1, 2, 0, 0 };
1886
1887         a1PixelSize = (blitter_ram[A1_FLAGS + 3] >> 3) & 0x07;
1888         a2PixelSize = (blitter_ram[A2_FLAGS + 3] >> 3) & 0x07;
1889
1890         outer_loop = GET16(blitter_ram, PIXLINECOUNTER + 0);
1891
1892         if (outer_loop == 0)
1893                 outer_loop = 0x10000;
1894
1895         // We just list the states here and jump from state to state in order to
1896         // keep things somewhat clear. Optimization/cleanups later.
1897
1898 //idle:                                                 // Blitter is idle, and will not perform any bus activity
1899 /*
1900 idle         Blitter is off the bus, and no activity takes place.
1901 if GO    if DATINIT goto init_if
1902          else       goto inner
1903 */
1904         if (DATINIT)
1905                 goto init_if;
1906         else
1907                 goto inner;
1908
1909 /*
1910 inner        Inner loop is active, read and write cycles are performed
1911 */
1912 inner:                                                  // Run inner loop state machine (asserts step from its idle state)
1913         inner_loop = GET16(blitter_ram, PIXLINECOUNTER + 2);
1914
1915         if (inner_loop == 0)
1916                 inner_loop = 0x10000;
1917
1918 /*
1919 ------------------------------
1920 idle:                        Inactive, blitter is idle or passing round outer loop
1921 idle       Another state in the outer loop is active. No bus transfers are performed.
1922 if STEP
1923     if SRCENX goto sreadx
1924     else if TXTEXT goto txtread
1925     else if SRCEN goto sread
1926     else if DSTEN goto dread
1927     else if DSTENZ goto dzread
1928     else goto dwrite
1929 */
1930     if (SRCENX)
1931                 goto sreadx;
1932     else if (TXTEXT)
1933                 goto txtread;
1934     else if (SRCEN)
1935                 goto sread;
1936     else if (DSTEN)
1937                 goto dread;
1938     else if (DSTENZ)
1939                 goto dzread;
1940     else
1941                 goto dwrite;
1942
1943 /*
1944 sreadx     Extra source data read at the start of an inner loop pass.
1945 if STEP
1946     if SRCENZ goto szreadx
1947     else if TXTEXT goto txtread
1948     else if SRCEN goto sread
1949     else if DSTEN goto dread
1950     else if DSTENZ goto dzread
1951     else goto dwrite
1952 */
1953 sreadx:                                                 // Extra source data read
1954         if (SRCENZ)
1955                 goto szreadx;
1956         else if (TXTEXT)
1957                 goto txtread;
1958         else if (SRCEN)
1959                 goto sread;
1960         else if (DSTEN)
1961                 goto dread;
1962         else if (DSTENZ)
1963                 goto dzread;
1964         else
1965                 goto dwrite;
1966
1967 /*
1968 szreadx    Extra source Z read at the start of an inner loop pass.
1969 if STEP
1970     if TXTEXT goto txtread
1971     else goto sread
1972 */
1973 szreadx:                                                // Extra source Z read
1974         if (TXTEXT)
1975                 goto txtread;
1976         else
1977                 goto sread;
1978
1979 /*
1980 txtread    Read texture data from external memory. This state is only used for external texture.
1981            TEXTEXT is the condition TEXTMODE=1.
1982 if STEP
1983     if SRCEN goto sread
1984     else if DSTEN goto dread
1985     else if DSTENZ goto dzread
1986     else goto dwrite
1987 */
1988 txtread:                                                // Read external texture data
1989         if (SRCEN)
1990                 goto sread;
1991         else if (DSTEN)
1992                 goto dread;
1993         else if (DSTENZ)
1994                 goto dzread;
1995         else
1996                 goto dwrite;
1997
1998 /*
1999 sread      Source data read.
2000 if STEP
2001     if SRCENZ goto szread
2002     else if DSTEN goto dread
2003     else if DSTENZ goto dzread
2004     else goto dwrite
2005 */
2006 sread:                                                  // Source data read
2007 //The JTRM doesn't really specify the internal structure of the source data read, but I would
2008 //imagine that if it's in phrase mode that it starts by reading the phrase that the window is
2009 //pointing at. Likewise, the pixel (if in BPP 1, 2 & 4, chopped) otherwise. It probably still
2010 //transfers an entire phrase even in pixel mode.
2011 //Odd thought: Does it expand, e.g., 1 BPP pixels into 32 BPP internally? Hmm...
2012 //No.
2013 /*
2014         a1_addr = REG(A1_BASE) & 0xFFFFFFF8;
2015         a2_addr = REG(A2_BASE) & 0xFFFFFFF8;
2016         a1_zoffs = (REG(A1_FLAGS) >> 6) & 7;
2017         a2_zoffs = (REG(A2_FLAGS) >> 6) & 7;
2018         xadd_a1_control = (REG(A1_FLAGS) >> 16) & 0x03;
2019         xadd_a2_control = (REG(A2_FLAGS) >> 16) & 0x03;
2020         a1_pitch = pitchValue[(REG(A1_FLAGS) & 0x03)];
2021         a2_pitch = pitchValue[(REG(A2_FLAGS) & 0x03)];
2022         n_pixels = REG(PIXLINECOUNTER) & 0xFFFF;
2023         n_lines = (REG(PIXLINECOUNTER) >> 16) & 0xFFFF;
2024         a1_x = (REG(A1_PIXEL) << 16) | (REG(A1_FPIXEL) & 0xFFFF);
2025         a1_y = (REG(A1_PIXEL) & 0xFFFF0000) | (REG(A1_FPIXEL) >> 16);
2026         a2_psize = 1 << ((REG(A2_FLAGS) >> 3) & 0x07);
2027         a1_psize = 1 << ((REG(A1_FLAGS) >> 3) & 0x07);
2028         a1_phrase_mode = 0;
2029         a2_phrase_mode = 0;
2030         a1_width = ((0x04 | m) << e) >> 2;
2031         a2_width = ((0x04 | m) << e) >> 2;
2032
2033         // write values back to registers
2034         WREG(A1_PIXEL,  (a1_y & 0xFFFF0000) | ((a1_x >> 16) & 0xFFFF));
2035         WREG(A1_FPIXEL, (a1_y << 16) | (a1_x & 0xFFFF));
2036         WREG(A2_PIXEL,  (a2_y & 0xFFFF0000) | ((a2_x >> 16) & 0xFFFF));
2037 */
2038         // Calculate the address to be read...
2039
2040 //Need to fix phrase mode calcs here, since they should *step* by eight, not multiply.
2041 //Also, need to fix various differing BPP modes here, since offset won't be correct except
2042 //for 8BPP. !!! FIX !!!
2043         srcAddr = (DSTA2 ? a1_addr : a2_addr);
2044
2045 /*      if ((DSTA2 ? a1_phrase_mode : a2_phrase_mode) == 1)
2046         {
2047                 srcAddr += (((DSTA2 ? a1_x : a2_x) >> 16)
2048                         + (((DSTA2 ? a1_y : a2_y) >> 16) * (DSTA2 ? a1_width : a2_width)));
2049         }
2050         else*/
2051         {
2052 //              uint32 pixAddr = ((DSTA2 ? a1_x : a2_x) >> 16)
2053 //                      + (((DSTA2 ? a1_y : a2_y) >> 16) * (DSTA2 ? a1_width : a2_width));
2054                 int32 pixAddr = (int16)((DSTA2 ? a1_x : a2_x) >> 16)
2055                         + ((int16)((DSTA2 ? a1_y : a2_y) >> 16) * (DSTA2 ? a1_width : a2_width));
2056
2057                 if ((DSTA2 ? a1PixelSize : a2PixelSize) < 3)
2058                         pixAddr >>= pixelShift[(DSTA2 ? a1PixelSize : a2PixelSize)];
2059                 else if ((DSTA2 ? a1PixelSize : a2PixelSize) > 3)
2060                         pixAddr <<= pixelShift[(DSTA2 ? a1PixelSize : a2PixelSize)];
2061
2062                 srcAddr += pixAddr;
2063         }
2064
2065         // And read it!
2066
2067         if ((DSTA2 ? a1_phrase_mode : a2_phrase_mode) == 1)
2068         {
2069                 srcData = ((uint64)JaguarReadLong(srcAddr, BLITTER) << 32)
2070                         | (uint64)JaguarReadLong(srcAddr + 4, BLITTER);
2071         }
2072         else
2073         {
2074 //1,2,&4BPP are wrong here... !!! FIX !!!
2075                 if ((DSTA2 ? a1PixelSize : a2PixelSize) == 0)           // 1 BPP
2076                         srcData = JaguarReadByte(srcAddr, BLITTER);
2077                 if ((DSTA2 ? a1PixelSize : a2PixelSize) == 1)           // 2 BPP
2078                         srcData = JaguarReadByte(srcAddr, BLITTER);
2079                 if ((DSTA2 ? a1PixelSize : a2PixelSize) == 2)           // 4 BPP
2080                         srcData = JaguarReadByte(srcAddr, BLITTER);
2081                 if ((DSTA2 ? a1PixelSize : a2PixelSize) == 3)           // 8 BPP
2082                         srcData = JaguarReadByte(srcAddr, BLITTER);
2083                 if ((DSTA2 ? a1PixelSize : a2PixelSize) == 4)           // 16 BPP
2084                         srcData = JaguarReadWord(srcAddr, BLITTER);
2085                 if ((DSTA2 ? a1PixelSize : a2PixelSize) == 5)           // 32 BPP
2086                         srcData = JaguarReadLong(srcAddr, BLITTER);
2087         }
2088
2089 #ifdef LOG_BLITTER_MEMORY_ACCESSES
2090 if (logBlit)
2091         WriteLog("BLITTER: srcAddr=%08X,   srcData=%08X %08X\n", srcAddr, (uint32)(srcData >> 32), (uint32)(srcData & 0xFFFFFFFF));
2092 #endif
2093
2094         if (SRCENZ)
2095                 goto szread;
2096         else if (DSTEN)
2097                 goto dread;
2098         else if (DSTENZ)
2099                 goto dzread;
2100         else
2101                 goto dwrite;
2102
2103 szread:                                                 // Source Z read
2104 /*
2105 szread     Source Z read.
2106 if STEP
2107     if DSTEN goto dread
2108     else if DSTENZ goto dzread
2109     else goto dwrite
2110 */
2111         if (DSTEN)
2112                 goto dread;
2113         else if (DSTENZ)
2114                 goto dzread;
2115         else
2116                 goto dwrite;
2117
2118 dread:                                                  // Destination data read
2119 /*
2120 dread      Destination data read.
2121 if STEP
2122     if DSTENZ goto dzread
2123     else goto dwrite
2124 */
2125         // Calculate the destination address to be read...
2126
2127 //Need to fix phrase mode calcs here, since they should *step* by eight, not multiply.
2128 //Also, need to fix various differing BPP modes here, since offset won't be correct except
2129 //for 8BPP. !!! FIX !!!
2130         dstAddr = (DSTA2 ? a2_addr : a1_addr);
2131
2132         {
2133 //      uint32 pixAddr = ((DSTA2 ? a2_x : a1_x) >> 16)
2134 //              + (((DSTA2 ? a2_y : a1_y) >> 16) * (DSTA2 ? a2_width : a1_width));
2135         int32 pixAddr = (int16)((DSTA2 ? a2_x : a1_x) >> 16)
2136                 + ((int16)((DSTA2 ? a2_y : a1_y) >> 16) * (DSTA2 ? a2_width : a1_width));
2137
2138         if ((DSTA2 ? a2PixelSize : a1PixelSize) < 3)
2139                 pixAddr >>= pixelShift[(DSTA2 ? a2PixelSize : a1PixelSize)];
2140         else if ((DSTA2 ? a2PixelSize : a1PixelSize) > 3)
2141                 pixAddr <<= pixelShift[(DSTA2 ? a2PixelSize : a1PixelSize)];
2142
2143         dstAddr += pixAddr;
2144         }
2145
2146         // And read it!
2147
2148         if ((DSTA2 ? a2_phrase_mode : a1_phrase_mode) == 1)
2149         {
2150                 dstData = ((uint64)JaguarReadLong(srcAddr, BLITTER) << 32)
2151                         | (uint64)JaguarReadLong(srcAddr + 4, BLITTER);
2152         }
2153         else
2154         {
2155 //1,2,&4BPP are wrong here... !!! FIX !!!
2156                 if ((DSTA2 ? a2PixelSize : a1PixelSize) == 0)           // 1 BPP
2157                         dstData = JaguarReadByte(dstAddr, BLITTER);
2158                 if ((DSTA2 ? a2PixelSize : a1PixelSize) == 1)           // 2 BPP
2159                         dstData = JaguarReadByte(dstAddr, BLITTER);
2160                 if ((DSTA2 ? a2PixelSize : a1PixelSize) == 2)           // 4 BPP
2161                         dstData = JaguarReadByte(dstAddr, BLITTER);
2162                 if ((DSTA2 ? a2PixelSize : a1PixelSize) == 3)           // 8 BPP
2163                         dstData = JaguarReadByte(dstAddr, BLITTER);
2164                 if ((DSTA2 ? a2PixelSize : a1PixelSize) == 4)           // 16 BPP
2165                         dstData = JaguarReadWord(dstAddr, BLITTER);
2166                 if ((DSTA2 ? a2PixelSize : a1PixelSize) == 5)           // 32 BPP
2167                         dstData = JaguarReadLong(dstAddr, BLITTER);
2168         }
2169
2170 #ifdef LOG_BLITTER_MEMORY_ACCESSES
2171 if (logBlit)
2172         WriteLog("BLITTER (dread): dstAddr=%08X,   dstData=%08X %08X\n", dstAddr, (uint32)(dstData >> 32), (uint32)(dstData & 0xFFFFFFFF));
2173 #endif
2174
2175         if (DSTENZ)
2176                 goto dzread;
2177         else
2178                 goto dwrite;
2179
2180 dzread:                                                 // Destination Z read
2181 /*
2182 dzread     Destination Z read.
2183 if STEP goto dwrite
2184 */
2185         goto dwrite;
2186
2187 dwrite:                                                 // Destination data write
2188 /*
2189 dwrite     Destination write. Every pass round the inner loop must go through this state..
2190 if STEP
2191     if DSTWRZ goto dzwrite
2192     else if INNER0 goto idle
2193     else if TXTEXT goto txtread
2194     else if SRCEN goto sread
2195     else if DSTEN goto dread
2196     else if DSTENZ goto dzread
2197     else goto dwrite
2198 */
2199 /*
2200 Blit!
2201   a1_base  = 00100000
2202   a1_pitch = 0
2203   a1_psize = 16
2204   a1_width = 320
2205   a1_xadd  = 1.000000 (phrase=0)
2206   a1_yadd  = 0.000000
2207   a1_x     = 159.000000
2208   a1_y     = 1.000000
2209   a1_zoffs = 0
2210   a2_base  = 000095D0
2211   a2_pitch = 0
2212   a2_psize = 16
2213   a2_width = 256
2214   a2_xadd  = 1.000000 (phrase=1)
2215   a2_yadd  = 0.000000
2216   a2_x     = 2.000000
2217   a2_y     = 0.000000
2218   a2_mask_x= 0xFFFFFFFF
2219   a2_mask_y= 0xFFFFFFFF
2220   a2_zoffs = 0
2221   count    = 2 x 1
2222   COMMAND  = 00011008
2223   SRCEN    = 0
2224   DSTEN    = 1
2225   UPDA1F   = 0
2226   UPDA1    = 0
2227   UPDA2    = 0
2228   DSTA2    = 0
2229 --LFUFUNC  = LFU_CLEAR
2230 | PATDSEL  = 1 (PD=77C7 7700 7700 7700)
2231 --ADDDSEL  = 0
2232   GOURD    = 1 (II=00FC 1A00, SD=FF00 0000 0000 0000)
2233 */
2234
2235 //Still need to do CLIPA1 and SRCSHADE and GOURD and GOURZ...
2236
2237         // Check clipping...
2238
2239         if (CLIPA1)
2240         {
2241                 uint16 x = a1_x >> 16, y = a1_y >> 16;
2242
2243                 if (x >= GET16(blitter_ram, A1_CLIP + 2) || y >= GET16(blitter_ram, A1_CLIP))
2244                         goto inhibitWrite;
2245         }
2246
2247         // Figure out what gets written...
2248
2249         if (PATDSEL)
2250         {
2251                 writeData = GET64(blitter_ram, PATTERNDATA);
2252 //GOURD works properly only in 16BPP mode...
2253 //SRCDATA holds the intensity fractions...
2254 //Does GOURD get calc'ed here or somewhere else???
2255 //Temporary testing kludge...
2256 //if (GOURD)
2257 //   writeData >>= 48;
2258 //      writeData = 0xFF88;
2259 //OK, it's not writing an entire strip of pixels... Why?
2260 //bad incrementing, that's why!
2261         }
2262         else if (ADDDSEL)
2263         {
2264                 // Apparently this only works with 16-bit pixels. Not sure if it works in phrase mode either.
2265 //Also, take TOPBEN & TOPNEN into account here as well...
2266                 writeData = srcData + dstData;
2267         }
2268         else    // LFUFUNC is the default...
2269         {
2270                 writeData = 0;
2271
2272                 if (LFU_NAN)
2273                         writeData |= ~srcData & ~dstData;
2274                 if (LFU_NA)
2275                         writeData |= ~srcData & dstData;
2276                 if (LFU_AN)
2277                         writeData |= srcData & ~dstData;
2278                 if (LFU_A)
2279                         writeData |= srcData & dstData;
2280         }
2281
2282         // Calculate the address to be written...
2283
2284         dstAddr = (DSTA2 ? a2_addr : a1_addr);
2285
2286 /*      if ((DSTA2 ? a2_phrase_mode : a1_phrase_mode) == 1)
2287         {
2288 //both of these calculate the wrong address because they don't take into account
2289 //pixel sizes...
2290                 dstAddr += ((DSTA2 ? a2_x : a1_x) >> 16)
2291                         + (((DSTA2 ? a2_y : a1_y) >> 16) * (DSTA2 ? a2_width : a1_width));
2292         }
2293         else*/
2294         {
2295 /*              dstAddr += ((DSTA2 ? a2_x : a1_x) >> 16)
2296                         + (((DSTA2 ? a2_y : a1_y) >> 16) * (DSTA2 ? a2_width : a1_width));*/
2297 //              uint32 pixAddr = ((DSTA2 ? a2_x : a1_x) >> 16)
2298 //                      + (((DSTA2 ? a2_y : a1_y) >> 16) * (DSTA2 ? a2_width : a1_width));
2299                 int32 pixAddr = (int16)((DSTA2 ? a2_x : a1_x) >> 16)
2300                         + ((int16)((DSTA2 ? a2_y : a1_y) >> 16) * (DSTA2 ? a2_width : a1_width));
2301
2302                 if ((DSTA2 ? a2PixelSize : a1PixelSize) < 3)
2303                         pixAddr >>= pixelShift[(DSTA2 ? a2PixelSize : a1PixelSize)];
2304                 else if ((DSTA2 ? a2PixelSize : a1PixelSize) > 3)
2305                         pixAddr <<= pixelShift[(DSTA2 ? a2PixelSize : a1PixelSize)];
2306
2307                 dstAddr += pixAddr;
2308         }
2309
2310         // And write it!
2311
2312         if ((DSTA2 ? a2_phrase_mode : a1_phrase_mode) == 1)
2313         {
2314                 JaguarWriteLong(dstAddr, writeData >> 32, BLITTER);
2315                 JaguarWriteLong(dstAddr + 4, writeData & 0xFFFFFFFF, BLITTER);
2316         }
2317         else
2318         {
2319 //1,2,&4BPP are wrong here... !!! FIX !!!
2320                 if ((DSTA2 ? a2PixelSize : a1PixelSize) == 0)           // 1 BPP
2321                         JaguarWriteByte(dstAddr, writeData, BLITTER);
2322                 if ((DSTA2 ? a2PixelSize : a1PixelSize) == 1)           // 2 BPP
2323                         JaguarWriteByte(dstAddr, writeData, BLITTER);
2324                 if ((DSTA2 ? a2PixelSize : a1PixelSize) == 2)           // 4 BPP
2325                         JaguarWriteByte(dstAddr, writeData, BLITTER);
2326                 if ((DSTA2 ? a2PixelSize : a1PixelSize) == 3)           // 8 BPP
2327                         JaguarWriteByte(dstAddr, writeData, BLITTER);
2328                 if ((DSTA2 ? a2PixelSize : a1PixelSize) == 4)           // 16 BPP
2329                         JaguarWriteWord(dstAddr, writeData, BLITTER);
2330                 if ((DSTA2 ? a2PixelSize : a1PixelSize) == 5)           // 32 BPP
2331                         JaguarWriteLong(dstAddr, writeData, BLITTER);
2332         }
2333
2334 #ifdef LOG_BLITTER_MEMORY_ACCESSES
2335 if (logBlit)
2336         WriteLog("BLITTER: dstAddr=%08X, writeData=%08X %08X\n", dstAddr, (uint32)(writeData >> 32), (uint32)(writeData & 0xFFFFFFFF));
2337 #endif
2338
2339 inhibitWrite://Should this go here? or on the other side of the X/Y incrementing?
2340 //Seems OK here... for now.
2341
2342 // Do funky X/Y incrementation here as well... !!! FIX !!!
2343
2344         // Handle A1 channel stepping
2345
2346         if ((blitter_ram[A1_FLAGS + 1] & 0x03) == 0)
2347                 a1_x += phraseOffset[a1PixelSize] << 16;
2348         else if ((blitter_ram[A1_FLAGS + 1] & 0x03) == 1)
2349                 a1_x += (blitter_ram[A1_FLAGS + 1] & 0x08 ? -1 << 16 : 1 << 16);
2350 /*      else if ((blitter_ram[A1_FLAGS + 1] & 0x03) == 2)
2351                 a1_x += 0 << 16;                              */
2352         else if ((blitter_ram[A1_FLAGS + 1] & 0x03) == 3)
2353         {
2354 //Always add the FINC here??? That was the problem with the BIOS screen... So perhaps.
2355                 a1_x += GET16(blitter_ram, A1_FINC + 2);
2356                 a1_y += GET16(blitter_ram, A1_FINC + 0);
2357
2358                 a1_x += GET16(blitter_ram, A1_INC + 2) << 16;
2359                 a1_y += GET16(blitter_ram, A1_INC + 0) << 16;
2360         }
2361
2362         if ((blitter_ram[A1_FLAGS + 1] & 0x04) && (blitter_ram[A1_FLAGS + 1] & 0x03 != 3))
2363                 a1_y += (blitter_ram[A1_FLAGS + 1] & 0x10 ? -1 << 16 : 1 << 16);
2364
2365         // Handle A2 channel stepping
2366
2367         if ((blitter_ram[A2_FLAGS + 1] & 0x03) == 0)
2368                 a2_x += phraseOffset[a2PixelSize] << 16;
2369         else if ((blitter_ram[A2_FLAGS + 1] & 0x03) == 1)
2370                 a2_x += (blitter_ram[A2_FLAGS + 1] & 0x08 ? -1 << 16 : 1 << 16);
2371 /*      else if ((blitter_ram[A2_FLAGS + 1] & 0x03) == 2)
2372                 a2_x += 0 << 16;                              */
2373
2374         if (blitter_ram[A2_FLAGS + 1] & 0x04)
2375                 a2_y += (blitter_ram[A2_FLAGS + 1] & 0x10 ? -1 << 16 : 1 << 16);
2376
2377 //Need to fix this so that it subtracts (saturating, of course) the correct number of pixels
2378 //in phrase mode... !!! FIX !!! [DONE]
2379 //Need to fix this so that it counts down the correct item. Does it count the
2380 //source or the destination phrase mode???
2381 //It shouldn't matter, because we *should* end up processing the same
2382 //number of pixels either way... Not sure though.
2383         if ((DSTA2 ? a2_phrase_mode : a1_phrase_mode) == 1)
2384         {
2385                 if (inner_loop < phraseOffset[DSTA2 ? a2PixelSize : a1PixelSize])
2386                         inner_loop = 0;
2387                 else
2388                         inner_loop -= phraseOffset[DSTA2 ? a2PixelSize : a1PixelSize];
2389         }
2390         else
2391                 inner_loop--;
2392
2393
2394         if (DSTWRZ)
2395                 goto dzwrite;
2396         else if (INNER0)
2397                 goto indone;
2398         else if (TXTEXT)
2399                 goto txtread;
2400         else if (SRCEN)
2401                 goto sread;
2402         else if (DSTEN)
2403                 goto dread;
2404         else if (DSTENZ)
2405                 goto dzread;
2406         else
2407                 goto dwrite;
2408
2409 dzwrite:                                                // Destination Z write
2410 /*
2411 dzwrite    Destination Z write.
2412 if STEP
2413     if INNER0 goto idle
2414     else if TXTEXT goto txtread
2415     else if SRCEN goto sread
2416     else if DSTEN goto dread
2417     else if DSTENZ goto dzread
2418     else goto dwrite
2419 */
2420         if (INNER0)
2421                 goto indone;
2422         else if (TXTEXT)
2423                 goto txtread;
2424         else if (SRCEN)
2425                 goto sread;
2426         else if (DSTEN)
2427                 goto dread;
2428         else if (DSTENZ)
2429                 goto dzread;
2430         else
2431                 goto dwrite;
2432
2433 /*
2434 ------------------------------
2435 if INDONE if OUTER0 goto idle
2436 else if UPDA1F        goto a1fupdate
2437 else if UPDA1         goto a1update
2438 else if GOURZ.POLYGON goto zfupdate
2439 else if UPDA2         goto a2update
2440 else if DATINIT       goto init_if
2441 else restart inner
2442 */
2443 indone:
2444         outer_loop--;
2445
2446
2447         if (OUTER0)
2448                 goto blitter_done;
2449         else if (UPDA1F)
2450                 goto a1fupdate;
2451         else if (UPDA1)
2452                 goto a1update;
2453 //kill this, for now...
2454 //      else if (GOURZ.POLYGON)
2455 //              goto zfupdate;
2456         else if (UPDA2)
2457                 goto a2update;
2458         else if (DATINIT)
2459                 goto init_if;
2460         else
2461                 goto inner;
2462
2463 a1fupdate:                                              // Update A1 pointer fractions and more (see below)
2464 /*
2465 a1fupdate    A1 step fraction is added to A1 pointer fraction
2466              POLYGON true: A1 step delta X and Y fraction parts are added to the A1
2467                          step X and Y fraction parts (the value prior to this add is used for
2468                          the step to pointer add).
2469              POLYGON true: inner count step fraction is added to the inner count
2470                          fraction part
2471              POLYGON.GOURD true: the I fraction step is added to the computed
2472                          intensity fraction parts +
2473              POLYGON.GOURD true: the I fraction step delta is added to the I
2474                          fraction step
2475 goto a1update
2476 */
2477 /*
2478 #define A1_PIXEL                ((UINT32)0x0C)  // Integer part of the pixel (Y.i and X.i)
2479 #define A1_STEP                 ((UINT32)0x10)  // Integer part of the step
2480 #define A1_FSTEP                ((UINT32)0x14)  // Fractional part of the step
2481 #define A1_FPIXEL               ((UINT32)0x18)  // Fractional part of the pixel (Y.f and X.f)
2482 */
2483
2484 // This is all kinda murky. All we have are the Midsummer docs to give us any guidance,
2485 // and it's incomplete or filled with errors (like above). Aarrrgggghhhhh!
2486
2487 //This isn't right. Is it? I don't think the fractional parts are signed...
2488 //      a1_x += (int32)((int16)GET16(blitter_ram, A1_FSTEP + 2));
2489 //      a1_y += (int32)((int16)GET16(blitter_ram, A1_FSTEP + 0));
2490         a1_x += GET16(blitter_ram, A1_FSTEP + 2);
2491         a1_y += GET16(blitter_ram, A1_FSTEP + 0);
2492
2493         goto a1update;
2494
2495 a1update:                                               // Update A1 pointer integers
2496 /*
2497 a1update     A1 step is added to A1 pointer, with carry from the fractional add
2498              POLYGON true: A1 step delta X and Y integer parts are added to the A1
2499                          step X and Y integer parts, with carry from the corresponding
2500                          fractional part add (again, the value prior to this add is used for
2501                          the step to pointer add).
2502              POLYGON true: inner count step is added to the inner count, with carry
2503              POLYGON.GOURD true: the I step is added to the computed intensities,
2504                          with carry +
2505              POLYGON.GOURD true: the I step delta is added to the I step, with
2506                          carry the texture X and Y step delta values are added to the X and Y
2507                          step values.
2508 if GOURZ.POLYGON goto zfupdate
2509 else if UPDA2 goto a2update
2510 else if DATINIT goto init_if
2511 else restart inner
2512 */
2513         a1_x += (int32)(GET16(blitter_ram, A1_STEP + 2) << 16);
2514         a1_y += (int32)(GET16(blitter_ram, A1_STEP + 0) << 16);
2515
2516
2517 //kill this, for now...
2518 //      if (GOURZ.POLYGON)
2519         if (false)
2520                 goto zfupdate;
2521         else if (UPDA2)
2522                 goto a2update;
2523         else if (DATINIT)
2524                 goto init_if;
2525         else
2526                 goto inner;
2527
2528 zfupdate:                                               // Update computed Z step fractions
2529 /*
2530 zfupdate     the Z fraction step is added to the computed Z fraction parts +
2531              the Z fraction step delta is added to the Z fraction step
2532 goto zupdate
2533 */
2534         goto zupdate;
2535
2536 zupdate:                                                // Update computed Z step integers
2537 /*
2538 zupdate      the Z step is added to the computed Zs, with carry +
2539              the Z step delta is added to the Z step, with carry
2540 if UPDA2 goto a2update
2541 else if DATINIT goto init_if
2542 else restart inner
2543 */
2544         if (UPDA2)
2545                 goto a2update;
2546         else if (DATINIT)
2547                 goto init_if;
2548         else
2549                 goto inner;
2550
2551 a2update:                                               // Update A2 pointer
2552 /*
2553 a2update     A2 step is added to the A2 pointer
2554 if DATINIT goto init_if
2555 else restart inner
2556 */
2557         a2_x += (int32)(GET16(blitter_ram, A2_STEP + 2) << 16);
2558         a2_y += (int32)(GET16(blitter_ram, A2_STEP + 0) << 16);
2559
2560
2561         if (DATINIT)
2562                 goto init_if;
2563         else
2564                 goto inner;
2565
2566 init_if:                                                // Initialise intensity fractions and texture X
2567 /*
2568 init_if      Initialise the fractional part of the computed intensity fields, from
2569              the increment and step registers. The texture X integer and fractional
2570                          parts can also be initialised.
2571 goto     init_ii
2572 */
2573         goto init_ii;
2574
2575 init_ii:                                                // Initialise intensity integers and texture Y
2576 /*
2577 init_ii      Initialise the integer part of the computed intensity, and texture Y
2578              integer and fractional parts
2579 if GOURZ goto init_zf
2580 else     goto inner
2581 */
2582         if (GOURZ)
2583                 goto init_zf;
2584         else
2585             goto inner;
2586
2587 init_zf:                                                // Initialise Z fractions
2588 /*
2589 init_zf      Initialise the fractional part of the computed Z fields.
2590 goto init_zi
2591 */
2592         goto init_zi;
2593
2594 init_zi:                                                // Initialise Z integers
2595 /*
2596 init_zi      Initialise the integer part of the computed Z fields.
2597 goto inner
2598 */
2599         goto inner;
2600
2601
2602 /*
2603 The outer loop state machine fires off the inner loop, and controls the updating
2604 process between passes through the inner loop.
2605
2606 + -- these functions are irrelevant if the DATINIT function is enabled, which it
2607      will normally be.
2608
2609 All these states will complete in one clock cycle, with the exception of the idle
2610 state, which means the blitter is quiescent; and the inner state, which takes as
2611 long as is required to complete one strip of pixels. It is therefore possible for
2612 the blitter to spend a maximum of nine clock cycles of inactivity between passes
2613 through the inner loop.
2614 */
2615
2616 blitter_done:
2617         {}
2618 }
2619 #endif
2620
2621
2622 //
2623 // Here's attempt #2--taken from the Oberon chip specs!
2624 //
2625
2626 #ifdef USE_MIDSUMMER_BLITTER_MKII
2627
// Forward declarations for the helper routines of the second blitter
// implementation (everything here is inside #ifdef USE_MIDSUMMER_BLITTER_MKII).
// Only the prototypes are given at this point; the definitions are elsewhere.
//
// Convention visible in the signatures: parameters taken by non-const reference
// (uint16 &, int16 &, uint64 &, bool &, ...) can be modified by the callee, while
// everything else is passed by value.
// NOTE(review): presumably the leading reference parameters are the outputs of
// each routine and the by-value parameters are its input lines -- confirm against
// the definitions.
// NOTE(review): the routine names look like they follow block names from the
// Oberon chip specs referred to in the comment above this section -- confirm.

// Parameter names are omitted in this prototype; see the definition for what each
// positional argument means. The first two arguments are non-const references.
2628 void ADDRGEN(uint32 &, uint32 &, bool, bool,
2629         uint16, uint16, uint32, uint8, uint8, uint8, uint8,
2630         uint16, uint16, uint32, uint8, uint8, uint8, uint8);
// addq is passed as a raw pointer and initcin[] decays to a pointer, so neither
// carries an element count in the signature.
// NOTE(review): required array lengths must be taken from the definition/callers.
2631 void ADDARRAY(uint16 * addq, uint8 daddasel, uint8 daddbsel, uint8 daddmode,
2632         uint64 dstd, uint32 iinc, uint8 initcin[], uint64 initinc, uint16 initpix,
2633         uint32 istep, uint64 patd, uint64 srcd, uint64 srcz1, uint64 srcz2,
2634         uint32 zinc, uint32 zstep);
// r and co are non-const references; a, b and cin are by-value operands, followed
// by three boolean mode flags (sat, eightbit, hicinh).
2635 void ADD16SAT(uint16 &r, uint8 &co, uint16 a, uint16 b, uint8 cin, bool sat, bool eightbit, bool hicinh);
// adda_x/adda_y are non-const references. Note that the fractional step and
// increment parameters (a1_stepf_*, a1_incf_*) are declared int16 here, whereas
// BlitterMidsummer2 reads the corresponding registers into uint16 locals.
// NOTE(review): if those locals are passed here, an implicit uint16 -> int16
// conversion happens at the call -- confirm that is intended.
2636 void ADDAMUX(int16 &adda_x, int16 &adda_y, uint8 addasel, int16 a1_step_x, int16 a1_step_y,
2637         int16 a1_stepf_x, int16 a1_stepf_y, int16 a2_step_x, int16 a2_step_y,
2638         int16 a1_inc_x, int16 a1_inc_y, int16 a1_incf_x, int16 a1_incf_y, uint8 adda_xconst,
2639         bool adda_yconst, bool addareg, bool suba_x, bool suba_y);
// addb_x/addb_y are non-const references; the remaining parameters are by value.
2640 void ADDBMUX(int16 &addb_x, int16 &addb_y, uint8 addbsel, int16 a1_x, int16 a1_y,
2641         int16 a2_x, int16 a2_y, int16 a1_frac_x, int16 a1_frac_y);
// data_x/data_y are non-const references; the remaining parameters are by value.
2642 void DATAMUX(int16 &data_x, int16 &data_y, uint32 gpu_din, int16 addq_x, int16 addq_y, bool addqsel);
// addq_x/addq_y are non-const references. adda_*/addb_* are uint16 here although
// ADDAMUX/ADDBMUX declare the same-named reference parameters as int16.
2643 void ADDRADD(int16 &addq_x, int16 &addq_y, bool a1fracldi,
2644         uint16 adda_x, uint16 adda_y, uint16 addb_x, uint16 addb_y, uint8 modx, bool suba_x, bool suba_y);
// Non-const references: wdata, dcomp, zcomp, nowrite at the front, plus patd and
// srcz further down the list (so the callee can modify the caller's copies of
// those as well). All other parameters are by value.
2645 void DATA(uint64 &wdata, uint8 &dcomp, uint8 &zcomp, bool &nowrite,
2646         bool big_pix, bool cmpdst, uint8 daddasel, uint8 daddbsel, uint8 daddmode, bool daddq_sel, uint8 data_sel,
2647         uint8 dbinh, uint8 dend, uint8 dstart, uint64 dstd, uint32 iinc, uint8 lfu_func, uint64 &patd, bool patdadd,
2648         bool phrase_mode, uint64 srcd, bool srcdread, bool srczread, bool srcz2add, uint8 zmode,
2649         bool bcompen, bool bkgwren, bool dcompen, uint8 icount, uint8 pixsize,
2650         uint64 &srcz, uint64 dstz, uint32 zinc);
// dbinh and nowrite are non-const references; note that srcd is only a uint8 in
// this signature (it is a uint64 in DATA above).
2651 void COMP_CTRL(uint8 &dbinh, bool &nowrite,
2652         bool bcompen, bool big_pix, bool bkgwren, uint8 dcomp, bool dcompen, uint8 icount,
2653         uint8 pixsize, bool phrase_mode, uint8 srcd, uint8 zcomp);
// Compile-time switch: when defined, the "#ifdef VERBOSE_BLITTER_LOGGING" sections
// in BlitterMidsummer2 are compiled in. Those sections still print only while
// logBlit (below) is true.
2654 #define VERBOSE_BLITTER_LOGGING
// Run-time trace switch tested by the logging sections. BlitterMidsummer2 resets
// it to false on entry, sets it to true when the command word is not one of the
// values in its exclusion list, and forces it back to false while
// blit_start_log == 0. It is not declared static, so it has external linkage.
2655 bool logBlit = false;
2656
2657 void BlitterMidsummer2(void)
2658 {
2659         // Here's what the specs say the state machine does. Note that this can probably be
2660         // greatly simplified (also, it's different from what John has in his Oberon docs):
2661 //Will remove stuff that isn't in Jaguar I once fully described (stuff like texture won't
2662 //be described here at all)...
2663
2664         uint32 cmd = GET32(blitter_ram, COMMAND);
2665
2666 logBlit = false;
2667 if (
2668         cmd != 0x00010200 &&    // PATDSEL
2669         cmd != 0x01800001
2670         && cmd != 0x01800005
2671 //Boot ROM ATARI letters:
2672         && cmd != 0x00011008    // DSTEN GOURD PATDSEL
2673 //Boot ROM spinning cube:
2674         && cmd != 0x41802F41    // SRCEN CLIP_A1 UPDA1 UPDA1F UPDA2 DSTA2 GOURZ ZMODE=0 LFUFUNC=C SRCSHADE
2675 //T2K intro screen:
2676         && cmd != 0x01800E01    // SRCEN UPDA1 UPDA2 DSTA2 LFUFUNC=C
2677 //T2K TEMPEST letters:
2678         && cmd != 0x09800741    // SRCEN CLIP_A1 UPDA1 UPDA1F UPDA2 LFUFUNC=C DCOMPEN
2679 //Static letters on Cybermorph intro screen:
2680         && cmd != 0x09800609    // SRCEN DSTEN UPDA1 UPDA2 LFUFUNC=C DCOMPEN
2681 //Static pic on title screen:
2682         && cmd != 0x01800601    // SRCEN UPDA1 UPDA2 LFUFUNC=C
2683 //Turning letters on Cybermorph intro screen:
2684         && cmd != 0x09800F41    // SRCEN CLIP_A1 UPDA1 UPDA1F UPDA2 DSTA2 LFUFUNC=C DCOMPEN
2685         && cmd != 0x00113078    // DSTEN DSTENZ DSTWRZ CLIP_A1 GOURD GOURZ PATDSEL ZMODE=4
2686         && cmd != 0x09900F39    // SRCEN DSTEN DSTENZ DSTWRZ UPDA1 UPDA1F UPDA2 DSTA2 ZMODE=4 LFUFUNC=C DCOMPEN
2687         && cmd != 0x09800209    // SRCEN DSTEN UPDA1 LFUFUNC=C DCOMPEN
2688         && cmd != 0x00011200    // UPDA1 GOURD PATDSEL
2689 //Start of Hover Strike (clearing screen):
2690         && cmd != 0x00010000    // PATDSEL
2691 //Hover Strike text:
2692         && cmd != 0x1401060C    // SRCENX DSTEN UPDA1 UPDA2 PATDSEL BCOMPEN BKGWREN
2693 //Hover Strike 3D stuff
2694 //      && cmd != 0x01902839    // SRCEN DSTEN DSTENZ DSTWRZ DSTA2 GOURZ ZMODE=4 LFUFUNC=C
2695 //Hover Strike darkening on intro to play (briefing) screen
2696         && cmd != 0x00020208    // DSTEN UPDA1 ADDDSEL
2697 //Trevor McFur stuff:
2698         && cmd != 0x05810601    // SRCEN UPDA1 UPDA2 PATDSEL BCOMPEN
2699         && cmd != 0x01800201    // SRCEN UPDA1 LFUFUNC=C
2700 //T2K:
2701         && cmd != 0x00011000    // GOURD PATDSEL
2702         && cmd != 0x00011040    // CLIP_A1 GOURD PATDSEL
2703         )
2704         logBlit = true;//*/
2705 //logBlit = true;
2706 if (blit_start_log == 0)        // Wait for the signal...
2707         logBlit = false;//*/
2708 /*
2709 Some T2K unique blits:
2710 logBlit = F, cmd = 00010200 *
2711 logBlit = F, cmd = 00011000
2712 logBlit = F, cmd = 00011040
2713 logBlit = F, cmd = 01800005 *
2714 logBlit = F, cmd = 09800741 *
2715 Hover Strike mission selection screen:
2716 Blit! (CMD = 01902839)  // SRCEN DSTEN DSTENZ DSTWRZ DSTA2 GOURZ ZMODE=4 LFUFUNC=C
2717 */
2718
2719 //printf("logBlit = %s, cmd = %08X\n", (logBlit ? "T" : "F"), cmd);
2720 //fflush(stdout);
2721 //logBlit = true;
2722
2723 /*
2724 Blit! (CMD = 00011040)
2725 Flags: CLIP_A1 GOURD PATDSEL
2726   count = 18 x 1
2727   a1_base = 00100000, a2_base = 0081F6A8
2728   a1_x = 00A7, a1_y = 0014, a1_frac_x = 0000, a1_frac_y = 0000, a2_x = 0001, a2_y = 0000
2729   a1_step_x = FE80, a1_step_y = 0001, a1_stepf_x = 0000, a1_stepf_y = 0000, a2_step_x = FFF8, a2_step_y = 0001
2730   a1_inc_x = 0001, a1_inc_y = 0000, a1_incf_x = 0000, a1_incf_y = 0000
2731   a1_win_x = 0180, a1_win_y = 0118, a2_mask_x = 0000, a2_mask_y = 0000
2732   a2_mask=F a1add=+phr/+0 a2add=+phr/+0
2733   a1_pixsize = 4, a2_pixsize = 4
2734 */
2735 //Testing T2K...
2736 /*logBlit = false;
2737 if (cmd == 0x00011040
2738         && (GET16(blitter_ram, A1_PIXEL + 2) == 0x00A7) && (GET16(blitter_ram, A1_PIXEL + 0) == 0x0014)
2739         && (GET16(blitter_ram, A2_PIXEL + 2) == 0x0001) && (GET16(blitter_ram, A2_PIXEL + 0) == 0x0000)
2740         && (GET16(blitter_ram, PIXLINECOUNTER + 2) == 18))
2741         logBlit = true;*/
2742
2743         // Line states passed in via the command register
2744
2745         bool srcen = (SRCEN), srcenx = (SRCENX), srcenz = (SRCENZ),
2746                 dsten = (DSTEN), dstenz = (DSTENZ), dstwrz = (DSTWRZ), clip_a1 = (CLIPA1),
2747                 upda1 = (UPDA1), upda1f = (UPDA1F), upda2 = (UPDA2), dsta2 = (DSTA2),
2748                 gourd = (GOURD), gourz = (GOURZ), topben = (TOPBEN), topnen = (TOPNEN),
2749                 patdsel = (PATDSEL), adddsel = (ADDDSEL), cmpdst = (CMPDST), bcompen = (BCOMPEN),
2750                 dcompen = (DCOMPEN), bkgwren = (BKGWREN), srcshade = (SRCSHADE);
2751
2752         uint8 zmode = (cmd & 0x01C0000) >> 18, lfufunc = (cmd & 0x1E00000) >> 21;
2753 //Missing: BUSHI
2754 //Where to find various lines:
2755 // clip_a1  -> inner
2756 // gourd    -> dcontrol, inner, outer, state
2757 // gourz    -> dcontrol, inner, outer, state
2758 // cmpdst   -> blit, data, datacomp, state
2759 // bcompen  -> acontrol, inner, mcontrol, state
2760 // dcompen  -> inner, state
2761 // bkgwren  -> inner, state
2762 // srcshade -> dcontrol, inner, state
2763 // adddsel  -> dcontrol
2764 //NOTE: ADDDSEL takes precedence over PATDSEL, PATDSEL over LFU_FUNC
2765 #ifdef VERBOSE_BLITTER_LOGGING
2766 if (logBlit)
2767 {
2768 char zfs[512], lfus[512];
2769 zfs[0] = lfus[0] = 0;
2770 if (dstwrz || dstenz || gourz)
2771         sprintf(zfs, " ZMODE=%X", zmode);
2772 if (!(patdsel || adddsel))
2773         sprintf(lfus, " LFUFUNC=%X", lfufunc);
2774 printf("\nBlit! (CMD = %08X)\nFlags:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n", cmd,
2775         (srcen ? " SRCEN" : ""), (srcenx ? " SRCENX" : ""), (srcenz ? " SRCENZ" : ""),
2776         (dsten ? " DSTEN" : ""), (dstenz ? " DSTENZ" : ""), (dstwrz ? " DSTWRZ" : ""),
2777         (clip_a1 ? " CLIP_A1" : ""), (upda1 ? " UPDA1" : ""), (upda1f ? " UPDA1F" : ""),
2778         (upda2 ? " UPDA2" : ""), (dsta2 ? " DSTA2" : ""), (gourd ? " GOURD" : ""),
2779         (gourz ? " GOURZ" : ""), (topben ? " TOPBEN" : ""), (topnen ? " TOPNEN" : ""),
2780         (patdsel ? " PATDSEL" : ""), (adddsel ? " ADDDSEL" : ""), zfs, lfus, (cmpdst ? " CMPDST" : ""),
2781         (bcompen ? " BCOMPEN" : ""), (dcompen ? " DCOMPEN" : ""), (bkgwren ? " BKGWREN" : ""),
2782         (srcshade ? " SRCSHADE" : ""));
2783 printf("  count = %d x %d\n", GET16(blitter_ram, PIXLINECOUNTER + 2), GET16(blitter_ram, PIXLINECOUNTER));
2784 fflush(stdout);
2785 }
2786 #endif
2787
2788         // Lines that don't exist in Jaguar I (and will never be asserted)
2789
2790         bool polygon = false, datinit = false, a1_stepld = false, a2_stepld = false, ext_int = false;
2791         bool istepadd = false, istepfadd = false, finneradd = false, inneradd = false;
2792         bool zstepfadd = false, zstepadd = false;
2793
2794         // Various state lines (initial state--basically the reset state of the FDSYNCs)
2795
2796         bool go = true, idle = true, inner = false, a1fupdate = false, a1update = false,
2797                 zfupdate = false, zupdate = false, a2update = false, init_if = false, init_ii = false,
2798                 init_zf = false, init_zi = false;
2799
2800         bool outer0 = false, indone = false;
2801
2802         bool idlei, inneri, a1fupdatei, a1updatei, zfupdatei, zupdatei, a2updatei, init_ifi, init_iii,
2803                 init_zfi, init_zii;
2804
2805         bool notgzandp = !(gourz && polygon);
2806
2807         // Various registers set up by user
2808
2809         uint16 ocount = GET16(blitter_ram, PIXLINECOUNTER);
2810         uint8 a1_pitch = blitter_ram[A1_FLAGS + 3] & 0x03;
2811         uint8 a2_pitch = blitter_ram[A2_FLAGS + 3] & 0x03;
2812         uint8 a1_pixsize = (blitter_ram[A1_FLAGS + 3] & 0x38) >> 3;
2813         uint8 a2_pixsize = (blitter_ram[A2_FLAGS + 3] & 0x38) >> 3;
2814         uint8 a1_zoffset = (GET16(blitter_ram, A1_FLAGS + 2) >> 6) & 0x07;
2815         uint8 a2_zoffset = (GET16(blitter_ram, A2_FLAGS + 2) >> 6) & 0x07;
2816         uint8 a1_width = (blitter_ram[A1_FLAGS + 2] >> 1) & 0x3F;
2817         uint8 a2_width = (blitter_ram[A2_FLAGS + 2] >> 1) & 0x3F;
2818         bool a2_mask = blitter_ram[A2_FLAGS + 2] & 0x80;
2819         uint8 a1addx = blitter_ram[A1_FLAGS + 1] & 0x03, a2addx = blitter_ram[A2_FLAGS + 1] & 0x03;
2820         bool a1addy = blitter_ram[A1_FLAGS + 1] & 0x04, a2addy = blitter_ram[A2_FLAGS + 1] & 0x04;
2821         bool a1xsign = blitter_ram[A1_FLAGS + 1] & 0x08, a2xsign = blitter_ram[A2_FLAGS + 1] & 0x08;
2822         bool a1ysign = blitter_ram[A1_FLAGS + 1] & 0x10, a2ysign = blitter_ram[A2_FLAGS + 1] & 0x10;
2823         uint32 a1_base = GET32(blitter_ram, A1_BASE) & 0xFFFFFFF8;      // Phrase aligned by ignoring bottom 3 bits
2824         uint32 a2_base = GET32(blitter_ram, A2_BASE) & 0xFFFFFFF8;
2825
2826         uint16 a1_win_x = GET16(blitter_ram, A1_CLIP + 2) & 0x7FFF;
2827         uint16 a1_win_y = GET16(blitter_ram, A1_CLIP + 0) & 0x7FFF;
2828         int16 a1_x = (int16)GET16(blitter_ram, A1_PIXEL + 2);
2829         int16 a1_y = (int16)GET16(blitter_ram, A1_PIXEL + 0);
2830         int16 a1_step_x = (int16)GET16(blitter_ram, A1_STEP + 2);
2831         int16 a1_step_y = (int16)GET16(blitter_ram, A1_STEP + 0);
2832         uint16 a1_stepf_x = GET16(blitter_ram, A1_FSTEP + 2);
2833         uint16 a1_stepf_y = GET16(blitter_ram, A1_FSTEP + 0);
2834         uint16 a1_frac_x = GET16(blitter_ram, A1_FPIXEL + 2);
2835         uint16 a1_frac_y = GET16(blitter_ram, A1_FPIXEL + 0);
2836         int16 a1_inc_x = (int16)GET16(blitter_ram, A1_INC + 2);
2837         int16 a1_inc_y = (int16)GET16(blitter_ram, A1_INC + 0);
2838         uint16 a1_incf_x = GET16(blitter_ram, A1_FINC + 2);
2839         uint16 a1_incf_y = GET16(blitter_ram, A1_FINC + 0);
2840
2841         int16 a2_x = (int16)GET16(blitter_ram, A2_PIXEL + 2);
2842         int16 a2_y = (int16)GET16(blitter_ram, A2_PIXEL + 0);
2843         uint16 a2_mask_x = GET16(blitter_ram, A2_MASK + 2);
2844         uint16 a2_mask_y = GET16(blitter_ram, A2_MASK + 0);
2845         int16 a2_step_x = (int16)GET16(blitter_ram, A2_STEP + 2);
2846         int16 a2_step_y = (int16)GET16(blitter_ram, A2_STEP + 0);
2847
2848         uint64 srcd1 = GET64(blitter_ram, SRCDATA);
2849         uint64 srcd2 = 0;
2850         uint64 dstd = GET64(blitter_ram, DSTDATA);
2851         uint64 patd = GET64(blitter_ram, PATTERNDATA);
2852         uint32 iinc = GET32(blitter_ram, INTENSITYINC);
2853         uint64 srcz1 = GET64(blitter_ram, SRCZINT);
2854         uint64 srcz2 = GET64(blitter_ram, SRCZFRAC);
2855         uint64 dstz = GET64(blitter_ram, DSTZ);
2856         uint32 zinc = GET32(blitter_ram, ZINC);
2857         uint32 collision = GET32(blitter_ram, COLLISIONCTRL);// 0=RESUME, 1=ABORT, 2=STOPEN
2858
2859         uint8 pixsize = (dsta2 ? a2_pixsize : a1_pixsize);      // From ACONTROL
2860
2861 //Testing Trevor McFur--I *think* it's the circle on the lower RHS of the screen...
2862 /*logBlit = false;
2863 if (cmd == 0x05810601 && (GET16(blitter_ram, PIXLINECOUNTER + 2) == 96)
2864         && (GET16(blitter_ram, PIXLINECOUNTER + 0) == 72))
2865         logBlit = true;//*/
2866 //Testing...
2867 //if (cmd == 0x1401060C) patd = 0xFFFFFFFFFFFFFFFFLL;
2868 //if (cmd == 0x1401060C) patd = 0x00000000000000FFLL;
2869 //If it's still not working (bcompen-patd) then see who's writing what to patd and where...
2870 //Still not OK. Check to see who's writing what to where in patd!
2871 //It looks like M68K is writing to the top half of patd... Hmm...
2872 /*
2873 ----> M68K wrote 0000 to byte 15737344 of PATTERNDATA...
2874 --> M68K wrote 00 to byte 0 of PATTERNDATA...
2875 --> M68K wrote 00 to byte 1 of PATTERNDATA...
2876 ----> M68K wrote 00FF to byte 15737346 of PATTERNDATA...
2877 --> M68K wrote 00 to byte 2 of PATTERNDATA...
2878 --> M68K wrote FF to byte 3 of PATTERNDATA...
2879 logBlit = F, cmd = 1401060C
2880
2881 Wren0 := ND6 (wren\[0], gpua\[5], gpua\[6..8], bliten, gpu_memw);
2882 Wren1 := ND6 (wren\[1], gpua[5], gpua\[6..8], bliten, gpu_memw);
2883 Wren2 := ND6 (wren\[2], gpua\[5], gpua[6], gpua\[7..8], bliten, gpu_memw);
2884 Wren3 := ND6 (wren\[3], gpua[5], gpua[6], gpua\[7..8], bliten, gpu_memw);
2885
2886 --> 0 000x xx00
2887 Dec0  := D38GH (a1baseld, a1flagld, a1winld, a1ptrld, a1stepld, a1stepfld, a1fracld, a1incld, gpua[2..4], wren\[0]);
2888 --> 0 001x xx00
2889 Dec1  := D38GH (a1incfld, a2baseld, a2flagld, a2maskld, a2ptrldg, a2stepld, cmdldt, countldt, gpua[2..4], wren\[1]);
2890 --> 0 010x xx00
2891 Dec2  := D38GH (srcd1ldg[0..1], dstdldg[0..1], dstzldg[0..1], srcz1ldg[0..1], gpua[2..4], wren\[2]);
2892 --> 0 011x xx00
2893 Dec3  := D38GH (srcz2ld[0..1], patdld[0..1], iincld, zincld, stopld, intld[0], gpua[2..4], wren\[3]);
2894
2895 wren[3] is asserted when gpu address bus = 0 011x xx00
2896 patdld[0] -> 0 0110 1000 -> $F02268 (lo 32 bits)
2897 patdld[1] -> 0 0110 1100 -> $F0226C (hi 32 bits)
2898
2899 So... It's reversed! The data organization of the patd register is [low 32][high 32]! !!! FIX !!! [DONE]
2900 And fix all the other 64 bit registers [DONE]
2901 */
2902 /*if (cmd == 0x1401060C)
2903 {
2904         printf("logBlit = %s, cmd = %08X\n", (logBlit ? "T" : "F"), cmd);
2905         fflush(stdout);
2906 }*/
2907 /*logBlit = false;
2908 if ((cmd == 0x00010200) && (GET16(blitter_ram, PIXLINECOUNTER + 2) == 9))
2909         logBlit = true;
2910
2911 ; Pink altimeter bar
2912
2913 Blit! (00110000 <- 000BF010) count: 9 x 23, A1/2_FLAGS: 000042E2/00010020 [cmd: 00010200]
2914  CMD -> src:  dst:  misc:  a1ctl: UPDA1  mode:  ity: PATDSEL z-op:  op: LFU_CLEAR ctrl:
2915   A1 step values: -10 (X), 1 (Y)
2916   A1 -> pitch: 4 phrases, depth: 16bpp, z-off: 3, width: 320 (21), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
2917   A2 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 1 (00), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
2918         A1 x/y: 262/132, A2 x/y: 129/0
2919 ;x-coord is 257 in pic, so add 5
2920 ;20 for ship, 33 for #... Let's see if we can find 'em!
2921
2922 ; Black altimeter bar
2923
2924 Blit! (00110000 <- 000BF010) count: 5 x 29, A1/2_FLAGS: 000042E2/00010020 [cmd: 00010200]
2925  CMD -> src:  dst:  misc:  a1ctl: UPDA1  mode:  ity: PATDSEL z-op:  op: LFU_CLEAR ctrl:
2926   A1 step values: -8 (X), 1 (Y)
2927   A1 -> pitch: 4 phrases, depth: 16bpp, z-off: 3, width: 320 (21), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
2928   A2 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 1 (00), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
2929         A1 x/y: 264/126, A2 x/y: 336/0
2930
2931 Here's the pink bar--note that it's phrase mode without dread, so how does this work???
2932 Not sure, but I *think* that somehow it MUXes the data at the write site in on the left or right side
2933 of the write data when masked in phrase mode. I'll have to do some tracing to see if this is the mechanism
2934 it uses or not...
2935
2936 Blit! (CMD = 00010200)
2937 Flags: UPDA1 PATDSEL
2938   count = 9 x 11
2939   a1_base = 00110010, a2_base = 000BD7E0
2940   a1_x = 0106, a1_y = 0090, a1_frac_x = 0000, a1_frac_y = 8000, a2_x = 025A, a2_y = 0000
2941   a1_step_x = FFF6, a1_step_y = 0001, a1_stepf_x = 5E00, a1_stepf_y = D100, a2_step_x = FFF7, a2_step_y = 0001
2942   a1_inc_x = 0001, a1_inc_y = FFFF, a1_incf_x = 0000, a1_incf_y = E000
2943   a1_win_x = 0000, a1_win_y = 0000, a2_mask_x = 0000, a2_mask_y = 0000
2944   a2_mask=F a1add=+phr/+0 a2add=+1/+0
2945   a1_pixsize = 4, a2_pixsize = 4
2946    srcd=BAC673AC2C92E578  dstd=0000000000000000 patd=74C074C074C074C0 iinc=0002E398
2947   srcz1=7E127E12000088DA srcz2=DBE06DF000000000 dstz=0000000000000000 zinc=FFFE4840, coll=0
2948   Phrase mode is ON
2949   [in=T a1f=F a1=F zf=F z=F a2=F iif=F iii=F izf=F izi=F]
2950   Entering INNER state...
2951   Entering DWRITE state...
2952      Dest write address/pix address: 0016A830/0 [dstart=20 dend=40 pwidth=8 srcshift=0][daas=0 dabs=0 dam=7 ds=0 daq=F] [7400000074C074C0] (icount=0007, inc=2)
2953   Entering A1_ADD state [a1_x=0106, a1_y=0090, addasel=0, addbsel=0, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
2954   Entering DWRITE state...
2955      Dest write address/pix address: 0016A850/0 [dstart=0 dend=40 pwidth=8 srcshift=0][daas=0 dabs=0 dam=7 ds=0 daq=F] [74C074C074C074C0] (icount=0003, inc=4)
2956   Entering A1_ADD state [a1_x=0108, a1_y=0090, addasel=0, addbsel=0, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
2957   Entering DWRITE state...
2958      Dest write address/pix address: 0016A870/0 [dstart=0 dend=30 pwidth=8 srcshift=0][daas=0 dabs=0 dam=7 ds=0 daq=F] [74C074C074C00000] (icount=FFFF, inc=4)
2959   Entering A1_ADD state [a1_x=010C, a1_y=0090, addasel=0, addbsel=0, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
2960   Entering IDLE_INNER state...
2961   Leaving INNER state... (ocount=000A)
2962   [in=F a1f=F a1=T zf=F z=F a2=F iif=F iii=F izf=F izi=F]
2963   Entering A1UPDATE state... (272/144 -> 262/145)
2964   [in=T a1f=F a1=F zf=F z=F a2=F iif=F iii=F izf=F izi=F]
2965   Entering INNER state...
2966 */
2967
2968         // Bugs in Jaguar I
2969
2970         a2addy = a1addy;                                                        // A2 channel Y add bit is tied to A1's
2971
2972 //if (logBlit && (ocount > 20)) logBlit = false;
2973 #ifdef VERBOSE_BLITTER_LOGGING
2974 if (logBlit)
2975 {
2976 printf("  a1_base = %08X, a2_base = %08X\n", a1_base, a2_base);
2977 printf("  a1_x = %04X, a1_y = %04X, a1_frac_x = %04X, a1_frac_y = %04X, a2_x = %04X, a2_y = %04X\n", (uint16)a1_x, (uint16)a1_y, a1_frac_x, a1_frac_y, (uint16)a2_x, (uint16)a2_y);
2978 printf("  a1_step_x = %04X, a1_step_y = %04X, a1_stepf_x = %04X, a1_stepf_y = %04X, a2_step_x = %04X, a2_step_y = %04X\n", (uint16)a1_step_x, (uint16)a1_step_y, a1_stepf_x, a1_stepf_y, (uint16)a2_step_x, (uint16)a2_step_y);
2979 printf("  a1_inc_x = %04X, a1_inc_y = %04X, a1_incf_x = %04X, a1_incf_y = %04X\n", (uint16)a1_inc_x, (uint16)a1_inc_y, a1_incf_x, a1_incf_y);
2980 printf("  a1_win_x = %04X, a1_win_y = %04X, a2_mask_x = %04X, a2_mask_y = %04X\n", a1_win_x, a1_win_y, a2_mask_x, a2_mask_y);
2981 char x_add_str[4][4] = { "phr", "1", "0", "inc" };
2982 printf("  a2_mask=%s a1add=%s%s/%s%s a2add=%s%s/%s%s\n", (a2_mask ? "T" : "F"), (a1xsign ? "-" : "+"), x_add_str[a1addx],
2983         (a1ysign ? "-" : "+"), (a1addy ? "1" : "0"), (a2xsign ? "-" : "+"), x_add_str[a2addx],
2984         (a2ysign ? "-" : "+"), (a2addy ? "1" : "0"));
2985 printf("  a1_pixsize = %u, a2_pixsize = %u\n", a1_pixsize, a2_pixsize);
2986 printf("   srcd=%08X%08X  dstd=%08X%08X patd=%08X%08X iinc=%08X\n",
2987         (uint32)(srcd1 >> 32), (uint32)(srcd1 & 0xFFFFFFFF),
2988         (uint32)(dstd >> 32), (uint32)(dstd & 0xFFFFFFFF),
2989         (uint32)(patd >> 32), (uint32)(patd & 0xFFFFFFFF), iinc);
2990 printf("  srcz1=%08X%08X srcz2=%08X%08X dstz=%08X%08X zinc=%08X, coll=%X\n",
2991         (uint32)(srcz1 >> 32), (uint32)(srcz1 & 0xFFFFFFFF),
2992         (uint32)(srcz2 >> 32), (uint32)(srcz2 & 0xFFFFFFFF),
2993         (uint32)(dstz >> 32), (uint32)(dstz & 0xFFFFFFFF), zinc, collision);
2994 }
2995 #endif
2996
2997         // Various state lines set up by user
2998
2999         bool phrase_mode = ((!dsta2 && a1addx == 0) || (dsta2 && a2addx == 0) ? true : false);  // From ACONTROL
3000 #ifdef VERBOSE_BLITTER_LOGGING
3001 if (logBlit)
3002 {
3003 printf("  Phrase mode is %s\n", (phrase_mode ? "ON" : "off"));
3004 fflush(stdout);
3005 }
3006 #endif
3007 //logBlit = false;
3008
3009         // Stopgap vars to simulate various lines
3010
3011         uint16 a1FracCInX = 0, a1FracCInY = 0;
3012
3013         while (true)
3014         {
3015                 // IDLE
3016
3017                 if ((idle && !go) || (inner && outer0 && indone))
3018                 {
3019 #ifdef VERBOSE_BLITTER_LOGGING
3020 if (logBlit)
3021 {
3022 printf("  Entering IDLE state...\n");
3023 fflush(stdout);
3024 }
3025 #endif
3026                         idlei = true;
3027
3028                         return;
3029                 }
3030                 else
3031                         idlei = false;
3032
3033                 // INNER LOOP ACTIVE
3034 /*
3035   Entering DWRITE state... (icount=0000, inc=4)
3036   Entering IDLE_INNER state...
3037   Leaving INNER state... (ocount=00EF)
3038   [in=T a1f=F a1=T zf=F z=F a2=F iif=F iii=F izf=F izi=F]
3039   Entering INNER state...
3040 Now:
3041   [in=F a1f=F a1=F zf=F z=F a2=F iif=F iii=F izf=F izi=F]
3042 */
3043
3044                 if ((idle && go && !datinit)
3045                         || (inner && !indone)
3046                         || (inner && indone && !outer0 && !upda1f && !upda1 && notgzandp && !upda2 && !datinit)
3047                         || (a1update && !upda2 && notgzandp && !datinit)
3048                         || (zupdate && !upda2 && !datinit)
3049                         || (a2update && !datinit)
3050                         || (init_ii && !gourz)
3051                         || (init_zi))
3052                 {
3053                         inneri = true;
3054                 }
3055                 else
3056                         inneri = false;
3057
3058                 // A1 FRACTION UPDATE
3059
3060                 if (inner && indone && !outer0 && upda1f)
3061                 {
3062                         a1fupdatei = true;
3063                 }
3064                 else
3065                         a1fupdatei = false;
3066
3067                 // A1 POINTER UPDATE
3068
3069                 if ((a1fupdate)
3070                         || (inner && indone && !outer0 && !upda1f && upda1))
3071                 {
3072                         a1updatei = true;
3073                 }
3074                 else
3075                         a1updatei = false;
3076
3077                 // Z FRACTION UPDATE
3078
3079                 if ((a1update && gourz && polygon)
3080                         || (inner && indone && !outer0 && !upda1f && !upda1 && gourz && polygon))
3081                 {
3082                         zfupdatei = true;
3083                 }
3084                 else
3085                         zfupdatei = false;
3086
3087                 // Z INTEGER UPDATE
3088
3089                 if (zfupdate)
3090                 {
3091                         zupdatei = true;
3092                 }
3093                 else
3094                         zupdatei = false;
3095
3096                 // A2 POINTER UPDATE
3097
3098                 if ((a1update && upda2 && notgzandp)
3099                         || (zupdate && upda2)
3100                         || (inner && indone && !outer0 && !upda1f && notgzandp && !upda1 && upda2))
3101                 {
3102                         a2updatei = true;
3103                 }
3104                 else
3105                         a2updatei = false;
3106
3107                 // INITIALIZE INTENSITY FRACTION
3108
3109                 if ((zupdate && !upda2 && datinit)
3110                         || (a1update && !upda2 && datinit && notgzandp)
3111                         || (inner && indone && !outer0 && !upda1f && !upda1 && notgzandp && !upda2 && datinit)
3112                         || (a2update && datinit)
3113                         || (idle && go && datinit))
3114                 {
3115                         init_ifi = true;
3116                 }
3117                 else
3118                         init_ifi = false;
3119
3120                 // INITIALIZE INTENSITY INTEGER
3121
3122                 if (init_if)
3123                 {
3124                         init_iii = true;
3125                 }
3126                 else
3127                         init_iii = false;
3128
3129                 // INITIALIZE Z FRACTION
3130
3131                 if (init_ii && gourz)
3132                 {
3133                         init_zfi = true;
3134                 }
3135                 else
3136                         init_zfi = false;
3137
3138                 // INITIALIZE Z INTEGER
3139
3140                 if (init_zf)
3141                 {
3142                         init_zii = true;
3143                 }
3144                 else
3145                         init_zii = false;
3146
3147 // Here we move the fooi into their foo counterparts in order to simulate the moving
3148 // of data into the various FDSYNCs... Each time we loop we simulate one clock cycle...
3149
3150                 idle = idlei;
3151                 inner = inneri;
3152                 a1fupdate = a1fupdatei;
3153                 a1update = a1updatei;
3154                 zfupdate = zfupdatei;           // *
3155                 zupdate = zupdatei;                     // *
3156                 a2update = a2updatei;
3157                 init_if = init_ifi;                     // *
3158                 init_ii = init_iii;                     // *
3159                 init_zf = init_zfi;                     // *
3160                 init_zi = init_zii;                     // *
3161 // * denotes states that will never assert for Jaguar I
3162 #ifdef VERBOSE_BLITTER_LOGGING
3163 if (logBlit)
3164 {
3165 printf("  [in=%c a1f=%c a1=%c zf=%c z=%c a2=%c iif=%c iii=%c izf=%c izi=%c]\n",
3166         (inner ? 'T' : 'F'), (a1fupdate ? 'T' : 'F'), (a1update ? 'T' : 'F'), (zfupdate ? 'T' : 'F'),
3167         (zupdate ? 'T' : 'F'), (a2update ? 'T' : 'F'), (init_if ? 'T' : 'F'), (init_ii ? 'T' : 'F'),
3168         (init_zf ? 'T' : 'F'), (init_zi ? 'T' : 'F'));
3169 fflush(stdout);
3170 }
3171 #endif
3172
3173 // Now, depending on how we want to handle things, we could either put the implementation
3174 // of the various pieces up above, or handle them down below here.
3175
3176 // Let's try postprocessing for now...
3177
3178                 if (inner)
3179                 {
3180                         indone = false;
3181 #ifdef VERBOSE_BLITTER_LOGGING
3182 if (logBlit)
3183 {
3184 printf("  Entering INNER state...\n");
3185 fflush(stdout);
3186 }
3187 #endif
3188                         uint16 icount = GET16(blitter_ram, PIXLINECOUNTER + 2);
3189                         bool idle_inner = true, step = true, sreadx = false, szreadx = false, sread = false,
3190                                 szread = false, dread = false, dzread = false, dwrite = false, dzwrite = false;
3191                         bool inner0 = false;
3192                         bool idle_inneri, sreadxi, szreadxi, sreadi, szreadi, dreadi, dzreadi, dwritei, dzwritei;
3193
3194                         // State lines that will never assert in Jaguar I
3195
3196                         bool textext = false, txtread = false;
3197
3198 //other stuff
3199 uint8 srcshift = 0;
3200 bool sshftld = true; // D flipflop (D -> Q): instart -> sshftld
3201 //NOTE: sshftld probably is only asserted at the beginning of the inner loop. !!! FIX !!!
3202 /*
3203 Blit! (CMD = 01800005)
3204 Flags: SRCEN SRCENX LFUFUNC=C
3205   count = 626 x 1
3206   a1_base = 00037290, a2_base = 000095D0
3207   a1_x = 0000, a1_y = 0000, a2_x = 0002, a2_y = 0000
3208   a1_pixsize = 4, a2_pixsize = 4
3209   srcd=0000000000000000, dstd=0000000000000000, patd=0000000000000000
3210   Phrase mode is ON
3211   [in=T a1f=F a1=F zf=F z=F a2=F iif=F iii=F izf=F izi=F]
3212   Entering INNER state...
3213   Entering SREADX state... [dstart=0 dend=20 pwidth=8 srcshift=20]
3214     Source extra read address/pix address: 000095D4/0 [0000001C00540038]
3215   Entering A2_ADD state [a2_x=0002, a2_y=0000, addasel=0, addbsel=1, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
3216   Entering SREAD state... [dstart=0 dend=20 pwidth=8 srcshift=0]
3217     Source read address/pix address: 000095D8/0 [0054003800009814]
3218   Entering A2_ADD state [a2_x=0004, a2_y=0000, addasel=0, addbsel=1, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
3219   Entering DWRITE state...
3220      Dest write address/pix address: 00037290/0 [dstart=0 dend=20 pwidth=8 srcshift=0] (icount=026E, inc=4)
3221   Entering A1_ADD state [a1_x=0000, a1_y=0000, addasel=0, addbsel=0, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
3222   Entering SREAD state... [dstart=0 dend=20 pwidth=8 srcshift=0]
3223     Source read address/pix address: 000095E0/0 [00009968000377C7]
3224   Entering A2_ADD state [a2_x=0008, a2_y=0000, addasel=0, addbsel=1, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
3225   Entering DWRITE state...
3226      Dest write address/pix address: 00037298/0 [dstart=0 dend=20 pwidth=8 srcshift=0] (icount=026A, inc=4)
3227   Entering A1_ADD state [a1_x=0004, a1_y=0000, addasel=0, addbsel=0, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
3228 */
3229
3230 //                      while (!idle_inner)
3231                         while (true)
3232                         {
3233                                 // IDLE
3234
3235                                 if ((idle_inner && !step)
3236                                         || (dzwrite && step && inner0)
3237                                         || (dwrite && step && !dstwrz && inner0))
3238                                 {
3239 #ifdef VERBOSE_BLITTER_LOGGING
3240 if (logBlit)
3241 {
3242 printf("  Entering IDLE_INNER state...\n");
3243 fflush(stdout);
3244 }
3245 #endif
3246                                         idle_inneri = true;
3247 break;
3248                                 }
3249                                 else
3250                                         idle_inneri = false;
3251
3252                                 // EXTRA SOURCE DATA READ
3253
3254                                 if ((idle_inner && step && srcenx)
3255                                         || (sreadx && !step))
3256                                 {
3257                                         sreadxi = true;
3258                                 }
3259                                 else
3260                                         sreadxi = false;
3261
3262                                 // EXTRA SOURCE ZED READ
3263
3264                                 if ((sreadx && step && srcenz)
3265                                         || (szreadx && !step))
3266                                 {
3267                                         szreadxi = true;
3268                                 }
3269                                 else
3270                                         szreadxi = false;
3271
3272                                 // TEXTURE DATA READ (not implemented because not in Jaguar I)
3273
3274                                 // SOURCE DATA READ
3275
3276                                 if ((szreadx && step && !textext)
3277                                         || (sreadx && step && !srcenz && srcen)
3278                                         || (idle_inner && step && !srcenx && !textext && srcen)
3279                                         || (dzwrite && step && !inner0 && !textext && srcen)
3280                                         || (dwrite && step && !dstwrz && !inner0 && !textext && srcen)
3281                                         || (txtread && step && srcen)
3282                                         || (sread && !step))
3283                                 {
3284                                         sreadi = true;
3285                                 }
3286                                 else
3287                                         sreadi = false;
3288
3289                                 // SOURCE ZED READ
3290
3291                                 if ((sread && step && srcenz)
3292                                         || (szread && !step))
3293                                 {
3294                                         szreadi = true;
3295                                 }
3296                                 else
3297                                         szreadi = false;
3298
3299                                 // DESTINATION DATA READ
3300
3301                                 if ((szread && step && dsten)
3302                                         || (sread && step && !srcenz && dsten)
3303                                         || (sreadx && step && !srcenz && !textext && !srcen && dsten)
3304                                         || (idle_inner && step && !srcenx && !textext && !srcen && dsten)
3305                                         || (dzwrite && step && !inner0 && !textext && !srcen && dsten)
3306                                         || (dwrite && step && !dstwrz && !inner0 && !textext && !srcen && dsten)
3307                                         || (txtread && step && !srcen && dsten)
3308                                         || (dread && !step))
3309                                 {
3310                                         dreadi = true;
3311                                 }
3312                                 else
3313                                         dreadi = false;
3314
3315                                 // DESTINATION ZED READ
3316
3317                                 if ((dread && step && dstenz)
3318                                         || (szread && step && !dsten && dstenz)
3319                                         || (sread && step && !srcenz && !dsten && dstenz)
3320                                         || (sreadx && step && !srcenz && !textext && !srcen && !dsten && dstenz)
3321                                         || (idle_inner && step && !srcenx && !textext && !srcen && !dsten && dstenz)
3322                                         || (dzwrite && step && !inner0 && !textext && !srcen && !dsten && dstenz)
3323                                         || (dwrite && step && !dstwrz && !inner0 && !textext && !srcen && !dsten && dstenz)
3324                                         || (txtread && step && !srcen && !dsten && dstenz)
3325                                         || (dzread && !step))
3326                                 {
3327                                         dzreadi = true;
3328                                 }
3329                                 else
3330                                         dzreadi = false;
3331
3332                                 // DESTINATION DATA WRITE
3333
3334                                 if ((dzread && step)
3335                                         || (dread && step && !dstenz)
3336                                         || (szread && step && !dsten && !dstenz)
3337                                         || (sread && step && !srcenz && !dsten && !dstenz)
3338                                         || (txtread && step && !srcen && !dsten && !dstenz)
3339                                         || (sreadx && step && !srcenz && !textext && !srcen && !dsten && !dstenz)
3340                                         || (idle_inner && step && !srcenx && !textext && !srcen && !dsten && !dstenz)
3341                                         || (dzwrite && step && !inner0 && !textext && !srcen && !dsten && !dstenz)
3342                                         || (dwrite && step && !dstwrz && !inner0 && !textext && !srcen && !dsten && !dstenz)
3343                                         || (dwrite && !step))
3344                                 {
3345                                         dwritei = true;
3346                                 }
3347                                 else
3348                                         dwritei = false;
3349
3350                                 // DESTINATION ZED WRITE
3351
3352                                 if ((dzwrite && !step)
3353                                         || (dwrite && step && dstwrz))
3354                                 {
3355                                         dzwritei = true;
3356                                 }
3357                                 else
3358                                         dzwritei = false;
3359
3360 //Kludge: A QnD way to make sure that sshftld is asserted only for the first
3361 //        cycle of the inner loop...
3362 sshftld = idle_inner;
3363
3364 // Here we move the fooi into their foo counterparts in order to simulate the moving
3365 // of data into the various FDSYNCs... Each time we loop we simulate one clock cycle...
3366
3367                                 idle_inner = idle_inneri;
3368                                 sreadx = sreadxi;
3369                                 szreadx = szreadxi;
3370                                 sread = sreadi;
3371                                 szread = szreadi;
3372                                 dread = dreadi;
3373                                 dzread = dzreadi;
3374                                 dwrite = dwritei;
3375                                 dzwrite = dzwritei;
3376
3377 // Here's a few more decodes--not sure if they're supposed to go here or not...
3378
3379                                 bool srca_addi = (sreadxi && !srcenz) || (sreadi && !srcenz) || szreadxi || szreadi;
3380
3381                                 bool dsta_addi = (dwritei && !dstwrz) || dzwritei;
3382
3383                                 bool gensrc = sreadxi || szreadxi || sreadi || szreadi;
3384                                 bool gendst = dreadi || dzreadi || dwritei || dzwritei;
3385                                 bool gena2i = (gensrc && !dsta2) || (gendst && dsta2);
3386
3387                                 bool zaddr = szreadx || szread || dzread || dzwrite;
3388
3389 // Some stuff from MCONTROL.NET--not sure if this is the correct use of this decode or not...
3390 /*Fontread\     := OND1 (fontread\, sread[1], sreadx[1], bcompen);
3391 Fontread        := INV1 (fontread, fontread\);
3392 Justt           := NAN3 (justt, fontread\, phrase_mode, tactive\);
3393 Justify         := TS (justify, justt, busen);*/
3394 bool fontread = (sread || sreadx) && bcompen;
3395 bool justify = !(!fontread && phrase_mode /*&& tactive*/);
3396
3397 /* Generate inner loop update enables */
3398 /*
3399 A1_addi         := MX2 (a1_addi, dsta_addi, srca_addi, dsta2);
3400 A2_addi         := MX2 (a2_addi, srca_addi, dsta_addi, dsta2);
3401 A1_add          := FD1 (a1_add, a1_add\, a1_addi, clk);
3402 A2_add          := FD1 (a2_add, a2_add\, a2_addi, clk);
3403 A2_addb         := BUF1 (a2_addb, a2_add);
3404 */
3405                                 bool a1_add = (dsta2 ? srca_addi : dsta_addi);
3406                                 bool a2_add = (dsta2 ? dsta_addi : srca_addi);
3407
3408 /* Address adder input A register selection
3409 000     A1 step integer part
3410 001     A1 step fraction part
3411 010     A1 increment integer part
3412 011     A1 increment fraction part
3413 100     A2 step
3414
3415 bit 2 = a2update
3416 bit 1 = /a2update . (a1_add . a1addx[0..1])
3417 bit 0 = /a2update . ( a1fupdate
3418                                     + a1_add . atick[0] . a1addx[0..1])
3419 The /a2update term on bits 0 and 1 is redundant.
3420 Now look-ahead based
3421 */
3422                                 uint8 addasel = (a1fupdate || (a1_add && a1addx == 3) ? 0x01 : 0x00);
3423                                 addasel |= (a1_add && a1addx == 3 ? 0x02 : 0x00);
3424                                 addasel |= (a2update ? 0x04 : 0x00);
3425 /* Address adder input A X constant selection
3426 adda_xconst[0..2] generate a power of 2 in the range 1-64 or all
3427 zeroes when they are all 1
3428 Remember - these are pixels, so to add one phrase the pixel size
3429 has to be taken into account to get the appropriate value.
3430 for A1
3431                 if a1addx[0..1] are 00 set 6 - pixel size
3432                 if a1addx[0..1] are 01 set the value 000
3433                 if a1addx[0..1] are 10 set the value 111
3434 similarly for A2
3435 JLH: Also, 11 will likewise set the value to 111
3436 */
3437                                 uint8 a1_xconst = 6 - a1_pixsize, a2_xconst = 6 - a2_pixsize;
3438
3439                                 if (a1addx == 1)
3440                                     a1_xconst = 0;
3441                                 else if (a1addx & 0x02)
3442                                     a1_xconst = 7;
3443
3444                                 if (a2addx == 1)
3445                                     a2_xconst = 0;
3446                                 else if (a2addx & 0x02)
3447                                     a2_xconst = 7;
3448
3449                                 uint8 adda_xconst = (a2_add ? a2_xconst : a1_xconst);
3450 /* Address adder input A Y constant selection
3451 22 June 94 - This was erroneous, because only the a1addy bit was reflected here.
3452 Therefore, the selection has to be controlled by a bug fix bit.
3453 JLH: Bug fix bit in Jaguar II--not in Jaguar I!
3454 */
3455                                 bool adda_yconst = a1addy;
3456 /* Address adder input A register versus constant selection
3457 given by          a1_add . a1addx[0..1]
3458                                 + a1update
3459                                 + a1fupdate
3460                                 + a2_add . a2addx[0..1]
3461                                 + a2update
3462 */
3463                                 bool addareg = ((a1_add && a1addx == 3) || a1update || a1fupdate
3464                                         || (a2_add && a2addx == 3) || a2update ? true : false);
3465 /* The adders can be put into subtract mode in add pixel size
3466 mode when the corresponding flags are set */
3467                                 bool suba_x = ((a1_add && a1xsign && a1addx == 1) || (a2_add && a2xsign && a2addx == 1) ? true : false);
3468                                 bool suba_y = ((a1_add && a1addy && a1ysign) || (a2_add && a2addy && a2ysign) ? true : false);
3469 /* Address adder input B selection
3470 00      A1 pointer
3471 01      A2 pointer
3472 10      A1 fraction
3473 11      Zero
3474
3475 Bit 1 =   a1fupdate
3476                 + (a1_add . atick[0] . a1addx[0..1])
3477                 + a1fupdate . a1_stepld
3478                 + a1update . a1_stepld
3479                 + a2update . a2_stepld
3480 Bit 0 =   a2update + a2_add
3481                 + a1fupdate . a1_stepld
3482                 + a1update . a1_stepld
3483                 + a2update . a2_stepld
3484 */
3485                                 uint8 addbsel = (a2update || a2_add || (a1fupdate && a1_stepld)
3486                                     || (a1update && a1_stepld) || (a2update && a2_stepld) ? 0x01 : 0x00);
3487                                 addbsel |= (a1fupdate || (a1_add && a1addx == 3) || (a1fupdate && a1_stepld)
3488                                     || (a1update && a1_stepld) || (a2update && a2_stepld) ? 0x02 : 0x00);
3489
3490 /* The modulo bits are used to align X onto a phrase boundary when
3491 it is being updated by one phrase
3492 000     no mask
3493 001     mask bit 0
3494 010     mask bits 1-0
3495 ..
3496 110     mask bits 5-0
3497
3498 Masking is enabled for a1 when a1addx[0..1] is 00, and the value
3499 is 6 - the pixel size (again!)
3500 */
3501                                 uint8 maska1 = (a1_add && a1addx == 0 ? 6 - a1_pixsize : 0);
3502                                 uint8 maska2 = (a2_add && a2addx == 0 ? 6 - a2_pixsize : 0);
3503                                 uint8 modx = (a2_add ? maska2 : maska1);
3504 /* Generate load strobes for the increment updates */
3505
3506 /*A1pldt                := NAN2 (a1pldt, atick[1], a1_add);
3507 A1ptrldi        := NAN2 (a1ptrldi, a1update\, a1pldt);
3508
3509 A1fldt          := NAN4 (a1fldt, atick[0], a1_add, a1addx[0..1]);
3510 A1fracldi       := NAN2 (a1fracldi, a1fupdate\, a1fldt);
3511
3512 A2pldt          := NAN2 (a2pldt, atick[1], a2_add);
3513 A2ptrldi        := NAN2 (a2ptrldi, a2update\, a2pldt);*/
3514                                 bool a1fracldi = a1fupdate || (a1_add && a1addx == 3);
3515
3516 // Some more from DCONTROL...
3517 // atick[] just MAY be important here! We're assuming it's true and dropping the term...
3518 // That will probably screw up some of the lower terms that seem to rely on the timing of it...
3519 #warning srcdreadd is not properly initialized!
3520 bool srcdreadd = false;                                         // Set in INNER.NET
3521 //Shadeadd\     := NAN2H (shadeadd\, dwrite, srcshade);
3522 //Shadeadd      := INV2 (shadeadd, shadeadd\);
3523 bool shadeadd = dwrite && srcshade;
3524 /* Data adder control, input A selection
3525 000   Destination data
3526 001   Initialiser pixel value
3527 100   Source data      - computed intensity fraction
3528 101   Pattern data     - computed intensity
3529 110   Source zed 1     - computed zed
3530 111   Source zed 2     - computed zed fraction
3531
3532 Bit 0 =   dwrite  . gourd . atick[1]
3533         + dzwrite . gourz . atick[0]
3534         + istepadd
3535         + zstepfadd
3536         + init_if + init_ii + init_zf + init_zi
3537 Bit 1 =   dzwrite . gourz . (atick[0] + atick[1])
3538         + zstepadd
3539         + zstepfadd
3540 Bit 2 =   (gourd + gourz) . /(init_if + init_ii + init_zf + init_zi)
3541         + dwrite  . srcshade
3542 */
3543 uint8 daddasel = ((dwrite && gourd) || (dzwrite && gourz) || istepadd || zstepfadd
3544         || init_if || init_ii || init_zf || init_zi ? 0x01 : 0x00);
3545 daddasel |= ((dzwrite && gourz) || zstepadd || zstepfadd ? 0x02 : 0x00);
3546 daddasel |= (((gourd || gourz) && !(init_if || init_ii || init_zf || init_zi))
3547         || (dwrite && srcshade) ? 0x04 : 0x00);
3548 /* Data adder control, input B selection
3549 0000    Source data
3550 0001    Data initialiser increment
3551 0100    Bottom 16 bits of I increment repeated four times
3552 0101    Top 16 bits of I increment repeated four times
3553 0110    Bottom 16 bits of Z increment repeated four times
3554 0111    Top 16 bits of Z increment repeated four times
3555 1100    Bottom 16 bits of I step repeated four times
3556 1101    Top 16 bits of I step repeated four times
3557 1110    Bottom 16 bits of Z step repeated four times
3558 1111    Top 16 bits of Z step repeated four times
3559
3560 Bit 0 =   dwrite  . gourd . atick[1]
3561         + dzwrite . gourz . atick[1]
3562         + dwrite  . srcshade
3563         + istepadd
3564         + zstepadd
3565         + init_if + init_ii + init_zf + init_zi
3566 Bit 1 =   dzwrite . gourz . (atick[0] + atick[1])
3567         + zstepadd
3568         + zstepfadd
3569 Bit 2 =   dwrite  . gourd . (atick[0] + atick[1])
3570         + dzwrite . gourz . (atick[0] + atick[1])
3571         + dwrite  . srcshade
3572         + istepadd + istepfadd + zstepadd + zstepfadd
3573 Bit 3 =   istepadd + istepfadd + zstepadd + zstepfadd
3574 */
3575 uint8 daddbsel = ((dwrite && gourd) || (dzwrite && gourz) || (dwrite && srcshade)
3576         || istepadd || zstepadd || init_if || init_ii || init_zf || init_zi ? 0x01 : 0x00);
3577 daddbsel |= ((dzwrite && gourz) || zstepadd || zstepfadd ? 0x02 : 0x00);
3578 daddbsel |= ((dwrite && gourd) || (dzwrite && gourz) || (dwrite && srcshade)
3579         || istepadd || istepfadd || zstepadd || zstepfadd ? 0x04 : 0x00);
3580 daddbsel |= (istepadd && istepfadd && zstepadd && zstepfadd ? 0x08 : 0x00);
3581 /* Data adder mode control
3582 000     16-bit normal add
3583 001     16-bit saturating add with carry
3584 010     8-bit saturating add with carry, carry into top byte is
3585         inhibited (YCrCb)
3586 011     8-bit saturating add with carry, carry into top byte and
3587         between top nybbles is inhibited (CRY)
3588 100     16-bit normal add with carry
3589 101     16-bit saturating add
3590 110     8-bit saturating add, carry into top byte is inhibited
3591 111     8-bit saturating add, carry into top byte and between top
3592         nybbles is inhibited
3593
3594 The first five are used for Gouraud calculations, the latter three
3595 for adding source and destination data
3596
3597 Bit 0 =   dzwrite . gourz . atick[1]
3598         + dwrite  . gourd . atick[1] . /topnen . /topben . /ext_int
3599         + dwrite  . gourd . atick[1] .  topnen .  topben . /ext_int
3600         + zstepadd
3601         + istepadd . /topnen . /topben . /ext_int
3602         + istepadd .  topnen .  topben . /ext_int
3603         + /gourd . /gourz . /topnen . /topben
3604         + /gourd . /gourz .  topnen .  topben
3605         + shadeadd . /topnen . /topben
3606         + shadeadd .  topnen .  topben
3607         + init_ii . /topnen . /topben . /ext_int
3608         + init_ii .  topnen .  topben . /ext_int
3609         + init_zi
3610
3611 Bit 1 =   dwrite . gourd . atick[1] . /topben . /ext_int
3612         + istepadd . /topben . /ext_int
3613         + /gourd . /gourz .  /topben
3614         + shadeadd .  /topben
3615         + init_ii .  /topben . /ext_int
3616
3617 Bit 2 =   /gourd . /gourz
3618         + shadeadd
3619         + dwrite  . gourd . atick[1] . ext_int
3620         + istepadd . ext_int
3621         + init_ii . ext_int
3622 */
3623 uint8 daddmode = ((dzwrite && gourz) || (dwrite && gourd && !topnen && !topben && !ext_int)
3624         || (dwrite && gourd && topnen && topben && !ext_int) || zstepadd
3625         || (istepadd && !topnen && !topben && !ext_int)
3626         || (istepadd && topnen && topben && !ext_int) || (!gourd && !gourz && !topnen && !topben)
3627         || (!gourd && !gourz && topnen && topben) || (shadeadd && !topnen && !topben)
3628         || (shadeadd && topnen && topben) || (init_ii && !topnen && !topben && !ext_int)
3629         || (init_ii && topnen && topben && !ext_int) || init_zi ? 0x01 : 0x00);
3630 daddmode |= ((dwrite && gourd && !topben && !ext_int) || (istepadd && !topben && !ext_int)
3631         || (!gourd && !gourz && !topben) || (shadeadd && !topben)
3632         || (init_ii && !topben && !ext_int) ? 0x02 : 0x00);
3633 daddmode |= ((!gourd && !gourz) || shadeadd || (dwrite && gourd && ext_int)
3634         || (istepadd && ext_int) || (init_ii && ext_int) ? 0x04 : 0x00);
3635 /* Data add load controls
3636 Pattern fraction (dest data) is loaded on
3637           dwrite . gourd . atick[0]
3638         + istepfadd . /datinit
3639         + init_if
3640 Pattern data is loaded on
3641           dwrite . gourd . atick[1]
3642         + istepadd . /datinit . /datinit
3643         + init_ii
3644 Source z1 is loaded on
3645           dzwrite . gourz . atick[1]
3646         + zstepadd . /datinit . /datinit
3647         + init_zi
3648 Source z2 is loaded on
3649           dzwrite . gourz . atick[0]
3650         + zstepfadd
3651         + init_zf
3652 Texture map shaded data is loaded on
3653         srcdreadd . srcshade
3654 */
3655 bool patfadd = (dwrite && gourd) || (istepfadd && !datinit) || init_if;
3656 bool patdadd = (dwrite && gourd) || (istepadd && !datinit) || init_ii;
3657 bool srcz1add = (dzwrite && gourz) || (zstepadd && !datinit) || init_zi;
3658 bool srcz2add = (dzwrite && gourz) || zstepfadd || init_zf;
3659 bool srcshadd = srcdreadd && srcshade;
3660 bool daddq_sel = patfadd || patdadd || srcz1add || srcz2add || srcshadd;
3661 /* Select write data
3662 This has to be controlled from stage 1 of the pipe-line, delayed
3663 by one tick, as the write occurs in the cycle after the ack.
3664
3665 00      pattern data
3666 01      lfu data
3667 10      adder output
3668 11      source zed
3669
3670 Bit 0 =  /patdsel . /adddsel
3671         + dzwrite1d
3672 Bit 1 =   adddsel
3673         + dzwrite1d
3674 */
3675 uint8 data_sel = ((!patdsel && !adddsel) || dzwrite ? 0x01 : 0x00)
3676         | (adddsel || dzwrite ? 0x02 : 0x00);
3677
3678 uint32 address, pixAddr;
3679 ADDRGEN(address, pixAddr, gena2i, zaddr,
3680         a1_x, a1_y, a1_base, a1_pitch, a1_pixsize, a1_width, a1_zoffset,
3681         a2_x, a2_y, a2_base, a2_pitch, a2_pixsize, a2_width, a2_zoffset);
3682
3683 //Here's my guess as to how the addresses get truncated to phrase boundaries in phrase mode...
3684 if (!justify)
3685         address &= 0xFFFFF8;
3686
3687 /* Generate source alignment shift
3688    -------------------------------
3689 The source alignment shift for data move is the difference between
3690 the source and destination X pointers, multiplied by the pixel
3691 size.  Only the low six bits of the pointers are of interest, as
3692 pixel sizes are always a power of 2 and window rows are always
3693 phrase aligned.
3694
3695 When not in phrase mode, the top 3 bits of the shift value are
3696 set to zero (2/26).
3697
3698 Source shifting is also used to extract bits for bit-to-byte
3699 expansion in phrase mode.  This involves only the bottom three
3700 bits of the shift value, and is based on the offset within the
3701 phrase of the destination X pointer, in pixels.
3702
3703 Source shifting is disabled when srcen is not set.
3704 */
3705 uint8 dstxp = (dsta2 ? a2_x : a1_x) & 0x3F;
3706 uint8 srcxp = (dsta2 ? a1_x : a2_x) & 0x3F;
3707 uint8 shftv = ((dstxp - srcxp) << pixsize) & 0x3F;
3708 /* The phrase mode alignment count is given by the phrase offset
3709 of the first pixel, for bit to byte expansion */
3710 uint8 pobb = 0;
3711
3712 if (pixsize == 3)
3713         pobb = dstxp & 0x07;
3714 if (pixsize == 4)
3715         pobb = dstxp & 0x03;
3716 if (pixsize == 5)
3717         pobb = dstxp & 0x01;
3718
3719 bool pobbsel = phrase_mode && bcompen;
3720 uint8 loshd = (pobbsel ? pobb : shftv) & 0x07;
3721 uint8 shfti = (srcen || pobbsel ? (sshftld ? loshd : srcshift & 0x07) : 0);
3722 /* Enable for high bits is srcen . phrase_mode */
3723 shfti |= (srcen && phrase_mode ? (sshftld ? shftv & 0x38 : srcshift & 0x38) : 0);
3724 srcshift = shfti;
3725
3726                                 if (sreadx)
3727                                 {
3728 #ifdef VERBOSE_BLITTER_LOGGING
3729 if (logBlit)
3730 {
3731 printf("  Entering SREADX state...");
3732 //printf(" [dstart=%X dend=%X pwidth=%X srcshift=%X]\n", dstart, dend, pwidth, srcshift);
3733 fflush(stdout);
3734 }
3735 #endif
3736 //uint32 srcAddr, pixAddr;
3737 //ADDRGEN(srcAddr, pixAddr, gena2i, zaddr,
3738 //      a1_x, a1_y, a1_base, a1_pitch, a1_pixsize, a1_width, a1_zoffset,
3739 //      a2_x, a2_y, a2_base, a2_pitch, a2_pixsize, a2_width, a2_zoffset);
3740                                         srcd2 = srcd1;
3741                                         srcd1 = ((uint64)JaguarReadLong(address + 0, BLITTER) << 32)
3742                                                 | (uint64)JaguarReadLong(address + 4, BLITTER);
3743 //Kludge to take pixel size into account...
3744 //Hmm. If we're not in phrase mode, this is most likely NOT going to be used...
3745 //Actually, it would be--because of BCOMPEN expansion, for example...
3746 if (!phrase_mode)
3747 {
3748         if (bcompen)
3749                 srcd1 >>= 56;
3750         else
3751         {
3752                 if (pixsize == 5)
3753                         srcd1 >>= 32;
3754                 else if (pixsize == 4)
3755                         srcd1 >>= 48;
3756                 else
3757                         srcd1 >>= 56;
3758         }
3759 }//*/
3760 #ifdef VERBOSE_BLITTER_LOGGING
3761 if (logBlit)
3762 {
3763 printf("    Source extra read address/pix address: %08X/%1X [%08X%08X]\n", address, pixAddr,
3764         (uint32)(srcd1 >> 32), (uint32)(srcd1 & 0xFFFFFFFF));
3765 fflush(stdout);
3766 }
3767 #endif
3768                                 }
3769
3770                                 if (szreadx)
3771                                 {
3772 #ifdef VERBOSE_BLITTER_LOGGING
3773 if (logBlit)
3774 {
3775 printf("  Entering SZREADX state...");
3776 fflush(stdout);
3777 }
3778 #endif
3779                                         srcz2 = srcz1;
3780                                         srcz1 = ((uint64)JaguarReadLong(address, BLITTER) << 32) | (uint64)JaguarReadLong(address + 4, BLITTER);
3781 #ifdef VERBOSE_BLITTER_LOGGING
3782 if (logBlit)
3783 {
3784         printf(" Src Z extra read address/pix address: %08X/%1X [%08X%08X]\n", address, pixAddr,
3785                 (uint32)(dstz >> 32), (uint32)(dstz & 0xFFFFFFFF));
3786         fflush(stdout);
3787 }
3788 #endif
3789                                 }
3790
3791                                 if (sread)
3792                                 {
3793 #ifdef VERBOSE_BLITTER_LOGGING
3794 if (logBlit)
3795 {
3796 printf("  Entering SREAD state...");
3797 //printf(" [dstart=%X dend=%X pwidth=%X srcshift=%X]\n", dstart, dend, pwidth, srcshift);
3798 fflush(stdout);
3799 }
3800 #endif
3801 //uint32 srcAddr, pixAddr;
3802 //ADDRGEN(srcAddr, pixAddr, gena2i, zaddr,
3803 //      a1_x, a1_y, a1_base, a1_pitch, a1_pixsize, a1_width, a1_zoffset,
3804 //      a2_x, a2_y, a2_base, a2_pitch, a2_pixsize, a2_width, a2_zoffset);
3805 srcd2 = srcd1;
3806 srcd1 = ((uint64)JaguarReadLong(address, BLITTER) << 32) | (uint64)JaguarReadLong(address + 4, BLITTER);
3807 //Kludge to take pixel size into account...
3808 if (!phrase_mode)
3809 {
3810         if (bcompen)
3811                 srcd1 >>= 56;
3812         else
3813         {
3814                 if (pixsize == 5)
3815                         srcd1 >>= 32;
3816                 else if (pixsize == 4)
3817                         srcd1 >>= 48;
3818                 else
3819                         srcd1 >>= 56;
3820         }
3821 }
3822 #ifdef VERBOSE_BLITTER_LOGGING
3823 if (logBlit)
3824 {
3825 printf("     Source read address/pix address: %08X/%1X [%08X%08X]\n", address, pixAddr,
3826         (uint32)(srcd1 >> 32), (uint32)(srcd1 & 0xFFFFFFFF));
3827 fflush(stdout);
3828 }
3829 #endif
3830                                 }
3831
3832                                 if (szread)
3833                                 {
3834 #ifdef VERBOSE_BLITTER_LOGGING
3835 if (logBlit)
3836 {
3837 printf("  Entering SZREAD state...");
3838 fflush(stdout);
3839 }
3840 #endif
3841                                         srcz2 = srcz1;
3842                                         srcz1 = ((uint64)JaguarReadLong(address, BLITTER) << 32) | (uint64)JaguarReadLong(address + 4, BLITTER);
3843 //Kludge to take pixel size into account... I believe that it only has to take 16BPP mode into account. Not sure tho.
3844 if (!phrase_mode && pixsize == 4)
3845         srcz1 >>= 48;
3846
3847 #ifdef VERBOSE_BLITTER_LOGGING
3848 if (logBlit)
3849 {
3850         printf("     Src Z read address/pix address: %08X/%1X [%08X%08X]\n", address, pixAddr,
3851                 (uint32)(dstz >> 32), (uint32)(dstz & 0xFFFFFFFF));
3852         fflush(stdout);
3853 }
3854 #endif
3855                                 }
3856
3857                                 if (dread)
3858                                 {
3859 #ifdef VERBOSE_BLITTER_LOGGING
3860 if (logBlit)
3861 {
3862 printf("  Entering DREAD state...");
3863 fflush(stdout);
3864 }
3865 #endif
3866 //uint32 dstAddr, pixAddr;
3867 //ADDRGEN(dstAddr, pixAddr, gena2i, zaddr,
3868 //      a1_x, a1_y, a1_base, a1_pitch, a1_pixsize, a1_width, a1_zoffset,
3869 //      a2_x, a2_y, a2_base, a2_pitch, a2_pixsize, a2_width, a2_zoffset);
3870 dstd = ((uint64)JaguarReadLong(address, BLITTER) << 32) | (uint64)JaguarReadLong(address + 4, BLITTER);
3871 //Kludge to take pixel size into account...
3872 if (!phrase_mode)
3873 {
3874         if (pixsize == 5)
3875                 dstd >>= 32;
3876         else if (pixsize == 4)
3877                 dstd >>= 48;
3878         else
3879                 dstd >>= 56;
3880 }
3881 #ifdef VERBOSE_BLITTER_LOGGING
3882 if (logBlit)
3883 {
3884 printf("       Dest read address/pix address: %08X/%1X [%08X%08X]\n", address, pixAddr,
3885         (uint32)(dstd >> 32), (uint32)(dstd & 0xFFFFFFFF));
3886 fflush(stdout);
3887 }
3888 #endif
3889                                 }
3890
3891                                 if (dzread)
3892                                 {
3893 // Is Z always 64 bit read? Or sometimes 16 bit (dependent on phrase_mode)?
3894 #ifdef VERBOSE_BLITTER_LOGGING
3895 if (logBlit)
3896 {
3897         printf("  Entering DZREAD state...");
3898         fflush(stdout);
3899 }
3900 #endif
3901                                         dstz = ((uint64)JaguarReadLong(address, BLITTER) << 32) | (uint64)JaguarReadLong(address + 4, BLITTER);
3902 //Kludge to take pixel size into account... I believe that it only has to take 16BPP mode into account. Not sure tho.
3903 if (!phrase_mode && pixsize == 4)
3904         dstz >>= 48;
3905
3906 #ifdef VERBOSE_BLITTER_LOGGING
3907 if (logBlit)
3908 {
3909         printf("    Dest Z read address/pix address: %08X/%1X [%08X%08X]\n", address, pixAddr,
3910                 (uint32)(dstz >> 32), (uint32)(dstz & 0xFFFFFFFF));
3911         fflush(stdout);
3912 }
3913 #endif
3914                                 }
3915
3916 // These vars should probably go further up in the code... !!! FIX !!!
3917 // We can't preassign these unless they're static...
3918 //uint64 srcz = 0;                      // These are assigned to shut up stupid compiler warnings--dwrite is ALWAYS asserted
3919 //bool winhibit = false;
3920 uint64 srcz;
3921 bool winhibit;
3922 //NOTE: SRCSHADE requires GOURZ to be set to work properly--another Jaguar I bug
3923                                 if (dwrite)
3924                                 {
3925 #ifdef VERBOSE_BLITTER_LOGGING
3926 if (logBlit)
3927 {
3928 printf("  Entering DWRITE state...");
3929 fflush(stdout);
3930 }
3931 #endif
3932 //Counter is done on the dwrite state...! (We'll do it first, since it affects dstart/dend calculations.)
3933 //Here's the voodoo for figuring the correct amount of pixels in phrase mode (or not):
3934                                         int8 inct = -((dsta2 ? a2_x : a1_x) & 0x07);    // From INNER_CNT
3935                                         uint8 inc = 0;
3936                                         inc = (!phrase_mode || (phrase_mode && (inct & 0x01)) ? 0x01 : 0x00);
3937                                         inc |= (phrase_mode && (((pixsize == 3 || pixsize == 4) && (inct & 0x02)) || pixsize == 5 && !(inct & 0x01)) ? 0x02 : 0x00);
3938                                         inc |= (phrase_mode && ((pixsize == 3 && (inct & 0x04)) || (pixsize == 4 && !(inct & 0x03))) ? 0x04 : 0x00);
3939                                         inc |= (phrase_mode && pixsize == 3 && !(inct & 0x07) ? 0x08 : 0x00);
3940
3941                                         uint16 oldicount = icount;      // Save icount to detect underflow...
3942                                         icount -= inc;
3943
3944                                         if (icount == 0 || ((icount & 0x8000) && !(oldicount & 0x8000)))
3945                                                 inner0 = true;
3946 // X/Y stepping is also done here, I think...No. It's done when a1_add or a2_add is asserted...
3947
3948 //*********************************************************************************
3949 //Start & end write mask computations...
3950 //*********************************************************************************
3951
3952 uint8 dstart = 0;
3953
3954 if (pixsize == 3)
3955         dstart = (dstxp & 0x07) << 3;
3956 if (pixsize == 4)
3957         dstart = (dstxp & 0x03) << 4;
3958 if (pixsize == 5)
3959         dstart = (dstxp & 0x01) << 5;
3960
3961 dstart = (phrase_mode ? dstart : pixAddr & 0x07);
3962
3963 //This is the other Jaguar I bug... Normally, should ALWAYS select a1_x here.
3964 uint16 dstxwr = (dsta2 ? a2_x : a1_x) & 0x7FFE;
3965 uint16 pseq = dstxwr ^ (a1_win_x & 0x7FFE);
3966 pseq = (pixsize == 5 ? pseq : pseq & 0x7FFC);
3967 pseq = ((pixsize & 0x06) == 4 ? pseq : pseq & 0x7FF8);
3968 bool penden = clip_a1 && (pseq == 0);
3969 uint8 window_mask = 0;
3970
3971 if (pixsize == 3)
3972         window_mask = (a1_win_x & 0x07) << 3;
3973 if (pixsize == 4)
3974         window_mask = (a1_win_x & 0x03) << 4;
3975 if (pixsize == 5)
3976         window_mask = (a1_win_x & 0x01) << 5;
3977
3978 window_mask = (penden ? window_mask : 0);
3979
3980 /*
3981   Entering SREADX state... [dstart=0 dend=20 pwidth=8 srcshift=20]
3982     Source extra read address/pix address: 000095D0/0 [000004E40000001C]
3983   Entering A2_ADD state [a2_x=0002, a2_y=0000, addasel=0, addbsel=1, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
3984   Entering SREAD state... [dstart=0 dend=20 pwidth=8 srcshift=20]
3985     Source read address/pix address: 000095D8/0 [0054003800009814]
3986   Entering A2_ADD state [a2_x=0004, a2_y=0000, addasel=0, addbsel=1, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
3987   Entering DWRITE state...
3988      Dest write address/pix address: 00037290/0 [dstart=0 dend=20 pwidth=8 srcshift=20][daas=0 dabs=0 dam=7 ds=1 daq=F] [0000001C00000000] (icount=026E, inc=4)
3989   Entering A1_ADD state [a1_x=0000, a1_y=0000, addasel=0, addbsel=0, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
3990
3991 (icount=026E, inc=4)
3992 icount & 0x03 = 0x02
3993          << 4 = 0x20
3994
3995 window_mask = 0x1000
3996
3997 Therefore, it chooses the inner_mask over the window_mask every time! Argh!
3998 This is because we did this wrong:
3999 Innerm[3-5]     := AN2 (inner_mask[3-5], imb[3-5], inner0);
4000 NOTE! This doesn't fix the problem because inner0 is asserted too late to help here. !!! FIX !!! [Should be DONE]
4001 */
4002
4003 /* The mask to be used if within one phrase of the end of the inner
4004 loop, similarly */
4005 uint8 inner_mask = 0;
4006
4007 if (pixsize == 3)
4008         inner_mask = (icount & 0x07) << 3;
4009 if (pixsize == 4)
4010         inner_mask = (icount & 0x03) << 4;
4011 if (pixsize == 5)
4012         inner_mask = (icount & 0x01) << 5;
4013 if (!inner0)
4014         inner_mask = 0;
4015 /* The actual mask used should be the lesser of the window masks and
4016 the inner mask, where in all cases 000 means 1000. */
4017 window_mask = (window_mask == 0 ? 0x40 : window_mask);
4018 inner_mask = (inner_mask == 0 ? 0x40 : inner_mask);
4019 uint8 emask = (window_mask > inner_mask ? inner_mask : window_mask);
4020 /* The mask to be used for the pixel size, to which must be added
4021 the bit offset */
4022 uint8 pma = pixAddr + (1 << pixsize);
4023 /* Select the mask */
4024 uint8 dend = (phrase_mode ? emask : pma);
4025
4026 /* The cycle width in phrase mode is normally one phrase.  However,
4027 at the start and end it may be narrower.  The start and end masks
4028 are used to generate this.  The width is given by:
4029
4030         8 - start mask - (8 - end mask)
4031 =       end mask - start mask
4032
4033 This is only used for writes in phrase mode.
4034 Start and end from the address level of the pipeline are used.
4035 */
4036 uint8 pwidth = (((dend | dstart) & 0x07) == 0 ? 0x08 : (dend - dstart) & 0x07);
4037
4038 //uint32 dstAddr, pixAddr;
4039 //ADDRGEN(dstAddr, pixAddr, gena2i, zaddr,
4040 //      a1_x, a1_y, a1_base, a1_pitch, a1_pixsize, a1_width, a1_zoffset,
4041 //      a2_x, a2_y, a2_base, a2_pitch, a2_pixsize, a2_width, a2_zoffset);
4042 #ifdef VERBOSE_BLITTER_LOGGING
4043 if (logBlit)
4044 {
4045         printf("     Dest write address/pix address: %08X/%1X", address, pixAddr);
4046         fflush(stdout);
4047 }
4048 #endif
4049
4050 //More testing... This is almost certainly wrong, but how else does this work???
4051 //Seems to kinda work... But still, this doesn't seem to make any sense!
4052 if (phrase_mode && !dsten)
4053         dstd = ((uint64)JaguarReadLong(address, BLITTER) << 32) | (uint64)JaguarReadLong(address + 4, BLITTER);
4054
4055 //Testing only... for now...
4056 //This is wrong because the write data is a combination of srcd and dstd--either run
4057 //thru the LFU or in PATDSEL or ADDDSEL mode. [DONE now, thru DATA module]
4058 // Precedence is ADDDSEL > PATDSEL > LFU.
4059 //Also, doesn't take into account the start & end masks, or the phrase width...
4060 //Now it does!
4061
4062 // srcd2 = xxxx xxxx 0123 4567, srcd = 8901 2345 xxxx xxxx, srcshift = $20 (32)
4063 uint64 srcd = (srcd2 << (64 - srcshift)) | (srcd1 >> srcshift);
4064 //bleh, ugly ugly ugly
4065 if (srcshift == 0)
4066         srcd = srcd1;
4067
4068 //Z DATA() stuff done here... And it has to be done before any Z shifting...
4069 //Note that we need to have phrase mode start/end support here... (Not since we moved it from dzwrite...!)
4070 /*
4071 Here are a couple of Cybermorph blits with Z:
4072 $00113078       // DSTEN DSTENZ DSTWRZ CLIP_A1 GOURD GOURZ PATDSEL ZMODE=4
4073 $09900F39       // SRCEN DSTEN DSTENZ DSTWRZ UPDA1 UPDA1F UPDA2 DSTA2 ZMODE=4 LFUFUNC=C DCOMPEN
4074
4075 We're having the same phrase mode overwrite problem we had with the pixels... !!! FIX !!!
4076 Odd. It's equating 0 with 0... Even though ZMODE is $04 (less than)!
4077 */
4078 if (gourz)
4079 {
4080 /*
4081 void ADDARRAY(uint16 * addq, uint8 daddasel, uint8 daddbsel, uint8 daddmode,
4082         uint64 dstd, uint32 iinc, uint8 initcin[], uint64 initinc, uint16 initpix,
4083         uint32 istep, uint64 patd, uint64 srcd, uint64 srcz1, uint64 srcz2,
4084         uint32 zinc, uint32 zstep)
4085 */
4086         uint16 addq[4];
4087         uint8 initcin[4] = { 0, 0, 0, 0 };
4088         ADDARRAY(addq, 7/*daddasel*/, 6/*daddbsel*/, 0/*daddmode*/, 0, 0, initcin, 0, 0, 0, 0, 0, srcz1, srcz2, zinc, 0);
4089         srcz2 = ((uint64)addq[3] << 48) | ((uint64)addq[2] << 32) | ((uint64)addq[1] << 16) | (uint64)addq[0];
4090         ADDARRAY(addq, 6/*daddasel*/, 7/*daddbsel*/, 1/*daddmode*/, 0, 0, initcin, 0, 0, 0, 0, 0, srcz1, srcz2, zinc, 0);
4091         srcz1 = ((uint64)addq[3] << 48) | ((uint64)addq[2] << 32) | ((uint64)addq[1] << 16) | (uint64)addq[0];
4092
4093 #ifdef VERBOSE_BLITTER_LOGGING
4094 if (logBlit)
4095 {
4096         printf("\n[srcz1=%08X%08X, srcz2=%08X%08X, zinc=%08X",
4097                 (uint32)(srcz1 >> 32), (uint32)(srcz1 & 0xFFFFFFFF),
4098                 (uint32)(srcz2 >> 32), (uint32)(srcz2 & 0xFFFFFFFF), zinc);
4099         fflush(stdout);
4100 }
4101 #endif
4102 }
4103
4104 uint8 zSrcShift = srcshift & 0x30;
4105 srcz = (srcz2 << (64 - zSrcShift)) | (srcz1 >> zSrcShift);
4106 //bleh, ugly ugly ugly
4107 if (zSrcShift == 0)
4108         srcz = srcz1;
4109
4110 #ifdef VERBOSE_BLITTER_LOGGING
4111 if (logBlit)
4112 {
4113         printf(" srcz=%08X%08X]\n", (uint32)(srcz >> 32), (uint32)(srcz & 0xFFFFFFFF));
4114         fflush(stdout);
4115 }
4116 #endif
4117
4118 //When in SRCSHADE mode, it adds the IINC to the read source (from LFU???)
4119 //According to following line, it gets LFU mode. But does it feed the source into the LFU
4120 //after the add?
4121 //Dest write address/pix address: 0014E83E/0 [dstart=0 dend=10 pwidth=8 srcshift=0][daas=4 dabs=5 dam=7 ds=1 daq=F] [0000000000006505] (icount=003F, inc=1)
4122 //Let's try this:
4123 if (srcshade)
4124 {
4125 //NOTE: This is basically doubling the work done by DATA--since this is what
4126 //      ADDARRAY is loaded with when srcshade is enabled... !!! FIX !!!
4127 //      Also note that it doesn't work properly unless GOURZ is set--there's the clue!
4128         uint16 addq[4];
4129         uint8 initcin[4] = { 0, 0, 0, 0 };
4130         ADDARRAY(addq, 4/*daddasel*/, 5/*daddbsel*/, 7/*daddmode*/, dstd, iinc, initcin, 0, 0, 0, patd, srcd, 0, 0, 0, 0);
4131         srcd = ((uint64)addq[3] << 48) | ((uint64)addq[2] << 32) | ((uint64)addq[1] << 16) | (uint64)addq[0];
4132 }
4133 //Seems to work... Not 100% sure tho.
4134 //end try this
4135
4136 //Temporary kludge, to see if the fractional pattern does anything...
4137 //This works, BTW
4138 //But it seems to mess up in Cybermorph... the shading should be smooth but it isn't...
4139 //Seems the carry out is lost again... !!! FIX !!! [DONE--see below]
4140 if (patfadd)
4141 {
4142         uint16 addq[4];
4143         uint8 initcin[4] = { 0, 0, 0, 0 };
4144         ADDARRAY(addq, 4/*daddasel*/, 4/*daddbsel*/, 0/*daddmode*/, dstd, iinc, initcin, 0, 0, 0, patd, srcd, 0, 0, 0, 0);
4145         srcd1 = ((uint64)addq[3] << 48) | ((uint64)addq[2] << 32) | ((uint64)addq[1] << 16) | (uint64)addq[0];
4146 }
4147
4148 //Note that we still don't take atick[0] & [1] into account here, so this will skip half of the data needed... !!! FIX !!!
4149 //Not yet enumerated: dbinh, srcdread, srczread
4150 //Also, should do srcshift on the z value in phrase mode... !!! FIX !!! [DONE]
4151 //As well as add a srcz variable we can set external to this state... !!! FIX !!! [DONE]
4152
4153 uint64 wdata;
4154 uint8 dcomp, zcomp;
4155 DATA(wdata, dcomp, zcomp, winhibit,
4156         true, cmpdst, daddasel, daddbsel, daddmode, daddq_sel, data_sel, 0/*dbinh*/,
4157         dend, dstart, dstd, iinc, lfufunc, patd, patdadd,
4158         phrase_mode, srcd, false/*srcdread*/, false/*srczread*/, srcz2add, zmode,
4159         bcompen, bkgwren, dcompen, icount & 0x07, pixsize,
4160         srcz, dstz, zinc);
4161 /*
4162 Seems that the phrase mode writes with DCOMPEN and DSTEN are corrupting inside of DATA: !!! FIX !!!
4163 It's fairly random as well. 7CFE -> 7DFE, 7FCA -> 78CA, 7FA4 -> 78A4, 7F88 -> 8F88
4164 It could be related to an uninitialized variable, like the zmode bug...
4165 [DONE]
4166 It was a bug in the dech38el data--it returned $FF for ungated instead of $00...
4167
4168 Blit! (CMD = 09800609)
4169 Flags: SRCEN DSTEN UPDA1 UPDA2 LFUFUNC=C DCOMPEN
4170   count = 10 x 12
4171   a1_base = 00110000, a2_base = 0010B2A8
4172   a1_x = 004B, a1_y = 00D8, a1_frac_x = 0000, a1_frac_y = 0000, a2_x = 0704, a2_y = 0000
4173   a1_step_x = FFF3, a1_step_y = 0001, a1_stepf_x = 0000, a1_stepf_y = 0000, a2_step_x = FFFC, a2_step_y = 0000
4174   a1_inc_x = 0000, a1_inc_y = 0000, a1_incf_x = 0000, a1_incf_y = 0000
4175   a1_win_x = 0000, a1_win_y = 0000, a2_mask_x = 0000, a2_mask_y = 0000
4176   a2_mask=F a1add=+phr/+0 a2add=+phr/+0
4177   a1_pixsize = 4, a2_pixsize = 4
4178    srcd=0000000000000000  dstd=0000000000000000 patd=0000000000000000 iinc=00000000
4179   srcz1=0000000000000000 srcz2=0000000000000000 dstz=0000000000000000 zinc=00000000, coll=0
4180   Phrase mode is ON
4181   [in=T a1f=F a1=F zf=F z=F a2=F iif=F iii=F izf=F izi=F]
4182   Entering INNER state...
4183   Entering SREAD state...    Source read address/pix address: 0010C0B0/0 [0000000078047804]
4184   Entering A2_ADD state [a2_x=0704, a2_y=0000, addasel=0, addbsel=1, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
4185   Entering DREAD state...
4186       Dest read address/pix address: 00197240/0 [0000000000000028]
4187   Entering DWRITE state...
4188      Dest write address/pix address: 00197240/0 [dstart=30 dend=40 pwidth=8 srcshift=30][daas=0 dabs=0 dam=7 ds=1 daq=F] [0000000000000028] (icount=0009, inc=1)
4189   Entering A1_ADD state [a1_x=004B, a1_y=00D8, addasel=0, addbsel=0, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
4190   Entering SREAD state...    Source read address/pix address: 0010C0B8/0 [7804780478047804]
4191   Entering A2_ADD state [a2_x=0708, a2_y=0000, addasel=0, addbsel=1, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
4192   Entering DREAD state...
4193       Dest read address/pix address: 00197260/0 [0028000000200008]
4194   Entering DWRITE state...
4195      Dest write address/pix address: 00197260/0 [dstart=0 dend=40 pwidth=8 srcshift=30][daas=0 dabs=0 dam=7 ds=1 daq=F] [0028780478047804] (icount=0005, inc=4)
4196   Entering A1_ADD state [a1_x=004C, a1_y=00D8, addasel=0, addbsel=0, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
4197   Entering SREAD state...    Source read address/pix address: 0010C0C0/0 [0000000000000000]
4198   Entering A2_ADD state [a2_x=070C, a2_y=0000, addasel=0, addbsel=1, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
4199   Entering DREAD state...
4200       Dest read address/pix address: 00197280/0 [0008001800180018]
4201   Entering DWRITE state...
4202      Dest write address/pix address: 00197280/0 [dstart=0 dend=40 pwidth=8 srcshift=30][daas=0 dabs=0 dam=7 ds=1 daq=F] [7804780478040018] (icount=0001, inc=4)
4203   Entering A1_ADD state [a1_x=0050, a1_y=00D8, addasel=0, addbsel=0, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
4204   Entering SREAD state...    Source read address/pix address: 0010C0C8/0 [000078047BFE7BFE]
4205   Entering A2_ADD state [a2_x=0710, a2_y=0000, addasel=0, addbsel=1, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
4206   Entering DREAD state...
4207       Dest read address/pix address: 001972A0/0 [0008002000000000]
4208   Entering DWRITE state...
4209      Dest write address/pix address: 001972A0/0 [dstart=0 dend=10 pwidth=8 srcshift=30][daas=0 dabs=0 dam=7 ds=1 daq=F] [0008002000000000] (icount=FFFD, inc=4)
4210   Entering A1_ADD state [a1_x=0054, a1_y=00D8, addasel=0, addbsel=0, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
4211   Entering IDLE_INNER state...
4212 */
4213
4214 //Why isn't this taken care of in DATA? Because, DATA is modifying its local copy instead of the one used here.
4215 //!!! FIX !!! [DONE]
4216 //if (patdadd)
4217 //      patd = wdata;
4218
4219 //if (patfadd)
4220 //      srcd1 = wdata;
4221
4222 /*
4223 DEF ADDRCOMP (
4224         a1_outside      // A1 pointer is outside window bounds
4225         :OUT;
4226 INT16/  a1_x
4227 INT16/  a1_y
4228 INT15/  a1_win_x
4229 INT15/  a1_win_y
4230         :IN);
4231 BEGIN
4232
4233 // The address is outside if negative, or if greater than or equal
4234 // to the window size
4235
4236 A1_xcomp        := MAG_15 (a1xgr, a1xeq, a1xlt, a1_x{0..14}, a1_win_x{0..14});
4237 A1_ycomp        := MAG_15 (a1ygr, a1yeq, a1ylt, a1_y{0..14}, a1_win_y{0..14});
4238 A1_outside      := OR6 (a1_outside, a1_x{15}, a1xgr, a1xeq, a1_y{15}, a1ygr, a1yeq);
4239 */
4240 //NOTE: There seems to be an off-by-one bug here in the clip_a1 section... !!! FIX !!!
4241 //      Actually, seems to be related to phrase mode writes...
4242 //      Or is it? Could be related to non-15-bit compares as above?
4243 if (clip_a1 && ((a1_x & 0x8000) || (a1_y & 0x8000) || (a1_x >= a1_win_x) || (a1_y >= a1_win_y)))
4244         winhibit = true;
4245
4246 if (!winhibit)
4247 {
4248         if (phrase_mode)
4249         {
4250                 JaguarWriteLong(address + 0, wdata >> 32, BLITTER);
4251                 JaguarWriteLong(address + 4, wdata & 0xFFFFFFFF, BLITTER);
4252         }
4253         else
4254         {
4255                 if (pixsize == 5)
4256                         JaguarWriteLong(address, wdata & 0xFFFFFFFF, BLITTER);
4257                 else if (pixsize == 4)
4258                         JaguarWriteWord(address, wdata & 0x0000FFFF, BLITTER);
4259                 else
4260                         JaguarWriteByte(address, wdata & 0x000000FF, BLITTER);
4261         }
4262 }
4263
4264 #ifdef VERBOSE_BLITTER_LOGGING
4265 if (logBlit)
4266 {
4267         printf(" [%08X%08X]", (uint32)(wdata >> 32), (uint32)(wdata & 0xFFFFFFFF));
4268         printf(" (icount=%04X, inc=%u)\n", icount, (uint16)inc);
4269         printf("    [dstart=%X dend=%X pwidth=%X srcshift=%X]", dstart, dend, pwidth, srcshift);
4270         printf("[daas=%X dabs=%X dam=%X ds=%X daq=%s]\n", daddasel, daddbsel, daddmode, data_sel, (daddq_sel ? "T" : "F"));
4271         fflush(stdout);
4272 }
4273 #endif
4274                                 }
4275
4276                                 if (dzwrite)
4277                                 {
4278 // OK, here's the big insight: When NOT in GOURZ mode, srcz1 & 2 function EXACTLY the same way that
4279 // srcd1 & 2 work--there's an implicit shift from srcz1 to srcz2 whenever srcz1 is read.
4280 // OTHERWISE, srcz1 is the integer for the computed Z and srcz2 is the fractional part.
4281 // Writes to srcz1 & 2 follow the same pattern as the other 64-bit registers--low 32 at the low address,
4282 // high 32 at the high address (little endian!).
4283 // NOTE: GOURZ is still not properly supported. Check patd/patf handling...
4284 //       Phrase mode start/end masks are not properly supported either...
4285 #ifdef VERBOSE_BLITTER_LOGGING
4286 if (logBlit)
4287 {
4288         printf("  Entering DZWRITE state...");
4289         printf("  Dest Z write address/pix address: %08X/%1X [%08X%08X]\n", address, pixAddr,
4290                 (uint32)(srcz >> 32), (uint32)(srcz & 0xFFFFFFFF));
4291         fflush(stdout);
4292 }
4293 #endif
4294 //This is not correct... !!! FIX !!!
4295 //Should be OK now... We'll see...
4296 //Nope. Having the same starstep write problems in phrase mode as we had with pixels... !!! FIX !!!
4297 //This is not causing the problem in Hover Strike... :-/
4298 //The problem was with the SREADX not shifting. Still problems with Z comparisons & other text in pregame screen...
4299 if (!winhibit)
4300 {
4301         if (phrase_mode)
4302         {
4303                 JaguarWriteLong(address + 0, srcz >> 32, BLITTER);
4304                 JaguarWriteLong(address + 4, srcz & 0xFFFFFFFF, BLITTER);
4305         }
4306         else
4307         {
4308                 if (pixsize == 4)
4309                         JaguarWriteWord(address, srcz & 0x0000FFFF, BLITTER);
4310         }
4311 }//*/
4312 #ifdef VERBOSE_BLITTER_LOGGING
4313 if (logBlit)
4314 {
4315 //      printf(" [%08X%08X]\n", (uint32)(srcz >> 32), (uint32)(srcz & 0xFFFFFFFF));
4316 //      fflush(stdout);
4317 //printf(" [dstart=%X dend=%X pwidth=%X srcshift=%X]", dstart, dend, pwidth, srcshift);
4318         printf("    [dstart=? dend=? pwidth=? srcshift=%X]", srcshift);
4319         printf("[daas=%X dabs=%X dam=%X ds=%X daq=%s]\n", daddasel, daddbsel, daddmode, data_sel, (daddq_sel ? "T" : "F"));
4320         fflush(stdout);
4321 }
4322 #endif
4323                                 }
4324
4325                                 if (a1_add)
4326                                 {
4327 #ifdef VERBOSE_BLITTER_LOGGING
4328 if (logBlit)
4329 {
4330 //printf("  Entering A1_ADD state [addasel=%X, addbsel=%X, modx=%X, addareg=%s, adda_xconst=%u, adda_yconst=%s]...\n", addasel, addbsel, modx, (addareg ? "T" : "F"), adda_xconst, (adda_yconst ? "1" : "0"));
4331 printf("  Entering A1_ADD state [a1_x=%04X, a1_y=%04X, addasel=%X, addbsel=%X, modx=%X, addareg=%s, adda_xconst=%u, adda_yconst=%s]...\n", a1_x, a1_y, addasel, addbsel, modx, (addareg ? "T" : "F"), adda_xconst, (adda_yconst ? "1" : "0"));
4332 fflush(stdout);
4333 }
4334 #endif
4335 int16 adda_x, adda_y, addb_x, addb_y, data_x, data_y, addq_x, addq_y;
4336 ADDAMUX(adda_x, adda_y, addasel, a1_step_x, a1_step_y, a1_stepf_x, a1_stepf_y, a2_step_x, a2_step_y,
4337         a1_inc_x, a1_inc_y, a1_incf_x, a1_incf_y, adda_xconst, adda_yconst, addareg, suba_x, suba_y);
4338 ADDBMUX(addb_x, addb_y, addbsel, a1_x, a1_y, a2_x, a2_y, a1_frac_x, a1_frac_y);
4339 ADDRADD(addq_x, addq_y, a1fracldi, adda_x, adda_y, addb_x, addb_y, modx, suba_x, suba_y);
4340
4341 #if 0//def VERBOSE_BLITTER_LOGGING
4342 if (logBlit)
4343 {
4344 printf("  [adda_x=%d, adda_y=%d, addb_x=%d, addb_y=%d, addq_x=%d, addq_y=%d]\n", adda_x, adda_y, addb_x, addb_y, addq_x, addq_y);
4345 fflush(stdout);
4346 }
4347 #endif
4348 //Now, write to what???
4349 //a2ptrld comes from a2ptrldi...
4350 //I believe it's addbsel that determines the writeback...
4351 // This is where atick[0] & [1] come in, in determining which part (fractional, integer)
4352 // gets written to...
4353 //a1_x = addq_x;
4354 //a1_y = addq_y;
4355 //Kludge, to get A1 channel increment working...
4356 if (a1addx == 3)
4357 {
4358         a1_frac_x = addq_x, a1_frac_y = addq_y;
4359
4360 addasel = 2, addbsel = 0, a1fracldi = false;
4361 ADDAMUX(adda_x, adda_y, addasel, a1_step_x, a1_step_y, a1_stepf_x, a1_stepf_y, a2_step_x, a2_step_y,
4362         a1_inc_x, a1_inc_y, a1_incf_x, a1_incf_y, adda_xconst, adda_yconst, addareg, suba_x, suba_y);
4363 ADDBMUX(addb_x, addb_y, addbsel, a1_x, a1_y, a2_x, a2_y, a1_frac_x, a1_frac_y);
4364 ADDRADD(addq_x, addq_y, a1fracldi, adda_x, adda_y, addb_x, addb_y, modx, suba_x, suba_y);
4365
4366         a1_x = addq_x, a1_y = addq_y;
4367 }
4368 else
4369         a1_x = addq_x, a1_y = addq_y;
4370                                 }
4371
4372                                 if (a2_add)
4373                                 {
4374 #ifdef VERBOSE_BLITTER_LOGGING
4375 if (logBlit)
4376 {
4377 //printf("  Entering A2_ADD state [addasel=%X, addbsel=%X, modx=%X, addareg=%s, adda_xconst=%u, adda_yconst=%s]...\n", addasel, addbsel, modx, (addareg ? "T" : "F"), adda_xconst, (adda_yconst ? "1" : "0"));
4378 printf("  Entering A2_ADD state [a2_x=%04X, a2_y=%04X, addasel=%X, addbsel=%X, modx=%X, addareg=%s, adda_xconst=%u, adda_yconst=%s]...\n", a2_x, a2_y, addasel, addbsel, modx, (addareg ? "T" : "F"), adda_xconst, (adda_yconst ? "1" : "0"));
4379 fflush(stdout);
4380 }
4381 #endif
4382 //void ADDAMUX(int16 &adda_x, int16 &adda_y, uint8 addasel, int16 a1_step_x, int16 a1_step_y,
4383 //      int16 a1_stepf_x, int16 a1_stepf_y, int16 a2_step_x, int16 a2_step_y,
4384 //      int16 a1_inc_x, int16 a1_inc_y, int16 a1_incf_x, int16 a1_incf_y, uint8 adda_xconst,
4385 //      bool adda_yconst, bool addareg, bool suba_x, bool suba_y)
4386 //void ADDBMUX(int16 &addb_x, int16 &addb_y, uint8 addbsel, int16 a1_x, int16 a1_y,
4387 //      int16 a2_x, int16 a2_y, int16 a1_frac_x, int16 a1_frac_y)
4388 //void ADDRADD(int16 &addq_x, int16 &addq_y, bool a1fracldi,
4389 //      int16 adda_x, int16 adda_y, int16 addb_x, int16 addb_y, uint8 modx, bool suba_x, bool suba_y)
4390 //void DATAMUX(int16 &data_x, int16 &data_y, uint32 gpu_din, int16 addq_x, int16 addq_y, bool addqsel)
4391 int16 adda_x, adda_y, addb_x, addb_y, data_x, data_y, addq_x, addq_y;
4392 ADDAMUX(adda_x, adda_y, addasel, a1_step_x, a1_step_y, a1_stepf_x, a1_stepf_y, a2_step_x, a2_step_y,
4393         a1_inc_x, a1_inc_y, a1_incf_x, a1_incf_y, adda_xconst, adda_yconst, addareg, suba_x, suba_y);
4394 ADDBMUX(addb_x, addb_y, addbsel, a1_x, a1_y, a2_x, a2_y, a1_frac_x, a1_frac_y);
4395 ADDRADD(addq_x, addq_y, a1fracldi, adda_x, adda_y, addb_x, addb_y, modx, suba_x, suba_y);
4396
4397 #if 0//def VERBOSE_BLITTER_LOGGING
4398 if (logBlit)
4399 {
4400 printf("  [adda_x=%d, adda_y=%d, addb_x=%d, addb_y=%d, addq_x=%d, addq_y=%d]\n", adda_x, adda_y, addb_x, addb_y, addq_x, addq_y);
4401 fflush(stdout);
4402 }
4403 #endif
4404 //Now, write to what???
4405 //a2ptrld comes from a2ptrldi...
4406 //I believe it's addbsel that determines the writeback...
4407 a2_x = addq_x;
4408 a2_y = addq_y;
4409                                 }
4410                         }
4411 /*
4412 Flags: SRCEN CLIP_A1 UPDA1 UPDA1F UPDA2 DSTA2 GOURZ ZMODE=0 LFUFUNC=C SRCSHADE
4413   count = 64 x 55
4414   a1_base = 0015B000, a2_base = 0014B000
4415   a1_x = 0000, a1_y = 0000, a1_frac_x = 8000, a1_frac_y = 8000, a2_x = 001F, a2_y = 0038
4416   a1_step_x = FFFFFFC0, a1_step_y = 0001, a1_stepf_x = 0000, a1_stepf_y = 2AAA, a2_step_x = FFFFFFC0, a2_step_y = 0001
4417   a1_inc_x = 0001, a1_inc_y = 0000, a1_incf_x = 0000, a1_incf_y = 0000
4418   a1_win_x = 0040, a1_win_y = 0040, a2_mask_x = 0000, a2_mask_y = 0000
4419   a2_mask=F a1add=+inc/+0 a2add=+1/+0
4420   a1_pixsize = 4, a2_pixsize = 4
4421    srcd=FF00FF00FF00FF00  dstd=0000000000000000 patd=0000000000000000 iinc=00000000
4422   srcz1=0000000000000000 srcz2=0000000000000000 dstz=0000000000000000 zinc=00000000, col=0
4423   Phrase mode is off
4424   [in=T a1f=F a1=F zf=F z=F a2=F iif=F iii=F izf=F izi=F]
4425   Entering INNER state...
4426   Entering SREAD state...    Source read address/pix address: 0015B000/0 [6505650565056505]
4427   Entering A1_ADD state [a1_x=0000, a1_y=0000, addasel=3, addbsel=2, modx=0, addareg=T, adda_xconst=7, adda_yconst=0]...
4428   Entering DWRITE state...
4429      Dest write address/pix address: 0014E83E/0 [dstart=0 dend=10 pwidth=8 srcshift=0][daas=4 dabs=5 dam=7 ds=1 daq=F] [0000000000006505] (icount=003F, inc=1)
4430   Entering A2_ADD state [a2_x=001F, a2_y=0038, addasel=0, addbsel=1, modx=0, addareg=F, adda_xconst=0, adda_yconst=0]...
4431   Entering SREAD state...    Source read address/pix address: 0015B000/0 [6505650565056505]
4432   Entering A1_ADD state [a1_x=FFFF8000, a1_y=FFFF8000, addasel=3, addbsel=2, modx=0, addareg=T, adda_xconst=7, adda_yconst=0]...
4433   Entering DWRITE state...
4434      Dest write address/pix address: 0014E942/0 [dstart=0 dend=10 pwidth=8 srcshift=0][daas=4 dabs=5 dam=7 ds=1 daq=F] [0000000000006505] (icount=003E, inc=1)
4435   Entering A2_ADD state [a2_x=0021, a2_y=0039, addasel=0, addbsel=1, modx=0, addareg=F, adda_xconst=0, adda_yconst=0]...
4436   Entering SREAD state...    Source read address/pix address: 0015B000/0 [6505650565056505]
4437   Entering A1_ADD state [a1_x=FFFF8000, a1_y=FFFF8000, addasel=3, addbsel=2, modx=0, addareg=T, adda_xconst=7, adda_yconst=0]...
4438   Entering DWRITE state...
4439      Dest write address/pix address: 0014EA46/0 [dstart=0 dend=10 pwidth=8 srcshift=0][daas=4 dabs=5 dam=7 ds=1 daq=F] [0000000000006505] (icount=003D, inc=1)
4440   Entering A2_ADD state [a2_x=0023, a2_y=003A, addasel=0, addbsel=1, modx=0, addareg=F, adda_xconst=0, adda_yconst=0]...
4441   Entering SREAD state...    Source read address/pix address: 0015B000/0 [6505650565056505]
4442   Entering A1_ADD state [a1_x=FFFF8000, a1_y=FFFF8000, addasel=3, addbsel=2, modx=0, addareg=T, adda_xconst=7, adda_yconst=0]...
4443   Entering DWRITE state...
4444      Dest write address/pix address: 0014EB4A/0 [dstart=0 dend=10 pwidth=8 srcshift=0][daas=4 dabs=5 dam=7 ds=1 daq=F] [0000000000006505] (icount=003C, inc=1)
4445   Entering A2_ADD state [a2_x=0025, a2_y=003B, addasel=0, addbsel=1, modx=0, addareg=F, adda_xconst=0, adda_yconst=0]...
4446   ...
4447   Entering SREAD state...    Source read address/pix address: 0015B000/0 [6505650565056505]
4448   Entering A1_ADD state [a1_x=FFFF8000, a1_y=FFFF8000, addasel=3, addbsel=2, modx=0, addareg=T, adda_xconst=7, adda_yconst=0]...
4449   Entering DWRITE state...
4450      Dest write address/pix address: 0015283A/0 [dstart=0 dend=10 pwidth=8 srcshift=0][daas=4 dabs=5 dam=7 ds=1 daq=F] [0000000000006505] (icount=0000, inc=1)
4451   Entering A2_ADD state [a2_x=009D, a2_y=0077, addasel=0, addbsel=1, modx=0, addareg=F, adda_xconst=0, adda_yconst=0]...
4452   Entering IDLE_INNER state...
4453   Leaving INNER state... (ocount=0036)
4454   [in=F a1f=T a1=F zf=F z=F a2=F iif=F iii=F izf=F izi=F]
4455   Entering A1FUPDATE state...
4456   [in=F a1f=F a1=T zf=F z=F a2=F iif=F iii=F izf=F izi=F]
4457   Entering A1UPDATE state... (-32768/-32768 -> 32704/-32767)
4458   [in=F a1f=F a1=F zf=F z=F a2=T iif=F iii=F izf=F izi=F]
4459   Entering A2UPDATE state... (159/120 -> 95/121)
4460   [in=T a1f=F a1=F zf=F z=F a2=F iif=F iii=F izf=F izi=F]
4461   Entering INNER state...
4462 */
4463
4464 #ifdef VERBOSE_BLITTER_LOGGING
4465 if (logBlit)
4466 {
4467 printf("  Leaving INNER state...");
4468 fflush(stdout);
4469 }
4470 #endif
4471                         indone = true;
4472 // The outer counter is updated here as well on the clock cycle...
4473
4474 /* the inner loop is started whenever another state is about to
4475 cause the inner state to go active */
4476 //Instart               := ND7 (instart, innert[0], innert[2..7]);
4477
4478 //Actually, it's done only when inner gets asserted without the 2nd line of conditions
4479 //(inner AND !indone)
4480 //fixed now...
4481 //Since we don't get here until the inner loop is finished (indone = true) we can get
4482 //away with doing it here...!
4483                         ocount--;
4484
4485                         if (ocount == 0)
4486                                 outer0 = true;
4487 #ifdef VERBOSE_BLITTER_LOGGING
4488 if (logBlit)
4489 {
4490 printf(" (ocount=%04X)\n", ocount);
4491 fflush(stdout);
4492 }
4493 #endif
4494                 }
4495
4496                 if (a1fupdate)
4497                 {
4498 #ifdef VERBOSE_BLITTER_LOGGING
4499 if (logBlit)
4500 {
4501 printf("  Entering A1FUPDATE state...\n");
4502 fflush(stdout);
4503 }
4504 #endif
4505                         uint32 a1_frac_xt = (uint32)a1_frac_x + (uint32)a1_stepf_x;
4506                         uint32 a1_frac_yt = (uint32)a1_frac_y + (uint32)a1_stepf_y;
4507                         a1FracCInX = a1_frac_xt >> 16;
4508                         a1FracCInY = a1_frac_yt >> 16;
4509                         a1_frac_x = (uint16)(a1_frac_xt & 0xFFFF);
4510                         a1_frac_y = (uint16)(a1_frac_yt & 0xFFFF);
4511                 }
4512
4513                 if (a1update)
4514                 {
4515 #ifdef VERBOSE_BLITTER_LOGGING
4516 if (logBlit)
4517 {
4518 printf("  Entering A1UPDATE state... (%d/%d -> ", a1_x, a1_y);
4519 fflush(stdout);
4520 }
4521 #endif
4522                         a1_x += a1_step_x + a1FracCInX;
4523                         a1_y += a1_step_y + a1FracCInY;
4524 #ifdef VERBOSE_BLITTER_LOGGING
4525 if (logBlit)
4526 {
4527 printf("%d/%d)\n", a1_x, a1_y);
4528 fflush(stdout);
4529 }
4530 #endif
4531                 }
4532
4533                 if (a2update)
4534                 {
4535 #ifdef VERBOSE_BLITTER_LOGGING
4536 if (logBlit)
4537 {
4538 printf("  Entering A2UPDATE state... (%d/%d -> ", a2_x, a2_y);
4539 fflush(stdout);
4540 }
4541 #endif
4542                         a2_x += a2_step_x;
4543                         a2_y += a2_step_y;
4544 #ifdef VERBOSE_BLITTER_LOGGING
4545 if (logBlit)
4546 {
4547 printf("%d/%d)\n", a2_x, a2_y);
4548 fflush(stdout);
4549 }
4550 #endif
4551                 }
4552         }
4553
4554         // Write values back to registers (in real blitter, these are continuously updated)
4555         SET16(blitter_ram, A1_PIXEL + 0, a1_y);
4556         SET16(blitter_ram, A1_PIXEL + 2, a1_x);
4557         SET16(blitter_ram, A1_FPIXEL + 0, a1_frac_y);
4558         SET16(blitter_ram, A1_FPIXEL + 2, a1_frac_x);
4559         SET16(blitter_ram, A2_PIXEL + 0, a2_y);
4560         SET16(blitter_ram, A2_PIXEL + 2, a2_x);
4561 }
4562
4563
4564 /*
4565 The latest that doesn't work properly:
4566
4567 Blit! (CMD = 09800741)
4568 Flags: SRCEN CLIP_A1 UPDA1 UPDA1F UPDA2 LFUFUNC=C DCOMPEN
4569   count = 15 x 18
4570   a1_base = 00050000, a2_base = 0083F400
4571   a1_x = 003D, a1_y = 00AD, a1_frac_x = 8000, a1_frac_y = 0000, a2_x = 0027, a2_y = 00A4
4572   a1_step_x = FFF1, a1_step_y = 0001, a1_stepf_x = 0000, a1_stepf_y = 0000, a2_step_x = FFF1, a2_step_y = 0001
4573   a1_inc_x = 0001, a1_inc_y = 0000, a1_incf_x = 0000, a1_incf_y = 0000
4574   a1_win_x = 0180, a1_win_y = 0118, a2_mask_x = 0000, a2_mask_y = 0000
4575   a2_mask=F a1add=+inc/+0 a2add=+1/+0
4576   a1_pixsize = 4, a2_pixsize = 4
4577    srcd=0000000000000000  dstd=0000000000000000 patd=0000000000000000 iinc=00FFF000
4578   srcz1=0000000000000000 srcz2=0000000000000000 dstz=0000000000000000 zinc=00000000, col=2
4579   Phrase mode is off
4580   [in=T a1f=F a1=F zf=F z=F a2=F iif=F iii=F izf=F izi=F]
4581   Entering INNER state...
4582   Entering SREAD state...    Source read address/pix address: 00858E4E/0 [0000000000000000]
4583   Entering A2_ADD state [a2_x=0027, a2_y=00A4, addasel=0, addbsel=1, modx=0, addareg=F, adda_xconst=0, adda_yconst=0]...
4584   Entering DWRITE state...
4585      Dest write address/pix address: 0007077A/0 [dstart=0 dend=10 pwidth=8 srcshift=0][daas=0 dabs=0 dam=7 ds=1 daq=F] [0000000000000000] (icount=000E, inc=1)
4586   Entering A1_ADD state [a1_x=003D, a1_y=00AD, addasel=3, addbsel=2, modx=0, addareg=T, adda_xconst=7, adda_yconst=0]...
4587   Entering SREAD state...    Source read address/pix address: 00858E50/0 [0000000000000000]
4588   Entering A2_ADD state [a2_x=0028, a2_y=00A4, addasel=0, addbsel=1, modx=0, addareg=F, adda_xconst=0, adda_yconst=0]...
4589   Entering DWRITE state...
4590      Dest write address/pix address: 0007077C/0 [dstart=0 dend=10 pwidth=8 srcshift=0][daas=0 dabs=0 dam=7 ds=1 daq=F] [0000000000000000] (icount=000D, inc=1)
4591   Entering A1_ADD state [a1_x=003E, a1_y=00AD, addasel=3, addbsel=2, modx=0, addareg=T, adda_xconst=7, adda_yconst=0]...
4592   Entering SREAD state...    Source read address/pix address: 00858E52/0 [0000000000000000]
4593   Entering A2_ADD state [a2_x=0029, a2_y=00A4, addasel=0, addbsel=1, modx=0, addareg=F, adda_xconst=0, adda_yconst=0]...
4594   Entering DWRITE state...
4595      Dest write address/pix address: 0007077E/0 [dstart=0 dend=10 pwidth=8 srcshift=0][daas=0 dabs=0 dam=7 ds=1 daq=F] [0000000000000000] (icount=000C, inc=1)
4596   Entering A1_ADD state [a1_x=003F, a1_y=00AD, addasel=3, addbsel=2, modx=0, addareg=T, adda_xconst=7, adda_yconst=0]...
4597   Entering SREAD state...    Source read address/pix address: 00858E54/0 [000000000000014A]
4598   Entering A2_ADD state [a2_x=002A, a2_y=00A4, addasel=0, addbsel=1, modx=0, addareg=F, adda_xconst=0, adda_yconst=0]...
4599   Entering DWRITE state...
4600      Dest write address/pix address: 00070780/0 [dstart=0 dend=10 pwidth=8 srcshift=0][daas=0 dabs=0 dam=7 ds=1 daq=F] [000000000000014A] (icount=000B, inc=1)
4601   Entering A1_ADD state [a1_x=0040, a1_y=00AD, addasel=3, addbsel=2, modx=0, addareg=T, adda_xconst=7, adda_yconst=0]...
4602   Entering SREAD state...    Source read address/pix address: 00858E56/0 [000000000000014A]
4603   Entering A2_ADD state [a2_x=002B, a2_y=00A4, addasel=0, addbsel=1, modx=0, addareg=F, adda_xconst=0, adda_yconst=0]...
4604   Entering DWRITE state...
4605      Dest write address/pix address: 00070782/0 [dstart=0 dend=10 pwidth=8 srcshift=0][daas=0 dabs=0 dam=7 ds=1 daq=F] [000000000000014A] (icount=000A, inc=1)
4606   Entering A1_ADD state [a1_x=0041, a1_y=00AD, addasel=3, addbsel=2, modx=0, addareg=T, adda_xconst=7, adda_yconst=0]...
4607   Entering SREAD state...    Source read address/pix address: 00858E58/0 [000000000000014A]
4608   Entering A2_ADD state [a2_x=002C, a2_y=00A4, addasel=0, addbsel=1, modx=0, addareg=F, adda_xconst=0, adda_yconst=0]...
4609   Entering DWRITE state...
4610      Dest write address/pix address: 00070784/0 [dstart=0 dend=10 pwidth=8 srcshift=0][daas=0 dabs=0 dam=7 ds=1 daq=F] [000000000000014A] (icount=0009, inc=1)
4611   Entering A1_ADD state [a1_x=0042, a1_y=00AD, addasel=3, addbsel=2, modx=0, addareg=T, adda_xconst=7, adda_yconst=0]...
4612   Entering SREAD state...    Source read address/pix address: 00858E5A/0 [000000000000014A]
4613   Entering A2_ADD state [a2_x=002D, a2_y=00A4, addasel=0, addbsel=1, modx=0, addareg=F, adda_xconst=0, adda_yconst=0]...
4614   Entering DWRITE state...
4615      Dest write address/pix address: 00070786/0 [dstart=0 dend=10 pwidth=8 srcshift=0][daas=0 dabs=0 dam=7 ds=1 daq=F] [000000000000014A] (icount=0008, inc=1)
4616   Entering A1_ADD state [a1_x=0043, a1_y=00AD, addasel=3, addbsel=2, modx=0, addareg=T, adda_xconst=7, adda_yconst=0]...
4617   Entering SREAD state...    Source read address/pix address: 00858E5C/0 [000000000000014A]
4618   Entering A2_ADD state [a2_x=002E, a2_y=00A4, addasel=0, addbsel=1, modx=0, addareg=F, adda_xconst=0, adda_yconst=0]...
4619   Entering DWRITE state...
4620      Dest write address/pix address: 00070788/0 [dstart=0 dend=10 pwidth=8 srcshift=0][daas=0 dabs=0 dam=7 ds=1 daq=F] [000000000000014A] (icount=0007, inc=1)
4621   Entering A1_ADD state [a1_x=0044, a1_y=00AD, addasel=3, addbsel=2, modx=0, addareg=T, adda_xconst=7, adda_yconst=0]...
4622   Entering SREAD state...    Source read address/pix address: 00858E5E/0 [000000000000014A]
4623   Entering A2_ADD state [a2_x=002F, a2_y=00A4, addasel=0, addbsel=1, modx=0, addareg=F, adda_xconst=0, adda_yconst=0]...
4624   Entering DWRITE state...
4625      Dest write address/pix address: 0007078A/0 [dstart=0 dend=10 pwidth=8 srcshift=0][daas=0 dabs=0 dam=7 ds=1 daq=F] [000000000000014A] (icount=0006, inc=1)
4626   Entering A1_ADD state [a1_x=0045, a1_y=00AD, addasel=3, addbsel=2, modx=0, addareg=T, adda_xconst=7, adda_yconst=0]...
4627   Entering SREAD state...    Source read address/pix address: 00858E60/0 [000000000000014A]
4628   Entering A2_ADD state [a2_x=0030, a2_y=00A4, addasel=0, addbsel=1, modx=0, addareg=F, adda_xconst=0, adda_yconst=0]...
4629   Entering DWRITE state...
4630      Dest write address/pix address: 0007078C/0 [dstart=0 dend=10 pwidth=8 srcshift=0][daas=0 dabs=0 dam=7 ds=1 daq=F] [000000000000014A] (icount=0005, inc=1)
4631   Entering A1_ADD state [a1_x=0046, a1_y=00AD, addasel=3, addbsel=2, modx=0, addareg=T, adda_xconst=7, adda_yconst=0]...
4632   Entering SREAD state...    Source read address/pix address: 00858E62/0 [000000000000014A]
4633   Entering A2_ADD state [a2_x=0031, a2_y=00A4, addasel=0, addbsel=1, modx=0, addareg=F, adda_xconst=0, adda_yconst=0]...
4634   Entering DWRITE state...
4635      Dest write address/pix address: 0007078E/0 [dstart=0 dend=10 pwidth=8 srcshift=0][daas=0 dabs=0 dam=7 ds=1 daq=F] [000000000000014A] (icount=0004, inc=1)
4636   Entering A1_ADD state [a1_x=0047, a1_y=00AD, addasel=3, addbsel=2, modx=0, addareg=T, adda_xconst=7, adda_yconst=0]...
4637   Entering SREAD state...    Source read address/pix address: 00858E64/0 [000000000000014A]
4638   Entering A2_ADD state [a2_x=0032, a2_y=00A4, addasel=0, addbsel=1, modx=0, addareg=F, adda_xconst=0, adda_yconst=0]...
4639   Entering DWRITE state...
4640      Dest write address/pix address: 00070790/0 [dstart=0 dend=10 pwidth=8 srcshift=0][daas=0 dabs=0 dam=7 ds=1 daq=F] [000000000000014A] (icount=0003, inc=1)
4641   Entering A1_ADD state [a1_x=0048, a1_y=00AD, addasel=3, addbsel=2, modx=0, addareg=T, adda_xconst=7, adda_yconst=0]...
4642   Entering SREAD state...    Source read address/pix address: 00858E66/0 [0000000000000000]
4643   Entering A2_ADD state [a2_x=0033, a2_y=00A4, addasel=0, addbsel=1, modx=0, addareg=F, adda_xconst=0, adda_yconst=0]...
4644   Entering DWRITE state...
4645      Dest write address/pix address: 00070792/0 [dstart=0 dend=10 pwidth=8 srcshift=0][daas=0 dabs=0 dam=7 ds=1 daq=F] [0000000000000000] (icount=0002, inc=1)
4646   Entering A1_ADD state [a1_x=0049, a1_y=00AD, addasel=3, addbsel=2, modx=0, addareg=T, adda_xconst=7, adda_yconst=0]...
4647   Entering SREAD state...    Source read address/pix address: 00858E68/0 [0000000000000000]
4648   Entering A2_ADD state [a2_x=0034, a2_y=00A4, addasel=0, addbsel=1, modx=0, addareg=F, adda_xconst=0, adda_yconst=0]...
4649   Entering DWRITE state...
4650      Dest write address/pix address: 00070794/0 [dstart=0 dend=10 pwidth=8 srcshift=0][daas=0 dabs=0 dam=7 ds=1 daq=F] [0000000000000000] (icount=0001, inc=1)
4651   Entering A1_ADD state [a1_x=004A, a1_y=00AD, addasel=3, addbsel=2, modx=0, addareg=T, adda_xconst=7, adda_yconst=0]...
4652   Entering SREAD state...    Source read address/pix address: 00858E6A/0 [0000000000000000]
4653   Entering A2_ADD state [a2_x=0035, a2_y=00A4, addasel=0, addbsel=1, modx=0, addareg=F, adda_xconst=0, adda_yconst=0]...
4654   Entering DWRITE state...
4655      Dest write address/pix address: 00070796/0 [dstart=0 dend=10 pwidth=8 srcshift=0][daas=0 dabs=0 dam=7 ds=1 daq=F] [0000000000000000] (icount=0000, inc=1)
4656   Entering A1_ADD state [a1_x=004B, a1_y=00AD, addasel=3, addbsel=2, modx=0, addareg=T, adda_xconst=7, adda_yconst=0]...
4657   Entering IDLE_INNER state...
4658   Leaving INNER state... (ocount=0011)
4659   [in=F a1f=T a1=F zf=F z=F a2=F iif=F iii=F izf=F izi=F]
4660   Entering A1FUPDATE state...
4661   [in=F a1f=F a1=T zf=F z=F a2=F iif=F iii=F izf=F izi=F]
4662   Entering A1UPDATE state... (76/173 -> 61/174)
4663   [in=F a1f=F a1=F zf=F z=F a2=T iif=F iii=F izf=F izi=F]
4664   Entering A2UPDATE state... (54/164 -> 39/165)
4665   [in=T a1f=F a1=F zf=F z=F a2=F iif=F iii=F izf=F izi=F]
4666   Entering INNER state...
4667 */
4668
4669
4670 // Various pieces of the blitter puzzle are teased out here...
4671
4672
4673
4674 /*
4675 DEF ADDRGEN (
4676 INT24/  address         // byte address
4677                 pixa[0..2]      // bit part of address, un-pipe-lined
4678                 :OUT;
4679 INT16/  a1_x
4680 INT16/  a1_y
4681 INT21/  a1_base
4682                 a1_pitch[0..1]
4683                 a1_pixsize[0..2]
4684                 a1_width[0..5]
4685                 a1_zoffset[0..1]
4686 INT16/  a2_x
4687 INT16/  a2_y
4688 INT21/  a2_base
4689                 a2_pitch[0..1]
4690                 a2_pixsize[0..2]
4691                 a2_width[0..5]
4692                 a2_zoffset[0..1]
4693                 apipe           // load address pipe-line latch
4694                 clk                     // co-processor clock
4695                 gena2           // generate A2 as opposed to A1
4696                 zaddr           // generate Z address
4697                 :IN);
4698 */
4699
4700 void ADDRGEN(uint32 &address, uint32 &pixa, bool gena2, bool zaddr,
4701         uint16 a1_x, uint16 a1_y, uint32 a1_base, uint8 a1_pitch, uint8 a1_pixsize, uint8 a1_width, uint8 a1_zoffset,
4702         uint16 a2_x, uint16 a2_y, uint32 a2_base, uint8 a2_pitch, uint8 a2_pixsize, uint8 a2_width, uint8 a2_zoffset)
4703 {
4704         uint16 x = (gena2 ? a2_x : a1_x) & 0x7FFF;
4705         uint16 y = (gena2 ? a2_y : a1_y) & 0x0FFF;
4706         uint8 width = (gena2 ? a2_width : a1_width);
4707         uint8 pixsize = (gena2 ? a2_pixsize : a1_pixsize);
4708         uint8 pitch = (gena2 ? a2_pitch : a1_pitch);
4709         uint32 base = (gena2 ? a2_base : a1_base) >> 3;//Only upper 21 bits are passed around the bus? Seems like it...
4710         uint8 zoffset = (gena2 ? a2_zoffset : a1_zoffset);
4711
4712         uint32 ytm = ((uint32)y << 2) + (width & 0x02 ? (uint32)y << 1 : 0) + (width & 0x01 ? (uint32)y : 0);
4713
4714         uint32 ya = (ytm << (width >> 2)) >> 2;
4715
4716         uint32 pa = ya + x;
4717
4718         /*uint32*/ pixa = pa << pixsize;
4719
4720         uint8 pt = ((pitch & 0x01) && !(pitch & 0x02) ? 0x01 : 0x00)
4721                 | (!(pitch & 0x01) && (pitch & 0x02) ? 0x02 : 0x00);
4722 //      uint32 phradr = pixa << pt;
4723         uint32 phradr = (pixa >> 6) << pt;
4724         uint32 shup = (pitch == 0x03 ? (pixa >> 6) : 0);
4725
4726         uint8 za = (zaddr ? zoffset : 0) & 0x03;
4727 //      uint32 addr = za + (phradr & 0x07) + (shup << 1) + base;
4728         uint32 addr = za + phradr + (shup << 1) + base;
4729         /*uint32*/ address = ((pixa & 0x38) >> 3) | ((addr & 0x1FFFFF) << 3);
4730 #if 0//def VERBOSE_BLITTER_LOGGING
4731 if (logBlit)
4732 {
4733 printf("    [gena2=%s, x=%04X, y=%04X, w=%1X, pxsz=%1X, ptch=%1X, b=%08X, zoff=%1X]\n", (gena2 ? "T" : "F"), x, y, width, pixsize, pitch, base, zoffset);
4734 printf("    [ytm=%X, ya=%X, pa=%X, pixa=%X, pt=%X, phradr=%X, shup=%X, za=%X, addr=%X, address=%X]\n", ytm, ya, pa, pixa, pt, phradr, shup, za, addr, address);
4735 fflush(stdout);
4736 }
4737 #endif
4738         pixa &= 0x07;
4739 /*
4740   Entering INNER state...
4741     [gena2=T, x=0002, y=0000, w=20, pxsz=4, ptch=0, b=000012BA, zoff=0]
4742     [ytm=0, ya=0, pa=2, pixa=20, pt=0, phradr=0, shup=0, za=0, addr=12BA, address=95D4]
4743   Entering SREADX state... [dstart=0 dend=20 pwidth=8 srcshift=20]
4744     Source extra read address/pix address: 000095D4/0 [0000001C00540038]
4745   Entering A2_ADD state [a2_x=0002, a2_y=0000, addasel=0, addbsel=1, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
4746     [gena2=T, x=0004, y=0000, w=20, pxsz=4, ptch=0, b=000012BA, zoff=0]
4747     [ytm=0, ya=0, pa=4, pixa=40, pt=0, phradr=1, shup=0, za=0, addr=12BB, address=95D8]
4748   Entering SREAD state... [dstart=0 dend=20 pwidth=8 srcshift=0]
4749     Source read address/pix address: 000095D8/0 [0054003800009814]
4750   Entering A2_ADD state [a2_x=0004, a2_y=0000, addasel=0, addbsel=1, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
4751     [gena2=F, x=0000, y=0000, w=20, pxsz=4, ptch=0, b=00006E52, zoff=0]
4752     [ytm=0, ya=0, pa=0, pixa=0, pt=0, phradr=0, shup=0, za=0, addr=6E52, address=37290]
4753   Entering DWRITE state...
4754      Dest write address/pix address: 00037290/0 [dstart=0 dend=20 pwidth=8 srcshift=0] (icount=026E, inc=4)
4755   Entering A1_ADD state [a1_x=0000, a1_y=0000, addasel=0, addbsel=0, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
4756     [gena2=T, x=0008, y=0000, w=20, pxsz=4, ptch=0, b=000012BA, zoff=0]
4757     [ytm=0, ya=0, pa=8, pixa=80, pt=0, phradr=2, shup=0, za=0, addr=12BC, address=95E0]
4758 */
4759 /*
4760 Obviously wrong:
4761   Entering SREAD state...
4762     [gena2=T, x=0004, y=0000, w=20, pxsz=4, ptch=0, b=000010AC, zoff=0]
4763     [ytm=0, ya=0, pa=4, pixa=0, pt=0, phradr=40, shup=0, za=0, addr=10AC, address=8560]
4764     Source read address/pix address: 00008560/0 [8C27981B327E00F0]
4765
4766 2nd pass (still wrong):
4767   Entering SREAD state...
4768     [gena2=T, x=0004, y=0000, w=20, pxsz=4, ptch=0, b=000010AC, zoff=0]
4769     [ytm=0, ya=0, pa=4, pixa=0, pt=0, phradr=40, shup=0, za=0, addr=10EC, address=8760]
4770     Source read address/pix address: 00008760/0 [00E06DC04581880C]
4771
4772 Correct!:
4773   Entering SREAD state...
4774     [gena2=T, x=0004, y=0000, w=20, pxsz=4, ptch=0, b=000010AC, zoff=0]
4775     [ytm=0, ya=0, pa=4, pixa=0, pt=0, phradr=1, shup=0, za=0, addr=10AD, address=8568]
4776     Source read address/pix address: 00008568/0 [6267981A327C00F0]
4777
4778 OK, now we're back into incorrect (or is it?):
4779   Entering SREADX state... [dstart=0 dend=20 pwidth=8 srcshift=20]
4780     Source extra read address/pix address: 000095D4/0 [0000 001C 0054 0038]
4781   Entering A2_ADD state [a2_x=0002, a2_y=0000, addasel=0, addbsel=1, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
4782   Entering SREAD state... [dstart=0 dend=20 pwidth=8 srcshift=0]
4783     Source read address/pix address: 000095D8/0 [0054 0038 0000 9814]
4784   Entering A2_ADD state [a2_x=0004, a2_y=0000, addasel=0, addbsel=1, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
4785 I think this may be correct...!
4786 */
4787 }
4788
4789 /*
4790 // source and destination address update conditions
4791
4792 Sraat0          := AN2 (sraat[0], sreadxi, srcenz\);
4793 Sraat1          := AN2 (sraat[1], sreadi, srcenz\);
4794 Srca_addi       := OR4 (srca_addi, szreadxi, szreadi, sraat[0..1]);
4795 Srca_add        := FD1Q (srca_add, srca_addi, clk);
4796
4797 Dstaat          := AN2 (dstaat, dwritei, dstwrz\);
4798 Dsta_addi       := OR2 (dsta_addi, dzwritei, dstaat);
4799 // Dsta_add     := FD1Q (dsta_add, dsta_addi, clk);
4800
4801 // source and destination address generate conditions
4802
4803 Gensrc          := OR4 (gensrc, sreadxi, szreadxi, sreadi, szreadi);
4804 Gendst          := OR4 (gendst, dreadi, dzreadi, dwritei, dzwritei);
4805 Dsta2\          := INV1 (dsta2\, dsta2);
4806 Gena2t0         := NAN2 (gena2t[0], gensrc, dsta2\);
4807 Gena2t1         := NAN2 (gena2t[1], gendst, dsta2);
4808 Gena2i          := NAN2 (gena2i, gena2t[0..1]);
4809 Gena2           := FD1QU (gena2, gena2i, clk);
4810
4811 Zaddr           := OR4 (zaddr, szreadx, szread, dzread, dzwrite);
4812 */
4813
4814 /*void foo(void)
4815 {
4816         // Basically, the above translates to:
4817         bool srca_addi = (sreadxi && !srcenz) || (sreadi && !srcenz) || szreadxi || szreadi;
4818
4819         bool dsta_addi = (dwritei && !dstwrz) || dzwritei;
4820
4821         bool gensrc = sreadxi || szreadxi || sreadi || szreadi;
4822         bool gendst = dreadi || dzreadi || dwritei || dzwritei;
4823         bool gena2i = (gensrc && !dsta2) || (gendst && dsta2);
4824
4825         bool zaddr = szreadx || szread || dzread || dzwrite;
4826 }*/
4827
4828 /*
4829 // source data reads
4830
4831 Srcdpset\       := NAN2 (srcdpset\, readreq, sread);
4832 Srcdpt1         := NAN2 (srcdpt[1], srcdpend, srcdack\);
4833 Srcdpt2         := NAN2 (srcdpt[2], srcdpset\, srcdpt[1]);
4834 Srcdpend        := FD2Q (srcdpend, srcdpt[2], clk, reset\);
4835
4836 Srcdxpset\      := NAN2 (srcdxpset\, readreq, sreadx);
4837 Srcdxpt1        := NAN2 (srcdxpt[1], srcdxpend, srcdxack\);
4838 Srcdxpt2        := NAN2 (srcdxpt[2], srcdxpset\, srcdxpt[1]);
4839 Srcdxpend       := FD2Q (srcdxpend, srcdxpt[2], clk, reset\);
4840
4841 Sdpend          := OR2 (sdpend, srcdxpend, srcdpend);
4842 Srcdreadt       := AN2 (srcdreadt, sdpend, read_ack);
4843
4844 //2/9/92 - enhancement?
4845 //Load srcdread on the next tick as well to modify it in srcshade
4846
4847 Srcdreadd       := FD1Q (srcdreadd, srcdreadt, clk);
4848 Srcdread        := AOR1 (srcdread, srcshade, srcdreadd, srcdreadt);
4849
4850 // source zed reads
4851
4852 Srczpset\       := NAN2 (srczpset\, readreq, szread);
4853 Srczpt1         := NAN2 (srczpt[1], srczpend, srczack\);
4854 Srczpt2         := NAN2 (srczpt[2], srczpset\, srczpt[1]);
4855 Srczpend        := FD2Q (srczpend, srczpt[2], clk, reset\);
4856
4857 Srczxpset\      := NAN2 (srczxpset\, readreq, szreadx);
4858 Srczxpt1        := NAN2 (srczxpt[1], srczxpend, srczxack\);
4859 Srczxpt2        := NAN2 (srczxpt[2], srczxpset\, srczxpt[1]);
4860 Srczxpend       := FD2Q (srczxpend, srczxpt[2], clk, reset\);
4861
4862 Szpend          := OR2 (szpend, srczpend, srczxpend);
4863 Srczread        := AN2 (srczread, szpend, read_ack);
4864
4865 // destination data reads
4866
4867 Dstdpset\       := NAN2 (dstdpset\, readreq, dread);
4868 Dstdpt0         := NAN2 (dstdpt[0], dstdpend, dstdack\);
4869 Dstdpt1         := NAN2 (dstdpt[1], dstdpset\, dstdpt[0]);
4870 Dstdpend        := FD2Q (dstdpend, dstdpt[1], clk, reset\);
4871 Dstdread        := AN2 (dstdread, dstdpend, read_ack);
4872
4873 // destination zed reads
4874
4875 Dstzpset\       := NAN2 (dstzpset\, readreq, dzread);
4876 Dstzpt0         := NAN2 (dstzpt[0], dstzpend, dstzack\);
4877 Dstzpt1         := NAN2 (dstzpt[1], dstzpset\, dstzpt[0]);
4878 Dstzpend        := FD2Q (dstzpend, dstzpt[1], clk, reset\);
4879 Dstzread        := AN2 (dstzread, dstzpend, read_ack);
4880 */
4881
4882 /*void foo2(void)
4883 {
4884         // Basically, the above translates to:
4885         bool srcdpend = (readreq && sread) || (srcdpend && !srcdack);
4886         bool srcdxpend = (readreq && sreadx) || (srcdxpend && !srcdxack);
4887         bool sdpend = srcdxpend || srcdpend;
4888         bool srcdread = ((sdpend && read_ack) && srcshade) || (sdpend && read_ack);//the latter term is lookahead
4889
4890 }*/
4891
4892 ////////////////////////////////////////////////////////////////////////////////////////////
4893 ////////////////////////////////////////////////////////////////////////////////////////////
4894 // Here's an important bit: The source data adder logic. Need to track down the inputs!!! //
4895 ////////////////////////////////////////////////////////////////////////////////////////////
4896 ////////////////////////////////////////////////////////////////////////////////////////////
4897
4898 /*
4899 DEF ADDARRAY (
4900 INT16/  addq[0..3]
4901         :OUT;
4902         clk
4903         daddasel[0..2]  // data adder input A selection
4904         daddbsel[0..3]
4905         daddmode[0..2]
4906 INT32/  dstd[0..1]
4907 INT32/  iinc
4908         initcin[0..3]   // carry into the adders from the initializers
4909         initinc[0..63]  // the initialisation increment
4910         initpix[0..15]  // Data initialiser pixel value
4911 INT32/  istep
4912 INT32/  patd[0..1]
4913 INT32/  srcdlo
4914 INT32/  srcdhi
4915 INT32/  srcz1[0..1]
4916 INT32/  srcz2[0..1]
4917         reset\
4918 INT32/  zinc
4919 INT32/  zstep
4920         :IN);
4921 */
4922 void ADDARRAY(uint16 * addq, uint8 daddasel, uint8 daddbsel, uint8 daddmode,
4923         uint64 dstd, uint32 iinc, uint8 initcin[], uint64 initinc, uint16 initpix,
4924         uint32 istep, uint64 patd, uint64 srcd, uint64 srcz1, uint64 srcz2,
4925         uint32 zinc, uint32 zstep)
4926 {
4927         uint32 initpix2 = ((uint32)initpix << 16) | initpix;
4928         uint32 addalo[8], addahi[8];
4929         addalo[0] = dstd & 0xFFFFFFFF;
4930         addalo[1] = initpix2;
4931         addalo[2] = 0;
4932         addalo[3] = 0;
4933         addalo[4] = srcd & 0xFFFFFFFF;
4934         addalo[5] = patd & 0xFFFFFFFF;
4935         addalo[6] = srcz1 & 0xFFFFFFFF;
4936         addalo[7] = srcz2 & 0xFFFFFFFF;
4937         addahi[0] = dstd >> 32;
4938         addahi[1] = initpix2;
4939         addahi[2] = 0;
4940         addahi[3] = 0;
4941         addahi[4] = srcd >> 32;
4942         addahi[5] = patd >> 32;
4943         addahi[6] = srcz1 >> 32;
4944         addahi[7] = srcz2 >> 32;
4945         uint16 adda[4];
4946         adda[0] = addalo[daddasel] & 0xFFFF;
4947         adda[1] = addalo[daddasel] >> 16;
4948         adda[2] = addahi[daddasel] & 0xFFFF;
4949         adda[3] = addahi[daddasel] >> 16;
4950
4951         uint16 wordmux[8];
4952         wordmux[0] = iinc & 0xFFFF;
4953         wordmux[1] = iinc >> 16;
4954         wordmux[2] = zinc & 0xFFFF;
4955         wordmux[3] = zinc >> 16;;
4956         wordmux[4] = istep & 0xFFFF;
4957         wordmux[5] = istep >> 16;;
4958         wordmux[6] = zstep & 0xFFFF;
4959         wordmux[7] = zstep >> 16;;
4960         uint16 word = wordmux[((daddbsel & 0x08) >> 1) | (daddbsel & 0x03)];
4961         uint16 addb[4];
4962         bool dbsel2 = daddbsel & 0x04;
4963         bool iincsel = (daddbsel & 0x01) && !(daddbsel & 0x04);
4964
4965         if (!dbsel2 && !iincsel)
4966                 addb[0] = srcd & 0xFFFF,
4967                 addb[1] = (srcd >> 16) & 0xFFFF,
4968                 addb[2] = (srcd >> 32) & 0xFFFF,
4969                 addb[3] = (srcd >> 48) & 0xFFFF;
4970         else if (dbsel2 && !iincsel)
4971                 addb[0] = addb[1] = addb[2] = addb[3] = word;
4972         else if (!dbsel2 && iincsel)
4973                 addb[0] = initinc & 0xFFFF,
4974                 addb[1] = (initinc >> 16) & 0xFFFF,
4975                 addb[2] = (initinc >> 32) & 0xFFFF,
4976                 addb[3] = (initinc >> 48) & 0xFFFF;
4977         else
4978                 addb[0] = addb[1] = addb[2] = addb[3] = 0;
4979
4980         uint8 cinsel = (daddmode >= 1 && daddmode <= 4 ? 1 : 0);
4981
4982 static uint8 co[4];//These are preserved between calls...
4983         uint8 cin[4];
4984
4985         for(int i=0; i<4; i++)
4986                 cin[i] = initcin[i] | (co[i] & cinsel);
4987
4988         bool eightbit = daddmode & 0x02;
4989         bool sat = daddmode & 0x03;
4990         bool hicinh = ((daddmode & 0x03) == 0x03);
4991
4992 //Note that the carry out is saved between calls to this function...
4993         for(int i=0; i<4; i++)
4994                 ADD16SAT(addq[i], co[i], adda[i], addb[i], cin[i], sat, eightbit, hicinh);
4995 }
4996
4997 /*
4998 DEF ADD16SAT (
4999 INT16/  r               // result
5000         co              // carry out
5001         :IO;
5002 INT16/  a
5003 INT16/  b
5004         cin
5005         sat
5006         eightbit
5007         hicinh
5008         :IN);
5009 */
5010 void ADD16SAT(uint16 &r, uint8 &co, uint16 a, uint16 b, uint8 cin, bool sat, bool eightbit, bool hicinh)
5011 {
5012 /*if (logBlit)
5013 {
5014         printf("--> [sat=%s 8b=%s hicinh=%s] %04X + %04X (+ %u) = ", (sat ? "T" : "F"), (eightbit ? "T" : "F"), (hicinh ? "T" : "F"), a, b, cin);
5015         fflush(stdout);
5016 }*/
5017         uint8 carry[4];
5018         uint32 qt = (a & 0xFF) + (b & 0xFF) + cin;
5019         carry[0] = (qt & 0x0100 ? 1 : 0);
5020         uint16 q = qt & 0x00FF;
5021         carry[1] = (carry[0] && !eightbit ? carry[0] : 0);
5022         qt = (a & 0x0F00) + (b & 0x0F00) + (carry[1] << 8);
5023         carry[2] = (qt & 0x1000 ? 1 : 0);
5024         q |= qt & 0x0F00;
5025         carry[3] = (carry[2] && !hicinh ? carry[2] : 0);
5026         qt = (a & 0xF000) + (b & 0xF000) + (carry[3] << 12);
5027         co = (qt & 0x10000 ? 1 : 0);
5028         q |= qt & 0xF000;
5029
5030         uint8 btop = (eightbit ? (b & 0x0080) >> 7 : (b & 0x8000) >> 15);
5031         uint8 ctop = (eightbit ? carry[0] : co);
5032
5033         bool saturate = sat && (btop ^ ctop);
5034         bool hisaturate = saturate && !eightbit;
5035 /*if (logBlit)
5036 {
5037         printf("bt=%u ct=%u s=%u hs=%u] ", btop, ctop, saturate, hisaturate);
5038         fflush(stdout);
5039 }*/
5040
5041         r = (saturate ? (ctop ? 0x00FF : 0x0000) : q & 0x00FF);
5042         r |= (hisaturate ? (ctop ? 0xFF00 : 0x0000) : q & 0xFF00);
5043 /*if (logBlit)
5044 {
5045         printf("%04X (co=%u)\n", r, co);
5046         fflush(stdout);
5047 }*/
5048 }
5049
5050 /**  ADDAMUX - Address adder input A selection  *******************
5051
5052 This module generates the data loaded into the address adder input A.  This is
5053 the update value, and can be one of four registers :  A1 step, A2 step, A1
5054 increment and A1 fraction.  It can complement these values to perform
5055 subtraction, and it can generate constants to increment / decrement the window
5056 pointers.
5057
5058 addasel[0..2] select the register to add
5059
5060 000     A1 step integer part
5061 001     A1 step fraction part
5062 010     A1 increment integer part
5063 011     A1 increment fraction part
5064 100     A2 step
5065
5066 adda_xconst[0..2] generate a power of 2 in the range 1-64 or all zeroes when
5067 they are all 1.
5068
5069 addareg selects register value to be added as opposed to constant
5070 value.
5071
5072 suba_x, suba_y complement the X and Y values
5073
5074 */
5075
5076 /*
5077 DEF ADDAMUX (
5078 INT16/  adda_x
5079 INT16/  adda_y
5080         :OUT;
5081         addasel[0..2]
5082 INT16/  a1_step_x
5083 INT16/  a1_step_y
5084 INT16/  a1_stepf_x
5085 INT16/  a1_stepf_y
5086 INT16/  a2_step_x
5087 INT16/  a2_step_y
5088 INT16/  a1_inc_x
5089 INT16/  a1_inc_y
5090 INT16/  a1_incf_x
5091 INT16/  a1_incf_y
5092         adda_xconst[0..2]
5093         adda_yconst
5094         addareg
5095         suba_x
5096         suba_y :IN);
5097 */
5098 void ADDAMUX(int16 &adda_x, int16 &adda_y, uint8 addasel, int16 a1_step_x, int16 a1_step_y,
5099         int16 a1_stepf_x, int16 a1_stepf_y, int16 a2_step_x, int16 a2_step_y,
5100         int16 a1_inc_x, int16 a1_inc_y, int16 a1_incf_x, int16 a1_incf_y, uint8 adda_xconst,
5101         bool adda_yconst, bool addareg, bool suba_x, bool suba_y)
5102 {
5103
5104 /*INT16/        addac_x, addac_y, addar_x, addar_y, addart_x, addart_y,
5105 INT16/  addas_x, addas_y, suba_x16, suba_y16
5106 :LOCAL;
5107 BEGIN
5108
5109 Zero            := TIE0 (zero);*/
5110
5111 /* Multiplex the register terms */
5112
5113 /*Addaselb[0-2] := BUF8 (addaselb[0-2], addasel[0-2]);
5114 Addart_x        := MX4 (addart_x, a1_step_x, a1_stepf_x, a1_inc_x, a1_incf_x, addaselb[0..1]);
5115 Addar_x         := MX2 (addar_x, addart_x, a2_step_x, addaselb[2]);
5116 Addart_y        := MX4 (addart_y, a1_step_y, a1_stepf_y, a1_inc_y, a1_incf_y, addaselb[0..1]);
5117 Addar_y         := MX2 (addar_y, addart_y, a2_step_y, addaselb[2]);*/
5118
5119 ////////////////////////////////////// C++ CODE //////////////////////////////////////
5120         int16 xterm[4], yterm[4];
5121         xterm[0] = a1_step_x, xterm[1] = a1_stepf_x, xterm[2] = a1_inc_x, xterm[3] = a1_incf_x;
5122         yterm[0] = a1_step_y, yterm[1] = a1_stepf_y, yterm[2] = a1_inc_y, yterm[3] = a1_incf_y;
5123         int16 addar_x = (addasel & 0x04 ? a2_step_x : xterm[addasel & 0x03]);
5124         int16 addar_y = (addasel & 0x04 ? a2_step_y : yterm[addasel & 0x03]);
5125 //////////////////////////////////////////////////////////////////////////////////////
5126
5127 /* Generate a constant value - this is a power of 2 in the range
5128 0-64, or zero.  The control bits are adda_xconst[0..2], when they
5129 are all 1  the result is 0.
5130 Constants for Y can only be 0 or 1 */
5131
5132 /*Addac_xlo     := D38H (addac_x[0..6], unused[0], adda_xconst[0..2]);
5133 Unused[0]       := DUMMY (unused[0]);
5134
5135 Addac_x         := JOIN (addac_x, addac_x[0..6], zero, zero, zero, zero, zero, zero, zero, zero, zero);
5136 Addac_y         := JOIN (addac_y, adda_yconst, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero,
5137                         zero, zero, zero, zero, zero);*/
5138 ////////////////////////////////////// C++ CODE //////////////////////////////////////
5139         int16 addac_x = (adda_xconst == 0x07 ? 0 : 1 << adda_xconst);
5140         int16 addac_y = (adda_yconst ? 0x01 : 0);
5141 //////////////////////////////////////////////////////////////////////////////////////
5142
5143 /* Select between constant value and register value */
5144
5145 /*Addas_x               := MX2 (addas_x, addac_x, addar_x, addareg);
5146 Addas_y         := MX2 (addas_y, addac_y, addar_y, addareg);*/
5147 ////////////////////////////////////// C++ CODE //////////////////////////////////////
5148         int16 addas_x = (addareg ? addar_x : addac_x);
5149         int16 addas_y = (addareg ? addar_y : addac_y);
5150 //////////////////////////////////////////////////////////////////////////////////////
5151
5152 /* Complement these values (complement flag gives adder carry in)*/
5153
5154 /*Suba_x16      := JOIN (suba_x16, suba_x, suba_x, suba_x, suba_x, suba_x, suba_x, suba_x, suba_x, suba_x,
5155                         suba_x, suba_x, suba_x, suba_x, suba_x, suba_x, suba_x);
5156 Suba_y16        := JOIN (suba_y16, suba_y, suba_y, suba_y, suba_y, suba_y, suba_y, suba_y, suba_y, suba_y,
5157                         suba_y, suba_y, suba_y, suba_y, suba_y, suba_y, suba_y);
5158 Adda_x          := EO (adda_x, suba_x16, addas_x);
5159 Adda_y          := EO (adda_y, suba_y16, addas_y);*/
5160 ////////////////////////////////////// C++ CODE //////////////////////////////////////
5161         adda_x = addas_x ^ (suba_x ? 0xFFFF : 0x0000);
5162         adda_y = addas_y ^ (suba_y ? 0xFFFF : 0x0000);
5163 //////////////////////////////////////////////////////////////////////////////////////
5164
5165 //END;
5166 }
5167
5168 /**  ADDBMUX - Address adder input B selection  *******************
5169
5170 This module selects the register to be updated by the address
5171 adder.  This can be one of three registers, the A1 and A2
5172 pointers, or the A1 fractional part. It can also be zero, so that the step
5173 registers load directly into the pointers.
5174 */
5175
5176 /*DEF ADDBMUX (
5177 INT16/  addb_x
5178 INT16/  addb_y
5179         :OUT;
5180         addbsel[0..1]
5181 INT16/  a1_x
5182 INT16/  a1_y
5183 INT16/  a2_x
5184 INT16/  a2_y
5185 INT16/  a1_frac_x
5186 INT16/  a1_frac_y
5187         :IN);
5188 INT16/  zero16 :LOCAL;
5189 BEGIN*/
5190 void ADDBMUX(int16 &addb_x, int16 &addb_y, uint8 addbsel, int16 a1_x, int16 a1_y,
5191         int16 a2_x, int16 a2_y, int16 a1_frac_x, int16 a1_frac_y)
5192 {
5193
5194 /*Zero          := TIE0 (zero);
5195 Zero16          := JOIN (zero16, zero, zero, zero, zero, zero, zero, zero,
5196                         zero, zero, zero, zero, zero, zero, zero, zero, zero);
5197 Addbselb[0-1]   := BUF8 (addbselb[0-1], addbsel[0-1]);
5198 Addb_x          := MX4 (addb_x, a1_x, a2_x, a1_frac_x, zero16, addbselb[0..1]);
5199 Addb_y          := MX4 (addb_y, a1_y, a2_y, a1_frac_y, zero16, addbselb[0..1]);*/
5200 ////////////////////////////////////// C++ CODE //////////////////////////////////////
5201         int16 xterm[4], yterm[4];
5202         xterm[0] = a1_x, xterm[1] = a2_x, xterm[2] = a1_frac_x, xterm[3] = 0;
5203         yterm[0] = a1_y, yterm[1] = a2_y, yterm[2] = a1_frac_y, yterm[3] = 0;
5204         addb_x = xterm[addbsel & 0x03];
5205         addb_y = yterm[addbsel & 0x03];
5206 //////////////////////////////////////////////////////////////////////////////////////
5207
5208 //END;
5209 }
5210
5211 /**  DATAMUX - Address local data bus selection  ******************
5212
5213 Select between the adder output and the input data bus
5214 */
5215
5216 /*DEF DATAMUX (
5217 INT16/  data_x
5218 INT16/  data_y
5219         :OUT;
5220 INT32/  gpu_din
5221 INT16/  addq_x
5222 INT16/  addq_y
5223         addqsel
5224         :IN);
5225
5226 INT16/  gpu_lo, gpu_hi
5227 :LOCAL;
5228 BEGIN*/
5229 void DATAMUX(int16 &data_x, int16 &data_y, uint32 gpu_din, int16 addq_x, int16 addq_y, bool addqsel)
5230 {
5231 /*Gpu_lo                := JOIN (gpu_lo, gpu_din{0..15});
5232 Gpu_hi          := JOIN (gpu_hi, gpu_din{16..31});
5233
5234 Addqselb        := BUF8 (addqselb, addqsel);
5235 Data_x          := MX2 (data_x, gpu_lo, addq_x, addqselb);
5236 Data_y          := MX2 (data_y, gpu_hi, addq_y, addqselb);*/
5237 ////////////////////////////////////// C++ CODE //////////////////////////////////////
5238         data_x = (addqsel ? addq_x : (int16)(gpu_din & 0xFFFF));
5239         data_y = (addqsel ? addq_y : (int16)(gpu_din >> 16));
5240 //////////////////////////////////////////////////////////////////////////////////////
5241
5242 //END;
5243 }
5244
5245 /******************************************************************
5246 addradd
5247 29/11/90
5248
5249 Blitter Address Adder
5250 ---------------------
5251 The blitter address adder is a pair of sixteen bit adders, one
5252 each for X and Y.  The multiplexing of the input terms is
5253 performed elsewhere, but this adder can also perform modulo
5254 arithmetic to align X-addresses onto phrase boundaries.
5255
5256 modx[0..2] take values
5257 000     no mask
5258 001     mask bit 0
5259 010     mask bits 1-0
5260 ..
5261 110     mask bits 5-0
5262
5263 ******************************************************************/
5264
5265 /*IMPORT duplo, tosh;
5266
5267 DEF ADDRADD (
5268 INT16/  addq_x
5269 INT16/  addq_y
5270                 :OUT;
5271                 a1fracldi               // propagate address adder carry
5272 INT16/  adda_x
5273 INT16/  adda_y
5274 INT16/  addb_x
5275 INT16/  addb_y
5276                 clk[0]                  // co-processor clock
5277                 modx[0..2]
5278                 suba_x
5279                 suba_y
5280                 :IN);
5281
5282 BEGIN
5283
5284 Zero            := TIE0 (zero);*/
5285 void ADDRADD(int16 &addq_x, int16 &addq_y, bool a1fracldi,
5286         uint16 adda_x, uint16 adda_y, uint16 addb_x, uint16 addb_y, uint8 modx, bool suba_x, bool suba_y)
5287 {
5288
5289 /* Perform the addition */
5290
5291 /*Adder_x               := ADD16 (addqt_x[0..15], co_x, adda_x{0..15}, addb_x{0..15}, ci_x);
5292 Adder_y         := ADD16 (addq_y[0..15], co_y, adda_y{0..15}, addb_y{0..15}, ci_y);*/
5293
5294 /* latch carry and propagate if required */
5295
5296 /*Cxt0          := AN2 (cxt[0], co_x, a1fracldi);
5297 Cxt1            := FD1Q (cxt[1], cxt[0], clk[0]);
5298 Ci_x            := EO (ci_x, cxt[1], suba_x);
5299
5300 yt0                     := AN2 (cyt[0], co_y, a1fracldi);
5301 Cyt1            := FD1Q (cyt[1], cyt[0], clk[0]);
5302 Ci_y            := EO (ci_y, cyt[1], suba_y);*/
5303
5304 ////////////////////////////////////// C++ CODE //////////////////////////////////////
5305 //I'm sure the following will generate a bunch of warnings, but will have to do for now.
5306         static uint16 co_x = 0, co_y = 0;       // Carry out has to propogate between function calls...
5307         uint16 ci_x = co_x ^ (suba_x ? 1 : 0);
5308         uint16 ci_y = co_y ^ (suba_y ? 1 : 0);
5309         uint32 addqt_x = adda_x + addb_x + ci_x;
5310         uint32 addqt_y = adda_y + addb_y + ci_y;
5311         co_x = ((addqt_x & 0x10000) && a1fracldi ? 1 : 0);
5312         co_y = ((addqt_y & 0x10000) && a1fracldi ? 1 : 0);
5313 //////////////////////////////////////////////////////////////////////////////////////
5314
5315 /* Mask low bits of X to 0 if required */
5316
5317 /*Masksel               := D38H (unused[0], masksel[0..4], maskbit[5], unused[1], modx[0..2]);
5318
5319 Maskbit[0-4]    := OR2 (maskbit[0-4], masksel[0-4], maskbit[1-5]);
5320
5321 Mask[0-5]       := MX2 (addq_x[0-5], addqt_x[0-5], zero, maskbit[0-5]);
5322
5323 Addq_x          := JOIN (addq_x, addq_x[0..5], addqt_x[6..15]);
5324 Addq_y          := JOIN (addq_y, addq_y[0..15]);*/
5325
5326 ////////////////////////////////////// C++ CODE //////////////////////////////////////
5327         int16 mask[8] = { 0xFFFF, 0xFFFE, 0xFFFC, 0xFFF8, 0xFFF0, 0xFFE0, 0xFFC0, 0x0000 };
5328         addq_x = addqt_x & mask[modx];
5329         addq_y = addqt_y & 0xFFFF;
5330 //////////////////////////////////////////////////////////////////////////////////////
5331
5332 //Unused[0-1]   := DUMMY (unused[0-1]);
5333
5334 //END;
5335 }
5336
5337 /*
5338 DEF DATA (
5339                 wdata[0..63]    // co-processor write data bus
5340                 :BUS;
5341                 dcomp[0..7]             // data byte equal flags
5342                 srcd[0..7]              // bits to use for bit to byte expansion
5343                 zcomp[0..3]             // output from Z comparators
5344                 :OUT;
5345                 a1_x[0..1]              // low two bits of A1 X pointer
5346                 big_pix                 // pixel organisation is big-endian
5347                 blitter_active  // blitter is active
5348                 clk                             // co-processor clock
5349                 cmpdst                  // compare dest rather than source
5350                 colorld                 // load the pattern color fields
5351                 daddasel[0..2]  // data adder input A selection
5352                 daddbsel[0..3]  // data adder input B selection
5353                 daddmode[0..2]  // data adder mode
5354                 daddq_sel               // select adder output vs. GPU data
5355                 data[0..63]             // co-processor read data bus
5356                 data_ena                // enable write data
5357                 data_sel[0..1]  // select data to write
5358                 dbinh\[0..7]    // byte oriented changed data inhibits
5359                 dend[0..5]              // end of changed write data zone
5360                 dpipe[0..1]             // load computed data pipe-line latch
5361                 dstart[0..5]    // start of changed write data zone
5362                 dstdld[0..1]    // dest data load (two halves)
5363                 dstzld[0..1]    // dest zed load (two halves)
5364                 ext_int                 // enable extended precision intensity calculations
5365 INT32/  gpu_din                 // GPU data bus
5366                 iincld                  // I increment load
5367                 iincldx                 // alternate I increment load
5368                 init_if                 // initialise I fraction phase
5369                 init_ii                 // initialise I integer phase
5370                 init_zf                 // initialise Z fraction phase
5371                 intld[0..3]             // computed intensities load
5372                 istepadd                // intensity step integer add
5373                 istepfadd               // intensity step fraction add
5374                 istepld                 // I step load
5375                 istepdld                // I step delta load
5376                 lfu_func[0..3]  // LFU function code
5377                 patdadd                 // pattern data gouraud add
5378                 patdld[0..1]    // pattern data load (two halves)
5379                 pdsel[0..1]             // select pattern data type
5380                 phrase_mode             // phrase write mode
5381                 reload                  // transfer contents of double buffers
5382                 reset\                  // system reset
5383                 srcd1ld[0..1]   // source register 1 load (two halves)
5384                 srcdread                // source data read load enable
5385                 srczread                // source zed read load enable
5386                 srcshift[0..5]  // source alignment shift
5387                 srcz1ld[0..1]   // source zed 1 load (two halves)
5388                 srcz2add                // zed fraction gouraud add
5389                 srcz2ld[0..1]   // source zed 2 load (two halves)
5390                 textrgb                 // texture mapping in RGB mode
5391                 txtd[0..63]             // data from the texture unit
5392                 zedld[0..3]             // computed zeds load
5393                 zincld                  // Z increment load
5394                 zmode[0..2]             // Z comparator mode
5395                 zpipe[0..1]             // load computed zed pipe-line latch
5396                 zstepadd                // zed step integer add
5397                 zstepfadd               // zed step fraction add
5398                 zstepld                 // Z step load
5399                 zstepdld                // Z step delta load
5400                 :IN);
5401 */
5402
5403 void DATA(uint64 &wdata, uint8 &dcomp, uint8 &zcomp, bool &nowrite,
5404         bool big_pix, bool cmpdst, uint8 daddasel, uint8 daddbsel, uint8 daddmode, bool daddq_sel, uint8 data_sel,
5405         uint8 dbinh, uint8 dend, uint8 dstart, uint64 dstd, uint32 iinc, uint8 lfu_func, uint64 &patd, bool patdadd,
5406         bool phrase_mode, uint64 srcd, bool srcdread, bool srczread, bool srcz2add, uint8 zmode,
5407         bool bcompen, bool bkgwren, bool dcompen, uint8 icount, uint8 pixsize,
5408         uint64 &srcz, uint64 dstz, uint32 zinc)
5409 {
5410 /*
5411   Stuff we absolutely *need* to have passed in/out:
5412 IN:
5413   patdadd, dstd, srcd, patd, daddasel, daddbsel, daddmode, iinc, srcz1, srcz2, big_pix, phrase_mode, cmpdst
5414 OUT:
5415   changed patd (wdata I guess...) (Nope. We pass it back directly now...)
5416 */
5417
5418 // Source data registers
5419
5420 /*Data_src      := DATA_SRC (srcdlo, srcdhi, srcz[0..1], srczo[0..1], srczp[0..1], srcz1[0..1], srcz2[0..1], big_pix,
5421                         clk, gpu_din, intld[0..3], local_data0, local_data1, srcd1ld[0..1], srcdread, srczread, srcshift[0..5],
5422                         srcz1ld[0..1], srcz2add, srcz2ld[0..1], zedld[0..3], zpipe[0..1]);
5423 Srcd[0-7]       := JOIN (srcd[0-7], srcdlo{0-7});
5424 Srcd[8-31]      := JOIN (srcd[8-31], srcdlo{8-31});
5425 Srcd[32-63]     := JOIN (srcd[32-63], srcdhi{0-31});*/
5426
5427 // Destination data registers
5428
5429 /*Data_dst      := DATA_DST (dstd[0..63], dstz[0..1], clk, dstdld[0..1], dstzld[0..1], load_data[0..1]);
5430 Dstdlo          := JOIN (dstdlo, dstd[0..31]);
5431 Dstdhi          := JOIN (dstdhi, dstd[32..63]);*/
5432
5433 // Pattern and Color data registers
5434
5435 // Looks like this is simply another register file for the pattern data registers. No adding or anything funky
5436 // going on. Note that patd & patdv will output the same info.
5437 // Patdldl/h (patdld[0..1]) can select the local_data bus to overwrite the current pattern data...
5438 // Actually, it can be either patdld OR patdadd...!
5439 /*Data_pat      := DATA_PAT (colord[0..15], int0dp[8..10], int1dp[8..10], int2dp[8..10], int3dp[8..10], mixsel[0..2],
5440                         patd[0..63], patdv[0..1], clk, colorld, dpipe[0], ext_int, gpu_din, intld[0..3], local_data0, local_data1,
5441                         patdadd, patdld[0..1], reload, reset\);
5442 Patdlo          := JOIN (patdlo, patd[0..31]);
5443 Patdhi          := JOIN (patdhi, patd[32..63]);*/
5444
5445 // Multiplying data Mixer (NOT IN JAGUAR I)
5446
5447 /*Datamix               := DATAMIX (patdo[0..1], clk, colord[0..15], dpipe[1], dstd[0..63], int0dp[8..10], int1dp[8..10],
5448                         int2dp[8..10], int3dp[8..10], mixsel[0..2], patd[0..63], pdsel[0..1], srcd[0..63], textrgb, txtd[0..63]);*/
5449
5450 // Logic function unit
5451
5452 /*Lfu           := LFU (lfu[0..1], srcdlo, srcdhi, dstdlo, dstdhi, lfu_func[0..3]);*/
5453 ////////////////////////////////////// C++ CODE //////////////////////////////////////
5454         uint64 funcmask[2] = { 0, 0xFFFFFFFFFFFFFFFFLL };
5455         uint64 func0 = funcmask[lfu_func & 0x01];
5456         uint64 func1 = funcmask[(lfu_func >> 1) & 0x01];
5457         uint64 func2 = funcmask[(lfu_func >> 2) & 0x01];
5458         uint64 func3 = funcmask[(lfu_func >> 3) & 0x01];
5459         uint64 lfu = (~srcd & ~dstd & func0) | (~srcd & dstd & func1) | (srcd & ~dstd & func2) | (srcd & dstd & func3);
5460 //////////////////////////////////////////////////////////////////////////////////////
5461
5462 // Increment and Step Registers
5463
5464 // Does it do anything without the step add lines? Check it!
5465 // No. This is pretty much just a register file without the Jaguar II lines...
5466 /*Inc_step      := INC_STEP (iinc, istep[0..31], zinc, zstep[0..31], clk, ext_int, gpu_din, iincld, iincldx, istepadd,
5467                         istepfadd, istepld, istepdld, reload, reset\, zincld, zstepadd, zstepfadd, zstepld, zstepdld);
5468 Istep           := JOIN (istep, istep[0..31]);
5469 Zstep           := JOIN (zstep, zstep[0..31]);*/
5470
5471 // Pixel data comparator
5472
5473 /*Datacomp      := DATACOMP (dcomp[0..7], cmpdst, dstdlo, dstdhi, patdlo, patdhi, srcdlo, srcdhi);*/
5474 ////////////////////////////////////// C++ CODE //////////////////////////////////////
5475         dcomp = 0;
5476         uint64 cmpd = patd ^ (cmpdst ? dstd : srcd);
5477
5478         if ((cmpd & 0x00000000000000FFLL) == 0)
5479                 dcomp |= 0x01;
5480         if ((cmpd & 0x000000000000FF00LL) == 0)
5481                 dcomp |= 0x02;
5482         if ((cmpd & 0x0000000000FF0000LL) == 0)
5483                 dcomp |= 0x04;
5484         if ((cmpd & 0x00000000FF000000LL) == 0)
5485                 dcomp |= 0x08;
5486         if ((cmpd & 0x000000FF00000000LL) == 0)
5487                 dcomp |= 0x10;
5488         if ((cmpd & 0x0000FF0000000000LL) == 0)
5489                 dcomp |= 0x20;
5490         if ((cmpd & 0x00FF000000000000LL) == 0)
5491                 dcomp |= 0x40;
5492         if ((cmpd & 0xFF00000000000000LL) == 0)
5493                 dcomp |= 0x80;
5494 //////////////////////////////////////////////////////////////////////////////////////
5495
5496 // Zed comparator for Z-buffer operations
5497
5498 /*Zedcomp               := ZEDCOMP (zcomp[0..3], srczp[0..1], dstz[0..1], zmode[0..2]);*/
5499 ////////////////////////////////////// C++ CODE //////////////////////////////////////
5500 //srczp is srcz pipelined, also it goes through a source shift as well...
5501 /*The shift is basically like so (each piece is 16 bits long):
5502
5503         0         1         2         3         4         5         6
5504         srcz1lolo srcz1lohi srcz1hilo srcz1hihi srcz2lolo srcz2lohi srcz2hilo
5505
5506 with srcshift bits 4 & 5 selecting the start position
5507 */
5508 //So... basically what we have here is:
5509         zcomp = 0;
5510
5511         if ((((srcz & 0x000000000000FFFFLL) < (dstz & 0x000000000000FFFFLL)) && (zmode & 0x01))
5512                 || (((srcz & 0x000000000000FFFFLL) == (dstz & 0x000000000000FFFFLL)) && (zmode & 0x02))
5513                 || (((srcz & 0x000000000000FFFFLL) > (dstz & 0x000000000000FFFFLL)) && (zmode & 0x04)))
5514                 zcomp |= 0x01;
5515
5516         if ((((srcz & 0x00000000FFFF0000LL) < (dstz & 0x00000000FFFF0000LL)) && (zmode & 0x01))
5517                 || (((srcz & 0x00000000FFFF0000LL) == (dstz & 0x00000000FFFF0000LL)) && (zmode & 0x02))
5518                 || (((srcz & 0x00000000FFFF0000LL) > (dstz & 0x00000000FFFF0000LL)) && (zmode & 0x04)))
5519                 zcomp |= 0x02;
5520
5521         if ((((srcz & 0x0000FFFF00000000LL) < (dstz & 0x0000FFFF00000000LL)) && (zmode & 0x01))
5522                 || (((srcz & 0x0000FFFF00000000LL) == (dstz & 0x0000FFFF00000000LL)) && (zmode & 0x02))
5523                 || (((srcz & 0x0000FFFF00000000LL) > (dstz & 0x0000FFFF00000000LL)) && (zmode & 0x04)))
5524                 zcomp |= 0x04;
5525
5526         if ((((srcz & 0xFFFF000000000000LL) < (dstz & 0xFFFF000000000000LL)) && (zmode & 0x01))
5527                 || (((srcz & 0xFFFF000000000000LL) == (dstz & 0xFFFF000000000000LL)) && (zmode & 0x02))
5528                 || (((srcz & 0xFFFF000000000000LL) > (dstz & 0xFFFF000000000000LL)) && (zmode & 0x04)))
5529                 zcomp |= 0x08;
5530
5531 //TEMP, TO TEST IF ZCOMP IS THE CULPRIT...
5532 //Nope, this is NOT the problem...
5533 //zcomp=0;
5534 // We'll do the comparison/bit/byte inhibits here, since that's the way it happens
5535 // in the real thing (dcomp goes out to COMP_CTRL and back into DATA through dbinh)...
5536 #if 1
5537         uint8 dbinht;
5538 //      bool nowrite;
5539         COMP_CTRL(dbinht, nowrite,
5540                 bcompen, true/*big_pix*/, bkgwren, dcomp, dcompen, icount, pixsize, phrase_mode, srcd & 0xFF, zcomp);
5541         dbinh = dbinht;
5542 //      dbinh = 0x00;
5543 #endif
5544
5545 #if 1
5546 #ifdef VERBOSE_BLITTER_LOGGING
5547 if (logBlit)
5548 {
5549         printf("\n[dcomp=%02X zcomp=%02X dbinh=%02X]\n", dcomp, zcomp, dbinh);
5550         fflush(stdout);
5551 }//*/
5552 #endif
5553 #endif
5554 //////////////////////////////////////////////////////////////////////////////////////
5555
5556 // 22 Mar 94
5557 // The data initializer - allows all four initial values to be computed from one (NOT IN JAGUAR I)
5558
5559 /*Datinit               := DATINIT (initcin[0..3], initinc[0..63], initpix[0..15], a1_x[0..1], big_pix, clk, iinc, init_if, init_ii,
5560                         init_zf, istep[0..31], zinc, zstep[0..31]);*/
5561
5562 // Adder array for Z and intensity increments
5563
5564 /*Addarray      := ADDARRAY (addq[0..3], clk, daddasel[0..2], daddbsel[0..3], daddmode[0..2], dstdlo, dstdhi, iinc,
5565                         initcin[0..3], initinc[0..63], initpix[0..15], istep, patdv[0..1], srcdlo, srcdhi, srcz1[0..1],
5566                         srcz2[0..1], reset\, zinc, zstep);*/
5567 /*void ADDARRAY(uint16 * addq, uint8 daddasel, uint8 daddbsel, uint8 daddmode,
5568         uint64 dstd, uint32 iinc, uint8 initcin[], uint64 initinc, uint16 initpix,
5569         uint32 istep, uint64 patd, uint64 srcd, uint64 srcz1, uint64 srcz2,
5570         uint32 zinc, uint32 zstep)*/
5571 ////////////////////////////////////// C++ CODE //////////////////////////////////////
5572         uint16 addq[4];
5573         uint8 initcin[4] = { 0, 0, 0, 0 };
5574         ADDARRAY(addq, daddasel, daddbsel, daddmode, dstd, iinc, initcin, 0, 0, 0, patd, srcd, 0, 0, 0, 0);
5575
5576         //This is normally done asynchronously above (thru local_data) when in patdadd mode...
5577 //And now it's passed back to the caller to be persistent between calls...!
5578 //But it's causing some serious fuck-ups in T2K now... !!! FIX !!! [DONE--???]
5579 //Weird! It doesn't anymore...!
5580         if (patdadd)
5581                 patd = ((uint64)addq[3] << 48) | ((uint64)addq[2] << 32) | ((uint64)addq[1] << 16) | (uint64)addq[0];
5582 //////////////////////////////////////////////////////////////////////////////////////
5583
5584 // Local data bus multiplexer
5585
5586 /*Local_mux     := LOCAL_MUX (local_data[0..1], load_data[0..1],
5587         addq[0..3], gpu_din, data[0..63], blitter_active, daddq_sel);
5588 Local_data0     := JOIN (local_data0, local_data[0]);
5589 Local_data1     := JOIN (local_data1, local_data[1]);*/
5590 ////////////////////////////////////// C++ CODE //////////////////////////////////////
5591 //////////////////////////////////////////////////////////////////////////////////////
5592
5593 // Data output multiplexer and tri-state drive
5594
5595 /*Data_mux      := DATA_MUX (wdata[0..63], addq[0..3], big_pix, dstdlo, dstdhi, dstz[0..1], data_sel[0..1], data_ena,
5596                         dstart[0..5], dend[0..5], dbinh\[0..7], lfu[0..1], patdo[0..1], phrase_mode, srczo[0..1]);*/
5597 ////////////////////////////////////// C++ CODE //////////////////////////////////////
5598 // NOTE: patdo comes from DATAMIX and can be considered the same as patd for Jaguar I
5599
5600 //////////////////////////////////////////////////////////////////////////////////////
5601 //}
5602
5603 /*DEF DATA_MUX (
5604                 wdata[0..63]    // co-processor rwrite data bus
5605                 :BUS;
5606 INT16/  addq[0..3]
5607                 big_pix                 // Pixel organisation is big-endian
5608 INT32/  dstdlo
5609 INT32/  dstdhi
5610 INT32/  dstzlo
5611 INT32/  dstzhi
5612                 data_sel[0..1]  // source of write data
5613                 data_ena                // enable write data onto read/write bus
5614                 dstart[0..5]    // start of changed write data
5615                 dend[0..5]              // end of changed write data
5616                 dbinh\[0..7]    // byte oriented changed data inhibits
5617 INT32/  lfu[0..1]
5618 INT32/  patd[0..1]
5619                 phrase_mode             // phrase write mode
5620 INT32/  srczlo
5621 INT32/  srczhi
5622                 :IN);*/
5623
5624 /*INT32/        addql[0..1], ddatlo, ddathi zero32
5625 :LOCAL;
5626 BEGIN
5627
5628 Phrase_mode\    := INV1 (phrase_mode\, phrase_mode);
5629 Zero            := TIE0 (zero);
5630 Zero32          := JOIN (zero32, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero);*/
5631
5632 /* Generate a changed data mask */
5633
5634 /*Edis          := OR6 (edis\, dend[0..5]);
5635 Ecoarse         := DECL38E (e_coarse\[0..7], dend[3..5], edis\);
5636 E_coarse[0]     := INV1 (e_coarse[0], e_coarse\[0]);
5637 Efine           := DECL38E (unused[0], e_fine\[1..7], dend[0..2], e_coarse[0]);*/
5638 ////////////////////////////////////// C++ CODE //////////////////////////////////////
5639         uint8 decl38e[2][8] = { { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF },
5640                 { 0xFE, 0xFD, 0xFB, 0xF7, 0xEF, 0xDF, 0xBF, 0x7F } };
5641         uint8 dech38[8] = { 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80 };
5642         uint8 dech38el[2][8] = { { 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80 },
5643                 { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } };
5644
5645                         int en = (dend & 0x3F ? 1 : 0);
5646         uint8 e_coarse = decl38e[en][(dend & 0x38) >> 3];               // Actually, this is e_coarse inverted...
5647         uint8 e_fine = decl38e[(e_coarse & 0x01) ^ 0x01][dend & 0x07];
5648         e_fine &= 0xFE;
5649 //////////////////////////////////////////////////////////////////////////////////////
5650
5651 /*Scoarse               := DECH38 (s_coarse[0..7], dstart[3..5]);
5652 Sfen\           := INV1 (sfen\, s_coarse[0]);
5653 Sfine           := DECH38EL (s_fine[0..7], dstart[0..2], sfen\);*/
5654 ////////////////////////////////////// C++ CODE //////////////////////////////////////
5655         uint8 s_coarse = dech38[(dstart & 0x38) >> 3];
5656         uint8 s_fine = dech38el[(s_coarse & 0x01) ^ 0x01][dstart & 0x07];
5657 //////////////////////////////////////////////////////////////////////////////////////
5658
5659 /*Maskt[0]      := BUF1 (maskt[0], s_fine[0]);
5660 Maskt[1-7]      := OAN1P (maskt[1-7], maskt[0-6], s_fine[1-7], e_fine\[1-7]);*/
5661 ////////////////////////////////////// C++ CODE //////////////////////////////////////
5662         uint16 maskt = s_fine & 0x0001;
5663         maskt |= (((maskt & 0x0001) || (s_fine & 0x02)) && (e_fine & 0x02) ? 0x0002 : 0x0000);
5664         maskt |= (((maskt & 0x0002) || (s_fine & 0x04)) && (e_fine & 0x04) ? 0x0004 : 0x0000);
5665         maskt |= (((maskt & 0x0004) || (s_fine & 0x08)) && (e_fine & 0x08) ? 0x0008 : 0x0000);
5666         maskt |= (((maskt & 0x0008) || (s_fine & 0x10)) && (e_fine & 0x10) ? 0x0010 : 0x0000);
5667         maskt |= (((maskt & 0x0010) || (s_fine & 0x20)) && (e_fine & 0x20) ? 0x0020 : 0x0000);
5668         maskt |= (((maskt & 0x0020) || (s_fine & 0x40)) && (e_fine & 0x40) ? 0x0040 : 0x0000);
5669         maskt |= (((maskt & 0x0040) || (s_fine & 0x80)) && (e_fine & 0x80) ? 0x0080 : 0x0000);
5670 //////////////////////////////////////////////////////////////////////////////////////
5671
5672 /* Produce a look-ahead on the ripple carry:
5673 masktla = s_coarse[0] . /e_coarse[0] */
5674 /*Masktla               := AN2 (masktla, s_coarse[0], e_coarse\[0]);
5675 Maskt[8]        := OAN1P (maskt[8], masktla, s_coarse[1], e_coarse\[1]);
5676 Maskt[9-14]     := OAN1P (maskt[9-14], maskt[8-13], s_coarse[2-7], e_coarse\[2-7]);*/
5677 ////////////////////////////////////// C++ CODE //////////////////////////////////////
5678         maskt |= (((s_coarse & e_coarse & 0x01) || (s_coarse & 0x02)) && (e_coarse & 0x02) ? 0x0100 : 0x0000);
5679         maskt |= (((maskt & 0x0100) || (s_coarse & 0x04)) && (e_coarse & 0x04) ? 0x0200 : 0x0000);
5680         maskt |= (((maskt & 0x0200) || (s_coarse & 0x08)) && (e_coarse & 0x08) ? 0x0400 : 0x0000);
5681         maskt |= (((maskt & 0x0400) || (s_coarse & 0x10)) && (e_coarse & 0x10) ? 0x0800 : 0x0000);
5682         maskt |= (((maskt & 0x0800) || (s_coarse & 0x20)) && (e_coarse & 0x20) ? 0x1000 : 0x0000);
5683         maskt |= (((maskt & 0x1000) || (s_coarse & 0x40)) && (e_coarse & 0x40) ? 0x2000 : 0x0000);
5684         maskt |= (((maskt & 0x2000) || (s_coarse & 0x80)) && (e_coarse & 0x80) ? 0x4000 : 0x0000);
5685 //////////////////////////////////////////////////////////////////////////////////////
5686
5687 /* The bit terms are mirrored for big-endian pixels outside phrase
5688 mode.  The byte terms are mirrored for big-endian pixels in phrase
5689 mode.  */
5690
5691 /*Mirror_bit    := AN2M (mir_bit, phrase_mode\, big_pix);
5692 Mirror_byte     := AN2H (mir_byte, phrase_mode, big_pix);
5693
5694 Masktb[14]      := BUF1 (masktb[14], maskt[14]);
5695 Masku[0]        := MX4 (masku[0],  maskt[0],  maskt[7],  maskt[14],  zero, mir_bit, mir_byte);
5696 Masku[1]        := MX4 (masku[1],  maskt[1],  maskt[6],  maskt[14],  zero, mir_bit, mir_byte);
5697 Masku[2]        := MX4 (masku[2],  maskt[2],  maskt[5],  maskt[14],  zero, mir_bit, mir_byte);
5698 Masku[3]        := MX4 (masku[3],  maskt[3],  maskt[4],  masktb[14], zero, mir_bit, mir_byte);
5699 Masku[4]        := MX4 (masku[4],  maskt[4],  maskt[3],  masktb[14], zero, mir_bit, mir_byte);
5700 Masku[5]        := MX4 (masku[5],  maskt[5],  maskt[2],  masktb[14], zero, mir_bit, mir_byte);
5701 Masku[6]        := MX4 (masku[6],  maskt[6],  maskt[1],  masktb[14], zero, mir_bit, mir_byte);
5702 Masku[7]        := MX4 (masku[7],  maskt[7],  maskt[0],  masktb[14], zero, mir_bit, mir_byte);
5703 Masku[8]        := MX2 (masku[8],  maskt[8],  maskt[13], mir_byte);
5704 Masku[9]        := MX2 (masku[9],  maskt[9],  maskt[12], mir_byte);
5705 Masku[10]       := MX2 (masku[10], maskt[10], maskt[11], mir_byte);
5706 Masku[11]       := MX2 (masku[11], maskt[11], maskt[10], mir_byte);
5707 Masku[12]       := MX2 (masku[12], maskt[12], maskt[9],  mir_byte);
5708 Masku[13]       := MX2 (masku[13], maskt[13], maskt[8],  mir_byte);
5709 Masku[14]       := MX2 (masku[14], maskt[14], maskt[0],  mir_byte);*/
5710 ////////////////////////////////////// C++ CODE //////////////////////////////////////
5711         bool mir_bit = true/*big_pix*/ && !phrase_mode;
5712         bool mir_byte = true/*big_pix*/ && phrase_mode;
5713         uint16 masku = maskt;
5714
5715         if (mir_bit)
5716         {
5717                 masku &= 0xFF00;
5718                 masku |= (maskt >> 7) & 0x0001;
5719                 masku |= (maskt >> 5) & 0x0002;
5720                 masku |= (maskt >> 3) & 0x0004;
5721                 masku |= (maskt >> 1) & 0x0008;
5722                 masku |= (maskt << 1) & 0x0010;
5723                 masku |= (maskt << 3) & 0x0020;
5724                 masku |= (maskt << 5) & 0x0040;
5725                 masku |= (maskt << 7) & 0x0080;
5726         }
5727
5728         if (mir_byte)
5729         {
5730                 masku = 0;
5731                 masku |= (maskt >> 14) & 0x0001;
5732                 masku |= (maskt >> 13) & 0x0002;
5733                 masku |= (maskt >> 12) & 0x0004;
5734                 masku |= (maskt >> 11) & 0x0008;
5735                 masku |= (maskt >> 10) & 0x0010;
5736                 masku |= (maskt >> 9)  & 0x0020;
5737                 masku |= (maskt >> 8)  & 0x0040;
5738                 masku |= (maskt >> 7)  & 0x0080;
5739
5740                 masku |= (maskt >> 5) & 0x0100;
5741                 masku |= (maskt >> 3) & 0x0200;
5742                 masku |= (maskt >> 1) & 0x0400;
5743                 masku |= (maskt << 1) & 0x0800;
5744                 masku |= (maskt << 3) & 0x1000;
5745                 masku |= (maskt << 5) & 0x2000;
5746                 masku |= (maskt << 7) & 0x4000;
5747         }
5748 //////////////////////////////////////////////////////////////////////////////////////
5749
5750 /* The maskt terms define the area for changed data, but the byte
5751 inhibit terms can override these */
5752
5753 /*Mask[0-7]     := AN2 (mask[0-7], masku[0-7], dbinh\[0]);
5754 Mask[8-14]      := AN2H (mask[8-14], masku[8-14], dbinh\[1-7]);*/
5755 ////////////////////////////////////// C++ CODE //////////////////////////////////////
5756         uint16 mask = masku & (!(dbinh & 0x01) ? 0xFFFF : 0xFF00);
5757         mask &= ~(((uint16)dbinh & 0x00FE) << 7);
5758 //////////////////////////////////////////////////////////////////////////////////////
5759
5760 /*Addql[0]      := JOIN (addql[0], addq[0..1]);
5761 Addql[1]        := JOIN (addql[1], addq[2..3]);
5762
5763 Dsel0b[0-1]     := BUF8 (dsel0b[0-1], data_sel[0]);
5764 Dsel1b[0-1]     := BUF8 (dsel1b[0-1], data_sel[1]);
5765 Ddatlo          := MX4 (ddatlo, patd[0], lfu[0], addql[0], zero32, dsel0b[0], dsel1b[0]);
5766 Ddathi          := MX4 (ddathi, patd[1], lfu[1], addql[1], zero32, dsel0b[1], dsel1b[1]);*/
5767 ////////////////////////////////////// C++ CODE //////////////////////////////////////
5768         uint64 dmux[4];
5769         dmux[0] = patd;
5770         dmux[1] = lfu;
5771         dmux[2] = ((uint64)addq[3] << 48) | ((uint64)addq[2] << 32) | ((uint64)addq[1] << 16) | (uint64)addq[0];
5772         dmux[3] = 0;
5773         uint64 ddat = dmux[data_sel];
5774 //////////////////////////////////////////////////////////////////////////////////////
5775
5776 /*Zed_sel               := AN2 (zed_sel, data_sel[0..1]);
5777 Zed_selb[0-1]   := BUF8 (zed_selb[0-1], zed_sel);
5778
5779 Dat[0-7]        := MX4 (dat[0-7],   dstdlo{0-7},   ddatlo{0-7},   dstzlo{0-7},   srczlo{0-7},   mask[0-7], zed_selb[0]);
5780 Dat[8-15]       := MX4 (dat[8-15],  dstdlo{8-15},  ddatlo{8-15},  dstzlo{8-15},  srczlo{8-15},  mask[8],   zed_selb[0]);
5781 Dat[16-23]      := MX4 (dat[16-23], dstdlo{16-23}, ddatlo{16-23}, dstzlo{16-23}, srczlo{16-23}, mask[9],   zed_selb[0]);
5782 Dat[24-31]      := MX4 (dat[24-31], dstdlo{24-31}, ddatlo{24-31}, dstzlo{24-31}, srczlo{24-31}, mask[10],  zed_selb[0]);
5783 Dat[32-39]      := MX4 (dat[32-39], dstdhi{0-7},   ddathi{0-7},   dstzhi{0-7},   srczhi{0-7},   mask[11],  zed_selb[1]);
5784 Dat[40-47]      := MX4 (dat[40-47], dstdhi{8-15},  ddathi{8-15},  dstzhi{8-15},  srczhi{8-15},  mask[12],  zed_selb[1]);
5785 Dat[48-55]      := MX4 (dat[48-55], dstdhi{16-23}, ddathi{16-23}, dstzhi{16-23}, srczhi{16-23}, mask[13],  zed_selb[1]);
5786 Dat[56-63]      := MX4 (dat[56-63], dstdhi{24-31}, ddathi{24-31}, dstzhi{24-31}, srczhi{24-31}, mask[14],  zed_selb[1]);*/
5787 ////////////////////////////////////// C++ CODE //////////////////////////////////////
5788         wdata = ((ddat & mask) | (dstd & ~mask)) & 0x00000000000000FFLL;
5789         wdata |= (mask & 0x0100 ? ddat : dstd) & 0x000000000000FF00LL;
5790         wdata |= (mask & 0x0200 ? ddat : dstd) & 0x0000000000FF0000LL;
5791         wdata |= (mask & 0x0400 ? ddat : dstd) & 0x00000000FF000000LL;
5792         wdata |= (mask & 0x0800 ? ddat : dstd) & 0x000000FF00000000LL;
5793         wdata |= (mask & 0x1000 ? ddat : dstd) & 0x0000FF0000000000LL;
5794         wdata |= (mask & 0x2000 ? ddat : dstd) & 0x00FF000000000000LL;
5795         wdata |= (mask & 0x4000 ? ddat : dstd) & 0xFF00000000000000LL;
5796 /*if (logBlit)
5797 {
5798         printf("\n[ddat=%08X%08X dstd=%08X%08X wdata=%08X%08X mask=%04X]\n",
5799                 (uint32)(ddat >> 32), (uint32)(ddat & 0xFFFFFFFF),
5800                 (uint32)(dstd >> 32), (uint32)(dstd & 0xFFFFFFFF),
5801                 (uint32)(wdata >> 32), (uint32)(wdata & 0xFFFFFFFF), mask);
5802         fflush(stdout);
5803 }//*/
5804 //This is a crappy way of handling this, but it should work for now...
5805         uint64 zwdata;
5806         zwdata = ((srcz & mask) | (dstz & ~mask)) & 0x00000000000000FFLL;
5807         zwdata |= (mask & 0x0100 ? srcz : dstz) & 0x000000000000FF00LL;
5808         zwdata |= (mask & 0x0200 ? srcz : dstz) & 0x0000000000FF0000LL;
5809         zwdata |= (mask & 0x0400 ? srcz : dstz) & 0x00000000FF000000LL;
5810         zwdata |= (mask & 0x0800 ? srcz : dstz) & 0x000000FF00000000LL;
5811         zwdata |= (mask & 0x1000 ? srcz : dstz) & 0x0000FF0000000000LL;
5812         zwdata |= (mask & 0x2000 ? srcz : dstz) & 0x00FF000000000000LL;
5813         zwdata |= (mask & 0x4000 ? srcz : dstz) & 0xFF00000000000000LL;
5814 if (logBlit)
5815 {
5816         printf("\n[srcz=%08X%08X dstz=%08X%08X zwdata=%08X%08X mask=%04X]\n",
5817                 (uint32)(srcz >> 32), (uint32)(srcz & 0xFFFFFFFF),
5818                 (uint32)(dstz >> 32), (uint32)(dstz & 0xFFFFFFFF),
5819                 (uint32)(zwdata >> 32), (uint32)(zwdata & 0xFFFFFFFF), mask);
5820         fflush(stdout);
5821 }//*/
5822         srcz = zwdata;
5823 //////////////////////////////////////////////////////////////////////////////////////
5824
5825 /*Data_enab[0-1]        := BUF8 (data_enab[0-1], data_ena);
5826 Datadrv[0-31]   := TS (wdata[0-31],  dat[0-31],  data_enab[0]);
5827 Datadrv[32-63]  := TS (wdata[32-63], dat[32-63], data_enab[1]);
5828
5829 Unused[0]       := DUMMY (unused[0]);
5830
5831 END;*/
5832 }
5833
5834 /**  COMP_CTRL - Comparator output control logic  *****************
5835
5836 This block is responsible for taking the comparator outputs and
5837 using them as appropriate to inhibit writes.  Two methods are
5838 supported for inhibiting write data:
5839
5840 -       suppression of the inner loop controlled write operation
5841 -       a set of eight byte inhibit lines to write back dest data
5842
5843 The first technique is used in pixel oriented modes, the second in
5844 phrase mode, but the phrase mode form is only applicable to eight
5845 and sixteen bit pixel modes.
5846
5847 Writes can be suppressed by data being equal, by the Z comparator
5848 conditions being met, or by the bit to pixel expansion scheme.
5849
5850 Pipe-lining issues: the data derived comparator outputs are stable
5851 until the next data read, well after the affected write from this
5852 operation.  However, the inner counter bits can count immediately
5853 before the ack for the last write.  Therefore, it is necessary to
5854 delay bcompbit select terms by one inner loop pipe-line stage,
5855 when generating the select for the data control - the output is
5856 delayed one further tick to give it write data timing (2/34).
5857
5858 There is also a problem with computed data - the new values are
5859 calculated before the write associated with the old value has been
5860 performed.  This is taken care of within the zed comparator by
5861 pipe-lining the comparator inputs where appropriate.
5862 */
5863
5864 //#define LOG_COMP_CTRL
5865 /*DEF COMP_CTRL (
5866         dbinh\[0..7]    // destination byte inhibit lines
5867         nowrite         // suppress inner loop write operation
5868         :OUT;
5869         bcompen         // bit selector inhibit enable
5870         big_pix         // pixels are big-endian
5871         bkgwren         // enable dest data write in pix inhibit
5872         clk             // co-processor clock
5873         dcomp[0..7]     // output of data byte comparators
5874         dcompen         // data comparator inhibit enable
5875         icount[0..2]    // low bits of inner count
5876         pixsize[0..2]   // destination pixel size
5877         phrase_mode     // phrase write mode
5878         srcd[0..7]      // bits to use for bit to byte expansion
5879         step_inner      // inner loop advance
5880         zcomp[0..3]     // output of word zed comparators
5881         :IN);*/
5882 void COMP_CTRL(uint8 &dbinh, bool &nowrite,
5883         bool bcompen, bool big_pix, bool bkgwren, uint8 dcomp, bool dcompen, uint8 icount,
5884         uint8 pixsize, bool phrase_mode, uint8 srcd, uint8 zcomp)
5885 {
5886 //BEGIN
5887
5888 /*Bkgwren\      := INV1 (bkgwren\, bkgwren);
5889 Phrase_mode\    := INV1 (phrase_mode\, phrase_mode);
5890 Pixsize\[0-2]   := INV2 (pixsize\[0-2], pixsize[0-2]);*/
5891
5892 /* The bit comparator bits are derived from the source data, which
5893 will have been suitably aligned for phrase mode.  The contents of
5894 the inner counter are used to select which bit to use.
5895
5896 When not in phrase mode the inner count value is used to select
5897 one bit.  It is assumed that the count has already occurred, so,
5898 7 selects bit 0, etc.  In big-endian pixel mode, this turns round,
5899 so that a count of 7 selects bit 7.
5900
5901 In phrase mode, the eight bits are used directly, and this mode is
5902 only applicable to 8-bit pixel mode (2/34) */
5903
5904 /*Bcompselt[0-2]        := EO (bcompselt[0-2], icount[0-2], big_pix);
5905 Bcompbit        := MX8 (bcompbit, srcd[7], srcd[6], srcd[5],
5906                         srcd[4], srcd[3], srcd[2], srcd[1], srcd[0], bcompselt[0..2]);
5907 Bcompbit\       := INV1 (bcompbit\, bcompbit);*/
5908 ////////////////////////////////////// C++ CODE //////////////////////////////////////
5909 #ifdef LOG_COMP_CTRL
5910 if (logBlit)
5911 {
5912         printf("\n     [bcompen=%s dcompen=%s phrase_mode=%s bkgwren=%s dcomp=%02X zcomp=%02X]", (bcompen ? "T" : "F"), (dcompen ? "T" : "F"), (phrase_mode ? "T" : "F"), (bkgwren ? "T" : "F"), dcomp, zcomp);
5913         printf("\n     ");
5914         fflush(stdout);
5915 }
5916 #endif
5917         uint8 bcompselt = (big_pix ? ~icount : icount) & 0x07;
5918         uint8 bitmask[8] = { 0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02, 0x01 };
5919         bool bcompbit = srcd & bitmask[bcompselt];
5920 //////////////////////////////////////////////////////////////////////////////////////
5921
5922 /* pipe-line the count */
5923 /*Bcompsel[0-2] := FDSYNC (bcompsel[0-2], bcompselt[0-2], step_inner, clk);
5924 Bcompbt         := MX8 (bcompbitpt, srcd[7], srcd[6], srcd[5],
5925                         srcd[4], srcd[3], srcd[2], srcd[1], srcd[0], bcompsel[0..2]);
5926 Bcompbitp       := FD1Q (bcompbitp, bcompbitpt, clk);
5927 Bcompbitp\      := INV1 (bcompbitp\, bcompbitp);*/
5928
5929 /* For pixel mode, generate the write inhibit signal for all modes
5930 on bit inhibit, for 8 and 16 bit modes on comparator inhibit, and
5931 for 16 bit mode on Z inhibit
5932
5933 Nowrite = bcompen . /bcompbit . /phrase_mode
5934         + dcompen . dcomp[0] . /phrase_mode . pixsize = 011
5935         + dcompen . dcomp[0..1] . /phrase_mode . pixsize = 100
5936         + zcomp[0] . /phrase_mode . pixsize = 100
5937 */
5938
5939 /*Nowt0         := NAN3 (nowt[0], bcompen, bcompbit\, phrase_mode\);
5940 Nowt1           := ND6  (nowt[1], dcompen, dcomp[0], phrase_mode\, pixsize\[2], pixsize[0..1]);
5941 Nowt2           := ND7  (nowt[2], dcompen, dcomp[0..1], phrase_mode\, pixsize[2], pixsize\[0..1]);
5942 Nowt3           := NAN5 (nowt[3], zcomp[0], phrase_mode\, pixsize[2], pixsize\[0..1]);
5943 Nowt4           := NAN4 (nowt[4], nowt[0..3]);
5944 Nowrite         := AN2  (nowrite, nowt[4], bkgwren\);*/
5945 ////////////////////////////////////// C++ CODE //////////////////////////////////////
5946         nowrite = ((bcompen && !bcompbit && !phrase_mode)
5947                 || (dcompen && (dcomp & 0x01) && !phrase_mode && (pixsize == 3))
5948                 || (dcompen && ((dcomp & 0x03) == 0x03) && !phrase_mode && (pixsize == 4))
5949                 || ((zcomp & 0x01) && !phrase_mode && (pixsize == 4)))
5950                 && !bkgwren;
5951 //////////////////////////////////////////////////////////////////////////////////////
5952
5953 /*Winht         := NAN3 (winht, bcompen, bcompbitp\, phrase_mode\);
5954 Winhibit        := NAN4 (winhibit, winht, nowt[1..3]);*/
5955 ////////////////////////////////////// C++ CODE //////////////////////////////////////
5956 //This is the same as above, but with bcompbit delayed one tick and called 'winhibit'
5957 //Small difference: Besides the pipeline effect, it's also not using !bkgwren...
5958 //      bool winhibit = (bcompen && !
5959         bool winhibit = (bcompen && !bcompbit && !phrase_mode)
5960                 || (dcompen && (dcomp & 0x01) && !phrase_mode && (pixsize == 3))
5961                 || (dcompen && ((dcomp & 0x03) == 0x03) && !phrase_mode && (pixsize == 4))
5962                 || ((zcomp & 0x01) && !phrase_mode && (pixsize == 4));
5963 #ifdef LOG_COMP_CTRL
5964 if (logBlit)
5965 {
5966         printf("[nw=%s wi=%s]", (nowrite ? "T" : "F"), (winhibit ? "T" : "F"));
5967         fflush(stdout);
5968 }
5969 #endif
5970 //////////////////////////////////////////////////////////////////////////////////////
5971
5972 /* For phrase mode, generate the byte inhibit signals for eight bit
5973 mode 011, or sixteen bit mode 100
5974 dbinh\[0] =  pixsize[2] . zcomp[0]
5975          +  pixsize[2] . dcomp[0] . dcomp[1] . dcompen
5976          + /pixsize[2] . dcomp[0] . dcompen
5977          + /srcd[0] . bcompen
5978
5979 Inhibits 0-3 are also used when not in phrase mode to write back
5980 destination data.
5981 */
5982
5983 /*Srcd\[0-7]    := INV1 (srcd\[0-7], srcd[0-7]);
5984
5985 Di0t0           := NAN2H (di0t[0], pixsize[2], zcomp[0]);
5986 Di0t1           := NAN4H (di0t[1], pixsize[2], dcomp[0..1], dcompen);
5987 Di0t2           := NAN2 (di0t[2], srcd\[0], bcompen);
5988 Di0t3           := NAN3 (di0t[3], pixsize\[2], dcomp[0], dcompen);
5989 Di0t4           := NAN4 (di0t[4], di0t[0..3]);
5990 Dbinh[0]        := ANR1P (dbinh\[0], di0t[4], phrase_mode, winhibit);*/
5991 ////////////////////////////////////// C++ CODE //////////////////////////////////////
5992         dbinh = 0;
5993         bool di0t0_1 = ((pixsize & 0x04) && (zcomp & 0x01))
5994                 || ((pixsize & 0x04) && (dcomp & 0x01) && (dcomp & 0x02) && dcompen);
5995         bool di0t4 = di0t0_1
5996                 || (!(srcd & 0x01) && bcompen)
5997                 || (!(pixsize & 0x04) && (dcomp & 0x01) && dcompen);
5998         dbinh |= (!((di0t4 && phrase_mode) || winhibit) ? 0x01 : 0x00);
5999 #ifdef LOG_COMP_CTRL
6000 if (logBlit)
6001 {
6002         printf("[di0t0_1=%s di0t4=%s]", (di0t0_1 ? "T" : "F"), (di0t4 ? "T" : "F"));
6003         fflush(stdout);
6004 }
6005 #endif
6006 //////////////////////////////////////////////////////////////////////////////////////
6007
6008 /*Di1t0         := NAN3 (di1t[0], pixsize\[2], dcomp[1], dcompen);
6009 Di1t1           := NAN2 (di1t[1], srcd\[1], bcompen);
6010 Di1t2           := NAN4 (di1t[2], di0t[0..1], di1t[0..1]);
6011 Dbinh[1]        := ANR1 (dbinh\[1], di1t[2], phrase_mode, winhibit);*/
6012 ////////////////////////////////////// C++ CODE //////////////////////////////////////
6013         bool di1t2 = di0t0_1
6014                 || (!(srcd & 0x02) && bcompen)
6015                 || (!(pixsize & 0x04) && (dcomp & 0x02) && dcompen);
6016         dbinh |= (!((di1t2 && phrase_mode) || winhibit) ? 0x02 : 0x00);
6017 #ifdef LOG_COMP_CTRL
6018 if (logBlit)
6019 {
6020         printf("[di1t2=%s]", (di1t2 ? "T" : "F"));
6021         fflush(stdout);
6022 }
6023 #endif
6024 //////////////////////////////////////////////////////////////////////////////////////
6025
6026 /*Di2t0         := NAN2H (di2t[0], pixsize[2], zcomp[1]);
6027 Di2t1           := NAN4H (di2t[1], pixsize[2], dcomp[2..3], dcompen);
6028 Di2t2           := NAN2 (di2t[2], srcd\[2], bcompen);
6029 Di2t3           := NAN3 (di2t[3], pixsize\[2], dcomp[2], dcompen);
6030 Di2t4           := NAN4 (di2t[4], di2t[0..3]);
6031 Dbinh[2]        := ANR1 (dbinh\[2], di2t[4], phrase_mode, winhibit);*/
6032 ////////////////////////////////////// C++ CODE //////////////////////////////////////
6033 //[bcompen=F dcompen=T phrase_mode=T bkgwren=F][nw=F wi=F]
6034 //[di0t0_1=F di0t4=F][di1t2=F][di2t0_1=T di2t4=T][di3t2=T][di4t0_1=F di2t4=F][di5t2=F][di6t0_1=F di6t4=F][di7t2=F]
6035 //[dcomp=$00 dbinh=$0C][7804780400007804] (icount=0005, inc=4)
6036         bool di2t0_1 = ((pixsize & 0x04) && (zcomp & 0x02))
6037                 || ((pixsize & 0x04) && (dcomp & 0x04) && (dcomp & 0x08) && dcompen);
6038         bool di2t4 = di2t0_1
6039                 || (!(srcd & 0x04) && bcompen)
6040                 || (!(pixsize & 0x04) && (dcomp & 0x04) && dcompen);
6041         dbinh |= (!((di2t4 && phrase_mode) || winhibit) ? 0x04 : 0x00);
6042 #ifdef LOG_COMP_CTRL
6043 if (logBlit)
6044 {
6045         printf("[di2t0_1=%s di2t4=%s]", (di2t0_1 ? "T" : "F"), (di2t4 ? "T" : "F"));
6046         fflush(stdout);
6047 }
6048 #endif
6049 //////////////////////////////////////////////////////////////////////////////////////
6050
6051 /*Di3t0         := NAN3 (di3t[0], pixsize\[2], dcomp[3], dcompen);
6052 Di3t1           := NAN2 (di3t[1], srcd\[3], bcompen);
6053 Di3t2           := NAN4 (di3t[2], di2t[0..1], di3t[0..1]);
6054 Dbinh[3]        := ANR1 (dbinh\[3], di3t[2], phrase_mode, winhibit);*/
6055 ////////////////////////////////////// C++ CODE //////////////////////////////////////
6056         bool di3t2 = di2t0_1
6057                 || (!(srcd & 0x08) && bcompen)
6058                 || (!(pixsize & 0x04) && (dcomp & 0x08) && dcompen);
6059         dbinh |= (!((di3t2 && phrase_mode) || winhibit) ? 0x08 : 0x00);
6060 #ifdef LOG_COMP_CTRL
6061 if (logBlit)
6062 {
6063         printf("[di3t2=%s]", (di3t2 ? "T" : "F"));
6064         fflush(stdout);
6065 }
6066 #endif
6067 //////////////////////////////////////////////////////////////////////////////////////
6068
6069 /*Di4t0         := NAN2H (di4t[0], pixsize[2], zcomp[2]);
6070 Di4t1           := NAN4H (di4t[1], pixsize[2], dcomp[4..5], dcompen);
6071 Di4t2           := NAN2 (di4t[2], srcd\[4], bcompen);
6072 Di4t3           := NAN3 (di4t[3], pixsize\[2], dcomp[4], dcompen);
6073 Di4t4           := NAN4 (di4t[4], di4t[0..3]);
6074 Dbinh[4]        := NAN2 (dbinh\[4], di4t[4], phrase_mode);*/
6075 ////////////////////////////////////// C++ CODE //////////////////////////////////////
6076         bool di4t0_1 = ((pixsize & 0x04) && (zcomp & 0x04))
6077                 || ((pixsize & 0x04) && (dcomp & 0x10) && (dcomp & 0x20) && dcompen);
6078         bool di4t4 = di4t0_1
6079                 || (!(srcd & 0x10) && bcompen)
6080                 || (!(pixsize & 0x04) && (dcomp & 0x10) && dcompen);
6081         dbinh |= (!(di4t4 && phrase_mode) ? 0x10 : 0x00);
6082 #ifdef LOG_COMP_CTRL
6083 if (logBlit)
6084 {
6085         printf("[di4t0_1=%s di2t4=%s]", (di4t0_1 ? "T" : "F"), (di4t4 ? "T" : "F"));
6086         fflush(stdout);
6087 }
6088 #endif
6089 //////////////////////////////////////////////////////////////////////////////////////
6090
6091 /*Di5t0         := NAN3 (di5t[0], pixsize\[2], dcomp[5], dcompen);
6092 Di5t1           := NAN2 (di5t[1], srcd\[5], bcompen);
6093 Di5t2           := NAN4 (di5t[2], di4t[0..1], di5t[0..1]);
6094 Dbinh[5]        := NAN2 (dbinh\[5], di5t[2], phrase_mode);*/
6095 ////////////////////////////////////// C++ CODE //////////////////////////////////////
6096         bool di5t2 = di4t0_1
6097                 || (!(srcd & 0x20) && bcompen)
6098                 || (!(pixsize & 0x04) && (dcomp & 0x20) && dcompen);
6099         dbinh |= (!(di5t2 && phrase_mode) ? 0x20 : 0x00);
6100 #ifdef LOG_COMP_CTRL
6101 if (logBlit)
6102 {
6103         printf("[di5t2=%s]", (di5t2 ? "T" : "F"));
6104         fflush(stdout);
6105 }
6106 #endif
6107 //////////////////////////////////////////////////////////////////////////////////////
6108
6109 /*Di6t0         := NAN2H (di6t[0], pixsize[2], zcomp[3]);
6110 Di6t1           := NAN4H (di6t[1], pixsize[2], dcomp[6..7], dcompen);
6111 Di6t2           := NAN2 (di6t[2], srcd\[6], bcompen);
6112 Di6t3           := NAN3 (di6t[3], pixsize\[2], dcomp[6], dcompen);
6113 Di6t4           := NAN4 (di6t[4], di6t[0..3]);
6114 Dbinh[6]        := NAN2 (dbinh\[6], di6t[4], phrase_mode);*/
6115 ////////////////////////////////////// C++ CODE //////////////////////////////////////
6116         bool di6t0_1 = ((pixsize & 0x04) && (zcomp & 0x08))
6117                 || ((pixsize & 0x04) && (dcomp & 0x40) && (dcomp & 0x80) && dcompen);
6118         bool di6t4 = di6t0_1
6119                 || (!(srcd & 0x40) && bcompen)
6120                 || (!(pixsize & 0x04) && (dcomp & 0x40) && dcompen);
6121         dbinh |= (!(di6t4 && phrase_mode) ? 0x40 : 0x00);
6122 #ifdef LOG_COMP_CTRL
6123 if (logBlit)
6124 {
6125         printf("[di6t0_1=%s di6t4=%s]", (di6t0_1 ? "T" : "F"), (di6t4 ? "T" : "F"));
6126         fflush(stdout);
6127 }
6128 #endif
6129 //////////////////////////////////////////////////////////////////////////////////////
6130
6131 /*Di7t0         := NAN3 (di7t[0], pixsize\[2], dcomp[7], dcompen);
6132 Di7t1           := NAN2 (di7t[1], srcd\[7], bcompen);
6133 Di7t2           := NAN4 (di7t[2], di6t[0..1], di7t[0..1]);
6134 Dbinh[7]        := NAN2 (dbinh\[7], di7t[2], phrase_mode);*/
6135 ////////////////////////////////////// C++ CODE //////////////////////////////////////
6136         bool di7t2 = di6t0_1
6137                 || (!(srcd & 0x80) && bcompen)
6138                 || (!(pixsize & 0x04) && (dcomp & 0x80) && dcompen);
6139         dbinh |= (!(di7t2 && phrase_mode) ? 0x80 : 0x00);
6140 #ifdef LOG_COMP_CTRL
6141 if (logBlit)
6142 {
6143         printf("[di7t2=%s]", (di7t2 ? "T" : "F"));
6144         fflush(stdout);
6145 }
6146 #endif
6147 //////////////////////////////////////////////////////////////////////////////////////
6148
6149 //END;
6150 //kludge
6151 dbinh = ~dbinh;
6152 #ifdef LOG_COMP_CTRL
6153 if (logBlit)
6154 {
6155         printf("[dcomp=$%02X dbinh=$%02X]\n    ", dcomp, dbinh);
6156         fflush(stdout);
6157 }
6158 #endif
6159 }
6160
6161
6162 ////////////////////////////////////// C++ CODE //////////////////////////////////////
6163 //////////////////////////////////////////////////////////////////////////////////////
6164
6165 #endif