Shamusworld >> Repos - virtualjaguar/blob - src/blitter.cpp

   1 //
   2 // Blitter core
   3 //
   4 // by James L. Hammons
   5 // (C) 2010 Underground Software
   6 //
   7 // JLH = James L. Hammons <jlhamm@acm.org>
   8 //
   9 // Who  When        What
  10 // ---  ----------  -------------------------------------------------------------
  11 // JLH  01/16/2010  Created this log ;-)
  12 //
  13
  14 //
  15 // I owe a debt of gratitude to Curt Vendel and to John Mathieson--to Curt
  16 // for supplying the Oberon ASIC nets and to John for making them available
  17 // to Curt. ;-) Without that excellent documentation which shows *exactly*
  18 // what's going on inside the TOM chip, we'd all still be guessing as to how
  19 // the wily blitter and other pieces of the Jaguar puzzle actually work.
  20 // Now how about those JERRY ASIC nets gentlemen...? [We have those now!] ;-)
  21 //
  22
  23 #include "blitter.h"
  24
  25 #include <stdlib.h>
  26 #include <stdio.h>
  27 #include <string.h>
  28 #include "jaguar.h"
  29 #include "log.h"
  30 //#include "memory.h"
  31
  32 // Various conditional compilation goodies...
  33
  34 //#define USE_ORIGINAL_BLITTER
  35 //#define USE_MIDSUMMER_BLITTER
  36 #define USE_MIDSUMMER_BLITTER_MKII
  37
  38 // External global variables
  39
  40 extern int jaguar_active_memory_dumps;
  41
  42 // Local global variables
  43
  44 int start_logging = 0;
  45 uint8 blitter_working = 0;
  46
  47 // Blitter register RAM (most of it is hidden from the user)
  48
  49 static uint8 blitter_ram[0x100];
  50
  51 // Other crapola
  52
  53 bool specialLog = false;
  54 extern int effect_start;
  55 extern int blit_start_log;
  56 void BlitterMidsummer(uint32 cmd);
  57 void BlitterMidsummer2(void);
  58
  59 #define REG(A)  (((uint32)blitter_ram[(A)] << 24) | ((uint32)blitter_ram[(A)+1] << 16) \
  60                                 | ((uint32)blitter_ram[(A)+2] << 8) | (uint32)blitter_ram[(A)+3])
  61 #define WREG(A,D)       (blitter_ram[(A)] = ((D)>>24)&0xFF, blitter_ram[(A)+1] = ((D)>>16)&0xFF, \
  62                                         blitter_ram[(A)+2] = ((D)>>8)&0xFF, blitter_ram[(A)+3] = (D)&0xFF)
  63
  64 // Blitter registers (offsets from F02200)
  65
  66 #define A1_BASE                 ((uint32)0x00)
  67 #define A1_FLAGS                ((uint32)0x04)
  68 #define A1_CLIP                 ((uint32)0x08)  // Height and width values for clipping
  69 #define A1_PIXEL                ((uint32)0x0C)  // Integer part of the pixel (Y.i and X.i)
  70 #define A1_STEP                 ((uint32)0x10)  // Integer part of the step
  71 #define A1_FSTEP                ((uint32)0x14)  // Fractional part of the step
  72 #define A1_FPIXEL               ((uint32)0x18)  // Fractional part of the pixel (Y.f and X.f)
  73 #define A1_INC                  ((uint32)0x1C)  // Integer part of the increment
  74 #define A1_FINC                 ((uint32)0x20)  // Fractional part of the increment
  75 #define A2_BASE                 ((uint32)0x24)
  76 #define A2_FLAGS                ((uint32)0x28)
  77 #define A2_MASK                 ((uint32)0x2C)  // Modulo values for x and y (M.y  and M.x)
  78 #define A2_PIXEL                ((uint32)0x30)  // Integer part of the pixel (no fractional part for A2)
  79 #define A2_STEP                 ((uint32)0x34)  // Integer part of the step (no fractional part for A2)
  80 #define COMMAND                 ((uint32)0x38)
  81 #define PIXLINECOUNTER  ((uint32)0x3C)  // Inner & outer loop values
  82 #define SRCDATA                 ((uint32)0x40)
  83 #define DSTDATA                 ((uint32)0x48)
  84 #define DSTZ                    ((uint32)0x50)
  85 #define SRCZINT                 ((uint32)0x58)
  86 #define SRCZFRAC                ((uint32)0x60)
  87 #define PATTERNDATA             ((uint32)0x68)
  88 #define INTENSITYINC    ((uint32)0x70)
  89 #define ZINC                    ((uint32)0x74)
  90 #define COLLISIONCTRL   ((uint32)0x78)
  91 #define PHRASEINT0              ((uint32)0x7C)
  92 #define PHRASEINT1              ((uint32)0x80)
  93 #define PHRASEINT2              ((uint32)0x84)
  94 #define PHRASEINT3              ((uint32)0x88)
  95 #define PHRASEZ0                ((uint32)0x8C)
  96 #define PHRASEZ1                ((uint32)0x90)
  97 #define PHRASEZ2                ((uint32)0x94)
  98 #define PHRASEZ3                ((uint32)0x98)
  99
 100 // Blitter command bits
 101
 102 #define SRCEN                   (cmd & 0x00000001)
 103 #define SRCENZ                  (cmd & 0x00000002)
 104 #define SRCENX                  (cmd & 0x00000004)
 105 #define DSTEN                   (cmd & 0x00000008)
 106 #define DSTENZ                  (cmd & 0x00000010)
 107 #define DSTWRZ                  (cmd & 0x00000020)
 108 #define CLIPA1                  (cmd & 0x00000040)
 109
 110 #define UPDA1F                  (cmd & 0x00000100)
 111 #define UPDA1                   (cmd & 0x00000200)
 112 #define UPDA2                   (cmd & 0x00000400)
 113
 114 #define DSTA2                   (cmd & 0x00000800)
 115
 116 #define Z_OP_INF                (cmd & 0x00040000)
 117 #define Z_OP_EQU                (cmd & 0x00080000)
 118 #define Z_OP_SUP                (cmd & 0x00100000)
 119
 120 #define LFU_NAN                 (cmd & 0x00200000)
 121 #define LFU_NA                  (cmd & 0x00400000)
 122 #define LFU_AN                  (cmd & 0x00800000)
 123 #define LFU_A                   (cmd & 0x01000000)
 124
 125 #define CMPDST                  (cmd & 0x02000000)
 126 #define BCOMPEN                 (cmd & 0x04000000)
 127 #define DCOMPEN                 (cmd & 0x08000000)
 128
 129 #define PATDSEL                 (cmd & 0x00010000)
 130 #define ADDDSEL                 (cmd & 0x00020000)
 131 #define TOPBEN                  (cmd & 0x00004000)
 132 #define TOPNEN                  (cmd & 0x00008000)
 133 #define BKGWREN                 (cmd & 0x10000000)
 134 #define GOURD                   (cmd & 0x00001000)
 135 #define GOURZ                   (cmd & 0x00002000)
 136 #define SRCSHADE                (cmd & 0x40000000)
 137
 138
 139 #define XADDPHR  0
 140 #define XADDPIX  1
 141 #define XADD0    2
 142 #define XADDINC  3
 143
 144 #define XSIGNSUB_A1             (REG(A1_FLAGS)&0x080000)
 145 #define XSIGNSUB_A2             (REG(A2_FLAGS)&0x080000)
 146
 147 #define YSIGNSUB_A1             (REG(A1_FLAGS)&0x100000)
 148 #define YSIGNSUB_A2             (REG(A2_FLAGS)&0x100000)
 149
 150 #define YADD1_A1                (REG(A1_FLAGS)&0x040000)
 151 #define YADD1_A2                (REG(A2_FLAGS)&0x040000)
 152
 153 /*******************************************************************************
 154 ********************** STUFF CUT BELOW THIS LINE! ******************************
 155 *******************************************************************************/
 156 #ifdef USE_ORIGINAL_BLITTER                                                                             // We're ditching this crap for now...
 157
 158 //Put 'em back, once we fix the problem!!! [KO]
 159 // 1 bpp pixel read
 160 #define PIXEL_SHIFT_1(a)      (((~a##_x) >> 16) & 7)
 161 #define PIXEL_OFFSET_1(a)     (((((uint32)a##_y >> 16) * a##_width / 8) + (((uint32)a##_x >> 19) & ~7)) * (1 + a##_pitch) + (((uint32)a##_x >> 19) & 7))
 162 #define READ_PIXEL_1(a)       ((JaguarReadByte(a##_addr+PIXEL_OFFSET_1(a), BLITTER) >> PIXEL_SHIFT_1(a)) & 0x01)
 163 //#define READ_PIXEL_1(a)       ((JaguarReadByte(a##_addr+PIXEL_OFFSET_1(a)) >> PIXEL_SHIFT_1(a)) & 0x01)
 164
 165 // 2 bpp pixel read
 166 #define PIXEL_SHIFT_2(a)      (((~a##_x) >> 15) & 6)
 167 #define PIXEL_OFFSET_2(a)     (((((uint32)a##_y >> 16) * a##_width / 4) + (((uint32)a##_x >> 18) & ~7)) * (1 + a##_pitch) + (((uint32)a##_x >> 18) & 7))
 168 #define READ_PIXEL_2(a)       ((JaguarReadByte(a##_addr+PIXEL_OFFSET_2(a), BLITTER) >> PIXEL_SHIFT_2(a)) & 0x03)
 169 //#define READ_PIXEL_2(a)       ((JaguarReadByte(a##_addr+PIXEL_OFFSET_2(a)) >> PIXEL_SHIFT_2(a)) & 0x03)
 170
 171 // 4 bpp pixel read
 172 #define PIXEL_SHIFT_4(a)      (((~a##_x) >> 14) & 4)
 173 #define PIXEL_OFFSET_4(a)     (((((uint32)a##_y >> 16) * (a##_width/2)) + (((uint32)a##_x >> 17) & ~7)) * (1 + a##_pitch) + (((uint32)a##_x >> 17) & 7))
 174 #define READ_PIXEL_4(a)       ((JaguarReadByte(a##_addr+PIXEL_OFFSET_4(a), BLITTER) >> PIXEL_SHIFT_4(a)) & 0x0f)
 175 //#define READ_PIXEL_4(a)       ((JaguarReadByte(a##_addr+PIXEL_OFFSET_4(a)) >> PIXEL_SHIFT_4(a)) & 0x0f)
 176
 177 // 8 bpp pixel read
 178 #define PIXEL_OFFSET_8(a)     (((((uint32)a##_y >> 16) * a##_width) + (((uint32)a##_x >> 16) & ~7)) * (1 + a##_pitch) + (((uint32)a##_x >> 16) & 7))
 179 #define READ_PIXEL_8(a)       (JaguarReadByte(a##_addr+PIXEL_OFFSET_8(a), BLITTER))
 180 //#define READ_PIXEL_8(a)       (JaguarReadByte(a##_addr+PIXEL_OFFSET_8(a)))
 181
 182 // 16 bpp pixel read
 183 #define PIXEL_OFFSET_16(a)    (((((uint32)a##_y >> 16) * a##_width) + (((uint32)a##_x >> 16) & ~3)) * (1 + a##_pitch) + (((uint32)a##_x >> 16) & 3))
 184 #define READ_PIXEL_16(a)       (JaguarReadWord(a##_addr+(PIXEL_OFFSET_16(a)<<1), BLITTER))
 185 //#define READ_PIXEL_16(a)       (JaguarReadWord(a##_addr+(PIXEL_OFFSET_16(a)<<1)))
 186
 187 // 32 bpp pixel read
 188 #define PIXEL_OFFSET_32(a)    (((((uint32)a##_y >> 16) * a##_width) + (((uint32)a##_x >> 16) & ~1)) * (1 + a##_pitch) + (((uint32)a##_x >> 16) & 1))
 189 #define READ_PIXEL_32(a)      (JaguarReadLong(a##_addr+(PIXEL_OFFSET_32(a)<<2), BLITTER))
 190 //#define READ_PIXEL_32(a)      (JaguarReadLong(a##_addr+(PIXEL_OFFSET_32(a)<<2)))
 191
 192 // pixel read
 193 #define READ_PIXEL(a,f) (\
 194          (((f>>3)&0x07) == 0) ? (READ_PIXEL_1(a)) : \
 195          (((f>>3)&0x07) == 1) ? (READ_PIXEL_2(a)) : \
 196          (((f>>3)&0x07) == 2) ? (READ_PIXEL_4(a)) : \
 197          (((f>>3)&0x07) == 3) ? (READ_PIXEL_8(a)) : \
 198          (((f>>3)&0x07) == 4) ? (READ_PIXEL_16(a)) : \
 199          (((f>>3)&0x07) == 5) ? (READ_PIXEL_32(a)) : 0)
 200
 201 // 16 bpp z data read
 202 #define ZDATA_OFFSET_16(a)     (PIXEL_OFFSET_16(a) + a##_zoffs * 4)
 203 #define READ_ZDATA_16(a)       (JaguarReadWord(a##_addr+(ZDATA_OFFSET_16(a)<<1), BLITTER))
 204 //#define READ_ZDATA_16(a)       (JaguarReadWord(a##_addr+(ZDATA_OFFSET_16(a)<<1)))
 205
 206 // z data read
 207 #define READ_ZDATA(a,f) (READ_ZDATA_16(a))
 208
 209 // 16 bpp z data write
 210 #define WRITE_ZDATA_16(a,d)     {  JaguarWriteWord(a##_addr+(ZDATA_OFFSET_16(a)<<1), d, BLITTER); }
 211 //#define WRITE_ZDATA_16(a,d)     {  JaguarWriteWord(a##_addr+(ZDATA_OFFSET_16(a)<<1), d); }
 212
 213 // z data write
 214 #define WRITE_ZDATA(a,f,d) WRITE_ZDATA_16(a,d);
 215
 216 // 1 bpp r data read
 217 #define READ_RDATA_1(r,a,p)  ((p) ?  ((REG(r+(((uint32)a##_x >> 19) & 0x04))) >> (((uint32)a##_x >> 16) & 0x1F)) & 0x0001 : (REG(r) & 0x0001))
 218
 219 // 2 bpp r data read
 220 #define READ_RDATA_2(r,a,p)  ((p) ?  ((REG(r+(((uint32)a##_x >> 18) & 0x04))) >> (((uint32)a##_x >> 15) & 0x3E)) & 0x0003 : (REG(r) & 0x0003))
 221
 222 // 4 bpp r data read
 223 #define READ_RDATA_4(r,a,p)  ((p) ?  ((REG(r+(((uint32)a##_x >> 17) & 0x04))) >> (((uint32)a##_x >> 14) & 0x28)) & 0x000F : (REG(r) & 0x000F))
 224
 225 // 8 bpp r data read
 226 #define READ_RDATA_8(r,a,p)  ((p) ?  ((REG(r+(((uint32)a##_x >> 16) & 0x04))) >> (((uint32)a##_x >> 13) & 0x18)) & 0x00FF : (REG(r) & 0x00FF))
 227
 228 // 16 bpp r data read
 229 #define READ_RDATA_16(r,a,p)  ((p) ? ((REG(r+(((uint32)a##_x >> 15) & 0x04))) >> (((uint32)a##_x >> 12) & 0x10)) & 0xFFFF : (REG(r) & 0xFFFF))
 230
 231 // 32 bpp r data read
 232 #define READ_RDATA_32(r,a,p)  ((p) ? REG(r+(((uint32)a##_x >> 14) & 0x04)) : REG(r))
 233
 234 // register data read
 235 #define READ_RDATA(r,a,f,p) (\
 236          (((f>>3)&0x07) == 0) ? (READ_RDATA_1(r,a,p)) : \
 237          (((f>>3)&0x07) == 1) ? (READ_RDATA_2(r,a,p)) : \
 238          (((f>>3)&0x07) == 2) ? (READ_RDATA_4(r,a,p)) : \
 239          (((f>>3)&0x07) == 3) ? (READ_RDATA_8(r,a,p)) : \
 240          (((f>>3)&0x07) == 4) ? (READ_RDATA_16(r,a,p)) : \
 241          (((f>>3)&0x07) == 5) ? (READ_RDATA_32(r,a,p)) : 0)
 242
 243 // 1 bpp pixel write
 244 #define WRITE_PIXEL_1(a,d)       { JaguarWriteByte(a##_addr+PIXEL_OFFSET_1(a), (JaguarReadByte(a##_addr+PIXEL_OFFSET_1(a), BLITTER)&(~(0x01 << PIXEL_SHIFT_1(a))))|(d<<PIXEL_SHIFT_1(a)), BLITTER); }
 245 //#define WRITE_PIXEL_1(a,d)       { JaguarWriteByte(a##_addr+PIXEL_OFFSET_1(a), (JaguarReadByte(a##_addr+PIXEL_OFFSET_1(a))&(~(0x01 << PIXEL_SHIFT_1(a))))|(d<<PIXEL_SHIFT_1(a))); }
 246
 247 // 2 bpp pixel write
 248 #define WRITE_PIXEL_2(a,d)       { JaguarWriteByte(a##_addr+PIXEL_OFFSET_2(a), (JaguarReadByte(a##_addr+PIXEL_OFFSET_2(a), BLITTER)&(~(0x03 << PIXEL_SHIFT_2(a))))|(d<<PIXEL_SHIFT_2(a)), BLITTER); }
 249 //#define WRITE_PIXEL_2(a,d)       { JaguarWriteByte(a##_addr+PIXEL_OFFSET_2(a), (JaguarReadByte(a##_addr+PIXEL_OFFSET_2(a))&(~(0x03 << PIXEL_SHIFT_2(a))))|(d<<PIXEL_SHIFT_2(a))); }
 250
 251 // 4 bpp pixel write
 252 #define WRITE_PIXEL_4(a,d)       { JaguarWriteByte(a##_addr+PIXEL_OFFSET_4(a), (JaguarReadByte(a##_addr+PIXEL_OFFSET_4(a), BLITTER)&(~(0x0f << PIXEL_SHIFT_4(a))))|(d<<PIXEL_SHIFT_4(a)), BLITTER); }
 253 //#define WRITE_PIXEL_4(a,d)       { JaguarWriteByte(a##_addr+PIXEL_OFFSET_4(a), (JaguarReadByte(a##_addr+PIXEL_OFFSET_4(a))&(~(0x0f << PIXEL_SHIFT_4(a))))|(d<<PIXEL_SHIFT_4(a))); }
 254
 255 // 8 bpp pixel write
 256 #define WRITE_PIXEL_8(a,d)       { JaguarWriteByte(a##_addr+PIXEL_OFFSET_8(a), d, BLITTER); }
 257 //#define WRITE_PIXEL_8(a,d)       { JaguarWriteByte(a##_addr+PIXEL_OFFSET_8(a), d); }
 258
 259 // 16 bpp pixel write
 260 //#define WRITE_PIXEL_16(a,d)     {  JaguarWriteWord(a##_addr+(PIXEL_OFFSET_16(a)<<1),d); }
 261 #define WRITE_PIXEL_16(a,d)     {  JaguarWriteWord(a##_addr+(PIXEL_OFFSET_16(a)<<1), d, BLITTER); if (specialLog) WriteLog("Pixel write address: %08X\n", a##_addr+(PIXEL_OFFSET_16(a)<<1)); }
 262 //#define WRITE_PIXEL_16(a,d)     {  JaguarWriteWord(a##_addr+(PIXEL_OFFSET_16(a)<<1), d); if (specialLog) WriteLog("Pixel write address: %08X\n", a##_addr+(PIXEL_OFFSET_16(a)<<1)); }
 263
 264 // 32 bpp pixel write
 265 #define WRITE_PIXEL_32(a,d)             { JaguarWriteLong(a##_addr+(PIXEL_OFFSET_32(a)<<2), d, BLITTER); }
 266 //#define WRITE_PIXEL_32(a,d)           { JaguarWriteLong(a##_addr+(PIXEL_OFFSET_32(a)<<2), d); }
 267
 268 // pixel write
 269 #define WRITE_PIXEL(a,f,d) {\
 270         switch ((f>>3)&0x07) { \
 271         case 0: WRITE_PIXEL_1(a,d);  break;  \
 272         case 1: WRITE_PIXEL_2(a,d);  break;  \
 273         case 2: WRITE_PIXEL_4(a,d);  break;  \
 274         case 3: WRITE_PIXEL_8(a,d);  break;  \
 275         case 4: WRITE_PIXEL_16(a,d); break;  \
 276         case 5: WRITE_PIXEL_32(a,d); break;  \
 277         }}
 278
 279 // Width in Pixels of a Scanline
 280 // This is a pretranslation of the value found in the A1 & A2 flags: It's really a floating point value
 281 // of the form EEEEMM where MM is the mantissa with an implied "1." in front of it and the EEEE value is
 282 // the exponent. Valid values for the exponent range from 0 to 11 (decimal). It's easiest to think of it
 283 // as a floating point bit pattern being followed by a number of zeroes. So, e.g., 001101 translates to
 284 // 1.01 (the "1." being implied) x (2 ^ 3) or 1010 -> 10 in base 10 (i.e., 1.01 with the decimal place
 285 // being shifted to the right 3 places).
 286 /*static uint32 blitter_scanline_width[48] =
 287 {
 288      0,    0,    0,    0,                                       // Note: This would really translate to 1, 1, 1, 1
 289      2,    0,    0,    0,
 290      4,    0,    6,    0,
 291      8,   10,   12,   14,
 292     16,   20,   24,   28,
 293     32,   40,   48,   56,
 294     64,   80,   96,  112,
 295    128,  160,  192,  224,
 296    256,  320,  384,  448,
 297    512,  640,  768,  896,
 298   1024, 1280, 1536, 1792,
 299   2048, 2560, 3072, 3584
 300 };//*/
 301
 302 //static uint8 * tom_ram_8;
 303 //static uint8 * paletteRam;
 304 static uint8 src;
 305 static uint8 dst;
 306 static uint8 misc;
 307 static uint8 a1ctl;
 308 static uint8 mode;
 309 static uint8 ity;
 310 static uint8 zop;
 311 static uint8 op;
 312 static uint8 ctrl;
 313 static uint32 a1_addr;
 314 static uint32 a2_addr;
 315 static int32 a1_zoffs;
 316 static int32 a2_zoffs;
 317 static uint32 xadd_a1_control;
 318 static uint32 xadd_a2_control;
 319 static int32 a1_pitch;
 320 static int32 a2_pitch;
 321 static uint32 n_pixels;
 322 static uint32 n_lines;
 323 static int32 a1_x;
 324 static int32 a1_y;
 325 static int32 a1_width;
 326 static int32 a2_x;
 327 static int32 a2_y;
 328 static int32 a2_width;
 329 static int32 a2_mask_x;
 330 static int32 a2_mask_y;
 331 static int32 a1_xadd;
 332 static int32 a1_yadd;
 333 static int32 a2_xadd;
 334 static int32 a2_yadd;
 335 static uint8 a1_phrase_mode;
 336 static uint8 a2_phrase_mode;
 337 static int32 a1_step_x = 0;
 338 static int32 a1_step_y = 0;
 339 static int32 a2_step_x = 0;
 340 static int32 a2_step_y = 0;
 341 static uint32 outer_loop;
 342 static uint32 inner_loop;
 343 static uint32 a2_psize;
 344 static uint32 a1_psize;
 345 static uint32 gouraud_add;
 346 //static uint32 gouraud_data;
 347 //static uint16 gint[4];
 348 //static uint16 gfrac[4];
 349 //static uint8  gcolour[4];
 350 static int gd_i[4];
 351 static int gd_c[4];
 352 static int gd_ia, gd_ca;
 353 static int colour_index = 0;
 354 static int32 zadd;
 355 static uint32 z_i[4];
 356
 357 static int32 a1_clip_x, a1_clip_y;
 358
 359 // In the spirit of "get it right first, *then* optimize" I've taken the liberty
 360 // of removing all the unnecessary code caching. If it turns out to be a good way
 361 // to optimize the blitter, then we may revisit it in the future...
 362
 363 //
 364 // Generic blit handler
 365 //
 366 void blitter_generic(uint32 cmd)
 367 {
 368 /*
 369 Blit! (0018FA70 <- 008DDC40) count: 2 x 13, A1/2_FLAGS: 00014218/00013C18 [cmd: 1401060C]
 370  CMD -> src: SRCENX dst: DSTEN  misc:  a1ctl: UPDA1 UPDA2 mode:  ity: PATDSEL z-op:  op: LFU_CLEAR ctrl: BCOMPEN BKGWREN
 371   A1 step values: -2 (X), 1 (Y)
 372   A2 step values: -1 (X), 1 (Y) [mask (unused): 00000000 - FFFFFFFF/FFFFFFFF]
 373   A1 -> pitch: 1 phrases, depth: 8bpp, z-off: 0, width: 320 (21), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
 374   A2 -> pitch: 1 phrases, depth: 8bpp, z-off: 0, width: 192 (1E), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
 375         A1 x/y: 100/12, A2 x/y: 106/0 Pattern: 000000F300000000
 376 */
 377 //if (effect_start)
 378 //      specialLog = true;
 379 /*if (cmd == 0x1401060C && blit_start_log)
 380         specialLog = true;//*/
 381 //Testing only!
 382 //uint32 logGo = ((cmd == 0x01800E01 && REG(A1_BASE) == 0x898000) ? 1 : 0);
 383         uint32 srcdata, srczdata, dstdata, dstzdata, writedata, inhibit;
 384         uint32 bppSrc = (DSTA2 ? 1 << ((REG(A1_FLAGS) >> 3) & 0x07) : 1 << ((REG(A2_FLAGS) >> 3) & 0x07));
 385
 386 if (specialLog)
 387 {
 388         WriteLog("About to do n x m blit (BM width is ? pixels)...\n");
 389         WriteLog("A1_STEP_X/Y = %08X/%08X, A2_STEP_X/Y = %08X/%08X\n", a1_step_x, a1_step_y, a2_step_x, a2_step_y);
 390 }
 391 /*      if (BCOMPEN)
 392         {
 393                 if (DSTA2)
 394                         a1_xadd = 0;
 395                 else
 396                         a2_xadd = 0;
 397         }//*/
 398
 399         while (outer_loop--)
 400         {
 401 if (specialLog)
 402 {
 403         WriteLog("  A1_X/Y = %08X/%08X, A2_X/Y = %08X/%08X\n", a1_x, a1_y, a2_x, a2_y);
 404 }
 405                 uint32 a1_start = a1_x, a2_start = a2_x, bitPos = 0;
 406
 407                 //Kludge for Hover Strike...
 408                 //I wonder if this kludge is in conjunction with the SRCENX down below...
 409                 // This isn't so much a kludge but the way things work in BCOMPEN mode...!
 410                 if (BCOMPEN && SRCENX)
 411                 {
 412                         if (n_pixels < bppSrc)
 413                                 bitPos = bppSrc - n_pixels;
 414                 }
 415
 416                 inner_loop = n_pixels;
 417                 while (inner_loop--)
 418                 {
 419 if (specialLog)
 420 {
 421         WriteLog("    A1_X/Y = %08X/%08X, A2_X/Y = %08X/%08X\n", a1_x, a1_y, a2_x, a2_y);
 422 }
 423                         srcdata = srczdata = dstdata = dstzdata = writedata = inhibit = 0;
 424
 425                         if (!DSTA2)                                                     // Data movement: A1 <- A2
 426                         {
 427                                 // load src data and Z
 428 //                              if (SRCEN)
 429                                 if (SRCEN || SRCENX)    // Not sure if this is correct... (seems to be...!)
 430                                 {
 431                                         srcdata = READ_PIXEL(a2, REG(A2_FLAGS));
 432
 433                                         if (SRCENZ)
 434                                                 srczdata = READ_ZDATA(a2, REG(A2_FLAGS));
 435                                         else if (cmd & 0x0001C020)      // PATDSEL | TOPBEN | TOPNEN | DSTWRZ
 436                                                 srczdata = READ_RDATA(SRCZINT, a2, REG(A2_FLAGS), a2_phrase_mode);
 437                                 }
 438                                 else    // Use SRCDATA register...
 439                                 {
 440                                         srcdata = READ_RDATA(SRCDATA, a2, REG(A2_FLAGS), a2_phrase_mode);
 441
 442                                         if (cmd & 0x0001C020)           // PATDSEL | TOPBEN | TOPNEN | DSTWRZ
 443                                                 srczdata = READ_RDATA(SRCZINT, a2, REG(A2_FLAGS), a2_phrase_mode);
 444                                 }
 445
 446                                 // load dst data and Z
 447                                 if (DSTEN)
 448                                 {
 449                                         dstdata = READ_PIXEL(a1, REG(A1_FLAGS));
 450
 451                                         if (DSTENZ)
 452                                                 dstzdata = READ_ZDATA(a1, REG(A1_FLAGS));
 453                                         else
 454                                                 dstzdata = READ_RDATA(DSTZ, a1, REG(A1_FLAGS), a1_phrase_mode);
 455                                 }
 456                                 else
 457                                 {
 458                                         dstdata = READ_RDATA(DSTDATA, a1, REG(A1_FLAGS), a1_phrase_mode);
 459
 460                                         if (DSTENZ)
 461                                                 dstzdata = READ_RDATA(DSTZ, a1, REG(A1_FLAGS), a1_phrase_mode);
 462                                 }
 463
 464 /*This wasn't working...                                // a1 clipping
 465                                 if (cmd & 0x00000040)
 466                                 {
 467                                         if (a1_x < 0 || a1_y < 0 || (a1_x >> 16) >= (REG(A1_CLIP) & 0x7FFF)
 468                                                 || (a1_y >> 16) >= ((REG(A1_CLIP) >> 16) & 0x7FFF))
 469                                                 inhibit = 1;
 470                                 }//*/
 471
 472                                 if (GOURZ)
 473                                         srczdata = z_i[colour_index] >> 16;
 474
 475                                 // apply z comparator
 476                                 if (Z_OP_INF && srczdata <  dstzdata)   inhibit = 1;
 477                                 if (Z_OP_EQU && srczdata == dstzdata)   inhibit = 1;
 478                                 if (Z_OP_SUP && srczdata >  dstzdata)   inhibit = 1;
 479
 480                                 // apply data comparator
 481 // Note: DCOMPEN only works in 8/16 bpp modes! !!! FIX !!!
 482 // Does BCOMPEN only work in 1 bpp mode???
 483 //   No, but it always does a 1 bit expansion no matter what the BPP of the channel is set to. !!! FIX !!!
 484 //   This is bit tricky... We need to fix the XADD value so that it acts like a 1BPP value while inside
 485 //   an 8BPP space.
 486                                 if (DCOMPEN | BCOMPEN)
 487                                 {
 488 //Temp, for testing Hover Strike
 489 //Doesn't seem to do it... Why?
 490 //What needs to happen here is twofold. First, the address generator in the outer loop has
 491 //to honor the BPP when calculating the start address (which it kinda does already). Second,
 492 //it has to step bit by bit when using BCOMPEN. How to do this???
 493         if (BCOMPEN)
 494 //small problem with this approach: it's not accurate... We need a proper address to begin with
 495 //and *then* we can do the bit stepping from there the way it's *supposed* to be done... !!! FIX !!!
 496 //[DONE]
 497         {
 498                 uint32 pixShift = (~bitPos) & (bppSrc - 1);
 499                 srcdata = (srcdata >> pixShift) & 0x01;
 500
 501                 bitPos++;
 502 //              if (bitPos % bppSrc == 0)
 503 //                      a2_x += 0x00010000;
 504         }
 505 /*
 506 Interesting (Hover Strike--large letter):
 507
 508 Blit! (0018FA70 <- 008DDC40) count: 2 x 13, A1/2_FLAGS: 00014218/00013C18 [cmd: 1401060C]
 509  CMD -> src: SRCENX dst: DSTEN  misc:  a1ctl: UPDA1 UPDA2 mode:  ity: PATDSEL z-op:  op: LFU_CLEAR ctrl: BCOMPEN BKGWREN
 510   A1 step values: -2 (X), 1 (Y)
 511   A2 step values: -1 (X), 1 (Y) [mask (unused): 00000000 - FFFFFFFF/FFFFFFFF]
 512   A1 -> pitch: 1 phrases, depth: 8bpp, z-off: 0, width: 320 (21), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
 513   A2 -> pitch: 1 phrases, depth: 8bpp, z-off: 0, width: 192 (1E), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
 514         A1 x/y: 100/12, A2 x/y: 106/0 Pattern: 000000F300000000
 515
 516 Blit! (0018FA70 <- 008DDC40) count: 8 x 13, A1/2_FLAGS: 00014218/00013C18 [cmd: 1401060C]
 517  CMD -> src: SRCENX dst: DSTEN  misc:  a1ctl: UPDA1 UPDA2 mode:  ity: PATDSEL z-op:  op: LFU_CLEAR ctrl: BCOMPEN BKGWREN
 518   A1 step values: -8 (X), 1 (Y)
 519   A2 step values: -1 (X), 1 (Y) [mask (unused): 00000000 - FFFFFFFF/FFFFFFFF]
 520   A1 -> pitch: 1 phrases, depth: 8bpp, z-off: 0, width: 320 (21), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
 521   A2 -> pitch: 1 phrases, depth: 8bpp, z-off: 0, width: 192 (1E), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
 522         A1 x/y: 102/12, A2 x/y: 107/0 Pattern: 000000F300000000
 523
 524 Blit! (0018FA70 <- 008DDC40) count: 1 x 13, A1/2_FLAGS: 00014218/00013C18 [cmd: 1401060C]
 525  CMD -> src: SRCENX dst: DSTEN  misc:  a1ctl: UPDA1 UPDA2 mode:  ity: PATDSEL z-op:  op: LFU_CLEAR ctrl: BCOMPEN BKGWREN
 526   A1 step values: -1 (X), 1 (Y)
 527   A2 step values: -1 (X), 1 (Y) [mask (unused): 00000000 - FFFFFFFF/FFFFFFFF]
 528   A1 -> pitch: 1 phrases, depth: 8bpp, z-off: 0, width: 320 (21), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
 529   A2 -> pitch: 1 phrases, depth: 8bpp, z-off: 0, width: 192 (1E), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
 530         A1 x/y: 118/12, A2 x/y: 70/0 Pattern: 000000F300000000
 531
 532 Blit! (0018FA70 <- 008DDC40) count: 8 x 13, A1/2_FLAGS: 00014218/00013C18 [cmd: 1401060C]
 533  CMD -> src: SRCENX dst: DSTEN  misc:  a1ctl: UPDA1 UPDA2 mode:  ity: PATDSEL z-op:  op: LFU_CLEAR ctrl: BCOMPEN BKGWREN
 534   A1 step values: -8 (X), 1 (Y)
 535   A2 step values: -1 (X), 1 (Y) [mask (unused): 00000000 - FFFFFFFF/FFFFFFFF]
 536   A1 -> pitch: 1 phrases, depth: 8bpp, z-off: 0, width: 320 (21), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
 537   A2 -> pitch: 1 phrases, depth: 8bpp, z-off: 0, width: 192 (1E), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
 538         A1 x/y: 119/12, A2 x/y: 71/0 Pattern: 000000F300000000
 539
 540 Blit! (0018FA70 <- 008DDC40) count: 1 x 13, A1/2_FLAGS: 00014218/00013C18 [cmd: 1401060C]
 541  CMD -> src: SRCENX dst: DSTEN  misc:  a1ctl: UPDA1 UPDA2 mode:  ity: PATDSEL z-op:  op: LFU_CLEAR ctrl: BCOMPEN BKGWREN
 542   A1 step values: -1 (X), 1 (Y)
 543   A2 step values: -1 (X), 1 (Y) [mask (unused): 00000000 - FFFFFFFF/FFFFFFFF]
 544   A1 -> pitch: 1 phrases, depth: 8bpp, z-off: 0, width: 320 (21), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
 545   A2 -> pitch: 1 phrases, depth: 8bpp, z-off: 0, width: 192 (1E), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
 546         A1 x/y: 127/12, A2 x/y: 66/0 Pattern: 000000F300000000
 547
 548 Blit! (0018FA70 <- 008DDC40) count: 8 x 13, A1/2_FLAGS: 00014218/00013C18 [cmd: 1401060C]
 549  CMD -> src: SRCENX dst: DSTEN  misc:  a1ctl: UPDA1 UPDA2 mode:  ity: PATDSEL z-op:  op: LFU_CLEAR ctrl: BCOMPEN BKGWREN
 550   A1 step values: -8 (X), 1 (Y)
 551   A2 step values: -1 (X), 1 (Y) [mask (unused): 00000000 - FFFFFFFF/FFFFFFFF]
 552   A1 -> pitch: 1 phrases, depth: 8bpp, z-off: 0, width: 320 (21), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
 553   A2 -> pitch: 1 phrases, depth: 8bpp, z-off: 0, width: 192 (1E), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
 554         A1 x/y: 128/12, A2 x/y: 67/0 Pattern: 000000F300000000
 555 */
 556
 557
 558                                         if (!CMPDST)
 559                                         {
 560 //WriteLog("Blitter: BCOMPEN set on command %08X inhibit prev:%u, now:", cmd, inhibit);
 561                                                 // compare source pixel with pattern pixel
 562 /*
 563 Blit! (000B8250 <- 0012C3A0) count: 16 x 1, A1/2_FLAGS: 00014420/00012000 [cmd: 05810001]
 564  CMD -> src: SRCEN  dst:  misc:  a1ctl:  mode:  ity: PATDSEL z-op:  op: LFU_REPLACE ctrl: BCOMPEN
 565   A1 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 384 (22), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
 566   A2 -> pitch: 1 phrases, depth: 1bpp, z-off: 0, width: 16 (10), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
 567         x/y: 0/20
 568 ...
 569 */
 570 // AvP is still wrong, could be cuz it's doing A1 -> A2...
 571
 572 // Src is the 1bpp bitmap... DST is the PATTERN!!!
 573 // This seems to solve at least ONE of the problems with MC3D...
 574 // Why should this be inverted???
 575 // Bcuz it is. This is supposed to be used only for a bit -> pixel expansion...
 576 /*                                              if (srcdata == READ_RDATA(PATTERNDATA, a2, REG(A2_FLAGS), a2_phrase_mode))
 577 //                                              if (srcdata != READ_RDATA(PATTERNDATA, a2, REG(A2_FLAGS), a2_phrase_mode))
 578                                                         inhibit = 1;//*/
 579 /*                                              uint32 A2bpp = 1 << ((REG(A2_FLAGS) >> 3) & 0x07);
 580                                                 if (A2bpp == 1 || A2bpp == 16 || A2bpp == 8)
 581                                                         inhibit = (srcdata == 0 ? 1: 0);
 582 //                                                      inhibit = !srcdata;
 583                                                 else
 584                                                         WriteLog("Blitter: Bad BPP (%u) selected for BCOMPEN mode!\n", A2bpp);//*/
 585 // What it boils down to is this:
 586
 587                                                 if (srcdata == 0)
 588                                                         inhibit = 1;//*/
 589                                         }
 590                                         else
 591                                         {
 592                                                 // compare destination pixel with pattern pixel
 593                                                 if (dstdata == READ_RDATA(PATTERNDATA, a1, REG(A1_FLAGS), a1_phrase_mode))
 594 //                                              if (dstdata != READ_RDATA(PATTERNDATA, a1, REG(A1_FLAGS), a1_phrase_mode))
 595                                                         inhibit = 1;
 596                                         }
 597
 598 // This is DEFINITELY WRONG
 599 //                                      if (a1_phrase_mode || a2_phrase_mode)
 600 //                                              inhibit = !inhibit;
 601                                 }
 602
 603                                 if (CLIPA1)
 604                                 {
 605                                         inhibit |= (((a1_x >> 16) < a1_clip_x && (a1_x >> 16) >= 0
 606                                                 && (a1_y >> 16) < a1_clip_y && (a1_y >> 16) >= 0) ? 0 : 1);
 607                                 }
 608
 609                                 // compute the write data and store
 610                                 if (!inhibit)
 611                                 {
 612 // Houston, we have a problem...
 613 // Look here, at PATDSEL and GOURD. If both are active (as they are on the BIOS intro), then there's
 614 // a conflict! E.g.:
 615 //Blit! (00100000 <- 000095D0) count: 3 x 1, A1/2_FLAGS: 00014220/00004020 [cmd: 00011008]
 616 // CMD -> src:  dst: DSTEN  misc:  a1ctl:  mode: GOURD  ity: PATDSEL z-op:  op: LFU_CLEAR ctrl:
 617 //  A1 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 320 (21), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
 618 //  A2 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 256 (20), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
 619 //        A1 x/y: 90/171, A2 x/y: 808/0 Pattern: 776D770077007700
 620
 621                                         if (PATDSEL)
 622                                         {
 623                                                 // use pattern data for write data
 624                                                 writedata = READ_RDATA(PATTERNDATA, a1, REG(A1_FLAGS), a1_phrase_mode);
 625                                         }
 626                                         else if (ADDDSEL)
 627                                         {
 628 /*if (blit_start_log)
 629         WriteLog("BLIT: ADDDSEL srcdata: %08X\, dstdata: %08X, ", srcdata, dstdata);//*/
 630
 631                                                 // intensity addition
 632 //Ok, this is wrong... Or is it? Yes, it's wrong! !!! FIX !!!
 633 /*                                              writedata = (srcdata & 0xFF) + (dstdata & 0xFF);
 634                                                 if (!(TOPBEN) && writedata > 0xFF)
 635 //                                                      writedata = 0xFF;
 636                                                         writedata &= 0xFF;
 637                                                 writedata |= (srcdata & 0xF00) + (dstdata & 0xF00);
 638                                                 if (!(TOPNEN) && writedata > 0xFFF)
 639 //                                                      writedata = 0xFFF;
 640                                                         writedata &= 0xFFF;
 641                                                 writedata |= (srcdata & 0xF000) + (dstdata & 0xF000);//*/
 642 //notneeded--writedata &= 0xFFFF;
 643 /*if (blit_start_log)
 644         WriteLog("writedata: %08X\n", writedata);//*/
 645 /*
 646 Hover Strike ADDDSEL blit:
 647
 648 Blit! (00098D90 <- 0081DDC0) count: 320 x 287, A1/2_FLAGS: 00004220/00004020 [cmd: 00020208]
 649  CMD -> src:  dst: DSTEN  misc:  a1ctl: UPDA1  mode:  ity: ADDDSEL z-op:  op: LFU_CLEAR ctrl:
 650   A1 step values: -320 (X), 1 (Y)
 651   A1 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 320 (21), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
 652   A2 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 256 (20), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
 653         A1 x/y: 0/0, A2 x/y: 3288/0 Pattern: 0000000000000000 SRCDATA: 00FD00FD00FD00FD
 654 */
 655                                                 writedata = (srcdata & 0xFF) + (dstdata & 0xFF);
 656
 657                                                 if (!TOPBEN)
 658                                                 {
 659 //This is correct now, but slow...
 660                                                         int16 s = (srcdata & 0xFF) | (srcdata & 0x80 ? 0xFF00 : 0x0000),
 661                                                                 d = dstdata & 0xFF;
 662                                                         int16 sum = s + d;
 663
 664                                                         if (sum < 0)
 665                                                                 writedata = 0x00;
 666                                                         else if (sum > 0xFF)
 667                                                                 writedata = 0xFF;
 668                                                         else
 669                                                                 writedata = (uint32)sum;
 670                                                 }
 671
 672 //This doesn't seem right... Looks like it would muck up the low byte... !!! FIX !!!
 673                                                 writedata |= (srcdata & 0xF00) + (dstdata & 0xF00);
 674
 675                                                 if (!TOPNEN && writedata > 0xFFF)
 676                                                 {
 677                                                         writedata &= 0xFFF;
 678                                                 }
 679
 680                                                 writedata |= (srcdata & 0xF000) + (dstdata & 0xF000);
 681                                         }
 682                                         else
 683                                         {
 684                                                 if (LFU_NAN) writedata |= ~srcdata & ~dstdata;
 685                                                 if (LFU_NA)  writedata |= ~srcdata & dstdata;
 686                                                 if (LFU_AN)  writedata |= srcdata  & ~dstdata;
 687                                                 if (LFU_A)       writedata |= srcdata  & dstdata;
 688                                         }
 689
 690 //Although, this looks like it's OK... (even if it is shitty!)
 691 //According to JTRM, this is one of the four things the blitter does with the write data (the other
 692 //three being PATDSEL, ADDDSEL, and LFU (default)). I'm not sure which gets precedence, this or PATDSEL
 693 //(see above blit example)...
 694                                         if (GOURD)
 695                                                 writedata = ((gd_c[colour_index]) << 8) | (gd_i[colour_index] >> 16);
 696
 697                                         if (SRCSHADE)
 698                                         {
 699                                                 int intensity = srcdata & 0xFF;
 700                                                 int ia = gd_ia >> 16;
 701                                                 if (ia & 0x80)
 702                                                         ia = 0xFFFFFF00 | ia;
 703                                                 intensity += ia;
 704                                                 if (intensity < 0)
 705                                                         intensity = 0;
 706                                                 if (intensity > 0xFF)
 707                                                         intensity = 0xFF;
 708                                                 writedata = (srcdata & 0xFF00) | intensity;
 709                                         }
 710                                 }
 711                                 else
 712                                 {
 713                                         writedata = dstdata;
 714                                         srczdata = dstzdata;
 715                                 }
 716
 717 //Tried 2nd below for Hover Strike: No dice.
 718                                 if (/*a1_phrase_mode || */BKGWREN || !inhibit)
 719 //                              if (/*a1_phrase_mode || BKGWREN ||*/ !inhibit)
 720                                 {
 721 /*if (((REG(A1_FLAGS) >> 3) & 0x07) == 5)
 722 {
 723         uint32 offset = a1_addr+(PIXEL_OFFSET_32(a1)<<2);
 724 // (((((uint32)a##_y >> 16) * a##_width) + (((uint32)a##_x >> 16) & ~1)) * (1 + a##_pitch) + (((uint32)a##_x >> 16) & 1))
 725         if ((offset >= 0x1FF020 && offset <= 0x1FF03F) || (offset >= 0x1FF820 && offset <= 0x1FF83F))
 726                 WriteLog("32bpp pixel write: A1 Phrase mode --> ");
 727 }//*/
 728                                         // write to the destination
 729                                         WRITE_PIXEL(a1, REG(A1_FLAGS), writedata);
 730                                         if (DSTWRZ)
 731                                                 WRITE_ZDATA(a1, REG(A1_FLAGS), srczdata);
 732                                 }
 733                         }
 734                         else    // if (DSTA2)                                                   // Data movement: A1 -> A2
 735                         {
 736                                 // load src data and Z
 737                                 if (SRCEN)
 738                                 {
 739                                         srcdata = READ_PIXEL(a1, REG(A1_FLAGS));
 740                                         if (SRCENZ)
 741                                                 srczdata = READ_ZDATA(a1, REG(A1_FLAGS));
 742                                         else if (cmd & 0x0001C020)      // PATDSEL | TOPBEN | TOPNEN | DSTWRZ
 743                                                 srczdata = READ_RDATA(SRCZINT, a1, REG(A1_FLAGS), a1_phrase_mode);
 744                                 }
 745                                 else
 746                                 {
 747                                         srcdata = READ_RDATA(SRCDATA, a1, REG(A1_FLAGS), a1_phrase_mode);
 748                                         if (cmd & 0x001C020)    // PATDSEL | TOPBEN | TOPNEN | DSTWRZ
 749                                                 srczdata = READ_RDATA(SRCZINT, a1, REG(A1_FLAGS), a1_phrase_mode);
 750                                 }
 751
 752                                 // load dst data and Z
 753                                 if (DSTEN)
 754                                 {
 755                                         dstdata = READ_PIXEL(a2, REG(A2_FLAGS));
 756                                         if (DSTENZ)
 757                                                 dstzdata = READ_ZDATA(a2, REG(A2_FLAGS));
 758                                         else
 759                                                 dstzdata = READ_RDATA(DSTZ, a2, REG(A2_FLAGS), a2_phrase_mode);
 760                                 }
 761                                 else
 762                                 {
 763                                         dstdata = READ_RDATA(DSTDATA, a2, REG(A2_FLAGS), a2_phrase_mode);
 764                                         if (DSTENZ)
 765                                                 dstzdata = READ_RDATA(DSTZ, a2, REG(A2_FLAGS), a2_phrase_mode);
 766                                 }
 767
 768                                 if (GOURZ)
 769                                         srczdata = z_i[colour_index] >> 16;
 770
 771                                 // apply z comparator
 772                                 if (Z_OP_INF && srczdata < dstzdata)    inhibit = 1;
 773                                 if (Z_OP_EQU && srczdata == dstzdata)   inhibit = 1;
 774                                 if (Z_OP_SUP && srczdata > dstzdata)    inhibit = 1;
 775
 776                                 // apply data comparator
 777 //NOTE: The bit comparator (BCOMPEN) is NOT the same as the data comparator!
 778                                 if (DCOMPEN | BCOMPEN)
 779                                 {
 780                                         if (!CMPDST)
 781                                         {
 782                                                 // compare source pixel with pattern pixel
 783 // AvP: Numbers are correct, but sprites are not!
 784 //This doesn't seem to be a problem... But could still be wrong...
 785 /*                                              if (srcdata == READ_RDATA(PATTERNDATA, a1, REG(A1_FLAGS), a1_phrase_mode))
 786 //                                              if (srcdata != READ_RDATA(PATTERNDATA, a1, REG(A1_FLAGS), a1_phrase_mode))
 787                                                         inhibit = 1;//*/
 788 // This is probably not 100% correct... It works in the 1bpp case
 789 // (in A1 <- A2 mode, that is...)
 790 // AvP: This is causing blocks to be written instead of bit patterns...
 791 // Works now...
 792 // NOTE: We really should separate out the BCOMPEN & DCOMPEN stuff!
 793 /*                                              uint32 A1bpp = 1 << ((REG(A1_FLAGS) >> 3) & 0x07);
 794                                                 if (A1bpp == 1 || A1bpp == 16 || A1bpp == 8)
 795                                                         inhibit = (srcdata == 0 ? 1: 0);
 796                                                 else
 797                                                         WriteLog("Blitter: Bad BPP (%u) selected for BCOMPEN mode!\n", A1bpp);//*/
 798 // What it boils down to is this:
 799                                                 if (srcdata == 0)
 800                                                         inhibit = 1;//*/
 801                                         }
 802                                         else
 803                                         {
 804                                                 // compare destination pixel with pattern pixel
 805                                                 if (dstdata == READ_RDATA(PATTERNDATA, a2, REG(A2_FLAGS), a2_phrase_mode))
 806 //                                              if (dstdata != READ_RDATA(PATTERNDATA, a2, REG(A2_FLAGS), a2_phrase_mode))
 807                                                         inhibit = 1;
 808                                         }
 809
 810 // This is DEFINITELY WRONG
 811 //                                      if (a1_phrase_mode || a2_phrase_mode)
 812 //                                              inhibit = !inhibit;
 813                                 }
 814
 815                                 if (CLIPA1)
 816                                 {
 817                                         inhibit |= (((a1_x >> 16) < a1_clip_x && (a1_x >> 16) >= 0
 818                                                 && (a1_y >> 16) < a1_clip_y && (a1_y >> 16) >= 0) ? 0 : 1);
 819                                 }
 820
 821                                 // compute the write data and store
 822                                 if (!inhibit)
 823                                 {
 824                                         if (PATDSEL)
 825                                         {
 826                                                 // use pattern data for write data
 827                                                 writedata = READ_RDATA(PATTERNDATA, a2, REG(A2_FLAGS), a2_phrase_mode);
 828                                         }
 829                                         else if (ADDDSEL)
 830                                         {
 831                                                 // intensity addition
 832                                                 writedata = (srcdata & 0xFF) + (dstdata & 0xFF);
 833                                                 if (!(TOPBEN) && writedata > 0xFF)
 834                                                         writedata = 0xFF;
 835                                                 writedata |= (srcdata & 0xF00) + (dstdata & 0xF00);
 836                                                 if (!(TOPNEN) && writedata > 0xFFF)
 837                                                         writedata = 0xFFF;
 838                                                 writedata |= (srcdata & 0xF000) + (dstdata & 0xF000);
 839                                         }
 840                                         else
 841                                         {
 842                                                 if (LFU_NAN)
 843                                                         writedata |= ~srcdata & ~dstdata;
 844                                                 if (LFU_NA)
 845                                                         writedata |= ~srcdata & dstdata;
 846                                                 if (LFU_AN)
 847                                                         writedata |= srcdata & ~dstdata;
 848                                                 if (LFU_A)
 849                                                         writedata |= srcdata & dstdata;
 850                                         }
 851
 852                                         if (GOURD)
 853                                                 writedata = ((gd_c[colour_index]) << 8) | (gd_i[colour_index] >> 16);
 854
 855                                         if (SRCSHADE)
 856                                         {
 857                                                 int intensity = srcdata & 0xFF;
 858                                                 int ia = gd_ia >> 16;
 859                                                 if (ia & 0x80)
 860                                                         ia = 0xFFFFFF00 | ia;
 861                                                 intensity += ia;
 862                                                 if (intensity < 0)
 863                                                         intensity = 0;
 864                                                 if (intensity > 0xFF)
 865                                                         intensity = 0xFF;
 866                                                 writedata = (srcdata & 0xFF00) | intensity;
 867                                         }
 868                                 }
 869                                 else
 870                                 {
 871                                         writedata = dstdata;
 872                                         srczdata = dstzdata;
 873                                 }
 874
 875                                 if (/*a2_phrase_mode || */BKGWREN || !inhibit)
 876                                 {
 877 /*if (logGo)
 878 {
 879         uint32 offset = a2_addr+(PIXEL_OFFSET_16(a2)<<1);
 880 // (((((uint32)a##_y >> 16) * a##_width) + (((uint32)a##_x >> 16) & ~1)) * (1 + a##_pitch) + (((uint32)a##_x >> 16) & 1))
 881         WriteLog("[%08X:%04X] ", offset, writedata);
 882 }//*/
 883                                         // write to the destination
 884                                         WRITE_PIXEL(a2, REG(A2_FLAGS), writedata);
 885
 886                                         if (DSTWRZ)
 887                                                 WRITE_ZDATA(a2, REG(A2_FLAGS), srczdata);
 888                                 }
 889                         }
 890
 891                         // Update x and y (inner loop)
 892 //Now it does! But crappy, crappy, crappy! !!! FIX !!! [DONE]
 893 //This is less than ideal, but it works...
 894                         if (!BCOMPEN)
 895                         {//*/
 896                                 a1_x += a1_xadd, a1_y += a1_yadd;
 897                                 a2_x = (a2_x + a2_xadd) & a2_mask_x, a2_y = (a2_y + a2_yadd) & a2_mask_y;
 898                         }
 899                         else
 900                         {
 901                                 a1_y += a1_yadd, a2_y = (a2_y + a2_yadd) & a2_mask_y;
 902                                 if (!DSTA2)
 903                                 {
 904                                         a1_x += a1_xadd;
 905                                         if (bitPos % bppSrc == 0)
 906                                                 a2_x = (a2_x + a2_xadd) & a2_mask_x;
 907                                 }
 908                                 else
 909                                 {
 910                                         a2_x = (a2_x + a2_xadd) & a2_mask_x;
 911                                         if (bitPos % bppSrc == 0)
 912                                                 a1_x += a1_xadd;
 913                                 }
 914                         }//*/
 915
 916                         if (GOURZ)
 917                                 z_i[colour_index] += zadd;
 918
 919                         if (GOURD || SRCSHADE)
 920                         {
 921                                 gd_i[colour_index] += gd_ia;
 922 //Hmm, this doesn't seem to do anything...
 923 //But it is correct according to the JTRM...!
 924 if ((int32)gd_i[colour_index] < 0)
 925         gd_i[colour_index] = 0;
 926 if (gd_i[colour_index] > 0x00FFFFFF)
 927         gd_i[colour_index] = 0x00FFFFFF;//*/
 928
 929                                 gd_c[colour_index] += gd_ca;
 930 if ((int32)gd_c[colour_index] < 0)
 931         gd_c[colour_index] = 0;
 932 if (gd_c[colour_index] > 0x000000FF)
 933         gd_c[colour_index] = 0x000000FF;//*/
 934                         }
 935
 936                         if (GOURD || SRCSHADE || GOURZ)
 937                         {
 938                                 if (a1_phrase_mode)
 939 //This screws things up WORSE (for the BIOS opening screen)
 940 //                              if (a1_phrase_mode || a2_phrase_mode)
 941                                         colour_index = (colour_index + 1) & 0x03;
 942                         }
 943                 }
 944
 945 /*
 946 Here's the problem... The phrase mode code!
 947 Blit! (00100000 -> 00148000) count: 327 x 267, A1/2_FLAGS: 00004420/00004420 [cmd: 41802E01]
 948  CMD -> src: SRCEN  dst:  misc:  a1ctl: UPDA1 UPDA2 mode: DSTA2 GOURZ ity:  z-op:  op: LFU_REPLACE ctrl: SRCSHADE
 949   A1 step values: -327 (X), 1 (Y)
 950   A2 step values: -327 (X), 1 (Y) [mask (unused): 00000000 - FFFFFFFF/FFFFFFFF]
 951   A1 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 384 (22), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
 952   A2 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 384 (22), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
 953         A1 x/y: 28/58, A2 x/y: 28/58 Pattern: 00EA7BEA77EA77EA SRCDATA: 7BFF7BFF7BFF7BFF
 954
 955 Below fixes it, but then borks:
 956 ; O
 957
 958 Blit! (00110000 <- 0010B2A8) count: 12 x 12, A1/2_FLAGS: 000042E2/00000020 [cmd: 09800609]
 959  CMD -> src: SRCEN  dst: DSTEN  misc:  a1ctl: UPDA1 UPDA2 mode:  ity:  z-op:  op: LFU_REPLACE ctrl: DCOMPEN
 960   A1 step values: -15 (X), 1 (Y)
 961   A2 step values: -4 (X), 0 (Y) [mask (unused): 00000000 - FFFFFFFF/FFFFFFFF]
 962   A1 -> pitch: 4 phrases, depth: 16bpp, z-off: 3, width: 320 (21), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
 963   A2 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 1 (00), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
 964         A1 x/y: 173/144, A2 x/y: 4052/0
 965
 966 Lesse, with pre-add we'd have:
 967
 968      oooooooooooo
 969 00001111222233334444555566667777
 970   ^  ^starts here...
 971   |             ^ends here.
 972   |rolls back to here. Hmm.
 973
 974 */
 975 //NOTE: The way to fix the CD BIOS is to uncomment below and comment the stuff after
 976 //      the phrase mode mucking around. But it fucks up everything else...
 977 //#define SCREWY_CD_DEPENDENT
 978 #ifdef SCREWY_CD_DEPENDENT
 979                 a1_x += a1_step_x;
 980                 a1_y += a1_step_y;
 981                 a2_x += a2_step_x;
 982                 a2_y += a2_step_y;//*/
 983 #endif
 984
 985                 //New: Phrase mode taken into account! :-p
 986 /*              if (a1_phrase_mode)                     // v1
 987                 {
 988                         // Bump the pointer to the next phrase boundary
 989                         // Even though it works, this is crappy... Clean it up!
 990                         uint32 size = 64 / a1_psize;
 991
 992                         // Crappy kludge... ('aligning' source to destination)
 993                         if (a2_phrase_mode && DSTA2)
 994                         {
 995                                 uint32 extra = (a2_start >> 16) % size;
 996                                 a1_x += extra << 16;
 997                         }
 998
 999                         uint32 newx = (a1_x >> 16) / size;
1000                         uint32 newxrem = (a1_x >> 16) % size;
1001                         a1_x &= 0x0000FFFF;
1002                         a1_x |= (((newx + (newxrem == 0 ? 0 : 1)) * size) & 0xFFFF) << 16;
1003                 }//*/
1004                 if (a1_phrase_mode)                     // v2
1005                 {
1006                         // Bump the pointer to the next phrase boundary
1007                         // Even though it works, this is crappy... Clean it up!
1008                         uint32 size = 64 / a1_psize;
1009
1010                         // Crappy kludge... ('aligning' source to destination)
1011                         if (a2_phrase_mode && DSTA2)
1012                         {
1013                                 uint32 extra = (a2_start >> 16) % size;
1014                                 a1_x += extra << 16;
1015                         }
1016
1017                         uint32 pixelSize = (size - 1) << 16;
1018                         a1_x = (a1_x + pixelSize) & ~pixelSize;
1019                 }
1020
1021 /*              if (a2_phrase_mode)                     // v1
1022                 {
1023                         // Bump the pointer to the next phrase boundary
1024                         // Even though it works, this is crappy... Clean it up!
1025                         uint32 size = 64 / a2_psize;
1026
1027                         // Crappy kludge... ('aligning' source to destination)
1028                         // Prolly should do this for A1 channel as well... [DONE]
1029                         if (a1_phrase_mode && !DSTA2)
1030                         {
1031                                 uint32 extra = (a1_start >> 16) % size;
1032                                 a2_x += extra << 16;
1033                         }
1034
1035                         uint32 newx = (a2_x >> 16) / size;
1036                         uint32 newxrem = (a2_x >> 16) % size;
1037                         a2_x &= 0x0000FFFF;
1038                         a2_x |= (((newx + (newxrem == 0 ? 0 : 1)) * size) & 0xFFFF) << 16;
1039                 }//*/
1040                 if (a2_phrase_mode)                     // v1
1041                 {
1042                         // Bump the pointer to the next phrase boundary
1043                         // Even though it works, this is crappy... Clean it up!
1044                         uint32 size = 64 / a2_psize;
1045
1046                         // Crappy kludge... ('aligning' source to destination)
1047                         // Prolly should do this for A1 channel as well... [DONE]
1048                         if (a1_phrase_mode && !DSTA2)
1049                         {
1050                                 uint32 extra = (a1_start >> 16) % size;
1051                                 a2_x += extra << 16;
1052                         }
1053
1054                         uint32 pixelSize = (size - 1) << 16;
1055                         a2_x = (a2_x + pixelSize) & ~pixelSize;
1056                 }
1057
1058                 //Not entirely: This still mucks things up... !!! FIX !!!
1059                 //Should this go before or after the phrase mode mucking around?
1060 #ifndef SCREWY_CD_DEPENDENT
1061                 a1_x += a1_step_x;
1062                 a1_y += a1_step_y;
1063                 a2_x += a2_step_x;
1064                 a2_y += a2_step_y;//*/
1065 #endif
1066         }
1067
1068         // write values back to registers
1069         WREG(A1_PIXEL,  (a1_y & 0xFFFF0000) | ((a1_x >> 16) & 0xFFFF));
1070         WREG(A1_FPIXEL, (a1_y << 16) | (a1_x & 0xFFFF));
1071         WREG(A2_PIXEL,  (a2_y & 0xFFFF0000) | ((a2_x >> 16) & 0xFFFF));
1072 specialLog = false;
1073 }
1074
1075 void blitter_blit(uint32 cmd)
1076 {
1077 //Apparently this is doing *something*, just not sure exactly what...
1078 /*if (cmd == 0x41802E01)
1079 {
1080         WriteLog("BLIT: Found our blit. Was: %08X ", cmd);
1081         cmd = 0x01800E01;
1082         WriteLog("Is: %08X\n", cmd);
1083 }//*/
1084
1085         uint32 pitchValue[4] = { 0, 1, 3, 2 };
1086         colour_index = 0;
1087         src = cmd & 0x07;
1088         dst = (cmd >> 3) & 0x07;
1089         misc = (cmd >> 6) & 0x03;
1090         a1ctl = (cmd >> 8) & 0x7;
1091         mode = (cmd >> 11) & 0x07;
1092         ity = (cmd >> 14) & 0x0F;
1093         zop = (cmd >> 18) & 0x07;
1094         op = (cmd >> 21) & 0x0F;
1095         ctrl = (cmd >> 25) & 0x3F;
1096
1097         // Addresses in A1/2_BASE are *phrase* aligned, i.e., bottom three bits are ignored!
1098         // NOTE: This fixes Rayman's bad collision detection AND keeps T2K working!
1099         a1_addr = REG(A1_BASE) & 0xFFFFFFF8;
1100         a2_addr = REG(A2_BASE) & 0xFFFFFFF8;
1101
1102         a1_zoffs = (REG(A1_FLAGS) >> 6) & 7;
1103         a2_zoffs = (REG(A2_FLAGS) >> 6) & 7;
1104
1105         xadd_a1_control = (REG(A1_FLAGS) >> 16) & 0x03;
1106         xadd_a2_control = (REG(A2_FLAGS) >> 16) & 0x03;
1107
1108         a1_pitch = pitchValue[(REG(A1_FLAGS) & 0x03)];
1109         a2_pitch = pitchValue[(REG(A2_FLAGS) & 0x03)];
1110
1111         n_pixels = REG(PIXLINECOUNTER) & 0xFFFF;
1112         n_lines = (REG(PIXLINECOUNTER) >> 16) & 0xFFFF;
1113
1114         a1_x = (REG(A1_PIXEL) << 16) | (REG(A1_FPIXEL) & 0xFFFF);
1115         a1_y = (REG(A1_PIXEL) & 0xFFFF0000) | (REG(A1_FPIXEL) >> 16);
1116 //According to the JTRM, X is restricted to 15 bits and Y is restricted to 12.
1117 //But it seems to fuck up T2K! !!! FIX !!!
1118 //Could it be sign extended??? Doesn't seem to be so according to JTRM
1119 //      a1_x &= 0x7FFFFFFF, a1_y &= 0x0FFFFFFF;
1120 //Actually, it says that the X is 16 bits. But it still seems to mess with the Y when restricted to 12...
1121 //      a1_y &= 0x0FFFFFFF;
1122
1123 //      a1_width = blitter_scanline_width[((REG(A1_FLAGS) & 0x00007E00) >> 9)];
1124 // According to JTRM, this must give a *whole number* of phrases in the current
1125 // pixel size (this means the lookup above is WRONG)... !!! FIX !!!
1126         uint32 m = (REG(A1_FLAGS) >> 9) & 0x03, e = (REG(A1_FLAGS) >> 11) & 0x0F;
1127         a1_width = ((0x04 | m) << e) >> 2;//*/
1128
1129         a2_x = (REG(A2_PIXEL) & 0x0000FFFF) << 16;
1130         a2_y = (REG(A2_PIXEL) & 0xFFFF0000);
1131 //According to the JTRM, X is restricted to 15 bits and Y is restricted to 12.
1132 //But it seems to fuck up T2K! !!! FIX !!!
1133 //      a2_x &= 0x7FFFFFFF, a2_y &= 0x0FFFFFFF;
1134 //Actually, it says that the X is 16 bits. But it still seems to mess with the Y when restricted to 12...
1135 //      a2_y &= 0x0FFFFFFF;
1136
1137 //      a2_width = blitter_scanline_width[((REG(A2_FLAGS) & 0x00007E00) >> 9)];
1138 // According to JTRM, this must give a *whole number* of phrases in the current
1139 // pixel size (this means the lookup above is WRONG)... !!! FIX !!!
1140         m = (REG(A2_FLAGS) >> 9) & 0x03, e = (REG(A2_FLAGS) >> 11) & 0x0F;
1141         a2_width = ((0x04 | m) << e) >> 2;//*/
1142         a2_mask_x = ((REG(A2_MASK) & 0x0000FFFF) << 16) | 0xFFFF;
1143         a2_mask_y = (REG(A2_MASK) & 0xFFFF0000) | 0xFFFF;
1144
1145         // Check for "use mask" flag
1146         if (!(REG(A2_FLAGS) & 0x8000))
1147         {
1148                 a2_mask_x = 0xFFFFFFFF; // must be 16.16
1149                 a2_mask_y = 0xFFFFFFFF; // must be 16.16
1150         }
1151
1152         a1_phrase_mode = 0;
1153
1154         // According to the official documentation, a hardware bug ties A2's yadd bit to A1's...
1155         a2_yadd = a1_yadd = (YADD1_A1 ? 1 << 16 : 0);
1156
1157         if (YSIGNSUB_A1)
1158                 a1_yadd = -a1_yadd;
1159
1160         // determine a1_xadd
1161         switch (xadd_a1_control)
1162         {
1163         case XADDPHR:
1164 // This is a documented Jaguar bug relating to phrase mode and truncation... Look into it!
1165                 // add phrase offset to X and truncate
1166                 a1_xadd = 1 << 16;
1167                 a1_phrase_mode = 1;
1168                 break;
1169         case XADDPIX:
1170                 // add pixelsize (1) to X
1171                 a1_xadd = 1 << 16;
1172                 break;
1173         case XADD0:
1174                 // add zero (for those nice vertical lines)
1175                 a1_xadd = 0;
1176                 break;
1177         case XADDINC:
1178                 // add the contents of the increment register
1179                 a1_xadd = (REG(A1_INC) << 16)            | (REG(A1_FINC) & 0x0000FFFF);
1180                 a1_yadd = (REG(A1_INC) & 0xFFFF0000) | (REG(A1_FINC) >> 16);
1181                 break;
1182         }
1183
1184
1185 //Blit! (0011D000 -> 000B9600) count: 228 x 1, A1/2_FLAGS: 00073820/00064220 [cmd: 41802801]
1186 //  A1 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 128 (1C), addctl: XADDINC YADD1 XSIGNADD YSIGNADD
1187 //  A2 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 320 (21), addctl: XADD0 YADD1 XSIGNADD YSIGNADD
1188 //if (YADD1_A1 && YADD1_A2 && xadd_a2_control == XADD0 && xadd_a1_control == XADDINC)// &&
1189 //      uint32 a1f = REG(A1_FLAGS), a2f = REG(A2_FLAGS);
1190 //Ok, so this ISN'T it... Prolly the XADDPHR code above that's doing it...
1191 //if (REG(A1_FLAGS) == 0x00073820 && REG(A2_FLAGS) == 0x00064220 && cmd == 0x41802801)
1192 //        A1 x/y: 14368/7, A2 x/y: 150/36
1193 //This is it... The problem...
1194 //if ((a1_x >> 16) == 14368) // 14368 = $3820
1195 //      return; //Lesse what we got...
1196
1197         if (XSIGNSUB_A1)
1198                 a1_xadd = -a1_xadd;
1199
1200         if (YSIGNSUB_A2)
1201                 a2_yadd = -a2_yadd;
1202
1203         a2_phrase_mode = 0;
1204
1205         // determine a2_xadd
1206         switch (xadd_a2_control)
1207         {
1208         case XADDPHR:
1209                 // add phrase offset to X and truncate
1210                 a2_xadd = 1 << 16;
1211                 a2_phrase_mode = 1;
1212                 break;
1213         case XADDPIX:
1214                 // add pixelsize (1) to X
1215                 a2_xadd = 1 << 16;
1216                 break;
1217         case XADD0:
1218                 // add zero (for those nice vertical lines)
1219                 a2_xadd = 0;
1220                 break;
1221 //This really isn't a valid bit combo for A2... Shouldn't this cause the blitter to just say no?
1222         case XADDINC:
1223 WriteLog("BLIT: Asked to use invalid bit combo (XADDINC) for A2...\n");
1224                 // add the contents of the increment register
1225                 // since there is no register for a2 we just add 1
1226 //Let's do nothing, since it's not listed as a valid bit combo...
1227 //              a2_xadd = 1 << 16;
1228                 break;
1229         }
1230
1231         if (XSIGNSUB_A2)
1232                 a2_xadd = -a2_xadd;
1233
1234         // Modify outer loop steps based on blitter command
1235
1236         a1_step_x = 0;
1237         a1_step_y = 0;
1238         a2_step_x = 0;
1239         a2_step_y = 0;
1240
1241         if (UPDA1F)
1242                 a1_step_x = (REG(A1_FSTEP) & 0xFFFF),
1243                 a1_step_y = (REG(A1_FSTEP) >> 16);
1244
1245         if (UPDA1)
1246                 a1_step_x |= ((REG(A1_STEP) & 0x0000FFFF) << 16),
1247                 a1_step_y |= ((REG(A1_STEP) & 0xFFFF0000));
1248
1249         if (UPDA2)
1250                 a2_step_x = (REG(A2_STEP) & 0x0000FFFF) << 16,
1251                 a2_step_y = (REG(A2_STEP) & 0xFFFF0000);
1252
1253         outer_loop = n_lines;
1254
1255         // Clipping...
1256
1257         if (CLIPA1)
1258                 a1_clip_x = REG(A1_CLIP) & 0x7FFF,
1259                 a1_clip_y = (REG(A1_CLIP) >> 16) & 0x7FFF;
1260
1261 // This phrase sizing is incorrect as well... !!! FIX !!! [NOTHING TO FIX]
1262 // Err, this is pixel size... (and it's OK)
1263         a2_psize = 1 << ((REG(A2_FLAGS) >> 3) & 0x07);
1264         a1_psize = 1 << ((REG(A1_FLAGS) >> 3) & 0x07);
1265
1266         // Z-buffering
1267         if (GOURZ)
1268         {
1269                 zadd = REG(ZINC);
1270
1271                 for(int v=0; v<4; v++)
1272                         z_i[v] = REG(PHRASEZ0 + v*4);
1273         }
1274
1275         // Gouraud shading
1276         if (GOURD || GOURZ || SRCSHADE)
1277         {
1278                 gd_c[0] = blitter_ram[PATTERNDATA + 6];
1279                 gd_i[0] = ((uint32)blitter_ram[PATTERNDATA + 7] << 16)
1280                         | ((uint32)blitter_ram[SRCDATA + 6] << 8) | blitter_ram[SRCDATA + 7];
1281
1282                 gd_c[1] = blitter_ram[PATTERNDATA + 4];
1283                 gd_i[1] = ((uint32)blitter_ram[PATTERNDATA + 5] << 16)
1284                         | ((uint32)blitter_ram[SRCDATA + 4] << 8) | blitter_ram[SRCDATA + 5];
1285
1286                 gd_c[2] = blitter_ram[PATTERNDATA + 2];
1287                 gd_i[2] = ((uint32)blitter_ram[PATTERNDATA + 3] << 16)
1288                         | ((uint32)blitter_ram[SRCDATA + 2] << 8) | blitter_ram[SRCDATA + 3];
1289
1290                 gd_c[3] = blitter_ram[PATTERNDATA + 0];
1291                 gd_i[3] = ((uint32)blitter_ram[PATTERNDATA + 1] << 16)
1292                         | ((uint32)blitter_ram[SRCDATA + 0] << 8) | blitter_ram[SRCDATA + 1];
1293
1294                 gouraud_add = REG(INTENSITYINC);
1295
1296                 gd_ia = gouraud_add & 0x00FFFFFF;
1297                 if (gd_ia & 0x00800000)
1298                         gd_ia = 0xFF000000 | gd_ia;
1299
1300                 gd_ca = (gouraud_add >> 24) & 0xFF;
1301                 if (gd_ca & 0x00000080)
1302                         gd_ca = 0xFFFFFF00 | gd_ca;
1303         }
1304
1305         // Bit comparitor fixing...
1306 /*      if (BCOMPEN)
1307         {
1308                 // Determine the data flow direction...
1309                 if (!DSTA2)
1310                         a2_step_x /= (1 << ((REG(A2_FLAGS) >> 3) & 0x07));
1311                 else
1312                         ;//add this later
1313         }//*/
1314 /*      if (BCOMPEN)//Kludge for Hover Strike... !!! FIX !!!
1315         {
1316                 // Determine the data flow direction...
1317                 if (!DSTA2)
1318                         a2_x <<= 3;
1319         }//*/
1320
1321 #ifdef LOG_BLITS
1322         if (start_logging)
1323         {
1324                 WriteLog("Blit!\n");
1325                 WriteLog("  cmd      = 0x%.8x\n",cmd);
1326                 WriteLog("  a1_base  = %08X\n", a1_addr);
1327                 WriteLog("  a1_pitch = %d\n", a1_pitch);
1328                 WriteLog("  a1_psize = %d\n", a1_psize);
1329                 WriteLog("  a1_width = %d\n", a1_width);
1330                 WriteLog("  a1_xadd  = %f (phrase=%d)\n", (float)a1_xadd / 65536.0, a1_phrase_mode);
1331                 WriteLog("  a1_yadd  = %f\n", (float)a1_yadd / 65536.0);
1332                 WriteLog("  a1_xstep = %f\n", (float)a1_step_x / 65536.0);
1333                 WriteLog("  a1_ystep = %f\n", (float)a1_step_y / 65536.0);
1334                 WriteLog("  a1_x     = %f\n", (float)a1_x / 65536.0);
1335                 WriteLog("  a1_y     = %f\n", (float)a1_y / 65536.0);
1336                 WriteLog("  a1_zoffs = %i\n",a1_zoffs);
1337
1338                 WriteLog("  a2_base  = %08X\n", a2_addr);
1339                 WriteLog("  a2_pitch = %d\n", a2_pitch);
1340                 WriteLog("  a2_psize = %d\n", a2_psize);
1341                 WriteLog("  a2_width = %d\n", a2_width);
1342                 WriteLog("  a2_xadd  = %f (phrase=%d)\n", (float)a2_xadd / 65536.0, a2_phrase_mode);
1343                 WriteLog("  a2_yadd  = %f\n", (float)a2_yadd / 65536.0);
1344                 WriteLog("  a2_xstep = %f\n", (float)a2_step_x / 65536.0);
1345                 WriteLog("  a2_ystep = %f\n", (float)a2_step_y / 65536.0);
1346                 WriteLog("  a2_x     = %f\n", (float)a2_x / 65536.0);
1347                 WriteLog("  a2_y     = %f\n", (float)a2_y / 65536.0);
1348                 WriteLog("  a2_mask_x= 0x%.4x\n",a2_mask_x);
1349                 WriteLog("  a2_mask_y= 0x%.4x\n",a2_mask_y);
1350                 WriteLog("  a2_zoffs = %i\n",a2_zoffs);
1351
1352                 WriteLog("  count    = %d x %d\n", n_pixels, n_lines);
1353
1354                 WriteLog("  command  = %08X\n", cmd);
1355                 WriteLog("  dsten    = %i\n",DSTEN);
1356                 WriteLog("  srcen    = %i\n",SRCEN);
1357                 WriteLog("  patdsel  = %i\n",PATDSEL);
1358                 WriteLog("  color    = 0x%.8x\n",REG(PATTERNDATA));
1359                 WriteLog("  dcompen  = %i\n",DCOMPEN);
1360                 WriteLog("  bcompen  = %i\n",BCOMPEN);
1361                 WriteLog("  cmpdst   = %i\n",CMPDST);
1362                 WriteLog("  GOURZ   = %i\n",GOURZ);
1363                 WriteLog("  GOURD   = %i\n",GOURD);
1364                 WriteLog("  SRCSHADE= %i\n",SRCSHADE);
1365         }
1366 #endif
1367
1368 //NOTE: Pitch is ignored!
1369
1370 //This *might* be the altimeter blits (they are)...
1371 //On captured screen, x-pos for black (inner) is 259, for pink is 257
1372 //Black is short by 3, pink is short by 1...
1373 /*
1374 Blit! (00110000 <- 000BF010) count: 9 x 31, A1/2_FLAGS: 000042E2/00010020 [cmd: 00010200]
1375  CMD -> src:  dst:  misc:  a1ctl: UPDA1  mode:  ity: PATDSEL z-op:  op: LFU_CLEAR ctrl:
1376   A1 -> pitch: 4 phrases, depth: 16bpp, z-off: 3, width: 320 (21), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
1377   A2 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 1 (00), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
1378         A1 x/y: 262/124, A2 x/y: 128/0
1379 Blit! (00110000 <- 000BF010) count: 5 x 38, A1/2_FLAGS: 000042E2/00010020 [cmd: 00010200]
1380  CMD -> src:  dst:  misc:  a1ctl: UPDA1  mode:  ity: PATDSEL z-op:  op: LFU_CLEAR ctrl:
1381   A1 -> pitch: 4 phrases, depth: 16bpp, z-off: 3, width: 320 (21), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
1382   A2 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 1 (00), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
1383         A1 x/y: 264/117, A2 x/y: 407/0
1384
1385 Blit! (00110000 <- 000BF010) count: 9 x 23, A1/2_FLAGS: 000042E2/00010020 [cmd: 00010200]
1386  CMD -> src:  dst:  misc:  a1ctl: UPDA1  mode:  ity: PATDSEL z-op:  op: LFU_CLEAR ctrl:
1387   A1 step values: -10 (X), 1 (Y)
1388   A1 -> pitch: 4(2) phrases, depth: 16bpp, z-off: 3, width: 320 (21), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
1389   A2 -> pitch: 1(0) phrases, depth: 16bpp, z-off: 0, width: 1 (00), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
1390         A1 x/y: 262/132, A2 x/y: 129/0
1391 Blit! (00110000 <- 000BF010) count: 5 x 27, A1/2_FLAGS: 000042E2/00010020 [cmd: 00010200]
1392  CMD -> src:  dst:  misc:  a1ctl: UPDA1  mode:  ity: PATDSEL z-op:  op: LFU_CLEAR ctrl:
1393   A1 step values: -8 (X), 1 (Y)
1394   A1 -> pitch: 4(2) phrases, depth: 16bpp, z-off: 3, width: 320 (21), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
1395   A2 -> pitch: 1(0) phrases, depth: 16bpp, z-off: 0, width: 1 (00), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
1396         A1 x/y: 264/128, A2 x/y: 336/0
1397
1398   264v       vCursor ends up here...
1399      xxxxx...`
1400      111122223333
1401
1402 262v         vCursor ends up here...
1403    xxxxxxxxx.'
1404  1111222233334444
1405
1406 Fixed! Now for more:
1407
1408 ; This looks like the ship icon in the upper left corner...
1409
1410 Blit! (00110000 <- 0010B2A8) count: 11 x 12, A1/2_FLAGS: 000042E2/00000020 [cmd: 09800609]
1411  CMD -> src: SRCEN  dst: DSTEN  misc:  a1ctl: UPDA1 UPDA2 mode:  ity:  z-op:  op: LFU_REPLACE ctrl: DCOMPEN
1412   A1 step values: -12 (X), 1 (Y)
1413   A2 step values: 0 (X), 0 (Y) [mask (unused): 00000000 - FFFFFFFF/FFFFFFFF]
1414   A1 -> pitch: 4 phrases, depth: 16bpp, z-off: 3, width: 320 (21), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
1415   A2 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 1 (00), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
1416         A1 x/y: 20/24, A2 x/y: 5780/0
1417
1418 Also fixed!
1419
1420 More (not sure this is a blitter problem as much as it's a GPU problem):
1421 All but the "M" are trashed...
1422 This does *NOT* look like a blitter problem, as it's rendering properly...
1423 Actually, if you look at the A1 step values, there IS a discrepancy!
1424
1425 ; D
1426
1427 Blit! (00110000 <- 0010B2A8) count: 12 x 12, A1/2_FLAGS: 000042E2/00000020 [cmd: 09800609]
1428  CMD -> src: SRCEN  dst: DSTEN  misc:  a1ctl: UPDA1 UPDA2 mode:  ity:  z-op:  op: LFU_REPLACE ctrl: DCOMPEN
1429   A1 step values: -14 (X), 1 (Y)
1430   A2 step values: -4 (X), 0 (Y) [mask (unused): 00000000 - FFFFFFFF/FFFFFFFF]
1431   A1 -> pitch: 4 phrases, depth: 16bpp, z-off: 3, width: 320 (21), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
1432   A2 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 1 (00), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
1433         A1 x/y: 134/144, A2 x/y: 2516/0
1434 ;129,146: +5,-2
1435
1436 ; E
1437
1438 Blit! (00110000 <- 0010B2A8) count: 12 x 12, A1/2_FLAGS: 000042E2/00000020 [cmd: 09800609]
1439  CMD -> src: SRCEN  dst: DSTEN  misc:  a1ctl: UPDA1 UPDA2 mode:  ity:  z-op:  op: LFU_REPLACE ctrl: DCOMPEN
1440   A1 step values: -13 (X), 1 (Y)
1441   A2 step values: -4 (X), 0 (Y) [mask (unused): 00000000 - FFFFFFFF/FFFFFFFF]
1442   A1 -> pitch: 4 phrases, depth: 16bpp, z-off: 3, width: 320 (21), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
1443   A2 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 1 (00), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
1444         A1 x/y: 147/144, A2 x/y: 2660/0
1445
1446 ; M
1447
1448 Blit! (00110000 <- 0010B2A8) count: 12 x 12, A1/2_FLAGS: 000042E2/00000020 [cmd: 09800609]
1449  CMD -> src: SRCEN  dst: DSTEN  misc:  a1ctl: UPDA1 UPDA2 mode:  ity:  z-op:  op: LFU_REPLACE ctrl: DCOMPEN
1450   A1 step values: -12 (X), 1 (Y)
1451   A2 step values: 0 (X), 0 (Y) [mask (unused): 00000000 - FFFFFFFF/FFFFFFFF]
1452   A1 -> pitch: 4 phrases, depth: 16bpp, z-off: 3, width: 320 (21), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
1453   A2 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 1 (00), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
1454         A1 x/y: 160/144, A2 x/y: 3764/0
1455
1456 ; O
1457
1458 Blit! (00110000 <- 0010B2A8) count: 12 x 12, A1/2_FLAGS: 000042E2/00000020 [cmd: 09800609]
1459  CMD -> src: SRCEN  dst: DSTEN  misc:  a1ctl: UPDA1 UPDA2 mode:  ity:  z-op:  op: LFU_REPLACE ctrl: DCOMPEN
1460   A1 step values: -15 (X), 1 (Y)
1461   A2 step values: -4 (X), 0 (Y) [mask (unused): 00000000 - FFFFFFFF/FFFFFFFF]
1462   A1 -> pitch: 4 phrases, depth: 16bpp, z-off: 3, width: 320 (21), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
1463   A2 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 1 (00), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
1464         A1 x/y: 173/144, A2 x/y: 4052/0
1465
1466 */
1467 //extern int op_start_log;
1468 if (blit_start_log)
1469 {
1470         char * ctrlStr[4] = { "XADDPHR\0", "XADDPIX\0", "XADD0\0", "XADDINC\0" };
1471         char * bppStr[8] = { "1bpp\0", "2bpp\0", "4bpp\0", "8bpp\0", "16bpp\0", "32bpp\0", "???\0", "!!!\0" };
1472         char * opStr[16] = { "LFU_CLEAR", "LFU_NSAND", "LFU_NSAD", "LFU_NOTS", "LFU_SAND", "LFU_NOTD", "LFU_N_SXORD", "LFU_NSORND",
1473                 "LFU_SAD", "LFU_XOR", "LFU_D", "LFU_NSORD", "LFU_REPLACE", "LFU_SORND", "LFU_SORD", "LFU_ONE" };
1474         uint32 /*src = cmd & 0x07, dst = (cmd >> 3) & 0x07, misc = (cmd >> 6) & 0x03,
1475                 a1ctl = (cmd >> 8) & 0x07,*/ mode = (cmd >> 11) & 0x07/*, ity = (cmd >> 14) & 0x0F,
1476                 zop = (cmd >> 18) & 0x07, op = (cmd >> 21) & 0x0F, ctrl = (cmd >> 25) & 0x3F*/;
1477         uint32 a1f = REG(A1_FLAGS), a2f = REG(A2_FLAGS);
1478         uint32 p1 = a1f & 0x07, p2 = a2f & 0x07,
1479                 d1 = (a1f >> 3) & 0x07, d2 = (a2f >> 3) & 0x07,
1480                 zo1 = (a1f >> 6) & 0x07, zo2 = (a2f >> 6) & 0x07,
1481                 w1 = (a1f >> 9) & 0x3F, w2 = (a2f >> 9) & 0x3F,
1482                 ac1 = (a1f >> 16) & 0x1F, ac2 = (a2f >> 16) & 0x1F;
1483         uint32 iw1 = ((0x04 | (w1 & 0x03)) << ((w1 & 0x3C) >> 2)) >> 2;
1484         uint32 iw2 = ((0x04 | (w2 & 0x03)) << ((w2 & 0x3C) >> 2)) >> 2;
1485         WriteLog("Blit! (%08X %s %08X) count: %d x %d, A1/2_FLAGS: %08X/%08X [cmd: %08X]\n", a1_addr, (mode&0x01 ? "->" : "<-"), a2_addr, n_pixels, n_lines, a1f, a2f, cmd);
1486 //      WriteLog(" CMD -> src: %d, dst: %d, misc: %d, a1ctl: %d, mode: %d, ity: %1X, z-op: %d, op: %1X, ctrl: %02X\n", src, dst, misc, a1ctl, mode, ity, zop, op, ctrl);
1487
1488         WriteLog(" CMD -> src: %s%s%s ", (cmd & 0x0001 ? "SRCEN " : ""), (cmd & 0x0002 ? "SRCENZ " : ""), (cmd & 0x0004 ? "SRCENX" : ""));
1489         WriteLog("dst: %s%s%s ", (cmd & 0x0008 ? "DSTEN " : ""), (cmd & 0x0010 ? "DSTENZ " : ""), (cmd & 0x0020 ? "DSTWRZ" : ""));
1490         WriteLog("misc: %s%s ", (cmd & 0x0040 ? "CLIP_A1 " : ""), (cmd & 0x0080 ? "???" : ""));
1491         WriteLog("a1ctl: %s%s%s ", (cmd & 0x0100 ? "UPDA1F " : ""), (cmd & 0x0200 ? "UPDA1 " : ""), (cmd & 0x0400 ? "UPDA2" : ""));
1492         WriteLog("mode: %s%s%s ", (cmd & 0x0800 ? "DSTA2 " : ""), (cmd & 0x1000 ? "GOURD " : ""), (cmd & 0x2000 ? "GOURZ" : ""));
1493         WriteLog("ity: %s%s%s%s ", (cmd & 0x4000 ? "TOPBEN " : ""), (cmd & 0x8000 ? "TOPNEN " : ""), (cmd & 0x00010000 ? "PATDSEL" : ""), (cmd & 0x00020000 ? "ADDDSEL" : ""));
1494         WriteLog("z-op: %s%s%s ", (cmd & 0x00040000 ? "ZMODELT " : ""), (cmd & 0x00080000 ? "ZMODEEQ " : ""), (cmd & 0x00100000 ? "ZMODEGT" : ""));
1495         WriteLog("op: %s ", opStr[(cmd >> 21) & 0x0F]);
1496         WriteLog("ctrl: %s%s%s%s%s%s\n", (cmd & 0x02000000 ? "CMPDST " : ""), (cmd & 0x04000000 ? "BCOMPEN " : ""), (cmd & 0x08000000 ? "DCOMPEN " : ""), (cmd & 0x10000000 ? "BKGWREN " : ""), (cmd & 0x20000000 ? "BUSHI " : ""), (cmd & 0x40000000 ? "SRCSHADE" : ""));
1497
1498         if (UPDA1)
1499                 WriteLog("  A1 step values: %d (X), %d (Y)\n", a1_step_x >> 16, a1_step_y >> 16);
1500
1501         if (UPDA2)
1502                 WriteLog("  A2 step values: %d (X), %d (Y) [mask (%sused): %08X - %08X/%08X]\n", a2_step_x >> 16, a2_step_y >> 16, (a2f & 0x8000 ? "" : "un"), REG(A2_MASK), a2_mask_x, a2_mask_y);
1503
1504         WriteLog("  A1 -> pitch: %d phrases, depth: %s, z-off: %d, width: %d (%02X), addctl: %s %s %s %s\n", 1 << p1, bppStr[d1], zo1, iw1, w1, ctrlStr[ac1&0x03], (ac1&0x04 ? "YADD1" : "YADD0"), (ac1&0x08 ? "XSIGNSUB" : "XSIGNADD"), (ac1&0x10 ? "YSIGNSUB" : "YSIGNADD"));
1505         WriteLog("  A2 -> pitch: %d phrases, depth: %s, z-off: %d, width: %d (%02X), addctl: %s %s %s %s\n", 1 << p2, bppStr[d2], zo2, iw2, w2, ctrlStr[ac2&0x03], (ac2&0x04 ? "YADD1" : "YADD0"), (ac2&0x08 ? "XSIGNSUB" : "XSIGNADD"), (ac2&0x10 ? "YSIGNSUB" : "YSIGNADD"));
1506         WriteLog("        A1 x/y: %d/%d, A2 x/y: %d/%d Pattern: %08X%08X SRCDATA: %08X%08X\n", a1_x >> 16, a1_y >> 16, a2_x >> 16, a2_y >> 16, REG(PATTERNDATA), REG(PATTERNDATA + 4), REG(SRCDATA), REG(SRCDATA + 4));
1507 //      blit_start_log = 0;
1508 //      op_start_log = 1;
1509 }
1510
1511         blitter_working = 1;
1512 //#ifndef USE_GENERIC_BLITTER
1513 //      if (!blitter_execute_cached_code(blitter_in_cache(cmd)))
1514 //#endif
1515         blitter_generic(cmd);
1516
1517 /*if (blit_start_log)
1518 {
1519         if (a1_addr == 0xF03000 && a2_addr == 0x004D58)
1520         {
1521                 WriteLog("\nBytes at 004D58:\n");
1522                 for(int i=0x004D58; i<0x004D58+(10*127*4); i++)
1523                         WriteLog("%02X ", JaguarReadByte(i));
1524                 WriteLog("\nBytes at F03000:\n");
1525                 for(int i=0xF03000; i<0xF03000+(6*127*4); i++)
1526                         WriteLog("%02X ", JaguarReadByte(i));
1527                 WriteLog("\n\n");
1528         }
1529 }//*/
1530
1531         blitter_working = 0;
1532 }
1533 #endif                                                                                  // of the #if 0 near the top...
1534 /*******************************************************************************
1535 ********************** STUFF CUT ABOVE THIS LINE! ******************************
1536 *******************************************************************************/
1537
1538 void BlitterInit(void)
1539 {
1540         BlitterReset();
1541 }
1542
1543 void BlitterReset(void)
1544 {
1545         memset(blitter_ram, 0x00, 0xA0);
1546 }
1547
1548 void BlitterDone(void)
1549 {
1550         WriteLog("BLIT: Done.\n");
1551 }
1552
1553 uint8 BlitterReadByte(uint32 offset, uint32 who/*=UNKNOWN*/)
1554 {
1555         offset &= 0xFF;
1556
1557         // status register
1558 //This isn't cycle accurate--how to fix? !!! FIX !!!
1559 //Probably have to do some multi-threaded implementation or at least a reentrant safe implementation...
1560         if (offset == (0x38 + 3))
1561                 return 0x01;    // always idle
1562
1563 // CHECK HERE ONCE THIS FIX HAS BEEN TESTED: [ ]
1564 //Fix for AvP:
1565         if (offset >= 0x04 && offset <= 0x07)
1566 //This is it. I wonder if it just ignores the lower three bits?
1567 //No, this is a documented Jaguar I bug. It also bites the read at $F02230 as well...
1568                 return blitter_ram[offset + 0x08];              // A1_PIXEL ($F0220C) read at $F02204
1569
1570         if (offset >= 0x2C && offset <= 0x2F)
1571                 return blitter_ram[offset + 0x04];              // A2_PIXEL ($F02230) read at $F0222C
1572
1573         return blitter_ram[offset];
1574 }
1575
1576 //Crappy!
1577 uint16 BlitterReadWord(uint32 offset, uint32 who/*=UNKNOWN*/)
1578 {
1579         return ((uint16)BlitterReadByte(offset, who) << 8) | (uint16)BlitterReadByte(offset+1, who);
1580 }
1581
1582 //Crappy!
1583 uint32 BlitterReadLong(uint32 offset, uint32 who/*=UNKNOWN*/)
1584 {
1585         return (BlitterReadWord(offset, who) << 16) | BlitterReadWord(offset+2, who);
1586 }
1587
1588 void BlitterWriteByte(uint32 offset, uint8 data, uint32 who/*=UNKNOWN*/)
1589 {
1590 /*if (offset & 0xFF == 0x7B)
1591         WriteLog("--> Wrote to B_STOP: value -> %02X\n", data);*/
1592         offset &= 0xFF;
1593 /*if ((offset >= PATTERNDATA) && (offset < PATTERNDATA + 8))
1594 {
1595         printf("--> %s wrote %02X to byte %u of PATTERNDATA...\n", whoName[who], data, offset - PATTERNDATA);
1596         fflush(stdout);
1597 }//*/
1598
1599         // This handles writes to INTENSITY0-3 by also writing them to their proper places in
1600         // PATTERNDATA & SOURCEDATA (should do the same for the Z registers! !!! FIX !!! [DONE])
1601         if ((offset >= 0x7C) && (offset <= 0x9B))
1602         {
1603                 switch (offset)
1604                 {
1605                 // INTENSITY registers 0-3
1606                 case 0x7C: break;
1607                 case 0x7D: blitter_ram[PATTERNDATA + 7] = data; break;
1608                 case 0x7E: blitter_ram[SRCDATA + 6] = data; break;
1609                 case 0x7F: blitter_ram[SRCDATA + 7] = data; break;
1610
1611                 case 0x80: break;
1612                 case 0x81: blitter_ram[PATTERNDATA + 5] = data; break;
1613                 case 0x82: blitter_ram[SRCDATA + 4] = data; break;
1614                 case 0x83: blitter_ram[SRCDATA + 5] = data; break;
1615
1616                 case 0x84: break;
1617                 case 0x85: blitter_ram[PATTERNDATA + 3] = data; break;
1618                 case 0x86: blitter_ram[SRCDATA + 2] = data; break;
1619                 case 0x87: blitter_ram[SRCDATA + 3] = data; break;
1620
1621                 case 0x88: break;
1622                 case 0x89: blitter_ram[PATTERNDATA + 1] = data; break;
1623                 case 0x8A: blitter_ram[SRCDATA + 0] = data; break;
1624                 case 0x8B: blitter_ram[SRCDATA + 1] = data; break;
1625
1626
1627                 // Z registers 0-3
1628                 case 0x8C: blitter_ram[SRCZINT + 6] = data; break;
1629                 case 0x8D: blitter_ram[SRCZINT + 7] = data; break;
1630                 case 0x8E: blitter_ram[SRCZFRAC + 6] = data; break;
1631                 case 0x8F: blitter_ram[SRCZFRAC + 7] = data; break;
1632
1633                 case 0x90: blitter_ram[SRCZINT + 4] = data; break;
1634                 case 0x91: blitter_ram[SRCZINT + 5] = data; break;
1635                 case 0x92: blitter_ram[SRCZFRAC + 4] = data; break;
1636                 case 0x93: blitter_ram[SRCZFRAC + 5] = data; break;
1637
1638                 case 0x94: blitter_ram[SRCZINT + 2] = data; break;
1639                 case 0x95: blitter_ram[SRCZINT + 3] = data; break;
1640                 case 0x96: blitter_ram[SRCZFRAC + 2] = data; break;
1641                 case 0x97: blitter_ram[SRCZFRAC + 3] = data; break;
1642
1643                 case 0x98: blitter_ram[SRCZINT + 0] = data; break;
1644                 case 0x99: blitter_ram[SRCZINT + 1] = data; break;
1645                 case 0x9A: blitter_ram[SRCZFRAC + 0] = data; break;
1646                 case 0x9B: blitter_ram[SRCZFRAC + 1] = data; break;
1647                 }
1648         }
1649
1650         // It looks weird, but this is how the 64 bit registers are actually handled...!
1651
1652         else if ((offset >= SRCDATA + 0) && (offset <= SRCDATA + 3)
1653                 || (offset >= DSTDATA + 0) && (offset <= DSTDATA + 3)
1654                 || (offset >= DSTZ + 0) && (offset <= DSTZ + 3)
1655                 || (offset >= SRCZINT + 0) && (offset <= SRCZINT + 3)
1656                 || (offset >= SRCZFRAC + 0) && (offset <= SRCZFRAC + 3)
1657                 || (offset >= PATTERNDATA + 0) && (offset <= PATTERNDATA + 3))
1658         {
1659                 blitter_ram[offset + 4] = data;
1660         }
1661         else if ((offset >= SRCDATA + 4) && (offset <= SRCDATA + 7)
1662                 || (offset >= DSTDATA + 4) && (offset <= DSTDATA + 7)
1663                 || (offset >= DSTZ + 4) && (offset <= DSTZ + 7)
1664                 || (offset >= SRCZINT + 4) && (offset <= SRCZINT + 7)
1665                 || (offset >= SRCZFRAC + 4) && (offset <= SRCZFRAC + 7)
1666                 || (offset >= PATTERNDATA + 4) && (offset <= PATTERNDATA + 7))
1667         {
1668                 blitter_ram[offset - 4] = data;
1669         }
1670         else
1671                 blitter_ram[offset] = data;
1672 }
1673
1674 void BlitterWriteWord(uint32 offset, uint16 data, uint32 who/*=UNKNOWN*/)
1675 {
1676 /*if (((offset & 0xFF) >= PATTERNDATA) && ((offset & 0xFF) < PATTERNDATA + 8))
1677 {
1678         printf("----> %s wrote %04X to byte %u of PATTERNDATA...\n", whoName[who], data, offset - (0xF02200 + PATTERNDATA));
1679         fflush(stdout);
1680 }*/
1681 //#if 1
1682 /*      if (offset & 0xFF == A1_PIXEL && data == 14368)
1683         {
1684                 WriteLog("\n1\nA1_PIXEL written by %s (%u)...\n\n\n", whoName[who], data);
1685 extern bool doGPUDis;
1686 doGPUDis = true;
1687         }
1688         if ((offset & 0xFF) == (A1_PIXEL + 2) && data == 14368)
1689         {
1690                 WriteLog("\n2\nA1_PIXEL written by %s (%u)...\n\n\n", whoName[who], data);
1691 extern bool doGPUDis;
1692 doGPUDis = true;
1693         }//*/
1694 //#endif
1695
1696         BlitterWriteByte(offset + 0, data >> 8, who);
1697         BlitterWriteByte(offset + 1, data & 0xFF, who);
1698
1699         if ((offset & 0xFF) == 0x3A)
1700         // I.e., the second write of 32-bit value--not convinced this is the best way to do this!
1701         // But then again, according to the Jaguar docs, this is correct...!
1702 /*extern int blit_start_log;
1703 extern bool doGPUDis;
1704 if (blit_start_log)
1705 {
1706         WriteLog("BLIT: Blitter started by %s...\n", whoName[who]);
1707         doGPUDis = true;
1708 }//*/
1709 #ifdef USE_ORIGINAL_BLITTER
1710                 blitter_blit(GET32(blitter_ram, 0x38));
1711 #endif
1712 #ifdef USE_MIDSUMMER_BLITTER
1713                 BlitterMidsummer(GET32(blitter_ram, 0x38));
1714 #endif
1715 #ifdef USE_MIDSUMMER_BLITTER_MKII
1716                 BlitterMidsummer2();
1717 #endif
1718 }
1719 //F02278,9,A,B
1720
1721 void BlitterWriteLong(uint32 offset, uint32 data, uint32 who/*=UNKNOWN*/)
1722 {
1723 /*if (((offset & 0xFF) >= PATTERNDATA) && ((offset & 0xFF) < PATTERNDATA + 8))
1724 {
1725         printf("------> %s wrote %08X to byte %u of PATTERNDATA...\n", whoName[who], data, offset - (0xF02200 + PATTERNDATA));
1726         fflush(stdout);
1727 }//*/
1728 //#if 1
1729 /*      if ((offset & 0xFF) == A1_PIXEL && (data & 0xFFFF) == 14368)
1730         {
1731                 WriteLog("\n3\nA1_PIXEL written by %s (%u)...\n\n\n", whoName[who], data);
1732 extern bool doGPUDis;
1733 doGPUDis = true;
1734         }//*/
1735 //#endif
1736
1737         BlitterWriteWord(offset + 0, data >> 16, who);
1738         BlitterWriteWord(offset + 2, data & 0xFFFF, who);
1739 }
1740
1741 void LogBlit(void)
1742 {
1743         const char * opStr[16] = { "LFU_CLEAR", "LFU_NSAND", "LFU_NSAD", "LFU_NOTS", "LFU_SAND", "LFU_NOTD", "LFU_N_SXORD", "LFU_NSORND",
1744                 "LFU_SAD", "LFU_XOR", "LFU_D", "LFU_NSORD", "LFU_REPLACE", "LFU_SORND", "LFU_SORD", "LFU_ONE" };
1745         uint32 cmd = GET32(blitter_ram, 0x38);
1746         uint32 m = (REG(A1_FLAGS) >> 9) & 0x03, e = (REG(A1_FLAGS) >> 11) & 0x0F;
1747         uint32 a1_width = ((0x04 | m) << e) >> 2;
1748         m = (REG(A2_FLAGS) >> 9) & 0x03, e = (REG(A2_FLAGS) >> 11) & 0x0F;
1749         uint32 a2_width = ((0x04 | m) << e) >> 2;
1750
1751         WriteLog("Blit!\n");
1752         WriteLog("  COMMAND  = %08X\n", cmd);
1753         WriteLog("  a1_base  = %08X\n", REG(A1_BASE));
1754         WriteLog("  a1_flags = %08X (%c %c %c %c%c . %c%c%c%c%c%c %c%c%c %c%c%c . %c%c)\n", REG(A1_FLAGS),
1755                 (REG(A1_FLAGS) & 0x100000 ? '1' : '0'),
1756                 (REG(A1_FLAGS) & 0x080000 ? '1' : '0'),
1757                 (REG(A1_FLAGS) & 0x040000 ? '1' : '0'),
1758                 (REG(A1_FLAGS) & 0x020000 ? '1' : '0'),
1759                 (REG(A1_FLAGS) & 0x010000 ? '1' : '0'),
1760                 (REG(A1_FLAGS) & 0x004000 ? '1' : '0'),
1761                 (REG(A1_FLAGS) & 0x002000 ? '1' : '0'),
1762                 (REG(A1_FLAGS) & 0x001000 ? '1' : '0'),
1763                 (REG(A1_FLAGS) & 0x000800 ? '1' : '0'),
1764                 (REG(A1_FLAGS) & 0x000400 ? '1' : '0'),
1765                 (REG(A1_FLAGS) & 0x000200 ? '1' : '0'),
1766                 (REG(A1_FLAGS) & 0x000100 ? '1' : '0'),
1767                 (REG(A1_FLAGS) & 0x000080 ? '1' : '0'),
1768                 (REG(A1_FLAGS) & 0x000040 ? '1' : '0'),
1769                 (REG(A1_FLAGS) & 0x000020 ? '1' : '0'),
1770                 (REG(A1_FLAGS) & 0x000010 ? '1' : '0'),
1771                 (REG(A1_FLAGS) & 0x000008 ? '1' : '0'),
1772                 (REG(A1_FLAGS) & 0x000002 ? '1' : '0'),
1773                 (REG(A1_FLAGS) & 0x000001 ? '1' : '0'));
1774         WriteLog("             pitch=%u, pixSz=%u, zOff=%u, width=%u, xCtrl=%u\n",
1775                 REG(A1_FLAGS) & 0x00003, (REG(A1_FLAGS) & 0x00038) >> 3,
1776                 (REG(A1_FLAGS) & 0x001C0) >> 6,  a1_width, (REG(A1_FLAGS) & 0x30000) >> 16);
1777         WriteLog("  a1_clip  = %u, %u (%08X)\n", GET16(blitter_ram, A1_CLIP + 2), GET16(blitter_ram, A1_CLIP + 0), GET32(blitter_ram, A1_CLIP));
1778         WriteLog("  a1_pixel = %d, %d (%08X)\n", (int16)GET16(blitter_ram, A1_PIXEL + 2), (int16)GET16(blitter_ram, A1_PIXEL + 0), GET32(blitter_ram, A1_PIXEL));
1779         WriteLog("  a1_step  = %d, %d (%08X)\n", (int16)GET16(blitter_ram, A1_STEP + 2), (int16)GET16(blitter_ram, A1_STEP + 0), GET32(blitter_ram, A1_STEP));
1780         WriteLog("  a1_fstep = %u, %u (%08X)\n", GET16(blitter_ram, A1_FSTEP + 2), GET16(blitter_ram, A1_FSTEP + 0), GET32(blitter_ram, A1_FSTEP));
1781         WriteLog("  a1_fpixel= %u, %u (%08X)\n", GET16(blitter_ram, A1_FPIXEL + 2), GET16(blitter_ram, A1_FPIXEL + 0), GET32(blitter_ram, A1_FPIXEL));
1782         WriteLog("  a1_inc   = %d, %d (%08X)\n", (int16)GET16(blitter_ram, A1_INC + 2), (int16)GET16(blitter_ram, A1_INC + 0), GET32(blitter_ram, A1_INC));
1783         WriteLog("  a1_finc  = %u, %u (%08X)\n", GET16(blitter_ram, A1_FINC + 2), GET16(blitter_ram, A1_FINC + 0), GET32(blitter_ram, A1_FINC));
1784
1785         WriteLog("  a2_base  = %08X\n", REG(A2_BASE));
1786         WriteLog("  a2_flags = %08X (%c %c %c %c%c %c %c%c%c%c%c%c %c%c%c %c%c%c . %c%c)\n", REG(A2_FLAGS),
1787                 (REG(A2_FLAGS) & 0x100000 ? '1' : '0'),
1788                 (REG(A2_FLAGS) & 0x080000 ? '1' : '0'),
1789                 (REG(A2_FLAGS) & 0x040000 ? '1' : '0'),
1790                 (REG(A2_FLAGS) & 0x020000 ? '1' : '0'),
1791                 (REG(A2_FLAGS) & 0x010000 ? '1' : '0'),
1792                 (REG(A2_FLAGS) & 0x008000 ? '1' : '0'),
1793                 (REG(A2_FLAGS) & 0x004000 ? '1' : '0'),
1794                 (REG(A2_FLAGS) & 0x002000 ? '1' : '0'),
1795                 (REG(A2_FLAGS) & 0x001000 ? '1' : '0'),
1796                 (REG(A2_FLAGS) & 0x000800 ? '1' : '0'),
1797                 (REG(A2_FLAGS) & 0x000400 ? '1' : '0'),
1798                 (REG(A2_FLAGS) & 0x000200 ? '1' : '0'),
1799                 (REG(A2_FLAGS) & 0x000100 ? '1' : '0'),
1800                 (REG(A2_FLAGS) & 0x000080 ? '1' : '0'),
1801                 (REG(A2_FLAGS) & 0x000040 ? '1' : '0'),
1802                 (REG(A2_FLAGS) & 0x000020 ? '1' : '0'),
1803                 (REG(A2_FLAGS) & 0x000010 ? '1' : '0'),
1804                 (REG(A2_FLAGS) & 0x000008 ? '1' : '0'),
1805                 (REG(A2_FLAGS) & 0x000002 ? '1' : '0'),
1806                 (REG(A2_FLAGS) & 0x000001 ? '1' : '0'));
1807         WriteLog("             pitch=%u, pixSz=%u, zOff=%u, width=%u, xCtrl=%u\n",
1808                 REG(A2_FLAGS) & 0x00003, (REG(A2_FLAGS) & 0x00038) >> 3,
1809                 (REG(A2_FLAGS) & 0x001C0) >> 6,  a2_width, (REG(A2_FLAGS) & 0x30000) >> 16);
1810         WriteLog("  a2_mask  = %u, %u (%08X)\n", GET16(blitter_ram, A2_MASK + 2), GET16(blitter_ram, A2_MASK + 0), GET32(blitter_ram, A2_MASK));
1811         WriteLog("  a2_pixel = %d, %d (%08X)\n", (int16)GET16(blitter_ram, A2_PIXEL + 2), (int16)GET16(blitter_ram, A2_PIXEL + 0), GET32(blitter_ram, A2_PIXEL));
1812         WriteLog("  a2_step  = %d, %d (%08X)\n", (int16)GET16(blitter_ram, A2_STEP + 2), (int16)GET16(blitter_ram, A2_STEP + 0), GET32(blitter_ram, A2_STEP));
1813
1814         WriteLog("  count    = %d x %d\n", GET16(blitter_ram, PIXLINECOUNTER + 2), GET16(blitter_ram, PIXLINECOUNTER));
1815
1816         WriteLog("  SRCEN    = %s\n", (SRCEN ? "1" : "0"));
1817         WriteLog("  SRCENZ   = %s\n", (SRCENZ ? "1" : "0"));
1818         WriteLog("  SRCENX   = %s\n", (SRCENX ? "1" : "0"));
1819         WriteLog("  DSTEN    = %s\n", (DSTEN ? "1" : "0"));
1820         WriteLog("  DSTENZ   = %s\n", (DSTENZ ? "1" : "0"));
1821         WriteLog("  DSTWRZ   = %s\n", (DSTWRZ ? "1" : "0"));
1822         WriteLog("  CLIPA1   = %s\n", (CLIPA1 ? "1" : "0"));
1823         WriteLog("  UPDA1F   = %s\n", (UPDA1F ? "1" : "0"));
1824         WriteLog("  UPDA1    = %s\n", (UPDA1 ? "1" : "0"));
1825         WriteLog("  UPDA2    = %s\n", (UPDA2 ? "1" : "0"));
1826         WriteLog("  DSTA2    = %s\n", (DSTA2 ? "1" : "0"));
1827         WriteLog("  ZOP      = %s %s %s\n", (Z_OP_INF ? "<" : ""), (Z_OP_EQU ? "=" : ""), (Z_OP_SUP ? ">" : ""));
1828         WriteLog("--LFUFUNC  = %s\n", opStr[(cmd >> 21) & 0x0F]);
1829         WriteLog("| PATDSEL  = %s (PD=%08X%08X)\n", (PATDSEL ? "1" : "0"), REG(PATTERNDATA), REG(PATTERNDATA + 4));
1830         WriteLog("--ADDDSEL  = %s\n", (ADDDSEL ? "1" : "0"));
1831         WriteLog("  CMPDST   = %s\n", (CMPDST ? "1" : "0"));
1832         WriteLog("  BCOMPEN  = %s\n", (BCOMPEN ? "1" : "0"));
1833         WriteLog("  DCOMPEN  = %s\n", (DCOMPEN ? "1" : "0"));
1834         WriteLog("  TOPBEN   = %s\n", (TOPBEN ? "1" : "0"));
1835         WriteLog("  TOPNEN   = %s\n", (TOPNEN ? "1" : "0"));
1836         WriteLog("  BKGWREN  = %s\n", (BKGWREN ? "1" : "0"));
1837         WriteLog("  GOURD    = %s (II=%08X, SD=%08X%08X)\n", (GOURD ? "1" : "0"), REG(INTENSITYINC), REG(SRCDATA), REG(SRCDATA + 4));
1838         WriteLog("  GOURZ    = %s (ZI=%08X, ZD=%08X%08X, SZ1=%08X%08X, SZ2=%08X%08X)\n", (GOURZ ? "1" : "0"), REG(ZINC), REG(DSTZ), REG(DSTZ + 4),
1839                 REG(SRCZINT), REG(SRCZINT + 4), REG(SRCZFRAC), REG(SRCZFRAC + 4));
1840         WriteLog("  SRCSHADE = %s\n", (SRCSHADE ? "1" : "0"));
1841 }
1842
1843
1844 #ifdef USE_MIDSUMMER_BLITTER
1845 //
1846 // Here's an attempt to write a blitter that conforms to the Midsummer specs--since
1847 // it's supposedly backwards compatible, it should work well...
1848 //
1849 //#define LOG_BLITTER_MEMORY_ACCESSES
1850
1851 #define DATINIT (false)     // Hard-wired off: every "if (DATINIT) goto init_if" test in the state machine below falls through
1852 #define TXTEXT  (false)     // Hard-wired off: none of the "if (TXTEXT) goto txtread" dispatches below is ever taken
1853 #define POLYGON (false)     // Hard-wired off: in the visible part of BlitterMidsummer it appears only in commented-out GOURZ.POLYGON tests
1854
1855 void BlitterMidsummer(uint32 cmd)
1856 {
1857 uint32 outer_loop, inner_loop, a1_addr, a2_addr;
1858 int32 a1_x, a1_y, a2_x, a2_y, a1_width, a2_width;
1859 uint8 a1_phrase_mode, a2_phrase_mode;
1860
1861         a1_addr = REG(A1_BASE) & 0xFFFFFFF8;
1862         a2_addr = REG(A2_BASE) & 0xFFFFFFF8;
1863         a1_x = (REG(A1_PIXEL) << 16) | (REG(A1_FPIXEL) & 0xFFFF);
1864         a1_y = (REG(A1_PIXEL) & 0xFFFF0000) | (REG(A1_FPIXEL) >> 16);
1865         uint32 m = (REG(A1_FLAGS) >> 9) & 0x03, e = (REG(A1_FLAGS) >> 11) & 0x0F;
1866         a1_width = ((0x04 | m) << e) >> 2;//*/
1867         a2_x = (REG(A2_PIXEL) & 0x0000FFFF) << 16;
1868         a2_y = (REG(A2_PIXEL) & 0xFFFF0000);
1869         m = (REG(A2_FLAGS) >> 9) & 0x03, e = (REG(A2_FLAGS) >> 11) & 0x0F;
1870         a2_width = ((0x04 | m) << e) >> 2;//*/
1871
1872         a1_phrase_mode = a2_phrase_mode = 0;
1873
1874         if ((blitter_ram[A1_FLAGS + 1] & 0x03) == 0)
1875                 a1_phrase_mode = 1;
1876
1877         if ((blitter_ram[A2_FLAGS + 1] & 0x03) == 0)
1878                 a2_phrase_mode = 1;
1879
1880 #define INNER0  (inner_loop == 0)
1881 #define OUTER0  (outer_loop == 0)
1882
1883 // $01800005 has SRCENX, may have to investigate further...
1884 // $00011008 has GOURD & DSTEN.
1885 // $41802F41 has SRCSHADE, CLIPA1
1886 /*bool logBlit = false;
1887 if (cmd != 0x00010200 && cmd != 0x01800001 && cmd != 0x01800005
1888         && cmd != 0x00011008 && cmd !=0x41802F41)
1889 {
1890         logBlit = true;
1891         LogBlit();
1892 }//*/
1893
1894         uint64 srcData = GET64(blitter_ram, SRCDATA), srcXtraData,
1895                 dstData = GET64(blitter_ram, DSTDATA), writeData;
1896         uint32 srcAddr, dstAddr;
1897         uint8 bitCount, a1PixelSize, a2PixelSize;
1898
1899         // JTRM says phrase mode only works for 8BPP or higher, so let's try this...
1900         uint32 phraseOffset[8] = { 8, 8, 8, 8, 4, 2, 0, 0 };
1901         uint8 pixelShift[8] = { 3, 2, 1, 0, 1, 2, 0, 0 };
1902
1903         a1PixelSize = (blitter_ram[A1_FLAGS + 3] >> 3) & 0x07;
1904         a2PixelSize = (blitter_ram[A2_FLAGS + 3] >> 3) & 0x07;
1905
1906         outer_loop = GET16(blitter_ram, PIXLINECOUNTER + 0);
1907
1908         if (outer_loop == 0)
1909                 outer_loop = 0x10000;
1910
1911         // We just list the states here and jump from state to state in order to
1912         // keep things somewhat clear. Optimization/cleanups later.
1913
1914 //idle:                                                 // Blitter is idle, and will not perform any bus activity
1915 /*
1916 idle         Blitter is off the bus, and no activity takes place.
1917 if GO    if DATINIT goto init_if
1918          else       goto inner
1919 */
1920         if (DATINIT)
1921                 goto init_if;
1922         else
1923                 goto inner;
1924
1925 /*
1926 inner        Inner loop is active, read and write cycles are performed
1927 */
1928 inner:                                                  // Run inner loop state machine (asserts step from its idle state)
1929         inner_loop = GET16(blitter_ram, PIXLINECOUNTER + 2);
1930
1931         if (inner_loop == 0)
1932                 inner_loop = 0x10000;
1933
1934 /*
1935 ------------------------------
1936 idle:                        Inactive, blitter is idle or passing round outer loop
1937 idle       Another state in the outer loop is active. No bus transfers are performed.
1938 if STEP
1939     if SRCENX goto sreadx
1940     else if TXTEXT goto txtread
1941     else if SRCEN goto sread
1942     else if DSTEN goto dread
1943     else if DSTENZ goto dzread
1944     else goto dwrite
1945 */
1946     if (SRCENX)
1947                 goto sreadx;
1948     else if (TXTEXT)
1949                 goto txtread;
1950     else if (SRCEN)
1951                 goto sread;
1952     else if (DSTEN)
1953                 goto dread;
1954     else if (DSTENZ)
1955                 goto dzread;
1956     else
1957                 goto dwrite;
1958
1959 /*
1960 sreadx     Extra source data read at the start of an inner loop pass.
1961 if STEP
1962     if SRCENZ goto szreadx
1963     else if TXTEXT goto txtread
1964     else if SRCEN goto sread
1965     else if DSTEN goto dread
1966     else if DSTENZ goto dzread
1967     else goto dwrite
1968 */
1969 sreadx:                                                 // Extra source data read
1970         if (SRCENZ)
1971                 goto szreadx;
1972         else if (TXTEXT)
1973                 goto txtread;
1974         else if (SRCEN)
1975                 goto sread;
1976         else if (DSTEN)
1977                 goto dread;
1978         else if (DSTENZ)
1979                 goto dzread;
1980         else
1981                 goto dwrite;
1982
1983 /*
1984 szreadx    Extra source Z read at the start of an inner loop pass.
1985 if STEP
1986     if TXTEXT goto txtread
1987     else goto sread
1988 */
1989 szreadx:                                                // Extra source Z read
1990         if (TXTEXT)
1991                 goto txtread;
1992         else
1993                 goto sread;
1994
1995 /*
1996 txtread    Read texture data from external memory. This state is only used for external texture.
1997            TXTEXT is the condition TEXTMODE=1.
1998 if STEP
1999     if SRCEN goto sread
2000     else if DSTEN goto dread
2001     else if DSTENZ goto dzread
2002     else goto dwrite
2003 */
2004 txtread:                                                // Read external texture data
2005         if (SRCEN)
2006                 goto sread;
2007         else if (DSTEN)
2008                 goto dread;
2009         else if (DSTENZ)
2010                 goto dzread;
2011         else
2012                 goto dwrite;
2013
2014 /*
2015 sread      Source data read.
2016 if STEP
2017     if SRCENZ goto szread
2018     else if DSTEN goto dread
2019     else if DSTENZ goto dzread
2020     else goto dwrite
2021 */
2022 sread:                                                  // Source data read
2023 //The JTRM doesn't really specify the internal structure of the source data read, but I would
2024 //imagine that if it's in phrase mode that it starts by reading the phrase that the window is
2025 //pointing at. Likewise, the pixel (if in BPP 1, 2 & 4, chopped) otherwise. It probably still
2026 //transfers an entire phrase even in pixel mode.
2027 //Odd thought: Does it expand, e.g., 1 BPP pixels into 32 BPP internally? Hmm...
2028 //No.
2029 /*
2030         a1_addr = REG(A1_BASE) & 0xFFFFFFF8;
2031         a2_addr = REG(A2_BASE) & 0xFFFFFFF8;
2032         a1_zoffs = (REG(A1_FLAGS) >> 6) & 7;
2033         a2_zoffs = (REG(A2_FLAGS) >> 6) & 7;
2034         xadd_a1_control = (REG(A1_FLAGS) >> 16) & 0x03;
2035         xadd_a2_control = (REG(A2_FLAGS) >> 16) & 0x03;
2036         a1_pitch = pitchValue[(REG(A1_FLAGS) & 0x03)];
2037         a2_pitch = pitchValue[(REG(A2_FLAGS) & 0x03)];
2038         n_pixels = REG(PIXLINECOUNTER) & 0xFFFF;
2039         n_lines = (REG(PIXLINECOUNTER) >> 16) & 0xFFFF;
2040         a1_x = (REG(A1_PIXEL) << 16) | (REG(A1_FPIXEL) & 0xFFFF);
2041         a1_y = (REG(A1_PIXEL) & 0xFFFF0000) | (REG(A1_FPIXEL) >> 16);
2042         a2_psize = 1 << ((REG(A2_FLAGS) >> 3) & 0x07);
2043         a1_psize = 1 << ((REG(A1_FLAGS) >> 3) & 0x07);
2044         a1_phrase_mode = 0;
2045         a2_phrase_mode = 0;
2046         a1_width = ((0x04 | m) << e) >> 2;
2047         a2_width = ((0x04 | m) << e) >> 2;
2048
2049         // write values back to registers
2050         WREG(A1_PIXEL,  (a1_y & 0xFFFF0000) | ((a1_x >> 16) & 0xFFFF));
2051         WREG(A1_FPIXEL, (a1_y << 16) | (a1_x & 0xFFFF));
2052         WREG(A2_PIXEL,  (a2_y & 0xFFFF0000) | ((a2_x >> 16) & 0xFFFF));
2053 */
2054         // Calculate the address to be read...
2055
2056 //Need to fix phrase mode calcs here, since they should *step* by eight, not multiply.
2057 //Also, need to fix various differing BPP modes here, since offset won't be correct except
2058 //for 8BPP. !!! FIX !!!
2059         srcAddr = (DSTA2 ? a1_addr : a2_addr);
2060
2061 /*      if ((DSTA2 ? a1_phrase_mode : a2_phrase_mode) == 1)
2062         {
2063                 srcAddr += (((DSTA2 ? a1_x : a2_x) >> 16)
2064                         + (((DSTA2 ? a1_y : a2_y) >> 16) * (DSTA2 ? a1_width : a2_width)));
2065         }
2066         else*/
2067         {
2068 //              uint32 pixAddr = ((DSTA2 ? a1_x : a2_x) >> 16)
2069 //                      + (((DSTA2 ? a1_y : a2_y) >> 16) * (DSTA2 ? a1_width : a2_width));
2070                 int32 pixAddr = (int16)((DSTA2 ? a1_x : a2_x) >> 16)
2071                         + ((int16)((DSTA2 ? a1_y : a2_y) >> 16) * (DSTA2 ? a1_width : a2_width));
2072
2073                 if ((DSTA2 ? a1PixelSize : a2PixelSize) < 3)
2074                         pixAddr >>= pixelShift[(DSTA2 ? a1PixelSize : a2PixelSize)];
2075                 else if ((DSTA2 ? a1PixelSize : a2PixelSize) > 3)
2076                         pixAddr <<= pixelShift[(DSTA2 ? a1PixelSize : a2PixelSize)];
2077
2078                 srcAddr += pixAddr;
2079         }
2080
2081         // And read it!
2082
2083         if ((DSTA2 ? a1_phrase_mode : a2_phrase_mode) == 1)
2084         {
2085                 srcData = ((uint64)JaguarReadLong(srcAddr, BLITTER) << 32)
2086                         | (uint64)JaguarReadLong(srcAddr + 4, BLITTER);
2087         }
2088         else
2089         {
2090 //1,2,&4BPP are wrong here... !!! FIX !!!
2091                 if ((DSTA2 ? a1PixelSize : a2PixelSize) == 0)           // 1 BPP
2092                         srcData = JaguarReadByte(srcAddr, BLITTER);
2093                 if ((DSTA2 ? a1PixelSize : a2PixelSize) == 1)           // 2 BPP
2094                         srcData = JaguarReadByte(srcAddr, BLITTER);
2095                 if ((DSTA2 ? a1PixelSize : a2PixelSize) == 2)           // 4 BPP
2096                         srcData = JaguarReadByte(srcAddr, BLITTER);
2097                 if ((DSTA2 ? a1PixelSize : a2PixelSize) == 3)           // 8 BPP
2098                         srcData = JaguarReadByte(srcAddr, BLITTER);
2099                 if ((DSTA2 ? a1PixelSize : a2PixelSize) == 4)           // 16 BPP
2100                         srcData = JaguarReadWord(srcAddr, BLITTER);
2101                 if ((DSTA2 ? a1PixelSize : a2PixelSize) == 5)           // 32 BPP
2102                         srcData = JaguarReadLong(srcAddr, BLITTER);
2103         }
2104
2105 #ifdef LOG_BLITTER_MEMORY_ACCESSES
2106 if (logBlit)
2107         WriteLog("BLITTER: srcAddr=%08X,   srcData=%08X %08X\n", srcAddr, (uint32)(srcData >> 32), (uint32)(srcData & 0xFFFFFFFF));
2108 #endif
2109
2110         if (SRCENZ)
2111                 goto szread;
2112         else if (DSTEN)
2113                 goto dread;
2114         else if (DSTENZ)
2115                 goto dzread;
2116         else
2117                 goto dwrite;
2118
2119 szread:                                                 // Source Z read
2120 /*
2121 szread     Source Z read.
2122 if STEP
2123     if DSTEN goto dread
2124     else if DSTENZ goto dzread
2125     else goto dwrite
2126 */
2127         if (DSTEN)
2128                 goto dread;
2129         else if (DSTENZ)
2130                 goto dzread;
2131         else
2132                 goto dwrite;
2133
2134 dread:                                                  // Destination data read
2135 /*
2136 dread      Destination data read.
2137 if STEP
2138     if DSTENZ goto dzread
2139     else goto dwrite
2140 */
2141         // Calculate the destination address to be read...
2142
2143 //Need to fix phrase mode calcs here, since they should *step* by eight, not multiply.
2144 //Also, need to fix various differing BPP modes here, since offset won't be correct except
2145 //for 8BPP. !!! FIX !!!
2146         dstAddr = (DSTA2 ? a2_addr : a1_addr);
2147
2148         {
2149 //      uint32 pixAddr = ((DSTA2 ? a2_x : a1_x) >> 16)
2150 //              + (((DSTA2 ? a2_y : a1_y) >> 16) * (DSTA2 ? a2_width : a1_width));
2151         int32 pixAddr = (int16)((DSTA2 ? a2_x : a1_x) >> 16)
2152                 + ((int16)((DSTA2 ? a2_y : a1_y) >> 16) * (DSTA2 ? a2_width : a1_width));
2153
2154         if ((DSTA2 ? a2PixelSize : a1PixelSize) < 3)
2155                 pixAddr >>= pixelShift[(DSTA2 ? a2PixelSize : a1PixelSize)];
2156         else if ((DSTA2 ? a2PixelSize : a1PixelSize) > 3)
2157                 pixAddr <<= pixelShift[(DSTA2 ? a2PixelSize : a1PixelSize)];
2158
2159         dstAddr += pixAddr;
2160         }
2161
2162         // And read it!
2163
2164         if ((DSTA2 ? a2_phrase_mode : a1_phrase_mode) == 1)
2165         {
2166                 dstData = ((uint64)JaguarReadLong(srcAddr, BLITTER) << 32)
2167                         | (uint64)JaguarReadLong(srcAddr + 4, BLITTER);
2168         }
2169         else
2170         {
2171 //1,2,&4BPP are wrong here... !!! FIX !!!
2172                 if ((DSTA2 ? a2PixelSize : a1PixelSize) == 0)           // 1 BPP
2173                         dstData = JaguarReadByte(dstAddr, BLITTER);
2174                 if ((DSTA2 ? a2PixelSize : a1PixelSize) == 1)           // 2 BPP
2175                         dstData = JaguarReadByte(dstAddr, BLITTER);
2176                 if ((DSTA2 ? a2PixelSize : a1PixelSize) == 2)           // 4 BPP
2177                         dstData = JaguarReadByte(dstAddr, BLITTER);
2178                 if ((DSTA2 ? a2PixelSize : a1PixelSize) == 3)           // 8 BPP
2179                         dstData = JaguarReadByte(dstAddr, BLITTER);
2180                 if ((DSTA2 ? a2PixelSize : a1PixelSize) == 4)           // 16 BPP
2181                         dstData = JaguarReadWord(dstAddr, BLITTER);
2182                 if ((DSTA2 ? a2PixelSize : a1PixelSize) == 5)           // 32 BPP
2183                         dstData = JaguarReadLong(dstAddr, BLITTER);
2184         }
2185
2186 #ifdef LOG_BLITTER_MEMORY_ACCESSES
2187 if (logBlit)
2188         WriteLog("BLITTER (dread): dstAddr=%08X,   dstData=%08X %08X\n", dstAddr, (uint32)(dstData >> 32), (uint32)(dstData & 0xFFFFFFFF));
2189 #endif
2190
2191         if (DSTENZ)
2192                 goto dzread;
2193         else
2194                 goto dwrite;
2195
2196 dzread:                                                 // Destination Z read
2197 /*
2198 dzread     Destination Z read.
2199 if STEP goto dwrite
2200 */
2201         goto dwrite;
2202
2203 dwrite:                                                 // Destination data write
2204 /*
2205 dwrite     Destination write. Every pass round the inner loop must go through this state.
2206 if STEP
2207     if DSTWRZ goto dzwrite
2208     else if INNER0 goto idle
2209     else if TXTEXT goto txtread
2210     else if SRCEN goto sread
2211     else if DSTEN goto dread
2212     else if DSTENZ goto dzread
2213     else goto dwrite
2214 */
2215 /*
2216 Blit!
2217   a1_base  = 00100000
2218   a1_pitch = 0
2219   a1_psize = 16
2220   a1_width = 320
2221   a1_xadd  = 1.000000 (phrase=0)
2222   a1_yadd  = 0.000000
2223   a1_x     = 159.000000
2224   a1_y     = 1.000000
2225   a1_zoffs = 0
2226   a2_base  = 000095D0
2227   a2_pitch = 0
2228   a2_psize = 16
2229   a2_width = 256
2230   a2_xadd  = 1.000000 (phrase=1)
2231   a2_yadd  = 0.000000
2232   a2_x     = 2.000000
2233   a2_y     = 0.000000
2234   a2_mask_x= 0xFFFFFFFF
2235   a2_mask_y= 0xFFFFFFFF
2236   a2_zoffs = 0
2237   count    = 2 x 1
2238   COMMAND  = 00011008
2239   SRCEN    = 0
2240   DSTEN    = 1
2241   UPDA1F   = 0
2242   UPDA1    = 0
2243   UPDA2    = 0
2244   DSTA2    = 0
2245 --LFUFUNC  = LFU_CLEAR
2246 | PATDSEL  = 1 (PD=77C7 7700 7700 7700)
2247 --ADDDSEL  = 0
2248   GOURD    = 1 (II=00FC 1A00, SD=FF00 0000 0000 0000)
2249 */
2250
2251 //Still need to do CLIPA1 and SRCSHADE and GOURD and GOURZ...
2252
2253         // Check clipping...
2254
2255         if (CLIPA1)
2256         {
2257                 uint16 x = a1_x >> 16, y = a1_y >> 16;
2258
2259                 if (x >= GET16(blitter_ram, A1_CLIP + 2) || y >= GET16(blitter_ram, A1_CLIP))
2260                         goto inhibitWrite;
2261         }
2262
2263         // Figure out what gets written...
2264
2265         if (PATDSEL)
2266         {
2267                 writeData = GET64(blitter_ram, PATTERNDATA);
2268 //GOURD works properly only in 16BPP mode...
2269 //SRCDATA holds the intensity fractions...
2270 //Does GOURD get calc'ed here or somewhere else???
2271 //Temporary testing kludge...
2272 //if (GOURD)
2273 //   writeData >>= 48;
2274 //      writeData = 0xFF88;
2275 //OK, it's not writing an entire strip of pixels... Why?
2276 //bad incrementing, that's why!
2277         }
2278         else if (ADDDSEL)
2279         {
2280                 // Apparently this only works with 16-bit pixels. Not sure if it works in phrase mode either.
2281 //Also, take TOPBEN & TOPNEN into account here as well...
2282                 writeData = srcData + dstData;
2283         }
2284         else    // LFUFUNC is the default...
2285         {
2286                 writeData = 0;
2287
2288                 if (LFU_NAN)
2289                         writeData |= ~srcData & ~dstData;
2290                 if (LFU_NA)
2291                         writeData |= ~srcData & dstData;
2292                 if (LFU_AN)
2293                         writeData |= srcData & ~dstData;
2294                 if (LFU_A)
2295                         writeData |= srcData & dstData;
2296         }
2297
2298         // Calculate the address to be written...
2299
2300         dstAddr = (DSTA2 ? a2_addr : a1_addr);
2301
2302 /*      if ((DSTA2 ? a2_phrase_mode : a1_phrase_mode) == 1)
2303         {
2304 //both of these calculate the wrong address because they don't take into account
2305 //pixel sizes...
2306                 dstAddr += ((DSTA2 ? a2_x : a1_x) >> 16)
2307                         + (((DSTA2 ? a2_y : a1_y) >> 16) * (DSTA2 ? a2_width : a1_width));
2308         }
2309         else*/
2310         {
2311 /*              dstAddr += ((DSTA2 ? a2_x : a1_x) >> 16)
2312                         + (((DSTA2 ? a2_y : a1_y) >> 16) * (DSTA2 ? a2_width : a1_width));*/
2313 //              uint32 pixAddr = ((DSTA2 ? a2_x : a1_x) >> 16)
2314 //                      + (((DSTA2 ? a2_y : a1_y) >> 16) * (DSTA2 ? a2_width : a1_width));
2315                 int32 pixAddr = (int16)((DSTA2 ? a2_x : a1_x) >> 16)
2316                         + ((int16)((DSTA2 ? a2_y : a1_y) >> 16) * (DSTA2 ? a2_width : a1_width));
2317
2318                 if ((DSTA2 ? a2PixelSize : a1PixelSize) < 3)
2319                         pixAddr >>= pixelShift[(DSTA2 ? a2PixelSize : a1PixelSize)];
2320                 else if ((DSTA2 ? a2PixelSize : a1PixelSize) > 3)
2321                         pixAddr <<= pixelShift[(DSTA2 ? a2PixelSize : a1PixelSize)];
2322
2323                 dstAddr += pixAddr;
2324         }
2325
2326         // And write it!
2327
2328         if ((DSTA2 ? a2_phrase_mode : a1_phrase_mode) == 1)
2329         {
2330                 JaguarWriteLong(dstAddr, writeData >> 32, BLITTER);
2331                 JaguarWriteLong(dstAddr + 4, writeData & 0xFFFFFFFF, BLITTER);
2332         }
2333         else
2334         {
2335 //1,2,&4BPP are wrong here... !!! FIX !!!
2336                 if ((DSTA2 ? a2PixelSize : a1PixelSize) == 0)           // 1 BPP
2337                         JaguarWriteByte(dstAddr, writeData, BLITTER);
2338                 if ((DSTA2 ? a2PixelSize : a1PixelSize) == 1)           // 2 BPP
2339                         JaguarWriteByte(dstAddr, writeData, BLITTER);
2340                 if ((DSTA2 ? a2PixelSize : a1PixelSize) == 2)           // 4 BPP
2341                         JaguarWriteByte(dstAddr, writeData, BLITTER);
2342                 if ((DSTA2 ? a2PixelSize : a1PixelSize) == 3)           // 8 BPP
2343                         JaguarWriteByte(dstAddr, writeData, BLITTER);
2344                 if ((DSTA2 ? a2PixelSize : a1PixelSize) == 4)           // 16 BPP
2345                         JaguarWriteWord(dstAddr, writeData, BLITTER);
2346                 if ((DSTA2 ? a2PixelSize : a1PixelSize) == 5)           // 32 BPP
2347                         JaguarWriteLong(dstAddr, writeData, BLITTER);
2348         }
2349
2350 #ifdef LOG_BLITTER_MEMORY_ACCESSES
2351 if (logBlit)
2352         WriteLog("BLITTER: dstAddr=%08X, writeData=%08X %08X\n", dstAddr, (uint32)(writeData >> 32), (uint32)(writeData & 0xFFFFFFFF));
2353 #endif
2354
2355 inhibitWrite://Should this go here? or on the other side of the X/Y incrementing?
2356 //Seems OK here... for now.
2357
2358 // Do funky X/Y incrementation here as well... !!! FIX !!!
2359
2360         // Handle A1 channel stepping
2361
2362         if ((blitter_ram[A1_FLAGS + 1] & 0x03) == 0)
2363                 a1_x += phraseOffset[a1PixelSize] << 16;
2364         else if ((blitter_ram[A1_FLAGS + 1] & 0x03) == 1)
2365                 a1_x += (blitter_ram[A1_FLAGS + 1] & 0x08 ? -1 << 16 : 1 << 16);
2366 /*      else if ((blitter_ram[A1_FLAGS + 1] & 0x03) == 2)
2367                 a1_x += 0 << 16;                              */
2368         else if ((blitter_ram[A1_FLAGS + 1] & 0x03) == 3)
2369         {
2370 //Always add the FINC here??? That was the problem with the BIOS screen... So perhaps.
2371                 a1_x += GET16(blitter_ram, A1_FINC + 2);
2372                 a1_y += GET16(blitter_ram, A1_FINC + 0);
2373
2374                 a1_x += GET16(blitter_ram, A1_INC + 2) << 16;
2375                 a1_y += GET16(blitter_ram, A1_INC + 0) << 16;
2376         }
2377
2378         if ((blitter_ram[A1_FLAGS + 1] & 0x04) && (blitter_ram[A1_FLAGS + 1] & 0x03 != 3))
2379                 a1_y += (blitter_ram[A1_FLAGS + 1] & 0x10 ? -1 << 16 : 1 << 16);
2380
2381         // Handle A2 channel stepping
2382
2383         if ((blitter_ram[A2_FLAGS + 1] & 0x03) == 0)
2384                 a2_x += phraseOffset[a2PixelSize] << 16;
2385         else if ((blitter_ram[A2_FLAGS + 1] & 0x03) == 1)
2386                 a2_x += (blitter_ram[A2_FLAGS + 1] & 0x08 ? -1 << 16 : 1 << 16);
2387 /*      else if ((blitter_ram[A2_FLAGS + 1] & 0x03) == 2)
2388                 a2_x += 0 << 16;                              */
2389
2390         if (blitter_ram[A2_FLAGS + 1] & 0x04)
2391                 a2_y += (blitter_ram[A2_FLAGS + 1] & 0x10 ? -1 << 16 : 1 << 16);
2392
2393 //Need to fix this so that it subtracts (saturating, of course) the correct number of pixels
2394 //in phrase mode... !!! FIX !!! [DONE]
2395 //Need to fix this so that it counts down the correct item. Does it count the
2396 //source or the destination phrase mode???
2397 //It shouldn't matter, because we *should* end up processing the same
2398 //number of pixels either way... Not sure though.
2399         if ((DSTA2 ? a2_phrase_mode : a1_phrase_mode) == 1)
2400         {
2401                 if (inner_loop < phraseOffset[DSTA2 ? a2PixelSize : a1PixelSize])
2402                         inner_loop = 0;
2403                 else
2404                         inner_loop -= phraseOffset[DSTA2 ? a2PixelSize : a1PixelSize];
2405         }
2406         else
2407                 inner_loop--;
2408
2409
2410         if (DSTWRZ)
2411                 goto dzwrite;
2412         else if (INNER0)
2413                 goto indone;
2414         else if (TXTEXT)
2415                 goto txtread;
2416         else if (SRCEN)
2417                 goto sread;
2418         else if (DSTEN)
2419                 goto dread;
2420         else if (DSTENZ)
2421                 goto dzread;
2422         else
2423                 goto dwrite;
2424
2425 dzwrite:                                                // Destination Z write
2426 /*
2427 dzwrite    Destination Z write.
2428 if STEP
2429     if INNER0 goto idle
2430     else if TXTEXT goto txtread
2431     else if SRCEN goto sread
2432     else if DSTEN goto dread
2433     else if DSTENZ goto dzread
2434     else goto dwrite
2435 */
2436         if (INNER0)
2437                 goto indone;
2438         else if (TXTEXT)
2439                 goto txtread;
2440         else if (SRCEN)
2441                 goto sread;
2442         else if (DSTEN)
2443                 goto dread;
2444         else if (DSTENZ)
2445                 goto dzread;
2446         else
2447                 goto dwrite;
2448
2449 /*
2450 ------------------------------
2451 if INDONE if OUTER0 goto idle
2452 else if UPDA1F        goto a1fupdate
2453 else if UPDA1         goto a1update
2454 else if GOURZ.POLYGON goto zfupdate
2455 else if UPDA2         goto a2update
2456 else if DATINIT       goto init_if
2457 else restart inner
2458 */
2459 indone:
2460         outer_loop--;
2461
2462
2463         if (OUTER0)
2464                 goto blitter_done;
2465         else if (UPDA1F)
2466                 goto a1fupdate;
2467         else if (UPDA1)
2468                 goto a1update;
2469 //kill this, for now...
2470 //      else if (GOURZ.POLYGON)
2471 //              goto zfupdate;
2472         else if (UPDA2)
2473                 goto a2update;
2474         else if (DATINIT)
2475                 goto init_if;
2476         else
2477                 goto inner;
2478
2479 a1fupdate:                                              // Update A1 pointer fractions and more (see below)
2480 /*
2481 a1fupdate    A1 step fraction is added to A1 pointer fraction
2482              POLYGON true: A1 step delta X and Y fraction parts are added to the A1
2483                          step X and Y fraction parts (the value prior to this add is used for
2484                          the step to pointer add).
2485              POLYGON true: inner count step fraction is added to the inner count
2486                          fraction part
2487              POLYGON.GOURD true: the I fraction step is added to the computed
2488                          intensity fraction parts +
2489              POLYGON.GOURD true: the I fraction step delta is added to the I
2490                          fraction step
2491 goto a1update
2492 */
2493 /*
2494 #define A1_PIXEL                ((uint32)0x0C)  // Integer part of the pixel (Y.i and X.i)
2495 #define A1_STEP                 ((uint32)0x10)  // Integer part of the step
2496 #define A1_FSTEP                ((uint32)0x14)  // Fractional part of the step
2497 #define A1_FPIXEL               ((uint32)0x18)  // Fractional part of the pixel (Y.f and X.f)
2498 */
2499
2500 // This is all kinda murky. All we have are the Midsummer docs to give us any guidance,
2501 // and they're incomplete or filled with errors (like above). Aarrrgggghhhhh!
2502
2503 //This isn't right. Is it? I don't think the fractional parts are signed...
2504 //      a1_x += (int32)((int16)GET16(blitter_ram, A1_FSTEP + 2));
2505 //      a1_y += (int32)((int16)GET16(blitter_ram, A1_FSTEP + 0));
2506         a1_x += GET16(blitter_ram, A1_FSTEP + 2);
2507         a1_y += GET16(blitter_ram, A1_FSTEP + 0);
2508
2509         goto a1update;
2510
2511 a1update:                                               // Update A1 pointer integers
2512 /*
2513 a1update     A1 step is added to A1 pointer, with carry from the fractional add
2514              POLYGON true: A1 step delta X and Y integer parts are added to the A1
2515                          step X and Y integer parts, with carry from the corresponding
2516                          fractional part add (again, the value prior to this add is used for
2517                          the step to pointer add).
2518              POLYGON true: inner count step is added to the inner count, with carry
2519              POLYGON.GOURD true: the I step is added to the computed intensities,
2520                          with carry +
2521              POLYGON.GOURD true: the I step delta is added to the I step, with
2522                          carry the texture X and Y step delta values are added to the X and Y
2523                          step values.
2524 if GOURZ.POLYGON goto zfupdate
2525 else if UPDA2 goto a2update
2526 else if DATINIT goto init_if
2527 else restart inner
2528 */
2529         a1_x += (int32)(GET16(blitter_ram, A1_STEP + 2) << 16);
2530         a1_y += (int32)(GET16(blitter_ram, A1_STEP + 0) << 16);
2531
2532
2533 //kill this, for now...
2534 //      if (GOURZ.POLYGON)
2535         if (false)
2536                 goto zfupdate;
2537         else if (UPDA2)
2538                 goto a2update;
2539         else if (DATINIT)
2540                 goto init_if;
2541         else
2542                 goto inner;
2543
2544 zfupdate:                                               // Update computed Z step fractions
2545 /*
2546 zfupdate     the Z fraction step is added to the computed Z fraction parts +
2547              the Z fraction step delta is added to the Z fraction step
2548 goto zupdate
2549 */
2550         goto zupdate;
2551
2552 zupdate:                                                // Update computed Z step integers
2553 /*
2554 zupdate      the Z step is added to the computed Zs, with carry +
2555              the Z step delta is added to the Z step, with carry
2556 if UPDA2 goto a2update
2557 else if DATINIT goto init_if
2558 else restart inner
2559 */
2560         if (UPDA2)
2561                 goto a2update;
2562         else if (DATINIT)
2563                 goto init_if;
2564         else
2565                 goto inner;
2566
2567 a2update:                                               // Update A2 pointer
2568 /*
2569 a2update     A2 step is added to the A2 pointer
2570 if DATINIT goto init_if
2571 else restart inner
2572 */
2573         a2_x += (int32)(GET16(blitter_ram, A2_STEP + 2) << 16);
2574         a2_y += (int32)(GET16(blitter_ram, A2_STEP + 0) << 16);
2575
2576
2577         if (DATINIT)
2578                 goto init_if;
2579         else
2580                 goto inner;
2581
2582 init_if:                                                // Initialise intensity fractions and texture X
2583 /*
2584 init_if      Initialise the fractional part of the computed intensity fields, from
2585              the increment and step registers. The texture X integer and fractional
2586                          parts can also be initialised.
2587 goto     init_ii
2588 */
2589         goto init_ii;
2590
2591 init_ii:                                                // Initialise intensity integers and texture Y
2592 /*
2593 init_ii      Initialise the integer part of the computed intensity, and texture Y
2594              integer and fractional parts
2595 if GOURZ goto init_zf
2596 else     goto inner
2597 */
2598         if (GOURZ)
2599                 goto init_zf;
2600         else
2601             goto inner;
2602
2603 init_zf:                                                // Initialise Z fractions
2604 /*
2605 init_zf      Initialise the fractional part of the computed Z fields.
2606 goto init_zi
2607 */
2608         goto init_zi;
2609
2610 init_zi:                                                // Initialise Z integers
2611 /*
2612 init_zi      Initialise the integer part of the computed Z fields.
2613 goto inner
2614 */
2615         goto inner;
2616
2617
2618 /*
2619 The outer loop state machine fires off the inner loop, and controls the updating
2620 process between passes through the inner loop.
2621
2622 + -- these functions are irrelevant if the DATINIT function is enabled, which it
2623      will normally be.
2624
2625 All these states will complete in one clock cycle, with the exception of the idle
2626 state, which means the blitter is quiescent; and the inner state, which takes as
2627 long as is required to complete one strip of pixels. It is therefore possible for
2628 the blitter to spend a maximum of nine clock cycles of inactivity between passes
2629 through the inner loop.
2630 */
2631
2632 blitter_done:
2633         {}
2634 }
2635 #endif
2636
2637
2638 //
2639 // Here's attempt #2--taken from the Oberon chip specs!
2640 //
2641
2642 #ifdef USE_MIDSUMMER_BLITTER_MKII
2643
// Forward declarations for the helper routines of the "attempt #2" blitter (the
// code built when USE_MIDSUMMER_BLITTER_MKII is defined). Only the prototypes are
// given here; the definitions are not part of this section.
// NOTE(review): the routine and parameter names look like they mirror block and
// signal names from the Oberon chip specs mentioned above -- confirm against the
// definitions.
// Convention visible in the signatures: results are handed back through non-const
// reference (or pointer) parameters, and every routine returns void.

// ADDRGEN: parameters are unnamed in this declaration. The two leading uint32
// references are presumably outputs, followed by two bools and two identically
// typed groups of seven values (TODO confirm meaning against the definition).
2644 void ADDRGEN(uint32 &, uint32 &, bool, bool,
2645         uint16, uint16, uint32, uint8, uint8, uint8, uint8,
2646         uint16, uint16, uint32, uint8, uint8, uint8, uint8);
// ADDARRAY: addq is passed as a pointer and initcin as an array, so the callee can
// index into both; the element counts are not stated here.
2647 void ADDARRAY(uint16 * addq, uint8 daddasel, uint8 daddbsel, uint8 daddmode,
2648         uint64 dstd, uint32 iinc, uint8 initcin[], uint64 initinc, uint16 initpix,
2649         uint32 istep, uint64 patd, uint64 srcd, uint64 srcz1, uint64 srcz2,
2650         uint32 zinc, uint32 zstep);
// ADD16SAT: r and co are reference parameters (presumably the 16-bit result and
// carry out); a, b and cin are the operands, the three bools are mode flags.
2651 void ADD16SAT(uint16 &r, uint8 &co, uint16 a, uint16 b, uint8 cin, bool sat, bool eightbit, bool hicinh);
// ADDAMUX: adda_x/adda_y are reference parameters; the remaining arguments are
// passed by value.
2652 void ADDAMUX(int16 &adda_x, int16 &adda_y, uint8 addasel, int16 a1_step_x, int16 a1_step_y,
2653         int16 a1_stepf_x, int16 a1_stepf_y, int16 a2_step_x, int16 a2_step_y,
2654         int16 a1_inc_x, int16 a1_inc_y, int16 a1_incf_x, int16 a1_incf_y, uint8 adda_xconst,
2655         bool adda_yconst, bool addareg, bool suba_x, bool suba_y);
// ADDBMUX: addb_x/addb_y are reference parameters; addbsel and the A1/A2 pointer
// and fraction values are passed by value.
2656 void ADDBMUX(int16 &addb_x, int16 &addb_y, uint8 addbsel, int16 a1_x, int16 a1_y,
2657         int16 a2_x, int16 a2_y, int16 a1_frac_x, int16 a1_frac_y);
// DATAMUX: data_x/data_y are reference parameters; gpu_din, addq_x/addq_y and
// addqsel are passed by value.
2658 void DATAMUX(int16 &data_x, int16 &data_y, uint32 gpu_din, int16 addq_x, int16 addq_y, bool addqsel);
// ADDRADD: addq_x/addq_y are reference parameters. Note that the adda/addb
// operands are declared uint16 here, while ADDAMUX and ADDBMUX declare theirs
// as int16.
2659 void ADDRADD(int16 &addq_x, int16 &addq_y, bool a1fracldi,
2660         uint16 adda_x, uint16 adda_y, uint16 addb_x, uint16 addb_y, uint8 modx, bool suba_x, bool suba_y);
// DATA: wdata, dcomp, zcomp and nowrite are reference parameters. patd and srcz are
// also taken by non-const reference, so the callee is able to modify the caller's
// copies (TODO confirm that it does).
2661 void DATA(uint64 &wdata, uint8 &dcomp, uint8 &zcomp, bool &nowrite,
2662         bool big_pix, bool cmpdst, uint8 daddasel, uint8 daddbsel, uint8 daddmode, bool daddq_sel, uint8 data_sel,
2663         uint8 dbinh, uint8 dend, uint8 dstart, uint64 dstd, uint32 iinc, uint8 lfu_func, uint64 &patd, bool patdadd,
2664         bool phrase_mode, uint64 srcd, bool srcdread, bool srczread, bool srcz2add, uint8 zmode,
2665         bool bcompen, bool bkgwren, bool dcompen, uint8 icount, uint8 pixsize,
2666         uint64 &srcz, uint64 dstz, uint32 zinc);
// COMP_CTRL: dbinh and nowrite are reference parameters. Note that srcd is a uint8
// here, whereas DATA takes srcd as a uint64.
2667 void COMP_CTRL(uint8 &dbinh, bool &nowrite,
2668         bool bcompen, bool big_pix, bool bkgwren, uint8 dcomp, bool dcompen, uint8 icount,
2669         uint8 pixsize, bool phrase_mode, uint8 srcd, uint8 zcomp);
// Compile-time switch: the printf() trace blocks in BlitterMidsummer2() are wrapped
// in #ifdef VERBOSE_BLITTER_LOGGING, so they are only built while this is defined.
2670 #define VERBOSE_BLITTER_LOGGING
// Run-time gate for those trace blocks. BlitterMidsummer2() resets it to false on
// entry, sets it to true only when the command word is not in its list of known
// values, and forces it back to false while blit_start_log == 0.
2671 bool logBlit = false;
2672
2673 void BlitterMidsummer2(void)
2674 {
2675         // Here's what the specs say the state machine does. Note that this can probably be
2676         // greatly simplified (also, it's different from what John has in his Oberon docs):
2677 //Will remove stuff that isn't in Jaguar I once fully described (stuff like texture won't
2678 //be described here at all)...
2679
2680         uint32 cmd = GET32(blitter_ram, COMMAND);
2681
2682 logBlit = false;
2683 if (
2684         cmd != 0x00010200 &&    // PATDSEL
2685         cmd != 0x01800001               // SRCEN LFUFUNC=C
2686         && cmd != 0x01800005
2687 //Boot ROM ATARI letters:
2688         && cmd != 0x00011008    // DSTEN GOURD PATDSEL
2689 //Boot ROM spinning cube:
2690         && cmd != 0x41802F41    // SRCEN CLIP_A1 UPDA1 UPDA1F UPDA2 DSTA2 GOURZ ZMODE=0 LFUFUNC=C SRCSHADE
2691 //T2K intro screen:
2692         && cmd != 0x01800E01    // SRCEN UPDA1 UPDA2 DSTA2 LFUFUNC=C
2693 //T2K TEMPEST letters:
2694         && cmd != 0x09800741    // SRCEN CLIP_A1 UPDA1 UPDA1F UPDA2 LFUFUNC=C DCOMPEN
2695 //Static letters on Cybermorph intro screen:
2696         && cmd != 0x09800609    // SRCEN DSTEN UPDA1 UPDA2 LFUFUNC=C DCOMPEN
2697 //Static pic on title screen:
2698         && cmd != 0x01800601    // SRCEN UPDA1 UPDA2 LFUFUNC=C
2699 //Turning letters on Cybermorph intro screen:
2700 //      && cmd != 0x09800F41    // SRCEN CLIP_A1 UPDA1 UPDA1F UPDA2 DSTA2 LFUFUNC=C DCOMPEN
2701         && cmd != 0x00113078    // DSTEN DSTENZ DSTWRZ CLIP_A1 GOURD GOURZ PATDSEL ZMODE=4
2702         && cmd != 0x09900F39    // SRCEN DSTEN DSTENZ DSTWRZ UPDA1 UPDA1F UPDA2 DSTA2 ZMODE=4 LFUFUNC=C DCOMPEN
2703         && cmd != 0x09800209    // SRCEN DSTEN UPDA1 LFUFUNC=C DCOMPEN
2704         && cmd != 0x00011200    // UPDA1 GOURD PATDSEL
2705 //Start of Hover Strike (clearing screen):
2706         && cmd != 0x00010000    // PATDSEL
2707 //Hover Strike text:
2708         && cmd != 0x1401060C    // SRCENX DSTEN UPDA1 UPDA2 PATDSEL BCOMPEN BKGWREN
2709 //Hover Strike 3D stuff
2710         && cmd != 0x01902839    // SRCEN DSTEN DSTENZ DSTWRZ DSTA2 GOURZ ZMODE=4 LFUFUNC=C
2711 //Hover Strike darkening on intro to play (briefing) screen
2712         && cmd != 0x00020208    // DSTEN UPDA1 ADDDSEL
2713 //Trevor McFur stuff:
2714         && cmd != 0x05810601    // SRCEN UPDA1 UPDA2 PATDSEL BCOMPEN
2715         && cmd != 0x01800201    // SRCEN UPDA1 LFUFUNC=C
2716 //T2K:
2717         && cmd != 0x00011000    // GOURD PATDSEL
2718         && cmd != 0x00011040    // CLIP_A1 GOURD PATDSEL
2719 //Checkered flag:
2720         && cmd != 0x01800000    // LFUFUNC=C
2721         && cmd != 0x01800401    //
2722         && cmd != 0x01800040    //
2723         && cmd != 0x00020008    //
2724 //      && cmd != 0x09800F41    // SRCEN CLIP_A1 UPDA1 UPDA1F UPDA2 DSTA2 LFUFUNC=C DCOMPEN
2725         )
2726         logBlit = true;//*/
2727 //logBlit = true;
2728 if (blit_start_log == 0)        // Wait for the signal...
2729         logBlit = false;//*/
2730 /*
2731 Some T2K unique blits:
2732 logBlit = F, cmd = 00010200 *
2733 logBlit = F, cmd = 00011000
2734 logBlit = F, cmd = 00011040
2735 logBlit = F, cmd = 01800005 *
2736 logBlit = F, cmd = 09800741 *
2737
2738 Hover Strike mission selection screen:
2739 Blit! (CMD = 01902839)  // SRCEN DSTEN DSTENZ DSTWRZ DSTA2 GOURZ ZMODE=4 LFUFUNC=C
2740
2741 Checkered Flag blits in the screw up zone:
2742 Blit! (CMD = 01800001)  // SRCEN LFUFUNC=C
2743 Blit! (CMD = 01800000)  // LFUFUNC=C
2744 Blit! (CMD = 00010000)  // PATDSEL
2745
2746 Wolfenstein 3D in the fuckup zone:
2747 Blit! (CMD = 01800000)  // LFUFUNC=C
2748 */
2749
2750 //printf("logBlit = %s, cmd = %08X\n", (logBlit ? "T" : "F"), cmd);
2751 //fflush(stdout);
2752 //logBlit = true;
2753
2754 /*
2755 Blit! (CMD = 00011040)
2756 Flags: CLIP_A1 GOURD PATDSEL
2757   count = 18 x 1
2758   a1_base = 00100000, a2_base = 0081F6A8
2759   a1_x = 00A7, a1_y = 0014, a1_frac_x = 0000, a1_frac_y = 0000, a2_x = 0001, a2_y = 0000
2760   a1_step_x = FE80, a1_step_y = 0001, a1_stepf_x = 0000, a1_stepf_y = 0000, a2_step_x = FFF8, a2_step_y = 0001
2761   a1_inc_x = 0001, a1_inc_y = 0000, a1_incf_x = 0000, a1_incf_y = 0000
2762   a1_win_x = 0180, a1_win_y = 0118, a2_mask_x = 0000, a2_mask_y = 0000
2763   a2_mask=F a1add=+phr/+0 a2add=+phr/+0
2764   a1_pixsize = 4, a2_pixsize = 4
2765 */
2766 //Testing T2K...
2767 /*logBlit = false;
2768 if (cmd == 0x00011040
2769         && (GET16(blitter_ram, A1_PIXEL + 2) == 0x00A7) && (GET16(blitter_ram, A1_PIXEL + 0) == 0x0014)
2770         && (GET16(blitter_ram, A2_PIXEL + 2) == 0x0001) && (GET16(blitter_ram, A2_PIXEL + 0) == 0x0000)
2771         && (GET16(blitter_ram, PIXLINECOUNTER + 2) == 18))
2772         logBlit = true;*/
2773
2774         // Line states passed in via the command register
2775
2776         bool srcen = (SRCEN), srcenx = (SRCENX), srcenz = (SRCENZ),
2777                 dsten = (DSTEN), dstenz = (DSTENZ), dstwrz = (DSTWRZ), clip_a1 = (CLIPA1),
2778                 upda1 = (UPDA1), upda1f = (UPDA1F), upda2 = (UPDA2), dsta2 = (DSTA2),
2779                 gourd = (GOURD), gourz = (GOURZ), topben = (TOPBEN), topnen = (TOPNEN),
2780                 patdsel = (PATDSEL), adddsel = (ADDDSEL), cmpdst = (CMPDST), bcompen = (BCOMPEN),
2781                 dcompen = (DCOMPEN), bkgwren = (BKGWREN), srcshade = (SRCSHADE);
2782
2783         uint8 zmode = (cmd & 0x01C0000) >> 18, lfufunc = (cmd & 0x1E00000) >> 21;
2784 //Missing: BUSHI
2785 //Where to find various lines:
2786 // clip_a1  -> inner
2787 // gourd    -> dcontrol, inner, outer, state
2788 // gourz    -> dcontrol, inner, outer, state
2789 // cmpdst   -> blit, data, datacomp, state
2790 // bcompen  -> acontrol, inner, mcontrol, state
2791 // dcompen  -> inner, state
2792 // bkgwren  -> inner, state
2793 // srcshade -> dcontrol, inner, state
2794 // adddsel  -> dcontrol
2795 //NOTE: ADDDSEL takes precedence over PATDSEL, PATDSEL over LFU_FUNC
2796 #ifdef VERBOSE_BLITTER_LOGGING
2797 if (logBlit)
2798 {
2799 char zfs[512], lfus[512];
2800 zfs[0] = lfus[0] = 0;
2801 if (dstwrz || dstenz || gourz)
2802         sprintf(zfs, " ZMODE=%X", zmode);
2803 if (!(patdsel || adddsel))
2804         sprintf(lfus, " LFUFUNC=%X", lfufunc);
2805 printf("\nBlit! (CMD = %08X)\nFlags:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n", cmd,
2806         (srcen ? " SRCEN" : ""), (srcenx ? " SRCENX" : ""), (srcenz ? " SRCENZ" : ""),
2807         (dsten ? " DSTEN" : ""), (dstenz ? " DSTENZ" : ""), (dstwrz ? " DSTWRZ" : ""),
2808         (clip_a1 ? " CLIP_A1" : ""), (upda1 ? " UPDA1" : ""), (upda1f ? " UPDA1F" : ""),
2809         (upda2 ? " UPDA2" : ""), (dsta2 ? " DSTA2" : ""), (gourd ? " GOURD" : ""),
2810         (gourz ? " GOURZ" : ""), (topben ? " TOPBEN" : ""), (topnen ? " TOPNEN" : ""),
2811         (patdsel ? " PATDSEL" : ""), (adddsel ? " ADDDSEL" : ""), zfs, lfus, (cmpdst ? " CMPDST" : ""),
2812         (bcompen ? " BCOMPEN" : ""), (dcompen ? " DCOMPEN" : ""), (bkgwren ? " BKGWREN" : ""),
2813         (srcshade ? " SRCSHADE" : ""));
2814 printf("  count = %d x %d\n", GET16(blitter_ram, PIXLINECOUNTER + 2), GET16(blitter_ram, PIXLINECOUNTER));
2815 fflush(stdout);
2816 }
2817 #endif
2818
2819         // Lines that don't exist in Jaguar I (and will never be asserted)
2820
2821         bool polygon = false, datinit = false, a1_stepld = false, a2_stepld = false, ext_int = false;
2822         bool istepadd = false, istepfadd = false, finneradd = false, inneradd = false;
2823         bool zstepfadd = false, zstepadd = false;
2824
2825         // Various state lines (initial state--basically the reset state of the FDSYNCs)
2826
2827         bool go = true, idle = true, inner = false, a1fupdate = false, a1update = false,
2828                 zfupdate = false, zupdate = false, a2update = false, init_if = false, init_ii = false,
2829                 init_zf = false, init_zi = false;
2830
2831         bool outer0 = false, indone = false;
2832
2833         bool idlei, inneri, a1fupdatei, a1updatei, zfupdatei, zupdatei, a2updatei, init_ifi, init_iii,
2834                 init_zfi, init_zii;
2835
2836         bool notgzandp = !(gourz && polygon);
2837
2838         // Various registers set up by user
2839
2840         uint16 ocount = GET16(blitter_ram, PIXLINECOUNTER);
2841         uint8 a1_pitch = blitter_ram[A1_FLAGS + 3] & 0x03;
2842         uint8 a2_pitch = blitter_ram[A2_FLAGS + 3] & 0x03;
2843         uint8 a1_pixsize = (blitter_ram[A1_FLAGS + 3] & 0x38) >> 3;
2844         uint8 a2_pixsize = (blitter_ram[A2_FLAGS + 3] & 0x38) >> 3;
2845         uint8 a1_zoffset = (GET16(blitter_ram, A1_FLAGS + 2) >> 6) & 0x07;
2846         uint8 a2_zoffset = (GET16(blitter_ram, A2_FLAGS + 2) >> 6) & 0x07;
2847         uint8 a1_width = (blitter_ram[A1_FLAGS + 2] >> 1) & 0x3F;
2848         uint8 a2_width = (blitter_ram[A2_FLAGS + 2] >> 1) & 0x3F;
2849         bool a2_mask = blitter_ram[A2_FLAGS + 2] & 0x80;
2850         uint8 a1addx = blitter_ram[A1_FLAGS + 1] & 0x03, a2addx = blitter_ram[A2_FLAGS + 1] & 0x03;
2851         bool a1addy = blitter_ram[A1_FLAGS + 1] & 0x04, a2addy = blitter_ram[A2_FLAGS + 1] & 0x04;
2852         bool a1xsign = blitter_ram[A1_FLAGS + 1] & 0x08, a2xsign = blitter_ram[A2_FLAGS + 1] & 0x08;
2853         bool a1ysign = blitter_ram[A1_FLAGS + 1] & 0x10, a2ysign = blitter_ram[A2_FLAGS + 1] & 0x10;
2854         uint32 a1_base = GET32(blitter_ram, A1_BASE) & 0xFFFFFFF8;      // Phrase aligned by ignoring bottom 3 bits
2855         uint32 a2_base = GET32(blitter_ram, A2_BASE) & 0xFFFFFFF8;
2856
2857         uint16 a1_win_x = GET16(blitter_ram, A1_CLIP + 2) & 0x7FFF;
2858         uint16 a1_win_y = GET16(blitter_ram, A1_CLIP + 0) & 0x7FFF;
2859         int16 a1_x = (int16)GET16(blitter_ram, A1_PIXEL + 2);
2860         int16 a1_y = (int16)GET16(blitter_ram, A1_PIXEL + 0);
2861         int16 a1_step_x = (int16)GET16(blitter_ram, A1_STEP + 2);
2862         int16 a1_step_y = (int16)GET16(blitter_ram, A1_STEP + 0);
2863         uint16 a1_stepf_x = GET16(blitter_ram, A1_FSTEP + 2);
2864         uint16 a1_stepf_y = GET16(blitter_ram, A1_FSTEP + 0);
2865         uint16 a1_frac_x = GET16(blitter_ram, A1_FPIXEL + 2);
2866         uint16 a1_frac_y = GET16(blitter_ram, A1_FPIXEL + 0);
2867         int16 a1_inc_x = (int16)GET16(blitter_ram, A1_INC + 2);
2868         int16 a1_inc_y = (int16)GET16(blitter_ram, A1_INC + 0);
2869         uint16 a1_incf_x = GET16(blitter_ram, A1_FINC + 2);
2870         uint16 a1_incf_y = GET16(blitter_ram, A1_FINC + 0);
2871
2872         int16 a2_x = (int16)GET16(blitter_ram, A2_PIXEL + 2);
2873         int16 a2_y = (int16)GET16(blitter_ram, A2_PIXEL + 0);
2874         uint16 a2_mask_x = GET16(blitter_ram, A2_MASK + 2);
2875         uint16 a2_mask_y = GET16(blitter_ram, A2_MASK + 0);
2876         int16 a2_step_x = (int16)GET16(blitter_ram, A2_STEP + 2);
2877         int16 a2_step_y = (int16)GET16(blitter_ram, A2_STEP + 0);
2878
2879         uint64 srcd1 = GET64(blitter_ram, SRCDATA);
2880         uint64 srcd2 = 0;
2881         uint64 dstd = GET64(blitter_ram, DSTDATA);
2882         uint64 patd = GET64(blitter_ram, PATTERNDATA);
2883         uint32 iinc = GET32(blitter_ram, INTENSITYINC);
2884         uint64 srcz1 = GET64(blitter_ram, SRCZINT);
2885         uint64 srcz2 = GET64(blitter_ram, SRCZFRAC);
2886         uint64 dstz = GET64(blitter_ram, DSTZ);
2887         uint32 zinc = GET32(blitter_ram, ZINC);
2888         uint32 collision = GET32(blitter_ram, COLLISIONCTRL);// 0=RESUME, 1=ABORT, 2=STOPEN
2889
2890         uint8 pixsize = (dsta2 ? a2_pixsize : a1_pixsize);      // From ACONTROL
2891
2892 //Testing Trevor McFur--I *think* it's the circle on the lower RHS of the screen...
2893 /*logBlit = false;
2894 if (cmd == 0x05810601 && (GET16(blitter_ram, PIXLINECOUNTER + 2) == 96)
2895         && (GET16(blitter_ram, PIXLINECOUNTER + 0) == 72))
2896         logBlit = true;//*/
2897 //Testing...
2898 //if (cmd == 0x1401060C) patd = 0xFFFFFFFFFFFFFFFFLL;
2899 //if (cmd == 0x1401060C) patd = 0x00000000000000FFLL;
2900 //If it's still not working (bcompen-patd) then see who's writing what to patd and where...
2901 //Still not OK. Check to see who's writing what to where in patd!
2902 //It looks like M68K is writing to the top half of patd... Hmm...
2903 /*
2904 ----> M68K wrote 0000 to byte 15737344 of PATTERNDATA...
2905 --> M68K wrote 00 to byte 0 of PATTERNDATA...
2906 --> M68K wrote 00 to byte 1 of PATTERNDATA...
2907 ----> M68K wrote 00FF to byte 15737346 of PATTERNDATA...
2908 --> M68K wrote 00 to byte 2 of PATTERNDATA...
2909 --> M68K wrote FF to byte 3 of PATTERNDATA...
2910 logBlit = F, cmd = 1401060C
2911
2912 Wren0 := ND6 (wren\[0], gpua\[5], gpua\[6..8], bliten, gpu_memw);
2913 Wren1 := ND6 (wren\[1], gpua[5], gpua\[6..8], bliten, gpu_memw);
2914 Wren2 := ND6 (wren\[2], gpua\[5], gpua[6], gpua\[7..8], bliten, gpu_memw);
2915 Wren3 := ND6 (wren\[3], gpua[5], gpua[6], gpua\[7..8], bliten, gpu_memw);
2916
2917 --> 0 000x xx00
2918 Dec0  := D38GH (a1baseld, a1flagld, a1winld, a1ptrld, a1stepld, a1stepfld, a1fracld, a1incld, gpua[2..4], wren\[0]);
2919 --> 0 001x xx00
2920 Dec1  := D38GH (a1incfld, a2baseld, a2flagld, a2maskld, a2ptrldg, a2stepld, cmdldt, countldt, gpua[2..4], wren\[1]);
2921 --> 0 010x xx00
2922 Dec2  := D38GH (srcd1ldg[0..1], dstdldg[0..1], dstzldg[0..1], srcz1ldg[0..1], gpua[2..4], wren\[2]);
2923 --> 0 011x xx00
2924 Dec3  := D38GH (srcz2ld[0..1], patdld[0..1], iincld, zincld, stopld, intld[0], gpua[2..4], wren\[3]);
2925
2926 wren[3] is asserted when gpu address bus = 0 011x xx00
2927 patdld[0] -> 0 0110 1000 -> $F02268 (lo 32 bits)
2928 patdld[1] -> 0 0110 1100 -> $F0226C (hi 32 bits)
2929
2930 So... It's reversed! The data organization of the patd register is [low 32][high 32]! !!! FIX !!! [DONE]
2931 And fix all the other 64 bit registers [DONE]
2932 */
2933 /*if (cmd == 0x1401060C)
2934 {
2935         printf("logBlit = %s, cmd = %08X\n", (logBlit ? "T" : "F"), cmd);
2936         fflush(stdout);
2937 }*/
2938 /*logBlit = false;
2939 if ((cmd == 0x00010200) && (GET16(blitter_ram, PIXLINECOUNTER + 2) == 9))
2940         logBlit = true;
2941
2942 ; Pink altimeter bar
2943
2944 Blit! (00110000 <- 000BF010) count: 9 x 23, A1/2_FLAGS: 000042E2/00010020 [cmd: 00010200]
2945  CMD -> src:  dst:  misc:  a1ctl: UPDA1  mode:  ity: PATDSEL z-op:  op: LFU_CLEAR ctrl:
2946   A1 step values: -10 (X), 1 (Y)
2947   A1 -> pitch: 4 phrases, depth: 16bpp, z-off: 3, width: 320 (21), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
2948   A2 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 1 (00), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
2949         A1 x/y: 262/132, A2 x/y: 129/0
2950 ;x-coord is 257 in pic, so add 5
2951 ;20 for ship, 33 for #... Let's see if we can find 'em!
2952
2953 ; Black altimeter bar
2954
2955 Blit! (00110000 <- 000BF010) count: 5 x 29, A1/2_FLAGS: 000042E2/00010020 [cmd: 00010200]
2956  CMD -> src:  dst:  misc:  a1ctl: UPDA1  mode:  ity: PATDSEL z-op:  op: LFU_CLEAR ctrl:
2957   A1 step values: -8 (X), 1 (Y)
2958   A1 -> pitch: 4 phrases, depth: 16bpp, z-off: 3, width: 320 (21), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
2959   A2 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 1 (00), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
2960         A1 x/y: 264/126, A2 x/y: 336/0
2961
2962 Here's the pink bar--note that it's phrase mode without dread, so how does this work???
2963 Not sure, but I *think* that somehow it MUXes the data at the write site in on the left or right side
2964 of the write data when masked in phrase mode. I'll have to do some tracing to see if this is the mechanism
2965 it uses or not...
2966
2967 Blit! (CMD = 00010200)
2968 Flags: UPDA1 PATDSEL
2969   count = 9 x 11
2970   a1_base = 00110010, a2_base = 000BD7E0
2971   a1_x = 0106, a1_y = 0090, a1_frac_x = 0000, a1_frac_y = 8000, a2_x = 025A, a2_y = 0000
2972   a1_step_x = FFF6, a1_step_y = 0001, a1_stepf_x = 5E00, a1_stepf_y = D100, a2_step_x = FFF7, a2_step_y = 0001
2973   a1_inc_x = 0001, a1_inc_y = FFFF, a1_incf_x = 0000, a1_incf_y = E000
2974   a1_win_x = 0000, a1_win_y = 0000, a2_mask_x = 0000, a2_mask_y = 0000
2975   a2_mask=F a1add=+phr/+0 a2add=+1/+0
2976   a1_pixsize = 4, a2_pixsize = 4
2977    srcd=BAC673AC2C92E578  dstd=0000000000000000 patd=74C074C074C074C0 iinc=0002E398
2978   srcz1=7E127E12000088DA srcz2=DBE06DF000000000 dstz=0000000000000000 zinc=FFFE4840, coll=0
2979   Phrase mode is ON
2980   [in=T a1f=F a1=F zf=F z=F a2=F iif=F iii=F izf=F izi=F]
2981   Entering INNER state...
2982   Entering DWRITE state...
2983      Dest write address/pix address: 0016A830/0 [dstart=20 dend=40 pwidth=8 srcshift=0][daas=0 dabs=0 dam=7 ds=0 daq=F] [7400000074C074C0] (icount=0007, inc=2)
2984   Entering A1_ADD state [a1_x=0106, a1_y=0090, addasel=0, addbsel=0, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
2985   Entering DWRITE state...
2986      Dest write address/pix address: 0016A850/0 [dstart=0 dend=40 pwidth=8 srcshift=0][daas=0 dabs=0 dam=7 ds=0 daq=F] [74C074C074C074C0] (icount=0003, inc=4)
2987   Entering A1_ADD state [a1_x=0108, a1_y=0090, addasel=0, addbsel=0, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
2988   Entering DWRITE state...
2989      Dest write address/pix address: 0016A870/0 [dstart=0 dend=30 pwidth=8 srcshift=0][daas=0 dabs=0 dam=7 ds=0 daq=F] [74C074C074C00000] (icount=FFFF, inc=4)
2990   Entering A1_ADD state [a1_x=010C, a1_y=0090, addasel=0, addbsel=0, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
2991   Entering IDLE_INNER state...
2992   Leaving INNER state... (ocount=000A)
2993   [in=F a1f=F a1=T zf=F z=F a2=F iif=F iii=F izf=F izi=F]
2994   Entering A1UPDATE state... (272/144 -> 262/145)
2995   [in=T a1f=F a1=F zf=F z=F a2=F iif=F iii=F izf=F izi=F]
2996   Entering INNER state...
2997 */
2998
2999         // Bugs in Jaguar I
3000
3001         a2addy = a1addy;                                                        // A2 channel Y add bit is tied to A1's
3002
3003 //if (logBlit && (ocount > 20)) logBlit = false;
3004 #ifdef VERBOSE_BLITTER_LOGGING
3005 if (logBlit)
3006 {
3007 printf("  a1_base = %08X, a2_base = %08X\n", a1_base, a2_base);
3008 printf("  a1_x = %04X, a1_y = %04X, a1_frac_x = %04X, a1_frac_y = %04X, a2_x = %04X, a2_y = %04X\n", (uint16)a1_x, (uint16)a1_y, a1_frac_x, a1_frac_y, (uint16)a2_x, (uint16)a2_y);
3009 printf("  a1_step_x = %04X, a1_step_y = %04X, a1_stepf_x = %04X, a1_stepf_y = %04X, a2_step_x = %04X, a2_step_y = %04X\n", (uint16)a1_step_x, (uint16)a1_step_y, a1_stepf_x, a1_stepf_y, (uint16)a2_step_x, (uint16)a2_step_y);
3010 printf("  a1_inc_x = %04X, a1_inc_y = %04X, a1_incf_x = %04X, a1_incf_y = %04X\n", (uint16)a1_inc_x, (uint16)a1_inc_y, a1_incf_x, a1_incf_y);
3011 printf("  a1_win_x = %04X, a1_win_y = %04X, a2_mask_x = %04X, a2_mask_y = %04X\n", a1_win_x, a1_win_y, a2_mask_x, a2_mask_y);
3012 char x_add_str[4][4] = { "phr", "1", "0", "inc" };
3013 printf("  a2_mask=%s a1add=%s%s/%s%s a2add=%s%s/%s%s\n", (a2_mask ? "T" : "F"), (a1xsign ? "-" : "+"), x_add_str[a1addx],
3014         (a1ysign ? "-" : "+"), (a1addy ? "1" : "0"), (a2xsign ? "-" : "+"), x_add_str[a2addx],
3015         (a2ysign ? "-" : "+"), (a2addy ? "1" : "0"));
3016 printf("  a1_pixsize = %u, a2_pixsize = %u\n", a1_pixsize, a2_pixsize);
3017 printf("   srcd=%08X%08X  dstd=%08X%08X patd=%08X%08X iinc=%08X\n",
3018         (uint32)(srcd1 >> 32), (uint32)(srcd1 & 0xFFFFFFFF),
3019         (uint32)(dstd >> 32), (uint32)(dstd & 0xFFFFFFFF),
3020         (uint32)(patd >> 32), (uint32)(patd & 0xFFFFFFFF), iinc);
3021 printf("  srcz1=%08X%08X srcz2=%08X%08X dstz=%08X%08X zinc=%08X, coll=%X\n",
3022         (uint32)(srcz1 >> 32), (uint32)(srcz1 & 0xFFFFFFFF),
3023         (uint32)(srcz2 >> 32), (uint32)(srcz2 & 0xFFFFFFFF),
3024         (uint32)(dstz >> 32), (uint32)(dstz & 0xFFFFFFFF), zinc, collision);
3025 }
3026 #endif
3027
3028         // Various state lines set up by user
3029
3030         bool phrase_mode = ((!dsta2 && a1addx == 0) || (dsta2 && a2addx == 0) ? true : false);  // From ACONTROL
3031 #ifdef VERBOSE_BLITTER_LOGGING
3032 if (logBlit)
3033 {
3034 printf("  Phrase mode is %s\n", (phrase_mode ? "ON" : "off"));
3035 fflush(stdout);
3036 }
3037 #endif
3038 //logBlit = false;
3039
3040         // Stopgap vars to simulate various lines
3041
3042         uint16 a1FracCInX = 0, a1FracCInY = 0;
3043
3044         while (true)
3045         {
3046                 // IDLE
3047
3048                 if ((idle && !go) || (inner && outer0 && indone))
3049                 {
3050 #ifdef VERBOSE_BLITTER_LOGGING
3051 if (logBlit)
3052 {
3053 printf("  Entering IDLE state...\n");
3054 fflush(stdout);
3055 }
3056 #endif
3057                         idlei = true;
3058
3059 //Instead of a return, let's try breaking out of the loop...
3060 break;
3061 //                      return;
3062                 }
3063                 else
3064                         idlei = false;
3065
3066                 // INNER LOOP ACTIVE
3067 /*
3068   Entering DWRITE state... (icount=0000, inc=4)
3069   Entering IDLE_INNER state...
3070   Leaving INNER state... (ocount=00EF)
3071   [in=T a1f=F a1=T zf=F z=F a2=F iif=F iii=F izf=F izi=F]
3072   Entering INNER state...
3073 Now:
3074   [in=F a1f=F a1=F zf=F z=F a2=F iif=F iii=F izf=F izi=F]
3075 */
3076
3077                 if ((idle && go && !datinit)
3078                         || (inner && !indone)
3079                         || (inner && indone && !outer0 && !upda1f && !upda1 && notgzandp && !upda2 && !datinit)
3080                         || (a1update && !upda2 && notgzandp && !datinit)
3081                         || (zupdate && !upda2 && !datinit)
3082                         || (a2update && !datinit)
3083                         || (init_ii && !gourz)
3084                         || (init_zi))
3085                 {
3086                         inneri = true;
3087                 }
3088                 else
3089                         inneri = false;
3090
3091                 // A1 FRACTION UPDATE
3092
3093                 if (inner && indone && !outer0 && upda1f)
3094                 {
3095                         a1fupdatei = true;
3096                 }
3097                 else
3098                         a1fupdatei = false;
3099
3100                 // A1 POINTER UPDATE
3101
3102                 if ((a1fupdate)
3103                         || (inner && indone && !outer0 && !upda1f && upda1))
3104                 {
3105                         a1updatei = true;
3106                 }
3107                 else
3108                         a1updatei = false;
3109
3110                 // Z FRACTION UPDATE
3111
3112                 if ((a1update && gourz && polygon)
3113                         || (inner && indone && !outer0 && !upda1f && !upda1 && gourz && polygon))
3114                 {
3115                         zfupdatei = true;
3116                 }
3117                 else
3118                         zfupdatei = false;
3119
3120                 // Z INTEGER UPDATE
3121
3122                 if (zfupdate)
3123                 {
3124                         zupdatei = true;
3125                 }
3126                 else
3127                         zupdatei = false;
3128
3129                 // A2 POINTER UPDATE
3130
3131                 if ((a1update && upda2 && notgzandp)
3132                         || (zupdate && upda2)
3133                         || (inner && indone && !outer0 && !upda1f && notgzandp && !upda1 && upda2))
3134                 {
3135                         a2updatei = true;
3136                 }
3137                 else
3138                         a2updatei = false;
3139
3140                 // INITIALIZE INTENSITY FRACTION
3141
3142                 if ((zupdate && !upda2 && datinit)
3143                         || (a1update && !upda2 && datinit && notgzandp)
3144                         || (inner && indone && !outer0 && !upda1f && !upda1 && notgzandp && !upda2 && datinit)
3145                         || (a2update && datinit)
3146                         || (idle && go && datinit))
3147                 {
3148                         init_ifi = true;
3149                 }
3150                 else
3151                         init_ifi = false;
3152
3153                 // INITIALIZE INTENSITY INTEGER
3154
3155                 if (init_if)
3156                 {
3157                         init_iii = true;
3158                 }
3159                 else
3160                         init_iii = false;
3161
3162                 // INITIALIZE Z FRACTION
3163
3164                 if (init_ii && gourz)
3165                 {
3166                         init_zfi = true;
3167                 }
3168                 else
3169                         init_zfi = false;
3170
3171                 // INITIALIZE Z INTEGER
3172
3173                 if (init_zf)
3174                 {
3175                         init_zii = true;
3176                 }
3177                 else
3178                         init_zii = false;
3179
3180 // Here we move the fooi into their foo counterparts in order to simulate the moving
3181 // of data into the various FDSYNCs... Each time we loop we simulate one clock cycle...
3182
3183                 idle = idlei;
3184                 inner = inneri;
3185                 a1fupdate = a1fupdatei;
3186                 a1update = a1updatei;
3187                 zfupdate = zfupdatei;           // *
3188                 zupdate = zupdatei;                     // *
3189                 a2update = a2updatei;
3190                 init_if = init_ifi;                     // *
3191                 init_ii = init_iii;                     // *
3192                 init_zf = init_zfi;                     // *
3193                 init_zi = init_zii;                     // *
3194 // * denotes states that will never assert for Jaguar I
3195 #ifdef VERBOSE_BLITTER_LOGGING
3196 if (logBlit)
3197 {
3198 printf("  [in=%c a1f=%c a1=%c zf=%c z=%c a2=%c iif=%c iii=%c izf=%c izi=%c]\n",
3199         (inner ? 'T' : 'F'), (a1fupdate ? 'T' : 'F'), (a1update ? 'T' : 'F'), (zfupdate ? 'T' : 'F'),
3200         (zupdate ? 'T' : 'F'), (a2update ? 'T' : 'F'), (init_if ? 'T' : 'F'), (init_ii ? 'T' : 'F'),
3201         (init_zf ? 'T' : 'F'), (init_zi ? 'T' : 'F'));
3202 fflush(stdout);
3203 }
3204 #endif
3205
3206 // Now, depending on how we want to handle things, we could either put the implementation
3207 // of the various pieces up above, or handle them down below here.
3208
3209 // Let's try postprocessing for now...
3210
3211                 if (inner)
3212                 {
3213                         indone = false;
3214 #ifdef VERBOSE_BLITTER_LOGGING
3215 if (logBlit)
3216 {
3217 printf("  Entering INNER state...\n");
3218 fflush(stdout);
3219 }
3220 #endif
3221                         uint16 icount = GET16(blitter_ram, PIXLINECOUNTER + 2);
3222                         bool idle_inner = true, step = true, sreadx = false, szreadx = false, sread = false,
3223                                 szread = false, dread = false, dzread = false, dwrite = false, dzwrite = false;
3224                         bool inner0 = false;
3225                         bool idle_inneri, sreadxi, szreadxi, sreadi, szreadi, dreadi, dzreadi, dwritei, dzwritei;
3226
3227                         // State lines that will never assert in Jaguar I
3228
3229                         bool textext = false, txtread = false;
3230
3231 //other stuff
3232 uint8 srcshift = 0;
3233 bool sshftld = true; // D flipflop (D -> Q): instart -> sshftld
3234 //NOTE: sshftld probably is only asserted at the beginning of the inner loop. !!! FIX !!!
3235 /*
3236 Blit! (CMD = 01800005)
3237 Flags: SRCEN SRCENX LFUFUNC=C
3238   count = 626 x 1
3239   a1_base = 00037290, a2_base = 000095D0
3240   a1_x = 0000, a1_y = 0000, a2_x = 0002, a2_y = 0000
3241   a1_pixsize = 4, a2_pixsize = 4
3242   srcd=0000000000000000, dstd=0000000000000000, patd=0000000000000000
3243   Phrase mode is ON
3244   [in=T a1f=F a1=F zf=F z=F a2=F iif=F iii=F izf=F izi=F]
3245   Entering INNER state...
3246   Entering SREADX state... [dstart=0 dend=20 pwidth=8 srcshift=20]
3247     Source extra read address/pix address: 000095D4/0 [0000001C00540038]
3248   Entering A2_ADD state [a2_x=0002, a2_y=0000, addasel=0, addbsel=1, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
3249   Entering SREAD state... [dstart=0 dend=20 pwidth=8 srcshift=0]
3250     Source read address/pix address: 000095D8/0 [0054003800009814]
3251   Entering A2_ADD state [a2_x=0004, a2_y=0000, addasel=0, addbsel=1, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
3252   Entering DWRITE state...
3253      Dest write address/pix address: 00037290/0 [dstart=0 dend=20 pwidth=8 srcshift=0] (icount=026E, inc=4)
3254   Entering A1_ADD state [a1_x=0000, a1_y=0000, addasel=0, addbsel=0, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
3255   Entering SREAD state... [dstart=0 dend=20 pwidth=8 srcshift=0]
3256     Source read address/pix address: 000095E0/0 [00009968000377C7]
3257   Entering A2_ADD state [a2_x=0008, a2_y=0000, addasel=0, addbsel=1, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
3258   Entering DWRITE state...
3259      Dest write address/pix address: 00037298/0 [dstart=0 dend=20 pwidth=8 srcshift=0] (icount=026A, inc=4)
3260   Entering A1_ADD state [a1_x=0004, a1_y=0000, addasel=0, addbsel=0, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
3261 */
3262
3263 //                      while (!idle_inner)
3264                         while (true)
3265                         {
3266                                 // IDLE
3267
3268                                 if ((idle_inner && !step)
3269                                         || (dzwrite && step && inner0)
3270                                         || (dwrite && step && !dstwrz && inner0))
3271                                 {
3272 #ifdef VERBOSE_BLITTER_LOGGING
3273 if (logBlit)
3274 {
3275 printf("  Entering IDLE_INNER state...\n");
3276 fflush(stdout);
3277 }
3278 #endif
3279                                         idle_inneri = true;
3280 break;
3281                                 }
3282                                 else
3283                                         idle_inneri = false;
3284
3285                                 // EXTRA SOURCE DATA READ
3286
3287                                 if ((idle_inner && step && srcenx)
3288                                         || (sreadx && !step))
3289                                 {
3290                                         sreadxi = true;
3291                                 }
3292                                 else
3293                                         sreadxi = false;
3294
3295                                 // EXTRA SOURCE ZED READ
3296
3297                                 if ((sreadx && step && srcenz)
3298                                         || (szreadx && !step))
3299                                 {
3300                                         szreadxi = true;
3301                                 }
3302                                 else
3303                                         szreadxi = false;
3304
3305                                 // TEXTURE DATA READ (not implemented because not in Jaguar I)
3306
3307                                 // SOURCE DATA READ
3308
3309                                 if ((szreadx && step && !textext)
3310                                         || (sreadx && step && !srcenz && srcen)
3311                                         || (idle_inner && step && !srcenx && !textext && srcen)
3312                                         || (dzwrite && step && !inner0 && !textext && srcen)
3313                                         || (dwrite && step && !dstwrz && !inner0 && !textext && srcen)
3314                                         || (txtread && step && srcen)
3315                                         || (sread && !step))
3316                                 {
3317                                         sreadi = true;
3318                                 }
3319                                 else
3320                                         sreadi = false;
3321
3322                                 // SOURCE ZED READ
3323
3324                                 if ((sread && step && srcenz)
3325                                         || (szread && !step))
3326                                 {
3327                                         szreadi = true;
3328                                 }
3329                                 else
3330                                         szreadi = false;
3331
3332                                 // DESTINATION DATA READ
3333
3334                                 if ((szread && step && dsten)
3335                                         || (sread && step && !srcenz && dsten)
3336                                         || (sreadx && step && !srcenz && !textext && !srcen && dsten)
3337                                         || (idle_inner && step && !srcenx && !textext && !srcen && dsten)
3338                                         || (dzwrite && step && !inner0 && !textext && !srcen && dsten)
3339                                         || (dwrite && step && !dstwrz && !inner0 && !textext && !srcen && dsten)
3340                                         || (txtread && step && !srcen && dsten)
3341                                         || (dread && !step))
3342                                 {
3343                                         dreadi = true;
3344                                 }
3345                                 else
3346                                         dreadi = false;
3347
3348                                 // DESTINATION ZED READ
3349
3350                                 if ((dread && step && dstenz)
3351                                         || (szread && step && !dsten && dstenz)
3352                                         || (sread && step && !srcenz && !dsten && dstenz)
3353                                         || (sreadx && step && !srcenz && !textext && !srcen && !dsten && dstenz)
3354                                         || (idle_inner && step && !srcenx && !textext && !srcen && !dsten && dstenz)
3355                                         || (dzwrite && step && !inner0 && !textext && !srcen && !dsten && dstenz)
3356                                         || (dwrite && step && !dstwrz && !inner0 && !textext && !srcen && !dsten && dstenz)
3357                                         || (txtread && step && !srcen && !dsten && dstenz)
3358                                         || (dzread && !step))
3359                                 {
3360                                         dzreadi = true;
3361                                 }
3362                                 else
3363                                         dzreadi = false;
3364
3365                                 // DESTINATION DATA WRITE
3366
3367                                 if ((dzread && step)
3368                                         || (dread && step && !dstenz)
3369                                         || (szread && step && !dsten && !dstenz)
3370                                         || (sread && step && !srcenz && !dsten && !dstenz)
3371                                         || (txtread && step && !srcen && !dsten && !dstenz)
3372                                         || (sreadx && step && !srcenz && !textext && !srcen && !dsten && !dstenz)
3373                                         || (idle_inner && step && !srcenx && !textext && !srcen && !dsten && !dstenz)
3374                                         || (dzwrite && step && !inner0 && !textext && !srcen && !dsten && !dstenz)
3375                                         || (dwrite && step && !dstwrz && !inner0 && !textext && !srcen && !dsten && !dstenz)
3376                                         || (dwrite && !step))
3377                                 {
3378                                         dwritei = true;
3379                                 }
3380                                 else
3381                                         dwritei = false;
3382
3383                                 // DESTINATION ZED WRITE
3384
3385                                 if ((dzwrite && !step)
3386                                         || (dwrite && step && dstwrz))
3387                                 {
3388                                         dzwritei = true;
3389                                 }
3390                                 else
3391                                         dzwritei = false;
3392
3393 //Kludge: A QnD way to make sure that sshftld is asserted only for the first
3394 //        cycle of the inner loop...
3395 sshftld = idle_inner;
3396
3397 // Here we move the fooi into their foo counterparts in order to simulate the moving
3398 // of data into the various FDSYNCs... Each time we loop we simulate one clock cycle...
3399
3400                                 idle_inner = idle_inneri;
3401                                 sreadx = sreadxi;
3402                                 szreadx = szreadxi;
3403                                 sread = sreadi;
3404                                 szread = szreadi;
3405                                 dread = dreadi;
3406                                 dzread = dzreadi;
3407                                 dwrite = dwritei;
3408                                 dzwrite = dzwritei;
3409
3410 // Here are a few more decodes--not sure if they're supposed to go here or not...
3411
3412                                 bool srca_addi = (sreadxi && !srcenz) || (sreadi && !srcenz) || szreadxi || szreadi;
3413
3414                                 bool dsta_addi = (dwritei && !dstwrz) || dzwritei;
3415
3416                                 bool gensrc = sreadxi || szreadxi || sreadi || szreadi;
3417                                 bool gendst = dreadi || dzreadi || dwritei || dzwritei;
3418                                 bool gena2i = (gensrc && !dsta2) || (gendst && dsta2);
3419
3420                                 bool zaddr = szreadx || szread || dzread || dzwrite;
3421
3422 // Some stuff from MCONTROL.NET--not sure if this is the correct use of this decode or not...
3423 /*Fontread\     := OND1 (fontread\, sread[1], sreadx[1], bcompen);
3424 Fontread        := INV1 (fontread, fontread\);
3425 Justt           := NAN3 (justt, fontread\, phrase_mode, tactive\);
3426 Justify         := TS (justify, justt, busen);*/
3427 bool fontread = (sread || sreadx) && bcompen;
3428 bool justify = !(!fontread && phrase_mode /*&& tactive*/);
3429
3430 /* Generate inner loop update enables */
3431 /*
3432 A1_addi         := MX2 (a1_addi, dsta_addi, srca_addi, dsta2);
3433 A2_addi         := MX2 (a2_addi, srca_addi, dsta_addi, dsta2);
3434 A1_add          := FD1 (a1_add, a1_add\, a1_addi, clk);
3435 A2_add          := FD1 (a2_add, a2_add\, a2_addi, clk);
3436 A2_addb         := BUF1 (a2_addb, a2_add);
3437 */
3438                                 bool a1_add = (dsta2 ? srca_addi : dsta_addi);
3439                                 bool a2_add = (dsta2 ? dsta_addi : srca_addi);
3440
3441 /* Address adder input A register selection
3442 000     A1 step integer part
3443 001     A1 step fraction part
3444 010     A1 increment integer part
3445 011     A1 increment fraction part
3446 100     A2 step
3447
3448 bit 2 = a2update
3449 bit 1 = /a2update . (a1_add . a1addx[0..1])
3450 bit 0 = /a2update . ( a1fupdate
3451                                     + a1_add . atick[0] . a1addx[0..1])
3452 The /a2update term on bits 0 and 1 is redundant.
3453 Now look-ahead based
3454 */
3455                                 uint8 addasel = (a1fupdate || (a1_add && a1addx == 3) ? 0x01 : 0x00);
3456                                 addasel |= (a1_add && a1addx == 3 ? 0x02 : 0x00);
3457                                 addasel |= (a2update ? 0x04 : 0x00);
3458 /* Address adder input A X constant selection
3459 adda_xconst[0..2] generate a power of 2 in the range 1-64 or all
3460 zeroes when they are all 1
3461 Remember - these are pixels, so to add one phrase the pixel size
3462 has to be taken into account to get the appropriate value.
3463 for A1
3464                 if a1addx[0..1] are 00 set 6 - pixel size
3465                 if a1addx[0..1] are 01 set the value 000
3466                 if a1addx[0..1] are 10 set the value 111
3467 similarly for A2
3468 JLH: Also, 11 will likewise set the value to 111
3469 */
3470                                 uint8 a1_xconst = 6 - a1_pixsize, a2_xconst = 6 - a2_pixsize;
3471
3472                                 if (a1addx == 1)
3473                                     a1_xconst = 0;
3474                                 else if (a1addx & 0x02)
3475                                     a1_xconst = 7;
3476
3477                                 if (a2addx == 1)
3478                                     a2_xconst = 0;
3479                                 else if (a2addx & 0x02)
3480                                     a2_xconst = 7;
3481
3482                                 uint8 adda_xconst = (a2_add ? a2_xconst : a1_xconst);
3483 /* Address adder input A Y constant selection
3484 22 June 94 - This was erroneous, because only the a1addy bit was reflected here.
3485 Therefore, the selection has to be controlled by a bug fix bit.
3486 JLH: Bug fix bit in Jaguar II--not in Jaguar I!
3487 */
3488                                 bool adda_yconst = a1addy;
3489 /* Address adder input A register versus constant selection
3490 given by          a1_add . a1addx[0..1]
3491                                 + a1update
3492                                 + a1fupdate
3493                                 + a2_add . a2addx[0..1]
3494                                 + a2update
3495 */
3496                                 bool addareg = ((a1_add && a1addx == 3) || a1update || a1fupdate
3497                                         || (a2_add && a2addx == 3) || a2update ? true : false);
3498 /* The adders can be put into subtract mode in add pixel size
3499 mode when the corresponding flags are set */
3500                                 bool suba_x = ((a1_add && a1xsign && a1addx == 1) || (a2_add && a2xsign && a2addx == 1) ? true : false);
3501                                 bool suba_y = ((a1_add && a1addy && a1ysign) || (a2_add && a2addy && a2ysign) ? true : false);
3502 /* Address adder input B selection
3503 00      A1 pointer
3504 01      A2 pointer
3505 10      A1 fraction
3506 11      Zero
3507
3508 Bit 1 =   a1fupdate
3509                 + (a1_add . atick[0] . a1addx[0..1])
3510                 + a1fupdate . a1_stepld
3511                 + a1update . a1_stepld
3512                 + a2update . a2_stepld
3513 Bit 0 =   a2update + a2_add
3514                 + a1fupdate . a1_stepld
3515                 + a1update . a1_stepld
3516                 + a2update . a2_stepld
3517 */
3518                                 uint8 addbsel = (a2update || a2_add || (a1fupdate && a1_stepld)
3519                                     || (a1update && a1_stepld) || (a2update && a2_stepld) ? 0x01 : 0x00);
3520                                 addbsel |= (a1fupdate || (a1_add && a1addx == 3) || (a1fupdate && a1_stepld)
3521                                     || (a1update && a1_stepld) || (a2update && a2_stepld) ? 0x02 : 0x00);
3522
3523 /* The modulo bits are used to align X onto a phrase boundary when
3524 it is being updated by one phrase
3525 000     no mask
3526 001     mask bit 0
3527 010     mask bits 1-0
3528 ..
3529 110     mask bits 5-0
3530
3531 Masking is enabled for a1 when a1addx[0..1] is 00, and the value
3532 is 6 - the pixel size (again!)
3533 */
3534                                 uint8 maska1 = (a1_add && a1addx == 0 ? 6 - a1_pixsize : 0);
3535                                 uint8 maska2 = (a2_add && a2addx == 0 ? 6 - a2_pixsize : 0);
3536                                 uint8 modx = (a2_add ? maska2 : maska1);
3537 /* Generate load strobes for the increment updates */
3538
3539 /*A1pldt                := NAN2 (a1pldt, atick[1], a1_add);
3540 A1ptrldi        := NAN2 (a1ptrldi, a1update\, a1pldt);
3541
3542 A1fldt          := NAN4 (a1fldt, atick[0], a1_add, a1addx[0..1]);
3543 A1fracldi       := NAN2 (a1fracldi, a1fupdate\, a1fldt);
3544
3545 A2pldt          := NAN2 (a2pldt, atick[1], a2_add);
3546 A2ptrldi        := NAN2 (a2ptrldi, a2update\, a2pldt);*/
3547                                 bool a1fracldi = a1fupdate || (a1_add && a1addx == 3);
3548
3549 // Some more from DCONTROL...
3550 // atick[] just MAY be important here! We're assuming it's true and dropping the term...
3551 // That will probably screw up some of the lower terms that seem to rely on the timing of it...
3552 #warning srcdreadd is not properly initialized!
3553 bool srcdreadd = false;                                         // Set in INNER.NET
3554 //Shadeadd\     := NAN2H (shadeadd\, dwrite, srcshade);
3555 //Shadeadd      := INV2 (shadeadd, shadeadd\);
3556 bool shadeadd = dwrite && srcshade;
3557 /* Data adder control, input A selection
3558 000   Destination data
3559 001   Initialiser pixel value
3560 100   Source data      - computed intensity fraction
3561 101   Pattern data     - computed intensity
3562 110   Source zed 1     - computed zed
3563 111   Source zed 2     - computed zed fraction
3564
3565 Bit 0 =   dwrite  . gourd . atick[1]
3566         + dzwrite . gourz . atick[0]
3567         + istepadd
3568         + zstepfadd
3569         + init_if + init_ii + init_zf + init_zi
3570 Bit 1 =   dzwrite . gourz . (atick[0] + atick[1])
3571         + zstepadd
3572         + zstepfadd
3573 Bit 2 =   (gourd + gourz) . /(init_if + init_ii + init_zf + init_zi)
3574         + dwrite  . srcshade
3575 */
3576 uint8 daddasel = ((dwrite && gourd) || (dzwrite && gourz) || istepadd || zstepfadd
3577         || init_if || init_ii || init_zf || init_zi ? 0x01 : 0x00);
3578 daddasel |= ((dzwrite && gourz) || zstepadd || zstepfadd ? 0x02 : 0x00);
3579 daddasel |= (((gourd || gourz) && !(init_if || init_ii || init_zf || init_zi))
3580         || (dwrite && srcshade) ? 0x04 : 0x00);
3581 /* Data adder control, input B selection
3582 0000    Source data
3583 0001    Data initialiser increment
3584 0100    Bottom 16 bits of I increment repeated four times
3585 0101    Top 16 bits of I increment repeated four times
3586 0110    Bottom 16 bits of Z increment repeated four times
3587 0111    Top 16 bits of Z increment repeated four times
3588 1100    Bottom 16 bits of I step repeated four times
3589 1101    Top 16 bits of I step repeated four times
3590 1110    Bottom 16 bits of Z step repeated four times
3591 1111    Top 16 bits of Z step repeated four times
3592
3593 Bit 0 =   dwrite  . gourd . atick[1]
3594         + dzwrite . gourz . atick[1]
3595         + dwrite  . srcshade
3596         + istepadd
3597         + zstepadd
3598         + init_if + init_ii + init_zf + init_zi
3599 Bit 1 =   dzwrite . gourz . (atick[0] + atick[1])
3600         + zstepadd
3601         + zstepfadd
3602 Bit 2 =   dwrite  . gourd . (atick[0] + atick[1])
3603         + dzwrite . gourz . (atick[0] + atick[1])
3604         + dwrite  . srcshade
3605         + istepadd + istepfadd + zstepadd + zstepfadd
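3606 Bit 3 =   istepadd + istepfadd + zstepadd + zstepfadd   [NOTE(review): '+' is OR here, as in Bit 2; the daddbsel decode below ANDs these four terms--confirm]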
3607 */
3608 uint8 daddbsel = ((dwrite && gourd) || (dzwrite && gourz) || (dwrite && srcshade)
3609         || istepadd || zstepadd || init_if || init_ii || init_zf || init_zi ? 0x01 : 0x00);
3610 daddbsel |= ((dzwrite && gourz) || zstepadd || zstepfadd ? 0x02 : 0x00);
3611 daddbsel |= ((dwrite && gourd) || (dzwrite && gourz) || (dwrite && srcshade)
3612         || istepadd || istepfadd || zstepadd || zstepfadd ? 0x04 : 0x00);
3613 daddbsel |= (istepadd && istepfadd && zstepadd && zstepfadd ? 0x08 : 0x00);
3614 /* Data adder mode control
3615 000     16-bit normal add
3616 001     16-bit saturating add with carry
3617 010     8-bit saturating add with carry, carry into top byte is
3618         inhibited (YCrCb)
3619 011     8-bit saturating add with carry, carry into top byte and
3620         between top nybbles is inhibited (CRY)
3621 100     16-bit normal add with carry
3622 101     16-bit saturating add
3623 110     8-bit saturating add, carry into top byte is inhibited
3624 111     8-bit saturating add, carry into top byte and between top
3625         nybbles is inhibited
3626
3627 The first five are used for Gouraud calculations, the latter three
3628 for adding source and destination data
3629
3630 Bit 0 =   dzwrite . gourz . atick[1]
3631         + dwrite  . gourd . atick[1] . /topnen . /topben . /ext_int
3632         + dwrite  . gourd . atick[1] .  topnen .  topben . /ext_int
3633         + zstepadd
3634         + istepadd . /topnen . /topben . /ext_int
3635         + istepadd .  topnen .  topben . /ext_int
3636         + /gourd . /gourz . /topnen . /topben
3637         + /gourd . /gourz .  topnen .  topben
3638         + shadeadd . /topnen . /topben
3639         + shadeadd .  topnen .  topben
3640         + init_ii . /topnen . /topben . /ext_int
3641         + init_ii .  topnen .  topben . /ext_int
3642         + init_zi
3643
3644 Bit 1 =   dwrite . gourd . atick[1] . /topben . /ext_int
3645         + istepadd . /topben . /ext_int
3646         + /gourd . /gourz .  /topben
3647         + shadeadd .  /topben
3648         + init_ii .  /topben . /ext_int
3649
3650 Bit 2 =   /gourd . /gourz
3651         + shadeadd
3652         + dwrite  . gourd . atick[1] . ext_int
3653         + istepadd . ext_int
3654         + init_ii . ext_int
3655 */
3656 uint8 daddmode = ((dzwrite && gourz) || (dwrite && gourd && !topnen && !topben && !ext_int)
3657         || (dwrite && gourd && topnen && topben && !ext_int) || zstepadd
3658         || (istepadd && !topnen && !topben && !ext_int)
3659         || (istepadd && topnen && topben && !ext_int) || (!gourd && !gourz && !topnen && !topben)
3660         || (!gourd && !gourz && topnen && topben) || (shadeadd && !topnen && !topben)
3661         || (shadeadd && topnen && topben) || (init_ii && !topnen && !topben && !ext_int)
3662         || (init_ii && topnen && topben && !ext_int) || init_zi ? 0x01 : 0x00);
3663 daddmode |= ((dwrite && gourd && !topben && !ext_int) || (istepadd && !topben && !ext_int)
3664         || (!gourd && !gourz && !topben) || (shadeadd && !topben)
3665         || (init_ii && !topben && !ext_int) ? 0x02 : 0x00);
3666 daddmode |= ((!gourd && !gourz) || shadeadd || (dwrite && gourd && ext_int)
3667         || (istepadd && ext_int) || (init_ii && ext_int) ? 0x04 : 0x00);
3668 /* Data add load controls
3669 Pattern fraction (dest data) is loaded on
3670           dwrite . gourd . atick[0]
3671         + istepfadd . /datinit
3672         + init_if
3673 Pattern data is loaded on
3674           dwrite . gourd . atick[1]
3675         + istepadd . /datinit . /datinit
3676         + init_ii
3677 Source z1 is loaded on
3678           dzwrite . gourz . atick[1]
3679         + zstepadd . /datinit . /datinit
3680         + init_zi
3681 Source z2 is loaded on
3682           dzwrite . gourz . atick[0]
3683         + zstepfadd
3684         + init_zf
3685 Texture map shaded data is loaded on
3686         srcdreadd . srcshade
3687 */
3688 bool patfadd = (dwrite && gourd) || (istepfadd && !datinit) || init_if;
3689 bool patdadd = (dwrite && gourd) || (istepadd && !datinit) || init_ii;
3690 bool srcz1add = (dzwrite && gourz) || (zstepadd && !datinit) || init_zi;
3691 bool srcz2add = (dzwrite && gourz) || zstepfadd || init_zf;
3692 bool srcshadd = srcdreadd && srcshade;
3693 bool daddq_sel = patfadd || patdadd || srcz1add || srcz2add || srcshadd;
3694 /* Select write data
3695 This has to be controlled from stage 1 of the pipe-line, delayed
3696 by one tick, as the write occurs in the cycle after the ack.
3697
3698 00      pattern data
3699 01      lfu data
3700 10      adder output
3701 11      source zed
3702
3703 Bit 0 =  /patdsel . /adddsel
3704         + dzwrite1d
3705 Bit 1 =   adddsel
3706         + dzwrite1d
3707 */
3708 uint8 data_sel = ((!patdsel && !adddsel) || dzwrite ? 0x01 : 0x00)
3709         | (adddsel || dzwrite ? 0x02 : 0x00);
3710
3711 uint32 address, pixAddr;
3712 ADDRGEN(address, pixAddr, gena2i, zaddr,
3713         a1_x, a1_y, a1_base, a1_pitch, a1_pixsize, a1_width, a1_zoffset,
3714         a2_x, a2_y, a2_base, a2_pitch, a2_pixsize, a2_width, a2_zoffset);
3715
3716 //Here's my guess as to how the addresses get truncated to phrase boundaries in phrase mode...
3717 if (!justify)
3718         address &= 0xFFFFF8;
3719
3720 /* Generate source alignment shift
3721    -------------------------------
3722 The source alignment shift for data move is the difference between
3723 the source and destination X pointers, multiplied by the pixel
3724 size.  Only the low six bits of the pointers are of interest, as
3725 pixel sizes are always a power of 2 and window rows are always
3726 phrase aligned.
3727
3728 When not in phrase mode, the top 3 bits of the shift value are
3729 set to zero (2/26).
3730
3731 Source shifting is also used to extract bits for bit-to-byte
3732 expansion in phrase mode.  This involves only the bottom three
3733 bits of the shift value, and is based on the offset within the
3734 phrase of the destination X pointer, in pixels.
3735
3736 Source shifting is disabled when srcen is not set.
3737 */
3738 uint8 dstxp = (dsta2 ? a2_x : a1_x) & 0x3F;
3739 uint8 srcxp = (dsta2 ? a1_x : a2_x) & 0x3F;
3740 uint8 shftv = ((dstxp - srcxp) << pixsize) & 0x3F;
3741 /* The phrase mode alignment count is given by the phrase offset
3742 of the first pixel, for bit to byte expansion */
3743 uint8 pobb = 0;
3744
3745 if (pixsize == 3)
3746         pobb = dstxp & 0x07;
3747 if (pixsize == 4)
3748         pobb = dstxp & 0x03;
3749 if (pixsize == 5)
3750         pobb = dstxp & 0x01;
3751
3752 bool pobbsel = phrase_mode && bcompen;
3753 uint8 loshd = (pobbsel ? pobb : shftv) & 0x07;
3754 uint8 shfti = (srcen || pobbsel ? (sshftld ? loshd : srcshift & 0x07) : 0);
3755 /* Enable for high bits is srcen . phrase_mode */
3756 shfti |= (srcen && phrase_mode ? (sshftld ? shftv & 0x38 : srcshift & 0x38) : 0);
3757 srcshift = shfti;
3758
3759                                 if (sreadx)
3760                                 {
3761 #ifdef VERBOSE_BLITTER_LOGGING
3762 if (logBlit)
3763 {
3764 printf("  Entering SREADX state...");
3765 //printf(" [dstart=%X dend=%X pwidth=%X srcshift=%X]\n", dstart, dend, pwidth, srcshift);
3766 fflush(stdout);
3767 }
3768 #endif
3769 //uint32 srcAddr, pixAddr;
3770 //ADDRGEN(srcAddr, pixAddr, gena2i, zaddr,
3771 //      a1_x, a1_y, a1_base, a1_pitch, a1_pixsize, a1_width, a1_zoffset,
3772 //      a2_x, a2_y, a2_base, a2_pitch, a2_pixsize, a2_width, a2_zoffset);
3773                                         srcd2 = srcd1;
3774                                         srcd1 = ((uint64)JaguarReadLong(address + 0, BLITTER) << 32)
3775                                                 | (uint64)JaguarReadLong(address + 4, BLITTER);
3776 //Kludge to take pixel size into account...
3777 //Hmm. If we're not in phrase mode, this is most likely NOT going to be used...
3778 //Actually, it would be--because of BCOMPEN expansion, for example...
3779 if (!phrase_mode)
3780 {
3781         if (bcompen)
3782                 srcd1 >>= 56;
3783         else
3784         {
3785                 if (pixsize == 5)
3786                         srcd1 >>= 32;
3787                 else if (pixsize == 4)
3788                         srcd1 >>= 48;
3789                 else
3790                         srcd1 >>= 56;
3791         }
3792 }//*/
3793 #ifdef VERBOSE_BLITTER_LOGGING
3794 if (logBlit)
3795 {
3796 printf("    Source extra read address/pix address: %08X/%1X [%08X%08X]\n", address, pixAddr,
3797         (uint32)(srcd1 >> 32), (uint32)(srcd1 & 0xFFFFFFFF));
3798 fflush(stdout);
3799 }
3800 #endif
3801                                 }
3802
3803                                 if (szreadx)
3804                                 {
3805 #ifdef VERBOSE_BLITTER_LOGGING
3806 if (logBlit)
3807 {
3808 printf("  Entering SZREADX state...");
3809 fflush(stdout);
3810 }
3811 #endif
3812                                         srcz2 = srcz1;
3813                                         srcz1 = ((uint64)JaguarReadLong(address, BLITTER) << 32) | (uint64)JaguarReadLong(address + 4, BLITTER);
3814 #ifdef VERBOSE_BLITTER_LOGGING
3815 if (logBlit)
3816 {
3817         printf(" Src Z extra read address/pix address: %08X/%1X [%08X%08X]\n", address, pixAddr,
3818                 (uint32)(dstz >> 32), (uint32)(dstz & 0xFFFFFFFF));
3819         fflush(stdout);
3820 }
3821 #endif
3822                                 }
3823
3824                                 if (sread)
3825                                 {
3826 #ifdef VERBOSE_BLITTER_LOGGING
3827 if (logBlit)
3828 {
3829 printf("  Entering SREAD state...");
3830 //printf(" [dstart=%X dend=%X pwidth=%X srcshift=%X]\n", dstart, dend, pwidth, srcshift);
3831 fflush(stdout);
3832 }
3833 #endif
3834 //uint32 srcAddr, pixAddr;
3835 //ADDRGEN(srcAddr, pixAddr, gena2i, zaddr,
3836 //      a1_x, a1_y, a1_base, a1_pitch, a1_pixsize, a1_width, a1_zoffset,
3837 //      a2_x, a2_y, a2_base, a2_pitch, a2_pixsize, a2_width, a2_zoffset);
3838 srcd2 = srcd1;
3839 srcd1 = ((uint64)JaguarReadLong(address, BLITTER) << 32) | (uint64)JaguarReadLong(address + 4, BLITTER);
3840 //Kludge to take pixel size into account...
3841 if (!phrase_mode)
3842 {
3843         if (bcompen)
3844                 srcd1 >>= 56;
3845         else
3846         {
3847                 if (pixsize == 5)
3848                         srcd1 >>= 32;
3849                 else if (pixsize == 4)
3850                         srcd1 >>= 48;
3851                 else
3852                         srcd1 >>= 56;
3853         }
3854 }
3855 #ifdef VERBOSE_BLITTER_LOGGING
3856 if (logBlit)
3857 {
3858 printf("     Source read address/pix address: %08X/%1X [%08X%08X]\n", address, pixAddr,
3859         (uint32)(srcd1 >> 32), (uint32)(srcd1 & 0xFFFFFFFF));
3860 fflush(stdout);
3861 }
3862 #endif
3863                                 }
3864
3865                                 if (szread)
3866                                 {
3867 #ifdef VERBOSE_BLITTER_LOGGING
3868 if (logBlit)
3869 {
3870 printf("  Entering SZREAD state...");
3871 fflush(stdout);
3872 }
3873 #endif
3874                                         srcz2 = srcz1;
3875                                         srcz1 = ((uint64)JaguarReadLong(address, BLITTER) << 32) | (uint64)JaguarReadLong(address + 4, BLITTER);
3876 //Kludge to take pixel size into account... I believe that it only has to take 16BPP mode into account. Not sure tho.
3877 if (!phrase_mode && pixsize == 4)
3878         srcz1 >>= 48;
3879
3880 #ifdef VERBOSE_BLITTER_LOGGING
3881 if (logBlit)
3882 {
3883         printf("     Src Z read address/pix address: %08X/%1X [%08X%08X]\n", address, pixAddr,
3884                 (uint32)(dstz >> 32), (uint32)(dstz & 0xFFFFFFFF));
3885         fflush(stdout);
3886 }
3887 #endif
3888                                 }
3889
3890                                 if (dread)
3891                                 {
3892 #ifdef VERBOSE_BLITTER_LOGGING
3893 if (logBlit)
3894 {
3895 printf("  Entering DREAD state...");
3896 fflush(stdout);
3897 }
3898 #endif
3899 //uint32 dstAddr, pixAddr;
3900 //ADDRGEN(dstAddr, pixAddr, gena2i, zaddr,
3901 //      a1_x, a1_y, a1_base, a1_pitch, a1_pixsize, a1_width, a1_zoffset,
3902 //      a2_x, a2_y, a2_base, a2_pitch, a2_pixsize, a2_width, a2_zoffset);
3903 dstd = ((uint64)JaguarReadLong(address, BLITTER) << 32) | (uint64)JaguarReadLong(address + 4, BLITTER);
3904 //Kludge to take pixel size into account...
3905 if (!phrase_mode)
3906 {
3907         if (pixsize == 5)
3908                 dstd >>= 32;
3909         else if (pixsize == 4)
3910                 dstd >>= 48;
3911         else
3912                 dstd >>= 56;
3913 }
3914 #ifdef VERBOSE_BLITTER_LOGGING
3915 if (logBlit)
3916 {
3917 printf("       Dest read address/pix address: %08X/%1X [%08X%08X]\n", address, pixAddr,
3918         (uint32)(dstd >> 32), (uint32)(dstd & 0xFFFFFFFF));
3919 fflush(stdout);
3920 }
3921 #endif
3922                                 }
3923
3924                                 if (dzread)
3925                                 {
3926 // Is Z always 64 bit read? Or sometimes 16 bit (dependent on phrase_mode)?
3927 #ifdef VERBOSE_BLITTER_LOGGING
3928 if (logBlit)
3929 {
3930         printf("  Entering DZREAD state...");
3931         fflush(stdout);
3932 }
3933 #endif
3934                                         dstz = ((uint64)JaguarReadLong(address, BLITTER) << 32) | (uint64)JaguarReadLong(address + 4, BLITTER);
3935 //Kludge to take pixel size into account... I believe that it only has to take 16BPP mode into account. Not sure tho.
3936 if (!phrase_mode && pixsize == 4)
3937         dstz >>= 48;
3938
3939 #ifdef VERBOSE_BLITTER_LOGGING
3940 if (logBlit)
3941 {
3942         printf("    Dest Z read address/pix address: %08X/%1X [%08X%08X]\n", address, pixAddr,
3943                 (uint32)(dstz >> 32), (uint32)(dstz & 0xFFFFFFFF));
3944         fflush(stdout);
3945 }
3946 #endif
3947                                 }
3948
3949 // These vars should probably go further up in the code... !!! FIX !!!
3950 // We can't preassign these unless they're static...
3951 //uint64 srcz = 0;                      // These are assigned to shut up stupid compiler warnings--dwrite is ALWAYS asserted
3952 //bool winhibit = false;
3953 uint64 srcz;
3954 bool winhibit;
3955 //NOTE: SRCSHADE requires GOURZ to be set to work properly--another Jaguar I bug
3956                                 if (dwrite)
3957                                 {
3958 #ifdef VERBOSE_BLITTER_LOGGING
3959 if (logBlit)
3960 {
3961 printf("  Entering DWRITE state...");
3962 fflush(stdout);
3963 }
3964 #endif
3965 //Counter is done on the dwrite state...! (We'll do it first, since it affects dstart/dend calculations.)
3966 //Here's the voodoo for figuring the correct amount of pixels in phrase mode (or not):
3967                                         int8 inct = -((dsta2 ? a2_x : a1_x) & 0x07);    // From INNER_CNT
3968                                         uint8 inc = 0;
3969                                         inc = (!phrase_mode || (phrase_mode && (inct & 0x01)) ? 0x01 : 0x00);
3970                                         inc |= (phrase_mode && (((pixsize == 3 || pixsize == 4) && (inct & 0x02)) || pixsize == 5 && !(inct & 0x01)) ? 0x02 : 0x00);
3971                                         inc |= (phrase_mode && ((pixsize == 3 && (inct & 0x04)) || (pixsize == 4 && !(inct & 0x03))) ? 0x04 : 0x00);
3972                                         inc |= (phrase_mode && pixsize == 3 && !(inct & 0x07) ? 0x08 : 0x00);
3973
3974                                         uint16 oldicount = icount;      // Save icount to detect underflow...
3975                                         icount -= inc;
3976
3977                                         if (icount == 0 || ((icount & 0x8000) && !(oldicount & 0x8000)))
3978                                                 inner0 = true;
3979 // X/Y stepping is also done here, I think...No. It's done when a1_add or a2_add is asserted...
3980
3981 //*********************************************************************************
3982 //Start & end write mask computations...
3983 //*********************************************************************************
3984
3985 uint8 dstart = 0;
3986
3987 if (pixsize == 3)
3988         dstart = (dstxp & 0x07) << 3;
3989 if (pixsize == 4)
3990         dstart = (dstxp & 0x03) << 4;
3991 if (pixsize == 5)
3992         dstart = (dstxp & 0x01) << 5;
3993
3994 dstart = (phrase_mode ? dstart : pixAddr & 0x07);
3995
3996 //This is the other Jaguar I bug... Normally, should ALWAYS select a1_x here.
3997 uint16 dstxwr = (dsta2 ? a2_x : a1_x) & 0x7FFE;
3998 uint16 pseq = dstxwr ^ (a1_win_x & 0x7FFE);
3999 pseq = (pixsize == 5 ? pseq : pseq & 0x7FFC);
4000 pseq = ((pixsize & 0x06) == 4 ? pseq : pseq & 0x7FF8);
4001 bool penden = clip_a1 && (pseq == 0);
4002 uint8 window_mask = 0;
4003
4004 if (pixsize == 3)
4005         window_mask = (a1_win_x & 0x07) << 3;
4006 if (pixsize == 4)
4007         window_mask = (a1_win_x & 0x03) << 4;
4008 if (pixsize == 5)
4009         window_mask = (a1_win_x & 0x01) << 5;
4010
4011 window_mask = (penden ? window_mask : 0);
4012
4013 /*
4014   Entering SREADX state... [dstart=0 dend=20 pwidth=8 srcshift=20]
4015     Source extra read address/pix address: 000095D0/0 [000004E40000001C]
4016   Entering A2_ADD state [a2_x=0002, a2_y=0000, addasel=0, addbsel=1, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
4017   Entering SREAD state... [dstart=0 dend=20 pwidth=8 srcshift=20]
4018     Source read address/pix address: 000095D8/0 [0054003800009814]
4019   Entering A2_ADD state [a2_x=0004, a2_y=0000, addasel=0, addbsel=1, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
4020   Entering DWRITE state...
4021      Dest write address/pix address: 00037290/0 [dstart=0 dend=20 pwidth=8 srcshift=20][daas=0 dabs=0 dam=7 ds=1 daq=F] [0000001C00000000] (icount=026E, inc=4)
4022   Entering A1_ADD state [a1_x=0000, a1_y=0000, addasel=0, addbsel=0, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
4023
4024 (icount=026E, inc=4)
4025 icount & 0x03 = 0x02
4026          << 4 = 0x20
4027
4028 window_mask = 0x1000
4029
4030 Therefore, it chooses the inner_mask over the window_mask every time! Argh!
4031 This is because we did this wrong:
4032 Innerm[3-5]     := AN2 (inner_mask[3-5], imb[3-5], inner0);
4033 NOTE! This doesn't fix the problem because inner0 is asserted too late to help here. !!! FIX !!! [Should be DONE]
4034 */
4035
4036 /* The mask to be used if within one phrase of the end of the inner
4037 loop, similarly */
4038 uint8 inner_mask = 0;
4039
4040 if (pixsize == 3)
4041         inner_mask = (icount & 0x07) << 3;
4042 if (pixsize == 4)
4043         inner_mask = (icount & 0x03) << 4;
4044 if (pixsize == 5)
4045         inner_mask = (icount & 0x01) << 5;
4046 if (!inner0)
4047         inner_mask = 0;
4048 /* The actual mask used should be the lesser of the window masks and
4049 the inner mask, where in all cases 000 means 1000. */
4050 window_mask = (window_mask == 0 ? 0x40 : window_mask);
4051 inner_mask = (inner_mask == 0 ? 0x40 : inner_mask);
4052 uint8 emask = (window_mask > inner_mask ? inner_mask : window_mask);
4053 /* The mask to be used for the pixel size, to which must be added
4054 the bit offset */
4055 uint8 pma = pixAddr + (1 << pixsize);
4056 /* Select the mask */
4057 uint8 dend = (phrase_mode ? emask : pma);
4058
4059 /* The cycle width in phrase mode is normally one phrase.  However,
4060 at the start and end it may be narrower.  The start and end masks
4061 are used to generate this.  The width is given by:
4062
4063         8 - start mask - (8 - end mask)
4064 =       end mask - start mask
4065
4066 This is only used for writes in phrase mode.
4067 Start and end from the address level of the pipeline are used.
4068 */
4069 uint8 pwidth = (((dend | dstart) & 0x07) == 0 ? 0x08 : (dend - dstart) & 0x07);
4070
4071 //uint32 dstAddr, pixAddr;
4072 //ADDRGEN(dstAddr, pixAddr, gena2i, zaddr,
4073 //      a1_x, a1_y, a1_base, a1_pitch, a1_pixsize, a1_width, a1_zoffset,
4074 //      a2_x, a2_y, a2_base, a2_pitch, a2_pixsize, a2_width, a2_zoffset);
4075 #ifdef VERBOSE_BLITTER_LOGGING
4076 if (logBlit)
4077 {
4078         printf("     Dest write address/pix address: %08X/%1X", address, pixAddr);
4079         fflush(stdout);
4080 }
4081 #endif
4082
4083 //More testing... This is almost certainly wrong, but how else does this work???
4084 //Seems to kinda work... But still, this doesn't seem to make any sense!
4085 if (phrase_mode && !dsten)
4086         dstd = ((uint64)JaguarReadLong(address, BLITTER) << 32) | (uint64)JaguarReadLong(address + 4, BLITTER);
4087
4088 //Testing only... for now...
4089 //This is wrong because the write data is a combination of srcd and dstd--either run
4090 //thru the LFU or in PATDSEL or ADDDSEL mode. [DONE now, thru DATA module]
4091 // Precedence is ADDDSEL > PATDSEL > LFU.
4092 //Also, doesn't take into account the start & end masks, or the phrase width...
4093 //Now it does!
4094
4095 // srcd2 = xxxx xxxx 0123 4567, srcd = 8901 2345 xxxx xxxx, srcshift = $20 (32)
4096 uint64 srcd = (srcd2 << (64 - srcshift)) | (srcd1 >> srcshift);
4097 //bleh, ugly ugly ugly
4098 if (srcshift == 0)
4099         srcd = srcd1;
4100
4101 //NOTE: This only works with pixel sizes less than 8BPP...
4102 //DOUBLE NOTE: Still need to do regression testing to ensure that this doesn't break other stuff... !!! CHECK !!!
4103 if (!phrase_mode && srcshift != 0)
4104         srcd = ((srcd2 & 0xFF) << (8 - srcshift)) | ((srcd1 & 0xFF) >> srcshift);
4105
4106 //Z DATA() stuff done here... And it has to be done before any Z shifting...
4107 //Note that we need to have phrase mode start/end support here... (Not since we moved it from dzwrite...!)
4108 /*
4109 Here are a couple of Cybermorph blits with Z:
4110 $00113078       // DSTEN DSTENZ DSTWRZ CLIP_A1 GOURD GOURZ PATDSEL ZMODE=4
4111 $09900F39       // SRCEN DSTEN DSTENZ DSTWRZ UPDA1 UPDA1F UPDA2 DSTA2 ZMODE=4 LFUFUNC=C DCOMPEN
4112
4113 We're having the same phrase mode overwrite problem we had with the pixels... !!! FIX !!!
4114 Odd. It's equating 0 with 0... Even though ZMODE is $04 (less than)!
4115 */
4116 if (gourz)
4117 {
4118 /*
4119 void ADDARRAY(uint16 * addq, uint8 daddasel, uint8 daddbsel, uint8 daddmode,
4120         uint64 dstd, uint32 iinc, uint8 initcin[], uint64 initinc, uint16 initpix,
4121         uint32 istep, uint64 patd, uint64 srcd, uint64 srcz1, uint64 srcz2,
4122         uint32 zinc, uint32 zstep)
4123 */
4124         uint16 addq[4];
4125         uint8 initcin[4] = { 0, 0, 0, 0 };
4126         ADDARRAY(addq, 7/*daddasel*/, 6/*daddbsel*/, 0/*daddmode*/, 0, 0, initcin, 0, 0, 0, 0, 0, srcz1, srcz2, zinc, 0);
4127         srcz2 = ((uint64)addq[3] << 48) | ((uint64)addq[2] << 32) | ((uint64)addq[1] << 16) | (uint64)addq[0];
4128         ADDARRAY(addq, 6/*daddasel*/, 7/*daddbsel*/, 1/*daddmode*/, 0, 0, initcin, 0, 0, 0, 0, 0, srcz1, srcz2, zinc, 0);
4129         srcz1 = ((uint64)addq[3] << 48) | ((uint64)addq[2] << 32) | ((uint64)addq[1] << 16) | (uint64)addq[0];
4130
4131 #if 0//def VERBOSE_BLITTER_LOGGING
4132 if (logBlit)
4133 {
4134         printf("\n[srcz1=%08X%08X, srcz2=%08X%08X, zinc=%08X",
4135                 (uint32)(srcz1 >> 32), (uint32)(srcz1 & 0xFFFFFFFF),
4136                 (uint32)(srcz2 >> 32), (uint32)(srcz2 & 0xFFFFFFFF), zinc);
4137         fflush(stdout);
4138 }
4139 #endif
4140 }
4141
4142 uint8 zSrcShift = srcshift & 0x30;
4143 srcz = (srcz2 << (64 - zSrcShift)) | (srcz1 >> zSrcShift);
4144 //bleh, ugly ugly ugly
4145 if (zSrcShift == 0)
4146         srcz = srcz1;
4147
4148 #if 0//def VERBOSE_BLITTER_LOGGING
4149 if (logBlit)
4150 {
4151         printf(" srcz=%08X%08X]\n", (uint32)(srcz >> 32), (uint32)(srcz & 0xFFFFFFFF));
4152         fflush(stdout);
4153 }
4154 #endif
4155
4156 //When in SRCSHADE mode, it adds the IINC to the read source (from LFU???)
4157 //According to following line, it gets LFU mode. But does it feed the source into the LFU
4158 //after the add?
4159 //Dest write address/pix address: 0014E83E/0 [dstart=0 dend=10 pwidth=8 srcshift=0][daas=4 dabs=5 dam=7 ds=1 daq=F] [0000000000006505] (icount=003F, inc=1)
4160 //Let's try this:
4161 if (srcshade)
4162 {
4163 //NOTE: This is basically doubling the work done by DATA--since this is what
4164 //      ADDARRAY is loaded with when srcshade is enabled... !!! FIX !!!
4165 //      Also note that it doesn't work properly unless GOURZ is set--there's the clue!
4166         uint16 addq[4];
4167         uint8 initcin[4] = { 0, 0, 0, 0 };
4168         ADDARRAY(addq, 4/*daddasel*/, 5/*daddbsel*/, 7/*daddmode*/, dstd, iinc, initcin, 0, 0, 0, patd, srcd, 0, 0, 0, 0);
4169         srcd = ((uint64)addq[3] << 48) | ((uint64)addq[2] << 32) | ((uint64)addq[1] << 16) | (uint64)addq[0];
4170 }
4171 //Seems to work... Not 100% sure tho.
4172 //end try this
4173
4174 //Temporary kludge, to see if the fractional pattern does anything...
4175 //This works, BTW
4176 //But it seems to mess up in Cybermorph... the shading should be smooth but it isn't...
4177 //Seems the carry out is lost again... !!! FIX !!! [DONE--see below]
4178 if (patfadd)
4179 {
4180         uint16 addq[4];
4181         uint8 initcin[4] = { 0, 0, 0, 0 };
4182         ADDARRAY(addq, 4/*daddasel*/, 4/*daddbsel*/, 0/*daddmode*/, dstd, iinc, initcin, 0, 0, 0, patd, srcd, 0, 0, 0, 0);
4183         srcd1 = ((uint64)addq[3] << 48) | ((uint64)addq[2] << 32) | ((uint64)addq[1] << 16) | (uint64)addq[0];
4184 }
4185
4186 //Note that we still don't take atick[0] & [1] into account here, so this will skip half of the data needed... !!! FIX !!!
4187 //Not yet enumerated: dbinh, srcdread, srczread
4188 //Also, should do srcshift on the z value in phrase mode... !!! FIX !!! [DONE]
4189 //As well as add a srcz variable we can set external to this state... !!! FIX !!! [DONE]
4190
4191 uint64 wdata;
4192 uint8 dcomp, zcomp;
4193 DATA(wdata, dcomp, zcomp, winhibit,
4194         true, cmpdst, daddasel, daddbsel, daddmode, daddq_sel, data_sel, 0/*dbinh*/,
4195         dend, dstart, dstd, iinc, lfufunc, patd, patdadd,
4196         phrase_mode, srcd, false/*srcdread*/, false/*srczread*/, srcz2add, zmode,
4197         bcompen, bkgwren, dcompen, icount & 0x07, pixsize,
4198         srcz, dstz, zinc);
4199 /*
4200 Seems that the phrase mode writes with DCOMPEN and DSTEN are corrupting inside of DATA: !!! FIX !!!
4201 It's fairly random as well. 7CFE -> 7DFE, 7FCA -> 78CA, 7FA4 -> 78A4, 7F88 -> 8F88
4202 It could be related to an uninitialized variable, like the zmode bug...
4203 [DONE]
4204 It was a bug in the dech38el data--it returned $FF for ungated instead of $00...
4205
4206 Blit! (CMD = 09800609)
4207 Flags: SRCEN DSTEN UPDA1 UPDA2 LFUFUNC=C DCOMPEN
4208   count = 10 x 12
4209   a1_base = 00110000, a2_base = 0010B2A8
4210   a1_x = 004B, a1_y = 00D8, a1_frac_x = 0000, a1_frac_y = 0000, a2_x = 0704, a2_y = 0000
4211   a1_step_x = FFF3, a1_step_y = 0001, a1_stepf_x = 0000, a1_stepf_y = 0000, a2_step_x = FFFC, a2_step_y = 0000
4212   a1_inc_x = 0000, a1_inc_y = 0000, a1_incf_x = 0000, a1_incf_y = 0000
4213   a1_win_x = 0000, a1_win_y = 0000, a2_mask_x = 0000, a2_mask_y = 0000
4214   a2_mask=F a1add=+phr/+0 a2add=+phr/+0
4215   a1_pixsize = 4, a2_pixsize = 4
4216    srcd=0000000000000000  dstd=0000000000000000 patd=0000000000000000 iinc=00000000
4217   srcz1=0000000000000000 srcz2=0000000000000000 dstz=0000000000000000 zinc=00000000, coll=0
4218   Phrase mode is ON
4219   [in=T a1f=F a1=F zf=F z=F a2=F iif=F iii=F izf=F izi=F]
4220   Entering INNER state...
4221   Entering SREAD state...    Source read address/pix address: 0010C0B0/0 [0000000078047804]
4222   Entering A2_ADD state [a2_x=0704, a2_y=0000, addasel=0, addbsel=1, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
4223   Entering DREAD state...
4224       Dest read address/pix address: 00197240/0 [0000000000000028]
4225   Entering DWRITE state...
4226      Dest write address/pix address: 00197240/0 [dstart=30 dend=40 pwidth=8 srcshift=30][daas=0 dabs=0 dam=7 ds=1 daq=F] [0000000000000028] (icount=0009, inc=1)
4227   Entering A1_ADD state [a1_x=004B, a1_y=00D8, addasel=0, addbsel=0, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
4228   Entering SREAD state...    Source read address/pix address: 0010C0B8/0 [7804780478047804]
4229   Entering A2_ADD state [a2_x=0708, a2_y=0000, addasel=0, addbsel=1, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
4230   Entering DREAD state...
4231       Dest read address/pix address: 00197260/0 [0028000000200008]
4232   Entering DWRITE state...
4233      Dest write address/pix address: 00197260/0 [dstart=0 dend=40 pwidth=8 srcshift=30][daas=0 dabs=0 dam=7 ds=1 daq=F] [0028780478047804] (icount=0005, inc=4)
4234   Entering A1_ADD state [a1_x=004C, a1_y=00D8, addasel=0, addbsel=0, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
4235   Entering SREAD state...    Source read address/pix address: 0010C0C0/0 [0000000000000000]
4236   Entering A2_ADD state [a2_x=070C, a2_y=0000, addasel=0, addbsel=1, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
4237   Entering DREAD state...
4238       Dest read address/pix address: 00197280/0 [0008001800180018]
4239   Entering DWRITE state...
4240      Dest write address/pix address: 00197280/0 [dstart=0 dend=40 pwidth=8 srcshift=30][daas=0 dabs=0 dam=7 ds=1 daq=F] [7804780478040018] (icount=0001, inc=4)
4241   Entering A1_ADD state [a1_x=0050, a1_y=00D8, addasel=0, addbsel=0, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
4242   Entering SREAD state...    Source read address/pix address: 0010C0C8/0 [000078047BFE7BFE]
4243   Entering A2_ADD state [a2_x=0710, a2_y=0000, addasel=0, addbsel=1, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
4244   Entering DREAD state...
4245       Dest read address/pix address: 001972A0/0 [0008002000000000]
4246   Entering DWRITE state...
4247      Dest write address/pix address: 001972A0/0 [dstart=0 dend=10 pwidth=8 srcshift=30][daas=0 dabs=0 dam=7 ds=1 daq=F] [0008002000000000] (icount=FFFD, inc=4)
4248   Entering A1_ADD state [a1_x=0054, a1_y=00D8, addasel=0, addbsel=0, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
4249   Entering IDLE_INNER state...
4250 */
4251
4252 //Why isn't this taken care of in DATA? Because, DATA is modifying its local copy instead of the one used here.
4253 //!!! FIX !!! [DONE]
4254 //if (patdadd)
4255 //      patd = wdata;
4256
4257 //if (patfadd)
4258 //      srcd1 = wdata;
4259
4260 /*
4261 DEF ADDRCOMP (
4262         a1_outside      // A1 pointer is outside window bounds
4263         :OUT;
4264 INT16/  a1_x
4265 INT16/  a1_y
4266 INT15/  a1_win_x
4267 INT15/  a1_win_y
4268         :IN);
4269 BEGIN
4270
4271 // The address is outside if negative, or if greater than or equal
4272 // to the window size
4273
4274 A1_xcomp        := MAG_15 (a1xgr, a1xeq, a1xlt, a1_x{0..14}, a1_win_x{0..14});
4275 A1_ycomp        := MAG_15 (a1ygr, a1yeq, a1ylt, a1_y{0..14}, a1_win_y{0..14});
4276 A1_outside      := OR6 (a1_outside, a1_x{15}, a1xgr, a1xeq, a1_y{15}, a1ygr, a1yeq);
4277 */
4278 //NOTE: There seems to be an off-by-one bug here in the clip_a1 section... !!! FIX !!!
4279 //      Actually, seems to be related to phrase mode writes...
4280 //      Or is it? Could be related to non-15-bit compares as above?
4281 if (clip_a1 && ((a1_x & 0x8000) || (a1_y & 0x8000) || (a1_x >= a1_win_x) || (a1_y >= a1_win_y)))
4282         winhibit = true;
4283
4284 if (!winhibit)
4285 {
4286         if (phrase_mode)
4287         {
4288                 JaguarWriteLong(address + 0, wdata >> 32, BLITTER);
4289                 JaguarWriteLong(address + 4, wdata & 0xFFFFFFFF, BLITTER);
4290         }
4291         else
4292         {
4293                 if (pixsize == 5)
4294                         JaguarWriteLong(address, wdata & 0xFFFFFFFF, BLITTER);
4295                 else if (pixsize == 4)
4296                         JaguarWriteWord(address, wdata & 0x0000FFFF, BLITTER);
4297                 else
4298                         JaguarWriteByte(address, wdata & 0x000000FF, BLITTER);
4299         }
4300 }
4301
4302 #ifdef VERBOSE_BLITTER_LOGGING
4303 if (logBlit)
4304 {
4305         printf(" [%08X%08X]%s", (uint32)(wdata >> 32), (uint32)(wdata & 0xFFFFFFFF), (winhibit ? "[X]" : ""));
4306         printf(" (icount=%04X, inc=%u)\n", icount, (uint16)inc);
4307         printf("    [dstart=%X dend=%X pwidth=%X srcshift=%X]", dstart, dend, pwidth, srcshift);
4308         printf("[daas=%X dabs=%X dam=%X ds=%X daq=%s]\n", daddasel, daddbsel, daddmode, data_sel, (daddq_sel ? "T" : "F"));
4309         fflush(stdout);
4310 }
4311 #endif
4312                                 }
4313
4314                                 if (dzwrite)
4315                                 {
4316 // OK, here's the big insight: When NOT in GOURZ mode, srcz1 & 2 function EXACTLY the same way that
4317 // srcd1 & 2 work--there's an implicit shift from srcz1 to srcz2 whenever srcz1 is read.
4318 // OTHERWISE, srcz1 is the integer for the computed Z and srcz2 is the fractional part.
4319 // Writes to srcz1 & 2 follow the same pattern as the other 64-bit registers--low 32 at the low address,
4320 // high 32 at the high address (little endian!).
4321 // NOTE: GOURZ is still not properly supported. Check patd/patf handling...
4322 //       Phrase mode start/end masks are not properly supported either...
4323 #ifdef VERBOSE_BLITTER_LOGGING
4324 if (logBlit)
4325 {
4326         printf("  Entering DZWRITE state...");
4327         printf("  Dest Z write address/pix address: %08X/%1X [%08X%08X]\n", address, pixAddr,
4328                 (uint32)(srcz >> 32), (uint32)(srcz & 0xFFFFFFFF));
4329         fflush(stdout);
4330 }
4331 #endif
4332 //This is not correct... !!! FIX !!!
4333 //Should be OK now... We'll see...
4334 //Nope. Having the same starstep write problems in phrase mode as we had with pixels... !!! FIX !!!
4335 //This is not causing the problem in Hover Strike... :-/
4336 //The problem was with the SREADX not shifting. Still problems with Z comparisons & other text in pregame screen...
4337 if (!winhibit)
4338 {
4339         if (phrase_mode)
4340         {
4341                 JaguarWriteLong(address + 0, srcz >> 32, BLITTER);
4342                 JaguarWriteLong(address + 4, srcz & 0xFFFFFFFF, BLITTER);
4343         }
4344         else
4345         {
4346                 if (pixsize == 4)
4347                         JaguarWriteWord(address, srcz & 0x0000FFFF, BLITTER);
4348         }
4349 }//*/
4350 #ifdef VERBOSE_BLITTER_LOGGING
4351 if (logBlit)
4352 {
4353 //      printf(" [%08X%08X]\n", (uint32)(srcz >> 32), (uint32)(srcz & 0xFFFFFFFF));
4354 //      fflush(stdout);
4355 //printf(" [dstart=%X dend=%X pwidth=%X srcshift=%X]", dstart, dend, pwidth, srcshift);
4356         printf("    [dstart=? dend=? pwidth=? srcshift=%X]", srcshift);
4357         printf("[daas=%X dabs=%X dam=%X ds=%X daq=%s]\n", daddasel, daddbsel, daddmode, data_sel, (daddq_sel ? "T" : "F"));
4358         fflush(stdout);
4359 }
4360 #endif
4361                                 }
4362
4363 /*
4364 This is because the address generator was using only 15 bits of the X when it should have
4365 used 16!
4366
4367 There's a slight problem here: The X pointer isn't wrapping like it should when it hits
4368 the edge of the window... Notice how the X isn't reset at the edge of the window:
4369
4370 Blit! (CMD = 00010000)
4371 Flags: PATDSEL
4372   count = 160 x 261
4373   a1_base = 000E8008, a2_base = 0001FA68
4374   a1_x = 0000, a1_y = 0000, a1_frac_x = 0000, a1_frac_y = 0000, a2_x = 0000, a2_y = 0000
4375   a1_step_x = 0000, a1_step_y = 0000, a1_stepf_x = 0000, a1_stepf_y = 0000, a2_step_x = 0000, a2_step_y = 0000
4376   a1_inc_x = 0000, a1_inc_y = 0000, a1_incf_x = 0000, a1_incf_y = 0000
4377   a1_win_x = 0000, a1_win_y = 0000, a2_mask_x = 0000, a2_mask_y = 0000
4378   a2_mask=F a1add=+phr/+0 a2add=+phr/+0
4379   a1_pixsize = 5, a2_pixsize = 5
4380    srcd=7717771777177717  dstd=0000000000000000 patd=7730773077307730 iinc=00000000
4381   srcz1=0000000000000000 srcz2=0000000000000000 dstz=0000000000000000 zinc=00000000, coll=0
4382   Phrase mode is ON
4383   [in=T a1f=F a1=F zf=F z=F a2=F iif=F iii=F izf=F izi=F]
4384   Entering INNER state...
4385   Entering DWRITE state...     Dest write address/pix address: 000E8008/0 [7730773077307730] (icount=009E, inc=2)
4386  srcz=0000000000000000][dcomp=AA zcomp=00 dbinh=00]
4387 [srcz=0000000000000000 dstz=0000000000000000 zwdata=0000000000000000 mask=7FFF]
4388     [dstart=0 dend=40 pwidth=8 srcshift=0][daas=0 dabs=0 dam=7 ds=0 daq=F]
4389   Entering A1_ADD state [a1_x=0000, a1_y=0000, addasel=0, addbsel=0, modx=1, addareg=F, adda_xconst=1, adda_yconst=0]...
4390   Entering DWRITE state...     Dest write address/pix address: 000E8018/0 [7730773077307730] (icount=009C, inc=2)
4391  srcz=0000000000000000][dcomp=AA zcomp=00 dbinh=00]
4392 [srcz=0000000000000000 dstz=0000000000000000 zwdata=0000000000000000 mask=7FFF]
4393     [dstart=0 dend=40 pwidth=8 srcshift=0][daas=0 dabs=0 dam=7 ds=0 daq=F]
4394   Entering A1_ADD state [a1_x=0002, a1_y=0000, addasel=0, addbsel=0, modx=1, addareg=F, adda_xconst=1, adda_yconst=0]...
4395
4396 ...
4397
4398   Entering A1_ADD state [a1_x=009C, a1_y=0000, addasel=0, addbsel=0, modx=1, addareg=F, adda_xconst=1, adda_yconst=0]...
4399   Entering DWRITE state...     Dest write address/pix address: 000E84F8/0 [7730773077307730] (icount=0000, inc=2)
4400  srcz=0000000000000000][dcomp=AA zcomp=00 dbinh=00]
4401 [srcz=0000000000000000 dstz=0000000000000000 zwdata=0000000000000000 mask=7FFF]
4402     [dstart=0 dend=40 pwidth=8 srcshift=0][daas=0 dabs=0 dam=7 ds=0 daq=F]
4403   Entering A1_ADD state [a1_x=009E, a1_y=0000, addasel=0, addbsel=0, modx=1, addareg=F, adda_xconst=1, adda_yconst=0]...
4404   Entering IDLE_INNER state...
4405
4406   Leaving INNER state... (ocount=0104)
4407   [in=T a1f=F a1=F zf=F z=F a2=F iif=F iii=F izf=F izi=F]
4408
4409   Entering INNER state...
4410   Entering DWRITE state...     Dest write address/pix address: 000E8508/0 [7730773077307730] (icount=009E, inc=2)
4411  srcz=0000000000000000][dcomp=AA zcomp=00 dbinh=00]
4412 [srcz=0000000000000000 dstz=0000000000000000 zwdata=0000000000000000 mask=7FFF]
4413     [dstart=0 dend=40 pwidth=8 srcshift=0][daas=0 dabs=0 dam=7 ds=0 daq=F]
4414   Entering A1_ADD state [a1_x=00A0, a1_y=0000, addasel=0, addbsel=0, modx=1, addareg=F, adda_xconst=1, adda_yconst=0]...
4415   Entering DWRITE state...     Dest write address/pix address: 000E8518/0 [7730773077307730] (icount=009C, inc=2)
4416  srcz=0000000000000000][dcomp=AA zcomp=00 dbinh=00]
4417 [srcz=0000000000000000 dstz=0000000000000000 zwdata=0000000000000000 mask=7FFF]
4418     [dstart=0 dend=40 pwidth=8 srcshift=0][daas=0 dabs=0 dam=7 ds=0 daq=F]
4419   Entering A1_ADD state [a1_x=00A2, a1_y=0000, addasel=0, addbsel=0, modx=1, addareg=F, adda_xconst=1, adda_yconst=0]...
4420
4421 */
4422
4423                                 if (a1_add)
4424                                 {
4425 #ifdef VERBOSE_BLITTER_LOGGING
4426 if (logBlit)
4427 {
4428 //printf("  Entering A1_ADD state [addasel=%X, addbsel=%X, modx=%X, addareg=%s, adda_xconst=%u, adda_yconst=%s]...\n", addasel, addbsel, modx, (addareg ? "T" : "F"), adda_xconst, (adda_yconst ? "1" : "0"));
4429 printf("  Entering A1_ADD state [a1_x=%04X, a1_y=%04X, addasel=%X, addbsel=%X, modx=%X, addareg=%s, adda_xconst=%u, adda_yconst=%s]...\n", a1_x, a1_y, addasel, addbsel, modx, (addareg ? "T" : "F"), adda_xconst, (adda_yconst ? "1" : "0"));
4430 fflush(stdout);
4431 }
4432 #endif
4433 int16 adda_x, adda_y, addb_x, addb_y, data_x, data_y, addq_x, addq_y;
4434 ADDAMUX(adda_x, adda_y, addasel, a1_step_x, a1_step_y, a1_stepf_x, a1_stepf_y, a2_step_x, a2_step_y,
4435         a1_inc_x, a1_inc_y, a1_incf_x, a1_incf_y, adda_xconst, adda_yconst, addareg, suba_x, suba_y);
4436 ADDBMUX(addb_x, addb_y, addbsel, a1_x, a1_y, a2_x, a2_y, a1_frac_x, a1_frac_y);
4437 ADDRADD(addq_x, addq_y, a1fracldi, adda_x, adda_y, addb_x, addb_y, modx, suba_x, suba_y);
4438
4439 #if 0//def VERBOSE_BLITTER_LOGGING
4440 if (logBlit)
4441 {
4442 printf("  [adda_x=%d, adda_y=%d, addb_x=%d, addb_y=%d, addq_x=%d, addq_y=%d]\n", adda_x, adda_y, addb_x, addb_y, addq_x, addq_y);
4443 fflush(stdout);
4444 }
4445 #endif
4446 //Now, write to what???
4447 //a2ptrld comes from a2ptrldi...
4448 //I believe it's addbsel that determines the writeback...
4449 // This is where atick[0] & [1] come in, in determining which part (fractional, integer)
4450 // gets written to...
4451 //a1_x = addq_x;
4452 //a1_y = addq_y;
4453 //Kludge, to get A1 channel increment working...
4454 if (a1addx == 3)
4455 {
4456         a1_frac_x = addq_x, a1_frac_y = addq_y;
4457
4458 addasel = 2, addbsel = 0, a1fracldi = false;
4459 ADDAMUX(adda_x, adda_y, addasel, a1_step_x, a1_step_y, a1_stepf_x, a1_stepf_y, a2_step_x, a2_step_y,
4460         a1_inc_x, a1_inc_y, a1_incf_x, a1_incf_y, adda_xconst, adda_yconst, addareg, suba_x, suba_y);
4461 ADDBMUX(addb_x, addb_y, addbsel, a1_x, a1_y, a2_x, a2_y, a1_frac_x, a1_frac_y);
4462 ADDRADD(addq_x, addq_y, a1fracldi, adda_x, adda_y, addb_x, addb_y, modx, suba_x, suba_y);
4463
4464         a1_x = addq_x, a1_y = addq_y;
4465 }
4466 else
4467         a1_x = addq_x, a1_y = addq_y;
4468                                 }
4469
4470                                 if (a2_add)
4471                                 {
4472 #ifdef VERBOSE_BLITTER_LOGGING
4473 if (logBlit)
4474 {
4475 //printf("  Entering A2_ADD state [addasel=%X, addbsel=%X, modx=%X, addareg=%s, adda_xconst=%u, adda_yconst=%s]...\n", addasel, addbsel, modx, (addareg ? "T" : "F"), adda_xconst, (adda_yconst ? "1" : "0"));
4476 printf("  Entering A2_ADD state [a2_x=%04X, a2_y=%04X, addasel=%X, addbsel=%X, modx=%X, addareg=%s, adda_xconst=%u, adda_yconst=%s]...\n", a2_x, a2_y, addasel, addbsel, modx, (addareg ? "T" : "F"), adda_xconst, (adda_yconst ? "1" : "0"));
4477 fflush(stdout);
4478 }
4479 #endif
4480 //void ADDAMUX(int16 &adda_x, int16 &adda_y, uint8 addasel, int16 a1_step_x, int16 a1_step_y,
4481 //      int16 a1_stepf_x, int16 a1_stepf_y, int16 a2_step_x, int16 a2_step_y,
4482 //      int16 a1_inc_x, int16 a1_inc_y, int16 a1_incf_x, int16 a1_incf_y, uint8 adda_xconst,
4483 //      bool adda_yconst, bool addareg, bool suba_x, bool suba_y)
4484 //void ADDBMUX(int16 &addb_x, int16 &addb_y, uint8 addbsel, int16 a1_x, int16 a1_y,
4485 //      int16 a2_x, int16 a2_y, int16 a1_frac_x, int16 a1_frac_y)
4486 //void ADDRADD(int16 &addq_x, int16 &addq_y, bool a1fracldi,
4487 //      int16 adda_x, int16 adda_y, int16 addb_x, int16 addb_y, uint8 modx, bool suba_x, bool suba_y)
4488 //void DATAMUX(int16 &data_x, int16 &data_y, uint32 gpu_din, int16 addq_x, int16 addq_y, bool addqsel)
4489 int16 adda_x, adda_y, addb_x, addb_y, data_x, data_y, addq_x, addq_y;
4490 ADDAMUX(adda_x, adda_y, addasel, a1_step_x, a1_step_y, a1_stepf_x, a1_stepf_y, a2_step_x, a2_step_y,
4491         a1_inc_x, a1_inc_y, a1_incf_x, a1_incf_y, adda_xconst, adda_yconst, addareg, suba_x, suba_y);
4492 ADDBMUX(addb_x, addb_y, addbsel, a1_x, a1_y, a2_x, a2_y, a1_frac_x, a1_frac_y);
4493 ADDRADD(addq_x, addq_y, a1fracldi, adda_x, adda_y, addb_x, addb_y, modx, suba_x, suba_y);
4494
4495 #if 0//def VERBOSE_BLITTER_LOGGING
4496 if (logBlit)
4497 {
4498 printf("  [adda_x=%d, adda_y=%d, addb_x=%d, addb_y=%d, addq_x=%d, addq_y=%d]\n", adda_x, adda_y, addb_x, addb_y, addq_x, addq_y);
4499 fflush(stdout);
4500 }
4501 #endif
4502 //Now, write to what???
4503 //a2ptrld comes from a2ptrldi...
4504 //I believe it's addbsel that determines the writeback...
4505 a2_x = addq_x;
4506 a2_y = addq_y;
4507                                 }
4508                         }
4509 /*
4510 Flags: SRCEN CLIP_A1 UPDA1 UPDA1F UPDA2 DSTA2 GOURZ ZMODE=0 LFUFUNC=C SRCSHADE
4511   count = 64 x 55
4512   a1_base = 0015B000, a2_base = 0014B000
4513   a1_x = 0000, a1_y = 0000, a1_frac_x = 8000, a1_frac_y = 8000, a2_x = 001F, a2_y = 0038
4514   a1_step_x = FFFFFFC0, a1_step_y = 0001, a1_stepf_x = 0000, a1_stepf_y = 2AAA, a2_step_x = FFFFFFC0, a2_step_y = 0001
4515   a1_inc_x = 0001, a1_inc_y = 0000, a1_incf_x = 0000, a1_incf_y = 0000
4516   a1_win_x = 0040, a1_win_y = 0040, a2_mask_x = 0000, a2_mask_y = 0000
4517   a2_mask=F a1add=+inc/+0 a2add=+1/+0
4518   a1_pixsize = 4, a2_pixsize = 4
4519    srcd=FF00FF00FF00FF00  dstd=0000000000000000 patd=0000000000000000 iinc=00000000
4520   srcz1=0000000000000000 srcz2=0000000000000000 dstz=0000000000000000 zinc=00000000, col=0
4521   Phrase mode is off
4522   [in=T a1f=F a1=F zf=F z=F a2=F iif=F iii=F izf=F izi=F]
4523   Entering INNER state...
4524   Entering SREAD state...    Source read address/pix address: 0015B000/0 [6505650565056505]
4525   Entering A1_ADD state [a1_x=0000, a1_y=0000, addasel=3, addbsel=2, modx=0, addareg=T, adda_xconst=7, adda_yconst=0]...
4526   Entering DWRITE state...
4527      Dest write address/pix address: 0014E83E/0 [dstart=0 dend=10 pwidth=8 srcshift=0][daas=4 dabs=5 dam=7 ds=1 daq=F] [0000000000006505] (icount=003F, inc=1)
4528   Entering A2_ADD state [a2_x=001F, a2_y=0038, addasel=0, addbsel=1, modx=0, addareg=F, adda_xconst=0, adda_yconst=0]...
4529   Entering SREAD state...    Source read address/pix address: 0015B000/0 [6505650565056505]
4530   Entering A1_ADD state [a1_x=FFFF8000, a1_y=FFFF8000, addasel=3, addbsel=2, modx=0, addareg=T, adda_xconst=7, adda_yconst=0]...
4531   Entering DWRITE state...
4532      Dest write address/pix address: 0014E942/0 [dstart=0 dend=10 pwidth=8 srcshift=0][daas=4 dabs=5 dam=7 ds=1 daq=F] [0000000000006505] (icount=003E, inc=1)
4533   Entering A2_ADD state [a2_x=0021, a2_y=0039, addasel=0, addbsel=1, modx=0, addareg=F, adda_xconst=0, adda_yconst=0]...
4534   Entering SREAD state...    Source read address/pix address: 0015B000/0 [6505650565056505]
4535   Entering A1_ADD state [a1_x=FFFF8000, a1_y=FFFF8000, addasel=3, addbsel=2, modx=0, addareg=T, adda_xconst=7, adda_yconst=0]...
4536   Entering DWRITE state...
4537      Dest write address/pix address: 0014EA46/0 [dstart=0 dend=10 pwidth=8 srcshift=0][daas=4 dabs=5 dam=7 ds=1 daq=F] [0000000000006505] (icount=003D, inc=1)
4538   Entering A2_ADD state [a2_x=0023, a2_y=003A, addasel=0, addbsel=1, modx=0, addareg=F, adda_xconst=0, adda_yconst=0]...
4539   Entering SREAD state...    Source read address/pix address: 0015B000/0 [6505650565056505]
4540   Entering A1_ADD state [a1_x=FFFF8000, a1_y=FFFF8000, addasel=3, addbsel=2, modx=0, addareg=T, adda_xconst=7, adda_yconst=0]...
4541   Entering DWRITE state...
4542      Dest write address/pix address: 0014EB4A/0 [dstart=0 dend=10 pwidth=8 srcshift=0][daas=4 dabs=5 dam=7 ds=1 daq=F] [0000000000006505] (icount=003C, inc=1)
4543   Entering A2_ADD state [a2_x=0025, a2_y=003B, addasel=0, addbsel=1, modx=0, addareg=F, adda_xconst=0, adda_yconst=0]...
4544   ...
4545   Entering SREAD state...    Source read address/pix address: 0015B000/0 [6505650565056505]
4546   Entering A1_ADD state [a1_x=FFFF8000, a1_y=FFFF8000, addasel=3, addbsel=2, modx=0, addareg=T, adda_xconst=7, adda_yconst=0]...
4547   Entering DWRITE state...
4548      Dest write address/pix address: 0015283A/0 [dstart=0 dend=10 pwidth=8 srcshift=0][daas=4 dabs=5 dam=7 ds=1 daq=F] [0000000000006505] (icount=0000, inc=1)
4549   Entering A2_ADD state [a2_x=009D, a2_y=0077, addasel=0, addbsel=1, modx=0, addareg=F, adda_xconst=0, adda_yconst=0]...
4550   Entering IDLE_INNER state...
4551   Leaving INNER state... (ocount=0036)
4552   [in=F a1f=T a1=F zf=F z=F a2=F iif=F iii=F izf=F izi=F]
4553   Entering A1FUPDATE state...
4554   [in=F a1f=F a1=T zf=F z=F a2=F iif=F iii=F izf=F izi=F]
4555   Entering A1UPDATE state... (-32768/-32768 -> 32704/-32767)
4556   [in=F a1f=F a1=F zf=F z=F a2=T iif=F iii=F izf=F izi=F]
4557   Entering A2UPDATE state... (159/120 -> 95/121)
4558   [in=T a1f=F a1=F zf=F z=F a2=F iif=F iii=F izf=F izi=F]
4559   Entering INNER state...
4560 */
4561
4562 #ifdef VERBOSE_BLITTER_LOGGING
4563 if (logBlit)
4564 {
4565 printf("  Leaving INNER state...");
4566 fflush(stdout);
4567 }
4568 #endif
4569                         indone = true;
4570 // The outer counter is updated here as well on the clock cycle...
4571
4572 /* the inner loop is started whenever another state is about to
4573 cause the inner state to go active */
4574 //Instart               := ND7 (instart, innert[0], innert[2..7]);
4575
4576 //Actually, it's done only when inner gets asserted without the 2nd line of conditions
4577 //(inner AND !indone)
4578 //fixed now...
4579 //Since we don't get here until the inner loop is finished (indone = true) we can get
4580 //away with doing it here...!
4581                         ocount--;
4582
4583                         if (ocount == 0)
4584                                 outer0 = true;
4585 #ifdef VERBOSE_BLITTER_LOGGING
4586 if (logBlit)
4587 {
4588 printf(" (ocount=%04X)\n", ocount);
4589 fflush(stdout);
4590 }
4591 #endif
4592                 }
4593
4594                 if (a1fupdate)
4595                 {
4596 #ifdef VERBOSE_BLITTER_LOGGING
4597 if (logBlit)
4598 {
4599 printf("  Entering A1FUPDATE state...\n");
4600 fflush(stdout);
4601 }
4602 #endif
4603                         uint32 a1_frac_xt = (uint32)a1_frac_x + (uint32)a1_stepf_x;
4604                         uint32 a1_frac_yt = (uint32)a1_frac_y + (uint32)a1_stepf_y;
4605                         a1FracCInX = a1_frac_xt >> 16;
4606                         a1FracCInY = a1_frac_yt >> 16;
4607                         a1_frac_x = (uint16)(a1_frac_xt & 0xFFFF);
4608                         a1_frac_y = (uint16)(a1_frac_yt & 0xFFFF);
4609                 }
4610
4611                 if (a1update)
4612                 {
4613 #ifdef VERBOSE_BLITTER_LOGGING
4614 if (logBlit)
4615 {
4616 printf("  Entering A1UPDATE state... (%d/%d -> ", a1_x, a1_y);
4617 fflush(stdout);
4618 }
4619 #endif
4620                         a1_x += a1_step_x + a1FracCInX;
4621                         a1_y += a1_step_y + a1FracCInY;
4622 #ifdef VERBOSE_BLITTER_LOGGING
4623 if (logBlit)
4624 {
4625 printf("%d/%d)\n", a1_x, a1_y);
4626 fflush(stdout);
4627 }
4628 #endif
4629                 }
4630
4631                 if (a2update)
4632                 {
4633 #ifdef VERBOSE_BLITTER_LOGGING
4634 if (logBlit)
4635 {
4636 printf("  Entering A2UPDATE state... (%d/%d -> ", a2_x, a2_y);
4637 fflush(stdout);
4638 }
4639 #endif
4640                         a2_x += a2_step_x;
4641                         a2_y += a2_step_y;
4642 #ifdef VERBOSE_BLITTER_LOGGING
4643 if (logBlit)
4644 {
4645 printf("%d/%d)\n", a2_x, a2_y);
4646 fflush(stdout);
4647 }
4648 #endif
4649                 }
4650         }
4651
4652 // We never get here! !!! FIX !!!
4653
4654 #ifdef VERBOSE_BLITTER_LOGGING
4655 if (logBlit)
4656 {
4657         printf("Done!\na1_x=%04X a1_y=%04X a1_frac_x=%04X a1_frac_y=%04X a2_x=%04X a2_y%04X\n",
4658                 GET16(blitter_ram, A1_PIXEL + 2),
4659                 GET16(blitter_ram, A1_PIXEL + 0),
4660                 GET16(blitter_ram, A1_FPIXEL + 2),
4661                 GET16(blitter_ram, A1_FPIXEL + 0),
4662                 GET16(blitter_ram, A2_PIXEL + 2),
4663                 GET16(blitter_ram, A2_PIXEL + 0));
4664         fflush(stdout);
4665 }
4666 #endif
4667
4668         // Write values back to registers (in real blitter, these are continuously updated)
4669         SET16(blitter_ram, A1_PIXEL + 2, a1_x);
4670         SET16(blitter_ram, A1_PIXEL + 0, a1_y);
4671         SET16(blitter_ram, A1_FPIXEL + 2, a1_frac_x);
4672         SET16(blitter_ram, A1_FPIXEL + 0, a1_frac_y);
4673         SET16(blitter_ram, A2_PIXEL + 2, a2_x);
4674         SET16(blitter_ram, A2_PIXEL + 0, a2_y);
4675
4676 #ifdef VERBOSE_BLITTER_LOGGING
4677 if (logBlit)
4678 {
4679         printf("Writeback!\na1_x=%04X a1_y=%04X a1_frac_x=%04X a1_frac_y=%04X a2_x=%04X a2_y%04X\n",
4680                 GET16(blitter_ram, A1_PIXEL + 2),
4681                 GET16(blitter_ram, A1_PIXEL + 0),
4682                 GET16(blitter_ram, A1_FPIXEL + 2),
4683                 GET16(blitter_ram, A1_FPIXEL + 0),
4684                 GET16(blitter_ram, A2_PIXEL + 2),
4685                 GET16(blitter_ram, A2_PIXEL + 0));
4686         fflush(stdout);
4687 }
4688 #endif
4689 }
4690
4691 /*
4692         int16 a1_x = (int16)GET16(blitter_ram, A1_PIXEL + 2);
4693         int16 a1_y = (int16)GET16(blitter_ram, A1_PIXEL + 0);
4694         uint16 a1_frac_x = GET16(blitter_ram, A1_FPIXEL + 2);
4695         uint16 a1_frac_y = GET16(blitter_ram, A1_FPIXEL + 0);
4696         int16 a2_x = (int16)GET16(blitter_ram, A2_PIXEL + 2);
4697         int16 a2_y = (int16)GET16(blitter_ram, A2_PIXEL + 0);
4698
4699 Seems that the ending a1_x should be written between blits, but it doesn't seem to be...
4700
4701 Blit! (CMD = 01800000)
4702 Flags: LFUFUNC=C
4703   count = 28672 x 1
4704   a1_base = 00050000, a2_base = 00070000
4705   a1_x = 0000, a1_y = 0000, a1_frac_x = 49CD, a1_frac_y = 0000, a2_x = 0033, a2_y = 0001
4706   a1_step_x = 0000, a1_step_y = 0000, a1_stepf_x = 939A, a1_stepf_y = 0000, a2_step_x = 0000, a2_step_y = 0000
4707   a1_inc_x = 0000, a1_inc_y = 0000, a1_incf_x = 0000, a1_incf_y = 0000
4708   a1_win_x = 0100, a1_win_y = 0020, a2_mask_x = 0000, a2_mask_y = 0000
4709   a2_mask=F a1add=+phr/+0 a2add=+phr/+0
4710   a1_pixsize = 4, a2_pixsize = 3
4711    srcd=DEDEDEDEDEDEDEDE  dstd=0000000000000000 patd=0000000000000000 iinc=00000000
4712   srcz1=0000000000000000 srcz2=0000000000000000 dstz=0000000000000000 zinc=00000000, coll=0
4713   Phrase mode is ON
4714
4715 Blit! (CMD = 01800000)
4716 Flags: LFUFUNC=C
4717   count = 28672 x 1
4718   a1_base = 00050000, a2_base = 00070000
4719   a1_x = 0000, a1_y = 0000, a1_frac_x = 49CD, a1_frac_y = 0000, a2_x = 0033, a2_y = 0001
4720   a1_step_x = 0000, a1_step_y = 0000, a1_stepf_x = 939A, a1_stepf_y = 0000, a2_step_x = 0000, a2_step_y = 0000
4721   a1_inc_x = 0000, a1_inc_y = 0000, a1_incf_x = 0000, a1_incf_y = 0000
4722   a1_win_x = 0100, a1_win_y = 0020, a2_mask_x = 0000, a2_mask_y = 0000
4723   a2_mask=F a1add=+phr/+0 a2add=+phr/+0
4724   a1_pixsize = 4, a2_pixsize = 3
4725    srcd=D6D6D6D6D6D6D6D6  dstd=0000000000000000 patd=0000000000000000 iinc=00000000
4726   srcz1=0000000000000000 srcz2=0000000000000000 dstz=0000000000000000 zinc=00000000, coll=0
4727   Phrase mode is ON
4728 */
4729
4730
4731
4732 // Various pieces of the blitter puzzle are teased out here...
4733
4734
4735
4736 /*
4737 DEF ADDRGEN (
4738 INT24/  address         // byte address
4739                 pixa[0..2]      // bit part of address, un-pipe-lined
4740                 :OUT;
4741 INT16/  a1_x
4742 INT16/  a1_y
4743 INT21/  a1_base
4744                 a1_pitch[0..1]
4745                 a1_pixsize[0..2]
4746                 a1_width[0..5]
4747                 a1_zoffset[0..1]
4748 INT16/  a2_x
4749 INT16/  a2_y
4750 INT21/  a2_base
4751                 a2_pitch[0..1]
4752                 a2_pixsize[0..2]
4753                 a2_width[0..5]
4754                 a2_zoffset[0..1]
4755                 apipe           // load address pipe-line latch
4756                 clk                     // co-processor clock
4757                 gena2           // generate A2 as opposed to A1
4758                 zaddr           // generate Z address
4759                 :IN);
4760 */
4761
4762 void ADDRGEN(uint32 &address, uint32 &pixa, bool gena2, bool zaddr,
4763         uint16 a1_x, uint16 a1_y, uint32 a1_base, uint8 a1_pitch, uint8 a1_pixsize, uint8 a1_width, uint8 a1_zoffset,
4764         uint16 a2_x, uint16 a2_y, uint32 a2_base, uint8 a2_pitch, uint8 a2_pixsize, uint8 a2_width, uint8 a2_zoffset)
4765 {
4766 //      uint16 x = (gena2 ? a2_x : a1_x) & 0x7FFF;
4767         uint16 x = (gena2 ? a2_x : a1_x) & 0xFFFF;      // Actually uses all 16 bits to generate address...!
4768         uint16 y = (gena2 ? a2_y : a1_y) & 0x0FFF;
4769         uint8 width = (gena2 ? a2_width : a1_width);
4770         uint8 pixsize = (gena2 ? a2_pixsize : a1_pixsize);
4771         uint8 pitch = (gena2 ? a2_pitch : a1_pitch);
4772         uint32 base = (gena2 ? a2_base : a1_base) >> 3;//Only upper 21 bits are passed around the bus? Seems like it...
4773         uint8 zoffset = (gena2 ? a2_zoffset : a1_zoffset);
4774
4775         uint32 ytm = ((uint32)y << 2) + (width & 0x02 ? (uint32)y << 1 : 0) + (width & 0x01 ? (uint32)y : 0);
4776
4777         uint32 ya = (ytm << (width >> 2)) >> 2;
4778
4779         uint32 pa = ya + x;
4780
4781         /*uint32*/ pixa = pa << pixsize;
4782
4783         uint8 pt = ((pitch & 0x01) && !(pitch & 0x02) ? 0x01 : 0x00)
4784                 | (!(pitch & 0x01) && (pitch & 0x02) ? 0x02 : 0x00);
4785 //      uint32 phradr = pixa << pt;
4786         uint32 phradr = (pixa >> 6) << pt;
4787         uint32 shup = (pitch == 0x03 ? (pixa >> 6) : 0);
4788
4789         uint8 za = (zaddr ? zoffset : 0) & 0x03;
4790 //      uint32 addr = za + (phradr & 0x07) + (shup << 1) + base;
4791         uint32 addr = za + phradr + (shup << 1) + base;
4792         /*uint32*/ address = ((pixa & 0x38) >> 3) | ((addr & 0x1FFFFF) << 3);
4793 #if 0//def VERBOSE_BLITTER_LOGGING
4794 if (logBlit)
4795 {
4796 printf("    [gena2=%s, x=%04X, y=%04X, w=%1X, pxsz=%1X, ptch=%1X, b=%08X, zoff=%1X]\n", (gena2 ? "T" : "F"), x, y, width, pixsize, pitch, base, zoffset);
4797 printf("    [ytm=%X, ya=%X, pa=%X, pixa=%X, pt=%X, phradr=%X, shup=%X, za=%X, addr=%X, address=%X]\n", ytm, ya, pa, pixa, pt, phradr, shup, za, addr, address);
4798 fflush(stdout);
4799 }
4800 #endif
4801         pixa &= 0x07;
4802 /*
4803   Entering INNER state...
4804     [gena2=T, x=0002, y=0000, w=20, pxsz=4, ptch=0, b=000012BA, zoff=0]
4805     [ytm=0, ya=0, pa=2, pixa=20, pt=0, phradr=0, shup=0, za=0, addr=12BA, address=95D4]
4806   Entering SREADX state... [dstart=0 dend=20 pwidth=8 srcshift=20]
4807     Source extra read address/pix address: 000095D4/0 [0000001C00540038]
4808   Entering A2_ADD state [a2_x=0002, a2_y=0000, addasel=0, addbsel=1, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
4809     [gena2=T, x=0004, y=0000, w=20, pxsz=4, ptch=0, b=000012BA, zoff=0]
4810     [ytm=0, ya=0, pa=4, pixa=40, pt=0, phradr=1, shup=0, za=0, addr=12BB, address=95D8]
4811   Entering SREAD state... [dstart=0 dend=20 pwidth=8 srcshift=0]
4812     Source read address/pix address: 000095D8/0 [0054003800009814]
4813   Entering A2_ADD state [a2_x=0004, a2_y=0000, addasel=0, addbsel=1, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
4814     [gena2=F, x=0000, y=0000, w=20, pxsz=4, ptch=0, b=00006E52, zoff=0]
4815     [ytm=0, ya=0, pa=0, pixa=0, pt=0, phradr=0, shup=0, za=0, addr=6E52, address=37290]
4816   Entering DWRITE state...
4817      Dest write address/pix address: 00037290/0 [dstart=0 dend=20 pwidth=8 srcshift=0] (icount=026E, inc=4)
4818   Entering A1_ADD state [a1_x=0000, a1_y=0000, addasel=0, addbsel=0, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
4819     [gena2=T, x=0008, y=0000, w=20, pxsz=4, ptch=0, b=000012BA, zoff=0]
4820     [ytm=0, ya=0, pa=8, pixa=80, pt=0, phradr=2, shup=0, za=0, addr=12BC, address=95E0]
4821 */
4822 /*
4823 Obviously wrong:
4824   Entering SREAD state...
4825     [gena2=T, x=0004, y=0000, w=20, pxsz=4, ptch=0, b=000010AC, zoff=0]
4826     [ytm=0, ya=0, pa=4, pixa=0, pt=0, phradr=40, shup=0, za=0, addr=10AC, address=8560]
4827     Source read address/pix address: 00008560/0 [8C27981B327E00F0]
4828
4829 2nd pass (still wrong):
4830   Entering SREAD state...
4831     [gena2=T, x=0004, y=0000, w=20, pxsz=4, ptch=0, b=000010AC, zoff=0]
4832     [ytm=0, ya=0, pa=4, pixa=0, pt=0, phradr=40, shup=0, za=0, addr=10EC, address=8760]
4833     Source read address/pix address: 00008760/0 [00E06DC04581880C]
4834
4835 Correct!:
4836   Entering SREAD state...
4837     [gena2=T, x=0004, y=0000, w=20, pxsz=4, ptch=0, b=000010AC, zoff=0]
4838     [ytm=0, ya=0, pa=4, pixa=0, pt=0, phradr=1, shup=0, za=0, addr=10AD, address=8568]
4839     Source read address/pix address: 00008568/0 [6267981A327C00F0]
4840
4841 OK, now we're back into incorrect (or is it?):
4842   Entering SREADX state... [dstart=0 dend=20 pwidth=8 srcshift=20]
4843     Source extra read address/pix address: 000095D4/0 [0000 001C 0054 0038]
4844   Entering A2_ADD state [a2_x=0002, a2_y=0000, addasel=0, addbsel=1, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
4845   Entering SREAD state... [dstart=0 dend=20 pwidth=8 srcshift=0]
4846     Source read address/pix address: 000095D8/0 [0054 0038 0000 9814]
4847   Entering A2_ADD state [a2_x=0004, a2_y=0000, addasel=0, addbsel=1, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
4848 I think this may be correct...!
4849 */
4850 }
4851
4852 /*
4853 // source and destination address update conditions
4854
4855 Sraat0          := AN2 (sraat[0], sreadxi, srcenz\);
4856 Sraat1          := AN2 (sraat[1], sreadi, srcenz\);
4857 Srca_addi       := OR4 (srca_addi, szreadxi, szreadi, sraat[0..1]);
4858 Srca_add        := FD1Q (srca_add, srca_addi, clk);
4859
4860 Dstaat          := AN2 (dstaat, dwritei, dstwrz\);
4861 Dsta_addi       := OR2 (dsta_addi, dzwritei, dstaat);
4862 // Dsta_add     := FD1Q (dsta_add, dsta_addi, clk);
4863
4864 // source and destination address generate conditions
4865
4866 Gensrc          := OR4 (gensrc, sreadxi, szreadxi, sreadi, szreadi);
4867 Gendst          := OR4 (gendst, dreadi, dzreadi, dwritei, dzwritei);
4868 Dsta2\          := INV1 (dsta2\, dsta2);
4869 Gena2t0         := NAN2 (gena2t[0], gensrc, dsta2\);
4870 Gena2t1         := NAN2 (gena2t[1], gendst, dsta2);
4871 Gena2i          := NAN2 (gena2i, gena2t[0..1]);
4872 Gena2           := FD1QU (gena2, gena2i, clk);
4873
4874 Zaddr           := OR4 (zaddr, szreadx, szread, dzread, dzwrite);
4875 */
4876
4877 /*void foo(void)
4878 {
4879         // Basically, the above translates to:
4880         bool srca_addi = (sreadxi && !srcenz) || (sreadi && !srcenz) || szreadxi || szreadi;
4881
4882         bool dsta_addi = (dwritei && !dstwrz) || dzwritei;
4883
4884         bool gensrc = sreadxi || szreadxi || sreadi || szreadi;
4885         bool gendst = dreadi || dzreadi || dwritei || dzwritei;
4886         bool gena2i = (gensrc && !dsta2) || (gendst && dsta2);
4887
4888         bool zaddr = szreadx || szread || dzread || dzwrite;
4889 }*/
4890
4891 /*
4892 // source data reads
4893
4894 Srcdpset\       := NAN2 (srcdpset\, readreq, sread);
4895 Srcdpt1         := NAN2 (srcdpt[1], srcdpend, srcdack\);
4896 Srcdpt2         := NAN2 (srcdpt[2], srcdpset\, srcdpt[1]);
4897 Srcdpend        := FD2Q (srcdpend, srcdpt[2], clk, reset\);
4898
4899 Srcdxpset\      := NAN2 (srcdxpset\, readreq, sreadx);
4900 Srcdxpt1        := NAN2 (srcdxpt[1], srcdxpend, srcdxack\);
4901 Srcdxpt2        := NAN2 (srcdxpt[2], srcdxpset\, srcdxpt[1]);
4902 Srcdxpend       := FD2Q (srcdxpend, srcdxpt[2], clk, reset\);
4903
4904 Sdpend          := OR2 (sdpend, srcdxpend, srcdpend);
4905 Srcdreadt       := AN2 (srcdreadt, sdpend, read_ack);
4906
4907 //2/9/92 - enhancement?
4908 //Load srcdread on the next tick as well to modify it in srcshade
4909
4910 Srcdreadd       := FD1Q (srcdreadd, srcdreadt, clk);
4911 Srcdread        := AOR1 (srcdread, srcshade, srcdreadd, srcdreadt);
4912
4913 // source zed reads
4914
4915 Srczpset\       := NAN2 (srczpset\, readreq, szread);
4916 Srczpt1         := NAN2 (srczpt[1], srczpend, srczack\);
4917 Srczpt2         := NAN2 (srczpt[2], srczpset\, srczpt[1]);
4918 Srczpend        := FD2Q (srczpend, srczpt[2], clk, reset\);
4919
4920 Srczxpset\      := NAN2 (srczxpset\, readreq, szreadx);
4921 Srczxpt1        := NAN2 (srczxpt[1], srczxpend, srczxack\);
4922 Srczxpt2        := NAN2 (srczxpt[2], srczxpset\, srczxpt[1]);
4923 Srczxpend       := FD2Q (srczxpend, srczxpt[2], clk, reset\);
4924
4925 Szpend          := OR2 (szpend, srczpend, srczxpend);
4926 Srczread        := AN2 (srczread, szpend, read_ack);
4927
4928 // destination data reads
4929
4930 Dstdpset\       := NAN2 (dstdpset\, readreq, dread);
4931 Dstdpt0         := NAN2 (dstdpt[0], dstdpend, dstdack\);
4932 Dstdpt1         := NAN2 (dstdpt[1], dstdpset\, dstdpt[0]);
4933 Dstdpend        := FD2Q (dstdpend, dstdpt[1], clk, reset\);
4934 Dstdread        := AN2 (dstdread, dstdpend, read_ack);
4935
4936 // destination zed reads
4937
4938 Dstzpset\       := NAN2 (dstzpset\, readreq, dzread);
4939 Dstzpt0         := NAN2 (dstzpt[0], dstzpend, dstzack\);
4940 Dstzpt1         := NAN2 (dstzpt[1], dstzpset\, dstzpt[0]);
4941 Dstzpend        := FD2Q (dstzpend, dstzpt[1], clk, reset\);
4942 Dstzread        := AN2 (dstzread, dstzpend, read_ack);
4943 */
4944
4945 /*void foo2(void)
4946 {
4947         // Basically, the above translates to:
4948         bool srcdpend = (readreq && sread) || (srcdpend && !srcdack);
4949         bool srcdxpend = (readreq && sreadx) || (srcdxpend && !srcdxack);
4950         bool sdpend = srcdxpend || srcdpend;
4951         bool srcdread = ((sdpend && read_ack) && srcshade) || (sdpend && read_ack);//the latter term is lookahead
4952
4953 }*/
4954
4955 ////////////////////////////////////////////////////////////////////////////////////////////
4956 ////////////////////////////////////////////////////////////////////////////////////////////
4957 // Here's an important bit: The source data adder logic. Need to track down the inputs!!! //
4958 ////////////////////////////////////////////////////////////////////////////////////////////
4959 ////////////////////////////////////////////////////////////////////////////////////////////
4960
4961 /*
4962 DEF ADDARRAY (
4963 INT16/  addq[0..3]
4964         :OUT;
4965         clk
4966         daddasel[0..2]  // data adder input A selection
4967         daddbsel[0..3]
4968         daddmode[0..2]
4969 INT32/  dstd[0..1]
4970 INT32/  iinc
4971         initcin[0..3]   // carry into the adders from the initializers
4972         initinc[0..63]  // the initialisation increment
4973         initpix[0..15]  // Data initialiser pixel value
4974 INT32/  istep
4975 INT32/  patd[0..1]
4976 INT32/  srcdlo
4977 INT32/  srcdhi
4978 INT32/  srcz1[0..1]
4979 INT32/  srcz2[0..1]
4980         reset\
4981 INT32/  zinc
4982 INT32/  zstep
4983         :IN);
4984 */
4985 void ADDARRAY(uint16 * addq, uint8 daddasel, uint8 daddbsel, uint8 daddmode,
4986         uint64 dstd, uint32 iinc, uint8 initcin[], uint64 initinc, uint16 initpix,
4987         uint32 istep, uint64 patd, uint64 srcd, uint64 srcz1, uint64 srcz2,
4988         uint32 zinc, uint32 zstep)
4989 {
4990         uint32 initpix2 = ((uint32)initpix << 16) | initpix;
4991         uint32 addalo[8], addahi[8];
4992         addalo[0] = dstd & 0xFFFFFFFF;
4993         addalo[1] = initpix2;
4994         addalo[2] = 0;
4995         addalo[3] = 0;
4996         addalo[4] = srcd & 0xFFFFFFFF;
4997         addalo[5] = patd & 0xFFFFFFFF;
4998         addalo[6] = srcz1 & 0xFFFFFFFF;
4999         addalo[7] = srcz2 & 0xFFFFFFFF;
5000         addahi[0] = dstd >> 32;
5001         addahi[1] = initpix2;
5002         addahi[2] = 0;
5003         addahi[3] = 0;
5004         addahi[4] = srcd >> 32;
5005         addahi[5] = patd >> 32;
5006         addahi[6] = srcz1 >> 32;
5007         addahi[7] = srcz2 >> 32;
5008         uint16 adda[4];
5009         adda[0] = addalo[daddasel] & 0xFFFF;
5010         adda[1] = addalo[daddasel] >> 16;
5011         adda[2] = addahi[daddasel] & 0xFFFF;
5012         adda[3] = addahi[daddasel] >> 16;
5013
5014         uint16 wordmux[8];
5015         wordmux[0] = iinc & 0xFFFF;
5016         wordmux[1] = iinc >> 16;
5017         wordmux[2] = zinc & 0xFFFF;
5018         wordmux[3] = zinc >> 16;;
5019         wordmux[4] = istep & 0xFFFF;
5020         wordmux[5] = istep >> 16;;
5021         wordmux[6] = zstep & 0xFFFF;
5022         wordmux[7] = zstep >> 16;;
5023         uint16 word = wordmux[((daddbsel & 0x08) >> 1) | (daddbsel & 0x03)];
5024         uint16 addb[4];
5025         bool dbsel2 = daddbsel & 0x04;
5026         bool iincsel = (daddbsel & 0x01) && !(daddbsel & 0x04);
5027
5028         if (!dbsel2 && !iincsel)
5029                 addb[0] = srcd & 0xFFFF,
5030                 addb[1] = (srcd >> 16) & 0xFFFF,
5031                 addb[2] = (srcd >> 32) & 0xFFFF,
5032                 addb[3] = (srcd >> 48) & 0xFFFF;
5033         else if (dbsel2 && !iincsel)
5034                 addb[0] = addb[1] = addb[2] = addb[3] = word;
5035         else if (!dbsel2 && iincsel)
5036                 addb[0] = initinc & 0xFFFF,
5037                 addb[1] = (initinc >> 16) & 0xFFFF,
5038                 addb[2] = (initinc >> 32) & 0xFFFF,
5039                 addb[3] = (initinc >> 48) & 0xFFFF;
5040         else
5041                 addb[0] = addb[1] = addb[2] = addb[3] = 0;
5042
5043         uint8 cinsel = (daddmode >= 1 && daddmode <= 4 ? 1 : 0);
5044
5045 static uint8 co[4];//These are preserved between calls...
5046         uint8 cin[4];
5047
5048         for(int i=0; i<4; i++)
5049                 cin[i] = initcin[i] | (co[i] & cinsel);
5050
5051         bool eightbit = daddmode & 0x02;
5052         bool sat = daddmode & 0x03;
5053         bool hicinh = ((daddmode & 0x03) == 0x03);
5054
5055 //Note that the carry out is saved between calls to this function...
5056         for(int i=0; i<4; i++)
5057                 ADD16SAT(addq[i], co[i], adda[i], addb[i], cin[i], sat, eightbit, hicinh);
5058 }
5059
5060 /*
5061 DEF ADD16SAT (
5062 INT16/  r               // result
5063         co              // carry out
5064         :IO;
5065 INT16/  a
5066 INT16/  b
5067         cin
5068         sat
5069         eightbit
5070         hicinh
5071         :IN);
5072 */
5073 void ADD16SAT(uint16 &r, uint8 &co, uint16 a, uint16 b, uint8 cin, bool sat, bool eightbit, bool hicinh)
5074 {
5075 /*if (logBlit)
5076 {
5077         printf("--> [sat=%s 8b=%s hicinh=%s] %04X + %04X (+ %u) = ", (sat ? "T" : "F"), (eightbit ? "T" : "F"), (hicinh ? "T" : "F"), a, b, cin);
5078         fflush(stdout);
5079 }*/
5080         uint8 carry[4];
5081         uint32 qt = (a & 0xFF) + (b & 0xFF) + cin;
5082         carry[0] = (qt & 0x0100 ? 1 : 0);
5083         uint16 q = qt & 0x00FF;
5084         carry[1] = (carry[0] && !eightbit ? carry[0] : 0);
5085         qt = (a & 0x0F00) + (b & 0x0F00) + (carry[1] << 8);
5086         carry[2] = (qt & 0x1000 ? 1 : 0);
5087         q |= qt & 0x0F00;
5088         carry[3] = (carry[2] && !hicinh ? carry[2] : 0);
5089         qt = (a & 0xF000) + (b & 0xF000) + (carry[3] << 12);
5090         co = (qt & 0x10000 ? 1 : 0);
5091         q |= qt & 0xF000;
5092
5093         uint8 btop = (eightbit ? (b & 0x0080) >> 7 : (b & 0x8000) >> 15);
5094         uint8 ctop = (eightbit ? carry[0] : co);
5095
5096         bool saturate = sat && (btop ^ ctop);
5097         bool hisaturate = saturate && !eightbit;
5098 /*if (logBlit)
5099 {
5100         printf("bt=%u ct=%u s=%u hs=%u] ", btop, ctop, saturate, hisaturate);
5101         fflush(stdout);
5102 }*/
5103
5104         r = (saturate ? (ctop ? 0x00FF : 0x0000) : q & 0x00FF);
5105         r |= (hisaturate ? (ctop ? 0xFF00 : 0x0000) : q & 0xFF00);
5106 /*if (logBlit)
5107 {
5108         printf("%04X (co=%u)\n", r, co);
5109         fflush(stdout);
5110 }*/
5111 }
5112
5113 /**  ADDAMUX - Address adder input A selection  *******************
5114
5115 This module generates the data loaded into the address adder input A.  This is
5116 the update value, and can be one of four registers :  A1 step, A2 step, A1
5117 increment and A1 fraction.  It can complement these values to perform
5118 subtraction, and it can generate constants to increment / decrement the window
5119 pointers.
5120
5121 addasel[0..2] select the register to add
5122
5123 000     A1 step integer part
5124 001     A1 step fraction part
5125 010     A1 increment integer part
5126 011     A1 increment fraction part
5127 100     A2 step
5128
5129 adda_xconst[0..2] generate a power of 2 in the range 1-64 or all zeroes when
5130 they are all 1.
5131
5132 addareg selects register value to be added as opposed to constant
5133 value.
5134
5135 suba_x, suba_y complement the X and Y values
5136
5137 */
5138
5139 /*
5140 DEF ADDAMUX (
5141 INT16/  adda_x
5142 INT16/  adda_y
5143         :OUT;
5144         addasel[0..2]
5145 INT16/  a1_step_x
5146 INT16/  a1_step_y
5147 INT16/  a1_stepf_x
5148 INT16/  a1_stepf_y
5149 INT16/  a2_step_x
5150 INT16/  a2_step_y
5151 INT16/  a1_inc_x
5152 INT16/  a1_inc_y
5153 INT16/  a1_incf_x
5154 INT16/  a1_incf_y
5155         adda_xconst[0..2]
5156         adda_yconst
5157         addareg
5158         suba_x
5159         suba_y :IN);
5160 */
//
// ADDAMUX - address adder input A selection
//
// Produces the X and Y "A" operands that are fed to the blitter address adder.
//
//   adda_x, adda_y  (out) the selected, optionally complemented, operands
//   addasel         register select: if bit 2 is set the A2 step is used, otherwise
//                   bits 0-1 pick A1 step (0), A1 step fraction (1), A1 increment (2)
//                   or A1 increment fraction (3)
//   a1_step_*, a1_stepf_*, a2_step_*, a1_inc_*, a1_incf_*
//                   the candidate register values
//   adda_xconst     3-bit code for the X constant: 0-6 yields 1 << code, 7 yields 0.
//                   Only the low three bits are examined, as in the netlist.
//   adda_yconst     Y constant (true = 1, false = 0)
//   addareg         true selects the register value, false selects the constant
//   suba_x, suba_y  true returns the one's complement of the selected value (the
//                   matching carry in is supplied by the adder itself)
//
void ADDAMUX(int16 &adda_x, int16 &adda_y, uint8 addasel, int16 a1_step_x, int16 a1_step_y,
        int16 a1_stepf_x, int16 a1_stepf_y, int16 a2_step_x, int16 a2_step_y,
        int16 a1_inc_x, int16 a1_inc_y, int16 a1_incf_x, int16 a1_incf_y, uint8 adda_xconst,
        bool adda_yconst, bool addareg, bool suba_x, bool suba_y)
{

/*INT16/        addac_x, addac_y, addar_x, addar_y, addart_x, addart_y,
INT16/  addas_x, addas_y, suba_x16, suba_y16
:LOCAL;
BEGIN

Zero            := TIE0 (zero);*/

/* Multiplex the register terms */

/*Addaselb[0-2] := BUF8 (addaselb[0-2], addasel[0-2]);
Addart_x        := MX4 (addart_x, a1_step_x, a1_stepf_x, a1_inc_x, a1_incf_x, addaselb[0..1]);
Addar_x         := MX2 (addar_x, addart_x, a2_step_x, addaselb[2]);
Addart_y        := MX4 (addart_y, a1_step_y, a1_stepf_y, a1_inc_y, a1_incf_y, addaselb[0..1]);
Addar_y         := MX2 (addar_y, addart_y, a2_step_y, addaselb[2]);*/

////////////////////////////////////// C++ CODE //////////////////////////////////////
        const int16 xterm[4] = { a1_step_x, a1_stepf_x, a1_inc_x, a1_incf_x };
        const int16 yterm[4] = { a1_step_y, a1_stepf_y, a1_inc_y, a1_incf_y };
        const bool useA2Step = (addasel & 0x04) != 0;
        const int16 addar_x = (useA2Step ? a2_step_x : xterm[addasel & 0x03]);
        const int16 addar_y = (useA2Step ? a2_step_y : yterm[addasel & 0x03]);
//////////////////////////////////////////////////////////////////////////////////////

/* Generate a constant value - this is a power of 2 in the range
1-64, or zero.  The control bits are adda_xconst[0..2], when they
are all 1  the result is 0.
Constants for Y can only be 0 or 1 */

/*Addac_xlo     := D38H (addac_x[0..6], unused[0], adda_xconst[0..2]);
Unused[0]       := DUMMY (unused[0]);

Addac_x         := JOIN (addac_x, addac_x[0..6], zero, zero, zero, zero, zero, zero, zero, zero, zero);
Addac_y         := JOIN (addac_y, adda_yconst, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero,
                        zero, zero, zero, zero, zero);*/
////////////////////////////////////// C++ CODE //////////////////////////////////////
        // The decoder only sees three bits, so discard anything above them. This also
        // keeps the shift count in the range 0-6, where it is always well defined.
        const uint8 xconstCode = adda_xconst & 0x07;
        const int16 addac_x = (xconstCode == 0x07 ? 0 : (int16)(1 << xconstCode));
        const int16 addac_y = (adda_yconst ? 1 : 0);
//////////////////////////////////////////////////////////////////////////////////////

/* Select between constant value and register value */

/*Addas_x               := MX2 (addas_x, addac_x, addar_x, addareg);
Addas_y         := MX2 (addas_y, addac_y, addar_y, addareg);*/
////////////////////////////////////// C++ CODE //////////////////////////////////////
        const int16 addas_x = (addareg ? addar_x : addac_x);
        const int16 addas_y = (addareg ? addar_y : addac_y);
//////////////////////////////////////////////////////////////////////////////////////

/* Complement these values (complement flag gives adder carry in)*/

/*Suba_x16      := JOIN (suba_x16, suba_x, suba_x, suba_x, suba_x, suba_x, suba_x, suba_x, suba_x, suba_x,
                        suba_x, suba_x, suba_x, suba_x, suba_x, suba_x, suba_x);
Suba_y16        := JOIN (suba_y16, suba_y, suba_y, suba_y, suba_y, suba_y, suba_y, suba_y, suba_y, suba_y,
                        suba_y, suba_y, suba_y, suba_y, suba_y, suba_y, suba_y);
Adda_x          := EO (adda_x, suba_x16, addas_x);
Adda_y          := EO (adda_y, suba_y16, addas_y);*/
////////////////////////////////////// C++ CODE //////////////////////////////////////
        // XORing all sixteen bits with the flag is a bitwise NOT when the flag is set.
        // The result of ~ on a promoted int16 always fits back into an int16.
        adda_x = (suba_x ? (int16)~addas_x : addas_x);
        adda_y = (suba_y ? (int16)~addas_y : addas_y);
//////////////////////////////////////////////////////////////////////////////////////

//END;
}
5230
5231 /**  ADDBMUX - Address adder input B selection  *******************
5232
5233 This module selects the register to be updated by the address
5234 adder.  This can be one of three registers, the A1 and A2
5235 pointers, or the A1 fractional part. It can also be zero, so that the step
5236 registers load directly into the pointers.
5237 */
5238
5239 /*DEF ADDBMUX (
5240 INT16/  addb_x
5241 INT16/  addb_y
5242         :OUT;
5243         addbsel[0..1]
5244 INT16/  a1_x
5245 INT16/  a1_y
5246 INT16/  a2_x
5247 INT16/  a2_y
5248 INT16/  a1_frac_x
5249 INT16/  a1_frac_y
5250         :IN);
5251 INT16/  zero16 :LOCAL;
5252 BEGIN*/
//
// ADDBMUX - address adder input B selection
//
// Picks the X/Y pair that is fed to the B input of the address adder.
//
//   addb_x, addb_y  (out) the selected pair
//   addbsel         2-bit select (only bits 0-1 are examined):
//                     0 = A1 pointer, 1 = A2 pointer, 2 = A1 fraction, 3 = zero
//   a1_x, a1_y, a2_x, a2_y, a1_frac_x, a1_frac_y
//                   the candidate values
//
void ADDBMUX(int16 &addb_x, int16 &addb_y, uint8 addbsel, int16 a1_x, int16 a1_y,
        int16 a2_x, int16 a2_y, int16 a1_frac_x, int16 a1_frac_y)
{

/*Zero          := TIE0 (zero);
Zero16          := JOIN (zero16, zero, zero, zero, zero, zero, zero, zero,
                        zero, zero, zero, zero, zero, zero, zero, zero, zero);
Addbselb[0-1]   := BUF8 (addbselb[0-1], addbsel[0-1]);
Addb_x          := MX4 (addb_x, a1_x, a2_x, a1_frac_x, zero16, addbselb[0..1]);
Addb_y          := MX4 (addb_y, a1_y, a2_y, a1_frac_y, zero16, addbselb[0..1]);*/
////////////////////////////////////// C++ CODE //////////////////////////////////////
        switch (addbsel & 0x03)
        {
        case 0:                                 // A1 pointer
                addb_x = a1_x;
                addb_y = a1_y;
                break;
        case 1:                                 // A2 pointer
                addb_x = a2_x;
                addb_y = a2_y;
                break;
        case 2:                                 // A1 fractional part
                addb_x = a1_frac_x;
                addb_y = a1_frac_y;
                break;
        default:                                // 3: zero, so the A input passes straight through
                addb_x = 0;
                addb_y = 0;
                break;
        }
//////////////////////////////////////////////////////////////////////////////////////

//END;
}
5273
5274 /**  DATAMUX - Address local data bus selection  ******************
5275
5276 Select between the adder output and the input data bus
5277 */
5278
5279 /*DEF DATAMUX (
5280 INT16/  data_x
5281 INT16/  data_y
5282         :OUT;
5283 INT32/  gpu_din
5284 INT16/  addq_x
5285 INT16/  addq_y
5286         addqsel
5287         :IN);
5288
5289 INT16/  gpu_lo, gpu_hi
5290 :LOCAL;
5291 BEGIN*/
//
// DATAMUX - address local data bus selection
//
// Chooses what is placed on the local X/Y data bus.
//
//   data_x, data_y  (out) the selected pair
//   gpu_din         32-bit GPU data; its low word supplies X and its high word Y
//   addq_x, addq_y  address adder outputs
//   addqsel         true = take the adder outputs, false = take the GPU data halves
//
void DATAMUX(int16 &data_x, int16 &data_y, uint32 gpu_din, int16 addq_x, int16 addq_y, bool addqsel)
{
/*Gpu_lo                := JOIN (gpu_lo, gpu_din{0..15});
Gpu_hi          := JOIN (gpu_hi, gpu_din{16..31});

Addqselb        := BUF8 (addqselb, addqsel);
Data_x          := MX2 (data_x, gpu_lo, addq_x, addqselb);
Data_y          := MX2 (data_y, gpu_hi, addq_y, addqselb);*/
////////////////////////////////////// C++ CODE //////////////////////////////////////
        if (addqsel)
        {
                // Adder result goes straight onto the bus
                data_x = addq_x;
                data_y = addq_y;
                return;
        }

        // Otherwise split the GPU data word into its two 16-bit halves
        data_x = (int16)(gpu_din & 0xFFFF);
        data_y = (int16)(gpu_din >> 16);
//////////////////////////////////////////////////////////////////////////////////////

//END;
}
5307
5308 /******************************************************************
5309 addradd
5310 29/11/90
5311
5312 Blitter Address Adder
5313 ---------------------
5314 The blitter address adder is a pair of sixteen bit adders, one
5315 each for X and Y.  The multiplexing of the input terms is
5316 performed elsewhere, but this adder can also perform modulo
5317 arithmetic to align X-addresses onto phrase boundaries.
5318
5319 modx[0..2] take values
5320 000     no mask
5321 001     mask bit 0
5322 010     mask bits 1-0
5323 ..
5324 110     mask bits 5-0
5325
5326 ******************************************************************/
5327
5328 /*IMPORT duplo, tosh;
5329
5330 DEF ADDRADD (
5331 INT16/  addq_x
5332 INT16/  addq_y
5333                 :OUT;
5334                 a1fracldi               // propagate address adder carry
5335 INT16/  adda_x
5336 INT16/  adda_y
5337 INT16/  addb_x
5338 INT16/  addb_y
5339                 clk[0]                  // co-processor clock
5340                 modx[0..2]
5341                 suba_x
5342                 suba_y
5343                 :IN);
5344
5345 BEGIN
5346
5347 Zero            := TIE0 (zero);*/
//
// ADDRADD - blitter address adder
//
// A pair of 16-bit adders, one for X and one for Y. The X result can have its low
// bits forced to zero (modulo arithmetic for aligning X addresses).
//
//   addq_x, addq_y  (out) the 16-bit sums; addq_x has the modx mask applied
//   a1fracldi       when true, the carry out of each adder is latched and fed into the
//                   NEXT call as carry in; when false the latch is cleared
//   adda_x, adda_y  adder A inputs (already complemented by the caller if subtracting)
//   addb_x, addb_y  adder B inputs
//   modx            3-bit X mask select (only bits 0-2 are examined):
//                     0 = no mask, 1 = clear bit 0, 2 = clear bits 1-0, ...,
//                     6 = clear bits 5-0, 7 = no mask (decoder output is unused)
//   suba_x, suba_y  subtract flags; each one inverts the carry in of its adder
//
// NOTE: the latched carries live in function-local statics, so this function keeps
// state between calls.
//
void ADDRADD(int16 &addq_x, int16 &addq_y, bool a1fracldi,
        uint16 adda_x, uint16 adda_y, uint16 addb_x, uint16 addb_y, uint8 modx, bool suba_x, bool suba_y)
{

/* Perform the addition */

/*Adder_x               := ADD16 (addqt_x[0..15], co_x, adda_x{0..15}, addb_x{0..15}, ci_x);
Adder_y         := ADD16 (addq_y[0..15], co_y, adda_y{0..15}, addb_y{0..15}, ci_y);*/

/* latch carry and propagate if required */

/*Cxt0          := AN2 (cxt[0], co_x, a1fracldi);
Cxt1            := FD1Q (cxt[1], cxt[0], clk[0]);
Ci_x            := EO (ci_x, cxt[1], suba_x);

Cyt0            := AN2 (cyt[0], co_y, a1fracldi);
Cyt1            := FD1Q (cyt[1], cyt[0], clk[0]);
Ci_y            := EO (ci_y, cyt[1], suba_y);*/

////////////////////////////////////// C++ CODE //////////////////////////////////////
        // Latched carries (cxt[1]/cyt[1] above): they have to propagate between calls
        static uint16 co_x = 0, co_y = 0;

        const uint32 ci_x = co_x ^ (suba_x ? 1 : 0);
        const uint32 ci_y = co_y ^ (suba_y ? 1 : 0);

        // Do the sums 32 bits wide so that bit 16 holds the carry out
        const uint32 addqt_x = (uint32)adda_x + (uint32)addb_x + ci_x;
        const uint32 addqt_y = (uint32)adda_y + (uint32)addb_y + ci_y;

        co_x = ((addqt_x & 0x10000) && a1fracldi ? 1 : 0);
        co_y = ((addqt_y & 0x10000) && a1fracldi ? 1 : 0);
//////////////////////////////////////////////////////////////////////////////////////

/* Mask low bits of X to 0 if required */

/*Masksel               := D38H (unused[0], masksel[0..4], maskbit[5], unused[1], modx[0..2]);

Maskbit[0-4]    := OR2 (maskbit[0-4], masksel[0-4], maskbit[1-5]);

Mask[0-5]       := MX2 (addq_x[0-5], addqt_x[0-5], zero, maskbit[0-5]);

Addq_x          := JOIN (addq_x, addq_x[0..5], addqt_x[6..15]);
Addq_y          := JOIN (addq_y, addq_y[0..15]);*/

////////////////////////////////////// C++ CODE //////////////////////////////////////
        // The table is unsigned: values such as 0xFFFF do not fit in an int16, and a
        // narrowing conversion inside a braced initializer is ill-formed from C++11 on.
        // Entry 7 is "no mask" because decoder output 7 goes to unused[1] in the
        // netlist above, so no maskbit is raised for it.
        static const uint16 mask[8] = { 0xFFFF, 0xFFFE, 0xFFFC, 0xFFF8, 0xFFF0, 0xFFE0, 0xFFC0, 0xFFFF };

        // Only modx[0..2] exist in hardware; masking also keeps the index in bounds
        addq_x = (int16)(addqt_x & mask[modx & 0x07]);
        addq_y = (int16)(addqt_y & 0xFFFF);
//////////////////////////////////////////////////////////////////////////////////////

//Unused[0-1]   := DUMMY (unused[0-1]);

//END;
}
5399
5400 /*
5401 DEF DATA (
5402                 wdata[0..63]    // co-processor write data bus
5403                 :BUS;
5404                 dcomp[0..7]             // data byte equal flags
5405                 srcd[0..7]              // bits to use for bit to byte expansion
5406                 zcomp[0..3]             // output from Z comparators
5407                 :OUT;
5408                 a1_x[0..1]              // low two bits of A1 X pointer
5409                 big_pix                 // pixel organisation is big-endian
5410                 blitter_active  // blitter is active
5411                 clk                             // co-processor clock
5412                 cmpdst                  // compare dest rather than source
5413                 colorld                 // load the pattern color fields
5414                 daddasel[0..2]  // data adder input A selection
5415                 daddbsel[0..3]  // data adder input B selection
5416                 daddmode[0..2]  // data adder mode
5417                 daddq_sel               // select adder output vs. GPU data
5418                 data[0..63]             // co-processor read data bus
5419                 data_ena                // enable write data
5420                 data_sel[0..1]  // select data to write
5421                 dbinh\[0..7]    // byte oriented changed data inhibits
5422                 dend[0..5]              // end of changed write data zone
5423                 dpipe[0..1]             // load computed data pipe-line latch
5424                 dstart[0..5]    // start of changed write data zone
5425                 dstdld[0..1]    // dest data load (two halves)
5426                 dstzld[0..1]    // dest zed load (two halves)
5427                 ext_int                 // enable extended precision intensity calculations
5428 INT32/  gpu_din                 // GPU data bus
5429                 iincld                  // I increment load
5430                 iincldx                 // alternate I increment load
5431                 init_if                 // initialise I fraction phase
5432                 init_ii                 // initialise I integer phase
5433                 init_zf                 // initialise Z fraction phase
5434                 intld[0..3]             // computed intensities load
5435                 istepadd                // intensity step integer add
5436                 istepfadd               // intensity step fraction add
5437                 istepld                 // I step load
5438                 istepdld                // I step delta load
5439                 lfu_func[0..3]  // LFU function code
5440                 patdadd                 // pattern data gouraud add
5441                 patdld[0..1]    // pattern data load (two halves)
5442                 pdsel[0..1]             // select pattern data type
5443                 phrase_mode             // phrase write mode
5444                 reload                  // transfer contents of double buffers
5445                 reset\                  // system reset
5446                 srcd1ld[0..1]   // source register 1 load (two halves)
5447                 srcdread                // source data read load enable
5448                 srczread                // source zed read load enable
5449                 srcshift[0..5]  // source alignment shift
5450                 srcz1ld[0..1]   // source zed 1 load (two halves)
5451                 srcz2add                // zed fraction gouraud add
5452                 srcz2ld[0..1]   // source zed 2 load (two halves)
5453                 textrgb                 // texture mapping in RGB mode
5454                 txtd[0..63]             // data from the texture unit
5455                 zedld[0..3]             // computed zeds load
5456                 zincld                  // Z increment load
5457                 zmode[0..2]             // Z comparator mode
5458                 zpipe[0..1]             // load computed zed pipe-line latch
5459                 zstepadd                // zed step integer add
5460                 zstepfadd               // zed step fraction add
5461                 zstepld                 // Z step load
5462                 zstepdld                // Z step delta load
5463                 :IN);
5464 */
5465
5466 void DATA(uint64 &wdata, uint8 &dcomp, uint8 &zcomp, bool &nowrite,
5467         bool big_pix, bool cmpdst, uint8 daddasel, uint8 daddbsel, uint8 daddmode, bool daddq_sel, uint8 data_sel,
5468         uint8 dbinh, uint8 dend, uint8 dstart, uint64 dstd, uint32 iinc, uint8 lfu_func, uint64 &patd, bool patdadd,
5469         bool phrase_mode, uint64 srcd, bool srcdread, bool srczread, bool srcz2add, uint8 zmode,
5470         bool bcompen, bool bkgwren, bool dcompen, uint8 icount, uint8 pixsize,
5471         uint64 &srcz, uint64 dstz, uint32 zinc)
5472 {
5473 /*
5474   Stuff we absolutely *need* to have passed in/out:
5475 IN:
5476   patdadd, dstd, srcd, patd, daddasel, daddbsel, daddmode, iinc, srcz1, srcz2, big_pix, phrase_mode, cmpdst
5477 OUT:
5478   changed patd (wdata I guess...) (Nope. We pass it back directly now...)
5479 */
5480
5481 // Source data registers
5482
5483 /*Data_src      := DATA_SRC (srcdlo, srcdhi, srcz[0..1], srczo[0..1], srczp[0..1], srcz1[0..1], srcz2[0..1], big_pix,
5484                         clk, gpu_din, intld[0..3], local_data0, local_data1, srcd1ld[0..1], srcdread, srczread, srcshift[0..5],
5485                         srcz1ld[0..1], srcz2add, srcz2ld[0..1], zedld[0..3], zpipe[0..1]);
5486 Srcd[0-7]       := JOIN (srcd[0-7], srcdlo{0-7});
5487 Srcd[8-31]      := JOIN (srcd[8-31], srcdlo{8-31});
5488 Srcd[32-63]     := JOIN (srcd[32-63], srcdhi{0-31});*/
5489
5490 // Destination data registers
5491
5492 /*Data_dst      := DATA_DST (dstd[0..63], dstz[0..1], clk, dstdld[0..1], dstzld[0..1], load_data[0..1]);
5493 Dstdlo          := JOIN (dstdlo, dstd[0..31]);
5494 Dstdhi          := JOIN (dstdhi, dstd[32..63]);*/
5495
5496 // Pattern and Color data registers
5497
5498 // Looks like this is simply another register file for the pattern data registers. No adding or anything funky
5499 // going on. Note that patd & patdv will output the same info.
5500 // Patdldl/h (patdld[0..1]) can select the local_data bus to overwrite the current pattern data...
5501 // Actually, it can be either patdld OR patdadd...!
5502 /*Data_pat      := DATA_PAT (colord[0..15], int0dp[8..10], int1dp[8..10], int2dp[8..10], int3dp[8..10], mixsel[0..2],
5503                         patd[0..63], patdv[0..1], clk, colorld, dpipe[0], ext_int, gpu_din, intld[0..3], local_data0, local_data1,
5504                         patdadd, patdld[0..1], reload, reset\);
5505 Patdlo          := JOIN (patdlo, patd[0..31]);
5506 Patdhi          := JOIN (patdhi, patd[32..63]);*/
5507
5508 // Multiplying data Mixer (NOT IN JAGUAR I)
5509
5510 /*Datamix               := DATAMIX (patdo[0..1], clk, colord[0..15], dpipe[1], dstd[0..63], int0dp[8..10], int1dp[8..10],
5511                         int2dp[8..10], int3dp[8..10], mixsel[0..2], patd[0..63], pdsel[0..1], srcd[0..63], textrgb, txtd[0..63]);*/
5512
5513 // Logic function unit
5514
5515 /*Lfu           := LFU (lfu[0..1], srcdlo, srcdhi, dstdlo, dstdhi, lfu_func[0..3]);*/
5516 ////////////////////////////////////// C++ CODE //////////////////////////////////////
5517         uint64 funcmask[2] = { 0, 0xFFFFFFFFFFFFFFFFLL };
5518         uint64 func0 = funcmask[lfu_func & 0x01];
5519         uint64 func1 = funcmask[(lfu_func >> 1) & 0x01];
5520         uint64 func2 = funcmask[(lfu_func >> 2) & 0x01];
5521         uint64 func3 = funcmask[(lfu_func >> 3) & 0x01];
5522         uint64 lfu = (~srcd & ~dstd & func0) | (~srcd & dstd & func1) | (srcd & ~dstd & func2) | (srcd & dstd & func3);
5523 //////////////////////////////////////////////////////////////////////////////////////
5524
5525 // Increment and Step Registers
5526
5527 // Does it do anything without the step add lines? Check it!
5528 // No. This is pretty much just a register file without the Jaguar II lines...
5529 /*Inc_step      := INC_STEP (iinc, istep[0..31], zinc, zstep[0..31], clk, ext_int, gpu_din, iincld, iincldx, istepadd,
5530                         istepfadd, istepld, istepdld, reload, reset\, zincld, zstepadd, zstepfadd, zstepld, zstepdld);
5531 Istep           := JOIN (istep, istep[0..31]);
5532 Zstep           := JOIN (zstep, zstep[0..31]);*/
5533
5534 // Pixel data comparator
5535
5536 /*Datacomp      := DATACOMP (dcomp[0..7], cmpdst, dstdlo, dstdhi, patdlo, patdhi, srcdlo, srcdhi);*/
5537 ////////////////////////////////////// C++ CODE //////////////////////////////////////
5538         dcomp = 0;
5539         uint64 cmpd = patd ^ (cmpdst ? dstd : srcd);
5540
5541         if ((cmpd & 0x00000000000000FFLL) == 0)
5542                 dcomp |= 0x01;
5543         if ((cmpd & 0x000000000000FF00LL) == 0)
5544                 dcomp |= 0x02;
5545         if ((cmpd & 0x0000000000FF0000LL) == 0)
5546                 dcomp |= 0x04;
5547         if ((cmpd & 0x00000000FF000000LL) == 0)
5548                 dcomp |= 0x08;
5549         if ((cmpd & 0x000000FF00000000LL) == 0)
5550                 dcomp |= 0x10;
5551         if ((cmpd & 0x0000FF0000000000LL) == 0)
5552                 dcomp |= 0x20;
5553         if ((cmpd & 0x00FF000000000000LL) == 0)
5554                 dcomp |= 0x40;
5555         if ((cmpd & 0xFF00000000000000LL) == 0)
5556                 dcomp |= 0x80;
5557 //////////////////////////////////////////////////////////////////////////////////////
5558
5559 // Zed comparator for Z-buffer operations
5560
5561 /*Zedcomp               := ZEDCOMP (zcomp[0..3], srczp[0..1], dstz[0..1], zmode[0..2]);*/
5562 ////////////////////////////////////// C++ CODE //////////////////////////////////////
5563 //srczp is srcz pipelined, also it goes through a source shift as well...
5564 /*The shift is basically like so (each piece is 16 bits long):
5565
5566         0         1         2         3         4          5         6
5567         srcz1lolo srcz1lohi srcz1hilo srcz1hihi srcrz2lolo srcz2lohi srcz2hilo
5568
5569 with srcshift bits 4 & 5 selecting the start position
5570 */
5571 //So... basically what we have here is:
5572         zcomp = 0;
5573
5574         if ((((srcz & 0x000000000000FFFFLL) < (dstz & 0x000000000000FFFFLL)) && (zmode & 0x01))
5575                 || (((srcz & 0x000000000000FFFFLL) == (dstz & 0x000000000000FFFFLL)) && (zmode & 0x02))
5576                 || (((srcz & 0x000000000000FFFFLL) > (dstz & 0x000000000000FFFFLL)) && (zmode & 0x04)))
5577                 zcomp |= 0x01;
5578
5579         if ((((srcz & 0x00000000FFFF0000LL) < (dstz & 0x00000000FFFF0000LL)) && (zmode & 0x01))
5580                 || (((srcz & 0x00000000FFFF0000LL) == (dstz & 0x00000000FFFF0000LL)) && (zmode & 0x02))
5581                 || (((srcz & 0x00000000FFFF0000LL) > (dstz & 0x00000000FFFF0000LL)) && (zmode & 0x04)))
5582                 zcomp |= 0x02;
5583
5584         if ((((srcz & 0x0000FFFF00000000LL) < (dstz & 0x0000FFFF00000000LL)) && (zmode & 0x01))
5585                 || (((srcz & 0x0000FFFF00000000LL) == (dstz & 0x0000FFFF00000000LL)) && (zmode & 0x02))
5586                 || (((srcz & 0x0000FFFF00000000LL) > (dstz & 0x0000FFFF00000000LL)) && (zmode & 0x04)))
5587                 zcomp |= 0x04;
5588
5589         if ((((srcz & 0xFFFF000000000000LL) < (dstz & 0xFFFF000000000000LL)) && (zmode & 0x01))
5590                 || (((srcz & 0xFFFF000000000000LL) == (dstz & 0xFFFF000000000000LL)) && (zmode & 0x02))
5591                 || (((srcz & 0xFFFF000000000000LL) > (dstz & 0xFFFF000000000000LL)) && (zmode & 0x04)))
5592                 zcomp |= 0x08;
5593
5594 //TEMP, TO TEST IF ZCOMP IS THE CULPRIT...
5595 //Nope, this is NOT the problem...
5596 //zcomp=0;
5597 // We'll do the comparison/bit/byte inhibits here, since that's they way it happens
5598 // in the real thing (dcomp goes out to COMP_CTRL and back into DATA through dbinh)...
5599 #if 1
5600         uint8 dbinht;
5601 //      bool nowrite;
5602         COMP_CTRL(dbinht, nowrite,
5603                 bcompen, true/*big_pix*/, bkgwren, dcomp, dcompen, icount, pixsize, phrase_mode, srcd & 0xFF, zcomp);
5604         dbinh = dbinht;
5605 //      dbinh = 0x00;
5606 #endif
5607
5608 #if 1
5609 #ifdef VERBOSE_BLITTER_LOGGING
5610 if (logBlit)
5611 {
5612         printf("\n[dcomp=%02X zcomp=%02X dbinh=%02X]\n", dcomp, zcomp, dbinh);
5613         fflush(stdout);
5614 }//*/
5615 #endif
5616 #endif
5617 //////////////////////////////////////////////////////////////////////////////////////
5618
5619 // 22 Mar 94
5620 // The data initializer - allows all four initial values to be computed from one (NOT IN JAGUAR I)
5621
5622 /*Datinit               := DATINIT (initcin[0..3], initinc[0..63], initpix[0..15], a1_x[0..1], big_pix, clk, iinc, init_if, init_ii,
5623                         init_zf, istep[0..31], zinc, zstep[0..31]);*/
5624
5625 // Adder array for Z and intensity increments
5626
5627 /*Addarray      := ADDARRAY (addq[0..3], clk, daddasel[0..2], daddbsel[0..3], daddmode[0..2], dstdlo, dstdhi, iinc,
5628                         initcin[0..3], initinc[0..63], initpix[0..15], istep, patdv[0..1], srcdlo, srcdhi, srcz1[0..1],
5629                         srcz2[0..1], reset\, zinc, zstep);*/
5630 /*void ADDARRAY(uint16 * addq, uint8 daddasel, uint8 daddbsel, uint8 daddmode,
5631         uint64 dstd, uint32 iinc, uint8 initcin[], uint64 initinc, uint16 initpix,
5632         uint32 istep, uint64 patd, uint64 srcd, uint64 srcz1, uint64 srcz2,
5633         uint32 zinc, uint32 zstep)*/
5634 ////////////////////////////////////// C++ CODE //////////////////////////////////////
5635         uint16 addq[4];
5636         uint8 initcin[4] = { 0, 0, 0, 0 };
5637         ADDARRAY(addq, daddasel, daddbsel, daddmode, dstd, iinc, initcin, 0, 0, 0, patd, srcd, 0, 0, 0, 0);
5638
5639         //This is normally done asynchronously above (thru local_data) when in patdadd mode...
5640 //And now it's passed back to the caller to be persistent between calls...!
5641 //But it's causing some serious fuck-ups in T2K now... !!! FIX !!! [DONE--???]
5642 //Weird! It doesn't anymore...!
5643         if (patdadd)
5644                 patd = ((uint64)addq[3] << 48) | ((uint64)addq[2] << 32) | ((uint64)addq[1] << 16) | (uint64)addq[0];
5645 //////////////////////////////////////////////////////////////////////////////////////
5646
5647 // Local data bus multiplexer
5648
5649 /*Local_mux     := LOCAL_MUX (local_data[0..1], load_data[0..1],
5650         addq[0..3], gpu_din, data[0..63], blitter_active, daddq_sel);
5651 Local_data0     := JOIN (local_data0, local_data[0]);
5652 Local_data1     := JOIN (local_data1, local_data[1]);*/
5653 ////////////////////////////////////// C++ CODE //////////////////////////////////////
5654 //////////////////////////////////////////////////////////////////////////////////////
5655
5656 // Data output multiplexer and tri-state drive
5657
5658 /*Data_mux      := DATA_MUX (wdata[0..63], addq[0..3], big_pix, dstdlo, dstdhi, dstz[0..1], data_sel[0..1], data_ena,
5659                         dstart[0..5], dend[0..5], dbinh\[0..7], lfu[0..1], patdo[0..1], phrase_mode, srczo[0..1]);*/
5660 ////////////////////////////////////// C++ CODE //////////////////////////////////////
5661 // NOTE: patdo comes from DATAMIX and can be considered the same as patd for Jaguar I
5662
5663 //////////////////////////////////////////////////////////////////////////////////////
5664 //}
5665
5666 /*DEF DATA_MUX (
5667                 wdata[0..63]    // co-processor rwrite data bus
5668                 :BUS;
5669 INT16/  addq[0..3]
5670                 big_pix                 // Pixel organisation is big-endian
5671 INT32/  dstdlo
5672 INT32/  dstdhi
5673 INT32/  dstzlo
5674 INT32/  dstzhi
5675                 data_sel[0..1]  // source of write data
5676                 data_ena                // enable write data onto read/write bus
5677                 dstart[0..5]    // start of changed write data
5678                 dend[0..5]              // end of changed write data
5679                 dbinh\[0..7]    // byte oriented changed data inhibits
5680 INT32/  lfu[0..1]
5681 INT32/  patd[0..1]
5682                 phrase_mode             // phrase write mode
5683 INT32/  srczlo
5684 INT32/  srczhi
5685                 :IN);*/
5686
5687 /*INT32/        addql[0..1], ddatlo, ddathi zero32
5688 :LOCAL;
5689 BEGIN
5690
5691 Phrase_mode\    := INV1 (phrase_mode\, phrase_mode);
5692 Zero            := TIE0 (zero);
5693 Zero32          := JOIN (zero32, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero);*/
5694
5695 /* Generate a changed data mask */
5696
5697 /*Edis          := OR6 (edis\, dend[0..5]);
5698 Ecoarse         := DECL38E (e_coarse\[0..7], dend[3..5], edis\);
5699 E_coarse[0]     := INV1 (e_coarse[0], e_coarse\[0]);
5700 Efine           := DECL38E (unused[0], e_fine\[1..7], dend[0..2], e_coarse[0]);*/
5701 ////////////////////////////////////// C++ CODE //////////////////////////////////////
5702         uint8 decl38e[2][8] = { { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF },
5703                 { 0xFE, 0xFD, 0xFB, 0xF7, 0xEF, 0xDF, 0xBF, 0x7F } };
5704         uint8 dech38[8] = { 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80 };
5705         uint8 dech38el[2][8] = { { 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80 },
5706                 { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } };
5707
5708                         int en = (dend & 0x3F ? 1 : 0);
5709         uint8 e_coarse = decl38e[en][(dend & 0x38) >> 3];               // Actually, this is e_coarse inverted...
5710         uint8 e_fine = decl38e[(e_coarse & 0x01) ^ 0x01][dend & 0x07];
5711         e_fine &= 0xFE;
5712 //////////////////////////////////////////////////////////////////////////////////////
5713
5714 /*Scoarse               := DECH38 (s_coarse[0..7], dstart[3..5]);
5715 Sfen\           := INV1 (sfen\, s_coarse[0]);
5716 Sfine           := DECH38EL (s_fine[0..7], dstart[0..2], sfen\);*/
5717 ////////////////////////////////////// C++ CODE //////////////////////////////////////
5718         uint8 s_coarse = dech38[(dstart & 0x38) >> 3];
5719         uint8 s_fine = dech38el[(s_coarse & 0x01) ^ 0x01][dstart & 0x07];
5720 //////////////////////////////////////////////////////////////////////////////////////
5721
5722 /*Maskt[0]      := BUF1 (maskt[0], s_fine[0]);
5723 Maskt[1-7]      := OAN1P (maskt[1-7], maskt[0-6], s_fine[1-7], e_fine\[1-7]);*/
5724 ////////////////////////////////////// C++ CODE //////////////////////////////////////
5725         uint16 maskt = s_fine & 0x0001;
5726         maskt |= (((maskt & 0x0001) || (s_fine & 0x02)) && (e_fine & 0x02) ? 0x0002 : 0x0000);
5727         maskt |= (((maskt & 0x0002) || (s_fine & 0x04)) && (e_fine & 0x04) ? 0x0004 : 0x0000);
5728         maskt |= (((maskt & 0x0004) || (s_fine & 0x08)) && (e_fine & 0x08) ? 0x0008 : 0x0000);
5729         maskt |= (((maskt & 0x0008) || (s_fine & 0x10)) && (e_fine & 0x10) ? 0x0010 : 0x0000);
5730         maskt |= (((maskt & 0x0010) || (s_fine & 0x20)) && (e_fine & 0x20) ? 0x0020 : 0x0000);
5731         maskt |= (((maskt & 0x0020) || (s_fine & 0x40)) && (e_fine & 0x40) ? 0x0040 : 0x0000);
5732         maskt |= (((maskt & 0x0040) || (s_fine & 0x80)) && (e_fine & 0x80) ? 0x0080 : 0x0000);
5733 //////////////////////////////////////////////////////////////////////////////////////
5734
5735 /* Produce a look-ahead on the ripple carry:
5736 masktla = s_coarse[0] . /e_coarse[0] */
5737 /*Masktla               := AN2 (masktla, s_coarse[0], e_coarse\[0]);
5738 Maskt[8]        := OAN1P (maskt[8], masktla, s_coarse[1], e_coarse\[1]);
5739 Maskt[9-14]     := OAN1P (maskt[9-14], maskt[8-13], s_coarse[2-7], e_coarse\[2-7]);*/
5740 ////////////////////////////////////// C++ CODE //////////////////////////////////////
5741         maskt |= (((s_coarse & e_coarse & 0x01) || (s_coarse & 0x02)) && (e_coarse & 0x02) ? 0x0100 : 0x0000);
5742         maskt |= (((maskt & 0x0100) || (s_coarse & 0x04)) && (e_coarse & 0x04) ? 0x0200 : 0x0000);
5743         maskt |= (((maskt & 0x0200) || (s_coarse & 0x08)) && (e_coarse & 0x08) ? 0x0400 : 0x0000);
5744         maskt |= (((maskt & 0x0400) || (s_coarse & 0x10)) && (e_coarse & 0x10) ? 0x0800 : 0x0000);
5745         maskt |= (((maskt & 0x0800) || (s_coarse & 0x20)) && (e_coarse & 0x20) ? 0x1000 : 0x0000);
5746         maskt |= (((maskt & 0x1000) || (s_coarse & 0x40)) && (e_coarse & 0x40) ? 0x2000 : 0x0000);
5747         maskt |= (((maskt & 0x2000) || (s_coarse & 0x80)) && (e_coarse & 0x80) ? 0x4000 : 0x0000);
5748 //////////////////////////////////////////////////////////////////////////////////////
5749
5750 /* The bit terms are mirrored for big-endian pixels outside phrase
5751 mode.  The byte terms are mirrored for big-endian pixels in phrase
5752 mode.  */
5753
5754 /*Mirror_bit    := AN2M (mir_bit, phrase_mode\, big_pix);
5755 Mirror_byte     := AN2H (mir_byte, phrase_mode, big_pix);
5756
5757 Masktb[14]      := BUF1 (masktb[14], maskt[14]);
5758 Masku[0]        := MX4 (masku[0],  maskt[0],  maskt[7],  maskt[14],  zero, mir_bit, mir_byte);
5759 Masku[1]        := MX4 (masku[1],  maskt[1],  maskt[6],  maskt[14],  zero, mir_bit, mir_byte);
5760 Masku[2]        := MX4 (masku[2],  maskt[2],  maskt[5],  maskt[14],  zero, mir_bit, mir_byte);
5761 Masku[3]        := MX4 (masku[3],  maskt[3],  maskt[4],  masktb[14], zero, mir_bit, mir_byte);
5762 Masku[4]        := MX4 (masku[4],  maskt[4],  maskt[3],  masktb[14], zero, mir_bit, mir_byte);
5763 Masku[5]        := MX4 (masku[5],  maskt[5],  maskt[2],  masktb[14], zero, mir_bit, mir_byte);
5764 Masku[6]        := MX4 (masku[6],  maskt[6],  maskt[1],  masktb[14], zero, mir_bit, mir_byte);
5765 Masku[7]        := MX4 (masku[7],  maskt[7],  maskt[0],  masktb[14], zero, mir_bit, mir_byte);
5766 Masku[8]        := MX2 (masku[8],  maskt[8],  maskt[13], mir_byte);
5767 Masku[9]        := MX2 (masku[9],  maskt[9],  maskt[12], mir_byte);
5768 Masku[10]       := MX2 (masku[10], maskt[10], maskt[11], mir_byte);
5769 Masku[11]       := MX2 (masku[11], maskt[11], maskt[10], mir_byte);
5770 Masku[12]       := MX2 (masku[12], maskt[12], maskt[9],  mir_byte);
5771 Masku[13]       := MX2 (masku[13], maskt[13], maskt[8],  mir_byte);
5772 Masku[14]       := MX2 (masku[14], maskt[14], maskt[0],  mir_byte);*/
5773 ////////////////////////////////////// C++ CODE //////////////////////////////////////
5774         bool mir_bit = true/*big_pix*/ && !phrase_mode;
5775         bool mir_byte = true/*big_pix*/ && phrase_mode;
5776         uint16 masku = maskt;
5777
5778         if (mir_bit)
5779         {
5780                 masku &= 0xFF00;
5781                 masku |= (maskt >> 7) & 0x0001;
5782                 masku |= (maskt >> 5) & 0x0002;
5783                 masku |= (maskt >> 3) & 0x0004;
5784                 masku |= (maskt >> 1) & 0x0008;
5785                 masku |= (maskt << 1) & 0x0010;
5786                 masku |= (maskt << 3) & 0x0020;
5787                 masku |= (maskt << 5) & 0x0040;
5788                 masku |= (maskt << 7) & 0x0080;
5789         }
5790
5791         if (mir_byte)
5792         {
5793                 masku = 0;
5794                 masku |= (maskt >> 14) & 0x0001;
5795                 masku |= (maskt >> 13) & 0x0002;
5796                 masku |= (maskt >> 12) & 0x0004;
5797                 masku |= (maskt >> 11) & 0x0008;
5798                 masku |= (maskt >> 10) & 0x0010;
5799                 masku |= (maskt >> 9)  & 0x0020;
5800                 masku |= (maskt >> 8)  & 0x0040;
5801                 masku |= (maskt >> 7)  & 0x0080;
5802
5803                 masku |= (maskt >> 5) & 0x0100;
5804                 masku |= (maskt >> 3) & 0x0200;
5805                 masku |= (maskt >> 1) & 0x0400;
5806                 masku |= (maskt << 1) & 0x0800;
5807                 masku |= (maskt << 3) & 0x1000;
5808                 masku |= (maskt << 5) & 0x2000;
5809                 masku |= (maskt << 7) & 0x4000;
5810         }
5811 //////////////////////////////////////////////////////////////////////////////////////
5812
5813 /* The maskt terms define the area for changed data, but the byte
5814 inhibit terms can override these */
5815
5816 /*Mask[0-7]     := AN2 (mask[0-7], masku[0-7], dbinh\[0]);
5817 Mask[8-14]      := AN2H (mask[8-14], masku[8-14], dbinh\[1-7]);*/
5818 ////////////////////////////////////// C++ CODE //////////////////////////////////////
5819         uint16 mask = masku & (!(dbinh & 0x01) ? 0xFFFF : 0xFF00);
5820         mask &= ~(((uint16)dbinh & 0x00FE) << 7);
5821 //////////////////////////////////////////////////////////////////////////////////////
5822
5823 /*Addql[0]      := JOIN (addql[0], addq[0..1]);
5824 Addql[1]        := JOIN (addql[1], addq[2..3]);
5825
5826 Dsel0b[0-1]     := BUF8 (dsel0b[0-1], data_sel[0]);
5827 Dsel1b[0-1]     := BUF8 (dsel1b[0-1], data_sel[1]);
5828 Ddatlo          := MX4 (ddatlo, patd[0], lfu[0], addql[0], zero32, dsel0b[0], dsel1b[0]);
5829 Ddathi          := MX4 (ddathi, patd[1], lfu[1], addql[1], zero32, dsel0b[1], dsel1b[1]);*/
5830 ////////////////////////////////////// C++ CODE //////////////////////////////////////
5831         uint64 dmux[4];
5832         dmux[0] = patd;
5833         dmux[1] = lfu;
5834         dmux[2] = ((uint64)addq[3] << 48) | ((uint64)addq[2] << 32) | ((uint64)addq[1] << 16) | (uint64)addq[0];
5835         dmux[3] = 0;
5836         uint64 ddat = dmux[data_sel];
5837 //////////////////////////////////////////////////////////////////////////////////////
5838
5839 /*Zed_sel               := AN2 (zed_sel, data_sel[0..1]);
5840 Zed_selb[0-1]   := BUF8 (zed_selb[0-1], zed_sel);
5841
5842 Dat[0-7]        := MX4 (dat[0-7],   dstdlo{0-7},   ddatlo{0-7},   dstzlo{0-7},   srczlo{0-7},   mask[0-7], zed_selb[0]);
5843 Dat[8-15]       := MX4 (dat[8-15],  dstdlo{8-15},  ddatlo{8-15},  dstzlo{8-15},  srczlo{8-15},  mask[8],   zed_selb[0]);
5844 Dat[16-23]      := MX4 (dat[16-23], dstdlo{16-23}, ddatlo{16-23}, dstzlo{16-23}, srczlo{16-23}, mask[9],   zed_selb[0]);
5845 Dat[24-31]      := MX4 (dat[24-31], dstdlo{24-31}, ddatlo{24-31}, dstzlo{24-31}, srczlo{24-31}, mask[10],  zed_selb[0]);
5846 Dat[32-39]      := MX4 (dat[32-39], dstdhi{0-7},   ddathi{0-7},   dstzhi{0-7},   srczhi{0-7},   mask[11],  zed_selb[1]);
5847 Dat[40-47]      := MX4 (dat[40-47], dstdhi{8-15},  ddathi{8-15},  dstzhi{8-15},  srczhi{8-15},  mask[12],  zed_selb[1]);
5848 Dat[48-55]      := MX4 (dat[48-55], dstdhi{16-23}, ddathi{16-23}, dstzhi{16-23}, srczhi{16-23}, mask[13],  zed_selb[1]);
5849 Dat[56-63]      := MX4 (dat[56-63], dstdhi{24-31}, ddathi{24-31}, dstzhi{24-31}, srczhi{24-31}, mask[14],  zed_selb[1]);*/
5850 ////////////////////////////////////// C++ CODE //////////////////////////////////////
5851         wdata = ((ddat & mask) | (dstd & ~mask)) & 0x00000000000000FFLL;
5852         wdata |= (mask & 0x0100 ? ddat : dstd) & 0x000000000000FF00LL;
5853         wdata |= (mask & 0x0200 ? ddat : dstd) & 0x0000000000FF0000LL;
5854         wdata |= (mask & 0x0400 ? ddat : dstd) & 0x00000000FF000000LL;
5855         wdata |= (mask & 0x0800 ? ddat : dstd) & 0x000000FF00000000LL;
5856         wdata |= (mask & 0x1000 ? ddat : dstd) & 0x0000FF0000000000LL;
5857         wdata |= (mask & 0x2000 ? ddat : dstd) & 0x00FF000000000000LL;
5858         wdata |= (mask & 0x4000 ? ddat : dstd) & 0xFF00000000000000LL;
5859 /*if (logBlit)
5860 {
5861         printf("\n[ddat=%08X%08X dstd=%08X%08X wdata=%08X%08X mask=%04X]\n",
5862                 (uint32)(ddat >> 32), (uint32)(ddat & 0xFFFFFFFF),
5863                 (uint32)(dstd >> 32), (uint32)(dstd & 0xFFFFFFFF),
5864                 (uint32)(wdata >> 32), (uint32)(wdata & 0xFFFFFFFF), mask);
5865         fflush(stdout);
5866 }//*/
5867 //This is a crappy way of handling this, but it should work for now...
5868         uint64 zwdata;
5869         zwdata = ((srcz & mask) | (dstz & ~mask)) & 0x00000000000000FFLL;
5870         zwdata |= (mask & 0x0100 ? srcz : dstz) & 0x000000000000FF00LL;
5871         zwdata |= (mask & 0x0200 ? srcz : dstz) & 0x0000000000FF0000LL;
5872         zwdata |= (mask & 0x0400 ? srcz : dstz) & 0x00000000FF000000LL;
5873         zwdata |= (mask & 0x0800 ? srcz : dstz) & 0x000000FF00000000LL;
5874         zwdata |= (mask & 0x1000 ? srcz : dstz) & 0x0000FF0000000000LL;
5875         zwdata |= (mask & 0x2000 ? srcz : dstz) & 0x00FF000000000000LL;
5876         zwdata |= (mask & 0x4000 ? srcz : dstz) & 0xFF00000000000000LL;
5877 if (logBlit)
5878 {
5879         printf("\n[srcz=%08X%08X dstz=%08X%08X zwdata=%08X%08X mask=%04X]\n",
5880                 (uint32)(srcz >> 32), (uint32)(srcz & 0xFFFFFFFF),
5881                 (uint32)(dstz >> 32), (uint32)(dstz & 0xFFFFFFFF),
5882                 (uint32)(zwdata >> 32), (uint32)(zwdata & 0xFFFFFFFF), mask);
5883         fflush(stdout);
5884 }//*/
5885         srcz = zwdata;
5886 //////////////////////////////////////////////////////////////////////////////////////
5887
5888 /*Data_enab[0-1]        := BUF8 (data_enab[0-1], data_ena);
5889 Datadrv[0-31]   := TS (wdata[0-31],  dat[0-31],  data_enab[0]);
5890 Datadrv[32-63]  := TS (wdata[32-63], dat[32-63], data_enab[1]);
5891
5892 Unused[0]       := DUMMY (unused[0]);
5893
5894 END;*/
5895 }
5896
5897 /**  COMP_CTRL - Comparator output control logic  *****************
5898
5899 This block is responsible for taking the comparator outputs and
5900 using them as appropriate to inhibit writes.  Two methods are
5901 supported for inhibiting write data:
5902
5903 -       suppression of the inner loop controlled write operation
5904 -       a set of eight byte inhibit lines to write back dest data
5905
5906 The first technique is used in pixel oriented modes, the second in
5907 phrase mode, but the phrase mode form is only applicable to eight
5908 and sixteen bit pixel modes.
5909
5910 Writes can be suppressed by data being equal, by the Z comparator
5911 conditions being met, or by the bit to pixel expansion scheme.
5912
5913 Pipe-lining issues: the data derived comparator outputs are stable
5914 until the next data read, well after the affected write from this
5915 operation.  However, the inner counter bits can count immediately
5916 before the ack for the last write.  Therefore, it is necessary to
5917 delay bcompbit select terms by one inner loop pipe-line stage,
5918 when generating the select for the data control - the output is
5919 delayed one further tick to give it write data timing (2/34).
5920
5921 There is also a problem with computed data - the new values are
5922 calculated before the write associated with the old value has been
5923 performed.  This is taken care of within the zed comparator by
5924 pipe-lining the comparator inputs where appropriate.
5925 */
5926
5927 //#define LOG_COMP_CTRL
5928 /*DEF COMP_CTRL (
5929         dbinh\[0..7]    // destination byte inhibit lines
5930         nowrite         // suppress inner loop write operation
5931         :OUT;
5932         bcompen         // bit selector inhibit enable
5933         big_pix         // pixels are big-endian
5934         bkgwren         // enable dest data write in pix inhibit
5935         clk             // co-processor clock
5936         dcomp[0..7]     // output of data byte comparators
5937         dcompen         // data comparator inhibit enable
5938         icount[0..2]    // low bits of inner count
5939         pixsize[0..2]   // destination pixel size
5940         phrase_mode     // phrase write mode
5941         srcd[0..7]      // bits to use for bit to byte expansion
5942         step_inner      // inner loop advance
5943         zcomp[0..3]     // output of word zed comparators
5944         :IN);*/
5945 void COMP_CTRL(uint8 &dbinh, bool &nowrite,
5946         bool bcompen, bool big_pix, bool bkgwren, uint8 dcomp, bool dcompen, uint8 icount,
5947         uint8 pixsize, bool phrase_mode, uint8 srcd, uint8 zcomp)
5948 {
5949 //BEGIN
5950
5951 /*Bkgwren\      := INV1 (bkgwren\, bkgwren);
5952 Phrase_mode\    := INV1 (phrase_mode\, phrase_mode);
5953 Pixsize\[0-2]   := INV2 (pixsize\[0-2], pixsize[0-2]);*/
5954
5955 /* The bit comparator bits are derived from the source data, which
5956 will have been suitably aligned for phrase mode.  The contents of
5957 the inner counter are used to select which bit to use.
5958
5959 When not in phrase mode the inner count value is used to select
5960 one bit.  It is assumed that the count has already occurred, so,
5961 7 selects bit 0, etc.  In big-endian pixel mode, this turns round,
5962 so that a count of 7 selects bit 7.
5963
5964 In phrase mode, the eight bits are used directly, and this mode is
5965 only applicable to 8-bit pixel mode (2/34) */
5966
5967 /*Bcompselt[0-2]        := EO (bcompselt[0-2], icount[0-2], big_pix);
5968 Bcompbit        := MX8 (bcompbit, srcd[7], srcd[6], srcd[5],
5969                         srcd[4], srcd[3], srcd[2], srcd[1], srcd[0], bcompselt[0..2]);
5970 Bcompbit\       := INV1 (bcompbit\, bcompbit);*/
5971 ////////////////////////////////////// C++ CODE //////////////////////////////////////
5972 #ifdef LOG_COMP_CTRL
5973 if (logBlit)
5974 {
5975         printf("\n     [bcompen=%s dcompen=%s phrase_mode=%s bkgwren=%s dcomp=%02X zcomp=%02X]", (bcompen ? "T" : "F"), (dcompen ? "T" : "F"), (phrase_mode ? "T" : "F"), (bkgwren ? "T" : "F"), dcomp, zcomp);
5976         printf("\n     ");
5977         fflush(stdout);
5978 }
5979 #endif
5980         uint8 bcompselt = (big_pix ? ~icount : icount) & 0x07;
5981         uint8 bitmask[8] = { 0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02, 0x01 };
5982         bool bcompbit = srcd & bitmask[bcompselt];
5983 //////////////////////////////////////////////////////////////////////////////////////
5984
5985 /* pipe-line the count */
5986 /*Bcompsel[0-2] := FDSYNC (bcompsel[0-2], bcompselt[0-2], step_inner, clk);
5987 Bcompbt         := MX8 (bcompbitpt, srcd[7], srcd[6], srcd[5],
5988                         srcd[4], srcd[3], srcd[2], srcd[1], srcd[0], bcompsel[0..2]);
5989 Bcompbitp       := FD1Q (bcompbitp, bcompbitpt, clk);
5990 Bcompbitp\      := INV1 (bcompbitp\, bcompbitp);*/
5991
5992 /* For pixel mode, generate the write inhibit signal for all modes
5993 on bit inhibit, for 8 and 16 bit modes on comparator inhibit, and
5994 for 16 bit mode on Z inhibit
5995
5996 Nowrite = bcompen . /bcompbit . /phrase_mode
5997         + dcompen . dcomp[0] . /phrase_mode . pixsize = 011
5998         + dcompen . dcomp[0..1] . /phrase_mode . pixsize = 100
5999         + zcomp[0] . /phrase_mode . pixsize = 100
6000 */
6001
6002 /*Nowt0         := NAN3 (nowt[0], bcompen, bcompbit\, phrase_mode\);
6003 Nowt1           := ND6  (nowt[1], dcompen, dcomp[0], phrase_mode\, pixsize\[2], pixsize[0..1]);
6004 Nowt2           := ND7  (nowt[2], dcompen, dcomp[0..1], phrase_mode\, pixsize[2], pixsize\[0..1]);
6005 Nowt3           := NAN5 (nowt[3], zcomp[0], phrase_mode\, pixsize[2], pixsize\[0..1]);
6006 Nowt4           := NAN4 (nowt[4], nowt[0..3]);
6007 Nowrite         := AN2  (nowrite, nowt[4], bkgwren\);*/
6008 ////////////////////////////////////// C++ CODE //////////////////////////////////////
6009         nowrite = ((bcompen && !bcompbit && !phrase_mode)
6010                 || (dcompen && (dcomp & 0x01) && !phrase_mode && (pixsize == 3))
6011                 || (dcompen && ((dcomp & 0x03) == 0x03) && !phrase_mode && (pixsize == 4))
6012                 || ((zcomp & 0x01) && !phrase_mode && (pixsize == 4)))
6013                 && !bkgwren;
6014 //////////////////////////////////////////////////////////////////////////////////////
6015
6016 /*Winht         := NAN3 (winht, bcompen, bcompbitp\, phrase_mode\);
6017 Winhibit        := NAN4 (winhibit, winht, nowt[1..3]);*/
6018 ////////////////////////////////////// C++ CODE //////////////////////////////////////
6019 //This is the same as above, but with bcompbit delayed one tick and called 'winhibit'
6020 //Small difference: Besides the pipeline effect, it's also not using !bkgwren...
6021 //      bool winhibit = (bcompen && !
6022         bool winhibit = (bcompen && !bcompbit && !phrase_mode)
6023                 || (dcompen && (dcomp & 0x01) && !phrase_mode && (pixsize == 3))
6024                 || (dcompen && ((dcomp & 0x03) == 0x03) && !phrase_mode && (pixsize == 4))
6025                 || ((zcomp & 0x01) && !phrase_mode && (pixsize == 4));
6026 #ifdef LOG_COMP_CTRL
6027 if (logBlit)
6028 {
6029         printf("[nw=%s wi=%s]", (nowrite ? "T" : "F"), (winhibit ? "T" : "F"));
6030         fflush(stdout);
6031 }
6032 #endif
6033 //////////////////////////////////////////////////////////////////////////////////////
6034
6035 /* For phrase mode, generate the byte inhibit signals for eight bit
6036 mode 011, or sixteen bit mode 100
6037 dbinh\[0] =  pixsize[2] . zcomp[0]
6038          +  pixsize[2] . dcomp[0] . dcomp[1] . dcompen
6039          + /pixsize[2] . dcomp[0] . dcompen
6040          + /srcd[0] . bcompen
6041
6042 Inhibits 0-3 are also used when not in phrase mode to write back
6043 destination data.
6044 */
6045
6046 /*Srcd\[0-7]    := INV1 (srcd\[0-7], srcd[0-7]);
6047
6048 Di0t0           := NAN2H (di0t[0], pixsize[2], zcomp[0]);
6049 Di0t1           := NAN4H (di0t[1], pixsize[2], dcomp[0..1], dcompen);
6050 Di0t2           := NAN2 (di0t[2], srcd\[0], bcompen);
6051 Di0t3           := NAN3 (di0t[3], pixsize\[2], dcomp[0], dcompen);
6052 Di0t4           := NAN4 (di0t[4], di0t[0..3]);
6053 Dbinh[0]        := ANR1P (dbinh\[0], di0t[4], phrase_mode, winhibit);*/
6054 ////////////////////////////////////// C++ CODE //////////////////////////////////////
6055         dbinh = 0;
6056         bool di0t0_1 = ((pixsize & 0x04) && (zcomp & 0x01))
6057                 || ((pixsize & 0x04) && (dcomp & 0x01) && (dcomp & 0x02) && dcompen);
6058         bool di0t4 = di0t0_1
6059                 || (!(srcd & 0x01) && bcompen)
6060                 || (!(pixsize & 0x04) && (dcomp & 0x01) && dcompen);
6061         dbinh |= (!((di0t4 && phrase_mode) || winhibit) ? 0x01 : 0x00);
6062 #ifdef LOG_COMP_CTRL
6063 if (logBlit)
6064 {
6065         printf("[di0t0_1=%s di0t4=%s]", (di0t0_1 ? "T" : "F"), (di0t4 ? "T" : "F"));
6066         fflush(stdout);
6067 }
6068 #endif
6069 //////////////////////////////////////////////////////////////////////////////////////
6070
6071 /*Di1t0         := NAN3 (di1t[0], pixsize\[2], dcomp[1], dcompen);
6072 Di1t1           := NAN2 (di1t[1], srcd\[1], bcompen);
6073 Di1t2           := NAN4 (di1t[2], di0t[0..1], di1t[0..1]);
6074 Dbinh[1]        := ANR1 (dbinh\[1], di1t[2], phrase_mode, winhibit);*/
6075 ////////////////////////////////////// C++ CODE //////////////////////////////////////
6076         bool di1t2 = di0t0_1
6077                 || (!(srcd & 0x02) && bcompen)
6078                 || (!(pixsize & 0x04) && (dcomp & 0x02) && dcompen);
6079         dbinh |= (!((di1t2 && phrase_mode) || winhibit) ? 0x02 : 0x00);
6080 #ifdef LOG_COMP_CTRL
6081 if (logBlit)
6082 {
6083         printf("[di1t2=%s]", (di1t2 ? "T" : "F"));
6084         fflush(stdout);
6085 }
6086 #endif
6087 //////////////////////////////////////////////////////////////////////////////////////
6088
6089 /*Di2t0         := NAN2H (di2t[0], pixsize[2], zcomp[1]);
6090 Di2t1           := NAN4H (di2t[1], pixsize[2], dcomp[2..3], dcompen);
6091 Di2t2           := NAN2 (di2t[2], srcd\[2], bcompen);
6092 Di2t3           := NAN3 (di2t[3], pixsize\[2], dcomp[2], dcompen);
6093 Di2t4           := NAN4 (di2t[4], di2t[0..3]);
6094 Dbinh[2]        := ANR1 (dbinh\[2], di2t[4], phrase_mode, winhibit);*/
6095 ////////////////////////////////////// C++ CODE //////////////////////////////////////
6096 //[bcompen=F dcompen=T phrase_mode=T bkgwren=F][nw=F wi=F]
6097 //[di0t0_1=F di0t4=F][di1t2=F][di2t0_1=T di2t4=T][di3t2=T][di4t0_1=F di2t4=F][di5t2=F][di6t0_1=F di6t4=F][di7t2=F]
6098 //[dcomp=$00 dbinh=$0C][7804780400007804] (icount=0005, inc=4)
6099         bool di2t0_1 = ((pixsize & 0x04) && (zcomp & 0x02))
6100                 || ((pixsize & 0x04) && (dcomp & 0x04) && (dcomp & 0x08) && dcompen);
6101         bool di2t4 = di2t0_1
6102                 || (!(srcd & 0x04) && bcompen)
6103                 || (!(pixsize & 0x04) && (dcomp & 0x04) && dcompen);
6104         dbinh |= (!((di2t4 && phrase_mode) || winhibit) ? 0x04 : 0x00);
6105 #ifdef LOG_COMP_CTRL
6106 if (logBlit)
6107 {
6108         printf("[di2t0_1=%s di2t4=%s]", (di2t0_1 ? "T" : "F"), (di2t4 ? "T" : "F"));
6109         fflush(stdout);
6110 }
6111 #endif
6112 //////////////////////////////////////////////////////////////////////////////////////
6113
6114 /*Di3t0         := NAN3 (di3t[0], pixsize\[2], dcomp[3], dcompen);
6115 Di3t1           := NAN2 (di3t[1], srcd\[3], bcompen);
6116 Di3t2           := NAN4 (di3t[2], di2t[0..1], di3t[0..1]);
6117 Dbinh[3]        := ANR1 (dbinh\[3], di3t[2], phrase_mode, winhibit);*/
6118 ////////////////////////////////////// C++ CODE //////////////////////////////////////
6119         bool di3t2 = di2t0_1
6120                 || (!(srcd & 0x08) && bcompen)
6121                 || (!(pixsize & 0x04) && (dcomp & 0x08) && dcompen);
6122         dbinh |= (!((di3t2 && phrase_mode) || winhibit) ? 0x08 : 0x00);
6123 #ifdef LOG_COMP_CTRL
6124 if (logBlit)
6125 {
6126         printf("[di3t2=%s]", (di3t2 ? "T" : "F"));
6127         fflush(stdout);
6128 }
6129 #endif
6130 //////////////////////////////////////////////////////////////////////////////////////
6131
6132 /*Di4t0         := NAN2H (di4t[0], pixsize[2], zcomp[2]);
6133 Di4t1           := NAN4H (di4t[1], pixsize[2], dcomp[4..5], dcompen);
6134 Di4t2           := NAN2 (di4t[2], srcd\[4], bcompen);
6135 Di4t3           := NAN3 (di4t[3], pixsize\[2], dcomp[4], dcompen);
6136 Di4t4           := NAN4 (di4t[4], di4t[0..3]);
6137 Dbinh[4]        := NAN2 (dbinh\[4], di4t[4], phrase_mode);*/
6138 ////////////////////////////////////// C++ CODE //////////////////////////////////////
6139         bool di4t0_1 = ((pixsize & 0x04) && (zcomp & 0x04))
6140                 || ((pixsize & 0x04) && (dcomp & 0x10) && (dcomp & 0x20) && dcompen);
6141         bool di4t4 = di4t0_1
6142                 || (!(srcd & 0x10) && bcompen)
6143                 || (!(pixsize & 0x04) && (dcomp & 0x10) && dcompen);
6144         dbinh |= (!(di4t4 && phrase_mode) ? 0x10 : 0x00);
6145 #ifdef LOG_COMP_CTRL
6146 if (logBlit)
6147 {
6148         printf("[di4t0_1=%s di2t4=%s]", (di4t0_1 ? "T" : "F"), (di4t4 ? "T" : "F"));
6149         fflush(stdout);
6150 }
6151 #endif
6152 //////////////////////////////////////////////////////////////////////////////////////
6153
6154 /*Di5t0         := NAN3 (di5t[0], pixsize\[2], dcomp[5], dcompen);
6155 Di5t1           := NAN2 (di5t[1], srcd\[5], bcompen);
6156 Di5t2           := NAN4 (di5t[2], di4t[0..1], di5t[0..1]);
6157 Dbinh[5]        := NAN2 (dbinh\[5], di5t[2], phrase_mode);*/
6158 ////////////////////////////////////// C++ CODE //////////////////////////////////////
6159         bool di5t2 = di4t0_1
6160                 || (!(srcd & 0x20) && bcompen)
6161                 || (!(pixsize & 0x04) && (dcomp & 0x20) && dcompen);
6162         dbinh |= (!(di5t2 && phrase_mode) ? 0x20 : 0x00);
6163 #ifdef LOG_COMP_CTRL
6164 if (logBlit)
6165 {
6166         printf("[di5t2=%s]", (di5t2 ? "T" : "F"));
6167         fflush(stdout);
6168 }
6169 #endif
6170 //////////////////////////////////////////////////////////////////////////////////////
6171
6172 /*Di6t0         := NAN2H (di6t[0], pixsize[2], zcomp[3]);
6173 Di6t1           := NAN4H (di6t[1], pixsize[2], dcomp[6..7], dcompen);
6174 Di6t2           := NAN2 (di6t[2], srcd\[6], bcompen);
6175 Di6t3           := NAN3 (di6t[3], pixsize\[2], dcomp[6], dcompen);
6176 Di6t4           := NAN4 (di6t[4], di6t[0..3]);
6177 Dbinh[6]        := NAN2 (dbinh\[6], di6t[4], phrase_mode);*/
6178 ////////////////////////////////////// C++ CODE //////////////////////////////////////
6179         bool di6t0_1 = ((pixsize & 0x04) && (zcomp & 0x08))
6180                 || ((pixsize & 0x04) && (dcomp & 0x40) && (dcomp & 0x80) && dcompen);
6181         bool di6t4 = di6t0_1
6182                 || (!(srcd & 0x40) && bcompen)
6183                 || (!(pixsize & 0x04) && (dcomp & 0x40) && dcompen);
6184         dbinh |= (!(di6t4 && phrase_mode) ? 0x40 : 0x00);
6185 #ifdef LOG_COMP_CTRL
6186 if (logBlit)
6187 {
6188         printf("[di6t0_1=%s di6t4=%s]", (di6t0_1 ? "T" : "F"), (di6t4 ? "T" : "F"));
6189         fflush(stdout);
6190 }
6191 #endif
6192 //////////////////////////////////////////////////////////////////////////////////////
6193
6194 /*Di7t0         := NAN3 (di7t[0], pixsize\[2], dcomp[7], dcompen);
6195 Di7t1           := NAN2 (di7t[1], srcd\[7], bcompen);
6196 Di7t2           := NAN4 (di7t[2], di6t[0..1], di7t[0..1]);
6197 Dbinh[7]        := NAN2 (dbinh\[7], di7t[2], phrase_mode);*/
6198 ////////////////////////////////////// C++ CODE //////////////////////////////////////
6199         bool di7t2 = di6t0_1
6200                 || (!(srcd & 0x80) && bcompen)
6201                 || (!(pixsize & 0x04) && (dcomp & 0x80) && dcompen);
6202         dbinh |= (!(di7t2 && phrase_mode) ? 0x80 : 0x00);
6203 #ifdef LOG_COMP_CTRL
6204 if (logBlit)
6205 {
6206         printf("[di7t2=%s]", (di7t2 ? "T" : "F"));
6207         fflush(stdout);
6208 }
6209 #endif
6210 //////////////////////////////////////////////////////////////////////////////////////
6211
6212 //END;
6213 //kludge
6214 dbinh = ~dbinh;
6215 #ifdef LOG_COMP_CTRL
6216 if (logBlit)
6217 {
6218         printf("[dcomp=$%02X dbinh=$%02X]\n    ", dcomp, dbinh);
6219         fflush(stdout);
6220 }
6221 #endif
6222 }
6223
6224
6225 ////////////////////////////////////// C++ CODE //////////////////////////////////////
6226 //////////////////////////////////////////////////////////////////////////////////////
6227
6228 // !!! TESTING !!! TESTING !!! TESTING !!! TESTING !!! TESTING !!! TESTING !!! TESTING !!!
6229 // !!! TESTING !!! TESTING !!! TESTING !!! TESTING !!! TESTING !!! TESTING !!! TESTING !!!
6230 // !!! TESTING !!! TESTING !!! TESTING !!! TESTING !!! TESTING !!! TESTING !!! TESTING !!!
6231
6232 #endif
6233