5 // (C) 2010 Underground Software
7 // JLH = James Hammons <jlhamm@acm.org>
10 // --- ---------- -------------------------------------------------------------
11 // JLH 01/16/2010 Created this log ;-)
15 // I owe a debt of gratitude to Curt Vendel and to John Mathieson--to Curt
16 // for supplying the Oberon ASIC nets and to John for making them available
17 // to Curt. ;-) Without that excellent documentation which shows *exactly*
18 // what's going on inside the TOM chip, we'd all still be guessing as to how
19 // the wily blitter and other pieces of the Jaguar puzzle actually work.
20 // Now how about those JERRY ASIC nets gentlemen...? [We have those now!] ;-)
33 // Various conditional compilation goodies...
37 #define USE_ORIGINAL_BLITTER
38 //#define USE_MIDSUMMER_BLITTER
39 #define USE_MIDSUMMER_BLITTER_MKII
41 #ifdef USE_ORIGINAL_BLITTER
42 #ifdef USE_MIDSUMMER_BLITTER_MKII
43 #define USE_BOTH_BLITTERS
48 // External global variables
// (declared here, defined in another translation unit)
50 extern int jaguar_active_memory_dumps;
52 // Local global variables
54 int start_logging = 0;
55 uint8_t blitter_working = 0;
56 bool startConciseBlitLogging = false;
59 // Blitter register RAM (most of it is hidden from the user)
// Accessed as 32-bit big-endian values through the REG()/WREG() macros.
61 static uint8_t blitter_ram[0x100];
// When true, WRITE_PIXEL_16() logs the address of every 16 bpp pixel write.
65 bool specialLog = false;
66 extern int effect_start;
67 extern int blit_start_log;
// Prototypes only. NOTE(review): the definitions are not visible in this part
// of the file; presumably the alternate blitter implementations -- confirm.
68 void BlitterMidsummer(uint32_t cmd);
69 void BlitterMidsummer2(void);
// REG(offs): fetch the 32-bit value stored big-endian (most significant byte
// first) in blitter_ram at byte offset 'offs'.
#define REG(offs) ( \
	  ((uint32_t)blitter_ram[(offs) + 0] << 24) \
	| ((uint32_t)blitter_ram[(offs) + 1] << 16) \
	| ((uint32_t)blitter_ram[(offs) + 2] <<  8) \
	|  (uint32_t)blitter_ram[(offs) + 3])

// WREG(offs, val): store 'val' big-endian into the four bytes of blitter_ram
// starting at byte offset 'offs'. Note that both arguments are evaluated more
// than once, so they must be free of side effects.
#define WREG(offs, val) ( \
	blitter_ram[(offs) + 0] = ((val) >> 24) & 0xFF, \
	blitter_ram[(offs) + 1] = ((val) >> 16) & 0xFF, \
	blitter_ram[(offs) + 2] = ((val) >>  8) & 0xFF, \
	blitter_ram[(offs) + 3] =  (val)        & 0xFF)
76 // Blitter registers (offsets from F02200)
// Each constant is a byte offset into blitter_ram. The REG()/WREG() macros read
// and write the 32-bit big-endian value stored at that offset.
78 #define A1_BASE ((uint32_t)0x00)
79 #define A1_FLAGS ((uint32_t)0x04)
80 #define A1_CLIP ((uint32_t)0x08) // Height and width values for clipping
81 #define A1_PIXEL ((uint32_t)0x0C) // Integer part of the pixel (Y.i and X.i)
82 #define A1_STEP ((uint32_t)0x10) // Integer part of the step
83 #define A1_FSTEP ((uint32_t)0x14) // Fractional part of the step
84 #define A1_FPIXEL ((uint32_t)0x18) // Fractional part of the pixel (Y.f and X.f)
85 #define A1_INC ((uint32_t)0x1C) // Integer part of the increment
86 #define A1_FINC ((uint32_t)0x20) // Fractional part of the increment
87 #define A2_BASE ((uint32_t)0x24)
88 #define A2_FLAGS ((uint32_t)0x28)
89 #define A2_MASK ((uint32_t)0x2C) // Modulo values for x and y (M.y and M.x)
90 #define A2_PIXEL ((uint32_t)0x30) // Integer part of the pixel (no fractional part for A2)
91 #define A2_STEP ((uint32_t)0x34) // Integer part of the step (no fractional part for A2)
92 #define COMMAND ((uint32_t)0x38)
93 #define PIXLINECOUNTER ((uint32_t)0x3C) // Inner & outer loop values
// The data registers from SRCDATA through PATTERNDATA are spaced 8 bytes apart.
// In phrase mode READ_RDATA() reads either the long at the offset or the long
// 4 bytes above it.
94 #define SRCDATA ((uint32_t)0x40)
95 #define DSTDATA ((uint32_t)0x48)
96 #define DSTZ ((uint32_t)0x50)
97 #define SRCZINT ((uint32_t)0x58)
98 #define SRCZFRAC ((uint32_t)0x60)
99 #define PATTERNDATA ((uint32_t)0x68)
100 #define INTENSITYINC ((uint32_t)0x70)
101 #define ZINC ((uint32_t)0x74)
102 #define COLLISIONCTRL ((uint32_t)0x78)
103 #define PHRASEINT0 ((uint32_t)0x7C)
104 #define PHRASEINT1 ((uint32_t)0x80)
105 #define PHRASEINT2 ((uint32_t)0x84)
106 #define PHRASEINT3 ((uint32_t)0x88)
107 #define PHRASEZ0 ((uint32_t)0x8C)
108 #define PHRASEZ1 ((uint32_t)0x90)
109 #define PHRASEZ2 ((uint32_t)0x94)
110 #define PHRASEZ3 ((uint32_t)0x98)
112 // Blitter command bits
// Each macro masks a single bit out of a variable named 'cmd', which must be
// in scope wherever the macro is used (e.g. the 'cmd' parameter of
// blitter_generic). The result is the masked bit itself (zero or the bit's
// value), not a normalized 0/1, so use it only in a boolean context.
114 #define SRCEN (cmd & 0x00000001)
115 #define SRCENZ (cmd & 0x00000002)
116 #define SRCENX (cmd & 0x00000004)
117 #define DSTEN (cmd & 0x00000008)
118 #define DSTENZ (cmd & 0x00000010)
119 #define DSTWRZ (cmd & 0x00000020)
120 #define CLIPA1 (cmd & 0x00000040)
// Address update control bits
122 #define UPDA1F (cmd & 0x00000100)
123 #define UPDA1 (cmd & 0x00000200)
124 #define UPDA2 (cmd & 0x00000400)
// When set, blitter_generic moves data A1 -> A2; when clear, A1 <- A2.
126 #define DSTA2 (cmd & 0x00000800)
// Z comparator controls: blitter_generic inhibits the write when the source Z
// is less than (INF), equal to (EQU) or greater than (SUP) the destination Z.
128 #define Z_OP_INF (cmd & 0x00040000)
129 #define Z_OP_EQU (cmd & 0x00080000)
130 #define Z_OP_SUP (cmd & 0x00100000)
// Logic function unit terms; blitter_generic ORs together
// ~src&~dst (NAN), ~src&dst (NA), src&~dst (AN) and src&dst (A).
132 #define LFU_NAN (cmd & 0x00200000)
133 #define LFU_NA (cmd & 0x00400000)
134 #define LFU_AN (cmd & 0x00800000)
135 #define LFU_A (cmd & 0x01000000)
// Comparator enables
137 #define CMPDST (cmd & 0x02000000)
138 #define BCOMPEN (cmd & 0x04000000)
139 #define DCOMPEN (cmd & 0x08000000)
// Write data selection and miscellaneous mode bits
141 #define PATDSEL (cmd & 0x00010000)
142 #define ADDDSEL (cmd & 0x00020000)
143 #define TOPBEN (cmd & 0x00004000)
144 #define TOPNEN (cmd & 0x00008000)
145 #define BKGWREN (cmd & 0x10000000)
146 #define GOURD (cmd & 0x00001000)
147 #define GOURZ (cmd & 0x00002000)
148 #define SRCSHADE (cmd & 0x40000000)
// Address-control bits taken from the A1/A2 flags registers. Each macro
// yields the masked bit (zero or non-zero), read live from blitter_ram
// through REG() every time it is used.

// Bit 19: X sign (subtract) control
#define XSIGNSUB_A1 (REG(A1_FLAGS) & 0x00080000)
#define XSIGNSUB_A2 (REG(A2_FLAGS) & 0x00080000)

// Bit 20: Y sign (subtract) control
#define YSIGNSUB_A1 (REG(A1_FLAGS) & 0x00100000)
#define YSIGNSUB_A2 (REG(A2_FLAGS) & 0x00100000)

// Bit 18: Y add-one control
#define YADD1_A1    (REG(A1_FLAGS) & 0x00040000)
#define YADD1_A2    (REG(A2_FLAGS) & 0x00040000)
165 /*******************************************************************************
166 ********************** STUFF CUT BELOW THIS LINE! ******************************
167 *******************************************************************************/
168 #ifdef USE_ORIGINAL_BLITTER // We're ditching this crap for now...
170 //Put 'em back, once we fix the problem!!! [KO]
// Per-depth pixel addressing helpers.
//
// The macro argument 'a' is a channel prefix (a1 or a2) that is token-pasted
// onto _x, _y, _width, _pitch and _addr, so those variables must be in scope
// at the point of use. The pixel index used here is the value of x (or y)
// shifted right by 16.
//
// PIXEL_SHIFT_n:  bit position of the pixel inside its byte (built from ~x, so
//                 the lowest-numbered pixel occupies the most significant bits).
// PIXEL_OFFSET_n: offset of the pixel from a##_addr, in bytes for 1-8 bpp and
//                 in pixels for 16/32 bpp (the READ macros scale it by 2 or 4).
//                 The (1 + pitch) factor is applied to the row/group part only.
// READ_PIXEL_n:   fetch the pixel through the JaguarRead* accessors.
172 #define PIXEL_SHIFT_1(a) (((~a##_x) >> 16) & 7)
173 #define PIXEL_OFFSET_1(a) (((((uint32_t)a##_y >> 16) * a##_width / 8) + (((uint32_t)a##_x >> 19) & ~7)) * (1 + a##_pitch) + (((uint32_t)a##_x >> 19) & 7))
174 #define READ_PIXEL_1(a) ((JaguarReadByte(a##_addr+PIXEL_OFFSET_1(a), BLITTER) >> PIXEL_SHIFT_1(a)) & 0x01)
175 //#define READ_PIXEL_1(a) ((JaguarReadByte(a##_addr+PIXEL_OFFSET_1(a)) >> PIXEL_SHIFT_1(a)) & 0x01)
// 2 bpp: four pixels per byte
178 #define PIXEL_SHIFT_2(a) (((~a##_x) >> 15) & 6)
179 #define PIXEL_OFFSET_2(a) (((((uint32_t)a##_y >> 16) * a##_width / 4) + (((uint32_t)a##_x >> 18) & ~7)) * (1 + a##_pitch) + (((uint32_t)a##_x >> 18) & 7))
180 #define READ_PIXEL_2(a) ((JaguarReadByte(a##_addr+PIXEL_OFFSET_2(a), BLITTER) >> PIXEL_SHIFT_2(a)) & 0x03)
181 //#define READ_PIXEL_2(a) ((JaguarReadByte(a##_addr+PIXEL_OFFSET_2(a)) >> PIXEL_SHIFT_2(a)) & 0x03)
// 4 bpp: two pixels per byte
184 #define PIXEL_SHIFT_4(a) (((~a##_x) >> 14) & 4)
185 #define PIXEL_OFFSET_4(a) (((((uint32_t)a##_y >> 16) * (a##_width/2)) + (((uint32_t)a##_x >> 17) & ~7)) * (1 + a##_pitch) + (((uint32_t)a##_x >> 17) & 7))
186 #define READ_PIXEL_4(a) ((JaguarReadByte(a##_addr+PIXEL_OFFSET_4(a), BLITTER) >> PIXEL_SHIFT_4(a)) & 0x0f)
187 //#define READ_PIXEL_4(a) ((JaguarReadByte(a##_addr+PIXEL_OFFSET_4(a)) >> PIXEL_SHIFT_4(a)) & 0x0f)
// 8 bpp: one pixel per byte, no shift needed
190 #define PIXEL_OFFSET_8(a) (((((uint32_t)a##_y >> 16) * a##_width) + (((uint32_t)a##_x >> 16) & ~7)) * (1 + a##_pitch) + (((uint32_t)a##_x >> 16) & 7))
191 #define READ_PIXEL_8(a) (JaguarReadByte(a##_addr+PIXEL_OFFSET_8(a), BLITTER))
192 //#define READ_PIXEL_8(a) (JaguarReadByte(a##_addr+PIXEL_OFFSET_8(a)))
// 16 bpp: offset is in pixels; the read scales it by 2 (<<1)
195 #define PIXEL_OFFSET_16(a) (((((uint32_t)a##_y >> 16) * a##_width) + (((uint32_t)a##_x >> 16) & ~3)) * (1 + a##_pitch) + (((uint32_t)a##_x >> 16) & 3))
196 #define READ_PIXEL_16(a) (JaguarReadWord(a##_addr+(PIXEL_OFFSET_16(a)<<1), BLITTER))
197 //#define READ_PIXEL_16(a) (JaguarReadWord(a##_addr+(PIXEL_OFFSET_16(a)<<1)))
// 32 bpp: offset is in pixels; the read scales it by 4 (<<2)
200 #define PIXEL_OFFSET_32(a) (((((uint32_t)a##_y >> 16) * a##_width) + (((uint32_t)a##_x >> 16) & ~1)) * (1 + a##_pitch) + (((uint32_t)a##_x >> 16) & 1))
201 #define READ_PIXEL_32(a) (JaguarReadLong(a##_addr+(PIXEL_OFFSET_32(a)<<2), BLITTER))
202 //#define READ_PIXEL_32(a) (JaguarReadLong(a##_addr+(PIXEL_OFFSET_32(a)<<2)))
// READ_PIXEL(a, f): read one pixel from channel 'a' (a1 or a2) at the depth
// encoded in bits 3-5 of the flags value 'f':
//   0 = 1 bpp, 1 = 2 bpp, 2 = 4 bpp, 3 = 8 bpp, 4 = 16 bpp, 5 = 32 bpp.
// Depth codes 6 and 7 yield 0.
// 'f' is parenthesized so that an expression argument (e.g. x | y) is shifted
// as a whole; it is evaluated several times and must have no side effects.
#define READ_PIXEL(a,f) ( \
	  ((((f) >> 3) & 0x07) == 0) ? (READ_PIXEL_1(a)) \
	: ((((f) >> 3) & 0x07) == 1) ? (READ_PIXEL_2(a)) \
	: ((((f) >> 3) & 0x07) == 2) ? (READ_PIXEL_4(a)) \
	: ((((f) >> 3) & 0x07) == 3) ? (READ_PIXEL_8(a)) \
	: ((((f) >> 3) & 0x07) == 4) ? (READ_PIXEL_16(a)) \
	: ((((f) >> 3) & 0x07) == 5) ? (READ_PIXEL_32(a)) \
	: 0)
// Z data access (16 bits per entry).
//
// 'a' is a channel prefix (a1 or a2) token-pasted onto _addr and _zoffs.
// The Z entry for a pixel sits a##_zoffs * 4 entries past the pixel's own
// 16 bpp offset; the entry offset is doubled to get a byte address.
#define ZDATA_OFFSET_16(a) (PIXEL_OFFSET_16(a) + a##_zoffs * 4)

// 16 bpp z data read
#define READ_ZDATA_16(a) (JaguarReadWord(a##_addr + (ZDATA_OFFSET_16(a) << 1), BLITTER))

// Depth-independent front end; the flags argument 'f' is currently ignored.
#define READ_ZDATA(a,f) (READ_ZDATA_16(a))

// 16 bpp z data write. Wrapped in do { } while (0) so the macro behaves as a
// single statement (safe in an unbraced if/else).
#define WRITE_ZDATA_16(a,d) do { \
	JaguarWriteWord(a##_addr + (ZDATA_OFFSET_16(a) << 1), (d), BLITTER); \
} while (0)

// Depth-independent front end; the flags argument 'f' is currently ignored.
// The caller supplies the terminating semicolon.
#define WRITE_ZDATA(a,f,d) WRITE_ZDATA_16(a, d)
// Register data reads.
//
// READ_RDATA_n(r, a, p) extracts one n-bit pixel from the blitter register at
// offset 'r'. 'a' is a channel prefix (a1 or a2) token-pasted onto _x; 'p' is
// the channel's phrase-mode flag.
//
//  - p == 0: the low n bits of REG(r) are returned.
//  - p != 0: the pixel index (x >> 16) selects the long at r or at r + 4, and
//            the pixel inside that long is found by shifting right by
//            (index * n) modulo 32.
//
// The shift mask is therefore 0x1F with the low log2(n) bits cleared:
//   1 bpp 0x1F, 2 bpp 0x1E, 4 bpp 0x1C, 8 bpp 0x18, 16 bpp 0x10.
// Keeping the mask below 32 also keeps the shift of the 32-bit value defined.

// 1 bpp r data read
#define READ_RDATA_1(r,a,p) ((p) ? ((REG((r) + (((uint32_t)a##_x >> 19) & 0x04))) >> (((uint32_t)a##_x >> 16) & 0x1F)) & 0x0001 : (REG(r) & 0x0001))

// 2 bpp r data read
#define READ_RDATA_2(r,a,p) ((p) ? ((REG((r) + (((uint32_t)a##_x >> 18) & 0x04))) >> (((uint32_t)a##_x >> 15) & 0x1E)) & 0x0003 : (REG(r) & 0x0003))

// 4 bpp r data read
#define READ_RDATA_4(r,a,p) ((p) ? ((REG((r) + (((uint32_t)a##_x >> 17) & 0x04))) >> (((uint32_t)a##_x >> 14) & 0x1C)) & 0x000F : (REG(r) & 0x000F))

// 8 bpp r data read
#define READ_RDATA_8(r,a,p) ((p) ? ((REG((r) + (((uint32_t)a##_x >> 16) & 0x04))) >> (((uint32_t)a##_x >> 13) & 0x18)) & 0x00FF : (REG(r) & 0x00FF))

// 16 bpp r data read
#define READ_RDATA_16(r,a,p) ((p) ? ((REG((r) + (((uint32_t)a##_x >> 15) & 0x04))) >> (((uint32_t)a##_x >> 12) & 0x10)) & 0xFFFF : (REG(r) & 0xFFFF))

// 32 bpp r data read
#define READ_RDATA_32(r,a,p) ((p) ? REG((r) + (((uint32_t)a##_x >> 14) & 0x04)) : REG(r))

// register data read: dispatch on the depth code in bits 3-5 of flags 'f'
// (0 = 1 bpp ... 5 = 32 bpp; codes 6 and 7 yield 0)
#define READ_RDATA(r,a,f,p) ( \
	  ((((f) >> 3) & 0x07) == 0) ? (READ_RDATA_1(r,a,p)) \
	: ((((f) >> 3) & 0x07) == 1) ? (READ_RDATA_2(r,a,p)) \
	: ((((f) >> 3) & 0x07) == 2) ? (READ_RDATA_4(r,a,p)) \
	: ((((f) >> 3) & 0x07) == 3) ? (READ_RDATA_8(r,a,p)) \
	: ((((f) >> 3) & 0x07) == 4) ? (READ_RDATA_16(r,a,p)) \
	: ((((f) >> 3) & 0x07) == 5) ? (READ_RDATA_32(r,a,p)) \
	: 0)
// Per-depth pixel writes.
//
// 'a' is a channel prefix (a1 or a2) token-pasted onto _addr (and, through the
// PIXEL_OFFSET_n / PIXEL_SHIFT_n helpers, onto _x, _y, _width and _pitch).
// 'd' is the pixel value to store.
//
// For the sub-byte depths (1, 2 and 4 bpp) the containing byte is read,
// the pixel's bit field is cleared, and the new value -- masked to the pixel
// width so stray upper bits cannot spill into neighbouring pixels -- is merged
// in before the byte is written back.
//
// Every macro is wrapped in do { } while (0) so it acts as a single statement;
// the caller supplies the terminating semicolon.

// 1 bpp pixel write
#define WRITE_PIXEL_1(a,d) do { \
	JaguarWriteByte(a##_addr + PIXEL_OFFSET_1(a), \
		(JaguarReadByte(a##_addr + PIXEL_OFFSET_1(a), BLITTER) & (~(0x01 << PIXEL_SHIFT_1(a)))) \
		| (((d) & 0x01) << PIXEL_SHIFT_1(a)), BLITTER); \
} while (0)

// 2 bpp pixel write
#define WRITE_PIXEL_2(a,d) do { \
	JaguarWriteByte(a##_addr + PIXEL_OFFSET_2(a), \
		(JaguarReadByte(a##_addr + PIXEL_OFFSET_2(a), BLITTER) & (~(0x03 << PIXEL_SHIFT_2(a)))) \
		| (((d) & 0x03) << PIXEL_SHIFT_2(a)), BLITTER); \
} while (0)

// 4 bpp pixel write
#define WRITE_PIXEL_4(a,d) do { \
	JaguarWriteByte(a##_addr + PIXEL_OFFSET_4(a), \
		(JaguarReadByte(a##_addr + PIXEL_OFFSET_4(a), BLITTER) & (~(0x0f << PIXEL_SHIFT_4(a)))) \
		| (((d) & 0x0f) << PIXEL_SHIFT_4(a)), BLITTER); \
} while (0)

// 8 bpp pixel write
#define WRITE_PIXEL_8(a,d) do { \
	JaguarWriteByte(a##_addr + PIXEL_OFFSET_8(a), (d), BLITTER); \
} while (0)

// 16 bpp pixel write (logs the target address when specialLog is set)
#define WRITE_PIXEL_16(a,d) do { \
	JaguarWriteWord(a##_addr + (PIXEL_OFFSET_16(a) << 1), (d), BLITTER); \
	if (specialLog) \
		WriteLog("Pixel write address: %08X\n", a##_addr + (PIXEL_OFFSET_16(a) << 1)); \
} while (0)

// 32 bpp pixel write
#define WRITE_PIXEL_32(a,d) do { \
	JaguarWriteLong(a##_addr + (PIXEL_OFFSET_32(a) << 2), (d), BLITTER); \
} while (0)
281 #define WRITE_PIXEL(a,f,d) {\
282 switch ((f>>3)&0x07) { \
283 case 0: WRITE_PIXEL_1(a,d); break; \
284 case 1: WRITE_PIXEL_2(a,d); break; \
285 case 2: WRITE_PIXEL_4(a,d); break; \
286 case 3: WRITE_PIXEL_8(a,d); break; \
287 case 4: WRITE_PIXEL_16(a,d); break; \
288 case 5: WRITE_PIXEL_32(a,d); break; \
291 // Width in Pixels of a Scanline
292 // This is a pretranslation of the value found in the A1 & A2 flags: It's really a floating point value
293 // of the form EEEEMM where MM is the mantissa with an implied "1." in front of it and the EEEE value is
294 // the exponent. Valid values for the exponent range from 0 to 11 (decimal). It's easiest to think of it
295 // as a floating point bit pattern being followed by a number of zeroes. So, e.g., 001101 translates to
296 // 1.01 (the "1." being implied) x (2 ^ 3) or 1010 -> 10 in base 10 (i.e., 1.01 with the decimal place
297 // being shifted to the right 3 places).
298 /*static uint32_t blitter_scanline_width[48] =
300 0, 0, 0, 0, // Note: This would really translate to 1, 1, 1, 1
310 1024, 1280, 1536, 1792,
311 2048, 2560, 3072, 3584
314 //static uint8_t * tom_ram_8;
315 //static uint8_t * paletteRam;
// File-scope working state for the original blitter implementation. The a1_*
// and a2_* names are what the pixel macros reach through token pasting
// (a##_addr, a##_width, a##_pitch, a##_zoffs, ...).
319 static uint8_t a1ctl;
// Base address each channel's pixel/Z offsets are added to
325 static uint32_t a1_addr;
326 static uint32_t a2_addr;
// Z offset: ZDATA_OFFSET_16() adds zoffs * 4 to the 16 bpp pixel offset
327 static int32_t a1_zoffs;
328 static int32_t a2_zoffs;
329 static uint32_t xadd_a1_control;
330 static uint32_t xadd_a2_control;
// PIXEL_OFFSET_n() multiplies the row/group part of the offset by (1 + pitch)
331 static int32_t a1_pitch;
332 static int32_t a2_pitch;
// blitter_generic copies n_pixels into inner_loop
333 static uint32_t n_pixels;
334 static uint32_t n_lines;
// Row width used by PIXEL_OFFSET_n() (multiplied by y >> 16)
337 static int32_t a1_width;
340 static int32_t a2_width;
// ANDed into a2_x / a2_y after every A2 step in blitter_generic
341 static int32_t a2_mask_x;
342 static int32_t a2_mask_y;
// Per-pixel increments added to x / y in blitter_generic's inner-loop update
343 static int32_t a1_xadd;
344 static int32_t a1_yadd;
345 static int32_t a2_xadd;
346 static int32_t a2_yadd;
// Passed as the 'p' (phrase mode) argument of READ_RDATA()
347 static uint8_t a1_phrase_mode;
348 static uint8_t a2_phrase_mode;
// NOTE(review): presumably the step applied at the end of each line; only
// their logging is visible in this part of the file -- confirm.
349 static int32_t a1_step_x = 0;
350 static int32_t a1_step_y = 0;
351 static int32_t a2_step_x = 0;
352 static int32_t a2_step_y = 0;
353 static uint32_t outer_loop;
354 static uint32_t inner_loop;
355 static uint32_t a2_psize;
356 static uint32_t a1_psize;
357 static uint32_t gouraud_add;
358 //static uint32_t gouraud_data;
359 //static uint16_t gint[4];
360 //static uint16_t gfrac[4];
361 //static uint8_t gcolour[4];
// Per-pixel increments added to gd_i[] / gd_c[] when GOURD or SRCSHADE is set
364 static int gd_ia, gd_ca;
// Cycles 0..3; selects the current entry of z_i[], gd_i[] and gd_c[]
365 static int colour_index = 0;
// Z values; blitter_generic uses z_i[colour_index] >> 16 as the source Z
367 static uint32_t z_i[4];
// Clip bounds that a1_x >> 16 and a1_y >> 16 are compared against
369 static int32_t a1_clip_x, a1_clip_y;
371 // In the spirit of "get it right first, *then* optimize" I've taken the liberty
372 // of removing all the unnecessary code caching. If it turns out to be a good way
373 // to optimize the blitter, then we may revisit it in the future...
376 // Generic blit handler
378 void blitter_generic(uint32_t cmd)
381 Blit! (0018FA70 <- 008DDC40) count: 2 x 13, A1/2_FLAGS: 00014218/00013C18 [cmd: 1401060C]
382 CMD -> src: SRCENX dst: DSTEN misc: a1ctl: UPDA1 UPDA2 mode: ity: PATDSEL z-op: op: LFU_CLEAR ctrl: BCOMPEN BKGWREN
383 A1 step values: -2 (X), 1 (Y)
384 A2 step values: -1 (X), 1 (Y) [mask (unused): 00000000 - FFFFFFFF/FFFFFFFF]
385 A1 -> pitch: 1 phrases, depth: 8bpp, z-off: 0, width: 320 (21), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
386 A2 -> pitch: 1 phrases, depth: 8bpp, z-off: 0, width: 192 (1E), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
387 A1 x/y: 100/12, A2 x/y: 106/0 Pattern: 000000F300000000
390 // specialLog = true;
391 /*if (cmd == 0x1401060C && blit_start_log)
392 specialLog = true;//*/
394 //uint32_t logGo = ((cmd == 0x01800E01 && REG(A1_BASE) == 0x898000) ? 1 : 0);
395 uint32_t srcdata, srczdata, dstdata, dstzdata, writedata, inhibit;
396 uint32_t bppSrc = (DSTA2 ? 1 << ((REG(A1_FLAGS) >> 3) & 0x07) : 1 << ((REG(A2_FLAGS) >> 3) & 0x07));
400 WriteLog("About to do n x m blit (BM width is ? pixels)...\n");
401 WriteLog("A1_STEP_X/Y = %08X/%08X, A2_STEP_X/Y = %08X/%08X\n", a1_step_x, a1_step_y, a2_step_x, a2_step_y);
415 WriteLog(" A1_X/Y = %08X/%08X, A2_X/Y = %08X/%08X\n", a1_x, a1_y, a2_x, a2_y);
417 uint32_t a1_start = a1_x, a2_start = a2_x, bitPos = 0;
419 //Kludge for Hover Strike...
420 //I wonder if this kludge is in conjunction with the SRCENX down below...
421 // This isn't so much a kludge but the way things work in BCOMPEN mode...!
422 if (BCOMPEN && SRCENX)
424 if (n_pixels < bppSrc)
425 bitPos = bppSrc - n_pixels;
428 inner_loop = n_pixels;
433 WriteLog(" A1_X/Y = %08X/%08X, A2_X/Y = %08X/%08X\n", a1_x, a1_y, a2_x, a2_y);
435 srcdata = srczdata = dstdata = dstzdata = writedata = inhibit = 0;
437 if (!DSTA2) // Data movement: A1 <- A2
439 // load src data and Z
441 if (SRCEN || SRCENX) // Not sure if this is correct... (seems to be...!)
443 srcdata = READ_PIXEL(a2, REG(A2_FLAGS));
446 srczdata = READ_ZDATA(a2, REG(A2_FLAGS));
447 else if (cmd & 0x0001C020) // PATDSEL | TOPBEN | TOPNEN | DSTWRZ
448 srczdata = READ_RDATA(SRCZINT, a2, REG(A2_FLAGS), a2_phrase_mode);
450 else // Use SRCDATA register...
452 srcdata = READ_RDATA(SRCDATA, a2, REG(A2_FLAGS), a2_phrase_mode);
454 if (cmd & 0x0001C020) // PATDSEL | TOPBEN | TOPNEN | DSTWRZ
455 srczdata = READ_RDATA(SRCZINT, a2, REG(A2_FLAGS), a2_phrase_mode);
458 // load dst data and Z
461 dstdata = READ_PIXEL(a1, REG(A1_FLAGS));
464 dstzdata = READ_ZDATA(a1, REG(A1_FLAGS));
466 dstzdata = READ_RDATA(DSTZ, a1, REG(A1_FLAGS), a1_phrase_mode);
470 dstdata = READ_RDATA(DSTDATA, a1, REG(A1_FLAGS), a1_phrase_mode);
473 dstzdata = READ_RDATA(DSTZ, a1, REG(A1_FLAGS), a1_phrase_mode);
476 /*This wasn't working... // a1 clipping
477 if (cmd & 0x00000040)
479 if (a1_x < 0 || a1_y < 0 || (a1_x >> 16) >= (REG(A1_CLIP) & 0x7FFF)
480 || (a1_y >> 16) >= ((REG(A1_CLIP) >> 16) & 0x7FFF))
485 srczdata = z_i[colour_index] >> 16;
487 // apply z comparator
488 if (Z_OP_INF && srczdata < dstzdata) inhibit = 1;
489 if (Z_OP_EQU && srczdata == dstzdata) inhibit = 1;
490 if (Z_OP_SUP && srczdata > dstzdata) inhibit = 1;
492 // apply data comparator
493 // Note: DCOMPEN only works in 8/16 bpp modes! !!! FIX !!!
494 // Does BCOMPEN only work in 1 bpp mode???
495 // No, but it always does a 1 bit expansion no matter what the BPP of the channel is set to. !!! FIX !!!
496 // This is a bit tricky... We need to fix the XADD value so that it acts like a 1BPP value while inside
498 if (DCOMPEN | BCOMPEN)
500 //Temp, for testing Hover Strike
501 //Doesn't seem to do it... Why?
502 //What needs to happen here is twofold. First, the address generator in the outer loop has
503 //to honor the BPP when calculating the start address (which it kinda does already). Second,
504 //it has to step bit by bit when using BCOMPEN. How to do this???
506 //small problem with this approach: it's not accurate... We need a proper address to begin with
507 //and *then* we can do the bit stepping from there the way it's *supposed* to be done... !!! FIX !!!
510 uint32_t pixShift = (~bitPos) & (bppSrc - 1);
511 srcdata = (srcdata >> pixShift) & 0x01;
514 // if (bitPos % bppSrc == 0)
515 // a2_x += 0x00010000;
518 Interesting (Hover Strike--large letter):
520 Blit! (0018FA70 <- 008DDC40) count: 2 x 13, A1/2_FLAGS: 00014218/00013C18 [cmd: 1401060C]
521 CMD -> src: SRCENX dst: DSTEN misc: a1ctl: UPDA1 UPDA2 mode: ity: PATDSEL z-op: op: LFU_CLEAR ctrl: BCOMPEN BKGWREN
522 A1 step values: -2 (X), 1 (Y)
523 A2 step values: -1 (X), 1 (Y) [mask (unused): 00000000 - FFFFFFFF/FFFFFFFF]
524 A1 -> pitch: 1 phrases, depth: 8bpp, z-off: 0, width: 320 (21), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
525 A2 -> pitch: 1 phrases, depth: 8bpp, z-off: 0, width: 192 (1E), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
526 A1 x/y: 100/12, A2 x/y: 106/0 Pattern: 000000F300000000
528 Blit! (0018FA70 <- 008DDC40) count: 8 x 13, A1/2_FLAGS: 00014218/00013C18 [cmd: 1401060C]
529 CMD -> src: SRCENX dst: DSTEN misc: a1ctl: UPDA1 UPDA2 mode: ity: PATDSEL z-op: op: LFU_CLEAR ctrl: BCOMPEN BKGWREN
530 A1 step values: -8 (X), 1 (Y)
531 A2 step values: -1 (X), 1 (Y) [mask (unused): 00000000 - FFFFFFFF/FFFFFFFF]
532 A1 -> pitch: 1 phrases, depth: 8bpp, z-off: 0, width: 320 (21), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
533 A2 -> pitch: 1 phrases, depth: 8bpp, z-off: 0, width: 192 (1E), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
534 A1 x/y: 102/12, A2 x/y: 107/0 Pattern: 000000F300000000
536 Blit! (0018FA70 <- 008DDC40) count: 1 x 13, A1/2_FLAGS: 00014218/00013C18 [cmd: 1401060C]
537 CMD -> src: SRCENX dst: DSTEN misc: a1ctl: UPDA1 UPDA2 mode: ity: PATDSEL z-op: op: LFU_CLEAR ctrl: BCOMPEN BKGWREN
538 A1 step values: -1 (X), 1 (Y)
539 A2 step values: -1 (X), 1 (Y) [mask (unused): 00000000 - FFFFFFFF/FFFFFFFF]
540 A1 -> pitch: 1 phrases, depth: 8bpp, z-off: 0, width: 320 (21), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
541 A2 -> pitch: 1 phrases, depth: 8bpp, z-off: 0, width: 192 (1E), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
542 A1 x/y: 118/12, A2 x/y: 70/0 Pattern: 000000F300000000
544 Blit! (0018FA70 <- 008DDC40) count: 8 x 13, A1/2_FLAGS: 00014218/00013C18 [cmd: 1401060C]
545 CMD -> src: SRCENX dst: DSTEN misc: a1ctl: UPDA1 UPDA2 mode: ity: PATDSEL z-op: op: LFU_CLEAR ctrl: BCOMPEN BKGWREN
546 A1 step values: -8 (X), 1 (Y)
547 A2 step values: -1 (X), 1 (Y) [mask (unused): 00000000 - FFFFFFFF/FFFFFFFF]
548 A1 -> pitch: 1 phrases, depth: 8bpp, z-off: 0, width: 320 (21), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
549 A2 -> pitch: 1 phrases, depth: 8bpp, z-off: 0, width: 192 (1E), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
550 A1 x/y: 119/12, A2 x/y: 71/0 Pattern: 000000F300000000
552 Blit! (0018FA70 <- 008DDC40) count: 1 x 13, A1/2_FLAGS: 00014218/00013C18 [cmd: 1401060C]
553 CMD -> src: SRCENX dst: DSTEN misc: a1ctl: UPDA1 UPDA2 mode: ity: PATDSEL z-op: op: LFU_CLEAR ctrl: BCOMPEN BKGWREN
554 A1 step values: -1 (X), 1 (Y)
555 A2 step values: -1 (X), 1 (Y) [mask (unused): 00000000 - FFFFFFFF/FFFFFFFF]
556 A1 -> pitch: 1 phrases, depth: 8bpp, z-off: 0, width: 320 (21), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
557 A2 -> pitch: 1 phrases, depth: 8bpp, z-off: 0, width: 192 (1E), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
558 A1 x/y: 127/12, A2 x/y: 66/0 Pattern: 000000F300000000
560 Blit! (0018FA70 <- 008DDC40) count: 8 x 13, A1/2_FLAGS: 00014218/00013C18 [cmd: 1401060C]
561 CMD -> src: SRCENX dst: DSTEN misc: a1ctl: UPDA1 UPDA2 mode: ity: PATDSEL z-op: op: LFU_CLEAR ctrl: BCOMPEN BKGWREN
562 A1 step values: -8 (X), 1 (Y)
563 A2 step values: -1 (X), 1 (Y) [mask (unused): 00000000 - FFFFFFFF/FFFFFFFF]
564 A1 -> pitch: 1 phrases, depth: 8bpp, z-off: 0, width: 320 (21), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
565 A2 -> pitch: 1 phrases, depth: 8bpp, z-off: 0, width: 192 (1E), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
566 A1 x/y: 128/12, A2 x/y: 67/0 Pattern: 000000F300000000
572 //WriteLog("Blitter: BCOMPEN set on command %08X inhibit prev:%u, now:", cmd, inhibit);
573 // compare source pixel with pattern pixel
575 Blit! (000B8250 <- 0012C3A0) count: 16 x 1, A1/2_FLAGS: 00014420/00012000 [cmd: 05810001]
576 CMD -> src: SRCEN dst: misc: a1ctl: mode: ity: PATDSEL z-op: op: LFU_REPLACE ctrl: BCOMPEN
577 A1 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 384 (22), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
578 A2 -> pitch: 1 phrases, depth: 1bpp, z-off: 0, width: 16 (10), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
582 // AvP is still wrong, could be cuz it's doing A1 -> A2...
584 // Src is the 1bpp bitmap... DST is the PATTERN!!!
585 // This seems to solve at least ONE of the problems with MC3D...
586 // Why should this be inverted???
587 // Bcuz it is. This is supposed to be used only for a bit -> pixel expansion...
588 /* if (srcdata == READ_RDATA(PATTERNDATA, a2, REG(A2_FLAGS), a2_phrase_mode))
589 // if (srcdata != READ_RDATA(PATTERNDATA, a2, REG(A2_FLAGS), a2_phrase_mode))
591 /* uint32_t A2bpp = 1 << ((REG(A2_FLAGS) >> 3) & 0x07);
592 if (A2bpp == 1 || A2bpp == 16 || A2bpp == 8)
593 inhibit = (srcdata == 0 ? 1: 0);
594 // inhibit = !srcdata;
596 WriteLog("Blitter: Bad BPP (%u) selected for BCOMPEN mode!\n", A2bpp);//*/
597 // What it boils down to is this:
604 // compare destination pixel with pattern pixel
605 if (dstdata == READ_RDATA(PATTERNDATA, a1, REG(A1_FLAGS), a1_phrase_mode))
606 // if (dstdata != READ_RDATA(PATTERNDATA, a1, REG(A1_FLAGS), a1_phrase_mode))
610 // This is DEFINITELY WRONG
611 // if (a1_phrase_mode || a2_phrase_mode)
612 // inhibit = !inhibit;
617 inhibit |= (((a1_x >> 16) < a1_clip_x && (a1_x >> 16) >= 0
618 && (a1_y >> 16) < a1_clip_y && (a1_y >> 16) >= 0) ? 0 : 1);
621 // compute the write data and store
624 // Houston, we have a problem...
625 // Look here, at PATDSEL and GOURD. If both are active (as they are on the BIOS intro), then there's
627 //Blit! (00100000 <- 000095D0) count: 3 x 1, A1/2_FLAGS: 00014220/00004020 [cmd: 00011008]
628 // CMD -> src: dst: DSTEN misc: a1ctl: mode: GOURD ity: PATDSEL z-op: op: LFU_CLEAR ctrl:
629 // A1 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 320 (21), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
630 // A2 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 256 (20), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
631 // A1 x/y: 90/171, A2 x/y: 808/0 Pattern: 776D770077007700
635 // use pattern data for write data
636 writedata = READ_RDATA(PATTERNDATA, a1, REG(A1_FLAGS), a1_phrase_mode);
640 /*if (blit_start_log)
641 WriteLog("BLIT: ADDDSEL srcdata: %08X\, dstdata: %08X, ", srcdata, dstdata);//*/
643 // intensity addition
644 //Ok, this is wrong... Or is it? Yes, it's wrong! !!! FIX !!!
645 /* writedata = (srcdata & 0xFF) + (dstdata & 0xFF);
646 if (!(TOPBEN) && writedata > 0xFF)
649 writedata |= (srcdata & 0xF00) + (dstdata & 0xF00);
650 if (!(TOPNEN) && writedata > 0xFFF)
651 // writedata = 0xFFF;
653 writedata |= (srcdata & 0xF000) + (dstdata & 0xF000);//*/
654 //notneeded--writedata &= 0xFFFF;
655 /*if (blit_start_log)
656 WriteLog("writedata: %08X\n", writedata);//*/
658 Hover Strike ADDDSEL blit:
660 Blit! (00098D90 <- 0081DDC0) count: 320 x 287, A1/2_FLAGS: 00004220/00004020 [cmd: 00020208]
661 CMD -> src: dst: DSTEN misc: a1ctl: UPDA1 mode: ity: ADDDSEL z-op: op: LFU_CLEAR ctrl:
662 A1 step values: -320 (X), 1 (Y)
663 A1 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 320 (21), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
664 A2 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 256 (20), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
665 A1 x/y: 0/0, A2 x/y: 3288/0 Pattern: 0000000000000000 SRCDATA: 00FD00FD00FD00FD
667 writedata = (srcdata & 0xFF) + (dstdata & 0xFF);
671 //This is correct now, but slow...
672 int16_t s = (srcdata & 0xFF) | (srcdata & 0x80 ? 0xFF00 : 0x0000),
681 writedata = (uint32_t)sum;
684 //This doesn't seem right... Looks like it would muck up the low byte... !!! FIX !!!
685 writedata |= (srcdata & 0xF00) + (dstdata & 0xF00);
687 if (!TOPNEN && writedata > 0xFFF)
692 writedata |= (srcdata & 0xF000) + (dstdata & 0xF000);
696 if (LFU_NAN) writedata |= ~srcdata & ~dstdata;
697 if (LFU_NA) writedata |= ~srcdata & dstdata;
698 if (LFU_AN) writedata |= srcdata & ~dstdata;
699 if (LFU_A) writedata |= srcdata & dstdata;
702 //Although, this looks like it's OK... (even if it is shitty!)
703 //According to JTRM, this is part of the four things the blitter does with the write data (the other
704 //three being PATDSEL, ADDDSEL, and LFU (default). I'm not sure which gets precedence, this or PATDSEL
705 //(see above blit example)...
707 writedata = ((gd_c[colour_index]) << 8) | (gd_i[colour_index] >> 16);
711 int intensity = srcdata & 0xFF;
712 int ia = gd_ia >> 16;
714 ia = 0xFFFFFF00 | ia;
718 if (intensity > 0xFF)
720 writedata = (srcdata & 0xFF00) | intensity;
729 //Tried 2nd below for Hover Strike: No dice.
730 if (/*a1_phrase_mode || */BKGWREN || !inhibit)
731 // if (/*a1_phrase_mode || BKGWREN ||*/ !inhibit)
733 /*if (((REG(A1_FLAGS) >> 3) & 0x07) == 5)
735 uint32_t offset = a1_addr+(PIXEL_OFFSET_32(a1)<<2);
736 // (((((uint32_t)a##_y >> 16) * a##_width) + (((uint32_t)a##_x >> 16) & ~1)) * (1 + a##_pitch) + (((uint32_t)a##_x >> 16) & 1))
737 if ((offset >= 0x1FF020 && offset <= 0x1FF03F) || (offset >= 0x1FF820 && offset <= 0x1FF83F))
738 WriteLog("32bpp pixel write: A1 Phrase mode --> ");
740 // write to the destination
741 WRITE_PIXEL(a1, REG(A1_FLAGS), writedata);
743 WRITE_ZDATA(a1, REG(A1_FLAGS), srczdata);
746 else // if (DSTA2) // Data movement: A1 -> A2
748 // load src data and Z
751 srcdata = READ_PIXEL(a1, REG(A1_FLAGS));
753 srczdata = READ_ZDATA(a1, REG(A1_FLAGS));
754 else if (cmd & 0x0001C020) // PATDSEL | TOPBEN | TOPNEN | DSTWRZ
755 srczdata = READ_RDATA(SRCZINT, a1, REG(A1_FLAGS), a1_phrase_mode);
759 srcdata = READ_RDATA(SRCDATA, a1, REG(A1_FLAGS), a1_phrase_mode);
760 if (cmd & 0x001C020) // PATDSEL | TOPBEN | TOPNEN | DSTWRZ
761 srczdata = READ_RDATA(SRCZINT, a1, REG(A1_FLAGS), a1_phrase_mode);
764 // load dst data and Z
767 dstdata = READ_PIXEL(a2, REG(A2_FLAGS));
769 dstzdata = READ_ZDATA(a2, REG(A2_FLAGS));
771 dstzdata = READ_RDATA(DSTZ, a2, REG(A2_FLAGS), a2_phrase_mode);
775 dstdata = READ_RDATA(DSTDATA, a2, REG(A2_FLAGS), a2_phrase_mode);
777 dstzdata = READ_RDATA(DSTZ, a2, REG(A2_FLAGS), a2_phrase_mode);
781 srczdata = z_i[colour_index] >> 16;
783 // apply z comparator
784 if (Z_OP_INF && srczdata < dstzdata) inhibit = 1;
785 if (Z_OP_EQU && srczdata == dstzdata) inhibit = 1;
786 if (Z_OP_SUP && srczdata > dstzdata) inhibit = 1;
788 // apply data comparator
789 //NOTE: The bit comparator (BCOMPEN) is NOT the same as the data comparator!
790 if (DCOMPEN | BCOMPEN)
794 // compare source pixel with pattern pixel
795 // AvP: Numbers are correct, but sprites are not!
796 //This doesn't seem to be a problem... But could still be wrong...
797 /* if (srcdata == READ_RDATA(PATTERNDATA, a1, REG(A1_FLAGS), a1_phrase_mode))
798 // if (srcdata != READ_RDATA(PATTERNDATA, a1, REG(A1_FLAGS), a1_phrase_mode))
800 // This is probably not 100% correct... It works in the 1bpp case
801 // (in A1 <- A2 mode, that is...)
802 // AvP: This is causing blocks to be written instead of bit patterns...
804 // NOTE: We really should separate out the BCOMPEN & DCOMPEN stuff!
805 /* uint32_t A1bpp = 1 << ((REG(A1_FLAGS) >> 3) & 0x07);
806 if (A1bpp == 1 || A1bpp == 16 || A1bpp == 8)
807 inhibit = (srcdata == 0 ? 1: 0);
809 WriteLog("Blitter: Bad BPP (%u) selected for BCOMPEN mode!\n", A1bpp);//*/
810 // What it boils down to is this:
816 // compare destination pixel with pattern pixel
817 if (dstdata == READ_RDATA(PATTERNDATA, a2, REG(A2_FLAGS), a2_phrase_mode))
818 // if (dstdata != READ_RDATA(PATTERNDATA, a2, REG(A2_FLAGS), a2_phrase_mode))
822 // This is DEFINITELY WRONG
823 // if (a1_phrase_mode || a2_phrase_mode)
824 // inhibit = !inhibit;
829 inhibit |= (((a1_x >> 16) < a1_clip_x && (a1_x >> 16) >= 0
830 && (a1_y >> 16) < a1_clip_y && (a1_y >> 16) >= 0) ? 0 : 1);
833 // compute the write data and store
838 // use pattern data for write data
839 writedata = READ_RDATA(PATTERNDATA, a2, REG(A2_FLAGS), a2_phrase_mode);
843 // intensity addition
844 writedata = (srcdata & 0xFF) + (dstdata & 0xFF);
845 if (!(TOPBEN) && writedata > 0xFF)
847 writedata |= (srcdata & 0xF00) + (dstdata & 0xF00);
848 if (!(TOPNEN) && writedata > 0xFFF)
850 writedata |= (srcdata & 0xF000) + (dstdata & 0xF000);
855 writedata |= ~srcdata & ~dstdata;
857 writedata |= ~srcdata & dstdata;
859 writedata |= srcdata & ~dstdata;
861 writedata |= srcdata & dstdata;
865 writedata = ((gd_c[colour_index]) << 8) | (gd_i[colour_index] >> 16);
869 int intensity = srcdata & 0xFF;
870 int ia = gd_ia >> 16;
872 ia = 0xFFFFFF00 | ia;
876 if (intensity > 0xFF)
878 writedata = (srcdata & 0xFF00) | intensity;
887 if (/*a2_phrase_mode || */BKGWREN || !inhibit)
891 uint32_t offset = a2_addr+(PIXEL_OFFSET_16(a2)<<1);
892 // (((((uint32_t)a##_y >> 16) * a##_width) + (((uint32_t)a##_x >> 16) & ~1)) * (1 + a##_pitch) + (((uint32_t)a##_x >> 16) & 1))
893 WriteLog("[%08X:%04X] ", offset, writedata);
895 // write to the destination
896 WRITE_PIXEL(a2, REG(A2_FLAGS), writedata);
899 WRITE_ZDATA(a2, REG(A2_FLAGS), srczdata);
903 // Update x and y (inner loop)
904 //Now it does! But crappy, crappy, crappy! !!! FIX !!! [DONE]
905 //This is less than ideal, but it works...
908 a1_x += a1_xadd, a1_y += a1_yadd;
909 a2_x = (a2_x + a2_xadd) & a2_mask_x, a2_y = (a2_y + a2_yadd) & a2_mask_y;
913 a1_y += a1_yadd, a2_y = (a2_y + a2_yadd) & a2_mask_y;
917 if (bitPos % bppSrc == 0)
918 a2_x = (a2_x + a2_xadd) & a2_mask_x;
922 a2_x = (a2_x + a2_xadd) & a2_mask_x;
923 if (bitPos % bppSrc == 0)
929 z_i[colour_index] += zadd;
931 if (GOURD || SRCSHADE)
933 gd_i[colour_index] += gd_ia;
934 //Hmm, this doesn't seem to do anything...
935 //But it is correct according to the JTRM...!
936 if ((int32_t)gd_i[colour_index] < 0)
937 gd_i[colour_index] = 0;
938 if (gd_i[colour_index] > 0x00FFFFFF)
939 gd_i[colour_index] = 0x00FFFFFF;//*/
941 gd_c[colour_index] += gd_ca;
942 if ((int32_t)gd_c[colour_index] < 0)
943 gd_c[colour_index] = 0;
944 if (gd_c[colour_index] > 0x000000FF)
945 gd_c[colour_index] = 0x000000FF;//*/
948 if (GOURD || SRCSHADE || GOURZ)
951 //This screws things up WORSE (for the BIOS opening screen)
952 // if (a1_phrase_mode || a2_phrase_mode)
953 colour_index = (colour_index + 1) & 0x03;
958 Here's the problem... The phrase mode code!
959 Blit! (00100000 -> 00148000) count: 327 x 267, A1/2_FLAGS: 00004420/00004420 [cmd: 41802E01]
960 CMD -> src: SRCEN dst: misc: a1ctl: UPDA1 UPDA2 mode: DSTA2 GOURZ ity: z-op: op: LFU_REPLACE ctrl: SRCSHADE
961 A1 step values: -327 (X), 1 (Y)
962 A2 step values: -327 (X), 1 (Y) [mask (unused): 00000000 - FFFFFFFF/FFFFFFFF]
963 A1 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 384 (22), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
964 A2 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 384 (22), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
965 A1 x/y: 28/58, A2 x/y: 28/58 Pattern: 00EA7BEA77EA77EA SRCDATA: 7BFF7BFF7BFF7BFF
967 Below fixes it, but then borks:
970 Blit! (00110000 <- 0010B2A8) count: 12 x 12, A1/2_FLAGS: 000042E2/00000020 [cmd: 09800609]
971 CMD -> src: SRCEN dst: DSTEN misc: a1ctl: UPDA1 UPDA2 mode: ity: z-op: op: LFU_REPLACE ctrl: DCOMPEN
972 A1 step values: -15 (X), 1 (Y)
973 A2 step values: -4 (X), 0 (Y) [mask (unused): 00000000 - FFFFFFFF/FFFFFFFF]
974 A1 -> pitch: 4 phrases, depth: 16bpp, z-off: 3, width: 320 (21), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
975 A2 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 1 (00), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
976 A1 x/y: 173/144, A2 x/y: 4052/0
978 Let's see, with pre-add we'd have:
981 00001111222233334444555566667777
984 |rolls back to here. Hmm.
987 //NOTE: The way to fix the CD BIOS is to uncomment the #define below and comment out the
988 // code after the phrase mode adjustment. But doing so breaks everything else...
989 //#define SCREWY_CD_DEPENDENT
990 #ifdef SCREWY_CD_DEPENDENT
994 a2_y += a2_step_y;//*/
997 //New: Phrase mode taken into account! :-p
998 /* if (a1_phrase_mode) // v1
1000 // Bump the pointer to the next phrase boundary
1001 // Even though it works, this is crappy... Clean it up!
1002 uint32_t size = 64 / a1_psize;
1004 // Crappy kludge... ('aligning' source to destination)
1005 if (a2_phrase_mode && DSTA2)
1007 uint32_t extra = (a2_start >> 16) % size;
1008 a1_x += extra << 16;
1011 uint32_t newx = (a1_x >> 16) / size;
1012 uint32_t newxrem = (a1_x >> 16) % size;
1014 a1_x |= (((newx + (newxrem == 0 ? 0 : 1)) * size) & 0xFFFF) << 16;
1016 if (a1_phrase_mode) // v2
1018 // Bump the pointer to the next phrase boundary
1019 // Even though it works, this is crappy... Clean it up!
1020 uint32_t size = 64 / a1_psize;
1022 // Crappy kludge... ('aligning' source to destination)
1023 if (a2_phrase_mode && DSTA2)
1025 uint32_t extra = (a2_start >> 16) % size;
1026 a1_x += extra << 16;
1029 uint32_t pixelSize = (size - 1) << 16;
1030 a1_x = (a1_x + pixelSize) & ~pixelSize;
1033 /* if (a2_phrase_mode) // v1
1035 // Bump the pointer to the next phrase boundary
1036 // Even though it works, this is crappy... Clean it up!
1037 uint32_t size = 64 / a2_psize;
1039 // Crappy kludge... ('aligning' source to destination)
1040 // Prolly should do this for A1 channel as well... [DONE]
1041 if (a1_phrase_mode && !DSTA2)
1043 uint32_t extra = (a1_start >> 16) % size;
1044 a2_x += extra << 16;
1047 uint32_t newx = (a2_x >> 16) / size;
1048 uint32_t newxrem = (a2_x >> 16) % size;
1050 a2_x |= (((newx + (newxrem == 0 ? 0 : 1)) * size) & 0xFFFF) << 16;
1052 if (a2_phrase_mode) // v1
1054 // Bump the pointer to the next phrase boundary
1055 // Even though it works, this is crappy... Clean it up!
1056 uint32_t size = 64 / a2_psize;
1058 // Crappy kludge... ('aligning' source to destination)
1059 // Prolly should do this for A1 channel as well... [DONE]
1060 if (a1_phrase_mode && !DSTA2)
1062 uint32_t extra = (a1_start >> 16) % size;
1063 a2_x += extra << 16;
1066 uint32_t pixelSize = (size - 1) << 16;
1067 a2_x = (a2_x + pixelSize) & ~pixelSize;
1070 //Not entirely: This still mucks things up... !!! FIX !!!
1071 //Should this go before or after the phrase mode mucking around?
1072 #ifndef SCREWY_CD_DEPENDENT
1076 a2_y += a2_step_y;//*/
1080 // write values back to registers
1081 WREG(A1_PIXEL, (a1_y & 0xFFFF0000) | ((a1_x >> 16) & 0xFFFF));
1082 WREG(A1_FPIXEL, (a1_y << 16) | (a1_x & 0xFFFF));
1083 WREG(A2_PIXEL, (a2_y & 0xFFFF0000) | ((a2_x >> 16) & 0xFFFF));
//
// blitter_blit: set-up stage of the original blitter path.
// Decodes the command word and the A1/A2 register RAM (through REG()) into the
// working variables, optionally logs the decoded state, then calls
// blitter_generic(cmd) with blitter_working raised for the duration.
//   cmd - the 32-bit blitter command word.
// NOTE(review): this listing omits some lines (braces, case labels, a few
// conditions), so the notes added below cover only the statements that are visible.
//
1087 void blitter_blit(uint32_t cmd)
1089 //Apparently this is doing *something*, just not sure exactly what...
1090 /*if (cmd == 0x41802E01)
1092 WriteLog("BLIT: Found our blit. Was: %08X ", cmd);
1094 WriteLog("Is: %08X\n", cmd);
// Lookup table applied to the 2-bit pitch field of the FLAGS registers (see a1_pitch/a2_pitch below)
1097 uint32_t pitchValue[4] = { 0, 1, 3, 2 };
// Split the command word into its bit-field groups
1100 dst = (cmd >> 3) & 0x07;
1101 misc = (cmd >> 6) & 0x03;
1102 a1ctl = (cmd >> 8) & 0x7;
1103 mode = (cmd >> 11) & 0x07;
1104 ity = (cmd >> 14) & 0x0F;
1105 zop = (cmd >> 18) & 0x07;
1106 op = (cmd >> 21) & 0x0F;
1107 ctrl = (cmd >> 25) & 0x3F;
1109 // Addresses in A1/2_BASE are *phrase* aligned, i.e., bottom three bits are ignored!
1110 // NOTE: This fixes Rayman's bad collision detection AND keeps T2K working!
1111 a1_addr = REG(A1_BASE) & 0xFFFFFFF8;
1112 a2_addr = REG(A2_BASE) & 0xFFFFFFF8;
// Z offset: bits 6-8 of each FLAGS register
1114 a1_zoffs = (REG(A1_FLAGS) >> 6) & 7;
1115 a2_zoffs = (REG(A2_FLAGS) >> 6) & 7;
// X add control: bits 16-17 of each FLAGS register
1117 xadd_a1_control = (REG(A1_FLAGS) >> 16) & 0x03;
1118 xadd_a2_control = (REG(A2_FLAGS) >> 16) & 0x03;
// Pitch: bits 0-1 of each FLAGS register, translated through pitchValue[]
1120 a1_pitch = pitchValue[(REG(A1_FLAGS) & 0x03)];
1121 a2_pitch = pitchValue[(REG(A2_FLAGS) & 0x03)];
// Counter register: low word goes to n_pixels, high word to n_lines
1123 n_pixels = REG(PIXLINECOUNTER) & 0xFFFF;
1124 n_lines = (REG(PIXLINECOUNTER) >> 16) & 0xFFFF;
// A1 position as 16.16 fixed point: integer halves from A1_PIXEL, fractional halves from A1_FPIXEL
1126 a1_x = (REG(A1_PIXEL) << 16) | (REG(A1_FPIXEL) & 0xFFFF);
1127 a1_y = (REG(A1_PIXEL) & 0xFFFF0000) | (REG(A1_FPIXEL) >> 16);
1128 //According to the JTRM, X is restricted to 15 bits and Y is restricted to 12.
1129 //But it seems to fuck up T2K! !!! FIX !!!
1130 //Could it be sign extended??? Doesn't seem to be so according to JTRM
1131 // a1_x &= 0x7FFFFFFF, a1_y &= 0x0FFFFFFF;
1132 //Actually, it says that the X is 16 bits. But it still seems to mess with the Y when restricted to 12...
1133 // a1_y &= 0x0FFFFFFF;
1135 // a1_width = blitter_scanline_width[((REG(A1_FLAGS) & 0x00007E00) >> 9)];
1136 // According to JTRM, this must give a *whole number* of phrases in the current
1137 // pixel size (this means the lookup above is WRONG)... !!! FIX !!!
// Width field: 2-bit mantissa m (bits 9-10) and 4-bit exponent e (bits 11-14); width = ((4 | m) << e) >> 2
1138 uint32_t m = (REG(A1_FLAGS) >> 9) & 0x03, e = (REG(A1_FLAGS) >> 11) & 0x0F;
1139 a1_width = ((0x04 | m) << e) >> 2;//*/
// A2 position: only the integer halves of A2_PIXEL are loaded, so the low 16 bits start at zero
1141 a2_x = (REG(A2_PIXEL) & 0x0000FFFF) << 16;
1142 a2_y = (REG(A2_PIXEL) & 0xFFFF0000);
1143 //According to the JTRM, X is restricted to 15 bits and Y is restricted to 12.
1144 //But it seems to fuck up T2K! !!! FIX !!!
1145 // a2_x &= 0x7FFFFFFF, a2_y &= 0x0FFFFFFF;
1146 //Actually, it says that the X is 16 bits. But it still seems to mess with the Y when restricted to 12...
1147 // a2_y &= 0x0FFFFFFF;
1149 // a2_width = blitter_scanline_width[((REG(A2_FLAGS) & 0x00007E00) >> 9)];
1150 // According to JTRM, this must give a *whole number* of phrases in the current
1151 // pixel size (this means the lookup above is WRONG)... !!! FIX !!!
1152 m = (REG(A2_FLAGS) >> 9) & 0x03, e = (REG(A2_FLAGS) >> 11) & 0x0F;
1153 a2_width = ((0x04 | m) << e) >> 2;//*/
// A2 masks: register halves placed in the upper 16 bits, low 16 bits always set
1154 a2_mask_x = ((REG(A2_MASK) & 0x0000FFFF) << 16) | 0xFFFF;
1155 a2_mask_y = (REG(A2_MASK) & 0xFFFF0000) | 0xFFFF;
1157 // Check for "use mask" flag
1158 if (!(REG(A2_FLAGS) & 0x8000))
1160 a2_mask_x = 0xFFFFFFFF; // must be 16.16
1161 a2_mask_y = 0xFFFFFFFF; // must be 16.16
1166 // According to the official documentation, a hardware bug ties A2's yadd bit to A1's...
1167 a2_yadd = a1_yadd = (YADD1_A1 ? 1 << 16 : 0);
1172 // determine a1_xadd
1173 switch (xadd_a1_control)
1176 // This is a documented Jaguar bug relating to phrase mode and truncation... Look into it!
1177 // add phrase offset to X and truncate
1182 // add pixelsize (1) to X
1186 // add zero (for those nice vertical lines)
1190 // add the contents of the increment register
1191 a1_xadd = (REG(A1_INC) << 16) | (REG(A1_FINC) & 0x0000FFFF);
1192 a1_yadd = (REG(A1_INC) & 0xFFFF0000) | (REG(A1_FINC) >> 16);
1197 //Blit! (0011D000 -> 000B9600) count: 228 x 1, A1/2_FLAGS: 00073820/00064220 [cmd: 41802801]
1198 // A1 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 128 (1C), addctl: XADDINC YADD1 XSIGNADD YSIGNADD
1199 // A2 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 320 (21), addctl: XADD0 YADD1 XSIGNADD YSIGNADD
1200 //if (YADD1_A1 && YADD1_A2 && xadd_a2_control == XADD0 && xadd_a1_control == XADDINC)// &&
1201 // uint32_t a1f = REG(A1_FLAGS), a2f = REG(A2_FLAGS);
1202 //Ok, so this ISN'T it... Prolly the XADDPHR code above that's doing it...
1203 //if (REG(A1_FLAGS) == 0x00073820 && REG(A2_FLAGS) == 0x00064220 && cmd == 0x41802801)
1204 // A1 x/y: 14368/7, A2 x/y: 150/36
1205 //This is it... The problem...
1206 //if ((a1_x >> 16) == 14368) // 14368 = $3820
1207 // return; //Lesse what we got...
1217 // determine a2_xadd
1218 switch (xadd_a2_control)
1221 // add phrase offset to X and truncate
1226 // add pixelsize (1) to X
1230 // add zero (for those nice vertical lines)
1233 //This really isn't a valid bit combo for A2... Shouldn't this cause the blitter to just say no?
1235 WriteLog("BLIT: Asked to use invalid bit combo (XADDINC) for A2...\n");
1236 // add the contents of the increment register
1237 // since there is no register for a2 we just add 1
1238 //Let's do nothing, since it's not listed as a valid bit combo...
1239 // a2_xadd = 1 << 16;
1246 // Modify outer loop steps based on blitter command
// A1 step: fractional halves come from A1_FSTEP, integer halves from A1_STEP
// NOTE(review): the conditions guarding these loads are not visible in this listing
1254 a1_step_x = (REG(A1_FSTEP) & 0xFFFF),
1255 a1_step_y = (REG(A1_FSTEP) >> 16);
1258 a1_step_x |= ((REG(A1_STEP) & 0x0000FFFF) << 16),
1259 a1_step_y |= ((REG(A1_STEP) & 0xFFFF0000));
1262 a2_step_x = (REG(A2_STEP) & 0x0000FFFF) << 16,
1263 a2_step_y = (REG(A2_STEP) & 0xFFFF0000);
1265 outer_loop = n_lines;
// Clip window: low 15 bits of each 16-bit half of A1_CLIP
1270 a1_clip_x = REG(A1_CLIP) & 0x7FFF,
1271 a1_clip_y = (REG(A1_CLIP) >> 16) & 0x7FFF;
1273 // This phrase sizing is incorrect as well... !!! FIX !!! [NOTHING TO FIX]
1274 // Err, this is pixel size... (and it's OK)
1275 a2_psize = 1 << ((REG(A2_FLAGS) >> 3) & 0x07);
1276 a1_psize = 1 << ((REG(A1_FLAGS) >> 3) & 0x07);
// Load the four z_i[] entries from four consecutive 32-bit registers starting at PHRASEZ0
1283 for(int v=0; v<4; v++)
1284 z_i[v] = REG(PHRASEZ0 + v*4);
1288 if (GOURD || GOURZ || SRCSHADE)
// gd_c[n] is one PATTERNDATA byte; gd_i[n] is a 24-bit value whose top byte comes from
// PATTERNDATA and whose low 16 bits come from SRCDATA. Index 0 uses the highest byte offsets.
1290 gd_c[0] = blitter_ram[PATTERNDATA + 6];
1291 gd_i[0] = ((uint32_t)blitter_ram[PATTERNDATA + 7] << 16)
1292 | ((uint32_t)blitter_ram[SRCDATA + 6] << 8) | blitter_ram[SRCDATA + 7];
1294 gd_c[1] = blitter_ram[PATTERNDATA + 4];
1295 gd_i[1] = ((uint32_t)blitter_ram[PATTERNDATA + 5] << 16)
1296 | ((uint32_t)blitter_ram[SRCDATA + 4] << 8) | blitter_ram[SRCDATA + 5];
1298 gd_c[2] = blitter_ram[PATTERNDATA + 2];
1299 gd_i[2] = ((uint32_t)blitter_ram[PATTERNDATA + 3] << 16)
1300 | ((uint32_t)blitter_ram[SRCDATA + 2] << 8) | blitter_ram[SRCDATA + 3];
1302 gd_c[3] = blitter_ram[PATTERNDATA + 0];
1303 gd_i[3] = ((uint32_t)blitter_ram[PATTERNDATA + 1] << 16)
1304 | ((uint32_t)blitter_ram[SRCDATA + 0] << 8) | blitter_ram[SRCDATA + 1];
1306 gouraud_add = REG(INTENSITYINC);
// Low 24 bits of INTENSITYINC, sign-extended to 32 bits
1308 gd_ia = gouraud_add & 0x00FFFFFF;
1309 if (gd_ia & 0x00800000)
1310 gd_ia = 0xFF000000 | gd_ia;
// Top 8 bits of INTENSITYINC, sign-extended to 32 bits
1312 gd_ca = (gouraud_add >> 24) & 0xFF;
1313 if (gd_ca & 0x00000080)
1314 gd_ca = 0xFFFFFF00 | gd_ca;
1317 // Bit comparitor fixing...
1320 // Determine the data flow direction...
1322 a2_step_x /= (1 << ((REG(A2_FLAGS) >> 3) & 0x07));
1326 /* if (BCOMPEN)//Kludge for Hover Strike... !!! FIX !!!
1328 // Determine the data flow direction...
1336 WriteLog("Blit!\n");
1337 WriteLog(" cmd = 0x%.8x\n",cmd);
1338 WriteLog(" a1_base = %08X\n", a1_addr);
1339 WriteLog(" a1_pitch = %d\n", a1_pitch);
1340 WriteLog(" a1_psize = %d\n", a1_psize);
1341 WriteLog(" a1_width = %d\n", a1_width);
1342 WriteLog(" a1_xadd = %f (phrase=%d)\n", (float)a1_xadd / 65536.0, a1_phrase_mode);
1343 WriteLog(" a1_yadd = %f\n", (float)a1_yadd / 65536.0);
1344 WriteLog(" a1_xstep = %f\n", (float)a1_step_x / 65536.0);
1345 WriteLog(" a1_ystep = %f\n", (float)a1_step_y / 65536.0);
1346 WriteLog(" a1_x = %f\n", (float)a1_x / 65536.0);
1347 WriteLog(" a1_y = %f\n", (float)a1_y / 65536.0);
1348 WriteLog(" a1_zoffs = %i\n",a1_zoffs);
1350 WriteLog(" a2_base = %08X\n", a2_addr);
1351 WriteLog(" a2_pitch = %d\n", a2_pitch);
1352 WriteLog(" a2_psize = %d\n", a2_psize);
1353 WriteLog(" a2_width = %d\n", a2_width);
1354 WriteLog(" a2_xadd = %f (phrase=%d)\n", (float)a2_xadd / 65536.0, a2_phrase_mode);
1355 WriteLog(" a2_yadd = %f\n", (float)a2_yadd / 65536.0);
1356 WriteLog(" a2_xstep = %f\n", (float)a2_step_x / 65536.0);
1357 WriteLog(" a2_ystep = %f\n", (float)a2_step_y / 65536.0);
1358 WriteLog(" a2_x = %f\n", (float)a2_x / 65536.0);
1359 WriteLog(" a2_y = %f\n", (float)a2_y / 65536.0);
1360 WriteLog(" a2_mask_x= 0x%.4x\n",a2_mask_x);
1361 WriteLog(" a2_mask_y= 0x%.4x\n",a2_mask_y);
1362 WriteLog(" a2_zoffs = %i\n",a2_zoffs);
1364 WriteLog(" count = %d x %d\n", n_pixels, n_lines);
1366 WriteLog(" command = %08X\n", cmd);
1367 WriteLog(" dsten = %i\n",DSTEN);
1368 WriteLog(" srcen = %i\n",SRCEN);
1369 WriteLog(" patdsel = %i\n",PATDSEL);
1370 WriteLog(" color = 0x%.8x\n",REG(PATTERNDATA));
1371 WriteLog(" dcompen = %i\n",DCOMPEN);
1372 WriteLog(" bcompen = %i\n",BCOMPEN);
1373 WriteLog(" cmpdst = %i\n",CMPDST);
1374 WriteLog(" GOURZ = %i\n",GOURZ);
1375 WriteLog(" GOURD = %i\n",GOURD);
1376 WriteLog(" SRCSHADE= %i\n",SRCSHADE);
1380 //NOTE: Pitch is ignored!
1382 //This *might* be the altimeter blits (they are)...
1383 //On captured screen, x-pos for black (inner) is 259, for pink is 257
1384 //Black is short by 3, pink is short by 1...
1386 Blit! (00110000 <- 000BF010) count: 9 x 31, A1/2_FLAGS: 000042E2/00010020 [cmd: 00010200]
1387 CMD -> src: dst: misc: a1ctl: UPDA1 mode: ity: PATDSEL z-op: op: LFU_CLEAR ctrl:
1388 A1 -> pitch: 4 phrases, depth: 16bpp, z-off: 3, width: 320 (21), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
1389 A2 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 1 (00), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
1390 A1 x/y: 262/124, A2 x/y: 128/0
1391 Blit! (00110000 <- 000BF010) count: 5 x 38, A1/2_FLAGS: 000042E2/00010020 [cmd: 00010200]
1392 CMD -> src: dst: misc: a1ctl: UPDA1 mode: ity: PATDSEL z-op: op: LFU_CLEAR ctrl:
1393 A1 -> pitch: 4 phrases, depth: 16bpp, z-off: 3, width: 320 (21), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
1394 A2 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 1 (00), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
1395 A1 x/y: 264/117, A2 x/y: 407/0
1397 Blit! (00110000 <- 000BF010) count: 9 x 23, A1/2_FLAGS: 000042E2/00010020 [cmd: 00010200]
1398 CMD -> src: dst: misc: a1ctl: UPDA1 mode: ity: PATDSEL z-op: op: LFU_CLEAR ctrl:
1399 A1 step values: -10 (X), 1 (Y)
1400 A1 -> pitch: 4(2) phrases, depth: 16bpp, z-off: 3, width: 320 (21), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
1401 A2 -> pitch: 1(0) phrases, depth: 16bpp, z-off: 0, width: 1 (00), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
1402 A1 x/y: 262/132, A2 x/y: 129/0
1403 Blit! (00110000 <- 000BF010) count: 5 x 27, A1/2_FLAGS: 000042E2/00010020 [cmd: 00010200]
1404 CMD -> src: dst: misc: a1ctl: UPDA1 mode: ity: PATDSEL z-op: op: LFU_CLEAR ctrl:
1405 A1 step values: -8 (X), 1 (Y)
1406 A1 -> pitch: 4(2) phrases, depth: 16bpp, z-off: 3, width: 320 (21), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
1407 A2 -> pitch: 1(0) phrases, depth: 16bpp, z-off: 0, width: 1 (00), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
1408 A1 x/y: 264/128, A2 x/y: 336/0
1410 264v vCursor ends up here...
1414 262v vCursor ends up here...
1418 Fixed! Now for more:
1420 ; This looks like the ship icon in the upper left corner...
1422 Blit! (00110000 <- 0010B2A8) count: 11 x 12, A1/2_FLAGS: 000042E2/00000020 [cmd: 09800609]
1423 CMD -> src: SRCEN dst: DSTEN misc: a1ctl: UPDA1 UPDA2 mode: ity: z-op: op: LFU_REPLACE ctrl: DCOMPEN
1424 A1 step values: -12 (X), 1 (Y)
1425 A2 step values: 0 (X), 0 (Y) [mask (unused): 00000000 - FFFFFFFF/FFFFFFFF]
1426 A1 -> pitch: 4 phrases, depth: 16bpp, z-off: 3, width: 320 (21), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
1427 A2 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 1 (00), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
1428 A1 x/y: 20/24, A2 x/y: 5780/0
1432 More (not sure this is a blitter problem as much as it's a GPU problem):
1433 All but the "M" are trashed...
1434 This does *NOT* look like a blitter problem, as it's rendering properly...
1435 Actually, if you look at the A1 step values, there IS a discrepancy!
1439 Blit! (00110000 <- 0010B2A8) count: 12 x 12, A1/2_FLAGS: 000042E2/00000020 [cmd: 09800609]
1440 CMD -> src: SRCEN dst: DSTEN misc: a1ctl: UPDA1 UPDA2 mode: ity: z-op: op: LFU_REPLACE ctrl: DCOMPEN
1441 A1 step values: -14 (X), 1 (Y)
1442 A2 step values: -4 (X), 0 (Y) [mask (unused): 00000000 - FFFFFFFF/FFFFFFFF]
1443 A1 -> pitch: 4 phrases, depth: 16bpp, z-off: 3, width: 320 (21), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
1444 A2 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 1 (00), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
1445 A1 x/y: 134/144, A2 x/y: 2516/0
1450 Blit! (00110000 <- 0010B2A8) count: 12 x 12, A1/2_FLAGS: 000042E2/00000020 [cmd: 09800609]
1451 CMD -> src: SRCEN dst: DSTEN misc: a1ctl: UPDA1 UPDA2 mode: ity: z-op: op: LFU_REPLACE ctrl: DCOMPEN
1452 A1 step values: -13 (X), 1 (Y)
1453 A2 step values: -4 (X), 0 (Y) [mask (unused): 00000000 - FFFFFFFF/FFFFFFFF]
1454 A1 -> pitch: 4 phrases, depth: 16bpp, z-off: 3, width: 320 (21), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
1455 A2 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 1 (00), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
1456 A1 x/y: 147/144, A2 x/y: 2660/0
1460 Blit! (00110000 <- 0010B2A8) count: 12 x 12, A1/2_FLAGS: 000042E2/00000020 [cmd: 09800609]
1461 CMD -> src: SRCEN dst: DSTEN misc: a1ctl: UPDA1 UPDA2 mode: ity: z-op: op: LFU_REPLACE ctrl: DCOMPEN
1462 A1 step values: -12 (X), 1 (Y)
1463 A2 step values: 0 (X), 0 (Y) [mask (unused): 00000000 - FFFFFFFF/FFFFFFFF]
1464 A1 -> pitch: 4 phrases, depth: 16bpp, z-off: 3, width: 320 (21), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
1465 A2 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 1 (00), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
1466 A1 x/y: 160/144, A2 x/y: 3764/0
1470 Blit! (00110000 <- 0010B2A8) count: 12 x 12, A1/2_FLAGS: 000042E2/00000020 [cmd: 09800609]
1471 CMD -> src: SRCEN dst: DSTEN misc: a1ctl: UPDA1 UPDA2 mode: ity: z-op: op: LFU_REPLACE ctrl: DCOMPEN
1472 A1 step values: -15 (X), 1 (Y)
1473 A2 step values: -4 (X), 0 (Y) [mask (unused): 00000000 - FFFFFFFF/FFFFFFFF]
1474 A1 -> pitch: 4 phrases, depth: 16bpp, z-off: 3, width: 320 (21), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
1475 A2 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 1 (00), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
1476 A1 x/y: 173/144, A2 x/y: 4052/0
1479 //extern int op_start_log;
// String tables and re-decoded FLAGS fields used only by the log output that follows
1482 const char * ctrlStr[4] = { "XADDPHR\0", "XADDPIX\0", "XADD0\0", "XADDINC\0" };
1483 const char * bppStr[8] = { "1bpp\0", "2bpp\0", "4bpp\0", "8bpp\0", "16bpp\0", "32bpp\0", "???\0", "!!!\0" };
1484 const char * opStr[16] = { "LFU_CLEAR", "LFU_NSAND", "LFU_NSAD", "LFU_NOTS", "LFU_SAND", "LFU_NOTD", "LFU_N_SXORD", "LFU_NSORND",
1485 "LFU_SAD", "LFU_XOR", "LFU_D", "LFU_NSORD", "LFU_REPLACE", "LFU_SORND", "LFU_SORD", "LFU_ONE" };
1486 uint32_t /*src = cmd & 0x07, dst = (cmd >> 3) & 0x07, misc = (cmd >> 6) & 0x03,
1487 a1ctl = (cmd >> 8) & 0x07,*/ mode = (cmd >> 11) & 0x07/*, ity = (cmd >> 14) & 0x0F,
1488 zop = (cmd >> 18) & 0x07, op = (cmd >> 21) & 0x0F, ctrl = (cmd >> 25) & 0x3F*/;
1489 uint32_t a1f = REG(A1_FLAGS), a2f = REG(A2_FLAGS);
1490 uint32_t p1 = a1f & 0x07, p2 = a2f & 0x07,
1491 d1 = (a1f >> 3) & 0x07, d2 = (a2f >> 3) & 0x07,
1492 zo1 = (a1f >> 6) & 0x07, zo2 = (a2f >> 6) & 0x07,
1493 w1 = (a1f >> 9) & 0x3F, w2 = (a2f >> 9) & 0x3F,
1494 ac1 = (a1f >> 16) & 0x1F, ac2 = (a2f >> 16) & 0x1F;
1495 uint32_t iw1 = ((0x04 | (w1 & 0x03)) << ((w1 & 0x3C) >> 2)) >> 2;
1496 uint32_t iw2 = ((0x04 | (w2 & 0x03)) << ((w2 & 0x3C) >> 2)) >> 2;
1497 WriteLog("Blit! (%08X %s %08X) count: %d x %d, A1/2_FLAGS: %08X/%08X [cmd: %08X]\n", a1_addr, (mode&0x01 ? "->" : "<-"), a2_addr, n_pixels, n_lines, a1f, a2f, cmd);
1498 // WriteLog(" CMD -> src: %d, dst: %d, misc: %d, a1ctl: %d, mode: %d, ity: %1X, z-op: %d, op: %1X, ctrl: %02X\n", src, dst, misc, a1ctl, mode, ity, zop, op, ctrl);
1500 WriteLog(" CMD -> src: %s%s%s ", (cmd & 0x0001 ? "SRCEN " : ""), (cmd & 0x0002 ? "SRCENZ " : ""), (cmd & 0x0004 ? "SRCENX" : ""));
1501 WriteLog("dst: %s%s%s ", (cmd & 0x0008 ? "DSTEN " : ""), (cmd & 0x0010 ? "DSTENZ " : ""), (cmd & 0x0020 ? "DSTWRZ" : ""));
1502 WriteLog("misc: %s%s ", (cmd & 0x0040 ? "CLIP_A1 " : ""), (cmd & 0x0080 ? "???" : ""));
1503 WriteLog("a1ctl: %s%s%s ", (cmd & 0x0100 ? "UPDA1F " : ""), (cmd & 0x0200 ? "UPDA1 " : ""), (cmd & 0x0400 ? "UPDA2" : ""));
1504 WriteLog("mode: %s%s%s ", (cmd & 0x0800 ? "DSTA2 " : ""), (cmd & 0x1000 ? "GOURD " : ""), (cmd & 0x2000 ? "GOURZ" : ""));
1505 WriteLog("ity: %s%s%s%s ", (cmd & 0x4000 ? "TOPBEN " : ""), (cmd & 0x8000 ? "TOPNEN " : ""), (cmd & 0x00010000 ? "PATDSEL" : ""), (cmd & 0x00020000 ? "ADDDSEL" : ""));
1506 WriteLog("z-op: %s%s%s ", (cmd & 0x00040000 ? "ZMODELT " : ""), (cmd & 0x00080000 ? "ZMODEEQ " : ""), (cmd & 0x00100000 ? "ZMODEGT" : ""));
1507 WriteLog("op: %s ", opStr[(cmd >> 21) & 0x0F]);
1508 WriteLog("ctrl: %s%s%s%s%s%s\n", (cmd & 0x02000000 ? "CMPDST " : ""), (cmd & 0x04000000 ? "BCOMPEN " : ""), (cmd & 0x08000000 ? "DCOMPEN " : ""), (cmd & 0x10000000 ? "BKGWREN " : ""), (cmd & 0x20000000 ? "BUSHI " : ""), (cmd & 0x40000000 ? "SRCSHADE" : ""));
1511 WriteLog(" A1 step values: %d (X), %d (Y)\n", a1_step_x >> 16, a1_step_y >> 16);
1514 WriteLog(" A2 step values: %d (X), %d (Y) [mask (%sused): %08X - %08X/%08X]\n", a2_step_x >> 16, a2_step_y >> 16, (a2f & 0x8000 ? "" : "un"), REG(A2_MASK), a2_mask_x, a2_mask_y);
1516 WriteLog(" A1 -> pitch: %d phrases, depth: %s, z-off: %d, width: %d (%02X), addctl: %s %s %s %s\n", 1 << p1, bppStr[d1], zo1, iw1, w1, ctrlStr[ac1&0x03], (ac1&0x04 ? "YADD1" : "YADD0"), (ac1&0x08 ? "XSIGNSUB" : "XSIGNADD"), (ac1&0x10 ? "YSIGNSUB" : "YSIGNADD"));
1517 WriteLog(" A2 -> pitch: %d phrases, depth: %s, z-off: %d, width: %d (%02X), addctl: %s %s %s %s\n", 1 << p2, bppStr[d2], zo2, iw2, w2, ctrlStr[ac2&0x03], (ac2&0x04 ? "YADD1" : "YADD0"), (ac2&0x08 ? "XSIGNSUB" : "XSIGNADD"), (ac2&0x10 ? "YSIGNSUB" : "YSIGNADD"));
1518 WriteLog(" A1 x/y: %d/%d, A2 x/y: %d/%d Pattern: %08X%08X SRCDATA: %08X%08X\n", a1_x >> 16, a1_y >> 16, a2_x >> 16, a2_y >> 16, REG(PATTERNDATA), REG(PATTERNDATA + 4), REG(SRCDATA), REG(SRCDATA + 4));
1519 // blit_start_log = 0;
1520 // op_start_log = 1;
// Mark the blit as in progress while blitter_generic() runs; cleared again at the end
1523 blitter_working = 1;
1524 //#ifndef USE_GENERIC_BLITTER
1525 // if (!blitter_execute_cached_code(blitter_in_cache(cmd)))
1527 blitter_generic(cmd);
1529 /*if (blit_start_log)
1531 if (a1_addr == 0xF03000 && a2_addr == 0x004D58)
1533 WriteLog("\nBytes at 004D58:\n");
1534 for(int i=0x004D58; i<0x004D58+(10*127*4); i++)
1535 WriteLog("%02X ", JaguarReadByte(i));
1536 WriteLog("\nBytes at F03000:\n");
1537 for(int i=0xF03000; i<0xF03000+(6*127*4); i++)
1538 WriteLog("%02X ", JaguarReadByte(i));
1543 blitter_working = 0;
1545 #endif // of the #if 0 near the top...
1546 /*******************************************************************************
1547 ********************** STUFF CUT ABOVE THIS LINE! ******************************
1548 *******************************************************************************/
// BlitterInit: initialisation entry point for the blitter (judging by the name).
// NOTE(review): the function body is not visible in this listing.
1551 void BlitterInit(void)
1557 void BlitterReset(void)
1559 memset(blitter_ram, 0x00, 0xA0);
1563 void BlitterDone(void)
1565 WriteLog("BLIT: Done.\n");
//
// BlitterReadByte: read one byte of blitter register space.
//   offset - register offset; tested below against 0x38..0x3B, 0x04..0x07 and 0x2C..0x2F
//   who    - identifier of the requester; not used by any statement visible here
// NOTE(review): the return statements belonging to the first three status-byte tests
// are not visible in this listing; per the comment below, the whole status word is
// meant to read back as $00000805.
//
1569 uint8_t BlitterReadByte(uint32_t offset, uint32_t who/*=UNKNOWN*/)
1574 //This isn't cycle accurate--how to fix? !!! FIX !!!
1575 //Probably have to do some multi-threaded implementation or at least a reentrant safe implementation...
1576 //Real hardware returns $00000805, just like the JTRM says.
1577 if (offset == (0x38 + 0))
1579 if (offset == (0x38 + 1))
1581 if (offset == (0x38 + 2))
1583 if (offset == (0x38 + 3))
1584 return 0x05; // always idle/never stopped (collision detection ignored!)
1586 // CHECK HERE ONCE THIS FIX HAS BEEN TESTED: [X]
// Reads of 0x04-0x07 are redirected 8 bytes higher, i.e. to A1_PIXEL
1588 if (offset >= 0x04 && offset <= 0x07)
1589 //This is it. I wonder if it just ignores the lower three bits?
1590 //No, this is a documented Jaguar I bug. It also bites the read at $F02230 as well...
1591 return blitter_ram[offset + 0x08]; // A1_PIXEL ($F0220C) read at $F02204
// Reads of 0x2C-0x2F are redirected 4 bytes higher, i.e. to A2_PIXEL
1593 if (offset >= 0x2C && offset <= 0x2F)
1594 return blitter_ram[offset + 0x04]; // A2_PIXEL ($F02230) read at $F0222C
// Every other offset reads straight from the register RAM
1596 return blitter_ram[offset];
1601 uint16_t BlitterReadWord(uint32_t offset, uint32_t who/*=UNKNOWN*/)
1603 return ((uint16_t)BlitterReadByte(offset, who) << 8) | (uint16_t)BlitterReadByte(offset+1, who);
1608 uint32_t BlitterReadLong(uint32_t offset, uint32_t who/*=UNKNOWN*/)
1610 return (BlitterReadWord(offset, who) << 16) | BlitterReadWord(offset+2, who);
//
// BlitterWriteByte: store one byte into the blitter register RAM.
//   offset - register offset being written
//   data   - byte value to store
//   who    - identifier of the writer; only referenced by the disabled debug code here
// Writes to 0x7C-0x9B are routed into PATTERNDATA/SRCDATA/SRCZINT/SRCZFRAC, and writes
// to either half of the listed 64-bit registers are stored in the opposite half.
// NOTE(review): several case labels, braces and an else are missing from this listing.
//
1614 void BlitterWriteByte(uint32_t offset, uint8_t data, uint32_t who/*=UNKNOWN*/)
1616 /*if (offset & 0xFF == 0x7B)
1617 WriteLog("--> Wrote to B_STOP: value -> %02X\n", data);*/
1619 /*if ((offset >= PATTERNDATA) && (offset < PATTERNDATA + 8))
1621 printf("--> %s wrote %02X to byte %u of PATTERNDATA...\n", whoName[who], data, offset - PATTERNDATA);
1625 // This handles writes to INTENSITY0-3 by also writing them to their proper places in
1626 // PATTERNDATA & SOURCEDATA (should do the same for the Z registers! !!! FIX !!! [DONE])
1627 if ((offset >= 0x7C) && (offset <= 0x9B))
1631 // INTENSITY registers 0-3
// In the visible cases each register supplies one PATTERNDATA byte and two SRCDATA bytes,
// and every following register lands two bytes lower than the previous one
1633 case 0x7D: blitter_ram[PATTERNDATA + 7] = data; break;
1634 case 0x7E: blitter_ram[SRCDATA + 6] = data; break;
1635 case 0x7F: blitter_ram[SRCDATA + 7] = data; break;
1638 case 0x81: blitter_ram[PATTERNDATA + 5] = data; break;
1639 case 0x82: blitter_ram[SRCDATA + 4] = data; break;
1640 case 0x83: blitter_ram[SRCDATA + 5] = data; break;
1643 case 0x85: blitter_ram[PATTERNDATA + 3] = data; break;
1644 case 0x86: blitter_ram[SRCDATA + 2] = data; break;
1645 case 0x87: blitter_ram[SRCDATA + 3] = data; break;
1648 case 0x89: blitter_ram[PATTERNDATA + 1] = data; break;
1649 case 0x8A: blitter_ram[SRCDATA + 0] = data; break;
1650 case 0x8B: blitter_ram[SRCDATA + 1] = data; break;
// 0x8C-0x9B: the first two bytes of each group of four go to SRCZINT, the last two to SRCZFRAC
1654 case 0x8C: blitter_ram[SRCZINT + 6] = data; break;
1655 case 0x8D: blitter_ram[SRCZINT + 7] = data; break;
1656 case 0x8E: blitter_ram[SRCZFRAC + 6] = data; break;
1657 case 0x8F: blitter_ram[SRCZFRAC + 7] = data; break;
1659 case 0x90: blitter_ram[SRCZINT + 4] = data; break;
1660 case 0x91: blitter_ram[SRCZINT + 5] = data; break;
1661 case 0x92: blitter_ram[SRCZFRAC + 4] = data; break;
1662 case 0x93: blitter_ram[SRCZFRAC + 5] = data; break;
1664 case 0x94: blitter_ram[SRCZINT + 2] = data; break;
1665 case 0x95: blitter_ram[SRCZINT + 3] = data; break;
1666 case 0x96: blitter_ram[SRCZFRAC + 2] = data; break;
1667 case 0x97: blitter_ram[SRCZFRAC + 3] = data; break;
1669 case 0x98: blitter_ram[SRCZINT + 0] = data; break;
1670 case 0x99: blitter_ram[SRCZINT + 1] = data; break;
1671 case 0x9A: blitter_ram[SRCZFRAC + 0] = data; break;
1672 case 0x9B: blitter_ram[SRCZFRAC + 1] = data; break;
1676 // It looks weird, but this is how the 64 bit registers are actually handled...!
1678 else if ((offset >= SRCDATA + 0) && (offset <= SRCDATA + 3)
1679 || (offset >= DSTDATA + 0) && (offset <= DSTDATA + 3)
1680 || (offset >= DSTZ + 0) && (offset <= DSTZ + 3)
1681 || (offset >= SRCZINT + 0) && (offset <= SRCZINT + 3)
1682 || (offset >= SRCZFRAC + 0) && (offset <= SRCZFRAC + 3)
1683 || (offset >= PATTERNDATA + 0) && (offset <= PATTERNDATA + 3))
// Bytes 0-3 of these registers are stored four bytes higher
1685 blitter_ram[offset + 4] = data;
1687 else if ((offset >= SRCDATA + 4) && (offset <= SRCDATA + 7)
1688 || (offset >= DSTDATA + 4) && (offset <= DSTDATA + 7)
1689 || (offset >= DSTZ + 4) && (offset <= DSTZ + 7)
1690 || (offset >= SRCZINT + 4) && (offset <= SRCZINT + 7)
1691 || (offset >= SRCZFRAC + 4) && (offset <= SRCZFRAC + 7)
1692 || (offset >= PATTERNDATA + 4) && (offset <= PATTERNDATA + 7))
// Bytes 4-7 of these registers are stored four bytes lower
1694 blitter_ram[offset - 4] = data;
// NOTE(review): presumably the plain store for offsets matching none of the ranges above;
// the guarding else is not visible in this listing
1697 blitter_ram[offset] = data;
//
// BlitterWriteWord: store a 16-bit value as two byte writes (high byte first) and,
// when the low byte of the offset is 0x3A, start a blit.
//   offset - register offset of the high byte
//   data   - 16-bit value to store
//   who    - identifier of the writer, forwarded unchanged to BlitterWriteByte()
// NOTE(review): the preprocessor lines that close the blitter-selection conditionals
// are not visible in this listing.
//
1701 void BlitterWriteWord(uint32_t offset, uint16_t data, uint32_t who/*=UNKNOWN*/)
1703 /*if (((offset & 0xFF) >= PATTERNDATA) && ((offset & 0xFF) < PATTERNDATA + 8))
1705 printf("----> %s wrote %04X to byte %u of PATTERNDATA...\n", whoName[who], data, offset - (0xF02200 + PATTERNDATA));
1709 /* if (offset & 0xFF == A1_PIXEL && data == 14368)
1711 WriteLog("\n1\nA1_PIXEL written by %s (%u)...\n\n\n", whoName[who], data);
1712 extern bool doGPUDis;
1715 if ((offset & 0xFF) == (A1_PIXEL + 2) && data == 14368)
1717 WriteLog("\n2\nA1_PIXEL written by %s (%u)...\n\n\n", whoName[who], data);
1718 extern bool doGPUDis;
1723 BlitterWriteByte(offset + 0, data >> 8, who);
1724 BlitterWriteByte(offset + 1, data & 0xFF, who);
// 0x3A is the second 16-bit half of the 32-bit value at 0x38, which is what gets
// handed to the blitter as its command word below
1726 if ((offset & 0xFF) == 0x3A)
1727 // I.e., the second write of 32-bit value--not convinced this is the best way to do this!
1728 // But then again, according to the Jaguar docs, this is correct...!
1729 /*extern int blit_start_log;
1730 extern bool doGPUDis;
1733 WriteLog("BLIT: Blitter started by %s...\n", whoName[who]);
1736 #ifndef USE_BOTH_BLITTERS
1737 #ifdef USE_ORIGINAL_BLITTER
1738 blitter_blit(GET32(blitter_ram, 0x38));
1740 #ifdef USE_MIDSUMMER_BLITTER
1741 BlitterMidsummer(GET32(blitter_ram, 0x38));
1743 #ifdef USE_MIDSUMMER_BLITTER_MKII
1744 BlitterMidsummer2();
// vjs.useFastBlitter selects blitter_blit(); NOTE(review): BlitterMidsummer2() below is
// presumably the else branch (the else line is not visible in this listing)
1748 if (vjs.useFastBlitter)
1749 blitter_blit(GET32(blitter_ram, 0x38));
1751 BlitterMidsummer2();
1758 void BlitterWriteLong(uint32_t offset, uint32_t data, uint32_t who/*=UNKNOWN*/)
1760 /*if (((offset & 0xFF) >= PATTERNDATA) && ((offset & 0xFF) < PATTERNDATA + 8))
1762 printf("------> %s wrote %08X to byte %u of PATTERNDATA...\n", whoName[who], data, offset - (0xF02200 + PATTERNDATA));
1766 /* if ((offset & 0xFF) == A1_PIXEL && (data & 0xFFFF) == 14368)
1768 WriteLog("\n3\nA1_PIXEL written by %s (%u)...\n\n\n", whoName[who], data);
1769 extern bool doGPUDis;
1774 BlitterWriteWord(offset + 0, data >> 16, who);
1775 BlitterWriteWord(offset + 2, data & 0xFFFF, who);
1781 const char * opStr[16] = { "LFU_CLEAR", "LFU_NSAND", "LFU_NSAD", "LFU_NOTS", "LFU_SAND", "LFU_NOTD", "LFU_N_SXORD", "LFU_NSORND",
1782 "LFU_SAD", "LFU_XOR", "LFU_D", "LFU_NSORD", "LFU_REPLACE", "LFU_SORND", "LFU_SORD", "LFU_ONE" };
1783 uint32_t cmd = GET32(blitter_ram, 0x38);
1784 uint32_t m = (REG(A1_FLAGS) >> 9) & 0x03, e = (REG(A1_FLAGS) >> 11) & 0x0F;
1785 uint32_t a1_width = ((0x04 | m) << e) >> 2;
1786 m = (REG(A2_FLAGS) >> 9) & 0x03, e = (REG(A2_FLAGS) >> 11) & 0x0F;
1787 uint32_t a2_width = ((0x04 | m) << e) >> 2;
1789 WriteLog("Blit!\n");
1790 WriteLog(" COMMAND = %08X\n", cmd);
1791 WriteLog(" a1_base = %08X\n", REG(A1_BASE));
1792 WriteLog(" a1_flags = %08X (%c %c %c %c%c . %c%c%c%c%c%c %c%c%c %c%c%c . %c%c)\n", REG(A1_FLAGS),
1793 (REG(A1_FLAGS) & 0x100000 ? '1' : '0'),
1794 (REG(A1_FLAGS) & 0x080000 ? '1' : '0'),
1795 (REG(A1_FLAGS) & 0x040000 ? '1' : '0'),
1796 (REG(A1_FLAGS) & 0x020000 ? '1' : '0'),
1797 (REG(A1_FLAGS) & 0x010000 ? '1' : '0'),
1798 (REG(A1_FLAGS) & 0x004000 ? '1' : '0'),
1799 (REG(A1_FLAGS) & 0x002000 ? '1' : '0'),
1800 (REG(A1_FLAGS) & 0x001000 ? '1' : '0'),
1801 (REG(A1_FLAGS) & 0x000800 ? '1' : '0'),
1802 (REG(A1_FLAGS) & 0x000400 ? '1' : '0'),
1803 (REG(A1_FLAGS) & 0x000200 ? '1' : '0'),
1804 (REG(A1_FLAGS) & 0x000100 ? '1' : '0'),
1805 (REG(A1_FLAGS) & 0x000080 ? '1' : '0'),
1806 (REG(A1_FLAGS) & 0x000040 ? '1' : '0'),
1807 (REG(A1_FLAGS) & 0x000020 ? '1' : '0'),
1808 (REG(A1_FLAGS) & 0x000010 ? '1' : '0'),
1809 (REG(A1_FLAGS) & 0x000008 ? '1' : '0'),
1810 (REG(A1_FLAGS) & 0x000002 ? '1' : '0'),
1811 (REG(A1_FLAGS) & 0x000001 ? '1' : '0'));
1812 WriteLog(" pitch=%u, pixSz=%u, zOff=%u, width=%u, xCtrl=%u\n",
1813 REG(A1_FLAGS) & 0x00003, (REG(A1_FLAGS) & 0x00038) >> 3,
1814 (REG(A1_FLAGS) & 0x001C0) >> 6, a1_width, (REG(A1_FLAGS) & 0x30000) >> 16);
1815 WriteLog(" a1_clip = %u, %u (%08X)\n", GET16(blitter_ram, A1_CLIP + 2), GET16(blitter_ram, A1_CLIP + 0), GET32(blitter_ram, A1_CLIP));
1816 WriteLog(" a1_pixel = %d, %d (%08X)\n", (int16_t)GET16(blitter_ram, A1_PIXEL + 2), (int16_t)GET16(blitter_ram, A1_PIXEL + 0), GET32(blitter_ram, A1_PIXEL));
1817 WriteLog(" a1_step = %d, %d (%08X)\n", (int16_t)GET16(blitter_ram, A1_STEP + 2), (int16_t)GET16(blitter_ram, A1_STEP + 0), GET32(blitter_ram, A1_STEP));
1818 WriteLog(" a1_fstep = %u, %u (%08X)\n", GET16(blitter_ram, A1_FSTEP + 2), GET16(blitter_ram, A1_FSTEP + 0), GET32(blitter_ram, A1_FSTEP));
1819 WriteLog(" a1_fpixel= %u, %u (%08X)\n", GET16(blitter_ram, A1_FPIXEL + 2), GET16(blitter_ram, A1_FPIXEL + 0), GET32(blitter_ram, A1_FPIXEL));
1820 WriteLog(" a1_inc = %d, %d (%08X)\n", (int16_t)GET16(blitter_ram, A1_INC + 2), (int16_t)GET16(blitter_ram, A1_INC + 0), GET32(blitter_ram, A1_INC));
1821 WriteLog(" a1_finc = %u, %u (%08X)\n", GET16(blitter_ram, A1_FINC + 2), GET16(blitter_ram, A1_FINC + 0), GET32(blitter_ram, A1_FINC));
1823 WriteLog(" a2_base = %08X\n", REG(A2_BASE));
1824 WriteLog(" a2_flags = %08X (%c %c %c %c%c %c %c%c%c%c%c%c %c%c%c %c%c%c . %c%c)\n", REG(A2_FLAGS),
1825 (REG(A2_FLAGS) & 0x100000 ? '1' : '0'),
1826 (REG(A2_FLAGS) & 0x080000 ? '1' : '0'),
1827 (REG(A2_FLAGS) & 0x040000 ? '1' : '0'),
1828 (REG(A2_FLAGS) & 0x020000 ? '1' : '0'),
1829 (REG(A2_FLAGS) & 0x010000 ? '1' : '0'),
1830 (REG(A2_FLAGS) & 0x008000 ? '1' : '0'),
1831 (REG(A2_FLAGS) & 0x004000 ? '1' : '0'),
1832 (REG(A2_FLAGS) & 0x002000 ? '1' : '0'),
1833 (REG(A2_FLAGS) & 0x001000 ? '1' : '0'),
1834 (REG(A2_FLAGS) & 0x000800 ? '1' : '0'),
1835 (REG(A2_FLAGS) & 0x000400 ? '1' : '0'),
1836 (REG(A2_FLAGS) & 0x000200 ? '1' : '0'),
1837 (REG(A2_FLAGS) & 0x000100 ? '1' : '0'),
1838 (REG(A2_FLAGS) & 0x000080 ? '1' : '0'),
1839 (REG(A2_FLAGS) & 0x000040 ? '1' : '0'),
1840 (REG(A2_FLAGS) & 0x000020 ? '1' : '0'),
1841 (REG(A2_FLAGS) & 0x000010 ? '1' : '0'),
1842 (REG(A2_FLAGS) & 0x000008 ? '1' : '0'),
1843 (REG(A2_FLAGS) & 0x000002 ? '1' : '0'),
1844 (REG(A2_FLAGS) & 0x000001 ? '1' : '0'));
1845 WriteLog(" pitch=%u, pixSz=%u, zOff=%u, width=%u, xCtrl=%u\n",
1846 REG(A2_FLAGS) & 0x00003, (REG(A2_FLAGS) & 0x00038) >> 3,
1847 (REG(A2_FLAGS) & 0x001C0) >> 6, a2_width, (REG(A2_FLAGS) & 0x30000) >> 16);
1848 WriteLog(" a2_mask = %u, %u (%08X)\n", GET16(blitter_ram, A2_MASK + 2), GET16(blitter_ram, A2_MASK + 0), GET32(blitter_ram, A2_MASK));
1849 WriteLog(" a2_pixel = %d, %d (%08X)\n", (int16_t)GET16(blitter_ram, A2_PIXEL + 2), (int16_t)GET16(blitter_ram, A2_PIXEL + 0), GET32(blitter_ram, A2_PIXEL));
1850 WriteLog(" a2_step = %d, %d (%08X)\n", (int16_t)GET16(blitter_ram, A2_STEP + 2), (int16_t)GET16(blitter_ram, A2_STEP + 0), GET32(blitter_ram, A2_STEP));
1852 WriteLog(" count = %d x %d\n", GET16(blitter_ram, PIXLINECOUNTER + 2), GET16(blitter_ram, PIXLINECOUNTER));
1854 WriteLog(" SRCEN = %s\n", (SRCEN ? "1" : "0"));
1855 WriteLog(" SRCENZ = %s\n", (SRCENZ ? "1" : "0"));
1856 WriteLog(" SRCENX = %s\n", (SRCENX ? "1" : "0"));
1857 WriteLog(" DSTEN = %s\n", (DSTEN ? "1" : "0"));
1858 WriteLog(" DSTENZ = %s\n", (DSTENZ ? "1" : "0"));
1859 WriteLog(" DSTWRZ = %s\n", (DSTWRZ ? "1" : "0"));
1860 WriteLog(" CLIPA1 = %s\n", (CLIPA1 ? "1" : "0"));
1861 WriteLog(" UPDA1F = %s\n", (UPDA1F ? "1" : "0"));
1862 WriteLog(" UPDA1 = %s\n", (UPDA1 ? "1" : "0"));
1863 WriteLog(" UPDA2 = %s\n", (UPDA2 ? "1" : "0"));
1864 WriteLog(" DSTA2 = %s\n", (DSTA2 ? "1" : "0"));
1865 WriteLog(" ZOP = %s %s %s\n", (Z_OP_INF ? "<" : ""), (Z_OP_EQU ? "=" : ""), (Z_OP_SUP ? ">" : ""));
1866 WriteLog("+-LFUFUNC = %s\n", opStr[(cmd >> 21) & 0x0F]);
1867 WriteLog("| PATDSEL = %s (PD=%08X%08X)\n", (PATDSEL ? "1" : "0"), REG(PATTERNDATA), REG(PATTERNDATA + 4));
1868 WriteLog("+-ADDDSEL = %s\n", (ADDDSEL ? "1" : "0"));
1869 WriteLog(" CMPDST = %s\n", (CMPDST ? "1" : "0"));
1870 WriteLog(" BCOMPEN = %s\n", (BCOMPEN ? "1" : "0"));
1871 WriteLog(" DCOMPEN = %s\n", (DCOMPEN ? "1" : "0"));
1872 WriteLog(" TOPBEN = %s\n", (TOPBEN ? "1" : "0"));
1873 WriteLog(" TOPNEN = %s\n", (TOPNEN ? "1" : "0"));
1874 WriteLog(" BKGWREN = %s\n", (BKGWREN ? "1" : "0"));
1875 WriteLog(" GOURD = %s (II=%08X, SD=%08X%08X)\n", (GOURD ? "1" : "0"), REG(INTENSITYINC), REG(SRCDATA), REG(SRCDATA + 4));
1876 WriteLog(" GOURZ = %s (ZI=%08X, ZD=%08X%08X, SZ1=%08X%08X, SZ2=%08X%08X)\n", (GOURZ ? "1" : "0"), REG(ZINC), REG(DSTZ), REG(DSTZ + 4),
1877 REG(SRCZINT), REG(SRCZINT + 4), REG(SRCZFRAC), REG(SRCZFRAC + 4));
1878 WriteLog(" SRCSHADE = %s\n", (SRCSHADE ? "1" : "0"));
1882 #ifdef USE_MIDSUMMER_BLITTER
1884 // Here's an attempt to write a blitter that conforms to the Midsummer specs--since
1885 // it's supposedly backwards compatible, it should work well...
1887 //#define LOG_BLITTER_MEMORY_ACCESSES
1889 #define DATINIT (false)
1890 #define TXTEXT (false)
1891 #define POLYGON (false)
1893 void BlitterMidsummer(uint32_t cmd)
1898 uint32_t outer_loop, inner_loop, a1_addr, a2_addr;
1899 int32_t a1_x, a1_y, a2_x, a2_y, a1_width, a2_width;
1900 uint8_t a1_phrase_mode, a2_phrase_mode;
1902 a1_addr = REG(A1_BASE) & 0xFFFFFFF8;
1903 a2_addr = REG(A2_BASE) & 0xFFFFFFF8;
1904 a1_x = (REG(A1_PIXEL) << 16) | (REG(A1_FPIXEL) & 0xFFFF);
1905 a1_y = (REG(A1_PIXEL) & 0xFFFF0000) | (REG(A1_FPIXEL) >> 16);
1906 uint32_t m = (REG(A1_FLAGS) >> 9) & 0x03, e = (REG(A1_FLAGS) >> 11) & 0x0F;
1907 a1_width = ((0x04 | m) << e) >> 2;//*/
1908 a2_x = (REG(A2_PIXEL) & 0x0000FFFF) << 16;
1909 a2_y = (REG(A2_PIXEL) & 0xFFFF0000);
1910 m = (REG(A2_FLAGS) >> 9) & 0x03, e = (REG(A2_FLAGS) >> 11) & 0x0F;
1911 a2_width = ((0x04 | m) << e) >> 2;//*/
1913 a1_phrase_mode = a2_phrase_mode = 0;
1915 if ((blitter_ram[A1_FLAGS + 1] & 0x03) == 0)
1918 if ((blitter_ram[A2_FLAGS + 1] & 0x03) == 0)
1921 #define INNER0 (inner_loop == 0)
1922 #define OUTER0 (outer_loop == 0)
1924 // $01800005 has SRCENX, may have to investigate further...
1925 // $00011008 has GOURD & DSTEN.
1926 // $41802F41 has SRCSHADE, CLIPA1
1927 /*bool logBlit = false;
1928 if (cmd != 0x00010200 && cmd != 0x01800001 && cmd != 0x01800005
1929 && cmd != 0x00011008 && cmd !=0x41802F41)
1935 uint64_t srcData = GET64(blitter_ram, SRCDATA), srcXtraData,
1936 dstData = GET64(blitter_ram, DSTDATA), writeData;
1937 uint32_t srcAddr, dstAddr;
1938 uint8_t bitCount, a1PixelSize, a2PixelSize;
// Lookup tables indexed by the 3-bit pixel size code from the FLAGS register
// (0 = 1 BPP, 1 = 2 BPP, 2 = 4 BPP, 3 = 8 BPP, 4 = 16 BPP, 5 = 32 BPP; codes
// 6 and 7 map to 0 in both tables).
1940 // JTRM says phrase mode only works for 8BPP or higher, so let's try this...
// phraseOffset: number of pixels the X pointer advances per phrase-mode step;
// the same value is subtracted from inner_loop after each phrase-mode write.
1941 uint32_t phraseOffset[8] = { 8, 8, 8, 8, 4, 2, 0, 0 };
// pixelShift: shift that turns a pixel index into a byte offset. It is applied
// as a right shift for sizes below 8 BPP and as a left shift for sizes above
// 8 BPP; 8 BPP indices are used unshifted.
1942 uint8_t pixelShift[8] = { 3, 2, 1, 0, 1, 2, 0, 0 };
// Pixel size code = bits 3-5 of each FLAGS register; FLAGS + 3 is the least
// significant byte because the register file is stored big-endian.
1944 a1PixelSize = (blitter_ram[A1_FLAGS + 3] >> 3) & 0x07;
1945 a2PixelSize = (blitter_ram[A2_FLAGS + 3] >> 3) & 0x07;
1947 outer_loop = GET16(blitter_ram, PIXLINECOUNTER + 0);
1949 if (outer_loop == 0)
1950 outer_loop = 0x10000;
1952 // We just list the states here and jump from state to state in order to
1953 // keep things somewhat clear. Optimization/cleanups later.
1955 //idle: // Blitter is idle, and will not perform any bus activity
1957 idle Blitter is off the bus, and no activity takes place.
1958 if GO if DATINIT goto init_if
1967 inner Inner loop is active, read and write cycles are performed
1969 inner: // Run inner loop state machine (asserts step from its idle state)
1970 inner_loop = GET16(blitter_ram, PIXLINECOUNTER + 2);
1972 if (inner_loop == 0)
1973 inner_loop = 0x10000;
1976 ------------------------------
1977 idle: Inactive, blitter is idle or passing round outer loop
1978 idle Another state in the outer loop is active. No bus transfers are performed.
1980 if SRCENX goto sreadx
1981 else if TXTEXT goto txtread
1982 else if SRCEN goto sread
1983 else if DSTEN goto dread
1984 else if DSTENZ goto dzread
2001 sreadx Extra source data read at the start of an inner loop pass.
2003 if SRCENZ goto szreadx
2004 else if TXTEXT goto txtread
2005 else if SRCEN goto sread
2006 else if DSTEN goto dread
2007 else if DSTENZ goto dzread
2010 sreadx: // Extra source data read
2025 szreadx Extra source Z read at the start of an inner loop pass.
2027 if TXTEXT goto txtread
2030 szreadx: // Extra source Z read
2037 txtread Read texture data from external memory. This state is only used for external texture.
2038 TXTEXT is the condition TEXTMODE=1.
2041 else if DSTEN goto dread
2042 else if DSTENZ goto dzread
2045 txtread: // Read external texture data
2056 sread Source data read.
2058 if SRCENZ goto szread
2059 else if DSTEN goto dread
2060 else if DSTENZ goto dzread
2063 sread: // Source data read
2064 //The JTRM doesn't really specify the internal structure of the source data read, but I would
2065 //imagine that if it's in phrase mode that it starts by reading the phrase that the window is
2066 //pointing at. Likewise, the pixel (if in BPP 1, 2 & 4, chopped) otherwise. It probably still
2067 //transfers an entire phrase even in pixel mode.
2068 //Odd thought: Does it expand, e.g., 1 BPP pixels into 32 BPP internally? Hmm...
2071 a1_addr = REG(A1_BASE) & 0xFFFFFFF8;
2072 a2_addr = REG(A2_BASE) & 0xFFFFFFF8;
2073 a1_zoffs = (REG(A1_FLAGS) >> 6) & 7;
2074 a2_zoffs = (REG(A2_FLAGS) >> 6) & 7;
2075 xadd_a1_control = (REG(A1_FLAGS) >> 16) & 0x03;
2076 xadd_a2_control = (REG(A2_FLAGS) >> 16) & 0x03;
2077 a1_pitch = pitchValue[(REG(A1_FLAGS) & 0x03)];
2078 a2_pitch = pitchValue[(REG(A2_FLAGS) & 0x03)];
2079 n_pixels = REG(PIXLINECOUNTER) & 0xFFFF;
2080 n_lines = (REG(PIXLINECOUNTER) >> 16) & 0xFFFF;
2081 a1_x = (REG(A1_PIXEL) << 16) | (REG(A1_FPIXEL) & 0xFFFF);
2082 a1_y = (REG(A1_PIXEL) & 0xFFFF0000) | (REG(A1_FPIXEL) >> 16);
2083 a2_psize = 1 << ((REG(A2_FLAGS) >> 3) & 0x07);
2084 a1_psize = 1 << ((REG(A1_FLAGS) >> 3) & 0x07);
2087 a1_width = ((0x04 | m) << e) >> 2;
2088 a2_width = ((0x04 | m) << e) >> 2;
2090 // write values back to registers
2091 WREG(A1_PIXEL, (a1_y & 0xFFFF0000) | ((a1_x >> 16) & 0xFFFF));
2092 WREG(A1_FPIXEL, (a1_y << 16) | (a1_x & 0xFFFF));
2093 WREG(A2_PIXEL, (a2_y & 0xFFFF0000) | ((a2_x >> 16) & 0xFFFF));
2095 // Calculate the address to be read...
2097 //Need to fix phrase mode calcs here, since they should *step* by eight, not multiply.
2098 //Also, need to fix various differing BPP modes here, since offset won't be correct except
2099 //for 8BPP. !!! FIX !!!
2100 srcAddr = (DSTA2 ? a1_addr : a2_addr);
2102 /* if ((DSTA2 ? a1_phrase_mode : a2_phrase_mode) == 1)
2104 srcAddr += (((DSTA2 ? a1_x : a2_x) >> 16)
2105 + (((DSTA2 ? a1_y : a2_y) >> 16) * (DSTA2 ? a1_width : a2_width)));
2109 // uint32_t pixAddr = ((DSTA2 ? a1_x : a2_x) >> 16)
2110 // + (((DSTA2 ? a1_y : a2_y) >> 16) * (DSTA2 ? a1_width : a2_width));
2111 int32_t pixAddr = (int16_t)((DSTA2 ? a1_x : a2_x) >> 16)
2112 + ((int16_t)((DSTA2 ? a1_y : a2_y) >> 16) * (DSTA2 ? a1_width : a2_width));
2114 if ((DSTA2 ? a1PixelSize : a2PixelSize) < 3)
2115 pixAddr >>= pixelShift[(DSTA2 ? a1PixelSize : a2PixelSize)];
2116 else if ((DSTA2 ? a1PixelSize : a2PixelSize) > 3)
2117 pixAddr <<= pixelShift[(DSTA2 ? a1PixelSize : a2PixelSize)];
2124 if ((DSTA2 ? a1_phrase_mode : a2_phrase_mode) == 1)
2126 srcData = ((uint64_t)JaguarReadLong(srcAddr, BLITTER) << 32)
2127 | (uint64_t)JaguarReadLong(srcAddr + 4, BLITTER);
2131 //1,2,&4BPP are wrong here... !!! FIX !!!
2132 if ((DSTA2 ? a1PixelSize : a2PixelSize) == 0) // 1 BPP
2133 srcData = JaguarReadByte(srcAddr, BLITTER);
2134 if ((DSTA2 ? a1PixelSize : a2PixelSize) == 1) // 2 BPP
2135 srcData = JaguarReadByte(srcAddr, BLITTER);
2136 if ((DSTA2 ? a1PixelSize : a2PixelSize) == 2) // 4 BPP
2137 srcData = JaguarReadByte(srcAddr, BLITTER);
2138 if ((DSTA2 ? a1PixelSize : a2PixelSize) == 3) // 8 BPP
2139 srcData = JaguarReadByte(srcAddr, BLITTER);
2140 if ((DSTA2 ? a1PixelSize : a2PixelSize) == 4) // 16 BPP
2141 srcData = JaguarReadWord(srcAddr, BLITTER);
2142 if ((DSTA2 ? a1PixelSize : a2PixelSize) == 5) // 32 BPP
2143 srcData = JaguarReadLong(srcAddr, BLITTER);
2146 #ifdef LOG_BLITTER_MEMORY_ACCESSES
2148 WriteLog("BLITTER: srcAddr=%08X, srcData=%08X %08X\n", srcAddr, (uint32_t)(srcData >> 32), (uint32_t)(srcData & 0xFFFFFFFF));
2160 szread: // Source Z read
2162 szread Source Z read.
2165 else if DSTENZ goto dzread
2175 dread: // Destination data read
2177 dread Destination data read.
2179 if DSTENZ goto dzread
2182 // Calculate the destination address to be read...
2184 //Need to fix phrase mode calcs here, since they should *step* by eight, not multiply.
2185 //Also, need to fix various differing BPP modes here, since offset won't be correct except
2186 //for 8BPP. !!! FIX !!!
2187 dstAddr = (DSTA2 ? a2_addr : a1_addr);
2190 // uint32_t pixAddr = ((DSTA2 ? a2_x : a1_x) >> 16)
2191 // + (((DSTA2 ? a2_y : a1_y) >> 16) * (DSTA2 ? a2_width : a1_width));
2192 int32_t pixAddr = (int16_t)((DSTA2 ? a2_x : a1_x) >> 16)
2193 + ((int16_t)((DSTA2 ? a2_y : a1_y) >> 16) * (DSTA2 ? a2_width : a1_width));
2195 if ((DSTA2 ? a2PixelSize : a1PixelSize) < 3)
2196 pixAddr >>= pixelShift[(DSTA2 ? a2PixelSize : a1PixelSize)];
2197 else if ((DSTA2 ? a2PixelSize : a1PixelSize) > 3)
2198 pixAddr <<= pixelShift[(DSTA2 ? a2PixelSize : a1PixelSize)];
2205 if ((DSTA2 ? a2_phrase_mode : a1_phrase_mode) == 1)
// Phrase-mode destination read: fetch the whole 64-bit phrase (two 32-bit
// longs, high long first) from the destination address computed just above.
// This previously read from srcAddr, a copy/paste slip from the sread state;
// every non-phrase branch of this state already reads from dstAddr.
2207 dstData = ((uint64_t)JaguarReadLong(dstAddr, BLITTER) << 32)
2208 | (uint64_t)JaguarReadLong(dstAddr + 4, BLITTER);
2212 //1,2,&4BPP are wrong here... !!! FIX !!!
2213 if ((DSTA2 ? a2PixelSize : a1PixelSize) == 0) // 1 BPP
2214 dstData = JaguarReadByte(dstAddr, BLITTER);
2215 if ((DSTA2 ? a2PixelSize : a1PixelSize) == 1) // 2 BPP
2216 dstData = JaguarReadByte(dstAddr, BLITTER);
2217 if ((DSTA2 ? a2PixelSize : a1PixelSize) == 2) // 4 BPP
2218 dstData = JaguarReadByte(dstAddr, BLITTER);
2219 if ((DSTA2 ? a2PixelSize : a1PixelSize) == 3) // 8 BPP
2220 dstData = JaguarReadByte(dstAddr, BLITTER);
2221 if ((DSTA2 ? a2PixelSize : a1PixelSize) == 4) // 16 BPP
2222 dstData = JaguarReadWord(dstAddr, BLITTER);
2223 if ((DSTA2 ? a2PixelSize : a1PixelSize) == 5) // 32 BPP
2224 dstData = JaguarReadLong(dstAddr, BLITTER);
2227 #ifdef LOG_BLITTER_MEMORY_ACCESSES
2229 WriteLog("BLITTER (dread): dstAddr=%08X, dstData=%08X %08X\n", dstAddr, (uint32_t)(dstData >> 32), (uint32_t)(dstData & 0xFFFFFFFF));
2237 dzread: // Destination Z read
2239 dzread Destination Z read.
2244 dwrite: // Destination data write
2246 dwrite Destination write. Every pass round the inner loop must go through this state.
2248 if DSTWRZ goto dzwrite
2249 else if INNER0 goto idle
2250 else if TXTEXT goto txtread
2251 else if SRCEN goto sread
2252 else if DSTEN goto dread
2253 else if DSTENZ goto dzread
2262 a1_xadd = 1.000000 (phrase=0)
2271 a2_xadd = 1.000000 (phrase=1)
2275 a2_mask_x= 0xFFFFFFFF
2276 a2_mask_y= 0xFFFFFFFF
2286 --LFUFUNC = LFU_CLEAR
2287 | PATDSEL = 1 (PD=77C7 7700 7700 7700)
2289 GOURD = 1 (II=00FC 1A00, SD=FF00 0000 0000 0000)
2292 //Still need to do CLIPA1 and SRCSHADE and GOURD and GOURZ...
2294 // Check clipping...
2298 uint16_t x = a1_x >> 16, y = a1_y >> 16;
2300 if (x >= GET16(blitter_ram, A1_CLIP + 2) || y >= GET16(blitter_ram, A1_CLIP))
2304 // Figure out what gets written...
2308 writeData = GET64(blitter_ram, PATTERNDATA);
2309 //GOURD works properly only in 16BPP mode...
2310 //SRCDATA holds the intensity fractions...
2311 //Does GOURD get calc'ed here or somewhere else???
2312 //Temporary testing kludge...
2314 // writeData >>= 48;
2315 // writeData = 0xFF88;
2316 //OK, it's not writing an entire strip of pixels... Why?
2317 //bad incrementing, that's why!
2321 // Apparently this only works with 16-bit pixels. Not sure if it works in phrase mode either.
2322 //Also, take TOPBEN & TOPNEN into account here as well...
2323 writeData = srcData + dstData;
2325 else // LFUFUNC is the default...
2330 writeData |= ~srcData & ~dstData;
2332 writeData |= ~srcData & dstData;
2334 writeData |= srcData & ~dstData;
2336 writeData |= srcData & dstData;
2339 // Calculate the address to be written...
2341 dstAddr = (DSTA2 ? a2_addr : a1_addr);
2343 /* if ((DSTA2 ? a2_phrase_mode : a1_phrase_mode) == 1)
2345 //both of these calculate the wrong address because they don't take into account
2347 dstAddr += ((DSTA2 ? a2_x : a1_x) >> 16)
2348 + (((DSTA2 ? a2_y : a1_y) >> 16) * (DSTA2 ? a2_width : a1_width));
2352 /* dstAddr += ((DSTA2 ? a2_x : a1_x) >> 16)
2353 + (((DSTA2 ? a2_y : a1_y) >> 16) * (DSTA2 ? a2_width : a1_width));*/
2354 // uint32_t pixAddr = ((DSTA2 ? a2_x : a1_x) >> 16)
2355 // + (((DSTA2 ? a2_y : a1_y) >> 16) * (DSTA2 ? a2_width : a1_width));
2356 int32_t pixAddr = (int16_t)((DSTA2 ? a2_x : a1_x) >> 16)
2357 + ((int16_t)((DSTA2 ? a2_y : a1_y) >> 16) * (DSTA2 ? a2_width : a1_width));
2359 if ((DSTA2 ? a2PixelSize : a1PixelSize) < 3)
2360 pixAddr >>= pixelShift[(DSTA2 ? a2PixelSize : a1PixelSize)];
2361 else if ((DSTA2 ? a2PixelSize : a1PixelSize) > 3)
2362 pixAddr <<= pixelShift[(DSTA2 ? a2PixelSize : a1PixelSize)];
2369 if ((DSTA2 ? a2_phrase_mode : a1_phrase_mode) == 1)
2371 JaguarWriteLong(dstAddr, writeData >> 32, BLITTER);
2372 JaguarWriteLong(dstAddr + 4, writeData & 0xFFFFFFFF, BLITTER);
2376 //1,2,&4BPP are wrong here... !!! FIX !!!
2377 if ((DSTA2 ? a2PixelSize : a1PixelSize) == 0) // 1 BPP
2378 JaguarWriteByte(dstAddr, writeData, BLITTER);
2379 if ((DSTA2 ? a2PixelSize : a1PixelSize) == 1) // 2 BPP
2380 JaguarWriteByte(dstAddr, writeData, BLITTER);
2381 if ((DSTA2 ? a2PixelSize : a1PixelSize) == 2) // 4 BPP
2382 JaguarWriteByte(dstAddr, writeData, BLITTER);
2383 if ((DSTA2 ? a2PixelSize : a1PixelSize) == 3) // 8 BPP
2384 JaguarWriteByte(dstAddr, writeData, BLITTER);
2385 if ((DSTA2 ? a2PixelSize : a1PixelSize) == 4) // 16 BPP
2386 JaguarWriteWord(dstAddr, writeData, BLITTER);
2387 if ((DSTA2 ? a2PixelSize : a1PixelSize) == 5) // 32 BPP
2388 JaguarWriteLong(dstAddr, writeData, BLITTER);
2391 #ifdef LOG_BLITTER_MEMORY_ACCESSES
2393 WriteLog("BLITTER: dstAddr=%08X, writeData=%08X %08X\n", dstAddr, (uint32_t)(writeData >> 32), (uint32_t)(writeData & 0xFFFFFFFF));
2396 inhibitWrite://Should this go here? or on the other side of the X/Y incrementing?
2397 //Seems OK here... for now.
2399 // Do funky X/Y incrementation here as well... !!! FIX !!!
2401 // Handle A1 channel stepping
2403 if ((blitter_ram[A1_FLAGS + 1] & 0x03) == 0)
2404 a1_x += phraseOffset[a1PixelSize] << 16;
2405 else if ((blitter_ram[A1_FLAGS + 1] & 0x03) == 1)
2406 a1_x += (blitter_ram[A1_FLAGS + 1] & 0x08 ? -1 << 16 : 1 << 16);
2407 /* else if ((blitter_ram[A1_FLAGS + 1] & 0x03) == 2)
2409 else if ((blitter_ram[A1_FLAGS + 1] & 0x03) == 3)
2411 //Always add the FINC here??? That was the problem with the BIOS screen... So perhaps.
2412 a1_x += GET16(blitter_ram, A1_FINC + 2);
2413 a1_y += GET16(blitter_ram, A1_FINC + 0);
2415 a1_x += GET16(blitter_ram, A1_INC + 2) << 16;
2416 a1_y += GET16(blitter_ram, A1_INC + 0) << 16;
// A1 Y add control: when bit 0x04 of the second FLAGS byte is set, step Y by
// one whole pixel (16.16 fixed point), negated if the Y sign bit 0x10 is set.
// The step is skipped when the X add control field (low two bits) equals 3;
// NOTE(review): mode 3 appears to be the A1_INC/A1_FINC mode that applies its
// own Y increment -- confirm against the hardware docs.
// The mask must be parenthesized: `!=` binds tighter than `&`, so the old
// `flags & 0x03 != 3` evaluated as `flags & 0` and this step never ran.
2419 if ((blitter_ram[A1_FLAGS + 1] & 0x04) && ((blitter_ram[A1_FLAGS + 1] & 0x03) != 3))
2420 a1_y += (blitter_ram[A1_FLAGS + 1] & 0x10 ? -1 << 16 : 1 << 16);
2422 // Handle A2 channel stepping
2424 if ((blitter_ram[A2_FLAGS + 1] & 0x03) == 0)
2425 a2_x += phraseOffset[a2PixelSize] << 16;
2426 else if ((blitter_ram[A2_FLAGS + 1] & 0x03) == 1)
2427 a2_x += (blitter_ram[A2_FLAGS + 1] & 0x08 ? -1 << 16 : 1 << 16);
2428 /* else if ((blitter_ram[A2_FLAGS + 1] & 0x03) == 2)
2431 if (blitter_ram[A2_FLAGS + 1] & 0x04)
2432 a2_y += (blitter_ram[A2_FLAGS + 1] & 0x10 ? -1 << 16 : 1 << 16);
2434 //Need to fix this so that it subtracts (saturating, of course) the correct number of pixels
2435 //in phrase mode... !!! FIX !!! [DONE]
2436 //Need to fix this so that it counts down the correct item. Does it count the
2437 //source or the destination phrase mode???
2438 //It shouldn't matter, because we *should* end up processing
2439 //the same number of pixels either way... Not sure though.
2440 if ((DSTA2 ? a2_phrase_mode : a1_phrase_mode) == 1)
2442 if (inner_loop < phraseOffset[DSTA2 ? a2PixelSize : a1PixelSize])
2445 inner_loop -= phraseOffset[DSTA2 ? a2PixelSize : a1PixelSize];
2466 dzwrite: // Destination Z write
2468 dzwrite Destination Z write.
2471 else if TXTEXT goto txtread
2472 else if SRCEN goto sread
2473 else if DSTEN goto dread
2474 else if DSTENZ goto dzread
2491 ------------------------------
2492 if INDONE if OUTER0 goto idle
2493 else if UPDA1F goto a1fupdate
2494 else if UPDA1 goto a1update
2495 else if GOURZ.POLYGON goto zfupdate
2496 else if UPDA2 goto a2update
2497 else if DATINIT goto init_if
2510 //kill this, for now...
2511 // else if (GOURZ.POLYGON)
2520 a1fupdate: // Update A1 pointer fractions and more (see below)
2522 a1fupdate A1 step fraction is added to A1 pointer fraction
2523 POLYGON true: A1 step delta X and Y fraction parts are added to the A1
2524 step X and Y fraction parts (the value prior to this add is used for
2525 the step to pointer add).
2526 POLYGON true: inner count step fraction is added to the inner count
2528 POLYGON.GOURD true: the I fraction step is added to the computed
2529 intensity fraction parts +
2530 POLYGON.GOURD true: the I fraction step delta is added to the I
2535 #define A1_PIXEL ((uint32_t)0x0C) // Integer part of the pixel (Y.i and X.i)
2536 #define A1_STEP ((uint32_t)0x10) // Integer part of the step
2537 #define A1_FSTEP ((uint32_t)0x14) // Fractional part of the step
2538 #define A1_FPIXEL ((uint32_t)0x18) // Fractional part of the pixel (Y.f and X.f)
2541 // This is all kinda murky. All we have are the Midsummer docs to give us any guidance,
2542 // and it's incomplete or filled with errors (like above). Aarrrgggghhhhh!
2544 //This isn't right. Is it? I don't think the fractional parts are signed...
2545 // a1_x += (int32_t)((int16_t)GET16(blitter_ram, A1_FSTEP + 2));
2546 // a1_y += (int32_t)((int16_t)GET16(blitter_ram, A1_FSTEP + 0));
2547 a1_x += GET16(blitter_ram, A1_FSTEP + 2);
2548 a1_y += GET16(blitter_ram, A1_FSTEP + 0);
2552 a1update: // Update A1 pointer integers
2554 a1update A1 step is added to A1 pointer, with carry from the fractional add
2555 POLYGON true: A1 step delta X and Y integer parts are added to the A1
2556 step X and Y integer parts, with carry from the corresponding
2557 fractional part add (again, the value prior to this add is used for
2558 the step to pointer add).
2559 POLYGON true: inner count step is added to the inner count, with carry
2560 POLYGON.GOURD true: the I step is added to the computed intensities,
2562 POLYGON.GOURD true: the I step delta is added to the I step, with
2563 carry the texture X and Y step delta values are added to the X and Y
2565 if GOURZ.POLYGON goto zfupdate
2566 else if UPDA2 goto a2update
2567 else if DATINIT goto init_if
2570 a1_x += (int32_t)(GET16(blitter_ram, A1_STEP + 2) << 16);
2571 a1_y += (int32_t)(GET16(blitter_ram, A1_STEP + 0) << 16);
2574 //kill this, for now...
2575 // if (GOURZ.POLYGON)
2585 zfupdate: // Update computed Z step fractions
2587 zfupdate the Z fraction step is added to the computed Z fraction parts +
2588 the Z fraction step delta is added to the Z fraction step
2593 zupdate: // Update computed Z step integers
2595 zupdate the Z step is added to the computed Zs, with carry +
2596 the Z step delta is added to the Z step, with carry
2597 if UPDA2 goto a2update
2598 else if DATINIT goto init_if
2608 a2update: // Update A2 pointer
2610 a2update A2 step is added to the A2 pointer
2611 if DATINIT goto init_if
2614 a2_x += (int32_t)(GET16(blitter_ram, A2_STEP + 2) << 16);
2615 a2_y += (int32_t)(GET16(blitter_ram, A2_STEP + 0) << 16);
2623 init_if: // Initialise intensity fractions and texture X
2625 init_if Initialise the fractional part of the computed intensity fields, from
2626 the increment and step registers. The texture X integer and fractional
2627 parts can also be initialised.
2632 init_ii: // Initialise intensity integers and texture Y
2634 init_ii Initialise the integer part of the computed intensity, and texture Y
2635 integer and fractional parts
2636 if GOURZ goto init_zf
2644 init_zf: // Initialise Z fractions
2646 init_zf Initialise the fractional part of the computed Z fields.
2651 init_zi: // Initialise Z integers
2653 init_zi Initialise the integer part of the computed Z fields.
2660 The outer loop state machine fires off the inner loop, and controls the updating
2661 process between passes through the inner loop.
2663 + -- these functions are irrelevant if the DATINIT function is enabled, which it
2666 All these states will complete in one clock cycle, with the exception of the idle
2667 state, which means the blitter is quiescent; and the inner state, which takes as
2668 long as is required to complete one strip of pixels. It is therefore possible for
2669 the blitter to spend a maximum of nine clock cycles of inactivity between passes
2670 through the inner loop.
2680 // Here's attempt #2--taken from the Oberon chip specs!
2683 #ifdef USE_MIDSUMMER_BLITTER_MKII
2685 void ADDRGEN(uint32_t &, uint32_t &, bool, bool,
2686 uint16_t, uint16_t, uint32_t, uint8_t, uint8_t, uint8_t, uint8_t,
2687 uint16_t, uint16_t, uint32_t, uint8_t, uint8_t, uint8_t, uint8_t);
2688 void ADDARRAY(uint16_t * addq, uint8_t daddasel, uint8_t daddbsel, uint8_t daddmode,
2689 uint64_t dstd, uint32_t iinc, uint8_t initcin[], uint64_t initinc, uint16_t initpix,
2690 uint32_t istep, uint64_t patd, uint64_t srcd, uint64_t srcz1, uint64_t srcz2,
2691 uint32_t zinc, uint32_t zstep);
2692 void ADD16SAT(uint16_t &r, uint8_t &co, uint16_t a, uint16_t b, uint8_t cin, bool sat, bool eightbit, bool hicinh);
2693 void ADDAMUX(int16_t &adda_x, int16_t &adda_y, uint8_t addasel, int16_t a1_step_x, int16_t a1_step_y,
2694 int16_t a1_stepf_x, int16_t a1_stepf_y, int16_t a2_step_x, int16_t a2_step_y,
2695 int16_t a1_inc_x, int16_t a1_inc_y, int16_t a1_incf_x, int16_t a1_incf_y, uint8_t adda_xconst,
2696 bool adda_yconst, bool addareg, bool suba_x, bool suba_y);
2697 void ADDBMUX(int16_t &addb_x, int16_t &addb_y, uint8_t addbsel, int16_t a1_x, int16_t a1_y,
2698 int16_t a2_x, int16_t a2_y, int16_t a1_frac_x, int16_t a1_frac_y);
2699 void DATAMUX(int16_t &data_x, int16_t &data_y, uint32_t gpu_din, int16_t addq_x, int16_t addq_y, bool addqsel);
2700 void ADDRADD(int16_t &addq_x, int16_t &addq_y, bool a1fracldi,
2701 uint16_t adda_x, uint16_t adda_y, uint16_t addb_x, uint16_t addb_y, uint8_t modx, bool suba_x, bool suba_y);
2702 void DATA(uint64_t &wdata, uint8_t &dcomp, uint8_t &zcomp, bool &nowrite,
2703 bool big_pix, bool cmpdst, uint8_t daddasel, uint8_t daddbsel, uint8_t daddmode, bool daddq_sel, uint8_t data_sel,
2704 uint8_t dbinh, uint8_t dend, uint8_t dstart, uint64_t dstd, uint32_t iinc, uint8_t lfu_func, uint64_t &patd, bool patdadd,
2705 bool phrase_mode, uint64_t srcd, bool srcdread, bool srczread, bool srcz2add, uint8_t zmode,
2706 bool bcompen, bool bkgwren, bool dcompen, uint8_t icount, uint8_t pixsize,
2707 uint64_t &srcz, uint64_t dstz, uint32_t zinc);
2708 void COMP_CTRL(uint8_t &dbinh, bool &nowrite,
2709 bool bcompen, bool big_pix, bool bkgwren, uint8_t dcomp, bool dcompen, uint8_t icount,
2710 uint8_t pixsize, bool phrase_mode, uint8_t srcd, uint8_t zcomp);
2711 #define VERBOSE_BLITTER_LOGGING
2713 void BlitterMidsummer2(void)
2718 if (startConciseBlitLogging)
2721 // Here's what the specs say the state machine does. Note that this can probably be
2722 // greatly simplified (also, it's different from what John has in his Oberon docs):
2723 //Will remove stuff that isn't in Jaguar I once fully described (stuff like texture won't
2724 //be described here at all)...
2726 uint32_t cmd = GET32(blitter_ram, COMMAND);
2731 cmd != 0x00010200 && // PATDSEL
2732 cmd != 0x01800001 // SRCEN LFUFUNC=C
2733 && cmd != 0x01800005
2734 //Boot ROM ATARI letters:
2735 && cmd != 0x00011008 // DSTEN GOURD PATDSEL
2736 //Boot ROM spinning cube:
2737 && cmd != 0x41802F41 // SRCEN CLIP_A1 UPDA1 UPDA1F UPDA2 DSTA2 GOURZ ZMODE=0 LFUFUNC=C SRCSHADE
2739 && cmd != 0x01800E01 // SRCEN UPDA1 UPDA2 DSTA2 LFUFUNC=C
2740 //T2K TEMPEST letters:
2741 && cmd != 0x09800741 // SRCEN CLIP_A1 UPDA1 UPDA1F UPDA2 LFUFUNC=C DCOMPEN
2742 //Static letters on Cybermorph intro screen:
2743 && cmd != 0x09800609 // SRCEN DSTEN UPDA1 UPDA2 LFUFUNC=C DCOMPEN
2744 //Static pic on title screen:
2745 && cmd != 0x01800601 // SRCEN UPDA1 UPDA2 LFUFUNC=C
2746 //Turning letters on Cybermorph intro screen:
2747 // && cmd != 0x09800F41 // SRCEN CLIP_A1 UPDA1 UPDA1F UPDA2 DSTA2 LFUFUNC=C DCOMPEN
2748 && cmd != 0x00113078 // DSTEN DSTENZ DSTWRZ CLIP_A1 GOURD GOURZ PATDSEL ZMODE=4
2749 && cmd != 0x09900F39 // SRCEN DSTEN DSTENZ DSTWRZ UPDA1 UPDA1F UPDA2 DSTA2 ZMODE=4 LFUFUNC=C DCOMPEN
2750 && cmd != 0x09800209 // SRCEN DSTEN UPDA1 LFUFUNC=C DCOMPEN
2751 && cmd != 0x00011200 // UPDA1 GOURD PATDSEL
2752 //Start of Hover Strike (clearing screen):
2753 && cmd != 0x00010000 // PATDSEL
2754 //Hover Strike text:
2755 && cmd != 0x1401060C // SRCENX DSTEN UPDA1 UPDA2 PATDSEL BCOMPEN BKGWREN
2756 //Hover Strike 3D stuff
2757 && cmd != 0x01902839 // SRCEN DSTEN DSTENZ DSTWRZ DSTA2 GOURZ ZMODE=4 LFUFUNC=C
2758 //Hover Strike darkening on intro to play (briefing) screen
2759 && cmd != 0x00020208 // DSTEN UPDA1 ADDDSEL
2760 //Trevor McFur stuff:
2761 && cmd != 0x05810601 // SRCEN UPDA1 UPDA2 PATDSEL BCOMPEN
2762 && cmd != 0x01800201 // SRCEN UPDA1 LFUFUNC=C
2764 && cmd != 0x00011000 // GOURD PATDSEL
2765 && cmd != 0x00011040 // CLIP_A1 GOURD PATDSEL
2767 && cmd != 0x01800000 // LFUFUNC=C
2768 && cmd != 0x01800401 //
2769 && cmd != 0x01800040 //
2770 && cmd != 0x00020008 //
2771 // && cmd != 0x09800F41 // SRCEN CLIP_A1 UPDA1 UPDA1F UPDA2 DSTA2 LFUFUNC=C DCOMPEN
2777 if (blit_start_log == 0) // Wait for the signal...
2778 logBlit = false;//*/
2779 //temp, for testing...
2780 /*if (cmd != 0x49820609)
2781 logBlit = false;//*/
2784 Some T2K unique blits:
2785 logBlit = F, cmd = 00010200 *
2786 logBlit = F, cmd = 00011000
2787 logBlit = F, cmd = 00011040
2788 logBlit = F, cmd = 01800005 *
2789 logBlit = F, cmd = 09800741 *
2791 Hover Strike mission selection screen:
2792 Blit! (CMD = 01902839) // SRCEN DSTEN DSTENZ DSTWRZ DSTA2 GOURZ ZMODE=4 LFUFUNC=C
2794 Checkered Flag blits in the screw up zone:
2795 Blit! (CMD = 01800001) // SRCEN LFUFUNC=C
2796 Blit! (CMD = 01800000) // LFUFUNC=C
2797 Blit! (CMD = 00010000) // PATDSEL
2799 Wolfenstein 3D in the fuckup zone:
2800 Blit! (CMD = 01800000) // LFUFUNC=C
2803 //printf("logBlit = %s, cmd = %08X\n", (logBlit ? "T" : "F"), cmd);
2808 Blit! (CMD = 00011040)
2809 Flags: CLIP_A1 GOURD PATDSEL
2811 a1_base = 00100000, a2_base = 0081F6A8
2812 a1_x = 00A7, a1_y = 0014, a1_frac_x = 0000, a1_frac_y = 0000, a2_x = 0001, a2_y = 0000
2813 a1_step_x = FE80, a1_step_y = 0001, a1_stepf_x = 0000, a1_stepf_y = 0000, a2_step_x = FFF8, a2_step_y = 0001
2814 a1_inc_x = 0001, a1_inc_y = 0000, a1_incf_x = 0000, a1_incf_y = 0000
2815 a1_win_x = 0180, a1_win_y = 0118, a2_mask_x = 0000, a2_mask_y = 0000
2816 a2_mask=F a1add=+phr/+0 a2add=+phr/+0
2817 a1_pixsize = 4, a2_pixsize = 4
2821 if (cmd == 0x00011040
2822 && (GET16(blitter_ram, A1_PIXEL + 2) == 0x00A7) && (GET16(blitter_ram, A1_PIXEL + 0) == 0x0014)
2823 && (GET16(blitter_ram, A2_PIXEL + 2) == 0x0001) && (GET16(blitter_ram, A2_PIXEL + 0) == 0x0000)
2824 && (GET16(blitter_ram, PIXLINECOUNTER + 2) == 18))
2827 // Line states passed in via the command register
2829 bool srcen = (SRCEN), srcenx = (SRCENX), srcenz = (SRCENZ),
2830 dsten = (DSTEN), dstenz = (DSTENZ), dstwrz = (DSTWRZ), clip_a1 = (CLIPA1),
2831 upda1 = (UPDA1), upda1f = (UPDA1F), upda2 = (UPDA2), dsta2 = (DSTA2),
2832 gourd = (GOURD), gourz = (GOURZ), topben = (TOPBEN), topnen = (TOPNEN),
2833 patdsel = (PATDSEL), adddsel = (ADDDSEL), cmpdst = (CMPDST), bcompen = (BCOMPEN),
2834 dcompen = (DCOMPEN), bkgwren = (BKGWREN), srcshade = (SRCSHADE);
2836 uint8_t zmode = (cmd & 0x01C0000) >> 18, lfufunc = (cmd & 0x1E00000) >> 21;
2838 //Where to find various lines:
2840 // gourd -> dcontrol, inner, outer, state
2841 // gourz -> dcontrol, inner, outer, state
2842 // cmpdst -> blit, data, datacomp, state
2843 // bcompen -> acontrol, inner, mcontrol, state
2844 // dcompen -> inner, state
2845 // bkgwren -> inner, state
2846 // srcshade -> dcontrol, inner, state
2847 // adddsel -> dcontrol
2848 //NOTE: ADDDSEL takes precedence over PATDSEL, PATDSEL over LFU_FUNC
2849 #ifdef VERBOSE_BLITTER_LOGGING
2852 char zfs[512], lfus[512];
2853 zfs[0] = lfus[0] = 0;
2854 if (dstwrz || dstenz || gourz)
2855 sprintf(zfs, " ZMODE=%X", zmode);
2856 if (!(patdsel || adddsel))
2857 sprintf(lfus, " LFUFUNC=%X", lfufunc);
2858 WriteLog("\nBlit! (CMD = %08X)\nFlags:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n", cmd,
2859 (srcen ? " SRCEN" : ""), (srcenx ? " SRCENX" : ""), (srcenz ? " SRCENZ" : ""),
2860 (dsten ? " DSTEN" : ""), (dstenz ? " DSTENZ" : ""), (dstwrz ? " DSTWRZ" : ""),
2861 (clip_a1 ? " CLIP_A1" : ""), (upda1 ? " UPDA1" : ""), (upda1f ? " UPDA1F" : ""),
2862 (upda2 ? " UPDA2" : ""), (dsta2 ? " DSTA2" : ""), (gourd ? " GOURD" : ""),
2863 (gourz ? " GOURZ" : ""), (topben ? " TOPBEN" : ""), (topnen ? " TOPNEN" : ""),
2864 (patdsel ? " PATDSEL" : ""), (adddsel ? " ADDDSEL" : ""), zfs, lfus, (cmpdst ? " CMPDST" : ""),
2865 (bcompen ? " BCOMPEN" : ""), (dcompen ? " DCOMPEN" : ""), (bkgwren ? " BKGWREN" : ""),
2866 (srcshade ? " SRCSHADE" : ""));
2867 WriteLog(" count = %d x %d\n", GET16(blitter_ram, PIXLINECOUNTER + 2), GET16(blitter_ram, PIXLINECOUNTER));
2871 // Lines that don't exist in Jaguar I (and will never be asserted)
2873 bool polygon = false, datinit = false, a1_stepld = false, a2_stepld = false, ext_int = false;
2874 bool istepadd = false, istepfadd = false, finneradd = false, inneradd = false;
2875 bool zstepfadd = false, zstepadd = false;
2877 // Various state lines (initial state--basically the reset state of the FDSYNCs)
2879 bool go = true, idle = true, inner = false, a1fupdate = false, a1update = false,
2880 zfupdate = false, zupdate = false, a2update = false, init_if = false, init_ii = false,
2881 init_zf = false, init_zi = false;
2883 bool outer0 = false, indone = false;
2885 bool idlei, inneri, a1fupdatei, a1updatei, zfupdatei, zupdatei, a2updatei, init_ifi, init_iii,
2888 bool notgzandp = !(gourz && polygon);
2890 // Various registers set up by user
2892 uint16_t ocount = GET16(blitter_ram, PIXLINECOUNTER);
2893 uint8_t a1_pitch = blitter_ram[A1_FLAGS + 3] & 0x03;
2894 uint8_t a2_pitch = blitter_ram[A2_FLAGS + 3] & 0x03;
2895 uint8_t a1_pixsize = (blitter_ram[A1_FLAGS + 3] & 0x38) >> 3;
2896 uint8_t a2_pixsize = (blitter_ram[A2_FLAGS + 3] & 0x38) >> 3;
2897 uint8_t a1_zoffset = (GET16(blitter_ram, A1_FLAGS + 2) >> 6) & 0x07;
2898 uint8_t a2_zoffset = (GET16(blitter_ram, A2_FLAGS + 2) >> 6) & 0x07;
2899 uint8_t a1_width = (blitter_ram[A1_FLAGS + 2] >> 1) & 0x3F;
2900 uint8_t a2_width = (blitter_ram[A2_FLAGS + 2] >> 1) & 0x3F;
2901 bool a2_mask = blitter_ram[A2_FLAGS + 2] & 0x80;
2902 uint8_t a1addx = blitter_ram[A1_FLAGS + 1] & 0x03, a2addx = blitter_ram[A2_FLAGS + 1] & 0x03;
2903 bool a1addy = blitter_ram[A1_FLAGS + 1] & 0x04, a2addy = blitter_ram[A2_FLAGS + 1] & 0x04;
2904 bool a1xsign = blitter_ram[A1_FLAGS + 1] & 0x08, a2xsign = blitter_ram[A2_FLAGS + 1] & 0x08;
2905 bool a1ysign = blitter_ram[A1_FLAGS + 1] & 0x10, a2ysign = blitter_ram[A2_FLAGS + 1] & 0x10;
2906 uint32_t a1_base = GET32(blitter_ram, A1_BASE) & 0xFFFFFFF8; // Phrase aligned by ignoring bottom 3 bits
2907 uint32_t a2_base = GET32(blitter_ram, A2_BASE) & 0xFFFFFFF8;
2909 uint16_t a1_win_x = GET16(blitter_ram, A1_CLIP + 2) & 0x7FFF;
2910 uint16_t a1_win_y = GET16(blitter_ram, A1_CLIP + 0) & 0x7FFF;
2911 int16_t a1_x = (int16_t)GET16(blitter_ram, A1_PIXEL + 2);
2912 int16_t a1_y = (int16_t)GET16(blitter_ram, A1_PIXEL + 0);
2913 int16_t a1_step_x = (int16_t)GET16(blitter_ram, A1_STEP + 2);
2914 int16_t a1_step_y = (int16_t)GET16(blitter_ram, A1_STEP + 0);
2915 uint16_t a1_stepf_x = GET16(blitter_ram, A1_FSTEP + 2);
2916 uint16_t a1_stepf_y = GET16(blitter_ram, A1_FSTEP + 0);
2917 uint16_t a1_frac_x = GET16(blitter_ram, A1_FPIXEL + 2);
2918 uint16_t a1_frac_y = GET16(blitter_ram, A1_FPIXEL + 0);
2919 int16_t a1_inc_x = (int16_t)GET16(blitter_ram, A1_INC + 2);
2920 int16_t a1_inc_y = (int16_t)GET16(blitter_ram, A1_INC + 0);
2921 uint16_t a1_incf_x = GET16(blitter_ram, A1_FINC + 2);
2922 uint16_t a1_incf_y = GET16(blitter_ram, A1_FINC + 0);
2924 int16_t a2_x = (int16_t)GET16(blitter_ram, A2_PIXEL + 2);
2925 int16_t a2_y = (int16_t)GET16(blitter_ram, A2_PIXEL + 0);
2926 uint16_t a2_mask_x = GET16(blitter_ram, A2_MASK + 2);
2927 uint16_t a2_mask_y = GET16(blitter_ram, A2_MASK + 0);
2928 int16_t a2_step_x = (int16_t)GET16(blitter_ram, A2_STEP + 2);
2929 int16_t a2_step_y = (int16_t)GET16(blitter_ram, A2_STEP + 0);
2931 uint64_t srcd1 = GET64(blitter_ram, SRCDATA);
2933 uint64_t dstd = GET64(blitter_ram, DSTDATA);
2934 uint64_t patd = GET64(blitter_ram, PATTERNDATA);
2935 uint32_t iinc = GET32(blitter_ram, INTENSITYINC);
2936 uint64_t srcz1 = GET64(blitter_ram, SRCZINT);
2937 uint64_t srcz2 = GET64(blitter_ram, SRCZFRAC);
2938 uint64_t dstz = GET64(blitter_ram, DSTZ);
2939 uint32_t zinc = GET32(blitter_ram, ZINC);
2940 uint32_t collision = GET32(blitter_ram, COLLISIONCTRL);// 0=RESUME, 1=ABORT, 2=STOPEN
2942 uint8_t pixsize = (dsta2 ? a2_pixsize : a1_pixsize); // From ACONTROL
2944 //Testing Trevor McFur--I *think* it's the circle on the lower RHS of the screen...
2946 if (cmd == 0x05810601 && (GET16(blitter_ram, PIXLINECOUNTER + 2) == 96)
2947 && (GET16(blitter_ram, PIXLINECOUNTER + 0) == 72))
2950 //if (cmd == 0x1401060C) patd = 0xFFFFFFFFFFFFFFFFLL;
2951 //if (cmd == 0x1401060C) patd = 0x00000000000000FFLL;
2952 //If it's still not working (bcompen-patd) then see who's writing what to patd and where...
2953 //Still not OK. Check to see who's writing what to where in patd!
2954 //It looks like M68K is writing to the top half of patd... Hmm...
2956 ----> M68K wrote 0000 to byte 15737344 of PATTERNDATA...
2957 --> M68K wrote 00 to byte 0 of PATTERNDATA...
2958 --> M68K wrote 00 to byte 1 of PATTERNDATA...
2959 ----> M68K wrote 00FF to byte 15737346 of PATTERNDATA...
2960 --> M68K wrote 00 to byte 2 of PATTERNDATA...
2961 --> M68K wrote FF to byte 3 of PATTERNDATA...
2962 logBlit = F, cmd = 1401060C
2964 Wren0 := ND6 (wren\[0], gpua\[5], gpua\[6..8], bliten, gpu_memw);
2965 Wren1 := ND6 (wren\[1], gpua[5], gpua\[6..8], bliten, gpu_memw);
2966 Wren2 := ND6 (wren\[2], gpua\[5], gpua[6], gpua\[7..8], bliten, gpu_memw);
2967 Wren3 := ND6 (wren\[3], gpua[5], gpua[6], gpua\[7..8], bliten, gpu_memw);
2970 Dec0 := D38GH (a1baseld, a1flagld, a1winld, a1ptrld, a1stepld, a1stepfld, a1fracld, a1incld, gpua[2..4], wren\[0]);
2972 Dec1 := D38GH (a1incfld, a2baseld, a2flagld, a2maskld, a2ptrldg, a2stepld, cmdldt, countldt, gpua[2..4], wren\[1]);
2974 Dec2 := D38GH (srcd1ldg[0..1], dstdldg[0..1], dstzldg[0..1], srcz1ldg[0..1], gpua[2..4], wren\[2]);
2976 Dec3 := D38GH (srcz2ld[0..1], patdld[0..1], iincld, zincld, stopld, intld[0], gpua[2..4], wren\[3]);
2978 wren[3] is asserted when gpu address bus = 0 011x xx00
2979 patdld[0] -> 0 0110 1000 -> $F02268 (lo 32 bits)
2980 patdld[1] -> 0 0110 1100 -> $F0226C (hi 32 bits)
2982 So... It's reversed! The data organization of the patd register is [low 32][high 32]! !!! FIX !!! [DONE]
2983 And fix all the other 64 bit registers [DONE]
2985 /*if (cmd == 0x1401060C)
2987 printf("logBlit = %s, cmd = %08X\n", (logBlit ? "T" : "F"), cmd);
2991 if ((cmd == 0x00010200) && (GET16(blitter_ram, PIXLINECOUNTER + 2) == 9))
2994 ; Pink altimeter bar
2996 Blit! (00110000 <- 000BF010) count: 9 x 23, A1/2_FLAGS: 000042E2/00010020 [cmd: 00010200]
2997 CMD -> src: dst: misc: a1ctl: UPDA1 mode: ity: PATDSEL z-op: op: LFU_CLEAR ctrl:
2998 A1 step values: -10 (X), 1 (Y)
2999 A1 -> pitch: 4 phrases, depth: 16bpp, z-off: 3, width: 320 (21), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
3000 A2 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 1 (00), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
3001 A1 x/y: 262/132, A2 x/y: 129/0
3002 ;x-coord is 257 in pic, so add 5
3003 ;20 for ship, 33 for #... Let's see if we can find 'em!
3005 ; Black altimeter bar
3007 Blit! (00110000 <- 000BF010) count: 5 x 29, A1/2_FLAGS: 000042E2/00010020 [cmd: 00010200]
3008 CMD -> src: dst: misc: a1ctl: UPDA1 mode: ity: PATDSEL z-op: op: LFU_CLEAR ctrl:
3009 A1 step values: -8 (X), 1 (Y)
3010 A1 -> pitch: 4 phrases, depth: 16bpp, z-off: 3, width: 320 (21), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
3011 A2 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 1 (00), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
3012 A1 x/y: 264/126, A2 x/y: 336/0
3014 Here's the pink bar--note that it's phrase mode without dread, so how does this work???
3015 Not sure, but I *think* that somehow it MUXes the data at the write site in on the left or right side
3016 of the write data when masked in phrase mode. I'll have to do some tracing to see if this is the mechanism
3019 Blit! (CMD = 00010200)
3020 Flags: UPDA1 PATDSEL
3022 a1_base = 00110010, a2_base = 000BD7E0
3023 a1_x = 0106, a1_y = 0090, a1_frac_x = 0000, a1_frac_y = 8000, a2_x = 025A, a2_y = 0000
3024 a1_step_x = FFF6, a1_step_y = 0001, a1_stepf_x = 5E00, a1_stepf_y = D100, a2_step_x = FFF7, a2_step_y = 0001
3025 a1_inc_x = 0001, a1_inc_y = FFFF, a1_incf_x = 0000, a1_incf_y = E000
3026 a1_win_x = 0000, a1_win_y = 0000, a2_mask_x = 0000, a2_mask_y = 0000
3027 a2_mask=F a1add=+phr/+0 a2add=+1/+0
3028 a1_pixsize = 4, a2_pixsize = 4
3029 srcd=BAC673AC2C92E578 dstd=0000000000000000 patd=74C074C074C074C0 iinc=0002E398
3030 srcz1=7E127E12000088DA srcz2=DBE06DF000000000 dstz=0000000000000000 zinc=FFFE4840, coll=0
3032 [in=T a1f=F a1=F zf=F z=F a2=F iif=F iii=F izf=F izi=F]
3033 Entering INNER state...
3034 Entering DWRITE state...
3035 Dest write address/pix address: 0016A830/0 [dstart=20 dend=40 pwidth=8 srcshift=0][daas=0 dabs=0 dam=7 ds=0 daq=F] [7400000074C074C0] (icount=0007, inc=2)
3036 Entering A1_ADD state [a1_x=0106, a1_y=0090, addasel=0, addbsel=0, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
3037 Entering DWRITE state...
3038 Dest write address/pix address: 0016A850/0 [dstart=0 dend=40 pwidth=8 srcshift=0][daas=0 dabs=0 dam=7 ds=0 daq=F] [74C074C074C074C0] (icount=0003, inc=4)
3039 Entering A1_ADD state [a1_x=0108, a1_y=0090, addasel=0, addbsel=0, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
3040 Entering DWRITE state...
3041 Dest write address/pix address: 0016A870/0 [dstart=0 dend=30 pwidth=8 srcshift=0][daas=0 dabs=0 dam=7 ds=0 daq=F] [74C074C074C00000] (icount=FFFF, inc=4)
3042 Entering A1_ADD state [a1_x=010C, a1_y=0090, addasel=0, addbsel=0, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
3043 Entering IDLE_INNER state...
3044 Leaving INNER state... (ocount=000A)
3045 [in=F a1f=F a1=T zf=F z=F a2=F iif=F iii=F izf=F izi=F]
3046 Entering A1UPDATE state... (272/144 -> 262/145)
3047 [in=T a1f=F a1=F zf=F z=F a2=F iif=F iii=F izf=F izi=F]
3048 Entering INNER state...
3053 a2addy = a1addy; // A2 channel Y add bit is tied to A1's
3055 //if (logBlit && (ocount > 20)) logBlit = false;
3056 #ifdef VERBOSE_BLITTER_LOGGING
3059 WriteLog(" a1_base = %08X, a2_base = %08X\n", a1_base, a2_base);
3060 WriteLog(" a1_x = %04X, a1_y = %04X, a1_frac_x = %04X, a1_frac_y = %04X, a2_x = %04X, a2_y = %04X\n", (uint16_t)a1_x, (uint16_t)a1_y, a1_frac_x, a1_frac_y, (uint16_t)a2_x, (uint16_t)a2_y);
3061 WriteLog(" a1_step_x = %04X, a1_step_y = %04X, a1_stepf_x = %04X, a1_stepf_y = %04X, a2_step_x = %04X, a2_step_y = %04X\n", (uint16_t)a1_step_x, (uint16_t)a1_step_y, a1_stepf_x, a1_stepf_y, (uint16_t)a2_step_x, (uint16_t)a2_step_y);
3062 WriteLog(" a1_inc_x = %04X, a1_inc_y = %04X, a1_incf_x = %04X, a1_incf_y = %04X\n", (uint16_t)a1_inc_x, (uint16_t)a1_inc_y, a1_incf_x, a1_incf_y);
3063 WriteLog(" a1_win_x = %04X, a1_win_y = %04X, a2_mask_x = %04X, a2_mask_y = %04X\n", a1_win_x, a1_win_y, a2_mask_x, a2_mask_y);
3064 char x_add_str[4][4] = { "phr", "1", "0", "inc" };
3065 WriteLog(" a2_mask=%s a1add=%s%s/%s%s a2add=%s%s/%s%s\n", (a2_mask ? "T" : "F"), (a1xsign ? "-" : "+"), x_add_str[a1addx],
3066 (a1ysign ? "-" : "+"), (a1addy ? "1" : "0"), (a2xsign ? "-" : "+"), x_add_str[a2addx],
3067 (a2ysign ? "-" : "+"), (a2addy ? "1" : "0"));
3068 WriteLog(" a1_pixsize = %u, a2_pixsize = %u\n", a1_pixsize, a2_pixsize);
3069 WriteLog(" srcd=%08X%08X dstd=%08X%08X patd=%08X%08X iinc=%08X\n",
3070 (uint32_t)(srcd1 >> 32), (uint32_t)(srcd1 & 0xFFFFFFFF),
3071 (uint32_t)(dstd >> 32), (uint32_t)(dstd & 0xFFFFFFFF),
3072 (uint32_t)(patd >> 32), (uint32_t)(patd & 0xFFFFFFFF), iinc);
3073 WriteLog(" srcz1=%08X%08X srcz2=%08X%08X dstz=%08X%08X zinc=%08X, coll=%X\n",
3074 (uint32_t)(srcz1 >> 32), (uint32_t)(srcz1 & 0xFFFFFFFF),
3075 (uint32_t)(srcz2 >> 32), (uint32_t)(srcz2 & 0xFFFFFFFF),
3076 (uint32_t)(dstz >> 32), (uint32_t)(dstz & 0xFFFFFFFF), zinc, collision);
3080 // Various state lines set up by user
3082 bool phrase_mode = ((!dsta2 && a1addx == 0) || (dsta2 && a2addx == 0) ? true : false); // From ACONTROL
3083 #ifdef VERBOSE_BLITTER_LOGGING
3085 WriteLog(" Phrase mode is %s\n", (phrase_mode ? "ON" : "off"));
3089 // Stopgap vars to simulate various lines
3091 uint16_t a1FracCInX = 0, a1FracCInY = 0;
3097 if ((idle && !go) || (inner && outer0 && indone))
3099 #ifdef VERBOSE_BLITTER_LOGGING
3101 WriteLog(" Entering IDLE state...\n");
3105 //Instead of a return, let's try breaking out of the loop...
3112 // INNER LOOP ACTIVE
3114 Entering DWRITE state... (icount=0000, inc=4)
3115 Entering IDLE_INNER state...
3116 Leaving INNER state... (ocount=00EF)
3117 [in=T a1f=F a1=T zf=F z=F a2=F iif=F iii=F izf=F izi=F]
3118 Entering INNER state...
3120 [in=F a1f=F a1=F zf=F z=F a2=F iif=F iii=F izf=F izi=F]
3123 if ((idle && go && !datinit)
3124 || (inner && !indone)
3125 || (inner && indone && !outer0 && !upda1f && !upda1 && notgzandp && !upda2 && !datinit)
3126 || (a1update && !upda2 && notgzandp && !datinit)
3127 || (zupdate && !upda2 && !datinit)
3128 || (a2update && !datinit)
3129 || (init_ii && !gourz)
3137 // A1 FRACTION UPDATE
3139 if (inner && indone && !outer0 && upda1f)
3146 // A1 POINTER UPDATE
3149 || (inner && indone && !outer0 && !upda1f && upda1))
3156 // Z FRACTION UPDATE
3158 if ((a1update && gourz && polygon)
3159 || (inner && indone && !outer0 && !upda1f && !upda1 && gourz && polygon))
3175 // A2 POINTER UPDATE
3177 if ((a1update && upda2 && notgzandp)
3178 || (zupdate && upda2)
3179 || (inner && indone && !outer0 && !upda1f && notgzandp && !upda1 && upda2))
3186 // INITIALIZE INTENSITY FRACTION
3188 if ((zupdate && !upda2 && datinit)
3189 || (a1update && !upda2 && datinit && notgzandp)
3190 || (inner && indone && !outer0 && !upda1f && !upda1 && notgzandp && !upda2 && datinit)
3191 || (a2update && datinit)
3192 || (idle && go && datinit))
3199 // INITIALIZE INTENSITY INTEGER
3208 // INITIALIZE Z FRACTION
3210 if (init_ii && gourz)
3217 // INITIALIZE Z INTEGER
3226 // Here we move the fooi into their foo counterparts in order to simulate the moving
3227 // of data into the various FDSYNCs... Each time we loop we simulate one clock cycle...
3231 a1fupdate = a1fupdatei;
3232 a1update = a1updatei;
3233 zfupdate = zfupdatei; // *
3234 zupdate = zupdatei; // *
3235 a2update = a2updatei;
3236 init_if = init_ifi; // *
3237 init_ii = init_iii; // *
3238 init_zf = init_zfi; // *
3239 init_zi = init_zii; // *
3240 // * denotes states that will never assert for Jaguar I
3241 #ifdef VERBOSE_BLITTER_LOGGING
3243 WriteLog(" [in=%c a1f=%c a1=%c zf=%c z=%c a2=%c iif=%c iii=%c izf=%c izi=%c]\n",
3244 (inner ? 'T' : 'F'), (a1fupdate ? 'T' : 'F'), (a1update ? 'T' : 'F'),
3245 (zfupdate ? 'T' : 'F'), (zupdate ? 'T' : 'F'), (a2update ? 'T' : 'F'),
3246 (init_if ? 'T' : 'F'), (init_ii ? 'T' : 'F'), (init_zf ? 'T' : 'F'),
3247 (init_zi ? 'T' : 'F'));
3250 // Now, depending on how we want to handle things, we could either put the implementation
3251 // of the various pieces up above, or handle them down below here.
3253 // Let's try postprocessing for now...
3258 #ifdef VERBOSE_BLITTER_LOGGING
3260 WriteLog(" Entering INNER state...\n");
3262 uint16_t icount = GET16(blitter_ram, PIXLINECOUNTER + 2);
3263 bool idle_inner = true, step = true, sreadx = false, szreadx = false, sread = false,
3264 szread = false, dread = false, dzread = false, dwrite = false, dzwrite = false;
3265 bool inner0 = false;
3266 bool idle_inneri, sreadxi, szreadxi, sreadi, szreadi, dreadi, dzreadi, dwritei, dzwritei;
3268 // State lines that will never assert in Jaguar I
3270 bool textext = false, txtread = false;
3273 uint8_t srcshift = 0;
3274 bool sshftld = true; // D flipflop (D -> Q): instart -> sshftld
3275 //NOTE: sshftld probably is only asserted at the beginning of the inner loop. !!! FIX !!!
3277 Blit! (CMD = 01800005)
3278 Flags: SRCEN SRCENX LFUFUNC=C
3280 a1_base = 00037290, a2_base = 000095D0
3281 a1_x = 0000, a1_y = 0000, a2_x = 0002, a2_y = 0000
3282 a1_pixsize = 4, a2_pixsize = 4
3283 srcd=0000000000000000, dstd=0000000000000000, patd=0000000000000000
3285 [in=T a1f=F a1=F zf=F z=F a2=F iif=F iii=F izf=F izi=F]
3286 Entering INNER state...
3287 Entering SREADX state... [dstart=0 dend=20 pwidth=8 srcshift=20]
3288 Source extra read address/pix address: 000095D4/0 [0000001C00540038]
3289 Entering A2_ADD state [a2_x=0002, a2_y=0000, addasel=0, addbsel=1, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
3290 Entering SREAD state... [dstart=0 dend=20 pwidth=8 srcshift=0]
3291 Source read address/pix address: 000095D8/0 [0054003800009814]
3292 Entering A2_ADD state [a2_x=0004, a2_y=0000, addasel=0, addbsel=1, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
3293 Entering DWRITE state...
3294 Dest write address/pix address: 00037290/0 [dstart=0 dend=20 pwidth=8 srcshift=0] (icount=026E, inc=4)
3295 Entering A1_ADD state [a1_x=0000, a1_y=0000, addasel=0, addbsel=0, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
3296 Entering SREAD state... [dstart=0 dend=20 pwidth=8 srcshift=0]
3297 Source read address/pix address: 000095E0/0 [00009968000377C7]
3298 Entering A2_ADD state [a2_x=0008, a2_y=0000, addasel=0, addbsel=1, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
3299 Entering DWRITE state...
3300 Dest write address/pix address: 00037298/0 [dstart=0 dend=20 pwidth=8 srcshift=0] (icount=026A, inc=4)
3301 Entering A1_ADD state [a1_x=0004, a1_y=0000, addasel=0, addbsel=0, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
3304 // while (!idle_inner)
3309 if ((idle_inner && !step)
3310 || (dzwrite && step && inner0)
3311 || (dwrite && step && !dstwrz && inner0))
3313 #ifdef VERBOSE_BLITTER_LOGGING
3315 WriteLog(" Entering IDLE_INNER state...\n");
3321 idle_inneri = false;
3323 // EXTRA SOURCE DATA READ
3325 if ((idle_inner && step && srcenx)
3326 || (sreadx && !step))
3333 // EXTRA SOURCE ZED READ
3335 if ((sreadx && step && srcenz)
3336 || (szreadx && !step))
3343 // TEXTURE DATA READ (not implemented because not in Jaguar I)
3347 if ((szreadx && step && !textext)
3348 || (sreadx && step && !srcenz && srcen)
3349 || (idle_inner && step && !srcenx && !textext && srcen)
3350 || (dzwrite && step && !inner0 && !textext && srcen)
3351 || (dwrite && step && !dstwrz && !inner0 && !textext && srcen)
3352 || (txtread && step && srcen)
3353 || (sread && !step))
3362 if ((sread && step && srcenz)
3363 || (szread && !step))
3370 // DESTINATION DATA READ
3372 if ((szread && step && dsten)
3373 || (sread && step && !srcenz && dsten)
3374 || (sreadx && step && !srcenz && !textext && !srcen && dsten)
3375 || (idle_inner && step && !srcenx && !textext && !srcen && dsten)
3376 || (dzwrite && step && !inner0 && !textext && !srcen && dsten)
3377 || (dwrite && step && !dstwrz && !inner0 && !textext && !srcen && dsten)
3378 || (txtread && step && !srcen && dsten)
3379 || (dread && !step))
3386 // DESTINATION ZED READ
3388 if ((dread && step && dstenz)
3389 || (szread && step && !dsten && dstenz)
3390 || (sread && step && !srcenz && !dsten && dstenz)
3391 || (sreadx && step && !srcenz && !textext && !srcen && !dsten && dstenz)
3392 || (idle_inner && step && !srcenx && !textext && !srcen && !dsten && dstenz)
3393 || (dzwrite && step && !inner0 && !textext && !srcen && !dsten && dstenz)
3394 || (dwrite && step && !dstwrz && !inner0 && !textext && !srcen && !dsten && dstenz)
3395 || (txtread && step && !srcen && !dsten && dstenz)
3396 || (dzread && !step))
3403 // DESTINATION DATA WRITE
3405 if ((dzread && step)
3406 || (dread && step && !dstenz)
3407 || (szread && step && !dsten && !dstenz)
3408 || (sread && step && !srcenz && !dsten && !dstenz)
3409 || (txtread && step && !srcen && !dsten && !dstenz)
3410 || (sreadx && step && !srcenz && !textext && !srcen && !dsten && !dstenz)
3411 || (idle_inner && step && !srcenx && !textext && !srcen && !dsten && !dstenz)
3412 || (dzwrite && step && !inner0 && !textext && !srcen && !dsten && !dstenz)
3413 || (dwrite && step && !dstwrz && !inner0 && !textext && !srcen && !dsten && !dstenz)
3414 || (dwrite && !step))
3421 // DESTINATION ZED WRITE
3423 if ((dzwrite && !step)
3424 || (dwrite && step && dstwrz))
3431 //Kludge: A QnD way to make sure that sshftld is asserted only for the first
3432 // cycle of the inner loop...
3433 sshftld = idle_inner;
3435 // Here we move the fooi into their foo counterparts in order to simulate the moving
3436 // of data into the various FDSYNCs... Each time we loop we simulate one clock cycle...
3438 idle_inner = idle_inneri;
3448 // Here's a few more decodes--not sure if they're supposed to go here or not...
3450 bool srca_addi = (sreadxi && !srcenz) || (sreadi && !srcenz) || szreadxi || szreadi;
3452 bool dsta_addi = (dwritei && !dstwrz) || dzwritei;
3454 bool gensrc = sreadxi || szreadxi || sreadi || szreadi;
3455 bool gendst = dreadi || dzreadi || dwritei || dzwritei;
3456 bool gena2i = (gensrc && !dsta2) || (gendst && dsta2);
3458 bool zaddr = szreadx || szread || dzread || dzwrite;
3460 // Some stuff from MCONTROL.NET--not sure if this is the correct use of this decode or not...
3461 /*Fontread\ := OND1 (fontread\, sread[1], sreadx[1], bcompen);
3462 Fontread := INV1 (fontread, fontread\);
3463 Justt := NAN3 (justt, fontread\, phrase_mode, tactive\);
3464 Justify := TS (justify, justt, busen);*/
3465 bool fontread = (sread || sreadx) && bcompen;
3466 bool justify = !(!fontread && phrase_mode /*&& tactive*/);
3468 /* Generate inner loop update enables */
3470 A1_addi := MX2 (a1_addi, dsta_addi, srca_addi, dsta2);
3471 A2_addi := MX2 (a2_addi, srca_addi, dsta_addi, dsta2);
3472 A1_add := FD1 (a1_add, a1_add\, a1_addi, clk);
3473 A2_add := FD1 (a2_add, a2_add\, a2_addi, clk);
3474 A2_addb := BUF1 (a2_addb, a2_add);
3476 bool a1_add = (dsta2 ? srca_addi : dsta_addi);
3477 bool a2_add = (dsta2 ? dsta_addi : srca_addi);
3479 /* Address adder input A register selection
3480 000 A1 step integer part
3481 001 A1 step fraction part
3482 010 A1 increment integer part
3483 011 A1 increment fraction part
3487 bit 1 = /a2update . (a1_add . a1addx[0..1])
3488 bit 0 = /a2update . ( a1fupdate
3489 + a1_add . atick[0] . a1addx[0..1])
3490 The /a2update term on bits 0 and 1 is redundant.
3491 Now look-ahead based
3493 uint8_t addasel = (a1fupdate || (a1_add && a1addx == 3) ? 0x01 : 0x00);
3494 addasel |= (a1_add && a1addx == 3 ? 0x02 : 0x00);
3495 addasel |= (a2update ? 0x04 : 0x00);
3496 /* Address adder input A X constant selection
3497 adda_xconst[0..2] generate a power of 2 in the range 1-64 or all
3498 zeroes when they are all 1
3499 Remember - these are pixels, so to add one phrase the pixel size
3500 has to be taken into account to get the appropriate value.
3502 if a1addx[0..1] are 00 set 6 - pixel size
3503 if a1addx[0..1] are 01 set the value 000
3504 if a1addx[0..1] are 10 set the value 111
3506 JLH: Also, 11 will likewise set the value to 111
3508 uint8_t a1_xconst = 6 - a1_pixsize, a2_xconst = 6 - a2_pixsize;
3512 else if (a1addx & 0x02)
3517 else if (a2addx & 0x02)
3520 uint8_t adda_xconst = (a2_add ? a2_xconst : a1_xconst);
3521 /* Address adder input A Y constant selection
3522 22 June 94 - This was erroneous, because only the a1addy bit was reflected here.
3523 Therefore, the selection has to be controlled by a bug fix bit.
3524 JLH: Bug fix bit in Jaguar II--not in Jaguar I!
3526 bool adda_yconst = a1addy;
3527 /* Address adder input A register versus constant selection
3528 given by a1_add . a1addx[0..1]
3531 + a2_add . a2addx[0..1]
3534 bool addareg = ((a1_add && a1addx == 3) || a1update || a1fupdate
3535 || (a2_add && a2addx == 3) || a2update ? true : false);
3536 /* The adders can be put into subtract mode in add pixel size
3537 mode when the corresponding flags are set */
3538 bool suba_x = ((a1_add && a1xsign && a1addx == 1) || (a2_add && a2xsign && a2addx == 1) ? true : false);
3539 bool suba_y = ((a1_add && a1addy && a1ysign) || (a2_add && a2addy && a2ysign) ? true : false);
3540 /* Address adder input B selection
3547 + (a1_add . atick[0] . a1addx[0..1])
3548 + a1fupdate . a1_stepld
3549 + a1update . a1_stepld
3550 + a2update . a2_stepld
3551 Bit 0 = a2update + a2_add
3552 + a1fupdate . a1_stepld
3553 + a1update . a1_stepld
3554 + a2update . a2_stepld
3556 uint8_t addbsel = (a2update || a2_add || (a1fupdate && a1_stepld)
3557 || (a1update && a1_stepld) || (a2update && a2_stepld) ? 0x01 : 0x00);
3558 addbsel |= (a1fupdate || (a1_add && a1addx == 3) || (a1fupdate && a1_stepld)
3559 || (a1update && a1_stepld) || (a2update && a2_stepld) ? 0x02 : 0x00);
3561 /* The modulo bits are used to align X onto a phrase boundary when
3562 it is being updated by one phrase
3569 Masking is enabled for a1 when a1addx[0..1] is 00, and the value
3570 is 6 - the pixel size (again!)
3572 uint8_t maska1 = (a1_add && a1addx == 0 ? 6 - a1_pixsize : 0);
3573 uint8_t maska2 = (a2_add && a2addx == 0 ? 6 - a2_pixsize : 0);
3574 uint8_t modx = (a2_add ? maska2 : maska1);
3575 /* Generate load strobes for the increment updates */
3577 /*A1pldt := NAN2 (a1pldt, atick[1], a1_add);
3578 A1ptrldi := NAN2 (a1ptrldi, a1update\, a1pldt);
3580 A1fldt := NAN4 (a1fldt, atick[0], a1_add, a1addx[0..1]);
3581 A1fracldi := NAN2 (a1fracldi, a1fupdate\, a1fldt);
3583 A2pldt := NAN2 (a2pldt, atick[1], a2_add);
3584 A2ptrldi := NAN2 (a2ptrldi, a2update\, a2pldt);*/
3585 bool a1fracldi = a1fupdate || (a1_add && a1addx == 3);
3587 // Some more from DCONTROL...
3588 // atick[] just MAY be important here! We're assuming it's true and dropping the term...
3589 // That will probably screw up some of the lower terms that seem to rely on the timing of it...
3590 #warning srcdreadd is not properly initialized!
3591 bool srcdreadd = false; // Set in INNER.NET
3592 //Shadeadd\ := NAN2H (shadeadd\, dwrite, srcshade);
3593 //Shadeadd := INV2 (shadeadd, shadeadd\);
3594 bool shadeadd = dwrite && srcshade;
3595 /* Data adder control, input A selection
3596 000 Destination data
3597 001 Initialiser pixel value
3598 100 Source data - computed intensity fraction
3599 101 Pattern data - computed intensity
3600 110 Source zed 1 - computed zed
3601 111 Source zed 2 - computed zed fraction
3603 Bit 0 = dwrite . gourd . atick[1]
3604 + dzwrite . gourz . atick[0]
3607 + init_if + init_ii + init_zf + init_zi
3608 Bit 1 = dzwrite . gourz . (atick[0] + atick[1])
3611 Bit 2 = (gourd + gourz) . /(init_if + init_ii + init_zf + init_zi)
3614 uint8_t daddasel = ((dwrite && gourd) || (dzwrite && gourz) || istepadd || zstepfadd
3615 || init_if || init_ii || init_zf || init_zi ? 0x01 : 0x00);
3616 daddasel |= ((dzwrite && gourz) || zstepadd || zstepfadd ? 0x02 : 0x00);
3617 daddasel |= (((gourd || gourz) && !(init_if || init_ii || init_zf || init_zi))
3618 || (dwrite && srcshade) ? 0x04 : 0x00);
3619 /* Data adder control, input B selection
3621 0001 Data initialiser increment
3622 0100 Bottom 16 bits of I increment repeated four times
3623 0101 Top 16 bits of I increment repeated four times
3624 0110 Bottom 16 bits of Z increment repeated four times
3625 0111 Top 16 bits of Z increment repeated four times
3626 1100 Bottom 16 bits of I step repeated four times
3627 1101 Top 16 bits of I step repeated four times
3628 1110 Bottom 16 bits of Z step repeated four times
3629 1111 Top 16 bits of Z step repeated four times
3631 Bit 0 = dwrite . gourd . atick[1]
3632 + dzwrite . gourz . atick[1]
3636 + init_if + init_ii + init_zf + init_zi
3637 Bit 1 = dzwrite . gourz . (atick[0] + atick[1])
3640 Bit 2 = dwrite . gourd . (atick[0] + atick[1])
3641 + dzwrite . gourz . (atick[0] + atick[1])
3643 + istepadd + istepfadd + zstepadd + zstepfadd
3644 Bit 3 = istepadd + istepfadd + zstepadd + zstepfadd
3646 uint8_t daddbsel = ((dwrite && gourd) || (dzwrite && gourz) || (dwrite && srcshade)
3647 || istepadd || zstepadd || init_if || init_ii || init_zf || init_zi ? 0x01 : 0x00);
3648 daddbsel |= ((dzwrite && gourz) || zstepadd || zstepfadd ? 0x02 : 0x00);
3649 daddbsel |= ((dwrite && gourd) || (dzwrite && gourz) || (dwrite && srcshade)
3650 || istepadd || istepfadd || zstepadd || zstepfadd ? 0x04 : 0x00);
3651 daddbsel |= (istepadd && istepfadd && zstepadd && zstepfadd ? 0x08 : 0x00);
3652 /* Data adder mode control
3653 000 16-bit normal add
3654 001 16-bit saturating add with carry
3655 010 8-bit saturating add with carry, carry into top byte is
3657 011 8-bit saturating add with carry, carry into top byte and
3658 between top nybbles is inhibited (CRY)
3659 100 16-bit normal add with carry
3660 101 16-bit saturating add
3661 110 8-bit saturating add, carry into top byte is inhibited
3662 111 8-bit saturating add, carry into top byte and between top
3663 nybbles is inhibited
3665 The first five are used for Gouraud calculations, the latter three
3666 for adding source and destination data
3668 Bit 0 = dzwrite . gourz . atick[1]
3669 + dwrite . gourd . atick[1] . /topnen . /topben . /ext_int
3670 + dwrite . gourd . atick[1] . topnen . topben . /ext_int
3672 + istepadd . /topnen . /topben . /ext_int
3673 + istepadd . topnen . topben . /ext_int
3674 + /gourd . /gourz . /topnen . /topben
3675 + /gourd . /gourz . topnen . topben
3676 + shadeadd . /topnen . /topben
3677 + shadeadd . topnen . topben
3678 + init_ii . /topnen . /topben . /ext_int
3679 + init_ii . topnen . topben . /ext_int
3682 Bit 1 = dwrite . gourd . atick[1] . /topben . /ext_int
3683 + istepadd . /topben . /ext_int
3684 + /gourd . /gourz . /topben
3685 + shadeadd . /topben
3686 + init_ii . /topben . /ext_int
3688 Bit 2 = /gourd . /gourz
3690 + dwrite . gourd . atick[1] . ext_int
3691 + istepadd . ext_int
3694 uint8_t daddmode = ((dzwrite && gourz) || (dwrite && gourd && !topnen && !topben && !ext_int)
3695 || (dwrite && gourd && topnen && topben && !ext_int) || zstepadd
3696 || (istepadd && !topnen && !topben && !ext_int)
3697 || (istepadd && topnen && topben && !ext_int) || (!gourd && !gourz && !topnen && !topben)
3698 || (!gourd && !gourz && topnen && topben) || (shadeadd && !topnen && !topben)
3699 || (shadeadd && topnen && topben) || (init_ii && !topnen && !topben && !ext_int)
3700 || (init_ii && topnen && topben && !ext_int) || init_zi ? 0x01 : 0x00);
3701 daddmode |= ((dwrite && gourd && !topben && !ext_int) || (istepadd && !topben && !ext_int)
3702 || (!gourd && !gourz && !topben) || (shadeadd && !topben)
3703 || (init_ii && !topben && !ext_int) ? 0x02 : 0x00);
3704 daddmode |= ((!gourd && !gourz) || shadeadd || (dwrite && gourd && ext_int)
3705 || (istepadd && ext_int) || (init_ii && ext_int) ? 0x04 : 0x00);
3706 /* Data add load controls
3707 Pattern fraction (dest data) is loaded on
3708 dwrite . gourd . atick[0]
3709 + istepfadd . /datinit
3711 Pattern data is loaded on
3712 dwrite . gourd . atick[1]
3713 + istepadd . /datinit . /datinit
3715 Source z1 is loaded on
3716 dzwrite . gourz . atick[1]
3717 + zstepadd . /datinit . /datinit
3719 Source z2 is loaded on
3720 dzwrite . gourz . atick[0]
3723 Texture map shaded data is loaded on
3724 srcdreadd . srcshade
3726 bool patfadd = (dwrite && gourd) || (istepfadd && !datinit) || init_if;
3727 bool patdadd = (dwrite && gourd) || (istepadd && !datinit) || init_ii;
3728 bool srcz1add = (dzwrite && gourz) || (zstepadd && !datinit) || init_zi;
3729 bool srcz2add = (dzwrite && gourz) || zstepfadd || init_zf;
3730 bool srcshadd = srcdreadd && srcshade;
3731 bool daddq_sel = patfadd || patdadd || srcz1add || srcz2add || srcshadd;
3732 /* Select write data
3733 This has to be controlled from stage 1 of the pipe-line, delayed
3734 by one tick, as the write occurs in the cycle after the ack.
3741 Bit 0 = /patdsel . /adddsel
3746 uint8_t data_sel = ((!patdsel && !adddsel) || dzwrite ? 0x01 : 0x00)
3747 | (adddsel || dzwrite ? 0x02 : 0x00);
3749 uint32_t address, pixAddr;
3750 ADDRGEN(address, pixAddr, gena2i, zaddr,
3751 a1_x, a1_y, a1_base, a1_pitch, a1_pixsize, a1_width, a1_zoffset,
3752 a2_x, a2_y, a2_base, a2_pitch, a2_pixsize, a2_width, a2_zoffset);
3754 //Here's my guess as to how the addresses get truncated to phrase boundaries in phrase mode...
3756 address &= 0xFFFFF8;
3758 /* Generate source alignment shift
3759 -------------------------------
3760 The source alignment shift for data move is the difference between
3761 the source and destination X pointers, multiplied by the pixel
3762 size. Only the low six bits of the pointers are of interest, as
3763 pixel sizes are always a power of 2 and window rows are always
3766 When not in phrase mode, the top 3 bits of the shift value are
3769 Source shifting is also used to extract bits for bit-to-byte
3770 expansion in phrase mode. This involves only the bottom three
3771 bits of the shift value, and is based on the offset within the
3772 phrase of the destination X pointer, in pixels.
3774 Source shifting is disabled when srcen is not set.
3776 uint8_t dstxp = (dsta2 ? a2_x : a1_x) & 0x3F;
3777 uint8_t srcxp = (dsta2 ? a1_x : a2_x) & 0x3F;
3778 uint8_t shftv = ((dstxp - srcxp) << pixsize) & 0x3F;
3779 /* The phrase mode alignment count is given by the phrase offset
3780 of the first pixel, for bit to byte expansion */
3784 pobb = dstxp & 0x07;
3786 pobb = dstxp & 0x03;
3788 pobb = dstxp & 0x01;
3790 bool pobbsel = phrase_mode && bcompen;
3791 uint8_t loshd = (pobbsel ? pobb : shftv) & 0x07;
3792 uint8_t shfti = (srcen || pobbsel ? (sshftld ? loshd : srcshift & 0x07) : 0);
3793 /* Enable for high bits is srcen . phrase_mode */
3794 shfti |= (srcen && phrase_mode ? (sshftld ? shftv & 0x38 : srcshift & 0x38) : 0);
3799 #ifdef VERBOSE_BLITTER_LOGGING
3801 WriteLog(" Entering SREADX state...");
3803 //uint32_t srcAddr, pixAddr;
3804 //ADDRGEN(srcAddr, pixAddr, gena2i, zaddr,
3805 // a1_x, a1_y, a1_base, a1_pitch, a1_pixsize, a1_width, a1_zoffset,
3806 // a2_x, a2_y, a2_base, a2_pitch, a2_pixsize, a2_width, a2_zoffset);
3808 srcd1 = ((uint64_t)JaguarReadLong(address + 0, BLITTER) << 32)
3809 | (uint64_t)JaguarReadLong(address + 4, BLITTER);
3810 //Kludge to take pixel size into account...
3811 //Hmm. If we're not in phrase mode, this is most likely NOT going to be used...
3812 //Actually, it would be--because of BCOMPEN expansion, for example...
3821 else if (pixsize == 4)
3827 #ifdef VERBOSE_BLITTER_LOGGING
3829 WriteLog(" Source extra read address/pix address: %08X/%1X [%08X%08X]\n",
3830 address, pixAddr, (uint32_t)(srcd1 >> 32), (uint32_t)(srcd1 & 0xFFFFFFFF));
3836 #ifdef VERBOSE_BLITTER_LOGGING
3838 WriteLog(" Entering SZREADX state...");
3841 srcz1 = ((uint64_t)JaguarReadLong(address, BLITTER) << 32) | (uint64_t)JaguarReadLong(address + 4, BLITTER);
3842 #ifdef VERBOSE_BLITTER_LOGGING
3844 WriteLog(" Src Z extra read address/pix address: %08X/%1X [%08X%08X]\n", address, pixAddr,
3845 (uint32_t)(dstz >> 32), (uint32_t)(dstz & 0xFFFFFFFF));
3851 #ifdef VERBOSE_BLITTER_LOGGING
3853 WriteLog(" Entering SREAD state...");
3855 //uint32_t srcAddr, pixAddr;
3856 //ADDRGEN(srcAddr, pixAddr, gena2i, zaddr,
3857 // a1_x, a1_y, a1_base, a1_pitch, a1_pixsize, a1_width, a1_zoffset,
3858 // a2_x, a2_y, a2_base, a2_pitch, a2_pixsize, a2_width, a2_zoffset);
3860 srcd1 = ((uint64_t)JaguarReadLong(address, BLITTER) << 32) | (uint64_t)JaguarReadLong(address + 4, BLITTER);
3861 //Kludge to take pixel size into account...
3870 else if (pixsize == 4)
3876 #ifdef VERBOSE_BLITTER_LOGGING
3879 WriteLog(" Source read address/pix address: %08X/%1X [%08X%08X]\n", address, pixAddr,
3880 (uint32_t)(srcd1 >> 32), (uint32_t)(srcd1 & 0xFFFFFFFF));
3888 #ifdef VERBOSE_BLITTER_LOGGING
3891 WriteLog(" Entering SZREAD state...");
3896 srcz1 = ((uint64_t)JaguarReadLong(address, BLITTER) << 32) | (uint64_t)JaguarReadLong(address + 4, BLITTER);
3897 //Kludge to take pixel size into account... I believe that it only has to take 16BPP mode into account. Not sure tho.
3898 if (!phrase_mode && pixsize == 4)
3901 #ifdef VERBOSE_BLITTER_LOGGING
3904 WriteLog(" Src Z read address/pix address: %08X/%1X [%08X%08X]\n", address, pixAddr,
3905 (uint32_t)(dstz >> 32), (uint32_t)(dstz & 0xFFFFFFFF));
3912 #ifdef VERBOSE_BLITTER_LOGGING
3914 WriteLog(" Entering DREAD state...");
3916 //uint32_t dstAddr, pixAddr;
3917 //ADDRGEN(dstAddr, pixAddr, gena2i, zaddr,
3918 // a1_x, a1_y, a1_base, a1_pitch, a1_pixsize, a1_width, a1_zoffset,
3919 // a2_x, a2_y, a2_base, a2_pitch, a2_pixsize, a2_width, a2_zoffset);
3920 dstd = ((uint64_t)JaguarReadLong(address, BLITTER) << 32) | (uint64_t)JaguarReadLong(address + 4, BLITTER);
3921 //Kludge to take pixel size into account...
3926 else if (pixsize == 4)
3931 #ifdef VERBOSE_BLITTER_LOGGING
3933 WriteLog(" Dest read address/pix address: %08X/%1X [%08X%08X]\n", address,
3934 pixAddr, (uint32_t)(dstd >> 32), (uint32_t)(dstd & 0xFFFFFFFF));
3940 // Is Z always 64 bit read? Or sometimes 16 bit (dependent on phrase_mode)?
3941 #ifdef VERBOSE_BLITTER_LOGGING
3943 WriteLog(" Entering DZREAD state...");
3945 dstz = ((uint64_t)JaguarReadLong(address, BLITTER) << 32) | (uint64_t)JaguarReadLong(address + 4, BLITTER);
3946 //Kludge to take pixel size into account... I believe that it only has to take 16BPP mode into account. Not sure tho.
3947 if (!phrase_mode && pixsize == 4)
3950 #ifdef VERBOSE_BLITTER_LOGGING
3952 WriteLog(" Dest Z read address/pix address: %08X/%1X [%08X%08X]\n", address,
3953 pixAddr, (uint32_t)(dstz >> 32), (uint32_t)(dstz & 0xFFFFFFFF));
3957 // These vars should probably go further up in the code... !!! FIX !!!
3958 // We can't preassign these unless they're static...
3959 //uint64_t srcz = 0; // These are assigned to shut up stupid compiler warnings--dwrite is ALWAYS asserted
3960 //bool winhibit = false;
3963 //NOTE: SRCSHADE requires GOURZ to be set to work properly--another Jaguar I bug
3966 #ifdef VERBOSE_BLITTER_LOGGING
3968 WriteLog(" Entering DWRITE state...");
3970 //Counter is done on the dwrite state...! (We'll do it first, since it affects dstart/dend calculations.)
3971 //Here's the voodoo for figuring the correct amount of pixels in phrase mode (or not):
3972 int8_t inct = -((dsta2 ? a2_x : a1_x) & 0x07); // From INNER_CNT
3974 inc = (!phrase_mode || (phrase_mode && (inct & 0x01)) ? 0x01 : 0x00);
3975 inc |= (phrase_mode && (((pixsize == 3 || pixsize == 4) && (inct & 0x02)) || pixsize == 5 && !(inct & 0x01)) ? 0x02 : 0x00);
3976 inc |= (phrase_mode && ((pixsize == 3 && (inct & 0x04)) || (pixsize == 4 && !(inct & 0x03))) ? 0x04 : 0x00);
3977 inc |= (phrase_mode && pixsize == 3 && !(inct & 0x07) ? 0x08 : 0x00);
3979 uint16_t oldicount = icount; // Save icount to detect underflow...
3982 if (icount == 0 || ((icount & 0x8000) && !(oldicount & 0x8000)))
3984 // X/Y stepping is also done here, I think...No. It's done when a1_add or a2_add is asserted...
3986 //*********************************************************************************
3987 //Start & end write mask computations...
3988 //*********************************************************************************
3993 dstart = (dstxp & 0x07) << 3;
3995 dstart = (dstxp & 0x03) << 4;
3997 dstart = (dstxp & 0x01) << 5;
3999 dstart = (phrase_mode ? dstart : pixAddr & 0x07);
4001 //This is the other Jaguar I bug... Normally, should ALWAYS select a1_x here.
4002 uint16_t dstxwr = (dsta2 ? a2_x : a1_x) & 0x7FFE;
4003 uint16_t pseq = dstxwr ^ (a1_win_x & 0x7FFE);
4004 pseq = (pixsize == 5 ? pseq : pseq & 0x7FFC);
4005 pseq = ((pixsize & 0x06) == 4 ? pseq : pseq & 0x7FF8);
4006 bool penden = clip_a1 && (pseq == 0);
4007 uint8_t window_mask = 0;
4010 window_mask = (a1_win_x & 0x07) << 3;
4012 window_mask = (a1_win_x & 0x03) << 4;
4014 window_mask = (a1_win_x & 0x01) << 5;
4016 window_mask = (penden ? window_mask : 0);
4019 Entering SREADX state... [dstart=0 dend=20 pwidth=8 srcshift=20]
4020 Source extra read address/pix address: 000095D0/0 [000004E40000001C]
4021 Entering A2_ADD state [a2_x=0002, a2_y=0000, addasel=0, addbsel=1, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
4022 Entering SREAD state... [dstart=0 dend=20 pwidth=8 srcshift=20]
4023 Source read address/pix address: 000095D8/0 [0054003800009814]
4024 Entering A2_ADD state [a2_x=0004, a2_y=0000, addasel=0, addbsel=1, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
4025 Entering DWRITE state...
4026 Dest write address/pix address: 00037290/0 [dstart=0 dend=20 pwidth=8 srcshift=20][daas=0 dabs=0 dam=7 ds=1 daq=F] [0000001C00000000] (icount=026E, inc=4)
4027 Entering A1_ADD state [a1_x=0000, a1_y=0000, addasel=0, addbsel=0, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
4029 (icount=026E, inc=4)
4030 icount & 0x03 = 0x02
4033 window_mask = 0x1000
4035 Therefore, it chooses the inner_mask over the window_mask every time! Argh!
4036 This is because we did this wrong:
4037 Innerm[3-5] := AN2 (inner_mask[3-5], imb[3-5], inner0);
4038 NOTE! This doesn't fix the problem because inner0 is asserted too late to help here. !!! FIX !!! [Should be DONE]
4041 /* The mask to be used if within one phrase of the end of the inner
4043 uint8_t inner_mask = 0;
4046 inner_mask = (icount & 0x07) << 3;
4048 inner_mask = (icount & 0x03) << 4;
4050 inner_mask = (icount & 0x01) << 5;
4053 /* The actual mask used should be the lesser of the window masks and
4054 the inner mask, where in all cases 000 means 1000. */
4055 window_mask = (window_mask == 0 ? 0x40 : window_mask);
4056 inner_mask = (inner_mask == 0 ? 0x40 : inner_mask);
4057 uint8_t emask = (window_mask > inner_mask ? inner_mask : window_mask);
4058 /* The mask to be used for the pixel size, to which must be added
4060 uint8_t pma = pixAddr + (1 << pixsize);
4061 /* Select the mask */
4062 uint8_t dend = (phrase_mode ? emask : pma);
4064 /* The cycle width in phrase mode is normally one phrase. However,
4065 at the start and end it may be narrower. The start and end masks
4066 are used to generate this. The width is given by:
4068 8 - start mask - (8 - end mask)
4069 = end mask - start mask
4071 This is only used for writes in phrase mode.
4072 Start and end from the address level of the pipeline are used.
4074 uint8_t pwidth = (((dend | dstart) & 0x07) == 0 ? 0x08 : (dend - dstart) & 0x07);
4076 //uint32_t dstAddr, pixAddr;
4077 //ADDRGEN(dstAddr, pixAddr, gena2i, zaddr,
4078 // a1_x, a1_y, a1_base, a1_pitch, a1_pixsize, a1_width, a1_zoffset,
4079 // a2_x, a2_y, a2_base, a2_pitch, a2_pixsize, a2_width, a2_zoffset);
4080 #ifdef VERBOSE_BLITTER_LOGGING
4082 WriteLog(" Dest write address/pix address: %08X/%1X", address, pixAddr);
4085 //More testing... This is almost certainly wrong, but how else does this work???
4086 //Seems to kinda work... But still, this doesn't seem to make any sense!
4087 if (phrase_mode && !dsten)
4088 dstd = ((uint64_t)JaguarReadLong(address, BLITTER) << 32) | (uint64_t)JaguarReadLong(address + 4, BLITTER);
4090 //Testing only... for now...
4091 //This is wrong because the write data is a combination of srcd and dstd--either run
4092 //thru the LFU or in PATDSEL or ADDDSEL mode. [DONE now, thru DATA module]
4093 // Precedence is ADDDSEL > PATDSEL > LFU.
4094 //Also, doesn't take into account the start & end masks, or the phrase width...
4097 // srcd2 = xxxx xxxx 0123 4567, srcd = 8901 2345 xxxx xxxx, srcshift = $20 (32)
4098 uint64_t srcd = (srcd2 << (64 - srcshift)) | (srcd1 >> srcshift);
4099 //bleh, ugly ugly ugly
4103 //NOTE: This only works with pixel sizes less than 8BPP...
4104 //DOUBLE NOTE: Still need to do regression testing to ensure that this doesn't break other stuff... !!! CHECK !!!
4105 if (!phrase_mode && srcshift != 0)
4106 srcd = ((srcd2 & 0xFF) << (8 - srcshift)) | ((srcd1 & 0xFF) >> srcshift);
4108 //Z DATA() stuff done here... And it has to be done before any Z shifting...
4109 //Note that we need to have phrase mode start/end support here... (Not since we moved it from dzwrite...!)
4111 Here are a couple of Cybermorph blits with Z:
4112 $00113078 // DSTEN DSTENZ DSTWRZ CLIP_A1 GOURD GOURZ PATDSEL ZMODE=4
4113 $09900F39 // SRCEN DSTEN DSTENZ DSTWRZ UPDA1 UPDA1F UPDA2 DSTA2 ZMODE=4 LFUFUNC=C DCOMPEN
4115 We're having the same phrase mode overwrite problem we had with the pixels... !!! FIX !!!
4116 Odd. It's equating 0 with 0... Even though ZMODE is $04 (less than)!
4121 void ADDARRAY(uint16_t * addq, uint8_t daddasel, uint8_t daddbsel, uint8_t daddmode,
4122 uint64_t dstd, uint32_t iinc, uint8_t initcin[], uint64_t initinc, uint16_t initpix,
4123 uint32_t istep, uint64_t patd, uint64_t srcd, uint64_t srcz1, uint64_t srcz2,
4124 uint32_t zinc, uint32_t zstep)
4127 uint8_t initcin[4] = { 0, 0, 0, 0 };
4128 ADDARRAY(addq, 7/*daddasel*/, 6/*daddbsel*/, 0/*daddmode*/, 0, 0, initcin, 0, 0, 0, 0, 0, srcz1, srcz2, zinc, 0);
4129 srcz2 = ((uint64_t)addq[3] << 48) | ((uint64_t)addq[2] << 32) | ((uint64_t)addq[1] << 16) | (uint64_t)addq[0];
4130 ADDARRAY(addq, 6/*daddasel*/, 7/*daddbsel*/, 1/*daddmode*/, 0, 0, initcin, 0, 0, 0, 0, 0, srcz1, srcz2, zinc, 0);
4131 srcz1 = ((uint64_t)addq[3] << 48) | ((uint64_t)addq[2] << 32) | ((uint64_t)addq[1] << 16) | (uint64_t)addq[0];
4133 #if 0//def VERBOSE_BLITTER_LOGGING
4135 WriteLog("\n[srcz1=%08X%08X, srcz2=%08X%08X, zinc=%08X",
4136 (uint32_t)(srcz1 >> 32), (uint32_t)(srcz1 & 0xFFFFFFFF),
4137 (uint32_t)(srcz2 >> 32), (uint32_t)(srcz2 & 0xFFFFFFFF), zinc);
4141 uint8_t zSrcShift = srcshift & 0x30;
4142 srcz = (srcz2 << (64 - zSrcShift)) | (srcz1 >> zSrcShift);
4143 //bleh, ugly ugly ugly
4147 #if 0//def VERBOSE_BLITTER_LOGGING
4149 WriteLog(" srcz=%08X%08X]\n", (uint32_t)(srcz >> 32), (uint32_t)(srcz & 0xFFFFFFFF));
4152 //When in SRCSHADE mode, it adds the IINC to the read source (from LFU???)
4153 //According to following line, it gets LFU mode. But does it feed the source into the LFU
4155 //Dest write address/pix address: 0014E83E/0 [dstart=0 dend=10 pwidth=8 srcshift=0][daas=4 dabs=5 dam=7 ds=1 daq=F] [0000000000006505] (icount=003F, inc=1)
4159 //NOTE: This is basically doubling the work done by DATA--since this is what
4160 // ADDARRAY is loaded with when srcshade is enabled... !!! FIX !!!
4161 // Also note that it doesn't work properly unless GOURZ is set--there's the clue!
4163 uint8_t initcin[4] = { 0, 0, 0, 0 };
4164 ADDARRAY(addq, 4/*daddasel*/, 5/*daddbsel*/, 7/*daddmode*/, dstd, iinc, initcin, 0, 0, 0, patd, srcd, 0, 0, 0, 0);
4165 srcd = ((uint64_t)addq[3] << 48) | ((uint64_t)addq[2] << 32) | ((uint64_t)addq[1] << 16) | (uint64_t)addq[0];
4167 //Seems to work... Not 100% sure tho.
4170 //Temporary kludge, to see if the fractional pattern does anything...
4172 //But it seems to mess up in Cybermorph... the shading should be smooth but it isn't...
4173 //Seems the carry out is lost again... !!! FIX !!! [DONE--see below]
4177 uint8_t initcin[4] = { 0, 0, 0, 0 };
4178 ADDARRAY(addq, 4/*daddasel*/, 4/*daddbsel*/, 0/*daddmode*/, dstd, iinc, initcin, 0, 0, 0, patd, srcd, 0, 0, 0, 0);
4179 srcd1 = ((uint64_t)addq[3] << 48) | ((uint64_t)addq[2] << 32) | ((uint64_t)addq[1] << 16) | (uint64_t)addq[0];
4182 //Note that we still don't take atick[0] & [1] into account here, so this will skip half of the data needed... !!! FIX !!!
4183 //Not yet enumerated: dbinh, srcdread, srczread
4184 //Also, should do srcshift on the z value in phrase mode... !!! FIX !!! [DONE]
4185 //As well as add a srcz variable we can set external to this state... !!! FIX !!! [DONE]
4188 uint8_t dcomp, zcomp;
4189 DATA(wdata, dcomp, zcomp, winhibit,
4190 true, cmpdst, daddasel, daddbsel, daddmode, daddq_sel, data_sel, 0/*dbinh*/,
4191 dend, dstart, dstd, iinc, lfufunc, patd, patdadd,
4192 phrase_mode, srcd, false/*srcdread*/, false/*srczread*/, srcz2add, zmode,
4193 bcompen, bkgwren, dcompen, icount & 0x07, pixsize,
4196 Seems that the phrase mode writes with DCOMPEN and DSTEN are corrupting inside of DATA: !!! FIX !!!
4197 It's fairly random as well. 7CFE -> 7DFE, 7FCA -> 78CA, 7FA4 -> 78A4, 7F88 -> 8F88
4198 It could be related to an uninitialized variable, like the zmode bug...
4200 It was a bug in the dech38el data--it returned $FF for ungated instead of $00...
4202 Blit! (CMD = 09800609)
4203 Flags: SRCEN DSTEN UPDA1 UPDA2 LFUFUNC=C DCOMPEN
4205 a1_base = 00110000, a2_base = 0010B2A8
4206 a1_x = 004B, a1_y = 00D8, a1_frac_x = 0000, a1_frac_y = 0000, a2_x = 0704, a2_y = 0000
4207 a1_step_x = FFF3, a1_step_y = 0001, a1_stepf_x = 0000, a1_stepf_y = 0000, a2_step_x = FFFC, a2_step_y = 0000
4208 a1_inc_x = 0000, a1_inc_y = 0000, a1_incf_x = 0000, a1_incf_y = 0000
4209 a1_win_x = 0000, a1_win_y = 0000, a2_mask_x = 0000, a2_mask_y = 0000
4210 a2_mask=F a1add=+phr/+0 a2add=+phr/+0
4211 a1_pixsize = 4, a2_pixsize = 4
4212 srcd=0000000000000000 dstd=0000000000000000 patd=0000000000000000 iinc=00000000
4213 srcz1=0000000000000000 srcz2=0000000000000000 dstz=0000000000000000 zinc=00000000, coll=0
4215 [in=T a1f=F a1=F zf=F z=F a2=F iif=F iii=F izf=F izi=F]
4216 Entering INNER state...
4217 Entering SREAD state... Source read address/pix address: 0010C0B0/0 [0000000078047804]
4218 Entering A2_ADD state [a2_x=0704, a2_y=0000, addasel=0, addbsel=1, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
4219 Entering DREAD state...
4220 Dest read address/pix address: 00197240/0 [0000000000000028]
4221 Entering DWRITE state...
4222 Dest write address/pix address: 00197240/0 [dstart=30 dend=40 pwidth=8 srcshift=30][daas=0 dabs=0 dam=7 ds=1 daq=F] [0000000000000028] (icount=0009, inc=1)
4223 Entering A1_ADD state [a1_x=004B, a1_y=00D8, addasel=0, addbsel=0, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
4224 Entering SREAD state... Source read address/pix address: 0010C0B8/0 [7804780478047804]
4225 Entering A2_ADD state [a2_x=0708, a2_y=0000, addasel=0, addbsel=1, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
4226 Entering DREAD state...
4227 Dest read address/pix address: 00197260/0 [0028000000200008]
4228 Entering DWRITE state...
4229 Dest write address/pix address: 00197260/0 [dstart=0 dend=40 pwidth=8 srcshift=30][daas=0 dabs=0 dam=7 ds=1 daq=F] [0028780478047804] (icount=0005, inc=4)
4230 Entering A1_ADD state [a1_x=004C, a1_y=00D8, addasel=0, addbsel=0, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
4231 Entering SREAD state... Source read address/pix address: 0010C0C0/0 [0000000000000000]
4232 Entering A2_ADD state [a2_x=070C, a2_y=0000, addasel=0, addbsel=1, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
4233 Entering DREAD state...
4234 Dest read address/pix address: 00197280/0 [0008001800180018]
4235 Entering DWRITE state...
4236 Dest write address/pix address: 00197280/0 [dstart=0 dend=40 pwidth=8 srcshift=30][daas=0 dabs=0 dam=7 ds=1 daq=F] [7804780478040018] (icount=0001, inc=4)
4237 Entering A1_ADD state [a1_x=0050, a1_y=00D8, addasel=0, addbsel=0, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
4238 Entering SREAD state... Source read address/pix address: 0010C0C8/0 [000078047BFE7BFE]
4239 Entering A2_ADD state [a2_x=0710, a2_y=0000, addasel=0, addbsel=1, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
4240 Entering DREAD state...
4241 Dest read address/pix address: 001972A0/0 [0008002000000000]
4242 Entering DWRITE state...
4243 Dest write address/pix address: 001972A0/0 [dstart=0 dend=10 pwidth=8 srcshift=30][daas=0 dabs=0 dam=7 ds=1 daq=F] [0008002000000000] (icount=FFFD, inc=4)
4244 Entering A1_ADD state [a1_x=0054, a1_y=00D8, addasel=0, addbsel=0, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
4245 Entering IDLE_INNER state...
4248 //Why isn't this taken care of in DATA? Because, DATA is modifying its local copy instead of the one used here.
4249 //!!! FIX !!! [DONE]
4258 a1_outside // A1 pointer is outside window bounds
4267 // The address is outside if negative, or if greater than or equal
4268 // to the window size
4270 A1_xcomp := MAG_15 (a1xgr, a1xeq, a1xlt, a1_x{0..14}, a1_win_x{0..14});
4271 A1_ycomp := MAG_15 (a1ygr, a1yeq, a1ylt, a1_y{0..14}, a1_win_y{0..14});
4272 A1_outside := OR6 (a1_outside, a1_x{15}, a1xgr, a1xeq, a1_y{15}, a1ygr, a1yeq);
4274 //NOTE: There seems to be an off-by-one bug here in the clip_a1 section... !!! FIX !!!
4275 // Actually, seems to be related to phrase mode writes...
4276 // Or is it? Could be related to non-15-bit compares as above?
4277 if (clip_a1 && ((a1_x & 0x8000) || (a1_y & 0x8000) || (a1_x >= a1_win_x) || (a1_y >= a1_win_y)))
4284 JaguarWriteLong(address + 0, wdata >> 32, BLITTER);
4285 JaguarWriteLong(address + 4, wdata & 0xFFFFFFFF, BLITTER);
4290 JaguarWriteLong(address, wdata & 0xFFFFFFFF, BLITTER);
4291 else if (pixsize == 4)
4292 JaguarWriteWord(address, wdata & 0x0000FFFF, BLITTER);
4294 JaguarWriteByte(address, wdata & 0x000000FF, BLITTER);
4298 #ifdef VERBOSE_BLITTER_LOGGING
4301 WriteLog(" [%08X%08X]%s", (uint32_t)(wdata >> 32), (uint32_t)(wdata & 0xFFFFFFFF), (winhibit ? "[X]" : ""));
4302 WriteLog(" (icount=%04X, inc=%u)\n", icount, (uint16_t)inc);
4303 WriteLog(" [dstart=%X dend=%X pwidth=%X srcshift=%X]", dstart, dend, pwidth, srcshift);
4304 WriteLog("[daas=%X dabs=%X dam=%X ds=%X daq=%s]\n", daddasel, daddbsel, daddmode, data_sel, (daddq_sel ? "T" : "F"));
4311 // OK, here's the big insight: When NOT in GOURZ mode, srcz1 & 2 function EXACTLY the same way that
4312 // srcd1 & 2 work--there's an implicit shift from srcz1 to srcz2 whenever srcz1 is read.
4313 // OTHERWISE, srcz1 is the integer for the computed Z and srcz2 is the fractional part.
4314 // Writes to srcz1 & 2 follow the same pattern as the other 64-bit registers--low 32 at the low address,
4315 // high 32 at the high address (little endian!).
4316 // NOTE: GOURZ is still not properly supported. Check patd/patf handling...
4317 // Phrase mode start/end masks are not properly supported either...
4318 #ifdef VERBOSE_BLITTER_LOGGING
4321 WriteLog(" Entering DZWRITE state...");
4322 WriteLog(" Dest Z write address/pix address: %08X/%1X [%08X%08X]\n", address,
4323 pixAddr, (uint32_t)(srcz >> 32), (uint32_t)(srcz & 0xFFFFFFFF));
4326 //This is not correct... !!! FIX !!!
4327 //Should be OK now... We'll see...
4328 //Nope. Having the same starstep write problems in phrase mode as we had with pixels... !!! FIX !!!
4329 //This is not causing the problem in Hover Strike... :-/
4330 //The problem was with the SREADX not shifting. Still problems with Z comparisons & other text in pregame screen...
4335 JaguarWriteLong(address + 0, srcz >> 32, BLITTER);
4336 JaguarWriteLong(address + 4, srcz & 0xFFFFFFFF, BLITTER);
4341 JaguarWriteWord(address, srcz & 0x0000FFFF, BLITTER);
4344 #ifdef VERBOSE_BLITTER_LOGGING
4347 // printf(" [%08X%08X]\n", (uint32_t)(srcz >> 32), (uint32_t)(srcz & 0xFFFFFFFF));
4349 //printf(" [dstart=%X dend=%X pwidth=%X srcshift=%X]", dstart, dend, pwidth, srcshift);
4350 WriteLog(" [dstart=? dend=? pwidth=? srcshift=%X]", srcshift);
4351 WriteLog("[daas=%X dabs=%X dam=%X ds=%X daq=%s]\n", daddasel, daddbsel, daddmode, data_sel, (daddq_sel ? "T" : "F"));
4358 This is because the address generator was using only 15 bits of the X when it should have
4361 There's a slight problem here: The X pointer isn't wrapping like it should when it hits
4362 the edge of the window... Notice how the X isn't reset at the edge of the window:
4364 Blit! (CMD = 00010000)
4367 a1_base = 000E8008, a2_base = 0001FA68
4368 a1_x = 0000, a1_y = 0000, a1_frac_x = 0000, a1_frac_y = 0000, a2_x = 0000, a2_y = 0000
4369 a1_step_x = 0000, a1_step_y = 0000, a1_stepf_x = 0000, a1_stepf_y = 0000, a2_step_x = 0000, a2_step_y = 0000
4370 a1_inc_x = 0000, a1_inc_y = 0000, a1_incf_x = 0000, a1_incf_y = 0000
4371 a1_win_x = 0000, a1_win_y = 0000, a2_mask_x = 0000, a2_mask_y = 0000
4372 a2_mask=F a1add=+phr/+0 a2add=+phr/+0
4373 a1_pixsize = 5, a2_pixsize = 5
4374 srcd=7717771777177717 dstd=0000000000000000 patd=7730773077307730 iinc=00000000
4375 srcz1=0000000000000000 srcz2=0000000000000000 dstz=0000000000000000 zinc=00000000, coll=0
4377 [in=T a1f=F a1=F zf=F z=F a2=F iif=F iii=F izf=F izi=F]
4378 Entering INNER state...
4379 Entering DWRITE state... Dest write address/pix address: 000E8008/0 [7730773077307730] (icount=009E, inc=2)
4380 srcz=0000000000000000][dcomp=AA zcomp=00 dbinh=00]
4381 [srcz=0000000000000000 dstz=0000000000000000 zwdata=0000000000000000 mask=7FFF]
4382 [dstart=0 dend=40 pwidth=8 srcshift=0][daas=0 dabs=0 dam=7 ds=0 daq=F]
4383 Entering A1_ADD state [a1_x=0000, a1_y=0000, addasel=0, addbsel=0, modx=1, addareg=F, adda_xconst=1, adda_yconst=0]...
4384 Entering DWRITE state... Dest write address/pix address: 000E8018/0 [7730773077307730] (icount=009C, inc=2)
4385 srcz=0000000000000000][dcomp=AA zcomp=00 dbinh=00]
4386 [srcz=0000000000000000 dstz=0000000000000000 zwdata=0000000000000000 mask=7FFF]
4387 [dstart=0 dend=40 pwidth=8 srcshift=0][daas=0 dabs=0 dam=7 ds=0 daq=F]
4388 Entering A1_ADD state [a1_x=0002, a1_y=0000, addasel=0, addbsel=0, modx=1, addareg=F, adda_xconst=1, adda_yconst=0]...
4392 Entering A1_ADD state [a1_x=009C, a1_y=0000, addasel=0, addbsel=0, modx=1, addareg=F, adda_xconst=1, adda_yconst=0]...
4393 Entering DWRITE state... Dest write address/pix address: 000E84F8/0 [7730773077307730] (icount=0000, inc=2)
4394 srcz=0000000000000000][dcomp=AA zcomp=00 dbinh=00]
4395 [srcz=0000000000000000 dstz=0000000000000000 zwdata=0000000000000000 mask=7FFF]
4396 [dstart=0 dend=40 pwidth=8 srcshift=0][daas=0 dabs=0 dam=7 ds=0 daq=F]
4397 Entering A1_ADD state [a1_x=009E, a1_y=0000, addasel=0, addbsel=0, modx=1, addareg=F, adda_xconst=1, adda_yconst=0]...
4398 Entering IDLE_INNER state...
4400 Leaving INNER state... (ocount=0104)
4401 [in=T a1f=F a1=F zf=F z=F a2=F iif=F iii=F izf=F izi=F]
4403 Entering INNER state...
4404 Entering DWRITE state... Dest write address/pix address: 000E8508/0 [7730773077307730] (icount=009E, inc=2)
4405 srcz=0000000000000000][dcomp=AA zcomp=00 dbinh=00]
4406 [srcz=0000000000000000 dstz=0000000000000000 zwdata=0000000000000000 mask=7FFF]
4407 [dstart=0 dend=40 pwidth=8 srcshift=0][daas=0 dabs=0 dam=7 ds=0 daq=F]
4408 Entering A1_ADD state [a1_x=00A0, a1_y=0000, addasel=0, addbsel=0, modx=1, addareg=F, adda_xconst=1, adda_yconst=0]...
4409 Entering DWRITE state... Dest write address/pix address: 000E8518/0 [7730773077307730] (icount=009C, inc=2)
4410 srcz=0000000000000000][dcomp=AA zcomp=00 dbinh=00]
4411 [srcz=0000000000000000 dstz=0000000000000000 zwdata=0000000000000000 mask=7FFF]
4412 [dstart=0 dend=40 pwidth=8 srcshift=0][daas=0 dabs=0 dam=7 ds=0 daq=F]
4413 Entering A1_ADD state [a1_x=00A2, a1_y=0000, addasel=0, addbsel=0, modx=1, addareg=F, adda_xconst=1, adda_yconst=0]...
4419 #ifdef VERBOSE_BLITTER_LOGGING
4422 //printf(" Entering A1_ADD state [addasel=%X, addbsel=%X, modx=%X, addareg=%s, adda_xconst=%u, adda_yconst=%s]...\n", addasel, addbsel, modx, (addareg ? "T" : "F"), adda_xconst, (adda_yconst ? "1" : "0"));
4423 WriteLog(" Entering A1_ADD state [a1_x=%04X, a1_y=%04X, addasel=%X, addbsel=%X, modx=%X, addareg=%s, adda_xconst=%u, adda_yconst=%s]...\n", a1_x, a1_y, addasel, addbsel, modx, (addareg ? "T" : "F"), adda_xconst, (adda_yconst ? "1" : "0"));
4427 int16_t adda_x, adda_y, addb_x, addb_y, data_x, data_y, addq_x, addq_y;
4428 ADDAMUX(adda_x, adda_y, addasel, a1_step_x, a1_step_y, a1_stepf_x, a1_stepf_y, a2_step_x, a2_step_y,
4429 a1_inc_x, a1_inc_y, a1_incf_x, a1_incf_y, adda_xconst, adda_yconst, addareg, suba_x, suba_y);
4430 ADDBMUX(addb_x, addb_y, addbsel, a1_x, a1_y, a2_x, a2_y, a1_frac_x, a1_frac_y);
4431 ADDRADD(addq_x, addq_y, a1fracldi, adda_x, adda_y, addb_x, addb_y, modx, suba_x, suba_y);
4433 #if 0//def VERBOSE_BLITTER_LOGGING
4436 WriteLog(" [adda_x=%d, adda_y=%d, addb_x=%d, addb_y=%d, addq_x=%d, addq_y=%d]\n", adda_x, adda_y, addb_x, addb_y, addq_x, addq_y);
4440 //Now, write to what???
4441 //a2ptrld comes from a2ptrldi...
4442 //I believe it's addbsel that determines the writeback...
4443 // This is where atick[0] & [1] come in, in determining which part (fractional, integer)
4444 // gets written to...
4447 //Kludge, to get A1 channel increment working...
4450 a1_frac_x = addq_x, a1_frac_y = addq_y;
4452 addasel = 2, addbsel = 0, a1fracldi = false;
4453 ADDAMUX(adda_x, adda_y, addasel, a1_step_x, a1_step_y, a1_stepf_x, a1_stepf_y, a2_step_x, a2_step_y,
4454 a1_inc_x, a1_inc_y, a1_incf_x, a1_incf_y, adda_xconst, adda_yconst, addareg, suba_x, suba_y);
4455 ADDBMUX(addb_x, addb_y, addbsel, a1_x, a1_y, a2_x, a2_y, a1_frac_x, a1_frac_y);
4456 ADDRADD(addq_x, addq_y, a1fracldi, adda_x, adda_y, addb_x, addb_y, modx, suba_x, suba_y);
4458 a1_x = addq_x, a1_y = addq_y;
4461 a1_x = addq_x, a1_y = addq_y;
4466 #ifdef VERBOSE_BLITTER_LOGGING
4469 //printf(" Entering A2_ADD state [addasel=%X, addbsel=%X, modx=%X, addareg=%s, adda_xconst=%u, adda_yconst=%s]...\n", addasel, addbsel, modx, (addareg ? "T" : "F"), adda_xconst, (adda_yconst ? "1" : "0"));
4470 WriteLog(" Entering A2_ADD state [a2_x=%04X, a2_y=%04X, addasel=%X, addbsel=%X, modx=%X, addareg=%s, adda_xconst=%u, adda_yconst=%s]...\n", a2_x, a2_y, addasel, addbsel, modx, (addareg ? "T" : "F"), adda_xconst, (adda_yconst ? "1" : "0"));
4474 //void ADDAMUX(int16_t &adda_x, int16_t &adda_y, uint8_t addasel, int16_t a1_step_x, int16_t a1_step_y,
4475 // int16_t a1_stepf_x, int16_t a1_stepf_y, int16_t a2_step_x, int16_t a2_step_y,
4476 // int16_t a1_inc_x, int16_t a1_inc_y, int16_t a1_incf_x, int16_t a1_incf_y, uint8_t adda_xconst,
4477 // bool adda_yconst, bool addareg, bool suba_x, bool suba_y)
4478 //void ADDBMUX(int16_t &addb_x, int16_t &addb_y, uint8_t addbsel, int16_t a1_x, int16_t a1_y,
4479 // int16_t a2_x, int16_t a2_y, int16_t a1_frac_x, int16_t a1_frac_y)
4480 //void ADDRADD(int16_t &addq_x, int16_t &addq_y, bool a1fracldi,
4481 // int16_t adda_x, int16_t adda_y, int16_t addb_x, int16_t addb_y, uint8_t modx, bool suba_x, bool suba_y)
4482 //void DATAMUX(int16_t &data_x, int16_t &data_y, uint32_t gpu_din, int16_t addq_x, int16_t addq_y, bool addqsel)
4483 int16_t adda_x, adda_y, addb_x, addb_y, data_x, data_y, addq_x, addq_y;
4484 ADDAMUX(adda_x, adda_y, addasel, a1_step_x, a1_step_y, a1_stepf_x, a1_stepf_y, a2_step_x, a2_step_y,
4485 a1_inc_x, a1_inc_y, a1_incf_x, a1_incf_y, adda_xconst, adda_yconst, addareg, suba_x, suba_y);
4486 ADDBMUX(addb_x, addb_y, addbsel, a1_x, a1_y, a2_x, a2_y, a1_frac_x, a1_frac_y);
4487 ADDRADD(addq_x, addq_y, a1fracldi, adda_x, adda_y, addb_x, addb_y, modx, suba_x, suba_y);
4489 #if 0//def VERBOSE_BLITTER_LOGGING
4492 WriteLog(" [adda_x=%d, adda_y=%d, addb_x=%d, addb_y=%d, addq_x=%d, addq_y=%d]\n", adda_x, adda_y, addb_x, addb_y, addq_x, addq_y);
4496 //Now, write to what???
4497 //a2ptrld comes from a2ptrldi...
4498 //I believe it's addbsel that determines the writeback...
4504 Flags: SRCEN CLIP_A1 UPDA1 UPDA1F UPDA2 DSTA2 GOURZ ZMODE=0 LFUFUNC=C SRCSHADE
4506 a1_base = 0015B000, a2_base = 0014B000
4507 a1_x = 0000, a1_y = 0000, a1_frac_x = 8000, a1_frac_y = 8000, a2_x = 001F, a2_y = 0038
4508 a1_step_x = FFFFFFC0, a1_step_y = 0001, a1_stepf_x = 0000, a1_stepf_y = 2AAA, a2_step_x = FFFFFFC0, a2_step_y = 0001
4509 a1_inc_x = 0001, a1_inc_y = 0000, a1_incf_x = 0000, a1_incf_y = 0000
4510 a1_win_x = 0040, a1_win_y = 0040, a2_mask_x = 0000, a2_mask_y = 0000
4511 a2_mask=F a1add=+inc/+0 a2add=+1/+0
4512 a1_pixsize = 4, a2_pixsize = 4
4513 srcd=FF00FF00FF00FF00 dstd=0000000000000000 patd=0000000000000000 iinc=00000000
4514 srcz1=0000000000000000 srcz2=0000000000000000 dstz=0000000000000000 zinc=00000000, col=0
4516 [in=T a1f=F a1=F zf=F z=F a2=F iif=F iii=F izf=F izi=F]
4517 Entering INNER state...
4518 Entering SREAD state... Source read address/pix address: 0015B000/0 [6505650565056505]
4519 Entering A1_ADD state [a1_x=0000, a1_y=0000, addasel=3, addbsel=2, modx=0, addareg=T, adda_xconst=7, adda_yconst=0]...
4520 Entering DWRITE state...
4521 Dest write address/pix address: 0014E83E/0 [dstart=0 dend=10 pwidth=8 srcshift=0][daas=4 dabs=5 dam=7 ds=1 daq=F] [0000000000006505] (icount=003F, inc=1)
4522 Entering A2_ADD state [a2_x=001F, a2_y=0038, addasel=0, addbsel=1, modx=0, addareg=F, adda_xconst=0, adda_yconst=0]...
4523 Entering SREAD state... Source read address/pix address: 0015B000/0 [6505650565056505]
4524 Entering A1_ADD state [a1_x=FFFF8000, a1_y=FFFF8000, addasel=3, addbsel=2, modx=0, addareg=T, adda_xconst=7, adda_yconst=0]...
4525 Entering DWRITE state...
4526 Dest write address/pix address: 0014E942/0 [dstart=0 dend=10 pwidth=8 srcshift=0][daas=4 dabs=5 dam=7 ds=1 daq=F] [0000000000006505] (icount=003E, inc=1)
4527 Entering A2_ADD state [a2_x=0021, a2_y=0039, addasel=0, addbsel=1, modx=0, addareg=F, adda_xconst=0, adda_yconst=0]...
4528 Entering SREAD state... Source read address/pix address: 0015B000/0 [6505650565056505]
4529 Entering A1_ADD state [a1_x=FFFF8000, a1_y=FFFF8000, addasel=3, addbsel=2, modx=0, addareg=T, adda_xconst=7, adda_yconst=0]...
4530 Entering DWRITE state...
4531 Dest write address/pix address: 0014EA46/0 [dstart=0 dend=10 pwidth=8 srcshift=0][daas=4 dabs=5 dam=7 ds=1 daq=F] [0000000000006505] (icount=003D, inc=1)
4532 Entering A2_ADD state [a2_x=0023, a2_y=003A, addasel=0, addbsel=1, modx=0, addareg=F, adda_xconst=0, adda_yconst=0]...
4533 Entering SREAD state... Source read address/pix address: 0015B000/0 [6505650565056505]
4534 Entering A1_ADD state [a1_x=FFFF8000, a1_y=FFFF8000, addasel=3, addbsel=2, modx=0, addareg=T, adda_xconst=7, adda_yconst=0]...
4535 Entering DWRITE state...
4536 Dest write address/pix address: 0014EB4A/0 [dstart=0 dend=10 pwidth=8 srcshift=0][daas=4 dabs=5 dam=7 ds=1 daq=F] [0000000000006505] (icount=003C, inc=1)
4537 Entering A2_ADD state [a2_x=0025, a2_y=003B, addasel=0, addbsel=1, modx=0, addareg=F, adda_xconst=0, adda_yconst=0]...
4539 Entering SREAD state... Source read address/pix address: 0015B000/0 [6505650565056505]
4540 Entering A1_ADD state [a1_x=FFFF8000, a1_y=FFFF8000, addasel=3, addbsel=2, modx=0, addareg=T, adda_xconst=7, adda_yconst=0]...
4541 Entering DWRITE state...
4542 Dest write address/pix address: 0015283A/0 [dstart=0 dend=10 pwidth=8 srcshift=0][daas=4 dabs=5 dam=7 ds=1 daq=F] [0000000000006505] (icount=0000, inc=1)
4543 Entering A2_ADD state [a2_x=009D, a2_y=0077, addasel=0, addbsel=1, modx=0, addareg=F, adda_xconst=0, adda_yconst=0]...
4544 Entering IDLE_INNER state...
4545 Leaving INNER state... (ocount=0036)
4546 [in=F a1f=T a1=F zf=F z=F a2=F iif=F iii=F izf=F izi=F]
4547 Entering A1FUPDATE state...
4548 [in=F a1f=F a1=T zf=F z=F a2=F iif=F iii=F izf=F izi=F]
4549 Entering A1UPDATE state... (-32768/-32768 -> 32704/-32767)
4550 [in=F a1f=F a1=F zf=F z=F a2=T iif=F iii=F izf=F izi=F]
4551 Entering A2UPDATE state... (159/120 -> 95/121)
4552 [in=T a1f=F a1=F zf=F z=F a2=F iif=F iii=F izf=F izi=F]
4553 Entering INNER state...
4556 #ifdef VERBOSE_BLITTER_LOGGING
4559 WriteLog(" Leaving INNER state...");
4564 // The outer counter is updated here as well on the clock cycle...
4566 /* the inner loop is started whenever another state is about to
4567 cause the inner state to go active */
4568 //Instart := ND7 (instart, innert[0], innert[2..7]);
4570 //Actually, it's done only when inner gets asserted without the 2nd line of conditions
4571 //(inner AND !indone)
4573 //Since we don't get here until the inner loop is finished (indone = true) we can get
4574 //away with doing it here...!
4579 #ifdef VERBOSE_BLITTER_LOGGING
4582 WriteLog(" (ocount=%04X)\n", ocount);
4590 #ifdef VERBOSE_BLITTER_LOGGING
4593 WriteLog(" Entering A1FUPDATE state...\n");
4597 uint32_t a1_frac_xt = (uint32_t)a1_frac_x + (uint32_t)a1_stepf_x;
4598 uint32_t a1_frac_yt = (uint32_t)a1_frac_y + (uint32_t)a1_stepf_y;
4599 a1FracCInX = a1_frac_xt >> 16;
4600 a1FracCInY = a1_frac_yt >> 16;
4601 a1_frac_x = (uint16_t)(a1_frac_xt & 0xFFFF);
4602 a1_frac_y = (uint16_t)(a1_frac_yt & 0xFFFF);
4607 #ifdef VERBOSE_BLITTER_LOGGING
4610 WriteLog(" Entering A1UPDATE state... (%d/%d -> ", a1_x, a1_y);
4614 a1_x += a1_step_x + a1FracCInX;
4615 a1_y += a1_step_y + a1FracCInY;
4616 #ifdef VERBOSE_BLITTER_LOGGING
4619 WriteLog("%d/%d)\n", a1_x, a1_y);
4627 #ifdef VERBOSE_BLITTER_LOGGING
4630 WriteLog(" Entering A2UPDATE state... (%d/%d -> ", a2_x, a2_y);
4636 #ifdef VERBOSE_BLITTER_LOGGING
4639 WriteLog("%d/%d)\n", a2_x, a2_y);
4646 // We never get here! !!! FIX !!!
4648 #ifdef VERBOSE_BLITTER_LOGGING
4651 WriteLog("Done!\na1_x=%04X a1_y=%04X a1_frac_x=%04X a1_frac_y=%04X a2_x=%04X a2_y%04X\n",
4652 GET16(blitter_ram, A1_PIXEL + 2),
4653 GET16(blitter_ram, A1_PIXEL + 0),
4654 GET16(blitter_ram, A1_FPIXEL + 2),
4655 GET16(blitter_ram, A1_FPIXEL + 0),
4656 GET16(blitter_ram, A2_PIXEL + 2),
4657 GET16(blitter_ram, A2_PIXEL + 0));
4662 // Write values back to registers (in real blitter, these are continuously updated)
4663 SET16(blitter_ram, A1_PIXEL + 2, a1_x);
4664 SET16(blitter_ram, A1_PIXEL + 0, a1_y);
4665 SET16(blitter_ram, A1_FPIXEL + 2, a1_frac_x);
4666 SET16(blitter_ram, A1_FPIXEL + 0, a1_frac_y);
4667 SET16(blitter_ram, A2_PIXEL + 2, a2_x);
4668 SET16(blitter_ram, A2_PIXEL + 0, a2_y);
4670 #ifdef VERBOSE_BLITTER_LOGGING
4673 WriteLog("Writeback!\na1_x=%04X a1_y=%04X a1_frac_x=%04X a1_frac_y=%04X a2_x=%04X a2_y%04X\n",
4674 GET16(blitter_ram, A1_PIXEL + 2),
4675 GET16(blitter_ram, A1_PIXEL + 0),
4676 GET16(blitter_ram, A1_FPIXEL + 2),
4677 GET16(blitter_ram, A1_FPIXEL + 0),
4678 GET16(blitter_ram, A2_PIXEL + 2),
4679 GET16(blitter_ram, A2_PIXEL + 0));
4687 int16_t a1_x = (int16_t)GET16(blitter_ram, A1_PIXEL + 2);
4688 int16_t a1_y = (int16_t)GET16(blitter_ram, A1_PIXEL + 0);
4689 uint16_t a1_frac_x = GET16(blitter_ram, A1_FPIXEL + 2);
4690 uint16_t a1_frac_y = GET16(blitter_ram, A1_FPIXEL + 0);
4691 int16_t a2_x = (int16_t)GET16(blitter_ram, A2_PIXEL + 2);
4692 int16_t a2_y = (int16_t)GET16(blitter_ram, A2_PIXEL + 0);
4694 Seems that the ending a1_x should be written between blits, but it doesn't seem to be...
4696 Blit! (CMD = 01800000)
4699 a1_base = 00050000, a2_base = 00070000
4700 a1_x = 0000, a1_y = 0000, a1_frac_x = 49CD, a1_frac_y = 0000, a2_x = 0033, a2_y = 0001
4701 a1_step_x = 0000, a1_step_y = 0000, a1_stepf_x = 939A, a1_stepf_y = 0000, a2_step_x = 0000, a2_step_y = 0000
4702 a1_inc_x = 0000, a1_inc_y = 0000, a1_incf_x = 0000, a1_incf_y = 0000
4703 a1_win_x = 0100, a1_win_y = 0020, a2_mask_x = 0000, a2_mask_y = 0000
4704 a2_mask=F a1add=+phr/+0 a2add=+phr/+0
4705 a1_pixsize = 4, a2_pixsize = 3
4706 srcd=DEDEDEDEDEDEDEDE dstd=0000000000000000 patd=0000000000000000 iinc=00000000
4707 srcz1=0000000000000000 srcz2=0000000000000000 dstz=0000000000000000 zinc=00000000, coll=0
4710 Blit! (CMD = 01800000)
4713 a1_base = 00050000, a2_base = 00070000
4714 a1_x = 0000, a1_y = 0000, a1_frac_x = 49CD, a1_frac_y = 0000, a2_x = 0033, a2_y = 0001
4715 a1_step_x = 0000, a1_step_y = 0000, a1_stepf_x = 939A, a1_stepf_y = 0000, a2_step_x = 0000, a2_step_y = 0000
4716 a1_inc_x = 0000, a1_inc_y = 0000, a1_incf_x = 0000, a1_incf_y = 0000
4717 a1_win_x = 0100, a1_win_y = 0020, a2_mask_x = 0000, a2_mask_y = 0000
4718 a2_mask=F a1add=+phr/+0 a2add=+phr/+0
4719 a1_pixsize = 4, a2_pixsize = 3
4720 srcd=D6D6D6D6D6D6D6D6 dstd=0000000000000000 patd=0000000000000000 iinc=00000000
4721 srcz1=0000000000000000 srcz2=0000000000000000 dstz=0000000000000000 zinc=00000000, coll=0
4727 // Various pieces of the blitter puzzle are teased out here...
4733 INT24/ address // byte address
4734 pixa[0..2] // bit part of address, un-pipe-lined
4750 apipe // load address pipe-line latch
4751 clk // co-processor clock
4752 gena2 // generate A2 as opposed to A1
4753 zaddr // generate Z address
/**
 * ADDRGEN - blitter address generator.
 *
 * Picks the A1 or A2 window registers (gena2) and turns the window's X/Y
 * pointer into a memory address.
 *
 * Outputs (by reference):
 *   address - generated address; its low three bits are bits 3..5 of pixa,
 *             the rest is the 21-bit sum computed below shifted left by 3
 *   pixa    - (ya + x) << pixsize, i.e. the pixel offset scaled by pixel size
 *
 * Inputs:
 *   gena2   - true selects the a2_* arguments, false the a1_* arguments
 *   zaddr   - true adds the low two bits of the selected zoffset
 *   *_x/_y  - window pointer; all 16 bits of X and the low 12 bits of Y are used
 *   *_base  - window base address; only bits 3 and up take part
 *   *_pitch - two-bit pitch code (see the pt / shup terms below)
 *   *_width - window width code: low two bits add y*2 / y*1 to y*4, the
 *             remaining upper bits are used as a left-shift count
 */
void ADDRGEN(uint32_t &address, uint32_t &pixa, bool gena2, bool zaddr,
	uint16_t a1_x, uint16_t a1_y, uint32_t a1_base, uint8_t a1_pitch, uint8_t a1_pixsize, uint8_t a1_width, uint8_t a1_zoffset,
	uint16_t a2_x, uint16_t a2_y, uint32_t a2_base, uint8_t a2_pitch, uint8_t a2_pixsize, uint8_t a2_width, uint8_t a2_zoffset)
{
	// Select the register set for the window being addressed.
	// (An earlier version masked X with 0x7FFF; the address actually uses all 16 bits.)
	uint16_t x = (gena2 ? a2_x : a1_x) & 0xFFFF;
	uint16_t y = (gena2 ? a2_y : a1_y) & 0x0FFF;
	uint8_t width = (gena2 ? a2_width : a1_width);
	uint8_t pixsize = (gena2 ? a2_pixsize : a1_pixsize);
	uint8_t pitch = (gena2 ? a2_pitch : a1_pitch);
	// Only the upper 21 bits of the base seem to be passed around the bus.
	uint32_t base = (gena2 ? a2_base : a1_base) >> 3;
	uint8_t zoffset = (gena2 ? a2_zoffset : a1_zoffset);

	// Y times the width: y*4 plus optional y*2 and y*1 (low two width bits),
	// shifted up by the remaining width bits, then the *4 scaling is removed.
	uint32_t ytm = ((uint32_t)y << 2) + (width & 0x02 ? (uint32_t)y << 1 : 0) + (width & 0x01 ? (uint32_t)y : 0);
	uint32_t ya = (ytm << (width >> 2)) >> 2;

	// Pixel offset within the window, then scaled by the pixel size.
	uint32_t pa = ya + x;
	pixa = pa << pixsize;

	// Pitch codes 1 and 2 become a shift count; code 3 instead adds twice
	// the phrase offset (shup) further down, for a total of three times.
	uint8_t pt = ((pitch & 0x01) && !(pitch & 0x02) ? 0x01 : 0x00)
		| (!(pitch & 0x01) && (pitch & 0x02) ? 0x02 : 0x00);
	uint32_t phradr = (pixa >> 6) << pt;
	uint32_t shup = (pitch == 0x03 ? (pixa >> 6) : 0);

	uint8_t za = (zaddr ? zoffset : 0) & 0x03;
	uint32_t addr = za + phradr + (shup << 1) + base;
	address = ((pixa & 0x38) >> 3) | ((addr & 0x1FFFFF) << 3);

#if 0//def VERBOSE_BLITTER_LOGGING
	WriteLog(" [gena2=%s, x=%04X, y=%04X, w=%1X, pxsz=%1X, ptch=%1X, b=%08X, zoff=%1X]\n", (gena2 ? "T" : "F"), x, y, width, pixsize, pitch, base, zoffset);
	WriteLog(" [ytm=%X, ya=%X, pa=%X, pixa=%X, pt=%X, phradr=%X, shup=%X, za=%X, addr=%X, address=%X]\n", ytm, ya, pa, pixa, pt, phradr, shup, za, addr, address);
#endif
}
4798 Entering INNER state...
4799 [gena2=T, x=0002, y=0000, w=20, pxsz=4, ptch=0, b=000012BA, zoff=0]
4800 [ytm=0, ya=0, pa=2, pixa=20, pt=0, phradr=0, shup=0, za=0, addr=12BA, address=95D4]
4801 Entering SREADX state... [dstart=0 dend=20 pwidth=8 srcshift=20]
4802 Source extra read address/pix address: 000095D4/0 [0000001C00540038]
4803 Entering A2_ADD state [a2_x=0002, a2_y=0000, addasel=0, addbsel=1, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
4804 [gena2=T, x=0004, y=0000, w=20, pxsz=4, ptch=0, b=000012BA, zoff=0]
4805 [ytm=0, ya=0, pa=4, pixa=40, pt=0, phradr=1, shup=0, za=0, addr=12BB, address=95D8]
4806 Entering SREAD state... [dstart=0 dend=20 pwidth=8 srcshift=0]
4807 Source read address/pix address: 000095D8/0 [0054003800009814]
4808 Entering A2_ADD state [a2_x=0004, a2_y=0000, addasel=0, addbsel=1, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
4809 [gena2=F, x=0000, y=0000, w=20, pxsz=4, ptch=0, b=00006E52, zoff=0]
4810 [ytm=0, ya=0, pa=0, pixa=0, pt=0, phradr=0, shup=0, za=0, addr=6E52, address=37290]
4811 Entering DWRITE state...
4812 Dest write address/pix address: 00037290/0 [dstart=0 dend=20 pwidth=8 srcshift=0] (icount=026E, inc=4)
4813 Entering A1_ADD state [a1_x=0000, a1_y=0000, addasel=0, addbsel=0, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
4814 [gena2=T, x=0008, y=0000, w=20, pxsz=4, ptch=0, b=000012BA, zoff=0]
4815 [ytm=0, ya=0, pa=8, pixa=80, pt=0, phradr=2, shup=0, za=0, addr=12BC, address=95E0]
4819 Entering SREAD state...
4820 [gena2=T, x=0004, y=0000, w=20, pxsz=4, ptch=0, b=000010AC, zoff=0]
4821 [ytm=0, ya=0, pa=4, pixa=0, pt=0, phradr=40, shup=0, za=0, addr=10AC, address=8560]
4822 Source read address/pix address: 00008560/0 [8C27981B327E00F0]
4824 2nd pass (still wrong):
4825 Entering SREAD state...
4826 [gena2=T, x=0004, y=0000, w=20, pxsz=4, ptch=0, b=000010AC, zoff=0]
4827 [ytm=0, ya=0, pa=4, pixa=0, pt=0, phradr=40, shup=0, za=0, addr=10EC, address=8760]
4828 Source read address/pix address: 00008760/0 [00E06DC04581880C]
4831 Entering SREAD state...
4832 [gena2=T, x=0004, y=0000, w=20, pxsz=4, ptch=0, b=000010AC, zoff=0]
4833 [ytm=0, ya=0, pa=4, pixa=0, pt=0, phradr=1, shup=0, za=0, addr=10AD, address=8568]
4834 Source read address/pix address: 00008568/0 [6267981A327C00F0]
4836 OK, now we're back into incorrect (or is it?):
4837 Entering SREADX state... [dstart=0 dend=20 pwidth=8 srcshift=20]
4838 Source extra read address/pix address: 000095D4/0 [0000 001C 0054 0038]
4839 Entering A2_ADD state [a2_x=0002, a2_y=0000, addasel=0, addbsel=1, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
4840 Entering SREAD state... [dstart=0 dend=20 pwidth=8 srcshift=0]
4841 Source read address/pix address: 000095D8/0 [0054 0038 0000 9814]
4842 Entering A2_ADD state [a2_x=0004, a2_y=0000, addasel=0, addbsel=1, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
4843 I think this may be correct...!
4848 // source and destination address update conditions
4850 Sraat0 := AN2 (sraat[0], sreadxi, srcenz\);
4851 Sraat1 := AN2 (sraat[1], sreadi, srcenz\);
4852 Srca_addi := OR4 (srca_addi, szreadxi, szreadi, sraat[0..1]);
4853 Srca_add := FD1Q (srca_add, srca_addi, clk);
4855 Dstaat := AN2 (dstaat, dwritei, dstwrz\);
4856 Dsta_addi := OR2 (dsta_addi, dzwritei, dstaat);
4857 // Dsta_add := FD1Q (dsta_add, dsta_addi, clk);
4859 // source and destination address generate conditions
4861 Gensrc := OR4 (gensrc, sreadxi, szreadxi, sreadi, szreadi);
4862 Gendst := OR4 (gendst, dreadi, dzreadi, dwritei, dzwritei);
4863 Dsta2\ := INV1 (dsta2\, dsta2);
4864 Gena2t0 := NAN2 (gena2t[0], gensrc, dsta2\);
4865 Gena2t1 := NAN2 (gena2t[1], gendst, dsta2);
4866 Gena2i := NAN2 (gena2i, gena2t[0..1]);
4867 Gena2 := FD1QU (gena2, gena2i, clk);
4869 Zaddr := OR4 (zaddr, szreadx, szread, dzread, dzwrite);
4874 // Basically, the above translates to:
4875 bool srca_addi = (sreadxi && !srcenz) || (sreadi && !srcenz) || szreadxi || szreadi;
4877 bool dsta_addi = (dwritei && !dstwrz) || dzwritei;
4879 bool gensrc = sreadxi || szreadxi || sreadi || szreadi;
4880 bool gendst = dreadi || dzreadi || dwritei || dzwritei;
4881 bool gena2i = (gensrc && !dsta2) || (gendst && dsta2);
4883 bool zaddr = szreadx || szread || dzread || dzwrite;
4887 // source data reads
4889 Srcdpset\ := NAN2 (srcdpset\, readreq, sread);
4890 Srcdpt1 := NAN2 (srcdpt[1], srcdpend, srcdack\);
4891 Srcdpt2 := NAN2 (srcdpt[2], srcdpset\, srcdpt[1]);
4892 Srcdpend := FD2Q (srcdpend, srcdpt[2], clk, reset\);
4894 Srcdxpset\ := NAN2 (srcdxpset\, readreq, sreadx);
4895 Srcdxpt1 := NAN2 (srcdxpt[1], srcdxpend, srcdxack\);
4896 Srcdxpt2 := NAN2 (srcdxpt[2], srcdxpset\, srcdxpt[1]);
4897 Srcdxpend := FD2Q (srcdxpend, srcdxpt[2], clk, reset\);
4899 Sdpend := OR2 (sdpend, srcdxpend, srcdpend);
4900 Srcdreadt := AN2 (srcdreadt, sdpend, read_ack);
4902 //2/9/92 - enhancement?
4903 //Load srcdread on the next tick as well to modify it in srcshade
4905 Srcdreadd := FD1Q (srcdreadd, srcdreadt, clk);
4906 Srcdread := AOR1 (srcdread, srcshade, srcdreadd, srcdreadt);
4910 Srczpset\ := NAN2 (srczpset\, readreq, szread);
4911 Srczpt1 := NAN2 (srczpt[1], srczpend, srczack\);
4912 Srczpt2 := NAN2 (srczpt[2], srczpset\, srczpt[1]);
4913 Srczpend := FD2Q (srczpend, srczpt[2], clk, reset\);
4915 Srczxpset\ := NAN2 (srczxpset\, readreq, szreadx);
4916 Srczxpt1 := NAN2 (srczxpt[1], srczxpend, srczxack\);
4917 Srczxpt2 := NAN2 (srczxpt[2], srczxpset\, srczxpt[1]);
4918 Srczxpend := FD2Q (srczxpend, srczxpt[2], clk, reset\);
4920 Szpend := OR2 (szpend, srczpend, srczxpend);
4921 Srczread := AN2 (srczread, szpend, read_ack);
4923 // destination data reads
4925 Dstdpset\ := NAN2 (dstdpset\, readreq, dread);
4926 Dstdpt0 := NAN2 (dstdpt[0], dstdpend, dstdack\);
4927 Dstdpt1 := NAN2 (dstdpt[1], dstdpset\, dstdpt[0]);
4928 Dstdpend := FD2Q (dstdpend, dstdpt[1], clk, reset\);
4929 Dstdread := AN2 (dstdread, dstdpend, read_ack);
4931 // destination zed reads
4933 Dstzpset\ := NAN2 (dstzpset\, readreq, dzread);
4934 Dstzpt0 := NAN2 (dstzpt[0], dstzpend, dstzack\);
4935 Dstzpt1 := NAN2 (dstzpt[1], dstzpset\, dstzpt[0]);
4936 Dstzpend := FD2Q (dstzpend, dstzpt[1], clk, reset\);
4937 Dstzread := AN2 (dstzread, dstzpend, read_ack);
4942 // Basically, the above translates to:
4943 bool srcdpend = (readreq && sread) || (srcdpend && !srcdack);
4944 bool srcdxpend = (readreq && sreadx) || (srcdxpend && !srcdxack);
4945 bool sdpend = srcdxpend || srcdpend;
4946 bool srcdread = ((sdpend && read_ack) && srcshade) || (sdpend && read_ack);//the latter term is lookahead
4951 ////////////////////////////////////////////////////////////////////////////////////////////
4952 ////////////////////////////////////////////////////////////////////////////////////////////
4953 // Here's an important bit: The source data adder logic. Need to track down the inputs!!! //
4954 ////////////////////////////////////////////////////////////////////////////////////////////
4955 ////////////////////////////////////////////////////////////////////////////////////////////
4962 daddasel[0..2] // data adder input A selection
4967 initcin[0..3] // carry into the adders from the initializers
4968 initinc[0..63] // the initialisation increment
4969 initpix[0..15] // Data initialiser pixel value
4981 void ADDARRAY(uint16_t * addq, uint8_t daddasel, uint8_t daddbsel, uint8_t daddmode,
4982 uint64_t dstd, uint32_t iinc, uint8_t initcin[], uint64_t initinc, uint16_t initpix,
4983 uint32_t istep, uint64_t patd, uint64_t srcd, uint64_t srcz1, uint64_t srcz2,
4984 uint32_t zinc, uint32_t zstep)
4986 uint32_t initpix2 = ((uint32_t)initpix << 16) | initpix;
4987 uint32_t addalo[8], addahi[8];
4988 addalo[0] = dstd & 0xFFFFFFFF;
4989 addalo[1] = initpix2;
4992 addalo[4] = srcd & 0xFFFFFFFF;
4993 addalo[5] = patd & 0xFFFFFFFF;
4994 addalo[6] = srcz1 & 0xFFFFFFFF;
4995 addalo[7] = srcz2 & 0xFFFFFFFF;
4996 addahi[0] = dstd >> 32;
4997 addahi[1] = initpix2;
5000 addahi[4] = srcd >> 32;
5001 addahi[5] = patd >> 32;
5002 addahi[6] = srcz1 >> 32;
5003 addahi[7] = srcz2 >> 32;
5005 adda[0] = addalo[daddasel] & 0xFFFF;
5006 adda[1] = addalo[daddasel] >> 16;
5007 adda[2] = addahi[daddasel] & 0xFFFF;
5008 adda[3] = addahi[daddasel] >> 16;
5010 uint16_t wordmux[8];
5011 wordmux[0] = iinc & 0xFFFF;
5012 wordmux[1] = iinc >> 16;
5013 wordmux[2] = zinc & 0xFFFF;
5014 wordmux[3] = zinc >> 16;;
5015 wordmux[4] = istep & 0xFFFF;
5016 wordmux[5] = istep >> 16;;
5017 wordmux[6] = zstep & 0xFFFF;
5018 wordmux[7] = zstep >> 16;;
5019 uint16_t word = wordmux[((daddbsel & 0x08) >> 1) | (daddbsel & 0x03)];
5021 bool dbsel2 = daddbsel & 0x04;
5022 bool iincsel = (daddbsel & 0x01) && !(daddbsel & 0x04);
5024 if (!dbsel2 && !iincsel)
5025 addb[0] = srcd & 0xFFFF,
5026 addb[1] = (srcd >> 16) & 0xFFFF,
5027 addb[2] = (srcd >> 32) & 0xFFFF,
5028 addb[3] = (srcd >> 48) & 0xFFFF;
5029 else if (dbsel2 && !iincsel)
5030 addb[0] = addb[1] = addb[2] = addb[3] = word;
5031 else if (!dbsel2 && iincsel)
5032 addb[0] = initinc & 0xFFFF,
5033 addb[1] = (initinc >> 16) & 0xFFFF,
5034 addb[2] = (initinc >> 32) & 0xFFFF,
5035 addb[3] = (initinc >> 48) & 0xFFFF;
5037 addb[0] = addb[1] = addb[2] = addb[3] = 0;
5039 uint8_t cinsel = (daddmode >= 1 && daddmode <= 4 ? 1 : 0);
5041 static uint8_t co[4];//These are preserved between calls...
5044 for(int i=0; i<4; i++)
5045 cin[i] = initcin[i] | (co[i] & cinsel);
5047 bool eightbit = daddmode & 0x02;
5048 bool sat = daddmode & 0x03;
5049 bool hicinh = ((daddmode & 0x03) == 0x03);
5051 //Note that the carry out is saved between calls to this function...
5052 for(int i=0; i<4; i++)
5053 ADD16SAT(addq[i], co[i], adda[i], addb[i], cin[i], sat, eightbit, hicinh);
/**
 * ADD16SAT - one 16-bit lane of the data adder, with optional saturation.
 *
 * The sum is built in three slices (bits 0..7, 8..11, 12..15) so that the
 * carry between slices can be inhibited:
 *   eightbit - blocks the carry from bit 7 into bit 8
 *   hicinh   - blocks the carry from bit 11 into bit 12
 *
 * r   - receives the 16-bit result
 * co  - receives the carry out of bit 15
 * a,b - operands
 * cin - carry into bit 0
 * sat - enable saturation: when the top bit of b (bit 7 in eightbit mode,
 *       bit 15 otherwise) differs from the matching carry-out, the low byte
 *       is forced to 0xFF (carry set) or 0x00 (carry clear); the high byte
 *       is forced the same way unless eightbit is set.
 */
void ADD16SAT(uint16_t &r, uint8_t &co, uint16_t a, uint16_t b, uint8_t cin, bool sat, bool eightbit, bool hicinh)
{
#if 0	// debug trace, compiled out: this runs four times per phrase
	printf("--> [sat=%s 8b=%s hicinh=%s] %04X + %04X (+ %u) = ", (sat ? "T" : "F"), (eightbit ? "T" : "F"), (hicinh ? "T" : "F"), a, b, cin);
#endif

	uint8_t carry[4];

	// Bits 0..7
	uint32_t qt = (a & 0xFF) + (b & 0xFF) + cin;
	carry[0] = (qt & 0x0100 ? 1 : 0);
	uint16_t q = qt & 0x00FF;

	// Bits 8..11
	carry[1] = (carry[0] && !eightbit ? carry[0] : 0);
	qt = (a & 0x0F00) + (b & 0x0F00) + (carry[1] << 8);
	carry[2] = (qt & 0x1000 ? 1 : 0);
	q |= qt & 0x0F00;	// merge this slice into the result

	// Bits 12..15
	carry[3] = (carry[2] && !hicinh ? carry[2] : 0);
	qt = (a & 0xF000) + (b & 0xF000) + (carry[3] << 12);
	co = (qt & 0x10000 ? 1 : 0);
	q |= qt & 0xF000;	// merge this slice into the result

	// Saturation: compare the top bit of b with the carry out of the same width.
	uint8_t btop = (eightbit ? (b & 0x0080) >> 7 : (b & 0x8000) >> 15);
	uint8_t ctop = (eightbit ? carry[0] : co);

	bool saturate = sat && (btop ^ ctop);
	bool hisaturate = saturate && !eightbit;

#if 0	// debug trace, compiled out
	printf("bt=%u ct=%u s=%u hs=%u] ", btop, ctop, saturate, hisaturate);
#endif

	r = (saturate ? (ctop ? 0x00FF : 0x0000) : q & 0x00FF);
	r |= (hisaturate ? (ctop ? 0xFF00 : 0x0000) : q & 0xFF00);

#if 0	// debug trace, compiled out
	printf("%04X (co=%u)\n", r, co);
#endif
}
5111 /** ADDAMUX - Address adder input A selection *******************
5113 This module generates the data loaded into the address adder input A. This is
5114 the update value, and can be one of four registers : A1 step, A2 step, A1
5115 increment and A1 fraction. It can complement these values to perform
5116 subtraction, and it can generate constants to increment / decrement the window
5119 addasel[0..2] select the register to add
5121 000 A1 step integer part
5122 001 A1 step fraction part
5123 010 A1 increment integer part
5124 011 A1 increment fraction part
5127 adda_xconst[0..2] generate a power of 2 in the range 1-64 or all zeroes when
5130 addareg selects register value to be added as opposed to constant
5133 suba_x, suba_y complement the X and Y values
/**
 * ADDAMUX - select the value fed to input A of the address adder.
 *
 * adda_x, adda_y - outputs: the selected (and optionally complemented) X/Y
 * addasel        - register select when addareg is set:
 *                  0 A1 step, 1 A1 step fraction, 2 A1 increment,
 *                  3 A1 increment fraction, 4..7 (bit 2 set) A2 step
 * adda_xconst    - 3-bit constant select for X when addareg is clear:
 *                  0..6 give 1 << n (1..64), 7 gives zero
 * adda_yconst    - constant for Y when addareg is clear: 0 or 1
 * addareg        - true selects the register value, false the constant
 * suba_x, suba_y - one's-complement the output (the adder supplies the
 *                  carry-in that completes the subtraction)
 */
void ADDAMUX(int16_t &adda_x, int16_t &adda_y, uint8_t addasel, int16_t a1_step_x, int16_t a1_step_y,
	int16_t a1_stepf_x, int16_t a1_stepf_y, int16_t a2_step_x, int16_t a2_step_y,
	int16_t a1_inc_x, int16_t a1_inc_y, int16_t a1_incf_x, int16_t a1_incf_y, uint8_t adda_xconst,
	bool adda_yconst, bool addareg, bool suba_x, bool suba_y)
{
/*INT16/ addac_x, addac_y, addar_x, addar_y, addart_x, addart_y,
INT16/ addas_x, addas_y, suba_x16, suba_y16
Zero := TIE0 (zero);*/

/* Multiplex the register terms */

/*Addaselb[0-2] := BUF8 (addaselb[0-2], addasel[0-2]);
Addart_x := MX4 (addart_x, a1_step_x, a1_stepf_x, a1_inc_x, a1_incf_x, addaselb[0..1]);
Addar_x := MX2 (addar_x, addart_x, a2_step_x, addaselb[2]);
Addart_y := MX4 (addart_y, a1_step_y, a1_stepf_y, a1_inc_y, a1_incf_y, addaselb[0..1]);
Addar_y := MX2 (addar_y, addart_y, a2_step_y, addaselb[2]);*/

	const int16_t a1_xterms[4] = { a1_step_x, a1_stepf_x, a1_inc_x, a1_incf_x };
	const int16_t a1_yterms[4] = { a1_step_y, a1_stepf_y, a1_inc_y, a1_incf_y };
	const bool use_a2_step = (addasel & 0x04) != 0;
	int16_t addar_x = (use_a2_step ? a2_step_x : a1_xterms[addasel & 0x03]);
	int16_t addar_y = (use_a2_step ? a2_step_y : a1_yterms[addasel & 0x03]);

/* Generate a constant value - this is a power of 2 in the range
1-64, or zero. The control bits are adda_xconst[0..2], when they
are all 1 the result is 0.
Constants for Y can only be 0 or 1 */

/*Addac_xlo := D38H (addac_x[0..6], unused[0], adda_xconst[0..2]);
Unused[0] := DUMMY (unused[0]);
Addac_x := JOIN (addac_x, addac_x[0..6], zero, zero, zero, zero, zero, zero, zero, zero, zero);
Addac_y := JOIN (addac_y, adda_yconst, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero,
zero, zero, zero, zero, zero);*/

	// Only three control bits exist, so ignore anything above them.
	const uint8_t xconst = adda_xconst & 0x07;
	int16_t addac_x = (xconst == 0x07 ? 0 : (int16_t)(1 << xconst));
	int16_t addac_y = (adda_yconst ? 0x01 : 0);

/* Select between constant value and register value */

/*Addas_x := MX2 (addas_x, addac_x, addar_x, addareg);
Addas_y := MX2 (addas_y, addac_y, addar_y, addareg);*/

	int16_t addas_x = (addareg ? addar_x : addac_x);
	int16_t addas_y = (addareg ? addar_y : addac_y);

/* Complement these values (complement flag gives adder carry in)*/

/*Suba_x16 := JOIN (suba_x16, suba_x, suba_x, suba_x, suba_x, suba_x, suba_x, suba_x, suba_x, suba_x,
suba_x, suba_x, suba_x, suba_x, suba_x, suba_x, suba_x);
Suba_y16 := JOIN (suba_y16, suba_y, suba_y, suba_y, suba_y, suba_y, suba_y, suba_y, suba_y, suba_y,
suba_y, suba_y, suba_y, suba_y, suba_y, suba_y, suba_y);
Adda_x := EO (adda_x, suba_x16, addas_x);
Adda_y := EO (adda_y, suba_y16, addas_y);*/

	// XOR with all ones is a bitwise NOT; the cast keeps it a 16-bit value.
	adda_x = (int16_t)(suba_x ? ~addas_x : addas_x);
	adda_y = (int16_t)(suba_y ? ~addas_y : addas_y);
}
5230 /** ADDBMUX - Address adder input B selection *******************
5232 This module selects the register to be updated by the address
5233 adder. This can be one of three registers, the A1 and A2
5234 pointers, or the A1 fractional part. It can also be zero, so that the step
5235 registers load directly into the pointers.
5250 INT16/ zero16 :LOCAL;
/**
 * ADDBMUX - select the value fed to input B of the address adder.
 *
 * addb_x, addb_y - outputs: the selected X/Y pair
 * addbsel        - low two bits select the source:
 *                  0 A1 pointer, 1 A2 pointer, 2 A1 fraction, 3 zero
 */
void ADDBMUX(int16_t &addb_x, int16_t &addb_y, uint8_t addbsel, int16_t a1_x, int16_t a1_y,
	int16_t a2_x, int16_t a2_y, int16_t a1_frac_x, int16_t a1_frac_y)
{
/*Zero := TIE0 (zero);
Zero16 := JOIN (zero16, zero, zero, zero, zero, zero, zero, zero,
zero, zero, zero, zero, zero, zero, zero, zero, zero);
Addbselb[0-1] := BUF8 (addbselb[0-1], addbsel[0-1]);
Addb_x := MX4 (addb_x, a1_x, a2_x, a1_frac_x, zero16, addbselb[0..1]);
Addb_y := MX4 (addb_y, a1_y, a2_y, a1_frac_y, zero16, addbselb[0..1]);*/

	switch (addbsel & 0x03)
	{
	case 0:
		addb_x = a1_x;
		addb_y = a1_y;
		break;
	case 1:
		addb_x = a2_x;
		addb_y = a2_y;
		break;
	case 2:
		addb_x = a1_frac_x;
		addb_y = a1_frac_y;
		break;
	default:
		// Zero lets the A input pass through the adder unchanged.
		addb_x = 0;
		addb_y = 0;
		break;
	}
}
5274 /** DATAMUX - Address local data bus selection ******************
5276 Select between the adder output and the input data bus
5289 INT16/ gpu_lo, gpu_hi
/**
 * DATAMUX - select between the address adder output and the GPU data bus.
 *
 * data_x, data_y - outputs
 * gpu_din        - 32-bit GPU data; low word goes to X, high word to Y
 * addq_x, addq_y - address adder outputs
 * addqsel        - true selects the adder outputs, false the GPU data
 */
void DATAMUX(int16_t &data_x, int16_t &data_y, uint32_t gpu_din, int16_t addq_x, int16_t addq_y, bool addqsel)
{
/*Gpu_lo := JOIN (gpu_lo, gpu_din{0..15});
Gpu_hi := JOIN (gpu_hi, gpu_din{16..31});
Addqselb := BUF8 (addqselb, addqsel);
Data_x := MX2 (data_x, gpu_lo, addq_x, addqselb);
Data_y := MX2 (data_y, gpu_hi, addq_y, addqselb);*/

	if (addqsel)
	{
		data_x = addq_x;
		data_y = addq_y;
		return;
	}

	data_x = (int16_t)(gpu_din & 0xFFFF);
	data_y = (int16_t)(gpu_din >> 16);
}
5309 /******************************************************************
5313 Blitter Address Adder
5314 ---------------------
5315 The blitter address adder is a pair of sixteen bit adders, one
5316 each for X and Y. The multiplexing of the input terms is
5317 performed elsewhere, but this adder can also perform modulo
5318 arithmetic to align X-addresses onto phrase boundaries.
5320 modx[0..2] take values
5327 ******************************************************************/
5329 /*IMPORT duplo, tosh;
5335 a1fracldi // propagate address adder carry
5340 clk[0] // co-processor clock
5348 Zero := TIE0 (zero);*/
/**
 * ADDRADD - the blitter address adder: a pair of 16-bit adders for X and Y.
 *
 * addq_x, addq_y - outputs: the sums; the low bits of X are cleared
 *                  according to modx
 * a1fracldi      - when set, the carry out of this addition is latched and
 *                  fed into the NEXT call's carry-in
 * adda_*, addb_* - adder inputs A and B
 * modx           - 0..6 clear that many low bits of the X result, 7 clears
 *                  all of it (see the mask table below)
 * suba_x, suba_y - invert the carry-in; with a complemented A input this
 *                  completes a subtraction
 *
 * The latched carries live in function-local statics, so the result of a
 * call depends on the previous call.
 */
void ADDRADD(int16_t &addq_x, int16_t &addq_y, bool a1fracldi,
	uint16_t adda_x, uint16_t adda_y, uint16_t addb_x, uint16_t addb_y, uint8_t modx, bool suba_x, bool suba_y)
{
/* Perform the addition */

/*Adder_x := ADD16 (addqt_x[0..15], co_x, adda_x{0..15}, addb_x{0..15}, ci_x);
Adder_y := ADD16 (addq_y[0..15], co_y, adda_y{0..15}, addb_y{0..15}, ci_y);*/

/* latch carry and propagate if required */

/*Cxt0 := AN2 (cxt[0], co_x, a1fracldi);
Cxt1 := FD1Q (cxt[1], cxt[0], clk[0]);
Ci_x := EO (ci_x, cxt[1], suba_x);
Cyt0 := AN2 (cyt[0], co_y, a1fracldi);
Cyt1 := FD1Q (cyt[1], cyt[0], clk[0]);
Ci_y := EO (ci_y, cyt[1], suba_y);*/

	// Carry out has to propagate between function calls...
	static uint16_t co_x = 0, co_y = 0;

	const uint16_t ci_x = co_x ^ (suba_x ? 1 : 0);
	const uint16_t ci_y = co_y ^ (suba_y ? 1 : 0);
	const uint32_t addqt_x = (uint32_t)adda_x + addb_x + ci_x;
	const uint32_t addqt_y = (uint32_t)adda_y + addb_y + ci_y;

	// Bit 16 of the sum is the carry out; keep it only when asked to.
	co_x = ((addqt_x & 0x10000) && a1fracldi ? 1 : 0);
	co_y = ((addqt_y & 0x10000) && a1fracldi ? 1 : 0);

/* Mask low bits of X to 0 if required */

/*Masksel := D38H (unused[0], masksel[0..4], maskbit[5], unused[1], modx[0..2]);
Maskbit[0-4] := OR2 (maskbit[0-4], masksel[0-4], maskbit[1-5]);
Mask[0-5] := MX2 (addq_x[0-5], addqt_x[0-5], zero, maskbit[0-5]);
Addq_x := JOIN (addq_x, addq_x[0..5], addqt_x[6..15]);
Addq_y := JOIN (addq_y, addq_y[0..15]);*/

	// Unsigned table: 0xFFFF does not fit an int16_t initialiser.
	static const uint16_t mask[8] = { 0xFFFF, 0xFFFE, 0xFFFC, 0xFFF8, 0xFFF0, 0xFFE0, 0xFFC0, 0x0000 };
	addq_x = (int16_t)(uint16_t)(addqt_x & mask[modx & 0x07]);
	addq_y = (int16_t)(uint16_t)(addqt_y & 0xFFFF);

//Unused[0-1] := DUMMY (unused[0-1]);
}
5404 wdata[0..63] // co-processor write data bus
5406 dcomp[0..7] // data byte equal flags
5407 srcd[0..7] // bits to use for bit to byte expansion
5408 zcomp[0..3] // output from Z comparators
5410 a1_x[0..1] // low two bits of A1 X pointer
5411 big_pix // pixel organisation is big-endian
5412 blitter_active // blitter is active
5413 clk // co-processor clock
5414 cmpdst // compare dest rather than source
5415 colorld // load the pattern color fields
5416 daddasel[0..2] // data adder input A selection
5417 daddbsel[0..3] // data adder input B selection
5418 daddmode[0..2] // data adder mode
5419 daddq_sel // select adder output vs. GPU data
5420 data[0..63] // co-processor read data bus
5421 data_ena // enable write data
5422 data_sel[0..1] // select data to write
5423 dbinh\[0..7] // byte oriented changed data inhibits
5424 dend[0..5] // end of changed write data zone
5425 dpipe[0..1] // load computed data pipe-line latch
5426 dstart[0..5] // start of changed write data zone
5427 dstdld[0..1] // dest data load (two halves)
5428 dstzld[0..1] // dest zed load (two halves)
5429 ext_int // enable extended precision intensity calculations
5430 INT32/ gpu_din // GPU data bus
5431 iincld // I increment load
5432 iincldx // alternate I increment load
5433 init_if // initialise I fraction phase
5434 init_ii // initialise I integer phase
5435 init_zf // initialise Z fraction phase
5436 intld[0..3] // computed intensities load
5437 istepadd // intensity step integer add
5438 istepfadd // intensity step fraction add
5439 istepld // I step load
5440 istepdld // I step delta load
5441 lfu_func[0..3] // LFU function code
5442 patdadd // pattern data gouraud add
5443 patdld[0..1] // pattern data load (two halves)
5444 pdsel[0..1] // select pattern data type
5445 phrase_mode // phrase write mode
5446 reload // transfer contents of double buffers
5447 reset\ // system reset
5448 srcd1ld[0..1] // source register 1 load (two halves)
5449 srcdread // source data read load enable
5450 srczread // source zed read load enable
5451 srcshift[0..5] // source alignment shift
5452 srcz1ld[0..1] // source zed 1 load (two halves)
5453 srcz2add // zed fraction gouraud add
5454 srcz2ld[0..1] // source zed 2 load (two halves)
5455 textrgb // texture mapping in RGB mode
5456 txtd[0..63] // data from the texture unit
5457 zedld[0..3] // computed zeds load
5458 zincld // Z increment load
5459 zmode[0..2] // Z comparator mode
5460 zpipe[0..1] // load computed zed pipe-line latch
5461 zstepadd // zed step integer add
5462 zstepfadd // zed step fraction add
5463 zstepld // Z step load
5464 zstepdld // Z step delta load
// DATA - software model of the blitter data path.
//
// Following the ASIC netlist fragments quoted in the comments below, the
// routine models, in order: the logic function unit (srcd/dstd combined under
// lfu_func), the byte-wise pattern data comparator, the 16-bit-lane zed
// comparator, the COMP_CTRL inhibit logic, the ADDARRAY adder array, and the
// output multiplexer that merges new data with dstd under a changed-data mask.
//
// Reference parameters: wdata receives the merged write data, patd can be
// overwritten with the packed adder outputs, nowrite is handed to COMP_CTRL as
// an output, and dcomp/zcomp are passed on to COMP_CTRL.
// Inputs of note: dstart/dend (low six bits used) bound the changed-data zone,
// dbinh carries the byte inhibits, data_sel indexes the write data source, and
// zmode bits 0/1/2 enable the less-than/equal/greater-than zed tests.
5468 void DATA(uint64_t &wdata, uint8_t &dcomp, uint8_t &zcomp, bool &nowrite,
5469 bool big_pix, bool cmpdst, uint8_t daddasel, uint8_t daddbsel, uint8_t daddmode, bool daddq_sel, uint8_t data_sel,
5470 uint8_t dbinh, uint8_t dend, uint8_t dstart, uint64_t dstd, uint32_t iinc, uint8_t lfu_func, uint64_t &patd, bool patdadd,
5471 bool phrase_mode, uint64_t srcd, bool srcdread, bool srczread, bool srcz2add, uint8_t zmode,
5472 bool bcompen, bool bkgwren, bool dcompen, uint8_t icount, uint8_t pixsize,
5473 uint64_t &srcz, uint64_t dstz, uint32_t zinc)
5476 Stuff we absolutely *need* to have passed in/out:
5478 patdadd, dstd, srcd, patd, daddasel, daddbsel, daddmode, iinc, srcz1, srcz2, big_pix, phrase_mode, cmpdst
5480 changed patd (wdata I guess...) (Nope. We pass it back directly now...)
5483 // Source data registers
5485 /*Data_src := DATA_SRC (srcdlo, srcdhi, srcz[0..1], srczo[0..1], srczp[0..1], srcz1[0..1], srcz2[0..1], big_pix,
5486 clk, gpu_din, intld[0..3], local_data0, local_data1, srcd1ld[0..1], srcdread, srczread, srcshift[0..5],
5487 srcz1ld[0..1], srcz2add, srcz2ld[0..1], zedld[0..3], zpipe[0..1]);
5488 Srcd[0-7] := JOIN (srcd[0-7], srcdlo{0-7});
5489 Srcd[8-31] := JOIN (srcd[8-31], srcdlo{8-31});
5490 Srcd[32-63] := JOIN (srcd[32-63], srcdhi{0-31});*/
5492 // Destination data registers
5494 /*Data_dst := DATA_DST (dstd[0..63], dstz[0..1], clk, dstdld[0..1], dstzld[0..1], load_data[0..1]);
5495 Dstdlo := JOIN (dstdlo, dstd[0..31]);
5496 Dstdhi := JOIN (dstdhi, dstd[32..63]);*/
5498 // Pattern and Color data registers
5500 // Looks like this is simply another register file for the pattern data registers. No adding or anything funky
5501 // going on. Note that patd & patdv will output the same info.
5502 // Patdldl/h (patdld[0..1]) can select the local_data bus to overwrite the current pattern data...
5503 // Actually, it can be either patdld OR patdadd...!
5504 /*Data_pat := DATA_PAT (colord[0..15], int0dp[8..10], int1dp[8..10], int2dp[8..10], int3dp[8..10], mixsel[0..2],
5505 patd[0..63], patdv[0..1], clk, colorld, dpipe[0], ext_int, gpu_din, intld[0..3], local_data0, local_data1,
5506 patdadd, patdld[0..1], reload, reset\);
5507 Patdlo := JOIN (patdlo, patd[0..31]);
5508 Patdhi := JOIN (patdhi, patd[32..63]);*/
5510 // Multiplying data Mixer (NOT IN JAGUAR I)
5512 /*Datamix := DATAMIX (patdo[0..1], clk, colord[0..15], dpipe[1], dstd[0..63], int0dp[8..10], int1dp[8..10],
5513 int2dp[8..10], int3dp[8..10], mixsel[0..2], patd[0..63], pdsel[0..1], srcd[0..63], textrgb, txtd[0..63]);*/
5515 // Logic function unit
5517 /*Lfu := LFU (lfu[0..1], srcdlo, srcdhi, dstdlo, dstdhi, lfu_func[0..3]);*/
5518 ////////////////////////////////////// C++ CODE //////////////////////////////////////
// Each lfu_func bit enables one minterm of (srcd, dstd): bit 0 = ~s & ~d,
// bit 1 = ~s & d, bit 2 = s & ~d, bit 3 = s & d. funcmask widens the selected
// bit to all 64 bit positions so the whole phrase is evaluated at once.
5519 uint64_t funcmask[2] = { 0, 0xFFFFFFFFFFFFFFFFLL };
5520 uint64_t func0 = funcmask[lfu_func & 0x01];
5521 uint64_t func1 = funcmask[(lfu_func >> 1) & 0x01];
5522 uint64_t func2 = funcmask[(lfu_func >> 2) & 0x01];
5523 uint64_t func3 = funcmask[(lfu_func >> 3) & 0x01];
5524 uint64_t lfu = (~srcd & ~dstd & func0) | (~srcd & dstd & func1) | (srcd & ~dstd & func2) | (srcd & dstd & func3);
5525 //////////////////////////////////////////////////////////////////////////////////////
5527 // Increment and Step Registers
5529 // Does it do anything without the step add lines? Check it!
5530 // No. This is pretty much just a register file without the Jaguar II lines...
5531 /*Inc_step := INC_STEP (iinc, istep[0..31], zinc, zstep[0..31], clk, ext_int, gpu_din, iincld, iincldx, istepadd,
5532 istepfadd, istepld, istepdld, reload, reset\, zincld, zstepadd, zstepfadd, zstepld, zstepdld);
5533 Istep := JOIN (istep, istep[0..31]);
5534 Zstep := JOIN (zstep, zstep[0..31]);*/
5536 // Pixel data comparator
5538 /*Datacomp := DATACOMP (dcomp[0..7], cmpdst, dstdlo, dstdhi, patdlo, patdhi, srcdlo, srcdhi);*/
5539 ////////////////////////////////////// C++ CODE //////////////////////////////////////
// cmpd is zero in every bit position where patd equals the comparison operand
// (dstd when cmpdst is set, srcd otherwise); each test below checks one byte
// lane of that XOR result for a full match.
5541 uint64_t cmpd = patd ^ (cmpdst ? dstd : srcd);
5543 if ((cmpd & 0x00000000000000FFLL) == 0)
5545 if ((cmpd & 0x000000000000FF00LL) == 0)
5547 if ((cmpd & 0x0000000000FF0000LL) == 0)
5549 if ((cmpd & 0x00000000FF000000LL) == 0)
5551 if ((cmpd & 0x000000FF00000000LL) == 0)
5553 if ((cmpd & 0x0000FF0000000000LL) == 0)
5555 if ((cmpd & 0x00FF000000000000LL) == 0)
5557 if ((cmpd & 0xFF00000000000000LL) == 0)
5559 //////////////////////////////////////////////////////////////////////////////////////
5561 // Zed comparator for Z-buffer operations
5563 /*Zedcomp := ZEDCOMP (zcomp[0..3], srczp[0..1], dstz[0..1], zmode[0..2]);*/
5564 ////////////////////////////////////// C++ CODE //////////////////////////////////////
5565 //srczp is srcz pipelined, also it goes through a source shift as well...
5566 /*The shift is basically like so (each piece is 16 bits long):
5569 srcz1lolo srcz1lohi srcz1hilo srcz1hihi srcz2lolo srcz2lohi srcz2hilo
5571 with srcshift bits 4 & 5 selecting the start position
5573 //So... basically what we have here is:
// Each test compares one 16-bit lane of srcz with the same lane of dstz.
// zmode bit 0 enables the less-than case, bit 1 the equal case and bit 2 the
// greater-than case; the lanes are compared in place (masked, not shifted).
5576 if ((((srcz & 0x000000000000FFFFLL) < (dstz & 0x000000000000FFFFLL)) && (zmode & 0x01))
5577 || (((srcz & 0x000000000000FFFFLL) == (dstz & 0x000000000000FFFFLL)) && (zmode & 0x02))
5578 || (((srcz & 0x000000000000FFFFLL) > (dstz & 0x000000000000FFFFLL)) && (zmode & 0x04)))
5581 if ((((srcz & 0x00000000FFFF0000LL) < (dstz & 0x00000000FFFF0000LL)) && (zmode & 0x01))
5582 || (((srcz & 0x00000000FFFF0000LL) == (dstz & 0x00000000FFFF0000LL)) && (zmode & 0x02))
5583 || (((srcz & 0x00000000FFFF0000LL) > (dstz & 0x00000000FFFF0000LL)) && (zmode & 0x04)))
5586 if ((((srcz & 0x0000FFFF00000000LL) < (dstz & 0x0000FFFF00000000LL)) && (zmode & 0x01))
5587 || (((srcz & 0x0000FFFF00000000LL) == (dstz & 0x0000FFFF00000000LL)) && (zmode & 0x02))
5588 || (((srcz & 0x0000FFFF00000000LL) > (dstz & 0x0000FFFF00000000LL)) && (zmode & 0x04)))
5591 if ((((srcz & 0xFFFF000000000000LL) < (dstz & 0xFFFF000000000000LL)) && (zmode & 0x01))
5592 || (((srcz & 0xFFFF000000000000LL) == (dstz & 0xFFFF000000000000LL)) && (zmode & 0x02))
5593 || (((srcz & 0xFFFF000000000000LL) > (dstz & 0xFFFF000000000000LL)) && (zmode & 0x04)))
5596 //TEMP, TO TEST IF ZCOMP IS THE CULPRIT...
5597 //Nope, this is NOT the problem...
5599 // We'll do the comparison/bit/byte inhibits here, since that's the way it happens
5600 // in the real thing (dcomp goes out to COMP_CTRL and back into DATA through dbinh)...
// The big_pix argument is replaced by a constant true here, and only the low
// byte of srcd is handed over for the bit-to-byte expansion.
5604 COMP_CTRL(dbinht, nowrite,
5605 bcompen, true/*big_pix*/, bkgwren, dcomp, dcompen, icount, pixsize, phrase_mode, srcd & 0xFF, zcomp);
5611 #ifdef VERBOSE_BLITTER_LOGGING
5613 WriteLog("\n[dcomp=%02X zcomp=%02X dbinh=%02X]\n", dcomp, zcomp, dbinh);
5616 //////////////////////////////////////////////////////////////////////////////////////
5619 // The data initializer - allows all four initial values to be computed from one (NOT IN JAGUAR I)
5621 /*Datinit := DATINIT (initcin[0..3], initinc[0..63], initpix[0..15], a1_x[0..1], big_pix, clk, iinc, init_if, init_ii,
5622 init_zf, istep[0..31], zinc, zstep[0..31]);*/
5624 // Adder array for Z and intensity increments
5626 /*Addarray := ADDARRAY (addq[0..3], clk, daddasel[0..2], daddbsel[0..3], daddmode[0..2], dstdlo, dstdhi, iinc,
5627 initcin[0..3], initinc[0..63], initpix[0..15], istep, patdv[0..1], srcdlo, srcdhi, srcz1[0..1],
5628 srcz2[0..1], reset\, zinc, zstep);*/
5629 /*void ADDARRAY(uint16_t * addq, uint8_t daddasel, uint8_t daddbsel, uint8_t daddmode,
5630 uint64_t dstd, uint32_t iinc, uint8_t initcin[], uint64_t initinc, uint16_t initpix,
5631 uint32_t istep, uint64_t patd, uint64_t srcd, uint64_t srcz1, uint64_t srcz2,
5632 uint32_t zinc, uint32_t zstep)*/
5633 ////////////////////////////////////// C++ CODE //////////////////////////////////////
// Going by the prototype quoted above, the carry-ins are all zero and the
// initinc, initpix, istep, srcz1, srcz2, zinc and zstep inputs are passed as 0.
5635 uint8_t initcin[4] = { 0, 0, 0, 0 };
5636 ADDARRAY(addq, daddasel, daddbsel, daddmode, dstd, iinc, initcin, 0, 0, 0, patd, srcd, 0, 0, 0, 0);
5638 //This is normally done asynchronously above (thru local_data) when in patdadd mode...
5639 //And now it's passed back to the caller to be persistent between calls...!
5640 //But it's causing some serious fuck-ups in T2K now... !!! FIX !!! [DONE--???]
5641 //Weird! It doesn't anymore...!
// Pack the four 16-bit adder outputs into one phrase, addq[0] in the low word.
5643 patd = ((uint64_t)addq[3] << 48) | ((uint64_t)addq[2] << 32) | ((uint64_t)addq[1] << 16) | (uint64_t)addq[0];
5644 //////////////////////////////////////////////////////////////////////////////////////
5646 // Local data bus multiplexer
5648 /*Local_mux := LOCAL_MUX (local_data[0..1], load_data[0..1],
5649 addq[0..3], gpu_din, data[0..63], blitter_active, daddq_sel);
5650 Local_data0 := JOIN (local_data0, local_data[0]);
5651 Local_data1 := JOIN (local_data1, local_data[1]);*/
5652 ////////////////////////////////////// C++ CODE //////////////////////////////////////
5653 //////////////////////////////////////////////////////////////////////////////////////
5655 // Data output multiplexer and tri-state drive
5657 /*Data_mux := DATA_MUX (wdata[0..63], addq[0..3], big_pix, dstdlo, dstdhi, dstz[0..1], data_sel[0..1], data_ena,
5658 dstart[0..5], dend[0..5], dbinh\[0..7], lfu[0..1], patdo[0..1], phrase_mode, srczo[0..1]);*/
5659 ////////////////////////////////////// C++ CODE //////////////////////////////////////
5660 // NOTE: patdo comes from DATAMIX and can be considered the same as patd for Jaguar I
5662 //////////////////////////////////////////////////////////////////////////////////////
5666 wdata[0..63] // co-processor write data bus
5669 big_pix // Pixel organisation is big-endian
5674 data_sel[0..1] // source of write data
5675 data_ena // enable write data onto read/write bus
5676 dstart[0..5] // start of changed write data
5677 dend[0..5] // end of changed write data
5678 dbinh\[0..7] // byte oriented changed data inhibits
5681 phrase_mode // phrase write mode
5686 /*INT32/ addql[0..1], ddatlo, ddathi zero32
5690 Phrase_mode\ := INV1 (phrase_mode\, phrase_mode);
5691 Zero := TIE0 (zero);
5692 Zero32 := JOIN (zero32, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero);*/
5694 /* Generate a changed data mask */
5696 /*Edis := OR6 (edis\, dend[0..5]);
5697 Ecoarse := DECL38E (e_coarse\[0..7], dend[3..5], edis\);
5698 E_coarse[0] := INV1 (e_coarse[0], e_coarse\[0]);
5699 Efine := DECL38E (unused[0], e_fine\[1..7], dend[0..2], e_coarse[0]);*/
5700 ////////////////////////////////////// C++ CODE //////////////////////////////////////
// Decoder lookup tables, indexed [enable row][3-bit select] where two-dimensional:
//   decl38e  - one output driven low per select in row 1; row 0 leaves all outputs high
//   dech38   - one output driven high per select
//   dech38el - one output driven high per select in row 0; row 1 leaves all outputs low
5701 uint8_t decl38e[2][8] = { { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF },
5702 { 0xFE, 0xFD, 0xFB, 0xF7, 0xEF, 0xDF, 0xBF, 0x7F } };
5703 uint8_t dech38[8] = { 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80 };
5704 uint8_t dech38el[2][8] = { { 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80 },
5705 { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } };
// A dend of zero (in its low six bits) disables the end decode, leaving every
// e_coarse bit high. The fine end decode is enabled only when e_coarse bit 0
// is low, so in every other case all e_fine bits are high as well.
5707 int en = (dend & 0x3F ? 1 : 0);
5708 uint8_t e_coarse = decl38e[en][(dend & 0x38) >> 3]; // Actually, this is e_coarse inverted...
5709 uint8_t e_fine = decl38e[(e_coarse & 0x01) ^ 0x01][dend & 0x07];
5711 //////////////////////////////////////////////////////////////////////////////////////
5713 /*Scoarse := DECH38 (s_coarse[0..7], dstart[3..5]);
5714 Sfen\ := INV1 (sfen\, s_coarse[0]);
5715 Sfine := DECH38EL (s_fine[0..7], dstart[0..2], sfen\);*/
5716 ////////////////////////////////////// C++ CODE //////////////////////////////////////
// s_fine is decoded only when s_coarse bit 0 is set (dstart bits 3..5 all
// zero); otherwise it is all zero.
5717 uint8_t s_coarse = dech38[(dstart & 0x38) >> 3];
5718 uint8_t s_fine = dech38el[(s_coarse & 0x01) ^ 0x01][dstart & 0x07];
5719 //////////////////////////////////////////////////////////////////////////////////////
5721 /*Maskt[0] := BUF1 (maskt[0], s_fine[0]);
5722 Maskt[1-7] := OAN1P (maskt[1-7], maskt[0-6], s_fine[1-7], e_fine\[1-7]);*/
5723 ////////////////////////////////////// C++ CODE //////////////////////////////////////
// Ripple chain for mask bits 0..7: bit k is set when (bit k-1 is set or s_fine
// bit k is set) and e_fine bit k is still high.
5724 uint16_t maskt = s_fine & 0x0001;
5725 maskt |= (((maskt & 0x0001) || (s_fine & 0x02)) && (e_fine & 0x02) ? 0x0002 : 0x0000);
5726 maskt |= (((maskt & 0x0002) || (s_fine & 0x04)) && (e_fine & 0x04) ? 0x0004 : 0x0000);
5727 maskt |= (((maskt & 0x0004) || (s_fine & 0x08)) && (e_fine & 0x08) ? 0x0008 : 0x0000);
5728 maskt |= (((maskt & 0x0008) || (s_fine & 0x10)) && (e_fine & 0x10) ? 0x0010 : 0x0000);
5729 maskt |= (((maskt & 0x0010) || (s_fine & 0x20)) && (e_fine & 0x20) ? 0x0020 : 0x0000);
5730 maskt |= (((maskt & 0x0020) || (s_fine & 0x40)) && (e_fine & 0x40) ? 0x0040 : 0x0000);
5731 maskt |= (((maskt & 0x0040) || (s_fine & 0x80)) && (e_fine & 0x80) ? 0x0080 : 0x0000);
5732 //////////////////////////////////////////////////////////////////////////////////////
5734 /* Produce a look-ahead on the ripple carry:
5735 masktla = s_coarse[0] . /e_coarse[0] */
5736 /*Masktla := AN2 (masktla, s_coarse[0], e_coarse\[0]);
5737 Maskt[8] := OAN1P (maskt[8], masktla, s_coarse[1], e_coarse\[1]);
5738 Maskt[9-14] := OAN1P (maskt[9-14], maskt[8-13], s_coarse[2-7], e_coarse\[2-7]);*/
5739 ////////////////////////////////////// C++ CODE //////////////////////////////////////
// Same ripple for the byte-level terms: mask bit 8 starts from the look-ahead
// (s_coarse bit 0 and e_coarse bit 0) or s_coarse bit 1, then bits 9..14 chain on.
5740 maskt |= (((s_coarse & e_coarse & 0x01) || (s_coarse & 0x02)) && (e_coarse & 0x02) ? 0x0100 : 0x0000);
5741 maskt |= (((maskt & 0x0100) || (s_coarse & 0x04)) && (e_coarse & 0x04) ? 0x0200 : 0x0000);
5742 maskt |= (((maskt & 0x0200) || (s_coarse & 0x08)) && (e_coarse & 0x08) ? 0x0400 : 0x0000);
5743 maskt |= (((maskt & 0x0400) || (s_coarse & 0x10)) && (e_coarse & 0x10) ? 0x0800 : 0x0000);
5744 maskt |= (((maskt & 0x0800) || (s_coarse & 0x20)) && (e_coarse & 0x20) ? 0x1000 : 0x0000);
5745 maskt |= (((maskt & 0x1000) || (s_coarse & 0x40)) && (e_coarse & 0x40) ? 0x2000 : 0x0000);
5746 maskt |= (((maskt & 0x2000) || (s_coarse & 0x80)) && (e_coarse & 0x80) ? 0x4000 : 0x0000);
5747 //////////////////////////////////////////////////////////////////////////////////////
5749 /* The bit terms are mirrored for big-endian pixels outside phrase
5750 mode. The byte terms are mirrored for big-endian pixels in phrase
5753 /*Mirror_bit := AN2M (mir_bit, phrase_mode\, big_pix);
5754 Mirror_byte := AN2H (mir_byte, phrase_mode, big_pix);
5756 Masktb[14] := BUF1 (masktb[14], maskt[14]);
5757 Masku[0] := MX4 (masku[0], maskt[0], maskt[7], maskt[14], zero, mir_bit, mir_byte);
5758 Masku[1] := MX4 (masku[1], maskt[1], maskt[6], maskt[14], zero, mir_bit, mir_byte);
5759 Masku[2] := MX4 (masku[2], maskt[2], maskt[5], maskt[14], zero, mir_bit, mir_byte);
5760 Masku[3] := MX4 (masku[3], maskt[3], maskt[4], masktb[14], zero, mir_bit, mir_byte);
5761 Masku[4] := MX4 (masku[4], maskt[4], maskt[3], masktb[14], zero, mir_bit, mir_byte);
5762 Masku[5] := MX4 (masku[5], maskt[5], maskt[2], masktb[14], zero, mir_bit, mir_byte);
5763 Masku[6] := MX4 (masku[6], maskt[6], maskt[1], masktb[14], zero, mir_bit, mir_byte);
5764 Masku[7] := MX4 (masku[7], maskt[7], maskt[0], masktb[14], zero, mir_bit, mir_byte);
5765 Masku[8] := MX2 (masku[8], maskt[8], maskt[13], mir_byte);
5766 Masku[9] := MX2 (masku[9], maskt[9], maskt[12], mir_byte);
5767 Masku[10] := MX2 (masku[10], maskt[10], maskt[11], mir_byte);
5768 Masku[11] := MX2 (masku[11], maskt[11], maskt[10], mir_byte);
5769 Masku[12] := MX2 (masku[12], maskt[12], maskt[9], mir_byte);
5770 Masku[13] := MX2 (masku[13], maskt[13], maskt[8], mir_byte);
5771 Masku[14] := MX2 (masku[14], maskt[14], maskt[0], mir_byte);*/
5772 ////////////////////////////////////// C++ CODE //////////////////////////////////////
// As in the COMP_CTRL call, big_pix is replaced by a constant true.
5773 bool mir_bit = true/*big_pix*/ && !phrase_mode;
5774 bool mir_byte = true/*big_pix*/ && phrase_mode;
5775 uint16_t masku = maskt;
// Bit reversal of the low eight terms: maskt bit k feeds masku bit 7-k.
// Presumably the mir_bit case of the netlist above - confirm against the
// enclosing condition.
5780 masku |= (maskt >> 7) & 0x0001;
5781 masku |= (maskt >> 5) & 0x0002;
5782 masku |= (maskt >> 3) & 0x0004;
5783 masku |= (maskt >> 1) & 0x0008;
5784 masku |= (maskt << 1) & 0x0010;
5785 masku |= (maskt << 3) & 0x0020;
5786 masku |= (maskt << 5) & 0x0040;
5787 masku |= (maskt << 7) & 0x0080;
// Each of masku bits 0..7 is taken from maskt bit 14 here (presumably the
// mir_byte case of the netlist above - confirm against the enclosing condition).
5793 masku |= (maskt >> 14) & 0x0001;
5794 masku |= (maskt >> 13) & 0x0002;
5795 masku |= (maskt >> 12) & 0x0004;
5796 masku |= (maskt >> 11) & 0x0008;
5797 masku |= (maskt >> 10) & 0x0010;
5798 masku |= (maskt >> 9) & 0x0020;
5799 masku |= (maskt >> 8) & 0x0040;
5800 masku |= (maskt >> 7) & 0x0080;
// Byte terms reversed: maskt bits 13..8 feed masku bits 8..13.
5802 masku |= (maskt >> 5) & 0x0100;
5803 masku |= (maskt >> 3) & 0x0200;
5804 masku |= (maskt >> 1) & 0x0400;
5805 masku |= (maskt << 1) & 0x0800;
5806 masku |= (maskt << 3) & 0x1000;
5807 masku |= (maskt << 5) & 0x2000;
// NOTE(review): this takes maskt bit 7 for masku bit 14, whereas the netlist
// quoted above lists maskt[0] as the mirrored source for masku[14] - confirm
// which one is intended.
5808 masku |= (maskt << 7) & 0x4000;
5810 //////////////////////////////////////////////////////////////////////////////////////
5812 /* The maskt terms define the area for changed data, but the byte
5813 inhibit terms can override these */
5815 /*Mask[0-7] := AN2 (mask[0-7], masku[0-7], dbinh\[0]);
5816 Mask[8-14] := AN2H (mask[8-14], masku[8-14], dbinh\[1-7]);*/
5817 ////////////////////////////////////// C++ CODE //////////////////////////////////////
// A set dbinh bit 0 clears mask bits 0..7; a set dbinh bit k (k = 1..7) clears
// mask bit k+7.
5818 uint16_t mask = masku & (!(dbinh & 0x01) ? 0xFFFF : 0xFF00);
5819 mask &= ~(((uint16_t)dbinh & 0x00FE) << 7);
5820 //////////////////////////////////////////////////////////////////////////////////////
5822 /*Addql[0] := JOIN (addql[0], addq[0..1]);
5823 Addql[1] := JOIN (addql[1], addq[2..3]);
5825 Dsel0b[0-1] := BUF8 (dsel0b[0-1], data_sel[0]);
5826 Dsel1b[0-1] := BUF8 (dsel1b[0-1], data_sel[1]);
5827 Ddatlo := MX4 (ddatlo, patd[0], lfu[0], addql[0], zero32, dsel0b[0], dsel1b[0]);
5828 Ddathi := MX4 (ddathi, patd[1], lfu[1], addql[1], zero32, dsel0b[1], dsel1b[1]);*/
5829 ////////////////////////////////////// C++ CODE //////////////////////////////////////
// Entry 2 of the source table is the packed adder output; data_sel picks the
// entry that becomes the candidate write data.
5833 dmux[2] = ((uint64_t)addq[3] << 48) | ((uint64_t)addq[2] << 32) | ((uint64_t)addq[1] << 16) | (uint64_t)addq[0];
5835 uint64_t ddat = dmux[data_sel];
5836 //////////////////////////////////////////////////////////////////////////////////////
5838 /*Zed_sel := AN2 (zed_sel, data_sel[0..1]);
5839 Zed_selb[0-1] := BUF8 (zed_selb[0-1], zed_sel);
5841 Dat[0-7] := MX4 (dat[0-7], dstdlo{0-7}, ddatlo{0-7}, dstzlo{0-7}, srczlo{0-7}, mask[0-7], zed_selb[0]);
5842 Dat[8-15] := MX4 (dat[8-15], dstdlo{8-15}, ddatlo{8-15}, dstzlo{8-15}, srczlo{8-15}, mask[8], zed_selb[0]);
5843 Dat[16-23] := MX4 (dat[16-23], dstdlo{16-23}, ddatlo{16-23}, dstzlo{16-23}, srczlo{16-23}, mask[9], zed_selb[0]);
5844 Dat[24-31] := MX4 (dat[24-31], dstdlo{24-31}, ddatlo{24-31}, dstzlo{24-31}, srczlo{24-31}, mask[10], zed_selb[0]);
5845 Dat[32-39] := MX4 (dat[32-39], dstdhi{0-7}, ddathi{0-7}, dstzhi{0-7}, srczhi{0-7}, mask[11], zed_selb[1]);
5846 Dat[40-47] := MX4 (dat[40-47], dstdhi{8-15}, ddathi{8-15}, dstzhi{8-15}, srczhi{8-15}, mask[12], zed_selb[1]);
5847 Dat[48-55] := MX4 (dat[48-55], dstdhi{16-23}, ddathi{16-23}, dstzhi{16-23}, srczhi{16-23}, mask[13], zed_selb[1]);
5848 Dat[56-63] := MX4 (dat[56-63], dstdhi{24-31}, ddathi{24-31}, dstzhi{24-31}, srczhi{24-31}, mask[14], zed_selb[1]);*/
5849 ////////////////////////////////////// C++ CODE //////////////////////////////////////
// Byte 0 is merged bit by bit using mask bits 0..7; bytes 1..7 are taken whole
// from ddat or dstd according to mask bits 8..14.
5850 wdata = ((ddat & mask) | (dstd & ~mask)) & 0x00000000000000FFLL;
5851 wdata |= (mask & 0x0100 ? ddat : dstd) & 0x000000000000FF00LL;
5852 wdata |= (mask & 0x0200 ? ddat : dstd) & 0x0000000000FF0000LL;
5853 wdata |= (mask & 0x0400 ? ddat : dstd) & 0x00000000FF000000LL;
5854 wdata |= (mask & 0x0800 ? ddat : dstd) & 0x000000FF00000000LL;
5855 wdata |= (mask & 0x1000 ? ddat : dstd) & 0x0000FF0000000000LL;
5856 wdata |= (mask & 0x2000 ? ddat : dstd) & 0x00FF000000000000LL;
5857 wdata |= (mask & 0x4000 ? ddat : dstd) & 0xFF00000000000000LL;
// NOTE(review): this trace goes to stdout through printf, while the other
// traces in this routine use WriteLog - confirm that is intentional.
5860 printf("\n[ddat=%08X%08X dstd=%08X%08X wdata=%08X%08X mask=%04X]\n",
5861 (uint32_t)(ddat >> 32), (uint32_t)(ddat & 0xFFFFFFFF),
5862 (uint32_t)(dstd >> 32), (uint32_t)(dstd & 0xFFFFFFFF),
5863 (uint32_t)(wdata >> 32), (uint32_t)(wdata & 0xFFFFFFFF), mask);
5866 //This is a crappy way of handling this, but it should work for now...
// Same merge as for wdata, applied to the zed data: srcz where the mask is
// set, dstz elsewhere.
5868 zwdata = ((srcz & mask) | (dstz & ~mask)) & 0x00000000000000FFLL;
5869 zwdata |= (mask & 0x0100 ? srcz : dstz) & 0x000000000000FF00LL;
5870 zwdata |= (mask & 0x0200 ? srcz : dstz) & 0x0000000000FF0000LL;
5871 zwdata |= (mask & 0x0400 ? srcz : dstz) & 0x00000000FF000000LL;
5872 zwdata |= (mask & 0x0800 ? srcz : dstz) & 0x000000FF00000000LL;
5873 zwdata |= (mask & 0x1000 ? srcz : dstz) & 0x0000FF0000000000LL;
5874 zwdata |= (mask & 0x2000 ? srcz : dstz) & 0x00FF000000000000LL;
5875 zwdata |= (mask & 0x4000 ? srcz : dstz) & 0xFF00000000000000LL;
5878 WriteLog("\n[srcz=%08X%08X dstz=%08X%08X zwdata=%08X%08X mask=%04X]\n",
5879 (uint32_t)(srcz >> 32), (uint32_t)(srcz & 0xFFFFFFFF),
5880 (uint32_t)(dstz >> 32), (uint32_t)(dstz & 0xFFFFFFFF),
5881 (uint32_t)(zwdata >> 32), (uint32_t)(zwdata & 0xFFFFFFFF), mask);
5885 //////////////////////////////////////////////////////////////////////////////////////
5887 /*Data_enab[0-1] := BUF8 (data_enab[0-1], data_ena);
5888 Datadrv[0-31] := TS (wdata[0-31], dat[0-31], data_enab[0]);
5889 Datadrv[32-63] := TS (wdata[32-63], dat[32-63], data_enab[1]);
5891 Unused[0] := DUMMY (unused[0]);
5897 /** COMP_CTRL - Comparator output control logic *****************
5899 This block is responsible for taking the comparator outputs and
5900 using them as appropriate to inhibit writes. Two methods are
5901 supported for inhibiting write data:
5903 - suppression of the inner loop controlled write operation
5904 - a set of eight byte inhibit lines to write back dest data
5906 The first technique is used in pixel oriented modes, the second in
5907 phrase mode, but the phrase mode form is only applicable to eight
5908 and sixteen bit pixel modes.
5910 Writes can be suppressed by data being equal, by the Z comparator
5911 conditions being met, or by the bit to pixel expansion scheme.
5913 Pipe-lining issues: the data derived comparator outputs are stable
5914 until the next data read, well after the affected write from this
5915 operation. However, the inner counter bits can count immediately
5916 before the ack for the last write. Therefore, it is necessary to
5917 delay bcompbit select terms by one inner loop pipe-line stage,
5918 when generating the select for the data control - the output is
5919 delayed one further tick to give it write data timing (2/34).
5921 There is also a problem with computed data - the new values are
5922 calculated before the write associated with the old value has been
5923 performed. This is taken care of within the zed comparator by
5924 pipe-lining the comparator inputs where appropriate.
5927 //#define LOG_COMP_CTRL
5929 dbinh\[0..7] // destination byte inhibit lines
5930 nowrite // suppress inner loop write operation
5932 bcompen // bit selector inhibit enable
5933 big_pix // pixels are big-endian
5934 bkgwren // enable dest data write in pix inhibit
5935 clk // co-processor clock
5936 dcomp[0..7] // output of data byte comparators
5937 dcompen // data comparator inhibit enable
5938 icount[0..2] // low bits of inner count
5939 pixsize[0..2] // destination pixel size
5940 phrase_mode // phrase write mode
5941 srcd[0..7] // bits to use for bit to byte expansion
5942 step_inner // inner loop advance
5943 zcomp[0..3] // output of word zed comparators
5945 void COMP_CTRL(uint8_t &dbinh, bool &nowrite,
5946 bool bcompen, bool big_pix, bool bkgwren, uint8_t dcomp, bool dcompen, uint8_t icount,
5947 uint8_t pixsize, bool phrase_mode, uint8_t srcd, uint8_t zcomp)
5951 /*Bkgwren\ := INV1 (bkgwren\, bkgwren);
5952 Phrase_mode\ := INV1 (phrase_mode\, phrase_mode);
5953 Pixsize\[0-2] := INV2 (pixsize\[0-2], pixsize[0-2]);*/
5955 /* The bit comparator bits are derived from the source data, which
5956 will have been suitably aligned for phrase mode. The contents of
5957 the inner counter are used to select which bit to use.
5959 When not in phrase mode the inner count value is used to select
5960 one bit. It is assumed that the count has already occurred, so,
5961 7 selects bit 0, etc. In big-endian pixel mode, this turns round,
5962 so that a count of 7 selects bit 7.
5964 In phrase mode, the eight bits are used directly, and this mode is
5965 only applicable to 8-bit pixel mode (2/34) */
5967 /*Bcompselt[0-2] := EO (bcompselt[0-2], icount[0-2], big_pix);
5968 Bcompbit := MX8 (bcompbit, srcd[7], srcd[6], srcd[5],
5969 srcd[4], srcd[3], srcd[2], srcd[1], srcd[0], bcompselt[0..2]);
5970 Bcompbit\ := INV1 (bcompbit\, bcompbit);*/
5971 ////////////////////////////////////// C++ CODE //////////////////////////////////////
5972 #ifdef LOG_COMP_CTRL
5975 WriteLog("\n [bcompen=%s dcompen=%s phrase_mode=%s bkgwren=%s dcomp=%02X zcomp=%02X]", (bcompen ? "T" : "F"), (dcompen ? "T" : "F"), (phrase_mode ? "T" : "F"), (bkgwren ? "T" : "F"), dcomp, zcomp);
5980 uint8_t bcompselt = (big_pix ? ~icount : icount) & 0x07;
5981 uint8_t bitmask[8] = { 0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02, 0x01 };
5982 bool bcompbit = srcd & bitmask[bcompselt];
5983 //////////////////////////////////////////////////////////////////////////////////////
5985 /* pipe-line the count */
5986 /*Bcompsel[0-2] := FDSYNC (bcompsel[0-2], bcompselt[0-2], step_inner, clk);
5987 Bcompbt := MX8 (bcompbitpt, srcd[7], srcd[6], srcd[5],
5988 srcd[4], srcd[3], srcd[2], srcd[1], srcd[0], bcompsel[0..2]);
5989 Bcompbitp := FD1Q (bcompbitp, bcompbitpt, clk);
5990 Bcompbitp\ := INV1 (bcompbitp\, bcompbitp);*/
5992 /* For pixel mode, generate the write inhibit signal for all modes
5993 on bit inhibit, for 8 and 16 bit modes on comparator inhibit, and
5994 for 16 bit mode on Z inhibit
5996 Nowrite = bcompen . /bcompbit . /phrase_mode
5997 + dcompen . dcomp[0] . /phrase_mode . pixsize = 011
5998 + dcompen . dcomp[0..1] . /phrase_mode . pixsize = 100
5999 + zcomp[0] . /phrase_mode . pixsize = 100
6002 /*Nowt0 := NAN3 (nowt[0], bcompen, bcompbit\, phrase_mode\);
6003 Nowt1 := ND6 (nowt[1], dcompen, dcomp[0], phrase_mode\, pixsize\[2], pixsize[0..1]);
6004 Nowt2 := ND7 (nowt[2], dcompen, dcomp[0..1], phrase_mode\, pixsize[2], pixsize\[0..1]);
6005 Nowt3 := NAN5 (nowt[3], zcomp[0], phrase_mode\, pixsize[2], pixsize\[0..1]);
6006 Nowt4 := NAN4 (nowt[4], nowt[0..3]);
6007 Nowrite := AN2 (nowrite, nowt[4], bkgwren\);*/
6008 ////////////////////////////////////// C++ CODE //////////////////////////////////////
6009 nowrite = ((bcompen && !bcompbit && !phrase_mode)
6010 || (dcompen && (dcomp & 0x01) && !phrase_mode && (pixsize == 3))
6011 || (dcompen && ((dcomp & 0x03) == 0x03) && !phrase_mode && (pixsize == 4))
6012 || ((zcomp & 0x01) && !phrase_mode && (pixsize == 4)))
6014 //////////////////////////////////////////////////////////////////////////////////////
6016 /*Winht := NAN3 (winht, bcompen, bcompbitp\, phrase_mode\);
6017 Winhibit := NAN4 (winhibit, winht, nowt[1..3]);*/
6018 ////////////////////////////////////// C++ CODE //////////////////////////////////////
6019 //This is the same as above, but with bcompbit delayed one tick and called 'winhibit'
6020 //Small difference: Besides the pipeline effect, it's also not using !bkgwren...
6021 // bool winhibit = (bcompen && !
6022 bool winhibit = (bcompen && !bcompbit && !phrase_mode)
6023 || (dcompen && (dcomp & 0x01) && !phrase_mode && (pixsize == 3))
6024 || (dcompen && ((dcomp & 0x03) == 0x03) && !phrase_mode && (pixsize == 4))
6025 || ((zcomp & 0x01) && !phrase_mode && (pixsize == 4));
6026 #ifdef LOG_COMP_CTRL
6029 WriteLog("[nw=%s wi=%s]", (nowrite ? "T" : "F"), (winhibit ? "T" : "F"));
6033 //////////////////////////////////////////////////////////////////////////////////////
6035 /* For phrase mode, generate the byte inhibit signals for eight bit
6036 mode 011, or sixteen bit mode 100
6037 dbinh\[0] = pixsize[2] . zcomp[0]
6038 + pixsize[2] . dcomp[0] . dcomp[1] . dcompen
6039 + /pixsize[2] . dcomp[0] . dcompen
6040 + /srcd[0] . bcompen
6042 Inhibits 0-3 are also used when not in phrase mode to write back
6046 /*Srcd\[0-7] := INV1 (srcd\[0-7], srcd[0-7]);
6048 Di0t0 := NAN2H (di0t[0], pixsize[2], zcomp[0]);
6049 Di0t1 := NAN4H (di0t[1], pixsize[2], dcomp[0..1], dcompen);
6050 Di0t2 := NAN2 (di0t[2], srcd\[0], bcompen);
6051 Di0t3 := NAN3 (di0t[3], pixsize\[2], dcomp[0], dcompen);
6052 Di0t4 := NAN4 (di0t[4], di0t[0..3]);
6053 Dbinh[0] := ANR1P (dbinh\[0], di0t[4], phrase_mode, winhibit);*/
6054 ////////////////////////////////////// C++ CODE //////////////////////////////////////
6056 bool di0t0_1 = ((pixsize & 0x04) && (zcomp & 0x01))
6057 || ((pixsize & 0x04) && (dcomp & 0x01) && (dcomp & 0x02) && dcompen);
6058 bool di0t4 = di0t0_1
6059 || (!(srcd & 0x01) && bcompen)
6060 || (!(pixsize & 0x04) && (dcomp & 0x01) && dcompen);
6061 dbinh |= (!((di0t4 && phrase_mode) || winhibit) ? 0x01 : 0x00);
6062 #ifdef LOG_COMP_CTRL
6065 WriteLog("[di0t0_1=%s di0t4=%s]", (di0t0_1 ? "T" : "F"), (di0t4 ? "T" : "F"));
6069 //////////////////////////////////////////////////////////////////////////////////////
6071 /*Di1t0 := NAN3 (di1t[0], pixsize\[2], dcomp[1], dcompen);
6072 Di1t1 := NAN2 (di1t[1], srcd\[1], bcompen);
6073 Di1t2 := NAN4 (di1t[2], di0t[0..1], di1t[0..1]);
6074 Dbinh[1] := ANR1 (dbinh\[1], di1t[2], phrase_mode, winhibit);*/
6075 ////////////////////////////////////// C++ CODE //////////////////////////////////////
6076 bool di1t2 = di0t0_1
6077 || (!(srcd & 0x02) && bcompen)
6078 || (!(pixsize & 0x04) && (dcomp & 0x02) && dcompen);
6079 dbinh |= (!((di1t2 && phrase_mode) || winhibit) ? 0x02 : 0x00);
6080 #ifdef LOG_COMP_CTRL
6083 WriteLog("[di1t2=%s]", (di1t2 ? "T" : "F"));
6087 //////////////////////////////////////////////////////////////////////////////////////
6089 /*Di2t0 := NAN2H (di2t[0], pixsize[2], zcomp[1]);
6090 Di2t1 := NAN4H (di2t[1], pixsize[2], dcomp[2..3], dcompen);
6091 Di2t2 := NAN2 (di2t[2], srcd\[2], bcompen);
6092 Di2t3 := NAN3 (di2t[3], pixsize\[2], dcomp[2], dcompen);
6093 Di2t4 := NAN4 (di2t[4], di2t[0..3]);
6094 Dbinh[2] := ANR1 (dbinh\[2], di2t[4], phrase_mode, winhibit);*/
6095 ////////////////////////////////////// C++ CODE //////////////////////////////////////
6096 //[bcompen=F dcompen=T phrase_mode=T bkgwren=F][nw=F wi=F]
6097 //[di0t0_1=F di0t4=F][di1t2=F][di2t0_1=T di2t4=T][di3t2=T][di4t0_1=F di2t4=F][di5t2=F][di6t0_1=F di6t4=F][di7t2=F]
6098 //[dcomp=$00 dbinh=$0C][7804780400007804] (icount=0005, inc=4)
6099 bool di2t0_1 = ((pixsize & 0x04) && (zcomp & 0x02))
6100 || ((pixsize & 0x04) && (dcomp & 0x04) && (dcomp & 0x08) && dcompen);
6101 bool di2t4 = di2t0_1
6102 || (!(srcd & 0x04) && bcompen)
6103 || (!(pixsize & 0x04) && (dcomp & 0x04) && dcompen);
6104 dbinh |= (!((di2t4 && phrase_mode) || winhibit) ? 0x04 : 0x00);
6105 #ifdef LOG_COMP_CTRL
6108 WriteLog("[di2t0_1=%s di2t4=%s]", (di2t0_1 ? "T" : "F"), (di2t4 ? "T" : "F"));
6112 //////////////////////////////////////////////////////////////////////////////////////
6114 /*Di3t0 := NAN3 (di3t[0], pixsize\[2], dcomp[3], dcompen);
6115 Di3t1 := NAN2 (di3t[1], srcd\[3], bcompen);
6116 Di3t2 := NAN4 (di3t[2], di2t[0..1], di3t[0..1]);
6117 Dbinh[3] := ANR1 (dbinh\[3], di3t[2], phrase_mode, winhibit);*/
6118 ////////////////////////////////////// C++ CODE //////////////////////////////////////
6119 bool di3t2 = di2t0_1
6120 || (!(srcd & 0x08) && bcompen)
6121 || (!(pixsize & 0x04) && (dcomp & 0x08) && dcompen);
6122 dbinh |= (!((di3t2 && phrase_mode) || winhibit) ? 0x08 : 0x00);
6123 #ifdef LOG_COMP_CTRL
6126 WriteLog("[di3t2=%s]", (di3t2 ? "T" : "F"));
6130 //////////////////////////////////////////////////////////////////////////////////////
6132 /*Di4t0 := NAN2H (di4t[0], pixsize[2], zcomp[2]);
6133 Di4t1 := NAN4H (di4t[1], pixsize[2], dcomp[4..5], dcompen);
6134 Di4t2 := NAN2 (di4t[2], srcd\[4], bcompen);
6135 Di4t3 := NAN3 (di4t[3], pixsize\[2], dcomp[4], dcompen);
6136 Di4t4 := NAN4 (di4t[4], di4t[0..3]);
6137 Dbinh[4] := NAN2 (dbinh\[4], di4t[4], phrase_mode);*/
6138 ////////////////////////////////////// C++ CODE //////////////////////////////////////
6139 bool di4t0_1 = ((pixsize & 0x04) && (zcomp & 0x04))
6140 || ((pixsize & 0x04) && (dcomp & 0x10) && (dcomp & 0x20) && dcompen);
6141 bool di4t4 = di4t0_1
6142 || (!(srcd & 0x10) && bcompen)
6143 || (!(pixsize & 0x04) && (dcomp & 0x10) && dcompen);
6144 dbinh |= (!(di4t4 && phrase_mode) ? 0x10 : 0x00);
6145 #ifdef LOG_COMP_CTRL
6148 WriteLog("[di4t0_1=%s di2t4=%s]", (di4t0_1 ? "T" : "F"), (di4t4 ? "T" : "F"));
6152 //////////////////////////////////////////////////////////////////////////////////////
6154 /*Di5t0 := NAN3 (di5t[0], pixsize\[2], dcomp[5], dcompen);
6155 Di5t1 := NAN2 (di5t[1], srcd\[5], bcompen);
6156 Di5t2 := NAN4 (di5t[2], di4t[0..1], di5t[0..1]);
6157 Dbinh[5] := NAN2 (dbinh\[5], di5t[2], phrase_mode);*/
6158 ////////////////////////////////////// C++ CODE //////////////////////////////////////
6159 bool di5t2 = di4t0_1
6160 || (!(srcd & 0x20) && bcompen)
6161 || (!(pixsize & 0x04) && (dcomp & 0x20) && dcompen);
6162 dbinh |= (!(di5t2 && phrase_mode) ? 0x20 : 0x00);
6163 #ifdef LOG_COMP_CTRL
6166 WriteLog("[di5t2=%s]", (di5t2 ? "T" : "F"));
6170 //////////////////////////////////////////////////////////////////////////////////////
6172 /*Di6t0 := NAN2H (di6t[0], pixsize[2], zcomp[3]);
6173 Di6t1 := NAN4H (di6t[1], pixsize[2], dcomp[6..7], dcompen);
6174 Di6t2 := NAN2 (di6t[2], srcd\[6], bcompen);
6175 Di6t3 := NAN3 (di6t[3], pixsize\[2], dcomp[6], dcompen);
6176 Di6t4 := NAN4 (di6t[4], di6t[0..3]);
6177 Dbinh[6] := NAN2 (dbinh\[6], di6t[4], phrase_mode);*/
6178 ////////////////////////////////////// C++ CODE //////////////////////////////////////
6179 bool di6t0_1 = ((pixsize & 0x04) && (zcomp & 0x08))
6180 || ((pixsize & 0x04) && (dcomp & 0x40) && (dcomp & 0x80) && dcompen);
6181 bool di6t4 = di6t0_1
6182 || (!(srcd & 0x40) && bcompen)
6183 || (!(pixsize & 0x04) && (dcomp & 0x40) && dcompen);
6184 dbinh |= (!(di6t4 && phrase_mode) ? 0x40 : 0x00);
6185 #ifdef LOG_COMP_CTRL
6188 WriteLog("[di6t0_1=%s di6t4=%s]", (di6t0_1 ? "T" : "F"), (di6t4 ? "T" : "F"));
6192 //////////////////////////////////////////////////////////////////////////////////////
6194 /*Di7t0 := NAN3 (di7t[0], pixsize\[2], dcomp[7], dcompen);
6195 Di7t1 := NAN2 (di7t[1], srcd\[7], bcompen);
6196 Di7t2 := NAN4 (di7t[2], di6t[0..1], di7t[0..1]);
6197 Dbinh[7] := NAN2 (dbinh\[7], di7t[2], phrase_mode);*/
6198 ////////////////////////////////////// C++ CODE //////////////////////////////////////
6199 bool di7t2 = di6t0_1
6200 || (!(srcd & 0x80) && bcompen)
6201 || (!(pixsize & 0x04) && (dcomp & 0x80) && dcompen);
6202 dbinh |= (!(di7t2 && phrase_mode) ? 0x80 : 0x00);
6203 #ifdef LOG_COMP_CTRL
6206 WriteLog("[di7t2=%s]", (di7t2 ? "T" : "F"));
6210 //////////////////////////////////////////////////////////////////////////////////////
6215 #ifdef LOG_COMP_CTRL
6218 WriteLog("[dcomp=$%02X dbinh=$%02X]\n ", dcomp, dbinh);
6225 ////////////////////////////////////// C++ CODE //////////////////////////////////////
6226 //////////////////////////////////////////////////////////////////////////////////////
6228 // !!! TESTING !!! TESTING !!! TESTING !!! TESTING !!! TESTING !!! TESTING !!! TESTING !!!
6229 // !!! TESTING !!! TESTING !!! TESTING !!! TESTING !!! TESTING !!! TESTING !!! TESTING !!!
6230 // !!! TESTING !!! TESTING !!! TESTING !!! TESTING !!! TESTING !!! TESTING !!! TESTING !!!