5 // (C) 2010 Underground Software
7 // JLH = James Hammons <jlhamm@acm.org>
10 // --- ---------- -------------------------------------------------------------
11 // JLH 01/16/2010 Created this log ;-)
15 // I owe a debt of gratitude to Curt Vendel and to John Mathieson--to Curt
16 // for supplying the Oberon ASIC nets and to John for making them available
17 // to Curt. ;-) Without that excellent documentation which shows *exactly*
18 // what's going on inside the TOM chip, we'd all still be guessing as to how
19 // the wily blitter and other pieces of the Jaguar puzzle actually work.
20 // Now how about those JERRY ASIC nets gentlemen...? [We have those now!] ;-)
11 // Various conditional compilation goodies...
// Selects which blitter implementation is built. The first two choices are
// commented out and only USE_MIDSUMMER_BLITTER_MKII is defined here, so code
// guarded by "#ifdef USE_ORIGINAL_BLITTER" is excluded unless that symbol is
// defined somewhere else (e.g. on the compiler command line).
36 //#define USE_ORIGINAL_BLITTER
37 //#define USE_MIDSUMMER_BLITTER
38 #define USE_MIDSUMMER_BLITTER_MKII
40 // External global variables
// (declared here, defined in another translation unit)
42 extern int jaguar_active_memory_dumps;
44 // Local global variables
// NOTE(review): these are not declared static, so they have external linkage
// despite the "local" label -- confirm whether other files reference them.
46 int start_logging = 0;
47 uint8_t blitter_working = 0;
48 bool startConciseBlitLogging = false;
51 // Blitter register RAM (most of it is hidden from the user)
// 0x100 bytes; read and written through the REG()/WREG() macros.
53 static uint8_t blitter_ram[0x100];
// When true, the WRITE_PIXEL_16 macro logs the address of each pixel it writes.
57 bool specialLog = false;
58 extern int effect_start;
59 extern int blit_start_log;
// Prototypes for functions whose definitions are not in this part of the file.
60 void BlitterMidsummer(uint32_t cmd);
61 void BlitterMidsummer2(void);
// REG(A): read the 32-bit value held at byte offset A of blitter_ram. The
// value is assembled from four consecutive bytes, most significant byte first
// (blitter_ram[A] supplies bits 31..24). The argument A is evaluated four times,
// so it must not have side effects.
63 #define REG(A) (((uint32_t)blitter_ram[(A)] << 24) | ((uint32_t)blitter_ram[(A)+1] << 16) \
64 | ((uint32_t)blitter_ram[(A)+2] << 8) | (uint32_t)blitter_ram[(A)+3])
// WREG(A,D): store the 32-bit value D at byte offset A of blitter_ram, most
// significant byte first. The expansion is one parenthesized comma expression;
// A and D are each evaluated four times.
65 #define WREG(A,D) (blitter_ram[(A)] = ((D)>>24)&0xFF, blitter_ram[(A)+1] = ((D)>>16)&0xFF, \
66 blitter_ram[(A)+2] = ((D)>>8)&0xFF, blitter_ram[(A)+3] = (D)&0xFF)
68 // Blitter registers (offsets from F02200)
// Each constant is a byte offset into blitter_ram and is meant to be passed to
// REG()/WREG(). Most registers are 4 bytes apart; SRCDATA through PATTERNDATA
// are spaced 8 bytes apart.
// Address generator A1:
70 #define A1_BASE ((uint32_t)0x00)
71 #define A1_FLAGS ((uint32_t)0x04)
72 #define A1_CLIP ((uint32_t)0x08) // Height and width values for clipping
73 #define A1_PIXEL ((uint32_t)0x0C) // Integer part of the pixel (Y.i and X.i)
74 #define A1_STEP ((uint32_t)0x10) // Integer part of the step
75 #define A1_FSTEP ((uint32_t)0x14) // Fractional part of the step
76 #define A1_FPIXEL ((uint32_t)0x18) // Fractional part of the pixel (Y.f and X.f)
77 #define A1_INC ((uint32_t)0x1C) // Integer part of the increment
78 #define A1_FINC ((uint32_t)0x20) // Fractional part of the increment
// Address generator A2:
79 #define A2_BASE ((uint32_t)0x24)
80 #define A2_FLAGS ((uint32_t)0x28)
81 #define A2_MASK ((uint32_t)0x2C) // Modulo values for x and y (M.y and M.x)
82 #define A2_PIXEL ((uint32_t)0x30) // Integer part of the pixel (no fractional part for A2)
83 #define A2_STEP ((uint32_t)0x34) // Integer part of the step (no fractional part for A2)
// Command, loop counters and data registers:
84 #define COMMAND ((uint32_t)0x38)
85 #define PIXLINECOUNTER ((uint32_t)0x3C) // Inner & outer loop values
86 #define SRCDATA ((uint32_t)0x40)
87 #define DSTDATA ((uint32_t)0x48)
88 #define DSTZ ((uint32_t)0x50)
89 #define SRCZINT ((uint32_t)0x58)
90 #define SRCZFRAC ((uint32_t)0x60)
91 #define PATTERNDATA ((uint32_t)0x68)
92 #define INTENSITYINC ((uint32_t)0x70)
93 #define ZINC ((uint32_t)0x74)
94 #define COLLISIONCTRL ((uint32_t)0x78)
95 #define PHRASEINT0 ((uint32_t)0x7C)
96 #define PHRASEINT1 ((uint32_t)0x80)
97 #define PHRASEINT2 ((uint32_t)0x84)
98 #define PHRASEINT3 ((uint32_t)0x88)
99 #define PHRASEZ0 ((uint32_t)0x8C)
100 #define PHRASEZ1 ((uint32_t)0x90)
101 #define PHRASEZ2 ((uint32_t)0x94)
102 #define PHRASEZ3 ((uint32_t)0x98)
104 // Blitter command bits
// Each macro tests one bit of a variable named `cmd`, which must be in scope
// wherever the macro is used. The result is the masked bit itself (zero or the
// mask value), not a normalized 0/1, so use it only in a boolean context or
// compare it against zero. Note that the definitions from PATDSEL onward are
// not listed in ascending bit order.
106 #define SRCEN (cmd & 0x00000001)
107 #define SRCENZ (cmd & 0x00000002)
108 #define SRCENX (cmd & 0x00000004)
109 #define DSTEN (cmd & 0x00000008)
110 #define DSTENZ (cmd & 0x00000010)
111 #define DSTWRZ (cmd & 0x00000020)
112 #define CLIPA1 (cmd & 0x00000040)
114 #define UPDA1F (cmd & 0x00000100)
115 #define UPDA1 (cmd & 0x00000200)
116 #define UPDA2 (cmd & 0x00000400)
118 #define DSTA2 (cmd & 0x00000800)
// Z comparator selection bits
120 #define Z_OP_INF (cmd & 0x00040000)
121 #define Z_OP_EQU (cmd & 0x00080000)
122 #define Z_OP_SUP (cmd & 0x00100000)
// Logic function unit bits: each enables one of the four src/dst minterms
// (~s&~d, ~s&d, s&~d, s&d) that are ORed together to form the write data.
124 #define LFU_NAN (cmd & 0x00200000)
125 #define LFU_NA (cmd & 0x00400000)
126 #define LFU_AN (cmd & 0x00800000)
127 #define LFU_A (cmd & 0x01000000)
// Comparator control bits
129 #define CMPDST (cmd & 0x02000000)
130 #define BCOMPEN (cmd & 0x04000000)
131 #define DCOMPEN (cmd & 0x08000000)
// Write-data source selection and miscellaneous mode bits
133 #define PATDSEL (cmd & 0x00010000)
134 #define ADDDSEL (cmd & 0x00020000)
135 #define TOPBEN (cmd & 0x00004000)
136 #define TOPNEN (cmd & 0x00008000)
137 #define BKGWREN (cmd & 0x10000000)
138 #define GOURD (cmd & 0x00001000)
139 #define GOURZ (cmd & 0x00002000)
140 #define SRCSHADE (cmd & 0x40000000)
// Address-control flag tests. Each macro re-reads the A1_FLAGS or A2_FLAGS
// register from blitter_ram through REG() on every use and masks out one bit
// (bit 19 for XSIGNSUB, bit 20 for YSIGNSUB, bit 18 for YADD1). The result is
// zero or the mask value, not a normalized 0/1.
148 #define XSIGNSUB_A1 (REG(A1_FLAGS)&0x080000)
149 #define XSIGNSUB_A2 (REG(A2_FLAGS)&0x080000)
151 #define YSIGNSUB_A1 (REG(A1_FLAGS)&0x100000)
152 #define YSIGNSUB_A2 (REG(A2_FLAGS)&0x100000)
154 #define YADD1_A1 (REG(A1_FLAGS)&0x040000)
155 #define YADD1_A2 (REG(A2_FLAGS)&0x040000)
157 /*******************************************************************************
158 ********************** STUFF CUT BELOW THIS LINE! ******************************
159 *******************************************************************************/
160 #ifdef USE_ORIGINAL_BLITTER // We're ditching this crap for now...
162 //Put 'em back, once we fix the problem!!! [KO]
// Pixel read macros, one family per pixel depth. In all of them the argument
// `a` is a token prefix (a1 or a2 at the visible call sites) that is pasted onto
// _x, _y, _width, _pitch and _addr to name the variables being read; those
// variables must be in scope where the macro is expanded.
//   PIXEL_OFFSET_n(a) - offset of the current pixel relative to a##_addr,
//                       computed from the upper 16 bits of a##_y and a##_x
//                       together with a##_width and a##_pitch. For 1/2/4/8 bpp
//                       the result is added to the address directly; for
//                       16 bpp it is shifted left by 1 first and for 32 bpp by 2.
//   PIXEL_SHIFT_n(a)  - (sub-byte depths only) bit position of the pixel inside
//                       the fetched byte, derived from the complement of a##_x.
//   READ_PIXEL_n(a)   - fetch the pixel through JaguarReadByte/Word/Long,
//                       passing BLITTER as the last argument.
// NOTE(review): the ">> 16" on a##_x / a##_y suggests 16.16 fixed-point
// coordinates -- confirm against the code that loads them.
// 1 bpp
164 #define PIXEL_SHIFT_1(a) (((~a##_x) >> 16) & 7)
165 #define PIXEL_OFFSET_1(a) (((((uint32_t)a##_y >> 16) * a##_width / 8) + (((uint32_t)a##_x >> 19) & ~7)) * (1 + a##_pitch) + (((uint32_t)a##_x >> 19) & 7))
166 #define READ_PIXEL_1(a) ((JaguarReadByte(a##_addr+PIXEL_OFFSET_1(a), BLITTER) >> PIXEL_SHIFT_1(a)) & 0x01)
167 //#define READ_PIXEL_1(a) ((JaguarReadByte(a##_addr+PIXEL_OFFSET_1(a)) >> PIXEL_SHIFT_1(a)) & 0x01)
// 2 bpp
170 #define PIXEL_SHIFT_2(a) (((~a##_x) >> 15) & 6)
171 #define PIXEL_OFFSET_2(a) (((((uint32_t)a##_y >> 16) * a##_width / 4) + (((uint32_t)a##_x >> 18) & ~7)) * (1 + a##_pitch) + (((uint32_t)a##_x >> 18) & 7))
172 #define READ_PIXEL_2(a) ((JaguarReadByte(a##_addr+PIXEL_OFFSET_2(a), BLITTER) >> PIXEL_SHIFT_2(a)) & 0x03)
173 //#define READ_PIXEL_2(a) ((JaguarReadByte(a##_addr+PIXEL_OFFSET_2(a)) >> PIXEL_SHIFT_2(a)) & 0x03)
// 4 bpp
176 #define PIXEL_SHIFT_4(a) (((~a##_x) >> 14) & 4)
177 #define PIXEL_OFFSET_4(a) (((((uint32_t)a##_y >> 16) * (a##_width/2)) + (((uint32_t)a##_x >> 17) & ~7)) * (1 + a##_pitch) + (((uint32_t)a##_x >> 17) & 7))
178 #define READ_PIXEL_4(a) ((JaguarReadByte(a##_addr+PIXEL_OFFSET_4(a), BLITTER) >> PIXEL_SHIFT_4(a)) & 0x0f)
179 //#define READ_PIXEL_4(a) ((JaguarReadByte(a##_addr+PIXEL_OFFSET_4(a)) >> PIXEL_SHIFT_4(a)) & 0x0f)
// 8 bpp
182 #define PIXEL_OFFSET_8(a) (((((uint32_t)a##_y >> 16) * a##_width) + (((uint32_t)a##_x >> 16) & ~7)) * (1 + a##_pitch) + (((uint32_t)a##_x >> 16) & 7))
183 #define READ_PIXEL_8(a) (JaguarReadByte(a##_addr+PIXEL_OFFSET_8(a), BLITTER))
184 //#define READ_PIXEL_8(a) (JaguarReadByte(a##_addr+PIXEL_OFFSET_8(a)))
// 16 bpp
187 #define PIXEL_OFFSET_16(a) (((((uint32_t)a##_y >> 16) * a##_width) + (((uint32_t)a##_x >> 16) & ~3)) * (1 + a##_pitch) + (((uint32_t)a##_x >> 16) & 3))
188 #define READ_PIXEL_16(a) (JaguarReadWord(a##_addr+(PIXEL_OFFSET_16(a)<<1), BLITTER))
189 //#define READ_PIXEL_16(a) (JaguarReadWord(a##_addr+(PIXEL_OFFSET_16(a)<<1)))
// 32 bpp
192 #define PIXEL_OFFSET_32(a) (((((uint32_t)a##_y >> 16) * a##_width) + (((uint32_t)a##_x >> 16) & ~1)) * (1 + a##_pitch) + (((uint32_t)a##_x >> 16) & 1))
193 #define READ_PIXEL_32(a) (JaguarReadLong(a##_addr+(PIXEL_OFFSET_32(a)<<2), BLITTER))
194 //#define READ_PIXEL_32(a) (JaguarReadLong(a##_addr+(PIXEL_OFFSET_32(a)<<2)))
// READ_PIXEL(a,f): dispatch on bits 3..5 of the flags value f (0 -> 1 bpp,
// 1 -> 2 bpp, 2 -> 4 bpp, 3 -> 8 bpp, 4 -> 16 bpp, 5 -> 32 bpp); any other
// value yields 0. f is not parenthesized in the expansion and is evaluated once
// per comparison, so pass a parenthesized, side-effect-free expression.
197 #define READ_PIXEL(a,f) (\
198 (((f>>3)&0x07) == 0) ? (READ_PIXEL_1(a)) : \
199 (((f>>3)&0x07) == 1) ? (READ_PIXEL_2(a)) : \
200 (((f>>3)&0x07) == 2) ? (READ_PIXEL_4(a)) : \
201 (((f>>3)&0x07) == 3) ? (READ_PIXEL_8(a)) : \
202 (((f>>3)&0x07) == 4) ? (READ_PIXEL_16(a)) : \
203 (((f>>3)&0x07) == 5) ? (READ_PIXEL_32(a)) : 0)
205 // 16 bpp z data read
// ZDATA_OFFSET_16(a) is the 16 bpp pixel offset plus a##_zoffs * 4; like
// PIXEL_OFFSET_16 it is shifted left by 1 before being added to a##_addr.
206 #define ZDATA_OFFSET_16(a) (PIXEL_OFFSET_16(a) + a##_zoffs * 4)
207 #define READ_ZDATA_16(a) (JaguarReadWord(a##_addr+(ZDATA_OFFSET_16(a)<<1), BLITTER))
208 //#define READ_ZDATA_16(a) (JaguarReadWord(a##_addr+(ZDATA_OFFSET_16(a)<<1)))
// READ_ZDATA ignores its flags argument f and always performs the 16 bpp read.
211 #define READ_ZDATA(a,f) (READ_ZDATA_16(a))
213 // 16 bpp z data write
214 #define WRITE_ZDATA_16(a,d) { JaguarWriteWord(a##_addr+(ZDATA_OFFSET_16(a)<<1), d, BLITTER); }
215 //#define WRITE_ZDATA_16(a,d) { JaguarWriteWord(a##_addr+(ZDATA_OFFSET_16(a)<<1), d); }
// WRITE_ZDATA likewise ignores f. Its expansion is a brace block followed by
// ';', so a call written with its own ';' leaves an extra empty statement;
// take care when using it as the unbraced body of an if that has an else.
218 #define WRITE_ZDATA(a,f,d) WRITE_ZDATA_16(a,d);
// Register data read macros.
// READ_RDATA_n(r,a,p) extracts one n-bit pixel from the two consecutive 32-bit
// longs that start at blitter_ram offset r (read through REG()).
//   r - register offset (e.g. SRCDATA, DSTDATA, PATTERNDATA)
//   a - token prefix (a1 or a2); only a##_x is read
//   p - phrase-mode flag. When zero, the low n bits of REG(r) are returned.
//       When nonzero, the integer part of a##_x (its upper 16 bits) selects
//       the pixel: the bits above the per-long pixel index choose the long
//       (offset +0 or +4) and the low bits of the index, multiplied by n,
//       give the right-shift count.
// The shift-count mask must keep the count below 32 and step it by exactly one
// pixel width per pixel: 0x1F (1 bpp), 0x1E (2 bpp), 0x1C (4 bpp), 0x18 (8 bpp),
// 0x10 (16 bpp). The 2 bpp and 4 bpp masks were formerly 0x3E and 0x28, which
// produced shift counts of 32 or more (undefined on a 32-bit value) and, for
// 4 bpp, ignored pixel-index bits 0 and 2.
221 #define READ_RDATA_1(r,a,p) ((p) ? ((REG(r+(((uint32_t)a##_x >> 19) & 0x04))) >> (((uint32_t)a##_x >> 16) & 0x1F)) & 0x0001 : (REG(r) & 0x0001))
224 #define READ_RDATA_2(r,a,p) ((p) ? ((REG(r+(((uint32_t)a##_x >> 18) & 0x04))) >> (((uint32_t)a##_x >> 15) & 0x1E)) & 0x0003 : (REG(r) & 0x0003))
227 #define READ_RDATA_4(r,a,p) ((p) ? ((REG(r+(((uint32_t)a##_x >> 17) & 0x04))) >> (((uint32_t)a##_x >> 14) & 0x1C)) & 0x000F : (REG(r) & 0x000F))
230 #define READ_RDATA_8(r,a,p) ((p) ? ((REG(r+(((uint32_t)a##_x >> 16) & 0x04))) >> (((uint32_t)a##_x >> 13) & 0x18)) & 0x00FF : (REG(r) & 0x00FF))
232 // 16 bpp r data read
233 #define READ_RDATA_16(r,a,p) ((p) ? ((REG(r+(((uint32_t)a##_x >> 15) & 0x04))) >> (((uint32_t)a##_x >> 12) & 0x10)) & 0xFFFF : (REG(r) & 0xFFFF))
235 // 32 bpp r data read
236 #define READ_RDATA_32(r,a,p) ((p) ? REG(r+(((uint32_t)a##_x >> 14) & 0x04)) : REG(r))
238 // register data read
// READ_RDATA(r,a,f,p) dispatches on bits 3..5 of the flags value f
// (0 -> 1 bpp ... 5 -> 32 bpp); any other value yields 0.
239 #define READ_RDATA(r,a,f,p) (\
240 (((f>>3)&0x07) == 0) ? (READ_RDATA_1(r,a,p)) : \
241 (((f>>3)&0x07) == 1) ? (READ_RDATA_2(r,a,p)) : \
242 (((f>>3)&0x07) == 2) ? (READ_RDATA_4(r,a,p)) : \
243 (((f>>3)&0x07) == 3) ? (READ_RDATA_8(r,a,p)) : \
244 (((f>>3)&0x07) == 4) ? (READ_RDATA_16(r,a,p)) : \
245 (((f>>3)&0x07) == 5) ? (READ_RDATA_32(r,a,p)) : 0)
// Pixel write macros, one per pixel depth. `a` is the same token prefix used
// by the PIXEL_OFFSET_n / PIXEL_SHIFT_n macros and `d` is the pixel value.
// Each expands to a brace-enclosed block, not an expression.
// The 1/2/4 bpp forms do a read-modify-write of the containing byte: read it
// back, clear the target pixel's bits, OR in d shifted into position, and
// write the byte. d is neither masked nor parenthesized in the expansion, so
// the caller must pass a value that already fits the pixel width.
248 #define WRITE_PIXEL_1(a,d) { JaguarWriteByte(a##_addr+PIXEL_OFFSET_1(a), (JaguarReadByte(a##_addr+PIXEL_OFFSET_1(a), BLITTER)&(~(0x01 << PIXEL_SHIFT_1(a))))|(d<<PIXEL_SHIFT_1(a)), BLITTER); }
249 //#define WRITE_PIXEL_1(a,d) { JaguarWriteByte(a##_addr+PIXEL_OFFSET_1(a), (JaguarReadByte(a##_addr+PIXEL_OFFSET_1(a))&(~(0x01 << PIXEL_SHIFT_1(a))))|(d<<PIXEL_SHIFT_1(a))); }
252 #define WRITE_PIXEL_2(a,d) { JaguarWriteByte(a##_addr+PIXEL_OFFSET_2(a), (JaguarReadByte(a##_addr+PIXEL_OFFSET_2(a), BLITTER)&(~(0x03 << PIXEL_SHIFT_2(a))))|(d<<PIXEL_SHIFT_2(a)), BLITTER); }
253 //#define WRITE_PIXEL_2(a,d) { JaguarWriteByte(a##_addr+PIXEL_OFFSET_2(a), (JaguarReadByte(a##_addr+PIXEL_OFFSET_2(a))&(~(0x03 << PIXEL_SHIFT_2(a))))|(d<<PIXEL_SHIFT_2(a))); }
256 #define WRITE_PIXEL_4(a,d) { JaguarWriteByte(a##_addr+PIXEL_OFFSET_4(a), (JaguarReadByte(a##_addr+PIXEL_OFFSET_4(a), BLITTER)&(~(0x0f << PIXEL_SHIFT_4(a))))|(d<<PIXEL_SHIFT_4(a)), BLITTER); }
257 //#define WRITE_PIXEL_4(a,d) { JaguarWriteByte(a##_addr+PIXEL_OFFSET_4(a), (JaguarReadByte(a##_addr+PIXEL_OFFSET_4(a))&(~(0x0f << PIXEL_SHIFT_4(a))))|(d<<PIXEL_SHIFT_4(a))); }
// The 8/16/32 bpp forms write d directly with JaguarWriteByte/Word/Long.
260 #define WRITE_PIXEL_8(a,d) { JaguarWriteByte(a##_addr+PIXEL_OFFSET_8(a), d, BLITTER); }
261 //#define WRITE_PIXEL_8(a,d) { JaguarWriteByte(a##_addr+PIXEL_OFFSET_8(a), d); }
263 // 16 bpp pixel write
// The active definition also logs the destination address when specialLog is set.
264 //#define WRITE_PIXEL_16(a,d) { JaguarWriteWord(a##_addr+(PIXEL_OFFSET_16(a)<<1),d); }
265 #define WRITE_PIXEL_16(a,d) { JaguarWriteWord(a##_addr+(PIXEL_OFFSET_16(a)<<1), d, BLITTER); if (specialLog) WriteLog("Pixel write address: %08X\n", a##_addr+(PIXEL_OFFSET_16(a)<<1)); }
266 //#define WRITE_PIXEL_16(a,d) { JaguarWriteWord(a##_addr+(PIXEL_OFFSET_16(a)<<1), d); if (specialLog) WriteLog("Pixel write address: %08X\n", a##_addr+(PIXEL_OFFSET_16(a)<<1)); }
268 // 32 bpp pixel write
269 #define WRITE_PIXEL_32(a,d) { JaguarWriteLong(a##_addr+(PIXEL_OFFSET_32(a)<<2), d, BLITTER); }
270 //#define WRITE_PIXEL_32(a,d) { JaguarWriteLong(a##_addr+(PIXEL_OFFSET_32(a)<<2), d); }
273 #define WRITE_PIXEL(a,f,d) {\
274 switch ((f>>3)&0x07) { \
275 case 0: WRITE_PIXEL_1(a,d); break; \
276 case 1: WRITE_PIXEL_2(a,d); break; \
277 case 2: WRITE_PIXEL_4(a,d); break; \
278 case 3: WRITE_PIXEL_8(a,d); break; \
279 case 4: WRITE_PIXEL_16(a,d); break; \
280 case 5: WRITE_PIXEL_32(a,d); break; \
283 // Width in Pixels of a Scanline
284 // This is a pretranslation of the value found in the A1 & A2 flags: It's really a floating point value
285 // of the form EEEEMM where MM is the mantissa with an implied "1." in front of it and the EEEE value is
286 // the exponent. Valid values for the exponent range from 0 to 11 (decimal). It's easiest to think of it
287 // as a floating point bit pattern being followed by a number of zeroes. So, e.g., 001101 translates to
288 // 1.01 (the "1." being implied) x (2 ^ 3) or 1010 in binary -> 10 in base 10 (i.e., 1.01 with the binary
289 // point being shifted to the right 3 places).
290 /*static uint32_t blitter_scanline_width[48] =
292 0, 0, 0, 0, // Note: This would really translate to 1, 1, 1, 1
302 1024, 1280, 1536, 1792,
303 2048, 2560, 3072, 3584
306 //static uint8_t * tom_ram_8;
307 //static uint8_t * paletteRam;
// File-scope working state shared by blitter_generic() and the pixel macros.
// The a1_* / a2_* names are what the token-pasting macros (PIXEL_OFFSET_n,
// READ_PIXEL_n, ZDATA_OFFSET_16, ...) resolve to when invoked with a1 or a2.
311 static uint8_t a1ctl;
// Base addresses that the pixel/Z read and write macros add their offsets to.
317 static uint32_t a1_addr;
318 static uint32_t a2_addr;
// Added (times 4) to the 16 bpp pixel offset by ZDATA_OFFSET_16.
319 static int32_t a1_zoffs;
320 static int32_t a2_zoffs;
321 static uint32_t xadd_a1_control;
322 static uint32_t xadd_a2_control;
// PIXEL_OFFSET_n multiplies the row/column offset by (1 + pitch).
323 static int32_t a1_pitch;
324 static int32_t a2_pitch;
// blitter_generic() copies n_pixels into inner_loop.
325 static uint32_t n_pixels;
326 static uint32_t n_lines;
// Row width used by PIXEL_OFFSET_n when converting a##_y to an offset.
329 static int32_t a1_width;
332 static int32_t a2_width;
// a2_x / a2_y are ANDed with these masks after each increment.
333 static int32_t a2_mask_x;
334 static int32_t a2_mask_y;
// Per-pixel increments applied to the x/y positions in the inner loop.
335 static int32_t a1_xadd;
336 static int32_t a1_yadd;
337 static int32_t a2_xadd;
338 static int32_t a2_yadd;
// Passed as the `p` argument of READ_RDATA.
339 static uint8_t a1_phrase_mode;
340 static uint8_t a2_phrase_mode;
341 static int32_t a1_step_x = 0;
342 static int32_t a1_step_y = 0;
343 static int32_t a2_step_x = 0;
344 static int32_t a2_step_y = 0;
345 static uint32_t outer_loop;
346 static uint32_t inner_loop;
347 static uint32_t a2_psize;
348 static uint32_t a1_psize;
349 static uint32_t gouraud_add;
350 //static uint32_t gouraud_data;
351 //static uint16_t gint[4];
352 //static uint16_t gfrac[4];
353 //static uint8_t gcolour[4];
// Increments added to gd_i[] and gd_c[] respectively after each pixel when
// GOURD or SRCSHADE is set.
356 static int gd_ia, gd_ca;
// Index into z_i[], gd_i[] and gd_c[]; advanced modulo 4.
357 static int colour_index = 0;
// Z values; the upper 16 bits of z_i[colour_index] are used as the source Z.
359 static uint32_t z_i[4];
// Exclusive upper bounds compared against the integer parts of a1_x / a1_y
// when computing the clip inhibit.
361 static int32_t a1_clip_x, a1_clip_y;
363 // In the spirit of "get it right first, *then* optimize" I've taken the liberty
364 // of removing all the unnecessary code caching. If it turns out to be a good way
365 // to optimize the blitter, then we may revisit it in the future...
368 // Generic blit handler
370 void blitter_generic(uint32_t cmd)
373 Blit! (0018FA70 <- 008DDC40) count: 2 x 13, A1/2_FLAGS: 00014218/00013C18 [cmd: 1401060C]
374 CMD -> src: SRCENX dst: DSTEN misc: a1ctl: UPDA1 UPDA2 mode: ity: PATDSEL z-op: op: LFU_CLEAR ctrl: BCOMPEN BKGWREN
375 A1 step values: -2 (X), 1 (Y)
376 A2 step values: -1 (X), 1 (Y) [mask (unused): 00000000 - FFFFFFFF/FFFFFFFF]
377 A1 -> pitch: 1 phrases, depth: 8bpp, z-off: 0, width: 320 (21), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
378 A2 -> pitch: 1 phrases, depth: 8bpp, z-off: 0, width: 192 (1E), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
379 A1 x/y: 100/12, A2 x/y: 106/0 Pattern: 000000F300000000
382 // specialLog = true;
383 /*if (cmd == 0x1401060C && blit_start_log)
384 specialLog = true;//*/
386 //uint32_t logGo = ((cmd == 0x01800E01 && REG(A1_BASE) == 0x898000) ? 1 : 0);
387 uint32_t srcdata, srczdata, dstdata, dstzdata, writedata, inhibit;
388 uint32_t bppSrc = (DSTA2 ? 1 << ((REG(A1_FLAGS) >> 3) & 0x07) : 1 << ((REG(A2_FLAGS) >> 3) & 0x07));
392 WriteLog("About to do n x m blit (BM width is ? pixels)...\n");
393 WriteLog("A1_STEP_X/Y = %08X/%08X, A2_STEP_X/Y = %08X/%08X\n", a1_step_x, a1_step_y, a2_step_x, a2_step_y);
407 WriteLog(" A1_X/Y = %08X/%08X, A2_X/Y = %08X/%08X\n", a1_x, a1_y, a2_x, a2_y);
409 uint32_t a1_start = a1_x, a2_start = a2_x, bitPos = 0;
411 //Kludge for Hover Strike...
412 //I wonder if this kludge is in conjunction with the SRCENX down below...
413 // This isn't so much a kludge but the way things work in BCOMPEN mode...!
414 if (BCOMPEN && SRCENX)
416 if (n_pixels < bppSrc)
417 bitPos = bppSrc - n_pixels;
420 inner_loop = n_pixels;
425 WriteLog(" A1_X/Y = %08X/%08X, A2_X/Y = %08X/%08X\n", a1_x, a1_y, a2_x, a2_y);
427 srcdata = srczdata = dstdata = dstzdata = writedata = inhibit = 0;
429 if (!DSTA2) // Data movement: A1 <- A2
431 // load src data and Z
433 if (SRCEN || SRCENX) // Not sure if this is correct... (seems to be...!)
435 srcdata = READ_PIXEL(a2, REG(A2_FLAGS));
438 srczdata = READ_ZDATA(a2, REG(A2_FLAGS));
439 else if (cmd & 0x0001C020) // PATDSEL | TOPBEN | TOPNEN | DSTWRZ
440 srczdata = READ_RDATA(SRCZINT, a2, REG(A2_FLAGS), a2_phrase_mode);
442 else // Use SRCDATA register...
444 srcdata = READ_RDATA(SRCDATA, a2, REG(A2_FLAGS), a2_phrase_mode);
446 if (cmd & 0x0001C020) // PATDSEL | TOPBEN | TOPNEN | DSTWRZ
447 srczdata = READ_RDATA(SRCZINT, a2, REG(A2_FLAGS), a2_phrase_mode);
450 // load dst data and Z
453 dstdata = READ_PIXEL(a1, REG(A1_FLAGS));
456 dstzdata = READ_ZDATA(a1, REG(A1_FLAGS));
458 dstzdata = READ_RDATA(DSTZ, a1, REG(A1_FLAGS), a1_phrase_mode);
462 dstdata = READ_RDATA(DSTDATA, a1, REG(A1_FLAGS), a1_phrase_mode);
465 dstzdata = READ_RDATA(DSTZ, a1, REG(A1_FLAGS), a1_phrase_mode);
468 /*This wasn't working... // a1 clipping
469 if (cmd & 0x00000040)
471 if (a1_x < 0 || a1_y < 0 || (a1_x >> 16) >= (REG(A1_CLIP) & 0x7FFF)
472 || (a1_y >> 16) >= ((REG(A1_CLIP) >> 16) & 0x7FFF))
477 srczdata = z_i[colour_index] >> 16;
479 // apply z comparator
480 if (Z_OP_INF && srczdata < dstzdata) inhibit = 1;
481 if (Z_OP_EQU && srczdata == dstzdata) inhibit = 1;
482 if (Z_OP_SUP && srczdata > dstzdata) inhibit = 1;
484 // apply data comparator
485 // Note: DCOMPEN only works in 8/16 bpp modes! !!! FIX !!!
486 // Does BCOMPEN only work in 1 bpp mode???
487 // No, but it always does a 1 bit expansion no matter what the BPP of the channel is set to. !!! FIX !!!
488 // This is bit tricky... We need to fix the XADD value so that it acts like a 1BPP value while inside
490 if (DCOMPEN | BCOMPEN)
492 //Temp, for testing Hover Strike
493 //Doesn't seem to do it... Why?
494 //What needs to happen here is twofold. First, the address generator in the outer loop has
495 //to honor the BPP when calculating the start address (which it kinda does already). Second,
496 //it has to step bit by bit when using BCOMPEN. How to do this???
498 //small problem with this approach: it's not accurate... We need a proper address to begin with
499 //and *then* we can do the bit stepping from there the way it's *supposed* to be done... !!! FIX !!!
502 uint32_t pixShift = (~bitPos) & (bppSrc - 1);
503 srcdata = (srcdata >> pixShift) & 0x01;
506 // if (bitPos % bppSrc == 0)
507 // a2_x += 0x00010000;
510 Interesting (Hover Strike--large letter):
512 Blit! (0018FA70 <- 008DDC40) count: 2 x 13, A1/2_FLAGS: 00014218/00013C18 [cmd: 1401060C]
513 CMD -> src: SRCENX dst: DSTEN misc: a1ctl: UPDA1 UPDA2 mode: ity: PATDSEL z-op: op: LFU_CLEAR ctrl: BCOMPEN BKGWREN
514 A1 step values: -2 (X), 1 (Y)
515 A2 step values: -1 (X), 1 (Y) [mask (unused): 00000000 - FFFFFFFF/FFFFFFFF]
516 A1 -> pitch: 1 phrases, depth: 8bpp, z-off: 0, width: 320 (21), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
517 A2 -> pitch: 1 phrases, depth: 8bpp, z-off: 0, width: 192 (1E), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
518 A1 x/y: 100/12, A2 x/y: 106/0 Pattern: 000000F300000000
520 Blit! (0018FA70 <- 008DDC40) count: 8 x 13, A1/2_FLAGS: 00014218/00013C18 [cmd: 1401060C]
521 CMD -> src: SRCENX dst: DSTEN misc: a1ctl: UPDA1 UPDA2 mode: ity: PATDSEL z-op: op: LFU_CLEAR ctrl: BCOMPEN BKGWREN
522 A1 step values: -8 (X), 1 (Y)
523 A2 step values: -1 (X), 1 (Y) [mask (unused): 00000000 - FFFFFFFF/FFFFFFFF]
524 A1 -> pitch: 1 phrases, depth: 8bpp, z-off: 0, width: 320 (21), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
525 A2 -> pitch: 1 phrases, depth: 8bpp, z-off: 0, width: 192 (1E), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
526 A1 x/y: 102/12, A2 x/y: 107/0 Pattern: 000000F300000000
528 Blit! (0018FA70 <- 008DDC40) count: 1 x 13, A1/2_FLAGS: 00014218/00013C18 [cmd: 1401060C]
529 CMD -> src: SRCENX dst: DSTEN misc: a1ctl: UPDA1 UPDA2 mode: ity: PATDSEL z-op: op: LFU_CLEAR ctrl: BCOMPEN BKGWREN
530 A1 step values: -1 (X), 1 (Y)
531 A2 step values: -1 (X), 1 (Y) [mask (unused): 00000000 - FFFFFFFF/FFFFFFFF]
532 A1 -> pitch: 1 phrases, depth: 8bpp, z-off: 0, width: 320 (21), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
533 A2 -> pitch: 1 phrases, depth: 8bpp, z-off: 0, width: 192 (1E), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
534 A1 x/y: 118/12, A2 x/y: 70/0 Pattern: 000000F300000000
536 Blit! (0018FA70 <- 008DDC40) count: 8 x 13, A1/2_FLAGS: 00014218/00013C18 [cmd: 1401060C]
537 CMD -> src: SRCENX dst: DSTEN misc: a1ctl: UPDA1 UPDA2 mode: ity: PATDSEL z-op: op: LFU_CLEAR ctrl: BCOMPEN BKGWREN
538 A1 step values: -8 (X), 1 (Y)
539 A2 step values: -1 (X), 1 (Y) [mask (unused): 00000000 - FFFFFFFF/FFFFFFFF]
540 A1 -> pitch: 1 phrases, depth: 8bpp, z-off: 0, width: 320 (21), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
541 A2 -> pitch: 1 phrases, depth: 8bpp, z-off: 0, width: 192 (1E), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
542 A1 x/y: 119/12, A2 x/y: 71/0 Pattern: 000000F300000000
544 Blit! (0018FA70 <- 008DDC40) count: 1 x 13, A1/2_FLAGS: 00014218/00013C18 [cmd: 1401060C]
545 CMD -> src: SRCENX dst: DSTEN misc: a1ctl: UPDA1 UPDA2 mode: ity: PATDSEL z-op: op: LFU_CLEAR ctrl: BCOMPEN BKGWREN
546 A1 step values: -1 (X), 1 (Y)
547 A2 step values: -1 (X), 1 (Y) [mask (unused): 00000000 - FFFFFFFF/FFFFFFFF]
548 A1 -> pitch: 1 phrases, depth: 8bpp, z-off: 0, width: 320 (21), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
549 A2 -> pitch: 1 phrases, depth: 8bpp, z-off: 0, width: 192 (1E), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
550 A1 x/y: 127/12, A2 x/y: 66/0 Pattern: 000000F300000000
552 Blit! (0018FA70 <- 008DDC40) count: 8 x 13, A1/2_FLAGS: 00014218/00013C18 [cmd: 1401060C]
553 CMD -> src: SRCENX dst: DSTEN misc: a1ctl: UPDA1 UPDA2 mode: ity: PATDSEL z-op: op: LFU_CLEAR ctrl: BCOMPEN BKGWREN
554 A1 step values: -8 (X), 1 (Y)
555 A2 step values: -1 (X), 1 (Y) [mask (unused): 00000000 - FFFFFFFF/FFFFFFFF]
556 A1 -> pitch: 1 phrases, depth: 8bpp, z-off: 0, width: 320 (21), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
557 A2 -> pitch: 1 phrases, depth: 8bpp, z-off: 0, width: 192 (1E), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
558 A1 x/y: 128/12, A2 x/y: 67/0 Pattern: 000000F300000000
564 //WriteLog("Blitter: BCOMPEN set on command %08X inhibit prev:%u, now:", cmd, inhibit);
565 // compare source pixel with pattern pixel
567 Blit! (000B8250 <- 0012C3A0) count: 16 x 1, A1/2_FLAGS: 00014420/00012000 [cmd: 05810001]
568 CMD -> src: SRCEN dst: misc: a1ctl: mode: ity: PATDSEL z-op: op: LFU_REPLACE ctrl: BCOMPEN
569 A1 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 384 (22), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
570 A2 -> pitch: 1 phrases, depth: 1bpp, z-off: 0, width: 16 (10), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
574 // AvP is still wrong, could be cuz it's doing A1 -> A2...
576 // Src is the 1bpp bitmap... DST is the PATTERN!!!
577 // This seems to solve at least ONE of the problems with MC3D...
578 // Why should this be inverted???
579 // Bcuz it is. This is supposed to be used only for a bit -> pixel expansion...
580 /* if (srcdata == READ_RDATA(PATTERNDATA, a2, REG(A2_FLAGS), a2_phrase_mode))
581 // if (srcdata != READ_RDATA(PATTERNDATA, a2, REG(A2_FLAGS), a2_phrase_mode))
583 /* uint32_t A2bpp = 1 << ((REG(A2_FLAGS) >> 3) & 0x07);
584 if (A2bpp == 1 || A2bpp == 16 || A2bpp == 8)
585 inhibit = (srcdata == 0 ? 1: 0);
586 // inhibit = !srcdata;
588 WriteLog("Blitter: Bad BPP (%u) selected for BCOMPEN mode!\n", A2bpp);//*/
589 // What it boils down to is this:
596 // compare destination pixel with pattern pixel
597 if (dstdata == READ_RDATA(PATTERNDATA, a1, REG(A1_FLAGS), a1_phrase_mode))
598 // if (dstdata != READ_RDATA(PATTERNDATA, a1, REG(A1_FLAGS), a1_phrase_mode))
602 // This is DEFINITELY WRONG
603 // if (a1_phrase_mode || a2_phrase_mode)
604 // inhibit = !inhibit;
609 inhibit |= (((a1_x >> 16) < a1_clip_x && (a1_x >> 16) >= 0
610 && (a1_y >> 16) < a1_clip_y && (a1_y >> 16) >= 0) ? 0 : 1);
613 // compute the write data and store
616 // Houston, we have a problem...
617 // Look here, at PATDSEL and GOURD. If both are active (as they are on the BIOS intro), then there's
619 //Blit! (00100000 <- 000095D0) count: 3 x 1, A1/2_FLAGS: 00014220/00004020 [cmd: 00011008]
620 // CMD -> src: dst: DSTEN misc: a1ctl: mode: GOURD ity: PATDSEL z-op: op: LFU_CLEAR ctrl:
621 // A1 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 320 (21), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
622 // A2 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 256 (20), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
623 // A1 x/y: 90/171, A2 x/y: 808/0 Pattern: 776D770077007700
627 // use pattern data for write data
628 writedata = READ_RDATA(PATTERNDATA, a1, REG(A1_FLAGS), a1_phrase_mode);
632 /*if (blit_start_log)
633 WriteLog("BLIT: ADDDSEL srcdata: %08X\, dstdata: %08X, ", srcdata, dstdata);//*/
635 // intensity addition
636 //Ok, this is wrong... Or is it? Yes, it's wrong! !!! FIX !!!
637 /* writedata = (srcdata & 0xFF) + (dstdata & 0xFF);
638 if (!(TOPBEN) && writedata > 0xFF)
641 writedata |= (srcdata & 0xF00) + (dstdata & 0xF00);
642 if (!(TOPNEN) && writedata > 0xFFF)
643 // writedata = 0xFFF;
645 writedata |= (srcdata & 0xF000) + (dstdata & 0xF000);//*/
646 //notneeded--writedata &= 0xFFFF;
647 /*if (blit_start_log)
648 WriteLog("writedata: %08X\n", writedata);//*/
650 Hover Strike ADDDSEL blit:
652 Blit! (00098D90 <- 0081DDC0) count: 320 x 287, A1/2_FLAGS: 00004220/00004020 [cmd: 00020208]
653 CMD -> src: dst: DSTEN misc: a1ctl: UPDA1 mode: ity: ADDDSEL z-op: op: LFU_CLEAR ctrl:
654 A1 step values: -320 (X), 1 (Y)
655 A1 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 320 (21), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
656 A2 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 256 (20), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
657 A1 x/y: 0/0, A2 x/y: 3288/0 Pattern: 0000000000000000 SRCDATA: 00FD00FD00FD00FD
659 writedata = (srcdata & 0xFF) + (dstdata & 0xFF);
663 //This is correct now, but slow...
664 int16_t s = (srcdata & 0xFF) | (srcdata & 0x80 ? 0xFF00 : 0x0000),
673 writedata = (uint32_t)sum;
676 //This doesn't seem right... Looks like it would muck up the low byte... !!! FIX !!!
677 writedata |= (srcdata & 0xF00) + (dstdata & 0xF00);
679 if (!TOPNEN && writedata > 0xFFF)
684 writedata |= (srcdata & 0xF000) + (dstdata & 0xF000);
688 if (LFU_NAN) writedata |= ~srcdata & ~dstdata;
689 if (LFU_NA) writedata |= ~srcdata & dstdata;
690 if (LFU_AN) writedata |= srcdata & ~dstdata;
691 if (LFU_A) writedata |= srcdata & dstdata;
694 //Although, this looks like it's OK... (even if it is shitty!)
695 //According to JTRM, this is part of the four things the blitter does with the write data (the other
696 //three being PATDSEL, ADDDSEL, and LFU (default). I'm not sure which gets precedence, this or PATDSEL
697 //(see above blit example)...
699 writedata = ((gd_c[colour_index]) << 8) | (gd_i[colour_index] >> 16);
703 int intensity = srcdata & 0xFF;
704 int ia = gd_ia >> 16;
706 ia = 0xFFFFFF00 | ia;
710 if (intensity > 0xFF)
712 writedata = (srcdata & 0xFF00) | intensity;
721 //Tried 2nd below for Hover Strike: No dice.
722 if (/*a1_phrase_mode || */BKGWREN || !inhibit)
723 // if (/*a1_phrase_mode || BKGWREN ||*/ !inhibit)
725 /*if (((REG(A1_FLAGS) >> 3) & 0x07) == 5)
727 uint32_t offset = a1_addr+(PIXEL_OFFSET_32(a1)<<2);
728 // (((((uint32_t)a##_y >> 16) * a##_width) + (((uint32_t)a##_x >> 16) & ~1)) * (1 + a##_pitch) + (((uint32_t)a##_x >> 16) & 1))
729 if ((offset >= 0x1FF020 && offset <= 0x1FF03F) || (offset >= 0x1FF820 && offset <= 0x1FF83F))
730 WriteLog("32bpp pixel write: A1 Phrase mode --> ");
732 // write to the destination
733 WRITE_PIXEL(a1, REG(A1_FLAGS), writedata);
735 WRITE_ZDATA(a1, REG(A1_FLAGS), srczdata);
738 else // if (DSTA2) // Data movement: A1 -> A2
740 // load src data and Z
743 srcdata = READ_PIXEL(a1, REG(A1_FLAGS));
745 srczdata = READ_ZDATA(a1, REG(A1_FLAGS));
746 else if (cmd & 0x0001C020) // PATDSEL | TOPBEN | TOPNEN | DSTWRZ
747 srczdata = READ_RDATA(SRCZINT, a1, REG(A1_FLAGS), a1_phrase_mode);
751 srcdata = READ_RDATA(SRCDATA, a1, REG(A1_FLAGS), a1_phrase_mode);
752 if (cmd & 0x001C020) // PATDSEL | TOPBEN | TOPNEN | DSTWRZ
753 srczdata = READ_RDATA(SRCZINT, a1, REG(A1_FLAGS), a1_phrase_mode);
756 // load dst data and Z
759 dstdata = READ_PIXEL(a2, REG(A2_FLAGS));
761 dstzdata = READ_ZDATA(a2, REG(A2_FLAGS));
763 dstzdata = READ_RDATA(DSTZ, a2, REG(A2_FLAGS), a2_phrase_mode);
767 dstdata = READ_RDATA(DSTDATA, a2, REG(A2_FLAGS), a2_phrase_mode);
769 dstzdata = READ_RDATA(DSTZ, a2, REG(A2_FLAGS), a2_phrase_mode);
773 srczdata = z_i[colour_index] >> 16;
775 // apply z comparator
776 if (Z_OP_INF && srczdata < dstzdata) inhibit = 1;
777 if (Z_OP_EQU && srczdata == dstzdata) inhibit = 1;
778 if (Z_OP_SUP && srczdata > dstzdata) inhibit = 1;
780 // apply data comparator
781 //NOTE: The bit comparator (BCOMPEN) is NOT the same at the data comparator!
782 if (DCOMPEN | BCOMPEN)
786 // compare source pixel with pattern pixel
787 // AvP: Numbers are correct, but sprites are not!
788 //This doesn't seem to be a problem... But could still be wrong...
789 /* if (srcdata == READ_RDATA(PATTERNDATA, a1, REG(A1_FLAGS), a1_phrase_mode))
790 // if (srcdata != READ_RDATA(PATTERNDATA, a1, REG(A1_FLAGS), a1_phrase_mode))
792 // This is probably not 100% correct... It works in the 1bpp case
793 // (in A1 <- A2 mode, that is...)
794 // AvP: This is causing blocks to be written instead of bit patterns...
796 // NOTE: We really should separate out the BCOMPEN & DCOMPEN stuff!
797 /* uint32_t A1bpp = 1 << ((REG(A1_FLAGS) >> 3) & 0x07);
798 if (A1bpp == 1 || A1bpp == 16 || A1bpp == 8)
799 inhibit = (srcdata == 0 ? 1: 0);
801 WriteLog("Blitter: Bad BPP (%u) selected for BCOMPEN mode!\n", A1bpp);//*/
802 // What it boils down to is this:
808 // compare destination pixel with pattern pixel
809 if (dstdata == READ_RDATA(PATTERNDATA, a2, REG(A2_FLAGS), a2_phrase_mode))
810 // if (dstdata != READ_RDATA(PATTERNDATA, a2, REG(A2_FLAGS), a2_phrase_mode))
814 // This is DEFINITELY WRONG
815 // if (a1_phrase_mode || a2_phrase_mode)
816 // inhibit = !inhibit;
821 inhibit |= (((a1_x >> 16) < a1_clip_x && (a1_x >> 16) >= 0
822 && (a1_y >> 16) < a1_clip_y && (a1_y >> 16) >= 0) ? 0 : 1);
825 // compute the write data and store
830 // use pattern data for write data
831 writedata = READ_RDATA(PATTERNDATA, a2, REG(A2_FLAGS), a2_phrase_mode);
835 // intensity addition
836 writedata = (srcdata & 0xFF) + (dstdata & 0xFF);
837 if (!(TOPBEN) && writedata > 0xFF)
839 writedata |= (srcdata & 0xF00) + (dstdata & 0xF00);
840 if (!(TOPNEN) && writedata > 0xFFF)
842 writedata |= (srcdata & 0xF000) + (dstdata & 0xF000);
847 writedata |= ~srcdata & ~dstdata;
849 writedata |= ~srcdata & dstdata;
851 writedata |= srcdata & ~dstdata;
853 writedata |= srcdata & dstdata;
857 writedata = ((gd_c[colour_index]) << 8) | (gd_i[colour_index] >> 16);
861 int intensity = srcdata & 0xFF;
862 int ia = gd_ia >> 16;
864 ia = 0xFFFFFF00 | ia;
868 if (intensity > 0xFF)
870 writedata = (srcdata & 0xFF00) | intensity;
879 if (/*a2_phrase_mode || */BKGWREN || !inhibit)
883 uint32_t offset = a2_addr+(PIXEL_OFFSET_16(a2)<<1);
884 // (((((uint32_t)a##_y >> 16) * a##_width) + (((uint32_t)a##_x >> 16) & ~1)) * (1 + a##_pitch) + (((uint32_t)a##_x >> 16) & 1))
885 WriteLog("[%08X:%04X] ", offset, writedata);
887 // write to the destination
888 WRITE_PIXEL(a2, REG(A2_FLAGS), writedata);
891 WRITE_ZDATA(a2, REG(A2_FLAGS), srczdata);
895 // Update x and y (inner loop)
896 //Now it does! But crappy, crappy, crappy! !!! FIX !!! [DONE]
897 //This is less than ideal, but it works...
900 a1_x += a1_xadd, a1_y += a1_yadd;
901 a2_x = (a2_x + a2_xadd) & a2_mask_x, a2_y = (a2_y + a2_yadd) & a2_mask_y;
905 a1_y += a1_yadd, a2_y = (a2_y + a2_yadd) & a2_mask_y;
909 if (bitPos % bppSrc == 0)
910 a2_x = (a2_x + a2_xadd) & a2_mask_x;
914 a2_x = (a2_x + a2_xadd) & a2_mask_x;
915 if (bitPos % bppSrc == 0)
921 z_i[colour_index] += zadd;
923 if (GOURD || SRCSHADE)
925 gd_i[colour_index] += gd_ia;
926 //Hmm, this doesn't seem to do anything...
927 //But it is correct according to the JTRM...!
928 if ((int32_t)gd_i[colour_index] < 0)
929 gd_i[colour_index] = 0;
930 if (gd_i[colour_index] > 0x00FFFFFF)
931 gd_i[colour_index] = 0x00FFFFFF;//*/
933 gd_c[colour_index] += gd_ca;
934 if ((int32_t)gd_c[colour_index] < 0)
935 gd_c[colour_index] = 0;
936 if (gd_c[colour_index] > 0x000000FF)
937 gd_c[colour_index] = 0x000000FF;//*/
940 if (GOURD || SRCSHADE || GOURZ)
943 //This screws things up WORSE (for the BIOS opening screen)
944 // if (a1_phrase_mode || a2_phrase_mode)
945 colour_index = (colour_index + 1) & 0x03;
950 Here's the problem... The phrase mode code!
951 Blit! (00100000 -> 00148000) count: 327 x 267, A1/2_FLAGS: 00004420/00004420 [cmd: 41802E01]
952 CMD -> src: SRCEN dst: misc: a1ctl: UPDA1 UPDA2 mode: DSTA2 GOURZ ity: z-op: op: LFU_REPLACE ctrl: SRCSHADE
953 A1 step values: -327 (X), 1 (Y)
954 A2 step values: -327 (X), 1 (Y) [mask (unused): 00000000 - FFFFFFFF/FFFFFFFF]
955 A1 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 384 (22), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
956 A2 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 384 (22), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
957 A1 x/y: 28/58, A2 x/y: 28/58 Pattern: 00EA7BEA77EA77EA SRCDATA: 7BFF7BFF7BFF7BFF
959 Below fixes it, but then borks:
962 Blit! (00110000 <- 0010B2A8) count: 12 x 12, A1/2_FLAGS: 000042E2/00000020 [cmd: 09800609]
963 CMD -> src: SRCEN dst: DSTEN misc: a1ctl: UPDA1 UPDA2 mode: ity: z-op: op: LFU_REPLACE ctrl: DCOMPEN
964 A1 step values: -15 (X), 1 (Y)
965 A2 step values: -4 (X), 0 (Y) [mask (unused): 00000000 - FFFFFFFF/FFFFFFFF]
966 A1 -> pitch: 4 phrases, depth: 16bpp, z-off: 3, width: 320 (21), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
967 A2 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 1 (00), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
968 A1 x/y: 173/144, A2 x/y: 4052/0
970 Lesse, with pre-add we'd have:
973 00001111222233334444555566667777
976 |rolls back to here. Hmm.
979 //NOTE: Defining SCREWY_CD_DEPENDENT below (which applies the step update *before*
980 // the phrase mode adjustment instead of after it) fixes the CD BIOS, but it breaks everything else...
981 //#define SCREWY_CD_DEPENDENT
982 #ifdef SCREWY_CD_DEPENDENT
986 a2_y += a2_step_y;//*/
989 //New: Phrase mode taken into account! :-p
990 /* if (a1_phrase_mode) // v1
992 // Bump the pointer to the next phrase boundary
993 // Even though it works, this is crappy... Clean it up!
994 uint32_t size = 64 / a1_psize;
996 // Crappy kludge... ('aligning' source to destination)
997 if (a2_phrase_mode && DSTA2)
999 uint32_t extra = (a2_start >> 16) % size;
1000 a1_x += extra << 16;
1003 uint32_t newx = (a1_x >> 16) / size;
1004 uint32_t newxrem = (a1_x >> 16) % size;
1006 a1_x |= (((newx + (newxrem == 0 ? 0 : 1)) * size) & 0xFFFF) << 16;
1008 if (a1_phrase_mode) // v2
1010 // Bump the pointer to the next phrase boundary
1011 // Even though it works, this is crappy... Clean it up!
1012 uint32_t size = 64 / a1_psize;
1014 // Crappy kludge... ('aligning' source to destination)
1015 if (a2_phrase_mode && DSTA2)
1017 uint32_t extra = (a2_start >> 16) % size;
1018 a1_x += extra << 16;
1021 uint32_t pixelSize = (size - 1) << 16;
1022 a1_x = (a1_x + pixelSize) & ~pixelSize;
1025 /* if (a2_phrase_mode) // v1
1027 // Bump the pointer to the next phrase boundary
1028 // Even though it works, this is crappy... Clean it up!
1029 uint32_t size = 64 / a2_psize;
1031 // Crappy kludge... ('aligning' source to destination)
1032 // Prolly should do this for A1 channel as well... [DONE]
1033 if (a1_phrase_mode && !DSTA2)
1035 uint32_t extra = (a1_start >> 16) % size;
1036 a2_x += extra << 16;
1039 uint32_t newx = (a2_x >> 16) / size;
1040 uint32_t newxrem = (a2_x >> 16) % size;
1042 a2_x |= (((newx + (newxrem == 0 ? 0 : 1)) * size) & 0xFFFF) << 16;
1044 if (a2_phrase_mode) // v1
1046 // Bump the pointer to the next phrase boundary
1047 // Even though it works, this is crappy... Clean it up!
1048 uint32_t size = 64 / a2_psize;
1050 // Crappy kludge... ('aligning' source to destination)
1051 // Prolly should do this for A1 channel as well... [DONE]
1052 if (a1_phrase_mode && !DSTA2)
1054 uint32_t extra = (a1_start >> 16) % size;
1055 a2_x += extra << 16;
1058 uint32_t pixelSize = (size - 1) << 16;
1059 a2_x = (a2_x + pixelSize) & ~pixelSize;
1062 //Not entirely: This still mucks things up... !!! FIX !!!
1063 //Should this go before or after the phrase mode mucking around?
1064 #ifndef SCREWY_CD_DEPENDENT
1068 a2_y += a2_step_y;//*/
1072 // write values back to registers
1073 WREG(A1_PIXEL, (a1_y & 0xFFFF0000) | ((a1_x >> 16) & 0xFFFF));
1074 WREG(A1_FPIXEL, (a1_y << 16) | (a1_x & 0xFFFF));
1075 WREG(A2_PIXEL, (a2_y & 0xFFFF0000) | ((a2_x >> 16) & 0xFFFF));
//
// Legacy blitter entry point.
//
// cmd is the 32-bit blitter command word (the caller passes the value stored
// at register offset 0x38). The function splits cmd into its bit fields,
// loads the A1/A2 address generator state (base, flags, pixel position, step,
// clip, mask, width) from blitter_ram into the working variables, loads the
// Z and Gouraud shading state, optionally writes a description of the blit to
// the log, and then calls blitter_generic(cmd) with blitter_working set to 1
// for the duration of the call.
//
// NOTE(review): the #endif comment that follows this function says it closes
// an #if 0 region, so this code is presumably not compiled -- confirm.
//
1079 void blitter_blit(uint32_t cmd)
1081 //Apparently this is doing *something*, just not sure exactly what...
1082 /*if (cmd == 0x41802E01)
1084 WriteLog("BLIT: Found our blit. Was: %08X ", cmd);
1086 WriteLog("Is: %08X\n", cmd);
// Lookup from the 2-bit pitch field of A1/A2_FLAGS to the a1_pitch/a2_pitch value.
1089 uint32_t pitchValue[4] = { 0, 1, 3, 2 };
// Break the command word into its bit fields.
1092 dst = (cmd >> 3) & 0x07;
1093 misc = (cmd >> 6) & 0x03;
1094 a1ctl = (cmd >> 8) & 0x7;
1095 mode = (cmd >> 11) & 0x07;
1096 ity = (cmd >> 14) & 0x0F;
1097 zop = (cmd >> 18) & 0x07;
1098 op = (cmd >> 21) & 0x0F;
1099 ctrl = (cmd >> 25) & 0x3F;
1101 // Addresses in A1/2_BASE are *phrase* aligned, i.e., bottom three bits are ignored!
1102 // NOTE: This fixes Rayman's bad collision detection AND keeps T2K working!
1103 a1_addr = REG(A1_BASE) & 0xFFFFFFF8;
1104 a2_addr = REG(A2_BASE) & 0xFFFFFFF8;
1106 a1_zoffs = (REG(A1_FLAGS) >> 6) & 7;
1107 a2_zoffs = (REG(A2_FLAGS) >> 6) & 7;
1109 xadd_a1_control = (REG(A1_FLAGS) >> 16) & 0x03;
1110 xadd_a2_control = (REG(A2_FLAGS) >> 16) & 0x03;
1112 a1_pitch = pitchValue[(REG(A1_FLAGS) & 0x03)];
1113 a2_pitch = pitchValue[(REG(A2_FLAGS) & 0x03)];
// Pixel count is the low half of PIXLINECOUNTER, line count the high half.
1115 n_pixels = REG(PIXLINECOUNTER) & 0xFFFF;
1116 n_lines = (REG(PIXLINECOUNTER) >> 16) & 0xFFFF;
// A1 position as 16.16 fixed point: integer halves from A1_PIXEL, fractions from A1_FPIXEL.
1118 a1_x = (REG(A1_PIXEL) << 16) | (REG(A1_FPIXEL) & 0xFFFF);
1119 a1_y = (REG(A1_PIXEL) & 0xFFFF0000) | (REG(A1_FPIXEL) >> 16);
1120 //According to the JTRM, X is restricted to 15 bits and Y is restricted to 12.
1121 //But it seems to fuck up T2K! !!! FIX !!!
1122 //Could it be sign extended??? Doesn't seem to be so according to JTRM
1123 // a1_x &= 0x7FFFFFFF, a1_y &= 0x0FFFFFFF;
1124 //Actually, it says that the X is 16 bits. But it still seems to mess with the Y when restricted to 12...
1125 // a1_y &= 0x0FFFFFFF;
1127 // a1_width = blitter_scanline_width[((REG(A1_FLAGS) & 0x00007E00) >> 9)];
1128 // According to JTRM, this must give a *whole number* of phrases in the current
1129 // pixel size (this means the lookup above is WRONG)... !!! FIX !!!
// Decode the width field: m = flag bits 9-10, e = flag bits 11-14, width = ((4 | m) << e) >> 2.
1130 uint32_t m = (REG(A1_FLAGS) >> 9) & 0x03, e = (REG(A1_FLAGS) >> 11) & 0x0F;
1131 a1_width = ((0x04 | m) << e) >> 2;//*/
// A2 position comes from A2_PIXEL alone, so its fractional (low 16) bits start at zero.
1133 a2_x = (REG(A2_PIXEL) & 0x0000FFFF) << 16;
1134 a2_y = (REG(A2_PIXEL) & 0xFFFF0000);
1135 //According to the JTRM, X is restricted to 15 bits and Y is restricted to 12.
1136 //But it seems to fuck up T2K! !!! FIX !!!
1137 // a2_x &= 0x7FFFFFFF, a2_y &= 0x0FFFFFFF;
1138 //Actually, it says that the X is 16 bits. But it still seems to mess with the Y when restricted to 12...
1139 // a2_y &= 0x0FFFFFFF;
1141 // a2_width = blitter_scanline_width[((REG(A2_FLAGS) & 0x00007E00) >> 9)];
1142 // According to JTRM, this must give a *whole number* of phrases in the current
1143 // pixel size (this means the lookup above is WRONG)... !!! FIX !!!
1144 m = (REG(A2_FLAGS) >> 9) & 0x03, e = (REG(A2_FLAGS) >> 11) & 0x0F;
1145 a2_width = ((0x04 | m) << e) >> 2;//*/
// A2 mask in the same 16.16 layout, with all fraction bits set.
1146 a2_mask_x = ((REG(A2_MASK) & 0x0000FFFF) << 16) | 0xFFFF;
1147 a2_mask_y = (REG(A2_MASK) & 0xFFFF0000) | 0xFFFF;
1149 // Check for "use mask" flag
1150 if (!(REG(A2_FLAGS) & 0x8000))
1152 a2_mask_x = 0xFFFFFFFF; // must be 16.16
1153 a2_mask_y = 0xFFFFFFFF; // must be 16.16
1158 // According to the official documentation, a hardware bug ties A2's yadd bit to A1's...
1159 a2_yadd = a1_yadd = (YADD1_A1 ? 1 << 16 : 0);
1164 // determine a1_xadd
1165 switch (xadd_a1_control)
1168 // This is a documented Jaguar bug relating to phrase mode and truncation... Look into it!
1169 // add phrase offset to X and truncate
1174 // add pixelsize (1) to X
1178 // add zero (for those nice vertical lines)
1182 // add the contents of the increment register
1183 a1_xadd = (REG(A1_INC) << 16) | (REG(A1_FINC) & 0x0000FFFF);
1184 a1_yadd = (REG(A1_INC) & 0xFFFF0000) | (REG(A1_FINC) >> 16);
1189 //Blit! (0011D000 -> 000B9600) count: 228 x 1, A1/2_FLAGS: 00073820/00064220 [cmd: 41802801]
1190 // A1 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 128 (1C), addctl: XADDINC YADD1 XSIGNADD YSIGNADD
1191 // A2 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 320 (21), addctl: XADD0 YADD1 XSIGNADD YSIGNADD
1192 //if (YADD1_A1 && YADD1_A2 && xadd_a2_control == XADD0 && xadd_a1_control == XADDINC)// &&
1193 // uint32_t a1f = REG(A1_FLAGS), a2f = REG(A2_FLAGS);
1194 //Ok, so this ISN'T it... Prolly the XADDPHR code above that's doing it...
1195 //if (REG(A1_FLAGS) == 0x00073820 && REG(A2_FLAGS) == 0x00064220 && cmd == 0x41802801)
1196 // A1 x/y: 14368/7, A2 x/y: 150/36
1197 //This is it... The problem...
1198 //if ((a1_x >> 16) == 14368) // 14368 = $3820
1199 // return; //Lesse what we got...
1209 // determine a2_xadd
1210 switch (xadd_a2_control)
1213 // add phrase offset to X and truncate
1218 // add pixelsize (1) to X
1222 // add zero (for those nice vertical lines)
1225 //This really isn't a valid bit combo for A2... Shouldn't this cause the blitter to just say no?
1227 WriteLog("BLIT: Asked to use invalid bit combo (XADDINC) for A2...\n");
1228 // add the contents of the increment register
1229 // since there is no register for a2 we just add 1
1230 //Let's do nothing, since it's not listed as a valid bit combo...
1231 // a2_xadd = 1 << 16;
1238 // Modify outer loop steps based on blitter command
1246 a1_step_x = (REG(A1_FSTEP) & 0xFFFF),
1247 a1_step_y = (REG(A1_FSTEP) >> 16);
1250 a1_step_x |= ((REG(A1_STEP) & 0x0000FFFF) << 16),
1251 a1_step_y |= ((REG(A1_STEP) & 0xFFFF0000));
1254 a2_step_x = (REG(A2_STEP) & 0x0000FFFF) << 16,
1255 a2_step_y = (REG(A2_STEP) & 0xFFFF0000);
1257 outer_loop = n_lines;
1262 a1_clip_x = REG(A1_CLIP) & 0x7FFF,
1263 a1_clip_y = (REG(A1_CLIP) >> 16) & 0x7FFF;
1265 // This phrase sizing is incorrect as well... !!! FIX !!! [NOTHING TO FIX]
1266 // Err, this is pixel size... (and it's OK)
1267 a2_psize = 1 << ((REG(A2_FLAGS) >> 3) & 0x07);
1268 a1_psize = 1 << ((REG(A1_FLAGS) >> 3) & 0x07);
1275 for(int v=0; v<4; v++)
1276 z_i[v] = REG(PHRASEZ0 + v*4);
1280 if (GOURD || GOURZ || SRCSHADE)
1282 gd_c[0] = blitter_ram[PATTERNDATA + 6];
1283 gd_i[0] = ((uint32_t)blitter_ram[PATTERNDATA + 7] << 16)
1284 | ((uint32_t)blitter_ram[SRCDATA + 6] << 8) | blitter_ram[SRCDATA + 7];
1286 gd_c[1] = blitter_ram[PATTERNDATA + 4];
1287 gd_i[1] = ((uint32_t)blitter_ram[PATTERNDATA + 5] << 16)
1288 | ((uint32_t)blitter_ram[SRCDATA + 4] << 8) | blitter_ram[SRCDATA + 5];
1290 gd_c[2] = blitter_ram[PATTERNDATA + 2];
1291 gd_i[2] = ((uint32_t)blitter_ram[PATTERNDATA + 3] << 16)
1292 | ((uint32_t)blitter_ram[SRCDATA + 2] << 8) | blitter_ram[SRCDATA + 3];
1294 gd_c[3] = blitter_ram[PATTERNDATA + 0];
1295 gd_i[3] = ((uint32_t)blitter_ram[PATTERNDATA + 1] << 16)
1296 | ((uint32_t)blitter_ram[SRCDATA + 0] << 8) | blitter_ram[SRCDATA + 1];
1298 gouraud_add = REG(INTENSITYINC);
// Intensity increment: low 24 bits of INTENSITYINC, sign-extended from bit 23.
1300 gd_ia = gouraud_add & 0x00FFFFFF;
1301 if (gd_ia & 0x00800000)
1302 gd_ia = 0xFF000000 | gd_ia;
// Colour increment: top byte of INTENSITYINC, sign-extended from bit 7.
1304 gd_ca = (gouraud_add >> 24) & 0xFF;
1305 if (gd_ca & 0x00000080)
1306 gd_ca = 0xFFFFFF00 | gd_ca;
1309 // Bit comparitor fixing...
1312 // Determine the data flow direction...
// Divide the A2 X step by the A2 pixel size (same expression as a2_psize).
1314 a2_step_x /= (1 << ((REG(A2_FLAGS) >> 3) & 0x07));
1318 /* if (BCOMPEN)//Kludge for Hover Strike... !!! FIX !!!
1320 // Determine the data flow direction...
1328 WriteLog("Blit!\n");
1329 WriteLog(" cmd = 0x%.8x\n",cmd);
1330 WriteLog(" a1_base = %08X\n", a1_addr);
1331 WriteLog(" a1_pitch = %d\n", a1_pitch);
1332 WriteLog(" a1_psize = %d\n", a1_psize);
1333 WriteLog(" a1_width = %d\n", a1_width);
1334 WriteLog(" a1_xadd = %f (phrase=%d)\n", (float)a1_xadd / 65536.0, a1_phrase_mode);
1335 WriteLog(" a1_yadd = %f\n", (float)a1_yadd / 65536.0);
1336 WriteLog(" a1_xstep = %f\n", (float)a1_step_x / 65536.0);
1337 WriteLog(" a1_ystep = %f\n", (float)a1_step_y / 65536.0);
1338 WriteLog(" a1_x = %f\n", (float)a1_x / 65536.0);
1339 WriteLog(" a1_y = %f\n", (float)a1_y / 65536.0);
1340 WriteLog(" a1_zoffs = %i\n",a1_zoffs);
1342 WriteLog(" a2_base = %08X\n", a2_addr);
1343 WriteLog(" a2_pitch = %d\n", a2_pitch);
1344 WriteLog(" a2_psize = %d\n", a2_psize);
1345 WriteLog(" a2_width = %d\n", a2_width);
1346 WriteLog(" a2_xadd = %f (phrase=%d)\n", (float)a2_xadd / 65536.0, a2_phrase_mode);
1347 WriteLog(" a2_yadd = %f\n", (float)a2_yadd / 65536.0);
1348 WriteLog(" a2_xstep = %f\n", (float)a2_step_x / 65536.0);
1349 WriteLog(" a2_ystep = %f\n", (float)a2_step_y / 65536.0);
1350 WriteLog(" a2_x = %f\n", (float)a2_x / 65536.0);
1351 WriteLog(" a2_y = %f\n", (float)a2_y / 65536.0);
1352 WriteLog(" a2_mask_x= 0x%.4x\n",a2_mask_x);
1353 WriteLog(" a2_mask_y= 0x%.4x\n",a2_mask_y);
1354 WriteLog(" a2_zoffs = %i\n",a2_zoffs);
1356 WriteLog(" count = %d x %d\n", n_pixels, n_lines);
1358 WriteLog(" command = %08X\n", cmd);
1359 WriteLog(" dsten = %i\n",DSTEN);
1360 WriteLog(" srcen = %i\n",SRCEN);
1361 WriteLog(" patdsel = %i\n",PATDSEL);
1362 WriteLog(" color = 0x%.8x\n",REG(PATTERNDATA));
1363 WriteLog(" dcompen = %i\n",DCOMPEN);
1364 WriteLog(" bcompen = %i\n",BCOMPEN);
1365 WriteLog(" cmpdst = %i\n",CMPDST);
1366 WriteLog(" GOURZ = %i\n",GOURZ);
1367 WriteLog(" GOURD = %i\n",GOURD);
1368 WriteLog(" SRCSHADE= %i\n",SRCSHADE);
1372 //NOTE: Pitch is ignored!
1374 //This *might* be the altimeter blits (they are)...
1375 //On captured screen, x-pos for black (inner) is 259, for pink is 257
1376 //Black is short by 3, pink is short by 1...
1378 Blit! (00110000 <- 000BF010) count: 9 x 31, A1/2_FLAGS: 000042E2/00010020 [cmd: 00010200]
1379 CMD -> src: dst: misc: a1ctl: UPDA1 mode: ity: PATDSEL z-op: op: LFU_CLEAR ctrl:
1380 A1 -> pitch: 4 phrases, depth: 16bpp, z-off: 3, width: 320 (21), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
1381 A2 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 1 (00), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
1382 A1 x/y: 262/124, A2 x/y: 128/0
1383 Blit! (00110000 <- 000BF010) count: 5 x 38, A1/2_FLAGS: 000042E2/00010020 [cmd: 00010200]
1384 CMD -> src: dst: misc: a1ctl: UPDA1 mode: ity: PATDSEL z-op: op: LFU_CLEAR ctrl:
1385 A1 -> pitch: 4 phrases, depth: 16bpp, z-off: 3, width: 320 (21), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
1386 A2 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 1 (00), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
1387 A1 x/y: 264/117, A2 x/y: 407/0
1389 Blit! (00110000 <- 000BF010) count: 9 x 23, A1/2_FLAGS: 000042E2/00010020 [cmd: 00010200]
1390 CMD -> src: dst: misc: a1ctl: UPDA1 mode: ity: PATDSEL z-op: op: LFU_CLEAR ctrl:
1391 A1 step values: -10 (X), 1 (Y)
1392 A1 -> pitch: 4(2) phrases, depth: 16bpp, z-off: 3, width: 320 (21), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
1393 A2 -> pitch: 1(0) phrases, depth: 16bpp, z-off: 0, width: 1 (00), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
1394 A1 x/y: 262/132, A2 x/y: 129/0
1395 Blit! (00110000 <- 000BF010) count: 5 x 27, A1/2_FLAGS: 000042E2/00010020 [cmd: 00010200]
1396 CMD -> src: dst: misc: a1ctl: UPDA1 mode: ity: PATDSEL z-op: op: LFU_CLEAR ctrl:
1397 A1 step values: -8 (X), 1 (Y)
1398 A1 -> pitch: 4(2) phrases, depth: 16bpp, z-off: 3, width: 320 (21), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
1399 A2 -> pitch: 1(0) phrases, depth: 16bpp, z-off: 0, width: 1 (00), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
1400 A1 x/y: 264/128, A2 x/y: 336/0
1402 264v vCursor ends up here...
1406 262v vCursor ends up here...
1410 Fixed! Now for more:
1412 ; This looks like the ship icon in the upper left corner...
1414 Blit! (00110000 <- 0010B2A8) count: 11 x 12, A1/2_FLAGS: 000042E2/00000020 [cmd: 09800609]
1415 CMD -> src: SRCEN dst: DSTEN misc: a1ctl: UPDA1 UPDA2 mode: ity: z-op: op: LFU_REPLACE ctrl: DCOMPEN
1416 A1 step values: -12 (X), 1 (Y)
1417 A2 step values: 0 (X), 0 (Y) [mask (unused): 00000000 - FFFFFFFF/FFFFFFFF]
1418 A1 -> pitch: 4 phrases, depth: 16bpp, z-off: 3, width: 320 (21), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
1419 A2 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 1 (00), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
1420 A1 x/y: 20/24, A2 x/y: 5780/0
1424 More (not sure this is a blitter problem as much as it's a GPU problem):
1425 All but the "M" are trashed...
1426 This does *NOT* look like a blitter problem, as it's rendering properly...
1427 Actually, if you look at the A1 step values, there IS a discrepancy!
1431 Blit! (00110000 <- 0010B2A8) count: 12 x 12, A1/2_FLAGS: 000042E2/00000020 [cmd: 09800609]
1432 CMD -> src: SRCEN dst: DSTEN misc: a1ctl: UPDA1 UPDA2 mode: ity: z-op: op: LFU_REPLACE ctrl: DCOMPEN
1433 A1 step values: -14 (X), 1 (Y)
1434 A2 step values: -4 (X), 0 (Y) [mask (unused): 00000000 - FFFFFFFF/FFFFFFFF]
1435 A1 -> pitch: 4 phrases, depth: 16bpp, z-off: 3, width: 320 (21), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
1436 A2 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 1 (00), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
1437 A1 x/y: 134/144, A2 x/y: 2516/0
1442 Blit! (00110000 <- 0010B2A8) count: 12 x 12, A1/2_FLAGS: 000042E2/00000020 [cmd: 09800609]
1443 CMD -> src: SRCEN dst: DSTEN misc: a1ctl: UPDA1 UPDA2 mode: ity: z-op: op: LFU_REPLACE ctrl: DCOMPEN
1444 A1 step values: -13 (X), 1 (Y)
1445 A2 step values: -4 (X), 0 (Y) [mask (unused): 00000000 - FFFFFFFF/FFFFFFFF]
1446 A1 -> pitch: 4 phrases, depth: 16bpp, z-off: 3, width: 320 (21), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
1447 A2 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 1 (00), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
1448 A1 x/y: 147/144, A2 x/y: 2660/0
1452 Blit! (00110000 <- 0010B2A8) count: 12 x 12, A1/2_FLAGS: 000042E2/00000020 [cmd: 09800609]
1453 CMD -> src: SRCEN dst: DSTEN misc: a1ctl: UPDA1 UPDA2 mode: ity: z-op: op: LFU_REPLACE ctrl: DCOMPEN
1454 A1 step values: -12 (X), 1 (Y)
1455 A2 step values: 0 (X), 0 (Y) [mask (unused): 00000000 - FFFFFFFF/FFFFFFFF]
1456 A1 -> pitch: 4 phrases, depth: 16bpp, z-off: 3, width: 320 (21), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
1457 A2 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 1 (00), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
1458 A1 x/y: 160/144, A2 x/y: 3764/0
1462 Blit! (00110000 <- 0010B2A8) count: 12 x 12, A1/2_FLAGS: 000042E2/00000020 [cmd: 09800609]
1463 CMD -> src: SRCEN dst: DSTEN misc: a1ctl: UPDA1 UPDA2 mode: ity: z-op: op: LFU_REPLACE ctrl: DCOMPEN
1464 A1 step values: -15 (X), 1 (Y)
1465 A2 step values: -4 (X), 0 (Y) [mask (unused): 00000000 - FFFFFFFF/FFFFFFFF]
1466 A1 -> pitch: 4 phrases, depth: 16bpp, z-off: 3, width: 320 (21), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
1467 A2 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 1 (00), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
1468 A1 x/y: 173/144, A2 x/y: 4052/0
1471 //extern int op_start_log;
1474 const char * ctrlStr[4] = { "XADDPHR\0", "XADDPIX\0", "XADD0\0", "XADDINC\0" };
1475 const char * bppStr[8] = { "1bpp\0", "2bpp\0", "4bpp\0", "8bpp\0", "16bpp\0", "32bpp\0", "???\0", "!!!\0" };
1476 const char * opStr[16] = { "LFU_CLEAR", "LFU_NSAND", "LFU_NSAD", "LFU_NOTS", "LFU_SAND", "LFU_NOTD", "LFU_N_SXORD", "LFU_NSORND",
1477 "LFU_SAD", "LFU_XOR", "LFU_D", "LFU_NSORD", "LFU_REPLACE", "LFU_SORND", "LFU_SORD", "LFU_ONE" };
1478 uint32_t /*src = cmd & 0x07, dst = (cmd >> 3) & 0x07, misc = (cmd >> 6) & 0x03,
1479 a1ctl = (cmd >> 8) & 0x07,*/ mode = (cmd >> 11) & 0x07/*, ity = (cmd >> 14) & 0x0F,
1480 zop = (cmd >> 18) & 0x07, op = (cmd >> 21) & 0x0F, ctrl = (cmd >> 25) & 0x3F*/;
1481 uint32_t a1f = REG(A1_FLAGS), a2f = REG(A2_FLAGS);
1482 uint32_t p1 = a1f & 0x07, p2 = a2f & 0x07,
1483 d1 = (a1f >> 3) & 0x07, d2 = (a2f >> 3) & 0x07,
1484 zo1 = (a1f >> 6) & 0x07, zo2 = (a2f >> 6) & 0x07,
1485 w1 = (a1f >> 9) & 0x3F, w2 = (a2f >> 9) & 0x3F,
1486 ac1 = (a1f >> 16) & 0x1F, ac2 = (a2f >> 16) & 0x1F;
1487 uint32_t iw1 = ((0x04 | (w1 & 0x03)) << ((w1 & 0x3C) >> 2)) >> 2;
1488 uint32_t iw2 = ((0x04 | (w2 & 0x03)) << ((w2 & 0x3C) >> 2)) >> 2;
1489 WriteLog("Blit! (%08X %s %08X) count: %d x %d, A1/2_FLAGS: %08X/%08X [cmd: %08X]\n", a1_addr, (mode&0x01 ? "->" : "<-"), a2_addr, n_pixels, n_lines, a1f, a2f, cmd);
1490 // WriteLog(" CMD -> src: %d, dst: %d, misc: %d, a1ctl: %d, mode: %d, ity: %1X, z-op: %d, op: %1X, ctrl: %02X\n", src, dst, misc, a1ctl, mode, ity, zop, op, ctrl);
1492 WriteLog(" CMD -> src: %s%s%s ", (cmd & 0x0001 ? "SRCEN " : ""), (cmd & 0x0002 ? "SRCENZ " : ""), (cmd & 0x0004 ? "SRCENX" : ""));
1493 WriteLog("dst: %s%s%s ", (cmd & 0x0008 ? "DSTEN " : ""), (cmd & 0x0010 ? "DSTENZ " : ""), (cmd & 0x0020 ? "DSTWRZ" : ""));
1494 WriteLog("misc: %s%s ", (cmd & 0x0040 ? "CLIP_A1 " : ""), (cmd & 0x0080 ? "???" : ""));
1495 WriteLog("a1ctl: %s%s%s ", (cmd & 0x0100 ? "UPDA1F " : ""), (cmd & 0x0200 ? "UPDA1 " : ""), (cmd & 0x0400 ? "UPDA2" : ""));
1496 WriteLog("mode: %s%s%s ", (cmd & 0x0800 ? "DSTA2 " : ""), (cmd & 0x1000 ? "GOURD " : ""), (cmd & 0x2000 ? "GOURZ" : ""));
1497 WriteLog("ity: %s%s%s%s ", (cmd & 0x4000 ? "TOPBEN " : ""), (cmd & 0x8000 ? "TOPNEN " : ""), (cmd & 0x00010000 ? "PATDSEL" : ""), (cmd & 0x00020000 ? "ADDDSEL" : ""));
1498 WriteLog("z-op: %s%s%s ", (cmd & 0x00040000 ? "ZMODELT " : ""), (cmd & 0x00080000 ? "ZMODEEQ " : ""), (cmd & 0x00100000 ? "ZMODEGT" : ""));
1499 WriteLog("op: %s ", opStr[(cmd >> 21) & 0x0F]);
1500 WriteLog("ctrl: %s%s%s%s%s%s\n", (cmd & 0x02000000 ? "CMPDST " : ""), (cmd & 0x04000000 ? "BCOMPEN " : ""), (cmd & 0x08000000 ? "DCOMPEN " : ""), (cmd & 0x10000000 ? "BKGWREN " : ""), (cmd & 0x20000000 ? "BUSHI " : ""), (cmd & 0x40000000 ? "SRCSHADE" : ""));
1503 WriteLog(" A1 step values: %d (X), %d (Y)\n", a1_step_x >> 16, a1_step_y >> 16);
1506 WriteLog(" A2 step values: %d (X), %d (Y) [mask (%sused): %08X - %08X/%08X]\n", a2_step_x >> 16, a2_step_y >> 16, (a2f & 0x8000 ? "" : "un"), REG(A2_MASK), a2_mask_x, a2_mask_y);
1508 WriteLog(" A1 -> pitch: %d phrases, depth: %s, z-off: %d, width: %d (%02X), addctl: %s %s %s %s\n", 1 << p1, bppStr[d1], zo1, iw1, w1, ctrlStr[ac1&0x03], (ac1&0x04 ? "YADD1" : "YADD0"), (ac1&0x08 ? "XSIGNSUB" : "XSIGNADD"), (ac1&0x10 ? "YSIGNSUB" : "YSIGNADD"));
1509 WriteLog(" A2 -> pitch: %d phrases, depth: %s, z-off: %d, width: %d (%02X), addctl: %s %s %s %s\n", 1 << p2, bppStr[d2], zo2, iw2, w2, ctrlStr[ac2&0x03], (ac2&0x04 ? "YADD1" : "YADD0"), (ac2&0x08 ? "XSIGNSUB" : "XSIGNADD"), (ac2&0x10 ? "YSIGNSUB" : "YSIGNADD"));
1510 WriteLog(" A1 x/y: %d/%d, A2 x/y: %d/%d Pattern: %08X%08X SRCDATA: %08X%08X\n", a1_x >> 16, a1_y >> 16, a2_x >> 16, a2_y >> 16, REG(PATTERNDATA), REG(PATTERNDATA + 4), REG(SRCDATA), REG(SRCDATA + 4));
1511 // blit_start_log = 0;
1512 // op_start_log = 1;
1515 blitter_working = 1;
1516 //#ifndef USE_GENERIC_BLITTER
1517 // if (!blitter_execute_cached_code(blitter_in_cache(cmd)))
1519 blitter_generic(cmd);
1521 /*if (blit_start_log)
1523 if (a1_addr == 0xF03000 && a2_addr == 0x004D58)
1525 WriteLog("\nBytes at 004D58:\n");
1526 for(int i=0x004D58; i<0x004D58+(10*127*4); i++)
1527 WriteLog("%02X ", JaguarReadByte(i));
1528 WriteLog("\nBytes at F03000:\n");
1529 for(int i=0xF03000; i<0xF03000+(6*127*4); i++)
1530 WriteLog("%02X ", JaguarReadByte(i));
1535 blitter_working = 0;
1537 #endif // of the #if 0 near the top...
1538 /*******************************************************************************
1539 ********************** STUFF CUT ABOVE THIS LINE! ******************************
1540 *******************************************************************************/
//
// Blitter initialisation entry point.
// NOTE(review): presumably called once when the emulated machine is set up --
// confirm against the callers and the function body.
//
1543 void BlitterInit(void)
//
// Reset the blitter register file by zeroing its first 0xA0 bytes.
// NOTE(review): blitter_ram is declared with 0x100 entries, so offsets
// 0xA0-0xFF are left untouched by this memset -- presumably intentional, confirm.
//
1549 void BlitterReset(void)
1551 memset(blitter_ram, 0x00, 0xA0);
//
// Blitter shutdown hook; the visible action is writing a completion line to the log.
//
1555 void BlitterDone(void)
1557 WriteLog("BLIT: Done.\n");
//
// Read one byte from the blitter register file.
//
// Offsets 0x38-0x3B (the status register) are answered with fixed values
// rather than from blitter_ram; per the notes below the 32-bit value is
// $00000805, and the last byte is returned as 0x05. Reads of 0x04-0x07 are
// redirected to blitter_ram[offset + 0x08] (A1_PIXEL) and reads of 0x2C-0x2F
// to blitter_ram[offset + 0x04] (A2_PIXEL), which the notes below describe as
// a documented Jaguar I bug. Every other offset returns blitter_ram[offset].
// The who parameter is not referenced by any of the lines shown here.
//
1561 uint8_t BlitterReadByte(uint32_t offset, uint32_t who/*=UNKNOWN*/)
1566 //This isn't cycle accurate--how to fix? !!! FIX !!!
1567 //Probably have to do some multi-threaded implementation or at least a reentrant safe implementation...
1568 //Real hardware returns $00000805, just like the JTRM says.
1569 if (offset == (0x38 + 0))
1571 if (offset == (0x38 + 1))
1573 if (offset == (0x38 + 2))
1575 if (offset == (0x38 + 3))
1576 return 0x05; // always idle/never stopped (collision detection ignored!)
1578 // CHECK HERE ONCE THIS FIX HAS BEEN TESTED: [X]
1580 if (offset >= 0x04 && offset <= 0x07)
1581 //This is it. I wonder if it just ignores the lower three bits?
1582 //No, this is a documented Jaguar I bug. It also bites the read at $F02230 as well...
1583 return blitter_ram[offset + 0x08]; // A1_PIXEL ($F0220C) read at $F02204
1585 if (offset >= 0x2C && offset <= 0x2F)
1586 return blitter_ram[offset + 0x04]; // A2_PIXEL ($F02230) read at $F0222C
1588 return blitter_ram[offset];
1593 uint16_t BlitterReadWord(uint32_t offset, uint32_t who/*=UNKNOWN*/)
1595 return ((uint16_t)BlitterReadByte(offset, who) << 8) | (uint16_t)BlitterReadByte(offset+1, who);
1600 uint32_t BlitterReadLong(uint32_t offset, uint32_t who/*=UNKNOWN*/)
1602 return (BlitterReadWord(offset, who) << 16) | BlitterReadWord(offset+2, who);
//
// Write one byte to the blitter register file.
//
// Writes in the range 0x7C-0x9B (the INTENSITY0-3 registers and the Z
// registers that follow them) are scattered into PATTERNDATA/SRCDATA and
// SRCZINT/SRCZFRAC respectively, at the byte positions listed in the cases
// below. Writes to the 64-bit registers (SRCDATA, DSTDATA, DSTZ, SRCZINT,
// SRCZFRAC, PATTERNDATA) are stored with their two 32-bit halves swapped:
// bytes 0-3 land at offset + 4 and bytes 4-7 land at offset - 4.
// NOTE(review): the final store to blitter_ram[offset] presumably handles
// every remaining offset -- confirm against the full branch structure.
// The who parameter is only referenced by the disabled debug output.
//
1606 void BlitterWriteByte(uint32_t offset, uint8_t data, uint32_t who/*=UNKNOWN*/)
1608 /*if (offset & 0xFF == 0x7B)
1609 WriteLog("--> Wrote to B_STOP: value -> %02X\n", data);*/
1611 /*if ((offset >= PATTERNDATA) && (offset < PATTERNDATA + 8))
1613 printf("--> %s wrote %02X to byte %u of PATTERNDATA...\n", whoName[who], data, offset - PATTERNDATA);
1617 // This handles writes to INTENSITY0-3 by also writing them to their proper places in
1618 // PATTERNDATA & SOURCEDATA (should do the same for the Z registers! !!! FIX !!! [DONE])
1619 if ((offset >= 0x7C) && (offset <= 0x9B))
1623 // INTENSITY registers 0-3
1625 case 0x7D: blitter_ram[PATTERNDATA + 7] = data; break;
1626 case 0x7E: blitter_ram[SRCDATA + 6] = data; break;
1627 case 0x7F: blitter_ram[SRCDATA + 7] = data; break;
1630 case 0x81: blitter_ram[PATTERNDATA + 5] = data; break;
1631 case 0x82: blitter_ram[SRCDATA + 4] = data; break;
1632 case 0x83: blitter_ram[SRCDATA + 5] = data; break;
1635 case 0x85: blitter_ram[PATTERNDATA + 3] = data; break;
1636 case 0x86: blitter_ram[SRCDATA + 2] = data; break;
1637 case 0x87: blitter_ram[SRCDATA + 3] = data; break;
1640 case 0x89: blitter_ram[PATTERNDATA + 1] = data; break;
1641 case 0x8A: blitter_ram[SRCDATA + 0] = data; break;
1642 case 0x8B: blitter_ram[SRCDATA + 1] = data; break;
1646 case 0x8C: blitter_ram[SRCZINT + 6] = data; break;
1647 case 0x8D: blitter_ram[SRCZINT + 7] = data; break;
1648 case 0x8E: blitter_ram[SRCZFRAC + 6] = data; break;
1649 case 0x8F: blitter_ram[SRCZFRAC + 7] = data; break;
1651 case 0x90: blitter_ram[SRCZINT + 4] = data; break;
1652 case 0x91: blitter_ram[SRCZINT + 5] = data; break;
1653 case 0x92: blitter_ram[SRCZFRAC + 4] = data; break;
1654 case 0x93: blitter_ram[SRCZFRAC + 5] = data; break;
1656 case 0x94: blitter_ram[SRCZINT + 2] = data; break;
1657 case 0x95: blitter_ram[SRCZINT + 3] = data; break;
1658 case 0x96: blitter_ram[SRCZFRAC + 2] = data; break;
1659 case 0x97: blitter_ram[SRCZFRAC + 3] = data; break;
1661 case 0x98: blitter_ram[SRCZINT + 0] = data; break;
1662 case 0x99: blitter_ram[SRCZINT + 1] = data; break;
1663 case 0x9A: blitter_ram[SRCZFRAC + 0] = data; break;
1664 case 0x9B: blitter_ram[SRCZFRAC + 1] = data; break;
1668 // It looks weird, but this is how the 64 bit registers are actually handled...!
1670 else if ((offset >= SRCDATA + 0) && (offset <= SRCDATA + 3)
1671 || (offset >= DSTDATA + 0) && (offset <= DSTDATA + 3)
1672 || (offset >= DSTZ + 0) && (offset <= DSTZ + 3)
1673 || (offset >= SRCZINT + 0) && (offset <= SRCZINT + 3)
1674 || (offset >= SRCZFRAC + 0) && (offset <= SRCZFRAC + 3)
1675 || (offset >= PATTERNDATA + 0) && (offset <= PATTERNDATA + 3))
1677 blitter_ram[offset + 4] = data;
1679 else if ((offset >= SRCDATA + 4) && (offset <= SRCDATA + 7)
1680 || (offset >= DSTDATA + 4) && (offset <= DSTDATA + 7)
1681 || (offset >= DSTZ + 4) && (offset <= DSTZ + 7)
1682 || (offset >= SRCZINT + 4) && (offset <= SRCZINT + 7)
1683 || (offset >= SRCZFRAC + 4) && (offset <= SRCZFRAC + 7)
1684 || (offset >= PATTERNDATA + 4) && (offset <= PATTERNDATA + 7))
1686 blitter_ram[offset - 4] = data;
1689 blitter_ram[offset] = data;
//
// Write a 16-bit value to the blitter register file as two byte writes,
// high byte first (big endian).
//
// A word write whose low offset byte is 0x3A -- the second half of the 32-bit
// command register at 0x38 -- is treated as the trigger for a blit. Which
// blitter implementation is invoked depends on the USE_ORIGINAL_BLITTER,
// USE_MIDSUMMER_BLITTER and USE_MIDSUMMER_BLITTER_MKII defines.
//
1693 void BlitterWriteWord(uint32_t offset, uint16_t data, uint32_t who/*=UNKNOWN*/)
1695 /*if (((offset & 0xFF) >= PATTERNDATA) && ((offset & 0xFF) < PATTERNDATA + 8))
1697 printf("----> %s wrote %04X to byte %u of PATTERNDATA...\n", whoName[who], data, offset - (0xF02200 + PATTERNDATA));
1701 /* if (offset & 0xFF == A1_PIXEL && data == 14368)
1703 WriteLog("\n1\nA1_PIXEL written by %s (%u)...\n\n\n", whoName[who], data);
1704 extern bool doGPUDis;
1707 if ((offset & 0xFF) == (A1_PIXEL + 2) && data == 14368)
1709 WriteLog("\n2\nA1_PIXEL written by %s (%u)...\n\n\n", whoName[who], data);
1710 extern bool doGPUDis;
1715 BlitterWriteByte(offset + 0, data >> 8, who);
1716 BlitterWriteByte(offset + 1, data & 0xFF, who);
1718 if ((offset & 0xFF) == 0x3A)
1719 // I.e., the second write of 32-bit value--not convinced this is the best way to do this!
1720 // But then again, according to the Jaguar docs, this is correct...!
1721 /*extern int blit_start_log;
1722 extern bool doGPUDis;
1725 WriteLog("BLIT: Blitter started by %s...\n", whoName[who]);
1728 #ifdef USE_ORIGINAL_BLITTER
1729 blitter_blit(GET32(blitter_ram, 0x38));
1731 #ifdef USE_MIDSUMMER_BLITTER
1732 BlitterMidsummer(GET32(blitter_ram, 0x38));
1734 #ifdef USE_MIDSUMMER_BLITTER_MKII
1735 BlitterMidsummer2();
//
// Write a 32-bit value to the blitter register file as two word writes,
// high word first: the upper 16 bits go to offset and the lower 16 bits to
// offset + 2. who is passed through unchanged to BlitterWriteWord.
//
1741 void BlitterWriteLong(uint32_t offset, uint32_t data, uint32_t who/*=UNKNOWN*/)
1743 /*if (((offset & 0xFF) >= PATTERNDATA) && ((offset & 0xFF) < PATTERNDATA + 8))
1745 printf("------> %s wrote %08X to byte %u of PATTERNDATA...\n", whoName[who], data, offset - (0xF02200 + PATTERNDATA));
1749 /* if ((offset & 0xFF) == A1_PIXEL && (data & 0xFFFF) == 14368)
1751 WriteLog("\n3\nA1_PIXEL written by %s (%u)...\n\n\n", whoName[who], data);
1752 extern bool doGPUDis;
1757 BlitterWriteWord(offset + 0, data >> 16, who);
1758 BlitterWriteWord(offset + 2, data & 0xFFFF, who);
1764 const char * opStr[16] = { "LFU_CLEAR", "LFU_NSAND", "LFU_NSAD", "LFU_NOTS", "LFU_SAND", "LFU_NOTD", "LFU_N_SXORD", "LFU_NSORND",
1765 "LFU_SAD", "LFU_XOR", "LFU_D", "LFU_NSORD", "LFU_REPLACE", "LFU_SORND", "LFU_SORD", "LFU_ONE" };
1766 uint32_t cmd = GET32(blitter_ram, 0x38);
1767 uint32_t m = (REG(A1_FLAGS) >> 9) & 0x03, e = (REG(A1_FLAGS) >> 11) & 0x0F;
1768 uint32_t a1_width = ((0x04 | m) << e) >> 2;
1769 m = (REG(A2_FLAGS) >> 9) & 0x03, e = (REG(A2_FLAGS) >> 11) & 0x0F;
1770 uint32_t a2_width = ((0x04 | m) << e) >> 2;
1772 WriteLog("Blit!\n");
1773 WriteLog(" COMMAND = %08X\n", cmd);
1774 WriteLog(" a1_base = %08X\n", REG(A1_BASE));
1775 WriteLog(" a1_flags = %08X (%c %c %c %c%c . %c%c%c%c%c%c %c%c%c %c%c%c . %c%c)\n", REG(A1_FLAGS),
1776 (REG(A1_FLAGS) & 0x100000 ? '1' : '0'),
1777 (REG(A1_FLAGS) & 0x080000 ? '1' : '0'),
1778 (REG(A1_FLAGS) & 0x040000 ? '1' : '0'),
1779 (REG(A1_FLAGS) & 0x020000 ? '1' : '0'),
1780 (REG(A1_FLAGS) & 0x010000 ? '1' : '0'),
1781 (REG(A1_FLAGS) & 0x004000 ? '1' : '0'),
1782 (REG(A1_FLAGS) & 0x002000 ? '1' : '0'),
1783 (REG(A1_FLAGS) & 0x001000 ? '1' : '0'),
1784 (REG(A1_FLAGS) & 0x000800 ? '1' : '0'),
1785 (REG(A1_FLAGS) & 0x000400 ? '1' : '0'),
1786 (REG(A1_FLAGS) & 0x000200 ? '1' : '0'),
1787 (REG(A1_FLAGS) & 0x000100 ? '1' : '0'),
1788 (REG(A1_FLAGS) & 0x000080 ? '1' : '0'),
1789 (REG(A1_FLAGS) & 0x000040 ? '1' : '0'),
1790 (REG(A1_FLAGS) & 0x000020 ? '1' : '0'),
1791 (REG(A1_FLAGS) & 0x000010 ? '1' : '0'),
1792 (REG(A1_FLAGS) & 0x000008 ? '1' : '0'),
1793 (REG(A1_FLAGS) & 0x000002 ? '1' : '0'),
1794 (REG(A1_FLAGS) & 0x000001 ? '1' : '0'));
1795 WriteLog(" pitch=%u, pixSz=%u, zOff=%u, width=%u, xCtrl=%u\n",
1796 REG(A1_FLAGS) & 0x00003, (REG(A1_FLAGS) & 0x00038) >> 3,
1797 (REG(A1_FLAGS) & 0x001C0) >> 6, a1_width, (REG(A1_FLAGS) & 0x30000) >> 16);
1798 WriteLog(" a1_clip = %u, %u (%08X)\n", GET16(blitter_ram, A1_CLIP + 2), GET16(blitter_ram, A1_CLIP + 0), GET32(blitter_ram, A1_CLIP));
1799 WriteLog(" a1_pixel = %d, %d (%08X)\n", (int16_t)GET16(blitter_ram, A1_PIXEL + 2), (int16_t)GET16(blitter_ram, A1_PIXEL + 0), GET32(blitter_ram, A1_PIXEL));
1800 WriteLog(" a1_step = %d, %d (%08X)\n", (int16_t)GET16(blitter_ram, A1_STEP + 2), (int16_t)GET16(blitter_ram, A1_STEP + 0), GET32(blitter_ram, A1_STEP));
1801 WriteLog(" a1_fstep = %u, %u (%08X)\n", GET16(blitter_ram, A1_FSTEP + 2), GET16(blitter_ram, A1_FSTEP + 0), GET32(blitter_ram, A1_FSTEP));
1802 WriteLog(" a1_fpixel= %u, %u (%08X)\n", GET16(blitter_ram, A1_FPIXEL + 2), GET16(blitter_ram, A1_FPIXEL + 0), GET32(blitter_ram, A1_FPIXEL));
1803 WriteLog(" a1_inc = %d, %d (%08X)\n", (int16_t)GET16(blitter_ram, A1_INC + 2), (int16_t)GET16(blitter_ram, A1_INC + 0), GET32(blitter_ram, A1_INC));
1804 WriteLog(" a1_finc = %u, %u (%08X)\n", GET16(blitter_ram, A1_FINC + 2), GET16(blitter_ram, A1_FINC + 0), GET32(blitter_ram, A1_FINC));
1806 WriteLog(" a2_base = %08X\n", REG(A2_BASE));
1807 WriteLog(" a2_flags = %08X (%c %c %c %c%c %c %c%c%c%c%c%c %c%c%c %c%c%c . %c%c)\n", REG(A2_FLAGS),
1808 (REG(A2_FLAGS) & 0x100000 ? '1' : '0'),
1809 (REG(A2_FLAGS) & 0x080000 ? '1' : '0'),
1810 (REG(A2_FLAGS) & 0x040000 ? '1' : '0'),
1811 (REG(A2_FLAGS) & 0x020000 ? '1' : '0'),
1812 (REG(A2_FLAGS) & 0x010000 ? '1' : '0'),
1813 (REG(A2_FLAGS) & 0x008000 ? '1' : '0'),
1814 (REG(A2_FLAGS) & 0x004000 ? '1' : '0'),
1815 (REG(A2_FLAGS) & 0x002000 ? '1' : '0'),
1816 (REG(A2_FLAGS) & 0x001000 ? '1' : '0'),
1817 (REG(A2_FLAGS) & 0x000800 ? '1' : '0'),
1818 (REG(A2_FLAGS) & 0x000400 ? '1' : '0'),
1819 (REG(A2_FLAGS) & 0x000200 ? '1' : '0'),
1820 (REG(A2_FLAGS) & 0x000100 ? '1' : '0'),
1821 (REG(A2_FLAGS) & 0x000080 ? '1' : '0'),
1822 (REG(A2_FLAGS) & 0x000040 ? '1' : '0'),
1823 (REG(A2_FLAGS) & 0x000020 ? '1' : '0'),
1824 (REG(A2_FLAGS) & 0x000010 ? '1' : '0'),
1825 (REG(A2_FLAGS) & 0x000008 ? '1' : '0'),
1826 (REG(A2_FLAGS) & 0x000002 ? '1' : '0'),
1827 (REG(A2_FLAGS) & 0x000001 ? '1' : '0'));
1828 WriteLog(" pitch=%u, pixSz=%u, zOff=%u, width=%u, xCtrl=%u\n",
1829 REG(A2_FLAGS) & 0x00003, (REG(A2_FLAGS) & 0x00038) >> 3,
1830 (REG(A2_FLAGS) & 0x001C0) >> 6, a2_width, (REG(A2_FLAGS) & 0x30000) >> 16);
1831 WriteLog(" a2_mask = %u, %u (%08X)\n", GET16(blitter_ram, A2_MASK + 2), GET16(blitter_ram, A2_MASK + 0), GET32(blitter_ram, A2_MASK));
1832 WriteLog(" a2_pixel = %d, %d (%08X)\n", (int16_t)GET16(blitter_ram, A2_PIXEL + 2), (int16_t)GET16(blitter_ram, A2_PIXEL + 0), GET32(blitter_ram, A2_PIXEL));
1833 WriteLog(" a2_step = %d, %d (%08X)\n", (int16_t)GET16(blitter_ram, A2_STEP + 2), (int16_t)GET16(blitter_ram, A2_STEP + 0), GET32(blitter_ram, A2_STEP));
1835 WriteLog(" count = %d x %d\n", GET16(blitter_ram, PIXLINECOUNTER + 2), GET16(blitter_ram, PIXLINECOUNTER));
1837 WriteLog(" SRCEN = %s\n", (SRCEN ? "1" : "0"));
1838 WriteLog(" SRCENZ = %s\n", (SRCENZ ? "1" : "0"));
1839 WriteLog(" SRCENX = %s\n", (SRCENX ? "1" : "0"));
1840 WriteLog(" DSTEN = %s\n", (DSTEN ? "1" : "0"));
1841 WriteLog(" DSTENZ = %s\n", (DSTENZ ? "1" : "0"));
1842 WriteLog(" DSTWRZ = %s\n", (DSTWRZ ? "1" : "0"));
1843 WriteLog(" CLIPA1 = %s\n", (CLIPA1 ? "1" : "0"));
1844 WriteLog(" UPDA1F = %s\n", (UPDA1F ? "1" : "0"));
1845 WriteLog(" UPDA1 = %s\n", (UPDA1 ? "1" : "0"));
1846 WriteLog(" UPDA2 = %s\n", (UPDA2 ? "1" : "0"));
1847 WriteLog(" DSTA2 = %s\n", (DSTA2 ? "1" : "0"));
1848 WriteLog(" ZOP = %s %s %s\n", (Z_OP_INF ? "<" : ""), (Z_OP_EQU ? "=" : ""), (Z_OP_SUP ? ">" : ""));
1849 WriteLog("+-LFUFUNC = %s\n", opStr[(cmd >> 21) & 0x0F]);
1850 WriteLog("| PATDSEL = %s (PD=%08X%08X)\n", (PATDSEL ? "1" : "0"), REG(PATTERNDATA), REG(PATTERNDATA + 4));
1851 WriteLog("+-ADDDSEL = %s\n", (ADDDSEL ? "1" : "0"));
1852 WriteLog(" CMPDST = %s\n", (CMPDST ? "1" : "0"));
1853 WriteLog(" BCOMPEN = %s\n", (BCOMPEN ? "1" : "0"));
1854 WriteLog(" DCOMPEN = %s\n", (DCOMPEN ? "1" : "0"));
1855 WriteLog(" TOPBEN = %s\n", (TOPBEN ? "1" : "0"));
1856 WriteLog(" TOPNEN = %s\n", (TOPNEN ? "1" : "0"));
1857 WriteLog(" BKGWREN = %s\n", (BKGWREN ? "1" : "0"));
1858 WriteLog(" GOURD = %s (II=%08X, SD=%08X%08X)\n", (GOURD ? "1" : "0"), REG(INTENSITYINC), REG(SRCDATA), REG(SRCDATA + 4));
1859 WriteLog(" GOURZ = %s (ZI=%08X, ZD=%08X%08X, SZ1=%08X%08X, SZ2=%08X%08X)\n", (GOURZ ? "1" : "0"), REG(ZINC), REG(DSTZ), REG(DSTZ + 4),
1860 REG(SRCZINT), REG(SRCZINT + 4), REG(SRCZFRAC), REG(SRCZFRAC + 4));
1861 WriteLog(" SRCSHADE = %s\n", (SRCSHADE ? "1" : "0"));
1865 #ifdef USE_MIDSUMMER_BLITTER
1867 // Here's an attempt to write a blitter that conforms to the Midsummer specs--since
1868 // it's supposedly backwards compatible, it should work well...
1870 //#define LOG_BLITTER_MEMORY_ACCESSES
1872 #define DATINIT (false) // Condition named in the state notes below ("if DATINIT goto init_if"); held permanently false here
1873 #define TXTEXT (false) // Condition named in the state notes below ("if TXTEXT goto txtread"); held permanently false here
1874 #define POLYGON (false) // Condition named in the state notes below ("GOURZ.POLYGON goto zfupdate"); held permanently false here
1876 void BlitterMidsummer(uint32_t cmd)
1881 uint32_t outer_loop, inner_loop, a1_addr, a2_addr;
1882 int32_t a1_x, a1_y, a2_x, a2_y, a1_width, a2_width;
1883 uint8_t a1_phrase_mode, a2_phrase_mode;
1885 a1_addr = REG(A1_BASE) & 0xFFFFFFF8;
1886 a2_addr = REG(A2_BASE) & 0xFFFFFFF8;
1887 a1_x = (REG(A1_PIXEL) << 16) | (REG(A1_FPIXEL) & 0xFFFF);
1888 a1_y = (REG(A1_PIXEL) & 0xFFFF0000) | (REG(A1_FPIXEL) >> 16);
1889 uint32_t m = (REG(A1_FLAGS) >> 9) & 0x03, e = (REG(A1_FLAGS) >> 11) & 0x0F;
1890 a1_width = ((0x04 | m) << e) >> 2;//*/
1891 a2_x = (REG(A2_PIXEL) & 0x0000FFFF) << 16;
1892 a2_y = (REG(A2_PIXEL) & 0xFFFF0000);
1893 m = (REG(A2_FLAGS) >> 9) & 0x03, e = (REG(A2_FLAGS) >> 11) & 0x0F;
1894 a2_width = ((0x04 | m) << e) >> 2;//*/
1896 a1_phrase_mode = a2_phrase_mode = 0;
1898 if ((blitter_ram[A1_FLAGS + 1] & 0x03) == 0)
1901 if ((blitter_ram[A2_FLAGS + 1] & 0x03) == 0)
1904 #define INNER0 (inner_loop == 0)
1905 #define OUTER0 (outer_loop == 0)
1907 // $01800005 has SRCENX, may have to investigate further...
1908 // $00011008 has GOURD & DSTEN.
1909 // $41802F41 has SRCSHADE, CLIPA1
1910 /*bool logBlit = false;
1911 if (cmd != 0x00010200 && cmd != 0x01800001 && cmd != 0x01800005
1912 && cmd != 0x00011008 && cmd !=0x41802F41)
1918 uint64_t srcData = GET64(blitter_ram, SRCDATA), srcXtraData,
1919 dstData = GET64(blitter_ram, DSTDATA), writeData;
1920 uint32_t srcAddr, dstAddr;
1921 uint8_t bitCount, a1PixelSize, a2PixelSize;
1923 // JTRM says phrase mode only works for 8BPP or higher, so let's try this...
1924 uint32_t phraseOffset[8] = { 8, 8, 8, 8, 4, 2, 0, 0 };
1925 uint8_t pixelShift[8] = { 3, 2, 1, 0, 1, 2, 0, 0 };
1927 a1PixelSize = (blitter_ram[A1_FLAGS + 3] >> 3) & 0x07;
1928 a2PixelSize = (blitter_ram[A2_FLAGS + 3] >> 3) & 0x07;
1930 outer_loop = GET16(blitter_ram, PIXLINECOUNTER + 0);
1932 if (outer_loop == 0)
1933 outer_loop = 0x10000;
1935 // We just list the states here and jump from state to state in order to
1936 // keep things somewhat clear. Optimization/cleanups later.
1938 //idle: // Blitter is idle, and will not perform any bus activity
1940 idle Blitter is off the bus, and no activity takes place.
1941 if GO if DATINIT goto init_if
1950 inner Inner loop is active, read and write cycles are performed
1952 inner: // Run inner loop state machine (asserts step from its idle state)
1953 inner_loop = GET16(blitter_ram, PIXLINECOUNTER + 2);
1955 if (inner_loop == 0)
1956 inner_loop = 0x10000;
1959 ------------------------------
1960 idle: Inactive, blitter is idle or passing round outer loop
1961 idle Another state in the outer loop is active. No bus transfers are performed.
1963 if SRCENX goto sreadx
1964 else if TXTEXT goto txtread
1965 else if SRCEN goto sread
1966 else if DSTEN goto dread
1967 else if DSTENZ goto dzread
1984 sreadx Extra source data read at the start of an inner loop pass.
1986 if SRCENZ goto szreadx
1987 else if TXTEXT goto txtread
1988 else if SRCEN goto sread
1989 else if DSTEN goto dread
1990 else if DSTENZ goto dzread
1993 sreadx: // Extra source data read
2008 szreadx Extra source Z read at the start of an inner loop pass.
2010 if TXTEXT goto txtread
2013 szreadx: // Extra source Z read
2020 txtread Read texture data from external memory. This state is only used for external texture.
2021 TXTEXT is the condition TEXTMODE=1.
2024 else if DSTEN goto dread
2025 else if DSTENZ goto dzread
2028 txtread: // Read external texture data
2039 sread Source data read.
2041 if SRCENZ goto szread
2042 else if DSTEN goto dread
2043 else if DSTENZ goto dzread
2046 sread: // Source data read
2047 //The JTRM doesn't really specify the internal structure of the source data read, but I would
2048 //imagine that if it's in phrase mode that it starts by reading the phrase that the window is
2049 //pointing at. Likewise, the pixel (if in BPP 1, 2 & 4, chopped) otherwise. It probably still
2050 //transfers an entire phrase even in pixel mode.
2051 //Odd thought: Does it expand, e.g., 1 BPP pixels into 32 BPP internally? Hmm...
2054 a1_addr = REG(A1_BASE) & 0xFFFFFFF8;
2055 a2_addr = REG(A2_BASE) & 0xFFFFFFF8;
2056 a1_zoffs = (REG(A1_FLAGS) >> 6) & 7;
2057 a2_zoffs = (REG(A2_FLAGS) >> 6) & 7;
2058 xadd_a1_control = (REG(A1_FLAGS) >> 16) & 0x03;
2059 xadd_a2_control = (REG(A2_FLAGS) >> 16) & 0x03;
2060 a1_pitch = pitchValue[(REG(A1_FLAGS) & 0x03)];
2061 a2_pitch = pitchValue[(REG(A2_FLAGS) & 0x03)];
2062 n_pixels = REG(PIXLINECOUNTER) & 0xFFFF;
2063 n_lines = (REG(PIXLINECOUNTER) >> 16) & 0xFFFF;
2064 a1_x = (REG(A1_PIXEL) << 16) | (REG(A1_FPIXEL) & 0xFFFF);
2065 a1_y = (REG(A1_PIXEL) & 0xFFFF0000) | (REG(A1_FPIXEL) >> 16);
2066 a2_psize = 1 << ((REG(A2_FLAGS) >> 3) & 0x07);
2067 a1_psize = 1 << ((REG(A1_FLAGS) >> 3) & 0x07);
2070 a1_width = ((0x04 | m) << e) >> 2;
2071 a2_width = ((0x04 | m) << e) >> 2;
2073 // write values back to registers
2074 WREG(A1_PIXEL, (a1_y & 0xFFFF0000) | ((a1_x >> 16) & 0xFFFF));
2075 WREG(A1_FPIXEL, (a1_y << 16) | (a1_x & 0xFFFF));
2076 WREG(A2_PIXEL, (a2_y & 0xFFFF0000) | ((a2_x >> 16) & 0xFFFF));
2078 // Calculate the address to be read...
2080 //Need to fix phrase mode calcs here, since they should *step* by eight, not multiply.
2081 //Also, need to fix various differing BPP modes here, since offset won't be correct except
2082 //for 8BPP. !!! FIX !!!
2083 srcAddr = (DSTA2 ? a1_addr : a2_addr);
2085 /* if ((DSTA2 ? a1_phrase_mode : a2_phrase_mode) == 1)
2087 srcAddr += (((DSTA2 ? a1_x : a2_x) >> 16)
2088 + (((DSTA2 ? a1_y : a2_y) >> 16) * (DSTA2 ? a1_width : a2_width)));
2092 // uint32_t pixAddr = ((DSTA2 ? a1_x : a2_x) >> 16)
2093 // + (((DSTA2 ? a1_y : a2_y) >> 16) * (DSTA2 ? a1_width : a2_width));
2094 int32_t pixAddr = (int16_t)((DSTA2 ? a1_x : a2_x) >> 16)
2095 + ((int16_t)((DSTA2 ? a1_y : a2_y) >> 16) * (DSTA2 ? a1_width : a2_width));
2097 if ((DSTA2 ? a1PixelSize : a2PixelSize) < 3)
2098 pixAddr >>= pixelShift[(DSTA2 ? a1PixelSize : a2PixelSize)];
2099 else if ((DSTA2 ? a1PixelSize : a2PixelSize) > 3)
2100 pixAddr <<= pixelShift[(DSTA2 ? a1PixelSize : a2PixelSize)];
2107 if ((DSTA2 ? a1_phrase_mode : a2_phrase_mode) == 1)
2109 srcData = ((uint64_t)JaguarReadLong(srcAddr, BLITTER) << 32)
2110 | (uint64_t)JaguarReadLong(srcAddr + 4, BLITTER);
2114 //1,2,&4BPP are wrong here... !!! FIX !!!
2115 if ((DSTA2 ? a1PixelSize : a2PixelSize) == 0) // 1 BPP
2116 srcData = JaguarReadByte(srcAddr, BLITTER);
2117 if ((DSTA2 ? a1PixelSize : a2PixelSize) == 1) // 2 BPP
2118 srcData = JaguarReadByte(srcAddr, BLITTER);
2119 if ((DSTA2 ? a1PixelSize : a2PixelSize) == 2) // 4 BPP
2120 srcData = JaguarReadByte(srcAddr, BLITTER);
2121 if ((DSTA2 ? a1PixelSize : a2PixelSize) == 3) // 8 BPP
2122 srcData = JaguarReadByte(srcAddr, BLITTER);
2123 if ((DSTA2 ? a1PixelSize : a2PixelSize) == 4) // 16 BPP
2124 srcData = JaguarReadWord(srcAddr, BLITTER);
2125 if ((DSTA2 ? a1PixelSize : a2PixelSize) == 5) // 32 BPP
2126 srcData = JaguarReadLong(srcAddr, BLITTER);
2129 #ifdef LOG_BLITTER_MEMORY_ACCESSES
2131 WriteLog("BLITTER: srcAddr=%08X, srcData=%08X %08X\n", srcAddr, (uint32_t)(srcData >> 32), (uint32_t)(srcData & 0xFFFFFFFF));
2143 szread: // Source Z read
2145 szread Source Z read.
2148 else if DSTENZ goto dzread
2158 dread: // Destination data read
2160 dread Destination data read.
2162 if DSTENZ goto dzread
2165 // Calculate the destination address to be read...
2167 //Need to fix phrase mode calcs here, since they should *step* by eight, not multiply.
2168 //Also, need to fix various differing BPP modes here, since offset won't be correct except
2169 //for 8BPP. !!! FIX !!!
2170 dstAddr = (DSTA2 ? a2_addr : a1_addr);
2173 // uint32_t pixAddr = ((DSTA2 ? a2_x : a1_x) >> 16)
2174 // + (((DSTA2 ? a2_y : a1_y) >> 16) * (DSTA2 ? a2_width : a1_width));
2175 int32_t pixAddr = (int16_t)((DSTA2 ? a2_x : a1_x) >> 16)
2176 + ((int16_t)((DSTA2 ? a2_y : a1_y) >> 16) * (DSTA2 ? a2_width : a1_width));
2178 if ((DSTA2 ? a2PixelSize : a1PixelSize) < 3)
2179 pixAddr >>= pixelShift[(DSTA2 ? a2PixelSize : a1PixelSize)];
2180 else if ((DSTA2 ? a2PixelSize : a1PixelSize) > 3)
2181 pixAddr <<= pixelShift[(DSTA2 ? a2PixelSize : a1PixelSize)];
2188 if ((DSTA2 ? a2_phrase_mode : a1_phrase_mode) == 1)
2190 dstData = ((uint64_t)JaguarReadLong(dstAddr, BLITTER) << 32) // Phrase-mode destination read: high long comes from dstAddr (was srcAddr, copied from the sread state)
2191 | (uint64_t)JaguarReadLong(dstAddr + 4, BLITTER); // Low long of the same destination phrase
2195 //1,2,&4BPP are wrong here... !!! FIX !!!
2196 if ((DSTA2 ? a2PixelSize : a1PixelSize) == 0) // 1 BPP
2197 dstData = JaguarReadByte(dstAddr, BLITTER);
2198 if ((DSTA2 ? a2PixelSize : a1PixelSize) == 1) // 2 BPP
2199 dstData = JaguarReadByte(dstAddr, BLITTER);
2200 if ((DSTA2 ? a2PixelSize : a1PixelSize) == 2) // 4 BPP
2201 dstData = JaguarReadByte(dstAddr, BLITTER);
2202 if ((DSTA2 ? a2PixelSize : a1PixelSize) == 3) // 8 BPP
2203 dstData = JaguarReadByte(dstAddr, BLITTER);
2204 if ((DSTA2 ? a2PixelSize : a1PixelSize) == 4) // 16 BPP
2205 dstData = JaguarReadWord(dstAddr, BLITTER);
2206 if ((DSTA2 ? a2PixelSize : a1PixelSize) == 5) // 32 BPP
2207 dstData = JaguarReadLong(dstAddr, BLITTER);
2210 #ifdef LOG_BLITTER_MEMORY_ACCESSES
2212 WriteLog("BLITTER (dread): dstAddr=%08X, dstData=%08X %08X\n", dstAddr, (uint32_t)(dstData >> 32), (uint32_t)(dstData & 0xFFFFFFFF));
2220 dzread: // Destination Z read
2222 dzread Destination Z read.
2227 dwrite: // Destination data write
2229 dwrite Destination write. Every pass round the inner loop must go through this state.
2231 if DSTWRZ goto dzwrite
2232 else if INNER0 goto idle
2233 else if TXTEXT goto txtread
2234 else if SRCEN goto sread
2235 else if DSTEN goto dread
2236 else if DSTENZ goto dzread
2245 a1_xadd = 1.000000 (phrase=0)
2254 a2_xadd = 1.000000 (phrase=1)
2258 a2_mask_x= 0xFFFFFFFF
2259 a2_mask_y= 0xFFFFFFFF
2269 --LFUFUNC = LFU_CLEAR
2270 | PATDSEL = 1 (PD=77C7 7700 7700 7700)
2272 GOURD = 1 (II=00FC 1A00, SD=FF00 0000 0000 0000)
2275 //Still need to do CLIPA1 and SRCSHADE and GOURD and GOURZ...
2277 // Check clipping...
2281 uint16_t x = a1_x >> 16, y = a1_y >> 16;
2283 if (x >= GET16(blitter_ram, A1_CLIP + 2) || y >= GET16(blitter_ram, A1_CLIP))
2287 // Figure out what gets written...
2291 writeData = GET64(blitter_ram, PATTERNDATA);
2292 //GOURD works properly only in 16BPP mode...
2293 //SRCDATA holds the intensity fractions...
2294 //Does GOURD get calc'ed here or somewhere else???
2295 //Temporary testing kludge...
2297 // writeData >>= 48;
2298 // writeData = 0xFF88;
2299 //OK, it's not writing an entire strip of pixels... Why?
2300 //bad incrementing, that's why!
2304 // Apparently this only works with 16-bit pixels. Not sure if it works in phrase mode either.
2305 //Also, take TOPBEN & TOPNEN into account here as well...
2306 writeData = srcData + dstData;
2308 else // LFUFUNC is the default...
2313 writeData |= ~srcData & ~dstData;
2315 writeData |= ~srcData & dstData;
2317 writeData |= srcData & ~dstData;
2319 writeData |= srcData & dstData;
2322 // Calculate the address to be written...
2324 dstAddr = (DSTA2 ? a2_addr : a1_addr);
2326 /* if ((DSTA2 ? a2_phrase_mode : a1_phrase_mode) == 1)
2328 //both of these calculate the wrong address because they don't take into account
2330 dstAddr += ((DSTA2 ? a2_x : a1_x) >> 16)
2331 + (((DSTA2 ? a2_y : a1_y) >> 16) * (DSTA2 ? a2_width : a1_width));
2335 /* dstAddr += ((DSTA2 ? a2_x : a1_x) >> 16)
2336 + (((DSTA2 ? a2_y : a1_y) >> 16) * (DSTA2 ? a2_width : a1_width));*/
2337 // uint32_t pixAddr = ((DSTA2 ? a2_x : a1_x) >> 16)
2338 // + (((DSTA2 ? a2_y : a1_y) >> 16) * (DSTA2 ? a2_width : a1_width));
2339 int32_t pixAddr = (int16_t)((DSTA2 ? a2_x : a1_x) >> 16)
2340 + ((int16_t)((DSTA2 ? a2_y : a1_y) >> 16) * (DSTA2 ? a2_width : a1_width));
2342 if ((DSTA2 ? a2PixelSize : a1PixelSize) < 3)
2343 pixAddr >>= pixelShift[(DSTA2 ? a2PixelSize : a1PixelSize)];
2344 else if ((DSTA2 ? a2PixelSize : a1PixelSize) > 3)
2345 pixAddr <<= pixelShift[(DSTA2 ? a2PixelSize : a1PixelSize)];
2352 if ((DSTA2 ? a2_phrase_mode : a1_phrase_mode) == 1)
2354 JaguarWriteLong(dstAddr, writeData >> 32, BLITTER);
2355 JaguarWriteLong(dstAddr + 4, writeData & 0xFFFFFFFF, BLITTER);
2359 //1,2,&4BPP are wrong here... !!! FIX !!!
2360 if ((DSTA2 ? a2PixelSize : a1PixelSize) == 0) // 1 BPP
2361 JaguarWriteByte(dstAddr, writeData, BLITTER);
2362 if ((DSTA2 ? a2PixelSize : a1PixelSize) == 1) // 2 BPP
2363 JaguarWriteByte(dstAddr, writeData, BLITTER);
2364 if ((DSTA2 ? a2PixelSize : a1PixelSize) == 2) // 4 BPP
2365 JaguarWriteByte(dstAddr, writeData, BLITTER);
2366 if ((DSTA2 ? a2PixelSize : a1PixelSize) == 3) // 8 BPP
2367 JaguarWriteByte(dstAddr, writeData, BLITTER);
2368 if ((DSTA2 ? a2PixelSize : a1PixelSize) == 4) // 16 BPP
2369 JaguarWriteWord(dstAddr, writeData, BLITTER);
2370 if ((DSTA2 ? a2PixelSize : a1PixelSize) == 5) // 32 BPP
2371 JaguarWriteLong(dstAddr, writeData, BLITTER);
2374 #ifdef LOG_BLITTER_MEMORY_ACCESSES
2376 WriteLog("BLITTER: dstAddr=%08X, writeData=%08X %08X\n", dstAddr, (uint32_t)(writeData >> 32), (uint32_t)(writeData & 0xFFFFFFFF));
2379 inhibitWrite://Should this go here? or on the other side of the X/Y incrementing?
2380 //Seems OK here... for now.
2382 // Do funky X/Y incrementation here as well... !!! FIX !!!
2384 // Handle A1 channel stepping
2386 if ((blitter_ram[A1_FLAGS + 1] & 0x03) == 0)
2387 a1_x += phraseOffset[a1PixelSize] << 16;
2388 else if ((blitter_ram[A1_FLAGS + 1] & 0x03) == 1)
2389 a1_x += (blitter_ram[A1_FLAGS + 1] & 0x08 ? -1 << 16 : 1 << 16);
2390 /* else if ((blitter_ram[A1_FLAGS + 1] & 0x03) == 2)
2392 else if ((blitter_ram[A1_FLAGS + 1] & 0x03) == 3)
2394 //Always add the FINC here??? That was the problem with the BIOS screen... So perhaps.
2395 a1_x += GET16(blitter_ram, A1_FINC + 2);
2396 a1_y += GET16(blitter_ram, A1_FINC + 0);
2398 a1_x += GET16(blitter_ram, A1_INC + 2) << 16;
2399 a1_y += GET16(blitter_ram, A1_INC + 0) << 16;
2402 if ((blitter_ram[A1_FLAGS + 1] & 0x04) && ((blitter_ram[A1_FLAGS + 1] & 0x03) != 3)) // Step A1 Y when flag bit 0x04 is set and the low two bits (the xCtrl field) are not 3; the mask is parenthesized because != binds tighter than &, which made the old test always false
2403 a1_y += (blitter_ram[A1_FLAGS + 1] & 0x10 ? -1 << 16 : 1 << 16); // Bit 0x10 selects a -1 step instead of +1 (16.16 fixed point)
2405 // Handle A2 channel stepping
2407 if ((blitter_ram[A2_FLAGS + 1] & 0x03) == 0)
2408 a2_x += phraseOffset[a2PixelSize] << 16;
2409 else if ((blitter_ram[A2_FLAGS + 1] & 0x03) == 1)
2410 a2_x += (blitter_ram[A2_FLAGS + 1] & 0x08 ? -1 << 16 : 1 << 16);
2411 /* else if ((blitter_ram[A2_FLAGS + 1] & 0x03) == 2)
2414 if (blitter_ram[A2_FLAGS + 1] & 0x04)
2415 a2_y += (blitter_ram[A2_FLAGS + 1] & 0x10 ? -1 << 16 : 1 << 16);
2417 //Need to fix this so that it subtracts (saturating, of course) the correct number of pixels
2418 //in phrase mode... !!! FIX !!! [DONE]
2419 //Need to fix this so that it counts down the correct item. Does it count the
2420 //source or the destination phrase mode???
2421 //It shouldn't matter, because we *should* end up processing the same
2422 //number of pixels... Not sure though.
2423 if ((DSTA2 ? a2_phrase_mode : a1_phrase_mode) == 1)
2425 if (inner_loop < phraseOffset[DSTA2 ? a2PixelSize : a1PixelSize])
2428 inner_loop -= phraseOffset[DSTA2 ? a2PixelSize : a1PixelSize];
2449 dzwrite: // Destination Z write
2451 dzwrite Destination Z write.
2454 else if TXTEXT goto txtread
2455 else if SRCEN goto sread
2456 else if DSTEN goto dread
2457 else if DSTENZ goto dzread
2474 ------------------------------
2475 if INDONE if OUTER0 goto idle
2476 else if UPDA1F goto a1fupdate
2477 else if UPDA1 goto a1update
2478 else if GOURZ.POLYGON goto zfupdate
2479 else if UPDA2 goto a2update
2480 else if DATINIT goto init_if
2493 //kill this, for now...
2494 // else if (GOURZ.POLYGON)
2503 a1fupdate: // Update A1 pointer fractions and more (see below)
2505 a1fupdate A1 step fraction is added to A1 pointer fraction
2506 POLYGON true: A1 step delta X and Y fraction parts are added to the A1
2507 step X and Y fraction parts (the value prior to this add is used for
2508 the step to pointer add).
2509 POLYGON true: inner count step fraction is added to the inner count
2511 POLYGON.GOURD true: the I fraction step is added to the computed
2512 intensity fraction parts +
2513 POLYGON.GOURD true: the I fraction step delta is added to the I
2518 #define A1_PIXEL ((uint32_t)0x0C) // Integer part of the pixel (Y.i and X.i)
2519 #define A1_STEP ((uint32_t)0x10) // Integer part of the step
2520 #define A1_FSTEP ((uint32_t)0x14) // Fractional part of the step
2521 #define A1_FPIXEL ((uint32_t)0x18) // Fractional part of the pixel (Y.f and X.f)
2524 // This is all kinda murky. All we have are the Midsummer docs to give us any guidance,
2525 // and they're incomplete or filled with errors (like above). Aarrrgggghhhhh!
2527 //This isn't right. Is it? I don't think the fractional parts are signed...
2528 // a1_x += (int32_t)((int16_t)GET16(blitter_ram, A1_FSTEP + 2));
2529 // a1_y += (int32_t)((int16_t)GET16(blitter_ram, A1_FSTEP + 0));
2530 a1_x += GET16(blitter_ram, A1_FSTEP + 2);
2531 a1_y += GET16(blitter_ram, A1_FSTEP + 0);
2535 a1update: // Update A1 pointer integers
2537 a1update A1 step is added to A1 pointer, with carry from the fractional add
2538 POLYGON true: A1 step delta X and Y integer parts are added to the A1
2539 step X and Y integer parts, with carry from the corresponding
2540 fractional part add (again, the value prior to this add is used for
2541 the step to pointer add).
2542 POLYGON true: inner count step is added to the inner count, with carry
2543 POLYGON.GOURD true: the I step is added to the computed intensities,
2545 POLYGON.GOURD true: the I step delta is added to the I step, with
2546 carry the texture X and Y step delta values are added to the X and Y
2548 if GOURZ.POLYGON goto zfupdate
2549 else if UPDA2 goto a2update
2550 else if DATINIT goto init_if
2553 a1_x += (int32_t)(GET16(blitter_ram, A1_STEP + 2) << 16);
2554 a1_y += (int32_t)(GET16(blitter_ram, A1_STEP + 0) << 16);
2557 //kill this, for now...
2558 // if (GOURZ.POLYGON)
2568 zfupdate: // Update computed Z step fractions
2570 zfupdate the Z fraction step is added to the computed Z fraction parts +
2571 the Z fraction step delta is added to the Z fraction step
2576 zupdate: // Update computed Z step integers
2578 zupdate the Z step is added to the computed Zs, with carry +
2579 the Z step delta is added to the Z step, with carry
2580 if UPDA2 goto a2update
2581 else if DATINIT goto init_if
2591 a2update: // Update A2 pointer
2593 a2update A2 step is added to the A2 pointer
2594 if DATINIT goto init_if
2597 a2_x += (int32_t)(GET16(blitter_ram, A2_STEP + 2) << 16);
2598 a2_y += (int32_t)(GET16(blitter_ram, A2_STEP + 0) << 16);
2606 init_if: // Initialise intensity fractions and texture X
2608 init_if Initialise the fractional part of the computed intensity fields, from
2609 the increment and step registers. The texture X integer and fractional
2610 parts can also be initialised.
2615 init_ii: // Initialise intensity integers and texture Y
2617 init_ii Initialise the integer part of the computed intensity, and texture Y
2618 integer and fractional parts
2619 if GOURZ goto init_zf
2627 init_zf: // Initialise Z fractions
2629 init_zf Initialise the fractional part of the computed Z fields.
2634 init_zi: // Initialise Z integers
2636 init_zi Initialise the integer part of the computed Z fields.
2643 The outer loop state machine fires off the inner loop, and controls the updating
2644 process between passes through the inner loop.
2646 + -- these functions are irrelevant if the DATINIT function is enabled, which it
2649 All these states will complete in one clock cycle, with the exception of the idle
2650 state, which means the blitter is quiescent; and the inner state, which takes as
2651 long as is required to complete one strip of pixels. It is therefore possible for
2652 the blitter to spend a maximum of nine clock cycles of inactivity between passes
2653 through the inner loop.
2663 // Here's attempt #2--taken from the Oberon chip specs!
2666 #ifdef USE_MIDSUMMER_BLITTER_MKII
2668 void ADDRGEN(uint32_t &, uint32_t &, bool, bool,
2669 uint16_t, uint16_t, uint32_t, uint8_t, uint8_t, uint8_t, uint8_t,
2670 uint16_t, uint16_t, uint32_t, uint8_t, uint8_t, uint8_t, uint8_t);
2671 void ADDARRAY(uint16_t * addq, uint8_t daddasel, uint8_t daddbsel, uint8_t daddmode,
2672 uint64_t dstd, uint32_t iinc, uint8_t initcin[], uint64_t initinc, uint16_t initpix,
2673 uint32_t istep, uint64_t patd, uint64_t srcd, uint64_t srcz1, uint64_t srcz2,
2674 uint32_t zinc, uint32_t zstep);
2675 void ADD16SAT(uint16_t &r, uint8_t &co, uint16_t a, uint16_t b, uint8_t cin, bool sat, bool eightbit, bool hicinh);
2676 void ADDAMUX(int16_t &adda_x, int16_t &adda_y, uint8_t addasel, int16_t a1_step_x, int16_t a1_step_y,
2677 int16_t a1_stepf_x, int16_t a1_stepf_y, int16_t a2_step_x, int16_t a2_step_y,
2678 int16_t a1_inc_x, int16_t a1_inc_y, int16_t a1_incf_x, int16_t a1_incf_y, uint8_t adda_xconst,
2679 bool adda_yconst, bool addareg, bool suba_x, bool suba_y);
2680 void ADDBMUX(int16_t &addb_x, int16_t &addb_y, uint8_t addbsel, int16_t a1_x, int16_t a1_y,
2681 int16_t a2_x, int16_t a2_y, int16_t a1_frac_x, int16_t a1_frac_y);
2682 void DATAMUX(int16_t &data_x, int16_t &data_y, uint32_t gpu_din, int16_t addq_x, int16_t addq_y, bool addqsel);
2683 void ADDRADD(int16_t &addq_x, int16_t &addq_y, bool a1fracldi,
2684 uint16_t adda_x, uint16_t adda_y, uint16_t addb_x, uint16_t addb_y, uint8_t modx, bool suba_x, bool suba_y);
2685 void DATA(uint64_t &wdata, uint8_t &dcomp, uint8_t &zcomp, bool &nowrite,
2686 bool big_pix, bool cmpdst, uint8_t daddasel, uint8_t daddbsel, uint8_t daddmode, bool daddq_sel, uint8_t data_sel,
2687 uint8_t dbinh, uint8_t dend, uint8_t dstart, uint64_t dstd, uint32_t iinc, uint8_t lfu_func, uint64_t &patd, bool patdadd,
2688 bool phrase_mode, uint64_t srcd, bool srcdread, bool srczread, bool srcz2add, uint8_t zmode,
2689 bool bcompen, bool bkgwren, bool dcompen, uint8_t icount, uint8_t pixsize,
2690 uint64_t &srcz, uint64_t dstz, uint32_t zinc);
2691 void COMP_CTRL(uint8_t &dbinh, bool &nowrite,
2692 bool bcompen, bool big_pix, bool bkgwren, uint8_t dcomp, bool dcompen, uint8_t icount,
2693 uint8_t pixsize, bool phrase_mode, uint8_t srcd, uint8_t zcomp);
2694 #define VERBOSE_BLITTER_LOGGING
2696 void BlitterMidsummer2(void)
2701 if (startConciseBlitLogging)
2704 // Here's what the specs say the state machine does. Note that this can probably be
2705 // greatly simplified (also, it's different from what John has in his Oberon docs):
2706 //Will remove stuff that isn't in Jaguar I once fully described (stuff like texture won't
2707 //be described here at all)...
2709 uint32_t cmd = GET32(blitter_ram, COMMAND);
2714 cmd != 0x00010200 && // PATDSEL
2715 cmd != 0x01800001 // SRCEN LFUFUNC=C
2716 && cmd != 0x01800005
2717 //Boot ROM ATARI letters:
2718 && cmd != 0x00011008 // DSTEN GOURD PATDSEL
2719 //Boot ROM spinning cube:
2720 && cmd != 0x41802F41 // SRCEN CLIP_A1 UPDA1 UPDA1F UPDA2 DSTA2 GOURZ ZMODE=0 LFUFUNC=C SRCSHADE
2722 && cmd != 0x01800E01 // SRCEN UPDA1 UPDA2 DSTA2 LFUFUNC=C
2723 //T2K TEMPEST letters:
2724 && cmd != 0x09800741 // SRCEN CLIP_A1 UPDA1 UPDA1F UPDA2 LFUFUNC=C DCOMPEN
2725 //Static letters on Cybermorph intro screen:
2726 && cmd != 0x09800609 // SRCEN DSTEN UPDA1 UPDA2 LFUFUNC=C DCOMPEN
2727 //Static pic on title screen:
2728 && cmd != 0x01800601 // SRCEN UPDA1 UPDA2 LFUFUNC=C
2729 //Turning letters on Cybermorph intro screen:
2730 // && cmd != 0x09800F41 // SRCEN CLIP_A1 UPDA1 UPDA1F UPDA2 DSTA2 LFUFUNC=C DCOMPEN
2731 && cmd != 0x00113078 // DSTEN DSTENZ DSTWRZ CLIP_A1 GOURD GOURZ PATDSEL ZMODE=4
2732 && cmd != 0x09900F39 // SRCEN DSTEN DSTENZ DSTWRZ UPDA1 UPDA1F UPDA2 DSTA2 ZMODE=4 LFUFUNC=C DCOMPEN
2733 && cmd != 0x09800209 // SRCEN DSTEN UPDA1 LFUFUNC=C DCOMPEN
2734 && cmd != 0x00011200 // UPDA1 GOURD PATDSEL
2735 //Start of Hover Strike (clearing screen):
2736 && cmd != 0x00010000 // PATDSEL
2737 //Hover Strike text:
2738 && cmd != 0x1401060C // SRCENX DSTEN UPDA1 UPDA2 PATDSEL BCOMPEN BKGWREN
2739 //Hover Strike 3D stuff
2740 && cmd != 0x01902839 // SRCEN DSTEN DSTENZ DSTWRZ DSTA2 GOURZ ZMODE=4 LFUFUNC=C
2741 //Hover Strike darkening on intro to play (briefing) screen
2742 && cmd != 0x00020208 // DSTEN UPDA1 ADDDSEL
2743 //Trevor McFur stuff:
2744 && cmd != 0x05810601 // SRCEN UPDA1 UPDA2 PATDSEL BCOMPEN
2745 && cmd != 0x01800201 // SRCEN UPDA1 LFUFUNC=C
2747 && cmd != 0x00011000 // GOURD PATDSEL
2748 && cmd != 0x00011040 // CLIP_A1 GOURD PATDSEL
2750 && cmd != 0x01800000 // LFUFUNC=C
2751 && cmd != 0x01800401 //
2752 && cmd != 0x01800040 //
2753 && cmd != 0x00020008 //
2754 // && cmd != 0x09800F41 // SRCEN CLIP_A1 UPDA1 UPDA1F UPDA2 DSTA2 LFUFUNC=C DCOMPEN
2760 if (blit_start_log == 0) // Wait for the signal...
2761 logBlit = false;//*/
2762 //temp, for testing...
2763 /*if (cmd != 0x49820609)
2764 logBlit = false;//*/
2767 Some T2K unique blits:
2768 logBlit = F, cmd = 00010200 *
2769 logBlit = F, cmd = 00011000
2770 logBlit = F, cmd = 00011040
2771 logBlit = F, cmd = 01800005 *
2772 logBlit = F, cmd = 09800741 *
2774 Hover Strike mission selection screen:
2775 Blit! (CMD = 01902839) // SRCEN DSTEN DSTENZ DSTWRZ DSTA2 GOURZ ZMODE=4 LFUFUNC=C
2777 Checkered Flag blits in the screw up zone:
2778 Blit! (CMD = 01800001) // SRCEN LFUFUNC=C
2779 Blit! (CMD = 01800000) // LFUFUNC=C
2780 Blit! (CMD = 00010000) // PATDSEL
2782 Wolfenstein 3D in the fuckup zone:
2783 Blit! (CMD = 01800000) // LFUFUNC=C
2786 //printf("logBlit = %s, cmd = %08X\n", (logBlit ? "T" : "F"), cmd);
2791 Blit! (CMD = 00011040)
2792 Flags: CLIP_A1 GOURD PATDSEL
2794 a1_base = 00100000, a2_base = 0081F6A8
2795 a1_x = 00A7, a1_y = 0014, a1_frac_x = 0000, a1_frac_y = 0000, a2_x = 0001, a2_y = 0000
2796 a1_step_x = FE80, a1_step_y = 0001, a1_stepf_x = 0000, a1_stepf_y = 0000, a2_step_x = FFF8, a2_step_y = 0001
2797 a1_inc_x = 0001, a1_inc_y = 0000, a1_incf_x = 0000, a1_incf_y = 0000
2798 a1_win_x = 0180, a1_win_y = 0118, a2_mask_x = 0000, a2_mask_y = 0000
2799 a2_mask=F a1add=+phr/+0 a2add=+phr/+0
2800 a1_pixsize = 4, a2_pixsize = 4
2804 if (cmd == 0x00011040
2805 && (GET16(blitter_ram, A1_PIXEL + 2) == 0x00A7) && (GET16(blitter_ram, A1_PIXEL + 0) == 0x0014)
2806 && (GET16(blitter_ram, A2_PIXEL + 2) == 0x0001) && (GET16(blitter_ram, A2_PIXEL + 0) == 0x0000)
2807 && (GET16(blitter_ram, PIXLINECOUNTER + 2) == 18))
2810 // Line states passed in via the command register
2812 bool srcen = (SRCEN), srcenx = (SRCENX), srcenz = (SRCENZ),
2813 dsten = (DSTEN), dstenz = (DSTENZ), dstwrz = (DSTWRZ), clip_a1 = (CLIPA1),
2814 upda1 = (UPDA1), upda1f = (UPDA1F), upda2 = (UPDA2), dsta2 = (DSTA2),
2815 gourd = (GOURD), gourz = (GOURZ), topben = (TOPBEN), topnen = (TOPNEN),
2816 patdsel = (PATDSEL), adddsel = (ADDDSEL), cmpdst = (CMPDST), bcompen = (BCOMPEN),
2817 dcompen = (DCOMPEN), bkgwren = (BKGWREN), srcshade = (SRCSHADE);
2819 uint8_t zmode = (cmd & 0x01C0000) >> 18, lfufunc = (cmd & 0x1E00000) >> 21;
2821 //Where to find various lines:
2823 // gourd -> dcontrol, inner, outer, state
2824 // gourz -> dcontrol, inner, outer, state
2825 // cmpdst -> blit, data, datacomp, state
2826 // bcompen -> acontrol, inner, mcontrol, state
2827 // dcompen -> inner, state
2828 // bkgwren -> inner, state
2829 // srcshade -> dcontrol, inner, state
2830 // adddsel -> dcontrol
2831 //NOTE: ADDDSEL takes precedence over PATDSEL, PATDSEL over LFU_FUNC
2832 #ifdef VERBOSE_BLITTER_LOGGING
2835 char zfs[512], lfus[512];
2836 zfs[0] = lfus[0] = 0;
2837 if (dstwrz || dstenz || gourz)
2838 sprintf(zfs, " ZMODE=%X", zmode);
2839 if (!(patdsel || adddsel))
2840 sprintf(lfus, " LFUFUNC=%X", lfufunc);
2841 WriteLog("\nBlit! (CMD = %08X)\nFlags:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n", cmd,
2842 (srcen ? " SRCEN" : ""), (srcenx ? " SRCENX" : ""), (srcenz ? " SRCENZ" : ""),
2843 (dsten ? " DSTEN" : ""), (dstenz ? " DSTENZ" : ""), (dstwrz ? " DSTWRZ" : ""),
2844 (clip_a1 ? " CLIP_A1" : ""), (upda1 ? " UPDA1" : ""), (upda1f ? " UPDA1F" : ""),
2845 (upda2 ? " UPDA2" : ""), (dsta2 ? " DSTA2" : ""), (gourd ? " GOURD" : ""),
2846 (gourz ? " GOURZ" : ""), (topben ? " TOPBEN" : ""), (topnen ? " TOPNEN" : ""),
2847 (patdsel ? " PATDSEL" : ""), (adddsel ? " ADDDSEL" : ""), zfs, lfus, (cmpdst ? " CMPDST" : ""),
2848 (bcompen ? " BCOMPEN" : ""), (dcompen ? " DCOMPEN" : ""), (bkgwren ? " BKGWREN" : ""),
2849 (srcshade ? " SRCSHADE" : ""));
2850 WriteLog(" count = %d x %d\n", GET16(blitter_ram, PIXLINECOUNTER + 2), GET16(blitter_ram, PIXLINECOUNTER));
2854 // Lines that don't exist in Jaguar I (and will never be asserted)
2856 bool polygon = false, datinit = false, a1_stepld = false, a2_stepld = false, ext_int = false;
2857 bool istepadd = false, istepfadd = false, finneradd = false, inneradd = false;
2858 bool zstepfadd = false, zstepadd = false;
2860 // Various state lines (initial state--basically the reset state of the FDSYNCs)
2862 bool go = true, idle = true, inner = false, a1fupdate = false, a1update = false,
2863 zfupdate = false, zupdate = false, a2update = false, init_if = false, init_ii = false,
2864 init_zf = false, init_zi = false;
2866 bool outer0 = false, indone = false;
2868 bool idlei, inneri, a1fupdatei, a1updatei, zfupdatei, zupdatei, a2updatei, init_ifi, init_iii,
2871 bool notgzandp = !(gourz && polygon);
2873 // Various registers set up by user
2875 uint16_t ocount = GET16(blitter_ram, PIXLINECOUNTER);
2876 uint8_t a1_pitch = blitter_ram[A1_FLAGS + 3] & 0x03;
2877 uint8_t a2_pitch = blitter_ram[A2_FLAGS + 3] & 0x03;
2878 uint8_t a1_pixsize = (blitter_ram[A1_FLAGS + 3] & 0x38) >> 3;
2879 uint8_t a2_pixsize = (blitter_ram[A2_FLAGS + 3] & 0x38) >> 3;
2880 uint8_t a1_zoffset = (GET16(blitter_ram, A1_FLAGS + 2) >> 6) & 0x07;
2881 uint8_t a2_zoffset = (GET16(blitter_ram, A2_FLAGS + 2) >> 6) & 0x07;
2882 uint8_t a1_width = (blitter_ram[A1_FLAGS + 2] >> 1) & 0x3F;
2883 uint8_t a2_width = (blitter_ram[A2_FLAGS + 2] >> 1) & 0x3F;
2884 bool a2_mask = blitter_ram[A2_FLAGS + 2] & 0x80;
2885 uint8_t a1addx = blitter_ram[A1_FLAGS + 1] & 0x03, a2addx = blitter_ram[A2_FLAGS + 1] & 0x03;
2886 bool a1addy = blitter_ram[A1_FLAGS + 1] & 0x04, a2addy = blitter_ram[A2_FLAGS + 1] & 0x04;
2887 bool a1xsign = blitter_ram[A1_FLAGS + 1] & 0x08, a2xsign = blitter_ram[A2_FLAGS + 1] & 0x08;
2888 bool a1ysign = blitter_ram[A1_FLAGS + 1] & 0x10, a2ysign = blitter_ram[A2_FLAGS + 1] & 0x10;
2889 uint32_t a1_base = GET32(blitter_ram, A1_BASE) & 0xFFFFFFF8; // Phrase aligned by ignoring bottom 3 bits
2890 uint32_t a2_base = GET32(blitter_ram, A2_BASE) & 0xFFFFFFF8;
2892 uint16_t a1_win_x = GET16(blitter_ram, A1_CLIP + 2) & 0x7FFF;
2893 uint16_t a1_win_y = GET16(blitter_ram, A1_CLIP + 0) & 0x7FFF;
2894 int16_t a1_x = (int16_t)GET16(blitter_ram, A1_PIXEL + 2);
2895 int16_t a1_y = (int16_t)GET16(blitter_ram, A1_PIXEL + 0);
2896 int16_t a1_step_x = (int16_t)GET16(blitter_ram, A1_STEP + 2);
2897 int16_t a1_step_y = (int16_t)GET16(blitter_ram, A1_STEP + 0);
2898 uint16_t a1_stepf_x = GET16(blitter_ram, A1_FSTEP + 2);
2899 uint16_t a1_stepf_y = GET16(blitter_ram, A1_FSTEP + 0);
2900 uint16_t a1_frac_x = GET16(blitter_ram, A1_FPIXEL + 2);
2901 uint16_t a1_frac_y = GET16(blitter_ram, A1_FPIXEL + 0);
2902 int16_t a1_inc_x = (int16_t)GET16(blitter_ram, A1_INC + 2);
2903 int16_t a1_inc_y = (int16_t)GET16(blitter_ram, A1_INC + 0);
2904 uint16_t a1_incf_x = GET16(blitter_ram, A1_FINC + 2);
2905 uint16_t a1_incf_y = GET16(blitter_ram, A1_FINC + 0);
2907 int16_t a2_x = (int16_t)GET16(blitter_ram, A2_PIXEL + 2);
2908 int16_t a2_y = (int16_t)GET16(blitter_ram, A2_PIXEL + 0);
2909 uint16_t a2_mask_x = GET16(blitter_ram, A2_MASK + 2);
2910 uint16_t a2_mask_y = GET16(blitter_ram, A2_MASK + 0);
2911 int16_t a2_step_x = (int16_t)GET16(blitter_ram, A2_STEP + 2);
2912 int16_t a2_step_y = (int16_t)GET16(blitter_ram, A2_STEP + 0);
2914 uint64_t srcd1 = GET64(blitter_ram, SRCDATA);
2916 uint64_t dstd = GET64(blitter_ram, DSTDATA);
2917 uint64_t patd = GET64(blitter_ram, PATTERNDATA);
2918 uint32_t iinc = GET32(blitter_ram, INTENSITYINC);
2919 uint64_t srcz1 = GET64(blitter_ram, SRCZINT);
2920 uint64_t srcz2 = GET64(blitter_ram, SRCZFRAC);
2921 uint64_t dstz = GET64(blitter_ram, DSTZ);
2922 uint32_t zinc = GET32(blitter_ram, ZINC);
2923 uint32_t collision = GET32(blitter_ram, COLLISIONCTRL);// 0=RESUME, 1=ABORT, 2=STOPEN
2925 uint8_t pixsize = (dsta2 ? a2_pixsize : a1_pixsize); // From ACONTROL
2927 //Testing Trevor McFur--I *think* it's the circle on the lower RHS of the screen...
2929 if (cmd == 0x05810601 && (GET16(blitter_ram, PIXLINECOUNTER + 2) == 96)
2930 && (GET16(blitter_ram, PIXLINECOUNTER + 0) == 72))
2933 //if (cmd == 0x1401060C) patd = 0xFFFFFFFFFFFFFFFFLL;
2934 //if (cmd == 0x1401060C) patd = 0x00000000000000FFLL;
2935 //If it's still not working (bcompen-patd) then see who's writing what to patd and where...
2936 //Still not OK. Check to see who's writing what to where in patd!
2937 //It looks like M68K is writing to the top half of patd... Hmm...
2939 ----> M68K wrote 0000 to byte 15737344 of PATTERNDATA...
2940 --> M68K wrote 00 to byte 0 of PATTERNDATA...
2941 --> M68K wrote 00 to byte 1 of PATTERNDATA...
2942 ----> M68K wrote 00FF to byte 15737346 of PATTERNDATA...
2943 --> M68K wrote 00 to byte 2 of PATTERNDATA...
2944 --> M68K wrote FF to byte 3 of PATTERNDATA...
2945 logBlit = F, cmd = 1401060C
2947 Wren0 := ND6 (wren\[0], gpua\[5], gpua\[6..8], bliten, gpu_memw);
2948 Wren1 := ND6 (wren\[1], gpua[5], gpua\[6..8], bliten, gpu_memw);
2949 Wren2 := ND6 (wren\[2], gpua\[5], gpua[6], gpua\[7..8], bliten, gpu_memw);
2950 Wren3 := ND6 (wren\[3], gpua[5], gpua[6], gpua\[7..8], bliten, gpu_memw);
2953 Dec0 := D38GH (a1baseld, a1flagld, a1winld, a1ptrld, a1stepld, a1stepfld, a1fracld, a1incld, gpua[2..4], wren\[0]);
2955 Dec1 := D38GH (a1incfld, a2baseld, a2flagld, a2maskld, a2ptrldg, a2stepld, cmdldt, countldt, gpua[2..4], wren\[1]);
2957 Dec2 := D38GH (srcd1ldg[0..1], dstdldg[0..1], dstzldg[0..1], srcz1ldg[0..1], gpua[2..4], wren\[2]);
2959 Dec3 := D38GH (srcz2ld[0..1], patdld[0..1], iincld, zincld, stopld, intld[0], gpua[2..4], wren\[3]);
2961 wren[3] is asserted when gpu address bus = 0 011x xx00
2962 patdld[0] -> 0 0110 1000 -> $F02268 (lo 32 bits)
2963 patdld[1] -> 0 0110 1100 -> $F0226C (hi 32 bits)
2965 So... It's reversed! The data organization of the patd register is [low 32][high 32]! !!! FIX !!! [DONE]
2966 And fix all the other 64 bit registers [DONE]
2968 /*if (cmd == 0x1401060C)
2970 printf("logBlit = %s, cmd = %08X\n", (logBlit ? "T" : "F"), cmd);
2974 if ((cmd == 0x00010200) && (GET16(blitter_ram, PIXLINECOUNTER + 2) == 9))
2977 ; Pink altimeter bar
2979 Blit! (00110000 <- 000BF010) count: 9 x 23, A1/2_FLAGS: 000042E2/00010020 [cmd: 00010200]
2980 CMD -> src: dst: misc: a1ctl: UPDA1 mode: ity: PATDSEL z-op: op: LFU_CLEAR ctrl:
2981 A1 step values: -10 (X), 1 (Y)
2982 A1 -> pitch: 4 phrases, depth: 16bpp, z-off: 3, width: 320 (21), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
2983 A2 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 1 (00), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
2984 A1 x/y: 262/132, A2 x/y: 129/0
2985 ;x-coord is 257 in pic, so add 5
2986 ;20 for ship, 33 for #... Let's see if we can find 'em!
2988 ; Black altimeter bar
2990 Blit! (00110000 <- 000BF010) count: 5 x 29, A1/2_FLAGS: 000042E2/00010020 [cmd: 00010200]
2991 CMD -> src: dst: misc: a1ctl: UPDA1 mode: ity: PATDSEL z-op: op: LFU_CLEAR ctrl:
2992 A1 step values: -8 (X), 1 (Y)
2993 A1 -> pitch: 4 phrases, depth: 16bpp, z-off: 3, width: 320 (21), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
2994 A2 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 1 (00), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
2995 A1 x/y: 264/126, A2 x/y: 336/0
2997 Here's the pink bar--note that it's phrase mode without dread, so how does this work???
2998 Not sure, but I *think* that somehow it MUXes the data at the write site in on the left or right side
2999 of the write data when masked in phrase mode. I'll have to do some tracing to see if this is the mechanism
3002 Blit! (CMD = 00010200)
3003 Flags: UPDA1 PATDSEL
3005 a1_base = 00110010, a2_base = 000BD7E0
3006 a1_x = 0106, a1_y = 0090, a1_frac_x = 0000, a1_frac_y = 8000, a2_x = 025A, a2_y = 0000
3007 a1_step_x = FFF6, a1_step_y = 0001, a1_stepf_x = 5E00, a1_stepf_y = D100, a2_step_x = FFF7, a2_step_y = 0001
3008 a1_inc_x = 0001, a1_inc_y = FFFF, a1_incf_x = 0000, a1_incf_y = E000
3009 a1_win_x = 0000, a1_win_y = 0000, a2_mask_x = 0000, a2_mask_y = 0000
3010 a2_mask=F a1add=+phr/+0 a2add=+1/+0
3011 a1_pixsize = 4, a2_pixsize = 4
3012 srcd=BAC673AC2C92E578 dstd=0000000000000000 patd=74C074C074C074C0 iinc=0002E398
3013 srcz1=7E127E12000088DA srcz2=DBE06DF000000000 dstz=0000000000000000 zinc=FFFE4840, coll=0
3015 [in=T a1f=F a1=F zf=F z=F a2=F iif=F iii=F izf=F izi=F]
3016 Entering INNER state...
3017 Entering DWRITE state...
3018 Dest write address/pix address: 0016A830/0 [dstart=20 dend=40 pwidth=8 srcshift=0][daas=0 dabs=0 dam=7 ds=0 daq=F] [7400000074C074C0] (icount=0007, inc=2)
3019 Entering A1_ADD state [a1_x=0106, a1_y=0090, addasel=0, addbsel=0, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
3020 Entering DWRITE state...
3021 Dest write address/pix address: 0016A850/0 [dstart=0 dend=40 pwidth=8 srcshift=0][daas=0 dabs=0 dam=7 ds=0 daq=F] [74C074C074C074C0] (icount=0003, inc=4)
3022 Entering A1_ADD state [a1_x=0108, a1_y=0090, addasel=0, addbsel=0, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
3023 Entering DWRITE state...
3024 Dest write address/pix address: 0016A870/0 [dstart=0 dend=30 pwidth=8 srcshift=0][daas=0 dabs=0 dam=7 ds=0 daq=F] [74C074C074C00000] (icount=FFFF, inc=4)
3025 Entering A1_ADD state [a1_x=010C, a1_y=0090, addasel=0, addbsel=0, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
3026 Entering IDLE_INNER state...
3027 Leaving INNER state... (ocount=000A)
3028 [in=F a1f=F a1=T zf=F z=F a2=F iif=F iii=F izf=F izi=F]
3029 Entering A1UPDATE state... (272/144 -> 262/145)
3030 [in=T a1f=F a1=F zf=F z=F a2=F iif=F iii=F izf=F izi=F]
3031 Entering INNER state...
3036 a2addy = a1addy; // A2 channel Y add bit is tied to A1's
3038 //if (logBlit && (ocount > 20)) logBlit = false;
3039 #ifdef VERBOSE_BLITTER_LOGGING
3042 WriteLog(" a1_base = %08X, a2_base = %08X\n", a1_base, a2_base);
3043 WriteLog(" a1_x = %04X, a1_y = %04X, a1_frac_x = %04X, a1_frac_y = %04X, a2_x = %04X, a2_y = %04X\n", (uint16_t)a1_x, (uint16_t)a1_y, a1_frac_x, a1_frac_y, (uint16_t)a2_x, (uint16_t)a2_y);
3044 WriteLog(" a1_step_x = %04X, a1_step_y = %04X, a1_stepf_x = %04X, a1_stepf_y = %04X, a2_step_x = %04X, a2_step_y = %04X\n", (uint16_t)a1_step_x, (uint16_t)a1_step_y, a1_stepf_x, a1_stepf_y, (uint16_t)a2_step_x, (uint16_t)a2_step_y);
3045 WriteLog(" a1_inc_x = %04X, a1_inc_y = %04X, a1_incf_x = %04X, a1_incf_y = %04X\n", (uint16_t)a1_inc_x, (uint16_t)a1_inc_y, a1_incf_x, a1_incf_y);
3046 WriteLog(" a1_win_x = %04X, a1_win_y = %04X, a2_mask_x = %04X, a2_mask_y = %04X\n", a1_win_x, a1_win_y, a2_mask_x, a2_mask_y);
3047 char x_add_str[4][4] = { "phr", "1", "0", "inc" };
3048 WriteLog(" a2_mask=%s a1add=%s%s/%s%s a2add=%s%s/%s%s\n", (a2_mask ? "T" : "F"), (a1xsign ? "-" : "+"), x_add_str[a1addx],
3049 (a1ysign ? "-" : "+"), (a1addy ? "1" : "0"), (a2xsign ? "-" : "+"), x_add_str[a2addx],
3050 (a2ysign ? "-" : "+"), (a2addy ? "1" : "0"));
3051 WriteLog(" a1_pixsize = %u, a2_pixsize = %u\n", a1_pixsize, a2_pixsize);
3052 WriteLog(" srcd=%08X%08X dstd=%08X%08X patd=%08X%08X iinc=%08X\n",
3053 (uint32_t)(srcd1 >> 32), (uint32_t)(srcd1 & 0xFFFFFFFF),
3054 (uint32_t)(dstd >> 32), (uint32_t)(dstd & 0xFFFFFFFF),
3055 (uint32_t)(patd >> 32), (uint32_t)(patd & 0xFFFFFFFF), iinc);
3056 WriteLog(" srcz1=%08X%08X srcz2=%08X%08X dstz=%08X%08X zinc=%08X, coll=%X\n",
3057 (uint32_t)(srcz1 >> 32), (uint32_t)(srcz1 & 0xFFFFFFFF),
3058 (uint32_t)(srcz2 >> 32), (uint32_t)(srcz2 & 0xFFFFFFFF),
3059 (uint32_t)(dstz >> 32), (uint32_t)(dstz & 0xFFFFFFFF), zinc, collision);
3063 // Various state lines set up by user
3065 bool phrase_mode = ((!dsta2 && a1addx == 0) || (dsta2 && a2addx == 0) ? true : false); // From ACONTROL
3066 #ifdef VERBOSE_BLITTER_LOGGING
3068 WriteLog(" Phrase mode is %s\n", (phrase_mode ? "ON" : "off"));
3072 // Stopgap vars to simulate various lines
3074 uint16_t a1FracCInX = 0, a1FracCInY = 0;
3080 if ((idle && !go) || (inner && outer0 && indone))
3082 #ifdef VERBOSE_BLITTER_LOGGING
3084 WriteLog(" Entering IDLE state...\n");
3088 //Instead of a return, let's try breaking out of the loop...
3095 // INNER LOOP ACTIVE
3097 Entering DWRITE state... (icount=0000, inc=4)
3098 Entering IDLE_INNER state...
3099 Leaving INNER state... (ocount=00EF)
3100 [in=T a1f=F a1=T zf=F z=F a2=F iif=F iii=F izf=F izi=F]
3101 Entering INNER state...
3103 [in=F a1f=F a1=F zf=F z=F a2=F iif=F iii=F izf=F izi=F]
3106 if ((idle && go && !datinit)
3107 || (inner && !indone)
3108 || (inner && indone && !outer0 && !upda1f && !upda1 && notgzandp && !upda2 && !datinit)
3109 || (a1update && !upda2 && notgzandp && !datinit)
3110 || (zupdate && !upda2 && !datinit)
3111 || (a2update && !datinit)
3112 || (init_ii && !gourz)
3120 // A1 FRACTION UPDATE
3122 if (inner && indone && !outer0 && upda1f)
3129 // A1 POINTER UPDATE
3132 || (inner && indone && !outer0 && !upda1f && upda1))
3139 // Z FRACTION UPDATE
3141 if ((a1update && gourz && polygon)
3142 || (inner && indone && !outer0 && !upda1f && !upda1 && gourz && polygon))
3158 // A2 POINTER UPDATE
3160 if ((a1update && upda2 && notgzandp)
3161 || (zupdate && upda2)
3162 || (inner && indone && !outer0 && !upda1f && notgzandp && !upda1 && upda2))
3169 // INITIALIZE INTENSITY FRACTION
3171 if ((zupdate && !upda2 && datinit)
3172 || (a1update && !upda2 && datinit && notgzandp)
3173 || (inner && indone && !outer0 && !upda1f && !upda1 && notgzandp && !upda2 && datinit)
3174 || (a2update && datinit)
3175 || (idle && go && datinit))
3182 // INITIALIZE INTENSITY INTEGER
3191 // INITIALIZE Z FRACTION
3193 if (init_ii && gourz)
3200 // INITIALIZE Z INTEGER
3209 // Here we move the fooi into their foo counterparts in order to simulate the moving
3210 // of data into the various FDSYNCs... Each time we loop we simulate one clock cycle...
3214 a1fupdate = a1fupdatei;
3215 a1update = a1updatei;
3216 zfupdate = zfupdatei; // *
3217 zupdate = zupdatei; // *
3218 a2update = a2updatei;
3219 init_if = init_ifi; // *
3220 init_ii = init_iii; // *
3221 init_zf = init_zfi; // *
3222 init_zi = init_zii; // *
3223 // * denotes states that will never assert for Jaguar I
3224 #ifdef VERBOSE_BLITTER_LOGGING
3226 WriteLog(" [in=%c a1f=%c a1=%c zf=%c z=%c a2=%c iif=%c iii=%c izf=%c izi=%c]\n",
3227 (inner ? 'T' : 'F'), (a1fupdate ? 'T' : 'F'), (a1update ? 'T' : 'F'),
3228 (zfupdate ? 'T' : 'F'), (zupdate ? 'T' : 'F'), (a2update ? 'T' : 'F'),
3229 (init_if ? 'T' : 'F'), (init_ii ? 'T' : 'F'), (init_zf ? 'T' : 'F'),
3230 (init_zi ? 'T' : 'F'));
3233 // Now, depending on how we want to handle things, we could either put the implementation
3234 // of the various pieces up above, or handle them down below here.
3236 // Let's try postprocessing for now...
3241 #ifdef VERBOSE_BLITTER_LOGGING
3243 WriteLog(" Entering INNER state...\n");
3245 uint16_t icount = GET16(blitter_ram, PIXLINECOUNTER + 2);
3246 bool idle_inner = true, step = true, sreadx = false, szreadx = false, sread = false,
3247 szread = false, dread = false, dzread = false, dwrite = false, dzwrite = false;
3248 bool inner0 = false;
3249 bool idle_inneri, sreadxi, szreadxi, sreadi, szreadi, dreadi, dzreadi, dwritei, dzwritei;
3251 // State lines that will never assert in Jaguar I
3253 bool textext = false, txtread = false;
3256 uint8_t srcshift = 0;
3257 bool sshftld = true; // D flipflop (D -> Q): instart -> sshftld
3258 //NOTE: sshftld probably is only asserted at the beginning of the inner loop. !!! FIX !!!
3260 Blit! (CMD = 01800005)
3261 Flags: SRCEN SRCENX LFUFUNC=C
3263 a1_base = 00037290, a2_base = 000095D0
3264 a1_x = 0000, a1_y = 0000, a2_x = 0002, a2_y = 0000
3265 a1_pixsize = 4, a2_pixsize = 4
3266 srcd=0000000000000000, dstd=0000000000000000, patd=0000000000000000
3268 [in=T a1f=F a1=F zf=F z=F a2=F iif=F iii=F izf=F izi=F]
3269 Entering INNER state...
3270 Entering SREADX state... [dstart=0 dend=20 pwidth=8 srcshift=20]
3271 Source extra read address/pix address: 000095D4/0 [0000001C00540038]
3272 Entering A2_ADD state [a2_x=0002, a2_y=0000, addasel=0, addbsel=1, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
3273 Entering SREAD state... [dstart=0 dend=20 pwidth=8 srcshift=0]
3274 Source read address/pix address: 000095D8/0 [0054003800009814]
3275 Entering A2_ADD state [a2_x=0004, a2_y=0000, addasel=0, addbsel=1, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
3276 Entering DWRITE state...
3277 Dest write address/pix address: 00037290/0 [dstart=0 dend=20 pwidth=8 srcshift=0] (icount=026E, inc=4)
3278 Entering A1_ADD state [a1_x=0000, a1_y=0000, addasel=0, addbsel=0, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
3279 Entering SREAD state... [dstart=0 dend=20 pwidth=8 srcshift=0]
3280 Source read address/pix address: 000095E0/0 [00009968000377C7]
3281 Entering A2_ADD state [a2_x=0008, a2_y=0000, addasel=0, addbsel=1, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
3282 Entering DWRITE state...
3283 Dest write address/pix address: 00037298/0 [dstart=0 dend=20 pwidth=8 srcshift=0] (icount=026A, inc=4)
3284 Entering A1_ADD state [a1_x=0004, a1_y=0000, addasel=0, addbsel=0, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
3287 // while (!idle_inner)
3292 if ((idle_inner && !step)
3293 || (dzwrite && step && inner0)
3294 || (dwrite && step && !dstwrz && inner0))
3296 #ifdef VERBOSE_BLITTER_LOGGING
3298 WriteLog(" Entering IDLE_INNER state...\n");
3304 idle_inneri = false;
3306 // EXTRA SOURCE DATA READ
3308 if ((idle_inner && step && srcenx)
3309 || (sreadx && !step))
3316 // EXTRA SOURCE ZED READ
3318 if ((sreadx && step && srcenz)
3319 || (szreadx && !step))
3326 // TEXTURE DATA READ (not implemented because not in Jaguar I)
3330 if ((szreadx && step && !textext)
3331 || (sreadx && step && !srcenz && srcen)
3332 || (idle_inner && step && !srcenx && !textext && srcen)
3333 || (dzwrite && step && !inner0 && !textext && srcen)
3334 || (dwrite && step && !dstwrz && !inner0 && !textext && srcen)
3335 || (txtread && step && srcen)
3336 || (sread && !step))
3345 if ((sread && step && srcenz)
3346 || (szread && !step))
3353 // DESTINATION DATA READ
3355 if ((szread && step && dsten)
3356 || (sread && step && !srcenz && dsten)
3357 || (sreadx && step && !srcenz && !textext && !srcen && dsten)
3358 || (idle_inner && step && !srcenx && !textext && !srcen && dsten)
3359 || (dzwrite && step && !inner0 && !textext && !srcen && dsten)
3360 || (dwrite && step && !dstwrz && !inner0 && !textext && !srcen && dsten)
3361 || (txtread && step && !srcen && dsten)
3362 || (dread && !step))
3369 // DESTINATION ZED READ
3371 if ((dread && step && dstenz)
3372 || (szread && step && !dsten && dstenz)
3373 || (sread && step && !srcenz && !dsten && dstenz)
3374 || (sreadx && step && !srcenz && !textext && !srcen && !dsten && dstenz)
3375 || (idle_inner && step && !srcenx && !textext && !srcen && !dsten && dstenz)
3376 || (dzwrite && step && !inner0 && !textext && !srcen && !dsten && dstenz)
3377 || (dwrite && step && !dstwrz && !inner0 && !textext && !srcen && !dsten && dstenz)
3378 || (txtread && step && !srcen && !dsten && dstenz)
3379 || (dzread && !step))
3386 // DESTINATION DATA WRITE
3388 if ((dzread && step)
3389 || (dread && step && !dstenz)
3390 || (szread && step && !dsten && !dstenz)
3391 || (sread && step && !srcenz && !dsten && !dstenz)
3392 || (txtread && step && !srcen && !dsten && !dstenz)
3393 || (sreadx && step && !srcenz && !textext && !srcen && !dsten && !dstenz)
3394 || (idle_inner && step && !srcenx && !textext && !srcen && !dsten && !dstenz)
3395 || (dzwrite && step && !inner0 && !textext && !srcen && !dsten && !dstenz)
3396 || (dwrite && step && !dstwrz && !inner0 && !textext && !srcen && !dsten && !dstenz)
3397 || (dwrite && !step))
3404 // DESTINATION ZED WRITE
3406 if ((dzwrite && !step)
3407 || (dwrite && step && dstwrz))
3414 //Kludge: A QnD way to make sure that sshftld is asserted only for the first
3415 // cycle of the inner loop...
3416 sshftld = idle_inner;
3418 // Here we move the fooi into their foo counterparts in order to simulate the moving
3419 // of data into the various FDSYNCs... Each time we loop we simulate one clock cycle...
3421 idle_inner = idle_inneri;
3431 // Here are a few more decodes--not sure if they're supposed to go here or not...
3433 bool srca_addi = (sreadxi && !srcenz) || (sreadi && !srcenz) || szreadxi || szreadi;
3435 bool dsta_addi = (dwritei && !dstwrz) || dzwritei;
3437 bool gensrc = sreadxi || szreadxi || sreadi || szreadi;
3438 bool gendst = dreadi || dzreadi || dwritei || dzwritei;
3439 bool gena2i = (gensrc && !dsta2) || (gendst && dsta2);
3441 bool zaddr = szreadx || szread || dzread || dzwrite;
3443 // Some stuff from MCONTROL.NET--not sure if this is the correct use of this decode or not...
3444 /*Fontread\ := OND1 (fontread\, sread[1], sreadx[1], bcompen);
3445 Fontread := INV1 (fontread, fontread\);
3446 Justt := NAN3 (justt, fontread\, phrase_mode, tactive\);
3447 Justify := TS (justify, justt, busen);*/
3448 bool fontread = (sread || sreadx) && bcompen;
3449 bool justify = !(!fontread && phrase_mode /*&& tactive*/);
3451 /* Generate inner loop update enables */
3453 A1_addi := MX2 (a1_addi, dsta_addi, srca_addi, dsta2);
3454 A2_addi := MX2 (a2_addi, srca_addi, dsta_addi, dsta2);
3455 A1_add := FD1 (a1_add, a1_add\, a1_addi, clk);
3456 A2_add := FD1 (a2_add, a2_add\, a2_addi, clk);
3457 A2_addb := BUF1 (a2_addb, a2_add);
3459 bool a1_add = (dsta2 ? srca_addi : dsta_addi);
3460 bool a2_add = (dsta2 ? dsta_addi : srca_addi);
3462 /* Address adder input A register selection
3463 000 A1 step integer part
3464 001 A1 step fraction part
3465 010 A1 increment integer part
3466 011 A1 increment fraction part
3470 bit 1 = /a2update . (a1_add . a1addx[0..1])
3471 bit 0 = /a2update . ( a1fupdate
3472 + a1_add . atick[0] . a1addx[0..1])
3473 The /a2update term on bits 0 and 1 is redundant.
3474 Now look-ahead based
3476 uint8_t addasel = (a1fupdate || (a1_add && a1addx == 3) ? 0x01 : 0x00);
3477 addasel |= (a1_add && a1addx == 3 ? 0x02 : 0x00);
3478 addasel |= (a2update ? 0x04 : 0x00);
3479 /* Address adder input A X constant selection
3480 adda_xconst[0..2] generate a power of 2 in the range 1-64 or all
3481 zeroes when they are all 1
3482 Remember - these are pixels, so to add one phrase the pixel size
3483 has to be taken into account to get the appropriate value.
3485 if a1addx[0..1] are 00 set 6 - pixel size
3486 if a1addx[0..1] are 01 set the value 000
3487 if a1addx[0..1] are 10 set the value 111
3489 JLH: Also, 11 will likewise set the value to 111
3491 uint8_t a1_xconst = 6 - a1_pixsize, a2_xconst = 6 - a2_pixsize;
3495 else if (a1addx & 0x02)
3500 else if (a2addx & 0x02)
3503 uint8_t adda_xconst = (a2_add ? a2_xconst : a1_xconst);
3504 /* Address adder input A Y constant selection
3505 22 June 94 - This was erroneous, because only the a1addy bit was reflected here.
3506 Therefore, the selection has to be controlled by a bug fix bit.
3507 JLH: Bug fix bit in Jaguar II--not in Jaguar I!
3509 bool adda_yconst = a1addy;
3510 /* Address adder input A register versus constant selection
3511 given by a1_add . a1addx[0..1]
3514 + a2_add . a2addx[0..1]
3517 bool addareg = ((a1_add && a1addx == 3) || a1update || a1fupdate
3518 || (a2_add && a2addx == 3) || a2update ? true : false);
3519 /* The adders can be put into subtract mode in add pixel size
3520 mode when the corresponding flags are set */
3521 bool suba_x = ((a1_add && a1xsign && a1addx == 1) || (a2_add && a2xsign && a2addx == 1) ? true : false);
3522 bool suba_y = ((a1_add && a1addy && a1ysign) || (a2_add && a2addy && a2ysign) ? true : false);
3523 /* Address adder input B selection
3530 + (a1_add . atick[0] . a1addx[0..1])
3531 + a1fupdate . a1_stepld
3532 + a1update . a1_stepld
3533 + a2update . a2_stepld
3534 Bit 0 = a2update + a2_add
3535 + a1fupdate . a1_stepld
3536 + a1update . a1_stepld
3537 + a2update . a2_stepld
3539 uint8_t addbsel = (a2update || a2_add || (a1fupdate && a1_stepld)
3540 || (a1update && a1_stepld) || (a2update && a2_stepld) ? 0x01 : 0x00);
3541 addbsel |= (a1fupdate || (a1_add && a1addx == 3) || (a1fupdate && a1_stepld)
3542 || (a1update && a1_stepld) || (a2update && a2_stepld) ? 0x02 : 0x00);
3544 /* The modulo bits are used to align X onto a phrase boundary when
3545 it is being updated by one phrase
3552 Masking is enabled for a1 when a1addx[0..1] is 00, and the value
3553 is 6 - the pixel size (again!)
3555 uint8_t maska1 = (a1_add && a1addx == 0 ? 6 - a1_pixsize : 0);
3556 uint8_t maska2 = (a2_add && a2addx == 0 ? 6 - a2_pixsize : 0);
3557 uint8_t modx = (a2_add ? maska2 : maska1);
3558 /* Generate load strobes for the increment updates */
3560 /*A1pldt := NAN2 (a1pldt, atick[1], a1_add);
3561 A1ptrldi := NAN2 (a1ptrldi, a1update\, a1pldt);
3563 A1fldt := NAN4 (a1fldt, atick[0], a1_add, a1addx[0..1]);
3564 A1fracldi := NAN2 (a1fracldi, a1fupdate\, a1fldt);
3566 A2pldt := NAN2 (a2pldt, atick[1], a2_add);
3567 A2ptrldi := NAN2 (a2ptrldi, a2update\, a2pldt);*/
3568 bool a1fracldi = a1fupdate || (a1_add && a1addx == 3);
3570 // Some more from DCONTROL...
3571 // atick[] just MAY be important here! We're assuming it's true and dropping the term...
3572 // That will probably screw up some of the lower terms that seem to rely on the timing of it...
3573 #warning srcdreadd is not properly initialized!
3574 bool srcdreadd = false; // Set in INNER.NET
3575 //Shadeadd\ := NAN2H (shadeadd\, dwrite, srcshade);
3576 //Shadeadd := INV2 (shadeadd, shadeadd\);
3577 bool shadeadd = dwrite && srcshade;
3578 /* Data adder control, input A selection
3579 000 Destination data
3580 001 Initialiser pixel value
3581 100 Source data - computed intensity fraction
3582 101 Pattern data - computed intensity
3583 110 Source zed 1 - computed zed
3584 111 Source zed 2 - computed zed fraction
3586 Bit 0 = dwrite . gourd . atick[1]
3587 + dzwrite . gourz . atick[0]
3590 + init_if + init_ii + init_zf + init_zi
3591 Bit 1 = dzwrite . gourz . (atick[0] + atick[1])
3594 Bit 2 = (gourd + gourz) . /(init_if + init_ii + init_zf + init_zi)
3597 uint8_t daddasel = ((dwrite && gourd) || (dzwrite && gourz) || istepadd || zstepfadd
3598 || init_if || init_ii || init_zf || init_zi ? 0x01 : 0x00);
3599 daddasel |= ((dzwrite && gourz) || zstepadd || zstepfadd ? 0x02 : 0x00);
3600 daddasel |= (((gourd || gourz) && !(init_if || init_ii || init_zf || init_zi))
3601 || (dwrite && srcshade) ? 0x04 : 0x00);
3602 /* Data adder control, input B selection
3604 0001 Data initialiser increment
3605 0100 Bottom 16 bits of I increment repeated four times
3606 0101 Top 16 bits of I increment repeated four times
3607 0110 Bottom 16 bits of Z increment repeated four times
3608 0111 Top 16 bits of Z increment repeated four times
3609 1100 Bottom 16 bits of I step repeated four times
3610 1101 Top 16 bits of I step repeated four times
3611 1110 Bottom 16 bits of Z step repeated four times
3612 1111 Top 16 bits of Z step repeated four times
3614 Bit 0 = dwrite . gourd . atick[1]
3615 + dzwrite . gourz . atick[1]
3619 + init_if + init_ii + init_zf + init_zi
3620 Bit 1 = dzwrite . gourz . (atick[0] + atick[1])
3623 Bit 2 = dwrite . gourd . (atick[0] + atick[1])
3624 + dzwrite . gourz . (atick[0] + atick[1])
3626 + istepadd + istepfadd + zstepadd + zstepfadd
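3627 Bit 3 = istepadd + istepfadd + zstepadd + zstepfadd (NOTE(review): '+' is OR in this notation, but the daddbsel code below combines these four terms with &&; no visible effect while all four lines are initialised false for Jaguar I--confirm against DCONTROL.NET before enabling any of them)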
3629 uint8_t daddbsel = ((dwrite && gourd) || (dzwrite && gourz) || (dwrite && srcshade)
3630 || istepadd || zstepadd || init_if || init_ii || init_zf || init_zi ? 0x01 : 0x00);
3631 daddbsel |= ((dzwrite && gourz) || zstepadd || zstepfadd ? 0x02 : 0x00);
3632 daddbsel |= ((dwrite && gourd) || (dzwrite && gourz) || (dwrite && srcshade)
3633 || istepadd || istepfadd || zstepadd || zstepfadd ? 0x04 : 0x00);
3634 daddbsel |= (istepadd && istepfadd && zstepadd && zstepfadd ? 0x08 : 0x00);
3635 /* Data adder mode control
3636 000 16-bit normal add
3637 001 16-bit saturating add with carry
3638 010 8-bit saturating add with carry, carry into top byte is
3640 011 8-bit saturating add with carry, carry into top byte and
3641 between top nybbles is inhibited (CRY)
3642 100 16-bit normal add with carry
3643 101 16-bit saturating add
3644 110 8-bit saturating add, carry into top byte is inhibited
3645 111 8-bit saturating add, carry into top byte and between top
3646 nybbles is inhibited
3648 The first five are used for Gouraud calculations, the latter three
3649 for adding source and destination data
3651 Bit 0 = dzwrite . gourz . atick[1]
3652 + dwrite . gourd . atick[1] . /topnen . /topben . /ext_int
3653 + dwrite . gourd . atick[1] . topnen . topben . /ext_int
3655 + istepadd . /topnen . /topben . /ext_int
3656 + istepadd . topnen . topben . /ext_int
3657 + /gourd . /gourz . /topnen . /topben
3658 + /gourd . /gourz . topnen . topben
3659 + shadeadd . /topnen . /topben
3660 + shadeadd . topnen . topben
3661 + init_ii . /topnen . /topben . /ext_int
3662 + init_ii . topnen . topben . /ext_int
3665 Bit 1 = dwrite . gourd . atick[1] . /topben . /ext_int
3666 + istepadd . /topben . /ext_int
3667 + /gourd . /gourz . /topben
3668 + shadeadd . /topben
3669 + init_ii . /topben . /ext_int
3671 Bit 2 = /gourd . /gourz
3673 + dwrite . gourd . atick[1] . ext_int
3674 + istepadd . ext_int
3677 uint8_t daddmode = ((dzwrite && gourz) || (dwrite && gourd && !topnen && !topben && !ext_int)
3678 || (dwrite && gourd && topnen && topben && !ext_int) || zstepadd
3679 || (istepadd && !topnen && !topben && !ext_int)
3680 || (istepadd && topnen && topben && !ext_int) || (!gourd && !gourz && !topnen && !topben)
3681 || (!gourd && !gourz && topnen && topben) || (shadeadd && !topnen && !topben)
3682 || (shadeadd && topnen && topben) || (init_ii && !topnen && !topben && !ext_int)
3683 || (init_ii && topnen && topben && !ext_int) || init_zi ? 0x01 : 0x00);
3684 daddmode |= ((dwrite && gourd && !topben && !ext_int) || (istepadd && !topben && !ext_int)
3685 || (!gourd && !gourz && !topben) || (shadeadd && !topben)
3686 || (init_ii && !topben && !ext_int) ? 0x02 : 0x00);
3687 daddmode |= ((!gourd && !gourz) || shadeadd || (dwrite && gourd && ext_int)
3688 || (istepadd && ext_int) || (init_ii && ext_int) ? 0x04 : 0x00);
3689 /* Data add load controls
3690 Pattern fraction (dest data) is loaded on
3691 dwrite . gourd . atick[0]
3692 + istepfadd . /datinit
3694 Pattern data is loaded on
3695 dwrite . gourd . atick[1]
3696 + istepadd . /datinit . /datinit
3698 Source z1 is loaded on
3699 dzwrite . gourz . atick[1]
3700 + zstepadd . /datinit . /datinit
3702 Source z2 is loaded on
3703 dzwrite . gourz . atick[0]
3706 Texture map shaded data is loaded on
3707 srcdreadd . srcshade
3709 bool patfadd = (dwrite && gourd) || (istepfadd && !datinit) || init_if;
3710 bool patdadd = (dwrite && gourd) || (istepadd && !datinit) || init_ii;
3711 bool srcz1add = (dzwrite && gourz) || (zstepadd && !datinit) || init_zi;
3712 bool srcz2add = (dzwrite && gourz) || zstepfadd || init_zf;
3713 bool srcshadd = srcdreadd && srcshade;
3714 bool daddq_sel = patfadd || patdadd || srcz1add || srcz2add || srcshadd;
3715 /* Select write data
3716 This has to be controlled from stage 1 of the pipe-line, delayed
3717 by one tick, as the write occurs in the cycle after the ack.
3724 Bit 0 = /patdsel . /adddsel
3729 uint8_t data_sel = ((!patdsel && !adddsel) || dzwrite ? 0x01 : 0x00)
3730 | (adddsel || dzwrite ? 0x02 : 0x00);
3732 uint32_t address, pixAddr;
3733 ADDRGEN(address, pixAddr, gena2i, zaddr,
3734 a1_x, a1_y, a1_base, a1_pitch, a1_pixsize, a1_width, a1_zoffset,
3735 a2_x, a2_y, a2_base, a2_pitch, a2_pixsize, a2_width, a2_zoffset);
3737 //Here's my guess as to how the addresses get truncated to phrase boundaries in phrase mode...
3739 address &= 0xFFFFF8;
3741 /* Generate source alignment shift
3742 -------------------------------
3743 The source alignment shift for data move is the difference between
3744 the source and destination X pointers, multiplied by the pixel
3745 size. Only the low six bits of the pointers are of interest, as
3746 pixel sizes are always a power of 2 and window rows are always
3749 When not in phrase mode, the top 3 bits of the shift value are
3752 Source shifting is also used to extract bits for bit-to-byte
3753 expansion in phrase mode. This involves only the bottom three
3754 bits of the shift value, and is based on the offset within the
3755 phrase of the destination X pointer, in pixels.
3757 Source shifting is disabled when srcen is not set.
3759 uint8_t dstxp = (dsta2 ? a2_x : a1_x) & 0x3F;
3760 uint8_t srcxp = (dsta2 ? a1_x : a2_x) & 0x3F;
3761 uint8_t shftv = ((dstxp - srcxp) << pixsize) & 0x3F;
3762 /* The phrase mode alignment count is given by the phrase offset
3763 of the first pixel, for bit to byte expansion */
3767 pobb = dstxp & 0x07;
3769 pobb = dstxp & 0x03;
3771 pobb = dstxp & 0x01;
3773 bool pobbsel = phrase_mode && bcompen;
3774 uint8_t loshd = (pobbsel ? pobb : shftv) & 0x07;
3775 uint8_t shfti = (srcen || pobbsel ? (sshftld ? loshd : srcshift & 0x07) : 0);
3776 /* Enable for high bits is srcen . phrase_mode */
3777 shfti |= (srcen && phrase_mode ? (sshftld ? shftv & 0x38 : srcshift & 0x38) : 0);
3782 #ifdef VERBOSE_BLITTER_LOGGING
3784 WriteLog(" Entering SREADX state...");
3786 //uint32_t srcAddr, pixAddr;
3787 //ADDRGEN(srcAddr, pixAddr, gena2i, zaddr,
3788 // a1_x, a1_y, a1_base, a1_pitch, a1_pixsize, a1_width, a1_zoffset,
3789 // a2_x, a2_y, a2_base, a2_pitch, a2_pixsize, a2_width, a2_zoffset);
3791 srcd1 = ((uint64_t)JaguarReadLong(address + 0, BLITTER) << 32)
3792 | (uint64_t)JaguarReadLong(address + 4, BLITTER);
3793 //Kludge to take pixel size into account...
3794 //Hmm. If we're not in phrase mode, this is most likely NOT going to be used...
3795 //Actually, it would be--because of BCOMPEN expansion, for example...
3804 else if (pixsize == 4)
3810 #ifdef VERBOSE_BLITTER_LOGGING
3812 WriteLog(" Source extra read address/pix address: %08X/%1X [%08X%08X]\n",
3813 address, pixAddr, (uint32_t)(srcd1 >> 32), (uint32_t)(srcd1 & 0xFFFFFFFF));
3819 #ifdef VERBOSE_BLITTER_LOGGING
3821 WriteLog(" Entering SZREADX state...");
3824 srcz1 = ((uint64_t)JaguarReadLong(address, BLITTER) << 32) | (uint64_t)JaguarReadLong(address + 4, BLITTER);
3825 #ifdef VERBOSE_BLITTER_LOGGING
3827 WriteLog(" Src Z extra read address/pix address: %08X/%1X [%08X%08X]\n", address, pixAddr,
3828 (uint32_t)(dstz >> 32), (uint32_t)(dstz & 0xFFFFFFFF));
3834 #ifdef VERBOSE_BLITTER_LOGGING
3836 WriteLog(" Entering SREAD state...");
3838 //uint32_t srcAddr, pixAddr;
3839 //ADDRGEN(srcAddr, pixAddr, gena2i, zaddr,
3840 // a1_x, a1_y, a1_base, a1_pitch, a1_pixsize, a1_width, a1_zoffset,
3841 // a2_x, a2_y, a2_base, a2_pitch, a2_pixsize, a2_width, a2_zoffset);
3843 srcd1 = ((uint64_t)JaguarReadLong(address, BLITTER) << 32) | (uint64_t)JaguarReadLong(address + 4, BLITTER);
3844 //Kludge to take pixel size into account...
3853 else if (pixsize == 4)
3859 #ifdef VERBOSE_BLITTER_LOGGING
3862 WriteLog(" Source read address/pix address: %08X/%1X [%08X%08X]\n", address, pixAddr,
3863 (uint32_t)(srcd1 >> 32), (uint32_t)(srcd1 & 0xFFFFFFFF));
3871 #ifdef VERBOSE_BLITTER_LOGGING
3874 WriteLog(" Entering SZREAD state...");
3879 srcz1 = ((uint64_t)JaguarReadLong(address, BLITTER) << 32) | (uint64_t)JaguarReadLong(address + 4, BLITTER);
3880 //Kludge to take pixel size into account... I believe that it only has to take 16BPP mode into account. Not sure tho.
3881 if (!phrase_mode && pixsize == 4)
3884 #ifdef VERBOSE_BLITTER_LOGGING
3887 WriteLog(" Src Z read address/pix address: %08X/%1X [%08X%08X]\n", address, pixAddr,
3888 (uint32_t)(dstz >> 32), (uint32_t)(dstz & 0xFFFFFFFF));
3895 #ifdef VERBOSE_BLITTER_LOGGING
3897 WriteLog(" Entering DREAD state...");
3899 //uint32_t dstAddr, pixAddr;
3900 //ADDRGEN(dstAddr, pixAddr, gena2i, zaddr,
3901 // a1_x, a1_y, a1_base, a1_pitch, a1_pixsize, a1_width, a1_zoffset,
3902 // a2_x, a2_y, a2_base, a2_pitch, a2_pixsize, a2_width, a2_zoffset);
3903 dstd = ((uint64_t)JaguarReadLong(address, BLITTER) << 32) | (uint64_t)JaguarReadLong(address + 4, BLITTER);
3904 //Kludge to take pixel size into account...
3909 else if (pixsize == 4)
3914 #ifdef VERBOSE_BLITTER_LOGGING
3916 WriteLog(" Dest read address/pix address: %08X/%1X [%08X%08X]\n", address,
3917 pixAddr, (uint32_t)(dstd >> 32), (uint32_t)(dstd & 0xFFFFFFFF));
3923 // Is Z always 64 bit read? Or sometimes 16 bit (dependent on phrase_mode)?
3924 #ifdef VERBOSE_BLITTER_LOGGING
3926 WriteLog(" Entering DZREAD state...");
3928 dstz = ((uint64_t)JaguarReadLong(address, BLITTER) << 32) | (uint64_t)JaguarReadLong(address + 4, BLITTER);
3929 //Kludge to take pixel size into account... I believe that it only has to take 16BPP mode into account. Not sure tho.
3930 if (!phrase_mode && pixsize == 4)
3933 #ifdef VERBOSE_BLITTER_LOGGING
3935 WriteLog(" Dest Z read address/pix address: %08X/%1X [%08X%08X]\n", address,
3936 pixAddr, (uint32_t)(dstz >> 32), (uint32_t)(dstz & 0xFFFFFFFF));
3940 // These vars should probably go further up in the code... !!! FIX !!!
3941 // We can't preassign these unless they're static...
3942 //uint64_t srcz = 0; // These are assigned to shut up stupid compiler warnings--dwrite is ALWAYS asserted
3943 //bool winhibit = false;
3946 //NOTE: SRCSHADE requires GOURZ to be set to work properly--another Jaguar I bug
3949 #ifdef VERBOSE_BLITTER_LOGGING
3951 WriteLog(" Entering DWRITE state...");
3953 //Counter is done on the dwrite state...! (We'll do it first, since it affects dstart/dend calculations.)
3954 //Here's the voodoo for figuring the correct amount of pixels in phrase mode (or not):
3955 int8_t inct = -((dsta2 ? a2_x : a1_x) & 0x07); // From INNER_CNT
3957 inc = (!phrase_mode || (phrase_mode && (inct & 0x01)) ? 0x01 : 0x00);
3958 inc |= (phrase_mode && (((pixsize == 3 || pixsize == 4) && (inct & 0x02)) || pixsize == 5 && !(inct & 0x01)) ? 0x02 : 0x00);
3959 inc |= (phrase_mode && ((pixsize == 3 && (inct & 0x04)) || (pixsize == 4 && !(inct & 0x03))) ? 0x04 : 0x00);
3960 inc |= (phrase_mode && pixsize == 3 && !(inct & 0x07) ? 0x08 : 0x00);
3962 uint16_t oldicount = icount; // Save icount to detect underflow...
3965 if (icount == 0 || ((icount & 0x8000) && !(oldicount & 0x8000)))
3967 // X/Y stepping is also done here, I think...No. It's done when a1_add or a2_add is asserted...
3969 //*********************************************************************************
3970 //Start & end write mask computations...
3971 //*********************************************************************************
3976 dstart = (dstxp & 0x07) << 3;
3978 dstart = (dstxp & 0x03) << 4;
3980 dstart = (dstxp & 0x01) << 5;
3982 dstart = (phrase_mode ? dstart : pixAddr & 0x07);
3984 //This is the other Jaguar I bug... Normally, should ALWAYS select a1_x here.
3985 uint16_t dstxwr = (dsta2 ? a2_x : a1_x) & 0x7FFE;
3986 uint16_t pseq = dstxwr ^ (a1_win_x & 0x7FFE);
3987 pseq = (pixsize == 5 ? pseq : pseq & 0x7FFC);
3988 pseq = ((pixsize & 0x06) == 4 ? pseq : pseq & 0x7FF8);
3989 bool penden = clip_a1 && (pseq == 0);
3990 uint8_t window_mask = 0;
3993 window_mask = (a1_win_x & 0x07) << 3;
3995 window_mask = (a1_win_x & 0x03) << 4;
3997 window_mask = (a1_win_x & 0x01) << 5;
3999 window_mask = (penden ? window_mask : 0);
4002 Entering SREADX state... [dstart=0 dend=20 pwidth=8 srcshift=20]
4003 Source extra read address/pix address: 000095D0/0 [000004E40000001C]
4004 Entering A2_ADD state [a2_x=0002, a2_y=0000, addasel=0, addbsel=1, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
4005 Entering SREAD state... [dstart=0 dend=20 pwidth=8 srcshift=20]
4006 Source read address/pix address: 000095D8/0 [0054003800009814]
4007 Entering A2_ADD state [a2_x=0004, a2_y=0000, addasel=0, addbsel=1, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
4008 Entering DWRITE state...
4009 Dest write address/pix address: 00037290/0 [dstart=0 dend=20 pwidth=8 srcshift=20][daas=0 dabs=0 dam=7 ds=1 daq=F] [0000001C00000000] (icount=026E, inc=4)
4010 Entering A1_ADD state [a1_x=0000, a1_y=0000, addasel=0, addbsel=0, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
4012 (icount=026E, inc=4)
4013 icount & 0x03 = 0x02
4016 window_mask = 0x1000
4018 Therefore, it chooses the inner_mask over the window_mask every time! Argh!
4019 This is because we did this wrong:
4020 Innerm[3-5] := AN2 (inner_mask[3-5], imb[3-5], inner0);
4021 NOTE! This doesn't fix the problem because inner0 is asserted too late to help here. !!! FIX !!! [Should be DONE]
4024 /* The mask to be used if within one phrase of the end of the inner
4026 uint8_t inner_mask = 0;
4029 inner_mask = (icount & 0x07) << 3;
4031 inner_mask = (icount & 0x03) << 4;
4033 inner_mask = (icount & 0x01) << 5;
4036 /* The actual mask used should be the lesser of the window masks and
4037 the inner mask, where in all cases 000 means 1000. */
4038 window_mask = (window_mask == 0 ? 0x40 : window_mask);
4039 inner_mask = (inner_mask == 0 ? 0x40 : inner_mask);
4040 uint8_t emask = (window_mask > inner_mask ? inner_mask : window_mask);
4041 /* The mask to be used for the pixel size, to which must be added
4043 uint8_t pma = pixAddr + (1 << pixsize);
4044 /* Select the mask */
4045 uint8_t dend = (phrase_mode ? emask : pma);
4047 /* The cycle width in phrase mode is normally one phrase. However,
4048 at the start and end it may be narrower. The start and end masks
4049 are used to generate this. The width is given by:
4051 8 - start mask - (8 - end mask)
4052 = end mask - start mask
4054 This is only used for writes in phrase mode.
4055 Start and end from the address level of the pipeline are used.
4057 uint8_t pwidth = (((dend | dstart) & 0x07) == 0 ? 0x08 : (dend - dstart) & 0x07);
4059 //uint32_t dstAddr, pixAddr;
4060 //ADDRGEN(dstAddr, pixAddr, gena2i, zaddr,
4061 // a1_x, a1_y, a1_base, a1_pitch, a1_pixsize, a1_width, a1_zoffset,
4062 // a2_x, a2_y, a2_base, a2_pitch, a2_pixsize, a2_width, a2_zoffset);
4063 #ifdef VERBOSE_BLITTER_LOGGING
4065 WriteLog(" Dest write address/pix address: %08X/%1X", address, pixAddr);
4068 //More testing... This is almost certainly wrong, but how else does this work???
4069 //Seems to kinda work... But still, this doesn't seem to make any sense!
4070 if (phrase_mode && !dsten)
4071 dstd = ((uint64_t)JaguarReadLong(address, BLITTER) << 32) | (uint64_t)JaguarReadLong(address + 4, BLITTER);
4073 //Testing only... for now...
4074 //This is wrong because the write data is a combination of srcd and dstd--either run
4075 //thru the LFU or in PATDSEL or ADDDSEL mode. [DONE now, thru DATA module]
4076 // Precedence is ADDDSEL > PATDSEL > LFU.
4077 //Also, doesn't take into account the start & end masks, or the phrase width...
4080 // srcd2 = xxxx xxxx 0123 4567, srcd = 8901 2345 xxxx xxxx, srcshift = $20 (32)
4081 uint64_t srcd = (srcd2 << (64 - srcshift)) | (srcd1 >> srcshift);
4082 //bleh, ugly ugly ugly
4086 //NOTE: This only works with pixel sizes less than 8BPP...
4087 //DOUBLE NOTE: Still need to do regression testing to ensure that this doesn't break other stuff... !!! CHECK !!!
4088 if (!phrase_mode && srcshift != 0)
4089 srcd = ((srcd2 & 0xFF) << (8 - srcshift)) | ((srcd1 & 0xFF) >> srcshift);
4091 //Z DATA() stuff done here... And it has to be done before any Z shifting...
4092 //Note that we need to have phrase mode start/end support here... (Not since we moved it from dzwrite...!)
4094 Here are a couple of Cybermorph blits with Z:
4095 $00113078 // DSTEN DSTENZ DSTWRZ CLIP_A1 GOURD GOURZ PATDSEL ZMODE=4
4096 $09900F39 // SRCEN DSTEN DSTENZ DSTWRZ UPDA1 UPDA1F UPDA2 DSTA2 ZMODE=4 LFUFUNC=C DCOMPEN
4098 We're having the same phrase mode overwrite problem we had with the pixels... !!! FIX !!!
4099 Odd. It's equating 0 with 0... Even though ZMODE is $04 (less than)!
4104 void ADDARRAY(uint16_t * addq, uint8_t daddasel, uint8_t daddbsel, uint8_t daddmode,
4105 uint64_t dstd, uint32_t iinc, uint8_t initcin[], uint64_t initinc, uint16_t initpix,
4106 uint32_t istep, uint64_t patd, uint64_t srcd, uint64_t srcz1, uint64_t srcz2,
4107 uint32_t zinc, uint32_t zstep)
4110 uint8_t initcin[4] = { 0, 0, 0, 0 };
4111 ADDARRAY(addq, 7/*daddasel*/, 6/*daddbsel*/, 0/*daddmode*/, 0, 0, initcin, 0, 0, 0, 0, 0, srcz1, srcz2, zinc, 0);
4112 srcz2 = ((uint64_t)addq[3] << 48) | ((uint64_t)addq[2] << 32) | ((uint64_t)addq[1] << 16) | (uint64_t)addq[0];
4113 ADDARRAY(addq, 6/*daddasel*/, 7/*daddbsel*/, 1/*daddmode*/, 0, 0, initcin, 0, 0, 0, 0, 0, srcz1, srcz2, zinc, 0);
4114 srcz1 = ((uint64_t)addq[3] << 48) | ((uint64_t)addq[2] << 32) | ((uint64_t)addq[1] << 16) | (uint64_t)addq[0];
4116 #if 0//def VERBOSE_BLITTER_LOGGING
4118 WriteLog("\n[srcz1=%08X%08X, srcz2=%08X%08X, zinc=%08X",
4119 (uint32_t)(srcz1 >> 32), (uint32_t)(srcz1 & 0xFFFFFFFF),
4120 (uint32_t)(srcz2 >> 32), (uint32_t)(srcz2 & 0xFFFFFFFF), zinc);
4124 uint8_t zSrcShift = srcshift & 0x30;
4125 srcz = (srcz2 << (64 - zSrcShift)) | (srcz1 >> zSrcShift);
4126 //bleh, ugly ugly ugly
4130 #if 0//def VERBOSE_BLITTER_LOGGING
4132 WriteLog(" srcz=%08X%08X]\n", (uint32_t)(srcz >> 32), (uint32_t)(srcz & 0xFFFFFFFF));
4135 //When in SRCSHADE mode, it adds the IINC to the read source (from LFU???)
4136 //According to following line, it gets LFU mode. But does it feed the source into the LFU
4138 //Dest write address/pix address: 0014E83E/0 [dstart=0 dend=10 pwidth=8 srcshift=0][daas=4 dabs=5 dam=7 ds=1 daq=F] [0000000000006505] (icount=003F, inc=1)
4142 //NOTE: This is basically doubling the work done by DATA--since this is what
4143 // ADDARRAY is loaded with when srcshade is enabled... !!! FIX !!!
4144 // Also note that it doesn't work properly unless GOURZ is set--there's the clue!
4146 uint8_t initcin[4] = { 0, 0, 0, 0 };
4147 ADDARRAY(addq, 4/*daddasel*/, 5/*daddbsel*/, 7/*daddmode*/, dstd, iinc, initcin, 0, 0, 0, patd, srcd, 0, 0, 0, 0);
4148 srcd = ((uint64_t)addq[3] << 48) | ((uint64_t)addq[2] << 32) | ((uint64_t)addq[1] << 16) | (uint64_t)addq[0];
4150 //Seems to work... Not 100% sure tho.
4153 //Temporary kludge, to see if the fractional pattern does anything...
4155 //But it seems to mess up in Cybermorph... the shading should be smooth but it isn't...
4156 //Seems the carry out is lost again... !!! FIX !!! [DONE--see below]
4160 uint8_t initcin[4] = { 0, 0, 0, 0 };
4161 ADDARRAY(addq, 4/*daddasel*/, 4/*daddbsel*/, 0/*daddmode*/, dstd, iinc, initcin, 0, 0, 0, patd, srcd, 0, 0, 0, 0);
4162 srcd1 = ((uint64_t)addq[3] << 48) | ((uint64_t)addq[2] << 32) | ((uint64_t)addq[1] << 16) | (uint64_t)addq[0];
4165 //Note that we still don't take atick[0] & [1] into account here, so this will skip half of the data needed... !!! FIX !!!
4166 //Not yet enumerated: dbinh, srcdread, srczread
4167 //Also, should do srcshift on the z value in phrase mode... !!! FIX !!! [DONE]
4168 //As well as add a srcz variable we can set external to this state... !!! FIX !!! [DONE]
4171 uint8_t dcomp, zcomp;
4172 DATA(wdata, dcomp, zcomp, winhibit,
4173 true, cmpdst, daddasel, daddbsel, daddmode, daddq_sel, data_sel, 0/*dbinh*/,
4174 dend, dstart, dstd, iinc, lfufunc, patd, patdadd,
4175 phrase_mode, srcd, false/*srcdread*/, false/*srczread*/, srcz2add, zmode,
4176 bcompen, bkgwren, dcompen, icount & 0x07, pixsize,
4179 Seems that the phrase mode writes with DCOMPEN and DSTEN are corrupting inside of DATA: !!! FIX !!!
4180 It's fairly random as well. 7CFE -> 7DFE, 7FCA -> 78CA, 7FA4 -> 78A4, 7F88 -> 8F88
4181 It could be related to an uninitialized variable, like the zmode bug...
4183 It was a bug in the dech38el data--it returned $FF for ungated instead of $00...
4185 Blit! (CMD = 09800609)
4186 Flags: SRCEN DSTEN UPDA1 UPDA2 LFUFUNC=C DCOMPEN
4188 a1_base = 00110000, a2_base = 0010B2A8
4189 a1_x = 004B, a1_y = 00D8, a1_frac_x = 0000, a1_frac_y = 0000, a2_x = 0704, a2_y = 0000
4190 a1_step_x = FFF3, a1_step_y = 0001, a1_stepf_x = 0000, a1_stepf_y = 0000, a2_step_x = FFFC, a2_step_y = 0000
4191 a1_inc_x = 0000, a1_inc_y = 0000, a1_incf_x = 0000, a1_incf_y = 0000
4192 a1_win_x = 0000, a1_win_y = 0000, a2_mask_x = 0000, a2_mask_y = 0000
4193 a2_mask=F a1add=+phr/+0 a2add=+phr/+0
4194 a1_pixsize = 4, a2_pixsize = 4
4195 srcd=0000000000000000 dstd=0000000000000000 patd=0000000000000000 iinc=00000000
4196 srcz1=0000000000000000 srcz2=0000000000000000 dstz=0000000000000000 zinc=00000000, coll=0
4198 [in=T a1f=F a1=F zf=F z=F a2=F iif=F iii=F izf=F izi=F]
4199 Entering INNER state...
4200 Entering SREAD state... Source read address/pix address: 0010C0B0/0 [0000000078047804]
4201 Entering A2_ADD state [a2_x=0704, a2_y=0000, addasel=0, addbsel=1, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
4202 Entering DREAD state...
4203 Dest read address/pix address: 00197240/0 [0000000000000028]
4204 Entering DWRITE state...
4205 Dest write address/pix address: 00197240/0 [dstart=30 dend=40 pwidth=8 srcshift=30][daas=0 dabs=0 dam=7 ds=1 daq=F] [0000000000000028] (icount=0009, inc=1)
4206 Entering A1_ADD state [a1_x=004B, a1_y=00D8, addasel=0, addbsel=0, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
4207 Entering SREAD state... Source read address/pix address: 0010C0B8/0 [7804780478047804]
4208 Entering A2_ADD state [a2_x=0708, a2_y=0000, addasel=0, addbsel=1, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
4209 Entering DREAD state...
4210 Dest read address/pix address: 00197260/0 [0028000000200008]
4211 Entering DWRITE state...
4212 Dest write address/pix address: 00197260/0 [dstart=0 dend=40 pwidth=8 srcshift=30][daas=0 dabs=0 dam=7 ds=1 daq=F] [0028780478047804] (icount=0005, inc=4)
4213 Entering A1_ADD state [a1_x=004C, a1_y=00D8, addasel=0, addbsel=0, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
4214 Entering SREAD state... Source read address/pix address: 0010C0C0/0 [0000000000000000]
4215 Entering A2_ADD state [a2_x=070C, a2_y=0000, addasel=0, addbsel=1, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
4216 Entering DREAD state...
4217 Dest read address/pix address: 00197280/0 [0008001800180018]
4218 Entering DWRITE state...
4219 Dest write address/pix address: 00197280/0 [dstart=0 dend=40 pwidth=8 srcshift=30][daas=0 dabs=0 dam=7 ds=1 daq=F] [7804780478040018] (icount=0001, inc=4)
4220 Entering A1_ADD state [a1_x=0050, a1_y=00D8, addasel=0, addbsel=0, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
4221 Entering SREAD state... Source read address/pix address: 0010C0C8/0 [000078047BFE7BFE]
4222 Entering A2_ADD state [a2_x=0710, a2_y=0000, addasel=0, addbsel=1, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
4223 Entering DREAD state...
4224 Dest read address/pix address: 001972A0/0 [0008002000000000]
4225 Entering DWRITE state...
4226 Dest write address/pix address: 001972A0/0 [dstart=0 dend=10 pwidth=8 srcshift=30][daas=0 dabs=0 dam=7 ds=1 daq=F] [0008002000000000] (icount=FFFD, inc=4)
4227 Entering A1_ADD state [a1_x=0054, a1_y=00D8, addasel=0, addbsel=0, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
4228 Entering IDLE_INNER state...
4231 //Why isn't this taken care of in DATA? Because, DATA is modifying its local copy instead of the one used here.
4232 //!!! FIX !!! [DONE]
4241 a1_outside // A1 pointer is outside window bounds
4250 // The address is outside if negative, or if greater than or equal
4251 // to the window size
4253 A1_xcomp := MAG_15 (a1xgr, a1xeq, a1xlt, a1_x{0..14}, a1_win_x{0..14});
4254 A1_ycomp := MAG_15 (a1ygr, a1yeq, a1ylt, a1_y{0..14}, a1_win_y{0..14});
4255 A1_outside := OR6 (a1_outside, a1_x{15}, a1xgr, a1xeq, a1_y{15}, a1ygr, a1yeq);
4257 //NOTE: There seems to be an off-by-one bug here in the clip_a1 section... !!! FIX !!!
4258 // Actually, seems to be related to phrase mode writes...
4259 // Or is it? Could be related to non-15-bit compares as above?
4260 if (clip_a1 && ((a1_x & 0x8000) || (a1_y & 0x8000) || (a1_x >= a1_win_x) || (a1_y >= a1_win_y)))
4267 JaguarWriteLong(address + 0, wdata >> 32, BLITTER);
4268 JaguarWriteLong(address + 4, wdata & 0xFFFFFFFF, BLITTER);
4273 JaguarWriteLong(address, wdata & 0xFFFFFFFF, BLITTER);
4274 else if (pixsize == 4)
4275 JaguarWriteWord(address, wdata & 0x0000FFFF, BLITTER);
4277 JaguarWriteByte(address, wdata & 0x000000FF, BLITTER);
4281 #ifdef VERBOSE_BLITTER_LOGGING
4284 WriteLog(" [%08X%08X]%s", (uint32_t)(wdata >> 32), (uint32_t)(wdata & 0xFFFFFFFF), (winhibit ? "[X]" : ""));
4285 WriteLog(" (icount=%04X, inc=%u)\n", icount, (uint16_t)inc);
4286 WriteLog(" [dstart=%X dend=%X pwidth=%X srcshift=%X]", dstart, dend, pwidth, srcshift);
4287 WriteLog("[daas=%X dabs=%X dam=%X ds=%X daq=%s]\n", daddasel, daddbsel, daddmode, data_sel, (daddq_sel ? "T" : "F"));
4294 // OK, here's the big insight: When NOT in GOURZ mode, srcz1 & 2 function EXACTLY the same way that
4295 // srcd1 & 2 work--there's an implicit shift from srcz1 to srcz2 whenever srcz1 is read.
4296 // OTHERWISE, srcz1 is the integer for the computed Z and srcz2 is the fractional part.
4297 // Writes to srcz1 & 2 follow the same pattern as the other 64-bit registers--low 32 at the low address,
4298 // high 32 at the high address (little endian!).
4299 // NOTE: GOURZ is still not properly supported. Check patd/patf handling...
4300 // Phrase mode start/end masks are not properly supported either...
4301 #ifdef VERBOSE_BLITTER_LOGGING
4304 WriteLog(" Entering DZWRITE state...");
4305 WriteLog(" Dest Z write address/pix address: %08X/%1X [%08X%08X]\n", address,
4306 pixAddr, (uint32_t)(srcz >> 32), (uint32_t)(srcz & 0xFFFFFFFF));
4309 //This is not correct... !!! FIX !!!
4310 //Should be OK now... We'll see...
4311 //Nope. Having the same starstep write problems in phrase mode as we had with pixels... !!! FIX !!!
4312 //This is not causing the problem in Hover Strike... :-/
4313 //The problem was with the SREADX not shifting. Still problems with Z comparisons & other text in pregame screen...
4318 JaguarWriteLong(address + 0, srcz >> 32, BLITTER);
4319 JaguarWriteLong(address + 4, srcz & 0xFFFFFFFF, BLITTER);
4324 JaguarWriteWord(address, srcz & 0x0000FFFF, BLITTER);
4327 #ifdef VERBOSE_BLITTER_LOGGING
4330 // printf(" [%08X%08X]\n", (uint32_t)(srcz >> 32), (uint32_t)(srcz & 0xFFFFFFFF));
4332 //printf(" [dstart=%X dend=%X pwidth=%X srcshift=%X]", dstart, dend, pwidth, srcshift);
4333 WriteLog(" [dstart=? dend=? pwidth=? srcshift=%X]", srcshift);
4334 WriteLog("[daas=%X dabs=%X dam=%X ds=%X daq=%s]\n", daddasel, daddbsel, daddmode, data_sel, (daddq_sel ? "T" : "F"));
4341 This is because the address generator was using only 15 bits of the X when it should have
4344 There's a slight problem here: The X pointer isn't wrapping like it should when it hits
4345 the edge of the window... Notice how the X isn't reset at the edge of the window:
4347 Blit! (CMD = 00010000)
4350 a1_base = 000E8008, a2_base = 0001FA68
4351 a1_x = 0000, a1_y = 0000, a1_frac_x = 0000, a1_frac_y = 0000, a2_x = 0000, a2_y = 0000
4352 a1_step_x = 0000, a1_step_y = 0000, a1_stepf_x = 0000, a1_stepf_y = 0000, a2_step_x = 0000, a2_step_y = 0000
4353 a1_inc_x = 0000, a1_inc_y = 0000, a1_incf_x = 0000, a1_incf_y = 0000
4354 a1_win_x = 0000, a1_win_y = 0000, a2_mask_x = 0000, a2_mask_y = 0000
4355 a2_mask=F a1add=+phr/+0 a2add=+phr/+0
4356 a1_pixsize = 5, a2_pixsize = 5
4357 srcd=7717771777177717 dstd=0000000000000000 patd=7730773077307730 iinc=00000000
4358 srcz1=0000000000000000 srcz2=0000000000000000 dstz=0000000000000000 zinc=00000000, coll=0
4360 [in=T a1f=F a1=F zf=F z=F a2=F iif=F iii=F izf=F izi=F]
4361 Entering INNER state...
4362 Entering DWRITE state... Dest write address/pix address: 000E8008/0 [7730773077307730] (icount=009E, inc=2)
4363 srcz=0000000000000000][dcomp=AA zcomp=00 dbinh=00]
4364 [srcz=0000000000000000 dstz=0000000000000000 zwdata=0000000000000000 mask=7FFF]
4365 [dstart=0 dend=40 pwidth=8 srcshift=0][daas=0 dabs=0 dam=7 ds=0 daq=F]
4366 Entering A1_ADD state [a1_x=0000, a1_y=0000, addasel=0, addbsel=0, modx=1, addareg=F, adda_xconst=1, adda_yconst=0]...
4367 Entering DWRITE state... Dest write address/pix address: 000E8018/0 [7730773077307730] (icount=009C, inc=2)
4368 srcz=0000000000000000][dcomp=AA zcomp=00 dbinh=00]
4369 [srcz=0000000000000000 dstz=0000000000000000 zwdata=0000000000000000 mask=7FFF]
4370 [dstart=0 dend=40 pwidth=8 srcshift=0][daas=0 dabs=0 dam=7 ds=0 daq=F]
4371 Entering A1_ADD state [a1_x=0002, a1_y=0000, addasel=0, addbsel=0, modx=1, addareg=F, adda_xconst=1, adda_yconst=0]...
4375 Entering A1_ADD state [a1_x=009C, a1_y=0000, addasel=0, addbsel=0, modx=1, addareg=F, adda_xconst=1, adda_yconst=0]...
4376 Entering DWRITE state... Dest write address/pix address: 000E84F8/0 [7730773077307730] (icount=0000, inc=2)
4377 srcz=0000000000000000][dcomp=AA zcomp=00 dbinh=00]
4378 [srcz=0000000000000000 dstz=0000000000000000 zwdata=0000000000000000 mask=7FFF]
4379 [dstart=0 dend=40 pwidth=8 srcshift=0][daas=0 dabs=0 dam=7 ds=0 daq=F]
4380 Entering A1_ADD state [a1_x=009E, a1_y=0000, addasel=0, addbsel=0, modx=1, addareg=F, adda_xconst=1, adda_yconst=0]...
4381 Entering IDLE_INNER state...
4383 Leaving INNER state... (ocount=0104)
4384 [in=T a1f=F a1=F zf=F z=F a2=F iif=F iii=F izf=F izi=F]
4386 Entering INNER state...
4387 Entering DWRITE state... Dest write address/pix address: 000E8508/0 [7730773077307730] (icount=009E, inc=2)
4388 srcz=0000000000000000][dcomp=AA zcomp=00 dbinh=00]
4389 [srcz=0000000000000000 dstz=0000000000000000 zwdata=0000000000000000 mask=7FFF]
4390 [dstart=0 dend=40 pwidth=8 srcshift=0][daas=0 dabs=0 dam=7 ds=0 daq=F]
4391 Entering A1_ADD state [a1_x=00A0, a1_y=0000, addasel=0, addbsel=0, modx=1, addareg=F, adda_xconst=1, adda_yconst=0]...
4392 Entering DWRITE state... Dest write address/pix address: 000E8518/0 [7730773077307730] (icount=009C, inc=2)
4393 srcz=0000000000000000][dcomp=AA zcomp=00 dbinh=00]
4394 [srcz=0000000000000000 dstz=0000000000000000 zwdata=0000000000000000 mask=7FFF]
4395 [dstart=0 dend=40 pwidth=8 srcshift=0][daas=0 dabs=0 dam=7 ds=0 daq=F]
4396 Entering A1_ADD state [a1_x=00A2, a1_y=0000, addasel=0, addbsel=0, modx=1, addareg=F, adda_xconst=1, adda_yconst=0]...
4402 #ifdef VERBOSE_BLITTER_LOGGING
4405 //printf(" Entering A1_ADD state [addasel=%X, addbsel=%X, modx=%X, addareg=%s, adda_xconst=%u, adda_yconst=%s]...\n", addasel, addbsel, modx, (addareg ? "T" : "F"), adda_xconst, (adda_yconst ? "1" : "0"));
4406 WriteLog(" Entering A1_ADD state [a1_x=%04X, a1_y=%04X, addasel=%X, addbsel=%X, modx=%X, addareg=%s, adda_xconst=%u, adda_yconst=%s]...\n", a1_x, a1_y, addasel, addbsel, modx, (addareg ? "T" : "F"), adda_xconst, (adda_yconst ? "1" : "0"));
4410 int16_t adda_x, adda_y, addb_x, addb_y, data_x, data_y, addq_x, addq_y;
4411 ADDAMUX(adda_x, adda_y, addasel, a1_step_x, a1_step_y, a1_stepf_x, a1_stepf_y, a2_step_x, a2_step_y,
4412 a1_inc_x, a1_inc_y, a1_incf_x, a1_incf_y, adda_xconst, adda_yconst, addareg, suba_x, suba_y);
4413 ADDBMUX(addb_x, addb_y, addbsel, a1_x, a1_y, a2_x, a2_y, a1_frac_x, a1_frac_y);
4414 ADDRADD(addq_x, addq_y, a1fracldi, adda_x, adda_y, addb_x, addb_y, modx, suba_x, suba_y);
4416 #if 0//def VERBOSE_BLITTER_LOGGING
4419 WriteLog(" [adda_x=%d, adda_y=%d, addb_x=%d, addb_y=%d, addq_x=%d, addq_y=%d]\n", adda_x, adda_y, addb_x, addb_y, addq_x, addq_y);
4423 //Now, write to what???
4424 //a2ptrld comes from a2ptrldi...
4425 //I believe it's addbsel that determines the writeback...
4426 // This is where atick[0] & [1] come in, in determining which part (fractional, integer)
4427 // gets written to...
4430 //Kludge, to get A1 channel increment working...
4433 a1_frac_x = addq_x, a1_frac_y = addq_y;
4435 addasel = 2, addbsel = 0, a1fracldi = false;
4436 ADDAMUX(adda_x, adda_y, addasel, a1_step_x, a1_step_y, a1_stepf_x, a1_stepf_y, a2_step_x, a2_step_y,
4437 a1_inc_x, a1_inc_y, a1_incf_x, a1_incf_y, adda_xconst, adda_yconst, addareg, suba_x, suba_y);
4438 ADDBMUX(addb_x, addb_y, addbsel, a1_x, a1_y, a2_x, a2_y, a1_frac_x, a1_frac_y);
4439 ADDRADD(addq_x, addq_y, a1fracldi, adda_x, adda_y, addb_x, addb_y, modx, suba_x, suba_y);
4441 a1_x = addq_x, a1_y = addq_y;
4444 a1_x = addq_x, a1_y = addq_y;
4449 #ifdef VERBOSE_BLITTER_LOGGING
4452 //printf(" Entering A2_ADD state [addasel=%X, addbsel=%X, modx=%X, addareg=%s, adda_xconst=%u, adda_yconst=%s]...\n", addasel, addbsel, modx, (addareg ? "T" : "F"), adda_xconst, (adda_yconst ? "1" : "0"));
4453 WriteLog(" Entering A2_ADD state [a2_x=%04X, a2_y=%04X, addasel=%X, addbsel=%X, modx=%X, addareg=%s, adda_xconst=%u, adda_yconst=%s]...\n", a2_x, a2_y, addasel, addbsel, modx, (addareg ? "T" : "F"), adda_xconst, (adda_yconst ? "1" : "0"));
4457 //void ADDAMUX(int16_t &adda_x, int16_t &adda_y, uint8_t addasel, int16_t a1_step_x, int16_t a1_step_y,
4458 // int16_t a1_stepf_x, int16_t a1_stepf_y, int16_t a2_step_x, int16_t a2_step_y,
4459 // int16_t a1_inc_x, int16_t a1_inc_y, int16_t a1_incf_x, int16_t a1_incf_y, uint8_t adda_xconst,
4460 // bool adda_yconst, bool addareg, bool suba_x, bool suba_y)
4461 //void ADDBMUX(int16_t &addb_x, int16_t &addb_y, uint8_t addbsel, int16_t a1_x, int16_t a1_y,
4462 // int16_t a2_x, int16_t a2_y, int16_t a1_frac_x, int16_t a1_frac_y)
4463 //void ADDRADD(int16_t &addq_x, int16_t &addq_y, bool a1fracldi,
4464 // int16_t adda_x, int16_t adda_y, int16_t addb_x, int16_t addb_y, uint8_t modx, bool suba_x, bool suba_y)
4465 //void DATAMUX(int16_t &data_x, int16_t &data_y, uint32_t gpu_din, int16_t addq_x, int16_t addq_y, bool addqsel)
4466 int16_t adda_x, adda_y, addb_x, addb_y, data_x, data_y, addq_x, addq_y;
4467 ADDAMUX(adda_x, adda_y, addasel, a1_step_x, a1_step_y, a1_stepf_x, a1_stepf_y, a2_step_x, a2_step_y,
4468 a1_inc_x, a1_inc_y, a1_incf_x, a1_incf_y, adda_xconst, adda_yconst, addareg, suba_x, suba_y);
4469 ADDBMUX(addb_x, addb_y, addbsel, a1_x, a1_y, a2_x, a2_y, a1_frac_x, a1_frac_y);
4470 ADDRADD(addq_x, addq_y, a1fracldi, adda_x, adda_y, addb_x, addb_y, modx, suba_x, suba_y);
4472 #if 0//def VERBOSE_BLITTER_LOGGING
4475 WriteLog(" [adda_x=%d, adda_y=%d, addb_x=%d, addb_y=%d, addq_x=%d, addq_y=%d]\n", adda_x, adda_y, addb_x, addb_y, addq_x, addq_y);
4479 //Now, write to what???
4480 //a2ptrld comes from a2ptrldi...
4481 //I believe it's addbsel that determines the writeback...
4487 Flags: SRCEN CLIP_A1 UPDA1 UPDA1F UPDA2 DSTA2 GOURZ ZMODE=0 LFUFUNC=C SRCSHADE
4489 a1_base = 0015B000, a2_base = 0014B000
4490 a1_x = 0000, a1_y = 0000, a1_frac_x = 8000, a1_frac_y = 8000, a2_x = 001F, a2_y = 0038
4491 a1_step_x = FFFFFFC0, a1_step_y = 0001, a1_stepf_x = 0000, a1_stepf_y = 2AAA, a2_step_x = FFFFFFC0, a2_step_y = 0001
4492 a1_inc_x = 0001, a1_inc_y = 0000, a1_incf_x = 0000, a1_incf_y = 0000
4493 a1_win_x = 0040, a1_win_y = 0040, a2_mask_x = 0000, a2_mask_y = 0000
4494 a2_mask=F a1add=+inc/+0 a2add=+1/+0
4495 a1_pixsize = 4, a2_pixsize = 4
4496 srcd=FF00FF00FF00FF00 dstd=0000000000000000 patd=0000000000000000 iinc=00000000
4497 srcz1=0000000000000000 srcz2=0000000000000000 dstz=0000000000000000 zinc=00000000, col=0
4499 [in=T a1f=F a1=F zf=F z=F a2=F iif=F iii=F izf=F izi=F]
4500 Entering INNER state...
4501 Entering SREAD state... Source read address/pix address: 0015B000/0 [6505650565056505]
4502 Entering A1_ADD state [a1_x=0000, a1_y=0000, addasel=3, addbsel=2, modx=0, addareg=T, adda_xconst=7, adda_yconst=0]...
4503 Entering DWRITE state...
4504 Dest write address/pix address: 0014E83E/0 [dstart=0 dend=10 pwidth=8 srcshift=0][daas=4 dabs=5 dam=7 ds=1 daq=F] [0000000000006505] (icount=003F, inc=1)
4505 Entering A2_ADD state [a2_x=001F, a2_y=0038, addasel=0, addbsel=1, modx=0, addareg=F, adda_xconst=0, adda_yconst=0]...
4506 Entering SREAD state... Source read address/pix address: 0015B000/0 [6505650565056505]
4507 Entering A1_ADD state [a1_x=FFFF8000, a1_y=FFFF8000, addasel=3, addbsel=2, modx=0, addareg=T, adda_xconst=7, adda_yconst=0]...
4508 Entering DWRITE state...
4509 Dest write address/pix address: 0014E942/0 [dstart=0 dend=10 pwidth=8 srcshift=0][daas=4 dabs=5 dam=7 ds=1 daq=F] [0000000000006505] (icount=003E, inc=1)
4510 Entering A2_ADD state [a2_x=0021, a2_y=0039, addasel=0, addbsel=1, modx=0, addareg=F, adda_xconst=0, adda_yconst=0]...
4511 Entering SREAD state... Source read address/pix address: 0015B000/0 [6505650565056505]
4512 Entering A1_ADD state [a1_x=FFFF8000, a1_y=FFFF8000, addasel=3, addbsel=2, modx=0, addareg=T, adda_xconst=7, adda_yconst=0]...
4513 Entering DWRITE state...
4514 Dest write address/pix address: 0014EA46/0 [dstart=0 dend=10 pwidth=8 srcshift=0][daas=4 dabs=5 dam=7 ds=1 daq=F] [0000000000006505] (icount=003D, inc=1)
4515 Entering A2_ADD state [a2_x=0023, a2_y=003A, addasel=0, addbsel=1, modx=0, addareg=F, adda_xconst=0, adda_yconst=0]...
4516 Entering SREAD state... Source read address/pix address: 0015B000/0 [6505650565056505]
4517 Entering A1_ADD state [a1_x=FFFF8000, a1_y=FFFF8000, addasel=3, addbsel=2, modx=0, addareg=T, adda_xconst=7, adda_yconst=0]...
4518 Entering DWRITE state...
4519 Dest write address/pix address: 0014EB4A/0 [dstart=0 dend=10 pwidth=8 srcshift=0][daas=4 dabs=5 dam=7 ds=1 daq=F] [0000000000006505] (icount=003C, inc=1)
4520 Entering A2_ADD state [a2_x=0025, a2_y=003B, addasel=0, addbsel=1, modx=0, addareg=F, adda_xconst=0, adda_yconst=0]...
4522 Entering SREAD state... Source read address/pix address: 0015B000/0 [6505650565056505]
4523 Entering A1_ADD state [a1_x=FFFF8000, a1_y=FFFF8000, addasel=3, addbsel=2, modx=0, addareg=T, adda_xconst=7, adda_yconst=0]...
4524 Entering DWRITE state...
4525 Dest write address/pix address: 0015283A/0 [dstart=0 dend=10 pwidth=8 srcshift=0][daas=4 dabs=5 dam=7 ds=1 daq=F] [0000000000006505] (icount=0000, inc=1)
4526 Entering A2_ADD state [a2_x=009D, a2_y=0077, addasel=0, addbsel=1, modx=0, addareg=F, adda_xconst=0, adda_yconst=0]...
4527 Entering IDLE_INNER state...
4528 Leaving INNER state... (ocount=0036)
4529 [in=F a1f=T a1=F zf=F z=F a2=F iif=F iii=F izf=F izi=F]
4530 Entering A1FUPDATE state...
4531 [in=F a1f=F a1=T zf=F z=F a2=F iif=F iii=F izf=F izi=F]
4532 Entering A1UPDATE state... (-32768/-32768 -> 32704/-32767)
4533 [in=F a1f=F a1=F zf=F z=F a2=T iif=F iii=F izf=F izi=F]
4534 Entering A2UPDATE state... (159/120 -> 95/121)
4535 [in=T a1f=F a1=F zf=F z=F a2=F iif=F iii=F izf=F izi=F]
4536 Entering INNER state...
4539 #ifdef VERBOSE_BLITTER_LOGGING
4542 WriteLog(" Leaving INNER state...");
4547 // The outer counter is updated here as well on the clock cycle...
4549 /* the inner loop is started whenever another state is about to
4550 cause the inner state to go active */
4551 //Instart := ND7 (instart, innert[0], innert[2..7]);
4553 //Actually, it's done only when inner gets asserted without the 2nd line of conditions
4554 //(inner AND !indone)
4556 //Since we don't get here until the inner loop is finished (indone = true) we can get
4557 //away with doing it here...!
4562 #ifdef VERBOSE_BLITTER_LOGGING
4565 WriteLog(" (ocount=%04X)\n", ocount);
4573 #ifdef VERBOSE_BLITTER_LOGGING
4576 WriteLog(" Entering A1FUPDATE state...\n");
4580 uint32_t a1_frac_xt = (uint32_t)a1_frac_x + (uint32_t)a1_stepf_x;
4581 uint32_t a1_frac_yt = (uint32_t)a1_frac_y + (uint32_t)a1_stepf_y;
4582 a1FracCInX = a1_frac_xt >> 16;
4583 a1FracCInY = a1_frac_yt >> 16;
4584 a1_frac_x = (uint16_t)(a1_frac_xt & 0xFFFF);
4585 a1_frac_y = (uint16_t)(a1_frac_yt & 0xFFFF);
4590 #ifdef VERBOSE_BLITTER_LOGGING
4593 WriteLog(" Entering A1UPDATE state... (%d/%d -> ", a1_x, a1_y);
4597 a1_x += a1_step_x + a1FracCInX;
4598 a1_y += a1_step_y + a1FracCInY;
4599 #ifdef VERBOSE_BLITTER_LOGGING
4602 WriteLog("%d/%d)\n", a1_x, a1_y);
4610 #ifdef VERBOSE_BLITTER_LOGGING
4613 WriteLog(" Entering A2UPDATE state... (%d/%d -> ", a2_x, a2_y);
4619 #ifdef VERBOSE_BLITTER_LOGGING
4622 WriteLog("%d/%d)\n", a2_x, a2_y);
4629 // We never get here! !!! FIX !!!
4631 #ifdef VERBOSE_BLITTER_LOGGING
4634 WriteLog("Done!\na1_x=%04X a1_y=%04X a1_frac_x=%04X a1_frac_y=%04X a2_x=%04X a2_y%04X\n",
4635 GET16(blitter_ram, A1_PIXEL + 2),
4636 GET16(blitter_ram, A1_PIXEL + 0),
4637 GET16(blitter_ram, A1_FPIXEL + 2),
4638 GET16(blitter_ram, A1_FPIXEL + 0),
4639 GET16(blitter_ram, A2_PIXEL + 2),
4640 GET16(blitter_ram, A2_PIXEL + 0));
4645 // Write values back to registers (in real blitter, these are continuously updated)
4646 SET16(blitter_ram, A1_PIXEL + 2, a1_x);
4647 SET16(blitter_ram, A1_PIXEL + 0, a1_y);
4648 SET16(blitter_ram, A1_FPIXEL + 2, a1_frac_x);
4649 SET16(blitter_ram, A1_FPIXEL + 0, a1_frac_y);
4650 SET16(blitter_ram, A2_PIXEL + 2, a2_x);
4651 SET16(blitter_ram, A2_PIXEL + 0, a2_y);
4653 #ifdef VERBOSE_BLITTER_LOGGING
4656 WriteLog("Writeback!\na1_x=%04X a1_y=%04X a1_frac_x=%04X a1_frac_y=%04X a2_x=%04X a2_y%04X\n",
4657 GET16(blitter_ram, A1_PIXEL + 2),
4658 GET16(blitter_ram, A1_PIXEL + 0),
4659 GET16(blitter_ram, A1_FPIXEL + 2),
4660 GET16(blitter_ram, A1_FPIXEL + 0),
4661 GET16(blitter_ram, A2_PIXEL + 2),
4662 GET16(blitter_ram, A2_PIXEL + 0));
4670 int16_t a1_x = (int16_t)GET16(blitter_ram, A1_PIXEL + 2);
4671 int16_t a1_y = (int16_t)GET16(blitter_ram, A1_PIXEL + 0);
4672 uint16_t a1_frac_x = GET16(blitter_ram, A1_FPIXEL + 2);
4673 uint16_t a1_frac_y = GET16(blitter_ram, A1_FPIXEL + 0);
4674 int16_t a2_x = (int16_t)GET16(blitter_ram, A2_PIXEL + 2);
4675 int16_t a2_y = (int16_t)GET16(blitter_ram, A2_PIXEL + 0);
4677 Seems that the ending a1_x should be written between blits, but it doesn't seem to be...
4679 Blit! (CMD = 01800000)
4682 a1_base = 00050000, a2_base = 00070000
4683 a1_x = 0000, a1_y = 0000, a1_frac_x = 49CD, a1_frac_y = 0000, a2_x = 0033, a2_y = 0001
4684 a1_step_x = 0000, a1_step_y = 0000, a1_stepf_x = 939A, a1_stepf_y = 0000, a2_step_x = 0000, a2_step_y = 0000
4685 a1_inc_x = 0000, a1_inc_y = 0000, a1_incf_x = 0000, a1_incf_y = 0000
4686 a1_win_x = 0100, a1_win_y = 0020, a2_mask_x = 0000, a2_mask_y = 0000
4687 a2_mask=F a1add=+phr/+0 a2add=+phr/+0
4688 a1_pixsize = 4, a2_pixsize = 3
4689 srcd=DEDEDEDEDEDEDEDE dstd=0000000000000000 patd=0000000000000000 iinc=00000000
4690 srcz1=0000000000000000 srcz2=0000000000000000 dstz=0000000000000000 zinc=00000000, coll=0
4693 Blit! (CMD = 01800000)
4696 a1_base = 00050000, a2_base = 00070000
4697 a1_x = 0000, a1_y = 0000, a1_frac_x = 49CD, a1_frac_y = 0000, a2_x = 0033, a2_y = 0001
4698 a1_step_x = 0000, a1_step_y = 0000, a1_stepf_x = 939A, a1_stepf_y = 0000, a2_step_x = 0000, a2_step_y = 0000
4699 a1_inc_x = 0000, a1_inc_y = 0000, a1_incf_x = 0000, a1_incf_y = 0000
4700 a1_win_x = 0100, a1_win_y = 0020, a2_mask_x = 0000, a2_mask_y = 0000
4701 a2_mask=F a1add=+phr/+0 a2add=+phr/+0
4702 a1_pixsize = 4, a2_pixsize = 3
4703 srcd=D6D6D6D6D6D6D6D6 dstd=0000000000000000 patd=0000000000000000 iinc=00000000
4704 srcz1=0000000000000000 srcz2=0000000000000000 dstz=0000000000000000 zinc=00000000, coll=0
4710 // Various pieces of the blitter puzzle are teased out here...
4716 INT24/ address // byte address
4717 pixa[0..2] // bit part of address, un-pipe-lined
4733 apipe // load address pipe-line latch
4734 clk // co-processor clock
4735 gena2 // generate A2 as opposed to A1
4736 zaddr // generate Z address
// ADDRGEN: derives a memory address and a pixel bit offset from the X/Y
// pointer, base, pitch, pixel size, width and Z offset of one address window.
// gena2 selects the A2 parameter set, otherwise A1 is used. Results are
// returned through the reference parameters:
//   address - (addr & 0x1FFFFF) << 3, with bits 3..5 of pixa in the low 3 bits
//   pixa    - (ya + x) << pixsize
// zaddr adds the low two bits of the selected zoffset into the address.
4740 void ADDRGEN(uint32_t &address, uint32_t &pixa, bool gena2, bool zaddr,
4741 uint16_t a1_x, uint16_t a1_y, uint32_t a1_base, uint8_t a1_pitch, uint8_t a1_pixsize, uint8_t a1_width, uint8_t a1_zoffset,
4742 uint16_t a2_x, uint16_t a2_y, uint32_t a2_base, uint8_t a2_pitch, uint8_t a2_pixsize, uint8_t a2_width, uint8_t a2_zoffset)
4744 // uint16_t x = (gena2 ? a2_x : a1_x) & 0x7FFF;
4745 uint16_t x = (gena2 ? a2_x : a1_x) & 0xFFFF; // Actually uses all 16 bits to generate address...!
// Y is limited to its low 12 bits.
4746 uint16_t y = (gena2 ? a2_y : a1_y) & 0x0FFF;
4747 uint8_t width = (gena2 ? a2_width : a1_width);
4748 uint8_t pixsize = (gena2 ? a2_pixsize : a1_pixsize);
4749 uint8_t pitch = (gena2 ? a2_pitch : a1_pitch);
// The base is carried without its low three bits; they are restored by the
// final << 3 when 'address' is assembled.
4750 uint32_t base = (gena2 ? a2_base : a1_base) >> 3;//Only upper 21 bits are passed around the bus? Seems like it...
4751 uint8_t zoffset = (gena2 ? a2_zoffset : a1_zoffset);
// ytm = y * (4 + (width & 3)): the low two bits of width select which of
// y*2 and y*1 are added on top of y*4.
4753 uint32_t ytm = ((uint32_t)y << 2) + (width & 0x02 ? (uint32_t)y << 1 : 0) + (width & 0x01 ? (uint32_t)y : 0);
// The remaining bits of width are a left-shift count; the trailing >> 2
// removes the factor of 4 introduced in ytm.
4755 uint32_t ya = (ytm << (width >> 2)) >> 2;
4757 uint32_t pa = ya + x;
// NOTE(review): presumably pixsize is log2(bits per pixel), making pixa a bit
// offset -- confirm against the callers.
4759 /*uint32*/ pixa = pa << pixsize;
// pt is 1 when pitch == 1, 2 when pitch == 2, and 0 for pitch 0 or 3.
4761 uint8_t pt = ((pitch & 0x01) && !(pitch & 0x02) ? 0x01 : 0x00)
4762 | (!(pitch & 0x01) && (pitch & 0x02) ? 0x02 : 0x00);
4763 // uint32_t phradr = pixa << pt;
// pixa >> 6 is the index of the 64-bit unit holding the pixel; it is scaled
// by 1, 2 or 4 here according to pt.
4764 uint32_t phradr = (pixa >> 6) << pt;
// For pitch == 3 an extra 2x term is added below, giving 3x overall.
4765 uint32_t shup = (pitch == 0x03 ? (pixa >> 6) : 0);
4767 uint8_t za = (zaddr ? zoffset : 0) & 0x03;
4768 // uint32_t addr = za + (phradr & 0x07) + (shup << 1) + base;
4769 uint32_t addr = za + phradr + (shup << 1) + base;
// Low 3 bits: byte position taken from pixa bits 3..5. Upper bits: the 21-bit
// value computed above.
4770 /*uint32*/ address = ((pixa & 0x38) >> 3) | ((addr & 0x1FFFFF) << 3);
4771 #if 0//def VERBOSE_BLITTER_LOGGING
4774 WriteLog(" [gena2=%s, x=%04X, y=%04X, w=%1X, pxsz=%1X, ptch=%1X, b=%08X, zoff=%1X]\n", (gena2 ? "T" : "F"), x, y, width, pixsize, pitch, base, zoffset);
4775 WriteLog(" [ytm=%X, ya=%X, pa=%X, pixa=%X, pt=%X, phradr=%X, shup=%X, za=%X, addr=%X, address=%X]\n", ytm, ya, pa, pixa, pt, phradr, shup, za, addr, address);
4781 Entering INNER state...
4782 [gena2=T, x=0002, y=0000, w=20, pxsz=4, ptch=0, b=000012BA, zoff=0]
4783 [ytm=0, ya=0, pa=2, pixa=20, pt=0, phradr=0, shup=0, za=0, addr=12BA, address=95D4]
4784 Entering SREADX state... [dstart=0 dend=20 pwidth=8 srcshift=20]
4785 Source extra read address/pix address: 000095D4/0 [0000001C00540038]
4786 Entering A2_ADD state [a2_x=0002, a2_y=0000, addasel=0, addbsel=1, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
4787 [gena2=T, x=0004, y=0000, w=20, pxsz=4, ptch=0, b=000012BA, zoff=0]
4788 [ytm=0, ya=0, pa=4, pixa=40, pt=0, phradr=1, shup=0, za=0, addr=12BB, address=95D8]
4789 Entering SREAD state... [dstart=0 dend=20 pwidth=8 srcshift=0]
4790 Source read address/pix address: 000095D8/0 [0054003800009814]
4791 Entering A2_ADD state [a2_x=0004, a2_y=0000, addasel=0, addbsel=1, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
4792 [gena2=F, x=0000, y=0000, w=20, pxsz=4, ptch=0, b=00006E52, zoff=0]
4793 [ytm=0, ya=0, pa=0, pixa=0, pt=0, phradr=0, shup=0, za=0, addr=6E52, address=37290]
4794 Entering DWRITE state...
4795 Dest write address/pix address: 00037290/0 [dstart=0 dend=20 pwidth=8 srcshift=0] (icount=026E, inc=4)
4796 Entering A1_ADD state [a1_x=0000, a1_y=0000, addasel=0, addbsel=0, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
4797 [gena2=T, x=0008, y=0000, w=20, pxsz=4, ptch=0, b=000012BA, zoff=0]
4798 [ytm=0, ya=0, pa=8, pixa=80, pt=0, phradr=2, shup=0, za=0, addr=12BC, address=95E0]
4802 Entering SREAD state...
4803 [gena2=T, x=0004, y=0000, w=20, pxsz=4, ptch=0, b=000010AC, zoff=0]
4804 [ytm=0, ya=0, pa=4, pixa=0, pt=0, phradr=40, shup=0, za=0, addr=10AC, address=8560]
4805 Source read address/pix address: 00008560/0 [8C27981B327E00F0]
4807 2nd pass (still wrong):
4808 Entering SREAD state...
4809 [gena2=T, x=0004, y=0000, w=20, pxsz=4, ptch=0, b=000010AC, zoff=0]
4810 [ytm=0, ya=0, pa=4, pixa=0, pt=0, phradr=40, shup=0, za=0, addr=10EC, address=8760]
4811 Source read address/pix address: 00008760/0 [00E06DC04581880C]
4814 Entering SREAD state...
4815 [gena2=T, x=0004, y=0000, w=20, pxsz=4, ptch=0, b=000010AC, zoff=0]
4816 [ytm=0, ya=0, pa=4, pixa=0, pt=0, phradr=1, shup=0, za=0, addr=10AD, address=8568]
4817 Source read address/pix address: 00008568/0 [6267981A327C00F0]
4819 OK, now we're back into incorrect (or is it?):
4820 Entering SREADX state... [dstart=0 dend=20 pwidth=8 srcshift=20]
4821 Source extra read address/pix address: 000095D4/0 [0000 001C 0054 0038]
4822 Entering A2_ADD state [a2_x=0002, a2_y=0000, addasel=0, addbsel=1, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
4823 Entering SREAD state... [dstart=0 dend=20 pwidth=8 srcshift=0]
4824 Source read address/pix address: 000095D8/0 [0054 0038 0000 9814]
4825 Entering A2_ADD state [a2_x=0004, a2_y=0000, addasel=0, addbsel=1, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
4826 I think this may be correct...!
4831 // source and destination address update conditions
4833 Sraat0 := AN2 (sraat[0], sreadxi, srcenz\);
4834 Sraat1 := AN2 (sraat[1], sreadi, srcenz\);
4835 Srca_addi := OR4 (srca_addi, szreadxi, szreadi, sraat[0..1]);
4836 Srca_add := FD1Q (srca_add, srca_addi, clk);
4838 Dstaat := AN2 (dstaat, dwritei, dstwrz\);
4839 Dsta_addi := OR2 (dsta_addi, dzwritei, dstaat);
4840 // Dsta_add := FD1Q (dsta_add, dsta_addi, clk);
4842 // source and destination address generate conditions
4844 Gensrc := OR4 (gensrc, sreadxi, szreadxi, sreadi, szreadi);
4845 Gendst := OR4 (gendst, dreadi, dzreadi, dwritei, dzwritei);
4846 Dsta2\ := INV1 (dsta2\, dsta2);
4847 Gena2t0 := NAN2 (gena2t[0], gensrc, dsta2\);
4848 Gena2t1 := NAN2 (gena2t[1], gendst, dsta2);
4849 Gena2i := NAN2 (gena2i, gena2t[0..1]);
4850 Gena2 := FD1QU (gena2, gena2i, clk);
4852 Zaddr := OR4 (zaddr, szreadx, szread, dzread, dzwrite);
4857 // Basically, the above translates to:
4858 bool srca_addi = (sreadxi && !srcenz) || (sreadi && !srcenz) || szreadxi || szreadi;
4860 bool dsta_addi = (dwritei && !dstwrz) || dzwritei;
4862 bool gensrc = sreadxi || szreadxi || sreadi || szreadi;
4863 bool gendst = dreadi || dzreadi || dwritei || dzwritei;
4864 bool gena2i = (gensrc && !dsta2) || (gendst && dsta2);
4866 bool zaddr = szreadx || szread || dzread || dzwrite;
4870 // source data reads
4872 Srcdpset\ := NAN2 (srcdpset\, readreq, sread);
4873 Srcdpt1 := NAN2 (srcdpt[1], srcdpend, srcdack\);
4874 Srcdpt2 := NAN2 (srcdpt[2], srcdpset\, srcdpt[1]);
4875 Srcdpend := FD2Q (srcdpend, srcdpt[2], clk, reset\);
4877 Srcdxpset\ := NAN2 (srcdxpset\, readreq, sreadx);
4878 Srcdxpt1 := NAN2 (srcdxpt[1], srcdxpend, srcdxack\);
4879 Srcdxpt2 := NAN2 (srcdxpt[2], srcdxpset\, srcdxpt[1]);
4880 Srcdxpend := FD2Q (srcdxpend, srcdxpt[2], clk, reset\);
4882 Sdpend := OR2 (sdpend, srcdxpend, srcdpend);
4883 Srcdreadt := AN2 (srcdreadt, sdpend, read_ack);
4885 //2/9/92 - enhancement?
4886 //Load srcdread on the next tick as well to modify it in srcshade
4888 Srcdreadd := FD1Q (srcdreadd, srcdreadt, clk);
4889 Srcdread := AOR1 (srcdread, srcshade, srcdreadd, srcdreadt);
4893 Srczpset\ := NAN2 (srczpset\, readreq, szread);
4894 Srczpt1 := NAN2 (srczpt[1], srczpend, srczack\);
4895 Srczpt2 := NAN2 (srczpt[2], srczpset\, srczpt[1]);
4896 Srczpend := FD2Q (srczpend, srczpt[2], clk, reset\);
4898 Srczxpset\ := NAN2 (srczxpset\, readreq, szreadx);
4899 Srczxpt1 := NAN2 (srczxpt[1], srczxpend, srczxack\);
4900 Srczxpt2 := NAN2 (srczxpt[2], srczxpset\, srczxpt[1]);
4901 Srczxpend := FD2Q (srczxpend, srczxpt[2], clk, reset\);
4903 Szpend := OR2 (szpend, srczpend, srczxpend);
4904 Srczread := AN2 (srczread, szpend, read_ack);
4906 // destination data reads
4908 Dstdpset\ := NAN2 (dstdpset\, readreq, dread);
4909 Dstdpt0 := NAN2 (dstdpt[0], dstdpend, dstdack\);
4910 Dstdpt1 := NAN2 (dstdpt[1], dstdpset\, dstdpt[0]);
4911 Dstdpend := FD2Q (dstdpend, dstdpt[1], clk, reset\);
4912 Dstdread := AN2 (dstdread, dstdpend, read_ack);
4914 // destination zed reads
4916 Dstzpset\ := NAN2 (dstzpset\, readreq, dzread);
4917 Dstzpt0 := NAN2 (dstzpt[0], dstzpend, dstzack\);
4918 Dstzpt1 := NAN2 (dstzpt[1], dstzpset\, dstzpt[0]);
4919 Dstzpend := FD2Q (dstzpend, dstzpt[1], clk, reset\);
4920 Dstzread := AN2 (dstzread, dstzpend, read_ack);
4925 // Basically, the above translates to:
4926 bool srcdpend = (readreq && sread) || (srcdpend && !srcdack);
4927 bool srcdxpend = (readreq && sreadx) || (srcdxpend && !srcdxack);
4928 bool sdpend = srcdxpend || srcdpend;
4929 bool srcdread = ((sdpend && read_ack) && srcshade) || (sdpend && read_ack);//the latter term is lookahead
4934 ////////////////////////////////////////////////////////////////////////////////////////////
4935 ////////////////////////////////////////////////////////////////////////////////////////////
4936 // Here's an important bit: The source data adder logic. Need to track down the inputs!!! //
4937 ////////////////////////////////////////////////////////////////////////////////////////////
4938 ////////////////////////////////////////////////////////////////////////////////////////////
4945 daddasel[0..2] // data adder input A selection
4950 initcin[0..3] // carry into the adders from the initializers
4951 initinc[0..63] // the initialisation increment
4952 initpix[0..15] // Data initialiser pixel value
// ADDARRAY: four-lane, 16-bit-per-lane data adder.
//   addq      - output, receives the four lane results addq[0..3]
//   daddasel  - index into the input A source tables built below
//   daddbsel  - selects input B (srcd, a replicated 16-bit word, or initinc)
//   daddmode  - decoded into the carry-select, eightbit, sat and hicinh flags
//   initcin   - per-lane carry-in, OR'ed with the carry saved from the
//               previous call when the mode enables it
// Each lane is evaluated by ADD16SAT. The per-lane carry-out is held in a
// function-local static, so it persists from one call to the next.
4964 void ADDARRAY(uint16_t * addq, uint8_t daddasel, uint8_t daddbsel, uint8_t daddmode,
4965 uint64_t dstd, uint32_t iinc, uint8_t initcin[], uint64_t initinc, uint16_t initpix,
4966 uint32_t istep, uint64_t patd, uint64_t srcd, uint64_t srcz1, uint64_t srcz2,
4967 uint32_t zinc, uint32_t zstep)
// initpix replicated into both halves of a 32-bit word.
4969 uint32_t initpix2 = ((uint32_t)initpix << 16) | initpix;
// Input A candidates, split into low and high 32-bit halves.
4970 uint32_t addalo[8], addahi[8];
4971 addalo[0] = dstd & 0xFFFFFFFF;
4972 addalo[1] = initpix2;
4975 addalo[4] = srcd & 0xFFFFFFFF;
4976 addalo[5] = patd & 0xFFFFFFFF;
4977 addalo[6] = srcz1 & 0xFFFFFFFF;
4978 addalo[7] = srcz2 & 0xFFFFFFFF;
4979 addahi[0] = dstd >> 32;
4980 addahi[1] = initpix2;
4983 addahi[4] = srcd >> 32;
4984 addahi[5] = patd >> 32;
4985 addahi[6] = srcz1 >> 32;
4986 addahi[7] = srcz2 >> 32;
// Break the selected 64-bit source into four 16-bit lanes, lowest lane first.
4988 adda[0] = addalo[daddasel] & 0xFFFF;
4989 adda[1] = addalo[daddasel] >> 16;
4990 adda[2] = addahi[daddasel] & 0xFFFF;
4991 adda[3] = addahi[daddasel] >> 16;
// 16-bit halves of the increment and step values.
4993 uint16_t wordmux[8];
4994 wordmux[0] = iinc & 0xFFFF;
4995 wordmux[1] = iinc >> 16;
4996 wordmux[2] = zinc & 0xFFFF;
4997 wordmux[3] = zinc >> 16;;
4998 wordmux[4] = istep & 0xFFFF;
4999 wordmux[5] = istep >> 16;;
5000 wordmux[6] = zstep & 0xFFFF;
5001 wordmux[7] = zstep >> 16;;
// Index = daddbsel bit 3 moved down to bit 2, combined with daddbsel bits 0..1.
5002 uint16_t word = wordmux[((daddbsel & 0x08) >> 1) | (daddbsel & 0x03)];
5004 bool dbsel2 = daddbsel & 0x04;
5005 bool iincsel = (daddbsel & 0x01) && !(daddbsel & 0x04);
// Input B: srcd lanes, the selected word copied to every lane, or initinc lanes.
5007 if (!dbsel2 && !iincsel)
5008 addb[0] = srcd & 0xFFFF,
5009 addb[1] = (srcd >> 16) & 0xFFFF,
5010 addb[2] = (srcd >> 32) & 0xFFFF,
5011 addb[3] = (srcd >> 48) & 0xFFFF;
5012 else if (dbsel2 && !iincsel)
5013 addb[0] = addb[1] = addb[2] = addb[3] = word;
5014 else if (!dbsel2 && iincsel)
5015 addb[0] = initinc & 0xFFFF,
5016 addb[1] = (initinc >> 16) & 0xFFFF,
5017 addb[2] = (initinc >> 32) & 0xFFFF,
5018 addb[3] = (initinc >> 48) & 0xFFFF;
5020 addb[0] = addb[1] = addb[2] = addb[3] = 0;
// The saved carry is fed back only for daddmode values 1 through 4.
5022 uint8_t cinsel = (daddmode >= 1 && daddmode <= 4 ? 1 : 0);
5024 static uint8_t co[4];//These are preserved between calls...
5027 for(int i=0; i<4; i++)
5028 cin[i] = initcin[i] | (co[i] & cinsel);
// eightbit: daddmode bit 1. sat: either of bits 0..1. hicinh: both bits 0..1.
5030 bool eightbit = daddmode & 0x02;
5031 bool sat = daddmode & 0x03;
5032 bool hicinh = ((daddmode & 0x03) == 0x03);
5034 //Note that the carry out is saved between calls to this function...
5035 for(int i=0; i<4; i++)
5036 ADD16SAT(addq[i], co[i], adda[i], addb[i], cin[i], sat, eightbit, hicinh);
// ADD16SAT: one 16-bit lane of the data adder.
// Adds a + b + cin in three slices (bits 0-7, bits 8-11, bits 12-15) so the
// carry chain can be cut between slices, then optionally saturates.
//   r        - output: the (possibly saturated) 16-bit result
//   co       - output: carry out of the top slice
//   sat      - enable saturation
//   eightbit - stop the carry from the low byte entering bit 8, and take the
//              saturation decision from the low byte
//   hicinh   - stop the carry from bits 8-11 entering bit 12
5053 void ADD16SAT(uint16_t &r, uint8_t &co, uint16_t a, uint16_t b, uint8_t cin, bool sat, bool eightbit, bool hicinh)
5057 printf("--> [sat=%s 8b=%s hicinh=%s] %04X + %04X (+ %u) = ", (sat ? "T" : "F"), (eightbit ? "T" : "F"), (hicinh ? "T" : "F"), a, b, cin);
// Slice 0: low byte plus the incoming carry.
5061 uint32_t qt = (a & 0xFF) + (b & 0xFF) + cin;
5062 carry[0] = (qt & 0x0100 ? 1 : 0);
5063 uint16_t q = qt & 0x00FF;
// Slice 1: bits 8-11; the carry in is suppressed when eightbit is set.
5064 carry[1] = (carry[0] && !eightbit ? carry[0] : 0);
5065 qt = (a & 0x0F00) + (b & 0x0F00) + (carry[1] << 8);
5066 carry[2] = (qt & 0x1000 ? 1 : 0);
// Slice 2: bits 12-15; the carry in is suppressed when hicinh is set.
5068 carry[3] = (carry[2] && !hicinh ? carry[2] : 0);
5069 qt = (a & 0xF000) + (b & 0xF000) + (carry[3] << 12);
5070 co = (qt & 0x10000 ? 1 : 0);
// Top bit of b and the carry out, both taken at the active width
// (bit 7 / carry[0] when eightbit, otherwise bit 15 / co).
5073 uint8_t btop = (eightbit ? (b & 0x0080) >> 7 : (b & 0x8000) >> 15);
5074 uint8_t ctop = (eightbit ? carry[0] : co);
// Saturate when enabled and the two disagree; the high byte is clamped only
// when not in eightbit mode.
5076 bool saturate = sat && (btop ^ ctop);
5077 bool hisaturate = saturate && !eightbit;
5080 printf("bt=%u ct=%u s=%u hs=%u] ", btop, ctop, saturate, hisaturate);
// Each byte clamps to all ones when ctop is set, otherwise to zero.
5084 r = (saturate ? (ctop ? 0x00FF : 0x0000) : q & 0x00FF);
5085 r |= (hisaturate ? (ctop ? 0xFF00 : 0x0000) : q & 0xFF00);
5088 printf("%04X (co=%u)\n", r, co);
5094 /** ADDAMUX - Address adder input A selection *******************
5096 This module generates the data loaded into the address adder input A. This is
5097 the update value, and can be one of four registers : A1 step, A2 step, A1
5098 increment and A1 fraction. It can complement these values to perform
5099 subtraction, and it can generate constants to increment / decrement the window
5102 addasel[0..2] select the register to add
5104 000 A1 step integer part
5105 001 A1 step fraction part
5106 010 A1 increment integer part
5107 011 A1 increment fraction part
5110 adda_xconst[0..2] generate a power of 2 in the range 1-64 or all zeroes when
5113 addareg selects register value to be added as opposed to constant
5116 suba_x, suba_y complement the X and Y values
// ADDAMUX: produces the A-side operand pair for the address adder.
//   adda_x, adda_y - outputs
//   addasel        - bit 2 selects the A2 step; otherwise bits 0..1 choose
//                    A1 step, A1 step fraction, A1 increment or A1 increment
//                    fraction
//   adda_xconst    - X constant selector: 1 << n, except 7 which yields 0
//   adda_yconst    - Y constant: 1 when true, otherwise 0
//   addareg        - true: use the register value; false: use the constant
//   suba_x, suba_y - invert every bit of the selected X / Y value
5142 void ADDAMUX(int16_t &adda_x, int16_t &adda_y, uint8_t addasel, int16_t a1_step_x, int16_t a1_step_y,
5143 int16_t a1_stepf_x, int16_t a1_stepf_y, int16_t a2_step_x, int16_t a2_step_y,
5144 int16_t a1_inc_x, int16_t a1_inc_y, int16_t a1_incf_x, int16_t a1_incf_y, uint8_t adda_xconst,
5145 bool adda_yconst, bool addareg, bool suba_x, bool suba_y)
5148 /*INT16/ addac_x, addac_y, addar_x, addar_y, addart_x, addart_y,
5149 INT16/ addas_x, addas_y, suba_x16, suba_y16
5153 Zero := TIE0 (zero);*/
5155 /* Multiplex the register terms */
5157 /*Addaselb[0-2] := BUF8 (addaselb[0-2], addasel[0-2]);
5158 Addart_x := MX4 (addart_x, a1_step_x, a1_stepf_x, a1_inc_x, a1_incf_x, addaselb[0..1]);
5159 Addar_x := MX2 (addar_x, addart_x, a2_step_x, addaselb[2]);
5160 Addart_y := MX4 (addart_y, a1_step_y, a1_stepf_y, a1_inc_y, a1_incf_y, addaselb[0..1]);
5161 Addar_y := MX2 (addar_y, addart_y, a2_step_y, addaselb[2]);*/
5163 ////////////////////////////////////// C++ CODE //////////////////////////////////////
// Lookup tables standing in for the 4-way multiplexers in the netlist above.
5164 int16_t xterm[4], yterm[4];
5165 xterm[0] = a1_step_x, xterm[1] = a1_stepf_x, xterm[2] = a1_inc_x, xterm[3] = a1_incf_x;
5166 yterm[0] = a1_step_y, yterm[1] = a1_stepf_y, yterm[2] = a1_inc_y, yterm[3] = a1_incf_y;
5167 int16_t addar_x = (addasel & 0x04 ? a2_step_x : xterm[addasel & 0x03]);
5168 int16_t addar_y = (addasel & 0x04 ? a2_step_y : yterm[addasel & 0x03]);
5169 //////////////////////////////////////////////////////////////////////////////////////
5171 /* Generate a constant value - this is a power of 2 in the range
5172 0-64, or zero. The control bits are adda_xconst[0..2], when they
5173 are all 1 the result is 0.
5174 Constants for Y can only be 0 or 1 */
5176 /*Addac_xlo := D38H (addac_x[0..6], unused[0], adda_xconst[0..2]);
5177 Unused[0] := DUMMY (unused[0]);
5179 Addac_x := JOIN (addac_x, addac_x[0..6], zero, zero, zero, zero, zero, zero, zero, zero, zero);
5180 Addac_y := JOIN (addac_y, adda_yconst, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero,
5181 zero, zero, zero, zero, zero);*/
5182 ////////////////////////////////////// C++ CODE //////////////////////////////////////
// Selector values 0..6 give 1, 2, 4, ... 64; selector 7 gives 0.
5183 int16_t addac_x = (adda_xconst == 0x07 ? 0 : 1 << adda_xconst);
5184 int16_t addac_y = (adda_yconst ? 0x01 : 0);
5185 //////////////////////////////////////////////////////////////////////////////////////
5187 /* Select between constant value and register value */
5189 /*Addas_x := MX2 (addas_x, addac_x, addar_x, addareg);
5190 Addas_y := MX2 (addas_y, addac_y, addar_y, addareg);*/
5191 ////////////////////////////////////// C++ CODE //////////////////////////////////////
5192 int16_t addas_x = (addareg ? addar_x : addac_x);
5193 int16_t addas_y = (addareg ? addar_y : addac_y);
5194 //////////////////////////////////////////////////////////////////////////////////////
5196 /* Complement these values (complement flag gives adder carry in)*/
5198 /*Suba_x16 := JOIN (suba_x16, suba_x, suba_x, suba_x, suba_x, suba_x, suba_x, suba_x, suba_x, suba_x,
5199 suba_x, suba_x, suba_x, suba_x, suba_x, suba_x, suba_x);
5200 Suba_y16 := JOIN (suba_y16, suba_y, suba_y, suba_y, suba_y, suba_y, suba_y, suba_y, suba_y, suba_y,
5201 suba_y, suba_y, suba_y, suba_y, suba_y, suba_y, suba_y);
5202 Adda_x := EO (adda_x, suba_x16, addas_x);
5203 Adda_y := EO (adda_y, suba_y16, addas_y);*/
5204 ////////////////////////////////////// C++ CODE //////////////////////////////////////
// XOR with 0xFFFF yields the one's complement only; no +1 is added here.
5205 adda_x = addas_x ^ (suba_x ? 0xFFFF : 0x0000);
5206 adda_y = addas_y ^ (suba_y ? 0xFFFF : 0x0000);
5207 //////////////////////////////////////////////////////////////////////////////////////
5213 /** ADDBMUX - Address adder input B selection *******************
5215 This module selects the register to be updated by the address
5216 adder. This can be one of three registers, the A1 and A2
5217 pointers, or the A1 fractional part. It can also be zero, so that the step
5218 registers load directly into the pointers.
5233 INT16/ zero16 :LOCAL;
// ADDBMUX: produces the B-side operand pair for the address adder.
// The low two bits of addbsel choose the source:
//   0 - A1 pointer (a1_x, a1_y)
//   1 - A2 pointer (a2_x, a2_y)
//   2 - A1 fractional part (a1_frac_x, a1_frac_y)
//   3 - zero
// The selected pair is returned through addb_x and addb_y.
5235 void ADDBMUX(int16_t &addb_x, int16_t &addb_y, uint8_t addbsel, int16_t a1_x, int16_t a1_y,
5236 int16_t a2_x, int16_t a2_y, int16_t a1_frac_x, int16_t a1_frac_y)
5239 /*Zero := TIE0 (zero);
5240 Zero16 := JOIN (zero16, zero, zero, zero, zero, zero, zero, zero,
5241 zero, zero, zero, zero, zero, zero, zero, zero, zero);
5242 Addbselb[0-1] := BUF8 (addbselb[0-1], addbsel[0-1]);
5243 Addb_x := MX4 (addb_x, a1_x, a2_x, a1_frac_x, zero16, addbselb[0..1]);
5244 Addb_y := MX4 (addb_y, a1_y, a2_y, a1_frac_y, zero16, addbselb[0..1]);*/
5245 ////////////////////////////////////// C++ CODE //////////////////////////////////////
// Lookup tables standing in for the 4-way multiplexers in the netlist above.
5246 int16_t xterm[4], yterm[4];
5247 xterm[0] = a1_x, xterm[1] = a2_x, xterm[2] = a1_frac_x, xterm[3] = 0;
5248 yterm[0] = a1_y, yterm[1] = a2_y, yterm[2] = a1_frac_y, yterm[3] = 0;
// Bits of addbsel above bit 1 are ignored.
5249 addb_x = xterm[addbsel & 0x03];
5250 addb_y = yterm[addbsel & 0x03];
5251 //////////////////////////////////////////////////////////////////////////////////////
5257 /** DATAMUX - Address local data bus selection ******************
5259 Select between the adder output and the input data bus
5272 INT16/ gpu_lo, gpu_hi
// DATAMUX: chooses between the address adder result and the 32-bit input
// word gpu_din.
//   addqsel true  - data_x = addq_x, data_y = addq_y
//   addqsel false - data_x = low 16 bits of gpu_din, data_y = high 16 bits
// Results are returned through data_x and data_y.
5275 void DATAMUX(int16_t &data_x, int16_t &data_y, uint32_t gpu_din, int16_t addq_x, int16_t addq_y, bool addqsel)
5277 /*Gpu_lo := JOIN (gpu_lo, gpu_din{0..15});
5278 Gpu_hi := JOIN (gpu_hi, gpu_din{16..31});
5280 Addqselb := BUF8 (addqselb, addqsel);
5281 Data_x := MX2 (data_x, gpu_lo, addq_x, addqselb);
5282 Data_y := MX2 (data_y, gpu_hi, addq_y, addqselb);*/
5283 ////////////////////////////////////// C++ CODE //////////////////////////////////////
5284 data_x = (addqsel ? addq_x : (int16_t)(gpu_din & 0xFFFF));
5285 data_y = (addqsel ? addq_y : (int16_t)(gpu_din >> 16));
5286 //////////////////////////////////////////////////////////////////////////////////////
5292 /******************************************************************
5296 Blitter Address Adder
5297 ---------------------
5298 The blitter address adder is a pair of sixteen bit adders, one
5299 each for X and Y. The multiplexing of the input terms is
5300 performed elsewhere, but this adder can also perform modulo
5301 arithmetic to align X-addresses onto phrase boundaries.
5303 modx[0..2] take values
5310 ******************************************************************/
5312 /*IMPORT duplo, tosh;
5318 a1fracldi // propagate address adder carry
5323 clk[0] // co-processor clock
5331 Zero := TIE0 (zero);*/
// ADDRADD: the X and Y address adders.
// Computes adda + addb + carry-in for each axis and returns the results
// through addq_x and addq_y.
//   a1fracldi      - when set, the carry out of bit 15 is remembered for the
//                    next call; when clear, the remembered carry is reset to 0
//   modx           - selects how many low bits of the X result are cleared
//   suba_x, suba_y - toggle the carry-in for the respective axis
// The remembered carries live in function-local statics, so this function
// keeps state between calls.
5332 void ADDRADD(int16_t &addq_x, int16_t &addq_y, bool a1fracldi,
5333 uint16_t adda_x, uint16_t adda_y, uint16_t addb_x, uint16_t addb_y, uint8_t modx, bool suba_x, bool suba_y)
5336 /* Perform the addition */
5338 /*Adder_x := ADD16 (addqt_x[0..15], co_x, adda_x{0..15}, addb_x{0..15}, ci_x);
5339 Adder_y := ADD16 (addq_y[0..15], co_y, adda_y{0..15}, addb_y{0..15}, ci_y);*/
5341 /* latch carry and propagate if required */
5343 /*Cxt0 := AN2 (cxt[0], co_x, a1fracldi);
5344 Cxt1 := FD1Q (cxt[1], cxt[0], clk[0]);
5345 Ci_x := EO (ci_x, cxt[1], suba_x);
5347 Cyt0 := AN2 (cyt[0], co_y, a1fracldi);
5348 Cyt1 := FD1Q (cyt[1], cyt[0], clk[0]);
5349 Ci_y := EO (ci_y, cyt[1], suba_y);*/
5351 ////////////////////////////////////// C++ CODE //////////////////////////////////////
5352 //I'm sure the following will generate a bunch of warnings, but will have to do for now.
5353 static uint16_t co_x = 0, co_y = 0; // Carry out has to propagate between function calls...
// Carry-in is the remembered carry XOR the subtract flag.
5354 uint16_t ci_x = co_x ^ (suba_x ? 1 : 0);
5355 uint16_t ci_y = co_y ^ (suba_y ? 1 : 0);
// The sums are held in 32 bits so that bit 16 exposes the carry out.
5356 uint32_t addqt_x = adda_x + addb_x + ci_x;
5357 uint32_t addqt_y = adda_y + addb_y + ci_y;
5358 co_x = ((addqt_x & 0x10000) && a1fracldi ? 1 : 0);
5359 co_y = ((addqt_y & 0x10000) && a1fracldi ? 1 : 0);
5360 //////////////////////////////////////////////////////////////////////////////////////
5362 /* Mask low bits of X to 0 if required */
5364 /*Masksel := D38H (unused[0], masksel[0..4], maskbit[5], unused[1], modx[0..2]);
5366 Maskbit[0-4] := OR2 (maskbit[0-4], masksel[0-4], maskbit[1-5]);
5368 Mask[0-5] := MX2 (addq_x[0-5], addqt_x[0-5], zero, maskbit[0-5]);
5370 Addq_x := JOIN (addq_x, addq_x[0..5], addqt_x[6..15]);
5371 Addq_y := JOIN (addq_y, addq_y[0..15]);*/
5373 ////////////////////////////////////// C++ CODE //////////////////////////////////////
// modx 0 keeps every bit, modx 1..6 clears that many low bits, and modx 7
// forces the X result to zero.
// NOTE(review): every non-zero initializer here exceeds the int16_t range,
// which is a narrowing conversion inside a braced initializer.
// NOTE(review): modx is used as an index without masking; a value above 7
// would read past the end of this 8-entry table.
5374 int16_t mask[8] = { 0xFFFF, 0xFFFE, 0xFFFC, 0xFFF8, 0xFFF0, 0xFFE0, 0xFFC0, 0x0000 };
5375 addq_x = addqt_x & mask[modx];
5376 addq_y = addqt_y & 0xFFFF;
5377 //////////////////////////////////////////////////////////////////////////////////////
5379 //Unused[0-1] := DUMMY (unused[0-1]);
5387 wdata[0..63] // co-processor write data bus
5389 dcomp[0..7] // data byte equal flags
5390 srcd[0..7] // bits to use for bit to byte expansion
5391 zcomp[0..3] // output from Z comparators
5393 a1_x[0..1] // low two bits of A1 X pointer
5394 big_pix // pixel organisation is big-endian
5395 blitter_active // blitter is active
5396 clk // co-processor clock
5397 cmpdst // compare dest rather than source
5398 colorld // load the pattern color fields
5399 daddasel[0..2] // data adder input A selection
5400 daddbsel[0..3] // data adder input B selection
5401 daddmode[0..2] // data adder mode
5402 daddq_sel // select adder output vs. GPU data
5403 data[0..63] // co-processor read data bus
5404 data_ena // enable write data
5405 data_sel[0..1] // select data to write
5406 dbinh\[0..7] // byte oriented changed data inhibits
5407 dend[0..5] // end of changed write data zone
5408 dpipe[0..1] // load computed data pipe-line latch
5409 dstart[0..5] // start of changed write data zone
5410 dstdld[0..1] // dest data load (two halves)
5411 dstzld[0..1] // dest zed load (two halves)
5412 ext_int // enable extended precision intensity calculations
5413 INT32/ gpu_din // GPU data bus
5414 iincld // I increment load
5415 iincldx // alternate I increment load
5416 init_if // initialise I fraction phase
5417 init_ii // initialise I integer phase
5418 init_zf // initialise Z fraction phase
5419 intld[0..3] // computed intensities load
5420 istepadd // intensity step integer add
5421 istepfadd // intensity step fraction add
5422 istepld // I step load
5423 istepdld // I step delta load
5424 lfu_func[0..3] // LFU function code
5425 patdadd // pattern data gouraud add
5426 patdld[0..1] // pattern data load (two halves)
5427 pdsel[0..1] // select pattern data type
5428 phrase_mode // phrase write mode
5429 reload // transfer contents of double buffers
5430 reset\ // system reset
5431 srcd1ld[0..1] // source register 1 load (two halves)
5432 srcdread // source data read load enable
5433 srczread // source zed read load enable
5434 srcshift[0..5] // source alignment shift
5435 srcz1ld[0..1] // source zed 1 load (two halves)
5436 srcz2add // zed fraction gouraud add
5437 srcz2ld[0..1] // source zed 2 load (two halves)
5438 textrgb // texture mapping in RGB mode
5439 txtd[0..63] // data from the texture unit
5440 zedld[0..3] // computed zeds load
5441 zincld // Z increment load
5442 zmode[0..2] // Z comparator mode
5443 zpipe[0..1] // load computed zed pipe-line latch
5444 zstepadd // zed step integer add
5445 zstepfadd // zed step fraction add
5446 zstepld // Z step load
5447 zstepdld // Z step delta load
// DATA: software model of the blitter data path, following the DATA netlist
// section by section (each netlist fragment is quoted ahead of the C++ that
// stands in for it).
//
// Work visible in this function, in order:
//  - LFU: combines srcd and dstd bitwise, with lfu_func as a 4-entry truth table
//  - data comparator: tests patd against dstd (cmpdst set) or srcd, byte by byte
//  - zed comparator: tests srcz against dstz per 16-bit word, gated by zmode
//  - COMP_CTRL call: receives dbinht and nowrite by reference
//  - ADDARRAY call: fills addq; patd is rebuilt from addq[0..3]
//  - change mask: built from dstart and dend, mirrored, then cut back by dbinh
//  - output: wdata takes ddat where the mask is set and dstd elsewhere; zwdata
//    is formed the same way from srcz and dstz
//
// Reference parameters: wdata, dcomp, zcomp, nowrite, patd and srcz.
// NOTE(review): several statements (the bodies of the comparator tests, the
// dmux setup, the mirror conditionals) are not visible in this listing, so the
// notes added below describe only the lines that are shown - confirm the rest
// against the complete source.
5451 void DATA(uint64_t &wdata, uint8_t &dcomp, uint8_t &zcomp, bool &nowrite,
5452 bool big_pix, bool cmpdst, uint8_t daddasel, uint8_t daddbsel, uint8_t daddmode, bool daddq_sel, uint8_t data_sel,
5453 uint8_t dbinh, uint8_t dend, uint8_t dstart, uint64_t dstd, uint32_t iinc, uint8_t lfu_func, uint64_t &patd, bool patdadd,
5454 bool phrase_mode, uint64_t srcd, bool srcdread, bool srczread, bool srcz2add, uint8_t zmode,
5455 bool bcompen, bool bkgwren, bool dcompen, uint8_t icount, uint8_t pixsize,
5456 uint64_t &srcz, uint64_t dstz, uint32_t zinc)
5459 Stuff we absolutely *need* to have passed in/out:
5461 patdadd, dstd, srcd, patd, daddasel, daddbsel, daddmode, iinc, srcz1, srcz2, big_pix, phrase_mode, cmpdst
5463 changed patd (wdata I guess...) (Nope. We pass it back directly now...)
5466 // Source data registers
5468 /*Data_src := DATA_SRC (srcdlo, srcdhi, srcz[0..1], srczo[0..1], srczp[0..1], srcz1[0..1], srcz2[0..1], big_pix,
5469 clk, gpu_din, intld[0..3], local_data0, local_data1, srcd1ld[0..1], srcdread, srczread, srcshift[0..5],
5470 srcz1ld[0..1], srcz2add, srcz2ld[0..1], zedld[0..3], zpipe[0..1]);
5471 Srcd[0-7] := JOIN (srcd[0-7], srcdlo{0-7});
5472 Srcd[8-31] := JOIN (srcd[8-31], srcdlo{8-31});
5473 Srcd[32-63] := JOIN (srcd[32-63], srcdhi{0-31});*/
5475 // Destination data registers
5477 /*Data_dst := DATA_DST (dstd[0..63], dstz[0..1], clk, dstdld[0..1], dstzld[0..1], load_data[0..1]);
5478 Dstdlo := JOIN (dstdlo, dstd[0..31]);
5479 Dstdhi := JOIN (dstdhi, dstd[32..63]);*/
5481 // Pattern and Color data registers
5483 // Looks like this is simply another register file for the pattern data registers. No adding or anything funky
5484 // going on. Note that patd & patdv will output the same info.
5485 // Patdldl/h (patdld[0..1]) can select the local_data bus to overwrite the current pattern data...
5486 // Actually, it can be either patdld OR patdadd...!
5487 /*Data_pat := DATA_PAT (colord[0..15], int0dp[8..10], int1dp[8..10], int2dp[8..10], int3dp[8..10], mixsel[0..2],
5488 patd[0..63], patdv[0..1], clk, colorld, dpipe[0], ext_int, gpu_din, intld[0..3], local_data0, local_data1,
5489 patdadd, patdld[0..1], reload, reset\);
5490 Patdlo := JOIN (patdlo, patd[0..31]);
5491 Patdhi := JOIN (patdhi, patd[32..63]);*/
5493 // Multiplying data Mixer (NOT IN JAGUAR I)
5495 /*Datamix := DATAMIX (patdo[0..1], clk, colord[0..15], dpipe[1], dstd[0..63], int0dp[8..10], int1dp[8..10],
5496 int2dp[8..10], int3dp[8..10], mixsel[0..2], patd[0..63], pdsel[0..1], srcd[0..63], textrgb, txtd[0..63]);*/
5498 // Logic function unit
5500 /*Lfu := LFU (lfu[0..1], srcdlo, srcdhi, dstdlo, dstdhi, lfu_func[0..3]);*/
5501 ////////////////////////////////////// C++ CODE //////////////////////////////////////
// funcmask widens one lfu_func bit to 64 bits of all zeros or all ones. Each
// mask then gates one minterm: bit 0 = ~srcd & ~dstd, bit 1 = ~srcd & dstd,
// bit 2 = srcd & ~dstd, bit 3 = srcd & dstd.
5502 uint64_t funcmask[2] = { 0, 0xFFFFFFFFFFFFFFFFLL };
5503 uint64_t func0 = funcmask[lfu_func & 0x01];
5504 uint64_t func1 = funcmask[(lfu_func >> 1) & 0x01];
5505 uint64_t func2 = funcmask[(lfu_func >> 2) & 0x01];
5506 uint64_t func3 = funcmask[(lfu_func >> 3) & 0x01];
5507 uint64_t lfu = (~srcd & ~dstd & func0) | (~srcd & dstd & func1) | (srcd & ~dstd & func2) | (srcd & dstd & func3);
5508 //////////////////////////////////////////////////////////////////////////////////////
5510 // Increment and Step Registers
5512 // Does it do anything without the step add lines? Check it!
5513 // No. This is pretty much just a register file without the Jaguar II lines...
5514 /*Inc_step := INC_STEP (iinc, istep[0..31], zinc, zstep[0..31], clk, ext_int, gpu_din, iincld, iincldx, istepadd,
5515 istepfadd, istepld, istepdld, reload, reset\, zincld, zstepadd, zstepfadd, zstepld, zstepdld);
5516 Istep := JOIN (istep, istep[0..31]);
5517 Zstep := JOIN (zstep, zstep[0..31]);*/
5519 // Pixel data comparator
5521 /*Datacomp := DATACOMP (dcomp[0..7], cmpdst, dstdlo, dstdhi, patdlo, patdhi, srcdlo, srcdhi);*/
5522 ////////////////////////////////////// C++ CODE //////////////////////////////////////
// A byte of cmpd is zero exactly where patd equals the chosen operand (dstd
// when cmpdst is set, srcd otherwise); the eight tests below check one byte each.
5524 uint64_t cmpd = patd ^ (cmpdst ? dstd : srcd);
5526 if ((cmpd & 0x00000000000000FFLL) == 0)
5528 if ((cmpd & 0x000000000000FF00LL) == 0)
5530 if ((cmpd & 0x0000000000FF0000LL) == 0)
5532 if ((cmpd & 0x00000000FF000000LL) == 0)
5534 if ((cmpd & 0x000000FF00000000LL) == 0)
5536 if ((cmpd & 0x0000FF0000000000LL) == 0)
5538 if ((cmpd & 0x00FF000000000000LL) == 0)
5540 if ((cmpd & 0xFF00000000000000LL) == 0)
5542 //////////////////////////////////////////////////////////////////////////////////////
5544 // Zed comparator for Z-buffer operations
5546 /*Zedcomp := ZEDCOMP (zcomp[0..3], srczp[0..1], dstz[0..1], zmode[0..2]);*/
5547 ////////////////////////////////////// C++ CODE //////////////////////////////////////
5548 //srczp is srcz pipelined, also it goes through a source shift as well...
5549 /*The shift is basically like so (each piece is 16 bits long):
5552 srcz1lolo srcz1lohi srcz1hilo srcz1hihi srcrz2lolo srcz2lohi srcz2hilo
5554 with srcshift bits 4 & 5 selecting the start position
5556 //So... basically what we have here is:
// One test per 16-bit word of srcz and dstz. zmode bit 0 enables the
// less-than test, bit 1 the equality test and bit 2 the greater-than test.
5559 if ((((srcz & 0x000000000000FFFFLL) < (dstz & 0x000000000000FFFFLL)) && (zmode & 0x01))
5560 || (((srcz & 0x000000000000FFFFLL) == (dstz & 0x000000000000FFFFLL)) && (zmode & 0x02))
5561 || (((srcz & 0x000000000000FFFFLL) > (dstz & 0x000000000000FFFFLL)) && (zmode & 0x04)))
5564 if ((((srcz & 0x00000000FFFF0000LL) < (dstz & 0x00000000FFFF0000LL)) && (zmode & 0x01))
5565 || (((srcz & 0x00000000FFFF0000LL) == (dstz & 0x00000000FFFF0000LL)) && (zmode & 0x02))
5566 || (((srcz & 0x00000000FFFF0000LL) > (dstz & 0x00000000FFFF0000LL)) && (zmode & 0x04)))
5569 if ((((srcz & 0x0000FFFF00000000LL) < (dstz & 0x0000FFFF00000000LL)) && (zmode & 0x01))
5570 || (((srcz & 0x0000FFFF00000000LL) == (dstz & 0x0000FFFF00000000LL)) && (zmode & 0x02))
5571 || (((srcz & 0x0000FFFF00000000LL) > (dstz & 0x0000FFFF00000000LL)) && (zmode & 0x04)))
5574 if ((((srcz & 0xFFFF000000000000LL) < (dstz & 0xFFFF000000000000LL)) && (zmode & 0x01))
5575 || (((srcz & 0xFFFF000000000000LL) == (dstz & 0xFFFF000000000000LL)) && (zmode & 0x02))
5576 || (((srcz & 0xFFFF000000000000LL) > (dstz & 0xFFFF000000000000LL)) && (zmode & 0x04)))
5579 //TEMP, TO TEST IF ZCOMP IS THE CULPRIT...
5580 //Nope, this is NOT the problem...
5582 // We'll do the comparison/bit/byte inhibits here, since that's the way it happens
5583 // in the real thing (dcomp goes out to COMP_CTRL and back into DATA through dbinh)...
// big_pix is hard-wired to true in this call (the big_pix parameter is not
// forwarded), and only the low byte of srcd is passed on.
5587 COMP_CTRL(dbinht, nowrite,
5588 bcompen, true/*big_pix*/, bkgwren, dcomp, dcompen, icount, pixsize, phrase_mode, srcd & 0xFF, zcomp);
5594 #ifdef VERBOSE_BLITTER_LOGGING
5596 WriteLog("\n[dcomp=%02X zcomp=%02X dbinh=%02X]\n", dcomp, zcomp, dbinh);
5599 //////////////////////////////////////////////////////////////////////////////////////
5602 // The data initializer - allows all four initial values to be computed from one (NOT IN JAGUAR I)
5604 /*Datinit := DATINIT (initcin[0..3], initinc[0..63], initpix[0..15], a1_x[0..1], big_pix, clk, iinc, init_if, init_ii,
5605 init_zf, istep[0..31], zinc, zstep[0..31]);*/
5607 // Adder array for Z and intensity increments
5609 /*Addarray := ADDARRAY (addq[0..3], clk, daddasel[0..2], daddbsel[0..3], daddmode[0..2], dstdlo, dstdhi, iinc,
5610 initcin[0..3], initinc[0..63], initpix[0..15], istep, patdv[0..1], srcdlo, srcdhi, srcz1[0..1],
5611 srcz2[0..1], reset\, zinc, zstep);*/
5612 /*void ADDARRAY(uint16_t * addq, uint8_t daddasel, uint8_t daddbsel, uint8_t daddmode,
5613 uint64_t dstd, uint32_t iinc, uint8_t initcin[], uint64_t initinc, uint16_t initpix,
5614 uint32_t istep, uint64_t patd, uint64_t srcd, uint64_t srcz1, uint64_t srcz2,
5615 uint32_t zinc, uint32_t zstep)*/
5616 ////////////////////////////////////// C++ CODE //////////////////////////////////////
// Going by the prototype quoted above, initinc, initpix, istep, srcz1, srcz2,
// zinc and zstep are all passed as zero, and initcin is an all-zero array.
5618 uint8_t initcin[4] = { 0, 0, 0, 0 };
5619 ADDARRAY(addq, daddasel, daddbsel, daddmode, dstd, iinc, initcin, 0, 0, 0, patd, srcd, 0, 0, 0, 0);
5621 //This is normally done asynchronously above (thru local_data) when in patdadd mode...
5622 //And now it's passed back to the caller to be persistent between calls...!
5623 //But it's causing some serious breakage in T2K now... !!! FIX !!! [DONE--???]
5624 //Weird! It doesn't anymore...!
// Reassembles the four addq words into one 64-bit value, addq[0] lowest.
5626 patd = ((uint64_t)addq[3] << 48) | ((uint64_t)addq[2] << 32) | ((uint64_t)addq[1] << 16) | (uint64_t)addq[0];
5627 //////////////////////////////////////////////////////////////////////////////////////
5629 // Local data bus multiplexer
5631 /*Local_mux := LOCAL_MUX (local_data[0..1], load_data[0..1],
5632 addq[0..3], gpu_din, data[0..63], blitter_active, daddq_sel);
5633 Local_data0 := JOIN (local_data0, local_data[0]);
5634 Local_data1 := JOIN (local_data1, local_data[1]);*/
5635 ////////////////////////////////////// C++ CODE //////////////////////////////////////
5636 //////////////////////////////////////////////////////////////////////////////////////
5638 // Data output multiplexer and tri-state drive
5640 /*Data_mux := DATA_MUX (wdata[0..63], addq[0..3], big_pix, dstdlo, dstdhi, dstz[0..1], data_sel[0..1], data_ena,
5641 dstart[0..5], dend[0..5], dbinh\[0..7], lfu[0..1], patdo[0..1], phrase_mode, srczo[0..1]);*/
5642 ////////////////////////////////////// C++ CODE //////////////////////////////////////
5643 // NOTE: patdo comes from DATAMIX and can be considered the same as patd for Jaguar I
5645 //////////////////////////////////////////////////////////////////////////////////////
5649 wdata[0..63] // co-processor rwrite data bus
5652 big_pix // Pixel organisation is big-endian
5657 data_sel[0..1] // source of write data
5658 data_ena // enable write data onto read/write bus
5659 dstart[0..5] // start of changed write data
5660 dend[0..5] // end of changed write data
5661 dbinh\[0..7] // byte oriented changed data inhibits
5664 phrase_mode // phrase write mode
5669 /*INT32/ addql[0..1], ddatlo, ddathi zero32
5673 Phrase_mode\ := INV1 (phrase_mode\, phrase_mode);
5674 Zero := TIE0 (zero);
5675 Zero32 := JOIN (zero32, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero);*/
5677 /* Generate a changed data mask */
5679 /*Edis := OR6 (edis\, dend[0..5]);
5680 Ecoarse := DECL38E (e_coarse\[0..7], dend[3..5], edis\);
5681 E_coarse[0] := INV1 (e_coarse[0], e_coarse\[0]);
5682 Efine := DECL38E (unused[0], e_fine\[1..7], dend[0..2], e_coarse[0]);*/
5683 ////////////////////////////////////// C++ CODE //////////////////////////////////////
// Lookup tables standing in for the netlist decoder cells. Row 0 of decl38e
// is all ones and row 1 has a single zero bit at the selected index; dech38
// is one-hot; row 0 of dech38el is one-hot and row 1 is all zeros.
5684 uint8_t decl38e[2][8] = { { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF },
5685 { 0xFE, 0xFD, 0xFB, 0xF7, 0xEF, 0xDF, 0xBF, 0x7F } };
5686 uint8_t dech38[8] = { 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80 };
5687 uint8_t dech38el[2][8] = { { 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80 },
5688 { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } };
// en is 1 when any of the six dend bits is set.
5690 int en = (dend & 0x3F ? 1 : 0);
5691 uint8_t e_coarse = decl38e[en][(dend & 0x38) >> 3]; // Actually, this is e_coarse inverted...
// e_fine uses row 1 of decl38e only when bit 0 of e_coarse is clear.
5692 uint8_t e_fine = decl38e[(e_coarse & 0x01) ^ 0x01][dend & 0x07];
5694 //////////////////////////////////////////////////////////////////////////////////////
5696 /*Scoarse := DECH38 (s_coarse[0..7], dstart[3..5]);
5697 Sfen\ := INV1 (sfen\, s_coarse[0]);
5698 Sfine := DECH38EL (s_fine[0..7], dstart[0..2], sfen\);*/
5699 ////////////////////////////////////// C++ CODE //////////////////////////////////////
5700 uint8_t s_coarse = dech38[(dstart & 0x38) >> 3];
// s_fine is non-zero only when bit 0 of s_coarse is set (row 0 of dech38el).
5701 uint8_t s_fine = dech38el[(s_coarse & 0x01) ^ 0x01][dstart & 0x07];
5702 //////////////////////////////////////////////////////////////////////////////////////
5704 /*Maskt[0] := BUF1 (maskt[0], s_fine[0]);
5705 Maskt[1-7] := OAN1P (maskt[1-7], maskt[0-6], s_fine[1-7], e_fine\[1-7]);*/
5706 ////////////////////////////////////// C++ CODE //////////////////////////////////////
// Ripple from bit 0 upward: bit i is set when bit i-1 of maskt or bit i of
// s_fine is set, and bit i of e_fine is set as well.
5707 uint16_t maskt = s_fine & 0x0001;
5708 maskt |= (((maskt & 0x0001) || (s_fine & 0x02)) && (e_fine & 0x02) ? 0x0002 : 0x0000);
5709 maskt |= (((maskt & 0x0002) || (s_fine & 0x04)) && (e_fine & 0x04) ? 0x0004 : 0x0000);
5710 maskt |= (((maskt & 0x0004) || (s_fine & 0x08)) && (e_fine & 0x08) ? 0x0008 : 0x0000);
5711 maskt |= (((maskt & 0x0008) || (s_fine & 0x10)) && (e_fine & 0x10) ? 0x0010 : 0x0000);
5712 maskt |= (((maskt & 0x0010) || (s_fine & 0x20)) && (e_fine & 0x20) ? 0x0020 : 0x0000);
5713 maskt |= (((maskt & 0x0020) || (s_fine & 0x40)) && (e_fine & 0x40) ? 0x0040 : 0x0000);
5714 maskt |= (((maskt & 0x0040) || (s_fine & 0x80)) && (e_fine & 0x80) ? 0x0080 : 0x0000);
5715 //////////////////////////////////////////////////////////////////////////////////////
5717 /* Produce a look-ahead on the ripple carry:
5718 masktla = s_coarse[0] . /e_coarse[0] */
5719 /*Masktla := AN2 (masktla, s_coarse[0], e_coarse\[0]);
5720 Maskt[8] := OAN1P (maskt[8], masktla, s_coarse[1], e_coarse\[1]);
5721 Maskt[9-14] := OAN1P (maskt[9-14], maskt[8-13], s_coarse[2-7], e_coarse\[2-7]);*/
5722 ////////////////////////////////////// C++ CODE //////////////////////////////////////
// Same ripple for bits 8-14, driven by s_coarse and e_coarse bits 1-7; bit 8
// is seeded from bit 0 of both s_coarse and e_coarse.
5723 maskt |= (((s_coarse & e_coarse & 0x01) || (s_coarse & 0x02)) && (e_coarse & 0x02) ? 0x0100 : 0x0000);
5724 maskt |= (((maskt & 0x0100) || (s_coarse & 0x04)) && (e_coarse & 0x04) ? 0x0200 : 0x0000);
5725 maskt |= (((maskt & 0x0200) || (s_coarse & 0x08)) && (e_coarse & 0x08) ? 0x0400 : 0x0000);
5726 maskt |= (((maskt & 0x0400) || (s_coarse & 0x10)) && (e_coarse & 0x10) ? 0x0800 : 0x0000);
5727 maskt |= (((maskt & 0x0800) || (s_coarse & 0x20)) && (e_coarse & 0x20) ? 0x1000 : 0x0000);
5728 maskt |= (((maskt & 0x1000) || (s_coarse & 0x40)) && (e_coarse & 0x40) ? 0x2000 : 0x0000);
5729 maskt |= (((maskt & 0x2000) || (s_coarse & 0x80)) && (e_coarse & 0x80) ? 0x4000 : 0x0000);
5730 //////////////////////////////////////////////////////////////////////////////////////
5732 /* The bit terms are mirrored for big-endian pixels outside phrase
5733 mode. The byte terms are mirrored for big-endian pixels in phrase
5736 /*Mirror_bit := AN2M (mir_bit, phrase_mode\, big_pix);
5737 Mirror_byte := AN2H (mir_byte, phrase_mode, big_pix);
5739 Masktb[14] := BUF1 (masktb[14], maskt[14]);
5740 Masku[0] := MX4 (masku[0], maskt[0], maskt[7], maskt[14], zero, mir_bit, mir_byte);
5741 Masku[1] := MX4 (masku[1], maskt[1], maskt[6], maskt[14], zero, mir_bit, mir_byte);
5742 Masku[2] := MX4 (masku[2], maskt[2], maskt[5], maskt[14], zero, mir_bit, mir_byte);
5743 Masku[3] := MX4 (masku[3], maskt[3], maskt[4], masktb[14], zero, mir_bit, mir_byte);
5744 Masku[4] := MX4 (masku[4], maskt[4], maskt[3], masktb[14], zero, mir_bit, mir_byte);
5745 Masku[5] := MX4 (masku[5], maskt[5], maskt[2], masktb[14], zero, mir_bit, mir_byte);
5746 Masku[6] := MX4 (masku[6], maskt[6], maskt[1], masktb[14], zero, mir_bit, mir_byte);
5747 Masku[7] := MX4 (masku[7], maskt[7], maskt[0], masktb[14], zero, mir_bit, mir_byte);
5748 Masku[8] := MX2 (masku[8], maskt[8], maskt[13], mir_byte);
5749 Masku[9] := MX2 (masku[9], maskt[9], maskt[12], mir_byte);
5750 Masku[10] := MX2 (masku[10], maskt[10], maskt[11], mir_byte);
5751 Masku[11] := MX2 (masku[11], maskt[11], maskt[10], mir_byte);
5752 Masku[12] := MX2 (masku[12], maskt[12], maskt[9], mir_byte);
5753 Masku[13] := MX2 (masku[13], maskt[13], maskt[8], mir_byte);
5754 Masku[14] := MX2 (masku[14], maskt[14], maskt[0], mir_byte);*/
5755 ////////////////////////////////////// C++ CODE //////////////////////////////////////
// big_pix is hard-wired to true here as well, so phrase_mode alone decides
// between bit mirroring and byte mirroring.
5756 bool mir_bit = true/*big_pix*/ && !phrase_mode;
5757 bool mir_byte = true/*big_pix*/ && phrase_mode;
5758 uint16_t masku = maskt;
// First group: ORs the bit-reversed low byte of maskt into masku.
// NOTE(review): the conditionals guarding this group and the next are not
// visible in this listing - confirm against the complete source.
5763 masku |= (maskt >> 7) & 0x0001;
5764 masku |= (maskt >> 5) & 0x0002;
5765 masku |= (maskt >> 3) & 0x0004;
5766 masku |= (maskt >> 1) & 0x0008;
5767 masku |= (maskt << 1) & 0x0010;
5768 masku |= (maskt << 3) & 0x0020;
5769 masku |= (maskt << 5) & 0x0040;
5770 masku |= (maskt << 7) & 0x0080;
// Second group: copies maskt bit 14 into each of bits 0-7, then reverses the
// order of the upper bits (13 to 8, 12 to 9, and so on).
5776 masku |= (maskt >> 14) & 0x0001;
5777 masku |= (maskt >> 13) & 0x0002;
5778 masku |= (maskt >> 12) & 0x0004;
5779 masku |= (maskt >> 11) & 0x0008;
5780 masku |= (maskt >> 10) & 0x0010;
5781 masku |= (maskt >> 9) & 0x0020;
5782 masku |= (maskt >> 8) & 0x0040;
5783 masku |= (maskt >> 7) & 0x0080;
5785 masku |= (maskt >> 5) & 0x0100;
5786 masku |= (maskt >> 3) & 0x0200;
5787 masku |= (maskt >> 1) & 0x0400;
5788 masku |= (maskt << 1) & 0x0800;
5789 masku |= (maskt << 3) & 0x1000;
5790 masku |= (maskt << 5) & 0x2000;
// NOTE(review): the netlist quoted above feeds Masku[14] from maskt[0] when
// mir_byte is set, whereas the next line takes maskt bit 7 - confirm which
// of the two is intended.
5791 masku |= (maskt << 7) & 0x4000;
5793 //////////////////////////////////////////////////////////////////////////////////////
5795 /* The maskt terms define the area for changed data, but the byte
5796 inhibit terms can override these */
5798 /*Mask[0-7] := AN2 (mask[0-7], masku[0-7], dbinh\[0]);
5799 Mask[8-14] := AN2H (mask[8-14], masku[8-14], dbinh\[1-7]);*/
5800 ////////////////////////////////////// C++ CODE //////////////////////////////////////
// dbinh bit 0 clears mask bits 0-7 as a group; dbinh bits 1-7 clear mask
// bits 8-14 individually (bit n of dbinh lines up with bit n+7 of mask).
5801 uint16_t mask = masku & (!(dbinh & 0x01) ? 0xFFFF : 0xFF00);
5802 mask &= ~(((uint16_t)dbinh & 0x00FE) << 7);
5803 //////////////////////////////////////////////////////////////////////////////////////
5805 /*Addql[0] := JOIN (addql[0], addq[0..1]);
5806 Addql[1] := JOIN (addql[1], addq[2..3]);
5808 Dsel0b[0-1] := BUF8 (dsel0b[0-1], data_sel[0]);
5809 Dsel1b[0-1] := BUF8 (dsel1b[0-1], data_sel[1]);
5810 Ddatlo := MX4 (ddatlo, patd[0], lfu[0], addql[0], zero32, dsel0b[0], dsel1b[0]);
5811 Ddathi := MX4 (ddathi, patd[1], lfu[1], addql[1], zero32, dsel0b[1], dsel1b[1]);*/
5812 ////////////////////////////////////// C++ CODE //////////////////////////////////////
// Entry 2 of the data-source table is the four addq words packed into 64 bits;
// data_sel indexes the table directly.
// NOTE(review): the other dmux entries are not visible in this listing.
5816 dmux[2] = ((uint64_t)addq[3] << 48) | ((uint64_t)addq[2] << 32) | ((uint64_t)addq[1] << 16) | (uint64_t)addq[0];
5818 uint64_t ddat = dmux[data_sel];
5819 //////////////////////////////////////////////////////////////////////////////////////
5821 /*Zed_sel := AN2 (zed_sel, data_sel[0..1]);
5822 Zed_selb[0-1] := BUF8 (zed_selb[0-1], zed_sel);
5824 Dat[0-7] := MX4 (dat[0-7], dstdlo{0-7}, ddatlo{0-7}, dstzlo{0-7}, srczlo{0-7}, mask[0-7], zed_selb[0]);
5825 Dat[8-15] := MX4 (dat[8-15], dstdlo{8-15}, ddatlo{8-15}, dstzlo{8-15}, srczlo{8-15}, mask[8], zed_selb[0]);
5826 Dat[16-23] := MX4 (dat[16-23], dstdlo{16-23}, ddatlo{16-23}, dstzlo{16-23}, srczlo{16-23}, mask[9], zed_selb[0]);
5827 Dat[24-31] := MX4 (dat[24-31], dstdlo{24-31}, ddatlo{24-31}, dstzlo{24-31}, srczlo{24-31}, mask[10], zed_selb[0]);
5828 Dat[32-39] := MX4 (dat[32-39], dstdhi{0-7}, ddathi{0-7}, dstzhi{0-7}, srczhi{0-7}, mask[11], zed_selb[1]);
5829 Dat[40-47] := MX4 (dat[40-47], dstdhi{8-15}, ddathi{8-15}, dstzhi{8-15}, srczhi{8-15}, mask[12], zed_selb[1]);
5830 Dat[48-55] := MX4 (dat[48-55], dstdhi{16-23}, ddathi{16-23}, dstzhi{16-23}, srczhi{16-23}, mask[13], zed_selb[1]);
5831 Dat[56-63] := MX4 (dat[56-63], dstdhi{24-31}, ddathi{24-31}, dstzhi{24-31}, srczhi{24-31}, mask[14], zed_selb[1]);*/
5832 ////////////////////////////////////// C++ CODE //////////////////////////////////////
// Bits 0-7 are merged bit by bit using mask bits 0-7; each higher byte is
// taken whole from ddat or dstd according to one of mask bits 8-14.
5833 wdata = ((ddat & mask) | (dstd & ~mask)) & 0x00000000000000FFLL;
5834 wdata |= (mask & 0x0100 ? ddat : dstd) & 0x000000000000FF00LL;
5835 wdata |= (mask & 0x0200 ? ddat : dstd) & 0x0000000000FF0000LL;
5836 wdata |= (mask & 0x0400 ? ddat : dstd) & 0x00000000FF000000LL;
5837 wdata |= (mask & 0x0800 ? ddat : dstd) & 0x000000FF00000000LL;
5838 wdata |= (mask & 0x1000 ? ddat : dstd) & 0x0000FF0000000000LL;
5839 wdata |= (mask & 0x2000 ? ddat : dstd) & 0x00FF000000000000LL;
5840 wdata |= (mask & 0x4000 ? ddat : dstd) & 0xFF00000000000000LL;
5843 printf("\n[ddat=%08X%08X dstd=%08X%08X wdata=%08X%08X mask=%04X]\n",
5844 (uint32_t)(ddat >> 32), (uint32_t)(ddat & 0xFFFFFFFF),
5845 (uint32_t)(dstd >> 32), (uint32_t)(dstd & 0xFFFFFFFF),
5846 (uint32_t)(wdata >> 32), (uint32_t)(wdata & 0xFFFFFFFF), mask);
5849 //This is a crappy way of handling this, but it should work for now...
// Same merge as for wdata, with srcz in place of ddat and dstz in place of dstd.
5851 zwdata = ((srcz & mask) | (dstz & ~mask)) & 0x00000000000000FFLL;
5852 zwdata |= (mask & 0x0100 ? srcz : dstz) & 0x000000000000FF00LL;
5853 zwdata |= (mask & 0x0200 ? srcz : dstz) & 0x0000000000FF0000LL;
5854 zwdata |= (mask & 0x0400 ? srcz : dstz) & 0x00000000FF000000LL;
5855 zwdata |= (mask & 0x0800 ? srcz : dstz) & 0x000000FF00000000LL;
5856 zwdata |= (mask & 0x1000 ? srcz : dstz) & 0x0000FF0000000000LL;
5857 zwdata |= (mask & 0x2000 ? srcz : dstz) & 0x00FF000000000000LL;
5858 zwdata |= (mask & 0x4000 ? srcz : dstz) & 0xFF00000000000000LL;
5861 WriteLog("\n[srcz=%08X%08X dstz=%08X%08X zwdata=%08X%08X mask=%04X]\n",
5862 (uint32_t)(srcz >> 32), (uint32_t)(srcz & 0xFFFFFFFF),
5863 (uint32_t)(dstz >> 32), (uint32_t)(dstz & 0xFFFFFFFF),
5864 (uint32_t)(zwdata >> 32), (uint32_t)(zwdata & 0xFFFFFFFF), mask);
5868 //////////////////////////////////////////////////////////////////////////////////////
5870 /*Data_enab[0-1] := BUF8 (data_enab[0-1], data_ena);
5871 Datadrv[0-31] := TS (wdata[0-31], dat[0-31], data_enab[0]);
5872 Datadrv[32-63] := TS (wdata[32-63], dat[32-63], data_enab[1]);
5874 Unused[0] := DUMMY (unused[0]);
5880 /** COMP_CTRL - Comparator output control logic *****************
5882 This block is responsible for taking the comparator outputs and
5883 using them as appropriate to inhibit writes. Two methods are
5884 supported for inhibiting write data:
5886 - suppression of the inner loop controlled write operation
5887 - a set of eight byte inhibit lines to write back dest data
5889 The first technique is used in pixel oriented modes, the second in
5890 phrase mode, but the phrase mode form is only applicable to eight
5891 and sixteen bit pixel modes.
5893 Writes can be suppressed by data being equal, by the Z comparator
5894 conditions being met, or by the bit to pixel expansion scheme.
5896 Pipe-lining issues: the data derived comparator outputs are stable
5897 until the next data read, well after the affected write from this
5898 operation. However, the inner counter bits can count immediately
5899 before the ack for the last write. Therefore, it is necessary to
5900 delay bcompbit select terms by one inner loop pipe-line stage,
5901 when generating the select for the data control - the output is
5902 delayed one further tick to give it write data timing (2/34).
5904 There is also a problem with computed data - the new values are
5905 calculated before the write associated with the old value has been
5906 performed. This is taken care of within the zed comparator by
5907 pipe-lining the comparator inputs where appropriate.
5910 //#define LOG_COMP_CTRL
5912 dbinh\[0..7] // destination byte inhibit lines
5913 nowrite // suppress inner loop write operation
5915 bcompen // bit selector inhibit enable
5916 big_pix // pixels are big-endian
5917 bkgwren // enable dest data write in pix inhibit
5918 clk // co-processor clock
5919 dcomp[0..7] // output of data byte comparators
5920 dcompen // data comparator inhibit enable
5921 icount[0..2] // low bits of inner count
5922 pixsize[0..2] // destination pixel size
5923 phrase_mode // phrase write mode
5924 srcd[0..7] // bits to use for bit to byte expansion
5925 step_inner // inner loop advance
5926 zcomp[0..3] // output of word zed comparators
5928 void COMP_CTRL(uint8_t &dbinh, bool &nowrite,
5929 bool bcompen, bool big_pix, bool bkgwren, uint8_t dcomp, bool dcompen, uint8_t icount,
5930 uint8_t pixsize, bool phrase_mode, uint8_t srcd, uint8_t zcomp)
5934 /*Bkgwren\ := INV1 (bkgwren\, bkgwren);
5935 Phrase_mode\ := INV1 (phrase_mode\, phrase_mode);
5936 Pixsize\[0-2] := INV2 (pixsize\[0-2], pixsize[0-2]);*/
5938 /* The bit comparator bits are derived from the source data, which
5939 will have been suitably aligned for phrase mode. The contents of
5940 the inner counter are used to select which bit to use.
5942 When not in phrase mode the inner count value is used to select
5943 one bit. It is assumed that the count has already occurred, so,
5944 7 selects bit 0, etc. In big-endian pixel mode, this turns round,
5945 so that a count of 7 selects bit 7.
5947 In phrase mode, the eight bits are used directly, and this mode is
5948 only applicable to 8-bit pixel mode (2/34) */
5950 /*Bcompselt[0-2] := EO (bcompselt[0-2], icount[0-2], big_pix);
5951 Bcompbit := MX8 (bcompbit, srcd[7], srcd[6], srcd[5],
5952 srcd[4], srcd[3], srcd[2], srcd[1], srcd[0], bcompselt[0..2]);
5953 Bcompbit\ := INV1 (bcompbit\, bcompbit);*/
5954 ////////////////////////////////////// C++ CODE //////////////////////////////////////
5955 #ifdef LOG_COMP_CTRL
5958 WriteLog("\n [bcompen=%s dcompen=%s phrase_mode=%s bkgwren=%s dcomp=%02X zcomp=%02X]", (bcompen ? "T" : "F"), (dcompen ? "T" : "F"), (phrase_mode ? "T" : "F"), (bkgwren ? "T" : "F"), dcomp, zcomp);
5963 uint8_t bcompselt = (big_pix ? ~icount : icount) & 0x07;
5964 uint8_t bitmask[8] = { 0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02, 0x01 };
5965 bool bcompbit = srcd & bitmask[bcompselt];
5966 //////////////////////////////////////////////////////////////////////////////////////
5968 /* pipe-line the count */
5969 /*Bcompsel[0-2] := FDSYNC (bcompsel[0-2], bcompselt[0-2], step_inner, clk);
5970 Bcompbt := MX8 (bcompbitpt, srcd[7], srcd[6], srcd[5],
5971 srcd[4], srcd[3], srcd[2], srcd[1], srcd[0], bcompsel[0..2]);
5972 Bcompbitp := FD1Q (bcompbitp, bcompbitpt, clk);
5973 Bcompbitp\ := INV1 (bcompbitp\, bcompbitp);*/
5975 /* For pixel mode, generate the write inhibit signal for all modes
5976 on bit inhibit, for 8 and 16 bit modes on comparator inhibit, and
5977 for 16 bit mode on Z inhibit
5979 Nowrite = bcompen . /bcompbit . /phrase_mode
5980 + dcompen . dcomp[0] . /phrase_mode . pixsize = 011
5981 + dcompen . dcomp[0..1] . /phrase_mode . pixsize = 100
5982 + zcomp[0] . /phrase_mode . pixsize = 100
5985 /*Nowt0 := NAN3 (nowt[0], bcompen, bcompbit\, phrase_mode\);
5986 Nowt1 := ND6 (nowt[1], dcompen, dcomp[0], phrase_mode\, pixsize\[2], pixsize[0..1]);
5987 Nowt2 := ND7 (nowt[2], dcompen, dcomp[0..1], phrase_mode\, pixsize[2], pixsize\[0..1]);
5988 Nowt3 := NAN5 (nowt[3], zcomp[0], phrase_mode\, pixsize[2], pixsize\[0..1]);
5989 Nowt4 := NAN4 (nowt[4], nowt[0..3]);
5990 Nowrite := AN2 (nowrite, nowt[4], bkgwren\);*/
5991 ////////////////////////////////////// C++ CODE //////////////////////////////////////
5992 nowrite = ((bcompen && !bcompbit && !phrase_mode)
5993 || (dcompen && (dcomp & 0x01) && !phrase_mode && (pixsize == 3))
5994 || (dcompen && ((dcomp & 0x03) == 0x03) && !phrase_mode && (pixsize == 4))
5995 || ((zcomp & 0x01) && !phrase_mode && (pixsize == 4)))
5997 //////////////////////////////////////////////////////////////////////////////////////
5999 /*Winht := NAN3 (winht, bcompen, bcompbitp\, phrase_mode\);
6000 Winhibit := NAN4 (winhibit, winht, nowt[1..3]);*/
6001 ////////////////////////////////////// C++ CODE //////////////////////////////////////
6002 //This is the same as above, but with bcompbit delayed one tick and called 'winhibit'
6003 //Small difference: Besides the pipeline effect, it's also not using !bkgwren...
6004 // bool winhibit = (bcompen && !
6005 bool winhibit = (bcompen && !bcompbit && !phrase_mode)
6006 || (dcompen && (dcomp & 0x01) && !phrase_mode && (pixsize == 3))
6007 || (dcompen && ((dcomp & 0x03) == 0x03) && !phrase_mode && (pixsize == 4))
6008 || ((zcomp & 0x01) && !phrase_mode && (pixsize == 4));
6009 #ifdef LOG_COMP_CTRL
6012 WriteLog("[nw=%s wi=%s]", (nowrite ? "T" : "F"), (winhibit ? "T" : "F"));
6016 //////////////////////////////////////////////////////////////////////////////////////
6018 /* For phrase mode, generate the byte inhibit signals for eight bit
6019 mode 011, or sixteen bit mode 100
6020 dbinh\[0] = pixsize[2] . zcomp[0]
6021 + pixsize[2] . dcomp[0] . dcomp[1] . dcompen
6022 + /pixsize[2] . dcomp[0] . dcompen
6023 + /srcd[0] . bcompen
6025 Inhibits 0-3 are also used when not in phrase mode to write back
6029 /*Srcd\[0-7] := INV1 (srcd\[0-7], srcd[0-7]);
6031 Di0t0 := NAN2H (di0t[0], pixsize[2], zcomp[0]);
6032 Di0t1 := NAN4H (di0t[1], pixsize[2], dcomp[0..1], dcompen);
6033 Di0t2 := NAN2 (di0t[2], srcd\[0], bcompen);
6034 Di0t3 := NAN3 (di0t[3], pixsize\[2], dcomp[0], dcompen);
6035 Di0t4 := NAN4 (di0t[4], di0t[0..3]);
6036 Dbinh[0] := ANR1P (dbinh\[0], di0t[4], phrase_mode, winhibit);*/
6037 ////////////////////////////////////// C++ CODE //////////////////////////////////////
6039 bool di0t0_1 = ((pixsize & 0x04) && (zcomp & 0x01))
6040 || ((pixsize & 0x04) && (dcomp & 0x01) && (dcomp & 0x02) && dcompen);
6041 bool di0t4 = di0t0_1
6042 || (!(srcd & 0x01) && bcompen)
6043 || (!(pixsize & 0x04) && (dcomp & 0x01) && dcompen);
6044 dbinh |= (!((di0t4 && phrase_mode) || winhibit) ? 0x01 : 0x00);
6045 #ifdef LOG_COMP_CTRL
6048 WriteLog("[di0t0_1=%s di0t4=%s]", (di0t0_1 ? "T" : "F"), (di0t4 ? "T" : "F"));
6052 //////////////////////////////////////////////////////////////////////////////////////
6054 /*Di1t0 := NAN3 (di1t[0], pixsize\[2], dcomp[1], dcompen);
6055 Di1t1 := NAN2 (di1t[1], srcd\[1], bcompen);
6056 Di1t2 := NAN4 (di1t[2], di0t[0..1], di1t[0..1]);
6057 Dbinh[1] := ANR1 (dbinh\[1], di1t[2], phrase_mode, winhibit);*/
6058 ////////////////////////////////////// C++ CODE //////////////////////////////////////
6059 bool di1t2 = di0t0_1
6060 || (!(srcd & 0x02) && bcompen)
6061 || (!(pixsize & 0x04) && (dcomp & 0x02) && dcompen);
6062 dbinh |= (!((di1t2 && phrase_mode) || winhibit) ? 0x02 : 0x00);
6063 #ifdef LOG_COMP_CTRL
6066 WriteLog("[di1t2=%s]", (di1t2 ? "T" : "F"));
6070 //////////////////////////////////////////////////////////////////////////////////////
6072 /*Di2t0 := NAN2H (di2t[0], pixsize[2], zcomp[1]);
6073 Di2t1 := NAN4H (di2t[1], pixsize[2], dcomp[2..3], dcompen);
6074 Di2t2 := NAN2 (di2t[2], srcd\[2], bcompen);
6075 Di2t3 := NAN3 (di2t[3], pixsize\[2], dcomp[2], dcompen);
6076 Di2t4 := NAN4 (di2t[4], di2t[0..3]);
6077 Dbinh[2] := ANR1 (dbinh\[2], di2t[4], phrase_mode, winhibit);*/
6078 ////////////////////////////////////// C++ CODE //////////////////////////////////////
6079 //[bcompen=F dcompen=T phrase_mode=T bkgwren=F][nw=F wi=F]
6080 //[di0t0_1=F di0t4=F][di1t2=F][di2t0_1=T di2t4=T][di3t2=T][di4t0_1=F di2t4=F][di5t2=F][di6t0_1=F di6t4=F][di7t2=F]
6081 //[dcomp=$00 dbinh=$0C][7804780400007804] (icount=0005, inc=4)
6082 bool di2t0_1 = ((pixsize & 0x04) && (zcomp & 0x02))
6083 || ((pixsize & 0x04) && (dcomp & 0x04) && (dcomp & 0x08) && dcompen);
6084 bool di2t4 = di2t0_1
6085 || (!(srcd & 0x04) && bcompen)
6086 || (!(pixsize & 0x04) && (dcomp & 0x04) && dcompen);
6087 dbinh |= (!((di2t4 && phrase_mode) || winhibit) ? 0x04 : 0x00);
6088 #ifdef LOG_COMP_CTRL
6091 WriteLog("[di2t0_1=%s di2t4=%s]", (di2t0_1 ? "T" : "F"), (di2t4 ? "T" : "F"));
6095 //////////////////////////////////////////////////////////////////////////////////////
6097 /*Di3t0 := NAN3 (di3t[0], pixsize\[2], dcomp[3], dcompen);
6098 Di3t1 := NAN2 (di3t[1], srcd\[3], bcompen);
6099 Di3t2 := NAN4 (di3t[2], di2t[0..1], di3t[0..1]);
6100 Dbinh[3] := ANR1 (dbinh\[3], di3t[2], phrase_mode, winhibit);*/
6101 ////////////////////////////////////// C++ CODE //////////////////////////////////////
6102 bool di3t2 = di2t0_1
6103 || (!(srcd & 0x08) && bcompen)
6104 || (!(pixsize & 0x04) && (dcomp & 0x08) && dcompen);
6105 dbinh |= (!((di3t2 && phrase_mode) || winhibit) ? 0x08 : 0x00);
6106 #ifdef LOG_COMP_CTRL
6109 WriteLog("[di3t2=%s]", (di3t2 ? "T" : "F"));
6113 //////////////////////////////////////////////////////////////////////////////////////
6115 /*Di4t0 := NAN2H (di4t[0], pixsize[2], zcomp[2]);
6116 Di4t1 := NAN4H (di4t[1], pixsize[2], dcomp[4..5], dcompen);
6117 Di4t2 := NAN2 (di4t[2], srcd\[4], bcompen);
6118 Di4t3 := NAN3 (di4t[3], pixsize\[2], dcomp[4], dcompen);
6119 Di4t4 := NAN4 (di4t[4], di4t[0..3]);
6120 Dbinh[4] := NAN2 (dbinh\[4], di4t[4], phrase_mode);*/
6121 ////////////////////////////////////// C++ CODE //////////////////////////////////////
6122 bool di4t0_1 = ((pixsize & 0x04) && (zcomp & 0x04))
6123 || ((pixsize & 0x04) && (dcomp & 0x10) && (dcomp & 0x20) && dcompen);
6124 bool di4t4 = di4t0_1
6125 || (!(srcd & 0x10) && bcompen)
6126 || (!(pixsize & 0x04) && (dcomp & 0x10) && dcompen);
6127 dbinh |= (!(di4t4 && phrase_mode) ? 0x10 : 0x00);
6128 #ifdef LOG_COMP_CTRL
6131 WriteLog("[di4t0_1=%s di2t4=%s]", (di4t0_1 ? "T" : "F"), (di4t4 ? "T" : "F"));
6135 //////////////////////////////////////////////////////////////////////////////////////
6137 /*Di5t0 := NAN3 (di5t[0], pixsize\[2], dcomp[5], dcompen);
6138 Di5t1 := NAN2 (di5t[1], srcd\[5], bcompen);
6139 Di5t2 := NAN4 (di5t[2], di4t[0..1], di5t[0..1]);
6140 Dbinh[5] := NAN2 (dbinh\[5], di5t[2], phrase_mode);*/
6141 ////////////////////////////////////// C++ CODE //////////////////////////////////////
6142 bool di5t2 = di4t0_1
6143 || (!(srcd & 0x20) && bcompen)
6144 || (!(pixsize & 0x04) && (dcomp & 0x20) && dcompen);
6145 dbinh |= (!(di5t2 && phrase_mode) ? 0x20 : 0x00);
6146 #ifdef LOG_COMP_CTRL
6149 WriteLog("[di5t2=%s]", (di5t2 ? "T" : "F"));
6153 //////////////////////////////////////////////////////////////////////////////////////
6155 /*Di6t0 := NAN2H (di6t[0], pixsize[2], zcomp[3]);
6156 Di6t1 := NAN4H (di6t[1], pixsize[2], dcomp[6..7], dcompen);
6157 Di6t2 := NAN2 (di6t[2], srcd\[6], bcompen);
6158 Di6t3 := NAN3 (di6t[3], pixsize\[2], dcomp[6], dcompen);
6159 Di6t4 := NAN4 (di6t[4], di6t[0..3]);
6160 Dbinh[6] := NAN2 (dbinh\[6], di6t[4], phrase_mode);*/
6161 ////////////////////////////////////// C++ CODE //////////////////////////////////////
6162 bool di6t0_1 = ((pixsize & 0x04) && (zcomp & 0x08))
6163 || ((pixsize & 0x04) && (dcomp & 0x40) && (dcomp & 0x80) && dcompen);
6164 bool di6t4 = di6t0_1
6165 || (!(srcd & 0x40) && bcompen)
6166 || (!(pixsize & 0x04) && (dcomp & 0x40) && dcompen);
6167 dbinh |= (!(di6t4 && phrase_mode) ? 0x40 : 0x00);
6168 #ifdef LOG_COMP_CTRL
6171 WriteLog("[di6t0_1=%s di6t4=%s]", (di6t0_1 ? "T" : "F"), (di6t4 ? "T" : "F"));
6175 //////////////////////////////////////////////////////////////////////////////////////
6177 /*Di7t0 := NAN3 (di7t[0], pixsize\[2], dcomp[7], dcompen);
6178 Di7t1 := NAN2 (di7t[1], srcd\[7], bcompen);
6179 Di7t2 := NAN4 (di7t[2], di6t[0..1], di7t[0..1]);
6180 Dbinh[7] := NAN2 (dbinh\[7], di7t[2], phrase_mode);*/
6181 ////////////////////////////////////// C++ CODE //////////////////////////////////////
6182 bool di7t2 = di6t0_1
6183 || (!(srcd & 0x80) && bcompen)
6184 || (!(pixsize & 0x04) && (dcomp & 0x80) && dcompen);
6185 dbinh |= (!(di7t2 && phrase_mode) ? 0x80 : 0x00);
6186 #ifdef LOG_COMP_CTRL
6189 WriteLog("[di7t2=%s]", (di7t2 ? "T" : "F"));
6193 //////////////////////////////////////////////////////////////////////////////////////
6198 #ifdef LOG_COMP_CTRL
6201 WriteLog("[dcomp=$%02X dbinh=$%02X]\n ", dcomp, dbinh);
6208 ////////////////////////////////////// C++ CODE //////////////////////////////////////
6209 //////////////////////////////////////////////////////////////////////////////////////
6211 // !!! TESTING !!! TESTING !!! TESTING !!! TESTING !!! TESTING !!! TESTING !!! TESTING !!!
6212 // !!! TESTING !!! TESTING !!! TESTING !!! TESTING !!! TESTING !!! TESTING !!! TESTING !!!
6213 // !!! TESTING !!! TESTING !!! TESTING !!! TESTING !!! TESTING !!! TESTING !!! TESTING !!!