5 // (C) 2010 Underground Software
7 // JLH = James Hammons <jlhamm@acm.org>
10 // --- ---------- -----------------------------------------------------------
11 // JLH 01/16/2010 Created this log ;-)
15 // I owe a debt of gratitude to Curt Vendel and to John Mathieson--to Curt
16 // for supplying the Oberon ASIC nets and to John for making them available
17 // to Curt. ;-) Without that excellent documentation which shows *exactly*
18 // what's going on inside the TOM chip, we'd all still be guessing as to how
19 // the wily blitter and other pieces of the Jaguar puzzle actually work.
32 // Various conditional compilation goodies...
36 #define USE_ORIGINAL_BLITTER
37 //#define USE_MIDSUMMER_BLITTER
38 #define USE_MIDSUMMER_BLITTER_MKII
40 #ifdef USE_ORIGINAL_BLITTER
41 #ifdef USE_MIDSUMMER_BLITTER_MKII
42 #define USE_BOTH_BLITTERS
47 // External global variables
// Symbols owned by other translation units
extern int jaguar_active_memory_dumps;
extern int effect_start;
extern int blit_start_log;

// Logging / status switches exported by this file
int start_logging = 0;
uint8_t blitter_working = 0;
bool startConciseBlitLogging = false;
bool specialLog = false;	// when set, WRITE_PIXEL_16 logs every write address

// Byte-wide backing store for the blitter register file; it is accessed as
// big-endian 32-bit values through REG()/WREG().
static uint8_t blitter_ram[0x100];

// Alternative blitter implementations (bodies are not in this part of the file)
void BlitterMidsummer(uint32_t cmd);
void BlitterMidsummer2(void);
// REG(A): assemble the big-endian 32-bit value held in blitter_ram[A..A+3].
// NOTE: A is expanded four times, so it must not have side effects.
#define REG(A) ( \
	  ((uint32_t)blitter_ram[(A) + 0] << 24) \
	| ((uint32_t)blitter_ram[(A) + 1] << 16) \
	| ((uint32_t)blitter_ram[(A) + 2] <<  8) \
	|  (uint32_t)blitter_ram[(A) + 3] )

// WREG(A,D): store the 32-bit value D big-endian into blitter_ram[A..A+3].
// This is a comma expression (most significant byte first); both A and D are
// expanded four times, so neither may have side effects.
#define WREG(A,D) ( \
	blitter_ram[(A) + 0] = ((D) >> 24) & 0xFF, \
	blitter_ram[(A) + 1] = ((D) >> 16) & 0xFF, \
	blitter_ram[(A) + 2] = ((D) >>  8) & 0xFF, \
	blitter_ram[(A) + 3] =  (D)        & 0xFF )
75 // Blitter registers (offsets from F02200)
// --- A1 address generator ---------------------------------------------------
#define A1_BASE			((uint32_t)0x00)
#define A1_FLAGS		((uint32_t)0x04)
#define A1_CLIP			((uint32_t)0x08)	// Clipping window: height and width
#define A1_PIXEL		((uint32_t)0x0C)	// Pixel position, integer part (Y.i and X.i)
#define A1_STEP			((uint32_t)0x10)	// Step, integer part
#define A1_FSTEP		((uint32_t)0x14)	// Step, fractional part
#define A1_FPIXEL		((uint32_t)0x18)	// Pixel position, fractional part (Y.f and X.f)
#define A1_INC			((uint32_t)0x1C)	// Increment, integer part
#define A1_FINC			((uint32_t)0x20)	// Increment, fractional part

// --- A2 address generator (integer only; A2 has no fractional registers) ----
#define A2_BASE			((uint32_t)0x24)
#define A2_FLAGS		((uint32_t)0x28)
#define A2_MASK			((uint32_t)0x2C)	// Modulo values for y and x (M.y and M.x)
#define A2_PIXEL		((uint32_t)0x30)	// Pixel position, integer part
#define A2_STEP			((uint32_t)0x34)	// Step, integer part

// --- Control ----------------------------------------------------------------
#define COMMAND			((uint32_t)0x38)
#define PIXLINECOUNTER	((uint32_t)0x3C)	// Inner and outer loop counts

// --- Data registers ---------------------------------------------------------
#define SRCDATA			((uint32_t)0x40)
#define DSTDATA			((uint32_t)0x48)
#define DSTZ			((uint32_t)0x50)
#define SRCZINT			((uint32_t)0x58)
#define SRCZFRAC		((uint32_t)0x60)
#define PATTERNDATA		((uint32_t)0x68)
#define INTENSITYINC	((uint32_t)0x70)
#define ZINC			((uint32_t)0x74)
#define COLLISIONCTRL	((uint32_t)0x78)

// --- Per-pixel phrase intensity and Z values --------------------------------
#define PHRASEINT0		((uint32_t)0x7C)
#define PHRASEINT1		((uint32_t)0x80)
#define PHRASEINT2		((uint32_t)0x84)
#define PHRASEINT3		((uint32_t)0x88)
#define PHRASEZ0		((uint32_t)0x8C)
#define PHRASEZ1		((uint32_t)0x90)
#define PHRASEZ2		((uint32_t)0x94)
#define PHRASEZ3		((uint32_t)0x98)
111 // Blitter command bits
// Each macro tests one bit of a variable named `cmd` that must be in scope at
// the point of use; the result is the masked bit (non-zero when set).
// Listed in ascending bit order.

// Source / destination data enables
#define SRCEN		(cmd & 0x00000001)	// bit 0
#define SRCENZ		(cmd & 0x00000002)	// bit 1
#define SRCENX		(cmd & 0x00000004)	// bit 2
#define DSTEN		(cmd & 0x00000008)	// bit 3
#define DSTENZ		(cmd & 0x00000010)	// bit 4
#define DSTWRZ		(cmd & 0x00000020)	// bit 5
#define CLIPA1		(cmd & 0x00000040)	// bit 6

// Address update control
#define UPDA1F		(cmd & 0x00000100)	// bit 8
#define UPDA1		(cmd & 0x00000200)	// bit 9
#define UPDA2		(cmd & 0x00000400)	// bit 10

// Blit direction: clear means A1 <- A2, set means A1 -> A2
#define DSTA2		(cmd & 0x00000800)	// bit 11

// Shading / write-data selection
#define GOURD		(cmd & 0x00001000)	// bit 12
#define GOURZ		(cmd & 0x00002000)	// bit 13
#define TOPBEN		(cmd & 0x00004000)	// bit 14
#define TOPNEN		(cmd & 0x00008000)	// bit 15
#define PATDSEL		(cmd & 0x00010000)	// bit 16
#define ADDDSEL		(cmd & 0x00020000)	// bit 17

// Z comparator: inhibit the write when source Z is <, == or > destination Z
#define Z_OP_INF	(cmd & 0x00040000)	// bit 18
#define Z_OP_EQU	(cmd & 0x00080000)	// bit 19
#define Z_OP_SUP	(cmd & 0x00100000)	// bit 20

// Logic function unit minterms (~S&~D, ~S&D, S&~D, S&D)
#define LFU_NAN		(cmd & 0x00200000)	// bit 21
#define LFU_NA		(cmd & 0x00400000)	// bit 22
#define LFU_AN		(cmd & 0x00800000)	// bit 23
#define LFU_A		(cmd & 0x01000000)	// bit 24

// Comparator control
#define CMPDST		(cmd & 0x02000000)	// bit 25
#define BCOMPEN		(cmd & 0x04000000)	// bit 26
#define DCOMPEN		(cmd & 0x08000000)	// bit 27
#define BKGWREN		(cmd & 0x10000000)	// bit 28

#define SRCSHADE	(cmd & 0x40000000)	// bit 30
// Single-bit tests on the A1/A2 flags registers (bits 18, 19 and 20). Each
// yields the masked bit, i.e. non-zero when set.
// NOTE(review): presumably the "add 1 to Y" and X/Y "subtract instead of add"
// address-update controls -- confirm against the Jaguar reference manual.

// A1 channel
#define YADD1_A1	(REG(A1_FLAGS) & 0x040000)
#define XSIGNSUB_A1	(REG(A1_FLAGS) & 0x080000)
#define YSIGNSUB_A1	(REG(A1_FLAGS) & 0x100000)

// A2 channel
#define YADD1_A2	(REG(A2_FLAGS) & 0x040000)
#define XSIGNSUB_A2	(REG(A2_FLAGS) & 0x080000)
#define YSIGNSUB_A2	(REG(A2_FLAGS) & 0x100000)
164 /*******************************************************************************
165 ********************** STUFF CUT BELOW THIS LINE! ******************************
166 *******************************************************************************/
167 #ifdef USE_ORIGINAL_BLITTER // We're ditching this crap for now...
169 //Put 'em back, once we fix the problem!!! [KO]
// Pixel addressing and read helpers for the original blitter.
//
// Every macro takes a channel prefix `a` (a1 or a2) and token-pastes it onto
// that channel's variables: a##_x and a##_y (the integer pixel position lives
// in bits 16 and up), a##_width, a##_pitch and a##_addr. For each depth:
//   PIXEL_SHIFT_n  - bit position of the pixel inside its byte (1/2/4 bpp only)
//   PIXEL_OFFSET_n - offset of the pixel from a##_addr, counted in units of the
//                    access size (bytes up to 8 bpp, words at 16, longs at 32)
//   READ_PIXEL_n   - fetch the pixel over the Jaguar bus on behalf of BLITTER
// In every offset the phrase-aligned part is scaled by (1 + a##_pitch) and the
// position within the phrase is added afterwards.

// 1 bpp
#define PIXEL_SHIFT_1(a)	(((~a##_x) >> 16) & 7)
#define PIXEL_OFFSET_1(a)	(((((uint32_t)a##_y >> 16) * a##_width / 8) + (((uint32_t)a##_x >> 19) & ~7)) * (1 + a##_pitch) + (((uint32_t)a##_x >> 19) & 7))
#define READ_PIXEL_1(a)		((JaguarReadByte(a##_addr+PIXEL_OFFSET_1(a), BLITTER) >> PIXEL_SHIFT_1(a)) & 0x01)

// 2 bpp
#define PIXEL_SHIFT_2(a)	(((~a##_x) >> 15) & 6)
#define PIXEL_OFFSET_2(a)	(((((uint32_t)a##_y >> 16) * a##_width / 4) + (((uint32_t)a##_x >> 18) & ~7)) * (1 + a##_pitch) + (((uint32_t)a##_x >> 18) & 7))
#define READ_PIXEL_2(a)		((JaguarReadByte(a##_addr+PIXEL_OFFSET_2(a), BLITTER) >> PIXEL_SHIFT_2(a)) & 0x03)

// 4 bpp
#define PIXEL_SHIFT_4(a)	(((~a##_x) >> 14) & 4)
#define PIXEL_OFFSET_4(a)	(((((uint32_t)a##_y >> 16) * (a##_width/2)) + (((uint32_t)a##_x >> 17) & ~7)) * (1 + a##_pitch) + (((uint32_t)a##_x >> 17) & 7))
#define READ_PIXEL_4(a)		((JaguarReadByte(a##_addr+PIXEL_OFFSET_4(a), BLITTER) >> PIXEL_SHIFT_4(a)) & 0x0f)

// 8 bpp
#define PIXEL_OFFSET_8(a)	(((((uint32_t)a##_y >> 16) * a##_width) + (((uint32_t)a##_x >> 16) & ~7)) * (1 + a##_pitch) + (((uint32_t)a##_x >> 16) & 7))
#define READ_PIXEL_8(a)		(JaguarReadByte(a##_addr+PIXEL_OFFSET_8(a), BLITTER))

// 16 bpp
#define PIXEL_OFFSET_16(a)	(((((uint32_t)a##_y >> 16) * a##_width) + (((uint32_t)a##_x >> 16) & ~3)) * (1 + a##_pitch) + (((uint32_t)a##_x >> 16) & 3))
#define READ_PIXEL_16(a)	(JaguarReadWord(a##_addr+(PIXEL_OFFSET_16(a)<<1), BLITTER))

// 32 bpp
#define PIXEL_OFFSET_32(a)	(((((uint32_t)a##_y >> 16) * a##_width) + (((uint32_t)a##_x >> 16) & ~1)) * (1 + a##_pitch) + (((uint32_t)a##_x >> 16) & 1))
#define READ_PIXEL_32(a)	(JaguarReadLong(a##_addr+(PIXEL_OFFSET_32(a)<<2), BLITTER))

// Depth dispatch: bits 3-5 of the flags word `f` select the pixel size
// (0 = 1 bpp ... 5 = 32 bpp); any other code reads as 0.
// `f` is parenthesised so that a compound argument such as `flags | extra` is
// shifted as a whole rather than having `>> 3` bind to its last operand. It is
// still expanded several times, so it must be free of side effects.
#define READ_PIXEL(a,f) (\
	((((f)>>3)&0x07) == 0) ? (READ_PIXEL_1(a)) : \
	((((f)>>3)&0x07) == 1) ? (READ_PIXEL_2(a)) : \
	((((f)>>3)&0x07) == 2) ? (READ_PIXEL_4(a)) : \
	((((f)>>3)&0x07) == 3) ? (READ_PIXEL_8(a)) : \
	((((f)>>3)&0x07) == 4) ? (READ_PIXEL_16(a)) : \
	((((f)>>3)&0x07) == 5) ? (READ_PIXEL_32(a)) : 0)
// Z-buffer access. Z values are 16 bits wide and are addressed like 16 bpp
// pixels, displaced by a##_zoffs * 4 words from the pixel's own word offset.
#define ZDATA_OFFSET_16(a)	(PIXEL_OFFSET_16(a) + a##_zoffs * 4)

// Z read: the word offset is doubled to obtain a byte address.
#define READ_ZDATA_16(a)	(JaguarReadWord(a##_addr + (ZDATA_OFFSET_16(a) << 1), BLITTER))

// Depth-independent Z read; the flags argument `f` is accepted but ignored.
#define READ_ZDATA(a,f)		(READ_ZDATA_16(a))

// Z write: expands to a braced statement.
#define WRITE_ZDATA_16(a,d)	{ JaguarWriteWord(a##_addr + (ZDATA_OFFSET_16(a) << 1), d, BLITTER); }

// Depth-independent Z write; `f` is ignored. Note that the expansion already
// ends with a semicolon.
#define WRITE_ZDATA(a,f,d)	WRITE_ZDATA_16(a,d);
// Register data reads: extract one pixel from the 64-bit value held in the
// register pair at offsets r and r+4.
//
//   r - register offset (e.g. SRCDATA, DSTDATA, PATTERNDATA)
//   a - channel prefix (a1 or a2); only a##_x is used, its integer part being
//       the pixel index
//   p - phrase mode flag. When zero, the lowest pixel of REG(r) is returned
//       regardless of position. When non-zero, the pixel index selects the
//       longword (r or r+4) and the bit position inside that longword.
//
// In phrase mode the shift count must stay below 32: it is the pixel's index
// within its 32-bit longword multiplied by the pixel width. The 2 bpp and
// 4 bpp masks are therefore 0x1E (16 pixels x 2 bits) and 0x1C (8 pixels x
// 4 bits). The previous 0x3E and 0x28 masks could produce shifts of 32 or
// more (undefined in C) and, for 4 bpp, selected the wrong nibble.

// 1 bpp r data read
#define READ_RDATA_1(r,a,p)		((p) ? ((REG((r)+(((uint32_t)a##_x >> 19) & 0x04))) >> (((uint32_t)a##_x >> 16) & 0x1F)) & 0x0001 : (REG(r) & 0x0001))

// 2 bpp r data read
#define READ_RDATA_2(r,a,p)		((p) ? ((REG((r)+(((uint32_t)a##_x >> 18) & 0x04))) >> (((uint32_t)a##_x >> 15) & 0x1E)) & 0x0003 : (REG(r) & 0x0003))

// 4 bpp r data read
#define READ_RDATA_4(r,a,p)		((p) ? ((REG((r)+(((uint32_t)a##_x >> 17) & 0x04))) >> (((uint32_t)a##_x >> 14) & 0x1C)) & 0x000F : (REG(r) & 0x000F))

// 8 bpp r data read
#define READ_RDATA_8(r,a,p)		((p) ? ((REG((r)+(((uint32_t)a##_x >> 16) & 0x04))) >> (((uint32_t)a##_x >> 13) & 0x18)) & 0x00FF : (REG(r) & 0x00FF))

// 16 bpp r data read
#define READ_RDATA_16(r,a,p)	((p) ? ((REG((r)+(((uint32_t)a##_x >> 15) & 0x04))) >> (((uint32_t)a##_x >> 12) & 0x10)) & 0xFFFF : (REG(r) & 0xFFFF))

// 32 bpp r data read
#define READ_RDATA_32(r,a,p)	((p) ? REG((r)+(((uint32_t)a##_x >> 14) & 0x04)) : REG(r))

// Depth dispatch: bits 3-5 of the flags word `f` select the pixel size
// (0 = 1 bpp ... 5 = 32 bpp); any other code reads as 0. `f` is parenthesised
// so compound arguments are shifted as a whole; it is expanded several times
// and must be free of side effects.
#define READ_RDATA(r,a,f,p) (\
	((((f)>>3)&0x07) == 0) ? (READ_RDATA_1(r,a,p)) : \
	((((f)>>3)&0x07) == 1) ? (READ_RDATA_2(r,a,p)) : \
	((((f)>>3)&0x07) == 2) ? (READ_RDATA_4(r,a,p)) : \
	((((f)>>3)&0x07) == 3) ? (READ_RDATA_8(r,a,p)) : \
	((((f)>>3)&0x07) == 4) ? (READ_RDATA_16(r,a,p)) : \
	((((f)>>3)&0x07) == 5) ? (READ_RDATA_32(r,a,p)) : 0)
// Pixel write helpers. `a` is the channel prefix (a1 or a2) pasted onto
// a##_addr and used by the PIXEL_OFFSET_n / PIXEL_SHIFT_n macros; `d` is the
// pixel value. Each macro expands to a braced statement.
//
// Sub-byte depths do a read-modify-write of the containing byte. `d` is masked
// to the pixel width first, so a wider value (for example write data built
// from inverted source/destination terms) cannot disturb the neighbouring
// pixels that share the byte.

// 1 bpp pixel write
#define WRITE_PIXEL_1(a,d) { \
	JaguarWriteByte(a##_addr+PIXEL_OFFSET_1(a), \
		(JaguarReadByte(a##_addr+PIXEL_OFFSET_1(a), BLITTER) & (~(0x01 << PIXEL_SHIFT_1(a)))) \
		| (((d) & 0x01) << PIXEL_SHIFT_1(a)), BLITTER); }

// 2 bpp pixel write
#define WRITE_PIXEL_2(a,d) { \
	JaguarWriteByte(a##_addr+PIXEL_OFFSET_2(a), \
		(JaguarReadByte(a##_addr+PIXEL_OFFSET_2(a), BLITTER) & (~(0x03 << PIXEL_SHIFT_2(a)))) \
		| (((d) & 0x03) << PIXEL_SHIFT_2(a)), BLITTER); }

// 4 bpp pixel write
#define WRITE_PIXEL_4(a,d) { \
	JaguarWriteByte(a##_addr+PIXEL_OFFSET_4(a), \
		(JaguarReadByte(a##_addr+PIXEL_OFFSET_4(a), BLITTER) & (~(0x0f << PIXEL_SHIFT_4(a)))) \
		| (((d) & 0x0f) << PIXEL_SHIFT_4(a)), BLITTER); }

// 8 bpp pixel write
#define WRITE_PIXEL_8(a,d) { JaguarWriteByte(a##_addr+PIXEL_OFFSET_8(a), (d), BLITTER); }

// 16 bpp pixel write; also logs the byte address when specialLog is set
#define WRITE_PIXEL_16(a,d) { JaguarWriteWord(a##_addr+(PIXEL_OFFSET_16(a)<<1), (d), BLITTER); if (specialLog) WriteLog("Pixel write address: %08X\n", a##_addr+(PIXEL_OFFSET_16(a)<<1)); }

// 32 bpp pixel write
#define WRITE_PIXEL_32(a,d) { JaguarWriteLong(a##_addr+(PIXEL_OFFSET_32(a)<<2), (d), BLITTER); }

// Depth dispatch: bits 3-5 of the flags word `f` select the pixel size
// (0 = 1 bpp ... 5 = 32 bpp); any other code writes nothing. The macro is a
// complete, self-terminated block -- the final line carries no continuation
// backslash, so it cannot absorb whatever follows it in the file.
#define WRITE_PIXEL(a,f,d) { \
	switch (((f)>>3)&0x07) { \
	case 0: WRITE_PIXEL_1(a,d); break; \
	case 1: WRITE_PIXEL_2(a,d); break; \
	case 2: WRITE_PIXEL_4(a,d); break; \
	case 3: WRITE_PIXEL_8(a,d); break; \
	case 4: WRITE_PIXEL_16(a,d); break; \
	case 5: WRITE_PIXEL_32(a,d); break; \
	default: break; \
	} }
290 // Width in Pixels of a Scanline
291 // This is a pretranslation of the value found in the A1 & A2 flags: It's really a floating point value
292 // of the form EEEEMM where MM is the mantissa with an implied "1." in front of it and the EEEE value is
293 // the exponent. Valid values for the exponent range from 0 to 11 (decimal). It's easiest to think of it
294 // as a floating point bit pattern being followed by a number of zeroes. So, e.g., 001101 translates to
295 // 1.01 (the "1." being implied) x (2 ^ 3) or 1010 in binary -> 10 in base 10 (i.e., 1.01 with the binary
296 // point being shifted to the right 3 places).
297 /*static uint32_t blitter_scanline_width[48] =
299 0, 0, 0, 0, // Note: This would really translate to 1, 1, 1, 1
309 1024, 1280, 1536, 1792,
310 2048, 2560, 3072, 3584
313 //static uint8_t * tom_ram_8;
314 //static uint8_t * paletteRam;
// Working state for the original blitter. Everything here is file-local and
// starts out zeroed (static storage).

static uint8_t a1ctl;

// Base addresses that the pixel and Z macros add their offsets to
static uint32_t a1_addr, a2_addr;

// Z plane displacement; ZDATA_OFFSET_16 multiplies it by 4
static int32_t a1_zoffs, a2_zoffs;

static uint32_t xadd_a1_control, xadd_a2_control;

// Pitch: the PIXEL_OFFSET_n macros scale the phrase-aligned offset by (1 + pitch)
static int32_t a1_pitch, a2_pitch;

// Blit dimensions; n_pixels seeds inner_loop for each line
static uint32_t n_pixels, n_lines;

// Window widths used as the row multiplier by the PIXEL_OFFSET_n macros
static int32_t a1_width, a2_width;

// A2 coordinates are ANDed with these after every add
static int32_t a2_mask_x, a2_mask_y;

// Per-pixel (inner loop) coordinate increments
static int32_t a1_xadd, a1_yadd;
static int32_t a2_xadd, a2_yadd;

// Non-zero selects the phrase-mode path of the READ_RDATA macros
static uint8_t a1_phrase_mode, a2_phrase_mode;

// Step values (reported by the logging as A1/A2_STEP_X/Y)
static int32_t a1_step_x = 0, a1_step_y = 0;
static int32_t a2_step_x = 0, a2_step_y = 0;

// Loop counters
static uint32_t outer_loop, inner_loop;

static uint32_t a2_psize, a1_psize;
static uint32_t gouraud_add;

// Shading: gd_ia / gd_ca are added to the running intensity / colour values
// after each pixel; colour_index (0..3) picks the active slot
static int gd_ia, gd_ca;
static int colour_index = 0;

// Running Z values, one per colour_index slot; the upper 16 bits of the
// selected entry are used as the source Z
static uint32_t z_i[4];

// A1 clip limits, compared against the integer parts of a1_x and a1_y
static int32_t a1_clip_x, a1_clip_y;
370 // In the spirit of "get it right first, *then* optimize" I've taken the liberty
371 // of removing all the unnecessary code caching. If it turns out to be a good way
372 // to optimize the blitter, then we may revisit it in the future...
375 // Generic blit handler
377 void blitter_generic(uint32_t cmd)
380 Blit! (0018FA70 <- 008DDC40) count: 2 x 13, A1/2_FLAGS: 00014218/00013C18 [cmd: 1401060C]
381 CMD -> src: SRCENX dst: DSTEN misc: a1ctl: UPDA1 UPDA2 mode: ity: PATDSEL z-op: op: LFU_CLEAR ctrl: BCOMPEN BKGWREN
382 A1 step values: -2 (X), 1 (Y)
383 A2 step values: -1 (X), 1 (Y) [mask (unused): 00000000 - FFFFFFFF/FFFFFFFF]
384 A1 -> pitch: 1 phrases, depth: 8bpp, z-off: 0, width: 320 (21), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
385 A2 -> pitch: 1 phrases, depth: 8bpp, z-off: 0, width: 192 (1E), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
386 A1 x/y: 100/12, A2 x/y: 106/0 Pattern: 000000F300000000
389 // specialLog = true;
390 /*if (cmd == 0x1401060C && blit_start_log)
391 specialLog = true;//*/
393 //uint32_t logGo = ((cmd == 0x01800E01 && REG(A1_BASE) == 0x898000) ? 1 : 0);
394 uint32_t srcdata, srczdata, dstdata, dstzdata, writedata, inhibit;
395 uint32_t bppSrc = (DSTA2 ? 1 << ((REG(A1_FLAGS) >> 3) & 0x07) : 1 << ((REG(A2_FLAGS) >> 3) & 0x07));
399 WriteLog("About to do n x m blit (BM width is ? pixels)...\n");
400 WriteLog("A1_STEP_X/Y = %08X/%08X, A2_STEP_X/Y = %08X/%08X\n", a1_step_x, a1_step_y, a2_step_x, a2_step_y);
414 WriteLog(" A1_X/Y = %08X/%08X, A2_X/Y = %08X/%08X\n", a1_x, a1_y, a2_x, a2_y);
416 uint32_t a1_start = a1_x, a2_start = a2_x, bitPos = 0;
418 //Kludge for Hover Strike...
419 //I wonder if this kludge is in conjunction with the SRCENX down below...
420 // This isn't so much a kludge but the way things work in BCOMPEN mode...!
421 if (BCOMPEN && SRCENX)
423 if (n_pixels < bppSrc)
424 bitPos = bppSrc - n_pixels;
427 inner_loop = n_pixels;
432 WriteLog(" A1_X/Y = %08X/%08X, A2_X/Y = %08X/%08X\n", a1_x, a1_y, a2_x, a2_y);
434 srcdata = srczdata = dstdata = dstzdata = writedata = inhibit = 0;
436 if (!DSTA2) // Data movement: A1 <- A2
438 // load src data and Z
440 if (SRCEN || SRCENX) // Not sure if this is correct... (seems to be...!)
442 srcdata = READ_PIXEL(a2, REG(A2_FLAGS));
445 srczdata = READ_ZDATA(a2, REG(A2_FLAGS));
446 else if (cmd & 0x0001C020) // PATDSEL | TOPBEN | TOPNEN | DSTWRZ
447 srczdata = READ_RDATA(SRCZINT, a2, REG(A2_FLAGS), a2_phrase_mode);
449 else // Use SRCDATA register...
451 srcdata = READ_RDATA(SRCDATA, a2, REG(A2_FLAGS), a2_phrase_mode);
453 if (cmd & 0x0001C020) // PATDSEL | TOPBEN | TOPNEN | DSTWRZ
454 srczdata = READ_RDATA(SRCZINT, a2, REG(A2_FLAGS), a2_phrase_mode);
457 // load dst data and Z
460 dstdata = READ_PIXEL(a1, REG(A1_FLAGS));
463 dstzdata = READ_ZDATA(a1, REG(A1_FLAGS));
465 dstzdata = READ_RDATA(DSTZ, a1, REG(A1_FLAGS), a1_phrase_mode);
469 dstdata = READ_RDATA(DSTDATA, a1, REG(A1_FLAGS), a1_phrase_mode);
472 dstzdata = READ_RDATA(DSTZ, a1, REG(A1_FLAGS), a1_phrase_mode);
475 /*This wasn't working... // a1 clipping
476 if (cmd & 0x00000040)
478 if (a1_x < 0 || a1_y < 0 || (a1_x >> 16) >= (REG(A1_CLIP) & 0x7FFF)
479 || (a1_y >> 16) >= ((REG(A1_CLIP) >> 16) & 0x7FFF))
484 srczdata = z_i[colour_index] >> 16;
486 // apply z comparator
487 if (Z_OP_INF && srczdata < dstzdata) inhibit = 1;
488 if (Z_OP_EQU && srczdata == dstzdata) inhibit = 1;
489 if (Z_OP_SUP && srczdata > dstzdata) inhibit = 1;
491 // apply data comparator
492 // Note: DCOMPEN only works in 8/16 bpp modes! !!! FIX !!!
493 // Does BCOMPEN only work in 1 bpp mode???
494 // No, but it always does a 1 bit expansion no matter what the BPP of the channel is set to. !!! FIX !!!
495 // This is bit tricky... We need to fix the XADD value so that it acts like a 1BPP value while inside
497 if (DCOMPEN | BCOMPEN)
499 //Temp, for testing Hover Strike
500 //Doesn't seem to do it... Why?
501 //What needs to happen here is twofold. First, the address generator in the outer loop has
502 //to honor the BPP when calculating the start address (which it kinda does already). Second,
503 //it has to step bit by bit when using BCOMPEN. How to do this???
505 //small problem with this approach: it's not accurate... We need a proper address to begin with
506 //and *then* we can do the bit stepping from there the way it's *supposed* to be done... !!! FIX !!!
509 uint32_t pixShift = (~bitPos) & (bppSrc - 1);
510 srcdata = (srcdata >> pixShift) & 0x01;
513 // if (bitPos % bppSrc == 0)
514 // a2_x += 0x00010000;
517 Interesting (Hover Strike--large letter):
519 Blit! (0018FA70 <- 008DDC40) count: 2 x 13, A1/2_FLAGS: 00014218/00013C18 [cmd: 1401060C]
520 CMD -> src: SRCENX dst: DSTEN misc: a1ctl: UPDA1 UPDA2 mode: ity: PATDSEL z-op: op: LFU_CLEAR ctrl: BCOMPEN BKGWREN
521 A1 step values: -2 (X), 1 (Y)
522 A2 step values: -1 (X), 1 (Y) [mask (unused): 00000000 - FFFFFFFF/FFFFFFFF]
523 A1 -> pitch: 1 phrases, depth: 8bpp, z-off: 0, width: 320 (21), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
524 A2 -> pitch: 1 phrases, depth: 8bpp, z-off: 0, width: 192 (1E), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
525 A1 x/y: 100/12, A2 x/y: 106/0 Pattern: 000000F300000000
527 Blit! (0018FA70 <- 008DDC40) count: 8 x 13, A1/2_FLAGS: 00014218/00013C18 [cmd: 1401060C]
528 CMD -> src: SRCENX dst: DSTEN misc: a1ctl: UPDA1 UPDA2 mode: ity: PATDSEL z-op: op: LFU_CLEAR ctrl: BCOMPEN BKGWREN
529 A1 step values: -8 (X), 1 (Y)
530 A2 step values: -1 (X), 1 (Y) [mask (unused): 00000000 - FFFFFFFF/FFFFFFFF]
531 A1 -> pitch: 1 phrases, depth: 8bpp, z-off: 0, width: 320 (21), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
532 A2 -> pitch: 1 phrases, depth: 8bpp, z-off: 0, width: 192 (1E), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
533 A1 x/y: 102/12, A2 x/y: 107/0 Pattern: 000000F300000000
535 Blit! (0018FA70 <- 008DDC40) count: 1 x 13, A1/2_FLAGS: 00014218/00013C18 [cmd: 1401060C]
536 CMD -> src: SRCENX dst: DSTEN misc: a1ctl: UPDA1 UPDA2 mode: ity: PATDSEL z-op: op: LFU_CLEAR ctrl: BCOMPEN BKGWREN
537 A1 step values: -1 (X), 1 (Y)
538 A2 step values: -1 (X), 1 (Y) [mask (unused): 00000000 - FFFFFFFF/FFFFFFFF]
539 A1 -> pitch: 1 phrases, depth: 8bpp, z-off: 0, width: 320 (21), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
540 A2 -> pitch: 1 phrases, depth: 8bpp, z-off: 0, width: 192 (1E), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
541 A1 x/y: 118/12, A2 x/y: 70/0 Pattern: 000000F300000000
543 Blit! (0018FA70 <- 008DDC40) count: 8 x 13, A1/2_FLAGS: 00014218/00013C18 [cmd: 1401060C]
544 CMD -> src: SRCENX dst: DSTEN misc: a1ctl: UPDA1 UPDA2 mode: ity: PATDSEL z-op: op: LFU_CLEAR ctrl: BCOMPEN BKGWREN
545 A1 step values: -8 (X), 1 (Y)
546 A2 step values: -1 (X), 1 (Y) [mask (unused): 00000000 - FFFFFFFF/FFFFFFFF]
547 A1 -> pitch: 1 phrases, depth: 8bpp, z-off: 0, width: 320 (21), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
548 A2 -> pitch: 1 phrases, depth: 8bpp, z-off: 0, width: 192 (1E), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
549 A1 x/y: 119/12, A2 x/y: 71/0 Pattern: 000000F300000000
551 Blit! (0018FA70 <- 008DDC40) count: 1 x 13, A1/2_FLAGS: 00014218/00013C18 [cmd: 1401060C]
552 CMD -> src: SRCENX dst: DSTEN misc: a1ctl: UPDA1 UPDA2 mode: ity: PATDSEL z-op: op: LFU_CLEAR ctrl: BCOMPEN BKGWREN
553 A1 step values: -1 (X), 1 (Y)
554 A2 step values: -1 (X), 1 (Y) [mask (unused): 00000000 - FFFFFFFF/FFFFFFFF]
555 A1 -> pitch: 1 phrases, depth: 8bpp, z-off: 0, width: 320 (21), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
556 A2 -> pitch: 1 phrases, depth: 8bpp, z-off: 0, width: 192 (1E), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
557 A1 x/y: 127/12, A2 x/y: 66/0 Pattern: 000000F300000000
559 Blit! (0018FA70 <- 008DDC40) count: 8 x 13, A1/2_FLAGS: 00014218/00013C18 [cmd: 1401060C]
560 CMD -> src: SRCENX dst: DSTEN misc: a1ctl: UPDA1 UPDA2 mode: ity: PATDSEL z-op: op: LFU_CLEAR ctrl: BCOMPEN BKGWREN
561 A1 step values: -8 (X), 1 (Y)
562 A2 step values: -1 (X), 1 (Y) [mask (unused): 00000000 - FFFFFFFF/FFFFFFFF]
563 A1 -> pitch: 1 phrases, depth: 8bpp, z-off: 0, width: 320 (21), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
564 A2 -> pitch: 1 phrases, depth: 8bpp, z-off: 0, width: 192 (1E), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
565 A1 x/y: 128/12, A2 x/y: 67/0 Pattern: 000000F300000000
571 //WriteLog("Blitter: BCOMPEN set on command %08X inhibit prev:%u, now:", cmd, inhibit);
572 // compare source pixel with pattern pixel
574 Blit! (000B8250 <- 0012C3A0) count: 16 x 1, A1/2_FLAGS: 00014420/00012000 [cmd: 05810001]
575 CMD -> src: SRCEN dst: misc: a1ctl: mode: ity: PATDSEL z-op: op: LFU_REPLACE ctrl: BCOMPEN
576 A1 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 384 (22), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
577 A2 -> pitch: 1 phrases, depth: 1bpp, z-off: 0, width: 16 (10), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
581 // AvP is still wrong, could be cuz it's doing A1 -> A2...
583 // Src is the 1bpp bitmap... DST is the PATTERN!!!
584 // This seems to solve at least ONE of the problems with MC3D...
585 // Why should this be inverted???
586 // Bcuz it is. This is supposed to be used only for a bit -> pixel expansion...
587 /* if (srcdata == READ_RDATA(PATTERNDATA, a2, REG(A2_FLAGS), a2_phrase_mode))
588 // if (srcdata != READ_RDATA(PATTERNDATA, a2, REG(A2_FLAGS), a2_phrase_mode))
590 /* uint32_t A2bpp = 1 << ((REG(A2_FLAGS) >> 3) & 0x07);
591 if (A2bpp == 1 || A2bpp == 16 || A2bpp == 8)
592 inhibit = (srcdata == 0 ? 1: 0);
593 // inhibit = !srcdata;
595 WriteLog("Blitter: Bad BPP (%u) selected for BCOMPEN mode!\n", A2bpp);//*/
596 // What it boils down to is this:
603 // compare destination pixel with pattern pixel
604 if (dstdata == READ_RDATA(PATTERNDATA, a1, REG(A1_FLAGS), a1_phrase_mode))
605 // if (dstdata != READ_RDATA(PATTERNDATA, a1, REG(A1_FLAGS), a1_phrase_mode))
609 // This is DEFINITELY WRONG
610 // if (a1_phrase_mode || a2_phrase_mode)
611 // inhibit = !inhibit;
616 inhibit |= (((a1_x >> 16) < a1_clip_x && (a1_x >> 16) >= 0
617 && (a1_y >> 16) < a1_clip_y && (a1_y >> 16) >= 0) ? 0 : 1);
620 // compute the write data and store
623 // Houston, we have a problem...
624 // Look here, at PATDSEL and GOURD. If both are active (as they are on the BIOS intro), then there's
626 //Blit! (00100000 <- 000095D0) count: 3 x 1, A1/2_FLAGS: 00014220/00004020 [cmd: 00011008]
627 // CMD -> src: dst: DSTEN misc: a1ctl: mode: GOURD ity: PATDSEL z-op: op: LFU_CLEAR ctrl:
628 // A1 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 320 (21), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
629 // A2 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 256 (20), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
630 // A1 x/y: 90/171, A2 x/y: 808/0 Pattern: 776D770077007700
634 // use pattern data for write data
635 writedata = READ_RDATA(PATTERNDATA, a1, REG(A1_FLAGS), a1_phrase_mode);
639 /*if (blit_start_log)
640 WriteLog("BLIT: ADDDSEL srcdata: %08X\, dstdata: %08X, ", srcdata, dstdata);//*/
642 // intensity addition
643 //Ok, this is wrong... Or is it? Yes, it's wrong! !!! FIX !!!
644 /* writedata = (srcdata & 0xFF) + (dstdata & 0xFF);
645 if (!(TOPBEN) && writedata > 0xFF)
648 writedata |= (srcdata & 0xF00) + (dstdata & 0xF00);
649 if (!(TOPNEN) && writedata > 0xFFF)
650 // writedata = 0xFFF;
652 writedata |= (srcdata & 0xF000) + (dstdata & 0xF000);//*/
653 //notneeded--writedata &= 0xFFFF;
654 /*if (blit_start_log)
655 WriteLog("writedata: %08X\n", writedata);//*/
657 Hover Strike ADDDSEL blit:
659 Blit! (00098D90 <- 0081DDC0) count: 320 x 287, A1/2_FLAGS: 00004220/00004020 [cmd: 00020208]
660 CMD -> src: dst: DSTEN misc: a1ctl: UPDA1 mode: ity: ADDDSEL z-op: op: LFU_CLEAR ctrl:
661 A1 step values: -320 (X), 1 (Y)
662 A1 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 320 (21), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
663 A2 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 256 (20), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
664 A1 x/y: 0/0, A2 x/y: 3288/0 Pattern: 0000000000000000 SRCDATA: 00FD00FD00FD00FD
666 writedata = (srcdata & 0xFF) + (dstdata & 0xFF);
670 //This is correct now, but slow...
671 int16_t s = (srcdata & 0xFF) | (srcdata & 0x80 ? 0xFF00 : 0x0000),
680 writedata = (uint32_t)sum;
683 //This doesn't seem right... Looks like it would muck up the low byte... !!! FIX !!!
684 writedata |= (srcdata & 0xF00) + (dstdata & 0xF00);
686 if (!TOPNEN && writedata > 0xFFF)
691 writedata |= (srcdata & 0xF000) + (dstdata & 0xF000);
695 if (LFU_NAN) writedata |= ~srcdata & ~dstdata;
696 if (LFU_NA) writedata |= ~srcdata & dstdata;
697 if (LFU_AN) writedata |= srcdata & ~dstdata;
698 if (LFU_A) writedata |= srcdata & dstdata;
701 //Although, this looks like it's OK... (even if it is shitty!)
702 //According to JTRM, this is part of the four things the blitter does with the write data (the other
703 //three being PATDSEL, ADDDSEL, and LFU (default). I'm not sure which gets precedence, this or PATDSEL
704 //(see above blit example)...
706 writedata = ((gd_c[colour_index]) << 8) | (gd_i[colour_index] >> 16);
710 int intensity = srcdata & 0xFF;
711 int ia = gd_ia >> 16;
713 ia = 0xFFFFFF00 | ia;
717 if (intensity > 0xFF)
719 writedata = (srcdata & 0xFF00) | intensity;
728 //Tried 2nd below for Hover Strike: No dice.
729 if (/*a1_phrase_mode || */BKGWREN || !inhibit)
730 // if (/*a1_phrase_mode || BKGWREN ||*/ !inhibit)
732 /*if (((REG(A1_FLAGS) >> 3) & 0x07) == 5)
734 uint32_t offset = a1_addr+(PIXEL_OFFSET_32(a1)<<2);
735 // (((((uint32_t)a##_y >> 16) * a##_width) + (((uint32_t)a##_x >> 16) & ~1)) * (1 + a##_pitch) + (((uint32_t)a##_x >> 16) & 1))
736 if ((offset >= 0x1FF020 && offset <= 0x1FF03F) || (offset >= 0x1FF820 && offset <= 0x1FF83F))
737 WriteLog("32bpp pixel write: A1 Phrase mode --> ");
739 // write to the destination
740 WRITE_PIXEL(a1, REG(A1_FLAGS), writedata);
742 WRITE_ZDATA(a1, REG(A1_FLAGS), srczdata);
745 else // if (DSTA2) // Data movement: A1 -> A2
747 // load src data and Z
750 srcdata = READ_PIXEL(a1, REG(A1_FLAGS));
752 srczdata = READ_ZDATA(a1, REG(A1_FLAGS));
753 else if (cmd & 0x0001C020) // PATDSEL | TOPBEN | TOPNEN | DSTWRZ
754 srczdata = READ_RDATA(SRCZINT, a1, REG(A1_FLAGS), a1_phrase_mode);
758 srcdata = READ_RDATA(SRCDATA, a1, REG(A1_FLAGS), a1_phrase_mode);
759 if (cmd & 0x001C020) // PATDSEL | TOPBEN | TOPNEN | DSTWRZ
760 srczdata = READ_RDATA(SRCZINT, a1, REG(A1_FLAGS), a1_phrase_mode);
763 // load dst data and Z
766 dstdata = READ_PIXEL(a2, REG(A2_FLAGS));
768 dstzdata = READ_ZDATA(a2, REG(A2_FLAGS));
770 dstzdata = READ_RDATA(DSTZ, a2, REG(A2_FLAGS), a2_phrase_mode);
774 dstdata = READ_RDATA(DSTDATA, a2, REG(A2_FLAGS), a2_phrase_mode);
776 dstzdata = READ_RDATA(DSTZ, a2, REG(A2_FLAGS), a2_phrase_mode);
780 srczdata = z_i[colour_index] >> 16;
782 // apply z comparator
783 if (Z_OP_INF && srczdata < dstzdata) inhibit = 1;
784 if (Z_OP_EQU && srczdata == dstzdata) inhibit = 1;
785 if (Z_OP_SUP && srczdata > dstzdata) inhibit = 1;
787 // apply data comparator
788 //NOTE: The bit comparator (BCOMPEN) is NOT the same at the data comparator!
789 if (DCOMPEN | BCOMPEN)
793 // compare source pixel with pattern pixel
794 // AvP: Numbers are correct, but sprites are not!
795 //This doesn't seem to be a problem... But could still be wrong...
796 /* if (srcdata == READ_RDATA(PATTERNDATA, a1, REG(A1_FLAGS), a1_phrase_mode))
797 // if (srcdata != READ_RDATA(PATTERNDATA, a1, REG(A1_FLAGS), a1_phrase_mode))
799 // This is probably not 100% correct... It works in the 1bpp case
800 // (in A1 <- A2 mode, that is...)
801 // AvP: This is causing blocks to be written instead of bit patterns...
803 // NOTE: We really should separate out the BCOMPEN & DCOMPEN stuff!
804 /* uint32_t A1bpp = 1 << ((REG(A1_FLAGS) >> 3) & 0x07);
805 if (A1bpp == 1 || A1bpp == 16 || A1bpp == 8)
806 inhibit = (srcdata == 0 ? 1: 0);
808 WriteLog("Blitter: Bad BPP (%u) selected for BCOMPEN mode!\n", A1bpp);//*/
809 // What it boils down to is this:
815 // compare destination pixel with pattern pixel
816 if (dstdata == READ_RDATA(PATTERNDATA, a2, REG(A2_FLAGS), a2_phrase_mode))
817 // if (dstdata != READ_RDATA(PATTERNDATA, a2, REG(A2_FLAGS), a2_phrase_mode))
821 // This is DEFINITELY WRONG
822 // if (a1_phrase_mode || a2_phrase_mode)
823 // inhibit = !inhibit;
828 inhibit |= (((a1_x >> 16) < a1_clip_x && (a1_x >> 16) >= 0
829 && (a1_y >> 16) < a1_clip_y && (a1_y >> 16) >= 0) ? 0 : 1);
832 // compute the write data and store
837 // use pattern data for write data
838 writedata = READ_RDATA(PATTERNDATA, a2, REG(A2_FLAGS), a2_phrase_mode);
842 // intensity addition
843 writedata = (srcdata & 0xFF) + (dstdata & 0xFF);
844 if (!(TOPBEN) && writedata > 0xFF)
846 writedata |= (srcdata & 0xF00) + (dstdata & 0xF00);
847 if (!(TOPNEN) && writedata > 0xFFF)
849 writedata |= (srcdata & 0xF000) + (dstdata & 0xF000);
854 writedata |= ~srcdata & ~dstdata;
856 writedata |= ~srcdata & dstdata;
858 writedata |= srcdata & ~dstdata;
860 writedata |= srcdata & dstdata;
864 writedata = ((gd_c[colour_index]) << 8) | (gd_i[colour_index] >> 16);
868 int intensity = srcdata & 0xFF;
869 int ia = gd_ia >> 16;
871 ia = 0xFFFFFF00 | ia;
875 if (intensity > 0xFF)
877 writedata = (srcdata & 0xFF00) | intensity;
886 if (/*a2_phrase_mode || */BKGWREN || !inhibit)
890 uint32_t offset = a2_addr+(PIXEL_OFFSET_16(a2)<<1);
891 // (((((uint32_t)a##_y >> 16) * a##_width) + (((uint32_t)a##_x >> 16) & ~1)) * (1 + a##_pitch) + (((uint32_t)a##_x >> 16) & 1))
892 WriteLog("[%08X:%04X] ", offset, writedata);
894 // write to the destination
895 WRITE_PIXEL(a2, REG(A2_FLAGS), writedata);
898 WRITE_ZDATA(a2, REG(A2_FLAGS), srczdata);
902 // Update x and y (inner loop)
903 //Now it does! But crappy, crappy, crappy! !!! FIX !!! [DONE]
904 //This is less than ideal, but it works...
907 a1_x += a1_xadd, a1_y += a1_yadd;
908 a2_x = (a2_x + a2_xadd) & a2_mask_x, a2_y = (a2_y + a2_yadd) & a2_mask_y;
912 a1_y += a1_yadd, a2_y = (a2_y + a2_yadd) & a2_mask_y;
916 if (bitPos % bppSrc == 0)
917 a2_x = (a2_x + a2_xadd) & a2_mask_x;
921 a2_x = (a2_x + a2_xadd) & a2_mask_x;
922 if (bitPos % bppSrc == 0)
928 z_i[colour_index] += zadd;
930 if (GOURD || SRCSHADE)
932 gd_i[colour_index] += gd_ia;
933 //Hmm, this doesn't seem to do anything...
934 //But it is correct according to the JTRM...!
935 if ((int32_t)gd_i[colour_index] < 0)
936 gd_i[colour_index] = 0;
937 if (gd_i[colour_index] > 0x00FFFFFF)
938 gd_i[colour_index] = 0x00FFFFFF;//*/
940 gd_c[colour_index] += gd_ca;
941 if ((int32_t)gd_c[colour_index] < 0)
942 gd_c[colour_index] = 0;
943 if (gd_c[colour_index] > 0x000000FF)
944 gd_c[colour_index] = 0x000000FF;//*/
947 if (GOURD || SRCSHADE || GOURZ)
950 //This screws things up WORSE (for the BIOS opening screen)
951 // if (a1_phrase_mode || a2_phrase_mode)
952 colour_index = (colour_index + 1) & 0x03;
957 Here's the problem... The phrase mode code!
958 Blit! (00100000 -> 00148000) count: 327 x 267, A1/2_FLAGS: 00004420/00004420 [cmd: 41802E01]
959 CMD -> src: SRCEN dst: misc: a1ctl: UPDA1 UPDA2 mode: DSTA2 GOURZ ity: z-op: op: LFU_REPLACE ctrl: SRCSHADE
960 A1 step values: -327 (X), 1 (Y)
961 A2 step values: -327 (X), 1 (Y) [mask (unused): 00000000 - FFFFFFFF/FFFFFFFF]
962 A1 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 384 (22), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
963 A2 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 384 (22), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
964 A1 x/y: 28/58, A2 x/y: 28/58 Pattern: 00EA7BEA77EA77EA SRCDATA: 7BFF7BFF7BFF7BFF
966 Below fixes it, but then borks:
969 Blit! (00110000 <- 0010B2A8) count: 12 x 12, A1/2_FLAGS: 000042E2/00000020 [cmd: 09800609]
970 CMD -> src: SRCEN dst: DSTEN misc: a1ctl: UPDA1 UPDA2 mode: ity: z-op: op: LFU_REPLACE ctrl: DCOMPEN
971 A1 step values: -15 (X), 1 (Y)
972 A2 step values: -4 (X), 0 (Y) [mask (unused): 00000000 - FFFFFFFF/FFFFFFFF]
973 A1 -> pitch: 4 phrases, depth: 16bpp, z-off: 3, width: 320 (21), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
974 A2 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 1 (00), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
975 A1 x/y: 173/144, A2 x/y: 4052/0
977 Lesse, with pre-add we'd have:
980 00001111222233334444555566667777
983 |rolls back to here. Hmm.
986 //NOTE: The way to fix the CD BIOS is to uncomment below and comment the stuff after
987 // the phrase mode mucking around. But it fucks up everything else...
988 //#define SCREWY_CD_DEPENDENT
989 #ifdef SCREWY_CD_DEPENDENT
993 a2_y += a2_step_y;//*/
996 //New: Phrase mode taken into account! :-p
997 /* if (a1_phrase_mode) // v1
999 // Bump the pointer to the next phrase boundary
1000 // Even though it works, this is crappy... Clean it up!
1001 uint32_t size = 64 / a1_psize;
1003 // Crappy kludge... ('aligning' source to destination)
1004 if (a2_phrase_mode && DSTA2)
1006 uint32_t extra = (a2_start >> 16) % size;
1007 a1_x += extra << 16;
1010 uint32_t newx = (a1_x >> 16) / size;
1011 uint32_t newxrem = (a1_x >> 16) % size;
1013 a1_x |= (((newx + (newxrem == 0 ? 0 : 1)) * size) & 0xFFFF) << 16;
1015 if (a1_phrase_mode) // v2
1017 // Bump the pointer to the next phrase boundary
1018 // Even though it works, this is crappy... Clean it up!
1019 uint32_t size = 64 / a1_psize;
1021 // Crappy kludge... ('aligning' source to destination)
1022 if (a2_phrase_mode && DSTA2)
1024 uint32_t extra = (a2_start >> 16) % size;
1025 a1_x += extra << 16;
1028 uint32_t pixelSize = (size - 1) << 16;
1029 a1_x = (a1_x + pixelSize) & ~pixelSize;
1032 /* if (a2_phrase_mode) // v1
1034 // Bump the pointer to the next phrase boundary
1035 // Even though it works, this is crappy... Clean it up!
1036 uint32_t size = 64 / a2_psize;
1038 // Crappy kludge... ('aligning' source to destination)
1039 // Prolly should do this for A1 channel as well... [DONE]
1040 if (a1_phrase_mode && !DSTA2)
1042 uint32_t extra = (a1_start >> 16) % size;
1043 a2_x += extra << 16;
1046 uint32_t newx = (a2_x >> 16) / size;
1047 uint32_t newxrem = (a2_x >> 16) % size;
1049 a2_x |= (((newx + (newxrem == 0 ? 0 : 1)) * size) & 0xFFFF) << 16;
1051 if (a2_phrase_mode) // v1
1053 // Bump the pointer to the next phrase boundary
1054 // Even though it works, this is crappy... Clean it up!
1055 uint32_t size = 64 / a2_psize;
1057 // Crappy kludge... ('aligning' source to destination)
1058 // Prolly should do this for A1 channel as well... [DONE]
1059 if (a1_phrase_mode && !DSTA2)
1061 uint32_t extra = (a1_start >> 16) % size;
1062 a2_x += extra << 16;
1065 uint32_t pixelSize = (size - 1) << 16;
1066 a2_x = (a2_x + pixelSize) & ~pixelSize;
1069 //Not entirely: This still mucks things up... !!! FIX !!!
1070 //Should this go before or after the phrase mode mucking around?
1071 #ifndef SCREWY_CD_DEPENDENT
1075 a2_y += a2_step_y;//*/
1079 // write values back to registers
1080 WREG(A1_PIXEL, (a1_y & 0xFFFF0000) | ((a1_x >> 16) & 0xFFFF));
1081 WREG(A1_FPIXEL, (a1_y << 16) | (a1_x & 0xFFFF));
1082 WREG(A2_PIXEL, (a2_y & 0xFFFF0000) | ((a2_x >> 16) & 0xFFFF));
// Entry point of the original ("fast") blitter: decodes the command word and
// the A1/A2 address generator registers from blitter_ram into the file-level
// working variables, optionally logs the setup, then runs blitter_generic(cmd)
// with blitter_working set for the duration of the blit.
// cmd: the 32-bit blitter command word.
// NOTE(review): this listing is missing short lines (braces, declarations,
// comment delimiters), so which parts are live code vs. disabled text cannot
// be determined from here.
1086 void blitter_blit(uint32_t cmd)
1088 //Apparently this is doing *something*, just not sure exactly what...
1089 /*if (cmd == 0x41802E01)
1091 WriteLog("BLIT: Found our blit. Was: %08X ", cmd);
1093 WriteLog("Is: %08X\n", cmd);
// Lookup used below to turn the low two bits of A1/A2_FLAGS into a pitch value.
1096 uint32_t pitchValue[4] = { 0, 1, 3, 2 };
// Slice cmd into its consecutive bit fields (bit 3 upwards).
1099 dst = (cmd >> 3) & 0x07;
1100 misc = (cmd >> 6) & 0x03;
1101 a1ctl = (cmd >> 8) & 0x7;
1102 mode = (cmd >> 11) & 0x07;
1103 ity = (cmd >> 14) & 0x0F;
1104 zop = (cmd >> 18) & 0x07;
1105 op = (cmd >> 21) & 0x0F;
1106 ctrl = (cmd >> 25) & 0x3F;
1108 // Addresses in A1/2_BASE are *phrase* aligned, i.e., bottom three bits are ignored!
1109 // NOTE: This fixes Rayman's bad collision detection AND keeps T2K working!
1110 a1_addr = REG(A1_BASE) & 0xFFFFFFF8;
1111 a2_addr = REG(A2_BASE) & 0xFFFFFFF8;
1113 a1_zoffs = (REG(A1_FLAGS) >> 6) & 7;
1114 a2_zoffs = (REG(A2_FLAGS) >> 6) & 7;
1116 xadd_a1_control = (REG(A1_FLAGS) >> 16) & 0x03;
1117 xadd_a2_control = (REG(A2_FLAGS) >> 16) & 0x03;
1119 a1_pitch = pitchValue[(REG(A1_FLAGS) & 0x03)];
1120 a2_pitch = pitchValue[(REG(A2_FLAGS) & 0x03)];
// Low half of PIXLINECOUNTER is the inner (pixel) count, high half the line count.
1122 n_pixels = REG(PIXLINECOUNTER) & 0xFFFF;
1123 n_lines = (REG(PIXLINECOUNTER) >> 16) & 0xFFFF;
// A1 position is assembled as 16.16: integer part from A1_PIXEL, fraction from A1_FPIXEL.
1125 a1_x = (REG(A1_PIXEL) << 16) | (REG(A1_FPIXEL) & 0xFFFF);
1126 a1_y = (REG(A1_PIXEL) & 0xFFFF0000) | (REG(A1_FPIXEL) >> 16);
1127 //According to the JTRM, X is restricted to 15 bits and Y is restricted to 12.
1128 //But it seems to fuck up T2K! !!! FIX !!!
1129 //Could it be sign extended??? Doesn't seem to be so according to JTRM
1130 // a1_x &= 0x7FFFFFFF, a1_y &= 0x0FFFFFFF;
1131 //Actually, it says that the X is 16 bits. But it still seems to mess with the Y when restricted to 12...
1132 // a1_y &= 0x0FFFFFFF;
1134 // a1_width = blitter_scanline_width[((REG(A1_FLAGS) & 0x00007E00) >> 9)];
1135 // According to JTRM, this must give a *whole number* of phrases in the current
1136 // pixel size (this means the lookup above is WRONG)... !!! FIX !!!
// Width field decode: 2-bit mantissa m (with an implied leading 1) and 4-bit
// exponent e, i.e. width = ((4 | m) << e) >> 2.
1137 uint32_t m = (REG(A1_FLAGS) >> 9) & 0x03, e = (REG(A1_FLAGS) >> 11) & 0x0F;
1138 a1_width = ((0x04 | m) << e) >> 2;//*/
// A2 has no fractional pixel register here: the fraction bits start at zero.
1140 a2_x = (REG(A2_PIXEL) & 0x0000FFFF) << 16;
1141 a2_y = (REG(A2_PIXEL) & 0xFFFF0000);
1142 //According to the JTRM, X is restricted to 15 bits and Y is restricted to 12.
1143 //But it seems to fuck up T2K! !!! FIX !!!
1144 // a2_x &= 0x7FFFFFFF, a2_y &= 0x0FFFFFFF;
1145 //Actually, it says that the X is 16 bits. But it still seems to mess with the Y when restricted to 12...
1146 // a2_y &= 0x0FFFFFFF;
1148 // a2_width = blitter_scanline_width[((REG(A2_FLAGS) & 0x00007E00) >> 9)];
1149 // According to JTRM, this must give a *whole number* of phrases in the current
1150 // pixel size (this means the lookup above is WRONG)... !!! FIX !!!
1151 m = (REG(A2_FLAGS) >> 9) & 0x03, e = (REG(A2_FLAGS) >> 11) & 0x0F;
1152 a2_width = ((0x04 | m) << e) >> 2;//*/
// Masks are widened to 16.16 with all fraction bits set so fractions pass through.
1153 a2_mask_x = ((REG(A2_MASK) & 0x0000FFFF) << 16) | 0xFFFF;
1154 a2_mask_y = (REG(A2_MASK) & 0xFFFF0000) | 0xFFFF;
1156 // Check for "use mask" flag
1157 if (!(REG(A2_FLAGS) & 0x8000))
1159 a2_mask_x = 0xFFFFFFFF; // must be 16.16
1160 a2_mask_y = 0xFFFFFFFF; // must be 16.16
1165 // According to the official documentation, a hardware bug ties A2's yadd bit to A1's...
1166 a2_yadd = a1_yadd = (YADD1_A1 ? 1 << 16 : 0);
1171 // determine a1_xadd
1172 switch (xadd_a1_control)
1175 // This is a documented Jaguar bug relating to phrase mode and truncation... Look into it!
1176 // add phrase offset to X and truncate
1181 // add pixelsize (1) to X
1185 // add zero (for those nice vertical lines)
1189 // add the contents of the increment register
1190 a1_xadd = (REG(A1_INC) << 16) | (REG(A1_FINC) & 0x0000FFFF);
1191 a1_yadd = (REG(A1_INC) & 0xFFFF0000) | (REG(A1_FINC) >> 16);
1196 //Blit! (0011D000 -> 000B9600) count: 228 x 1, A1/2_FLAGS: 00073820/00064220 [cmd: 41802801]
1197 // A1 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 128 (1C), addctl: XADDINC YADD1 XSIGNADD YSIGNADD
1198 // A2 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 320 (21), addctl: XADD0 YADD1 XSIGNADD YSIGNADD
1199 //if (YADD1_A1 && YADD1_A2 && xadd_a2_control == XADD0 && xadd_a1_control == XADDINC)// &&
1200 // uint32_t a1f = REG(A1_FLAGS), a2f = REG(A2_FLAGS);
1201 //Ok, so this ISN'T it... Prolly the XADDPHR code above that's doing it...
1202 //if (REG(A1_FLAGS) == 0x00073820 && REG(A2_FLAGS) == 0x00064220 && cmd == 0x41802801)
1203 // A1 x/y: 14368/7, A2 x/y: 150/36
1204 //This is it... The problem...
1205 //if ((a1_x >> 16) == 14368) // 14368 = $3820
1206 // return; //Lesse what we got...
1216 // determine a2_xadd
1217 switch (xadd_a2_control)
1220 // add phrase offset to X and truncate
1225 // add pixelsize (1) to X
1229 // add zero (for those nice vertical lines)
1232 //This really isn't a valid bit combo for A2... Shouldn't this cause the blitter to just say no?
1234 WriteLog("BLIT: Asked to use invalid bit combo (XADDINC) for A2...\n");
1235 // add the contents of the increment register
1236 // since there is no register for a2 we just add 1
1237 //Let's do nothing, since it's not listed as a valid bit combo...
1238 // a2_xadd = 1 << 16;
1245 // Modify outer loop steps based on blitter command
// A1 step is built in two parts: fraction from A1_FSTEP, integer from A1_STEP.
// NOTE(review): the conditions guarding each part are not visible in this listing.
1253 a1_step_x = (REG(A1_FSTEP) & 0xFFFF),
1254 a1_step_y = (REG(A1_FSTEP) >> 16);
1257 a1_step_x |= ((REG(A1_STEP) & 0x0000FFFF) << 16),
1258 a1_step_y |= ((REG(A1_STEP) & 0xFFFF0000));
1261 a2_step_x = (REG(A2_STEP) & 0x0000FFFF) << 16,
1262 a2_step_y = (REG(A2_STEP) & 0xFFFF0000);
1264 outer_loop = n_lines;
// Clip window: 15 bits each for X (low half) and Y (high half) of A1_CLIP.
1269 a1_clip_x = REG(A1_CLIP) & 0x7FFF,
1270 a1_clip_y = (REG(A1_CLIP) >> 16) & 0x7FFF;
1272 // This phrase sizing is incorrect as well... !!! FIX !!! [NOTHING TO FIX]
1273 // Err, this is pixel size... (and it's OK)
1274 a2_psize = 1 << ((REG(A2_FLAGS) >> 3) & 0x07);
1275 a1_psize = 1 << ((REG(A1_FLAGS) >> 3) & 0x07);
// Load the four per-pixel Z values from consecutive 32-bit registers at PHRASEZ0.
1282 for(int v=0; v<4; v++)
1283 z_i[v] = REG(PHRASEZ0 + v*4);
1287 if (GOURD || GOURZ || SRCSHADE)
// Per-slot shading state: colour byte comes from PATTERNDATA, and the 24-bit
// intensity is one PATTERNDATA byte followed by two SRCDATA bytes. Slot 0 uses
// the highest byte offsets (6/7) and slot 3 the lowest (0/1).
1289 gd_c[0] = blitter_ram[PATTERNDATA + 6];
1290 gd_i[0] = ((uint32_t)blitter_ram[PATTERNDATA + 7] << 16)
1291 | ((uint32_t)blitter_ram[SRCDATA + 6] << 8) | blitter_ram[SRCDATA + 7];
1293 gd_c[1] = blitter_ram[PATTERNDATA + 4];
1294 gd_i[1] = ((uint32_t)blitter_ram[PATTERNDATA + 5] << 16)
1295 | ((uint32_t)blitter_ram[SRCDATA + 4] << 8) | blitter_ram[SRCDATA + 5];
1297 gd_c[2] = blitter_ram[PATTERNDATA + 2];
1298 gd_i[2] = ((uint32_t)blitter_ram[PATTERNDATA + 3] << 16)
1299 | ((uint32_t)blitter_ram[SRCDATA + 2] << 8) | blitter_ram[SRCDATA + 3];
1301 gd_c[3] = blitter_ram[PATTERNDATA + 0];
1302 gd_i[3] = ((uint32_t)blitter_ram[PATTERNDATA + 1] << 16)
1303 | ((uint32_t)blitter_ram[SRCDATA + 0] << 8) | blitter_ram[SRCDATA + 1];
1305 gouraud_add = REG(INTENSITYINC);
// Low 24 bits are the intensity increment, sign-extended from bit 23.
1307 gd_ia = gouraud_add & 0x00FFFFFF;
1308 if (gd_ia & 0x00800000)
1309 gd_ia = 0xFF000000 | gd_ia;
// Top 8 bits are the colour increment, sign-extended from bit 7.
1311 gd_ca = (gouraud_add >> 24) & 0xFF;
1312 if (gd_ca & 0x00000080)
1313 gd_ca = 0xFFFFFF00 | gd_ca;
1316 // Bit comparator fixing...
1319 // Determine the data flow direction...
1321 a2_step_x /= (1 << ((REG(A2_FLAGS) >> 3) & 0x07));
1325 /* if (BCOMPEN)//Kludge for Hover Strike... !!! FIX !!!
1327 // Determine the data flow direction...
1335 WriteLog("Blit!\n");
1336 WriteLog(" cmd = 0x%.8x\n",cmd);
1337 WriteLog(" a1_base = %08X\n", a1_addr);
1338 WriteLog(" a1_pitch = %d\n", a1_pitch);
1339 WriteLog(" a1_psize = %d\n", a1_psize);
1340 WriteLog(" a1_width = %d\n", a1_width);
1341 WriteLog(" a1_xadd = %f (phrase=%d)\n", (float)a1_xadd / 65536.0, a1_phrase_mode);
1342 WriteLog(" a1_yadd = %f\n", (float)a1_yadd / 65536.0);
1343 WriteLog(" a1_xstep = %f\n", (float)a1_step_x / 65536.0);
1344 WriteLog(" a1_ystep = %f\n", (float)a1_step_y / 65536.0);
1345 WriteLog(" a1_x = %f\n", (float)a1_x / 65536.0);
1346 WriteLog(" a1_y = %f\n", (float)a1_y / 65536.0);
1347 WriteLog(" a1_zoffs = %i\n",a1_zoffs);
1349 WriteLog(" a2_base = %08X\n", a2_addr);
1350 WriteLog(" a2_pitch = %d\n", a2_pitch);
1351 WriteLog(" a2_psize = %d\n", a2_psize);
1352 WriteLog(" a2_width = %d\n", a2_width);
1353 WriteLog(" a2_xadd = %f (phrase=%d)\n", (float)a2_xadd / 65536.0, a2_phrase_mode);
1354 WriteLog(" a2_yadd = %f\n", (float)a2_yadd / 65536.0);
1355 WriteLog(" a2_xstep = %f\n", (float)a2_step_x / 65536.0);
1356 WriteLog(" a2_ystep = %f\n", (float)a2_step_y / 65536.0);
1357 WriteLog(" a2_x = %f\n", (float)a2_x / 65536.0);
1358 WriteLog(" a2_y = %f\n", (float)a2_y / 65536.0);
1359 WriteLog(" a2_mask_x= 0x%.4x\n",a2_mask_x);
1360 WriteLog(" a2_mask_y= 0x%.4x\n",a2_mask_y);
1361 WriteLog(" a2_zoffs = %i\n",a2_zoffs);
1363 WriteLog(" count = %d x %d\n", n_pixels, n_lines);
1365 WriteLog(" command = %08X\n", cmd);
1366 WriteLog(" dsten = %i\n",DSTEN);
1367 WriteLog(" srcen = %i\n",SRCEN);
1368 WriteLog(" patdsel = %i\n",PATDSEL);
1369 WriteLog(" color = 0x%.8x\n",REG(PATTERNDATA));
1370 WriteLog(" dcompen = %i\n",DCOMPEN);
1371 WriteLog(" bcompen = %i\n",BCOMPEN);
1372 WriteLog(" cmpdst = %i\n",CMPDST);
1373 WriteLog(" GOURZ = %i\n",GOURZ);
1374 WriteLog(" GOURD = %i\n",GOURD);
1375 WriteLog(" SRCSHADE= %i\n",SRCSHADE);
1379 //NOTE: Pitch is ignored!
1381 //This *might* be the altimeter blits (they are)...
1382 //On captured screen, x-pos for black (inner) is 259, for pink is 257
1383 //Black is short by 3, pink is short by 1...
1385 Blit! (00110000 <- 000BF010) count: 9 x 31, A1/2_FLAGS: 000042E2/00010020 [cmd: 00010200]
1386 CMD -> src: dst: misc: a1ctl: UPDA1 mode: ity: PATDSEL z-op: op: LFU_CLEAR ctrl:
1387 A1 -> pitch: 4 phrases, depth: 16bpp, z-off: 3, width: 320 (21), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
1388 A2 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 1 (00), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
1389 A1 x/y: 262/124, A2 x/y: 128/0
1390 Blit! (00110000 <- 000BF010) count: 5 x 38, A1/2_FLAGS: 000042E2/00010020 [cmd: 00010200]
1391 CMD -> src: dst: misc: a1ctl: UPDA1 mode: ity: PATDSEL z-op: op: LFU_CLEAR ctrl:
1392 A1 -> pitch: 4 phrases, depth: 16bpp, z-off: 3, width: 320 (21), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
1393 A2 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 1 (00), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
1394 A1 x/y: 264/117, A2 x/y: 407/0
1396 Blit! (00110000 <- 000BF010) count: 9 x 23, A1/2_FLAGS: 000042E2/00010020 [cmd: 00010200]
1397 CMD -> src: dst: misc: a1ctl: UPDA1 mode: ity: PATDSEL z-op: op: LFU_CLEAR ctrl:
1398 A1 step values: -10 (X), 1 (Y)
1399 A1 -> pitch: 4(2) phrases, depth: 16bpp, z-off: 3, width: 320 (21), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
1400 A2 -> pitch: 1(0) phrases, depth: 16bpp, z-off: 0, width: 1 (00), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
1401 A1 x/y: 262/132, A2 x/y: 129/0
1402 Blit! (00110000 <- 000BF010) count: 5 x 27, A1/2_FLAGS: 000042E2/00010020 [cmd: 00010200]
1403 CMD -> src: dst: misc: a1ctl: UPDA1 mode: ity: PATDSEL z-op: op: LFU_CLEAR ctrl:
1404 A1 step values: -8 (X), 1 (Y)
1405 A1 -> pitch: 4(2) phrases, depth: 16bpp, z-off: 3, width: 320 (21), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
1406 A2 -> pitch: 1(0) phrases, depth: 16bpp, z-off: 0, width: 1 (00), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
1407 A1 x/y: 264/128, A2 x/y: 336/0
1409 264v vCursor ends up here...
1413 262v vCursor ends up here...
1417 Fixed! Now for more:
1419 ; This looks like the ship icon in the upper left corner...
1421 Blit! (00110000 <- 0010B2A8) count: 11 x 12, A1/2_FLAGS: 000042E2/00000020 [cmd: 09800609]
1422 CMD -> src: SRCEN dst: DSTEN misc: a1ctl: UPDA1 UPDA2 mode: ity: z-op: op: LFU_REPLACE ctrl: DCOMPEN
1423 A1 step values: -12 (X), 1 (Y)
1424 A2 step values: 0 (X), 0 (Y) [mask (unused): 00000000 - FFFFFFFF/FFFFFFFF]
1425 A1 -> pitch: 4 phrases, depth: 16bpp, z-off: 3, width: 320 (21), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
1426 A2 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 1 (00), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
1427 A1 x/y: 20/24, A2 x/y: 5780/0
1431 More (not sure this is a blitter problem as much as it's a GPU problem):
1432 All but the "M" are trashed...
1433 This does *NOT* look like a blitter problem, as it's rendering properly...
1434 Actually, if you look at the A1 step values, there IS a discrepancy!
1438 Blit! (00110000 <- 0010B2A8) count: 12 x 12, A1/2_FLAGS: 000042E2/00000020 [cmd: 09800609]
1439 CMD -> src: SRCEN dst: DSTEN misc: a1ctl: UPDA1 UPDA2 mode: ity: z-op: op: LFU_REPLACE ctrl: DCOMPEN
1440 A1 step values: -14 (X), 1 (Y)
1441 A2 step values: -4 (X), 0 (Y) [mask (unused): 00000000 - FFFFFFFF/FFFFFFFF]
1442 A1 -> pitch: 4 phrases, depth: 16bpp, z-off: 3, width: 320 (21), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
1443 A2 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 1 (00), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
1444 A1 x/y: 134/144, A2 x/y: 2516/0
1449 Blit! (00110000 <- 0010B2A8) count: 12 x 12, A1/2_FLAGS: 000042E2/00000020 [cmd: 09800609]
1450 CMD -> src: SRCEN dst: DSTEN misc: a1ctl: UPDA1 UPDA2 mode: ity: z-op: op: LFU_REPLACE ctrl: DCOMPEN
1451 A1 step values: -13 (X), 1 (Y)
1452 A2 step values: -4 (X), 0 (Y) [mask (unused): 00000000 - FFFFFFFF/FFFFFFFF]
1453 A1 -> pitch: 4 phrases, depth: 16bpp, z-off: 3, width: 320 (21), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
1454 A2 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 1 (00), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
1455 A1 x/y: 147/144, A2 x/y: 2660/0
1459 Blit! (00110000 <- 0010B2A8) count: 12 x 12, A1/2_FLAGS: 000042E2/00000020 [cmd: 09800609]
1460 CMD -> src: SRCEN dst: DSTEN misc: a1ctl: UPDA1 UPDA2 mode: ity: z-op: op: LFU_REPLACE ctrl: DCOMPEN
1461 A1 step values: -12 (X), 1 (Y)
1462 A2 step values: 0 (X), 0 (Y) [mask (unused): 00000000 - FFFFFFFF/FFFFFFFF]
1463 A1 -> pitch: 4 phrases, depth: 16bpp, z-off: 3, width: 320 (21), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
1464 A2 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 1 (00), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
1465 A1 x/y: 160/144, A2 x/y: 3764/0
1469 Blit! (00110000 <- 0010B2A8) count: 12 x 12, A1/2_FLAGS: 000042E2/00000020 [cmd: 09800609]
1470 CMD -> src: SRCEN dst: DSTEN misc: a1ctl: UPDA1 UPDA2 mode: ity: z-op: op: LFU_REPLACE ctrl: DCOMPEN
1471 A1 step values: -15 (X), 1 (Y)
1472 A2 step values: -4 (X), 0 (Y) [mask (unused): 00000000 - FFFFFFFF/FFFFFFFF]
1473 A1 -> pitch: 4 phrases, depth: 16bpp, z-off: 3, width: 320 (21), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
1474 A2 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 1 (00), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
1475 A1 x/y: 173/144, A2 x/y: 4052/0
1478 //extern int op_start_log;
// Concise one-blit summary: the tables below turn flag/command fields into names.
1481 const char * ctrlStr[4] = { "XADDPHR\0", "XADDPIX\0", "XADD0\0", "XADDINC\0" };
1482 const char * bppStr[8] = { "1bpp\0", "2bpp\0", "4bpp\0", "8bpp\0", "16bpp\0", "32bpp\0", "???\0", "!!!\0" };
1483 const char * opStr[16] = { "LFU_CLEAR", "LFU_NSAND", "LFU_NSAD", "LFU_NOTS", "LFU_SAND", "LFU_NOTD", "LFU_N_SXORD", "LFU_NSORND",
1484 "LFU_SAD", "LFU_XOR", "LFU_D", "LFU_NSORD", "LFU_REPLACE", "LFU_SORND", "LFU_SORD", "LFU_ONE" };
1485 uint32_t /*src = cmd & 0x07, dst = (cmd >> 3) & 0x07, misc = (cmd >> 6) & 0x03,
1486 a1ctl = (cmd >> 8) & 0x07,*/ mode = (cmd >> 11) & 0x07/*, ity = (cmd >> 14) & 0x0F,
1487 zop = (cmd >> 18) & 0x07, op = (cmd >> 21) & 0x0F, ctrl = (cmd >> 25) & 0x3F*/;
1488 uint32_t a1f = REG(A1_FLAGS), a2f = REG(A2_FLAGS);
1489 uint32_t p1 = a1f & 0x07, p2 = a2f & 0x07,
1490 d1 = (a1f >> 3) & 0x07, d2 = (a2f >> 3) & 0x07,
1491 zo1 = (a1f >> 6) & 0x07, zo2 = (a2f >> 6) & 0x07,
1492 w1 = (a1f >> 9) & 0x3F, w2 = (a2f >> 9) & 0x3F,
1493 ac1 = (a1f >> 16) & 0x1F, ac2 = (a2f >> 16) & 0x1F;
// Same mantissa/exponent width decode as above, applied to the 6-bit width field.
1494 uint32_t iw1 = ((0x04 | (w1 & 0x03)) << ((w1 & 0x3C) >> 2)) >> 2;
1495 uint32_t iw2 = ((0x04 | (w2 & 0x03)) << ((w2 & 0x3C) >> 2)) >> 2;
1496 WriteLog("Blit! (%08X %s %08X) count: %d x %d, A1/2_FLAGS: %08X/%08X [cmd: %08X]\n", a1_addr, (mode&0x01 ? "->" : "<-"), a2_addr, n_pixels, n_lines, a1f, a2f, cmd);
1497 // WriteLog(" CMD -> src: %d, dst: %d, misc: %d, a1ctl: %d, mode: %d, ity: %1X, z-op: %d, op: %1X, ctrl: %02X\n", src, dst, misc, a1ctl, mode, ity, zop, op, ctrl);
1499 WriteLog(" CMD -> src: %s%s%s ", (cmd & 0x0001 ? "SRCEN " : ""), (cmd & 0x0002 ? "SRCENZ " : ""), (cmd & 0x0004 ? "SRCENX" : ""));
1500 WriteLog("dst: %s%s%s ", (cmd & 0x0008 ? "DSTEN " : ""), (cmd & 0x0010 ? "DSTENZ " : ""), (cmd & 0x0020 ? "DSTWRZ" : ""));
1501 WriteLog("misc: %s%s ", (cmd & 0x0040 ? "CLIP_A1 " : ""), (cmd & 0x0080 ? "???" : ""));
1502 WriteLog("a1ctl: %s%s%s ", (cmd & 0x0100 ? "UPDA1F " : ""), (cmd & 0x0200 ? "UPDA1 " : ""), (cmd & 0x0400 ? "UPDA2" : ""));
1503 WriteLog("mode: %s%s%s ", (cmd & 0x0800 ? "DSTA2 " : ""), (cmd & 0x1000 ? "GOURD " : ""), (cmd & 0x2000 ? "GOURZ" : ""));
1504 WriteLog("ity: %s%s%s%s ", (cmd & 0x4000 ? "TOPBEN " : ""), (cmd & 0x8000 ? "TOPNEN " : ""), (cmd & 0x00010000 ? "PATDSEL" : ""), (cmd & 0x00020000 ? "ADDDSEL" : ""));
1505 WriteLog("z-op: %s%s%s ", (cmd & 0x00040000 ? "ZMODELT " : ""), (cmd & 0x00080000 ? "ZMODEEQ " : ""), (cmd & 0x00100000 ? "ZMODEGT" : ""));
1506 WriteLog("op: %s ", opStr[(cmd >> 21) & 0x0F]);
1507 WriteLog("ctrl: %s%s%s%s%s%s\n", (cmd & 0x02000000 ? "CMPDST " : ""), (cmd & 0x04000000 ? "BCOMPEN " : ""), (cmd & 0x08000000 ? "DCOMPEN " : ""), (cmd & 0x10000000 ? "BKGWREN " : ""), (cmd & 0x20000000 ? "BUSHI " : ""), (cmd & 0x40000000 ? "SRCSHADE" : ""));
1510 WriteLog(" A1 step values: %d (X), %d (Y)\n", a1_step_x >> 16, a1_step_y >> 16);
1513 WriteLog(" A2 step values: %d (X), %d (Y) [mask (%sused): %08X - %08X/%08X]\n", a2_step_x >> 16, a2_step_y >> 16, (a2f & 0x8000 ? "" : "un"), REG(A2_MASK), a2_mask_x, a2_mask_y);
1515 WriteLog(" A1 -> pitch: %d phrases, depth: %s, z-off: %d, width: %d (%02X), addctl: %s %s %s %s\n", 1 << p1, bppStr[d1], zo1, iw1, w1, ctrlStr[ac1&0x03], (ac1&0x04 ? "YADD1" : "YADD0"), (ac1&0x08 ? "XSIGNSUB" : "XSIGNADD"), (ac1&0x10 ? "YSIGNSUB" : "YSIGNADD"));
1516 WriteLog(" A2 -> pitch: %d phrases, depth: %s, z-off: %d, width: %d (%02X), addctl: %s %s %s %s\n", 1 << p2, bppStr[d2], zo2, iw2, w2, ctrlStr[ac2&0x03], (ac2&0x04 ? "YADD1" : "YADD0"), (ac2&0x08 ? "XSIGNSUB" : "XSIGNADD"), (ac2&0x10 ? "YSIGNSUB" : "YSIGNADD"));
1517 WriteLog(" A1 x/y: %d/%d, A2 x/y: %d/%d Pattern: %08X%08X SRCDATA: %08X%08X\n", a1_x >> 16, a1_y >> 16, a2_x >> 16, a2_y >> 16, REG(PATTERNDATA), REG(PATTERNDATA + 4), REG(SRCDATA), REG(SRCDATA + 4));
1518 // blit_start_log = 0;
1519 // op_start_log = 1;
// Mark the blitter busy for the duration of the actual blit.
1522 blitter_working = 1;
1523 //#ifndef USE_GENERIC_BLITTER
1524 // if (!blitter_execute_cached_code(blitter_in_cache(cmd)))
1526 blitter_generic(cmd);
1528 /*if (blit_start_log)
1530 if (a1_addr == 0xF03000 && a2_addr == 0x004D58)
1532 WriteLog("\nBytes at 004D58:\n");
1533 for(int i=0x004D58; i<0x004D58+(10*127*4); i++)
1534 WriteLog("%02X ", JaguarReadByte(i));
1535 WriteLog("\nBytes at F03000:\n");
1536 for(int i=0xF03000; i<0xF03000+(6*127*4); i++)
1537 WriteLog("%02X ", JaguarReadByte(i));
1542 blitter_working = 0;
1544 #endif // of the #if 0 near the top...
1545 /*******************************************************************************
1546 ********************** STUFF CUT ABOVE THIS LINE! ******************************
1547 *******************************************************************************/
1550 void BlitterInit(void)
1556 void BlitterReset(void)
1558 memset(blitter_ram, 0x00, 0xA0);
1562 void BlitterDone(void)
1564 WriteLog("BLIT: Done.\n");
// Byte read handler for the blitter register block.
// offset: register offset being read; who: requester id (not referenced by any
// line visible in this listing).
// Returns the byte for that offset, with three special cases handled first:
// the 0x38..0x3B range, and two address ranges that read back a neighbouring
// register because of a documented hardware bug.
// NOTE(review): no visible line reduces offset to 0..0xFF before it indexes
// blitter_ram (0x100 bytes) -- confirm the masking against the full source.
1568 uint8_t BlitterReadByte(uint32_t offset, uint32_t who/*=UNKNOWN*/)
1573 //This isn't cycle accurate--how to fix? !!! FIX !!!
1574 //Probably have to do some multi-threaded implementation or at least a reentrant safe implementation...
1575 //Real hardware returns $00000805, just like the JTRM says.
// Reads of 0x38..0x3B are answered with a constant rather than from blitter_ram.
// Only the last byte's return (0x05) is visible here; the other three
// presumably supply 0x00, 0x00, 0x08 to form $00000805 -- TODO confirm.
1576 if (offset == (0x38 + 0))
1578 if (offset == (0x38 + 1))
1580 if (offset == (0x38 + 2))
1582 if (offset == (0x38 + 3))
1583 return 0x05; // always idle/never stopped (collision detection ignored!)
1585 // CHECK HERE ONCE THIS FIX HAS BEEN TESTED: [X]
// Offsets 0x04..0x07 read back the bytes 8 higher (0x0C..0x0F, A1_PIXEL).
1587 if (offset >= 0x04 && offset <= 0x07)
1588 //This is it. I wonder if it just ignores the lower three bits?
1589 //No, this is a documented Jaguar I bug. It also bites the read at $F02230 as well...
1590 return blitter_ram[offset + 0x08]; // A1_PIXEL ($F0220C) read at $F02204
// Offsets 0x2C..0x2F read back the bytes 4 higher (0x30..0x33, A2_PIXEL).
1592 if (offset >= 0x2C && offset <= 0x2F)
1593 return blitter_ram[offset + 0x04]; // A2_PIXEL ($F02230) read at $F0222C
// Everything else is a plain read of the register file.
1595 return blitter_ram[offset];
1600 uint16_t BlitterReadWord(uint32_t offset, uint32_t who/*=UNKNOWN*/)
1602 return ((uint16_t)BlitterReadByte(offset, who) << 8) | (uint16_t)BlitterReadByte(offset+1, who);
1607 uint32_t BlitterReadLong(uint32_t offset, uint32_t who/*=UNKNOWN*/)
1609 return (BlitterReadWord(offset, who) << 16) | BlitterReadWord(offset+2, who);
// Byte write handler for the blitter register block.
// offset: register offset being written; data: the byte; who: requester id
// (only referenced by the disabled debug output below).
// Three kinds of store are visible:
//  1. offsets 0x7C..0x9B (INTENSITY0-3 and the Z registers that follow) are
//     redirected into PATTERNDATA/SRCDATA or SRCZINT/SRCZFRAC;
//  2. writes into either 32-bit half of a 64-bit data register land in the
//     other half (offset + 4 or offset - 4);
//  3. a plain store to blitter_ram[offset].
// NOTE(review): this listing omits short lines (the switch header, some case
// labels, the final else), so exactly which of these stores are mutually
// exclusive cannot be confirmed from here. Offsets are compared unmasked, so
// presumably offset is reduced to 0..0xFF on a line not shown -- TODO confirm.
1613 void BlitterWriteByte(uint32_t offset, uint8_t data, uint32_t who/*=UNKNOWN*/)
// NOTE: in the disabled check below, == binds tighter than &, so as written it
// tests offset & (0xFF == 0x7B); fix the parentheses before re-enabling it.
1615 /*if (offset & 0xFF == 0x7B)
1616 WriteLog("--> Wrote to B_STOP: value -> %02X\n", data);*/
1618 /*if ((offset >= PATTERNDATA) && (offset < PATTERNDATA + 8))
1620 printf("--> %s wrote %02X to byte %u of PATTERNDATA...\n", whoName[who], data, offset - PATTERNDATA);
1624 // This handles writes to INTENSITY0-3 by also writing them to their proper places in
1625 // PATTERNDATA & SOURCEDATA (should do the same for the Z registers! !!! FIX !!! [DONE])
1626 if ((offset >= 0x7C) && (offset <= 0x9B))
1630 // INTENSITY registers 0-3
// For each 4-byte INTENSITYn register, byte 1 goes to PATTERNDATA and bytes
// 2-3 go to SRCDATA; each successive register lands two bytes lower.
1632 case 0x7D: blitter_ram[PATTERNDATA + 7] = data; break;
1633 case 0x7E: blitter_ram[SRCDATA + 6] = data; break;
1634 case 0x7F: blitter_ram[SRCDATA + 7] = data; break;
1637 case 0x81: blitter_ram[PATTERNDATA + 5] = data; break;
1638 case 0x82: blitter_ram[SRCDATA + 4] = data; break;
1639 case 0x83: blitter_ram[SRCDATA + 5] = data; break;
1642 case 0x85: blitter_ram[PATTERNDATA + 3] = data; break;
1643 case 0x86: blitter_ram[SRCDATA + 2] = data; break;
1644 case 0x87: blitter_ram[SRCDATA + 3] = data; break;
1647 case 0x89: blitter_ram[PATTERNDATA + 1] = data; break;
1648 case 0x8A: blitter_ram[SRCDATA + 0] = data; break;
1649 case 0x8B: blitter_ram[SRCDATA + 1] = data; break;
// Offsets 0x8C..0x9B follow the same pattern for Z: the first two bytes of each
// 4-byte group go to SRCZINT, the last two to SRCZFRAC, two bytes lower per group.
1653 case 0x8C: blitter_ram[SRCZINT + 6] = data; break;
1654 case 0x8D: blitter_ram[SRCZINT + 7] = data; break;
1655 case 0x8E: blitter_ram[SRCZFRAC + 6] = data; break;
1656 case 0x8F: blitter_ram[SRCZFRAC + 7] = data; break;
1658 case 0x90: blitter_ram[SRCZINT + 4] = data; break;
1659 case 0x91: blitter_ram[SRCZINT + 5] = data; break;
1660 case 0x92: blitter_ram[SRCZFRAC + 4] = data; break;
1661 case 0x93: blitter_ram[SRCZFRAC + 5] = data; break;
1663 case 0x94: blitter_ram[SRCZINT + 2] = data; break;
1664 case 0x95: blitter_ram[SRCZINT + 3] = data; break;
1665 case 0x96: blitter_ram[SRCZFRAC + 2] = data; break;
1666 case 0x97: blitter_ram[SRCZFRAC + 3] = data; break;
1668 case 0x98: blitter_ram[SRCZINT + 0] = data; break;
1669 case 0x99: blitter_ram[SRCZINT + 1] = data; break;
1670 case 0x9A: blitter_ram[SRCZFRAC + 0] = data; break;
1671 case 0x9B: blitter_ram[SRCZFRAC + 1] = data; break;
1675 // It looks weird, but this is how the 64 bit registers are actually handled...!
// && binds tighter than ||, so each line below is one "offset within the first
// four bytes of this register" range test, and the lines are OR-ed together.
1677 else if ((offset >= SRCDATA + 0) && (offset <= SRCDATA + 3)
1678 || (offset >= DSTDATA + 0) && (offset <= DSTDATA + 3)
1679 || (offset >= DSTZ + 0) && (offset <= DSTZ + 3)
1680 || (offset >= SRCZINT + 0) && (offset <= SRCZINT + 3)
1681 || (offset >= SRCZFRAC + 0) && (offset <= SRCZFRAC + 3)
1682 || (offset >= PATTERNDATA + 0) && (offset <= PATTERNDATA + 3))
// A write to the first half is stored in the second half...
1684 blitter_ram[offset + 4] = data;
1686 else if ((offset >= SRCDATA + 4) && (offset <= SRCDATA + 7)
1687 || (offset >= DSTDATA + 4) && (offset <= DSTDATA + 7)
1688 || (offset >= DSTZ + 4) && (offset <= DSTZ + 7)
1689 || (offset >= SRCZINT + 4) && (offset <= SRCZINT + 7)
1690 || (offset >= SRCZFRAC + 4) && (offset <= SRCZFRAC + 7)
1691 || (offset >= PATTERNDATA + 4) && (offset <= PATTERNDATA + 7))
// ...and a write to the second half is stored in the first half.
1693 blitter_ram[offset - 4] = data;
// Plain store for the remaining offsets.
1696 blitter_ram[offset] = data;
// Word write handler for the blitter register block.
// The 16-bit value is stored as two byte writes, high byte first. If the low
// eight bits of offset equal 0x3A (the second word of the 32-bit value at
// 0x38), the write then starts a blit using the 32-bit command read from
// blitter_ram at 0x38.
// Which blitter runs is chosen at compile time by the USE_*_BLITTER macros,
// or at run time by vjs.useFastBlitter when USE_BOTH_BLITTERS is defined.
// NOTE(review): the #else/#endif lines of that selection are missing from this
// listing, so the exact nesting should be checked against the full source.
1700 void BlitterWriteWord(uint32_t offset, uint16_t data, uint32_t who/*=UNKNOWN*/)
1702 /*if (((offset & 0xFF) >= PATTERNDATA) && ((offset & 0xFF) < PATTERNDATA + 8))
1704 printf("----> %s wrote %04X to byte %u of PATTERNDATA...\n", whoName[who], data, offset - (0xF02200 + PATTERNDATA));
// NOTE: in the disabled check below, == binds tighter than &, so as written it
// tests offset & (0xFF == A1_PIXEL); fix the parentheses before re-enabling it.
1708 /* if (offset & 0xFF == A1_PIXEL && data == 14368)
1710 WriteLog("\n1\nA1_PIXEL written by %s (%u)...\n\n\n", whoName[who], data);
1711 extern bool doGPUDis;
1714 if ((offset & 0xFF) == (A1_PIXEL + 2) && data == 14368)
1716 WriteLog("\n2\nA1_PIXEL written by %s (%u)...\n\n\n", whoName[who], data);
1717 extern bool doGPUDis;
// Store the word big-endian: high byte at offset, low byte at offset + 1.
1722 BlitterWriteByte(offset + 0, data >> 8, who);
1723 BlitterWriteByte(offset + 1, data & 0xFF, who);
// Writing the low word of the command register is what kicks off a blit.
1725 if ((offset & 0xFF) == 0x3A)
1726 // I.e., the second write of 32-bit value--not convinced this is the best way to do this!
1727 // But then again, according to the Jaguar docs, this is correct...!
1728 /*extern int blit_start_log;
1729 extern bool doGPUDis;
1732 WriteLog("BLIT: Blitter started by %s...\n", whoName[who]);
// Single-blitter builds: exactly one implementation is compiled in.
1735 #ifndef USE_BOTH_BLITTERS
1736 #ifdef USE_ORIGINAL_BLITTER
1737 blitter_blit(GET32(blitter_ram, 0x38));
1739 #ifdef USE_MIDSUMMER_BLITTER
1740 BlitterMidsummer(GET32(blitter_ram, 0x38));
1742 #ifdef USE_MIDSUMMER_BLITTER_MKII
1743 BlitterMidsummer2();
// Both-blitter builds: the user setting picks the implementation at run time.
1747 if (vjs.useFastBlitter)
1748 blitter_blit(GET32(blitter_ram, 0x38));
1750 BlitterMidsummer2();
1757 void BlitterWriteLong(uint32_t offset, uint32_t data, uint32_t who/*=UNKNOWN*/)
1759 /*if (((offset & 0xFF) >= PATTERNDATA) && ((offset & 0xFF) < PATTERNDATA + 8))
1761 printf("------> %s wrote %08X to byte %u of PATTERNDATA...\n", whoName[who], data, offset - (0xF02200 + PATTERNDATA));
1765 /* if ((offset & 0xFF) == A1_PIXEL && (data & 0xFFFF) == 14368)
1767 WriteLog("\n3\nA1_PIXEL written by %s (%u)...\n\n\n", whoName[who], data);
1768 extern bool doGPUDis;
1773 BlitterWriteWord(offset + 0, data >> 16, who);
1774 BlitterWriteWord(offset + 2, data & 0xFFFF, who);
1780 const char * opStr[16] = { "LFU_CLEAR", "LFU_NSAND", "LFU_NSAD", "LFU_NOTS", "LFU_SAND", "LFU_NOTD", "LFU_N_SXORD", "LFU_NSORND",
1781 "LFU_SAD", "LFU_XOR", "LFU_D", "LFU_NSORD", "LFU_REPLACE", "LFU_SORND", "LFU_SORD", "LFU_ONE" };
1782 uint32_t cmd = GET32(blitter_ram, 0x38);
1783 uint32_t m = (REG(A1_FLAGS) >> 9) & 0x03, e = (REG(A1_FLAGS) >> 11) & 0x0F;
1784 uint32_t a1_width = ((0x04 | m) << e) >> 2;
1785 m = (REG(A2_FLAGS) >> 9) & 0x03, e = (REG(A2_FLAGS) >> 11) & 0x0F;
1786 uint32_t a2_width = ((0x04 | m) << e) >> 2;
1788 WriteLog("Blit!\n");
1789 WriteLog(" COMMAND = %08X\n", cmd);
1790 WriteLog(" a1_base = %08X\n", REG(A1_BASE));
1791 WriteLog(" a1_flags = %08X (%c %c %c %c%c . %c%c%c%c%c%c %c%c%c %c%c%c . %c%c)\n", REG(A1_FLAGS),
1792 (REG(A1_FLAGS) & 0x100000 ? '1' : '0'),
1793 (REG(A1_FLAGS) & 0x080000 ? '1' : '0'),
1794 (REG(A1_FLAGS) & 0x040000 ? '1' : '0'),
1795 (REG(A1_FLAGS) & 0x020000 ? '1' : '0'),
1796 (REG(A1_FLAGS) & 0x010000 ? '1' : '0'),
1797 (REG(A1_FLAGS) & 0x004000 ? '1' : '0'),
1798 (REG(A1_FLAGS) & 0x002000 ? '1' : '0'),
1799 (REG(A1_FLAGS) & 0x001000 ? '1' : '0'),
1800 (REG(A1_FLAGS) & 0x000800 ? '1' : '0'),
1801 (REG(A1_FLAGS) & 0x000400 ? '1' : '0'),
1802 (REG(A1_FLAGS) & 0x000200 ? '1' : '0'),
1803 (REG(A1_FLAGS) & 0x000100 ? '1' : '0'),
1804 (REG(A1_FLAGS) & 0x000080 ? '1' : '0'),
1805 (REG(A1_FLAGS) & 0x000040 ? '1' : '0'),
1806 (REG(A1_FLAGS) & 0x000020 ? '1' : '0'),
1807 (REG(A1_FLAGS) & 0x000010 ? '1' : '0'),
1808 (REG(A1_FLAGS) & 0x000008 ? '1' : '0'),
1809 (REG(A1_FLAGS) & 0x000002 ? '1' : '0'),
1810 (REG(A1_FLAGS) & 0x000001 ? '1' : '0'));
1811 WriteLog(" pitch=%u, pixSz=%u, zOff=%u, width=%u, xCtrl=%u\n",
1812 REG(A1_FLAGS) & 0x00003, (REG(A1_FLAGS) & 0x00038) >> 3,
1813 (REG(A1_FLAGS) & 0x001C0) >> 6, a1_width, (REG(A1_FLAGS) & 0x30000) >> 16);
1814 WriteLog(" a1_clip = %u, %u (%08X)\n", GET16(blitter_ram, A1_CLIP + 2), GET16(blitter_ram, A1_CLIP + 0), GET32(blitter_ram, A1_CLIP));
1815 WriteLog(" a1_pixel = %d, %d (%08X)\n", (int16_t)GET16(blitter_ram, A1_PIXEL + 2), (int16_t)GET16(blitter_ram, A1_PIXEL + 0), GET32(blitter_ram, A1_PIXEL));
1816 WriteLog(" a1_step = %d, %d (%08X)\n", (int16_t)GET16(blitter_ram, A1_STEP + 2), (int16_t)GET16(blitter_ram, A1_STEP + 0), GET32(blitter_ram, A1_STEP));
1817 WriteLog(" a1_fstep = %u, %u (%08X)\n", GET16(blitter_ram, A1_FSTEP + 2), GET16(blitter_ram, A1_FSTEP + 0), GET32(blitter_ram, A1_FSTEP));
1818 WriteLog(" a1_fpixel= %u, %u (%08X)\n", GET16(blitter_ram, A1_FPIXEL + 2), GET16(blitter_ram, A1_FPIXEL + 0), GET32(blitter_ram, A1_FPIXEL));
1819 WriteLog(" a1_inc = %d, %d (%08X)\n", (int16_t)GET16(blitter_ram, A1_INC + 2), (int16_t)GET16(blitter_ram, A1_INC + 0), GET32(blitter_ram, A1_INC));
1820 WriteLog(" a1_finc = %u, %u (%08X)\n", GET16(blitter_ram, A1_FINC + 2), GET16(blitter_ram, A1_FINC + 0), GET32(blitter_ram, A1_FINC));
1822 WriteLog(" a2_base = %08X\n", REG(A2_BASE));
1823 WriteLog(" a2_flags = %08X (%c %c %c %c%c %c %c%c%c%c%c%c %c%c%c %c%c%c . %c%c)\n", REG(A2_FLAGS),
1824 (REG(A2_FLAGS) & 0x100000 ? '1' : '0'),
1825 (REG(A2_FLAGS) & 0x080000 ? '1' : '0'),
1826 (REG(A2_FLAGS) & 0x040000 ? '1' : '0'),
1827 (REG(A2_FLAGS) & 0x020000 ? '1' : '0'),
1828 (REG(A2_FLAGS) & 0x010000 ? '1' : '0'),
1829 (REG(A2_FLAGS) & 0x008000 ? '1' : '0'),
1830 (REG(A2_FLAGS) & 0x004000 ? '1' : '0'),
1831 (REG(A2_FLAGS) & 0x002000 ? '1' : '0'),
1832 (REG(A2_FLAGS) & 0x001000 ? '1' : '0'),
1833 (REG(A2_FLAGS) & 0x000800 ? '1' : '0'),
1834 (REG(A2_FLAGS) & 0x000400 ? '1' : '0'),
1835 (REG(A2_FLAGS) & 0x000200 ? '1' : '0'),
1836 (REG(A2_FLAGS) & 0x000100 ? '1' : '0'),
1837 (REG(A2_FLAGS) & 0x000080 ? '1' : '0'),
1838 (REG(A2_FLAGS) & 0x000040 ? '1' : '0'),
1839 (REG(A2_FLAGS) & 0x000020 ? '1' : '0'),
1840 (REG(A2_FLAGS) & 0x000010 ? '1' : '0'),
1841 (REG(A2_FLAGS) & 0x000008 ? '1' : '0'),
1842 (REG(A2_FLAGS) & 0x000002 ? '1' : '0'),
1843 (REG(A2_FLAGS) & 0x000001 ? '1' : '0'));
1844 WriteLog(" pitch=%u, pixSz=%u, zOff=%u, width=%u, xCtrl=%u\n",
1845 REG(A2_FLAGS) & 0x00003, (REG(A2_FLAGS) & 0x00038) >> 3,
1846 (REG(A2_FLAGS) & 0x001C0) >> 6, a2_width, (REG(A2_FLAGS) & 0x30000) >> 16);
1847 WriteLog(" a2_mask = %u, %u (%08X)\n", GET16(blitter_ram, A2_MASK + 2), GET16(blitter_ram, A2_MASK + 0), GET32(blitter_ram, A2_MASK));
1848 WriteLog(" a2_pixel = %d, %d (%08X)\n", (int16_t)GET16(blitter_ram, A2_PIXEL + 2), (int16_t)GET16(blitter_ram, A2_PIXEL + 0), GET32(blitter_ram, A2_PIXEL));
1849 WriteLog(" a2_step = %d, %d (%08X)\n", (int16_t)GET16(blitter_ram, A2_STEP + 2), (int16_t)GET16(blitter_ram, A2_STEP + 0), GET32(blitter_ram, A2_STEP));
1851 WriteLog(" count = %d x %d\n", GET16(blitter_ram, PIXLINECOUNTER + 2), GET16(blitter_ram, PIXLINECOUNTER));
1853 WriteLog(" SRCEN = %s\n", (SRCEN ? "1" : "0"));
1854 WriteLog(" SRCENZ = %s\n", (SRCENZ ? "1" : "0"));
1855 WriteLog(" SRCENX = %s\n", (SRCENX ? "1" : "0"));
1856 WriteLog(" DSTEN = %s\n", (DSTEN ? "1" : "0"));
1857 WriteLog(" DSTENZ = %s\n", (DSTENZ ? "1" : "0"));
1858 WriteLog(" DSTWRZ = %s\n", (DSTWRZ ? "1" : "0"));
1859 WriteLog(" CLIPA1 = %s\n", (CLIPA1 ? "1" : "0"));
1860 WriteLog(" UPDA1F = %s\n", (UPDA1F ? "1" : "0"));
1861 WriteLog(" UPDA1 = %s\n", (UPDA1 ? "1" : "0"));
1862 WriteLog(" UPDA2 = %s\n", (UPDA2 ? "1" : "0"));
1863 WriteLog(" DSTA2 = %s\n", (DSTA2 ? "1" : "0"));
1864 WriteLog(" ZOP = %s %s %s\n", (Z_OP_INF ? "<" : ""), (Z_OP_EQU ? "=" : ""), (Z_OP_SUP ? ">" : ""));
1865 WriteLog("+-LFUFUNC = %s\n", opStr[(cmd >> 21) & 0x0F]);
1866 WriteLog("| PATDSEL = %s (PD=%08X%08X)\n", (PATDSEL ? "1" : "0"), REG(PATTERNDATA), REG(PATTERNDATA + 4));
1867 WriteLog("+-ADDDSEL = %s\n", (ADDDSEL ? "1" : "0"));
1868 WriteLog(" CMPDST = %s\n", (CMPDST ? "1" : "0"));
1869 WriteLog(" BCOMPEN = %s\n", (BCOMPEN ? "1" : "0"));
1870 WriteLog(" DCOMPEN = %s\n", (DCOMPEN ? "1" : "0"));
1871 WriteLog(" TOPBEN = %s\n", (TOPBEN ? "1" : "0"));
1872 WriteLog(" TOPNEN = %s\n", (TOPNEN ? "1" : "0"));
1873 WriteLog(" BKGWREN = %s\n", (BKGWREN ? "1" : "0"));
1874 WriteLog(" GOURD = %s (II=%08X, SD=%08X%08X)\n", (GOURD ? "1" : "0"), REG(INTENSITYINC), REG(SRCDATA), REG(SRCDATA + 4));
1875 WriteLog(" GOURZ = %s (ZI=%08X, ZD=%08X%08X, SZ1=%08X%08X, SZ2=%08X%08X)\n", (GOURZ ? "1" : "0"), REG(ZINC), REG(DSTZ), REG(DSTZ + 4),
1876 REG(SRCZINT), REG(SRCZINT + 4), REG(SRCZFRAC), REG(SRCZFRAC + 4));
1877 WriteLog(" SRCSHADE = %s\n", (SRCSHADE ? "1" : "0"));
1881 #ifdef USE_MIDSUMMER_BLITTER
1883 // Here's an attempt to write a blitter that conforms to the Midsummer specs--since
1884 // it's supposedly backwards compatible, it should work well...
1886 //#define LOG_BLITTER_MEMORY_ACCESSES
1888 #define DATINIT (false)
1889 #define TXTEXT (false)
1890 #define POLYGON (false)
1892 void BlitterMidsummer(uint32_t cmd)
1897 uint32_t outer_loop, inner_loop, a1_addr, a2_addr;
1898 int32_t a1_x, a1_y, a2_x, a2_y, a1_width, a2_width;
1899 uint8_t a1_phrase_mode, a2_phrase_mode;
1901 a1_addr = REG(A1_BASE) & 0xFFFFFFF8;
1902 a2_addr = REG(A2_BASE) & 0xFFFFFFF8;
1903 a1_x = (REG(A1_PIXEL) << 16) | (REG(A1_FPIXEL) & 0xFFFF);
1904 a1_y = (REG(A1_PIXEL) & 0xFFFF0000) | (REG(A1_FPIXEL) >> 16);
1905 uint32_t m = (REG(A1_FLAGS) >> 9) & 0x03, e = (REG(A1_FLAGS) >> 11) & 0x0F;
1906 a1_width = ((0x04 | m) << e) >> 2;//*/
1907 a2_x = (REG(A2_PIXEL) & 0x0000FFFF) << 16;
1908 a2_y = (REG(A2_PIXEL) & 0xFFFF0000);
1909 m = (REG(A2_FLAGS) >> 9) & 0x03, e = (REG(A2_FLAGS) >> 11) & 0x0F;
1910 a2_width = ((0x04 | m) << e) >> 2;//*/
1912 a1_phrase_mode = a2_phrase_mode = 0;
1914 if ((blitter_ram[A1_FLAGS + 1] & 0x03) == 0)
1917 if ((blitter_ram[A2_FLAGS + 1] & 0x03) == 0)
1920 #define INNER0 (inner_loop == 0)
1921 #define OUTER0 (outer_loop == 0)
1923 // $01800005 has SRCENX, may have to investigate further...
1924 // $00011008 has GOURD & DSTEN.
1925 // $41802F41 has SRCSHADE, CLIPA1
1926 /*bool logBlit = false;
1927 if (cmd != 0x00010200 && cmd != 0x01800001 && cmd != 0x01800005
1928 && cmd != 0x00011008 && cmd !=0x41802F41)
1934 uint64_t srcData = GET64(blitter_ram, SRCDATA), srcXtraData,
1935 dstData = GET64(blitter_ram, DSTDATA), writeData;
1936 uint32_t srcAddr, dstAddr;
1937 uint8_t bitCount, a1PixelSize, a2PixelSize;
1939 // JTRM says phrase mode only works for 8BPP or higher, so let's try this...
1940 uint32_t phraseOffset[8] = { 8, 8, 8, 8, 4, 2, 0, 0 };
1941 uint8_t pixelShift[8] = { 3, 2, 1, 0, 1, 2, 0, 0 };
1943 a1PixelSize = (blitter_ram[A1_FLAGS + 3] >> 3) & 0x07;
1944 a2PixelSize = (blitter_ram[A2_FLAGS + 3] >> 3) & 0x07;
1946 outer_loop = GET16(blitter_ram, PIXLINECOUNTER + 0);
1948 if (outer_loop == 0)
1949 outer_loop = 0x10000;
1951 // We just list the states here and jump from state to state in order to
1952 // keep things somewhat clear. Optimization/cleanups later.
1954 //idle: // Blitter is idle, and will not perform any bus activity
1956 idle Blitter is off the bus, and no activity takes place.
1957 if GO if DATINIT goto init_if
1966 inner Inner loop is active, read and write cycles are performed
1968 inner: // Run inner loop state machine (asserts step from its idle state)
1969 inner_loop = GET16(blitter_ram, PIXLINECOUNTER + 2);
1971 if (inner_loop == 0)
1972 inner_loop = 0x10000;
1975 ------------------------------
1976 idle: Inactive, blitter is idle or passing round outer loop
1977 idle Another state in the outer loop is active. No bus transfers are performed.
1979 if SRCENX goto sreadx
1980 else if TXTEXT goto txtread
1981 else if SRCEN goto sread
1982 else if DSTEN goto dread
1983 else if DSTENZ goto dzread
2000 sreadx Extra source data read at the start of an inner loop pass.
2002 if SRCENZ goto szreadx
2003 else if TXTEXT goto txtread
2004 else if SRCEN goto sread
2005 else if DSTEN goto dread
2006 else if DSTENZ goto dzread
2009 sreadx: // Extra source data read
2024 szreadx Extra source Z read at the start of an inner loop pass.
2026 if TXTEXT goto txtread
2029 szreadx: // Extra source Z read
2036 txtread Read texture data from external memory. This state is only used for external texture.
2037 TXTEXT is the condition TEXTMODE=1.
2040 else if DSTEN goto dread
2041 else if DSTENZ goto dzread
2044 txtread: // Read external texture data
2055 sread Source data read.
2057 if SRCENZ goto szread
2058 else if DSTEN goto dread
2059 else if DSTENZ goto dzread
2062 sread: // Source data read
2063 //The JTRM doesn't really specify the internal structure of the source data read, but I would
2064 //imagine that if it's in phrase mode that it starts by reading the phrase that the window is
2065 //pointing at. Likewise, the pixel (if in BPP 1, 2 & 4, chopped) otherwise. It probably still
2066 //transfers an entire phrase even in pixel mode.
2067 //Odd thought: Does it expand, e.g., 1 BPP pixels into 32 BPP internally? Hmm...
2070 a1_addr = REG(A1_BASE) & 0xFFFFFFF8;
2071 a2_addr = REG(A2_BASE) & 0xFFFFFFF8;
2072 a1_zoffs = (REG(A1_FLAGS) >> 6) & 7;
2073 a2_zoffs = (REG(A2_FLAGS) >> 6) & 7;
2074 xadd_a1_control = (REG(A1_FLAGS) >> 16) & 0x03;
2075 xadd_a2_control = (REG(A2_FLAGS) >> 16) & 0x03;
2076 a1_pitch = pitchValue[(REG(A1_FLAGS) & 0x03)];
2077 a2_pitch = pitchValue[(REG(A2_FLAGS) & 0x03)];
2078 n_pixels = REG(PIXLINECOUNTER) & 0xFFFF;
2079 n_lines = (REG(PIXLINECOUNTER) >> 16) & 0xFFFF;
2080 a1_x = (REG(A1_PIXEL) << 16) | (REG(A1_FPIXEL) & 0xFFFF);
2081 a1_y = (REG(A1_PIXEL) & 0xFFFF0000) | (REG(A1_FPIXEL) >> 16);
2082 a2_psize = 1 << ((REG(A2_FLAGS) >> 3) & 0x07);
2083 a1_psize = 1 << ((REG(A1_FLAGS) >> 3) & 0x07);
2086 a1_width = ((0x04 | m) << e) >> 2;
2087 a2_width = ((0x04 | m) << e) >> 2;
2089 // write values back to registers
2090 WREG(A1_PIXEL, (a1_y & 0xFFFF0000) | ((a1_x >> 16) & 0xFFFF));
2091 WREG(A1_FPIXEL, (a1_y << 16) | (a1_x & 0xFFFF));
2092 WREG(A2_PIXEL, (a2_y & 0xFFFF0000) | ((a2_x >> 16) & 0xFFFF));
2094 // Calculate the address to be read...
2096 //Need to fix phrase mode calcs here, since they should *step* by eight, not multiply.
2097 //Also, need to fix various differing BPP modes here, since offset won't be correct except
2098 //for 8BPP. !!! FIX !!!
2099 srcAddr = (DSTA2 ? a1_addr : a2_addr);
2101 /* if ((DSTA2 ? a1_phrase_mode : a2_phrase_mode) == 1)
2103 srcAddr += (((DSTA2 ? a1_x : a2_x) >> 16)
2104 + (((DSTA2 ? a1_y : a2_y) >> 16) * (DSTA2 ? a1_width : a2_width)));
2108 // uint32_t pixAddr = ((DSTA2 ? a1_x : a2_x) >> 16)
2109 // + (((DSTA2 ? a1_y : a2_y) >> 16) * (DSTA2 ? a1_width : a2_width));
2110 int32_t pixAddr = (int16_t)((DSTA2 ? a1_x : a2_x) >> 16)
2111 + ((int16_t)((DSTA2 ? a1_y : a2_y) >> 16) * (DSTA2 ? a1_width : a2_width));
2113 if ((DSTA2 ? a1PixelSize : a2PixelSize) < 3)
2114 pixAddr >>= pixelShift[(DSTA2 ? a1PixelSize : a2PixelSize)];
2115 else if ((DSTA2 ? a1PixelSize : a2PixelSize) > 3)
2116 pixAddr <<= pixelShift[(DSTA2 ? a1PixelSize : a2PixelSize)];
2123 if ((DSTA2 ? a1_phrase_mode : a2_phrase_mode) == 1)
2125 srcData = ((uint64_t)JaguarReadLong(srcAddr, BLITTER) << 32)
2126 | (uint64_t)JaguarReadLong(srcAddr + 4, BLITTER);
2130 //1,2,&4BPP are wrong here... !!! FIX !!!
2131 if ((DSTA2 ? a1PixelSize : a2PixelSize) == 0) // 1 BPP
2132 srcData = JaguarReadByte(srcAddr, BLITTER);
2133 if ((DSTA2 ? a1PixelSize : a2PixelSize) == 1) // 2 BPP
2134 srcData = JaguarReadByte(srcAddr, BLITTER);
2135 if ((DSTA2 ? a1PixelSize : a2PixelSize) == 2) // 4 BPP
2136 srcData = JaguarReadByte(srcAddr, BLITTER);
2137 if ((DSTA2 ? a1PixelSize : a2PixelSize) == 3) // 8 BPP
2138 srcData = JaguarReadByte(srcAddr, BLITTER);
2139 if ((DSTA2 ? a1PixelSize : a2PixelSize) == 4) // 16 BPP
2140 srcData = JaguarReadWord(srcAddr, BLITTER);
2141 if ((DSTA2 ? a1PixelSize : a2PixelSize) == 5) // 32 BPP
2142 srcData = JaguarReadLong(srcAddr, BLITTER);
2145 #ifdef LOG_BLITTER_MEMORY_ACCESSES
2147 WriteLog("BLITTER: srcAddr=%08X, srcData=%08X %08X\n", srcAddr, (uint32_t)(srcData >> 32), (uint32_t)(srcData & 0xFFFFFFFF));
2159 szread: // Source Z read
2161 szread Source Z read.
2164 else if DSTENZ goto dzread
2174 dread: // Destination data read
2176 dread Destination data read.
2178 if DSTENZ goto dzread
2181 // Calculate the destination address to be read...
2183 //Need to fix phrase mode calcs here, since they should *step* by eight, not multiply.
2184 //Also, need to fix various differing BPP modes here, since offset won't be correct except
2185 //for 8BPP. !!! FIX !!!
2186 dstAddr = (DSTA2 ? a2_addr : a1_addr);
2189 // uint32_t pixAddr = ((DSTA2 ? a2_x : a1_x) >> 16)
2190 // + (((DSTA2 ? a2_y : a1_y) >> 16) * (DSTA2 ? a2_width : a1_width));
2191 int32_t pixAddr = (int16_t)((DSTA2 ? a2_x : a1_x) >> 16)
2192 + ((int16_t)((DSTA2 ? a2_y : a1_y) >> 16) * (DSTA2 ? a2_width : a1_width));
2194 if ((DSTA2 ? a2PixelSize : a1PixelSize) < 3)
2195 pixAddr >>= pixelShift[(DSTA2 ? a2PixelSize : a1PixelSize)];
2196 else if ((DSTA2 ? a2PixelSize : a1PixelSize) > 3)
2197 pixAddr <<= pixelShift[(DSTA2 ? a2PixelSize : a1PixelSize)];
2204 if ((DSTA2 ? a2_phrase_mode : a1_phrase_mode) == 1)
2206 dstData = ((uint64_t)JaguarReadLong(dstAddr, BLITTER) << 32) // destination phrase comes from dstAddr (was srcAddr), matching the pixel-mode reads in this state
2207 | (uint64_t)JaguarReadLong(dstAddr + 4, BLITTER);
2211 //1,2,&4BPP are wrong here... !!! FIX !!!
2212 if ((DSTA2 ? a2PixelSize : a1PixelSize) == 0) // 1 BPP
2213 dstData = JaguarReadByte(dstAddr, BLITTER);
2214 if ((DSTA2 ? a2PixelSize : a1PixelSize) == 1) // 2 BPP
2215 dstData = JaguarReadByte(dstAddr, BLITTER);
2216 if ((DSTA2 ? a2PixelSize : a1PixelSize) == 2) // 4 BPP
2217 dstData = JaguarReadByte(dstAddr, BLITTER);
2218 if ((DSTA2 ? a2PixelSize : a1PixelSize) == 3) // 8 BPP
2219 dstData = JaguarReadByte(dstAddr, BLITTER);
2220 if ((DSTA2 ? a2PixelSize : a1PixelSize) == 4) // 16 BPP
2221 dstData = JaguarReadWord(dstAddr, BLITTER);
2222 if ((DSTA2 ? a2PixelSize : a1PixelSize) == 5) // 32 BPP
2223 dstData = JaguarReadLong(dstAddr, BLITTER);
2226 #ifdef LOG_BLITTER_MEMORY_ACCESSES
2228 WriteLog("BLITTER (dread): dstAddr=%08X, dstData=%08X %08X\n", dstAddr, (uint32_t)(dstData >> 32), (uint32_t)(dstData & 0xFFFFFFFF));
2236 dzread: // Destination Z read
2238 dzread Destination Z read.
2243 dwrite: // Destination data write
2245 dwrite Destination write. Every pass round the inner loop must go through this state.
2247 if DSTWRZ goto dzwrite
2248 else if INNER0 goto idle
2249 else if TXTEXT goto txtread
2250 else if SRCEN goto sread
2251 else if DSTEN goto dread
2252 else if DSTENZ goto dzread
2261 a1_xadd = 1.000000 (phrase=0)
2270 a2_xadd = 1.000000 (phrase=1)
2274 a2_mask_x= 0xFFFFFFFF
2275 a2_mask_y= 0xFFFFFFFF
2285 --LFUFUNC = LFU_CLEAR
2286 | PATDSEL = 1 (PD=77C7 7700 7700 7700)
2288 GOURD = 1 (II=00FC 1A00, SD=FF00 0000 0000 0000)
2291 //Still need to do CLIPA1 and SRCSHADE and GOURD and GOURZ...
2293 // Check clipping...
2297 uint16_t x = a1_x >> 16, y = a1_y >> 16;
2299 if (x >= GET16(blitter_ram, A1_CLIP + 2) || y >= GET16(blitter_ram, A1_CLIP))
2303 // Figure out what gets written...
2307 writeData = GET64(blitter_ram, PATTERNDATA);
2308 //GOURD works properly only in 16BPP mode...
2309 //SRCDATA holds the intensity fractions...
2310 //Does GOURD get calc'ed here or somewhere else???
2311 //Temporary testing kludge...
2313 // writeData >>= 48;
2314 // writeData = 0xFF88;
2315 //OK, it's not writing an entire strip of pixels... Why?
2316 //bad incrementing, that's why!
2320 // Apparently this only works with 16-bit pixels. Not sure if it works in phrase mode either.
2321 //Also, take TOPBEN & TOPNEN into account here as well...
2322 writeData = srcData + dstData;
2324 else // LFUFUNC is the default...
2329 writeData |= ~srcData & ~dstData;
2331 writeData |= ~srcData & dstData;
2333 writeData |= srcData & ~dstData;
2335 writeData |= srcData & dstData;
2338 // Calculate the address to be written...
2340 dstAddr = (DSTA2 ? a2_addr : a1_addr);
2342 /* if ((DSTA2 ? a2_phrase_mode : a1_phrase_mode) == 1)
2344 //both of these calculate the wrong address because they don't take into account
2346 dstAddr += ((DSTA2 ? a2_x : a1_x) >> 16)
2347 + (((DSTA2 ? a2_y : a1_y) >> 16) * (DSTA2 ? a2_width : a1_width));
2351 /* dstAddr += ((DSTA2 ? a2_x : a1_x) >> 16)
2352 + (((DSTA2 ? a2_y : a1_y) >> 16) * (DSTA2 ? a2_width : a1_width));*/
2353 // uint32_t pixAddr = ((DSTA2 ? a2_x : a1_x) >> 16)
2354 // + (((DSTA2 ? a2_y : a1_y) >> 16) * (DSTA2 ? a2_width : a1_width));
2355 int32_t pixAddr = (int16_t)((DSTA2 ? a2_x : a1_x) >> 16)
2356 + ((int16_t)((DSTA2 ? a2_y : a1_y) >> 16) * (DSTA2 ? a2_width : a1_width));
2358 if ((DSTA2 ? a2PixelSize : a1PixelSize) < 3)
2359 pixAddr >>= pixelShift[(DSTA2 ? a2PixelSize : a1PixelSize)];
2360 else if ((DSTA2 ? a2PixelSize : a1PixelSize) > 3)
2361 pixAddr <<= pixelShift[(DSTA2 ? a2PixelSize : a1PixelSize)];
2368 if ((DSTA2 ? a2_phrase_mode : a1_phrase_mode) == 1)
2370 JaguarWriteLong(dstAddr, writeData >> 32, BLITTER);
2371 JaguarWriteLong(dstAddr + 4, writeData & 0xFFFFFFFF, BLITTER);
2375 //1,2,&4BPP are wrong here... !!! FIX !!!
2376 if ((DSTA2 ? a2PixelSize : a1PixelSize) == 0) // 1 BPP
2377 JaguarWriteByte(dstAddr, writeData, BLITTER);
2378 if ((DSTA2 ? a2PixelSize : a1PixelSize) == 1) // 2 BPP
2379 JaguarWriteByte(dstAddr, writeData, BLITTER);
2380 if ((DSTA2 ? a2PixelSize : a1PixelSize) == 2) // 4 BPP
2381 JaguarWriteByte(dstAddr, writeData, BLITTER);
2382 if ((DSTA2 ? a2PixelSize : a1PixelSize) == 3) // 8 BPP
2383 JaguarWriteByte(dstAddr, writeData, BLITTER);
2384 if ((DSTA2 ? a2PixelSize : a1PixelSize) == 4) // 16 BPP
2385 JaguarWriteWord(dstAddr, writeData, BLITTER);
2386 if ((DSTA2 ? a2PixelSize : a1PixelSize) == 5) // 32 BPP
2387 JaguarWriteLong(dstAddr, writeData, BLITTER);
2390 #ifdef LOG_BLITTER_MEMORY_ACCESSES
2392 WriteLog("BLITTER: dstAddr=%08X, writeData=%08X %08X\n", dstAddr, (uint32_t)(writeData >> 32), (uint32_t)(writeData & 0xFFFFFFFF));
2395 inhibitWrite://Should this go here? or on the other side of the X/Y incrementing?
2396 //Seems OK here... for now.
2398 // Do funky X/Y incrementation here as well... !!! FIX !!!
2400 // Handle A1 channel stepping
2402 if ((blitter_ram[A1_FLAGS + 1] & 0x03) == 0)
2403 a1_x += phraseOffset[a1PixelSize] << 16;
2404 else if ((blitter_ram[A1_FLAGS + 1] & 0x03) == 1)
2405 a1_x += (blitter_ram[A1_FLAGS + 1] & 0x08 ? -1 << 16 : 1 << 16);
2406 /* else if ((blitter_ram[A1_FLAGS + 1] & 0x03) == 2)
2408 else if ((blitter_ram[A1_FLAGS + 1] & 0x03) == 3)
2410 //Always add the FINC here??? That was the problem with the BIOS screen... So perhaps.
2411 a1_x += GET16(blitter_ram, A1_FINC + 2);
2412 a1_y += GET16(blitter_ram, A1_FINC + 0);
2414 a1_x += GET16(blitter_ram, A1_INC + 2) << 16;
2415 a1_y += GET16(blitter_ram, A1_INC + 0) << 16;
2418 if ((blitter_ram[A1_FLAGS + 1] & 0x04) && ((blitter_ram[A1_FLAGS + 1] & 0x03) != 3)) // mask before comparing: != binds tighter than &, so the old form only tested bit 0
2419 a1_y += (blitter_ram[A1_FLAGS + 1] & 0x10 ? -1 << 16 : 1 << 16);
2421 // Handle A2 channel stepping
2423 if ((blitter_ram[A2_FLAGS + 1] & 0x03) == 0)
2424 a2_x += phraseOffset[a2PixelSize] << 16;
2425 else if ((blitter_ram[A2_FLAGS + 1] & 0x03) == 1)
2426 a2_x += (blitter_ram[A2_FLAGS + 1] & 0x08 ? -1 << 16 : 1 << 16);
2427 /* else if ((blitter_ram[A2_FLAGS + 1] & 0x03) == 2)
2430 if (blitter_ram[A2_FLAGS + 1] & 0x04)
2431 a2_y += (blitter_ram[A2_FLAGS + 1] & 0x10 ? -1 << 16 : 1 << 16);
2433 //Need to fix this so that it subtracts (saturating, of course) the correct number of pixels
2434 //in phrase mode... !!! FIX !!! [DONE]
2435 //Need to fix this so that it counts down the correct item. Does it count the
2436 //source or the destination phrase mode???
2437 //It shouldn't matter, because we *should* end up processing
2438 //the same number of pixels... Not sure though.
2439 if ((DSTA2 ? a2_phrase_mode : a1_phrase_mode) == 1)
2441 if (inner_loop < phraseOffset[DSTA2 ? a2PixelSize : a1PixelSize])
2444 inner_loop -= phraseOffset[DSTA2 ? a2PixelSize : a1PixelSize];
2465 dzwrite: // Destination Z write
2467 dzwrite Destination Z write.
2470 else if TXTEXT goto txtread
2471 else if SRCEN goto sread
2472 else if DSTEN goto dread
2473 else if DSTENZ goto dzread
2490 ------------------------------
2491 if INDONE if OUTER0 goto idle
2492 else if UPDA1F goto a1fupdate
2493 else if UPDA1 goto a1update
2494 else if GOURZ.POLYGON goto zfupdate
2495 else if UPDA2 goto a2update
2496 else if DATINIT goto init_if
2509 //kill this, for now...
2510 // else if (GOURZ.POLYGON)
2519 a1fupdate: // Update A1 pointer fractions and more (see below)
2521 a1fupdate A1 step fraction is added to A1 pointer fraction
2522 POLYGON true: A1 step delta X and Y fraction parts are added to the A1
2523 step X and Y fraction parts (the value prior to this add is used for
2524 the step to pointer add).
2525 POLYGON true: inner count step fraction is added to the inner count
2527 POLYGON.GOURD true: the I fraction step is added to the computed
2528 intensity fraction parts +
2529 POLYGON.GOURD true: the I fraction step delta is added to the I
2534 #define A1_PIXEL ((uint32_t)0x0C) // Integer part of the pixel (Y.i and X.i)
2535 #define A1_STEP ((uint32_t)0x10) // Integer part of the step
2536 #define A1_FSTEP ((uint32_t)0x14) // Fractional part of the step
2537 #define A1_FPIXEL ((uint32_t)0x18) // Fractional part of the pixel (Y.f and X.f)
2540 // This is all kinda murky. All we have are the Midsummer docs to give us any guidance,
2541 // and they're incomplete or filled with errors (like above). Aarrrgggghhhhh!
2543 //This isn't right. Is it? I don't think the fractional parts are signed...
2544 // a1_x += (int32_t)((int16_t)GET16(blitter_ram, A1_FSTEP + 2));
2545 // a1_y += (int32_t)((int16_t)GET16(blitter_ram, A1_FSTEP + 0));
2546 a1_x += GET16(blitter_ram, A1_FSTEP + 2);
2547 a1_y += GET16(blitter_ram, A1_FSTEP + 0);
2551 a1update: // Update A1 pointer integers
2553 a1update A1 step is added to A1 pointer, with carry from the fractional add
2554 POLYGON true: A1 step delta X and Y integer parts are added to the A1
2555 step X and Y integer parts, with carry from the corresponding
2556 fractional part add (again, the value prior to this add is used for
2557 the step to pointer add).
2558 POLYGON true: inner count step is added to the inner count, with carry
2559 POLYGON.GOURD true: the I step is added to the computed intensities,
2561 POLYGON.GOURD true: the I step delta is added to the I step, with
2562 carry the texture X and Y step delta values are added to the X and Y
2564 if GOURZ.POLYGON goto zfupdate
2565 else if UPDA2 goto a2update
2566 else if DATINIT goto init_if
2569 a1_x += (int32_t)(GET16(blitter_ram, A1_STEP + 2) << 16);
2570 a1_y += (int32_t)(GET16(blitter_ram, A1_STEP + 0) << 16);
2573 //kill this, for now...
2574 // if (GOURZ.POLYGON)
2584 zfupdate: // Update computed Z step fractions
2586 zfupdate the Z fraction step is added to the computed Z fraction parts +
2587 the Z fraction step delta is added to the Z fraction step
2592 zupdate: // Update computed Z step integers
2594 zupdate the Z step is added to the computed Zs, with carry +
2595 the Z step delta is added to the Z step, with carry
2596 if UPDA2 goto a2update
2597 else if DATINIT goto init_if
2607 a2update: // Update A2 pointer
2609 a2update A2 step is added to the A2 pointer
2610 if DATINIT goto init_if
2613 a2_x += (int32_t)(GET16(blitter_ram, A2_STEP + 2) << 16);
2614 a2_y += (int32_t)(GET16(blitter_ram, A2_STEP + 0) << 16);
2622 init_if: // Initialise intensity fractions and texture X
2624 init_if Initialise the fractional part of the computed intensity fields, from
2625 the increment and step registers. The texture X integer and fractional
2626 parts can also be initialised.
2631 init_ii: // Initialise intensity integers and texture Y
2633 init_ii Initialise the integer part of the computed intensity, and texture Y
2634 integer and fractional parts
2635 if GOURZ goto init_zf
2643 init_zf: // Initialise Z fractions
2645 init_zf Initialise the fractional part of the computed Z fields.
2650 init_zi: // Initialise Z integers
2652 init_zi Initialise the integer part of the computed Z fields.
2659 The outer loop state machine fires off the inner loop, and controls the updating
2660 process between passes through the inner loop.
2662 + -- these functions are irrelevant if the DATINIT function is enabled, which it
2665 All these states will complete in one clock cycle, with the exception of the idle
2666 state, which means the blitter is quiescent; and the inner state, which takes as
2667 long as is required to complete one strip of pixels. It is therefore possible for
2668 the blitter to spend a maximum of nine clock cycles of inactivity between passes
2669 through the inner loop.
2679 // Here's attempt #2--taken from the Oberon chip specs!
2682 #ifdef USE_MIDSUMMER_BLITTER_MKII
2684 void ADDRGEN(uint32_t &, uint32_t &, bool, bool, // Prototypes declared ahead of BlitterMidsummer2(), inside #ifdef USE_MIDSUMMER_BLITTER_MKII. ADDRGEN's parameters are unnamed here; see its definition for their meaning.
2685 uint16_t, uint16_t, uint32_t, uint8_t, uint8_t, uint8_t, uint8_t,
2686 uint16_t, uint16_t, uint32_t, uint8_t, uint8_t, uint8_t, uint8_t);
2687 void ADDARRAY(uint16_t * addq, uint8_t daddasel, uint8_t daddbsel, uint8_t daddmode, // NOTE(review): addq is a non-const pointer, presumably the result array -- confirm against the definition
2688 uint64_t dstd, uint32_t iinc, uint8_t initcin[], uint64_t initinc, uint16_t initpix,
2689 uint32_t istep, uint64_t patd, uint64_t srcd, uint64_t srcz1, uint64_t srcz2,
2690 uint32_t zinc, uint32_t zstep);
2691 void ADD16SAT(uint16_t &r, uint8_t &co, uint16_t a, uint16_t b, uint8_t cin, bool sat, bool eightbit, bool hicinh); // NOTE(review): r and co are non-const references, presumably the sum and carry-out -- confirm against the definition
2692 void ADDAMUX(int16_t &adda_x, int16_t &adda_y, uint8_t addasel, int16_t a1_step_x, int16_t a1_step_y, // adda_x/adda_y are non-const references; every other parameter is passed by value
2693 int16_t a1_stepf_x, int16_t a1_stepf_y, int16_t a2_step_x, int16_t a2_step_y,
2694 int16_t a1_inc_x, int16_t a1_inc_y, int16_t a1_incf_x, int16_t a1_incf_y, uint8_t adda_xconst,
2695 bool adda_yconst, bool addareg, bool suba_x, bool suba_y);
2696 void ADDBMUX(int16_t &addb_x, int16_t &addb_y, uint8_t addbsel, int16_t a1_x, int16_t a1_y, // addb_x/addb_y are non-const references; every other parameter is passed by value
2697 int16_t a2_x, int16_t a2_y, int16_t a1_frac_x, int16_t a1_frac_y);
2698 void DATAMUX(int16_t &data_x, int16_t &data_y, uint32_t gpu_din, int16_t addq_x, int16_t addq_y, bool addqsel); // data_x/data_y are non-const references; every other parameter is passed by value
2699 void ADDRADD(int16_t &addq_x, int16_t &addq_y, bool a1fracldi, // addq_x/addq_y are non-const references; every other parameter is passed by value
2700 uint16_t adda_x, uint16_t adda_y, uint16_t addb_x, uint16_t addb_y, uint8_t modx, bool suba_x, bool suba_y);
2701 void DATA(uint64_t &wdata, uint8_t &dcomp, uint8_t &zcomp, bool &nowrite, // wdata, dcomp, zcomp, nowrite, patd and srcz are non-const references, so the callee may modify the caller's values
2702 bool big_pix, bool cmpdst, uint8_t daddasel, uint8_t daddbsel, uint8_t daddmode, bool daddq_sel, uint8_t data_sel,
2703 uint8_t dbinh, uint8_t dend, uint8_t dstart, uint64_t dstd, uint32_t iinc, uint8_t lfu_func, uint64_t &patd, bool patdadd,
2704 bool phrase_mode, uint64_t srcd, bool srcdread, bool srczread, bool srcz2add, uint8_t zmode,
2705 bool bcompen, bool bkgwren, bool dcompen, uint8_t icount, uint8_t pixsize,
2706 uint64_t &srcz, uint64_t dstz, uint32_t zinc);
2707 void COMP_CTRL(uint8_t &dbinh, bool &nowrite, // dbinh and nowrite are non-const references; every other parameter is passed by value
2708 bool bcompen, bool big_pix, bool bkgwren, uint8_t dcomp, bool dcompen, uint8_t icount,
2709 uint8_t pixsize, bool phrase_mode, uint8_t srcd, uint8_t zcomp);
2710 #define VERBOSE_BLITTER_LOGGING // Enables the #ifdef VERBOSE_BLITTER_LOGGING section of BlitterMidsummer2(), which WriteLog()s the decoded command flags
2712 void BlitterMidsummer2(void)
2717 if (startConciseBlitLogging)
2720 // Here's what the specs say the state machine does. Note that this can probably be
2721 // greatly simplified (also, it's different from what John has in his Oberon docs):
2722 //Will remove stuff that isn't in Jaguar I once fully described (stuff like texture won't
2723 //be described here at all)...
2725 uint32_t cmd = GET32(blitter_ram, COMMAND);
2730 cmd != 0x00010200 && // PATDSEL
2731 cmd != 0x01800001 // SRCEN LFUFUNC=C
2732 && cmd != 0x01800005
2733 //Boot ROM ATARI letters:
2734 && cmd != 0x00011008 // DSTEN GOURD PATDSEL
2735 //Boot ROM spinning cube:
2736 && cmd != 0x41802F41 // SRCEN CLIP_A1 UPDA1 UPDA1F UPDA2 DSTA2 GOURZ ZMODE=0 LFUFUNC=C SRCSHADE
2738 && cmd != 0x01800E01 // SRCEN UPDA1 UPDA2 DSTA2 LFUFUNC=C
2739 //T2K TEMPEST letters:
2740 && cmd != 0x09800741 // SRCEN CLIP_A1 UPDA1 UPDA1F UPDA2 LFUFUNC=C DCOMPEN
2741 //Static letters on Cybermorph intro screen:
2742 && cmd != 0x09800609 // SRCEN DSTEN UPDA1 UPDA2 LFUFUNC=C DCOMPEN
2743 //Static pic on title screen:
2744 && cmd != 0x01800601 // SRCEN UPDA1 UPDA2 LFUFUNC=C
2745 //Turning letters on Cybermorph intro screen:
2746 // && cmd != 0x09800F41 // SRCEN CLIP_A1 UPDA1 UPDA1F UPDA2 DSTA2 LFUFUNC=C DCOMPEN
2747 && cmd != 0x00113078 // DSTEN DSTENZ DSTWRZ CLIP_A1 GOURD GOURZ PATDSEL ZMODE=4
2748 && cmd != 0x09900F39 // SRCEN DSTEN DSTENZ DSTWRZ UPDA1 UPDA1F UPDA2 DSTA2 ZMODE=4 LFUFUNC=C DCOMPEN
2749 && cmd != 0x09800209 // SRCEN DSTEN UPDA1 LFUFUNC=C DCOMPEN
2750 && cmd != 0x00011200 // UPDA1 GOURD PATDSEL
2751 //Start of Hover Strike (clearing screen):
2752 && cmd != 0x00010000 // PATDSEL
2753 //Hover Strike text:
2754 && cmd != 0x1401060C // SRCENX DSTEN UPDA1 UPDA2 PATDSEL BCOMPEN BKGWREN
2755 //Hover Strike 3D stuff
2756 && cmd != 0x01902839 // SRCEN DSTEN DSTENZ DSTWRZ DSTA2 GOURZ ZMODE=4 LFUFUNC=C
2757 //Hover Strike darkening on intro to play (briefing) screen
2758 && cmd != 0x00020208 // DSTEN UPDA1 ADDDSEL
2759 //Trevor McFur stuff:
2760 && cmd != 0x05810601 // SRCEN UPDA1 UPDA2 PATDSEL BCOMPEN
2761 && cmd != 0x01800201 // SRCEN UPDA1 LFUFUNC=C
2763 && cmd != 0x00011000 // GOURD PATDSEL
2764 && cmd != 0x00011040 // CLIP_A1 GOURD PATDSEL
2766 && cmd != 0x01800000 // LFUFUNC=C
2767 && cmd != 0x01800401 //
2768 && cmd != 0x01800040 //
2769 && cmd != 0x00020008 //
2770 // && cmd != 0x09800F41 // SRCEN CLIP_A1 UPDA1 UPDA1F UPDA2 DSTA2 LFUFUNC=C DCOMPEN
2776 if (blit_start_log == 0) // Wait for the signal...
2777 logBlit = false;//*/
2778 //temp, for testing...
2779 /*if (cmd != 0x49820609)
2780 logBlit = false;//*/
2783 Some T2K unique blits:
2784 logBlit = F, cmd = 00010200 *
2785 logBlit = F, cmd = 00011000
2786 logBlit = F, cmd = 00011040
2787 logBlit = F, cmd = 01800005 *
2788 logBlit = F, cmd = 09800741 *
2790 Hover Strike mission selection screen:
2791 Blit! (CMD = 01902839) // SRCEN DSTEN DSTENZ DSTWRZ DSTA2 GOURZ ZMODE=4 LFUFUNC=C
2793 Checkered Flag blits in the screw up zone:
2794 Blit! (CMD = 01800001) // SRCEN LFUFUNC=C
2795 Blit! (CMD = 01800000) // LFUFUNC=C
2796 Blit! (CMD = 00010000) // PATDSEL
2798 Wolfenstein 3D in the fuckup zone:
2799 Blit! (CMD = 01800000) // LFUFUNC=C
2802 //printf("logBlit = %s, cmd = %08X\n", (logBlit ? "T" : "F"), cmd);
2807 Blit! (CMD = 00011040)
2808 Flags: CLIP_A1 GOURD PATDSEL
2810 a1_base = 00100000, a2_base = 0081F6A8
2811 a1_x = 00A7, a1_y = 0014, a1_frac_x = 0000, a1_frac_y = 0000, a2_x = 0001, a2_y = 0000
2812 a1_step_x = FE80, a1_step_y = 0001, a1_stepf_x = 0000, a1_stepf_y = 0000, a2_step_x = FFF8, a2_step_y = 0001
2813 a1_inc_x = 0001, a1_inc_y = 0000, a1_incf_x = 0000, a1_incf_y = 0000
2814 a1_win_x = 0180, a1_win_y = 0118, a2_mask_x = 0000, a2_mask_y = 0000
2815 a2_mask=F a1add=+phr/+0 a2add=+phr/+0
2816 a1_pixsize = 4, a2_pixsize = 4
2820 if (cmd == 0x00011040
2821 && (GET16(blitter_ram, A1_PIXEL + 2) == 0x00A7) && (GET16(blitter_ram, A1_PIXEL + 0) == 0x0014)
2822 && (GET16(blitter_ram, A2_PIXEL + 2) == 0x0001) && (GET16(blitter_ram, A2_PIXEL + 0) == 0x0000)
2823 && (GET16(blitter_ram, PIXLINECOUNTER + 2) == 18))
2826 // Line states passed in via the command register
2828 bool srcen = (SRCEN), srcenx = (SRCENX), srcenz = (SRCENZ),
2829 dsten = (DSTEN), dstenz = (DSTENZ), dstwrz = (DSTWRZ), clip_a1 = (CLIPA1),
2830 upda1 = (UPDA1), upda1f = (UPDA1F), upda2 = (UPDA2), dsta2 = (DSTA2),
2831 gourd = (GOURD), gourz = (GOURZ), topben = (TOPBEN), topnen = (TOPNEN),
2832 patdsel = (PATDSEL), adddsel = (ADDDSEL), cmpdst = (CMPDST), bcompen = (BCOMPEN),
2833 dcompen = (DCOMPEN), bkgwren = (BKGWREN), srcshade = (SRCSHADE);
2835 uint8_t zmode = (cmd & 0x01C0000) >> 18, lfufunc = (cmd & 0x1E00000) >> 21;
2837 //Where to find various lines:
2839 // gourd -> dcontrol, inner, outer, state
2840 // gourz -> dcontrol, inner, outer, state
2841 // cmpdst -> blit, data, datacomp, state
2842 // bcompen -> acontrol, inner, mcontrol, state
2843 // dcompen -> inner, state
2844 // bkgwren -> inner, state
2845 // srcshade -> dcontrol, inner, state
2846 // adddsel -> dcontrol
2847 //NOTE: ADDDSEL takes precedence over PATDSEL, PATDSEL over LFU_FUNC
2848 #ifdef VERBOSE_BLITTER_LOGGING
2851 char zfs[512], lfus[512];
2852 zfs[0] = lfus[0] = 0;
2853 if (dstwrz || dstenz || gourz)
2854 sprintf(zfs, " ZMODE=%X", zmode);
2855 if (!(patdsel || adddsel))
2856 sprintf(lfus, " LFUFUNC=%X", lfufunc);
2857 WriteLog("\nBlit! (CMD = %08X)\nFlags:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n", cmd,
2858 (srcen ? " SRCEN" : ""), (srcenx ? " SRCENX" : ""), (srcenz ? " SRCENZ" : ""),
2859 (dsten ? " DSTEN" : ""), (dstenz ? " DSTENZ" : ""), (dstwrz ? " DSTWRZ" : ""),
2860 (clip_a1 ? " CLIP_A1" : ""), (upda1 ? " UPDA1" : ""), (upda1f ? " UPDA1F" : ""),
2861 (upda2 ? " UPDA2" : ""), (dsta2 ? " DSTA2" : ""), (gourd ? " GOURD" : ""),
2862 (gourz ? " GOURZ" : ""), (topben ? " TOPBEN" : ""), (topnen ? " TOPNEN" : ""),
2863 (patdsel ? " PATDSEL" : ""), (adddsel ? " ADDDSEL" : ""), zfs, lfus, (cmpdst ? " CMPDST" : ""),
2864 (bcompen ? " BCOMPEN" : ""), (dcompen ? " DCOMPEN" : ""), (bkgwren ? " BKGWREN" : ""),
2865 (srcshade ? " SRCSHADE" : ""));
2866 WriteLog(" count = %d x %d\n", GET16(blitter_ram, PIXLINECOUNTER + 2), GET16(blitter_ram, PIXLINECOUNTER));
2870 // Lines that don't exist in Jaguar I (and will never be asserted)
2872 bool polygon = false, datinit = false, a1_stepld = false, a2_stepld = false, ext_int = false;
2873 bool istepadd = false, istepfadd = false, finneradd = false, inneradd = false;
2874 bool zstepfadd = false, zstepadd = false;
2876 // Various state lines (initial state--basically the reset state of the FDSYNCs)
2878 bool go = true, idle = true, inner = false, a1fupdate = false, a1update = false,
2879 zfupdate = false, zupdate = false, a2update = false, init_if = false, init_ii = false,
2880 init_zf = false, init_zi = false;
2882 bool outer0 = false, indone = false;
2884 bool idlei, inneri, a1fupdatei, a1updatei, zfupdatei, zupdatei, a2updatei, init_ifi, init_iii,
2887 bool notgzandp = !(gourz && polygon);
2889 // Various registers set up by user
2891 uint16_t ocount = GET16(blitter_ram, PIXLINECOUNTER);
2892 uint8_t a1_pitch = blitter_ram[A1_FLAGS + 3] & 0x03;
2893 uint8_t a2_pitch = blitter_ram[A2_FLAGS + 3] & 0x03;
2894 uint8_t a1_pixsize = (blitter_ram[A1_FLAGS + 3] & 0x38) >> 3;
2895 uint8_t a2_pixsize = (blitter_ram[A2_FLAGS + 3] & 0x38) >> 3;
2896 uint8_t a1_zoffset = (GET16(blitter_ram, A1_FLAGS + 2) >> 6) & 0x07;
2897 uint8_t a2_zoffset = (GET16(blitter_ram, A2_FLAGS + 2) >> 6) & 0x07;
2898 uint8_t a1_width = (blitter_ram[A1_FLAGS + 2] >> 1) & 0x3F;
2899 uint8_t a2_width = (blitter_ram[A2_FLAGS + 2] >> 1) & 0x3F;
2900 bool a2_mask = blitter_ram[A2_FLAGS + 2] & 0x80;
2901 uint8_t a1addx = blitter_ram[A1_FLAGS + 1] & 0x03, a2addx = blitter_ram[A2_FLAGS + 1] & 0x03;
2902 bool a1addy = blitter_ram[A1_FLAGS + 1] & 0x04, a2addy = blitter_ram[A2_FLAGS + 1] & 0x04;
2903 bool a1xsign = blitter_ram[A1_FLAGS + 1] & 0x08, a2xsign = blitter_ram[A2_FLAGS + 1] & 0x08;
2904 bool a1ysign = blitter_ram[A1_FLAGS + 1] & 0x10, a2ysign = blitter_ram[A2_FLAGS + 1] & 0x10;
2905 uint32_t a1_base = GET32(blitter_ram, A1_BASE) & 0xFFFFFFF8; // Phrase aligned by ignoring bottom 3 bits
2906 uint32_t a2_base = GET32(blitter_ram, A2_BASE) & 0xFFFFFFF8;
2908 uint16_t a1_win_x = GET16(blitter_ram, A1_CLIP + 2) & 0x7FFF;
2909 uint16_t a1_win_y = GET16(blitter_ram, A1_CLIP + 0) & 0x7FFF;
2910 int16_t a1_x = (int16_t)GET16(blitter_ram, A1_PIXEL + 2);
2911 int16_t a1_y = (int16_t)GET16(blitter_ram, A1_PIXEL + 0);
2912 int16_t a1_step_x = (int16_t)GET16(blitter_ram, A1_STEP + 2);
2913 int16_t a1_step_y = (int16_t)GET16(blitter_ram, A1_STEP + 0);
2914 uint16_t a1_stepf_x = GET16(blitter_ram, A1_FSTEP + 2);
2915 uint16_t a1_stepf_y = GET16(blitter_ram, A1_FSTEP + 0);
2916 uint16_t a1_frac_x = GET16(blitter_ram, A1_FPIXEL + 2);
2917 uint16_t a1_frac_y = GET16(blitter_ram, A1_FPIXEL + 0);
2918 int16_t a1_inc_x = (int16_t)GET16(blitter_ram, A1_INC + 2);
2919 int16_t a1_inc_y = (int16_t)GET16(blitter_ram, A1_INC + 0);
2920 uint16_t a1_incf_x = GET16(blitter_ram, A1_FINC + 2);
2921 uint16_t a1_incf_y = GET16(blitter_ram, A1_FINC + 0);
2923 int16_t a2_x = (int16_t)GET16(blitter_ram, A2_PIXEL + 2);
2924 int16_t a2_y = (int16_t)GET16(blitter_ram, A2_PIXEL + 0);
2925 uint16_t a2_mask_x = GET16(blitter_ram, A2_MASK + 2);
2926 uint16_t a2_mask_y = GET16(blitter_ram, A2_MASK + 0);
2927 int16_t a2_step_x = (int16_t)GET16(blitter_ram, A2_STEP + 2);
2928 int16_t a2_step_y = (int16_t)GET16(blitter_ram, A2_STEP + 0);
2930 uint64_t srcd1 = GET64(blitter_ram, SRCDATA);
2932 uint64_t dstd = GET64(blitter_ram, DSTDATA);
2933 uint64_t patd = GET64(blitter_ram, PATTERNDATA);
2934 uint32_t iinc = GET32(blitter_ram, INTENSITYINC);
2935 uint64_t srcz1 = GET64(blitter_ram, SRCZINT);
2936 uint64_t srcz2 = GET64(blitter_ram, SRCZFRAC);
2937 uint64_t dstz = GET64(blitter_ram, DSTZ);
2938 uint32_t zinc = GET32(blitter_ram, ZINC);
2939 uint32_t collision = GET32(blitter_ram, COLLISIONCTRL);// 0=RESUME, 1=ABORT, 2=STOPEN
2941 uint8_t pixsize = (dsta2 ? a2_pixsize : a1_pixsize); // From ACONTROL
2943 //Testing Trevor McFur--I *think* it's the circle on the lower RHS of the screen...
2945 if (cmd == 0x05810601 && (GET16(blitter_ram, PIXLINECOUNTER + 2) == 96)
2946 && (GET16(blitter_ram, PIXLINECOUNTER + 0) == 72))
2949 //if (cmd == 0x1401060C) patd = 0xFFFFFFFFFFFFFFFFLL;
2950 //if (cmd == 0x1401060C) patd = 0x00000000000000FFLL;
2951 //If it's still not working (bcompen-patd) then see who's writing what to patd and where...
2952 //Still not OK. Check to see who's writing what to where in patd!
2953 //It looks like M68K is writing to the top half of patd... Hmm...
2955 ----> M68K wrote 0000 to byte 15737344 of PATTERNDATA...
2956 --> M68K wrote 00 to byte 0 of PATTERNDATA...
2957 --> M68K wrote 00 to byte 1 of PATTERNDATA...
2958 ----> M68K wrote 00FF to byte 15737346 of PATTERNDATA...
2959 --> M68K wrote 00 to byte 2 of PATTERNDATA...
2960 --> M68K wrote FF to byte 3 of PATTERNDATA...
2961 logBlit = F, cmd = 1401060C
2963 Wren0 := ND6 (wren\[0], gpua\[5], gpua\[6..8], bliten, gpu_memw);
2964 Wren1 := ND6 (wren\[1], gpua[5], gpua\[6..8], bliten, gpu_memw);
2965 Wren2 := ND6 (wren\[2], gpua\[5], gpua[6], gpua\[7..8], bliten, gpu_memw);
2966 Wren3 := ND6 (wren\[3], gpua[5], gpua[6], gpua\[7..8], bliten, gpu_memw);
2969 Dec0 := D38GH (a1baseld, a1flagld, a1winld, a1ptrld, a1stepld, a1stepfld, a1fracld, a1incld, gpua[2..4], wren\[0]);
2971 Dec1 := D38GH (a1incfld, a2baseld, a2flagld, a2maskld, a2ptrldg, a2stepld, cmdldt, countldt, gpua[2..4], wren\[1]);
2973 Dec2 := D38GH (srcd1ldg[0..1], dstdldg[0..1], dstzldg[0..1], srcz1ldg[0..1], gpua[2..4], wren\[2]);
2975 Dec3 := D38GH (srcz2ld[0..1], patdld[0..1], iincld, zincld, stopld, intld[0], gpua[2..4], wren\[3]);
2977 wren[3] is asserted when gpu address bus = 0 011x xx00
2978 patdld[0] -> 0 0110 1000 -> $F02268 (lo 32 bits)
2979 patdld[1] -> 0 0110 1100 -> $F0226C (hi 32 bits)
2981 So... It's reversed! The data organization of the patd register is [low 32][high 32]! !!! FIX !!! [DONE]
2982 And fix all the other 64 bit registers [DONE]
2984 /*if (cmd == 0x1401060C)
2986 printf("logBlit = %s, cmd = %08X\n", (logBlit ? "T" : "F"), cmd);
2990 if ((cmd == 0x00010200) && (GET16(blitter_ram, PIXLINECOUNTER + 2) == 9))
2993 ; Pink altimeter bar
2995 Blit! (00110000 <- 000BF010) count: 9 x 23, A1/2_FLAGS: 000042E2/00010020 [cmd: 00010200]
2996 CMD -> src: dst: misc: a1ctl: UPDA1 mode: ity: PATDSEL z-op: op: LFU_CLEAR ctrl:
2997 A1 step values: -10 (X), 1 (Y)
2998 A1 -> pitch: 4 phrases, depth: 16bpp, z-off: 3, width: 320 (21), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
2999 A2 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 1 (00), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
3000 A1 x/y: 262/132, A2 x/y: 129/0
3001 ;x-coord is 257 in pic, so add 5
3002 ;20 for ship, 33 for #... Let's see if we can find 'em!
3004 ; Black altimeter bar
3006 Blit! (00110000 <- 000BF010) count: 5 x 29, A1/2_FLAGS: 000042E2/00010020 [cmd: 00010200]
3007 CMD -> src: dst: misc: a1ctl: UPDA1 mode: ity: PATDSEL z-op: op: LFU_CLEAR ctrl:
3008 A1 step values: -8 (X), 1 (Y)
3009 A1 -> pitch: 4 phrases, depth: 16bpp, z-off: 3, width: 320 (21), addctl: XADDPHR YADD0 XSIGNADD YSIGNADD
3010 A2 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 1 (00), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
3011 A1 x/y: 264/126, A2 x/y: 336/0
3013 Here's the pink bar--note that it's phrase mode without dread, so how does this work???
3014 Not sure, but I *think* that somehow it MUXes the data at the write site in on the left or right side
3015 of the write data when masked in phrase mode. I'll have to do some tracing to see if this is the mechanism
3018 Blit! (CMD = 00010200)
3019 Flags: UPDA1 PATDSEL
3021 a1_base = 00110010, a2_base = 000BD7E0
3022 a1_x = 0106, a1_y = 0090, a1_frac_x = 0000, a1_frac_y = 8000, a2_x = 025A, a2_y = 0000
3023 a1_step_x = FFF6, a1_step_y = 0001, a1_stepf_x = 5E00, a1_stepf_y = D100, a2_step_x = FFF7, a2_step_y = 0001
3024 a1_inc_x = 0001, a1_inc_y = FFFF, a1_incf_x = 0000, a1_incf_y = E000
3025 a1_win_x = 0000, a1_win_y = 0000, a2_mask_x = 0000, a2_mask_y = 0000
3026 a2_mask=F a1add=+phr/+0 a2add=+1/+0
3027 a1_pixsize = 4, a2_pixsize = 4
3028 srcd=BAC673AC2C92E578 dstd=0000000000000000 patd=74C074C074C074C0 iinc=0002E398
3029 srcz1=7E127E12000088DA srcz2=DBE06DF000000000 dstz=0000000000000000 zinc=FFFE4840, coll=0
3031 [in=T a1f=F a1=F zf=F z=F a2=F iif=F iii=F izf=F izi=F]
3032 Entering INNER state...
3033 Entering DWRITE state...
3034 Dest write address/pix address: 0016A830/0 [dstart=20 dend=40 pwidth=8 srcshift=0][daas=0 dabs=0 dam=7 ds=0 daq=F] [7400000074C074C0] (icount=0007, inc=2)
3035 Entering A1_ADD state [a1_x=0106, a1_y=0090, addasel=0, addbsel=0, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
3036 Entering DWRITE state...
3037 Dest write address/pix address: 0016A850/0 [dstart=0 dend=40 pwidth=8 srcshift=0][daas=0 dabs=0 dam=7 ds=0 daq=F] [74C074C074C074C0] (icount=0003, inc=4)
3038 Entering A1_ADD state [a1_x=0108, a1_y=0090, addasel=0, addbsel=0, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
3039 Entering DWRITE state...
3040 Dest write address/pix address: 0016A870/0 [dstart=0 dend=30 pwidth=8 srcshift=0][daas=0 dabs=0 dam=7 ds=0 daq=F] [74C074C074C00000] (icount=FFFF, inc=4)
3041 Entering A1_ADD state [a1_x=010C, a1_y=0090, addasel=0, addbsel=0, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
3042 Entering IDLE_INNER state...
3043 Leaving INNER state... (ocount=000A)
3044 [in=F a1f=F a1=T zf=F z=F a2=F iif=F iii=F izf=F izi=F]
3045 Entering A1UPDATE state... (272/144 -> 262/145)
3046 [in=T a1f=F a1=F zf=F z=F a2=F iif=F iii=F izf=F izi=F]
3047 Entering INNER state...
3052 a2addy = a1addy; // A2 channel Y add bit is tied to A1's
3054 //if (logBlit && (ocount > 20)) logBlit = false;
3055 #ifdef VERBOSE_BLITTER_LOGGING
3058 WriteLog(" a1_base = %08X, a2_base = %08X\n", a1_base, a2_base);
3059 WriteLog(" a1_x = %04X, a1_y = %04X, a1_frac_x = %04X, a1_frac_y = %04X, a2_x = %04X, a2_y = %04X\n", (uint16_t)a1_x, (uint16_t)a1_y, a1_frac_x, a1_frac_y, (uint16_t)a2_x, (uint16_t)a2_y);
3060 WriteLog(" a1_step_x = %04X, a1_step_y = %04X, a1_stepf_x = %04X, a1_stepf_y = %04X, a2_step_x = %04X, a2_step_y = %04X\n", (uint16_t)a1_step_x, (uint16_t)a1_step_y, a1_stepf_x, a1_stepf_y, (uint16_t)a2_step_x, (uint16_t)a2_step_y);
3061 WriteLog(" a1_inc_x = %04X, a1_inc_y = %04X, a1_incf_x = %04X, a1_incf_y = %04X\n", (uint16_t)a1_inc_x, (uint16_t)a1_inc_y, a1_incf_x, a1_incf_y);
3062 WriteLog(" a1_win_x = %04X, a1_win_y = %04X, a2_mask_x = %04X, a2_mask_y = %04X\n", a1_win_x, a1_win_y, a2_mask_x, a2_mask_y);
3063 char x_add_str[4][4] = { "phr", "1", "0", "inc" };
3064 WriteLog(" a2_mask=%s a1add=%s%s/%s%s a2add=%s%s/%s%s\n", (a2_mask ? "T" : "F"), (a1xsign ? "-" : "+"), x_add_str[a1addx],
3065 (a1ysign ? "-" : "+"), (a1addy ? "1" : "0"), (a2xsign ? "-" : "+"), x_add_str[a2addx],
3066 (a2ysign ? "-" : "+"), (a2addy ? "1" : "0"));
3067 WriteLog(" a1_pixsize = %u, a2_pixsize = %u\n", a1_pixsize, a2_pixsize);
3068 WriteLog(" srcd=%08X%08X dstd=%08X%08X patd=%08X%08X iinc=%08X\n",
3069 (uint32_t)(srcd1 >> 32), (uint32_t)(srcd1 & 0xFFFFFFFF),
3070 (uint32_t)(dstd >> 32), (uint32_t)(dstd & 0xFFFFFFFF),
3071 (uint32_t)(patd >> 32), (uint32_t)(patd & 0xFFFFFFFF), iinc);
3072 WriteLog(" srcz1=%08X%08X srcz2=%08X%08X dstz=%08X%08X zinc=%08X, coll=%X\n",
3073 (uint32_t)(srcz1 >> 32), (uint32_t)(srcz1 & 0xFFFFFFFF),
3074 (uint32_t)(srcz2 >> 32), (uint32_t)(srcz2 & 0xFFFFFFFF),
3075 (uint32_t)(dstz >> 32), (uint32_t)(dstz & 0xFFFFFFFF), zinc, collision);
3079 // Various state lines set up by user
3081 bool phrase_mode = ((!dsta2 && a1addx == 0) || (dsta2 && a2addx == 0) ? true : false); // From ACONTROL
3082 #ifdef VERBOSE_BLITTER_LOGGING
3084 WriteLog(" Phrase mode is %s\n", (phrase_mode ? "ON" : "off"));
3088 // Stopgap vars to simulate various lines
3090 uint16_t a1FracCInX = 0, a1FracCInY = 0;
3096 if ((idle && !go) || (inner && outer0 && indone))
3098 #ifdef VERBOSE_BLITTER_LOGGING
3100 WriteLog(" Entering IDLE state...\n");
3104 //Instead of a return, let's try breaking out of the loop...
3111 // INNER LOOP ACTIVE
3113 Entering DWRITE state... (icount=0000, inc=4)
3114 Entering IDLE_INNER state...
3115 Leaving INNER state... (ocount=00EF)
3116 [in=T a1f=F a1=T zf=F z=F a2=F iif=F iii=F izf=F izi=F]
3117 Entering INNER state...
3119 [in=F a1f=F a1=F zf=F z=F a2=F iif=F iii=F izf=F izi=F]
3122 if ((idle && go && !datinit)
3123 || (inner && !indone)
3124 || (inner && indone && !outer0 && !upda1f && !upda1 && notgzandp && !upda2 && !datinit)
3125 || (a1update && !upda2 && notgzandp && !datinit)
3126 || (zupdate && !upda2 && !datinit)
3127 || (a2update && !datinit)
3128 || (init_ii && !gourz)
3136 // A1 FRACTION UPDATE
3138 if (inner && indone && !outer0 && upda1f)
3145 // A1 POINTER UPDATE
3148 || (inner && indone && !outer0 && !upda1f && upda1))
3155 // Z FRACTION UPDATE
3157 if ((a1update && gourz && polygon)
3158 || (inner && indone && !outer0 && !upda1f && !upda1 && gourz && polygon))
3174 // A2 POINTER UPDATE
3176 if ((a1update && upda2 && notgzandp)
3177 || (zupdate && upda2)
3178 || (inner && indone && !outer0 && !upda1f && notgzandp && !upda1 && upda2))
3185 // INITIALIZE INTENSITY FRACTION
3187 if ((zupdate && !upda2 && datinit)
3188 || (a1update && !upda2 && datinit && notgzandp)
3189 || (inner && indone && !outer0 && !upda1f && !upda1 && notgzandp && !upda2 && datinit)
3190 || (a2update && datinit)
3191 || (idle && go && datinit))
3198 // INITIALIZE INTENSITY INTEGER
3207 // INITIALIZE Z FRACTION
3209 if (init_ii && gourz)
3216 // INITIALIZE Z INTEGER
3225 // Here we move the fooi into their foo counterparts in order to simulate the moving
3226 // of data into the various FDSYNCs... Each time we loop we simulate one clock cycle...
3230 a1fupdate = a1fupdatei;
3231 a1update = a1updatei;
3232 zfupdate = zfupdatei; // *
3233 zupdate = zupdatei; // *
3234 a2update = a2updatei;
3235 init_if = init_ifi; // *
3236 init_ii = init_iii; // *
3237 init_zf = init_zfi; // *
3238 init_zi = init_zii; // *
3239 // * denotes states that will never assert for Jaguar I
3240 #ifdef VERBOSE_BLITTER_LOGGING
3242 WriteLog(" [in=%c a1f=%c a1=%c zf=%c z=%c a2=%c iif=%c iii=%c izf=%c izi=%c]\n",
3243 (inner ? 'T' : 'F'), (a1fupdate ? 'T' : 'F'), (a1update ? 'T' : 'F'),
3244 (zfupdate ? 'T' : 'F'), (zupdate ? 'T' : 'F'), (a2update ? 'T' : 'F'),
3245 (init_if ? 'T' : 'F'), (init_ii ? 'T' : 'F'), (init_zf ? 'T' : 'F'),
3246 (init_zi ? 'T' : 'F'));
3249 // Now, depending on how we want to handle things, we could either put the implementation
3250 // of the various pieces up above, or handle them down below here.
3252 // Let's try postprocessing for now...
3257 #ifdef VERBOSE_BLITTER_LOGGING
3259 WriteLog(" Entering INNER state...\n");
3261 uint16_t icount = GET16(blitter_ram, PIXLINECOUNTER + 2);
3262 bool idle_inner = true, step = true, sreadx = false, szreadx = false, sread = false,
3263 szread = false, dread = false, dzread = false, dwrite = false, dzwrite = false;
3264 bool inner0 = false;
3265 bool idle_inneri, sreadxi, szreadxi, sreadi, szreadi, dreadi, dzreadi, dwritei, dzwritei;
3267 // State lines that will never assert in Jaguar I
3269 bool textext = false, txtread = false;
3272 uint8_t srcshift = 0;
3273 bool sshftld = true; // D flipflop (D -> Q): instart -> sshftld
3274 //NOTE: sshftld probably is only asserted at the beginning of the inner loop. !!! FIX !!!
3276 Blit! (CMD = 01800005)
3277 Flags: SRCEN SRCENX LFUFUNC=C
3279 a1_base = 00037290, a2_base = 000095D0
3280 a1_x = 0000, a1_y = 0000, a2_x = 0002, a2_y = 0000
3281 a1_pixsize = 4, a2_pixsize = 4
3282 srcd=0000000000000000, dstd=0000000000000000, patd=0000000000000000
3284 [in=T a1f=F a1=F zf=F z=F a2=F iif=F iii=F izf=F izi=F]
3285 Entering INNER state...
3286 Entering SREADX state... [dstart=0 dend=20 pwidth=8 srcshift=20]
3287 Source extra read address/pix address: 000095D4/0 [0000001C00540038]
3288 Entering A2_ADD state [a2_x=0002, a2_y=0000, addasel=0, addbsel=1, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
3289 Entering SREAD state... [dstart=0 dend=20 pwidth=8 srcshift=0]
3290 Source read address/pix address: 000095D8/0 [0054003800009814]
3291 Entering A2_ADD state [a2_x=0004, a2_y=0000, addasel=0, addbsel=1, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
3292 Entering DWRITE state...
3293 Dest write address/pix address: 00037290/0 [dstart=0 dend=20 pwidth=8 srcshift=0] (icount=026E, inc=4)
3294 Entering A1_ADD state [a1_x=0000, a1_y=0000, addasel=0, addbsel=0, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
3295 Entering SREAD state... [dstart=0 dend=20 pwidth=8 srcshift=0]
3296 Source read address/pix address: 000095E0/0 [00009968000377C7]
3297 Entering A2_ADD state [a2_x=0008, a2_y=0000, addasel=0, addbsel=1, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
3298 Entering DWRITE state...
3299 Dest write address/pix address: 00037298/0 [dstart=0 dend=20 pwidth=8 srcshift=0] (icount=026A, inc=4)
3300 Entering A1_ADD state [a1_x=0004, a1_y=0000, addasel=0, addbsel=0, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
3303 // while (!idle_inner)
3308 if ((idle_inner && !step)
3309 || (dzwrite && step && inner0)
3310 || (dwrite && step && !dstwrz && inner0))
3312 #ifdef VERBOSE_BLITTER_LOGGING
3314 WriteLog(" Entering IDLE_INNER state...\n");
3320 idle_inneri = false;
3322 // EXTRA SOURCE DATA READ
3324 if ((idle_inner && step && srcenx)
3325 || (sreadx && !step))
3332 // EXTRA SOURCE ZED READ
3334 if ((sreadx && step && srcenz)
3335 || (szreadx && !step))
3342 // TEXTURE DATA READ (not implemented because not in Jaguar I)
3346 if ((szreadx && step && !textext)
3347 || (sreadx && step && !srcenz && srcen)
3348 || (idle_inner && step && !srcenx && !textext && srcen)
3349 || (dzwrite && step && !inner0 && !textext && srcen)
3350 || (dwrite && step && !dstwrz && !inner0 && !textext && srcen)
3351 || (txtread && step && srcen)
3352 || (sread && !step))
3361 if ((sread && step && srcenz)
3362 || (szread && !step))
3369 // DESTINATION DATA READ
3371 if ((szread && step && dsten)
3372 || (sread && step && !srcenz && dsten)
3373 || (sreadx && step && !srcenz && !textext && !srcen && dsten)
3374 || (idle_inner && step && !srcenx && !textext && !srcen && dsten)
3375 || (dzwrite && step && !inner0 && !textext && !srcen && dsten)
3376 || (dwrite && step && !dstwrz && !inner0 && !textext && !srcen && dsten)
3377 || (txtread && step && !srcen && dsten)
3378 || (dread && !step))
3385 // DESTINATION ZED READ
3387 if ((dread && step && dstenz)
3388 || (szread && step && !dsten && dstenz)
3389 || (sread && step && !srcenz && !dsten && dstenz)
3390 || (sreadx && step && !srcenz && !textext && !srcen && !dsten && dstenz)
3391 || (idle_inner && step && !srcenx && !textext && !srcen && !dsten && dstenz)
3392 || (dzwrite && step && !inner0 && !textext && !srcen && !dsten && dstenz)
3393 || (dwrite && step && !dstwrz && !inner0 && !textext && !srcen && !dsten && dstenz)
3394 || (txtread && step && !srcen && !dsten && dstenz)
3395 || (dzread && !step))
3402 // DESTINATION DATA WRITE
3404 if ((dzread && step)
3405 || (dread && step && !dstenz)
3406 || (szread && step && !dsten && !dstenz)
3407 || (sread && step && !srcenz && !dsten && !dstenz)
3408 || (txtread && step && !srcen && !dsten && !dstenz)
3409 || (sreadx && step && !srcenz && !textext && !srcen && !dsten && !dstenz)
3410 || (idle_inner && step && !srcenx && !textext && !srcen && !dsten && !dstenz)
3411 || (dzwrite && step && !inner0 && !textext && !srcen && !dsten && !dstenz)
3412 || (dwrite && step && !dstwrz && !inner0 && !textext && !srcen && !dsten && !dstenz)
3413 || (dwrite && !step))
3420 // DESTINATION ZED WRITE
3422 if ((dzwrite && !step)
3423 || (dwrite && step && dstwrz))
3430 //Kludge: A QnD way to make sure that sshftld is asserted only for the first
3431 // cycle of the inner loop...
3432 sshftld = idle_inner;
3434 // Here we move the fooi into their foo counterparts in order to simulate the moving
3435 // of data into the various FDSYNCs... Each time we loop we simulate one clock cycle...
3437 idle_inner = idle_inneri;
3447 // Here are a few more decodes--not sure if they're supposed to go here or not...
3449 bool srca_addi = (sreadxi && !srcenz) || (sreadi && !srcenz) || szreadxi || szreadi;
3451 bool dsta_addi = (dwritei && !dstwrz) || dzwritei;
3453 bool gensrc = sreadxi || szreadxi || sreadi || szreadi;
3454 bool gendst = dreadi || dzreadi || dwritei || dzwritei;
3455 bool gena2i = (gensrc && !dsta2) || (gendst && dsta2);
3457 bool zaddr = szreadx || szread || dzread || dzwrite;
3459 // Some stuff from MCONTROL.NET--not sure if this is the correct use of this decode or not...
3460 /*Fontread\ := OND1 (fontread\, sread[1], sreadx[1], bcompen);
3461 Fontread := INV1 (fontread, fontread\);
3462 Justt := NAN3 (justt, fontread\, phrase_mode, tactive\);
3463 Justify := TS (justify, justt, busen);*/
3464 bool fontread = (sread || sreadx) && bcompen;
3465 bool justify = !(!fontread && phrase_mode /*&& tactive*/);
3467 /* Generate inner loop update enables */
3469 A1_addi := MX2 (a1_addi, dsta_addi, srca_addi, dsta2);
3470 A2_addi := MX2 (a2_addi, srca_addi, dsta_addi, dsta2);
3471 A1_add := FD1 (a1_add, a1_add\, a1_addi, clk);
3472 A2_add := FD1 (a2_add, a2_add\, a2_addi, clk);
3473 A2_addb := BUF1 (a2_addb, a2_add);
3475 bool a1_add = (dsta2 ? srca_addi : dsta_addi);
3476 bool a2_add = (dsta2 ? dsta_addi : srca_addi);
3478 /* Address adder input A register selection
3479 000 A1 step integer part
3480 001 A1 step fraction part
3481 010 A1 increment integer part
3482 011 A1 increment fraction part
3486 bit 1 = /a2update . (a1_add . a1addx[0..1])
3487 bit 0 = /a2update . ( a1fupdate
3488 + a1_add . atick[0] . a1addx[0..1])
3489 The /a2update term on bits 0 and 1 is redundant.
3490 Now look-ahead based
3492 uint8_t addasel = (a1fupdate || (a1_add && a1addx == 3) ? 0x01 : 0x00);
3493 addasel |= (a1_add && a1addx == 3 ? 0x02 : 0x00);
3494 addasel |= (a2update ? 0x04 : 0x00);
3495 /* Address adder input A X constant selection
3496 adda_xconst[0..2] generate a power of 2 in the range 1-64 or all
3497 zeroes when they are all 1
3498 Remember - these are pixels, so to add one phrase the pixel size
3499 has to be taken into account to get the appropriate value.
3501 if a1addx[0..1] are 00 set 6 - pixel size
3502 if a1addx[0..1] are 01 set the value 000
3503 if a1addx[0..1] are 10 set the value 111
3505 JLH: Also, 11 will likewise set the value to 111
3507 uint8_t a1_xconst = 6 - a1_pixsize, a2_xconst = 6 - a2_pixsize;
3511 else if (a1addx & 0x02)
3516 else if (a2addx & 0x02)
3519 uint8_t adda_xconst = (a2_add ? a2_xconst : a1_xconst);
3520 /* Address adder input A Y constant selection
3521 22 June 94 - This was erroneous, because only the a1addy bit was reflected here.
3522 Therefore, the selection has to be controlled by a bug fix bit.
3523 JLH: Bug fix bit in Jaguar II--not in Jaguar I!
3525 bool adda_yconst = a1addy;
3526 /* Address adder input A register versus constant selection
3527 given by a1_add . a1addx[0..1]
3530 + a2_add . a2addx[0..1]
3533 bool addareg = ((a1_add && a1addx == 3) || a1update || a1fupdate
3534 || (a2_add && a2addx == 3) || a2update ? true : false);
3535 /* The adders can be put into subtract mode in add pixel size
3536 mode when the corresponding flags are set */
3537 bool suba_x = ((a1_add && a1xsign && a1addx == 1) || (a2_add && a2xsign && a2addx == 1) ? true : false);
3538 bool suba_y = ((a1_add && a1addy && a1ysign) || (a2_add && a2addy && a2ysign) ? true : false);
3539 /* Address adder input B selection
3546 + (a1_add . atick[0] . a1addx[0..1])
3547 + a1fupdate . a1_stepld
3548 + a1update . a1_stepld
3549 + a2update . a2_stepld
3550 Bit 0 = a2update + a2_add
3551 + a1fupdate . a1_stepld
3552 + a1update . a1_stepld
3553 + a2update . a2_stepld
3555 uint8_t addbsel = (a2update || a2_add || (a1fupdate && a1_stepld)
3556 || (a1update && a1_stepld) || (a2update && a2_stepld) ? 0x01 : 0x00);
3557 addbsel |= (a1fupdate || (a1_add && a1addx == 3) || (a1fupdate && a1_stepld)
3558 || (a1update && a1_stepld) || (a2update && a2_stepld) ? 0x02 : 0x00);
3560 /* The modulo bits are used to align X onto a phrase boundary when
3561 it is being updated by one phrase
3568 Masking is enabled for a1 when a1addx[0..1] is 00, and the value
3569 is 6 - the pixel size (again!)
3571 uint8_t maska1 = (a1_add && a1addx == 0 ? 6 - a1_pixsize : 0);
3572 uint8_t maska2 = (a2_add && a2addx == 0 ? 6 - a2_pixsize : 0);
3573 uint8_t modx = (a2_add ? maska2 : maska1);
3574 /* Generate load strobes for the increment updates */
3576 /*A1pldt := NAN2 (a1pldt, atick[1], a1_add);
3577 A1ptrldi := NAN2 (a1ptrldi, a1update\, a1pldt);
3579 A1fldt := NAN4 (a1fldt, atick[0], a1_add, a1addx[0..1]);
3580 A1fracldi := NAN2 (a1fracldi, a1fupdate\, a1fldt);
3582 A2pldt := NAN2 (a2pldt, atick[1], a2_add);
3583 A2ptrldi := NAN2 (a2ptrldi, a2update\, a2pldt);*/
3584 bool a1fracldi = a1fupdate || (a1_add && a1addx == 3);
3586 // Some more from DCONTROL...
3587 // atick[] just MAY be important here! We're assuming it's true and dropping the term...
3588 // That will probably screw up some of the lower terms that seem to rely on the timing of it...
3589 #warning srcdreadd is not properly initialized!
3590 bool srcdreadd = false; // Set in INNER.NET
3591 //Shadeadd\ := NAN2H (shadeadd\, dwrite, srcshade);
3592 //Shadeadd := INV2 (shadeadd, shadeadd\);
3593 bool shadeadd = dwrite && srcshade;
3594 /* Data adder control, input A selection
3595 000 Destination data
3596 001 Initialiser pixel value
3597 100 Source data - computed intensity fraction
3598 101 Pattern data - computed intensity
3599 110 Source zed 1 - computed zed
3600 111 Source zed 2 - computed zed fraction
3602 Bit 0 = dwrite . gourd . atick[1]
3603 + dzwrite . gourz . atick[0]
3606 + init_if + init_ii + init_zf + init_zi
3607 Bit 1 = dzwrite . gourz . (atick[0] + atick[1])
3610 Bit 2 = (gourd + gourz) . /(init_if + init_ii + init_zf + init_zi)
3613 uint8_t daddasel = ((dwrite && gourd) || (dzwrite && gourz) || istepadd || zstepfadd
3614 || init_if || init_ii || init_zf || init_zi ? 0x01 : 0x00);
3615 daddasel |= ((dzwrite && gourz) || zstepadd || zstepfadd ? 0x02 : 0x00);
3616 daddasel |= (((gourd || gourz) && !(init_if || init_ii || init_zf || init_zi))
3617 || (dwrite && srcshade) ? 0x04 : 0x00);
3618 /* Data adder control, input B selection
3620 0001 Data initialiser increment
3621 0100 Bottom 16 bits of I increment repeated four times
3622 0101 Top 16 bits of I increment repeated four times
3623 0110 Bottom 16 bits of Z increment repeated four times
3624 0111 Top 16 bits of Z increment repeated four times
3625 1100 Bottom 16 bits of I step repeated four times
3626 1101 Top 16 bits of I step repeated four times
3627 1110 Bottom 16 bits of Z step repeated four times
3628 1111 Top 16 bits of Z step repeated four times
3630 Bit 0 = dwrite . gourd . atick[1]
3631 + dzwrite . gourz . atick[1]
3635 + init_if + init_ii + init_zf + init_zi
3636 Bit 1 = dzwrite . gourz . (atick[0] + atick[1])
3639 Bit 2 = dwrite . gourd . (atick[0] + atick[1])
3640 + dzwrite . gourz . (atick[0] + atick[1])
3642 + istepadd + istepfadd + zstepadd + zstepfadd
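3643 Bit 3 = istepadd + istepfadd + zstepadd + zstepfadd   [NOTE(review): the daddbsel code below combines these four terms with && rather than ||, unlike bit 2; harmless while these lines are hard-wired false for Jaguar I, but confirm against the netlist]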
3645 uint8_t daddbsel = ((dwrite && gourd) || (dzwrite && gourz) || (dwrite && srcshade)
3646 || istepadd || zstepadd || init_if || init_ii || init_zf || init_zi ? 0x01 : 0x00);
3647 daddbsel |= ((dzwrite && gourz) || zstepadd || zstepfadd ? 0x02 : 0x00);
3648 daddbsel |= ((dwrite && gourd) || (dzwrite && gourz) || (dwrite && srcshade)
3649 || istepadd || istepfadd || zstepadd || zstepfadd ? 0x04 : 0x00);
3650 daddbsel |= (istepadd && istepfadd && zstepadd && zstepfadd ? 0x08 : 0x00);
3651 /* Data adder mode control
3652 000 16-bit normal add
3653 001 16-bit saturating add with carry
3654 010 8-bit saturating add with carry, carry into top byte is
3656 011 8-bit saturating add with carry, carry into top byte and
3657 between top nybbles is inhibited (CRY)
3658 100 16-bit normal add with carry
3659 101 16-bit saturating add
3660 110 8-bit saturating add, carry into top byte is inhibited
3661 111 8-bit saturating add, carry into top byte and between top
3662 nybbles is inhibited
3664 The first five are used for Gouraud calculations, the latter three
3665 for adding source and destination data
3667 Bit 0 = dzwrite . gourz . atick[1]
3668 + dwrite . gourd . atick[1] . /topnen . /topben . /ext_int
3669 + dwrite . gourd . atick[1] . topnen . topben . /ext_int
3671 + istepadd . /topnen . /topben . /ext_int
3672 + istepadd . topnen . topben . /ext_int
3673 + /gourd . /gourz . /topnen . /topben
3674 + /gourd . /gourz . topnen . topben
3675 + shadeadd . /topnen . /topben
3676 + shadeadd . topnen . topben
3677 + init_ii . /topnen . /topben . /ext_int
3678 + init_ii . topnen . topben . /ext_int
3681 Bit 1 = dwrite . gourd . atick[1] . /topben . /ext_int
3682 + istepadd . /topben . /ext_int
3683 + /gourd . /gourz . /topben
3684 + shadeadd . /topben
3685 + init_ii . /topben . /ext_int
3687 Bit 2 = /gourd . /gourz
3689 + dwrite . gourd . atick[1] . ext_int
3690 + istepadd . ext_int
3693 uint8_t daddmode = ((dzwrite && gourz) || (dwrite && gourd && !topnen && !topben && !ext_int)
3694 || (dwrite && gourd && topnen && topben && !ext_int) || zstepadd
3695 || (istepadd && !topnen && !topben && !ext_int)
3696 || (istepadd && topnen && topben && !ext_int) || (!gourd && !gourz && !topnen && !topben)
3697 || (!gourd && !gourz && topnen && topben) || (shadeadd && !topnen && !topben)
3698 || (shadeadd && topnen && topben) || (init_ii && !topnen && !topben && !ext_int)
3699 || (init_ii && topnen && topben && !ext_int) || init_zi ? 0x01 : 0x00);
3700 daddmode |= ((dwrite && gourd && !topben && !ext_int) || (istepadd && !topben && !ext_int)
3701 || (!gourd && !gourz && !topben) || (shadeadd && !topben)
3702 || (init_ii && !topben && !ext_int) ? 0x02 : 0x00);
3703 daddmode |= ((!gourd && !gourz) || shadeadd || (dwrite && gourd && ext_int)
3704 || (istepadd && ext_int) || (init_ii && ext_int) ? 0x04 : 0x00);
3705 /* Data add load controls
3706 Pattern fraction (dest data) is loaded on
3707 dwrite . gourd . atick[0]
3708 + istepfadd . /datinit
3710 Pattern data is loaded on
3711 dwrite . gourd . atick[1]
3712 + istepadd . /datinit . /datinit
3714 Source z1 is loaded on
3715 dzwrite . gourz . atick[1]
3716 + zstepadd . /datinit . /datinit
3718 Source z2 is loaded on
3719 dzwrite . gourz . atick[0]
3722 Texture map shaded data is loaded on
3723 srcdreadd . srcshade
3725 bool patfadd = (dwrite && gourd) || (istepfadd && !datinit) || init_if;
3726 bool patdadd = (dwrite && gourd) || (istepadd && !datinit) || init_ii;
3727 bool srcz1add = (dzwrite && gourz) || (zstepadd && !datinit) || init_zi;
3728 bool srcz2add = (dzwrite && gourz) || zstepfadd || init_zf;
3729 bool srcshadd = srcdreadd && srcshade;
3730 bool daddq_sel = patfadd || patdadd || srcz1add || srcz2add || srcshadd;
3731 /* Select write data
3732 This has to be controlled from stage 1 of the pipe-line, delayed
3733 by one tick, as the write occurs in the cycle after the ack.
3740 Bit 0 = /patdsel . /adddsel
3745 uint8_t data_sel = ((!patdsel && !adddsel) || dzwrite ? 0x01 : 0x00)
3746 | (adddsel || dzwrite ? 0x02 : 0x00);
3748 uint32_t address, pixAddr;
3749 ADDRGEN(address, pixAddr, gena2i, zaddr,
3750 a1_x, a1_y, a1_base, a1_pitch, a1_pixsize, a1_width, a1_zoffset,
3751 a2_x, a2_y, a2_base, a2_pitch, a2_pixsize, a2_width, a2_zoffset);
3753 //Here's my guess as to how the addresses get truncated to phrase boundaries in phrase mode...
3755 address &= 0xFFFFF8;
3757 /* Generate source alignment shift
3758 -------------------------------
3759 The source alignment shift for data move is the difference between
3760 the source and destination X pointers, multiplied by the pixel
3761 size. Only the low six bits of the pointers are of interest, as
3762 pixel sizes are always a power of 2 and window rows are always
3765 When not in phrase mode, the top 3 bits of the shift value are
3768 Source shifting is also used to extract bits for bit-to-byte
3769 expansion in phrase mode. This involves only the bottom three
3770 bits of the shift value, and is based on the offset within the
3771 phrase of the destination X pointer, in pixels.
3773 Source shifting is disabled when srcen is not set.
3775 uint8_t dstxp = (dsta2 ? a2_x : a1_x) & 0x3F;
3776 uint8_t srcxp = (dsta2 ? a1_x : a2_x) & 0x3F;
3777 uint8_t shftv = ((dstxp - srcxp) << pixsize) & 0x3F;
3778 /* The phrase mode alignment count is given by the phrase offset
3779 of the first pixel, for bit to byte expansion */
3783 pobb = dstxp & 0x07;
3785 pobb = dstxp & 0x03;
3787 pobb = dstxp & 0x01;
3789 bool pobbsel = phrase_mode && bcompen;
3790 uint8_t loshd = (pobbsel ? pobb : shftv) & 0x07;
3791 uint8_t shfti = (srcen || pobbsel ? (sshftld ? loshd : srcshift & 0x07) : 0);
3792 /* Enable for high bits is srcen . phrase_mode */
3793 shfti |= (srcen && phrase_mode ? (sshftld ? shftv & 0x38 : srcshift & 0x38) : 0);
3798 #ifdef VERBOSE_BLITTER_LOGGING
3800 WriteLog(" Entering SREADX state...");
3802 //uint32_t srcAddr, pixAddr;
3803 //ADDRGEN(srcAddr, pixAddr, gena2i, zaddr,
3804 // a1_x, a1_y, a1_base, a1_pitch, a1_pixsize, a1_width, a1_zoffset,
3805 // a2_x, a2_y, a2_base, a2_pitch, a2_pixsize, a2_width, a2_zoffset);
3807 srcd1 = ((uint64_t)JaguarReadLong(address + 0, BLITTER) << 32)
3808 | (uint64_t)JaguarReadLong(address + 4, BLITTER);
3809 //Kludge to take pixel size into account...
3810 //Hmm. If we're not in phrase mode, this is most likely NOT going to be used...
3811 //Actually, it would be--because of BCOMPEN expansion, for example...
3820 else if (pixsize == 4)
3826 #ifdef VERBOSE_BLITTER_LOGGING
3828 WriteLog(" Source extra read address/pix address: %08X/%1X [%08X%08X]\n",
3829 address, pixAddr, (uint32_t)(srcd1 >> 32), (uint32_t)(srcd1 & 0xFFFFFFFF));
3835 #ifdef VERBOSE_BLITTER_LOGGING
3837 WriteLog(" Entering SZREADX state...");
3840 srcz1 = ((uint64_t)JaguarReadLong(address, BLITTER) << 32) | (uint64_t)JaguarReadLong(address + 4, BLITTER);
3841 #ifdef VERBOSE_BLITTER_LOGGING
3843 WriteLog(" Src Z extra read address/pix address: %08X/%1X [%08X%08X]\n", address, pixAddr,
3844 (uint32_t)(dstz >> 32), (uint32_t)(dstz & 0xFFFFFFFF));
3850 #ifdef VERBOSE_BLITTER_LOGGING
3852 WriteLog(" Entering SREAD state...");
3854 //uint32_t srcAddr, pixAddr;
3855 //ADDRGEN(srcAddr, pixAddr, gena2i, zaddr,
3856 // a1_x, a1_y, a1_base, a1_pitch, a1_pixsize, a1_width, a1_zoffset,
3857 // a2_x, a2_y, a2_base, a2_pitch, a2_pixsize, a2_width, a2_zoffset);
3859 srcd1 = ((uint64_t)JaguarReadLong(address, BLITTER) << 32) | (uint64_t)JaguarReadLong(address + 4, BLITTER);
3860 //Kludge to take pixel size into account...
3869 else if (pixsize == 4)
3875 #ifdef VERBOSE_BLITTER_LOGGING
3878 WriteLog(" Source read address/pix address: %08X/%1X [%08X%08X]\n", address, pixAddr,
3879 (uint32_t)(srcd1 >> 32), (uint32_t)(srcd1 & 0xFFFFFFFF));
3887 #ifdef VERBOSE_BLITTER_LOGGING
3890 WriteLog(" Entering SZREAD state...");
3895 srcz1 = ((uint64_t)JaguarReadLong(address, BLITTER) << 32) | (uint64_t)JaguarReadLong(address + 4, BLITTER);
3896 //Kludge to take pixel size into account... I believe that it only has to take 16BPP mode into account. Not sure tho.
3897 if (!phrase_mode && pixsize == 4)
3900 #ifdef VERBOSE_BLITTER_LOGGING
3903 WriteLog(" Src Z read address/pix address: %08X/%1X [%08X%08X]\n", address, pixAddr,
3904 (uint32_t)(dstz >> 32), (uint32_t)(dstz & 0xFFFFFFFF));
3911 #ifdef VERBOSE_BLITTER_LOGGING
3913 WriteLog(" Entering DREAD state...");
3915 //uint32_t dstAddr, pixAddr;
3916 //ADDRGEN(dstAddr, pixAddr, gena2i, zaddr,
3917 // a1_x, a1_y, a1_base, a1_pitch, a1_pixsize, a1_width, a1_zoffset,
3918 // a2_x, a2_y, a2_base, a2_pitch, a2_pixsize, a2_width, a2_zoffset);
3919 dstd = ((uint64_t)JaguarReadLong(address, BLITTER) << 32) | (uint64_t)JaguarReadLong(address + 4, BLITTER);
3920 //Kludge to take pixel size into account...
3925 else if (pixsize == 4)
3930 #ifdef VERBOSE_BLITTER_LOGGING
3932 WriteLog(" Dest read address/pix address: %08X/%1X [%08X%08X]\n", address,
3933 pixAddr, (uint32_t)(dstd >> 32), (uint32_t)(dstd & 0xFFFFFFFF));
3939 // Is Z always 64 bit read? Or sometimes 16 bit (dependent on phrase_mode)?
3940 #ifdef VERBOSE_BLITTER_LOGGING
3942 WriteLog(" Entering DZREAD state...");
3944 dstz = ((uint64_t)JaguarReadLong(address, BLITTER) << 32) | (uint64_t)JaguarReadLong(address + 4, BLITTER);
3945 //Kludge to take pixel size into account... I believe that it only has to take 16BPP mode into account. Not sure tho.
3946 if (!phrase_mode && pixsize == 4)
3949 #ifdef VERBOSE_BLITTER_LOGGING
3951 WriteLog(" Dest Z read address/pix address: %08X/%1X [%08X%08X]\n", address,
3952 pixAddr, (uint32_t)(dstz >> 32), (uint32_t)(dstz & 0xFFFFFFFF));
3956 // These vars should probably go further up in the code... !!! FIX !!!
3957 // We can't preassign these unless they're static...
3958 //uint64_t srcz = 0; // These are assigned to shut up stupid compiler warnings--dwrite is ALWAYS asserted
3959 //bool winhibit = false;
3962 //NOTE: SRCSHADE requires GOURZ to be set to work properly--another Jaguar I bug
3965 #ifdef VERBOSE_BLITTER_LOGGING
3967 WriteLog(" Entering DWRITE state...");
3969 //Counter is done on the dwrite state...! (We'll do it first, since it affects dstart/dend calculations.)
3970 //Here's the voodoo for figuring the correct amount of pixels in phrase mode (or not):
3971 int8_t inct = -((dsta2 ? a2_x : a1_x) & 0x07); // From INNER_CNT
3973 inc = (!phrase_mode || (phrase_mode && (inct & 0x01)) ? 0x01 : 0x00);
3974 inc |= (phrase_mode && (((pixsize == 3 || pixsize == 4) && (inct & 0x02)) || pixsize == 5 && !(inct & 0x01)) ? 0x02 : 0x00);
3975 inc |= (phrase_mode && ((pixsize == 3 && (inct & 0x04)) || (pixsize == 4 && !(inct & 0x03))) ? 0x04 : 0x00);
3976 inc |= (phrase_mode && pixsize == 3 && !(inct & 0x07) ? 0x08 : 0x00);
3978 uint16_t oldicount = icount; // Save icount to detect underflow...
3981 if (icount == 0 || ((icount & 0x8000) && !(oldicount & 0x8000)))
3983 // X/Y stepping is also done here, I think...No. It's done when a1_add or a2_add is asserted...
3985 //*********************************************************************************
3986 //Start & end write mask computations...
3987 //*********************************************************************************
3992 dstart = (dstxp & 0x07) << 3;
3994 dstart = (dstxp & 0x03) << 4;
3996 dstart = (dstxp & 0x01) << 5;
3998 dstart = (phrase_mode ? dstart : pixAddr & 0x07);
4000 //This is the other Jaguar I bug... Normally, should ALWAYS select a1_x here.
4001 uint16_t dstxwr = (dsta2 ? a2_x : a1_x) & 0x7FFE;
4002 uint16_t pseq = dstxwr ^ (a1_win_x & 0x7FFE);
4003 pseq = (pixsize == 5 ? pseq : pseq & 0x7FFC);
4004 pseq = ((pixsize & 0x06) == 4 ? pseq : pseq & 0x7FF8);
4005 bool penden = clip_a1 && (pseq == 0);
4006 uint8_t window_mask = 0;
4009 window_mask = (a1_win_x & 0x07) << 3;
4011 window_mask = (a1_win_x & 0x03) << 4;
4013 window_mask = (a1_win_x & 0x01) << 5;
4015 window_mask = (penden ? window_mask : 0);
4018 Entering SREADX state... [dstart=0 dend=20 pwidth=8 srcshift=20]
4019 Source extra read address/pix address: 000095D0/0 [000004E40000001C]
4020 Entering A2_ADD state [a2_x=0002, a2_y=0000, addasel=0, addbsel=1, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
4021 Entering SREAD state... [dstart=0 dend=20 pwidth=8 srcshift=20]
4022 Source read address/pix address: 000095D8/0 [0054003800009814]
4023 Entering A2_ADD state [a2_x=0004, a2_y=0000, addasel=0, addbsel=1, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
4024 Entering DWRITE state...
4025 Dest write address/pix address: 00037290/0 [dstart=0 dend=20 pwidth=8 srcshift=20][daas=0 dabs=0 dam=7 ds=1 daq=F] [0000001C00000000] (icount=026E, inc=4)
4026 Entering A1_ADD state [a1_x=0000, a1_y=0000, addasel=0, addbsel=0, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
4028 (icount=026E, inc=4)
4029 icount & 0x03 = 0x02
4032 window_mask = 0x1000
4034 Therefore, it chooses the inner_mask over the window_mask every time! Argh!
4035 This is because we did this wrong:
4036 Innerm[3-5] := AN2 (inner_mask[3-5], imb[3-5], inner0);
4037 NOTE! This doesn't fix the problem because inner0 is asserted too late to help here. !!! FIX !!! [Should be DONE]
4040 /* The mask to be used if within one phrase of the end of the inner
4042 uint8_t inner_mask = 0;
4045 inner_mask = (icount & 0x07) << 3;
4047 inner_mask = (icount & 0x03) << 4;
4049 inner_mask = (icount & 0x01) << 5;
4052 /* The actual mask used should be the lesser of the window masks and
4053 the inner mask, where in all cases 000 means 1000. */
4054 window_mask = (window_mask == 0 ? 0x40 : window_mask);
4055 inner_mask = (inner_mask == 0 ? 0x40 : inner_mask);
4056 uint8_t emask = (window_mask > inner_mask ? inner_mask : window_mask);
4057 /* The mask to be used for the pixel size, to which must be added
4059 uint8_t pma = pixAddr + (1 << pixsize);
4060 /* Select the mask */
4061 uint8_t dend = (phrase_mode ? emask : pma);
4063 /* The cycle width in phrase mode is normally one phrase. However,
4064 at the start and end it may be narrower. The start and end masks
4065 are used to generate this. The width is given by:
4067 8 - start mask - (8 - end mask)
4068 = end mask - start mask
4070 This is only used for writes in phrase mode.
4071 Start and end from the address level of the pipeline are used.
4073 uint8_t pwidth = (((dend | dstart) & 0x07) == 0 ? 0x08 : (dend - dstart) & 0x07);
4075 //uint32_t dstAddr, pixAddr;
4076 //ADDRGEN(dstAddr, pixAddr, gena2i, zaddr,
4077 // a1_x, a1_y, a1_base, a1_pitch, a1_pixsize, a1_width, a1_zoffset,
4078 // a2_x, a2_y, a2_base, a2_pitch, a2_pixsize, a2_width, a2_zoffset);
4079 #ifdef VERBOSE_BLITTER_LOGGING
4081 WriteLog(" Dest write address/pix address: %08X/%1X", address, pixAddr);
4084 //More testing... This is almost certainly wrong, but how else does this work???
4085 //Seems to kinda work... But still, this doesn't seem to make any sense!
4086 if (phrase_mode && !dsten)
4087 dstd = ((uint64_t)JaguarReadLong(address, BLITTER) << 32) | (uint64_t)JaguarReadLong(address + 4, BLITTER);
4089 //Testing only... for now...
4090 //This is wrong because the write data is a combination of srcd and dstd--either run
4091 //thru the LFU or in PATDSEL or ADDDSEL mode. [DONE now, thru DATA module]
4092 // Precedence is ADDDSEL > PATDSEL > LFU.
4093 //Also, doesn't take into account the start & end masks, or the phrase width...
4096 // srcd2 = xxxx xxxx 0123 4567, srcd = 8901 2345 xxxx xxxx, srcshift = $20 (32)
4097 uint64_t srcd = (srcd2 << (64 - srcshift)) | (srcd1 >> srcshift);
4098 //bleh, ugly ugly ugly
4102 //NOTE: This only works with pixel sizes less than 8BPP...
4103 //DOUBLE NOTE: Still need to do regression testing to ensure that this doesn't break other stuff... !!! CHECK !!!
4104 if (!phrase_mode && srcshift != 0)
4105 srcd = ((srcd2 & 0xFF) << (8 - srcshift)) | ((srcd1 & 0xFF) >> srcshift);
4107 //Z DATA() stuff done here... And it has to be done before any Z shifting...
4108 //Note that we need to have phrase mode start/end support here... (Not since we moved it from dzwrite...!)
4110 Here are a couple of Cybermorph blits with Z:
4111 $00113078 // DSTEN DSTENZ DSTWRZ CLIP_A1 GOURD GOURZ PATDSEL ZMODE=4
4112 $09900F39 // SRCEN DSTEN DSTENZ DSTWRZ UPDA1 UPDA1F UPDA2 DSTA2 ZMODE=4 LFUFUNC=C DCOMPEN
4114 We're having the same phrase mode overwrite problem we had with the pixels... !!! FIX !!!
4115 Odd. It's equating 0 with 0... Even though ZMODE is $04 (less than)!
4120 void ADDARRAY(uint16_t * addq, uint8_t daddasel, uint8_t daddbsel, uint8_t daddmode,
4121 uint64_t dstd, uint32_t iinc, uint8_t initcin[], uint64_t initinc, uint16_t initpix,
4122 uint32_t istep, uint64_t patd, uint64_t srcd, uint64_t srcz1, uint64_t srcz2,
4123 uint32_t zinc, uint32_t zstep)
4126 uint8_t initcin[4] = { 0, 0, 0, 0 };
4127 ADDARRAY(addq, 7/*daddasel*/, 6/*daddbsel*/, 0/*daddmode*/, 0, 0, initcin, 0, 0, 0, 0, 0, srcz1, srcz2, zinc, 0);
4128 srcz2 = ((uint64_t)addq[3] << 48) | ((uint64_t)addq[2] << 32) | ((uint64_t)addq[1] << 16) | (uint64_t)addq[0];
4129 ADDARRAY(addq, 6/*daddasel*/, 7/*daddbsel*/, 1/*daddmode*/, 0, 0, initcin, 0, 0, 0, 0, 0, srcz1, srcz2, zinc, 0);
4130 srcz1 = ((uint64_t)addq[3] << 48) | ((uint64_t)addq[2] << 32) | ((uint64_t)addq[1] << 16) | (uint64_t)addq[0];
4132 #if 0//def VERBOSE_BLITTER_LOGGING
4134 WriteLog("\n[srcz1=%08X%08X, srcz2=%08X%08X, zinc=%08X",
4135 (uint32_t)(srcz1 >> 32), (uint32_t)(srcz1 & 0xFFFFFFFF),
4136 (uint32_t)(srcz2 >> 32), (uint32_t)(srcz2 & 0xFFFFFFFF), zinc);
4140 uint8_t zSrcShift = srcshift & 0x30;
4141 srcz = (srcz2 << (64 - zSrcShift)) | (srcz1 >> zSrcShift);
4142 //bleh, ugly ugly ugly
4146 #if 0//def VERBOSE_BLITTER_LOGGING
4148 WriteLog(" srcz=%08X%08X]\n", (uint32_t)(srcz >> 32), (uint32_t)(srcz & 0xFFFFFFFF));
4151 //When in SRCSHADE mode, it adds the IINC to the read source (from LFU???)
4152 //According to following line, it gets LFU mode. But does it feed the source into the LFU
4154 //Dest write address/pix address: 0014E83E/0 [dstart=0 dend=10 pwidth=8 srcshift=0][daas=4 dabs=5 dam=7 ds=1 daq=F] [0000000000006505] (icount=003F, inc=1)
4158 //NOTE: This is basically doubling the work done by DATA--since this is what
4159 // ADDARRAY is loaded with when srcshade is enabled... !!! FIX !!!
4160 // Also note that it doesn't work properly unless GOURZ is set--there's the clue!
4162 uint8_t initcin[4] = { 0, 0, 0, 0 };
4163 ADDARRAY(addq, 4/*daddasel*/, 5/*daddbsel*/, 7/*daddmode*/, dstd, iinc, initcin, 0, 0, 0, patd, srcd, 0, 0, 0, 0);
4164 srcd = ((uint64_t)addq[3] << 48) | ((uint64_t)addq[2] << 32) | ((uint64_t)addq[1] << 16) | (uint64_t)addq[0];
4166 //Seems to work... Not 100% sure tho.
4169 //Temporary kludge, to see if the fractional pattern does anything...
4171 //But it seems to mess up in Cybermorph... the shading should be smooth but it isn't...
4172 //Seems the carry out is lost again... !!! FIX !!! [DONE--see below]
4176 uint8_t initcin[4] = { 0, 0, 0, 0 };
4177 ADDARRAY(addq, 4/*daddasel*/, 4/*daddbsel*/, 0/*daddmode*/, dstd, iinc, initcin, 0, 0, 0, patd, srcd, 0, 0, 0, 0);
4178 srcd1 = ((uint64_t)addq[3] << 48) | ((uint64_t)addq[2] << 32) | ((uint64_t)addq[1] << 16) | (uint64_t)addq[0];
4181 //Note that we still don't take atick[0] & [1] into account here, so this will skip half of the data needed... !!! FIX !!!
4182 //Not yet enumerated: dbinh, srcdread, srczread
4183 //Also, should do srcshift on the z value in phrase mode... !!! FIX !!! [DONE]
4184 //As well as add a srcz variable we can set external to this state... !!! FIX !!! [DONE]
4187 uint8_t dcomp, zcomp;
4188 DATA(wdata, dcomp, zcomp, winhibit,
4189 true, cmpdst, daddasel, daddbsel, daddmode, daddq_sel, data_sel, 0/*dbinh*/,
4190 dend, dstart, dstd, iinc, lfufunc, patd, patdadd,
4191 phrase_mode, srcd, false/*srcdread*/, false/*srczread*/, srcz2add, zmode,
4192 bcompen, bkgwren, dcompen, icount & 0x07, pixsize,
4195 Seems that the phrase mode writes with DCOMPEN and DSTEN are corrupting inside of DATA: !!! FIX !!!
4196 It's fairly random as well. 7CFE -> 7DFE, 7FCA -> 78CA, 7FA4 -> 78A4, 7F88 -> 8F88
4197 It could be related to an uninitialized variable, like the zmode bug...
4199 It was a bug in the dech38el data--it returned $FF for ungated instead of $00...
4201 Blit! (CMD = 09800609)
4202 Flags: SRCEN DSTEN UPDA1 UPDA2 LFUFUNC=C DCOMPEN
4204 a1_base = 00110000, a2_base = 0010B2A8
4205 a1_x = 004B, a1_y = 00D8, a1_frac_x = 0000, a1_frac_y = 0000, a2_x = 0704, a2_y = 0000
4206 a1_step_x = FFF3, a1_step_y = 0001, a1_stepf_x = 0000, a1_stepf_y = 0000, a2_step_x = FFFC, a2_step_y = 0000
4207 a1_inc_x = 0000, a1_inc_y = 0000, a1_incf_x = 0000, a1_incf_y = 0000
4208 a1_win_x = 0000, a1_win_y = 0000, a2_mask_x = 0000, a2_mask_y = 0000
4209 a2_mask=F a1add=+phr/+0 a2add=+phr/+0
4210 a1_pixsize = 4, a2_pixsize = 4
4211 srcd=0000000000000000 dstd=0000000000000000 patd=0000000000000000 iinc=00000000
4212 srcz1=0000000000000000 srcz2=0000000000000000 dstz=0000000000000000 zinc=00000000, coll=0
4214 [in=T a1f=F a1=F zf=F z=F a2=F iif=F iii=F izf=F izi=F]
4215 Entering INNER state...
4216 Entering SREAD state... Source read address/pix address: 0010C0B0/0 [0000000078047804]
4217 Entering A2_ADD state [a2_x=0704, a2_y=0000, addasel=0, addbsel=1, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
4218 Entering DREAD state...
4219 Dest read address/pix address: 00197240/0 [0000000000000028]
4220 Entering DWRITE state...
4221 Dest write address/pix address: 00197240/0 [dstart=30 dend=40 pwidth=8 srcshift=30][daas=0 dabs=0 dam=7 ds=1 daq=F] [0000000000000028] (icount=0009, inc=1)
4222 Entering A1_ADD state [a1_x=004B, a1_y=00D8, addasel=0, addbsel=0, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
4223 Entering SREAD state... Source read address/pix address: 0010C0B8/0 [7804780478047804]
4224 Entering A2_ADD state [a2_x=0708, a2_y=0000, addasel=0, addbsel=1, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
4225 Entering DREAD state...
4226 Dest read address/pix address: 00197260/0 [0028000000200008]
4227 Entering DWRITE state...
4228 Dest write address/pix address: 00197260/0 [dstart=0 dend=40 pwidth=8 srcshift=30][daas=0 dabs=0 dam=7 ds=1 daq=F] [0028780478047804] (icount=0005, inc=4)
4229 Entering A1_ADD state [a1_x=004C, a1_y=00D8, addasel=0, addbsel=0, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
4230 Entering SREAD state... Source read address/pix address: 0010C0C0/0 [0000000000000000]
4231 Entering A2_ADD state [a2_x=070C, a2_y=0000, addasel=0, addbsel=1, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
4232 Entering DREAD state...
4233 Dest read address/pix address: 00197280/0 [0008001800180018]
4234 Entering DWRITE state...
4235 Dest write address/pix address: 00197280/0 [dstart=0 dend=40 pwidth=8 srcshift=30][daas=0 dabs=0 dam=7 ds=1 daq=F] [7804780478040018] (icount=0001, inc=4)
4236 Entering A1_ADD state [a1_x=0050, a1_y=00D8, addasel=0, addbsel=0, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
4237 Entering SREAD state... Source read address/pix address: 0010C0C8/0 [000078047BFE7BFE]
4238 Entering A2_ADD state [a2_x=0710, a2_y=0000, addasel=0, addbsel=1, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
4239 Entering DREAD state...
4240 Dest read address/pix address: 001972A0/0 [0008002000000000]
4241 Entering DWRITE state...
4242 Dest write address/pix address: 001972A0/0 [dstart=0 dend=10 pwidth=8 srcshift=30][daas=0 dabs=0 dam=7 ds=1 daq=F] [0008002000000000] (icount=FFFD, inc=4)
4243 Entering A1_ADD state [a1_x=0054, a1_y=00D8, addasel=0, addbsel=0, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
4244 Entering IDLE_INNER state...
4247 //Why isn't this taken care of in DATA? Because DATA is modifying its local copy instead of the one used here.
4248 //!!! FIX !!! [DONE]
4257 a1_outside // A1 pointer is outside window bounds
4266 // The address is outside if negative, or if greater than or equal
4267 // to the window size
4269 A1_xcomp := MAG_15 (a1xgr, a1xeq, a1xlt, a1_x{0..14}, a1_win_x{0..14});
4270 A1_ycomp := MAG_15 (a1ygr, a1yeq, a1ylt, a1_y{0..14}, a1_win_y{0..14});
4271 A1_outside := OR6 (a1_outside, a1_x{15}, a1xgr, a1xeq, a1_y{15}, a1ygr, a1yeq);
4273 //NOTE: There seems to be an off-by-one bug here in the clip_a1 section... !!! FIX !!!
4274 // Actually, seems to be related to phrase mode writes...
4275 // Or is it? Could be related to non-15-bit compares as above?
4276 if (clip_a1 && ((a1_x & 0x8000) || (a1_y & 0x8000) || (a1_x >= a1_win_x) || (a1_y >= a1_win_y)))
4283 JaguarWriteLong(address + 0, wdata >> 32, BLITTER);
4284 JaguarWriteLong(address + 4, wdata & 0xFFFFFFFF, BLITTER);
4289 JaguarWriteLong(address, wdata & 0xFFFFFFFF, BLITTER);
4290 else if (pixsize == 4)
4291 JaguarWriteWord(address, wdata & 0x0000FFFF, BLITTER);
4293 JaguarWriteByte(address, wdata & 0x000000FF, BLITTER);
4297 #ifdef VERBOSE_BLITTER_LOGGING
4300 WriteLog(" [%08X%08X]%s", (uint32_t)(wdata >> 32), (uint32_t)(wdata & 0xFFFFFFFF), (winhibit ? "[X]" : ""));
4301 WriteLog(" (icount=%04X, inc=%u)\n", icount, (uint16_t)inc);
4302 WriteLog(" [dstart=%X dend=%X pwidth=%X srcshift=%X]", dstart, dend, pwidth, srcshift);
4303 WriteLog("[daas=%X dabs=%X dam=%X ds=%X daq=%s]\n", daddasel, daddbsel, daddmode, data_sel, (daddq_sel ? "T" : "F"));
4310 // OK, here's the big insight: When NOT in GOURZ mode, srcz1 & 2 function EXACTLY the same way that
4311 // srcd1 & 2 work--there's an implicit shift from srcz1 to srcz2 whenever srcz1 is read.
4312 // OTHERWISE, srcz1 is the integer for the computed Z and srcz2 is the fractional part.
4313 // Writes to srcz1 & 2 follow the same pattern as the other 64-bit registers--low 32 at the low address,
4314 // high 32 at the high address (little endian!).
4315 // NOTE: GOURZ is still not properly supported. Check patd/patf handling...
4316 // Phrase mode start/end masks are not properly supported either...
4317 #ifdef VERBOSE_BLITTER_LOGGING
4320 WriteLog(" Entering DZWRITE state...");
4321 WriteLog(" Dest Z write address/pix address: %08X/%1X [%08X%08X]\n", address,
4322 pixAddr, (uint32_t)(srcz >> 32), (uint32_t)(srcz & 0xFFFFFFFF));
4325 //This is not correct... !!! FIX !!!
4326 //Should be OK now... We'll see...
4327 //Nope. Having the same stairstep write problems in phrase mode as we had with pixels... !!! FIX !!!
4328 //This is not causing the problem in Hover Strike... :-/
4329 //The problem was with the SREADX not shifting. Still problems with Z comparisons & other text in pregame screen...
4334 JaguarWriteLong(address + 0, srcz >> 32, BLITTER);
4335 JaguarWriteLong(address + 4, srcz & 0xFFFFFFFF, BLITTER);
4340 JaguarWriteWord(address, srcz & 0x0000FFFF, BLITTER);
4343 #ifdef VERBOSE_BLITTER_LOGGING
4346 // printf(" [%08X%08X]\n", (uint32_t)(srcz >> 32), (uint32_t)(srcz & 0xFFFFFFFF));
4348 //printf(" [dstart=%X dend=%X pwidth=%X srcshift=%X]", dstart, dend, pwidth, srcshift);
4349 WriteLog(" [dstart=? dend=? pwidth=? srcshift=%X]", srcshift);
4350 WriteLog("[daas=%X dabs=%X dam=%X ds=%X daq=%s]\n", daddasel, daddbsel, daddmode, data_sel, (daddq_sel ? "T" : "F"));
4357 This is because the address generator was using only 15 bits of the X when it should have
4360 There's a slight problem here: The X pointer isn't wrapping like it should when it hits
4361 the edge of the window... Notice how the X isn't reset at the edge of the window:
4363 Blit! (CMD = 00010000)
4366 a1_base = 000E8008, a2_base = 0001FA68
4367 a1_x = 0000, a1_y = 0000, a1_frac_x = 0000, a1_frac_y = 0000, a2_x = 0000, a2_y = 0000
4368 a1_step_x = 0000, a1_step_y = 0000, a1_stepf_x = 0000, a1_stepf_y = 0000, a2_step_x = 0000, a2_step_y = 0000
4369 a1_inc_x = 0000, a1_inc_y = 0000, a1_incf_x = 0000, a1_incf_y = 0000
4370 a1_win_x = 0000, a1_win_y = 0000, a2_mask_x = 0000, a2_mask_y = 0000
4371 a2_mask=F a1add=+phr/+0 a2add=+phr/+0
4372 a1_pixsize = 5, a2_pixsize = 5
4373 srcd=7717771777177717 dstd=0000000000000000 patd=7730773077307730 iinc=00000000
4374 srcz1=0000000000000000 srcz2=0000000000000000 dstz=0000000000000000 zinc=00000000, coll=0
4376 [in=T a1f=F a1=F zf=F z=F a2=F iif=F iii=F izf=F izi=F]
4377 Entering INNER state...
4378 Entering DWRITE state... Dest write address/pix address: 000E8008/0 [7730773077307730] (icount=009E, inc=2)
4379 srcz=0000000000000000][dcomp=AA zcomp=00 dbinh=00]
4380 [srcz=0000000000000000 dstz=0000000000000000 zwdata=0000000000000000 mask=7FFF]
4381 [dstart=0 dend=40 pwidth=8 srcshift=0][daas=0 dabs=0 dam=7 ds=0 daq=F]
4382 Entering A1_ADD state [a1_x=0000, a1_y=0000, addasel=0, addbsel=0, modx=1, addareg=F, adda_xconst=1, adda_yconst=0]...
4383 Entering DWRITE state... Dest write address/pix address: 000E8018/0 [7730773077307730] (icount=009C, inc=2)
4384 srcz=0000000000000000][dcomp=AA zcomp=00 dbinh=00]
4385 [srcz=0000000000000000 dstz=0000000000000000 zwdata=0000000000000000 mask=7FFF]
4386 [dstart=0 dend=40 pwidth=8 srcshift=0][daas=0 dabs=0 dam=7 ds=0 daq=F]
4387 Entering A1_ADD state [a1_x=0002, a1_y=0000, addasel=0, addbsel=0, modx=1, addareg=F, adda_xconst=1, adda_yconst=0]...
4391 Entering A1_ADD state [a1_x=009C, a1_y=0000, addasel=0, addbsel=0, modx=1, addareg=F, adda_xconst=1, adda_yconst=0]...
4392 Entering DWRITE state... Dest write address/pix address: 000E84F8/0 [7730773077307730] (icount=0000, inc=2)
4393 srcz=0000000000000000][dcomp=AA zcomp=00 dbinh=00]
4394 [srcz=0000000000000000 dstz=0000000000000000 zwdata=0000000000000000 mask=7FFF]
4395 [dstart=0 dend=40 pwidth=8 srcshift=0][daas=0 dabs=0 dam=7 ds=0 daq=F]
4396 Entering A1_ADD state [a1_x=009E, a1_y=0000, addasel=0, addbsel=0, modx=1, addareg=F, adda_xconst=1, adda_yconst=0]...
4397 Entering IDLE_INNER state...
4399 Leaving INNER state... (ocount=0104)
4400 [in=T a1f=F a1=F zf=F z=F a2=F iif=F iii=F izf=F izi=F]
4402 Entering INNER state...
4403 Entering DWRITE state... Dest write address/pix address: 000E8508/0 [7730773077307730] (icount=009E, inc=2)
4404 srcz=0000000000000000][dcomp=AA zcomp=00 dbinh=00]
4405 [srcz=0000000000000000 dstz=0000000000000000 zwdata=0000000000000000 mask=7FFF]
4406 [dstart=0 dend=40 pwidth=8 srcshift=0][daas=0 dabs=0 dam=7 ds=0 daq=F]
4407 Entering A1_ADD state [a1_x=00A0, a1_y=0000, addasel=0, addbsel=0, modx=1, addareg=F, adda_xconst=1, adda_yconst=0]...
4408 Entering DWRITE state... Dest write address/pix address: 000E8518/0 [7730773077307730] (icount=009C, inc=2)
4409 srcz=0000000000000000][dcomp=AA zcomp=00 dbinh=00]
4410 [srcz=0000000000000000 dstz=0000000000000000 zwdata=0000000000000000 mask=7FFF]
4411 [dstart=0 dend=40 pwidth=8 srcshift=0][daas=0 dabs=0 dam=7 ds=0 daq=F]
4412 Entering A1_ADD state [a1_x=00A2, a1_y=0000, addasel=0, addbsel=0, modx=1, addareg=F, adda_xconst=1, adda_yconst=0]...
4418 #ifdef VERBOSE_BLITTER_LOGGING
4421 //printf(" Entering A1_ADD state [addasel=%X, addbsel=%X, modx=%X, addareg=%s, adda_xconst=%u, adda_yconst=%s]...\n", addasel, addbsel, modx, (addareg ? "T" : "F"), adda_xconst, (adda_yconst ? "1" : "0"));
4422 WriteLog(" Entering A1_ADD state [a1_x=%04X, a1_y=%04X, addasel=%X, addbsel=%X, modx=%X, addareg=%s, adda_xconst=%u, adda_yconst=%s]...\n", a1_x, a1_y, addasel, addbsel, modx, (addareg ? "T" : "F"), adda_xconst, (adda_yconst ? "1" : "0"));
4426 int16_t adda_x, adda_y, addb_x, addb_y, data_x, data_y, addq_x, addq_y;
4427 ADDAMUX(adda_x, adda_y, addasel, a1_step_x, a1_step_y, a1_stepf_x, a1_stepf_y, a2_step_x, a2_step_y,
4428 a1_inc_x, a1_inc_y, a1_incf_x, a1_incf_y, adda_xconst, adda_yconst, addareg, suba_x, suba_y);
4429 ADDBMUX(addb_x, addb_y, addbsel, a1_x, a1_y, a2_x, a2_y, a1_frac_x, a1_frac_y);
4430 ADDRADD(addq_x, addq_y, a1fracldi, adda_x, adda_y, addb_x, addb_y, modx, suba_x, suba_y);
4432 #if 0//def VERBOSE_BLITTER_LOGGING
4435 WriteLog(" [adda_x=%d, adda_y=%d, addb_x=%d, addb_y=%d, addq_x=%d, addq_y=%d]\n", adda_x, adda_y, addb_x, addb_y, addq_x, addq_y);
4439 //Now, write to what???
4440 //a2ptrld comes from a2ptrldi...
4441 //I believe it's addbsel that determines the writeback...
4442 // This is where atick[0] & [1] come in, in determining which part (fractional, integer)
4443 // gets written to...
4446 //Kludge, to get A1 channel increment working...
4449 a1_frac_x = addq_x, a1_frac_y = addq_y;
4451 addasel = 2, addbsel = 0, a1fracldi = false;
4452 ADDAMUX(adda_x, adda_y, addasel, a1_step_x, a1_step_y, a1_stepf_x, a1_stepf_y, a2_step_x, a2_step_y,
4453 a1_inc_x, a1_inc_y, a1_incf_x, a1_incf_y, adda_xconst, adda_yconst, addareg, suba_x, suba_y);
4454 ADDBMUX(addb_x, addb_y, addbsel, a1_x, a1_y, a2_x, a2_y, a1_frac_x, a1_frac_y);
4455 ADDRADD(addq_x, addq_y, a1fracldi, adda_x, adda_y, addb_x, addb_y, modx, suba_x, suba_y);
4457 a1_x = addq_x, a1_y = addq_y;
4460 a1_x = addq_x, a1_y = addq_y;
4465 #ifdef VERBOSE_BLITTER_LOGGING
4468 //printf(" Entering A2_ADD state [addasel=%X, addbsel=%X, modx=%X, addareg=%s, adda_xconst=%u, adda_yconst=%s]...\n", addasel, addbsel, modx, (addareg ? "T" : "F"), adda_xconst, (adda_yconst ? "1" : "0"));
4469 WriteLog(" Entering A2_ADD state [a2_x=%04X, a2_y=%04X, addasel=%X, addbsel=%X, modx=%X, addareg=%s, adda_xconst=%u, adda_yconst=%s]...\n", a2_x, a2_y, addasel, addbsel, modx, (addareg ? "T" : "F"), adda_xconst, (adda_yconst ? "1" : "0"));
4473 //void ADDAMUX(int16_t &adda_x, int16_t &adda_y, uint8_t addasel, int16_t a1_step_x, int16_t a1_step_y,
4474 // int16_t a1_stepf_x, int16_t a1_stepf_y, int16_t a2_step_x, int16_t a2_step_y,
4475 // int16_t a1_inc_x, int16_t a1_inc_y, int16_t a1_incf_x, int16_t a1_incf_y, uint8_t adda_xconst,
4476 // bool adda_yconst, bool addareg, bool suba_x, bool suba_y)
4477 //void ADDBMUX(int16_t &addb_x, int16_t &addb_y, uint8_t addbsel, int16_t a1_x, int16_t a1_y,
4478 // int16_t a2_x, int16_t a2_y, int16_t a1_frac_x, int16_t a1_frac_y)
4479 //void ADDRADD(int16_t &addq_x, int16_t &addq_y, bool a1fracldi,
4480 // int16_t adda_x, int16_t adda_y, int16_t addb_x, int16_t addb_y, uint8_t modx, bool suba_x, bool suba_y)
4481 //void DATAMUX(int16_t &data_x, int16_t &data_y, uint32_t gpu_din, int16_t addq_x, int16_t addq_y, bool addqsel)
4482 int16_t adda_x, adda_y, addb_x, addb_y, data_x, data_y, addq_x, addq_y;
4483 ADDAMUX(adda_x, adda_y, addasel, a1_step_x, a1_step_y, a1_stepf_x, a1_stepf_y, a2_step_x, a2_step_y,
4484 a1_inc_x, a1_inc_y, a1_incf_x, a1_incf_y, adda_xconst, adda_yconst, addareg, suba_x, suba_y);
4485 ADDBMUX(addb_x, addb_y, addbsel, a1_x, a1_y, a2_x, a2_y, a1_frac_x, a1_frac_y);
4486 ADDRADD(addq_x, addq_y, a1fracldi, adda_x, adda_y, addb_x, addb_y, modx, suba_x, suba_y);
4488 #if 0//def VERBOSE_BLITTER_LOGGING
4491 WriteLog(" [adda_x=%d, adda_y=%d, addb_x=%d, addb_y=%d, addq_x=%d, addq_y=%d]\n", adda_x, adda_y, addb_x, addb_y, addq_x, addq_y);
4495 //Now, write to what???
4496 //a2ptrld comes from a2ptrldi...
4497 //I believe it's addbsel that determines the writeback...
4503 Flags: SRCEN CLIP_A1 UPDA1 UPDA1F UPDA2 DSTA2 GOURZ ZMODE=0 LFUFUNC=C SRCSHADE
4505 a1_base = 0015B000, a2_base = 0014B000
4506 a1_x = 0000, a1_y = 0000, a1_frac_x = 8000, a1_frac_y = 8000, a2_x = 001F, a2_y = 0038
4507 a1_step_x = FFFFFFC0, a1_step_y = 0001, a1_stepf_x = 0000, a1_stepf_y = 2AAA, a2_step_x = FFFFFFC0, a2_step_y = 0001
4508 a1_inc_x = 0001, a1_inc_y = 0000, a1_incf_x = 0000, a1_incf_y = 0000
4509 a1_win_x = 0040, a1_win_y = 0040, a2_mask_x = 0000, a2_mask_y = 0000
4510 a2_mask=F a1add=+inc/+0 a2add=+1/+0
4511 a1_pixsize = 4, a2_pixsize = 4
4512 srcd=FF00FF00FF00FF00 dstd=0000000000000000 patd=0000000000000000 iinc=00000000
4513 srcz1=0000000000000000 srcz2=0000000000000000 dstz=0000000000000000 zinc=00000000, col=0
4515 [in=T a1f=F a1=F zf=F z=F a2=F iif=F iii=F izf=F izi=F]
4516 Entering INNER state...
4517 Entering SREAD state... Source read address/pix address: 0015B000/0 [6505650565056505]
4518 Entering A1_ADD state [a1_x=0000, a1_y=0000, addasel=3, addbsel=2, modx=0, addareg=T, adda_xconst=7, adda_yconst=0]...
4519 Entering DWRITE state...
4520 Dest write address/pix address: 0014E83E/0 [dstart=0 dend=10 pwidth=8 srcshift=0][daas=4 dabs=5 dam=7 ds=1 daq=F] [0000000000006505] (icount=003F, inc=1)
4521 Entering A2_ADD state [a2_x=001F, a2_y=0038, addasel=0, addbsel=1, modx=0, addareg=F, adda_xconst=0, adda_yconst=0]...
4522 Entering SREAD state... Source read address/pix address: 0015B000/0 [6505650565056505]
4523 Entering A1_ADD state [a1_x=FFFF8000, a1_y=FFFF8000, addasel=3, addbsel=2, modx=0, addareg=T, adda_xconst=7, adda_yconst=0]...
4524 Entering DWRITE state...
4525 Dest write address/pix address: 0014E942/0 [dstart=0 dend=10 pwidth=8 srcshift=0][daas=4 dabs=5 dam=7 ds=1 daq=F] [0000000000006505] (icount=003E, inc=1)
4526 Entering A2_ADD state [a2_x=0021, a2_y=0039, addasel=0, addbsel=1, modx=0, addareg=F, adda_xconst=0, adda_yconst=0]...
4527 Entering SREAD state... Source read address/pix address: 0015B000/0 [6505650565056505]
4528 Entering A1_ADD state [a1_x=FFFF8000, a1_y=FFFF8000, addasel=3, addbsel=2, modx=0, addareg=T, adda_xconst=7, adda_yconst=0]...
4529 Entering DWRITE state...
4530 Dest write address/pix address: 0014EA46/0 [dstart=0 dend=10 pwidth=8 srcshift=0][daas=4 dabs=5 dam=7 ds=1 daq=F] [0000000000006505] (icount=003D, inc=1)
4531 Entering A2_ADD state [a2_x=0023, a2_y=003A, addasel=0, addbsel=1, modx=0, addareg=F, adda_xconst=0, adda_yconst=0]...
4532 Entering SREAD state... Source read address/pix address: 0015B000/0 [6505650565056505]
4533 Entering A1_ADD state [a1_x=FFFF8000, a1_y=FFFF8000, addasel=3, addbsel=2, modx=0, addareg=T, adda_xconst=7, adda_yconst=0]...
4534 Entering DWRITE state...
4535 Dest write address/pix address: 0014EB4A/0 [dstart=0 dend=10 pwidth=8 srcshift=0][daas=4 dabs=5 dam=7 ds=1 daq=F] [0000000000006505] (icount=003C, inc=1)
4536 Entering A2_ADD state [a2_x=0025, a2_y=003B, addasel=0, addbsel=1, modx=0, addareg=F, adda_xconst=0, adda_yconst=0]...
4538 Entering SREAD state... Source read address/pix address: 0015B000/0 [6505650565056505]
4539 Entering A1_ADD state [a1_x=FFFF8000, a1_y=FFFF8000, addasel=3, addbsel=2, modx=0, addareg=T, adda_xconst=7, adda_yconst=0]...
4540 Entering DWRITE state...
4541 Dest write address/pix address: 0015283A/0 [dstart=0 dend=10 pwidth=8 srcshift=0][daas=4 dabs=5 dam=7 ds=1 daq=F] [0000000000006505] (icount=0000, inc=1)
4542 Entering A2_ADD state [a2_x=009D, a2_y=0077, addasel=0, addbsel=1, modx=0, addareg=F, adda_xconst=0, adda_yconst=0]...
4543 Entering IDLE_INNER state...
4544 Leaving INNER state... (ocount=0036)
4545 [in=F a1f=T a1=F zf=F z=F a2=F iif=F iii=F izf=F izi=F]
4546 Entering A1FUPDATE state...
4547 [in=F a1f=F a1=T zf=F z=F a2=F iif=F iii=F izf=F izi=F]
4548 Entering A1UPDATE state... (-32768/-32768 -> 32704/-32767)
4549 [in=F a1f=F a1=F zf=F z=F a2=T iif=F iii=F izf=F izi=F]
4550 Entering A2UPDATE state... (159/120 -> 95/121)
4551 [in=T a1f=F a1=F zf=F z=F a2=F iif=F iii=F izf=F izi=F]
4552 Entering INNER state...
4555 #ifdef VERBOSE_BLITTER_LOGGING
4558 WriteLog(" Leaving INNER state...");
4563 // The outer counter is updated here as well on the clock cycle...
4565 /* the inner loop is started whenever another state is about to
4566 cause the inner state to go active */
4567 //Instart := ND7 (instart, innert[0], innert[2..7]);
4569 //Actually, it's done only when inner gets asserted without the 2nd line of conditions
4570 //(inner AND !indone)
4572 //Since we don't get here until the inner loop is finished (indone = true) we can get
4573 //away with doing it here...!
4578 #ifdef VERBOSE_BLITTER_LOGGING
4581 WriteLog(" (ocount=%04X)\n", ocount);
4589 #ifdef VERBOSE_BLITTER_LOGGING
4592 WriteLog(" Entering A1FUPDATE state...\n");
4596 uint32_t a1_frac_xt = (uint32_t)a1_frac_x + (uint32_t)a1_stepf_x;
4597 uint32_t a1_frac_yt = (uint32_t)a1_frac_y + (uint32_t)a1_stepf_y;
4598 a1FracCInX = a1_frac_xt >> 16;
4599 a1FracCInY = a1_frac_yt >> 16;
4600 a1_frac_x = (uint16_t)(a1_frac_xt & 0xFFFF);
4601 a1_frac_y = (uint16_t)(a1_frac_yt & 0xFFFF);
4606 #ifdef VERBOSE_BLITTER_LOGGING
4609 WriteLog(" Entering A1UPDATE state... (%d/%d -> ", a1_x, a1_y);
4613 a1_x += a1_step_x + a1FracCInX;
4614 a1_y += a1_step_y + a1FracCInY;
4615 #ifdef VERBOSE_BLITTER_LOGGING
4618 WriteLog("%d/%d)\n", a1_x, a1_y);
4626 #ifdef VERBOSE_BLITTER_LOGGING
4629 WriteLog(" Entering A2UPDATE state... (%d/%d -> ", a2_x, a2_y);
4635 #ifdef VERBOSE_BLITTER_LOGGING
4638 WriteLog("%d/%d)\n", a2_x, a2_y);
4645 // We never get here! !!! FIX !!!
4647 #ifdef VERBOSE_BLITTER_LOGGING
4650 WriteLog("Done!\na1_x=%04X a1_y=%04X a1_frac_x=%04X a1_frac_y=%04X a2_x=%04X a2_y%04X\n",
4651 GET16(blitter_ram, A1_PIXEL + 2),
4652 GET16(blitter_ram, A1_PIXEL + 0),
4653 GET16(blitter_ram, A1_FPIXEL + 2),
4654 GET16(blitter_ram, A1_FPIXEL + 0),
4655 GET16(blitter_ram, A2_PIXEL + 2),
4656 GET16(blitter_ram, A2_PIXEL + 0));
4661 // Write values back to registers (in real blitter, these are continuously updated)
4662 SET16(blitter_ram, A1_PIXEL + 2, a1_x);
4663 SET16(blitter_ram, A1_PIXEL + 0, a1_y);
4664 SET16(blitter_ram, A1_FPIXEL + 2, a1_frac_x);
4665 SET16(blitter_ram, A1_FPIXEL + 0, a1_frac_y);
4666 SET16(blitter_ram, A2_PIXEL + 2, a2_x);
4667 SET16(blitter_ram, A2_PIXEL + 0, a2_y);
4669 #ifdef VERBOSE_BLITTER_LOGGING
4672 WriteLog("Writeback!\na1_x=%04X a1_y=%04X a1_frac_x=%04X a1_frac_y=%04X a2_x=%04X a2_y%04X\n",
4673 GET16(blitter_ram, A1_PIXEL + 2),
4674 GET16(blitter_ram, A1_PIXEL + 0),
4675 GET16(blitter_ram, A1_FPIXEL + 2),
4676 GET16(blitter_ram, A1_FPIXEL + 0),
4677 GET16(blitter_ram, A2_PIXEL + 2),
4678 GET16(blitter_ram, A2_PIXEL + 0));
4686 int16_t a1_x = (int16_t)GET16(blitter_ram, A1_PIXEL + 2);
4687 int16_t a1_y = (int16_t)GET16(blitter_ram, A1_PIXEL + 0);
4688 uint16_t a1_frac_x = GET16(blitter_ram, A1_FPIXEL + 2);
4689 uint16_t a1_frac_y = GET16(blitter_ram, A1_FPIXEL + 0);
4690 int16_t a2_x = (int16_t)GET16(blitter_ram, A2_PIXEL + 2);
4691 int16_t a2_y = (int16_t)GET16(blitter_ram, A2_PIXEL + 0);
4693 Seems that the ending a1_x should be written between blits, but it doesn't seem to be...
4695 Blit! (CMD = 01800000)
4698 a1_base = 00050000, a2_base = 00070000
4699 a1_x = 0000, a1_y = 0000, a1_frac_x = 49CD, a1_frac_y = 0000, a2_x = 0033, a2_y = 0001
4700 a1_step_x = 0000, a1_step_y = 0000, a1_stepf_x = 939A, a1_stepf_y = 0000, a2_step_x = 0000, a2_step_y = 0000
4701 a1_inc_x = 0000, a1_inc_y = 0000, a1_incf_x = 0000, a1_incf_y = 0000
4702 a1_win_x = 0100, a1_win_y = 0020, a2_mask_x = 0000, a2_mask_y = 0000
4703 a2_mask=F a1add=+phr/+0 a2add=+phr/+0
4704 a1_pixsize = 4, a2_pixsize = 3
4705 srcd=DEDEDEDEDEDEDEDE dstd=0000000000000000 patd=0000000000000000 iinc=00000000
4706 srcz1=0000000000000000 srcz2=0000000000000000 dstz=0000000000000000 zinc=00000000, coll=0
4709 Blit! (CMD = 01800000)
4712 a1_base = 00050000, a2_base = 00070000
4713 a1_x = 0000, a1_y = 0000, a1_frac_x = 49CD, a1_frac_y = 0000, a2_x = 0033, a2_y = 0001
4714 a1_step_x = 0000, a1_step_y = 0000, a1_stepf_x = 939A, a1_stepf_y = 0000, a2_step_x = 0000, a2_step_y = 0000
4715 a1_inc_x = 0000, a1_inc_y = 0000, a1_incf_x = 0000, a1_incf_y = 0000
4716 a1_win_x = 0100, a1_win_y = 0020, a2_mask_x = 0000, a2_mask_y = 0000
4717 a2_mask=F a1add=+phr/+0 a2add=+phr/+0
4718 a1_pixsize = 4, a2_pixsize = 3
4719 srcd=D6D6D6D6D6D6D6D6 dstd=0000000000000000 patd=0000000000000000 iinc=00000000
4720 srcz1=0000000000000000 srcz2=0000000000000000 dstz=0000000000000000 zinc=00000000, coll=0
4726 // Various pieces of the blitter puzzle are teased out here...
4732 INT24/ address // byte address
4733 pixa[0..2] // bit part of address, un-pipe-lined
4749 apipe // load address pipe-line latch
4750 clk // co-processor clock
4751 gena2 // generate A2 as opposed to A1
4752 zaddr // generate Z address
/**
 * ADDRGEN - blitter address generator.
 *
 * Picks the A1 or A2 register set (gena2), turns the window's X/Y pointer into
 * a linear pixel offset, scales it by the pixel size, and folds in the pitch,
 * the optional Z offset and the window base to form the final address.
 *
 * @param address   [out] byte address: phrase address in bits 3 and up, byte
 *                  within the phrase (bits 3..5 of pixa) in bits 0..2.
 * @param pixa      [out] pixel offset shifted left by the pixel size
 *                  (pa << pixsize).
 * @param gena2     true selects the a2_* inputs, false the a1_* inputs.
 * @param zaddr     true adds the selected window's zoffset (low two bits).
 * @param a?_x/y    window pointer; all 16 bits of X and the low 12 of Y are used.
 * @param a?_base   window base; only the phrase part (base >> 3) is used.
 * @param a?_pitch  0..2 shift the phrase offset left by that amount, 3 triples it.
 * @param a?_pixsize shift applied to the pixel offset.
 * @param a?_width  low two bits add y/2 and y/4 terms, the remaining upper
 *                  bits are a left-shift count (see ytm / ya below).
 */
void ADDRGEN(uint32_t &address, uint32_t &pixa, bool gena2, bool zaddr,
	uint16_t a1_x, uint16_t a1_y, uint32_t a1_base, uint8_t a1_pitch, uint8_t a1_pixsize, uint8_t a1_width, uint8_t a1_zoffset,
	uint16_t a2_x, uint16_t a2_y, uint32_t a2_base, uint8_t a2_pitch, uint8_t a2_pixsize, uint8_t a2_width, uint8_t a2_zoffset)
{
	// Select the register set of the window being addressed.
//	uint16_t x = (gena2 ? a2_x : a1_x) & 0x7FFF;
	uint16_t x = (gena2 ? a2_x : a1_x) & 0xFFFF;	// Actually uses all 16 bits to generate address...!
	uint16_t y = (gena2 ? a2_y : a1_y) & 0x0FFF;
	uint8_t width = (gena2 ? a2_width : a1_width);
	uint8_t pixsize = (gena2 ? a2_pixsize : a1_pixsize);
	uint8_t pitch = (gena2 ? a2_pitch : a1_pitch);
	uint32_t base = (gena2 ? a2_base : a1_base) >> 3;//Only upper 21 bits are passed around the bus? Seems like it...
	uint8_t zoffset = (gena2 ? a2_zoffset : a1_zoffset);

	// ytm = y * (4 + low two width bits); shifting by the upper width bits and
	// dropping the two scaling bits again yields y * window width in pixels.
	uint32_t ytm = ((uint32_t)y << 2) + (width & 0x02 ? (uint32_t)y << 1 : 0) + (width & 0x01 ? (uint32_t)y : 0);

	uint32_t ya = (ytm << (width >> 2)) >> 2;

	// Linear pixel offset, then scaled by the pixel size.
	uint32_t pa = ya + x;

	/*uint32*/ pixa = pa << pixsize;

	// Pitch 1 and 2 shift the phrase offset; pitch 3 is handled by adding the
	// phrase offset twice more through shup (1x + 2x = 3x).
	uint8_t pt = ((pitch & 0x01) && !(pitch & 0x02) ? 0x01 : 0x00)
		| (!(pitch & 0x01) && (pitch & 0x02) ? 0x02 : 0x00);
//	uint32_t phradr = pixa << pt;
	uint32_t phradr = (pixa >> 6) << pt;
	uint32_t shup = (pitch == 0x03 ? (pixa >> 6) : 0);

	uint8_t za = (zaddr ? zoffset : 0) & 0x03;
//	uint32_t addr = za + (phradr & 0x07) + (shup << 1) + base;
	uint32_t addr = za + phradr + (shup << 1) + base;
	/*uint32*/ address = ((pixa & 0x38) >> 3) | ((addr & 0x1FFFFF) << 3);
#if 0//def VERBOSE_BLITTER_LOGGING
	WriteLog(" [gena2=%s, x=%04X, y=%04X, w=%1X, pxsz=%1X, ptch=%1X, b=%08X, zoff=%1X]\n", (gena2 ? "T" : "F"), x, y, width, pixsize, pitch, base, zoffset);
	WriteLog(" [ytm=%X, ya=%X, pa=%X, pixa=%X, pt=%X, phradr=%X, shup=%X, za=%X, addr=%X, address=%X]\n", ytm, ya, pa, pixa, pt, phradr, shup, za, addr, address);
#endif
}
4797 Entering INNER state...
4798 [gena2=T, x=0002, y=0000, w=20, pxsz=4, ptch=0, b=000012BA, zoff=0]
4799 [ytm=0, ya=0, pa=2, pixa=20, pt=0, phradr=0, shup=0, za=0, addr=12BA, address=95D4]
4800 Entering SREADX state... [dstart=0 dend=20 pwidth=8 srcshift=20]
4801 Source extra read address/pix address: 000095D4/0 [0000001C00540038]
4802 Entering A2_ADD state [a2_x=0002, a2_y=0000, addasel=0, addbsel=1, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
4803 [gena2=T, x=0004, y=0000, w=20, pxsz=4, ptch=0, b=000012BA, zoff=0]
4804 [ytm=0, ya=0, pa=4, pixa=40, pt=0, phradr=1, shup=0, za=0, addr=12BB, address=95D8]
4805 Entering SREAD state... [dstart=0 dend=20 pwidth=8 srcshift=0]
4806 Source read address/pix address: 000095D8/0 [0054003800009814]
4807 Entering A2_ADD state [a2_x=0004, a2_y=0000, addasel=0, addbsel=1, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
4808 [gena2=F, x=0000, y=0000, w=20, pxsz=4, ptch=0, b=00006E52, zoff=0]
4809 [ytm=0, ya=0, pa=0, pixa=0, pt=0, phradr=0, shup=0, za=0, addr=6E52, address=37290]
4810 Entering DWRITE state...
4811 Dest write address/pix address: 00037290/0 [dstart=0 dend=20 pwidth=8 srcshift=0] (icount=026E, inc=4)
4812 Entering A1_ADD state [a1_x=0000, a1_y=0000, addasel=0, addbsel=0, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
4813 [gena2=T, x=0008, y=0000, w=20, pxsz=4, ptch=0, b=000012BA, zoff=0]
4814 [ytm=0, ya=0, pa=8, pixa=80, pt=0, phradr=2, shup=0, za=0, addr=12BC, address=95E0]
4818 Entering SREAD state...
4819 [gena2=T, x=0004, y=0000, w=20, pxsz=4, ptch=0, b=000010AC, zoff=0]
4820 [ytm=0, ya=0, pa=4, pixa=0, pt=0, phradr=40, shup=0, za=0, addr=10AC, address=8560]
4821 Source read address/pix address: 00008560/0 [8C27981B327E00F0]
4823 2nd pass (still wrong):
4824 Entering SREAD state...
4825 [gena2=T, x=0004, y=0000, w=20, pxsz=4, ptch=0, b=000010AC, zoff=0]
4826 [ytm=0, ya=0, pa=4, pixa=0, pt=0, phradr=40, shup=0, za=0, addr=10EC, address=8760]
4827 Source read address/pix address: 00008760/0 [00E06DC04581880C]
4830 Entering SREAD state...
4831 [gena2=T, x=0004, y=0000, w=20, pxsz=4, ptch=0, b=000010AC, zoff=0]
4832 [ytm=0, ya=0, pa=4, pixa=0, pt=0, phradr=1, shup=0, za=0, addr=10AD, address=8568]
4833 Source read address/pix address: 00008568/0 [6267981A327C00F0]
4835 OK, now we're back into incorrect (or is it?):
4836 Entering SREADX state... [dstart=0 dend=20 pwidth=8 srcshift=20]
4837 Source extra read address/pix address: 000095D4/0 [0000 001C 0054 0038]
4838 Entering A2_ADD state [a2_x=0002, a2_y=0000, addasel=0, addbsel=1, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
4839 Entering SREAD state... [dstart=0 dend=20 pwidth=8 srcshift=0]
4840 Source read address/pix address: 000095D8/0 [0054 0038 0000 9814]
4841 Entering A2_ADD state [a2_x=0004, a2_y=0000, addasel=0, addbsel=1, modx=2, addareg=F, adda_xconst=2, adda_yconst=0]...
4842 I think this may be correct...!
4847 // source and destination address update conditions
4849 Sraat0 := AN2 (sraat[0], sreadxi, srcenz\);
4850 Sraat1 := AN2 (sraat[1], sreadi, srcenz\);
4851 Srca_addi := OR4 (srca_addi, szreadxi, szreadi, sraat[0..1]);
4852 Srca_add := FD1Q (srca_add, srca_addi, clk);
4854 Dstaat := AN2 (dstaat, dwritei, dstwrz\);
4855 Dsta_addi := OR2 (dsta_addi, dzwritei, dstaat);
4856 // Dsta_add := FD1Q (dsta_add, dsta_addi, clk);
4858 // source and destination address generate conditions
4860 Gensrc := OR4 (gensrc, sreadxi, szreadxi, sreadi, szreadi);
4861 Gendst := OR4 (gendst, dreadi, dzreadi, dwritei, dzwritei);
4862 Dsta2\ := INV1 (dsta2\, dsta2);
4863 Gena2t0 := NAN2 (gena2t[0], gensrc, dsta2\);
4864 Gena2t1 := NAN2 (gena2t[1], gendst, dsta2);
4865 Gena2i := NAN2 (gena2i, gena2t[0..1]);
4866 Gena2 := FD1QU (gena2, gena2i, clk);
4868 Zaddr := OR4 (zaddr, szreadx, szread, dzread, dzwrite);
4873 // Basically, the above translates to:
4874 bool srca_addi = (sreadxi && !srcenz) || (sreadi && !srcenz) || szreadxi || szreadi;
4876 bool dsta_addi = (dwritei && !dstwrz) || dzwritei;
4878 bool gensrc = sreadxi || szreadxi || sreadi || szreadi;
bool gendst = dreadi || dzreadi || dwritei || dzwritei;
4880 bool gena2i = (gensrc && !dsta2) || (gendst && dsta2);
4882 bool zaddr = szreadx || szread || dzread || dzwrite;
4886 // source data reads
4888 Srcdpset\ := NAN2 (srcdpset\, readreq, sread);
4889 Srcdpt1 := NAN2 (srcdpt[1], srcdpend, srcdack\);
4890 Srcdpt2 := NAN2 (srcdpt[2], srcdpset\, srcdpt[1]);
4891 Srcdpend := FD2Q (srcdpend, srcdpt[2], clk, reset\);
4893 Srcdxpset\ := NAN2 (srcdxpset\, readreq, sreadx);
4894 Srcdxpt1 := NAN2 (srcdxpt[1], srcdxpend, srcdxack\);
4895 Srcdxpt2 := NAN2 (srcdxpt[2], srcdxpset\, srcdxpt[1]);
4896 Srcdxpend := FD2Q (srcdxpend, srcdxpt[2], clk, reset\);
4898 Sdpend := OR2 (sdpend, srcdxpend, srcdpend);
4899 Srcdreadt := AN2 (srcdreadt, sdpend, read_ack);
4901 //2/9/92 - enhancement?
4902 //Load srcdread on the next tick as well to modify it in srcshade
4904 Srcdreadd := FD1Q (srcdreadd, srcdreadt, clk);
4905 Srcdread := AOR1 (srcdread, srcshade, srcdreadd, srcdreadt);
4909 Srczpset\ := NAN2 (srczpset\, readreq, szread);
4910 Srczpt1 := NAN2 (srczpt[1], srczpend, srczack\);
4911 Srczpt2 := NAN2 (srczpt[2], srczpset\, srczpt[1]);
4912 Srczpend := FD2Q (srczpend, srczpt[2], clk, reset\);
4914 Srczxpset\ := NAN2 (srczxpset\, readreq, szreadx);
4915 Srczxpt1 := NAN2 (srczxpt[1], srczxpend, srczxack\);
4916 Srczxpt2 := NAN2 (srczxpt[2], srczxpset\, srczxpt[1]);
4917 Srczxpend := FD2Q (srczxpend, srczxpt[2], clk, reset\);
4919 Szpend := OR2 (szpend, srczpend, srczxpend);
4920 Srczread := AN2 (srczread, szpend, read_ack);
4922 // destination data reads
4924 Dstdpset\ := NAN2 (dstdpset\, readreq, dread);
4925 Dstdpt0 := NAN2 (dstdpt[0], dstdpend, dstdack\);
4926 Dstdpt1 := NAN2 (dstdpt[1], dstdpset\, dstdpt[0]);
4927 Dstdpend := FD2Q (dstdpend, dstdpt[1], clk, reset\);
4928 Dstdread := AN2 (dstdread, dstdpend, read_ack);
4930 // destination zed reads
4932 Dstzpset\ := NAN2 (dstzpset\, readreq, dzread);
4933 Dstzpt0 := NAN2 (dstzpt[0], dstzpend, dstzack\);
4934 Dstzpt1 := NAN2 (dstzpt[1], dstzpset\, dstzpt[0]);
4935 Dstzpend := FD2Q (dstzpend, dstzpt[1], clk, reset\);
4936 Dstzread := AN2 (dstzread, dstzpend, read_ack);
4941 // Basically, the above translates to:
4942 bool srcdpend = (readreq && sread) || (srcdpend && !srcdack);
4943 bool srcdxpend = (readreq && sreadx) || (srcdxpend && !srcdxack);
bool sdpend = srcdxpend || srcdpend;
4945 bool srcdread = ((sdpend && read_ack) && srcshade) || (sdpend && read_ack);//the latter term is lookahead
4950 ////////////////////////////////////////////////////////////////////////////////////////////
4951 ////////////////////////////////////////////////////////////////////////////////////////////
4952 // Here's an important bit: The source data adder logic. Need to track down the inputs!!! //
4953 ////////////////////////////////////////////////////////////////////////////////////////////
4954 ////////////////////////////////////////////////////////////////////////////////////////////
4961 daddasel[0..2] // data adder input A selection
4966 initcin[0..3] // carry into the adders from the initializers
4967 initinc[0..63] // the initialisation increment
4968 initpix[0..15] // Data initialiser pixel value
4980 void ADDARRAY(uint16_t * addq, uint8_t daddasel, uint8_t daddbsel, uint8_t daddmode,
4981 uint64_t dstd, uint32_t iinc, uint8_t initcin[], uint64_t initinc, uint16_t initpix,
4982 uint32_t istep, uint64_t patd, uint64_t srcd, uint64_t srcz1, uint64_t srcz2,
4983 uint32_t zinc, uint32_t zstep)
4985 uint32_t initpix2 = ((uint32_t)initpix << 16) | initpix;
4986 uint32_t addalo[8], addahi[8];
4987 addalo[0] = dstd & 0xFFFFFFFF;
4988 addalo[1] = initpix2;
4991 addalo[4] = srcd & 0xFFFFFFFF;
4992 addalo[5] = patd & 0xFFFFFFFF;
4993 addalo[6] = srcz1 & 0xFFFFFFFF;
4994 addalo[7] = srcz2 & 0xFFFFFFFF;
4995 addahi[0] = dstd >> 32;
4996 addahi[1] = initpix2;
4999 addahi[4] = srcd >> 32;
5000 addahi[5] = patd >> 32;
5001 addahi[6] = srcz1 >> 32;
5002 addahi[7] = srcz2 >> 32;
5004 adda[0] = addalo[daddasel] & 0xFFFF;
5005 adda[1] = addalo[daddasel] >> 16;
5006 adda[2] = addahi[daddasel] & 0xFFFF;
5007 adda[3] = addahi[daddasel] >> 16;
5009 uint16_t wordmux[8];
5010 wordmux[0] = iinc & 0xFFFF;
5011 wordmux[1] = iinc >> 16;
5012 wordmux[2] = zinc & 0xFFFF;
5013 wordmux[3] = zinc >> 16;;
5014 wordmux[4] = istep & 0xFFFF;
5015 wordmux[5] = istep >> 16;;
5016 wordmux[6] = zstep & 0xFFFF;
5017 wordmux[7] = zstep >> 16;;
5018 uint16_t word = wordmux[((daddbsel & 0x08) >> 1) | (daddbsel & 0x03)];
5020 bool dbsel2 = daddbsel & 0x04;
5021 bool iincsel = (daddbsel & 0x01) && !(daddbsel & 0x04);
5023 if (!dbsel2 && !iincsel)
5024 addb[0] = srcd & 0xFFFF,
5025 addb[1] = (srcd >> 16) & 0xFFFF,
5026 addb[2] = (srcd >> 32) & 0xFFFF,
5027 addb[3] = (srcd >> 48) & 0xFFFF;
5028 else if (dbsel2 && !iincsel)
5029 addb[0] = addb[1] = addb[2] = addb[3] = word;
5030 else if (!dbsel2 && iincsel)
5031 addb[0] = initinc & 0xFFFF,
5032 addb[1] = (initinc >> 16) & 0xFFFF,
5033 addb[2] = (initinc >> 32) & 0xFFFF,
5034 addb[3] = (initinc >> 48) & 0xFFFF;
5036 addb[0] = addb[1] = addb[2] = addb[3] = 0;
5038 uint8_t cinsel = (daddmode >= 1 && daddmode <= 4 ? 1 : 0);
5040 static uint8_t co[4];//These are preserved between calls...
5043 for(int i=0; i<4; i++)
5044 cin[i] = initcin[i] | (co[i] & cinsel);
5046 bool eightbit = daddmode & 0x02;
5047 bool sat = daddmode & 0x03;
5048 bool hicinh = ((daddmode & 0x03) == 0x03);
5050 //Note that the carry out is saved between calls to this function...
5051 for(int i=0; i<4; i++)
5052 ADD16SAT(addq[i], co[i], adda[i], addb[i], cin[i], sat, eightbit, hicinh);
/**
 * ADD16SAT - one 16-bit lane of the data adder, with optional saturation.
 *
 * The sum is built in three slices (bits 0..7, 8..11, 12..15) so that the
 * carry between slices can be cut:
 *   - eightbit blocks the carry from the low byte into bit 8,
 *   - hicinh   blocks the carry from bit 11 into the top nibble.
 *
 * When sat is set and the sign bit of b differs from the carry out of the
 * relevant slice, the result is clamped: the low byte becomes 0xFF (carry
 * set) or 0x00 (carry clear), and, unless eightbit is set, the high byte is
 * clamped the same way.
 *
 * @param r   [out] 16-bit result.
 * @param co  [out] carry out of bit 15.
 * @param a,b addends; cin is the carry into bit 0.
 */
void ADD16SAT(uint16_t &r, uint8_t &co, uint16_t a, uint16_t b, uint8_t cin, bool sat, bool eightbit, bool hicinh)
{
#if 0//def VERBOSE_BLITTER_LOGGING
	printf("--> [sat=%s 8b=%s hicinh=%s] %04X + %04X (+ %u) = ", (sat ? "T" : "F"), (eightbit ? "T" : "F"), (hicinh ? "T" : "F"), a, b, cin);
#endif

	// Bits 0..7
	uint32_t qt = (a & 0xFF) + (b & 0xFF) + cin;
	const uint8_t lowCarry = (qt & 0x0100 ? 1 : 0);
	uint16_t q = qt & 0x00FF;

	// Bits 8..11 (carry in suppressed in eight bit mode)
	const uint8_t midCarryIn = (lowCarry && !eightbit ? lowCarry : 0);
	qt = (a & 0x0F00) + (b & 0x0F00) + (midCarryIn << 8);
	const uint8_t midCarry = (qt & 0x1000 ? 1 : 0);
	q |= qt & 0x0F00;

	// Bits 12..15 (carry in suppressed when hicinh is set)
	const uint8_t highCarryIn = (midCarry && !hicinh ? midCarry : 0);
	qt = (a & 0xF000) + (b & 0xF000) + (highCarryIn << 12);
	co = (qt & 0x10000 ? 1 : 0);
	q |= qt & 0xF000;

	// Saturation decision: sign of b versus carry out of the active width
	uint8_t btop = (eightbit ? (b & 0x0080) >> 7 : (b & 0x8000) >> 15);
	uint8_t ctop = (eightbit ? lowCarry : co);

	bool saturate = sat && (btop ^ ctop);
	bool hisaturate = saturate && !eightbit;

#if 0//def VERBOSE_BLITTER_LOGGING
	printf("bt=%u ct=%u s=%u hs=%u] ", btop, ctop, saturate, hisaturate);
#endif

	r = (saturate ? (ctop ? 0x00FF : 0x0000) : q & 0x00FF);
	r |= (hisaturate ? (ctop ? 0xFF00 : 0x0000) : q & 0xFF00);

#if 0//def VERBOSE_BLITTER_LOGGING
	printf("%04X (co=%u)\n", r, co);
#endif
}
/** ADDAMUX - Address adder input A selection *******************

This module generates the data loaded into the address adder input A. This is
the update value, and can be one of four registers : A1 step, A2 step, A1
increment and A1 fraction. It can complement these values to perform
subtraction, and it can generate constants to increment / decrement the window
pointers.

addasel[0..2]      select the register to add

                   000  A1 step integer part
                   001  A1 step fraction part
                   010  A1 increment integer part
                   011  A1 increment fraction part
                   1xx  A2 step

adda_xconst[0..2]  generate a power of 2 in the range 1-64 or all zeroes when
                   they are all 1
adda_yconst        Y constant, 0 or 1
addareg            selects register value to be added as opposed to constant
suba_x, suba_y     complement the X and Y values

adda_x / adda_y receive the selected, optionally complemented, term. Only the
bitwise complement is produced here; the netlist comment below notes that the
complement flag supplies the adder carry-in.
*/
void ADDAMUX(int16_t &adda_x, int16_t &adda_y, uint8_t addasel, int16_t a1_step_x, int16_t a1_step_y,
	int16_t a1_stepf_x, int16_t a1_stepf_y, int16_t a2_step_x, int16_t a2_step_y,
	int16_t a1_inc_x, int16_t a1_inc_y, int16_t a1_incf_x, int16_t a1_incf_y, uint8_t adda_xconst,
	bool adda_yconst, bool addareg, bool suba_x, bool suba_y)
{
/*INT16/	addac_x, addac_y, addar_x, addar_y, addart_x, addart_y,
INT16/	addas_x, addas_y, suba_x16, suba_y16
Zero		:= TIE0 (zero);*/

/* Multiplex the register terms */

/*Addaselb[0-2]	:= BUF8 (addaselb[0-2], addasel[0-2]);
Addart_x	:= MX4 (addart_x, a1_step_x, a1_stepf_x, a1_inc_x, a1_incf_x, addaselb[0..1]);
Addar_x		:= MX2 (addar_x, addart_x, a2_step_x, addaselb[2]);
Addart_y	:= MX4 (addart_y, a1_step_y, a1_stepf_y, a1_inc_y, a1_incf_y, addaselb[0..1]);
Addar_y		:= MX2 (addar_y, addart_y, a2_step_y, addaselb[2]);*/

////////////////////////////////////// C++ CODE //////////////////////////////////////
	const int16_t xterm[4] = { a1_step_x, a1_stepf_x, a1_inc_x, a1_incf_x };
	const int16_t yterm[4] = { a1_step_y, a1_stepf_y, a1_inc_y, a1_incf_y };
	const bool useA2Step = (addasel & 0x04) != 0;
	const int16_t addar_x = (useA2Step ? a2_step_x : xterm[addasel & 0x03]);
	const int16_t addar_y = (useA2Step ? a2_step_y : yterm[addasel & 0x03]);
//////////////////////////////////////////////////////////////////////////////////////

/* Generate a constant value - this is a power of 2 in the range
0-64, or zero.  The control bits are adda_xconst[0..2], when they
are all 1 the result is 0.
Constants for Y can only be 0 or 1 */

/*Addac_xlo	:= D38H (addac_x[0..6], unused[0], adda_xconst[0..2]);
Unused[0]	:= DUMMY (unused[0]);

Addac_x		:= JOIN (addac_x, addac_x[0..6], zero, zero, zero, zero, zero, zero, zero, zero, zero);
Addac_y		:= JOIN (addac_y, adda_yconst, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero,
			zero, zero, zero, zero, zero);*/
////////////////////////////////////// C++ CODE //////////////////////////////////////
	// Only three control bits exist; masking keeps the shift count in range.
	const uint8_t xconst = adda_xconst & 0x07;
	const int16_t addac_x = (xconst == 0x07 ? 0 : 1 << xconst);
	const int16_t addac_y = (adda_yconst ? 0x01 : 0);
//////////////////////////////////////////////////////////////////////////////////////

/* Select between constant value and register value */

/*Addas_x		:= MX2 (addas_x, addac_x, addar_x, addareg);
Addas_y		:= MX2 (addas_y, addac_y, addar_y, addareg);*/
////////////////////////////////////// C++ CODE //////////////////////////////////////
	const int16_t addas_x = (addareg ? addar_x : addac_x);
	const int16_t addas_y = (addareg ? addar_y : addac_y);
//////////////////////////////////////////////////////////////////////////////////////

/* Complement these values (complement flag gives adder carry in)*/

/*Suba_x16	:= JOIN (suba_x16, suba_x, suba_x, suba_x, suba_x, suba_x, suba_x, suba_x, suba_x, suba_x,
			suba_x, suba_x, suba_x, suba_x, suba_x, suba_x, suba_x);
Suba_y16	:= JOIN (suba_y16, suba_y, suba_y, suba_y, suba_y, suba_y, suba_y, suba_y, suba_y, suba_y,
			suba_y, suba_y, suba_y, suba_y, suba_y, suba_y, suba_y);
Adda_x		:= EO (adda_x, suba_x16, addas_x);
Adda_y		:= EO (adda_y, suba_y16, addas_y);*/
////////////////////////////////////// C++ CODE //////////////////////////////////////
	adda_x = addas_x ^ (suba_x ? 0xFFFF : 0x0000);
	adda_y = addas_y ^ (suba_y ? 0xFFFF : 0x0000);
//////////////////////////////////////////////////////////////////////////////////////
}
/** ADDBMUX - Address adder input B selection *******************

This module selects the register to be updated by the address
adder.  This can be one of three registers, the A1 and A2
pointers, or the A1 fractional part. It can also be zero, so that the step
registers load directly into the pointers.

addbsel (low two bits)   0  A1 pointer       (a1_x, a1_y)
                         1  A2 pointer       (a2_x, a2_y)
                         2  A1 fraction      (a1_frac_x, a1_frac_y)
                         3  zero

addb_x / addb_y receive the selected pair.

INT16/	zero16 :LOCAL;
*/
void ADDBMUX(int16_t &addb_x, int16_t &addb_y, uint8_t addbsel, int16_t a1_x, int16_t a1_y,
	int16_t a2_x, int16_t a2_y, int16_t a1_frac_x, int16_t a1_frac_y)
{
/*Zero		:= TIE0 (zero);
Zero16		:= JOIN (zero16, zero, zero, zero, zero, zero, zero, zero,
			zero, zero, zero, zero, zero, zero, zero, zero, zero);
Addbselb[0-1]	:= BUF8 (addbselb[0-1], addbsel[0-1]);
Addb_x		:= MX4 (addb_x, a1_x, a2_x, a1_frac_x, zero16, addbselb[0..1]);
Addb_y		:= MX4 (addb_y, a1_y, a2_y, a1_frac_y, zero16, addbselb[0..1]);*/
////////////////////////////////////// C++ CODE //////////////////////////////////////
	// Four-way mux on the low two select bits; the fourth input is tied to zero.
	const int16_t xterm[4] = { a1_x, a2_x, a1_frac_x, 0 };
	const int16_t yterm[4] = { a1_y, a2_y, a1_frac_y, 0 };
	const uint8_t sel = addbsel & 0x03;
	addb_x = xterm[sel];
	addb_y = yterm[sel];
//////////////////////////////////////////////////////////////////////////////////////
}
/** DATAMUX - Address local data bus selection ******************

Select between the adder output and the input data bus

addqsel true   data_x = addq_x,               data_y = addq_y
addqsel false  data_x = low 16 bits of gpu_din, data_y = high 16 bits of gpu_din

INT16/	gpu_lo, gpu_hi
*/
void DATAMUX(int16_t &data_x, int16_t &data_y, uint32_t gpu_din, int16_t addq_x, int16_t addq_y, bool addqsel)
{
/*Gpu_lo		:= JOIN (gpu_lo, gpu_din{0..15});
Gpu_hi		:= JOIN (gpu_hi, gpu_din{16..31});

Addqselb	:= BUF8 (addqselb, addqsel);
Data_x		:= MX2 (data_x, gpu_lo, addq_x, addqselb);
Data_y		:= MX2 (data_y, gpu_hi, addq_y, addqselb);*/
////////////////////////////////////// C++ CODE //////////////////////////////////////
	if (addqsel)
	{
		// Adder output
		data_x = addq_x;
		data_y = addq_y;
	}
	else
	{
		// GPU data bus, split into its two 16-bit halves
		data_x = (int16_t)(gpu_din & 0xFFFF);
		data_y = (int16_t)(gpu_din >> 16);
	}
//////////////////////////////////////////////////////////////////////////////////////
}
/******************************************************************
Blitter Address Adder
---------------------
The blitter address adder is a pair of sixteen bit adders, one
each for X and Y.  The multiplexing of the input terms is
performed elsewhere, but this adder can also perform modulo
arithmetic to align X-addresses onto phrase boundaries.

modx[0..2] take values (as implemented by the mask table below)
000	no mask
001	mask bit 0
010	mask bits 1-0
011	mask bits 2-0
100	mask bits 3-0
101	mask bits 4-0
110	mask bits 5-0
111	table clears the whole X result (see review note in the code)

The carry out of each adder is latched when a1fracldi is set and fed into
the next call, so the function keeps state between calls and is not
reentrant.  suba_x / suba_y invert the carry in, completing the two's
complement of an operand that was complemented upstream.
******************************************************************/

/*IMPORT duplo, tosh;
	a1fracldi		// propagate address adder carry
	clk[0]			// co-processor clock
Zero		:= TIE0 (zero);*/
void ADDRADD(int16_t &addq_x, int16_t &addq_y, bool a1fracldi,
	uint16_t adda_x, uint16_t adda_y, uint16_t addb_x, uint16_t addb_y, uint8_t modx, bool suba_x, bool suba_y)
{
/* Perform the addition */

/*Adder_x		:= ADD16 (addqt_x[0..15], co_x, adda_x{0..15}, addb_x{0..15}, ci_x);
Adder_y		:= ADD16 (addq_y[0..15], co_y, adda_y{0..15}, addb_y{0..15}, ci_y);*/

/* latch carry and propagate if required */

/*Cxt0		:= AN2 (cxt[0], co_x, a1fracldi);
Cxt1		:= FD1Q (cxt[1], cxt[0], clk[0]);
Ci_x		:= EO (ci_x, cxt[1], suba_x);

Cyt0		:= AN2 (cyt[0], co_y, a1fracldi);
Cyt1		:= FD1Q (cyt[1], cyt[0], clk[0]);
Ci_y		:= EO (ci_y, cyt[1], suba_y);*/

////////////////////////////////////// C++ CODE //////////////////////////////////////
//I'm sure the following will generate a bunch of warnings, but will have to do for now.
	static uint16_t co_x = 0, co_y = 0;	// Carry out has to propagate between function calls...
	const uint16_t ci_x = co_x ^ (suba_x ? 1 : 0);
	const uint16_t ci_y = co_y ^ (suba_y ? 1 : 0);
	const uint32_t addqt_x = adda_x + addb_x + ci_x;
	const uint32_t addqt_y = adda_y + addb_y + ci_y;
	// Bit 16 is the adder carry out; it is only latched when a1fracldi is set.
	co_x = ((addqt_x & 0x10000) && a1fracldi ? 1 : 0);
	co_y = ((addqt_y & 0x10000) && a1fracldi ? 1 : 0);
//////////////////////////////////////////////////////////////////////////////////////

/* Mask low bits of X to 0 if required */

/*Masksel		:= D38H (unused[0], masksel[0..4], maskbit[5], unused[1], modx[0..2]);

Maskbit[0-4]	:= OR2 (maskbit[0-4], masksel[0-4], maskbit[1-5]);

Mask[0-5]	:= MX2 (addq_x[0-5], addqt_x[0-5], zero, maskbit[0-5]);

Addq_x		:= JOIN (addq_x, addq_x[0..5], addqt_x[6..15]);
Addq_y		:= JOIN (addq_y, addq_y[0..15]);*/

////////////////////////////////////// C++ CODE //////////////////////////////////////
	// NOTE(review): in the netlist above decoder output 7 goes to unused[1],
	// which suggests modx == 7 should leave X unmasked (0xFFFF) rather than
	// clear it. Entry kept as found - confirm before changing.
	static const uint16_t mask[8] = { 0xFFFF, 0xFFFE, 0xFFFC, 0xFFF8, 0xFFF0, 0xFFE0, 0xFFC0, 0x0000 };
	addq_x = addqt_x & mask[modx & 0x07];	// modx is a 3-bit field; keep the index in range
	addq_y = addqt_y & 0xFFFF;
//////////////////////////////////////////////////////////////////////////////////////

//Unused[0-1]	:= DUMMY (unused[0-1]);
}
5403 wdata[0..63] // co-processor write data bus
5405 dcomp[0..7] // data byte equal flags
5406 srcd[0..7] // bits to use for bit to byte expansion
5407 zcomp[0..3] // output from Z comparators
5409 a1_x[0..1] // low two bits of A1 X pointer
5410 big_pix // pixel organisation is big-endian
5411 blitter_active // blitter is active
5412 clk // co-processor clock
5413 cmpdst // compare dest rather than source
5414 colorld // load the pattern color fields
5415 daddasel[0..2] // data adder input A selection
5416 daddbsel[0..3] // data adder input B selection
5417 daddmode[0..2] // data adder mode
5418 daddq_sel // select adder output vs. GPU data
5419 data[0..63] // co-processor read data bus
5420 data_ena // enable write data
5421 data_sel[0..1] // select data to write
5422 dbinh\[0..7] // byte oriented changed data inhibits
5423 dend[0..5] // end of changed write data zone
5424 dpipe[0..1] // load computed data pipe-line latch
5425 dstart[0..5] // start of changed write data zone
5426 dstdld[0..1] // dest data load (two halves)
5427 dstzld[0..1] // dest zed load (two halves)
5428 ext_int // enable extended precision intensity calculations
5429 INT32/ gpu_din // GPU data bus
5430 iincld // I increment load
5431 iincldx // alternate I increment load
5432 init_if // initialise I fraction phase
5433 init_ii // initialise I integer phase
5434 init_zf // initialise Z fraction phase
5435 intld[0..3] // computed intensities load
5436 istepadd // intensity step integer add
5437 istepfadd // intensity step fraction add
5438 istepld // I step load
5439 istepdld // I step delta load
5440 lfu_func[0..3] // LFU function code
5441 patdadd // pattern data gouraud add
5442 patdld[0..1] // pattern data load (two halves)
5443 pdsel[0..1] // select pattern data type
5444 phrase_mode // phrase write mode
5445 reload // transfer contents of double buffers
5446 reset\ // system reset
5447 srcd1ld[0..1] // source register 1 load (two halves)
5448 srcdread // source data read load enable
5449 srczread // source zed read load enable
5450 srcshift[0..5] // source alignment shift
5451 srcz1ld[0..1] // source zed 1 load (two halves)
5452 srcz2add // zed fraction gouraud add
5453 srcz2ld[0..1] // source zed 2 load (two halves)
5454 textrgb // texture mapping in RGB mode
5455 txtd[0..63] // data from the texture unit
5456 zedld[0..3] // computed zeds load
5457 zincld // Z increment load
5458 zmode[0..2] // Z comparator mode
5459 zpipe[0..1] // load computed zed pipe-line latch
5460 zstepadd // zed step integer add
5461 zstepfadd // zed step fraction add
5462 zstepld // Z step load
5463 zstepdld // Z step delta load
// DATA -- C++ stand-in for the blitter's DATA block. The netlist fragments
// quoted in the comments below are what each "C++ CODE" section replaces.
//
// Data flow that can be read from the statements shown here:
//   - lfu:   bitwise function of srcd and dstd selected by lfu_func.
//   - cmpd:  patd XOR (cmpdst ? dstd : srcd), tested one byte lane at a time.
//   - srcz is compared with dstz one 16-bit lane at a time under zmode.
//   - COMP_CTRL receives dcomp and zcomp and fills in dbinht and nowrite.
//   - ADDARRAY fills addq; patd is reassembled from addq[3]..addq[0].
//   - dstart and dend are decoded into a 15-bit change mask (bits 0-7 gate the
//     individual bits of byte 0, bits 8-14 gate bytes 1-7); dbinh can clear it.
//   - wdata takes ddat = dmux[data_sel] where the mask is set, dstd elsewhere;
//     zwdata is built the same way from srcz and dstz.
//
// Parameters: wdata, dcomp, zcomp, nowrite, patd and srcz are references, so
// writes to them are visible to the caller. big_pix is not read by any
// statement shown here; a literal true is used in its place (see COMP_CTRL
// call and mir_bit/mir_byte).
//
// NOTE(review): this listing has gaps. The declarations of addq, dmux, dbinht
// and zwdata, the bodies of the comparison ifs, and whatever guards the two
// mirror groups and the patd assignment are not shown. The comments added
// here describe only what is visible; confirm against the complete source.
5467 void DATA(uint64_t &wdata, uint8_t &dcomp, uint8_t &zcomp, bool &nowrite,
5468 bool big_pix, bool cmpdst, uint8_t daddasel, uint8_t daddbsel, uint8_t daddmode, bool daddq_sel, uint8_t data_sel,
5469 uint8_t dbinh, uint8_t dend, uint8_t dstart, uint64_t dstd, uint32_t iinc, uint8_t lfu_func, uint64_t &patd, bool patdadd,
5470 bool phrase_mode, uint64_t srcd, bool srcdread, bool srczread, bool srcz2add, uint8_t zmode,
5471 bool bcompen, bool bkgwren, bool dcompen, uint8_t icount, uint8_t pixsize,
5472 uint64_t &srcz, uint64_t dstz, uint32_t zinc)
5475 Stuff we absolutely *need* to have passed in/out:
5477 patdadd, dstd, srcd, patd, daddasel, daddbsel, daddmode, iinc, srcz1, srcz2, big_pix, phrase_mode, cmpdst
5479 changed patd (wdata I guess...) (Nope. We pass it back directly now...)
5482 // Source data registers
5484 /*Data_src := DATA_SRC (srcdlo, srcdhi, srcz[0..1], srczo[0..1], srczp[0..1], srcz1[0..1], srcz2[0..1], big_pix,
5485 clk, gpu_din, intld[0..3], local_data0, local_data1, srcd1ld[0..1], srcdread, srczread, srcshift[0..5],
5486 srcz1ld[0..1], srcz2add, srcz2ld[0..1], zedld[0..3], zpipe[0..1]);
5487 Srcd[0-7] := JOIN (srcd[0-7], srcdlo{0-7});
5488 Srcd[8-31] := JOIN (srcd[8-31], srcdlo{8-31});
5489 Srcd[32-63] := JOIN (srcd[32-63], srcdhi{0-31});*/
5491 // Destination data registers
5493 /*Data_dst := DATA_DST (dstd[0..63], dstz[0..1], clk, dstdld[0..1], dstzld[0..1], load_data[0..1]);
5494 Dstdlo := JOIN (dstdlo, dstd[0..31]);
5495 Dstdhi := JOIN (dstdhi, dstd[32..63]);*/
5497 // Pattern and Color data registers
5499 // Looks like this is simply another register file for the pattern data registers. No adding or anything funky
5500 // going on. Note that patd & patdv will output the same info.
5501 // Patdldl/h (patdld[0..1]) can select the local_data bus to overwrite the current pattern data...
5502 // Actually, it can be either patdld OR patdadd...!
5503 /*Data_pat := DATA_PAT (colord[0..15], int0dp[8..10], int1dp[8..10], int2dp[8..10], int3dp[8..10], mixsel[0..2],
5504 patd[0..63], patdv[0..1], clk, colorld, dpipe[0], ext_int, gpu_din, intld[0..3], local_data0, local_data1,
5505 patdadd, patdld[0..1], reload, reset\);
5506 Patdlo := JOIN (patdlo, patd[0..31]);
5507 Patdhi := JOIN (patdhi, patd[32..63]);*/
5509 // Multiplying data Mixer (NOT IN JAGUAR I)
5511 /*Datamix := DATAMIX (patdo[0..1], clk, colord[0..15], dpipe[1], dstd[0..63], int0dp[8..10], int1dp[8..10],
5512 int2dp[8..10], int3dp[8..10], mixsel[0..2], patd[0..63], pdsel[0..1], srcd[0..63], textrgb, txtd[0..63]);*/
5514 // Logic function unit
5516 /*Lfu := LFU (lfu[0..1], srcdlo, srcdhi, dstdlo, dstdhi, lfu_func[0..3]);*/
5517 ////////////////////////////////////// C++ CODE //////////////////////////////////////
// funcmask widens a single lfu_func bit to an all-zeros or all-ones 64-bit
// value. Bits 0..3 of lfu_func enable, in order, the terms
// (~srcd & ~dstd), (~srcd & dstd), (srcd & ~dstd) and (srcd & dstd).
5518 uint64_t funcmask[2] = { 0, 0xFFFFFFFFFFFFFFFFLL };
5519 uint64_t func0 = funcmask[lfu_func & 0x01];
5520 uint64_t func1 = funcmask[(lfu_func >> 1) & 0x01];
5521 uint64_t func2 = funcmask[(lfu_func >> 2) & 0x01];
5522 uint64_t func3 = funcmask[(lfu_func >> 3) & 0x01];
5523 uint64_t lfu = (~srcd & ~dstd & func0) | (~srcd & dstd & func1) | (srcd & ~dstd & func2) | (srcd & dstd & func3);
5524 //////////////////////////////////////////////////////////////////////////////////////
5526 // Increment and Step Registers
5528 // Does it do anything without the step add lines? Check it!
5529 // No. This is pretty much just a register file without the Jaguar II lines...
5530 /*Inc_step := INC_STEP (iinc, istep[0..31], zinc, zstep[0..31], clk, ext_int, gpu_din, iincld, iincldx, istepadd,
5531 istepfadd, istepld, istepdld, reload, reset\, zincld, zstepadd, zstepfadd, zstepld, zstepdld);
5532 Istep := JOIN (istep, istep[0..31]);
5533 Zstep := JOIN (zstep, zstep[0..31]);*/
5535 // Pixel data comparator
5537 /*Datacomp := DATACOMP (dcomp[0..7], cmpdst, dstdlo, dstdhi, patdlo, patdhi, srcdlo, srcdhi);*/
5538 ////////////////////////////////////// C++ CODE //////////////////////////////////////
// cmpd has a zero byte wherever patd equals the chosen comparand (dstd when
// cmpdst is set, otherwise srcd). Each test below checks one byte lane,
// lowest byte first.
// NOTE(review): the statement each test controls is not shown in this
// listing; presumably it sets the matching dcomp bit -- confirm.
5540 uint64_t cmpd = patd ^ (cmpdst ? dstd : srcd);
5542 if ((cmpd & 0x00000000000000FFLL) == 0)
5544 if ((cmpd & 0x000000000000FF00LL) == 0)
5546 if ((cmpd & 0x0000000000FF0000LL) == 0)
5548 if ((cmpd & 0x00000000FF000000LL) == 0)
5550 if ((cmpd & 0x000000FF00000000LL) == 0)
5552 if ((cmpd & 0x0000FF0000000000LL) == 0)
5554 if ((cmpd & 0x00FF000000000000LL) == 0)
5556 if ((cmpd & 0xFF00000000000000LL) == 0)
5558 //////////////////////////////////////////////////////////////////////////////////////
5560 // Zed comparator for Z-buffer operations
5562 /*Zedcomp := ZEDCOMP (zcomp[0..3], srczp[0..1], dstz[0..1], zmode[0..2]);*/
5563 ////////////////////////////////////// C++ CODE //////////////////////////////////////
5564 //srczp is srcz pipelined, also it goes through a source shift as well...
5565 /*The shift is basically like so (each piece is 16 bits long):
5568 srcz1lolo srcz1lohi srcz1hilo srcz1hihi srcz2lolo srcz2lohi srcz2hilo
5570 with srcshift bits 4 & 5 selecting the start position
5572 //So... basically what we have here is:
// Each test compares one 16-bit lane of srcz with the same lane of dstz,
// lowest lane first. zmode bit 0 enables the "less than" case, bit 1 the
// "equal" case and bit 2 the "greater than" case.
// NOTE(review): the statement each test controls is not shown in this
// listing; presumably it sets the matching zcomp bit -- confirm.
5575 if ((((srcz & 0x000000000000FFFFLL) < (dstz & 0x000000000000FFFFLL)) && (zmode & 0x01))
5576 || (((srcz & 0x000000000000FFFFLL) == (dstz & 0x000000000000FFFFLL)) && (zmode & 0x02))
5577 || (((srcz & 0x000000000000FFFFLL) > (dstz & 0x000000000000FFFFLL)) && (zmode & 0x04)))
5580 if ((((srcz & 0x00000000FFFF0000LL) < (dstz & 0x00000000FFFF0000LL)) && (zmode & 0x01))
5581 || (((srcz & 0x00000000FFFF0000LL) == (dstz & 0x00000000FFFF0000LL)) && (zmode & 0x02))
5582 || (((srcz & 0x00000000FFFF0000LL) > (dstz & 0x00000000FFFF0000LL)) && (zmode & 0x04)))
5585 if ((((srcz & 0x0000FFFF00000000LL) < (dstz & 0x0000FFFF00000000LL)) && (zmode & 0x01))
5586 || (((srcz & 0x0000FFFF00000000LL) == (dstz & 0x0000FFFF00000000LL)) && (zmode & 0x02))
5587 || (((srcz & 0x0000FFFF00000000LL) > (dstz & 0x0000FFFF00000000LL)) && (zmode & 0x04)))
5590 if ((((srcz & 0xFFFF000000000000LL) < (dstz & 0xFFFF000000000000LL)) && (zmode & 0x01))
5591 || (((srcz & 0xFFFF000000000000LL) == (dstz & 0xFFFF000000000000LL)) && (zmode & 0x02))
5592 || (((srcz & 0xFFFF000000000000LL) > (dstz & 0xFFFF000000000000LL)) && (zmode & 0x04)))
5595 //TEMP, TO TEST IF ZCOMP IS THE CULPRIT...
5596 //Nope, this is NOT the problem...
5598 // We'll do the comparison/bit/byte inhibits here, since that's the way it happens
5599 // in the real thing (dcomp goes out to COMP_CTRL and back into DATA through dbinh)...
// COMP_CTRL is given a literal true for big_pix rather than this function's
// big_pix argument, and only the low byte of srcd.
5603 COMP_CTRL(dbinht, nowrite,
5604 bcompen, true/*big_pix*/, bkgwren, dcomp, dcompen, icount, pixsize, phrase_mode, srcd & 0xFF, zcomp);
5610 #ifdef VERBOSE_BLITTER_LOGGING
5612 WriteLog("\n[dcomp=%02X zcomp=%02X dbinh=%02X]\n", dcomp, zcomp, dbinh);
5615 //////////////////////////////////////////////////////////////////////////////////////
5618 // The data initializer - allows all four initial values to be computed from one (NOT IN JAGUAR I)
5620 /*Datinit := DATINIT (initcin[0..3], initinc[0..63], initpix[0..15], a1_x[0..1], big_pix, clk, iinc, init_if, init_ii,
5621 init_zf, istep[0..31], zinc, zstep[0..31]);*/
5623 // Adder array for Z and intensity increments
5625 /*Addarray := ADDARRAY (addq[0..3], clk, daddasel[0..2], daddbsel[0..3], daddmode[0..2], dstdlo, dstdhi, iinc,
5626 initcin[0..3], initinc[0..63], initpix[0..15], istep, patdv[0..1], srcdlo, srcdhi, srcz1[0..1],
5627 srcz2[0..1], reset\, zinc, zstep);*/
5628 /*void ADDARRAY(uint16_t * addq, uint8_t daddasel, uint8_t daddbsel, uint8_t daddmode,
5629 uint64_t dstd, uint32_t iinc, uint8_t initcin[], uint64_t initinc, uint16_t initpix,
5630 uint32_t istep, uint64_t patd, uint64_t srcd, uint64_t srcz1, uint64_t srcz2,
5631 uint32_t zinc, uint32_t zstep)*/
5632 ////////////////////////////////////// C++ CODE //////////////////////////////////////
// Only the adder selects and mode, dstd, iinc, patd and srcd carry live
// values into ADDARRAY; initcin is all zero and every remaining argument is
// passed as a literal 0 (this function's own zinc is not forwarded).
5634 uint8_t initcin[4] = { 0, 0, 0, 0 };
5635 ADDARRAY(addq, daddasel, daddbsel, daddmode, dstd, iinc, initcin, 0, 0, 0, patd, srcd, 0, 0, 0, 0);
5637 //This is normally done asynchronously above (thru local_data) when in patdadd mode...
5638 //And now it's passed back to the caller to be persistent between calls...!
5639 //But it's causing some serious fuck-ups in T2K now... !!! FIX !!! [DONE--???]
5640 //Weird! It doesn't anymore...!
// patd is a reference parameter, so this write reaches the caller. The four
// 16-bit addq words are packed with addq[0] in the lowest 16 bits.
// NOTE(review): the comments above tie this to patdadd mode, but no guard is
// visible in this listing -- confirm whether the assignment is conditional.
5642 patd = ((uint64_t)addq[3] << 48) | ((uint64_t)addq[2] << 32) | ((uint64_t)addq[1] << 16) | (uint64_t)addq[0];
5643 //////////////////////////////////////////////////////////////////////////////////////
5645 // Local data bus multiplexer
5647 /*Local_mux := LOCAL_MUX (local_data[0..1], load_data[0..1],
5648 addq[0..3], gpu_din, data[0..63], blitter_active, daddq_sel);
5649 Local_data0 := JOIN (local_data0, local_data[0]);
5650 Local_data1 := JOIN (local_data1, local_data[1]);*/
5651 ////////////////////////////////////// C++ CODE //////////////////////////////////////
5652 //////////////////////////////////////////////////////////////////////////////////////
5654 // Data output multiplexer and tri-state drive
5656 /*Data_mux := DATA_MUX (wdata[0..63], addq[0..3], big_pix, dstdlo, dstdhi, dstz[0..1], data_sel[0..1], data_ena,
5657 dstart[0..5], dend[0..5], dbinh\[0..7], lfu[0..1], patdo[0..1], phrase_mode, srczo[0..1]);*/
5658 ////////////////////////////////////// C++ CODE //////////////////////////////////////
5659 // NOTE: patdo comes from DATAMIX and can be considered the same as patd for Jaguar I
5661 //////////////////////////////////////////////////////////////////////////////////////
5665 wdata[0..63] // co-processor rwrite data bus
5668 big_pix // Pixel organisation is big-endian
5673 data_sel[0..1] // source of write data
5674 data_ena // enable write data onto read/write bus
5675 dstart[0..5] // start of changed write data
5676 dend[0..5] // end of changed write data
5677 dbinh\[0..7] // byte oriented changed data inhibits
5680 phrase_mode // phrase write mode
5685 /*INT32/ addql[0..1], ddatlo, ddathi zero32
5689 Phrase_mode\ := INV1 (phrase_mode\, phrase_mode);
5690 Zero := TIE0 (zero);
5691 Zero32 := JOIN (zero32, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero);*/
5693 /* Generate a changed data mask */
5695 /*Edis := OR6 (edis\, dend[0..5]);
5696 Ecoarse := DECL38E (e_coarse\[0..7], dend[3..5], edis\);
5697 E_coarse[0] := INV1 (e_coarse[0], e_coarse\[0]);
5698 Efine := DECL38E (unused[0], e_fine\[1..7], dend[0..2], e_coarse[0]);*/
5699 ////////////////////////////////////// C++ CODE //////////////////////////////////////
// Decoder lookup tables, indexed as [row][3-bit value]:
//   decl38e[0] is all ones for every input; decl38e[1][n] has only bit n clear.
//   dech38[n] has only bit n set.
//   dech38el[0][n] has only bit n set; dech38el[1] is all zeros for every input.
5700 uint8_t decl38e[2][8] = { { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF },
5701 { 0xFE, 0xFD, 0xFB, 0xF7, 0xEF, 0xDF, 0xBF, 0x7F } };
5702 uint8_t dech38[8] = { 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80 };
5703 uint8_t dech38el[2][8] = { { 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80 },
5704 { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } };
// When the low six bits of dend are all zero the all-ones row is selected.
// e_fine is decoded from dend bits 0..2 only when bit 0 of e_coarse is clear
// (dend bits 3..5 are zero and dend is non-zero); otherwise it is all ones.
5706 int en = (dend & 0x3F ? 1 : 0);
5707 uint8_t e_coarse = decl38e[en][(dend & 0x38) >> 3]; // Actually, this is e_coarse inverted...
5708 uint8_t e_fine = decl38e[(e_coarse & 0x01) ^ 0x01][dend & 0x07];
5710 //////////////////////////////////////////////////////////////////////////////////////
5712 /*Scoarse := DECH38 (s_coarse[0..7], dstart[3..5]);
5713 Sfen\ := INV1 (sfen\, s_coarse[0]);
5714 Sfine := DECH38EL (s_fine[0..7], dstart[0..2], sfen\);*/
5715 ////////////////////////////////////// C++ CODE //////////////////////////////////////
// s_coarse is one-hot on dstart bits 3..5. s_fine is one-hot on dstart bits
// 0..2 only when s_coarse bit 0 is set (dstart bits 3..5 are zero); otherwise
// it is zero.
5716 uint8_t s_coarse = dech38[(dstart & 0x38) >> 3];
5717 uint8_t s_fine = dech38el[(s_coarse & 0x01) ^ 0x01][dstart & 0x07];
5718 //////////////////////////////////////////////////////////////////////////////////////
5720 /*Maskt[0] := BUF1 (maskt[0], s_fine[0]);
5721 Maskt[1-7] := OAN1P (maskt[1-7], maskt[0-6], s_fine[1-7], e_fine\[1-7]);*/
5722 ////////////////////////////////////// C++ CODE //////////////////////////////////////
// maskt bits 0..7: bit 0 copies s_fine bit 0; each later bit n is set when
// (bit n-1 is already set OR s_fine bit n is set) AND e_fine bit n is set.
// The order of these statements matters because each reads the bit set by
// the one before it.
5723 uint16_t maskt = s_fine & 0x0001;
5724 maskt |= (((maskt & 0x0001) || (s_fine & 0x02)) && (e_fine & 0x02) ? 0x0002 : 0x0000);
5725 maskt |= (((maskt & 0x0002) || (s_fine & 0x04)) && (e_fine & 0x04) ? 0x0004 : 0x0000);
5726 maskt |= (((maskt & 0x0004) || (s_fine & 0x08)) && (e_fine & 0x08) ? 0x0008 : 0x0000);
5727 maskt |= (((maskt & 0x0008) || (s_fine & 0x10)) && (e_fine & 0x10) ? 0x0010 : 0x0000);
5728 maskt |= (((maskt & 0x0010) || (s_fine & 0x20)) && (e_fine & 0x20) ? 0x0020 : 0x0000);
5729 maskt |= (((maskt & 0x0020) || (s_fine & 0x40)) && (e_fine & 0x40) ? 0x0040 : 0x0000);
5730 maskt |= (((maskt & 0x0040) || (s_fine & 0x80)) && (e_fine & 0x80) ? 0x0080 : 0x0000);
5731 //////////////////////////////////////////////////////////////////////////////////////
5733 /* Produce a look-ahead on the ripple carry:
5734 masktla = s_coarse[0] . /e_coarse[0] */
5735 /*Masktla := AN2 (masktla, s_coarse[0], e_coarse\[0]);
5736 Maskt[8] := OAN1P (maskt[8], masktla, s_coarse[1], e_coarse\[1]);
5737 Maskt[9-14] := OAN1P (maskt[9-14], maskt[8-13], s_coarse[2-7], e_coarse\[2-7]);*/
5738 ////////////////////////////////////// C++ CODE //////////////////////////////////////
// maskt bits 8..14 use the same chain over s_coarse and e_coarse bits 1..7.
// The first link is seeded by (s_coarse & e_coarse & 0x01) instead of by
// maskt bit 7.
5739 maskt |= (((s_coarse & e_coarse & 0x01) || (s_coarse & 0x02)) && (e_coarse & 0x02) ? 0x0100 : 0x0000);
5740 maskt |= (((maskt & 0x0100) || (s_coarse & 0x04)) && (e_coarse & 0x04) ? 0x0200 : 0x0000);
5741 maskt |= (((maskt & 0x0200) || (s_coarse & 0x08)) && (e_coarse & 0x08) ? 0x0400 : 0x0000);
5742 maskt |= (((maskt & 0x0400) || (s_coarse & 0x10)) && (e_coarse & 0x10) ? 0x0800 : 0x0000);
5743 maskt |= (((maskt & 0x0800) || (s_coarse & 0x20)) && (e_coarse & 0x20) ? 0x1000 : 0x0000);
5744 maskt |= (((maskt & 0x1000) || (s_coarse & 0x40)) && (e_coarse & 0x40) ? 0x2000 : 0x0000);
5745 maskt |= (((maskt & 0x2000) || (s_coarse & 0x80)) && (e_coarse & 0x80) ? 0x4000 : 0x0000);
5746 //////////////////////////////////////////////////////////////////////////////////////
5748 /* The bit terms are mirrored for big-endian pixels outside phrase
5749 mode. The byte terms are mirrored for big-endian pixels in phrase
5752 /*Mirror_bit := AN2M (mir_bit, phrase_mode\, big_pix);
5753 Mirror_byte := AN2H (mir_byte, phrase_mode, big_pix);
5755 Masktb[14] := BUF1 (masktb[14], maskt[14]);
5756 Masku[0] := MX4 (masku[0], maskt[0], maskt[7], maskt[14], zero, mir_bit, mir_byte);
5757 Masku[1] := MX4 (masku[1], maskt[1], maskt[6], maskt[14], zero, mir_bit, mir_byte);
5758 Masku[2] := MX4 (masku[2], maskt[2], maskt[5], maskt[14], zero, mir_bit, mir_byte);
5759 Masku[3] := MX4 (masku[3], maskt[3], maskt[4], masktb[14], zero, mir_bit, mir_byte);
5760 Masku[4] := MX4 (masku[4], maskt[4], maskt[3], masktb[14], zero, mir_bit, mir_byte);
5761 Masku[5] := MX4 (masku[5], maskt[5], maskt[2], masktb[14], zero, mir_bit, mir_byte);
5762 Masku[6] := MX4 (masku[6], maskt[6], maskt[1], masktb[14], zero, mir_bit, mir_byte);
5763 Masku[7] := MX4 (masku[7], maskt[7], maskt[0], masktb[14], zero, mir_bit, mir_byte);
5764 Masku[8] := MX2 (masku[8], maskt[8], maskt[13], mir_byte);
5765 Masku[9] := MX2 (masku[9], maskt[9], maskt[12], mir_byte);
5766 Masku[10] := MX2 (masku[10], maskt[10], maskt[11], mir_byte);
5767 Masku[11] := MX2 (masku[11], maskt[11], maskt[10], mir_byte);
5768 Masku[12] := MX2 (masku[12], maskt[12], maskt[9], mir_byte);
5769 Masku[13] := MX2 (masku[13], maskt[13], maskt[8], mir_byte);
5770 Masku[14] := MX2 (masku[14], maskt[14], maskt[0], mir_byte);*/
5771 ////////////////////////////////////// C++ CODE //////////////////////////////////////
// big_pix is replaced by a literal true, so exactly one of mir_bit and
// mir_byte is set, chosen by phrase_mode.
5772 bool mir_bit = true/*big_pix*/ && !phrase_mode;
5773 bool mir_byte = true/*big_pix*/ && phrase_mode;
5774 uint16_t masku = maskt;
// First group: ORs maskt bits 7..0 into masku bits 0..7, i.e. the low byte
// in reversed bit order.
// NOTE(review): presumably the mir_bit case; the guard and any clearing of
// masku beforehand are not shown in this listing -- confirm.
5779 masku |= (maskt >> 7) & 0x0001;
5780 masku |= (maskt >> 5) & 0x0002;
5781 masku |= (maskt >> 3) & 0x0004;
5782 masku |= (maskt >> 1) & 0x0008;
5783 masku |= (maskt << 1) & 0x0010;
5784 masku |= (maskt << 3) & 0x0020;
5785 masku |= (maskt << 5) & 0x0040;
5786 masku |= (maskt << 7) & 0x0080;
// Second group: copies maskt bit 14 into each of masku bits 0..7, then ORs
// maskt bits 13..8 into masku bits 8..13 in reversed order, and maskt bit 7
// into masku bit 14.
// NOTE(review): presumably the mir_byte case; the guard and any clearing of
// masku beforehand are not shown in this listing -- confirm. Also, the
// netlist above feeds masku[14] from maskt[0], whereas the last line of this
// group takes maskt bit 7 -- confirm which is intended.
5792 masku |= (maskt >> 14) & 0x0001;
5793 masku |= (maskt >> 13) & 0x0002;
5794 masku |= (maskt >> 12) & 0x0004;
5795 masku |= (maskt >> 11) & 0x0008;
5796 masku |= (maskt >> 10) & 0x0010;
5797 masku |= (maskt >> 9) & 0x0020;
5798 masku |= (maskt >> 8) & 0x0040;
5799 masku |= (maskt >> 7) & 0x0080;
5801 masku |= (maskt >> 5) & 0x0100;
5802 masku |= (maskt >> 3) & 0x0200;
5803 masku |= (maskt >> 1) & 0x0400;
5804 masku |= (maskt << 1) & 0x0800;
5805 masku |= (maskt << 3) & 0x1000;
5806 masku |= (maskt << 5) & 0x2000;
5807 masku |= (maskt << 7) & 0x4000;
5809 //////////////////////////////////////////////////////////////////////////////////////
5811 /* The maskt terms define the area for changed data, but the byte
5812 inhibit terms can override these */
5814 /*Mask[0-7] := AN2 (mask[0-7], masku[0-7], dbinh\[0]);
5815 Mask[8-14] := AN2H (mask[8-14], masku[8-14], dbinh\[1-7]);*/
5816 ////////////////////////////////////// C++ CODE //////////////////////////////////////
// A set dbinh bit 0 clears mask bits 0..7 (all of byte 0); set dbinh bits
// 1..7 clear mask bits 8..14 respectively (one per remaining byte).
5817 uint16_t mask = masku & (!(dbinh & 0x01) ? 0xFFFF : 0xFF00);
5818 mask &= ~(((uint16_t)dbinh & 0x00FE) << 7);
5819 //////////////////////////////////////////////////////////////////////////////////////
5821 /*Addql[0] := JOIN (addql[0], addq[0..1]);
5822 Addql[1] := JOIN (addql[1], addq[2..3]);
5824 Dsel0b[0-1] := BUF8 (dsel0b[0-1], data_sel[0]);
5825 Dsel1b[0-1] := BUF8 (dsel1b[0-1], data_sel[1]);
5826 Ddatlo := MX4 (ddatlo, patd[0], lfu[0], addql[0], zero32, dsel0b[0], dsel1b[0]);
5827 Ddathi := MX4 (ddathi, patd[1], lfu[1], addql[1], zero32, dsel0b[1], dsel1b[1]);*/
5828 ////////////////////////////////////// C++ CODE //////////////////////////////////////
// dmux entry 2 is the packed adder output; ddat is whichever entry data_sel
// picks.
// NOTE(review): the other dmux entries are not shown in this listing; per
// the netlist above they would be patd, lfu and zero -- confirm.
5832 dmux[2] = ((uint64_t)addq[3] << 48) | ((uint64_t)addq[2] << 32) | ((uint64_t)addq[1] << 16) | (uint64_t)addq[0];
5834 uint64_t ddat = dmux[data_sel];
5835 //////////////////////////////////////////////////////////////////////////////////////
5837 /*Zed_sel := AN2 (zed_sel, data_sel[0..1]);
5838 Zed_selb[0-1] := BUF8 (zed_selb[0-1], zed_sel);
5840 Dat[0-7] := MX4 (dat[0-7], dstdlo{0-7}, ddatlo{0-7}, dstzlo{0-7}, srczlo{0-7}, mask[0-7], zed_selb[0]);
5841 Dat[8-15] := MX4 (dat[8-15], dstdlo{8-15}, ddatlo{8-15}, dstzlo{8-15}, srczlo{8-15}, mask[8], zed_selb[0]);
5842 Dat[16-23] := MX4 (dat[16-23], dstdlo{16-23}, ddatlo{16-23}, dstzlo{16-23}, srczlo{16-23}, mask[9], zed_selb[0]);
5843 Dat[24-31] := MX4 (dat[24-31], dstdlo{24-31}, ddatlo{24-31}, dstzlo{24-31}, srczlo{24-31}, mask[10], zed_selb[0]);
5844 Dat[32-39] := MX4 (dat[32-39], dstdhi{0-7}, ddathi{0-7}, dstzhi{0-7}, srczhi{0-7}, mask[11], zed_selb[1]);
5845 Dat[40-47] := MX4 (dat[40-47], dstdhi{8-15}, ddathi{8-15}, dstzhi{8-15}, srczhi{8-15}, mask[12], zed_selb[1]);
5846 Dat[48-55] := MX4 (dat[48-55], dstdhi{16-23}, ddathi{16-23}, dstzhi{16-23}, srczhi{16-23}, mask[13], zed_selb[1]);
5847 Dat[56-63] := MX4 (dat[56-63], dstdhi{24-31}, ddathi{24-31}, dstzhi{24-31}, srczhi{24-31}, mask[14], zed_selb[1]);*/
5848 ////////////////////////////////////// C++ CODE //////////////////////////////////////
// Byte 0 of wdata is merged bit by bit using mask bits 0..7. Each of bytes
// 1..7 is taken whole from ddat when its mask bit (8..14) is set, and from
// dstd otherwise.
5849 wdata = ((ddat & mask) | (dstd & ~mask)) & 0x00000000000000FFLL;
5850 wdata |= (mask & 0x0100 ? ddat : dstd) & 0x000000000000FF00LL;
5851 wdata |= (mask & 0x0200 ? ddat : dstd) & 0x0000000000FF0000LL;
5852 wdata |= (mask & 0x0400 ? ddat : dstd) & 0x00000000FF000000LL;
5853 wdata |= (mask & 0x0800 ? ddat : dstd) & 0x000000FF00000000LL;
5854 wdata |= (mask & 0x1000 ? ddat : dstd) & 0x0000FF0000000000LL;
5855 wdata |= (mask & 0x2000 ? ddat : dstd) & 0x00FF000000000000LL;
5856 wdata |= (mask & 0x4000 ? ddat : dstd) & 0xFF00000000000000LL;
5859 printf("\n[ddat=%08X%08X dstd=%08X%08X wdata=%08X%08X mask=%04X]\n",
5860 (uint32_t)(ddat >> 32), (uint32_t)(ddat & 0xFFFFFFFF),
5861 (uint32_t)(dstd >> 32), (uint32_t)(dstd & 0xFFFFFFFF),
5862 (uint32_t)(wdata >> 32), (uint32_t)(wdata & 0xFFFFFFFF), mask);
5865 //This is a crappy way of handling this, but it should work for now...
// Same merge as for wdata, with srcz in place of ddat and dstz in place of
// dstd.
// NOTE(review): what is done with zwdata afterwards is not shown in this
// listing; presumably it is written back through srcz -- confirm.
5867 zwdata = ((srcz & mask) | (dstz & ~mask)) & 0x00000000000000FFLL;
5868 zwdata |= (mask & 0x0100 ? srcz : dstz) & 0x000000000000FF00LL;
5869 zwdata |= (mask & 0x0200 ? srcz : dstz) & 0x0000000000FF0000LL;
5870 zwdata |= (mask & 0x0400 ? srcz : dstz) & 0x00000000FF000000LL;
5871 zwdata |= (mask & 0x0800 ? srcz : dstz) & 0x000000FF00000000LL;
5872 zwdata |= (mask & 0x1000 ? srcz : dstz) & 0x0000FF0000000000LL;
5873 zwdata |= (mask & 0x2000 ? srcz : dstz) & 0x00FF000000000000LL;
5874 zwdata |= (mask & 0x4000 ? srcz : dstz) & 0xFF00000000000000LL;
5877 WriteLog("\n[srcz=%08X%08X dstz=%08X%08X zwdata=%08X%08X mask=%04X]\n",
5878 (uint32_t)(srcz >> 32), (uint32_t)(srcz & 0xFFFFFFFF),
5879 (uint32_t)(dstz >> 32), (uint32_t)(dstz & 0xFFFFFFFF),
5880 (uint32_t)(zwdata >> 32), (uint32_t)(zwdata & 0xFFFFFFFF), mask);
5884 //////////////////////////////////////////////////////////////////////////////////////
5886 /*Data_enab[0-1] := BUF8 (data_enab[0-1], data_ena);
5887 Datadrv[0-31] := TS (wdata[0-31], dat[0-31], data_enab[0]);
5888 Datadrv[32-63] := TS (wdata[32-63], dat[32-63], data_enab[1]);
5890 Unused[0] := DUMMY (unused[0]);
5896 /** COMP_CTRL - Comparator output control logic *****************
5898 This block is responsible for taking the comparator outputs and
5899 using them as appropriate to inhibit writes. Two methods are
5900 supported for inhibiting write data:
5902 - suppression of the inner loop controlled write operation
5903 - a set of eight byte inhibit lines to write back dest data
5905 The first technique is used in pixel oriented modes, the second in
5906 phrase mode, but the phrase mode form is only applicable to eight
5907 and sixteen bit pixel modes.
5909 Writes can be suppressed by data being equal, by the Z comparator
5910 conditions being met, or by the bit to pixel expansion scheme.
5912 Pipe-lining issues: the data derived comparator outputs are stable
5913 until the next data read, well after the affected write from this
5914 operation. However, the inner counter bits can count immediately
5915 before the ack for the last write. Therefore, it is necessary to
5916 delay bcompbit select terms by one inner loop pipe-line stage,
5917 when generating the select for the data control - the output is
5918 delayed one further tick to give it write data timing (2/34).
5920 There is also a problem with computed data - the new values are
5921 calculated before the write associated with the old value has been
5922 performed. This is taken care of within the zed comparator by
5923 pipe-lining the comparator inputs where appropriate.
5926 //#define LOG_COMP_CTRL
5928 dbinh\[0..7] // destination byte inhibit lines
5929 nowrite // suppress inner loop write operation
5931 bcompen // bit selector inhibit enable
5932 big_pix // pixels are big-endian
5933 bkgwren // enable dest data write in pix inhibit
5934 clk // co-processor clock
5935 dcomp[0..7] // output of data byte comparators
5936 dcompen // data comparator inhibit enable
5937 icount[0..2] // low bits of inner count
5938 pixsize[0..2] // destination pixel size
5939 phrase_mode // phrase write mode
5940 srcd[0..7] // bits to use for bit to byte expansion
5941 step_inner // inner loop advance
5942 zcomp[0..3] // output of word zed comparators
5944 void COMP_CTRL(uint8_t &dbinh, bool &nowrite,
5945 bool bcompen, bool big_pix, bool bkgwren, uint8_t dcomp, bool dcompen, uint8_t icount,
5946 uint8_t pixsize, bool phrase_mode, uint8_t srcd, uint8_t zcomp)
5950 /*Bkgwren\ := INV1 (bkgwren\, bkgwren);
5951 Phrase_mode\ := INV1 (phrase_mode\, phrase_mode);
5952 Pixsize\[0-2] := INV2 (pixsize\[0-2], pixsize[0-2]);*/
5954 /* The bit comparator bits are derived from the source data, which
5955 will have been suitably aligned for phrase mode. The contents of
5956 the inner counter are used to select which bit to use.
5958 When not in phrase mode the inner count value is used to select
5959 one bit. It is assumed that the count has already occurred, so,
5960 7 selects bit 0, etc. In big-endian pixel mode, this turns round,
5961 so that a count of 7 selects bit 7.
5963 In phrase mode, the eight bits are used directly, and this mode is
5964 only applicable to 8-bit pixel mode (2/34) */
5966 /*Bcompselt[0-2] := EO (bcompselt[0-2], icount[0-2], big_pix);
5967 Bcompbit := MX8 (bcompbit, srcd[7], srcd[6], srcd[5],
5968 srcd[4], srcd[3], srcd[2], srcd[1], srcd[0], bcompselt[0..2]);
5969 Bcompbit\ := INV1 (bcompbit\, bcompbit);*/
5970 ////////////////////////////////////// C++ CODE //////////////////////////////////////
5971 #ifdef LOG_COMP_CTRL
5974 WriteLog("\n [bcompen=%s dcompen=%s phrase_mode=%s bkgwren=%s dcomp=%02X zcomp=%02X]", (bcompen ? "T" : "F"), (dcompen ? "T" : "F"), (phrase_mode ? "T" : "F"), (bkgwren ? "T" : "F"), dcomp, zcomp);
5979 uint8_t bcompselt = (big_pix ? ~icount : icount) & 0x07;
5980 uint8_t bitmask[8] = { 0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02, 0x01 };
5981 bool bcompbit = srcd & bitmask[bcompselt];
5982 //////////////////////////////////////////////////////////////////////////////////////
5984 /* pipe-line the count */
5985 /*Bcompsel[0-2] := FDSYNC (bcompsel[0-2], bcompselt[0-2], step_inner, clk);
5986 Bcompbt := MX8 (bcompbitpt, srcd[7], srcd[6], srcd[5],
5987 srcd[4], srcd[3], srcd[2], srcd[1], srcd[0], bcompsel[0..2]);
5988 Bcompbitp := FD1Q (bcompbitp, bcompbitpt, clk);
5989 Bcompbitp\ := INV1 (bcompbitp\, bcompbitp);*/
5991 /* For pixel mode, generate the write inhibit signal for all modes
5992 on bit inhibit, for 8 and 16 bit modes on comparator inhibit, and
5993 for 16 bit mode on Z inhibit
5995 Nowrite = bcompen . /bcompbit . /phrase_mode
5996 + dcompen . dcomp[0] . /phrase_mode . pixsize = 011
5997 + dcompen . dcomp[0..1] . /phrase_mode . pixsize = 100
5998 + zcomp[0] . /phrase_mode . pixsize = 100
6001 /*Nowt0 := NAN3 (nowt[0], bcompen, bcompbit\, phrase_mode\);
6002 Nowt1 := ND6 (nowt[1], dcompen, dcomp[0], phrase_mode\, pixsize\[2], pixsize[0..1]);
6003 Nowt2 := ND7 (nowt[2], dcompen, dcomp[0..1], phrase_mode\, pixsize[2], pixsize\[0..1]);
6004 Nowt3 := NAN5 (nowt[3], zcomp[0], phrase_mode\, pixsize[2], pixsize\[0..1]);
6005 Nowt4 := NAN4 (nowt[4], nowt[0..3]);
6006 Nowrite := AN2 (nowrite, nowt[4], bkgwren\);*/
6007 ////////////////////////////////////// C++ CODE //////////////////////////////////////
6008 nowrite = ((bcompen && !bcompbit && !phrase_mode)
6009 || (dcompen && (dcomp & 0x01) && !phrase_mode && (pixsize == 3))
6010 || (dcompen && ((dcomp & 0x03) == 0x03) && !phrase_mode && (pixsize == 4))
6011 || ((zcomp & 0x01) && !phrase_mode && (pixsize == 4)))
6013 //////////////////////////////////////////////////////////////////////////////////////
6015 /*Winht := NAN3 (winht, bcompen, bcompbitp\, phrase_mode\);
6016 Winhibit := NAN4 (winhibit, winht, nowt[1..3]);*/
6017 ////////////////////////////////////// C++ CODE //////////////////////////////////////
6018 //This is the same as above, but with bcompbit delayed one tick and called 'winhibit'
6019 //Small difference: Besides the pipeline effect, it's also not using !bkgwren...
6020 // bool winhibit = (bcompen && !
6021 bool winhibit = (bcompen && !bcompbit && !phrase_mode)
6022 || (dcompen && (dcomp & 0x01) && !phrase_mode && (pixsize == 3))
6023 || (dcompen && ((dcomp & 0x03) == 0x03) && !phrase_mode && (pixsize == 4))
6024 || ((zcomp & 0x01) && !phrase_mode && (pixsize == 4));
6025 #ifdef LOG_COMP_CTRL
6028 WriteLog("[nw=%s wi=%s]", (nowrite ? "T" : "F"), (winhibit ? "T" : "F"));
6032 //////////////////////////////////////////////////////////////////////////////////////
6034 /* For phrase mode, generate the byte inhibit signals for eight bit
6035 mode 011, or sixteen bit mode 100
6036 dbinh\[0] = pixsize[2] . zcomp[0]
6037 + pixsize[2] . dcomp[0] . dcomp[1] . dcompen
6038 + /pixsize[2] . dcomp[0] . dcompen
6039 + /srcd[0] . bcompen
6041 Inhibits 0-3 are also used when not in phrase mode to write back
6045 /*Srcd\[0-7] := INV1 (srcd\[0-7], srcd[0-7]);
6047 Di0t0 := NAN2H (di0t[0], pixsize[2], zcomp[0]);
6048 Di0t1 := NAN4H (di0t[1], pixsize[2], dcomp[0..1], dcompen);
6049 Di0t2 := NAN2 (di0t[2], srcd\[0], bcompen);
6050 Di0t3 := NAN3 (di0t[3], pixsize\[2], dcomp[0], dcompen);
6051 Di0t4 := NAN4 (di0t[4], di0t[0..3]);
6052 Dbinh[0] := ANR1P (dbinh\[0], di0t[4], phrase_mode, winhibit);*/
6053 ////////////////////////////////////// C++ CODE //////////////////////////////////////
6055 bool di0t0_1 = ((pixsize & 0x04) && (zcomp & 0x01))
6056 || ((pixsize & 0x04) && (dcomp & 0x01) && (dcomp & 0x02) && dcompen);
6057 bool di0t4 = di0t0_1
6058 || (!(srcd & 0x01) && bcompen)
6059 || (!(pixsize & 0x04) && (dcomp & 0x01) && dcompen);
6060 dbinh |= (!((di0t4 && phrase_mode) || winhibit) ? 0x01 : 0x00);
6061 #ifdef LOG_COMP_CTRL
6064 WriteLog("[di0t0_1=%s di0t4=%s]", (di0t0_1 ? "T" : "F"), (di0t4 ? "T" : "F"));
6068 //////////////////////////////////////////////////////////////////////////////////////
6070 /*Di1t0 := NAN3 (di1t[0], pixsize\[2], dcomp[1], dcompen);
6071 Di1t1 := NAN2 (di1t[1], srcd\[1], bcompen);
6072 Di1t2 := NAN4 (di1t[2], di0t[0..1], di1t[0..1]);
6073 Dbinh[1] := ANR1 (dbinh\[1], di1t[2], phrase_mode, winhibit);*/
6074 ////////////////////////////////////// C++ CODE //////////////////////////////////////
6075 bool di1t2 = di0t0_1
6076 || (!(srcd & 0x02) && bcompen)
6077 || (!(pixsize & 0x04) && (dcomp & 0x02) && dcompen);
6078 dbinh |= (!((di1t2 && phrase_mode) || winhibit) ? 0x02 : 0x00);
6079 #ifdef LOG_COMP_CTRL
6082 WriteLog("[di1t2=%s]", (di1t2 ? "T" : "F"));
6086 //////////////////////////////////////////////////////////////////////////////////////
6088 /*Di2t0 := NAN2H (di2t[0], pixsize[2], zcomp[1]);
6089 Di2t1 := NAN4H (di2t[1], pixsize[2], dcomp[2..3], dcompen);
6090 Di2t2 := NAN2 (di2t[2], srcd\[2], bcompen);
6091 Di2t3 := NAN3 (di2t[3], pixsize\[2], dcomp[2], dcompen);
6092 Di2t4 := NAN4 (di2t[4], di2t[0..3]);
6093 Dbinh[2] := ANR1 (dbinh\[2], di2t[4], phrase_mode, winhibit);*/
6094 ////////////////////////////////////// C++ CODE //////////////////////////////////////
6095 //[bcompen=F dcompen=T phrase_mode=T bkgwren=F][nw=F wi=F]
6096 //[di0t0_1=F di0t4=F][di1t2=F][di2t0_1=T di2t4=T][di3t2=T][di4t0_1=F di2t4=F][di5t2=F][di6t0_1=F di6t4=F][di7t2=F]
6097 //[dcomp=$00 dbinh=$0C][7804780400007804] (icount=0005, inc=4)
6098 bool di2t0_1 = ((pixsize & 0x04) && (zcomp & 0x02))
6099 || ((pixsize & 0x04) && (dcomp & 0x04) && (dcomp & 0x08) && dcompen);
6100 bool di2t4 = di2t0_1
6101 || (!(srcd & 0x04) && bcompen)
6102 || (!(pixsize & 0x04) && (dcomp & 0x04) && dcompen);
6103 dbinh |= (!((di2t4 && phrase_mode) || winhibit) ? 0x04 : 0x00);
6104 #ifdef LOG_COMP_CTRL
6107 WriteLog("[di2t0_1=%s di2t4=%s]", (di2t0_1 ? "T" : "F"), (di2t4 ? "T" : "F"));
6111 //////////////////////////////////////////////////////////////////////////////////////
6113 /*Di3t0 := NAN3 (di3t[0], pixsize\[2], dcomp[3], dcompen);
6114 Di3t1 := NAN2 (di3t[1], srcd\[3], bcompen);
6115 Di3t2 := NAN4 (di3t[2], di2t[0..1], di3t[0..1]);
6116 Dbinh[3] := ANR1 (dbinh\[3], di3t[2], phrase_mode, winhibit);*/
6117 ////////////////////////////////////// C++ CODE //////////////////////////////////////
6118 bool di3t2 = di2t0_1
6119 || (!(srcd & 0x08) && bcompen)
6120 || (!(pixsize & 0x04) && (dcomp & 0x08) && dcompen);
6121 dbinh |= (!((di3t2 && phrase_mode) || winhibit) ? 0x08 : 0x00);
6122 #ifdef LOG_COMP_CTRL
6125 WriteLog("[di3t2=%s]", (di3t2 ? "T" : "F"));
6129 //////////////////////////////////////////////////////////////////////////////////////
6131 /*Di4t0 := NAN2H (di4t[0], pixsize[2], zcomp[2]);
6132 Di4t1 := NAN4H (di4t[1], pixsize[2], dcomp[4..5], dcompen);
6133 Di4t2 := NAN2 (di4t[2], srcd\[4], bcompen);
6134 Di4t3 := NAN3 (di4t[3], pixsize\[2], dcomp[4], dcompen);
6135 Di4t4 := NAN4 (di4t[4], di4t[0..3]);
6136 Dbinh[4] := NAN2 (dbinh\[4], di4t[4], phrase_mode);*/
6137 ////////////////////////////////////// C++ CODE //////////////////////////////////////
6138 bool di4t0_1 = ((pixsize & 0x04) && (zcomp & 0x04))
6139 || ((pixsize & 0x04) && (dcomp & 0x10) && (dcomp & 0x20) && dcompen);
6140 bool di4t4 = di4t0_1
6141 || (!(srcd & 0x10) && bcompen)
6142 || (!(pixsize & 0x04) && (dcomp & 0x10) && dcompen);
6143 dbinh |= (!(di4t4 && phrase_mode) ? 0x10 : 0x00);
6144 #ifdef LOG_COMP_CTRL
6147 WriteLog("[di4t0_1=%s di2t4=%s]", (di4t0_1 ? "T" : "F"), (di4t4 ? "T" : "F"));
6151 //////////////////////////////////////////////////////////////////////////////////////
6153 /*Di5t0 := NAN3 (di5t[0], pixsize\[2], dcomp[5], dcompen);
6154 Di5t1 := NAN2 (di5t[1], srcd\[5], bcompen);
6155 Di5t2 := NAN4 (di5t[2], di4t[0..1], di5t[0..1]);
6156 Dbinh[5] := NAN2 (dbinh\[5], di5t[2], phrase_mode);*/
6157 ////////////////////////////////////// C++ CODE //////////////////////////////////////
6158 bool di5t2 = di4t0_1
6159 || (!(srcd & 0x20) && bcompen)
6160 || (!(pixsize & 0x04) && (dcomp & 0x20) && dcompen);
6161 dbinh |= (!(di5t2 && phrase_mode) ? 0x20 : 0x00);
6162 #ifdef LOG_COMP_CTRL
6165 WriteLog("[di5t2=%s]", (di5t2 ? "T" : "F"));
6169 //////////////////////////////////////////////////////////////////////////////////////
6171 /*Di6t0 := NAN2H (di6t[0], pixsize[2], zcomp[3]);
6172 Di6t1 := NAN4H (di6t[1], pixsize[2], dcomp[6..7], dcompen);
6173 Di6t2 := NAN2 (di6t[2], srcd\[6], bcompen);
6174 Di6t3 := NAN3 (di6t[3], pixsize\[2], dcomp[6], dcompen);
6175 Di6t4 := NAN4 (di6t[4], di6t[0..3]);
6176 Dbinh[6] := NAN2 (dbinh\[6], di6t[4], phrase_mode);*/
6177 ////////////////////////////////////// C++ CODE //////////////////////////////////////
6178 bool di6t0_1 = ((pixsize & 0x04) && (zcomp & 0x08))
6179 || ((pixsize & 0x04) && (dcomp & 0x40) && (dcomp & 0x80) && dcompen);
6180 bool di6t4 = di6t0_1
6181 || (!(srcd & 0x40) && bcompen)
6182 || (!(pixsize & 0x04) && (dcomp & 0x40) && dcompen);
6183 dbinh |= (!(di6t4 && phrase_mode) ? 0x40 : 0x00);
6184 #ifdef LOG_COMP_CTRL
6187 WriteLog("[di6t0_1=%s di6t4=%s]", (di6t0_1 ? "T" : "F"), (di6t4 ? "T" : "F"));
6191 //////////////////////////////////////////////////////////////////////////////////////
6193 /*Di7t0 := NAN3 (di7t[0], pixsize\[2], dcomp[7], dcompen);
6194 Di7t1 := NAN2 (di7t[1], srcd\[7], bcompen);
6195 Di7t2 := NAN4 (di7t[2], di6t[0..1], di7t[0..1]);
6196 Dbinh[7] := NAN2 (dbinh\[7], di7t[2], phrase_mode);*/
6197 ////////////////////////////////////// C++ CODE //////////////////////////////////////
6198 bool di7t2 = di6t0_1
6199 || (!(srcd & 0x80) && bcompen)
6200 || (!(pixsize & 0x04) && (dcomp & 0x80) && dcompen);
6201 dbinh |= (!(di7t2 && phrase_mode) ? 0x80 : 0x00);
6202 #ifdef LOG_COMP_CTRL
6205 WriteLog("[di7t2=%s]", (di7t2 ? "T" : "F"));
6209 //////////////////////////////////////////////////////////////////////////////////////
6214 #ifdef LOG_COMP_CTRL
6217 WriteLog("[dcomp=$%02X dbinh=$%02X]\n ", dcomp, dbinh);
6224 ////////////////////////////////////// C++ CODE //////////////////////////////////////
6225 //////////////////////////////////////////////////////////////////////////////////////
6227 // !!! TESTING !!! TESTING !!! TESTING !!! TESTING !!! TESTING !!! TESTING !!! TESTING !!!
6228 // !!! TESTING !!! TESTING !!! TESTING !!! TESTING !!! TESTING !!! TESTING !!! TESTING !!!
6229 // !!! TESTING !!! TESTING !!! TESTING !!! TESTING !!! TESTING !!! TESTING !!! TESTING !!!