5 // GCC/SDL port by Niels Wagenaar (Linux/WIN32) and Caz (BeOS)
6 // Cleanups/fixes by James L. Hammons
11 // Generic blitter it is, until all blitter bugs are squashed!
12 #define USE_GENERIC_BLITTER
15 #include "blitter2.h" // Testing purposes only!
18 extern int jaguar_active_memory_dumps;
// REG(A): assemble a 32-bit value from the four bytes blitter_ram[A..A+3], most
// significant byte first.
// WREG(A,D): store the 32-bit value D into blitter_ram[A..A+3], most significant byte
// first. The argument D is evaluated four times, so it must be free of side effects.
20 #define REG(A) (((uint32)blitter_ram[(A)] << 24) | ((uint32)blitter_ram[(A)+1] << 16) \
21 | ((uint32)blitter_ram[(A)+2] << 8) | (uint32)blitter_ram[(A)+3])
22 #define WREG(A,D) (blitter_ram[(A)] = ((D)>>24)&0xFF, blitter_ram[(A)+1] = ((D)>>16)&0xFF, \
23 blitter_ram[(A)+2] = ((D)>>8)&0xFF, blitter_ram[(A)+3] = (D)&0xFF)
// The blitter's register file and the byte offsets of the individual registers inside
// it. The offsets are intended to be passed to the REG()/WREG() macros, which access
// four consecutive bytes of blitter_ram starting at the given offset.
25 int start_logging = 0;
27 // Blitter register RAM (most of it is hidden from the user)
29 static uint8 blitter_ram[0x100];
31 // Blitter registers (offsets from F02200)
33 #define A1_BASE ((UINT32)0x00)
34 #define A1_FLAGS ((UINT32)0x04)
35 #define A1_CLIP ((UINT32)0x08) // Height and width values for clipping
36 #define A1_PIXEL ((UINT32)0x0C) // Integer part of the pixel (Y.i and X.i)
37 #define A1_STEP ((UINT32)0x10) // Integer part of the step
38 #define A1_FSTEP ((UINT32)0x14) // Fractional part of the step
39 #define A1_FPIXEL ((UINT32)0x18) // Fractional part of the pixel (Y.f and X.f)
40 #define A1_INC ((UINT32)0x1C) // Integer part of the increment
41 #define A1_FINC ((UINT32)0x20) // Fractional part of the increment
42 #define A2_BASE ((UINT32)0x24)
43 #define A2_FLAGS ((UINT32)0x28)
44 #define A2_MASK ((UINT32)0x2C) // Modulo values for x and y (M.y and M.x)
45 #define A2_PIXEL ((UINT32)0x30) // Integer part of the pixel (no fractional part for A2)
46 #define A2_STEP ((UINT32)0x34) // Integer part of the step (no fractional part for A2)
47 #define COMMAND ((UINT32)0x38)
48 #define PIXLINECOUNTER ((UINT32)0x3C)
49 #define SRCDATA ((UINT32)0x40)
50 #define DSTDATA ((UINT32)0x48)
51 #define DSTZ ((UINT32)0x50)
52 #define SRCZINT ((UINT32)0x58)
53 #define SRCZFRAC ((UINT32)0x60)
54 #define PATTERNDATA ((UINT32)0x68)
55 #define INTENSITYINC ((UINT32)0x70)
56 #define ZINC ((UINT32)0x74)
57 #define COLLISIONCTRL ((UINT32)0x78)
58 #define PHRASEINT3 ((UINT32)0x7C)
59 #define PHRASEINT2 ((UINT32)0x80)
60 #define PHRASEINT1 ((UINT32)0x84)
61 #define PHRASEINT0 ((UINT32)0x88)
62 #define PHRASEZ3 ((UINT32)0x8C)
63 #define PHRASEZ2 ((UINT32)0x90)
64 #define PHRASEZ1 ((UINT32)0x94)
65 #define PHRASEZ0 ((UINT32)0x98)
67 // Blitter command bits
// Each macro masks one bit out of a variable named `cmd`, so it can only be used
// where a `cmd` holding the blitter command word is in scope. The result is the raw
// masked value (zero or the bit itself), not a normalised 0/1.
69 #define SRCEN (cmd & 0x00000001)
70 #define SRCENZ (cmd & 0x00000002)
71 #define SRCENX (cmd & 0x00000004)
72 #define DSTEN (cmd & 0x00000008)
73 #define DSTENZ (cmd & 0x00000010)
74 #define DSTWRZ (cmd & 0x00000020)
75 #define CLIPA1 (cmd & 0x00000040)
77 #define UPDA1F (cmd & 0x00000100)
78 #define UPDA1 (cmd & 0x00000200)
79 #define UPDA2 (cmd & 0x00000400)
81 #define DSTA2 (cmd & 0x00000800)
83 #define Z_OP_INF (cmd & 0x00040000)
84 #define Z_OP_EQU (cmd & 0x00080000)
85 #define Z_OP_SUP (cmd & 0x00100000)
87 #define LFU_NAN (cmd & 0x00200000)
88 #define LFU_NA (cmd & 0x00400000)
89 #define LFU_AN (cmd & 0x00800000)
90 #define LFU_A (cmd & 0x01000000)
92 #define CMPDST (cmd & 0x02000000)
93 #define BCOMPEN (cmd & 0x04000000)
94 #define DCOMPEN (cmd & 0x08000000)
96 #define PATDSEL (cmd & 0x00010000)
97 #define INTADD (cmd & 0x00020000)
98 #define TOPBEN (cmd & 0x00004000)
99 #define TOPNEN (cmd & 0x00008000)
100 #define BKGWREN (cmd & 0x10000000)
101 #define GOURD (cmd & 0x00001000)
102 #define GOURZ (cmd & 0x00002000)
103 #define SRCSHADE (cmd & 0x40000000)
// Single-bit tests on the A1_FLAGS / A2_FLAGS registers, read live from blitter_ram
// through REG(): XSIGNSUB is bit 19, YSIGNSUB is bit 20 and YADD1 is bit 18.
// Each yields the raw masked value (zero or the bit itself).
111 #define XSIGNSUB_A1 (REG(A1_FLAGS)&0x80000)
112 #define XSIGNSUB_A2 (REG(A2_FLAGS)&0x80000)
114 #define YSIGNSUB_A1 (REG(A1_FLAGS)&0x100000)
115 #define YSIGNSUB_A2 (REG(A2_FLAGS)&0x100000)
117 #define YADD1_A1 (REG(A1_FLAGS)&0x40000)
118 #define YADD1_A2 (REG(A2_FLAGS)&0x40000)
// Pixel read helpers, one set per pixel depth (1, 2, 4, 8, 16 and 32 bits).
// The argument `a` is the token a1 or a2; it is pasted onto _x, _y, _width, _pitch and
// _addr to select that address generator's variables. a##_y >> 16 is used as the row
// and the upper bits of a##_x as the column.
//   PIXEL_OFFSET_n(a): offset of the pixel from a##_addr, in bytes for depths up to
//                      8 bpp and in pixels for 16/32 bpp (the READ macros then shift
//                      by 1 or 2 to get bytes).
//   PIXEL_SHIFT_n(a):  bit position of a sub-byte pixel inside its byte.
//   READ_PIXEL_n(a):   fetch the pixel through jaguar_byte/word/long_read.
// READ_PIXEL(a,f) selects the helper from bits 3-5 of the flags value f and yields 0
// for the two remaining encodings (6 and 7). f is evaluated several times.
121 #define PIXEL_SHIFT_1(a) (((~a##_x) >> 16) & 7)
122 #define PIXEL_OFFSET_1(a) (((((UINT32)a##_y >> 16) * a##_width / 8) + (((UINT32)a##_x >> 19) & ~7)) * (1 + a##_pitch) + (((UINT32)a##_x >> 19) & 7))
123 #define READ_PIXEL_1(a) ((jaguar_byte_read(a##_addr+PIXEL_OFFSET_1(a)) >> PIXEL_SHIFT_1(a)) & 0x01)
126 #define PIXEL_SHIFT_2(a) (((~a##_x) >> 15) & 6)
127 #define PIXEL_OFFSET_2(a) (((((UINT32)a##_y >> 16) * a##_width / 4) + (((UINT32)a##_x >> 18) & ~7)) * (1 + a##_pitch) + (((UINT32)a##_x >> 18) & 7))
128 #define READ_PIXEL_2(a) ((jaguar_byte_read(a##_addr+PIXEL_OFFSET_2(a)) >> PIXEL_SHIFT_2(a)) & 0x03)
131 #define PIXEL_SHIFT_4(a) (((~a##_x) >> 14) & 4)
132 #define PIXEL_OFFSET_4(a) (((((UINT32)a##_y >> 16) * (a##_width/2)) + (((UINT32)a##_x >> 17) & ~7)) * (1 + a##_pitch) + (((UINT32)a##_x >> 17) & 7))
133 #define READ_PIXEL_4(a) ((jaguar_byte_read(a##_addr+PIXEL_OFFSET_4(a)) >> PIXEL_SHIFT_4(a)) & 0x0f)
136 #define PIXEL_OFFSET_8(a) (((((UINT32)a##_y >> 16) * a##_width) + (((UINT32)a##_x >> 16) & ~7)) * (1 + a##_pitch) + (((UINT32)a##_x >> 16) & 7))
137 #define READ_PIXEL_8(a) (jaguar_byte_read(a##_addr+PIXEL_OFFSET_8(a)))
140 #define PIXEL_OFFSET_16(a) (((((UINT32)a##_y >> 16) * a##_width) + (((UINT32)a##_x >> 16) & ~3)) * (1 + a##_pitch) + (((UINT32)a##_x >> 16) & 3))
141 #define READ_PIXEL_16(a) (jaguar_word_read(a##_addr+(PIXEL_OFFSET_16(a)<<1)))
144 #define PIXEL_OFFSET_32(a) (((((UINT32)a##_y >> 16) * a##_width) + (((UINT32)a##_x >> 16) & ~1)) * (1 + a##_pitch) + (((UINT32)a##_x >> 16) & 1))
145 #define READ_PIXEL_32(a) (jaguar_long_read(a##_addr+(PIXEL_OFFSET_32(a)<<2)))
148 #define READ_PIXEL(a,f) (\
149 (((f>>3)&0x07) == 0) ? (READ_PIXEL_1(a)) : \
150 (((f>>3)&0x07) == 1) ? (READ_PIXEL_2(a)) : \
151 (((f>>3)&0x07) == 2) ? (READ_PIXEL_4(a)) : \
152 (((f>>3)&0x07) == 3) ? (READ_PIXEL_8(a)) : \
153 (((f>>3)&0x07) == 4) ? (READ_PIXEL_16(a)) : \
154 (((f>>3)&0x07) == 5) ? (READ_PIXEL_32(a)) : 0)
// Z-buffer access. Only a 16 bpp form exists: the Z value for a pixel lives
// a##_zoffs * 4 pixel slots after the pixel itself (ZDATA_OFFSET_16), and the generic
// READ_ZDATA/WRITE_ZDATA wrappers ignore their flags argument f and always use it.
// NOTE(review): WRITE_ZDATA expands to a braced block followed by ';', so
// "WRITE_ZDATA(...);" yields two statements; avoid it as the unbraced body of an
// if that has an else.
156 // 16 bpp z data read
157 #define ZDATA_OFFSET_16(a) (PIXEL_OFFSET_16(a) + a##_zoffs * 4)
158 #define READ_ZDATA_16(a) (jaguar_word_read(a##_addr+(ZDATA_OFFSET_16(a)<<1)))
161 #define READ_ZDATA(a,f) (READ_ZDATA_16(a))
163 // 16 bpp z data write
164 #define WRITE_ZDATA_16(a,d) { jaguar_word_write(a##_addr+(ZDATA_OFFSET_16(a)<<1),d); }
165 //#define WRITE_ZDATA_16(a,d) { jaguar_word_write(a##_addr+(ZDATA_OFFSET_16(a)<<1),d); \
166 WriteLog("16bpp z write --> "); }
169 #define WRITE_ZDATA(a,f,d) WRITE_ZDATA_16(a,d);
// Register data reads. Each macro extracts one pixel of the given depth from the pair
// of 32-bit registers held at blitter_ram offsets r and r+4.
//   r: byte offset of the first register (e.g. SRCDATA, PATTERNDATA)
//   a: the token a1 or a2; a##_x is that generator's X position, whose integer pixel
//      index is a##_x >> 16
//   p: non-zero in phrase mode
// In phrase mode the pixel index selects the pixel inside the pair: its low bits give
// the shift inside one 32-bit register and the next bit selects r or r+4. Outside
// phrase mode the lowest pixel of register r is always returned.
//
// The shift mask must keep the shift count below 32 and a multiple of the pixel size:
//   1 bpp: (index & 31) * 1  -> (x >> 16) & 0x1f
//   2 bpp: (index & 15) * 2  -> (x >> 15) & 0x1e
//   4 bpp: (index & 7)  * 4  -> (x >> 14) & 0x1c
//   8 bpp: (index & 3)  * 8  -> (x >> 13) & 0x18
//  16 bpp: (index & 1)  * 16 -> (x >> 12) & 0x10
// The 2 bpp and 4 bpp masks were previously 0x3e and 0x28. Both allowed shift counts
// of 32 or more, which is undefined for a 32-bit operand, and the 4 bpp mask also
// ignored most of the pixel index.

// 1 bpp r data read
#define READ_RDATA_1(r,a,p) ((p) ? ((REG((r)+(((UINT32)a##_x>>19)&4)))>>(((UINT32)a##_x>>16)&0x1f))& 0x1 : (REG(r) & 0x1))
// 2 bpp r data read
#define READ_RDATA_2(r,a,p) ((p) ? ((REG((r)+(((UINT32)a##_x>>18)&4)))>>(((UINT32)a##_x>>15)&0x1e))& 0x3 : (REG(r) & 0x3))
// 4 bpp r data read
#define READ_RDATA_4(r,a,p) ((p) ? ((REG((r)+(((UINT32)a##_x>>17)&4)))>>(((UINT32)a##_x>>14)&0x1c))& 0xf : (REG(r) & 0xf))
// 8 bpp r data read
#define READ_RDATA_8(r,a,p) ((p) ? ((REG((r)+(((UINT32)a##_x>>16)&4)))>>(((UINT32)a##_x>>13)&0x18))& 0xff : (REG(r) & 0xff))
// 16 bpp r data read
#define READ_RDATA_16(r,a,p) ((p) ? ((REG((r)+(((UINT32)a##_x>>15)&4)))>>(((UINT32)a##_x>>12)&0x10))&0xffff : (REG(r) & 0xffff))
// 32 bpp r data read
#define READ_RDATA_32(r,a,p) ((p) ? REG((r)+(((UINT32)a##_x>>14)&4)) : REG(r))
189 // register data read
// Dispatches to READ_RDATA_1..32 according to bits 3-5 of the flags value f
// (0 -> 1 bpp ... 5 -> 32 bpp) and yields 0 for encodings 6 and 7. r, a and p are
// forwarded unchanged; f is evaluated several times.
190 #define READ_RDATA(r,a,f,p) (\
191 (((f>>3)&0x07) == 0) ? (READ_RDATA_1(r,a,p)) : \
192 (((f>>3)&0x07) == 1) ? (READ_RDATA_2(r,a,p)) : \
193 (((f>>3)&0x07) == 2) ? (READ_RDATA_4(r,a,p)) : \
194 (((f>>3)&0x07) == 3) ? (READ_RDATA_8(r,a,p)) : \
195 (((f>>3)&0x07) == 4) ? (READ_RDATA_16(r,a,p)) : \
196 (((f>>3)&0x07) == 5) ? (READ_RDATA_32(r,a,p)) : 0)
// Pixel write helpers, mirroring the READ_PIXEL_n macros. `a` is the token a1 or a2
// and `d` is the pixel value to store.
// The 1/2/4 bpp forms read the containing byte, clear the pixel's bits and OR in
// d shifted to the pixel position. d is neither parenthesised nor masked to the
// pixel width, so bits of d above that width spill into neighbouring pixels.
// The 8/16/32 bpp forms store d directly with jaguar_byte/word/long_write.
// WRITE_PIXEL(a,f,d) switches on bits 3-5 of the flags value f to pick the depth.
// NOTE(review): the closing lines of the WRITE_PIXEL macro are not present in this
// listing.
199 #define WRITE_PIXEL_1(a,d) { jaguar_byte_write(a##_addr+PIXEL_OFFSET_1(a),(jaguar_byte_read(a##_addr+PIXEL_OFFSET_1(a))&(~(0x01 << PIXEL_SHIFT_1(a))))|(d<<PIXEL_SHIFT_1(a))); }
202 #define WRITE_PIXEL_2(a,d) { jaguar_byte_write(a##_addr+PIXEL_OFFSET_2(a),(jaguar_byte_read(a##_addr+PIXEL_OFFSET_2(a))&(~(0x03 << PIXEL_SHIFT_2(a))))|(d<<PIXEL_SHIFT_2(a))); }
205 #define WRITE_PIXEL_4(a,d) { jaguar_byte_write(a##_addr+PIXEL_OFFSET_4(a),(jaguar_byte_read(a##_addr+PIXEL_OFFSET_4(a))&(~(0x0f << PIXEL_SHIFT_4(a))))|(d<<PIXEL_SHIFT_4(a))); }
208 #define WRITE_PIXEL_8(a,d) { jaguar_byte_write(a##_addr+PIXEL_OFFSET_8(a),d); }
210 // 16 bpp pixel write
211 #define WRITE_PIXEL_16(a,d) { jaguar_word_write(a##_addr+(PIXEL_OFFSET_16(a)<<1),d); }
212 //#define WRITE_PIXEL_16(a,d) { jaguar_word_write(a##_addr+(PIXEL_OFFSET_16(a)<<1),d); \
213 WriteLog("16bpp pixel write --> "); }
215 //This is where the bad YPOS values are being written... How to fix???
216 // 32 bpp pixel write
217 #define WRITE_PIXEL_32(a,d) { jaguar_long_write(a##_addr+(PIXEL_OFFSET_32(a)<<2),d); }
218 //#define WRITE_PIXEL_32(a,d) { jaguar_long_write(a##_addr+(PIXEL_OFFSET_32(a)<<2),d); \
219 WriteLog("32bpp pixel write --> "); }
222 #define WRITE_PIXEL(a,f,d) {\
223 switch ((f>>3)&0x07) { \
224 case 0: WRITE_PIXEL_1(a,d); break; \
225 case 1: WRITE_PIXEL_2(a,d); break; \
226 case 2: WRITE_PIXEL_4(a,d); break; \
227 case 3: WRITE_PIXEL_8(a,d); break; \
228 case 4: WRITE_PIXEL_16(a,d); break; \
229 case 5: WRITE_PIXEL_32(a,d); break; \
232 // Width in Pixels of a Scanline
233 // This is a pretranslation of the value found in the A1 & A2 flags: It's really a floating point value
234 // of the form EEEEMM where MM is the mantissa with an implied "1." in front of it and the EEEE value is
235 // the exponent. Valid values for the exponent range from 0 to 11 (decimal). It's easiest to think of it
236 // as a floating point bit pattern being followed by a number of zeroes. So, e.g., 001101 translates to
237 // 1.01 (the "1." being implied) x (2 ^ 3) or 1010 -> 10 in base 10 (i.e., 1.01 with the decimal place
238 // being shifted to the right 3 places).
// The table is indexed by that 6-bit EEEEMM field. In blitter_blit the table lookups
// are commented out and the width is computed directly as ((0x04 | m) << e) >> 2.
// NOTE(review): most rows of the initialiser are not present in this listing.
239 static uint32 blitter_scanline_width[48] =
241 0, 0, 0, 0, // Note: This would really translate to 1, 1, 1, 1
251 1024, 1280, 1536, 1792,
252 2048, 2560, 3072, 3584
// Working state for the blit in progress. blitter_blit fills several of these from
// the register file (a1_addr/a2_addr from A1_BASE/A2_BASE, the zoffs, xadd control
// and pitch values from the FLAGS registers, n_pixels/n_lines from PIXLINECOUNTER)
// before a blit runs. The a1_/a2_ prefixes matter: the pixel macros build variable
// names by pasting _addr, _width, _pitch, _zoffs, _x and _y onto the token a1 or a2.
255 //static uint8 * tom_ram_8;
256 //static uint8 * paletteRam;
266 static uint32 a1_addr;
267 static uint32 a2_addr;
268 static int32 a1_zoffs;
269 static int32 a2_zoffs;
270 static uint32 xadd_a1_control;
271 static uint32 xadd_a2_control;
272 static int32 a1_pitch;
273 static int32 a2_pitch;
274 static uint32 n_pixels;
275 static uint32 n_lines;
278 static int32 a1_width;
281 static int32 a2_width;
282 static int32 a2_mask_x;
283 static int32 a2_mask_y;
284 static int32 a1_xadd;
285 static int32 a1_yadd;
286 static int32 a2_xadd;
287 static int32 a2_yadd;
288 static uint8 a1_phrase_mode;
289 static uint8 a2_phrase_mode;
290 static int32 a1_step_x = 0;
291 static int32 a1_step_y = 0;
292 static int32 a2_step_x = 0;
293 static int32 a2_step_y = 0;
294 static uint32 outer_loop;
295 static uint32 inner_loop;
296 static uint32 a2_psize;
297 static uint32 a1_psize;
298 static uint32 gouraud_add;
299 //static uint32 gouraud_data;
300 //static uint16 gint[4];
301 //static uint16 gfrac[4];
302 //static uint8 gcolour[4];
305 static int gd_ia, gd_ca;
306 static int colour_index = 0;
308 static uint32 z_i[4];
310 static uint32 a1_clip_x, a1_clip_y;
// blitter_ptr is the write cursor used by blitter_gen_start_of_function and
// blitter_gen_end_of_function when they emit machine-code bytes.
312 static uint8 blitter_code_cache[4096];
313 static uint8 * blitter_ptr;
314 uint8 blitter_working = 0;
// Types and globals for the cache of specialised blitter routines.
//   blitter_fn      - signature of a cached routine (no arguments, no result).
//   s_blitter_cache - doubly linked list node; the code in this file also uses its
//                     hashcode, code, ready and used members.
//   blitter_cache   - 256 list heads, indexed by the top 8 bits of a hashcode
//                     (hashcode >> 24).
//   BPP_LUT         - maps the 3-bit depth field of a FLAGS register to bits per
//                     pixel; encodings 6 and 7 map to 0.
//   blitters_code_fp / blitters_code_init_fp - output files that blitter_in_cache
//                     writes generated C code and blitter_add() calls to.
// NOTE(review): the struct's data members are not present in this listing.
316 typedef void (blitter_fn)(void);
318 typedef struct s_blitter_cache
324 struct s_blitter_cache * next;
325 struct s_blitter_cache * prev;
326 } s_blitter_code_cache;
328 s_blitter_cache * blitter_cache[256];
330 uint8 blitter_cache_init = 0;
331 static uint8 BPP_LUT[8] = { 1, 2, 4, 8, 16, 32, 0, 0 };
333 FILE * blitters_code_fp;
334 FILE * blitters_code_init_fp;
336 //////////////////////////////////////////////////////////////////////////////
337 // build C code for the specified blitter
338 //////////////////////////////////////////////////////////////////////////////
342 //////////////////////////////////////////////////////////////////////////////
// Writes to fp the C source of a function named blitter_0x<hashcode>, wrapped in an
// "#ifndef blitter_code_0x<hashcode>" guard, specialised for the command word cmd and
// the current FLAGS registers.
//   fp       - destination stream for the generated source
//   cmd      - blitter command word; tested through the command-bit macros
//   hashcode - key used to name the generated function and its guard macro
// The generated body is a nested outer_loop / inner_loop that, per pixel, loads source
// and destination data, applies the comparators, computes writedata, writes it and
// advances the address generators, then stores the final positions with WREG.
// NOTE(review): many control lines (braces, if/else headers, the declarations of src,
// dst, src_flags and dst_flags) are not present in this listing, so which fprintf
// calls are conditional cannot be determined from here.
343 void blitter_gen_c_code(FILE *fp, uint32 cmd,uint32 hashcode)
// static: no reset is visible in this listing, so once a comparator sets it the flag
// would stay set for later calls as well. TODO confirm against the full source.
345 static uint8 inhibit_modified=0;
347 fprintf(fp,"#ifndef blitter_code_0x%.8x\n",hashcode);
348 fprintf(fp,"#define blitter_code_0x%.8x\n",hashcode);
350 fprintf(fp,"void blitter_0x%.8x(void)\n",hashcode);
352 fprintf(fp,"\twhile (outer_loop--)\n");
354 fprintf(fp,"\t\tinner_loop=n_pixels;\n");
355 fprintf(fp,"\t\twhile (inner_loop--)\n");
356 fprintf(fp,"\t\t{\n");
357 fprintf(fp,"\t\t\tuint32 srcdata = 0;\n");
358 fprintf(fp,"\t\t\tuint32 srczdata = 0;\n");
359 fprintf(fp,"\t\t\tuint32 dstdata = 0;\n");
360 fprintf(fp,"\t\t\tuint32 dstzdata = 0;\n");
361 fprintf(fp,"\t\t\tuint32 writedata = 0;\n");
362 fprintf(fp,"\t\t\tuint32 inhibit = 0;\n");
384 // load src data and Z
387 fprintf(fp, "\t\t\tsrcdata = READ_PIXEL_%i(%s);\n", BPP_LUT[(((REG(dst_flags)) >> 3) & 0x07)], src);
389 fprintf(fp,"\t\t\tsrczdata = READ_ZDATA_%i(%s);\n",BPP_LUT[(((REG(dst_flags))>>3)&0x07)],src);
392 fprintf(fp,"\t\t\tsrczdata = READ_RDATA_%i(SRCZINT, %s, %s_phrase_mode);\n",BPP_LUT[(((REG(src_flags))>>3)&0x07)],src,src);
396 fprintf(fp,"\t\t\tsrcdata = READ_RDATA_%i(SRCDATA, %s, %s_phrase_mode);\n",BPP_LUT[(((REG(dst_flags))>>3)&0x07)],src,src);
398 fprintf(fp,"\t\t\tsrczdata = READ_RDATA_%i(SRCZINT, %s, %s_phrase_mode);\n",BPP_LUT[(((REG(dst_flags))>>3)&0x07)],src,src);
401 // load dst data and Z
404 fprintf(fp,"\t\t\tdstdata = READ_PIXEL_%i(%s);\n",BPP_LUT[(((REG(dst_flags))>>3)&0x07)],dst);
406 fprintf(fp,"\t\t\tdstzdata = READ_ZDATA_%i(%s);\n",BPP_LUT[(((REG(dst_flags))>>3)&0x07)],dst);
408 fprintf(fp,"\t\t\tdstzdata = READ_RDATA_%i(DSTZ, %s, %s_phrase_mode);\n",BPP_LUT[(((REG(dst_flags))>>3)&0x07)],dst,dst);
412 fprintf(fp,"\t\t\tdstdata = READ_RDATA_%i(DSTDATA, %s, %s_phrase_mode);\n",BPP_LUT[(((REG(dst_flags))>>3)&0x07)],dst,dst);
415 fprintf(fp,"\t\t\tdstzdata = READ_RDATA_%i(DSTZ, %s, %s_phrase_mode);\n",BPP_LUT[(((REG(dst_flags))>>3)&0x07)],dst,dst);
// 0x00000040 is the CLIPA1 command bit; the test also requires that A1 is the
// destination (DSTA2 clear).
419 if ((cmd & 0x00000040)&&(!DSTA2))
422 fprintf(fp,"\t\t\tif (a1_x < 0 || a1_y < 0 || (a1_x >> 16) >= (REG(A1_CLIP) & 0x7fff) || (a1_y >> 16) >= ((REG(A1_CLIP) >> 16) & 0x7fff)) inhibit = 1;\n");
427 fprintf(fp,"\t\t\tsrczdata=z_i[colour_index]>>16;\n");
429 // apply z comparator
430 if (Z_OP_INF) { fprintf(fp,"\t\t\tif (srczdata < dstzdata) inhibit = 1;\n"); inhibit_modified=1;}
431 if (Z_OP_EQU) { fprintf(fp,"\t\t\tif (srczdata == dstzdata) inhibit = 1;\n"); inhibit_modified=1;}
432 if (Z_OP_SUP) { fprintf(fp,"\t\t\tif (srczdata > dstzdata) inhibit = 1;\n"); inhibit_modified=1;}
434 // apply data comparator
439 // compare source pixel with pattern pixel
440 fprintf(fp,"\t\t\tif (srcdata == READ_RDATA_%i(PATTERNDATA, %s,%s_phrase_mode)) inhibit=1;\n",BPP_LUT[(((REG(dst_flags))>>3)&0x07)],src,src);
445 // compare destination pixel with pattern pixel
446 fprintf(fp,"\t\t\tif (dstdata == READ_RDATA_%i(PATTERNDATA, %s,%s_phrase_mode)) inhibit=1;\n",BPP_LUT[(((REG(dst_flags))>>3)&0x07)],dst,dst);
451 // compute the write data and store
452 if (inhibit_modified) fprintf(fp,"\t\t\tif (!inhibit)\n\t\t\t{\n");
455 // use pattern data for write data
456 fprintf(fp,"\t\t\t\twritedata= READ_RDATA_%i(PATTERNDATA, %s, %s_phrase_mode);\n",BPP_LUT[(((REG(dst_flags))>>3)&0x07)],dst,dst);
461 // intensity addition
462 fprintf(fp,"\t\t\t\twritedata = (srcdata & 0xff) + (dstdata & 0xff);\n");
464 fprintf(fp,"\t\t\t\tif (writedata > 0xff) writedata = 0xff;\n");
466 fprintf(fp,"\t\t\t\twritedata |= (srcdata & 0xf00) + (dstdata & 0xf00);\n");
467 if (!(TOPNEN)) fprintf(fp,"\t\t\t\tif (writedata > 0xfff) writedata = 0xfff;\n");
468 fprintf(fp,"\t\t\t\twritedata |= (srcdata & 0xf000) + (dstdata & 0xf000);\n");
// Logic function unit: each enabled LFU bit ORs one of the four src/dst minterms
// into writedata.
472 if (LFU_NAN) fprintf(fp,"\t\t\t\twritedata |= ~srcdata & ~dstdata;\n");
473 if (LFU_NA) fprintf(fp,"\t\t\t\twritedata |= ~srcdata & dstdata;\n");
474 if (LFU_AN) fprintf(fp,"\t\t\t\twritedata |= srcdata & ~dstdata;\n");
475 if (LFU_A) fprintf(fp,"\t\t\t\twritedata |= srcdata & dstdata;\n");
479 fprintf(fp,"\t\t\t\twritedata = ((gd_c[colour_index])<<8)|(gd_i[colour_index]>>16);\n");
483 fprintf(fp,"\t\t\t\t{\n");
484 fprintf(fp,"\t\t\t\tint intensity = srcdata & 0xFF;\n");
485 fprintf(fp,"\t\t\t\tint ia = gd_ia >> 16;\n");
486 fprintf(fp,"\t\t\t\tif(ia & 0x80)\n");
487 fprintf(fp,"\t\t\t\t ia = 0xFFFFFF00 | ia;\n");
488 fprintf(fp,"\t\t\t\tintensity += ia;\n");
489 fprintf(fp,"\t\t\t\tif(intensity < 0)\n");
490 fprintf(fp,"\t\t\t\t intensity = 0;\n");
491 fprintf(fp,"\t\t\t\tif(intensity > 0xFF)\n");
492 fprintf(fp,"\t\t\t\t intensity = 0xFF;\n");
493 fprintf(fp,"\t\t\t\twritedata = (srcdata & 0xFF00) | intensity;\n");
494 fprintf(fp,"\t\t\t\t}\n");
496 if (inhibit_modified)
498 fprintf(fp,"\t\t\t} else { srczdata=dstzdata; writedata=dstdata; }\n");
501 if ((DSTA2?a2_phrase_mode:a1_phrase_mode) || BKGWREN)
503 // write to the destination
504 fprintf(fp,"\t\t\tWRITE_PIXEL_%i(%s, writedata);\n",BPP_LUT[(((REG(dst_flags))>>3)&0x07)],dst);
505 if (DSTWRZ) fprintf(fp,"\t\t\tWRITE_ZDATA_%i(%s, srczdata);\n",BPP_LUT[(((REG(dst_flags))>>3)&0x07)],dst);
509 if (inhibit_modified) fprintf(fp,"\t\t\tif (!inhibit)\n\t\t\t{\n");
510 // write to the destination
511 fprintf(fp,"\t\t\t\tWRITE_PIXEL_%i(%s, writedata);\n",BPP_LUT[(((REG(dst_flags))>>3)&0x07)],dst);
512 if (DSTWRZ) fprintf(fp,"\t\t\t\tWRITE_ZDATA_%i(%s, srczdata);\n",BPP_LUT[(((REG(dst_flags))>>3)&0x07)],dst);
513 if (inhibit_modified) fprintf(fp,"\t\t\t}\n");
516 fprintf(fp,"\t\t\ta1_x += a1_xadd;\n");
517 fprintf(fp,"\t\t\ta1_y += a1_yadd;\n");
518 fprintf(fp,"\t\t\ta2_x = (a2_x + a2_xadd) & a2_mask_x;\n");
519 fprintf(fp,"\t\t\ta2_y = (a2_y + a2_yadd) & a2_mask_y;\n");
522 fprintf(fp,"\t\t\tz_i[colour_index]+=zadd;\n");
524 if ((GOURD)||(SRCSHADE))
526 fprintf(fp,"\t\t\tgd_i[colour_index] += gd_ia;\n");
527 fprintf(fp,"\t\t\tgd_c[colour_index] += gd_ca;\n");
529 if ((GOURD)||(SRCSHADE)||(GOURZ))
532 fprintf(fp,"\t\t\t colour_index=(colour_index+1)&0x3;\n");
534 fprintf(fp,"\t\t}\n");
536 fprintf(fp,"\t\ta1_x+=a1_step_x;\n");
537 fprintf(fp,"\t\ta1_y+=a1_step_y;\n");
538 fprintf(fp,"\t\ta2_x+=a2_step_x;\n");
539 fprintf(fp,"\t\ta2_y+=a2_step_y;\n");
542 // write values back to registers
543 fprintf(fp,"\tWREG(A1_PIXEL, (a1_y & 0xffff0000) | ((a1_x >> 16) & 0xffff));\n");
544 fprintf(fp,"\tWREG(A1_FPIXEL, (a1_y << 16) | (a1_x & 0xffff));\n");
545 fprintf(fp,"\tWREG(A2_PIXEL, (a2_y & 0xffff0000) | ((a2_x >> 16) & 0xffff));\n");
547 fprintf(fp,"#endif\n");
550 //////////////////////////////////////////////////////////////////////////////
551 // Generate a start of function in x86 assembly
552 //////////////////////////////////////////////////////////////////////////////
556 //////////////////////////////////////////////////////////////////////////////
// Appends the three bytes 55 8B EC (push ebp / mov ebp,esp) at blitter_ptr and
// advances blitter_ptr past them. No bounds check is made on the destination buffer.
557 void blitter_gen_start_of_function(void)
559 *blitter_ptr++ = 0x55; // push ebp
560 *blitter_ptr++ = 0x8B; // mov ebp,esp
561 *blitter_ptr++ = 0xEC;
563 //////////////////////////////////////////////////////////////////////////////
564 // Generate a end of function in x86 assembly
565 //////////////////////////////////////////////////////////////////////////////
569 //////////////////////////////////////////////////////////////////////////////
// Appends the four bytes 8B E5 5D C3 (mov esp,ebp / pop ebp / ret) at blitter_ptr and
// advances blitter_ptr past them. No bounds check is made on the destination buffer.
570 void blitter_gen_end_of_function(void)
572 *blitter_ptr++ = 0x8B; // mov esp,ebp
573 *blitter_ptr++ = 0xE5;
574 *blitter_ptr++ = 0x5D; // pop ebp
575 *blitter_ptr++ = 0xC3; // ret
// Helpers for building and querying a blitter hashcode. Both operate on a variable
// named `hashcode` that must be in scope at the point of use.
//   HASHCODE_BIT(C,B):    set bit B of hashcode when condition C is non-zero.
//   HASHCODE_BIT_TEST(B): non-zero when bit B of hashcode is set.
// HASHCODE_BIT is wrapped in do { } while (0) so that "HASHCODE_BIT(c, b);" is a
// single statement and stays correct as the unbraced body of an if/else. The shift
// uses an unsigned 1 so that bit 31 is well defined, and B is parenthesised so that
// expressions can be passed as the bit number.
#define HASHCODE_BIT(C,B) do { if (C) hashcode |= (1u << (B)); } while (0)
#define HASHCODE_BIT_TEST(B) (hashcode & (1u << (B)))
// Builds a 32-bit key that identifies the kind of blit described by cmd and the
// current register state, for use as a lookup key in blitter_cache.
//   cmd - blitter command word (tested through the command-bit macros)
// Layout of the key as assembled below:
//   bits 0-2   depth field (bits 3-5) of A1_FLAGS
//   bits 3-5   depth field (bits 3-5) of A2_FLAGS
//   bits 6-25  one bit per listed command flag
//   bit 26     set if any of the bits in mask 0x1C020 is set in cmd
//   bit 27     set if command bit 0x40 (CLIPA1) is set
//   bits 28-29 a1_phrase_mode / a2_phrase_mode non-zero
// NOTE(review): the function's braces and return statement are not present in this
// listing; presumably it returns hashcode - confirm against the full source.
581 uint32 blitter_calc_hashcode(uint32 cmd)
583 uint32 hashcode=0x00000000;
585 // source and destination bit depth
586 hashcode|=((REG(A1_FLAGS)>>3)&0x07)<<0;
587 hashcode|=((REG(A2_FLAGS)>>3)&0x07)<<3;
589 HASHCODE_BIT(DSTA2, 6);
590 HASHCODE_BIT(SRCEN, 7);
591 HASHCODE_BIT(SRCENZ, 8);
592 HASHCODE_BIT(DSTEN, 9);
593 HASHCODE_BIT(DSTENZ, 10);
594 HASHCODE_BIT(Z_OP_INF, 11);
595 HASHCODE_BIT(Z_OP_EQU, 12);
596 HASHCODE_BIT(Z_OP_SUP, 13);
597 HASHCODE_BIT(DCOMPEN, 14);
598 HASHCODE_BIT(CMPDST, 15);
599 HASHCODE_BIT(PATDSEL, 16);
600 HASHCODE_BIT(INTADD, 17);
601 HASHCODE_BIT(TOPBEN, 18);
602 HASHCODE_BIT(TOPNEN, 19);
603 HASHCODE_BIT(LFU_NAN, 20);
604 HASHCODE_BIT(LFU_NA, 21);
605 HASHCODE_BIT(LFU_AN, 22);
606 HASHCODE_BIT(LFU_A, 23);
607 HASHCODE_BIT(BKGWREN, 24);
608 HASHCODE_BIT(DSTWRZ, 25);
609 HASHCODE_BIT((cmd & 0x001c020), 26); // extra data read/write
610 HASHCODE_BIT((cmd & 0x00000040), 27); // source clipping
611 HASHCODE_BIT(a1_phrase_mode, 28);
612 HASHCODE_BIT(a2_phrase_mode, 29);
616 //////////////////////////////////////////////////////////////////////////////
617 // Build the blitter code for the current blitter operation in the cache
618 //////////////////////////////////////////////////////////////////////////////
622 //////////////////////////////////////////////////////////////////////////////
// NOTE(review): no statements of this function's body are present in this listing,
// so how cmd and cache_index are used cannot be stated from here.
623 void blitter_build_cached_code(uint32 cmd, uint32 cache_index)
626 //////////////////////////////////////////////////////////////////////////////
627 // Check if the blitter code for the current blitter operation is cached
628 //////////////////////////////////////////////////////////////////////////////
632 //////////////////////////////////////////////////////////////////////////////
// Computes the hashcode for cmd, logs it, and walks the list in
// blitter_cache[hashcode >> 24] looking for a node with that hashcode; a match is
// returned. The visible statements after the loop append a new node (ready = 0, with
// a freshly allocated 4096-byte code buffer), write the C source of the blit to
// blitters_code_fp and a matching blitter_add() line to blitters_code_init_fp.
// NOTE(review): both malloc results are used without a NULL check, and the list head
// is dereferenced without checking that the bucket was initialised.
// NOTE(review): braces and the final return are not present in this listing.
633 struct s_blitter_cache * blitter_in_cache(uint32 cmd)
636 uint32 hashcode=blitter_calc_hashcode(cmd);
638 WriteLog("blitter: hashcode= 0x%.8x\n",hashcode);
640 struct s_blitter_cache *blitter_list=blitter_cache[hashcode>>24];
643 while (blitter_list->next)
645 blitter_list=blitter_list->next;
647 if (blitter_list->hashcode==hashcode)
648 return(blitter_list);
651 blitter_list->next=(struct s_blitter_cache *)malloc(sizeof(struct s_blitter_cache));
652 blitter_list->next->prev=blitter_list;
653 blitter_list->next->next=null;
654 blitter_list=blitter_list->next;
656 blitter_list->code=(uint8*)malloc(4096);
657 blitter_list->hashcode=hashcode;
658 blitter_list->ready=0;
659 blitter_gen_c_code(blitters_code_fp,cmd,hashcode);
660 fprintf(blitters_code_init_fp,"\tblitter_add(0x%.8x,(uint8*)&blitter_0x%.8x);\n",hashcode,hashcode);
662 //WriteLog("warning: using generic blitter for blitter 0x%.8x\n",hashcode);
666 #ifndef USE_GENERIC_BLITTER
667 #include "include/blit_c.h"
669 //////////////////////////////////////////////////////////////////////////////
670 // Execute the cached blitter code for the current blitter operation
671 //////////////////////////////////////////////////////////////////////////////
675 //////////////////////////////////////////////////////////////////////////////
// Takes a cache node, checks that it is non-null and marked ready, and converts its
// code pointer to a blitter_fn pointer.
// NOTE(review): the statements that follow the guard and the cast (the early return,
// the call through fn and the returned values) are not present in this listing.
676 uint32 blitter_execute_cached_code(struct s_blitter_cache * blitter)
678 if ((blitter == null) || (blitter->ready == 0))
681 blitter_fn * fn = (blitter_fn *)blitter->code;
// Registers an already-built blitter routine in the cache.
//   hashcode - key of the routine; its top 8 bits select the bucket
//   code     - pointer stored as the node's code
// Walks the bucket's list comparing hashcodes, then appends a new node with
// ready = 1 and used = 0.
// NOTE(review): the statement executed when an existing node matches is not present
// in this listing; the malloc result is used without a NULL check.
688 void blitter_add(uint32 hashcode, uint8 *code)
690 struct s_blitter_cache *blitter_list=blitter_cache[(hashcode>>24)];
692 // WriteLog("adding blitter for hashcode 0x%.8x\n",hashcode);
694 while (blitter_list->next)
696 blitter_list=blitter_list->next;
698 if (blitter_list->hashcode==hashcode)
701 blitter_list->next=(struct s_blitter_cache *)malloc(sizeof(struct s_blitter_cache));
702 blitter_list->next->prev=blitter_list;
703 blitter_list->next->next=null;
704 blitter_list=blitter_list->next;
706 blitter_list->code=code;
707 blitter_list->hashcode=hashcode;
708 blitter_list->ready=1;
709 blitter_list->used=0;
// Logs the hashcode of every cache node whose `used` member is non-zero, scanning all
// 256 buckets. The first node of each bucket is skipped because the walk advances to
// ->next before testing.
// NOTE(review): the log format prints a leading "0" rather than "0x" before the hex
// value - possibly a typo; left unchanged here.
712 void blitter_list(void)
715 WriteLog("Used blitters list:\n");
717 for (int i=0;i<256;i++)
719 struct s_blitter_cache *blitter_list=blitter_cache[i];
721 while (blitter_list->next)
723 blitter_list=blitter_list->next;
724 if (blitter_list->used)
725 WriteLog("\t0%.8x\n",blitter_list->hashcode);
732 // Generic blit handler
// Interpreted implementation of a blit for the command word cmd, used instead of a
// generated routine. It works on the file-level a1_*/a2_* state and on blitter_ram.
// For each pixel the visible statements:
//   1. load source and destination pixel and Z data, either from memory
//      (READ_PIXEL / READ_ZDATA) or from the data registers (READ_RDATA);
//   2. apply the Z comparator and the data/bit comparators, setting inhibit;
//   3. compute writedata from pattern data, intensity addition, the four LFU
//      minterms, gouraud data or source shading;
//   4. write the pixel (and Z) when BKGWREN is set or the pixel is not inhibited;
//   5. advance the address generators and the gouraud/Z accumulators.
// The body has two near-identical halves: the first writes to A1 with A2 as source,
// the second writes to A2 with A1 as source. On exit the final positions are stored
// back into A1_PIXEL, A1_FPIXEL and A2_PIXEL.
// NOTE(review): many control lines (braces, if/else headers, loop headers) are not
// present in this listing, so the exact conditions guarding each statement cannot be
// read from here; the original comments below are kept verbatim.
735 void blitter_generic(uint32 cmd)
738 //uint32 logGo = ((cmd == 0x01800E01 && REG(A1_BASE) == 0x898000) ? 1 : 0);
740 /* uint32 srcdata = 0;
744 uint32 writedata = 0;
745 uint32 inhibit = 0;*/
746 uint32 srcdata, srczdata, dstdata, dstzdata, writedata, inhibit;
750 inner_loop = n_pixels;
753 srcdata = srczdata = dstdata = dstzdata = writedata = inhibit = 0;
757 // load src data and Z
760 srcdata = READ_PIXEL(a2, REG(A2_FLAGS));
762 srczdata = READ_ZDATA(a2, REG(A2_FLAGS));
763 else if (cmd & 0x0001C020)
764 srczdata = READ_RDATA(SRCZINT, a2, REG(A2_FLAGS), a2_phrase_mode);
768 srcdata = READ_RDATA(SRCDATA, a2, REG(A2_FLAGS), a2_phrase_mode);
769 if (cmd & 0x0001C020)
770 srczdata = READ_RDATA(SRCZINT, a2, REG(A2_FLAGS), a2_phrase_mode);
773 // load dst data and Z
776 dstdata = READ_PIXEL(a1, REG(A1_FLAGS));
778 dstzdata = READ_ZDATA(a1, REG(A1_FLAGS));
780 dstzdata = READ_RDATA(DSTZ, a1, REG(A1_FLAGS), a1_phrase_mode);
784 dstdata = READ_RDATA(DSTDATA, a1, REG(A1_FLAGS), a1_phrase_mode);
786 dstzdata = READ_RDATA(DSTZ, a1, REG(A1_FLAGS), a1_phrase_mode);
789 /*This wasn't working... // a1 clipping
790 if (cmd & 0x00000040)
792 if (a1_x < 0 || a1_y < 0 || (a1_x >> 16) >= (REG(A1_CLIP) & 0x7FFF)
793 || (a1_y >> 16) >= ((REG(A1_CLIP) >> 16) & 0x7FFF))
798 srczdata = z_i[colour_index] >> 16;
800 // apply z comparator
801 if (Z_OP_INF) if (srczdata < dstzdata) inhibit = 1;
802 if (Z_OP_EQU) if (srczdata == dstzdata) inhibit = 1;
803 if (Z_OP_SUP) if (srczdata > dstzdata) inhibit = 1;
805 // apply data comparator
806 // Note: DCOMPEN only works in 8/16 bpp modes! !!! FIX !!!
807 // Does BCOMPEN only work in 1 bpp mode???
808 if (DCOMPEN | BCOMPEN)
812 //WriteLog("Blitter: BCOMPEN set on command %08X inhibit prev:%u, now:", cmd, inhibit);
813 // compare source pixel with pattern pixel
815 Blit! (000B8250 <- 0012C3A0) count: 16 x 1, A1/2_FLAGS: 00014420/00012000 [cmd: 05810001]
816 CMD -> src: SRCEN dst: misc: a1ctl: mode: ity: PATDSEL z-op: op: LFU_REPLACE ctrl: BCOMPEN
817 A1 -> pitch: 1 phrases, depth: 16bpp, z-off: 0, width: 384 (22), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
818 A2 -> pitch: 1 phrases, depth: 1bpp, z-off: 0, width: 16 (10), addctl: XADDPIX YADD0 XSIGNADD YSIGNADD
822 // AvP is still wrong, could be cuz it's doing A1 -> A2...
824 // Src is the 1bpp bitmap... DST is the PATTERN!!!
825 // This seems to solve at least ONE of the problems with MC3D...
826 // Why should this be inverted???
827 // Bcuz it is. This is supposed to be used only for a bit -> pixel expansion...
828 /* if (srcdata == READ_RDATA(PATTERNDATA, a2, REG(A2_FLAGS), a2_phrase_mode))
829 // if (srcdata != READ_RDATA(PATTERNDATA, a2, REG(A2_FLAGS), a2_phrase_mode))
831 /* uint32 A2bpp = 1 << ((REG(A2_FLAGS) >> 3) & 0x07);
832 if (A2bpp == 1 || A2bpp == 16 || A2bpp == 8)
833 inhibit = (srcdata == 0 ? 1: 0);
834 // inhibit = !srcdata;
836 WriteLog("Blitter: Bad BPP (%u) selected for BCOMPEN mode!\n", A2bpp);//*/
837 // What it boils down to is this:
838 // ***CHECK*** Hmm. Seems to cause Rayman to freeze. Investigate.
839 // This doesn't seem to be it. Hmm.
840 // It was a bug in the TOM read word code (reading VC)
846 // compare destination pixel with pattern pixel
847 if (dstdata == READ_RDATA(PATTERNDATA, a1, REG(A1_FLAGS), a1_phrase_mode))
848 // if (dstdata != READ_RDATA(PATTERNDATA, a1, REG(A1_FLAGS), a1_phrase_mode))
851 // This is DEFINITELY WRONG
852 if (a1_phrase_mode || a2_phrase_mode)
858 inhibit |= (((a1_x >> 16) < a1_clip_x && (a1_x >> 16) >= 0
859 && (a1_y >> 16) < a1_clip_y && (a1_y >> 16) >= 0) ? 0 : 1);
862 // compute the write data and store
867 // use pattern data for write data
868 writedata = READ_RDATA(PATTERNDATA, a1, REG(A1_FLAGS), a1_phrase_mode);
872 // intensity addition
873 writedata = (srcdata & 0xFF) + (dstdata & 0xFF);
874 if (!(TOPBEN) && writedata > 0xFF)
876 writedata |= (srcdata & 0xF00) + (dstdata & 0xF00);
877 if (!(TOPNEN) && writedata > 0xFFF)
879 writedata |= (srcdata & 0xF000) + (dstdata & 0xF000);
883 if (LFU_NAN) writedata |= ~srcdata & ~dstdata;
884 if (LFU_NA) writedata |= ~srcdata & dstdata;
885 if (LFU_AN) writedata |= srcdata & ~dstdata;
886 if (LFU_A) writedata |= srcdata & dstdata;
890 writedata = ((gd_c[colour_index]) << 8) | (gd_i[colour_index] >> 16);
894 int intensity = srcdata & 0xFF;
895 int ia = gd_ia >> 16;
897 ia = 0xFFFFFF00 | ia;
901 if (intensity > 0xFF)
903 writedata = (srcdata & 0xFF00) | intensity;
912 if (/*a1_phrase_mode || */BKGWREN || !inhibit)
914 // This is the sole source of the bogus YPOS values being written to the object list... !!! FIX !!!
915 /*if (((REG(A1_FLAGS) >> 3) & 0x07) == 5)
917 uint32 offset = a1_addr+(PIXEL_OFFSET_32(a1)<<2);
918 // (((((UINT32)a##_y >> 16) * a##_width) + (((UINT32)a##_x >> 16) & ~1)) * (1 + a##_pitch) + (((UINT32)a##_x >> 16) & 1))
919 if ((offset >= 0x1FF020 && offset <= 0x1FF03F) || (offset >= 0x1FF820 && offset <= 0x1FF83F))
920 WriteLog("32bpp pixel write: A1 Phrase mode --> ");
922 // write to the destination
923 WRITE_PIXEL(a1, REG(A1_FLAGS), writedata);
925 WRITE_ZDATA(a1, REG(A1_FLAGS), srczdata);
930 // load src data and Z
933 srcdata = READ_PIXEL(a1, REG(A1_FLAGS));
935 srczdata = READ_ZDATA(a1, REG(A1_FLAGS));
936 else if (cmd & 0x0001C020)
937 srczdata = READ_RDATA(SRCZINT, a1, REG(A1_FLAGS), a1_phrase_mode);
941 srcdata = READ_RDATA(SRCDATA, a1, REG(A1_FLAGS), a1_phrase_mode);
943 srczdata = READ_RDATA(SRCZINT, a1, REG(A1_FLAGS), a1_phrase_mode);
946 // load dst data and Z
949 dstdata = READ_PIXEL(a2, REG(A2_FLAGS));
951 dstzdata = READ_ZDATA(a2, REG(A2_FLAGS));
953 dstzdata = READ_RDATA(DSTZ, a2, REG(A2_FLAGS), a2_phrase_mode);
957 dstdata = READ_RDATA(DSTDATA, a2, REG(A2_FLAGS), a2_phrase_mode);
959 dstzdata = READ_RDATA(DSTZ, a2, REG(A2_FLAGS), a2_phrase_mode);
963 srczdata = z_i[colour_index] >> 16;
965 // apply z comparator
966 if (Z_OP_INF) if (srczdata < dstzdata) inhibit = 1;
967 if (Z_OP_EQU) if (srczdata == dstzdata) inhibit = 1;
968 if (Z_OP_SUP) if (srczdata > dstzdata) inhibit = 1;
970 // apply data comparator
971 //NOTE: The bit comparator (BCOMPEN) is NOT the same at the data comparator!
972 if (DCOMPEN | BCOMPEN)
976 // compare source pixel with pattern pixel
977 // AvP: Numbers are correct, but sprites are not!
978 //This doesn't seem to be a problem... But could still be wrong...
979 /* if (srcdata == READ_RDATA(PATTERNDATA, a1, REG(A1_FLAGS), a1_phrase_mode))
980 // if (srcdata != READ_RDATA(PATTERNDATA, a1, REG(A1_FLAGS), a1_phrase_mode))
982 // This is probably not 100% correct... It works in the 1bpp case
983 // (in A1 <- A2 mode, that is...)
984 // AvP: This is causing blocks to be written instead of bit patterns...
986 // NOTE: We really should separate out the BCOMPEN & DCOMPEN stuff!
987 /* uint32 A1bpp = 1 << ((REG(A1_FLAGS) >> 3) & 0x07);
988 if (A1bpp == 1 || A1bpp == 16 || A1bpp == 8)
989 inhibit = (srcdata == 0 ? 1: 0);
991 WriteLog("Blitter: Bad BPP (%u) selected for BCOMPEN mode!\n", A1bpp);//*/
992 // What it boils down to is this:
993 // ***CHECK*** Hmm. Seems to cause Rayman to freeze. Investigate.
994 // This doesn't seem to be it. Hmm.
995 // It was a bug in the TOM read word code (reading VC)
1001 // compare destination pixel with pattern pixel
1002 if (dstdata == READ_RDATA(PATTERNDATA, a2, REG(A2_FLAGS), a2_phrase_mode))
1003 // if (dstdata != READ_RDATA(PATTERNDATA, a2, REG(A2_FLAGS), a2_phrase_mode))
1006 if (a1_phrase_mode || a2_phrase_mode)
1012 inhibit |= (((a1_x >> 16) < a1_clip_x && (a1_x >> 16) >= 0
1013 && (a1_y >> 16) < a1_clip_y && (a1_y >> 16) >= 0) ? 0 : 1);
1016 // compute the write data and store
1021 // use pattern data for write data
1022 writedata= READ_RDATA(PATTERNDATA, a2, REG(A2_FLAGS), a2_phrase_mode);
1026 // intensity addition
1027 writedata = (srcdata & 0xFF) + (dstdata & 0xFF);
1028 if (!(TOPBEN) && writedata > 0xFF)
1030 writedata |= (srcdata & 0xF00) + (dstdata & 0xF00);
1031 if (!(TOPNEN) && writedata > 0xFFF)
1033 writedata |= (srcdata & 0xF000) + (dstdata & 0xF000);
1038 writedata |= ~srcdata & ~dstdata;
1040 writedata |= ~srcdata & dstdata;
1042 writedata |= srcdata & ~dstdata;
1044 writedata |= srcdata & dstdata;
1048 writedata = ((gd_c[colour_index]) << 8) | (gd_i[colour_index] >> 16);
1052 int intensity = srcdata & 0xFF;
1053 int ia = gd_ia >> 16;
1055 ia = 0xFFFFFF00 | ia;
1059 if (intensity > 0xFF)
1061 writedata = (srcdata & 0xFF00) | intensity;
1066 writedata = dstdata;
1067 srczdata = dstzdata;
1070 if (/*a2_phrase_mode || */BKGWREN || !inhibit)
1074 uint32 offset = a2_addr+(PIXEL_OFFSET_16(a2)<<1);
1075 // (((((UINT32)a##_y >> 16) * a##_width) + (((UINT32)a##_x >> 16) & ~1)) * (1 + a##_pitch) + (((UINT32)a##_x >> 16) & 1))
1076 WriteLog("[%08X:%04X] ", offset, writedata);
1078 // write to the destination
1079 WRITE_PIXEL(a2, REG(A2_FLAGS), writedata);
1081 WRITE_ZDATA(a2, REG(A2_FLAGS), srczdata);
1088 a2_x = (a2_x + a2_xadd) & a2_mask_x;
1089 a2_y = (a2_y + a2_yadd) & a2_mask_y;
1092 z_i[colour_index] += zadd;
1094 if (GOURD || SRCSHADE)
1096 gd_i[colour_index] += gd_ia;
1097 gd_c[colour_index] += gd_ca;
1099 if (GOURD || SRCSHADE || GOURZ)
1102 colour_index = (colour_index + 1) & 0x03;
1111 /* if (a2_phrase_mode)
1113 a1_x+=(64/a1_psize)*a1_xadd;
1117 for (int nb=0;nb<(64/a2_psize)+1;nb++)
1118 a2_x = (a2_x + a2_xadd) & a2_mask_x;
1122 // write values back to registers
1123 WREG(A1_PIXEL, (a1_y & 0xFFFF0000) | ((a1_x >> 16) & 0xFFFF));
1124 WREG(A1_FPIXEL, (a1_y << 16) | (a1_x & 0xFFFF));
1125 WREG(A2_PIXEL, (a2_y & 0xFFFF0000) | ((a2_x >> 16) & 0xFFFF));
// blitter_blit: set up and run one blit for the command word `cmd`.
//
// What the visible code does, in order:
//  - splits `cmd` into its bit fields,
//  - loads the A1/A2 address-generator state (base, pitch, width, position,
//    increments, steps, clip, mask, pixel size) from the register file via REG(),
//  - fetches the Z and Gouraud values through the jaguar_*_read() helpers,
//  - optionally dumps the whole setup through WriteLog(),
//  - runs the blit with blitter_working set to 1 and clears it afterwards.
//
// cmd: the 32-bit blitter command word.
// Returns nothing; results are left in the file-level working variables.
1128 void blitter_blit(uint32 cmd)
// Command word fields, by bit position: dst 3-5, misc 6-7, a1ctl 8-10,
// mode 11-13, ity 14-17, zop 18-20, op 21-24, ctrl 25-30.
1132 dst = (cmd >> 3) & 0x07;
1133 misc = (cmd >> 6) & 0x03;
1134 a1ctl = (cmd >> 8) & 0x7;
1135 mode = (cmd >> 11) & 0x07;
1136 ity = (cmd >> 14) & 0x0F;
1137 zop = (cmd >> 18) & 0x07;
1138 op = (cmd >> 21) & 0x0F;
1139 ctrl = (cmd >> 25) & 0x3F;
// Base addresses of the two address generators.
1141 a1_addr = REG(A1_BASE);
1142 a2_addr = REG(A2_BASE);
// Z offset: bits 6-8 of each FLAGS register.
1144 a1_zoffs = (REG(A1_FLAGS) >> 6) & 7;
1145 a2_zoffs = (REG(A2_FLAGS) >> 6) & 7;
// X add control: bits 16-17 of each FLAGS register.
1147 xadd_a1_control = (REG(A1_FLAGS) >> 16) & 0x03;
1148 xadd_a2_control = (REG(A2_FLAGS) >> 16) & 0x03;
// Pitch comes from the low two FLAGS bits; the XOR maps 0,1,2,3 to 0,1,3,2.
1149 a1_pitch = (REG(A1_FLAGS) & 3) ^ ((REG(A1_FLAGS) & 2) >> 1);
1150 a2_pitch = (REG(A2_FLAGS) & 3) ^ ((REG(A2_FLAGS) & 2) >> 1);
// Counter register: pixel count in the low half, line count in the high half.
1152 n_pixels = REG(PIXLINECOUNTER) & 0xFFFF;
1153 n_lines = (REG(PIXLINECOUNTER) >> 16) & 0xFFFF;
// A1 position as 16.16 values: integer halves from A1_PIXEL, fractional
// halves from A1_FPIXEL (X in the low 16 bits of each register, Y in the high).
1155 a1_x = (REG(A1_PIXEL) << 16) | (REG(A1_FPIXEL) & 0xFFFF);
1156 a1_y = (REG(A1_PIXEL) & 0xFFFF0000) | (REG(A1_FPIXEL) >> 16);
1157 // a1_width = blitter_scanline_width[((REG(A1_FLAGS) & 0x00007E00) >> 9)];
// Width field: 2-bit value m (bits 9-10) and 4-bit shift e (bits 11-14);
// width = ((4 | m) << e) >> 2.
1158 UINT32 m = (REG(A1_FLAGS) >> 9) & 0x03, e = (REG(A1_FLAGS) >> 11) & 0x0F;
1159 a1_width = ((0x04 | m) << e) >> 2;//*/
// A2 position: only A2_PIXEL is read, so the low 16 bits of a2_x start at
// zero and a2_y keeps the register's upper half as-is.
1161 a2_x = (REG(A2_PIXEL) & 0x0000FFFF) << 16;
1162 a2_y = (REG(A2_PIXEL) & 0xFFFF0000);
1163 // a2_width = blitter_scanline_width[((REG(A2_FLAGS) & 0x00007E00) >> 9)];
1164 m = (REG(A2_FLAGS) >> 9) & 0x03, e = (REG(A2_FLAGS) >> 11) & 0x0F;
1165 a2_width = ((0x04 | m) << e) >> 2;//*/
// A2 masks with the low 16 bits forced to all ones.
1166 a2_mask_x = ((REG(A2_MASK) & 0x0000FFFF) << 16) | 0xFFFF;
1167 a2_mask_y = (REG(A2_MASK) & 0xFFFF0000) | 0xFFFF;
1169 // Check for "use mask" flag
// Bit 15 of A2_FLAGS clear: masks become all ones, so the later AND with
// a2_mask_x / a2_mask_y leaves the coordinates unchanged.
1170 if (!(REG(A2_FLAGS) & 0x8000))
1172 a2_mask_x = 0xFFFFFFFF; // must be 16.16
1173 a2_mask_y = 0xFFFFFFFF; // must be 16.16
1178 // According to the official documentation, a hardware bug ties A2's yadd bit to A1's...
1179 a2_yadd = a1_yadd = (YADD1_A1 ? 1 << 16 : 0);
1184 // determine a1_xadd
1185 switch (xadd_a1_control)
1188 // This is a documented Jaguar bug relating to phrase mode and truncation... Look into it!
1189 // add phrase offset to X and truncate
1194 // add pixelsize (1) to X
1198 // add zero (for those nice vertical lines)
1202 // add the contents of the increment register
// 16.16 increments: integer halves from A1_INC, fractional halves from A1_FINC.
1203 a1_xadd = (REG(A1_INC) << 16) | (REG(A1_FINC) & 0xFFFF);
1204 a1_yadd = (REG(A1_INC) & 0xFFFF0000) | (REG(A1_FINC) >> 16);
1215 // determine a2_xadd
1216 switch (xadd_a2_control)
1219 // add phrase offset to X and truncate
1224 // add pixelsize (1) to X
1228 // add zero (for those nice vertical lines)
1231 //This really isn't a valid bit combo for A2... Shouldn't this cause the blitter to just say no?
1233 // add the contents of the increment register
1234 // since there is no register for a2 we just add 1
1241 // Modify outer loop steps based on blitter command
// NOTE(review): the three step assignments below are presumably each guarded
// by a command bit (the log text further down names UPDA1F / UPDA1 / UPDA2) -
// confirm against the guards, which are not adjacent to these lines here.
// Fractional halves of the A1 step come from A1_FSTEP...
1249 a1_step_x = (REG(A1_FSTEP) & 0xFFFF),
1250 a1_step_y = (REG(A1_FSTEP) >> 16);
// ...and the integer halves from A1_STEP are OR-ed in on top.
1253 a1_step_x |= ((REG(A1_STEP) & 0x0000FFFF) << 16),
1254 a1_step_y |= ((REG(A1_STEP) & 0xFFFF0000));
// A2 step has an integer part only.
1257 a2_step_x = (REG(A2_STEP) & 0x0000FFFF) << 16,
1258 a2_step_y = (REG(A2_STEP) & 0xFFFF0000);
1260 outer_loop = n_lines;
// Clip size: 15 bits for X (low half of A1_CLIP) and 15 bits for Y (high half).
1265 a1_clip_x = REG(A1_CLIP) & 0x7FFF,
1266 a1_clip_y = (REG(A1_CLIP) >> 16) & 0x7FFF;
1268 // This phrase sizing is incorrect as well... !!! FIX !!!
1269 // Err, this is pixel size... (and it's OK)
// Pixel size = 1 << (bits 3-5 of FLAGS).
1270 a2_psize = 1 << ((REG(A2_FLAGS) >> 3) & 0x07);
1271 a1_psize = 1 << ((REG(A1_FLAGS) >> 3) & 0x07);
// Z increment and the four Z values are fetched through the bus read helper
// at fixed addresses (0xF02274, and 0xF0228C + 4 * v).
1276 zadd = jaguar_long_read(0xF02274);
1278 for(int v=0; v<4; v++)
1279 z_i[v] = (int32)jaguar_long_read(0xF0228C + (v << 2));
1281 if (GOURD || GOURZ || SRCSHADE)
1284 gouraud_add = jaguar_long_read(0xF02270);
// For each of the four entries: gd_c[n] and gd_i[n] are read as single bytes
// from 0xF02268 + 2n and 0xF02269 + 2n, then a word from 0xF02240 + 2n is
// OR-ed into gd_i[n].
1286 gd_c[0] = jaguar_byte_read(0xF02268);
1287 gd_i[0] = jaguar_byte_read(0xF02269);
1289 gd_i[0] |= jaguar_word_read(0xF02240);
1291 gd_c[1] = jaguar_byte_read(0xF0226A);
1292 gd_i[1] = jaguar_byte_read(0xF0226B);
1294 gd_i[1] |= jaguar_word_read(0xF02242);
1296 gd_c[2] = jaguar_byte_read(0xF0226C);
1297 gd_i[2] = jaguar_byte_read(0xF0226D);
1299 gd_i[2] |= jaguar_word_read(0xF02244);
1301 gd_c[3] = jaguar_byte_read(0xF0226E);
1302 gd_i[3] = jaguar_byte_read(0xF0226F);
1304 gd_i[3] |= jaguar_word_read(0xF02246);
// Low 24 bits of gouraud_add, sign-extended to 32 bits when bit 23 is set.
1306 gd_ia = gouraud_add & 0xFFFFFF;
1307 if (gd_ia & 0x800000)
1308 gd_ia = 0xFF000000 | gd_ia;
// Top byte of gouraud_add.
// NOTE(review): the OR with 0xFFFFFF00 looks like a sign extension of that
// byte - confirm the condition that guards it.
1310 gd_ca = (gouraud_add>>24) & 0xFF;
1312 gd_ca = 0xFFFFFF00 | gd_ca;
// Game-specific adjustments keyed on the ROM CRC follow.
1315 // fix for zoop! and syndicate
1316 /* if ((jaguar_mainRom_crc32==0x501be17c)||
1317 (jaguar_mainRom_crc32==0x70895c51)||
1318 (jaguar_mainRom_crc32==0x0f1f1497)||
1319 (jaguar_mainRom_crc32==0xfc8f0dcd)
1323 a1_step_x = (-n_pixels) * 65536;
1326 a2_step_x = (-n_pixels) * 65536;;
1329 // fix for wolfenstein 3d
1330 if (jaguar_mainRom_crc32==0x3966698f)
1334 if ((a1_step_x / 65536)==-28)
1336 a1_step_x=-24*65536; // instead of -28
1337 a2_step_x= 0*65536; // instead of -8
1342 // fix for Tempest 2000
1343 if (jaguar_mainRom_crc32==0x32816d44)
1346 if ((n_lines!=1)&&((n_pixels==288)||(n_pixels==384)))
// Full dump of the decoded blit parameters (16.16 values printed as floats).
1348 WriteLog("Blit!\n");
1349 WriteLog(" cmd = 0x%.8x\n",cmd);
1350 WriteLog(" a1_base = %08X\n", a1_addr);
1351 WriteLog(" a1_pitch = %d\n", a1_pitch);
1352 WriteLog(" a1_psize = %d\n", a1_psize);
1353 WriteLog(" a1_width = %d\n", a1_width);
1354 WriteLog(" a1_xadd = %f (phrase=%d)\n", (float)a1_xadd / 65536.0, a1_phrase_mode);
1355 WriteLog(" a1_yadd = %f\n", (float)a1_yadd / 65536.0);
1356 WriteLog(" a1_xstep = %f\n", (float)a1_step_x / 65536.0);
1357 WriteLog(" a1_ystep = %f\n", (float)a1_step_y / 65536.0);
1358 WriteLog(" a1_x = %f\n", (float)a1_x / 65536.0);
1359 WriteLog(" a1_y = %f\n", (float)a1_y / 65536.0);
1360 WriteLog(" a1_zoffs = %i\n",a1_zoffs);
1362 WriteLog(" a2_base = %08X\n", a2_addr);
1363 WriteLog(" a2_pitch = %d\n", a2_pitch);
1364 WriteLog(" a2_psize = %d\n", a2_psize);
1365 WriteLog(" a2_width = %d\n", a2_width);
1366 WriteLog(" a2_xadd = %f (phrase=%d)\n", (float)a2_xadd / 65536.0, a2_phrase_mode);
1367 WriteLog(" a2_yadd = %f\n", (float)a2_yadd / 65536.0);
1368 WriteLog(" a2_xstep = %f\n", (float)a2_step_x / 65536.0);
1369 WriteLog(" a2_ystep = %f\n", (float)a2_step_y / 65536.0);
1370 WriteLog(" a2_x = %f\n", (float)a2_x / 65536.0);
1371 WriteLog(" a2_y = %f\n", (float)a2_y / 65536.0);
1372 WriteLog(" a2_mask_x= 0x%.4x\n",a2_mask_x);
1373 WriteLog(" a2_mask_y= 0x%.4x\n",a2_mask_y);
1374 WriteLog(" a2_zoffs = %i\n",a2_zoffs);
1376 WriteLog(" count = %d x %d\n", n_pixels, n_lines);
1378 WriteLog(" command = %08X\n", cmd);
1379 WriteLog(" dsten = %i\n",DSTEN);
1380 WriteLog(" srcen = %i\n",SRCEN);
1381 WriteLog(" patdsel = %i\n",PATDSEL);
1382 WriteLog(" color = 0x%.8x\n",REG(PATTERNDATA));
1383 WriteLog(" dcompen = %i\n",DCOMPEN);
1384 WriteLog(" bcompen = %i\n",BCOMPEN);
1385 WriteLog(" cmpdst = %i\n",CMPDST);
1386 WriteLog(" GOURZ = %i\n",GOURZ);
1387 WriteLog(" GOURD = %i\n",GOURD);
1388 WriteLog(" SRCSHADE = %i\n",SRCSHADE);
1389 WriteLog(" DSTDATA = 0x%.8x%.8x\n",REG(DSTDATA),REG(DSTDATA+4));
// Second copy of the same dump (without the DSTDATA line).
// NOTE(review): the condition selecting this copy is not adjacent here -
// check whether the two dumps can be merged.
1396 WriteLog("Blit!\n");
1397 WriteLog(" cmd = 0x%.8x\n",cmd);
1398 WriteLog(" a1_base = %08X\n", a1_addr);
1399 WriteLog(" a1_pitch = %d\n", a1_pitch);
1400 WriteLog(" a1_psize = %d\n", a1_psize);
1401 WriteLog(" a1_width = %d\n", a1_width);
1402 WriteLog(" a1_xadd = %f (phrase=%d)\n", (float)a1_xadd / 65536.0, a1_phrase_mode);
1403 WriteLog(" a1_yadd = %f\n", (float)a1_yadd / 65536.0);
1404 WriteLog(" a1_xstep = %f\n", (float)a1_step_x / 65536.0);
1405 WriteLog(" a1_ystep = %f\n", (float)a1_step_y / 65536.0);
1406 WriteLog(" a1_x = %f\n", (float)a1_x / 65536.0);
1407 WriteLog(" a1_y = %f\n", (float)a1_y / 65536.0);
1408 WriteLog(" a1_zoffs = %i\n",a1_zoffs);
1410 WriteLog(" a2_base = %08X\n", a2_addr);
1411 WriteLog(" a2_pitch = %d\n", a2_pitch);
1412 WriteLog(" a2_psize = %d\n", a2_psize);
1413 WriteLog(" a2_width = %d\n", a2_width);
1414 WriteLog(" a2_xadd = %f (phrase=%d)\n", (float)a2_xadd / 65536.0, a2_phrase_mode);
1415 WriteLog(" a2_yadd = %f\n", (float)a2_yadd / 65536.0);
1416 WriteLog(" a2_xstep = %f\n", (float)a2_step_x / 65536.0);
1417 WriteLog(" a2_ystep = %f\n", (float)a2_step_y / 65536.0);
1418 WriteLog(" a2_x = %f\n", (float)a2_x / 65536.0);
1419 WriteLog(" a2_y = %f\n", (float)a2_y / 65536.0);
1420 WriteLog(" a2_mask_x= 0x%.4x\n",a2_mask_x);
1421 WriteLog(" a2_mask_y= 0x%.4x\n",a2_mask_y);
1422 WriteLog(" a2_zoffs = %i\n",a2_zoffs);
1424 WriteLog(" count = %d x %d\n", n_pixels, n_lines);
1426 WriteLog(" command = %08X\n", cmd);
1427 WriteLog(" dsten = %i\n",DSTEN);
1428 WriteLog(" srcen = %i\n",SRCEN);
1429 WriteLog(" patdsel = %i\n",PATDSEL);
1430 WriteLog(" color = 0x%.8x\n",REG(PATTERNDATA));
1431 WriteLog(" dcompen = %i\n",DCOMPEN);
1432 WriteLog(" bcompen = %i\n",BCOMPEN);
1433 WriteLog(" cmpdst = %i\n",CMPDST);
1434 WriteLog(" GOURZ = %i\n",GOURZ);
1435 WriteLog(" GOURD = %i\n",GOURD);
1436 WriteLog(" SRCSHADE= %i\n",SRCSHADE);
// Logging switches defined in other translation units.
1440 extern int blit_start_log;
1441 extern int op_start_log;
// Name tables used only to pretty-print register fields in the log:
// ctrlStr is indexed by the 2-bit X add control, bppStr by the 3-bit depth
// field, opStr by the 4-bit op field of the command word.
1444 char * ctrlStr[4] = { "XADDPHR\0", "XADDPIX\0", "XADD0\0", "XADDINC\0" };
1445 char * bppStr[8] = { "1bpp\0", "2bpp\0", "4bpp\0", "8bpp\0", "16bpp\0", "32bpp\0", "???\0", "!!!\0" };
1446 char * opStr[16] = { "LFU_CLEAR", "LFU_NSAND", "LFU_NSAD", "LFU_NOTS", "LFU_SAND", "LFU_NOTD", "LFU_N_SXORD", "LFU_NSORND",
1447 "LFU_SAD", "LFU_XOR", "LFU_D", "LFU_NSORD", "LFU_REPLACE", "LFU_SORND", "LFU_SORD", "LFU_ONE" };
// Local re-decode of the command word; these locals shadow the file-level
// variables of the same names assigned at the top of the function.
1448 uint32 src = cmd & 0x07, dst = (cmd >> 3) & 0x07, misc = (cmd >> 6) & 0x03,
1449 a1ctl = (cmd >> 8) & 0x07, mode = (cmd >> 11) & 0x07, ity = (cmd >> 14) & 0x0F,
1450 zop = (cmd >> 18) & 0x07, op = (cmd >> 21) & 0x0F, ctrl = (cmd >> 25) & 0x3F;
// FLAGS fields for the log: p = bits 0-2, d = depth (3-5), zo = Z offset (6-8),
// w = 6-bit width code (9-14), ac = add control bits (16-20).
1451 UINT32 a1f = REG(A1_FLAGS), a2f = REG(A2_FLAGS);
1452 uint32 p1 = a1f & 0x07, p2 = a2f & 0x07,
1453 d1 = (a1f >> 3) & 0x07, d2 = (a2f >> 3) & 0x07,
1454 zo1 = (a1f >> 6) & 0x07, zo2 = (a2f >> 6) & 0x07,
1455 w1 = (a1f >> 9) & 0x3F, w2 = (a2f >> 9) & 0x3F,
1456 ac1 = (a1f >> 16) & 0x1F, ac2 = (a2f >> 16) & 0x1F;
// Same width formula as for a1_width / a2_width, applied to the 6-bit code.
1457 UINT32 iw1 = ((0x04 | (w1 & 0x03)) << ((w1 & 0x3C) >> 2)) >> 2;
1458 UINT32 iw2 = ((0x04 | (w2 & 0x03)) << ((w2 & 0x3C) >> 2)) >> 2;
// The arrow is chosen by bit 0 of `mode`, i.e. command bit 11 (logged as DSTA2 below).
1459 WriteLog("Blit! (%08X %s %08X) count: %d x %d, A1/2_FLAGS: %08X/%08X [cmd: %08X]\n", a1_addr, (mode&0x01 ? "->" : "<-"), a2_addr, n_pixels, n_lines, a1f, a2f, cmd);
1460 // WriteLog(" CMD -> src: %d, dst: %d, misc: %d, a1ctl: %d, mode: %d, ity: %1X, z-op: %d, op: %1X, ctrl: %02X\n", src, dst, misc, a1ctl, mode, ity, zop, op, ctrl);
// One flag name per command bit that is set.
1462 WriteLog(" CMD -> src: %s%s%s ", (cmd & 0x0001 ? "SRCEN " : ""), (cmd & 0x0002 ? "SRCENZ " : ""), (cmd & 0x0004 ? "SRCENX" : ""));
1463 WriteLog("dst: %s%s%s ", (cmd & 0x0008 ? "DSTEN " : ""), (cmd & 0x0010 ? "DSTENZ " : ""), (cmd & 0x0020 ? "DSTWRZ" : ""));
1464 WriteLog("misc: %s%s ", (cmd & 0x0040 ? "CLIP_A1 " : ""), (cmd & 0x0080 ? "???" : ""));
1465 WriteLog("a1ctl: %s%s%s ", (cmd & 0x0100 ? "UPDA1F " : ""), (cmd & 0x0200 ? "UPDA1 " : ""), (cmd & 0x0400 ? "UPDA2" : ""));
1466 WriteLog("mode: %s%s%s ", (cmd & 0x0800 ? "DSTA2 " : ""), (cmd & 0x1000 ? "GOURD " : ""), (cmd & 0x2000 ? "ZBUFF" : ""));
1467 WriteLog("ity: %s%s%s ", (cmd & 0x4000 ? "TOPBEN " : ""), (cmd & 0x8000 ? "TOPNEN " : ""), (cmd & 0x00010000 ? "PATDSEL" : ""));
1468 WriteLog("z-op: %s%s%s ", (cmd & 0x00040000 ? "ZMODELT " : ""), (cmd & 0x00080000 ? "ZMODEEQ " : ""), (cmd & 0x00100000 ? "ZMODEGT" : ""));
1469 WriteLog("op: %s ", opStr[(cmd >> 21) & 0x0F]);
1470 WriteLog("ctrl: %s%s%s%s%s%s\n", (cmd & 0x02000000 ? "CMPDST " : ""), (cmd & 0x04000000 ? "BCOMPEN " : ""), (cmd & 0x08000000 ? "DCOMPEN " : ""), (cmd & 0x10000000 ? "BKGWREN " : ""), (cmd & 0x20000000 ? "BUSHI " : ""), (cmd & 0x40000000 ? "SRCSHADE" : ""));
1474 WriteLog(" A2 step values: %d (X), %d (Y) [mask (%sused): %08X - %08X/%08X]\n", a2_step_x >> 16, a2_step_y >> 16, (a2f & 0x8000 ? "" : "un"), REG(A2_MASK), a2_mask_x, a2_mask_y);
1477 WriteLog(" A1 -> pitch: %d phrases, depth: %s, z-off: %d, width: %d (%02X), addctl: %s %s %s %s\n", 1 << p1, bppStr[d1], zo1, iw1, w1, ctrlStr[ac1&0x03], (ac1&0x04 ? "YADD1" : "YADD0"), (ac1&0x08 ? "XSIGNSUB" : "XSIGNADD"), (ac1&0x10 ? "YSIGNSUB" : "YSIGNADD"));
1478 WriteLog(" A2 -> pitch: %d phrases, depth: %s, z-off: %d, width: %d (%02X), addctl: %s %s %s %s\n", 1 << p2, bppStr[d2], zo2, iw2, w2, ctrlStr[ac2&0x03], (ac2&0x04 ? "YADD1" : "YADD0"), (ac2&0x08 ? "XSIGNSUB" : "XSIGNADD"), (ac2&0x10 ? "YSIGNSUB" : "YSIGNADD"));
1479 WriteLog(" x/y: %d/%d\n", a2_x >> 16, a2_y >> 16);
1480 // blit_start_log = 0;
1481 // op_start_log = 1;
// Busy flag is raised for the duration of the blit and dropped at the end.
1484 blitter_working = 1;
// Without USE_GENERIC_BLITTER, cached generated code is tried first.
// NOTE(review): USE_GENERIC_BLITTER is defined at the top of this file, so
// blitter_generic() is expected to be the path actually taken - confirm where
// the matching #endif falls.
1485 #ifndef USE_GENERIC_BLITTER
1486 if (!blitter_execute_cached_code(blitter_in_cache(cmd)))
1488 blitter_generic(cmd);
1490 /*if (blit_start_log)
1492 if (a1_addr == 0xF03000 && a2_addr == 0x004D58)
1494 WriteLog("\nBytes at 004D58:\n");
1495 for(int i=0x004D58; i<0x004D58+(10*127*4); i++)
1496 WriteLog("%02X ", jaguar_byte_read(i));
1497 WriteLog("\nBytes at F03000:\n");
1498 for(int i=0xF03000; i<0xF03000+(6*127*4); i++)
1499 WriteLog("%02X ", jaguar_byte_read(i));
1504 blitter_working = 0;
1507 uint32 blitter_long_read(uint32 offset)
1509 return (blitter_word_read(offset) << 16) | blitter_word_read(offset+2);
1512 void blitter_long_write(uint32 offset, uint32 data)
1514 blitter_word_write(offset, data >> 16);
1515 blitter_word_write(offset+2, data & 0xFFFF);
// blitter_init: set up the blitter module.
//
// On the first call (blitter_cache_init still zero) this allocates one
// s_blitter_cache node for each of the 256 blitter_cache[] slots, clears its
// next/prev links, and then sets blitter_cache_init so the allocation is not
// repeated.
//
// NOTE(review): the malloc() result is dereferenced without a NULL check.
// NOTE(review): "awrt" is not one of the standard fopen() mode strings, so its
// effect is implementation-dependent - confirm the intended mode.
1518 void blitter_init(void)
1520 if (!blitter_cache_init)
1522 for (int i=0;i<256;i++)
1524 blitter_cache[i] = (struct s_blitter_cache *)malloc(sizeof(struct s_blitter_cache));
1525 blitter_cache[i]->next=null;
1526 blitter_cache[i]->prev=null;
1528 blitter_cache_init = 1;
// Only when the generic blitter is disabled: pull the generated header in at
// this point, inside the function body.
1530 #ifndef USE_GENERIC_BLITTER
1531 #include "include/blit_i.h"
// Output files for generated blitter code; blitter_done() closes both handles.
// NOTE(review): the fopen() results are not checked here, and any guard
// around these two calls is not adjacent - verify.
1536 blitters_code_fp = fopen("include/blit_c.h", "awrt");
1537 blitters_code_init_fp = fopen("include/blit_i.h", "awrt");
1541 void blitter_reset(void)
1543 memset(blitter_ram, 0x00, 0xA0);
// blitter_done: shutdown hook for the blitter module.
//
// Closes the two generated-code file handles that blitter_init() opens and
// writes a completion line to the log.
//
// NOTE(review): neither handle is tested for NULL before fclose(), and the
// return values are ignored - verify the guard surrounding these calls.
1546 void blitter_done(void)
1550 fclose(blitters_code_fp);
1551 fclose(blitters_code_init_fp);
1553 WriteLog("BLIT: Done.\n");
// blitter_byte_write: store one byte into the blitter register file.
//
// offset: byte offset into blitter_ram.
// data:   value to store.
//
// Writes with offset in 0x7C..0x8B are special-cased by the table below. Each
// listed offset stores `data` at a different blitter_ram location:
//   0x7D/0x81/0x85/0x89 -> 0x69/0x6B/0x6D/0x6F (odd bytes of PATTERNDATA at 0x68)
//   0x7E,0x7F / 0x82,0x83 / 0x86,0x87 / 0x8A,0x8B -> 0x40..0x47 (SRCDATA at 0x40)
// These look like the same locations blitter_blit() reads gd_i[] from
// (0xF02269.. and 0xF02240..), assuming blitter_ram is mapped at 0xF02200.
//
// NOTE(review): `offset` indexes blitter_ram (0x100 bytes) directly - confirm
// it has been reduced to 0..0xFF before these stores.
// NOTE(review): in the disabled debug check below, `offset & 0xFF == 0x7B`
// parses as `offset & (0xFF == 0x7B)`; add parentheses if it is re-enabled.
1556 void blitter_byte_write(uint32 offset, uint8 data)
1558 /*if (offset & 0xFF == 0x7B)
1560 WriteLog("--> Wrote to B_STOP: value -> %02X\n", data);
1564 // if ((offset >= 0x7C) && (offset <= 0x9B))
1565 if ((offset >= 0x7C) && (offset <= 0x8B))
1570 case 0x7D: blitter_ram[0x69] = data; break;
1571 case 0x7E: blitter_ram[0x40] = data; break;
1572 case 0x7F: blitter_ram[0x41] = data; break;
1575 case 0x81: blitter_ram[0x6B] = data; break;
1576 case 0x82: blitter_ram[0x42] = data; break;
1577 case 0x83: blitter_ram[0x43] = data; break;
1580 case 0x85: blitter_ram[0x6D] = data; break;
1581 case 0x86: blitter_ram[0x44] = data; break;
1582 case 0x87: blitter_ram[0x45] = data; break;
1585 case 0x89: blitter_ram[0x6F] = data; break;
// The 0x9A/0x9B variants are the earlier form of the two cases that follow.
1587 // case 0x9A: blitter_ram[0x46] = data; break;
1588 // case 0x9B: blitter_ram[0x47] = data; break;
1589 case 0x8A: blitter_ram[0x46] = data; break;
1590 case 0x8B: blitter_ram[0x47] = data; break;
// Plain store at the written offset.
// NOTE(review): confirm this also runs for offsets handled by the table above.
1594 blitter_ram[offset] = data;
1597 void blitter_word_write(uint32 offset, uint16 data)
1599 blitter_byte_write(offset+0, (data>>8) & 0xFF);
1600 blitter_byte_write(offset+1, data & 0xFF);
1602 if ((offset & 0xFF) == 0x3A)
1603 // I.e., the second write of 32-bit value--not convinced this is the best way to do this!
1604 // But then again, according to the Jaguar docs, this is correct...!
1605 blitter_blit(GET32(blitter_ram, 0x38));
1606 // Testing purposes only!
1607 //This does the clipping correctly, but not the Gouraud shading...
1608 // blitter2_exec(GET32(blitter_ram, 0x38));
// blitter_byte_read: read one byte from the blitter register file.
//
// offset: byte offset into blitter_ram.
// Returns 0x01 for offset 0x3B (the last byte of the 32-bit register at 0x38,
// COMMAND), otherwise the stored byte.
//
// NOTE(review): `offset` indexes blitter_ram (0x100 bytes) directly - confirm
// it has been reduced to 0..0xFF before the comparison and the array access.
1612 uint8 blitter_byte_read(uint32 offset)
// Offset 0x3B always reads back as 0x01; the stored byte is never returned.
1617 if (offset == (0x38 + 3))
1618 return 0x01; // always idle
1620 return blitter_ram[offset];
1623 uint16 blitter_word_read(uint32 offset)
1625 return ((uint16)blitter_byte_read(offset) << 8) | (uint16)blitter_byte_read(offset+1);