6 // Originally by David Raingeard (Cal2)
7 // GCC/SDL port by Niels Wagenaar (Linux/WIN32) and Caz (BeOS)
8 // Cleanups, endian wrongness, and bad ASM amelioration by James Hammons
9 // (C) 2010 Underground Software
11 // JLH = James Hammons <jlhamm@acm.org>
14 // --- ---------- -------------------------------------------------------------
15 // JLH 01/16/2010 Created this log ;-)
16 // JLH 11/26/2011 Added fixes for LOAD/STORE alignment issues
19 // Note: Endian wrongness probably stems from the MAME origins of this emu and
20 // the braindead way in which MAME handles memory. :-)
22 // Problem with not booting the BIOS was the incorrect way that the
23 // SUBC instruction set the carry when the carry was set going in...
24 // Same problem with ADDC...
30 #include <string.h> // For memset
40 // Seems alignment in loads & stores was off...
41 #define GPU_CORRECT_ALIGNMENT
44 // For GPU disassembly...
65 #define GPU_DIS_LOAD14I
66 #define GPU_DIS_LOAD14R
67 #define GPU_DIS_LOAD15I
68 #define GPU_DIS_LOAD15R
70 #define GPU_DIS_MOVEFA
72 #define GPU_DIS_MOVEPC
73 #define GPU_DIS_MOVETA
90 #define GPU_DIS_STOREB
91 #define GPU_DIS_STOREW
92 #define GPU_DIS_STORE14I
93 #define GPU_DIS_STORE14R
94 #define GPU_DIS_STORE15I
95 #define GPU_DIS_STORE15R
102 //bool doGPUDis = false;
103 bool doGPUDis = true;
107 GPU opcodes use (BIOS flying ATARI logo):
149 #define CINT0FLAG 0x0200
150 #define CINT1FLAG 0x0400
151 #define CINT2FLAG 0x0800
152 #define CINT3FLAG 0x1000
153 #define CINT4FLAG 0x2000
154 #define CINT04FLAGS (CINT0FLAG | CINT1FLAG | CINT2FLAG | CINT3FLAG | CINT4FLAG)
158 #define ZERO_FLAG 0x0001
159 #define CARRY_FLAG 0x0002
160 #define NEGA_FLAG 0x0004
162 #define INT_ENA0 0x0010
163 #define INT_ENA1 0x0020
164 #define INT_ENA2 0x0040
165 #define INT_ENA3 0x0080
166 #define INT_ENA4 0x0100
167 #define INT_CLR0 0x0200
168 #define INT_CLR1 0x0400
169 #define INT_CLR2 0x0800
170 #define INT_CLR3 0x1000
171 #define INT_CLR4 0x2000
172 #define REGPAGE 0x4000
175 // External global variables
177 extern int start_logging;
178 extern int gpu_start_log;
180 // Private function prototypes
182 void GPUUpdateRegisterBanks(void);
183 void GPUDumpDisassembly(void);
184 void GPUDumpRegisters(void);
185 void GPUDumpMemory(void);
187 static void gpu_opcode_add(void);
188 static void gpu_opcode_addc(void);
189 static void gpu_opcode_addq(void);
190 static void gpu_opcode_addqt(void);
191 static void gpu_opcode_sub(void);
192 static void gpu_opcode_subc(void);
193 static void gpu_opcode_subq(void);
194 static void gpu_opcode_subqt(void);
195 static void gpu_opcode_neg(void);
196 static void gpu_opcode_and(void);
197 static void gpu_opcode_or(void);
198 static void gpu_opcode_xor(void);
199 static void gpu_opcode_not(void);
200 static void gpu_opcode_btst(void);
201 static void gpu_opcode_bset(void);
202 static void gpu_opcode_bclr(void);
203 static void gpu_opcode_mult(void);
204 static void gpu_opcode_imult(void);
205 static void gpu_opcode_imultn(void);
206 static void gpu_opcode_resmac(void);
207 static void gpu_opcode_imacn(void);
208 static void gpu_opcode_div(void);
209 static void gpu_opcode_abs(void);
210 static void gpu_opcode_sh(void);
211 static void gpu_opcode_shlq(void);
212 static void gpu_opcode_shrq(void);
213 static void gpu_opcode_sha(void);
214 static void gpu_opcode_sharq(void);
215 static void gpu_opcode_ror(void);
216 static void gpu_opcode_rorq(void);
217 static void gpu_opcode_cmp(void);
218 static void gpu_opcode_cmpq(void);
219 static void gpu_opcode_sat8(void);
220 static void gpu_opcode_sat16(void);
221 static void gpu_opcode_move(void);
222 static void gpu_opcode_moveq(void);
223 static void gpu_opcode_moveta(void);
224 static void gpu_opcode_movefa(void);
225 static void gpu_opcode_movei(void);
226 static void gpu_opcode_loadb(void);
227 static void gpu_opcode_loadw(void);
228 static void gpu_opcode_load(void);
229 static void gpu_opcode_loadp(void);
230 static void gpu_opcode_load_r14_indexed(void);
231 static void gpu_opcode_load_r15_indexed(void);
232 static void gpu_opcode_storeb(void);
233 static void gpu_opcode_storew(void);
234 static void gpu_opcode_store(void);
235 static void gpu_opcode_storep(void);
236 static void gpu_opcode_store_r14_indexed(void);
237 static void gpu_opcode_store_r15_indexed(void);
238 static void gpu_opcode_move_pc(void);
239 static void gpu_opcode_jump(void);
240 static void gpu_opcode_jr(void);
241 static void gpu_opcode_mmult(void);
242 static void gpu_opcode_mtoi(void);
243 static void gpu_opcode_normi(void);
244 static void gpu_opcode_nop(void);
245 static void gpu_opcode_load_r14_ri(void);
246 static void gpu_opcode_load_r15_ri(void);
247 static void gpu_opcode_store_r14_ri(void);
248 static void gpu_opcode_store_r15_ri(void);
249 static void gpu_opcode_sat24(void);
250 static void gpu_opcode_pack(void);
252 // This is wrong, since it doesn't take pipeline effects into account. !!! FIX !!!
253 /*uint8 gpu_opcode_cycles[64] =
255 3, 3, 3, 3, 3, 3, 3, 3,
256 3, 3, 3, 3, 3, 3, 3, 3,
257 3, 3, 1, 3, 1, 18, 3, 3,
258 3, 3, 3, 3, 3, 3, 3, 3,
259 3, 3, 2, 2, 2, 2, 3, 4,
260 5, 4, 5, 6, 6, 1, 1, 1,
261 1, 2, 2, 2, 1, 1, 9, 3,
262 3, 1, 6, 6, 2, 2, 3, 3
264 //Here's a QnD kludge...
265 //This is wrong, wrong, WRONG, but it seems to work for the time being...
266 //(That is, it fixes Flip Out which relies on GPU timing rather than semaphores. Bad developers! Bad!)
267 //What's needed here is a way to take pipeline effects into account (including pipeline stalls!)...
268 /*uint8 gpu_opcode_cycles[64] =
270 1, 1, 1, 1, 1, 1, 1, 1,
271 1, 1, 1, 1, 1, 1, 1, 1,
272 1, 1, 1, 1, 1, 9, 1, 1,
273 1, 1, 1, 1, 1, 1, 1, 1,
274 1, 1, 1, 1, 1, 1, 1, 2,
275 2, 2, 2, 3, 3, 1, 1, 1,
276 1, 1, 1, 1, 1, 1, 4, 1,
277 1, 1, 3, 3, 1, 1, 1, 1
279 uint8 gpu_opcode_cycles[64] =
281 1, 1, 1, 1, 1, 1, 1, 1,
282 1, 1, 1, 1, 1, 1, 1, 1,
283 1, 1, 1, 1, 1, 1, 1, 1,
284 1, 1, 1, 1, 1, 1, 1, 1,
285 1, 1, 1, 1, 1, 1, 1, 1,
286 1, 1, 1, 1, 1, 1, 1, 1,
287 1, 1, 1, 1, 1, 1, 1, 1,
288 1, 1, 1, 1, 1, 1, 1, 1
291 void (*gpu_opcode[64])()=
293 gpu_opcode_add, gpu_opcode_addc, gpu_opcode_addq, gpu_opcode_addqt,
294 gpu_opcode_sub, gpu_opcode_subc, gpu_opcode_subq, gpu_opcode_subqt,
295 gpu_opcode_neg, gpu_opcode_and, gpu_opcode_or, gpu_opcode_xor,
296 gpu_opcode_not, gpu_opcode_btst, gpu_opcode_bset, gpu_opcode_bclr,
297 gpu_opcode_mult, gpu_opcode_imult, gpu_opcode_imultn, gpu_opcode_resmac,
298 gpu_opcode_imacn, gpu_opcode_div, gpu_opcode_abs, gpu_opcode_sh,
299 gpu_opcode_shlq, gpu_opcode_shrq, gpu_opcode_sha, gpu_opcode_sharq,
300 gpu_opcode_ror, gpu_opcode_rorq, gpu_opcode_cmp, gpu_opcode_cmpq,
301 gpu_opcode_sat8, gpu_opcode_sat16, gpu_opcode_move, gpu_opcode_moveq,
302 gpu_opcode_moveta, gpu_opcode_movefa, gpu_opcode_movei, gpu_opcode_loadb,
303 gpu_opcode_loadw, gpu_opcode_load, gpu_opcode_loadp, gpu_opcode_load_r14_indexed,
304 gpu_opcode_load_r15_indexed, gpu_opcode_storeb, gpu_opcode_storew, gpu_opcode_store,
305 gpu_opcode_storep, gpu_opcode_store_r14_indexed, gpu_opcode_store_r15_indexed, gpu_opcode_move_pc,
306 gpu_opcode_jump, gpu_opcode_jr, gpu_opcode_mmult, gpu_opcode_mtoi,
307 gpu_opcode_normi, gpu_opcode_nop, gpu_opcode_load_r14_ri, gpu_opcode_load_r15_ri,
308 gpu_opcode_store_r14_ri, gpu_opcode_store_r15_ri, gpu_opcode_sat24, gpu_opcode_pack,
311 static uint8 gpu_ram_8[0x1000];
313 static uint32 gpu_acc;
314 static uint32 gpu_remain;
315 static uint32 gpu_hidata;
316 static uint32 gpu_flags;
317 static uint32 gpu_matrix_control;
318 static uint32 gpu_pointer_to_matrix;
319 static uint32 gpu_data_organization;
320 static uint32 gpu_control;
321 static uint32 gpu_div_control;
322 // There is a distinct advantage to having these separated out--there's no need to clear
323 // a bit before writing a result. I.e., if the result of an operation leaves a zero in
324 // the carry flag, you don't have to zero gpu_flag_c before you can write that zero!
325 static uint8 gpu_flag_z, gpu_flag_n, gpu_flag_c;
326 static uint32 gpu_reg_bank_0[32];
327 static uint32 gpu_reg_bank_1[32];
328 static uint32 * gpu_reg;
329 static uint32 * gpu_alternate_reg;
331 static uint32 gpu_instruction;
332 static uint32 gpu_opcode_first_parameter;
333 static uint32 gpu_opcode_second_parameter;
335 #define GPU_RUNNING (gpu_control & 0x01)
337 #define RM gpu_reg[gpu_opcode_first_parameter]
338 #define RN gpu_reg[gpu_opcode_second_parameter]
339 #define ALTERNATE_RM gpu_alternate_reg[gpu_opcode_first_parameter]
340 #define ALTERNATE_RN gpu_alternate_reg[gpu_opcode_second_parameter]
341 #define IMM_1 gpu_opcode_first_parameter
342 #define IMM_2 gpu_opcode_second_parameter
344 #define SET_FLAG_Z(r) (gpu_flag_z = ((r) == 0));
345 #define SET_FLAG_N(r) (gpu_flag_n = (((uint32)(r) >> 31) & 0x01));
347 #define RESET_FLAG_Z() gpu_flag_z = 0;
348 #define RESET_FLAG_N() gpu_flag_n = 0;
349 #define RESET_FLAG_C() gpu_flag_c = 0;
351 #define CLR_Z (gpu_flag_z = 0)
352 #define CLR_ZN (gpu_flag_z = gpu_flag_n = 0)
353 #define CLR_ZNC (gpu_flag_z = gpu_flag_n = gpu_flag_c = 0)
// Condition-flag helpers. Each flag is kept in its own variable
// (gpu_flag_z / gpu_flag_n / gpu_flag_c) holding 0 or 1.
//   SET_Z(r)        Z = 1 when r is zero
//   SET_N(r)        N = bit 31 of r
//   SET_C_ADD(a,b)  C = 1 when the unsigned 32-bit sum a + b carries out
//   SET_C_SUB(a,b)  C = 1 when the unsigned 32-bit difference a - b borrows
#define SET_Z(r)	(gpu_flag_z = ((r) == 0))
#define SET_N(r)	(gpu_flag_n = (((uint32)(r) >> 31) & 0x01))
#define SET_C_ADD(a,b)	(gpu_flag_c = ((uint32)(b) > (uint32)(~(a))))
#define SET_C_SUB(a,b)	(gpu_flag_c = ((uint32)(b) > (uint32)(a)))
// The combined setters expand to ONE parenthesized comma expression (evaluated
// left to right), so they act as a single statement: `if (cond) SET_ZN(x);`
// updates every flag under the condition, not just the first one.
#define SET_ZN(r)	(SET_N(r), SET_Z(r))
#define SET_ZNC_ADD(a,b,r)	(SET_N(r), SET_Z(r), SET_C_ADD(a,b))
#define SET_ZNC_SUB(a,b,r)	(SET_N(r), SET_Z(r), SET_C_SUB(a,b))
362 uint32 gpu_convert_zero[32] =
363 { 32,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 };
365 uint8 * branch_condition_table = 0;
366 #define BRANCH_CONDITION(x) branch_condition_table[(x) + ((jaguar_flags & 7) << 5)]
368 uint32 gpu_opcode_use[64];
370 const char * gpu_opcode_str[64]=
372 "add", "addc", "addq", "addqt",
373 "sub", "subc", "subq", "subqt",
374 "neg", "and", "or", "xor",
375 "not", "btst", "bset", "bclr",
376 "mult", "imult", "imultn", "resmac",
377 "imacn", "div", "abs", "sh",
378 "shlq", "shrq", "sha", "sharq",
379 "ror", "rorq", "cmp", "cmpq",
380 "sat8", "sat16", "move", "moveq",
381 "moveta", "movefa", "movei", "loadb",
382 "loadw", "load", "loadp", "load_r14_indexed",
383 "load_r15_indexed", "storeb", "storew", "store",
384 "storep", "store_r14_indexed","store_r15_indexed","move_pc",
385 "jump", "jr", "mmult", "mtoi",
386 "normi", "nop", "load_r14_ri", "load_r15_ri",
387 "store_r14_ri", "store_r15_ri", "sat24", "pack",
390 static uint32 gpu_in_exec = 0;
391 static uint32 gpu_releaseTimeSlice_flag = 0;
393 void GPUReleaseTimeslice(void)
395 gpu_releaseTimeSlice_flag = 1;
398 uint32 GPUGetPC(void)
403 void build_branch_condition_table(void)
405 if (!branch_condition_table)
407 branch_condition_table = (uint8 *)malloc(32 * 8 * sizeof(branch_condition_table[0]));
409 if (branch_condition_table)
411 for(int i=0; i<8; i++)
413 for(int j=0; j<32; j++)
420 if (!(i & ZERO_FLAG))
423 if (i & (CARRY_FLAG << (j >> 4)))
426 if (!(i & (CARRY_FLAG << (j >> 4))))
428 branch_condition_table[i * 32 + j] = result;
436 // GPU byte access (read)
438 uint8 GPUReadByte(uint32 offset, uint32 who/*=UNKNOWN*/)
440 if (offset >= 0xF02000 && offset <= 0xF020FF)
441 WriteLog("GPU: ReadByte--Attempt to read from GPU register file by %s!\n", whoName[who]);
443 if ((offset >= GPU_WORK_RAM_BASE) && (offset < GPU_WORK_RAM_BASE+0x1000))
444 return gpu_ram_8[offset & 0xFFF];
445 else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset < GPU_CONTROL_RAM_BASE+0x20))
447 uint32 data = GPUReadLong(offset & 0xFFFFFFFC, who);
449 if ((offset & 0x03) == 0)
451 else if ((offset & 0x03) == 1)
452 return (data >> 16) & 0xFF;
453 else if ((offset & 0x03) == 2)
454 return (data >> 8) & 0xFF;
455 else if ((offset & 0x03) == 3)
459 return JaguarReadByte(offset, who);
463 // GPU word access (read)
465 uint16 GPUReadWord(uint32 offset, uint32 who/*=UNKNOWN*/)
467 if (offset >= 0xF02000 && offset <= 0xF020FF)
468 WriteLog("GPU: ReadWord--Attempt to read from GPU register file by %s!\n", whoName[who]);
470 if ((offset >= GPU_WORK_RAM_BASE) && (offset < GPU_WORK_RAM_BASE+0x1000))
473 uint16 data = ((uint16)gpu_ram_8[offset] << 8) | (uint16)gpu_ram_8[offset+1];
476 else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset < GPU_CONTROL_RAM_BASE+0x20))
478 // This looks and smells wrong...
479 // But it *might* be OK...
480 if (offset & 0x01) // Catch cases 1 & 3... (unaligned read)
481 return (GPUReadByte(offset, who) << 8) | GPUReadByte(offset+1, who);
483 uint32 data = GPUReadLong(offset & 0xFFFFFFFC, who);
485 if (offset & 0x02) // Cases 0 & 2...
486 return data & 0xFFFF;
491 //TEMP--Mirror of F03000? No. Writes only...
492 //if (offset >= 0xF0B000 && offset <= 0xF0BFFF)
493 //WriteLog("[GPUR16] --> Possible GPU RAM mirror access by %s!", whoName[who]);
495 return JaguarReadWord(offset, who);
499 // GPU dword access (read)
// Returns the 32-bit value at `offset` on behalf of `who` (used only to index
// whoName[] in log messages here and passed through to the bus handlers).
//  - Work RAM range: four bytes of gpu_ram_8 assembled most-significant first.
//  - Control register range: the selected register; for the flags register the
//    Z/C/N bits are first rebuilt from gpu_flag_z/gpu_flag_c/gpu_flag_n.
//  - Anything else: two 16-bit JaguarReadWord() reads combined high half first.
501 uint32 GPUReadLong(uint32 offset, uint32 who/*=UNKNOWN*/)
503 if (offset >= 0xF02000 && offset <= 0xF020FF)
504 WriteLog("GPU: ReadLong--Attempt to read from GPU register file by %s!\n", whoName[who]);
506 // if ((offset >= GPU_WORK_RAM_BASE) && (offset < GPU_WORK_RAM_BASE + 0x1000))
507 if ((offset >= GPU_WORK_RAM_BASE) && (offset <= GPU_WORK_RAM_BASE + 0x0FFC))
510 return ((uint32)gpu_ram_8[offset] << 24) | ((uint32)gpu_ram_8[offset+1] << 16)
511 | ((uint32)gpu_ram_8[offset+2] << 8) | (uint32)gpu_ram_8[offset+3];//*/
512 // return GET32(gpu_ram_8, offset);
514 // else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset < GPU_CONTROL_RAM_BASE+0x20))
515 else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset <= GPU_CONTROL_RAM_BASE + 0x1C))
521 gpu_flag_c = (gpu_flag_c ? 1 : 0);
522 gpu_flag_z = (gpu_flag_z ? 1 : 0);
523 gpu_flag_n = (gpu_flag_n ? 1 : 0);
525 gpu_flags = (gpu_flags & 0xFFFFFFF8) | (gpu_flag_n << 2) | (gpu_flag_c << 1) | gpu_flag_z;
527 return gpu_flags & 0xFFFFC1FF;
529 return gpu_matrix_control;
531 return gpu_pointer_to_matrix;
533 return gpu_data_organization;
542 default: // unaligned long read
544 WriteLog("GPU: Read32--unaligned 32 bit read at %08X by %s.\n", GPU_CONTROL_RAM_BASE + offset, whoName[who]);
549 //TEMP--Mirror of F03000? No. Writes only...
550 //if (offset >= 0xF0B000 && offset <= 0xF0BFFF)
551 // WriteLog("[GPUR32] --> Possible GPU RAM mirror access by %s!\n", whoName[who]);
552 /*if (offset >= 0xF1D000 && offset <= 0xF1DFFF)
553 WriteLog("[GPUR32] --> Reading from Wavetable ROM!\n");//*/
// Widen each half to uint32 BEFORE shifting: a 16-bit result would otherwise be
// promoted to signed int and shifted into the sign bit whenever bit 15 is set.
// NOTE(review): assumes JaguarReadWord() returns a 16-bit value -- confirm.
555 return ((uint32)JaguarReadWord(offset, who) << 16) | (uint32)JaguarReadWord(offset + 2, who);
559 // GPU byte access (write)
// Stores the byte `data` at `offset` on behalf of `who`.
//  - Work RAM range: written straight into gpu_ram_8.
//  - Control register range: the byte is merged into the containing 32-bit
//    register. Register $1C is patched in gpu_div_control directly; the others
//    go through a GPUReadLong()/GPUWriteLong() read-modify-write.
//  - Anything else: forwarded to JaguarWriteByte().
// All lane shifts are done in uint32 so no signed int is shifted into its sign
// bit, and the long-alignment mask is the full 0xFFFFFFFC used by the readers.
561 void GPUWriteByte(uint32 offset, uint8 data, uint32 who/*=UNKNOWN*/)
563 if (offset >= 0xF02000 && offset <= 0xF020FF)
564 WriteLog("GPU: WriteByte--Attempt to write to GPU register file by %s!\n", whoName[who]);
566 if ((offset >= GPU_WORK_RAM_BASE) && (offset <= GPU_WORK_RAM_BASE + 0x0FFF))
568 gpu_ram_8[offset & 0xFFF] = data;
570 //This is the same stupid worthless code that was in the DSP!!! AARRRGGGGHHHHH!!!!!!
573 m68k_end_timeslice();
574 dsp_releaseTimeslice();
578 else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset <= GPU_CONTROL_RAM_BASE + 0x1F))
580 uint32 reg = offset & 0x1C;
581 int bytenum = offset & 0x03;
583 //This is definitely wrong!
584 if ((reg >= 0x1C) && (reg <= 0x1F))
585 gpu_div_control = (gpu_div_control & (~((uint32)0xFF << (bytenum << 3)))) | ((uint32)data << (bytenum << 3));
588 uint32 old_data = GPUReadLong(offset & 0xFFFFFFFC, who);
589 bytenum = 3 - bytenum; // Motorola (big-endian) convention: byte 0 is the most significant lane
590 old_data = (old_data & (~((uint32)0xFF << (bytenum << 3)))) | ((uint32)data << (bytenum << 3));
591 GPUWriteLong(offset & 0xFFFFFFFC, old_data, who);
595 // WriteLog("gpu: writing %.2x at 0x%.8x\n",data,offset);
596 JaguarWriteByte(offset, data, who);
600 // GPU word access (write)
// Stores the 16-bit `data` at `offset` on behalf of `who`.
//  - Work RAM range: written into gpu_ram_8, high byte first.
//  - Control register range: merged into the low or high half of the containing
//    32-bit register (gpu_div_control directly for $1C, otherwise a
//    GPUReadLong()/GPUWriteLong() read-modify-write).
//  - The last odd byte of either range: treated as an unaligned write.
//  - Anything else: forwarded to JaguarWriteWord().
602 void GPUWriteWord(uint32 offset, uint16 data, uint32 who/*=UNKNOWN*/)
604 if (offset >= 0xF02000 && offset <= 0xF020FF)
605 WriteLog("GPU: WriteWord--Attempt to write to GPU register file by %s!\n", whoName[who]);
607 if ((offset >= GPU_WORK_RAM_BASE) && (offset <= GPU_WORK_RAM_BASE + 0x0FFE))
609 gpu_ram_8[offset & 0xFFF] = (data>>8) & 0xFF;
610 gpu_ram_8[(offset+1) & 0xFFF] = data & 0xFF;//*/
612 SET16(gpu_ram_8, offset, data);//*/
614 /*if (offset >= 0xF03214 && offset < 0xF0321F)
615 WriteLog("GPU: Writing WORD (%04X) to GPU RAM (%08X)...\n", data, offset);//*/
618 //This is the same stupid worthless code that was in the DSP!!! AARRRGGGGHHHHH!!!!!!
621 m68k_end_timeslice();
622 dsp_releaseTimeslice();
626 else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset <= GPU_CONTROL_RAM_BASE + 0x1E))
628 if (offset & 0x01) // This is supposed to weed out unaligned writes, but does nothing...
631 WriteLog("GPU: Write16--unaligned write @ %08X [%04X]\n", offset, data);
636 //Dual locations in this range: $1C Divide unit remainder/Divide unit control (R/W)
637 //This just literally sucks.
638 if ((offset & 0x1C) == 0x1C)
640 //This doesn't look right either--handles cases 1, 2, & 3 all the same!
642 gpu_div_control = (gpu_div_control & 0xFFFF0000) | (data & 0xFFFF);
644 gpu_div_control = (gpu_div_control & 0x0000FFFF) | ((uint32)(data & 0xFFFF) << 16);
648 //WriteLog("[GPU W16:%08X,%04X]", offset, data);
649 uint32 old_data = GPUReadLong(offset & 0xFFFFFFFC, who);
651 old_data = (old_data & 0xFFFF0000) | (data & 0xFFFF);
653 old_data = (old_data & 0x0000FFFF) | ((uint32)(data & 0xFFFF) << 16);
654 GPUWriteLong(offset & 0xFFFFFFFC, old_data, who);
// Both operands must compare against `offset`. The second one used to be the
// bare constant (GPU_CONTROL_RAM_BASE + 0x1F), which is always non-zero, so this
// branch was taken for every address not handled above.
// NOTE(review): this makes the JaguarWriteWord() call below reachable -- check
// that it cannot route the same address back into GPUWriteWord().
658 else if ((offset == GPU_WORK_RAM_BASE + 0x0FFF) || (offset == GPU_CONTROL_RAM_BASE + 0x1F))
661 WriteLog("GPU: Write16--unaligned write @ %08X by %s [%04X]!\n", offset, whoName[who], data);
667 // Have to be careful here--this can cause an infinite loop!
668 JaguarWriteWord(offset, data, who);
672 // GPU dword access (write)
674 void GPUWriteLong(uint32 offset, uint32 data, uint32 who/*=UNKNOWN*/)
676 if (offset >= 0xF02000 && offset <= 0xF020FF)
677 WriteLog("GPU: WriteLong--Attempt to write to GPU register file by %s!\n", whoName[who]);
679 // if ((offset >= GPU_WORK_RAM_BASE) && (offset < GPU_WORK_RAM_BASE + 0x1000))
680 if ((offset >= GPU_WORK_RAM_BASE) && (offset <= GPU_WORK_RAM_BASE + 0x0FFC))
685 WriteLog("GPU: Write32--unaligned write @ %08X [%08X] by %s\n", offset, data, whoName[who]);
691 SET32(gpu_ram_8, offset, data);
694 // else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset < GPU_CONTROL_RAM_BASE+0x20))
695 else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset <= GPU_CONTROL_RAM_BASE + 0x1C))
702 bool IMASKCleared = (gpu_flags & IMASK) && !(data & IMASK);
704 gpu_flag_z = gpu_flags & ZERO_FLAG;
705 gpu_flag_c = (gpu_flags & CARRY_FLAG) >> 1;
706 gpu_flag_n = (gpu_flags & NEGA_FLAG) >> 2;
707 GPUUpdateRegisterBanks();
708 gpu_control &= ~((gpu_flags & CINT04FLAGS) >> 3); // Interrupt latch clear bits
709 //Writing here is only an interrupt enable--this approach is just plain wrong!
711 //This, however, is A-OK! ;-)
712 if (IMASKCleared) // If IMASK was cleared,
713 GPUHandleIRQs(); // see if any other interrupts need servicing!
715 if (gpu_flags & (INT_ENA0 | INT_ENA1 | INT_ENA2 | INT_ENA3 | INT_ENA4))
716 WriteLog("GPU: Interrupt enable set by %s! Bits: %02X\n", whoName[who], (gpu_flags >> 4) & 0x1F);
717 WriteLog("GPU: REGPAGE %s...\n", (gpu_flags & REGPAGE ? "set" : "cleared"));
722 gpu_matrix_control = data;
725 // This can only point to long aligned addresses
726 gpu_pointer_to_matrix = data & 0xFFFFFFFC;
729 gpu_data_organization = data;
734 WriteLog("GPU: %s setting GPU PC to %08X %s\n", whoName[who], gpu_pc, (GPU_RUNNING ? "(GPU is RUNNING!)" : ""));//*/
739 // uint32 gpu_was_running = GPU_RUNNING;
740 data &= ~0xF7C0; // Disable writes to INT_LAT0-4 & TOM version number
742 // check for GPU -> CPU interrupt
745 //WriteLog("GPU->CPU interrupt\n");
746 if (TOMIRQEnabled(IRQ_GPU))
748 //This is the programmer's responsibility, to make sure the handler is valid, not ours!
749 // if ((TOMIRQEnabled(IRQ_GPU))// && (JaguarInterruptHandlerIsValid(64)))
751 TOMSetPendingGPUInt();
752 m68k_set_irq(2); // Set 68000 IPL 2
753 GPUReleaseTimeslice();
759 // check for CPU -> GPU interrupt #0
762 //WriteLog("CPU->GPU interrupt\n");
763 GPUSetIRQLine(0, ASSERT_LINE);
764 m68k_end_timeslice();
765 DSPReleaseTimeslice();
772 //WriteLog("asked to perform a single step (single step is %senabled)\n",(data&0x8)?"":"not ");
774 gpu_control = (gpu_control & 0xF7C0) | (data & (~0xF7C0));
776 // if gpu wasn't running but is now running, execute a few cycles
777 #ifndef GPU_SINGLE_STEPPING
778 /* if (!gpu_was_running && GPU_RUNNING)
781 WriteLog("GPU: Write32--About to do stupid braindead GPU execution for 200 cycles.\n");
786 #endif // GPU_DEBUG//*/
788 if (gpu_control & 0x18)
790 #endif // #ifndef GPU_SINGLE_STEPPING
792 WriteLog("Write to GPU CTRL by %s: %08X ", whoName[who], data);
794 WriteLog(" --> Starting to run at %08X by %s...", gpu_pc, whoName[who]);
796 WriteLog(" --> Stopped by %s! (GPU_PC: %08X)", whoName[who], gpu_pc);
800 // GPUDumpDisassembly();
803 if (gpu_pc == 0xF035D8)
805 // GPUDumpDisassembly();
808 gpu_control &= 0xFFFFFFFE; // Don't run it and let's see what happens!
809 //Hmm. Seems to lock up when going into the demo...
810 //Try to disable the collision altogether!
813 extern int effect_start5;
814 static bool finished = false;
815 //if (GPU_RUNNING && effect_start5 && !finished)
816 if (GPU_RUNNING && effect_start5 && gpu_pc == 0xF035D8)
818 // Let's do a dump of $6528!
819 /* uint32 numItems = JaguarReadWord(0x6BD6);
820 WriteLog("\nDump of $6528: %u items.\n\n", numItems);
821 for(int i=0; i<numItems*3*4; i+=3*4)
823 WriteLog("\t%04X: %08X %08X %08X -> ", 0x6528+i, JaguarReadLong(0x6528+i),
824 JaguarReadLong(0x6528+i+4), JaguarReadLong(0x6528+i+8));
825 uint16 link = JaguarReadWord(0x6528+i+8+2);
826 for(int j=0; j<40; j+=4)
827 WriteLog("%08X ", JaguarReadLong(link + j));
831 // Let's try a manual blit here...
832 //This isn't working the way it should! !!! FIX !!!
833 //Err, actually, it is.
834 // NOW, it works right! Problem solved!!! It's a blitter bug!
835 /* uint32 src = 0x4D54, dst = 0xF03000, width = 10 * 4;
836 for(int y=0; y<127; y++)
838 for(int x=0; x<2; x++)
840 JaguarWriteLong(dst, JaguarReadLong(src));
845 src += width - (2 * 4);
849 WriteLog("\nGPU: About to execute collision detection code.\n\n");//*/
851 /* WriteLog("\nGPU: About to execute collision detection code. Data @ 4D54:\n\n");
853 for(int i=0x004D54; i<0x004D54+2048; i++)
855 WriteLog("%02X ", JaguarReadByte(i));
863 WriteLog("\n\nData @ F03000:\n\n");
865 for(int i=0xF03000; i<0xF03200; i++)
867 WriteLog("%02X ", JaguarReadByte(i));
881 /*if (!GPU_RUNNING && finished)
883 WriteLog("\nGPU: Finished collision detection code. Exiting!\n\n");
888 // (?) If we're set running by the M68K (or DSP?) then end its timeslice to
889 // allow the GPU a chance to run...
890 // Yes! This partially fixed Trevor McFur...
892 m68k_end_timeslice();
899 gpu_div_control = data;
901 // default: // unaligned long write
908 // JaguarWriteWord(offset, (data >> 16) & 0xFFFF, who);
909 // JaguarWriteWord(offset+2, data & 0xFFFF, who);
910 // We're a 32-bit processor, we can do a long write...!
911 JaguarWriteLong(offset, data, who);
915 // Change register banks if necessary
917 void GPUUpdateRegisterBanks(void)
919 int bank = (gpu_flags & REGPAGE); // REGPAGE bit
921 if (gpu_flags & IMASK) // IMASK bit
922 bank = 0; // IMASK forces main bank to be bank 0
925 gpu_reg = gpu_reg_bank_1, gpu_alternate_reg = gpu_reg_bank_0;
927 gpu_reg = gpu_reg_bank_0, gpu_alternate_reg = gpu_reg_bank_1;
930 void GPUHandleIRQs(void)
932 // Bail out if we're already in an interrupt!
933 if (gpu_flags & IMASK)
936 // Get the interrupt latch & enable bits
937 uint32 bits = (gpu_control >> 6) & 0x1F, mask = (gpu_flags >> 4) & 0x1F;
939 // Bail out if latched interrupts aren't enabled
944 // Determine which interrupt to service
945 uint32 which = 0; //Isn't there a #pragma to disable this warning???
958 WriteLog("GPU: Generating IRQ #%i\n", which);
960 // set the interrupt flag
962 GPUUpdateRegisterBanks();
964 // subqt #4,r31 ; pre-decrement stack pointer
965 // move pc,r30 ; address of interrupted code
966 // store r30,(r31) ; store return address
968 GPUWriteLong(gpu_reg[31], gpu_pc - 2, GPU);
970 // movei #service_address,r30 ; pointer to ISR entry
971 // jump (r30) ; jump to ISR
973 gpu_pc = gpu_reg[30] = GPU_WORK_RAM_BASE + (which * 0x10);
976 void GPUSetIRQLine(int irqline, int state)
979 WriteLog("GPU: Setting GPU IRQ line #%i\n", irqline);
981 uint32 mask = 0x0040 << irqline;
982 gpu_control &= ~mask; // Clear the interrupt latch
986 gpu_control |= mask; // Assert the interrupt latch
987 GPUHandleIRQs(); // And handle the interrupt...
991 //TEMPORARY: Testing only!
997 // memory_malloc_secure((void **)&gpu_ram_8, 0x1000, "GPU work RAM");
998 // memory_malloc_secure((void **)&gpu_reg_bank_0, 32 * sizeof(int32), "GPU bank 0 regs");
999 // memory_malloc_secure((void **)&gpu_reg_bank_1, 32 * sizeof(int32), "GPU bank 1 regs");
1001 build_branch_condition_table();
1005 //TEMPORARY: Testing only!
1012 // GPU registers (directly visible)
1013 gpu_flags = 0x00000000;
1014 gpu_matrix_control = 0x00000000;
1015 gpu_pointer_to_matrix = 0x00000000;
1016 gpu_data_organization = 0xFFFFFFFF;
1017 gpu_pc = 0x00F03000;
1018 gpu_control = 0x00002800; // Correctly sets this as TOM Rev. 2
1019 gpu_hidata = 0x00000000;
1020 gpu_remain = 0x00000000; // These two registers are RO/WO
1021 gpu_div_control = 0x00000000;
1023 // GPU internal register
1024 gpu_acc = 0x00000000;
1026 gpu_reg = gpu_reg_bank_0;
1027 gpu_alternate_reg = gpu_reg_bank_1;
1029 for(int i=0; i<32; i++)
1030 gpu_reg[i] = gpu_alternate_reg[i] = 0x00000000;
1033 memset(gpu_ram_8, 0xFF, 0x1000);
1035 //not needed GPUInterruptPending = false;
1039 uint32 GPUReadPC(void)
1044 void GPUResetStats(void)
1046 for(uint32 i=0; i<64; i++)
1047 gpu_opcode_use[i] = 0;
1048 WriteLog("--> GPU stats were reset!\n");
1051 void GPUDumpDisassembly(void)
1055 WriteLog("\n---[GPU code at 00F03000]---------------------------\n");
1056 uint32 j = 0xF03000;
1057 while (j <= 0xF03FFF)
1060 j += dasmjag(JAGUAR_GPU, buffer, j);
1061 WriteLog("\t%08X: %s\n", oldj, buffer);
1065 void GPUDumpRegisters(void)
1067 WriteLog("\n---[GPU flags: NCZ %d%d%d]-----------------------\n", gpu_flag_n, gpu_flag_c, gpu_flag_z);
1068 WriteLog("\nRegisters bank 0\n");
1069 for(int j=0; j<8; j++)
1071 WriteLog("\tR%02i = %08X R%02i = %08X R%02i = %08X R%02i = %08X\n",
1072 (j << 2) + 0, gpu_reg_bank_0[(j << 2) + 0],
1073 (j << 2) + 1, gpu_reg_bank_0[(j << 2) + 1],
1074 (j << 2) + 2, gpu_reg_bank_0[(j << 2) + 2],
1075 (j << 2) + 3, gpu_reg_bank_0[(j << 2) + 3]);
1077 WriteLog("Registers bank 1\n");
1078 for(int j=0; j<8; j++)
1080 WriteLog("\tR%02i = %08X R%02i = %08X R%02i = %08X R%02i = %08X\n",
1081 (j << 2) + 0, gpu_reg_bank_1[(j << 2) + 0],
1082 (j << 2) + 1, gpu_reg_bank_1[(j << 2) + 1],
1083 (j << 2) + 2, gpu_reg_bank_1[(j << 2) + 2],
1084 (j << 2) + 3, gpu_reg_bank_1[(j << 2) + 3]);
1088 void GPUDumpMemory(void)
1090 WriteLog("\n---[GPU data at 00F03000]---------------------------\n");
1091 for(int i=0; i<0xFFF; i+=4)
1092 WriteLog("\t%08X: %02X %02X %02X %02X\n", 0xF03000+i, gpu_ram_8[i],
1093 gpu_ram_8[i+1], gpu_ram_8[i+2], gpu_ram_8[i+3]);
1098 WriteLog("GPU: Stopped at PC=%08X (GPU %s running)\n", (unsigned int)gpu_pc, GPU_RUNNING ? "was" : "wasn't");
1100 // Get the interrupt latch & enable bits
1101 uint8 bits = (gpu_control >> 6) & 0x1F, mask = (gpu_flags >> 4) & 0x1F;
1102 WriteLog("GPU: Latch bits = %02X, enable bits = %02X\n", bits, mask);
1105 GPUDumpDisassembly();
// Log how many times each opcode was executed, skipping opcodes never used.
1107 WriteLog("\nGPU opcodes use:\n");
1108 for(int i=0; i<64; i++)
1110 if (gpu_opcode_use[i])
// gpu_opcode_use[] holds uint32 counters: print with %u and an explicit cast
// rather than %lu, which expects an unsigned long argument.
1111 WriteLog("\t%17s %u\n", gpu_opcode_str[i], (unsigned int)gpu_opcode_use[i]);
1115 // memory_free(gpu_ram_8);
1116 // memory_free(gpu_reg_bank_0);
1117 // memory_free(gpu_reg_bank_1);
1121 // Main GPU execution core
1123 static int testCount = 1;
1125 static bool tripwire = false;
1126 void GPUExec(int32 cycles)
1131 #ifdef GPU_SINGLE_STEPPING
1132 if (gpu_control & 0x18)
1135 gpu_control &= ~0x10;
1139 gpu_releaseTimeSlice_flag = 0;
1142 while (cycles > 0 && GPU_RUNNING)
1144 if (gpu_ram_8[0x054] == 0x98 && gpu_ram_8[0x055] == 0x0A && gpu_ram_8[0x056] == 0x03
1145 && gpu_ram_8[0x057] == 0x00 && gpu_ram_8[0x058] == 0x00 && gpu_ram_8[0x059] == 0x00)
1147 if (gpu_pc == 0xF03000)
1149 extern uint32 starCount;
1151 /* WriteLog("GPU: Starting starfield generator... Dump of [R03=%08X]:\n", gpu_reg_bank_0[03]);
1152 uint32 base = gpu_reg_bank_0[3];
1153 for(uint32 i=0; i<0x100; i+=16)
1155 WriteLog("%02X: ", i);
1156 for(uint32 j=0; j<16; j++)
1158 WriteLog("%02X ", JaguarReadByte(base + i + j));
1163 // if (gpu_pc == 0xF03)
1167 /*if (gpu_pc == 0xF03B9E && gpu_reg_bank_0[01] == 0)
1170 WriteLog("GPU: Starting disassembly log...\n");
1173 /*if (gpu_pc == 0xF0359A)
1178 /* gpu_flag_c = (gpu_flag_c ? 1 : 0);
1179 gpu_flag_z = (gpu_flag_z ? 1 : 0);
1180 gpu_flag_n = (gpu_flag_n ? 1 : 0);*/
1182 uint16 opcode = GPUReadWord(gpu_pc, GPU);
1183 uint32 index = opcode >> 10;
1184 gpu_instruction = opcode; // Added for GPU #3...
1185 gpu_opcode_first_parameter = (opcode >> 5) & 0x1F;
1186 gpu_opcode_second_parameter = opcode & 0x1F;
1187 /*if (gpu_pc == 0xF03BE8)
1188 WriteLog("Start of OP frame write...\n");
1189 if (gpu_pc == 0xF03EEE)
1190 WriteLog("--> Writing BRANCH object ---\n");
1191 if (gpu_pc == 0xF03F62)
1192 WriteLog("--> Writing BITMAP object ***\n");//*/
1193 /*if (gpu_pc == 0xF03546)
1195 WriteLog("\n--> GPU PC: F03546\n");
1197 GPUDumpDisassembly();
1199 /*if (gpu_pc == 0xF033F6)
1201 WriteLog("\n--> GPU PC: F033F6\n");
1203 GPUDumpDisassembly();
1205 /*if (gpu_pc == 0xF033CC)
1207 WriteLog("\n--> GPU PC: F033CC\n");
1209 GPUDumpDisassembly();
1211 /*if (gpu_pc == 0xF033D6)
1213 WriteLog("\n--> GPU PC: F033D6 (#%d)\n", testCount++);
1217 /*if (gpu_pc == 0xF033D8)
1219 WriteLog("\n--> GPU PC: F033D8 (#%d)\n", testCount++);
1223 /*if (gpu_pc == 0xF0358E)
1225 WriteLog("\n--> GPU PC: F0358E (#%d)\n", testCount++);
1229 /*if (gpu_pc == 0xF034CA)
1231 WriteLog("\n--> GPU PC: F034CA (#%d)\n", testCount++);
1234 /*if (gpu_pc == 0xF034CA)
1236 len = gpu_reg[1] + 4;//, r9save = gpu_reg[9];
1237 WriteLog("\nAbout to subtract [#%d] (R14=%08X, R15=%08X, R9=%08X):\n ", testCount++, gpu_reg[14], gpu_reg[15], gpu_reg[9]);
1238 for(int i=0; i<len; i+=4)
1239 WriteLog(" %08X", GPUReadLong(gpu_reg[15]+i));
1241 for(int i=0; i<len; i+=4)
1242 WriteLog(" %08X", GPUReadLong(gpu_reg[14]+i));
1245 if (gpu_pc == 0xF034DE)
1247 WriteLog("\nSubtracted! (R14=%08X, R15=%08X):\n ", gpu_reg[14], gpu_reg[15]);
1248 for(int i=0; i<len; i+=4)
1249 WriteLog(" %08X", GPUReadLong(gpu_reg[15]+i));
1251 for(int i=0; i<len; i+=4)
1252 WriteLog(" %08X", GPUReadLong(gpu_reg[14]+i));
1254 for(int i=0; i<len; i+=4)
1255 WriteLog(" --------");
1257 for(int i=0; i<len; i+=4)
1258 WriteLog(" %08X", GPUReadLong(gpu_reg[9]+4+i));
1261 /*if (gpu_pc == 0xF035C8)
1263 WriteLog("\n--> GPU PC: F035C8 (#%d)\n", testCount++);
1265 GPUDumpDisassembly();
1270 // gpu_reset_stats();
1271 static char buffer[512];
1272 dasmjag(JAGUAR_GPU, buffer, gpu_pc);
1273 WriteLog("GPU: [%08X] %s (RM=%08X, RN=%08X) -> ", gpu_pc, buffer, RM, RN);
1275 //$E400 -> 1110 01 -> $39 -> 57
1278 gpu_opcode[index]();
1280 // gpu2_opcode[index]();
1282 //GPU #3 (Doesn't show ATARI logo! #1 & #2 do...)
1284 // gpu3_opcode[index]();
1287 //GPU: [00F03548] jr nz,00F03560 (0xd561) (RM=00F03114, RN=00000004) -> --> JR: Branch taken.
1288 /*static bool firstTime = true;
1289 if (gpu_pc == 0xF03548 && firstTime)
1292 // firstTime = false;
1294 //static char buffer[512];
1296 //while (k<0xF0356C)
1299 //k += dasmjag(JAGUAR_GPU, buffer, k);
1300 //WriteLog("GPU: [%08X] %s\n", oldk, buffer);
1302 // gpu_start_log = 1;
1304 //GPU: [00F0354C] jump nz,(r29) (0xd3a1) (RM=00F03314, RN=00000004) -> (RM=00F03314, RN=00000004)
1305 /*if (gpu_pc == 0xF0354C)
1306 gpu_flag_z = 0;//, gpu_start_log = 1;//*/
1308 cycles -= gpu_opcode_cycles[index];
1309 gpu_opcode_use[index]++;
1311 WriteLog("(RM=%08X, RN=%08X)\n", RM, RN);//*/
1312 if ((gpu_pc < 0xF03000 || gpu_pc > 0xF03FFF) && !tripwire)
1314 WriteLog("GPU: Executing outside local RAM! GPU_PC: %08X\n", gpu_pc);
1327 GPU opcodes use (offset punch--vertically below bad guy):
1349 load_r14_indexed 1183
1350 load_r15_indexed 1125
1353 store_r14_indexed 320
1361 static void gpu_opcode_jump(void)
1364 const char * condition[32] =
1365 { "T", "nz", "z", "???", "nc", "nc nz", "nc z", "???", "c", "c nz",
1366 "c z", "???", "???", "???", "???", "???", "???", "???", "???",
1367 "???", "nn", "nn nz", "nn z", "???", "n", "n nz", "n z", "???",
1368 "???", "???", "???", "F" };
1370 WriteLog("%06X: JUMP %s, (R%02u) [NCZ:%u%u%u, R%02u=%08X] ", gpu_pc-2, condition[IMM_2], IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM);
1373 /* gpu_flag_c = (gpu_flag_c ? 1 : 0);
1374 gpu_flag_z = (gpu_flag_z ? 1 : 0);
1375 gpu_flag_n = (gpu_flag_n ? 1 : 0);*/
1376 // KLUDGE: Used by BRANCH_CONDITION
1377 uint32 jaguar_flags = (gpu_flag_n << 2) | (gpu_flag_c << 1) | gpu_flag_z;
1379 if (BRANCH_CONDITION(IMM_2))
1383 WriteLog("Branched!\n");
1386 WriteLog(" --> JUMP: Branch taken.\n");
1387 uint32 delayed_pc = RM;
1389 gpu_pc = delayed_pc;
1390 /* uint16 opcode = GPUReadWord(gpu_pc, GPU);
1391 gpu_opcode_first_parameter = (opcode >> 5) & 0x1F;
1392 gpu_opcode_second_parameter = opcode & 0x1F;
1394 gpu_pc = delayed_pc;
1395 gpu_opcode[opcode>>10]();//*/
1400 WriteLog("Branch NOT taken.\n");
1404 static void gpu_opcode_jr(void)
1407 const char * condition[32] =
1408 { "T", "nz", "z", "???", "nc", "nc nz", "nc z", "???", "c", "c nz",
1409 "c z", "???", "???", "???", "???", "???", "???", "???", "???",
1410 "???", "nn", "nn nz", "nn z", "???", "n", "n nz", "n z", "???",
1411 "???", "???", "???", "F" };
1413 WriteLog("%06X: JR %s, %06X [NCZ:%u%u%u] ", gpu_pc-2, condition[IMM_2], gpu_pc+((IMM_1 & 0x10 ? 0xFFFFFFF0 | IMM_1 : IMM_1) * 2), gpu_flag_n, gpu_flag_c, gpu_flag_z);
1415 /* if (CONDITION(jaguar.op & 31))
1417 int32 r1 = (INT8)((jaguar.op >> 2) & 0xF8) >> 2;
1418 uint32 newpc = jaguar.PC + r1;
1420 jaguar.op = ROPCODE(jaguar.PC);
1422 (*jaguar.table[jaguar.op >> 10])();
1424 jaguar_icount -= 3; // 3 wait states guaranteed
1427 /* gpu_flag_n = (gpu_flag_n ? 1 : 0);
1428 gpu_flag_c = (gpu_flag_c ? 1 : 0);
1429 gpu_flag_z = (gpu_flag_z ? 1 : 0);*/
1430 // KLUDGE: Used by BRANCH_CONDITION
1431 uint32 jaguar_flags = (gpu_flag_n << 2) | (gpu_flag_c << 1) | gpu_flag_z;
1433 if (BRANCH_CONDITION(IMM_2))
1437 WriteLog("Branched!\n");
1440 WriteLog(" --> JR: Branch taken.\n");
1441 int32 offset = (IMM_1 & 0x10 ? 0xFFFFFFF0 | IMM_1 : IMM_1); // Sign extend IMM_1
1442 int32 delayed_pc = gpu_pc + (offset * 2);
1444 gpu_pc = delayed_pc;
1445 /* uint16 opcode = GPUReadWord(gpu_pc, GPU);
1446 gpu_opcode_first_parameter = (opcode >> 5) & 0x1F;
1447 gpu_opcode_second_parameter = opcode & 0x1F;
1449 gpu_pc = delayed_pc;
1450 gpu_opcode[opcode>>10]();//*/
1455 WriteLog("Branch NOT taken.\n");
1459 static void gpu_opcode_add(void)
1463 WriteLog("%06X: ADD R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1465 uint32 res = RN + RM;
1466 CLR_ZNC; SET_ZNC_ADD(RN, RM, res);
1470 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1474 static void gpu_opcode_addc(void)
1478 WriteLog("%06X: ADDC R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1480 /* int dreg = jaguar.op & 31;
1481 uint32 r1 = jaguar.r[(jaguar.op >> 5) & 31];
1482 uint32 r2 = jaguar.r[dreg];
1483 uint32 res = r2 + r1 + ((jaguar.FLAGS >> 1) & 1);
1484 jaguar.r[dreg] = res;
1485 CLR_ZNC; SET_ZNC_ADD(r2,r1,res);*/
1487 uint32 res = RN + RM + gpu_flag_c;
1488 uint32 carry = gpu_flag_c;
1489 // SET_ZNC_ADD(RN, RM, res); //???BUG??? Yes!
1490 SET_ZNC_ADD(RN + carry, RM, res);
1491 // SET_ZNC_ADD(RN, RM + carry, res);
1495 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1499 static void gpu_opcode_addq(void)
1503 WriteLog("%06X: ADDQ #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1505 uint32 r1 = gpu_convert_zero[IMM_1];
1506 uint32 res = RN + r1;
1507 CLR_ZNC; SET_ZNC_ADD(RN, r1, res);
1511 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1515 static void gpu_opcode_addqt(void)
1517 #ifdef GPU_DIS_ADDQT
1519 WriteLog("%06X: ADDQT #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1521 RN += gpu_convert_zero[IMM_1];
1522 #ifdef GPU_DIS_ADDQT
1524 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1528 static void gpu_opcode_sub(void)
1532 WriteLog("%06X: SUB R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1534 uint32 res = RN - RM;
1535 SET_ZNC_SUB(RN, RM, res);
1539 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1543 static void gpu_opcode_subc(void)
1547 WriteLog("%06X: SUBC R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1549 uint32 res = RN - RM - gpu_flag_c;
1550 uint32 borrow = gpu_flag_c;
1551 // SET_ZNC_SUB(RN, RM, res); //???BUG??? YES!!!
1552 //No matter how you do it, there is a problem. With below, it's 0-0 with carry,
1553 //and the one below it it's FFFFFFFF - FFFFFFFF with carry... !!! FIX !!!
1554 // SET_ZNC_SUB(RN - borrow, RM, res);
1555 SET_ZNC_SUB(RN, RM + borrow, res);
1559 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1563 N = 5, M = 3, 3 - 5 = -2, C = 1... Or, in our case:
1564 N = 0, M = 1, 0 - 1 = -1, C = 0!
1566 #define SET_C_SUB(a,b) (gpu_flag_c = ((uint32)(b) > (uint32)(a)))
1567 #define SET_ZN(r) SET_N(r); SET_Z(r)
1568 #define SET_ZNC_ADD(a,b,r) SET_N(r); SET_Z(r); SET_C_ADD(a,b)
1569 #define SET_ZNC_SUB(a,b,r) SET_N(r); SET_Z(r); SET_C_SUB(a,b)
1571 static void gpu_opcode_subq(void)
1575 WriteLog("%06X: SUBQ #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1577 uint32 r1 = gpu_convert_zero[IMM_1];
1578 uint32 res = RN - r1;
1579 SET_ZNC_SUB(RN, r1, res);
1583 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1587 static void gpu_opcode_subqt(void)
1589 #ifdef GPU_DIS_SUBQT
1591 WriteLog("%06X: SUBQT #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1593 RN -= gpu_convert_zero[IMM_1];
1594 #ifdef GPU_DIS_SUBQT
1596 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1600 static void gpu_opcode_cmp(void)
1604 WriteLog("%06X: CMP R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1606 uint32 res = RN - RM;
1607 SET_ZNC_SUB(RN, RM, res);
1610 WriteLog("[NCZ:%u%u%u]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z);
1614 static void gpu_opcode_cmpq(void)
1616 static int32 sqtable[32] =
1617 { 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,-16,-15,-14,-13,-12,-11,-10,-9,-8,-7,-6,-5,-4,-3,-2,-1 };
1620 WriteLog("%06X: CMPQ #%d, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, sqtable[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1622 uint32 r1 = sqtable[IMM_1 & 0x1F]; // I like this better -> (INT8)(jaguar.op >> 2) >> 3;
1623 uint32 res = RN - r1;
1624 SET_ZNC_SUB(RN, r1, res);
1627 WriteLog("[NCZ:%u%u%u]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z);
1631 static void gpu_opcode_and(void)
1635 WriteLog("%06X: AND R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1641 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1645 static void gpu_opcode_or(void)
1649 WriteLog("%06X: OR R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1655 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1659 static void gpu_opcode_xor(void)
1663 WriteLog("%06X: XOR R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1669 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1673 static void gpu_opcode_not(void)
1677 WriteLog("%06X: NOT R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1683 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1687 static void gpu_opcode_move_pc(void)
1689 #ifdef GPU_DIS_MOVEPC
1691 WriteLog("%06X: MOVE PC, R%02u [NCZ:%u%u%u, PC=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, gpu_pc-2, IMM_2, RN);
1693 // Should be previous PC--this might not always be previous instruction!
1694 // Then again, this will point right at the *current* instruction, i.e., MOVE PC,R!
1696 #ifdef GPU_DIS_MOVEPC
1698 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1702 static void gpu_opcode_sat8(void)
1706 WriteLog("%06X: SAT8 R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1708 RN = ((int32)RN < 0 ? 0 : (RN > 0xFF ? 0xFF : RN));
1712 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1716 static void gpu_opcode_sat16(void)
1718 RN = ((int32)RN < 0 ? 0 : (RN > 0xFFFF ? 0xFFFF : RN));
1722 static void gpu_opcode_sat24(void)
1724 RN = ((int32)RN < 0 ? 0 : (RN > 0xFFFFFF ? 0xFFFFFF : RN));
1728 static void gpu_opcode_store_r14_indexed(void)
1730 #ifdef GPU_DIS_STORE14I
1732 WriteLog("%06X: STORE R%02u, (R14+$%02X) [NCZ:%u%u%u, R%02u=%08X, R14+$%02X=%08X]\n", gpu_pc-2, IMM_2, gpu_convert_zero[IMM_1] << 2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, gpu_convert_zero[IMM_1] << 2, gpu_reg[14]+(gpu_convert_zero[IMM_1] << 2));
1734 #ifdef GPU_CORRECT_ALIGNMENT
1735 uint32 address = gpu_reg[14] + (gpu_convert_zero[IMM_1] << 2);
1737 if (address >= 0xF03000 && address <= 0xF03FFF)
1738 GPUWriteLong(address & 0xFFFFFFFC, RN, GPU);
1740 GPUWriteLong(address, RN, GPU);
1742 GPUWriteLong(gpu_reg[14] + (gpu_convert_zero[IMM_1] << 2), RN, GPU);
1746 static void gpu_opcode_store_r15_indexed(void)
1748 #ifdef GPU_DIS_STORE15I
1750 WriteLog("%06X: STORE R%02u, (R15+$%02X) [NCZ:%u%u%u, R%02u=%08X, R15+$%02X=%08X]\n", gpu_pc-2, IMM_2, gpu_convert_zero[IMM_1] << 2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, gpu_convert_zero[IMM_1] << 2, gpu_reg[15]+(gpu_convert_zero[IMM_1] << 2));
1752 #ifdef GPU_CORRECT_ALIGNMENT
1753 uint32 address = gpu_reg[15] + (gpu_convert_zero[IMM_1] << 2);
1755 if (address >= 0xF03000 && address <= 0xF03FFF)
1756 GPUWriteLong(address & 0xFFFFFFFC, RN, GPU);
1758 GPUWriteLong(address, RN, GPU);
1760 GPUWriteLong(gpu_reg[15] + (gpu_convert_zero[IMM_1] << 2), RN, GPU);
1764 static void gpu_opcode_load_r14_ri(void)
1766 #ifdef GPU_DIS_LOAD14R
1768 WriteLog("%06X: LOAD (R14+R%02u), R%02u [NCZ:%u%u%u, R14+R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM+gpu_reg[14], IMM_2, RN);
1770 #ifdef GPU_CORRECT_ALIGNMENT
1771 uint32 address = gpu_reg[14] + RM;
1773 if (address >= 0xF03000 && address <= 0xF03FFF)
1774 RN = GPUReadLong(address & 0xFFFFFFFC, GPU);
1776 RN = GPUReadLong(address, GPU);
1778 RN = GPUReadLong(gpu_reg[14] + RM, GPU);
1780 #ifdef GPU_DIS_LOAD14R
1782 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1786 static void gpu_opcode_load_r15_ri(void)
1788 #ifdef GPU_DIS_LOAD15R
1790 WriteLog("%06X: LOAD (R15+R%02u), R%02u [NCZ:%u%u%u, R15+R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM+gpu_reg[15], IMM_2, RN);
1792 #ifdef GPU_CORRECT_ALIGNMENT
1793 uint32 address = gpu_reg[15] + RM;
1795 if (address >= 0xF03000 && address <= 0xF03FFF)
1796 RN = GPUReadLong(address & 0xFFFFFFFC, GPU);
1798 RN = GPUReadLong(address, GPU);
1800 RN = GPUReadLong(gpu_reg[15] + RM, GPU);
1802 #ifdef GPU_DIS_LOAD15R
1804 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1808 static void gpu_opcode_store_r14_ri(void)
1810 #ifdef GPU_DIS_STORE14R
1812 WriteLog("%06X: STORE R%02u, (R14+R%02u) [NCZ:%u%u%u, R%02u=%08X, R14+R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM+gpu_reg[14]);
1814 #ifdef GPU_CORRECT_ALIGNMENT
1815 uint32 address = gpu_reg[14] + RM;
1817 if (address >= 0xF03000 && address <= 0xF03FFF)
1818 GPUWriteLong(address & 0xFFFFFFFC, RN, GPU);
1820 GPUWriteLong(address, RN, GPU);
1822 GPUWriteLong(gpu_reg[14] + RM, RN, GPU);
1826 static void gpu_opcode_store_r15_ri(void)
1828 #ifdef GPU_DIS_STORE15R
1830 WriteLog("%06X: STORE R%02u, (R15+R%02u) [NCZ:%u%u%u, R%02u=%08X, R15+R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM+gpu_reg[15]);
1832 #ifdef GPU_CORRECT_ALIGNMENT_STORE
1833 uint32 address = gpu_reg[15] + RM;
1835 if (address >= 0xF03000 && address <= 0xF03FFF)
1836 GPUWriteLong(address & 0xFFFFFFFC, RN, GPU);
1838 GPUWriteLong(address, RN, GPU);
1840 GPUWriteLong(gpu_reg[15] + RM, RN, GPU);
1844 static void gpu_opcode_nop(void)
1848 WriteLog("%06X: NOP [NCZ:%u%u%u]\n", gpu_pc-2, gpu_flag_n, gpu_flag_c, gpu_flag_z);
1852 static void gpu_opcode_pack(void)
1856 WriteLog("%06X: %s R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, (!IMM_1 ? "PACK " : "UNPACK"), IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1860 //BUG! if (RM == 0) // Pack
1861 if (IMM_1 == 0) // Pack
1862 RN = ((val >> 10) & 0x0000F000) | ((val >> 5) & 0x00000F00) | (val & 0x000000FF);
1864 RN = ((val & 0x0000F000) << 10) | ((val & 0x00000F00) << 5) | (val & 0x000000FF);
1867 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1871 static void gpu_opcode_storeb(void)
1873 #ifdef GPU_DIS_STOREB
1875 WriteLog("%06X: STOREB R%02u, (R%02u) [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM);
1878 // Would appear to be so...!
1879 if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1880 GPUWriteLong(RM, RN & 0xFF, GPU);
1882 JaguarWriteByte(RM, RN, GPU);
1885 static void gpu_opcode_storew(void)
1887 #ifdef GPU_DIS_STOREW
1889 WriteLog("%06X: STOREW R%02u, (R%02u) [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM);
1891 #ifdef GPU_CORRECT_ALIGNMENT
1892 if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1893 GPUWriteLong(RM & 0xFFFFFFFE, RN & 0xFFFF, GPU);
1895 JaguarWriteWord(RM, RN, GPU);
1897 if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1898 GPUWriteLong(RM, RN & 0xFFFF, GPU);
1900 JaguarWriteWord(RM, RN, GPU);
1904 static void gpu_opcode_store(void)
1906 #ifdef GPU_DIS_STORE
1908 WriteLog("%06X: STORE R%02u, (R%02u) [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM);
1910 #ifdef GPU_CORRECT_ALIGNMENT
1911 if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1912 GPUWriteLong(RM & 0xFFFFFFFC, RN, GPU);
1914 GPUWriteLong(RM, RN, GPU);
1916 GPUWriteLong(RM, RN, GPU);
1920 static void gpu_opcode_storep(void)
1922 #ifdef GPU_CORRECT_ALIGNMENT
1923 if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1925 GPUWriteLong((RM & 0xFFFFFFF8) + 0, gpu_hidata, GPU);
1926 GPUWriteLong((RM & 0xFFFFFFF8) + 4, RN, GPU);
1930 GPUWriteLong(RM + 0, gpu_hidata, GPU);
1931 GPUWriteLong(RM + 4, RN, GPU);
1934 GPUWriteLong(RM + 0, gpu_hidata, GPU);
1935 GPUWriteLong(RM + 4, RN, GPU);
1939 static void gpu_opcode_loadb(void)
1941 #ifdef GPU_DIS_LOADB
1943 WriteLog("%06X: LOADB (R%02u), R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1945 if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1946 RN = GPUReadLong(RM, GPU) & 0xFF;
1948 RN = JaguarReadByte(RM, GPU);
1949 #ifdef GPU_DIS_LOADB
1951 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1955 static void gpu_opcode_loadw(void)
1957 #ifdef GPU_DIS_LOADW
1959 WriteLog("%06X: LOADW (R%02u), R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1961 #ifdef GPU_CORRECT_ALIGNMENT
1962 if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1963 RN = GPUReadLong(RM & 0xFFFFFFFE, GPU) & 0xFFFF;
1965 RN = JaguarReadWord(RM, GPU);
1967 if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1968 RN = GPUReadLong(RM, GPU) & 0xFFFF;
1970 RN = JaguarReadWord(RM, GPU);
1972 #ifdef GPU_DIS_LOADW
1974 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1978 // According to the docs, & "Do The Same", this address is long aligned...
1980 // And it works!!! Need to fix all instances...
1981 // Also, Power Drive Rally seems to contradict the idea that only LOADs in
1982 // the $F03000-$F03FFF range are aligned...
1983 #warning "!!! Alignment issues, need to find definitive final word on this !!!"
1985 Preliminary testing on real hardware seems to confirm that something strange goes on
1986 with unaligned reads in main memory. When the address is off by 1, the result is the
1987 same as the long address with the top byte replaced by something. So if the read is
from $401, and $400 has 12 34 56 78, the value read will be $nn345678, where nn is a currently unknown value.
1989 When the address is off by 2, the result would be $nnnn5678, where nnnn is unknown.
1990 When the address is off by 3, the result would be $nnnnnn78, where nnnnnn is unknown.
1991 It may be that the "unknown" values come from the prefetch queue, but not sure how
1992 to test that. They seem to be stable, though, which would indicate such a mechanism.
1993 Sometimes, however, the off by 2 case returns $12345678!
1995 static void gpu_opcode_load(void)
1999 WriteLog("%06X: LOAD (R%02u), R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2001 #ifdef GPU_CORRECT_ALIGNMENT
2002 uint32 mask[4] = { 0x00000000, 0xFF000000, 0xFFFF0000, 0xFFFFFF00 };
2003 // if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
2004 RN = GPUReadLong(RM & 0xFFFFFFFC, GPU);
2005 // RN = GPUReadLong(RM & 0x00FFFFFC, GPU);
2007 // RN = GPUReadLong(RM, GPU);
2008 // Simulate garbage in unaligned reads...
2009 //seems that this behavior is different in GPU mem vs. main mem...
2010 // if ((RM < 0xF03000) || (RM > 0xF0BFFF))
2011 // RN |= mask[RM & 0x03];
2013 RN = GPUReadLong(RM, GPU);
2017 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2021 static void gpu_opcode_loadp(void)
2023 #ifdef GPU_CORRECT_ALIGNMENT
2024 if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
2026 gpu_hidata = GPUReadLong((RM & 0xFFFFFFF8) + 0, GPU);
2027 RN = GPUReadLong((RM & 0xFFFFFFF8) + 4, GPU);
2031 gpu_hidata = GPUReadLong(RM + 0, GPU);
2032 RN = GPUReadLong(RM + 4, GPU);
2035 gpu_hidata = GPUReadLong(RM + 0, GPU);
2036 RN = GPUReadLong(RM + 4, GPU);
2040 static void gpu_opcode_load_r14_indexed(void)
2042 #ifdef GPU_DIS_LOAD14I
2044 WriteLog("%06X: LOAD (R14+$%02X), R%02u [NCZ:%u%u%u, R14+$%02X=%08X, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1] << 2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, gpu_convert_zero[IMM_1] << 2, gpu_reg[14]+(gpu_convert_zero[IMM_1] << 2), IMM_2, RN);
2046 #ifdef GPU_CORRECT_ALIGNMENT
2047 uint32 address = gpu_reg[14] + (gpu_convert_zero[IMM_1] << 2);
2049 if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
2050 RN = GPUReadLong(address & 0xFFFFFFFC, GPU);
2052 RN = GPUReadLong(address, GPU);
2054 RN = GPUReadLong(gpu_reg[14] + (gpu_convert_zero[IMM_1] << 2), GPU);
2056 #ifdef GPU_DIS_LOAD14I
2058 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2062 static void gpu_opcode_load_r15_indexed(void)
2064 #ifdef GPU_DIS_LOAD15I
2066 WriteLog("%06X: LOAD (R15+$%02X), R%02u [NCZ:%u%u%u, R15+$%02X=%08X, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1] << 2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, gpu_convert_zero[IMM_1] << 2, gpu_reg[15]+(gpu_convert_zero[IMM_1] << 2), IMM_2, RN);
2068 #ifdef GPU_CORRECT_ALIGNMENT
2069 uint32 address = gpu_reg[15] + (gpu_convert_zero[IMM_1] << 2);
2071 if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
2072 RN = GPUReadLong(address & 0xFFFFFFFC, GPU);
2074 RN = GPUReadLong(address, GPU);
2076 RN = GPUReadLong(gpu_reg[15] + (gpu_convert_zero[IMM_1] << 2), GPU);
2078 #ifdef GPU_DIS_LOAD15I
2080 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2084 static void gpu_opcode_movei(void)
2086 #ifdef GPU_DIS_MOVEI
2088 WriteLog("%06X: MOVEI #$%08X, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, (uint32)GPUReadWord(gpu_pc) | ((uint32)GPUReadWord(gpu_pc + 2) << 16), IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2090 // This instruction is followed by 32-bit value in LSW / MSW format...
2091 RN = (uint32)GPUReadWord(gpu_pc, GPU) | ((uint32)GPUReadWord(gpu_pc + 2, GPU) << 16);
2093 #ifdef GPU_DIS_MOVEI
2095 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2099 static void gpu_opcode_moveta(void)
2101 #ifdef GPU_DIS_MOVETA
2103 WriteLog("%06X: MOVETA R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u(alt)=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, ALTERNATE_RN);
2106 #ifdef GPU_DIS_MOVETA
2108 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u(alt)=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, ALTERNATE_RN);
2112 static void gpu_opcode_movefa(void)
2114 #ifdef GPU_DIS_MOVEFA
2116 WriteLog("%06X: MOVEFA R%02u, R%02u [NCZ:%u%u%u, R%02u(alt)=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, ALTERNATE_RM, IMM_2, RN);
2119 #ifdef GPU_DIS_MOVEFA
2121 WriteLog("[NCZ:%u%u%u, R%02u(alt)=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, ALTERNATE_RM, IMM_2, RN);
2125 static void gpu_opcode_move(void)
2129 WriteLog("%06X: MOVE R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2134 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2138 static void gpu_opcode_moveq(void)
2140 #ifdef GPU_DIS_MOVEQ
2142 WriteLog("%06X: MOVEQ #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2145 #ifdef GPU_DIS_MOVEQ
2147 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2151 static void gpu_opcode_resmac(void)
2156 static void gpu_opcode_imult(void)
2158 #ifdef GPU_DIS_IMULT
2160 WriteLog("%06X: IMULT R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2162 RN = (int16)RN * (int16)RM;
2164 #ifdef GPU_DIS_IMULT
2166 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2170 static void gpu_opcode_mult(void)
2174 WriteLog("%06X: MULT R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2176 RN = (uint16)RM * (uint16)RN;
2180 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2184 static void gpu_opcode_bclr(void)
2188 WriteLog("%06X: BCLR #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2190 uint32 res = RN & ~(1 << IMM_1);
2195 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2199 static void gpu_opcode_btst(void)
2203 WriteLog("%06X: BTST #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2205 gpu_flag_z = (~RN >> IMM_1) & 1;
2208 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2212 static void gpu_opcode_bset(void)
2216 WriteLog("%06X: BSET #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2218 uint32 res = RN | (1 << IMM_1);
2223 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2227 static void gpu_opcode_imacn(void)
2229 uint32 res = (int16)RM * (int16)(RN);
2233 static void gpu_opcode_mtoi(void)
2236 uint32 res = RN = (((int32)_RM >> 8) & 0xFF800000) | (_RM & 0x007FFFFF);
2240 static void gpu_opcode_normi(void)
2247 while ((_RM & 0xFFC00000) == 0)
2252 while ((_RM & 0xFF800000) != 0)
2262 static void gpu_opcode_mmult(void)
2264 int count = gpu_matrix_control & 0x0F; // Matrix width
2265 uint32 addr = gpu_pointer_to_matrix; // In the GPU's RAM
2269 if (gpu_matrix_control & 0x10) // Column stepping
2271 for(int i=0; i<count; i++)
2275 a = (int16)((gpu_alternate_reg[IMM_1 + (i >> 1)] >> 16) & 0xFFFF);
2277 a = (int16)(gpu_alternate_reg[IMM_1 + (i >> 1)] & 0xFFFF);
2279 int16 b = ((int16)GPUReadWord(addr + 2, GPU));
2284 else // Row stepping
2286 for(int i=0; i<count; i++)
2290 a = (int16)((gpu_alternate_reg[IMM_1 + (i >> 1)] >> 16) & 0xFFFF);
2292 a = (int16)(gpu_alternate_reg[IMM_1 + (i >> 1)] & 0xFFFF);
2294 int16 b = ((int16)GPUReadWord(addr + 2, GPU));
2299 RN = res = (int32)accum;
2300 // carry flag to do (out of the last add)
2304 static void gpu_opcode_abs(void)
2308 WriteLog("%06X: ABS R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2310 gpu_flag_c = RN >> 31;
2311 if (RN == 0x80000000)
2312 //Is 0x80000000 a positive number? If so, then we need to set C to 0 as well!
2313 gpu_flag_n = 1, gpu_flag_z = 0;
2318 gpu_flag_n = 0; SET_FLAG_Z(RN);
2322 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2326 static void gpu_opcode_div(void) // RN / RM
2330 WriteLog("%06X: DIV R%02u, R%02u (%s) [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, (gpu_div_control & 0x01 ? "16.16" : "32"), gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2332 // NOTE: remainder is NOT calculated correctly here!
2333 // The original tried to get it right by checking to see if the
2334 // remainder was negative, but that's too late...
2335 // The code there should do it now, but I'm not 100% sure...
2339 if (gpu_div_control & 0x01) // 16.16 division
2341 RN = ((uint64)RN << 16) / RM;
2342 gpu_remain = ((uint64)RN << 16) % RM;
2347 gpu_remain = RN % RM;
2350 if ((gpu_remain - RM) & 0x80000000) // If the result would have been negative...
2351 gpu_remain -= RM; // Then make it negative!
2361 if (gpu_div_control & 1)
2363 gpu_remain = (((uint64)_RN) << 16) % _RM;
2364 if (gpu_remain&0x80000000)
2366 RN = (((uint64)_RN) << 16) / _RM;
2370 gpu_remain = _RN % _RM;
2371 if (gpu_remain&0x80000000)
2380 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] Remainder: %08X\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN, gpu_remain);
2384 static void gpu_opcode_imultn(void)
2386 uint32 res = (int32)((int16)RN * (int16)RM);
2387 gpu_acc = (int32)res;
2392 static void gpu_opcode_neg(void)
2396 WriteLog("%06X: NEG R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2399 SET_ZNC_SUB(0, RN, res);
2403 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2407 static void gpu_opcode_shlq(void)
2411 WriteLog("%06X: SHLQ #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, 32 - IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2413 // Was a bug here...
2414 // (Look at Aaron's code: If r1 = 32, then 32 - 32 = 0 which is wrong!)
2415 int32 r1 = 32 - IMM_1;
2416 uint32 res = RN << r1;
2417 SET_ZN(res); gpu_flag_c = (RN >> 31) & 1;
2421 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2425 static void gpu_opcode_shrq(void)
2429 WriteLog("%06X: SHRQ #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2431 int32 r1 = gpu_convert_zero[IMM_1];
2432 uint32 res = RN >> r1;
2433 SET_ZN(res); gpu_flag_c = RN & 1;
2437 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2441 static void gpu_opcode_ror(void)
2445 WriteLog("%06X: ROR R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2447 uint32 r1 = RM & 0x1F;
2448 uint32 res = (RN >> r1) | (RN << (32 - r1));
2449 SET_ZN(res); gpu_flag_c = (RN >> 31) & 1;
2453 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2457 static void gpu_opcode_rorq(void)
2461 WriteLog("%06X: RORQ #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2463 uint32 r1 = gpu_convert_zero[IMM_1 & 0x1F];
2465 uint32 res = (r2 >> r1) | (r2 << (32 - r1));
2467 SET_ZN(res); gpu_flag_c = (r2 >> 31) & 0x01;
2470 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2474 static void gpu_opcode_sha(void)
2476 /* int dreg = jaguar.op & 31;
2477 int32 r1 = (int32)jaguar.r[(jaguar.op >> 5) & 31];
2478 uint32 r2 = jaguar.r[dreg];
2484 res = (r1 <= -32) ? 0 : (r2 << -r1);
2485 jaguar.FLAGS |= (r2 >> 30) & 2;
2489 res = (r1 >= 32) ? ((int32)r2 >> 31) : ((int32)r2 >> r1);
2490 jaguar.FLAGS |= (r2 << 1) & 2;
2492 jaguar.r[dreg] = res;
2497 WriteLog("%06X: SHA R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2503 res = ((int32)RM <= -32) ? 0 : (RN << -(int32)RM);
2504 gpu_flag_c = RN >> 31;
2508 res = ((int32)RM >= 32) ? ((int32)RN >> 31) : ((int32)RN >> (int32)RM);
2509 gpu_flag_c = RN & 0x01;
2515 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2518 /* int32 sRM=(int32)RM;
2524 if (shift>=32) shift=32;
2525 gpu_flag_c=(_RN&0x80000000)>>31;
2535 if (shift>=32) shift=32;
2539 _RN=((int32)_RN)>>1;
2548 static void gpu_opcode_sharq(void)
2550 #ifdef GPU_DIS_SHARQ
2552 WriteLog("%06X: SHARQ #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2554 uint32 res = (int32)RN >> gpu_convert_zero[IMM_1];
2555 SET_ZN(res); gpu_flag_c = RN & 0x01;
2557 #ifdef GPU_DIS_SHARQ
2559 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2563 static void gpu_opcode_sh(void)
2567 WriteLog("%06X: SH R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2569 if (RM & 0x80000000) // Shift left
2571 gpu_flag_c = RN >> 31;
2572 RN = ((int32)RM <= -32 ? 0 : RN << -(int32)RM);
2576 gpu_flag_c = RN & 0x01;
2577 RN = (RM >= 32 ? 0 : RN >> RM);
2582 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2586 //Temporary: Testing only!
2587 //#include "gpu2.cpp"
2588 //#include "gpu3.cpp"
2592 // New thread-safe GPU core
2594 int GPUCore(void * data)