6 // Originally by David Raingeard (Cal2)
7 // GCC/SDL port by Niels Wagenaar (Linux/WIN32) and Caz (BeOS)
8 // Cleanups, endian wrongness, and bad ASM amelioration by James Hammons
9 // (C) 2010 Underground Software
11 // JLH = James Hammons <jlhamm@acm.org>
14 // --- ---------- -------------------------------------------------------------
15 // JLH 01/16/2010 Created this log ;-)
16 // JLH 11/26/2011 Added fixes for LOAD/STORE alignment issues
19 // Note: Endian wrongness probably stems from the MAME origins of this emu and
20 // the braindead way in which MAME handles memory. :-)
22 // Problem with not booting the BIOS was the incorrect way that the
23 // SUBC instruction set the carry when the carry was set going in...
24 // Same problem with ADDC...
30 #include <string.h> // For memset
35 #include "m68000/m68kinterface.h"
// Compile-time switch, defined with no value.
// NOTE(review): the code that tests this macro is not visible in this listing; the
// changelog entry about LOAD/STORE alignment fixes suggests where to look -- confirm.
40 // Seems alignment in loads & stores was off...
41 #define GPU_CORRECT_ALIGNMENT
// Per-opcode compile-time switches, each defined with no value.
// NOTE(review): the #ifdef sites that consume them are not visible in this listing.
44 // For GPU disassembly...
65 #define GPU_DIS_LOAD14I
66 #define GPU_DIS_LOAD14R
67 #define GPU_DIS_LOAD15I
68 #define GPU_DIS_LOAD15R
70 #define GPU_DIS_MOVEFA
72 #define GPU_DIS_MOVEPC
73 #define GPU_DIS_MOVETA
90 #define GPU_DIS_STOREB
91 #define GPU_DIS_STOREW
92 #define GPU_DIS_STORE14I
93 #define GPU_DIS_STORE14R
94 #define GPU_DIS_STORE15I
95 #define GPU_DIS_STORE15R
// Run-time flag with external linkage, initialised to true (the false initialiser is
// left commented out). NOTE(review): presumably gates the GPU_DIS_* logging -- confirm
// at the use sites, which are not visible here.
102 //bool doGPUDis = false;
103 bool doGPUDis = true;
107 GPU opcodes use (BIOS flying ATARI logo):
// Bit masks for bits 9..13 of the value written to the flags register.
// GPUWriteLong shifts the written CINT bits right by 3 and clears the matching bits in
// gpu_control (0x0200 >> 3 == 0x0040, the latch mask base used by GPUSetIRQLine).
149 #define CINT0FLAG 0x0200
150 #define CINT1FLAG 0x0400
151 #define CINT2FLAG 0x0800
152 #define CINT3FLAG 0x1000
153 #define CINT4FLAG 0x2000
// All five CINT bits OR-ed together.
154 #define CINT04FLAGS (CINT0FLAG | CINT1FLAG | CINT2FLAG | CINT3FLAG | CINT4FLAG)
// Bit masks inside gpu_flags.
// Bits 0..2: the packed form of gpu_flag_z / gpu_flag_c / gpu_flag_n (GPUReadLong packs
// them this way and GPUWriteLong unpacks them).
158 #define ZERO_FLAG 0x0001
159 #define CARRY_FLAG 0x0002
160 #define NEGA_FLAG 0x0004
// Bits 4..8: interrupt enables; GPUHandleIRQs extracts them with (gpu_flags >> 4) & 0x1F.
162 #define INT_ENA0 0x0010
163 #define INT_ENA1 0x0020
164 #define INT_ENA2 0x0040
165 #define INT_ENA3 0x0080
166 #define INT_ENA4 0x0100
// Bits 9..13: same numeric values as CINT0FLAG..CINT4FLAG.
167 #define INT_CLR0 0x0200
168 #define INT_CLR1 0x0400
169 #define INT_CLR2 0x0800
170 #define INT_CLR3 0x1000
171 #define INT_CLR4 0x2000
// Bit 14: register bank select, tested by GPUUpdateRegisterBanks.
172 #define REGPAGE 0x4000
175 // External global variables
// Both are defined in another translation unit. In the part of the file visible here
// they appear only in commented-out code.
177 extern int start_logging;
178 extern int gpu_start_log;
180 // Private function prototypes
// NOTE(review): these four are declared without `static`, so despite the heading above
// they have external linkage.
182 void GPUUpdateRegisterBanks(void);
183 void GPUDumpDisassembly(void);
184 void GPUDumpRegisters(void);
185 void GPUDumpMemory(void);
// One handler per opcode, file-local. They are declared in the same order in which the
// gpu_opcode dispatch table lists them; every handler takes no arguments and returns
// nothing, working on the file-scope GPU state instead.
187 static void gpu_opcode_add(void);
188 static void gpu_opcode_addc(void);
189 static void gpu_opcode_addq(void);
190 static void gpu_opcode_addqt(void);
191 static void gpu_opcode_sub(void);
192 static void gpu_opcode_subc(void);
193 static void gpu_opcode_subq(void);
194 static void gpu_opcode_subqt(void);
195 static void gpu_opcode_neg(void);
196 static void gpu_opcode_and(void);
197 static void gpu_opcode_or(void);
198 static void gpu_opcode_xor(void);
199 static void gpu_opcode_not(void);
200 static void gpu_opcode_btst(void);
201 static void gpu_opcode_bset(void);
202 static void gpu_opcode_bclr(void);
203 static void gpu_opcode_mult(void);
204 static void gpu_opcode_imult(void);
205 static void gpu_opcode_imultn(void);
206 static void gpu_opcode_resmac(void);
207 static void gpu_opcode_imacn(void);
208 static void gpu_opcode_div(void);
209 static void gpu_opcode_abs(void);
210 static void gpu_opcode_sh(void);
211 static void gpu_opcode_shlq(void);
212 static void gpu_opcode_shrq(void);
213 static void gpu_opcode_sha(void);
214 static void gpu_opcode_sharq(void);
215 static void gpu_opcode_ror(void);
216 static void gpu_opcode_rorq(void);
217 static void gpu_opcode_cmp(void);
218 static void gpu_opcode_cmpq(void);
219 static void gpu_opcode_sat8(void);
220 static void gpu_opcode_sat16(void);
221 static void gpu_opcode_move(void);
222 static void gpu_opcode_moveq(void);
223 static void gpu_opcode_moveta(void);
224 static void gpu_opcode_movefa(void);
225 static void gpu_opcode_movei(void);
226 static void gpu_opcode_loadb(void);
227 static void gpu_opcode_loadw(void);
228 static void gpu_opcode_load(void);
229 static void gpu_opcode_loadp(void);
230 static void gpu_opcode_load_r14_indexed(void);
231 static void gpu_opcode_load_r15_indexed(void);
232 static void gpu_opcode_storeb(void);
233 static void gpu_opcode_storew(void);
234 static void gpu_opcode_store(void);
235 static void gpu_opcode_storep(void);
236 static void gpu_opcode_store_r14_indexed(void);
237 static void gpu_opcode_store_r15_indexed(void);
238 static void gpu_opcode_move_pc(void);
239 static void gpu_opcode_jump(void);
240 static void gpu_opcode_jr(void);
241 static void gpu_opcode_mmult(void);
242 static void gpu_opcode_mtoi(void);
243 static void gpu_opcode_normi(void);
244 static void gpu_opcode_nop(void);
245 static void gpu_opcode_load_r14_ri(void);
246 static void gpu_opcode_load_r15_ri(void);
247 static void gpu_opcode_store_r14_ri(void);
248 static void gpu_opcode_store_r15_ri(void);
249 static void gpu_opcode_sat24(void);
250 static void gpu_opcode_pack(void);
252 // This is wrong, since it doesn't take pipeline effects into account. !!! FIX !!!
253 /*uint8 gpu_opcode_cycles[64] =
255 3, 3, 3, 3, 3, 3, 3, 3,
256 3, 3, 3, 3, 3, 3, 3, 3,
257 3, 3, 1, 3, 1, 18, 3, 3,
258 3, 3, 3, 3, 3, 3, 3, 3,
259 3, 3, 2, 2, 2, 2, 3, 4,
260 5, 4, 5, 6, 6, 1, 1, 1,
261 1, 2, 2, 2, 1, 1, 9, 3,
262 3, 1, 6, 6, 2, 2, 3, 3
264 //Here's a QnD kludge...
265 //This is wrong, wrong, WRONG, but it seems to work for the time being...
266 //(That is, it fixes Flip Out which relies on GPU timing rather than semaphores. Bad developers! Bad!)
267 //What's needed here is a way to take pipeline effects into account (including pipeline stalls!)...
268 /*uint8 gpu_opcode_cycles[64] =
270 1, 1, 1, 1, 1, 1, 1, 1,
271 1, 1, 1, 1, 1, 1, 1, 1,
272 1, 1, 1, 1, 1, 9, 1, 1,
273 1, 1, 1, 1, 1, 1, 1, 1,
274 1, 1, 1, 1, 1, 1, 1, 2,
275 2, 2, 2, 3, 3, 1, 1, 1,
276 1, 1, 1, 1, 1, 1, 4, 1,
277 1, 1, 3, 3, 1, 1, 1, 1
// Cycle cost charged per opcode: GPUExec subtracts gpu_opcode_cycles[index] from its
// cycle budget after each dispatched instruction. All 64 entries listed are 1.
// NOTE(review): the brace lines of the initialiser are absent from this listing.
279 uint8 gpu_opcode_cycles[64] =
281 1, 1, 1, 1, 1, 1, 1, 1,
282 1, 1, 1, 1, 1, 1, 1, 1,
283 1, 1, 1, 1, 1, 1, 1, 1,
284 1, 1, 1, 1, 1, 1, 1, 1,
285 1, 1, 1, 1, 1, 1, 1, 1,
286 1, 1, 1, 1, 1, 1, 1, 1,
287 1, 1, 1, 1, 1, 1, 1, 1,
288 1, 1, 1, 1, 1, 1, 1, 1
// Opcode dispatch table. GPUExec indexes it with the top six bits of the 16-bit
// instruction word (opcode >> 10) and calls the selected handler. The entry order is
// the same as in gpu_opcode_str, so one index serves both tables.
291 void (*gpu_opcode[64])()=
293 gpu_opcode_add, gpu_opcode_addc, gpu_opcode_addq, gpu_opcode_addqt,
294 gpu_opcode_sub, gpu_opcode_subc, gpu_opcode_subq, gpu_opcode_subqt,
295 gpu_opcode_neg, gpu_opcode_and, gpu_opcode_or, gpu_opcode_xor,
296 gpu_opcode_not, gpu_opcode_btst, gpu_opcode_bset, gpu_opcode_bclr,
297 gpu_opcode_mult, gpu_opcode_imult, gpu_opcode_imultn, gpu_opcode_resmac,
298 gpu_opcode_imacn, gpu_opcode_div, gpu_opcode_abs, gpu_opcode_sh,
299 gpu_opcode_shlq, gpu_opcode_shrq, gpu_opcode_sha, gpu_opcode_sharq,
300 gpu_opcode_ror, gpu_opcode_rorq, gpu_opcode_cmp, gpu_opcode_cmpq,
301 gpu_opcode_sat8, gpu_opcode_sat16, gpu_opcode_move, gpu_opcode_moveq,
302 gpu_opcode_moveta, gpu_opcode_movefa, gpu_opcode_movei, gpu_opcode_loadb,
303 gpu_opcode_loadw, gpu_opcode_load, gpu_opcode_loadp, gpu_opcode_load_r14_indexed,
304 gpu_opcode_load_r15_indexed, gpu_opcode_storeb, gpu_opcode_storew, gpu_opcode_store,
305 gpu_opcode_storep, gpu_opcode_store_r14_indexed, gpu_opcode_store_r15_indexed, gpu_opcode_move_pc,
306 gpu_opcode_jump, gpu_opcode_jr, gpu_opcode_mmult, gpu_opcode_mtoi,
307 gpu_opcode_normi, gpu_opcode_nop, gpu_opcode_load_r14_ri, gpu_opcode_load_r15_ri,
308 gpu_opcode_store_r14_ri, gpu_opcode_store_r15_ri, gpu_opcode_sat24, gpu_opcode_pack,
// 0x1000 bytes of GPU work RAM; the byte/word accessors index it with (offset & 0xFFF).
311 static uint8 gpu_ram_8[0x1000];
// File-local GPU register state. GPUWriteLong stores into gpu_flags, gpu_matrix_control,
// gpu_pointer_to_matrix, gpu_data_organization, gpu_control and gpu_div_control.
313 static uint32 gpu_acc;
314 static uint32 gpu_remain;
315 static uint32 gpu_hidata;
316 static uint32 gpu_flags;
317 static uint32 gpu_matrix_control;
318 static uint32 gpu_pointer_to_matrix;
319 static uint32 gpu_data_organization;
320 static uint32 gpu_control;
321 static uint32 gpu_div_control;
322 // There is a distinct advantage to having these separated out--there's no need to clear
323 // a bit before writing a result. I.e., if the result of an operation leaves a zero in
324 // the carry flag, you don't have to zero gpu_flag_c before you can write that zero!
// GPUReadLong folds these three back into bits 0..2 of gpu_flags when the flags register
// is read; GPUWriteLong refreshes them from a written flags value.
325 static uint8 gpu_flag_z, gpu_flag_n, gpu_flag_c;
// Two banks of 32 registers. gpu_reg points at the currently active bank and
// gpu_alternate_reg at the other one; GPUUpdateRegisterBanks swaps the pointers.
326 static uint32 gpu_reg_bank_0[32];
327 static uint32 gpu_reg_bank_1[32];
328 static uint32 * gpu_reg;
329 static uint32 * gpu_alternate_reg;
// Decode state for the instruction being executed, filled in by GPUExec: the raw
// 16-bit word, bits 9..5, and bits 4..0 respectively.
331 static uint32 gpu_instruction;
332 static uint32 gpu_opcode_first_parameter;
333 static uint32 gpu_opcode_second_parameter;
// Non-zero while bit 0 of gpu_control is set; GPUExec loops only while this holds.
335 #define GPU_RUNNING (gpu_control & 0x01)
// Operand shorthands for the opcode handlers. The two 5-bit fields decoded by GPUExec
// are used as register numbers in the active bank (RM / RN), as register numbers in
// the other bank (ALTERNATE_RM / ALTERNATE_RN), or as raw immediates (IMM_1 / IMM_2).
337 #define RM gpu_reg[gpu_opcode_first_parameter]
338 #define RN gpu_reg[gpu_opcode_second_parameter]
339 #define ALTERNATE_RM gpu_alternate_reg[gpu_opcode_first_parameter]
340 #define ALTERNATE_RN gpu_alternate_reg[gpu_opcode_second_parameter]
341 #define IMM_1 gpu_opcode_first_parameter
342 #define IMM_2 gpu_opcode_second_parameter
// Flag helpers operating on the separate gpu_flag_z / gpu_flag_n / gpu_flag_c bytes.
// NOTE(review): SET_FLAG_* and RESET_FLAG_* carry their own trailing ';', and SET_ZN /
// SET_ZNC_* expand to several statements. Used as the sole body of an unbraced if/else,
// they will not behave like a single statement. The call sites are not visible here.
// Z = (r == 0); N = bit 31 of r.
344 #define SET_FLAG_Z(r) (gpu_flag_z = ((r) == 0));
345 #define SET_FLAG_N(r) (gpu_flag_n = (((uint32)(r) >> 31) & 0x01));
347 #define RESET_FLAG_Z() gpu_flag_z = 0;
348 #define RESET_FLAG_N() gpu_flag_n = 0;
349 #define RESET_FLAG_C() gpu_flag_c = 0;
// Expression forms (no trailing ';').
351 #define CLR_Z (gpu_flag_z = 0)
352 #define CLR_ZN (gpu_flag_z = gpu_flag_n = 0)
353 #define CLR_ZNC (gpu_flag_z = gpu_flag_n = gpu_flag_c = 0)
354 #define SET_Z(r) (gpu_flag_z = ((r) == 0))
355 #define SET_N(r) (gpu_flag_n = (((uint32)(r) >> 31) & 0x01))
// Carry out of a + b: set when b > ~a, i.e. when the 32-bit sum would wrap.
356 #define SET_C_ADD(a,b) (gpu_flag_c = ((uint32)(b) > (uint32)(~(a))))
// Borrow for a - b: set when b > a (unsigned compare).
357 #define SET_C_SUB(a,b) (gpu_flag_c = ((uint32)(b) > (uint32)(a)))
358 #define SET_ZN(r) SET_N(r); SET_Z(r)
359 #define SET_ZNC_ADD(a,b,r) SET_N(r); SET_Z(r); SET_C_ADD(a,b)
360 #define SET_ZNC_SUB(a,b,r) SET_N(r); SET_Z(r); SET_C_SUB(a,b)
// Lookup that maps 0 to 32 and leaves 1..31 unchanged.
// NOTE(review): the users are not visible in this listing -- presumably the handlers
// whose 5-bit immediate encodes 32 as 0; confirm.
362 uint32 gpu_convert_zero[32] =
363 { 32,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 };
// 8 x 32 table allocated and filled by build_branch_condition_table(); null until then.
365 uint8 * branch_condition_table = 0;
// Looks up condition code x under the current flags. The macro reads a variable named
// `jaguar_flags` that must be in scope at the point of use (gpu_opcode_jump builds it
// as N<<2 | C<<1 | Z). The index x + flags*32 matches the table's [i * 32 + j] layout.
366 #define BRANCH_CONDITION(x) branch_condition_table[(x) + ((jaguar_flags & 7) << 5)]
// Per-opcode execution counters: incremented by GPUExec, zeroed by GPUResetStats, and
// printed together with the names below when the GPU shuts down.
368 uint32 gpu_opcode_use[64];
// Mnemonic for each opcode index, in the same order as the gpu_opcode dispatch table.
370 const char * gpu_opcode_str[64]=
372 "add", "addc", "addq", "addqt",
373 "sub", "subc", "subq", "subqt",
374 "neg", "and", "or", "xor",
375 "not", "btst", "bset", "bclr",
376 "mult", "imult", "imultn", "resmac",
377 "imacn", "div", "abs", "sh",
378 "shlq", "shrq", "sha", "sharq",
379 "ror", "rorq", "cmp", "cmpq",
380 "sat8", "sat16", "move", "moveq",
381 "moveta", "movefa", "movei", "loadb",
382 "loadw", "load", "loadp", "load_r14_indexed",
383 "load_r15_indexed", "storeb", "storew", "store",
384 "storep", "store_r14_indexed","store_r15_indexed","move_pc",
385 "jump", "jr", "mmult", "mtoi",
386 "normi", "nop", "load_r14_ri", "load_r15_ri",
387 "store_r14_ri", "store_r15_ri", "sat24", "pack",
// NOTE(review): no use of gpu_in_exec is visible in this listing.
390 static uint32 gpu_in_exec = 0;
// Set by GPUReleaseTimeslice(); GPUExec clears it again (see the assignment there).
391 static uint32 gpu_releaseTimeSlice_flag = 0;
// Raises the release-timeslice flag. Called from GPUWriteLong after a GPU->CPU
// interrupt has been posted.
393 void GPUReleaseTimeslice(void)
395 gpu_releaseTimeSlice_flag = 1;
// NOTE(review): only the signature survives in this listing. Judging by the name this
// is an accessor for gpu_pc -- confirm against the full file.
398 uint32 GPUGetPC(void)
// Allocates (once) and fills branch_condition_table, the lookup used by the
// BRANCH_CONDITION macro.
//  - The table holds 32 * 8 one-byte entries: outer index i runs over the 8 possible
//    values of the packed N/C/Z flags, inner index j over the 32 condition codes.
//  - The allocation is skipped when the pointer is already non-null, and nothing in the
//    visible code frees it.
//  - The visible tests check the zero bit of i and the bit (CARRY_FLAG << (j >> 4)),
//    which is the carry bit for j < 16 and the negative bit for j >= 16.
// NOTE(review): the lines that initialise `result` and that decide which test applies
// to which j are absent from this listing. malloc needs <stdlib.h>, which is not among
// the includes visible here -- confirm.
403 void build_branch_condition_table(void)
405 if (!branch_condition_table)
407 branch_condition_table = (uint8 *)malloc(32 * 8 * sizeof(branch_condition_table[0]));
409 if (branch_condition_table)
411 for(int i=0; i<8; i++)
413 for(int j=0; j<32; j++)
420 if (!(i & ZERO_FLAG))
423 if (i & (CARRY_FLAG << (j >> 4)))
426 if (!(i & (CARRY_FLAG << (j >> 4))))
428 branch_condition_table[i * 32 + j] = result;
436 // GPU byte access (read)
// Reads one byte on behalf of `who` (used as an index into whoName for log messages).
//  - 0xF02000..0xF020FF: a register-file read attempt is logged.
//  - GPU work RAM (GPU_WORK_RAM_BASE .. +0x0FFF): returned from gpu_ram_8[offset & 0xFFF].
//  - GPU control registers (GPU_CONTROL_RAM_BASE .. +0x1F): the enclosing aligned long
//    is fetched through GPUReadLong and one byte lane is picked from it. Lane 1 comes
//    from bits 23..16 and lane 2 from bits 15..8; the returns for lanes 0 and 3 are
//    absent from this listing.
//  - Any other offset is forwarded to JaguarReadByte.
438 uint8 GPUReadByte(uint32 offset, uint32 who/*=UNKNOWN*/)
440 if (offset >= 0xF02000 && offset <= 0xF020FF)
441 WriteLog("GPU: ReadByte--Attempt to read from GPU register file by %s!\n", whoName[who]);
443 if ((offset >= GPU_WORK_RAM_BASE) && (offset < GPU_WORK_RAM_BASE+0x1000))
444 return gpu_ram_8[offset & 0xFFF];
445 else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset < GPU_CONTROL_RAM_BASE+0x20))
447 uint32 data = GPUReadLong(offset & 0xFFFFFFFC, who);
449 if ((offset & 0x03) == 0)
451 else if ((offset & 0x03) == 1)
452 return (data >> 16) & 0xFF;
453 else if ((offset & 0x03) == 2)
454 return (data >> 8) & 0xFF;
455 else if ((offset & 0x03) == 3)
459 return JaguarReadByte(offset, who);
463 // GPU word access (read)
// Reads a 16-bit big-endian value on behalf of `who`.
//  - 0xF02000..0xF020FF: a register-file read attempt is logged.
//  - GPU work RAM: the word is assembled from two consecutive gpu_ram_8 bytes, high
//    byte first. NOTE(review): gpu_ram_8 is indexed with `offset` directly here;
//    presumably it is reduced to 0..0xFFF on a line missing from this listing -- confirm.
//  - GPU control registers: an odd offset is served as two GPUReadByte calls. Otherwise
//    the aligned long is read and `offset & 0x02` selects its low half; the return for
//    the high half is absent from this listing.
//  - Any other offset is forwarded to JaguarReadWord.
465 uint16 GPUReadWord(uint32 offset, uint32 who/*=UNKNOWN*/)
467 if (offset >= 0xF02000 && offset <= 0xF020FF)
468 WriteLog("GPU: ReadWord--Attempt to read from GPU register file by %s!\n", whoName[who]);
470 if ((offset >= GPU_WORK_RAM_BASE) && (offset < GPU_WORK_RAM_BASE+0x1000))
473 uint16 data = ((uint16)gpu_ram_8[offset] << 8) | (uint16)gpu_ram_8[offset+1];
476 else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset < GPU_CONTROL_RAM_BASE+0x20))
478 // This looks and smells wrong...
479 // But it *might* be OK...
480 if (offset & 0x01) // Catch cases 1 & 3... (unaligned read)
// NOTE(review): the order in which the two GPUReadByte calls run is unspecified.
481 return (GPUReadByte(offset, who) << 8) | GPUReadByte(offset+1, who);
483 uint32 data = GPUReadLong(offset & 0xFFFFFFFC, who);
485 if (offset & 0x02) // Cases 0 & 2...
486 return data & 0xFFFF;
491 //TEMP--Mirror of F03000? No. Writes only...
492 //if (offset >= 0xF0B000 && offset <= 0xF0BFFF)
493 //WriteLog("[GPUR16] --> Possible GPU RAM mirror access by %s!", whoName[who]);
495 return JaguarReadWord(offset, who);
499 // GPU dword access (read)
// Reads a 32-bit big-endian value on behalf of `who`.
//  - 0xF02000..0xF020FF: a register-file read attempt is logged.
//  - GPU work RAM (GPU_WORK_RAM_BASE .. +0x0FFC inclusive): assembled from four
//    consecutive gpu_ram_8 bytes, most significant first. NOTE(review): the index is
//    `offset` as written; presumably it is reduced to 0..0xFFF on a line missing from
//    this listing -- confirm.
//  - GPU control registers (GPU_CONTROL_RAM_BASE .. +0x1C inclusive): served from the
//    matching state variable. The case labels are absent from this listing, so which
//    offset maps to which return has to be confirmed against the full file.
//  - Any other offset is read as two JaguarReadWord calls, high word first.
501 uint32 GPUReadLong(uint32 offset, uint32 who/*=UNKNOWN*/)
503 if (offset >= 0xF02000 && offset <= 0xF020FF)
504 WriteLog("GPU: ReadLong--Attempt to read from GPU register file by %s!\n", whoName[who]);
506 // if ((offset >= GPU_WORK_RAM_BASE) && (offset < GPU_WORK_RAM_BASE + 0x1000))
507 if ((offset >= GPU_WORK_RAM_BASE) && (offset <= GPU_WORK_RAM_BASE + 0x0FFC))
510 return ((uint32)gpu_ram_8[offset] << 24) | ((uint32)gpu_ram_8[offset+1] << 16)
511 | ((uint32)gpu_ram_8[offset+2] << 8) | (uint32)gpu_ram_8[offset+3];//*/
512 // return GET32(gpu_ram_8, offset);
514 // else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset < GPU_CONTROL_RAM_BASE+0x20))
515 else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset <= GPU_CONTROL_RAM_BASE + 0x1C))
// Flags read-back: normalise the three flag bytes to 0/1, pack them into bits 0..2 of
// gpu_flags, and hide bits 9..13 (mask 0xFFFFC1FF) from the reader.
521 gpu_flag_c = (gpu_flag_c ? 1 : 0);
522 gpu_flag_z = (gpu_flag_z ? 1 : 0);
523 gpu_flag_n = (gpu_flag_n ? 1 : 0);
525 gpu_flags = (gpu_flags & 0xFFFFFFF8) | (gpu_flag_n << 2) | (gpu_flag_c << 1) | gpu_flag_z;
527 return gpu_flags & 0xFFFFC1FF;
529 return gpu_matrix_control;
531 return gpu_pointer_to_matrix;
533 return gpu_data_organization;
542 default: // unaligned long read
544 WriteLog("GPU: Read32--unaligned 32 bit read at %08X by %s.\n", GPU_CONTROL_RAM_BASE + offset, whoName[who]);
549 //TEMP--Mirror of F03000? No. Writes only...
550 //if (offset >= 0xF0B000 && offset <= 0xF0BFFF)
551 // WriteLog("[GPUR32] --> Possible GPU RAM mirror access by %s!\n", whoName[who]);
552 /*if (offset >= 0xF1D000 && offset <= 0xF1DFFF)
553 WriteLog("[GPUR32] --> Reading from Wavetable ROM!\n");//*/
// The high word is widened to uint32 before shifting: a 16-bit value promotes to signed
// int, and shifting a value with bit 15 set left by 16 overflows int (undefined).
555 return ((uint32)JaguarReadWord(offset, who) << 16) | JaguarReadWord(offset + 2, who);
559 // GPU byte access (write)
// Writes one byte on behalf of `who`.
//  - 0xF02000..0xF020FF: a register-file write attempt is logged.
//  - GPU work RAM (GPU_WORK_RAM_BASE .. +0x0FFF): stored into gpu_ram_8[offset & 0xFFF].
//    The timeslice-release calls that follow sit under conditions missing from this
//    listing.
//  - GPU control registers (GPU_CONTROL_RAM_BASE .. +0x1F): register 0x1C (divide
//    control) is patched in place with the byte number used as-is. Every other register
//    is read-modify-written through GPUReadLong / GPUWriteLong with the byte number
//    flipped (3 - n) for big-endian order.
//  - Any other offset is forwarded to JaguarWriteByte.
// All byte-lane shifts use uint32 operands: (0xFF << 24) and (data << 24) evaluated in
// signed int shift into the sign bit, which is undefined behaviour.
561 void GPUWriteByte(uint32 offset, uint8 data, uint32 who/*=UNKNOWN*/)
563 if (offset >= 0xF02000 && offset <= 0xF020FF)
564 WriteLog("GPU: WriteByte--Attempt to write to GPU register file by %s!\n", whoName[who]);
566 if ((offset >= GPU_WORK_RAM_BASE) && (offset <= GPU_WORK_RAM_BASE + 0x0FFF))
568 gpu_ram_8[offset & 0xFFF] = data;
570 //This is the same stupid worthless code that was in the DSP!!! AARRRGGGGHHHHH!!!!!!
573 m68k_end_timeslice();
574 dsp_releaseTimeslice();
578 else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset <= GPU_CONTROL_RAM_BASE + 0x1F))
580 uint32 reg = offset & 0x1C;
581 int bytenum = offset & 0x03;
583 //This is definitely wrong!
// reg is (offset & 0x1C), so within this range the test is true exactly when reg == 0x1C.
584 if ((reg >= 0x1C) && (reg <= 0x1F))
585 gpu_div_control = (gpu_div_control & (~((uint32)0xFF << (bytenum << 3)))) | ((uint32)data << (bytenum << 3));
// Alignment mask written with all eight hex digits, matching GPUReadByte; the result is
// unchanged for the offsets that can reach this branch.
588 uint32 old_data = GPUReadLong(offset & 0xFFFFFFFC, who);
589 bytenum = 3 - bytenum; // Motorola (big-endian) byte order
590 old_data = (old_data & (~((uint32)0xFF << (bytenum << 3)))) | ((uint32)data << (bytenum << 3));
591 GPUWriteLong(offset & 0xFFFFFFFC, old_data, who);
595 // WriteLog("gpu: writing %.2x at 0x%.8x\n",data,offset);
596 JaguarWriteByte(offset, data, who);
600 // GPU word access (write)
// Writes a 16-bit big-endian value on behalf of `who`.
//  - 0xF02000..0xF020FF: a register-file write attempt is logged.
//  - GPU work RAM (GPU_WORK_RAM_BASE .. +0x0FFE): stored high byte first. Both the manual
//    byte stores and the SET16 call appear below; the preprocessor lines choosing between
//    them are absent from this listing.
//  - GPU control registers (GPU_CONTROL_RAM_BASE .. +0x1E): the addressed half of the
//    register is replaced. Register 0x1C (divide control) is patched directly; the rest
//    are read-modify-written through GPUReadLong / GPUWriteLong. The conditions choosing
//    the low or high half are absent from this listing.
//  - The last byte of either range is logged as an unaligned write.
//  - Any other offset is forwarded to JaguarWriteWord.
// `data` is widened to uint32 before being shifted into the high half: in signed int,
// a value with bit 15 set shifted left by 16 overflows (undefined behaviour).
602 void GPUWriteWord(uint32 offset, uint16 data, uint32 who/*=UNKNOWN*/)
604 if (offset >= 0xF02000 && offset <= 0xF020FF)
605 WriteLog("GPU: WriteWord--Attempt to write to GPU register file by %s!\n", whoName[who]);
607 if ((offset >= GPU_WORK_RAM_BASE) && (offset <= GPU_WORK_RAM_BASE + 0x0FFE))
609 gpu_ram_8[offset & 0xFFF] = (data>>8) & 0xFF;
610 gpu_ram_8[(offset+1) & 0xFFF] = data & 0xFF;//*/
612 SET16(gpu_ram_8, offset, data);//*/
614 /*if (offset >= 0xF03214 && offset < 0xF0321F)
615 WriteLog("GPU: Writing WORD (%04X) to GPU RAM (%08X)...\n", data, offset);//*/
618 //This is the same stupid worthless code that was in the DSP!!! AARRRGGGGHHHHH!!!!!!
621 m68k_end_timeslice();
622 dsp_releaseTimeslice();
626 else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset <= GPU_CONTROL_RAM_BASE + 0x1E))
628 if (offset & 0x01) // This is supposed to weed out unaligned writes, but does nothing...
631 WriteLog("GPU: Write16--unaligned write @ %08X [%04X]\n", offset, data);
636 //Dual locations in this range: $1C Divide unit remainder/Divide unit control (R/W)
637 //This just literally sucks.
638 if ((offset & 0x1C) == 0x1C)
640 //This doesn't look right either--handles cases 1, 2, & 3 all the same!
642 gpu_div_control = (gpu_div_control & 0xFFFF0000) | (data & 0xFFFF);
644 gpu_div_control = (gpu_div_control & 0x0000FFFF) | ((uint32)data << 16);
648 //WriteLog("[GPU W16:%08X,%04X]", offset, data);
// Alignment mask written with all eight hex digits, matching GPUReadWord; the result is
// unchanged for the offsets that can reach this branch.
649 uint32 old_data = GPUReadLong(offset & 0xFFFFFFFC, who);
652 old_data = (old_data & 0xFFFF0000) | (data & 0xFFFF);
654 old_data = (old_data & 0x0000FFFF) | ((uint32)data << 16);
656 GPUWriteLong(offset & 0xFFFFFFFC, old_data, who);
// The second operand used to be the bare constant (GPU_CONTROL_RAM_BASE + 0x1F), which
// is always non-zero, so the condition held for every offset that got this far.
// NOTE(review): with the comparison restored, offsets outside the GPU ranges now skip
// this branch and continue to the code after it; re-check the infinite-loop warning
// below against the full file.
661 else if ((offset == GPU_WORK_RAM_BASE + 0x0FFF) || (offset == GPU_CONTROL_RAM_BASE + 0x1F))
664 WriteLog("GPU: Write16--unaligned write @ %08X by %s [%04X]!\n", offset, whoName[who], data);
670 // Have to be careful here--this can cause an infinite loop!
671 JaguarWriteWord(offset, data, who);
675 // GPU dword access (write)
// Writes a 32-bit value on behalf of `who`.
//  - 0xF02000..0xF020FF: a register-file write attempt is logged.
//  - GPU work RAM (GPU_WORK_RAM_BASE .. +0x0FFC inclusive): stored with SET32.
//  - GPU control registers (GPU_CONTROL_RAM_BASE .. +0x1C inclusive): handled per
//    register. The case labels are absent from this listing, so the register each group
//    of statements belongs to has to be read off the variable it assigns.
//  - Any other offset is forwarded to JaguarWriteLong.
// Much of the body is commented-out debugging code for specific GPU program counters.
677 void GPUWriteLong(uint32 offset, uint32 data, uint32 who/*=UNKNOWN*/)
679 if (offset >= 0xF02000 && offset <= 0xF020FF)
680 WriteLog("GPU: WriteLong--Attempt to write to GPU register file by %s!\n", whoName[who]);
682 // if ((offset >= GPU_WORK_RAM_BASE) && (offset < GPU_WORK_RAM_BASE + 0x1000))
683 if ((offset >= GPU_WORK_RAM_BASE) && (offset <= GPU_WORK_RAM_BASE + 0x0FFC))
688 WriteLog("GPU: Write32--unaligned write @ %08X [%08X] by %s\n", offset, data, whoName[who]);
694 SET32(gpu_ram_8, offset, data);
697 // else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset < GPU_CONTROL_RAM_BASE+0x20))
698 else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset <= GPU_CONTROL_RAM_BASE + 0x1C))
// Flags write. Remember whether this write clears a previously set IMASK, store the
// value without IMASK, refresh the three flag bytes, re-select the register banks, and
// let the written CINT bits (shifted right by 3) clear the matching bits in gpu_control.
705 bool IMASKCleared = (gpu_flags & IMASK) && !(data & IMASK);
706 // NOTE: According to the JTRM, writing a 1 to IMASK has no effect; only the
707 // IRQ logic can set it. So we mask it out here to prevent problems...
708 gpu_flags = data & (~IMASK);
709 gpu_flag_z = gpu_flags & ZERO_FLAG;
710 gpu_flag_c = (gpu_flags & CARRY_FLAG) >> 1;
711 gpu_flag_n = (gpu_flags & NEGA_FLAG) >> 2;
712 GPUUpdateRegisterBanks();
713 gpu_control &= ~((gpu_flags & CINT04FLAGS) >> 3); // Interrupt latch clear bits
714 //Writing here is only an interrupt enable--this approach is just plain wrong!
716 //This, however, is A-OK! ;-)
717 if (IMASKCleared) // If IMASK was cleared,
718 GPUHandleIRQs(); // see if any other interrupts need servicing!
720 if (gpu_flags & (INT_ENA0 | INT_ENA1 | INT_ENA2 | INT_ENA3 | INT_ENA4))
721 WriteLog("GPU: Interrupt enable set by %s! Bits: %02X\n", whoName[who], (gpu_flags >> 4) & 0x1F);
722 WriteLog("GPU: REGPAGE %s...\n", (gpu_flags & REGPAGE ? "set" : "cleared"));
727 gpu_matrix_control = data;
730 // This can only point to long aligned addresses
731 gpu_pointer_to_matrix = data & 0xFFFFFFFC;
734 gpu_data_organization = data;
739 WriteLog("GPU: %s setting GPU PC to %08X %s\n", whoName[who], gpu_pc, (GPU_RUNNING ? "(GPU is RUNNING!)" : ""));//*/
// Control write. Bits 0xF7C0 are stripped from the incoming value here and taken from
// the old gpu_control when the two are merged further down.
744 // uint32 gpu_was_running = GPU_RUNNING;
745 data &= ~0xF7C0; // Disable writes to INT_LAT0-4 & TOM version number
747 // check for GPU -> CPU interrupt
750 //WriteLog("GPU->CPU interrupt\n");
751 if (TOMIRQEnabled(IRQ_GPU))
753 //This is the programmer's responsibility, to make sure the handler is valid, not ours!
754 // if ((TOMIRQEnabled(IRQ_GPU))// && (JaguarInterruptHandlerIsValid(64)))
756 TOMSetPendingGPUInt();
757 m68k_set_irq(2); // Set 68000 IPL 2
758 GPUReleaseTimeslice();
764 // check for CPU -> GPU interrupt #0
767 //WriteLog("CPU->GPU interrupt\n");
768 GPUSetIRQLine(0, ASSERT_LINE);
769 m68k_end_timeslice();
770 DSPReleaseTimeslice();
777 //WriteLog("asked to perform a single step (single step is %senabled)\n",(data&0x8)?"":"not ");
779 gpu_control = (gpu_control & 0xF7C0) | (data & (~0xF7C0));
781 // if gpu wasn't running but is now running, execute a few cycles
782 #ifndef GPU_SINGLE_STEPPING
783 /* if (!gpu_was_running && GPU_RUNNING)
786 WriteLog("GPU: Write32--About to do stupid braindead GPU execution for 200 cycles.\n");
791 #endif // GPU_DEBUG//*/
793 if (gpu_control & 0x18)
795 #endif // #ifndef GPU_SINGLE_STEPPING
797 WriteLog("Write to GPU CTRL by %s: %08X ", whoName[who], data);
799 WriteLog(" --> Starting to run at %08X by %s...", gpu_pc, whoName[who]);
801 WriteLog(" --> Stopped by %s! (GPU_PC: %08X)", whoName[who], gpu_pc);
805 // GPUDumpDisassembly();
808 if (gpu_pc == 0xF035D8)
810 // GPUDumpDisassembly();
813 gpu_control &= 0xFFFFFFFE; // Don't run it and let's see what happens!
814 //Hmm. Seems to lock up when going into the demo...
815 //Try to disable the collision altogether!
818 extern int effect_start5;
819 static bool finished = false;
820 //if (GPU_RUNNING && effect_start5 && !finished)
821 if (GPU_RUNNING && effect_start5 && gpu_pc == 0xF035D8)
823 // Let's do a dump of $6528!
824 /* uint32 numItems = JaguarReadWord(0x6BD6);
825 WriteLog("\nDump of $6528: %u items.\n\n", numItems);
826 for(int i=0; i<numItems*3*4; i+=3*4)
828 WriteLog("\t%04X: %08X %08X %08X -> ", 0x6528+i, JaguarReadLong(0x6528+i),
829 JaguarReadLong(0x6528+i+4), JaguarReadLong(0x6528+i+8));
830 uint16 link = JaguarReadWord(0x6528+i+8+2);
831 for(int j=0; j<40; j+=4)
832 WriteLog("%08X ", JaguarReadLong(link + j));
836 // Let's try a manual blit here...
837 //This isn't working the way it should! !!! FIX !!!
838 //Err, actually, it is.
839 // NOW, it works right! Problem solved!!! It's a blitter bug!
840 /* uint32 src = 0x4D54, dst = 0xF03000, width = 10 * 4;
841 for(int y=0; y<127; y++)
843 for(int x=0; x<2; x++)
845 JaguarWriteLong(dst, JaguarReadLong(src));
850 src += width - (2 * 4);
854 WriteLog("\nGPU: About to execute collision detection code.\n\n");//*/
856 /* WriteLog("\nGPU: About to execute collision detection code. Data @ 4D54:\n\n");
858 for(int i=0x004D54; i<0x004D54+2048; i++)
860 WriteLog("%02X ", JaguarReadByte(i));
868 WriteLog("\n\nData @ F03000:\n\n");
870 for(int i=0xF03000; i<0xF03200; i++)
872 WriteLog("%02X ", JaguarReadByte(i));
886 /*if (!GPU_RUNNING && finished)
888 WriteLog("\nGPU: Finished collision detection code. Exiting!\n\n");
893 // (?) If we're set running by the M68K (or DSP?) then end its timeslice to
894 // allow the GPU a chance to run...
895 // Yes! This partially fixed Trevor McFur...
897 m68k_end_timeslice();
904 gpu_div_control = data;
906 // default: // unaligned long write
913 // JaguarWriteWord(offset, (data >> 16) & 0xFFFF, who);
914 // JaguarWriteWord(offset+2, data & 0xFFFF, who);
915 // We're a 32-bit processor, we can do a long write...!
916 JaguarWriteLong(offset, data, who);
920 // Change register banks if necessary
// Points gpu_reg / gpu_alternate_reg at the two register banks according to gpu_flags:
// `bank` is the REGPAGE bit (non-zero when set), forced to 0 while IMASK is set. One of
// the two assignments below then runs; the if/else lines that test `bank` are absent
// from this listing.
922 void GPUUpdateRegisterBanks(void)
924 int bank = (gpu_flags & REGPAGE); // REGPAGE bit
926 if (gpu_flags & IMASK) // IMASK bit
927 bank = 0; // IMASK forces main bank to be bank 0
930 gpu_reg = gpu_reg_bank_1, gpu_alternate_reg = gpu_reg_bank_0;
932 gpu_reg = gpu_reg_bank_0, gpu_alternate_reg = gpu_reg_bank_1;
// Dispatches a pending GPU interrupt, if any.
//  - Does nothing while IMASK is set in gpu_flags (already inside an interrupt).
//  - `bits` are the five latch bits from gpu_control (bits 6..10) and `mask` the five
//    enable bits from gpu_flags (bits 4..8).
//  - After choosing `which`, it re-selects the register banks, stores gpu_pc - 2 at the
//    address held in r31, and points both gpu_pc and r30 at
//    GPU_WORK_RAM_BASE + which * 0x10.
// NOTE(review): the early returns, the selection of `which`, the statement that sets
// IMASK and the r31 pre-decrement are absent from this listing; only the comments
// describing them survive.
935 void GPUHandleIRQs(void)
937 // Bail out if we're already in an interrupt!
938 if (gpu_flags & IMASK)
941 // Get the interrupt latch & enable bits
942 uint32 bits = (gpu_control >> 6) & 0x1F, mask = (gpu_flags >> 4) & 0x1F;
944 // Bail out if latched interrupts aren't enabled
949 // Determine which interrupt to service
950 uint32 which = 0; //Isn't there a #pragma to disable this warning???
963 WriteLog("GPU: Generating IRQ #%i\n", which);
965 // set the interrupt flag
967 GPUUpdateRegisterBanks();
969 // subqt #4,r31 ; pre-decrement stack pointer
970 // move pc,r30 ; address of interrupted code
971 // store r30,(r31) ; store return address
973 GPUWriteLong(gpu_reg[31], gpu_pc - 2, GPU);
975 // movei #service_address,r30 ; pointer to ISR entry
976 // jump (r30) ; jump to ISR
978 gpu_pc = gpu_reg[30] = GPU_WORK_RAM_BASE + (which * 0x10);
// Updates the interrupt latch for line `irqline` in gpu_control (latch bit
// 0x0040 << irqline). The latch is cleared first; it is then set again and
// GPUHandleIRQs() is called.
// NOTE(review): the condition on `state` guarding the set-and-handle step is absent
// from this listing (GPUWriteLong passes ASSERT_LINE) -- confirm.
981 void GPUSetIRQLine(int irqline, int state)
984 WriteLog("GPU: Setting GPU IRQ line #%i\n", irqline);
986 uint32 mask = 0x0040 << irqline;
987 gpu_control &= ~mask; // Clear the interrupt latch
991 gpu_control |= mask; // Assert the interrupt latch
992 GPUHandleIRQs(); // And handle the interrupt...
// NOTE(review): the header of the enclosing function is absent from this listing.
// The only live statement visible is the call that builds the branch condition table.
// The three dynamic allocations are commented out; work RAM and both register banks
// are static arrays in this file.
996 //TEMPORARY: Testing only!
1002 // memory_malloc_secure((void **)&gpu_ram_8, 0x1000, "GPU work RAM");
1003 // memory_malloc_secure((void **)&gpu_reg_bank_0, 32 * sizeof(int32), "GPU bank 0 regs");
1004 // memory_malloc_secure((void **)&gpu_reg_bank_1, 32 * sizeof(int32), "GPU bank 1 regs");
1006 build_branch_condition_table();
// NOTE(review): the header of the enclosing function is absent from this listing.
// The visible statements put the GPU into its reset state:
//  - the program counter is set to 0x00F03000, the address GPUDumpDisassembly starts at;
//  - the control register is set to 0x00002800;
//  - bank 0 becomes the active bank and all 64 registers are zeroed;
//  - the 0x1000 bytes of work RAM are filled with 0xFF.
1010 //TEMPORARY: Testing only!
1017 // GPU registers (directly visible)
1018 gpu_flags = 0x00000000;
1019 gpu_matrix_control = 0x00000000;
1020 gpu_pointer_to_matrix = 0x00000000;
1021 gpu_data_organization = 0xFFFFFFFF;
1022 gpu_pc = 0x00F03000;
1023 gpu_control = 0x00002800; // Correctly sets this as TOM Rev. 2
1024 gpu_hidata = 0x00000000;
1025 gpu_remain = 0x00000000; // These two registers are RO/WO
1026 gpu_div_control = 0x00000000;
1028 // GPU internal register
1029 gpu_acc = 0x00000000;
1031 gpu_reg = gpu_reg_bank_0;
1032 gpu_alternate_reg = gpu_reg_bank_1;
// The chained assignment clears entry i of both banks on each pass.
1034 for(int i=0; i<32; i++)
1035 gpu_reg[i] = gpu_alternate_reg[i] = 0x00000000;
1038 memset(gpu_ram_8, 0xFF, 0x1000);
1040 //not needed GPUInterruptPending = false;
// NOTE(review): only the signature survives in this listing. Judging by the name this
// is an accessor for gpu_pc -- confirm against the full file.
1044 uint32 GPUReadPC(void)
// Zeroes all 64 per-opcode execution counters in gpu_opcode_use and logs that it did so.
1049 void GPUResetStats(void)
1051 for(uint32 i=0; i<64; i++)
1052 gpu_opcode_use[i] = 0;
1053 WriteLog("--> GPU stats were reset!\n");
// Logs a disassembly of 0xF03000..0xF03FFF. Each pass asks dasmjag to disassemble the
// instruction at j into `buffer`, advances j by the value dasmjag returns, and logs the
// instruction's start address together with the text.
// NOTE(review): the declarations of `buffer` and `oldj` are absent from this listing.
1056 void GPUDumpDisassembly(void)
1060 WriteLog("\n---[GPU code at 00F03000]---------------------------\n");
1061 uint32 j = 0xF03000;
1062 while (j <= 0xF03FFF)
1065 j += dasmjag(JAGUAR_GPU, buffer, j);
1066 WriteLog("\t%08X: %s\n", oldj, buffer);
// Logs the three flag bytes (in N, C, Z order) followed by both register banks, four
// registers per line. Each loop runs eight times and (j << 2) + k gives the register
// number, so registers 0..31 of each bank are printed.
1070 void GPUDumpRegisters(void)
1072 WriteLog("\n---[GPU flags: NCZ %d%d%d]-----------------------\n", gpu_flag_n, gpu_flag_c, gpu_flag_z);
1073 WriteLog("\nRegisters bank 0\n");
1074 for(int j=0; j<8; j++)
1076 WriteLog("\tR%02i = %08X R%02i = %08X R%02i = %08X R%02i = %08X\n",
1077 (j << 2) + 0, gpu_reg_bank_0[(j << 2) + 0],
1078 (j << 2) + 1, gpu_reg_bank_0[(j << 2) + 1],
1079 (j << 2) + 2, gpu_reg_bank_0[(j << 2) + 2],
1080 (j << 2) + 3, gpu_reg_bank_0[(j << 2) + 3]);
1082 WriteLog("Registers bank 1\n");
1083 for(int j=0; j<8; j++)
1085 WriteLog("\tR%02i = %08X R%02i = %08X R%02i = %08X R%02i = %08X\n",
1086 (j << 2) + 0, gpu_reg_bank_1[(j << 2) + 0],
1087 (j << 2) + 1, gpu_reg_bank_1[(j << 2) + 1],
1088 (j << 2) + 2, gpu_reg_bank_1[(j << 2) + 2],
1089 (j << 2) + 3, gpu_reg_bank_1[(j << 2) + 3]);
// Logs the contents of gpu_ram_8, four bytes per line, labelled with addresses starting
// at 0xF03000. i steps by 4 from 0 and the last value below 0xFFF is 0xFFC, so all
// 0x1000 bytes are covered.
1093 void GPUDumpMemory(void)
1095 WriteLog("\n---[GPU data at 00F03000]---------------------------\n");
1096 for(int i=0; i<0xFFF; i+=4)
1097 WriteLog("\t%08X: %02X %02X %02X %02X\n", 0xF03000+i, gpu_ram_8[i],
1098 gpu_ram_8[i+1], gpu_ram_8[i+2], gpu_ram_8[i+3]);
// NOTE(review): the header of the enclosing function is absent from this listing.
// The visible statements log the final state of the GPU: the stop address and whether
// the GPU was running, the interrupt latch and enable bits, a full disassembly, and a
// count for every opcode that was executed at least once.
1103 WriteLog("GPU: Stopped at PC=%08X (GPU %s running)\n", (unsigned int)gpu_pc, GPU_RUNNING ? "was" : "wasn't");
1105 // Get the interrupt latch & enable bits
1106 uint8 bits = (gpu_control >> 6) & 0x1F, mask = (gpu_flags >> 4) & 0x1F;
1107 WriteLog("GPU: Latch bits = %02X, enable bits = %02X\n", bits, mask);
1110 GPUDumpDisassembly();
1112 WriteLog("\nGPU opcodes use:\n");
1113 for(int i=0; i<64; i++)
1115 if (gpu_opcode_use[i])
// gpu_opcode_use holds uint32 values while %lu expects unsigned long; the cast makes
// the argument match the conversion on every platform (same idea as the
// (unsigned int)gpu_pc cast above).
1116 WriteLog("\t%17s %lu\n", gpu_opcode_str[i], (unsigned long)gpu_opcode_use[i]);
1120 // memory_free(gpu_ram_8);
1121 // memory_free(gpu_reg_bank_0);
1122 // memory_free(gpu_reg_bank_1);
1126 // Main GPU execution core
// Runs GPU instructions until the cycle budget is used up or the GPU stops running.
// For each instruction the loop:
//  1. fetches a 16-bit opcode from gpu_pc through GPUReadWord;
//  2. decodes index = opcode >> 10, first parameter = bits 9..5, second = bits 4..0;
//  3. calls gpu_opcode[index]();
//  4. subtracts gpu_opcode_cycles[index] from `cycles` and bumps gpu_opcode_use[index];
//  5. logs once (guarded by `tripwire`) if gpu_pc has left 0xF03000..0xF03FFF.
// gpu_releaseTimeSlice_flag is cleared before the loop. Most of the remaining body is
// commented-out tracing keyed to specific program-counter values.
// NOTE(review): the statement that advances gpu_pc past the fetched opcode, and any
// check of gpu_releaseTimeSlice_flag inside the loop, are absent from this listing.
1128 static int testCount = 1;
1130 static bool tripwire = false;
1131 void GPUExec(int32 cycles)
1136 #ifdef GPU_SINGLE_STEPPING
1137 if (gpu_control & 0x18)
1140 gpu_control &= ~0x10;
1144 gpu_releaseTimeSlice_flag = 0;
1147 while (cycles > 0 && GPU_RUNNING)
1149 if (gpu_ram_8[0x054] == 0x98 && gpu_ram_8[0x055] == 0x0A && gpu_ram_8[0x056] == 0x03
1150 && gpu_ram_8[0x057] == 0x00 && gpu_ram_8[0x058] == 0x00 && gpu_ram_8[0x059] == 0x00)
1152 if (gpu_pc == 0xF03000)
1154 extern uint32 starCount;
1156 /* WriteLog("GPU: Starting starfield generator... Dump of [R03=%08X]:\n", gpu_reg_bank_0[03]);
1157 uint32 base = gpu_reg_bank_0[3];
1158 for(uint32 i=0; i<0x100; i+=16)
1160 WriteLog("%02X: ", i);
1161 for(uint32 j=0; j<16; j++)
1163 WriteLog("%02X ", JaguarReadByte(base + i + j));
1168 // if (gpu_pc == 0xF03)
1172 /*if (gpu_pc == 0xF03B9E && gpu_reg_bank_0[01] == 0)
1175 WriteLog("GPU: Starting disassembly log...\n");
1178 /*if (gpu_pc == 0xF0359A)
1183 /* gpu_flag_c = (gpu_flag_c ? 1 : 0);
1184 gpu_flag_z = (gpu_flag_z ? 1 : 0);
1185 gpu_flag_n = (gpu_flag_n ? 1 : 0);*/
1187 uint16 opcode = GPUReadWord(gpu_pc, GPU);
1188 uint32 index = opcode >> 10;
1189 gpu_instruction = opcode; // Added for GPU #3...
1190 gpu_opcode_first_parameter = (opcode >> 5) & 0x1F;
1191 gpu_opcode_second_parameter = opcode & 0x1F;
1192 /*if (gpu_pc == 0xF03BE8)
1193 WriteLog("Start of OP frame write...\n");
1194 if (gpu_pc == 0xF03EEE)
1195 WriteLog("--> Writing BRANCH object ---\n");
1196 if (gpu_pc == 0xF03F62)
1197 WriteLog("--> Writing BITMAP object ***\n");//*/
1198 /*if (gpu_pc == 0xF03546)
1200 WriteLog("\n--> GPU PC: F03546\n");
1202 GPUDumpDisassembly();
1204 /*if (gpu_pc == 0xF033F6)
1206 WriteLog("\n--> GPU PC: F033F6\n");
1208 GPUDumpDisassembly();
1210 /*if (gpu_pc == 0xF033CC)
1212 WriteLog("\n--> GPU PC: F033CC\n");
1214 GPUDumpDisassembly();
1216 /*if (gpu_pc == 0xF033D6)
1218 WriteLog("\n--> GPU PC: F033D6 (#%d)\n", testCount++);
1222 /*if (gpu_pc == 0xF033D8)
1224 WriteLog("\n--> GPU PC: F033D8 (#%d)\n", testCount++);
1228 /*if (gpu_pc == 0xF0358E)
1230 WriteLog("\n--> GPU PC: F0358E (#%d)\n", testCount++);
1234 /*if (gpu_pc == 0xF034CA)
1236 WriteLog("\n--> GPU PC: F034CA (#%d)\n", testCount++);
1239 /*if (gpu_pc == 0xF034CA)
1241 len = gpu_reg[1] + 4;//, r9save = gpu_reg[9];
1242 WriteLog("\nAbout to subtract [#%d] (R14=%08X, R15=%08X, R9=%08X):\n ", testCount++, gpu_reg[14], gpu_reg[15], gpu_reg[9]);
1243 for(int i=0; i<len; i+=4)
1244 WriteLog(" %08X", GPUReadLong(gpu_reg[15]+i));
1246 for(int i=0; i<len; i+=4)
1247 WriteLog(" %08X", GPUReadLong(gpu_reg[14]+i));
1250 if (gpu_pc == 0xF034DE)
1252 WriteLog("\nSubtracted! (R14=%08X, R15=%08X):\n ", gpu_reg[14], gpu_reg[15]);
1253 for(int i=0; i<len; i+=4)
1254 WriteLog(" %08X", GPUReadLong(gpu_reg[15]+i));
1256 for(int i=0; i<len; i+=4)
1257 WriteLog(" %08X", GPUReadLong(gpu_reg[14]+i));
1259 for(int i=0; i<len; i+=4)
1260 WriteLog(" --------");
1262 for(int i=0; i<len; i+=4)
1263 WriteLog(" %08X", GPUReadLong(gpu_reg[9]+4+i));
1266 /*if (gpu_pc == 0xF035C8)
1268 WriteLog("\n--> GPU PC: F035C8 (#%d)\n", testCount++);
1270 GPUDumpDisassembly();
1275 // gpu_reset_stats();
1276 static char buffer[512];
1277 dasmjag(JAGUAR_GPU, buffer, gpu_pc);
1278 WriteLog("GPU: [%08X] %s (RM=%08X, RN=%08X) -> ", gpu_pc, buffer, RM, RN);
1280 //$E400 -> 1110 01 -> $39 -> 57
1283 gpu_opcode[index]();
1285 // gpu2_opcode[index]();
1287 //GPU #3 (Doesn't show ATARI logo! #1 & #2 do...)
1289 // gpu3_opcode[index]();
1292 //GPU: [00F03548] jr nz,00F03560 (0xd561) (RM=00F03114, RN=00000004) -> --> JR: Branch taken.
1293 /*static bool firstTime = true;
1294 if (gpu_pc == 0xF03548 && firstTime)
1297 // firstTime = false;
1299 //static char buffer[512];
1301 //while (k<0xF0356C)
1304 //k += dasmjag(JAGUAR_GPU, buffer, k);
1305 //WriteLog("GPU: [%08X] %s\n", oldk, buffer);
1307 // gpu_start_log = 1;
1309 //GPU: [00F0354C] jump nz,(r29) (0xd3a1) (RM=00F03314, RN=00000004) -> (RM=00F03314, RN=00000004)
1310 /*if (gpu_pc == 0xF0354C)
1311 gpu_flag_z = 0;//, gpu_start_log = 1;//*/
1313 cycles -= gpu_opcode_cycles[index];
1314 gpu_opcode_use[index]++;
1316 WriteLog("(RM=%08X, RN=%08X)\n", RM, RN);//*/
1317 if ((gpu_pc < 0xF03000 || gpu_pc > 0xF03FFF) && !tripwire)
1319 WriteLog("GPU: Executing outside local RAM! GPU_PC: %08X\n", gpu_pc);
1332 GPU opcodes use (offset punch--vertically below bad guy):
1354 load_r14_indexed 1183
1355 load_r15_indexed 1125
1358 store_r14_indexed 320
1366 static void gpu_opcode_jump(void)
1369 const char * condition[32] =
1370 { "T", "nz", "z", "???", "nc", "nc nz", "nc z", "???", "c", "c nz",
1371 "c z", "???", "???", "???", "???", "???", "???", "???", "???",
1372 "???", "nn", "nn nz", "nn z", "???", "n", "n nz", "n z", "???",
1373 "???", "???", "???", "F" };
1375 WriteLog("%06X: JUMP %s, (R%02u) [NCZ:%u%u%u, R%02u=%08X] ", gpu_pc-2, condition[IMM_2], IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM);
1378 /* gpu_flag_c = (gpu_flag_c ? 1 : 0);
1379 gpu_flag_z = (gpu_flag_z ? 1 : 0);
1380 gpu_flag_n = (gpu_flag_n ? 1 : 0);*/
1381 // KLUDGE: Used by BRANCH_CONDITION
1382 uint32 jaguar_flags = (gpu_flag_n << 2) | (gpu_flag_c << 1) | gpu_flag_z;
1384 if (BRANCH_CONDITION(IMM_2))
1388 WriteLog("Branched!\n");
1391 WriteLog(" --> JUMP: Branch taken.\n");
1392 uint32 delayed_pc = RM;
1394 gpu_pc = delayed_pc;
1395 /* uint16 opcode = GPUReadWord(gpu_pc, GPU);
1396 gpu_opcode_first_parameter = (opcode >> 5) & 0x1F;
1397 gpu_opcode_second_parameter = opcode & 0x1F;
1399 gpu_pc = delayed_pc;
1400 gpu_opcode[opcode>>10]();//*/
1405 WriteLog("Branch NOT taken.\n");
1409 static void gpu_opcode_jr(void)
1412 const char * condition[32] =
1413 { "T", "nz", "z", "???", "nc", "nc nz", "nc z", "???", "c", "c nz",
1414 "c z", "???", "???", "???", "???", "???", "???", "???", "???",
1415 "???", "nn", "nn nz", "nn z", "???", "n", "n nz", "n z", "???",
1416 "???", "???", "???", "F" };
1418 WriteLog("%06X: JR %s, %06X [NCZ:%u%u%u] ", gpu_pc-2, condition[IMM_2], gpu_pc+((IMM_1 & 0x10 ? 0xFFFFFFF0 | IMM_1 : IMM_1) * 2), gpu_flag_n, gpu_flag_c, gpu_flag_z);
1420 /* if (CONDITION(jaguar.op & 31))
1422 int32 r1 = (INT8)((jaguar.op >> 2) & 0xF8) >> 2;
1423 uint32 newpc = jaguar.PC + r1;
1425 jaguar.op = ROPCODE(jaguar.PC);
1427 (*jaguar.table[jaguar.op >> 10])();
1429 jaguar_icount -= 3; // 3 wait states guaranteed
1432 /* gpu_flag_n = (gpu_flag_n ? 1 : 0);
1433 gpu_flag_c = (gpu_flag_c ? 1 : 0);
1434 gpu_flag_z = (gpu_flag_z ? 1 : 0);*/
1435 // KLUDGE: Used by BRANCH_CONDITION
1436 uint32 jaguar_flags = (gpu_flag_n << 2) | (gpu_flag_c << 1) | gpu_flag_z;
1438 if (BRANCH_CONDITION(IMM_2))
1442 WriteLog("Branched!\n");
1445 WriteLog(" --> JR: Branch taken.\n");
1446 int32 offset = (IMM_1 & 0x10 ? 0xFFFFFFF0 | IMM_1 : IMM_1); // Sign extend IMM_1
1447 int32 delayed_pc = gpu_pc + (offset * 2);
1449 gpu_pc = delayed_pc;
1450 /* uint16 opcode = GPUReadWord(gpu_pc, GPU);
1451 gpu_opcode_first_parameter = (opcode >> 5) & 0x1F;
1452 gpu_opcode_second_parameter = opcode & 0x1F;
1454 gpu_pc = delayed_pc;
1455 gpu_opcode[opcode>>10]();//*/
1460 WriteLog("Branch NOT taken.\n");
1464 static void gpu_opcode_add(void)
1468 WriteLog("%06X: ADD R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1470 uint32 res = RN + RM;
1471 CLR_ZNC; SET_ZNC_ADD(RN, RM, res);
1475 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1479 static void gpu_opcode_addc(void)
1483 WriteLog("%06X: ADDC R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1485 /* int dreg = jaguar.op & 31;
1486 uint32 r1 = jaguar.r[(jaguar.op >> 5) & 31];
1487 uint32 r2 = jaguar.r[dreg];
1488 uint32 res = r2 + r1 + ((jaguar.FLAGS >> 1) & 1);
1489 jaguar.r[dreg] = res;
1490 CLR_ZNC; SET_ZNC_ADD(r2,r1,res);*/
1492 uint32 res = RN + RM + gpu_flag_c;
1493 uint32 carry = gpu_flag_c;
1494 // SET_ZNC_ADD(RN, RM, res); //???BUG??? Yes!
1495 SET_ZNC_ADD(RN + carry, RM, res);
1496 // SET_ZNC_ADD(RN, RM + carry, res);
1500 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1504 static void gpu_opcode_addq(void)
1508 WriteLog("%06X: ADDQ #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1510 uint32 r1 = gpu_convert_zero[IMM_1];
1511 uint32 res = RN + r1;
1512 CLR_ZNC; SET_ZNC_ADD(RN, r1, res);
1516 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1520 static void gpu_opcode_addqt(void)
1522 #ifdef GPU_DIS_ADDQT
1524 WriteLog("%06X: ADDQT #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1526 RN += gpu_convert_zero[IMM_1];
1527 #ifdef GPU_DIS_ADDQT
1529 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1533 static void gpu_opcode_sub(void)
1537 WriteLog("%06X: SUB R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1539 uint32 res = RN - RM;
1540 SET_ZNC_SUB(RN, RM, res);
1544 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1548 static void gpu_opcode_subc(void)
1552 WriteLog("%06X: SUBC R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1554 uint32 res = RN - RM - gpu_flag_c;
1555 uint32 borrow = gpu_flag_c;
1556 // SET_ZNC_SUB(RN, RM, res); //???BUG??? YES!!!
1557 //No matter how you do it, there is a problem. With below, it's 0-0 with carry,
1558 //and the one below it it's FFFFFFFF - FFFFFFFF with carry... !!! FIX !!!
1559 // SET_ZNC_SUB(RN - borrow, RM, res);
1560 SET_ZNC_SUB(RN, RM + borrow, res);
1564 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1568 N = 5, M = 3, 3 - 5 = -2, C = 1... Or, in our case:
1569 N = 0, M = 1, 0 - 1 = -1, C = 0!
1571 #define SET_C_SUB(a,b) (gpu_flag_c = ((uint32)(b) > (uint32)(a)))
1572 #define SET_ZN(r) SET_N(r); SET_Z(r)
1573 #define SET_ZNC_ADD(a,b,r) SET_N(r); SET_Z(r); SET_C_ADD(a,b)
1574 #define SET_ZNC_SUB(a,b,r) SET_N(r); SET_Z(r); SET_C_SUB(a,b)
1576 static void gpu_opcode_subq(void)
1580 WriteLog("%06X: SUBQ #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1582 uint32 r1 = gpu_convert_zero[IMM_1];
1583 uint32 res = RN - r1;
1584 SET_ZNC_SUB(RN, r1, res);
1588 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1592 static void gpu_opcode_subqt(void)
1594 #ifdef GPU_DIS_SUBQT
1596 WriteLog("%06X: SUBQT #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1598 RN -= gpu_convert_zero[IMM_1];
1599 #ifdef GPU_DIS_SUBQT
1601 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1605 static void gpu_opcode_cmp(void)
1609 WriteLog("%06X: CMP R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1611 uint32 res = RN - RM;
1612 SET_ZNC_SUB(RN, RM, res);
1615 WriteLog("[NCZ:%u%u%u]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z);
1619 static void gpu_opcode_cmpq(void)
1621 static int32 sqtable[32] =
1622 { 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,-16,-15,-14,-13,-12,-11,-10,-9,-8,-7,-6,-5,-4,-3,-2,-1 };
1625 WriteLog("%06X: CMPQ #%d, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, sqtable[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1627 uint32 r1 = sqtable[IMM_1 & 0x1F]; // I like this better -> (INT8)(jaguar.op >> 2) >> 3;
1628 uint32 res = RN - r1;
1629 SET_ZNC_SUB(RN, r1, res);
1632 WriteLog("[NCZ:%u%u%u]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z);
1636 static void gpu_opcode_and(void)
1640 WriteLog("%06X: AND R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1646 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1650 static void gpu_opcode_or(void)
1654 WriteLog("%06X: OR R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1660 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1664 static void gpu_opcode_xor(void)
1668 WriteLog("%06X: XOR R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1674 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1678 static void gpu_opcode_not(void)
1682 WriteLog("%06X: NOT R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1688 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1692 static void gpu_opcode_move_pc(void)
1694 #ifdef GPU_DIS_MOVEPC
1696 WriteLog("%06X: MOVE PC, R%02u [NCZ:%u%u%u, PC=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, gpu_pc-2, IMM_2, RN);
1698 // Should be previous PC--this might not always be previous instruction!
1699 // Then again, this will point right at the *current* instruction, i.e., MOVE PC,R!
1701 #ifdef GPU_DIS_MOVEPC
1703 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1707 static void gpu_opcode_sat8(void)
1711 WriteLog("%06X: SAT8 R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1713 RN = ((int32)RN < 0 ? 0 : (RN > 0xFF ? 0xFF : RN));
1717 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1721 static void gpu_opcode_sat16(void)
1723 RN = ((int32)RN < 0 ? 0 : (RN > 0xFFFF ? 0xFFFF : RN));
1727 static void gpu_opcode_sat24(void)
1729 RN = ((int32)RN < 0 ? 0 : (RN > 0xFFFFFF ? 0xFFFFFF : RN));
1733 static void gpu_opcode_store_r14_indexed(void)
1735 #ifdef GPU_DIS_STORE14I
1737 WriteLog("%06X: STORE R%02u, (R14+$%02X) [NCZ:%u%u%u, R%02u=%08X, R14+$%02X=%08X]\n", gpu_pc-2, IMM_2, gpu_convert_zero[IMM_1] << 2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, gpu_convert_zero[IMM_1] << 2, gpu_reg[14]+(gpu_convert_zero[IMM_1] << 2));
1739 #ifdef GPU_CORRECT_ALIGNMENT
1740 uint32 address = gpu_reg[14] + (gpu_convert_zero[IMM_1] << 2);
1742 if (address >= 0xF03000 && address <= 0xF03FFF)
1743 GPUWriteLong(address & 0xFFFFFFFC, RN, GPU);
1745 GPUWriteLong(address, RN, GPU);
1747 GPUWriteLong(gpu_reg[14] + (gpu_convert_zero[IMM_1] << 2), RN, GPU);
1751 static void gpu_opcode_store_r15_indexed(void)
1753 #ifdef GPU_DIS_STORE15I
1755 WriteLog("%06X: STORE R%02u, (R15+$%02X) [NCZ:%u%u%u, R%02u=%08X, R15+$%02X=%08X]\n", gpu_pc-2, IMM_2, gpu_convert_zero[IMM_1] << 2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, gpu_convert_zero[IMM_1] << 2, gpu_reg[15]+(gpu_convert_zero[IMM_1] << 2));
1757 #ifdef GPU_CORRECT_ALIGNMENT
1758 uint32 address = gpu_reg[15] + (gpu_convert_zero[IMM_1] << 2);
1760 if (address >= 0xF03000 && address <= 0xF03FFF)
1761 GPUWriteLong(address & 0xFFFFFFFC, RN, GPU);
1763 GPUWriteLong(address, RN, GPU);
1765 GPUWriteLong(gpu_reg[15] + (gpu_convert_zero[IMM_1] << 2), RN, GPU);
1769 static void gpu_opcode_load_r14_ri(void)
1771 #ifdef GPU_DIS_LOAD14R
1773 WriteLog("%06X: LOAD (R14+R%02u), R%02u [NCZ:%u%u%u, R14+R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM+gpu_reg[14], IMM_2, RN);
1775 #ifdef GPU_CORRECT_ALIGNMENT
1776 uint32 address = gpu_reg[14] + RM;
1778 if (address >= 0xF03000 && address <= 0xF03FFF)
1779 RN = GPUReadLong(address & 0xFFFFFFFC, GPU);
1781 RN = GPUReadLong(address, GPU);
1783 RN = GPUReadLong(gpu_reg[14] + RM, GPU);
1785 #ifdef GPU_DIS_LOAD14R
1787 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1791 static void gpu_opcode_load_r15_ri(void)
1793 #ifdef GPU_DIS_LOAD15R
1795 WriteLog("%06X: LOAD (R15+R%02u), R%02u [NCZ:%u%u%u, R15+R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM+gpu_reg[15], IMM_2, RN);
1797 #ifdef GPU_CORRECT_ALIGNMENT
1798 uint32 address = gpu_reg[15] + RM;
1800 if (address >= 0xF03000 && address <= 0xF03FFF)
1801 RN = GPUReadLong(address & 0xFFFFFFFC, GPU);
1803 RN = GPUReadLong(address, GPU);
1805 RN = GPUReadLong(gpu_reg[15] + RM, GPU);
1807 #ifdef GPU_DIS_LOAD15R
1809 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1813 static void gpu_opcode_store_r14_ri(void)
1815 #ifdef GPU_DIS_STORE14R
1817 WriteLog("%06X: STORE R%02u, (R14+R%02u) [NCZ:%u%u%u, R%02u=%08X, R14+R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM+gpu_reg[14]);
1819 #ifdef GPU_CORRECT_ALIGNMENT
1820 uint32 address = gpu_reg[14] + RM;
1822 if (address >= 0xF03000 && address <= 0xF03FFF)
1823 GPUWriteLong(address & 0xFFFFFFFC, RN, GPU);
1825 GPUWriteLong(address, RN, GPU);
1827 GPUWriteLong(gpu_reg[14] + RM, RN, GPU);
1831 static void gpu_opcode_store_r15_ri(void)
1833 #ifdef GPU_DIS_STORE15R
1835 WriteLog("%06X: STORE R%02u, (R15+R%02u) [NCZ:%u%u%u, R%02u=%08X, R15+R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM+gpu_reg[15]);
1837 #ifdef GPU_CORRECT_ALIGNMENT_STORE
1838 uint32 address = gpu_reg[15] + RM;
1840 if (address >= 0xF03000 && address <= 0xF03FFF)
1841 GPUWriteLong(address & 0xFFFFFFFC, RN, GPU);
1843 GPUWriteLong(address, RN, GPU);
1845 GPUWriteLong(gpu_reg[15] + RM, RN, GPU);
1849 static void gpu_opcode_nop(void)
1853 WriteLog("%06X: NOP [NCZ:%u%u%u]\n", gpu_pc-2, gpu_flag_n, gpu_flag_c, gpu_flag_z);
1857 static void gpu_opcode_pack(void)
1861 WriteLog("%06X: %s R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, (!IMM_1 ? "PACK " : "UNPACK"), IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1865 //BUG! if (RM == 0) // Pack
1866 if (IMM_1 == 0) // Pack
1867 RN = ((val >> 10) & 0x0000F000) | ((val >> 5) & 0x00000F00) | (val & 0x000000FF);
1869 RN = ((val & 0x0000F000) << 10) | ((val & 0x00000F00) << 5) | (val & 0x000000FF);
1872 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1876 static void gpu_opcode_storeb(void)
1878 #ifdef GPU_DIS_STOREB
1880 WriteLog("%06X: STOREB R%02u, (R%02u) [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM);
1883 // Would appear to be so...!
1884 if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1885 GPUWriteLong(RM, RN & 0xFF, GPU);
1887 JaguarWriteByte(RM, RN, GPU);
1890 static void gpu_opcode_storew(void)
1892 #ifdef GPU_DIS_STOREW
1894 WriteLog("%06X: STOREW R%02u, (R%02u) [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM);
1896 #ifdef GPU_CORRECT_ALIGNMENT
1897 if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1898 GPUWriteLong(RM & 0xFFFFFFFE, RN & 0xFFFF, GPU);
1900 JaguarWriteWord(RM, RN, GPU);
1902 if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1903 GPUWriteLong(RM, RN & 0xFFFF, GPU);
1905 JaguarWriteWord(RM, RN, GPU);
1909 static void gpu_opcode_store(void)
1911 #ifdef GPU_DIS_STORE
1913 WriteLog("%06X: STORE R%02u, (R%02u) [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM);
1915 #ifdef GPU_CORRECT_ALIGNMENT
1916 if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1917 GPUWriteLong(RM & 0xFFFFFFFC, RN, GPU);
1919 GPUWriteLong(RM, RN, GPU);
1921 GPUWriteLong(RM, RN, GPU);
1925 static void gpu_opcode_storep(void)
1927 #ifdef GPU_CORRECT_ALIGNMENT
1928 if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1930 GPUWriteLong((RM & 0xFFFFFFF8) + 0, gpu_hidata, GPU);
1931 GPUWriteLong((RM & 0xFFFFFFF8) + 4, RN, GPU);
1935 GPUWriteLong(RM + 0, gpu_hidata, GPU);
1936 GPUWriteLong(RM + 4, RN, GPU);
1939 GPUWriteLong(RM + 0, gpu_hidata, GPU);
1940 GPUWriteLong(RM + 4, RN, GPU);
1944 static void gpu_opcode_loadb(void)
1946 #ifdef GPU_DIS_LOADB
1948 WriteLog("%06X: LOADB (R%02u), R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1950 if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1951 RN = GPUReadLong(RM, GPU) & 0xFF;
1953 RN = JaguarReadByte(RM, GPU);
1954 #ifdef GPU_DIS_LOADB
1956 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1960 static void gpu_opcode_loadw(void)
1962 #ifdef GPU_DIS_LOADW
1964 WriteLog("%06X: LOADW (R%02u), R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1966 #ifdef GPU_CORRECT_ALIGNMENT
1967 if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1968 RN = GPUReadLong(RM & 0xFFFFFFFE, GPU) & 0xFFFF;
1970 RN = JaguarReadWord(RM, GPU);
1972 if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1973 RN = GPUReadLong(RM, GPU) & 0xFFFF;
1975 RN = JaguarReadWord(RM, GPU);
1977 #ifdef GPU_DIS_LOADW
1979 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1983 // According to the docs, & "Do The Same", this address is long aligned...
1985 // And it works!!! Need to fix all instances...
1986 // Also, Power Drive Rally seems to contradict the idea that only LOADs in
1987 // the $F03000-$F03FFF range are aligned...
1988 #warning "!!! Alignment issues, need to find definitive final word on this !!!"
1990 Preliminary testing on real hardware seems to confirm that something strange goes on
1991 with unaligned reads in main memory. When the address is off by 1, the result is the
1992 same as the long address with the top byte replaced by something. So if the read is
1993 from $401, and $400 has 12 34 56 78, the value read will be $nn345678, where nn is a currently unknown value.
1994 When the address is off by 2, the result would be $nnnn5678, where nnnn is unknown.
1995 When the address is off by 3, the result would be $nnnnnn78, where nnnnnn is unknown.
1996 It may be that the "unknown" values come from the prefetch queue, but not sure how
1997 to test that. They seem to be stable, though, which would indicate such a mechanism.
1998 Sometimes, however, the off by 2 case returns $12345678!
2000 static void gpu_opcode_load(void)
2004 WriteLog("%06X: LOAD (R%02u), R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2006 #ifdef GPU_CORRECT_ALIGNMENT
2007 uint32 mask[4] = { 0x00000000, 0xFF000000, 0xFFFF0000, 0xFFFFFF00 };
2008 // if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
2009 RN = GPUReadLong(RM & 0xFFFFFFFC, GPU);
2010 // RN = GPUReadLong(RM & 0x00FFFFFC, GPU);
2012 // RN = GPUReadLong(RM, GPU);
2013 // Simulate garbage in unaligned reads...
2014 //seems that this behavior is different in GPU mem vs. main mem...
2015 // if ((RM < 0xF03000) || (RM > 0xF0BFFF))
2016 // RN |= mask[RM & 0x03];
2018 RN = GPUReadLong(RM, GPU);
2022 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2026 static void gpu_opcode_loadp(void)
2028 #ifdef GPU_CORRECT_ALIGNMENT
2029 if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
2031 gpu_hidata = GPUReadLong((RM & 0xFFFFFFF8) + 0, GPU);
2032 RN = GPUReadLong((RM & 0xFFFFFFF8) + 4, GPU);
2036 gpu_hidata = GPUReadLong(RM + 0, GPU);
2037 RN = GPUReadLong(RM + 4, GPU);
2040 gpu_hidata = GPUReadLong(RM + 0, GPU);
2041 RN = GPUReadLong(RM + 4, GPU);
2045 static void gpu_opcode_load_r14_indexed(void)
2047 #ifdef GPU_DIS_LOAD14I
2049 WriteLog("%06X: LOAD (R14+$%02X), R%02u [NCZ:%u%u%u, R14+$%02X=%08X, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1] << 2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, gpu_convert_zero[IMM_1] << 2, gpu_reg[14]+(gpu_convert_zero[IMM_1] << 2), IMM_2, RN);
2051 #ifdef GPU_CORRECT_ALIGNMENT
2052 uint32 address = gpu_reg[14] + (gpu_convert_zero[IMM_1] << 2);
2054 if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
2055 RN = GPUReadLong(address & 0xFFFFFFFC, GPU);
2057 RN = GPUReadLong(address, GPU);
2059 RN = GPUReadLong(gpu_reg[14] + (gpu_convert_zero[IMM_1] << 2), GPU);
2061 #ifdef GPU_DIS_LOAD14I
2063 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2067 static void gpu_opcode_load_r15_indexed(void)
2069 #ifdef GPU_DIS_LOAD15I
2071 WriteLog("%06X: LOAD (R15+$%02X), R%02u [NCZ:%u%u%u, R15+$%02X=%08X, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1] << 2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, gpu_convert_zero[IMM_1] << 2, gpu_reg[15]+(gpu_convert_zero[IMM_1] << 2), IMM_2, RN);
2073 #ifdef GPU_CORRECT_ALIGNMENT
2074 uint32 address = gpu_reg[15] + (gpu_convert_zero[IMM_1] << 2);
2076 if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
2077 RN = GPUReadLong(address & 0xFFFFFFFC, GPU);
2079 RN = GPUReadLong(address, GPU);
2081 RN = GPUReadLong(gpu_reg[15] + (gpu_convert_zero[IMM_1] << 2), GPU);
2083 #ifdef GPU_DIS_LOAD15I
2085 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2089 static void gpu_opcode_movei(void)
2091 #ifdef GPU_DIS_MOVEI
2093 WriteLog("%06X: MOVEI #$%08X, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, (uint32)GPUReadWord(gpu_pc) | ((uint32)GPUReadWord(gpu_pc + 2) << 16), IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2095 // This instruction is followed by 32-bit value in LSW / MSW format...
2096 RN = (uint32)GPUReadWord(gpu_pc, GPU) | ((uint32)GPUReadWord(gpu_pc + 2, GPU) << 16);
2098 #ifdef GPU_DIS_MOVEI
2100 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2104 static void gpu_opcode_moveta(void)
2106 #ifdef GPU_DIS_MOVETA
2108 WriteLog("%06X: MOVETA R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u(alt)=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, ALTERNATE_RN);
2111 #ifdef GPU_DIS_MOVETA
2113 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u(alt)=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, ALTERNATE_RN);
2117 static void gpu_opcode_movefa(void)
2119 #ifdef GPU_DIS_MOVEFA
2121 WriteLog("%06X: MOVEFA R%02u, R%02u [NCZ:%u%u%u, R%02u(alt)=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, ALTERNATE_RM, IMM_2, RN);
2124 #ifdef GPU_DIS_MOVEFA
2126 WriteLog("[NCZ:%u%u%u, R%02u(alt)=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, ALTERNATE_RM, IMM_2, RN);
2130 static void gpu_opcode_move(void)
2134 WriteLog("%06X: MOVE R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2139 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2143 static void gpu_opcode_moveq(void)
2145 #ifdef GPU_DIS_MOVEQ
2147 WriteLog("%06X: MOVEQ #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2150 #ifdef GPU_DIS_MOVEQ
2152 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2156 static void gpu_opcode_resmac(void)
2161 static void gpu_opcode_imult(void)
2163 #ifdef GPU_DIS_IMULT
2165 WriteLog("%06X: IMULT R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2167 RN = (int16)RN * (int16)RM;
2169 #ifdef GPU_DIS_IMULT
2171 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2175 static void gpu_opcode_mult(void)
2179 WriteLog("%06X: MULT R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2181 RN = (uint16)RM * (uint16)RN;
2185 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2189 static void gpu_opcode_bclr(void)
2193 WriteLog("%06X: BCLR #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2195 uint32 res = RN & ~(1 << IMM_1);
2200 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2204 static void gpu_opcode_btst(void)
2208 WriteLog("%06X: BTST #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2210 gpu_flag_z = (~RN >> IMM_1) & 1;
2213 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2217 static void gpu_opcode_bset(void)
2221 WriteLog("%06X: BSET #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2223 uint32 res = RN | (1 << IMM_1);
2228 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2232 static void gpu_opcode_imacn(void)
2234 uint32 res = (int16)RM * (int16)(RN);
2238 static void gpu_opcode_mtoi(void)
2241 uint32 res = RN = (((int32)_RM >> 8) & 0xFF800000) | (_RM & 0x007FFFFF);
2245 static void gpu_opcode_normi(void)
2252 while ((_RM & 0xFFC00000) == 0)
2257 while ((_RM & 0xFF800000) != 0)
2267 static void gpu_opcode_mmult(void)
2269 int count = gpu_matrix_control & 0x0F; // Matrix width
2270 uint32 addr = gpu_pointer_to_matrix; // In the GPU's RAM
2274 if (gpu_matrix_control & 0x10) // Column stepping
2276 for(int i=0; i<count; i++)
2280 a = (int16)((gpu_alternate_reg[IMM_1 + (i >> 1)] >> 16) & 0xFFFF);
2282 a = (int16)(gpu_alternate_reg[IMM_1 + (i >> 1)] & 0xFFFF);
2284 int16 b = ((int16)GPUReadWord(addr + 2, GPU));
2289 else // Row stepping
2291 for(int i=0; i<count; i++)
2295 a = (int16)((gpu_alternate_reg[IMM_1 + (i >> 1)] >> 16) & 0xFFFF);
2297 a = (int16)(gpu_alternate_reg[IMM_1 + (i >> 1)] & 0xFFFF);
2299 int16 b = ((int16)GPUReadWord(addr + 2, GPU));
2304 RN = res = (int32)accum;
2305 // carry flag to do (out of the last add)
2309 static void gpu_opcode_abs(void)
2313 WriteLog("%06X: ABS R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2315 gpu_flag_c = RN >> 31;
2316 if (RN == 0x80000000)
2317 //Is 0x80000000 a positive number? If so, then we need to set C to 0 as well!
2318 gpu_flag_n = 1, gpu_flag_z = 0;
2323 gpu_flag_n = 0; SET_FLAG_Z(RN);
2327 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2331 static void gpu_opcode_div(void) // RN / RM
2335 WriteLog("%06X: DIV R%02u, R%02u (%s) [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, (gpu_div_control & 0x01 ? "16.16" : "32"), gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2337 // NOTE: remainder is NOT calculated correctly here!
2338 // The original tried to get it right by checking to see if the
2339 // remainder was negative, but that's too late...
2340 // The code there should do it now, but I'm not 100% sure...
2344 if (gpu_div_control & 0x01) // 16.16 division
2346 RN = ((uint64)RN << 16) / RM;
2347 gpu_remain = ((uint64)RN << 16) % RM;
2352 gpu_remain = RN % RM;
2355 if ((gpu_remain - RM) & 0x80000000) // If the result would have been negative...
2356 gpu_remain -= RM; // Then make it negative!
2366 if (gpu_div_control & 1)
2368 gpu_remain = (((uint64)_RN) << 16) % _RM;
2369 if (gpu_remain&0x80000000)
2371 RN = (((uint64)_RN) << 16) / _RM;
2375 gpu_remain = _RN % _RM;
2376 if (gpu_remain&0x80000000)
2385 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] Remainder: %08X\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN, gpu_remain);
2389 static void gpu_opcode_imultn(void)
2391 uint32 res = (int32)((int16)RN * (int16)RM);
2392 gpu_acc = (int32)res;
2397 static void gpu_opcode_neg(void)
2401 WriteLog("%06X: NEG R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2404 SET_ZNC_SUB(0, RN, res);
2408 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2412 static void gpu_opcode_shlq(void)
2416 WriteLog("%06X: SHLQ #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, 32 - IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2418 // Was a bug here...
2419 // (Look at Aaron's code: If r1 = 32, then 32 - 32 = 0 which is wrong!)
2420 int32 r1 = 32 - IMM_1;
2421 uint32 res = RN << r1;
2422 SET_ZN(res); gpu_flag_c = (RN >> 31) & 1;
2426 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2430 static void gpu_opcode_shrq(void)
2434 WriteLog("%06X: SHRQ #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2436 int32 r1 = gpu_convert_zero[IMM_1];
2437 uint32 res = RN >> r1;
2438 SET_ZN(res); gpu_flag_c = RN & 1;
2442 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2446 static void gpu_opcode_ror(void)
2450 WriteLog("%06X: ROR R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2452 uint32 r1 = RM & 0x1F;
2453 uint32 res = (RN >> r1) | (RN << (32 - r1));
2454 SET_ZN(res); gpu_flag_c = (RN >> 31) & 1;
2458 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2462 static void gpu_opcode_rorq(void)
2466 WriteLog("%06X: RORQ #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2468 uint32 r1 = gpu_convert_zero[IMM_1 & 0x1F];
2470 uint32 res = (r2 >> r1) | (r2 << (32 - r1));
2472 SET_ZN(res); gpu_flag_c = (r2 >> 31) & 0x01;
2475 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2479 static void gpu_opcode_sha(void)
2481 /* int dreg = jaguar.op & 31;
2482 int32 r1 = (int32)jaguar.r[(jaguar.op >> 5) & 31];
2483 uint32 r2 = jaguar.r[dreg];
2489 res = (r1 <= -32) ? 0 : (r2 << -r1);
2490 jaguar.FLAGS |= (r2 >> 30) & 2;
2494 res = (r1 >= 32) ? ((int32)r2 >> 31) : ((int32)r2 >> r1);
2495 jaguar.FLAGS |= (r2 << 1) & 2;
2497 jaguar.r[dreg] = res;
2502 WriteLog("%06X: SHA R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2508 res = ((int32)RM <= -32) ? 0 : (RN << -(int32)RM);
2509 gpu_flag_c = RN >> 31;
2513 res = ((int32)RM >= 32) ? ((int32)RN >> 31) : ((int32)RN >> (int32)RM);
2514 gpu_flag_c = RN & 0x01;
2520 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2523 /* int32 sRM=(int32)RM;
2529 if (shift>=32) shift=32;
2530 gpu_flag_c=(_RN&0x80000000)>>31;
2540 if (shift>=32) shift=32;
2544 _RN=((int32)_RN)>>1;
2553 static void gpu_opcode_sharq(void)
2555 #ifdef GPU_DIS_SHARQ
2557 WriteLog("%06X: SHARQ #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2559 uint32 res = (int32)RN >> gpu_convert_zero[IMM_1];
2560 SET_ZN(res); gpu_flag_c = RN & 0x01;
2562 #ifdef GPU_DIS_SHARQ
2564 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2568 static void gpu_opcode_sh(void)
2572 WriteLog("%06X: SH R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2574 if (RM & 0x80000000) // Shift left
2576 gpu_flag_c = RN >> 31;
2577 RN = ((int32)RM <= -32 ? 0 : RN << -(int32)RM);
2581 gpu_flag_c = RN & 0x01;
2582 RN = (RM >= 32 ? 0 : RN >> RM);
2587 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2591 //Temporary: Testing only!
2592 //#include "gpu2.cpp"
2593 //#include "gpu3.cpp"
2597 // New thread-safe GPU core
2599 int GPUCore(void * data)