6 // Originally by David Raingeard (Cal2)
7 // GCC/SDL port by Niels Wagenaar (Linux/WIN32) and Caz (BeOS)
8 // Cleanups, endian wrongness, and bad ASM amelioration by James Hammons
9 // (C) 2010 Underground Software
11 // JLH = James Hammons <jlhamm@acm.org>
14 // --- ---------- -------------------------------------------------------------
15 // JLH 01/16/2010 Created this log ;-)
16 // JLH 11/26/2011 Added fixes for LOAD/STORE alignment issues
19 // Note: Endian wrongness probably stems from the MAME origins of this emu and
20 // the braindead way in which MAME handles memory. :-)
22 // Problem with not booting the BIOS was the incorrect way that the
23 // SUBC instruction set the carry when the carry was set going in...
24 // Same problem with ADDC...
30 #include <string.h> // For memset
35 #include "m68000/m68kinterface.h"
40 // Seems alignment in loads & stores was off...
41 #define GPU_CORRECT_ALIGNMENT
44 // For GPU disassembly...
65 #define GPU_DIS_LOAD14I
66 #define GPU_DIS_LOAD14R
67 #define GPU_DIS_LOAD15I
68 #define GPU_DIS_LOAD15R
70 #define GPU_DIS_MOVEFA
72 #define GPU_DIS_MOVEPC
73 #define GPU_DIS_MOVETA
90 #define GPU_DIS_STOREB
91 #define GPU_DIS_STOREW
92 #define GPU_DIS_STORE14I
93 #define GPU_DIS_STORE14R
94 #define GPU_DIS_STORE15I
95 #define GPU_DIS_STORE15R
102 bool doGPUDis = false;
103 //bool doGPUDis = true;
107 GPU opcodes use (BIOS flying ATARI logo):
149 #define CINT0FLAG 0x0200 // Bit 9 of the flags word (same value as INT_CLR0)
150 #define CINT1FLAG 0x0400 // Bit 10 (same value as INT_CLR1)
151 #define CINT2FLAG 0x0800 // Bit 11 (same value as INT_CLR2)
152 #define CINT3FLAG 0x1000 // Bit 12 (same value as INT_CLR3)
153 #define CINT4FLAG 0x2000 // Bit 13 (same value as INT_CLR4)
154 #define CINT04FLAGS (CINT0FLAG | CINT1FLAG | CINT2FLAG | CINT3FLAG | CINT4FLAG) // All five bits; the flags write in GPUWriteLong shifts these right by 3 to clear the matching latch bits in gpu_control
158 #define ZERO_FLAG 0x0001 // Bit 0 of gpu_flags: zero (Z), mirrored in gpu_flag_z
159 #define CARRY_FLAG 0x0002 // Bit 1 of gpu_flags: carry (C), mirrored in gpu_flag_c
160 #define NEGA_FLAG 0x0004 // Bit 2 of gpu_flags: negative (N), mirrored in gpu_flag_n
162 #define INT_ENA0 0x0010 // Bits 4-8: interrupt enable 0-4; read back as (gpu_flags >> 4) & 0x1F
163 #define INT_ENA1 0x0020 // Interrupt enable 1
164 #define INT_ENA2 0x0040 // Interrupt enable 2
165 #define INT_ENA3 0x0080 // Interrupt enable 3
166 #define INT_ENA4 0x0100 // Interrupt enable 4
167 #define INT_CLR0 0x0200 // Bits 9-13: interrupt latch clear 0-4 (same values as CINT0FLAG..CINT4FLAG)
168 #define INT_CLR1 0x0400 // Interrupt latch clear 1
169 #define INT_CLR2 0x0800 // Interrupt latch clear 2
170 #define INT_CLR3 0x1000 // Interrupt latch clear 3
171 #define INT_CLR4 0x2000 // Interrupt latch clear 4
172 #define REGPAGE 0x4000 // Bit 14: register bank select, read by GPUUpdateRegisterBanks (which forces bank 0 while IMASK is set)
175 // External global variables
177 extern int start_logging;
178 extern int gpu_start_log;
180 // Private function prototypes
182 void GPUUpdateRegisterBanks(void);
183 void GPUDumpDisassembly(void);
184 void GPUDumpRegisters(void);
185 void GPUDumpMemory(void);
187 static void gpu_opcode_add(void);
188 static void gpu_opcode_addc(void);
189 static void gpu_opcode_addq(void);
190 static void gpu_opcode_addqt(void);
191 static void gpu_opcode_sub(void);
192 static void gpu_opcode_subc(void);
193 static void gpu_opcode_subq(void);
194 static void gpu_opcode_subqt(void);
195 static void gpu_opcode_neg(void);
196 static void gpu_opcode_and(void);
197 static void gpu_opcode_or(void);
198 static void gpu_opcode_xor(void);
199 static void gpu_opcode_not(void);
200 static void gpu_opcode_btst(void);
201 static void gpu_opcode_bset(void);
202 static void gpu_opcode_bclr(void);
203 static void gpu_opcode_mult(void);
204 static void gpu_opcode_imult(void);
205 static void gpu_opcode_imultn(void);
206 static void gpu_opcode_resmac(void);
207 static void gpu_opcode_imacn(void);
208 static void gpu_opcode_div(void);
209 static void gpu_opcode_abs(void);
210 static void gpu_opcode_sh(void);
211 static void gpu_opcode_shlq(void);
212 static void gpu_opcode_shrq(void);
213 static void gpu_opcode_sha(void);
214 static void gpu_opcode_sharq(void);
215 static void gpu_opcode_ror(void);
216 static void gpu_opcode_rorq(void);
217 static void gpu_opcode_cmp(void);
218 static void gpu_opcode_cmpq(void);
219 static void gpu_opcode_sat8(void);
220 static void gpu_opcode_sat16(void);
221 static void gpu_opcode_move(void);
222 static void gpu_opcode_moveq(void);
223 static void gpu_opcode_moveta(void);
224 static void gpu_opcode_movefa(void);
225 static void gpu_opcode_movei(void);
226 static void gpu_opcode_loadb(void);
227 static void gpu_opcode_loadw(void);
228 static void gpu_opcode_load(void);
229 static void gpu_opcode_loadp(void);
230 static void gpu_opcode_load_r14_indexed(void);
231 static void gpu_opcode_load_r15_indexed(void);
232 static void gpu_opcode_storeb(void);
233 static void gpu_opcode_storew(void);
234 static void gpu_opcode_store(void);
235 static void gpu_opcode_storep(void);
236 static void gpu_opcode_store_r14_indexed(void);
237 static void gpu_opcode_store_r15_indexed(void);
238 static void gpu_opcode_move_pc(void);
239 static void gpu_opcode_jump(void);
240 static void gpu_opcode_jr(void);
241 static void gpu_opcode_mmult(void);
242 static void gpu_opcode_mtoi(void);
243 static void gpu_opcode_normi(void);
244 static void gpu_opcode_nop(void);
245 static void gpu_opcode_load_r14_ri(void);
246 static void gpu_opcode_load_r15_ri(void);
247 static void gpu_opcode_store_r14_ri(void);
248 static void gpu_opcode_store_r15_ri(void);
249 static void gpu_opcode_sat24(void);
250 static void gpu_opcode_pack(void);
252 // This is wrong, since it doesn't take pipeline effects into account. !!! FIX !!!
253 /*uint8_t gpu_opcode_cycles[64] =
255 3, 3, 3, 3, 3, 3, 3, 3,
256 3, 3, 3, 3, 3, 3, 3, 3,
257 3, 3, 1, 3, 1, 18, 3, 3,
258 3, 3, 3, 3, 3, 3, 3, 3,
259 3, 3, 2, 2, 2, 2, 3, 4,
260 5, 4, 5, 6, 6, 1, 1, 1,
261 1, 2, 2, 2, 1, 1, 9, 3,
262 3, 1, 6, 6, 2, 2, 3, 3
264 //Here's a QnD kludge...
265 //This is wrong, wrong, WRONG, but it seems to work for the time being...
266 //(That is, it fixes Flip Out which relies on GPU timing rather than semaphores. Bad developers! Bad!)
267 //What's needed here is a way to take pipeline effects into account (including pipeline stalls!)...
268 /*uint8_t gpu_opcode_cycles[64] =
270 1, 1, 1, 1, 1, 1, 1, 1,
271 1, 1, 1, 1, 1, 1, 1, 1,
272 1, 1, 1, 1, 1, 9, 1, 1,
273 1, 1, 1, 1, 1, 1, 1, 1,
274 1, 1, 1, 1, 1, 1, 1, 2,
275 2, 2, 2, 3, 3, 1, 1, 1,
276 1, 1, 1, 1, 1, 1, 4, 1,
277 1, 1, 3, 3, 1, 1, 1, 1
279 uint8_t gpu_opcode_cycles[64] =
281 1, 1, 1, 1, 1, 1, 1, 1,
282 1, 1, 1, 1, 1, 1, 1, 1,
283 1, 1, 1, 1, 1, 1, 1, 1,
284 1, 1, 1, 1, 1, 1, 1, 1,
285 1, 1, 1, 1, 1, 1, 1, 1,
286 1, 1, 1, 1, 1, 1, 1, 1,
287 1, 1, 1, 1, 1, 1, 1, 1,
288 1, 1, 1, 1, 1, 1, 1, 1
291 void (*gpu_opcode[64])()=
293 gpu_opcode_add, gpu_opcode_addc, gpu_opcode_addq, gpu_opcode_addqt,
294 gpu_opcode_sub, gpu_opcode_subc, gpu_opcode_subq, gpu_opcode_subqt,
295 gpu_opcode_neg, gpu_opcode_and, gpu_opcode_or, gpu_opcode_xor,
296 gpu_opcode_not, gpu_opcode_btst, gpu_opcode_bset, gpu_opcode_bclr,
297 gpu_opcode_mult, gpu_opcode_imult, gpu_opcode_imultn, gpu_opcode_resmac,
298 gpu_opcode_imacn, gpu_opcode_div, gpu_opcode_abs, gpu_opcode_sh,
299 gpu_opcode_shlq, gpu_opcode_shrq, gpu_opcode_sha, gpu_opcode_sharq,
300 gpu_opcode_ror, gpu_opcode_rorq, gpu_opcode_cmp, gpu_opcode_cmpq,
301 gpu_opcode_sat8, gpu_opcode_sat16, gpu_opcode_move, gpu_opcode_moveq,
302 gpu_opcode_moveta, gpu_opcode_movefa, gpu_opcode_movei, gpu_opcode_loadb,
303 gpu_opcode_loadw, gpu_opcode_load, gpu_opcode_loadp, gpu_opcode_load_r14_indexed,
304 gpu_opcode_load_r15_indexed, gpu_opcode_storeb, gpu_opcode_storew, gpu_opcode_store,
305 gpu_opcode_storep, gpu_opcode_store_r14_indexed, gpu_opcode_store_r15_indexed, gpu_opcode_move_pc,
306 gpu_opcode_jump, gpu_opcode_jr, gpu_opcode_mmult, gpu_opcode_mtoi,
307 gpu_opcode_normi, gpu_opcode_nop, gpu_opcode_load_r14_ri, gpu_opcode_load_r15_ri,
308 gpu_opcode_store_r14_ri, gpu_opcode_store_r15_ri, gpu_opcode_sat24, gpu_opcode_pack,
311 static uint8_t gpu_ram_8[0x1000];
313 static uint32_t gpu_acc;
314 static uint32_t gpu_remain;
315 static uint32_t gpu_hidata;
316 static uint32_t gpu_flags;
317 static uint32_t gpu_matrix_control;
318 static uint32_t gpu_pointer_to_matrix;
319 static uint32_t gpu_data_organization;
320 static uint32_t gpu_control;
321 static uint32_t gpu_div_control;
322 // There is a distinct advantage to having these separated out--there's no need to clear
323 // a bit before writing a result. I.e., if the result of an operation leaves a zero in
324 // the carry flag, you don't have to zero gpu_flag_c before you can write that zero!
325 static uint8_t gpu_flag_z, gpu_flag_n, gpu_flag_c;
326 uint32_t gpu_reg_bank_0[32];
327 uint32_t gpu_reg_bank_1[32];
328 static uint32_t * gpu_reg;
329 static uint32_t * gpu_alternate_reg;
331 static uint32_t gpu_instruction;
332 static uint32_t gpu_opcode_first_parameter;
333 static uint32_t gpu_opcode_second_parameter;
335 #define GPU_RUNNING (gpu_control & 0x01) // Non-zero while bit 0 of gpu_control is set
337 #define RM gpu_reg[gpu_opcode_first_parameter] // Register in the current bank (gpu_reg) indexed by opcode bits 5-9
338 #define RN gpu_reg[gpu_opcode_second_parameter] // Register in the current bank (gpu_reg) indexed by opcode bits 0-4
339 #define ALTERNATE_RM gpu_alternate_reg[gpu_opcode_first_parameter] // Same index as RM, but in the alternate bank
340 #define ALTERNATE_RN gpu_alternate_reg[gpu_opcode_second_parameter] // Same index as RN, but in the alternate bank
341 #define IMM_1 gpu_opcode_first_parameter // Opcode bits 5-9 used directly as a 5-bit value
342 #define IMM_2 gpu_opcode_second_parameter // Opcode bits 0-4 used directly as a 5-bit value
344 #define SET_FLAG_Z(r) (gpu_flag_z = ((r) == 0)); // Z = 1 when r is zero, else 0. NOTE: the expansion already ends in ';'
345 #define SET_FLAG_N(r) (gpu_flag_n = (((uint32_t)(r) >> 31) & 0x01)); // N = bit 31 of r. NOTE: the expansion already ends in ';'
347 #define RESET_FLAG_Z() gpu_flag_z = 0; // Clear Z (expansion ends in ';')
348 #define RESET_FLAG_N() gpu_flag_n = 0; // Clear N (expansion ends in ';')
349 #define RESET_FLAG_C() gpu_flag_c = 0; // Clear C (expansion ends in ';')
351 #define CLR_Z (gpu_flag_z = 0) // Expression form: clear Z
352 #define CLR_ZN (gpu_flag_z = gpu_flag_n = 0) // Expression form: clear Z and N
353 #define CLR_ZNC (gpu_flag_z = gpu_flag_n = gpu_flag_c = 0) // Expression form: clear Z, N and C
354 #define SET_Z(r) (gpu_flag_z = ((r) == 0)) // Expression form of SET_FLAG_Z (no trailing ';')
355 #define SET_N(r) (gpu_flag_n = (((uint32_t)(r) >> 31) & 0x01)) // Expression form of SET_FLAG_N (no trailing ';')
356 #define SET_C_ADD(a,b) (gpu_flag_c = ((uint32_t)(b) > (uint32_t)(~(a)))) // C = 1 when a + b does not fit in 32 bits (b > 0xFFFFFFFF - a)
357 #define SET_C_SUB(a,b) (gpu_flag_c = ((uint32_t)(b) > (uint32_t)(a))) // C = 1 when a - b borrows (b > a, unsigned)
358 #define SET_ZN(r) SET_N(r); SET_Z(r) // Two statements: not safe as the sole body of an unbraced if/else/loop
359 #define SET_ZNC_ADD(a,b,r) SET_N(r); SET_Z(r); SET_C_ADD(a,b) // Three statements (same caveat): N and Z from result r, C from operands a and b
360 #define SET_ZNC_SUB(a,b,r) SET_N(r); SET_Z(r); SET_C_SUB(a,b) // Three statements (same caveat): N and Z from result r, borrow from operands a and b
362 uint32_t gpu_convert_zero[32] =
363 { 32,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 };
365 uint8_t * branch_condition_table = 0;
366 #define BRANCH_CONDITION(x) branch_condition_table[(x) + ((jaguar_flags & 7) << 5)]
368 uint32_t gpu_opcode_use[64];
370 const char * gpu_opcode_str[64]=
372 "add", "addc", "addq", "addqt",
373 "sub", "subc", "subq", "subqt",
374 "neg", "and", "or", "xor",
375 "not", "btst", "bset", "bclr",
376 "mult", "imult", "imultn", "resmac",
377 "imacn", "div", "abs", "sh",
378 "shlq", "shrq", "sha", "sharq",
379 "ror", "rorq", "cmp", "cmpq",
380 "sat8", "sat16", "move", "moveq",
381 "moveta", "movefa", "movei", "loadb",
382 "loadw", "load", "loadp", "load_r14_indexed",
383 "load_r15_indexed", "storeb", "storew", "store",
384 "storep", "store_r14_indexed","store_r15_indexed","move_pc",
385 "jump", "jr", "mmult", "mtoi",
386 "normi", "nop", "load_r14_ri", "load_r15_ri",
387 "store_r14_ri", "store_r15_ri", "sat24", "pack",
390 static uint32_t gpu_in_exec = 0;
391 static uint32_t gpu_releaseTimeSlice_flag = 0;
393 void GPUReleaseTimeslice(void)
395 gpu_releaseTimeSlice_flag = 1;
398 uint32_t GPUGetPC(void)
403 void build_branch_condition_table(void)
405 if (!branch_condition_table)
407 branch_condition_table = (uint8_t *)malloc(32 * 8 * sizeof(branch_condition_table[0]));
409 if (branch_condition_table)
411 for(int i=0; i<8; i++)
413 for(int j=0; j<32; j++)
420 if (!(i & ZERO_FLAG))
423 if (i & (CARRY_FLAG << (j >> 4)))
426 if (!(i & (CARRY_FLAG << (j >> 4))))
428 branch_condition_table[i * 32 + j] = result;
436 // GPU byte access (read)
438 uint8_t GPUReadByte(uint32_t offset, uint32_t who/*=UNKNOWN*/)
440 if (offset >= 0xF02000 && offset <= 0xF020FF)
441 WriteLog("GPU: ReadByte--Attempt to read from GPU register file by %s!\n", whoName[who]);
443 if ((offset >= GPU_WORK_RAM_BASE) && (offset < GPU_WORK_RAM_BASE+0x1000))
444 return gpu_ram_8[offset & 0xFFF];
445 else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset < GPU_CONTROL_RAM_BASE+0x20))
447 uint32_t data = GPUReadLong(offset & 0xFFFFFFFC, who);
449 if ((offset & 0x03) == 0)
451 else if ((offset & 0x03) == 1)
452 return (data >> 16) & 0xFF;
453 else if ((offset & 0x03) == 2)
454 return (data >> 8) & 0xFF;
455 else if ((offset & 0x03) == 3)
459 return JaguarReadByte(offset, who);
463 // GPU word access (read)
465 uint16_t GPUReadWord(uint32_t offset, uint32_t who/*=UNKNOWN*/)
467 if (offset >= 0xF02000 && offset <= 0xF020FF)
468 WriteLog("GPU: ReadWord--Attempt to read from GPU register file by %s!\n", whoName[who]);
470 if ((offset >= GPU_WORK_RAM_BASE) && (offset < GPU_WORK_RAM_BASE+0x1000))
473 uint16_t data = ((uint16_t)gpu_ram_8[offset] << 8) | (uint16_t)gpu_ram_8[offset+1];
476 else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset < GPU_CONTROL_RAM_BASE+0x20))
478 // This looks and smells wrong...
479 // But it *might* be OK...
480 if (offset & 0x01) // Catch cases 1 & 3... (unaligned read)
481 return (GPUReadByte(offset, who) << 8) | GPUReadByte(offset+1, who);
483 uint32_t data = GPUReadLong(offset & 0xFFFFFFFC, who);
485 if (offset & 0x02) // Cases 0 & 2...
486 return data & 0xFFFF;
491 //TEMP--Mirror of F03000? No. Writes only...
492 //if (offset >= 0xF0B000 && offset <= 0xF0BFFF)
493 //WriteLog("[GPUR16] --> Possible GPU RAM mirror access by %s!", whoName[who]);
495 return JaguarReadWord(offset, who);
499 // GPU dword access (read)
501 uint32_t GPUReadLong(uint32_t offset, uint32_t who/*=UNKNOWN*/)
503 if (offset >= 0xF02000 && offset <= 0xF020FF)
505 WriteLog("GPU: ReadLong--Attempt to read from GPU register file (%X) by %s!\n", offset, whoName[who]);
506 uint32_t reg = (offset & 0xFC) >> 2;
507 return (reg < 32 ? gpu_reg_bank_0[reg] : gpu_reg_bank_1[reg - 32]);
510 // if ((offset >= GPU_WORK_RAM_BASE) && (offset < GPU_WORK_RAM_BASE + 0x1000))
511 if ((offset >= GPU_WORK_RAM_BASE) && (offset <= GPU_WORK_RAM_BASE + 0x0FFC))
514 return ((uint32_t)gpu_ram_8[offset] << 24) | ((uint32_t)gpu_ram_8[offset+1] << 16)
515 | ((uint32_t)gpu_ram_8[offset+2] << 8) | (uint32_t)gpu_ram_8[offset+3];//*/
516 // return GET32(gpu_ram_8, offset);
518 // else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset < GPU_CONTROL_RAM_BASE+0x20))
519 else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset <= GPU_CONTROL_RAM_BASE + 0x1C))
525 gpu_flag_c = (gpu_flag_c ? 1 : 0);
526 gpu_flag_z = (gpu_flag_z ? 1 : 0);
527 gpu_flag_n = (gpu_flag_n ? 1 : 0);
529 gpu_flags = (gpu_flags & 0xFFFFFFF8) | (gpu_flag_n << 2) | (gpu_flag_c << 1) | gpu_flag_z;
531 return gpu_flags & 0xFFFFC1FF;
533 return gpu_matrix_control;
535 return gpu_pointer_to_matrix;
537 return gpu_data_organization;
546 default: // unaligned long read
548 WriteLog("GPU: Read32--unaligned 32 bit read at %08X by %s.\n", GPU_CONTROL_RAM_BASE + offset, whoName[who]);
553 //TEMP--Mirror of F03000? No. Writes only...
554 //if (offset >= 0xF0B000 && offset <= 0xF0BFFF)
555 // WriteLog("[GPUR32] --> Possible GPU RAM mirror access by %s!\n", whoName[who]);
556 /*if (offset >= 0xF1D000 && offset <= 0xF1DFFF)
557 WriteLog("[GPUR32] --> Reading from Wavetable ROM!\n");//*/
559 return (JaguarReadWord(offset, who) << 16) | JaguarReadWord(offset + 2, who);
563 // GPU byte access (write)
565 void GPUWriteByte(uint32_t offset, uint8_t data, uint32_t who/*=UNKNOWN*/)
567 if (offset >= 0xF02000 && offset <= 0xF020FF)
568 WriteLog("GPU: WriteByte--Attempt to write to GPU register file by %s!\n", whoName[who]);
570 if ((offset >= GPU_WORK_RAM_BASE) && (offset <= GPU_WORK_RAM_BASE + 0x0FFF))
572 gpu_ram_8[offset & 0xFFF] = data;
574 //This is the same stupid worthless code that was in the DSP!!! AARRRGGGGHHHHH!!!!!!
577 m68k_end_timeslice();
578 dsp_releaseTimeslice();
582 else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset <= GPU_CONTROL_RAM_BASE + 0x1F))
584 uint32_t reg = offset & 0x1C;
585 int bytenum = offset & 0x03;
587 //This is definitely wrong!
588 if ((reg >= 0x1C) && (reg <= 0x1F))
589 gpu_div_control = (gpu_div_control & (~(0xFF << (bytenum << 3)))) | (data << (bytenum << 3));
592 uint32_t old_data = GPUReadLong(offset & 0xFFFFFFFC, who); // Fetch the containing long (mask was missing a digit: 0xFFFFFFC)
593 bytenum = 3 - bytenum; // Motorola (big-endian) order: byte 0 of the long is the most significant
594 old_data = (old_data & ~((uint32_t)0xFF << (bytenum << 3))) | ((uint32_t)data << (bytenum << 3)); // Splice the byte in; unsigned shifts avoid overflowing int when bytenum is 3
595 GPUWriteLong(offset & 0xFFFFFFFC, old_data, who); // Write the merged long back to the same long-aligned address
599 // WriteLog("gpu: writing %.2x at 0x%.8x\n",data,offset);
600 JaguarWriteByte(offset, data, who);
604 // GPU word access (write)
606 void GPUWriteWord(uint32_t offset, uint16_t data, uint32_t who/*=UNKNOWN*/)
608 if (offset >= 0xF02000 && offset <= 0xF020FF)
609 WriteLog("GPU: WriteWord--Attempt to write to GPU register file by %s!\n", whoName[who]);
611 if ((offset >= GPU_WORK_RAM_BASE) && (offset <= GPU_WORK_RAM_BASE + 0x0FFE))
613 gpu_ram_8[offset & 0xFFF] = (data>>8) & 0xFF;
614 gpu_ram_8[(offset+1) & 0xFFF] = data & 0xFF;//*/
616 SET16(gpu_ram_8, offset, data);//*/
618 /*if (offset >= 0xF03214 && offset < 0xF0321F)
619 WriteLog("GPU: Writing WORD (%04X) to GPU RAM (%08X)...\n", data, offset);//*/
622 //This is the same stupid worthless code that was in the DSP!!! AARRRGGGGHHHHH!!!!!!
625 m68k_end_timeslice();
626 dsp_releaseTimeslice();
630 else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset <= GPU_CONTROL_RAM_BASE + 0x1E))
632 if (offset & 0x01) // This is supposed to weed out unaligned writes, but does nothing...
635 WriteLog("GPU: Write16--unaligned write @ %08X [%04X]\n", offset, data);
640 //Dual locations in this range: $1C Divide unit remainder/Divide unit control (R/W)
641 //This just literally sucks.
642 if ((offset & 0x1C) == 0x1C)
644 //This doesn't look right either--handles cases 1, 2, & 3 all the same!
646 gpu_div_control = (gpu_div_control & 0xFFFF0000) | (data & 0xFFFF);
648 gpu_div_control = (gpu_div_control & 0x0000FFFF) | ((data & 0xFFFF) << 16);
652 //WriteLog("[GPU W16:%08X,%04X]", offset, data);
653 uint32_t old_data = GPUReadLong(offset & 0xFFFFFFFC, who); // Fetch the containing long (mask was missing a digit: 0xFFFFFFC)
656 old_data = (old_data & 0xFFFF0000) | (data & 0xFFFF); // Replace the low word
658 old_data = (old_data & 0x0000FFFF) | ((uint32_t)data << 16); // Replace the high word; unsigned shift avoids overflowing int when bit 15 of data is set
660 GPUWriteLong(offset & 0xFFFFFFFC, old_data, who); // Write the merged long back to the same long-aligned address
665 else if ((offset == GPU_WORK_RAM_BASE + 0x0FFF) || (offset == GPU_CONTROL_RAM_BASE + 0x1F)) // Word write starting on the last byte of work RAM or of the control registers; the second operand previously lacked "offset ==", so it was a non-zero constant and this branch matched every offset
668 WriteLog("GPU: Write16--unaligned write @ %08X by %s [%04X]!\n", offset, whoName[who], data);
674 // Have to be careful here--this can cause an infinite loop!
675 JaguarWriteWord(offset, data, who);
679 // GPU dword access (write)
681 void GPUWriteLong(uint32_t offset, uint32_t data, uint32_t who/*=UNKNOWN*/)
683 if (offset >= 0xF02000 && offset <= 0xF020FF)
684 WriteLog("GPU: WriteLong--Attempt to write to GPU register file by %s!\n", whoName[who]);
686 // if ((offset >= GPU_WORK_RAM_BASE) && (offset < GPU_WORK_RAM_BASE + 0x1000))
687 if ((offset >= GPU_WORK_RAM_BASE) && (offset <= GPU_WORK_RAM_BASE + 0x0FFC))
692 WriteLog("GPU: Write32--unaligned write @ %08X [%08X] by %s\n", offset, data, whoName[who]);
698 SET32(gpu_ram_8, offset, data);
701 // else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset < GPU_CONTROL_RAM_BASE+0x20))
702 else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset <= GPU_CONTROL_RAM_BASE + 0x1C))
709 bool IMASKCleared = (gpu_flags & IMASK) && !(data & IMASK);
710 // NOTE: According to the JTRM, writing a 1 to IMASK has no effect; only the
711 // IRQ logic can set it. So we mask it out here to prevent problems...
712 gpu_flags = data & (~IMASK);
713 gpu_flag_z = gpu_flags & ZERO_FLAG;
714 gpu_flag_c = (gpu_flags & CARRY_FLAG) >> 1;
715 gpu_flag_n = (gpu_flags & NEGA_FLAG) >> 2;
716 GPUUpdateRegisterBanks();
717 gpu_control &= ~((gpu_flags & CINT04FLAGS) >> 3); // Interrupt latch clear bits
718 //Writing here is only an interrupt enable--this approach is just plain wrong!
720 //This, however, is A-OK! ;-)
721 if (IMASKCleared) // If IMASK was cleared,
722 GPUHandleIRQs(); // see if any other interrupts need servicing!
724 if (gpu_flags & (INT_ENA0 | INT_ENA1 | INT_ENA2 | INT_ENA3 | INT_ENA4))
725 WriteLog("GPU: Interrupt enable set by %s! Bits: %02X\n", whoName[who], (gpu_flags >> 4) & 0x1F);
726 WriteLog("GPU: REGPAGE %s...\n", (gpu_flags & REGPAGE ? "set" : "cleared"));
731 gpu_matrix_control = data;
734 // This can only point to long aligned addresses
735 gpu_pointer_to_matrix = data & 0xFFFFFFFC;
738 gpu_data_organization = data;
743 WriteLog("GPU: %s setting GPU PC to %08X %s\n", whoName[who], gpu_pc, (GPU_RUNNING ? "(GPU is RUNNING!)" : ""));//*/
748 // uint32_t gpu_was_running = GPU_RUNNING;
749 data &= ~0xF7C0; // Disable writes to INT_LAT0-4 & TOM version number
751 // check for GPU -> CPU interrupt
754 //WriteLog("GPU->CPU interrupt\n");
755 if (TOMIRQEnabled(IRQ_GPU))
757 //This is the programmer's responsibility, to make sure the handler is valid, not ours!
758 // if ((TOMIRQEnabled(IRQ_GPU))// && (JaguarInterruptHandlerIsValid(64)))
760 TOMSetPendingGPUInt();
761 m68k_set_irq(2); // Set 68000 IPL 2
762 GPUReleaseTimeslice();
768 // check for CPU -> GPU interrupt #0
771 //WriteLog("CPU->GPU interrupt\n");
772 GPUSetIRQLine(0, ASSERT_LINE);
773 m68k_end_timeslice();
774 DSPReleaseTimeslice();
781 //WriteLog("asked to perform a single step (single step is %senabled)\n",(data&0x8)?"":"not ");
784 gpu_control = (gpu_control & 0xF7C0) | (data & (~0xF7C0));
786 // if gpu wasn't running but is now running, execute a few cycles
787 #ifndef GPU_SINGLE_STEPPING
788 /* if (!gpu_was_running && GPU_RUNNING)
791 WriteLog("GPU: Write32--About to do stupid braindead GPU execution for 200 cycles.\n");
796 #endif // GPU_DEBUG//*/
798 if (gpu_control & 0x18)
800 #endif // #ifndef GPU_SINGLE_STEPPING
802 WriteLog("Write to GPU CTRL by %s: %08X ", whoName[who], data);
804 WriteLog(" --> Starting to run at %08X by %s...", gpu_pc, whoName[who]);
806 WriteLog(" --> Stopped by %s! (GPU_PC: %08X)", whoName[who], gpu_pc);
810 // GPUDumpDisassembly();
813 if (gpu_pc == 0xF035D8)
815 // GPUDumpDisassembly();
818 gpu_control &= 0xFFFFFFFE; // Don't run it and let's see what happens!
819 //Hmm. Seems to lock up when going into the demo...
820 //Try to disable the collision altogether!
823 extern int effect_start5;
824 static bool finished = false;
825 //if (GPU_RUNNING && effect_start5 && !finished)
826 if (GPU_RUNNING && effect_start5 && gpu_pc == 0xF035D8)
828 // Let's do a dump of $6528!
829 /* uint32_t numItems = JaguarReadWord(0x6BD6);
830 WriteLog("\nDump of $6528: %u items.\n\n", numItems);
831 for(int i=0; i<numItems*3*4; i+=3*4)
833 WriteLog("\t%04X: %08X %08X %08X -> ", 0x6528+i, JaguarReadLong(0x6528+i),
834 JaguarReadLong(0x6528+i+4), JaguarReadLong(0x6528+i+8));
835 uint16_t link = JaguarReadWord(0x6528+i+8+2);
836 for(int j=0; j<40; j+=4)
837 WriteLog("%08X ", JaguarReadLong(link + j));
841 // Let's try a manual blit here...
842 //This isn't working the way it should! !!! FIX !!!
843 //Err, actually, it is.
844 // NOW, it works right! Problem solved!!! It's a blitter bug!
845 /* uint32_t src = 0x4D54, dst = 0xF03000, width = 10 * 4;
846 for(int y=0; y<127; y++)
848 for(int x=0; x<2; x++)
850 JaguarWriteLong(dst, JaguarReadLong(src));
855 src += width - (2 * 4);
859 WriteLog("\nGPU: About to execute collision detection code.\n\n");//*/
861 /* WriteLog("\nGPU: About to execute collision detection code. Data @ 4D54:\n\n");
863 for(int i=0x004D54; i<0x004D54+2048; i++)
865 WriteLog("%02X ", JaguarReadByte(i));
873 WriteLog("\n\nData @ F03000:\n\n");
875 for(int i=0xF03000; i<0xF03200; i++)
877 WriteLog("%02X ", JaguarReadByte(i));
891 /*if (!GPU_RUNNING && finished)
893 WriteLog("\nGPU: Finished collision detection code. Exiting!\n\n");
898 // (?) If we're set running by the M68K (or DSP?) then end its timeslice to
899 // allow the GPU a chance to run...
900 // Yes! This partially fixed Trevor McFur...
902 m68k_end_timeslice();
909 gpu_div_control = data;
911 // default: // unaligned long write
918 // JaguarWriteWord(offset, (data >> 16) & 0xFFFF, who);
919 // JaguarWriteWord(offset+2, data & 0xFFFF, who);
920 // We're a 32-bit processor, we can do a long write...!
921 JaguarWriteLong(offset, data, who);
925 // Change register banks if necessary
927 void GPUUpdateRegisterBanks(void)
929 int bank = (gpu_flags & REGPAGE); // REGPAGE bit
931 if (gpu_flags & IMASK) // IMASK bit
932 bank = 0; // IMASK forces main bank to be bank 0
935 gpu_reg = gpu_reg_bank_1, gpu_alternate_reg = gpu_reg_bank_0;
937 gpu_reg = gpu_reg_bank_0, gpu_alternate_reg = gpu_reg_bank_1;
940 void GPUHandleIRQs(void)
942 // Bail out if we're already in an interrupt!
943 if (gpu_flags & IMASK)
946 // Get the interrupt latch & enable bits
947 uint32_t bits = (gpu_control >> 6) & 0x1F, mask = (gpu_flags >> 4) & 0x1F;
949 // Bail out if latched interrupts aren't enabled
954 // Determine which interrupt to service
955 uint32_t which = 0; //Isn't there a #pragma to disable this warning???
968 WriteLog("GPU: Generating IRQ #%i\n", which);
970 // set the interrupt flag
972 GPUUpdateRegisterBanks();
974 // subqt #4,r31 ; pre-decrement stack pointer
975 // move pc,r30 ; address of interrupted code
976 // store r30,(r31) ; store return address
978 GPUWriteLong(gpu_reg[31], gpu_pc - 2, GPU);
980 // movei #service_address,r30 ; pointer to ISR entry
981 // jump (r30) ; jump to ISR
983 gpu_pc = gpu_reg[30] = GPU_WORK_RAM_BASE + (which * 0x10);
986 void GPUSetIRQLine(int irqline, int state)
989 WriteLog("GPU: Setting GPU IRQ line #%i\n", irqline);
991 uint32_t mask = 0x0040 << irqline;
992 gpu_control &= ~mask; // Clear the interrupt latch
996 gpu_control |= mask; // Assert the interrupt latch
997 GPUHandleIRQs(); // And handle the interrupt...
1001 //TEMPORARY: Testing only!
1007 // memory_malloc_secure((void **)&gpu_ram_8, 0x1000, "GPU work RAM");
1008 // memory_malloc_secure((void **)&gpu_reg_bank_0, 32 * sizeof(int32_t), "GPU bank 0 regs");
1009 // memory_malloc_secure((void **)&gpu_reg_bank_1, 32 * sizeof(int32_t), "GPU bank 1 regs");
1011 build_branch_condition_table();
1015 //TEMPORARY: Testing only!
1022 // GPU registers (directly visible)
1023 gpu_flags = 0x00000000;
1024 gpu_matrix_control = 0x00000000;
1025 gpu_pointer_to_matrix = 0x00000000;
1026 gpu_data_organization = 0xFFFFFFFF;
1027 gpu_pc = 0x00F03000;
1028 gpu_control = 0x00002800; // Correctly sets this as TOM Rev. 2
1029 gpu_hidata = 0x00000000;
1030 gpu_remain = 0x00000000; // These two registers are RO/WO
1031 gpu_div_control = 0x00000000;
1033 // GPU internal register
1034 gpu_acc = 0x00000000;
1036 gpu_reg = gpu_reg_bank_0;
1037 gpu_alternate_reg = gpu_reg_bank_1;
1039 for(int i=0; i<32; i++)
1040 gpu_reg[i] = gpu_alternate_reg[i] = 0x00000000;
1043 memset(gpu_ram_8, 0xFF, 0x1000);
1045 //not needed GPUInterruptPending = false;
1048 // Contents of local RAM are quasi-stable; we simulate this by randomizing RAM contents
1049 for(uint32_t i=0; i<4096; i+=4)
1050 *((uint32_t *)(&gpu_ram_8[i])) = rand();
1053 uint32_t GPUReadPC(void)
1058 void GPUResetStats(void)
1060 for(uint32_t i=0; i<64; i++)
1061 gpu_opcode_use[i] = 0;
1062 WriteLog("--> GPU stats were reset!\n");
1065 void GPUDumpDisassembly(void)
1069 WriteLog("\n---[GPU code at 00F03000]---------------------------\n");
1070 uint32_t j = 0xF03000;
1071 while (j <= 0xF03FFF)
1074 j += dasmjag(JAGUAR_GPU, buffer, j);
1075 WriteLog("\t%08X: %s\n", oldj, buffer);
1079 void GPUDumpRegisters(void)
1081 WriteLog("\n---[GPU flags: NCZ %d%d%d]-----------------------\n", gpu_flag_n, gpu_flag_c, gpu_flag_z);
1082 WriteLog("\nRegisters bank 0\n");
1083 for(int j=0; j<8; j++)
1085 WriteLog("\tR%02i = %08X R%02i = %08X R%02i = %08X R%02i = %08X\n",
1086 (j << 2) + 0, gpu_reg_bank_0[(j << 2) + 0],
1087 (j << 2) + 1, gpu_reg_bank_0[(j << 2) + 1],
1088 (j << 2) + 2, gpu_reg_bank_0[(j << 2) + 2],
1089 (j << 2) + 3, gpu_reg_bank_0[(j << 2) + 3]);
1091 WriteLog("Registers bank 1\n");
1092 for(int j=0; j<8; j++)
1094 WriteLog("\tR%02i = %08X R%02i = %08X R%02i = %08X R%02i = %08X\n",
1095 (j << 2) + 0, gpu_reg_bank_1[(j << 2) + 0],
1096 (j << 2) + 1, gpu_reg_bank_1[(j << 2) + 1],
1097 (j << 2) + 2, gpu_reg_bank_1[(j << 2) + 2],
1098 (j << 2) + 3, gpu_reg_bank_1[(j << 2) + 3]);
1102 void GPUDumpMemory(void)
1104 WriteLog("\n---[GPU data at 00F03000]---------------------------\n");
1105 for(int i=0; i<0xFFF; i+=4)
1106 WriteLog("\t%08X: %02X %02X %02X %02X\n", 0xF03000+i, gpu_ram_8[i],
1107 gpu_ram_8[i+1], gpu_ram_8[i+2], gpu_ram_8[i+3]);
1112 WriteLog("GPU: Stopped at PC=%08X (GPU %s running)\n", (unsigned int)gpu_pc, GPU_RUNNING ? "was" : "wasn't");
1114 // Get the interrupt latch & enable bits
1115 uint8_t bits = (gpu_control >> 6) & 0x1F, mask = (gpu_flags >> 4) & 0x1F;
1116 WriteLog("GPU: Latch bits = %02X, enable bits = %02X\n", bits, mask);
1119 GPUDumpDisassembly();
1121 WriteLog("\nGPU opcodes use:\n");
1122 for(int i=0; i<64; i++)
1124 if (gpu_opcode_use[i])
1125 WriteLog("\t%17s %lu\n", gpu_opcode_str[i], (unsigned long)gpu_opcode_use[i]); // gpu_opcode_use[] is uint32_t; cast so the argument matches %lu (assumes WriteLog takes printf-style varargs)
1129 // memory_free(gpu_ram_8);
1130 // memory_free(gpu_reg_bank_0);
1131 // memory_free(gpu_reg_bank_1);
1135 // Main GPU execution core
1137 static int testCount = 1;
1139 static bool tripwire = false;
1140 void GPUExec(int32_t cycles)
1145 #ifdef GPU_SINGLE_STEPPING
1146 if (gpu_control & 0x18)
1149 gpu_control &= ~0x10;
1153 gpu_releaseTimeSlice_flag = 0;
1156 while (cycles > 0 && GPU_RUNNING)
1158 if (gpu_ram_8[0x054] == 0x98 && gpu_ram_8[0x055] == 0x0A && gpu_ram_8[0x056] == 0x03
1159 && gpu_ram_8[0x057] == 0x00 && gpu_ram_8[0x058] == 0x00 && gpu_ram_8[0x059] == 0x00)
1161 if (gpu_pc == 0xF03000)
1163 extern uint32_t starCount;
1165 /* WriteLog("GPU: Starting starfield generator... Dump of [R03=%08X]:\n", gpu_reg_bank_0[03]);
1166 uint32_t base = gpu_reg_bank_0[3];
1167 for(uint32_t i=0; i<0x100; i+=16)
1169 WriteLog("%02X: ", i);
1170 for(uint32_t j=0; j<16; j++)
1172 WriteLog("%02X ", JaguarReadByte(base + i + j));
1177 // if (gpu_pc == 0xF03)
1181 /*if (gpu_pc == 0xF03B9E && gpu_reg_bank_0[01] == 0)
1184 WriteLog("GPU: Starting disassembly log...\n");
1187 /*if (gpu_pc == 0xF0359A)
1192 /* gpu_flag_c = (gpu_flag_c ? 1 : 0);
1193 gpu_flag_z = (gpu_flag_z ? 1 : 0);
1194 gpu_flag_n = (gpu_flag_n ? 1 : 0);*/
1196 if (gpu_pc == 0xF03200)
1200 uint16_t opcode = GPUReadWord(gpu_pc, GPU);
1201 uint32_t index = opcode >> 10;
1202 gpu_instruction = opcode; // Added for GPU #3...
1203 gpu_opcode_first_parameter = (opcode >> 5) & 0x1F;
1204 gpu_opcode_second_parameter = opcode & 0x1F;
1205 /*if (gpu_pc == 0xF03BE8)
1206 WriteLog("Start of OP frame write...\n");
1207 if (gpu_pc == 0xF03EEE)
1208 WriteLog("--> Writing BRANCH object ---\n");
1209 if (gpu_pc == 0xF03F62)
1210 WriteLog("--> Writing BITMAP object ***\n");//*/
1211 /*if (gpu_pc == 0xF03546)
1213 WriteLog("\n--> GPU PC: F03546\n");
1215 GPUDumpDisassembly();
1217 /*if (gpu_pc == 0xF033F6)
1219 WriteLog("\n--> GPU PC: F033F6\n");
1221 GPUDumpDisassembly();
1223 /*if (gpu_pc == 0xF033CC)
1225 WriteLog("\n--> GPU PC: F033CC\n");
1227 GPUDumpDisassembly();
1229 /*if (gpu_pc == 0xF033D6)
1231 WriteLog("\n--> GPU PC: F033D6 (#%d)\n", testCount++);
1235 /*if (gpu_pc == 0xF033D8)
1237 WriteLog("\n--> GPU PC: F033D8 (#%d)\n", testCount++);
1241 /*if (gpu_pc == 0xF0358E)
1243 WriteLog("\n--> GPU PC: F0358E (#%d)\n", testCount++);
1247 /*if (gpu_pc == 0xF034CA)
1249 WriteLog("\n--> GPU PC: F034CA (#%d)\n", testCount++);
1252 /*if (gpu_pc == 0xF034CA)
1254 len = gpu_reg[1] + 4;//, r9save = gpu_reg[9];
1255 WriteLog("\nAbout to subtract [#%d] (R14=%08X, R15=%08X, R9=%08X):\n ", testCount++, gpu_reg[14], gpu_reg[15], gpu_reg[9]);
1256 for(int i=0; i<len; i+=4)
1257 WriteLog(" %08X", GPUReadLong(gpu_reg[15]+i));
1259 for(int i=0; i<len; i+=4)
1260 WriteLog(" %08X", GPUReadLong(gpu_reg[14]+i));
1263 if (gpu_pc == 0xF034DE)
1265 WriteLog("\nSubtracted! (R14=%08X, R15=%08X):\n ", gpu_reg[14], gpu_reg[15]);
1266 for(int i=0; i<len; i+=4)
1267 WriteLog(" %08X", GPUReadLong(gpu_reg[15]+i));
1269 for(int i=0; i<len; i+=4)
1270 WriteLog(" %08X", GPUReadLong(gpu_reg[14]+i));
1272 for(int i=0; i<len; i+=4)
1273 WriteLog(" --------");
1275 for(int i=0; i<len; i+=4)
1276 WriteLog(" %08X", GPUReadLong(gpu_reg[9]+4+i));
1279 /*if (gpu_pc == 0xF035C8)
1281 WriteLog("\n--> GPU PC: F035C8 (#%d)\n", testCount++);
1283 GPUDumpDisassembly();
1288 // gpu_reset_stats();
1289 static char buffer[512];
1290 dasmjag(JAGUAR_GPU, buffer, gpu_pc);
1291 WriteLog("GPU: [%08X] %s (RM=%08X, RN=%08X) -> ", gpu_pc, buffer, RM, RN);
1293 //$E400 -> 1110 01 -> $39 -> 57
1296 gpu_opcode[index]();
1298 // gpu2_opcode[index]();
1300 //GPU #3 (Doesn't show ATARI logo! #1 & #2 do...)
1302 // gpu3_opcode[index]();
1305 //GPU: [00F03548] jr nz,00F03560 (0xd561) (RM=00F03114, RN=00000004) -> --> JR: Branch taken.
1306 /*static bool firstTime = true;
1307 if (gpu_pc == 0xF03548 && firstTime)
1310 // firstTime = false;
1312 //static char buffer[512];
1314 //while (k<0xF0356C)
1317 //k += dasmjag(JAGUAR_GPU, buffer, k);
1318 //WriteLog("GPU: [%08X] %s\n", oldk, buffer);
1320 // gpu_start_log = 1;
1322 //GPU: [00F0354C] jump nz,(r29) (0xd3a1) (RM=00F03314, RN=00000004) -> (RM=00F03314, RN=00000004)
1323 /*if (gpu_pc == 0xF0354C)
1324 gpu_flag_z = 0;//, gpu_start_log = 1;//*/
1326 cycles -= gpu_opcode_cycles[index];
1327 gpu_opcode_use[index]++;
1329 WriteLog("(RM=%08X, RN=%08X)\n", RM, RN);//*/
1330 if ((gpu_pc < 0xF03000 || gpu_pc > 0xF03FFF) && !tripwire)
1332 WriteLog("GPU: Executing outside local RAM! GPU_PC: %08X\n", gpu_pc);
1345 GPU opcodes use (offset punch--vertically below bad guy):
1367 load_r14_indexed 1183
1368 load_r15_indexed 1125
1371 store_r14_indexed 320
1380 static void gpu_opcode_jump(void)
1383 const char * condition[32] =
1384 { "T", "nz", "z", "???", "nc", "nc nz", "nc z", "???", "c", "c nz",
1385 "c z", "???", "???", "???", "???", "???", "???", "???", "???",
1386 "???", "nn", "nn nz", "nn z", "???", "n", "n nz", "n z", "???",
1387 "???", "???", "???", "F" };
1389 WriteLog("%06X: JUMP %s, (R%02u) [NCZ:%u%u%u, R%02u=%08X] ", gpu_pc-2, condition[IMM_2], IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM);
1392 /* gpu_flag_c = (gpu_flag_c ? 1 : 0);
1393 gpu_flag_z = (gpu_flag_z ? 1 : 0);
1394 gpu_flag_n = (gpu_flag_n ? 1 : 0);*/
1395 // KLUDGE: Used by BRANCH_CONDITION
1396 uint32_t jaguar_flags = (gpu_flag_n << 2) | (gpu_flag_c << 1) | gpu_flag_z;
1398 if (BRANCH_CONDITION(IMM_2))
1402 WriteLog("Branched!\n");
1405 WriteLog(" --> JUMP: Branch taken.\n");
1406 uint32_t delayed_pc = RM;
1408 gpu_pc = delayed_pc;
1409 /* uint16_t opcode = GPUReadWord(gpu_pc, GPU);
1410 gpu_opcode_first_parameter = (opcode >> 5) & 0x1F;
1411 gpu_opcode_second_parameter = opcode & 0x1F;
1413 gpu_pc = delayed_pc;
1414 gpu_opcode[opcode>>10]();//*/
1419 WriteLog("Branch NOT taken.\n");
1424 static void gpu_opcode_jr(void)
1427 const char * condition[32] =
1428 { "T", "nz", "z", "???", "nc", "nc nz", "nc z", "???", "c", "c nz",
1429 "c z", "???", "???", "???", "???", "???", "???", "???", "???",
1430 "???", "nn", "nn nz", "nn z", "???", "n", "n nz", "n z", "???",
1431 "???", "???", "???", "F" };
1433 WriteLog("%06X: JR %s, %06X [NCZ:%u%u%u] ", gpu_pc-2, condition[IMM_2], gpu_pc+((IMM_1 & 0x10 ? 0xFFFFFFF0 | IMM_1 : IMM_1) * 2), gpu_flag_n, gpu_flag_c, gpu_flag_z);
1435 /* if (CONDITION(jaguar.op & 31))
1437 int32_t r1 = (INT8)((jaguar.op >> 2) & 0xF8) >> 2;
1438 uint32_t newpc = jaguar.PC + r1;
1440 jaguar.op = ROPCODE(jaguar.PC);
1442 (*jaguar.table[jaguar.op >> 10])();
1444 jaguar_icount -= 3; // 3 wait states guaranteed
1447 /* gpu_flag_n = (gpu_flag_n ? 1 : 0);
1448 gpu_flag_c = (gpu_flag_c ? 1 : 0);
1449 gpu_flag_z = (gpu_flag_z ? 1 : 0);*/
1450 // KLUDGE: Used by BRANCH_CONDITION
1451 uint32_t jaguar_flags = (gpu_flag_n << 2) | (gpu_flag_c << 1) | gpu_flag_z;
1453 if (BRANCH_CONDITION(IMM_2))
1457 WriteLog("Branched!\n");
1460 WriteLog(" --> JR: Branch taken.\n");
1461 int32_t offset = (IMM_1 & 0x10 ? 0xFFFFFFF0 | IMM_1 : IMM_1); // Sign extend IMM_1
1462 int32_t delayed_pc = gpu_pc + (offset * 2);
1464 gpu_pc = delayed_pc;
1465 /* uint16_t opcode = GPUReadWord(gpu_pc, GPU);
1466 gpu_opcode_first_parameter = (opcode >> 5) & 0x1F;
1467 gpu_opcode_second_parameter = opcode & 0x1F;
1469 gpu_pc = delayed_pc;
1470 gpu_opcode[opcode>>10]();//*/
1475 WriteLog("Branch NOT taken.\n");
1480 static void gpu_opcode_add(void)
1484 WriteLog("%06X: ADD R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1486 uint32_t res = RN + RM;
1487 CLR_ZNC; SET_ZNC_ADD(RN, RM, res);
1491 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1496 static void gpu_opcode_addc(void)
1500 WriteLog("%06X: ADDC R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1502 /* int dreg = jaguar.op & 31;
1503 uint32_t r1 = jaguar.r[(jaguar.op >> 5) & 31];
1504 uint32_t r2 = jaguar.r[dreg];
1505 uint32_t res = r2 + r1 + ((jaguar.FLAGS >> 1) & 1);
1506 jaguar.r[dreg] = res;
1507 CLR_ZNC; SET_ZNC_ADD(r2,r1,res);*/
1509 uint32_t res = RN + RM + gpu_flag_c;
1510 uint32_t carry = gpu_flag_c;
1511 // SET_ZNC_ADD(RN, RM, res); //???BUG??? Yes!
1512 SET_ZNC_ADD(RN + carry, RM, res);
1513 // SET_ZNC_ADD(RN, RM + carry, res);
1517 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1522 static void gpu_opcode_addq(void)
1526 WriteLog("%06X: ADDQ #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1528 uint32_t r1 = gpu_convert_zero[IMM_1];
1529 uint32_t res = RN + r1;
1530 CLR_ZNC; SET_ZNC_ADD(RN, r1, res);
1534 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1539 static void gpu_opcode_addqt(void)
1541 #ifdef GPU_DIS_ADDQT
1543 WriteLog("%06X: ADDQT #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1545 RN += gpu_convert_zero[IMM_1];
1546 #ifdef GPU_DIS_ADDQT
1548 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1553 static void gpu_opcode_sub(void)
1557 WriteLog("%06X: SUB R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1559 uint32_t res = RN - RM;
1560 SET_ZNC_SUB(RN, RM, res);
1564 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1569 static void gpu_opcode_subc(void)
1573 WriteLog("%06X: SUBC R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1575 // This is how the GPU ALU does it--Two's complement with inverted carry
1576 uint64_t res = (uint64_t)RN + (uint64_t)(RM ^ 0xFFFFFFFF) + (gpu_flag_c ^ 1);
1577 // Carry out of the result is inverted too
1578 gpu_flag_c = ((res >> 32) & 0x01) ^ 1;
1579 RN = (res & 0xFFFFFFFF);
1583 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1588 static void gpu_opcode_subq(void)
1592 WriteLog("%06X: SUBQ #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1594 uint32_t r1 = gpu_convert_zero[IMM_1];
1595 uint32_t res = RN - r1;
1596 SET_ZNC_SUB(RN, r1, res);
1600 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1605 static void gpu_opcode_subqt(void)
1607 #ifdef GPU_DIS_SUBQT
1609 WriteLog("%06X: SUBQT #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1611 RN -= gpu_convert_zero[IMM_1];
1612 #ifdef GPU_DIS_SUBQT
1614 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1619 static void gpu_opcode_cmp(void)
1623 WriteLog("%06X: CMP R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1625 uint32_t res = RN - RM;
1626 SET_ZNC_SUB(RN, RM, res);
1629 WriteLog("[NCZ:%u%u%u]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z);
1634 static void gpu_opcode_cmpq(void)
1636 static int32_t sqtable[32] =
1637 { 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,-16,-15,-14,-13,-12,-11,-10,-9,-8,-7,-6,-5,-4,-3,-2,-1 };
1640 WriteLog("%06X: CMPQ #%d, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, sqtable[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1642 uint32_t r1 = sqtable[IMM_1 & 0x1F]; // I like this better -> (INT8)(jaguar.op >> 2) >> 3;
1643 uint32_t res = RN - r1;
1644 SET_ZNC_SUB(RN, r1, res);
1647 WriteLog("[NCZ:%u%u%u]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z);
1652 static void gpu_opcode_and(void)
1656 WriteLog("%06X: AND R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1662 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1667 static void gpu_opcode_or(void)
1671 WriteLog("%06X: OR R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1677 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1682 static void gpu_opcode_xor(void)
1686 WriteLog("%06X: XOR R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1692 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1697 static void gpu_opcode_not(void)
1701 WriteLog("%06X: NOT R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1707 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1712 static void gpu_opcode_move_pc(void)
1714 #ifdef GPU_DIS_MOVEPC
1716 WriteLog("%06X: MOVE PC, R%02u [NCZ:%u%u%u, PC=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, gpu_pc-2, IMM_2, RN);
1718 // Should be previous PC--this might not always be previous instruction!
1719 // Then again, this will point right at the *current* instruction, i.e., MOVE PC,R!
1721 #ifdef GPU_DIS_MOVEPC
1723 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1728 static void gpu_opcode_sat8(void)
1732 WriteLog("%06X: SAT8 R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1734 RN = ((int32_t)RN < 0 ? 0 : (RN > 0xFF ? 0xFF : RN));
1738 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1743 static void gpu_opcode_sat16(void)
1745 RN = ((int32_t)RN < 0 ? 0 : (RN > 0xFFFF ? 0xFFFF : RN));
1749 static void gpu_opcode_sat24(void)
1751 RN = ((int32_t)RN < 0 ? 0 : (RN > 0xFFFFFF ? 0xFFFFFF : RN));
1756 static void gpu_opcode_store_r14_indexed(void)
1758 #ifdef GPU_DIS_STORE14I
1760 WriteLog("%06X: STORE R%02u, (R14+$%02X) [NCZ:%u%u%u, R%02u=%08X, R14+$%02X=%08X]\n", gpu_pc-2, IMM_2, gpu_convert_zero[IMM_1] << 2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, gpu_convert_zero[IMM_1] << 2, gpu_reg[14]+(gpu_convert_zero[IMM_1] << 2));
1762 #ifdef GPU_CORRECT_ALIGNMENT
1763 uint32_t address = gpu_reg[14] + (gpu_convert_zero[IMM_1] << 2);
1765 if (address >= 0xF03000 && address <= 0xF03FFF)
1766 GPUWriteLong(address & 0xFFFFFFFC, RN, GPU);
1768 GPUWriteLong(address, RN, GPU);
1770 GPUWriteLong(gpu_reg[14] + (gpu_convert_zero[IMM_1] << 2), RN, GPU);
1775 static void gpu_opcode_store_r15_indexed(void)
1777 #ifdef GPU_DIS_STORE15I
1779 WriteLog("%06X: STORE R%02u, (R15+$%02X) [NCZ:%u%u%u, R%02u=%08X, R15+$%02X=%08X]\n", gpu_pc-2, IMM_2, gpu_convert_zero[IMM_1] << 2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, gpu_convert_zero[IMM_1] << 2, gpu_reg[15]+(gpu_convert_zero[IMM_1] << 2));
1781 #ifdef GPU_CORRECT_ALIGNMENT
1782 uint32_t address = gpu_reg[15] + (gpu_convert_zero[IMM_1] << 2);
1784 if (address >= 0xF03000 && address <= 0xF03FFF)
1785 GPUWriteLong(address & 0xFFFFFFFC, RN, GPU);
1787 GPUWriteLong(address, RN, GPU);
1789 GPUWriteLong(gpu_reg[15] + (gpu_convert_zero[IMM_1] << 2), RN, GPU);
1794 static void gpu_opcode_load_r14_ri(void)
1796 #ifdef GPU_DIS_LOAD14R
1798 WriteLog("%06X: LOAD (R14+R%02u), R%02u [NCZ:%u%u%u, R14+R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM+gpu_reg[14], IMM_2, RN);
1800 #ifdef GPU_CORRECT_ALIGNMENT
1801 uint32_t address = gpu_reg[14] + RM;
1803 if (address >= 0xF03000 && address <= 0xF03FFF)
1804 RN = GPUReadLong(address & 0xFFFFFFFC, GPU);
1806 RN = GPUReadLong(address, GPU);
1808 RN = GPUReadLong(gpu_reg[14] + RM, GPU);
1810 #ifdef GPU_DIS_LOAD14R
1812 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1817 static void gpu_opcode_load_r15_ri(void)
1819 #ifdef GPU_DIS_LOAD15R
1821 WriteLog("%06X: LOAD (R15+R%02u), R%02u [NCZ:%u%u%u, R15+R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM+gpu_reg[15], IMM_2, RN);
1823 #ifdef GPU_CORRECT_ALIGNMENT
1824 uint32_t address = gpu_reg[15] + RM;
1826 if (address >= 0xF03000 && address <= 0xF03FFF)
1827 RN = GPUReadLong(address & 0xFFFFFFFC, GPU);
1829 RN = GPUReadLong(address, GPU);
1831 RN = GPUReadLong(gpu_reg[15] + RM, GPU);
1833 #ifdef GPU_DIS_LOAD15R
1835 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1840 static void gpu_opcode_store_r14_ri(void)
1842 #ifdef GPU_DIS_STORE14R
1844 WriteLog("%06X: STORE R%02u, (R14+R%02u) [NCZ:%u%u%u, R%02u=%08X, R14+R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM+gpu_reg[14]);
1846 #ifdef GPU_CORRECT_ALIGNMENT
1847 uint32_t address = gpu_reg[14] + RM;
1849 if (address >= 0xF03000 && address <= 0xF03FFF)
1850 GPUWriteLong(address & 0xFFFFFFFC, RN, GPU);
1852 GPUWriteLong(address, RN, GPU);
1854 GPUWriteLong(gpu_reg[14] + RM, RN, GPU);
1859 static void gpu_opcode_store_r15_ri(void)
1861 #ifdef GPU_DIS_STORE15R
1863 WriteLog("%06X: STORE R%02u, (R15+R%02u) [NCZ:%u%u%u, R%02u=%08X, R15+R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM+gpu_reg[15]);
1865 #ifdef GPU_CORRECT_ALIGNMENT_STORE
1866 uint32_t address = gpu_reg[15] + RM;
1868 if (address >= 0xF03000 && address <= 0xF03FFF)
1869 GPUWriteLong(address & 0xFFFFFFFC, RN, GPU);
1871 GPUWriteLong(address, RN, GPU);
1873 GPUWriteLong(gpu_reg[15] + RM, RN, GPU);
1878 static void gpu_opcode_nop(void)
1882 WriteLog("%06X: NOP [NCZ:%u%u%u]\n", gpu_pc-2, gpu_flag_n, gpu_flag_c, gpu_flag_z);
1887 static void gpu_opcode_pack(void)
1891 WriteLog("%06X: %s R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, (!IMM_1 ? "PACK " : "UNPACK"), IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1895 //BUG! if (RM == 0) // Pack
1896 if (IMM_1 == 0) // Pack
1897 RN = ((val >> 10) & 0x0000F000) | ((val >> 5) & 0x00000F00) | (val & 0x000000FF);
1899 RN = ((val & 0x0000F000) << 10) | ((val & 0x00000F00) << 5) | (val & 0x000000FF);
1902 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1907 static void gpu_opcode_storeb(void)
1909 #ifdef GPU_DIS_STOREB
1911 WriteLog("%06X: STOREB R%02u, (R%02u) [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM);
1914 // Would appear to be so...!
1915 if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1916 GPUWriteLong(RM, RN & 0xFF, GPU);
1918 JaguarWriteByte(RM, RN, GPU);
1922 static void gpu_opcode_storew(void)
1924 #ifdef GPU_DIS_STOREW
1926 WriteLog("%06X: STOREW R%02u, (R%02u) [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM);
1928 #ifdef GPU_CORRECT_ALIGNMENT
1929 if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1930 GPUWriteLong(RM & 0xFFFFFFFE, RN & 0xFFFF, GPU);
1932 JaguarWriteWord(RM, RN, GPU);
1934 if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1935 GPUWriteLong(RM, RN & 0xFFFF, GPU);
1937 JaguarWriteWord(RM, RN, GPU);
1942 static void gpu_opcode_store(void)
1944 #ifdef GPU_DIS_STORE
1946 WriteLog("%06X: STORE R%02u, (R%02u) [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM);
1948 #ifdef GPU_CORRECT_ALIGNMENT
1949 if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1950 GPUWriteLong(RM & 0xFFFFFFFC, RN, GPU);
1952 GPUWriteLong(RM, RN, GPU);
1954 GPUWriteLong(RM, RN, GPU);
1959 static void gpu_opcode_storep(void)
1961 #ifdef GPU_CORRECT_ALIGNMENT
1962 if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1964 GPUWriteLong((RM & 0xFFFFFFF8) + 0, gpu_hidata, GPU);
1965 GPUWriteLong((RM & 0xFFFFFFF8) + 4, RN, GPU);
1969 GPUWriteLong(RM + 0, gpu_hidata, GPU);
1970 GPUWriteLong(RM + 4, RN, GPU);
1973 GPUWriteLong(RM + 0, gpu_hidata, GPU);
1974 GPUWriteLong(RM + 4, RN, GPU);
1978 static void gpu_opcode_loadb(void)
1980 #ifdef GPU_DIS_LOADB
1982 WriteLog("%06X: LOADB (R%02u), R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1984 if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1985 RN = GPUReadLong(RM, GPU) & 0xFF;
1987 RN = JaguarReadByte(RM, GPU);
1988 #ifdef GPU_DIS_LOADB
1990 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1995 static void gpu_opcode_loadw(void)
1997 #ifdef GPU_DIS_LOADW
1999 WriteLog("%06X: LOADW (R%02u), R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2001 #ifdef GPU_CORRECT_ALIGNMENT
2002 if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
2003 RN = GPUReadLong(RM & 0xFFFFFFFE, GPU) & 0xFFFF;
2005 RN = JaguarReadWord(RM, GPU);
2007 if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
2008 RN = GPUReadLong(RM, GPU) & 0xFFFF;
2010 RN = JaguarReadWord(RM, GPU);
2012 #ifdef GPU_DIS_LOADW
2014 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2019 // According to the docs, & "Do The Same", this address is long aligned...
2021 // And it works!!! Need to fix all instances...
2022 // Also, Power Drive Rally seems to contradict the idea that only LOADs in
2023 // the $F03000-$F03FFF range are aligned...
2024 #warning "!!! Alignment issues, need to find definitive final word on this !!!"
2026 Preliminary testing on real hardware seems to confirm that something strange goes on
2027 with unaligned reads in main memory. When the address is off by 1, the result is the
2028 same as the long address with the top byte replaced by something. So if the read is
2029 from $401, and $400 has 12 34 56 78, the value read will be $nn345678, where nn is a currently unknown value.
2030 When the address is off by 2, the result would be $nnnn5678, where nnnn is unknown.
2031 When the address is off by 3, the result would be $nnnnnn78, where nnnnnn is unknown.
2032 It may be that the "unknown" values come from the prefetch queue, but not sure how
2033 to test that. They seem to be stable, though, which would indicate such a mechanism.
2034 Sometimes, however, the off by 2 case returns $12345678!
2036 static void gpu_opcode_load(void)
2040 WriteLog("%06X: LOAD (R%02u), R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2042 #ifdef GPU_CORRECT_ALIGNMENT
2043 uint32_t mask[4] = { 0x00000000, 0xFF000000, 0xFFFF0000, 0xFFFFFF00 };
2044 // if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
2045 RN = GPUReadLong(RM & 0xFFFFFFFC, GPU);
2046 // RN = GPUReadLong(RM & 0x00FFFFFC, GPU);
2048 // RN = GPUReadLong(RM, GPU);
2049 // Simulate garbage in unaligned reads...
2050 //seems that this behavior is different in GPU mem vs. main mem...
2051 // if ((RM < 0xF03000) || (RM > 0xF0BFFF))
2052 // RN |= mask[RM & 0x03];
2054 RN = GPUReadLong(RM, GPU);
2058 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2063 static void gpu_opcode_loadp(void)
2065 #ifdef GPU_CORRECT_ALIGNMENT
2066 if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
2068 gpu_hidata = GPUReadLong((RM & 0xFFFFFFF8) + 0, GPU);
2069 RN = GPUReadLong((RM & 0xFFFFFFF8) + 4, GPU);
2073 gpu_hidata = GPUReadLong(RM + 0, GPU);
2074 RN = GPUReadLong(RM + 4, GPU);
2077 gpu_hidata = GPUReadLong(RM + 0, GPU);
2078 RN = GPUReadLong(RM + 4, GPU);
2083 static void gpu_opcode_load_r14_indexed(void)
2085 #ifdef GPU_DIS_LOAD14I
2087 WriteLog("%06X: LOAD (R14+$%02X), R%02u [NCZ:%u%u%u, R14+$%02X=%08X, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1] << 2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, gpu_convert_zero[IMM_1] << 2, gpu_reg[14]+(gpu_convert_zero[IMM_1] << 2), IMM_2, RN);
2089 #ifdef GPU_CORRECT_ALIGNMENT
2090 uint32_t address = gpu_reg[14] + (gpu_convert_zero[IMM_1] << 2);
2092 if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
2093 RN = GPUReadLong(address & 0xFFFFFFFC, GPU);
2095 RN = GPUReadLong(address, GPU);
2097 RN = GPUReadLong(gpu_reg[14] + (gpu_convert_zero[IMM_1] << 2), GPU);
2099 #ifdef GPU_DIS_LOAD14I
2101 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2106 static void gpu_opcode_load_r15_indexed(void)
2108 #ifdef GPU_DIS_LOAD15I
2110 WriteLog("%06X: LOAD (R15+$%02X), R%02u [NCZ:%u%u%u, R15+$%02X=%08X, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1] << 2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, gpu_convert_zero[IMM_1] << 2, gpu_reg[15]+(gpu_convert_zero[IMM_1] << 2), IMM_2, RN);
2112 #ifdef GPU_CORRECT_ALIGNMENT
2113 uint32_t address = gpu_reg[15] + (gpu_convert_zero[IMM_1] << 2);
2115 if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
2116 RN = GPUReadLong(address & 0xFFFFFFFC, GPU);
2118 RN = GPUReadLong(address, GPU);
2120 RN = GPUReadLong(gpu_reg[15] + (gpu_convert_zero[IMM_1] << 2), GPU);
2122 #ifdef GPU_DIS_LOAD15I
2124 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2129 static void gpu_opcode_movei(void)
2131 #ifdef GPU_DIS_MOVEI
2133 WriteLog("%06X: MOVEI #$%08X, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, (uint32_t)GPUReadWord(gpu_pc) | ((uint32_t)GPUReadWord(gpu_pc + 2) << 16), IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2135 // This instruction is followed by 32-bit value in LSW / MSW format...
2136 RN = (uint32_t)GPUReadWord(gpu_pc, GPU) | ((uint32_t)GPUReadWord(gpu_pc + 2, GPU) << 16);
2138 #ifdef GPU_DIS_MOVEI
2140 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2145 static void gpu_opcode_moveta(void)
2147 #ifdef GPU_DIS_MOVETA
2149 WriteLog("%06X: MOVETA R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u(alt)=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, ALTERNATE_RN);
2152 #ifdef GPU_DIS_MOVETA
2154 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u(alt)=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, ALTERNATE_RN);
2159 static void gpu_opcode_movefa(void)
2161 #ifdef GPU_DIS_MOVEFA
2163 WriteLog("%06X: MOVEFA R%02u, R%02u [NCZ:%u%u%u, R%02u(alt)=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, ALTERNATE_RM, IMM_2, RN);
2166 #ifdef GPU_DIS_MOVEFA
2168 WriteLog("[NCZ:%u%u%u, R%02u(alt)=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, ALTERNATE_RM, IMM_2, RN);
2173 static void gpu_opcode_move(void)
2177 WriteLog("%06X: MOVE R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2182 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2187 static void gpu_opcode_moveq(void)
2189 #ifdef GPU_DIS_MOVEQ
2191 WriteLog("%06X: MOVEQ #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2194 #ifdef GPU_DIS_MOVEQ
2196 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2201 static void gpu_opcode_resmac(void)
2207 static void gpu_opcode_imult(void)
2209 #ifdef GPU_DIS_IMULT
2211 WriteLog("%06X: IMULT R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2213 RN = (int16_t)RN * (int16_t)RM;
2215 #ifdef GPU_DIS_IMULT
2217 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2222 static void gpu_opcode_mult(void)
2226 WriteLog("%06X: MULT R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2228 RN = (uint16_t)RM * (uint16_t)RN;
2229 // RN = (RM & 0xFFFF) * (RN & 0xFFFF);
2233 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2238 static void gpu_opcode_bclr(void)
2242 WriteLog("%06X: BCLR #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2244 uint32_t res = RN & ~(1 << IMM_1);
2249 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2254 static void gpu_opcode_btst(void)
2258 WriteLog("%06X: BTST #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2260 gpu_flag_z = (~RN >> IMM_1) & 1;
2263 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2268 static void gpu_opcode_bset(void)
2272 WriteLog("%06X: BSET #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2274 uint32_t res = RN | (1 << IMM_1);
2279 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2284 static void gpu_opcode_imacn(void)
2286 uint32_t res = (int16_t)RM * (int16_t)(RN);
2291 static void gpu_opcode_mtoi(void)
2294 uint32_t res = RN = (((int32_t)_RM >> 8) & 0xFF800000) | (_RM & 0x007FFFFF);
2299 static void gpu_opcode_normi(void)
2306 while ((_RM & 0xFFC00000) == 0)
2311 while ((_RM & 0xFF800000) != 0)
2321 static void gpu_opcode_mmult(void)
2323 int count = gpu_matrix_control & 0x0F; // Matrix width
2324 uint32_t addr = gpu_pointer_to_matrix; // In the GPU's RAM
2328 if (gpu_matrix_control & 0x10) // Column stepping
2330 for(int i=0; i<count; i++)
2334 a = (int16_t)((gpu_alternate_reg[IMM_1 + (i >> 1)] >> 16) & 0xFFFF);
2336 a = (int16_t)(gpu_alternate_reg[IMM_1 + (i >> 1)] & 0xFFFF);
2338 int16_t b = ((int16_t)GPUReadWord(addr + 2, GPU));
2343 else // Row stepping
2345 for(int i=0; i<count; i++)
2349 a = (int16_t)((gpu_alternate_reg[IMM_1 + (i >> 1)] >> 16) & 0xFFFF);
2351 a = (int16_t)(gpu_alternate_reg[IMM_1 + (i >> 1)] & 0xFFFF);
2353 int16_t b = ((int16_t)GPUReadWord(addr + 2, GPU));
2358 RN = res = (int32_t)accum;
2359 // carry flag to do (out of the last add)
2364 static void gpu_opcode_abs(void)
2368 WriteLog("%06X: ABS R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2370 gpu_flag_c = RN >> 31;
2371 if (RN == 0x80000000)
2372 //Is 0x80000000 a positive number? If so, then we need to set C to 0 as well!
2373 gpu_flag_n = 1, gpu_flag_z = 0;
2378 gpu_flag_n = 0; SET_FLAG_Z(RN);
2382 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2387 static void gpu_opcode_div(void) // RN / RM
2391 WriteLog("%06X: DIV R%02u, R%02u (%s) [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, (gpu_div_control & 0x01 ? "16.16" : "32"), gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2393 // NOTE: remainder is NOT calculated correctly here!
2394 // The original tried to get it right by checking to see if the
2395 // remainder was negative, but that's too late...
2396 // The code there should do it now, but I'm not 100% sure...
2397 // [Now it should be correct, but not displaying correct behavior of the actual
2398 // hardware. A step in the right direction.]
2402 if (gpu_div_control & 0x01) // 16.16 division
2404 gpu_remain = ((uint64_t)RN << 16) % RM;
2405 RN = ((uint64_t)RN << 16) / RM;
2409 // We calculate the remainder first because we destroy RN after
2410 // this by assigning it to itself.
2411 gpu_remain = RN % RM;
2415 // What we really should do here is figure out why this condition
2416 // happens in the real divide unit and emulate *that* behavior.
2418 if ((gpu_remain - RM) & 0x80000000) // If the result would have been negative...
2419 gpu_remain -= RM; // Then make it negative!
2427 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] Remainder: %08X\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN, gpu_remain);
2432 static void gpu_opcode_imultn(void)
2434 uint32_t res = (int32_t)((int16_t)RN * (int16_t)RM);
2435 gpu_acc = (int32_t)res;
2441 static void gpu_opcode_neg(void)
2445 WriteLog("%06X: NEG R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2448 SET_ZNC_SUB(0, RN, res);
2452 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2457 static void gpu_opcode_shlq(void)
2461 WriteLog("%06X: SHLQ #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, 32 - IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2463 // Was a bug here...
2464 // (Look at Aaron's code: If r1 = 32, then 32 - 32 = 0 which is wrong!)
2465 int32_t r1 = 32 - IMM_1;
2466 uint32_t res = RN << r1;
2467 SET_ZN(res); gpu_flag_c = (RN >> 31) & 1;
2471 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2476 static void gpu_opcode_shrq(void)
2480 WriteLog("%06X: SHRQ #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2482 int32_t r1 = gpu_convert_zero[IMM_1];
2483 uint32_t res = RN >> r1;
2484 SET_ZN(res); gpu_flag_c = RN & 1;
2488 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2493 static void gpu_opcode_ror(void)
2497 WriteLog("%06X: ROR R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2499 uint32_t r1 = RM & 0x1F;
2500 uint32_t res = (RN >> r1) | (RN << (32 - r1));
2501 SET_ZN(res); gpu_flag_c = (RN >> 31) & 1;
2505 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2510 static void gpu_opcode_rorq(void)
2514 WriteLog("%06X: RORQ #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2516 uint32_t r1 = gpu_convert_zero[IMM_1 & 0x1F];
2518 uint32_t res = (r2 >> r1) | (r2 << (32 - r1));
2520 SET_ZN(res); gpu_flag_c = (r2 >> 31) & 0x01;
2523 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2528 static void gpu_opcode_sha(void)
2530 /* int dreg = jaguar.op & 31;
2531 int32_t r1 = (int32_t)jaguar.r[(jaguar.op >> 5) & 31];
2532 uint32_t r2 = jaguar.r[dreg];
2538 res = (r1 <= -32) ? 0 : (r2 << -r1);
2539 jaguar.FLAGS |= (r2 >> 30) & 2;
2543 res = (r1 >= 32) ? ((int32_t)r2 >> 31) : ((int32_t)r2 >> r1);
2544 jaguar.FLAGS |= (r2 << 1) & 2;
2546 jaguar.r[dreg] = res;
2551 WriteLog("%06X: SHA R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2555 if ((int32_t)RM < 0)
2557 res = ((int32_t)RM <= -32) ? 0 : (RN << -(int32_t)RM);
2558 gpu_flag_c = RN >> 31;
2562 res = ((int32_t)RM >= 32) ? ((int32_t)RN >> 31) : ((int32_t)RN >> (int32_t)RM);
2563 gpu_flag_c = RN & 0x01;
2569 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2572 /* int32_t sRM=(int32_t)RM;
2577 uint32_t shift=-sRM;
2578 if (shift>=32) shift=32;
2579 gpu_flag_c=(_RN&0x80000000)>>31;
2589 if (shift>=32) shift=32;
2593 _RN=((int32_t)_RN)>>1;
2603 static void gpu_opcode_sharq(void)
2605 #ifdef GPU_DIS_SHARQ
2607 WriteLog("%06X: SHARQ #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2609 uint32_t res = (int32_t)RN >> gpu_convert_zero[IMM_1];
2610 SET_ZN(res); gpu_flag_c = RN & 0x01;
2612 #ifdef GPU_DIS_SHARQ
2614 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2619 static void gpu_opcode_sh(void)
2623 WriteLog("%06X: SH R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2625 if (RM & 0x80000000) // Shift left
2627 gpu_flag_c = RN >> 31;
2628 RN = ((int32_t)RM <= -32 ? 0 : RN << -(int32_t)RM);
2632 gpu_flag_c = RN & 0x01;
2633 RN = (RM >= 32 ? 0 : RN >> RM);
2638 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2643 //Temporary: Testing only!
2644 //#include "gpu2.cpp"
2645 //#include "gpu3.cpp"
2650 // New thread-safe GPU core
2652 int GPUCore(void * data)