6 // Originally by David Raingeard (Cal2)
7 // GCC/SDL port by Niels Wagenaar (Linux/WIN32) and Caz (BeOS)
8 // Cleanups, endian wrongness, and bad ASM amelioration by James Hammons
9 // (C) 2010 Underground Software
11 // JLH = James Hammons <jlhamm@acm.org>
14 // --- ---------- -------------------------------------------------------------
15 // JLH 01/16/2010 Created this log ;-)
16 // JLH 11/26/2011 Added fixes for LOAD/STORE alignment issues
19 // Note: Endian wrongness probably stems from the MAME origins of this emu and
20 // the braindead way in which MAME handles memory. :-)
22 // Problem with not booting the BIOS was the incorrect way that the
23 // SUBC instruction set the carry when the carry was set going in...
24 // Same problem with ADDC...
30 #include <string.h> // For memset
35 #include "m68000/m68kinterface.h"
40 // Seems alignment in loads & stores was off...
41 #define GPU_CORRECT_ALIGNMENT
44 // For GPU disassembly...
65 #define GPU_DIS_LOAD14I
66 #define GPU_DIS_LOAD14R
67 #define GPU_DIS_LOAD15I
68 #define GPU_DIS_LOAD15R
70 #define GPU_DIS_MOVEFA
72 #define GPU_DIS_MOVEPC
73 #define GPU_DIS_MOVETA
90 #define GPU_DIS_STOREB
91 #define GPU_DIS_STOREW
92 #define GPU_DIS_STORE14I
93 #define GPU_DIS_STORE14R
94 #define GPU_DIS_STORE15I
95 #define GPU_DIS_STORE15R
102 bool doGPUDis = false;
103 //bool doGPUDis = true;
107 GPU opcodes use (BIOS flying ATARI logo):
149 #define CINT0FLAG 0x0200
150 #define CINT1FLAG 0x0400
151 #define CINT2FLAG 0x0800
152 #define CINT3FLAG 0x1000
153 #define CINT4FLAG 0x2000
154 #define CINT04FLAGS (CINT0FLAG | CINT1FLAG | CINT2FLAG | CINT3FLAG | CINT4FLAG)
158 #define ZERO_FLAG 0x0001
159 #define CARRY_FLAG 0x0002
160 #define NEGA_FLAG 0x0004
162 #define INT_ENA0 0x0010
163 #define INT_ENA1 0x0020
164 #define INT_ENA2 0x0040
165 #define INT_ENA3 0x0080
166 #define INT_ENA4 0x0100
167 #define INT_CLR0 0x0200
168 #define INT_CLR1 0x0400
169 #define INT_CLR2 0x0800
170 #define INT_CLR3 0x1000
171 #define INT_CLR4 0x2000
172 #define REGPAGE 0x4000
175 // External global variables
177 extern int start_logging;
178 extern int gpu_start_log;
180 // Private function prototypes
182 void GPUUpdateRegisterBanks(void);
183 void GPUDumpDisassembly(void);
184 void GPUDumpRegisters(void);
185 void GPUDumpMemory(void);
187 static void gpu_opcode_add(void);
188 static void gpu_opcode_addc(void);
189 static void gpu_opcode_addq(void);
190 static void gpu_opcode_addqt(void);
191 static void gpu_opcode_sub(void);
192 static void gpu_opcode_subc(void);
193 static void gpu_opcode_subq(void);
194 static void gpu_opcode_subqt(void);
195 static void gpu_opcode_neg(void);
196 static void gpu_opcode_and(void);
197 static void gpu_opcode_or(void);
198 static void gpu_opcode_xor(void);
199 static void gpu_opcode_not(void);
200 static void gpu_opcode_btst(void);
201 static void gpu_opcode_bset(void);
202 static void gpu_opcode_bclr(void);
203 static void gpu_opcode_mult(void);
204 static void gpu_opcode_imult(void);
205 static void gpu_opcode_imultn(void);
206 static void gpu_opcode_resmac(void);
207 static void gpu_opcode_imacn(void);
208 static void gpu_opcode_div(void);
209 static void gpu_opcode_abs(void);
210 static void gpu_opcode_sh(void);
211 static void gpu_opcode_shlq(void);
212 static void gpu_opcode_shrq(void);
213 static void gpu_opcode_sha(void);
214 static void gpu_opcode_sharq(void);
215 static void gpu_opcode_ror(void);
216 static void gpu_opcode_rorq(void);
217 static void gpu_opcode_cmp(void);
218 static void gpu_opcode_cmpq(void);
219 static void gpu_opcode_sat8(void);
220 static void gpu_opcode_sat16(void);
221 static void gpu_opcode_move(void);
222 static void gpu_opcode_moveq(void);
223 static void gpu_opcode_moveta(void);
224 static void gpu_opcode_movefa(void);
225 static void gpu_opcode_movei(void);
226 static void gpu_opcode_loadb(void);
227 static void gpu_opcode_loadw(void);
228 static void gpu_opcode_load(void);
229 static void gpu_opcode_loadp(void);
230 static void gpu_opcode_load_r14_indexed(void);
231 static void gpu_opcode_load_r15_indexed(void);
232 static void gpu_opcode_storeb(void);
233 static void gpu_opcode_storew(void);
234 static void gpu_opcode_store(void);
235 static void gpu_opcode_storep(void);
236 static void gpu_opcode_store_r14_indexed(void);
237 static void gpu_opcode_store_r15_indexed(void);
238 static void gpu_opcode_move_pc(void);
239 static void gpu_opcode_jump(void);
240 static void gpu_opcode_jr(void);
241 static void gpu_opcode_mmult(void);
242 static void gpu_opcode_mtoi(void);
243 static void gpu_opcode_normi(void);
244 static void gpu_opcode_nop(void);
245 static void gpu_opcode_load_r14_ri(void);
246 static void gpu_opcode_load_r15_ri(void);
247 static void gpu_opcode_store_r14_ri(void);
248 static void gpu_opcode_store_r15_ri(void);
249 static void gpu_opcode_sat24(void);
250 static void gpu_opcode_pack(void);
252 // This is wrong, since it doesn't take pipeline effects into account. !!! FIX !!!
253 /*uint8_t gpu_opcode_cycles[64] =
255 3, 3, 3, 3, 3, 3, 3, 3,
256 3, 3, 3, 3, 3, 3, 3, 3,
257 3, 3, 1, 3, 1, 18, 3, 3,
258 3, 3, 3, 3, 3, 3, 3, 3,
259 3, 3, 2, 2, 2, 2, 3, 4,
260 5, 4, 5, 6, 6, 1, 1, 1,
261 1, 2, 2, 2, 1, 1, 9, 3,
262 3, 1, 6, 6, 2, 2, 3, 3
264 //Here's a QnD kludge...
265 //This is wrong, wrong, WRONG, but it seems to work for the time being...
266 //(That is, it fixes Flip Out which relies on GPU timing rather than semaphores. Bad developers! Bad!)
267 //What's needed here is a way to take pipeline effects into account (including pipeline stalls!)...
268 /*uint8_t gpu_opcode_cycles[64] =
270 1, 1, 1, 1, 1, 1, 1, 1,
271 1, 1, 1, 1, 1, 1, 1, 1,
272 1, 1, 1, 1, 1, 9, 1, 1,
273 1, 1, 1, 1, 1, 1, 1, 1,
274 1, 1, 1, 1, 1, 1, 1, 2,
275 2, 2, 2, 3, 3, 1, 1, 1,
276 1, 1, 1, 1, 1, 1, 4, 1,
277 1, 1, 3, 3, 1, 1, 1, 1
279 uint8_t gpu_opcode_cycles[64] =
281 1, 1, 1, 1, 1, 1, 1, 1,
282 1, 1, 1, 1, 1, 1, 1, 1,
283 1, 1, 1, 1, 1, 1, 1, 1,
284 1, 1, 1, 1, 1, 1, 1, 1,
285 1, 1, 1, 1, 1, 1, 1, 1,
286 1, 1, 1, 1, 1, 1, 1, 1,
287 1, 1, 1, 1, 1, 1, 1, 1,
288 1, 1, 1, 1, 1, 1, 1, 1
291 void (*gpu_opcode[64])()=
293 gpu_opcode_add, gpu_opcode_addc, gpu_opcode_addq, gpu_opcode_addqt,
294 gpu_opcode_sub, gpu_opcode_subc, gpu_opcode_subq, gpu_opcode_subqt,
295 gpu_opcode_neg, gpu_opcode_and, gpu_opcode_or, gpu_opcode_xor,
296 gpu_opcode_not, gpu_opcode_btst, gpu_opcode_bset, gpu_opcode_bclr,
297 gpu_opcode_mult, gpu_opcode_imult, gpu_opcode_imultn, gpu_opcode_resmac,
298 gpu_opcode_imacn, gpu_opcode_div, gpu_opcode_abs, gpu_opcode_sh,
299 gpu_opcode_shlq, gpu_opcode_shrq, gpu_opcode_sha, gpu_opcode_sharq,
300 gpu_opcode_ror, gpu_opcode_rorq, gpu_opcode_cmp, gpu_opcode_cmpq,
301 gpu_opcode_sat8, gpu_opcode_sat16, gpu_opcode_move, gpu_opcode_moveq,
302 gpu_opcode_moveta, gpu_opcode_movefa, gpu_opcode_movei, gpu_opcode_loadb,
303 gpu_opcode_loadw, gpu_opcode_load, gpu_opcode_loadp, gpu_opcode_load_r14_indexed,
304 gpu_opcode_load_r15_indexed, gpu_opcode_storeb, gpu_opcode_storew, gpu_opcode_store,
305 gpu_opcode_storep, gpu_opcode_store_r14_indexed, gpu_opcode_store_r15_indexed, gpu_opcode_move_pc,
306 gpu_opcode_jump, gpu_opcode_jr, gpu_opcode_mmult, gpu_opcode_mtoi,
307 gpu_opcode_normi, gpu_opcode_nop, gpu_opcode_load_r14_ri, gpu_opcode_load_r15_ri,
308 gpu_opcode_store_r14_ri, gpu_opcode_store_r15_ri, gpu_opcode_sat24, gpu_opcode_pack,
311 static uint8_t gpu_ram_8[0x1000];
313 static uint32_t gpu_acc;
314 static uint32_t gpu_remain;
315 static uint32_t gpu_hidata;
316 static uint32_t gpu_flags;
317 static uint32_t gpu_matrix_control;
318 static uint32_t gpu_pointer_to_matrix;
319 static uint32_t gpu_data_organization;
320 static uint32_t gpu_control;
321 static uint32_t gpu_div_control;
322 // There is a distinct advantage to having these separated out--there's no need
323 // to clear a bit before writing a result. I.e., if the result of an operation
324 // leaves a zero in the carry flag, you don't have to zero gpu_flag_c before
325 // you can write that zero!
326 static uint8_t gpu_flag_z, gpu_flag_n, gpu_flag_c;
327 uint32_t gpu_reg_bank_0[32];
328 uint32_t gpu_reg_bank_1[32];
329 static uint32_t * gpu_reg;
330 static uint32_t * gpu_alternate_reg;
332 static uint32_t gpu_instruction;
333 static uint32_t gpu_opcode_first_parameter;
334 static uint32_t gpu_opcode_second_parameter;
336 #define GPU_RUNNING (gpu_control & 0x01)
338 #define RM gpu_reg[gpu_opcode_first_parameter]
339 #define RN gpu_reg[gpu_opcode_second_parameter]
340 #define ALTERNATE_RM gpu_alternate_reg[gpu_opcode_first_parameter]
341 #define ALTERNATE_RN gpu_alternate_reg[gpu_opcode_second_parameter]
342 #define IMM_1 gpu_opcode_first_parameter
343 #define IMM_2 gpu_opcode_second_parameter
// Flag helpers. All of these write the file-level gpu_flag_z / gpu_flag_n /
// gpu_flag_c variables directly.

// Legacy forms: the expansion already ends in ';'. That is kept as-is so that
// existing call sites (with or without their own ';') continue to compile.
// Do not use these as the sole body of an if/else.
#define SET_FLAG_Z(r)	(gpu_flag_z = ((r) == 0));
#define SET_FLAG_N(r)	(gpu_flag_n = (((uint32_t)(r) >> 31) & 0x01));

#define RESET_FLAG_Z()	gpu_flag_z = 0;
#define RESET_FLAG_N()	gpu_flag_n = 0;
#define RESET_FLAG_C()	gpu_flag_c = 0;

// Expression forms: each expands to one parenthesised expression.
#define CLR_Z				(gpu_flag_z = 0)
#define CLR_ZN				(gpu_flag_z = gpu_flag_n = 0)
#define CLR_ZNC				(gpu_flag_z = gpu_flag_n = gpu_flag_c = 0)
// Z is 1 when r is zero, otherwise 0
#define SET_Z(r)			(gpu_flag_z = ((r) == 0))
// N is bit 31 of r
#define SET_N(r)			(gpu_flag_n = (((uint32_t)(r) >> 31) & 0x01))
// C is 1 when b > ~a (unsigned), i.e. when a + b does not fit in 32 bits
#define SET_C_ADD(a,b)		(gpu_flag_c = ((uint32_t)(b) > (uint32_t)(~(a))))
// C is 1 when b > a (unsigned), i.e. when a - b borrows
#define SET_C_SUB(a,b)		(gpu_flag_c = ((uint32_t)(b) > (uint32_t)(a)))

// Composite forms. These are single comma expressions (not a run of
// ';'-separated statements) so that "if (cond) SET_ZN(x);" guards every flag
// update, not just the first one. Note that r is evaluated twice.
#define SET_ZN(r)			(SET_N(r), SET_Z(r))
#define SET_ZNC_ADD(a,b,r)	(SET_N(r), SET_Z(r), SET_C_ADD(a,b))
#define SET_ZNC_SUB(a,b,r)	(SET_N(r), SET_Z(r), SET_C_SUB(a,b))
363 uint32_t gpu_convert_zero[32] =
364 { 32,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 };
366 uint8_t * branch_condition_table = 0;
367 #define BRANCH_CONDITION(x) branch_condition_table[(x) + ((jaguar_flags & 7) << 5)]
369 uint32_t gpu_opcode_use[64];
371 const char * gpu_opcode_str[64]=
373 "add", "addc", "addq", "addqt",
374 "sub", "subc", "subq", "subqt",
375 "neg", "and", "or", "xor",
376 "not", "btst", "bset", "bclr",
377 "mult", "imult", "imultn", "resmac",
378 "imacn", "div", "abs", "sh",
379 "shlq", "shrq", "sha", "sharq",
380 "ror", "rorq", "cmp", "cmpq",
381 "sat8", "sat16", "move", "moveq",
382 "moveta", "movefa", "movei", "loadb",
383 "loadw", "load", "loadp", "load_r14_indexed",
384 "load_r15_indexed", "storeb", "storew", "store",
385 "storep", "store_r14_indexed","store_r15_indexed","move_pc",
386 "jump", "jr", "mmult", "mtoi",
387 "normi", "nop", "load_r14_ri", "load_r15_ri",
388 "store_r14_ri", "store_r15_ri", "sat24", "pack",
391 static uint32_t gpu_in_exec = 0;
// Raised by GPUReleaseTimeslice(); GPUExec() resets it to 0 before entering
// its execution loop.
static uint32_t gpu_releaseTimeSlice_flag = 0;

//
// Flag that the GPU's current timeslice should be released
//
void GPUReleaseTimeslice(void)
{
	gpu_releaseTimeSlice_flag = 1;
}
399 uint32_t GPUGetPC(void)
404 void build_branch_condition_table(void)
406 if (!branch_condition_table)
408 branch_condition_table = (uint8_t *)malloc(32 * 8 * sizeof(branch_condition_table[0]));
410 if (branch_condition_table)
412 for(int i=0; i<8; i++)
414 for(int j=0; j<32; j++)
421 if (!(i & ZERO_FLAG))
424 if (i & (CARRY_FLAG << (j >> 4)))
427 if (!(i & (CARRY_FLAG << (j >> 4))))
429 branch_condition_table[i * 32 + j] = result;
437 // GPU byte access (read)
439 uint8_t GPUReadByte(uint32_t offset, uint32_t who/*=UNKNOWN*/)
441 if (offset >= 0xF02000 && offset <= 0xF020FF)
442 WriteLog("GPU: ReadByte--Attempt to read from GPU register file by %s!\n", whoName[who]);
444 if ((offset >= GPU_WORK_RAM_BASE) && (offset < GPU_WORK_RAM_BASE+0x1000))
445 return gpu_ram_8[offset & 0xFFF];
446 else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset < GPU_CONTROL_RAM_BASE+0x20))
448 uint32_t data = GPUReadLong(offset & 0xFFFFFFFC, who);
450 if ((offset & 0x03) == 0)
452 else if ((offset & 0x03) == 1)
453 return (data >> 16) & 0xFF;
454 else if ((offset & 0x03) == 2)
455 return (data >> 8) & 0xFF;
456 else if ((offset & 0x03) == 3)
460 return JaguarReadByte(offset, who);
464 // GPU word access (read)
466 uint16_t GPUReadWord(uint32_t offset, uint32_t who/*=UNKNOWN*/)
468 if (offset >= 0xF02000 && offset <= 0xF020FF)
469 WriteLog("GPU: ReadWord--Attempt to read from GPU register file by %s!\n", whoName[who]);
471 if ((offset >= GPU_WORK_RAM_BASE) && (offset < GPU_WORK_RAM_BASE+0x1000))
474 uint16_t data = ((uint16_t)gpu_ram_8[offset] << 8) | (uint16_t)gpu_ram_8[offset+1];
477 else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset < GPU_CONTROL_RAM_BASE+0x20))
479 // This looks and smells wrong...
480 // But it *might* be OK...
481 if (offset & 0x01) // Catch cases 1 & 3... (unaligned read)
482 return (GPUReadByte(offset, who) << 8) | GPUReadByte(offset+1, who);
484 uint32_t data = GPUReadLong(offset & 0xFFFFFFFC, who);
486 if (offset & 0x02) // Cases 0 & 2...
487 return data & 0xFFFF;
492 //TEMP--Mirror of F03000? No. Writes only...
493 //if (offset >= 0xF0B000 && offset <= 0xF0BFFF)
494 //WriteLog("[GPUR16] --> Possible GPU RAM mirror access by %s!", whoName[who]);
496 return JaguarReadWord(offset, who);
500 // GPU dword access (read)
502 uint32_t GPUReadLong(uint32_t offset, uint32_t who/*=UNKNOWN*/)
504 if (offset >= 0xF02000 && offset <= 0xF020FF)
506 WriteLog("GPU: ReadLong--Attempt to read from GPU register file (%X) by %s!\n", offset, whoName[who]);
507 uint32_t reg = (offset & 0xFC) >> 2;
508 return (reg < 32 ? gpu_reg_bank_0[reg] : gpu_reg_bank_1[reg - 32]);
511 // if ((offset >= GPU_WORK_RAM_BASE) && (offset < GPU_WORK_RAM_BASE + 0x1000))
512 if ((offset >= GPU_WORK_RAM_BASE) && (offset <= GPU_WORK_RAM_BASE + 0x0FFC))
515 return ((uint32_t)gpu_ram_8[offset] << 24) | ((uint32_t)gpu_ram_8[offset+1] << 16)
516 | ((uint32_t)gpu_ram_8[offset+2] << 8) | (uint32_t)gpu_ram_8[offset+3];//*/
517 // return GET32(gpu_ram_8, offset);
519 // else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset < GPU_CONTROL_RAM_BASE+0x20))
520 else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset <= GPU_CONTROL_RAM_BASE + 0x1C))
526 gpu_flag_c = (gpu_flag_c ? 1 : 0);
527 gpu_flag_z = (gpu_flag_z ? 1 : 0);
528 gpu_flag_n = (gpu_flag_n ? 1 : 0);
530 gpu_flags = (gpu_flags & 0xFFFFFFF8) | (gpu_flag_n << 2) | (gpu_flag_c << 1) | gpu_flag_z;
532 return gpu_flags & 0xFFFFC1FF;
534 return gpu_matrix_control;
536 return gpu_pointer_to_matrix;
538 return gpu_data_organization;
547 default: // unaligned long read
549 WriteLog("GPU: Read32--unaligned 32 bit read at %08X by %s.\n", GPU_CONTROL_RAM_BASE + offset, whoName[who]);
554 //TEMP--Mirror of F03000? No. Writes only...
555 //if (offset >= 0xF0B000 && offset <= 0xF0BFFF)
556 // WriteLog("[GPUR32] --> Possible GPU RAM mirror access by %s!\n", whoName[who]);
557 /*if (offset >= 0xF1D000 && offset <= 0xF1DFFF)
558 WriteLog("[GPUR32] --> Reading from Wavetable ROM!\n");//*/
560 return (JaguarReadWord(offset, who) << 16) | JaguarReadWord(offset + 2, who);
564 // GPU byte access (write)
566 void GPUWriteByte(uint32_t offset, uint8_t data, uint32_t who/*=UNKNOWN*/)
568 if (offset >= 0xF02000 && offset <= 0xF020FF)
569 WriteLog("GPU: WriteByte--Attempt to write to GPU register file by %s!\n", whoName[who]);
571 if ((offset >= GPU_WORK_RAM_BASE) && (offset <= GPU_WORK_RAM_BASE + 0x0FFF))
573 gpu_ram_8[offset & 0xFFF] = data;
575 //This is the same stupid worthless code that was in the DSP!!! AARRRGGGGHHHHH!!!!!!
578 m68k_end_timeslice();
579 dsp_releaseTimeslice();
583 else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset <= GPU_CONTROL_RAM_BASE + 0x1F))
585 uint32_t reg = offset & 0x1C;
586 int bytenum = offset & 0x03;
588 //This is definitely wrong!
589 if ((reg >= 0x1C) && (reg <= 0x1F))
590 gpu_div_control = (gpu_div_control & (~(0xFF << (bytenum << 3)))) | (data << (bytenum << 3));
593 uint32_t old_data = GPUReadLong(offset & 0xFFFFFFC, who);
594 bytenum = 3 - bytenum; // convention motorola !!!
595 old_data = (old_data & (~(0xFF << (bytenum << 3)))) | (data << (bytenum << 3));
596 GPUWriteLong(offset & 0xFFFFFFC, old_data, who);
600 // WriteLog("gpu: writing %.2x at 0x%.8x\n",data,offset);
601 JaguarWriteByte(offset, data, who);
605 // GPU word access (write)
607 void GPUWriteWord(uint32_t offset, uint16_t data, uint32_t who/*=UNKNOWN*/)
609 if (offset >= 0xF02000 && offset <= 0xF020FF)
610 WriteLog("GPU: WriteWord--Attempt to write to GPU register file by %s!\n", whoName[who]);
612 if ((offset >= GPU_WORK_RAM_BASE) && (offset <= GPU_WORK_RAM_BASE + 0x0FFE))
614 gpu_ram_8[offset & 0xFFF] = (data>>8) & 0xFF;
615 gpu_ram_8[(offset+1) & 0xFFF] = data & 0xFF;//*/
617 SET16(gpu_ram_8, offset, data);//*/
619 /*if (offset >= 0xF03214 && offset < 0xF0321F)
620 WriteLog("GPU: Writing WORD (%04X) to GPU RAM (%08X)...\n", data, offset);//*/
623 //This is the same stupid worthless code that was in the DSP!!! AARRRGGGGHHHHH!!!!!!
626 m68k_end_timeslice();
627 dsp_releaseTimeslice();
631 else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset <= GPU_CONTROL_RAM_BASE + 0x1E))
633 if (offset & 0x01) // This is supposed to weed out unaligned writes, but does nothing...
636 WriteLog("GPU: Write16--unaligned write @ %08X [%04X]\n", offset, data);
641 //Dual locations in this range: $1C Divide unit remainder/Divide unit control (R/W)
642 //This just literally sucks.
643 if ((offset & 0x1C) == 0x1C)
645 //This doesn't look right either--handles cases 1, 2, & 3 all the same!
647 gpu_div_control = (gpu_div_control & 0xFFFF0000) | (data & 0xFFFF);
649 gpu_div_control = (gpu_div_control & 0x0000FFFF) | ((data & 0xFFFF) << 16);
653 //WriteLog("[GPU W16:%08X,%04X]", offset, data);
654 uint32_t old_data = GPUReadLong(offset & 0xFFFFFFC, who);
657 old_data = (old_data & 0xFFFF0000) | (data & 0xFFFF);
659 old_data = (old_data & 0x0000FFFF) | ((data & 0xFFFF) << 16);
661 GPUWriteLong(offset & 0xFFFFFFC, old_data, who);
666 else if ((offset == GPU_WORK_RAM_BASE + 0x0FFF) || (GPU_CONTROL_RAM_BASE + 0x1F))
669 WriteLog("GPU: Write16--unaligned write @ %08X by %s [%04X]!\n", offset, whoName[who], data);
675 // Have to be careful here--this can cause an infinite loop!
676 JaguarWriteWord(offset, data, who);
680 // GPU dword access (write)
682 void GPUWriteLong(uint32_t offset, uint32_t data, uint32_t who/*=UNKNOWN*/)
684 if (offset >= 0xF02000 && offset <= 0xF020FF)
685 WriteLog("GPU: WriteLong--Attempt to write to GPU register file by %s!\n", whoName[who]);
687 // if ((offset >= GPU_WORK_RAM_BASE) && (offset < GPU_WORK_RAM_BASE + 0x1000))
688 if ((offset >= GPU_WORK_RAM_BASE) && (offset <= GPU_WORK_RAM_BASE + 0x0FFC))
693 WriteLog("GPU: Write32--unaligned write @ %08X [%08X] by %s\n", offset, data, whoName[who]);
699 SET32(gpu_ram_8, offset, data);
702 // else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset < GPU_CONTROL_RAM_BASE+0x20))
703 else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset <= GPU_CONTROL_RAM_BASE + 0x1C))
710 bool IMASKCleared = (gpu_flags & IMASK) && !(data & IMASK);
711 // NOTE: According to the JTRM, writing a 1 to IMASK has no effect; only the
712 // IRQ logic can set it. So we mask it out here to prevent problems...
713 gpu_flags = data & (~IMASK);
714 gpu_flag_z = gpu_flags & ZERO_FLAG;
715 gpu_flag_c = (gpu_flags & CARRY_FLAG) >> 1;
716 gpu_flag_n = (gpu_flags & NEGA_FLAG) >> 2;
717 GPUUpdateRegisterBanks();
718 gpu_control &= ~((gpu_flags & CINT04FLAGS) >> 3); // Interrupt latch clear bits
719 //Writing here is only an interrupt enable--this approach is just plain wrong!
721 //This, however, is A-OK! ;-)
722 if (IMASKCleared) // If IMASK was cleared,
723 GPUHandleIRQs(); // see if any other interrupts need servicing!
725 if (gpu_flags & (INT_ENA0 | INT_ENA1 | INT_ENA2 | INT_ENA3 | INT_ENA4))
726 WriteLog("GPU: Interrupt enable set by %s! Bits: %02X\n", whoName[who], (gpu_flags >> 4) & 0x1F);
727 WriteLog("GPU: REGPAGE %s...\n", (gpu_flags & REGPAGE ? "set" : "cleared"));
732 gpu_matrix_control = data;
735 // This can only point to long aligned addresses
736 gpu_pointer_to_matrix = data & 0xFFFFFFFC;
739 gpu_data_organization = data;
744 WriteLog("GPU: %s setting GPU PC to %08X %s\n", whoName[who], gpu_pc, (GPU_RUNNING ? "(GPU is RUNNING!)" : ""));//*/
749 // uint32_t gpu_was_running = GPU_RUNNING;
750 data &= ~0xF7C0; // Disable writes to INT_LAT0-4 & TOM version number
752 // check for GPU -> CPU interrupt
755 //WriteLog("GPU->CPU interrupt\n");
756 if (TOMIRQEnabled(IRQ_GPU))
758 //This is the programmer's responsibility, to make sure the handler is valid, not ours!
759 // if ((TOMIRQEnabled(IRQ_GPU))// && (JaguarInterruptHandlerIsValid(64)))
761 TOMSetPendingGPUInt();
762 m68k_set_irq(2); // Set 68000 IPL 2
763 GPUReleaseTimeslice();
769 // check for CPU -> GPU interrupt #0
772 //WriteLog("CPU->GPU interrupt\n");
773 GPUSetIRQLine(0, ASSERT_LINE);
774 m68k_end_timeslice();
775 DSPReleaseTimeslice();
782 //WriteLog("asked to perform a single step (single step is %senabled)\n",(data&0x8)?"":"not ");
785 gpu_control = (gpu_control & 0xF7C0) | (data & (~0xF7C0));
787 // if gpu wasn't running but is now running, execute a few cycles
788 #ifndef GPU_SINGLE_STEPPING
789 /* if (!gpu_was_running && GPU_RUNNING)
792 WriteLog("GPU: Write32--About to do stupid braindead GPU execution for 200 cycles.\n");
797 #endif // GPU_DEBUG//*/
799 if (gpu_control & 0x18)
801 #endif // #ifndef GPU_SINGLE_STEPPING
803 WriteLog("Write to GPU CTRL by %s: %08X ", whoName[who], data);
805 WriteLog(" --> Starting to run at %08X by %s...", gpu_pc, whoName[who]);
807 WriteLog(" --> Stopped by %s! (GPU_PC: %08X)", whoName[who], gpu_pc);
811 // GPUDumpDisassembly();
814 if (gpu_pc == 0xF035D8)
816 // GPUDumpDisassembly();
819 gpu_control &= 0xFFFFFFFE; // Don't run it and let's see what happens!
820 //Hmm. Seems to lock up when going into the demo...
821 //Try to disable the collision altogether!
824 extern int effect_start5;
825 static bool finished = false;
826 //if (GPU_RUNNING && effect_start5 && !finished)
827 if (GPU_RUNNING && effect_start5 && gpu_pc == 0xF035D8)
829 // Let's do a dump of $6528!
830 /* uint32_t numItems = JaguarReadWord(0x6BD6);
831 WriteLog("\nDump of $6528: %u items.\n\n", numItems);
832 for(int i=0; i<numItems*3*4; i+=3*4)
834 WriteLog("\t%04X: %08X %08X %08X -> ", 0x6528+i, JaguarReadLong(0x6528+i),
835 JaguarReadLong(0x6528+i+4), JaguarReadLong(0x6528+i+8));
836 uint16_t link = JaguarReadWord(0x6528+i+8+2);
837 for(int j=0; j<40; j+=4)
838 WriteLog("%08X ", JaguarReadLong(link + j));
842 // Let's try a manual blit here...
843 //This isn't working the way it should! !!! FIX !!!
844 //Err, actually, it is.
845 // NOW, it works right! Problem solved!!! It's a blitter bug!
846 /* uint32_t src = 0x4D54, dst = 0xF03000, width = 10 * 4;
847 for(int y=0; y<127; y++)
849 for(int x=0; x<2; x++)
851 JaguarWriteLong(dst, JaguarReadLong(src));
856 src += width - (2 * 4);
860 WriteLog("\nGPU: About to execute collision detection code.\n\n");//*/
862 /* WriteLog("\nGPU: About to execute collision detection code. Data @ 4D54:\n\n");
864 for(int i=0x004D54; i<0x004D54+2048; i++)
866 WriteLog("%02X ", JaguarReadByte(i));
874 WriteLog("\n\nData @ F03000:\n\n");
876 for(int i=0xF03000; i<0xF03200; i++)
878 WriteLog("%02X ", JaguarReadByte(i));
892 /*if (!GPU_RUNNING && finished)
894 WriteLog("\nGPU: Finished collision detection code. Exiting!\n\n");
899 // (?) If we're set running by the M68K (or DSP?) then end its timeslice to
900 // allow the GPU a chance to run...
901 // Yes! This partially fixed Trevor McFur...
903 m68k_end_timeslice();
910 gpu_div_control = data;
912 // default: // unaligned long write
919 // JaguarWriteWord(offset, (data >> 16) & 0xFFFF, who);
920 // JaguarWriteWord(offset+2, data & 0xFFFF, who);
921 // We're a 32-bit processor, we can do a long write...!
922 JaguarWriteLong(offset, data, who);
926 // Change register banks if necessary
928 void GPUUpdateRegisterBanks(void)
930 int bank = (gpu_flags & REGPAGE); // REGPAGE bit
932 if (gpu_flags & IMASK) // IMASK bit
933 bank = 0; // IMASK forces main bank to be bank 0
936 gpu_reg = gpu_reg_bank_1, gpu_alternate_reg = gpu_reg_bank_0;
938 gpu_reg = gpu_reg_bank_0, gpu_alternate_reg = gpu_reg_bank_1;
941 void GPUHandleIRQs(void)
943 // Bail out if we're already in an interrupt!
944 if (gpu_flags & IMASK)
947 // Get the interrupt latch & enable bits
948 uint32_t bits = (gpu_control >> 6) & 0x1F, mask = (gpu_flags >> 4) & 0x1F;
950 // Bail out if latched interrupts aren't enabled
955 // Determine which interrupt to service
956 uint32_t which = 0; //Isn't there a #pragma to disable this warning???
969 WriteLog("GPU: Generating IRQ #%i\n", which);
971 // set the interrupt flag
973 GPUUpdateRegisterBanks();
975 // subqt #4,r31 ; pre-decrement stack pointer
976 // move pc,r30 ; address of interrupted code
977 // store r30,(r31) ; store return address
979 GPUWriteLong(gpu_reg[31], gpu_pc - 2, GPU);
981 // movei #service_address,r30 ; pointer to ISR entry
982 // jump (r30) ; jump to ISR
984 gpu_pc = gpu_reg[30] = GPU_WORK_RAM_BASE + (which * 0x10);
987 void GPUSetIRQLine(int irqline, int state)
990 WriteLog("GPU: Setting GPU IRQ line #%i\n", irqline);
992 uint32_t mask = 0x0040 << irqline;
993 gpu_control &= ~mask; // Clear the interrupt latch
997 gpu_control |= mask; // Assert the interrupt latch
998 GPUHandleIRQs(); // And handle the interrupt...
1002 //TEMPORARY: Testing only!
1008 // memory_malloc_secure((void **)&gpu_ram_8, 0x1000, "GPU work RAM");
1009 // memory_malloc_secure((void **)&gpu_reg_bank_0, 32 * sizeof(int32_t), "GPU bank 0 regs");
1010 // memory_malloc_secure((void **)&gpu_reg_bank_1, 32 * sizeof(int32_t), "GPU bank 1 regs");
1012 build_branch_condition_table();
1016 //TEMPORARY: Testing only!
1023 // GPU registers (directly visible)
1024 gpu_flags = 0x00000000;
1025 gpu_matrix_control = 0x00000000;
1026 gpu_pointer_to_matrix = 0x00000000;
1027 gpu_data_organization = 0xFFFFFFFF;
1028 gpu_pc = 0x00F03000;
1029 gpu_control = 0x00002800; // Correctly sets this as TOM Rev. 2
1030 gpu_hidata = 0x00000000;
1031 gpu_remain = 0x00000000; // These two registers are RO/WO
1032 gpu_div_control = 0x00000000;
1034 // GPU internal register
1035 gpu_acc = 0x00000000;
1037 gpu_reg = gpu_reg_bank_0;
1038 gpu_alternate_reg = gpu_reg_bank_1;
1040 for(int i=0; i<32; i++)
1041 gpu_reg[i] = gpu_alternate_reg[i] = 0x00000000;
1044 memset(gpu_ram_8, 0xFF, 0x1000);
1046 //not needed GPUInterruptPending = false;
1049 // Contents of local RAM are quasi-stable; we simulate this by randomizing RAM contents
1050 for(uint32_t i=0; i<4096; i+=4)
1051 *((uint32_t *)(&gpu_ram_8[i])) = rand();
1055 uint32_t GPUReadPC(void)
1061 void GPUResetStats(void)
1063 for(uint32_t i=0; i<64; i++)
1064 gpu_opcode_use[i] = 0;
1065 WriteLog("--> GPU stats were reset!\n");
1069 void GPUDumpDisassembly(void)
1073 WriteLog("\n---[GPU code at 00F03000]---------------------------\n");
1074 uint32_t j = 0xF03000;
1075 while (j <= 0xF03FFF)
1078 j += dasmjag(JAGUAR_GPU, buffer, j);
1079 WriteLog("\t%08X: %s\n", oldj, buffer);
1084 void GPUDumpRegisters(void)
1086 WriteLog("\n---[GPU flags: NCZ %d%d%d]-----------------------\n", gpu_flag_n, gpu_flag_c, gpu_flag_z);
1087 WriteLog("\nRegisters bank 0\n");
1088 for(int j=0; j<8; j++)
1090 WriteLog("\tR%02i = %08X R%02i = %08X R%02i = %08X R%02i = %08X\n",
1091 (j << 2) + 0, gpu_reg_bank_0[(j << 2) + 0],
1092 (j << 2) + 1, gpu_reg_bank_0[(j << 2) + 1],
1093 (j << 2) + 2, gpu_reg_bank_0[(j << 2) + 2],
1094 (j << 2) + 3, gpu_reg_bank_0[(j << 2) + 3]);
1096 WriteLog("Registers bank 1\n");
1097 for(int j=0; j<8; j++)
1099 WriteLog("\tR%02i = %08X R%02i = %08X R%02i = %08X R%02i = %08X\n",
1100 (j << 2) + 0, gpu_reg_bank_1[(j << 2) + 0],
1101 (j << 2) + 1, gpu_reg_bank_1[(j << 2) + 1],
1102 (j << 2) + 2, gpu_reg_bank_1[(j << 2) + 2],
1103 (j << 2) + 3, gpu_reg_bank_1[(j << 2) + 3]);
1108 void GPUDumpMemory(void)
1110 WriteLog("\n---[GPU data at 00F03000]---------------------------\n");
1111 for(int i=0; i<0xFFF; i+=4)
1112 WriteLog("\t%08X: %02X %02X %02X %02X\n", 0xF03000+i, gpu_ram_8[i],
1113 gpu_ram_8[i+1], gpu_ram_8[i+2], gpu_ram_8[i+3]);
1119 WriteLog("\n\n---------------------------------------------------------------------\n");
1120 WriteLog("GPU I/O Registers\n");
1121 WriteLog("---------------------------------------------------------------------\n");
1122 WriteLog("F0%04X (G_FLAGS): $%06X\n", 0x2100, (gpu_flags & 0xFFFFFFF8) | (gpu_flag_n << 2) | (gpu_flag_c << 1) | gpu_flag_z);
1123 WriteLog("F0%04X (G_MTXC): $%04X\n", 0x2104, gpu_matrix_control);
1124 WriteLog("F0%04X (G_MTXA): $%04X\n", 0x2108, gpu_pointer_to_matrix);
1125 WriteLog("F0%04X (G_END): $%02X\n", 0x210C, gpu_data_organization);
1126 WriteLog("F0%04X (G_PC): $%06X\n", 0x2110, gpu_pc);
1127 WriteLog("F0%04X (G_CTRL): $%06X\n", 0x2114, gpu_control);
1128 WriteLog("F0%04X (G_HIDATA): $%08X\n", 0x2118, gpu_hidata);
1129 WriteLog("F0%04X (G_REMAIN): $%08X\n", 0x211C, gpu_remain);
1130 WriteLog("F0%04X (G_DIVCTRL): $%02X\n", 0x211C, gpu_div_control);
1131 WriteLog("---------------------------------------------------------------------\n\n\n");
1133 WriteLog("GPU: Stopped at PC=%08X (GPU %s running)\n", (unsigned int)gpu_pc, GPU_RUNNING ? "was" : "wasn't");
1135 // Get the interrupt latch & enable bits
1136 uint8_t bits = (gpu_control >> 6) & 0x1F, mask = (gpu_flags >> 4) & 0x1F;
1137 WriteLog("GPU: Latch bits = %02X, enable bits = %02X\n", bits, mask);
1140 GPUDumpDisassembly();
1142 WriteLog("\nGPU opcodes use:\n");
1143 for(int i=0; i<64; i++)
1145 if (gpu_opcode_use[i])
1146 WriteLog("\t%17s %lu\n", gpu_opcode_str[i], gpu_opcode_use[i]);
1153 // Main GPU execution core
1155 static int testCount = 1;
1157 static bool tripwire = false;
1158 void GPUExec(int32_t cycles)
1163 #ifdef GPU_SINGLE_STEPPING
1164 if (gpu_control & 0x18)
1167 gpu_control &= ~0x10;
1171 gpu_releaseTimeSlice_flag = 0;
1174 while (cycles > 0 && GPU_RUNNING)
1176 if (gpu_ram_8[0x054] == 0x98 && gpu_ram_8[0x055] == 0x0A && gpu_ram_8[0x056] == 0x03
1177 && gpu_ram_8[0x057] == 0x00 && gpu_ram_8[0x058] == 0x00 && gpu_ram_8[0x059] == 0x00)
1179 if (gpu_pc == 0xF03000)
1181 extern uint32_t starCount;
1183 /* WriteLog("GPU: Starting starfield generator... Dump of [R03=%08X]:\n", gpu_reg_bank_0[03]);
1184 uint32_t base = gpu_reg_bank_0[3];
1185 for(uint32_t i=0; i<0x100; i+=16)
1187 WriteLog("%02X: ", i);
1188 for(uint32_t j=0; j<16; j++)
1190 WriteLog("%02X ", JaguarReadByte(base + i + j));
1195 // if (gpu_pc == 0xF03)
1199 /*if (gpu_pc == 0xF03B9E && gpu_reg_bank_0[01] == 0)
1202 WriteLog("GPU: Starting disassembly log...\n");
1205 /*if (gpu_pc == 0xF0359A)
1210 /* gpu_flag_c = (gpu_flag_c ? 1 : 0);
1211 gpu_flag_z = (gpu_flag_z ? 1 : 0);
1212 gpu_flag_n = (gpu_flag_n ? 1 : 0);*/
1214 if (gpu_pc == 0xF03200)
1218 uint16_t opcode = GPUReadWord(gpu_pc, GPU);
1219 uint32_t index = opcode >> 10;
1220 gpu_instruction = opcode; // Added for GPU #3...
1221 gpu_opcode_first_parameter = (opcode >> 5) & 0x1F;
1222 gpu_opcode_second_parameter = opcode & 0x1F;
1223 /*if (gpu_pc == 0xF03BE8)
1224 WriteLog("Start of OP frame write...\n");
1225 if (gpu_pc == 0xF03EEE)
1226 WriteLog("--> Writing BRANCH object ---\n");
1227 if (gpu_pc == 0xF03F62)
1228 WriteLog("--> Writing BITMAP object ***\n");//*/
1229 /*if (gpu_pc == 0xF03546)
1231 WriteLog("\n--> GPU PC: F03546\n");
1233 GPUDumpDisassembly();
1235 /*if (gpu_pc == 0xF033F6)
1237 WriteLog("\n--> GPU PC: F033F6\n");
1239 GPUDumpDisassembly();
1241 /*if (gpu_pc == 0xF033CC)
1243 WriteLog("\n--> GPU PC: F033CC\n");
1245 GPUDumpDisassembly();
1247 /*if (gpu_pc == 0xF033D6)
1249 WriteLog("\n--> GPU PC: F033D6 (#%d)\n", testCount++);
1253 /*if (gpu_pc == 0xF033D8)
1255 WriteLog("\n--> GPU PC: F033D8 (#%d)\n", testCount++);
1259 /*if (gpu_pc == 0xF0358E)
1261 WriteLog("\n--> GPU PC: F0358E (#%d)\n", testCount++);
1265 /*if (gpu_pc == 0xF034CA)
1267 WriteLog("\n--> GPU PC: F034CA (#%d)\n", testCount++);
1270 /*if (gpu_pc == 0xF034CA)
1272 len = gpu_reg[1] + 4;//, r9save = gpu_reg[9];
1273 WriteLog("\nAbout to subtract [#%d] (R14=%08X, R15=%08X, R9=%08X):\n ", testCount++, gpu_reg[14], gpu_reg[15], gpu_reg[9]);
1274 for(int i=0; i<len; i+=4)
1275 WriteLog(" %08X", GPUReadLong(gpu_reg[15]+i));
1277 for(int i=0; i<len; i+=4)
1278 WriteLog(" %08X", GPUReadLong(gpu_reg[14]+i));
1281 if (gpu_pc == 0xF034DE)
1283 WriteLog("\nSubtracted! (R14=%08X, R15=%08X):\n ", gpu_reg[14], gpu_reg[15]);
1284 for(int i=0; i<len; i+=4)
1285 WriteLog(" %08X", GPUReadLong(gpu_reg[15]+i));
1287 for(int i=0; i<len; i+=4)
1288 WriteLog(" %08X", GPUReadLong(gpu_reg[14]+i));
1290 for(int i=0; i<len; i+=4)
1291 WriteLog(" --------");
1293 for(int i=0; i<len; i+=4)
1294 WriteLog(" %08X", GPUReadLong(gpu_reg[9]+4+i));
1297 /*if (gpu_pc == 0xF035C8)
1299 WriteLog("\n--> GPU PC: F035C8 (#%d)\n", testCount++);
1301 GPUDumpDisassembly();
1306 // gpu_reset_stats();
1307 static char buffer[512];
1308 dasmjag(JAGUAR_GPU, buffer, gpu_pc);
1309 WriteLog("GPU: [%08X] %s (RM=%08X, RN=%08X) -> ", gpu_pc, buffer, RM, RN);
1311 //$E400 -> 1110 01 -> $39 -> 57
1314 gpu_opcode[index]();
1316 // gpu2_opcode[index]();
1318 //GPU #3 (Doesn't show ATARI logo! #1 & #2 do...)
1320 // gpu3_opcode[index]();
1323 //GPU: [00F03548] jr nz,00F03560 (0xd561) (RM=00F03114, RN=00000004) -> --> JR: Branch taken.
1324 /*static bool firstTime = true;
1325 if (gpu_pc == 0xF03548 && firstTime)
1328 // firstTime = false;
1330 //static char buffer[512];
1332 //while (k<0xF0356C)
1335 //k += dasmjag(JAGUAR_GPU, buffer, k);
1336 //WriteLog("GPU: [%08X] %s\n", oldk, buffer);
1338 // gpu_start_log = 1;
1340 //GPU: [00F0354C] jump nz,(r29) (0xd3a1) (RM=00F03314, RN=00000004) -> (RM=00F03314, RN=00000004)
1341 /*if (gpu_pc == 0xF0354C)
1342 gpu_flag_z = 0;//, gpu_start_log = 1;//*/
1344 cycles -= gpu_opcode_cycles[index];
1345 gpu_opcode_use[index]++;
1347 WriteLog("(RM=%08X, RN=%08X)\n", RM, RN);//*/
1348 if ((gpu_pc < 0xF03000 || gpu_pc > 0xF03FFF) && !tripwire)
1350 WriteLog("GPU: Executing outside local RAM! GPU_PC: %08X\n", gpu_pc);
1363 GPU opcodes use (offset punch--vertically below bad guy):
1385 load_r14_indexed 1183
1386 load_r15_indexed 1125
1389 store_r14_indexed 320
1398 static void gpu_opcode_jump(void)
1401 const char * condition[32] =
1402 { "T", "nz", "z", "???", "nc", "nc nz", "nc z", "???", "c", "c nz",
1403 "c z", "???", "???", "???", "???", "???", "???", "???", "???",
1404 "???", "nn", "nn nz", "nn z", "???", "n", "n nz", "n z", "???",
1405 "???", "???", "???", "F" };
1407 WriteLog("%06X: JUMP %s, (R%02u) [NCZ:%u%u%u, R%02u=%08X] ", gpu_pc-2, condition[IMM_2], IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM);
1410 /* gpu_flag_c = (gpu_flag_c ? 1 : 0);
1411 gpu_flag_z = (gpu_flag_z ? 1 : 0);
1412 gpu_flag_n = (gpu_flag_n ? 1 : 0);*/
1413 // KLUDGE: Used by BRANCH_CONDITION
1414 uint32_t jaguar_flags = (gpu_flag_n << 2) | (gpu_flag_c << 1) | gpu_flag_z;
1416 if (BRANCH_CONDITION(IMM_2))
1420 WriteLog("Branched!\n");
1423 WriteLog(" --> JUMP: Branch taken.\n");
1424 uint32_t delayed_pc = RM;
1426 gpu_pc = delayed_pc;
1427 /* uint16_t opcode = GPUReadWord(gpu_pc, GPU);
1428 gpu_opcode_first_parameter = (opcode >> 5) & 0x1F;
1429 gpu_opcode_second_parameter = opcode & 0x1F;
1431 gpu_pc = delayed_pc;
1432 gpu_opcode[opcode>>10]();//*/
1437 WriteLog("Branch NOT taken.\n");
1442 static void gpu_opcode_jr(void)
1445 const char * condition[32] =
1446 { "T", "nz", "z", "???", "nc", "nc nz", "nc z", "???", "c", "c nz",
1447 "c z", "???", "???", "???", "???", "???", "???", "???", "???",
1448 "???", "nn", "nn nz", "nn z", "???", "n", "n nz", "n z", "???",
1449 "???", "???", "???", "F" };
1451 WriteLog("%06X: JR %s, %06X [NCZ:%u%u%u] ", gpu_pc-2, condition[IMM_2], gpu_pc+((IMM_1 & 0x10 ? 0xFFFFFFF0 | IMM_1 : IMM_1) * 2), gpu_flag_n, gpu_flag_c, gpu_flag_z);
1453 /* if (CONDITION(jaguar.op & 31))
1455 int32_t r1 = (INT8)((jaguar.op >> 2) & 0xF8) >> 2;
1456 uint32_t newpc = jaguar.PC + r1;
1458 jaguar.op = ROPCODE(jaguar.PC);
1460 (*jaguar.table[jaguar.op >> 10])();
1462 jaguar_icount -= 3; // 3 wait states guaranteed
1465 /* gpu_flag_n = (gpu_flag_n ? 1 : 0);
1466 gpu_flag_c = (gpu_flag_c ? 1 : 0);
1467 gpu_flag_z = (gpu_flag_z ? 1 : 0);*/
1468 // KLUDGE: Used by BRANCH_CONDITION
1469 uint32_t jaguar_flags = (gpu_flag_n << 2) | (gpu_flag_c << 1) | gpu_flag_z;
1471 if (BRANCH_CONDITION(IMM_2))
1475 WriteLog("Branched!\n");
1478 WriteLog(" --> JR: Branch taken.\n");
1479 int32_t offset = (IMM_1 & 0x10 ? 0xFFFFFFF0 | IMM_1 : IMM_1); // Sign extend IMM_1
1480 int32_t delayed_pc = gpu_pc + (offset * 2);
1482 gpu_pc = delayed_pc;
1483 /* uint16_t opcode = GPUReadWord(gpu_pc, GPU);
1484 gpu_opcode_first_parameter = (opcode >> 5) & 0x1F;
1485 gpu_opcode_second_parameter = opcode & 0x1F;
1487 gpu_pc = delayed_pc;
1488 gpu_opcode[opcode>>10]();//*/
1493 WriteLog("Branch NOT taken.\n");
1498 static void gpu_opcode_add(void)
1502 WriteLog("%06X: ADD R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1504 uint32_t res = RN + RM;
1505 CLR_ZNC; SET_ZNC_ADD(RN, RM, res);
1509 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1514 static void gpu_opcode_addc(void)
1518 WriteLog("%06X: ADDC R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1520 /* int dreg = jaguar.op & 31;
1521 uint32_t r1 = jaguar.r[(jaguar.op >> 5) & 31];
1522 uint32_t r2 = jaguar.r[dreg];
1523 uint32_t res = r2 + r1 + ((jaguar.FLAGS >> 1) & 1);
1524 jaguar.r[dreg] = res;
1525 CLR_ZNC; SET_ZNC_ADD(r2,r1,res);*/
1527 uint32_t res = RN + RM + gpu_flag_c;
1528 uint32_t carry = gpu_flag_c;
1529 // SET_ZNC_ADD(RN, RM, res); //???BUG??? Yes!
1530 SET_ZNC_ADD(RN + carry, RM, res);
1531 // SET_ZNC_ADD(RN, RM + carry, res);
1535 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1540 static void gpu_opcode_addq(void)
1544 WriteLog("%06X: ADDQ #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1546 uint32_t r1 = gpu_convert_zero[IMM_1];
1547 uint32_t res = RN + r1;
1548 CLR_ZNC; SET_ZNC_ADD(RN, r1, res);
1552 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1557 static void gpu_opcode_addqt(void)
1559 #ifdef GPU_DIS_ADDQT
1561 WriteLog("%06X: ADDQT #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1563 RN += gpu_convert_zero[IMM_1];
1564 #ifdef GPU_DIS_ADDQT
1566 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1571 static void gpu_opcode_sub(void)
1575 WriteLog("%06X: SUB R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1577 uint32_t res = RN - RM;
1578 SET_ZNC_SUB(RN, RM, res);
1582 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1587 static void gpu_opcode_subc(void)
1591 WriteLog("%06X: SUBC R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1593 // This is how the GPU ALU does it--Two's complement with inverted carry
1594 uint64_t res = (uint64_t)RN + (uint64_t)(RM ^ 0xFFFFFFFF) + (gpu_flag_c ^ 1);
1595 // Carry out of the result is inverted too
1596 gpu_flag_c = ((res >> 32) & 0x01) ^ 1;
1597 RN = (res & 0xFFFFFFFF);
1601 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1606 static void gpu_opcode_subq(void)
1610 WriteLog("%06X: SUBQ #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1612 uint32_t r1 = gpu_convert_zero[IMM_1];
1613 uint32_t res = RN - r1;
1614 SET_ZNC_SUB(RN, r1, res);
1618 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1623 static void gpu_opcode_subqt(void)
1625 #ifdef GPU_DIS_SUBQT
1627 WriteLog("%06X: SUBQT #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1629 RN -= gpu_convert_zero[IMM_1];
1630 #ifdef GPU_DIS_SUBQT
1632 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1637 static void gpu_opcode_cmp(void)
1641 WriteLog("%06X: CMP R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1643 uint32_t res = RN - RM;
1644 SET_ZNC_SUB(RN, RM, res);
1647 WriteLog("[NCZ:%u%u%u]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z);
1652 static void gpu_opcode_cmpq(void)
1654 static int32_t sqtable[32] =
1655 { 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,-16,-15,-14,-13,-12,-11,-10,-9,-8,-7,-6,-5,-4,-3,-2,-1 };
1658 WriteLog("%06X: CMPQ #%d, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, sqtable[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1660 uint32_t r1 = sqtable[IMM_1 & 0x1F]; // I like this better -> (INT8)(jaguar.op >> 2) >> 3;
1661 uint32_t res = RN - r1;
1662 SET_ZNC_SUB(RN, r1, res);
1665 WriteLog("[NCZ:%u%u%u]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z);
1670 static void gpu_opcode_and(void)
1674 WriteLog("%06X: AND R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1680 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1685 static void gpu_opcode_or(void)
1689 WriteLog("%06X: OR R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1695 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1700 static void gpu_opcode_xor(void)
1704 WriteLog("%06X: XOR R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1710 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1715 static void gpu_opcode_not(void)
1719 WriteLog("%06X: NOT R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1725 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1730 static void gpu_opcode_move_pc(void)
1732 #ifdef GPU_DIS_MOVEPC
1734 WriteLog("%06X: MOVE PC, R%02u [NCZ:%u%u%u, PC=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, gpu_pc-2, IMM_2, RN);
1736 // Should be previous PC--this might not always be previous instruction!
1737 // Then again, this will point right at the *current* instruction, i.e., MOVE PC,R!
1739 #ifdef GPU_DIS_MOVEPC
1741 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1746 static void gpu_opcode_sat8(void)
1750 WriteLog("%06X: SAT8 R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1752 RN = ((int32_t)RN < 0 ? 0 : (RN > 0xFF ? 0xFF : RN));
1756 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1761 static void gpu_opcode_sat16(void)
1763 RN = ((int32_t)RN < 0 ? 0 : (RN > 0xFFFF ? 0xFFFF : RN));
1767 static void gpu_opcode_sat24(void)
1769 RN = ((int32_t)RN < 0 ? 0 : (RN > 0xFFFFFF ? 0xFFFFFF : RN));
1774 static void gpu_opcode_store_r14_indexed(void)
1776 #ifdef GPU_DIS_STORE14I
1778 WriteLog("%06X: STORE R%02u, (R14+$%02X) [NCZ:%u%u%u, R%02u=%08X, R14+$%02X=%08X]\n", gpu_pc-2, IMM_2, gpu_convert_zero[IMM_1] << 2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, gpu_convert_zero[IMM_1] << 2, gpu_reg[14]+(gpu_convert_zero[IMM_1] << 2));
1780 #ifdef GPU_CORRECT_ALIGNMENT
1781 uint32_t address = gpu_reg[14] + (gpu_convert_zero[IMM_1] << 2);
1783 if (address >= 0xF03000 && address <= 0xF03FFF)
1784 GPUWriteLong(address & 0xFFFFFFFC, RN, GPU);
1786 GPUWriteLong(address, RN, GPU);
1788 GPUWriteLong(gpu_reg[14] + (gpu_convert_zero[IMM_1] << 2), RN, GPU);
1793 static void gpu_opcode_store_r15_indexed(void)
1795 #ifdef GPU_DIS_STORE15I
1797 WriteLog("%06X: STORE R%02u, (R15+$%02X) [NCZ:%u%u%u, R%02u=%08X, R15+$%02X=%08X]\n", gpu_pc-2, IMM_2, gpu_convert_zero[IMM_1] << 2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, gpu_convert_zero[IMM_1] << 2, gpu_reg[15]+(gpu_convert_zero[IMM_1] << 2));
1799 #ifdef GPU_CORRECT_ALIGNMENT
1800 uint32_t address = gpu_reg[15] + (gpu_convert_zero[IMM_1] << 2);
1802 if (address >= 0xF03000 && address <= 0xF03FFF)
1803 GPUWriteLong(address & 0xFFFFFFFC, RN, GPU);
1805 GPUWriteLong(address, RN, GPU);
1807 GPUWriteLong(gpu_reg[15] + (gpu_convert_zero[IMM_1] << 2), RN, GPU);
1812 static void gpu_opcode_load_r14_ri(void)
1814 #ifdef GPU_DIS_LOAD14R
1816 WriteLog("%06X: LOAD (R14+R%02u), R%02u [NCZ:%u%u%u, R14+R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM+gpu_reg[14], IMM_2, RN);
1818 #ifdef GPU_CORRECT_ALIGNMENT
1819 uint32_t address = gpu_reg[14] + RM;
1821 if (address >= 0xF03000 && address <= 0xF03FFF)
1822 RN = GPUReadLong(address & 0xFFFFFFFC, GPU);
1824 RN = GPUReadLong(address, GPU);
1826 RN = GPUReadLong(gpu_reg[14] + RM, GPU);
1828 #ifdef GPU_DIS_LOAD14R
1830 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1835 static void gpu_opcode_load_r15_ri(void)
1837 #ifdef GPU_DIS_LOAD15R
1839 WriteLog("%06X: LOAD (R15+R%02u), R%02u [NCZ:%u%u%u, R15+R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM+gpu_reg[15], IMM_2, RN);
1841 #ifdef GPU_CORRECT_ALIGNMENT
1842 uint32_t address = gpu_reg[15] + RM;
1844 if (address >= 0xF03000 && address <= 0xF03FFF)
1845 RN = GPUReadLong(address & 0xFFFFFFFC, GPU);
1847 RN = GPUReadLong(address, GPU);
1849 RN = GPUReadLong(gpu_reg[15] + RM, GPU);
1851 #ifdef GPU_DIS_LOAD15R
1853 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1858 static void gpu_opcode_store_r14_ri(void)
1860 #ifdef GPU_DIS_STORE14R
1862 WriteLog("%06X: STORE R%02u, (R14+R%02u) [NCZ:%u%u%u, R%02u=%08X, R14+R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM+gpu_reg[14]);
1864 #ifdef GPU_CORRECT_ALIGNMENT
1865 uint32_t address = gpu_reg[14] + RM;
1867 if (address >= 0xF03000 && address <= 0xF03FFF)
1868 GPUWriteLong(address & 0xFFFFFFFC, RN, GPU);
1870 GPUWriteLong(address, RN, GPU);
1872 GPUWriteLong(gpu_reg[14] + RM, RN, GPU);
1877 static void gpu_opcode_store_r15_ri(void)
1879 #ifdef GPU_DIS_STORE15R
1881 WriteLog("%06X: STORE R%02u, (R15+R%02u) [NCZ:%u%u%u, R%02u=%08X, R15+R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM+gpu_reg[15]);
1883 #ifdef GPU_CORRECT_ALIGNMENT_STORE
1884 uint32_t address = gpu_reg[15] + RM;
1886 if (address >= 0xF03000 && address <= 0xF03FFF)
1887 GPUWriteLong(address & 0xFFFFFFFC, RN, GPU);
1889 GPUWriteLong(address, RN, GPU);
1891 GPUWriteLong(gpu_reg[15] + RM, RN, GPU);
1896 static void gpu_opcode_nop(void)
1900 WriteLog("%06X: NOP [NCZ:%u%u%u]\n", gpu_pc-2, gpu_flag_n, gpu_flag_c, gpu_flag_z);
1905 static void gpu_opcode_pack(void)
1909 WriteLog("%06X: %s R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, (!IMM_1 ? "PACK " : "UNPACK"), IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1913 //BUG! if (RM == 0) // Pack
1914 if (IMM_1 == 0) // Pack
1915 RN = ((val >> 10) & 0x0000F000) | ((val >> 5) & 0x00000F00) | (val & 0x000000FF);
1917 RN = ((val & 0x0000F000) << 10) | ((val & 0x00000F00) << 5) | (val & 0x000000FF);
1920 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1925 static void gpu_opcode_storeb(void)
1927 #ifdef GPU_DIS_STOREB
1929 WriteLog("%06X: STOREB R%02u, (R%02u) [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM);
1932 // Would appear to be so...!
1933 if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1934 GPUWriteLong(RM, RN & 0xFF, GPU);
1936 JaguarWriteByte(RM, RN, GPU);
1940 static void gpu_opcode_storew(void)
1942 #ifdef GPU_DIS_STOREW
1944 WriteLog("%06X: STOREW R%02u, (R%02u) [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM);
1946 #ifdef GPU_CORRECT_ALIGNMENT
1947 if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1948 GPUWriteLong(RM & 0xFFFFFFFE, RN & 0xFFFF, GPU);
1950 JaguarWriteWord(RM, RN, GPU);
1952 if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1953 GPUWriteLong(RM, RN & 0xFFFF, GPU);
1955 JaguarWriteWord(RM, RN, GPU);
1960 static void gpu_opcode_store(void)
1962 #ifdef GPU_DIS_STORE
1964 WriteLog("%06X: STORE R%02u, (R%02u) [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM);
1966 #ifdef GPU_CORRECT_ALIGNMENT
1967 if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1968 GPUWriteLong(RM & 0xFFFFFFFC, RN, GPU);
1970 GPUWriteLong(RM, RN, GPU);
1972 GPUWriteLong(RM, RN, GPU);
1977 static void gpu_opcode_storep(void)
1979 #ifdef GPU_CORRECT_ALIGNMENT
1980 if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1982 GPUWriteLong((RM & 0xFFFFFFF8) + 0, gpu_hidata, GPU);
1983 GPUWriteLong((RM & 0xFFFFFFF8) + 4, RN, GPU);
1987 GPUWriteLong(RM + 0, gpu_hidata, GPU);
1988 GPUWriteLong(RM + 4, RN, GPU);
1991 GPUWriteLong(RM + 0, gpu_hidata, GPU);
1992 GPUWriteLong(RM + 4, RN, GPU);
1996 static void gpu_opcode_loadb(void)
1998 #ifdef GPU_DIS_LOADB
2000 WriteLog("%06X: LOADB (R%02u), R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2002 if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
2003 RN = GPUReadLong(RM, GPU) & 0xFF;
2005 RN = JaguarReadByte(RM, GPU);
2006 #ifdef GPU_DIS_LOADB
2008 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2013 static void gpu_opcode_loadw(void)
2015 #ifdef GPU_DIS_LOADW
2017 WriteLog("%06X: LOADW (R%02u), R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2019 #ifdef GPU_CORRECT_ALIGNMENT
2020 if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
2021 RN = GPUReadLong(RM & 0xFFFFFFFE, GPU) & 0xFFFF;
2023 RN = JaguarReadWord(RM, GPU);
2025 if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
2026 RN = GPUReadLong(RM, GPU) & 0xFFFF;
2028 RN = JaguarReadWord(RM, GPU);
2030 #ifdef GPU_DIS_LOADW
2032 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2037 // According to the docs, & "Do The Same", this address is long aligned...
2039 // And it works!!! Need to fix all instances...
2040 // Also, Power Drive Rally seems to contradict the idea that only LOADs in
2041 // the $F03000-$F03FFF range are aligned...
2042 #warning "!!! Alignment issues, need to find definitive final word on this !!!"
2044 Preliminary testing on real hardware seems to confirm that something strange goes on
2045 with unaligned reads in main memory. When the address is off by 1, the result is the
2046 same as the long address with the top byte replaced by something. So if the read is
2047 from $401, and $400 has 12 34 56 78, the value read will be $nn345678, where nn is a currently unknown value.
2048 When the address is off by 2, the result would be $nnnn5678, where nnnn is unknown.
2049 When the address is off by 3, the result would be $nnnnnn78, where nnnnnn is unknown.
2050 It may be that the "unknown" values come from the prefetch queue, but not sure how
2051 to test that. They seem to be stable, though, which would indicate such a mechanism.
2052 Sometimes, however, the off by 2 case returns $12345678!
2054 static void gpu_opcode_load(void)
2058 WriteLog("%06X: LOAD (R%02u), R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2060 #ifdef GPU_CORRECT_ALIGNMENT
2061 uint32_t mask[4] = { 0x00000000, 0xFF000000, 0xFFFF0000, 0xFFFFFF00 };
2062 // if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
2063 RN = GPUReadLong(RM & 0xFFFFFFFC, GPU);
2064 // RN = GPUReadLong(RM & 0x00FFFFFC, GPU);
2066 // RN = GPUReadLong(RM, GPU);
2067 // Simulate garbage in unaligned reads...
2068 //seems that this behavior is different in GPU mem vs. main mem...
2069 // if ((RM < 0xF03000) || (RM > 0xF0BFFF))
2070 // RN |= mask[RM & 0x03];
2072 RN = GPUReadLong(RM, GPU);
2076 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2081 static void gpu_opcode_loadp(void)
2083 #ifdef GPU_CORRECT_ALIGNMENT
2084 if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
2086 gpu_hidata = GPUReadLong((RM & 0xFFFFFFF8) + 0, GPU);
2087 RN = GPUReadLong((RM & 0xFFFFFFF8) + 4, GPU);
2091 gpu_hidata = GPUReadLong(RM + 0, GPU);
2092 RN = GPUReadLong(RM + 4, GPU);
2095 gpu_hidata = GPUReadLong(RM + 0, GPU);
2096 RN = GPUReadLong(RM + 4, GPU);
2101 static void gpu_opcode_load_r14_indexed(void)
2103 #ifdef GPU_DIS_LOAD14I
2105 WriteLog("%06X: LOAD (R14+$%02X), R%02u [NCZ:%u%u%u, R14+$%02X=%08X, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1] << 2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, gpu_convert_zero[IMM_1] << 2, gpu_reg[14]+(gpu_convert_zero[IMM_1] << 2), IMM_2, RN);
2107 #ifdef GPU_CORRECT_ALIGNMENT
2108 uint32_t address = gpu_reg[14] + (gpu_convert_zero[IMM_1] << 2);
2110 if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
2111 RN = GPUReadLong(address & 0xFFFFFFFC, GPU);
2113 RN = GPUReadLong(address, GPU);
2115 RN = GPUReadLong(gpu_reg[14] + (gpu_convert_zero[IMM_1] << 2), GPU);
2117 #ifdef GPU_DIS_LOAD14I
2119 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2124 static void gpu_opcode_load_r15_indexed(void)
2126 #ifdef GPU_DIS_LOAD15I
2128 WriteLog("%06X: LOAD (R15+$%02X), R%02u [NCZ:%u%u%u, R15+$%02X=%08X, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1] << 2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, gpu_convert_zero[IMM_1] << 2, gpu_reg[15]+(gpu_convert_zero[IMM_1] << 2), IMM_2, RN);
2130 #ifdef GPU_CORRECT_ALIGNMENT
2131 uint32_t address = gpu_reg[15] + (gpu_convert_zero[IMM_1] << 2);
2133 if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
2134 RN = GPUReadLong(address & 0xFFFFFFFC, GPU);
2136 RN = GPUReadLong(address, GPU);
2138 RN = GPUReadLong(gpu_reg[15] + (gpu_convert_zero[IMM_1] << 2), GPU);
2140 #ifdef GPU_DIS_LOAD15I
2142 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2147 static void gpu_opcode_movei(void)
2149 #ifdef GPU_DIS_MOVEI
2151 WriteLog("%06X: MOVEI #$%08X, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, (uint32_t)GPUReadWord(gpu_pc) | ((uint32_t)GPUReadWord(gpu_pc + 2) << 16), IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2153 // This instruction is followed by 32-bit value in LSW / MSW format...
2154 RN = (uint32_t)GPUReadWord(gpu_pc, GPU) | ((uint32_t)GPUReadWord(gpu_pc + 2, GPU) << 16);
2156 #ifdef GPU_DIS_MOVEI
2158 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2163 static void gpu_opcode_moveta(void)
2165 #ifdef GPU_DIS_MOVETA
2167 WriteLog("%06X: MOVETA R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u(alt)=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, ALTERNATE_RN);
2170 #ifdef GPU_DIS_MOVETA
2172 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u(alt)=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, ALTERNATE_RN);
2177 static void gpu_opcode_movefa(void)
2179 #ifdef GPU_DIS_MOVEFA
2181 WriteLog("%06X: MOVEFA R%02u, R%02u [NCZ:%u%u%u, R%02u(alt)=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, ALTERNATE_RM, IMM_2, RN);
2184 #ifdef GPU_DIS_MOVEFA
2186 WriteLog("[NCZ:%u%u%u, R%02u(alt)=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, ALTERNATE_RM, IMM_2, RN);
2191 static void gpu_opcode_move(void)
2195 WriteLog("%06X: MOVE R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2200 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2205 static void gpu_opcode_moveq(void)
2207 #ifdef GPU_DIS_MOVEQ
2209 WriteLog("%06X: MOVEQ #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2212 #ifdef GPU_DIS_MOVEQ
2214 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2219 static void gpu_opcode_resmac(void)
2225 static void gpu_opcode_imult(void)
2227 #ifdef GPU_DIS_IMULT
2229 WriteLog("%06X: IMULT R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2231 RN = (int16_t)RN * (int16_t)RM;
2233 #ifdef GPU_DIS_IMULT
2235 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2240 static void gpu_opcode_mult(void)
2244 WriteLog("%06X: MULT R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2246 RN = (uint16_t)RM * (uint16_t)RN;
2247 // RN = (RM & 0xFFFF) * (RN & 0xFFFF);
2251 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2256 static void gpu_opcode_bclr(void)
2260 WriteLog("%06X: BCLR #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2262 uint32_t res = RN & ~(1 << IMM_1);
2267 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2272 static void gpu_opcode_btst(void)
2276 WriteLog("%06X: BTST #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2278 gpu_flag_z = (~RN >> IMM_1) & 1;
2281 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2286 static void gpu_opcode_bset(void)
2290 WriteLog("%06X: BSET #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2292 uint32_t res = RN | (1 << IMM_1);
2297 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2302 static void gpu_opcode_imacn(void)
2304 uint32_t res = (int16_t)RM * (int16_t)(RN);
2309 static void gpu_opcode_mtoi(void)
2312 uint32_t res = RN = (((int32_t)_RM >> 8) & 0xFF800000) | (_RM & 0x007FFFFF);
2317 static void gpu_opcode_normi(void)
2324 while ((_RM & 0xFFC00000) == 0)
2329 while ((_RM & 0xFF800000) != 0)
2339 static void gpu_opcode_mmult(void)
2341 int count = gpu_matrix_control & 0x0F; // Matrix width
2342 uint32_t addr = gpu_pointer_to_matrix; // In the GPU's RAM
2346 if (gpu_matrix_control & 0x10) // Column stepping
2348 for(int i=0; i<count; i++)
2352 a = (int16_t)((gpu_alternate_reg[IMM_1 + (i >> 1)] >> 16) & 0xFFFF);
2354 a = (int16_t)(gpu_alternate_reg[IMM_1 + (i >> 1)] & 0xFFFF);
2356 int16_t b = ((int16_t)GPUReadWord(addr + 2, GPU));
2361 else // Row stepping
2363 for(int i=0; i<count; i++)
2367 a = (int16_t)((gpu_alternate_reg[IMM_1 + (i >> 1)] >> 16) & 0xFFFF);
2369 a = (int16_t)(gpu_alternate_reg[IMM_1 + (i >> 1)] & 0xFFFF);
2371 int16_t b = ((int16_t)GPUReadWord(addr + 2, GPU));
2376 RN = res = (int32_t)accum;
2377 // carry flag to do (out of the last add)
2382 static void gpu_opcode_abs(void)
2386 WriteLog("%06X: ABS R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2388 gpu_flag_c = RN >> 31;
2389 if (RN == 0x80000000)
2390 //Is 0x80000000 a positive number? If so, then we need to set C to 0 as well!
2391 gpu_flag_n = 1, gpu_flag_z = 0;
2396 gpu_flag_n = 0; SET_FLAG_Z(RN);
2400 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2405 static void gpu_opcode_div(void) // RN / RM
2409 WriteLog("%06X: DIV R%02u, R%02u (%s) [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, (gpu_div_control & 0x01 ? "16.16" : "32"), gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2414 if (gpu_div_control & 0x01) // 16.16 division
2416 gpu_remain = ((uint64_t)RN << 16) % RM;
2417 RN = ((uint64_t)RN << 16) / RM;
2421 // We calculate the remainder first because we destroy RN after
2422 // this by assigning it to itself.
2423 gpu_remain = RN % RM;
2429 // This is what happens according to SCPCD. NYAN!
2434 // Real algorithm, courtesy of SCPCD: NYAN!
2438 // If 16.16 division, stuff top 16 bits of RN into remainder and put the
2439 // bottom 16 of RN in top 16 of quotient
2440 if (gpu_div_control & 0x01)
2441 q <<= 16, r = RN >> 16;
2443 for(int i=0; i<32; i++)
2445 // uint32_t sign = (r >> 31) & 0x01;
2446 uint32_t sign = r & 0x80000000;
2447 r = (r << 1) | ((q >> 31) & 0x01);
2448 r += (sign ? RM : -RM);
2449 q = (q << 1) | (((~r) >> 31) & 0x01);
2458 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] Remainder: %08X\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN, gpu_remain);
2463 static void gpu_opcode_imultn(void)
2465 uint32_t res = (int32_t)((int16_t)RN * (int16_t)RM);
2466 gpu_acc = (int32_t)res;
2472 static void gpu_opcode_neg(void)
2476 WriteLog("%06X: NEG R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2479 SET_ZNC_SUB(0, RN, res);
2483 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2488 static void gpu_opcode_shlq(void)
2492 WriteLog("%06X: SHLQ #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, 32 - IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2494 // Was a bug here...
2495 // (Look at Aaron's code: If r1 = 32, then 32 - 32 = 0 which is wrong!)
2496 int32_t r1 = 32 - IMM_1;
2497 uint32_t res = RN << r1;
2498 SET_ZN(res); gpu_flag_c = (RN >> 31) & 1;
2502 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2507 static void gpu_opcode_shrq(void)
2511 WriteLog("%06X: SHRQ #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2513 int32_t r1 = gpu_convert_zero[IMM_1];
2514 uint32_t res = RN >> r1;
2515 SET_ZN(res); gpu_flag_c = RN & 1;
2519 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2524 static void gpu_opcode_ror(void)
2528 WriteLog("%06X: ROR R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2530 uint32_t r1 = RM & 0x1F;
2531 uint32_t res = (RN >> r1) | (RN << (32 - r1));
2532 SET_ZN(res); gpu_flag_c = (RN >> 31) & 1;
2536 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2541 static void gpu_opcode_rorq(void)
2545 WriteLog("%06X: RORQ #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2547 uint32_t r1 = gpu_convert_zero[IMM_1 & 0x1F];
2549 uint32_t res = (r2 >> r1) | (r2 << (32 - r1));
2551 SET_ZN(res); gpu_flag_c = (r2 >> 31) & 0x01;
2554 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2559 static void gpu_opcode_sha(void)
2561 /* int dreg = jaguar.op & 31;
2562 int32_t r1 = (int32_t)jaguar.r[(jaguar.op >> 5) & 31];
2563 uint32_t r2 = jaguar.r[dreg];
2569 res = (r1 <= -32) ? 0 : (r2 << -r1);
2570 jaguar.FLAGS |= (r2 >> 30) & 2;
2574 res = (r1 >= 32) ? ((int32_t)r2 >> 31) : ((int32_t)r2 >> r1);
2575 jaguar.FLAGS |= (r2 << 1) & 2;
2577 jaguar.r[dreg] = res;
2582 WriteLog("%06X: SHA R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2586 if ((int32_t)RM < 0)
2588 res = ((int32_t)RM <= -32) ? 0 : (RN << -(int32_t)RM);
2589 gpu_flag_c = RN >> 31;
2593 res = ((int32_t)RM >= 32) ? ((int32_t)RN >> 31) : ((int32_t)RN >> (int32_t)RM);
2594 gpu_flag_c = RN & 0x01;
2600 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2603 /* int32_t sRM=(int32_t)RM;
2608 uint32_t shift=-sRM;
2609 if (shift>=32) shift=32;
2610 gpu_flag_c=(_RN&0x80000000)>>31;
2620 if (shift>=32) shift=32;
2624 _RN=((int32_t)_RN)>>1;
2634 static void gpu_opcode_sharq(void)
2636 #ifdef GPU_DIS_SHARQ
2638 WriteLog("%06X: SHARQ #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2640 uint32_t res = (int32_t)RN >> gpu_convert_zero[IMM_1];
2641 SET_ZN(res); gpu_flag_c = RN & 0x01;
2643 #ifdef GPU_DIS_SHARQ
2645 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2650 static void gpu_opcode_sh(void)
2654 WriteLog("%06X: SH R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2656 if (RM & 0x80000000) // Shift left
2658 gpu_flag_c = RN >> 31;
2659 RN = ((int32_t)RM <= -32 ? 0 : RN << -(int32_t)RM);
2663 gpu_flag_c = RN & 0x01;
2664 RN = (RM >= 32 ? 0 : RN >> RM);
2669 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2674 //Temporary: Testing only!
2675 //#include "gpu2.cpp"
2676 //#include "gpu3.cpp"
2681 // New thread-safe GPU core
2683 int GPUCore(void * data)