6 // Originally by David Raingeard (Cal2)
7 // GCC/SDL port by Niels Wagenaar (Linux/WIN32) and Caz (BeOS)
8 // Cleanups, endian wrongness, and bad ASM amelioration by James Hammons
9 // (C) 2010 Underground Software
11 // JLH = James Hammons <jlhamm@acm.org>
14 // --- ---------- -------------------------------------------------------------
15 // JLH 01/16/2010 Created this log ;-)
16 // JLH 11/26/2011 Added fixes for LOAD/STORE alignment issues
19 // Note: Endian wrongness probably stems from the MAME origins of this emu and
20 // the braindead way in which MAME handles memory. :-)
22 // Problem with not booting the BIOS was the incorrect way that the
23 // SUBC instruction set the carry when the carry was set going in...
24 // Same problem with ADDC...
30 #include <string.h> // For memset
35 #include "m68000/m68kinterface.h"
// Compile-time switches local to this file.
40 // Seems alignment in loads & stores was off...
41 #define GPU_CORRECT_ALIGNMENT
44 // For GPU disassembly...
// One GPU_DIS_* switch per opcode family. They are only defined here; the code
// that tests them is not in this part of the file (presumably #ifdef'd trace
// logging inside the matching opcode handlers -- TODO confirm).
65 #define GPU_DIS_LOAD14I
66 #define GPU_DIS_LOAD14R
67 #define GPU_DIS_LOAD15I
68 #define GPU_DIS_LOAD15R
70 #define GPU_DIS_MOVEFA
72 #define GPU_DIS_MOVEPC
73 #define GPU_DIS_MOVETA
90 #define GPU_DIS_STOREB
91 #define GPU_DIS_STOREW
92 #define GPU_DIS_STORE14I
93 #define GPU_DIS_STORE14R
94 #define GPU_DIS_STORE15I
95 #define GPU_DIS_STORE15R
// Global flag, initialised to false; the commented-out line is the enabled variant.
// NOTE(review): looks like the run-time gate for the disassembly trace -- confirm at its use sites.
102 bool doGPUDis = false;
103 //bool doGPUDis = true;
107 GPU opcodes use (BIOS flying ATARI logo):
// Bit masks for the GPU flags register (gpu_flags).
// CINTnFLAG (bits 9..13): on a flags write, GPUWriteLong shifts these right by 3
// and clears the matching interrupt latch bits in gpu_control.
149 #define CINT0FLAG 0x0200
150 #define CINT1FLAG 0x0400
151 #define CINT2FLAG 0x0800
152 #define CINT3FLAG 0x1000
153 #define CINT4FLAG 0x2000
154 #define CINT04FLAGS (CINT0FLAG | CINT1FLAG | CINT2FLAG | CINT3FLAG | CINT4FLAG)
// ALU flags (bits 0..2); mirrored in gpu_flag_z / gpu_flag_c / gpu_flag_n.
158 #define ZERO_FLAG 0x0001
159 #define CARRY_FLAG 0x0002
160 #define NEGA_FLAG 0x0004
// Interrupt enable bits (bits 4..8); GPUHandleIRQs reads them as (gpu_flags >> 4) & 0x1F.
162 #define INT_ENA0 0x0010
163 #define INT_ENA1 0x0020
164 #define INT_ENA2 0x0040
165 #define INT_ENA3 0x0080
166 #define INT_ENA4 0x0100
// Same bit values as CINT0FLAG..CINT4FLAG above.
167 #define INT_CLR0 0x0200
168 #define INT_CLR1 0x0400
169 #define INT_CLR2 0x0800
170 #define INT_CLR3 0x1000
171 #define INT_CLR4 0x2000
// Read by GPUUpdateRegisterBanks when choosing the active register bank.
172 #define REGPAGE 0x4000
175 // External global variables
// Both are defined in another translation unit.
177 extern int start_logging;
178 extern int gpu_start_log;
180 // Private function prototypes
// NOTE: these four are declared without `static`, so they have external linkage
// despite the heading above.
182 void GPUUpdateRegisterBanks(void);
183 void GPUDumpDisassembly(void);
184 void GPUDumpRegisters(void);
185 void GPUDumpMemory(void);
// One handler per GPU opcode. Each of them appears in the gpu_opcode[] dispatch
// table and takes its operands from the file-level decode variables.
187 static void gpu_opcode_add(void);
188 static void gpu_opcode_addc(void);
189 static void gpu_opcode_addq(void);
190 static void gpu_opcode_addqt(void);
191 static void gpu_opcode_sub(void);
192 static void gpu_opcode_subc(void);
193 static void gpu_opcode_subq(void);
194 static void gpu_opcode_subqt(void);
195 static void gpu_opcode_neg(void);
196 static void gpu_opcode_and(void);
197 static void gpu_opcode_or(void);
198 static void gpu_opcode_xor(void);
199 static void gpu_opcode_not(void);
200 static void gpu_opcode_btst(void);
201 static void gpu_opcode_bset(void);
202 static void gpu_opcode_bclr(void);
203 static void gpu_opcode_mult(void);
204 static void gpu_opcode_imult(void);
205 static void gpu_opcode_imultn(void);
206 static void gpu_opcode_resmac(void);
207 static void gpu_opcode_imacn(void);
208 static void gpu_opcode_div(void);
209 static void gpu_opcode_abs(void);
210 static void gpu_opcode_sh(void);
211 static void gpu_opcode_shlq(void);
212 static void gpu_opcode_shrq(void);
213 static void gpu_opcode_sha(void);
214 static void gpu_opcode_sharq(void);
215 static void gpu_opcode_ror(void);
216 static void gpu_opcode_rorq(void);
217 static void gpu_opcode_cmp(void);
218 static void gpu_opcode_cmpq(void);
219 static void gpu_opcode_sat8(void);
220 static void gpu_opcode_sat16(void);
221 static void gpu_opcode_move(void);
222 static void gpu_opcode_moveq(void);
223 static void gpu_opcode_moveta(void);
224 static void gpu_opcode_movefa(void);
225 static void gpu_opcode_movei(void);
226 static void gpu_opcode_loadb(void);
227 static void gpu_opcode_loadw(void);
228 static void gpu_opcode_load(void);
229 static void gpu_opcode_loadp(void);
230 static void gpu_opcode_load_r14_indexed(void);
231 static void gpu_opcode_load_r15_indexed(void);
232 static void gpu_opcode_storeb(void);
233 static void gpu_opcode_storew(void);
234 static void gpu_opcode_store(void);
235 static void gpu_opcode_storep(void);
236 static void gpu_opcode_store_r14_indexed(void);
237 static void gpu_opcode_store_r15_indexed(void);
238 static void gpu_opcode_move_pc(void);
239 static void gpu_opcode_jump(void);
240 static void gpu_opcode_jr(void);
241 static void gpu_opcode_mmult(void);
242 static void gpu_opcode_mtoi(void);
243 static void gpu_opcode_normi(void);
244 static void gpu_opcode_nop(void);
245 static void gpu_opcode_load_r14_ri(void);
246 static void gpu_opcode_load_r15_ri(void);
247 static void gpu_opcode_store_r14_ri(void);
248 static void gpu_opcode_store_r15_ri(void);
249 static void gpu_opcode_sat24(void);
250 static void gpu_opcode_pack(void);
// Per-opcode cycle costs, indexed by opcode number (0-63). GPUExec subtracts
// gpu_opcode_cycles[index] from its cycle budget after each instruction.
// Three variants appear below; the first two start with a comment opener and
// look disabled, leaving the all-ones table as the live definition.
// NOTE(review): the braces and terminators of these initialisers are not
// visible in this listing -- confirm which table is live against the full source.
252 // This is wrong, since it doesn't take pipeline effects into account. !!! FIX !!!
253 /*uint8_t gpu_opcode_cycles[64] =
255 3, 3, 3, 3, 3, 3, 3, 3,
256 3, 3, 3, 3, 3, 3, 3, 3,
257 3, 3, 1, 3, 1, 18, 3, 3,
258 3, 3, 3, 3, 3, 3, 3, 3,
259 3, 3, 2, 2, 2, 2, 3, 4,
260 5, 4, 5, 6, 6, 1, 1, 1,
261 1, 2, 2, 2, 1, 1, 9, 3,
262 3, 1, 6, 6, 2, 2, 3, 3
264 //Here's a QnD kludge...
265 //This is wrong, wrong, WRONG, but it seems to work for the time being...
266 //(That is, it fixes Flip Out which relies on GPU timing rather than semaphores. Bad developers! Bad!)
267 //What's needed here is a way to take pipeline effects into account (including pipeline stalls!)...
268 /*uint8_t gpu_opcode_cycles[64] =
270 1, 1, 1, 1, 1, 1, 1, 1,
271 1, 1, 1, 1, 1, 1, 1, 1,
272 1, 1, 1, 1, 1, 9, 1, 1,
273 1, 1, 1, 1, 1, 1, 1, 1,
274 1, 1, 1, 1, 1, 1, 1, 2,
275 2, 2, 2, 3, 3, 1, 1, 1,
276 1, 1, 1, 1, 1, 1, 4, 1,
277 1, 1, 3, 3, 1, 1, 1, 1
// Variant without a comment opener: every opcode is charged exactly one cycle.
279 uint8_t gpu_opcode_cycles[64] =
281 1, 1, 1, 1, 1, 1, 1, 1,
282 1, 1, 1, 1, 1, 1, 1, 1,
283 1, 1, 1, 1, 1, 1, 1, 1,
284 1, 1, 1, 1, 1, 1, 1, 1,
285 1, 1, 1, 1, 1, 1, 1, 1,
286 1, 1, 1, 1, 1, 1, 1, 1,
287 1, 1, 1, 1, 1, 1, 1, 1,
288 1, 1, 1, 1, 1, 1, 1, 1
// Opcode dispatch table. GPUExec calls gpu_opcode[opcode >> 10], i.e. the top
// six bits of the 16-bit instruction word select the handler. The entries are
// in the same order as the names in gpu_opcode_str[].
291 void (*gpu_opcode[64])()=
293 gpu_opcode_add, gpu_opcode_addc, gpu_opcode_addq, gpu_opcode_addqt,
294 gpu_opcode_sub, gpu_opcode_subc, gpu_opcode_subq, gpu_opcode_subqt,
295 gpu_opcode_neg, gpu_opcode_and, gpu_opcode_or, gpu_opcode_xor,
296 gpu_opcode_not, gpu_opcode_btst, gpu_opcode_bset, gpu_opcode_bclr,
297 gpu_opcode_mult, gpu_opcode_imult, gpu_opcode_imultn, gpu_opcode_resmac,
298 gpu_opcode_imacn, gpu_opcode_div, gpu_opcode_abs, gpu_opcode_sh,
299 gpu_opcode_shlq, gpu_opcode_shrq, gpu_opcode_sha, gpu_opcode_sharq,
300 gpu_opcode_ror, gpu_opcode_rorq, gpu_opcode_cmp, gpu_opcode_cmpq,
301 gpu_opcode_sat8, gpu_opcode_sat16, gpu_opcode_move, gpu_opcode_moveq,
302 gpu_opcode_moveta, gpu_opcode_movefa, gpu_opcode_movei, gpu_opcode_loadb,
303 gpu_opcode_loadw, gpu_opcode_load, gpu_opcode_loadp, gpu_opcode_load_r14_indexed,
304 gpu_opcode_load_r15_indexed, gpu_opcode_storeb, gpu_opcode_storew, gpu_opcode_store,
305 gpu_opcode_storep, gpu_opcode_store_r14_indexed, gpu_opcode_store_r15_indexed, gpu_opcode_move_pc,
306 gpu_opcode_jump, gpu_opcode_jr, gpu_opcode_mmult, gpu_opcode_mtoi,
307 gpu_opcode_normi, gpu_opcode_nop, gpu_opcode_load_r14_ri, gpu_opcode_load_r15_ri,
308 gpu_opcode_store_r14_ri, gpu_opcode_store_r15_ri, gpu_opcode_sat24, gpu_opcode_pack,
// GPU local work RAM, 0x1000 bytes; the accessors index it with offset & 0xFFF.
311 static uint8_t gpu_ram_8[0x1000];
// GPU register state. All of these are given their initial values by the reset
// code further down in this file.
313 static uint32_t gpu_acc;
314 static uint32_t gpu_remain;
315 static uint32_t gpu_hidata;
// Flags register image; its low three bits are rebuilt from gpu_flag_z/c/n
// whenever the register is read through GPUReadLong.
316 static uint32_t gpu_flags;
317 static uint32_t gpu_matrix_control;
318 static uint32_t gpu_pointer_to_matrix;
319 static uint32_t gpu_data_organization;
// Control register image: bit 0 is the run bit (see GPU_RUNNING), bits 6..10
// hold the interrupt latches (see GPUSetIRQLine / GPUHandleIRQs).
320 static uint32_t gpu_control;
321 static uint32_t gpu_div_control;
322 // There is a distinct advantage to having these separated out--there's no need to clear
323 // a bit before writing a result. I.e., if the result of an operation leaves a zero in
324 // the carry flag, you don't have to zero gpu_flag_c before you can write that zero!
325 static uint8_t gpu_flag_z, gpu_flag_n, gpu_flag_c;
// The two 32-entry register banks (external linkage), and the pointers that
// say which one is currently primary / alternate (set by GPUUpdateRegisterBanks).
326 uint32_t gpu_reg_bank_0[32];
327 uint32_t gpu_reg_bank_1[32];
328 static uint32_t * gpu_reg;
329 static uint32_t * gpu_alternate_reg;
// Decode state of the instruction being executed, filled in by GPUExec:
// the raw opcode word and its two 5-bit operand fields.
331 static uint32_t gpu_instruction;
332 static uint32_t gpu_opcode_first_parameter;
333 static uint32_t gpu_opcode_second_parameter;
// Non-zero while bit 0 of gpu_control is set; GPUExec only loops while this holds.
335 #define GPU_RUNNING (gpu_control & 0x01)
// Operand shorthands for the instruction currently being executed. The two
// 5-bit fields decoded in GPUExec are used either as register numbers (RM/RN in
// the active bank, ALTERNATE_* in the other bank) or as immediates (IMM_1/IMM_2).
337 #define RM gpu_reg[gpu_opcode_first_parameter]
338 #define RN gpu_reg[gpu_opcode_second_parameter]
339 #define ALTERNATE_RM gpu_alternate_reg[gpu_opcode_first_parameter]
340 #define ALTERNATE_RN gpu_alternate_reg[gpu_opcode_second_parameter]
341 #define IMM_1 gpu_opcode_first_parameter
342 #define IMM_2 gpu_opcode_second_parameter
// Statement-style flag helpers. NOTE: these carry their own trailing semicolon,
// so they must not be used as the sole body of an `if` that has an `else`.
// Left unchanged because call sites elsewhere in the file may rely on that form.
344 #define SET_FLAG_Z(r) (gpu_flag_z = ((r) == 0));
345 #define SET_FLAG_N(r) (gpu_flag_n = (((uint32_t)(r) >> 31) & 0x01));
347 #define RESET_FLAG_Z() gpu_flag_z = 0;
348 #define RESET_FLAG_N() gpu_flag_n = 0;
349 #define RESET_FLAG_C() gpu_flag_c = 0;
// Expression-style flag helpers (no trailing semicolon).
351 #define CLR_Z (gpu_flag_z = 0)
352 #define CLR_ZN (gpu_flag_z = gpu_flag_n = 0)
353 #define CLR_ZNC (gpu_flag_z = gpu_flag_n = gpu_flag_c = 0)
// Z is set when the result is zero, N mirrors bit 31 of the result.
354 #define SET_Z(r) (gpu_flag_z = ((r) == 0))
355 #define SET_N(r) (gpu_flag_n = (((uint32_t)(r) >> 31) & 0x01))
// SET_C_ADD: carry out of the 32-bit sum a + b (true when b > ~a).
// SET_C_SUB: borrow of the 32-bit difference a - b (true when b > a).
356 #define SET_C_ADD(a,b) (gpu_flag_c = ((uint32_t)(b) > (uint32_t)(~(a))))
357 #define SET_C_SUB(a,b) (gpu_flag_c = ((uint32_t)(b) > (uint32_t)(a)))
// Combined helpers, written as single comma expressions so that the whole
// update stays under the control of an unbraced if/else/loop. A plain
// `;`-separated expansion would leave only the first assignment conditional.
// Evaluation order (N, then Z, then C) and side effects are the same as
// invoking the individual helpers one after another.
358 #define SET_ZN(r) (SET_N(r), SET_Z(r))
359 #define SET_ZNC_ADD(a,b,r) (SET_N(r), SET_Z(r), SET_C_ADD(a,b))
360 #define SET_ZNC_SUB(a,b,r) (SET_N(r), SET_Z(r), SET_C_SUB(a,b))
// Maps a 5-bit field to a count in 1..32: index 0 yields 32, every other index
// yields itself.
362 uint32_t gpu_convert_zero[32] =
363 { 32,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 };
// Lookup table allocated and filled by build_branch_condition_table(); NULL until then.
365 uint8_t * branch_condition_table = 0;
// Looks up condition code x for the current flags. NOTE: expands to a reference
// to `jaguar_flags`, which must be a variable in scope at the point of use
// (Z in bit 0, C in bit 1, N in bit 2).
366 #define BRANCH_CONDITION(x) branch_condition_table[(x) + ((jaguar_flags & 7) << 5)]
// Per-opcode execution counters: incremented in GPUExec, zeroed by GPUResetStats.
368 uint32_t gpu_opcode_use[64];
// Printable opcode names, in the same order as the gpu_opcode[] dispatch table.
// Used when the per-opcode usage counters are logged.
370 const char * gpu_opcode_str[64]=
372 "add", "addc", "addq", "addqt",
373 "sub", "subc", "subq", "subqt",
374 "neg", "and", "or", "xor",
375 "not", "btst", "bset", "bclr",
376 "mult", "imult", "imultn", "resmac",
377 "imacn", "div", "abs", "sh",
378 "shlq", "shrq", "sha", "sharq",
379 "ror", "rorq", "cmp", "cmpq",
380 "sat8", "sat16", "move", "moveq",
381 "moveta", "movefa", "movei", "loadb",
382 "loadw", "load", "loadp", "load_r14_indexed",
383 "load_r15_indexed", "storeb", "storew", "store",
384 "storep", "store_r14_indexed","store_r15_indexed","move_pc",
385 "jump", "jr", "mmult", "mtoi",
386 "normi", "nop", "load_r14_ri", "load_r15_ri",
387 "store_r14_ri", "store_r15_ri", "sat24", "pack",
// Both flags start at 0. gpu_releaseTimeSlice_flag is raised by
// GPUReleaseTimeslice() and reset to 0 inside GPUExec().
390 static uint32_t gpu_in_exec = 0;
391 static uint32_t gpu_releaseTimeSlice_flag = 0;
// Raises gpu_releaseTimeSlice_flag.
// NOTE(review): presumably the execution loop polls this flag to give up the
// rest of its slice; that check is not visible in this listing.
393 void GPUReleaseTimeslice(void)
395 gpu_releaseTimeSlice_flag = 1;
// NOTE(review): the body is not visible in this listing; by its name and return
// type this presumably returns the current gpu_pc -- confirm against the full source.
398 uint32_t GPUGetPC(void)
// Builds the 8 x 32 lookup table behind BRANCH_CONDITION(): one byte per
// (flag combination i, 5-bit condition code j), stored at index i * 32 + j.
// i carries the Z/C/N flags in bits 0..2 (ZERO_FLAG, CARRY_FLAG, NEGA_FLAG).
// NOTE(review): the lines that initialise `result` and that test the individual
// bits of j are not visible in this listing; only the flag tests are shown.
403 void build_branch_condition_table(void)
// Allocate only if no table exists yet.
405 if (!branch_condition_table)
407 branch_condition_table = (uint8_t *)malloc(32 * 8 * sizeof(branch_condition_table[0]));
// The fill loops follow a non-NULL check of the table.
409 if (branch_condition_table)
411 for(int i=0; i<8; i++)
413 for(int j=0; j<32; j++)
420 if (!(i & ZERO_FLAG))
// CARRY_FLAG << (j >> 4) is the carry bit for j < 16 and the negative bit for j >= 16.
423 if (i & (CARRY_FLAG << (j >> 4)))
426 if (!(i & (CARRY_FLAG << (j >> 4))))
428 branch_condition_table[i * 32 + j] = result;
436 // GPU byte access (read)
// Reads one byte from the GPU's address space on behalf of `who` (an index into
// whoName[], used for logging).
//  - 0xF02000-0xF020FF (register file): the access is logged; no early return
//    is visible for this case.
//  - work RAM window: returns gpu_ram_8[offset & 0xFFF].
//  - control window (0x20 bytes): reads the containing long through GPUReadLong
//    and extracts one byte from it.
//  - anything else: forwarded to JaguarReadByte.
// NOTE(review): the returns for byte 0 and byte 3 are not visible in this
// listing. The two visible cases (>> 16 for byte 1, >> 8 for byte 2) suggest
// most-significant-byte-first order -- confirm against the full source.
438 uint8_t GPUReadByte(uint32_t offset, uint32_t who/*=UNKNOWN*/)
440 if (offset >= 0xF02000 && offset <= 0xF020FF)
441 WriteLog("GPU: ReadByte--Attempt to read from GPU register file by %s!\n", whoName[who]);
443 if ((offset >= GPU_WORK_RAM_BASE) && (offset < GPU_WORK_RAM_BASE+0x1000))
444 return gpu_ram_8[offset & 0xFFF];
445 else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset < GPU_CONTROL_RAM_BASE+0x20))
// Fetch the aligned long that contains the requested byte.
447 uint32_t data = GPUReadLong(offset & 0xFFFFFFFC, who);
449 if ((offset & 0x03) == 0)
451 else if ((offset & 0x03) == 1)
452 return (data >> 16) & 0xFF;
453 else if ((offset & 0x03) == 2)
454 return (data >> 8) & 0xFF;
455 else if ((offset & 0x03) == 3)
459 return JaguarReadByte(offset, who);
463 // GPU word access (read)
// Reads a 16-bit value (high byte at the lower address) from the GPU's address
// space on behalf of `who` (an index into whoName[], used for logging).
//  - 0xF02000-0xF020FF (register file): the access is logged.
//  - work RAM window: assembled from two bytes of gpu_ram_8.
//  - control window: odd offsets are assembled from two GPUReadByte calls, even
//    offsets take one half of the containing long read through GPUReadLong.
//  - anything else: forwarded to JaguarReadWord.
// NOTE(review): short lines (braces, the return statements of the RAM branch and
// of the upper-half case) are not visible in this listing.
465 uint16_t GPUReadWord(uint32_t offset, uint32_t who/*=UNKNOWN*/)
467 if (offset >= 0xF02000 && offset <= 0xF020FF)
468 WriteLog("GPU: ReadWord--Attempt to read from GPU register file by %s!\n", whoName[who]);
470 if ((offset >= GPU_WORK_RAM_BASE) && (offset < GPU_WORK_RAM_BASE+0x1000))
// Both indices are reduced modulo the RAM size. The guard above admits the last
// byte of the window, and an unmasked offset+1 would then index one element
// past the end of gpu_ram_8[0x1000]. GPUWriteWord masks its indices the same way.
473 uint16_t data = ((uint16_t)gpu_ram_8[offset & 0xFFF] << 8) | (uint16_t)gpu_ram_8[(offset+1) & 0xFFF];
476 else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset < GPU_CONTROL_RAM_BASE+0x20))
478 // This looks and smells wrong...
479 // But it *might* be OK...
480 if (offset & 0x01) // Catch cases 1 & 3... (unaligned read)
481 return (GPUReadByte(offset, who) << 8) | GPUReadByte(offset+1, who);
// Aligned case: fetch the containing long and pick the requested half.
483 uint32_t data = GPUReadLong(offset & 0xFFFFFFFC, who);
485 if (offset & 0x02) // Cases 0 & 2...
486 return data & 0xFFFF;
491 //TEMP--Mirror of F03000? No. Writes only...
492 //if (offset >= 0xF0B000 && offset <= 0xF0BFFF)
493 //WriteLog("[GPUR16] --> Possible GPU RAM mirror access by %s!", whoName[who]);
495 return JaguarReadWord(offset, who);
499 // GPU dword access (read)
// Reads a 32-bit value from the GPU's address space on behalf of `who` (an
// index into whoName[], used for logging).
//  - 0xF02000-0xF020FF (register file): logged, then the register selected by
//    (offset & 0xFC) >> 2 is returned (0..31 from bank 0, 32..63 from bank 1).
//  - work RAM (base .. base + 0x0FFC): assembled from four bytes of gpu_ram_8,
//    most significant byte first.
//  - control window (base .. base + 0x1C): returns the addressed GPU register.
//  - anything else: assembled from two JaguarReadWord calls.
// NOTE(review): the switch and case labels, the masking of `offset` and several
// returns are not visible in this listing, so the mapping of offsets to
// registers has to be confirmed against the full source.
501 uint32_t GPUReadLong(uint32_t offset, uint32_t who/*=UNKNOWN*/)
503 if (offset >= 0xF02000 && offset <= 0xF020FF)
505 WriteLog("GPU: ReadLong--Attempt to read from GPU register file (%X) by %s!\n", offset, whoName[who]);
506 uint32_t reg = (offset & 0xFC) >> 2;
507 return (reg < 32 ? gpu_reg_bank_0[reg] : gpu_reg_bank_1[reg - 32]);
510 // if ((offset >= GPU_WORK_RAM_BASE) && (offset < GPU_WORK_RAM_BASE + 0x1000))
511 if ((offset >= GPU_WORK_RAM_BASE) && (offset <= GPU_WORK_RAM_BASE + 0x0FFC))
514 return ((uint32_t)gpu_ram_8[offset] << 24) | ((uint32_t)gpu_ram_8[offset+1] << 16)
515 | ((uint32_t)gpu_ram_8[offset+2] << 8) | (uint32_t)gpu_ram_8[offset+3];//*/
516 // return GET32(gpu_ram_8, offset);
518 // else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset < GPU_CONTROL_RAM_BASE+0x20))
519 else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset <= GPU_CONTROL_RAM_BASE + 0x1C))
// Flags read: normalise the separately kept flag bytes to 0/1 and fold them
// into the low three bits of gpu_flags (N = bit 2, C = bit 1, Z = bit 0).
525 gpu_flag_c = (gpu_flag_c ? 1 : 0);
526 gpu_flag_z = (gpu_flag_z ? 1 : 0);
527 gpu_flag_n = (gpu_flag_n ? 1 : 0);
529 gpu_flags = (gpu_flags & 0xFFFFFFF8) | (gpu_flag_n << 2) | (gpu_flag_c << 1) | gpu_flag_z;
// Bits 9..13 (the interrupt clear bits) always read back as zero.
531 return gpu_flags & 0xFFFFC1FF;
533 return gpu_matrix_control;
535 return gpu_pointer_to_matrix;
537 return gpu_data_organization;
546 default: // unaligned long read
548 WriteLog("GPU: Read32--unaligned 32 bit read at %08X by %s.\n", GPU_CONTROL_RAM_BASE + offset, whoName[who]);
553 //TEMP--Mirror of F03000? No. Writes only...
554 //if (offset >= 0xF0B000 && offset <= 0xF0BFFF)
555 // WriteLog("[GPUR32] --> Possible GPU RAM mirror access by %s!\n", whoName[who]);
556 /*if (offset >= 0xF1D000 && offset <= 0xF1DFFF)
557 WriteLog("[GPUR32] --> Reading from Wavetable ROM!\n");//*/
// NOTE(review): if JaguarReadWord returns a 16-bit type, the left operand is
// promoted to int before the shift; a value with bit 15 set then shifts into
// the sign bit. Consider casting to uint32_t first -- confirm the return type.
559 return (JaguarReadWord(offset, who) << 16) | JaguarReadWord(offset + 2, who);
563 // GPU byte access (write)
// Writes one byte into the GPU's address space on behalf of `who` (an index
// into whoName[], used for logging).
//  - 0xF02000-0xF020FF (register file): the access is logged.
//  - work RAM window: stored at gpu_ram_8[offset & 0xFFF].
//  - control window: the divide-control long at 0x1C is patched in place; any
//    other register is updated with a read-modify-write of the containing long
//    through GPUReadLong / GPUWriteLong.
//  - anything else: forwarded to JaguarWriteByte.
// NOTE(review): short lines (braces, returns, the condition in front of the
// timeslice calls) are not visible in this listing; those two calls may sit
// inside a disabled region in the full source.
565 void GPUWriteByte(uint32_t offset, uint8_t data, uint32_t who/*=UNKNOWN*/)
567 if (offset >= 0xF02000 && offset <= 0xF020FF)
568 WriteLog("GPU: WriteByte--Attempt to write to GPU register file by %s!\n", whoName[who]);
570 if ((offset >= GPU_WORK_RAM_BASE) && (offset <= GPU_WORK_RAM_BASE + 0x0FFF))
572 gpu_ram_8[offset & 0xFFF] = data;
574 //This is the same stupid worthless code that was in the DSP!!! AARRRGGGGHHHHH!!!!!!
577 m68k_end_timeslice();
578 dsp_releaseTimeslice();
582 else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset <= GPU_CONTROL_RAM_BASE + 0x1F))
584 uint32_t reg = offset & 0x1C;
585 int bytenum = offset & 0x03;
587 //This is definitely wrong!
588 if ((reg >= 0x1C) && (reg <= 0x1F))
589 gpu_div_control = (gpu_div_control & (~(0xFF << (bytenum << 3)))) | (data << (bytenum << 3));
// Read-modify-write of the containing long. The alignment mask is the full
// 32-bit 0xFFFFFFFC, matching the read paths (GPUReadByte / GPUReadWord).
592 uint32_t old_data = GPUReadLong(offset & 0xFFFFFFFC, who);
593 bytenum = 3 - bytenum; // Motorola (big-endian) byte order: byte 0 is the most significant
594 old_data = (old_data & (~(0xFF << (bytenum << 3)))) | (data << (bytenum << 3));
595 GPUWriteLong(offset & 0xFFFFFFFC, old_data, who);
599 // WriteLog("gpu: writing %.2x at 0x%.8x\n",data,offset);
600 JaguarWriteByte(offset, data, who);
604 // GPU word access (write)
// Writes a 16-bit value (high byte at the lower address) into the GPU's address
// space on behalf of `who` (an index into whoName[], used for logging).
//  - 0xF02000-0xF020FF (register file): the access is logged.
//  - work RAM (base .. base + 0x0FFE): stored as two bytes of gpu_ram_8.
//  - control window (base .. base + 0x1E): odd offsets are only logged; even
//    offsets update one half of the divide-control register (0x1C) or of the
//    containing long via GPUReadLong / GPUWriteLong.
//  - the last byte of either window (an unaligned word write): only logged.
//  - anything else: forwarded to JaguarWriteWord.
// NOTE(review): short lines (braces, returns, the conditions selecting the
// upper or lower half, preprocessor guards) are not visible in this listing, and
// some of the statements in the RAM branch may sit inside disabled regions.
606 void GPUWriteWord(uint32_t offset, uint16_t data, uint32_t who/*=UNKNOWN*/)
608 if (offset >= 0xF02000 && offset <= 0xF020FF)
609 WriteLog("GPU: WriteWord--Attempt to write to GPU register file by %s!\n", whoName[who]);
611 if ((offset >= GPU_WORK_RAM_BASE) && (offset <= GPU_WORK_RAM_BASE + 0x0FFE))
613 gpu_ram_8[offset & 0xFFF] = (data>>8) & 0xFF;
614 gpu_ram_8[(offset+1) & 0xFFF] = data & 0xFF;//*/
616 SET16(gpu_ram_8, offset, data);//*/
618 /*if (offset >= 0xF03214 && offset < 0xF0321F)
619 WriteLog("GPU: Writing WORD (%04X) to GPU RAM (%08X)...\n", data, offset);//*/
622 //This is the same stupid worthless code that was in the DSP!!! AARRRGGGGHHHHH!!!!!!
625 m68k_end_timeslice();
626 dsp_releaseTimeslice();
630 else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset <= GPU_CONTROL_RAM_BASE + 0x1E))
632 if (offset & 0x01) // This is supposed to weed out unaligned writes, but does nothing...
635 WriteLog("GPU: Write16--unaligned write @ %08X [%04X]\n", offset, data);
640 //Dual locations in this range: $1C Divide unit remainder/Divide unit control (R/W)
641 //This just literally sucks.
642 if ((offset & 0x1C) == 0x1C)
644 //This doesn't look right either--handles cases 1, 2, & 3 all the same!
646 gpu_div_control = (gpu_div_control & 0xFFFF0000) | (data & 0xFFFF);
648 gpu_div_control = (gpu_div_control & 0x0000FFFF) | ((data & 0xFFFF) << 16);
652 //WriteLog("[GPU W16:%08X,%04X]", offset, data);
// Read-modify-write of the containing long. The alignment mask is the full
// 32-bit 0xFFFFFFFC, matching the read paths (GPUReadByte / GPUReadWord).
653 uint32_t old_data = GPUReadLong(offset & 0xFFFFFFFC, who);
656 old_data = (old_data & 0xFFFF0000) | (data & 0xFFFF);
658 old_data = (old_data & 0x0000FFFF) | ((data & 0xFFFF) << 16);
660 GPUWriteLong(offset & 0xFFFFFFFC, old_data, who);
// Both operands compare `offset`. A bare (GPU_CONTROL_RAM_BASE + 0x1F) is a
// non-zero constant, which would make this branch swallow every write that
// reaches it and leave the JaguarWriteWord forward below unreachable.
// NOTE(review): with the condition fixed, writes outside the GPU windows now
// reach JaguarWriteWord; heed the infinite-loop warning on that call and check
// the callers.
665 else if ((offset == GPU_WORK_RAM_BASE + 0x0FFF) || (offset == GPU_CONTROL_RAM_BASE + 0x1F))
668 WriteLog("GPU: Write16--unaligned write @ %08X by %s [%04X]!\n", offset, whoName[who], data);
674 // Have to be careful here--this can cause an infinite loop!
675 JaguarWriteWord(offset, data, who);
679 // GPU dword access (write)
// Writes a 32-bit value into the GPU's address space on behalf of `who` (an
// index into whoName[], used for logging).
//  - 0xF02000-0xF020FF (register file): the access is logged.
//  - work RAM (base .. base + 0x0FFC): stored through SET32.
//  - control window (base .. base + 0x1C): updates the addressed GPU register
//    (flags, matrix control, matrix pointer, data organisation, PC, control,
//    divide control).
//  - anything else: forwarded to JaguarWriteLong.
// NOTE(review): the switch and case labels, several conditions, braces and
// preprocessor guards are not visible in this listing, so which register each
// assignment belongs to has to be confirmed against the full source. Large
// parts of the tail are game-specific debugging experiments, some of which
// appear to sit inside block comments.
681 void GPUWriteLong(uint32_t offset, uint32_t data, uint32_t who/*=UNKNOWN*/)
683 if (offset >= 0xF02000 && offset <= 0xF020FF)
684 WriteLog("GPU: WriteLong--Attempt to write to GPU register file by %s!\n", whoName[who]);
686 // if ((offset >= GPU_WORK_RAM_BASE) && (offset < GPU_WORK_RAM_BASE + 0x1000))
687 if ((offset >= GPU_WORK_RAM_BASE) && (offset <= GPU_WORK_RAM_BASE + 0x0FFC))
692 WriteLog("GPU: Write32--unaligned write @ %08X [%08X] by %s\n", offset, data, whoName[who]);
698 SET32(gpu_ram_8, offset, data);
701 // else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset < GPU_CONTROL_RAM_BASE+0x20))
702 else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset <= GPU_CONTROL_RAM_BASE + 0x1C))
// Flags write. Remember whether this write clears a currently set IMASK, so
// that pending interrupts can be re-examined afterwards.
709 bool IMASKCleared = (gpu_flags & IMASK) && !(data & IMASK);
710 // NOTE: According to the JTRM, writing a 1 to IMASK has no effect; only the
711 // IRQ logic can set it. So we mask it out here to prevent problems...
712 gpu_flags = data & (~IMASK);
// Refresh the separately kept flag bytes from the new register image.
713 gpu_flag_z = gpu_flags & ZERO_FLAG;
714 gpu_flag_c = (gpu_flags & CARRY_FLAG) >> 1;
715 gpu_flag_n = (gpu_flags & NEGA_FLAG) >> 2;
716 GPUUpdateRegisterBanks();
// The clear bits (0x0200..0x2000) shifted right by 3 line up with the latch
// bits in gpu_control (0x0040..0x0400).
717 gpu_control &= ~((gpu_flags & CINT04FLAGS) >> 3); // Interrupt latch clear bits
718 //Writing here is only an interrupt enable--this approach is just plain wrong!
720 //This, however, is A-OK! ;-)
721 if (IMASKCleared) // If IMASK was cleared,
722 GPUHandleIRQs(); // see if any other interrupts need servicing!
724 if (gpu_flags & (INT_ENA0 | INT_ENA1 | INT_ENA2 | INT_ENA3 | INT_ENA4))
725 WriteLog("GPU: Interrupt enable set by %s! Bits: %02X\n", whoName[who], (gpu_flags >> 4) & 0x1F);
726 WriteLog("GPU: REGPAGE %s...\n", (gpu_flags & REGPAGE ? "set" : "cleared"));
731 gpu_matrix_control = data;
734 // This can only point to long aligned addresses
735 gpu_pointer_to_matrix = data & 0xFFFFFFFC;
738 gpu_data_organization = data;
743 WriteLog("GPU: %s setting GPU PC to %08X %s\n", whoName[who], gpu_pc, (GPU_RUNNING ? "(GPU is RUNNING!)" : ""));//*/
748 // uint32_t gpu_was_running = GPU_RUNNING;
749 data &= ~0xF7C0; // Disable writes to INT_LAT0-4 & TOM version number
751 // check for GPU -> CPU interrupt
754 //WriteLog("GPU->CPU interrupt\n");
755 if (TOMIRQEnabled(IRQ_GPU))
757 //This is the programmer's responsibility, to make sure the handler is valid, not ours!
758 // if ((TOMIRQEnabled(IRQ_GPU))// && (JaguarInterruptHandlerIsValid(64)))
760 TOMSetPendingGPUInt();
761 m68k_set_irq(2); // Set 68000 IPL 2
762 GPUReleaseTimeslice();
768 // check for CPU -> GPU interrupt #0
771 //WriteLog("CPU->GPU interrupt\n");
772 GPUSetIRQLine(0, ASSERT_LINE);
773 m68k_end_timeslice();
774 DSPReleaseTimeslice();
781 //WriteLog("asked to perform a single step (single step is %senabled)\n",(data&0x8)?"":"not ");
// Keep the protected bits (0xF7C0) of the old control value and take every
// other bit from the (already masked) data.
784 gpu_control = (gpu_control & 0xF7C0) | (data & (~0xF7C0));
786 // if gpu wasn't running but is now running, execute a few cycles
787 #ifndef GPU_SINGLE_STEPPING
788 /* if (!gpu_was_running && GPU_RUNNING)
791 WriteLog("GPU: Write32--About to do stupid braindead GPU execution for 200 cycles.\n");
796 #endif // GPU_DEBUG//*/
798 if (gpu_control & 0x18)
800 #endif // #ifndef GPU_SINGLE_STEPPING
802 WriteLog("Write to GPU CTRL by %s: %08X ", whoName[who], data);
804 WriteLog(" --> Starting to run at %08X by %s...", gpu_pc, whoName[who]);
806 WriteLog(" --> Stopped by %s! (GPU_PC: %08X)", whoName[who], gpu_pc);
810 // GPUDumpDisassembly();
// NOTE(review): from here to the end-timeslice call the code is tied to one
// specific program (fixed GPU PC 0xF035D8, fixed RAM addresses) and looks like
// leftover debugging; confirm which parts are compiled in.
813 if (gpu_pc == 0xF035D8)
815 // GPUDumpDisassembly();
818 gpu_control &= 0xFFFFFFFE; // Don't run it and let's see what happens!
819 //Hmm. Seems to lock up when going into the demo...
820 //Try to disable the collision altogether!
823 extern int effect_start5;
824 static bool finished = false;
825 //if (GPU_RUNNING && effect_start5 && !finished)
826 if (GPU_RUNNING && effect_start5 && gpu_pc == 0xF035D8)
828 // Let's do a dump of $6528!
829 /* uint32_t numItems = JaguarReadWord(0x6BD6);
830 WriteLog("\nDump of $6528: %u items.\n\n", numItems);
831 for(int i=0; i<numItems*3*4; i+=3*4)
833 WriteLog("\t%04X: %08X %08X %08X -> ", 0x6528+i, JaguarReadLong(0x6528+i),
834 JaguarReadLong(0x6528+i+4), JaguarReadLong(0x6528+i+8));
835 uint16_t link = JaguarReadWord(0x6528+i+8+2);
836 for(int j=0; j<40; j+=4)
837 WriteLog("%08X ", JaguarReadLong(link + j));
841 // Let's try a manual blit here...
842 //This isn't working the way it should! !!! FIX !!!
843 //Err, actually, it is.
844 // NOW, it works right! Problem solved!!! It's a blitter bug!
845 /* uint32_t src = 0x4D54, dst = 0xF03000, width = 10 * 4;
846 for(int y=0; y<127; y++)
848 for(int x=0; x<2; x++)
850 JaguarWriteLong(dst, JaguarReadLong(src));
855 src += width - (2 * 4);
859 WriteLog("\nGPU: About to execute collision detection code.\n\n");//*/
861 /* WriteLog("\nGPU: About to execute collision detection code. Data @ 4D54:\n\n");
863 for(int i=0x004D54; i<0x004D54+2048; i++)
865 WriteLog("%02X ", JaguarReadByte(i));
873 WriteLog("\n\nData @ F03000:\n\n");
875 for(int i=0xF03000; i<0xF03200; i++)
877 WriteLog("%02X ", JaguarReadByte(i));
891 /*if (!GPU_RUNNING && finished)
893 WriteLog("\nGPU: Finished collision detection code. Exiting!\n\n");
898 // (?) If we're set running by the M68K (or DSP?) then end its timeslice to
899 // allow the GPU a chance to run...
900 // Yes! This partially fixed Trevor McFur...
902 m68k_end_timeslice();
909 gpu_div_control = data;
911 // default: // unaligned long write
918 // JaguarWriteWord(offset, (data >> 16) & 0xFFFF, who);
919 // JaguarWriteWord(offset+2, data & 0xFFFF, who);
920 // We're a 32-bit processor, we can do a long write...!
921 JaguarWriteLong(offset, data, who);
925 // Change register banks if necessary
// Points gpu_reg / gpu_alternate_reg at the two register banks according to the
// REGPAGE bit of gpu_flags; a set IMASK bit overrides the choice with bank 0.
// NOTE(review): the if/else that picks between the two assignments below is not
// visible in this listing (presumably bank 1 becomes primary when `bank` is
// non-zero -- confirm).
927 void GPUUpdateRegisterBanks(void)
929 int bank = (gpu_flags & REGPAGE); // REGPAGE bit
931 if (gpu_flags & IMASK) // IMASK bit
932 bank = 0; // IMASK forces main bank to be bank 0
935 gpu_reg = gpu_reg_bank_1, gpu_alternate_reg = gpu_reg_bank_0;
937 gpu_reg = gpu_reg_bank_0, gpu_alternate_reg = gpu_reg_bank_1;
// Dispatches a latched, enabled GPU interrupt, emulating the instruction
// sequence quoted in the comments below: the return address (gpu_pc - 2) is
// written to the address held in gpu_reg[31], and execution continues at
// GPU_WORK_RAM_BASE + which * 0x10, which is also stored in gpu_reg[30].
// NOTE(review): the early returns, the selection of `which` from the latch and
// enable bits, the setting of IMASK and the stack pointer decrement are not
// visible in this listing.
940 void GPUHandleIRQs(void)
942 // Bail out if we're already in an interrupt!
943 if (gpu_flags & IMASK)
945 // Get the interrupt latch & enable bits
// Latches are bits 6..10 of gpu_control, enables are bits 4..8 of gpu_flags.
947 uint32_t bits = (gpu_control >> 6) & 0x1F, mask = (gpu_flags >> 4) & 0x1F;
949 // Bail out if latched interrupts aren't enabled
954 // Determine which interrupt to service
955 uint32_t which = 0; //Isn't there a #pragma to disable this warning???
968 WriteLog("GPU: Generating IRQ #%i\n", which);
970 // set the interrupt flag
// Re-evaluate the active register bank (IMASK forces bank 0).
972 GPUUpdateRegisterBanks();
974 // subqt #4,r31 ; pre-decrement stack pointer
975 // move pc,r30 ; address of interrupted code
976 // store r30,(r31) ; store return address
978 GPUWriteLong(gpu_reg[31], gpu_pc - 2, GPU);
980 // movei #service_address,r30 ; pointer to ISR entry
981 // jump (r30) ; jump to ISR
// Each interrupt source has its own 0x10-byte entry slot at the start of work RAM.
983 gpu_pc = gpu_reg[30] = GPU_WORK_RAM_BASE + (which * 0x10);
// Updates the latch bit of interrupt `irqline` in gpu_control (0x0040 << irqline).
// The latch is cleared first; the lines below then set it again and call
// GPUHandleIRQs().
// NOTE(review): the test on `state` that guards the re-assert is not visible in
// this listing (presumably state == ASSERT_LINE -- confirm).
986 void GPUSetIRQLine(int irqline, int state)
989 WriteLog("GPU: Setting GPU IRQ line #%i\n", irqline);
991 uint32_t mask = 0x0040 << irqline;
992 gpu_control &= ~mask; // Clear the interrupt latch
996 gpu_control |= mask; // Assert the interrupt latch
997 GPUHandleIRQs(); // And handle the interrupt...
1001 //TEMPORARY: Testing only!
1007 // memory_malloc_secure((void **)&gpu_ram_8, 0x1000, "GPU work RAM");
1008 // memory_malloc_secure((void **)&gpu_reg_bank_0, 32 * sizeof(int32_t), "GPU bank 0 regs");
1009 // memory_malloc_secure((void **)&gpu_reg_bank_1, 32 * sizeof(int32_t), "GPU bank 1 regs");
1011 build_branch_condition_table();
1015 //TEMPORARY: Testing only!
// Reset sequence: puts every GPU register into its power-on state, selects
// bank 0 as the active register bank, clears both banks and re-initialises the
// local work RAM.
// NOTE(review): the header of the enclosing function is not visible in this
// listing.
1022 // GPU registers (directly visible)
1023 gpu_flags = 0x00000000;
1024 gpu_matrix_control = 0x00000000;
1025 gpu_pointer_to_matrix = 0x00000000;
1026 gpu_data_organization = 0xFFFFFFFF;
1027 gpu_pc = 0x00F03000;
1028 gpu_control = 0x00002800; // Correctly sets this as TOM Rev. 2
1029 gpu_hidata = 0x00000000;
1030 gpu_remain = 0x00000000; // These two registers are RO/WO
1031 gpu_div_control = 0x00000000;
1033 // GPU internal register
1034 gpu_acc = 0x00000000;
1036 gpu_reg = gpu_reg_bank_0;
1037 gpu_alternate_reg = gpu_reg_bank_1;
1039 for(int i=0; i<32; i++)
1040 gpu_reg[i] = gpu_alternate_reg[i] = 0x00000000;
1043 memset(gpu_ram_8, 0xFF, 0x1000);
1045 //not needed GPUInterruptPending = false;
1048 // Contents of local RAM are quasi-stable; we simulate this by randomizing RAM contents
1049 for(uint32_t i=0; i<4096; i+=4)
{
// Copy each random word in with memcpy rather than storing through a
// uint32_t * cast of the byte array: the cast violates the aliasing rules and
// assumes the array is 4-byte aligned. The bytes written are the same.
uint32_t noise = rand();
1050 memcpy(&gpu_ram_8[i], &noise, sizeof noise);
}
// NOTE(review): the body is not visible in this listing; by its name and return
// type this presumably returns the current gpu_pc -- confirm against the full source.
1053 uint32_t GPUReadPC(void)
// Zeroes all 64 per-opcode usage counters and logs that the statistics were reset.
1058 void GPUResetStats(void)
1060 for(uint32_t i=0; i<64; i++)
1061 gpu_opcode_use[i] = 0;
1062 WriteLog("--> GPU stats were reset!\n");
// Logs a disassembly of the address range 0xF03000-0xF03FFF. Each step calls
// dasmjag() for the instruction at j and advances j by its return value.
// NOTE(review): the declarations of `buffer` and `oldj` are not visible in this
// listing; `oldj` presumably holds the address of the instruction just decoded.
1065 void GPUDumpDisassembly(void)
1069 WriteLog("\n---[GPU code at 00F03000]---------------------------\n");
1070 uint32_t j = 0xF03000;
1071 while (j <= 0xF03FFF)
1074 j += dasmjag(JAGUAR_GPU, buffer, j);
1075 WriteLog("\t%08X: %s\n", oldj, buffer);
// Logs the N/C/Z flags followed by all 32 registers of bank 0 and of bank 1,
// four registers per line (eight lines per bank).
1079 void GPUDumpRegisters(void)
1081 WriteLog("\n---[GPU flags: NCZ %d%d%d]-----------------------\n", gpu_flag_n, gpu_flag_c, gpu_flag_z);
1082 WriteLog("\nRegisters bank 0\n");
// (j << 2) + k is the register number: row j holds registers 4j .. 4j+3.
1083 for(int j=0; j<8; j++)
1085 WriteLog("\tR%02i = %08X R%02i = %08X R%02i = %08X R%02i = %08X\n",
1086 (j << 2) + 0, gpu_reg_bank_0[(j << 2) + 0],
1087 (j << 2) + 1, gpu_reg_bank_0[(j << 2) + 1],
1088 (j << 2) + 2, gpu_reg_bank_0[(j << 2) + 2],
1089 (j << 2) + 3, gpu_reg_bank_0[(j << 2) + 3]);
1091 WriteLog("Registers bank 1\n");
1092 for(int j=0; j<8; j++)
1094 WriteLog("\tR%02i = %08X R%02i = %08X R%02i = %08X R%02i = %08X\n",
1095 (j << 2) + 0, gpu_reg_bank_1[(j << 2) + 0],
1096 (j << 2) + 1, gpu_reg_bank_1[(j << 2) + 1],
1097 (j << 2) + 2, gpu_reg_bank_1[(j << 2) + 2],
1098 (j << 2) + 3, gpu_reg_bank_1[(j << 2) + 3]);
// Logs the whole of GPU work RAM as hex bytes, four bytes per line, each line
// labelled with its address starting at 0xF03000.
1102 void GPUDumpMemory(void)
1104 WriteLog("\n---[GPU data at 00F03000]---------------------------\n");
// i stops at 0xFFC, so i+3 stays within the 0x1000-byte array.
1105 for(int i=0; i<0xFFF; i+=4)
1106 WriteLog("\t%08X: %02X %02X %02X %02X\n", 0xF03000+i, gpu_ram_8[i],
1107 gpu_ram_8[i+1], gpu_ram_8[i+2], gpu_ram_8[i+3]);
// Final report: logs where the GPU stopped and whether it was running, the
// interrupt latch and enable bits, a disassembly of work RAM, and the usage
// count of every opcode that was executed at least once.
// NOTE(review): the header of the enclosing function is not visible in this
// listing.
1112 WriteLog("GPU: Stopped at PC=%08X (GPU %s running)\n", (unsigned int)gpu_pc, GPU_RUNNING ? "was" : "wasn't");
1114 // Get the interrupt latch & enable bits
1115 uint8_t bits = (gpu_control >> 6) & 0x1F, mask = (gpu_flags >> 4) & 0x1F;
1116 WriteLog("GPU: Latch bits = %02X, enable bits = %02X\n", bits, mask);
1119 GPUDumpDisassembly();
1121 WriteLog("\nGPU opcodes use:\n");
1122 for(int i=0; i<64; i++)
1124 if (gpu_opcode_use[i])
// gpu_opcode_use[] holds uint32_t, but %lu consumes an unsigned long; the
// explicit cast makes the argument match the conversion on every platform.
1125 WriteLog("\t%17s %lu\n", gpu_opcode_str[i], (unsigned long)gpu_opcode_use[i]);
1129 // memory_free(gpu_ram_8);
1130 // memory_free(gpu_reg_bank_0);
1131 // memory_free(gpu_reg_bank_1);
1135 // Main GPU execution core
// Runs GPU instructions until the cycle budget is used up or the GPU stops
// running (bit 0 of gpu_control clear). Per instruction, the visible steps are:
// fetch the 16-bit opcode at gpu_pc, split it into a 6-bit opcode index and two
// 5-bit operand fields, dispatch through gpu_opcode[], then charge
// gpu_opcode_cycles[index] and bump the usage counter.
// NOTE(review): short lines (braces, some conditions, the gpu_pc increment) are
// not visible in this listing, and much of the body is leftover game-specific
// debugging, parts of which appear to sit inside block comments. Confirm what
// is live against the full source before relying on any of it.
1137 static int testCount = 1;
1139 static bool tripwire = false;
1140 void GPUExec(int32_t cycles)
1145 #ifdef GPU_SINGLE_STEPPING
1146 if (gpu_control & 0x18)
1149 gpu_control &= ~0x10;
1153 gpu_releaseTimeSlice_flag = 0;
1156 while (cycles > 0 && GPU_RUNNING)
// Debug probe: matches a specific byte pattern at work RAM offsets 0x054-0x059.
1158 if (gpu_ram_8[0x054] == 0x98 && gpu_ram_8[0x055] == 0x0A && gpu_ram_8[0x056] == 0x03
1159 && gpu_ram_8[0x057] == 0x00 && gpu_ram_8[0x058] == 0x00 && gpu_ram_8[0x059] == 0x00)
1161 if (gpu_pc == 0xF03000)
1163 extern uint32_t starCount;
1165 /* WriteLog("GPU: Starting starfield generator... Dump of [R03=%08X]:\n", gpu_reg_bank_0[03]);
1166 uint32_t base = gpu_reg_bank_0[3];
1167 for(uint32_t i=0; i<0x100; i+=16)
1169 WriteLog("%02X: ", i);
1170 for(uint32_t j=0; j<16; j++)
1172 WriteLog("%02X ", JaguarReadByte(base + i + j));
1177 // if (gpu_pc == 0xF03)
1181 /*if (gpu_pc == 0xF03B9E && gpu_reg_bank_0[01] == 0)
1184 WriteLog("GPU: Starting disassembly log...\n");
1187 /*if (gpu_pc == 0xF0359A)
1192 /* gpu_flag_c = (gpu_flag_c ? 1 : 0);
1193 gpu_flag_z = (gpu_flag_z ? 1 : 0);
1194 gpu_flag_n = (gpu_flag_n ? 1 : 0);*/
1196 if (gpu_pc == 0xF03200)
// Fetch and decode: bits 15..10 select the handler, bits 9..5 and 4..0 are the
// two operand fields read by the RM/RN/IMM_1/IMM_2 macros.
1200 uint16_t opcode = GPUReadWord(gpu_pc, GPU);
1201 uint32_t index = opcode >> 10;
1202 gpu_instruction = opcode; // Added for GPU #3...
1203 gpu_opcode_first_parameter = (opcode >> 5) & 0x1F;
1204 gpu_opcode_second_parameter = opcode & 0x1F;
1205 /*if (gpu_pc == 0xF03BE8)
1206 WriteLog("Start of OP frame write...\n");
1207 if (gpu_pc == 0xF03EEE)
1208 WriteLog("--> Writing BRANCH object ---\n");
1209 if (gpu_pc == 0xF03F62)
1210 WriteLog("--> Writing BITMAP object ***\n");//*/
1211 /*if (gpu_pc == 0xF03546)
1213 WriteLog("\n--> GPU PC: F03546\n");
1215 GPUDumpDisassembly();
1217 /*if (gpu_pc == 0xF033F6)
1219 WriteLog("\n--> GPU PC: F033F6\n");
1221 GPUDumpDisassembly();
1223 /*if (gpu_pc == 0xF033CC)
1225 WriteLog("\n--> GPU PC: F033CC\n");
1227 GPUDumpDisassembly();
1229 /*if (gpu_pc == 0xF033D6)
1231 WriteLog("\n--> GPU PC: F033D6 (#%d)\n", testCount++);
1235 /*if (gpu_pc == 0xF033D8)
1237 WriteLog("\n--> GPU PC: F033D8 (#%d)\n", testCount++);
1241 /*if (gpu_pc == 0xF0358E)
1243 WriteLog("\n--> GPU PC: F0358E (#%d)\n", testCount++);
1247 /*if (gpu_pc == 0xF034CA)
1249 WriteLog("\n--> GPU PC: F034CA (#%d)\n", testCount++);
1252 /*if (gpu_pc == 0xF034CA)
1254 len = gpu_reg[1] + 4;//, r9save = gpu_reg[9];
1255 WriteLog("\nAbout to subtract [#%d] (R14=%08X, R15=%08X, R9=%08X):\n ", testCount++, gpu_reg[14], gpu_reg[15], gpu_reg[9]);
1256 for(int i=0; i<len; i+=4)
1257 WriteLog(" %08X", GPUReadLong(gpu_reg[15]+i));
1259 for(int i=0; i<len; i+=4)
1260 WriteLog(" %08X", GPUReadLong(gpu_reg[14]+i));
1263 if (gpu_pc == 0xF034DE)
1265 WriteLog("\nSubtracted! (R14=%08X, R15=%08X):\n ", gpu_reg[14], gpu_reg[15]);
1266 for(int i=0; i<len; i+=4)
1267 WriteLog(" %08X", GPUReadLong(gpu_reg[15]+i));
1269 for(int i=0; i<len; i+=4)
1270 WriteLog(" %08X", GPUReadLong(gpu_reg[14]+i));
1272 for(int i=0; i<len; i+=4)
1273 WriteLog(" --------");
1275 for(int i=0; i<len; i+=4)
1276 WriteLog(" %08X", GPUReadLong(gpu_reg[9]+4+i));
1279 /*if (gpu_pc == 0xF035C8)
1281 WriteLog("\n--> GPU PC: F035C8 (#%d)\n", testCount++);
1283 GPUDumpDisassembly();
1288 // gpu_reset_stats();
// Optional trace of the instruction about to run (disassembly plus RM/RN).
1289 static char buffer[512];
1290 dasmjag(JAGUAR_GPU, buffer, gpu_pc);
1291 WriteLog("GPU: [%08X] %s (RM=%08X, RN=%08X) -> ", gpu_pc, buffer, RM, RN);
1293 //$E400 -> 1110 01 -> $39 -> 57
// Execute the decoded instruction through the dispatch table.
1296 gpu_opcode[index]();
1298 // gpu2_opcode[index]();
1300 //GPU #3 (Doesn't show ATARI logo! #1 & #2 do...)
1302 // gpu3_opcode[index]();
1305 //GPU: [00F03548] jr nz,00F03560 (0xd561) (RM=00F03114, RN=00000004) -> --> JR: Branch taken.
1306 /*static bool firstTime = true;
1307 if (gpu_pc == 0xF03548 && firstTime)
1310 // firstTime = false;
1312 //static char buffer[512];
1314 //while (k<0xF0356C)
1317 //k += dasmjag(JAGUAR_GPU, buffer, k);
1318 //WriteLog("GPU: [%08X] %s\n", oldk, buffer);
1320 // gpu_start_log = 1;
1322 //GPU: [00F0354C] jump nz,(r29) (0xd3a1) (RM=00F03314, RN=00000004) -> (RM=00F03314, RN=00000004)
1323 /*if (gpu_pc == 0xF0354C)
1324 gpu_flag_z = 0;//, gpu_start_log = 1;//*/
// Accounting: charge the instruction's cycle cost and count its use.
1326 cycles -= gpu_opcode_cycles[index];
1327 gpu_opcode_use[index]++;
1329 WriteLog("(RM=%08X, RN=%08X)\n", RM, RN);//*/
// Logs when the PC leaves the local RAM window; guarded by `tripwire`.
// NOTE(review): `tripwire` is presumably set afterwards so the message appears
// only once -- that assignment is not visible here.
1330 if ((gpu_pc < 0xF03000 || gpu_pc > 0xF03FFF) && !tripwire)
1332 WriteLog("GPU: Executing outside local RAM! GPU_PC: %08X\n", gpu_pc);
1345 GPU opcodes use (offset punch--vertically below bad guy):
1367 load_r14_indexed 1183
1368 load_r15_indexed 1125
1371 store_r14_indexed 320
1379 static void gpu_opcode_jump(void)
1382 const char * condition[32] =
1383 { "T", "nz", "z", "???", "nc", "nc nz", "nc z", "???", "c", "c nz",
1384 "c z", "???", "???", "???", "???", "???", "???", "???", "???",
1385 "???", "nn", "nn nz", "nn z", "???", "n", "n nz", "n z", "???",
1386 "???", "???", "???", "F" };
1388 WriteLog("%06X: JUMP %s, (R%02u) [NCZ:%u%u%u, R%02u=%08X] ", gpu_pc-2, condition[IMM_2], IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM);
1391 /* gpu_flag_c = (gpu_flag_c ? 1 : 0);
1392 gpu_flag_z = (gpu_flag_z ? 1 : 0);
1393 gpu_flag_n = (gpu_flag_n ? 1 : 0);*/
1394 // KLUDGE: Used by BRANCH_CONDITION
1395 uint32_t jaguar_flags = (gpu_flag_n << 2) | (gpu_flag_c << 1) | gpu_flag_z;
1397 if (BRANCH_CONDITION(IMM_2))
1401 WriteLog("Branched!\n");
1404 WriteLog(" --> JUMP: Branch taken.\n");
1405 uint32_t delayed_pc = RM;
1407 gpu_pc = delayed_pc;
1408 /* uint16_t opcode = GPUReadWord(gpu_pc, GPU);
1409 gpu_opcode_first_parameter = (opcode >> 5) & 0x1F;
1410 gpu_opcode_second_parameter = opcode & 0x1F;
1412 gpu_pc = delayed_pc;
1413 gpu_opcode[opcode>>10]();//*/
1418 WriteLog("Branch NOT taken.\n");
1422 static void gpu_opcode_jr(void)
1425 const char * condition[32] =
1426 { "T", "nz", "z", "???", "nc", "nc nz", "nc z", "???", "c", "c nz",
1427 "c z", "???", "???", "???", "???", "???", "???", "???", "???",
1428 "???", "nn", "nn nz", "nn z", "???", "n", "n nz", "n z", "???",
1429 "???", "???", "???", "F" };
1431 WriteLog("%06X: JR %s, %06X [NCZ:%u%u%u] ", gpu_pc-2, condition[IMM_2], gpu_pc+((IMM_1 & 0x10 ? 0xFFFFFFF0 | IMM_1 : IMM_1) * 2), gpu_flag_n, gpu_flag_c, gpu_flag_z);
1433 /* if (CONDITION(jaguar.op & 31))
1435 int32_t r1 = (INT8)((jaguar.op >> 2) & 0xF8) >> 2;
1436 uint32_t newpc = jaguar.PC + r1;
1438 jaguar.op = ROPCODE(jaguar.PC);
1440 (*jaguar.table[jaguar.op >> 10])();
1442 jaguar_icount -= 3; // 3 wait states guaranteed
1445 /* gpu_flag_n = (gpu_flag_n ? 1 : 0);
1446 gpu_flag_c = (gpu_flag_c ? 1 : 0);
1447 gpu_flag_z = (gpu_flag_z ? 1 : 0);*/
1448 // KLUDGE: Used by BRANCH_CONDITION
1449 uint32_t jaguar_flags = (gpu_flag_n << 2) | (gpu_flag_c << 1) | gpu_flag_z;
1451 if (BRANCH_CONDITION(IMM_2))
1455 WriteLog("Branched!\n");
1458 WriteLog(" --> JR: Branch taken.\n");
1459 int32_t offset = (IMM_1 & 0x10 ? 0xFFFFFFF0 | IMM_1 : IMM_1); // Sign extend IMM_1
1460 int32_t delayed_pc = gpu_pc + (offset * 2);
1462 gpu_pc = delayed_pc;
1463 /* uint16_t opcode = GPUReadWord(gpu_pc, GPU);
1464 gpu_opcode_first_parameter = (opcode >> 5) & 0x1F;
1465 gpu_opcode_second_parameter = opcode & 0x1F;
1467 gpu_pc = delayed_pc;
1468 gpu_opcode[opcode>>10]();//*/
1473 WriteLog("Branch NOT taken.\n");
1477 static void gpu_opcode_add(void)
1481 WriteLog("%06X: ADD R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1483 uint32_t res = RN + RM;
1484 CLR_ZNC; SET_ZNC_ADD(RN, RM, res);
1488 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1492 static void gpu_opcode_addc(void)
1496 WriteLog("%06X: ADDC R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1498 /* int dreg = jaguar.op & 31;
1499 uint32_t r1 = jaguar.r[(jaguar.op >> 5) & 31];
1500 uint32_t r2 = jaguar.r[dreg];
1501 uint32_t res = r2 + r1 + ((jaguar.FLAGS >> 1) & 1);
1502 jaguar.r[dreg] = res;
1503 CLR_ZNC; SET_ZNC_ADD(r2,r1,res);*/
1505 uint32_t res = RN + RM + gpu_flag_c;
1506 uint32_t carry = gpu_flag_c;
1507 // SET_ZNC_ADD(RN, RM, res); //???BUG??? Yes!
1508 SET_ZNC_ADD(RN + carry, RM, res);
1509 // SET_ZNC_ADD(RN, RM + carry, res);
1513 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1517 static void gpu_opcode_addq(void)
1521 WriteLog("%06X: ADDQ #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1523 uint32_t r1 = gpu_convert_zero[IMM_1];
1524 uint32_t res = RN + r1;
1525 CLR_ZNC; SET_ZNC_ADD(RN, r1, res);
1529 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1533 static void gpu_opcode_addqt(void)
1535 #ifdef GPU_DIS_ADDQT
1537 WriteLog("%06X: ADDQT #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1539 RN += gpu_convert_zero[IMM_1];
1540 #ifdef GPU_DIS_ADDQT
1542 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1546 static void gpu_opcode_sub(void)
1550 WriteLog("%06X: SUB R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1552 uint32_t res = RN - RM;
1553 SET_ZNC_SUB(RN, RM, res);
1557 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1561 static void gpu_opcode_subc(void)
1565 WriteLog("%06X: SUBC R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1567 uint32_t res = RN - RM - gpu_flag_c;
1568 uint32_t borrow = gpu_flag_c;
1569 // SET_ZNC_SUB(RN, RM, res); //???BUG??? YES!!!
1570 //No matter how you do it, there is a problem. With below, it's 0-0 with carry,
1571 //and the one below it it's FFFFFFFF - FFFFFFFF with carry... !!! FIX !!!
1572 // SET_ZNC_SUB(RN - borrow, RM, res);
1573 SET_ZNC_SUB(RN, RM + borrow, res);
1577 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1581 N = 5, M = 3, 3 - 5 = -2, C = 1... Or, in our case:
1582 N = 0, M = 1, 0 - 1 = -1, C = 0!
1584 #define SET_C_SUB(a,b) (gpu_flag_c = ((uint32_t)(b) > (uint32_t)(a)))
1585 #define SET_ZN(r) SET_N(r); SET_Z(r)
1586 #define SET_ZNC_ADD(a,b,r) SET_N(r); SET_Z(r); SET_C_ADD(a,b)
1587 #define SET_ZNC_SUB(a,b,r) SET_N(r); SET_Z(r); SET_C_SUB(a,b)
1589 static void gpu_opcode_subq(void)
1593 WriteLog("%06X: SUBQ #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1595 uint32_t r1 = gpu_convert_zero[IMM_1];
1596 uint32_t res = RN - r1;
1597 SET_ZNC_SUB(RN, r1, res);
1601 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1605 static void gpu_opcode_subqt(void)
1607 #ifdef GPU_DIS_SUBQT
1609 WriteLog("%06X: SUBQT #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1611 RN -= gpu_convert_zero[IMM_1];
1612 #ifdef GPU_DIS_SUBQT
1614 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1618 static void gpu_opcode_cmp(void)
1622 WriteLog("%06X: CMP R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1624 uint32_t res = RN - RM;
1625 SET_ZNC_SUB(RN, RM, res);
1628 WriteLog("[NCZ:%u%u%u]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z);
1632 static void gpu_opcode_cmpq(void)
1634 static int32_t sqtable[32] =
1635 { 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,-16,-15,-14,-13,-12,-11,-10,-9,-8,-7,-6,-5,-4,-3,-2,-1 };
1638 WriteLog("%06X: CMPQ #%d, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, sqtable[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1640 uint32_t r1 = sqtable[IMM_1 & 0x1F]; // I like this better -> (INT8)(jaguar.op >> 2) >> 3;
1641 uint32_t res = RN - r1;
1642 SET_ZNC_SUB(RN, r1, res);
1645 WriteLog("[NCZ:%u%u%u]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z);
1649 static void gpu_opcode_and(void)
1653 WriteLog("%06X: AND R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1659 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1663 static void gpu_opcode_or(void)
1667 WriteLog("%06X: OR R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1673 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1677 static void gpu_opcode_xor(void)
1681 WriteLog("%06X: XOR R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1687 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1691 static void gpu_opcode_not(void)
1695 WriteLog("%06X: NOT R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1701 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1705 static void gpu_opcode_move_pc(void)
1707 #ifdef GPU_DIS_MOVEPC
1709 WriteLog("%06X: MOVE PC, R%02u [NCZ:%u%u%u, PC=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, gpu_pc-2, IMM_2, RN);
1711 // Should be previous PC--this might not always be previous instruction!
1712 // Then again, this will point right at the *current* instruction, i.e., MOVE PC,R!
1714 #ifdef GPU_DIS_MOVEPC
1716 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1720 static void gpu_opcode_sat8(void)
1724 WriteLog("%06X: SAT8 R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1726 RN = ((int32_t)RN < 0 ? 0 : (RN > 0xFF ? 0xFF : RN));
1730 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1734 static void gpu_opcode_sat16(void)
1736 RN = ((int32_t)RN < 0 ? 0 : (RN > 0xFFFF ? 0xFFFF : RN));
1740 static void gpu_opcode_sat24(void)
1742 RN = ((int32_t)RN < 0 ? 0 : (RN > 0xFFFFFF ? 0xFFFFFF : RN));
1746 static void gpu_opcode_store_r14_indexed(void)
1748 #ifdef GPU_DIS_STORE14I
1750 WriteLog("%06X: STORE R%02u, (R14+$%02X) [NCZ:%u%u%u, R%02u=%08X, R14+$%02X=%08X]\n", gpu_pc-2, IMM_2, gpu_convert_zero[IMM_1] << 2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, gpu_convert_zero[IMM_1] << 2, gpu_reg[14]+(gpu_convert_zero[IMM_1] << 2));
1752 #ifdef GPU_CORRECT_ALIGNMENT
1753 uint32_t address = gpu_reg[14] + (gpu_convert_zero[IMM_1] << 2);
1755 if (address >= 0xF03000 && address <= 0xF03FFF)
1756 GPUWriteLong(address & 0xFFFFFFFC, RN, GPU);
1758 GPUWriteLong(address, RN, GPU);
1760 GPUWriteLong(gpu_reg[14] + (gpu_convert_zero[IMM_1] << 2), RN, GPU);
1764 static void gpu_opcode_store_r15_indexed(void)
1766 #ifdef GPU_DIS_STORE15I
1768 WriteLog("%06X: STORE R%02u, (R15+$%02X) [NCZ:%u%u%u, R%02u=%08X, R15+$%02X=%08X]\n", gpu_pc-2, IMM_2, gpu_convert_zero[IMM_1] << 2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, gpu_convert_zero[IMM_1] << 2, gpu_reg[15]+(gpu_convert_zero[IMM_1] << 2));
1770 #ifdef GPU_CORRECT_ALIGNMENT
1771 uint32_t address = gpu_reg[15] + (gpu_convert_zero[IMM_1] << 2);
1773 if (address >= 0xF03000 && address <= 0xF03FFF)
1774 GPUWriteLong(address & 0xFFFFFFFC, RN, GPU);
1776 GPUWriteLong(address, RN, GPU);
1778 GPUWriteLong(gpu_reg[15] + (gpu_convert_zero[IMM_1] << 2), RN, GPU);
1782 static void gpu_opcode_load_r14_ri(void)
1784 #ifdef GPU_DIS_LOAD14R
1786 WriteLog("%06X: LOAD (R14+R%02u), R%02u [NCZ:%u%u%u, R14+R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM+gpu_reg[14], IMM_2, RN);
1788 #ifdef GPU_CORRECT_ALIGNMENT
1789 uint32_t address = gpu_reg[14] + RM;
1791 if (address >= 0xF03000 && address <= 0xF03FFF)
1792 RN = GPUReadLong(address & 0xFFFFFFFC, GPU);
1794 RN = GPUReadLong(address, GPU);
1796 RN = GPUReadLong(gpu_reg[14] + RM, GPU);
1798 #ifdef GPU_DIS_LOAD14R
1800 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1804 static void gpu_opcode_load_r15_ri(void)
1806 #ifdef GPU_DIS_LOAD15R
1808 WriteLog("%06X: LOAD (R15+R%02u), R%02u [NCZ:%u%u%u, R15+R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM+gpu_reg[15], IMM_2, RN);
1810 #ifdef GPU_CORRECT_ALIGNMENT
1811 uint32_t address = gpu_reg[15] + RM;
1813 if (address >= 0xF03000 && address <= 0xF03FFF)
1814 RN = GPUReadLong(address & 0xFFFFFFFC, GPU);
1816 RN = GPUReadLong(address, GPU);
1818 RN = GPUReadLong(gpu_reg[15] + RM, GPU);
1820 #ifdef GPU_DIS_LOAD15R
1822 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1826 static void gpu_opcode_store_r14_ri(void)
1828 #ifdef GPU_DIS_STORE14R
1830 WriteLog("%06X: STORE R%02u, (R14+R%02u) [NCZ:%u%u%u, R%02u=%08X, R14+R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM+gpu_reg[14]);
1832 #ifdef GPU_CORRECT_ALIGNMENT
1833 uint32_t address = gpu_reg[14] + RM;
1835 if (address >= 0xF03000 && address <= 0xF03FFF)
1836 GPUWriteLong(address & 0xFFFFFFFC, RN, GPU);
1838 GPUWriteLong(address, RN, GPU);
1840 GPUWriteLong(gpu_reg[14] + RM, RN, GPU);
// STORE Rn,(R15+Rm): writes RN as a long to the address gpu_reg[15] + RM.
1844 static void gpu_opcode_store_r15_ri(void)
1846 #ifdef GPU_DIS_STORE15R
1848 WriteLog("%06X: STORE R%02u, (R15+R%02u) [NCZ:%u%u%u, R%02u=%08X, R15+R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM+gpu_reg[15]);
// NOTE(review): this is the only handler here keyed on
// GPU_CORRECT_ALIGNMENT_STORE; the R14 counterpart and the other stores test
// GPU_CORRECT_ALIGNMENT. Confirm whether the different macro is intentional.
1850 #ifdef GPU_CORRECT_ALIGNMENT_STORE
1851 uint32_t address = gpu_reg[15] + RM;
// Addresses in $F03000-$F03FFF are forced to a long boundary before the write.
1853 if (address >= 0xF03000 && address <= 0xF03FFF)
1854 GPUWriteLong(address & 0xFFFFFFFC, RN, GPU);
1856 GPUWriteLong(address, RN, GPU);
// Variant without any alignment handling.
1858 GPUWriteLong(gpu_reg[15] + RM, RN, GPU);
1862 static void gpu_opcode_nop(void)
1866 WriteLog("%06X: NOP [NCZ:%u%u%u]\n", gpu_pc-2, gpu_flag_n, gpu_flag_c, gpu_flag_z);
// PACK / UNPACK: IMM_1 selects the direction (0 = pack, anything else =
// unpack); the result is written to RN.
1870 static void gpu_opcode_pack(void)
// Eight conversions for the eight arguments supplied (PC, mnemonic, register
// number, N, C, Z, register number, register value). A conversion without a
// matching argument is undefined behaviour in a varargs call.
1874 WriteLog("%06X: %s R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, (!IMM_1 ? "PACK " : "UNPACK"), IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1878 //BUG! if (RM == 0) // Pack
1879 if (IMM_1 == 0) // Pack
// Pack: bits 25-22 move to 15-12, bits 16-13 move to 11-8, bits 7-0 stay.
1880 RN = ((val >> 10) & 0x0000F000) | ((val >> 5) & 0x00000F00) | (val & 0x000000FF);
// Unpack: the inverse, spreading the same three fields back out.
1882 RN = ((val & 0x0000F000) << 10) | ((val & 0x00000F00) << 5) | (val & 0x000000FF);
1885 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1889 static void gpu_opcode_storeb(void)
1891 #ifdef GPU_DIS_STOREB
1893 WriteLog("%06X: STOREB R%02u, (R%02u) [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM);
1896 // Would appear to be so...!
1897 if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1898 GPUWriteLong(RM, RN & 0xFF, GPU);
1900 JaguarWriteByte(RM, RN, GPU);
1903 static void gpu_opcode_storew(void)
1905 #ifdef GPU_DIS_STOREW
1907 WriteLog("%06X: STOREW R%02u, (R%02u) [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM);
1909 #ifdef GPU_CORRECT_ALIGNMENT
1910 if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1911 GPUWriteLong(RM & 0xFFFFFFFE, RN & 0xFFFF, GPU);
1913 JaguarWriteWord(RM, RN, GPU);
1915 if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1916 GPUWriteLong(RM, RN & 0xFFFF, GPU);
1918 JaguarWriteWord(RM, RN, GPU);
1922 static void gpu_opcode_store(void)
1924 #ifdef GPU_DIS_STORE
1926 WriteLog("%06X: STORE R%02u, (R%02u) [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM);
1928 #ifdef GPU_CORRECT_ALIGNMENT
1929 if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1930 GPUWriteLong(RM & 0xFFFFFFFC, RN, GPU);
1932 GPUWriteLong(RM, RN, GPU);
1934 GPUWriteLong(RM, RN, GPU);
1938 static void gpu_opcode_storep(void)
1940 #ifdef GPU_CORRECT_ALIGNMENT
1941 if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1943 GPUWriteLong((RM & 0xFFFFFFF8) + 0, gpu_hidata, GPU);
1944 GPUWriteLong((RM & 0xFFFFFFF8) + 4, RN, GPU);
1948 GPUWriteLong(RM + 0, gpu_hidata, GPU);
1949 GPUWriteLong(RM + 4, RN, GPU);
1952 GPUWriteLong(RM + 0, gpu_hidata, GPU);
1953 GPUWriteLong(RM + 4, RN, GPU);
1957 static void gpu_opcode_loadb(void)
1959 #ifdef GPU_DIS_LOADB
1961 WriteLog("%06X: LOADB (R%02u), R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1963 if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1964 RN = GPUReadLong(RM, GPU) & 0xFF;
1966 RN = JaguarReadByte(RM, GPU);
1967 #ifdef GPU_DIS_LOADB
1969 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1973 static void gpu_opcode_loadw(void)
1975 #ifdef GPU_DIS_LOADW
1977 WriteLog("%06X: LOADW (R%02u), R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1979 #ifdef GPU_CORRECT_ALIGNMENT
1980 if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1981 RN = GPUReadLong(RM & 0xFFFFFFFE, GPU) & 0xFFFF;
1983 RN = JaguarReadWord(RM, GPU);
1985 if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1986 RN = GPUReadLong(RM, GPU) & 0xFFFF;
1988 RN = JaguarReadWord(RM, GPU);
1990 #ifdef GPU_DIS_LOADW
1992 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1996 // According to the docs, & "Do The Same", this address is long aligned...
1998 // And it works!!! Need to fix all instances...
1999 // Also, Power Drive Rally seems to contradict the idea that only LOADs in
2000 // the $F03000-$F03FFF range are aligned...
2001 #warning "!!! Alignment issues, need to find definitive final word on this !!!"
2003 Preliminary testing on real hardware seems to confirm that something strange goes on
2004 with unaligned reads in main memory. When the address is off by 1, the result is the
2005 same as the long address with the top byte replaced by something. So if the read is
2006 from $401, and $400 has 12 34 56 78, the value read will be $nn345678, where nn is a currently unknown value.
2007 When the address is off by 2, the result would be $nnnn5678, where nnnn is unknown.
2008 When the address is off by 3, the result would be $nnnnnn78, where nnnnnn is unknown.
2009 It may be that the "unknown" values come from the prefetch queue, but not sure how
2010 to test that. They seem to be stable, though, which would indicate such a mechanism.
2011 Sometimes, however, the off by 2 case returns $12345678!
2013 static void gpu_opcode_load(void)
2017 WriteLog("%06X: LOAD (R%02u), R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2019 #ifdef GPU_CORRECT_ALIGNMENT
2020 uint32_t mask[4] = { 0x00000000, 0xFF000000, 0xFFFF0000, 0xFFFFFF00 };
2021 // if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
2022 RN = GPUReadLong(RM & 0xFFFFFFFC, GPU);
2023 // RN = GPUReadLong(RM & 0x00FFFFFC, GPU);
2025 // RN = GPUReadLong(RM, GPU);
2026 // Simulate garbage in unaligned reads...
2027 //seems that this behavior is different in GPU mem vs. main mem...
2028 // if ((RM < 0xF03000) || (RM > 0xF0BFFF))
2029 // RN |= mask[RM & 0x03];
2031 RN = GPUReadLong(RM, GPU);
2035 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2039 static void gpu_opcode_loadp(void)
2041 #ifdef GPU_CORRECT_ALIGNMENT
2042 if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
2044 gpu_hidata = GPUReadLong((RM & 0xFFFFFFF8) + 0, GPU);
2045 RN = GPUReadLong((RM & 0xFFFFFFF8) + 4, GPU);
2049 gpu_hidata = GPUReadLong(RM + 0, GPU);
2050 RN = GPUReadLong(RM + 4, GPU);
2053 gpu_hidata = GPUReadLong(RM + 0, GPU);
2054 RN = GPUReadLong(RM + 4, GPU);
// LOAD (R14+n),Rn: reads a long from gpu_reg[14] plus a scaled immediate
// offset (gpu_convert_zero[IMM_1] << 2) into RN.
2058 static void gpu_opcode_load_r14_indexed(void)
2060 #ifdef GPU_DIS_LOAD14I
2062 WriteLog("%06X: LOAD (R14+$%02X), R%02u [NCZ:%u%u%u, R14+$%02X=%08X, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1] << 2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, gpu_convert_zero[IMM_1] << 2, gpu_reg[14]+(gpu_convert_zero[IMM_1] << 2), IMM_2, RN);
2064 #ifdef GPU_CORRECT_ALIGNMENT
2065 uint32_t address = gpu_reg[14] + (gpu_convert_zero[IMM_1] << 2);
// The range test must look at the effective address. IMM_1 is an immediate
// offset in this addressing mode, so RM (the register it would index) has no
// bearing on where the load goes. This matches the indexed store handlers.
2067 if ((address >= 0xF03000) && (address <= 0xF03FFF))
2068 RN = GPUReadLong(address & 0xFFFFFFFC, GPU);
2070 RN = GPUReadLong(address, GPU);
// Variant without any alignment handling.
2072 RN = GPUReadLong(gpu_reg[14] + (gpu_convert_zero[IMM_1] << 2), GPU);
2074 #ifdef GPU_DIS_LOAD14I
2076 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
// LOAD (R15+n),Rn: reads a long from gpu_reg[15] plus a scaled immediate
// offset (gpu_convert_zero[IMM_1] << 2) into RN.
2080 static void gpu_opcode_load_r15_indexed(void)
2082 #ifdef GPU_DIS_LOAD15I
2084 WriteLog("%06X: LOAD (R15+$%02X), R%02u [NCZ:%u%u%u, R15+$%02X=%08X, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1] << 2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, gpu_convert_zero[IMM_1] << 2, gpu_reg[15]+(gpu_convert_zero[IMM_1] << 2), IMM_2, RN);
2086 #ifdef GPU_CORRECT_ALIGNMENT
2087 uint32_t address = gpu_reg[15] + (gpu_convert_zero[IMM_1] << 2);
// The range test must look at the effective address. IMM_1 is an immediate
// offset in this addressing mode, so RM (the register it would index) has no
// bearing on where the load goes. This matches the indexed store handlers.
2089 if ((address >= 0xF03000) && (address <= 0xF03FFF))
2090 RN = GPUReadLong(address & 0xFFFFFFFC, GPU);
2092 RN = GPUReadLong(address, GPU);
// Variant without any alignment handling.
2094 RN = GPUReadLong(gpu_reg[15] + (gpu_convert_zero[IMM_1] << 2), GPU);
2096 #ifdef GPU_DIS_LOAD15I
2098 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2102 static void gpu_opcode_movei(void)
2104 #ifdef GPU_DIS_MOVEI
2106 WriteLog("%06X: MOVEI #$%08X, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, (uint32_t)GPUReadWord(gpu_pc) | ((uint32_t)GPUReadWord(gpu_pc + 2) << 16), IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2108 // This instruction is followed by 32-bit value in LSW / MSW format...
2109 RN = (uint32_t)GPUReadWord(gpu_pc, GPU) | ((uint32_t)GPUReadWord(gpu_pc + 2, GPU) << 16);
2111 #ifdef GPU_DIS_MOVEI
2113 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2117 static void gpu_opcode_moveta(void)
2119 #ifdef GPU_DIS_MOVETA
2121 WriteLog("%06X: MOVETA R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u(alt)=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, ALTERNATE_RN);
2124 #ifdef GPU_DIS_MOVETA
2126 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u(alt)=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, ALTERNATE_RN);
2130 static void gpu_opcode_movefa(void)
2132 #ifdef GPU_DIS_MOVEFA
2134 WriteLog("%06X: MOVEFA R%02u, R%02u [NCZ:%u%u%u, R%02u(alt)=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, ALTERNATE_RM, IMM_2, RN);
2137 #ifdef GPU_DIS_MOVEFA
2139 WriteLog("[NCZ:%u%u%u, R%02u(alt)=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, ALTERNATE_RM, IMM_2, RN);
2143 static void gpu_opcode_move(void)
2147 WriteLog("%06X: MOVE R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2152 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2156 static void gpu_opcode_moveq(void)
2158 #ifdef GPU_DIS_MOVEQ
2160 WriteLog("%06X: MOVEQ #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2163 #ifdef GPU_DIS_MOVEQ
2165 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2169 static void gpu_opcode_resmac(void)
2174 static void gpu_opcode_imult(void)
2176 #ifdef GPU_DIS_IMULT
2178 WriteLog("%06X: IMULT R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2180 RN = (int16_t)RN * (int16_t)RM;
2182 #ifdef GPU_DIS_IMULT
2184 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2188 static void gpu_opcode_mult(void)
2192 WriteLog("%06X: MULT R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2194 RN = (uint16_t)RM * (uint16_t)RN;
2195 // RN = (RM & 0xFFFF) * (RN & 0xFFFF);
2199 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2203 static void gpu_opcode_bclr(void)
2207 WriteLog("%06X: BCLR #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2209 uint32_t res = RN & ~(1 << IMM_1);
2214 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2218 static void gpu_opcode_btst(void)
2222 WriteLog("%06X: BTST #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2224 gpu_flag_z = (~RN >> IMM_1) & 1;
2227 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2231 static void gpu_opcode_bset(void)
2235 WriteLog("%06X: BSET #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2237 uint32_t res = RN | (1 << IMM_1);
2242 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2246 static void gpu_opcode_imacn(void)
2248 uint32_t res = (int16_t)RM * (int16_t)(RN);
2252 static void gpu_opcode_mtoi(void)
2255 uint32_t res = RN = (((int32_t)_RM >> 8) & 0xFF800000) | (_RM & 0x007FFFFF);
2259 static void gpu_opcode_normi(void)
2266 while ((_RM & 0xFFC00000) == 0)
2271 while ((_RM & 0xFF800000) != 0)
2281 static void gpu_opcode_mmult(void)
2283 int count = gpu_matrix_control & 0x0F; // Matrix width
2284 uint32_t addr = gpu_pointer_to_matrix; // In the GPU's RAM
2288 if (gpu_matrix_control & 0x10) // Column stepping
2290 for(int i=0; i<count; i++)
2294 a = (int16_t)((gpu_alternate_reg[IMM_1 + (i >> 1)] >> 16) & 0xFFFF);
2296 a = (int16_t)(gpu_alternate_reg[IMM_1 + (i >> 1)] & 0xFFFF);
2298 int16_t b = ((int16_t)GPUReadWord(addr + 2, GPU));
2303 else // Row stepping
2305 for(int i=0; i<count; i++)
2309 a = (int16_t)((gpu_alternate_reg[IMM_1 + (i >> 1)] >> 16) & 0xFFFF);
2311 a = (int16_t)(gpu_alternate_reg[IMM_1 + (i >> 1)] & 0xFFFF);
2313 int16_t b = ((int16_t)GPUReadWord(addr + 2, GPU));
2318 RN = res = (int32_t)accum;
2319 // carry flag to do (out of the last add)
2323 static void gpu_opcode_abs(void)
2327 WriteLog("%06X: ABS R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2329 gpu_flag_c = RN >> 31;
2330 if (RN == 0x80000000)
2331 //Is 0x80000000 a positive number? If so, then we need to set C to 0 as well!
2332 gpu_flag_n = 1, gpu_flag_z = 0;
2337 gpu_flag_n = 0; SET_FLAG_Z(RN);
2341 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
// DIV: unsigned division of RN by RM. Bit 0 of gpu_div_control selects 16.16
// fixed-point mode (dividend shifted left 16 bits) over plain 32-bit mode.
// The remainder is stored in gpu_remain.
// NOTE(review): no guard against RM == 0 is visible on these lines; confirm
// one surrounds the divisions.
2345 static void gpu_opcode_div(void) // RN / RM
2349 WriteLog("%06X: DIV R%02u, R%02u (%s) [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, (gpu_div_control & 0x01 ? "16.16" : "32"), gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2351 // NOTE: remainder is NOT calculated correctly here!
2352 // The original tried to get it right by checking to see if the
2353 // remainder was negative, but that's too late...
2354 // The code there should do it now, but I'm not 100% sure...
2355 // [Now it should be correct, but not displaying correct behavior of the actual
2356 // hardware. A step in the right direction.]
2360 if (gpu_div_control & 0x01) // 16.16 division
// The dividend is widened to 64 bits so the 16-bit shift cannot drop its high bits.
2362 gpu_remain = ((uint64_t)RN << 16) % RM;
2363 RN = ((uint64_t)RN << 16) / RM;
2367 // We calculate the remainder first because we destroy RN after
2368 // this by assigning it to itself.
2369 gpu_remain = RN % RM;
2373 // What we really should do here is figure out why this condition
2374 // happens in the real divide unit and emulate *that* behavior.
// Bit 31 of (gpu_remain - RM) is used as the sign of the would-be result.
2376 if ((gpu_remain - RM) & 0x80000000) // If the result would have been negative...
2377 gpu_remain -= RM; // Then make it negative!
2385 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] Remainder: %08X\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN, gpu_remain);
2389 static void gpu_opcode_imultn(void)
2391 uint32_t res = (int32_t)((int16_t)RN * (int16_t)RM);
2392 gpu_acc = (int32_t)res;
2397 static void gpu_opcode_neg(void)
2401 WriteLog("%06X: NEG R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2404 SET_ZNC_SUB(0, RN, res);
2408 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
// SHLQ: shifts RN left by (32 - IMM_1) bits; C receives the old bit 31 of RN,
// and Z and N come from the shifted value.
2412 static void gpu_opcode_shlq(void)
2416 WriteLog("%06X: SHLQ #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, 32 - IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2418 // Was a bug here...
2419 // (Look at Aaron's code: If r1 = 32, then 32 - 32 = 0 which is wrong!)
2420 int32_t r1 = 32 - IMM_1;
// r1 reaches 32 when IMM_1 is 0. Shifting a 32-bit value by its full width is
// undefined (x86 would shift by 0, reintroducing the bug described above), so
// that case is spelled out: every bit is shifted out and the result is 0.
2421 uint32_t res = (r1 < 32 ? RN << r1 : 0);
2422 SET_ZN(res); gpu_flag_c = (RN >> 31) & 1;
2426 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2430 static void gpu_opcode_shrq(void)
2434 WriteLog("%06X: SHRQ #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2436 int32_t r1 = gpu_convert_zero[IMM_1];
2437 uint32_t res = RN >> r1;
2438 SET_ZN(res); gpu_flag_c = RN & 1;
2442 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
// ROR: rotates RN right by the low five bits of RM; C receives the old bit 31
// of RN, and Z and N come from the rotated value.
2446 static void gpu_opcode_ror(void)
2450 WriteLog("%06X: ROR R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2452 uint32_t r1 = RM & 0x1F;
// The complementary shift is masked so that a rotate count of 0 does not turn
// into a (undefined) left shift by 32. For counts 1-31 the mask changes nothing.
2453 uint32_t res = (RN >> r1) | (RN << ((32 - r1) & 0x1F));
2454 SET_ZN(res); gpu_flag_c = (RN >> 31) & 1;
2458 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
// RORQ: rotates right by an immediate count looked up through
// gpu_convert_zero; C receives bit 31 of the pre-rotate value, and Z and N
// come from the rotated value.
2462 static void gpu_opcode_rorq(void)
2466 WriteLog("%06X: RORQ #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
// Reduce the count modulo 32: a rotate by 32 (presumably what
// gpu_convert_zero yields for an encoded 0 -- TODO confirm) is the identity,
// and shifting a 32-bit value by 32 directly would be undefined.
2468 uint32_t r1 = gpu_convert_zero[IMM_1 & 0x1F] & 0x1F;
// The complementary shift is masked for the same reason when r1 is 0.
2470 uint32_t res = (r2 >> r1) | (r2 << ((32 - r1) & 0x1F));
2472 SET_ZN(res); gpu_flag_c = (r2 >> 31) & 0x01;
2475 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2479 static void gpu_opcode_sha(void)
2481 /* int dreg = jaguar.op & 31;
2482 int32_t r1 = (int32_t)jaguar.r[(jaguar.op >> 5) & 31];
2483 uint32_t r2 = jaguar.r[dreg];
2489 res = (r1 <= -32) ? 0 : (r2 << -r1);
2490 jaguar.FLAGS |= (r2 >> 30) & 2;
2494 res = (r1 >= 32) ? ((int32_t)r2 >> 31) : ((int32_t)r2 >> r1);
2495 jaguar.FLAGS |= (r2 << 1) & 2;
2497 jaguar.r[dreg] = res;
2502 WriteLog("%06X: SHA R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2506 if ((int32_t)RM < 0)
2508 res = ((int32_t)RM <= -32) ? 0 : (RN << -(int32_t)RM);
2509 gpu_flag_c = RN >> 31;
2513 res = ((int32_t)RM >= 32) ? ((int32_t)RN >> 31) : ((int32_t)RN >> (int32_t)RM);
2514 gpu_flag_c = RN & 0x01;
2520 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2523 /* int32_t sRM=(int32_t)RM;
2528 uint32_t shift=-sRM;
2529 if (shift>=32) shift=32;
2530 gpu_flag_c=(_RN&0x80000000)>>31;
2540 if (shift>=32) shift=32;
2544 _RN=((int32_t)_RN)>>1;
2553 static void gpu_opcode_sharq(void)
2555 #ifdef GPU_DIS_SHARQ
2557 WriteLog("%06X: SHARQ #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2559 uint32_t res = (int32_t)RN >> gpu_convert_zero[IMM_1];
2560 SET_ZN(res); gpu_flag_c = RN & 0x01;
2562 #ifdef GPU_DIS_SHARQ
2564 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
// SH: logical shift of RN by the signed count in RM. A negative count shifts
// left by its magnitude; otherwise the shift is to the right.
2568 static void gpu_opcode_sh(void)
2572 WriteLog("%06X: SH R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2574 if (RM & 0x80000000) // Shift left
// C takes bit 31 of RN before the shift.
2576 gpu_flag_c = RN >> 31;
// A count of -32 or beyond clears RN; the check runs before RM is negated.
2577 RN = ((int32_t)RM <= -32 ? 0 : RN << -(int32_t)RM);
// Right shift: C takes bit 0 of RN before the shift.
2581 gpu_flag_c = RN & 0x01;
// A count of 32 or more clears RN.
2582 RN = (RM >= 32 ? 0 : RN >> RM);
2587 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2591 //Temporary: Testing only!
2592 //#include "gpu2.cpp"
2593 //#include "gpu3.cpp"
2597 // New thread-safe GPU core
2599 int GPUCore(void * data)