6 // Originally by David Raingeard (Cal2)
7 // GCC/SDL port by Niels Wagenaar (Linux/WIN32) and Caz (BeOS)
8 // Cleanups, endian wrongness, and bad ASM amelioration by James Hammons
9 // (C) 2010 Underground Software
11 // JLH = James Hammons <jlhamm@acm.org>
14 // --- ---------- -------------------------------------------------------------
15 // JLH 01/16/2010 Created this log ;-)
16 // JLH 11/26/2011 Added fixes for LOAD/STORE alignment issues
19 // Note: Endian wrongness probably stems from the MAME origins of this emu and
20 // the braindead way in which MAME handles memory. :-)
22 // Problem with not booting the BIOS was the incorrect way that the
23 // SUBC instruction set the carry when the carry was set going in...
24 // Same problem with ADDC...
30 #include <string.h> // For memset
35 #include "m68000/m68kinterface.h"
40 // Seems alignment in loads & stores was off...
41 #define GPU_CORRECT_ALIGNMENT
44 // For GPU disassembly...
65 #define GPU_DIS_LOAD14I
66 #define GPU_DIS_LOAD14R
67 #define GPU_DIS_LOAD15I
68 #define GPU_DIS_LOAD15R
70 #define GPU_DIS_MOVEFA
72 #define GPU_DIS_MOVEPC
73 #define GPU_DIS_MOVETA
90 #define GPU_DIS_STOREB
91 #define GPU_DIS_STOREW
92 #define GPU_DIS_STORE14I
93 #define GPU_DIS_STORE14R
94 #define GPU_DIS_STORE15I
95 #define GPU_DIS_STORE15R
102 //bool doGPUDis = false;
103 bool doGPUDis = true;
107 GPU opcodes use (BIOS flying ATARI logo):
// Bits of the GPU flags register that clear interrupt latches. On a write to
// the flags register, (gpu_flags & CINT04FLAGS) >> 3 is masked out of
// gpu_control, so CINTnFLAG lines up with the latch bit (0x0040 << n) that
// GPUSetIRQLine() sets.
149 #define CINT0FLAG 0x0200
150 #define CINT1FLAG 0x0400
151 #define CINT2FLAG 0x0800
152 #define CINT3FLAG 0x1000
153 #define CINT4FLAG 0x2000
// Union of all five latch-clear bits.
154 #define CINT04FLAGS (CINT0FLAG | CINT1FLAG | CINT2FLAG | CINT3FLAG | CINT4FLAG)
// Bit layout of the GPU flags register as used in this file.
// The low three bits mirror gpu_flag_z (bit 0), gpu_flag_c (bit 1) and
// gpu_flag_n (bit 2); they are merged in on read and split out on write.
158 #define ZERO_FLAG 0x0001
159 #define CARRY_FLAG 0x0002
160 #define NEGA_FLAG 0x0004
// Interrupt enable bits; consumed elsewhere as (gpu_flags >> 4) & 0x1F.
162 #define INT_ENA0 0x0010
163 #define INT_ENA1 0x0020
164 #define INT_ENA2 0x0040
165 #define INT_ENA3 0x0080
166 #define INT_ENA4 0x0100
// Same bit values as CINT0FLAG..CINT4FLAG (the interrupt latch clear bits).
167 #define INT_CLR0 0x0200
168 #define INT_CLR1 0x0400
169 #define INT_CLR2 0x0800
170 #define INT_CLR3 0x1000
171 #define INT_CLR4 0x2000
// Register bank select bit; read by GPUUpdateRegisterBanks().
172 #define REGPAGE 0x4000
175 // External global variables
177 extern int start_logging;
178 extern int gpu_start_log;
180 // Private function prototypes
182 void GPUUpdateRegisterBanks(void);
183 void GPUDumpDisassembly(void);
184 void GPUDumpRegisters(void);
185 void GPUDumpMemory(void);
187 static void gpu_opcode_add(void);
188 static void gpu_opcode_addc(void);
189 static void gpu_opcode_addq(void);
190 static void gpu_opcode_addqt(void);
191 static void gpu_opcode_sub(void);
192 static void gpu_opcode_subc(void);
193 static void gpu_opcode_subq(void);
194 static void gpu_opcode_subqt(void);
195 static void gpu_opcode_neg(void);
196 static void gpu_opcode_and(void);
197 static void gpu_opcode_or(void);
198 static void gpu_opcode_xor(void);
199 static void gpu_opcode_not(void);
200 static void gpu_opcode_btst(void);
201 static void gpu_opcode_bset(void);
202 static void gpu_opcode_bclr(void);
203 static void gpu_opcode_mult(void);
204 static void gpu_opcode_imult(void);
205 static void gpu_opcode_imultn(void);
206 static void gpu_opcode_resmac(void);
207 static void gpu_opcode_imacn(void);
208 static void gpu_opcode_div(void);
209 static void gpu_opcode_abs(void);
210 static void gpu_opcode_sh(void);
211 static void gpu_opcode_shlq(void);
212 static void gpu_opcode_shrq(void);
213 static void gpu_opcode_sha(void);
214 static void gpu_opcode_sharq(void);
215 static void gpu_opcode_ror(void);
216 static void gpu_opcode_rorq(void);
217 static void gpu_opcode_cmp(void);
218 static void gpu_opcode_cmpq(void);
219 static void gpu_opcode_sat8(void);
220 static void gpu_opcode_sat16(void);
221 static void gpu_opcode_move(void);
222 static void gpu_opcode_moveq(void);
223 static void gpu_opcode_moveta(void);
224 static void gpu_opcode_movefa(void);
225 static void gpu_opcode_movei(void);
226 static void gpu_opcode_loadb(void);
227 static void gpu_opcode_loadw(void);
228 static void gpu_opcode_load(void);
229 static void gpu_opcode_loadp(void);
230 static void gpu_opcode_load_r14_indexed(void);
231 static void gpu_opcode_load_r15_indexed(void);
232 static void gpu_opcode_storeb(void);
233 static void gpu_opcode_storew(void);
234 static void gpu_opcode_store(void);
235 static void gpu_opcode_storep(void);
236 static void gpu_opcode_store_r14_indexed(void);
237 static void gpu_opcode_store_r15_indexed(void);
238 static void gpu_opcode_move_pc(void);
239 static void gpu_opcode_jump(void);
240 static void gpu_opcode_jr(void);
241 static void gpu_opcode_mmult(void);
242 static void gpu_opcode_mtoi(void);
243 static void gpu_opcode_normi(void);
244 static void gpu_opcode_nop(void);
245 static void gpu_opcode_load_r14_ri(void);
246 static void gpu_opcode_load_r15_ri(void);
247 static void gpu_opcode_store_r14_ri(void);
248 static void gpu_opcode_store_r15_ri(void);
249 static void gpu_opcode_sat24(void);
250 static void gpu_opcode_pack(void);
252 // This is wrong, since it doesn't take pipeline effects into account. !!! FIX !!!
253 /*uint8_t gpu_opcode_cycles[64] =
255 3, 3, 3, 3, 3, 3, 3, 3,
256 3, 3, 3, 3, 3, 3, 3, 3,
257 3, 3, 1, 3, 1, 18, 3, 3,
258 3, 3, 3, 3, 3, 3, 3, 3,
259 3, 3, 2, 2, 2, 2, 3, 4,
260 5, 4, 5, 6, 6, 1, 1, 1,
261 1, 2, 2, 2, 1, 1, 9, 3,
262 3, 1, 6, 6, 2, 2, 3, 3
264 //Here's a QnD kludge...
265 //This is wrong, wrong, WRONG, but it seems to work for the time being...
266 //(That is, it fixes Flip Out which relies on GPU timing rather than semaphores. Bad developers! Bad!)
267 //What's needed here is a way to take pipeline effects into account (including pipeline stalls!)...
268 /*uint8_t gpu_opcode_cycles[64] =
270 1, 1, 1, 1, 1, 1, 1, 1,
271 1, 1, 1, 1, 1, 1, 1, 1,
272 1, 1, 1, 1, 1, 9, 1, 1,
273 1, 1, 1, 1, 1, 1, 1, 1,
274 1, 1, 1, 1, 1, 1, 1, 2,
275 2, 2, 2, 3, 3, 1, 1, 1,
276 1, 1, 1, 1, 1, 1, 4, 1,
277 1, 1, 3, 3, 1, 1, 1, 1
279 uint8_t gpu_opcode_cycles[64] =
281 1, 1, 1, 1, 1, 1, 1, 1,
282 1, 1, 1, 1, 1, 1, 1, 1,
283 1, 1, 1, 1, 1, 1, 1, 1,
284 1, 1, 1, 1, 1, 1, 1, 1,
285 1, 1, 1, 1, 1, 1, 1, 1,
286 1, 1, 1, 1, 1, 1, 1, 1,
287 1, 1, 1, 1, 1, 1, 1, 1,
288 1, 1, 1, 1, 1, 1, 1, 1
291 void (*gpu_opcode[64])()=
293 gpu_opcode_add, gpu_opcode_addc, gpu_opcode_addq, gpu_opcode_addqt,
294 gpu_opcode_sub, gpu_opcode_subc, gpu_opcode_subq, gpu_opcode_subqt,
295 gpu_opcode_neg, gpu_opcode_and, gpu_opcode_or, gpu_opcode_xor,
296 gpu_opcode_not, gpu_opcode_btst, gpu_opcode_bset, gpu_opcode_bclr,
297 gpu_opcode_mult, gpu_opcode_imult, gpu_opcode_imultn, gpu_opcode_resmac,
298 gpu_opcode_imacn, gpu_opcode_div, gpu_opcode_abs, gpu_opcode_sh,
299 gpu_opcode_shlq, gpu_opcode_shrq, gpu_opcode_sha, gpu_opcode_sharq,
300 gpu_opcode_ror, gpu_opcode_rorq, gpu_opcode_cmp, gpu_opcode_cmpq,
301 gpu_opcode_sat8, gpu_opcode_sat16, gpu_opcode_move, gpu_opcode_moveq,
302 gpu_opcode_moveta, gpu_opcode_movefa, gpu_opcode_movei, gpu_opcode_loadb,
303 gpu_opcode_loadw, gpu_opcode_load, gpu_opcode_loadp, gpu_opcode_load_r14_indexed,
304 gpu_opcode_load_r15_indexed, gpu_opcode_storeb, gpu_opcode_storew, gpu_opcode_store,
305 gpu_opcode_storep, gpu_opcode_store_r14_indexed, gpu_opcode_store_r15_indexed, gpu_opcode_move_pc,
306 gpu_opcode_jump, gpu_opcode_jr, gpu_opcode_mmult, gpu_opcode_mtoi,
307 gpu_opcode_normi, gpu_opcode_nop, gpu_opcode_load_r14_ri, gpu_opcode_load_r15_ri,
308 gpu_opcode_store_r14_ri, gpu_opcode_store_r15_ri, gpu_opcode_sat24, gpu_opcode_pack,
// GPU work RAM (0x1000 bytes); the byte accessors index it with (offset & 0xFFF).
311 static uint8_t gpu_ram_8[0x1000];
// GPU register state. gpu_flags, gpu_matrix_control, gpu_pointer_to_matrix,
// gpu_data_organization, gpu_control and gpu_div_control are read and/or
// written through the control register paths of GPUReadLong()/GPUWriteLong().
313 static uint32_t gpu_acc;
314 static uint32_t gpu_remain;
315 static uint32_t gpu_hidata;
316 static uint32_t gpu_flags;
317 static uint32_t gpu_matrix_control;
318 static uint32_t gpu_pointer_to_matrix;
319 static uint32_t gpu_data_organization;
320 static uint32_t gpu_control;
321 static uint32_t gpu_div_control;
322 // There is a distinct advantage to having these separated out--there's no need to clear
323 // a bit before writing a result. I.e., if the result of an operation leaves a zero in
324 // the carry flag, you don't have to zero gpu_flag_c before you can write that zero!
325 static uint8_t gpu_flag_z, gpu_flag_n, gpu_flag_c;
// The two register banks. gpu_reg points at the currently active bank and
// gpu_alternate_reg at the other one; GPUUpdateRegisterBanks() reassigns both.
326 uint32_t gpu_reg_bank_0[32];
327 uint32_t gpu_reg_bank_1[32];
328 static uint32_t * gpu_reg;
329 static uint32_t * gpu_alternate_reg;
// Decoded fields of the opcode word most recently fetched by GPUExec():
// the raw word, bits 5-9, and bits 0-4 respectively.
331 static uint32_t gpu_instruction;
332 static uint32_t gpu_opcode_first_parameter;
333 static uint32_t gpu_opcode_second_parameter;
// Non-zero while bit 0 of gpu_control is set; GPUExec() only loops while this holds.
335 #define GPU_RUNNING (gpu_control & 0x01)
// Operand accessors for the instruction currently being executed.
// RM/RN index the active register bank with the first/second 5-bit opcode field.
337 #define RM gpu_reg[gpu_opcode_first_parameter]
338 #define RN gpu_reg[gpu_opcode_second_parameter]
// Same fields, but indexing the alternate (non-active) register bank.
339 #define ALTERNATE_RM gpu_alternate_reg[gpu_opcode_first_parameter]
340 #define ALTERNATE_RN gpu_alternate_reg[gpu_opcode_second_parameter]
// The raw 5-bit fields themselves, for opcodes that treat them as immediates.
341 #define IMM_1 gpu_opcode_first_parameter
342 #define IMM_2 gpu_opcode_second_parameter
// Flag helpers operating on the separately stored gpu_flag_z/n/c bytes.
// Z is set when the result is zero; N is bit 31 of the result.
// Note: SET_FLAG_* and RESET_FLAG_* already end in ';' inside the macro.
344 #define SET_FLAG_Z(r) (gpu_flag_z = ((r) == 0));
345 #define SET_FLAG_N(r) (gpu_flag_n = (((uint32_t)(r) >> 31) & 0x01));
347 #define RESET_FLAG_Z() gpu_flag_z = 0;
348 #define RESET_FLAG_N() gpu_flag_n = 0;
349 #define RESET_FLAG_C() gpu_flag_c = 0;
// Expression-style variants (no trailing ';').
351 #define CLR_Z (gpu_flag_z = 0)
352 #define CLR_ZN (gpu_flag_z = gpu_flag_n = 0)
353 #define CLR_ZNC (gpu_flag_z = gpu_flag_n = gpu_flag_c = 0)
354 #define SET_Z(r) (gpu_flag_z = ((r) == 0))
355 #define SET_N(r) (gpu_flag_n = (((uint32_t)(r) >> 31) & 0x01))
// Carry out of a + b: true when b > ~a as unsigned 32-bit values,
// i.e. when the sum does not fit in 32 bits.
356 #define SET_C_ADD(a,b) (gpu_flag_c = ((uint32_t)(b) > (uint32_t)(~(a))))
// Borrow out of a - b: true when b > a as unsigned 32-bit values.
357 #define SET_C_SUB(a,b) (gpu_flag_c = ((uint32_t)(b) > (uint32_t)(a)))
// NOTE(review): the three macros below expand to several ';'-separated
// statements, so only the first would be governed by an unbraced if/else.
358 #define SET_ZN(r) SET_N(r); SET_Z(r)
359 #define SET_ZNC_ADD(a,b,r) SET_N(r); SET_Z(r); SET_C_ADD(a,b)
360 #define SET_ZNC_SUB(a,b,r) SET_N(r); SET_Z(r); SET_C_SUB(a,b)
// Lookup table that is the identity for 1..31 and maps index 0 to 32.
// NOTE(review): presumably used to decode 5-bit immediates in which 0
// encodes 32 -- confirm against the opcode handlers that index it.
362 uint32_t gpu_convert_zero[32] =
363 { 32,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 };
// Branch decision table, allocated on first use by build_branch_condition_table():
// 8 flag combinations x 32 condition codes, one byte per entry, stored at
// [flags * 32 + condition].
365 uint8_t * branch_condition_table = 0;
// Looks up condition code x for the current flags. The macro reads a variable
// named jaguar_flags from the caller's scope (low 3 bits: N, C, Z as
// (n << 2) | (c << 1) | z), so the caller must define it before use.
366 #define BRANCH_CONDITION(x) branch_condition_table[(x) + ((jaguar_flags & 7) << 5)]
368 uint32_t gpu_opcode_use[64];
370 const char * gpu_opcode_str[64]=
372 "add", "addc", "addq", "addqt",
373 "sub", "subc", "subq", "subqt",
374 "neg", "and", "or", "xor",
375 "not", "btst", "bset", "bclr",
376 "mult", "imult", "imultn", "resmac",
377 "imacn", "div", "abs", "sh",
378 "shlq", "shrq", "sha", "sharq",
379 "ror", "rorq", "cmp", "cmpq",
380 "sat8", "sat16", "move", "moveq",
381 "moveta", "movefa", "movei", "loadb",
382 "loadw", "load", "loadp", "load_r14_indexed",
383 "load_r15_indexed", "storeb", "storew", "store",
384 "storep", "store_r14_indexed","store_r15_indexed","move_pc",
385 "jump", "jr", "mmult", "mtoi",
386 "normi", "nop", "load_r14_ri", "load_r15_ri",
387 "store_r14_ri", "store_r15_ri", "sat24", "pack",
390 static uint32_t gpu_in_exec = 0;
391 static uint32_t gpu_releaseTimeSlice_flag = 0;
393 void GPUReleaseTimeslice(void)
395 gpu_releaseTimeSlice_flag = 1;
398 uint32_t GPUGetPC(void)
403 void build_branch_condition_table(void)
405 if (!branch_condition_table)
407 branch_condition_table = (uint8_t *)malloc(32 * 8 * sizeof(branch_condition_table[0]));
409 if (branch_condition_table)
411 for(int i=0; i<8; i++)
413 for(int j=0; j<32; j++)
420 if (!(i & ZERO_FLAG))
423 if (i & (CARRY_FLAG << (j >> 4)))
426 if (!(i & (CARRY_FLAG << (j >> 4))))
428 branch_condition_table[i * 32 + j] = result;
436 // GPU byte access (read)
438 uint8_t GPUReadByte(uint32_t offset, uint32_t who/*=UNKNOWN*/)
440 if (offset >= 0xF02000 && offset <= 0xF020FF)
441 WriteLog("GPU: ReadByte--Attempt to read from GPU register file by %s!\n", whoName[who]);
443 if ((offset >= GPU_WORK_RAM_BASE) && (offset < GPU_WORK_RAM_BASE+0x1000))
444 return gpu_ram_8[offset & 0xFFF];
445 else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset < GPU_CONTROL_RAM_BASE+0x20))
447 uint32_t data = GPUReadLong(offset & 0xFFFFFFFC, who);
449 if ((offset & 0x03) == 0)
451 else if ((offset & 0x03) == 1)
452 return (data >> 16) & 0xFF;
453 else if ((offset & 0x03) == 2)
454 return (data >> 8) & 0xFF;
455 else if ((offset & 0x03) == 3)
459 return JaguarReadByte(offset, who);
463 // GPU word access (read)
465 uint16_t GPUReadWord(uint32_t offset, uint32_t who/*=UNKNOWN*/)
467 if (offset >= 0xF02000 && offset <= 0xF020FF)
468 WriteLog("GPU: ReadWord--Attempt to read from GPU register file by %s!\n", whoName[who]);
470 if ((offset >= GPU_WORK_RAM_BASE) && (offset < GPU_WORK_RAM_BASE+0x1000))
473 uint16_t data = ((uint16_t)gpu_ram_8[offset] << 8) | (uint16_t)gpu_ram_8[offset+1];
476 else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset < GPU_CONTROL_RAM_BASE+0x20))
478 // This looks and smells wrong...
479 // But it *might* be OK...
480 if (offset & 0x01) // Catch cases 1 & 3... (unaligned read)
481 return (GPUReadByte(offset, who) << 8) | GPUReadByte(offset+1, who);
483 uint32_t data = GPUReadLong(offset & 0xFFFFFFFC, who);
485 if (offset & 0x02) // Cases 0 & 2...
486 return data & 0xFFFF;
491 //TEMP--Mirror of F03000? No. Writes only...
492 //if (offset >= 0xF0B000 && offset <= 0xF0BFFF)
493 //WriteLog("[GPUR16] --> Possible GPU RAM mirror access by %s!", whoName[who]);
495 return JaguarReadWord(offset, who);
499 // GPU dword access (read)
501 uint32_t GPUReadLong(uint32_t offset, uint32_t who/*=UNKNOWN*/)
503 if (offset >= 0xF02000 && offset <= 0xF020FF)
505 WriteLog("GPU: ReadLong--Attempt to read from GPU register file (%X) by %s!\n", offset, whoName[who]);
506 uint32_t reg = (offset & 0xFC) >> 2;
507 return (reg < 32 ? gpu_reg_bank_0[reg] : gpu_reg_bank_1[reg - 32]);
510 // if ((offset >= GPU_WORK_RAM_BASE) && (offset < GPU_WORK_RAM_BASE + 0x1000))
511 if ((offset >= GPU_WORK_RAM_BASE) && (offset <= GPU_WORK_RAM_BASE + 0x0FFC))
514 return ((uint32_t)gpu_ram_8[offset] << 24) | ((uint32_t)gpu_ram_8[offset+1] << 16)
515 | ((uint32_t)gpu_ram_8[offset+2] << 8) | (uint32_t)gpu_ram_8[offset+3];//*/
516 // return GET32(gpu_ram_8, offset);
518 // else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset < GPU_CONTROL_RAM_BASE+0x20))
519 else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset <= GPU_CONTROL_RAM_BASE + 0x1C))
525 gpu_flag_c = (gpu_flag_c ? 1 : 0);
526 gpu_flag_z = (gpu_flag_z ? 1 : 0);
527 gpu_flag_n = (gpu_flag_n ? 1 : 0);
529 gpu_flags = (gpu_flags & 0xFFFFFFF8) | (gpu_flag_n << 2) | (gpu_flag_c << 1) | gpu_flag_z;
531 return gpu_flags & 0xFFFFC1FF;
533 return gpu_matrix_control;
535 return gpu_pointer_to_matrix;
537 return gpu_data_organization;
546 default: // unaligned long read
548 WriteLog("GPU: Read32--unaligned 32 bit read at %08X by %s.\n", GPU_CONTROL_RAM_BASE + offset, whoName[who]);
553 //TEMP--Mirror of F03000? No. Writes only...
554 //if (offset >= 0xF0B000 && offset <= 0xF0BFFF)
555 // WriteLog("[GPUR32] --> Possible GPU RAM mirror access by %s!\n", whoName[who]);
556 /*if (offset >= 0xF1D000 && offset <= 0xF1DFFF)
557 WriteLog("[GPUR32] --> Reading from Wavetable ROM!\n");//*/
559 return (JaguarReadWord(offset, who) << 16) | JaguarReadWord(offset + 2, who);
563 // GPU byte access (write)
565 void GPUWriteByte(uint32_t offset, uint8_t data, uint32_t who/*=UNKNOWN*/)
567 if (offset >= 0xF02000 && offset <= 0xF020FF)
568 WriteLog("GPU: WriteByte--Attempt to write to GPU register file by %s!\n", whoName[who]);
570 if ((offset >= GPU_WORK_RAM_BASE) && (offset <= GPU_WORK_RAM_BASE + 0x0FFF))
572 gpu_ram_8[offset & 0xFFF] = data;
574 //This is the same stupid worthless code that was in the DSP!!! AARRRGGGGHHHHH!!!!!!
577 m68k_end_timeslice();
578 dsp_releaseTimeslice();
582 else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset <= GPU_CONTROL_RAM_BASE + 0x1F))
584 uint32_t reg = offset & 0x1C;
585 int bytenum = offset & 0x03;
587 //This is definitely wrong!
588 if ((reg >= 0x1C) && (reg <= 0x1F))
589 gpu_div_control = (gpu_div_control & (~(0xFF << (bytenum << 3)))) | (data << (bytenum << 3));
592 uint32_t old_data = GPUReadLong(offset & 0xFFFFFFC, who);
593 bytenum = 3 - bytenum; // convention motorola !!!
594 old_data = (old_data & (~(0xFF << (bytenum << 3)))) | (data << (bytenum << 3));
595 GPUWriteLong(offset & 0xFFFFFFC, old_data, who);
599 // WriteLog("gpu: writing %.2x at 0x%.8x\n",data,offset);
600 JaguarWriteByte(offset, data, who);
604 // GPU word access (write)
606 void GPUWriteWord(uint32_t offset, uint16_t data, uint32_t who/*=UNKNOWN*/)
608 if (offset >= 0xF02000 && offset <= 0xF020FF)
609 WriteLog("GPU: WriteWord--Attempt to write to GPU register file by %s!\n", whoName[who]);
611 if ((offset >= GPU_WORK_RAM_BASE) && (offset <= GPU_WORK_RAM_BASE + 0x0FFE))
613 gpu_ram_8[offset & 0xFFF] = (data>>8) & 0xFF;
614 gpu_ram_8[(offset+1) & 0xFFF] = data & 0xFF;//*/
616 SET16(gpu_ram_8, offset, data);//*/
618 /*if (offset >= 0xF03214 && offset < 0xF0321F)
619 WriteLog("GPU: Writing WORD (%04X) to GPU RAM (%08X)...\n", data, offset);//*/
622 //This is the same stupid worthless code that was in the DSP!!! AARRRGGGGHHHHH!!!!!!
625 m68k_end_timeslice();
626 dsp_releaseTimeslice();
630 else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset <= GPU_CONTROL_RAM_BASE + 0x1E))
632 if (offset & 0x01) // This is supposed to weed out unaligned writes, but does nothing...
635 WriteLog("GPU: Write16--unaligned write @ %08X [%04X]\n", offset, data);
640 //Dual locations in this range: $1C Divide unit remainder/Divide unit control (R/W)
641 //This just literally sucks.
642 if ((offset & 0x1C) == 0x1C)
644 //This doesn't look right either--handles cases 1, 2, & 3 all the same!
646 gpu_div_control = (gpu_div_control & 0xFFFF0000) | (data & 0xFFFF);
648 gpu_div_control = (gpu_div_control & 0x0000FFFF) | ((data & 0xFFFF) << 16);
652 //WriteLog("[GPU W16:%08X,%04X]", offset, data);
653 uint32_t old_data = GPUReadLong(offset & 0xFFFFFFC, who);
656 old_data = (old_data & 0xFFFF0000) | (data & 0xFFFF);
658 old_data = (old_data & 0x0000FFFF) | ((data & 0xFFFF) << 16);
660 GPUWriteLong(offset & 0xFFFFFFC, old_data, who);
665 else if ((offset == GPU_WORK_RAM_BASE + 0x0FFF) || (GPU_CONTROL_RAM_BASE + 0x1F))
668 WriteLog("GPU: Write16--unaligned write @ %08X by %s [%04X]!\n", offset, whoName[who], data);
674 // Have to be careful here--this can cause an infinite loop!
675 JaguarWriteWord(offset, data, who);
679 // GPU dword access (write)
681 void GPUWriteLong(uint32_t offset, uint32_t data, uint32_t who/*=UNKNOWN*/)
683 if (offset >= 0xF02000 && offset <= 0xF020FF)
684 WriteLog("GPU: WriteLong--Attempt to write to GPU register file by %s!\n", whoName[who]);
686 // if ((offset >= GPU_WORK_RAM_BASE) && (offset < GPU_WORK_RAM_BASE + 0x1000))
687 if ((offset >= GPU_WORK_RAM_BASE) && (offset <= GPU_WORK_RAM_BASE + 0x0FFC))
692 WriteLog("GPU: Write32--unaligned write @ %08X [%08X] by %s\n", offset, data, whoName[who]);
698 SET32(gpu_ram_8, offset, data);
701 // else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset < GPU_CONTROL_RAM_BASE+0x20))
702 else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset <= GPU_CONTROL_RAM_BASE + 0x1C))
709 bool IMASKCleared = (gpu_flags & IMASK) && !(data & IMASK);
710 // NOTE: According to the JTRM, writing a 1 to IMASK has no effect; only the
711 // IRQ logic can set it. So we mask it out here to prevent problems...
712 gpu_flags = data & (~IMASK);
713 gpu_flag_z = gpu_flags & ZERO_FLAG;
714 gpu_flag_c = (gpu_flags & CARRY_FLAG) >> 1;
715 gpu_flag_n = (gpu_flags & NEGA_FLAG) >> 2;
716 GPUUpdateRegisterBanks();
717 gpu_control &= ~((gpu_flags & CINT04FLAGS) >> 3); // Interrupt latch clear bits
718 //Writing here is only an interrupt enable--this approach is just plain wrong!
720 //This, however, is A-OK! ;-)
721 if (IMASKCleared) // If IMASK was cleared,
722 GPUHandleIRQs(); // see if any other interrupts need servicing!
724 if (gpu_flags & (INT_ENA0 | INT_ENA1 | INT_ENA2 | INT_ENA3 | INT_ENA4))
725 WriteLog("GPU: Interrupt enable set by %s! Bits: %02X\n", whoName[who], (gpu_flags >> 4) & 0x1F);
726 WriteLog("GPU: REGPAGE %s...\n", (gpu_flags & REGPAGE ? "set" : "cleared"));
731 gpu_matrix_control = data;
734 // This can only point to long aligned addresses
735 gpu_pointer_to_matrix = data & 0xFFFFFFFC;
738 gpu_data_organization = data;
743 WriteLog("GPU: %s setting GPU PC to %08X %s\n", whoName[who], gpu_pc, (GPU_RUNNING ? "(GPU is RUNNING!)" : ""));//*/
748 // uint32_t gpu_was_running = GPU_RUNNING;
749 data &= ~0xF7C0; // Disable writes to INT_LAT0-4 & TOM version number
751 // check for GPU -> CPU interrupt
754 //WriteLog("GPU->CPU interrupt\n");
755 if (TOMIRQEnabled(IRQ_GPU))
757 //This is the programmer's responsibility, to make sure the handler is valid, not ours!
758 // if ((TOMIRQEnabled(IRQ_GPU))// && (JaguarInterruptHandlerIsValid(64)))
760 TOMSetPendingGPUInt();
761 m68k_set_irq(2); // Set 68000 IPL 2
762 GPUReleaseTimeslice();
768 // check for CPU -> GPU interrupt #0
771 //WriteLog("CPU->GPU interrupt\n");
772 GPUSetIRQLine(0, ASSERT_LINE);
773 m68k_end_timeslice();
774 DSPReleaseTimeslice();
781 //WriteLog("asked to perform a single step (single step is %senabled)\n",(data&0x8)?"":"not ");
784 gpu_control = (gpu_control & 0xF7C0) | (data & (~0xF7C0));
786 // if gpu wasn't running but is now running, execute a few cycles
787 #ifndef GPU_SINGLE_STEPPING
788 /* if (!gpu_was_running && GPU_RUNNING)
791 WriteLog("GPU: Write32--About to do stupid braindead GPU execution for 200 cycles.\n");
796 #endif // GPU_DEBUG//*/
798 if (gpu_control & 0x18)
800 #endif // #ifndef GPU_SINGLE_STEPPING
802 WriteLog("Write to GPU CTRL by %s: %08X ", whoName[who], data);
804 WriteLog(" --> Starting to run at %08X by %s...", gpu_pc, whoName[who]);
806 WriteLog(" --> Stopped by %s! (GPU_PC: %08X)", whoName[who], gpu_pc);
810 // GPUDumpDisassembly();
813 if (gpu_pc == 0xF035D8)
815 // GPUDumpDisassembly();
818 gpu_control &= 0xFFFFFFFE; // Don't run it and let's see what happens!
819 //Hmm. Seems to lock up when going into the demo...
820 //Try to disable the collision altogether!
823 extern int effect_start5;
824 static bool finished = false;
825 //if (GPU_RUNNING && effect_start5 && !finished)
826 if (GPU_RUNNING && effect_start5 && gpu_pc == 0xF035D8)
828 // Let's do a dump of $6528!
829 /* uint32_t numItems = JaguarReadWord(0x6BD6);
830 WriteLog("\nDump of $6528: %u items.\n\n", numItems);
831 for(int i=0; i<numItems*3*4; i+=3*4)
833 WriteLog("\t%04X: %08X %08X %08X -> ", 0x6528+i, JaguarReadLong(0x6528+i),
834 JaguarReadLong(0x6528+i+4), JaguarReadLong(0x6528+i+8));
835 uint16_t link = JaguarReadWord(0x6528+i+8+2);
836 for(int j=0; j<40; j+=4)
837 WriteLog("%08X ", JaguarReadLong(link + j));
841 // Let's try a manual blit here...
842 //This isn't working the way it should! !!! FIX !!!
843 //Err, actually, it is.
844 // NOW, it works right! Problem solved!!! It's a blitter bug!
845 /* uint32_t src = 0x4D54, dst = 0xF03000, width = 10 * 4;
846 for(int y=0; y<127; y++)
848 for(int x=0; x<2; x++)
850 JaguarWriteLong(dst, JaguarReadLong(src));
855 src += width - (2 * 4);
859 WriteLog("\nGPU: About to execute collision detection code.\n\n");//*/
861 /* WriteLog("\nGPU: About to execute collision detection code. Data @ 4D54:\n\n");
863 for(int i=0x004D54; i<0x004D54+2048; i++)
865 WriteLog("%02X ", JaguarReadByte(i));
873 WriteLog("\n\nData @ F03000:\n\n");
875 for(int i=0xF03000; i<0xF03200; i++)
877 WriteLog("%02X ", JaguarReadByte(i));
891 /*if (!GPU_RUNNING && finished)
893 WriteLog("\nGPU: Finished collision detection code. Exiting!\n\n");
898 // (?) If we're set running by the M68K (or DSP?) then end its timeslice to
899 // allow the GPU a chance to run...
900 // Yes! This partially fixed Trevor McFur...
902 m68k_end_timeslice();
909 gpu_div_control = data;
911 // default: // unaligned long write
918 // JaguarWriteWord(offset, (data >> 16) & 0xFFFF, who);
919 // JaguarWriteWord(offset+2, data & 0xFFFF, who);
920 // We're a 32-bit processor, we can do a long write...!
921 JaguarWriteLong(offset, data, who);
925 // Change register banks if necessary
927 void GPUUpdateRegisterBanks(void)
929 int bank = (gpu_flags & REGPAGE); // REGPAGE bit
931 if (gpu_flags & IMASK) // IMASK bit
932 bank = 0; // IMASK forces main bank to be bank 0
935 gpu_reg = gpu_reg_bank_1, gpu_alternate_reg = gpu_reg_bank_0;
937 gpu_reg = gpu_reg_bank_0, gpu_alternate_reg = gpu_reg_bank_1;
940 void GPUHandleIRQs(void)
942 // Bail out if we're already in an interrupt!
943 if (gpu_flags & IMASK)
946 // Get the interrupt latch & enable bits
947 uint32_t bits = (gpu_control >> 6) & 0x1F, mask = (gpu_flags >> 4) & 0x1F;
949 // Bail out if latched interrupts aren't enabled
954 // Determine which interrupt to service
955 uint32_t which = 0; //Isn't there a #pragma to disable this warning???
968 WriteLog("GPU: Generating IRQ #%i\n", which);
970 // set the interrupt flag
972 GPUUpdateRegisterBanks();
974 // subqt #4,r31 ; pre-decrement stack pointer
975 // move pc,r30 ; address of interrupted code
976 // store r30,(r31) ; store return address
978 GPUWriteLong(gpu_reg[31], gpu_pc - 2, GPU);
980 // movei #service_address,r30 ; pointer to ISR entry
981 // jump (r30) ; jump to ISR
983 gpu_pc = gpu_reg[30] = GPU_WORK_RAM_BASE + (which * 0x10);
986 void GPUSetIRQLine(int irqline, int state)
989 WriteLog("GPU: Setting GPU IRQ line #%i\n", irqline);
991 uint32_t mask = 0x0040 << irqline;
992 gpu_control &= ~mask; // Clear the interrupt latch
996 gpu_control |= mask; // Assert the interrupt latch
997 GPUHandleIRQs(); // And handle the interrupt...
1001 //TEMPORARY: Testing only!
1007 // memory_malloc_secure((void **)&gpu_ram_8, 0x1000, "GPU work RAM");
1008 // memory_malloc_secure((void **)&gpu_reg_bank_0, 32 * sizeof(int32_t), "GPU bank 0 regs");
1009 // memory_malloc_secure((void **)&gpu_reg_bank_1, 32 * sizeof(int32_t), "GPU bank 1 regs");
1011 build_branch_condition_table();
1015 //TEMPORARY: Testing only!
1022 // GPU registers (directly visible)
1023 gpu_flags = 0x00000000;
1024 gpu_matrix_control = 0x00000000;
1025 gpu_pointer_to_matrix = 0x00000000;
1026 gpu_data_organization = 0xFFFFFFFF;
1027 gpu_pc = 0x00F03000;
1028 gpu_control = 0x00002800; // Correctly sets this as TOM Rev. 2
1029 gpu_hidata = 0x00000000;
1030 gpu_remain = 0x00000000; // These two registers are RO/WO
1031 gpu_div_control = 0x00000000;
1033 // GPU internal register
1034 gpu_acc = 0x00000000;
1036 gpu_reg = gpu_reg_bank_0;
1037 gpu_alternate_reg = gpu_reg_bank_1;
1039 for(int i=0; i<32; i++)
1040 gpu_reg[i] = gpu_alternate_reg[i] = 0x00000000;
1043 memset(gpu_ram_8, 0xFF, 0x1000);
1045 //not needed GPUInterruptPending = false;
1048 // Contents of local RAM are quasi-stable; we simulate this by randomizing RAM contents
1049 for(uint32_t i=0; i<4096; i+=4)
1050 *((uint32_t *)(&gpu_ram_8[i])) = rand();
1053 uint32_t GPUReadPC(void)
1058 void GPUResetStats(void)
1060 for(uint32_t i=0; i<64; i++)
1061 gpu_opcode_use[i] = 0;
1062 WriteLog("--> GPU stats were reset!\n");
1065 void GPUDumpDisassembly(void)
1069 WriteLog("\n---[GPU code at 00F03000]---------------------------\n");
1070 uint32_t j = 0xF03000;
1071 while (j <= 0xF03FFF)
1074 j += dasmjag(JAGUAR_GPU, buffer, j);
1075 WriteLog("\t%08X: %s\n", oldj, buffer);
1079 void GPUDumpRegisters(void)
1081 WriteLog("\n---[GPU flags: NCZ %d%d%d]-----------------------\n", gpu_flag_n, gpu_flag_c, gpu_flag_z);
1082 WriteLog("\nRegisters bank 0\n");
1083 for(int j=0; j<8; j++)
1085 WriteLog("\tR%02i = %08X R%02i = %08X R%02i = %08X R%02i = %08X\n",
1086 (j << 2) + 0, gpu_reg_bank_0[(j << 2) + 0],
1087 (j << 2) + 1, gpu_reg_bank_0[(j << 2) + 1],
1088 (j << 2) + 2, gpu_reg_bank_0[(j << 2) + 2],
1089 (j << 2) + 3, gpu_reg_bank_0[(j << 2) + 3]);
1091 WriteLog("Registers bank 1\n");
1092 for(int j=0; j<8; j++)
1094 WriteLog("\tR%02i = %08X R%02i = %08X R%02i = %08X R%02i = %08X\n",
1095 (j << 2) + 0, gpu_reg_bank_1[(j << 2) + 0],
1096 (j << 2) + 1, gpu_reg_bank_1[(j << 2) + 1],
1097 (j << 2) + 2, gpu_reg_bank_1[(j << 2) + 2],
1098 (j << 2) + 3, gpu_reg_bank_1[(j << 2) + 3]);
1102 void GPUDumpMemory(void)
1104 WriteLog("\n---[GPU data at 00F03000]---------------------------\n");
1105 for(int i=0; i<0xFFF; i+=4)
1106 WriteLog("\t%08X: %02X %02X %02X %02X\n", 0xF03000+i, gpu_ram_8[i],
1107 gpu_ram_8[i+1], gpu_ram_8[i+2], gpu_ram_8[i+3]);
1112 WriteLog("GPU: Stopped at PC=%08X (GPU %s running)\n", (unsigned int)gpu_pc, GPU_RUNNING ? "was" : "wasn't");
1114 // Get the interrupt latch & enable bits
1115 uint8_t bits = (gpu_control >> 6) & 0x1F, mask = (gpu_flags >> 4) & 0x1F;
1116 WriteLog("GPU: Latch bits = %02X, enable bits = %02X\n", bits, mask);
1119 GPUDumpDisassembly();
1121 WriteLog("\nGPU opcodes use:\n");
1122 for(int i=0; i<64; i++)
1124 if (gpu_opcode_use[i])
1125 WriteLog("\t%17s %lu\n", gpu_opcode_str[i], gpu_opcode_use[i]);
1129 // memory_free(gpu_ram_8);
1130 // memory_free(gpu_reg_bank_0);
1131 // memory_free(gpu_reg_bank_1);
1135 // Main GPU execution core
1137 static int testCount = 1;
1139 static bool tripwire = false;
1140 void GPUExec(int32_t cycles)
1145 #ifdef GPU_SINGLE_STEPPING
1146 if (gpu_control & 0x18)
1149 gpu_control &= ~0x10;
1153 gpu_releaseTimeSlice_flag = 0;
1156 while (cycles > 0 && GPU_RUNNING)
1158 if (gpu_ram_8[0x054] == 0x98 && gpu_ram_8[0x055] == 0x0A && gpu_ram_8[0x056] == 0x03
1159 && gpu_ram_8[0x057] == 0x00 && gpu_ram_8[0x058] == 0x00 && gpu_ram_8[0x059] == 0x00)
1161 if (gpu_pc == 0xF03000)
1163 extern uint32_t starCount;
1165 /* WriteLog("GPU: Starting starfield generator... Dump of [R03=%08X]:\n", gpu_reg_bank_0[03]);
1166 uint32_t base = gpu_reg_bank_0[3];
1167 for(uint32_t i=0; i<0x100; i+=16)
1169 WriteLog("%02X: ", i);
1170 for(uint32_t j=0; j<16; j++)
1172 WriteLog("%02X ", JaguarReadByte(base + i + j));
1177 // if (gpu_pc == 0xF03)
1181 /*if (gpu_pc == 0xF03B9E && gpu_reg_bank_0[01] == 0)
1184 WriteLog("GPU: Starting disassembly log...\n");
1187 /*if (gpu_pc == 0xF0359A)
1192 /* gpu_flag_c = (gpu_flag_c ? 1 : 0);
1193 gpu_flag_z = (gpu_flag_z ? 1 : 0);
1194 gpu_flag_n = (gpu_flag_n ? 1 : 0);*/
1196 uint16_t opcode = GPUReadWord(gpu_pc, GPU);
1197 uint32_t index = opcode >> 10;
1198 gpu_instruction = opcode; // Added for GPU #3...
1199 gpu_opcode_first_parameter = (opcode >> 5) & 0x1F;
1200 gpu_opcode_second_parameter = opcode & 0x1F;
1201 /*if (gpu_pc == 0xF03BE8)
1202 WriteLog("Start of OP frame write...\n");
1203 if (gpu_pc == 0xF03EEE)
1204 WriteLog("--> Writing BRANCH object ---\n");
1205 if (gpu_pc == 0xF03F62)
1206 WriteLog("--> Writing BITMAP object ***\n");//*/
1207 /*if (gpu_pc == 0xF03546)
1209 WriteLog("\n--> GPU PC: F03546\n");
1211 GPUDumpDisassembly();
1213 /*if (gpu_pc == 0xF033F6)
1215 WriteLog("\n--> GPU PC: F033F6\n");
1217 GPUDumpDisassembly();
1219 /*if (gpu_pc == 0xF033CC)
1221 WriteLog("\n--> GPU PC: F033CC\n");
1223 GPUDumpDisassembly();
1225 /*if (gpu_pc == 0xF033D6)
1227 WriteLog("\n--> GPU PC: F033D6 (#%d)\n", testCount++);
1231 /*if (gpu_pc == 0xF033D8)
1233 WriteLog("\n--> GPU PC: F033D8 (#%d)\n", testCount++);
1237 /*if (gpu_pc == 0xF0358E)
1239 WriteLog("\n--> GPU PC: F0358E (#%d)\n", testCount++);
1243 /*if (gpu_pc == 0xF034CA)
1245 WriteLog("\n--> GPU PC: F034CA (#%d)\n", testCount++);
1248 /*if (gpu_pc == 0xF034CA)
1250 len = gpu_reg[1] + 4;//, r9save = gpu_reg[9];
1251 WriteLog("\nAbout to subtract [#%d] (R14=%08X, R15=%08X, R9=%08X):\n ", testCount++, gpu_reg[14], gpu_reg[15], gpu_reg[9]);
1252 for(int i=0; i<len; i+=4)
1253 WriteLog(" %08X", GPUReadLong(gpu_reg[15]+i));
1255 for(int i=0; i<len; i+=4)
1256 WriteLog(" %08X", GPUReadLong(gpu_reg[14]+i));
1259 if (gpu_pc == 0xF034DE)
1261 WriteLog("\nSubtracted! (R14=%08X, R15=%08X):\n ", gpu_reg[14], gpu_reg[15]);
1262 for(int i=0; i<len; i+=4)
1263 WriteLog(" %08X", GPUReadLong(gpu_reg[15]+i));
1265 for(int i=0; i<len; i+=4)
1266 WriteLog(" %08X", GPUReadLong(gpu_reg[14]+i));
1268 for(int i=0; i<len; i+=4)
1269 WriteLog(" --------");
1271 for(int i=0; i<len; i+=4)
1272 WriteLog(" %08X", GPUReadLong(gpu_reg[9]+4+i));
1275 /*if (gpu_pc == 0xF035C8)
1277 WriteLog("\n--> GPU PC: F035C8 (#%d)\n", testCount++);
1279 GPUDumpDisassembly();
1284 // gpu_reset_stats();
1285 static char buffer[512];
1286 dasmjag(JAGUAR_GPU, buffer, gpu_pc);
1287 WriteLog("GPU: [%08X] %s (RM=%08X, RN=%08X) -> ", gpu_pc, buffer, RM, RN);
1289 //$E400 -> 1110 01 -> $39 -> 57
1292 gpu_opcode[index]();
1294 // gpu2_opcode[index]();
1296 //GPU #3 (Doesn't show ATARI logo! #1 & #2 do...)
1298 // gpu3_opcode[index]();
1301 //GPU: [00F03548] jr nz,00F03560 (0xd561) (RM=00F03114, RN=00000004) -> --> JR: Branch taken.
1302 /*static bool firstTime = true;
1303 if (gpu_pc == 0xF03548 && firstTime)
1306 // firstTime = false;
1308 //static char buffer[512];
1310 //while (k<0xF0356C)
1313 //k += dasmjag(JAGUAR_GPU, buffer, k);
1314 //WriteLog("GPU: [%08X] %s\n", oldk, buffer);
1316 // gpu_start_log = 1;
1318 //GPU: [00F0354C] jump nz,(r29) (0xd3a1) (RM=00F03314, RN=00000004) -> (RM=00F03314, RN=00000004)
1319 /*if (gpu_pc == 0xF0354C)
1320 gpu_flag_z = 0;//, gpu_start_log = 1;//*/
1322 cycles -= gpu_opcode_cycles[index];
1323 gpu_opcode_use[index]++;
1325 WriteLog("(RM=%08X, RN=%08X)\n", RM, RN);//*/
1326 if ((gpu_pc < 0xF03000 || gpu_pc > 0xF03FFF) && !tripwire)
1328 WriteLog("GPU: Executing outside local RAM! GPU_PC: %08X\n", gpu_pc);
1341 GPU opcodes use (offset punch--vertically below bad guy):
1363 load_r14_indexed 1183
1364 load_r15_indexed 1125
1367 store_r14_indexed 320
1375 static void gpu_opcode_jump(void)
1378 const char * condition[32] =
1379 { "T", "nz", "z", "???", "nc", "nc nz", "nc z", "???", "c", "c nz",
1380 "c z", "???", "???", "???", "???", "???", "???", "???", "???",
1381 "???", "nn", "nn nz", "nn z", "???", "n", "n nz", "n z", "???",
1382 "???", "???", "???", "F" };
1384 WriteLog("%06X: JUMP %s, (R%02u) [NCZ:%u%u%u, R%02u=%08X] ", gpu_pc-2, condition[IMM_2], IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM);
1387 /* gpu_flag_c = (gpu_flag_c ? 1 : 0);
1388 gpu_flag_z = (gpu_flag_z ? 1 : 0);
1389 gpu_flag_n = (gpu_flag_n ? 1 : 0);*/
1390 // KLUDGE: Used by BRANCH_CONDITION
1391 uint32_t jaguar_flags = (gpu_flag_n << 2) | (gpu_flag_c << 1) | gpu_flag_z;
1393 if (BRANCH_CONDITION(IMM_2))
1397 WriteLog("Branched!\n");
1400 WriteLog(" --> JUMP: Branch taken.\n");
1401 uint32_t delayed_pc = RM;
1403 gpu_pc = delayed_pc;
1404 /* uint16_t opcode = GPUReadWord(gpu_pc, GPU);
1405 gpu_opcode_first_parameter = (opcode >> 5) & 0x1F;
1406 gpu_opcode_second_parameter = opcode & 0x1F;
1408 gpu_pc = delayed_pc;
1409 gpu_opcode[opcode>>10]();//*/
1414 WriteLog("Branch NOT taken.\n");
1418 static void gpu_opcode_jr(void)
1421 const char * condition[32] =
1422 { "T", "nz", "z", "???", "nc", "nc nz", "nc z", "???", "c", "c nz",
1423 "c z", "???", "???", "???", "???", "???", "???", "???", "???",
1424 "???", "nn", "nn nz", "nn z", "???", "n", "n nz", "n z", "???",
1425 "???", "???", "???", "F" };
1427 WriteLog("%06X: JR %s, %06X [NCZ:%u%u%u] ", gpu_pc-2, condition[IMM_2], gpu_pc+((IMM_1 & 0x10 ? 0xFFFFFFF0 | IMM_1 : IMM_1) * 2), gpu_flag_n, gpu_flag_c, gpu_flag_z);
1429 /* if (CONDITION(jaguar.op & 31))
1431 int32_t r1 = (INT8)((jaguar.op >> 2) & 0xF8) >> 2;
1432 uint32_t newpc = jaguar.PC + r1;
1434 jaguar.op = ROPCODE(jaguar.PC);
1436 (*jaguar.table[jaguar.op >> 10])();
1438 jaguar_icount -= 3; // 3 wait states guaranteed
1441 /* gpu_flag_n = (gpu_flag_n ? 1 : 0);
1442 gpu_flag_c = (gpu_flag_c ? 1 : 0);
1443 gpu_flag_z = (gpu_flag_z ? 1 : 0);*/
1444 // KLUDGE: Used by BRANCH_CONDITION
1445 uint32_t jaguar_flags = (gpu_flag_n << 2) | (gpu_flag_c << 1) | gpu_flag_z;
1447 if (BRANCH_CONDITION(IMM_2))
1451 WriteLog("Branched!\n");
1454 WriteLog(" --> JR: Branch taken.\n");
1455 int32_t offset = (IMM_1 & 0x10 ? 0xFFFFFFF0 | IMM_1 : IMM_1); // Sign extend IMM_1
1456 int32_t delayed_pc = gpu_pc + (offset * 2);
1458 gpu_pc = delayed_pc;
1459 /* uint16_t opcode = GPUReadWord(gpu_pc, GPU);
1460 gpu_opcode_first_parameter = (opcode >> 5) & 0x1F;
1461 gpu_opcode_second_parameter = opcode & 0x1F;
1463 gpu_pc = delayed_pc;
1464 gpu_opcode[opcode>>10]();//*/
1469 WriteLog("Branch NOT taken.\n");
1473 static void gpu_opcode_add(void)
1477 WriteLog("%06X: ADD R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1479 uint32_t res = RN + RM;
1480 CLR_ZNC; SET_ZNC_ADD(RN, RM, res);
1484 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1488 static void gpu_opcode_addc(void)
1492 WriteLog("%06X: ADDC R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1494 /* int dreg = jaguar.op & 31;
1495 uint32_t r1 = jaguar.r[(jaguar.op >> 5) & 31];
1496 uint32_t r2 = jaguar.r[dreg];
1497 uint32_t res = r2 + r1 + ((jaguar.FLAGS >> 1) & 1);
1498 jaguar.r[dreg] = res;
1499 CLR_ZNC; SET_ZNC_ADD(r2,r1,res);*/
1501 uint32_t res = RN + RM + gpu_flag_c;
1502 uint32_t carry = gpu_flag_c;
1503 // SET_ZNC_ADD(RN, RM, res); //???BUG??? Yes!
1504 SET_ZNC_ADD(RN + carry, RM, res);
1505 // SET_ZNC_ADD(RN, RM + carry, res);
1509 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1513 static void gpu_opcode_addq(void)
1517 WriteLog("%06X: ADDQ #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1519 uint32_t r1 = gpu_convert_zero[IMM_1];
1520 uint32_t res = RN + r1;
1521 CLR_ZNC; SET_ZNC_ADD(RN, r1, res);
1525 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1529 static void gpu_opcode_addqt(void)
1531 #ifdef GPU_DIS_ADDQT
1533 WriteLog("%06X: ADDQT #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1535 RN += gpu_convert_zero[IMM_1];
1536 #ifdef GPU_DIS_ADDQT
1538 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1542 static void gpu_opcode_sub(void)
1546 WriteLog("%06X: SUB R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1548 uint32_t res = RN - RM;
1549 SET_ZNC_SUB(RN, RM, res);
1553 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1557 static void gpu_opcode_subc(void)
1561 WriteLog("%06X: SUBC R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1563 uint32_t res = RN - RM - gpu_flag_c;
1564 uint32_t borrow = gpu_flag_c;
1565 // SET_ZNC_SUB(RN, RM, res); //???BUG??? YES!!!
1566 //No matter how you do it, there is a problem. With below, it's 0-0 with carry,
1567 //and the one below it it's FFFFFFFF - FFFFFFFF with carry... !!! FIX !!!
1568 // SET_ZNC_SUB(RN - borrow, RM, res);
1569 SET_ZNC_SUB(RN, RM + borrow, res);
1573 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1577 N = 5, M = 3, 3 - 5 = -2, C = 1... Or, in our case:
1578 N = 0, M = 1, 0 - 1 = -1, C = 0!
1580 #define SET_C_SUB(a,b) (gpu_flag_c = ((uint32_t)(b) > (uint32_t)(a)))
1581 #define SET_ZN(r) SET_N(r); SET_Z(r)
1582 #define SET_ZNC_ADD(a,b,r) SET_N(r); SET_Z(r); SET_C_ADD(a,b)
1583 #define SET_ZNC_SUB(a,b,r) SET_N(r); SET_Z(r); SET_C_SUB(a,b)
1585 static void gpu_opcode_subq(void)
1589 WriteLog("%06X: SUBQ #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1591 uint32_t r1 = gpu_convert_zero[IMM_1];
1592 uint32_t res = RN - r1;
1593 SET_ZNC_SUB(RN, r1, res);
1597 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1601 static void gpu_opcode_subqt(void)
1603 #ifdef GPU_DIS_SUBQT
1605 WriteLog("%06X: SUBQT #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1607 RN -= gpu_convert_zero[IMM_1];
1608 #ifdef GPU_DIS_SUBQT
1610 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1614 static void gpu_opcode_cmp(void)
1618 WriteLog("%06X: CMP R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1620 uint32_t res = RN - RM;
1621 SET_ZNC_SUB(RN, RM, res);
1624 WriteLog("[NCZ:%u%u%u]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z);
1628 static void gpu_opcode_cmpq(void)
1630 static int32_t sqtable[32] =
1631 { 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,-16,-15,-14,-13,-12,-11,-10,-9,-8,-7,-6,-5,-4,-3,-2,-1 };
1634 WriteLog("%06X: CMPQ #%d, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, sqtable[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1636 uint32_t r1 = sqtable[IMM_1 & 0x1F]; // I like this better -> (INT8)(jaguar.op >> 2) >> 3;
1637 uint32_t res = RN - r1;
1638 SET_ZNC_SUB(RN, r1, res);
1641 WriteLog("[NCZ:%u%u%u]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z);
1645 static void gpu_opcode_and(void)
1649 WriteLog("%06X: AND R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1655 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1659 static void gpu_opcode_or(void)
1663 WriteLog("%06X: OR R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1669 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1673 static void gpu_opcode_xor(void)
1677 WriteLog("%06X: XOR R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1683 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1687 static void gpu_opcode_not(void)
1691 WriteLog("%06X: NOT R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1697 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1701 static void gpu_opcode_move_pc(void)
1703 #ifdef GPU_DIS_MOVEPC
1705 WriteLog("%06X: MOVE PC, R%02u [NCZ:%u%u%u, PC=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, gpu_pc-2, IMM_2, RN);
1707 // Should be previous PC--this might not always be previous instruction!
1708 // Then again, this will point right at the *current* instruction, i.e., MOVE PC,R!
1710 #ifdef GPU_DIS_MOVEPC
1712 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1716 static void gpu_opcode_sat8(void)
1720 WriteLog("%06X: SAT8 R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1722 RN = ((int32_t)RN < 0 ? 0 : (RN > 0xFF ? 0xFF : RN));
1726 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1730 static void gpu_opcode_sat16(void)
1732 RN = ((int32_t)RN < 0 ? 0 : (RN > 0xFFFF ? 0xFFFF : RN));
1736 static void gpu_opcode_sat24(void)
1738 RN = ((int32_t)RN < 0 ? 0 : (RN > 0xFFFFFF ? 0xFFFFFF : RN));
1742 static void gpu_opcode_store_r14_indexed(void)
1744 #ifdef GPU_DIS_STORE14I
1746 WriteLog("%06X: STORE R%02u, (R14+$%02X) [NCZ:%u%u%u, R%02u=%08X, R14+$%02X=%08X]\n", gpu_pc-2, IMM_2, gpu_convert_zero[IMM_1] << 2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, gpu_convert_zero[IMM_1] << 2, gpu_reg[14]+(gpu_convert_zero[IMM_1] << 2));
1748 #ifdef GPU_CORRECT_ALIGNMENT
1749 uint32_t address = gpu_reg[14] + (gpu_convert_zero[IMM_1] << 2);
1751 if (address >= 0xF03000 && address <= 0xF03FFF)
1752 GPUWriteLong(address & 0xFFFFFFFC, RN, GPU);
1754 GPUWriteLong(address, RN, GPU);
1756 GPUWriteLong(gpu_reg[14] + (gpu_convert_zero[IMM_1] << 2), RN, GPU);
1760 static void gpu_opcode_store_r15_indexed(void)
1762 #ifdef GPU_DIS_STORE15I
1764 WriteLog("%06X: STORE R%02u, (R15+$%02X) [NCZ:%u%u%u, R%02u=%08X, R15+$%02X=%08X]\n", gpu_pc-2, IMM_2, gpu_convert_zero[IMM_1] << 2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, gpu_convert_zero[IMM_1] << 2, gpu_reg[15]+(gpu_convert_zero[IMM_1] << 2));
1766 #ifdef GPU_CORRECT_ALIGNMENT
1767 uint32_t address = gpu_reg[15] + (gpu_convert_zero[IMM_1] << 2);
1769 if (address >= 0xF03000 && address <= 0xF03FFF)
1770 GPUWriteLong(address & 0xFFFFFFFC, RN, GPU);
1772 GPUWriteLong(address, RN, GPU);
1774 GPUWriteLong(gpu_reg[15] + (gpu_convert_zero[IMM_1] << 2), RN, GPU);
1778 static void gpu_opcode_load_r14_ri(void)
1780 #ifdef GPU_DIS_LOAD14R
1782 WriteLog("%06X: LOAD (R14+R%02u), R%02u [NCZ:%u%u%u, R14+R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM+gpu_reg[14], IMM_2, RN);
1784 #ifdef GPU_CORRECT_ALIGNMENT
1785 uint32_t address = gpu_reg[14] + RM;
1787 if (address >= 0xF03000 && address <= 0xF03FFF)
1788 RN = GPUReadLong(address & 0xFFFFFFFC, GPU);
1790 RN = GPUReadLong(address, GPU);
1792 RN = GPUReadLong(gpu_reg[14] + RM, GPU);
1794 #ifdef GPU_DIS_LOAD14R
1796 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1800 static void gpu_opcode_load_r15_ri(void)
1802 #ifdef GPU_DIS_LOAD15R
1804 WriteLog("%06X: LOAD (R15+R%02u), R%02u [NCZ:%u%u%u, R15+R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM+gpu_reg[15], IMM_2, RN);
1806 #ifdef GPU_CORRECT_ALIGNMENT
1807 uint32_t address = gpu_reg[15] + RM;
1809 if (address >= 0xF03000 && address <= 0xF03FFF)
1810 RN = GPUReadLong(address & 0xFFFFFFFC, GPU);
1812 RN = GPUReadLong(address, GPU);
1814 RN = GPUReadLong(gpu_reg[15] + RM, GPU);
1816 #ifdef GPU_DIS_LOAD15R
1818 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1822 static void gpu_opcode_store_r14_ri(void)
1824 #ifdef GPU_DIS_STORE14R
1826 WriteLog("%06X: STORE R%02u, (R14+R%02u) [NCZ:%u%u%u, R%02u=%08X, R14+R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM+gpu_reg[14]);
1828 #ifdef GPU_CORRECT_ALIGNMENT
1829 uint32_t address = gpu_reg[14] + RM;
1831 if (address >= 0xF03000 && address <= 0xF03FFF)
1832 GPUWriteLong(address & 0xFFFFFFFC, RN, GPU);
1834 GPUWriteLong(address, RN, GPU);
1836 GPUWriteLong(gpu_reg[14] + RM, RN, GPU);
1840 static void gpu_opcode_store_r15_ri(void)
1842 #ifdef GPU_DIS_STORE15R
1844 WriteLog("%06X: STORE R%02u, (R15+R%02u) [NCZ:%u%u%u, R%02u=%08X, R15+R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM+gpu_reg[15]);
1846 #ifdef GPU_CORRECT_ALIGNMENT_STORE
1847 uint32_t address = gpu_reg[15] + RM;
1849 if (address >= 0xF03000 && address <= 0xF03FFF)
1850 GPUWriteLong(address & 0xFFFFFFFC, RN, GPU);
1852 GPUWriteLong(address, RN, GPU);
1854 GPUWriteLong(gpu_reg[15] + RM, RN, GPU);
1858 static void gpu_opcode_nop(void)
1862 WriteLog("%06X: NOP [NCZ:%u%u%u]\n", gpu_pc-2, gpu_flag_n, gpu_flag_c, gpu_flag_z);
1866 static void gpu_opcode_pack(void)
1870 WriteLog("%06X: %s R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, (!IMM_1 ? "PACK " : "UNPACK"), IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1874 //BUG! if (RM == 0) // Pack
1875 if (IMM_1 == 0) // Pack
1876 RN = ((val >> 10) & 0x0000F000) | ((val >> 5) & 0x00000F00) | (val & 0x000000FF);
1878 RN = ((val & 0x0000F000) << 10) | ((val & 0x00000F00) << 5) | (val & 0x000000FF);
1881 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1885 static void gpu_opcode_storeb(void)
1887 #ifdef GPU_DIS_STOREB
1889 WriteLog("%06X: STOREB R%02u, (R%02u) [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM);
1892 // Would appear to be so...!
1893 if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1894 GPUWriteLong(RM, RN & 0xFF, GPU);
1896 JaguarWriteByte(RM, RN, GPU);
1899 static void gpu_opcode_storew(void)
1901 #ifdef GPU_DIS_STOREW
1903 WriteLog("%06X: STOREW R%02u, (R%02u) [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM);
1905 #ifdef GPU_CORRECT_ALIGNMENT
1906 if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1907 GPUWriteLong(RM & 0xFFFFFFFE, RN & 0xFFFF, GPU);
1909 JaguarWriteWord(RM, RN, GPU);
1911 if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1912 GPUWriteLong(RM, RN & 0xFFFF, GPU);
1914 JaguarWriteWord(RM, RN, GPU);
1918 static void gpu_opcode_store(void)
1920 #ifdef GPU_DIS_STORE
1922 WriteLog("%06X: STORE R%02u, (R%02u) [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM);
1924 #ifdef GPU_CORRECT_ALIGNMENT
1925 if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1926 GPUWriteLong(RM & 0xFFFFFFFC, RN, GPU);
1928 GPUWriteLong(RM, RN, GPU);
1930 GPUWriteLong(RM, RN, GPU);
1934 static void gpu_opcode_storep(void)
1936 #ifdef GPU_CORRECT_ALIGNMENT
1937 if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1939 GPUWriteLong((RM & 0xFFFFFFF8) + 0, gpu_hidata, GPU);
1940 GPUWriteLong((RM & 0xFFFFFFF8) + 4, RN, GPU);
1944 GPUWriteLong(RM + 0, gpu_hidata, GPU);
1945 GPUWriteLong(RM + 4, RN, GPU);
1948 GPUWriteLong(RM + 0, gpu_hidata, GPU);
1949 GPUWriteLong(RM + 4, RN, GPU);
1953 static void gpu_opcode_loadb(void)
1955 #ifdef GPU_DIS_LOADB
1957 WriteLog("%06X: LOADB (R%02u), R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1959 if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1960 RN = GPUReadLong(RM, GPU) & 0xFF;
1962 RN = JaguarReadByte(RM, GPU);
1963 #ifdef GPU_DIS_LOADB
1965 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1969 static void gpu_opcode_loadw(void)
1971 #ifdef GPU_DIS_LOADW
1973 WriteLog("%06X: LOADW (R%02u), R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1975 #ifdef GPU_CORRECT_ALIGNMENT
1976 if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1977 RN = GPUReadLong(RM & 0xFFFFFFFE, GPU) & 0xFFFF;
1979 RN = JaguarReadWord(RM, GPU);
1981 if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1982 RN = GPUReadLong(RM, GPU) & 0xFFFF;
1984 RN = JaguarReadWord(RM, GPU);
1986 #ifdef GPU_DIS_LOADW
1988 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1992 // According to the docs, & "Do The Same", this address is long aligned...
1994 // And it works!!! Need to fix all instances...
1995 // Also, Power Drive Rally seems to contradict the idea that only LOADs in
1996 // the $F03000-$F03FFF range are aligned...
1997 #warning "!!! Alignment issues, need to find definitive final word on this !!!"
1999 Preliminary testing on real hardware seems to confirm that something strange goes on
2000 with unaligned reads in main memory. When the address is off by 1, the result is the
2001 same as the long address with the top byte replaced by something. So if the read is
2002 from $401, and $400 has 12 34 56 78, the value read will be $nn345678, where nn is a currently unknown value.
2003 When the address is off by 2, the result would be $nnnn5678, where nnnn is unknown.
2004 When the address is off by 3, the result would be $nnnnnn78, where nnnnnn is unknown.
2005 It may be that the "unknown" values come from the prefetch queue, but not sure how
2006 to test that. They seem to be stable, though, which would indicate such a mechanism.
2007 Sometimes, however, the off by 2 case returns $12345678!
2009 static void gpu_opcode_load(void)
2013 WriteLog("%06X: LOAD (R%02u), R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2015 #ifdef GPU_CORRECT_ALIGNMENT
2016 uint32_t mask[4] = { 0x00000000, 0xFF000000, 0xFFFF0000, 0xFFFFFF00 };
2017 // if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
2018 RN = GPUReadLong(RM & 0xFFFFFFFC, GPU);
2019 // RN = GPUReadLong(RM & 0x00FFFFFC, GPU);
2021 // RN = GPUReadLong(RM, GPU);
2022 // Simulate garbage in unaligned reads...
2023 //seems that this behavior is different in GPU mem vs. main mem...
2024 // if ((RM < 0xF03000) || (RM > 0xF0BFFF))
2025 // RN |= mask[RM & 0x03];
2027 RN = GPUReadLong(RM, GPU);
2031 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2035 static void gpu_opcode_loadp(void)
2037 #ifdef GPU_CORRECT_ALIGNMENT
2038 if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
2040 gpu_hidata = GPUReadLong((RM & 0xFFFFFFF8) + 0, GPU);
2041 RN = GPUReadLong((RM & 0xFFFFFFF8) + 4, GPU);
2045 gpu_hidata = GPUReadLong(RM + 0, GPU);
2046 RN = GPUReadLong(RM + 4, GPU);
2049 gpu_hidata = GPUReadLong(RM + 0, GPU);
2050 RN = GPUReadLong(RM + 4, GPU);
2054 static void gpu_opcode_load_r14_indexed(void)
2056 #ifdef GPU_DIS_LOAD14I
2058 WriteLog("%06X: LOAD (R14+$%02X), R%02u [NCZ:%u%u%u, R14+$%02X=%08X, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1] << 2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, gpu_convert_zero[IMM_1] << 2, gpu_reg[14]+(gpu_convert_zero[IMM_1] << 2), IMM_2, RN);
2060 #ifdef GPU_CORRECT_ALIGNMENT
2061 uint32_t address = gpu_reg[14] + (gpu_convert_zero[IMM_1] << 2);
2063 if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
2064 RN = GPUReadLong(address & 0xFFFFFFFC, GPU);
2066 RN = GPUReadLong(address, GPU);
2068 RN = GPUReadLong(gpu_reg[14] + (gpu_convert_zero[IMM_1] << 2), GPU);
2070 #ifdef GPU_DIS_LOAD14I
2072 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2076 static void gpu_opcode_load_r15_indexed(void)
2078 #ifdef GPU_DIS_LOAD15I
2080 WriteLog("%06X: LOAD (R15+$%02X), R%02u [NCZ:%u%u%u, R15+$%02X=%08X, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1] << 2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, gpu_convert_zero[IMM_1] << 2, gpu_reg[15]+(gpu_convert_zero[IMM_1] << 2), IMM_2, RN);
2082 #ifdef GPU_CORRECT_ALIGNMENT
2083 uint32_t address = gpu_reg[15] + (gpu_convert_zero[IMM_1] << 2);
2085 if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
2086 RN = GPUReadLong(address & 0xFFFFFFFC, GPU);
2088 RN = GPUReadLong(address, GPU);
2090 RN = GPUReadLong(gpu_reg[15] + (gpu_convert_zero[IMM_1] << 2), GPU);
2092 #ifdef GPU_DIS_LOAD15I
2094 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2098 static void gpu_opcode_movei(void)
2100 #ifdef GPU_DIS_MOVEI
2102 WriteLog("%06X: MOVEI #$%08X, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, (uint32_t)GPUReadWord(gpu_pc) | ((uint32_t)GPUReadWord(gpu_pc + 2) << 16), IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2104 // This instruction is followed by 32-bit value in LSW / MSW format...
2105 RN = (uint32_t)GPUReadWord(gpu_pc, GPU) | ((uint32_t)GPUReadWord(gpu_pc + 2, GPU) << 16);
2107 #ifdef GPU_DIS_MOVEI
2109 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2113 static void gpu_opcode_moveta(void)
2115 #ifdef GPU_DIS_MOVETA
2117 WriteLog("%06X: MOVETA R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u(alt)=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, ALTERNATE_RN);
2120 #ifdef GPU_DIS_MOVETA
2122 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u(alt)=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, ALTERNATE_RN);
2126 static void gpu_opcode_movefa(void)
2128 #ifdef GPU_DIS_MOVEFA
2130 WriteLog("%06X: MOVEFA R%02u, R%02u [NCZ:%u%u%u, R%02u(alt)=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, ALTERNATE_RM, IMM_2, RN);
2133 #ifdef GPU_DIS_MOVEFA
2135 WriteLog("[NCZ:%u%u%u, R%02u(alt)=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, ALTERNATE_RM, IMM_2, RN);
2139 static void gpu_opcode_move(void)
2143 WriteLog("%06X: MOVE R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2148 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2152 static void gpu_opcode_moveq(void)
2154 #ifdef GPU_DIS_MOVEQ
2156 WriteLog("%06X: MOVEQ #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2159 #ifdef GPU_DIS_MOVEQ
2161 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2165 static void gpu_opcode_resmac(void)
2170 static void gpu_opcode_imult(void)
2172 #ifdef GPU_DIS_IMULT
2174 WriteLog("%06X: IMULT R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2176 RN = (int16_t)RN * (int16_t)RM;
2178 #ifdef GPU_DIS_IMULT
2180 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2184 static void gpu_opcode_mult(void)
2188 WriteLog("%06X: MULT R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2190 RN = (uint16_t)RM * (uint16_t)RN;
2194 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2198 static void gpu_opcode_bclr(void)
2202 WriteLog("%06X: BCLR #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2204 uint32_t res = RN & ~(1 << IMM_1);
2209 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2213 static void gpu_opcode_btst(void)
2217 WriteLog("%06X: BTST #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2219 gpu_flag_z = (~RN >> IMM_1) & 1;
2222 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2226 static void gpu_opcode_bset(void)
2230 WriteLog("%06X: BSET #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2232 uint32_t res = RN | (1 << IMM_1);
2237 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2241 static void gpu_opcode_imacn(void)
2243 uint32_t res = (int16_t)RM * (int16_t)(RN);
2247 static void gpu_opcode_mtoi(void)
2250 uint32_t res = RN = (((int32_t)_RM >> 8) & 0xFF800000) | (_RM & 0x007FFFFF);
2254 static void gpu_opcode_normi(void)
2261 while ((_RM & 0xFFC00000) == 0)
2266 while ((_RM & 0xFF800000) != 0)
2276 static void gpu_opcode_mmult(void)
2278 int count = gpu_matrix_control & 0x0F; // Matrix width
2279 uint32_t addr = gpu_pointer_to_matrix; // In the GPU's RAM
2283 if (gpu_matrix_control & 0x10) // Column stepping
2285 for(int i=0; i<count; i++)
2289 a = (int16_t)((gpu_alternate_reg[IMM_1 + (i >> 1)] >> 16) & 0xFFFF);
2291 a = (int16_t)(gpu_alternate_reg[IMM_1 + (i >> 1)] & 0xFFFF);
2293 int16_t b = ((int16_t)GPUReadWord(addr + 2, GPU));
2298 else // Row stepping
2300 for(int i=0; i<count; i++)
2304 a = (int16_t)((gpu_alternate_reg[IMM_1 + (i >> 1)] >> 16) & 0xFFFF);
2306 a = (int16_t)(gpu_alternate_reg[IMM_1 + (i >> 1)] & 0xFFFF);
2308 int16_t b = ((int16_t)GPUReadWord(addr + 2, GPU));
2313 RN = res = (int32_t)accum;
2314 // carry flag to do (out of the last add)
2318 static void gpu_opcode_abs(void)
2322 WriteLog("%06X: ABS R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2324 gpu_flag_c = RN >> 31;
2325 if (RN == 0x80000000)
2326 //Is 0x80000000 a positive number? If so, then we need to set C to 0 as well!
2327 gpu_flag_n = 1, gpu_flag_z = 0;
2332 gpu_flag_n = 0; SET_FLAG_Z(RN);
2336 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2340 static void gpu_opcode_div(void) // RN / RM
2344 WriteLog("%06X: DIV R%02u, R%02u (%s) [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, (gpu_div_control & 0x01 ? "16.16" : "32"), gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2346 // NOTE: remainder is NOT calculated correctly here!
2347 // The original tried to get it right by checking to see if the
2348 // remainder was negative, but that's too late...
2349 // The code there should do it now, but I'm not 100% sure...
2353 if (gpu_div_control & 0x01) // 16.16 division
2355 RN = ((uint64_t)RN << 16) / RM;
2356 gpu_remain = ((uint64_t)RN << 16) % RM;
2361 gpu_remain = RN % RM;
2364 if ((gpu_remain - RM) & 0x80000000) // If the result would have been negative...
2365 gpu_remain -= RM; // Then make it negative!
2375 if (gpu_div_control & 1)
2377 gpu_remain = (((uint64_t)_RN) << 16) % _RM;
2378 if (gpu_remain&0x80000000)
2380 RN = (((uint64_t)_RN) << 16) / _RM;
2384 gpu_remain = _RN % _RM;
2385 if (gpu_remain&0x80000000)
2394 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] Remainder: %08X\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN, gpu_remain);
2398 static void gpu_opcode_imultn(void)
2400 uint32_t res = (int32_t)((int16_t)RN * (int16_t)RM);
2401 gpu_acc = (int32_t)res;
2406 static void gpu_opcode_neg(void)
2410 WriteLog("%06X: NEG R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2413 SET_ZNC_SUB(0, RN, res);
2417 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2421 static void gpu_opcode_shlq(void)
2425 WriteLog("%06X: SHLQ #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, 32 - IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2427 // Was a bug here...
2428 // (Look at Aaron's code: If r1 = 32, then 32 - 32 = 0 which is wrong!)
2429 int32_t r1 = 32 - IMM_1;
2430 uint32_t res = RN << r1;
2431 SET_ZN(res); gpu_flag_c = (RN >> 31) & 1;
2435 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2439 static void gpu_opcode_shrq(void)
2443 WriteLog("%06X: SHRQ #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2445 int32_t r1 = gpu_convert_zero[IMM_1];
2446 uint32_t res = RN >> r1;
2447 SET_ZN(res); gpu_flag_c = RN & 1;
2451 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2455 static void gpu_opcode_ror(void)
2459 WriteLog("%06X: ROR R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2461 uint32_t r1 = RM & 0x1F;
2462 uint32_t res = (RN >> r1) | (RN << (32 - r1));
2463 SET_ZN(res); gpu_flag_c = (RN >> 31) & 1;
2467 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2471 static void gpu_opcode_rorq(void)
2475 WriteLog("%06X: RORQ #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2477 uint32_t r1 = gpu_convert_zero[IMM_1 & 0x1F];
2479 uint32_t res = (r2 >> r1) | (r2 << (32 - r1));
2481 SET_ZN(res); gpu_flag_c = (r2 >> 31) & 0x01;
2484 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2488 static void gpu_opcode_sha(void)
2490 /* int dreg = jaguar.op & 31;
2491 int32_t r1 = (int32_t)jaguar.r[(jaguar.op >> 5) & 31];
2492 uint32_t r2 = jaguar.r[dreg];
2498 res = (r1 <= -32) ? 0 : (r2 << -r1);
2499 jaguar.FLAGS |= (r2 >> 30) & 2;
2503 res = (r1 >= 32) ? ((int32_t)r2 >> 31) : ((int32_t)r2 >> r1);
2504 jaguar.FLAGS |= (r2 << 1) & 2;
2506 jaguar.r[dreg] = res;
2511 WriteLog("%06X: SHA R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2515 if ((int32_t)RM < 0)
2517 res = ((int32_t)RM <= -32) ? 0 : (RN << -(int32_t)RM);
2518 gpu_flag_c = RN >> 31;
2522 res = ((int32_t)RM >= 32) ? ((int32_t)RN >> 31) : ((int32_t)RN >> (int32_t)RM);
2523 gpu_flag_c = RN & 0x01;
2529 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2532 /* int32_t sRM=(int32_t)RM;
2537 uint32_t shift=-sRM;
2538 if (shift>=32) shift=32;
2539 gpu_flag_c=(_RN&0x80000000)>>31;
2549 if (shift>=32) shift=32;
2553 _RN=((int32_t)_RN)>>1;
2562 static void gpu_opcode_sharq(void)
2564 #ifdef GPU_DIS_SHARQ
2566 WriteLog("%06X: SHARQ #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2568 uint32_t res = (int32_t)RN >> gpu_convert_zero[IMM_1];
2569 SET_ZN(res); gpu_flag_c = RN & 0x01;
2571 #ifdef GPU_DIS_SHARQ
2573 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2577 static void gpu_opcode_sh(void)
2581 WriteLog("%06X: SH R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2583 if (RM & 0x80000000) // Shift left
2585 gpu_flag_c = RN >> 31;
2586 RN = ((int32_t)RM <= -32 ? 0 : RN << -(int32_t)RM);
2590 gpu_flag_c = RN & 0x01;
2591 RN = (RM >= 32 ? 0 : RN >> RM);
2596 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2600 //Temporary: Testing only!
2601 //#include "gpu2.cpp"
2602 //#include "gpu3.cpp"
2606 // New thread-safe GPU core
2608 int GPUCore(void * data)