6 // Originally by David Raingeard (Cal2)
7 // GCC/SDL port by Niels Wagenaar (Linux/WIN32) and Caz (BeOS)
8 // Cleanups, endian wrongness, and bad ASM amelioration by James Hammons
9 // (C) 2010 Underground Software
11 // JLH = James Hammons <jlhamm@acm.org>
14 // --- ---------- -------------------------------------------------------------
15 // JLH 01/16/2010 Created this log ;-)
16 // JLH 11/26/2011 Added fixes for LOAD/STORE alignment issues
19 // Note: Endian wrongness probably stems from the MAME origins of this emu and
20 // the braindead way in which MAME handles memory. :-)
22 // Problem with not booting the BIOS was the incorrect way that the
23 // SUBC instruction set the carry when the carry was set going in...
24 // Same problem with ADDC...
30 #include <string.h> // For memset
35 #include "m68000/m68kinterface.h"
// Compile-time switches. GPU_CORRECT_ALIGNMENT relates to the LOAD/STORE
// alignment fix mentioned in the change log; the GPU_DIS_* symbols are
// per-opcode disassembly switches.
// NOTE(review): the code that tests these symbols is not visible in this listing.
40 // Seems alignment in loads & stores was off...
41 #define GPU_CORRECT_ALIGNMENT
44 // For GPU disassembly...
65 #define GPU_DIS_LOAD14I
66 #define GPU_DIS_LOAD14R
67 #define GPU_DIS_LOAD15I
68 #define GPU_DIS_LOAD15R
70 #define GPU_DIS_MOVEFA
72 #define GPU_DIS_MOVEPC
73 #define GPU_DIS_MOVETA
90 #define GPU_DIS_STOREB
91 #define GPU_DIS_STOREW
92 #define GPU_DIS_STORE14I
93 #define GPU_DIS_STORE14R
94 #define GPU_DIS_STORE15I
95 #define GPU_DIS_STORE15R
102 //bool doGPUDis = false;
// Global disassembly flag; it starts out enabled.
103 bool doGPUDis = true;
107 GPU opcodes use (BIOS flying ATARI logo):
// Bit masks for the GPU flags register (gpu_flags).
// CINTxFLAG (bits 9-13): when the flags register is written, these bits are
// shifted right by 3 and the matching interrupt latch bits are cleared from
// gpu_control.
149 #define CINT0FLAG 0x0200
150 #define CINT1FLAG 0x0400
151 #define CINT2FLAG 0x0800
152 #define CINT3FLAG 0x1000
153 #define CINT4FLAG 0x2000
154 #define CINT04FLAGS (CINT0FLAG | CINT1FLAG | CINT2FLAG | CINT3FLAG | CINT4FLAG)
// Condition-code bits: zero (bit 0), carry (bit 1), negative (bit 2).
158 #define ZERO_FLAG 0x0001
159 #define CARRY_FLAG 0x0002
160 #define NEGA_FLAG 0x0004
// Interrupt enable bits (4-8).
162 #define INT_ENA0 0x0010
163 #define INT_ENA1 0x0020
164 #define INT_ENA2 0x0040
165 #define INT_ENA3 0x0080
166 #define INT_ENA4 0x0100
// Interrupt clear bits (9-13); same values as CINT0FLAG..CINT4FLAG.
167 #define INT_CLR0 0x0200
168 #define INT_CLR1 0x0400
169 #define INT_CLR2 0x0800
170 #define INT_CLR3 0x1000
171 #define INT_CLR4 0x2000
// Register bank select bit (bit 14), read by GPUUpdateRegisterBanks.
172 #define REGPAGE 0x4000
175 // External global variables
// Logging switches defined in another translation unit.
177 extern int start_logging;
178 extern int gpu_start_log;
180 // Private function prototypes
// Bank selection and debug dump helpers.
182 void GPUUpdateRegisterBanks(void);
183 void GPUDumpDisassembly(void);
184 void GPUDumpRegisters(void);
185 void GPUDumpMemory(void);
// One handler per GPU opcode; each is installed in the gpu_opcode[] dispatch
// table. Handlers take no arguments and read their operands from file-level state.
187 static void gpu_opcode_add(void);
188 static void gpu_opcode_addc(void);
189 static void gpu_opcode_addq(void);
190 static void gpu_opcode_addqt(void);
191 static void gpu_opcode_sub(void);
192 static void gpu_opcode_subc(void);
193 static void gpu_opcode_subq(void);
194 static void gpu_opcode_subqt(void);
195 static void gpu_opcode_neg(void);
196 static void gpu_opcode_and(void);
197 static void gpu_opcode_or(void);
198 static void gpu_opcode_xor(void);
199 static void gpu_opcode_not(void);
200 static void gpu_opcode_btst(void);
201 static void gpu_opcode_bset(void);
202 static void gpu_opcode_bclr(void);
203 static void gpu_opcode_mult(void);
204 static void gpu_opcode_imult(void);
205 static void gpu_opcode_imultn(void);
206 static void gpu_opcode_resmac(void);
207 static void gpu_opcode_imacn(void);
208 static void gpu_opcode_div(void);
209 static void gpu_opcode_abs(void);
210 static void gpu_opcode_sh(void);
211 static void gpu_opcode_shlq(void);
212 static void gpu_opcode_shrq(void);
213 static void gpu_opcode_sha(void);
214 static void gpu_opcode_sharq(void);
215 static void gpu_opcode_ror(void);
216 static void gpu_opcode_rorq(void);
217 static void gpu_opcode_cmp(void);
218 static void gpu_opcode_cmpq(void);
219 static void gpu_opcode_sat8(void);
220 static void gpu_opcode_sat16(void);
221 static void gpu_opcode_move(void);
222 static void gpu_opcode_moveq(void);
223 static void gpu_opcode_moveta(void);
224 static void gpu_opcode_movefa(void);
225 static void gpu_opcode_movei(void);
226 static void gpu_opcode_loadb(void);
227 static void gpu_opcode_loadw(void);
228 static void gpu_opcode_load(void);
229 static void gpu_opcode_loadp(void);
230 static void gpu_opcode_load_r14_indexed(void);
231 static void gpu_opcode_load_r15_indexed(void);
232 static void gpu_opcode_storeb(void);
233 static void gpu_opcode_storew(void);
234 static void gpu_opcode_store(void);
235 static void gpu_opcode_storep(void);
236 static void gpu_opcode_store_r14_indexed(void);
237 static void gpu_opcode_store_r15_indexed(void);
238 static void gpu_opcode_move_pc(void);
239 static void gpu_opcode_jump(void);
240 static void gpu_opcode_jr(void);
241 static void gpu_opcode_mmult(void);
242 static void gpu_opcode_mtoi(void);
243 static void gpu_opcode_normi(void);
244 static void gpu_opcode_nop(void);
245 static void gpu_opcode_load_r14_ri(void);
246 static void gpu_opcode_load_r15_ri(void);
247 static void gpu_opcode_store_r14_ri(void);
248 static void gpu_opcode_store_r15_ri(void);
249 static void gpu_opcode_sat24(void);
250 static void gpu_opcode_pack(void);
// Per-opcode cycle cost tables, one entry per 6-bit opcode number.
// Two earlier attempts are kept for reference; the last table is the live one.
252 // This is wrong, since it doesn't take pipeline effects into account. !!! FIX !!!
253 /*uint8 gpu_opcode_cycles[64] =
255 3, 3, 3, 3, 3, 3, 3, 3,
256 3, 3, 3, 3, 3, 3, 3, 3,
257 3, 3, 1, 3, 1, 18, 3, 3,
258 3, 3, 3, 3, 3, 3, 3, 3,
259 3, 3, 2, 2, 2, 2, 3, 4,
260 5, 4, 5, 6, 6, 1, 1, 1,
261 1, 2, 2, 2, 1, 1, 9, 3,
262 3, 1, 6, 6, 2, 2, 3, 3
264 //Here's a QnD kludge...
265 //This is wrong, wrong, WRONG, but it seems to work for the time being...
266 //(That is, it fixes Flip Out which relies on GPU timing rather than semaphores. Bad developers! Bad!)
267 //What's needed here is a way to take pipeline effects into account (including pipeline stalls!)...
268 /*uint8 gpu_opcode_cycles[64] =
270 1, 1, 1, 1, 1, 1, 1, 1,
271 1, 1, 1, 1, 1, 1, 1, 1,
272 1, 1, 1, 1, 1, 9, 1, 1,
273 1, 1, 1, 1, 1, 1, 1, 1,
274 1, 1, 1, 1, 1, 1, 1, 2,
275 2, 2, 2, 3, 3, 1, 1, 1,
276 1, 1, 1, 1, 1, 1, 4, 1,
277 1, 1, 3, 3, 1, 1, 1, 1
// Live table: every opcode is charged exactly one cycle. GPUExec subtracts
// gpu_opcode_cycles[index] from its remaining cycle budget after each instruction.
279 uint8 gpu_opcode_cycles[64] =
281 1, 1, 1, 1, 1, 1, 1, 1,
282 1, 1, 1, 1, 1, 1, 1, 1,
283 1, 1, 1, 1, 1, 1, 1, 1,
284 1, 1, 1, 1, 1, 1, 1, 1,
285 1, 1, 1, 1, 1, 1, 1, 1,
286 1, 1, 1, 1, 1, 1, 1, 1,
287 1, 1, 1, 1, 1, 1, 1, 1,
288 1, 1, 1, 1, 1, 1, 1, 1
// Opcode dispatch table. GPUExec indexes it with the top six bits of the
// 16-bit instruction word (opcode >> 10) and calls the selected handler.
// Entry order must stay in step with gpu_opcode_str[] and gpu_opcode_cycles[].
291 void (*gpu_opcode[64])()=
293 gpu_opcode_add, gpu_opcode_addc, gpu_opcode_addq, gpu_opcode_addqt,
294 gpu_opcode_sub, gpu_opcode_subc, gpu_opcode_subq, gpu_opcode_subqt,
295 gpu_opcode_neg, gpu_opcode_and, gpu_opcode_or, gpu_opcode_xor,
296 gpu_opcode_not, gpu_opcode_btst, gpu_opcode_bset, gpu_opcode_bclr,
297 gpu_opcode_mult, gpu_opcode_imult, gpu_opcode_imultn, gpu_opcode_resmac,
298 gpu_opcode_imacn, gpu_opcode_div, gpu_opcode_abs, gpu_opcode_sh,
299 gpu_opcode_shlq, gpu_opcode_shrq, gpu_opcode_sha, gpu_opcode_sharq,
300 gpu_opcode_ror, gpu_opcode_rorq, gpu_opcode_cmp, gpu_opcode_cmpq,
301 gpu_opcode_sat8, gpu_opcode_sat16, gpu_opcode_move, gpu_opcode_moveq,
302 gpu_opcode_moveta, gpu_opcode_movefa, gpu_opcode_movei, gpu_opcode_loadb,
303 gpu_opcode_loadw, gpu_opcode_load, gpu_opcode_loadp, gpu_opcode_load_r14_indexed,
304 gpu_opcode_load_r15_indexed, gpu_opcode_storeb, gpu_opcode_storew, gpu_opcode_store,
305 gpu_opcode_storep, gpu_opcode_store_r14_indexed, gpu_opcode_store_r15_indexed, gpu_opcode_move_pc,
306 gpu_opcode_jump, gpu_opcode_jr, gpu_opcode_mmult, gpu_opcode_mtoi,
307 gpu_opcode_normi, gpu_opcode_nop, gpu_opcode_load_r14_ri, gpu_opcode_load_r15_ri,
308 gpu_opcode_store_r14_ri, gpu_opcode_store_r15_ri, gpu_opcode_sat24, gpu_opcode_pack,
// GPU work RAM image (0x1000 bytes), addressed with (offset & 0xFFF).
311 static uint8 gpu_ram_8[0x1000];
// Emulated GPU registers and internal state.
313 static uint32 gpu_acc;
314 static uint32 gpu_remain;
315 static uint32 gpu_hidata;
316 static uint32 gpu_flags;
317 static uint32 gpu_matrix_control;
318 static uint32 gpu_pointer_to_matrix;
319 static uint32 gpu_data_organization;
320 static uint32 gpu_control;
321 static uint32 gpu_div_control;
322 // There is a distinct advantage to having these separated out--there's no need to clear
323 // a bit before writing a result. I.e., if the result of an operation leaves a zero in
324 // the carry flag, you don't have to zero gpu_flag_c before you can write that zero!
325 static uint8 gpu_flag_z, gpu_flag_n, gpu_flag_c;
// The two 32-entry register banks; gpu_reg points at the active bank and
// gpu_alternate_reg at the other one (see GPUUpdateRegisterBanks).
326 uint32 gpu_reg_bank_0[32];
327 uint32 gpu_reg_bank_1[32];
328 static uint32 * gpu_reg;
329 static uint32 * gpu_alternate_reg;
// Decoded fields of the instruction being executed, filled in by GPUExec.
331 static uint32 gpu_instruction;
332 static uint32 gpu_opcode_first_parameter;
333 static uint32 gpu_opcode_second_parameter;
// Bit 0 of the control register is the run bit.
335 #define GPU_RUNNING (gpu_control & 0x01)
// Operand shorthands: the two 5-bit instruction fields used either as register
// numbers (active or alternate bank) or as immediates.
337 #define RM gpu_reg[gpu_opcode_first_parameter]
338 #define RN gpu_reg[gpu_opcode_second_parameter]
339 #define ALTERNATE_RM gpu_alternate_reg[gpu_opcode_first_parameter]
340 #define ALTERNATE_RN gpu_alternate_reg[gpu_opcode_second_parameter]
341 #define IMM_1 gpu_opcode_first_parameter
342 #define IMM_2 gpu_opcode_second_parameter
// NOTE: these two expand with a trailing semicolon already attached.
344 #define SET_FLAG_Z(r) (gpu_flag_z = ((r) == 0));
345 #define SET_FLAG_N(r) (gpu_flag_n = (((uint32)(r) >> 31) & 0x01));
347 #define RESET_FLAG_Z() gpu_flag_z = 0;
348 #define RESET_FLAG_N() gpu_flag_n = 0;
349 #define RESET_FLAG_C() gpu_flag_c = 0;
// Flag helpers. SET_C_ADD is true when a + b carries out of 32 bits (b > ~a);
// SET_C_SUB is true when a - b borrows (b > a).
351 #define CLR_Z (gpu_flag_z = 0)
352 #define CLR_ZN (gpu_flag_z = gpu_flag_n = 0)
353 #define CLR_ZNC (gpu_flag_z = gpu_flag_n = gpu_flag_c = 0)
354 #define SET_Z(r) (gpu_flag_z = ((r) == 0))
355 #define SET_N(r) (gpu_flag_n = (((uint32)(r) >> 31) & 0x01))
356 #define SET_C_ADD(a,b) (gpu_flag_c = ((uint32)(b) > (uint32)(~(a))))
357 #define SET_C_SUB(a,b) (gpu_flag_c = ((uint32)(b) > (uint32)(a)))
// CAUTION: the next three expand to several statements, so they must not be
// used as the unbraced body of an if/else/loop.
358 #define SET_ZN(r) SET_N(r); SET_Z(r)
359 #define SET_ZNC_ADD(a,b,r) SET_N(r); SET_Z(r); SET_C_ADD(a,b)
360 #define SET_ZNC_SUB(a,b,r) SET_N(r); SET_Z(r); SET_C_SUB(a,b)
// Maps a 5-bit field to a count where an encoded 0 means 32; 1..31 map to themselves.
362 uint32 gpu_convert_zero[32] =
363 { 32,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 };
// Lookup built by build_branch_condition_table(): 8 flag combinations x 32
// condition codes. BRANCH_CONDITION relies on a local variable named
// jaguar_flags being in scope at the point of use.
365 uint8 * branch_condition_table = 0;
366 #define BRANCH_CONDITION(x) branch_condition_table[(x) + ((jaguar_flags & 7) << 5)]
// Execution count per opcode, incremented in GPUExec.
368 uint32 gpu_opcode_use[64];
// Mnemonic for each opcode number, in the same order as gpu_opcode[];
// used when printing the opcode usage statistics.
370 const char * gpu_opcode_str[64]=
372 "add", "addc", "addq", "addqt",
373 "sub", "subc", "subq", "subqt",
374 "neg", "and", "or", "xor",
375 "not", "btst", "bset", "bclr",
376 "mult", "imult", "imultn", "resmac",
377 "imacn", "div", "abs", "sh",
378 "shlq", "shrq", "sha", "sharq",
379 "ror", "rorq", "cmp", "cmpq",
380 "sat8", "sat16", "move", "moveq",
381 "moveta", "movefa", "movei", "loadb",
382 "loadw", "load", "loadp", "load_r14_indexed",
383 "load_r15_indexed", "storeb", "storew", "store",
384 "storep", "store_r14_indexed","store_r15_indexed","move_pc",
385 "jump", "jr", "mmult", "mtoi",
386 "normi", "nop", "load_r14_ri", "load_r15_ri",
387 "store_r14_ri", "store_r15_ri", "sat24", "pack",
// Execution bookkeeping. gpu_releaseTimeSlice_flag is set by
// GPUReleaseTimeslice() and cleared on entry to GPUExec().
// NOTE(review): the uses of gpu_in_exec are not visible in this listing.
390 static uint32 gpu_in_exec = 0;
391 static uint32 gpu_releaseTimeSlice_flag = 0;
// Raises gpu_releaseTimeSlice_flag. GPUExec() resets the flag when it starts.
// NOTE(review): the code that tests the flag is not visible in this listing.
393 void GPUReleaseTimeslice(void)
395 gpu_releaseTimeSlice_flag = 1;
// Accessor returning a uint32 -- presumably the current gpu_pc.
// NOTE(review): the body is not visible in this listing; confirm.
398 uint32 GPUGetPC(void)
// Builds branch_condition_table, allocating it on the first call only.
// Layout: 8 rows (i = flag combination, bit 0 zero / bit 1 carry / bit 2
// negative) of 32 entries (j = 5-bit condition code), stored at [i * 32 + j].
// For j >= 16 the expression CARRY_FLAG << (j >> 4) selects the negative flag
// instead of the carry flag.
// NOTE(review): the tests on j that guard each flag check, and the
// initialisation of `result`, are on lines not visible in this listing.
403 void build_branch_condition_table(void)
405 if (!branch_condition_table)
407 branch_condition_table = (uint8 *)malloc(32 * 8 * sizeof(branch_condition_table[0]));
// If the allocation failed the table stays null and nothing is filled in.
409 if (branch_condition_table)
411 for(int i=0; i<8; i++)
413 for(int j=0; j<32; j++)
420 if (!(i & ZERO_FLAG))
423 if (i & (CARRY_FLAG << (j >> 4)))
426 if (!(i & (CARRY_FLAG << (j >> 4))))
428 branch_condition_table[i * 32 + j] = result;
436 // GPU byte access (read)
// Reads one byte from the GPU address space on behalf of `who` (an index into
// whoName[], used for logging).
//  - Work RAM: the byte comes straight from gpu_ram_8.
//  - Control registers: the enclosing longword is fetched through GPUReadLong
//    and one byte is extracted, most significant byte first
//    (offset & 3 == 1 gives bits 23-16, == 2 gives bits 15-8).
//  - Anything else is forwarded to JaguarReadByte.
// A WriteLog call reports accesses to the $F02000-$F020FF register file window.
// NOTE(review): the returns for byte positions 0 and 3 are on lines not visible here.
438 uint8 GPUReadByte(uint32 offset, uint32 who/*=UNKNOWN*/)
440 if (offset >= 0xF02000 && offset <= 0xF020FF)
441 WriteLog("GPU: ReadByte--Attempt to read from GPU register file by %s!\n", whoName[who]);
443 if ((offset >= GPU_WORK_RAM_BASE) && (offset < GPU_WORK_RAM_BASE+0x1000))
444 return gpu_ram_8[offset & 0xFFF];
445 else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset < GPU_CONTROL_RAM_BASE+0x20))
447 uint32 data = GPUReadLong(offset & 0xFFFFFFFC, who);
449 if ((offset & 0x03) == 0)
451 else if ((offset & 0x03) == 1)
452 return (data >> 16) & 0xFF;
453 else if ((offset & 0x03) == 2)
454 return (data >> 8) & 0xFF;
455 else if ((offset & 0x03) == 3)
459 return JaguarReadByte(offset, who);
463 // GPU word access (read)
// Reads a 16-bit big-endian word from the GPU address space on behalf of `who`.
//  - Work RAM: assembled from two consecutive gpu_ram_8 bytes.
//    NOTE(review): `offset` is used directly as the array index here, so it is
//    presumably masked to 0xFFF on a line not visible in this listing; also
//    confirm what happens for the last byte of RAM, where offset+1 is read.
//  - Control registers: odd addresses are assembled from two byte reads;
//    even addresses take the low half of the enclosing longword when
//    offset & 2 is set (the other half is returned on a line not visible here).
//  - Anything else is forwarded to JaguarReadWord.
465 uint16 GPUReadWord(uint32 offset, uint32 who/*=UNKNOWN*/)
467 if (offset >= 0xF02000 && offset <= 0xF020FF)
468 WriteLog("GPU: ReadWord--Attempt to read from GPU register file by %s!\n", whoName[who]);
470 if ((offset >= GPU_WORK_RAM_BASE) && (offset < GPU_WORK_RAM_BASE+0x1000))
473 uint16 data = ((uint16)gpu_ram_8[offset] << 8) | (uint16)gpu_ram_8[offset+1];
476 else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset < GPU_CONTROL_RAM_BASE+0x20))
478 // This looks and smells wrong...
479 // But it *might* be OK...
480 if (offset & 0x01) // Catch cases 1 & 3... (unaligned read)
481 return (GPUReadByte(offset, who) << 8) | GPUReadByte(offset+1, who);
483 uint32 data = GPUReadLong(offset & 0xFFFFFFFC, who);
485 if (offset & 0x02) // Cases 0 & 2...
486 return data & 0xFFFF;
491 //TEMP--Mirror of F03000? No. Writes only...
492 //if (offset >= 0xF0B000 && offset <= 0xF0BFFF)
493 //WriteLog("[GPUR16] --> Possible GPU RAM mirror access by %s!", whoName[who]);
495 return JaguarReadWord(offset, who);
499 // GPU dword access (read)
// Reads a 32-bit big-endian longword from the GPU address space on behalf of `who`.
//  - $F02000-$F020FF: logs the access and returns a register directly;
//    longword index 0-31 reads bank 0 and 32-63 reads bank 1.
//  - Work RAM (up to base + 0x0FFC): assembled from four gpu_ram_8 bytes.
//    NOTE(review): `offset` indexes the array directly, so it is presumably
//    masked on a line not visible in this listing.
//  - Control registers (up to base + 0x1C): returns the selected register.
//    The flags read first folds the separate z/c/n flags back into gpu_flags
//    and masks the result with 0xFFFFC1FF.
//    NOTE(review): the switch and its case labels are not visible here.
//  - Anything else is read as two JaguarReadWord calls.
501 uint32 GPUReadLong(uint32 offset, uint32 who/*=UNKNOWN*/)
503 if (offset >= 0xF02000 && offset <= 0xF020FF)
505 WriteLog("GPU: ReadLong--Attempt to read from GPU register file (%X) by %s!\n", offset, whoName[who]);
506 uint32 reg = (offset & 0xFC) >> 2;
507 return (reg < 32 ? gpu_reg_bank_0[reg] : gpu_reg_bank_1[reg - 32]);
510 // if ((offset >= GPU_WORK_RAM_BASE) && (offset < GPU_WORK_RAM_BASE + 0x1000))
511 if ((offset >= GPU_WORK_RAM_BASE) && (offset <= GPU_WORK_RAM_BASE + 0x0FFC))
514 return ((uint32)gpu_ram_8[offset] << 24) | ((uint32)gpu_ram_8[offset+1] << 16)
515 | ((uint32)gpu_ram_8[offset+2] << 8) | (uint32)gpu_ram_8[offset+3];//*/
516 // return GET32(gpu_ram_8, offset);
518 // else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset < GPU_CONTROL_RAM_BASE+0x20))
519 else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset <= GPU_CONTROL_RAM_BASE + 0x1C))
// Normalise each flag to 0 or 1 before packing them into bits 2..0.
525 gpu_flag_c = (gpu_flag_c ? 1 : 0);
526 gpu_flag_z = (gpu_flag_z ? 1 : 0);
527 gpu_flag_n = (gpu_flag_n ? 1 : 0);
529 gpu_flags = (gpu_flags & 0xFFFFFFF8) | (gpu_flag_n << 2) | (gpu_flag_c << 1) | gpu_flag_z;
531 return gpu_flags & 0xFFFFC1FF;
533 return gpu_matrix_control;
535 return gpu_pointer_to_matrix;
537 return gpu_data_organization;
546 default: // unaligned long read
548 WriteLog("GPU: Read32--unaligned 32 bit read at %08X by %s.\n", GPU_CONTROL_RAM_BASE + offset, whoName[who]);
553 //TEMP--Mirror of F03000? No. Writes only...
554 //if (offset >= 0xF0B000 && offset <= 0xF0BFFF)
555 // WriteLog("[GPUR32] --> Possible GPU RAM mirror access by %s!\n", whoName[who]);
556 /*if (offset >= 0xF1D000 && offset <= 0xF1DFFF)
557 WriteLog("[GPUR32] --> Reading from Wavetable ROM!\n");//*/
559 return (JaguarReadWord(offset, who) << 16) | JaguarReadWord(offset + 2, who);
563 // GPU byte access (write)
// Writes one byte into the GPU address space on behalf of `who` (an index into
// whoName[], used for logging).
//  - Work RAM: the byte is stored in gpu_ram_8, then the 68K and DSP
//    timeslices are ended (NOTE(review): the condition guarding those two
//    calls is on lines not visible in this listing).
//  - Control registers: the divide-control register at +0x1C is patched in
//    place; every other register is updated by reading the enclosing
//    longword, replacing one byte and writing the longword back.
//  - Anything else is forwarded to JaguarWriteByte.
565 void GPUWriteByte(uint32 offset, uint8 data, uint32 who/*=UNKNOWN*/)
567 if (offset >= 0xF02000 && offset <= 0xF020FF)
568 WriteLog("GPU: WriteByte--Attempt to write to GPU register file by %s!\n", whoName[who]);
570 if ((offset >= GPU_WORK_RAM_BASE) && (offset <= GPU_WORK_RAM_BASE + 0x0FFF))
572 gpu_ram_8[offset & 0xFFF] = data;
574 //This is the same stupid worthless code that was in the DSP!!! AARRRGGGGHHHHH!!!!!!
577 m68k_end_timeslice();
578 dsp_releaseTimeslice();
582 else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset <= GPU_CONTROL_RAM_BASE + 0x1F))
584 uint32 reg = offset & 0x1C;
585 int bytenum = offset & 0x03;
587 //This is definitely wrong!
588 if ((reg >= 0x1C) && (reg <= 0x1F))
// The shifts are done in uint32: shifting 0xFF or a byte >= 0x80 left by 24
// as a plain int overflows a signed int.
589 gpu_div_control = (gpu_div_control & (~((uint32)0xFF << (bytenum << 3)))) | ((uint32)data << (bytenum << 3));
// Longword-align with the full 32-bit mask, matching GPUReadByte.
592 uint32 old_data = GPUReadLong(offset & 0xFFFFFFFC, who);
593 bytenum = 3 - bytenum; // Motorola byte order: byte 0 is the most significant
594 old_data = (old_data & (~((uint32)0xFF << (bytenum << 3)))) | ((uint32)data << (bytenum << 3));
595 GPUWriteLong(offset & 0xFFFFFFFC, old_data, who);
599 // WriteLog("gpu: writing %.2x at 0x%.8x\n",data,offset);
600 JaguarWriteByte(offset, data, who);
604 // GPU word access (write)
// Writes a 16-bit big-endian word into the GPU address space on behalf of `who`.
//  - Work RAM (up to base + 0x0FFE): stored in gpu_ram_8, then the 68K and
//    DSP timeslices are ended (NOTE(review): the guard for those calls is on
//    lines not visible in this listing).
//  - Control registers (up to base + 0x1E): odd addresses are only logged;
//    the divide-control register at +0x1C is patched in place; other
//    registers are updated by read-modify-write of the enclosing longword.
//  - The last byte of work RAM and of the control block cannot hold a whole
//    word; such writes are logged and dropped.
//  - Anything else is forwarded to JaguarWriteWord.
606 void GPUWriteWord(uint32 offset, uint16 data, uint32 who/*=UNKNOWN*/)
608 if (offset >= 0xF02000 && offset <= 0xF020FF)
609 WriteLog("GPU: WriteWord--Attempt to write to GPU register file by %s!\n", whoName[who]);
611 if ((offset >= GPU_WORK_RAM_BASE) && (offset <= GPU_WORK_RAM_BASE + 0x0FFE))
613 gpu_ram_8[offset & 0xFFF] = (data>>8) & 0xFF;
614 gpu_ram_8[(offset+1) & 0xFFF] = data & 0xFF;//*/
616 SET16(gpu_ram_8, offset, data);//*/
618 /*if (offset >= 0xF03214 && offset < 0xF0321F)
619 WriteLog("GPU: Writing WORD (%04X) to GPU RAM (%08X)...\n", data, offset);//*/
622 //This is the same stupid worthless code that was in the DSP!!! AARRRGGGGHHHHH!!!!!!
625 m68k_end_timeslice();
626 dsp_releaseTimeslice();
630 else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset <= GPU_CONTROL_RAM_BASE + 0x1E))
632 if (offset & 0x01) // This is supposed to weed out unaligned writes, but does nothing...
635 WriteLog("GPU: Write16--unaligned write @ %08X [%04X]\n", offset, data);
640 //Dual locations in this range: $1C Divide unit remainder/Divide unit control (R/W)
641 //This just literally sucks.
642 if ((offset & 0x1C) == 0x1C)
644 //This doesn't look right either--handles cases 1, 2, & 3 all the same!
646 gpu_div_control = (gpu_div_control & 0xFFFF0000) | (data & 0xFFFF);
// Shift in uint32: a word >= 0x8000 shifted left by 16 overflows a signed int.
648 gpu_div_control = (gpu_div_control & 0x0000FFFF) | ((uint32)data << 16);
652 //WriteLog("[GPU W16:%08X,%04X]", offset, data);
// Longword-align with the full 32-bit mask, matching the read paths.
653 uint32 old_data = GPUReadLong(offset & 0xFFFFFFFC, who);
656 old_data = (old_data & 0xFFFF0000) | (data & 0xFFFF);
658 old_data = (old_data & 0x0000FFFF) | ((uint32)data << 16);
660 GPUWriteLong(offset & 0xFFFFFFFC, old_data, who);
// Only these two addresses are meant to be swallowed here. The second test
// previously lacked "offset ==", which made the condition always true and the
// JaguarWriteWord fall-through unreachable.
// NOTE(review): presumably forwarding these two addresses would bounce back
// into this function (see the infinite-loop warning); confirm.
665 else if ((offset == GPU_WORK_RAM_BASE + 0x0FFF) || (offset == GPU_CONTROL_RAM_BASE + 0x1F))
668 WriteLog("GPU: Write16--unaligned write @ %08X by %s [%04X]!\n", offset, whoName[who], data);
674 // Have to be careful here--this can cause an infinite loop!
675 JaguarWriteWord(offset, data, who);
679 // GPU dword access (write)
// Writes a 32-bit longword into the GPU address space on behalf of `who`.
//  - Work RAM (up to base + 0x0FFC): stored with SET32.
//  - Control registers (up to base + 0x1C): dispatched per register.
//    NOTE(review): the switch, its case labels and several guarding
//    conditions are on lines not visible in this listing, so the mapping of
//    each statement group to a register offset should be confirmed.
//      * Flags write: stores the value with IMASK stripped, refreshes the
//        separate z/c/n flags, re-selects the register banks, clears the
//        interrupt latches named by the CINT bits, and re-runs IRQ handling
//        when the write cleared IMASK.
//      * Matrix control / matrix pointer (forced longword aligned) / data
//        organization: stored directly.
//      * Control write: latch and version bits (0xF7C0) are protected from
//        the written value; the visible code can raise a 68K interrupt,
//        assert GPU IRQ line 0, and end the 68K timeslice.
//      * Divide control: stored directly.
//  - Anything else is forwarded to JaguarWriteLong.
// Much of the body is left-over, commented-out debugging code.
681 void GPUWriteLong(uint32 offset, uint32 data, uint32 who/*=UNKNOWN*/)
683 if (offset >= 0xF02000 && offset <= 0xF020FF)
684 WriteLog("GPU: WriteLong--Attempt to write to GPU register file by %s!\n", whoName[who]);
686 // if ((offset >= GPU_WORK_RAM_BASE) && (offset < GPU_WORK_RAM_BASE + 0x1000))
687 if ((offset >= GPU_WORK_RAM_BASE) && (offset <= GPU_WORK_RAM_BASE + 0x0FFC))
692 WriteLog("GPU: Write32--unaligned write @ %08X [%08X] by %s\n", offset, data, whoName[who]);
698 SET32(gpu_ram_8, offset, data);
701 // else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset < GPU_CONTROL_RAM_BASE+0x20))
702 else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset <= GPU_CONTROL_RAM_BASE + 0x1C))
// True only when IMASK is currently set and the written value has it clear.
709 bool IMASKCleared = (gpu_flags & IMASK) && !(data & IMASK);
710 // NOTE: According to the JTRM, writing a 1 to IMASK has no effect; only the
711 // IRQ logic can set it. So we mask it out here to prevent problems...
712 gpu_flags = data & (~IMASK);
713 gpu_flag_z = gpu_flags & ZERO_FLAG;
714 gpu_flag_c = (gpu_flags & CARRY_FLAG) >> 1;
715 gpu_flag_n = (gpu_flags & NEGA_FLAG) >> 2;
716 GPUUpdateRegisterBanks();
717 gpu_control &= ~((gpu_flags & CINT04FLAGS) >> 3); // Interrupt latch clear bits
718 //Writing here is only an interrupt enable--this approach is just plain wrong!
720 //This, however, is A-OK! ;-)
721 if (IMASKCleared) // If IMASK was cleared,
722 GPUHandleIRQs(); // see if any other interrupts need servicing!
724 if (gpu_flags & (INT_ENA0 | INT_ENA1 | INT_ENA2 | INT_ENA3 | INT_ENA4))
725 WriteLog("GPU: Interrupt enable set by %s! Bits: %02X\n", whoName[who], (gpu_flags >> 4) & 0x1F);
726 WriteLog("GPU: REGPAGE %s...\n", (gpu_flags & REGPAGE ? "set" : "cleared"));
731 gpu_matrix_control = data;
734 // This can only point to long aligned addresses
735 gpu_pointer_to_matrix = data & 0xFFFFFFFC;
738 gpu_data_organization = data;
743 WriteLog("GPU: %s setting GPU PC to %08X %s\n", whoName[who], gpu_pc, (GPU_RUNNING ? "(GPU is RUNNING!)" : ""));//*/
748 // uint32 gpu_was_running = GPU_RUNNING;
749 data &= ~0xF7C0; // Disable writes to INT_LAT0-4 & TOM version number
751 // check for GPU -> CPU interrupt
754 //WriteLog("GPU->CPU interrupt\n");
755 if (TOMIRQEnabled(IRQ_GPU))
757 //This is the programmer's responsibility, to make sure the handler is valid, not ours!
758 // if ((TOMIRQEnabled(IRQ_GPU))// && (JaguarInterruptHandlerIsValid(64)))
760 TOMSetPendingGPUInt();
761 m68k_set_irq(2); // Set 68000 IPL 2
762 GPUReleaseTimeslice();
768 // check for CPU -> GPU interrupt #0
771 //WriteLog("CPU->GPU interrupt\n");
772 GPUSetIRQLine(0, ASSERT_LINE);
773 m68k_end_timeslice();
774 DSPReleaseTimeslice();
781 //WriteLog("asked to perform a single step (single step is %senabled)\n",(data&0x8)?"":"not ");
// Keep the protected bits from the old control value and take the rest from `data`.
784 gpu_control = (gpu_control & 0xF7C0) | (data & (~0xF7C0));
786 // if gpu wasn't running but is now running, execute a few cycles
787 #ifndef GPU_SINGLE_STEPPING
788 /* if (!gpu_was_running && GPU_RUNNING)
791 WriteLog("GPU: Write32--About to do stupid braindead GPU execution for 200 cycles.\n");
796 #endif // GPU_DEBUG//*/
798 if (gpu_control & 0x18)
800 #endif // #ifndef GPU_SINGLE_STEPPING
802 WriteLog("Write to GPU CTRL by %s: %08X ", whoName[who], data);
804 WriteLog(" --> Starting to run at %08X by %s...", gpu_pc, whoName[who]);
806 WriteLog(" --> Stopped by %s! (GPU_PC: %08X)", whoName[who], gpu_pc);
810 // GPUDumpDisassembly();
813 if (gpu_pc == 0xF035D8)
815 // GPUDumpDisassembly();
818 gpu_control &= 0xFFFFFFFE; // Don't run it and let's see what happens!
819 //Hmm. Seems to lock up when going into the demo...
820 //Try to disable the collision altogether!
823 extern int effect_start5;
824 static bool finished = false;
825 //if (GPU_RUNNING && effect_start5 && !finished)
826 if (GPU_RUNNING && effect_start5 && gpu_pc == 0xF035D8)
828 // Let's do a dump of $6528!
829 /* uint32 numItems = JaguarReadWord(0x6BD6);
830 WriteLog("\nDump of $6528: %u items.\n\n", numItems);
831 for(int i=0; i<numItems*3*4; i+=3*4)
833 WriteLog("\t%04X: %08X %08X %08X -> ", 0x6528+i, JaguarReadLong(0x6528+i),
834 JaguarReadLong(0x6528+i+4), JaguarReadLong(0x6528+i+8));
835 uint16 link = JaguarReadWord(0x6528+i+8+2);
836 for(int j=0; j<40; j+=4)
837 WriteLog("%08X ", JaguarReadLong(link + j));
841 // Let's try a manual blit here...
842 //This isn't working the way it should! !!! FIX !!!
843 //Err, actually, it is.
844 // NOW, it works right! Problem solved!!! It's a blitter bug!
845 /* uint32 src = 0x4D54, dst = 0xF03000, width = 10 * 4;
846 for(int y=0; y<127; y++)
848 for(int x=0; x<2; x++)
850 JaguarWriteLong(dst, JaguarReadLong(src));
855 src += width - (2 * 4);
859 WriteLog("\nGPU: About to execute collision detection code.\n\n");//*/
861 /* WriteLog("\nGPU: About to execute collision detection code. Data @ 4D54:\n\n");
863 for(int i=0x004D54; i<0x004D54+2048; i++)
865 WriteLog("%02X ", JaguarReadByte(i));
873 WriteLog("\n\nData @ F03000:\n\n");
875 for(int i=0xF03000; i<0xF03200; i++)
877 WriteLog("%02X ", JaguarReadByte(i));
891 /*if (!GPU_RUNNING && finished)
893 WriteLog("\nGPU: Finished collision detection code. Exiting!\n\n");
898 // (?) If we're set running by the M68K (or DSP?) then end its timeslice to
899 // allow the GPU a chance to run...
900 // Yes! This partially fixed Trevor McFur...
902 m68k_end_timeslice();
909 gpu_div_control = data;
911 // default: // unaligned long write
918 // JaguarWriteWord(offset, (data >> 16) & 0xFFFF, who);
919 // JaguarWriteWord(offset+2, data & 0xFFFF, who);
920 // We're a 32-bit processor, we can do a long write...!
921 JaguarWriteLong(offset, data, who);
925 // Change register banks if necessary
// Points gpu_reg / gpu_alternate_reg at the two register banks according to
// the REGPAGE bit of gpu_flags. While IMASK is set, bank 0 is always the
// primary bank regardless of REGPAGE.
// NOTE(review): the if/else on `bank` that picks between the two assignments
// below is on lines not visible in this listing.
927 void GPUUpdateRegisterBanks(void)
929 int bank = (gpu_flags & REGPAGE); // REGPAGE bit
931 if (gpu_flags & IMASK) // IMASK bit
932 bank = 0; // IMASK forces main bank to be bank 0
935 gpu_reg = gpu_reg_bank_1, gpu_alternate_reg = gpu_reg_bank_0;
937 gpu_reg = gpu_reg_bank_0, gpu_alternate_reg = gpu_reg_bank_1;
// Services a pending GPU interrupt, if one can be taken.
// Does nothing while IMASK is set. Otherwise it reads the latch bits
// (gpu_control bits 6-10) and the enable bits (gpu_flags bits 4-8), picks an
// interrupt number `which`, re-selects the register banks, stores the return
// address (gpu_pc - 2) at the address held in r31, and jumps to the handler at
// GPU_WORK_RAM_BASE + which * 0x10, leaving that address in r30 as well.
// NOTE(review): the enable test, the selection of `which`, the setting of
// IMASK and the r31 pre-decrement are on lines not visible in this listing.
940 void GPUHandleIRQs(void)
942 // Bail out if we're already in an interrupt!
943 if (gpu_flags & IMASK)
946 // Get the interrupt latch & enable bits
947 uint32 bits = (gpu_control >> 6) & 0x1F, mask = (gpu_flags >> 4) & 0x1F;
949 // Bail out if latched interrupts aren't enabled
954 // Determine which interrupt to service
955 uint32 which = 0; //Isn't there a #pragma to disable this warning???
968 WriteLog("GPU: Generating IRQ #%i\n", which);
970 // set the interrupt flag
972 GPUUpdateRegisterBanks();
974 // subqt #4,r31 ; pre-decrement stack pointer
975 // move pc,r30 ; address of interrupted code
976 // store r30,(r31) ; store return address
978 GPUWriteLong(gpu_reg[31], gpu_pc - 2, GPU);
980 // movei #service_address,r30 ; pointer to ISR entry
981 // jump (r30) ; jump to ISR
983 gpu_pc = gpu_reg[30] = GPU_WORK_RAM_BASE + (which * 0x10);
// Updates the interrupt latch for `irqline` in gpu_control; latch bit n lives
// at 0x0040 << n. The latch is always cleared first; it is then set again and
// GPUHandleIRQs() is called.
// NOTE(review): the test on `state` that guards the assert path is on a line
// not visible in this listing (presumably state == ASSERT_LINE); confirm.
986 void GPUSetIRQLine(int irqline, int state)
989 WriteLog("GPU: Setting GPU IRQ line #%i\n", irqline);
991 uint32 mask = 0x0040 << irqline;
992 gpu_control &= ~mask; // Clear the interrupt latch
996 gpu_control |= mask; // Assert the interrupt latch
997 GPUHandleIRQs(); // And handle the interrupt...
// Initialisation and reset code.
// NOTE(review): the function headers for this region are not visible in this
// listing; the statements below appear to belong to an init routine (builds
// the branch condition table) followed by a reset routine.
1001 //TEMPORARY: Testing only!
1007 // memory_malloc_secure((void **)&gpu_ram_8, 0x1000, "GPU work RAM");
1008 // memory_malloc_secure((void **)&gpu_reg_bank_0, 32 * sizeof(int32), "GPU bank 0 regs");
1009 // memory_malloc_secure((void **)&gpu_reg_bank_1, 32 * sizeof(int32), "GPU bank 1 regs");
1011 build_branch_condition_table();
1015 //TEMPORARY: Testing only!
// Reset values: program counter at $F03000, control register 0x2800, data
// organization all ones, everything else zero.
1022 // GPU registers (directly visible)
1023 gpu_flags = 0x00000000;
1024 gpu_matrix_control = 0x00000000;
1025 gpu_pointer_to_matrix = 0x00000000;
1026 gpu_data_organization = 0xFFFFFFFF;
1027 gpu_pc = 0x00F03000;
1028 gpu_control = 0x00002800; // Correctly sets this as TOM Rev. 2
1029 gpu_hidata = 0x00000000;
1030 gpu_remain = 0x00000000; // These two registers are RO/WO
1031 gpu_div_control = 0x00000000;
1033 // GPU internal register
1034 gpu_acc = 0x00000000;
// Bank 0 becomes the active bank; both banks are zeroed.
1036 gpu_reg = gpu_reg_bank_0;
1037 gpu_alternate_reg = gpu_reg_bank_1;
1039 for(int i=0; i<32; i++)
1040 gpu_reg[i] = gpu_alternate_reg[i] = 0x00000000;
// Work RAM is filled with 0xFF bytes.
1043 memset(gpu_ram_8, 0xFF, 0x1000);
1045 //not needed GPUInterruptPending = false;
// Accessor returning a uint32 -- presumably the current gpu_pc.
// NOTE(review): the body is not visible in this listing; confirm.
1049 uint32 GPUReadPC(void)
// Zeroes all 64 per-opcode execution counters and logs that it did so.
1054 void GPUResetStats(void)
1056 for(uint32 i=0; i<64; i++)
1057 gpu_opcode_use[i] = 0;
1058 WriteLog("--> GPU stats were reset!\n");
// Logs a disassembly of $F03000-$F03FFF. Each dasmjag() call writes one
// instruction's text into `buffer` and its return value advances `j`.
// NOTE(review): the declarations of `buffer` and `oldj` are on lines not
// visible in this listing; `oldj` presumably holds the address before the advance.
1061 void GPUDumpDisassembly(void)
1065 WriteLog("\n---[GPU code at 00F03000]---------------------------\n");
1066 uint32 j = 0xF03000;
1067 while (j <= 0xF03FFF)
1070 j += dasmjag(JAGUAR_GPU, buffer, j);
1071 WriteLog("\t%08X: %s\n", oldj, buffer);
// Logs the N, C and Z flags followed by all 32 registers of each bank,
// four registers per output line (eight lines per bank).
1075 void GPUDumpRegisters(void)
1077 WriteLog("\n---[GPU flags: NCZ %d%d%d]-----------------------\n", gpu_flag_n, gpu_flag_c, gpu_flag_z);
1078 WriteLog("\nRegisters bank 0\n");
1079 for(int j=0; j<8; j++)
1081 WriteLog("\tR%02i = %08X R%02i = %08X R%02i = %08X R%02i = %08X\n",
1082 (j << 2) + 0, gpu_reg_bank_0[(j << 2) + 0],
1083 (j << 2) + 1, gpu_reg_bank_0[(j << 2) + 1],
1084 (j << 2) + 2, gpu_reg_bank_0[(j << 2) + 2],
1085 (j << 2) + 3, gpu_reg_bank_0[(j << 2) + 3]);
1087 WriteLog("Registers bank 1\n");
1088 for(int j=0; j<8; j++)
1090 WriteLog("\tR%02i = %08X R%02i = %08X R%02i = %08X R%02i = %08X\n",
1091 (j << 2) + 0, gpu_reg_bank_1[(j << 2) + 0],
1092 (j << 2) + 1, gpu_reg_bank_1[(j << 2) + 1],
1093 (j << 2) + 2, gpu_reg_bank_1[(j << 2) + 2],
1094 (j << 2) + 3, gpu_reg_bank_1[(j << 2) + 3]);
// Logs the whole of GPU work RAM as hex, four bytes per line, labelled with
// the corresponding address starting at $F03000. The loop's last iteration is
// i = 0xFFC, so the final byte read is gpu_ram_8[0xFFF].
1098 void GPUDumpMemory(void)
1100 WriteLog("\n---[GPU data at 00F03000]---------------------------\n");
1101 for(int i=0; i<0xFFF; i+=4)
1102 WriteLog("\t%08X: %02X %02X %02X %02X\n", 0xF03000+i, gpu_ram_8[i],
1103 gpu_ram_8[i+1], gpu_ram_8[i+2], gpu_ram_8[i+3]);
// Shutdown report: logs where the GPU stopped, its interrupt latch/enable
// bits, a disassembly of work RAM, and how many times each opcode executed.
// NOTE(review): the enclosing function header is not visible in this listing.
1108 WriteLog("GPU: Stopped at PC=%08X (GPU %s running)\n", (unsigned int)gpu_pc, GPU_RUNNING ? "was" : "wasn't");
1110 // Get the interrupt latch & enable bits
1111 uint8 bits = (gpu_control >> 6) & 0x1F, mask = (gpu_flags >> 4) & 0x1F;
1112 WriteLog("GPU: Latch bits = %02X, enable bits = %02X\n", bits, mask);
1115 GPUDumpDisassembly();
1117 WriteLog("\nGPU opcodes use:\n");
1118 for(int i=0; i<64; i++)
// Only opcodes that were actually executed are listed.
1120 if (gpu_opcode_use[i])
// %lu needs an unsigned long argument; the counter is a uint32, so cast it
// explicitly rather than rely on the two types having the same width.
1121 WriteLog("\t%17s %lu\n", gpu_opcode_str[i], (unsigned long)gpu_opcode_use[i]);
1125 // memory_free(gpu_ram_8);
1126 // memory_free(gpu_reg_bank_0);
1127 // memory_free(gpu_reg_bank_1);
1131 // Main GPU execution core
// GPUExec(cycles) runs GPU instructions until the cycle budget is used up or
// the run bit in gpu_control is cleared. For each instruction it:
//   1. fetches the 16-bit word at gpu_pc with GPUReadWord;
//   2. splits it into the opcode number (bits 15-10) and two 5-bit operand
//      fields (bits 9-5 and 4-0);
//   3. calls the handler from gpu_opcode[];
//   4. subtracts the opcode's cost from `cycles` and bumps its usage counter.
// The first time gpu_pc is seen outside $F03000-$F03FFF a warning is logged;
// `tripwire` presumably limits that to once (its update is not visible here).
// NOTE(review): the advance of gpu_pc past the fetched word, and any test of
// gpu_releaseTimeSlice_flag inside the loop, are on lines not visible in this
// listing. Most of the body is commented-out, game-specific debugging code.
1133 static int testCount = 1;
1135 static bool tripwire = false;
1136 void GPUExec(int32 cycles)
1141 #ifdef GPU_SINGLE_STEPPING
1142 if (gpu_control & 0x18)
1145 gpu_control &= ~0x10;
1149 gpu_releaseTimeSlice_flag = 0;
1152 while (cycles > 0 && GPU_RUNNING)
1154 if (gpu_ram_8[0x054] == 0x98 && gpu_ram_8[0x055] == 0x0A && gpu_ram_8[0x056] == 0x03
1155 && gpu_ram_8[0x057] == 0x00 && gpu_ram_8[0x058] == 0x00 && gpu_ram_8[0x059] == 0x00)
1157 if (gpu_pc == 0xF03000)
1159 extern uint32 starCount;
1161 /* WriteLog("GPU: Starting starfield generator... Dump of [R03=%08X]:\n", gpu_reg_bank_0[03]);
1162 uint32 base = gpu_reg_bank_0[3];
1163 for(uint32 i=0; i<0x100; i+=16)
1165 WriteLog("%02X: ", i);
1166 for(uint32 j=0; j<16; j++)
1168 WriteLog("%02X ", JaguarReadByte(base + i + j));
1173 // if (gpu_pc == 0xF03)
1177 /*if (gpu_pc == 0xF03B9E && gpu_reg_bank_0[01] == 0)
1180 WriteLog("GPU: Starting disassembly log...\n");
1183 /*if (gpu_pc == 0xF0359A)
1188 /* gpu_flag_c = (gpu_flag_c ? 1 : 0);
1189 gpu_flag_z = (gpu_flag_z ? 1 : 0);
1190 gpu_flag_n = (gpu_flag_n ? 1 : 0);*/
// Fetch and decode: opcode number in the top 6 bits, then two 5-bit fields.
1192 uint16 opcode = GPUReadWord(gpu_pc, GPU);
1193 uint32 index = opcode >> 10;
1194 gpu_instruction = opcode; // Added for GPU #3...
1195 gpu_opcode_first_parameter = (opcode >> 5) & 0x1F;
1196 gpu_opcode_second_parameter = opcode & 0x1F;
1197 /*if (gpu_pc == 0xF03BE8)
1198 WriteLog("Start of OP frame write...\n");
1199 if (gpu_pc == 0xF03EEE)
1200 WriteLog("--> Writing BRANCH object ---\n");
1201 if (gpu_pc == 0xF03F62)
1202 WriteLog("--> Writing BITMAP object ***\n");//*/
1203 /*if (gpu_pc == 0xF03546)
1205 WriteLog("\n--> GPU PC: F03546\n");
1207 GPUDumpDisassembly();
1209 /*if (gpu_pc == 0xF033F6)
1211 WriteLog("\n--> GPU PC: F033F6\n");
1213 GPUDumpDisassembly();
1215 /*if (gpu_pc == 0xF033CC)
1217 WriteLog("\n--> GPU PC: F033CC\n");
1219 GPUDumpDisassembly();
1221 /*if (gpu_pc == 0xF033D6)
1223 WriteLog("\n--> GPU PC: F033D6 (#%d)\n", testCount++);
1227 /*if (gpu_pc == 0xF033D8)
1229 WriteLog("\n--> GPU PC: F033D8 (#%d)\n", testCount++);
1233 /*if (gpu_pc == 0xF0358E)
1235 WriteLog("\n--> GPU PC: F0358E (#%d)\n", testCount++);
1239 /*if (gpu_pc == 0xF034CA)
1241 WriteLog("\n--> GPU PC: F034CA (#%d)\n", testCount++);
1244 /*if (gpu_pc == 0xF034CA)
1246 len = gpu_reg[1] + 4;//, r9save = gpu_reg[9];
1247 WriteLog("\nAbout to subtract [#%d] (R14=%08X, R15=%08X, R9=%08X):\n ", testCount++, gpu_reg[14], gpu_reg[15], gpu_reg[9]);
1248 for(int i=0; i<len; i+=4)
1249 WriteLog(" %08X", GPUReadLong(gpu_reg[15]+i));
1251 for(int i=0; i<len; i+=4)
1252 WriteLog(" %08X", GPUReadLong(gpu_reg[14]+i));
1255 if (gpu_pc == 0xF034DE)
1257 WriteLog("\nSubtracted! (R14=%08X, R15=%08X):\n ", gpu_reg[14], gpu_reg[15]);
1258 for(int i=0; i<len; i+=4)
1259 WriteLog(" %08X", GPUReadLong(gpu_reg[15]+i));
1261 for(int i=0; i<len; i+=4)
1262 WriteLog(" %08X", GPUReadLong(gpu_reg[14]+i));
1264 for(int i=0; i<len; i+=4)
1265 WriteLog(" --------");
1267 for(int i=0; i<len; i+=4)
1268 WriteLog(" %08X", GPUReadLong(gpu_reg[9]+4+i));
1271 /*if (gpu_pc == 0xF035C8)
1273 WriteLog("\n--> GPU PC: F035C8 (#%d)\n", testCount++);
1275 GPUDumpDisassembly();
1280 // gpu_reset_stats();
1281 static char buffer[512];
1282 dasmjag(JAGUAR_GPU, buffer, gpu_pc);
1283 WriteLog("GPU: [%08X] %s (RM=%08X, RN=%08X) -> ", gpu_pc, buffer, RM, RN);
1285 //$E400 -> 1110 01 -> $39 -> 57
// Dispatch to the handler for this opcode.
1288 gpu_opcode[index]();
1290 // gpu2_opcode[index]();
1292 //GPU #3 (Doesn't show ATARI logo! #1 & #2 do...)
1294 // gpu3_opcode[index]();
1297 //GPU: [00F03548] jr nz,00F03560 (0xd561) (RM=00F03114, RN=00000004) -> --> JR: Branch taken.
1298 /*static bool firstTime = true;
1299 if (gpu_pc == 0xF03548 && firstTime)
1302 // firstTime = false;
1304 //static char buffer[512];
1306 //while (k<0xF0356C)
1309 //k += dasmjag(JAGUAR_GPU, buffer, k);
1310 //WriteLog("GPU: [%08X] %s\n", oldk, buffer);
1312 // gpu_start_log = 1;
1314 //GPU: [00F0354C] jump nz,(r29) (0xd3a1) (RM=00F03314, RN=00000004) -> (RM=00F03314, RN=00000004)
1315 /*if (gpu_pc == 0xF0354C)
1316 gpu_flag_z = 0;//, gpu_start_log = 1;//*/
// Charge the instruction against the budget and record that it ran.
1318 cycles -= gpu_opcode_cycles[index];
1319 gpu_opcode_use[index]++;
1321 WriteLog("(RM=%08X, RN=%08X)\n", RM, RN);//*/
1322 if ((gpu_pc < 0xF03000 || gpu_pc > 0xF03FFF) && !tripwire)
1324 WriteLog("GPU: Executing outside local RAM! GPU_PC: %08X\n", gpu_pc);
1337 GPU opcodes use (offset punch--vertically below bad guy):
1359 load_r14_indexed 1183
1360 load_r15_indexed 1125
1363 store_r14_indexed 320
// JUMP cc,(Rm): conditional jump. IMM_2 is the condition code that is handed
// to BRANCH_CONDITION, and the jump target is the value held in RM.
1371 static void gpu_opcode_jump(void)
// Mnemonic for each of the 32 condition codes; only used by the log line below.
1374 const char * condition[32] =
1375 { "T", "nz", "z", "???", "nc", "nc nz", "nc z", "???", "c", "c nz",
1376 "c z", "???", "???", "???", "???", "???", "???", "???", "???",
1377 "???", "nn", "nn nz", "nn z", "???", "n", "n nz", "n z", "???",
1378 "???", "???", "???", "F" };
1380 WriteLog("%06X: JUMP %s, (R%02u) [NCZ:%u%u%u, R%02u=%08X] ", gpu_pc-2, condition[IMM_2], IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM);
1383 /* gpu_flag_c = (gpu_flag_c ? 1 : 0);
1384 gpu_flag_z = (gpu_flag_z ? 1 : 0);
1385 gpu_flag_n = (gpu_flag_n ? 1 : 0);*/
1386 // KLUDGE: Used by BRANCH_CONDITION
// Flags packed as N = bit 2, C = bit 1, Z = bit 0. The variable is never named
// again in the visible lines; per the note above, the BRANCH_CONDITION macro
// reads it.
1387 uint32 jaguar_flags = (gpu_flag_n << 2) | (gpu_flag_c << 1) | gpu_flag_z;
1389 if (BRANCH_CONDITION(IMM_2))
1393 WriteLog("Branched!\n");
1396 WriteLog(" --> JUMP: Branch taken.\n");
// The target is latched first and gpu_pc is only redirected afterwards.
// NOTE(review): the statement between the latch and the assignment is not
// visible here -- presumably it runs the delay-slot instruction. Confirm.
1397 uint32 delayed_pc = RM;
1399 gpu_pc = delayed_pc;
1400 /* uint16 opcode = GPUReadWord(gpu_pc, GPU);
1401 gpu_opcode_first_parameter = (opcode >> 5) & 0x1F;
1402 gpu_opcode_second_parameter = opcode & 0x1F;
1404 gpu_pc = delayed_pc;
1405 gpu_opcode[opcode>>10]();//*/
1410 WriteLog("Branch NOT taken.\n");
// JR cc,offset: conditional PC-relative jump. IMM_2 is the condition code and
// IMM_1 is a 5-bit signed displacement counted in 16-bit words.
1414 static void gpu_opcode_jr(void)
// Mnemonic for each of the 32 condition codes; only used by the log line below.
1417 const char * condition[32] =
1418 { "T", "nz", "z", "???", "nc", "nc nz", "nc z", "???", "c", "c nz",
1419 "c z", "???", "???", "???", "???", "???", "???", "???", "???",
1420 "???", "nn", "nn nz", "nn z", "???", "n", "n nz", "n z", "???",
1421 "???", "???", "???", "F" };
1423 WriteLog("%06X: JR %s, %06X [NCZ:%u%u%u] ", gpu_pc-2, condition[IMM_2], gpu_pc+((IMM_1 & 0x10 ? 0xFFFFFFF0 | IMM_1 : IMM_1) * 2), gpu_flag_n, gpu_flag_c, gpu_flag_z);
1425 /* if (CONDITION(jaguar.op & 31))
1427 int32 r1 = (INT8)((jaguar.op >> 2) & 0xF8) >> 2;
1428 uint32 newpc = jaguar.PC + r1;
1430 jaguar.op = ROPCODE(jaguar.PC);
1432 (*jaguar.table[jaguar.op >> 10])();
1434 jaguar_icount -= 3; // 3 wait states guaranteed
1437 /* gpu_flag_n = (gpu_flag_n ? 1 : 0);
1438 gpu_flag_c = (gpu_flag_c ? 1 : 0);
1439 gpu_flag_z = (gpu_flag_z ? 1 : 0);*/
1440 // KLUDGE: Used by BRANCH_CONDITION
// Flags packed as N = bit 2, C = bit 1, Z = bit 0. The variable is never named
// again in the visible lines; per the note above, the BRANCH_CONDITION macro
// reads it.
1441 uint32 jaguar_flags = (gpu_flag_n << 2) | (gpu_flag_c << 1) | gpu_flag_z;
1443 if (BRANCH_CONDITION(IMM_2))
1447 WriteLog("Branched!\n");
1450 WriteLog(" --> JR: Branch taken.\n");
// Bit 4 of IMM_1 is the sign bit: when set, the upper 28 bits are filled with
// ones. The displacement is then doubled and added to the current gpu_pc.
1451 int32 offset = (IMM_1 & 0x10 ? 0xFFFFFFF0 | IMM_1 : IMM_1); // Sign extend IMM_1
1452 int32 delayed_pc = gpu_pc + (offset * 2);
// NOTE(review): the statement between the target computation and this
// assignment is not visible here -- presumably it runs the delay-slot
// instruction. Confirm.
1454 gpu_pc = delayed_pc;
1455 /* uint16 opcode = GPUReadWord(gpu_pc, GPU);
1456 gpu_opcode_first_parameter = (opcode >> 5) & 0x1F;
1457 gpu_opcode_second_parameter = opcode & 0x1F;
1459 gpu_pc = delayed_pc;
1460 gpu_opcode[opcode>>10]();//*/
1465 WriteLog("Branch NOT taken.\n");
1469 static void gpu_opcode_add(void)
1473 WriteLog("%06X: ADD R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1475 uint32 res = RN + RM;
1476 CLR_ZNC; SET_ZNC_ADD(RN, RM, res);
1480 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
// ADDC Rm,Rn: add with carry-in. The sum is RN + RM + the current carry flag.
1484 static void gpu_opcode_addc(void)
1488 WriteLog("%06X: ADDC R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1490 /* int dreg = jaguar.op & 31;
1491 uint32 r1 = jaguar.r[(jaguar.op >> 5) & 31];
1492 uint32 r2 = jaguar.r[dreg];
1493 uint32 res = r2 + r1 + ((jaguar.FLAGS >> 1) & 1);
1494 jaguar.r[dreg] = res;
1495 CLR_ZNC; SET_ZNC_ADD(r2,r1,res);*/
1497 uint32 res = RN + RM + gpu_flag_c;
// The incoming carry is copied before SET_ZNC_ADD runs, so the macro argument
// below still uses the carry-in value.
1498 uint32 carry = gpu_flag_c;
1499 // SET_ZNC_ADD(RN, RM, res); //???BUG??? Yes!
// The carry-in is folded into the first operand handed to the flag macro.
// NOTE(review): RN + carry wraps to 0 when RN is 0xFFFFFFFF and carry is 1;
// whether the resulting carry-out is then correct depends on SET_C_ADD, which
// is not visible here. Confirm.
1500 SET_ZNC_ADD(RN + carry, RM, res);
1501 // SET_ZNC_ADD(RN, RM + carry, res);
1505 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1509 static void gpu_opcode_addq(void)
1513 WriteLog("%06X: ADDQ #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1515 uint32 r1 = gpu_convert_zero[IMM_1];
1516 uint32 res = RN + r1;
1517 CLR_ZNC; SET_ZNC_ADD(RN, r1, res);
1521 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1525 static void gpu_opcode_addqt(void)
1527 #ifdef GPU_DIS_ADDQT
1529 WriteLog("%06X: ADDQT #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1531 RN += gpu_convert_zero[IMM_1];
1532 #ifdef GPU_DIS_ADDQT
1534 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1538 static void gpu_opcode_sub(void)
1542 WriteLog("%06X: SUB R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1544 uint32 res = RN - RM;
1545 SET_ZNC_SUB(RN, RM, res);
1549 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
// SUBC Rm,Rn: subtract with borrow-in. The difference is RN - RM - the current
// carry flag.
1553 static void gpu_opcode_subc(void)
1557 WriteLog("%06X: SUBC R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1559 uint32 res = RN - RM - gpu_flag_c;
// The incoming carry is copied before SET_ZNC_SUB runs, so the macro argument
// below still uses the borrow-in value.
1560 uint32 borrow = gpu_flag_c;
1561 // SET_ZNC_SUB(RN, RM, res); //???BUG??? YES!!!
1562 //No matter how you do it, there is a problem. With below, it's 0-0 with carry,
1563 //and the one below it it's FFFFFFFF - FFFFFFFF with carry... !!! FIX !!!
1564 // SET_ZNC_SUB(RN - borrow, RM, res);
// The borrow-in is folded into the subtrahend handed to the flag macro.
// NOTE(review): RM + borrow wraps to 0 when RM is 0xFFFFFFFF and borrow is 1,
// which is the failing case the comment above describes. A wider (64-bit)
// comparison would avoid the wrap; SET_C_SUB's live definition is not visible
// here, so confirm before changing.
1565 SET_ZNC_SUB(RN, RM + borrow, res);
1569 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1573 N = 5, M = 3, 3 - 5 = -2, C = 1... Or, in our case:
1574 N = 0, M = 1, 0 - 1 = -1, C = 0!
1576 #define SET_C_SUB(a,b) (gpu_flag_c = ((uint32)(b) > (uint32)(a)))
1577 #define SET_ZN(r) SET_N(r); SET_Z(r)
1578 #define SET_ZNC_ADD(a,b,r) SET_N(r); SET_Z(r); SET_C_ADD(a,b)
1579 #define SET_ZNC_SUB(a,b,r) SET_N(r); SET_Z(r); SET_C_SUB(a,b)
1581 static void gpu_opcode_subq(void)
1585 WriteLog("%06X: SUBQ #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1587 uint32 r1 = gpu_convert_zero[IMM_1];
1588 uint32 res = RN - r1;
1589 SET_ZNC_SUB(RN, r1, res);
1593 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1597 static void gpu_opcode_subqt(void)
1599 #ifdef GPU_DIS_SUBQT
1601 WriteLog("%06X: SUBQT #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1603 RN -= gpu_convert_zero[IMM_1];
1604 #ifdef GPU_DIS_SUBQT
1606 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1610 static void gpu_opcode_cmp(void)
1614 WriteLog("%06X: CMP R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1616 uint32 res = RN - RM;
1617 SET_ZNC_SUB(RN, RM, res);
1620 WriteLog("[NCZ:%u%u%u]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z);
// CMPQ #n,Rn: compare RN against a small signed immediate. Only the flags are
// set from RN - n in the visible lines; no visible line writes res back to RN.
1624 static void gpu_opcode_cmpq(void)
// Maps the 5-bit field to its signed value: entries 0..15 are 0..15 and
// entries 16..31 are -16..-1 (two's-complement sign extension of 5 bits).
1626 static int32 sqtable[32] =
1627 { 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,-16,-15,-14,-13,-12,-11,-10,-9,-8,-7,-6,-5,-4,-3,-2,-1 };
1630 WriteLog("%06X: CMPQ #%d, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, sqtable[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
// The index is masked to 0..31 here (the log line above indexes with IMM_1
// unmasked). The signed table entry is converted to uint32, so negative
// immediates take part in the subtraction as their two's-complement pattern.
1632 uint32 r1 = sqtable[IMM_1 & 0x1F]; // I like this better -> (INT8)(jaguar.op >> 2) >> 3;
1633 uint32 res = RN - r1;
1634 SET_ZNC_SUB(RN, r1, res);
1637 WriteLog("[NCZ:%u%u%u]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z);
1641 static void gpu_opcode_and(void)
1645 WriteLog("%06X: AND R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1651 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1655 static void gpu_opcode_or(void)
1659 WriteLog("%06X: OR R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1665 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1669 static void gpu_opcode_xor(void)
1673 WriteLog("%06X: XOR R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1679 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1683 static void gpu_opcode_not(void)
1687 WriteLog("%06X: NOT R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1693 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1697 static void gpu_opcode_move_pc(void)
1699 #ifdef GPU_DIS_MOVEPC
1701 WriteLog("%06X: MOVE PC, R%02u [NCZ:%u%u%u, PC=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, gpu_pc-2, IMM_2, RN);
1703 // Should be previous PC--this might not always be previous instruction!
1704 // Then again, this will point right at the *current* instruction, i.e., MOVE PC,R!
1706 #ifdef GPU_DIS_MOVEPC
1708 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
// SAT8 Rn: saturate RN, read as a signed 32-bit value, to the range 0..0xFF.
1712 static void gpu_opcode_sat8(void)
1716 WriteLog("%06X: SAT8 R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
// Negative (as int32) clamps to 0; anything above 0xFF clamps to 0xFF;
// otherwise RN is left unchanged.
1718 RN = ((int32)RN < 0 ? 0 : (RN > 0xFF ? 0xFF : RN));
1722 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
// SAT16 Rn: saturate RN, read as a signed 32-bit value, to the range 0..0xFFFF.
1726 static void gpu_opcode_sat16(void)
// Negative (as int32) clamps to 0; anything above 0xFFFF clamps to 0xFFFF;
// otherwise RN is left unchanged.
1728 RN = ((int32)RN < 0 ? 0 : (RN > 0xFFFF ? 0xFFFF : RN));
// SAT24 Rn: saturate RN, read as a signed 32-bit value, to the range
// 0..0xFFFFFF.
1732 static void gpu_opcode_sat24(void)
// Negative (as int32) clamps to 0; anything above 0xFFFFFF clamps to
// 0xFFFFFF; otherwise RN is left unchanged.
1734 RN = ((int32)RN < 0 ? 0 : (RN > 0xFFFFFF ? 0xFFFFFF : RN));
1738 static void gpu_opcode_store_r14_indexed(void)
1740 #ifdef GPU_DIS_STORE14I
1742 WriteLog("%06X: STORE R%02u, (R14+$%02X) [NCZ:%u%u%u, R%02u=%08X, R14+$%02X=%08X]\n", gpu_pc-2, IMM_2, gpu_convert_zero[IMM_1] << 2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, gpu_convert_zero[IMM_1] << 2, gpu_reg[14]+(gpu_convert_zero[IMM_1] << 2));
1744 #ifdef GPU_CORRECT_ALIGNMENT
1745 uint32 address = gpu_reg[14] + (gpu_convert_zero[IMM_1] << 2);
1747 if (address >= 0xF03000 && address <= 0xF03FFF)
1748 GPUWriteLong(address & 0xFFFFFFFC, RN, GPU);
1750 GPUWriteLong(address, RN, GPU);
1752 GPUWriteLong(gpu_reg[14] + (gpu_convert_zero[IMM_1] << 2), RN, GPU);
1756 static void gpu_opcode_store_r15_indexed(void)
1758 #ifdef GPU_DIS_STORE15I
1760 WriteLog("%06X: STORE R%02u, (R15+$%02X) [NCZ:%u%u%u, R%02u=%08X, R15+$%02X=%08X]\n", gpu_pc-2, IMM_2, gpu_convert_zero[IMM_1] << 2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, gpu_convert_zero[IMM_1] << 2, gpu_reg[15]+(gpu_convert_zero[IMM_1] << 2));
1762 #ifdef GPU_CORRECT_ALIGNMENT
1763 uint32 address = gpu_reg[15] + (gpu_convert_zero[IMM_1] << 2);
1765 if (address >= 0xF03000 && address <= 0xF03FFF)
1766 GPUWriteLong(address & 0xFFFFFFFC, RN, GPU);
1768 GPUWriteLong(address, RN, GPU);
1770 GPUWriteLong(gpu_reg[15] + (gpu_convert_zero[IMM_1] << 2), RN, GPU);
1774 static void gpu_opcode_load_r14_ri(void)
1776 #ifdef GPU_DIS_LOAD14R
1778 WriteLog("%06X: LOAD (R14+R%02u), R%02u [NCZ:%u%u%u, R14+R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM+gpu_reg[14], IMM_2, RN);
1780 #ifdef GPU_CORRECT_ALIGNMENT
1781 uint32 address = gpu_reg[14] + RM;
1783 if (address >= 0xF03000 && address <= 0xF03FFF)
1784 RN = GPUReadLong(address & 0xFFFFFFFC, GPU);
1786 RN = GPUReadLong(address, GPU);
1788 RN = GPUReadLong(gpu_reg[14] + RM, GPU);
1790 #ifdef GPU_DIS_LOAD14R
1792 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1796 static void gpu_opcode_load_r15_ri(void)
1798 #ifdef GPU_DIS_LOAD15R
1800 WriteLog("%06X: LOAD (R15+R%02u), R%02u [NCZ:%u%u%u, R15+R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM+gpu_reg[15], IMM_2, RN);
1802 #ifdef GPU_CORRECT_ALIGNMENT
1803 uint32 address = gpu_reg[15] + RM;
1805 if (address >= 0xF03000 && address <= 0xF03FFF)
1806 RN = GPUReadLong(address & 0xFFFFFFFC, GPU);
1808 RN = GPUReadLong(address, GPU);
1810 RN = GPUReadLong(gpu_reg[15] + RM, GPU);
1812 #ifdef GPU_DIS_LOAD15R
1814 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1818 static void gpu_opcode_store_r14_ri(void)
1820 #ifdef GPU_DIS_STORE14R
1822 WriteLog("%06X: STORE R%02u, (R14+R%02u) [NCZ:%u%u%u, R%02u=%08X, R14+R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM+gpu_reg[14]);
1824 #ifdef GPU_CORRECT_ALIGNMENT
1825 uint32 address = gpu_reg[14] + RM;
1827 if (address >= 0xF03000 && address <= 0xF03FFF)
1828 GPUWriteLong(address & 0xFFFFFFFC, RN, GPU);
1830 GPUWriteLong(address, RN, GPU);
1832 GPUWriteLong(gpu_reg[14] + RM, RN, GPU);
// STORE Rn,(R15+Rm): write RN as a long to the address R15 + RM.
1836 static void gpu_opcode_store_r15_ri(void)
1838 #ifdef GPU_DIS_STORE15R
1840 WriteLog("%06X: STORE R%02u, (R15+R%02u) [NCZ:%u%u%u, R%02u=%08X, R15+R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM+gpu_reg[15]);
// Guard changed from GPU_CORRECT_ALIGNMENT_STORE to GPU_CORRECT_ALIGNMENT so
// this opcode follows the same switch as the other indexed loads and stores
// (for example the R14+Rm store); under the old name the aligned path below
// was only compiled if that separate macro happened to be defined.
1842 #ifdef GPU_CORRECT_ALIGNMENT
1843 uint32 address = gpu_reg[15] + RM;
// Inside $F03000-$F03FFF the two low address bits are dropped (long-aligned
// write); outside that window the address is used as computed.
1845 if (address >= 0xF03000 && address <= 0xF03FFF)
1846 GPUWriteLong(address & 0xFFFFFFFC, RN, GPU);
1848 GPUWriteLong(address, RN, GPU);
1850 GPUWriteLong(gpu_reg[15] + RM, RN, GPU);
1854 static void gpu_opcode_nop(void)
1858 WriteLog("%06X: NOP [NCZ:%u%u%u]\n", gpu_pc-2, gpu_flag_n, gpu_flag_c, gpu_flag_z);
// PACK / UNPACK Rn: IMM_1 selects the direction (0 = pack, anything else =
// unpack) and the result replaces RN.
//   pack:   bits 22-25 -> 12-15, bits 13-16 -> 8-11, bits 0-7 kept in place
//   unpack: bits 12-15 -> 22-25, bits 8-11 -> 13-16, bits 0-7 kept in place
// NOTE(review): val is declared on a line that is not visible here --
// presumably a copy of RN taken on entry. Confirm.
1862 static void gpu_opcode_pack(void)
// Fixed: the format string used to contain a second "R%02u=%08X" pair, giving
// ten conversions for the eight arguments actually passed (undefined
// behaviour in a varargs call). The surplus pair is removed so format and
// arguments match.
1866 WriteLog("%06X: %s R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, (!IMM_1 ? "PACK " : "UNPACK"), IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1870 //BUG! if (RM == 0) // Pack
1871 if (IMM_1 == 0) // Pack
1872 RN = ((val >> 10) & 0x0000F000) | ((val >> 5) & 0x00000F00) | (val & 0x000000FF);
1874 RN = ((val & 0x0000F000) << 10) | ((val & 0x00000F00) << 5) | (val & 0x000000FF);
1877 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1881 static void gpu_opcode_storeb(void)
1883 #ifdef GPU_DIS_STOREB
1885 WriteLog("%06X: STOREB R%02u, (R%02u) [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM);
1888 // Would appear to be so...!
1889 if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1890 GPUWriteLong(RM, RN & 0xFF, GPU);
1892 JaguarWriteByte(RM, RN, GPU);
1895 static void gpu_opcode_storew(void)
1897 #ifdef GPU_DIS_STOREW
1899 WriteLog("%06X: STOREW R%02u, (R%02u) [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM);
1901 #ifdef GPU_CORRECT_ALIGNMENT
1902 if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1903 GPUWriteLong(RM & 0xFFFFFFFE, RN & 0xFFFF, GPU);
1905 JaguarWriteWord(RM, RN, GPU);
1907 if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1908 GPUWriteLong(RM, RN & 0xFFFF, GPU);
1910 JaguarWriteWord(RM, RN, GPU);
1914 static void gpu_opcode_store(void)
1916 #ifdef GPU_DIS_STORE
1918 WriteLog("%06X: STORE R%02u, (R%02u) [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM);
1920 #ifdef GPU_CORRECT_ALIGNMENT
1921 if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1922 GPUWriteLong(RM & 0xFFFFFFFC, RN, GPU);
1924 GPUWriteLong(RM, RN, GPU);
1926 GPUWriteLong(RM, RN, GPU);
1930 static void gpu_opcode_storep(void)
1932 #ifdef GPU_CORRECT_ALIGNMENT
1933 if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1935 GPUWriteLong((RM & 0xFFFFFFF8) + 0, gpu_hidata, GPU);
1936 GPUWriteLong((RM & 0xFFFFFFF8) + 4, RN, GPU);
1940 GPUWriteLong(RM + 0, gpu_hidata, GPU);
1941 GPUWriteLong(RM + 4, RN, GPU);
1944 GPUWriteLong(RM + 0, gpu_hidata, GPU);
1945 GPUWriteLong(RM + 4, RN, GPU);
1949 static void gpu_opcode_loadb(void)
1951 #ifdef GPU_DIS_LOADB
1953 WriteLog("%06X: LOADB (R%02u), R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1955 if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1956 RN = GPUReadLong(RM, GPU) & 0xFF;
1958 RN = JaguarReadByte(RM, GPU);
1959 #ifdef GPU_DIS_LOADB
1961 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1965 static void gpu_opcode_loadw(void)
1967 #ifdef GPU_DIS_LOADW
1969 WriteLog("%06X: LOADW (R%02u), R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1971 #ifdef GPU_CORRECT_ALIGNMENT
1972 if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1973 RN = GPUReadLong(RM & 0xFFFFFFFE, GPU) & 0xFFFF;
1975 RN = JaguarReadWord(RM, GPU);
1977 if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1978 RN = GPUReadLong(RM, GPU) & 0xFFFF;
1980 RN = JaguarReadWord(RM, GPU);
1982 #ifdef GPU_DIS_LOADW
1984 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1988 // According to the docs, & "Do The Same", this address is long aligned...
1990 // And it works!!! Need to fix all instances...
1991 // Also, Power Drive Rally seems to contradict the idea that only LOADs in
1992 // the $F03000-$F03FFF range are aligned...
1993 #warning "!!! Alignment issues, need to find definitive final word on this !!!"
1995 Preliminary testing on real hardware seems to confirm that something strange goes on
1996 with unaligned reads in main memory. When the address is off by 1, the result is the
1997 same as the long address with the top byte replaced by something. So if the read is
1998 from $401, and $400 has 12 34 56 78, the value read will be $nn345678, where nn is a currently unknown value.
1999 When the address is off by 2, the result would be $nnnn5678, where nnnn is unknown.
2000 When the address is off by 3, the result would be $nnnnnn78, where nnnnnn is unknown.
2001 It may be that the "unknown" values come from the prefetch queue, but not sure how
2002 to test that. They seem to be stable, though, which would indicate such a mechanism.
2003 Sometimes, however, the off by 2 case returns $12345678!
2005 static void gpu_opcode_load(void)
2009 WriteLog("%06X: LOAD (R%02u), R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2011 #ifdef GPU_CORRECT_ALIGNMENT
2012 uint32 mask[4] = { 0x00000000, 0xFF000000, 0xFFFF0000, 0xFFFFFF00 };
2013 // if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
2014 RN = GPUReadLong(RM & 0xFFFFFFFC, GPU);
2015 // RN = GPUReadLong(RM & 0x00FFFFFC, GPU);
2017 // RN = GPUReadLong(RM, GPU);
2018 // Simulate garbage in unaligned reads...
2019 //seems that this behavior is different in GPU mem vs. main mem...
2020 // if ((RM < 0xF03000) || (RM > 0xF0BFFF))
2021 // RN |= mask[RM & 0x03];
2023 RN = GPUReadLong(RM, GPU);
2027 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2031 static void gpu_opcode_loadp(void)
2033 #ifdef GPU_CORRECT_ALIGNMENT
2034 if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
2036 gpu_hidata = GPUReadLong((RM & 0xFFFFFFF8) + 0, GPU);
2037 RN = GPUReadLong((RM & 0xFFFFFFF8) + 4, GPU);
2041 gpu_hidata = GPUReadLong(RM + 0, GPU);
2042 RN = GPUReadLong(RM + 4, GPU);
2045 gpu_hidata = GPUReadLong(RM + 0, GPU);
2046 RN = GPUReadLong(RM + 4, GPU);
// LOAD (R14+n),Rn: read a long from R14 plus an immediate offset into RN. The
// offset is gpu_convert_zero[IMM_1] scaled by 4.
2050 static void gpu_opcode_load_r14_indexed(void)
2052 #ifdef GPU_DIS_LOAD14I
2054 WriteLog("%06X: LOAD (R14+$%02X), R%02u [NCZ:%u%u%u, R14+$%02X=%08X, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1] << 2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, gpu_convert_zero[IMM_1] << 2, gpu_reg[14]+(gpu_convert_zero[IMM_1] << 2), IMM_2, RN);
2056 #ifdef GPU_CORRECT_ALIGNMENT
2057 uint32 address = gpu_reg[14] + (gpu_convert_zero[IMM_1] << 2);
// Fixed: the range test used RM, but in this addressing mode IMM_1 is an
// immediate offset, so RM has nothing to do with the location being read.
// The test now uses the effective address, matching the R14-indexed store.
// Inside $F03000-$F03FFF the two low address bits are dropped.
2059 if (address >= 0xF03000 && address <= 0xF03FFF)
2060 RN = GPUReadLong(address & 0xFFFFFFFC, GPU);
2062 RN = GPUReadLong(address, GPU);
2064 RN = GPUReadLong(gpu_reg[14] + (gpu_convert_zero[IMM_1] << 2), GPU);
2066 #ifdef GPU_DIS_LOAD14I
2068 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
// LOAD (R15+n),Rn: read a long from R15 plus an immediate offset into RN. The
// offset is gpu_convert_zero[IMM_1] scaled by 4.
2072 static void gpu_opcode_load_r15_indexed(void)
2074 #ifdef GPU_DIS_LOAD15I
2076 WriteLog("%06X: LOAD (R15+$%02X), R%02u [NCZ:%u%u%u, R15+$%02X=%08X, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1] << 2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, gpu_convert_zero[IMM_1] << 2, gpu_reg[15]+(gpu_convert_zero[IMM_1] << 2), IMM_2, RN);
2078 #ifdef GPU_CORRECT_ALIGNMENT
2079 uint32 address = gpu_reg[15] + (gpu_convert_zero[IMM_1] << 2);
// Fixed: the range test used RM, but in this addressing mode IMM_1 is an
// immediate offset, so RM has nothing to do with the location being read.
// The test now uses the effective address, matching the R15-indexed store.
// Inside $F03000-$F03FFF the two low address bits are dropped.
2081 if (address >= 0xF03000 && address <= 0xF03FFF)
2082 RN = GPUReadLong(address & 0xFFFFFFFC, GPU);
2084 RN = GPUReadLong(address, GPU);
2086 RN = GPUReadLong(gpu_reg[15] + (gpu_convert_zero[IMM_1] << 2), GPU);
2088 #ifdef GPU_DIS_LOAD15I
2090 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2094 static void gpu_opcode_movei(void)
2096 #ifdef GPU_DIS_MOVEI
2098 WriteLog("%06X: MOVEI #$%08X, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, (uint32)GPUReadWord(gpu_pc) | ((uint32)GPUReadWord(gpu_pc + 2) << 16), IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2100 // This instruction is followed by 32-bit value in LSW / MSW format...
2101 RN = (uint32)GPUReadWord(gpu_pc, GPU) | ((uint32)GPUReadWord(gpu_pc + 2, GPU) << 16);
2103 #ifdef GPU_DIS_MOVEI
2105 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2109 static void gpu_opcode_moveta(void)
2111 #ifdef GPU_DIS_MOVETA
2113 WriteLog("%06X: MOVETA R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u(alt)=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, ALTERNATE_RN);
2116 #ifdef GPU_DIS_MOVETA
2118 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u(alt)=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, ALTERNATE_RN);
2122 static void gpu_opcode_movefa(void)
2124 #ifdef GPU_DIS_MOVEFA
2126 WriteLog("%06X: MOVEFA R%02u, R%02u [NCZ:%u%u%u, R%02u(alt)=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, ALTERNATE_RM, IMM_2, RN);
2129 #ifdef GPU_DIS_MOVEFA
2131 WriteLog("[NCZ:%u%u%u, R%02u(alt)=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, ALTERNATE_RM, IMM_2, RN);
2135 static void gpu_opcode_move(void)
2139 WriteLog("%06X: MOVE R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2144 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2148 static void gpu_opcode_moveq(void)
2150 #ifdef GPU_DIS_MOVEQ
2152 WriteLog("%06X: MOVEQ #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2155 #ifdef GPU_DIS_MOVEQ
2157 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2161 static void gpu_opcode_resmac(void)
2166 static void gpu_opcode_imult(void)
2168 #ifdef GPU_DIS_IMULT
2170 WriteLog("%06X: IMULT R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2172 RN = (int16)RN * (int16)RM;
2174 #ifdef GPU_DIS_IMULT
2176 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2180 static void gpu_opcode_mult(void)
2184 WriteLog("%06X: MULT R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2186 RN = (uint16)RM * (uint16)RN;
2190 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2194 static void gpu_opcode_bclr(void)
2198 WriteLog("%06X: BCLR #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2200 uint32 res = RN & ~(1 << IMM_1);
2205 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2209 static void gpu_opcode_btst(void)
2213 WriteLog("%06X: BTST #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2215 gpu_flag_z = (~RN >> IMM_1) & 1;
2218 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2222 static void gpu_opcode_bset(void)
2226 WriteLog("%06X: BSET #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2228 uint32 res = RN | (1 << IMM_1);
2233 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2237 static void gpu_opcode_imacn(void)
2239 uint32 res = (int16)RM * (int16)(RN);
2243 static void gpu_opcode_mtoi(void)
2246 uint32 res = RN = (((int32)_RM >> 8) & 0xFF800000) | (_RM & 0x007FFFFF);
2250 static void gpu_opcode_normi(void)
2257 while ((_RM & 0xFFC00000) == 0)
2262 while ((_RM & 0xFF800000) != 0)
2272 static void gpu_opcode_mmult(void)
2274 int count = gpu_matrix_control & 0x0F; // Matrix width
2275 uint32 addr = gpu_pointer_to_matrix; // In the GPU's RAM
2279 if (gpu_matrix_control & 0x10) // Column stepping
2281 for(int i=0; i<count; i++)
2285 a = (int16)((gpu_alternate_reg[IMM_1 + (i >> 1)] >> 16) & 0xFFFF);
2287 a = (int16)(gpu_alternate_reg[IMM_1 + (i >> 1)] & 0xFFFF);
2289 int16 b = ((int16)GPUReadWord(addr + 2, GPU));
2294 else // Row stepping
2296 for(int i=0; i<count; i++)
2300 a = (int16)((gpu_alternate_reg[IMM_1 + (i >> 1)] >> 16) & 0xFFFF);
2302 a = (int16)(gpu_alternate_reg[IMM_1 + (i >> 1)] & 0xFFFF);
2304 int16 b = ((int16)GPUReadWord(addr + 2, GPU));
2309 RN = res = (int32)accum;
2310 // carry flag to do (out of the last add)
// ABS Rn: the visible lines cover the flag handling only.
// NOTE(review): the statement that actually replaces RN with its magnitude is
// not among the visible lines.
2314 static void gpu_opcode_abs(void)
2318 WriteLog("%06X: ABS R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
// C takes bit 31 of the incoming RN, i.e. the sign of the operand.
2320 gpu_flag_c = RN >> 31;
// 0x80000000 is handled on its own: N is set and Z cleared.
2321 if (RN == 0x80000000)
2322 //Is 0x80000000 a positive number? If so, then we need to set C to 0 as well!
2323 gpu_flag_n = 1, gpu_flag_z = 0;
// Remaining path: N is cleared and Z is derived from RN through SET_FLAG_Z.
2328 gpu_flag_n = 0; SET_FLAG_Z(RN);
2332 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
// DIV Rm,Rn: divide RN by RM, leaving the quotient in RN and the remainder in
// gpu_remain. When bit 0 of gpu_div_control is set, the dividend is first
// shifted left by 16 bits in 64-bit arithmetic (16.16 mode).
2336 static void gpu_opcode_div(void) // RN / RM
2340 WriteLog("%06X: DIV R%02u, R%02u (%s) [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, (gpu_div_control & 0x01 ? "16.16" : "32"), gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2342 // NOTE: remainder is NOT calculated correctly here!
2343 // The original tried to get it right by checking to see if the
2344 // remainder was negative, but that's too late...
2345 // The code there should do it now, but I'm not 100% sure...
2349 if (gpu_div_control & 0x01) // 16.16 division
// Fixed: the remainder is now taken BEFORE RN is overwritten. Previously RN
// already held the quotient when the modulo was evaluated, so gpu_remain was
// derived from the quotient instead of the dividend.
2351 gpu_remain = ((uint64)RN << 16) % RM;
2352 RN = ((uint64)RN << 16) / RM;
// NOTE(review): integer path. The statement just before this one is not
// visible here; if it assigns the quotient to RN first, this modulo has the
// same ordering problem and should be moved ahead of it. Confirm.
2357 gpu_remain = RN % RM;
2360 if ((gpu_remain - RM) & 0x80000000) // If the result would have been negative...
2361 gpu_remain -= RM; // Then make it negative!
// NOTE(review): the lines from here to the final log call use _RM/_RN, which
// are not declared in any visible line; they look like an older, disabled
// implementation kept for reference. Confirm.
2371 if (gpu_div_control & 1)
2373 gpu_remain = (((uint64)_RN) << 16) % _RM;
2374 if (gpu_remain&0x80000000)
2376 RN = (((uint64)_RN) << 16) / _RM;
2380 gpu_remain = _RN % _RM;
2381 if (gpu_remain&0x80000000)
2390 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] Remainder: %08X\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN, gpu_remain);
2394 static void gpu_opcode_imultn(void)
2396 uint32 res = (int32)((int16)RN * (int16)RM);
2397 gpu_acc = (int32)res;
2402 static void gpu_opcode_neg(void)
2406 WriteLog("%06X: NEG R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2409 SET_ZNC_SUB(0, RN, res);
2413 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2417 static void gpu_opcode_shlq(void)
2421 WriteLog("%06X: SHLQ #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, 32 - IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2423 // Was a bug here...
2424 // (Look at Aaron's code: If r1 = 32, then 32 - 32 = 0 which is wrong!)
2425 int32 r1 = 32 - IMM_1;
2426 uint32 res = RN << r1;
2427 SET_ZN(res); gpu_flag_c = (RN >> 31) & 1;
2431 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2435 static void gpu_opcode_shrq(void)
2439 WriteLog("%06X: SHRQ #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2441 int32 r1 = gpu_convert_zero[IMM_1];
2442 uint32 res = RN >> r1;
2443 SET_ZN(res); gpu_flag_c = RN & 1;
2447 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
// ROR Rm,Rn: rotate RN right by the low five bits of RM. N and Z are set from
// the result; C takes bit 31 of the value RN had before the rotate.
2451 static void gpu_opcode_ror(void)
2455 WriteLog("%06X: ROR R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
// Rotate count, 0..31.
2457 uint32 r1 = RM & 0x1F;
// Fixed: with r1 == 0 the left shift count used to be 32, which is undefined
// for a 32-bit operand. Masking the complementary count to five bits makes a
// rotate by zero return RN unchanged and leaves counts 1..31 as they were.
2458 uint32 res = (RN >> r1) | (RN << ((32 - r1) & 0x1F));
2459 SET_ZN(res); gpu_flag_c = (RN >> 31) & 1;
2463 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
// RORQ #n,Rn: rotate right by an immediate count looked up in
// gpu_convert_zero. N and Z are set from the result; C takes bit 31 of r2.
// NOTE(review): r2 is declared on a line that is not visible here --
// presumably the incoming RN. Confirm.
2467 static void gpu_opcode_rorq(void)
2471 WriteLog("%06X: RORQ #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2473 uint32 r1 = gpu_convert_zero[IMM_1 & 0x1F];
// Fixed: both shift counts are reduced modulo 32. If the table yields 32 (or
// 0), the unmasked form shifted a 32-bit value by 32, which is undefined;
// masked, a rotate by 0 or 32 returns r2 unchanged. Counts 1..31 are
// unaffected.
2475 uint32 res = (r2 >> (r1 & 0x1F)) | (r2 << ((32 - r1) & 0x1F));
2477 SET_ZN(res); gpu_flag_c = (r2 >> 31) & 0x01;
2480 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2484 static void gpu_opcode_sha(void)
2486 /* int dreg = jaguar.op & 31;
2487 int32 r1 = (int32)jaguar.r[(jaguar.op >> 5) & 31];
2488 uint32 r2 = jaguar.r[dreg];
2494 res = (r1 <= -32) ? 0 : (r2 << -r1);
2495 jaguar.FLAGS |= (r2 >> 30) & 2;
2499 res = (r1 >= 32) ? ((int32)r2 >> 31) : ((int32)r2 >> r1);
2500 jaguar.FLAGS |= (r2 << 1) & 2;
2502 jaguar.r[dreg] = res;
2507 WriteLog("%06X: SHA R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2513 res = ((int32)RM <= -32) ? 0 : (RN << -(int32)RM);
2514 gpu_flag_c = RN >> 31;
2518 res = ((int32)RM >= 32) ? ((int32)RN >> 31) : ((int32)RN >> (int32)RM);
2519 gpu_flag_c = RN & 0x01;
2525 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2528 /* int32 sRM=(int32)RM;
2534 if (shift>=32) shift=32;
2535 gpu_flag_c=(_RN&0x80000000)>>31;
2545 if (shift>=32) shift=32;
2549 _RN=((int32)_RN)>>1;
2558 static void gpu_opcode_sharq(void)
2560 #ifdef GPU_DIS_SHARQ
2562 WriteLog("%06X: SHARQ #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2564 uint32 res = (int32)RN >> gpu_convert_zero[IMM_1];
2565 SET_ZN(res); gpu_flag_c = RN & 0x01;
2567 #ifdef GPU_DIS_SHARQ
2569 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
// SH Rm,Rn: logical shift of RN by the signed amount in RM. A negative RM
// (bit 31 set) shifts left by -RM; otherwise RN is shifted right by RM.
2573 static void gpu_opcode_sh(void)
2577 WriteLog("%06X: SH R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2579 if (RM & 0x80000000) // Shift left
// Left: C takes bit 31 of RN before the shift; a shift of 32 or more places
// yields 0 instead of performing an out-of-range shift.
2581 gpu_flag_c = RN >> 31;
2582 RN = ((int32)RM <= -32 ? 0 : RN << -(int32)RM);
// Right: C takes bit 0 of RN before the shift; a shift of 32 or more places
// yields 0 instead of performing an out-of-range shift.
2586 gpu_flag_c = RN & 0x01;
2587 RN = (RM >= 32 ? 0 : RN >> RM);
2592 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2596 //Temporary: Testing only!
2597 //#include "gpu2.cpp"
2598 //#include "gpu3.cpp"
2602 // New thread-safe GPU core
2604 int GPUCore(void * data)