6 // Originally by David Raingeard (Cal2)
7 // GCC/SDL port by Niels Wagenaar (Linux/WIN32) and Caz (BeOS)
8 // Cleanups, endian wrongness, and bad ASM amelioration by James Hammons
9 // (C) 2010 Underground Software
11 // JLH = James Hammons <jlhamm@acm.org>
14 // --- ---------- -------------------------------------------------------------
15 // JLH 01/16/2010 Created this log ;-)
16 // JLH 11/26/2011 Added fixes for LOAD/STORE alignment issues
19 // Note: Endian wrongness probably stems from the MAME origins of this emu and
20 // the braindead way in which MAME handles memory. :-)
22 // Problem with not booting the BIOS was the incorrect way that the
23 // SUBC instruction set the carry when the carry was set going in...
24 // Same problem with ADDC...
30 #include <string.h> // For memset
35 #include "m68000/m68kinterface.h"
// Build-time switches for this file.
// NOTE(review): the code that tests these macros is not visible in this
// listing, so their exact effect should be confirmed against the opcode
// handlers before relying on these descriptions.
40 // Seems alignment in loads & stores was off...
41 #define GPU_CORRECT_ALIGNMENT
44 // For GPU disassembly...
// One switch per opcode family (LOAD/STORE via R14/R15, MOVEFA/MOVEPC/MOVETA,
// STOREB/STOREW); presumably each enables disassembly logging for that opcode.
65 #define GPU_DIS_LOAD14I
66 #define GPU_DIS_LOAD14R
67 #define GPU_DIS_LOAD15I
68 #define GPU_DIS_LOAD15R
70 #define GPU_DIS_MOVEFA
72 #define GPU_DIS_MOVEPC
73 #define GPU_DIS_MOVETA
90 #define GPU_DIS_STOREB
91 #define GPU_DIS_STOREW
92 #define GPU_DIS_STORE14I
93 #define GPU_DIS_STORE14R
94 #define GPU_DIS_STORE15I
95 #define GPU_DIS_STORE15R
102 //bool doGPUDis = false;
// Global flag, initialised to true (presumably gates the disassembly logging
// at run time -- TODO confirm where it is read).
103 bool doGPUDis = true;
107 GPU opcodes use (BIOS flying ATARI logo):
// Interrupt latch clear bits as written to the GPU flags register. They share
// bit positions with INT_CLR0-4 below; GPUWriteLong shifts the masked value
// right by 3 to clear the matching latch bits in gpu_control.
149 #define CINT0FLAG 0x0200
150 #define CINT1FLAG 0x0400
151 #define CINT2FLAG 0x0800
152 #define CINT3FLAG 0x1000
153 #define CINT4FLAG 0x2000
154 #define CINT04FLAGS (CINT0FLAG | CINT1FLAG | CINT2FLAG | CINT3FLAG | CINT4FLAG)
// ALU status bits of the flags register (bit 0 = Z, bit 1 = C, bit 2 = N).
158 #define ZERO_FLAG 0x0001
159 #define CARRY_FLAG 0x0002
160 #define NEGA_FLAG 0x0004
// Interrupt enable bits (bits 4-8); GPUHandleIRQs reads them as
// (gpu_flags >> 4) & 0x1F.
162 #define INT_ENA0 0x0010
163 #define INT_ENA1 0x0020
164 #define INT_ENA2 0x0040
165 #define INT_ENA3 0x0080
166 #define INT_ENA4 0x0100
// Interrupt latch clear bits (bits 9-13); same values as CINT0FLAG-CINT4FLAG.
167 #define INT_CLR0 0x0200
168 #define INT_CLR1 0x0400
169 #define INT_CLR2 0x0800
170 #define INT_CLR3 0x1000
171 #define INT_CLR4 0x2000
// Register bank select bit; consulted by GPUUpdateRegisterBanks.
172 #define REGPAGE 0x4000
175 // External global variables
// Logging switches defined in another translation unit (not visible here).
177 extern int start_logging;
178 extern int gpu_start_log;
180 // Private function prototypes
182 void GPUUpdateRegisterBanks(void);
183 void GPUDumpDisassembly(void);
184 void GPUDumpRegisters(void);
185 void GPUDumpMemory(void);
// One handler per GPU opcode. They are declared in the same order in which
// they are installed in the gpu_opcode[] dispatch table further down, so the
// position in this list corresponds to the 6-bit opcode number.
187 static void gpu_opcode_add(void);
188 static void gpu_opcode_addc(void);
189 static void gpu_opcode_addq(void);
190 static void gpu_opcode_addqt(void);
191 static void gpu_opcode_sub(void);
192 static void gpu_opcode_subc(void);
193 static void gpu_opcode_subq(void);
194 static void gpu_opcode_subqt(void);
195 static void gpu_opcode_neg(void);
196 static void gpu_opcode_and(void);
197 static void gpu_opcode_or(void);
198 static void gpu_opcode_xor(void);
199 static void gpu_opcode_not(void);
200 static void gpu_opcode_btst(void);
201 static void gpu_opcode_bset(void);
202 static void gpu_opcode_bclr(void);
203 static void gpu_opcode_mult(void);
204 static void gpu_opcode_imult(void);
205 static void gpu_opcode_imultn(void);
206 static void gpu_opcode_resmac(void);
207 static void gpu_opcode_imacn(void);
208 static void gpu_opcode_div(void);
209 static void gpu_opcode_abs(void);
210 static void gpu_opcode_sh(void);
211 static void gpu_opcode_shlq(void);
212 static void gpu_opcode_shrq(void);
213 static void gpu_opcode_sha(void);
214 static void gpu_opcode_sharq(void);
215 static void gpu_opcode_ror(void);
216 static void gpu_opcode_rorq(void);
217 static void gpu_opcode_cmp(void);
218 static void gpu_opcode_cmpq(void);
219 static void gpu_opcode_sat8(void);
220 static void gpu_opcode_sat16(void);
221 static void gpu_opcode_move(void);
222 static void gpu_opcode_moveq(void);
223 static void gpu_opcode_moveta(void);
224 static void gpu_opcode_movefa(void);
225 static void gpu_opcode_movei(void);
226 static void gpu_opcode_loadb(void);
227 static void gpu_opcode_loadw(void);
228 static void gpu_opcode_load(void);
229 static void gpu_opcode_loadp(void);
230 static void gpu_opcode_load_r14_indexed(void);
231 static void gpu_opcode_load_r15_indexed(void);
232 static void gpu_opcode_storeb(void);
233 static void gpu_opcode_storew(void);
234 static void gpu_opcode_store(void);
235 static void gpu_opcode_storep(void);
236 static void gpu_opcode_store_r14_indexed(void);
237 static void gpu_opcode_store_r15_indexed(void);
238 static void gpu_opcode_move_pc(void);
239 static void gpu_opcode_jump(void);
240 static void gpu_opcode_jr(void);
241 static void gpu_opcode_mmult(void);
242 static void gpu_opcode_mtoi(void);
243 static void gpu_opcode_normi(void);
244 static void gpu_opcode_nop(void);
245 static void gpu_opcode_load_r14_ri(void);
246 static void gpu_opcode_load_r15_ri(void);
247 static void gpu_opcode_store_r14_ri(void);
248 static void gpu_opcode_store_r15_ri(void);
249 static void gpu_opcode_sat24(void);
250 static void gpu_opcode_pack(void);
// Per-opcode cycle cost, indexed by the 6-bit opcode number; GPUExec subtracts
// gpu_opcode_cycles[index] from its cycle budget after each instruction.
// Two earlier tables are kept below as commented-out history; the table in
// effect charges one cycle for every opcode.
// NOTE(review): the lines that open the initializer braces and close the two
// commented-out tables are not present in this listing.
252 // This is wrong, since it doesn't take pipeline effects into account. !!! FIX !!!
253 /*uint8 gpu_opcode_cycles[64] =
255 3, 3, 3, 3, 3, 3, 3, 3,
256 3, 3, 3, 3, 3, 3, 3, 3,
257 3, 3, 1, 3, 1, 18, 3, 3,
258 3, 3, 3, 3, 3, 3, 3, 3,
259 3, 3, 2, 2, 2, 2, 3, 4,
260 5, 4, 5, 6, 6, 1, 1, 1,
261 1, 2, 2, 2, 1, 1, 9, 3,
262 3, 1, 6, 6, 2, 2, 3, 3
264 //Here's a QnD kludge...
265 //This is wrong, wrong, WRONG, but it seems to work for the time being...
266 //(That is, it fixes Flip Out which relies on GPU timing rather than semaphores. Bad developers! Bad!)
267 //What's needed here is a way to take pipeline effects into account (including pipeline stalls!)...
268 /*uint8 gpu_opcode_cycles[64] =
270 1, 1, 1, 1, 1, 1, 1, 1,
271 1, 1, 1, 1, 1, 1, 1, 1,
272 1, 1, 1, 1, 1, 9, 1, 1,
273 1, 1, 1, 1, 1, 1, 1, 1,
274 1, 1, 1, 1, 1, 1, 1, 2,
275 2, 2, 2, 3, 3, 1, 1, 1,
276 1, 1, 1, 1, 1, 1, 4, 1,
277 1, 1, 3, 3, 1, 1, 1, 1
// Active table: uniform cost of 1 cycle per instruction.
279 uint8 gpu_opcode_cycles[64] =
281 1, 1, 1, 1, 1, 1, 1, 1,
282 1, 1, 1, 1, 1, 1, 1, 1,
283 1, 1, 1, 1, 1, 1, 1, 1,
284 1, 1, 1, 1, 1, 1, 1, 1,
285 1, 1, 1, 1, 1, 1, 1, 1,
286 1, 1, 1, 1, 1, 1, 1, 1,
287 1, 1, 1, 1, 1, 1, 1, 1,
288 1, 1, 1, 1, 1, 1, 1, 1
// Opcode dispatch table. GPUExec computes index = opcode >> 10 (the top six
// bits of the 16-bit instruction word) and calls gpu_opcode[index](). The
// entry order must stay in step with gpu_opcode_cycles[] and gpu_opcode_str[].
291 void (*gpu_opcode[64])()=
293 gpu_opcode_add, gpu_opcode_addc, gpu_opcode_addq, gpu_opcode_addqt,
294 gpu_opcode_sub, gpu_opcode_subc, gpu_opcode_subq, gpu_opcode_subqt,
295 gpu_opcode_neg, gpu_opcode_and, gpu_opcode_or, gpu_opcode_xor,
296 gpu_opcode_not, gpu_opcode_btst, gpu_opcode_bset, gpu_opcode_bclr,
297 gpu_opcode_mult, gpu_opcode_imult, gpu_opcode_imultn, gpu_opcode_resmac,
298 gpu_opcode_imacn, gpu_opcode_div, gpu_opcode_abs, gpu_opcode_sh,
299 gpu_opcode_shlq, gpu_opcode_shrq, gpu_opcode_sha, gpu_opcode_sharq,
300 gpu_opcode_ror, gpu_opcode_rorq, gpu_opcode_cmp, gpu_opcode_cmpq,
301 gpu_opcode_sat8, gpu_opcode_sat16, gpu_opcode_move, gpu_opcode_moveq,
302 gpu_opcode_moveta, gpu_opcode_movefa, gpu_opcode_movei, gpu_opcode_loadb,
303 gpu_opcode_loadw, gpu_opcode_load, gpu_opcode_loadp, gpu_opcode_load_r14_indexed,
304 gpu_opcode_load_r15_indexed, gpu_opcode_storeb, gpu_opcode_storew, gpu_opcode_store,
305 gpu_opcode_storep, gpu_opcode_store_r14_indexed, gpu_opcode_store_r15_indexed, gpu_opcode_move_pc,
306 gpu_opcode_jump, gpu_opcode_jr, gpu_opcode_mmult, gpu_opcode_mtoi,
307 gpu_opcode_normi, gpu_opcode_nop, gpu_opcode_load_r14_ri, gpu_opcode_load_r15_ri,
308 gpu_opcode_store_r14_ri, gpu_opcode_store_r15_ri, gpu_opcode_sat24, gpu_opcode_pack,
// GPU work RAM: 0x1000 bytes, stored byte-wise; the read/write accessors in
// this file assemble words and longs most-significant byte first.
311 static uint8 gpu_ram_8[0x1000];
// Register-level state. Most of these are read or written through the
// memory-mapped control range handled by GPUReadLong / GPUWriteLong.
313 static uint32 gpu_acc;
314 static uint32 gpu_remain;
315 static uint32 gpu_hidata;
316 static uint32 gpu_flags;
317 static uint32 gpu_matrix_control;
318 static uint32 gpu_pointer_to_matrix;
319 static uint32 gpu_data_organization;
320 static uint32 gpu_control;
321 static uint32 gpu_div_control;
322 // There is a distinct advantage to having these separated out--there's no need to clear
323 // a bit before writing a result. I.e., if the result of an operation leaves a zero in
324 // the carry flag, you don't have to zero gpu_flag_c before you can write that zero!
// GPUReadLong folds these three back into bits 0-2 of gpu_flags on demand, and
// GPUWriteLong splits a written flags value back out into them.
325 static uint8 gpu_flag_z, gpu_flag_n, gpu_flag_c;
// Two banks of 32 registers. gpu_reg points at the bank currently in use and
// gpu_alternate_reg at the other one (see GPUUpdateRegisterBanks).
326 static uint32 gpu_reg_bank_0[32];
327 static uint32 gpu_reg_bank_1[32];
328 static uint32 * gpu_reg;
329 static uint32 * gpu_alternate_reg;
// Decode state for the instruction being executed; filled in by GPUExec from
// the 16-bit opcode word (bits 5-9 -> first parameter, bits 0-4 -> second).
331 static uint32 gpu_instruction;
332 static uint32 gpu_opcode_first_parameter;
333 static uint32 gpu_opcode_second_parameter;
// Non-zero while bit 0 of gpu_control is set; GPUExec loops only while this holds.
335 #define GPU_RUNNING (gpu_control & 0x01)
// Operand accessors for the current instruction: RM/RN are the registers named
// by the first/second 5-bit field in the active bank, ALTERNATE_* the same
// indices in the other bank, IMM_* the raw field values.
337 #define RM gpu_reg[gpu_opcode_first_parameter]
338 #define RN gpu_reg[gpu_opcode_second_parameter]
339 #define ALTERNATE_RM gpu_alternate_reg[gpu_opcode_first_parameter]
340 #define ALTERNATE_RN gpu_alternate_reg[gpu_opcode_second_parameter]
341 #define IMM_1 gpu_opcode_first_parameter
342 #define IMM_2 gpu_opcode_second_parameter
// Older flag helpers. Note that these expand WITH a trailing semicolon, so they
// can only be used as complete statements.
344 #define SET_FLAG_Z(r) (gpu_flag_z = ((r) == 0));
345 #define SET_FLAG_N(r) (gpu_flag_n = (((uint32)(r) >> 31) & 0x01));
347 #define RESET_FLAG_Z() gpu_flag_z = 0;
348 #define RESET_FLAG_N() gpu_flag_n = 0;
349 #define RESET_FLAG_C() gpu_flag_c = 0;
// Newer flag helpers (no trailing semicolon).
//   SET_Z: result is zero.         SET_N: bit 31 of the result.
//   SET_C_ADD(a,b): b > ~a, i.e. a + b does not fit in 32 bits.
//   SET_C_SUB(a,b): b > a, i.e. a - b borrows.
// SET_ZN / SET_ZNC_* expand to several ';'-separated statements, so they must
// not be used as the unbraced body of an if/else/loop.
351 #define CLR_Z (gpu_flag_z = 0)
352 #define CLR_ZN (gpu_flag_z = gpu_flag_n = 0)
353 #define CLR_ZNC (gpu_flag_z = gpu_flag_n = gpu_flag_c = 0)
354 #define SET_Z(r) (gpu_flag_z = ((r) == 0))
355 #define SET_N(r) (gpu_flag_n = (((uint32)(r) >> 31) & 0x01))
356 #define SET_C_ADD(a,b) (gpu_flag_c = ((uint32)(b) > (uint32)(~(a))))
357 #define SET_C_SUB(a,b) (gpu_flag_c = ((uint32)(b) > (uint32)(a)))
358 #define SET_ZN(r) SET_N(r); SET_Z(r)
359 #define SET_ZNC_ADD(a,b,r) SET_N(r); SET_Z(r); SET_C_ADD(a,b)
360 #define SET_ZNC_SUB(a,b,r) SET_N(r); SET_Z(r); SET_C_SUB(a,b)
// Lookup that maps a 5-bit field value to itself, except that 0 maps to 32.
362 uint32 gpu_convert_zero[32] =
363 { 32,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 };
// Table of 8 x 32 entries filled in by build_branch_condition_table().
// BRANCH_CONDITION(x) indexes it with condition code x and the low three bits
// of a variable named jaguar_flags, which the USING function must have in
// scope (gpu_opcode_jump builds it as N<<2 | C<<1 | Z).
365 uint8 * branch_condition_table = 0;
366 #define BRANCH_CONDITION(x) branch_condition_table[(x) + ((jaguar_flags & 7) << 5)]
// Execution counters, one per opcode; incremented by GPUExec, cleared by
// GPUResetStats and printed together with gpu_opcode_str[] at shutdown.
368 uint32 gpu_opcode_use[64];
// Printable opcode names, in the same order as the gpu_opcode[] dispatch table.
370 const char * gpu_opcode_str[64]=
372 "add", "addc", "addq", "addqt",
373 "sub", "subc", "subq", "subqt",
374 "neg", "and", "or", "xor",
375 "not", "btst", "bset", "bclr",
376 "mult", "imult", "imultn", "resmac",
377 "imacn", "div", "abs", "sh",
378 "shlq", "shrq", "sha", "sharq",
379 "ror", "rorq", "cmp", "cmpq",
380 "sat8", "sat16", "move", "moveq",
381 "moveta", "movefa", "movei", "loadb",
382 "loadw", "load", "loadp", "load_r14_indexed",
383 "load_r15_indexed", "storeb", "storew", "store",
384 "storep", "store_r14_indexed","store_r15_indexed","move_pc",
385 "jump", "jr", "mmult", "mtoi",
386 "normi", "nop", "load_r14_ri", "load_r15_ri",
387 "store_r14_ri", "store_r15_ri", "sat24", "pack",
// NOTE(review): gpu_in_exec is not referenced anywhere in the visible lines.
390 static uint32 gpu_in_exec = 0;
// Set by GPUReleaseTimeslice(); GPUExec resets it to 0 before its main loop.
391 static uint32 gpu_releaseTimeSlice_flag = 0;
// Raises the release-timeslice flag by setting gpu_releaseTimeSlice_flag to 1.
// NOTE(review): the code that reacts to the flag is not visible in this listing.
393 void GPUReleaseTimeslice(void)
395 gpu_releaseTimeSlice_flag = 1;
// NOTE(review): the body of this function is missing from this listing;
// presumably it returns gpu_pc -- confirm against the full source.
398 uint32 GPUGetPC(void)
// Builds the jump/branch condition lookup used by BRANCH_CONDITION().
//
// On first use a table of 32 * 8 bytes is allocated; it is then filled for
// every combination of i (3-bit flag value, tested against ZERO_FLAG and
// CARRY_FLAG) and j (5-bit condition code). The entry for (i, j) is stored at
// branch_condition_table[i * 32 + j].
// For the carry tests the mask is CARRY_FLAG << (j >> 4), so bit 4 of the
// condition code moves the test from bit 1 (carry) to bit 2 (negative).
// NOTE(review): the lines that declare `result` and that select which of the
// tests below apply to a given j are missing from this listing, as is any
// handling of a failed malloc beyond the visible null check.
403 void build_branch_condition_table(void)
405 if (!branch_condition_table)
407 branch_condition_table = (uint8 *)malloc(32 * 8 * sizeof(branch_condition_table[0]));
409 if (branch_condition_table)
411 for(int i=0; i<8; i++)
413 for(int j=0; j<32; j++)
420 if (!(i & ZERO_FLAG))
423 if (i & (CARRY_FLAG << (j >> 4)))
426 if (!(i & (CARRY_FLAG << (j >> 4))))
428 branch_condition_table[i * 32 + j] = result;
436 // GPU byte access (read)
//
// Reads one byte at `offset` on behalf of `who` (an index into whoName[]).
//  - 0xF02000-0xF020FF (register file): the access is only logged here.
//  - Work RAM: returns gpu_ram_8[offset & 0xFFF].
//  - Control range (0x20 bytes from GPU_CONTROL_RAM_BASE): reads the containing
//    aligned long through GPUReadLong and extracts the byte most-significant
//    first (byte 1 -> bits 23..16, byte 2 -> bits 15..8).
//  - Anything else is forwarded to JaguarReadByte.
// NOTE(review): the return statements for byte lanes 0 and 3 are missing from
// this listing.
438 uint8 GPUReadByte(uint32 offset, uint32 who/*=UNKNOWN*/)
440 if (offset >= 0xF02000 && offset <= 0xF020FF)
441 WriteLog("GPU: ReadByte--Attempt to read from GPU register file by %s!\n", whoName[who]);
443 if ((offset >= GPU_WORK_RAM_BASE) && (offset < GPU_WORK_RAM_BASE+0x1000))
444 return gpu_ram_8[offset & 0xFFF];
445 else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset < GPU_CONTROL_RAM_BASE+0x20))
447 uint32 data = GPUReadLong(offset & 0xFFFFFFFC, who);
449 if ((offset & 0x03) == 0)
451 else if ((offset & 0x03) == 1)
452 return (data >> 16) & 0xFF;
453 else if ((offset & 0x03) == 2)
454 return (data >> 8) & 0xFF;
455 else if ((offset & 0x03) == 3)
459 return JaguarReadByte(offset, who);
463 // GPU word access (read)
//
// Reads a 16-bit word at `offset` on behalf of `who`.
//  - Register-file addresses (0xF02000-0xF020FF) are only logged here.
//  - Work RAM: the word is assembled high byte first from gpu_ram_8.
//    NOTE(review): gpu_ram_8 is indexed with `offset` directly; the line that
//    reduces offset to a 0x000-0xFFF index is presumably among the lines
//    missing from this listing -- confirm.
//  - Control range: odd addresses are composed from two GPUReadByte calls;
//    even addresses read the containing long and return the low half when
//    bit 1 of the offset is set (the upper-half return is not visible here).
//  - Anything else is forwarded to JaguarReadWord.
465 uint16 GPUReadWord(uint32 offset, uint32 who/*=UNKNOWN*/)
467 if (offset >= 0xF02000 && offset <= 0xF020FF)
468 WriteLog("GPU: ReadWord--Attempt to read from GPU register file by %s!\n", whoName[who]);
470 if ((offset >= GPU_WORK_RAM_BASE) && (offset < GPU_WORK_RAM_BASE+0x1000))
473 uint16 data = ((uint16)gpu_ram_8[offset] << 8) | (uint16)gpu_ram_8[offset+1];
476 else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset < GPU_CONTROL_RAM_BASE+0x20))
478 // This looks and smells wrong...
479 // But it *might* be OK...
480 if (offset & 0x01) // Catch cases 1 & 3... (unaligned read)
481 return (GPUReadByte(offset, who) << 8) | GPUReadByte(offset+1, who);
483 uint32 data = GPUReadLong(offset & 0xFFFFFFFC, who);
485 if (offset & 0x02) // Cases 0 & 2...
486 return data & 0xFFFF;
491 //TEMP--Mirror of F03000? No. Writes only...
492 //if (offset >= 0xF0B000 && offset <= 0xF0BFFF)
493 //WriteLog("[GPUR16] --> Possible GPU RAM mirror access by %s!", whoName[who]);
495 return JaguarReadWord(offset, who);
499 // GPU dword access (read)
//
// Reads a 32-bit long at `offset` on behalf of `who`.
//  - 0xF02000-0xF020FF: logged, then answered straight from the register
//    banks (index = (offset & 0xFC) >> 2; below 32 -> bank 0, else bank 1).
//  - Work RAM up to base + 0x0FFC: assembled most-significant byte first.
//    NOTE(review): as in GPUReadWord, the line that reduces offset to a RAM
//    index is not visible in this listing.
//  - Control range up to base + 0x1C: one register per long. For the flags
//    register the three separate flag bytes are first normalised to 0/1 and
//    merged into bits 0-2 of gpu_flags; the value returned has bits 9-13
//    masked out (mask 0xFFFFC1FF). The switch/case labels selecting each
//    register are missing from this listing.
//  - Anything else is read as two 16-bit halves through JaguarReadWord.
501 uint32 GPUReadLong(uint32 offset, uint32 who/*=UNKNOWN*/)
503 if (offset >= 0xF02000 && offset <= 0xF020FF)
505 WriteLog("GPU: ReadLong--Attempt to read from GPU register file (%X) by %s!\n", offset, whoName[who]);
506 uint32 reg = (offset & 0xFC) >> 2;
507 return (reg < 32 ? gpu_reg_bank_0[reg] : gpu_reg_bank_1[reg - 32]);
510 // if ((offset >= GPU_WORK_RAM_BASE) && (offset < GPU_WORK_RAM_BASE + 0x1000))
511 if ((offset >= GPU_WORK_RAM_BASE) && (offset <= GPU_WORK_RAM_BASE + 0x0FFC))
514 return ((uint32)gpu_ram_8[offset] << 24) | ((uint32)gpu_ram_8[offset+1] << 16)
515 | ((uint32)gpu_ram_8[offset+2] << 8) | (uint32)gpu_ram_8[offset+3];//*/
516 // return GET32(gpu_ram_8, offset);
518 // else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset < GPU_CONTROL_RAM_BASE+0x20))
519 else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset <= GPU_CONTROL_RAM_BASE + 0x1C))
525 gpu_flag_c = (gpu_flag_c ? 1 : 0);
526 gpu_flag_z = (gpu_flag_z ? 1 : 0);
527 gpu_flag_n = (gpu_flag_n ? 1 : 0);
529 gpu_flags = (gpu_flags & 0xFFFFFFF8) | (gpu_flag_n << 2) | (gpu_flag_c << 1) | gpu_flag_z;
531 return gpu_flags & 0xFFFFC1FF;
533 return gpu_matrix_control;
535 return gpu_pointer_to_matrix;
537 return gpu_data_organization;
546 default: // unaligned long read
548 WriteLog("GPU: Read32--unaligned 32 bit read at %08X by %s.\n", GPU_CONTROL_RAM_BASE + offset, whoName[who]);
553 //TEMP--Mirror of F03000? No. Writes only...
554 //if (offset >= 0xF0B000 && offset <= 0xF0BFFF)
555 // WriteLog("[GPUR32] --> Possible GPU RAM mirror access by %s!\n", whoName[who]);
556 /*if (offset >= 0xF1D000 && offset <= 0xF1DFFF)
557 WriteLog("[GPUR32] --> Reading from Wavetable ROM!\n");//*/
559 return (JaguarReadWord(offset, who) << 16) | JaguarReadWord(offset + 2, who);
563 // GPU byte access (write)
//
// Writes one byte at `offset` on behalf of `who`.
//  - Register-file addresses (0xF02000-0xF020FF) are only logged here.
//  - Work RAM: stored at gpu_ram_8[offset & 0xFFF]; afterwards the 68K and DSP
//    timeslices are released (the condition guarding that is not visible in
//    this listing).
//  - Control range: the byte at register offset 0x1C is merged directly into
//    gpu_div_control; every other register is updated by a read-modify-write
//    of the containing aligned long, with the byte lane flipped (3 - bytenum)
//    because the registers are big-endian.
//  - Anything else is forwarded to JaguarWriteByte.
//
// Fix: the long-alignment mask was written as 0xFFFFFFC (only 28 bits), which
// also cleared the top four address bits. It is now 0xFFFFFFFC, matching
// GPUReadByte and GPUReadWord.
565 void GPUWriteByte(uint32 offset, uint8 data, uint32 who/*=UNKNOWN*/)
567 if (offset >= 0xF02000 && offset <= 0xF020FF)
568 WriteLog("GPU: WriteByte--Attempt to write to GPU register file by %s!\n", whoName[who]);
570 if ((offset >= GPU_WORK_RAM_BASE) && (offset <= GPU_WORK_RAM_BASE + 0x0FFF))
572 gpu_ram_8[offset & 0xFFF] = data;
574 //This is the same stupid worthless code that was in the DSP!!! AARRRGGGGHHHHH!!!!!!
577 m68k_end_timeslice();
578 dsp_releaseTimeslice();
582 else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset <= GPU_CONTROL_RAM_BASE + 0x1F))
584 uint32 reg = offset & 0x1C;
585 int bytenum = offset & 0x03;
587 //This is definitely wrong!
588 if ((reg >= 0x1C) && (reg <= 0x1F))
589 gpu_div_control = (gpu_div_control & (~(0xFF << (bytenum << 3)))) | (data << (bytenum << 3));
592 uint32 old_data = GPUReadLong(offset & 0xFFFFFFFC, who);
593 bytenum = 3 - bytenum; // Motorola (big-endian) byte order
594 old_data = (old_data & (~(0xFF << (bytenum << 3)))) | (data << (bytenum << 3));
595 GPUWriteLong(offset & 0xFFFFFFFC, old_data, who);
599 // WriteLog("gpu: writing %.2x at 0x%.8x\n",data,offset);
600 JaguarWriteByte(offset, data, who);
604 // GPU word access (write)
//
// Writes a 16-bit word at `offset` on behalf of `who`.
//  - Register-file addresses (0xF02000-0xF020FF) are only logged here.
//  - Work RAM up to base + 0x0FFE: stored high byte first, then the 68K and
//    DSP timeslices are released (guarding condition not visible here).
//  - Control range up to base + 0x1E: odd addresses are logged as unaligned;
//    otherwise the word replaces one half of gpu_div_control (register 0x1C)
//    or of the containing long of any other register via GPUReadLong /
//    GPUWriteLong. The tests that pick the upper or lower half are missing
//    from this listing.
//  - The last byte of either range (a word would straddle the end) is logged
//    as an unaligned write.
//  - Anything else is forwarded to JaguarWriteWord.
//
// Fixes:
//  * The straddle test read `... || (GPU_CONTROL_RAM_BASE + 0x1F)`, a non-zero
//    constant, so the branch was taken for EVERY address that reached it and
//    the final JaguarWriteWord was unreachable. The missing `offset ==` is
//    restored.
//  * The long-alignment mask 0xFFFFFFC (28 bits) is now 0xFFFFFFFC, matching
//    the read path.
// NOTE(review): with the first fix, out-of-range addresses now really reach
// JaguarWriteWord; heed the infinite-loop warning below and confirm that the
// bus dispatcher never routes such an address back into this function.
606 void GPUWriteWord(uint32 offset, uint16 data, uint32 who/*=UNKNOWN*/)
608 if (offset >= 0xF02000 && offset <= 0xF020FF)
609 WriteLog("GPU: WriteWord--Attempt to write to GPU register file by %s!\n", whoName[who]);
611 if ((offset >= GPU_WORK_RAM_BASE) && (offset <= GPU_WORK_RAM_BASE + 0x0FFE))
613 gpu_ram_8[offset & 0xFFF] = (data>>8) & 0xFF;
614 gpu_ram_8[(offset+1) & 0xFFF] = data & 0xFF;//*/
616 SET16(gpu_ram_8, offset, data);//*/
618 /*if (offset >= 0xF03214 && offset < 0xF0321F)
619 WriteLog("GPU: Writing WORD (%04X) to GPU RAM (%08X)...\n", data, offset);//*/
622 //This is the same stupid worthless code that was in the DSP!!! AARRRGGGGHHHHH!!!!!!
625 m68k_end_timeslice();
626 dsp_releaseTimeslice();
630 else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset <= GPU_CONTROL_RAM_BASE + 0x1E))
632 if (offset & 0x01) // This is supposed to weed out unaligned writes, but does nothing...
635 WriteLog("GPU: Write16--unaligned write @ %08X [%04X]\n", offset, data);
640 //Dual locations in this range: $1C Divide unit remainder/Divide unit control (R/W)
641 //This just literally sucks.
642 if ((offset & 0x1C) == 0x1C)
644 //This doesn't look right either--handles cases 1, 2, & 3 all the same!
646 gpu_div_control = (gpu_div_control & 0xFFFF0000) | (data & 0xFFFF);
648 gpu_div_control = (gpu_div_control & 0x0000FFFF) | ((data & 0xFFFF) << 16);
652 //WriteLog("[GPU W16:%08X,%04X]", offset, data);
653 uint32 old_data = GPUReadLong(offset & 0xFFFFFFFC, who);
656 old_data = (old_data & 0xFFFF0000) | (data & 0xFFFF);
658 old_data = (old_data & 0x0000FFFF) | ((data & 0xFFFF) << 16);
660 GPUWriteLong(offset & 0xFFFFFFFC, old_data, who);
665 else if ((offset == GPU_WORK_RAM_BASE + 0x0FFF) || (offset == GPU_CONTROL_RAM_BASE + 0x1F))
668 WriteLog("GPU: Write16--unaligned write @ %08X by %s [%04X]!\n", offset, whoName[who], data);
674 // Have to be careful here--this can cause an infinite loop!
675 JaguarWriteWord(offset, data, who);
679 // GPU dword access (write)
//
// Writes a 32-bit long at `offset` on behalf of `who`.
//  - Register-file addresses (0xF02000-0xF020FF) are only logged here.
//  - Work RAM up to base + 0x0FFC: stored with SET32 (an unaligned-write log
//    message exists; its guarding test is not visible in this listing).
//  - Control range up to base + 0x1C, one register per long:
//      flags    : IMASK cannot be set by a write (it is masked out); the
//                 Z/C/N bytes are refreshed from the new value, the register
//                 banks are re-selected, the latch bits named by the
//                 interrupt-clear bits are cleared in gpu_control, and if the
//                 write cleared IMASK pending interrupts are serviced.
//      matrix control / matrix pointer (forced long-aligned) / data
//                 organization / divide control: stored as written.
//      control  : latch and version bits (mask 0xF7C0) are write-protected;
//                 may raise a GPU->CPU or CPU->GPU interrupt and end the 68K
//                 timeslice.
//    The switch/case labels, the #if/#endif lines and several conditions of
//    this section are missing from this listing, so the exact grouping of the
//    statements below must be checked against the full source.
//  - Anything else is forwarded to JaguarWriteLong.
// A large part of the control-register section is game-specific debugging
// code, much of it commented out.
681 void GPUWriteLong(uint32 offset, uint32 data, uint32 who/*=UNKNOWN*/)
683 if (offset >= 0xF02000 && offset <= 0xF020FF)
684 WriteLog("GPU: WriteLong--Attempt to write to GPU register file by %s!\n", whoName[who]);
686 // if ((offset >= GPU_WORK_RAM_BASE) && (offset < GPU_WORK_RAM_BASE + 0x1000))
687 if ((offset >= GPU_WORK_RAM_BASE) && (offset <= GPU_WORK_RAM_BASE + 0x0FFC))
692 WriteLog("GPU: Write32--unaligned write @ %08X [%08X] by %s\n", offset, data, whoName[who]);
698 SET32(gpu_ram_8, offset, data);
701 // else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset < GPU_CONTROL_RAM_BASE+0x20))
702 else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset <= GPU_CONTROL_RAM_BASE + 0x1C))
709 bool IMASKCleared = (gpu_flags & IMASK) && !(data & IMASK);
710 // NOTE: According to the JTRM, writing a 1 to IMASK has no effect; only the
711 // IRQ logic can set it. So we mask it out here to prevent problems...
712 gpu_flags = data & (~IMASK);
713 gpu_flag_z = gpu_flags & ZERO_FLAG;
714 gpu_flag_c = (gpu_flags & CARRY_FLAG) >> 1;
715 gpu_flag_n = (gpu_flags & NEGA_FLAG) >> 2;
716 GPUUpdateRegisterBanks();
717 gpu_control &= ~((gpu_flags & CINT04FLAGS) >> 3); // Interrupt latch clear bits
718 //Writing here is only an interrupt enable--this approach is just plain wrong!
720 //This, however, is A-OK! ;-)
721 if (IMASKCleared) // If IMASK was cleared,
722 GPUHandleIRQs(); // see if any other interrupts need servicing!
724 if (gpu_flags & (INT_ENA0 | INT_ENA1 | INT_ENA2 | INT_ENA3 | INT_ENA4))
725 WriteLog("GPU: Interrupt enable set by %s! Bits: %02X\n", whoName[who], (gpu_flags >> 4) & 0x1F);
726 WriteLog("GPU: REGPAGE %s...\n", (gpu_flags & REGPAGE ? "set" : "cleared"));
731 gpu_matrix_control = data;
734 // This can only point to long aligned addresses
735 gpu_pointer_to_matrix = data & 0xFFFFFFFC;
738 gpu_data_organization = data;
743 WriteLog("GPU: %s setting GPU PC to %08X %s\n", whoName[who], gpu_pc, (GPU_RUNNING ? "(GPU is RUNNING!)" : ""));//*/
748 // uint32 gpu_was_running = GPU_RUNNING;
749 data &= ~0xF7C0; // Disable writes to INT_LAT0-4 & TOM version number
751 // check for GPU -> CPU interrupt
754 //WriteLog("GPU->CPU interrupt\n");
755 if (TOMIRQEnabled(IRQ_GPU))
757 //This is the programmer's responsibility, to make sure the handler is valid, not ours!
758 // if ((TOMIRQEnabled(IRQ_GPU))// && (JaguarInterruptHandlerIsValid(64)))
760 TOMSetPendingGPUInt();
761 m68k_set_irq(2); // Set 68000 IPL 2
762 GPUReleaseTimeslice();
768 // check for CPU -> GPU interrupt #0
771 //WriteLog("CPU->GPU interrupt\n");
772 GPUSetIRQLine(0, ASSERT_LINE);
773 m68k_end_timeslice();
774 DSPReleaseTimeslice();
781 //WriteLog("asked to perform a single step (single step is %senabled)\n",(data&0x8)?"":"not ");
783 gpu_control = (gpu_control & 0xF7C0) | (data & (~0xF7C0));
785 // if gpu wasn't running but is now running, execute a few cycles
786 #ifndef GPU_SINGLE_STEPPING
787 /* if (!gpu_was_running && GPU_RUNNING)
790 WriteLog("GPU: Write32--About to do stupid braindead GPU execution for 200 cycles.\n");
795 #endif // GPU_DEBUG//*/
797 if (gpu_control & 0x18)
799 #endif // #ifndef GPU_SINGLE_STEPPING
801 WriteLog("Write to GPU CTRL by %s: %08X ", whoName[who], data);
803 WriteLog(" --> Starting to run at %08X by %s...", gpu_pc, whoName[who]);
805 WriteLog(" --> Stopped by %s! (GPU_PC: %08X)", whoName[who], gpu_pc);
809 // GPUDumpDisassembly();
812 if (gpu_pc == 0xF035D8)
814 // GPUDumpDisassembly();
817 gpu_control &= 0xFFFFFFFE; // Don't run it and let's see what happens!
818 //Hmm. Seems to lock up when going into the demo...
819 //Try to disable the collision altogether!
822 extern int effect_start5;
823 static bool finished = false;
824 //if (GPU_RUNNING && effect_start5 && !finished)
825 if (GPU_RUNNING && effect_start5 && gpu_pc == 0xF035D8)
827 // Let's do a dump of $6528!
828 /* uint32 numItems = JaguarReadWord(0x6BD6);
829 WriteLog("\nDump of $6528: %u items.\n\n", numItems);
830 for(int i=0; i<numItems*3*4; i+=3*4)
832 WriteLog("\t%04X: %08X %08X %08X -> ", 0x6528+i, JaguarReadLong(0x6528+i),
833 JaguarReadLong(0x6528+i+4), JaguarReadLong(0x6528+i+8));
834 uint16 link = JaguarReadWord(0x6528+i+8+2);
835 for(int j=0; j<40; j+=4)
836 WriteLog("%08X ", JaguarReadLong(link + j));
840 // Let's try a manual blit here...
841 //This isn't working the way it should! !!! FIX !!!
842 //Err, actually, it is.
843 // NOW, it works right! Problem solved!!! It's a blitter bug!
844 /* uint32 src = 0x4D54, dst = 0xF03000, width = 10 * 4;
845 for(int y=0; y<127; y++)
847 for(int x=0; x<2; x++)
849 JaguarWriteLong(dst, JaguarReadLong(src));
854 src += width - (2 * 4);
858 WriteLog("\nGPU: About to execute collision detection code.\n\n");//*/
860 /* WriteLog("\nGPU: About to execute collision detection code. Data @ 4D54:\n\n");
862 for(int i=0x004D54; i<0x004D54+2048; i++)
864 WriteLog("%02X ", JaguarReadByte(i));
872 WriteLog("\n\nData @ F03000:\n\n");
874 for(int i=0xF03000; i<0xF03200; i++)
876 WriteLog("%02X ", JaguarReadByte(i));
890 /*if (!GPU_RUNNING && finished)
892 WriteLog("\nGPU: Finished collision detection code. Exiting!\n\n");
897 // (?) If we're set running by the M68K (or DSP?) then end its timeslice to
898 // allow the GPU a chance to run...
899 // Yes! This partially fixed Trevor McFur...
901 m68k_end_timeslice();
908 gpu_div_control = data;
910 // default: // unaligned long write
917 // JaguarWriteWord(offset, (data >> 16) & 0xFFFF, who);
918 // JaguarWriteWord(offset+2, data & 0xFFFF, who);
919 // We're a 32-bit processor, we can do a long write...!
920 JaguarWriteLong(offset, data, who);
924 // Change register banks if necessary
//
// Re-points gpu_reg / gpu_alternate_reg from the current gpu_flags value:
// the REGPAGE bit selects the bank, but while IMASK is set bank 0 is always
// the main bank. Bank 1 active -> gpu_reg = bank 1, alternate = bank 0;
// otherwise the other way round.
// NOTE(review): the if/else lines that choose between the two assignments at
// the end are missing from this listing.
926 void GPUUpdateRegisterBanks(void)
928 int bank = (gpu_flags & REGPAGE); // REGPAGE bit
930 if (gpu_flags & IMASK) // IMASK bit
931 bank = 0; // IMASK forces main bank to be bank 0
934 gpu_reg = gpu_reg_bank_1, gpu_alternate_reg = gpu_reg_bank_0;
936 gpu_reg = gpu_reg_bank_0, gpu_alternate_reg = gpu_reg_bank_1;
// Services a pending GPU interrupt, if any.
//
// Does nothing while IMASK is set in gpu_flags. Otherwise the latched
// interrupts (gpu_control bits 6-10) are combined with the enable bits
// (gpu_flags bits 4-8) to pick interrupt number `which`; the register banks
// are re-selected, the return address gpu_pc - 2 is written to the address in
// R31, and both gpu_pc and R30 are set to GPU_WORK_RAM_BASE + which * 0x10.
// NOTE(review): the early returns, the selection of `which`, the setting of
// IMASK and the R31 pre-decrement are on lines missing from this listing.
939 void GPUHandleIRQs(void)
941 // Bail out if we're already in an interrupt!
942 if (gpu_flags & IMASK)
945 // Get the interrupt latch & enable bits
946 uint32 bits = (gpu_control >> 6) & 0x1F, mask = (gpu_flags >> 4) & 0x1F;
948 // Bail out if latched interrupts aren't enabled
953 // Determine which interrupt to service
954 uint32 which = 0; //Isn't there a #pragma to disable this warning???
967 WriteLog("GPU: Generating IRQ #%i\n", which);
969 // set the interrupt flag
971 GPUUpdateRegisterBanks();
973 // subqt #4,r31 ; pre-decrement stack pointer
974 // move pc,r30 ; address of interrupted code
975 // store r30,(r31) ; store return address
977 GPUWriteLong(gpu_reg[31], gpu_pc - 2, GPU);
979 // movei #service_address,r30 ; pointer to ISR entry
980 // jump (r30) ; jump to ISR
982 gpu_pc = gpu_reg[30] = GPU_WORK_RAM_BASE + (which * 0x10);
// Sets or clears the latch for GPU interrupt line `irqline`.
//
// The latch bit is 0x0040 << irqline in gpu_control. It is always cleared
// first; it is then set again and GPUHandleIRQs() is called.
// NOTE(review): the test on `state` that guards the assert path is missing
// from this listing -- presumably it compares against ASSERT_LINE.
985 void GPUSetIRQLine(int irqline, int state)
988 WriteLog("GPU: Setting GPU IRQ line #%i\n", irqline);
990 uint32 mask = 0x0040 << irqline;
991 gpu_control &= ~mask; // Clear the interrupt latch
995 gpu_control |= mask; // Assert the interrupt latch
996 GPUHandleIRQs(); // And handle the interrupt...
// NOTE(review): the header of the enclosing function is missing from this
// listing (presumably the GPU initialisation routine). The only live statement
// visible builds the branch condition table; the memory_malloc_secure calls
// are commented out because the RAM and register banks are static arrays.
1000 //TEMPORARY: Testing only!
1006 // memory_malloc_secure((void **)&gpu_ram_8, 0x1000, "GPU work RAM");
1007 // memory_malloc_secure((void **)&gpu_reg_bank_0, 32 * sizeof(int32), "GPU bank 0 regs");
1008 // memory_malloc_secure((void **)&gpu_reg_bank_1, 32 * sizeof(int32), "GPU bank 1 regs");
1010 build_branch_condition_table();
// NOTE(review): the header of the enclosing function is missing from this
// listing (presumably the GPU reset routine). The visible statements put the
// GPU into its power-on state: control registers to the values below, PC at
// 0x00F03000, bank 0 as the active register bank, all 64 registers zeroed and
// the 0x1000 bytes of work RAM filled with 0xFF.
1014 //TEMPORARY: Testing only!
1021 // GPU registers (directly visible)
1022 gpu_flags = 0x00000000;
1023 gpu_matrix_control = 0x00000000;
1024 gpu_pointer_to_matrix = 0x00000000;
1025 gpu_data_organization = 0xFFFFFFFF;
1026 gpu_pc = 0x00F03000;
1027 gpu_control = 0x00002800; // Correctly sets this as TOM Rev. 2
1028 gpu_hidata = 0x00000000;
1029 gpu_remain = 0x00000000; // These two registers are RO/WO
1030 gpu_div_control = 0x00000000;
1032 // GPU internal register
1033 gpu_acc = 0x00000000;
1035 gpu_reg = gpu_reg_bank_0;
1036 gpu_alternate_reg = gpu_reg_bank_1;
// Clears both banks in one pass (gpu_reg and gpu_alternate_reg cover bank 0
// and bank 1 respectively at this point).
1038 for(int i=0; i<32; i++)
1039 gpu_reg[i] = gpu_alternate_reg[i] = 0x00000000;
1042 memset(gpu_ram_8, 0xFF, 0x1000);
1044 //not needed GPUInterruptPending = false;
// NOTE(review): the body of this function is missing from this listing;
// presumably it returns gpu_pc -- confirm against the full source.
1048 uint32 GPUReadPC(void)
// Clears all 64 per-opcode execution counters in gpu_opcode_use[] and writes
// a note to the log.
1053 void GPUResetStats(void)
1055 for(uint32 i=0; i<64; i++)
1056 gpu_opcode_use[i] = 0;
1057 WriteLog("--> GPU stats were reset!\n");
// Logs a disassembly of the GPU address range 0xF03000-0xF03FFF.
//
// Starting at 0xF03000, each instruction is disassembled with dasmjag(), which
// returns the amount by which to advance the address, and the resulting text
// is logged together with the address it came from.
// NOTE(review): the declarations of `buffer` and `oldj` are on lines missing
// from this listing.
1060 void GPUDumpDisassembly(void)
1064 WriteLog("\n---[GPU code at 00F03000]---------------------------\n");
1065 uint32 j = 0xF03000;
1066 while (j <= 0xF03FFF)
1069 j += dasmjag(JAGUAR_GPU, buffer, j);
1070 WriteLog("\t%08X: %s\n", oldj, buffer);
// Logs the N, C and Z flags followed by the contents of both register banks,
// four registers per line (eight lines per bank).
1074 void GPUDumpRegisters(void)
1076 WriteLog("\n---[GPU flags: NCZ %d%d%d]-----------------------\n", gpu_flag_n, gpu_flag_c, gpu_flag_z);
1077 WriteLog("\nRegisters bank 0\n");
// j selects a row; (j << 2) + k is the register number within the bank.
1078 for(int j=0; j<8; j++)
1080 WriteLog("\tR%02i = %08X R%02i = %08X R%02i = %08X R%02i = %08X\n",
1081 (j << 2) + 0, gpu_reg_bank_0[(j << 2) + 0],
1082 (j << 2) + 1, gpu_reg_bank_0[(j << 2) + 1],
1083 (j << 2) + 2, gpu_reg_bank_0[(j << 2) + 2],
1084 (j << 2) + 3, gpu_reg_bank_0[(j << 2) + 3]);
1086 WriteLog("Registers bank 1\n");
1087 for(int j=0; j<8; j++)
1089 WriteLog("\tR%02i = %08X R%02i = %08X R%02i = %08X R%02i = %08X\n",
1090 (j << 2) + 0, gpu_reg_bank_1[(j << 2) + 0],
1091 (j << 2) + 1, gpu_reg_bank_1[(j << 2) + 1],
1092 (j << 2) + 2, gpu_reg_bank_1[(j << 2) + 2],
1093 (j << 2) + 3, gpu_reg_bank_1[(j << 2) + 3]);
// Logs the whole of GPU work RAM as hex, four bytes per line, labelling each
// line with its address (0xF03000 + index). The loop bound 0xFFF with a step
// of 4 makes 0xFFC the last index, so bytes 0x000-0xFFF are all covered.
1097 void GPUDumpMemory(void)
1099 WriteLog("\n---[GPU data at 00F03000]---------------------------\n");
1100 for(int i=0; i<0xFFF; i+=4)
1101 WriteLog("\t%08X: %02X %02X %02X %02X\n", 0xF03000+i, gpu_ram_8[i],
1102 gpu_ram_8[i+1], gpu_ram_8[i+2], gpu_ram_8[i+3]);
// NOTE(review): the header of the enclosing function is missing from this
// listing (presumably the GPU shutdown routine). The visible statements log
// where the GPU stopped and whether it was running, the interrupt latch and
// enable bits, a disassembly of local RAM, and the execution count of every
// opcode that was used at least once.
//
// Fix: the usage counters are uint32 but were printed with %lu, which expects
// an unsigned long; where unsigned long is wider than 32 bits that is a
// format/argument mismatch. The value is now cast to unsigned int and printed
// with %u, the same way gpu_pc is handled in the first message.
1107 WriteLog("GPU: Stopped at PC=%08X (GPU %s running)\n", (unsigned int)gpu_pc, GPU_RUNNING ? "was" : "wasn't");
1109 // Get the interrupt latch & enable bits
1110 uint8 bits = (gpu_control >> 6) & 0x1F, mask = (gpu_flags >> 4) & 0x1F;
1111 WriteLog("GPU: Latch bits = %02X, enable bits = %02X\n", bits, mask);
1114 GPUDumpDisassembly();
1116 WriteLog("\nGPU opcodes use:\n");
1117 for(int i=0; i<64; i++)
1119 if (gpu_opcode_use[i])
1120 WriteLog("\t%17s %u\n", gpu_opcode_str[i], (unsigned int)gpu_opcode_use[i]);
1124 // memory_free(gpu_ram_8);
1125 // memory_free(gpu_reg_bank_0);
1126 // memory_free(gpu_reg_bank_1);
1130 // Main GPU execution core
//
// GPUExec(cycles) runs GPU instructions until the cycle budget is used up or
// the GPU stops running (GPU_RUNNING becomes false). For every instruction:
//   1. the 16-bit opcode word is fetched from gpu_pc with GPUReadWord;
//   2. it is split into index (top 6 bits), first parameter (bits 5-9) and
//      second parameter (bits 0-4), which the opcode handlers read through
//      the RM/RN/IMM_1/IMM_2 macros;
//   3. gpu_opcode[index]() is called;
//   4. gpu_opcode_cycles[index] is subtracted from `cycles` and the usage
//      counter gpu_opcode_use[index] is incremented.
// A one-shot warning (latched through `tripwire`) is logged when the PC leaves
// 0xF03000-0xF03FFF.
// NOTE(review): the lines that advance gpu_pc, the conditions guarding the
// disassembly logging, and the #if/#endif lines are missing from this listing.
// Much of the body is commented-out, game-specific debugging code.
1132 static int testCount = 1;
1134 static bool tripwire = false;
1135 void GPUExec(int32 cycles)
1140 #ifdef GPU_SINGLE_STEPPING
1141 if (gpu_control & 0x18)
1144 gpu_control &= ~0x10;
1148 gpu_releaseTimeSlice_flag = 0;
1151 while (cycles > 0 && GPU_RUNNING)
1153 if (gpu_ram_8[0x054] == 0x98 && gpu_ram_8[0x055] == 0x0A && gpu_ram_8[0x056] == 0x03
1154 && gpu_ram_8[0x057] == 0x00 && gpu_ram_8[0x058] == 0x00 && gpu_ram_8[0x059] == 0x00)
1156 if (gpu_pc == 0xF03000)
1158 extern uint32 starCount;
1160 /* WriteLog("GPU: Starting starfield generator... Dump of [R03=%08X]:\n", gpu_reg_bank_0[03]);
1161 uint32 base = gpu_reg_bank_0[3];
1162 for(uint32 i=0; i<0x100; i+=16)
1164 WriteLog("%02X: ", i);
1165 for(uint32 j=0; j<16; j++)
1167 WriteLog("%02X ", JaguarReadByte(base + i + j));
1172 // if (gpu_pc == 0xF03)
1176 /*if (gpu_pc == 0xF03B9E && gpu_reg_bank_0[01] == 0)
1179 WriteLog("GPU: Starting disassembly log...\n");
1182 /*if (gpu_pc == 0xF0359A)
1187 /* gpu_flag_c = (gpu_flag_c ? 1 : 0);
1188 gpu_flag_z = (gpu_flag_z ? 1 : 0);
1189 gpu_flag_n = (gpu_flag_n ? 1 : 0);*/
// Fetch and decode the next instruction word.
1191 uint16 opcode = GPUReadWord(gpu_pc, GPU);
1192 uint32 index = opcode >> 10;
1193 gpu_instruction = opcode; // Added for GPU #3...
1194 gpu_opcode_first_parameter = (opcode >> 5) & 0x1F;
1195 gpu_opcode_second_parameter = opcode & 0x1F;
1196 /*if (gpu_pc == 0xF03BE8)
1197 WriteLog("Start of OP frame write...\n");
1198 if (gpu_pc == 0xF03EEE)
1199 WriteLog("--> Writing BRANCH object ---\n");
1200 if (gpu_pc == 0xF03F62)
1201 WriteLog("--> Writing BITMAP object ***\n");//*/
1202 /*if (gpu_pc == 0xF03546)
1204 WriteLog("\n--> GPU PC: F03546\n");
1206 GPUDumpDisassembly();
1208 /*if (gpu_pc == 0xF033F6)
1210 WriteLog("\n--> GPU PC: F033F6\n");
1212 GPUDumpDisassembly();
1214 /*if (gpu_pc == 0xF033CC)
1216 WriteLog("\n--> GPU PC: F033CC\n");
1218 GPUDumpDisassembly();
1220 /*if (gpu_pc == 0xF033D6)
1222 WriteLog("\n--> GPU PC: F033D6 (#%d)\n", testCount++);
1226 /*if (gpu_pc == 0xF033D8)
1228 WriteLog("\n--> GPU PC: F033D8 (#%d)\n", testCount++);
1232 /*if (gpu_pc == 0xF0358E)
1234 WriteLog("\n--> GPU PC: F0358E (#%d)\n", testCount++);
1238 /*if (gpu_pc == 0xF034CA)
1240 WriteLog("\n--> GPU PC: F034CA (#%d)\n", testCount++);
1243 /*if (gpu_pc == 0xF034CA)
1245 len = gpu_reg[1] + 4;//, r9save = gpu_reg[9];
1246 WriteLog("\nAbout to subtract [#%d] (R14=%08X, R15=%08X, R9=%08X):\n ", testCount++, gpu_reg[14], gpu_reg[15], gpu_reg[9]);
1247 for(int i=0; i<len; i+=4)
1248 WriteLog(" %08X", GPUReadLong(gpu_reg[15]+i));
1250 for(int i=0; i<len; i+=4)
1251 WriteLog(" %08X", GPUReadLong(gpu_reg[14]+i));
1254 if (gpu_pc == 0xF034DE)
1256 WriteLog("\nSubtracted! (R14=%08X, R15=%08X):\n ", gpu_reg[14], gpu_reg[15]);
1257 for(int i=0; i<len; i+=4)
1258 WriteLog(" %08X", GPUReadLong(gpu_reg[15]+i));
1260 for(int i=0; i<len; i+=4)
1261 WriteLog(" %08X", GPUReadLong(gpu_reg[14]+i));
1263 for(int i=0; i<len; i+=4)
1264 WriteLog(" --------");
1266 for(int i=0; i<len; i+=4)
1267 WriteLog(" %08X", GPUReadLong(gpu_reg[9]+4+i));
1270 /*if (gpu_pc == 0xF035C8)
1272 WriteLog("\n--> GPU PC: F035C8 (#%d)\n", testCount++);
1274 GPUDumpDisassembly();
1279 // gpu_reset_stats();
1280 static char buffer[512];
1281 dasmjag(JAGUAR_GPU, buffer, gpu_pc);
1282 WriteLog("GPU: [%08X] %s (RM=%08X, RN=%08X) -> ", gpu_pc, buffer, RM, RN);
1284 //$E400 -> 1110 01 -> $39 -> 57
// Dispatch to the handler for this opcode.
1287 gpu_opcode[index]();
1289 // gpu2_opcode[index]();
1291 //GPU #3 (Doesn't show ATARI logo! #1 & #2 do...)
1293 // gpu3_opcode[index]();
1296 //GPU: [00F03548] jr nz,00F03560 (0xd561) (RM=00F03114, RN=00000004) -> --> JR: Branch taken.
1297 /*static bool firstTime = true;
1298 if (gpu_pc == 0xF03548 && firstTime)
1301 // firstTime = false;
1303 //static char buffer[512];
1305 //while (k<0xF0356C)
1308 //k += dasmjag(JAGUAR_GPU, buffer, k);
1309 //WriteLog("GPU: [%08X] %s\n", oldk, buffer);
1311 // gpu_start_log = 1;
1313 //GPU: [00F0354C] jump nz,(r29) (0xd3a1) (RM=00F03314, RN=00000004) -> (RM=00F03314, RN=00000004)
1314 /*if (gpu_pc == 0xF0354C)
1315 gpu_flag_z = 0;//, gpu_start_log = 1;//*/
// Charge the instruction against the cycle budget and count its use.
1317 cycles -= gpu_opcode_cycles[index];
1318 gpu_opcode_use[index]++;
1320 WriteLog("(RM=%08X, RN=%08X)\n", RM, RN);//*/
1321 if ((gpu_pc < 0xF03000 || gpu_pc > 0xF03FFF) && !tripwire)
1323 WriteLog("GPU: Executing outside local RAM! GPU_PC: %08X\n", gpu_pc);
1336 GPU opcodes use (offset punch--vertically below bad guy):
1358 load_r14_indexed 1183
1359 load_r15_indexed 1125
1362 store_r14_indexed 320
1370 static void gpu_opcode_jump(void)
1373 const char * condition[32] =
1374 { "T", "nz", "z", "???", "nc", "nc nz", "nc z", "???", "c", "c nz",
1375 "c z", "???", "???", "???", "???", "???", "???", "???", "???",
1376 "???", "nn", "nn nz", "nn z", "???", "n", "n nz", "n z", "???",
1377 "???", "???", "???", "F" };
1379 WriteLog("%06X: JUMP %s, (R%02u) [NCZ:%u%u%u, R%02u=%08X] ", gpu_pc-2, condition[IMM_2], IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM);
1382 /* gpu_flag_c = (gpu_flag_c ? 1 : 0);
1383 gpu_flag_z = (gpu_flag_z ? 1 : 0);
1384 gpu_flag_n = (gpu_flag_n ? 1 : 0);*/
1385 // KLUDGE: Used by BRANCH_CONDITION
1386 uint32 jaguar_flags = (gpu_flag_n << 2) | (gpu_flag_c << 1) | gpu_flag_z;
1388 if (BRANCH_CONDITION(IMM_2))
1392 WriteLog("Branched!\n");
1395 WriteLog(" --> JUMP: Branch taken.\n");
1396 uint32 delayed_pc = RM;
1398 gpu_pc = delayed_pc;
1399 /* uint16 opcode = GPUReadWord(gpu_pc, GPU);
1400 gpu_opcode_first_parameter = (opcode >> 5) & 0x1F;
1401 gpu_opcode_second_parameter = opcode & 0x1F;
1403 gpu_pc = delayed_pc;
1404 gpu_opcode[opcode>>10]();//*/
1409 WriteLog("Branch NOT taken.\n");
1413 static void gpu_opcode_jr(void)
1416 const char * condition[32] =
1417 { "T", "nz", "z", "???", "nc", "nc nz", "nc z", "???", "c", "c nz",
1418 "c z", "???", "???", "???", "???", "???", "???", "???", "???",
1419 "???", "nn", "nn nz", "nn z", "???", "n", "n nz", "n z", "???",
1420 "???", "???", "???", "F" };
1422 WriteLog("%06X: JR %s, %06X [NCZ:%u%u%u] ", gpu_pc-2, condition[IMM_2], gpu_pc+((IMM_1 & 0x10 ? 0xFFFFFFF0 | IMM_1 : IMM_1) * 2), gpu_flag_n, gpu_flag_c, gpu_flag_z);
1424 /* if (CONDITION(jaguar.op & 31))
1426 int32 r1 = (INT8)((jaguar.op >> 2) & 0xF8) >> 2;
1427 uint32 newpc = jaguar.PC + r1;
1429 jaguar.op = ROPCODE(jaguar.PC);
1431 (*jaguar.table[jaguar.op >> 10])();
1433 jaguar_icount -= 3; // 3 wait states guaranteed
1436 /* gpu_flag_n = (gpu_flag_n ? 1 : 0);
1437 gpu_flag_c = (gpu_flag_c ? 1 : 0);
1438 gpu_flag_z = (gpu_flag_z ? 1 : 0);*/
1439 // KLUDGE: Used by BRANCH_CONDITION
1440 uint32 jaguar_flags = (gpu_flag_n << 2) | (gpu_flag_c << 1) | gpu_flag_z;
1442 if (BRANCH_CONDITION(IMM_2))
1446 WriteLog("Branched!\n");
1449 WriteLog(" --> JR: Branch taken.\n");
1450 int32 offset = (IMM_1 & 0x10 ? 0xFFFFFFF0 | IMM_1 : IMM_1); // Sign extend IMM_1
1451 int32 delayed_pc = gpu_pc + (offset * 2);
1453 gpu_pc = delayed_pc;
1454 /* uint16 opcode = GPUReadWord(gpu_pc, GPU);
1455 gpu_opcode_first_parameter = (opcode >> 5) & 0x1F;
1456 gpu_opcode_second_parameter = opcode & 0x1F;
1458 gpu_pc = delayed_pc;
1459 gpu_opcode[opcode>>10]();//*/
1464 WriteLog("Branch NOT taken.\n");
1468 static void gpu_opcode_add(void)
1472 WriteLog("%06X: ADD R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1474 uint32 res = RN + RM;
1475 CLR_ZNC; SET_ZNC_ADD(RN, RM, res);
1479 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1483 static void gpu_opcode_addc(void)
1487 WriteLog("%06X: ADDC R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1489 /* int dreg = jaguar.op & 31;
1490 uint32 r1 = jaguar.r[(jaguar.op >> 5) & 31];
1491 uint32 r2 = jaguar.r[dreg];
1492 uint32 res = r2 + r1 + ((jaguar.FLAGS >> 1) & 1);
1493 jaguar.r[dreg] = res;
1494 CLR_ZNC; SET_ZNC_ADD(r2,r1,res);*/
1496 uint32 res = RN + RM + gpu_flag_c;
1497 uint32 carry = gpu_flag_c;
1498 // SET_ZNC_ADD(RN, RM, res); //???BUG??? Yes!
1499 SET_ZNC_ADD(RN + carry, RM, res);
1500 // SET_ZNC_ADD(RN, RM + carry, res);
1504 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1508 static void gpu_opcode_addq(void)
1512 WriteLog("%06X: ADDQ #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1514 uint32 r1 = gpu_convert_zero[IMM_1];
1515 uint32 res = RN + r1;
1516 CLR_ZNC; SET_ZNC_ADD(RN, r1, res);
1520 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1524 static void gpu_opcode_addqt(void)
1526 #ifdef GPU_DIS_ADDQT
1528 WriteLog("%06X: ADDQT #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1530 RN += gpu_convert_zero[IMM_1];
1531 #ifdef GPU_DIS_ADDQT
1533 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1537 static void gpu_opcode_sub(void)
1541 WriteLog("%06X: SUB R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1543 uint32 res = RN - RM;
1544 SET_ZNC_SUB(RN, RM, res);
1548 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1552 static void gpu_opcode_subc(void)
1556 WriteLog("%06X: SUBC R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1558 uint32 res = RN - RM - gpu_flag_c;
1559 uint32 borrow = gpu_flag_c;
1560 // SET_ZNC_SUB(RN, RM, res); //???BUG??? YES!!!
1561 //No matter how you do it, there is a problem. With below, it's 0-0 with carry,
1562 //and the one below it it's FFFFFFFF - FFFFFFFF with carry... !!! FIX !!!
1563 // SET_ZNC_SUB(RN - borrow, RM, res);
1564 SET_ZNC_SUB(RN, RM + borrow, res);
1568 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1572 N = 5, M = 3, 3 - 5 = -2, C = 1... Or, in our case:
1573 N = 0, M = 1, 0 - 1 = -1, C = 0!
1575 #define SET_C_SUB(a,b) (gpu_flag_c = ((uint32)(b) > (uint32)(a)))
1576 #define SET_ZN(r) SET_N(r); SET_Z(r)
1577 #define SET_ZNC_ADD(a,b,r) SET_N(r); SET_Z(r); SET_C_ADD(a,b)
1578 #define SET_ZNC_SUB(a,b,r) SET_N(r); SET_Z(r); SET_C_SUB(a,b)
1580 static void gpu_opcode_subq(void)
1584 WriteLog("%06X: SUBQ #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1586 uint32 r1 = gpu_convert_zero[IMM_1];
1587 uint32 res = RN - r1;
1588 SET_ZNC_SUB(RN, r1, res);
1592 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1596 static void gpu_opcode_subqt(void)
1598 #ifdef GPU_DIS_SUBQT
1600 WriteLog("%06X: SUBQT #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1602 RN -= gpu_convert_zero[IMM_1];
1603 #ifdef GPU_DIS_SUBQT
1605 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1609 static void gpu_opcode_cmp(void)
1613 WriteLog("%06X: CMP R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1615 uint32 res = RN - RM;
1616 SET_ZNC_SUB(RN, RM, res);
1619 WriteLog("[NCZ:%u%u%u]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z);
1623 static void gpu_opcode_cmpq(void)
1625 static int32 sqtable[32] =
1626 { 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,-16,-15,-14,-13,-12,-11,-10,-9,-8,-7,-6,-5,-4,-3,-2,-1 };
1629 WriteLog("%06X: CMPQ #%d, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, sqtable[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1631 uint32 r1 = sqtable[IMM_1 & 0x1F]; // I like this better -> (INT8)(jaguar.op >> 2) >> 3;
1632 uint32 res = RN - r1;
1633 SET_ZNC_SUB(RN, r1, res);
1636 WriteLog("[NCZ:%u%u%u]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z);
1640 static void gpu_opcode_and(void)
1644 WriteLog("%06X: AND R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1650 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1654 static void gpu_opcode_or(void)
1658 WriteLog("%06X: OR R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1664 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1668 static void gpu_opcode_xor(void)
1672 WriteLog("%06X: XOR R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1678 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1682 static void gpu_opcode_not(void)
1686 WriteLog("%06X: NOT R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1692 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1696 static void gpu_opcode_move_pc(void)
1698 #ifdef GPU_DIS_MOVEPC
1700 WriteLog("%06X: MOVE PC, R%02u [NCZ:%u%u%u, PC=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, gpu_pc-2, IMM_2, RN);
1702 // Should be previous PC--this might not always be previous instruction!
1703 // Then again, this will point right at the *current* instruction, i.e., MOVE PC,R!
1705 #ifdef GPU_DIS_MOVEPC
1707 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1711 static void gpu_opcode_sat8(void)
1715 WriteLog("%06X: SAT8 R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1717 RN = ((int32)RN < 0 ? 0 : (RN > 0xFF ? 0xFF : RN));
1721 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1725 static void gpu_opcode_sat16(void)
1727 RN = ((int32)RN < 0 ? 0 : (RN > 0xFFFF ? 0xFFFF : RN));
1731 static void gpu_opcode_sat24(void)
1733 RN = ((int32)RN < 0 ? 0 : (RN > 0xFFFFFF ? 0xFFFFFF : RN));
1737 static void gpu_opcode_store_r14_indexed(void)
1739 #ifdef GPU_DIS_STORE14I
1741 WriteLog("%06X: STORE R%02u, (R14+$%02X) [NCZ:%u%u%u, R%02u=%08X, R14+$%02X=%08X]\n", gpu_pc-2, IMM_2, gpu_convert_zero[IMM_1] << 2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, gpu_convert_zero[IMM_1] << 2, gpu_reg[14]+(gpu_convert_zero[IMM_1] << 2));
1743 #ifdef GPU_CORRECT_ALIGNMENT
1744 uint32 address = gpu_reg[14] + (gpu_convert_zero[IMM_1] << 2);
1746 if (address >= 0xF03000 && address <= 0xF03FFF)
1747 GPUWriteLong(address & 0xFFFFFFFC, RN, GPU);
1749 GPUWriteLong(address, RN, GPU);
1751 GPUWriteLong(gpu_reg[14] + (gpu_convert_zero[IMM_1] << 2), RN, GPU);
1755 static void gpu_opcode_store_r15_indexed(void)
1757 #ifdef GPU_DIS_STORE15I
1759 WriteLog("%06X: STORE R%02u, (R15+$%02X) [NCZ:%u%u%u, R%02u=%08X, R15+$%02X=%08X]\n", gpu_pc-2, IMM_2, gpu_convert_zero[IMM_1] << 2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, gpu_convert_zero[IMM_1] << 2, gpu_reg[15]+(gpu_convert_zero[IMM_1] << 2));
1761 #ifdef GPU_CORRECT_ALIGNMENT
1762 uint32 address = gpu_reg[15] + (gpu_convert_zero[IMM_1] << 2);
1764 if (address >= 0xF03000 && address <= 0xF03FFF)
1765 GPUWriteLong(address & 0xFFFFFFFC, RN, GPU);
1767 GPUWriteLong(address, RN, GPU);
1769 GPUWriteLong(gpu_reg[15] + (gpu_convert_zero[IMM_1] << 2), RN, GPU);
1773 static void gpu_opcode_load_r14_ri(void)
1775 #ifdef GPU_DIS_LOAD14R
1777 WriteLog("%06X: LOAD (R14+R%02u), R%02u [NCZ:%u%u%u, R14+R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM+gpu_reg[14], IMM_2, RN);
1779 #ifdef GPU_CORRECT_ALIGNMENT
1780 uint32 address = gpu_reg[14] + RM;
1782 if (address >= 0xF03000 && address <= 0xF03FFF)
1783 RN = GPUReadLong(address & 0xFFFFFFFC, GPU);
1785 RN = GPUReadLong(address, GPU);
1787 RN = GPUReadLong(gpu_reg[14] + RM, GPU);
1789 #ifdef GPU_DIS_LOAD14R
1791 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1795 static void gpu_opcode_load_r15_ri(void)
1797 #ifdef GPU_DIS_LOAD15R
1799 WriteLog("%06X: LOAD (R15+R%02u), R%02u [NCZ:%u%u%u, R15+R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM+gpu_reg[15], IMM_2, RN);
1801 #ifdef GPU_CORRECT_ALIGNMENT
1802 uint32 address = gpu_reg[15] + RM;
1804 if (address >= 0xF03000 && address <= 0xF03FFF)
1805 RN = GPUReadLong(address & 0xFFFFFFFC, GPU);
1807 RN = GPUReadLong(address, GPU);
1809 RN = GPUReadLong(gpu_reg[15] + RM, GPU);
1811 #ifdef GPU_DIS_LOAD15R
1813 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1817 static void gpu_opcode_store_r14_ri(void)
1819 #ifdef GPU_DIS_STORE14R
1821 WriteLog("%06X: STORE R%02u, (R14+R%02u) [NCZ:%u%u%u, R%02u=%08X, R14+R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM+gpu_reg[14]);
1823 #ifdef GPU_CORRECT_ALIGNMENT
1824 uint32 address = gpu_reg[14] + RM;
1826 if (address >= 0xF03000 && address <= 0xF03FFF)
1827 GPUWriteLong(address & 0xFFFFFFFC, RN, GPU);
1829 GPUWriteLong(address, RN, GPU);
1831 GPUWriteLong(gpu_reg[14] + RM, RN, GPU);
1835 static void gpu_opcode_store_r15_ri(void)
1837 #ifdef GPU_DIS_STORE15R
1839 WriteLog("%06X: STORE R%02u, (R15+R%02u) [NCZ:%u%u%u, R%02u=%08X, R15+R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM+gpu_reg[15]);
1841 #ifdef GPU_CORRECT_ALIGNMENT_STORE
1842 uint32 address = gpu_reg[15] + RM;
1844 if (address >= 0xF03000 && address <= 0xF03FFF)
1845 GPUWriteLong(address & 0xFFFFFFFC, RN, GPU);
1847 GPUWriteLong(address, RN, GPU);
1849 GPUWriteLong(gpu_reg[15] + RM, RN, GPU);
1853 static void gpu_opcode_nop(void)
1857 WriteLog("%06X: NOP [NCZ:%u%u%u]\n", gpu_pc-2, gpu_flag_n, gpu_flag_c, gpu_flag_z);
1861 static void gpu_opcode_pack(void)
1865 WriteLog("%06X: %s R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, (!IMM_1 ? "PACK " : "UNPACK"), IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1869 //BUG! if (RM == 0) // Pack
1870 if (IMM_1 == 0) // Pack
1871 RN = ((val >> 10) & 0x0000F000) | ((val >> 5) & 0x00000F00) | (val & 0x000000FF);
1873 RN = ((val & 0x0000F000) << 10) | ((val & 0x00000F00) << 5) | (val & 0x000000FF);
1876 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1880 static void gpu_opcode_storeb(void)
1882 #ifdef GPU_DIS_STOREB
1884 WriteLog("%06X: STOREB R%02u, (R%02u) [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM);
1887 // Would appear to be so...!
1888 if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1889 GPUWriteLong(RM, RN & 0xFF, GPU);
1891 JaguarWriteByte(RM, RN, GPU);
1894 static void gpu_opcode_storew(void)
1896 #ifdef GPU_DIS_STOREW
1898 WriteLog("%06X: STOREW R%02u, (R%02u) [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM);
1900 #ifdef GPU_CORRECT_ALIGNMENT
1901 if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1902 GPUWriteLong(RM & 0xFFFFFFFE, RN & 0xFFFF, GPU);
1904 JaguarWriteWord(RM, RN, GPU);
1906 if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1907 GPUWriteLong(RM, RN & 0xFFFF, GPU);
1909 JaguarWriteWord(RM, RN, GPU);
1913 static void gpu_opcode_store(void)
1915 #ifdef GPU_DIS_STORE
1917 WriteLog("%06X: STORE R%02u, (R%02u) [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM);
1919 #ifdef GPU_CORRECT_ALIGNMENT
1920 if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1921 GPUWriteLong(RM & 0xFFFFFFFC, RN, GPU);
1923 GPUWriteLong(RM, RN, GPU);
1925 GPUWriteLong(RM, RN, GPU);
1929 static void gpu_opcode_storep(void)
1931 #ifdef GPU_CORRECT_ALIGNMENT
1932 if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1934 GPUWriteLong((RM & 0xFFFFFFF8) + 0, gpu_hidata, GPU);
1935 GPUWriteLong((RM & 0xFFFFFFF8) + 4, RN, GPU);
1939 GPUWriteLong(RM + 0, gpu_hidata, GPU);
1940 GPUWriteLong(RM + 4, RN, GPU);
1943 GPUWriteLong(RM + 0, gpu_hidata, GPU);
1944 GPUWriteLong(RM + 4, RN, GPU);
1948 static void gpu_opcode_loadb(void)
1950 #ifdef GPU_DIS_LOADB
1952 WriteLog("%06X: LOADB (R%02u), R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1954 if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1955 RN = GPUReadLong(RM, GPU) & 0xFF;
1957 RN = JaguarReadByte(RM, GPU);
1958 #ifdef GPU_DIS_LOADB
1960 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1964 static void gpu_opcode_loadw(void)
1966 #ifdef GPU_DIS_LOADW
1968 WriteLog("%06X: LOADW (R%02u), R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1970 #ifdef GPU_CORRECT_ALIGNMENT
1971 if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1972 RN = GPUReadLong(RM & 0xFFFFFFFE, GPU) & 0xFFFF;
1974 RN = JaguarReadWord(RM, GPU);
1976 if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1977 RN = GPUReadLong(RM, GPU) & 0xFFFF;
1979 RN = JaguarReadWord(RM, GPU);
1981 #ifdef GPU_DIS_LOADW
1983 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1987 // According to the docs, & "Do The Same", this address is long aligned...
1989 // And it works!!! Need to fix all instances...
1990 // Also, Power Drive Rally seems to contradict the idea that only LOADs in
1991 // the $F03000-$F03FFF range are aligned...
1992 #warning "!!! Alignment issues, need to find definitive final word on this !!!"
1994 Preliminary testing on real hardware seems to confirm that something strange goes on
1995 with unaligned reads in main memory. When the address is off by 1, the result is the
1996 same as the long address with the top byte replaced by something. So if the read is
1997 from $401, and $400 has 12 34 56 78, the value read will be $nn345678, where nn is a currently unknown value.
1998 When the address is off by 2, the result would be $nnnn5678, where nnnn is unknown.
1999 When the address is off by 3, the result would be $nnnnnn78, where nnnnnn is unknown.
2000 It may be that the "unknown" values come from the prefetch queue, but not sure how
2001 to test that. They seem to be stable, though, which would indicate such a mechanism.
2002 Sometimes, however, the off by 2 case returns $12345678!
2004 static void gpu_opcode_load(void)
2008 WriteLog("%06X: LOAD (R%02u), R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2010 #ifdef GPU_CORRECT_ALIGNMENT
2011 uint32 mask[4] = { 0x00000000, 0xFF000000, 0xFFFF0000, 0xFFFFFF00 };
2012 // if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
2013 RN = GPUReadLong(RM & 0xFFFFFFFC, GPU);
2014 // RN = GPUReadLong(RM & 0x00FFFFFC, GPU);
2016 // RN = GPUReadLong(RM, GPU);
2017 // Simulate garbage in unaligned reads...
2018 //seems that this behavior is different in GPU mem vs. main mem...
2019 // if ((RM < 0xF03000) || (RM > 0xF0BFFF))
2020 // RN |= mask[RM & 0x03];
2022 RN = GPUReadLong(RM, GPU);
2026 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2030 static void gpu_opcode_loadp(void)
2032 #ifdef GPU_CORRECT_ALIGNMENT
2033 if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
2035 gpu_hidata = GPUReadLong((RM & 0xFFFFFFF8) + 0, GPU);
2036 RN = GPUReadLong((RM & 0xFFFFFFF8) + 4, GPU);
2040 gpu_hidata = GPUReadLong(RM + 0, GPU);
2041 RN = GPUReadLong(RM + 4, GPU);
2044 gpu_hidata = GPUReadLong(RM + 0, GPU);
2045 RN = GPUReadLong(RM + 4, GPU);
2049 static void gpu_opcode_load_r14_indexed(void)
2051 #ifdef GPU_DIS_LOAD14I
2053 WriteLog("%06X: LOAD (R14+$%02X), R%02u [NCZ:%u%u%u, R14+$%02X=%08X, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1] << 2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, gpu_convert_zero[IMM_1] << 2, gpu_reg[14]+(gpu_convert_zero[IMM_1] << 2), IMM_2, RN);
2055 #ifdef GPU_CORRECT_ALIGNMENT
2056 uint32 address = gpu_reg[14] + (gpu_convert_zero[IMM_1] << 2);
2058 if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
2059 RN = GPUReadLong(address & 0xFFFFFFFC, GPU);
2061 RN = GPUReadLong(address, GPU);
2063 RN = GPUReadLong(gpu_reg[14] + (gpu_convert_zero[IMM_1] << 2), GPU);
2065 #ifdef GPU_DIS_LOAD14I
2067 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2071 static void gpu_opcode_load_r15_indexed(void)
2073 #ifdef GPU_DIS_LOAD15I
2075 WriteLog("%06X: LOAD (R15+$%02X), R%02u [NCZ:%u%u%u, R15+$%02X=%08X, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1] << 2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, gpu_convert_zero[IMM_1] << 2, gpu_reg[15]+(gpu_convert_zero[IMM_1] << 2), IMM_2, RN);
2077 #ifdef GPU_CORRECT_ALIGNMENT
2078 uint32 address = gpu_reg[15] + (gpu_convert_zero[IMM_1] << 2);
2080 if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
2081 RN = GPUReadLong(address & 0xFFFFFFFC, GPU);
2083 RN = GPUReadLong(address, GPU);
2085 RN = GPUReadLong(gpu_reg[15] + (gpu_convert_zero[IMM_1] << 2), GPU);
2087 #ifdef GPU_DIS_LOAD15I
2089 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2093 static void gpu_opcode_movei(void)
2095 #ifdef GPU_DIS_MOVEI
2097 WriteLog("%06X: MOVEI #$%08X, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, (uint32)GPUReadWord(gpu_pc) | ((uint32)GPUReadWord(gpu_pc + 2) << 16), IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2099 // This instruction is followed by 32-bit value in LSW / MSW format...
2100 RN = (uint32)GPUReadWord(gpu_pc, GPU) | ((uint32)GPUReadWord(gpu_pc + 2, GPU) << 16);
2102 #ifdef GPU_DIS_MOVEI
2104 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2108 static void gpu_opcode_moveta(void)
2110 #ifdef GPU_DIS_MOVETA
2112 WriteLog("%06X: MOVETA R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u(alt)=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, ALTERNATE_RN);
2115 #ifdef GPU_DIS_MOVETA
2117 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u(alt)=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, ALTERNATE_RN);
2121 static void gpu_opcode_movefa(void)
2123 #ifdef GPU_DIS_MOVEFA
2125 WriteLog("%06X: MOVEFA R%02u, R%02u [NCZ:%u%u%u, R%02u(alt)=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, ALTERNATE_RM, IMM_2, RN);
2128 #ifdef GPU_DIS_MOVEFA
2130 WriteLog("[NCZ:%u%u%u, R%02u(alt)=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, ALTERNATE_RM, IMM_2, RN);
2134 static void gpu_opcode_move(void)
2138 WriteLog("%06X: MOVE R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2143 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2147 static void gpu_opcode_moveq(void)
2149 #ifdef GPU_DIS_MOVEQ
2151 WriteLog("%06X: MOVEQ #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2154 #ifdef GPU_DIS_MOVEQ
2156 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2160 static void gpu_opcode_resmac(void)
2165 static void gpu_opcode_imult(void)
2167 #ifdef GPU_DIS_IMULT
2169 WriteLog("%06X: IMULT R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2171 RN = (int16)RN * (int16)RM;
2173 #ifdef GPU_DIS_IMULT
2175 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2179 static void gpu_opcode_mult(void)
2183 WriteLog("%06X: MULT R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2185 RN = (uint16)RM * (uint16)RN;
2189 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2193 static void gpu_opcode_bclr(void)
2197 WriteLog("%06X: BCLR #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2199 uint32 res = RN & ~(1 << IMM_1);
2204 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2208 static void gpu_opcode_btst(void)
2212 WriteLog("%06X: BTST #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2214 gpu_flag_z = (~RN >> IMM_1) & 1;
2217 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2221 static void gpu_opcode_bset(void)
2225 WriteLog("%06X: BSET #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2227 uint32 res = RN | (1 << IMM_1);
2232 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2236 static void gpu_opcode_imacn(void)
2238 uint32 res = (int16)RM * (int16)(RN);
2242 static void gpu_opcode_mtoi(void)
2245 uint32 res = RN = (((int32)_RM >> 8) & 0xFF800000) | (_RM & 0x007FFFFF);
2249 static void gpu_opcode_normi(void)
2256 while ((_RM & 0xFFC00000) == 0)
2261 while ((_RM & 0xFF800000) != 0)
2271 static void gpu_opcode_mmult(void)
2273 int count = gpu_matrix_control & 0x0F; // Matrix width
2274 uint32 addr = gpu_pointer_to_matrix; // In the GPU's RAM
2278 if (gpu_matrix_control & 0x10) // Column stepping
2280 for(int i=0; i<count; i++)
2284 a = (int16)((gpu_alternate_reg[IMM_1 + (i >> 1)] >> 16) & 0xFFFF);
2286 a = (int16)(gpu_alternate_reg[IMM_1 + (i >> 1)] & 0xFFFF);
2288 int16 b = ((int16)GPUReadWord(addr + 2, GPU));
2293 else // Row stepping
2295 for(int i=0; i<count; i++)
2299 a = (int16)((gpu_alternate_reg[IMM_1 + (i >> 1)] >> 16) & 0xFFFF);
2301 a = (int16)(gpu_alternate_reg[IMM_1 + (i >> 1)] & 0xFFFF);
2303 int16 b = ((int16)GPUReadWord(addr + 2, GPU));
2308 RN = res = (int32)accum;
2309 // carry flag to do (out of the last add)
2313 static void gpu_opcode_abs(void)
2317 WriteLog("%06X: ABS R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2319 gpu_flag_c = RN >> 31;
2320 if (RN == 0x80000000)
2321 //Is 0x80000000 a positive number? If so, then we need to set C to 0 as well!
2322 gpu_flag_n = 1, gpu_flag_z = 0;
2327 gpu_flag_n = 0; SET_FLAG_Z(RN);
2331 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2335 static void gpu_opcode_div(void) // RN / RM
2339 WriteLog("%06X: DIV R%02u, R%02u (%s) [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, (gpu_div_control & 0x01 ? "16.16" : "32"), gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2341 // NOTE: remainder is NOT calculated correctly here!
2342 // The original tried to get it right by checking to see if the
2343 // remainder was negative, but that's too late...
2344 // The code there should do it now, but I'm not 100% sure...
2348 if (gpu_div_control & 0x01) // 16.16 division
2350 RN = ((uint64)RN << 16) / RM;
2351 gpu_remain = ((uint64)RN << 16) % RM;
2356 gpu_remain = RN % RM;
2359 if ((gpu_remain - RM) & 0x80000000) // If the result would have been negative...
2360 gpu_remain -= RM; // Then make it negative!
2370 if (gpu_div_control & 1)
2372 gpu_remain = (((uint64)_RN) << 16) % _RM;
2373 if (gpu_remain&0x80000000)
2375 RN = (((uint64)_RN) << 16) / _RM;
2379 gpu_remain = _RN % _RM;
2380 if (gpu_remain&0x80000000)
2389 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] Remainder: %08X\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN, gpu_remain);
2393 static void gpu_opcode_imultn(void)
2395 uint32 res = (int32)((int16)RN * (int16)RM);
2396 gpu_acc = (int32)res;
2401 static void gpu_opcode_neg(void)
2405 WriteLog("%06X: NEG R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2408 SET_ZNC_SUB(0, RN, res);
2412 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2416 static void gpu_opcode_shlq(void)
2420 WriteLog("%06X: SHLQ #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, 32 - IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2422 // Was a bug here...
2423 // (Look at Aaron's code: If r1 = 32, then 32 - 32 = 0 which is wrong!)
2424 int32 r1 = 32 - IMM_1;
2425 uint32 res = RN << r1;
2426 SET_ZN(res); gpu_flag_c = (RN >> 31) & 1;
2430 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2434 static void gpu_opcode_shrq(void)
2438 WriteLog("%06X: SHRQ #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2440 int32 r1 = gpu_convert_zero[IMM_1];
2441 uint32 res = RN >> r1;
2442 SET_ZN(res); gpu_flag_c = RN & 1;
2446 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2450 static void gpu_opcode_ror(void)
2454 WriteLog("%06X: ROR R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2456 uint32 r1 = RM & 0x1F;
2457 uint32 res = (RN >> r1) | (RN << (32 - r1));
2458 SET_ZN(res); gpu_flag_c = (RN >> 31) & 1;
2462 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2466 static void gpu_opcode_rorq(void)
2470 WriteLog("%06X: RORQ #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2472 uint32 r1 = gpu_convert_zero[IMM_1 & 0x1F];
2474 uint32 res = (r2 >> r1) | (r2 << (32 - r1));
2476 SET_ZN(res); gpu_flag_c = (r2 >> 31) & 0x01;
2479 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2483 static void gpu_opcode_sha(void)
2485 /* int dreg = jaguar.op & 31;
2486 int32 r1 = (int32)jaguar.r[(jaguar.op >> 5) & 31];
2487 uint32 r2 = jaguar.r[dreg];
2493 res = (r1 <= -32) ? 0 : (r2 << -r1);
2494 jaguar.FLAGS |= (r2 >> 30) & 2;
2498 res = (r1 >= 32) ? ((int32)r2 >> 31) : ((int32)r2 >> r1);
2499 jaguar.FLAGS |= (r2 << 1) & 2;
2501 jaguar.r[dreg] = res;
2506 WriteLog("%06X: SHA R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2512 res = ((int32)RM <= -32) ? 0 : (RN << -(int32)RM);
2513 gpu_flag_c = RN >> 31;
2517 res = ((int32)RM >= 32) ? ((int32)RN >> 31) : ((int32)RN >> (int32)RM);
2518 gpu_flag_c = RN & 0x01;
2524 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2527 /* int32 sRM=(int32)RM;
2533 if (shift>=32) shift=32;
2534 gpu_flag_c=(_RN&0x80000000)>>31;
2544 if (shift>=32) shift=32;
2548 _RN=((int32)_RN)>>1;
2557 static void gpu_opcode_sharq(void)
2559 #ifdef GPU_DIS_SHARQ
2561 WriteLog("%06X: SHARQ #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2563 uint32 res = (int32)RN >> gpu_convert_zero[IMM_1];
2564 SET_ZN(res); gpu_flag_c = RN & 0x01;
2566 #ifdef GPU_DIS_SHARQ
2568 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2572 static void gpu_opcode_sh(void)
2576 WriteLog("%06X: SH R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2578 if (RM & 0x80000000) // Shift left
2580 gpu_flag_c = RN >> 31;
2581 RN = ((int32)RM <= -32 ? 0 : RN << -(int32)RM);
2585 gpu_flag_c = RN & 0x01;
2586 RN = (RM >= 32 ? 0 : RN >> RM);
2591 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2595 //Temporary: Testing only!
2596 //#include "gpu2.cpp"
2597 //#include "gpu3.cpp"
2601 // New thread-safe GPU core
2603 int GPUCore(void * data)