X-Git-Url: http://shamusworld.gotdns.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fgpu.cpp;h=02fa112a37281ca310b7925de28b3b550b3458bb;hb=9af4fb023287b26dce01a36c65c9e30f56481051;hp=f1d9c727cef3371589a814520710a72b3ebc46b2;hpb=99c569a1257cc13ddd2201870266c9ef83ee3fcc;p=virtualjaguar diff --git a/src/gpu.cpp b/src/gpu.cpp index f1d9c72..02fa112 100644 --- a/src/gpu.cpp +++ b/src/gpu.cpp @@ -1,9 +1,21 @@ +#if 1 + // // GPU Core // -// by Cal2 +// Originally by David Raingeard (Cal2) // GCC/SDL port by Niels Wagenaar (Linux/WIN32) and Caz (BeOS) -// Cleanups, endian wrongness, and bad ASM amelioration by James L. Hammons +// Cleanups, endian wrongness, and bad ASM amelioration by James Hammons +// (C) 2010 Underground Software +// +// JLH = James Hammons +// +// Who When What +// --- ---------- ------------------------------------------------------------- +// JLH 01/16/2010 Created this log ;-) +// JLH 11/26/2011 Added fixes for LOAD/STORE alignment issues + +// // Note: Endian wrongness probably stems from the MAME origins of this emu and // the braindead way in which MAME handles memory. :-) // @@ -14,10 +26,24 @@ #include "gpu.h" +#include +#include // For memset +#include "dsp.h" +#include "jagdasm.h" +#include "jaguar.h" +#include "log.h" +#include "m68000/m68kinterface.h" +//#include "memory.h" +#include "tom.h" + + +// Seems alignment in loads & stores was off... +#define GPU_CORRECT_ALIGNMENT //#define GPU_DEBUG // For GPU dissasembly... 
+#if 0 #define GPU_DIS_ABS #define GPU_DIS_ADD #define GPU_DIS_ADDC @@ -73,9 +99,10 @@ #define GPU_DIS_SUBQT #define GPU_DIS_XOR -bool doGPUDis = false; -//bool doGPUDis = true; -//*/ +//bool doGPUDis = false; +bool doGPUDis = true; +#endif + /* GPU opcodes use (BIOS flying ATARI logo): + add 357416 @@ -153,7 +180,6 @@ extern int gpu_start_log; // Private function prototypes void GPUUpdateRegisterBanks(void); - void GPUDumpDisassembly(void); void GPUDumpRegisters(void); void GPUDumpMemory(void); @@ -223,7 +249,8 @@ static void gpu_opcode_store_r15_ri(void); static void gpu_opcode_sat24(void); static void gpu_opcode_pack(void); -uint8 gpu_opcode_cycles[64] = +// This is wrong, since it doesn't take pipeline effects into account. !!! FIX !!! +/*uint8 gpu_opcode_cycles[64] = { 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, @@ -233,10 +260,36 @@ uint8 gpu_opcode_cycles[64] = 5, 4, 5, 6, 6, 1, 1, 1, 1, 2, 2, 2, 1, 1, 9, 3, 3, 1, 6, 6, 2, 2, 3, 3 -}; - -void (*gpu_opcode[64])()= -{ +};//*/ +//Here's a QnD kludge... +//This is wrong, wrong, WRONG, but it seems to work for the time being... +//(That is, it fixes Flip Out which relies on GPU timing rather than semaphores. Bad developers! Bad!) +//What's needed here is a way to take pipeline effects into account (including pipeline stalls!)... 
+/*uint8 gpu_opcode_cycles[64] = +{ + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 9, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 2, + 2, 2, 2, 3, 3, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 4, 1, + 1, 1, 3, 3, 1, 1, 1, 1 +};//*/ +uint8 gpu_opcode_cycles[64] = +{ + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1 +};//*/ + +void (*gpu_opcode[64])()= +{ gpu_opcode_add, gpu_opcode_addc, gpu_opcode_addq, gpu_opcode_addqt, gpu_opcode_sub, gpu_opcode_subc, gpu_opcode_subq, gpu_opcode_subqt, gpu_opcode_neg, gpu_opcode_and, gpu_opcode_or, gpu_opcode_xor, @@ -255,7 +308,7 @@ void (*gpu_opcode[64])()= gpu_opcode_store_r14_ri, gpu_opcode_store_r15_ri, gpu_opcode_sat24, gpu_opcode_pack, }; -static uint8 * gpu_ram_8; +static uint8 gpu_ram_8[0x1000]; uint32 gpu_pc; static uint32 gpu_acc; static uint32 gpu_remain; @@ -270,8 +323,8 @@ static uint32 gpu_div_control; // a bit before writing a result. I.e., if the result of an operation leaves a zero in // the carry flag, you don't have to zero gpu_flag_c before you can write that zero! 
static uint8 gpu_flag_z, gpu_flag_n, gpu_flag_c; -static uint32 * gpu_reg_bank_0; -static uint32 * gpu_reg_bank_1; +static uint32 gpu_reg_bank_0[32]; +static uint32 gpu_reg_bank_1[32]; static uint32 * gpu_reg; static uint32 * gpu_alternate_reg; @@ -289,19 +342,19 @@ static uint32 gpu_opcode_second_parameter; #define IMM_2 gpu_opcode_second_parameter #define SET_FLAG_Z(r) (gpu_flag_z = ((r) == 0)); -#define SET_FLAG_N(r) (gpu_flag_n = (((UINT32)(r) >> 31) & 0x01)); +#define SET_FLAG_N(r) (gpu_flag_n = (((uint32)(r) >> 31) & 0x01)); #define RESET_FLAG_Z() gpu_flag_z = 0; #define RESET_FLAG_N() gpu_flag_n = 0; -#define RESET_FLAG_C() gpu_flag_c = 0; +#define RESET_FLAG_C() gpu_flag_c = 0; #define CLR_Z (gpu_flag_z = 0) #define CLR_ZN (gpu_flag_z = gpu_flag_n = 0) #define CLR_ZNC (gpu_flag_z = gpu_flag_n = gpu_flag_c = 0) #define SET_Z(r) (gpu_flag_z = ((r) == 0)) -#define SET_N(r) (gpu_flag_n = (((UINT32)(r) >> 31) & 0x01)) -#define SET_C_ADD(a,b) (gpu_flag_c = ((UINT32)(b) > (UINT32)(~(a)))) -#define SET_C_SUB(a,b) (gpu_flag_c = ((UINT32)(b) > (UINT32)(a))) +#define SET_N(r) (gpu_flag_n = (((uint32)(r) >> 31) & 0x01)) +#define SET_C_ADD(a,b) (gpu_flag_c = ((uint32)(b) > (uint32)(~(a)))) +#define SET_C_SUB(a,b) (gpu_flag_c = ((uint32)(b) > (uint32)(a))) #define SET_ZN(r) SET_N(r); SET_Z(r) #define SET_ZNC_ADD(a,b,r) SET_N(r); SET_Z(r); SET_C_ADD(a,b) #define SET_ZNC_SUB(a,b,r) SET_N(r); SET_Z(r); SET_C_SUB(a,b) @@ -314,8 +367,8 @@ uint8 * branch_condition_table = 0; uint32 gpu_opcode_use[64]; -char * gpu_opcode_str[64]= -{ +const char * gpu_opcode_str[64]= +{ "add", "addc", "addq", "addqt", "sub", "subc", "subq", "subqt", "neg", "and", "or", "xor", @@ -337,12 +390,12 @@ char * gpu_opcode_str[64]= static uint32 gpu_in_exec = 0; static uint32 gpu_releaseTimeSlice_flag = 0; -void gpu_releaseTimeslice(void) +void GPUReleaseTimeslice(void) { gpu_releaseTimeSlice_flag = 1; } -uint32 gpu_get_pc(void) +uint32 GPUGetPC(void) { return gpu_pc; } @@ -448,7 +501,11 @@ uint16 
GPUReadWord(uint32 offset, uint32 who/*=UNKNOWN*/) uint32 GPUReadLong(uint32 offset, uint32 who/*=UNKNOWN*/) { if (offset >= 0xF02000 && offset <= 0xF020FF) - WriteLog("GPU: ReadLong--Attempt to read from GPU register file by %s!\n", whoName[who]); + { + WriteLog("GPU: ReadLong--Attempt to read from GPU register file (%X) by %s!\n", offset, whoName[who]); + uint32 reg = (offset & 0xFC) >> 2; + return (reg < 32 ? gpu_reg_bank_0[reg] : gpu_reg_bank_1[reg - 32]); + } // if ((offset >= GPU_WORK_RAM_BASE) && (offset < GPU_WORK_RAM_BASE + 0x1000)) if ((offset >= GPU_WORK_RAM_BASE) && (offset <= GPU_WORK_RAM_BASE + 0x0FFC)) @@ -470,7 +527,7 @@ uint32 GPUReadLong(uint32 offset, uint32 who/*=UNKNOWN*/) gpu_flag_n = (gpu_flag_n ? 1 : 0); gpu_flags = (gpu_flags & 0xFFFFFFF8) | (gpu_flag_n << 2) | (gpu_flag_c << 1) | gpu_flag_z; - + return gpu_flags & 0xFFFFC1FF; case 0x04: return gpu_matrix_control; @@ -558,6 +615,10 @@ void GPUWriteWord(uint32 offset, uint16 data, uint32 who/*=UNKNOWN*/) /* offset &= 0xFFF; SET16(gpu_ram_8, offset, data);//*/ +/*if (offset >= 0xF03214 && offset < 0xF0321F) + WriteLog("GPU: Writing WORD (%04X) to GPU RAM (%08X)...\n", data, offset);//*/ + + //This is the same stupid worthless code that was in the DSP!!! AARRRGGGGHHHHH!!!!!! 
/* if (!gpu_in_exec) { @@ -586,16 +647,19 @@ void GPUWriteWord(uint32 offset, uint16 data, uint32 who/*=UNKNOWN*/) else gpu_div_control = (gpu_div_control & 0x0000FFFF) | ((data & 0xFFFF) << 16); } - else + else { //WriteLog("[GPU W16:%08X,%04X]", offset, data); uint32 old_data = GPUReadLong(offset & 0xFFFFFFC, who); + if (offset & 0x02) old_data = (old_data & 0xFFFF0000) | (data & 0xFFFF); else old_data = (old_data & 0x0000FFFF) | ((data & 0xFFFF) << 16); + GPUWriteLong(offset & 0xFFFFFFC, old_data, who); } + return; } else if ((offset == GPU_WORK_RAM_BASE + 0x0FFF) || (GPU_CONTROL_RAM_BASE + 0x1F)) @@ -630,12 +694,8 @@ void GPUWriteLong(uint32 offset, uint32 data, uint32 who/*=UNKNOWN*/) } #endif // GPU_DEBUG -/* gpu_ram_8[offset & 0xFFF] = (data >> 24) & 0xFF, - gpu_ram_8[(offset+1) & 0xFFF] = (data >> 16) & 0xFF, - gpu_ram_8[(offset+2) & 0xFFF] = (data >> 8) & 0xFF, - gpu_ram_8[(offset+3) & 0xFFF] = data & 0xFF;//*/ offset &= 0xFFF; - SET32(gpu_ram_8, offset, data);//*/ + SET32(gpu_ram_8, offset, data); return; } // else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset < GPU_CONTROL_RAM_BASE+0x20)) @@ -647,7 +707,9 @@ void GPUWriteLong(uint32 offset, uint32 data, uint32 who/*=UNKNOWN*/) case 0x00: { bool IMASKCleared = (gpu_flags & IMASK) && !(data & IMASK); - gpu_flags = data; + // NOTE: According to the JTRM, writing a 1 to IMASK has no effect; only the + // IRQ logic can set it. So we mask it out here to prevent problems... 
+ gpu_flags = data & (~IMASK); gpu_flag_z = gpu_flags & ZERO_FLAG; gpu_flag_c = (gpu_flags & CARRY_FLAG) >> 1; gpu_flag_n = (gpu_flags & NEGA_FLAG) >> 2; @@ -682,7 +744,7 @@ WriteLog("GPU: %s setting GPU PC to %08X %s\n", whoName[who], gpu_pc, (GPU_RUNNI #endif // GPU_DEBUG break; case 0x14: - { + { // uint32 gpu_was_running = GPU_RUNNING; data &= ~0xF7C0; // Disable writes to INT_LAT0-4 & TOM version number @@ -690,13 +752,14 @@ WriteLog("GPU: %s setting GPU PC to %08X %s\n", whoName[who], gpu_pc, (GPU_RUNNI if (data & 0x02) { //WriteLog("GPU->CPU interrupt\n"); - if (tom_irq_enabled(IRQ_GPU)) + if (TOMIRQEnabled(IRQ_GPU)) { - if ((tom_irq_enabled(IRQ_GPU)) && (jaguar_interrupt_handler_is_valid(64))) +//This is the programmer's responsibility, to make sure the handler is valid, not ours! +// if ((TOMIRQEnabled(IRQ_GPU))// && (JaguarInterruptHandlerIsValid(64))) { - tom_set_pending_gpu_int(); - m68k_set_irq(7); // Set 68000 NMI - gpu_releaseTimeslice(); + TOMSetPendingGPUInt(); + m68k_set_irq(2); // Set 68000 IPL 2 + GPUReleaseTimeslice(); } } data &= ~0x02; @@ -708,7 +771,7 @@ WriteLog("GPU: %s setting GPU PC to %08X %s\n", whoName[who], gpu_pc, (GPU_RUNNI //WriteLog("CPU->GPU interrupt\n"); GPUSetIRQLine(0, ASSERT_LINE); m68k_end_timeslice(); - dsp_releaseTimeslice(); + DSPReleaseTimeslice(); data &= ~0x04; } @@ -726,13 +789,13 @@ WriteLog("GPU: %s setting GPU PC to %08X %s\n", whoName[who], gpu_pc, (GPU_RUNNI { WriteLog("GPU: Write32--About to do stupid braindead GPU execution for 200 cycles.\n"); #endif // GPU_DEBUG - gpu_exec(200); + GPUExec(200); #ifdef GPU_DEBUG } #endif // GPU_DEBUG//*/ #else if (gpu_control & 0x18) - gpu_exec(1); + GPUExec(1); #endif // #ifndef GPU_SINGLE_STEPPING #ifdef GPU_DEBUG WriteLog("Write to GPU CTRL by %s: %08X ", whoName[who], data); @@ -784,7 +847,7 @@ if (GPU_RUNNING && effect_start5 && gpu_pc == 0xF035D8) for(int x=0; x<2; x++) { JaguarWriteLong(dst, JaguarReadLong(src)); - + src += 4; dst += 4; } @@ -853,6 +916,7 @@ if 
(GPU_RUNNING && effect_start5 && gpu_pc == 0xF035D8) // JaguarWriteWord(offset, (data >> 16) & 0xFFFF, who); // JaguarWriteWord(offset+2, data & 0xFFFF, who); +// We're a 32-bit processor, we can do a long write...! JaguarWriteLong(offset, data, who); } @@ -880,12 +944,12 @@ void GPUHandleIRQs(void) // Get the interrupt latch & enable bits uint32 bits = (gpu_control >> 6) & 0x1F, mask = (gpu_flags >> 4) & 0x1F; - + // Bail out if latched interrupts aren't enabled bits &= mask; if (!bits) return; - + // Determine which interrupt to service uint32 which = 0; //Isn't there a #pragma to disable this warning??? if (bits & 0x01) @@ -902,18 +966,18 @@ void GPUHandleIRQs(void) if (start_logging) WriteLog("GPU: Generating IRQ #%i\n", which); - // set the interrupt flag + // set the interrupt flag gpu_flags |= IMASK; GPUUpdateRegisterBanks(); - // subqt #4,r31 ; pre-decrement stack pointer - // move pc,r30 ; address of interrupted code + // subqt #4,r31 ; pre-decrement stack pointer + // move pc,r30 ; address of interrupted code // store r30,(r31) ; store return address gpu_reg[31] -= 4; GPUWriteLong(gpu_reg[31], gpu_pc - 2, GPU); - - // movei #service_address,r30 ; pointer to ISR entry - // jump (r30) ; jump to ISR + + // movei #service_address,r30 ; pointer to ISR entry + // jump (r30) ; jump to ISR // nop gpu_pc = gpu_reg[30] = GPU_WORK_RAM_BASE + (which * 0x10); } @@ -937,24 +1001,22 @@ void GPUSetIRQLine(int irqline, int state) //#include "gpu2.h" //#include "gpu3.h" -void gpu_init(void) +void GPUInit(void) { - memory_malloc_secure((void **)&gpu_ram_8, 0x1000, "GPU work RAM"); -// memory_malloc_secure((void **)&gpu_reg, 32*sizeof(int32), "GPU bank 0 regs"); -// memory_malloc_secure((void **)&gpu_alternate_reg, 32*sizeof(int32), "GPU bank 1 regs"); - memory_malloc_secure((void **)&gpu_reg_bank_0, 32 * sizeof(int32), "GPU bank 0 regs"); - memory_malloc_secure((void **)&gpu_reg_bank_1, 32 * sizeof(int32), "GPU bank 1 regs"); +// memory_malloc_secure((void **)&gpu_ram_8, 
0x1000, "GPU work RAM"); +// memory_malloc_secure((void **)&gpu_reg_bank_0, 32 * sizeof(int32), "GPU bank 0 regs"); +// memory_malloc_secure((void **)&gpu_reg_bank_1, 32 * sizeof(int32), "GPU bank 1 regs"); build_branch_condition_table(); - gpu_reset(); + GPUReset(); //TEMPORARY: Testing only! // gpu2_init(); // gpu3_init(); } -void gpu_reset(void) +void GPUReset(void) { // GPU registers (directly visible) gpu_flags = 0x00000000; @@ -980,15 +1042,15 @@ void gpu_reset(void) memset(gpu_ram_8, 0xFF, 0x1000); gpu_in_exec = 0; //not needed GPUInterruptPending = false; - gpu_reset_stats(); + GPUResetStats(); } -uint32 gpu_read_pc(void) +uint32 GPUReadPC(void) { return gpu_pc; } -void gpu_reset_stats(void) +void GPUResetStats(void) { for(uint32 i=0; i<64; i++) gpu_opcode_use[i] = 0; @@ -1040,11 +1102,11 @@ void GPUDumpMemory(void) gpu_ram_8[i+1], gpu_ram_8[i+2], gpu_ram_8[i+3]); } -void gpu_done(void) -{ +void GPUDone(void) +{ WriteLog("GPU: Stopped at PC=%08X (GPU %s running)\n", (unsigned int)gpu_pc, GPU_RUNNING ? 
"was" : "wasn't"); - // Get the interrupt latch & enable bits + // Get the interrupt latch & enable bits uint8 bits = (gpu_control >> 6) & 0x1F, mask = (gpu_flags >> 4) & 0x1F; WriteLog("GPU: Latch bits = %02X, enable bits = %02X\n", bits, mask); @@ -1059,7 +1121,9 @@ void gpu_done(void) } WriteLog("\n"); - memory_free(gpu_ram_8); +// memory_free(gpu_ram_8); +// memory_free(gpu_reg_bank_0); +// memory_free(gpu_reg_bank_1); } // @@ -1068,7 +1132,7 @@ void gpu_done(void) static int testCount = 1; static int len = 0; static bool tripwire = false; -void gpu_exec(int32 cycles) +void GPUExec(int32 cycles) { if (!GPU_RUNNING) return; @@ -1086,6 +1150,35 @@ void gpu_exec(int32 cycles) while (cycles > 0 && GPU_RUNNING) { +if (gpu_ram_8[0x054] == 0x98 && gpu_ram_8[0x055] == 0x0A && gpu_ram_8[0x056] == 0x03 + && gpu_ram_8[0x057] == 0x00 && gpu_ram_8[0x058] == 0x00 && gpu_ram_8[0x059] == 0x00) +{ + if (gpu_pc == 0xF03000) + { + extern uint32 starCount; + starCount = 0; +/* WriteLog("GPU: Starting starfield generator... Dump of [R03=%08X]:\n", gpu_reg_bank_0[03]); + uint32 base = gpu_reg_bank_0[3]; + for(uint32 i=0; i<0x100; i+=16) + { + WriteLog("%02X: ", i); + for(uint32 j=0; j<16; j++) + { + WriteLog("%02X ", JaguarReadByte(base + i + j)); + } + WriteLog("\n"); + }*/ + } +// if (gpu_pc == 0xF03) + { + } +}//*/ +/*if (gpu_pc == 0xF03B9E && gpu_reg_bank_0[01] == 0) +{ + GPUDumpRegisters(); + WriteLog("GPU: Starting disassembly log...\n"); + doGPUDis = true; +}//*/ /*if (gpu_pc == 0xF0359A) { doGPUDis = true; @@ -1094,7 +1187,7 @@ void gpu_exec(int32 cycles) /* gpu_flag_c = (gpu_flag_c ? 1 : 0); gpu_flag_z = (gpu_flag_z ? 1 : 0); gpu_flag_n = (gpu_flag_n ? 1 : 0);*/ - + uint16 opcode = GPUReadWord(gpu_pc, GPU); uint32 index = opcode >> 10; gpu_instruction = opcode; // Added for GPU #3... 
@@ -1200,7 +1293,7 @@ WriteLog("GPU: [%08X] %s (RM=%08X, RN=%08X) -> ", gpu_pc, buffer, RM, RN); // gpu3_opcode[index](); // BIOS hacking -//GPU: [00F03548] jr nz,00F03560 (0xd561) (RM=00F03114, RN=00000004) -> --> JR: Branch taken. +//GPU: [00F03548] jr nz,00F03560 (0xd561) (RM=00F03114, RN=00000004) -> --> JR: Branch taken. /*static bool firstTime = true; if (gpu_pc == 0xF03548 && firstTime) { @@ -1277,7 +1370,7 @@ GPU opcodes use (offset punch--vertically below bad guy): static void gpu_opcode_jump(void) { #ifdef GPU_DIS_JUMP -char * condition[32] = +const char * condition[32] = { "T", "nz", "z", "???", "nc", "nc nz", "nc z", "???", "c", "c nz", "c z", "???", "???", "???", "???", "???", "???", "???", "???", "???", "nn", "nn nz", "nn z", "???", "n", "n nz", "n z", "???", @@ -1301,7 +1394,7 @@ char * condition[32] = if (gpu_start_log) WriteLog(" --> JUMP: Branch taken.\n"); uint32 delayed_pc = RM; - gpu_exec(1); + GPUExec(1); gpu_pc = delayed_pc; /* uint16 opcode = GPUReadWord(gpu_pc, GPU); gpu_opcode_first_parameter = (opcode >> 5) & 0x1F; @@ -1320,7 +1413,7 @@ if (gpu_start_log) static void gpu_opcode_jr(void) { #ifdef GPU_DIS_JR -char * condition[32] = +const char * condition[32] = { "T", "nz", "z", "???", "nc", "nc nz", "nc z", "???", "c", "c nz", "c z", "???", "???", "???", "???", "???", "???", "???", "???", "???", "nn", "nn nz", "nn z", "???", "n", "n nz", "n z", "???", @@ -1330,8 +1423,8 @@ char * condition[32] = #endif /* if (CONDITION(jaguar.op & 31)) { - INT32 r1 = (INT8)((jaguar.op >> 2) & 0xF8) >> 2; - UINT32 newpc = jaguar.PC + r1; + int32 r1 = (INT8)((jaguar.op >> 2) & 0xF8) >> 2; + uint32 newpc = jaguar.PC + r1; CALL_MAME_DEBUG; jaguar.op = ROPCODE(jaguar.PC); jaguar.PC = newpc; @@ -1356,7 +1449,7 @@ if (gpu_start_log) WriteLog(" --> JR: Branch taken.\n"); int32 offset = (IMM_1 & 0x10 ? 
0xFFFFFFF0 | IMM_1 : IMM_1); // Sign extend IMM_1 int32 delayed_pc = gpu_pc + (offset * 2); - gpu_exec(1); + GPUExec(1); gpu_pc = delayed_pc; /* uint16 opcode = GPUReadWord(gpu_pc, GPU); gpu_opcode_first_parameter = (opcode >> 5) & 0x1F; @@ -1378,7 +1471,7 @@ static void gpu_opcode_add(void) if (doGPUDis) WriteLog("%06X: ADD R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN); #endif - UINT32 res = RN + RM; + uint32 res = RN + RM; CLR_ZNC; SET_ZNC_ADD(RN, RM, res); RN = res; #ifdef GPU_DIS_ADD @@ -1394,14 +1487,14 @@ static void gpu_opcode_addc(void) WriteLog("%06X: ADDC R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN); #endif /* int dreg = jaguar.op & 31; - UINT32 r1 = jaguar.r[(jaguar.op >> 5) & 31]; - UINT32 r2 = jaguar.r[dreg]; - UINT32 res = r2 + r1 + ((jaguar.FLAGS >> 1) & 1); + uint32 r1 = jaguar.r[(jaguar.op >> 5) & 31]; + uint32 r2 = jaguar.r[dreg]; + uint32 res = r2 + r1 + ((jaguar.FLAGS >> 1) & 1); jaguar.r[dreg] = res; CLR_ZNC; SET_ZNC_ADD(r2,r1,res);*/ - UINT32 res = RN + RM + gpu_flag_c; - UINT32 carry = gpu_flag_c; + uint32 res = RN + RM + gpu_flag_c; + uint32 carry = gpu_flag_c; // SET_ZNC_ADD(RN, RM, res); //???BUG??? Yes! 
SET_ZNC_ADD(RN + carry, RM, res); // SET_ZNC_ADD(RN, RM + carry, res); @@ -1418,8 +1511,8 @@ static void gpu_opcode_addq(void) if (doGPUDis) WriteLog("%06X: ADDQ #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN); #endif - UINT32 r1 = gpu_convert_zero[IMM_1]; - UINT32 res = RN + r1; + uint32 r1 = gpu_convert_zero[IMM_1]; + uint32 res = RN + r1; CLR_ZNC; SET_ZNC_ADD(RN, r1, res); RN = res; #ifdef GPU_DIS_ADDQ @@ -1447,7 +1540,7 @@ static void gpu_opcode_sub(void) if (doGPUDis) WriteLog("%06X: SUB R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN); #endif - UINT32 res = RN - RM; + uint32 res = RN - RM; SET_ZNC_SUB(RN, RM, res); RN = res; #ifdef GPU_DIS_SUB @@ -1462,25 +1555,36 @@ static void gpu_opcode_subc(void) if (doGPUDis) WriteLog("%06X: SUBC R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN); #endif - UINT32 res = RN - RM - gpu_flag_c; - UINT32 borrow = gpu_flag_c; + uint32 res = RN - RM - gpu_flag_c; + uint32 borrow = gpu_flag_c; // SET_ZNC_SUB(RN, RM, res); //???BUG??? YES!!! - SET_ZNC_SUB(RN - borrow, RM, res); +//No matter how you do it, there is a problem. With below, it's 0-0 with carry, +//and the one below it it's FFFFFFFF - FFFFFFFF with carry... !!! FIX !!! +// SET_ZNC_SUB(RN - borrow, RM, res); + SET_ZNC_SUB(RN, RM + borrow, res); RN = res; #ifdef GPU_DIS_SUBC if (doGPUDis) WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN); #endif } +/* +N = 5, M = 3, 3 - 5 = -2, C = 1... Or, in our case: +N = 0, M = 1, 0 - 1 = -1, C = 0! 
+#define SET_C_SUB(a,b) (gpu_flag_c = ((uint32)(b) > (uint32)(a))) +#define SET_ZN(r) SET_N(r); SET_Z(r) +#define SET_ZNC_ADD(a,b,r) SET_N(r); SET_Z(r); SET_C_ADD(a,b) +#define SET_ZNC_SUB(a,b,r) SET_N(r); SET_Z(r); SET_C_SUB(a,b) +*/ static void gpu_opcode_subq(void) { #ifdef GPU_DIS_SUBQ if (doGPUDis) WriteLog("%06X: SUBQ #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN); #endif - UINT32 r1 = gpu_convert_zero[IMM_1]; - UINT32 res = RN - r1; + uint32 r1 = gpu_convert_zero[IMM_1]; + uint32 res = RN - r1; SET_ZNC_SUB(RN, r1, res); RN = res; #ifdef GPU_DIS_SUBQ @@ -1508,7 +1612,7 @@ static void gpu_opcode_cmp(void) if (doGPUDis) WriteLog("%06X: CMP R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN); #endif - UINT32 res = RN - RM; + uint32 res = RN - RM; SET_ZNC_SUB(RN, RM, res); #ifdef GPU_DIS_CMP if (doGPUDis) @@ -1524,8 +1628,8 @@ static void gpu_opcode_cmpq(void) if (doGPUDis) WriteLog("%06X: CMPQ #%d, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, sqtable[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN); #endif - UINT32 r1 = sqtable[IMM_1 & 0x1F]; // I like this better -> (INT8)(jaguar.op >> 2) >> 3; - UINT32 res = RN - r1; + uint32 r1 = sqtable[IMM_1 & 0x1F]; // I like this better -> (INT8)(jaguar.op >> 2) >> 3; + uint32 res = RN - r1; SET_ZNC_SUB(RN, r1, res); #ifdef GPU_DIS_CMPQ if (doGPUDis) @@ -1593,7 +1697,7 @@ static void gpu_opcode_move_pc(void) { #ifdef GPU_DIS_MOVEPC if (doGPUDis) - WriteLog("%06X: MOVE PC, R%02u [NCZ:%u%u%u, PC=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, gpu_pc-2, IMM_2, RN); + WriteLog("%06X: MOVE PC, R%02u [NCZ:%u%u%u, PC=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, gpu_pc-2, IMM_2, RN); #endif // Should be previous PC--this might not always be previous instruction! 
// Then again, this will point right at the *current* instruction, i.e., MOVE PC,R! @@ -1636,7 +1740,16 @@ static void gpu_opcode_store_r14_indexed(void) if (doGPUDis) WriteLog("%06X: STORE R%02u, (R14+$%02X) [NCZ:%u%u%u, R%02u=%08X, R14+$%02X=%08X]\n", gpu_pc-2, IMM_2, gpu_convert_zero[IMM_1] << 2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, gpu_convert_zero[IMM_1] << 2, gpu_reg[14]+(gpu_convert_zero[IMM_1] << 2)); #endif +#ifdef GPU_CORRECT_ALIGNMENT + uint32 address = gpu_reg[14] + (gpu_convert_zero[IMM_1] << 2); + + if (address >= 0xF03000 && address <= 0xF03FFF) + GPUWriteLong(address & 0xFFFFFFFC, RN, GPU); + else + GPUWriteLong(address, RN, GPU); +#else GPUWriteLong(gpu_reg[14] + (gpu_convert_zero[IMM_1] << 2), RN, GPU); +#endif } static void gpu_opcode_store_r15_indexed(void) @@ -1645,7 +1758,16 @@ static void gpu_opcode_store_r15_indexed(void) if (doGPUDis) WriteLog("%06X: STORE R%02u, (R15+$%02X) [NCZ:%u%u%u, R%02u=%08X, R15+$%02X=%08X]\n", gpu_pc-2, IMM_2, gpu_convert_zero[IMM_1] << 2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, gpu_convert_zero[IMM_1] << 2, gpu_reg[15]+(gpu_convert_zero[IMM_1] << 2)); #endif +#ifdef GPU_CORRECT_ALIGNMENT + uint32 address = gpu_reg[15] + (gpu_convert_zero[IMM_1] << 2); + + if (address >= 0xF03000 && address <= 0xF03FFF) + GPUWriteLong(address & 0xFFFFFFFC, RN, GPU); + else + GPUWriteLong(address, RN, GPU); +#else GPUWriteLong(gpu_reg[15] + (gpu_convert_zero[IMM_1] << 2), RN, GPU); +#endif } static void gpu_opcode_load_r14_ri(void) @@ -1654,7 +1776,16 @@ static void gpu_opcode_load_r14_ri(void) if (doGPUDis) WriteLog("%06X: LOAD (R14+R%02u), R%02u [NCZ:%u%u%u, R14+R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM+gpu_reg[14], IMM_2, RN); #endif +#ifdef GPU_CORRECT_ALIGNMENT + uint32 address = gpu_reg[14] + RM; + + if (address >= 0xF03000 && address <= 0xF03FFF) + RN = GPUReadLong(address & 0xFFFFFFFC, GPU); + else + RN = GPUReadLong(address, GPU); +#else RN = 
GPUReadLong(gpu_reg[14] + RM, GPU); +#endif #ifdef GPU_DIS_LOAD14R if (doGPUDis) WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN); @@ -1667,7 +1798,16 @@ static void gpu_opcode_load_r15_ri(void) if (doGPUDis) WriteLog("%06X: LOAD (R15+R%02u), R%02u [NCZ:%u%u%u, R15+R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM+gpu_reg[15], IMM_2, RN); #endif +#ifdef GPU_CORRECT_ALIGNMENT + uint32 address = gpu_reg[15] + RM; + + if (address >= 0xF03000 && address <= 0xF03FFF) + RN = GPUReadLong(address & 0xFFFFFFFC, GPU); + else + RN = GPUReadLong(address, GPU); +#else RN = GPUReadLong(gpu_reg[15] + RM, GPU); +#endif #ifdef GPU_DIS_LOAD15R if (doGPUDis) WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN); @@ -1680,7 +1820,16 @@ static void gpu_opcode_store_r14_ri(void) if (doGPUDis) WriteLog("%06X: STORE R%02u, (R14+R%02u) [NCZ:%u%u%u, R%02u=%08X, R14+R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM+gpu_reg[14]); #endif +#ifdef GPU_CORRECT_ALIGNMENT + uint32 address = gpu_reg[14] + RM; + + if (address >= 0xF03000 && address <= 0xF03FFF) + GPUWriteLong(address & 0xFFFFFFFC, RN, GPU); + else + GPUWriteLong(address, RN, GPU); +#else GPUWriteLong(gpu_reg[14] + RM, RN, GPU); +#endif } static void gpu_opcode_store_r15_ri(void) @@ -1689,7 +1838,16 @@ static void gpu_opcode_store_r15_ri(void) if (doGPUDis) WriteLog("%06X: STORE R%02u, (R15+R%02u) [NCZ:%u%u%u, R%02u=%08X, R15+R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM+gpu_reg[15]); #endif +#ifdef GPU_CORRECT_ALIGNMENT_STORE + uint32 address = gpu_reg[15] + RM; + + if (address >= 0xF03000 && address <= 0xF03FFF) + GPUWriteLong(address & 0xFFFFFFFC, RN, GPU); + else + GPUWriteLong(address, RN, GPU); +#else GPUWriteLong(gpu_reg[15] + RM, RN, GPU); +#endif } static void gpu_opcode_nop(void) @@ -1739,10 +1897,17 @@ static 
void gpu_opcode_storew(void) if (doGPUDis) WriteLog("%06X: STOREW R%02u, (R%02u) [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM); #endif +#ifdef GPU_CORRECT_ALIGNMENT + if ((RM >= 0xF03000) && (RM <= 0xF03FFF)) + GPUWriteLong(RM & 0xFFFFFFFE, RN & 0xFFFF, GPU); + else + JaguarWriteWord(RM, RN, GPU); +#else if ((RM >= 0xF03000) && (RM <= 0xF03FFF)) GPUWriteLong(RM, RN & 0xFFFF, GPU); else JaguarWriteWord(RM, RN, GPU); +#endif } static void gpu_opcode_store(void) @@ -1751,13 +1916,33 @@ static void gpu_opcode_store(void) if (doGPUDis) WriteLog("%06X: STORE R%02u, (R%02u) [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM); #endif +#ifdef GPU_CORRECT_ALIGNMENT + if ((RM >= 0xF03000) && (RM <= 0xF03FFF)) + GPUWriteLong(RM & 0xFFFFFFFC, RN, GPU); + else + GPUWriteLong(RM, RN, GPU); +#else GPUWriteLong(RM, RN, GPU); +#endif } static void gpu_opcode_storep(void) { +#ifdef GPU_CORRECT_ALIGNMENT + if ((RM >= 0xF03000) && (RM <= 0xF03FFF)) + { + GPUWriteLong((RM & 0xFFFFFFF8) + 0, gpu_hidata, GPU); + GPUWriteLong((RM & 0xFFFFFFF8) + 4, RN, GPU); + } + else + { + GPUWriteLong(RM + 0, gpu_hidata, GPU); + GPUWriteLong(RM + 4, RN, GPU); + } +#else GPUWriteLong(RM + 0, gpu_hidata, GPU); GPUWriteLong(RM + 4, RN, GPU); +#endif } static void gpu_opcode_loadb(void) @@ -1782,23 +1967,60 @@ static void gpu_opcode_loadw(void) if (doGPUDis) WriteLog("%06X: LOADW (R%02u), R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN); #endif +#ifdef GPU_CORRECT_ALIGNMENT + if ((RM >= 0xF03000) && (RM <= 0xF03FFF)) + RN = GPUReadLong(RM & 0xFFFFFFFE, GPU) & 0xFFFF; + else + RN = JaguarReadWord(RM, GPU); +#else if ((RM >= 0xF03000) && (RM <= 0xF03FFF)) RN = GPUReadLong(RM, GPU) & 0xFFFF; else RN = JaguarReadWord(RM, GPU); +#endif #ifdef GPU_DIS_LOADW if (doGPUDis) 
WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN); #endif } +// According to the docs, & "Do The Same", this address is long aligned... +// So let's try it: +// And it works!!! Need to fix all instances... +// Also, Power Drive Rally seems to contradict the idea that only LOADs in +// the $F03000-$F03FFF range are aligned... +#warning "!!! Alignment issues, need to find definitive final word on this !!!" +/* +Preliminary testing on real hardware seems to confirm that something strange goes on +with unaligned reads in main memory. When the address is off by 1, the result is the +same as the long address with the top byte replaced by something. So if the read is +from $401, and $400 has 12 34 56 78, the value read will be $nn345678, where nn is a currently unknown vlaue. +When the address is off by 2, the result would be $nnnn5678, where nnnn is unknown. +When the address is off by 3, the result would be $nnnnnn78, where nnnnnn is unknown. +It may be that the "unknown" values come from the prefetch queue, but not sure how +to test that. They seem to be stable, though, which would indicate such a mechanism. +Sometimes, however, the off by 2 case returns $12345678! +*/ static void gpu_opcode_load(void) { #ifdef GPU_DIS_LOAD if (doGPUDis) WriteLog("%06X: LOAD (R%02u), R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN); #endif +#ifdef GPU_CORRECT_ALIGNMENT + uint32 mask[4] = { 0x00000000, 0xFF000000, 0xFFFF0000, 0xFFFFFF00 }; +// if ((RM >= 0xF03000) && (RM <= 0xF03FFF)) + RN = GPUReadLong(RM & 0xFFFFFFFC, GPU); +// RN = GPUReadLong(RM & 0x00FFFFFC, GPU); +// else +// RN = GPUReadLong(RM, GPU); + // Simulate garbage in unaligned reads... +//seems that this behavior is different in GPU mem vs. main mem... 
+// if ((RM < 0xF03000) || (RM > 0xF0BFFF)) +// RN |= mask[RM & 0x03]; +#else RN = GPUReadLong(RM, GPU); +#endif #ifdef GPU_DIS_LOAD if (doGPUDis) WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN); @@ -1807,8 +2029,21 @@ static void gpu_opcode_load(void) static void gpu_opcode_loadp(void) { +#ifdef GPU_CORRECT_ALIGNMENT + if ((RM >= 0xF03000) && (RM <= 0xF03FFF)) + { + gpu_hidata = GPUReadLong((RM & 0xFFFFFFF8) + 0, GPU); + RN = GPUReadLong((RM & 0xFFFFFFF8) + 4, GPU); + } + else + { + gpu_hidata = GPUReadLong(RM + 0, GPU); + RN = GPUReadLong(RM + 4, GPU); + } +#else gpu_hidata = GPUReadLong(RM + 0, GPU); RN = GPUReadLong(RM + 4, GPU); +#endif } static void gpu_opcode_load_r14_indexed(void) @@ -1817,7 +2052,16 @@ static void gpu_opcode_load_r14_indexed(void) if (doGPUDis) WriteLog("%06X: LOAD (R14+$%02X), R%02u [NCZ:%u%u%u, R14+$%02X=%08X, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1] << 2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, gpu_convert_zero[IMM_1] << 2, gpu_reg[14]+(gpu_convert_zero[IMM_1] << 2), IMM_2, RN); #endif +#ifdef GPU_CORRECT_ALIGNMENT + uint32 address = gpu_reg[14] + (gpu_convert_zero[IMM_1] << 2); + + if ((RM >= 0xF03000) && (RM <= 0xF03FFF)) + RN = GPUReadLong(address & 0xFFFFFFFC, GPU); + else + RN = GPUReadLong(address, GPU); +#else RN = GPUReadLong(gpu_reg[14] + (gpu_convert_zero[IMM_1] << 2), GPU); +#endif #ifdef GPU_DIS_LOAD14I if (doGPUDis) WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN); @@ -1830,7 +2074,16 @@ static void gpu_opcode_load_r15_indexed(void) if (doGPUDis) WriteLog("%06X: LOAD (R15+$%02X), R%02u [NCZ:%u%u%u, R15+$%02X=%08X, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1] << 2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, gpu_convert_zero[IMM_1] << 2, gpu_reg[15]+(gpu_convert_zero[IMM_1] << 2), IMM_2, RN); #endif +#ifdef GPU_CORRECT_ALIGNMENT + uint32 address = gpu_reg[15] + (gpu_convert_zero[IMM_1] << 2); + + if ((RM >= 0xF03000) && (RM <= 
0xF03FFF)) + RN = GPUReadLong(address & 0xFFFFFFFC, GPU); + else + RN = GPUReadLong(address, GPU); +#else RN = GPUReadLong(gpu_reg[15] + (gpu_convert_zero[IMM_1] << 2), GPU); +#endif #ifdef GPU_DIS_LOAD15I if (doGPUDis) WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN); @@ -1943,7 +2196,7 @@ static void gpu_opcode_bclr(void) if (doGPUDis) WriteLog("%06X: BCLR #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN); #endif - UINT32 res = RN & ~(1 << IMM_1); + uint32 res = RN & ~(1 << IMM_1); RN = res; SET_ZN(res); #ifdef GPU_DIS_BCLR @@ -1971,7 +2224,7 @@ static void gpu_opcode_bset(void) if (doGPUDis) WriteLog("%06X: BSET #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN); #endif - UINT32 res = RN | (1 << IMM_1); + uint32 res = RN | (1 << IMM_1); RN = res; SET_ZN(res); #ifdef GPU_DIS_BSET @@ -1989,7 +2242,7 @@ static void gpu_opcode_imacn(void) static void gpu_opcode_mtoi(void) { uint32 _RM = RM; - uint32 res = RN = (((INT32)_RM >> 8) & 0xFF800000) | (_RM & 0x007FFFFF); + uint32 res = RN = (((int32)_RM >> 8) & 0xFF800000) | (_RM & 0x007FFFFF); SET_ZN(res); } @@ -2025,7 +2278,7 @@ static void gpu_opcode_mmult(void) if (gpu_matrix_control & 0x10) // Column stepping { for(int i=0; i> 1)] >> 16) & 0xFFFF); @@ -2094,8 +2347,8 @@ static void gpu_opcode_div(void) // RN / RM { if (gpu_div_control & 0x01) // 16.16 division { - RN = ((UINT64)RN << 16) / RM; - gpu_remain = ((UINT64)RN << 16) % RM; + RN = ((uint64)RN << 16) / RM; + gpu_remain = ((uint64)RN << 16) % RM; } else { @@ -2151,7 +2404,7 @@ static void gpu_opcode_neg(void) if (doGPUDis) WriteLog("%06X: NEG R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN); #endif - UINT32 res = -RN; + uint32 res = -RN; SET_ZNC_SUB(0, RN, res); RN = res; #ifdef GPU_DIS_NEG @@ -2168,8 +2421,8 @@ static void gpu_opcode_shlq(void) 
#endif // Was a bug here... // (Look at Aaron's code: If r1 = 32, then 32 - 32 = 0 which is wrong!) - INT32 r1 = 32 - IMM_1; - UINT32 res = RN << r1; + int32 r1 = 32 - IMM_1; + uint32 res = RN << r1; SET_ZN(res); gpu_flag_c = (RN >> 31) & 1; RN = res; #ifdef GPU_DIS_SHLQ @@ -2184,8 +2437,8 @@ static void gpu_opcode_shrq(void) if (doGPUDis) WriteLog("%06X: SHRQ #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN); #endif - INT32 r1 = gpu_convert_zero[IMM_1]; - UINT32 res = RN >> r1; + int32 r1 = gpu_convert_zero[IMM_1]; + uint32 res = RN >> r1; SET_ZN(res); gpu_flag_c = RN & 1; RN = res; #ifdef GPU_DIS_SHRQ @@ -2200,8 +2453,8 @@ static void gpu_opcode_ror(void) if (doGPUDis) WriteLog("%06X: ROR R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN); #endif - UINT32 r1 = RM & 0x1F; - UINT32 res = (RN >> r1) | (RN << (32 - r1)); + uint32 r1 = RM & 0x1F; + uint32 res = (RN >> r1) | (RN << ((32 - r1) & 0x1F)); /* mask the complementary count so a rotate by 0 never shifts by 32 */ SET_ZN(res); gpu_flag_c = (RN >> 31) & 1; RN = res; #ifdef GPU_DIS_ROR @@ -2216,9 +2469,9 @@ static void gpu_opcode_rorq(void) if (doGPUDis) WriteLog("%06X: RORQ #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN); #endif - UINT32 r1 = gpu_convert_zero[IMM_1 & 0x1F]; - UINT32 r2 = RN; - UINT32 res = (r2 >> r1) | (r2 << (32 - r1)); + uint32 r1 = gpu_convert_zero[IMM_1 & 0x1F]; + uint32 r2 = RN; + uint32 res = (r2 >> (r1 & 0x1F)) | (r2 << ((32 - r1) & 0x1F)); /* both counts masked: a rotate by 0 or 32 is the identity, never a 32-bit shift */ RN = res; SET_ZN(res); gpu_flag_c = (r2 >> 31) & 0x01; #ifdef GPU_DIS_RORQ @@ -2230,9 +2483,9 @@ static void gpu_opcode_rorq(void) static void gpu_opcode_sha(void) { /* int dreg = jaguar.op & 31; - INT32 r1 = (INT32)jaguar.r[(jaguar.op >> 5) & 31]; - UINT32 r2 = jaguar.r[dreg]; - UINT32 res; + int32 r1 = (int32)jaguar.r[(jaguar.op >> 5) & 31]; + uint32 r2 = jaguar.r[dreg]; + uint32 res; CLR_ZNC; if (r1 
< 0) @@ -2242,7 +2495,7 @@ static void gpu_opcode_sha(void) } else { - res = (r1 >= 32) ? ((INT32)r2 >> 31) : ((INT32)r2 >> r1); + res = (r1 >= 32) ? ((int32)r2 >> 31) : ((int32)r2 >> r1); jaguar.FLAGS |= (r2 << 1) & 2; } jaguar.r[dreg] = res; @@ -2252,16 +2505,16 @@ static void gpu_opcode_sha(void) if (doGPUDis) WriteLog("%06X: SHA R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN); #endif - UINT32 res; + uint32 res; - if ((INT32)RM < 0) + if ((int32)RM < 0) { - res = ((INT32)RM <= -32) ? 0 : (RN << -(INT32)RM); + res = ((int32)RM <= -32) ? 0 : (RN << -(int32)RM); gpu_flag_c = RN >> 31; } else { - res = ((INT32)RM >= 32) ? ((INT32)RN >> 31) : ((INT32)RN >> (INT32)RM); + res = ((int32)RM >= 32) ? ((int32)RN >> 31) : ((int32)RN >> (int32)RM); gpu_flag_c = RN & 0x01; } RN = res; @@ -2307,7 +2560,7 @@ static void gpu_opcode_sharq(void) if (doGPUDis) WriteLog("%06X: SHARQ #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN); #endif - UINT32 res = (INT32)RN >> gpu_convert_zero[IMM_1]; + uint32 res = (int32)RN >> gpu_convert_zero[IMM_1]; SET_ZN(res); gpu_flag_c = RN & 0x01; RN = res; #ifdef GPU_DIS_SHARQ @@ -2342,3 +2595,13 @@ static void gpu_opcode_sh(void) //Temporary: Testing only! //#include "gpu2.cpp" //#include "gpu3.cpp" + +#else + +// New thread-safe GPU core + +int GPUCore(void * data) +{ +} + +#endif