X-Git-Url: http://shamusworld.gotdns.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fgpu.cpp;h=9ee426425e42c69f3624939930258765cfde8a92;hb=240a6df48aebb5e17f82452c32e770cdfe9b5d5e;hp=4892b7aa536d4bee7d790435bacfd929b621fa28;hpb=5da604521611a960140b58a2fb0f236c65610b70;p=virtualjaguar diff --git a/src/gpu.cpp b/src/gpu.cpp index 4892b7a..9ee4264 100644 --- a/src/gpu.cpp +++ b/src/gpu.cpp @@ -5,14 +5,15 @@ // // Originally by David Raingeard (Cal2) // GCC/SDL port by Niels Wagenaar (Linux/WIN32) and Caz (BeOS) -// Cleanups, endian wrongness, and bad ASM amelioration by James L. Hammons +// Cleanups, endian wrongness, and bad ASM amelioration by James Hammons // (C) 2010 Underground Software // -// JLH = James L. Hammons +// JLH = James Hammons // // Who When What // --- ---------- ------------------------------------------------------------- // JLH 01/16/2010 Created this log ;-) +// JLH 11/26/2011 Added fixes for LOAD/STORE alignment issues // // Note: Endian wrongness probably stems from the MAME origins of this emu and @@ -31,14 +32,18 @@ #include "jagdasm.h" #include "jaguar.h" #include "log.h" -#include "m68k.h" +#include "m68000/m68kinterface.h" //#include "memory.h" #include "tom.h" + +// Seems alignment in loads & stores was off... +#define GPU_CORRECT_ALIGNMENT //#define GPU_DEBUG // For GPU dissasembly... +#if 0 #define GPU_DIS_ABS #define GPU_DIS_ADD #define GPU_DIS_ADDC @@ -96,7 +101,8 @@ bool doGPUDis = false; //bool doGPUDis = true; -//*/ +#endif + /* GPU opcodes use (BIOS flying ATARI logo): + add 357416 @@ -244,7 +250,7 @@ static void gpu_opcode_sat24(void); static void gpu_opcode_pack(void); // This is wrong, since it doesn't take pipeline effects into account. !!! FIX !!! -/*uint8 gpu_opcode_cycles[64] = +/*uint8_t gpu_opcode_cycles[64] = { 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, @@ -259,7 +265,7 @@ static void gpu_opcode_pack(void); //This is wrong, wrong, WRONG, but it seems to work for the time being... //(That is, it fixes Flip Out which relies on GPU timing rather than semaphores. Bad developers! Bad!) //What's needed here is a way to take pipeline effects into account (including pipeline stalls!)... -/*uint8 gpu_opcode_cycles[64] = +/*uint8_t gpu_opcode_cycles[64] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, @@ -270,7 +276,7 @@ static void gpu_opcode_pack(void); 1, 1, 1, 1, 1, 1, 4, 1, 1, 1, 3, 3, 1, 1, 1, 1 };//*/ -uint8 gpu_opcode_cycles[64] = +uint8_t gpu_opcode_cycles[64] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, @@ -302,29 +308,30 @@ void (*gpu_opcode[64])()= gpu_opcode_store_r14_ri, gpu_opcode_store_r15_ri, gpu_opcode_sat24, gpu_opcode_pack, }; -static uint8 gpu_ram_8[0x1000]; -uint32 gpu_pc; -static uint32 gpu_acc; -static uint32 gpu_remain; -static uint32 gpu_hidata; -static uint32 gpu_flags; -static uint32 gpu_matrix_control; -static uint32 gpu_pointer_to_matrix; -static uint32 gpu_data_organization; -static uint32 gpu_control; -static uint32 gpu_div_control; -// There is a distinct advantage to having these separated out--there's no need to clear -// a bit before writing a result. I.e., if the result of an operation leaves a zero in -// the carry flag, you don't have to zero gpu_flag_c before you can write that zero! -static uint8 gpu_flag_z, gpu_flag_n, gpu_flag_c; -static uint32 gpu_reg_bank_0[32]; -static uint32 gpu_reg_bank_1[32]; -static uint32 * gpu_reg; -static uint32 * gpu_alternate_reg; - -static uint32 gpu_instruction; -static uint32 gpu_opcode_first_parameter; -static uint32 gpu_opcode_second_parameter; +static uint8_t gpu_ram_8[0x1000]; +uint32_t gpu_pc; +static uint32_t gpu_acc; +static uint32_t gpu_remain; +static uint32_t gpu_hidata; +static uint32_t gpu_flags; +static uint32_t gpu_matrix_control; +static uint32_t gpu_pointer_to_matrix; +static uint32_t gpu_data_organization; +static uint32_t gpu_control; +static uint32_t gpu_div_control; +// There is a distinct advantage to having these separated out--there's no need +// to clear a bit before writing a result. I.e., if the result of an operation +// leaves a zero in the carry flag, you don't have to zero gpu_flag_c before +// you can write that zero! +static uint8_t gpu_flag_z, gpu_flag_n, gpu_flag_c; +uint32_t gpu_reg_bank_0[32]; +uint32_t gpu_reg_bank_1[32]; +static uint32_t * gpu_reg; +static uint32_t * gpu_alternate_reg; + +static uint32_t gpu_instruction; +static uint32_t gpu_opcode_first_parameter; +static uint32_t gpu_opcode_second_parameter; #define GPU_RUNNING (gpu_control & 0x01) @@ -336,7 +343,7 @@ static uint32 gpu_opcode_second_parameter; #define IMM_2 gpu_opcode_second_parameter #define SET_FLAG_Z(r) (gpu_flag_z = ((r) == 0)); -#define SET_FLAG_N(r) (gpu_flag_n = (((uint32)(r) >> 31) & 0x01)); +#define SET_FLAG_N(r) (gpu_flag_n = (((uint32_t)(r) >> 31) & 0x01)); #define RESET_FLAG_Z() gpu_flag_z = 0; #define RESET_FLAG_N() gpu_flag_n = 0; @@ -346,20 +353,20 @@ static uint32 gpu_opcode_second_parameter; #define CLR_ZN (gpu_flag_z = gpu_flag_n = 0) #define CLR_ZNC (gpu_flag_z = gpu_flag_n = gpu_flag_c = 0) #define SET_Z(r) (gpu_flag_z = ((r) == 0)) -#define SET_N(r) (gpu_flag_n = (((uint32)(r) >> 31) & 0x01)) -#define SET_C_ADD(a,b) (gpu_flag_c = ((uint32)(b) > (uint32)(~(a)))) -#define SET_C_SUB(a,b) (gpu_flag_c = ((uint32)(b) > (uint32)(a))) +#define SET_N(r) (gpu_flag_n = (((uint32_t)(r) >> 31) & 0x01)) +#define SET_C_ADD(a,b) (gpu_flag_c = ((uint32_t)(b) > (uint32_t)(~(a)))) +#define SET_C_SUB(a,b) (gpu_flag_c = ((uint32_t)(b) > (uint32_t)(a))) #define SET_ZN(r) SET_N(r); SET_Z(r) #define SET_ZNC_ADD(a,b,r) SET_N(r); SET_Z(r); SET_C_ADD(a,b) #define SET_ZNC_SUB(a,b,r) SET_N(r); SET_Z(r); SET_C_SUB(a,b) -uint32 gpu_convert_zero[32] = +uint32_t gpu_convert_zero[32] = { 32,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 }; -uint8 * branch_condition_table = 0; +uint8_t * branch_condition_table = 0; #define BRANCH_CONDITION(x) branch_condition_table[(x) + ((jaguar_flags & 7) << 5)] -uint32 gpu_opcode_use[64]; +uint32_t gpu_opcode_use[64]; const char * gpu_opcode_str[64]= { @@ -381,15 +388,15 @@ const char * gpu_opcode_str[64]= "store_r14_ri", "store_r15_ri", "sat24", "pack", }; -static uint32 gpu_in_exec = 0; -static uint32 gpu_releaseTimeSlice_flag = 0; +static uint32_t gpu_in_exec = 0; +static uint32_t gpu_releaseTimeSlice_flag = 0; void GPUReleaseTimeslice(void) { gpu_releaseTimeSlice_flag = 1; } -uint32 GPUGetPC(void) +uint32_t GPUGetPC(void) { return gpu_pc; } @@ -398,7 +405,7 @@ void build_branch_condition_table(void) { if (!branch_condition_table) { - branch_condition_table = (uint8 *)malloc(32 * 8 * sizeof(branch_condition_table[0])); + branch_condition_table = (uint8_t *)malloc(32 * 8 * sizeof(branch_condition_table[0])); if (branch_condition_table) { @@ -429,7 +436,7 @@ void build_branch_condition_table(void) // // GPU byte access (read) // -uint8 GPUReadByte(uint32 offset, uint32 who/*=UNKNOWN*/) +uint8_t GPUReadByte(uint32_t offset, uint32_t who/*=UNKNOWN*/) { if (offset >= 0xF02000 && offset <= 0xF020FF) WriteLog("GPU: ReadByte--Attempt to read from GPU register file by %s!\n", whoName[who]); @@ -438,7 +445,7 @@ uint8 GPUReadByte(uint32 offset, uint32 who/*=UNKNOWN*/) return gpu_ram_8[offset & 0xFFF]; else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset < GPU_CONTROL_RAM_BASE+0x20)) { - uint32 data = GPUReadLong(offset & 0xFFFFFFFC, who); + uint32_t data = GPUReadLong(offset & 0xFFFFFFFC, who); if ((offset & 0x03) == 0) return data >> 24; @@ -456,7 +463,7 @@ uint8 GPUReadByte(uint32 offset, uint32 who/*=UNKNOWN*/) // // GPU word access (read) // -uint16 GPUReadWord(uint32 offset, uint32 who/*=UNKNOWN*/) +uint16_t GPUReadWord(uint32_t offset, uint32_t who/*=UNKNOWN*/) { if (offset >= 0xF02000 && offset <= 0xF020FF) WriteLog("GPU: ReadWord--Attempt to read from GPU register file by %s!\n", whoName[who]); @@ -464,7 +471,7 @@ uint16 GPUReadWord(uint32 offset, uint32 who/*=UNKNOWN*/) if ((offset >= GPU_WORK_RAM_BASE) && (offset < GPU_WORK_RAM_BASE+0x1000)) { offset &= 0xFFF; - uint16 data = ((uint16)gpu_ram_8[offset] << 8) | (uint16)gpu_ram_8[offset+1]; + uint16_t data = ((uint16_t)gpu_ram_8[offset] << 8) | (uint16_t)gpu_ram_8[offset+1]; return data; } else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset < GPU_CONTROL_RAM_BASE+0x20)) @@ -474,7 +481,7 @@ uint16 GPUReadWord(uint32 offset, uint32 who/*=UNKNOWN*/) if (offset & 0x01) // Catch cases 1 & 3... (unaligned read) return (GPUReadByte(offset, who) << 8) | GPUReadByte(offset+1, who); - uint32 data = GPUReadLong(offset & 0xFFFFFFFC, who); + uint32_t data = GPUReadLong(offset & 0xFFFFFFFC, who); if (offset & 0x02) // Cases 0 & 2... return data & 0xFFFF; @@ -492,17 +499,21 @@ uint16 GPUReadWord(uint32 offset, uint32 who/*=UNKNOWN*/) // // GPU dword access (read) // -uint32 GPUReadLong(uint32 offset, uint32 who/*=UNKNOWN*/) +uint32_t GPUReadLong(uint32_t offset, uint32_t who/*=UNKNOWN*/) { if (offset >= 0xF02000 && offset <= 0xF020FF) - WriteLog("GPU: ReadLong--Attempt to read from GPU register file by %s!\n", whoName[who]); + { + WriteLog("GPU: ReadLong--Attempt to read from GPU register file (%X) by %s!\n", offset, whoName[who]); + uint32_t reg = (offset & 0xFC) >> 2; + return (reg < 32 ? gpu_reg_bank_0[reg] : gpu_reg_bank_1[reg - 32]); + } // if ((offset >= GPU_WORK_RAM_BASE) && (offset < GPU_WORK_RAM_BASE + 0x1000)) if ((offset >= GPU_WORK_RAM_BASE) && (offset <= GPU_WORK_RAM_BASE + 0x0FFC)) { offset &= 0xFFF; - return ((uint32)gpu_ram_8[offset] << 24) | ((uint32)gpu_ram_8[offset+1] << 16) - | ((uint32)gpu_ram_8[offset+2] << 8) | (uint32)gpu_ram_8[offset+3];//*/ + return ((uint32_t)gpu_ram_8[offset] << 24) | ((uint32_t)gpu_ram_8[offset+1] << 16) + | ((uint32_t)gpu_ram_8[offset+2] << 8) | (uint32_t)gpu_ram_8[offset+3];//*/ // return GET32(gpu_ram_8, offset); } // else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset < GPU_CONTROL_RAM_BASE+0x20)) @@ -552,7 +563,7 @@ uint32 GPUReadLong(uint32 offset, uint32 who/*=UNKNOWN*/) // // GPU byte access (write) // -void GPUWriteByte(uint32 offset, uint8 data, uint32 who/*=UNKNOWN*/) +void GPUWriteByte(uint32_t offset, uint8_t data, uint32_t who/*=UNKNOWN*/) { if (offset >= 0xF02000 && offset <= 0xF020FF) WriteLog("GPU: WriteByte--Attempt to write to GPU register file by %s!\n", whoName[who]); @@ -571,7 +582,7 @@ void GPUWriteByte(uint32 offset, uint8 data, uint32 who/*=UNKNOWN*/) } else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset <= GPU_CONTROL_RAM_BASE + 0x1F)) { - uint32 reg = offset & 0x1C; + uint32_t reg = offset & 0x1C; int bytenum = offset & 0x03; //This is definitely wrong! @@ -579,7 +590,7 @@ void GPUWriteByte(uint32 offset, uint8 data, uint32 who/*=UNKNOWN*/) gpu_div_control = (gpu_div_control & (~(0xFF << (bytenum << 3)))) | (data << (bytenum << 3)); else { - uint32 old_data = GPUReadLong(offset & 0xFFFFFFC, who); + uint32_t old_data = GPUReadLong(offset & 0xFFFFFFC, who); bytenum = 3 - bytenum; // convention motorola !!! old_data = (old_data & (~(0xFF << (bytenum << 3)))) | (data << (bytenum << 3)); GPUWriteLong(offset & 0xFFFFFFC, old_data, who); @@ -593,7 +604,7 @@ void GPUWriteByte(uint32 offset, uint8 data, uint32 who/*=UNKNOWN*/) // // GPU word access (write) // -void GPUWriteWord(uint32 offset, uint16 data, uint32 who/*=UNKNOWN*/) +void GPUWriteWord(uint32_t offset, uint16_t data, uint32_t who/*=UNKNOWN*/) { if (offset >= 0xF02000 && offset <= 0xF020FF) WriteLog("GPU: WriteWord--Attempt to write to GPU register file by %s!\n", whoName[who]); @@ -640,13 +651,16 @@ void GPUWriteWord(uint32 offset, uint16 data, uint32 who/*=UNKNOWN*/) else { //WriteLog("[GPU W16:%08X,%04X]", offset, data); - uint32 old_data = GPUReadLong(offset & 0xFFFFFFC, who); + uint32_t old_data = GPUReadLong(offset & 0xFFFFFFC, who); + if (offset & 0x02) old_data = (old_data & 0xFFFF0000) | (data & 0xFFFF); else old_data = (old_data & 0x0000FFFF) | ((data & 0xFFFF) << 16); + GPUWriteLong(offset & 0xFFFFFFC, old_data, who); } + return; } else if ((offset == GPU_WORK_RAM_BASE + 0x0FFF) || (GPU_CONTROL_RAM_BASE + 0x1F)) @@ -665,7 +679,7 @@ void GPUWriteWord(uint32 offset, uint16 data, uint32 who/*=UNKNOWN*/) // // GPU dword access (write) // -void GPUWriteLong(uint32 offset, uint32 data, uint32 who/*=UNKNOWN*/) +void GPUWriteLong(uint32_t offset, uint32_t data, uint32_t who/*=UNKNOWN*/) { if (offset >= 0xF02000 && offset <= 0xF020FF) WriteLog("GPU: WriteLong--Attempt to write to GPU register file by %s!\n", whoName[who]); @@ -694,7 +708,9 @@ void GPUWriteLong(uint32 offset, uint32 data, uint32 who/*=UNKNOWN*/) case 0x00: { bool IMASKCleared = (gpu_flags & IMASK) && !(data & IMASK); - gpu_flags = data; + // NOTE: According to the JTRM, writing a 1 to IMASK has no effect; only the + // IRQ logic can set it. So we mask it out here to prevent problems... + gpu_flags = data & (~IMASK); gpu_flag_z = gpu_flags & ZERO_FLAG; gpu_flag_c = (gpu_flags & CARRY_FLAG) >> 1; gpu_flag_n = (gpu_flags & NEGA_FLAG) >> 2; @@ -730,7 +746,7 @@ WriteLog("GPU: %s setting GPU PC to %08X %s\n", whoName[who], gpu_pc, (GPU_RUNNI break; case 0x14: { -// uint32 gpu_was_running = GPU_RUNNING; +// uint32_t gpu_was_running = GPU_RUNNING; data &= ~0xF7C0; // Disable writes to INT_LAT0-4 & TOM version number // check for GPU -> CPU interrupt @@ -739,10 +755,11 @@ WriteLog("GPU: %s setting GPU PC to %08X %s\n", whoName[who], gpu_pc, (GPU_RUNNI //WriteLog("GPU->CPU interrupt\n"); if (TOMIRQEnabled(IRQ_GPU)) { - if ((TOMIRQEnabled(IRQ_GPU)) && (JaguarInterruptHandlerIsValid(64))) +//This is the programmer's responsibility, to make sure the handler is valid, not ours! +// if ((TOMIRQEnabled(IRQ_GPU))// && (JaguarInterruptHandlerIsValid(64))) { TOMSetPendingGPUInt(); - m68k_set_irq(7); // Set 68000 NMI + m68k_set_irq(2); // Set 68000 IPL 2 GPUReleaseTimeslice(); } } @@ -764,6 +781,7 @@ WriteLog("GPU: %s setting GPU PC to %08X %s\n", whoName[who], gpu_pc, (GPU_RUNNI { //WriteLog("asked to perform a single step (single step is %senabled)\n",(data&0x8)?"":"not "); } + gpu_control = (gpu_control & 0xF7C0) | (data & (~0xF7C0)); // if gpu wasn't running but is now running, execute a few cycles @@ -809,13 +827,13 @@ static bool finished = false; if (GPU_RUNNING && effect_start5 && gpu_pc == 0xF035D8) { // Let's do a dump of $6528! -/* uint32 numItems = JaguarReadWord(0x6BD6); +/* uint32_t numItems = JaguarReadWord(0x6BD6); WriteLog("\nDump of $6528: %u items.\n\n", numItems); for(int i=0; i ", 0x6528+i, JaguarReadLong(0x6528+i), JaguarReadLong(0x6528+i+4), JaguarReadLong(0x6528+i+8)); - uint16 link = JaguarReadWord(0x6528+i+8+2); + uint16_t link = JaguarReadWord(0x6528+i+8+2); for(int j=0; j<40; j+=4) WriteLog("%08X ", JaguarReadLong(link + j)); WriteLog("\n"); @@ -825,7 +843,7 @@ if (GPU_RUNNING && effect_start5 && gpu_pc == 0xF035D8) //This isn't working the way it should! !!! FIX !!! //Err, actually, it is. // NOW, it works right! Problem solved!!! It's a blitter bug! -/* uint32 src = 0x4D54, dst = 0xF03000, width = 10 * 4; +/* uint32_t src = 0x4D54, dst = 0xF03000, width = 10 * 4; for(int y=0; y<127; y++) { for(int x=0; x<2; x++) @@ -927,7 +945,7 @@ void GPUHandleIRQs(void) return; // Get the interrupt latch & enable bits - uint32 bits = (gpu_control >> 6) & 0x1F, mask = (gpu_flags >> 4) & 0x1F; + uint32_t bits = (gpu_control >> 6) & 0x1F, mask = (gpu_flags >> 4) & 0x1F; // Bail out if latched interrupts aren't enabled bits &= mask; @@ -935,7 +953,7 @@ void GPUHandleIRQs(void) return; // Determine which interrupt to service - uint32 which = 0; //Isn't there a #pragma to disable this warning??? + uint32_t which = 0; //Isn't there a #pragma to disable this warning??? if (bits & 0x01) which = 0; if (bits & 0x02) @@ -971,7 +989,7 @@ void GPUSetIRQLine(int irqline, int state) if (start_logging) WriteLog("GPU: Setting GPU IRQ line #%i\n", irqline); - uint32 mask = 0x0040 << irqline; + uint32_t mask = 0x0040 << irqline; gpu_control &= ~mask; // Clear the interrupt latch if (state) @@ -988,8 +1006,8 @@ void GPUSetIRQLine(int irqline, int state) void GPUInit(void) { // memory_malloc_secure((void **)&gpu_ram_8, 0x1000, "GPU work RAM"); -// memory_malloc_secure((void **)&gpu_reg_bank_0, 32 * sizeof(int32), "GPU bank 0 regs"); -// memory_malloc_secure((void **)&gpu_reg_bank_1, 32 * sizeof(int32), "GPU bank 1 regs"); +// memory_malloc_secure((void **)&gpu_reg_bank_0, 32 * sizeof(int32_t), "GPU bank 0 regs"); +// memory_malloc_secure((void **)&gpu_reg_bank_1, 32 * sizeof(int32_t), "GPU bank 1 regs"); build_branch_condition_table(); @@ -1027,34 +1045,42 @@ void GPUReset(void) gpu_in_exec = 0; //not needed GPUInterruptPending = false; GPUResetStats(); + + // Contents of local RAM are quasi-stable; we simulate this by randomizing RAM contents + for(uint32_t i=0; i<4096; i+=4) + *((uint32_t *)(&gpu_ram_8[i])) = rand(); } -uint32 GPUReadPC(void) + +uint32_t GPUReadPC(void) { return gpu_pc; } + void GPUResetStats(void) { - for(uint32 i=0; i<64; i++) + for(uint32_t i=0; i<64; i++) gpu_opcode_use[i] = 0; WriteLog("--> GPU stats were reset!\n"); } + void GPUDumpDisassembly(void) { char buffer[512]; WriteLog("\n---[GPU code at 00F03000]---------------------------\n"); - uint32 j = 0xF03000; + uint32_t j = 0xF03000; while (j <= 0xF03FFF) { - uint32 oldj = j; + uint32_t oldj = j; j += dasmjag(JAGUAR_GPU, buffer, j); WriteLog("\t%08X: %s\n", oldj, buffer); } } + void GPUDumpRegisters(void) { WriteLog("\n---[GPU flags: NCZ %d%d%d]-----------------------\n", gpu_flag_n, gpu_flag_c, gpu_flag_z); @@ -1078,6 +1104,7 @@ void GPUDumpRegisters(void) } } + void GPUDumpMemory(void) { WriteLog("\n---[GPU data at 00F03000]---------------------------\n"); @@ -1086,12 +1113,27 @@ void GPUDumpMemory(void) gpu_ram_8[i+1], gpu_ram_8[i+2], gpu_ram_8[i+3]); } + void GPUDone(void) { + WriteLog("\n\n---------------------------------------------------------------------\n"); + WriteLog("GPU I/O Registers\n"); + WriteLog("---------------------------------------------------------------------\n"); + WriteLog("F0%04X (G_FLAGS): $%06X\n", 0x2100, (gpu_flags & 0xFFFFFFF8) | (gpu_flag_n << 2) | (gpu_flag_c << 1) | gpu_flag_z); + WriteLog("F0%04X (G_MTXC): $%04X\n", 0x2104, gpu_matrix_control); + WriteLog("F0%04X (G_MTXA): $%04X\n", 0x2108, gpu_pointer_to_matrix); + WriteLog("F0%04X (G_END): $%02X\n", 0x210C, gpu_data_organization); + WriteLog("F0%04X (G_PC): $%06X\n", 0x2110, gpu_pc); + WriteLog("F0%04X (G_CTRL): $%06X\n", 0x2114, gpu_control); + WriteLog("F0%04X (G_HIDATA): $%08X\n", 0x2118, gpu_hidata); + WriteLog("F0%04X (G_REMAIN): $%08X\n", 0x211C, gpu_remain); + WriteLog("F0%04X (G_DIVCTRL): $%02X\n", 0x211C, gpu_div_control); + WriteLog("---------------------------------------------------------------------\n\n\n"); + WriteLog("GPU: Stopped at PC=%08X (GPU %s running)\n", (unsigned int)gpu_pc, GPU_RUNNING ? "was" : "wasn't"); // Get the interrupt latch & enable bits - uint8 bits = (gpu_control >> 6) & 0x1F, mask = (gpu_flags >> 4) & 0x1F; + uint8_t bits = (gpu_control >> 6) & 0x1F, mask = (gpu_flags >> 4) & 0x1F; WriteLog("GPU: Latch bits = %02X, enable bits = %02X\n", bits, mask); GPUDumpRegisters(); @@ -1104,19 +1146,16 @@ void GPUDone(void) WriteLog("\t%17s %lu\n", gpu_opcode_str[i], gpu_opcode_use[i]); } WriteLog("\n"); - -// memory_free(gpu_ram_8); -// memory_free(gpu_reg_bank_0); -// memory_free(gpu_reg_bank_1); } + // // Main GPU execution core // static int testCount = 1; static int len = 0; static bool tripwire = false; -void GPUExec(int32 cycles) +void GPUExec(int32_t cycles) { if (!GPU_RUNNING) return; @@ -1139,14 +1178,14 @@ if (gpu_ram_8[0x054] == 0x98 && gpu_ram_8[0x055] == 0x0A && gpu_ram_8[0x056] == { if (gpu_pc == 0xF03000) { - extern uint32 starCount; + extern uint32_t starCount; starCount = 0; /* WriteLog("GPU: Starting starfield generator... Dump of [R03=%08X]:\n", gpu_reg_bank_0[03]); - uint32 base = gpu_reg_bank_0[3]; - for(uint32 i=0; i<0x100; i+=16) + uint32_t base = gpu_reg_bank_0[3]; + for(uint32_t i=0; i<0x100; i+=16) { WriteLog("%02X: ", i); - for(uint32 j=0; j<16; j++) + for(uint32_t j=0; j<16; j++) { WriteLog("%02X ", JaguarReadByte(base + i + j)); } @@ -1171,9 +1210,13 @@ if (gpu_ram_8[0x054] == 0x98 && gpu_ram_8[0x055] == 0x0A && gpu_ram_8[0x056] == /* gpu_flag_c = (gpu_flag_c ? 1 : 0); gpu_flag_z = (gpu_flag_z ? 1 : 0); gpu_flag_n = (gpu_flag_n ? 1 : 0);*/ +#if 0 +if (gpu_pc == 0xF03200) + doGPUDis = true; +#endif - uint16 opcode = GPUReadWord(gpu_pc, GPU); - uint32 index = opcode >> 10; + uint16_t opcode = GPUReadWord(gpu_pc, GPU); + uint32_t index = opcode >> 10; gpu_instruction = opcode; // Added for GPU #3... gpu_opcode_first_parameter = (opcode >> 5) & 0x1F; gpu_opcode_second_parameter = opcode & 0x1F; @@ -1351,6 +1394,7 @@ GPU opcodes use (offset punch--vertically below bad guy): nop 41362 */ + static void gpu_opcode_jump(void) { #ifdef GPU_DIS_JUMP @@ -1367,7 +1411,7 @@ const char * condition[32] = gpu_flag_z = (gpu_flag_z ? 1 : 0); gpu_flag_n = (gpu_flag_n ? 1 : 0);*/ // KLUDGE: Used by BRANCH_CONDITION - uint32 jaguar_flags = (gpu_flag_n << 2) | (gpu_flag_c << 1) | gpu_flag_z; + uint32_t jaguar_flags = (gpu_flag_n << 2) | (gpu_flag_c << 1) | gpu_flag_z; if (BRANCH_CONDITION(IMM_2)) { @@ -1377,10 +1421,10 @@ const char * condition[32] = #endif if (gpu_start_log) WriteLog(" --> JUMP: Branch taken.\n"); - uint32 delayed_pc = RM; + uint32_t delayed_pc = RM; GPUExec(1); gpu_pc = delayed_pc; -/* uint16 opcode = GPUReadWord(gpu_pc, GPU); +/* uint16_t opcode = GPUReadWord(gpu_pc, GPU); gpu_opcode_first_parameter = (opcode >> 5) & 0x1F; gpu_opcode_second_parameter = opcode & 0x1F; @@ -1394,6 +1438,7 @@ if (gpu_start_log) #endif } + static void gpu_opcode_jr(void) { #ifdef GPU_DIS_JR @@ -1407,8 +1452,8 @@ const char * condition[32] = #endif /* if (CONDITION(jaguar.op & 31)) { - int32 r1 = (INT8)((jaguar.op >> 2) & 0xF8) >> 2; - uint32 newpc = jaguar.PC + r1; + int32_t r1 = (INT8)((jaguar.op >> 2) & 0xF8) >> 2; + uint32_t newpc = jaguar.PC + r1; CALL_MAME_DEBUG; jaguar.op = ROPCODE(jaguar.PC); jaguar.PC = newpc; @@ -1421,7 +1466,7 @@ const char * condition[32] = gpu_flag_c = (gpu_flag_c ? 1 : 0); gpu_flag_z = (gpu_flag_z ? 1 : 0);*/ // KLUDGE: Used by BRANCH_CONDITION - uint32 jaguar_flags = (gpu_flag_n << 2) | (gpu_flag_c << 1) | gpu_flag_z; + uint32_t jaguar_flags = (gpu_flag_n << 2) | (gpu_flag_c << 1) | gpu_flag_z; if (BRANCH_CONDITION(IMM_2)) { @@ -1431,11 +1476,11 @@ const char * condition[32] = #endif if (gpu_start_log) WriteLog(" --> JR: Branch taken.\n"); - int32 offset = (IMM_1 & 0x10 ? 0xFFFFFFF0 | IMM_1 : IMM_1); // Sign extend IMM_1 - int32 delayed_pc = gpu_pc + (offset * 2); + int32_t offset = (IMM_1 & 0x10 ? 0xFFFFFFF0 | IMM_1 : IMM_1); // Sign extend IMM_1 + int32_t delayed_pc = gpu_pc + (offset * 2); GPUExec(1); gpu_pc = delayed_pc; -/* uint16 opcode = GPUReadWord(gpu_pc, GPU); +/* uint16_t opcode = GPUReadWord(gpu_pc, GPU); gpu_opcode_first_parameter = (opcode >> 5) & 0x1F; gpu_opcode_second_parameter = opcode & 0x1F; @@ -1449,13 +1494,14 @@ if (gpu_start_log) #endif } + static void gpu_opcode_add(void) { #ifdef GPU_DIS_ADD if (doGPUDis) WriteLog("%06X: ADD R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN); #endif - uint32 res = RN + RM; + uint32_t res = RN + RM; CLR_ZNC; SET_ZNC_ADD(RN, RM, res); RN = res; #ifdef GPU_DIS_ADD @@ -1464,6 +1510,7 @@ static void gpu_opcode_add(void) #endif } + static void gpu_opcode_addc(void) { #ifdef GPU_DIS_ADDC @@ -1471,14 +1518,14 @@ static void gpu_opcode_addc(void) WriteLog("%06X: ADDC R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN); #endif /* int dreg = jaguar.op & 31; - uint32 r1 = jaguar.r[(jaguar.op >> 5) & 31]; - uint32 r2 = jaguar.r[dreg]; - uint32 res = r2 + r1 + ((jaguar.FLAGS >> 1) & 1); + uint32_t r1 = jaguar.r[(jaguar.op >> 5) & 31]; + uint32_t r2 = jaguar.r[dreg]; + uint32_t res = r2 + r1 + ((jaguar.FLAGS >> 1) & 1); jaguar.r[dreg] = res; CLR_ZNC; SET_ZNC_ADD(r2,r1,res);*/ - uint32 res = RN + RM + gpu_flag_c; - uint32 carry = gpu_flag_c; + uint32_t res = RN + RM + gpu_flag_c; + uint32_t carry = gpu_flag_c; // SET_ZNC_ADD(RN, RM, res); //???BUG??? Yes! SET_ZNC_ADD(RN + carry, RM, res); // SET_ZNC_ADD(RN, RM + carry, res); @@ -1489,14 +1536,15 @@ static void gpu_opcode_addc(void) #endif } + static void gpu_opcode_addq(void) { #ifdef GPU_DIS_ADDQ if (doGPUDis) WriteLog("%06X: ADDQ #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN); #endif - uint32 r1 = gpu_convert_zero[IMM_1]; - uint32 res = RN + r1; + uint32_t r1 = gpu_convert_zero[IMM_1]; + uint32_t res = RN + r1; CLR_ZNC; SET_ZNC_ADD(RN, r1, res); RN = res; #ifdef GPU_DIS_ADDQ @@ -1505,6 +1553,7 @@ static void gpu_opcode_addq(void) #endif } + static void gpu_opcode_addqt(void) { #ifdef GPU_DIS_ADDQT @@ -1518,13 +1567,14 @@ static void gpu_opcode_addqt(void) #endif } + static void gpu_opcode_sub(void) { #ifdef GPU_DIS_SUB if (doGPUDis) WriteLog("%06X: SUB R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN); #endif - uint32 res = RN - RM; + uint32_t res = RN - RM; SET_ZNC_SUB(RN, RM, res); RN = res; #ifdef GPU_DIS_SUB @@ -1533,42 +1583,34 @@ static void gpu_opcode_sub(void) #endif } + static void gpu_opcode_subc(void) { #ifdef GPU_DIS_SUBC if (doGPUDis) WriteLog("%06X: SUBC R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN); #endif - uint32 res = RN - RM - gpu_flag_c; - uint32 borrow = gpu_flag_c; -// SET_ZNC_SUB(RN, RM, res); //???BUG??? YES!!! -//No matter how you do it, there is a problem. With below, it's 0-0 with carry, -//and the one below it it's FFFFFFFF - FFFFFFFF with carry... !!! FIX !!! -// SET_ZNC_SUB(RN - borrow, RM, res); - SET_ZNC_SUB(RN, RM + borrow, res); - RN = res; + // This is how the GPU ALU does it--Two's complement with inverted carry + uint64_t res = (uint64_t)RN + (uint64_t)(RM ^ 0xFFFFFFFF) + (gpu_flag_c ^ 1); + // Carry out of the result is inverted too + gpu_flag_c = ((res >> 32) & 0x01) ^ 1; + RN = (res & 0xFFFFFFFF); + SET_ZN(RN); #ifdef GPU_DIS_SUBC if (doGPUDis) WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN); #endif } -/* -N = 5, M = 3, 3 - 5 = -2, C = 1... Or, in our case: -N = 0, M = 1, 0 - 1 = -1, C = 0! -#define SET_C_SUB(a,b) (gpu_flag_c = ((uint32)(b) > (uint32)(a))) -#define SET_ZN(r) SET_N(r); SET_Z(r) -#define SET_ZNC_ADD(a,b,r) SET_N(r); SET_Z(r); SET_C_ADD(a,b) -#define SET_ZNC_SUB(a,b,r) SET_N(r); SET_Z(r); SET_C_SUB(a,b) -*/ + static void gpu_opcode_subq(void) { #ifdef GPU_DIS_SUBQ if (doGPUDis) WriteLog("%06X: SUBQ #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN); #endif - uint32 r1 = gpu_convert_zero[IMM_1]; - uint32 res = RN - r1; + uint32_t r1 = gpu_convert_zero[IMM_1]; + uint32_t res = RN - r1; SET_ZNC_SUB(RN, r1, res); RN = res; #ifdef GPU_DIS_SUBQ @@ -1577,6 +1619,7 @@ static void gpu_opcode_subq(void) #endif } + static void gpu_opcode_subqt(void) { #ifdef GPU_DIS_SUBQT @@ -1590,13 +1633,14 @@ static void gpu_opcode_subqt(void) #endif } + static void gpu_opcode_cmp(void) { #ifdef GPU_DIS_CMP if (doGPUDis) WriteLog("%06X: CMP R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN); #endif - uint32 res = RN - RM; + uint32_t res = RN - RM; SET_ZNC_SUB(RN, RM, res); #ifdef GPU_DIS_CMP if (doGPUDis) @@ -1604,16 +1648,17 @@ static void gpu_opcode_cmp(void) #endif } + static void gpu_opcode_cmpq(void) { - static int32 sqtable[32] = + static int32_t sqtable[32] = { 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,-16,-15,-14,-13,-12,-11,-10,-9,-8,-7,-6,-5,-4,-3,-2,-1 }; #ifdef GPU_DIS_CMPQ if (doGPUDis) WriteLog("%06X: CMPQ #%d, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, sqtable[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN); #endif - uint32 r1 = sqtable[IMM_1 & 0x1F]; // I like this better -> (INT8)(jaguar.op >> 2) >> 3; - uint32 res = RN - r1; + uint32_t r1 = sqtable[IMM_1 & 0x1F]; // I like this better -> (INT8)(jaguar.op >> 2) >> 3; + uint32_t res = RN - r1; SET_ZNC_SUB(RN, r1, res); #ifdef GPU_DIS_CMPQ if (doGPUDis) @@ -1621,6 +1666,7 @@ static void gpu_opcode_cmpq(void) #endif } + static void gpu_opcode_and(void) { #ifdef GPU_DIS_AND @@ -1635,6 +1681,7 @@ static void gpu_opcode_and(void) #endif } + static void gpu_opcode_or(void) { #ifdef GPU_DIS_OR @@ -1649,6 +1696,7 @@ static void gpu_opcode_or(void) #endif } + static void gpu_opcode_xor(void) { #ifdef GPU_DIS_XOR @@ -1663,6 +1711,7 @@ static void gpu_opcode_xor(void) #endif } + static void gpu_opcode_not(void) { #ifdef GPU_DIS_NOT @@ -1677,6 +1726,7 @@ static void gpu_opcode_not(void) #endif } + static void gpu_opcode_move_pc(void) { #ifdef GPU_DIS_MOVEPC @@ -1692,13 +1742,14 @@ static void gpu_opcode_move_pc(void) #endif } + static void gpu_opcode_sat8(void) { #ifdef GPU_DIS_SAT8 if (doGPUDis) WriteLog("%06X: SAT8 R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN); #endif - RN = ((int32)RN < 0 ? 0 : (RN > 0xFF ? 0xFF : RN)); + RN = ((int32_t)RN < 0 ? 0 : (RN > 0xFF ? 0xFF : RN)); SET_ZN(RN); #ifdef GPU_DIS_SAT8 if (doGPUDis) @@ -1706,80 +1757,142 @@ static void gpu_opcode_sat8(void) #endif } + static void gpu_opcode_sat16(void) { - RN = ((int32)RN < 0 ? 0 : (RN > 0xFFFF ? 0xFFFF : RN)); + RN = ((int32_t)RN < 0 ? 0 : (RN > 0xFFFF ? 0xFFFF : RN)); SET_ZN(RN); } static void gpu_opcode_sat24(void) { - RN = ((int32)RN < 0 ? 0 : (RN > 0xFFFFFF ? 0xFFFFFF : RN)); + RN = ((int32_t)RN < 0 ? 0 : (RN > 0xFFFFFF ? 0xFFFFFF : RN)); SET_ZN(RN); } + static void gpu_opcode_store_r14_indexed(void) { #ifdef GPU_DIS_STORE14I if (doGPUDis) WriteLog("%06X: STORE R%02u, (R14+$%02X) [NCZ:%u%u%u, R%02u=%08X, R14+$%02X=%08X]\n", gpu_pc-2, IMM_2, gpu_convert_zero[IMM_1] << 2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, gpu_convert_zero[IMM_1] << 2, gpu_reg[14]+(gpu_convert_zero[IMM_1] << 2)); #endif +#ifdef GPU_CORRECT_ALIGNMENT + uint32_t address = gpu_reg[14] + (gpu_convert_zero[IMM_1] << 2); + + if (address >= 0xF03000 && address <= 0xF03FFF) + GPUWriteLong(address & 0xFFFFFFFC, RN, GPU); + else + GPUWriteLong(address, RN, GPU); +#else GPUWriteLong(gpu_reg[14] + (gpu_convert_zero[IMM_1] << 2), RN, GPU); +#endif } + static void gpu_opcode_store_r15_indexed(void) { #ifdef GPU_DIS_STORE15I if (doGPUDis) WriteLog("%06X: STORE R%02u, (R15+$%02X) [NCZ:%u%u%u, R%02u=%08X, R15+$%02X=%08X]\n", gpu_pc-2, IMM_2, gpu_convert_zero[IMM_1] << 2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, gpu_convert_zero[IMM_1] << 2, gpu_reg[15]+(gpu_convert_zero[IMM_1] << 2)); #endif +#ifdef GPU_CORRECT_ALIGNMENT + uint32_t address = gpu_reg[15] + (gpu_convert_zero[IMM_1] << 2); + + if (address >= 0xF03000 && address <= 0xF03FFF) + GPUWriteLong(address & 0xFFFFFFFC, RN, GPU); + else + GPUWriteLong(address, RN, GPU); +#else GPUWriteLong(gpu_reg[15] + (gpu_convert_zero[IMM_1] << 2), RN, GPU); +#endif } + static void gpu_opcode_load_r14_ri(void) { #ifdef GPU_DIS_LOAD14R if (doGPUDis) WriteLog("%06X: LOAD (R14+R%02u), R%02u [NCZ:%u%u%u, R14+R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM+gpu_reg[14], IMM_2, RN); #endif +#ifdef GPU_CORRECT_ALIGNMENT + uint32_t address = gpu_reg[14] + RM; + + if (address >= 0xF03000 && address <= 0xF03FFF) + RN = GPUReadLong(address & 0xFFFFFFFC, GPU); + else + RN = GPUReadLong(address, GPU); +#else RN = GPUReadLong(gpu_reg[14] + RM, GPU); +#endif #ifdef GPU_DIS_LOAD14R if (doGPUDis) WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN); #endif } + static void gpu_opcode_load_r15_ri(void) { #ifdef GPU_DIS_LOAD15R if (doGPUDis) WriteLog("%06X: LOAD (R15+R%02u), R%02u [NCZ:%u%u%u, R15+R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM+gpu_reg[15], IMM_2, RN); #endif +#ifdef GPU_CORRECT_ALIGNMENT + uint32_t address = gpu_reg[15] + RM; + + if (address >= 0xF03000 && address <= 0xF03FFF) + RN = GPUReadLong(address & 0xFFFFFFFC, GPU); + else + RN = GPUReadLong(address, GPU); +#else RN = GPUReadLong(gpu_reg[15] + RM, GPU); +#endif #ifdef GPU_DIS_LOAD15R if (doGPUDis) WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN); #endif } + static void gpu_opcode_store_r14_ri(void) { #ifdef GPU_DIS_STORE14R if (doGPUDis) WriteLog("%06X: STORE R%02u, (R14+R%02u) [NCZ:%u%u%u, R%02u=%08X, R14+R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM+gpu_reg[14]); #endif +#ifdef GPU_CORRECT_ALIGNMENT + uint32_t address = gpu_reg[14] + RM; + + if (address >= 0xF03000 && address <= 0xF03FFF) + GPUWriteLong(address & 0xFFFFFFFC, RN, GPU); + else + GPUWriteLong(address, RN, GPU); +#else GPUWriteLong(gpu_reg[14] + RM, RN, GPU); +#endif } + static void gpu_opcode_store_r15_ri(void) { #ifdef GPU_DIS_STORE15R if (doGPUDis) WriteLog("%06X: STORE R%02u, (R15+R%02u) [NCZ:%u%u%u, R%02u=%08X, R15+R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM+gpu_reg[15]); #endif +#ifdef GPU_CORRECT_ALIGNMENT_STORE + uint32_t address = gpu_reg[15] + RM; + + if (address >= 0xF03000 && address <= 0xF03FFF) + GPUWriteLong(address & 0xFFFFFFFC, RN, GPU); + else + GPUWriteLong(address, RN, GPU); +#else GPUWriteLong(gpu_reg[15] + RM, RN, GPU); +#endif } + static void gpu_opcode_nop(void) { #ifdef GPU_DIS_NOP @@ -1788,13 +1901,14 @@ static void gpu_opcode_nop(void) #endif } + static void gpu_opcode_pack(void) { #ifdef GPU_DIS_PACK if (doGPUDis) WriteLog("%06X: %s R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, (!IMM_1 ? "PACK " : "UNPACK"), IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN); #endif - uint32 val = RN; + uint32_t val = RN; //BUG! if (RM == 0) // Pack if (IMM_1 == 0) // Pack @@ -1807,6 +1921,7 @@ static void gpu_opcode_pack(void) #endif } + static void gpu_opcode_storeb(void) { #ifdef GPU_DIS_STOREB @@ -1821,31 +1936,61 @@ static void gpu_opcode_storeb(void) JaguarWriteByte(RM, RN, GPU); } + static void gpu_opcode_storew(void) { #ifdef GPU_DIS_STOREW if (doGPUDis) WriteLog("%06X: STOREW R%02u, (R%02u) [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM); #endif +#ifdef GPU_CORRECT_ALIGNMENT + if ((RM >= 0xF03000) && (RM <= 0xF03FFF)) + GPUWriteLong(RM & 0xFFFFFFFE, RN & 0xFFFF, GPU); + else + JaguarWriteWord(RM, RN, GPU); +#else if ((RM >= 0xF03000) && (RM <= 0xF03FFF)) GPUWriteLong(RM, RN & 0xFFFF, GPU); else JaguarWriteWord(RM, RN, GPU); +#endif } + static void gpu_opcode_store(void) { #ifdef GPU_DIS_STORE if (doGPUDis) WriteLog("%06X: STORE R%02u, (R%02u) [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM); #endif +#ifdef GPU_CORRECT_ALIGNMENT + if ((RM >= 0xF03000) && (RM <= 0xF03FFF)) + GPUWriteLong(RM & 0xFFFFFFFC, RN, GPU); + else + GPUWriteLong(RM, RN, GPU); +#else GPUWriteLong(RM, RN, GPU); +#endif } + static void gpu_opcode_storep(void) { +#ifdef GPU_CORRECT_ALIGNMENT + if ((RM >= 0xF03000) && (RM <= 0xF03FFF)) + { + GPUWriteLong((RM & 0xFFFFFFF8) + 0, gpu_hidata, GPU); + GPUWriteLong((RM & 0xFFFFFFF8) + 4, RN, GPU); + } + else + { + GPUWriteLong(RM + 0, gpu_hidata, GPU); + GPUWriteLong(RM + 4, RN, GPU); + } +#else GPUWriteLong(RM + 0, gpu_hidata, GPU); GPUWriteLong(RM + 4, RN, GPU); +#endif } static void gpu_opcode_loadb(void) @@ -1864,75 +2009,149 @@ static void gpu_opcode_loadb(void) #endif } + static void gpu_opcode_loadw(void) { #ifdef GPU_DIS_LOADW if (doGPUDis) WriteLog("%06X: LOADW (R%02u), R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN); #endif +#ifdef GPU_CORRECT_ALIGNMENT + if ((RM >= 0xF03000) && (RM <= 0xF03FFF)) + RN = GPUReadLong(RM & 0xFFFFFFFE, GPU) & 0xFFFF; + else + RN = JaguarReadWord(RM, GPU); +#else if ((RM >= 0xF03000) && (RM <= 0xF03FFF)) RN = GPUReadLong(RM, GPU) & 0xFFFF; else RN = JaguarReadWord(RM, GPU); +#endif #ifdef GPU_DIS_LOADW if (doGPUDis) WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN); #endif } + +// According to the docs, & "Do The Same", this address is long aligned... +// So let's try it: +// And it works!!! Need to fix all instances... +// Also, Power Drive Rally seems to contradict the idea that only LOADs in +// the $F03000-$F03FFF range are aligned... +#warning "!!! Alignment issues, need to find definitive final word on this !!!" +/* +Preliminary testing on real hardware seems to confirm that something strange goes on +with unaligned reads in main memory. When the address is off by 1, the result is the +same as the long address with the top byte replaced by something. So if the read is +from $401, and $400 has 12 34 56 78, the value read will be $nn345678, where nn is a currently unknown vlaue. +When the address is off by 2, the result would be $nnnn5678, where nnnn is unknown. +When the address is off by 3, the result would be $nnnnnn78, where nnnnnn is unknown. +It may be that the "unknown" values come from the prefetch queue, but not sure how +to test that. They seem to be stable, though, which would indicate such a mechanism. +Sometimes, however, the off by 2 case returns $12345678! +*/ static void gpu_opcode_load(void) { #ifdef GPU_DIS_LOAD if (doGPUDis) WriteLog("%06X: LOAD (R%02u), R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN); #endif +#ifdef GPU_CORRECT_ALIGNMENT + uint32_t mask[4] = { 0x00000000, 0xFF000000, 0xFFFF0000, 0xFFFFFF00 }; +// if ((RM >= 0xF03000) && (RM <= 0xF03FFF)) + RN = GPUReadLong(RM & 0xFFFFFFFC, GPU); +// RN = GPUReadLong(RM & 0x00FFFFFC, GPU); +// else +// RN = GPUReadLong(RM, GPU); + // Simulate garbage in unaligned reads... +//seems that this behavior is different in GPU mem vs. main mem... +// if ((RM < 0xF03000) || (RM > 0xF0BFFF)) +// RN |= mask[RM & 0x03]; +#else RN = GPUReadLong(RM, GPU); +#endif #ifdef GPU_DIS_LOAD if (doGPUDis) WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN); #endif } + static void gpu_opcode_loadp(void) { +#ifdef GPU_CORRECT_ALIGNMENT + if ((RM >= 0xF03000) && (RM <= 0xF03FFF)) + { + gpu_hidata = GPUReadLong((RM & 0xFFFFFFF8) + 0, GPU); + RN = GPUReadLong((RM & 0xFFFFFFF8) + 4, GPU); + } + else + { + gpu_hidata = GPUReadLong(RM + 0, GPU); + RN = GPUReadLong(RM + 4, GPU); + } +#else gpu_hidata = GPUReadLong(RM + 0, GPU); RN = GPUReadLong(RM + 4, GPU); +#endif } + static void gpu_opcode_load_r14_indexed(void) { #ifdef GPU_DIS_LOAD14I if (doGPUDis) WriteLog("%06X: LOAD (R14+$%02X), R%02u [NCZ:%u%u%u, R14+$%02X=%08X, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1] << 2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, gpu_convert_zero[IMM_1] << 2, gpu_reg[14]+(gpu_convert_zero[IMM_1] << 2), IMM_2, RN); #endif +#ifdef GPU_CORRECT_ALIGNMENT + uint32_t address = gpu_reg[14] + (gpu_convert_zero[IMM_1] << 2); + + if ((RM >= 0xF03000) && (RM <= 0xF03FFF)) + RN = GPUReadLong(address & 0xFFFFFFFC, GPU); + else + RN = GPUReadLong(address, GPU); +#else RN = GPUReadLong(gpu_reg[14] + (gpu_convert_zero[IMM_1] << 2), GPU); +#endif #ifdef GPU_DIS_LOAD14I if (doGPUDis) WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN); #endif } + static void gpu_opcode_load_r15_indexed(void) { #ifdef GPU_DIS_LOAD15I if (doGPUDis) WriteLog("%06X: LOAD (R15+$%02X), R%02u [NCZ:%u%u%u, R15+$%02X=%08X, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1] << 2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, gpu_convert_zero[IMM_1] << 2, gpu_reg[15]+(gpu_convert_zero[IMM_1] << 2), IMM_2, RN); #endif +#ifdef GPU_CORRECT_ALIGNMENT + uint32_t address = gpu_reg[15] + (gpu_convert_zero[IMM_1] << 2); + + if ((RM >= 0xF03000) && (RM <= 0xF03FFF)) + RN = GPUReadLong(address & 0xFFFFFFFC, GPU); + else + RN = GPUReadLong(address, GPU); +#else RN = GPUReadLong(gpu_reg[15] + (gpu_convert_zero[IMM_1] << 2), GPU); +#endif #ifdef GPU_DIS_LOAD15I if (doGPUDis) WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN); #endif } + static void gpu_opcode_movei(void) { #ifdef GPU_DIS_MOVEI if (doGPUDis) - WriteLog("%06X: MOVEI #$%08X, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, (uint32)GPUReadWord(gpu_pc) | ((uint32)GPUReadWord(gpu_pc + 2) << 16), IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN); + WriteLog("%06X: MOVEI #$%08X, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, (uint32_t)GPUReadWord(gpu_pc) | ((uint32_t)GPUReadWord(gpu_pc + 2) << 16), IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN); #endif // This instruction is followed by 32-bit value in LSW / MSW format... - RN = (uint32)GPUReadWord(gpu_pc, GPU) | ((uint32)GPUReadWord(gpu_pc + 2, GPU) << 16); + RN = (uint32_t)GPUReadWord(gpu_pc, GPU) | ((uint32_t)GPUReadWord(gpu_pc + 2, GPU) << 16); gpu_pc += 4; #ifdef GPU_DIS_MOVEI if (doGPUDis) @@ -1940,6 +2159,7 @@ static void gpu_opcode_movei(void) #endif } + static void gpu_opcode_moveta(void) { #ifdef GPU_DIS_MOVETA @@ -1953,6 +2173,7 @@ static void gpu_opcode_moveta(void) #endif } + static void gpu_opcode_movefa(void) { #ifdef GPU_DIS_MOVEFA @@ -1966,6 +2187,7 @@ static void gpu_opcode_movefa(void) #endif } + static void gpu_opcode_move(void) { #ifdef GPU_DIS_MOVE @@ -1979,6 +2201,7 @@ static void gpu_opcode_move(void) #endif } + static void gpu_opcode_moveq(void) { #ifdef GPU_DIS_MOVEQ @@ -1992,18 +2215,20 @@ static void gpu_opcode_moveq(void) #endif } + static void gpu_opcode_resmac(void) { RN = gpu_acc; } + static void gpu_opcode_imult(void) { #ifdef GPU_DIS_IMULT if (doGPUDis) WriteLog("%06X: IMULT R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN); #endif - RN = (int16)RN * (int16)RM; + RN = (int16_t)RN * (int16_t)RM; SET_ZN(RN); #ifdef GPU_DIS_IMULT if (doGPUDis) @@ -2011,13 +2236,15 @@ static void gpu_opcode_imult(void) #endif } + static void gpu_opcode_mult(void) { #ifdef GPU_DIS_MULT if (doGPUDis) WriteLog("%06X: MULT R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN); #endif - RN = (uint16)RM * (uint16)RN; + RN = (uint16_t)RM * (uint16_t)RN; +// RN = (RM & 0xFFFF) * (RN & 0xFFFF); SET_ZN(RN); #ifdef GPU_DIS_MULT if (doGPUDis) @@ -2025,13 +2252,14 @@ static void gpu_opcode_mult(void) #endif } + static void gpu_opcode_bclr(void) { #ifdef GPU_DIS_BCLR if (doGPUDis) WriteLog("%06X: BCLR #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN); #endif - uint32 res = RN & ~(1 << IMM_1); + uint32_t res = RN & ~(1 << IMM_1); RN = res; SET_ZN(res); #ifdef GPU_DIS_BCLR @@ -2040,6 +2268,7 @@ static void gpu_opcode_bclr(void) #endif } + static void gpu_opcode_btst(void) { #ifdef GPU_DIS_BTST @@ -2053,13 +2282,14 @@ static void gpu_opcode_btst(void) #endif } + static void gpu_opcode_bset(void) { #ifdef GPU_DIS_BSET if (doGPUDis) WriteLog("%06X: BSET #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN); #endif - uint32 res = RN | (1 << IMM_1); + uint32_t res = RN | (1 << IMM_1); RN = res; SET_ZN(res); #ifdef GPU_DIS_BSET @@ -2068,23 +2298,26 @@ static void gpu_opcode_bset(void) #endif } + static void gpu_opcode_imacn(void) { - uint32 res = (int16)RM * (int16)(RN); + uint32_t res = (int16_t)RM * (int16_t)(RN); gpu_acc += res; } + static void gpu_opcode_mtoi(void) { - uint32 _RM = RM; - uint32 res = RN = (((int32)_RM >> 8) & 0xFF800000) | (_RM & 0x007FFFFF); + uint32_t _RM = RM; + uint32_t res = RN = (((int32_t)_RM >> 8) & 0xFF800000) | (_RM & 0x007FFFFF); SET_ZN(res); } + static void gpu_opcode_normi(void) { - uint32 _RM = RM; - uint32 res = 0; + uint32_t _RM = RM; + uint32_t res = 0; if (_RM) { @@ -2106,21 +2339,21 @@ static void gpu_opcode_normi(void) static void gpu_opcode_mmult(void) { int count = gpu_matrix_control & 0x0F; // Matrix width - uint32 addr = gpu_pointer_to_matrix; // In the GPU's RAM - int64 accum = 0; - uint32 res; + uint32_t addr = gpu_pointer_to_matrix; // In the GPU's RAM + int64_t accum = 0; + uint32_t res; if (gpu_matrix_control & 0x10) // Column stepping { for(int i=0; i> 1)] >> 16) & 0xFFFF); + a = (int16_t)((gpu_alternate_reg[IMM_1 + (i >> 1)] >> 16) & 0xFFFF); else - a = (int16)(gpu_alternate_reg[IMM_1 + (i >> 1)] & 0xFFFF); + a = (int16_t)(gpu_alternate_reg[IMM_1 + (i >> 1)] & 0xFFFF); - int16 b = ((int16)GPUReadWord(addr + 2, GPU)); + int16_t b = ((int16_t)GPUReadWord(addr + 2, GPU)); accum += a * b; addr += 4 * count; } @@ -2129,22 +2362,23 @@ static void gpu_opcode_mmult(void) { for(int i=0; i> 1)] >> 16) & 0xFFFF); + a = (int16_t)((gpu_alternate_reg[IMM_1 + (i >> 1)] >> 16) & 0xFFFF); else - a = (int16)(gpu_alternate_reg[IMM_1 + (i >> 1)] & 0xFFFF); + a = (int16_t)(gpu_alternate_reg[IMM_1 + (i >> 1)] & 0xFFFF); - int16 b = ((int16)GPUReadWord(addr + 2, GPU)); + int16_t b = ((int16_t)GPUReadWord(addr + 2, GPU)); accum += a * b; addr += 4; } } - RN = res = (int32)accum; + RN = res = (int32_t)accum; // carry flag to do (out of the last add) SET_ZN(res); } + static void gpu_opcode_abs(void) { #ifdef GPU_DIS_ABS @@ -2167,79 +2401,81 @@ static void gpu_opcode_abs(void) #endif } + static void gpu_opcode_div(void) // RN / RM { #ifdef GPU_DIS_DIV if (doGPUDis) WriteLog("%06X: DIV R%02u, R%02u (%s) [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, (gpu_div_control & 0x01 ? "16.16" : "32"), gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN); #endif -// NOTE: remainder is NOT calculated correctly here! -// The original tried to get it right by checking to see if the -// remainder was negative, but that's too late... -// The code there should do it now, but I'm not 100% sure... - +#if 0 if (RM) { if (gpu_div_control & 0x01) // 16.16 division { - RN = ((uint64)RN << 16) / RM; - gpu_remain = ((uint64)RN << 16) % RM; + gpu_remain = ((uint64_t)RN << 16) % RM; + RN = ((uint64_t)RN << 16) / RM; } else { - RN = RN / RM; + // We calculate the remainder first because we destroy RN after + // this by assigning it to itself. gpu_remain = RN % RM; + RN = RN / RM; } - - if ((gpu_remain - RM) & 0x80000000) // If the result would have been negative... - gpu_remain -= RM; // Then make it negative! } else + { + // This is what happens according to SCPCD. NYAN! RN = 0xFFFFFFFF; + gpu_remain = 0; + } +#else + // Real algorithm, courtesy of SCPCD: NYAN! + uint32_t q = RN; + uint32_t r = 0; -/* uint32 _RM=RM; - uint32 _RN=RN; + // If 16.16 division, stuff top 16 bits of RN into remainder and put the + // bottom 16 of RN in top 16 of quotient + if (gpu_div_control & 0x01) + q <<= 16, r = RN >> 16; - if (_RM) + for(int i=0; i<32; i++) { - if (gpu_div_control & 1) - { - gpu_remain = (((uint64)_RN) << 16) % _RM; - if (gpu_remain&0x80000000) - gpu_remain-=_RM; - RN = (((uint64)_RN) << 16) / _RM; - } - else - { - gpu_remain = _RN % _RM; - if (gpu_remain&0x80000000) - gpu_remain-=_RM; - RN/=_RM; - } +// uint32_t sign = (r >> 31) & 0x01; + uint32_t sign = r & 0x80000000; + r = (r << 1) | ((q >> 31) & 0x01); + r += (sign ? RM : -RM); + q = (q << 1) | (((~r) >> 31) & 0x01); } - else - RN=0xffffffff;*/ + + RN = q; + gpu_remain = r; +#endif + #ifdef GPU_DIS_DIV if (doGPUDis) WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] Remainder: %08X\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN, gpu_remain); #endif } + static void gpu_opcode_imultn(void) { - uint32 res = (int32)((int16)RN * (int16)RM); - gpu_acc = (int32)res; + uint32_t res = (int32_t)((int16_t)RN * (int16_t)RM); + gpu_acc = (int32_t)res; SET_FLAG_Z(res); SET_FLAG_N(res); } + static void gpu_opcode_neg(void) { #ifdef GPU_DIS_NEG if (doGPUDis) WriteLog("%06X: NEG R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN); #endif - uint32 res = -RN; + uint32_t res = -RN; SET_ZNC_SUB(0, RN, res); RN = res; #ifdef GPU_DIS_NEG @@ -2248,6 +2484,7 @@ static void gpu_opcode_neg(void) #endif } + static void gpu_opcode_shlq(void) { #ifdef GPU_DIS_SHLQ @@ -2256,8 +2493,8 @@ static void gpu_opcode_shlq(void) #endif // Was a bug here... // (Look at Aaron's code: If r1 = 32, then 32 - 32 = 0 which is wrong!) - int32 r1 = 32 - IMM_1; - uint32 res = RN << r1; + int32_t r1 = 32 - IMM_1; + uint32_t res = RN << r1; SET_ZN(res); gpu_flag_c = (RN >> 31) & 1; RN = res; #ifdef GPU_DIS_SHLQ @@ -2266,14 +2503,15 @@ static void gpu_opcode_shlq(void) #endif } + static void gpu_opcode_shrq(void) { #ifdef GPU_DIS_SHRQ if (doGPUDis) WriteLog("%06X: SHRQ #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN); #endif - int32 r1 = gpu_convert_zero[IMM_1]; - uint32 res = RN >> r1; + int32_t r1 = gpu_convert_zero[IMM_1]; + uint32_t res = RN >> r1; SET_ZN(res); gpu_flag_c = RN & 1; RN = res; #ifdef GPU_DIS_SHRQ @@ -2282,14 +2520,15 @@ static void gpu_opcode_shrq(void) #endif } + static void gpu_opcode_ror(void) { #ifdef GPU_DIS_ROR if (doGPUDis) WriteLog("%06X: ROR R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN); #endif - uint32 r1 = RM & 0x1F; - uint32 res = (RN >> r1) | (RN << (32 - r1)); + uint32_t r1 = RM & 0x1F; + uint32_t res = (RN >> r1) | (RN << (32 - r1)); SET_ZN(res); gpu_flag_c = (RN >> 31) & 1; RN = res; #ifdef GPU_DIS_ROR @@ -2298,15 +2537,16 @@ static void gpu_opcode_ror(void) #endif } + static void gpu_opcode_rorq(void) { #ifdef GPU_DIS_RORQ if (doGPUDis) WriteLog("%06X: RORQ #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN); #endif - uint32 r1 = gpu_convert_zero[IMM_1 & 0x1F]; - uint32 r2 = RN; - uint32 res = (r2 >> r1) | (r2 << (32 - r1)); + uint32_t r1 = gpu_convert_zero[IMM_1 & 0x1F]; + uint32_t r2 = RN; + uint32_t res = (r2 >> r1) | (r2 << (32 - r1)); RN = res; SET_ZN(res); gpu_flag_c = (r2 >> 31) & 0x01; #ifdef GPU_DIS_RORQ @@ -2315,12 +2555,13 @@ static void gpu_opcode_rorq(void) #endif } + static void gpu_opcode_sha(void) { /* int dreg = jaguar.op & 31; - int32 r1 = (int32)jaguar.r[(jaguar.op >> 5) & 31]; - uint32 r2 = jaguar.r[dreg]; - uint32 res; + int32_t r1 = (int32_t)jaguar.r[(jaguar.op >> 5) & 31]; + uint32_t r2 = jaguar.r[dreg]; + uint32_t res; CLR_ZNC; if (r1 < 0) @@ -2330,7 +2571,7 @@ static void gpu_opcode_sha(void) } else { - res = (r1 >= 32) ? ((int32)r2 >> 31) : ((int32)r2 >> r1); + res = (r1 >= 32) ? ((int32_t)r2 >> 31) : ((int32_t)r2 >> r1); jaguar.FLAGS |= (r2 << 1) & 2; } jaguar.r[dreg] = res; @@ -2340,16 +2581,16 @@ static void gpu_opcode_sha(void) if (doGPUDis) WriteLog("%06X: SHA R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN); #endif - uint32 res; + uint32_t res; - if ((int32)RM < 0) + if ((int32_t)RM < 0) { - res = ((int32)RM <= -32) ? 0 : (RN << -(int32)RM); + res = ((int32_t)RM <= -32) ? 0 : (RN << -(int32_t)RM); gpu_flag_c = RN >> 31; } else { - res = ((int32)RM >= 32) ? ((int32)RN >> 31) : ((int32)RN >> (int32)RM); + res = ((int32_t)RM >= 32) ? ((int32_t)RN >> 31) : ((int32_t)RN >> (int32_t)RM); gpu_flag_c = RN & 0x01; } RN = res; @@ -2359,12 +2600,12 @@ static void gpu_opcode_sha(void) WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN); #endif -/* int32 sRM=(int32)RM; - uint32 _RN=RN; +/* int32_t sRM=(int32_t)RM; + uint32_t _RN=RN; if (sRM<0) { - uint32 shift=-sRM; + uint32_t shift=-sRM; if (shift>=32) shift=32; gpu_flag_c=(_RN&0x80000000)>>31; while (shift) @@ -2375,12 +2616,12 @@ static void gpu_opcode_sha(void) } else { - uint32 shift=sRM; + uint32_t shift=sRM; if (shift>=32) shift=32; gpu_flag_c=_RN&0x1; while (shift) { - _RN=((int32)_RN)>>1; + _RN=((int32_t)_RN)>>1; shift--; } } @@ -2389,13 +2630,14 @@ static void gpu_opcode_sha(void) SET_FLAG_N(_RN);*/ } + static void gpu_opcode_sharq(void) { #ifdef GPU_DIS_SHARQ if (doGPUDis) WriteLog("%06X: SHARQ #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN); #endif - uint32 res = (int32)RN >> gpu_convert_zero[IMM_1]; + uint32_t res = (int32_t)RN >> gpu_convert_zero[IMM_1]; SET_ZN(res); gpu_flag_c = RN & 0x01; RN = res; #ifdef GPU_DIS_SHARQ @@ -2404,6 +2646,7 @@ static void gpu_opcode_sharq(void) #endif } + static void gpu_opcode_sh(void) { #ifdef GPU_DIS_SH @@ -2413,7 +2656,7 @@ static void gpu_opcode_sh(void) if (RM & 0x80000000) // Shift left { gpu_flag_c = RN >> 31; - RN = ((int32)RM <= -32 ? 0 : RN << -(int32)RM); + RN = ((int32_t)RM <= -32 ? 0 : RN << -(int32_t)RM); } else // Shift right { @@ -2427,12 +2670,14 @@ static void gpu_opcode_sh(void) #endif } + //Temporary: Testing only! //#include "gpu2.cpp" //#include "gpu3.cpp" #else + // New thread-safe GPU core int GPUCore(void * data) @@ -2440,3 +2685,4 @@ int GPUCore(void * data) } #endif +