X-Git-Url: http://shamusworld.gotdns.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fgpu.cpp;h=9ee426425e42c69f3624939930258765cfde8a92;hb=240a6df48aebb5e17f82452c32e770cdfe9b5d5e;hp=1f1bef9351cbe14eb6ab5ba18a1c94d00d5fa7e8;hpb=5d76d651dfc3aa0a2e810e6b6db2ae8a2e34c53e;p=virtualjaguar diff --git a/src/gpu.cpp b/src/gpu.cpp index 1f1bef9..9ee4264 100644 --- a/src/gpu.cpp +++ b/src/gpu.cpp @@ -99,8 +99,8 @@ #define GPU_DIS_SUBQT #define GPU_DIS_XOR -//bool doGPUDis = false; -bool doGPUDis = true; +bool doGPUDis = false; +//bool doGPUDis = true; #endif /* @@ -319,9 +319,10 @@ static uint32_t gpu_pointer_to_matrix; static uint32_t gpu_data_organization; static uint32_t gpu_control; static uint32_t gpu_div_control; -// There is a distinct advantage to having these separated out--there's no need to clear -// a bit before writing a result. I.e., if the result of an operation leaves a zero in -// the carry flag, you don't have to zero gpu_flag_c before you can write that zero! +// There is a distinct advantage to having these separated out--there's no need +// to clear a bit before writing a result. I.e., if the result of an operation +// leaves a zero in the carry flag, you don't have to zero gpu_flag_c before +// you can write that zero! static uint8_t gpu_flag_z, gpu_flag_n, gpu_flag_c; uint32_t gpu_reg_bank_0[32]; uint32_t gpu_reg_bank_1[32]; @@ -1050,11 +1051,13 @@ void GPUReset(void) *((uint32_t *)(&gpu_ram_8[i])) = rand(); } + uint32_t GPUReadPC(void) { return gpu_pc; } + void GPUResetStats(void) { for(uint32_t i=0; i<64; i++) @@ -1062,6 +1065,7 @@ void GPUResetStats(void) WriteLog("--> GPU stats were reset!\n"); } + void GPUDumpDisassembly(void) { char buffer[512]; @@ -1076,6 +1080,7 @@ void GPUDumpDisassembly(void) } } + void GPUDumpRegisters(void) { WriteLog("\n---[GPU flags: NCZ %d%d%d]-----------------------\n", gpu_flag_n, gpu_flag_c, gpu_flag_z); @@ -1099,6 +1104,7 @@ void GPUDumpRegisters(void) } } + void GPUDumpMemory(void) { WriteLog("\n---[GPU data at 00F03000]---------------------------\n"); @@ -1107,8 +1113,23 @@ void GPUDumpMemory(void) gpu_ram_8[i+1], gpu_ram_8[i+2], gpu_ram_8[i+3]); } + void GPUDone(void) { + WriteLog("\n\n---------------------------------------------------------------------\n"); + WriteLog("GPU I/O Registers\n"); + WriteLog("---------------------------------------------------------------------\n"); + WriteLog("F0%04X (G_FLAGS): $%06X\n", 0x2100, (gpu_flags & 0xFFFFFFF8) | (gpu_flag_n << 2) | (gpu_flag_c << 1) | gpu_flag_z); + WriteLog("F0%04X (G_MTXC): $%04X\n", 0x2104, gpu_matrix_control); + WriteLog("F0%04X (G_MTXA): $%04X\n", 0x2108, gpu_pointer_to_matrix); + WriteLog("F0%04X (G_END): $%02X\n", 0x210C, gpu_data_organization); + WriteLog("F0%04X (G_PC): $%06X\n", 0x2110, gpu_pc); + WriteLog("F0%04X (G_CTRL): $%06X\n", 0x2114, gpu_control); + WriteLog("F0%04X (G_HIDATA): $%08X\n", 0x2118, gpu_hidata); + WriteLog("F0%04X (G_REMAIN): $%08X\n", 0x211C, gpu_remain); + WriteLog("F0%04X (G_DIVCTRL): $%02X\n", 0x211C, gpu_div_control); + WriteLog("---------------------------------------------------------------------\n\n\n"); + WriteLog("GPU: Stopped at PC=%08X (GPU %s running)\n", (unsigned int)gpu_pc, GPU_RUNNING ? "was" : "wasn't"); // Get the interrupt latch & enable bits @@ -1125,12 +1146,9 @@ void GPUDone(void) WriteLog("\t%17s %lu\n", gpu_opcode_str[i], gpu_opcode_use[i]); } WriteLog("\n"); - -// memory_free(gpu_ram_8); -// memory_free(gpu_reg_bank_0); -// memory_free(gpu_reg_bank_1); } + // // Main GPU execution core // @@ -1192,6 +1210,10 @@ if (gpu_ram_8[0x054] == 0x98 && gpu_ram_8[0x055] == 0x0A && gpu_ram_8[0x056] == /* gpu_flag_c = (gpu_flag_c ? 1 : 0); gpu_flag_z = (gpu_flag_z ? 1 : 0); gpu_flag_n = (gpu_flag_n ? 1 : 0);*/ +#if 0 +if (gpu_pc == 0xF03200) + doGPUDis = true; +#endif uint16_t opcode = GPUReadWord(gpu_pc, GPU); uint32_t index = opcode >> 10; @@ -1372,6 +1394,7 @@ GPU opcodes use (offset punch--vertically below bad guy): nop 41362 */ + static void gpu_opcode_jump(void) { #ifdef GPU_DIS_JUMP @@ -1415,6 +1438,7 @@ if (gpu_start_log) #endif } + static void gpu_opcode_jr(void) { #ifdef GPU_DIS_JR @@ -1470,6 +1494,7 @@ if (gpu_start_log) #endif } + static void gpu_opcode_add(void) { #ifdef GPU_DIS_ADD @@ -1485,6 +1510,7 @@ static void gpu_opcode_add(void) #endif } + static void gpu_opcode_addc(void) { #ifdef GPU_DIS_ADDC @@ -1510,6 +1536,7 @@ static void gpu_opcode_addc(void) #endif } + static void gpu_opcode_addq(void) { #ifdef GPU_DIS_ADDQ @@ -1526,6 +1553,7 @@ static void gpu_opcode_addq(void) #endif } + static void gpu_opcode_addqt(void) { #ifdef GPU_DIS_ADDQT @@ -1539,6 +1567,7 @@ static void gpu_opcode_addqt(void) #endif } + static void gpu_opcode_sub(void) { #ifdef GPU_DIS_SUB @@ -1554,34 +1583,26 @@ static void gpu_opcode_sub(void) #endif } + static void gpu_opcode_subc(void) { #ifdef GPU_DIS_SUBC if (doGPUDis) WriteLog("%06X: SUBC R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN); #endif - uint32_t res = RN - RM - gpu_flag_c; - uint32_t borrow = gpu_flag_c; -// SET_ZNC_SUB(RN, RM, res); //???BUG??? YES!!! -//No matter how you do it, there is a problem. With below, it's 0-0 with carry, -//and the one below it it's FFFFFFFF - FFFFFFFF with carry... !!! FIX !!! -// SET_ZNC_SUB(RN - borrow, RM, res); - SET_ZNC_SUB(RN, RM + borrow, res); - RN = res; + // This is how the GPU ALU does it--Two's complement with inverted carry + uint64_t res = (uint64_t)RN + (uint64_t)(RM ^ 0xFFFFFFFF) + (gpu_flag_c ^ 1); + // Carry out of the result is inverted too + gpu_flag_c = ((res >> 32) & 0x01) ^ 1; + RN = (res & 0xFFFFFFFF); + SET_ZN(RN); #ifdef GPU_DIS_SUBC if (doGPUDis) WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN); #endif } -/* -N = 5, M = 3, 3 - 5 = -2, C = 1... Or, in our case: -N = 0, M = 1, 0 - 1 = -1, C = 0! -#define SET_C_SUB(a,b) (gpu_flag_c = ((uint32_t)(b) > (uint32_t)(a))) -#define SET_ZN(r) SET_N(r); SET_Z(r) -#define SET_ZNC_ADD(a,b,r) SET_N(r); SET_Z(r); SET_C_ADD(a,b) -#define SET_ZNC_SUB(a,b,r) SET_N(r); SET_Z(r); SET_C_SUB(a,b) -*/ + static void gpu_opcode_subq(void) { #ifdef GPU_DIS_SUBQ @@ -1598,6 +1619,7 @@ static void gpu_opcode_subq(void) #endif } + static void gpu_opcode_subqt(void) { #ifdef GPU_DIS_SUBQT @@ -1611,6 +1633,7 @@ static void gpu_opcode_subqt(void) #endif } + static void gpu_opcode_cmp(void) { #ifdef GPU_DIS_CMP @@ -1625,6 +1648,7 @@ static void gpu_opcode_cmp(void) #endif } + static void gpu_opcode_cmpq(void) { static int32_t sqtable[32] = @@ -1642,6 +1666,7 @@ static void gpu_opcode_cmpq(void) #endif } + static void gpu_opcode_and(void) { #ifdef GPU_DIS_AND @@ -1656,6 +1681,7 @@ static void gpu_opcode_and(void) #endif } + static void gpu_opcode_or(void) { #ifdef GPU_DIS_OR @@ -1670,6 +1696,7 @@ static void gpu_opcode_or(void) #endif } + static void gpu_opcode_xor(void) { #ifdef GPU_DIS_XOR @@ -1684,6 +1711,7 @@ static void gpu_opcode_xor(void) #endif } + static void gpu_opcode_not(void) { #ifdef GPU_DIS_NOT @@ -1698,6 +1726,7 @@ static void gpu_opcode_not(void) #endif } + static void gpu_opcode_move_pc(void) { #ifdef GPU_DIS_MOVEPC @@ -1713,6 +1742,7 @@ static void gpu_opcode_move_pc(void) #endif } + static void gpu_opcode_sat8(void) { #ifdef GPU_DIS_SAT8 @@ -1727,6 +1757,7 @@ static void gpu_opcode_sat8(void) #endif } + static void gpu_opcode_sat16(void) { RN = ((int32_t)RN < 0 ? 0 : (RN > 0xFFFF ? 0xFFFF : RN)); @@ -1739,6 +1770,7 @@ static void gpu_opcode_sat24(void) SET_ZN(RN); } + static void gpu_opcode_store_r14_indexed(void) { #ifdef GPU_DIS_STORE14I @@ -1757,6 +1789,7 @@ static void gpu_opcode_store_r14_indexed(void) #endif } + static void gpu_opcode_store_r15_indexed(void) { #ifdef GPU_DIS_STORE15I @@ -1775,6 +1808,7 @@ static void gpu_opcode_store_r15_indexed(void) #endif } + static void gpu_opcode_load_r14_ri(void) { #ifdef GPU_DIS_LOAD14R @@ -1797,6 +1831,7 @@ static void gpu_opcode_load_r14_ri(void) #endif } + static void gpu_opcode_load_r15_ri(void) { #ifdef GPU_DIS_LOAD15R @@ -1819,6 +1854,7 @@ static void gpu_opcode_load_r15_ri(void) #endif } + static void gpu_opcode_store_r14_ri(void) { #ifdef GPU_DIS_STORE14R @@ -1837,6 +1873,7 @@ static void gpu_opcode_store_r14_ri(void) #endif } + static void gpu_opcode_store_r15_ri(void) { #ifdef GPU_DIS_STORE15R @@ -1855,6 +1892,7 @@ static void gpu_opcode_store_r15_ri(void) #endif } + static void gpu_opcode_nop(void) { #ifdef GPU_DIS_NOP @@ -1863,6 +1901,7 @@ static void gpu_opcode_nop(void) #endif } + static void gpu_opcode_pack(void) { #ifdef GPU_DIS_PACK @@ -1882,6 +1921,7 @@ static void gpu_opcode_pack(void) #endif } + static void gpu_opcode_storeb(void) { #ifdef GPU_DIS_STOREB @@ -1896,6 +1936,7 @@ static void gpu_opcode_storeb(void) JaguarWriteByte(RM, RN, GPU); } + static void gpu_opcode_storew(void) { #ifdef GPU_DIS_STOREW @@ -1915,6 +1956,7 @@ static void gpu_opcode_storew(void) #endif } + static void gpu_opcode_store(void) { #ifdef GPU_DIS_STORE @@ -1931,6 +1973,7 @@ static void gpu_opcode_store(void) #endif } + static void gpu_opcode_storep(void) { #ifdef GPU_CORRECT_ALIGNMENT @@ -1966,6 +2009,7 @@ static void gpu_opcode_loadb(void) #endif } + static void gpu_opcode_loadw(void) { #ifdef GPU_DIS_LOADW @@ -1989,6 +2033,7 @@ static void gpu_opcode_loadw(void) #endif } + // According to the docs, & "Do The Same", this address is long aligned... // So let's try it: // And it works!!! Need to fix all instances... @@ -2032,6 +2077,7 @@ static void gpu_opcode_load(void) #endif } + static void gpu_opcode_loadp(void) { #ifdef GPU_CORRECT_ALIGNMENT @@ -2051,6 +2097,7 @@ static void gpu_opcode_loadp(void) #endif } + static void gpu_opcode_load_r14_indexed(void) { #ifdef GPU_DIS_LOAD14I @@ -2073,6 +2120,7 @@ static void gpu_opcode_load_r14_indexed(void) #endif } + static void gpu_opcode_load_r15_indexed(void) { #ifdef GPU_DIS_LOAD15I @@ -2095,6 +2143,7 @@ static void gpu_opcode_load_r15_indexed(void) #endif } + static void gpu_opcode_movei(void) { #ifdef GPU_DIS_MOVEI @@ -2110,6 +2159,7 @@ static void gpu_opcode_movei(void) #endif } + static void gpu_opcode_moveta(void) { #ifdef GPU_DIS_MOVETA @@ -2123,6 +2173,7 @@ static void gpu_opcode_moveta(void) #endif } + static void gpu_opcode_movefa(void) { #ifdef GPU_DIS_MOVEFA @@ -2136,6 +2187,7 @@ static void gpu_opcode_movefa(void) #endif } + static void gpu_opcode_move(void) { #ifdef GPU_DIS_MOVE @@ -2149,6 +2201,7 @@ static void gpu_opcode_move(void) #endif } + static void gpu_opcode_moveq(void) { #ifdef GPU_DIS_MOVEQ @@ -2162,11 +2215,13 @@ static void gpu_opcode_moveq(void) #endif } + static void gpu_opcode_resmac(void) { RN = gpu_acc; } + static void gpu_opcode_imult(void) { #ifdef GPU_DIS_IMULT @@ -2181,6 +2236,7 @@ static void gpu_opcode_imult(void) #endif } + static void gpu_opcode_mult(void) { #ifdef GPU_DIS_MULT @@ -2188,6 +2244,7 @@ static void gpu_opcode_mult(void) WriteLog("%06X: MULT R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN); #endif RN = (uint16_t)RM * (uint16_t)RN; +// RN = (RM & 0xFFFF) * (RN & 0xFFFF); SET_ZN(RN); #ifdef GPU_DIS_MULT if (doGPUDis) @@ -2195,6 +2252,7 @@ static void gpu_opcode_mult(void) #endif } + static void gpu_opcode_bclr(void) { #ifdef GPU_DIS_BCLR @@ -2210,6 +2268,7 @@ static void gpu_opcode_bclr(void) #endif } + static void gpu_opcode_btst(void) { #ifdef GPU_DIS_BTST @@ -2223,6 +2282,7 @@ static void gpu_opcode_btst(void) #endif } + static void gpu_opcode_bset(void) { #ifdef GPU_DIS_BSET @@ -2238,12 +2298,14 @@ static void gpu_opcode_bset(void) #endif } + static void gpu_opcode_imacn(void) { uint32_t res = (int16_t)RM * (int16_t)(RN); gpu_acc += res; } + static void gpu_opcode_mtoi(void) { uint32_t _RM = RM; @@ -2251,6 +2313,7 @@ static void gpu_opcode_mtoi(void) SET_ZN(res); } + static void gpu_opcode_normi(void) { uint32_t _RM = RM; @@ -2315,6 +2378,7 @@ static void gpu_opcode_mmult(void) SET_ZN(res); } + static void gpu_opcode_abs(void) { #ifdef GPU_DIS_ABS @@ -2337,64 +2401,65 @@ static void gpu_opcode_abs(void) #endif } + static void gpu_opcode_div(void) // RN / RM { #ifdef GPU_DIS_DIV if (doGPUDis) WriteLog("%06X: DIV R%02u, R%02u (%s) [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, (gpu_div_control & 0x01 ? "16.16" : "32"), gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN); #endif -// NOTE: remainder is NOT calculated correctly here! -// The original tried to get it right by checking to see if the -// remainder was negative, but that's too late... -// The code there should do it now, but I'm not 100% sure... - +#if 0 if (RM) { if (gpu_div_control & 0x01) // 16.16 division { - RN = ((uint64_t)RN << 16) / RM; gpu_remain = ((uint64_t)RN << 16) % RM; + RN = ((uint64_t)RN << 16) / RM; } else { - RN = RN / RM; + // We calculate the remainder first because we destroy RN after + // this by assigning it to itself. gpu_remain = RN % RM; + RN = RN / RM; } - - if ((gpu_remain - RM) & 0x80000000) // If the result would have been negative... - gpu_remain -= RM; // Then make it negative! } else + { + // This is what happens according to SCPCD. NYAN! RN = 0xFFFFFFFF; + gpu_remain = 0; + } +#else + // Real algorithm, courtesy of SCPCD: NYAN! + uint32_t q = RN; + uint32_t r = 0; -/* uint32_t _RM=RM; - uint32_t _RN=RN; + // If 16.16 division, stuff top 16 bits of RN into remainder and put the + // bottom 16 of RN in top 16 of quotient + if (gpu_div_control & 0x01) + q <<= 16, r = RN >> 16; - if (_RM) + for(int i=0; i<32; i++) { - if (gpu_div_control & 1) - { - gpu_remain = (((uint64_t)_RN) << 16) % _RM; - if (gpu_remain&0x80000000) - gpu_remain-=_RM; - RN = (((uint64_t)_RN) << 16) / _RM; - } - else - { - gpu_remain = _RN % _RM; - if (gpu_remain&0x80000000) - gpu_remain-=_RM; - RN/=_RM; - } +// uint32_t sign = (r >> 31) & 0x01; + uint32_t sign = r & 0x80000000; + r = (r << 1) | ((q >> 31) & 0x01); + r += (sign ? RM : -RM); + q = (q << 1) | (((~r) >> 31) & 0x01); } - else - RN=0xffffffff;*/ + + RN = q; + gpu_remain = r; +#endif + #ifdef GPU_DIS_DIV if (doGPUDis) WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] Remainder: %08X\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN, gpu_remain); #endif } + static void gpu_opcode_imultn(void) { uint32_t res = (int32_t)((int16_t)RN * (int16_t)RM); @@ -2403,6 +2468,7 @@ static void gpu_opcode_imultn(void) SET_FLAG_N(res); } + static void gpu_opcode_neg(void) { #ifdef GPU_DIS_NEG @@ -2418,6 +2484,7 @@ static void gpu_opcode_neg(void) #endif } + static void gpu_opcode_shlq(void) { #ifdef GPU_DIS_SHLQ @@ -2436,6 +2503,7 @@ static void gpu_opcode_shlq(void) #endif } + static void gpu_opcode_shrq(void) { #ifdef GPU_DIS_SHRQ @@ -2452,6 +2520,7 @@ static void gpu_opcode_shrq(void) #endif } + static void gpu_opcode_ror(void) { #ifdef GPU_DIS_ROR @@ -2468,6 +2537,7 @@ static void gpu_opcode_ror(void) #endif } + static void gpu_opcode_rorq(void) { #ifdef GPU_DIS_RORQ @@ -2485,6 +2555,7 @@ static void gpu_opcode_rorq(void) #endif } + static void gpu_opcode_sha(void) { /* int dreg = jaguar.op & 31; @@ -2559,6 +2630,7 @@ static void gpu_opcode_sha(void) SET_FLAG_N(_RN);*/ } + static void gpu_opcode_sharq(void) { #ifdef GPU_DIS_SHARQ @@ -2574,6 +2646,7 @@ static void gpu_opcode_sharq(void) #endif } + static void gpu_opcode_sh(void) { #ifdef GPU_DIS_SH @@ -2597,12 +2670,14 @@ static void gpu_opcode_sh(void) #endif } + //Temporary: Testing only! //#include "gpu2.cpp" //#include "gpu3.cpp" #else + // New thread-safe GPU core int GPUCore(void * data) @@ -2610,3 +2685,4 @@ int GPUCore(void * data) } #endif +