X-Git-Url: http://shamusworld.gotdns.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fgpu.cpp;h=fbf26a0e6846fba758246ee0cec664eaf677f044;hb=c3254eecf4ad7be7d10cde6a0b803c09de883914;hp=49a862a1fe945efff93633bd3a1b5810436b357c;hpb=c436dad60e34fb9da720a89db917eb4cf4e3a624;p=virtualjaguar diff --git a/src/gpu.cpp b/src/gpu.cpp index 49a862a..fbf26a0 100644 --- a/src/gpu.cpp +++ b/src/gpu.cpp @@ -1376,6 +1376,7 @@ GPU opcodes use (offset punch--vertically below bad guy): nop 41362 */ + static void gpu_opcode_jump(void) { #ifdef GPU_DIS_JUMP @@ -1419,6 +1420,7 @@ if (gpu_start_log) #endif } + static void gpu_opcode_jr(void) { #ifdef GPU_DIS_JR @@ -1474,6 +1476,7 @@ if (gpu_start_log) #endif } + static void gpu_opcode_add(void) { #ifdef GPU_DIS_ADD @@ -1489,6 +1492,7 @@ static void gpu_opcode_add(void) #endif } + static void gpu_opcode_addc(void) { #ifdef GPU_DIS_ADDC @@ -1514,6 +1518,7 @@ static void gpu_opcode_addc(void) #endif } + static void gpu_opcode_addq(void) { #ifdef GPU_DIS_ADDQ @@ -1530,6 +1535,7 @@ static void gpu_opcode_addq(void) #endif } + static void gpu_opcode_addqt(void) { #ifdef GPU_DIS_ADDQT @@ -1543,6 +1549,7 @@ static void gpu_opcode_addqt(void) #endif } + static void gpu_opcode_sub(void) { #ifdef GPU_DIS_SUB @@ -1558,34 +1565,26 @@ static void gpu_opcode_sub(void) #endif } + static void gpu_opcode_subc(void) { #ifdef GPU_DIS_SUBC if (doGPUDis) WriteLog("%06X: SUBC R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN); #endif - uint32_t res = RN - RM - gpu_flag_c; - uint32_t borrow = gpu_flag_c; -// SET_ZNC_SUB(RN, RM, res); //???BUG??? YES!!! -//No matter how you do it, there is a problem. With below, it's 0-0 with carry, -//and the one below it it's FFFFFFFF - FFFFFFFF with carry... !!! FIX !!! -// SET_ZNC_SUB(RN - borrow, RM, res); - SET_ZNC_SUB(RN, RM + borrow, res); - RN = res; + // This is how the GPU ALU does it--Two's complement with inverted carry + uint64_t res = (uint64_t)RN + (uint64_t)(RM ^ 0xFFFFFFFF) + (gpu_flag_c ^ 1); + // Carry out of the result is inverted too + gpu_flag_c = ((res >> 32) & 0x01) ^ 1; + RN = (res & 0xFFFFFFFF); + SET_ZN(RN); #ifdef GPU_DIS_SUBC if (doGPUDis) WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN); #endif } -/* -N = 5, M = 3, 3 - 5 = -2, C = 1... Or, in our case: -N = 0, M = 1, 0 - 1 = -1, C = 0! -#define SET_C_SUB(a,b) (gpu_flag_c = ((uint32_t)(b) > (uint32_t)(a))) -#define SET_ZN(r) SET_N(r); SET_Z(r) -#define SET_ZNC_ADD(a,b,r) SET_N(r); SET_Z(r); SET_C_ADD(a,b) -#define SET_ZNC_SUB(a,b,r) SET_N(r); SET_Z(r); SET_C_SUB(a,b) -*/ + static void gpu_opcode_subq(void) { #ifdef GPU_DIS_SUBQ @@ -1602,6 +1601,7 @@ static void gpu_opcode_subq(void) #endif } + static void gpu_opcode_subqt(void) { #ifdef GPU_DIS_SUBQT @@ -1615,6 +1615,7 @@ static void gpu_opcode_subqt(void) #endif } + static void gpu_opcode_cmp(void) { #ifdef GPU_DIS_CMP @@ -1629,6 +1630,7 @@ static void gpu_opcode_cmp(void) #endif } + static void gpu_opcode_cmpq(void) { static int32_t sqtable[32] = @@ -1646,6 +1648,7 @@ static void gpu_opcode_cmpq(void) #endif } + static void gpu_opcode_and(void) { #ifdef GPU_DIS_AND @@ -1660,6 +1663,7 @@ static void gpu_opcode_and(void) #endif } + static void gpu_opcode_or(void) { #ifdef GPU_DIS_OR @@ -1674,6 +1678,7 @@ static void gpu_opcode_or(void) #endif } + static void gpu_opcode_xor(void) { #ifdef GPU_DIS_XOR @@ -1688,6 +1693,7 @@ static void gpu_opcode_xor(void) #endif } + static void gpu_opcode_not(void) { #ifdef GPU_DIS_NOT @@ -1702,6 +1708,7 @@ static void gpu_opcode_not(void) #endif } + static void gpu_opcode_move_pc(void) { #ifdef GPU_DIS_MOVEPC @@ -1717,6 +1724,7 @@ static void gpu_opcode_move_pc(void) #endif } + static void gpu_opcode_sat8(void) { #ifdef GPU_DIS_SAT8 @@ -1731,6 +1739,7 @@ static void gpu_opcode_sat8(void) #endif } + static void gpu_opcode_sat16(void) { RN = ((int32_t)RN < 0 ? 0 : (RN > 0xFFFF ? 0xFFFF : RN)); @@ -1743,6 +1752,7 @@ static void gpu_opcode_sat24(void) SET_ZN(RN); } + static void gpu_opcode_store_r14_indexed(void) { #ifdef GPU_DIS_STORE14I @@ -1761,6 +1771,7 @@ static void gpu_opcode_store_r14_indexed(void) #endif } + static void gpu_opcode_store_r15_indexed(void) { #ifdef GPU_DIS_STORE15I @@ -1779,6 +1790,7 @@ static void gpu_opcode_store_r15_indexed(void) #endif } + static void gpu_opcode_load_r14_ri(void) { #ifdef GPU_DIS_LOAD14R @@ -1801,6 +1813,7 @@ static void gpu_opcode_load_r14_ri(void) #endif } + static void gpu_opcode_load_r15_ri(void) { #ifdef GPU_DIS_LOAD15R @@ -1823,6 +1836,7 @@ static void gpu_opcode_load_r15_ri(void) #endif } + static void gpu_opcode_store_r14_ri(void) { #ifdef GPU_DIS_STORE14R @@ -1841,6 +1855,7 @@ static void gpu_opcode_store_r14_ri(void) #endif } + static void gpu_opcode_store_r15_ri(void) { #ifdef GPU_DIS_STORE15R @@ -1859,6 +1874,7 @@ static void gpu_opcode_store_r15_ri(void) #endif } + static void gpu_opcode_nop(void) { #ifdef GPU_DIS_NOP @@ -1867,6 +1883,7 @@ static void gpu_opcode_nop(void) #endif } + static void gpu_opcode_pack(void) { #ifdef GPU_DIS_PACK @@ -1886,6 +1903,7 @@ static void gpu_opcode_pack(void) #endif } + static void gpu_opcode_storeb(void) { #ifdef GPU_DIS_STOREB @@ -1900,6 +1918,7 @@ static void gpu_opcode_storeb(void) JaguarWriteByte(RM, RN, GPU); } + static void gpu_opcode_storew(void) { #ifdef GPU_DIS_STOREW @@ -1919,6 +1938,7 @@ static void gpu_opcode_storew(void) #endif } + static void gpu_opcode_store(void) { #ifdef GPU_DIS_STORE @@ -1935,6 +1955,7 @@ static void gpu_opcode_store(void) #endif } + static void gpu_opcode_storep(void) { #ifdef GPU_CORRECT_ALIGNMENT @@ -1970,6 +1991,7 @@ static void gpu_opcode_loadb(void) #endif } + static void gpu_opcode_loadw(void) { #ifdef GPU_DIS_LOADW @@ -1993,6 +2015,7 @@ static void gpu_opcode_loadw(void) #endif } + // According to the docs, & "Do The Same", this address is long aligned... // So let's try it: // And it works!!! Need to fix all instances... @@ -2036,6 +2059,7 @@ static void gpu_opcode_load(void) #endif } + static void gpu_opcode_loadp(void) { #ifdef GPU_CORRECT_ALIGNMENT @@ -2055,6 +2079,7 @@ static void gpu_opcode_loadp(void) #endif } + static void gpu_opcode_load_r14_indexed(void) { #ifdef GPU_DIS_LOAD14I @@ -2077,6 +2102,7 @@ static void gpu_opcode_load_r14_indexed(void) #endif } + static void gpu_opcode_load_r15_indexed(void) { #ifdef GPU_DIS_LOAD15I @@ -2099,6 +2125,7 @@ static void gpu_opcode_load_r15_indexed(void) #endif } + static void gpu_opcode_movei(void) { #ifdef GPU_DIS_MOVEI @@ -2114,6 +2141,7 @@ static void gpu_opcode_movei(void) #endif } + static void gpu_opcode_moveta(void) { #ifdef GPU_DIS_MOVETA @@ -2127,6 +2155,7 @@ static void gpu_opcode_moveta(void) #endif } + static void gpu_opcode_movefa(void) { #ifdef GPU_DIS_MOVEFA @@ -2140,6 +2169,7 @@ static void gpu_opcode_movefa(void) #endif } + static void gpu_opcode_move(void) { #ifdef GPU_DIS_MOVE @@ -2153,6 +2183,7 @@ static void gpu_opcode_move(void) #endif } + static void gpu_opcode_moveq(void) { #ifdef GPU_DIS_MOVEQ @@ -2166,11 +2197,13 @@ static void gpu_opcode_moveq(void) #endif } + static void gpu_opcode_resmac(void) { RN = gpu_acc; } + static void gpu_opcode_imult(void) { #ifdef GPU_DIS_IMULT @@ -2185,6 +2218,7 @@ static void gpu_opcode_imult(void) #endif } + static void gpu_opcode_mult(void) { #ifdef GPU_DIS_MULT @@ -2200,6 +2234,7 @@ static void gpu_opcode_mult(void) #endif } + static void gpu_opcode_bclr(void) { #ifdef GPU_DIS_BCLR @@ -2215,6 +2250,7 @@ static void gpu_opcode_bclr(void) #endif } + static void gpu_opcode_btst(void) { #ifdef GPU_DIS_BTST @@ -2228,6 +2264,7 @@ static void gpu_opcode_btst(void) #endif } + static void gpu_opcode_bset(void) { #ifdef GPU_DIS_BSET @@ -2243,12 +2280,14 @@ static void gpu_opcode_bset(void) #endif } + static void gpu_opcode_imacn(void) { uint32_t res = (int16_t)RM * (int16_t)(RN); gpu_acc += res; } + static void gpu_opcode_mtoi(void) { uint32_t _RM = RM; @@ -2256,6 +2295,7 @@ static void gpu_opcode_mtoi(void) SET_ZN(res); } + static void gpu_opcode_normi(void) { uint32_t _RM = RM; @@ -2320,6 +2360,7 @@ static void gpu_opcode_mmult(void) SET_ZN(res); } + static void gpu_opcode_abs(void) { #ifdef GPU_DIS_ABS @@ -2342,19 +2383,14 @@ static void gpu_opcode_abs(void) #endif } + static void gpu_opcode_div(void) // RN / RM { #ifdef GPU_DIS_DIV if (doGPUDis) WriteLog("%06X: DIV R%02u, R%02u (%s) [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, (gpu_div_control & 0x01 ? "16.16" : "32"), gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN); #endif -// NOTE: remainder is NOT calculated correctly here! -// The original tried to get it right by checking to see if the -// remainder was negative, but that's too late... -// The code there should do it now, but I'm not 100% sure... -// [Now it should be correct, but not displaying correct behavior of the actual -// hardware. A step in the right direction.] - +#if 0 if (RM) { if (gpu_div_control & 0x01) // 16.16 division @@ -2369,16 +2405,35 @@ static void gpu_opcode_div(void) // RN / RM gpu_remain = RN % RM; RN = RN / RM; } - -// What we really should do here is figure out why this condition -// happens in the real divide unit and emulate *that* behavior. -#if 0 - if ((gpu_remain - RM) & 0x80000000) // If the result would have been negative... - gpu_remain -= RM; // Then make it negative! -#endif } else + { + // This is what happens according to SCPCD. NYAN! RN = 0xFFFFFFFF; + gpu_remain = 0; + } +#else + // Real algorithm, courtesy of SCPCD: NYAN! + uint32_t q = RN; + uint32_t r = 0; + + // If 16.16 division, stuff top 16 bits of RN into remainder and put the + // bottom 16 of RN in top 16 of quotient + if (gpu_div_control & 0x01) + q <<= 16, r = RN >> 16; + + for(int i=0; i<32; i++) + { +// uint32_t sign = (r >> 31) & 0x01; + uint32_t sign = r & 0x80000000; + r = (r << 1) | ((q >> 31) & 0x01); + r += (sign ? RM : -RM); + q = (q << 1) | (((~r) >> 31) & 0x01); + } + + RN = q; + gpu_remain = r; +#endif #ifdef GPU_DIS_DIV if (doGPUDis) @@ -2386,6 +2441,7 @@ static void gpu_opcode_div(void) // RN / RM #endif } + static void gpu_opcode_imultn(void) { uint32_t res = (int32_t)((int16_t)RN * (int16_t)RM); @@ -2394,6 +2450,7 @@ static void gpu_opcode_imultn(void) SET_FLAG_N(res); } + static void gpu_opcode_neg(void) { #ifdef GPU_DIS_NEG @@ -2409,6 +2466,7 @@ static void gpu_opcode_neg(void) #endif } + static void gpu_opcode_shlq(void) { #ifdef GPU_DIS_SHLQ @@ -2427,6 +2485,7 @@ static void gpu_opcode_shlq(void) #endif } + static void gpu_opcode_shrq(void) { #ifdef GPU_DIS_SHRQ @@ -2443,6 +2502,7 @@ static void gpu_opcode_shrq(void) #endif } + static void gpu_opcode_ror(void) { #ifdef GPU_DIS_ROR @@ -2459,6 +2519,7 @@ static void gpu_opcode_ror(void) #endif } + static void gpu_opcode_rorq(void) { #ifdef GPU_DIS_RORQ @@ -2476,6 +2537,7 @@ static void gpu_opcode_rorq(void) #endif } + static void gpu_opcode_sha(void) { /* int dreg = jaguar.op & 31; @@ -2550,6 +2612,7 @@ static void gpu_opcode_sha(void) SET_FLAG_N(_RN);*/ } + static void gpu_opcode_sharq(void) { #ifdef GPU_DIS_SHARQ @@ -2565,6 +2628,7 @@ static void gpu_opcode_sharq(void) #endif } + static void gpu_opcode_sh(void) { #ifdef GPU_DIS_SH @@ -2588,12 +2652,14 @@ static void gpu_opcode_sh(void) #endif } + //Temporary: Testing only! //#include "gpu2.cpp" //#include "gpu3.cpp" #else + // New thread-safe GPU core int GPUCore(void * data) @@ -2601,3 +2667,4 @@ int GPUCore(void * data) } #endif +