- uint32_t res = RN - RM - gpu_flag_c;
- uint32_t borrow = gpu_flag_c;
-// SET_ZNC_SUB(RN, RM, res); //???BUG??? YES!!!
-//No matter how you do it, there is a problem. With below, it's 0-0 with carry,
-//and the one below it it's FFFFFFFF - FFFFFFFF with carry... !!! FIX !!!
-// SET_ZNC_SUB(RN - borrow, RM, res);
- SET_ZNC_SUB(RN, RM + borrow, res);
- RN = res;
+ // This is how the GPU ALU does it--Two's complement with inverted carry
+ uint64_t res = (uint64_t)RN + (uint64_t)(RM ^ 0xFFFFFFFF) + (gpu_flag_c ^ 1);
+ // Carry out of the result is inverted too
+ gpu_flag_c = ((res >> 32) & 0x01) ^ 1;
+ RN = (res & 0xFFFFFFFF);
+ SET_ZN(RN);