+#if 1
+
//
// GPU Core
//
// Originally by David Raingeard (Cal2)
// GCC/SDL port by Niels Wagenaar (Linux/WIN32) and Caz (BeOS)
-// Cleanups, endian wrongness, and bad ASM amelioration by James L. Hammons
+// Cleanups, endian wrongness, and bad ASM amelioration by James Hammons
+// (C) 2010 Underground Software
+//
+// JLH = James Hammons <jlhamm@acm.org>
+//
+// Who When What
+// --- ---------- -------------------------------------------------------------
+// JLH 01/16/2010 Created this log ;-)
+// JLH 11/26/2011 Added fixes for LOAD/STORE alignment issues
+
+//
// Note: Endian wrongness probably stems from the MAME origins of this emu and
// the braindead way in which MAME handles memory. :-)
//
// Same problem with ADDC...
//
-#include <stdlib.h>
#include "gpu.h"
+#include <stdlib.h>
+#include <string.h> // For memset
+#include "dsp.h"
+#include "jagdasm.h"
+#include "jaguar.h"
+#include "log.h"
+#include "m68000/m68kinterface.h"
+//#include "memory.h"
+#include "tom.h"
+
+
+// Seems alignment in loads & stores was off...
+#define GPU_CORRECT_ALIGNMENT
//#define GPU_DEBUG
// For GPU disassembly...
+#if 0
#define GPU_DIS_ABS
#define GPU_DIS_ADD
#define GPU_DIS_ADDC
bool doGPUDis = false;
//bool doGPUDis = true;
-//*/
+#endif
+
/*
GPU opcodes use (BIOS flying ATARI logo):
+ add 357416
// Private function prototypes
void GPUUpdateRegisterBanks(void);
-
void GPUDumpDisassembly(void);
void GPUDumpRegisters(void);
void GPUDumpMemory(void);
static void gpu_opcode_pack(void);
// This is wrong, since it doesn't take pipeline effects into account. !!! FIX !!!
-/*uint8 gpu_opcode_cycles[64] =
+/*uint8_t gpu_opcode_cycles[64] =
{
3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3,
//This is wrong, wrong, WRONG, but it seems to work for the time being...
//(That is, it fixes Flip Out which relies on GPU timing rather than semaphores. Bad developers! Bad!)
//What's needed here is a way to take pipeline effects into account (including pipeline stalls!)...
-/*uint8 gpu_opcode_cycles[64] =
+/*uint8_t gpu_opcode_cycles[64] =
{
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 4, 1,
1, 1, 3, 3, 1, 1, 1, 1
};//*/
-uint8 gpu_opcode_cycles[64] =
+uint8_t gpu_opcode_cycles[64] =
{
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1
};//*/
-void (*gpu_opcode[64])()=
-{
+void (*gpu_opcode[64])()=
+{
gpu_opcode_add, gpu_opcode_addc, gpu_opcode_addq, gpu_opcode_addqt,
gpu_opcode_sub, gpu_opcode_subc, gpu_opcode_subq, gpu_opcode_subqt,
gpu_opcode_neg, gpu_opcode_and, gpu_opcode_or, gpu_opcode_xor,
gpu_opcode_store_r14_ri, gpu_opcode_store_r15_ri, gpu_opcode_sat24, gpu_opcode_pack,
};
-static uint8 * gpu_ram_8;
-uint32 gpu_pc;
-static uint32 gpu_acc;
-static uint32 gpu_remain;
-static uint32 gpu_hidata;
-static uint32 gpu_flags;
-static uint32 gpu_matrix_control;
-static uint32 gpu_pointer_to_matrix;
-static uint32 gpu_data_organization;
-static uint32 gpu_control;
-static uint32 gpu_div_control;
-// There is a distinct advantage to having these separated out--there's no need to clear
-// a bit before writing a result. I.e., if the result of an operation leaves a zero in
-// the carry flag, you don't have to zero gpu_flag_c before you can write that zero!
-static uint8 gpu_flag_z, gpu_flag_n, gpu_flag_c;
-static uint32 * gpu_reg_bank_0;
-static uint32 * gpu_reg_bank_1;
-static uint32 * gpu_reg;
-static uint32 * gpu_alternate_reg;
-
-static uint32 gpu_instruction;
-static uint32 gpu_opcode_first_parameter;
-static uint32 gpu_opcode_second_parameter;
+static uint8_t gpu_ram_8[0x1000];
+uint32_t gpu_pc;
+static uint32_t gpu_acc;
+static uint32_t gpu_remain;
+static uint32_t gpu_hidata;
+static uint32_t gpu_flags;
+static uint32_t gpu_matrix_control;
+static uint32_t gpu_pointer_to_matrix;
+static uint32_t gpu_data_organization;
+static uint32_t gpu_control;
+static uint32_t gpu_div_control;
+// There is a distinct advantage to having these separated out--there's no need
+// to clear a bit before writing a result. I.e., if the result of an operation
+// leaves a zero in the carry flag, you don't have to zero gpu_flag_c before
+// you can write that zero!
+static uint8_t gpu_flag_z, gpu_flag_n, gpu_flag_c;
+uint32_t gpu_reg_bank_0[32];
+uint32_t gpu_reg_bank_1[32];
+static uint32_t * gpu_reg;
+static uint32_t * gpu_alternate_reg;
+
+static uint32_t gpu_instruction;
+static uint32_t gpu_opcode_first_parameter;
+static uint32_t gpu_opcode_second_parameter;
#define GPU_RUNNING (gpu_control & 0x01)
#define IMM_2 gpu_opcode_second_parameter
#define SET_FLAG_Z(r) (gpu_flag_z = ((r) == 0));
-#define SET_FLAG_N(r) (gpu_flag_n = (((uint32)(r) >> 31) & 0x01));
+#define SET_FLAG_N(r) (gpu_flag_n = (((uint32_t)(r) >> 31) & 0x01));
#define RESET_FLAG_Z() gpu_flag_z = 0;
#define RESET_FLAG_N() gpu_flag_n = 0;
-#define RESET_FLAG_C() gpu_flag_c = 0;
+#define RESET_FLAG_C() gpu_flag_c = 0;
#define CLR_Z (gpu_flag_z = 0)
#define CLR_ZN (gpu_flag_z = gpu_flag_n = 0)
#define CLR_ZNC (gpu_flag_z = gpu_flag_n = gpu_flag_c = 0)
#define SET_Z(r) (gpu_flag_z = ((r) == 0))
-#define SET_N(r) (gpu_flag_n = (((uint32)(r) >> 31) & 0x01))
-#define SET_C_ADD(a,b) (gpu_flag_c = ((uint32)(b) > (uint32)(~(a))))
-#define SET_C_SUB(a,b) (gpu_flag_c = ((uint32)(b) > (uint32)(a)))
+#define SET_N(r) (gpu_flag_n = (((uint32_t)(r) >> 31) & 0x01))
+#define SET_C_ADD(a,b) (gpu_flag_c = ((uint32_t)(b) > (uint32_t)(~(a))))
+#define SET_C_SUB(a,b) (gpu_flag_c = ((uint32_t)(b) > (uint32_t)(a)))
#define SET_ZN(r) SET_N(r); SET_Z(r)
#define SET_ZNC_ADD(a,b,r) SET_N(r); SET_Z(r); SET_C_ADD(a,b)
#define SET_ZNC_SUB(a,b,r) SET_N(r); SET_Z(r); SET_C_SUB(a,b)
// Lookup table for 5-bit immediate fields in which an encoded 0 stands for 32:
// entry 0 is 32, entries 1..31 map to themselves. In this file it is indexed
// with IMM_1 by ADDQ/SUBQ and by the R14/R15 indexed STORE handlers.
-uint32 gpu_convert_zero[32] =
+uint32_t gpu_convert_zero[32] =
{ 32,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 };
-uint8 * branch_condition_table = 0;
+uint8_t * branch_condition_table = 0;
#define BRANCH_CONDITION(x) branch_condition_table[(x) + ((jaguar_flags & 7) << 5)]
-uint32 gpu_opcode_use[64];
+uint32_t gpu_opcode_use[64];
-char * gpu_opcode_str[64]=
-{
+const char * gpu_opcode_str[64]=
+{
"add", "addc", "addq", "addqt",
"sub", "subc", "subq", "subqt",
"neg", "and", "or", "xor",
"store_r14_ri", "store_r15_ri", "sat24", "pack",
};
-static uint32 gpu_in_exec = 0;
-static uint32 gpu_releaseTimeSlice_flag = 0;
+static uint32_t gpu_in_exec = 0;
+static uint32_t gpu_releaseTimeSlice_flag = 0;
// Raises gpu_releaseTimeSlice_flag (sets it to 1). The code that consumes the
// flag is not visible in this view; presumably the execution loop polls it to
// end the current timeslice early -- confirm against GPUExec.
-void gpu_releaseTimeslice(void)
+void GPUReleaseTimeslice(void)
{
gpu_releaseTimeSlice_flag = 1;
}
// Returns the current value of the GPU program counter (gpu_pc).
-uint32 gpu_get_pc(void)
+uint32_t GPUGetPC(void)
{
return gpu_pc;
}
{
if (!branch_condition_table)
{
- branch_condition_table = (uint8 *)malloc(32 * 8 * sizeof(branch_condition_table[0]));
+ branch_condition_table = (uint8_t *)malloc(32 * 8 * sizeof(branch_condition_table[0]));
if (branch_condition_table)
{
//
// GPU byte access (read)
//
-uint8 GPUReadByte(uint32 offset, uint32 who/*=UNKNOWN*/)
+uint8_t GPUReadByte(uint32_t offset, uint32_t who/*=UNKNOWN*/)
{
if (offset >= 0xF02000 && offset <= 0xF020FF)
WriteLog("GPU: ReadByte--Attempt to read from GPU register file by %s!\n", whoName[who]);
return gpu_ram_8[offset & 0xFFF];
else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset < GPU_CONTROL_RAM_BASE+0x20))
{
- uint32 data = GPUReadLong(offset & 0xFFFFFFFC, who);
+ uint32_t data = GPUReadLong(offset & 0xFFFFFFFC, who);
if ((offset & 0x03) == 0)
return data >> 24;
//
// GPU word access (read)
//
-uint16 GPUReadWord(uint32 offset, uint32 who/*=UNKNOWN*/)
+uint16_t GPUReadWord(uint32_t offset, uint32_t who/*=UNKNOWN*/)
{
if (offset >= 0xF02000 && offset <= 0xF020FF)
WriteLog("GPU: ReadWord--Attempt to read from GPU register file by %s!\n", whoName[who]);
if ((offset >= GPU_WORK_RAM_BASE) && (offset < GPU_WORK_RAM_BASE+0x1000))
{
offset &= 0xFFF;
- uint16 data = ((uint16)gpu_ram_8[offset] << 8) | (uint16)gpu_ram_8[offset+1];
+ uint16_t data = ((uint16_t)gpu_ram_8[offset] << 8) | (uint16_t)gpu_ram_8[offset+1];
return data;
}
else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset < GPU_CONTROL_RAM_BASE+0x20))
if (offset & 0x01) // Catch cases 1 & 3... (unaligned read)
return (GPUReadByte(offset, who) << 8) | GPUReadByte(offset+1, who);
- uint32 data = GPUReadLong(offset & 0xFFFFFFFC, who);
+ uint32_t data = GPUReadLong(offset & 0xFFFFFFFC, who);
if (offset & 0x02) // Cases 0 & 2...
return data & 0xFFFF;
//
// GPU dword access (read)
//
-uint32 GPUReadLong(uint32 offset, uint32 who/*=UNKNOWN*/)
+uint32_t GPUReadLong(uint32_t offset, uint32_t who/*=UNKNOWN*/)
{
if (offset >= 0xF02000 && offset <= 0xF020FF)
- WriteLog("GPU: ReadLong--Attempt to read from GPU register file by %s!\n", whoName[who]);
+ {
+ WriteLog("GPU: ReadLong--Attempt to read from GPU register file (%X) by %s!\n", offset, whoName[who]);
+ uint32_t reg = (offset & 0xFC) >> 2;
+ return (reg < 32 ? gpu_reg_bank_0[reg] : gpu_reg_bank_1[reg - 32]);
+ }
// if ((offset >= GPU_WORK_RAM_BASE) && (offset < GPU_WORK_RAM_BASE + 0x1000))
if ((offset >= GPU_WORK_RAM_BASE) && (offset <= GPU_WORK_RAM_BASE + 0x0FFC))
{
offset &= 0xFFF;
- return ((uint32)gpu_ram_8[offset] << 24) | ((uint32)gpu_ram_8[offset+1] << 16)
- | ((uint32)gpu_ram_8[offset+2] << 8) | (uint32)gpu_ram_8[offset+3];//*/
+ return ((uint32_t)gpu_ram_8[offset] << 24) | ((uint32_t)gpu_ram_8[offset+1] << 16)
+ | ((uint32_t)gpu_ram_8[offset+2] << 8) | (uint32_t)gpu_ram_8[offset+3];//*/
// return GET32(gpu_ram_8, offset);
}
// else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset < GPU_CONTROL_RAM_BASE+0x20))
gpu_flag_n = (gpu_flag_n ? 1 : 0);
gpu_flags = (gpu_flags & 0xFFFFFFF8) | (gpu_flag_n << 2) | (gpu_flag_c << 1) | gpu_flag_z;
-
+
return gpu_flags & 0xFFFFC1FF;
case 0x04:
return gpu_matrix_control;
//
// GPU byte access (write)
//
-void GPUWriteByte(uint32 offset, uint8 data, uint32 who/*=UNKNOWN*/)
+void GPUWriteByte(uint32_t offset, uint8_t data, uint32_t who/*=UNKNOWN*/)
{
if (offset >= 0xF02000 && offset <= 0xF020FF)
WriteLog("GPU: WriteByte--Attempt to write to GPU register file by %s!\n", whoName[who]);
}
else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset <= GPU_CONTROL_RAM_BASE + 0x1F))
{
- uint32 reg = offset & 0x1C;
+ uint32_t reg = offset & 0x1C;
int bytenum = offset & 0x03;
//This is definitely wrong!
gpu_div_control = (gpu_div_control & (~(0xFF << (bytenum << 3)))) | (data << (bytenum << 3));
else
{
- uint32 old_data = GPUReadLong(offset & 0xFFFFFFC, who);
+ uint32_t old_data = GPUReadLong(offset & 0xFFFFFFC, who);
bytenum = 3 - bytenum; // convention motorola !!!
old_data = (old_data & (~(0xFF << (bytenum << 3)))) | (data << (bytenum << 3));
GPUWriteLong(offset & 0xFFFFFFC, old_data, who);
//
// GPU word access (write)
//
-void GPUWriteWord(uint32 offset, uint16 data, uint32 who/*=UNKNOWN*/)
+void GPUWriteWord(uint32_t offset, uint16_t data, uint32_t who/*=UNKNOWN*/)
{
if (offset >= 0xF02000 && offset <= 0xF020FF)
WriteLog("GPU: WriteWord--Attempt to write to GPU register file by %s!\n", whoName[who]);
else
gpu_div_control = (gpu_div_control & 0x0000FFFF) | ((data & 0xFFFF) << 16);
}
- else
+ else
{
//WriteLog("[GPU W16:%08X,%04X]", offset, data);
- uint32 old_data = GPUReadLong(offset & 0xFFFFFFC, who);
+ uint32_t old_data = GPUReadLong(offset & 0xFFFFFFC, who);
+
if (offset & 0x02)
old_data = (old_data & 0xFFFF0000) | (data & 0xFFFF);
else
old_data = (old_data & 0x0000FFFF) | ((data & 0xFFFF) << 16);
+
GPUWriteLong(offset & 0xFFFFFFC, old_data, who);
}
+
return;
}
else if ((offset == GPU_WORK_RAM_BASE + 0x0FFF) || (GPU_CONTROL_RAM_BASE + 0x1F))
//
// GPU dword access (write)
//
-void GPUWriteLong(uint32 offset, uint32 data, uint32 who/*=UNKNOWN*/)
+void GPUWriteLong(uint32_t offset, uint32_t data, uint32_t who/*=UNKNOWN*/)
{
if (offset >= 0xF02000 && offset <= 0xF020FF)
WriteLog("GPU: WriteLong--Attempt to write to GPU register file by %s!\n", whoName[who]);
case 0x00:
{
bool IMASKCleared = (gpu_flags & IMASK) && !(data & IMASK);
- gpu_flags = data;
+ // NOTE: According to the JTRM, writing a 1 to IMASK has no effect; only the
+ // IRQ logic can set it. So we mask it out here to prevent problems...
+ gpu_flags = data & (~IMASK);
gpu_flag_z = gpu_flags & ZERO_FLAG;
gpu_flag_c = (gpu_flags & CARRY_FLAG) >> 1;
gpu_flag_n = (gpu_flags & NEGA_FLAG) >> 2;
#endif // GPU_DEBUG
break;
case 0x14:
- {
-// uint32 gpu_was_running = GPU_RUNNING;
+ {
+// uint32_t gpu_was_running = GPU_RUNNING;
data &= ~0xF7C0; // Disable writes to INT_LAT0-4 & TOM version number
// check for GPU -> CPU interrupt
if (data & 0x02)
{
//WriteLog("GPU->CPU interrupt\n");
- if (tom_irq_enabled(IRQ_GPU))
+ if (TOMIRQEnabled(IRQ_GPU))
{
- if ((tom_irq_enabled(IRQ_GPU)) && (jaguar_interrupt_handler_is_valid(64)))
+//This is the programmer's responsibility, to make sure the handler is valid, not ours!
+// if ((TOMIRQEnabled(IRQ_GPU))// && (JaguarInterruptHandlerIsValid(64)))
{
- tom_set_pending_gpu_int();
- m68k_set_irq(7); // Set 68000 NMI
- gpu_releaseTimeslice();
+ TOMSetPendingGPUInt();
+ m68k_set_irq(2); // Set 68000 IPL 2
+ GPUReleaseTimeslice();
}
}
data &= ~0x02;
//WriteLog("CPU->GPU interrupt\n");
GPUSetIRQLine(0, ASSERT_LINE);
m68k_end_timeslice();
- dsp_releaseTimeslice();
+ DSPReleaseTimeslice();
data &= ~0x04;
}
{
//WriteLog("asked to perform a single step (single step is %senabled)\n",(data&0x8)?"":"not ");
}
+
gpu_control = (gpu_control & 0xF7C0) | (data & (~0xF7C0));
// if gpu wasn't running but is now running, execute a few cycles
{
WriteLog("GPU: Write32--About to do stupid braindead GPU execution for 200 cycles.\n");
#endif // GPU_DEBUG
- gpu_exec(200);
+ GPUExec(200);
#ifdef GPU_DEBUG
}
#endif // GPU_DEBUG//*/
#else
if (gpu_control & 0x18)
- gpu_exec(1);
+ GPUExec(1);
#endif // #ifndef GPU_SINGLE_STEPPING
#ifdef GPU_DEBUG
WriteLog("Write to GPU CTRL by %s: %08X ", whoName[who], data);
if (GPU_RUNNING && effect_start5 && gpu_pc == 0xF035D8)
{
// Let's do a dump of $6528!
-/* uint32 numItems = JaguarReadWord(0x6BD6);
+/* uint32_t numItems = JaguarReadWord(0x6BD6);
WriteLog("\nDump of $6528: %u items.\n\n", numItems);
for(int i=0; i<numItems*3*4; i+=3*4)
{
WriteLog("\t%04X: %08X %08X %08X -> ", 0x6528+i, JaguarReadLong(0x6528+i),
JaguarReadLong(0x6528+i+4), JaguarReadLong(0x6528+i+8));
- uint16 link = JaguarReadWord(0x6528+i+8+2);
+ uint16_t link = JaguarReadWord(0x6528+i+8+2);
for(int j=0; j<40; j+=4)
WriteLog("%08X ", JaguarReadLong(link + j));
WriteLog("\n");
//This isn't working the way it should! !!! FIX !!!
//Err, actually, it is.
// NOW, it works right! Problem solved!!! It's a blitter bug!
-/* uint32 src = 0x4D54, dst = 0xF03000, width = 10 * 4;
+/* uint32_t src = 0x4D54, dst = 0xF03000, width = 10 * 4;
for(int y=0; y<127; y++)
{
for(int x=0; x<2; x++)
{
JaguarWriteLong(dst, JaguarReadLong(src));
-
+
src += 4;
dst += 4;
}
return;
// Get the interrupt latch & enable bits
- uint32 bits = (gpu_control >> 6) & 0x1F, mask = (gpu_flags >> 4) & 0x1F;
-
+ uint32_t bits = (gpu_control >> 6) & 0x1F, mask = (gpu_flags >> 4) & 0x1F;
+
// Bail out if latched interrupts aren't enabled
bits &= mask;
if (!bits)
return;
-
+
// Determine which interrupt to service
- uint32 which = 0; //Isn't there a #pragma to disable this warning???
+ uint32_t which = 0; //Isn't there a #pragma to disable this warning???
if (bits & 0x01)
which = 0;
if (bits & 0x02)
if (start_logging)
WriteLog("GPU: Generating IRQ #%i\n", which);
- // set the interrupt flag
+ // set the interrupt flag
gpu_flags |= IMASK;
GPUUpdateRegisterBanks();
- // subqt #4,r31 ; pre-decrement stack pointer
- // move pc,r30 ; address of interrupted code
+ // subqt #4,r31 ; pre-decrement stack pointer
+ // move pc,r30 ; address of interrupted code
// store r30,(r31) ; store return address
gpu_reg[31] -= 4;
GPUWriteLong(gpu_reg[31], gpu_pc - 2, GPU);
-
- // movei #service_address,r30 ; pointer to ISR entry
- // jump (r30) ; jump to ISR
+
+ // movei #service_address,r30 ; pointer to ISR entry
+ // jump (r30) ; jump to ISR
// nop
gpu_pc = gpu_reg[30] = GPU_WORK_RAM_BASE + (which * 0x10);
}
if (start_logging)
WriteLog("GPU: Setting GPU IRQ line #%i\n", irqline);
- uint32 mask = 0x0040 << irqline;
+ uint32_t mask = 0x0040 << irqline;
gpu_control &= ~mask; // Clear the interrupt latch
if (state)
//#include "gpu2.h"
//#include "gpu3.h"
// One-time GPU setup: builds the branch condition lookup table, then resets
// the GPU. The memory_malloc_secure() calls are dropped by this change (kept
// only as comments); the work RAM and the two register banks are declared as
// fixed-size arrays elsewhere in this file, so nothing is allocated here.
-void gpu_init(void)
+void GPUInit(void)
{
- memory_malloc_secure((void **)&gpu_ram_8, 0x1000, "GPU work RAM");
- memory_malloc_secure((void **)&gpu_reg_bank_0, 32 * sizeof(int32), "GPU bank 0 regs");
- memory_malloc_secure((void **)&gpu_reg_bank_1, 32 * sizeof(int32), "GPU bank 1 regs");
+// memory_malloc_secure((void **)&gpu_ram_8, 0x1000, "GPU work RAM");
+// memory_malloc_secure((void **)&gpu_reg_bank_0, 32 * sizeof(int32_t), "GPU bank 0 regs");
+// memory_malloc_secure((void **)&gpu_reg_bank_1, 32 * sizeof(int32_t), "GPU bank 1 regs");
// The table must exist before any branch opcode evaluates BRANCH_CONDITION()
build_branch_condition_table();
- gpu_reset();
+ GPUReset();
//TEMPORARY: Testing only!
// gpu2_init();
// gpu3_init();
}
-void gpu_reset(void)
+void GPUReset(void)
{
// GPU registers (directly visible)
gpu_flags = 0x00000000;
memset(gpu_ram_8, 0xFF, 0x1000);
gpu_in_exec = 0;
//not needed GPUInterruptPending = false;
- gpu_reset_stats();
+ GPUResetStats();
+
+ // Contents of local RAM are quasi-stable; we simulate this by randomizing RAM contents
+ for(uint32_t i=0; i<4096; i+=4)
+ *((uint32_t *)(&gpu_ram_8[i])) = rand();
}
-uint32 gpu_read_pc(void)
+
// Returns the current value of the GPU program counter (gpu_pc).
+uint32_t GPUReadPC(void)
{
return gpu_pc;
}
-void gpu_reset_stats(void)
+
// Clears the per-opcode usage counters (all 64 entries of gpu_opcode_use) and
// writes a note to the log saying that the statistics were reset.
+void GPUResetStats(void)
{
- for(uint32 i=0; i<64; i++)
+ for(uint32_t i=0; i<64; i++)
gpu_opcode_use[i] = 0;
WriteLog("--> GPU stats were reset!\n");
}
+
// Writes a disassembly listing of the address range 0xF03000-0xF03FFF to the
// log. Each pass asks dasmjag() to decode the instruction at address j into a
// local text buffer and advances j by dasmjag()'s return value (presumably the
// instruction length in bytes -- dasmjag is declared outside this view).
void GPUDumpDisassembly(void)
{
char buffer[512];
WriteLog("\n---[GPU code at 00F03000]---------------------------\n");
- uint32 j = 0xF03000;
+ uint32_t j = 0xF03000;
while (j <= 0xF03FFF)
{
// Keep the instruction's own address; j is advanced before the line is logged
- uint32 oldj = j;
+ uint32_t oldj = j;
j += dasmjag(JAGUAR_GPU, buffer, j);
WriteLog("\t%08X: %s\n", oldj, buffer);
}
}
+
void GPUDumpRegisters(void)
{
WriteLog("\n---[GPU flags: NCZ %d%d%d]-----------------------\n", gpu_flag_n, gpu_flag_c, gpu_flag_z);
}
}
+
void GPUDumpMemory(void)
{
WriteLog("\n---[GPU data at 00F03000]---------------------------\n");
gpu_ram_8[i+1], gpu_ram_8[i+2], gpu_ram_8[i+3]);
}
-void gpu_done(void)
-{
+
+void GPUDone(void)
+{
+ WriteLog("\n\n---------------------------------------------------------------------\n");
+ WriteLog("GPU I/O Registers\n");
+ WriteLog("---------------------------------------------------------------------\n");
+ WriteLog("F0%04X (G_FLAGS): $%06X\n", 0x2100, (gpu_flags & 0xFFFFFFF8) | (gpu_flag_n << 2) | (gpu_flag_c << 1) | gpu_flag_z);
+ WriteLog("F0%04X (G_MTXC): $%04X\n", 0x2104, gpu_matrix_control);
+ WriteLog("F0%04X (G_MTXA): $%04X\n", 0x2108, gpu_pointer_to_matrix);
+ WriteLog("F0%04X (G_END): $%02X\n", 0x210C, gpu_data_organization);
+ WriteLog("F0%04X (G_PC): $%06X\n", 0x2110, gpu_pc);
+ WriteLog("F0%04X (G_CTRL): $%06X\n", 0x2114, gpu_control);
+ WriteLog("F0%04X (G_HIDATA): $%08X\n", 0x2118, gpu_hidata);
+ WriteLog("F0%04X (G_REMAIN): $%08X\n", 0x211C, gpu_remain);
+ WriteLog("F0%04X (G_DIVCTRL): $%02X\n", 0x211C, gpu_div_control);
+ WriteLog("---------------------------------------------------------------------\n\n\n");
+
WriteLog("GPU: Stopped at PC=%08X (GPU %s running)\n", (unsigned int)gpu_pc, GPU_RUNNING ? "was" : "wasn't");
- // Get the interrupt latch & enable bits
- uint8 bits = (gpu_control >> 6) & 0x1F, mask = (gpu_flags >> 4) & 0x1F;
+ // Get the interrupt latch & enable bits
+ uint8_t bits = (gpu_control >> 6) & 0x1F, mask = (gpu_flags >> 4) & 0x1F;
WriteLog("GPU: Latch bits = %02X, enable bits = %02X\n", bits, mask);
GPUDumpRegisters();
WriteLog("\t%17s %lu\n", gpu_opcode_str[i], gpu_opcode_use[i]);
}
WriteLog("\n");
-
- memory_free(gpu_ram_8);
- memory_free(gpu_reg_bank_0);
- memory_free(gpu_reg_bank_1);
}
+
//
// Main GPU execution core
//
static int testCount = 1;
static int len = 0;
static bool tripwire = false;
-void gpu_exec(int32 cycles)
+void GPUExec(int32_t cycles)
{
if (!GPU_RUNNING)
return;
{
if (gpu_pc == 0xF03000)
{
- extern uint32 starCount;
+ extern uint32_t starCount;
starCount = 0;
/* WriteLog("GPU: Starting starfield generator... Dump of [R03=%08X]:\n", gpu_reg_bank_0[03]);
- uint32 base = gpu_reg_bank_0[3];
- for(uint32 i=0; i<0x100; i+=16)
+ uint32_t base = gpu_reg_bank_0[3];
+ for(uint32_t i=0; i<0x100; i+=16)
{
WriteLog("%02X: ", i);
- for(uint32 j=0; j<16; j++)
+ for(uint32_t j=0; j<16; j++)
{
WriteLog("%02X ", JaguarReadByte(base + i + j));
}
/* gpu_flag_c = (gpu_flag_c ? 1 : 0);
gpu_flag_z = (gpu_flag_z ? 1 : 0);
gpu_flag_n = (gpu_flag_n ? 1 : 0);*/
-
- uint16 opcode = GPUReadWord(gpu_pc, GPU);
- uint32 index = opcode >> 10;
+#if 0
+if (gpu_pc == 0xF03200)
+ doGPUDis = true;
+#endif
+
+ uint16_t opcode = GPUReadWord(gpu_pc, GPU);
+ uint32_t index = opcode >> 10;
gpu_instruction = opcode; // Added for GPU #3...
gpu_opcode_first_parameter = (opcode >> 5) & 0x1F;
gpu_opcode_second_parameter = opcode & 0x1F;
// gpu3_opcode[index]();
// BIOS hacking
-//GPU: [00F03548] jr nz,00F03560 (0xd561) (RM=00F03114, RN=00000004) -> --> JR: Branch taken.
+//GPU: [00F03548] jr nz,00F03560 (0xd561) (RM=00F03114, RN=00000004) -> --> JR: Branch taken.
/*static bool firstTime = true;
if (gpu_pc == 0xF03548 && firstTime)
{
nop 41362
*/
+
static void gpu_opcode_jump(void)
{
#ifdef GPU_DIS_JUMP
-char * condition[32] =
+const char * condition[32] =
{ "T", "nz", "z", "???", "nc", "nc nz", "nc z", "???", "c", "c nz",
"c z", "???", "???", "???", "???", "???", "???", "???", "???",
"???", "nn", "nn nz", "nn z", "???", "n", "n nz", "n z", "???",
gpu_flag_z = (gpu_flag_z ? 1 : 0);
gpu_flag_n = (gpu_flag_n ? 1 : 0);*/
// KLUDGE: Used by BRANCH_CONDITION
- uint32 jaguar_flags = (gpu_flag_n << 2) | (gpu_flag_c << 1) | gpu_flag_z;
+ uint32_t jaguar_flags = (gpu_flag_n << 2) | (gpu_flag_c << 1) | gpu_flag_z;
if (BRANCH_CONDITION(IMM_2))
{
#endif
if (gpu_start_log)
WriteLog(" --> JUMP: Branch taken.\n");
- uint32 delayed_pc = RM;
- gpu_exec(1);
+ uint32_t delayed_pc = RM;
+ GPUExec(1);
gpu_pc = delayed_pc;
-/* uint16 opcode = GPUReadWord(gpu_pc, GPU);
+/* uint16_t opcode = GPUReadWord(gpu_pc, GPU);
gpu_opcode_first_parameter = (opcode >> 5) & 0x1F;
gpu_opcode_second_parameter = opcode & 0x1F;
#endif
}
+
static void gpu_opcode_jr(void)
{
#ifdef GPU_DIS_JR
-char * condition[32] =
+const char * condition[32] =
{ "T", "nz", "z", "???", "nc", "nc nz", "nc z", "???", "c", "c nz",
"c z", "???", "???", "???", "???", "???", "???", "???", "???",
"???", "nn", "nn nz", "nn z", "???", "n", "n nz", "n z", "???",
#endif
/* if (CONDITION(jaguar.op & 31))
{
- int32 r1 = (INT8)((jaguar.op >> 2) & 0xF8) >> 2;
- uint32 newpc = jaguar.PC + r1;
+ int32_t r1 = (INT8)((jaguar.op >> 2) & 0xF8) >> 2;
+ uint32_t newpc = jaguar.PC + r1;
CALL_MAME_DEBUG;
jaguar.op = ROPCODE(jaguar.PC);
jaguar.PC = newpc;
gpu_flag_c = (gpu_flag_c ? 1 : 0);
gpu_flag_z = (gpu_flag_z ? 1 : 0);*/
// KLUDGE: Used by BRANCH_CONDITION
- uint32 jaguar_flags = (gpu_flag_n << 2) | (gpu_flag_c << 1) | gpu_flag_z;
+ uint32_t jaguar_flags = (gpu_flag_n << 2) | (gpu_flag_c << 1) | gpu_flag_z;
if (BRANCH_CONDITION(IMM_2))
{
#endif
if (gpu_start_log)
WriteLog(" --> JR: Branch taken.\n");
- int32 offset = (IMM_1 & 0x10 ? 0xFFFFFFF0 | IMM_1 : IMM_1); // Sign extend IMM_1
- int32 delayed_pc = gpu_pc + (offset * 2);
- gpu_exec(1);
+ int32_t offset = (IMM_1 & 0x10 ? 0xFFFFFFF0 | IMM_1 : IMM_1); // Sign extend IMM_1
+ int32_t delayed_pc = gpu_pc + (offset * 2);
+ GPUExec(1);
gpu_pc = delayed_pc;
-/* uint16 opcode = GPUReadWord(gpu_pc, GPU);
+/* uint16_t opcode = GPUReadWord(gpu_pc, GPU);
gpu_opcode_first_parameter = (opcode >> 5) & 0x1F;
gpu_opcode_second_parameter = opcode & 0x1F;
#endif
}
+
static void gpu_opcode_add(void)
{
#ifdef GPU_DIS_ADD
if (doGPUDis)
WriteLog("%06X: ADD R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
#endif
- uint32 res = RN + RM;
+ uint32_t res = RN + RM;
CLR_ZNC; SET_ZNC_ADD(RN, RM, res);
RN = res;
#ifdef GPU_DIS_ADD
#endif
}
+
static void gpu_opcode_addc(void)
{
#ifdef GPU_DIS_ADDC
WriteLog("%06X: ADDC R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
#endif
/* int dreg = jaguar.op & 31;
- uint32 r1 = jaguar.r[(jaguar.op >> 5) & 31];
- uint32 r2 = jaguar.r[dreg];
- uint32 res = r2 + r1 + ((jaguar.FLAGS >> 1) & 1);
+ uint32_t r1 = jaguar.r[(jaguar.op >> 5) & 31];
+ uint32_t r2 = jaguar.r[dreg];
+ uint32_t res = r2 + r1 + ((jaguar.FLAGS >> 1) & 1);
jaguar.r[dreg] = res;
CLR_ZNC; SET_ZNC_ADD(r2,r1,res);*/
- uint32 res = RN + RM + gpu_flag_c;
- uint32 carry = gpu_flag_c;
+ uint32_t res = RN + RM + gpu_flag_c;
+ uint32_t carry = gpu_flag_c;
// SET_ZNC_ADD(RN, RM, res); //???BUG??? Yes!
SET_ZNC_ADD(RN + carry, RM, res);
// SET_ZNC_ADD(RN, RM + carry, res);
#endif
}
+
static void gpu_opcode_addq(void)
{
#ifdef GPU_DIS_ADDQ
if (doGPUDis)
WriteLog("%06X: ADDQ #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
#endif
- uint32 r1 = gpu_convert_zero[IMM_1];
- uint32 res = RN + r1;
+ uint32_t r1 = gpu_convert_zero[IMM_1];
+ uint32_t res = RN + r1;
CLR_ZNC; SET_ZNC_ADD(RN, r1, res);
RN = res;
#ifdef GPU_DIS_ADDQ
#endif
}
+
static void gpu_opcode_addqt(void)
{
#ifdef GPU_DIS_ADDQT
#endif
}
+
static void gpu_opcode_sub(void)
{
#ifdef GPU_DIS_SUB
if (doGPUDis)
WriteLog("%06X: SUB R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
#endif
- uint32 res = RN - RM;
+ uint32_t res = RN - RM;
SET_ZNC_SUB(RN, RM, res);
RN = res;
#ifdef GPU_DIS_SUB
#endif
}
+
// SUBC: subtract with borrow, RN = RN - RM - carry. (RN and RM are the operand
// register macros, defined outside this view.)
//
// The replacement code performs the subtraction as a 64-bit addition,
// RN + (RM ^ 0xFFFFFFFF) + (carry ^ 1), so that bit 32 of the sum is the carry
// out of the 32-bit add. That bit is stored inverted in gpu_flag_c, the low 32
// bits are written back to RN, and Z/N are then derived from RN via SET_ZN.
// The removed code computed the carry with SET_ZNC_SUB(RN, RM + borrow, res),
// which its own comments flagged as wrong for some operand combinations.
static void gpu_opcode_subc(void)
{
#ifdef GPU_DIS_SUBC
if (doGPUDis)
WriteLog("%06X: SUBC R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
#endif
- uint32 res = RN - RM - gpu_flag_c;
- uint32 borrow = gpu_flag_c;
-// SET_ZNC_SUB(RN, RM, res); //???BUG??? YES!!!
-//No matter how you do it, there is a problem. With below, it's 0-0 with carry,
-//and the one below it it's FFFFFFFF - FFFFFFFF with carry... !!! FIX !!!
-// SET_ZNC_SUB(RN - borrow, RM, res);
- SET_ZNC_SUB(RN, RM + borrow, res);
- RN = res;
+ // This is how the GPU ALU does it--Two's complement with inverted carry
+ uint64_t res = (uint64_t)RN + (uint64_t)(RM ^ 0xFFFFFFFF) + (gpu_flag_c ^ 1);
+ // Carry out of the result is inverted too
+ gpu_flag_c = ((res >> 32) & 0x01) ^ 1;
+ RN = (res & 0xFFFFFFFF);
+ SET_ZN(RN);
#ifdef GPU_DIS_SUBC
if (doGPUDis)
WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
#endif
}
-/*
-N = 5, M = 3, 3 - 5 = -2, C = 1... Or, in our case:
-N = 0, M = 1, 0 - 1 = -1, C = 0!
-#define SET_C_SUB(a,b) (gpu_flag_c = ((uint32)(b) > (uint32)(a)))
-#define SET_ZN(r) SET_N(r); SET_Z(r)
-#define SET_ZNC_ADD(a,b,r) SET_N(r); SET_Z(r); SET_C_ADD(a,b)
-#define SET_ZNC_SUB(a,b,r) SET_N(r); SET_Z(r); SET_C_SUB(a,b)
-*/
+
static void gpu_opcode_subq(void)
{
#ifdef GPU_DIS_SUBQ
if (doGPUDis)
WriteLog("%06X: SUBQ #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
#endif
- uint32 r1 = gpu_convert_zero[IMM_1];
- uint32 res = RN - r1;
+ uint32_t r1 = gpu_convert_zero[IMM_1];
+ uint32_t res = RN - r1;
SET_ZNC_SUB(RN, r1, res);
RN = res;
#ifdef GPU_DIS_SUBQ
#endif
}
+
static void gpu_opcode_subqt(void)
{
#ifdef GPU_DIS_SUBQT
#endif
}
+
static void gpu_opcode_cmp(void)
{
#ifdef GPU_DIS_CMP
if (doGPUDis)
WriteLog("%06X: CMP R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
#endif
- uint32 res = RN - RM;
+ uint32_t res = RN - RM;
SET_ZNC_SUB(RN, RM, res);
#ifdef GPU_DIS_CMP
if (doGPUDis)
#endif
}
+
static void gpu_opcode_cmpq(void)
{
- static int32 sqtable[32] =
+ static int32_t sqtable[32] =
{ 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,-16,-15,-14,-13,-12,-11,-10,-9,-8,-7,-6,-5,-4,-3,-2,-1 };
#ifdef GPU_DIS_CMPQ
if (doGPUDis)
WriteLog("%06X: CMPQ #%d, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, sqtable[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
#endif
- uint32 r1 = sqtable[IMM_1 & 0x1F]; // I like this better -> (INT8)(jaguar.op >> 2) >> 3;
- uint32 res = RN - r1;
+ uint32_t r1 = sqtable[IMM_1 & 0x1F]; // I like this better -> (INT8)(jaguar.op >> 2) >> 3;
+ uint32_t res = RN - r1;
SET_ZNC_SUB(RN, r1, res);
#ifdef GPU_DIS_CMPQ
if (doGPUDis)
#endif
}
+
static void gpu_opcode_and(void)
{
#ifdef GPU_DIS_AND
#endif
}
+
static void gpu_opcode_or(void)
{
#ifdef GPU_DIS_OR
#endif
}
+
static void gpu_opcode_xor(void)
{
#ifdef GPU_DIS_XOR
#endif
}
+
static void gpu_opcode_not(void)
{
#ifdef GPU_DIS_NOT
#endif
}
+
static void gpu_opcode_move_pc(void)
{
#ifdef GPU_DIS_MOVEPC
#endif
}
+
static void gpu_opcode_sat8(void)
{
#ifdef GPU_DIS_SAT8
if (doGPUDis)
WriteLog("%06X: SAT8 R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
#endif
- RN = ((int32)RN < 0 ? 0 : (RN > 0xFF ? 0xFF : RN));
+ RN = ((int32_t)RN < 0 ? 0 : (RN > 0xFF ? 0xFF : RN));
SET_ZN(RN);
#ifdef GPU_DIS_SAT8
if (doGPUDis)
#endif
}
+
// SAT16: clamps RN to the range 0..0xFFFF. RN is first tested as a signed
// 32-bit value, so anything with the top bit set becomes 0; otherwise values
// above 0xFFFF become 0xFFFF. Z and N are then set from the clamped result.
static void gpu_opcode_sat16(void)
{
- RN = ((int32)RN < 0 ? 0 : (RN > 0xFFFF ? 0xFFFF : RN));
+ RN = ((int32_t)RN < 0 ? 0 : (RN > 0xFFFF ? 0xFFFF : RN));
SET_ZN(RN);
}
// SAT24: clamps RN to the range 0..0xFFFFFF. RN is first tested as a signed
// 32-bit value, so anything with the top bit set becomes 0; otherwise values
// above 0xFFFFFF become 0xFFFFFF. Z and N are then set from the clamped result.
static void gpu_opcode_sat24(void)
{
- RN = ((int32)RN < 0 ? 0 : (RN > 0xFFFFFF ? 0xFFFFFF : RN));
+ RN = ((int32_t)RN < 0 ? 0 : (RN > 0xFFFFFF ? 0xFFFFFF : RN));
SET_ZN(RN);
}
+
// STORE Rn,(R14+n): writes RN as a 32-bit value to gpu_reg[14] plus an
// immediate offset of gpu_convert_zero[IMM_1] longwords (the table value
// shifted left by 2).
// With GPU_CORRECT_ALIGNMENT defined, a target address inside 0xF03000-0xF03FFF
// has its low two bits cleared before the write; addresses outside that range
// are passed to GPUWriteLong unchanged. No flags are touched.
static void gpu_opcode_store_r14_indexed(void)
{
#ifdef GPU_DIS_STORE14I
if (doGPUDis)
WriteLog("%06X: STORE R%02u, (R14+$%02X) [NCZ:%u%u%u, R%02u=%08X, R14+$%02X=%08X]\n", gpu_pc-2, IMM_2, gpu_convert_zero[IMM_1] << 2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, gpu_convert_zero[IMM_1] << 2, gpu_reg[14]+(gpu_convert_zero[IMM_1] << 2));
#endif
+#ifdef GPU_CORRECT_ALIGNMENT
+ uint32_t address = gpu_reg[14] + (gpu_convert_zero[IMM_1] << 2);
+
+ if (address >= 0xF03000 && address <= 0xF03FFF)
+ GPUWriteLong(address & 0xFFFFFFFC, RN, GPU);
+ else
+ GPUWriteLong(address, RN, GPU);
+#else
GPUWriteLong(gpu_reg[14] + (gpu_convert_zero[IMM_1] << 2), RN, GPU);
+#endif
}
+
// STORE Rn,(R15+n): writes RN as a 32-bit value to gpu_reg[15] plus an
// immediate offset of gpu_convert_zero[IMM_1] longwords (the table value
// shifted left by 2).
// With GPU_CORRECT_ALIGNMENT defined, a target address inside 0xF03000-0xF03FFF
// has its low two bits cleared before the write; addresses outside that range
// are passed to GPUWriteLong unchanged. No flags are touched.
static void gpu_opcode_store_r15_indexed(void)
{
#ifdef GPU_DIS_STORE15I
if (doGPUDis)
WriteLog("%06X: STORE R%02u, (R15+$%02X) [NCZ:%u%u%u, R%02u=%08X, R15+$%02X=%08X]\n", gpu_pc-2, IMM_2, gpu_convert_zero[IMM_1] << 2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, gpu_convert_zero[IMM_1] << 2, gpu_reg[15]+(gpu_convert_zero[IMM_1] << 2));
#endif
+#ifdef GPU_CORRECT_ALIGNMENT
+ uint32_t address = gpu_reg[15] + (gpu_convert_zero[IMM_1] << 2);
+
+ if (address >= 0xF03000 && address <= 0xF03FFF)
+ GPUWriteLong(address & 0xFFFFFFFC, RN, GPU);
+ else
+ GPUWriteLong(address, RN, GPU);
+#else
GPUWriteLong(gpu_reg[15] + (gpu_convert_zero[IMM_1] << 2), RN, GPU);
+#endif
}
+
// LOAD (R14+Rm),Rn: reads a 32-bit value from address gpu_reg[14] + RM into RN.
// With GPU_CORRECT_ALIGNMENT defined, a source address inside 0xF03000-0xF03FFF
// has its low two bits cleared before the read; addresses outside that range
// are passed to GPUReadLong unchanged. No flags are touched.
static void gpu_opcode_load_r14_ri(void)
{
#ifdef GPU_DIS_LOAD14R
if (doGPUDis)
WriteLog("%06X: LOAD (R14+R%02u), R%02u [NCZ:%u%u%u, R14+R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM+gpu_reg[14], IMM_2, RN);
#endif
+#ifdef GPU_CORRECT_ALIGNMENT
+ uint32_t address = gpu_reg[14] + RM;
+
+ if (address >= 0xF03000 && address <= 0xF03FFF)
+ RN = GPUReadLong(address & 0xFFFFFFFC, GPU);
+ else
+ RN = GPUReadLong(address, GPU);
+#else
RN = GPUReadLong(gpu_reg[14] + RM, GPU);
+#endif
#ifdef GPU_DIS_LOAD14R
if (doGPUDis)
WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
#endif
}
+
// LOAD (R15+Rm),Rn: reads a 32-bit value from address gpu_reg[15] + RM into RN.
// With GPU_CORRECT_ALIGNMENT defined, a source address inside 0xF03000-0xF03FFF
// has its low two bits cleared before the read; addresses outside that range
// are passed to GPUReadLong unchanged. No flags are touched.
static void gpu_opcode_load_r15_ri(void)
{
#ifdef GPU_DIS_LOAD15R
if (doGPUDis)
WriteLog("%06X: LOAD (R15+R%02u), R%02u [NCZ:%u%u%u, R15+R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM+gpu_reg[15], IMM_2, RN);
#endif
+#ifdef GPU_CORRECT_ALIGNMENT
+ uint32_t address = gpu_reg[15] + RM;
+
+ if (address >= 0xF03000 && address <= 0xF03FFF)
+ RN = GPUReadLong(address & 0xFFFFFFFC, GPU);
+ else
+ RN = GPUReadLong(address, GPU);
+#else
RN = GPUReadLong(gpu_reg[15] + RM, GPU);
+#endif
#ifdef GPU_DIS_LOAD15R
if (doGPUDis)
WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
#endif
}
+
// STORE Rn,(R14+Rm): writes RN as a long to R14 + RM.
// With GPU_CORRECT_ALIGNMENT defined, an effective address inside
// $F03000-$F03FFF is long-aligned (low two bits cleared) before the write;
// other addresses are written as-is.
static void gpu_opcode_store_r14_ri(void)
{
#ifdef GPU_DIS_STORE14R
if (doGPUDis)
WriteLog("%06X: STORE R%02u, (R14+R%02u) [NCZ:%u%u%u, R%02u=%08X, R14+R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM+gpu_reg[14]);
#endif
+#ifdef GPU_CORRECT_ALIGNMENT
+ uint32_t address = gpu_reg[14] + RM; // effective address: base register + index register
+
+ if (address >= 0xF03000 && address <= 0xF03FFF)
+ GPUWriteLong(address & 0xFFFFFFFC, RN, GPU); // force long alignment inside the $F03000 window
+ else
+ GPUWriteLong(address, RN, GPU);
+#else
GPUWriteLong(gpu_reg[14] + RM, RN, GPU);
+#endif
}
+
// STORE Rn,(R15+Rm): writes RN as a long to R15 + RM.
// With GPU_CORRECT_ALIGNMENT defined, an effective address inside
// $F03000-$F03FFF is long-aligned (low two bits cleared) before the write;
// other addresses are written as-is.
static void gpu_opcode_store_r15_ri(void)
{
#ifdef GPU_DIS_STORE15R
if (doGPUDis)
WriteLog("%06X: STORE R%02u, (R15+R%02u) [NCZ:%u%u%u, R%02u=%08X, R15+R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM+gpu_reg[15]);
#endif
+// Same switch as every other LOAD/STORE handler in this file; a differently
+// named macro here would leave the aligned path permanently compiled out.
+#ifdef GPU_CORRECT_ALIGNMENT
+ uint32_t address = gpu_reg[15] + RM; // effective address: base register + index register
+
+ if (address >= 0xF03000 && address <= 0xF03FFF)
+ GPUWriteLong(address & 0xFFFFFFFC, RN, GPU); // force long alignment inside the $F03000 window
+ else
+ GPUWriteLong(address, RN, GPU);
+#else
GPUWriteLong(gpu_reg[15] + RM, RN, GPU);
+#endif
}
+
static void gpu_opcode_nop(void)
{
#ifdef GPU_DIS_NOP
#endif
}
+
static void gpu_opcode_pack(void)
{
#ifdef GPU_DIS_PACK
if (doGPUDis)
WriteLog("%06X: %s R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, (!IMM_1 ? "PACK " : "UNPACK"), IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
#endif
- uint32 val = RN;
+ uint32_t val = RN;
//BUG! if (RM == 0) // Pack
if (IMM_1 == 0) // Pack
#endif
}
+
static void gpu_opcode_storeb(void)
{
#ifdef GPU_DIS_STOREB
JaguarWriteByte(RM, RN, GPU);
}
+
// STOREW Rn,(Rm): stores the low 16 bits of RN at address RM.
// Inside $F03000-$F03FFF the value (RN & 0xFFFF) is written with
// GPUWriteLong(), i.e. as a long access; with GPU_CORRECT_ALIGNMENT defined,
// bit 0 of the address is cleared first. Everywhere else the store goes
// through JaguarWriteWord().
static void gpu_opcode_storew(void)
{
#ifdef GPU_DIS_STOREW
if (doGPUDis)
WriteLog("%06X: STOREW R%02u, (R%02u) [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM);
#endif
+#ifdef GPU_CORRECT_ALIGNMENT
+ if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
+ GPUWriteLong(RM & 0xFFFFFFFE, RN & 0xFFFF, GPU); // word-align the address, still a long-sized write
+ else
+ JaguarWriteWord(RM, RN, GPU);
+#else
if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
GPUWriteLong(RM, RN & 0xFFFF, GPU);
else
JaguarWriteWord(RM, RN, GPU);
+#endif
}
+
// STORE Rn,(Rm): writes RN as a long to address RM.
// With GPU_CORRECT_ALIGNMENT defined, an address inside $F03000-$F03FFF is
// long-aligned (low two bits cleared) before the write; other addresses are
// written as-is.
static void gpu_opcode_store(void)
{
#ifdef GPU_DIS_STORE
if (doGPUDis)
WriteLog("%06X: STORE R%02u, (R%02u) [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM);
#endif
+#ifdef GPU_CORRECT_ALIGNMENT
+ if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
+ GPUWriteLong(RM & 0xFFFFFFFC, RN, GPU); // force long alignment inside the $F03000 window
+ else
+ GPUWriteLong(RM, RN, GPU);
+#else
GPUWriteLong(RM, RN, GPU);
+#endif
}
+
// STOREP Rn,(Rm): writes two longs starting at RM: gpu_hidata at offset +0
// and RN at offset +4.
// With GPU_CORRECT_ALIGNMENT defined, an address inside $F03000-$F03FFF is
// first rounded down to an 8-byte boundary (low three bits cleared); other
// addresses are used as-is.
static void gpu_opcode_storep(void)
{
+#ifdef GPU_CORRECT_ALIGNMENT
+ if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
+ {
+ GPUWriteLong((RM & 0xFFFFFFF8) + 0, gpu_hidata, GPU); // first long: gpu_hidata
+ GPUWriteLong((RM & 0xFFFFFFF8) + 4, RN, GPU); // second long: RN
+ }
+ else
+ {
+ GPUWriteLong(RM + 0, gpu_hidata, GPU);
+ GPUWriteLong(RM + 4, RN, GPU);
+ }
+#else
GPUWriteLong(RM + 0, gpu_hidata, GPU);
GPUWriteLong(RM + 4, RN, GPU);
+#endif
}
static void gpu_opcode_loadb(void)
#endif
}
+
// LOADW (Rm),Rn: loads a 16-bit value from address RM into RN.
// Inside $F03000-$F03FFF the value is obtained with GPUReadLong() and masked
// to its low 16 bits; with GPU_CORRECT_ALIGNMENT defined, bit 0 of the
// address is cleared first. Everywhere else the read goes through
// JaguarReadWord().
static void gpu_opcode_loadw(void)
{
#ifdef GPU_DIS_LOADW
if (doGPUDis)
WriteLog("%06X: LOADW (R%02u), R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
#endif
+#ifdef GPU_CORRECT_ALIGNMENT
+ if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
+ RN = GPUReadLong(RM & 0xFFFFFFFE, GPU) & 0xFFFF; // word-align the address, keep only the low 16 bits
+ else
+ RN = JaguarReadWord(RM, GPU);
+#else
if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
RN = GPUReadLong(RM, GPU) & 0xFFFF;
else
RN = JaguarReadWord(RM, GPU);
+#endif
#ifdef GPU_DIS_LOADW
if (doGPUDis)
WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
#endif
}
+
+// According to the docs, & "Do The Same", this address is long aligned...
+// So let's try it:
+// And it works!!! Need to fix all instances...
+// Also, Power Drive Rally seems to contradict the idea that only LOADs in
+// the $F03000-$F03FFF range are aligned...
+#warning "!!! Alignment issues, need to find definitive final word on this !!!"
+/*
+Preliminary testing on real hardware seems to confirm that something strange goes on
+with unaligned reads in main memory. When the address is off by 1, the result is the
+same as the long address with the top byte replaced by something. So if the read is
+from $401, and $400 has 12 34 56 78, the value read will be $nn345678, where nn is a currently unknown value.
+When the address is off by 2, the result would be $nnnn5678, where nnnn is unknown.
+When the address is off by 3, the result would be $nnnnnn78, where nnnnnn is unknown.
+It may be that the "unknown" values come from the prefetch queue, but not sure how
+to test that. They seem to be stable, though, which would indicate such a mechanism.
+Sometimes, however, the off by 2 case returns $12345678!
+*/
// LOAD (Rm),Rn: reads a long from address RM into RN.
// With GPU_CORRECT_ALIGNMENT defined, the address is ALWAYS long-aligned (low
// two bits cleared) -- unlike the other handlers, the $F03000-$F03FFF range
// check is commented out here. The "garbage in the upper bytes" simulation
// for unaligned main-memory reads is likewise commented out.
static void gpu_opcode_load(void)
{
#ifdef GPU_DIS_LOAD
if (doGPUDis)
WriteLog("%06X: LOAD (R%02u), R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
#endif
+#ifdef GPU_CORRECT_ALIGNMENT
+ uint32_t mask[4] = { 0x00000000, 0xFF000000, 0xFFFF0000, 0xFFFFFF00 }; // only referenced by the commented-out code below, so currently unused
+// if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
+ RN = GPUReadLong(RM & 0xFFFFFFFC, GPU); // unconditional long alignment
+// RN = GPUReadLong(RM & 0x00FFFFFC, GPU);
+// else
+// RN = GPUReadLong(RM, GPU);
+ // Simulate garbage in unaligned reads...
+//seems that this behavior is different in GPU mem vs. main mem...
+// if ((RM < 0xF03000) || (RM > 0xF0BFFF))
+// RN |= mask[RM & 0x03];
+#else
RN = GPUReadLong(RM, GPU);
+#endif
#ifdef GPU_DIS_LOAD
if (doGPUDis)
WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
#endif
}
+
// LOADP (Rm),Rn: reads two longs starting at RM: the long at offset +0 goes
// to gpu_hidata and the long at offset +4 goes to RN.
// With GPU_CORRECT_ALIGNMENT defined, an address inside $F03000-$F03FFF is
// first rounded down to an 8-byte boundary (low three bits cleared); other
// addresses are used as-is.
static void gpu_opcode_loadp(void)
{
+#ifdef GPU_CORRECT_ALIGNMENT
+ if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
+ {
+ gpu_hidata = GPUReadLong((RM & 0xFFFFFFF8) + 0, GPU); // first long -> gpu_hidata
+ RN = GPUReadLong((RM & 0xFFFFFFF8) + 4, GPU); // second long -> RN
+ }
+ else
+ {
+ gpu_hidata = GPUReadLong(RM + 0, GPU);
+ RN = GPUReadLong(RM + 4, GPU);
+ }
+#else
gpu_hidata = GPUReadLong(RM + 0, GPU);
RN = GPUReadLong(RM + 4, GPU);
+#endif
}
+
// LOAD (R14+n),Rn: reads a long from R14 + (gpu_convert_zero[IMM_1] << 2)
// into RN.
// With GPU_CORRECT_ALIGNMENT defined, an effective address inside
// $F03000-$F03FFF is long-aligned (low two bits cleared) before the read;
// other addresses are read as-is.
static void gpu_opcode_load_r14_indexed(void)
{
#ifdef GPU_DIS_LOAD14I
if (doGPUDis)
WriteLog("%06X: LOAD (R14+$%02X), R%02u [NCZ:%u%u%u, R14+$%02X=%08X, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1] << 2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, gpu_convert_zero[IMM_1] << 2, gpu_reg[14]+(gpu_convert_zero[IMM_1] << 2), IMM_2, RN);
#endif
+#ifdef GPU_CORRECT_ALIGNMENT
+ uint32_t address = gpu_reg[14] + (gpu_convert_zero[IMM_1] << 2); // effective address: base register + scaled offset
+
+ // Range-check the effective address itself. RM plays no part in forming
+ // the address of this instruction, so testing it selected the wrong branch.
+ if ((address >= 0xF03000) && (address <= 0xF03FFF))
+ RN = GPUReadLong(address & 0xFFFFFFFC, GPU);
+ else
+ RN = GPUReadLong(address, GPU);
+#else
RN = GPUReadLong(gpu_reg[14] + (gpu_convert_zero[IMM_1] << 2), GPU);
+#endif
#ifdef GPU_DIS_LOAD14I
if (doGPUDis)
WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
#endif
}
+
// LOAD (R15+n),Rn: reads a long from R15 + (gpu_convert_zero[IMM_1] << 2)
// into RN.
// With GPU_CORRECT_ALIGNMENT defined, an effective address inside
// $F03000-$F03FFF is long-aligned (low two bits cleared) before the read;
// other addresses are read as-is.
static void gpu_opcode_load_r15_indexed(void)
{
#ifdef GPU_DIS_LOAD15I
if (doGPUDis)
WriteLog("%06X: LOAD (R15+$%02X), R%02u [NCZ:%u%u%u, R15+$%02X=%08X, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1] << 2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, gpu_convert_zero[IMM_1] << 2, gpu_reg[15]+(gpu_convert_zero[IMM_1] << 2), IMM_2, RN);
#endif
+#ifdef GPU_CORRECT_ALIGNMENT
+ uint32_t address = gpu_reg[15] + (gpu_convert_zero[IMM_1] << 2); // effective address: base register + scaled offset
+
+ // Range-check the effective address itself. RM plays no part in forming
+ // the address of this instruction, so testing it selected the wrong branch.
+ if ((address >= 0xF03000) && (address <= 0xF03FFF))
+ RN = GPUReadLong(address & 0xFFFFFFFC, GPU);
+ else
+ RN = GPUReadLong(address, GPU);
+#else
RN = GPUReadLong(gpu_reg[15] + (gpu_convert_zero[IMM_1] << 2), GPU);
+#endif
#ifdef GPU_DIS_LOAD15I
if (doGPUDis)
WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
#endif
}
+
// MOVEI #imm32,Rn: loads the 32-bit immediate that follows the opcode into RN
// and advances gpu_pc past it (4 bytes). The immediate is stored low word
// first: the word at gpu_pc is bits 0-15, the word at gpu_pc + 2 is bits 16-31.
static void gpu_opcode_movei(void)
{
// NOTE(review): the debug WriteLog below calls GPUReadWord() with one argument
// while the live code passes (address, GPU) -- confirm the prototype accepts
// both forms, otherwise enabling GPU_DIS_MOVEI will not compile.
#ifdef GPU_DIS_MOVEI
if (doGPUDis)
- WriteLog("%06X: MOVEI #$%08X, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, (uint32)GPUReadWord(gpu_pc) | ((uint32)GPUReadWord(gpu_pc + 2) << 16), IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
+ WriteLog("%06X: MOVEI #$%08X, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, (uint32_t)GPUReadWord(gpu_pc) | ((uint32_t)GPUReadWord(gpu_pc + 2) << 16), IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
#endif
// This instruction is followed by 32-bit value in LSW / MSW format...
- RN = (uint32)GPUReadWord(gpu_pc, GPU) | ((uint32)GPUReadWord(gpu_pc + 2, GPU) << 16);
+ RN = (uint32_t)GPUReadWord(gpu_pc, GPU) | ((uint32_t)GPUReadWord(gpu_pc + 2, GPU) << 16);
gpu_pc += 4;
#ifdef GPU_DIS_MOVEI
if (doGPUDis)
#endif
}
+
static void gpu_opcode_moveta(void)
{
#ifdef GPU_DIS_MOVETA
#endif
}
+
static void gpu_opcode_movefa(void)
{
#ifdef GPU_DIS_MOVEFA
#endif
}
+
static void gpu_opcode_move(void)
{
#ifdef GPU_DIS_MOVE
#endif
}
+
static void gpu_opcode_moveq(void)
{
#ifdef GPU_DIS_MOVEQ
#endif
}
+
// RESMAC Rn: copies the current value of the accumulator gpu_acc into RN.
// No flags are touched here.
static void gpu_opcode_resmac(void)
{
RN = gpu_acc;
}
+
// IMULT Rm,Rn: signed 16x16 multiply. The low 16 bits of RN and RM are each
// reinterpreted as int16_t, multiplied, and the product is stored in RN;
// flags are then updated from the result via SET_ZN(RN).
static void gpu_opcode_imult(void)
{
#ifdef GPU_DIS_IMULT
if (doGPUDis)
WriteLog("%06X: IMULT R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
#endif
- RN = (int16)RN * (int16)RM;
+ RN = (int16_t)RN * (int16_t)RM;
SET_ZN(RN);
#ifdef GPU_DIS_IMULT
if (doGPUDis)
#endif
}
+
static void gpu_opcode_mult(void)
{
#ifdef GPU_DIS_MULT
if (doGPUDis)
WriteLog("%06X: MULT R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
#endif
- RN = (uint16)RM * (uint16)RN;
+ RN = (uint16_t)RM * (uint16_t)RN;
+// RN = (RM & 0xFFFF) * (RN & 0xFFFF);
SET_ZN(RN);
#ifdef GPU_DIS_MULT
if (doGPUDis)
#endif
}
+
// BCLR #n,Rn: clears bit number IMM_1 of RN, stores the result back in RN and
// updates flags from it via SET_ZN(res).
static void gpu_opcode_bclr(void)
{
#ifdef GPU_DIS_BCLR
if (doGPUDis)
WriteLog("%06X: BCLR #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
#endif
- uint32 res = RN & ~(1 << IMM_1);
+ // Build the bit mask in uint32_t: shifting a signed 1 into bit 31 is
+ // undefined behaviour, the unsigned shift is well defined for 0..31.
+ uint32_t res = RN & ~((uint32_t)1 << IMM_1);
RN = res;
SET_ZN(res);
#ifdef GPU_DIS_BCLR
#endif
}
+
static void gpu_opcode_btst(void)
{
#ifdef GPU_DIS_BTST
#endif
}
+
// BSET #n,Rn: sets bit number IMM_1 of RN, stores the result back in RN and
// updates flags from it via SET_ZN(res).
static void gpu_opcode_bset(void)
{
#ifdef GPU_DIS_BSET
if (doGPUDis)
WriteLog("%06X: BSET #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
#endif
- uint32 res = RN | (1 << IMM_1);
+ // Build the bit mask in uint32_t: shifting a signed 1 into bit 31 is
+ // undefined behaviour, the unsigned shift is well defined for 0..31.
+ uint32_t res = RN | ((uint32_t)1 << IMM_1);
RN = res;
SET_ZN(res);
#ifdef GPU_DIS_BSET
#endif
}
+
// IMACN Rm,Rn: multiply-accumulate step. The low 16 bits of RM and RN are
// each reinterpreted as int16_t, multiplied, and the product is added to
// gpu_acc. Neither RN nor any flag is written here.
static void gpu_opcode_imacn(void)
{
- uint32 res = (int16)RM * (int16)(RN);
+ uint32_t res = (int16_t)RM * (int16_t)(RN);
gpu_acc += res;
}
+
// MTOI Rm,Rn: builds RN from RM by keeping RM's low 23 bits and filling the
// top 9 bits from RM arithmetically shifted right by 8 (so they replicate
// RM's bit 31). Flags are updated from the result via SET_ZN(res).
static void gpu_opcode_mtoi(void)
{
- uint32 _RM = RM;
- uint32 res = RN = (((int32)_RM >> 8) & 0xFF800000) | (_RM & 0x007FFFFF);
+ uint32_t _RM = RM; // snapshot of the source register
+ uint32_t res = RN = (((int32_t)_RM >> 8) & 0xFF800000) | (_RM & 0x007FFFFF);
SET_ZN(res);
}
+
static void gpu_opcode_normi(void)
{
- uint32 _RM = RM;
- uint32 res = 0;
+ uint32_t _RM = RM;
+ uint32_t res = 0;
if (_RM)
{
static void gpu_opcode_mmult(void)
{
int count = gpu_matrix_control & 0x0F; // Matrix width
- uint32 addr = gpu_pointer_to_matrix; // In the GPU's RAM
- int64 accum = 0;
- uint32 res;
+ uint32_t addr = gpu_pointer_to_matrix; // In the GPU's RAM
+ int64_t accum = 0;
+ uint32_t res;
if (gpu_matrix_control & 0x10) // Column stepping
{
for(int i=0; i<count; i++)
- {
- int16 a;
+ {
+ int16_t a;
if (i & 0x01)
- a = (int16)((gpu_alternate_reg[IMM_1 + (i >> 1)] >> 16) & 0xFFFF);
+ a = (int16_t)((gpu_alternate_reg[IMM_1 + (i >> 1)] >> 16) & 0xFFFF);
else
- a = (int16)(gpu_alternate_reg[IMM_1 + (i >> 1)] & 0xFFFF);
+ a = (int16_t)(gpu_alternate_reg[IMM_1 + (i >> 1)] & 0xFFFF);
- int16 b = ((int16)GPUReadWord(addr + 2, GPU));
+ int16_t b = ((int16_t)GPUReadWord(addr + 2, GPU));
accum += a * b;
addr += 4 * count;
}
{
for(int i=0; i<count; i++)
{
- int16 a;
+ int16_t a;
if (i & 0x01)
- a = (int16)((gpu_alternate_reg[IMM_1 + (i >> 1)] >> 16) & 0xFFFF);
+ a = (int16_t)((gpu_alternate_reg[IMM_1 + (i >> 1)] >> 16) & 0xFFFF);
else
- a = (int16)(gpu_alternate_reg[IMM_1 + (i >> 1)] & 0xFFFF);
+ a = (int16_t)(gpu_alternate_reg[IMM_1 + (i >> 1)] & 0xFFFF);
- int16 b = ((int16)GPUReadWord(addr + 2, GPU));
+ int16_t b = ((int16_t)GPUReadWord(addr + 2, GPU));
accum += a * b;
addr += 4;
}
}
- RN = res = (int32)accum;
+ RN = res = (int32_t)accum;
// carry flag to do (out of the last add)
SET_ZN(res);
}
+
static void gpu_opcode_abs(void)
{
#ifdef GPU_DIS_ABS
#endif
}
+
// DIV Rm,Rn: divides RN by RM, leaving the quotient in RN and the remainder
// in gpu_remain. Bit 0 of gpu_div_control selects 16.16 mode.
//
// The live implementation is the #else branch: a fixed 32-iteration
// shift-and-add/subtract loop. Each step shifts the next quotient-register
// bit into the remainder r, adds RM if r was negative before the step
// (bit 31 set) or subtracts RM otherwise, then shifts the inverted sign of
// the new r into the quotient q. No special case for RM == 0 is taken on this
// path. The #if 0 branch is the disabled earlier implementation based on the
// C / and % operators.
static void gpu_opcode_div(void) // RN / RM
{
#ifdef GPU_DIS_DIV
if (doGPUDis)
WriteLog("%06X: DIV R%02u, R%02u (%s) [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, (gpu_div_control & 0x01 ? "16.16" : "32"), gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
#endif
-// NOTE: remainder is NOT calculated correctly here!
-// The original tried to get it right by checking to see if the
-// remainder was negative, but that's too late...
-// The code there should do it now, but I'm not 100% sure...
-
+#if 0
if (RM)
{
if (gpu_div_control & 0x01) // 16.16 division
{
- RN = ((uint64)RN << 16) / RM;
- gpu_remain = ((uint64)RN << 16) % RM;
+ gpu_remain = ((uint64_t)RN << 16) % RM;
+ RN = ((uint64_t)RN << 16) / RM;
}
else
{
- RN = RN / RM;
+ // We calculate the remainder first because we destroy RN after
+ // this by assigning it to itself.
gpu_remain = RN % RM;
+ RN = RN / RM;
}
-
- if ((gpu_remain - RM) & 0x80000000) // If the result would have been negative...
- gpu_remain -= RM; // Then make it negative!
}
else
+ {
+ // This is what happens according to SCPCD. NYAN!
RN = 0xFFFFFFFF;
+ gpu_remain = 0;
+ }
+#else
+ // Real algorithm, courtesy of SCPCD: NYAN!
+ uint32_t q = RN; // quotient register, starts out holding the dividend
+ uint32_t r = 0; // running remainder
-/* uint32 _RM=RM;
- uint32 _RN=RN;
+ // If 16.16 division, stuff top 16 bits of RN into remainder and put the
+ // bottom 16 of RN in top 16 of quotient
+ if (gpu_div_control & 0x01)
+ q <<= 16, r = RN >> 16;
- if (_RM)
+ for(int i=0; i<32; i++)
{
- if (gpu_div_control & 1)
- {
- gpu_remain = (((uint64)_RN) << 16) % _RM;
- if (gpu_remain&0x80000000)
- gpu_remain-=_RM;
- RN = (((uint64)_RN) << 16) / _RM;
- }
- else
- {
- gpu_remain = _RN % _RM;
- if (gpu_remain&0x80000000)
- gpu_remain-=_RM;
- RN/=_RM;
- }
+// uint32_t sign = (r >> 31) & 0x01;
+ uint32_t sign = r & 0x80000000; // sign of the remainder before this step
+ r = (r << 1) | ((q >> 31) & 0x01); // shift the top bit of q into r
+ r += (sign ? RM : -RM); // add the divisor back if r was negative, otherwise subtract it
+ q = (q << 1) | (((~r) >> 31) & 0x01); // new quotient bit is 1 when the new r is non-negative
}
- else
- RN=0xffffffff;*/
+
+ RN = q;
+ gpu_remain = r;
+#endif
+
#ifdef GPU_DIS_DIV
if (doGPUDis)
WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] Remainder: %08X\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN, gpu_remain);
#endif
}
+
// IMULTN Rm,Rn: signed 16x16 multiply whose product replaces the accumulator.
// The low 16 bits of RN and RM are each reinterpreted as int16_t and
// multiplied; the product is stored in gpu_acc (RN is not written) and the
// flags are updated with SET_FLAG_Z(res) and SET_FLAG_N(res).
static void gpu_opcode_imultn(void)
{
- uint32 res = (int32)((int16)RN * (int16)RM);
- gpu_acc = (int32)res;
+ uint32_t res = (int32_t)((int16_t)RN * (int16_t)RM);
+ gpu_acc = (int32_t)res;
SET_FLAG_Z(res);
SET_FLAG_N(res);
}
+
// NEG Rn: replaces RN with its two's-complement negation (0 - RN).
// Flags are updated with SET_ZNC_SUB(0, RN, res) BEFORE RN is overwritten, so
// the macro still sees the original operand.
static void gpu_opcode_neg(void)
{
#ifdef GPU_DIS_NEG
if (doGPUDis)
WriteLog("%06X: NEG R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
#endif
- uint32 res = -RN;
+ uint32_t res = -RN;
SET_ZNC_SUB(0, RN, res);
RN = res;
#ifdef GPU_DIS_NEG
#endif
}
+
// SHLQ #n,Rn: shifts RN left by (32 - IMM_1) bits and stores the result in RN.
// Flags are updated via SET_ZN(res); the carry flag receives bit 31 of the
// ORIGINAL RN (it is read before RN is overwritten).
// NOTE(review): if IMM_1 can be 0 the shift count is 32, which is undefined
// for a 32-bit operand in C/C++ -- confirm IMM_1's range and the intended
// result for that encoding.
static void gpu_opcode_shlq(void)
{
#ifdef GPU_DIS_SHLQ
#endif
// Was a bug here...
// (Look at Aaron's code: If r1 = 32, then 32 - 32 = 0 which is wrong!)
- int32 r1 = 32 - IMM_1;
- uint32 res = RN << r1;
+ int32_t r1 = 32 - IMM_1; // shift count
+ uint32_t res = RN << r1;
SET_ZN(res); gpu_flag_c = (RN >> 31) & 1;
RN = res;
#ifdef GPU_DIS_SHLQ
#endif
}
+
// SHRQ #n,Rn: logical (unsigned) right shift of RN by gpu_convert_zero[IMM_1]
// bits; the result is stored in RN. Flags are updated via SET_ZN(res); the
// carry flag receives bit 0 of the ORIGINAL RN.
// NOTE(review): presumably gpu_convert_zero[] maps 0 to 32; if so, a count of
// 32 is an undefined shift for a 32-bit operand in C/C++ -- verify against the
// table definition.
static void gpu_opcode_shrq(void)
{
#ifdef GPU_DIS_SHRQ
if (doGPUDis)
WriteLog("%06X: SHRQ #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
#endif
- int32 r1 = gpu_convert_zero[IMM_1];
- uint32 res = RN >> r1;
+ int32_t r1 = gpu_convert_zero[IMM_1]; // shift count
+ uint32_t res = RN >> r1;
SET_ZN(res); gpu_flag_c = RN & 1;
RN = res;
#ifdef GPU_DIS_SHRQ
#endif
}
+
// ROR Rm,Rn: rotates RN right by the low 5 bits of RM and stores the result
// in RN. Flags are updated via SET_ZN(res); the carry flag receives bit 31 of
// the ORIGINAL RN (it is read before RN is overwritten).
static void gpu_opcode_ror(void)
{
#ifdef GPU_DIS_ROR
if (doGPUDis)
WriteLog("%06X: ROR R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
#endif
- uint32 r1 = RM & 0x1F;
- uint32 res = (RN >> r1) | (RN << (32 - r1));
+ uint32_t r1 = RM & 0x1F; // rotate count, 0..31
+ // Mask the complementary count: when r1 is 0, (32 - r1) is 32 and a 32-bit
+ // shift by 32 is undefined. Masked, a zero rotate yields RN unchanged and
+ // every other count is unaffected.
+ uint32_t res = (RN >> r1) | (RN << ((32 - r1) & 0x1F));
SET_ZN(res); gpu_flag_c = (RN >> 31) & 1;
RN = res;
#ifdef GPU_DIS_ROR
#endif
}
+
// RORQ #n,Rn: rotates RN right by gpu_convert_zero[IMM_1 & 0x1F] bits and
// stores the result in RN. Flags are updated via SET_ZN(res); the carry flag
// receives bit 31 of the original RN (kept in r2).
static void gpu_opcode_rorq(void)
{
#ifdef GPU_DIS_RORQ
if (doGPUDis)
WriteLog("%06X: RORQ #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
#endif
- uint32 r1 = gpu_convert_zero[IMM_1 & 0x1F];
- uint32 r2 = RN;
- uint32 res = (r2 >> r1) | (r2 << (32 - r1));
+ uint32_t r1 = gpu_convert_zero[IMM_1 & 0x1F]; // rotate count from the table
+ uint32_t r2 = RN; // original operand, also used for the carry below
+ // Mask both shift counts to 0..31. Presumably the table maps 0 to 32
+ // (TODO confirm); a count of 32 (or 0) would make one of the two shifts a
+ // shift by 32, which is undefined for a 32-bit operand. Masked, a rotate by
+ // 0 or 32 yields r2 unchanged and counts 1..31 behave exactly as before.
+ uint32_t res = (r2 >> (r1 & 0x1F)) | (r2 << ((32 - r1) & 0x1F));
RN = res;
SET_ZN(res); gpu_flag_c = (r2 >> 31) & 0x01;
#ifdef GPU_DIS_RORQ
#endif
}
+
static void gpu_opcode_sha(void)
{
/* int dreg = jaguar.op & 31;
- int32 r1 = (int32)jaguar.r[(jaguar.op >> 5) & 31];
- uint32 r2 = jaguar.r[dreg];
- uint32 res;
+ int32_t r1 = (int32_t)jaguar.r[(jaguar.op >> 5) & 31];
+ uint32_t r2 = jaguar.r[dreg];
+ uint32_t res;
CLR_ZNC;
if (r1 < 0)
}
else
{
- res = (r1 >= 32) ? ((int32)r2 >> 31) : ((int32)r2 >> r1);
+ res = (r1 >= 32) ? ((int32_t)r2 >> 31) : ((int32_t)r2 >> r1);
jaguar.FLAGS |= (r2 << 1) & 2;
}
jaguar.r[dreg] = res;
if (doGPUDis)
WriteLog("%06X: SHA R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
#endif
- uint32 res;
+ uint32_t res;
- if ((int32)RM < 0)
+ if ((int32_t)RM < 0)
{
- res = ((int32)RM <= -32) ? 0 : (RN << -(int32)RM);
+ res = ((int32_t)RM <= -32) ? 0 : (RN << -(int32_t)RM);
gpu_flag_c = RN >> 31;
}
else
{
- res = ((int32)RM >= 32) ? ((int32)RN >> 31) : ((int32)RN >> (int32)RM);
+ res = ((int32_t)RM >= 32) ? ((int32_t)RN >> 31) : ((int32_t)RN >> (int32_t)RM);
gpu_flag_c = RN & 0x01;
}
RN = res;
WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
#endif
-/* int32 sRM=(int32)RM;
- uint32 _RN=RN;
+/* int32_t sRM=(int32_t)RM;
+ uint32_t _RN=RN;
if (sRM<0)
{
- uint32 shift=-sRM;
+ uint32_t shift=-sRM;
if (shift>=32) shift=32;
gpu_flag_c=(_RN&0x80000000)>>31;
while (shift)
}
else
{
- uint32 shift=sRM;
+ uint32_t shift=sRM;
if (shift>=32) shift=32;
gpu_flag_c=_RN&0x1;
while (shift)
{
- _RN=((int32)_RN)>>1;
+ _RN=((int32_t)_RN)>>1;
shift--;
}
}
SET_FLAG_N(_RN);*/
}
+
// SHARQ #n,Rn: right shift of RN, reinterpreted as int32_t, by
// gpu_convert_zero[IMM_1] bits; the result is stored in RN. Flags are updated
// via SET_ZN(res); the carry flag receives bit 0 of the ORIGINAL RN.
// NOTE(review): presumably gpu_convert_zero[] maps 0 to 32; if so, a count of
// 32 is an undefined shift for a 32-bit operand in C/C++ -- verify against the
// table definition.
static void gpu_opcode_sharq(void)
{
#ifdef GPU_DIS_SHARQ
if (doGPUDis)
WriteLog("%06X: SHARQ #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
#endif
- uint32 res = (int32)RN >> gpu_convert_zero[IMM_1];
+ uint32_t res = (int32_t)RN >> gpu_convert_zero[IMM_1];
SET_ZN(res); gpu_flag_c = RN & 0x01;
RN = res;
#ifdef GPU_DIS_SHARQ
#endif
}
+
static void gpu_opcode_sh(void)
{
#ifdef GPU_DIS_SH
if (RM & 0x80000000) // Shift left
{
gpu_flag_c = RN >> 31;
- RN = ((int32)RM <= -32 ? 0 : RN << -(int32)RM);
+ RN = ((int32_t)RM <= -32 ? 0 : RN << -(int32_t)RM);
}
else // Shift right
{
#endif
}
+
//Temporary: Testing only!
//#include "gpu2.cpp"
//#include "gpu3.cpp"
+
+#else
+
+
+// New thread-safe GPU core
+
+int GPUCore(void * data) // placeholder: sits in the #else arm of the file-level "#if 1", so it is not compiled at present
+{ // NOTE(review): body is empty although the function is declared to return int -- a return value must be added before this arm is enabled
+}
+
+#endif
+