X-Git-Url: http://shamusworld.gotdns.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fdsp.cpp;h=c0fd60387925b1e6821fc7005bb7a6f985967e2a;hb=2136446c0d38d00a651d3eb665ee564b18f94b28;hp=014578bee047bce472c26e7b9db106e31b1f8fd0;hpb=242de5f08fc7194abeb40eee8861a1f16f66e323;p=virtualjaguar diff --git a/src/dsp.cpp b/src/dsp.cpp index 014578b..c0fd603 100644 --- a/src/dsp.cpp +++ b/src/dsp.cpp @@ -1,23 +1,40 @@ // // DSP core // -// Original source by Cal2 +// Originally by David Raingeard // GCC/SDL port by Niels Wagenaar (Linux/WIN32) and Caz (BeOS) // Extensive cleanups/rewrites by James L. Hammons +// (C) 2010 Underground Software +// +// JLH = James L. Hammons +// +// Who When What +// --- ---------- ------------------------------------------------------------- +// JLH 01/16/2010 Created this log ;-) // -#include // Used only for SDL_GetTicks... #include "dsp.h" +#include // Used only for SDL_GetTicks... +#include +#include "gpu.h" +#include "jagdasm.h" +#include "jaguar.h" +#include "jerry.h" +#include "log.h" +#include "m68k.h" +//#include "memory.h" + //#define DSP_DEBUG //#define DSP_DEBUG_IRQ //#define DSP_DEBUG_PL2 //#define DSP_DEBUG_STALL //#define DSP_DEBUG_CC +#define NEW_SCOREBOARD // Disassembly definitions -#define DSP_DIS_ABS // Pipelined only +#define DSP_DIS_ABS #define DSP_DIS_ADD #define DSP_DIS_ADDC #define DSP_DIS_ADDQ @@ -46,6 +63,7 @@ #define DSP_DIS_MOVEI #define DSP_DIS_MOVEQ #define DSP_DIS_MOVEFA +#define DSP_DIS_MOVEPC // Pipeline only! #define DSP_DIS_MOVETA #define DSP_DIS_MULT #define DSP_DIS_NEG @@ -156,7 +174,11 @@ struct PipelineStage #define TYPE_WORD 1 #define TYPE_DWORD 2 #define PIPELINE_STALL 64 // Set to # of opcodes + 1 +#ifndef NEW_SCOREBOARD bool scoreboard[32]; +#else +uint8 scoreboard[32]; +#endif uint8 plPtrFetch, plPtrRead, plPtrExec, plPtrWrite; PipelineStage pipeline[4]; bool IMASKCleared = false; @@ -216,7 +238,7 @@ static void dsp_opcode_abs(void); static void dsp_opcode_add(void); static void dsp_opcode_addc(void); static void dsp_opcode_addq(void); -static void dsp_opcode_addqmod(void); +static void dsp_opcode_addqmod(void); static void dsp_opcode_addqt(void); static void dsp_opcode_and(void); static void dsp_opcode_bclr(void); @@ -237,7 +259,7 @@ static void dsp_opcode_load_r14_indexed(void); static void dsp_opcode_load_r14_ri(void); static void dsp_opcode_load_r15_indexed(void); static void dsp_opcode_load_r15_ri(void); -static void dsp_opcode_mirror(void); +static void dsp_opcode_mirror(void); static void dsp_opcode_mmult(void); static void dsp_opcode_move(void); static void dsp_opcode_movei(void); @@ -256,8 +278,8 @@ static void dsp_opcode_resmac(void); static void dsp_opcode_ror(void); static void dsp_opcode_rorq(void); static void dsp_opcode_xor(void); -static void dsp_opcode_sat16s(void); -static void dsp_opcode_sat32s(void); +static void dsp_opcode_sat16s(void); +static void dsp_opcode_sat32s(void); static void dsp_opcode_sh(void); static void dsp_opcode_sha(void); static void dsp_opcode_sharq(void); @@ -273,31 +295,38 @@ static void dsp_opcode_store_r15_ri(void); static void dsp_opcode_sub(void); static void dsp_opcode_subc(void); static void dsp_opcode_subq(void); -static void dsp_opcode_subqmod(void); +static void dsp_opcode_subqmod(void); static void dsp_opcode_subqt(void); uint8 dsp_opcode_cycles[64] = { - 3, 3, 3, 3, - 3, 3, 3, 3, - 3, 3, 3, 3, - 3, 3, 3, 3, - 3, 3, 1, 3, - 1, 18, 3, 3, - 3, 3, 3, 3, - 3, 3, 3, 3, - 3, 3, 2, 2, - 2, 2, 3, 4, - 5, 4, 5, 6, - 6, 1, 1, 1, - 1, 2, 2, 2, - 1, 1, 9, 3, - 3, 1, 6, 6, - 2, 2, 3, 3 -}; + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 1, 3, 1, 18, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 2, 2, 2, 2, 3, 4, + 5, 4, 5, 6, 6, 1, 1, 1, + 1, 2, 2, 2, 1, 1, 9, 3, + 3, 1, 6, 6, 2, 2, 3, 3 +};//*/ +//Here's a QnD kludge... +//This is wrong, wrong, WRONG, but it seems to work for the time being... +//(That is, it fixes Flip Out which relies on GPU timing rather than semaphores. Bad developers! Bad!) +//What's needed here is a way to take pipeline effects into account (including pipeline stalls!)... +/*uint8 dsp_opcode_cycles[64] = +{ + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 9, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 2, + 2, 2, 2, 3, 3, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 4, 1, + 1, 1, 3, 3, 1, 1, 1, 1 +};//*/ void (* dsp_opcode[64])() = -{ +{ dsp_opcode_add, dsp_opcode_addc, dsp_opcode_addq, dsp_opcode_addqt, dsp_opcode_sub, dsp_opcode_subc, dsp_opcode_subq, dsp_opcode_subqt, dsp_opcode_neg, dsp_opcode_and, dsp_opcode_or, dsp_opcode_xor, @@ -318,8 +347,8 @@ void (* dsp_opcode[64])() = uint32 dsp_opcode_use[65]; -char * dsp_opcode_str[65]= -{ +const char * dsp_opcode_str[65]= +{ "add", "addc", "addq", "addqt", "sub", "subc", "subq", "subqt", "neg", "and", "or", "xor", @@ -349,9 +378,9 @@ static uint32 dsp_pointer_to_matrix; static uint32 dsp_data_organization; uint32 dsp_control; static uint32 dsp_div_control; -static uint8 dsp_flag_z, dsp_flag_n, dsp_flag_c; -static uint32 * dsp_reg, * dsp_alternate_reg; -static uint32 * dsp_reg_bank_0, * dsp_reg_bank_1; +static uint8 dsp_flag_z, dsp_flag_n, dsp_flag_c; +static uint32 * dsp_reg = NULL, * dsp_alternate_reg = NULL; +static uint32 dsp_reg_bank_0[32], dsp_reg_bank_1[32]; static uint32 dsp_opcode_first_parameter; static uint32 dsp_opcode_second_parameter; @@ -369,9 +398,9 @@ static uint32 dsp_opcode_second_parameter; #define CLR_ZN (dsp_flag_z = dsp_flag_n = 0) #define CLR_ZNC (dsp_flag_z = dsp_flag_n = dsp_flag_c = 0) #define SET_Z(r) (dsp_flag_z = ((r) == 0)) -#define SET_N(r) (dsp_flag_n = (((UINT32)(r) >> 31) & 0x01)) -#define SET_C_ADD(a,b) (dsp_flag_c = ((UINT32)(b) > (UINT32)(~(a)))) -#define SET_C_SUB(a,b) (dsp_flag_c = ((UINT32)(b) > (UINT32)(a))) +#define SET_N(r) (dsp_flag_n = (((uint32)(r) >> 31) & 0x01)) +#define SET_C_ADD(a,b) (dsp_flag_c = ((uint32)(b) > (uint32)(~(a)))) +#define SET_C_SUB(a,b) (dsp_flag_c = ((uint32)(b) > (uint32)(a))) #define SET_ZN(r) SET_N(r); SET_Z(r) #define SET_ZNC_ADD(a,b,r) SET_N(r); SET_Z(r); SET_C_ADD(a,b) #define SET_ZNC_SUB(a,b,r) SET_N(r); SET_Z(r); SET_C_SUB(a,b) @@ -379,7 +408,7 @@ static uint32 dsp_opcode_second_parameter; uint32 dsp_convert_zero[32] = { 32,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 }; uint8 * dsp_branch_condition_table = NULL; static uint16 * mirror_table = NULL; -static uint8 * dsp_ram_8 = NULL; +static uint8 dsp_ram_8[0x2000]; #define BRANCH_CONDITION(x) dsp_branch_condition_table[(x) + ((jaguar_flags & 7) << 5)] @@ -409,7 +438,7 @@ void dsp_reset_stats(void) dsp_opcode_use[i] = 0; } -void dsp_releaseTimeslice(void) +void DSPReleaseTimeslice(void) { //This does absolutely nothing!!! !!! FIX !!! dsp_releaseTimeSlice_flag = 1; @@ -419,7 +448,7 @@ void dsp_build_branch_condition_table(void) { // Allocate the mirror table if (!mirror_table) - mirror_table = (uint16 *)malloc(65536 * sizeof(mirror_table[0])); + mirror_table = (uint16 *)malloc(65536 * sizeof(uint16)); // Fill in the mirror table if (mirror_table) @@ -435,7 +464,7 @@ void dsp_build_branch_condition_table(void) if (!dsp_branch_condition_table) { - dsp_branch_condition_table = (uint8 *)malloc(32 * 8 * sizeof(dsp_branch_condition_table[0])); + dsp_branch_condition_table = (uint8 *)malloc(32 * 8 * sizeof(uint8)); // Fill in the condition table if (dsp_branch_condition_table) @@ -498,7 +527,7 @@ uint8 DSPReadByte(uint32 offset, uint32 who/*=UNKNOWN*/) } return JaguarReadByte(offset, who); -} +} uint16 DSPReadWord(uint32 offset, uint32 who/*=UNKNOWN*/) { @@ -523,12 +552,12 @@ uint16 DSPReadWord(uint32 offset, uint32 who/*=UNKNOWN*/) if (offset==0xF1B2C2) return(0x0000); } */ - // pour permettre à wolfenstein 3d de tourner sans le dsp + // pour permettre � wolfenstein 3d de tourner sans le dsp /* if ((offset==0xF1B0D0)||(offset==0xF1B0D2)) return(0); */ - // pour permettre à nba jam de tourner sans le dsp + // pour permettre � nba jam de tourner sans le dsp /* if (jaguar_mainRom_crc32==0x4faddb18) { if (offset==0xf1b2c0) return(0); @@ -637,7 +666,7 @@ void DSPWriteByte(uint32 offset, uint8 data, uint32 who/*=UNKNOWN*/) { uint32 reg = offset & 0x1C; int bytenum = offset & 0x03; - + if ((reg >= 0x1C) && (reg <= 0x1F)) dsp_div_control = (dsp_div_control & (~(0xFF << (bytenum << 3)))) | (data << (bytenum << 3)); else @@ -645,7 +674,7 @@ void DSPWriteByte(uint32 offset, uint8 data, uint32 who/*=UNKNOWN*/) //This looks funky. !!! FIX !!! uint32 old_data = DSPReadLong(offset&0xFFFFFFC, who); bytenum = 3 - bytenum; // convention motorola !!! - old_data = (old_data & (~(0xFF << (bytenum << 3)))) | (data << (bytenum << 3)); + old_data = (old_data & (~(0xFF << (bytenum << 3)))) | (data << (bytenum << 3)); DSPWriteLong(offset & 0xFFFFFFC, old_data, who); } return; @@ -667,6 +696,10 @@ void DSPWriteWord(uint32 offset, uint16 data, uint32 who/*=UNKNOWN*/) // WriteLog("dsp: writing %.4x at 0x%.8x\n",data,offset); if ((offset >= DSP_WORK_RAM_BASE) && (offset < DSP_WORK_RAM_BASE+0x2000)) { +/*if (offset == 0xF1B2F4) +{ + WriteLog("DSP: %s is writing %04X at location 0xF1B2F4 (DSP_PC: %08X)...\n", whoName[who], data, dsp_pc); +}//*/ offset -= DSP_WORK_RAM_BASE; dsp_ram_8[offset] = data >> 8; dsp_ram_8[offset+1] = data & 0xFF; @@ -765,6 +798,29 @@ SET32(ram2, offset, data); #ifdef DSP_DEBUG_IRQ } #endif//*/ +#if 0 + if (/*4-8, 16*/data & 0x101F0) + WriteLog("DSP: %s is enabling interrupts %s%s%s%s%s%s\n", whoName[who], + (data & 0x010 ? "CPU " : ""), (data & 0x020 ? "I2S " : ""), + (data & 0x040 ? "TIMER0 " : ""), (data & 0x080 ? "TIMER1 " : ""), + (data & 0x100 ? "EXT0 " : ""), (data & 0x10000 ? "EXT1" : "")); +/*if (data & 0x00020) // CD BIOS DSP code... +{ +//001AC1BA: movea.l #$1AC200, A0 +//001AC1C0: move.l #$1AC68C, D0 + char buffer[512]; + + WriteLog("\n---[DSP code at 00F1B97C]---------------------------\n"); + uint32 j = 0xF1B97C;//0x1AC200; + while (j <= 0xF1BE08)//0x1AC68C) + { + uint32 oldj = j; + j += dasmjag(JAGUAR_DSP, buffer, j); +// WriteLog("\t%08X: %s\n", oldj+0xD6F77C, buffer); + WriteLog("\t%08X: %s\n", oldj, buffer); + } +}//*/ +#endif break; } case 0x04: @@ -791,10 +847,10 @@ if (who != DSP) //!!!!!!!! break; case 0x14: - { -#ifdef DSP_DEBUG + { +//#ifdef DSP_DEBUG WriteLog("Write to DSP CTRL by %s: %08X\n", whoName[who], data); -#endif +//#endif bool wasRunning = DSP_RUNNING; // uint32 dsp_was_running = DSP_RUNNING; // Check for DSP -> CPU interrupt @@ -804,12 +860,13 @@ WriteLog("Write to DSP CTRL by %s: %08X\n", whoName[who], data); WriteLog("DSP: DSP -> CPU interrupt\n"); #endif // This was WRONG -// Why do we check for a valid handler at 64? Isn't that the Jag programmer's responsibility? +// Why do we check for a valid handler at 64? Isn't that the Jag programmer's responsibility? (YES) +#warning "!!! DSP IRQs that go to the 68K have to be routed thru TOM !!! FIX !!!" if (JERRYIRQEnabled(IRQ2_DSP))// && jaguar_interrupt_handler_is_valid(64)) { JERRYSetPendingIRQ(IRQ2_DSP); - dsp_releaseTimeslice(); - m68k_set_irq(7); // Set 68000 NMI... + DSPReleaseTimeslice(); + m68k_set_irq(2); // Set 68000 IPL 2... } data &= ~CPUINT; } @@ -820,7 +877,7 @@ WriteLog("Write to DSP CTRL by %s: %08X\n", whoName[who], data); WriteLog("DSP: CPU -> DSP interrupt\n"); #endif m68k_end_timeslice(); - gpu_releaseTimeslice(); + GPUReleaseTimeslice(); DSPSetIRQLine(DSPIRQ_CPU, ASSERT_LINE); data &= ~DSPINT0; } @@ -867,7 +924,7 @@ WriteLog("\n"); if (who == M68K) m68k_end_timeslice(); else if (who == GPU) - gpu_releaseTimeslice(); + GPUReleaseTimeslice(); if (!wasRunning) FlushDSPPipeline(); @@ -929,7 +986,7 @@ void DSPHandleIRQs(void) if (!bits) // Bail if nothing is enabled return; - int which = 0; // Determine which interrupt + int which = 0; // Determine which interrupt if (bits & 0x01) which = 0; if (bits & 0x02) @@ -988,8 +1045,15 @@ WriteLog("\tW -> %02u, %02u, %02u; r1=%08X, r2= %08X, res=%08X, wb=%u (%s)\n", p } } +#ifndef NEW_SCOREBOARD if (affectsScoreboard[pipeline[plPtrWrite].opcode]) scoreboard[pipeline[plPtrWrite].operand2] = false; +#else +//Yup, sequential MOVEQ # problem fixing (I hope!)... + if (affectsScoreboard[pipeline[plPtrWrite].opcode]) + if (scoreboard[pipeline[plPtrWrite].operand2]) + scoreboard[pipeline[plPtrWrite].operand2]--; +#endif } dsp_flags |= IMASK; @@ -1004,8 +1068,8 @@ ctrl2[4] = dsp_flags; WriteLog(" [PC will return to %08X, R31 = %08X]\n", dsp_pc - (pipeline[plPtrExec].opcode == 38 ? 6 : (pipeline[plPtrExec].opcode == PIPELINE_STALL ? 0 : 2)), dsp_reg[31]); #endif - // subqt #4,r31 ; pre-decrement stack pointer - // move pc,r30 ; address of interrupted code + // subqt #4,r31 ; pre-decrement stack pointer + // move pc,r30 ; address of interrupted code // store r30,(r31) ; store return address dsp_reg[31] -= 4; //CC only! @@ -1036,8 +1100,8 @@ SET32(ram2, regs2[31] - 0xF1B000, dsp_pc - 2 - (pipeline[plPtrExec].opcode == 38 #endif //!!!!!!!! - // movei #service_address,r30 ; pointer to ISR entry - // jump (r30) ; jump to ISR + // movei #service_address,r30 ; pointer to ISR entry + // jump (r30) ; jump to ISR // nop dsp_pc = dsp_reg[30] = DSP_WORK_RAM_BASE + (which * 0x10); //CC only! @@ -1088,7 +1152,7 @@ DSPUpdateRegisterBanks(); if (!bits) // Bail if nothing is enabled return; - int which = 0; // Determine which interrupt + int which = 0; // Determine which interrupt if (bits & 0x01) which = 0; if (bits & 0x02) @@ -1117,8 +1181,8 @@ ctrl1[4] = dsp_flags; WriteLog(" [PC will return to %08X, R31 = %08X]\n", dsp_pc, dsp_reg[31]); #endif - // subqt #4,r31 ; pre-decrement stack pointer - // move pc,r30 ; address of interrupted code + // subqt #4,r31 ; pre-decrement stack pointer + // move pc,r30 ; address of interrupted code // store r30,(r31) ; store return address dsp_reg[31] -= 4; //CC only! @@ -1133,8 +1197,8 @@ SET32(ram1, regs1[31] - 0xF1B000, dsp_pc - 2); #endif //!!!!!!!! - // movei #service_address,r30 ; pointer to ISR entry - // jump (r30) ; jump to ISR + // movei #service_address,r30 ; pointer to ISR entry + // jump (r30) ; jump to ISR // nop dsp_pc = dsp_reg[30] = DSP_WORK_RAM_BASE + (which * 0x10); //CC only! @@ -1169,13 +1233,18 @@ DSPHandleIRQsNP(); #endif //!!!!!!!! } + + // Not sure if this is correct behavior, but according to JTRM, + // the IRQ output of JERRY is fed to this IRQ in the GPU... +// Not sure this is right--DSP interrupts seem to be different from the JERRY interrupts! +// GPUSetIRQLine(GPUIRQ_DSP, ASSERT_LINE); } void DSPInit(void) { - memory_malloc_secure((void **)&dsp_ram_8, 0x2000, "DSP work RAM"); - memory_malloc_secure((void **)&dsp_reg_bank_0, 32 * sizeof(int32), "DSP bank 0 regs"); - memory_malloc_secure((void **)&dsp_reg_bank_1, 32 * sizeof(int32), "DSP bank 1 regs"); +// memory_malloc_secure((void **)&dsp_ram_8, 0x2000, "DSP work RAM"); +// memory_malloc_secure((void **)&dsp_reg_bank_0, 32 * sizeof(int32), "DSP bank 0 regs"); +// memory_malloc_secure((void **)&dsp_reg_bank_1, 32 * sizeof(int32), "DSP bank 1 regs"); dsp_build_branch_condition_table(); DSPReset(); @@ -1252,16 +1321,16 @@ void DSPDone(void) WriteLog("DSP: Stopped at PC=%08X dsp_modulo=%08X (dsp %s running)\n", dsp_pc, dsp_modulo, (DSP_RUNNING ? "was" : "wasn't")); WriteLog("DSP: %sin interrupt handler\n", (dsp_flags & IMASK ? "" : "not ")); - // get the active interrupt bits + // get the active interrupt bits int bits = ((dsp_control >> 10) & 0x20) | ((dsp_control >> 6) & 0x1F); - // get the interrupt mask + // get the interrupt mask int mask = ((dsp_flags >> 11) & 0x20) | ((dsp_flags >> 4) & 0x1F); WriteLog("DSP: pending=%08X enabled=%08X\n", bits, mask); WriteLog("\nRegisters bank 0\n"); for(int j=0; j<8; j++) { - WriteLog("\tr%2i=0x%.8x r%2i=0x%.8x r%2i=0x%.8x r%2i=0x%.8x\n", + WriteLog("\tR%02i=%08X R%02i=%08X R%02i=%08X R%02i=%08X\n", (j << 2) + 0, dsp_reg_bank_0[(j << 2) + 0], (j << 2) + 1, dsp_reg_bank_0[(j << 2) + 1], (j << 2) + 2, dsp_reg_bank_0[(j << 2) + 2], @@ -1270,7 +1339,7 @@ void DSPDone(void) WriteLog("\nRegisters bank 1\n"); for (j=0; j<8; j++) { - WriteLog("\tr%2i=0x%.8x r%2i=0x%.8x r%2i=0x%.8x r%2i=0x%.8x\n", + WriteLog("\tR%02i=%08X R%02i=%08X R%02i=%08X R%02i=%08X\n", (j << 2) + 0, dsp_reg_bank_1[(j << 2) + 0], (j << 2) + 1, dsp_reg_bank_1[(j << 2) + 1], (j << 2) + 2, dsp_reg_bank_1[(j << 2) + 2], @@ -1294,7 +1363,14 @@ void DSPDone(void) WriteLog("\t%s %i\n", dsp_opcode_str[i], dsp_opcode_use[i]); }//*/ - memory_free(dsp_ram_8); +// memory_free(dsp_ram_8); +// memory_free(dsp_reg_bank_0); +// memory_free(dsp_reg_bank_1); + if (dsp_branch_condition_table) + free(dsp_branch_condition_table); + + if (mirror_table) + free(mirror_table); } @@ -1523,6 +1599,21 @@ void DSPExec(int32 cycles) while (cycles > 0 && DSP_RUNNING) { +/*extern uint32 totalFrames; +//F1B2F6: LOAD (R14+$04), R24 [NCZ:001, R14+$04=00F20018, R24=FFFFFFFF] -> Jaguar: Unknown word read at 00F20018 by DSP (M68K PC=00E32E) +//-> 43 + 1 + 24 -> $2B + $01 + $18 -> 101011 00001 11000 -> 1010 1100 0011 1000 -> AC38 +//C470 -> 1100 0100 0111 0000 -> 110001 00011 10000 -> 49, 3, 16 -> STORE R16, (R14+$0C) +//F1B140: +if (totalFrames >= 377 && GET16(dsp_ram_8, 0x0002F6) == 0xAC38 && dsp_pc == 0xF1B140) +{ + doDSPDis = true; + WriteLog("Starting disassembly at frame #%u...\n", totalFrames); +} +if (dsp_pc == 0xF1B092) + doDSPDis = false;//*/ +/*if (dsp_pc == 0xF1B140) + doDSPDis = true;//*/ + if (IMASKCleared) // If IMASK was cleared, { #ifdef DSP_DEBUG_IRQ @@ -1596,7 +1687,7 @@ if ((dsp_pc < 0xF1B000 || dsp_pc > 0xF1CFFE) && !tripwire) static void dsp_opcode_jump(void) { #ifdef DSP_DIS_JUMP -char * condition[32] = +const char * condition[32] = { "T", "nz", "z", "???", "nc", "nc nz", "nc z", "???", "c", "c nz", "c z", "???", "???", "???", "???", "???", "???", "???", "???", "???", "nn", "nn nz", "nn z", "???", "n", "n nz", "n z", "???", @@ -1631,7 +1722,7 @@ char * condition[32] = static void dsp_opcode_jr(void) { #ifdef DSP_DIS_JR -char * condition[32] = +const char * condition[32] = { "T", "nz", "z", "???", "nc", "nc nz", "nc z", "???", "c", "c nz", "c z", "???", "???", "???", "???", "???", "???", "???", "???", "???", "nn", "nn nz", "nn z", "???", "n", "n nz", "n z", "???", @@ -1670,7 +1761,7 @@ static void dsp_opcode_add(void) if (doDSPDis) WriteLog("%06X: ADD R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", dsp_pc-2, IMM_1, IMM_2, dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_1, RM, IMM_2, RN); #endif - UINT32 res = RN + RM; + uint32 res = RN + RM; SET_ZNC_ADD(RN, RM, res); RN = res; #ifdef DSP_DIS_ADD @@ -1685,8 +1776,8 @@ static void dsp_opcode_addc(void) if (doDSPDis) WriteLog("%06X: ADDC R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", dsp_pc-2, IMM_1, IMM_2, dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_1, RM, IMM_2, RN); #endif - UINT32 res = RN + RM + dsp_flag_c; - UINT32 carry = dsp_flag_c; + uint32 res = RN + RM + dsp_flag_c; + uint32 carry = dsp_flag_c; // SET_ZNC_ADD(RN, RM, res); //???BUG??? Yes! SET_ZNC_ADD(RN + carry, RM, res); // SET_ZNC_ADD(RN, RM + carry, res); @@ -1703,8 +1794,8 @@ static void dsp_opcode_addq(void) if (doDSPDis) WriteLog("%06X: ADDQ #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", dsp_pc-2, dsp_convert_zero[IMM_1], IMM_2, dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_2, RN); #endif - UINT32 r1 = dsp_convert_zero[IMM_1]; - UINT32 res = RN + r1; + uint32 r1 = dsp_convert_zero[IMM_1]; + uint32 res = RN + r1; CLR_ZNC; SET_ZNC_ADD(RN, r1, res); RN = res; #ifdef DSP_DIS_ADDQ @@ -1719,7 +1810,7 @@ static void dsp_opcode_sub(void) if (doDSPDis) WriteLog("%06X: SUB R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", dsp_pc-2, IMM_1, IMM_2, dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_1, RM, IMM_2, RN); #endif - UINT32 res = RN - RM; + uint32 res = RN - RM; SET_ZNC_SUB(RN, RM, res); RN = res; #ifdef DSP_DIS_SUB @@ -1734,8 +1825,8 @@ static void dsp_opcode_subc(void) if (doDSPDis) WriteLog("%06X: SUBC R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", dsp_pc-2, IMM_1, IMM_2, dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_1, RM, IMM_2, RN); #endif - UINT32 res = RN - RM - dsp_flag_c; - UINT32 borrow = dsp_flag_c; + uint32 res = RN - RM - dsp_flag_c; + uint32 borrow = dsp_flag_c; SET_ZNC_SUB(RN - borrow, RM, res); RN = res; #ifdef DSP_DIS_SUBC @@ -1750,8 +1841,8 @@ static void dsp_opcode_subq(void) if (doDSPDis) WriteLog("%06X: SUBQ #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", dsp_pc-2, dsp_convert_zero[IMM_1], IMM_2, dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_2, RN); #endif - UINT32 r1 = dsp_convert_zero[IMM_1]; - UINT32 res = RN - r1; + uint32 r1 = dsp_convert_zero[IMM_1]; + uint32 res = RN - r1; SET_ZNC_SUB(RN, r1, res); RN = res; #ifdef DSP_DIS_SUBQ @@ -1766,7 +1857,7 @@ static void dsp_opcode_cmp(void) if (doDSPDis) WriteLog("%06X: CMP R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", dsp_pc-2, IMM_1, IMM_2, dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_1, RM, IMM_2, RN); #endif - UINT32 res = RN - RM; + uint32 res = RN - RM; SET_ZNC_SUB(RN, RM, res); #ifdef DSP_DIS_CMP if (doDSPDis) @@ -1782,8 +1873,8 @@ static void dsp_opcode_cmpq(void) if (doDSPDis) WriteLog("%06X: CMPQ #%d, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", dsp_pc-2, sqtable[IMM_1], IMM_2, dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_2, RN); #endif - UINT32 r1 = sqtable[IMM_1 & 0x1F]; // I like this better -> (INT8)(jaguar.op >> 2) >> 3; - UINT32 res = RN - r1; + uint32 r1 = sqtable[IMM_1 & 0x1F]; // I like this better -> (INT8)(jaguar.op >> 2) >> 3; + uint32 res = RN - r1; SET_ZNC_SUB(RN, r1, res); #ifdef DSP_DIS_CMPQ if (doDSPDis) @@ -1837,13 +1928,13 @@ static void dsp_opcode_not(void) { #ifdef DSP_DIS_NOT if (doDSPDis) - WriteLog("%06X: NOT R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", dsp_pc-2, IMM_1, IMM_2, dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_1, RM, IMM_2, RN); + WriteLog("%06X: NOT R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", dsp_pc-2, IMM_2, dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_2, RN); #endif RN = ~RN; SET_ZN(RN); #ifdef DSP_DIS_NOT if (doDSPDis) - WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_1, RM, IMM_2, RN); + WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_2, RN); #endif } @@ -2132,7 +2223,7 @@ static void dsp_opcode_bclr(void) if (doDSPDis) WriteLog("%06X: BCLR #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", dsp_pc-2, IMM_1, IMM_2, dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_2, RN); #endif - UINT32 res = RN & ~(1 << IMM_1); + uint32 res = RN & ~(1 << IMM_1); RN = res; SET_ZN(res); #ifdef DSP_DIS_BCLR @@ -2160,7 +2251,7 @@ static void dsp_opcode_bset(void) if (doDSPDis) WriteLog("%06X: BSET #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", dsp_pc-2, IMM_1, IMM_2, dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_2, RN); #endif - UINT32 res = RN | (1 << IMM_1); + uint32 res = RN | (1 << IMM_1); RN = res; SET_ZN(res); #ifdef DSP_DIS_BSET @@ -2208,11 +2299,11 @@ static void dsp_opcode_imacn(void) if (doDSPDis) WriteLog("[NCZ:%u%u%u, DSP_ACC=%02X%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, (uint8)(dsp_acc >> 32), (uint32)(dsp_acc & 0xFFFFFFFF)); #endif -} +} static void dsp_opcode_mtoi(void) { - RN = (((INT32)RM >> 8) & 0xFF800000) | (RM & 0x007FFFFF); + RN = (((int32)RM >> 8) & 0xFF800000) | (RM & 0x007FFFFF); SET_ZN(RN); } @@ -2248,7 +2339,7 @@ static void dsp_opcode_mmult(void) if (!(dsp_matrix_control & 0x10)) { for (int i = 0; i < count; i++) - { + { int16 a; if (i&0x01) a=(int16)((dsp_alternate_reg[dsp_opcode_first_parameter + (i>>1)]>>16)&0xffff); @@ -2281,9 +2372,13 @@ static void dsp_opcode_mmult(void) static void dsp_opcode_abs(void) { +#ifdef DSP_DIS_ABS + if (doDSPDis) + WriteLog("%06X: ABS R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", dsp_pc-2, IMM_2, dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_2, RN); +#endif uint32 _Rn = RN; uint32 res; - + if (_Rn == 0x80000000) dsp_flag_n = 1; else @@ -2292,6 +2387,10 @@ static void dsp_opcode_abs(void) res = RN = (_Rn & 0x80000000 ? -_Rn : _Rn); CLR_ZN; SET_Z(res); } +#ifdef DSP_DIS_ABS + if (doDSPDis) + WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_2, RN); +#endif } static void dsp_opcode_div(void) @@ -2342,7 +2441,7 @@ static void dsp_opcode_neg(void) if (doDSPDis) WriteLog("%06X: NEG R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", dsp_pc-2, IMM_2, dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_2, RN); #endif - UINT32 res = -RN; + uint32 res = -RN; SET_ZNC_SUB(0, RN, res); RN = res; #ifdef DSP_DIS_NEG @@ -2357,8 +2456,8 @@ static void dsp_opcode_shlq(void) if (doDSPDis) WriteLog("%06X: SHLQ #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", dsp_pc-2, 32 - IMM_1, IMM_2, dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_2, RN); #endif - INT32 r1 = 32 - IMM_1; - UINT32 res = RN << r1; + int32 r1 = 32 - IMM_1; + uint32 res = RN << r1; SET_ZN(res); dsp_flag_c = (RN >> 31) & 1; RN = res; #ifdef DSP_DIS_SHLQ @@ -2373,8 +2472,8 @@ static void dsp_opcode_shrq(void) if (doDSPDis) WriteLog("%06X: SHRQ #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", dsp_pc-2, dsp_convert_zero[IMM_1], IMM_2, dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_2, RN); #endif - INT32 r1 = dsp_convert_zero[IMM_1]; - UINT32 res = RN >> r1; + int32 r1 = dsp_convert_zero[IMM_1]; + uint32 res = RN >> r1; SET_ZN(res); dsp_flag_c = RN & 1; RN = res; #ifdef DSP_DIS_SHRQ @@ -2389,8 +2488,8 @@ static void dsp_opcode_ror(void) if (doDSPDis) WriteLog("%06X: ROR R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", dsp_pc-2, IMM_1, IMM_2, dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_1, RM, IMM_2, RN); #endif - UINT32 r1 = RM & 0x1F; - UINT32 res = (RN >> r1) | (RN << (32 - r1)); + uint32 r1 = RM & 0x1F; + uint32 res = (RN >> r1) | (RN << (32 - r1)); SET_ZN(res); dsp_flag_c = (RN >> 31) & 1; RN = res; #ifdef DSP_DIS_ROR @@ -2405,9 +2504,9 @@ static void dsp_opcode_rorq(void) if (doDSPDis) WriteLog("%06X: RORQ #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", dsp_pc-2, dsp_convert_zero[IMM_1], IMM_2, dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_2, RN); #endif - UINT32 r1 = dsp_convert_zero[IMM_1 & 0x1F]; - UINT32 r2 = RN; - UINT32 res = (r2 >> r1) | (r2 << (32 - r1)); + uint32 r1 = dsp_convert_zero[IMM_1 & 0x1F]; + uint32 r2 = RN; + uint32 res = (r2 >> r1) | (r2 << (32 - r1)); RN = res; SET_ZN(res); dsp_flag_c = (r2 >> 31) & 0x01; #ifdef DSP_DIS_RORQ @@ -2453,7 +2552,7 @@ static void dsp_opcode_sharq(void) if (doDSPDis) WriteLog("%06X: SHARQ #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", dsp_pc-2, dsp_convert_zero[IMM_1], IMM_2, dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_2, RN); #endif - UINT32 res = (INT32)RN >> dsp_convert_zero[IMM_1]; + uint32 res = (int32)RN >> dsp_convert_zero[IMM_1]; SET_ZN(res); dsp_flag_c = RN & 0x01; RN = res; #ifdef DSP_DIS_SHARQ @@ -2499,9 +2598,9 @@ void dsp_opcode_addqmod(void) if (doDSPDis) WriteLog("%06X: ADDQMOD #%u, R%02u [NCZ:%u%u%u, R%02u=%08X, DSP_MOD=%08X] -> ", dsp_pc-2, dsp_convert_zero[IMM_1], IMM_2, dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_2, RN, dsp_modulo); #endif - UINT32 r1 = dsp_convert_zero[IMM_1]; - UINT32 r2 = RN; - UINT32 res = r2 + r1; + uint32 r1 = dsp_convert_zero[IMM_1]; + uint32 r2 = RN; + uint32 res = r2 + r1; res = (res & (~dsp_modulo)) | (r2 & dsp_modulo); RN = res; SET_ZNC_ADD(r2, r1, res); @@ -2511,37 +2610,37 @@ void dsp_opcode_addqmod(void) #endif } -void dsp_opcode_subqmod(void) +void dsp_opcode_subqmod(void) { - UINT32 r1 = dsp_convert_zero[IMM_1]; - UINT32 r2 = RN; - UINT32 res = r2 - r1; + uint32 r1 = dsp_convert_zero[IMM_1]; + uint32 r2 = RN; + uint32 res = r2 - r1; res = (res & (~dsp_modulo)) | (r2 & dsp_modulo); RN = res; - + SET_ZNC_SUB(r2, r1, res); } -void dsp_opcode_mirror(void) +void dsp_opcode_mirror(void) { - UINT32 r1 = RN; + uint32 r1 = RN; RN = (mirror_table[r1 & 0xFFFF] << 16) | mirror_table[r1 >> 16]; SET_ZN(RN); } -void dsp_opcode_sat32s(void) +void dsp_opcode_sat32s(void) { - INT32 r2 = (UINT32)RN; - INT32 temp = dsp_acc >> 32; - UINT32 res = (temp < -1) ? (INT32)0x80000000 : (temp > 0) ? (INT32)0x7FFFFFFF : r2; + int32 r2 = (uint32)RN; + int32 temp = dsp_acc >> 32; + uint32 res = (temp < -1) ? (int32)0x80000000 : (temp > 0) ? (int32)0x7FFFFFFF : r2; RN = res; SET_ZN(res); } -void dsp_opcode_sat16s(void) +void dsp_opcode_sat16s(void) { - INT32 r2 = RN; - UINT32 res = (r2 < -32768) ? -32768 : (r2 > 32767) ? 32767 : r2; + int32 r2 = RN; + uint32 res = (r2 < -32768) ? -32768 : (r2 > 32767) ? 32767 : r2; RN = res; SET_ZN(res); } @@ -2554,7 +2653,7 @@ static void DSP_abs(void); static void DSP_add(void); static void DSP_addc(void); static void DSP_addq(void); -static void DSP_addqmod(void); +static void DSP_addqmod(void); static void DSP_addqt(void); static void DSP_and(void); static void DSP_bclr(void); @@ -2576,7 +2675,7 @@ static void DSP_load_r14_i(void); static void DSP_load_r14_r(void); static void DSP_load_r15_i(void); static void DSP_load_r15_r(void); -static void DSP_mirror(void); +static void DSP_mirror(void); static void DSP_mmult(void); static void DSP_move(void); static void DSP_movefa(void); @@ -2594,8 +2693,8 @@ static void DSP_or(void); static void DSP_resmac(void); static void DSP_ror(void); static void DSP_rorq(void); -static void DSP_sat16s(void); -static void DSP_sat32s(void); +static void DSP_sat16s(void); +static void DSP_sat32s(void); static void DSP_sh(void); static void DSP_sha(void); static void DSP_sharq(void); @@ -2611,7 +2710,7 @@ static void DSP_store_r15_r(void); static void DSP_sub(void); static void DSP_subc(void); static void DSP_subq(void); -static void DSP_subqmod(void); +static void DSP_subqmod(void); static void DSP_subqt(void); static void DSP_xor(void); @@ -2661,6 +2760,21 @@ bool readAffected[64][2] = { true, true}, { true, true}, {false, false}, {false, true} }; +bool isLoadStore[65] = +{ + false, false, false, false, false, false, false, false, + false, false, false, false, false, false, false, false, + + false, false, false, false, false, false, false, false, + false, false, false, false, false, false, false, false, + + false, false, false, false, false, false, false, true, + true, true, false, true, true, true, true, true, + + false, true, true, false, false, false, false, false, + false, false, true, true, true, true, false, false, false +}; + void FlushDSPPipeline(void) { plPtrFetch = 3, plPtrRead = 2, plPtrExec = 1, plPtrWrite = 0; @@ -2669,7 +2783,7 @@ void FlushDSPPipeline(void) pipeline[i].opcode = PIPELINE_STALL; for(int i=0; i<32; i++) - scoreboard[i] = false; + scoreboard[i] = 0; } // @@ -2836,7 +2950,7 @@ because the STORE instruction writes back on stage #2 of the pipeline instead of If it were done properly, the STORE write back would occur *after* (well, technically, during) the execution of the the JUMP that follows it. -!!! FIX !!! +!!! FIX !!! [DONE] F1B08A: JR z, F1B082 [NCZ:001] Branched! F1B08A: NOP [NCZ:001] @@ -2938,7 +3052,7 @@ F1B1FC: MOVEI #$00F1A100, R01 [NCZ:001, R01=00F1A100] -> [NCZ:001, R01=00F1A100 uint32 pcQueue1[0x400]; uint32 pcQPtr1 = 0; - +static uint32 prevR1; //Let's try a 3 stage pipeline.... //Looks like 3 stage is correct, otherwise bad things happen... void DSPExecP2(int32 cycles) @@ -2948,8 +3062,33 @@ void DSPExecP2(int32 cycles) while (cycles > 0 && DSP_RUNNING) { -if (dsp_pc == 0xF1B0A0) +/*extern uint32 totalFrames; +//F1B2F6: LOAD (R14+$04), R24 [NCZ:001, R14+$04=00F20018, R24=FFFFFFFF] -> Jaguar: Unknown word read at 00F20018 by DSP (M68K PC=00E32E) +//-> 43 + 1 + 24 -> $2B + $01 + $18 -> 101011 00001 11000 -> 1010 1100 0011 1000 -> AC38 +//C470 -> 1100 0100 0111 0000 -> 110001 00011 10000 -> 49, 3, 16 -> STORE R16, (R14+$0C) +//F1B140: +if (totalFrames >= 377 && GET16(dsp_ram_8, 0x0002F6) == 0xAC38 && dsp_pc == 0xF1B140) +{ doDSPDis = true; + WriteLog("Starting disassembly at frame #%u...\n", totalFrames); +} +if (dsp_pc == 0xF1B092) + doDSPDis = false;//*/ +/*if (totalFrames >= 373 && GET16(dsp_ram_8, 0x0002F6) == 0xAC38) + doDSPDis = true;//*/ +/*if (totalFrames >= 373 && dsp_pc == 0xF1B0A0) + doDSPDis = true;//*/ +/*if (dsp_pc == 0xF1B0A0) + doDSPDis = true;//*/ +/*if (dsp_pc == 0xF1B0D2) && dsp_reg[1] == 0x2140C) + doDSPDis = true;//*/ +//Two parter... (not sure how to write this) +//if (dsp_pc == 0xF1B0D2) +// prevR1 = dsp_reg[1]; + +//F1B0D2: ADDQT #8, R01 [NCZ:000, R01=0002140C] -> [NCZ:000, R01=00021414] +//F1B0D2: ADDQT #8, R01 [NCZ:000, R01=0002140C] -> [NCZ:000, R01=00021414] + pcQueue1[pcQPtr1++] = dsp_pc; pcQPtr1 &= 0x3FF; @@ -3014,10 +3153,15 @@ WriteLog("\tW -> %02u, %02u, %02u; r1=%08X, r2= %08X, res=%08X, wb=%u (%s)\n", p //Small problem--when say LOAD or STORE (R14/5+$nn) is executed AFTER an instruction that //modifies R14/5, we don't check the scoreboard for R14/5 (and we need to!)... !!! FIX !!! //Ugly, but [DONE] +//Another problem: Any sequential combination of LOAD and STORE operations will cause the +//pipeline to stall, and we don't take care of that here. !!! FIX !!! if ((scoreboard[pipeline[plPtrRead].operand1] && readAffected[pipeline[plPtrRead].opcode][0]) || (scoreboard[pipeline[plPtrRead].operand2] && readAffected[pipeline[plPtrRead].opcode][1]) || ((pipeline[plPtrRead].opcode == 43 || pipeline[plPtrRead].opcode == 58) && scoreboard[14]) - || ((pipeline[plPtrRead].opcode == 44 || pipeline[plPtrRead].opcode == 59) && scoreboard[15])) + || ((pipeline[plPtrRead].opcode == 44 || pipeline[plPtrRead].opcode == 59) && scoreboard[15]) +//Not sure that this is the best way to fix the LOAD/STORE problem... But it seems to +//work--somewhat... + || (isLoadStore[pipeline[plPtrRead].opcode] && isLoadStore[pipeline[plPtrExec].opcode])) // We have a hit in the scoreboard, so we have to stall the pipeline... #ifdef DSP_DEBUG_PL2 { @@ -3043,7 +3187,12 @@ WriteLog("\n"); // Shouldn't we be more selective with the register scoreboarding? // Yes, we should. !!! FIX !!! Kinda [DONE] +#ifndef NEW_SCOREBOARD scoreboard[pipeline[plPtrRead].operand2] = affectsScoreboard[pipeline[plPtrRead].opcode]; +#else +//Hopefully this will fix the dual MOVEQ # problem... + scoreboard[pipeline[plPtrRead].operand2] += (affectsScoreboard[pipeline[plPtrRead].opcode] ? 1 : 0); +#endif //Advance PC here??? Yes. dsp_pc += (pipeline[plPtrRead].opcode == 38 ? 6 : 2); @@ -3061,6 +3210,8 @@ WriteLog("\tW -> %02u, %02u, %02u; r1=%08X, r2= %08X, res=%08X, wb=%u (%s)\n", p // Stage 2: Execute if (pipeline[plPtrExec].opcode != PIPELINE_STALL) { +if (doDSPDis) + WriteLog("\t[inst=%02u][R28=%08X, alt R28=%08X, REGPAGE=%s]\n", pipeline[plPtrExec].opcode, dsp_reg[28], dsp_alternate_reg[28], (dsp_flags & REGPAGE ? "set" : "not set")); #ifdef DSP_DEBUG_PL2 if (doDSPDis) { @@ -3082,6 +3233,11 @@ lastExec = pipeline[plPtrExec].instruction; //Let's not, until we do the stalling correctly... //But, we gotta while we're doing the comparison core...! //Or do we? cycles--; +//Really, the whole thing is wrong. When the pipeline is correctly stuffed, most instructions +//will execute in one clock cycle (others, like DIV, will likely not). So, the challenge is +//to model this clock cycle behavior correctly... +//Also, the pipeline stalls too much--mostly because the transparent writebacks at stage 3 +//don't affect the reads at stage 1... #ifdef DSP_DEBUG_STALL if (doDSPDis) WriteLog("[STALL... DSP_PC = %08X]\n", dsp_pc); @@ -3123,8 +3279,15 @@ WriteLog("\n"); } } +#ifndef NEW_SCOREBOARD if (affectsScoreboard[pipeline[plPtrWrite].opcode]) scoreboard[pipeline[plPtrWrite].operand2] = false; +#else +//Yup, sequential MOVEQ # problem fixing (I hope!)... + if (affectsScoreboard[pipeline[plPtrWrite].opcode]) + if (scoreboard[pipeline[plPtrWrite].operand2]) + scoreboard[pipeline[plPtrWrite].operand2]--; +#endif } // Push instructions through the pipeline... @@ -3268,7 +3431,7 @@ static void DSP_abs(void) WriteLog("%06X: ABS R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", DSP_PPC, PIMM2, dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM2, PRN); #endif uint32 _Rn = PRN; - + if (_Rn == 0x80000000) dsp_flag_n = 1; else @@ -3289,7 +3452,7 @@ static void DSP_add(void) if (doDSPDis) WriteLog("%06X: ADD R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", DSP_PPC, PIMM1, PIMM2, dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM1, PRM, PIMM2, PRN); #endif - UINT32 res = PRN + PRM; + uint32 res = PRN + PRM; SET_ZNC_ADD(PRN, PRM, res); PRES = res; #ifdef DSP_DIS_ADD @@ -3304,8 +3467,8 @@ static void DSP_addc(void) if (doDSPDis) WriteLog("%06X: ADDC R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", DSP_PPC, PIMM1, PIMM2, dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM1, PRM, PIMM2, PRN); #endif - UINT32 res = PRN + PRM + dsp_flag_c; - UINT32 carry = dsp_flag_c; + uint32 res = PRN + PRM + dsp_flag_c; + uint32 carry = dsp_flag_c; // SET_ZNC_ADD(PRN, PRM, res); //???BUG??? Yes! SET_ZNC_ADD(PRN + carry, PRM, res); // SET_ZNC_ADD(PRN, PRM + carry, res); @@ -3322,8 +3485,8 @@ static void DSP_addq(void) if (doDSPDis) WriteLog("%06X: ADDQ #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", DSP_PPC, dsp_convert_zero[PIMM1], PIMM2, dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM2, PRN); #endif - UINT32 r1 = dsp_convert_zero[PIMM1]; - UINT32 res = PRN + r1; + uint32 r1 = dsp_convert_zero[PIMM1]; + uint32 res = PRN + r1; CLR_ZNC; SET_ZNC_ADD(PRN, r1, res); PRES = res; #ifdef DSP_DIS_ADDQ @@ -3338,9 +3501,9 @@ static void DSP_addqmod(void) if (doDSPDis) WriteLog("%06X: ADDQMOD #%u, R%02u [NCZ:%u%u%u, R%02u=%08X, DSP_MOD=%08X] -> ", DSP_PPC, dsp_convert_zero[PIMM1], PIMM2, dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM2, PRN, dsp_modulo); #endif - UINT32 r1 = dsp_convert_zero[PIMM1]; - UINT32 r2 = PRN; - UINT32 res = r2 + r1; + uint32 r1 = dsp_convert_zero[PIMM1]; + uint32 r2 = PRN; + uint32 res = r2 + r1; res = (res & (~dsp_modulo)) | (r2 & dsp_modulo); PRES = res; SET_ZNC_ADD(r2, r1, res); @@ -3415,7 +3578,7 @@ static void DSP_btst(void) NO_WRITEBACK; #ifdef DSP_DIS_BTST if (doDSPDis) - WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM2, PRES); + WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM2, PRN); #endif } @@ -3425,7 +3588,7 @@ static void DSP_cmp(void) if (doDSPDis) WriteLog("%06X: CMP R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", DSP_PPC, PIMM1, PIMM2, dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM1, PRM, PIMM2, PRN); #endif - UINT32 res = PRN - PRM; + uint32 res = PRN - PRM; SET_ZNC_SUB(PRN, PRM, res); NO_WRITEBACK; #ifdef DSP_DIS_CMP @@ -3442,8 +3605,8 @@ static void DSP_cmpq(void) if (doDSPDis) WriteLog("%06X: CMPQ #%d, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", DSP_PPC, sqtable[PIMM1], PIMM2, dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM2, PRN); #endif - UINT32 r1 = sqtable[PIMM1 & 0x1F]; // I like this better -> (INT8)(jaguar.op >> 2) >> 3; - UINT32 res = PRN - r1; + uint32 r1 = sqtable[PIMM1 & 0x1F]; // I like this better -> (INT8)(jaguar.op >> 2) >> 3; + uint32 res = PRN - r1; SET_ZNC_SUB(PRN, r1, res); NO_WRITEBACK; #ifdef DSP_DIS_CMPQ @@ -3491,7 +3654,7 @@ static void DSP_imacn(void) if (doDSPDis) WriteLog("[NCZ:%u%u%u, DSP_ACC=%02X%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, (uint8)(dsp_acc >> 32), (uint32)(dsp_acc & 0xFFFFFFFF)); #endif -} +} static void DSP_imult(void) { @@ -3541,14 +3704,14 @@ static void DSP_illegal(void) static void DSP_jr(void) { #ifdef DSP_DIS_JR -char * condition[32] = +const char * condition[32] = { "T", "nz", "z", "???", "nc", "nc nz", "nc z", "???", "c", "c nz", "c z", "???", "???", "???", "???", "???", "???", "???", "???", "???", "nn", "nn nz", "nn z", "???", "n", "n nz", "n z", "???", "???", "???", "???", "F" }; if (doDSPDis) //How come this is always off by 2??? - WriteLog("%06X: JR %s, %06X [NCZ:%u%u%u] ", DSP_PPC, condition[PIMM2], DSP_PPC+((PIMM1 & 0x10 ? 0xFFFFFFF0 | PIMM1 : PIMM1) * 2), dsp_flag_n, dsp_flag_c, dsp_flag_z); + WriteLog("%06X: JR %s, %06X [NCZ:%u%u%u] ", DSP_PPC, condition[PIMM2], DSP_PPC+((PIMM1 & 0x10 ? 0xFFFFFFF0 | PIMM1 : PIMM1) * 2)+2, dsp_flag_n, dsp_flag_c, dsp_flag_z); #endif // KLUDGE: Used by BRANCH_CONDITION macro uint32 jaguar_flags = (dsp_flag_n << 2) | (dsp_flag_c << 1) | dsp_flag_z; @@ -3567,7 +3730,7 @@ char * condition[32] = // Now that we've branched, we have to make sure that the following instruction // is executed atomically with this one and then flush the pipeline before setting // the new PC. - + // Step 1: Handle writebacks at stage 3 of pipeline /* if (pipeline[plPtrWrite].opcode != PIPELINE_STALL) { @@ -3594,8 +3757,15 @@ char * condition[32] = } } +#ifndef NEW_SCOREBOARD if (affectsScoreboard[pipeline[plPtrWrite].opcode]) scoreboard[pipeline[plPtrWrite].operand2] = false; +#else +//Yup, sequential MOVEQ # problem fixing (I hope!)... + if (affectsScoreboard[pipeline[plPtrWrite].opcode]) + if (scoreboard[pipeline[plPtrWrite].operand2]) + scoreboard[pipeline[plPtrWrite].operand2]--; +#endif } // Step 2: Push instruction through pipeline & execute following instruction @@ -3619,6 +3789,7 @@ char * condition[32] = pipeline[plPtrExec].reg2 = dsp_reg[pipeline[plPtrExec].operand2]; pipeline[plPtrExec].writebackRegister = pipeline[plPtrExec].operand2; // Set it to RN }//*/ + dsp_pc += 2; // For DSP_DIS_* accuracy DSPOpcode[pipeline[plPtrExec].opcode](); dsp_opcode_use[pipeline[plPtrExec].opcode]++; pipeline[plPtrWrite] = pipeline[plPtrExec]; @@ -3643,7 +3814,7 @@ char * condition[32] = static void DSP_jump(void) { #ifdef DSP_DIS_JUMP -char * condition[32] = +const char * condition[32] = { "T", "nz", "z", "???", "nc", "nc nz", "nc z", "???", "c", "c nz", "c z", "???", "???", "???", "???", "???", "???", "???", "???", "???", "nn", "nn nz", "nn z", "???", "n", "n nz", "n z", "???", @@ -3664,7 +3835,7 @@ char * condition[32] = // Now that we've branched, we have to make sure that the following instruction // is executed atomically with this one and then flush the pipeline before setting // the new PC. - + // Step 1: Handle writebacks at stage 3 of pipeline /* if (pipeline[plPtrWrite].opcode != PIPELINE_STALL) { @@ -3691,8 +3862,15 @@ char * condition[32] = } } +#ifndef NEW_SCOREBOARD if (affectsScoreboard[pipeline[plPtrWrite].opcode]) scoreboard[pipeline[plPtrWrite].operand2] = false; +#else +//Yup, sequential MOVEQ # problem fixing (I hope!)... + if (affectsScoreboard[pipeline[plPtrWrite].opcode]) + if (scoreboard[pipeline[plPtrWrite].operand2]) + scoreboard[pipeline[plPtrWrite].operand2]--; +#endif } // Step 2: Push instruction through pipeline & execute following instruction @@ -3717,6 +3895,7 @@ char * condition[32] = pipeline[plPtrExec].reg2 = dsp_reg[pipeline[plPtrExec].operand2]; pipeline[plPtrExec].writebackRegister = pipeline[plPtrExec].operand2; // Set it to RN }//*/ + dsp_pc += 2; // For DSP_DIS_* accuracy DSPOpcode[pipeline[plPtrExec].opcode](); dsp_opcode_use[pipeline[plPtrExec].opcode]++; pipeline[plPtrWrite] = pipeline[plPtrExec]; @@ -3834,9 +4013,9 @@ static void DSP_load_r15_r(void) #endif } -static void DSP_mirror(void) +static void DSP_mirror(void) { - UINT32 r1 = PRN; + uint32 r1 = PRN; PRES = (mirror_table[r1 & 0xFFFF] << 16) | mirror_table[r1 >> 16]; SET_ZN(PRES); } @@ -3851,7 +4030,7 @@ static void DSP_mmult(void) if (!(dsp_matrix_control & 0x10)) { for (int i = 0; i < count; i++) - { + { int16 a; if (i&0x01) a=(int16)((dsp_alternate_reg[dsp_opcode_first_parameter + (i>>1)]>>16)&0xffff); @@ -3929,10 +4108,18 @@ static void DSP_movei(void) static void DSP_movepc(void) { +#ifdef DSP_DIS_MOVEPC + if (doDSPDis) + WriteLog("%06X: MOVE PC, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", DSP_PPC, PIMM2, dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM2, PRN); +#endif //Need to fix this to take into account pipelining effects... !!! FIX !!! [DONE] // PRES = dsp_pc - 2; //Account for pipeline effects... - PRES = dsp_pc - (pipeline[plPtrRead].opcode == 38 ? 6 : (pipeline[plPtrRead].opcode == PIPELINE_STALL ? 0 : 2)); + PRES = dsp_pc - 2 - (pipeline[plPtrRead].opcode == 38 ? 6 : (pipeline[plPtrRead].opcode == PIPELINE_STALL ? 0 : 2)); +#ifdef DSP_DIS_MOVEPC + if (doDSPDis) + WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM2, PRES); +#endif } static void DSP_moveq(void) @@ -3967,7 +4154,7 @@ static void DSP_moveta(void) static void DSP_mtoi(void) { - PRES = (((INT32)PRM >> 8) & 0xFF800000) | (PRM & 0x007FFFFF); + PRES = (((int32)PRM >> 8) & 0xFF800000) | (PRM & 0x007FFFFF); SET_ZN(PRES); } @@ -3991,7 +4178,7 @@ static void DSP_neg(void) if (doDSPDis) WriteLog("%06X: NEG R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", DSP_PPC, PIMM2, dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM2, PRN); #endif - UINT32 res = -PRN; + uint32 res = -PRN; SET_ZNC_SUB(0, PRN, res); PRES = res; #ifdef DSP_DIS_NEG @@ -4035,13 +4222,13 @@ static void DSP_not(void) { #ifdef DSP_DIS_NOT if (doDSPDis) - WriteLog("%06X: NOT R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", DSP_PPC, PIMM1, PIMM2, dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM1, PRM, PIMM2, PRN); + WriteLog("%06X: NOT R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", DSP_PPC, PIMM2, dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM2, PRN); #endif PRES = ~PRN; SET_ZN(PRES); #ifdef DSP_DIS_NOT if (doDSPDis) - WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM1, PRM, PIMM2, PRES); + WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM2, PRES); #endif } @@ -4078,8 +4265,8 @@ static void DSP_ror(void) if (doDSPDis) WriteLog("%06X: ROR R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", DSP_PPC, PIMM1, PIMM2, dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM1, PRM, PIMM2, PRN); #endif - UINT32 r1 = PRM & 0x1F; - UINT32 res = (PRN >> r1) | (PRN << (32 - r1)); + uint32 r1 = PRM & 0x1F; + uint32 res = (PRN >> r1) | (PRN << (32 - r1)); SET_ZN(res); dsp_flag_c = (PRN >> 31) & 1; PRES = res; #ifdef DSP_DIS_ROR @@ -4094,9 +4281,9 @@ static void DSP_rorq(void) if (doDSPDis) WriteLog("%06X: RORQ #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", DSP_PPC, dsp_convert_zero[PIMM1], PIMM2, dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM2, PRN); #endif - UINT32 r1 = dsp_convert_zero[PIMM1 & 0x1F]; - UINT32 r2 = PRN; - UINT32 res = (r2 >> r1) | (r2 << (32 - r1)); + uint32 r1 = dsp_convert_zero[PIMM1 & 0x1F]; + uint32 r2 = PRN; + uint32 res = (r2 >> r1) | (r2 << (32 - r1)); PRES = res; SET_ZN(res); dsp_flag_c = (r2 >> 31) & 0x01; #ifdef DSP_DIS_RORQ @@ -4105,19 +4292,19 @@ static void DSP_rorq(void) #endif } -static void DSP_sat16s(void) +static void DSP_sat16s(void) { - INT32 r2 = PRN; - UINT32 res = (r2 < -32768) ? -32768 : (r2 > 32767) ? 32767 : r2; + int32 r2 = PRN; + uint32 res = (r2 < -32768) ? -32768 : (r2 > 32767) ? 32767 : r2; PRES = res; SET_ZN(res); } -static void DSP_sat32s(void) +static void DSP_sat32s(void) { - INT32 r2 = (UINT32)PRN; - INT32 temp = dsp_acc >> 32; - UINT32 res = (temp < -1) ? (INT32)0x80000000 : (temp > 0) ? (INT32)0x7FFFFFFF : r2; + int32 r2 = (uint32)PRN; + int32 temp = dsp_acc >> 32; + uint32 res = (temp < -1) ? (int32)0x80000000 : (temp > 0) ? (int32)0x7FFFFFFF : r2; PRES = res; SET_ZN(res); } @@ -4208,7 +4395,7 @@ static void DSP_sharq(void) if (doDSPDis) WriteLog("%06X: SHARQ #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", DSP_PPC, dsp_convert_zero[PIMM1], PIMM2, dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM2, PRN); #endif - UINT32 res = (INT32)PRN >> dsp_convert_zero[PIMM1]; + uint32 res = (int32)PRN >> dsp_convert_zero[PIMM1]; SET_ZN(res); dsp_flag_c = PRN & 0x01; PRES = res; #ifdef DSP_DIS_SHARQ @@ -4223,8 +4410,8 @@ static void DSP_shlq(void) if (doDSPDis) WriteLog("%06X: SHLQ #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", DSP_PPC, 32 - PIMM1, PIMM2, dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM2, PRN); #endif - INT32 r1 = 32 - PIMM1; - UINT32 res = PRN << r1; + int32 r1 = 32 - PIMM1; + uint32 res = PRN << r1; SET_ZN(res); dsp_flag_c = (PRN >> 31) & 1; PRES = res; #ifdef DSP_DIS_SHLQ @@ -4239,8 +4426,8 @@ static void DSP_shrq(void) if (doDSPDis) WriteLog("%06X: SHRQ #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", DSP_PPC, dsp_convert_zero[PIMM1], PIMM2, dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM2, PRN); #endif - INT32 r1 = dsp_convert_zero[PIMM1]; - UINT32 res = PRN >> r1; + int32 r1 = dsp_convert_zero[PIMM1]; + uint32 res = PRN >> r1; SET_ZN(res); dsp_flag_c = PRN & 1; PRES = res; #ifdef DSP_DIS_SHRQ @@ -4372,7 +4559,7 @@ static void DSP_sub(void) if (doDSPDis) WriteLog("%06X: SUB R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", DSP_PPC, PIMM1, PIMM2, dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM1, PRM, PIMM2, PRN); #endif - UINT32 res = PRN - PRM; + uint32 res = PRN - PRM; SET_ZNC_SUB(PRN, PRM, res); PRES = res; #ifdef DSP_DIS_SUB @@ -4387,8 +4574,8 @@ static void DSP_subc(void) if (doDSPDis) WriteLog("%06X: SUBC R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", DSP_PPC, PIMM1, PIMM2, dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM1, PRM, PIMM2, PRN); #endif - UINT32 res = PRN - PRM - dsp_flag_c; - UINT32 borrow = dsp_flag_c; + uint32 res = PRN - PRM - dsp_flag_c; + uint32 borrow = dsp_flag_c; SET_ZNC_SUB(PRN - borrow, PRM, res); PRES = res; #ifdef DSP_DIS_SUBC @@ -4403,8 +4590,8 @@ static void DSP_subq(void) if (doDSPDis) WriteLog("%06X: SUBQ #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", DSP_PPC, dsp_convert_zero[PIMM1], PIMM2, dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM2, PRN); #endif - UINT32 r1 = dsp_convert_zero[PIMM1]; - UINT32 res = PRN - r1; + uint32 r1 = dsp_convert_zero[PIMM1]; + uint32 res = PRN - r1; SET_ZNC_SUB(PRN, r1, res); PRES = res; #ifdef DSP_DIS_SUBQ @@ -4413,11 +4600,11 @@ static void DSP_subq(void) #endif } -static void DSP_subqmod(void) +static void DSP_subqmod(void) { - UINT32 r1 = dsp_convert_zero[PIMM1]; - UINT32 r2 = PRN; - UINT32 res = r2 - r1; + uint32 r1 = dsp_convert_zero[PIMM1]; + uint32 r2 = PRN; + uint32 res = r2 - r1; res = (res & (~dsp_modulo)) | (r2 & dsp_modulo); PRES = res; SET_ZNC_SUB(r2, r1, res);