X-Git-Url: http://shamusworld.gotdns.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fdsp.cpp;h=f21fa692a65bf647000288776f52313f4bd2d400;hb=4b32affecce8cf8f557feb96daa76ba1692c3afe;hp=878d4443e148f91971fa6af125d6060b6d7722b2;hpb=f28b39e7d7d5fff9d7790da5bd6585e07b7f555e;p=virtualjaguar diff --git a/src/dsp.cpp b/src/dsp.cpp index 878d444..f21fa69 100644 --- a/src/dsp.cpp +++ b/src/dsp.cpp @@ -1,7 +1,7 @@ // // DSP core // -// by Cal2 +// Originally by David Raingeard // GCC/SDL port by Niels Wagenaar (Linux/WIN32) and Caz (BeOS) // Extensive cleanups/rewrites by James L. Hammons // @@ -11,11 +11,14 @@ //#define DSP_DEBUG //#define DSP_DEBUG_IRQ +//#define DSP_DEBUG_PL2 //#define DSP_DEBUG_STALL +//#define DSP_DEBUG_CC +#define NEW_SCOREBOARD // Disassembly definitions -#define DSP_DIS_ABS // Pipelined only +#define DSP_DIS_ABS #define DSP_DIS_ADD #define DSP_DIS_ADDC #define DSP_DIS_ADDQ @@ -44,6 +47,7 @@ #define DSP_DIS_MOVEI #define DSP_DIS_MOVEQ #define DSP_DIS_MOVEFA +#define DSP_DIS_MOVEPC // Pipeline only! #define DSP_DIS_MOVETA #define DSP_DIS_MULT #define DSP_DIS_NEG @@ -154,7 +158,11 @@ struct PipelineStage #define TYPE_WORD 1 #define TYPE_DWORD 2 #define PIPELINE_STALL 64 // Set to # of opcodes + 1 +#ifndef NEW_SCOREBOARD bool scoreboard[32]; +#else +uint8 scoreboard[32]; +#endif uint8 plPtrFetch, plPtrRead, plPtrExec, plPtrWrite; PipelineStage pipeline[4]; bool IMASKCleared = false; @@ -276,23 +284,30 @@ static void dsp_opcode_subqt(void); uint8 dsp_opcode_cycles[64] = { - 3, 3, 3, 3, - 3, 3, 3, 3, - 3, 3, 3, 3, - 3, 3, 3, 3, - 3, 3, 1, 3, - 1, 18, 3, 3, - 3, 3, 3, 3, - 3, 3, 3, 3, - 3, 3, 2, 2, - 2, 2, 3, 4, - 5, 4, 5, 6, - 6, 1, 1, 1, - 1, 2, 2, 2, - 1, 1, 9, 3, - 3, 1, 6, 6, - 2, 2, 3, 3 -}; + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 1, 3, 1, 18, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 2, 2, 2, 2, 3, 4, + 5, 4, 5, 6, 6, 1, 1, 1, + 1, 2, 2, 2, 1, 1, 9, 3, + 3, 1, 6, 6, 2, 2, 3, 3 +};//*/ +//Here's a QnD kludge... +//This is wrong, wrong, WRONG, but it seems to work for the time being... +//(That is, it fixes Flip Out which relies on GPU timing rather than semaphores. Bad developers! Bad!) +//What's needed here is a way to take pipeline effects into account (including pipeline stalls!)... +/*uint8 dsp_opcode_cycles[64] = +{ + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 9, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 2, + 2, 2, 2, 3, 3, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 4, 1, + 1, 1, 3, 3, 1, 1, 1, 1 +};//*/ void (* dsp_opcode[64])() = { @@ -386,6 +401,14 @@ static uint32 dsp_releaseTimeSlice_flag = 0; FILE * dsp_fp; +#ifdef DSP_DEBUG_CC +// Comparison core vars (used only for core comparison! :-) +static uint64 count = 0; +static uint8 ram1[0x2000], ram2[0x2000]; +static uint32 regs1[64], regs2[64]; +static uint32 ctrl1[14], ctrl2[14]; +#endif + // Private function prototypes void DSPDumpRegisters(void); @@ -657,6 +680,10 @@ void DSPWriteWord(uint32 offset, uint16 data, uint32 who/*=UNKNOWN*/) // WriteLog("dsp: writing %.4x at 0x%.8x\n",data,offset); if ((offset >= DSP_WORK_RAM_BASE) && (offset < DSP_WORK_RAM_BASE+0x2000)) { +/*if (offset == 0xF1B2F4) +{ + WriteLog("DSP: %s is writing %04X at location 0xF1B2F4 (DSP_PC: %08X)...\n", whoName[who], data, dsp_pc); +}//*/ offset -= DSP_WORK_RAM_BASE; dsp_ram_8[offset] = data >> 8; dsp_ram_8[offset+1] = data & 0xFF; @@ -667,6 +694,12 @@ void DSPWriteWord(uint32 offset, uint16 data, uint32 who/*=UNKNOWN*/) m68k_end_timeslice(); gpu_releaseTimeslice(); }*/ +//CC only! +#ifdef DSP_DEBUG_CC +SET16(ram1, offset, data), +SET16(ram2, offset, data); +#endif +//!!!!!!!! return; } else if ((offset >= DSP_CONTROL_RAM_BASE) && (offset < DSP_CONTROL_RAM_BASE+0x20)) @@ -713,6 +746,12 @@ void DSPWriteLong(uint32 offset, uint32 data, uint32 who/*=UNKNOWN*/) }//*/ offset -= DSP_WORK_RAM_BASE; SET32(dsp_ram_8, offset, data); +//CC only! +#ifdef DSP_DEBUG_CC +SET32(ram1, offset, data), +SET32(ram2, offset, data); +#endif +//!!!!!!!! return; } else if (offset >= DSP_CONTROL_RAM_BASE && offset <= (DSP_CONTROL_RAM_BASE + 0x1F)) @@ -743,13 +782,36 @@ void DSPWriteLong(uint32 offset, uint32 data, uint32 who/*=UNKNOWN*/) #ifdef DSP_DEBUG_IRQ } #endif//*/ +#if 0 + if (/*4-8, 16*/data & 0x101F0) + WriteLog("DSP: %s is enabling interrupts %s%s%s%s%s%s\n", whoName[who], + (data & 0x010 ? "CPU " : ""), (data & 0x020 ? "I2S " : ""), + (data & 0x040 ? "TIMER0 " : ""), (data & 0x080 ? "TIMER1 " : ""), + (data & 0x100 ? "EXT0 " : ""), (data & 0x10000 ? "EXT1" : "")); +/*if (data & 0x00020) // CD BIOS DSP code... +{ +//001AC1BA: movea.l #$1AC200, A0 +//001AC1C0: move.l #$1AC68C, D0 + char buffer[512]; + + WriteLog("\n---[DSP code at 00F1B97C]---------------------------\n"); + uint32 j = 0xF1B97C;//0x1AC200; + while (j <= 0xF1BE08)//0x1AC68C) + { + uint32 oldj = j; + j += dasmjag(JAGUAR_DSP, buffer, j); +// WriteLog("\t%08X: %s\n", oldj+0xD6F77C, buffer); + WriteLog("\t%08X: %s\n", oldj, buffer); + } +}//*/ +#endif break; } case 0x04: dsp_matrix_control = data; break; case 0x08: - // According to JTRM, only lines 2-11 are adressable, the rest being + // According to JTRM, only lines 2-11 are addressable, the rest being // hardwired to $F1Bxxx. dsp_pointer_to_matrix = 0xF1B000 | (data & 0x000FFC); break; @@ -761,14 +823,26 @@ void DSPWriteLong(uint32 offset, uint32 data, uint32 who/*=UNKNOWN*/) #ifdef DSP_DEBUG WriteLog("DSP: Setting DSP PC to %08X by %s%s\n", dsp_pc, whoName[who], (DSP_RUNNING ? " (DSP is RUNNING!)" : ""));//*/ #endif +//CC only! +#ifdef DSP_DEBUG_CC +if (who != DSP) + ctrl1[0] = ctrl2[0] = data; +#endif +//!!!!!!!! break; case 0x14: { +//#ifdef DSP_DEBUG +WriteLog("Write to DSP CTRL by %s: %08X\n", whoName[who], data); +//#endif + bool wasRunning = DSP_RUNNING; // uint32 dsp_was_running = DSP_RUNNING; // Check for DSP -> CPU interrupt if (data & CPUINT) { -// WriteLog("DSP: DSP -> CPU interrupt\n"); +#ifdef DSP_DEBUG + WriteLog("DSP: DSP -> CPU interrupt\n"); +#endif // This was WRONG // Why do we check for a valid handler at 64? Isn't that the Jag programmer's responsibility? if (JERRYIRQEnabled(IRQ2_DSP))// && jaguar_interrupt_handler_is_valid(64)) @@ -799,6 +873,12 @@ void DSPWriteLong(uint32 offset, uint32 data, uint32 who/*=UNKNOWN*/) // Protect writes to VERSION and the interrupt latches... uint32 mask = VERSION | INT_LAT0 | INT_LAT1 | INT_LAT2 | INT_LAT3 | INT_LAT4 | INT_LAT5; dsp_control = (dsp_control & mask) | (data & ~mask); +//CC only! +#ifdef DSP_DEBUG_CC +if (who != DSP) + ctrl1[8] = ctrl2[8] = dsp_control; +#endif +//!!!!!!!! // if dsp wasn't running but is now running // execute a few cycles @@ -814,7 +894,6 @@ void DSPWriteLong(uint32 offset, uint32 data, uint32 who/*=UNKNOWN*/) DSPExec(1); #endif #ifdef DSP_DEBUG -WriteLog("Write to DSP CTRL: %08X ", data); if (DSP_RUNNING) WriteLog(" --> Starting to run at %08X by %s...", dsp_pc, whoName[who]); else @@ -830,7 +909,8 @@ WriteLog("\n"); else if (who == GPU) gpu_releaseTimeslice(); - FlushDSPPipeline(); + if (!wasRunning) + FlushDSPPipeline(); //DSPDumpDisassembly(); } break; @@ -872,7 +952,7 @@ void DSPUpdateRegisterBanks(void) } // -// Check for an handle any asserted DSP IRQs +// Check for and handle any asserted DSP IRQs // void DSPHandleIRQs(void) { @@ -921,6 +1001,16 @@ void DSPHandleIRQs(void) if (affectsScoreboard[pipeline[plPtrWrite].opcode]) scoreboard[pipeline[plPtrWrite].operand2] = false; }//*/ +//This should be execute (or should it?--not sure now!) +//Actually, the way this is called now, this should be correct (i.e., the plPtrs advance, +//and what just executed is now in the Write position...). So why didn't it do the +//writeback into register 0? +#ifdef DSP_DEBUG_IRQ +WriteLog("--> Pipeline dump [DSP_PC=%08X]...\n", dsp_pc); +WriteLog("\tR -> %02u, %02u, %02u; r1=%08X, r2= %08X, res=%08X, wb=%u (%s)\n", pipeline[plPtrRead].opcode, pipeline[plPtrRead].operand1, pipeline[plPtrRead].operand2, pipeline[plPtrRead].reg1, pipeline[plPtrRead].reg2, pipeline[plPtrRead].result, pipeline[plPtrRead].writebackRegister, dsp_opcode_str[pipeline[plPtrRead].opcode]); +WriteLog("\tE -> %02u, %02u, %02u; r1=%08X, r2= %08X, res=%08X, wb=%u (%s)\n", pipeline[plPtrExec].opcode, pipeline[plPtrExec].operand1, pipeline[plPtrExec].operand2, pipeline[plPtrExec].reg1, pipeline[plPtrExec].reg2, pipeline[plPtrExec].result, pipeline[plPtrExec].writebackRegister, dsp_opcode_str[pipeline[plPtrExec].opcode]); +WriteLog("\tW -> %02u, %02u, %02u; r1=%08X, r2= %08X, res=%08X, wb=%u (%s)\n", pipeline[plPtrWrite].opcode, pipeline[plPtrWrite].operand1, pipeline[plPtrWrite].operand2, pipeline[plPtrWrite].reg1, pipeline[plPtrWrite].reg2, pipeline[plPtrWrite].result, pipeline[plPtrWrite].writebackRegister, dsp_opcode_str[pipeline[plPtrWrite].opcode]); +#endif if (pipeline[plPtrWrite].opcode != PIPELINE_STALL) { if (pipeline[plPtrWrite].writebackRegister != 0xFF) @@ -938,11 +1028,23 @@ void DSPHandleIRQs(void) } } +#ifndef NEW_SCOREBOARD if (affectsScoreboard[pipeline[plPtrWrite].opcode]) scoreboard[pipeline[plPtrWrite].operand2] = false; +#else +//Yup, sequential MOVEQ # problem fixing (I hope!)... + if (affectsScoreboard[pipeline[plPtrWrite].opcode]) + if (scoreboard[pipeline[plPtrWrite].operand2]) + scoreboard[pipeline[plPtrWrite].operand2]--; +#endif } dsp_flags |= IMASK; +//CC only! +#ifdef DSP_DEBUG_CC +ctrl2[4] = dsp_flags; +#endif +//!!!!!!!! DSPUpdateRegisterBanks(); #ifdef DSP_DEBUG_IRQ // WriteLog(" [PC will return to %08X, R31 = %08X]\n", dsp_pc, dsp_reg[31]); @@ -953,6 +1055,11 @@ void DSPHandleIRQs(void) // move pc,r30 ; address of interrupted code // store r30,(r31) ; store return address dsp_reg[31] -= 4; +//CC only! +#ifdef DSP_DEBUG_CC +regs2[31] -= 4; +#endif +//!!!!!!!! //This might not come back to the right place if the instruction was MOVEI #. !!! FIX !!! //But, then again, JTRM says that it adds two regardless of what the instruction was... //It missed the place that it was supposed to come back to, so this is WRONG! @@ -970,14 +1077,120 @@ void DSPHandleIRQs(void) // DSPWriteLong(dsp_reg[31], dsp_pc - 2, DSP); DSPWriteLong(dsp_reg[31], dsp_pc - 2 - (pipeline[plPtrExec].opcode == 38 ? 6 : (pipeline[plPtrExec].opcode == PIPELINE_STALL ? 0 : 2)), DSP); +//CC only! +#ifdef DSP_DEBUG_CC +SET32(ram2, regs2[31] - 0xF1B000, dsp_pc - 2 - (pipeline[plPtrExec].opcode == 38 ? 6 : (pipeline[plPtrExec].opcode == PIPELINE_STALL ? 0 : 2))); +#endif +//!!!!!!!! // movei #service_address,r30 ; pointer to ISR entry // jump (r30) ; jump to ISR // nop dsp_pc = dsp_reg[30] = DSP_WORK_RAM_BASE + (which * 0x10); +//CC only! +#ifdef DSP_DEBUG_CC +ctrl2[0] = regs2[30] = dsp_pc; +#endif +//!!!!!!!! FlushDSPPipeline(); } +// +// Non-pipelined version... +// +void DSPHandleIRQsNP(void) +{ +//CC only! +#ifdef DSP_DEBUG_CC + memcpy(dsp_ram_8, ram1, 0x2000); + memcpy(dsp_reg_bank_0, regs1, 32 * 4); + memcpy(dsp_reg_bank_1, ®s1[32], 32 * 4); + dsp_pc = ctrl1[0]; + dsp_acc = ctrl1[1]; + dsp_remain = ctrl1[2]; + dsp_modulo = ctrl1[3]; + dsp_flags = ctrl1[4]; + dsp_matrix_control = ctrl1[5]; + dsp_pointer_to_matrix = ctrl1[6]; + dsp_data_organization = ctrl1[7]; + dsp_control = ctrl1[8]; + dsp_div_control = ctrl1[9]; + IMASKCleared = ctrl1[10]; + dsp_flag_z = ctrl1[11]; + dsp_flag_n = ctrl1[12]; + dsp_flag_c = ctrl1[13]; +DSPUpdateRegisterBanks(); +#endif +//!!!!!!!! + if (dsp_flags & IMASK) // Bail if we're already inside an interrupt + return; + + // Get the active interrupt bits (latches) & interrupt mask (enables) + uint32 bits = ((dsp_control >> 10) & 0x20) | ((dsp_control >> 6) & 0x1F), + mask = ((dsp_flags >> 11) & 0x20) | ((dsp_flags >> 4) & 0x1F); + +// WriteLog("dsp: bits=%.2x mask=%.2x\n",bits,mask); + bits &= mask; + + if (!bits) // Bail if nothing is enabled + return; + + int which = 0; // Determine which interrupt + if (bits & 0x01) + which = 0; + if (bits & 0x02) + which = 1; + if (bits & 0x04) + which = 2; + if (bits & 0x08) + which = 3; + if (bits & 0x10) + which = 4; + if (bits & 0x20) + which = 5; + +#ifdef DSP_DEBUG_IRQ + WriteLog("DSP: Generating interrupt #%i...", which); +#endif + + dsp_flags |= IMASK; +//CC only! +#ifdef DSP_DEBUG_CC +ctrl1[4] = dsp_flags; +#endif +//!!!!!!!! + DSPUpdateRegisterBanks(); +#ifdef DSP_DEBUG_IRQ + WriteLog(" [PC will return to %08X, R31 = %08X]\n", dsp_pc, dsp_reg[31]); +#endif + + // subqt #4,r31 ; pre-decrement stack pointer + // move pc,r30 ; address of interrupted code + // store r30,(r31) ; store return address + dsp_reg[31] -= 4; +//CC only! +#ifdef DSP_DEBUG_CC +regs1[31] -= 4; +#endif +//!!!!!!!! + DSPWriteLong(dsp_reg[31], dsp_pc - 2, DSP); +//CC only! +#ifdef DSP_DEBUG_CC +SET32(ram1, regs1[31] - 0xF1B000, dsp_pc - 2); +#endif +//!!!!!!!! + + // movei #service_address,r30 ; pointer to ISR entry + // jump (r30) ; jump to ISR + // nop + dsp_pc = dsp_reg[30] = DSP_WORK_RAM_BASE + (which * 0x10); +//CC only! +#ifdef DSP_DEBUG_CC +ctrl1[0] = regs1[30] = dsp_pc; +#endif +//!!!!!!!! +} + // // Set the specified DSP IRQ line to a given state // @@ -986,12 +1199,28 @@ void DSPSetIRQLine(int irqline, int state) //NOTE: This doesn't take INT_LAT5 into account. !!! FIX !!! uint32 mask = INT_LAT0 << irqline; dsp_control &= ~mask; // Clear the latch bit +//CC only! +#ifdef DSP_DEBUG_CC +ctrl1[8] = ctrl2[8] = dsp_control; +#endif +//!!!!!!!! if (state) { dsp_control |= mask; // Set the latch bit DSPHandleIRQs(); +//CC only! +#ifdef DSP_DEBUG_CC +ctrl1[8] = ctrl2[8] = dsp_control; +DSPHandleIRQsNP(); +#endif +//!!!!!!!! } + + // Not sure if this is correct behavior, but according to JTRM, + // the IRQ output of JERRY is fed to this IRQ in the GPU... +// Not sure this is right--DSP interrupts seem to be different from the JERRY interrupts! +// GPUSetIRQLine(GPUIRQ_DSP, ASSERT_LINE); } void DSPInit(void) @@ -1026,6 +1255,7 @@ void DSPReset(void) CLR_ZNC; IMASKCleared = false; + FlushDSPPipeline(); dsp_reset_stats(); memset(dsp_ram_8, 0xFF, 0x2000); } @@ -1083,7 +1313,7 @@ void DSPDone(void) WriteLog("\nRegisters bank 0\n"); for(int j=0; j<8; j++) { - WriteLog("\tr%2i=0x%.8x r%2i=0x%.8x r%2i=0x%.8x r%2i=0x%.8x\n", + WriteLog("\tR%02i=%08X R%02i=%08X R%02i=%08X R%02i=%08X\n", (j << 2) + 0, dsp_reg_bank_0[(j << 2) + 0], (j << 2) + 1, dsp_reg_bank_0[(j << 2) + 1], (j << 2) + 2, dsp_reg_bank_0[(j << 2) + 2], @@ -1092,7 +1322,7 @@ void DSPDone(void) WriteLog("\nRegisters bank 1\n"); for (j=0; j<8; j++) { - WriteLog("\tr%2i=0x%.8x r%2i=0x%.8x r%2i=0x%.8x r%2i=0x%.8x\n", + WriteLog("\tR%02i=%08X R%02i=%08X R%02i=%08X R%02i=%08X\n", (j << 2) + 0, dsp_reg_bank_1[(j << 2) + 0], (j << 2) + 1, dsp_reg_bank_1[(j << 2) + 1], (j << 2) + 2, dsp_reg_bank_1[(j << 2) + 2], @@ -1117,8 +1347,212 @@ void DSPDone(void) }//*/ memory_free(dsp_ram_8); + memory_free(dsp_reg_bank_0); + memory_free(dsp_reg_bank_1); } + + +// +// DSP comparison core... +// +#ifdef DSP_DEBUG_CC +static uint16 lastExec; +void DSPExecComp(int32 cycles) +{ + while (cycles > 0 && DSP_RUNNING) + { + // Load up vars for non-pipelined core + memcpy(dsp_ram_8, ram1, 0x2000); + memcpy(dsp_reg_bank_0, regs1, 32 * 4); + memcpy(dsp_reg_bank_1, ®s1[32], 32 * 4); + dsp_pc = ctrl1[0]; + dsp_acc = ctrl1[1]; + dsp_remain = ctrl1[2]; + dsp_modulo = ctrl1[3]; + dsp_flags = ctrl1[4]; + dsp_matrix_control = ctrl1[5]; + dsp_pointer_to_matrix = ctrl1[6]; + dsp_data_organization = ctrl1[7]; + dsp_control = ctrl1[8]; + dsp_div_control = ctrl1[9]; + IMASKCleared = ctrl1[10]; + dsp_flag_z = ctrl1[11]; + dsp_flag_n = ctrl1[12]; + dsp_flag_c = ctrl1[13]; +DSPUpdateRegisterBanks(); + + // Decrement cycles based on non-pipelined core... + uint16 instr1 = DSPReadWord(dsp_pc, DSP); + cycles -= dsp_opcode_cycles[instr1 >> 10]; + +//WriteLog("\tAbout to execute non-pipelined core on tick #%u (DSP_PC=%08X)...\n", (uint32)count, dsp_pc); + DSPExec(1); // Do *one* instruction + + // Save vars + memcpy(ram1, dsp_ram_8, 0x2000); + memcpy(regs1, dsp_reg_bank_0, 32 * 4); + memcpy(®s1[32], dsp_reg_bank_1, 32 * 4); + ctrl1[0] = dsp_pc; + ctrl1[1] = dsp_acc; + ctrl1[2] = dsp_remain; + ctrl1[3] = dsp_modulo; + ctrl1[4] = dsp_flags; + ctrl1[5] = dsp_matrix_control; + ctrl1[6] = dsp_pointer_to_matrix; + ctrl1[7] = dsp_data_organization; + ctrl1[8] = dsp_control; + ctrl1[9] = dsp_div_control; + ctrl1[10] = IMASKCleared; + ctrl1[11] = dsp_flag_z; + ctrl1[12] = dsp_flag_n; + ctrl1[13] = dsp_flag_c; + + // Load up vars for pipelined core + memcpy(dsp_ram_8, ram2, 0x2000); + memcpy(dsp_reg_bank_0, regs2, 32 * 4); + memcpy(dsp_reg_bank_1, ®s2[32], 32 * 4); + dsp_pc = ctrl2[0]; + dsp_acc = ctrl2[1]; + dsp_remain = ctrl2[2]; + dsp_modulo = ctrl2[3]; + dsp_flags = ctrl2[4]; + dsp_matrix_control = ctrl2[5]; + dsp_pointer_to_matrix = ctrl2[6]; + dsp_data_organization = ctrl2[7]; + dsp_control = ctrl2[8]; + dsp_div_control = ctrl2[9]; + IMASKCleared = ctrl2[10]; + dsp_flag_z = ctrl2[11]; + dsp_flag_n = ctrl2[12]; + dsp_flag_c = ctrl2[13]; +DSPUpdateRegisterBanks(); + +//WriteLog("\tAbout to execute pipelined core on tick #%u (DSP_PC=%08X)...\n", (uint32)count, dsp_pc); + DSPExecP2(1); // Do *one* instruction + + // Save vars + memcpy(ram2, dsp_ram_8, 0x2000); + memcpy(regs2, dsp_reg_bank_0, 32 * 4); + memcpy(®s2[32], dsp_reg_bank_1, 32 * 4); + ctrl2[0] = dsp_pc; + ctrl2[1] = dsp_acc; + ctrl2[2] = dsp_remain; + ctrl2[3] = dsp_modulo; + ctrl2[4] = dsp_flags; + ctrl2[5] = dsp_matrix_control; + ctrl2[6] = dsp_pointer_to_matrix; + ctrl2[7] = dsp_data_organization; + ctrl2[8] = dsp_control; + ctrl2[9] = dsp_div_control; + ctrl2[10] = IMASKCleared; + ctrl2[11] = dsp_flag_z; + ctrl2[12] = dsp_flag_n; + ctrl2[13] = dsp_flag_c; + + if (instr1 != lastExec) + { +// WriteLog("\nCores diverged at instruction tick #%u!\nAttemping to synchronize...\n\n", count); + +// uint32 ppc = ctrl2[0] - (pipeline[plPtrExec].opcode == 38 ? 6 : (pipeline[plPtrExec].opcode == PIPELINE_STALL ? 0 : 2)) - (pipeline[plPtrWrite].opcode == 38 ? 6 : (pipeline[plPtrWrite].opcode == PIPELINE_STALL ? 0 : 2)); +//WriteLog("[DSP_PC1=%08X, DSP_PC2=%08X]\n", ctrl1[0], ppc); +// if (ctrl1[0] < ppc) // P ran ahead of NP +//How to test this crap??? +// if (1) + { + DSPExecP2(1); // Do one more instruction + + // Save vars + memcpy(ram2, dsp_ram_8, 0x2000); + memcpy(regs2, dsp_reg_bank_0, 32 * 4); + memcpy(®s2[32], dsp_reg_bank_1, 32 * 4); + ctrl2[0] = dsp_pc; + ctrl2[1] = dsp_acc; + ctrl2[2] = dsp_remain; + ctrl2[3] = dsp_modulo; + ctrl2[4] = dsp_flags; + ctrl2[5] = dsp_matrix_control; + ctrl2[6] = dsp_pointer_to_matrix; + ctrl2[7] = dsp_data_organization; + ctrl2[8] = dsp_control; + ctrl2[9] = dsp_div_control; + ctrl2[10] = IMASKCleared; + ctrl2[11] = dsp_flag_z; + ctrl2[12] = dsp_flag_n; + ctrl2[13] = dsp_flag_c; + } +// else // NP ran ahead of P + if (instr1 != lastExec) // Must be the other way... + + { + // Load up vars for non-pipelined core + memcpy(dsp_ram_8, ram1, 0x2000); + memcpy(dsp_reg_bank_0, regs1, 32 * 4); + memcpy(dsp_reg_bank_1, ®s1[32], 32 * 4); + dsp_pc = ctrl1[0]; + dsp_acc = ctrl1[1]; + dsp_remain = ctrl1[2]; + dsp_modulo = ctrl1[3]; + dsp_flags = ctrl1[4]; + dsp_matrix_control = ctrl1[5]; + dsp_pointer_to_matrix = ctrl1[6]; + dsp_data_organization = ctrl1[7]; + dsp_control = ctrl1[8]; + dsp_div_control = ctrl1[9]; + IMASKCleared = ctrl1[10]; + dsp_flag_z = ctrl1[11]; + dsp_flag_n = ctrl1[12]; + dsp_flag_c = ctrl1[13]; +DSPUpdateRegisterBanks(); + +for(int k=0; k<2; k++) +{ + // Decrement cycles based on non-pipelined core... + instr1 = DSPReadWord(dsp_pc, DSP); + cycles -= dsp_opcode_cycles[instr1 >> 10]; + +//WriteLog("\tAbout to execute non-pipelined core on tick #%u (DSP_PC=%08X)...\n", (uint32)count, dsp_pc); + DSPExec(1); // Do *one* instruction +} + + // Save vars + memcpy(ram1, dsp_ram_8, 0x2000); + memcpy(regs1, dsp_reg_bank_0, 32 * 4); + memcpy(®s1[32], dsp_reg_bank_1, 32 * 4); + ctrl1[0] = dsp_pc; + ctrl1[1] = dsp_acc; + ctrl1[2] = dsp_remain; + ctrl1[3] = dsp_modulo; + ctrl1[4] = dsp_flags; + ctrl1[5] = dsp_matrix_control; + ctrl1[6] = dsp_pointer_to_matrix; + ctrl1[7] = dsp_data_organization; + ctrl1[8] = dsp_control; + ctrl1[9] = dsp_div_control; + ctrl1[10] = IMASKCleared; + ctrl1[11] = dsp_flag_z; + ctrl1[12] = dsp_flag_n; + ctrl1[13] = dsp_flag_c; + } + } + + if (instr1 != lastExec) + { + WriteLog("\nCores diverged at instruction tick #%u!\nStopped!\n\n", count); + + WriteLog("Instruction for non-pipelined core: %04X\n", instr1); + WriteLog("Instruction for pipelined core: %04X\n", lastExec); + + log_done(); + exit(1); + } + + count++; + } +} +#endif + + // // DSP execution core // @@ -1143,6 +1577,30 @@ void DSPExec(int32 cycles) while (cycles > 0 && DSP_RUNNING) { +/*extern uint32 totalFrames; +//F1B2F6: LOAD (R14+$04), R24 [NCZ:001, R14+$04=00F20018, R24=FFFFFFFF] -> Jaguar: Unknown word read at 00F20018 by DSP (M68K PC=00E32E) +//-> 43 + 1 + 24 -> $2B + $01 + $18 -> 101011 00001 11000 -> 1010 1100 0011 1000 -> AC38 +//C470 -> 1100 0100 0111 0000 -> 110001 00011 10000 -> 49, 3, 16 -> STORE R16, (R14+$0C) +//F1B140: +if (totalFrames >= 377 && GET16(dsp_ram_8, 0x0002F6) == 0xAC38 && dsp_pc == 0xF1B140) +{ + doDSPDis = true; + WriteLog("Starting disassembly at frame #%u...\n", totalFrames); +} +if (dsp_pc == 0xF1B092) + doDSPDis = false;//*/ +/*if (dsp_pc == 0xF1B140) + doDSPDis = true;//*/ + + if (IMASKCleared) // If IMASK was cleared, + { +#ifdef DSP_DEBUG_IRQ + WriteLog("DSP: Finished interrupt.\n"); +#endif + DSPHandleIRQsNP(); // See if any other interrupts are pending! + IMASKCleared = false; + } + /*if (badWrite) { WriteLog("\nDSP: Encountered bad write in Atari Synth module. PC=%08X, R15=%08X\n", dsp_pc, dsp_reg[15]); @@ -1448,13 +1906,13 @@ static void dsp_opcode_not(void) { #ifdef DSP_DIS_NOT if (doDSPDis) - WriteLog("%06X: NOT R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", dsp_pc-2, IMM_1, IMM_2, dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_1, RM, IMM_2, RN); + WriteLog("%06X: NOT R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", dsp_pc-2, IMM_2, dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_2, RN); #endif RN = ~RN; SET_ZN(RN); #ifdef DSP_DIS_NOT if (doDSPDis) - WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_1, RM, IMM_2, RN); + WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_2, RN); #endif } @@ -1892,6 +2350,10 @@ static void dsp_opcode_mmult(void) static void dsp_opcode_abs(void) { +#ifdef DSP_DIS_ABS + if (doDSPDis) + WriteLog("%06X: ABS R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", dsp_pc-2, IMM_2, dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_2, RN); +#endif uint32 _Rn = RN; uint32 res; @@ -1903,6 +2365,10 @@ static void dsp_opcode_abs(void) res = RN = (_Rn & 0x80000000 ? -_Rn : _Rn); CLR_ZN; SET_Z(res); } +#ifdef DSP_DIS_ABS + if (doDSPDis) + WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_2, RN); +#endif } static void dsp_opcode_div(void) @@ -2272,6 +2738,21 @@ bool readAffected[64][2] = { true, true}, { true, true}, {false, false}, {false, true} }; +bool isLoadStore[65] = +{ + false, false, false, false, false, false, false, false, + false, false, false, false, false, false, false, false, + + false, false, false, false, false, false, false, false, + false, false, false, false, false, false, false, false, + + false, false, false, false, false, false, false, true, + true, true, false, true, true, true, true, true, + + false, true, true, false, false, false, false, false, + false, false, true, true, true, true, false, false, false +}; + void FlushDSPPipeline(void) { plPtrFetch = 3, plPtrRead = 2, plPtrExec = 1, plPtrWrite = 0; @@ -2280,7 +2761,7 @@ void FlushDSPPipeline(void) pipeline[i].opcode = PIPELINE_STALL; for(int i=0; i<32; i++) - scoreboard[i] = false; + scoreboard[i] = 0; } // @@ -2447,7 +2928,7 @@ because the STORE instruction writes back on stage #2 of the pipeline instead of If it were done properly, the STORE write back would occur *after* (well, technically, during) the execution of the the JUMP that follows it. -!!! FIX !!! +!!! FIX !!! [DONE] F1B08A: JR z, F1B082 [NCZ:001] Branched! F1B08A: NOP [NCZ:001] @@ -2547,8 +3028,9 @@ F1B016: NOP [NCZ:001] F1B1FC: MOVEI #$00F1A100, R01 [NCZ:001, R01=00F1A100] -> [NCZ:001, R01=00F1A100] */ - -//#define DSP_DEBUG_PL2 +uint32 pcQueue1[0x400]; +uint32 pcQPtr1 = 0; +static uint32 prevR1; //Let's try a 3 stage pipeline.... //Looks like 3 stage is correct, otherwise bad things happen... void DSPExecP2(int32 cycles) @@ -2558,6 +3040,51 @@ void DSPExecP2(int32 cycles) while (cycles > 0 && DSP_RUNNING) { +/*extern uint32 totalFrames; +//F1B2F6: LOAD (R14+$04), R24 [NCZ:001, R14+$04=00F20018, R24=FFFFFFFF] -> Jaguar: Unknown word read at 00F20018 by DSP (M68K PC=00E32E) +//-> 43 + 1 + 24 -> $2B + $01 + $18 -> 101011 00001 11000 -> 1010 1100 0011 1000 -> AC38 +//C470 -> 1100 0100 0111 0000 -> 110001 00011 10000 -> 49, 3, 16 -> STORE R16, (R14+$0C) +//F1B140: +if (totalFrames >= 377 && GET16(dsp_ram_8, 0x0002F6) == 0xAC38 && dsp_pc == 0xF1B140) +{ + doDSPDis = true; + WriteLog("Starting disassembly at frame #%u...\n", totalFrames); +} +if (dsp_pc == 0xF1B092) + doDSPDis = false;//*/ +/*if (totalFrames >= 373 && GET16(dsp_ram_8, 0x0002F6) == 0xAC38) + doDSPDis = true;//*/ +/*if (totalFrames >= 373 && dsp_pc == 0xF1B0A0) + doDSPDis = true;//*/ +/*if (dsp_pc == 0xF1B0A0) + doDSPDis = true;//*/ +/*if (dsp_pc == 0xF1B0D2) && dsp_reg[1] == 0x2140C) + doDSPDis = true;//*/ +//Two parter... (not sure how to write this) +//if (dsp_pc == 0xF1B0D2) +// prevR1 = dsp_reg[1]; + +//F1B0D2: ADDQT #8, R01 [NCZ:000, R01=0002140C] -> [NCZ:000, R01=00021414] +//F1B0D2: ADDQT #8, R01 [NCZ:000, R01=0002140C] -> [NCZ:000, R01=00021414] + + +pcQueue1[pcQPtr1++] = dsp_pc; +pcQPtr1 &= 0x3FF; + +if ((dsp_pc < 0xF1B000 || dsp_pc > 0xF1CFFF) && !doDSPDis) +{ + WriteLog("DSP: PC has stepped out of bounds...\n\nBacktrace:\n\n"); + doDSPDis = true; + + char buffer[512]; + + for(int i=0; i<0x400; i++) + { + dasmjag(JAGUAR_DSP, buffer, pcQueue1[(i + pcQPtr1) & 0x3FF]); + WriteLog("\t%08X: %s\n", pcQueue1[(i + pcQPtr1) & 0x3FF], buffer); + } + WriteLog("\n"); +}//*/ if (IMASKCleared) // If IMASK was cleared, { #ifdef DSP_DEBUG_IRQ @@ -2570,6 +3097,8 @@ void DSPExecP2(int32 cycles) //if (dsp_flags & REGPAGE) // WriteLog(" --> REGPAGE has just been set!\n"); #ifdef DSP_DEBUG_PL2 +if (doDSPDis) +{ WriteLog("DSPExecP: Pipeline status [PC=%08X]...\n", dsp_pc); WriteLog("\tR -> %02u, %02u, %02u; r1=%08X, r2= %08X, res=%08X, wb=%u (%s)\n", pipeline[plPtrRead].opcode, pipeline[plPtrRead].operand1, pipeline[plPtrRead].operand2, pipeline[plPtrRead].reg1, pipeline[plPtrRead].reg2, pipeline[plPtrRead].result, pipeline[plPtrRead].writebackRegister, dsp_opcode_str[pipeline[plPtrRead].opcode]); WriteLog("\tE -> %02u, %02u, %02u; r1=%08X, r2= %08X, res=%08X, wb=%u (%s)\n", pipeline[plPtrExec].opcode, pipeline[plPtrExec].operand1, pipeline[plPtrExec].operand2, pipeline[plPtrExec].reg1, pipeline[plPtrExec].reg2, pipeline[plPtrExec].result, pipeline[plPtrExec].writebackRegister, dsp_opcode_str[pipeline[plPtrExec].opcode]); @@ -2578,6 +3107,7 @@ WriteLog(" --> Scoreboard: "); for(int i=0; i<32; i++) WriteLog("%s ", scoreboard[i] ? "T" : "F"); WriteLog("\n"); +} #endif // Stage 1a: Instruction fetch pipeline[plPtrRead].instruction = DSPReadWord(dsp_pc, DSP); @@ -2588,29 +3118,40 @@ WriteLog("\n"); pipeline[plPtrRead].result = (uint32)DSPReadWord(dsp_pc + 2, DSP) | ((uint32)DSPReadWord(dsp_pc + 4, DSP) << 16); #ifdef DSP_DEBUG_PL2 +if (doDSPDis) +{ WriteLog("DSPExecP: Fetching instruction (%04X) from DSP_PC = %08X...\n", pipeline[plPtrRead].instruction, dsp_pc); WriteLog("DSPExecP: Pipeline status (after stage 1a) [PC=%08X]...\n", dsp_pc); WriteLog("\tR -> %02u, %02u, %02u; r1=%08X, r2= %08X, res=%08X, wb=%u (%s)\n", pipeline[plPtrRead].opcode, pipeline[plPtrRead].operand1, pipeline[plPtrRead].operand2, pipeline[plPtrRead].reg1, pipeline[plPtrRead].reg2, pipeline[plPtrRead].result, pipeline[plPtrRead].writebackRegister, dsp_opcode_str[pipeline[plPtrRead].opcode]); WriteLog("\tE -> %02u, %02u, %02u; r1=%08X, r2= %08X, res=%08X, wb=%u (%s)\n", pipeline[plPtrExec].opcode, pipeline[plPtrExec].operand1, pipeline[plPtrExec].operand2, pipeline[plPtrExec].reg1, pipeline[plPtrExec].reg2, pipeline[plPtrExec].result, pipeline[plPtrExec].writebackRegister, dsp_opcode_str[pipeline[plPtrExec].opcode]); WriteLog("\tW -> %02u, %02u, %02u; r1=%08X, r2= %08X, res=%08X, wb=%u (%s)\n", pipeline[plPtrWrite].opcode, pipeline[plPtrWrite].operand1, pipeline[plPtrWrite].operand2, pipeline[plPtrWrite].reg1, pipeline[plPtrWrite].reg2, pipeline[plPtrWrite].result, pipeline[plPtrWrite].writebackRegister, dsp_opcode_str[pipeline[plPtrWrite].opcode]); +} #endif // Stage 1b: Read registers //Small problem--when say LOAD or STORE (R14/5+$nn) is executed AFTER an instruction that //modifies R14/5, we don't check the scoreboard for R14/5 (and we need to!)... !!! FIX !!! //Ugly, but [DONE] +//Another problem: Any sequential combination of LOAD and STORE operations will cause the +//pipeline to stall, and we don't take care of that here. !!! FIX !!! if ((scoreboard[pipeline[plPtrRead].operand1] && readAffected[pipeline[plPtrRead].opcode][0]) || (scoreboard[pipeline[plPtrRead].operand2] && readAffected[pipeline[plPtrRead].opcode][1]) || ((pipeline[plPtrRead].opcode == 43 || pipeline[plPtrRead].opcode == 58) && scoreboard[14]) - || ((pipeline[plPtrRead].opcode == 44 || pipeline[plPtrRead].opcode == 59) && scoreboard[15])) + || ((pipeline[plPtrRead].opcode == 44 || pipeline[plPtrRead].opcode == 59) && scoreboard[15]) +//Not sure that this is the best way to fix the LOAD/STORE problem... But it seems to +//work--somewhat... + || (isLoadStore[pipeline[plPtrRead].opcode] && isLoadStore[pipeline[plPtrExec].opcode])) // We have a hit in the scoreboard, so we have to stall the pipeline... #ifdef DSP_DEBUG_PL2 { +if (doDSPDis) +{ WriteLog(" --> Stalling pipeline: "); if (readAffected[pipeline[plPtrRead].opcode][0]) WriteLog("scoreboard[%u] = %s (reg 1) ", pipeline[plPtrRead].operand1, scoreboard[pipeline[plPtrRead].operand1] ? "true" : "false"); if (readAffected[pipeline[plPtrRead].opcode][1]) WriteLog("scoreboard[%u] = %s (reg 2)", pipeline[plPtrRead].operand2, scoreboard[pipeline[plPtrRead].operand2] ? "true" : "false"); WriteLog("\n"); +} #endif pipeline[plPtrRead].opcode = PIPELINE_STALL; #ifdef DSP_DEBUG_PL2 @@ -2624,47 +3165,83 @@ WriteLog("\n"); // Shouldn't we be more selective with the register scoreboarding? // Yes, we should. !!! FIX !!! Kinda [DONE] +#ifndef NEW_SCOREBOARD scoreboard[pipeline[plPtrRead].operand2] = affectsScoreboard[pipeline[plPtrRead].opcode]; +#else +//Hopefully this will fix the dual MOVEQ # problem... + scoreboard[pipeline[plPtrRead].operand2] += (affectsScoreboard[pipeline[plPtrRead].opcode] ? 1 : 0); +#endif //Advance PC here??? Yes. dsp_pc += (pipeline[plPtrRead].opcode == 38 ? 6 : 2); } #ifdef DSP_DEBUG_PL2 +if (doDSPDis) +{ WriteLog("DSPExecP: Pipeline status (after stage 1b) [PC=%08X]...\n", dsp_pc); WriteLog("\tR -> %02u, %02u, %02u; r1=%08X, r2= %08X, res=%08X, wb=%u (%s)\n", pipeline[plPtrRead].opcode, pipeline[plPtrRead].operand1, pipeline[plPtrRead].operand2, pipeline[plPtrRead].reg1, pipeline[plPtrRead].reg2, pipeline[plPtrRead].result, pipeline[plPtrRead].writebackRegister, dsp_opcode_str[pipeline[plPtrRead].opcode]); WriteLog("\tE -> %02u, %02u, %02u; r1=%08X, r2= %08X, res=%08X, wb=%u (%s)\n", pipeline[plPtrExec].opcode, pipeline[plPtrExec].operand1, pipeline[plPtrExec].operand2, pipeline[plPtrExec].reg1, pipeline[plPtrExec].reg2, pipeline[plPtrExec].result, pipeline[plPtrExec].writebackRegister, dsp_opcode_str[pipeline[plPtrExec].opcode]); WriteLog("\tW -> %02u, %02u, %02u; r1=%08X, r2= %08X, res=%08X, wb=%u (%s)\n", pipeline[plPtrWrite].opcode, pipeline[plPtrWrite].operand1, pipeline[plPtrWrite].operand2, pipeline[plPtrWrite].reg1, pipeline[plPtrWrite].reg2, pipeline[plPtrWrite].result, pipeline[plPtrWrite].writebackRegister, dsp_opcode_str[pipeline[plPtrWrite].opcode]); +} #endif // Stage 2: Execute if (pipeline[plPtrExec].opcode != PIPELINE_STALL) { +if (doDSPDis) + WriteLog("\t[inst=%02u][R28=%08X, alt R28=%08X, REGPAGE=%s]\n", pipeline[plPtrExec].opcode, dsp_reg[28], dsp_alternate_reg[28], (dsp_flags & REGPAGE ? "set" : "not set")); #ifdef DSP_DEBUG_PL2 +if (doDSPDis) +{ WriteLog("DSPExecP: About to execute opcode %s...\n", dsp_opcode_str[pipeline[plPtrExec].opcode]); +} +#endif +//CC only! +#ifdef DSP_DEBUG_CC +lastExec = pipeline[plPtrExec].instruction; +//WriteLog("[lastExec = %04X]\n", lastExec); #endif + cycles -= dsp_opcode_cycles[pipeline[plPtrExec].opcode]; + dsp_opcode_use[pipeline[plPtrExec].opcode]++; DSPOpcode[pipeline[plPtrExec].opcode](); //WriteLog(" --> Returned from execute. DSP_PC: %08X\n", dsp_pc); - dsp_opcode_use[pipeline[plPtrExec].opcode]++; - cycles -= dsp_opcode_cycles[pipeline[plPtrExec].opcode]; } else { - cycles--; +//Let's not, until we do the stalling correctly... +//But, we gotta while we're doing the comparison core...! +//Or do we? cycles--; +//Really, the whole thing is wrong. When the pipeline is correctly stuffed, most instructions +//will execute in one clock cycle (others, like DIV, will likely not). So, the challenge is +//to model this clock cycle behavior correctly... +//Also, the pipeline stalls too much--mostly because the transparent writebacks at stage 3 +//don't affect the reads at stage 1... #ifdef DSP_DEBUG_STALL -WriteLog("[STALL...]\n"); +if (doDSPDis) + WriteLog("[STALL... DSP_PC = %08X]\n", dsp_pc); #endif } #ifdef DSP_DEBUG_PL2 +if (doDSPDis) +{ WriteLog("DSPExecP: Pipeline status (after stage 2) [PC=%08X]...\n", dsp_pc); WriteLog("\tR -> %02u, %02u, %02u; r1=%08X, r2= %08X, res=%08X, wb=%u (%s)\n", pipeline[plPtrRead].opcode, pipeline[plPtrRead].operand1, pipeline[plPtrRead].operand2, pipeline[plPtrRead].reg1, pipeline[plPtrRead].reg2, pipeline[plPtrRead].result, pipeline[plPtrRead].writebackRegister, dsp_opcode_str[pipeline[plPtrRead].opcode]); WriteLog("\tE -> %02u, %02u, %02u; r1=%08X, r2= %08X, res=%08X, wb=%u (%s)\n", pipeline[plPtrExec].opcode, pipeline[plPtrExec].operand1, pipeline[plPtrExec].operand2, pipeline[plPtrExec].reg1, pipeline[plPtrExec].reg2, pipeline[plPtrExec].result, pipeline[plPtrExec].writebackRegister, dsp_opcode_str[pipeline[plPtrExec].opcode]); WriteLog("\tW -> %02u, %02u, %02u; r1=%08X, r2= %08X, res=%08X, wb=%u (%s)\n", pipeline[plPtrWrite].opcode, pipeline[plPtrWrite].operand1, pipeline[plPtrWrite].operand2, pipeline[plPtrWrite].reg1, pipeline[plPtrWrite].reg2, pipeline[plPtrWrite].result, pipeline[plPtrWrite].writebackRegister, dsp_opcode_str[pipeline[plPtrWrite].opcode]); WriteLog("\n"); +} #endif // Stage 3: Write back register/memory address if (pipeline[plPtrWrite].opcode != PIPELINE_STALL) { +/*if (pipeline[plPtrWrite].writebackRegister == 3 + && (pipeline[plPtrWrite].result < 0xF14000 || pipeline[plPtrWrite].result > 0xF1CFFF) + && !doDSPDis) +{ + WriteLog("DSP: Register R03 has stepped out of bounds...\n\n"); + doDSPDis = true; +}//*/ if (pipeline[plPtrWrite].writebackRegister != 0xFF) { if (pipeline[plPtrWrite].writebackRegister != 0xFE) @@ -2680,8 +3257,15 @@ WriteLog("\n"); } } +#ifndef NEW_SCOREBOARD if (affectsScoreboard[pipeline[plPtrWrite].opcode]) scoreboard[pipeline[plPtrWrite].operand2] = false; +#else +//Yup, sequential MOVEQ # problem fixing (I hope!)... + if (affectsScoreboard[pipeline[plPtrWrite].opcode]) + if (scoreboard[pipeline[plPtrWrite].operand2]) + scoreboard[pipeline[plPtrWrite].operand2]--; +#endif } // Push instructions through the pipeline... @@ -2822,7 +3406,7 @@ static void DSP_abs(void) { #ifdef DSP_DIS_ABS if (doDSPDis) - WriteLog("%06X: ABS R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", DSP_PPC, PIMM1, PIMM2, dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM2, PRN); + WriteLog("%06X: ABS R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", DSP_PPC, PIMM2, dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM2, PRN); #endif uint32 _Rn = PRN; @@ -2972,7 +3556,7 @@ static void DSP_btst(void) NO_WRITEBACK; #ifdef DSP_DIS_BTST if (doDSPDis) - WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM2, PRES); + WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM2, PRN); #endif } @@ -3104,7 +3688,8 @@ char * condition[32] = "???", "nn", "nn nz", "nn z", "???", "n", "n nz", "n z", "???", "???", "???", "???", "F" }; if (doDSPDis) - WriteLog("%06X: JR %s, %06X [NCZ:%u%u%u] ", DSP_PPC, condition[PIMM2], dsp_pc+((PIMM1 & 0x10 ? 0xFFFFFFF0 | PIMM1 : PIMM1) * 2), dsp_flag_n, dsp_flag_c, dsp_flag_z); +//How come this is always off by 2??? + WriteLog("%06X: JR %s, %06X [NCZ:%u%u%u] ", DSP_PPC, condition[PIMM2], DSP_PPC+((PIMM1 & 0x10 ? 0xFFFFFFF0 | PIMM1 : PIMM1) * 2)+2, dsp_flag_n, dsp_flag_c, dsp_flag_z); #endif // KLUDGE: Used by BRANCH_CONDITION macro uint32 jaguar_flags = (dsp_flag_n << 2) | (dsp_flag_c << 1) | dsp_flag_z; @@ -3150,8 +3735,15 @@ char * condition[32] = } } +#ifndef NEW_SCOREBOARD if (affectsScoreboard[pipeline[plPtrWrite].opcode]) scoreboard[pipeline[plPtrWrite].operand2] = false; +#else +//Yup, sequential MOVEQ # problem fixing (I hope!)... + if (affectsScoreboard[pipeline[plPtrWrite].opcode]) + if (scoreboard[pipeline[plPtrWrite].operand2]) + scoreboard[pipeline[plPtrWrite].operand2]--; +#endif } // Step 2: Push instruction through pipeline & execute following instruction @@ -3175,6 +3767,7 @@ char * condition[32] = pipeline[plPtrExec].reg2 = dsp_reg[pipeline[plPtrExec].operand2]; pipeline[plPtrExec].writebackRegister = pipeline[plPtrExec].operand2; // Set it to RN }//*/ + dsp_pc += 2; // For DSP_DIS_* accuracy DSPOpcode[pipeline[plPtrExec].opcode](); dsp_opcode_use[pipeline[plPtrExec].opcode]++; pipeline[plPtrWrite] = pipeline[plPtrExec]; @@ -3247,8 +3840,15 @@ char * condition[32] = } } +#ifndef NEW_SCOREBOARD if (affectsScoreboard[pipeline[plPtrWrite].opcode]) scoreboard[pipeline[plPtrWrite].operand2] = false; +#else +//Yup, sequential MOVEQ # problem fixing (I hope!)... + if (affectsScoreboard[pipeline[plPtrWrite].opcode]) + if (scoreboard[pipeline[plPtrWrite].operand2]) + scoreboard[pipeline[plPtrWrite].operand2]--; +#endif } // Step 2: Push instruction through pipeline & execute following instruction @@ -3273,6 +3873,7 @@ char * condition[32] = pipeline[plPtrExec].reg2 = dsp_reg[pipeline[plPtrExec].operand2]; pipeline[plPtrExec].writebackRegister = pipeline[plPtrExec].operand2; // Set it to RN }//*/ + dsp_pc += 2; // For DSP_DIS_* accuracy DSPOpcode[pipeline[plPtrExec].opcode](); dsp_opcode_use[pipeline[plPtrExec].opcode]++; pipeline[plPtrWrite] = pipeline[plPtrExec]; @@ -3485,8 +4086,18 @@ static void DSP_movei(void) static void DSP_movepc(void) { -//Need to fix this to take into account pipelining effects... !!! FIX !!! - PRES = dsp_pc - 2; +#ifdef DSP_DIS_MOVEPC + if (doDSPDis) + WriteLog("%06X: MOVE PC, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", DSP_PPC, PIMM2, dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM2, PRN); +#endif +//Need to fix this to take into account pipelining effects... !!! FIX !!! [DONE] +// PRES = dsp_pc - 2; +//Account for pipeline effects... + PRES = dsp_pc - 2 - (pipeline[plPtrRead].opcode == 38 ? 6 : (pipeline[plPtrRead].opcode == PIPELINE_STALL ? 0 : 2)); +#ifdef DSP_DIS_MOVEPC + if (doDSPDis) + WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM2, PRES); +#endif } static void DSP_moveq(void) @@ -3589,13 +4200,13 @@ static void DSP_not(void) { #ifdef DSP_DIS_NOT if (doDSPDis) - WriteLog("%06X: NOT R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", DSP_PPC, PIMM1, PIMM2, dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM1, PRM, PIMM2, PRN); + WriteLog("%06X: NOT R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", DSP_PPC, PIMM2, dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM2, PRN); #endif PRES = ~PRN; SET_ZN(PRES); #ifdef DSP_DIS_NOT if (doDSPDis) - WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM1, PRM, PIMM2, PRES); + WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM2, PRES); #endif }