X-Git-Url: http://shamusworld.gotdns.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fdsp.cpp;h=f39449b5016fb4336c05d8813383dd8038806c11;hb=f7b2692b338aaf37470be0a1d0e5ae42c82e0c65;hp=6f27dd86c57e332d0aec76d1cb7eca734f555184;hpb=683f283e1328164c176618088c34408ea6c03cf7;p=virtualjaguar diff --git a/src/dsp.cpp b/src/dsp.cpp index 6f27dd8..f39449b 100644 --- a/src/dsp.cpp +++ b/src/dsp.cpp @@ -3,13 +3,36 @@ // // Originally by David Raingeard // GCC/SDL port by Niels Wagenaar (Linux/WIN32) and Caz (BeOS) -// Extensive cleanups/rewrites by James L. Hammons +// Extensive cleanups/rewrites by James Hammons +// (C) 2010 Underground Software +// +// JLH = James Hammons +// +// Who When What +// --- ---------- ------------------------------------------------------------- +// JLH 01/16/2010 Created this log ;-) +// JLH 11/26/2011 Added fixes for LOAD/STORE alignment issues // -#include -#include // Used only for SDL_GetTicks... #include "dsp.h" +#include // Used only for SDL_GetTicks... +#include +#include +#include "dac.h" +#include "gpu.h" +#include "jagdasm.h" +#include "jaguar.h" +#include "jerry.h" +#include "log.h" +#include "m68000/m68kinterface.h" +//#include "memory.h" + + +// Seems alignment in loads & stores was off... +#define DSP_CORRECT_ALIGNMENT +//#define DSP_CORRECT_ALIGNMENT_STORE + //#define DSP_DEBUG //#define DSP_DEBUG_IRQ //#define DSP_DEBUG_PL2 @@ -19,6 +42,7 @@ // Disassembly definitions +#if 0 #define DSP_DIS_ABS #define DSP_DIS_ADD #define DSP_DIS_ADDC @@ -74,7 +98,10 @@ //*/ bool doDSPDis = false; //bool doDSPDis = true; - +#endif +bool doDSPDis = false; +//#define DSP_DIS_JR +//#define DSP_DIS_JUMP /* No dis yet: @@ -223,7 +250,7 @@ static void dsp_opcode_abs(void); static void dsp_opcode_add(void); static void dsp_opcode_addc(void); static void dsp_opcode_addq(void); -static void dsp_opcode_addqmod(void); +static void dsp_opcode_addqmod(void); static void dsp_opcode_addqt(void); static void dsp_opcode_and(void); static void dsp_opcode_bclr(void); @@ -244,7 +271,7 @@ static void dsp_opcode_load_r14_indexed(void); static void dsp_opcode_load_r14_ri(void); static void dsp_opcode_load_r15_indexed(void); static void dsp_opcode_load_r15_ri(void); -static void dsp_opcode_mirror(void); +static void dsp_opcode_mirror(void); static void dsp_opcode_mmult(void); static void dsp_opcode_move(void); static void dsp_opcode_movei(void); @@ -263,8 +290,8 @@ static void dsp_opcode_resmac(void); static void dsp_opcode_ror(void); static void dsp_opcode_rorq(void); static void dsp_opcode_xor(void); -static void dsp_opcode_sat16s(void); -static void dsp_opcode_sat32s(void); +static void dsp_opcode_sat16s(void); +static void dsp_opcode_sat32s(void); static void dsp_opcode_sh(void); static void dsp_opcode_sha(void); static void dsp_opcode_sharq(void); @@ -280,25 +307,26 @@ static void dsp_opcode_store_r15_ri(void); static void dsp_opcode_sub(void); static void dsp_opcode_subc(void); static void dsp_opcode_subq(void); -static void dsp_opcode_subqmod(void); +static void dsp_opcode_subqmod(void); static void dsp_opcode_subqt(void); +static void dsp_opcode_illegal(void); uint8 dsp_opcode_cycles[64] = { - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, 1, 3, 1, 18, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, 2, 2, 2, 2, 3, 4, + 3, 3, 1, 3, 1, 18, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 2, 2, 2, 2, 3, 4, 5, 4, 5, 6, 6, 1, 1, 1, - 1, 2, 2, 2, 1, 1, 9, 3, + 1, 2, 2, 2, 1, 1, 9, 3, 3, 1, 6, 6, 2, 2, 3, 3 };//*/ //Here's a QnD kludge... //This is wrong, wrong, WRONG, but it seems to work for the time being... //(That is, it fixes Flip Out which relies on GPU timing rather than semaphores. Bad developers! Bad!) //What's needed here is a way to take pipeline effects into account (including pipeline stalls!)... -/*uint8 dsp_opcode_cycles[64] = +/*uint8 dsp_opcode_cycles[64] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, @@ -311,7 +339,7 @@ uint8 dsp_opcode_cycles[64] = };//*/ void (* dsp_opcode[64])() = -{ +{ dsp_opcode_add, dsp_opcode_addc, dsp_opcode_addq, dsp_opcode_addqt, dsp_opcode_sub, dsp_opcode_subc, dsp_opcode_subq, dsp_opcode_subqt, dsp_opcode_neg, dsp_opcode_and, dsp_opcode_or, dsp_opcode_xor, @@ -327,13 +355,13 @@ void (* dsp_opcode[64])() = dsp_opcode_mirror, dsp_opcode_store_r14_indexed, dsp_opcode_store_r15_indexed, dsp_opcode_move_pc, dsp_opcode_jump, dsp_opcode_jr, dsp_opcode_mmult, dsp_opcode_mtoi, dsp_opcode_normi, dsp_opcode_nop, dsp_opcode_load_r14_ri, dsp_opcode_load_r15_ri, - dsp_opcode_store_r14_ri, dsp_opcode_store_r15_ri, dsp_opcode_nop, dsp_opcode_addqmod, + dsp_opcode_store_r14_ri, dsp_opcode_store_r15_ri, dsp_opcode_illegal, dsp_opcode_addqmod, }; uint32 dsp_opcode_use[65]; -char * dsp_opcode_str[65]= -{ +const char * dsp_opcode_str[65]= +{ "add", "addc", "addq", "addqt", "sub", "subc", "subq", "subqt", "neg", "and", "or", "xor", @@ -363,9 +391,9 @@ static uint32 dsp_pointer_to_matrix; static uint32 dsp_data_organization; uint32 dsp_control; static uint32 dsp_div_control; -static uint8 dsp_flag_z, dsp_flag_n, dsp_flag_c; -static uint32 * dsp_reg, * dsp_alternate_reg; -static uint32 * dsp_reg_bank_0, * dsp_reg_bank_1; +static uint8 dsp_flag_z, dsp_flag_n, dsp_flag_c; +static uint32 * dsp_reg = NULL, * dsp_alternate_reg = NULL; +uint32 dsp_reg_bank_0[32], dsp_reg_bank_1[32]; static uint32 dsp_opcode_first_parameter; static uint32 dsp_opcode_second_parameter; @@ -390,10 +418,14 @@ static uint32 dsp_opcode_second_parameter; #define SET_ZNC_ADD(a,b,r) SET_N(r); SET_Z(r); SET_C_ADD(a,b) #define SET_ZNC_SUB(a,b,r) SET_N(r); SET_Z(r); SET_C_SUB(a,b) -uint32 dsp_convert_zero[32] = { 32,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 }; -uint8 * dsp_branch_condition_table = NULL; -static uint16 * mirror_table = NULL; -static uint8 * dsp_ram_8 = NULL; +uint32 dsp_convert_zero[32] = { + 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 +}; + +uint8 dsp_branch_condition_table[32 * 8]; +static uint16 mirror_table[65536]; +static uint8 dsp_ram_8[0x2000]; #define BRANCH_CONDITION(x) dsp_branch_condition_table[(x) + ((jaguar_flags & 7) << 5)] @@ -423,7 +455,7 @@ void dsp_reset_stats(void) dsp_opcode_use[i] = 0; } -void dsp_releaseTimeslice(void) +void DSPReleaseTimeslice(void) { //This does absolutely nothing!!! !!! FIX !!! dsp_releaseTimeSlice_flag = 1; @@ -431,49 +463,39 @@ void dsp_releaseTimeslice(void) void dsp_build_branch_condition_table(void) { - // Allocate the mirror table - if (!mirror_table) - mirror_table = (uint16 *)malloc(65536 * sizeof(mirror_table[0])); - // Fill in the mirror table - if (mirror_table) - for(int i=0; i<65536; i++) - mirror_table[i] = ((i >> 15) & 0x0001) | ((i >> 13) & 0x0002) | - ((i >> 11) & 0x0004) | ((i >> 9) & 0x0008) | - ((i >> 7) & 0x0010) | ((i >> 5) & 0x0020) | - ((i >> 3) & 0x0040) | ((i >> 1) & 0x0080) | - ((i << 1) & 0x0100) | ((i << 3) & 0x0200) | - ((i << 5) & 0x0400) | ((i << 7) & 0x0800) | - ((i << 9) & 0x1000) | ((i << 11) & 0x2000) | - ((i << 13) & 0x4000) | ((i << 15) & 0x8000); - - if (!dsp_branch_condition_table) + for(int i=0; i<65536; i++) { - dsp_branch_condition_table = (uint8 *)malloc(32 * 8 * sizeof(dsp_branch_condition_table[0])); + mirror_table[i] = ((i >> 15) & 0x0001) | ((i >> 13) & 0x0002) + | ((i >> 11) & 0x0004) | ((i >> 9) & 0x0008) + | ((i >> 7) & 0x0010) | ((i >> 5) & 0x0020) + | ((i >> 3) & 0x0040) | ((i >> 1) & 0x0080) + | ((i << 1) & 0x0100) | ((i << 3) & 0x0200) + | ((i << 5) & 0x0400) | ((i << 7) & 0x0800) + | ((i << 9) & 0x1000) | ((i << 11) & 0x2000) + | ((i << 13) & 0x4000) | ((i << 15) & 0x8000); + } - // Fill in the condition table - if (dsp_branch_condition_table) + // Fill in the condition table + for(int i=0; i<8; i++) + { + for(int j=0; j<32; j++) { - for(int i=0; i<8; i++) - { - for(int j=0; j<32; j++) - { - int result = 1; - if (j & 1) - if (i & ZERO_FLAG) - result = 0; - if (j & 2) - if (!(i & ZERO_FLAG)) - result = 0; - if (j & 4) - if (i & (CARRY_FLAG << (j >> 4))) - result = 0; - if (j & 8) - if (!(i & (CARRY_FLAG << (j >> 4)))) - result = 0; - dsp_branch_condition_table[i * 32 + j] = result; - } - } + int result = 1; + + if ((j & 1) && (i & ZERO_FLAG)) + result = 0; + + if ((j & 2) && (!(i & ZERO_FLAG))) + result = 0; + + if ((j & 4) && (i & (CARRY_FLAG << (j >> 4)))) + result = 0; + + if ((j & 8) && (!(i & (CARRY_FLAG << (j >> 4))))) + result = 0; + + dsp_branch_condition_table[i * 32 + j] = result; } } } @@ -512,7 +534,7 @@ uint8 DSPReadByte(uint32 offset, uint32 who/*=UNKNOWN*/) } return JaguarReadByte(offset, who); -} +} uint16 DSPReadWord(uint32 offset, uint32 who/*=UNKNOWN*/) { @@ -520,48 +542,6 @@ uint16 DSPReadWord(uint32 offset, uint32 who/*=UNKNOWN*/) WriteLog("DSP: ReadWord--Attempt to read from DSP register file by %s!\n", whoName[who]); //??? offset &= 0xFFFFFFFE; - // jaguar cd bios -/* if (jaguar_mainRom_crc32==0xa74a97cd) - { - if (offset==0xF1A114) return(0x0000); - if (offset==0xF1A116) return(0x0000); - if (offset==0xF1B000) return(0x1234); - if (offset==0xF1B002) return(0x5678); - }*/ -/* - if (jaguar_mainRom_crc32==0x7ae20823) - { - if (offset==0xF1B9D8) return(0x0000); - if (offset==0xF1B9Da) return(0x0000); - if (offset==0xF1B2C0) return(0x0000); - if (offset==0xF1B2C2) return(0x0000); - } -*/ - // pour permettre à wolfenstein 3d de tourner sans le dsp -/* if ((offset==0xF1B0D0)||(offset==0xF1B0D2)) - return(0); -*/ - - // pour permettre à nba jam de tourner sans le dsp -/* if (jaguar_mainRom_crc32==0x4faddb18) - { - if (offset==0xf1b2c0) return(0); - if (offset==0xf1b2c2) return(0); - if (offset==0xf1b240) return(0); - if (offset==0xf1b242) return(0); - if (offset==0xF1B340) return(0); - if (offset==0xF1B342) return(0); - if (offset==0xF1BAD8) return(0); - if (offset==0xF1BADA) return(0); - if (offset==0xF1B040) return(0); - if (offset==0xF1B042) return(0); - if (offset==0xF1B0C0) return(0); - if (offset==0xF1B0C2) return(0); - if (offset==0xF1B140) return(0); - if (offset==0xF1B142) return(0); - if (offset==0xF1B1C0) return(0); - if (offset==0xF1B1C2) return(0); - }*/ if (offset >= DSP_WORK_RAM_BASE && offset <= DSP_WORK_RAM_BASE+0x1FFF) { @@ -607,12 +587,9 @@ uint32 DSPReadLong(uint32 offset, uint32 who/*=UNKNOWN*/) offset &= 0x3F; switch (offset) { - case 0x00: /*dsp_flag_c?(dsp_flag_c=1):(dsp_flag_c=0); - dsp_flag_z?(dsp_flag_z=1):(dsp_flag_z=0); - dsp_flag_n?(dsp_flag_n=1):(dsp_flag_n=0);*/ - - dsp_flags = (dsp_flags & 0xFFFFFFF8) | (dsp_flag_n << 2) | (dsp_flag_c << 1) | dsp_flag_z; - return dsp_flags & 0xFFFFC1FF; + case 0x00: + dsp_flags = (dsp_flags & 0xFFFFFFF8) | (dsp_flag_n << 2) | (dsp_flag_c << 1) | dsp_flag_z; + return dsp_flags & 0xFFFFC1FF; case 0x04: return dsp_matrix_control; case 0x08: return dsp_pointer_to_matrix; case 0x0C: return dsp_data_organization; @@ -643,7 +620,7 @@ void DSPWriteByte(uint32 offset, uint8 data, uint32 who/*=UNKNOWN*/) /* if (dsp_in_exec == 0) { m68k_end_timeslice(); - gpu_releaseTimeslice(); + dsp_releaseTimeslice(); }*/ return; } @@ -651,7 +628,7 @@ void DSPWriteByte(uint32 offset, uint8 data, uint32 who/*=UNKNOWN*/) { uint32 reg = offset & 0x1C; int bytenum = offset & 0x03; - + if ((reg >= 0x1C) && (reg <= 0x1F)) dsp_div_control = (dsp_div_control & (~(0xFF << (bytenum << 3)))) | (data << (bytenum << 3)); else @@ -659,7 +636,7 @@ void DSPWriteByte(uint32 offset, uint8 data, uint32 who/*=UNKNOWN*/) //This looks funky. !!! FIX !!! uint32 old_data = DSPReadLong(offset&0xFFFFFFC, who); bytenum = 3 - bytenum; // convention motorola !!! - old_data = (old_data & (~(0xFF << (bytenum << 3)))) | (data << (bytenum << 3)); + old_data = (old_data & (~(0xFF << (bytenum << 3)))) | (data << (bytenum << 3)); DSPWriteLong(offset & 0xFFFFFFC, old_data, who); } return; @@ -693,7 +670,7 @@ void DSPWriteWord(uint32 offset, uint16 data, uint32 who/*=UNKNOWN*/) { // WriteLog("dsp: writing %.4x at 0x%.8x\n",data,offset+DSP_WORK_RAM_BASE); m68k_end_timeslice(); - gpu_releaseTimeslice(); + dsp_releaseTimeslice(); }*/ //CC only! #ifdef DSP_DEBUG_CC @@ -708,19 +685,22 @@ SET16(ram2, offset, data); if ((offset & 0x1C) == 0x1C) { if (offset & 0x03) - dsp_div_control = (dsp_div_control&0xffff0000)|(data&0xffff); + dsp_div_control = (dsp_div_control & 0xFFFF0000) | (data & 0xFFFF); else - dsp_div_control = (dsp_div_control&0xffff)|((data&0xffff)<<16); + dsp_div_control = (dsp_div_control & 0xFFFF) | ((data & 0xFFFF) << 16); } else { - uint32 old_data = DSPReadLong(offset & 0xffffffc, who); + uint32 old_data = DSPReadLong(offset & 0xFFFFFFC, who); + if (offset & 0x03) - old_data = (old_data&0xffff0000)|(data&0xffff); + old_data = (old_data & 0xFFFF0000) | (data & 0xFFFF); else - old_data = (old_data&0xffff)|((data&0xffff)<<16); - DSPWriteLong(offset & 0xffffffc, old_data, who); + old_data = (old_data & 0xFFFF) | ((data & 0xFFFF) << 16); + + DSPWriteLong(offset & 0xFFFFFFC, old_data, who); } + return; } @@ -763,17 +743,61 @@ SET32(ram2, offset, data); case 0x00: { #ifdef DSP_DEBUG - WriteLog("DSP: Writing %08X to DSP_FLAGS by %s (REGPAGE is %s)...\n", data, whoName[who], (dsp_flags & REGPAGE ? "set" : "not set")); + WriteLog("DSP: Writing %08X to DSP_FLAGS by %s (REGPAGE is %sset)...\n", data, whoName[who], (dsp_flags & REGPAGE ? "" : "not ")); #endif // bool IMASKCleared = (dsp_flags & IMASK) && !(data & IMASK); IMASKCleared = (dsp_flags & IMASK) && !(data & IMASK); - dsp_flags = data; + // NOTE: According to the JTRM, writing a 1 to IMASK has no effect; only the + // IRQ logic can set it. So we mask it out here to prevent problems... + dsp_flags = data & (~IMASK); dsp_flag_z = dsp_flags & 0x01; dsp_flag_c = (dsp_flags >> 1) & 0x01; dsp_flag_n = (dsp_flags >> 2) & 0x01; DSPUpdateRegisterBanks(); dsp_control &= ~((dsp_flags & CINT04FLAGS) >> 3); dsp_control &= ~((dsp_flags & CINT5FLAG) >> 1); + +// NB: This is just a wild hairy-assed guess as to what the playback frequency is. +// It can be timed to anything really, anything that writes to L/RTXD at a regular +// interval. Most things seem to use either the I2S interrupt or the TIMER 0 +// interrupt, so that's what we check for here. Just know that this approach +// can be easily fooled! +// Note also that if both interrupts are enabled, the I2S freq will win. :-P + +// Further Note: +// The impetus for this "fix" was Cybermorph, which sets the SCLK to 7 which is an +// audio frequency > 48 KHz. However, it stuffs the L/RTXD registers using TIMER0. +// So, while this works, it's a by-product of the lame way in which audio is currently +// handled. Hopefully, once we run the DSP in the host audio IRQ, this problem will +// go away of its own accord. :-P +// Or does it? It seems the I2S interrupt isn't on with Cybermorph, so something +// weird is going on here... +// Maybe it works like this: It acknowledges the 1st interrupt, but never clears it. +// So subsequent interrupts come into the chip, but they're never serviced but the +// I2S subsystem keeps going. +// After some testing on real hardware, it seems that if you enable TIMER0 and EXTERNAL +// IRQs on J_INT ($F10020), you don't have to run an I2S interrupt on the DSP. Also, +// It seems that it's only stable for values of SCLK <= 9. + +// All of the preceeding is moot now; we run the DSP in the host audio IRQ. This means +// that we don't actually need this stuff anymore. :-D +#if 0 + if (data & INT_ENA1) // I2S interrupt + { + int freq = GetCalculatedFrequency(); +//This happens too often to be useful... +// WriteLog("DSP: Setting audio freqency to %u Hz...\n", freq); + DACSetNewFrequency(freq); + } + else if (data & INT_ENA2) // TIMER 0 interrupt + { + int freq = JERRYGetPIT1Frequency(); +//This happens too often to be useful... +// WriteLog("DSP: Setting audio freqency to %u Hz...\n", freq); + DACSetNewFrequency(freq); + } +#endif + /* if (IMASKCleared) // If IMASK was cleared, #ifdef DSP_DEBUG_IRQ { @@ -832,9 +856,9 @@ if (who != DSP) //!!!!!!!! break; case 0x14: - { + { //#ifdef DSP_DEBUG -WriteLog("Write to DSP CTRL by %s: %08X\n", whoName[who], data); +WriteLog("Write to DSP CTRL by %s: %08X (DSP PC=$%08X)\n", whoName[who], data, dsp_pc); //#endif bool wasRunning = DSP_RUNNING; // uint32 dsp_was_running = DSP_RUNNING; @@ -845,12 +869,13 @@ WriteLog("Write to DSP CTRL by %s: %08X\n", whoName[who], data); WriteLog("DSP: DSP -> CPU interrupt\n"); #endif // This was WRONG -// Why do we check for a valid handler at 64? Isn't that the Jag programmer's responsibility? +// Why do we check for a valid handler at 64? Isn't that the Jag programmer's responsibility? (YES) +#warning "!!! DSP IRQs that go to the 68K have to be routed thru TOM !!! FIX !!!" if (JERRYIRQEnabled(IRQ2_DSP))// && jaguar_interrupt_handler_is_valid(64)) { JERRYSetPendingIRQ(IRQ2_DSP); - dsp_releaseTimeslice(); - m68k_set_irq(7); // Set 68000 NMI... + DSPReleaseTimeslice(); + m68k_set_irq(2); // Set 68000 IPL 2... } data &= ~CPUINT; } @@ -861,7 +886,7 @@ WriteLog("Write to DSP CTRL by %s: %08X\n", whoName[who], data); WriteLog("DSP: CPU -> DSP interrupt\n"); #endif m68k_end_timeslice(); - gpu_releaseTimeslice(); + DSPReleaseTimeslice(); DSPSetIRQLine(DSPIRQ_CPU, ASSERT_LINE); data &= ~DSPINT0; } @@ -901,14 +926,14 @@ else WriteLog(" --> Stopped by %s! (DSP PC: %08X)", whoName[who], dsp_pc); WriteLog("\n"); #endif // DSP_DEBUG -//This isn't exactly right either--we don't know if it was the M68K or the GPU writing here... +//This isn't exactly right either--we don't know if it was the M68K or the DSP writing here... // !!! FIX !!! [DONE] if (DSP_RUNNING) { if (who == M68K) m68k_end_timeslice(); - else if (who == GPU) - gpu_releaseTimeslice(); + else if (who == DSP) + DSPReleaseTimeslice(); if (!wasRunning) FlushDSPPipeline(); @@ -917,6 +942,7 @@ WriteLog("\n"); break; } case 0x18: +WriteLog("DSP: Modulo data %08X written by %s.\n", data, whoName[who]); dsp_modulo = data; break; case 0x1C: @@ -950,6 +976,10 @@ void DSPUpdateRegisterBanks(void) dsp_reg = dsp_reg_bank_1, dsp_alternate_reg = dsp_reg_bank_0; else dsp_reg = dsp_reg_bank_0, dsp_alternate_reg = dsp_reg_bank_1; + +#ifdef DSP_DEBUG_IRQ + WriteLog("DSP: Register bank #%s active.\n", (bank ? "1" : "0")); +#endif } // @@ -970,7 +1000,8 @@ void DSPHandleIRQs(void) if (!bits) // Bail if nothing is enabled return; - int which = 0; // Determine which interrupt + int which = 0; // Determine which interrupt + if (bits & 0x01) which = 0; if (bits & 0x02) @@ -1052,8 +1083,8 @@ ctrl2[4] = dsp_flags; WriteLog(" [PC will return to %08X, R31 = %08X]\n", dsp_pc - (pipeline[plPtrExec].opcode == 38 ? 6 : (pipeline[plPtrExec].opcode == PIPELINE_STALL ? 0 : 2)), dsp_reg[31]); #endif - // subqt #4,r31 ; pre-decrement stack pointer - // move pc,r30 ; address of interrupted code + // subqt #4,r31 ; pre-decrement stack pointer + // move pc,r30 ; address of interrupted code // store r30,(r31) ; store return address dsp_reg[31] -= 4; //CC only! @@ -1084,8 +1115,8 @@ SET32(ram2, regs2[31] - 0xF1B000, dsp_pc - 2 - (pipeline[plPtrExec].opcode == 38 #endif //!!!!!!!! - // movei #service_address,r30 ; pointer to ISR entry - // jump (r30) ; jump to ISR + // movei #service_address,r30 ; pointer to ISR entry + // jump (r30) ; jump to ISR // nop dsp_pc = dsp_reg[30] = DSP_WORK_RAM_BASE + (which * 0x10); //CC only! @@ -1136,7 +1167,7 @@ DSPUpdateRegisterBanks(); if (!bits) // Bail if nothing is enabled return; - int which = 0; // Determine which interrupt + int which = 0; // Determine which interrupt if (bits & 0x01) which = 0; if (bits & 0x02) @@ -1150,39 +1181,48 @@ DSPUpdateRegisterBanks(); if (bits & 0x20) which = 5; -#ifdef DSP_DEBUG_IRQ - WriteLog("DSP: Generating interrupt #%i...", which); -#endif - - dsp_flags |= IMASK; + dsp_flags |= IMASK; // Force Bank #0 //CC only! #ifdef DSP_DEBUG_CC ctrl1[4] = dsp_flags; #endif //!!!!!!!! +#ifdef DSP_DEBUG_IRQ + WriteLog("DSP: Bank 0: R30=%08X, R31=%08X\n", dsp_reg_bank_0[30], dsp_reg_bank_0[31]); + WriteLog("DSP: Bank 1: R30=%08X, R31=%08X\n", dsp_reg_bank_1[30], dsp_reg_bank_1[31]); +#endif DSPUpdateRegisterBanks(); #ifdef DSP_DEBUG_IRQ + WriteLog("DSP: Bank 0: R30=%08X, R31=%08X\n", dsp_reg_bank_0[30], dsp_reg_bank_0[31]); + WriteLog("DSP: Bank 1: R30=%08X, R31=%08X\n", dsp_reg_bank_1[30], dsp_reg_bank_1[31]); +#endif + +#ifdef DSP_DEBUG_IRQ + WriteLog("DSP: Generating interrupt #%i...", which); WriteLog(" [PC will return to %08X, R31 = %08X]\n", dsp_pc, dsp_reg[31]); #endif - // subqt #4,r31 ; pre-decrement stack pointer - // move pc,r30 ; address of interrupted code + // subqt #4,r31 ; pre-decrement stack pointer + // move pc,r30 ; address of interrupted code // store r30,(r31) ; store return address dsp_reg[31] -= 4; + dsp_reg[30] = dsp_pc - 2; // -2 because we've executed the instruction already + //CC only! #ifdef DSP_DEBUG_CC regs1[31] -= 4; #endif //!!!!!!!! - DSPWriteLong(dsp_reg[31], dsp_pc - 2, DSP); +// DSPWriteLong(dsp_reg[31], dsp_pc - 2, DSP); + DSPWriteLong(dsp_reg[31], dsp_reg[30], DSP); //CC only! #ifdef DSP_DEBUG_CC SET32(ram1, regs1[31] - 0xF1B000, dsp_pc - 2); #endif //!!!!!!!! - // movei #service_address,r30 ; pointer to ISR entry - // jump (r30) ; jump to ISR + // movei #service_address,r30 ; pointer to ISR entry + // jump (r30) ; jump to ISR // nop dsp_pc = dsp_reg[30] = DSP_WORK_RAM_BASE + (which * 0x10); //CC only! @@ -1209,7 +1249,9 @@ ctrl1[8] = ctrl2[8] = dsp_control; if (state) { dsp_control |= mask; // Set the latch bit - DSPHandleIRQs(); +#warning !!! No checking done to see if we're using pipelined DSP or not !!! +// DSPHandleIRQs(); + DSPHandleIRQsNP(); //CC only! #ifdef DSP_DEBUG_CC ctrl1[8] = ctrl2[8] = dsp_control; @@ -1224,14 +1266,20 @@ DSPHandleIRQsNP(); // GPUSetIRQLine(GPUIRQ_DSP, ASSERT_LINE); } +bool DSPIsRunning(void) +{ + return (DSP_RUNNING ? true : false); +} + void DSPInit(void) { - memory_malloc_secure((void **)&dsp_ram_8, 0x2000, "DSP work RAM"); - memory_malloc_secure((void **)&dsp_reg_bank_0, 32 * sizeof(int32), "DSP bank 0 regs"); - memory_malloc_secure((void **)&dsp_reg_bank_1, 32 * sizeof(int32), "DSP bank 1 regs"); +// memory_malloc_secure((void **)&dsp_ram_8, 0x2000, "DSP work RAM"); +// memory_malloc_secure((void **)&dsp_reg_bank_0, 32 * sizeof(int32), "DSP bank 0 regs"); +// memory_malloc_secure((void **)&dsp_reg_bank_1, 32 * sizeof(int32), "DSP bank 1 regs"); dsp_build_branch_condition_table(); DSPReset(); + srand(time(NULL)); // For randomizing local RAM } void DSPReset(void) @@ -1258,7 +1306,12 @@ void DSPReset(void) IMASKCleared = false; FlushDSPPipeline(); dsp_reset_stats(); - memset(dsp_ram_8, 0xFF, 0x2000); +// memset(dsp_ram_8, 0xFF, 0x2000); + // Contents of local RAM are quasi-stable; we simulate this by randomizing RAM contents + for(uint32 i=0; i<8192; i+=4) + { + *((uint32 *)(&dsp_ram_8[i])) = rand(); + } } void DSPDumpDisassembly(void) @@ -1267,6 +1320,7 @@ void DSPDumpDisassembly(void) WriteLog("\n---[DSP code at 00F1B000]---------------------------\n"); uint32 j = 0xF1B000; + while (j <= 0xF1CFFF) { uint32 oldj = j; @@ -1280,38 +1334,45 @@ void DSPDumpRegisters(void) //Shoud add modulus, etc to dump here... WriteLog("\n---[DSP flags: NCZ %d%d%d, DSP PC: %08X]------------\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, dsp_pc); WriteLog("\nRegisters bank 0\n"); + for(int j=0; j<8; j++) { WriteLog("\tR%02i = %08X R%02i = %08X R%02i = %08X R%02i = %08X\n", - (j << 2) + 0, dsp_reg_bank_0[(j << 2) + 0], - (j << 2) + 1, dsp_reg_bank_0[(j << 2) + 1], - (j << 2) + 2, dsp_reg_bank_0[(j << 2) + 2], - (j << 2) + 3, dsp_reg_bank_0[(j << 2) + 3]); + (j << 2) + 0, dsp_reg_bank_0[(j << 2) + 0], + (j << 2) + 1, dsp_reg_bank_0[(j << 2) + 1], + (j << 2) + 2, dsp_reg_bank_0[(j << 2) + 2], + (j << 2) + 3, dsp_reg_bank_0[(j << 2) + 3]); } + WriteLog("Registers bank 1\n"); + for(int j=0; j<8; j++) { WriteLog("\tR%02i = %08X R%02i = %08X R%02i = %08X R%02i = %08X\n", - (j << 2) + 0, dsp_reg_bank_1[(j << 2) + 0], - (j << 2) + 1, dsp_reg_bank_1[(j << 2) + 1], - (j << 2) + 2, dsp_reg_bank_1[(j << 2) + 2], - (j << 2) + 3, dsp_reg_bank_1[(j << 2) + 3]); + (j << 2) + 0, dsp_reg_bank_1[(j << 2) + 0], + (j << 2) + 1, dsp_reg_bank_1[(j << 2) + 1], + (j << 2) + 2, dsp_reg_bank_1[(j << 2) + 2], + (j << 2) + 3, dsp_reg_bank_1[(j << 2) + 3]); } } void DSPDone(void) { int i, j; - WriteLog("DSP: Stopped at PC=%08X dsp_modulo=%08X (dsp %s running)\n", dsp_pc, dsp_modulo, (DSP_RUNNING ? "was" : "wasn't")); + WriteLog("DSP: Stopped at PC=%08X dsp_modulo=%08X (dsp was%s running)\n", dsp_pc, dsp_modulo, (DSP_RUNNING ? "" : "n't")); WriteLog("DSP: %sin interrupt handler\n", (dsp_flags & IMASK ? "" : "not ")); - // get the active interrupt bits + // get the active interrupt bits int bits = ((dsp_control >> 10) & 0x20) | ((dsp_control >> 6) & 0x1F); - // get the interrupt mask + // get the interrupt mask int mask = ((dsp_flags >> 11) & 0x20) | ((dsp_flags >> 4) & 0x1F); - WriteLog("DSP: pending=%08X enabled=%08X\n", bits, mask); + WriteLog("DSP: pending=$%X enabled=$%X (%s%s%s%s%s%s)\n", bits, mask, + (mask & 0x01 ? "CPU " : ""), (mask & 0x02 ? "I2S " : ""), + (mask & 0x04 ? "Timer0 " : ""), (mask & 0x08 ? "Timer1 " : ""), + (mask & 0x10 ? "Ext0 " : ""), (mask & 0x20 ? "Ext1" : "")); WriteLog("\nRegisters bank 0\n"); + for(int j=0; j<8; j++) { WriteLog("\tR%02i=%08X R%02i=%08X R%02i=%08X R%02i=%08X\n", @@ -1320,7 +1381,9 @@ void DSPDone(void) (j << 2) + 2, dsp_reg_bank_0[(j << 2) + 2], (j << 2) + 3, dsp_reg_bank_0[(j << 2) + 3]); } + WriteLog("\nRegisters bank 1\n"); + for (j=0; j<8; j++) { WriteLog("\tR%02i=%08X R%02i=%08X R%02i=%08X R%02i=%08X\n", @@ -1328,12 +1391,14 @@ void DSPDone(void) (j << 2) + 1, dsp_reg_bank_1[(j << 2) + 1], (j << 2) + 2, dsp_reg_bank_1[(j << 2) + 2], (j << 2) + 3, dsp_reg_bank_1[(j << 2) + 3]); - } + WriteLog("\n"); + static char buffer[512]; j = DSP_WORK_RAM_BASE; - while (j <= 0xF1BFFF) + + while (j <= 0xF1CFFF) { uint32 oldj = j; j += dasmjag(JAGUAR_DSP, buffer, j); @@ -1341,15 +1406,12 @@ void DSPDone(void) }//*/ WriteLog("DSP opcodes use:\n"); + for (i=0;i<64;i++) { if (dsp_opcode_use[i]) WriteLog("\t%s %i\n", dsp_opcode_str[i], dsp_opcode_use[i]); }//*/ - - memory_free(dsp_ram_8); - memory_free(dsp_reg_bank_0); - memory_free(dsp_reg_bank_1); } @@ -1561,9 +1623,6 @@ for(int k=0; k<2; k++) //static uint32 pcQueue[32], ptrPCQ = 0; void DSPExec(int32 cycles) { -/*HACKS!!! -> if (cycles != 1 && jaguar_mainRom_crc32 == 0xba74c3ed) - dsp_check_if_i2s_interrupt_needed();*/ - #ifdef DSP_SINGLE_STEPPING if (dsp_control & 0x18) { @@ -1596,7 +1655,7 @@ if (dsp_pc == 0xF1B092) if (IMASKCleared) // If IMASK was cleared, { #ifdef DSP_DEBUG_IRQ - WriteLog("DSP: Finished interrupt.\n"); + WriteLog("DSP: Finished interrupt. PC=$%06X\n", dsp_pc); #endif DSPHandleIRQsNP(); // See if any other interrupts are pending! IMASKCleared = false; @@ -1666,7 +1725,7 @@ if ((dsp_pc < 0xF1B000 || dsp_pc > 0xF1CFFE) && !tripwire) static void dsp_opcode_jump(void) { #ifdef DSP_DIS_JUMP -char * condition[32] = +const char * condition[32] = { "T", "nz", "z", "???", "nc", "nc nz", "nc z", "???", "c", "c nz", "c z", "???", "???", "???", "???", "???", "???", "???", "???", "???", "nn", "nn nz", "nn z", "???", "n", "n nz", "n z", "???", @@ -1701,7 +1760,7 @@ char * condition[32] = static void dsp_opcode_jr(void) { #ifdef DSP_DIS_JR -char * condition[32] = +const char * condition[32] = { "T", "nz", "z", "???", "nc", "nc nz", "nc z", "???", "c", "c nz", "c z", "???", "???", "???", "???", "???", "???", "???", "???", "???", "nn", "nn nz", "nn z", "???", "n", "n nz", "n z", "???", @@ -1928,7 +1987,11 @@ static void dsp_opcode_store_r14_indexed(void) if (doDSPDis) WriteLog("%06X: STORE R%02u, (R14+$%02X) [NCZ:%u%u%u, R%02u=%08X, R14+$%02X=%08X]\n", dsp_pc-2, IMM_2, dsp_convert_zero[IMM_1] << 2, dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_2, RN, dsp_convert_zero[IMM_1] << 2, dsp_reg[14]+(dsp_convert_zero[IMM_1] << 2)); #endif +#ifdef DSP_CORRECT_ALIGNMENT_STORE + DSPWriteLong((dsp_reg[14] & 0xFFFFFFFC) + (dsp_convert_zero[IMM_1] << 2), RN, DSP); +#else DSPWriteLong(dsp_reg[14] + (dsp_convert_zero[IMM_1] << 2), RN, DSP); +#endif } static void dsp_opcode_store_r15_indexed(void) @@ -1937,7 +2000,11 @@ static void dsp_opcode_store_r15_indexed(void) if (doDSPDis) WriteLog("%06X: STORE R%02u, (R15+$%02X) [NCZ:%u%u%u, R%02u=%08X, R15+$%02X=%08X]\n", dsp_pc-2, IMM_2, dsp_convert_zero[IMM_1] << 2, dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_2, RN, dsp_convert_zero[IMM_1] << 2, dsp_reg[15]+(dsp_convert_zero[IMM_1] << 2)); #endif +#ifdef DSP_CORRECT_ALIGNMENT_STORE + DSPWriteLong((dsp_reg[15] & 0xFFFFFFFC) + (dsp_convert_zero[IMM_1] << 2), RN, DSP); +#else DSPWriteLong(dsp_reg[15] + (dsp_convert_zero[IMM_1] << 2), RN, DSP); +#endif } static void dsp_opcode_load_r14_ri(void) @@ -1946,7 +2013,11 @@ static void dsp_opcode_load_r14_ri(void) if (doDSPDis) WriteLog("%06X: LOAD (R14+R%02u), R%02u [NCZ:%u%u%u, R14+R%02u=%08X, R%02u=%08X] -> ", dsp_pc-2, IMM_1, IMM_2, dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_1, RM+dsp_reg[14], IMM_2, RN); #endif +#ifdef DSP_CORRECT_ALIGNMENT + RN = DSPReadLong((dsp_reg[14] + RM) & 0xFFFFFFFC, DSP); +#else RN = DSPReadLong(dsp_reg[14] + RM, DSP); +#endif #ifdef DSP_DIS_LOAD14R if (doDSPDis) WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_2, RN); @@ -1959,7 +2030,11 @@ static void dsp_opcode_load_r15_ri(void) if (doDSPDis) WriteLog("%06X: LOAD (R15+R%02u), R%02u [NCZ:%u%u%u, R15+R%02u=%08X, R%02u=%08X] -> ", dsp_pc-2, IMM_1, IMM_2, dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_1, RM+dsp_reg[15], IMM_2, RN); #endif +#ifdef DSP_CORRECT_ALIGNMENT + RN = DSPReadLong((dsp_reg[15] + RM) & 0xFFFFFFFC, DSP); +#else RN = DSPReadLong(dsp_reg[15] + RM, DSP); +#endif #ifdef DSP_DIS_LOAD15R if (doDSPDis) WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_2, RN); @@ -2002,10 +2077,17 @@ static void dsp_opcode_storew(void) if (doDSPDis) WriteLog("%06X: STOREW R%02u, (R%02u) [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", dsp_pc-2, IMM_2, IMM_1, dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_2, RN, IMM_1, RM); #endif +#ifdef DSP_CORRECT_ALIGNMENT_STORE + if (RM >= DSP_WORK_RAM_BASE && RM <= (DSP_WORK_RAM_BASE + 0x1FFF)) + DSPWriteLong(RM & 0xFFFFFFFE, RN & 0xFFFF, DSP); + else + JaguarWriteWord(RM & 0xFFFFFFFE, RN, DSP); +#else if (RM >= DSP_WORK_RAM_BASE && RM <= (DSP_WORK_RAM_BASE + 0x1FFF)) DSPWriteLong(RM, RN & 0xFFFF, DSP); else JaguarWriteWord(RM, RN, DSP); +#endif } static void dsp_opcode_store(void) @@ -2014,7 +2096,11 @@ static void dsp_opcode_store(void) if (doDSPDis) WriteLog("%06X: STORE R%02u, (R%02u) [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", dsp_pc-2, IMM_2, IMM_1, dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_2, RN, IMM_1, RM); #endif +#ifdef DSP_CORRECT_ALIGNMENT_STORE + DSPWriteLong(RM & 0xFFFFFFFC, RN, DSP); +#else DSPWriteLong(RM, RN, DSP); +#endif } static void dsp_opcode_loadb(void) @@ -2039,10 +2125,17 @@ static void dsp_opcode_loadw(void) if (doDSPDis) WriteLog("%06X: LOADW (R%02u), R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", dsp_pc-2, IMM_1, IMM_2, dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_1, RM, IMM_2, RN); #endif +#ifdef DSP_CORRECT_ALIGNMENT + if (RM >= DSP_WORK_RAM_BASE && RM <= (DSP_WORK_RAM_BASE + 0x1FFF)) + RN = DSPReadLong(RM & 0xFFFFFFFE, DSP) & 0xFFFF; + else + RN = JaguarReadWord(RM & 0xFFFFFFFE, DSP); +#else if (RM >= DSP_WORK_RAM_BASE && RM <= (DSP_WORK_RAM_BASE + 0x1FFF)) RN = DSPReadLong(RM, DSP) & 0xFFFF; else RN = JaguarReadWord(RM, DSP); +#endif #ifdef DSP_DIS_LOADW if (doDSPDis) WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_2, RN); @@ -2055,7 +2148,11 @@ static void dsp_opcode_load(void) if (doDSPDis) WriteLog("%06X: LOAD (R%02u), R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", dsp_pc-2, IMM_1, IMM_2, dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_1, RM, IMM_2, RN); #endif +#ifdef DSP_CORRECT_ALIGNMENT + RN = DSPReadLong(RM & 0xFFFFFFFC, DSP); +#else RN = DSPReadLong(RM, DSP); +#endif #ifdef DSP_DIS_LOAD if (doDSPDis) WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_2, RN); @@ -2068,7 +2165,11 @@ static void dsp_opcode_load_r14_indexed(void) if (doDSPDis) WriteLog("%06X: LOAD (R14+$%02X), R%02u [NCZ:%u%u%u, R14+$%02X=%08X, R%02u=%08X] -> ", dsp_pc-2, dsp_convert_zero[IMM_1] << 2, IMM_2, dsp_flag_n, dsp_flag_c, dsp_flag_z, dsp_convert_zero[IMM_1] << 2, dsp_reg[14]+(dsp_convert_zero[IMM_1] << 2), IMM_2, RN); #endif +#ifdef DSP_CORRECT_ALIGNMENT + RN = DSPReadLong((dsp_reg[14] & 0xFFFFFFFC) + (dsp_convert_zero[IMM_1] << 2), DSP); +#else RN = DSPReadLong(dsp_reg[14] + (dsp_convert_zero[IMM_1] << 2), DSP); +#endif #ifdef DSP_DIS_LOAD14I if (doDSPDis) WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_2, RN); @@ -2081,7 +2182,11 @@ static void dsp_opcode_load_r15_indexed(void) if (doDSPDis) WriteLog("%06X: LOAD (R15+$%02X), R%02u [NCZ:%u%u%u, R15+$%02X=%08X, R%02u=%08X] -> ", dsp_pc-2, dsp_convert_zero[IMM_1] << 2, IMM_2, dsp_flag_n, dsp_flag_c, dsp_flag_z, dsp_convert_zero[IMM_1] << 2, dsp_reg[15]+(dsp_convert_zero[IMM_1] << 2), IMM_2, RN); #endif +#ifdef DSP_CORRECT_ALIGNMENT + RN = DSPReadLong((dsp_reg[15] & 0xFFFFFFFC) + (dsp_convert_zero[IMM_1] << 2), DSP); +#else RN = DSPReadLong(dsp_reg[15] + (dsp_convert_zero[IMM_1] << 2), DSP); +#endif #ifdef DSP_DIS_LOAD15I if (doDSPDis) WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_2, RN); @@ -2278,7 +2383,7 @@ static void dsp_opcode_imacn(void) if (doDSPDis) WriteLog("[NCZ:%u%u%u, DSP_ACC=%02X%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, (uint8)(dsp_acc >> 32), (uint32)(dsp_acc & 0xFFFFFFFF)); #endif -} +} static void dsp_opcode_mtoi(void) { @@ -2311,14 +2416,14 @@ static void dsp_opcode_normi(void) static void dsp_opcode_mmult(void) { int count = dsp_matrix_control&0x0f; - uint32 addr = dsp_pointer_to_matrix; // in the gpu ram + uint32 addr = dsp_pointer_to_matrix; // in the dsp ram int64 accum = 0; uint32 res; if (!(dsp_matrix_control & 0x10)) { for (int i = 0; i < count; i++) - { + { int16 a; if (i&0x01) a=(int16)((dsp_alternate_reg[dsp_opcode_first_parameter + (i>>1)]>>16)&0xffff); @@ -2357,7 +2462,7 @@ static void dsp_opcode_abs(void) #endif uint32 _Rn = RN; uint32 res; - + if (_Rn == 0x80000000) dsp_flag_n = 1; else @@ -2435,6 +2540,7 @@ static void dsp_opcode_shlq(void) if (doDSPDis) WriteLog("%06X: SHLQ #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", dsp_pc-2, 32 - IMM_1, IMM_2, dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_2, RN); #endif + // NB: This instruction is the *only* one that does (32 - immediate data). int32 r1 = 32 - IMM_1; uint32 res = RN << r1; SET_ZN(res); dsp_flag_c = (RN >> 31) & 1; @@ -2589,25 +2695,25 @@ void dsp_opcode_addqmod(void) #endif } -void dsp_opcode_subqmod(void) +void dsp_opcode_subqmod(void) { uint32 r1 = dsp_convert_zero[IMM_1]; uint32 r2 = RN; uint32 res = r2 - r1; res = (res & (~dsp_modulo)) | (r2 & dsp_modulo); RN = res; - + SET_ZNC_SUB(r2, r1, res); } -void dsp_opcode_mirror(void) +void dsp_opcode_mirror(void) { uint32 r1 = RN; RN = (mirror_table[r1 & 0xFFFF] << 16) | mirror_table[r1 >> 16]; SET_ZN(RN); } -void dsp_opcode_sat32s(void) +void dsp_opcode_sat32s(void) { int32 r2 = (uint32)RN; int32 temp = dsp_acc >> 32; @@ -2616,7 +2722,7 @@ void dsp_opcode_sat32s(void) SET_ZN(res); } -void dsp_opcode_sat16s(void) +void dsp_opcode_sat16s(void) { int32 r2 = RN; uint32 res = (r2 < -32768) ? -32768 : (r2 > 32767) ? 32767 : r2; @@ -2624,6 +2730,12 @@ void dsp_opcode_sat16s(void) SET_ZN(res); } +void dsp_opcode_illegal(void) +{ + // Don't know what it does, but it does *something*... + WriteLog("%06X: illegal %u, %u [NCZ:%u%u%u]\n", dsp_pc-2, IMM_1, IMM_2, dsp_flag_n, dsp_flag_c, dsp_flag_z); +} + // // New pipelined DSP core // @@ -2632,7 +2744,7 @@ static void DSP_abs(void); static void DSP_add(void); static void DSP_addc(void); static void DSP_addq(void); -static void DSP_addqmod(void); +static void DSP_addqmod(void); static void DSP_addqt(void); static void DSP_and(void); static void DSP_bclr(void); @@ -2654,7 +2766,7 @@ static void DSP_load_r14_i(void); static void DSP_load_r14_r(void); static void DSP_load_r15_i(void); static void DSP_load_r15_r(void); -static void DSP_mirror(void); +static void DSP_mirror(void); static void DSP_mmult(void); static void DSP_move(void); static void DSP_movefa(void); @@ -2672,8 +2784,8 @@ static void DSP_or(void); static void DSP_resmac(void); static void DSP_ror(void); static void DSP_rorq(void); -static void DSP_sat16s(void); -static void DSP_sat32s(void); +static void DSP_sat16s(void); +static void DSP_sat32s(void); static void DSP_sh(void); static void DSP_sha(void); static void DSP_sharq(void); @@ -2689,7 +2801,7 @@ static void DSP_store_r15_r(void); static void DSP_sub(void); static void DSP_subc(void); static void DSP_subq(void); -static void DSP_subqmod(void); +static void DSP_subqmod(void); static void DSP_subqt(void); static void DSP_xor(void); @@ -3072,6 +3184,7 @@ if (dsp_pc == 0xF1B092) pcQueue1[pcQPtr1++] = dsp_pc; pcQPtr1 &= 0x3FF; +#ifdef DSP_DEBUG_PL2 if ((dsp_pc < 0xF1B000 || dsp_pc > 0xF1CFFF) && !doDSPDis) { WriteLog("DSP: PC has stepped out of bounds...\n\nBacktrace:\n\n"); @@ -3086,6 +3199,8 @@ if ((dsp_pc < 0xF1B000 || dsp_pc > 0xF1CFFF) && !doDSPDis) } WriteLog("\n"); }//*/ +#endif + if (IMASKCleared) // If IMASK was cleared, { #ifdef DSP_DEBUG_IRQ @@ -3189,9 +3304,10 @@ WriteLog("\tW -> %02u, %02u, %02u; r1=%08X, r2= %08X, res=%08X, wb=%u (%s)\n", p // Stage 2: Execute if (pipeline[plPtrExec].opcode != PIPELINE_STALL) { +#ifdef DSP_DEBUG_PL2 if (doDSPDis) WriteLog("\t[inst=%02u][R28=%08X, alt R28=%08X, REGPAGE=%s]\n", pipeline[plPtrExec].opcode, dsp_reg[28], dsp_alternate_reg[28], (dsp_flags & REGPAGE ? "set" : "not set")); -#ifdef DSP_DEBUG_PL2 + if (doDSPDis) { WriteLog("DSPExecP: About to execute opcode %s...\n", dsp_opcode_str[pipeline[plPtrExec].opcode]); @@ -3410,7 +3526,7 @@ static void DSP_abs(void) WriteLog("%06X: ABS R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", DSP_PPC, PIMM2, dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM2, PRN); #endif uint32 _Rn = PRN; - + if (_Rn == 0x80000000) dsp_flag_n = 1; else @@ -3633,7 +3749,7 @@ static void DSP_imacn(void) if (doDSPDis) WriteLog("[NCZ:%u%u%u, DSP_ACC=%02X%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, (uint8)(dsp_acc >> 32), (uint32)(dsp_acc & 0xFFFFFFFF)); #endif -} +} static void DSP_imult(void) { @@ -3683,7 +3799,7 @@ static void DSP_illegal(void) static void DSP_jr(void) { #ifdef DSP_DIS_JR -char * condition[32] = +const char * condition[32] = { "T", "nz", "z", "???", "nc", "nc nz", "nc z", "???", "c", "c nz", "c z", "???", "???", "???", "???", "???", "???", "???", "???", "???", "nn", "nn nz", "nn z", "???", "n", "n nz", "n z", "???", @@ -3709,7 +3825,7 @@ char * condition[32] = // Now that we've branched, we have to make sure that the following instruction // is executed atomically with this one and then flush the pipeline before setting // the new PC. - + // Step 1: Handle writebacks at stage 3 of pipeline /* if (pipeline[plPtrWrite].opcode != PIPELINE_STALL) { @@ -3793,7 +3909,7 @@ char * condition[32] = static void DSP_jump(void) { #ifdef DSP_DIS_JUMP -char * condition[32] = +const char * condition[32] = { "T", "nz", "z", "???", "nc", "nc nz", "nc z", "???", "c", "c nz", "c z", "???", "???", "???", "???", "???", "???", "???", "???", "???", "nn", "nn nz", "nn z", "???", "n", "n nz", "n z", "???", @@ -3814,7 +3930,7 @@ char * condition[32] = // Now that we've branched, we have to make sure that the following instruction // is executed atomically with this one and then flush the pipeline before setting // the new PC. - + // Step 1: Handle writebacks at stage 3 of pipeline /* if (pipeline[plPtrWrite].opcode != PIPELINE_STALL) { @@ -3901,7 +4017,11 @@ static void DSP_load(void) if (doDSPDis) WriteLog("%06X: LOAD (R%02u), R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", DSP_PPC, PIMM1, PIMM2, dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM1, PRM, PIMM2, PRN); #endif +#ifdef DSP_CORRECT_ALIGNMENT + PRES = DSPReadLong(PRM & 0xFFFFFFFC, DSP); +#else PRES = DSPReadLong(PRM, DSP); +#endif #ifdef DSP_DIS_LOAD if (doDSPDis) WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM2, PRES); @@ -3930,10 +4050,17 @@ static void DSP_loadw(void) if (doDSPDis) WriteLog("%06X: LOADW (R%02u), R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", DSP_PPC, PIMM1, PIMM2, dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM1, PRM, PIMM2, PRN); #endif +#ifdef DSP_CORRECT_ALIGNMENT + if (PRM >= DSP_WORK_RAM_BASE && PRM <= (DSP_WORK_RAM_BASE + 0x1FFF)) + PRES = DSPReadLong(PRM & 0xFFFFFFFE, DSP) & 0xFFFF; + else + PRES = JaguarReadWord(PRM & 0xFFFFFFFE, DSP); +#else if (PRM >= DSP_WORK_RAM_BASE && PRM <= (DSP_WORK_RAM_BASE + 0x1FFF)) PRES = DSPReadLong(PRM, DSP) & 0xFFFF; else PRES = JaguarReadWord(PRM, DSP); +#endif #ifdef DSP_DIS_LOADW if (doDSPDis) WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM2, PRES); @@ -3946,7 +4073,11 @@ static void DSP_load_r14_i(void) if (doDSPDis) WriteLog("%06X: LOAD (R14+$%02X), R%02u [NCZ:%u%u%u, R14+$%02X=%08X, R%02u=%08X] -> ", DSP_PPC, dsp_convert_zero[PIMM1] << 2, PIMM2, dsp_flag_n, dsp_flag_c, dsp_flag_z, dsp_convert_zero[PIMM1] << 2, dsp_reg[14]+(dsp_convert_zero[PIMM1] << 2), PIMM2, PRN); #endif +#ifdef DSP_CORRECT_ALIGNMENT + PRES = DSPReadLong((dsp_reg[14] & 0xFFFFFFFC) + (dsp_convert_zero[PIMM1] << 2), DSP); +#else PRES = DSPReadLong(dsp_reg[14] + (dsp_convert_zero[PIMM1] << 2), DSP); +#endif #ifdef DSP_DIS_LOAD14I if (doDSPDis) WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM2, PRES); @@ -3959,7 +4090,11 @@ static void DSP_load_r14_r(void) if (doDSPDis) WriteLog("%06X: LOAD (R14+R%02u), R%02u [NCZ:%u%u%u, R14+R%02u=%08X, R%02u=%08X] -> ", DSP_PPC, PIMM1, PIMM2, dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM1, PRM+dsp_reg[14], PIMM2, PRES); #endif +#ifdef DSP_CORRECT_ALIGNMENT + PRES = DSPReadLong((dsp_reg[14] + PRM) & 0xFFFFFFFC, DSP); +#else PRES = DSPReadLong(dsp_reg[14] + PRM, DSP); +#endif #ifdef DSP_DIS_LOAD14R if (doDSPDis) WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM2, PRES); @@ -3972,7 +4107,11 @@ static void DSP_load_r15_i(void) if (doDSPDis) WriteLog("%06X: LOAD (R15+$%02X), R%02u [NCZ:%u%u%u, R15+$%02X=%08X, R%02u=%08X] -> ", DSP_PPC, dsp_convert_zero[PIMM1] << 2, PIMM2, dsp_flag_n, dsp_flag_c, dsp_flag_z, dsp_convert_zero[PIMM1] << 2, dsp_reg[15]+(dsp_convert_zero[PIMM1] << 2), PIMM2, PRN); #endif +#ifdef DSP_CORRECT_ALIGNMENT + PRES = DSPReadLong((dsp_reg[15] &0xFFFFFFFC) + (dsp_convert_zero[PIMM1] << 2), DSP); +#else PRES = DSPReadLong(dsp_reg[15] + (dsp_convert_zero[PIMM1] << 2), DSP); +#endif #ifdef DSP_DIS_LOAD15I if (doDSPDis) WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM2, PRES); @@ -3985,14 +4124,18 @@ static void DSP_load_r15_r(void) if (doDSPDis) WriteLog("%06X: LOAD (R15+R%02u), R%02u [NCZ:%u%u%u, R15+R%02u=%08X, R%02u=%08X] -> ", DSP_PPC, PIMM1, PIMM2, dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM1, PRM+dsp_reg[15], PIMM2, PRN); #endif +#ifdef DSP_CORRECT_ALIGNMENT + PRES = DSPReadLong((dsp_reg[15] + PRM) & 0xFFFFFFFC, DSP); +#else PRES = DSPReadLong(dsp_reg[15] + PRM, DSP); +#endif #ifdef DSP_DIS_LOAD15R if (doDSPDis) WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM2, PRES); #endif } -static void DSP_mirror(void) +static void DSP_mirror(void) { uint32 r1 = PRN; PRES = (mirror_table[r1 & 0xFFFF] << 16) | mirror_table[r1 >> 16]; @@ -4002,14 +4145,14 @@ static void DSP_mirror(void) static void DSP_mmult(void) { int count = dsp_matrix_control&0x0f; - uint32 addr = dsp_pointer_to_matrix; // in the gpu ram + uint32 addr = dsp_pointer_to_matrix; // in the dsp ram int64 accum = 0; uint32 res; if (!(dsp_matrix_control & 0x10)) { for (int i = 0; i < count; i++) - { + { int16 a; if (i&0x01) a=(int16)((dsp_alternate_reg[dsp_opcode_first_parameter + (i>>1)]>>16)&0xffff); @@ -4271,7 +4414,7 @@ static void DSP_rorq(void) #endif } -static void DSP_sat16s(void) +static void DSP_sat16s(void) { int32 r2 = PRN; uint32 res = (r2 < -32768) ? -32768 : (r2 > 32767) ? 32767 : r2; @@ -4279,7 +4422,7 @@ static void DSP_sat16s(void) SET_ZN(res); } -static void DSP_sat32s(void) +static void DSP_sat32s(void) { int32 r2 = (uint32)PRN; int32 temp = dsp_acc >> 32; @@ -4423,7 +4566,11 @@ static void DSP_store(void) #endif // DSPWriteLong(PRM, PRN, DSP); // NO_WRITEBACK; +#ifdef DSP_CORRECT_ALIGNMENT_STORE + pipeline[plPtrExec].address = PRM & 0xFFFFFFFC; +#else pipeline[plPtrExec].address = PRM; +#endif pipeline[plPtrExec].value = PRN; pipeline[plPtrExec].type = TYPE_DWORD; WRITEBACK_ADDR; @@ -4469,7 +4616,11 @@ static void DSP_storew(void) // JaguarWriteWord(PRM, PRN, DSP); // // NO_WRITEBACK; +#ifdef DSP_CORRECT_ALIGNMENT_STORE + pipeline[plPtrExec].address = PRM & 0xFFFFFFFE; +#else pipeline[plPtrExec].address = PRM; +#endif if (PRM >= DSP_WORK_RAM_BASE && PRM <= (DSP_WORK_RAM_BASE + 0x1FFF)) { @@ -4492,7 +4643,11 @@ static void DSP_store_r14_i(void) #endif // DSPWriteLong(dsp_reg[14] + (dsp_convert_zero[PIMM1] << 2), PRN, DSP); // NO_WRITEBACK; +#ifdef DSP_CORRECT_ALIGNMENT_STORE + pipeline[plPtrExec].address = (dsp_reg[14] & 0xFFFFFFFC) + (dsp_convert_zero[PIMM1] << 2); +#else pipeline[plPtrExec].address = dsp_reg[14] + (dsp_convert_zero[PIMM1] << 2); +#endif pipeline[plPtrExec].value = PRN; pipeline[plPtrExec].type = TYPE_DWORD; WRITEBACK_ADDR; @@ -4502,7 +4657,11 @@ static void DSP_store_r14_r(void) { // DSPWriteLong(dsp_reg[14] + PRM, PRN, DSP); // NO_WRITEBACK; +#ifdef DSP_CORRECT_ALIGNMENT_STORE + pipeline[plPtrExec].address = (dsp_reg[14] + PRM) & 0xFFFFFFFC; +#else pipeline[plPtrExec].address = dsp_reg[14] + PRM; +#endif pipeline[plPtrExec].value = PRN; pipeline[plPtrExec].type = TYPE_DWORD; WRITEBACK_ADDR; @@ -4516,7 +4675,11 @@ static void DSP_store_r15_i(void) #endif // DSPWriteLong(dsp_reg[15] + (dsp_convert_zero[PIMM1] << 2), PRN, DSP); // NO_WRITEBACK; +#ifdef DSP_CORRECT_ALIGNMENT_STORE + pipeline[plPtrExec].address = (dsp_reg[15] & 0xFFFFFFFC) + (dsp_convert_zero[PIMM1] << 2); +#else pipeline[plPtrExec].address = dsp_reg[15] + (dsp_convert_zero[PIMM1] << 2); +#endif pipeline[plPtrExec].value = PRN; pipeline[plPtrExec].type = TYPE_DWORD; WRITEBACK_ADDR; @@ -4526,7 +4689,11 @@ static void DSP_store_r15_r(void) { // DSPWriteLong(dsp_reg[15] + PRM, PRN, DSP); // NO_WRITEBACK; +#ifdef DSP_CORRECT_ALIGNMENT_STORE + pipeline[plPtrExec].address = (dsp_reg[15] + PRM) & 0xFFFFFFFC; +#else pipeline[plPtrExec].address = dsp_reg[15] + PRM; +#endif pipeline[plPtrExec].value = PRN; pipeline[plPtrExec].type = TYPE_DWORD; WRITEBACK_ADDR; @@ -4579,7 +4746,7 @@ static void DSP_subq(void) #endif } -static void DSP_subqmod(void) +static void DSP_subqmod(void) { uint32 r1 = dsp_convert_zero[PIMM1]; uint32 r2 = PRN;