]> Shamusworld >> Repos - virtualjaguar/blobdiff - src/dsp.cpp
Fix to M68K core vs. DSP thread sync problem.
[virtualjaguar] / src / dsp.cpp
index c0fd60387925b1e6821fc7005bb7a6f985967e2a..f39449b5016fb4336c05d8813383dd8038806c11 100644 (file)
@@ -3,28 +3,36 @@
 //
 // Originally by David Raingeard
 // GCC/SDL port by Niels Wagenaar (Linux/WIN32) and Caz (BeOS)
-// Extensive cleanups/rewrites by James L. Hammons
+// Extensive cleanups/rewrites by James Hammons
 // (C) 2010 Underground Software
 //
-// JLH = James L. Hammons <jlhamm@acm.org>
+// JLH = James Hammons <jlhamm@acm.org>
 //
 // Who  When        What
 // ---  ----------  -------------------------------------------------------------
 // JLH  01/16/2010  Created this log ;-)
+// JLH  11/26/2011  Added fixes for LOAD/STORE alignment issues
 //
 
 #include "dsp.h"
 
 #include <SDL.h>                                                               // Used only for SDL_GetTicks...
 #include <stdlib.h>
+#include <time.h>
+#include "dac.h"
 #include "gpu.h"
 #include "jagdasm.h"
 #include "jaguar.h"
 #include "jerry.h"
 #include "log.h"
-#include "m68k.h"
+#include "m68000/m68kinterface.h"
 //#include "memory.h"
 
+
+// Seems alignment in loads & stores was off...
+#define DSP_CORRECT_ALIGNMENT
+//#define DSP_CORRECT_ALIGNMENT_STORE
+
 //#define DSP_DEBUG
 //#define DSP_DEBUG_IRQ
 //#define DSP_DEBUG_PL2
@@ -34,6 +42,7 @@
 
 // Disassembly definitions
 
+#if 0
 #define DSP_DIS_ABS
 #define DSP_DIS_ADD
 #define DSP_DIS_ADDC
 //*/
 bool doDSPDis = false;
 //bool doDSPDis = true;
-
+#endif
+bool doDSPDis = false;
+//#define DSP_DIS_JR
+//#define DSP_DIS_JUMP
 
 /*
 No dis yet:
@@ -297,6 +309,7 @@ static void dsp_opcode_subc(void);
 static void dsp_opcode_subq(void);
 static void dsp_opcode_subqmod(void);
 static void dsp_opcode_subqt(void);
+static void dsp_opcode_illegal(void);
 
 uint8 dsp_opcode_cycles[64] =
 {
@@ -342,7 +355,7 @@ void (* dsp_opcode[64])() =
        dsp_opcode_mirror,                              dsp_opcode_store_r14_indexed,   dsp_opcode_store_r15_indexed,   dsp_opcode_move_pc,
        dsp_opcode_jump,                                dsp_opcode_jr,                                  dsp_opcode_mmult,                               dsp_opcode_mtoi,
        dsp_opcode_normi,                               dsp_opcode_nop,                                 dsp_opcode_load_r14_ri,                 dsp_opcode_load_r15_ri,
-       dsp_opcode_store_r14_ri,                dsp_opcode_store_r15_ri,                dsp_opcode_nop,                                 dsp_opcode_addqmod,
+       dsp_opcode_store_r14_ri,                dsp_opcode_store_r15_ri,                dsp_opcode_illegal,                             dsp_opcode_addqmod,
 };
 
 uint32 dsp_opcode_use[65];
@@ -380,7 +393,7 @@ uint32 dsp_control;
 static uint32 dsp_div_control;
 static uint8 dsp_flag_z, dsp_flag_n, dsp_flag_c;
 static uint32 * dsp_reg = NULL, * dsp_alternate_reg = NULL;
-static uint32 dsp_reg_bank_0[32], dsp_reg_bank_1[32];
+uint32 dsp_reg_bank_0[32], dsp_reg_bank_1[32];
 
 static uint32 dsp_opcode_first_parameter;
 static uint32 dsp_opcode_second_parameter;
@@ -405,9 +418,13 @@ static uint32 dsp_opcode_second_parameter;
 #define SET_ZNC_ADD(a,b,r)     SET_N(r); SET_Z(r); SET_C_ADD(a,b)
 #define SET_ZNC_SUB(a,b,r)     SET_N(r); SET_Z(r); SET_C_SUB(a,b)
 
-uint32 dsp_convert_zero[32] = { 32,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 };
-uint8 * dsp_branch_condition_table = NULL;
-static uint16 * mirror_table = NULL;
+uint32 dsp_convert_zero[32] = {
+       32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
+};
+
+uint8 dsp_branch_condition_table[32 * 8];
+static uint16 mirror_table[65536];
 static uint8 dsp_ram_8[0x2000];
 
 #define BRANCH_CONDITION(x)            dsp_branch_condition_table[(x) + ((jaguar_flags & 7) << 5)]
@@ -446,49 +463,39 @@ void DSPReleaseTimeslice(void)
 
 void dsp_build_branch_condition_table(void)
 {
-       // Allocate the mirror table
-       if (!mirror_table)
-               mirror_table = (uint16 *)malloc(65536 * sizeof(uint16));
-
        // Fill in the mirror table
-       if (mirror_table)
-               for(int i=0; i<65536; i++)
-                       mirror_table[i] = ((i >> 15) & 0x0001) | ((i >> 13) & 0x0002) |
-                                         ((i >> 11) & 0x0004) | ((i >> 9)  & 0x0008) |
-                                         ((i >> 7)  & 0x0010) | ((i >> 5)  & 0x0020) |
-                                         ((i >> 3)  & 0x0040) | ((i >> 1)  & 0x0080) |
-                                         ((i << 1)  & 0x0100) | ((i << 3)  & 0x0200) |
-                                         ((i << 5)  & 0x0400) | ((i << 7)  & 0x0800) |
-                                         ((i << 9)  & 0x1000) | ((i << 11) & 0x2000) |
-                                         ((i << 13) & 0x4000) | ((i << 15) & 0x8000);
-
-       if (!dsp_branch_condition_table)
+       for(int i=0; i<65536; i++)
        {
-               dsp_branch_condition_table = (uint8 *)malloc(32 * 8 * sizeof(uint8));
+               mirror_table[i] = ((i >> 15) & 0x0001) | ((i >> 13) & 0x0002)
+                       | ((i >> 11) & 0x0004) | ((i >> 9)  & 0x0008)
+                       | ((i >> 7)  & 0x0010) | ((i >> 5)  & 0x0020)
+                       | ((i >> 3)  & 0x0040) | ((i >> 1)  & 0x0080)
+                       | ((i << 1)  & 0x0100) | ((i << 3)  & 0x0200)
+                       | ((i << 5)  & 0x0400) | ((i << 7)  & 0x0800)
+                       | ((i << 9)  & 0x1000) | ((i << 11) & 0x2000)
+                       | ((i << 13) & 0x4000) | ((i << 15) & 0x8000);
+       }
 
-               // Fill in the condition table
-               if (dsp_branch_condition_table)
+       // Fill in the condition table
+       for(int i=0; i<8; i++)
+       {
+               for(int j=0; j<32; j++)
                {
-                       for(int i=0; i<8; i++)
-                       {
-                               for(int j=0; j<32; j++)
-                               {
-                                       int result = 1;
-                                       if (j & 1)
-                                               if (i & ZERO_FLAG)
-                                                       result = 0;
-                                       if (j & 2)
-                                               if (!(i & ZERO_FLAG))
-                                                       result = 0;
-                                       if (j & 4)
-                                               if (i & (CARRY_FLAG << (j >> 4)))
-                                                       result = 0;
-                                       if (j & 8)
-                                               if (!(i & (CARRY_FLAG << (j >> 4))))
-                                                       result = 0;
-                                       dsp_branch_condition_table[i * 32 + j] = result;
-                               }
-                       }
+                       int result = 1;
+
+                       if ((j & 1) && (i & ZERO_FLAG))
+                               result = 0;
+
+                       if ((j & 2) && (!(i & ZERO_FLAG)))
+                               result = 0;
+
+                       if ((j & 4) && (i & (CARRY_FLAG << (j >> 4))))
+                               result = 0;
+
+                       if ((j & 8) && (!(i & (CARRY_FLAG << (j >> 4)))))
+                               result = 0;
+
+                       dsp_branch_condition_table[i * 32 + j] = result;
                }
        }
 }
@@ -535,48 +542,6 @@ uint16 DSPReadWord(uint32 offset, uint32 who/*=UNKNOWN*/)
                WriteLog("DSP: ReadWord--Attempt to read from DSP register file by %s!\n", whoName[who]);
        //???
        offset &= 0xFFFFFFFE;
-       // jaguar cd bios
-/*     if (jaguar_mainRom_crc32==0xa74a97cd)
-       {
-               if (offset==0xF1A114) return(0x0000);
-               if (offset==0xF1A116) return(0x0000);
-               if (offset==0xF1B000) return(0x1234);
-               if (offset==0xF1B002) return(0x5678);
-       }*/
-/*
-       if (jaguar_mainRom_crc32==0x7ae20823)
-       {
-               if (offset==0xF1B9D8) return(0x0000);
-               if (offset==0xF1B9Da) return(0x0000);
-               if (offset==0xF1B2C0) return(0x0000);
-               if (offset==0xF1B2C2) return(0x0000);
-       }
-*/
-       // pour permettre à wolfenstein 3d de tourner sans le dsp
-/*     if ((offset==0xF1B0D0)||(offset==0xF1B0D2))
-               return(0);
-*/
-
-               // pour permettre à nba jam de tourner sans le dsp
-/*     if (jaguar_mainRom_crc32==0x4faddb18)
-       {
-               if (offset==0xf1b2c0) return(0);
-               if (offset==0xf1b2c2) return(0);
-               if (offset==0xf1b240) return(0);
-               if (offset==0xf1b242) return(0);
-               if (offset==0xF1B340) return(0);
-               if (offset==0xF1B342) return(0);
-               if (offset==0xF1BAD8) return(0);
-               if (offset==0xF1BADA) return(0);
-               if (offset==0xF1B040) return(0);
-               if (offset==0xF1B042) return(0);
-               if (offset==0xF1B0C0) return(0);
-               if (offset==0xF1B0C2) return(0);
-               if (offset==0xF1B140) return(0);
-               if (offset==0xF1B142) return(0);
-               if (offset==0xF1B1C0) return(0);
-               if (offset==0xF1B1C2) return(0);
-       }*/
 
        if (offset >= DSP_WORK_RAM_BASE && offset <= DSP_WORK_RAM_BASE+0x1FFF)
        {
@@ -622,12 +587,9 @@ uint32 DSPReadLong(uint32 offset, uint32 who/*=UNKNOWN*/)
                offset &= 0x3F;
                switch (offset)
                {
-               case 0x00:      /*dsp_flag_c?(dsp_flag_c=1):(dsp_flag_c=0);
-                                       dsp_flag_z?(dsp_flag_z=1):(dsp_flag_z=0);
-                                       dsp_flag_n?(dsp_flag_n=1):(dsp_flag_n=0);*/
-
-                                       dsp_flags = (dsp_flags & 0xFFFFFFF8) | (dsp_flag_n << 2) | (dsp_flag_c << 1) | dsp_flag_z;
-                                       return dsp_flags & 0xFFFFC1FF;
+               case 0x00:
+                       dsp_flags = (dsp_flags & 0xFFFFFFF8) | (dsp_flag_n << 2) | (dsp_flag_c << 1) | dsp_flag_z;
+                       return dsp_flags & 0xFFFFC1FF;
                case 0x04: return dsp_matrix_control;
                case 0x08: return dsp_pointer_to_matrix;
                case 0x0C: return dsp_data_organization;
@@ -658,7 +620,7 @@ void DSPWriteByte(uint32 offset, uint8 data, uint32 who/*=UNKNOWN*/)
 /*             if (dsp_in_exec == 0)
                {
                        m68k_end_timeslice();
-                       gpu_releaseTimeslice();
+                       dsp_releaseTimeslice();
                }*/
                return;
        }
@@ -708,7 +670,7 @@ void DSPWriteWord(uint32 offset, uint16 data, uint32 who/*=UNKNOWN*/)
                {
 //                     WriteLog("dsp: writing %.4x at 0x%.8x\n",data,offset+DSP_WORK_RAM_BASE);
                        m68k_end_timeslice();
-                       gpu_releaseTimeslice();
+                       dsp_releaseTimeslice();
                }*/
 //CC only!
 #ifdef DSP_DEBUG_CC
@@ -723,19 +685,22 @@ SET16(ram2, offset, data);
                if ((offset & 0x1C) == 0x1C)
                {
                        if (offset & 0x03)
-                               dsp_div_control = (dsp_div_control&0xffff0000)|(data&0xffff);
+                               dsp_div_control = (dsp_div_control & 0xFFFF0000) | (data & 0xFFFF);
                        else
-                               dsp_div_control = (dsp_div_control&0xffff)|((data&0xffff)<<16);
+                               dsp_div_control = (dsp_div_control & 0xFFFF) | ((data & 0xFFFF) << 16);
                }
                else
                {
-                       uint32 old_data = DSPReadLong(offset & 0xffffffc, who);
+                       uint32 old_data = DSPReadLong(offset & 0xFFFFFFC, who);
+
                        if (offset & 0x03)
-                               old_data = (old_data&0xffff0000)|(data&0xffff);
+                               old_data = (old_data & 0xFFFF0000) | (data & 0xFFFF);
                        else
-                               old_data = (old_data&0xffff)|((data&0xffff)<<16);
-                       DSPWriteLong(offset & 0xffffffc, old_data, who);
+                               old_data = (old_data & 0xFFFF) | ((data & 0xFFFF) << 16);
+
+                       DSPWriteLong(offset & 0xFFFFFFC, old_data, who);
                }
+
                return;
        }
 
@@ -778,17 +743,61 @@ SET32(ram2, offset, data);
                case 0x00:
                {
 #ifdef DSP_DEBUG
-                       WriteLog("DSP: Writing %08X to DSP_FLAGS by %s (REGPAGE is %s)...\n", data, whoName[who], (dsp_flags & REGPAGE ? "set" : "not set"));
+                       WriteLog("DSP: Writing %08X to DSP_FLAGS by %s (REGPAGE is %sset)...\n", data, whoName[who], (dsp_flags & REGPAGE ? "" : "not "));
 #endif
 //                     bool IMASKCleared = (dsp_flags & IMASK) && !(data & IMASK);
                        IMASKCleared = (dsp_flags & IMASK) && !(data & IMASK);
-                       dsp_flags = data;
+                       // NOTE: According to the JTRM, writing a 1 to IMASK has no effect; only the
+                       //       IRQ logic can set it. So we mask it out here to prevent problems...
+                       dsp_flags = data & (~IMASK);
                        dsp_flag_z = dsp_flags & 0x01;
                        dsp_flag_c = (dsp_flags >> 1) & 0x01;
                        dsp_flag_n = (dsp_flags >> 2) & 0x01;
                        DSPUpdateRegisterBanks();
                        dsp_control &= ~((dsp_flags & CINT04FLAGS) >> 3);
                        dsp_control &= ~((dsp_flags & CINT5FLAG) >> 1);
+
+// NB: This is just a wild hairy-assed guess as to what the playback frequency is.
+//     It can be timed to anything really, anything that writes to L/RTXD at a regular
+//     interval. Most things seem to use either the I2S interrupt or the TIMER 0
+//     interrupt, so that's what we check for here. Just know that this approach
+//     can be easily fooled!
+//     Note also that if both interrupts are enabled, the I2S freq will win. :-P
+
+// Further Note:
+// The impetus for this "fix" was Cybermorph, which sets the SCLK to 7 which is an
+// audio frequency > 48 KHz. However, it stuffs the L/RTXD registers using TIMER0.
+// So, while this works, it's a by-product of the lame way in which audio is currently
+// handled. Hopefully, once we run the DSP in the host audio IRQ, this problem will
+// go away of its own accord. :-P
+// Or does it? It seems the I2S interrupt isn't on with Cybermorph, so something
+// weird is going on here...
+// Maybe it works like this: It acknowledges the 1st interrupt, but never clears it.
+// So subsequent interrupts come into the chip, but they're never serviced but the
+// I2S subsystem keeps going.
+// After some testing on real hardware, it seems that if you enable TIMER0 and EXTERNAL
+// IRQs on J_INT ($F10020), you don't have to run an I2S interrupt on the DSP. Also,
+// it seems that it's only stable for values of SCLK <= 9.
+
+// All of the preceding is moot now; we run the DSP in the host audio IRQ. This means
+// that we don't actually need this stuff anymore. :-D
+#if 0
+                       if (data & INT_ENA1) // I2S interrupt
+                       {
+                               int freq = GetCalculatedFrequency();
+//This happens too often to be useful...
+//                             WriteLog("DSP: Setting audio freqency to %u Hz...\n", freq);
+                               DACSetNewFrequency(freq);
+                       }
+                       else if (data & INT_ENA2) // TIMER 0 interrupt
+                       {
+                               int freq = JERRYGetPIT1Frequency();
+//This happens too often to be useful...
+//                             WriteLog("DSP: Setting audio freqency to %u Hz...\n", freq);
+                               DACSetNewFrequency(freq);
+                       }
+#endif
+
 /*                     if (IMASKCleared)                                               // If IMASK was cleared,
 #ifdef DSP_DEBUG_IRQ
                        {
@@ -849,7 +858,7 @@ if (who != DSP)
                case 0x14:
                {
 //#ifdef DSP_DEBUG
-WriteLog("Write to DSP CTRL by %s: %08X\n", whoName[who], data);
+WriteLog("Write to DSP CTRL by %s: %08X (DSP PC=$%08X)\n", whoName[who], data, dsp_pc);
 //#endif
                        bool wasRunning = DSP_RUNNING;
 //                     uint32 dsp_was_running = DSP_RUNNING;
@@ -877,7 +886,7 @@ WriteLog("Write to DSP CTRL by %s: %08X\n", whoName[who], data);
                                WriteLog("DSP: CPU -> DSP interrupt\n");
 #endif
                                m68k_end_timeslice();
-                               GPUReleaseTimeslice();
+                               DSPReleaseTimeslice();
                                DSPSetIRQLine(DSPIRQ_CPU, ASSERT_LINE);
                                data &= ~DSPINT0;
                        }
@@ -917,14 +926,14 @@ else
        WriteLog(" --> Stopped by %s! (DSP PC: %08X)", whoName[who], dsp_pc);
 WriteLog("\n");
 #endif // DSP_DEBUG
-//This isn't exactly right either--we don't know if it was the M68K or the GPU writing here...
+//This isn't exactly right either--we don't know if it was the M68K or the DSP writing here...
 // !!! FIX !!! [DONE]
                        if (DSP_RUNNING)
                        {
                                if (who == M68K)
                                        m68k_end_timeslice();
-                               else if (who == GPU)
-                                       GPUReleaseTimeslice();
+                               else if (who == DSP)
+                                       DSPReleaseTimeslice();
 
                                if (!wasRunning)
                                        FlushDSPPipeline();
@@ -933,6 +942,7 @@ WriteLog("\n");
                        break;
                }
                case 0x18:
+WriteLog("DSP: Modulo data %08X written by %s.\n", data, whoName[who]);
                        dsp_modulo = data;
                        break;
                case 0x1C:
@@ -966,6 +976,10 @@ void DSPUpdateRegisterBanks(void)
                dsp_reg = dsp_reg_bank_1, dsp_alternate_reg = dsp_reg_bank_0;
        else
                dsp_reg = dsp_reg_bank_0, dsp_alternate_reg = dsp_reg_bank_1;
+
+#ifdef DSP_DEBUG_IRQ
+       WriteLog("DSP: Register bank #%s active.\n", (bank ? "1" : "0"));
+#endif
 }
 
 //
@@ -987,6 +1001,7 @@ void DSPHandleIRQs(void)
                return;
 
        int which = 0;                                                                  // Determine which interrupt
+
        if (bits & 0x01)
                which = 0;
        if (bits & 0x02)
@@ -1166,18 +1181,24 @@ DSPUpdateRegisterBanks();
        if (bits & 0x20)
                which = 5;
 
-#ifdef DSP_DEBUG_IRQ
-       WriteLog("DSP: Generating interrupt #%i...", which);
-#endif
-
-       dsp_flags |= IMASK;
+       dsp_flags |= IMASK;             // Force Bank #0
 //CC only!
 #ifdef DSP_DEBUG_CC
 ctrl1[4] = dsp_flags;
 #endif
 //!!!!!!!!
+#ifdef DSP_DEBUG_IRQ
+       WriteLog("DSP: Bank 0: R30=%08X, R31=%08X\n", dsp_reg_bank_0[30], dsp_reg_bank_0[31]);
+       WriteLog("DSP: Bank 1: R30=%08X, R31=%08X\n", dsp_reg_bank_1[30], dsp_reg_bank_1[31]);
+#endif
        DSPUpdateRegisterBanks();
 #ifdef DSP_DEBUG_IRQ
+       WriteLog("DSP: Bank 0: R30=%08X, R31=%08X\n", dsp_reg_bank_0[30], dsp_reg_bank_0[31]);
+       WriteLog("DSP: Bank 1: R30=%08X, R31=%08X\n", dsp_reg_bank_1[30], dsp_reg_bank_1[31]);
+#endif
+
+#ifdef DSP_DEBUG_IRQ
+       WriteLog("DSP: Generating interrupt #%i...", which);
        WriteLog(" [PC will return to %08X, R31 = %08X]\n", dsp_pc, dsp_reg[31]);
 #endif
 
@@ -1185,12 +1206,15 @@ ctrl1[4] = dsp_flags;
        // move   pc,r30                ; address of interrupted code
        // store  r30,(r31)     ; store return address
        dsp_reg[31] -= 4;
+       dsp_reg[30] = dsp_pc - 2; // -2 because we've executed the instruction already
+
 //CC only!
 #ifdef DSP_DEBUG_CC
 regs1[31] -= 4;
 #endif
 //!!!!!!!!
-       DSPWriteLong(dsp_reg[31], dsp_pc - 2, DSP);
+//     DSPWriteLong(dsp_reg[31], dsp_pc - 2, DSP);
+       DSPWriteLong(dsp_reg[31], dsp_reg[30], DSP);
 //CC only!
 #ifdef DSP_DEBUG_CC
 SET32(ram1, regs1[31] - 0xF1B000, dsp_pc - 2);
@@ -1225,7 +1249,9 @@ ctrl1[8] = ctrl2[8] = dsp_control;
        if (state)
        {
                dsp_control |= mask;                                            // Set the latch bit
-               DSPHandleIRQs();
+#warning !!! No checking done to see if we're using pipelined DSP or not !!!
+//             DSPHandleIRQs();
+               DSPHandleIRQsNP();
 //CC only!
 #ifdef DSP_DEBUG_CC
 ctrl1[8] = ctrl2[8] = dsp_control;
@@ -1240,6 +1266,11 @@ DSPHandleIRQsNP();
 //     GPUSetIRQLine(GPUIRQ_DSP, ASSERT_LINE);
 }
 
+bool DSPIsRunning(void)
+{
+       return (DSP_RUNNING ? true : false);
+}
+
 void DSPInit(void)
 {
 //     memory_malloc_secure((void **)&dsp_ram_8, 0x2000, "DSP work RAM");
@@ -1248,6 +1279,7 @@ void DSPInit(void)
 
        dsp_build_branch_condition_table();
        DSPReset();
+       srand(time(NULL));                                                      // For randomizing local RAM
 }
 
 void DSPReset(void)
@@ -1274,7 +1306,12 @@ void DSPReset(void)
        IMASKCleared = false;
        FlushDSPPipeline();
        dsp_reset_stats();
-       memset(dsp_ram_8, 0xFF, 0x2000);
+//     memset(dsp_ram_8, 0xFF, 0x2000);
+       // Contents of local RAM are quasi-stable; we simulate this by randomizing RAM contents
+       for(uint32 i=0; i<8192; i+=4)
+       {
+               *((uint32 *)(&dsp_ram_8[i])) = rand();
+       }
 }
 
 void DSPDumpDisassembly(void)
@@ -1283,6 +1320,7 @@ void DSPDumpDisassembly(void)
 
        WriteLog("\n---[DSP code at 00F1B000]---------------------------\n");
        uint32 j = 0xF1B000;
+
        while (j <= 0xF1CFFF)
        {
                uint32 oldj = j;
@@ -1296,29 +1334,32 @@ void DSPDumpRegisters(void)
 //Should add modulus, etc. to dump here...
        WriteLog("\n---[DSP flags: NCZ %d%d%d, DSP PC: %08X]------------\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, dsp_pc);
        WriteLog("\nRegisters bank 0\n");
+
        for(int j=0; j<8; j++)
        {
                WriteLog("\tR%02i = %08X R%02i = %08X R%02i = %08X R%02i = %08X\n",
-                                                 (j << 2) + 0, dsp_reg_bank_0[(j << 2) + 0],
-                                                 (j << 2) + 1, dsp_reg_bank_0[(j << 2) + 1],
-                                                 (j << 2) + 2, dsp_reg_bank_0[(j << 2) + 2],
-                                                 (j << 2) + 3, dsp_reg_bank_0[(j << 2) + 3]);
+                       (j << 2) + 0, dsp_reg_bank_0[(j << 2) + 0],
+                       (j << 2) + 1, dsp_reg_bank_0[(j << 2) + 1],
+                       (j << 2) + 2, dsp_reg_bank_0[(j << 2) + 2],
+                       (j << 2) + 3, dsp_reg_bank_0[(j << 2) + 3]);
        }
+
        WriteLog("Registers bank 1\n");
+
        for(int j=0; j<8; j++)
        {
                WriteLog("\tR%02i = %08X R%02i = %08X R%02i = %08X R%02i = %08X\n",
-                                                 (j << 2) + 0, dsp_reg_bank_1[(j << 2) + 0],
-                                                 (j << 2) + 1, dsp_reg_bank_1[(j << 2) + 1],
-                                                 (j << 2) + 2, dsp_reg_bank_1[(j << 2) + 2],
-                                                 (j << 2) + 3, dsp_reg_bank_1[(j << 2) + 3]);
+                       (j << 2) + 0, dsp_reg_bank_1[(j << 2) + 0],
+                       (j << 2) + 1, dsp_reg_bank_1[(j << 2) + 1],
+                       (j << 2) + 2, dsp_reg_bank_1[(j << 2) + 2],
+                       (j << 2) + 3, dsp_reg_bank_1[(j << 2) + 3]);
        }
 }
 
 void DSPDone(void)
 {
        int i, j;
-       WriteLog("DSP: Stopped at PC=%08X dsp_modulo=%08X (dsp %s running)\n", dsp_pc, dsp_modulo, (DSP_RUNNING ? "was" : "wasn't"));
+       WriteLog("DSP: Stopped at PC=%08X dsp_modulo=%08X (dsp was%s running)\n", dsp_pc, dsp_modulo, (DSP_RUNNING ? "" : "n't"));
        WriteLog("DSP: %sin interrupt handler\n", (dsp_flags & IMASK ? "" : "not "));
 
        // get the active interrupt bits
@@ -1326,8 +1367,12 @@ void DSPDone(void)
        // get the interrupt mask
        int mask = ((dsp_flags >> 11) & 0x20) | ((dsp_flags >> 4) & 0x1F);
 
-       WriteLog("DSP: pending=%08X enabled=%08X\n", bits, mask);
+       WriteLog("DSP: pending=$%X enabled=$%X (%s%s%s%s%s%s)\n", bits, mask,
+               (mask & 0x01 ? "CPU " : ""), (mask & 0x02 ? "I2S " : ""),
+               (mask & 0x04 ? "Timer0 " : ""), (mask & 0x08 ? "Timer1 " : ""),
+               (mask & 0x10 ? "Ext0 " : ""), (mask & 0x20 ? "Ext1" : ""));
        WriteLog("\nRegisters bank 0\n");
+
        for(int j=0; j<8; j++)
        {
                WriteLog("\tR%02i=%08X R%02i=%08X R%02i=%08X R%02i=%08X\n",
@@ -1336,7 +1381,9 @@ void DSPDone(void)
                                                  (j << 2) + 2, dsp_reg_bank_0[(j << 2) + 2],
                                                  (j << 2) + 3, dsp_reg_bank_0[(j << 2) + 3]);
        }
+
        WriteLog("\nRegisters bank 1\n");
+
        for (j=0; j<8; j++)
        {
                WriteLog("\tR%02i=%08X R%02i=%08X R%02i=%08X R%02i=%08X\n",
@@ -1344,12 +1391,14 @@ void DSPDone(void)
                                                  (j << 2) + 1, dsp_reg_bank_1[(j << 2) + 1],
                                                  (j << 2) + 2, dsp_reg_bank_1[(j << 2) + 2],
                                                  (j << 2) + 3, dsp_reg_bank_1[(j << 2) + 3]);
-
        }
 
+       WriteLog("\n");
+
        static char buffer[512];
        j = DSP_WORK_RAM_BASE;
-       while (j <= 0xF1BFFF)
+
+       while (j <= 0xF1CFFF)
        {
                uint32 oldj = j;
                j += dasmjag(JAGUAR_DSP, buffer, j);
@@ -1357,20 +1406,12 @@ void DSPDone(void)
        }//*/
 
        WriteLog("DSP opcodes use:\n");
+
        for (i=0;i<64;i++)
        {
                if (dsp_opcode_use[i])
                        WriteLog("\t%s %i\n", dsp_opcode_str[i], dsp_opcode_use[i]);
        }//*/
-
-//     memory_free(dsp_ram_8);
-//     memory_free(dsp_reg_bank_0);
-//     memory_free(dsp_reg_bank_1);
-       if (dsp_branch_condition_table)
-               free(dsp_branch_condition_table);
-
-       if (mirror_table)
-               free(mirror_table);
 }
 
 
@@ -1582,9 +1623,6 @@ for(int k=0; k<2; k++)
 //static uint32 pcQueue[32], ptrPCQ = 0;
 void DSPExec(int32 cycles)
 {
-/*HACKS!!! ->  if (cycles != 1 && jaguar_mainRom_crc32 == 0xba74c3ed)
-               dsp_check_if_i2s_interrupt_needed();*/
-
 #ifdef DSP_SINGLE_STEPPING
        if (dsp_control & 0x18)
        {
@@ -1617,7 +1655,7 @@ if (dsp_pc == 0xF1B092)
                if (IMASKCleared)                                               // If IMASK was cleared,
                {
 #ifdef DSP_DEBUG_IRQ
-                       WriteLog("DSP: Finished interrupt.\n");
+                       WriteLog("DSP: Finished interrupt. PC=$%06X\n", dsp_pc);
 #endif
                        DSPHandleIRQsNP();                                      // See if any other interrupts are pending!
                        IMASKCleared = false;
@@ -1949,7 +1987,11 @@ static void dsp_opcode_store_r14_indexed(void)
        if (doDSPDis)
                WriteLog("%06X: STORE  R%02u, (R14+$%02X) [NCZ:%u%u%u, R%02u=%08X, R14+$%02X=%08X]\n", dsp_pc-2, IMM_2, dsp_convert_zero[IMM_1] << 2, dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_2, RN, dsp_convert_zero[IMM_1] << 2, dsp_reg[14]+(dsp_convert_zero[IMM_1] << 2));
 #endif
+#ifdef DSP_CORRECT_ALIGNMENT_STORE
+       DSPWriteLong((dsp_reg[14] & 0xFFFFFFFC) + (dsp_convert_zero[IMM_1] << 2), RN, DSP);
+#else
        DSPWriteLong(dsp_reg[14] + (dsp_convert_zero[IMM_1] << 2), RN, DSP);
+#endif
 }
 
 static void dsp_opcode_store_r15_indexed(void)
@@ -1958,7 +2000,11 @@ static void dsp_opcode_store_r15_indexed(void)
        if (doDSPDis)
                WriteLog("%06X: STORE  R%02u, (R15+$%02X) [NCZ:%u%u%u, R%02u=%08X, R15+$%02X=%08X]\n", dsp_pc-2, IMM_2, dsp_convert_zero[IMM_1] << 2, dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_2, RN, dsp_convert_zero[IMM_1] << 2, dsp_reg[15]+(dsp_convert_zero[IMM_1] << 2));
 #endif
+#ifdef DSP_CORRECT_ALIGNMENT_STORE
+       DSPWriteLong((dsp_reg[15] & 0xFFFFFFFC) + (dsp_convert_zero[IMM_1] << 2), RN, DSP);
+#else
        DSPWriteLong(dsp_reg[15] + (dsp_convert_zero[IMM_1] << 2), RN, DSP);
+#endif
 }
 
 static void dsp_opcode_load_r14_ri(void)
@@ -1967,7 +2013,11 @@ static void dsp_opcode_load_r14_ri(void)
        if (doDSPDis)
                WriteLog("%06X: LOAD   (R14+R%02u), R%02u [NCZ:%u%u%u, R14+R%02u=%08X, R%02u=%08X] -> ", dsp_pc-2, IMM_1, IMM_2, dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_1, RM+dsp_reg[14], IMM_2, RN);
 #endif
+#ifdef DSP_CORRECT_ALIGNMENT
+       RN = DSPReadLong((dsp_reg[14] + RM) & 0xFFFFFFFC, DSP);
+#else
        RN = DSPReadLong(dsp_reg[14] + RM, DSP);
+#endif
 #ifdef DSP_DIS_LOAD14R
        if (doDSPDis)
                WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_2, RN);
@@ -1980,7 +2030,11 @@ static void dsp_opcode_load_r15_ri(void)
        if (doDSPDis)
                WriteLog("%06X: LOAD   (R15+R%02u), R%02u [NCZ:%u%u%u, R15+R%02u=%08X, R%02u=%08X] -> ", dsp_pc-2, IMM_1, IMM_2, dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_1, RM+dsp_reg[15], IMM_2, RN);
 #endif
+#ifdef DSP_CORRECT_ALIGNMENT
+       RN = DSPReadLong((dsp_reg[15] + RM) & 0xFFFFFFFC, DSP);
+#else
        RN = DSPReadLong(dsp_reg[15] + RM, DSP);
+#endif
 #ifdef DSP_DIS_LOAD15R
        if (doDSPDis)
                WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_2, RN);
@@ -2023,10 +2077,17 @@ static void dsp_opcode_storew(void)
        if (doDSPDis)
                WriteLog("%06X: STOREW R%02u, (R%02u) [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", dsp_pc-2, IMM_2, IMM_1, dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_2, RN, IMM_1, RM);
 #endif
+#ifdef DSP_CORRECT_ALIGNMENT_STORE
+       if (RM >= DSP_WORK_RAM_BASE && RM <= (DSP_WORK_RAM_BASE + 0x1FFF))
+               DSPWriteLong(RM & 0xFFFFFFFE, RN & 0xFFFF, DSP);
+       else
+               JaguarWriteWord(RM & 0xFFFFFFFE, RN, DSP);
+#else
        if (RM >= DSP_WORK_RAM_BASE && RM <= (DSP_WORK_RAM_BASE + 0x1FFF))
                DSPWriteLong(RM, RN & 0xFFFF, DSP);
        else
                JaguarWriteWord(RM, RN, DSP);
+#endif
 }
 
 static void dsp_opcode_store(void)
@@ -2035,7 +2096,11 @@ static void dsp_opcode_store(void)
        if (doDSPDis)
                WriteLog("%06X: STORE  R%02u, (R%02u) [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", dsp_pc-2, IMM_2, IMM_1, dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_2, RN, IMM_1, RM);
 #endif
+#ifdef DSP_CORRECT_ALIGNMENT_STORE
+       DSPWriteLong(RM & 0xFFFFFFFC, RN, DSP);
+#else
        DSPWriteLong(RM, RN, DSP);
+#endif
 }
 
 static void dsp_opcode_loadb(void)
@@ -2060,10 +2125,17 @@ static void dsp_opcode_loadw(void)
        if (doDSPDis)
                WriteLog("%06X: LOADW  (R%02u), R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", dsp_pc-2, IMM_1, IMM_2, dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_1, RM, IMM_2, RN);
 #endif
+#ifdef DSP_CORRECT_ALIGNMENT
+       if (RM >= DSP_WORK_RAM_BASE && RM <= (DSP_WORK_RAM_BASE + 0x1FFF))
+               RN = DSPReadLong(RM & 0xFFFFFFFE, DSP) & 0xFFFF;
+       else
+               RN = JaguarReadWord(RM & 0xFFFFFFFE, DSP);
+#else
        if (RM >= DSP_WORK_RAM_BASE && RM <= (DSP_WORK_RAM_BASE + 0x1FFF))
                RN = DSPReadLong(RM, DSP) & 0xFFFF;
        else
                RN = JaguarReadWord(RM, DSP);
+#endif
 #ifdef DSP_DIS_LOADW
        if (doDSPDis)
                WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_2, RN);
@@ -2076,7 +2148,11 @@ static void dsp_opcode_load(void)
        if (doDSPDis)
                WriteLog("%06X: LOAD   (R%02u), R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", dsp_pc-2, IMM_1, IMM_2, dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_1, RM, IMM_2, RN);
 #endif
+#ifdef DSP_CORRECT_ALIGNMENT
+       RN = DSPReadLong(RM & 0xFFFFFFFC, DSP);
+#else
        RN = DSPReadLong(RM, DSP);
+#endif
 #ifdef DSP_DIS_LOAD
        if (doDSPDis)
                WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_2, RN);
@@ -2089,7 +2165,11 @@ static void dsp_opcode_load_r14_indexed(void)
        if (doDSPDis)
                WriteLog("%06X: LOAD   (R14+$%02X), R%02u [NCZ:%u%u%u, R14+$%02X=%08X, R%02u=%08X] -> ", dsp_pc-2, dsp_convert_zero[IMM_1] << 2, IMM_2, dsp_flag_n, dsp_flag_c, dsp_flag_z, dsp_convert_zero[IMM_1] << 2, dsp_reg[14]+(dsp_convert_zero[IMM_1] << 2), IMM_2, RN);
 #endif
+#ifdef DSP_CORRECT_ALIGNMENT
+       RN = DSPReadLong((dsp_reg[14] & 0xFFFFFFFC) + (dsp_convert_zero[IMM_1] << 2), DSP);
+#else
        RN = DSPReadLong(dsp_reg[14] + (dsp_convert_zero[IMM_1] << 2), DSP);
+#endif
 #ifdef DSP_DIS_LOAD14I
        if (doDSPDis)
                WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_2, RN);
@@ -2102,7 +2182,11 @@ static void dsp_opcode_load_r15_indexed(void)
        if (doDSPDis)
                WriteLog("%06X: LOAD   (R15+$%02X), R%02u [NCZ:%u%u%u, R15+$%02X=%08X, R%02u=%08X] -> ", dsp_pc-2, dsp_convert_zero[IMM_1] << 2, IMM_2, dsp_flag_n, dsp_flag_c, dsp_flag_z, dsp_convert_zero[IMM_1] << 2, dsp_reg[15]+(dsp_convert_zero[IMM_1] << 2), IMM_2, RN);
 #endif
+#ifdef DSP_CORRECT_ALIGNMENT
+       RN = DSPReadLong((dsp_reg[15] & 0xFFFFFFFC) + (dsp_convert_zero[IMM_1] << 2), DSP);
+#else
        RN = DSPReadLong(dsp_reg[15] + (dsp_convert_zero[IMM_1] << 2), DSP);
+#endif
 #ifdef DSP_DIS_LOAD15I
        if (doDSPDis)
                WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_2, RN);
@@ -2332,7 +2416,7 @@ static void dsp_opcode_normi(void)
 static void dsp_opcode_mmult(void)
 {
        int count       = dsp_matrix_control&0x0f;
-       uint32 addr = dsp_pointer_to_matrix; // in the gpu ram
+       uint32 addr = dsp_pointer_to_matrix; // in the dsp ram
        int64 accum = 0;
        uint32 res;
 
@@ -2456,6 +2540,7 @@ static void dsp_opcode_shlq(void)
        if (doDSPDis)
                WriteLog("%06X: SHLQ   #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", dsp_pc-2, 32 - IMM_1, IMM_2, dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_2, RN);
 #endif
+       // NB: This instruction is the *only* one that does (32 - immediate data).
        int32 r1 = 32 - IMM_1;
        uint32 res = RN << r1;
        SET_ZN(res); dsp_flag_c = (RN >> 31) & 1;
@@ -2645,6 +2730,12 @@ void dsp_opcode_sat16s(void)
        SET_ZN(res);
 }
 
+void dsp_opcode_illegal(void)
+{
+       // Handler for the "illegal" opcode slot. Don't know what the opcode does, but it does *something*... so this only writes a log line with dsp_pc-2, IMM_1, IMM_2 and the current N/C/Z flag values.
+       WriteLog("%06X: illegal %u, %u [NCZ:%u%u%u]\n", dsp_pc-2, IMM_1, IMM_2, dsp_flag_n, dsp_flag_c, dsp_flag_z);
+}
+
 //
 // New pipelined DSP core
 //
@@ -3093,6 +3184,7 @@ if (dsp_pc == 0xF1B092)
 pcQueue1[pcQPtr1++] = dsp_pc;
 pcQPtr1 &= 0x3FF;
 
+#ifdef DSP_DEBUG_PL2
 if ((dsp_pc < 0xF1B000 || dsp_pc > 0xF1CFFF) && !doDSPDis)
 {
        WriteLog("DSP: PC has stepped out of bounds...\n\nBacktrace:\n\n");
@@ -3107,6 +3199,8 @@ if ((dsp_pc < 0xF1B000 || dsp_pc > 0xF1CFFF) && !doDSPDis)
        }
        WriteLog("\n");
 }//*/
+#endif
+
                if (IMASKCleared)                                               // If IMASK was cleared,
                {
 #ifdef DSP_DEBUG_IRQ
@@ -3210,9 +3304,10 @@ WriteLog("\tW -> %02u, %02u, %02u; r1=%08X, r2= %08X, res=%08X, wb=%u (%s)\n", p
                // Stage 2: Execute
                if (pipeline[plPtrExec].opcode != PIPELINE_STALL)
                {
+#ifdef DSP_DEBUG_PL2
 if (doDSPDis)
        WriteLog("\t[inst=%02u][R28=%08X, alt R28=%08X, REGPAGE=%s]\n", pipeline[plPtrExec].opcode, dsp_reg[28], dsp_alternate_reg[28], (dsp_flags & REGPAGE ? "set" : "not set"));
-#ifdef DSP_DEBUG_PL2
+
 if (doDSPDis)
 {
 WriteLog("DSPExecP: About to execute opcode %s...\n", dsp_opcode_str[pipeline[plPtrExec].opcode]);
@@ -3922,7 +4017,11 @@ static void DSP_load(void)
        if (doDSPDis)
                WriteLog("%06X: LOAD   (R%02u), R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", DSP_PPC, PIMM1, PIMM2, dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM1, PRM, PIMM2, PRN);
 #endif
+#ifdef DSP_CORRECT_ALIGNMENT
+       PRES = DSPReadLong(PRM & 0xFFFFFFFC, DSP);
+#else
        PRES = DSPReadLong(PRM, DSP);
+#endif
 #ifdef DSP_DIS_LOAD
        if (doDSPDis)
                WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM2, PRES);
@@ -3951,10 +4050,17 @@ static void DSP_loadw(void)
        if (doDSPDis)
                WriteLog("%06X: LOADW  (R%02u), R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", DSP_PPC, PIMM1, PIMM2, dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM1, PRM, PIMM2, PRN);
 #endif
+#ifdef DSP_CORRECT_ALIGNMENT
+       if (PRM >= DSP_WORK_RAM_BASE && PRM <= (DSP_WORK_RAM_BASE + 0x1FFF))
+               PRES = DSPReadLong(PRM & 0xFFFFFFFE, DSP) & 0xFFFF;
+       else
+               PRES = JaguarReadWord(PRM & 0xFFFFFFFE, DSP);
+#else
        if (PRM >= DSP_WORK_RAM_BASE && PRM <= (DSP_WORK_RAM_BASE + 0x1FFF))
                PRES = DSPReadLong(PRM, DSP) & 0xFFFF;
        else
                PRES = JaguarReadWord(PRM, DSP);
+#endif
 #ifdef DSP_DIS_LOADW
        if (doDSPDis)
                WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM2, PRES);
@@ -3967,7 +4073,11 @@ static void DSP_load_r14_i(void)
        if (doDSPDis)
                WriteLog("%06X: LOAD   (R14+$%02X), R%02u [NCZ:%u%u%u, R14+$%02X=%08X, R%02u=%08X] -> ", DSP_PPC, dsp_convert_zero[PIMM1] << 2, PIMM2, dsp_flag_n, dsp_flag_c, dsp_flag_z, dsp_convert_zero[PIMM1] << 2, dsp_reg[14]+(dsp_convert_zero[PIMM1] << 2), PIMM2, PRN);
 #endif
+#ifdef DSP_CORRECT_ALIGNMENT
+       PRES = DSPReadLong((dsp_reg[14] & 0xFFFFFFFC) + (dsp_convert_zero[PIMM1] << 2), DSP);
+#else
        PRES = DSPReadLong(dsp_reg[14] + (dsp_convert_zero[PIMM1] << 2), DSP);
+#endif
 #ifdef DSP_DIS_LOAD14I
        if (doDSPDis)
                WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM2, PRES);
@@ -3980,7 +4090,11 @@ static void DSP_load_r14_r(void)
        if (doDSPDis)
                WriteLog("%06X: LOAD   (R14+R%02u), R%02u [NCZ:%u%u%u, R14+R%02u=%08X, R%02u=%08X] -> ", DSP_PPC, PIMM1, PIMM2, dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM1, PRM+dsp_reg[14], PIMM2, PRES);
 #endif
+#ifdef DSP_CORRECT_ALIGNMENT
+       PRES = DSPReadLong((dsp_reg[14] + PRM) & 0xFFFFFFFC, DSP);
+#else
        PRES = DSPReadLong(dsp_reg[14] + PRM, DSP);
+#endif
 #ifdef DSP_DIS_LOAD14R
        if (doDSPDis)
                WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM2, PRES);
@@ -3993,7 +4107,11 @@ static void DSP_load_r15_i(void)
        if (doDSPDis)
                WriteLog("%06X: LOAD   (R15+$%02X), R%02u [NCZ:%u%u%u, R15+$%02X=%08X, R%02u=%08X] -> ", DSP_PPC, dsp_convert_zero[PIMM1] << 2, PIMM2, dsp_flag_n, dsp_flag_c, dsp_flag_z, dsp_convert_zero[PIMM1] << 2, dsp_reg[15]+(dsp_convert_zero[PIMM1] << 2), PIMM2, PRN);
 #endif
+#ifdef DSP_CORRECT_ALIGNMENT
+       PRES = DSPReadLong((dsp_reg[15] &0xFFFFFFFC) + (dsp_convert_zero[PIMM1] << 2), DSP);
+#else
        PRES = DSPReadLong(dsp_reg[15] + (dsp_convert_zero[PIMM1] << 2), DSP);
+#endif
 #ifdef DSP_DIS_LOAD15I
        if (doDSPDis)
                WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM2, PRES);
@@ -4006,7 +4124,11 @@ static void DSP_load_r15_r(void)
        if (doDSPDis)
                WriteLog("%06X: LOAD   (R15+R%02u), R%02u [NCZ:%u%u%u, R15+R%02u=%08X, R%02u=%08X] -> ", DSP_PPC, PIMM1, PIMM2, dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM1, PRM+dsp_reg[15], PIMM2, PRN);
 #endif
+#ifdef DSP_CORRECT_ALIGNMENT
+       PRES = DSPReadLong((dsp_reg[15] + PRM) & 0xFFFFFFFC, DSP);
+#else
        PRES = DSPReadLong(dsp_reg[15] + PRM, DSP);
+#endif
 #ifdef DSP_DIS_LOAD15R
        if (doDSPDis)
                WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM2, PRES);
@@ -4023,7 +4145,7 @@ static void DSP_mirror(void)
 static void DSP_mmult(void)
 {
        int count       = dsp_matrix_control&0x0f;
-       uint32 addr = dsp_pointer_to_matrix; // in the gpu ram
+       uint32 addr = dsp_pointer_to_matrix; // in the dsp ram
        int64 accum = 0;
        uint32 res;
 
@@ -4444,7 +4566,11 @@ static void DSP_store(void)
 #endif
 //     DSPWriteLong(PRM, PRN, DSP);
 //     NO_WRITEBACK;
+#ifdef DSP_CORRECT_ALIGNMENT_STORE
+       pipeline[plPtrExec].address = PRM & 0xFFFFFFFC;
+#else
        pipeline[plPtrExec].address = PRM;
+#endif
        pipeline[plPtrExec].value = PRN;
        pipeline[plPtrExec].type = TYPE_DWORD;
        WRITEBACK_ADDR;
@@ -4490,7 +4616,11 @@ static void DSP_storew(void)
 //             JaguarWriteWord(PRM, PRN, DSP);
 //
 //     NO_WRITEBACK;
+#ifdef DSP_CORRECT_ALIGNMENT_STORE
+       pipeline[plPtrExec].address = PRM & 0xFFFFFFFE;
+#else
        pipeline[plPtrExec].address = PRM;
+#endif
 
        if (PRM >= DSP_WORK_RAM_BASE && PRM <= (DSP_WORK_RAM_BASE + 0x1FFF))
        {
@@ -4513,7 +4643,11 @@ static void DSP_store_r14_i(void)
 #endif
 //     DSPWriteLong(dsp_reg[14] + (dsp_convert_zero[PIMM1] << 2), PRN, DSP);
 //     NO_WRITEBACK;
+#ifdef DSP_CORRECT_ALIGNMENT_STORE
+       pipeline[plPtrExec].address = (dsp_reg[14] & 0xFFFFFFFC) + (dsp_convert_zero[PIMM1] << 2);
+#else
        pipeline[plPtrExec].address = dsp_reg[14] + (dsp_convert_zero[PIMM1] << 2);
+#endif
        pipeline[plPtrExec].value = PRN;
        pipeline[plPtrExec].type = TYPE_DWORD;
        WRITEBACK_ADDR;
@@ -4523,7 +4657,11 @@ static void DSP_store_r14_r(void)
 {
 //     DSPWriteLong(dsp_reg[14] + PRM, PRN, DSP);
 //     NO_WRITEBACK;
+#ifdef DSP_CORRECT_ALIGNMENT_STORE
+       pipeline[plPtrExec].address = (dsp_reg[14] + PRM) & 0xFFFFFFFC;
+#else
        pipeline[plPtrExec].address = dsp_reg[14] + PRM;
+#endif
        pipeline[plPtrExec].value = PRN;
        pipeline[plPtrExec].type = TYPE_DWORD;
        WRITEBACK_ADDR;
@@ -4537,7 +4675,11 @@ static void DSP_store_r15_i(void)
 #endif
 //     DSPWriteLong(dsp_reg[15] + (dsp_convert_zero[PIMM1] << 2), PRN, DSP);
 //     NO_WRITEBACK;
+#ifdef DSP_CORRECT_ALIGNMENT_STORE
+       pipeline[plPtrExec].address = (dsp_reg[15] & 0xFFFFFFFC) + (dsp_convert_zero[PIMM1] << 2);
+#else
        pipeline[plPtrExec].address = dsp_reg[15] + (dsp_convert_zero[PIMM1] << 2);
+#endif
        pipeline[plPtrExec].value = PRN;
        pipeline[plPtrExec].type = TYPE_DWORD;
        WRITEBACK_ADDR;
@@ -4547,7 +4689,11 @@ static void DSP_store_r15_r(void)
 {
 //     DSPWriteLong(dsp_reg[15] + PRM, PRN, DSP);
 //     NO_WRITEBACK;
+#ifdef DSP_CORRECT_ALIGNMENT_STORE
+       pipeline[plPtrExec].address = (dsp_reg[15] + PRM) & 0xFFFFFFFC;
+#else
        pipeline[plPtrExec].address = dsp_reg[15] + PRM;
+#endif
        pipeline[plPtrExec].value = PRN;
        pipeline[plPtrExec].type = TYPE_DWORD;
        WRITEBACK_ADDR;