4 // Originally by David Raingeard
5 // GCC/SDL port by Niels Wagenaar (Linux/WIN32) and Caz (BeOS)
6 // Extensive cleanups/rewrites by James Hammons
7 // (C) 2010 Underground Software
9 // JLH = James Hammons <jlhamm@acm.org>
12 // --- ---------- -------------------------------------------------------------
13 // JLH 01/16/2010 Created this log ;-)
14 // JLH 11/26/2011 Added fixes for LOAD/STORE alignment issues
19 #include <SDL.h> // Used only for SDL_GetTicks...
27 #include "m68000/m68kinterface.h"
31 // Seems alignment in loads & stores was off...
32 #define DSP_CORRECT_ALIGNMENT
33 //#define DSP_CORRECT_ALIGNMENT_STORE
36 //#define DSP_DEBUG_IRQ
37 //#define DSP_DEBUG_PL2
38 //#define DSP_DEBUG_STALL
39 //#define DSP_DEBUG_CC
40 #define NEW_SCOREBOARD
42 // Disassembly definitions
49 #define DSP_DIS_ADDQMOD
59 #define DSP_DIS_IMULTN
60 #define DSP_DIS_ILLEGAL
64 #define DSP_DIS_LOAD14I
65 #define DSP_DIS_LOAD14R
66 #define DSP_DIS_LOAD15I
67 #define DSP_DIS_LOAD15R
73 #define DSP_DIS_MOVEFA
74 #define DSP_DIS_MOVEPC // Pipeline only!
75 #define DSP_DIS_MOVETA
81 #define DSP_DIS_RESMAC
88 #define DSP_DIS_STORE14I
89 #define DSP_DIS_STORE15I
90 #define DSP_DIS_STOREB
91 #define DSP_DIS_STOREW
98 bool doDSPDis = false;
99 //bool doDSPDis = true;
101 bool doDSPDis = false;
103 //#define DSP_DIS_JUMP
137 + load_r15_indexed 284500
139 + store_r15_indexed 47416
143 + load_r14_ri 1229448
146 // Pipeline structures
// Per-opcode flag table with one entry for each of the 64 opcode numbers,
// laid out four opcodes per row. The pipeline writeback code looks up
// affectsScoreboard[opcode] of the retiring instruction and, when the entry
// is true, releases or decrements scoreboard[operand2] for it.
148 const bool affectsScoreboard[64] =
150 true, true, true, true,
151 true, true, true, true,
152 true, true, true, true,
153 true, false, true, true,
155 true, true, false, true,
156 false, true, true, true,
157 true, true, true, true,
158 true, true, false, false,
160 true, true, true, true,
161 false, true, true, true,
162 true, true, true, true,
163 true, false, false, false,
165 true, false, false, true,
166 false, false, true, true,
167 true, false, true, true,
168 false, false, false, true
173 uint16_t instruction;
174 uint8_t opcode, operand1, operand2;
175 uint32_t reg1, reg2, areg1, areg2;
177 uint8_t writebackRegister;
178 // General memory store...
187 #define PIPELINE_STALL 64 // Set to # of opcodes + 1
188 #ifndef NEW_SCOREBOARD
191 uint8_t scoreboard[32];
193 uint8_t plPtrFetch, plPtrRead, plPtrExec, plPtrWrite;
194 PipelineStage pipeline[4];
195 bool IMASKCleared = false;
197 // DSP flags (old--have to get rid of this crap)
199 #define CINT0FLAG 0x00200
200 #define CINT1FLAG 0x00400
201 #define CINT2FLAG 0x00800
202 #define CINT3FLAG 0x01000
203 #define CINT4FLAG 0x02000
204 #define CINT04FLAGS (CINT0FLAG | CINT1FLAG | CINT2FLAG | CINT3FLAG | CINT4FLAG)
205 #define CINT5FLAG 0x20000 /* DSP only */
209 #define ZERO_FLAG 0x00001
210 #define CARRY_FLAG 0x00002
211 #define NEGA_FLAG 0x00004
212 #define IMASK 0x00008
213 #define INT_ENA0 0x00010
214 #define INT_ENA1 0x00020
215 #define INT_ENA2 0x00040
216 #define INT_ENA3 0x00080
217 #define INT_ENA4 0x00100
218 #define INT_CLR0 0x00200
219 #define INT_CLR1 0x00400
220 #define INT_CLR2 0x00800
221 #define INT_CLR3 0x01000
222 #define INT_CLR4 0x02000
223 #define REGPAGE 0x04000
224 #define DMAEN 0x08000
225 #define INT_ENA5 0x10000
226 #define INT_CLR5 0x20000
230 #define DSPGO 0x00001
231 #define CPUINT 0x00002
232 #define DSPINT0 0x00004
233 #define SINGLE_STEP 0x00008
234 #define SINGLE_GO 0x00010
236 #define INT_LAT0 0x00040
237 #define INT_LAT1 0x00080
238 #define INT_LAT2 0x00100
239 #define INT_LAT3 0x00200
240 #define INT_LAT4 0x00400
241 #define BUS_HOG 0x00800
242 #define VERSION 0x0F000
243 #define INT_LAT5 0x10000
245 extern uint32_t jaguar_mainRom_crc32;
247 // Is opcode 62 *really* a NOP? Seems like it...
248 static void dsp_opcode_abs(void);
249 static void dsp_opcode_add(void);
250 static void dsp_opcode_addc(void);
251 static void dsp_opcode_addq(void);
252 static void dsp_opcode_addqmod(void);
253 static void dsp_opcode_addqt(void);
254 static void dsp_opcode_and(void);
255 static void dsp_opcode_bclr(void);
256 static void dsp_opcode_bset(void);
257 static void dsp_opcode_btst(void);
258 static void dsp_opcode_cmp(void);
259 static void dsp_opcode_cmpq(void);
260 static void dsp_opcode_div(void);
261 static void dsp_opcode_imacn(void);
262 static void dsp_opcode_imult(void);
263 static void dsp_opcode_imultn(void);
264 static void dsp_opcode_jr(void);
265 static void dsp_opcode_jump(void);
266 static void dsp_opcode_load(void);
267 static void dsp_opcode_loadb(void);
268 static void dsp_opcode_loadw(void);
269 static void dsp_opcode_load_r14_indexed(void);
270 static void dsp_opcode_load_r14_ri(void);
271 static void dsp_opcode_load_r15_indexed(void);
272 static void dsp_opcode_load_r15_ri(void);
273 static void dsp_opcode_mirror(void);
274 static void dsp_opcode_mmult(void);
275 static void dsp_opcode_move(void);
276 static void dsp_opcode_movei(void);
277 static void dsp_opcode_movefa(void);
278 static void dsp_opcode_move_pc(void);
279 static void dsp_opcode_moveq(void);
280 static void dsp_opcode_moveta(void);
281 static void dsp_opcode_mtoi(void);
282 static void dsp_opcode_mult(void);
283 static void dsp_opcode_neg(void);
284 static void dsp_opcode_nop(void);
285 static void dsp_opcode_normi(void);
286 static void dsp_opcode_not(void);
287 static void dsp_opcode_or(void);
288 static void dsp_opcode_resmac(void);
289 static void dsp_opcode_ror(void);
290 static void dsp_opcode_rorq(void);
291 static void dsp_opcode_xor(void);
292 static void dsp_opcode_sat16s(void);
293 static void dsp_opcode_sat32s(void);
294 static void dsp_opcode_sh(void);
295 static void dsp_opcode_sha(void);
296 static void dsp_opcode_sharq(void);
297 static void dsp_opcode_shlq(void);
298 static void dsp_opcode_shrq(void);
299 static void dsp_opcode_store(void);
300 static void dsp_opcode_storeb(void);
301 static void dsp_opcode_storew(void);
302 static void dsp_opcode_store_r14_indexed(void);
303 static void dsp_opcode_store_r14_ri(void);
304 static void dsp_opcode_store_r15_indexed(void);
305 static void dsp_opcode_store_r15_ri(void);
306 static void dsp_opcode_sub(void);
307 static void dsp_opcode_subc(void);
308 static void dsp_opcode_subq(void);
309 static void dsp_opcode_subqmod(void);
310 static void dsp_opcode_subqt(void);
311 static void dsp_opcode_illegal(void);
313 /*uint8_t dsp_opcode_cycles[64] =
315 3, 3, 3, 3, 3, 3, 3, 3,
316 3, 3, 3, 3, 3, 3, 3, 3,
317 3, 3, 1, 3, 1, 18, 3, 3,
318 3, 3, 3, 3, 3, 3, 3, 3,
319 3, 3, 2, 2, 2, 2, 3, 4,
320 5, 4, 5, 6, 6, 1, 1, 1,
321 1, 2, 2, 2, 1, 1, 9, 3,
322 3, 1, 6, 6, 2, 2, 3, 3
324 //Here's a QnD kludge...
325 //This is wrong, wrong, WRONG, but it seems to work for the time being...
326 //(That is, it fixes Flip Out which relies on GPU timing rather than semaphores. Bad developers! Bad!)
327 //What's needed here is a way to take pipeline effects into account (including pipeline stalls!)...
328 // Yup, without cheating like this, the sound in things like Rayman, FACTS, &
329 // Tripper Getem get starved for time and sound like crap. So we have to figure
330 // out how to fix that. :-/
331 uint8_t dsp_opcode_cycles[64] =
333 1, 1, 1, 1, 1, 1, 1, 1,
334 1, 1, 1, 1, 1, 1, 1, 1,
335 1, 1, 1, 1, 1, 9, 1, 1,
336 1, 1, 1, 1, 1, 1, 1, 1,
337 1, 1, 1, 1, 1, 1, 1, 2,
338 2, 2, 2, 3, 3, 1, 1, 1,
339 1, 1, 1, 1, 1, 1, 4, 1,
340 1, 1, 3, 3, 1, 1, 1, 1
// Dispatch table of opcode handlers, one function pointer per opcode
// number (64 entries, four per row). The entries appear in the same order
// as the mnemonics in dsp_opcode_str, so index N in both tables refers to
// the same instruction.
343 void (* dsp_opcode[64])() =
345 dsp_opcode_add, dsp_opcode_addc, dsp_opcode_addq, dsp_opcode_addqt,
346 dsp_opcode_sub, dsp_opcode_subc, dsp_opcode_subq, dsp_opcode_subqt,
347 dsp_opcode_neg, dsp_opcode_and, dsp_opcode_or, dsp_opcode_xor,
348 dsp_opcode_not, dsp_opcode_btst, dsp_opcode_bset, dsp_opcode_bclr,
349 dsp_opcode_mult, dsp_opcode_imult, dsp_opcode_imultn, dsp_opcode_resmac,
350 dsp_opcode_imacn, dsp_opcode_div, dsp_opcode_abs, dsp_opcode_sh,
351 dsp_opcode_shlq, dsp_opcode_shrq, dsp_opcode_sha, dsp_opcode_sharq,
352 dsp_opcode_ror, dsp_opcode_rorq, dsp_opcode_cmp, dsp_opcode_cmpq,
353 dsp_opcode_subqmod, dsp_opcode_sat16s, dsp_opcode_move, dsp_opcode_moveq,
354 dsp_opcode_moveta, dsp_opcode_movefa, dsp_opcode_movei, dsp_opcode_loadb,
355 dsp_opcode_loadw, dsp_opcode_load, dsp_opcode_sat32s, dsp_opcode_load_r14_indexed,
356 dsp_opcode_load_r15_indexed, dsp_opcode_storeb, dsp_opcode_storew, dsp_opcode_store,
357 dsp_opcode_mirror, dsp_opcode_store_r14_indexed, dsp_opcode_store_r15_indexed, dsp_opcode_move_pc,
358 dsp_opcode_jump, dsp_opcode_jr, dsp_opcode_mmult, dsp_opcode_mtoi,
359 dsp_opcode_normi, dsp_opcode_nop, dsp_opcode_load_r14_ri, dsp_opcode_load_r15_ri,
360 dsp_opcode_store_r14_ri, dsp_opcode_store_r15_ri, dsp_opcode_illegal, dsp_opcode_addqmod,
363 uint32_t dsp_opcode_use[65];
// Mnemonic for each opcode number, in the same order as the dsp_opcode
// handler table. The pipeline dump logging indexes this table with the
// opcode stored in a pipeline stage.
// NOTE(review): the array is declared with 65 slots but only 64 names are
// visible here; presumably the last slot names the pipeline stall
// pseudo-opcode (PIPELINE_STALL is 64) - confirm against the full file.
365 const char * dsp_opcode_str[65]=
367 "add", "addc", "addq", "addqt",
368 "sub", "subc", "subq", "subqt",
369 "neg", "and", "or", "xor",
370 "not", "btst", "bset", "bclr",
371 "mult", "imult", "imultn", "resmac",
372 "imacn", "div", "abs", "sh",
373 "shlq", "shrq", "sha", "sharq",
374 "ror", "rorq", "cmp", "cmpq",
375 "subqmod", "sat16s", "move", "moveq",
376 "moveta", "movefa", "movei", "loadb",
377 "loadw", "load", "sat32s", "load_r14_indexed",
378 "load_r15_indexed", "storeb", "storew", "store",
379 "mirror", "store_r14_indexed","store_r15_indexed","move_pc",
380 "jump", "jr", "mmult", "mtoi",
381 "normi", "nop", "load_r14_ri", "load_r15_ri",
382 "store_r14_ri", "store_r15_ri", "illegal", "addqmod",
387 static uint64_t dsp_acc; // 40 bit register, NOT 32!
388 static uint32_t dsp_remain;
389 static uint32_t dsp_modulo;
390 static uint32_t dsp_flags;
391 static uint32_t dsp_matrix_control;
392 static uint32_t dsp_pointer_to_matrix;
393 static uint32_t dsp_data_organization;
394 uint32_t dsp_control;
395 static uint32_t dsp_div_control;
396 static uint8_t dsp_flag_z, dsp_flag_n, dsp_flag_c;
397 static uint32_t * dsp_reg = NULL, * dsp_alternate_reg = NULL;
398 uint32_t dsp_reg_bank_0[32], dsp_reg_bank_1[32];
400 static uint32_t dsp_opcode_first_parameter;
401 static uint32_t dsp_opcode_second_parameter;
403 #define DSP_RUNNING (dsp_control & 0x01)
405 #define RM dsp_reg[dsp_opcode_first_parameter]
406 #define RN dsp_reg[dsp_opcode_second_parameter]
407 #define ALTERNATE_RM dsp_alternate_reg[dsp_opcode_first_parameter]
408 #define ALTERNATE_RN dsp_alternate_reg[dsp_opcode_second_parameter]
409 #define IMM_1 dsp_opcode_first_parameter
410 #define IMM_2 dsp_opcode_second_parameter
// Helpers that update the cached condition flags dsp_flag_z, dsp_flag_n
// and dsp_flag_c.
//   CLR_*      : clear the named flags.
//   SET_Z(r)   : Z becomes 1 when r is zero.
//   SET_N(r)   : N becomes bit 31 of r.
//   SET_C_ADD  : C becomes 1 when b > ~a as unsigned 32-bit values, which
//                is the condition for a + b to carry out of 32 bits.
//   SET_C_SUB  : C becomes 1 when b > a as unsigned 32-bit values (borrow).
// NOTE(review): SET_ZN, SET_ZNC_ADD and SET_ZNC_SUB expand to several
// statements separated by semicolons. Used as the body of an if or loop
// without braces, only the first statement would be controlled by it.
412 #define CLR_Z (dsp_flag_z = 0)
413 #define CLR_ZN (dsp_flag_z = dsp_flag_n = 0)
414 #define CLR_ZNC (dsp_flag_z = dsp_flag_n = dsp_flag_c = 0)
415 #define SET_Z(r) (dsp_flag_z = ((r) == 0))
416 #define SET_N(r) (dsp_flag_n = (((uint32_t)(r) >> 31) & 0x01))
417 #define SET_C_ADD(a,b) (dsp_flag_c = ((uint32_t)(b) > (uint32_t)(~(a))))
418 #define SET_C_SUB(a,b) (dsp_flag_c = ((uint32_t)(b) > (uint32_t)(a)))
419 #define SET_ZN(r) SET_N(r); SET_Z(r)
420 #define SET_ZNC_ADD(a,b,r) SET_N(r); SET_Z(r); SET_C_ADD(a,b)
421 #define SET_ZNC_SUB(a,b,r) SET_N(r); SET_Z(r); SET_C_SUB(a,b)
423 uint32_t dsp_convert_zero[32] = {
424 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
425 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
428 uint8_t dsp_branch_condition_table[32 * 8];
429 static uint16_t mirror_table[65536];
430 static uint8_t dsp_ram_8[0x2000];
432 #define BRANCH_CONDITION(x) dsp_branch_condition_table[(x) + ((jaguar_flags & 7) << 5)]
434 static uint32_t dsp_in_exec = 0;
435 static uint32_t dsp_releaseTimeSlice_flag = 0;
440 // Comparison core vars (used only for core comparison! :-)
441 static uint64_t count = 0;
442 static uint8_t ram1[0x2000], ram2[0x2000];
443 static uint32_t regs1[64], regs2[64];
444 static uint32_t ctrl1[14], ctrl2[14];
447 // Private function prototypes
449 void DSPDumpRegisters(void);
450 void DSPDumpDisassembly(void);
451 void FlushDSPPipeline(void);
// Reset the opcode usage statistics: every one of the 65 counters in
// dsp_opcode_use is set back to zero.
454 void dsp_reset_stats(void)
456 for(int i=0; i<65; i++)
457 dsp_opcode_use[i] = 0;
// Record a request for the DSP to give up the rest of its time slice.
// The only visible effect is raising dsp_releaseTimeSlice_flag; whoever
// runs the DSP execution loop is expected to look at that flag.
461 void DSPReleaseTimeslice(void)
463 //This does absolutely nothing!!! !!! FIX !!!
464 dsp_releaseTimeSlice_flag = 1;
// Precompute two lookup tables: mirror_table (16-bit bit reversal) and
// dsp_branch_condition_table (jump condition outcome for every combination
// of flag state and condition code).
468 void dsp_build_branch_condition_table(void)
470 // Fill in the mirror table
// Entry i is i with its 16 bits in reverse order: bit 15 lands in bit 0,
// bit 14 in bit 1, and so on up to bit 0 landing in bit 15.
471 for(int i=0; i<65536; i++)
473 mirror_table[i] = ((i >> 15) & 0x0001) | ((i >> 13) & 0x0002)
474 | ((i >> 11) & 0x0004) | ((i >> 9) & 0x0008)
475 | ((i >> 7) & 0x0010) | ((i >> 5) & 0x0020)
476 | ((i >> 3) & 0x0040) | ((i >> 1) & 0x0080)
477 | ((i << 1) & 0x0100) | ((i << 3) & 0x0200)
478 | ((i << 5) & 0x0400) | ((i << 7) & 0x0800)
479 | ((i << 9) & 0x1000) | ((i << 11) & 0x2000)
480 | ((i << 13) & 0x4000) | ((i << 15) & 0x8000);
483 // Fill in the condition table
// i enumerates the 8 states of the low three flag bits (zero, carry,
// negative); j enumerates the 32 condition codes.
484 for(int i=0; i<8; i++)
486 for(int j=0; j<32; j++)
// Condition bit 0 tests the zero flag being set, bit 1 tests it clear.
490 if ((j & 1) && (i & ZERO_FLAG))
493 if ((j & 2) && (!(i & ZERO_FLAG)))
// Condition bits 2 and 3 test a second flag set or clear. Bit 4 of j
// shifts CARRY_FLAG left by one, so the tested flag becomes NEGA_FLAG.
496 if ((j & 4) && (i & (CARRY_FLAG << (j >> 4))))
499 if ((j & 8) && (!(i & (CARRY_FLAG << (j >> 4)))))
// The table holds one row of 32 condition results per flag state.
502 dsp_branch_condition_table[i * 32 + j] = result;
// Read one byte from the DSP address space.
//   offset : bus address to read.
//   who    : identifier of the bus master, used to index whoName for
//            logging and passed on to the lower level read functions.
// Addresses in DSP work RAM are served straight from dsp_ram_8. Addresses
// in the control register window are read as a whole 32-bit register via
// DSPReadLong and the requested byte is picked out big-endian style
// (offset & 3 equal to 1 gives bits 23-16, 2 gives bits 15-8, 3 gives bits
// 7-0). Every other address is forwarded to JaguarReadByte.
// Reads in 0xF1A000-0xF1A0FF are logged but not blocked.
508 uint8_t DSPReadByte(uint32_t offset, uint32_t who/*=UNKNOWN*/)
510 if (offset >= 0xF1A000 && offset <= 0xF1A0FF)
511 WriteLog("DSP: ReadByte--Attempt to read from DSP register file by %s!\n", whoName[who]);
513 // if ((offset==0xF1CFE0)||(offset==0xF1CFE2))
516 /* if ((jaguar_mainRom_crc32==0xbfd751a4)||(jaguar_mainRom_crc32==0x053efaf9))
518 if (offset==0xF1CFE0)
521 if (offset >= DSP_WORK_RAM_BASE && offset <= (DSP_WORK_RAM_BASE + 0x1FFF))
522 return dsp_ram_8[offset - DSP_WORK_RAM_BASE];
524 if (offset >= DSP_CONTROL_RAM_BASE && offset <= (DSP_CONTROL_RAM_BASE + 0x1F))
526 uint32_t data = DSPReadLong(offset & 0xFFFFFFFC, who);
528 if ((offset & 0x03) == 0)
530 else if ((offset & 0x03) == 1)
531 return ((data >> 16) & 0xFF);
532 else if ((offset & 0x03) == 2)
533 return ((data >> 8) & 0xFF);
534 else if ((offset & 0x03) == 3)
535 return (data & 0xFF);
538 return JaguarReadByte(offset, who);
// Read a 16-bit word from the DSP address space.
//   offset : bus address; bit 0 is cleared so the access is word aligned.
//   who    : identifier of the bus master, used for logging and passed on.
// Work RAM is read with GET16 from dsp_ram_8. Control registers are read
// as a full 32-bit value via DSPReadLong on the long-aligned address; the
// return visible here yields the low 16 bits of that value. Any other
// address is forwarded to JaguarReadWord.
// Reads in 0xF1A000-0xF1A0FF are logged but not blocked.
542 uint16_t DSPReadWord(uint32_t offset, uint32_t who/*=UNKNOWN*/)
544 if (offset >= 0xF1A000 && offset <= 0xF1A0FF)
545 WriteLog("DSP: ReadWord--Attempt to read from DSP register file by %s!\n", whoName[who]);
547 offset &= 0xFFFFFFFE;
549 if (offset >= DSP_WORK_RAM_BASE && offset <= DSP_WORK_RAM_BASE+0x1FFF)
551 offset -= DSP_WORK_RAM_BASE;
552 /* uint16_t data = (((uint16_t)dsp_ram_8[offset])<<8)|((uint16_t)dsp_ram_8[offset+1]);
554 return GET16(dsp_ram_8, offset);
556 else if ((offset>=DSP_CONTROL_RAM_BASE)&&(offset<DSP_CONTROL_RAM_BASE+0x20))
558 uint32_t data = DSPReadLong(offset & 0xFFFFFFFC, who);
561 return data & 0xFFFF;
566 return JaguarReadWord(offset, who);
// Read a 32-bit long from the DSP address space.
//   offset : bus address; the low two bits are cleared before decoding.
//   who    : identifier of the bus master, used for logging and passed on.
// Work RAM is read with GET32 from dsp_ram_8. The control register window
// checked here extends to offset 0x23 so that the high accumulator bits
// can be read back. Visible register reads:
//   flags          : the cached Z, C and N flags are folded into bits 0-2
//                    of dsp_flags first, and the value is returned with
//                    bits 9-13 (the INT_CLR0 to INT_CLR4 positions)
//                    masked off by 0xFFFFC1FF.
//   0x04 - 0x1C    : matrix control, matrix pointer, data organization,
//                    PC, control, modulo and remainder respectively.
//   accumulator    : bits 39-32 of dsp_acc, sign extended to 32 bits.
// Any other address is forwarded to JaguarReadLong.
570 uint32_t DSPReadLong(uint32_t offset, uint32_t who/*=UNKNOWN*/)
572 if (offset >= 0xF1A000 && offset <= 0xF1A0FF)
573 WriteLog("DSP: ReadLong--Attempt to read from DSP register file by %s!\n", whoName[who]);
576 offset &= 0xFFFFFFFC;
577 /*if (offset == 0xF1BCF4)
579 WriteLog("DSPReadLong: Reading from 0xF1BCF4... -> %08X [%02X %02X %02X %02X][%04X %04X]\n", GET32(dsp_ram_8, 0x0CF4), dsp_ram_8[0x0CF4], dsp_ram_8[0x0CF5], dsp_ram_8[0x0CF6], dsp_ram_8[0x0CF7], JaguarReadWord(0xF1BCF4, DSP), JaguarReadWord(0xF1BCF6, DSP));
580 DSPDumpDisassembly();
582 if (offset >= DSP_WORK_RAM_BASE && offset <= DSP_WORK_RAM_BASE + 0x1FFF)
584 offset -= DSP_WORK_RAM_BASE;
585 return GET32(dsp_ram_8, offset);
587 //NOTE: Didn't return DSP_ACCUM!!!
588 //Mebbe it's not 'spose to! Yes, it is!
589 if (offset >= DSP_CONTROL_RAM_BASE && offset <= DSP_CONTROL_RAM_BASE + 0x23)
595 dsp_flags = (dsp_flags & 0xFFFFFFF8) | (dsp_flag_n << 2) | (dsp_flag_c << 1) | dsp_flag_z;
596 return dsp_flags & 0xFFFFC1FF;
597 case 0x04: return dsp_matrix_control;
598 case 0x08: return dsp_pointer_to_matrix;
599 case 0x0C: return dsp_data_organization;
600 case 0x10: return dsp_pc;
601 case 0x14: return dsp_control;
602 case 0x18: return dsp_modulo;
603 case 0x1C: return dsp_remain;
605 return (int32_t)((int8_t)(dsp_acc >> 32)); // Top 8 bits of 40-bit accumulator, sign extended
607 // unaligned long read-- !!! FIX !!!
611 return JaguarReadLong(offset, who);
// Write one byte into the DSP address space.
//   offset : bus address to write.
//   data   : byte value.
//   who    : identifier of the bus master, used for logging and passed on.
// Work RAM bytes are stored directly in dsp_ram_8. For the control
// register window the byte is merged into the containing 32-bit register:
// the divide control register at 0x1C is patched in place using the byte
// number as is, while every other register is read with DSPReadLong,
// patched with the byte number flipped (3 - bytenum, big-endian order) and
// written back with DSPWriteLong. Any other address is forwarded to
// JaguarWriteByte.
// NOTE(review): the read-modify-write path masks the address with
// 0xFFFFFFC (seven hex digits) where the read functions use 0xFFFFFFFC.
// The extra cleared nibble is harmless only while addresses stay below
// 0x10000000 - confirm this is intended.
615 void DSPWriteByte(uint32_t offset, uint8_t data, uint32_t who/*=UNKNOWN*/)
617 if (offset >= 0xF1A000 && offset <= 0xF1A0FF)
618 WriteLog("DSP: WriteByte--Attempt to write to DSP register file by %s!\n", whoName[who]);
620 if ((offset >= DSP_WORK_RAM_BASE) && (offset < DSP_WORK_RAM_BASE + 0x2000))
622 offset -= DSP_WORK_RAM_BASE;
623 dsp_ram_8[offset] = data;
624 //This is rather stupid! !!! FIX !!!
625 /* if (dsp_in_exec == 0)
627 m68k_end_timeslice();
628 dsp_releaseTimeslice();
632 if ((offset >= DSP_CONTROL_RAM_BASE) && (offset < DSP_CONTROL_RAM_BASE + 0x20))
634 uint32_t reg = offset & 0x1C;
635 int bytenum = offset & 0x03;
637 if ((reg >= 0x1C) && (reg <= 0x1F))
638 dsp_div_control = (dsp_div_control & (~(0xFF << (bytenum << 3)))) | (data << (bytenum << 3));
641 //This looks funky. !!! FIX !!!
642 uint32_t old_data = DSPReadLong(offset&0xFFFFFFC, who);
643 bytenum = 3 - bytenum; // convention motorola !!!
644 old_data = (old_data & (~(0xFF << (bytenum << 3)))) | (data << (bytenum << 3));
645 DSPWriteLong(offset & 0xFFFFFFC, old_data, who);
649 // WriteLog("dsp: writing %.2x at 0x%.8x\n",data,offset);
650 //Should this *ever* happen??? Shouldn't we be saying "unknown" here???
651 // Well, yes, it can. There are 3 MMU users after all: 68K, GPU & DSP...!
652 JaguarWriteByte(offset, data, who);
// Write a 16-bit word into the DSP address space.
//   offset : bus address; bit 0 is cleared so the access is word aligned.
//   data   : word value.
//   who    : identifier of the bus master, used for logging and passed on.
// Work RAM words are stored high byte first in dsp_ram_8. The SET16 calls
// on ram1 and ram2 mirror the write into the comparison-core RAM copies;
// presumably they sit under a debug conditional - confirm in the full file.
// For the control register window the word replaces either the low or the
// high half of the containing 32-bit register: the divide control register
// at 0x1C is patched directly, every other register goes through a
// DSPReadLong, merge, DSPWriteLong sequence. Any other address is
// forwarded to JaguarWriteWord.
// NOTE(review): the read-modify-write path masks the address with
// 0xFFFFFFC (seven hex digits) rather than 0xFFFFFFFC - confirm.
656 void DSPWriteWord(uint32_t offset, uint16_t data, uint32_t who/*=UNKNOWN*/)
658 if (offset >= 0xF1A000 && offset <= 0xF1A0FF)
659 WriteLog("DSP: WriteWord--Attempt to write to DSP register file by %s!\n", whoName[who]);
660 offset &= 0xFFFFFFFE;
661 /*if (offset == 0xF1BCF4)
663 WriteLog("DSPWriteWord: Writing to 0xF1BCF4... %04X -> %04X\n", GET16(dsp_ram_8, 0x0CF4), data);
665 // WriteLog("dsp: writing %.4x at 0x%.8x\n",data,offset);
666 if ((offset >= DSP_WORK_RAM_BASE) && (offset < DSP_WORK_RAM_BASE+0x2000))
668 /*if (offset == 0xF1B2F4)
670 WriteLog("DSP: %s is writing %04X at location 0xF1B2F4 (DSP_PC: %08X)...\n", whoName[who], data, dsp_pc);
672 offset -= DSP_WORK_RAM_BASE;
673 dsp_ram_8[offset] = data >> 8;
674 dsp_ram_8[offset+1] = data & 0xFF;
675 //This is rather stupid! !!! FIX !!!
676 /* if (dsp_in_exec == 0)
678 // WriteLog("dsp: writing %.4x at 0x%.8x\n",data,offset+DSP_WORK_RAM_BASE);
679 m68k_end_timeslice();
680 dsp_releaseTimeslice();
684 SET16(ram1, offset, data),
685 SET16(ram2, offset, data);
690 else if ((offset >= DSP_CONTROL_RAM_BASE) && (offset < DSP_CONTROL_RAM_BASE+0x20))
692 if ((offset & 0x1C) == 0x1C)
695 dsp_div_control = (dsp_div_control & 0xFFFF0000) | (data & 0xFFFF);
697 dsp_div_control = (dsp_div_control & 0xFFFF) | ((data & 0xFFFF) << 16);
701 uint32_t old_data = DSPReadLong(offset & 0xFFFFFFC, who);
704 old_data = (old_data & 0xFFFF0000) | (data & 0xFFFF);
706 old_data = (old_data & 0xFFFF) | ((data & 0xFFFF) << 16);
708 DSPWriteLong(offset & 0xFFFFFFC, old_data, who);
714 JaguarWriteWord(offset, data, who);
718 //bool badWrite = false;
// Write a 32-bit long into the DSP address space.
//   offset : bus address; the low two bits are cleared before decoding.
//   data   : value to write.
//   who    : identifier of the bus master, used for logging and passed on.
// Work RAM is written with SET32 into dsp_ram_8 (and mirrored into ram1
// and ram2, the comparison-core copies). Control register writes visible
// here behave as follows:
//   flags   : IMASKCleared records a 1 to 0 transition of IMASK. IMASK is
//             then stripped from the written value, the Z, C and N caches
//             are reloaded from bits 0-2, the register bank pointers are
//             refreshed, and any INT_CLRn bit set in the value clears the
//             matching INT_LATn latch in dsp_control (the shifts by 3 and
//             by 1 line the clear bits up with the latch bits).
//   matrix pointer : only bits 2-11 of the value are kept, OR-ed onto
//             the fixed base 0xF1B000.
//   control : CPU and DSP interrupt requests are acted on, then the value
//             is stored with the VERSION and INT_LATn bits preserved from
//             the old dsp_control.
//   divide control, matrix control, data organization : stored as given.
// Any other address is forwarded to JaguarWriteLong.
719 void DSPWriteLong(uint32_t offset, uint32_t data, uint32_t who/*=UNKNOWN*/)
721 if (offset >= 0xF1A000 && offset <= 0xF1A0FF)
722 WriteLog("DSP: WriteLong--Attempt to write to DSP register file by %s!\n", whoName[who]);
724 offset &= 0xFFFFFFFC;
725 /*if (offset == 0xF1BCF4)
727 WriteLog("DSPWriteLong: Writing to 0xF1BCF4... %08X -> %08X\n", GET32(dsp_ram_8, 0x0CF4), data);
729 // WriteLog("dsp: writing %.8x at 0x%.8x\n",data,offset);
730 if (offset >= DSP_WORK_RAM_BASE && offset <= DSP_WORK_RAM_BASE + 0x1FFF)
732 /*if (offset == 0xF1BE2C)
734 WriteLog("DSP: %s is writing %08X at location 0xF1BE2C (DSP_PC: %08X)...\n", whoName[who], data, dsp_pc - 2);
736 offset -= DSP_WORK_RAM_BASE;
737 SET32(dsp_ram_8, offset, data);
740 SET32(ram1, offset, data),
741 SET32(ram2, offset, data);
746 else if (offset >= DSP_CONTROL_RAM_BASE && offset <= (DSP_CONTROL_RAM_BASE + 0x1F))
754 WriteLog("DSP: Writing %08X to DSP_FLAGS by %s (REGPAGE is %sset)...\n", data, whoName[who], (dsp_flags & REGPAGE ? "" : "not "));
756 // bool IMASKCleared = (dsp_flags & IMASK) && !(data & IMASK);
757 IMASKCleared = (dsp_flags & IMASK) && !(data & IMASK);
758 // NOTE: According to the JTRM, writing a 1 to IMASK has no effect; only the
759 // IRQ logic can set it. So we mask it out here to prevent problems...
760 dsp_flags = data & (~IMASK);
761 dsp_flag_z = dsp_flags & 0x01;
762 dsp_flag_c = (dsp_flags >> 1) & 0x01;
763 dsp_flag_n = (dsp_flags >> 2) & 0x01;
764 DSPUpdateRegisterBanks();
765 dsp_control &= ~((dsp_flags & CINT04FLAGS) >> 3);
766 dsp_control &= ~((dsp_flags & CINT5FLAG) >> 1);
770 dsp_matrix_control = data;
773 // According to JTRM, only lines 2-11 are addressable, the rest being
774 // hardwired to $F1Bxxx.
775 dsp_pointer_to_matrix = 0xF1B000 | (data & 0x000FFC);
778 dsp_data_organization = data;
783 WriteLog("DSP: Setting DSP PC to %08X by %s%s\n", dsp_pc, whoName[who], (DSP_RUNNING ? " (DSP is RUNNING!)" : ""));//*/
788 ctrl1[0] = ctrl2[0] = data;
795 WriteLog("Write to DSP CTRL by %s: %08X (DSP PC=$%08X)\n", whoName[who], data, dsp_pc);
797 bool wasRunning = DSP_RUNNING;
798 // uint32_t dsp_was_running = DSP_RUNNING;
799 // Check for DSP -> CPU interrupt
803 WriteLog("DSP: DSP -> CPU interrupt\n");
806 #warning "!!! DSP IRQs that go to the 68K have to be routed thru TOM !!! FIX !!!"
807 if (JERRYIRQEnabled(IRQ2_DSP))
809 JERRYSetPendingIRQ(IRQ2_DSP);
810 DSPReleaseTimeslice();
811 m68k_set_irq(2); // Set 68000 IPL 2...
815 // Check for CPU -> DSP interrupt
819 WriteLog("DSP: CPU -> DSP interrupt\n");
821 m68k_end_timeslice();
822 DSPReleaseTimeslice();
823 DSPSetIRQLine(DSPIRQ_CPU, ASSERT_LINE);
827 if (data & SINGLE_STEP)
829 // WriteLog("DSP: Asked to perform a single step (single step is %senabled)\n", (data & 0x8 ? "" : "not "));
832 // Protect writes to VERSION and the interrupt latches...
833 uint32_t mask = VERSION | INT_LAT0 | INT_LAT1 | INT_LAT2 | INT_LAT3 | INT_LAT4 | INT_LAT5;
834 dsp_control = (dsp_control & mask) | (data & ~mask);
838 ctrl1[8] = ctrl2[8] = dsp_control;
842 // if dsp wasn't running but is now running
843 // execute a few cycles
844 //This is just plain wrong, wrong, WRONG!
845 #ifndef DSP_SINGLE_STEPPING
846 /* if (!dsp_was_running && DSP_RUNNING)
851 //This is WRONG! !!! FIX !!!
852 if (dsp_control & 0x18)
857 WriteLog(" --> Starting to run at %08X by %s...", dsp_pc, whoName[who]);
859 WriteLog(" --> Stopped by %s! (DSP PC: %08X)", whoName[who], dsp_pc);
862 //This isn't exactly right either--we don't know if it was the M68K or the DSP writing here...
863 // !!! FIX !!! [DONE]
867 m68k_end_timeslice();
869 DSPReleaseTimeslice();
873 //DSPDumpDisassembly();
878 WriteLog("DSP: Modulo data %08X written by %s.\n", data, whoName[who]);
882 dsp_div_control = data;
884 // default: // unaligned long read
890 //We don't have to break this up like this! We CAN do 32 bit writes!
891 // JaguarWriteWord(offset, (data>>16) & 0xFFFF, DSP);
892 // JaguarWriteWord(offset+2, data & 0xFFFF, DSP);
893 //if (offset > 0xF1FFFF)
895 JaguarWriteLong(offset, data, who);
900 // Update the DSP register file pointers depending on REGPAGE bit
// Point dsp_reg and dsp_alternate_reg at the correct register banks.
// The bank is chosen by the REGPAGE bit of dsp_flags, except that a set
// IMASK bit always selects bank 0 as the main bank. The other bank becomes
// the alternate one.
902 void DSPUpdateRegisterBanks(void)
904 int bank = (dsp_flags & REGPAGE);
906 if (dsp_flags & IMASK)
907 bank = 0; // IMASK forces main bank to be bank 0
910 dsp_reg = dsp_reg_bank_1, dsp_alternate_reg = dsp_reg_bank_0;
912 dsp_reg = dsp_reg_bank_0, dsp_alternate_reg = dsp_reg_bank_1;
915 WriteLog("DSP: Register bank #%s active.\n", (bank ? "1" : "0"));
921 // Check for and handle any asserted DSP IRQs
// Pipelined interrupt entry: check for a latched and enabled DSP interrupt
// and, when one is found, redirect execution to its service routine.
// Visible steps:
//   1. Nothing is examined while IMASK is set in dsp_flags.
//   2. The latch bits are gathered from dsp_control and the enable bits
//      from dsp_flags, each packed into a 6-bit value.
//   3. The instruction sitting in the write stage of the pipeline is
//      retired first: its register result is written back, or for a
//      writeback register of 0xFE its pending memory store is performed
//      with the width given by the stage type, and its scoreboard entry
//      is released.
//   4. The register banks are refreshed and the return address is stored
//      at the address held in R31. The address is dsp_pc - 2 reduced by
//      the size of the instruction in the execute stage: 6 for opcode 38
//      (movei, which carries a 32-bit immediate), 0 for a stall, else 2.
//   5. dsp_pc and R30 are set to DSP_WORK_RAM_BASE + which * 0x10, the
//      entry point of the service routine for interrupt number which.
// NOTE(review): (dsp_control >> 10) & 0x20 selects bit 15 of dsp_control,
// while INT_LAT5 is defined as 0x10000 (bit 16). The enable side uses a
// shift of 11 for the matching bit 16 of dsp_flags - confirm which is
// intended.
923 void DSPHandleIRQs(void)
925 if (dsp_flags & IMASK) // Bail if we're already inside an interrupt
928 // Get the active interrupt bits (latches) & interrupt mask (enables)
929 uint32_t bits = ((dsp_control >> 10) & 0x20) | ((dsp_control >> 6) & 0x1F),
930 mask = ((dsp_flags >> 11) & 0x20) | ((dsp_flags >> 4) & 0x1F);
932 // WriteLog("dsp: bits=%.2x mask=%.2x\n",bits,mask);
935 if (!bits) // Bail if nothing is enabled
938 int which = 0; // Determine which interrupt
954 WriteLog("DSP: Generating interrupt #%i...", which);
957 //if (which == 0) doDSPDis = true;
959 // NOTE: Since the actual Jaguar hardware injects the code sequence below
960 // directly into the pipeline, it has the side effect of ensuring that the
961 // instruction interrupted also gets to do its writeback. We simulate that
963 /* if (pipeline[plPtrWrite].opcode != PIPELINE_STALL)
965 if (pipeline[plPtrWrite].writebackRegister != 0xFF)
966 dsp_reg[pipeline[plPtrWrite].writebackRegister] = pipeline[plPtrWrite].result;
968 if (affectsScoreboard[pipeline[plPtrWrite].opcode])
969 scoreboard[pipeline[plPtrWrite].operand2] = false;
971 //This should be execute (or should it?--not sure now!)
972 //Actually, the way this is called now, this should be correct (i.e., the plPtrs advance,
973 //and what just executed is now in the Write position...). So why didn't it do the
974 //writeback into register 0?
976 WriteLog("--> Pipeline dump [DSP_PC=%08X]...\n", dsp_pc);
977 WriteLog("\tR -> %02u, %02u, %02u; r1=%08X, r2= %08X, res=%08X, wb=%u (%s)\n", pipeline[plPtrRead].opcode, pipeline[plPtrRead].operand1, pipeline[plPtrRead].operand2, pipeline[plPtrRead].reg1, pipeline[plPtrRead].reg2, pipeline[plPtrRead].result, pipeline[plPtrRead].writebackRegister, dsp_opcode_str[pipeline[plPtrRead].opcode]);
978 WriteLog("\tE -> %02u, %02u, %02u; r1=%08X, r2= %08X, res=%08X, wb=%u (%s)\n", pipeline[plPtrExec].opcode, pipeline[plPtrExec].operand1, pipeline[plPtrExec].operand2, pipeline[plPtrExec].reg1, pipeline[plPtrExec].reg2, pipeline[plPtrExec].result, pipeline[plPtrExec].writebackRegister, dsp_opcode_str[pipeline[plPtrExec].opcode]);
979 WriteLog("\tW -> %02u, %02u, %02u; r1=%08X, r2= %08X, res=%08X, wb=%u (%s)\n", pipeline[plPtrWrite].opcode, pipeline[plPtrWrite].operand1, pipeline[plPtrWrite].operand2, pipeline[plPtrWrite].reg1, pipeline[plPtrWrite].reg2, pipeline[plPtrWrite].result, pipeline[plPtrWrite].writebackRegister, dsp_opcode_str[pipeline[plPtrWrite].opcode]);
981 if (pipeline[plPtrWrite].opcode != PIPELINE_STALL)
983 if (pipeline[plPtrWrite].writebackRegister != 0xFF)
985 if (pipeline[plPtrWrite].writebackRegister != 0xFE)
986 dsp_reg[pipeline[plPtrWrite].writebackRegister] = pipeline[plPtrWrite].result;
989 if (pipeline[plPtrWrite].type == TYPE_BYTE)
990 JaguarWriteByte(pipeline[plPtrWrite].address, pipeline[plPtrWrite].value);
991 else if (pipeline[plPtrWrite].type == TYPE_WORD)
992 JaguarWriteWord(pipeline[plPtrWrite].address, pipeline[plPtrWrite].value);
994 JaguarWriteLong(pipeline[plPtrWrite].address, pipeline[plPtrWrite].value);
998 #ifndef NEW_SCOREBOARD
999 if (affectsScoreboard[pipeline[plPtrWrite].opcode])
1000 scoreboard[pipeline[plPtrWrite].operand2] = false;
1002 //Yup, sequential MOVEQ # problem fixing (I hope!)...
1003 if (affectsScoreboard[pipeline[plPtrWrite].opcode])
1004 if (scoreboard[pipeline[plPtrWrite].operand2])
1005 scoreboard[pipeline[plPtrWrite].operand2]--;
1012 ctrl2[4] = dsp_flags;
1015 DSPUpdateRegisterBanks();
1016 #ifdef DSP_DEBUG_IRQ
1017 // WriteLog(" [PC will return to %08X, R31 = %08X]\n", dsp_pc, dsp_reg[31]);
1018 WriteLog(" [PC will return to %08X, R31 = %08X]\n", dsp_pc - (pipeline[plPtrExec].opcode == 38 ? 6 : (pipeline[plPtrExec].opcode == PIPELINE_STALL ? 0 : 2)), dsp_reg[31]);
1021 // subqt #4,r31 ; pre-decrement stack pointer
1022 // move pc,r30 ; address of interrupted code
1023 // store r30,(r31) ; store return address
1030 //This might not come back to the right place if the instruction was MOVEI #. !!! FIX !!!
1031 //But, then again, JTRM says that it adds two regardless of what the instruction was...
1032 //It missed the place that it was supposed to come back to, so this is WRONG!
1034 // Look at the pipeline when an interrupt occurs (instructions of foo, bar, baz):
1036 // R -> baz (<- PC points here)
1037 // E -> bar (when it should point here!)
1040 // 'Foo' just completed executing as per above. PC is pointing to the instruction 'baz'
1041 // which means (assuming they're all 2 bytes long) that the code below will come back on
1042 // instruction 'baz' instead of 'bar' which is the next instruction to execute in the
1043 // instruction stream...
1045 // DSPWriteLong(dsp_reg[31], dsp_pc - 2, DSP);
1046 DSPWriteLong(dsp_reg[31], dsp_pc - 2 - (pipeline[plPtrExec].opcode == 38 ? 6 : (pipeline[plPtrExec].opcode == PIPELINE_STALL ? 0 : 2)), DSP);
1049 SET32(ram2, regs2[31] - 0xF1B000, dsp_pc - 2 - (pipeline[plPtrExec].opcode == 38 ? 6 : (pipeline[plPtrExec].opcode == PIPELINE_STALL ? 0 : 2)));
1053 // movei #service_address,r30 ; pointer to ISR entry
1054 // jump (r30) ; jump to ISR
1056 dsp_pc = dsp_reg[30] = DSP_WORK_RAM_BASE + (which * 0x10);
1059 ctrl2[0] = regs2[30] = dsp_pc;
1067 // Non-pipelined version...
// Non-pipelined interrupt entry: check for a latched and enabled DSP
// interrupt and, when one is found, redirect execution to its service
// routine.
// Visible steps:
//   1. The comparison-core snapshot (ram1, regs1, ctrl1) is copied back
//      into the live DSP state. regs1 holds both banks back to back, so
//      bank 1 is restored from the second half starting at &regs1[32].
//   2. Nothing is examined while IMASK is set in dsp_flags.
//   3. The latch bits are gathered from dsp_control and the enable bits
//      from dsp_flags, each packed into a 6-bit value.
//   4. IMASK is set, which forces register bank 0, and the bank pointers
//      are refreshed.
//   5. R30 receives dsp_pc - 2 (the interrupted instruction has already
//      executed) and that value is stored at the address held in R31.
//   6. dsp_pc and R30 are set to DSP_WORK_RAM_BASE + which * 0x10, the
//      entry point of the service routine for interrupt number which.
// NOTE(review): (dsp_control >> 10) & 0x20 selects bit 15 of dsp_control,
// while INT_LAT5 is defined as 0x10000 (bit 16) - confirm which is
// intended.
1069 void DSPHandleIRQsNP(void)
1073 memcpy(dsp_ram_8, ram1, 0x2000);
1074 memcpy(dsp_reg_bank_0, regs1, 32 * 4);
1075 memcpy(dsp_reg_bank_1, &regs1[32], 32 * 4);
1078 dsp_remain = ctrl1[2];
1079 dsp_modulo = ctrl1[3];
1080 dsp_flags = ctrl1[4];
1081 dsp_matrix_control = ctrl1[5];
1082 dsp_pointer_to_matrix = ctrl1[6];
1083 dsp_data_organization = ctrl1[7];
1084 dsp_control = ctrl1[8];
1085 dsp_div_control = ctrl1[9];
1086 IMASKCleared = ctrl1[10];
1087 dsp_flag_z = ctrl1[11];
1088 dsp_flag_n = ctrl1[12];
1089 dsp_flag_c = ctrl1[13];
1090 DSPUpdateRegisterBanks();
1093 if (dsp_flags & IMASK) // Bail if we're already inside an interrupt
1096 // Get the active interrupt bits (latches) & interrupt mask (enables)
1097 uint32_t bits = ((dsp_control >> 10) & 0x20) | ((dsp_control >> 6) & 0x1F),
1098 mask = ((dsp_flags >> 11) & 0x20) | ((dsp_flags >> 4) & 0x1F);
1100 // WriteLog("dsp: bits=%.2x mask=%.2x\n",bits,mask);
1103 if (!bits) // Bail if nothing is enabled
1106 int which = 0; // Determine which interrupt
1120 dsp_flags |= IMASK; // Force Bank #0
1123 ctrl1[4] = dsp_flags;
1126 #ifdef DSP_DEBUG_IRQ
1127 WriteLog("DSP: Bank 0: R30=%08X, R31=%08X\n", dsp_reg_bank_0[30], dsp_reg_bank_0[31]);
1128 WriteLog("DSP: Bank 1: R30=%08X, R31=%08X\n", dsp_reg_bank_1[30], dsp_reg_bank_1[31]);
1130 DSPUpdateRegisterBanks();
1131 #ifdef DSP_DEBUG_IRQ
1132 WriteLog("DSP: Bank 0: R30=%08X, R31=%08X\n", dsp_reg_bank_0[30], dsp_reg_bank_0[31]);
1133 WriteLog("DSP: Bank 1: R30=%08X, R31=%08X\n", dsp_reg_bank_1[30], dsp_reg_bank_1[31]);
1136 #ifdef DSP_DEBUG_IRQ
1137 WriteLog("DSP: Generating interrupt #%i...", which);
1138 WriteLog(" [PC will return to %08X, R31 = %08X]\n", dsp_pc, dsp_reg[31]);
1141 // subqt #4,r31 ; pre-decrement stack pointer
1142 // move pc,r30 ; address of interrupted code
1143 // store r30,(r31) ; store return address
1145 dsp_reg[30] = dsp_pc - 2; // -2 because we've executed the instruction already
1152 // DSPWriteLong(dsp_reg[31], dsp_pc - 2, DSP);
1153 DSPWriteLong(dsp_reg[31], dsp_reg[30], DSP);
1156 SET32(ram1, regs1[31] - 0xF1B000, dsp_pc - 2);
1160 // movei #service_address,r30 ; pointer to ISR entry
1161 // jump (r30) ; jump to ISR
1163 dsp_pc = dsp_reg[30] = DSP_WORK_RAM_BASE + (which * 0x10);
1166 ctrl1[0] = regs1[30] = dsp_pc;
1173 // Set the specified DSP IRQ line to a given state
// Update the interrupt latch for one DSP interrupt line.
//   irqline : interrupt number; the latch bit is INT_LAT0 shifted left by
//             this amount.
//   state   : requested line state. The test on it is not visible here;
//             presumably the latch is set again only when the line is
//             being asserted - confirm in the full file.
// The latch bit is always cleared first, then set in the assert path.
// The ctrl1 and ctrl2 assignments mirror dsp_control into the
// comparison-core state copies.
// NOTE(review): with irqline equal to 5 the shifted mask is 0x800, which
// is BUS_HOG rather than INT_LAT5, as the existing note below warns.
1175 void DSPSetIRQLine(int irqline, int state)
1177 //NOTE: This doesn't take INT_LAT5 into account. !!! FIX !!!
1178 uint32_t mask = INT_LAT0 << irqline;
1179 dsp_control &= ~mask; // Clear the latch bit
1182 ctrl1[8] = ctrl2[8] = dsp_control;
1188 dsp_control |= mask; // Set the latch bit
1189 #warning !!! No checking done to see if we're using pipelined DSP or not !!!
1194 ctrl1[8] = ctrl2[8] = dsp_control;
1200 // Not sure if this is correct behavior, but according to JTRM,
1201 // the IRQ output of JERRY is fed to this IRQ in the GPU...
1202 // Not sure this is right--DSP interrupts seem to be different from the JERRY interrupts!
1203 // GPUSetIRQLine(GPUIRQ_DSP, ASSERT_LINE);
// Report whether the DSP is running, i.e. whether bit 0 of dsp_control
// (tested through the DSP_RUNNING macro) is set.
1207 bool DSPIsRunning(void)
1209 return (DSP_RUNNING ? true : false);
1215 // memory_malloc_secure((void **)&dsp_ram_8, 0x2000, "DSP work RAM");
1216 // memory_malloc_secure((void **)&dsp_reg_bank_0, 32 * sizeof(int32_t), "DSP bank 0 regs");
1217 // memory_malloc_secure((void **)&dsp_reg_bank_1, 32 * sizeof(int32_t), "DSP bank 1 regs");
1219 dsp_build_branch_condition_table();
// NOTE(review): the header of the function that owns these lines is not visible
// in this listing. The statements below put the DSP globals into their initial
// state: PC at $F1B000, control/status values, both register banks zeroed and
// the 8192 bytes of dsp_ram_8 filled with rand() output.
1226 dsp_pc = 0x00F1B000;
1227 dsp_acc = 0x00000000;
1228 dsp_remain = 0x00000000;
1229 dsp_modulo = 0xFFFFFFFF;
1230 dsp_flags = 0x00040000;
1231 dsp_matrix_control = 0x00000000;
1232 dsp_pointer_to_matrix = 0x00000000;
1233 dsp_data_organization = 0xFFFFFFFF;
1234 dsp_control = 0x00002000; // Report DSP version 2
1235 dsp_div_control = 0x00000000;
// Bank 0 becomes the active register file, bank 1 the alternate one
1238 dsp_reg = dsp_reg_bank_0;
1239 dsp_alternate_reg = dsp_reg_bank_1;
// Clear all 32 registers of both banks
1241 for(int i=0; i<32; i++)
1242 dsp_reg[i] = dsp_alternate_reg[i] = 0x00000000;
1245 IMASKCleared = false;
1249 // Contents of local RAM are quasi-stable; we simulate this by randomizing RAM contents
// NOTE(review): stores 32 bits at a time through a uint32_t pointer into
// dsp_ram_8; this assumes the buffer is suitably aligned -- confirm.
1250 for(uint32_t i=0; i<8192; i+=4)
1251 *((uint32_t *)(&dsp_ram_8[i])) = rand();
// Writes a disassembly of the address range $F1B000-$F1CFFF to the log, one
// instruction per line. dasmjag() fills 'buffer' with the text and its return
// value is used as the distance to the next instruction.
1255 void DSPDumpDisassembly(void)
1259 WriteLog("\n---[DSP code at 00F1B000]---------------------------\n");
1260 uint32_t j = 0xF1B000;
1262 while (j <= 0xF1CFFF)
1265 j += dasmjag(JAGUAR_DSP, buffer, j);
// NOTE(review): the assignment of oldj is not visible in this listing;
// presumably it holds j from before the advance above -- confirm.
1266 WriteLog("\t%08X: %s\n", oldj, buffer);
// Logs the N/C/Z flags, the DSP PC and all 32 registers of each register bank.
// Each bank is printed as 8 rows of 4 registers; (j << 2) + k is the register
// number for row j, column k.
1271 void DSPDumpRegisters(void)
1273 //Should add modulus, etc to dump here...
1274 WriteLog("\n---[DSP flags: NCZ %d%d%d, DSP PC: %08X]------------\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, dsp_pc);
1275 WriteLog("\nRegisters bank 0\n");
1277 for(int j=0; j<8; j++)
1279 WriteLog("\tR%02i = %08X R%02i = %08X R%02i = %08X R%02i = %08X\n",
1280 (j << 2) + 0, dsp_reg_bank_0[(j << 2) + 0],
1281 (j << 2) + 1, dsp_reg_bank_0[(j << 2) + 1],
1282 (j << 2) + 2, dsp_reg_bank_0[(j << 2) + 2],
1283 (j << 2) + 3, dsp_reg_bank_0[(j << 2) + 3]);
1286 WriteLog("Registers bank 1\n");
1288 for(int j=0; j<8; j++)
1290 WriteLog("\tR%02i = %08X R%02i = %08X R%02i = %08X R%02i = %08X\n",
1291 (j << 2) + 0, dsp_reg_bank_1[(j << 2) + 0],
1292 (j << 2) + 1, dsp_reg_bank_1[(j << 2) + 1],
1293 (j << 2) + 2, dsp_reg_bank_1[(j << 2) + 2],
1294 (j << 2) + 3, dsp_reg_bank_1[(j << 2) + 3]);
// NOTE(review): the header of the function that owns these lines is not visible
// in this listing. The visible statements write a final report to the log: the
// DSP I/O registers, run/interrupt state, pending and enabled interrupt sources,
// a disassembly of DSP work RAM and the per-opcode usage counters.
1301 WriteLog("\n\n---------------------------------------------------------------------\n");
1302 WriteLog("DSP I/O Registers\n");
1303 WriteLog("---------------------------------------------------------------------\n");
// D_FLAGS is shown with the live N/C/Z flags merged into its low three bits
1304 WriteLog("F1%04X (D_FLAGS): $%06X\n", 0xA100, (dsp_flags & 0xFFFFFFF8) | (dsp_flag_n << 2) | (dsp_flag_c << 1) | dsp_flag_z);
1305 WriteLog("F1%04X (D_MTXC): $%04X\n", 0xA104, dsp_matrix_control);
1306 WriteLog("F1%04X (D_MTXA): $%04X\n", 0xA108, dsp_pointer_to_matrix);
1307 WriteLog("F1%04X (D_END): $%02X\n", 0xA10C, dsp_data_organization);
1308 WriteLog("F1%04X (D_PC): $%06X\n", 0xA110, dsp_pc);
1309 WriteLog("F1%04X (D_CTRL): $%06X\n", 0xA114, dsp_control);
1310 WriteLog("F1%04X (D_MOD): $%08X\n", 0xA118, dsp_modulo);
1311 WriteLog("F1%04X (D_REMAIN): $%08X\n", 0xA11C, dsp_remain);
1312 WriteLog("F1%04X (D_DIVCTRL): $%02X\n", 0xA11C, dsp_div_control);
// dsp_acc is wider than 32 bits (it is shifted right by 32), so the value is
// narrowed explicitly to match the %02X conversion, as the RESMAC log does.
1313 WriteLog("F1%04X (D_MACHI): $%02X\n", 0xA120, (uint32_t)((dsp_acc >> 32) & 0xFF));
1314 WriteLog("---------------------------------------------------------------------\n\n\n");
1316 WriteLog("DSP: Stopped at PC=%08X dsp_modulo=%08X (dsp was%s running)\n", dsp_pc, dsp_modulo, (DSP_RUNNING ? "" : "n't"));
1317 WriteLog("DSP: %sin interrupt handler\n", (dsp_flags & IMASK ? "" : "not "));
1319 // Get the active interrupt bits
1320 int bits = ((dsp_control >> 10) & 0x20) | ((dsp_control >> 6) & 0x1F);
1321 // Get the interrupt mask
1322 int mask = ((dsp_flags >> 11) & 0x20) | ((dsp_flags >> 4) & 0x1F);
// The source names printed are those enabled in 'mask', not those pending
1324 WriteLog("DSP: pending=$%X enabled=$%X (%s%s%s%s%s%s)\n", bits, mask,
1325 (mask & 0x01 ? "CPU " : ""), (mask & 0x02 ? "I2S " : ""),
1326 (mask & 0x04 ? "Timer0 " : ""), (mask & 0x08 ? "Timer1 " : ""),
1327 (mask & 0x10 ? "Ext0 " : ""), (mask & 0x20 ? "Ext1" : ""));
// Disassemble from DSP_WORK_RAM_BASE through $F1CFFF; dasmjag()'s return value
// is the distance to the next instruction.
1331 static char buffer[512];
1332 int j = DSP_WORK_RAM_BASE;
1334 while (j <= 0xF1CFFF)
1337 j += dasmjag(JAGUAR_DSP, buffer, j);
1338 WriteLog("\t%08X: %s\n", oldj, buffer);
// Only opcodes that were executed at least once are listed
1341 WriteLog("DSP opcodes use:\n");
1343 for(int i=0; i<64; i++)
1345 if (dsp_opcode_use[i])
1346 WriteLog("\t%s %i\n", dsp_opcode_str[i], dsp_opcode_use[i]);
1353 // DSP comparison core...
// Opcode compared against the non-pipelined core's instruction below.
// NOTE(review): its writer is not visible in this listing; presumably the
// pipelined core records the instruction it last executed here -- confirm.
1356 static uint16_t lastExec;
// Comparison harness: runs the non-pipelined core (DSPExec) and the pipelined
// core (DSPExecP2) side by side, one instruction at a time. The cores share the
// DSP globals, so each core's state is swapped in from, and saved back to, its
// own context: ram1/regs1/ctrl1 for the non-pipelined core and ram2/regs2/ctrl2
// for the pipelined one. regsN holds bank 0 in entries 0-31 and bank 1 from
// entry 32 on.
//   cycles - cycle budget; reduced by dsp_opcode_cycles[] for every instruction
//            fetched for the non-pipelined core. The loop runs while it is > 0
//            and DSP_RUNNING holds.
// When the instruction fetched for the non-pipelined core differs from lastExec
// the harness first steps the pipelined core once more, then the non-pipelined
// core up to twice, and finally logs the divergence.
1357 void DSPExecComp(int32_t cycles)
1359 while (cycles > 0 && DSP_RUNNING)
1361 // Load up vars for non-pipelined core
1362 memcpy(dsp_ram_8, ram1, 0x2000);
1363 memcpy(dsp_reg_bank_0, regs1, 32 * 4);
1364 memcpy(dsp_reg_bank_1, &regs1[32], 32 * 4);
1367 dsp_remain = ctrl1[2];
1368 dsp_modulo = ctrl1[3];
1369 dsp_flags = ctrl1[4];
1370 dsp_matrix_control = ctrl1[5];
1371 dsp_pointer_to_matrix = ctrl1[6];
1372 dsp_data_organization = ctrl1[7];
1373 dsp_control = ctrl1[8];
1374 dsp_div_control = ctrl1[9];
1375 IMASKCleared = ctrl1[10];
1376 dsp_flag_z = ctrl1[11];
1377 dsp_flag_n = ctrl1[12];
1378 dsp_flag_c = ctrl1[13];
1379 DSPUpdateRegisterBanks();
1381 // Decrement cycles based on non-pipelined core...
1382 uint16_t instr1 = DSPReadWord(dsp_pc, DSP);
1383 cycles -= dsp_opcode_cycles[instr1 >> 10];
1385 //WriteLog("\tAbout to execute non-pipelined core on tick #%u (DSP_PC=%08X)...\n", (uint32_t)count, dsp_pc);
1386 DSPExec(1); // Do *one* instruction
// Save the non-pipelined core's state back into context 1
1389 memcpy(ram1, dsp_ram_8, 0x2000);
1390 memcpy(regs1, dsp_reg_bank_0, 32 * 4);
1391 memcpy(&regs1[32], dsp_reg_bank_1, 32 * 4);
1394 ctrl1[2] = dsp_remain;
1395 ctrl1[3] = dsp_modulo;
1396 ctrl1[4] = dsp_flags;
1397 ctrl1[5] = dsp_matrix_control;
1398 ctrl1[6] = dsp_pointer_to_matrix;
1399 ctrl1[7] = dsp_data_organization;
1400 ctrl1[8] = dsp_control;
1401 ctrl1[9] = dsp_div_control;
1402 ctrl1[10] = IMASKCleared;
1403 ctrl1[11] = dsp_flag_z;
1404 ctrl1[12] = dsp_flag_n;
1405 ctrl1[13] = dsp_flag_c;
1407 // Load up vars for pipelined core
1408 memcpy(dsp_ram_8, ram2, 0x2000);
1409 memcpy(dsp_reg_bank_0, regs2, 32 * 4);
1410 memcpy(dsp_reg_bank_1, &regs2[32], 32 * 4);
1413 dsp_remain = ctrl2[2];
1414 dsp_modulo = ctrl2[3];
1415 dsp_flags = ctrl2[4];
1416 dsp_matrix_control = ctrl2[5];
1417 dsp_pointer_to_matrix = ctrl2[6];
1418 dsp_data_organization = ctrl2[7];
1419 dsp_control = ctrl2[8];
1420 dsp_div_control = ctrl2[9];
1421 IMASKCleared = ctrl2[10];
1422 dsp_flag_z = ctrl2[11];
1423 dsp_flag_n = ctrl2[12];
1424 dsp_flag_c = ctrl2[13];
1425 DSPUpdateRegisterBanks();
1427 //WriteLog("\tAbout to execute pipelined core on tick #%u (DSP_PC=%08X)...\n", (uint32_t)count, dsp_pc);
1428 DSPExecP2(1); // Do *one* instruction
// Save the pipelined core's state back into context 2
1431 memcpy(ram2, dsp_ram_8, 0x2000);
1432 memcpy(regs2, dsp_reg_bank_0, 32 * 4);
1433 memcpy(&regs2[32], dsp_reg_bank_1, 32 * 4);
1436 ctrl2[2] = dsp_remain;
1437 ctrl2[3] = dsp_modulo;
1438 ctrl2[4] = dsp_flags;
1439 ctrl2[5] = dsp_matrix_control;
1440 ctrl2[6] = dsp_pointer_to_matrix;
1441 ctrl2[7] = dsp_data_organization;
1442 ctrl2[8] = dsp_control;
1443 ctrl2[9] = dsp_div_control;
1444 ctrl2[10] = IMASKCleared;
1445 ctrl2[11] = dsp_flag_z;
1446 ctrl2[12] = dsp_flag_n;
1447 ctrl2[13] = dsp_flag_c;
// First resynchronisation attempt: the pipelined state is still loaded, so
// step the pipelined core once more and save it again.
1449 if (instr1 != lastExec)
1451 // WriteLog("\nCores diverged at instruction tick #%u!\nAttemping to synchronize...\n\n", count);
1453 // uint32_t ppc = ctrl2[0] - (pipeline[plPtrExec].opcode == 38 ? 6 : (pipeline[plPtrExec].opcode == PIPELINE_STALL ? 0 : 2)) - (pipeline[plPtrWrite].opcode == 38 ? 6 : (pipeline[plPtrWrite].opcode == PIPELINE_STALL ? 0 : 2));
1454 //WriteLog("[DSP_PC1=%08X, DSP_PC2=%08X]\n", ctrl1[0], ppc);
1455 // if (ctrl1[0] < ppc) // P ran ahead of NP
1456 //How to test this crap???
1459 DSPExecP2(1); // Do one more instruction
1462 memcpy(ram2, dsp_ram_8, 0x2000);
1463 memcpy(regs2, dsp_reg_bank_0, 32 * 4);
1464 memcpy(&regs2[32], dsp_reg_bank_1, 32 * 4);
1467 ctrl2[2] = dsp_remain;
1468 ctrl2[3] = dsp_modulo;
1469 ctrl2[4] = dsp_flags;
1470 ctrl2[5] = dsp_matrix_control;
1471 ctrl2[6] = dsp_pointer_to_matrix;
1472 ctrl2[7] = dsp_data_organization;
1473 ctrl2[8] = dsp_control;
1474 ctrl2[9] = dsp_div_control;
1475 ctrl2[10] = IMASKCleared;
1476 ctrl2[11] = dsp_flag_z;
1477 ctrl2[12] = dsp_flag_n;
1478 ctrl2[13] = dsp_flag_c;
// Second attempt: reload the non-pipelined context and step that core instead
1480 // else // NP ran ahead of P
1481 if (instr1 != lastExec) // Must be the other way...
1484 // Load up vars for non-pipelined core
1485 memcpy(dsp_ram_8, ram1, 0x2000);
1486 memcpy(dsp_reg_bank_0, regs1, 32 * 4);
1487 memcpy(dsp_reg_bank_1, &regs1[32], 32 * 4);
1490 dsp_remain = ctrl1[2];
1491 dsp_modulo = ctrl1[3];
1492 dsp_flags = ctrl1[4];
1493 dsp_matrix_control = ctrl1[5];
1494 dsp_pointer_to_matrix = ctrl1[6];
1495 dsp_data_organization = ctrl1[7];
1496 dsp_control = ctrl1[8];
1497 dsp_div_control = ctrl1[9];
1498 IMASKCleared = ctrl1[10];
1499 dsp_flag_z = ctrl1[11];
1500 dsp_flag_n = ctrl1[12];
1501 dsp_flag_c = ctrl1[13];
1502 DSPUpdateRegisterBanks();
// At most two extra non-pipelined instructions are executed
1504 for(int k=0; k<2; k++)
1506 // Decrement cycles based on non-pipelined core...
1507 instr1 = DSPReadWord(dsp_pc, DSP);
1508 cycles -= dsp_opcode_cycles[instr1 >> 10];
1510 //WriteLog("\tAbout to execute non-pipelined core on tick #%u (DSP_PC=%08X)...\n", (uint32_t)count, dsp_pc);
1511 DSPExec(1); // Do *one* instruction
1515 memcpy(ram1, dsp_ram_8, 0x2000);
1516 memcpy(regs1, dsp_reg_bank_0, 32 * 4);
1517 memcpy(&regs1[32], dsp_reg_bank_1, 32 * 4);
1520 ctrl1[2] = dsp_remain;
1521 ctrl1[3] = dsp_modulo;
1522 ctrl1[4] = dsp_flags;
1523 ctrl1[5] = dsp_matrix_control;
1524 ctrl1[6] = dsp_pointer_to_matrix;
1525 ctrl1[7] = dsp_data_organization;
1526 ctrl1[8] = dsp_control;
1527 ctrl1[9] = dsp_div_control;
1528 ctrl1[10] = IMASKCleared;
1529 ctrl1[11] = dsp_flag_z;
1530 ctrl1[12] = dsp_flag_n;
1531 ctrl1[13] = dsp_flag_c;
// Still out of step after both attempts: report both opcodes
1535 if (instr1 != lastExec)
1537 WriteLog("\nCores diverged at instruction tick #%u!\nStopped!\n\n", count);
1539 WriteLog("Instruction for non-pipelined core: %04X\n", instr1);
1540 WriteLog("Instruction for pipelined core: %04X\n", lastExec);
1553 // DSP execution core
1555 //static bool R20Set = false, tripwire = false;
1556 //static uint32_t pcQueue[32], ptrPCQ = 0;
// Non-pipelined DSP execution loop. While the cycle budget is positive and
// DSP_RUNNING holds, it fetches the 16-bit word at dsp_pc, uses the top 6 bits
// as the handler index, the next 5 bits as the first operand field and the low
// 5 bits as the second, calls the handler from dsp_opcode[], counts the use in
// dsp_opcode_use[] and charges dsp_opcode_cycles[index] against the budget.
//   cycles - cycle budget for this call
// NOTE(review): several block-comment delimiters and statements of this
// function are not visible in this listing, so part of what is shown below is
// disabled debugging code.
1557 void DSPExec(int32_t cycles)
1559 #ifdef DSP_SINGLE_STEPPING
1560 if (dsp_control & 0x18)
1563 dsp_control &= ~0x10;
1566 //There is *no* good reason to do this here!
1568 dsp_releaseTimeSlice_flag = 0;
1571 while (cycles > 0 && DSP_RUNNING)
1573 /*extern uint32_t totalFrames;
1574 //F1B2F6: LOAD (R14+$04), R24 [NCZ:001, R14+$04=00F20018, R24=FFFFFFFF] -> Jaguar: Unknown word read at 00F20018 by DSP (M68K PC=00E32E)
1575 //-> 43 + 1 + 24 -> $2B + $01 + $18 -> 101011 00001 11000 -> 1010 1100 0011 1000 -> AC38
1576 //C470 -> 1100 0100 0111 0000 -> 110001 00011 10000 -> 49, 3, 16 -> STORE R16, (R14+$0C)
1578 if (totalFrames >= 377 && GET16(dsp_ram_8, 0x0002F6) == 0xAC38 && dsp_pc == 0xF1B140)
1581 WriteLog("Starting disassembly at frame #%u...\n", totalFrames);
1583 if (dsp_pc == 0xF1B092)
1584 doDSPDis = false;//*/
1585 /*if (dsp_pc == 0xF1B140)
1586 doDSPDis = true;//*/
// After IMASK has been cleared, check for further pending interrupts before
// executing the next instruction, then drop the marker.
1588 if (IMASKCleared) // If IMASK was cleared,
1590 #ifdef DSP_DEBUG_IRQ
1591 WriteLog("DSP: Finished interrupt. PC=$%06X\n", dsp_pc);
1593 DSPHandleIRQsNP(); // See if any other interrupts are pending!
1594 IMASKCleared = false;
1599 WriteLog("\nDSP: Encountered bad write in Atari Synth module. PC=%08X, R15=%08X\n", dsp_pc, dsp_reg[15]);
1600 for(int i=0; i<80; i+=4)
1601 WriteLog(" %08X: %08X\n", dsp_reg[15]+i, JaguarReadLong(dsp_reg[15]+i));
1604 /*if (dsp_pc == 0xF1B55E)
1606 WriteLog("DSP: At $F1B55E--R15 = %08X at %u ms%s...\n", dsp_reg[15], SDL_GetTicks(), (dsp_flags & IMASK ? " (inside interrupt)" : ""));
1608 /*if (dsp_pc == 0xF1B7D2) // Start here???
1610 pcQueue[ptrPCQ++] = dsp_pc;
// Fetch and decode: [15:10] handler index, [9:5] first operand, [4:0] second
1612 uint16_t opcode = DSPReadWord(dsp_pc, DSP);
1613 uint32_t index = opcode >> 10;
1614 dsp_opcode_first_parameter = (opcode >> 5) & 0x1F;
1615 dsp_opcode_second_parameter = opcode & 0x1F;
// Dispatch, record usage, charge the cycle cost
1617 dsp_opcode[index]();
1618 dsp_opcode_use[index]++;
1619 cycles -= dsp_opcode_cycles[index];
1620 /*if (dsp_reg_bank_0[20] == 0xF1A100 & !R20Set)
1622 WriteLog("DSP: R20 set to $F1A100 at %u ms%s...\n", SDL_GetTicks(), (dsp_flags & IMASK ? " (inside interrupt)" : ""));
1625 if (dsp_reg_bank_0[20] != 0xF1A100 && R20Set)
1627 WriteLog("DSP: R20 corrupted at %u ms from starting%s!\nAborting!\n", SDL_GetTicks(), (dsp_flags & IMASK ? " (inside interrupt)" : ""));
1629 DSPDumpDisassembly();
1632 if ((dsp_pc < 0xF1B000 || dsp_pc > 0xF1CFFE) && !tripwire)
1635 WriteLog("DSP: Jumping outside of DSP RAM at %u ms. Register dump:\n", SDL_GetTicks());
1638 WriteLog("\nBacktrace:\n");
1639 for(int i=0; i<32; i++)
1641 dasmjag(JAGUAR_DSP, buffer, pcQueue[(ptrPCQ + i) % 32]);
1642 WriteLog("\t%08X: %s\n", pcQueue[(ptrPCQ + i) % 32], buffer);
1653 // DSP opcode handlers
1656 // There is a problem here with interrupt handlers the JUMP and JR instructions that
1657 // can cause trouble because an interrupt can occur *before* the instruction following the
1658 // jump can execute... !!! FIX !!!
// JUMP handler: when the condition selected by IMM_2 holds, dsp_pc is set to
// the value of RM (logged as register IMM_1). The condition[] strings are used
// only for the disassembly log line.
// NOTE(review): the statements between computing delayed_pc and assigning it to
// dsp_pc are not visible in this listing.
1659 static void dsp_opcode_jump(void)
1662 const char * condition[32] =
1663 { "T", "nz", "z", "???", "nc", "nc nz", "nc z", "???", "c", "c nz",
1664 "c z", "???", "???", "???", "???", "???", "???", "???", "???",
1665 "???", "nn", "nn nz", "nn z", "???", "n", "n nz", "n z", "???",
1666 "???", "???", "???", "F" };
1668 WriteLog("%06X: JUMP %s, (R%02u) [NCZ:%u%u%u, R%02u=%08X] ", dsp_pc-2, condition[IMM_2], IMM_1, dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_1, RM);
1671 /* dsp_flag_c=dsp_flag_c?1:0;
1672 dsp_flag_z=dsp_flag_z?1:0;
1673 dsp_flag_n=dsp_flag_n?1:0;*/
// Flags packed as N in bit 2, C in bit 1, Z in bit 0
1674 // KLUDGE: Used by BRANCH_CONDITION
1675 uint32_t jaguar_flags = (dsp_flag_n << 2) | (dsp_flag_c << 1) | dsp_flag_z;
1677 if (BRANCH_CONDITION(IMM_2))
1681 WriteLog("Branched!\n");
1683 uint32_t delayed_pc = RM;
1685 dsp_pc = delayed_pc;
1690 WriteLog("Branch NOT taken.\n");
// JR handler: relative jump. When the condition selected by IMM_2 holds, the
// 5-bit IMM_1 field is sign-extended, doubled (offsets count 16-bit words) and
// added to dsp_pc. The condition[] strings are used only for the log line.
// NOTE(review): the statements between computing delayed_pc and assigning it to
// dsp_pc are not visible in this listing.
1695 static void dsp_opcode_jr(void)
1698 const char * condition[32] =
1699 { "T", "nz", "z", "???", "nc", "nc nz", "nc z", "???", "c", "c nz",
1700 "c z", "???", "???", "???", "???", "???", "???", "???", "???",
1701 "???", "nn", "nn nz", "nn z", "???", "n", "n nz", "n z", "???",
1702 "???", "???", "???", "F" };
1704 WriteLog("%06X: JR %s, %06X [NCZ:%u%u%u] ", dsp_pc-2, condition[IMM_2], dsp_pc+((IMM_1 & 0x10 ? 0xFFFFFFF0 | IMM_1 : IMM_1) * 2), dsp_flag_n, dsp_flag_c, dsp_flag_z);
1707 /* dsp_flag_c=dsp_flag_c?1:0;
1708 dsp_flag_z=dsp_flag_z?1:0;
1709 dsp_flag_n=dsp_flag_n?1:0;*/
// Flags packed as N in bit 2, C in bit 1, Z in bit 0
1710 // KLUDGE: Used by BRANCH_CONDITION
1711 uint32_t jaguar_flags = (dsp_flag_n << 2) | (dsp_flag_c << 1) | dsp_flag_z;
1713 if (BRANCH_CONDITION(IMM_2))
1717 WriteLog("Branched!\n");
1719 int32_t offset = (IMM_1 & 0x10 ? 0xFFFFFFF0 | IMM_1 : IMM_1); // Sign extend IMM_1
1720 int32_t delayed_pc = dsp_pc + (offset * 2);
1722 dsp_pc = delayed_pc;
1727 WriteLog("Branch NOT taken.\n");
// ADD handler: computes RN + RM and derives Z/N/C through SET_ZNC_ADD.
// NOTE(review): the write-back of 'res' is not visible in this listing.
1732 static void dsp_opcode_add(void)
1736 WriteLog("%06X: ADD R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", dsp_pc-2, IMM_1, IMM_2, dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_1, RM, IMM_2, RN);
1738 uint32_t res = RN + RM;
1739 SET_ZNC_ADD(RN, RM, res);
1743 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_1, RM, IMM_2, RN);
// ADDC handler: computes RN + RM + carry-in. The carry-in is folded into the
// first operand handed to SET_ZNC_ADD so the flag computation accounts for it.
// NOTE(review): the write-back of 'res' is not visible in this listing.
1748 static void dsp_opcode_addc(void)
1752 WriteLog("%06X: ADDC R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", dsp_pc-2, IMM_1, IMM_2, dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_1, RM, IMM_2, RN);
1754 uint32_t res = RN + RM + dsp_flag_c;
// Saved before SET_ZNC_ADD, which is handed RN + carry as its first operand
1755 uint32_t carry = dsp_flag_c;
1756 // SET_ZNC_ADD(RN, RM, res); //???BUG??? Yes!
1757 SET_ZNC_ADD(RN + carry, RM, res);
1758 // SET_ZNC_ADD(RN, RM + carry, res);
1762 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_1, RM, IMM_2, RN);
// ADDQ handler: adds the quick immediate dsp_convert_zero[IMM_1] to RN and
// derives Z/N/C (after CLR_ZNC) through SET_ZNC_ADD.
// NOTE(review): the write-back of 'res' is not visible in this listing.
1767 static void dsp_opcode_addq(void)
1771 WriteLog("%06X: ADDQ #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", dsp_pc-2, dsp_convert_zero[IMM_1], IMM_2, dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_2, RN);
1773 uint32_t r1 = dsp_convert_zero[IMM_1];
1774 uint32_t res = RN + r1;
1775 CLR_ZNC; SET_ZNC_ADD(RN, r1, res);
1779 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_2, RN);
// SUB handler: computes RN - RM and derives Z/N/C through SET_ZNC_SUB.
// NOTE(review): the write-back of 'res' is not visible in this listing.
1784 static void dsp_opcode_sub(void)
1788 WriteLog("%06X: SUB R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", dsp_pc-2, IMM_1, IMM_2, dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_1, RM, IMM_2, RN);
1790 uint32_t res = RN - RM;
1791 SET_ZNC_SUB(RN, RM, res);
1795 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_1, RM, IMM_2, RN);
// SUBC handler: subtract with carry, done as RN + ~RM + (carry ^ 1) in 64 bits
// so that bit 32 of the sum can be read back as the (inverted) carry out. The
// low 32 bits of the sum are stored in RN.
// NOTE(review): any update of the Z and N flags is not visible in this listing.
1800 static void dsp_opcode_subc(void)
1804 WriteLog("%06X: SUBC R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", dsp_pc-2, IMM_1, IMM_2, dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_1, RM, IMM_2, RN);
1806 // This is how the DSP ALU does it--Two's complement with inverted carry
1807 uint64_t res = (uint64_t)RN + (uint64_t)(RM ^ 0xFFFFFFFF) + (dsp_flag_c ^ 1);
1808 // Carry out of the result is inverted too
1809 dsp_flag_c = ((res >> 32) & 0x01) ^ 1;
1810 RN = (res & 0xFFFFFFFF);
1814 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_1, RM, IMM_2, RN);
// SUBQ handler: subtracts the quick immediate dsp_convert_zero[IMM_1] from RN
// and derives Z/N/C through SET_ZNC_SUB.
// NOTE(review): the write-back of 'res' is not visible in this listing.
1819 static void dsp_opcode_subq(void)
1823 WriteLog("%06X: SUBQ #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", dsp_pc-2, dsp_convert_zero[IMM_1], IMM_2, dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_2, RN);
1825 uint32_t r1 = dsp_convert_zero[IMM_1];
1826 uint32_t res = RN - r1;
1827 SET_ZNC_SUB(RN, r1, res);
1831 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_2, RN);
// CMP handler: computes RN - RM only to derive Z/N/C through SET_ZNC_SUB; the
// closing log line prints flags only, no register result.
1836 static void dsp_opcode_cmp(void)
1840 WriteLog("%06X: CMP R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", dsp_pc-2, IMM_1, IMM_2, dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_1, RM, IMM_2, RN);
1842 uint32_t res = RN - RM;
1843 SET_ZNC_SUB(RN, RM, res);
1846 WriteLog("[NCZ:%u%u%u]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z);
// CMPQ handler: compares RN with a signed quick immediate. sqtable maps the
// 5-bit IMM_1 field to its signed value (0..15, then -16..-1); RN - immediate
// is computed only to derive Z/N/C through SET_ZNC_SUB.
1851 static void dsp_opcode_cmpq(void)
1853 static int32_t sqtable[32] =
1854 { 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,-16,-15,-14,-13,-12,-11,-10,-9,-8,-7,-6,-5,-4,-3,-2,-1 };
1857 WriteLog("%06X: CMPQ #%d, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", dsp_pc-2, sqtable[IMM_1], IMM_2, dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_2, RN);
1859 uint32_t r1 = sqtable[IMM_1 & 0x1F]; // I like this better -> (INT8)(jaguar.op >> 2) >> 3;
1860 uint32_t res = RN - r1;
1861 SET_ZNC_SUB(RN, r1, res);
1864 WriteLog("[NCZ:%u%u%u]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z);
// AND handler. Only the before/after disassembly log lines are visible in this
// listing; the operation itself is on lines not shown here.
1869 static void dsp_opcode_and(void)
1873 WriteLog("%06X: AND R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", dsp_pc-2, IMM_1, IMM_2, dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_1, RM, IMM_2, RN);
1879 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_1, RM, IMM_2, RN);
// OR handler. Only the before/after disassembly log lines are visible in this
// listing; the operation itself is on lines not shown here.
1884 static void dsp_opcode_or(void)
1888 WriteLog("%06X: OR R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", dsp_pc-2, IMM_1, IMM_2, dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_1, RM, IMM_2, RN);
1894 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_1, RM, IMM_2, RN);
// XOR handler. Only the before/after disassembly log lines are visible in this
// listing; the operation itself is on lines not shown here.
1899 static void dsp_opcode_xor(void)
1903 WriteLog("%06X: XOR R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", dsp_pc-2, IMM_1, IMM_2, dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_1, RM, IMM_2, RN);
1909 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_1, RM, IMM_2, RN);
// NOT handler. Only the before/after disassembly log lines are visible in this
// listing; the operation itself is on lines not shown here.
1914 static void dsp_opcode_not(void)
1918 WriteLog("%06X: NOT R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", dsp_pc-2, IMM_2, dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_2, RN);
1924 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_2, RN);
// MOVE PC handler. NOTE(review): the body is not visible in this listing.
1929 static void dsp_opcode_move_pc(void)
// STORE Rn,(R14+offset) handler: writes RN as a long to R14 plus
// dsp_convert_zero[IMM_1] * 4. Two variants are visible: with
// DSP_CORRECT_ALIGNMENT_STORE the low two bits of R14 are masked off before the
// offset is added; the other uses R14 unmodified.
1935 static void dsp_opcode_store_r14_indexed(void)
1937 #ifdef DSP_DIS_STORE14I
1939 WriteLog("%06X: STORE R%02u, (R14+$%02X) [NCZ:%u%u%u, R%02u=%08X, R14+$%02X=%08X]\n", dsp_pc-2, IMM_2, dsp_convert_zero[IMM_1] << 2, dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_2, RN, dsp_convert_zero[IMM_1] << 2, dsp_reg[14]+(dsp_convert_zero[IMM_1] << 2));
1941 #ifdef DSP_CORRECT_ALIGNMENT_STORE
1942 DSPWriteLong((dsp_reg[14] & 0xFFFFFFFC) + (dsp_convert_zero[IMM_1] << 2), RN, DSP);
1944 DSPWriteLong(dsp_reg[14] + (dsp_convert_zero[IMM_1] << 2), RN, DSP);
// STORE Rn,(R15+offset) handler: writes RN as a long to R15 plus
// dsp_convert_zero[IMM_1] * 4. Two variants are visible: with
// DSP_CORRECT_ALIGNMENT_STORE the low two bits of R15 are masked off before the
// offset is added; the other uses R15 unmodified.
1949 static void dsp_opcode_store_r15_indexed(void)
1951 #ifdef DSP_DIS_STORE15I
1953 WriteLog("%06X: STORE R%02u, (R15+$%02X) [NCZ:%u%u%u, R%02u=%08X, R15+$%02X=%08X]\n", dsp_pc-2, IMM_2, dsp_convert_zero[IMM_1] << 2, dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_2, RN, dsp_convert_zero[IMM_1] << 2, dsp_reg[15]+(dsp_convert_zero[IMM_1] << 2));
1955 #ifdef DSP_CORRECT_ALIGNMENT_STORE
1956 DSPWriteLong((dsp_reg[15] & 0xFFFFFFFC) + (dsp_convert_zero[IMM_1] << 2), RN, DSP);
1958 DSPWriteLong(dsp_reg[15] + (dsp_convert_zero[IMM_1] << 2), RN, DSP);
// LOAD (R14+Rm),Rn handler: reads a long from R14 + RM into RN. With
// DSP_CORRECT_ALIGNMENT the summed address has its low two bits cleared first;
// the other visible variant uses the sum as is.
1963 static void dsp_opcode_load_r14_ri(void)
1965 #ifdef DSP_DIS_LOAD14R
1967 WriteLog("%06X: LOAD (R14+R%02u), R%02u [NCZ:%u%u%u, R14+R%02u=%08X, R%02u=%08X] -> ", dsp_pc-2, IMM_1, IMM_2, dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_1, RM+dsp_reg[14], IMM_2, RN);
1969 #ifdef DSP_CORRECT_ALIGNMENT
1970 RN = DSPReadLong((dsp_reg[14] + RM) & 0xFFFFFFFC, DSP);
1972 RN = DSPReadLong(dsp_reg[14] + RM, DSP);
1974 #ifdef DSP_DIS_LOAD14R
1976 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_2, RN);
// LOAD (R15+Rm),Rn handler: reads a long from R15 + RM into RN. With
// DSP_CORRECT_ALIGNMENT the summed address has its low two bits cleared first;
// the other visible variant uses the sum as is.
1981 static void dsp_opcode_load_r15_ri(void)
1983 #ifdef DSP_DIS_LOAD15R
1985 WriteLog("%06X: LOAD (R15+R%02u), R%02u [NCZ:%u%u%u, R15+R%02u=%08X, R%02u=%08X] -> ", dsp_pc-2, IMM_1, IMM_2, dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_1, RM+dsp_reg[15], IMM_2, RN);
1987 #ifdef DSP_CORRECT_ALIGNMENT
1988 RN = DSPReadLong((dsp_reg[15] + RM) & 0xFFFFFFFC, DSP);
1990 RN = DSPReadLong(dsp_reg[15] + RM, DSP);
1992 #ifdef DSP_DIS_LOAD15R
1994 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_2, RN);
// STORE Rn,(R14+Rm) handler: writes RN as a long to address R14 + RM. No
// alignment masking is applied in the visible statement.
1999 static void dsp_opcode_store_r14_ri(void)
2001 DSPWriteLong(dsp_reg[14] + RM, RN, DSP);
// STORE Rn,(R15+Rm) handler: writes RN as a long to address R15 + RM. No
// alignment masking is applied in the visible statement.
2005 static void dsp_opcode_store_r15_ri(void)
2007 DSPWriteLong(dsp_reg[15] + RM, RN, DSP);
// NOP handler: the only visible statement is the disassembly log line.
2011 static void dsp_opcode_nop(void)
2015 WriteLog("%06X: NOP [NCZ:%u%u%u]\n", dsp_pc-2, dsp_flag_n, dsp_flag_c, dsp_flag_z);
// STOREB handler: stores the low byte of RN at address RM. Addresses inside the
// 8 KB window starting at DSP_WORK_RAM_BASE go through DSPWriteLong with the
// value masked to 8 bits; all other addresses go through JaguarWriteByte.
2020 static void dsp_opcode_storeb(void)
2022 #ifdef DSP_DIS_STOREB
2024 WriteLog("%06X: STOREB R%02u, (R%02u) [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", dsp_pc-2, IMM_2, IMM_1, dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_2, RN, IMM_1, RM);
2026 if (RM >= DSP_WORK_RAM_BASE && RM <= (DSP_WORK_RAM_BASE + 0x1FFF))
2027 DSPWriteLong(RM, RN & 0xFF, DSP);
2029 JaguarWriteByte(RM, RN, DSP);
// STOREW handler: stores the low word of RN at address RM. Addresses inside the
// 8 KB window starting at DSP_WORK_RAM_BASE go through DSPWriteLong with the
// value masked to 16 bits; all other addresses go through JaguarWriteWord. The
// DSP_CORRECT_ALIGNMENT_STORE variant additionally clears bit 0 of the address.
2033 static void dsp_opcode_storew(void)
2035 #ifdef DSP_DIS_STOREW
2037 WriteLog("%06X: STOREW R%02u, (R%02u) [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", dsp_pc-2, IMM_2, IMM_1, dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_2, RN, IMM_1, RM);
2039 #ifdef DSP_CORRECT_ALIGNMENT_STORE
2040 if (RM >= DSP_WORK_RAM_BASE && RM <= (DSP_WORK_RAM_BASE + 0x1FFF))
2041 DSPWriteLong(RM & 0xFFFFFFFE, RN & 0xFFFF, DSP);
2043 JaguarWriteWord(RM & 0xFFFFFFFE, RN, DSP);
2045 if (RM >= DSP_WORK_RAM_BASE && RM <= (DSP_WORK_RAM_BASE + 0x1FFF))
2046 DSPWriteLong(RM, RN & 0xFFFF, DSP);
2048 JaguarWriteWord(RM, RN, DSP);
// STORE handler: writes RN as a long to address RM. The
// DSP_CORRECT_ALIGNMENT_STORE variant clears the low two address bits first.
2053 static void dsp_opcode_store(void)
2055 #ifdef DSP_DIS_STORE
2057 WriteLog("%06X: STORE R%02u, (R%02u) [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", dsp_pc-2, IMM_2, IMM_1, dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_2, RN, IMM_1, RM);
2059 #ifdef DSP_CORRECT_ALIGNMENT_STORE
2060 DSPWriteLong(RM & 0xFFFFFFFC, RN, DSP);
2062 DSPWriteLong(RM, RN, DSP);
// LOADB handler: loads a byte from address RM into RN. Addresses inside the
// 8 KB window starting at DSP_WORK_RAM_BASE are read with DSPReadLong and masked
// to 8 bits; all other addresses are read with JaguarReadByte.
2067 static void dsp_opcode_loadb(void)
2069 #ifdef DSP_DIS_LOADB
2071 WriteLog("%06X: LOADB (R%02u), R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", dsp_pc-2, IMM_1, IMM_2, dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_1, RM, IMM_2, RN);
2073 if (RM >= DSP_WORK_RAM_BASE && RM <= (DSP_WORK_RAM_BASE + 0x1FFF))
2074 RN = DSPReadLong(RM, DSP) & 0xFF;
2076 RN = JaguarReadByte(RM, DSP);
2077 #ifdef DSP_DIS_LOADB
2079 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_2, RN);
// LOADW handler: loads a word from address RM into RN. Addresses inside the
// 8 KB window starting at DSP_WORK_RAM_BASE are read with DSPReadLong and masked
// to 16 bits; all other addresses are read with JaguarReadWord. The
// DSP_CORRECT_ALIGNMENT variant additionally clears bit 0 of the address.
2084 static void dsp_opcode_loadw(void)
2086 #ifdef DSP_DIS_LOADW
2088 WriteLog("%06X: LOADW (R%02u), R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", dsp_pc-2, IMM_1, IMM_2, dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_1, RM, IMM_2, RN);
2090 #ifdef DSP_CORRECT_ALIGNMENT
2091 if (RM >= DSP_WORK_RAM_BASE && RM <= (DSP_WORK_RAM_BASE + 0x1FFF))
2092 RN = DSPReadLong(RM & 0xFFFFFFFE, DSP) & 0xFFFF;
2094 RN = JaguarReadWord(RM & 0xFFFFFFFE, DSP);
2096 if (RM >= DSP_WORK_RAM_BASE && RM <= (DSP_WORK_RAM_BASE + 0x1FFF))
2097 RN = DSPReadLong(RM, DSP) & 0xFFFF;
2099 RN = JaguarReadWord(RM, DSP);
2101 #ifdef DSP_DIS_LOADW
2103 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_2, RN);
// LOAD handler: reads a long from address RM into RN. The DSP_CORRECT_ALIGNMENT
// variant clears the low two address bits first.
2108 static void dsp_opcode_load(void)
2112 WriteLog("%06X: LOAD (R%02u), R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", dsp_pc-2, IMM_1, IMM_2, dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_1, RM, IMM_2, RN);
2114 #ifdef DSP_CORRECT_ALIGNMENT
2115 RN = DSPReadLong(RM & 0xFFFFFFFC, DSP);
2117 RN = DSPReadLong(RM, DSP);
2121 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_2, RN);
// LOAD (R14+offset),Rn handler: reads a long from R14 plus
// dsp_convert_zero[IMM_1] * 4 into RN. With DSP_CORRECT_ALIGNMENT the low two
// bits of R14 are masked off before the offset is added.
2126 static void dsp_opcode_load_r14_indexed(void)
2128 #ifdef DSP_DIS_LOAD14I
2130 WriteLog("%06X: LOAD (R14+$%02X), R%02u [NCZ:%u%u%u, R14+$%02X=%08X, R%02u=%08X] -> ", dsp_pc-2, dsp_convert_zero[IMM_1] << 2, IMM_2, dsp_flag_n, dsp_flag_c, dsp_flag_z, dsp_convert_zero[IMM_1] << 2, dsp_reg[14]+(dsp_convert_zero[IMM_1] << 2), IMM_2, RN);
2132 #ifdef DSP_CORRECT_ALIGNMENT
2133 RN = DSPReadLong((dsp_reg[14] & 0xFFFFFFFC) + (dsp_convert_zero[IMM_1] << 2), DSP);
2135 RN = DSPReadLong(dsp_reg[14] + (dsp_convert_zero[IMM_1] << 2), DSP);
2137 #ifdef DSP_DIS_LOAD14I
2139 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_2, RN);
// LOAD (R15+offset),Rn handler: reads a long from R15 plus
// dsp_convert_zero[IMM_1] * 4 into RN. With DSP_CORRECT_ALIGNMENT the low two
// bits of R15 are masked off before the offset is added.
2144 static void dsp_opcode_load_r15_indexed(void)
2146 #ifdef DSP_DIS_LOAD15I
2148 WriteLog("%06X: LOAD (R15+$%02X), R%02u [NCZ:%u%u%u, R15+$%02X=%08X, R%02u=%08X] -> ", dsp_pc-2, dsp_convert_zero[IMM_1] << 2, IMM_2, dsp_flag_n, dsp_flag_c, dsp_flag_z, dsp_convert_zero[IMM_1] << 2, dsp_reg[15]+(dsp_convert_zero[IMM_1] << 2), IMM_2, RN);
2150 #ifdef DSP_CORRECT_ALIGNMENT
2151 RN = DSPReadLong((dsp_reg[15] & 0xFFFFFFFC) + (dsp_convert_zero[IMM_1] << 2), DSP);
2153 RN = DSPReadLong(dsp_reg[15] + (dsp_convert_zero[IMM_1] << 2), DSP);
2155 #ifdef DSP_DIS_LOAD15I
2157 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_2, RN);
// MOVEI handler: loads RN with the 32-bit immediate that follows the opcode,
// stored low word first (at dsp_pc) and high word second (at dsp_pc + 2).
// NOTE(review): the advance of dsp_pc past the immediate is not visible in this
// listing.
2162 static void dsp_opcode_movei(void)
2164 #ifdef DSP_DIS_MOVEI
// The log line reads the immediate with the same (address, DSP) arguments as
// the real fetch below.
2166 WriteLog("%06X: MOVEI #$%08X, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", dsp_pc-2, (uint32_t)DSPReadWord(dsp_pc, DSP) | ((uint32_t)DSPReadWord(dsp_pc + 2, DSP) << 16), IMM_2, dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_2, RN);
2168 // This instruction is followed by 32-bit value in LSW / MSW format...
2169 RN = (uint32_t)DSPReadWord(dsp_pc, DSP) | ((uint32_t)DSPReadWord(dsp_pc + 2, DSP) << 16);
2171 #ifdef DSP_DIS_MOVEI
2173 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_2, RN);
// MOVETA handler. The log lines show RM as the source and ALTERNATE_RN (the
// IMM_2 register of the alternate bank) as the destination.
// NOTE(review): the assignment itself is not visible in this listing.
2178 static void dsp_opcode_moveta(void)
2180 #ifdef DSP_DIS_MOVETA
2182 WriteLog("%06X: MOVETA R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u(alt)=%08X] -> ", dsp_pc-2, IMM_1, IMM_2, dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_1, RM, IMM_2, ALTERNATE_RN);
2185 #ifdef DSP_DIS_MOVETA
2187 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u(alt)=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_1, RM, IMM_2, ALTERNATE_RN);
// MOVEFA handler. The log lines show ALTERNATE_RM (the IMM_1 register of the
// alternate bank) as the source and RN as the destination.
// NOTE(review): the assignment itself is not visible in this listing.
2192 static void dsp_opcode_movefa(void)
2194 #ifdef DSP_DIS_MOVEFA
2196 WriteLog("%06X: MOVEFA R%02u, R%02u [NCZ:%u%u%u, R%02u(alt)=%08X, R%02u=%08X] -> ", dsp_pc-2, IMM_1, IMM_2, dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_1, ALTERNATE_RM, IMM_2, RN);
2199 #ifdef DSP_DIS_MOVEFA
2201 WriteLog("[NCZ:%u%u%u, R%02u(alt)=%08X, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_1, ALTERNATE_RM, IMM_2, RN);
// MOVE handler. Only the before/after disassembly log lines are visible in this
// listing; the register copy itself is on lines not shown here.
2206 static void dsp_opcode_move(void)
2210 WriteLog("%06X: MOVE R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", dsp_pc-2, IMM_1, IMM_2, dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_1, RM, IMM_2, RN);
2215 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_1, RM, IMM_2, RN);
// MOVEQ handler. The log line shows IMM_1 as the quick immediate and RN as the
// destination. NOTE(review): the assignment itself is not visible in this listing.
2220 static void dsp_opcode_moveq(void)
2222 #ifdef DSP_DIS_MOVEQ
2224 WriteLog("%06X: MOVEQ #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", dsp_pc-2, IMM_1, IMM_2, dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_2, RN);
2227 #ifdef DSP_DIS_MOVEQ
2229 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_2, RN);
// RESMAC handler: copies the low 32 bits of the accumulator dsp_acc into RN.
// The log line prints the accumulator as an 8-bit high part plus 32-bit low part.
2234 static void dsp_opcode_resmac(void)
2236 #ifdef DSP_DIS_RESMAC
2238 WriteLog("%06X: RESMAC R%02u [NCZ:%u%u%u, R%02u=%08X, DSP_ACC=%02X%08X] -> ", dsp_pc-2, IMM_2, dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_2, RN, (uint8_t)(dsp_acc >> 32), (uint32_t)(dsp_acc & 0xFFFFFFFF));
2240 RN = (uint32_t)dsp_acc;
2241 #ifdef DSP_DIS_RESMAC
2243 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_2, RN);
// IMULT handler: signed 16 x 16 multiply. The low 16 bits of RN and RM are
// reinterpreted as int16_t, multiplied, and the product is stored in RN.
// NOTE(review): any flag update is not visible in this listing.
2248 static void dsp_opcode_imult(void)
2250 #ifdef DSP_DIS_IMULT
2252 WriteLog("%06X: IMULT R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", dsp_pc-2, IMM_1, IMM_2, dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_1, RM, IMM_2, RN);
2254 RN = (int16_t)RN * (int16_t)RM;
2256 #ifdef DSP_DIS_IMULT
2258 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_1, RM, IMM_2, RN);
// MULT handler: unsigned 16 x 16 multiply. The low 16 bits of RM and RN are
// taken as uint16_t, multiplied, and the product is stored in RN.
// NOTE(review): any flag update is not visible in this listing.
2263 static void dsp_opcode_mult(void)
2267 WriteLog("%06X: MULT R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", dsp_pc-2, IMM_1, IMM_2, dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_1, RM, IMM_2, RN);
2269 RN = (uint16_t)RM * (uint16_t)RN;
2273 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_1, RM, IMM_2, RN);
// BCLR handler: computes RN with bit number IMM_1 cleared.
// NOTE(review): the write-back of 'res' and any flag update are not visible in
// this listing.
2278 static void dsp_opcode_bclr(void)
2282 WriteLog("%06X: BCLR #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", dsp_pc-2, IMM_1, IMM_2, dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_2, RN);
// Unsigned literal: the bit number is a 5-bit field, so it can be 31, and
// shifting a signed 1 into the sign bit is not portable.
2284 uint32_t res = RN & ~(1u << IMM_1);
2289 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_2, RN);
// BTST handler: sets the Z flag to the inverse of bit number IMM_1 of RN
// (Z = 1 when the tested bit is 0). RN itself is not modified by the visible code.
2294 static void dsp_opcode_btst(void)
2298 WriteLog("%06X: BTST #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", dsp_pc-2, IMM_1, IMM_2, dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_2, RN);
2300 dsp_flag_z = (~RN >> IMM_1) & 1;
2303 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_2, RN);
// BSET handler: computes RN with bit number IMM_1 set.
// NOTE(review): the write-back of 'res' and any flag update are not visible in
// this listing.
2308 static void dsp_opcode_bset(void)
2312 WriteLog("%06X: BSET #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", dsp_pc-2, IMM_1, IMM_2, dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_2, RN);
// Unsigned literal: the bit number is a 5-bit field, so it can be 31, and
// shifting a signed 1 into the sign bit is not portable.
2314 uint32_t res = RN | (1u << IMM_1);
2319 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_2, RN);
// SUBQT handler: subtracts the quick immediate dsp_convert_zero[IMM_1] from RN
// in place. No flag update appears in the visible code.
2324 static void dsp_opcode_subqt(void)
2326 #ifdef DSP_DIS_SUBQT
2328 WriteLog("%06X: SUBQT #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", dsp_pc-2, dsp_convert_zero[IMM_1], IMM_2, dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_2, RN);
2330 RN -= dsp_convert_zero[IMM_1];
2331 #ifdef DSP_DIS_SUBQT
2333 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_2, RN);
// ADDQT handler: adds the quick immediate dsp_convert_zero[IMM_1] to RN in
// place. No flag update appears in the visible code.
2338 static void dsp_opcode_addqt(void)
2340 #ifdef DSP_DIS_ADDQT
2342 WriteLog("%06X: ADDQT #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", dsp_pc-2, dsp_convert_zero[IMM_1], IMM_2, dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_2, RN);
2344 RN += dsp_convert_zero[IMM_1];
2345 #ifdef DSP_DIS_ADDQT
2347 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_2, RN);
// IMACN handler: multiply-accumulate. The low 16 bits of RM and RN are
// multiplied as signed values and the sign-extended product is added to the
// accumulator dsp_acc. RN is not written by the visible code.
2352 static void dsp_opcode_imacn(void)
2354 #ifdef DSP_DIS_IMACN
2356 WriteLog("%06X: IMACN R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", dsp_pc-2, IMM_1, IMM_2, dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_1, RM, IMM_2, RN);
2358 int32_t res = (int16_t)RM * (int16_t)RN;
2359 dsp_acc += (int64_t)res;
2360 //Should we AND the result to fit into 40 bits here???
2361 #ifdef DSP_DIS_IMACN
2363 WriteLog("[NCZ:%u%u%u, DSP_ACC=%02X%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, (uint8_t)(dsp_acc >> 32), (uint32_t)(dsp_acc & 0xFFFFFFFF));
// MTOI handler: builds RN from RM by keeping the low 23 bits of RM and filling
// bits 23-31 from RM shifted right by 8 as a signed value.
// NOTE(review): any flag update is not visible in this listing.
2368 static void dsp_opcode_mtoi(void)
2370 RN = (((int32_t)RM >> 8) & 0xFF800000) | (RM & 0x007FFFFF);
// NORMI handler. Only the two loop conditions are visible in this listing: one
// loop runs while the top 10 bits of _Rm are all zero, the other while any of
// the top 9 bits is set. NOTE(review): the loop bodies and the declaration of
// _Rm are on lines not shown here.
2375 static void dsp_opcode_normi(void)
2382 while ((_Rm & 0xffc00000) == 0)
2387 while ((_Rm & 0xff800000) != 0)
// MMULT handler: matrix multiply. The element count is the low 4 bits of
// dsp_matrix_control and the matrix is read from dsp_pointer_to_matrix. Bit 4
// of dsp_matrix_control selects between the two visible loops. In each loop one
// operand comes from the alternate register bank, two 16-bit values per
// register (index first-parameter + i/2, high or low half), and the other is a
// 16-bit word read from addr + 2. The 32-bit result goes to RN.
// NOTE(review): the half-selection test, the accumulation, the address stepping
// and the flag update are on lines not visible in this listing.
2398 static void dsp_opcode_mmult(void)
2400 int count = dsp_matrix_control&0x0f;
2401 uint32_t addr = dsp_pointer_to_matrix; // in the dsp ram
2405 if (!(dsp_matrix_control & 0x10))
2407 for (int i = 0; i < count; i++)
2411 a=(int16_t)((dsp_alternate_reg[dsp_opcode_first_parameter + (i>>1)]>>16)&0xffff);
2413 a=(int16_t)(dsp_alternate_reg[dsp_opcode_first_parameter + (i>>1)]&0xffff);
2414 int16_t b=((int16_t)DSPReadWord(addr + 2, DSP));
2421 for (int i = 0; i < count; i++)
2425 a=(int16_t)((dsp_alternate_reg[dsp_opcode_first_parameter + (i>>1)]>>16)&0xffff);
2427 a=(int16_t)(dsp_alternate_reg[dsp_opcode_first_parameter + (i>>1)]&0xffff);
2428 int16_t b=((int16_t)DSPReadWord(addr + 2, DSP));
2433 RN = res = (int32_t)accum;
2435 //NOTE: The flags are set based upon the last add/multiply done...
// ABS handler: replaces RN with its absolute value.
2440 static void dsp_opcode_abs(void)
2444 WriteLog("%06X: ABS R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", dsp_pc-2, IMM_2, dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_2, RN);
// 0x80000000 has no positive two's-complement counterpart, so it is tested
// for explicitly.
2449 if (_Rn == 0x80000000)
// Carry receives the original sign bit (bit 31) of the operand.
2453 dsp_flag_c = ((_Rn & 0x80000000) >> 31);
// Negate when the sign bit is set, otherwise keep the value; the result is
// stored in both res and RN.
2454 res = RN = (_Rn & 0x80000000 ? -_Rn : _Rn);
2459 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_2, RN);
// DIV handler: unsigned division of RN by RM, with the remainder kept in
// dsp_remain. Bit 0 of dsp_div_control selects 16.16 fixed-point mode. Two
// formulations appear below: a direct one using the C / and % operators, and
// a bit-serial one credited to SCPCD.
2464 static void dsp_opcode_div(void)
2469 if (dsp_div_control & 0x01) // 16.16 division
// The dividend is widened to 64 bits and pre-shifted left by 16.
2471 dsp_remain = ((uint64_t)RN << 16) % RM;
2472 RN = ((uint64_t)RN << 16) / RM;
2476 // We calculate the remainder first because we destroy RN after
2477 // this by assigning it to itself.
2478 dsp_remain = RN % RM;
2485 // This is what happens according to SCPCD. NYAN!
2490 // Real algorithm, courtesy of SCPCD: NYAN!
2494 // If 16.16 division, stuff top 16 bits of RN into remainder and put the
2495 // bottom 16 of RN in top 16 of quotient
2496 if (dsp_div_control & 0x01)
2497 q <<= 16, r = RN >> 16;
// 32 iterations, one quotient bit per pass.
// NOTE(review): the add-or-subtract step keyed on the remainder's sign looks
// like non-restoring division -- confirm.
2499 for(int i=0; i<32; i++)
2501 // uint32_t sign = (r >> 31) & 0x01;
// Remember the sign (bit 31) of the running remainder before shifting it.
2502 uint32_t sign = r & 0x80000000;
// Shift the remainder left, bringing in the top bit of q.
2503 r = (r << 1) | ((q >> 31) & 0x01);
// Add the divisor back if the remainder was negative, otherwise subtract it.
2504 r += (sign ? RM : -RM);
// Shift q left; the new low bit is the inverse of the remainder's new sign.
2505 q = (q << 1) | (((~r) >> 31) & 0x01);
// IMULTN handler: signed 16x16 multiply of the low halves of RN and RM. The
// product replaces dsp_acc (plain assignment, not accumulation).
2514 static void dsp_opcode_imultn(void)
// Trace output is compiled in only when DSP_DIS_IMULTN is defined.
2516 #ifdef DSP_DIS_IMULTN
2518 WriteLog("%06X: IMULTN R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", dsp_pc-2, IMM_1, IMM_2, dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_1, RM, IMM_2, RN);
2520 // This is OK, since this multiply won't overflow 32 bits...
2521 int32_t res = (int32_t)((int16_t)RN * (int16_t)RM);
// Sign-extend the 32-bit product into the 64-bit accumulator.
2522 dsp_acc = (int64_t)res;
// The trace prints dsp_acc as bits 32-39 (two hex digits) followed by bits 0-31.
2524 #ifdef DSP_DIS_IMULTN
2526 WriteLog("[NCZ:%u%u%u, DSP_ACC=%02X%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, (uint8_t)(dsp_acc >> 32), (uint32_t)(dsp_acc & 0xFFFFFFFF));
// NEG handler for RN.
2531 static void dsp_opcode_neg(void)
2535 WriteLog("%06X: NEG R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", dsp_pc-2, IMM_2, dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_2, RN);
// Flags are derived via SET_ZNC_SUB with operands 0 and RN, i.e. as for the
// subtraction 0 - RN.
// NOTE(review): presumably res holds that difference -- confirm.
2538 SET_ZNC_SUB(0, RN, res);
2542 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_2, RN);
// SHLQ handler: logical shift left of RN by an immediate count.
2547 static void dsp_opcode_shlq(void)
2551 WriteLog("%06X: SHLQ #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", dsp_pc-2, 32 - IMM_1, IMM_2, dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_2, RN);
2553 // NB: This instruction is the *only* one that does (32 - immediate data).
2554 int32_t r1 = 32 - IMM_1;
// NOTE(review): if IMM_1 can be 0 then r1 is 32, and shifting a 32-bit value
// by 32 is undefined behaviour in C -- confirm the encoding rules this out or
// handle it explicitly.
2555 uint32_t res = RN << r1;
// Z and N follow the shifted result; carry takes bit 31 of RN.
2556 SET_ZN(res); dsp_flag_c = (RN >> 31) & 1;
2560 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_2, RN);
// SHRQ handler: logical shift right of RN by an immediate count.
2565 static void dsp_opcode_shrq(void)
2569 WriteLog("%06X: SHRQ #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", dsp_pc-2, dsp_convert_zero[IMM_1], IMM_2, dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_2, RN);
// The shift count is the immediate field translated through dsp_convert_zero.
2571 int32_t r1 = dsp_convert_zero[IMM_1];
// NOTE(review): if dsp_convert_zero can yield 32 (presumably for an encoded
// 0 -- confirm), this shifts a 32-bit value by 32, which is undefined in C.
2572 uint32_t res = RN >> r1;
// Z and N follow the shifted result; carry takes bit 0 of RN.
2573 SET_ZN(res); dsp_flag_c = RN & 1;
2577 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_2, RN);
// ROR handler: rotates RN right by the count held in RM.
2582 static void dsp_opcode_ror(void)
2586 WriteLog("%06X: ROR R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", dsp_pc-2, IMM_1, IMM_2, dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_1, RM, IMM_2, RN);
// Only the low five bits of RM are used, so the count is 0..31.
2588 uint32_t r1 = RM & 0x1F;
// NOTE(review): when r1 == 0 the left half evaluates RN << 32, which is
// undefined for a 32-bit operand in C -- confirm and consider guarding it.
2589 uint32_t res = (RN >> r1) | (RN << (32 - r1));
// Z and N follow the rotated result; carry takes bit 31 of RN.
2590 SET_ZN(res); dsp_flag_c = (RN >> 31) & 1;
2594 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_1, RM, IMM_2, RN);
// RORQ handler: rotate right by an immediate count.
2599 static void dsp_opcode_rorq(void)
2603 WriteLog("%06X: RORQ #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", dsp_pc-2, dsp_convert_zero[IMM_1], IMM_2, dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_2, RN);
// The count is the immediate field (masked to five bits) translated through
// dsp_convert_zero.
2605 uint32_t r1 = dsp_convert_zero[IMM_1 & 0x1F];
// r2 is the value being rotated.
// NOTE(review): if r1 can be 32 (presumably for an encoded 0 -- confirm),
// r2 >> 32 is undefined for a 32-bit operand in C.
2607 uint32_t res = (r2 >> r1) | (r2 << (32 - r1));
// Z and N follow the rotated result; carry takes bit 31 of the input r2.
2609 SET_ZN(res); dsp_flag_c = (r2 >> 31) & 0x01;
2612 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_2, RN);
// SHA handler: shifts _Rn by an amount derived from RM.
// NOTE(review): presumably a negative RM selects a left shift and a
// non-negative RM an arithmetic right shift -- confirm against the branch
// structure.
2617 static void dsp_opcode_sha(void)
// RM is reinterpreted as a signed value.
2619 int32_t sRm=(int32_t)RM;
// On this path the shift count is the negation of sRm...
2624 uint32_t shift=-sRm;
// ...capped at 32.
2625 if (shift>=32) shift=32;
// Carry takes bit 31 of _Rn.
2626 dsp_flag_c=(_Rn&0x80000000)>>31;
// The other path caps its count at 32 as well.
2636 if (shift>=32) shift=32;
// One-bit right shift on a signed view of _Rn; sign-preserving on compilers
// that implement >> of negative values as an arithmetic shift.
2640 _Rn=((int32_t)_Rn)>>1;
// SHARQ handler: right shift of RN, viewed as signed, by an immediate count.
2649 static void dsp_opcode_sharq(void)
// Trace output is compiled in only when DSP_DIS_SHARQ is defined.
2651 #ifdef DSP_DIS_SHARQ
2653 WriteLog("%06X: SHARQ #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", dsp_pc-2, dsp_convert_zero[IMM_1], IMM_2, dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_2, RN);
// The cast to int32_t makes the shift sign-preserving on compilers that
// implement >> of negative values as an arithmetic shift.
// NOTE(review): if dsp_convert_zero can yield 32 (presumably for an encoded
// 0 -- confirm), shifting a 32-bit value by 32 is undefined in C.
2655 uint32_t res = (int32_t)RN >> dsp_convert_zero[IMM_1];
// Z and N follow the shifted result; carry takes bit 0 of RN.
2656 SET_ZN(res); dsp_flag_c = RN & 0x01;
2658 #ifdef DSP_DIS_SHARQ
2660 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_2, RN);
// SH handler: shifts _Rn by an amount derived from RM.
// NOTE(review): presumably a negative RM selects a left shift and a
// non-negative RM a logical right shift -- confirm against the branch
// structure.
2665 static void dsp_opcode_sh(void)
// RM is reinterpreted as a signed value.
2667 int32_t sRm=(int32_t)RM;
// On this path the shift count is the negation of sRm...
2672 uint32_t shift=(-sRm);
// ...capped at 32.
2673 if (shift>=32) shift=32;
// Carry takes bit 31 of _Rn.
2674 dsp_flag_c=(_Rn&0x80000000)>>31;
// The other path caps its count at 32 as well.
2684 if (shift>=32) shift=32;
// ADDQMOD handler: adds a quick immediate under control of the dsp_modulo
// mask, so that masked bits keep their original value.
2696 void dsp_opcode_addqmod(void)
// Trace output is compiled in only when DSP_DIS_ADDQMOD is defined.
2698 #ifdef DSP_DIS_ADDQMOD
2700 WriteLog("%06X: ADDQMOD #%u, R%02u [NCZ:%u%u%u, R%02u=%08X, DSP_MOD=%08X] -> ", dsp_pc-2, dsp_convert_zero[IMM_1], IMM_2, dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_2, RN, dsp_modulo);
// The addend is the immediate field translated through dsp_convert_zero.
2702 uint32_t r1 = dsp_convert_zero[IMM_1];
2704 uint32_t res = r2 + r1;
// Bits set in dsp_modulo are taken from the original r2; bits clear in
// dsp_modulo are taken from the sum.
2705 res = (res & (~dsp_modulo)) | (r2 & dsp_modulo);
// Flags are computed from the two addends and the masked result.
2707 SET_ZNC_ADD(r2, r1, res);
2708 #ifdef DSP_DIS_ADDQMOD
2710 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, IMM_2, RN);
// SUBQMOD handler: subtracts a quick immediate under control of the
// dsp_modulo mask, so that masked bits keep their original value.
2714 void dsp_opcode_subqmod(void)
// The subtrahend is the immediate field translated through dsp_convert_zero.
2716 uint32_t r1 = dsp_convert_zero[IMM_1];
2718 uint32_t res = r2 - r1;
// Bits set in dsp_modulo are taken from the original r2; bits clear in
// dsp_modulo are taken from the difference.
2719 res = (res & (~dsp_modulo)) | (r2 & dsp_modulo);
// Flags are computed from the two operands and the masked result.
2722 SET_ZNC_SUB(r2, r1, res);
// MIRROR handler: rebuilds RN from two mirror_table lookups.
2725 void dsp_opcode_mirror(void)
// The lookup of r1's low 16 bits becomes the new high half and the lookup of
// r1's high 16 bits becomes the new low half.
// NOTE(review): presumably mirror_table[x] is the 16-bit bit-reversal of x,
// making this a full 32-bit bit reversal -- confirm.
2728 RN = (mirror_table[r1 & 0xFFFF] << 16) | mirror_table[r1 >> 16];
// SAT32S handler: chooses a saturated signed 32-bit result based on the upper
// part of the accumulator.
2732 void dsp_opcode_sat32s(void)
2734 int32_t r2 = (uint32_t)RN;
// temp holds the accumulator bits above bit 31, truncated to 32 bits.
2735 int32_t temp = dsp_acc >> 32;
// temp < -1 selects the most negative 32-bit value, temp > 0 the most
// positive; for temp of -1 or 0 the register value r2 is kept.
2736 uint32_t res = (temp < -1) ? (int32_t)0x80000000 : (temp > 0) ? (int32_t)0x7FFFFFFF : r2;
// SAT16S handler: saturates a value to the signed 16-bit range.
2741 void dsp_opcode_sat16s(void)
// Values below -32768 clamp to -32768, values above 32767 clamp to 32767, and
// anything in between passes through unchanged.
2744 uint32_t res = (r2 < -32768) ? -32768 : (r2 > 32767) ? 32767 : r2;
// Handler for the opcode this core has no implementation for.
2749 void dsp_opcode_illegal(void)
2751 // Don't know what it does, but it does *something*...
// Logs the instruction address (dsp_pc-2), both operand fields and the
// current N/C/Z flags.
2752 WriteLog("%06X: illegal %u, %u [NCZ:%u%u%u]\n", dsp_pc-2, IMM_1, IMM_2, dsp_flag_n, dsp_flag_c, dsp_flag_z);
2756 // New pipelined DSP core
// Forward declarations (alphabetical) of the pipelined core's opcode handlers.
// They are declared here so that the DSPOpcode dispatch table can reference
// them before their definitions.
2759 static void DSP_abs(void);
2760 static void DSP_add(void);
2761 static void DSP_addc(void);
2762 static void DSP_addq(void);
2763 static void DSP_addqmod(void);
2764 static void DSP_addqt(void);
2765 static void DSP_and(void);
2766 static void DSP_bclr(void);
2767 static void DSP_bset(void);
2768 static void DSP_btst(void);
2769 static void DSP_cmp(void);
2770 static void DSP_cmpq(void);
2771 static void DSP_div(void);
2772 static void DSP_imacn(void);
2773 static void DSP_imult(void);
2774 static void DSP_imultn(void);
2775 static void DSP_illegal(void);
2776 static void DSP_jr(void);
2777 static void DSP_jump(void);
2778 static void DSP_load(void);
2779 static void DSP_loadb(void);
2780 static void DSP_loadw(void);
2781 static void DSP_load_r14_i(void);
2782 static void DSP_load_r14_r(void);
2783 static void DSP_load_r15_i(void);
2784 static void DSP_load_r15_r(void);
2785 static void DSP_mirror(void);
2786 static void DSP_mmult(void);
2787 static void DSP_move(void);
2788 static void DSP_movefa(void);
2789 static void DSP_movei(void);
2790 static void DSP_movepc(void);
2791 static void DSP_moveq(void);
2792 static void DSP_moveta(void);
2793 static void DSP_mtoi(void);
2794 static void DSP_mult(void);
2795 static void DSP_neg(void);
2796 static void DSP_nop(void);
2797 static void DSP_normi(void);
2798 static void DSP_not(void);
2799 static void DSP_or(void);
2800 static void DSP_resmac(void);
2801 static void DSP_ror(void);
2802 static void DSP_rorq(void);
2803 static void DSP_sat16s(void);
2804 static void DSP_sat32s(void);
2805 static void DSP_sh(void);
2806 static void DSP_sha(void);
2807 static void DSP_sharq(void);
2808 static void DSP_shlq(void);
2809 static void DSP_shrq(void);
2810 static void DSP_store(void);
2811 static void DSP_storeb(void);
2812 static void DSP_storew(void);
2813 static void DSP_store_r14_i(void);
2814 static void DSP_store_r14_r(void);
2815 static void DSP_store_r15_i(void);
2816 static void DSP_store_r15_r(void);
2817 static void DSP_sub(void);
2818 static void DSP_subc(void);
2819 static void DSP_subq(void);
2820 static void DSP_subqmod(void);
2821 static void DSP_subqt(void);
2822 static void DSP_xor(void);
// Dispatch table for the pipelined core: one handler per opcode, with the
// entry position equal to the opcode number (the top six bits of the
// instruction word). For example, entry 38 is DSP_movei, matching the
// `opcode == 38` special case in the fetch code that reads the two extra
// immediate words.
2824 void (* DSPOpcode[64])() =
// Opcodes 0-15
2826 DSP_add, DSP_addc, DSP_addq, DSP_addqt,
2827 DSP_sub, DSP_subc, DSP_subq, DSP_subqt,
2828 DSP_neg, DSP_and, DSP_or, DSP_xor,
2829 DSP_not, DSP_btst, DSP_bset, DSP_bclr,
// Opcodes 16-31
2831 DSP_mult, DSP_imult, DSP_imultn, DSP_resmac,
2832 DSP_imacn, DSP_div, DSP_abs, DSP_sh,
2833 DSP_shlq, DSP_shrq, DSP_sha, DSP_sharq,
2834 DSP_ror, DSP_rorq, DSP_cmp, DSP_cmpq,
// Opcodes 32-47
2836 DSP_subqmod, DSP_sat16s, DSP_move, DSP_moveq,
2837 DSP_moveta, DSP_movefa, DSP_movei, DSP_loadb,
2838 DSP_loadw, DSP_load, DSP_sat32s, DSP_load_r14_i,
2839 DSP_load_r15_i, DSP_storeb, DSP_storew, DSP_store,
// Opcodes 48-63
2841 DSP_mirror, DSP_store_r14_i, DSP_store_r15_i, DSP_movepc,
2842 DSP_jump, DSP_jr, DSP_mmult, DSP_mtoi,
2843 DSP_normi, DSP_nop, DSP_load_r14_r, DSP_load_r15_r,
2844 DSP_store_r14_r, DSP_store_r15_r, DSP_illegal, DSP_addqmod
// Per-opcode register-read flags consulted by the pipeline's stall check:
// readAffected[op][0] is tested together with scoreboard[operand1] and
// readAffected[op][1] together with scoreboard[operand2], so a true entry
// means a pending write to that operand register stalls the instruction.
// Entries are in opcode order, four per line.
2847 bool readAffected[64][2] =
// Opcodes 0-15
2849 { true, true}, { true, true}, {false, true}, {false, true},
2850 { true, true}, { true, true}, {false, true}, {false, true},
2851 {false, true}, { true, true}, { true, true}, { true, true},
2852 {false, true}, {false, true}, {false, true}, {false, true},
// Opcodes 16-31
2854 { true, true}, { true, true}, { true, true}, {false, true},
2855 { true, true}, { true, true}, {false, true}, { true, true},
2856 {false, true}, {false, true}, { true, true}, {false, true},
2857 { true, true}, {false, true}, { true, true}, {false, true},
// Opcodes 32-47
2859 {false, true}, {false, true}, { true, false}, {false, false},
2860 { true, false}, {false, false}, {false, false}, { true, false},
2861 { true, false}, { true, false}, {false, true}, { true, false},
2862 { true, false}, { true, true}, { true, true}, { true, true},
// Opcodes 48-63
2864 {false, true}, { true, true}, { true, true}, {false, true},
2865 { true, false}, { true, false}, { true, true}, { true, false},
2866 { true, false}, {false, false}, { true, false}, { true, false},
2867 { true, true}, { true, true}, {false, false}, {false, true}
// Per-opcode flag marking load and store instructions. The pipeline's stall
// check stalls when both the instruction being read and the one being
// executed are flagged here. The table has 65 entries, one more than the 64
// opcodes, and the extra final entry is false.
// NOTE(review): presumably index 64 exists so that a PIPELINE_STALL opcode in
// the execute slot can be looked up safely -- confirm PIPELINE_STALL's value.
2870 bool isLoadStore[65] =
// Opcodes 0-15
2872 false, false, false, false, false, false, false, false,
2873 false, false, false, false, false, false, false, false,
// Opcodes 16-31
2875 false, false, false, false, false, false, false, false,
2876 false, false, false, false, false, false, false, false,
// Opcodes 32-47
2878 false, false, false, false, false, false, false, true,
2879 true, true, false, true, true, true, true, true,
// Opcodes 48-63, plus the extra entry 64
2881 false, true, true, false, false, false, false, false,
2882 false, false, true, true, true, true, false, false, false
// Resets the pipeline to an empty state.
2885 void FlushDSPPipeline(void)
// Stage pointers are reset to fixed slots: fetch=3, read=2, exec=1, write=0.
2887 plPtrFetch = 3, plPtrRead = 2, plPtrExec = 1, plPtrWrite = 0;
// Every one of the four pipeline slots is marked as a stall (no instruction).
2889 for(int i=0; i<4; i++)
2890 pipeline[i].opcode = PIPELINE_STALL;
// One pass per register index 0..31.
// NOTE(review): presumably this clears the scoreboard entry for each
// register -- confirm against the loop body.
2892 for(int i=0; i<32; i++)
2897 // New pipelined DSP execution core
2899 /*void DSPExecP(int32_t cycles)
2901 // bool inhibitFetch = false;
2903 dsp_releaseTimeSlice_flag = 0;
2906 while (cycles > 0 && DSP_RUNNING)
2908 WriteLog("DSPExecP: Pipeline status...\n");
2909 WriteLog("\tF -> %02u, %02u, %02u; r1=%08X, r2= %08X, res=%08X, wb=%u \n", pipeline[plPtrFetch].opcode, pipeline[plPtrFetch].operand1, pipeline[plPtrFetch].operand2, pipeline[plPtrFetch].reg1, pipeline[plPtrFetch].reg2, pipeline[plPtrFetch].result, pipeline[plPtrFetch].writebackRegister);
2910 WriteLog("\tR -> %02u, %02u, %02u; r1=%08X, r2= %08X, res=%08X, wb=%u \n", pipeline[plPtrRead].opcode, pipeline[plPtrRead].operand1, pipeline[plPtrRead].operand2, pipeline[plPtrRead].reg1, pipeline[plPtrRead].reg2, pipeline[plPtrRead].result, pipeline[plPtrRead].writebackRegister);
2911 WriteLog("\tE -> %02u, %02u, %02u; r1=%08X, r2= %08X, res=%08X, wb=%u \n", pipeline[plPtrExec].opcode, pipeline[plPtrExec].operand1, pipeline[plPtrExec].operand2, pipeline[plPtrExec].reg1, pipeline[plPtrExec].reg2, pipeline[plPtrExec].result, pipeline[plPtrExec].writebackRegister);
2912 WriteLog("\tW -> %02u, %02u, %02u; r1=%08X, r2= %08X, res=%08X, wb=%u \n", pipeline[plPtrWrite].opcode, pipeline[plPtrWrite].operand1, pipeline[plPtrWrite].operand2, pipeline[plPtrWrite].reg1, pipeline[plPtrWrite].reg2, pipeline[plPtrWrite].result, pipeline[plPtrWrite].writebackRegister);
2913 WriteLog(" --> Scoreboard: ");
2914 for(int i=0; i<32; i++)
2915 WriteLog("%s ", scoreboard[i] ? "T" : "F");
2917 // Stage 1: Instruction fetch
2918 // if (!inhibitFetch)
2920 pipeline[plPtrFetch].instruction = DSPReadWord(dsp_pc, DSP);
2921 pipeline[plPtrFetch].opcode = pipeline[plPtrFetch].instruction >> 10;
2922 pipeline[plPtrFetch].operand1 = (pipeline[plPtrFetch].instruction >> 5) & 0x1F;
2923 pipeline[plPtrFetch].operand2 = pipeline[plPtrFetch].instruction & 0x1F;
2924 if (pipeline[plPtrFetch].opcode == 38)
2925 pipeline[plPtrFetch].result = (uint32_t)DSPReadWord(dsp_pc + 2, DSP)
2926 | ((uint32_t)DSPReadWord(dsp_pc + 4, DSP) << 16);
2929 // inhibitFetch = false;
2930 WriteLog("DSPExecP: Fetching instruction (%04X) from DSP_PC = %08X...\n", pipeline[plPtrFetch].instruction, dsp_pc);
2932 WriteLog("DSPExecP: Pipeline status (after stage 1)...\n");
2933 WriteLog("\tF -> %02u, %02u, %02u; r1=%08X, r2= %08X, res=%08X, wb=%u \n", pipeline[plPtrFetch].opcode, pipeline[plPtrFetch].operand1, pipeline[plPtrFetch].operand2, pipeline[plPtrFetch].reg1, pipeline[plPtrFetch].reg2, pipeline[plPtrFetch].result, pipeline[plPtrFetch].writebackRegister);
2934 WriteLog("\tR -> %02u, %02u, %02u; r1=%08X, r2= %08X, res=%08X, wb=%u \n", pipeline[plPtrRead].opcode, pipeline[plPtrRead].operand1, pipeline[plPtrRead].operand2, pipeline[plPtrRead].reg1, pipeline[plPtrRead].reg2, pipeline[plPtrRead].result, pipeline[plPtrRead].writebackRegister);
2935 WriteLog("\tE -> %02u, %02u, %02u; r1=%08X, r2= %08X, res=%08X, wb=%u \n", pipeline[plPtrExec].opcode, pipeline[plPtrExec].operand1, pipeline[plPtrExec].operand2, pipeline[plPtrExec].reg1, pipeline[plPtrExec].reg2, pipeline[plPtrExec].result, pipeline[plPtrExec].writebackRegister);
2936 WriteLog("\tW -> %02u, %02u, %02u; r1=%08X, r2= %08X, res=%08X, wb=%u \n", pipeline[plPtrWrite].opcode, pipeline[plPtrWrite].operand1, pipeline[plPtrWrite].operand2, pipeline[plPtrWrite].reg1, pipeline[plPtrWrite].reg2, pipeline[plPtrWrite].result, pipeline[plPtrWrite].writebackRegister);
2937 // Stage 2: Read registers
2938 //Ok, stalls here depend on whether or not the instruction reads two registers or not
2939 //and *which* register (1 or 2) is the one being read... !!! FIX !!!
2940 if (scoreboard[pipeline[plPtrRead].operand2])
2941 && pipeline[plPtrRead].opcode != PIPELINE_STALL)
2942 // We have a hit in the scoreboard, so we have to stall the pipeline...
2944 //This is crappy, crappy CRAPPY! And it doesn't work! !!! FIX !!!
2945 // dsp_pc -= (pipeline[plPtrRead].opcode == 38 ? 6 : 2);
2946 WriteLog(" --> Stalling pipeline: scoreboard = %s\n", scoreboard[pipeline[plPtrRead].operand2] ? "true" : "false");
2947 pipeline[plPtrFetch] = pipeline[plPtrRead];
2948 pipeline[plPtrRead].opcode = PIPELINE_STALL;
2952 pipeline[plPtrRead].reg1 = dsp_reg[pipeline[plPtrRead].operand1];
2953 pipeline[plPtrRead].reg2 = dsp_reg[pipeline[plPtrRead].operand2];
2954 pipeline[plPtrRead].writebackRegister = pipeline[plPtrRead].operand2; // Set it to RN
2956 if (pipeline[plPtrRead].opcode != PIPELINE_STALL)
2957 // Shouldn't we be more selective with the register scoreboarding?
2958 // Yes, we should. !!! FIX !!!
2959 scoreboard[pipeline[plPtrRead].operand2] = true;
2960 //Advance PC here??? Yes.
2961 // dsp_pc += (pipeline[plPtrRead].opcode == 38 ? 6 : 2);
2962 //This is a mangling of the pipeline stages, but what else to do???
2963 dsp_pc += (pipeline[plPtrFetch].opcode == 38 ? 6 : 2);
2966 WriteLog("DSPExecP: Pipeline status (after stage 2)...\n");
2967 WriteLog("\tF -> %02u, %02u, %02u; r1=%08X, r2= %08X, res=%08X, wb=%u \n", pipeline[plPtrFetch].opcode, pipeline[plPtrFetch].operand1, pipeline[plPtrFetch].operand2, pipeline[plPtrFetch].reg1, pipeline[plPtrFetch].reg2, pipeline[plPtrFetch].result, pipeline[plPtrFetch].writebackRegister);
2968 WriteLog("\tR -> %02u, %02u, %02u; r1=%08X, r2= %08X, res=%08X, wb=%u \n", pipeline[plPtrRead].opcode, pipeline[plPtrRead].operand1, pipeline[plPtrRead].operand2, pipeline[plPtrRead].reg1, pipeline[plPtrRead].reg2, pipeline[plPtrRead].result, pipeline[plPtrRead].writebackRegister);
2969 WriteLog("\tE -> %02u, %02u, %02u; r1=%08X, r2= %08X, res=%08X, wb=%u \n", pipeline[plPtrExec].opcode, pipeline[plPtrExec].operand1, pipeline[plPtrExec].operand2, pipeline[plPtrExec].reg1, pipeline[plPtrExec].reg2, pipeline[plPtrExec].result, pipeline[plPtrExec].writebackRegister);
2970 WriteLog("\tW -> %02u, %02u, %02u; r1=%08X, r2= %08X, res=%08X, wb=%u \n", pipeline[plPtrWrite].opcode, pipeline[plPtrWrite].operand1, pipeline[plPtrWrite].operand2, pipeline[plPtrWrite].reg1, pipeline[plPtrWrite].reg2, pipeline[plPtrWrite].result, pipeline[plPtrWrite].writebackRegister);
2972 if (pipeline[plPtrExec].opcode != PIPELINE_STALL)
2974 WriteLog("DSPExecP: About to execute opcode %s...\n", dsp_opcode_str[pipeline[plPtrExec].opcode]);
2975 DSPOpcode[pipeline[plPtrExec].opcode]();
2976 dsp_opcode_use[pipeline[plPtrExec].opcode]++;
2977 cycles -= dsp_opcode_cycles[pipeline[plPtrExec].opcode];
2982 WriteLog("DSPExecP: Pipeline status (after stage 3)...\n");
2983 WriteLog("\tF -> %02u, %02u, %02u; r1=%08X, r2= %08X, res=%08X, wb=%u \n", pipeline[plPtrFetch].opcode, pipeline[plPtrFetch].operand1, pipeline[plPtrFetch].operand2, pipeline[plPtrFetch].reg1, pipeline[plPtrFetch].reg2, pipeline[plPtrFetch].result, pipeline[plPtrFetch].writebackRegister);
2984 WriteLog("\tR -> %02u, %02u, %02u; r1=%08X, r2= %08X, res=%08X, wb=%u \n", pipeline[plPtrRead].opcode, pipeline[plPtrRead].operand1, pipeline[plPtrRead].operand2, pipeline[plPtrRead].reg1, pipeline[plPtrRead].reg2, pipeline[plPtrRead].result, pipeline[plPtrRead].writebackRegister);
2985 WriteLog("\tE -> %02u, %02u, %02u; r1=%08X, r2= %08X, res=%08X, wb=%u \n", pipeline[plPtrExec].opcode, pipeline[plPtrExec].operand1, pipeline[plPtrExec].operand2, pipeline[plPtrExec].reg1, pipeline[plPtrExec].reg2, pipeline[plPtrExec].result, pipeline[plPtrExec].writebackRegister);
2986 WriteLog("\tW -> %02u, %02u, %02u; r1=%08X, r2= %08X, res=%08X, wb=%u \n", pipeline[plPtrWrite].opcode, pipeline[plPtrWrite].operand1, pipeline[plPtrWrite].operand2, pipeline[plPtrWrite].reg1, pipeline[plPtrWrite].reg2, pipeline[plPtrWrite].result, pipeline[plPtrWrite].writebackRegister);
2987 // Stage 4: Write back register
2988 if (pipeline[plPtrWrite].opcode != PIPELINE_STALL)
2990 if (pipeline[plPtrWrite].writebackRegister != 0xFF)
2991 dsp_reg[pipeline[plPtrWrite].writebackRegister] = pipeline[plPtrWrite].result;
2993 scoreboard[pipeline[plPtrWrite].operand1]
2994 = scoreboard[pipeline[plPtrWrite].operand2] = false;
2997 // Push instructions through the pipeline...
2998 plPtrFetch = (++plPtrFetch) & 0x03;
2999 plPtrRead = (++plPtrRead) & 0x03;
3000 plPtrExec = (++plPtrExec) & 0x03;
3001 plPtrWrite = (++plPtrWrite) & 0x03;
3008 //Problems: JR and any other instruction that relies on DSP_PC is getting WRONG values!
3010 // Should be fixed now. Another problem is figuring how to do the sequence following
3011 // a branch followed with the JR & JUMP instructions...
3013 // There are two conflicting problems:
3016 F1B236: LOAD (R31), R03 [NCZ:000, R31=00F1CFDC, R03=00F14000] -> [NCZ:000, R03=00F1B084]
3017 F1B238: BCLR #3, R00 [NCZ:000, R00=00004039] -> [NCZ:000, R00=00004031]
3018 F1B23A: ADDQ #2, R03 [NCZ:000, R03=00F1B084] -> [NCZ:000, R03=00F1B086]
3019 F1B23C: SUBQ #1, R17 [NCZ:000, R17=00000040] -> [NCZ:000, R17=0000003F]
3020 F1B23E: MOVEI #$00F1CFE0, R31 [NCZ:000, R31=00F1CFDC] -> [NCZ:000, R31=00F1CFE0]
3021 F1B244: JR z, F1B254 [NCZ:000] Branch NOT taken.
3022 F1B246: BSET #10, R00 [NCZ:000, R00=00004031] -> [NCZ:000, R00=00004431]
3023 F1B248: MOVEI #$00F1A100, R01 [NCZ:000, R01=00F1A148] -> [NCZ:000, R01=00F1A100]
3024 F1B24E: STORE R00, (R01) [NCZ:000, R00=00004431, R01=00F1A100]
3025 DSP: Writing 00004431 to DSP_FLAGS by DSP...
3026 DSP: Finished interrupt.
3027 ; Without pipeline effects, the value in R03 is erroneously read from bank 1 instead of
3028 ; bank 0 (where it was prepared)!
3029 F1B250: JUMP T, (R03) [NCZ:001, R03=00000000] Branched!
3030 F1B252: NOP [NCZ:001]
3033 // The other is when you see this at the end of an IRQ:
3036 JUMP T, (R29) ; R29 = Previous stack + 2
3037 STORE R28, (R30) ; R28 = Modified flags register, R30 = $F1A100
3039 ; Actually, this is OK if we do the atomic JUMP/JR operation correctly:
3040 ; 1) The STORE goes through the pipeline and is executed/written back
3041 ; 2) The pipeline is flushed
3042 ; 3) The DSP_PC is set to the new address
3043 ; 4) Execution resumes
3045 JUMP T, (R25) ; Oops! Because of pipeline effects R25 has the value from
3046 ; bank 0 instead of the current bank 1 and so goes astray!
3049 //One other thing: Since these stages are supposed to happen simultaneously, try executing
3050 //them in reverse order to see if that reduces pipeline stalls from late writebacks...
3054 Small problem here: The return address when INT0 comes up is $F1B088, but when INT1
3055 follows it, the JUMP out of the previous interrupt is bypassed immediately--this is
3056 because the STORE instruction writes back on stage #2 of the pipeline instead of stage #3...
3057 If it were done properly, the STORE write back would occur *after* (well, technically,
3058 during) the execution of the JUMP that follows it.
3062 F1B08A: JR z, F1B082 [NCZ:001] Branched!
3063 F1B08A: NOP [NCZ:001]
3065 F1B080: MOVEI #$00F1B178, R00 [NCZ:001, R00=00F1B178] -> [NCZ:001, R00=00F1B178]
3068 F1B086: LOAD (R00), R01 [NCZ:001, R00=00F1B178, R01=00000000] -> [NCZ:001, R01=00000000]
3071 F1B088: OR R01, R01 [NCZ:001, R01=00000000, R01=00000000] -> [NCZ:001, R01=00000000, R01=00000000]
3072 F1B08A: JR z, F1B082 [NCZ:001] Branched!
3073 F1B08A: NOP [NCZ:001]
3075 F1B080: MOVEI #$00F1B178, R00 [NCZ:001, R00=00F1B178] -> [NCZ:001, R00=00F1B178]
3078 Write to DSP CTRL: 00002301 --> Starting to run at 00F1B088 by M68K...
3079 DSP: CPU -> DSP interrupt
3080 DSP: Generating interrupt #0... [PC will return to 00F1B088, R31 = 00F1CFE0]
3081 Write to DSP CTRL: 00000001 --> Starting to run at 00F1B000 by M68K...
3083 F1B000: MOVEI #$00F1B0D4, R30 [NCZ:001, R30=00F1B000] -> [NCZ:001, R30=00F1B0D4]
3086 F1B006: JUMP T, (R30) [NCZ:001, R30=00F1B0D4] Branched!
3087 F1B006: NOP [NCZ:001]
3089 F1B0D4: MOVEI #$00F1A100, R01 [NCZ:001, R01=00F1A100] -> [NCZ:001, R01=00F1A100]
3092 F1B0DA: LOAD (R01), R00 [NCZ:001, R01=00F1A100, R00=00004431] -> [NCZ:001, R00=00004039]
3093 F1B0DC: MOVEI #$00F1B0C8, R01 [NCZ:001, R01=00F1A100] -> [NCZ:001, R01=00F1B0C8]
3096 F1B0E2: LOAD (R01), R02 [NCZ:001, R01=00F1B0C8, R02=00000000] -> [NCZ:001, R02=00000001]
3097 F1B0E4: MOVEI #$00F1B0CC, R01 [NCZ:001, R01=00F1B0C8] -> [NCZ:001, R01=00F1B0CC]
3100 F1B0EA: LOAD (R01), R03 [NCZ:001, R01=00F1B0CC, R03=00F1B086] -> [NCZ:001, R03=00000064]
3101 F1B0EC: MOVEI #$00F1B0D0, R01 [NCZ:001, R01=00F1B0CC] -> [NCZ:001, R01=00F1B0D0]
3104 F1B0F2: LOAD (R01), R04 [NCZ:001, R01=00F1B0D0, R04=00000000] -> [NCZ:001, R04=00000008]
3105 F1B0F4: MOVEI #$00F1B0BC, R01 [NCZ:001, R01=00F1B0D0] -> [NCZ:001, R01=00F1B0BC]
3108 F1B0FA: ADD R04, R01 [NCZ:001, R04=00000008, R01=00F1B0BC] -> [NCZ:000, R04=00000008, R01=00F1B0C4]
3111 F1B0FC: LOAD (R01), R01 [NCZ:000, R01=00F1B0C4, R01=00F1B0C4] -> [NCZ:000, R01=00F1B12E]
3114 F1B0FE: JUMP T, (R01) [NCZ:000, R01=00F1B12E] Branched!
3115 F1B0FE: NOP [NCZ:000]
3117 F1B12E: MOVE R02, R08 [NCZ:000, R02=00000001, R08=00000000] -> [NCZ:000, R02=00000001, R08=00000001]
3120 F1B132: MOVEI #$00F1B102, R01 [NCZ:000, R01=00F1B12E] -> [NCZ:000, R01=00F1B102]
3123 F1B138: JUMP T, (R01) [NCZ:000, R01=00F1B102] Branched!
3124 F1B138: NOP [NCZ:000]
3126 F1B102: MOVEI #$00F1B0C8, R01 [NCZ:000, R01=00F1B102] -> [NCZ:000, R01=00F1B0C8]
3129 F1B108: STORE R08, (R01) [NCZ:000, R08=00000000, R01=00F1B0C8]
3130 F1B10A: MOVEI #$00F1B0D0, R01 [NCZ:000, R01=00F1B0C8] -> [NCZ:000, R01=00F1B0D0]
3131 F1B110: MOVEQ #0, R04 [NCZ:000, R04=00000008] -> [NCZ:000, R04=00000000]
3134 F1B112: STORE R04, (R01) [NCZ:000, R04=00000000, R01=00F1B0D0]
3135 F1B114: BCLR #3, R00 [NCZ:000, R00=00004039] -> [NCZ:000, R00=00004031]
3138 F1B116: BSET #9, R00 [NCZ:000, R00=00004031] -> [NCZ:000, R00=00004231]
3139 F1B118: LOAD (R31), R04 [NCZ:000, R31=00F1CFDC, R04=00000000] -> [NCZ:000, R04=00F1B086]
3140 F1B11A: MOVEI #$00F1CFE0, R31 [NCZ:000, R31=00F1CFDC] -> [NCZ:000, R31=00F1CFE0]
3142 F1B120: ADDQ #2, R04 [NCZ:000, R04=00F1B086] -> [NCZ:000, R04=00F1B088]
3143 F1B122: MOVEI #$00F1A100, R01 [NCZ:000, R01=00F1B0D0] -> [NCZ:000, R01=00F1A100]
3146 F1B128: STORE R00, (R01) [NCZ:000, R00=00004231, R01=00F1A100]
3147 DSP: Writing 00004231 to DSP_FLAGS by DSP (REGPAGE is set)...
3148 DSP: Finished interrupt.
3149 DSP: Generating interrupt #1... [PC will return to 00F1B12A, R31 = 00F1CFE0]
3151 F1B010: MOVEI #$00F1B1FC, R30 [NCZ:001, R30=00F1B010] -> [NCZ:001, R30=00F1B1FC]
3154 F1B016: JUMP T, (R30) [NCZ:001, R30=00F1B1FC] Branched!
3155 F1B016: NOP [NCZ:001]
3157 F1B1FC: MOVEI #$00F1A100, R01 [NCZ:001, R01=00F1A100] -> [NCZ:001, R01=00F1A100]
// History buffer of DSP program counters: DSPExecP2 stores dsp_pc at
// pcQueue1[pcQPtr1++], and its DSP_DEBUG_PL2 out-of-bounds backtrace walks
// all 0x400 slots using the index (i + pcQPtr1) & 0x3FF.
3160 uint32_t pcQueue1[0x400];
// Next write position in pcQueue1.
3161 uint32_t pcQPtr1 = 0;
// NOTE(review): the only nearby reference is in commented-out debugging code
// inside DSPExecP2 -- confirm whether this variable is still needed.
3162 static uint32_t prevR1;
3163 //Let's try a 3 stage pipeline....
3164 //Looks like 3 stage is correct, otherwise bad things happen...
3165 void DSPExecP2(int32_t cycles)
3167 dsp_releaseTimeSlice_flag = 0;
3170 while (cycles > 0 && DSP_RUNNING)
3172 /*extern uint32_t totalFrames;
3173 //F1B2F6: LOAD (R14+$04), R24 [NCZ:001, R14+$04=00F20018, R24=FFFFFFFF] -> Jaguar: Unknown word read at 00F20018 by DSP (M68K PC=00E32E)
3174 //-> 43 + 1 + 24 -> $2B + $01 + $18 -> 101011 00001 11000 -> 1010 1100 0011 1000 -> AC38
3175 //C470 -> 1100 0100 0111 0000 -> 110001 00011 10000 -> 49, 3, 16 -> STORE R16, (R14+$0C)
3177 if (totalFrames >= 377 && GET16(dsp_ram_8, 0x0002F6) == 0xAC38 && dsp_pc == 0xF1B140)
3180 WriteLog("Starting disassembly at frame #%u...\n", totalFrames);
3182 if (dsp_pc == 0xF1B092)
3183 doDSPDis = false;//*/
3184 /*if (totalFrames >= 373 && GET16(dsp_ram_8, 0x0002F6) == 0xAC38)
3185 doDSPDis = true;//*/
3186 /*if (totalFrames >= 373 && dsp_pc == 0xF1B0A0)
3187 doDSPDis = true;//*/
3188 /*if (dsp_pc == 0xF1B0A0)
3189 doDSPDis = true;//*/
3190 /*if (dsp_pc == 0xF1B0D2) && dsp_reg[1] == 0x2140C)
3191 doDSPDis = true;//*/
3192 //Two parter... (not sure how to write this)
3193 //if (dsp_pc == 0xF1B0D2)
3194 // prevR1 = dsp_reg[1];
3196 //F1B0D2: ADDQT #8, R01 [NCZ:000, R01=0002140C] -> [NCZ:000, R01=00021414]
3197 //F1B0D2: ADDQT #8, R01 [NCZ:000, R01=0002140C] -> [NCZ:000, R01=00021414]
3200 pcQueue1[pcQPtr1++] = dsp_pc;
3203 #ifdef DSP_DEBUG_PL2
3204 if ((dsp_pc < 0xF1B000 || dsp_pc > 0xF1CFFF) && !doDSPDis)
3206 WriteLog("DSP: PC has stepped out of bounds...\n\nBacktrace:\n\n");
3211 for(int i=0; i<0x400; i++)
3213 dasmjag(JAGUAR_DSP, buffer, pcQueue1[(i + pcQPtr1) & 0x3FF]);
3214 WriteLog("\t%08X: %s\n", pcQueue1[(i + pcQPtr1) & 0x3FF], buffer);
3220 if (IMASKCleared) // If IMASK was cleared,
3222 #ifdef DSP_DEBUG_IRQ
3223 WriteLog("DSP: Finished interrupt.\n");
3225 DSPHandleIRQs(); // See if any other interrupts are pending!
3226 IMASKCleared = false;
3229 //if (dsp_flags & REGPAGE)
3230 // WriteLog(" --> REGPAGE has just been set!\n");
3231 #ifdef DSP_DEBUG_PL2
3234 WriteLog("DSPExecP: Pipeline status [PC=%08X]...\n", dsp_pc);
3235 WriteLog("\tR -> %02u, %02u, %02u; r1=%08X, r2= %08X, res=%08X, wb=%u (%s)\n", pipeline[plPtrRead].opcode, pipeline[plPtrRead].operand1, pipeline[plPtrRead].operand2, pipeline[plPtrRead].reg1, pipeline[plPtrRead].reg2, pipeline[plPtrRead].result, pipeline[plPtrRead].writebackRegister, dsp_opcode_str[pipeline[plPtrRead].opcode]);
3236 WriteLog("\tE -> %02u, %02u, %02u; r1=%08X, r2= %08X, res=%08X, wb=%u (%s)\n", pipeline[plPtrExec].opcode, pipeline[plPtrExec].operand1, pipeline[plPtrExec].operand2, pipeline[plPtrExec].reg1, pipeline[plPtrExec].reg2, pipeline[plPtrExec].result, pipeline[plPtrExec].writebackRegister, dsp_opcode_str[pipeline[plPtrExec].opcode]);
3237 WriteLog("\tW -> %02u, %02u, %02u; r1=%08X, r2= %08X, res=%08X, wb=%u (%s)\n", pipeline[plPtrWrite].opcode, pipeline[plPtrWrite].operand1, pipeline[plPtrWrite].operand2, pipeline[plPtrWrite].reg1, pipeline[plPtrWrite].reg2, pipeline[plPtrWrite].result, pipeline[plPtrWrite].writebackRegister, dsp_opcode_str[pipeline[plPtrWrite].opcode]);
3238 WriteLog(" --> Scoreboard: ");
3239 for(int i=0; i<32; i++)
3240 WriteLog("%s ", scoreboard[i] ? "T" : "F");
3244 // Stage 1a: Instruction fetch
3245 pipeline[plPtrRead].instruction = DSPReadWord(dsp_pc, DSP);
3246 pipeline[plPtrRead].opcode = pipeline[plPtrRead].instruction >> 10;
3247 pipeline[plPtrRead].operand1 = (pipeline[plPtrRead].instruction >> 5) & 0x1F;
3248 pipeline[plPtrRead].operand2 = pipeline[plPtrRead].instruction & 0x1F;
3249 if (pipeline[plPtrRead].opcode == 38)
3250 pipeline[plPtrRead].result = (uint32_t)DSPReadWord(dsp_pc + 2, DSP)
3251 | ((uint32_t)DSPReadWord(dsp_pc + 4, DSP) << 16);
3252 #ifdef DSP_DEBUG_PL2
3255 WriteLog("DSPExecP: Fetching instruction (%04X) from DSP_PC = %08X...\n", pipeline[plPtrRead].instruction, dsp_pc);
3256 WriteLog("DSPExecP: Pipeline status (after stage 1a) [PC=%08X]...\n", dsp_pc);
3257 WriteLog("\tR -> %02u, %02u, %02u; r1=%08X, r2= %08X, res=%08X, wb=%u (%s)\n", pipeline[plPtrRead].opcode, pipeline[plPtrRead].operand1, pipeline[plPtrRead].operand2, pipeline[plPtrRead].reg1, pipeline[plPtrRead].reg2, pipeline[plPtrRead].result, pipeline[plPtrRead].writebackRegister, dsp_opcode_str[pipeline[plPtrRead].opcode]);
3258 WriteLog("\tE -> %02u, %02u, %02u; r1=%08X, r2= %08X, res=%08X, wb=%u (%s)\n", pipeline[plPtrExec].opcode, pipeline[plPtrExec].operand1, pipeline[plPtrExec].operand2, pipeline[plPtrExec].reg1, pipeline[plPtrExec].reg2, pipeline[plPtrExec].result, pipeline[plPtrExec].writebackRegister, dsp_opcode_str[pipeline[plPtrExec].opcode]);
3259 WriteLog("\tW -> %02u, %02u, %02u; r1=%08X, r2= %08X, res=%08X, wb=%u (%s)\n", pipeline[plPtrWrite].opcode, pipeline[plPtrWrite].operand1, pipeline[plPtrWrite].operand2, pipeline[plPtrWrite].reg1, pipeline[plPtrWrite].reg2, pipeline[plPtrWrite].result, pipeline[plPtrWrite].writebackRegister, dsp_opcode_str[pipeline[plPtrWrite].opcode]);
3262 // Stage 1b: Read registers
3263 //Small problem--when say LOAD or STORE (R14/5+$nn) is executed AFTER an instruction that
3264 //modifies R14/5, we don't check the scoreboard for R14/5 (and we need to!)... !!! FIX !!!
3266 //Another problem: Any sequential combination of LOAD and STORE operations will cause the
3267 //pipeline to stall, and we don't take care of that here. !!! FIX !!!
3268 if ((scoreboard[pipeline[plPtrRead].operand1] && readAffected[pipeline[plPtrRead].opcode][0])
3269 || (scoreboard[pipeline[plPtrRead].operand2] && readAffected[pipeline[plPtrRead].opcode][1])
3270 || ((pipeline[plPtrRead].opcode == 43 || pipeline[plPtrRead].opcode == 58) && scoreboard[14])
3271 || ((pipeline[plPtrRead].opcode == 44 || pipeline[plPtrRead].opcode == 59) && scoreboard[15])
3272 //Not sure that this is the best way to fix the LOAD/STORE problem... But it seems to
3274 || (isLoadStore[pipeline[plPtrRead].opcode] && isLoadStore[pipeline[plPtrExec].opcode]))
3275 // We have a hit in the scoreboard, so we have to stall the pipeline...
3276 #ifdef DSP_DEBUG_PL2
3280 WriteLog(" --> Stalling pipeline: ");
3281 if (readAffected[pipeline[plPtrRead].opcode][0])
3282 WriteLog("scoreboard[%u] = %s (reg 1) ", pipeline[plPtrRead].operand1, scoreboard[pipeline[plPtrRead].operand1] ? "true" : "false");
3283 if (readAffected[pipeline[plPtrRead].opcode][1])
3284 WriteLog("scoreboard[%u] = %s (reg 2)", pipeline[plPtrRead].operand2, scoreboard[pipeline[plPtrRead].operand2] ? "true" : "false");
3288 pipeline[plPtrRead].opcode = PIPELINE_STALL;
3289 #ifdef DSP_DEBUG_PL2
3294 pipeline[plPtrRead].reg1 = dsp_reg[pipeline[plPtrRead].operand1];
3295 pipeline[plPtrRead].reg2 = dsp_reg[pipeline[plPtrRead].operand2];
3296 pipeline[plPtrRead].writebackRegister = pipeline[plPtrRead].operand2; // Set it to RN
3298 // Shouldn't we be more selective with the register scoreboarding?
3299 // Yes, we should. !!! FIX !!! Kinda [DONE]
3300 #ifndef NEW_SCOREBOARD
3301 scoreboard[pipeline[plPtrRead].operand2] = affectsScoreboard[pipeline[plPtrRead].opcode];
3303 //Hopefully this will fix the dual MOVEQ # problem...
3304 scoreboard[pipeline[plPtrRead].operand2] += (affectsScoreboard[pipeline[plPtrRead].opcode] ? 1 : 0);
3307 //Advance PC here??? Yes.
3308 dsp_pc += (pipeline[plPtrRead].opcode == 38 ? 6 : 2);
3311 #ifdef DSP_DEBUG_PL2
3314 WriteLog("DSPExecP: Pipeline status (after stage 1b) [PC=%08X]...\n", dsp_pc);
3315 WriteLog("\tR -> %02u, %02u, %02u; r1=%08X, r2= %08X, res=%08X, wb=%u (%s)\n", pipeline[plPtrRead].opcode, pipeline[plPtrRead].operand1, pipeline[plPtrRead].operand2, pipeline[plPtrRead].reg1, pipeline[plPtrRead].reg2, pipeline[plPtrRead].result, pipeline[plPtrRead].writebackRegister, dsp_opcode_str[pipeline[plPtrRead].opcode]);
3316 WriteLog("\tE -> %02u, %02u, %02u; r1=%08X, r2= %08X, res=%08X, wb=%u (%s)\n", pipeline[plPtrExec].opcode, pipeline[plPtrExec].operand1, pipeline[plPtrExec].operand2, pipeline[plPtrExec].reg1, pipeline[plPtrExec].reg2, pipeline[plPtrExec].result, pipeline[plPtrExec].writebackRegister, dsp_opcode_str[pipeline[plPtrExec].opcode]);
3317 WriteLog("\tW -> %02u, %02u, %02u; r1=%08X, r2= %08X, res=%08X, wb=%u (%s)\n", pipeline[plPtrWrite].opcode, pipeline[plPtrWrite].operand1, pipeline[plPtrWrite].operand2, pipeline[plPtrWrite].reg1, pipeline[plPtrWrite].reg2, pipeline[plPtrWrite].result, pipeline[plPtrWrite].writebackRegister, dsp_opcode_str[pipeline[plPtrWrite].opcode]);
3321 if (pipeline[plPtrExec].opcode != PIPELINE_STALL)
3323 #ifdef DSP_DEBUG_PL2
3325 WriteLog("\t[inst=%02u][R28=%08X, alt R28=%08X, REGPAGE=%s]\n", pipeline[plPtrExec].opcode, dsp_reg[28], dsp_alternate_reg[28], (dsp_flags & REGPAGE ? "set" : "not set"));
3329 WriteLog("DSPExecP: About to execute opcode %s...\n", dsp_opcode_str[pipeline[plPtrExec].opcode]);
3334 lastExec = pipeline[plPtrExec].instruction;
3335 //WriteLog("[lastExec = %04X]\n", lastExec);
3337 cycles -= dsp_opcode_cycles[pipeline[plPtrExec].opcode];
3338 dsp_opcode_use[pipeline[plPtrExec].opcode]++;
3339 DSPOpcode[pipeline[plPtrExec].opcode]();
3340 //WriteLog(" --> Returned from execute. DSP_PC: %08X\n", dsp_pc);
3344 //Let's not, until we do the stalling correctly...
3345 //But, we gotta while we're doing the comparison core...!
3346 //Or do we? cycles--;
3347 //Really, the whole thing is wrong. When the pipeline is correctly stuffed, most instructions
3348 //will execute in one clock cycle (others, like DIV, will likely not). So, the challenge is
3349 //to model this clock cycle behavior correctly...
3350 //Also, the pipeline stalls too much--mostly because the transparent writebacks at stage 3
3351 //don't affect the reads at stage 1...
3352 #ifdef DSP_DEBUG_STALL
3354 WriteLog("[STALL... DSP_PC = %08X]\n", dsp_pc);
3358 #ifdef DSP_DEBUG_PL2
3361 WriteLog("DSPExecP: Pipeline status (after stage 2) [PC=%08X]...\n", dsp_pc);
3362 WriteLog("\tR -> %02u, %02u, %02u; r1=%08X, r2= %08X, res=%08X, wb=%u (%s)\n", pipeline[plPtrRead].opcode, pipeline[plPtrRead].operand1, pipeline[plPtrRead].operand2, pipeline[plPtrRead].reg1, pipeline[plPtrRead].reg2, pipeline[plPtrRead].result, pipeline[plPtrRead].writebackRegister, dsp_opcode_str[pipeline[plPtrRead].opcode]);
3363 WriteLog("\tE -> %02u, %02u, %02u; r1=%08X, r2= %08X, res=%08X, wb=%u (%s)\n", pipeline[plPtrExec].opcode, pipeline[plPtrExec].operand1, pipeline[plPtrExec].operand2, pipeline[plPtrExec].reg1, pipeline[plPtrExec].reg2, pipeline[plPtrExec].result, pipeline[plPtrExec].writebackRegister, dsp_opcode_str[pipeline[plPtrExec].opcode]);
3364 WriteLog("\tW -> %02u, %02u, %02u; r1=%08X, r2= %08X, res=%08X, wb=%u (%s)\n", pipeline[plPtrWrite].opcode, pipeline[plPtrWrite].operand1, pipeline[plPtrWrite].operand2, pipeline[plPtrWrite].reg1, pipeline[plPtrWrite].reg2, pipeline[plPtrWrite].result, pipeline[plPtrWrite].writebackRegister, dsp_opcode_str[pipeline[plPtrWrite].opcode]);
3368 // Stage 3: Write back register/memory address
3369 if (pipeline[plPtrWrite].opcode != PIPELINE_STALL)
3371 /*if (pipeline[plPtrWrite].writebackRegister == 3
3372 && (pipeline[plPtrWrite].result < 0xF14000 || pipeline[plPtrWrite].result > 0xF1CFFF)
3375 WriteLog("DSP: Register R03 has stepped out of bounds...\n\n");
3378 if (pipeline[plPtrWrite].writebackRegister != 0xFF)
3380 if (pipeline[plPtrWrite].writebackRegister != 0xFE)
3381 dsp_reg[pipeline[plPtrWrite].writebackRegister] = pipeline[plPtrWrite].result;
3384 if (pipeline[plPtrWrite].type == TYPE_BYTE)
3385 JaguarWriteByte(pipeline[plPtrWrite].address, pipeline[plPtrWrite].value);
3386 else if (pipeline[plPtrWrite].type == TYPE_WORD)
3387 JaguarWriteWord(pipeline[plPtrWrite].address, pipeline[plPtrWrite].value);
3389 JaguarWriteLong(pipeline[plPtrWrite].address, pipeline[plPtrWrite].value);
3393 #ifndef NEW_SCOREBOARD
3394 if (affectsScoreboard[pipeline[plPtrWrite].opcode])
3395 scoreboard[pipeline[plPtrWrite].operand2] = false;
3397 //Yup, sequential MOVEQ # problem fixing (I hope!)...
3398 if (affectsScoreboard[pipeline[plPtrWrite].opcode])
3399 if (scoreboard[pipeline[plPtrWrite].operand2])
3400 scoreboard[pipeline[plPtrWrite].operand2]--;
3404 // Push instructions through the pipeline...
3405 plPtrRead = (++plPtrRead) & 0x03;
3406 plPtrExec = (++plPtrExec) & 0x03;
3407 plPtrWrite = (++plPtrWrite) & 0x03;
3416 //#define DSP_DEBUG_PL3
3417 //Let's try a 2 stage pipeline....
// DSPExecP3: experimental two-stage pipelined DSP executor.
//
// Each loop iteration:
//   Stage 1a - fetches the instruction word at dsp_pc into pipeline[plPtrRead]
//              and splits it into opcode / operand1 / operand2 (opcode 38 also
//              pulls its 32-bit immediate from the following two words).
//   Stage 1b - if the scoreboard marks a register this opcode reads as pending,
//              the slot is turned into PIPELINE_STALL; otherwise the register
//              values are latched, the scoreboard is updated and dsp_pc advances.
//   Stage 2a - the instruction in pipeline[plPtrExec] is executed unless it is
//              a stall, and its cycle cost is subtracted from 'cycles'.
//   Stage 2b - its result is written back (unless writebackRegister is 0xFF)
//              and its scoreboard entry is cleared.
// Finally both slot indices are advanced modulo 4.
//
// cycles: execution budget; the loop runs while it is positive and DSP_RUNNING
//         holds. It is reduced by dsp_opcode_cycles[] for every executed opcode.
3418 void DSPExecP3(int32_t cycles)
3420 dsp_releaseTimeSlice_flag = 0;
3423 while (cycles > 0 && DSP_RUNNING)
3425 //if (dsp_pc < 0xF1B000 || dsp_pc > 0xF1CFFF)
3427 #ifdef DSP_DEBUG_PL3
3428 WriteLog("DSPExecP: Pipeline status...\n");
3429 WriteLog("\tF/R -> %02u, %02u, %02u; r1=%08X, r2= %08X, res=%08X, wb=%u (%s)\n", pipeline[plPtrRead].opcode, pipeline[plPtrRead].operand1, pipeline[plPtrRead].operand2, pipeline[plPtrRead].reg1, pipeline[plPtrRead].reg2, pipeline[plPtrRead].result, pipeline[plPtrRead].writebackRegister, dsp_opcode_str[pipeline[plPtrRead].opcode]);
3430 WriteLog("\tE/W -> %02u, %02u, %02u; r1=%08X, r2= %08X, res=%08X, wb=%u (%s)\n", pipeline[plPtrExec].opcode, pipeline[plPtrExec].operand1, pipeline[plPtrExec].operand2, pipeline[plPtrExec].reg1, pipeline[plPtrExec].reg2, pipeline[plPtrExec].result, pipeline[plPtrExec].writebackRegister, dsp_opcode_str[pipeline[plPtrExec].opcode]);
3431 WriteLog(" --> Scoreboard: ");
3432 for(int i=0; i<32; i++)
3433 WriteLog("%s ", scoreboard[i] ? "T" : "F");
3436 // Stage 1a: Instruction fetch
3437 pipeline[plPtrRead].instruction = DSPReadWord(dsp_pc, DSP);
3438 pipeline[plPtrRead].opcode = pipeline[plPtrRead].instruction >> 10;
3439 pipeline[plPtrRead].operand1 = (pipeline[plPtrRead].instruction >> 5) & 0x1F;
3440 pipeline[plPtrRead].operand2 = pipeline[plPtrRead].instruction & 0x1F;
3441 if (pipeline[plPtrRead].opcode == 38)
3442 pipeline[plPtrRead].result = (uint32_t)DSPReadWord(dsp_pc + 2, DSP)
3443 | ((uint32_t)DSPReadWord(dsp_pc + 4, DSP) << 16);
3444 #ifdef DSP_DEBUG_PL3
3445 WriteLog("DSPExecP: Fetching instruction (%04X) from DSP_PC = %08X...\n", pipeline[plPtrRead].instruction, dsp_pc);
3446 WriteLog("DSPExecP: Pipeline status (after stage 1a)...\n");
3447 WriteLog("\tF/R -> %02u, %02u, %02u; r1=%08X, r2= %08X, res=%08X, wb=%u (%s)\n", pipeline[plPtrRead].opcode, pipeline[plPtrRead].operand1, pipeline[plPtrRead].operand2, pipeline[plPtrRead].reg1, pipeline[plPtrRead].reg2, pipeline[plPtrRead].result, pipeline[plPtrRead].writebackRegister, dsp_opcode_str[pipeline[plPtrRead].opcode]);
3448 WriteLog("\tE/W -> %02u, %02u, %02u; r1=%08X, r2= %08X, res=%08X, wb=%u (%s)\n", pipeline[plPtrExec].opcode, pipeline[plPtrExec].operand1, pipeline[plPtrExec].operand2, pipeline[plPtrExec].reg1, pipeline[plPtrExec].reg2, pipeline[plPtrExec].result, pipeline[plPtrExec].writebackRegister, dsp_opcode_str[pipeline[plPtrExec].opcode]);
3450 // Stage 1b: Read registers
3451 if ((scoreboard[pipeline[plPtrRead].operand1] && readAffected[pipeline[plPtrRead].opcode][0])
3452 || (scoreboard[pipeline[plPtrRead].operand2] && readAffected[pipeline[plPtrRead].opcode][1]))
3453 // We have a hit in the scoreboard, so we have to stall the pipeline...
3454 #ifdef DSP_DEBUG_PL3
3456 WriteLog(" --> Stalling pipeline: ");
3457 if (readAffected[pipeline[plPtrRead].opcode][0])
3458 WriteLog("scoreboard[%u] = %s (reg 1) ", pipeline[plPtrRead].operand1, scoreboard[pipeline[plPtrRead].operand1] ? "true" : "false");
3459 if (readAffected[pipeline[plPtrRead].opcode][1])
3460 WriteLog("scoreboard[%u] = %s (reg 2)", pipeline[plPtrRead].operand2, scoreboard[pipeline[plPtrRead].operand2] ? "true" : "false");
3463 pipeline[plPtrRead].opcode = PIPELINE_STALL;
3464 #ifdef DSP_DEBUG_PL3
3469 pipeline[plPtrRead].reg1 = dsp_reg[pipeline[plPtrRead].operand1];
3470 pipeline[plPtrRead].reg2 = dsp_reg[pipeline[plPtrRead].operand2];
3471 pipeline[plPtrRead].writebackRegister = pipeline[plPtrRead].operand2; // Set it to RN
3473 // Shouldn't we be more selective with the register scoreboarding?
3474 // Yes, we should. !!! FIX !!! [Kinda DONE]
3475 scoreboard[pipeline[plPtrRead].operand2] = affectsScoreboard[pipeline[plPtrRead].opcode];
3477 //Advance PC here??? Yes.
3478 dsp_pc += (pipeline[plPtrRead].opcode == 38 ? 6 : 2);
3481 #ifdef DSP_DEBUG_PL3
3482 WriteLog("DSPExecP: Pipeline status (after stage 1b)...\n");
3483 WriteLog("\tF/R -> %02u, %02u, %02u; r1=%08X, r2= %08X, res=%08X, wb=%u (%s)\n", pipeline[plPtrRead].opcode, pipeline[plPtrRead].operand1, pipeline[plPtrRead].operand2, pipeline[plPtrRead].reg1, pipeline[plPtrRead].reg2, pipeline[plPtrRead].result, pipeline[plPtrRead].writebackRegister, dsp_opcode_str[pipeline[plPtrRead].opcode]);
3484 WriteLog("\tE/W -> %02u, %02u, %02u; r1=%08X, r2= %08X, res=%08X, wb=%u (%s)\n", pipeline[plPtrExec].opcode, pipeline[plPtrExec].operand1, pipeline[plPtrExec].operand2, pipeline[plPtrExec].reg1, pipeline[plPtrExec].reg2, pipeline[plPtrExec].result, pipeline[plPtrExec].writebackRegister, dsp_opcode_str[pipeline[plPtrExec].opcode]);
3486 // Stage 2a: Execute
3487 if (pipeline[plPtrExec].opcode != PIPELINE_STALL)
3489 #ifdef DSP_DEBUG_PL3
3490 WriteLog("DSPExecP: About to execute opcode %s...\n", dsp_opcode_str[pipeline[plPtrExec].opcode]);
3492 DSPOpcode[pipeline[plPtrExec].opcode]();
3493 dsp_opcode_use[pipeline[plPtrExec].opcode]++;
3494 cycles -= dsp_opcode_cycles[pipeline[plPtrExec].opcode];
3499 #ifdef DSP_DEBUG_PL3
3500 WriteLog("DSPExecP: Pipeline status (after stage 2a)...\n");
3501 WriteLog("\tF/R -> %02u, %02u, %02u; r1=%08X, r2= %08X, res=%08X, wb=%u (%s)\n", pipeline[plPtrRead].opcode, pipeline[plPtrRead].operand1, pipeline[plPtrRead].operand2, pipeline[plPtrRead].reg1, pipeline[plPtrRead].reg2, pipeline[plPtrRead].result, pipeline[plPtrRead].writebackRegister, dsp_opcode_str[pipeline[plPtrRead].opcode]);
3502 WriteLog("\tE/W -> %02u, %02u, %02u; r1=%08X, r2= %08X, res=%08X, wb=%u (%s)\n", pipeline[plPtrExec].opcode, pipeline[plPtrExec].operand1, pipeline[plPtrExec].operand2, pipeline[plPtrExec].reg1, pipeline[plPtrExec].reg2, pipeline[plPtrExec].result, pipeline[plPtrExec].writebackRegister, dsp_opcode_str[pipeline[plPtrExec].opcode]);
3505 // Stage 2b: Write back register
3506 if (pipeline[plPtrExec].opcode != PIPELINE_STALL)
3508 if (pipeline[plPtrExec].writebackRegister != 0xFF)
3509 dsp_reg[pipeline[plPtrExec].writebackRegister] = pipeline[plPtrExec].result;
3511 if (affectsScoreboard[pipeline[plPtrExec].opcode])
3512 scoreboard[pipeline[plPtrExec].operand2] = false;
3515 // Push instructions through the pipeline...
// Advance with (x + 1) rather than (++x): assigning the result of ++x back to
// x modifies x twice without sequencing, which is undefined behavior in C.
3516 plPtrRead = (plPtrRead + 1) & 0x03;
3517 plPtrExec = (plPtrExec + 1) & 0x03;
3524 // DSP pipelined opcode handlers
// Shorthand used by the pipelined opcode handlers. All of these refer to the
// pipeline slot currently being executed (pipeline[plPtrExec]):
//   PRM / PRN      - latched values of the first / second register operand
//   PIMM1 / PIMM2  - raw 5-bit operand fields of the instruction
//   PRES           - result slot that the writeback stage stores
//   PWBR           - register index the writeback stage stores PRES into
//   NO_WRITEBACK   - marks the slot so the writeback stage stores nothing (0xFF)
//   WRITEBACK_ADDR - marks the slot with 0xFE, which the writeback stage treats
//                    as "write to memory" instead of to a register
3527 #define PRM pipeline[plPtrExec].reg1
3528 #define PRN pipeline[plPtrExec].reg2
3529 #define PIMM1 pipeline[plPtrExec].operand1
3530 #define PIMM2 pipeline[plPtrExec].operand2
3531 #define PRES pipeline[plPtrExec].result
3532 #define PWBR pipeline[plPtrExec].writebackRegister
3533 #define NO_WRITEBACK pipeline[plPtrExec].writebackRegister = 0xFF
3534 //#define DSP_PPC dsp_pc - (pipeline[plPtrRead].opcode == 38 ? 6 : 2) - (pipeline[plPtrExec].opcode == 38 ? 6 : 2)
// DSP_PPC: address of the instruction being executed, i.e. dsp_pc minus the
// sizes of the instructions in the read and execute slots (6 bytes for opcode
// 38, 0 for a stalled slot, 2 otherwise). The expansion is fully parenthesized
// so it stays one value when used inside a larger expression.
3535 #define DSP_PPC (dsp_pc - (pipeline[plPtrRead].opcode == 38 ? 6 : (pipeline[plPtrRead].opcode == PIPELINE_STALL ? 0 : 2)) - (pipeline[plPtrExec].opcode == 38 ? 6 : (pipeline[plPtrExec].opcode == PIPELINE_STALL ? 0 : 2)))
3536 #define WRITEBACK_ADDR pipeline[plPtrExec].writebackRegister = 0xFE
3538 static void DSP_abs(void)
3542 WriteLog("%06X: ABS R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", DSP_PPC, PIMM2, dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM2, PRN);
3546 if (_Rn == 0x80000000)
3550 dsp_flag_c = ((_Rn & 0x80000000) >> 31);
3551 PRES = (_Rn & 0x80000000 ? -_Rn : _Rn);
3552 CLR_ZN; SET_Z(PRES);
3556 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM2, PRES);
3560 static void DSP_add(void)
3564 WriteLog("%06X: ADD R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", DSP_PPC, PIMM1, PIMM2, dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM1, PRM, PIMM2, PRN);
3566 uint32_t res = PRN + PRM;
3567 SET_ZNC_ADD(PRN, PRM, res);
3571 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM1, PRM, PIMM2, PRES);
3575 static void DSP_addc(void)
3579 WriteLog("%06X: ADDC R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", DSP_PPC, PIMM1, PIMM2, dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM1, PRM, PIMM2, PRN);
3581 uint32_t res = PRN + PRM + dsp_flag_c;
3582 uint32_t carry = dsp_flag_c;
3583 // SET_ZNC_ADD(PRN, PRM, res); //???BUG??? Yes!
3584 SET_ZNC_ADD(PRN + carry, PRM, res);
3585 // SET_ZNC_ADD(PRN, PRM + carry, res);
3589 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM1, PRM, PIMM2, PRES);
3593 static void DSP_addq(void)
3597 WriteLog("%06X: ADDQ #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", DSP_PPC, dsp_convert_zero[PIMM1], PIMM2, dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM2, PRN);
3599 uint32_t r1 = dsp_convert_zero[PIMM1];
3600 uint32_t res = PRN + r1;
3601 CLR_ZNC; SET_ZNC_ADD(PRN, r1, res);
3605 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM2, PRES);
3609 static void DSP_addqmod(void)
3611 #ifdef DSP_DIS_ADDQMOD
3613 WriteLog("%06X: ADDQMOD #%u, R%02u [NCZ:%u%u%u, R%02u=%08X, DSP_MOD=%08X] -> ", DSP_PPC, dsp_convert_zero[PIMM1], PIMM2, dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM2, PRN, dsp_modulo);
3615 uint32_t r1 = dsp_convert_zero[PIMM1];
3617 uint32_t res = r2 + r1;
3618 res = (res & (~dsp_modulo)) | (r2 & dsp_modulo);
3620 SET_ZNC_ADD(r2, r1, res);
3621 #ifdef DSP_DIS_ADDQMOD
3623 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM2, PRES);
3627 static void DSP_addqt(void)
3629 #ifdef DSP_DIS_ADDQT
3631 WriteLog("%06X: ADDQT #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", DSP_PPC, dsp_convert_zero[PIMM1], PIMM2, dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM2, PRN);
3633 PRES = PRN + dsp_convert_zero[PIMM1];
3634 #ifdef DSP_DIS_ADDQT
3636 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM2, PRES);
3640 static void DSP_and(void)
3644 WriteLog("%06X: AND R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", DSP_PPC, PIMM1, PIMM2, dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM1, PRM, PIMM2, PRN);
3650 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM1, PRM, PIMM2, PRES);
// BCLR #n, Rn (pipelined): result is Rn with bit number PIMM1 cleared.
// The WriteLog calls trace the operands before and the result after.
3654 static void DSP_bclr(void)
3658 WriteLog("%06X: BCLR #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", DSP_PPC, PIMM1, PIMM2, dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM2, PRN);
// PIMM1 is a 5-bit field (0..31); build the mask from an unsigned one so that
// bit 31 does not overflow a signed int shift.
3660 PRES = PRN & ~((uint32_t)1 << PIMM1);
3664 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM2, PRES);
// BSET #n, Rn (pipelined): result is Rn with bit number PIMM1 set.
// The WriteLog calls trace the operands before and the result after.
3668 static void DSP_bset(void)
3672 WriteLog("%06X: BSET #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", DSP_PPC, PIMM1, PIMM2, dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM2, PRN);
// PIMM1 is a 5-bit field (0..31); build the mask from an unsigned one so that
// bit 31 does not overflow a signed int shift.
3674 PRES = PRN | ((uint32_t)1 << PIMM1);
3678 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM2, PRES);
3682 static void DSP_btst(void)
3686 WriteLog("%06X: BTST #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", DSP_PPC, PIMM1, PIMM2, dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM2, PRN);
3688 dsp_flag_z = (~PRN >> PIMM1) & 1;
3692 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM2, PRN);
3696 static void DSP_cmp(void)
3700 WriteLog("%06X: CMP R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", DSP_PPC, PIMM1, PIMM2, dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM1, PRM, PIMM2, PRN);
3702 uint32_t res = PRN - PRM;
3703 SET_ZNC_SUB(PRN, PRM, res);
3707 WriteLog("[NCZ:%u%u%u]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z);
3711 static void DSP_cmpq(void)
3713 static int32_t sqtable[32] =
3714 { 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,-16,-15,-14,-13,-12,-11,-10,-9,-8,-7,-6,-5,-4,-3,-2,-1 };
3717 WriteLog("%06X: CMPQ #%d, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", DSP_PPC, sqtable[PIMM1], PIMM2, dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM2, PRN);
3719 uint32_t r1 = sqtable[PIMM1 & 0x1F]; // I like this better -> (INT8)(jaguar.op >> 2) >> 3;
3720 uint32_t res = PRN - r1;
3721 SET_ZNC_SUB(PRN, r1, res);
3725 WriteLog("[NCZ:%u%u%u]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z);
3729 static void DSP_div(void)
3731 uint32_t _Rm = PRM, _Rn = PRN;
3735 if (dsp_div_control & 1)
3737 dsp_remain = (((uint64_t)_Rn) << 16) % _Rm;
3738 if (dsp_remain & 0x80000000)
3740 PRES = (((uint64_t)_Rn) << 16) / _Rm;
3744 dsp_remain = _Rn % _Rm;
3745 if (dsp_remain & 0x80000000)
3754 static void DSP_imacn(void)
3756 #ifdef DSP_DIS_IMACN
3758 WriteLog("%06X: IMACN R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", DSP_PPC, PIMM1, PIMM2, dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM1, PRM, PIMM2, PRN);
3760 int32_t res = (int16_t)PRM * (int16_t)PRN;
3761 dsp_acc += (int64_t)res;
3762 //Should we AND the result to fit into 40 bits here???
3764 #ifdef DSP_DIS_IMACN
3766 WriteLog("[NCZ:%u%u%u, DSP_ACC=%02X%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, (uint8_t)(dsp_acc >> 32), (uint32_t)(dsp_acc & 0xFFFFFFFF));
3770 static void DSP_imult(void)
3772 #ifdef DSP_DIS_IMULT
3774 WriteLog("%06X: IMULT R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", DSP_PPC, PIMM1, PIMM2, dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM1, PRM, PIMM2, PRN);
3776 PRES = (int16_t)PRN * (int16_t)PRM;
3778 #ifdef DSP_DIS_IMULT
3780 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM1, PRM, PIMM2, PRES);
3784 static void DSP_imultn(void)
3786 #ifdef DSP_DIS_IMULTN
3788 WriteLog("%06X: IMULTN R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", DSP_PPC, PIMM1, PIMM2, dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM1, PRM, PIMM2, PRN);
3790 // This is OK, since this multiply won't overflow 32 bits...
3791 int32_t res = (int32_t)((int16_t)PRN * (int16_t)PRM);
3792 dsp_acc = (int64_t)res;
3795 #ifdef DSP_DIS_IMULTN
3797 WriteLog("[NCZ:%u%u%u, DSP_ACC=%02X%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, (uint8_t)(dsp_acc >> 32), (uint32_t)(dsp_acc & 0xFFFFFFFF));
3801 static void DSP_illegal(void)
3803 #ifdef DSP_DIS_ILLEGAL
3805 WriteLog("%06X: ILLEGAL [NCZ:%u%u%u]\n", DSP_PPC, dsp_flag_n, dsp_flag_c, dsp_flag_z);
3810 // There is a problem here with interrupt handlers and the JUMP and JR instructions:
3811 // an interrupt can occur *before* the instruction following the jump has executed,
3812 // which can cause trouble... !!! FIX !!!
3813 // This can probably be solved by judicious coding in the pipeline execution core...
3814 // And should be fixed now...
3815 static void DSP_jr(void)
3818 const char * condition[32] =
3819 { "T", "nz", "z", "???", "nc", "nc nz", "nc z", "???", "c", "c nz",
3820 "c z", "???", "???", "???", "???", "???", "???", "???", "???",
3821 "???", "nn", "nn nz", "nn z", "???", "n", "n nz", "n z", "???",
3822 "???", "???", "???", "F" };
3824 //How come this is always off by 2???
3825 WriteLog("%06X: JR %s, %06X [NCZ:%u%u%u] ", DSP_PPC, condition[PIMM2], DSP_PPC+((PIMM1 & 0x10 ? 0xFFFFFFF0 | PIMM1 : PIMM1) * 2)+2, dsp_flag_n, dsp_flag_c, dsp_flag_z);
3827 // KLUDGE: Used by BRANCH_CONDITION macro
3828 uint32_t jaguar_flags = (dsp_flag_n << 2) | (dsp_flag_c << 1) | dsp_flag_z;
3830 if (BRANCH_CONDITION(PIMM2))
3834 WriteLog("Branched!\n");
3836 int32_t offset = (PIMM1 & 0x10 ? 0xFFFFFFF0 | PIMM1 : PIMM1); // Sign extend PIMM1
3837 //Account for pipeline effects...
3838 uint32_t newPC = dsp_pc + (offset * 2) - (pipeline[plPtrRead].opcode == 38 ? 6 : (pipeline[plPtrRead].opcode == PIPELINE_STALL ? 0 : 2));
3839 //WriteLog(" --> Old PC: %08X, new PC: %08X\n", dsp_pc, newPC);
3841 // Now that we've branched, we have to make sure that the following instruction
3842 // is executed atomically with this one and then flush the pipeline before setting
3845 // Step 1: Handle writebacks at stage 3 of pipeline
3846 /* if (pipeline[plPtrWrite].opcode != PIPELINE_STALL)
3848 if (pipeline[plPtrWrite].writebackRegister != 0xFF)
3849 dsp_reg[pipeline[plPtrWrite].writebackRegister] = pipeline[plPtrWrite].result;
3851 if (affectsScoreboard[pipeline[plPtrWrite].opcode])
3852 scoreboard[pipeline[plPtrWrite].operand2] = false;
3854 if (pipeline[plPtrWrite].opcode != PIPELINE_STALL)
3856 if (pipeline[plPtrWrite].writebackRegister != 0xFF)
3858 if (pipeline[plPtrWrite].writebackRegister != 0xFE)
3859 dsp_reg[pipeline[plPtrWrite].writebackRegister] = pipeline[plPtrWrite].result;
3862 if (pipeline[plPtrWrite].type == TYPE_BYTE)
3863 JaguarWriteByte(pipeline[plPtrWrite].address, pipeline[plPtrWrite].value);
3864 else if (pipeline[plPtrWrite].type == TYPE_WORD)
3865 JaguarWriteWord(pipeline[plPtrWrite].address, pipeline[plPtrWrite].value);
3867 JaguarWriteLong(pipeline[plPtrWrite].address, pipeline[plPtrWrite].value);
3871 #ifndef NEW_SCOREBOARD
3872 if (affectsScoreboard[pipeline[plPtrWrite].opcode])
3873 scoreboard[pipeline[plPtrWrite].operand2] = false;
3875 //Yup, sequential MOVEQ # problem fixing (I hope!)...
3876 if (affectsScoreboard[pipeline[plPtrWrite].opcode])
3877 if (scoreboard[pipeline[plPtrWrite].operand2])
3878 scoreboard[pipeline[plPtrWrite].operand2]--;
3882 // Step 2: Push instruction through pipeline & execute following instruction
3883 // NOTE: By putting our following instruction at stage 3 of the pipeline,
3884 // we effectively handle the final push of the instruction through the
3885 // pipeline when the new PC takes effect (since when we return, the
3886 // pipeline code will be executing the writeback stage. If we reverse
3887 // the execution order of the pipeline stages, this will no longer be
3889 pipeline[plPtrExec] = pipeline[plPtrRead];
3890 //This is BAD. We need to get that next opcode and execute it!
3891 //NOTE: The problem is here because of a bad stall. Once those are fixed, we can probably
3892 // remove this crap.
3893 if (pipeline[plPtrExec].opcode == PIPELINE_STALL)
3895 uint16_t instruction = DSPReadWord(dsp_pc, DSP);
3896 pipeline[plPtrExec].opcode = instruction >> 10;
3897 pipeline[plPtrExec].operand1 = (instruction >> 5) & 0x1F;
3898 pipeline[plPtrExec].operand2 = instruction & 0x1F;
3899 pipeline[plPtrExec].reg1 = dsp_reg[pipeline[plPtrExec].operand1];
3900 pipeline[plPtrExec].reg2 = dsp_reg[pipeline[plPtrExec].operand2];
3901 pipeline[plPtrExec].writebackRegister = pipeline[plPtrExec].operand2; // Set it to RN
3903 dsp_pc += 2; // For DSP_DIS_* accuracy
3904 DSPOpcode[pipeline[plPtrExec].opcode]();
3905 dsp_opcode_use[pipeline[plPtrExec].opcode]++;
3906 pipeline[plPtrWrite] = pipeline[plPtrExec];
3908 // Step 3: Flush pipeline & set new PC
3909 pipeline[plPtrRead].opcode = pipeline[plPtrExec].opcode = PIPELINE_STALL;
3916 WriteLog("Branch NOT taken.\n");
3922 // WriteLog(" --> DSP_PC: %08X\n", dsp_pc);
3925 static void DSP_jump(void)
3928 const char * condition[32] =
3929 { "T", "nz", "z", "???", "nc", "nc nz", "nc z", "???", "c", "c nz",
3930 "c z", "???", "???", "???", "???", "???", "???", "???", "???",
3931 "???", "nn", "nn nz", "nn z", "???", "n", "n nz", "n z", "???",
3932 "???", "???", "???", "F" };
3934 WriteLog("%06X: JUMP %s, (R%02u) [NCZ:%u%u%u, R%02u=%08X] ", DSP_PPC, condition[PIMM2], PIMM1, dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM1, PRM);
3936 // KLUDGE: Used by BRANCH_CONDITION macro
3937 uint32_t jaguar_flags = (dsp_flag_n << 2) | (dsp_flag_c << 1) | dsp_flag_z;
3939 if (BRANCH_CONDITION(PIMM2))
3943 WriteLog("Branched!\n");
3945 uint32_t PCSave = PRM;
3946 // Now that we've branched, we have to make sure that the following instruction
3947 // is executed atomically with this one and then flush the pipeline before setting
3950 // Step 1: Handle writebacks at stage 3 of pipeline
3951 /* if (pipeline[plPtrWrite].opcode != PIPELINE_STALL)
3953 if (pipeline[plPtrWrite].writebackRegister != 0xFF)
3954 dsp_reg[pipeline[plPtrWrite].writebackRegister] = pipeline[plPtrWrite].result;
3956 if (affectsScoreboard[pipeline[plPtrWrite].opcode])
3957 scoreboard[pipeline[plPtrWrite].operand2] = false;
3959 if (pipeline[plPtrWrite].opcode != PIPELINE_STALL)
3961 if (pipeline[plPtrWrite].writebackRegister != 0xFF)
3963 if (pipeline[plPtrWrite].writebackRegister != 0xFE)
3964 dsp_reg[pipeline[plPtrWrite].writebackRegister] = pipeline[plPtrWrite].result;
3967 if (pipeline[plPtrWrite].type == TYPE_BYTE)
3968 JaguarWriteByte(pipeline[plPtrWrite].address, pipeline[plPtrWrite].value);
3969 else if (pipeline[plPtrWrite].type == TYPE_WORD)
3970 JaguarWriteWord(pipeline[plPtrWrite].address, pipeline[plPtrWrite].value);
3972 JaguarWriteLong(pipeline[plPtrWrite].address, pipeline[plPtrWrite].value);
3976 #ifndef NEW_SCOREBOARD
3977 if (affectsScoreboard[pipeline[plPtrWrite].opcode])
3978 scoreboard[pipeline[plPtrWrite].operand2] = false;
3980 //Yup, sequential MOVEQ # problem fixing (I hope!)...
3981 if (affectsScoreboard[pipeline[plPtrWrite].opcode])
3982 if (scoreboard[pipeline[plPtrWrite].operand2])
3983 scoreboard[pipeline[plPtrWrite].operand2]--;
3987 // Step 2: Push instruction through pipeline & execute following instruction
3988 // NOTE: By putting our following instruction at stage 3 of the pipeline,
3989 // we effectively handle the final push of the instruction through the
3990 // pipeline when the new PC takes effect (since when we return, the
3991 // pipeline code will be executing the writeback stage. If we reverse
3992 // the execution order of the pipeline stages, this will no longer be
3994 pipeline[plPtrExec] = pipeline[plPtrRead];
3995 //This is BAD. We need to get that next opcode and execute it!
3996 //Also, same problem in JR!
3997 //NOTE: The problem is here because of a bad stall. Once those are fixed, we can probably
3998 // remove this crap.
3999 if (pipeline[plPtrExec].opcode == PIPELINE_STALL)
4001 uint16_t instruction = DSPReadWord(dsp_pc, DSP);
4002 pipeline[plPtrExec].opcode = instruction >> 10;
4003 pipeline[plPtrExec].operand1 = (instruction >> 5) & 0x1F;
4004 pipeline[plPtrExec].operand2 = instruction & 0x1F;
4005 pipeline[plPtrExec].reg1 = dsp_reg[pipeline[plPtrExec].operand1];
4006 pipeline[plPtrExec].reg2 = dsp_reg[pipeline[plPtrExec].operand2];
4007 pipeline[plPtrExec].writebackRegister = pipeline[plPtrExec].operand2; // Set it to RN
4009 dsp_pc += 2; // For DSP_DIS_* accuracy
4010 DSPOpcode[pipeline[plPtrExec].opcode]();
4011 dsp_opcode_use[pipeline[plPtrExec].opcode]++;
4012 pipeline[plPtrWrite] = pipeline[plPtrExec];
4014 // Step 3: Flush pipeline & set new PC
4015 pipeline[plPtrRead].opcode = pipeline[plPtrExec].opcode = PIPELINE_STALL;
4022 WriteLog("Branch NOT taken.\n");
4030 static void DSP_load(void)
4034 WriteLog("%06X: LOAD (R%02u), R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", DSP_PPC, PIMM1, PIMM2, dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM1, PRM, PIMM2, PRN);
4036 #ifdef DSP_CORRECT_ALIGNMENT
4037 PRES = DSPReadLong(PRM & 0xFFFFFFFC, DSP);
4039 PRES = DSPReadLong(PRM, DSP);
4043 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM2, PRES);
4047 static void DSP_loadb(void)
4049 #ifdef DSP_DIS_LOADB
4051 WriteLog("%06X: LOADB (R%02u), R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", DSP_PPC, PIMM1, PIMM2, dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM1, PRM, PIMM2, PRN);
4053 if (PRM >= DSP_WORK_RAM_BASE && PRM <= (DSP_WORK_RAM_BASE + 0x1FFF))
4054 PRES = DSPReadLong(PRM, DSP) & 0xFF;
4056 PRES = JaguarReadByte(PRM, DSP);
4057 #ifdef DSP_DIS_LOADB
4059 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM2, PRES);
4063 static void DSP_loadw(void)
4065 #ifdef DSP_DIS_LOADW
4067 WriteLog("%06X: LOADW (R%02u), R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", DSP_PPC, PIMM1, PIMM2, dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM1, PRM, PIMM2, PRN);
4069 #ifdef DSP_CORRECT_ALIGNMENT
4070 if (PRM >= DSP_WORK_RAM_BASE && PRM <= (DSP_WORK_RAM_BASE + 0x1FFF))
4071 PRES = DSPReadLong(PRM & 0xFFFFFFFE, DSP) & 0xFFFF;
4073 PRES = JaguarReadWord(PRM & 0xFFFFFFFE, DSP);
4075 if (PRM >= DSP_WORK_RAM_BASE && PRM <= (DSP_WORK_RAM_BASE + 0x1FFF))
4076 PRES = DSPReadLong(PRM, DSP) & 0xFFFF;
4078 PRES = JaguarReadWord(PRM, DSP);
4080 #ifdef DSP_DIS_LOADW
4082 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM2, PRES);
4086 static void DSP_load_r14_i(void)
4088 #ifdef DSP_DIS_LOAD14I
4090 WriteLog("%06X: LOAD (R14+$%02X), R%02u [NCZ:%u%u%u, R14+$%02X=%08X, R%02u=%08X] -> ", DSP_PPC, dsp_convert_zero[PIMM1] << 2, PIMM2, dsp_flag_n, dsp_flag_c, dsp_flag_z, dsp_convert_zero[PIMM1] << 2, dsp_reg[14]+(dsp_convert_zero[PIMM1] << 2), PIMM2, PRN);
4092 #ifdef DSP_CORRECT_ALIGNMENT
4093 PRES = DSPReadLong((dsp_reg[14] & 0xFFFFFFFC) + (dsp_convert_zero[PIMM1] << 2), DSP);
4095 PRES = DSPReadLong(dsp_reg[14] + (dsp_convert_zero[PIMM1] << 2), DSP);
4097 #ifdef DSP_DIS_LOAD14I
4099 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM2, PRES);
// LOAD (R14+Rm), Rn (pipelined): reads a 32-bit value from address R14 + Rm
// into the result slot. With DSP_CORRECT_ALIGNMENT defined the address is
// forced to a 4-byte boundary before the read.
4103 static void DSP_load_r14_r(void)
4105 #ifdef DSP_DIS_LOAD14R
// Trace the current Rn (PRN) as the "before" value; PRES has not been written
// for this instruction yet.
4107 WriteLog("%06X: LOAD (R14+R%02u), R%02u [NCZ:%u%u%u, R14+R%02u=%08X, R%02u=%08X] -> ", DSP_PPC, PIMM1, PIMM2, dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM1, PRM+dsp_reg[14], PIMM2, PRN);
4109 #ifdef DSP_CORRECT_ALIGNMENT
4110 PRES = DSPReadLong((dsp_reg[14] + PRM) & 0xFFFFFFFC, DSP);
4112 PRES = DSPReadLong(dsp_reg[14] + PRM, DSP);
4114 #ifdef DSP_DIS_LOAD14R
4116 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM2, PRES);
4120 static void DSP_load_r15_i(void)
4122 #ifdef DSP_DIS_LOAD15I
4124 WriteLog("%06X: LOAD (R15+$%02X), R%02u [NCZ:%u%u%u, R15+$%02X=%08X, R%02u=%08X] -> ", DSP_PPC, dsp_convert_zero[PIMM1] << 2, PIMM2, dsp_flag_n, dsp_flag_c, dsp_flag_z, dsp_convert_zero[PIMM1] << 2, dsp_reg[15]+(dsp_convert_zero[PIMM1] << 2), PIMM2, PRN);
4126 #ifdef DSP_CORRECT_ALIGNMENT
4127 PRES = DSPReadLong((dsp_reg[15] &0xFFFFFFFC) + (dsp_convert_zero[PIMM1] << 2), DSP);
4129 PRES = DSPReadLong(dsp_reg[15] + (dsp_convert_zero[PIMM1] << 2), DSP);
4131 #ifdef DSP_DIS_LOAD15I
4133 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM2, PRES);
4137 static void DSP_load_r15_r(void)
4139 #ifdef DSP_DIS_LOAD15R
4141 WriteLog("%06X: LOAD (R15+R%02u), R%02u [NCZ:%u%u%u, R15+R%02u=%08X, R%02u=%08X] -> ", DSP_PPC, PIMM1, PIMM2, dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM1, PRM+dsp_reg[15], PIMM2, PRN);
4143 #ifdef DSP_CORRECT_ALIGNMENT
4144 PRES = DSPReadLong((dsp_reg[15] + PRM) & 0xFFFFFFFC, DSP);
4146 PRES = DSPReadLong(dsp_reg[15] + PRM, DSP);
4148 #ifdef DSP_DIS_LOAD15R
4150 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM2, PRES);
4154 static void DSP_mirror(void)
4157 PRES = (mirror_table[r1 & 0xFFFF] << 16) | mirror_table[r1 >> 16];
// MMULT (pipelined): multiply-accumulate over 'count' 16-bit pairs, where
// count is the low 4 bits of dsp_matrix_control. One factor of each pair is a
// 16-bit half of an alternate-bank register (high or low half, two elements
// per register via i>>1); the other is a word read from DSP memory starting at
// dsp_pointer_to_matrix. Bit 4 of dsp_matrix_control selects between the two
// loops below. The 32-bit truncation of the accumulated sum becomes the result.
//
// NOTE(review): the register index here comes from dsp_opcode_first_parameter,
// whereas the other pipelined handlers take their operand from PIMM1
// (pipeline[plPtrExec].operand1). Confirm that dsp_opcode_first_parameter is
// kept up to date when running through the pipelined core.
4161 static void DSP_mmult(void)
4163 int count = dsp_matrix_control&0x0f;
4164 uint32_t addr = dsp_pointer_to_matrix; // in the dsp ram
// Bit 4 clear: first traversal variant.
4168 if (!(dsp_matrix_control & 0x10))
4170 for (int i = 0; i < count; i++)
// 'a' is the high or the low 16-bit half of the alternate register.
4174 a=(int16_t)((dsp_alternate_reg[dsp_opcode_first_parameter + (i>>1)]>>16)&0xffff);
4176 a=(int16_t)(dsp_alternate_reg[dsp_opcode_first_parameter + (i>>1)]&0xffff);
// 'b' is the matrix element fetched from DSP memory.
4177 int16_t b=((int16_t)DSPReadWord(addr + 2, DSP));
// Second traversal variant (bit 4 set) -- presumably differs only in how addr
// is stepped; the stepping lines are not visible here, TODO confirm.
4184 for (int i = 0; i < count; i++)
4188 a=(int16_t)((dsp_alternate_reg[dsp_opcode_first_parameter + (i>>1)]>>16)&0xffff);
4190 a=(int16_t)(dsp_alternate_reg[dsp_opcode_first_parameter + (i>>1)]&0xffff);
4191 int16_t b=((int16_t)DSPReadWord(addr + 2, DSP));
// The accumulated sum is truncated to 32 bits for the result slot.
4197 PRES = res = (int32_t)accum;
4199 //NOTE: The flags are set based upon the last add/multiply done...
// MOVE Rm, Rn handler. Only the trace calls are visible in this listing.
4203 static void DSP_move(void)
// Pre-execution trace: both operands and the current flags.
4207 WriteLog("%06X: MOVE R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", DSP_PPC, PIMM1, PIMM2, dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM1, PRM, PIMM2, PRN);
// Post-execution trace: the destination is reported from PRES.
4212 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM1, PRM, PIMM2, PRES);
// MOVEFA Rm, Rn handler: copies register PIMM1 of the alternate bank
// (dsp_alternate_reg) into PRES.
4216 static void DSP_movefa(void)
4218 #ifdef DSP_DIS_MOVEFA
// Pre-execution trace; the commented-out variant used the ALTERNATE_RM macro.
4220 // WriteLog("%06X: MOVEFA R%02u, R%02u [NCZ:%u%u%u, R%02u(alt)=%08X, R%02u=%08X] -> ", DSP_PPC, PIMM1, PIMM2, dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM1, ALTERNATE_RM, PIMM2, PRN);
4221 WriteLog("%06X: MOVEFA R%02u, R%02u [NCZ:%u%u%u, R%02u(alt)=%08X, R%02u=%08X] -> ", DSP_PPC, PIMM1, PIMM2, dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM1, dsp_alternate_reg[PIMM1], PIMM2, PRN);
4223 // PRES = ALTERNATE_RM;
// The alternate bank is indexed directly with the source register number.
4224 PRES = dsp_alternate_reg[PIMM1];
4225 #ifdef DSP_DIS_MOVEFA
// Post-execution trace: source (alternate bank) and the value now in PRES.
4227 // WriteLog("[NCZ:%u%u%u, R%02u(alt)=%08X, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM1, ALTERNATE_RM, PIMM2, PRN);
4228 WriteLog("[NCZ:%u%u%u, R%02u(alt)=%08X, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM1, dsp_alternate_reg[PIMM1], PIMM2, PRES);
// MOVEI #imm32, Rn handler. No live assignment to PRES is visible here; the
// pre-execution trace already prints PRES as the immediate, so the value is
// presumably fetched before this handler runs -- confirm against the pipeline
// read stage.
4232 static void DSP_movei(void)
4234 #ifdef DSP_DIS_MOVEI
// Pre-execution trace: PRES is shown as the immediate, PRN as the old destination.
4236 WriteLog("%06X: MOVEI #$%08X, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", DSP_PPC, PRES, PIMM2, dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM2, PRN);
// Disabled in-handler fetch of the immediate (low word first, then high word).
4238 // // This instruction is followed by 32-bit value in LSW / MSW format...
4239 // PRES = (uint32_t)DSPReadWord(dsp_pc, DSP) | ((uint32_t)DSPReadWord(dsp_pc + 2, DSP) << 16);
4241 #ifdef DSP_DIS_MOVEI
// Post-execution trace: flags and PRES.
4243 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM2, PRES);
// MOVE PC, Rn handler: stores a pipeline-corrected program counter in PRES.
4247 static void DSP_movepc(void)
4249 #ifdef DSP_DIS_MOVEPC
// Pre-execution trace: destination register and flags.
4251 WriteLog("%06X: MOVE PC, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", DSP_PPC, PIMM2, dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM2, PRN);
4253 //Need to fix this to take into account pipelining effects... !!! FIX !!! [DONE]
4254 // PRES = dsp_pc - 2;
4255 //Account for pipeline effects...
// On top of the base "- 2", subtract what the entry at plPtrRead has consumed:
// 6 when its opcode is 38, 0 when it is PIPELINE_STALL, otherwise 2.
// Opcode 38 is presumably the one carrying a 32-bit immediate -- confirm.
4256 PRES = dsp_pc - 2 - (pipeline[plPtrRead].opcode == 38 ? 6 : (pipeline[plPtrRead].opcode == PIPELINE_STALL ? 0 : 2));
4257 #ifdef DSP_DIS_MOVEPC
// Post-execution trace: flags and PRES.
4259 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM2, PRES);
// MOVEQ #n, Rn handler. Only the trace calls are visible in this listing; the
// trace prints PIMM1 as the quick immediate.
4263 static void DSP_moveq(void)
4265 #ifdef DSP_DIS_MOVEQ
// Pre-execution trace: immediate, destination register and flags.
4267 WriteLog("%06X: MOVEQ #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", DSP_PPC, PIMM1, PIMM2, dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM2, PRN);
4270 #ifdef DSP_DIS_MOVEQ
// Post-execution trace: flags and PRES.
4272 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM2, PRES);
// MOVETA Rm, Rn handler: writes PRM straight into register PIMM2 of the
// alternate bank (dsp_alternate_reg); the visible code does not go through PRES.
4276 static void DSP_moveta(void)
4278 #ifdef DSP_DIS_MOVETA
// Pre-execution trace; the commented-out variant used the ALTERNATE_RN macro.
4280 // WriteLog("%06X: MOVETA R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u(alt)=%08X] -> ", DSP_PPC, PIMM1, PIMM2, dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM1, PRM, PIMM2, ALTERNATE_RN);
4281 WriteLog("%06X: MOVETA R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u(alt)=%08X] -> ", DSP_PPC, PIMM1, PIMM2, dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM1, PRM, PIMM2, dsp_alternate_reg[PIMM2]);
4283 // ALTERNATE_RN = PRM;
// Immediate write to the alternate bank, indexed by the destination number.
4284 dsp_alternate_reg[PIMM2] = PRM;
4286 #ifdef DSP_DIS_MOVETA
// Post-execution trace: source and the updated alternate-bank register.
4288 // WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u(alt)=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM1, PRM, PIMM2, ALTERNATE_RN);
4289 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u(alt)=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM1, PRM, PIMM2, dsp_alternate_reg[PIMM2]);
// MTOI handler: builds PRES from PRM by keeping the low 23 bits and replacing
// the top nine bits with bits taken from PRM shifted right by eight as a
// signed value.
4293 static void DSP_mtoi(void)
// With an arithmetic right shift (compiler-dependent for negative values) the
// masked top nine bits are all copies of PRM's bit 31.
4295 PRES = (((int32_t)PRM >> 8) & 0xFF800000) | (PRM & 0x007FFFFF);
// MULT Rm, Rn handler: unsigned 16 x 16 -> 32-bit multiply of the low halves
// of PRM and PRN, result in PRES.
4299 static void DSP_mult(void)
// Pre-execution trace: both operands and the current flags.
4303 WriteLog("%06X: MULT R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", DSP_PPC, PIMM1, PIMM2, dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM1, PRM, PIMM2, PRN);
// Widen to uint32_t before multiplying: two bare uint16_t operands are promoted
// to signed int, and a product above INT_MAX (e.g. 0xFFFF * 0xFFFF) would be
// signed overflow, which is undefined behaviour. The unsigned product is exact.
4305 PRES = (uint32_t)(uint16_t)PRM * (uint32_t)(uint16_t)PRN;
// Post-execution trace: the destination is reported from PRES.
4309 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM1, PRM, PIMM2, PRES);
// NEG Rn handler: computes the two's-complement negation of PRN.
4313 static void DSP_neg(void)
// Pre-execution trace: operand and flags.
4317 WriteLog("%06X: NEG R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", DSP_PPC, PIMM2, dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM2, PRN);
4319 uint32_t res = -PRN;
// Flags are derived as for the subtraction 0 - PRN.
4320 SET_ZNC_SUB(0, PRN, res);
// Post-execution trace: flags and PRES.
4324 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM2, PRES);
// NOP handler: the only visible action is a trace line with the current flags.
4328 static void DSP_nop(void)
4332 WriteLog("%06X: NOP [NCZ:%u%u%u]\n", DSP_PPC, dsp_flag_n, dsp_flag_c, dsp_flag_z);
// NORMI handler. Only the two loop conditions are visible in this listing; the
// loop bodies and the result assignment are not shown.
4337 static void DSP_normi(void)
// First loop: runs while bits 31..22 of _Rm are all zero.
4344 while ((_Rm & 0xffc00000) == 0)
// Second loop: runs while any of bits 31..23 of _Rm is set.
4349 while ((_Rm & 0xff800000) != 0)
// NOT Rn handler. Only the trace calls are visible in this listing.
4359 static void DSP_not(void)
// Pre-execution trace: operand and flags.
4363 WriteLog("%06X: NOT R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", DSP_PPC, PIMM2, dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM2, PRN);
// Post-execution trace: flags and PRES.
4369 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM2, PRES);
// OR Rm, Rn handler. Only the trace calls are visible in this listing.
4373 static void DSP_or(void)
// Pre-execution trace: both operands and flags.
4377 WriteLog("%06X: OR R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", DSP_PPC, PIMM1, PIMM2, dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM1, PRM, PIMM2, PRN);
// Post-execution trace: the destination is reported from PRES.
4383 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM1, PRM, PIMM2, PRES);
// RESMAC Rn handler: copies the low 32 bits of the accumulator dsp_acc into PRES.
4387 static void DSP_resmac(void)
4389 #ifdef DSP_DIS_RESMAC
// Pre-execution trace: destination, flags and the accumulator (bits above 32
// printed as one byte, followed by the low 32 bits).
4391 WriteLog("%06X: RESMAC R%02u [NCZ:%u%u%u, R%02u=%08X, DSP_ACC=%02X%08X] -> ", DSP_PPC, PIMM2, dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM2, PRN, (uint8_t)(dsp_acc >> 32), (uint32_t)(dsp_acc & 0xFFFFFFFF));
4393 PRES = (uint32_t)dsp_acc;
4394 #ifdef DSP_DIS_RESMAC
// Post-execution trace: report PRES, the value this handler just produced.
// PRN is not assigned here, so printing it would repeat the pre-execution
// value; the other handlers' closing traces also print PRES.
4396 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM2, PRES);
// ROR Rm, Rn handler: rotates PRN right by the low five bits of PRM.
4400 static void DSP_ror(void)
// Pre-execution trace: both operands and flags.
4404 WriteLog("%06X: ROR R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", DSP_PPC, PIMM1, PIMM2, dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM1, PRM, PIMM2, PRN);
// Rotate count, 0..31.
4406 uint32_t r1 = PRM & 0x1F;
// The left-shift count is masked so that r1 == 0 gives "<< 0" rather than
// "<< 32"; shifting a 32-bit value by 32 is undefined behaviour. For every
// other count the expression is unchanged.
4407 uint32_t res = (PRN >> r1) | (PRN << ((32 - r1) & 0x1F));
// Z and N follow the result; C takes bit 31 of the unrotated operand.
4408 SET_ZN(res); dsp_flag_c = (PRN >> 31) & 1;
// Post-execution trace: the destination is reported from PRES.
4412 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM1, PRM, PIMM2, PRES);
// RORQ #n, Rn handler: rotates r2 right by the count looked up in
// dsp_convert_zero.
4416 static void DSP_rorq(void)
// Pre-execution trace: rotate count, operand and flags.
4420 WriteLog("%06X: RORQ #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", DSP_PPC, dsp_convert_zero[PIMM1], PIMM2, dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM2, PRN);
4422 uint32_t r1 = dsp_convert_zero[PIMM1 & 0x1F];
// Both shift counts are reduced modulo 32. Whenever r1 is 0 or 32, one of the
// unmasked shifts would have a count of 32, which is undefined behaviour for a
// 32-bit operand; a rotation by 0 or 32 now leaves r2 unchanged. Counts 1..31
// produce exactly the same value as before.
4424 uint32_t res = (r2 >> (r1 & 0x1F)) | (r2 << ((32 - r1) & 0x1F));
// Z and N follow the result; C takes bit 31 of the unrotated operand.
4426 SET_ZN(res); dsp_flag_c = (r2 >> 31) & 0x01;
// Post-execution trace: flags and PRES.
4429 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM2, PRES);
// SAT16S handler: clamps r2 to the signed 16-bit range.
4433 static void DSP_sat16s(void)
// Below -32768 -> -32768, above 32767 -> 32767, otherwise r2 unchanged.
4436 uint32_t res = (r2 < -32768) ? -32768 : (r2 > 32767) ? 32767 : r2;
// SAT32S handler: saturates to the signed 32-bit range, using the accumulator
// bits above bit 31 to detect overflow.
4441 static void DSP_sat32s(void)
4443 int32_t r2 = (uint32_t)PRN;
// temp holds dsp_acc shifted right by 32, narrowed to int32_t.
4444 int32_t temp = dsp_acc >> 32;
// temp < -1 -> 0x80000000, temp > 0 -> 0x7FFFFFFF, otherwise r2 passes through.
4445 uint32_t res = (temp < -1) ? (int32_t)0x80000000 : (temp > 0) ? (int32_t)0x7FFFFFFF : r2;
// SH Rm, Rn handler: shift whose direction depends on the sign of PRM. The
// branch conditions and the shifts themselves are not visible in this listing.
4450 static void DSP_sh(void)
// Shift amount interpreted as a signed value.
4452 int32_t sRm = (int32_t)PRM;
// Negated-amount path (presumably taken when sRm is negative): carry comes from
// bit 31 of _Rn.
4457 uint32_t shift = -sRm;
4462 dsp_flag_c = (_Rn & 0x80000000) >> 31;
// Direct-amount path: carry comes from bit 0 of _Rn.
4472 uint32_t shift = sRm;
4477 dsp_flag_c = _Rn & 0x1;
// SHA Rm, Rn handler: arithmetic shift whose direction depends on the sign of
// PRM. The branch conditions and loop structure are not visible in this listing.
4490 static void DSP_sha(void)
// Shift amount interpreted as a signed value.
4492 int32_t sRm = (int32_t)PRM;
// Negated-amount path (presumably taken when sRm is negative): carry comes from
// bit 31 of _Rn.
4497 uint32_t shift = -sRm;
4502 dsp_flag_c = (_Rn & 0x80000000) >> 31;
// Direct-amount path: carry comes from bit 0 of _Rn.
4512 uint32_t shift = sRm;
4517 dsp_flag_c = _Rn & 0x1;
// One-bit signed right shift of _Rn (presumably repeated once per shift step).
4521 _Rn = ((int32_t)_Rn) >> 1;
// SHARQ #n, Rn handler: signed right shift of PRN by the count looked up in
// dsp_convert_zero.
4530 static void DSP_sharq(void)
4532 #ifdef DSP_DIS_SHARQ
// Pre-execution trace: shift count, operand and flags.
4534 WriteLog("%06X: SHARQ #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", DSP_PPC, dsp_convert_zero[PIMM1], PIMM2, dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM2, PRN);
// NOTE(review): if dsp_convert_zero can yield 32, this shifts a 32-bit value by
// its full width, which is undefined behaviour -- confirm the table's range and
// the intended result for that count.
4536 uint32_t res = (int32_t)PRN >> dsp_convert_zero[PIMM1];
// Z and N follow the result; C takes bit 0 of the unshifted operand.
4537 SET_ZN(res); dsp_flag_c = PRN & 0x01;
4539 #ifdef DSP_DIS_SHARQ
// Post-execution trace: flags and PRES.
4541 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM2, PRES);
// SHLQ #n, Rn handler: left shift of PRN by (32 - PIMM1).
4545 static void DSP_shlq(void)
// Pre-execution trace: shift count, operand and flags.
4549 WriteLog("%06X: SHLQ #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", DSP_PPC, 32 - PIMM1, PIMM2, dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM2, PRN);
// NOTE(review): when PIMM1 is 0 the count is 32, and shifting a 32-bit value by
// 32 is undefined behaviour -- confirm the intended result for that encoding.
4551 int32_t r1 = 32 - PIMM1;
4552 uint32_t res = PRN << r1;
// Z and N follow the result; C takes bit 31 of the unshifted operand.
4553 SET_ZN(res); dsp_flag_c = (PRN >> 31) & 1;
// Post-execution trace: flags and PRES.
4557 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM2, PRES);
// SHRQ #n, Rn handler: right shift of PRN by the count looked up in
// dsp_convert_zero.
4561 static void DSP_shrq(void)
// Pre-execution trace: shift count, operand and flags.
4565 WriteLog("%06X: SHRQ #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", DSP_PPC, dsp_convert_zero[PIMM1], PIMM2, dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM2, PRN);
// NOTE(review): if dsp_convert_zero can yield 32, the shift below has a count
// equal to the operand width, which is undefined behaviour -- confirm the
// table's range and the intended result for that count.
4567 int32_t r1 = dsp_convert_zero[PIMM1];
4568 uint32_t res = PRN >> r1;
// Z and N follow the result; C takes bit 0 of the unshifted operand.
4569 SET_ZN(res); dsp_flag_c = PRN & 1;
// Post-execution trace: flags and PRES.
4573 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM2, PRES);
// STORE Rn, (Rm) handler: instead of writing memory directly, records the
// target address, value and access size in pipeline[plPtrExec].
4577 static void DSP_store(void)
4579 #ifdef DSP_DIS_STORE
// Trace: source register, address register and flags.
4581 WriteLog("%06X: STORE R%02u, (R%02u) [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", DSP_PPC, PIMM2, PIMM1, dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM2, PRN, PIMM1, PRM);
4583 // DSPWriteLong(PRM, PRN, DSP);
4585 #ifdef DSP_CORRECT_ALIGNMENT_STORE
// Aligned form: address masked to a multiple of four.
4586 pipeline[plPtrExec].address = PRM & 0xFFFFFFFC;
// Unmasked form (presumably the alternative branch of the conditional above).
4588 pipeline[plPtrExec].address = PRM;
4590 pipeline[plPtrExec].value = PRN;
4591 pipeline[plPtrExec].type = TYPE_DWORD;
// STOREB Rn, (Rm) handler: records a pending store in pipeline[plPtrExec]; the
// access size depends on whether the address lies in the DSP work RAM window.
4595 static void DSP_storeb(void)
4597 #ifdef DSP_DIS_STOREB
// Trace: source register, address register and flags.
4599 WriteLog("%06X: STOREB R%02u, (R%02u) [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", DSP_PPC, PIMM2, PIMM1, dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM2, PRN, PIMM1, PRM);
4601 // if (PRM >= DSP_WORK_RAM_BASE && PRM <= (DSP_WORK_RAM_BASE + 0x1FFF))
4602 // DSPWriteLong(PRM, PRN & 0xFF, DSP);
4604 // JaguarWriteByte(PRM, PRN, DSP);
4607 pipeline[plPtrExec].address = PRM;
// Inside DSP_WORK_RAM_BASE .. DSP_WORK_RAM_BASE + 0x1FFF the store is recorded
// as a long holding only the low byte of PRN.
4609 if (PRM >= DSP_WORK_RAM_BASE && PRM <= (DSP_WORK_RAM_BASE + 0x1FFF))
4611 pipeline[plPtrExec].value = PRN & 0xFF;
4612 pipeline[plPtrExec].type = TYPE_DWORD;
// Otherwise (presumably the else branch) it is recorded as a byte access with
// the full PRN value.
4616 pipeline[plPtrExec].value = PRN;
4617 pipeline[plPtrExec].type = TYPE_BYTE;
// STOREW Rn, (Rm) handler: records a pending store in pipeline[plPtrExec]; the
// access size depends on whether the address lies in the DSP work RAM window.
4623 static void DSP_storew(void)
4625 #ifdef DSP_DIS_STOREW
// Trace: source register, address register and flags.
4627 WriteLog("%06X: STOREW R%02u, (R%02u) [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", DSP_PPC, PIMM2, PIMM1, dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM2, PRN, PIMM1, PRM);
4629 // if (PRM >= DSP_WORK_RAM_BASE && PRM <= (DSP_WORK_RAM_BASE + 0x1FFF))
4630 // DSPWriteLong(PRM, PRN & 0xFFFF, DSP);
4632 // JaguarWriteWord(PRM, PRN, DSP);
4635 #ifdef DSP_CORRECT_ALIGNMENT_STORE
// Aligned form: address masked to a multiple of two.
4636 pipeline[plPtrExec].address = PRM & 0xFFFFFFFE;
// Unmasked form (presumably the alternative branch of the conditional above).
4638 pipeline[plPtrExec].address = PRM;
// The range test uses the unmasked PRM. Inside the work RAM window the store is
// recorded as a long holding only the low 16 bits of PRN.
4641 if (PRM >= DSP_WORK_RAM_BASE && PRM <= (DSP_WORK_RAM_BASE + 0x1FFF))
4643 pipeline[plPtrExec].value = PRN & 0xFFFF;
4644 pipeline[plPtrExec].type = TYPE_DWORD;
// Otherwise (presumably the else branch) it is recorded as a word access with
// the full PRN value.
4648 pipeline[plPtrExec].value = PRN;
4649 pipeline[plPtrExec].type = TYPE_WORD;
// STORE Rn, (R14+n) handler: records a pending long store to dsp_reg[14] plus
// the scaled immediate (dsp_convert_zero[PIMM1] << 2) in pipeline[plPtrExec].
4654 static void DSP_store_r14_i(void)
4656 #ifdef DSP_DIS_STORE14I
// Trace: source register, offset, flags and effective address.
4658 WriteLog("%06X: STORE R%02u, (R14+$%02X) [NCZ:%u%u%u, R%02u=%08X, R14+$%02X=%08X]\n", DSP_PPC, PIMM2, dsp_convert_zero[PIMM1] << 2, dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM2, PRN, dsp_convert_zero[PIMM1] << 2, dsp_reg[14]+(dsp_convert_zero[PIMM1] << 2));
4660 // DSPWriteLong(dsp_reg[14] + (dsp_convert_zero[PIMM1] << 2), PRN, DSP);
4662 #ifdef DSP_CORRECT_ALIGNMENT_STORE
// Aligned form: only the base register is masked to a multiple of four.
4663 pipeline[plPtrExec].address = (dsp_reg[14] & 0xFFFFFFFC) + (dsp_convert_zero[PIMM1] << 2);
// Unmasked form (presumably the alternative branch of the conditional above).
4665 pipeline[plPtrExec].address = dsp_reg[14] + (dsp_convert_zero[PIMM1] << 2);
4667 pipeline[plPtrExec].value = PRN;
4668 pipeline[plPtrExec].type = TYPE_DWORD;
// STORE Rn, (R14+Rm) handler: records a pending long store to
// dsp_reg[14] + PRM in pipeline[plPtrExec].
4672 static void DSP_store_r14_r(void)
4674 // DSPWriteLong(dsp_reg[14] + PRM, PRN, DSP);
4676 #ifdef DSP_CORRECT_ALIGNMENT_STORE
// Aligned form: the complete sum is masked to a multiple of four.
4677 pipeline[plPtrExec].address = (dsp_reg[14] + PRM) & 0xFFFFFFFC;
// Unmasked form (presumably the alternative branch of the conditional above).
4679 pipeline[plPtrExec].address = dsp_reg[14] + PRM;
4681 pipeline[plPtrExec].value = PRN;
4682 pipeline[plPtrExec].type = TYPE_DWORD;
// STORE Rn, (R15+n) handler: records a pending long store to dsp_reg[15] plus
// the scaled immediate (dsp_convert_zero[PIMM1] << 2) in pipeline[plPtrExec].
4686 static void DSP_store_r15_i(void)
4688 #ifdef DSP_DIS_STORE15I
// Trace: source register, offset, flags and effective address.
4690 WriteLog("%06X: STORE R%02u, (R15+$%02X) [NCZ:%u%u%u, R%02u=%08X, R15+$%02X=%08X]\n", DSP_PPC, PIMM2, dsp_convert_zero[PIMM1] << 2, dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM2, PRN, dsp_convert_zero[PIMM1] << 2, dsp_reg[15]+(dsp_convert_zero[PIMM1] << 2));
4692 // DSPWriteLong(dsp_reg[15] + (dsp_convert_zero[PIMM1] << 2), PRN, DSP);
4694 #ifdef DSP_CORRECT_ALIGNMENT_STORE
// Aligned form: only the base register is masked to a multiple of four.
4695 pipeline[plPtrExec].address = (dsp_reg[15] & 0xFFFFFFFC) + (dsp_convert_zero[PIMM1] << 2);
// Unmasked form (presumably the alternative branch of the conditional above).
4697 pipeline[plPtrExec].address = dsp_reg[15] + (dsp_convert_zero[PIMM1] << 2);
4699 pipeline[plPtrExec].value = PRN;
4700 pipeline[plPtrExec].type = TYPE_DWORD;
// STORE Rn, (R15+Rm) handler: records a pending long store to
// dsp_reg[15] + PRM in pipeline[plPtrExec].
4704 static void DSP_store_r15_r(void)
4706 // DSPWriteLong(dsp_reg[15] + PRM, PRN, DSP);
4708 #ifdef DSP_CORRECT_ALIGNMENT_STORE
// Aligned form: the complete sum is masked to a multiple of four.
4709 pipeline[plPtrExec].address = (dsp_reg[15] + PRM) & 0xFFFFFFFC;
// Unmasked form (presumably the alternative branch of the conditional above).
4711 pipeline[plPtrExec].address = dsp_reg[15] + PRM;
4713 pipeline[plPtrExec].value = PRN;
4714 pipeline[plPtrExec].type = TYPE_DWORD;
// SUB Rm, Rn handler: computes PRN - PRM and derives Z/N/C from the operation.
4718 static void DSP_sub(void)
// Pre-execution trace: both operands and flags.
4722 WriteLog("%06X: SUB R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", DSP_PPC, PIMM1, PIMM2, dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM1, PRM, PIMM2, PRN);
4724 uint32_t res = PRN - PRM;
4725 SET_ZNC_SUB(PRN, PRM, res);
// Post-execution trace: the destination is reported from PRES.
4729 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM1, PRM, PIMM2, PRES);
// SUBC Rm, Rn handler: computes PRN - PRM - carry and derives Z/N/C from the
// operation.
4733 static void DSP_subc(void)
// Pre-execution trace: both operands and flags.
4737 WriteLog("%06X: SUBC R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", DSP_PPC, PIMM1, PIMM2, dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM1, PRM, PIMM2, PRN);
4739 uint32_t res = PRN - PRM - dsp_flag_c;
// The incoming carry is saved before the flag macro overwrites dsp_flag_c.
4740 uint32_t borrow = dsp_flag_c;
// NOTE(review): the minuend handed to the flag macro is PRN - borrow, which
// wraps around when PRN is 0 and borrow is 1 -- verify that SET_ZNC_SUB still
// reports the borrow correctly in that case.
4741 SET_ZNC_SUB(PRN - borrow, PRM, res);
// Post-execution trace: the destination is reported from PRES.
4745 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM1, PRM, PIMM2, PRES);
// SUBQ #n, Rn handler: subtracts the quick immediate (looked up in
// dsp_convert_zero) from PRN and derives Z/N/C from the operation.
4749 static void DSP_subq(void)
// Pre-execution trace: immediate, operand and flags.
4753 WriteLog("%06X: SUBQ #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", DSP_PPC, dsp_convert_zero[PIMM1], PIMM2, dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM2, PRN);
4755 uint32_t r1 = dsp_convert_zero[PIMM1];
4756 uint32_t res = PRN - r1;
4757 SET_ZNC_SUB(PRN, r1, res);
// Post-execution trace: flags and PRES.
4761 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM2, PRES);
// SUBQMOD #n, Rn handler: quick-immediate subtraction in which the bits
// selected by dsp_modulo keep their original value.
4765 static void DSP_subqmod(void)
4767 uint32_t r1 = dsp_convert_zero[PIMM1];
4769 uint32_t res = r2 - r1;
// Bits set in dsp_modulo are taken from the original r2; the remaining bits
// come from the difference.
4770 res = (res & (~dsp_modulo)) | (r2 & dsp_modulo);
// Flags are derived after the merge, i.e. from the combined result.
4772 SET_ZNC_SUB(r2, r1, res);
// SUBQT #n, Rn handler: subtracts the quick immediate (looked up in
// dsp_convert_zero) from PRN into PRES. No flag-setting macro appears in the
// visible lines.
4775 static void DSP_subqt(void)
4777 #ifdef DSP_DIS_SUBQT
// Pre-execution trace: immediate, operand and flags.
4779 WriteLog("%06X: SUBQT #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", DSP_PPC, dsp_convert_zero[PIMM1], PIMM2, dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM2, PRN);
4781 PRES = PRN - dsp_convert_zero[PIMM1];
4782 #ifdef DSP_DIS_SUBQT
// Post-execution trace: flags and PRES.
4784 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM2, PRES);
4788 static void DSP_xor(void)
4792 WriteLog("%06X: XOR R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", DSP_PPC, PIMM1, PIMM2, dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM1, PRM, PIMM2, PRN);
4798 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, PIMM1, PRM, PIMM2, PRES);