]> Shamusworld >> Repos - virtualjaguar/blob - src/gpu.cpp
Various fixes for GPU/DSP DIV instruction, fixes for joypad handling.
[virtualjaguar] / src / gpu.cpp
1 #if 1
2
3 //
4 // GPU Core
5 //
6 // Originally by David Raingeard (Cal2)
7 // GCC/SDL port by Niels Wagenaar (Linux/WIN32) and Caz (BeOS)
8 // Cleanups, endian wrongness, and bad ASM amelioration by James Hammons
9 // (C) 2010 Underground Software
10 //
11 // JLH = James Hammons <jlhamm@acm.org>
12 //
13 // Who  When        What
14 // ---  ----------  -------------------------------------------------------------
15 // JLH  01/16/2010  Created this log ;-)
16 // JLH  11/26/2011  Added fixes for LOAD/STORE alignment issues
17
18 //
19 // Note: Endian wrongness probably stems from the MAME origins of this emu and
20 //       the braindead way in which MAME handles memory. :-)
21 //
22 // Problem with not booting the BIOS was the incorrect way that the
23 // SUBC instruction set the carry when the carry was set going in...
24 // Same problem with ADDC...
25 //
26
27 #include "gpu.h"
28
29 #include <stdlib.h>
30 #include <string.h>                                                             // For memset
31 #include "dsp.h"
32 #include "jagdasm.h"
33 #include "jaguar.h"
34 #include "log.h"
35 #include "m68000/m68kinterface.h"
36 //#include "memory.h"
37 #include "tom.h"
38
39
40 // Seems alignment in loads & stores was off...
41 #define GPU_CORRECT_ALIGNMENT
42 //#define GPU_DEBUG
43
44 // For GPU disassembly...
45
46 #if 0
47 #define GPU_DIS_ABS
48 #define GPU_DIS_ADD
49 #define GPU_DIS_ADDC
50 #define GPU_DIS_ADDQ
51 #define GPU_DIS_ADDQT
52 #define GPU_DIS_AND
53 #define GPU_DIS_BCLR
54 #define GPU_DIS_BSET
55 #define GPU_DIS_BTST
56 #define GPU_DIS_CMP
57 #define GPU_DIS_CMPQ
58 #define GPU_DIS_DIV
59 #define GPU_DIS_IMULT
60 #define GPU_DIS_JUMP
61 #define GPU_DIS_JR
62 #define GPU_DIS_LOAD
63 #define GPU_DIS_LOADB
64 #define GPU_DIS_LOADW
65 #define GPU_DIS_LOAD14I
66 #define GPU_DIS_LOAD14R
67 #define GPU_DIS_LOAD15I
68 #define GPU_DIS_LOAD15R
69 #define GPU_DIS_MOVE
70 #define GPU_DIS_MOVEFA
71 #define GPU_DIS_MOVEI
72 #define GPU_DIS_MOVEPC
73 #define GPU_DIS_MOVETA
74 #define GPU_DIS_MOVEQ
75 #define GPU_DIS_MULT
76 #define GPU_DIS_NEG
77 #define GPU_DIS_NOP
78 #define GPU_DIS_NOT
79 #define GPU_DIS_OR
80 #define GPU_DIS_PACK
81 #define GPU_DIS_ROR
82 #define GPU_DIS_RORQ
83 #define GPU_DIS_SAT8
84 #define GPU_DIS_SH
85 #define GPU_DIS_SHA
86 #define GPU_DIS_SHARQ
87 #define GPU_DIS_SHLQ
88 #define GPU_DIS_SHRQ
89 #define GPU_DIS_STORE
90 #define GPU_DIS_STOREB
91 #define GPU_DIS_STOREW
92 #define GPU_DIS_STORE14I
93 #define GPU_DIS_STORE14R
94 #define GPU_DIS_STORE15I
95 #define GPU_DIS_STORE15R
96 #define GPU_DIS_SUB
97 #define GPU_DIS_SUBC
98 #define GPU_DIS_SUBQ
99 #define GPU_DIS_SUBQT
100 #define GPU_DIS_XOR
101
102 bool doGPUDis = false;
103 //bool doGPUDis = true;
104 #endif
105
106 /*
107 GPU opcodes use (BIOS flying ATARI logo):
108 +                     add 357416
109 +                    addq 538030
110 +                   addqt 6999
111 +                     sub 116663
112 +                    subq 188059
113 +                   subqt 15086
114 +                     neg 36097
115 +                     and 233993
116 +                      or 109332
117 +                     xor 1384
118 +                    btst 111924
119 +                    bset 25029
120 +                    bclr 10551
121 +                    mult 28147
122 +                   imult 69148
123 +                     div 64102
124 +                     abs 159394
125 +                    shlq 194690
126 +                    shrq 292587
127 +                   sharq 192649
128 +                    rorq 58672
129 +                     cmp 244963
130 +                    cmpq 114834
131 +                    move 833472
132 +                   moveq 56427
133 +                  moveta 220814
134 +                  movefa 170678
135 +                   movei 152025
136 +                   loadw 108220
137 +                    load 430936
138 +                  storew 3036
139 +                   store 372490
140 +                 move_pc 2330
141 +                    jump 349134
142 +                      jr 529171
143                     mmult 64904
144 +                     nop 432179
145 */
146
147 // Various bits
148
// Interrupt latch clear bits, one per interrupt 0-4. They occupy the same bit
// positions (0x0200-0x2000) as INT_CLR0-INT_CLR4 in the GPU_FLAGS list below;
// CINT04FLAGS is the union of all five.
149 #define CINT0FLAG                       0x0200
150 #define CINT1FLAG                       0x0400
151 #define CINT2FLAG                       0x0800
152 #define CINT3FLAG                       0x1000
153 #define CINT4FLAG                       0x2000
154 #define CINT04FLAGS                     (CINT0FLAG | CINT1FLAG | CINT2FLAG | CINT3FLAG | CINT4FLAG)
155
156 // GPU_FLAGS bits
// Bits 0-2 hold the zero/carry/negative flags, bit 3 is IMASK, bits 4-8 are the
// interrupt enables, bits 9-13 the interrupt clears, then REGPAGE and DMAEN.
157
158 #define ZERO_FLAG               0x0001
159 #define CARRY_FLAG              0x0002
160 #define NEGA_FLAG               0x0004
161 #define IMASK                   0x0008
162 #define INT_ENA0                0x0010
163 #define INT_ENA1                0x0020
164 #define INT_ENA2                0x0040
165 #define INT_ENA3                0x0080
166 #define INT_ENA4                0x0100
167 #define INT_CLR0                0x0200
168 #define INT_CLR1                0x0400
169 #define INT_CLR2                0x0800
170 #define INT_CLR3                0x1000
171 #define INT_CLR4                0x2000
172 #define REGPAGE                 0x4000
173 #define DMAEN                   0x8000
174
175 // External global variables
176
177 extern int start_logging;
178 extern int gpu_start_log;
179
180 // Private function prototypes
181
182 void GPUUpdateRegisterBanks(void);
183 void GPUDumpDisassembly(void);
184 void GPUDumpRegisters(void);
185 void GPUDumpMemory(void);
186
187 static void gpu_opcode_add(void);
188 static void gpu_opcode_addc(void);
189 static void gpu_opcode_addq(void);
190 static void gpu_opcode_addqt(void);
191 static void gpu_opcode_sub(void);
192 static void gpu_opcode_subc(void);
193 static void gpu_opcode_subq(void);
194 static void gpu_opcode_subqt(void);
195 static void gpu_opcode_neg(void);
196 static void gpu_opcode_and(void);
197 static void gpu_opcode_or(void);
198 static void gpu_opcode_xor(void);
199 static void gpu_opcode_not(void);
200 static void gpu_opcode_btst(void);
201 static void gpu_opcode_bset(void);
202 static void gpu_opcode_bclr(void);
203 static void gpu_opcode_mult(void);
204 static void gpu_opcode_imult(void);
205 static void gpu_opcode_imultn(void);
206 static void gpu_opcode_resmac(void);
207 static void gpu_opcode_imacn(void);
208 static void gpu_opcode_div(void);
209 static void gpu_opcode_abs(void);
210 static void gpu_opcode_sh(void);
211 static void gpu_opcode_shlq(void);
212 static void gpu_opcode_shrq(void);
213 static void gpu_opcode_sha(void);
214 static void gpu_opcode_sharq(void);
215 static void gpu_opcode_ror(void);
216 static void gpu_opcode_rorq(void);
217 static void gpu_opcode_cmp(void);
218 static void gpu_opcode_cmpq(void);
219 static void gpu_opcode_sat8(void);
220 static void gpu_opcode_sat16(void);
221 static void gpu_opcode_move(void);
222 static void gpu_opcode_moveq(void);
223 static void gpu_opcode_moveta(void);
224 static void gpu_opcode_movefa(void);
225 static void gpu_opcode_movei(void);
226 static void gpu_opcode_loadb(void);
227 static void gpu_opcode_loadw(void);
228 static void gpu_opcode_load(void);
229 static void gpu_opcode_loadp(void);
230 static void gpu_opcode_load_r14_indexed(void);
231 static void gpu_opcode_load_r15_indexed(void);
232 static void gpu_opcode_storeb(void);
233 static void gpu_opcode_storew(void);
234 static void gpu_opcode_store(void);
235 static void gpu_opcode_storep(void);
236 static void gpu_opcode_store_r14_indexed(void);
237 static void gpu_opcode_store_r15_indexed(void);
238 static void gpu_opcode_move_pc(void);
239 static void gpu_opcode_jump(void);
240 static void gpu_opcode_jr(void);
241 static void gpu_opcode_mmult(void);
242 static void gpu_opcode_mtoi(void);
243 static void gpu_opcode_normi(void);
244 static void gpu_opcode_nop(void);
245 static void gpu_opcode_load_r14_ri(void);
246 static void gpu_opcode_load_r15_ri(void);
247 static void gpu_opcode_store_r14_ri(void);
248 static void gpu_opcode_store_r15_ri(void);
249 static void gpu_opcode_sat24(void);
250 static void gpu_opcode_pack(void);
251
252 // This is wrong, since it doesn't take pipeline effects into account. !!! FIX !!!
253 /*uint8_t gpu_opcode_cycles[64] =
254 {
255         3,  3,  3,  3,  3,  3,  3,  3,
256         3,  3,  3,  3,  3,  3,  3,  3,
257         3,  3,  1,  3,  1, 18,  3,  3,
258         3,  3,  3,  3,  3,  3,  3,  3,
259         3,  3,  2,  2,  2,  2,  3,  4,
260         5,  4,  5,  6,  6,  1,  1,  1,
261         1,  2,  2,  2,  1,  1,  9,  3,
262         3,  1,  6,  6,  2,  2,  3,  3
263 };//*/
264 //Here's a QnD kludge...
265 //This is wrong, wrong, WRONG, but it seems to work for the time being...
266 //(That is, it fixes Flip Out which relies on GPU timing rather than semaphores. Bad developers! Bad!)
267 //What's needed here is a way to take pipeline effects into account (including pipeline stalls!)...
268 /*uint8_t gpu_opcode_cycles[64] =
269 {
270         1,  1,  1,  1,  1,  1,  1,  1,
271         1,  1,  1,  1,  1,  1,  1,  1,
272         1,  1,  1,  1,  1,  9,  1,  1,
273         1,  1,  1,  1,  1,  1,  1,  1,
274         1,  1,  1,  1,  1,  1,  1,  2,
275         2,  2,  2,  3,  3,  1,  1,  1,
276         1,  1,  1,  1,  1,  1,  4,  1,
277         1,  1,  3,  3,  1,  1,  1,  1
278 };//*/
// Active cycle table: 64 entries, every one of them 1 (a flat cost for each slot).
// NOTE(review): presumably indexed by opcode number like gpu_opcode[] -- confirm at
// the point of use, which is outside this part of the file.
279 uint8_t gpu_opcode_cycles[64] =
280 {
281         1,  1,  1,  1,  1,  1,  1,  1,
282         1,  1,  1,  1,  1,  1,  1,  1,
283         1,  1,  1,  1,  1,  1,  1,  1,
284         1,  1,  1,  1,  1,  1,  1,  1,
285         1,  1,  1,  1,  1,  1,  1,  1,
286         1,  1,  1,  1,  1,  1,  1,  1,
287         1,  1,  1,  1,  1,  1,  1,  1,
288         1,  1,  1,  1,  1,  1,  1,  1
289 };//*/
290
// Table of the 64 opcode handler functions. The entries appear in the same order
// as the mnemonics in gpu_opcode_str[], so one index selects both the handler and
// its name.
291 void (*gpu_opcode[64])()=
292 {
293         gpu_opcode_add,                                 gpu_opcode_addc,                                gpu_opcode_addq,                                gpu_opcode_addqt,
294         gpu_opcode_sub,                                 gpu_opcode_subc,                                gpu_opcode_subq,                                gpu_opcode_subqt,
295         gpu_opcode_neg,                                 gpu_opcode_and,                                 gpu_opcode_or,                                  gpu_opcode_xor,
296         gpu_opcode_not,                                 gpu_opcode_btst,                                gpu_opcode_bset,                                gpu_opcode_bclr,
297         gpu_opcode_mult,                                gpu_opcode_imult,                               gpu_opcode_imultn,                              gpu_opcode_resmac,
298         gpu_opcode_imacn,                               gpu_opcode_div,                                 gpu_opcode_abs,                                 gpu_opcode_sh,
299         gpu_opcode_shlq,                                gpu_opcode_shrq,                                gpu_opcode_sha,                                 gpu_opcode_sharq,
300         gpu_opcode_ror,                                 gpu_opcode_rorq,                                gpu_opcode_cmp,                                 gpu_opcode_cmpq,
301         gpu_opcode_sat8,                                gpu_opcode_sat16,                               gpu_opcode_move,                                gpu_opcode_moveq,
302         gpu_opcode_moveta,                              gpu_opcode_movefa,                              gpu_opcode_movei,                               gpu_opcode_loadb,
303         gpu_opcode_loadw,                               gpu_opcode_load,                                gpu_opcode_loadp,                               gpu_opcode_load_r14_indexed,
304         gpu_opcode_load_r15_indexed,    gpu_opcode_storeb,                              gpu_opcode_storew,                              gpu_opcode_store,
305         gpu_opcode_storep,                              gpu_opcode_store_r14_indexed,   gpu_opcode_store_r15_indexed,   gpu_opcode_move_pc,
306         gpu_opcode_jump,                                gpu_opcode_jr,                                  gpu_opcode_mmult,                               gpu_opcode_mtoi,
307         gpu_opcode_normi,                               gpu_opcode_nop,                                 gpu_opcode_load_r14_ri,                 gpu_opcode_load_r15_ri,
308         gpu_opcode_store_r14_ri,                gpu_opcode_store_r15_ri,                gpu_opcode_sat24,                               gpu_opcode_pack,
309 };
310
// GPU state.
//
// gpu_ram_8 is the 4K (0x1000 byte) GPU work RAM kept as raw bytes; the read
// accessors in this file assemble multi-byte values most-significant byte first.
//
// Several of the registers below are what GPUReadLong() returns for the control
// offsets: 0x00 gpu_flags, 0x04 gpu_matrix_control, 0x08 gpu_pointer_to_matrix,
// 0x0C gpu_data_organization, 0x10 gpu_pc, 0x14 gpu_control, 0x18 gpu_hidata,
// 0x1C gpu_remain. Byte and word writes to offset 0x1C go to gpu_div_control
// instead.
311 static uint8_t gpu_ram_8[0x1000];
312 uint32_t gpu_pc;
313 static uint32_t gpu_acc;
314 static uint32_t gpu_remain;
315 static uint32_t gpu_hidata;
316 static uint32_t gpu_flags;
317 static uint32_t gpu_matrix_control;
318 static uint32_t gpu_pointer_to_matrix;
319 static uint32_t gpu_data_organization;
320 static uint32_t gpu_control;
321 static uint32_t gpu_div_control;
322 // There is a distinct advantage to having these separated out--there's no need to clear
323 // a bit before writing a result. I.e., if the result of an operation leaves a zero in
324 // the carry flag, you don't have to zero gpu_flag_c before you can write that zero!
// GPUReadLong() folds these three back into bits 0-2 of gpu_flags when the flags
// register (control offset 0x00) is read.
325 static uint8_t gpu_flag_z, gpu_flag_n, gpu_flag_c;
// Two banks of 32 registers. gpu_reg and gpu_alternate_reg are the pointers that
// the RM/RN and ALTERNATE_RM/ALTERNATE_RN macros index through.
// NOTE(review): which bank each pointer targets is presumably decided by
// GPUUpdateRegisterBanks() -- its body is not in this part of the file.
326 uint32_t gpu_reg_bank_0[32];
327 uint32_t gpu_reg_bank_1[32];
328 static uint32_t * gpu_reg;
329 static uint32_t * gpu_alternate_reg;
330
// Decode state: the two parameter fields double as register indices (RM/RN) and
// as immediates (IMM_1/IMM_2).
331 static uint32_t gpu_instruction;
332 static uint32_t gpu_opcode_first_parameter;
333 static uint32_t gpu_opcode_second_parameter;
334
// Nonzero when bit 0 of the control register is set
#define GPU_RUNNING		(gpu_control & 0x01)

// Operand accessors. The two decoded parameter fields are used either as indices
// into the current (gpu_reg) or alternate (gpu_alternate_reg) register bank, or
// directly as immediate values.
#define RM				gpu_reg[gpu_opcode_first_parameter]
#define RN				gpu_reg[gpu_opcode_second_parameter]
#define ALTERNATE_RM	gpu_alternate_reg[gpu_opcode_first_parameter]
#define ALTERNATE_RN	gpu_alternate_reg[gpu_opcode_second_parameter]
#define IMM_1			gpu_opcode_first_parameter
#define IMM_2			gpu_opcode_second_parameter

// Older flag helpers. NOTE: these carry their own trailing semicolon, which is
// kept as-is because call sites outside this part of the file may rely on it.
// Do not use them as the sole body of an if/else.
#define SET_FLAG_Z(r)	(gpu_flag_z = ((r) == 0));
#define SET_FLAG_N(r)	(gpu_flag_n = (((uint32_t)(r) >> 31) & 0x01));

#define RESET_FLAG_Z()	gpu_flag_z = 0;
#define RESET_FLAG_N()	gpu_flag_n = 0;
#define RESET_FLAG_C()	gpu_flag_c = 0;

// Flag helpers that expand to a single expression (no trailing semicolon).
//   SET_Z(r)       Z = (r == 0)
//   SET_N(r)       N = bit 31 of r
//   SET_C_ADD(a,b) C = unsigned carry out of a + b  (b > ~a)
//   SET_C_SUB(a,b) C = unsigned borrow out of a - b (b > a)
#define CLR_Z				(gpu_flag_z = 0)
#define CLR_ZN				(gpu_flag_z = gpu_flag_n = 0)
#define CLR_ZNC				(gpu_flag_z = gpu_flag_n = gpu_flag_c = 0)
#define SET_Z(r)			(gpu_flag_z = ((r) == 0))
#define SET_N(r)			(gpu_flag_n = (((uint32_t)(r) >> 31) & 0x01))
#define SET_C_ADD(a,b)		(gpu_flag_c = ((uint32_t)(b) > (uint32_t)(~(a))))
#define SET_C_SUB(a,b)		(gpu_flag_c = ((uint32_t)(b) > (uint32_t)(a)))

// The combined forms used to expand to several ';'-separated statements, so under
// an unbraced 'if' only the first flag update was conditional. They are now one
// comma expression each: same evaluation order (N, then Z, then C), but they
// behave as a single statement wherever they are used. Arguments are still
// evaluated more than once, so don't pass expressions with side effects.
#define SET_ZN(r)			(SET_N(r), SET_Z(r))
#define SET_ZNC_ADD(a,b,r)	(SET_N(r), SET_Z(r), SET_C_ADD(a,b))
#define SET_ZNC_SUB(a,b,r)	(SET_N(r), SET_Z(r), SET_C_SUB(a,b))
361
// Maps a 5-bit value to itself, except that 0 maps to 32 (index 0 holds 32 and
// every other index n holds n).
362 uint32_t gpu_convert_zero[32] =
363         { 32,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 };
364
// Lookup table of 8 * 32 bytes, allocated and filled by
// build_branch_condition_table(); it is a null pointer until that has run (and
// stays null if the allocation fails).
365 uint8_t * branch_condition_table = 0;
// Indexes the table with condition x and the low three flag bits as the row.
// NOTE(review): this macro references jaguar_flags, which is not declared in the
// part of the file visible here -- confirm it exists before using the macro.
366 #define BRANCH_CONDITION(x)     branch_condition_table[(x) + ((jaguar_flags & 7) << 5)]
367
// One counter slot per opcode. NOTE(review): presumably an execution tally (see
// the "GPU opcodes use" listing near the top of the file) -- the code that
// updates it is not in this part of the file.
368 uint32_t gpu_opcode_use[64];
369
// Mnemonic for each of the 64 opcode slots, in the same order as the handler
// table gpu_opcode[].
370 const char * gpu_opcode_str[64]=
371 {
372         "add",                          "addc",                         "addq",                         "addqt",
373         "sub",                          "subc",                         "subq",                         "subqt",
374         "neg",                          "and",                          "or",                           "xor",
375         "not",                          "btst",                         "bset",                         "bclr",
376         "mult",                         "imult",                        "imultn",                       "resmac",
377         "imacn",                        "div",                          "abs",                          "sh",
378         "shlq",                         "shrq",                         "sha",                          "sharq",
379         "ror",                          "rorq",                         "cmp",                          "cmpq",
380         "sat8",                         "sat16",                        "move",                         "moveq",
381         "moveta",                       "movefa",                       "movei",                        "loadb",
382         "loadw",                        "load",                         "loadp",                        "load_r14_indexed",
383         "load_r15_indexed",     "storeb",                       "storew",                       "store",
384         "storep",                       "store_r14_indexed","store_r15_indexed","move_pc",
385         "jump",                         "jr",                           "mmult",                        "mtoi",
386         "normi",                        "nop",                          "load_r14_ri",          "load_r15_ri",
387         "store_r14_ri",         "store_r15_ri",         "sat24",                        "pack",
388 };
389
// NOTE(review): presumably nonzero while the GPU execution loop is active; the
// code that updates it is not in this part of the file.
static uint32_t gpu_in_exec = 0;

// Set to 1 by GPUReleaseTimeslice().
static uint32_t gpu_releaseTimeSlice_flag = 0;

//
// Raise the "release timeslice" request flag. Nothing else happens here; the flag
// is simply latched to 1 (calling this repeatedly leaves it at 1).
//
void GPUReleaseTimeslice(void)
{
	gpu_releaseTimeSlice_flag = 1;
}
397
398 uint32_t GPUGetPC(void)
399 {
400         return gpu_pc;
401 }
402
403 void build_branch_condition_table(void)
404 {
405         if (!branch_condition_table)
406         {
407                 branch_condition_table = (uint8_t *)malloc(32 * 8 * sizeof(branch_condition_table[0]));
408
409                 if (branch_condition_table)
410                 {
411                         for(int i=0; i<8; i++)
412                         {
413                                 for(int j=0; j<32; j++)
414                                 {
415                                         int result = 1;
416                                         if (j & 1)
417                                                 if (i & ZERO_FLAG)
418                                                         result = 0;
419                                         if (j & 2)
420                                                 if (!(i & ZERO_FLAG))
421                                                         result = 0;
422                                         if (j & 4)
423                                                 if (i & (CARRY_FLAG << (j >> 4)))
424                                                         result = 0;
425                                         if (j & 8)
426                                                 if (!(i & (CARRY_FLAG << (j >> 4))))
427                                                         result = 0;
428                                         branch_condition_table[i * 32 + j] = result;
429                                 }
430                         }
431                 }
432         }
433 }
434
435 //
436 // GPU byte access (read)
437 //
438 uint8_t GPUReadByte(uint32_t offset, uint32_t who/*=UNKNOWN*/)
439 {
440         if (offset >= 0xF02000 && offset <= 0xF020FF)
441                 WriteLog("GPU: ReadByte--Attempt to read from GPU register file by %s!\n", whoName[who]);
442
443         if ((offset >= GPU_WORK_RAM_BASE) && (offset < GPU_WORK_RAM_BASE+0x1000))
444                 return gpu_ram_8[offset & 0xFFF];
445         else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset < GPU_CONTROL_RAM_BASE+0x20))
446         {
447                 uint32_t data = GPUReadLong(offset & 0xFFFFFFFC, who);
448
449                 if ((offset & 0x03) == 0)
450                         return data >> 24;
451                 else if ((offset & 0x03) == 1)
452                         return (data >> 16) & 0xFF;
453                 else if ((offset & 0x03) == 2)
454                         return (data >> 8) & 0xFF;
455                 else if ((offset & 0x03) == 3)
456                         return data & 0xFF;
457         }
458
459         return JaguarReadByte(offset, who);
460 }
461
462 //
463 // GPU word access (read)
464 //
465 uint16_t GPUReadWord(uint32_t offset, uint32_t who/*=UNKNOWN*/)
466 {
467         if (offset >= 0xF02000 && offset <= 0xF020FF)
468                 WriteLog("GPU: ReadWord--Attempt to read from GPU register file by %s!\n", whoName[who]);
469
470         if ((offset >= GPU_WORK_RAM_BASE) && (offset < GPU_WORK_RAM_BASE+0x1000))
471         {
472                 offset &= 0xFFF;
473                 uint16_t data = ((uint16_t)gpu_ram_8[offset] << 8) | (uint16_t)gpu_ram_8[offset+1];
474                 return data;
475         }
476         else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset < GPU_CONTROL_RAM_BASE+0x20))
477         {
478 // This looks and smells wrong...
479 // But it *might* be OK...
480                 if (offset & 0x01)                      // Catch cases 1 & 3... (unaligned read)
481                         return (GPUReadByte(offset, who) << 8) | GPUReadByte(offset+1, who);
482
483                 uint32_t data = GPUReadLong(offset & 0xFFFFFFFC, who);
484
485                 if (offset & 0x02)                      // Cases 0 & 2...
486                         return data & 0xFFFF;
487                 else
488                         return data >> 16;
489         }
490
491 //TEMP--Mirror of F03000? No. Writes only...
492 //if (offset >= 0xF0B000 && offset <= 0xF0BFFF)
493 //WriteLog("[GPUR16] --> Possible GPU RAM mirror access by %s!", whoName[who]);
494
495         return JaguarReadWord(offset, who);
496 }
497
498 //
499 // GPU dword access (read)
500 //
501 uint32_t GPUReadLong(uint32_t offset, uint32_t who/*=UNKNOWN*/)
502 {
503         if (offset >= 0xF02000 && offset <= 0xF020FF)
504         {
505                 WriteLog("GPU: ReadLong--Attempt to read from GPU register file (%X) by %s!\n", offset, whoName[who]);
506                 uint32_t reg = (offset & 0xFC) >> 2;
507                 return (reg < 32 ? gpu_reg_bank_0[reg] : gpu_reg_bank_1[reg - 32]); 
508         }
509
510 //      if ((offset >= GPU_WORK_RAM_BASE) && (offset < GPU_WORK_RAM_BASE + 0x1000))
511         if ((offset >= GPU_WORK_RAM_BASE) && (offset <= GPU_WORK_RAM_BASE + 0x0FFC))
512         {
513                 offset &= 0xFFF;
514                 return ((uint32_t)gpu_ram_8[offset] << 24) | ((uint32_t)gpu_ram_8[offset+1] << 16)
515                         | ((uint32_t)gpu_ram_8[offset+2] << 8) | (uint32_t)gpu_ram_8[offset+3];//*/
516 //              return GET32(gpu_ram_8, offset);
517         }
518 //      else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset < GPU_CONTROL_RAM_BASE+0x20))
519         else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset <= GPU_CONTROL_RAM_BASE + 0x1C))
520         {
521                 offset &= 0x1F;
522                 switch (offset)
523                 {
524                 case 0x00:
525                         gpu_flag_c = (gpu_flag_c ? 1 : 0);
526                         gpu_flag_z = (gpu_flag_z ? 1 : 0);
527                         gpu_flag_n = (gpu_flag_n ? 1 : 0);
528
529                         gpu_flags = (gpu_flags & 0xFFFFFFF8) | (gpu_flag_n << 2) | (gpu_flag_c << 1) | gpu_flag_z;
530
531                         return gpu_flags & 0xFFFFC1FF;
532                 case 0x04:
533                         return gpu_matrix_control;
534                 case 0x08:
535                         return gpu_pointer_to_matrix;
536                 case 0x0C:
537                         return gpu_data_organization;
538                 case 0x10:
539                         return gpu_pc;
540                 case 0x14:
541                         return gpu_control;
542                 case 0x18:
543                         return gpu_hidata;
544                 case 0x1C:
545                         return gpu_remain;
546                 default:                                                                // unaligned long read
547 #ifdef GPU_DEBUG
548                         WriteLog("GPU: Read32--unaligned 32 bit read at %08X by %s.\n", GPU_CONTROL_RAM_BASE + offset, whoName[who]);
549 #endif  // GPU_DEBUG
550                         return 0;
551                 }
552         }
553 //TEMP--Mirror of F03000? No. Writes only...
554 //if (offset >= 0xF0B000 && offset <= 0xF0BFFF)
555 //      WriteLog("[GPUR32] --> Possible GPU RAM mirror access by %s!\n", whoName[who]);
556 /*if (offset >= 0xF1D000 && offset <= 0xF1DFFF)
557         WriteLog("[GPUR32] --> Reading from Wavetable ROM!\n");//*/
558
559         return (JaguarReadWord(offset, who) << 16) | JaguarReadWord(offset + 2, who);
560 }
561
562 //
563 // GPU byte access (write)
564 //
565 void GPUWriteByte(uint32_t offset, uint8_t data, uint32_t who/*=UNKNOWN*/)
566 {
567         if (offset >= 0xF02000 && offset <= 0xF020FF)
568                 WriteLog("GPU: WriteByte--Attempt to write to GPU register file by %s!\n", whoName[who]);
569
570         if ((offset >= GPU_WORK_RAM_BASE) && (offset <= GPU_WORK_RAM_BASE + 0x0FFF))
571         {
572                 gpu_ram_8[offset & 0xFFF] = data;
573
574 //This is the same stupid worthless code that was in the DSP!!! AARRRGGGGHHHHH!!!!!!
575 /*              if (!gpu_in_exec)
576                 {
577                         m68k_end_timeslice();
578                         dsp_releaseTimeslice();
579                 }*/
580                 return;
581         }
582         else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset <= GPU_CONTROL_RAM_BASE + 0x1F))
583         {
584                 uint32_t reg = offset & 0x1C;
585                 int bytenum = offset & 0x03;
586
587 //This is definitely wrong!
588                 if ((reg >= 0x1C) && (reg <= 0x1F))
589                         gpu_div_control = (gpu_div_control & (~(0xFF << (bytenum << 3)))) | (data << (bytenum << 3));
590                 else
591                 {
592                         uint32_t old_data = GPUReadLong(offset & 0xFFFFFFC, who);
593                         bytenum = 3 - bytenum; // convention motorola !!!
594                         old_data = (old_data & (~(0xFF << (bytenum << 3)))) | (data << (bytenum << 3));
595                         GPUWriteLong(offset & 0xFFFFFFC, old_data, who);
596                 }
597                 return;
598         }
599 //      WriteLog("gpu: writing %.2x at 0x%.8x\n",data,offset);
600         JaguarWriteByte(offset, data, who);
601 }
602
603 //
604 // GPU word access (write)
605 //
606 void GPUWriteWord(uint32_t offset, uint16_t data, uint32_t who/*=UNKNOWN*/)
607 {
608         if (offset >= 0xF02000 && offset <= 0xF020FF)
609                 WriteLog("GPU: WriteWord--Attempt to write to GPU register file by %s!\n", whoName[who]);
610
611         if ((offset >= GPU_WORK_RAM_BASE) && (offset <= GPU_WORK_RAM_BASE + 0x0FFE))
612         {
613                 gpu_ram_8[offset & 0xFFF] = (data>>8) & 0xFF;
614                 gpu_ram_8[(offset+1) & 0xFFF] = data & 0xFF;//*/
615 /*              offset &= 0xFFF;
616                 SET16(gpu_ram_8, offset, data);//*/
617
618 /*if (offset >= 0xF03214 && offset < 0xF0321F)
619         WriteLog("GPU: Writing WORD (%04X) to GPU RAM (%08X)...\n", data, offset);//*/
620
621
622 //This is the same stupid worthless code that was in the DSP!!! AARRRGGGGHHHHH!!!!!!
623 /*              if (!gpu_in_exec)
624                 {
625                         m68k_end_timeslice();
626                         dsp_releaseTimeslice();
627                 }*/
628                 return;
629         }
630         else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset <= GPU_CONTROL_RAM_BASE + 0x1E))
631         {
632                 if (offset & 0x01)              // This is supposed to weed out unaligned writes, but does nothing...
633                 {
634 #ifdef GPU_DEBUG
635                         WriteLog("GPU: Write16--unaligned write @ %08X [%04X]\n", offset, data);
636                         GPUDumpRegisters();
637 #endif  // GPU_DEBUG
638                         return;
639                 }
640 //Dual locations in this range: $1C Divide unit remainder/Divide unit control (R/W)
641 //This just literally sucks.
642                 if ((offset & 0x1C) == 0x1C)
643                 {
644 //This doesn't look right either--handles cases 1, 2, & 3 all the same!
645                         if (offset & 0x02)
646                                 gpu_div_control = (gpu_div_control & 0xFFFF0000) | (data & 0xFFFF);
647                         else
648                                 gpu_div_control = (gpu_div_control & 0x0000FFFF) | ((data & 0xFFFF) << 16);
649                 }
650                 else
651                 {
652 //WriteLog("[GPU W16:%08X,%04X]", offset, data);
653                         uint32_t old_data = GPUReadLong(offset & 0xFFFFFFC, who);
654
655                         if (offset & 0x02)
656                                 old_data = (old_data & 0xFFFF0000) | (data & 0xFFFF);
657                         else
658                                 old_data = (old_data & 0x0000FFFF) | ((data & 0xFFFF) << 16);
659
660                         GPUWriteLong(offset & 0xFFFFFFC, old_data, who);
661                 }
662
663                 return;
664         }
665         else if ((offset == GPU_WORK_RAM_BASE + 0x0FFF) || (GPU_CONTROL_RAM_BASE + 0x1F))
666         {
667 #ifdef GPU_DEBUG
668                         WriteLog("GPU: Write16--unaligned write @ %08X by %s [%04X]!\n", offset, whoName[who], data);
669                         GPUDumpRegisters();
670 #endif  // GPU_DEBUG
671                 return;
672         }
673
674         // Have to be careful here--this can cause an infinite loop!
675         JaguarWriteWord(offset, data, who);
676 }
677
678 //
679 // GPU dword access (write)
680 //
681 void GPUWriteLong(uint32_t offset, uint32_t data, uint32_t who/*=UNKNOWN*/)
682 {
683         if (offset >= 0xF02000 && offset <= 0xF020FF)
684                 WriteLog("GPU: WriteLong--Attempt to write to GPU register file by %s!\n", whoName[who]);
685
686 //      if ((offset >= GPU_WORK_RAM_BASE) && (offset < GPU_WORK_RAM_BASE + 0x1000))
687         if ((offset >= GPU_WORK_RAM_BASE) && (offset <= GPU_WORK_RAM_BASE + 0x0FFC))
688         {
689 #ifdef GPU_DEBUG
690                 if (offset & 0x03)
691                 {
692                         WriteLog("GPU: Write32--unaligned write @ %08X [%08X] by %s\n", offset, data, whoName[who]);
693                         GPUDumpRegisters();
694                 }
695 #endif  // GPU_DEBUG
696
697                 offset &= 0xFFF;
698                 SET32(gpu_ram_8, offset, data);
699                 return;
700         }
701 //      else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset < GPU_CONTROL_RAM_BASE+0x20))
702         else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset <= GPU_CONTROL_RAM_BASE + 0x1C))
703         {
704                 offset &= 0x1F;
705                 switch (offset)
706                 {
707                 case 0x00:
708                 {
709                         bool IMASKCleared = (gpu_flags & IMASK) && !(data & IMASK);
710                         // NOTE: According to the JTRM, writing a 1 to IMASK has no effect; only the
711                         //       IRQ logic can set it. So we mask it out here to prevent problems...
712                         gpu_flags = data & (~IMASK);
713                         gpu_flag_z = gpu_flags & ZERO_FLAG;
714                         gpu_flag_c = (gpu_flags & CARRY_FLAG) >> 1;
715                         gpu_flag_n = (gpu_flags & NEGA_FLAG) >> 2;
716                         GPUUpdateRegisterBanks();
717                         gpu_control &= ~((gpu_flags & CINT04FLAGS) >> 3);       // Interrupt latch clear bits
718 //Writing here is only an interrupt enable--this approach is just plain wrong!
719 //                      GPUHandleIRQs();
720 //This, however, is A-OK! ;-)
721                         if (IMASKCleared)                                               // If IMASK was cleared,
722                                 GPUHandleIRQs();                                        // see if any other interrupts need servicing!
723 #ifdef GPU_DEBUG
724                         if (gpu_flags & (INT_ENA0 | INT_ENA1 | INT_ENA2 | INT_ENA3 | INT_ENA4))
725                                 WriteLog("GPU: Interrupt enable set by %s! Bits: %02X\n", whoName[who], (gpu_flags >> 4) & 0x1F);
726                         WriteLog("GPU: REGPAGE %s...\n", (gpu_flags & REGPAGE ? "set" : "cleared"));
727 #endif  // GPU_DEBUG
728                         break;
729                 }
730                 case 0x04:
731                         gpu_matrix_control = data;
732                         break;
733                 case 0x08:
734                         // This can only point to long aligned addresses
735                         gpu_pointer_to_matrix = data & 0xFFFFFFFC;
736                         break;
737                 case 0x0C:
738                         gpu_data_organization = data;
739                         break;
740                 case 0x10:
741                         gpu_pc = data;
742 #ifdef GPU_DEBUG
743 WriteLog("GPU: %s setting GPU PC to %08X %s\n", whoName[who], gpu_pc, (GPU_RUNNING ? "(GPU is RUNNING!)" : ""));//*/
744 #endif  // GPU_DEBUG
745                         break;
746                 case 0x14:
747                 {
748 //                      uint32_t gpu_was_running = GPU_RUNNING;
749                         data &= ~0xF7C0;                // Disable writes to INT_LAT0-4 & TOM version number
750
751                         // check for GPU -> CPU interrupt
752                         if (data & 0x02)
753                         {
754 //WriteLog("GPU->CPU interrupt\n");
755                                 if (TOMIRQEnabled(IRQ_GPU))
756                                 {
757 //This is the programmer's responsibility, to make sure the handler is valid, not ours!
758 //                                      if ((TOMIRQEnabled(IRQ_GPU))// && (JaguarInterruptHandlerIsValid(64)))
759                                         {
760                                                 TOMSetPendingGPUInt();
761                                                 m68k_set_irq(2);                        // Set 68000 IPL 2
762                                                 GPUReleaseTimeslice();
763                                         }
764                                 }
765                                 data &= ~0x02;
766                         }
767
768                         // check for CPU -> GPU interrupt #0
769                         if (data & 0x04)
770                         {
771 //WriteLog("CPU->GPU interrupt\n");
772                                 GPUSetIRQLine(0, ASSERT_LINE);
773                                 m68k_end_timeslice();
774                                 DSPReleaseTimeslice();
775                                 data &= ~0x04;
776                         }
777
778                         // single stepping
779                         if (data & 0x10)
780                         {
781                                 //WriteLog("asked to perform a single step (single step is %senabled)\n",(data&0x8)?"":"not ");
782                         }
783
784                         gpu_control = (gpu_control & 0xF7C0) | (data & (~0xF7C0));
785
786                         // if gpu wasn't running but is now running, execute a few cycles
787 #ifndef GPU_SINGLE_STEPPING
788 /*                      if (!gpu_was_running && GPU_RUNNING)
789 #ifdef GPU_DEBUG
790                         {
791                                 WriteLog("GPU: Write32--About to do stupid braindead GPU execution for 200 cycles.\n");
792 #endif  // GPU_DEBUG
793                                 GPUExec(200);
794 #ifdef GPU_DEBUG
795                         }
796 #endif  // GPU_DEBUG//*/
797 #else
798                         if (gpu_control & 0x18)
799                                 GPUExec(1);
800 #endif  // #ifndef GPU_SINGLE_STEPPING
801 #ifdef GPU_DEBUG
802 WriteLog("Write to GPU CTRL by %s: %08X ", whoName[who], data);
803 if (GPU_RUNNING)
804         WriteLog(" --> Starting to run at %08X by %s...", gpu_pc, whoName[who]);
805 else
806         WriteLog(" --> Stopped by %s! (GPU_PC: %08X)", whoName[who], gpu_pc);
807 WriteLog("\n");
808 #endif  // GPU_DEBUG
809 //if (GPU_RUNNING)
810 //      GPUDumpDisassembly();
811 /*if (GPU_RUNNING)
812 {
813         if (gpu_pc == 0xF035D8)
814         {
815 //              GPUDumpDisassembly();
816 //              log_done();
817 //              exit(1);
818                 gpu_control &= 0xFFFFFFFE;      // Don't run it and let's see what happens!
819 //Hmm. Seems to lock up when going into the demo...
820 //Try to disable the collision altogether!
821         }
822 }//*/
823 extern int effect_start5;
824 static bool finished = false;
825 //if (GPU_RUNNING && effect_start5 && !finished)
826 if (GPU_RUNNING && effect_start5 && gpu_pc == 0xF035D8)
827 {
828         // Let's do a dump of $6528!
829 /*      uint32_t numItems = JaguarReadWord(0x6BD6);
830         WriteLog("\nDump of $6528: %u items.\n\n", numItems);
831         for(int i=0; i<numItems*3*4; i+=3*4)
832         {
833                 WriteLog("\t%04X: %08X %08X %08X -> ", 0x6528+i, JaguarReadLong(0x6528+i),
834                         JaguarReadLong(0x6528+i+4), JaguarReadLong(0x6528+i+8));
835                 uint16_t link = JaguarReadWord(0x6528+i+8+2);
836                 for(int j=0; j<40; j+=4)
837                         WriteLog("%08X ", JaguarReadLong(link + j));
838                 WriteLog("\n");
839         }
840         WriteLog("\n");//*/
841         // Let's try a manual blit here...
842 //This isn't working the way it should! !!! FIX !!!
843 //Err, actually, it is.
844 // NOW, it works right! Problem solved!!! It's a blitter bug!
845 /*      uint32_t src = 0x4D54, dst = 0xF03000, width = 10 * 4;
846         for(int y=0; y<127; y++)
847         {
848                 for(int x=0; x<2; x++)
849                 {
850                         JaguarWriteLong(dst, JaguarReadLong(src));
851
852                         src += 4;
853                         dst += 4;
854                 }
855                 src += width - (2 * 4);
856         }//*/
857 /*      finished = true;
858         doGPUDis = true;
859         WriteLog("\nGPU: About to execute collision detection code.\n\n");//*/
860
861 /*      WriteLog("\nGPU: About to execute collision detection code. Data @ 4D54:\n\n");
862         int count = 0;
863         for(int i=0x004D54; i<0x004D54+2048; i++)
864         {
865                 WriteLog("%02X ", JaguarReadByte(i));
866                 count++;
867                 if (count == 32)
868                 {
869                         count = 0;
870                         WriteLog("\n");
871                 }
872         }
873         WriteLog("\n\nData @ F03000:\n\n");
874         count = 0;
875         for(int i=0xF03000; i<0xF03200; i++)
876         {
877                 WriteLog("%02X ", JaguarReadByte(i));
878                 count++;
879                 if (count == 32)
880                 {
881                         count = 0;
882                         WriteLog("\n");
883                 }
884         }
885         WriteLog("\n\n");
886         log_done();
887         exit(0);//*/
888 }
889 //if (!GPU_RUNNING)
890 //      doGPUDis = false;
891 /*if (!GPU_RUNNING && finished)
892 {
893         WriteLog("\nGPU: Finished collision detection code. Exiting!\n\n");
894         GPUDumpRegisters();
895         log_done();
896         exit(0);
897 }//*/
898                         // (?) If we're set running by the M68K (or DSP?) then end its timeslice to
899                         // allow the GPU a chance to run...
900                         // Yes! This partially fixed Trevor McFur...
901                         if (GPU_RUNNING)
902                                 m68k_end_timeslice();
903                         break;
904                 }
905                 case 0x18:
906                         gpu_hidata = data;
907                         break;
908                 case 0x1C:
909                         gpu_div_control = data;
910                         break;
911 //              default:   // unaligned long write
912                         //exit(0);
913                         //__asm int 3
914                 }
915                 return;
916         }
917
918 //      JaguarWriteWord(offset, (data >> 16) & 0xFFFF, who);
919 //      JaguarWriteWord(offset+2, data & 0xFFFF, who);
920 // We're a 32-bit processor, we can do a long write...!
921         JaguarWriteLong(offset, data, who);
922 }
923
924 //
925 // Change register banks if necessary
926 //
927 void GPUUpdateRegisterBanks(void)
928 {
929         int bank = (gpu_flags & REGPAGE);               // REGPAGE bit
930
931         if (gpu_flags & IMASK)                                  // IMASK bit
932                 bank = 0;                                                       // IMASK forces main bank to be bank 0
933
934         if (bank)
935                 gpu_reg = gpu_reg_bank_1, gpu_alternate_reg = gpu_reg_bank_0;
936         else
937                 gpu_reg = gpu_reg_bank_0, gpu_alternate_reg = gpu_reg_bank_1;
938 }
939
940 void GPUHandleIRQs(void)
941 {
942         // Bail out if we're already in an interrupt!
943         if (gpu_flags & IMASK)
944                 return;
945
946         // Get the interrupt latch & enable bits
947         uint32_t bits = (gpu_control >> 6) & 0x1F, mask = (gpu_flags >> 4) & 0x1F;
948
949         // Bail out if latched interrupts aren't enabled
950         bits &= mask;
951         if (!bits)
952                 return;
953
954         // Determine which interrupt to service
955         uint32_t which = 0; //Isn't there a #pragma to disable this warning???
956         if (bits & 0x01)
957                 which = 0;
958         if (bits & 0x02)
959                 which = 1;
960         if (bits & 0x04)
961                 which = 2;
962         if (bits & 0x08)
963                 which = 3;
964         if (bits & 0x10)
965                 which = 4;
966
967         if (start_logging)
968                 WriteLog("GPU: Generating IRQ #%i\n", which);
969
970         // set the interrupt flag
971         gpu_flags |= IMASK;
972         GPUUpdateRegisterBanks();
973
974         // subqt  #4,r31                ; pre-decrement stack pointer
975         // move  pc,r30                 ; address of interrupted code
976         // store  r30,(r31)     ; store return address
977         gpu_reg[31] -= 4;
978         GPUWriteLong(gpu_reg[31], gpu_pc - 2, GPU);
979
980         // movei  #service_address,r30  ; pointer to ISR entry
981         // jump  (r30)                                  ; jump to ISR
982         // nop
983         gpu_pc = gpu_reg[30] = GPU_WORK_RAM_BASE + (which * 0x10);
984 }
985
986 void GPUSetIRQLine(int irqline, int state)
987 {
988         if (start_logging)
989                 WriteLog("GPU: Setting GPU IRQ line #%i\n", irqline);
990
991         uint32_t mask = 0x0040 << irqline;
992         gpu_control &= ~mask;                           // Clear the interrupt latch
993
994         if (state)
995         {
996                 gpu_control |= mask;                    // Assert the interrupt latch
997                 GPUHandleIRQs();                                // And handle the interrupt...
998         }
999 }
1000
1001 //TEMPORARY: Testing only!
1002 //#include "gpu2.h"
1003 //#include "gpu3.h"
1004
1005 void GPUInit(void)
1006 {
1007 //      memory_malloc_secure((void **)&gpu_ram_8, 0x1000, "GPU work RAM");
1008 //      memory_malloc_secure((void **)&gpu_reg_bank_0, 32 * sizeof(int32_t), "GPU bank 0 regs");
1009 //      memory_malloc_secure((void **)&gpu_reg_bank_1, 32 * sizeof(int32_t), "GPU bank 1 regs");
1010
1011         build_branch_condition_table();
1012
1013         GPUReset();
1014
1015 //TEMPORARY: Testing only!
1016 //      gpu2_init();
1017 //      gpu3_init();
1018 }
1019
1020 void GPUReset(void)
1021 {
1022         // GPU registers (directly visible)
1023         gpu_flags                         = 0x00000000;
1024         gpu_matrix_control    = 0x00000000;
1025         gpu_pointer_to_matrix = 0x00000000;
1026         gpu_data_organization = 0xFFFFFFFF;
1027         gpu_pc                            = 0x00F03000;
1028         gpu_control                       = 0x00002800;                 // Correctly sets this as TOM Rev. 2
1029         gpu_hidata                        = 0x00000000;
1030         gpu_remain                        = 0x00000000;                 // These two registers are RO/WO
1031         gpu_div_control           = 0x00000000;
1032
1033         // GPU internal register
1034         gpu_acc                           = 0x00000000;
1035
1036         gpu_reg = gpu_reg_bank_0;
1037         gpu_alternate_reg = gpu_reg_bank_1;
1038
1039         for(int i=0; i<32; i++)
1040                 gpu_reg[i] = gpu_alternate_reg[i] = 0x00000000;
1041
1042         CLR_ZNC;
1043         memset(gpu_ram_8, 0xFF, 0x1000);
1044         gpu_in_exec = 0;
1045 //not needed    GPUInterruptPending = false;
1046         GPUResetStats();
1047
1048         // Contents of local RAM are quasi-stable; we simulate this by randomizing RAM contents
1049         for(uint32_t i=0; i<4096; i+=4)
1050                 *((uint32_t *)(&gpu_ram_8[i])) = rand();
1051 }
1052
1053 uint32_t GPUReadPC(void)
1054 {
1055         return gpu_pc;
1056 }
1057
1058 void GPUResetStats(void)
1059 {
1060         for(uint32_t i=0; i<64; i++)
1061                 gpu_opcode_use[i] = 0;
1062         WriteLog("--> GPU stats were reset!\n");
1063 }
1064
1065 void GPUDumpDisassembly(void)
1066 {
1067         char buffer[512];
1068
1069         WriteLog("\n---[GPU code at 00F03000]---------------------------\n");
1070         uint32_t j = 0xF03000;
1071         while (j <= 0xF03FFF)
1072         {
1073                 uint32_t oldj = j;
1074                 j += dasmjag(JAGUAR_GPU, buffer, j);
1075                 WriteLog("\t%08X: %s\n", oldj, buffer);
1076         }
1077 }
1078
1079 void GPUDumpRegisters(void)
1080 {
1081         WriteLog("\n---[GPU flags: NCZ %d%d%d]-----------------------\n", gpu_flag_n, gpu_flag_c, gpu_flag_z);
1082         WriteLog("\nRegisters bank 0\n");
1083         for(int j=0; j<8; j++)
1084         {
1085                 WriteLog("\tR%02i = %08X R%02i = %08X R%02i = %08X R%02i = %08X\n",
1086                                                   (j << 2) + 0, gpu_reg_bank_0[(j << 2) + 0],
1087                                                   (j << 2) + 1, gpu_reg_bank_0[(j << 2) + 1],
1088                                                   (j << 2) + 2, gpu_reg_bank_0[(j << 2) + 2],
1089                                                   (j << 2) + 3, gpu_reg_bank_0[(j << 2) + 3]);
1090         }
1091         WriteLog("Registers bank 1\n");
1092         for(int j=0; j<8; j++)
1093         {
1094                 WriteLog("\tR%02i = %08X R%02i = %08X R%02i = %08X R%02i = %08X\n",
1095                                                   (j << 2) + 0, gpu_reg_bank_1[(j << 2) + 0],
1096                                                   (j << 2) + 1, gpu_reg_bank_1[(j << 2) + 1],
1097                                                   (j << 2) + 2, gpu_reg_bank_1[(j << 2) + 2],
1098                                                   (j << 2) + 3, gpu_reg_bank_1[(j << 2) + 3]);
1099         }
1100 }
1101
1102 void GPUDumpMemory(void)
1103 {
1104         WriteLog("\n---[GPU data at 00F03000]---------------------------\n");
1105         for(int i=0; i<0xFFF; i+=4)
1106                 WriteLog("\t%08X: %02X %02X %02X %02X\n", 0xF03000+i, gpu_ram_8[i],
1107                         gpu_ram_8[i+1], gpu_ram_8[i+2], gpu_ram_8[i+3]);
1108 }
1109
1110 void GPUDone(void)
1111 {
1112         WriteLog("GPU: Stopped at PC=%08X (GPU %s running)\n", (unsigned int)gpu_pc, GPU_RUNNING ? "was" : "wasn't");
1113
1114         // Get the interrupt latch & enable bits
1115         uint8_t bits = (gpu_control >> 6) & 0x1F, mask = (gpu_flags >> 4) & 0x1F;
1116         WriteLog("GPU: Latch bits = %02X, enable bits = %02X\n", bits, mask);
1117
1118         GPUDumpRegisters();
1119         GPUDumpDisassembly();
1120
1121         WriteLog("\nGPU opcodes use:\n");
1122         for(int i=0; i<64; i++)
1123         {
1124                 if (gpu_opcode_use[i])
1125                         WriteLog("\t%17s %lu\n", gpu_opcode_str[i], gpu_opcode_use[i]);
1126         }
1127         WriteLog("\n");
1128
1129 //      memory_free(gpu_ram_8);
1130 //      memory_free(gpu_reg_bank_0);
1131 //      memory_free(gpu_reg_bank_1);
1132 }
1133
1134 //
1135 // Main GPU execution core
1136 //
1137 static int testCount = 1;
1138 static int len = 0;
1139 static bool tripwire = false;
1140 void GPUExec(int32_t cycles)
1141 {
1142         if (!GPU_RUNNING)
1143                 return;
1144
1145 #ifdef GPU_SINGLE_STEPPING
1146         if (gpu_control & 0x18)
1147         {
1148                 cycles = 1;
1149                 gpu_control &= ~0x10;
1150         }
1151 #endif
1152         GPUHandleIRQs();
1153         gpu_releaseTimeSlice_flag = 0;
1154         gpu_in_exec++;
1155
1156         while (cycles > 0 && GPU_RUNNING)
1157         {
1158 if (gpu_ram_8[0x054] == 0x98 && gpu_ram_8[0x055] == 0x0A && gpu_ram_8[0x056] == 0x03
1159         && gpu_ram_8[0x057] == 0x00 && gpu_ram_8[0x058] == 0x00 && gpu_ram_8[0x059] == 0x00)
1160 {
1161         if (gpu_pc == 0xF03000)
1162         {
1163                 extern uint32_t starCount;
1164                 starCount = 0;
1165 /*              WriteLog("GPU: Starting starfield generator... Dump of [R03=%08X]:\n", gpu_reg_bank_0[03]);
1166                 uint32_t base = gpu_reg_bank_0[3];
1167                 for(uint32_t i=0; i<0x100; i+=16)
1168                 {
1169                         WriteLog("%02X: ", i);
1170                         for(uint32_t j=0; j<16; j++)
1171                         {
1172                                 WriteLog("%02X ", JaguarReadByte(base + i + j));
1173                         }
1174                         WriteLog("\n");
1175                 }*/
1176         }
1177 //      if (gpu_pc == 0xF03)
1178         {
1179         }
1180 }//*/
1181 /*if (gpu_pc == 0xF03B9E && gpu_reg_bank_0[01] == 0)
1182 {
1183         GPUDumpRegisters();
1184         WriteLog("GPU: Starting disassembly log...\n");
1185         doGPUDis = true;
1186 }//*/
1187 /*if (gpu_pc == 0xF0359A)
1188 {
1189         doGPUDis = true;
1190         GPUDumpRegisters();
1191 }*/
1192 /*              gpu_flag_c = (gpu_flag_c ? 1 : 0);
1193                 gpu_flag_z = (gpu_flag_z ? 1 : 0);
1194                 gpu_flag_n = (gpu_flag_n ? 1 : 0);*/
1195 #if 0
1196 if (gpu_pc == 0xF03200)
1197         doGPUDis = true;
1198 #endif
1199
1200                 uint16_t opcode = GPUReadWord(gpu_pc, GPU);
1201                 uint32_t index = opcode >> 10;
1202                 gpu_instruction = opcode;                               // Added for GPU #3...
1203                 gpu_opcode_first_parameter = (opcode >> 5) & 0x1F;
1204                 gpu_opcode_second_parameter = opcode & 0x1F;
1205 /*if (gpu_pc == 0xF03BE8)
1206 WriteLog("Start of OP frame write...\n");
1207 if (gpu_pc == 0xF03EEE)
1208 WriteLog("--> Writing BRANCH object ---\n");
1209 if (gpu_pc == 0xF03F62)
1210 WriteLog("--> Writing BITMAP object ***\n");//*/
1211 /*if (gpu_pc == 0xF03546)
1212 {
1213         WriteLog("\n--> GPU PC: F03546\n");
1214         GPUDumpRegisters();
1215         GPUDumpDisassembly();
1216 }//*/
1217 /*if (gpu_pc == 0xF033F6)
1218 {
1219         WriteLog("\n--> GPU PC: F033F6\n");
1220         GPUDumpRegisters();
1221         GPUDumpDisassembly();
1222 }//*/
1223 /*if (gpu_pc == 0xF033CC)
1224 {
1225         WriteLog("\n--> GPU PC: F033CC\n");
1226         GPUDumpRegisters();
1227         GPUDumpDisassembly();
1228 }//*/
1229 /*if (gpu_pc == 0xF033D6)
1230 {
1231         WriteLog("\n--> GPU PC: F033D6 (#%d)\n", testCount++);
1232         GPUDumpRegisters();
1233         GPUDumpMemory();
1234 }//*/
1235 /*if (gpu_pc == 0xF033D8)
1236 {
1237         WriteLog("\n--> GPU PC: F033D8 (#%d)\n", testCount++);
1238         GPUDumpRegisters();
1239         GPUDumpMemory();
1240 }//*/
1241 /*if (gpu_pc == 0xF0358E)
1242 {
1243         WriteLog("\n--> GPU PC: F0358E (#%d)\n", testCount++);
1244         GPUDumpRegisters();
1245         GPUDumpMemory();
1246 }//*/
1247 /*if (gpu_pc == 0xF034CA)
1248 {
1249         WriteLog("\n--> GPU PC: F034CA (#%d)\n", testCount++);
1250         GPUDumpRegisters();
1251 }//*/
1252 /*if (gpu_pc == 0xF034CA)
1253 {
1254         len = gpu_reg[1] + 4;//, r9save = gpu_reg[9];
1255         WriteLog("\nAbout to subtract [#%d] (R14=%08X, R15=%08X, R9=%08X):\n   ", testCount++, gpu_reg[14], gpu_reg[15], gpu_reg[9]);
1256         for(int i=0; i<len; i+=4)
1257                 WriteLog(" %08X", GPUReadLong(gpu_reg[15]+i));
1258         WriteLog("\n   ");
1259         for(int i=0; i<len; i+=4)
1260                 WriteLog(" %08X", GPUReadLong(gpu_reg[14]+i));
1261         WriteLog("\n\n");
1262 }
1263 if (gpu_pc == 0xF034DE)
1264 {
1265         WriteLog("\nSubtracted! (R14=%08X, R15=%08X):\n   ", gpu_reg[14], gpu_reg[15]);
1266         for(int i=0; i<len; i+=4)
1267                 WriteLog(" %08X", GPUReadLong(gpu_reg[15]+i));
1268         WriteLog("\n   ");
1269         for(int i=0; i<len; i+=4)
1270                 WriteLog(" %08X", GPUReadLong(gpu_reg[14]+i));
1271         WriteLog("\n   ");
1272         for(int i=0; i<len; i+=4)
1273                 WriteLog(" --------");
1274         WriteLog("\n   ");
1275         for(int i=0; i<len; i+=4)
1276                 WriteLog(" %08X", GPUReadLong(gpu_reg[9]+4+i));
1277         WriteLog("\n\n");
1278 }//*/
1279 /*if (gpu_pc == 0xF035C8)
1280 {
1281         WriteLog("\n--> GPU PC: F035C8 (#%d)\n", testCount++);
1282         GPUDumpRegisters();
1283         GPUDumpDisassembly();
1284 }//*/
1285
1286 if (gpu_start_log)
1287 {
1288 //      gpu_reset_stats();
1289 static char buffer[512];
1290 dasmjag(JAGUAR_GPU, buffer, gpu_pc);
1291 WriteLog("GPU: [%08X] %s (RM=%08X, RN=%08X) -> ", gpu_pc, buffer, RM, RN);
1292 }//*/
1293 //$E400 -> 1110 01 -> $39 -> 57
1294 //GPU #1
1295                 gpu_pc += 2;
1296                 gpu_opcode[index]();
1297 //GPU #2
1298 //              gpu2_opcode[index]();
1299 //              gpu_pc += 2;
1300 //GPU #3                                (Doesn't show ATARI logo! #1 & #2 do...)
1301 //              gpu_pc += 2;
1302 //              gpu3_opcode[index]();
1303
1304 // BIOS hacking
1305 //GPU: [00F03548] jr      nz,00F03560 (0xd561) (RM=00F03114, RN=00000004) ->     --> JR: Branch taken.
1306 /*static bool firstTime = true;
1307 if (gpu_pc == 0xF03548 && firstTime)
1308 {
1309         gpu_flag_z = 1;
1310 //      firstTime = false;
1311
1312 //static char buffer[512];
1313 //int k=0xF03548;
1314 //while (k<0xF0356C)
1315 //{
1316 //int oldk = k;
1317 //k += dasmjag(JAGUAR_GPU, buffer, k);
1318 //WriteLog("GPU: [%08X] %s\n", oldk, buffer);
1319 //}
1320 //      gpu_start_log = 1;
1321 }//*/
1322 //GPU: [00F0354C] jump    nz,(r29) (0xd3a1) (RM=00F03314, RN=00000004) -> (RM=00F03314, RN=00000004)
1323 /*if (gpu_pc == 0xF0354C)
1324         gpu_flag_z = 0;//, gpu_start_log = 1;//*/
1325
1326                 cycles -= gpu_opcode_cycles[index];
1327                 gpu_opcode_use[index]++;
1328 if (gpu_start_log)
1329         WriteLog("(RM=%08X, RN=%08X)\n", RM, RN);//*/
1330 if ((gpu_pc < 0xF03000 || gpu_pc > 0xF03FFF) && !tripwire)
1331 {
1332         WriteLog("GPU: Executing outside local RAM! GPU_PC: %08X\n", gpu_pc);
1333         tripwire = true;
1334 }
1335         }
1336
1337         gpu_in_exec--;
1338 }
1339
1340 //
1341 // GPU opcodes
1342 //
1343
1344 /*
1345 GPU opcodes use (offset punch--vertically below bad guy):
1346                       add 18686
1347                      addq 32621
1348                       sub 7483
1349                      subq 10252
1350                       and 21229
1351                        or 15003
1352                      btst 1822
1353                      bset 2072
1354                      mult 141
1355                       div 2392
1356                      shlq 13449
1357                      shrq 10297
1358                     sharq 11104
1359                       cmp 6775
1360                      cmpq 5944
1361                      move 31259
1362                     moveq 4473
1363                     movei 23277
1364                     loadb 46
1365                     loadw 4201
1366                      load 28580
1367          load_r14_indexed 1183
1368          load_r15_indexed 1125
1369                    storew 178
1370                     store 10144
1371         store_r14_indexed 320
1372         store_r15_indexed 1
1373                   move_pc 1742
1374                      jump 24467
1375                        jr 18090
1376                       nop 41362
1377 */
1378
1379 static void gpu_opcode_jump(void)
1380 {
1381 #ifdef GPU_DIS_JUMP
1382 const char * condition[32] =
1383 {       "T", "nz", "z", "???", "nc", "nc nz", "nc z", "???", "c", "c nz",
1384         "c z", "???", "???", "???", "???", "???", "???", "???", "???",
1385         "???", "nn", "nn nz", "nn z", "???", "n", "n nz", "n z", "???",
1386         "???", "???", "???", "F" };
1387         if (doGPUDis)
1388                 WriteLog("%06X: JUMP   %s, (R%02u) [NCZ:%u%u%u, R%02u=%08X] ", gpu_pc-2, condition[IMM_2], IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM);
1389 #endif
1390         // normalize flags
1391 /*      gpu_flag_c = (gpu_flag_c ? 1 : 0);
1392         gpu_flag_z = (gpu_flag_z ? 1 : 0);
1393         gpu_flag_n = (gpu_flag_n ? 1 : 0);*/
1394         // KLUDGE: Used by BRANCH_CONDITION
1395         uint32_t jaguar_flags = (gpu_flag_n << 2) | (gpu_flag_c << 1) | gpu_flag_z;
1396
1397         if (BRANCH_CONDITION(IMM_2))
1398         {
1399 #ifdef GPU_DIS_JUMP
1400         if (doGPUDis)
1401                 WriteLog("Branched!\n");
1402 #endif
1403 if (gpu_start_log)
1404         WriteLog("    --> JUMP: Branch taken.\n");
1405                 uint32_t delayed_pc = RM;
1406                 GPUExec(1);
1407                 gpu_pc = delayed_pc;
1408 /*              uint16_t opcode = GPUReadWord(gpu_pc, GPU);
1409                 gpu_opcode_first_parameter = (opcode >> 5) & 0x1F;
1410                 gpu_opcode_second_parameter = opcode & 0x1F;
1411
1412                 gpu_pc = delayed_pc;
1413                 gpu_opcode[opcode>>10]();//*/
1414         }
1415 #ifdef GPU_DIS_JUMP
1416         else
1417                 if (doGPUDis)
1418                         WriteLog("Branch NOT taken.\n");
1419 #endif
1420 }
1421
1422 static void gpu_opcode_jr(void)
1423 {
1424 #ifdef GPU_DIS_JR
1425 const char * condition[32] =
1426 {       "T", "nz", "z", "???", "nc", "nc nz", "nc z", "???", "c", "c nz",
1427         "c z", "???", "???", "???", "???", "???", "???", "???", "???",
1428         "???", "nn", "nn nz", "nn z", "???", "n", "n nz", "n z", "???",
1429         "???", "???", "???", "F" };
1430         if (doGPUDis)
1431                 WriteLog("%06X: JR     %s, %06X [NCZ:%u%u%u] ", gpu_pc-2, condition[IMM_2], gpu_pc+((IMM_1 & 0x10 ? 0xFFFFFFF0 | IMM_1 : IMM_1) * 2), gpu_flag_n, gpu_flag_c, gpu_flag_z);
1432 #endif
1433 /*      if (CONDITION(jaguar.op & 31))
1434         {
1435                 int32_t r1 = (INT8)((jaguar.op >> 2) & 0xF8) >> 2;
1436                 uint32_t newpc = jaguar.PC + r1;
1437                 CALL_MAME_DEBUG;
1438                 jaguar.op = ROPCODE(jaguar.PC);
1439                 jaguar.PC = newpc;
1440                 (*jaguar.table[jaguar.op >> 10])();
1441
1442                 jaguar_icount -= 3;     // 3 wait states guaranteed
1443         }*/
1444         // normalize flags
1445 /*      gpu_flag_n = (gpu_flag_n ? 1 : 0);
1446         gpu_flag_c = (gpu_flag_c ? 1 : 0);
1447         gpu_flag_z = (gpu_flag_z ? 1 : 0);*/
1448         // KLUDGE: Used by BRANCH_CONDITION
1449         uint32_t jaguar_flags = (gpu_flag_n << 2) | (gpu_flag_c << 1) | gpu_flag_z;
1450
1451         if (BRANCH_CONDITION(IMM_2))
1452         {
1453 #ifdef GPU_DIS_JR
1454         if (doGPUDis)
1455                 WriteLog("Branched!\n");
1456 #endif
1457 if (gpu_start_log)
1458         WriteLog("    --> JR: Branch taken.\n");
1459                 int32_t offset = (IMM_1 & 0x10 ? 0xFFFFFFF0 | IMM_1 : IMM_1);           // Sign extend IMM_1
1460                 int32_t delayed_pc = gpu_pc + (offset * 2);
1461                 GPUExec(1);
1462                 gpu_pc = delayed_pc;
1463 /*              uint16_t opcode = GPUReadWord(gpu_pc, GPU);
1464                 gpu_opcode_first_parameter = (opcode >> 5) & 0x1F;
1465                 gpu_opcode_second_parameter = opcode & 0x1F;
1466
1467                 gpu_pc = delayed_pc;
1468                 gpu_opcode[opcode>>10]();//*/
1469         }
1470 #ifdef GPU_DIS_JR
1471         else
1472                 if (doGPUDis)
1473                         WriteLog("Branch NOT taken.\n");
1474 #endif
1475 }
1476
1477 static void gpu_opcode_add(void)
1478 {
1479 #ifdef GPU_DIS_ADD
1480         if (doGPUDis)
1481                 WriteLog("%06X: ADD    R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1482 #endif
1483         uint32_t res = RN + RM;
1484         CLR_ZNC; SET_ZNC_ADD(RN, RM, res);
1485         RN = res;
1486 #ifdef GPU_DIS_ADD
1487         if (doGPUDis)
1488                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1489 #endif
1490 }
1491
1492 static void gpu_opcode_addc(void)
1493 {
1494 #ifdef GPU_DIS_ADDC
1495         if (doGPUDis)
1496                 WriteLog("%06X: ADDC   R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1497 #endif
1498 /*      int dreg = jaguar.op & 31;
1499         uint32_t r1 = jaguar.r[(jaguar.op >> 5) & 31];
1500         uint32_t r2 = jaguar.r[dreg];
1501         uint32_t res = r2 + r1 + ((jaguar.FLAGS >> 1) & 1);
1502         jaguar.r[dreg] = res;
1503         CLR_ZNC; SET_ZNC_ADD(r2,r1,res);*/
1504
1505         uint32_t res = RN + RM + gpu_flag_c;
1506         uint32_t carry = gpu_flag_c;
1507 //      SET_ZNC_ADD(RN, RM, res); //???BUG??? Yes!
1508         SET_ZNC_ADD(RN + carry, RM, res);
1509 //      SET_ZNC_ADD(RN, RM + carry, res);
1510         RN = res;
1511 #ifdef GPU_DIS_ADDC
1512         if (doGPUDis)
1513                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1514 #endif
1515 }
1516
1517 static void gpu_opcode_addq(void)
1518 {
1519 #ifdef GPU_DIS_ADDQ
1520         if (doGPUDis)
1521                 WriteLog("%06X: ADDQ   #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1522 #endif
1523         uint32_t r1 = gpu_convert_zero[IMM_1];
1524         uint32_t res = RN + r1;
1525         CLR_ZNC; SET_ZNC_ADD(RN, r1, res);
1526         RN = res;
1527 #ifdef GPU_DIS_ADDQ
1528         if (doGPUDis)
1529                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1530 #endif
1531 }
1532
1533 static void gpu_opcode_addqt(void)
1534 {
1535 #ifdef GPU_DIS_ADDQT
1536         if (doGPUDis)
1537                 WriteLog("%06X: ADDQT  #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1538 #endif
1539         RN += gpu_convert_zero[IMM_1];
1540 #ifdef GPU_DIS_ADDQT
1541         if (doGPUDis)
1542                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1543 #endif
1544 }
1545
1546 static void gpu_opcode_sub(void)
1547 {
1548 #ifdef GPU_DIS_SUB
1549         if (doGPUDis)
1550                 WriteLog("%06X: SUB    R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1551 #endif
1552         uint32_t res = RN - RM;
1553         SET_ZNC_SUB(RN, RM, res);
1554         RN = res;
1555 #ifdef GPU_DIS_SUB
1556         if (doGPUDis)
1557                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1558 #endif
1559 }
1560
1561 static void gpu_opcode_subc(void)
1562 {
1563 #ifdef GPU_DIS_SUBC
1564         if (doGPUDis)
1565                 WriteLog("%06X: SUBC   R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1566 #endif
1567         uint32_t res = RN - RM - gpu_flag_c;
1568         uint32_t borrow = gpu_flag_c;
1569 //      SET_ZNC_SUB(RN, RM, res); //???BUG??? YES!!!
1570 //No matter how you do it, there is a problem. With below, it's 0-0 with carry,
1571 //and the one below it it's FFFFFFFF - FFFFFFFF with carry... !!! FIX !!!
1572 //      SET_ZNC_SUB(RN - borrow, RM, res);
1573         SET_ZNC_SUB(RN, RM + borrow, res);
1574         RN = res;
1575 #ifdef GPU_DIS_SUBC
1576         if (doGPUDis)
1577                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1578 #endif
1579 }
1580 /*
1581 N = 5, M = 3, 3 - 5 = -2, C = 1... Or, in our case:
1582 N = 0, M = 1, 0 - 1 = -1, C = 0!
1583
1584 #define SET_C_SUB(a,b)          (gpu_flag_c = ((uint32_t)(b) > (uint32_t)(a)))
1585 #define SET_ZN(r)                       SET_N(r); SET_Z(r)
1586 #define SET_ZNC_ADD(a,b,r)      SET_N(r); SET_Z(r); SET_C_ADD(a,b)
1587 #define SET_ZNC_SUB(a,b,r)      SET_N(r); SET_Z(r); SET_C_SUB(a,b)
1588 */
1589 static void gpu_opcode_subq(void)
1590 {
1591 #ifdef GPU_DIS_SUBQ
1592         if (doGPUDis)
1593                 WriteLog("%06X: SUBQ   #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1594 #endif
1595         uint32_t r1 = gpu_convert_zero[IMM_1];
1596         uint32_t res = RN - r1;
1597         SET_ZNC_SUB(RN, r1, res);
1598         RN = res;
1599 #ifdef GPU_DIS_SUBQ
1600         if (doGPUDis)
1601                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1602 #endif
1603 }
1604
1605 static void gpu_opcode_subqt(void)
1606 {
1607 #ifdef GPU_DIS_SUBQT
1608         if (doGPUDis)
1609                 WriteLog("%06X: SUBQT  #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1610 #endif
1611         RN -= gpu_convert_zero[IMM_1];
1612 #ifdef GPU_DIS_SUBQT
1613         if (doGPUDis)
1614                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1615 #endif
1616 }
1617
1618 static void gpu_opcode_cmp(void)
1619 {
1620 #ifdef GPU_DIS_CMP
1621         if (doGPUDis)
1622                 WriteLog("%06X: CMP    R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1623 #endif
1624         uint32_t res = RN - RM;
1625         SET_ZNC_SUB(RN, RM, res);
1626 #ifdef GPU_DIS_CMP
1627         if (doGPUDis)
1628                 WriteLog("[NCZ:%u%u%u]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z);
1629 #endif
1630 }
1631
1632 static void gpu_opcode_cmpq(void)
1633 {
1634         static int32_t sqtable[32] =
1635                 { 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,-16,-15,-14,-13,-12,-11,-10,-9,-8,-7,-6,-5,-4,-3,-2,-1 };
1636 #ifdef GPU_DIS_CMPQ
1637         if (doGPUDis)
1638                 WriteLog("%06X: CMPQ   #%d, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, sqtable[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1639 #endif
1640         uint32_t r1 = sqtable[IMM_1 & 0x1F]; // I like this better -> (INT8)(jaguar.op >> 2) >> 3;
1641         uint32_t res = RN - r1;
1642         SET_ZNC_SUB(RN, r1, res);
1643 #ifdef GPU_DIS_CMPQ
1644         if (doGPUDis)
1645                 WriteLog("[NCZ:%u%u%u]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z);
1646 #endif
1647 }
1648
1649 static void gpu_opcode_and(void)
1650 {
1651 #ifdef GPU_DIS_AND
1652         if (doGPUDis)
1653                 WriteLog("%06X: AND    R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1654 #endif
1655         RN = RN & RM;
1656         SET_ZN(RN);
1657 #ifdef GPU_DIS_AND
1658         if (doGPUDis)
1659                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1660 #endif
1661 }
1662
1663 static void gpu_opcode_or(void)
1664 {
1665 #ifdef GPU_DIS_OR
1666         if (doGPUDis)
1667                 WriteLog("%06X: OR     R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1668 #endif
1669         RN = RN | RM;
1670         SET_ZN(RN);
1671 #ifdef GPU_DIS_OR
1672         if (doGPUDis)
1673                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1674 #endif
1675 }
1676
1677 static void gpu_opcode_xor(void)
1678 {
1679 #ifdef GPU_DIS_XOR
1680         if (doGPUDis)
1681                 WriteLog("%06X: XOR    R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1682 #endif
1683         RN = RN ^ RM;
1684         SET_ZN(RN);
1685 #ifdef GPU_DIS_XOR
1686         if (doGPUDis)
1687                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1688 #endif
1689 }
1690
1691 static void gpu_opcode_not(void)
1692 {
1693 #ifdef GPU_DIS_NOT
1694         if (doGPUDis)
1695                 WriteLog("%06X: NOT    R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1696 #endif
1697         RN = ~RN;
1698         SET_ZN(RN);
1699 #ifdef GPU_DIS_NOT
1700         if (doGPUDis)
1701                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1702 #endif
1703 }
1704
1705 static void gpu_opcode_move_pc(void)
1706 {
1707 #ifdef GPU_DIS_MOVEPC
1708         if (doGPUDis)
1709                 WriteLog("%06X: MOVE   PC, R%02u [NCZ:%u%u%u, PC=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, gpu_pc-2, IMM_2, RN);
1710 #endif
1711         // Should be previous PC--this might not always be previous instruction!
1712         // Then again, this will point right at the *current* instruction, i.e., MOVE PC,R!
1713         RN = gpu_pc - 2;
1714 #ifdef GPU_DIS_MOVEPC
1715         if (doGPUDis)
1716                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1717 #endif
1718 }
1719
1720 static void gpu_opcode_sat8(void)
1721 {
1722 #ifdef GPU_DIS_SAT8
1723         if (doGPUDis)
1724                 WriteLog("%06X: SAT8   R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1725 #endif
1726         RN = ((int32_t)RN < 0 ? 0 : (RN > 0xFF ? 0xFF : RN));
1727         SET_ZN(RN);
1728 #ifdef GPU_DIS_SAT8
1729         if (doGPUDis)
1730                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1731 #endif
1732 }
1733
1734 static void gpu_opcode_sat16(void)
1735 {
1736         RN = ((int32_t)RN < 0 ? 0 : (RN > 0xFFFF ? 0xFFFF : RN));
1737         SET_ZN(RN);
1738 }
1739
1740 static void gpu_opcode_sat24(void)
1741 {
1742         RN = ((int32_t)RN < 0 ? 0 : (RN > 0xFFFFFF ? 0xFFFFFF : RN));
1743         SET_ZN(RN);
1744 }
1745
1746 static void gpu_opcode_store_r14_indexed(void)
1747 {
1748 #ifdef GPU_DIS_STORE14I
1749         if (doGPUDis)
1750                 WriteLog("%06X: STORE  R%02u, (R14+$%02X) [NCZ:%u%u%u, R%02u=%08X, R14+$%02X=%08X]\n", gpu_pc-2, IMM_2, gpu_convert_zero[IMM_1] << 2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, gpu_convert_zero[IMM_1] << 2, gpu_reg[14]+(gpu_convert_zero[IMM_1] << 2));
1751 #endif
1752 #ifdef GPU_CORRECT_ALIGNMENT
1753         uint32_t address = gpu_reg[14] + (gpu_convert_zero[IMM_1] << 2);
1754         
1755         if (address >= 0xF03000 && address <= 0xF03FFF)
1756                 GPUWriteLong(address & 0xFFFFFFFC, RN, GPU);
1757         else
1758                 GPUWriteLong(address, RN, GPU);
1759 #else
1760         GPUWriteLong(gpu_reg[14] + (gpu_convert_zero[IMM_1] << 2), RN, GPU);
1761 #endif
1762 }
1763
1764 static void gpu_opcode_store_r15_indexed(void)
1765 {
1766 #ifdef GPU_DIS_STORE15I
1767         if (doGPUDis)
1768                 WriteLog("%06X: STORE  R%02u, (R15+$%02X) [NCZ:%u%u%u, R%02u=%08X, R15+$%02X=%08X]\n", gpu_pc-2, IMM_2, gpu_convert_zero[IMM_1] << 2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, gpu_convert_zero[IMM_1] << 2, gpu_reg[15]+(gpu_convert_zero[IMM_1] << 2));
1769 #endif
1770 #ifdef GPU_CORRECT_ALIGNMENT
1771         uint32_t address = gpu_reg[15] + (gpu_convert_zero[IMM_1] << 2);
1772
1773         if (address >= 0xF03000 && address <= 0xF03FFF)
1774                 GPUWriteLong(address & 0xFFFFFFFC, RN, GPU);
1775         else
1776                 GPUWriteLong(address, RN, GPU);
1777 #else
1778         GPUWriteLong(gpu_reg[15] + (gpu_convert_zero[IMM_1] << 2), RN, GPU);
1779 #endif
1780 }
1781
1782 static void gpu_opcode_load_r14_ri(void)
1783 {
1784 #ifdef GPU_DIS_LOAD14R
1785         if (doGPUDis)
1786                 WriteLog("%06X: LOAD   (R14+R%02u), R%02u [NCZ:%u%u%u, R14+R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM+gpu_reg[14], IMM_2, RN);
1787 #endif
1788 #ifdef GPU_CORRECT_ALIGNMENT
1789         uint32_t address = gpu_reg[14] + RM;
1790
1791         if (address >= 0xF03000 && address <= 0xF03FFF)
1792                 RN = GPUReadLong(address & 0xFFFFFFFC, GPU);
1793         else
1794                 RN = GPUReadLong(address, GPU);
1795 #else
1796         RN = GPUReadLong(gpu_reg[14] + RM, GPU);
1797 #endif
1798 #ifdef GPU_DIS_LOAD14R
1799         if (doGPUDis)
1800                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1801 #endif
1802 }
1803
1804 static void gpu_opcode_load_r15_ri(void)
1805 {
1806 #ifdef GPU_DIS_LOAD15R
1807         if (doGPUDis)
1808                 WriteLog("%06X: LOAD   (R15+R%02u), R%02u [NCZ:%u%u%u, R15+R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM+gpu_reg[15], IMM_2, RN);
1809 #endif
1810 #ifdef GPU_CORRECT_ALIGNMENT
1811         uint32_t address = gpu_reg[15] + RM;
1812
1813         if (address >= 0xF03000 && address <= 0xF03FFF)
1814                 RN = GPUReadLong(address & 0xFFFFFFFC, GPU);
1815         else
1816                 RN = GPUReadLong(address, GPU);
1817 #else
1818         RN = GPUReadLong(gpu_reg[15] + RM, GPU);
1819 #endif
1820 #ifdef GPU_DIS_LOAD15R
1821         if (doGPUDis)
1822                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1823 #endif
1824 }
1825
1826 static void gpu_opcode_store_r14_ri(void)
1827 {
1828 #ifdef GPU_DIS_STORE14R
1829         if (doGPUDis)
1830                 WriteLog("%06X: STORE  R%02u, (R14+R%02u) [NCZ:%u%u%u, R%02u=%08X, R14+R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM+gpu_reg[14]);
1831 #endif
1832 #ifdef GPU_CORRECT_ALIGNMENT
1833         uint32_t address = gpu_reg[14] + RM;
1834
1835         if (address >= 0xF03000 && address <= 0xF03FFF)
1836                 GPUWriteLong(address & 0xFFFFFFFC, RN, GPU);
1837         else
1838                 GPUWriteLong(address, RN, GPU);
1839 #else
1840         GPUWriteLong(gpu_reg[14] + RM, RN, GPU);
1841 #endif
1842 }
1843
1844 static void gpu_opcode_store_r15_ri(void)
1845 {
1846 #ifdef GPU_DIS_STORE15R
1847         if (doGPUDis)
1848                 WriteLog("%06X: STORE  R%02u, (R15+R%02u) [NCZ:%u%u%u, R%02u=%08X, R15+R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM+gpu_reg[15]);
1849 #endif
1850 #ifdef GPU_CORRECT_ALIGNMENT_STORE
1851         uint32_t address = gpu_reg[15] + RM;
1852
1853         if (address >= 0xF03000 && address <= 0xF03FFF)
1854                 GPUWriteLong(address & 0xFFFFFFFC, RN, GPU);
1855         else
1856                 GPUWriteLong(address, RN, GPU);
1857 #else
1858         GPUWriteLong(gpu_reg[15] + RM, RN, GPU);
1859 #endif
1860 }
1861
// NOP -- does nothing apart from the optional disassembly trace.
static void gpu_opcode_nop(void)
{
#ifdef GPU_DIS_NOP
	if (doGPUDis)
	{
		WriteLog("%06X: NOP    [NCZ:%u%u%u]\n", gpu_pc-2, gpu_flag_n, gpu_flag_c, gpu_flag_z);
	}
#endif
}
1869
1870 static void gpu_opcode_pack(void)
1871 {
1872 #ifdef GPU_DIS_PACK
1873         if (doGPUDis)
1874                 WriteLog("%06X: %s R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, (!IMM_1 ? "PACK  " : "UNPACK"), IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1875 #endif
1876         uint32_t val = RN;
1877
1878 //BUG!  if (RM == 0)                            // Pack
1879         if (IMM_1 == 0)                         // Pack
1880                 RN = ((val >> 10) & 0x0000F000) | ((val >> 5) & 0x00000F00) | (val & 0x000000FF);
1881         else                                            // Unpack
1882                 RN = ((val & 0x0000F000) << 10) | ((val & 0x00000F00) << 5) | (val & 0x000000FF);
1883 #ifdef GPU_DIS_PACK
1884         if (doGPUDis)
1885                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1886 #endif
1887 }
1888
1889 static void gpu_opcode_storeb(void)
1890 {
1891 #ifdef GPU_DIS_STOREB
1892         if (doGPUDis)
1893                 WriteLog("%06X: STOREB R%02u, (R%02u) [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM);
1894 #endif
1895 //Is this right???
1896 // Would appear to be so...!
1897         if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1898                 GPUWriteLong(RM, RN & 0xFF, GPU);
1899         else
1900                 JaguarWriteByte(RM, RN, GPU);
1901 }
1902
1903 static void gpu_opcode_storew(void)
1904 {
1905 #ifdef GPU_DIS_STOREW
1906         if (doGPUDis)
1907                 WriteLog("%06X: STOREW R%02u, (R%02u) [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM);
1908 #endif
1909 #ifdef GPU_CORRECT_ALIGNMENT
1910         if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1911                 GPUWriteLong(RM & 0xFFFFFFFE, RN & 0xFFFF, GPU);
1912         else
1913                 JaguarWriteWord(RM, RN, GPU);
1914 #else
1915         if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1916                 GPUWriteLong(RM, RN & 0xFFFF, GPU);
1917         else
1918                 JaguarWriteWord(RM, RN, GPU);
1919 #endif
1920 }
1921
1922 static void gpu_opcode_store(void)
1923 {
1924 #ifdef GPU_DIS_STORE
1925         if (doGPUDis)
1926                 WriteLog("%06X: STORE  R%02u, (R%02u) [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM);
1927 #endif
1928 #ifdef GPU_CORRECT_ALIGNMENT
1929         if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1930                 GPUWriteLong(RM & 0xFFFFFFFC, RN, GPU);
1931         else
1932                 GPUWriteLong(RM, RN, GPU);
1933 #else
1934         GPUWriteLong(RM, RN, GPU);
1935 #endif
1936 }
1937
1938 static void gpu_opcode_storep(void)
1939 {
1940 #ifdef GPU_CORRECT_ALIGNMENT
1941         if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1942         {
1943                 GPUWriteLong((RM & 0xFFFFFFF8) + 0, gpu_hidata, GPU);
1944                 GPUWriteLong((RM & 0xFFFFFFF8) + 4, RN, GPU);
1945         }
1946         else
1947         {
1948                 GPUWriteLong(RM + 0, gpu_hidata, GPU);
1949                 GPUWriteLong(RM + 4, RN, GPU);
1950         }
1951 #else
1952         GPUWriteLong(RM + 0, gpu_hidata, GPU);
1953         GPUWriteLong(RM + 4, RN, GPU);
1954 #endif
1955 }
1956
1957 static void gpu_opcode_loadb(void)
1958 {
1959 #ifdef GPU_DIS_LOADB
1960         if (doGPUDis)
1961                 WriteLog("%06X: LOADB  (R%02u), R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1962 #endif
1963         if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1964                 RN = GPUReadLong(RM, GPU) & 0xFF;
1965         else
1966                 RN = JaguarReadByte(RM, GPU);
1967 #ifdef GPU_DIS_LOADB
1968         if (doGPUDis)
1969                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1970 #endif
1971 }
1972
1973 static void gpu_opcode_loadw(void)
1974 {
1975 #ifdef GPU_DIS_LOADW
1976         if (doGPUDis)
1977                 WriteLog("%06X: LOADW  (R%02u), R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1978 #endif
1979 #ifdef GPU_CORRECT_ALIGNMENT
1980         if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1981                 RN = GPUReadLong(RM & 0xFFFFFFFE, GPU) & 0xFFFF;
1982         else
1983                 RN = JaguarReadWord(RM, GPU);
1984 #else
1985         if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1986                 RN = GPUReadLong(RM, GPU) & 0xFFFF;
1987         else
1988                 RN = JaguarReadWord(RM, GPU);
1989 #endif
1990 #ifdef GPU_DIS_LOADW
1991         if (doGPUDis)
1992                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1993 #endif
1994 }
1995
1996 // According to the docs, & "Do The Same", this address is long aligned...
1997 // So let's try it:
1998 // And it works!!! Need to fix all instances...
1999 // Also, Power Drive Rally seems to contradict the idea that only LOADs in
2000 // the $F03000-$F03FFF range are aligned...
2001 #warning "!!! Alignment issues, need to find definitive final word on this !!!"
2002 /*
2003 Preliminary testing on real hardware seems to confirm that something strange goes on
2004 with unaligned reads in main memory. When the address is off by 1, the result is the
2005 same as the long address with the top byte replaced by something. So if the read is
2006 from $401, and $400 has 12 34 56 78, the value read will be $nn345678, where nn is a currently unknown value.
2007 When the address is off by 2, the result would be $nnnn5678, where nnnn is unknown.
2008 When the address is off by 3, the result would be $nnnnnn78, where nnnnnn is unknown.
2009 It may be that the "unknown" values come from the prefetch queue, but not sure how
2010 to test that. They seem to be stable, though, which would indicate such a mechanism.
2011 Sometimes, however, the off by 2 case returns $12345678!
2012 */
2013 static void gpu_opcode_load(void)
2014 {
2015 #ifdef GPU_DIS_LOAD
2016         if (doGPUDis)
2017                 WriteLog("%06X: LOAD   (R%02u), R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2018 #endif
2019 #ifdef GPU_CORRECT_ALIGNMENT
2020         uint32_t mask[4] = { 0x00000000, 0xFF000000, 0xFFFF0000, 0xFFFFFF00 };
2021 //      if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
2022                 RN = GPUReadLong(RM & 0xFFFFFFFC, GPU);
2023 //              RN = GPUReadLong(RM & 0x00FFFFFC, GPU);
2024 //      else
2025 //              RN = GPUReadLong(RM, GPU);
2026         // Simulate garbage in unaligned reads...
2027 //seems that this behavior is different in GPU mem vs. main mem...
2028 //      if ((RM < 0xF03000) || (RM > 0xF0BFFF))
2029 //              RN |= mask[RM & 0x03];
2030 #else
2031         RN = GPUReadLong(RM, GPU);
2032 #endif
2033 #ifdef GPU_DIS_LOAD
2034         if (doGPUDis)
2035                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2036 #endif
2037 }
2038
2039 static void gpu_opcode_loadp(void)
2040 {
2041 #ifdef GPU_CORRECT_ALIGNMENT
2042         if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
2043         {
2044                 gpu_hidata = GPUReadLong((RM & 0xFFFFFFF8) + 0, GPU);
2045                 RN                 = GPUReadLong((RM & 0xFFFFFFF8) + 4, GPU);
2046         }
2047         else
2048         {
2049                 gpu_hidata = GPUReadLong(RM + 0, GPU);
2050                 RN                 = GPUReadLong(RM + 4, GPU);
2051         }
2052 #else
2053         gpu_hidata = GPUReadLong(RM + 0, GPU);
2054         RN                 = GPUReadLong(RM + 4, GPU);
2055 #endif
2056 }
2057
2058 static void gpu_opcode_load_r14_indexed(void)
2059 {
2060 #ifdef GPU_DIS_LOAD14I
2061         if (doGPUDis)
2062                 WriteLog("%06X: LOAD   (R14+$%02X), R%02u [NCZ:%u%u%u, R14+$%02X=%08X, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1] << 2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, gpu_convert_zero[IMM_1] << 2, gpu_reg[14]+(gpu_convert_zero[IMM_1] << 2), IMM_2, RN);
2063 #endif
2064 #ifdef GPU_CORRECT_ALIGNMENT
2065         uint32_t address = gpu_reg[14] + (gpu_convert_zero[IMM_1] << 2);
2066
2067         if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
2068                 RN = GPUReadLong(address & 0xFFFFFFFC, GPU);
2069         else
2070                 RN = GPUReadLong(address, GPU);
2071 #else
2072         RN = GPUReadLong(gpu_reg[14] + (gpu_convert_zero[IMM_1] << 2), GPU);
2073 #endif
2074 #ifdef GPU_DIS_LOAD14I
2075         if (doGPUDis)
2076                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2077 #endif
2078 }
2079
2080 static void gpu_opcode_load_r15_indexed(void)
2081 {
2082 #ifdef GPU_DIS_LOAD15I
2083         if (doGPUDis)
2084                 WriteLog("%06X: LOAD   (R15+$%02X), R%02u [NCZ:%u%u%u, R15+$%02X=%08X, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1] << 2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, gpu_convert_zero[IMM_1] << 2, gpu_reg[15]+(gpu_convert_zero[IMM_1] << 2), IMM_2, RN);
2085 #endif
2086 #ifdef GPU_CORRECT_ALIGNMENT
2087         uint32_t address = gpu_reg[15] + (gpu_convert_zero[IMM_1] << 2);
2088
2089         if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
2090                 RN = GPUReadLong(address & 0xFFFFFFFC, GPU);
2091         else
2092                 RN = GPUReadLong(address, GPU);
2093 #else
2094         RN = GPUReadLong(gpu_reg[15] + (gpu_convert_zero[IMM_1] << 2), GPU);
2095 #endif
2096 #ifdef GPU_DIS_LOAD15I
2097         if (doGPUDis)
2098                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2099 #endif
2100 }
2101
2102 static void gpu_opcode_movei(void)
2103 {
2104 #ifdef GPU_DIS_MOVEI
2105         if (doGPUDis)
2106                 WriteLog("%06X: MOVEI  #$%08X, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, (uint32_t)GPUReadWord(gpu_pc) | ((uint32_t)GPUReadWord(gpu_pc + 2) << 16), IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2107 #endif
2108         // This instruction is followed by 32-bit value in LSW / MSW format...
2109         RN = (uint32_t)GPUReadWord(gpu_pc, GPU) | ((uint32_t)GPUReadWord(gpu_pc + 2, GPU) << 16);
2110         gpu_pc += 4;
2111 #ifdef GPU_DIS_MOVEI
2112         if (doGPUDis)
2113                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2114 #endif
2115 }
2116
2117 static void gpu_opcode_moveta(void)
2118 {
2119 #ifdef GPU_DIS_MOVETA
2120         if (doGPUDis)
2121                 WriteLog("%06X: MOVETA R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u(alt)=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, ALTERNATE_RN);
2122 #endif
2123         ALTERNATE_RN = RM;
2124 #ifdef GPU_DIS_MOVETA
2125         if (doGPUDis)
2126                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u(alt)=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, ALTERNATE_RN);
2127 #endif
2128 }
2129
2130 static void gpu_opcode_movefa(void)
2131 {
2132 #ifdef GPU_DIS_MOVEFA
2133         if (doGPUDis)
2134                 WriteLog("%06X: MOVEFA R%02u, R%02u [NCZ:%u%u%u, R%02u(alt)=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, ALTERNATE_RM, IMM_2, RN);
2135 #endif
2136         RN = ALTERNATE_RM;
2137 #ifdef GPU_DIS_MOVEFA
2138         if (doGPUDis)
2139                 WriteLog("[NCZ:%u%u%u, R%02u(alt)=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, ALTERNATE_RM, IMM_2, RN);
2140 #endif
2141 }
2142
2143 static void gpu_opcode_move(void)
2144 {
2145 #ifdef GPU_DIS_MOVE
2146         if (doGPUDis)
2147                 WriteLog("%06X: MOVE   R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2148 #endif
2149         RN = RM;
2150 #ifdef GPU_DIS_MOVE
2151         if (doGPUDis)
2152                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2153 #endif
2154 }
2155
2156 static void gpu_opcode_moveq(void)
2157 {
2158 #ifdef GPU_DIS_MOVEQ
2159         if (doGPUDis)
2160                 WriteLog("%06X: MOVEQ  #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2161 #endif
2162         RN = IMM_1;
2163 #ifdef GPU_DIS_MOVEQ
2164         if (doGPUDis)
2165                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2166 #endif
2167 }
2168
2169 static void gpu_opcode_resmac(void)
2170 {
2171         RN = gpu_acc;
2172 }
2173
2174 static void gpu_opcode_imult(void)
2175 {
2176 #ifdef GPU_DIS_IMULT
2177         if (doGPUDis)
2178                 WriteLog("%06X: IMULT  R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2179 #endif
2180         RN = (int16_t)RN * (int16_t)RM;
2181         SET_ZN(RN);
2182 #ifdef GPU_DIS_IMULT
2183         if (doGPUDis)
2184                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2185 #endif
2186 }
2187
2188 static void gpu_opcode_mult(void)
2189 {
2190 #ifdef GPU_DIS_MULT
2191         if (doGPUDis)
2192                 WriteLog("%06X: MULT   R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2193 #endif
2194         RN = (uint16_t)RM * (uint16_t)RN;
2195 //      RN = (RM & 0xFFFF) * (RN & 0xFFFF);
2196         SET_ZN(RN);
2197 #ifdef GPU_DIS_MULT
2198         if (doGPUDis)
2199                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2200 #endif
2201 }
2202
2203 static void gpu_opcode_bclr(void)
2204 {
2205 #ifdef GPU_DIS_BCLR
2206         if (doGPUDis)
2207                 WriteLog("%06X: BCLR   #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2208 #endif
2209         uint32_t res = RN & ~(1 << IMM_1);
2210         RN = res;
2211         SET_ZN(res);
2212 #ifdef GPU_DIS_BCLR
2213         if (doGPUDis)
2214                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2215 #endif
2216 }
2217
2218 static void gpu_opcode_btst(void)
2219 {
2220 #ifdef GPU_DIS_BTST
2221         if (doGPUDis)
2222                 WriteLog("%06X: BTST   #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2223 #endif
2224         gpu_flag_z = (~RN >> IMM_1) & 1;
2225 #ifdef GPU_DIS_BTST
2226         if (doGPUDis)
2227                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2228 #endif
2229 }
2230
2231 static void gpu_opcode_bset(void)
2232 {
2233 #ifdef GPU_DIS_BSET
2234         if (doGPUDis)
2235                 WriteLog("%06X: BSET   #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2236 #endif
2237         uint32_t res = RN | (1 << IMM_1);
2238         RN = res;
2239         SET_ZN(res);
2240 #ifdef GPU_DIS_BSET
2241         if (doGPUDis)
2242                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2243 #endif
2244 }
2245
2246 static void gpu_opcode_imacn(void)
2247 {
2248         uint32_t res = (int16_t)RM * (int16_t)(RN);
2249         gpu_acc += res;
2250 }
2251
2252 static void gpu_opcode_mtoi(void)
2253 {
2254         uint32_t _RM = RM;
2255         uint32_t res = RN = (((int32_t)_RM >> 8) & 0xFF800000) | (_RM & 0x007FFFFF);
2256         SET_ZN(res);
2257 }
2258
2259 static void gpu_opcode_normi(void)
2260 {
2261         uint32_t _RM = RM;
2262         uint32_t res = 0;
2263
2264         if (_RM)
2265         {
2266                 while ((_RM & 0xFFC00000) == 0)
2267                 {
2268                         _RM <<= 1;
2269                         res--;
2270                 }
2271                 while ((_RM & 0xFF800000) != 0)
2272                 {
2273                         _RM >>= 1;
2274                         res++;
2275                 }
2276         }
2277         RN = res;
2278         SET_ZN(res);
2279 }
2280
2281 static void gpu_opcode_mmult(void)
2282 {
2283         int count       = gpu_matrix_control & 0x0F;    // Matrix width
2284         uint32_t addr = gpu_pointer_to_matrix;          // In the GPU's RAM
2285         int64_t accum = 0;
2286         uint32_t res;
2287
2288         if (gpu_matrix_control & 0x10)                          // Column stepping
2289         {
2290                 for(int i=0; i<count; i++)
2291                 {
2292                         int16_t a;
2293                         if (i & 0x01)
2294                                 a = (int16_t)((gpu_alternate_reg[IMM_1 + (i >> 1)] >> 16) & 0xFFFF);
2295                         else
2296                                 a = (int16_t)(gpu_alternate_reg[IMM_1 + (i >> 1)] & 0xFFFF);
2297
2298                         int16_t b = ((int16_t)GPUReadWord(addr + 2, GPU));
2299                         accum += a * b;
2300                         addr += 4 * count;
2301                 }
2302         }
2303         else                                                                            // Row stepping
2304         {
2305                 for(int i=0; i<count; i++)
2306                 {
2307                         int16_t a;
2308                         if (i & 0x01)
2309                                 a = (int16_t)((gpu_alternate_reg[IMM_1 + (i >> 1)] >> 16) & 0xFFFF);
2310                         else
2311                                 a = (int16_t)(gpu_alternate_reg[IMM_1 + (i >> 1)] & 0xFFFF);
2312
2313                         int16_t b = ((int16_t)GPUReadWord(addr + 2, GPU));
2314                         accum += a * b;
2315                         addr += 4;
2316                 }
2317         }
2318         RN = res = (int32_t)accum;
2319         // carry flag to do (out of the last add)
2320         SET_ZN(res);
2321 }
2322
2323 static void gpu_opcode_abs(void)
2324 {
2325 #ifdef GPU_DIS_ABS
2326         if (doGPUDis)
2327                 WriteLog("%06X: ABS    R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2328 #endif
2329         gpu_flag_c = RN >> 31;
2330         if (RN == 0x80000000)
2331         //Is 0x80000000 a positive number? If so, then we need to set C to 0 as well!
2332                 gpu_flag_n = 1, gpu_flag_z = 0;
2333         else
2334         {
2335                 if (gpu_flag_c)
2336                         RN = -RN;
2337                 gpu_flag_n = 0; SET_FLAG_Z(RN);
2338         }
2339 #ifdef GPU_DIS_ABS
2340         if (doGPUDis)
2341                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2342 #endif
2343 }
2344
2345 static void gpu_opcode_div(void)        // RN / RM
2346 {
2347 #ifdef GPU_DIS_DIV
2348         if (doGPUDis)
2349                 WriteLog("%06X: DIV    R%02u, R%02u (%s) [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, (gpu_div_control & 0x01 ? "16.16" : "32"), gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2350 #endif
2351 // NOTE: remainder is NOT calculated correctly here!
2352 //       The original tried to get it right by checking to see if the
2353 //       remainder was negative, but that's too late...
2354 // The code there should do it now, but I'm not 100% sure...
2355 // [Now it should be correct, but not displaying correct behavior of the actual
2356 //  hardware. A step in the right direction.]
2357
2358         if (RM)
2359         {
2360                 if (gpu_div_control & 0x01)             // 16.16 division
2361                 {
2362                         gpu_remain = ((uint64_t)RN << 16) % RM;
2363                         RN = ((uint64_t)RN << 16) / RM;
2364                 }
2365                 else
2366                 {
2367                         // We calculate the remainder first because we destroy RN after
2368                         // this by assigning it to itself.
2369                         gpu_remain = RN % RM;
2370                         RN = RN / RM;
2371                 }
2372
2373 // What we really should do here is figure out why this condition
2374 // happens in the real divide unit and emulate *that* behavior.
2375 #if 0
2376                 if ((gpu_remain - RM) & 0x80000000)     // If the result would have been negative...
2377                         gpu_remain -= RM;                       // Then make it negative!
2378 #endif
2379         }
2380         else
2381                 RN = 0xFFFFFFFF;
2382
2383 #ifdef GPU_DIS_DIV
2384         if (doGPUDis)
2385                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] Remainder: %08X\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN, gpu_remain);
2386 #endif
2387 }
2388
2389 static void gpu_opcode_imultn(void)
2390 {
2391         uint32_t res = (int32_t)((int16_t)RN * (int16_t)RM);
2392         gpu_acc = (int32_t)res;
2393         SET_FLAG_Z(res);
2394         SET_FLAG_N(res);
2395 }
2396
2397 static void gpu_opcode_neg(void)
2398 {
2399 #ifdef GPU_DIS_NEG
2400         if (doGPUDis)
2401                 WriteLog("%06X: NEG    R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2402 #endif
2403         uint32_t res = -RN;
2404         SET_ZNC_SUB(0, RN, res);
2405         RN = res;
2406 #ifdef GPU_DIS_NEG
2407         if (doGPUDis)
2408                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2409 #endif
2410 }
2411
2412 static void gpu_opcode_shlq(void)
2413 {
2414 #ifdef GPU_DIS_SHLQ
2415         if (doGPUDis)
2416                 WriteLog("%06X: SHLQ   #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, 32 - IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2417 #endif
2418 // Was a bug here...
2419 // (Look at Aaron's code: If r1 = 32, then 32 - 32 = 0 which is wrong!)
2420         int32_t r1 = 32 - IMM_1;
2421         uint32_t res = RN << r1;
2422         SET_ZN(res); gpu_flag_c = (RN >> 31) & 1;
2423         RN = res;
2424 #ifdef GPU_DIS_SHLQ
2425         if (doGPUDis)
2426                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2427 #endif
2428 }
2429
2430 static void gpu_opcode_shrq(void)
2431 {
2432 #ifdef GPU_DIS_SHRQ
2433         if (doGPUDis)
2434                 WriteLog("%06X: SHRQ   #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2435 #endif
2436         int32_t r1 = gpu_convert_zero[IMM_1];
2437         uint32_t res = RN >> r1;
2438         SET_ZN(res); gpu_flag_c = RN & 1;
2439         RN = res;
2440 #ifdef GPU_DIS_SHRQ
2441         if (doGPUDis)
2442                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2443 #endif
2444 }
2445
2446 static void gpu_opcode_ror(void)
2447 {
2448 #ifdef GPU_DIS_ROR
2449         if (doGPUDis)
2450                 WriteLog("%06X: ROR    R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2451 #endif
2452         uint32_t r1 = RM & 0x1F;
2453         uint32_t res = (RN >> r1) | (RN << (32 - r1));
2454         SET_ZN(res); gpu_flag_c = (RN >> 31) & 1;
2455         RN = res;
2456 #ifdef GPU_DIS_ROR
2457         if (doGPUDis)
2458                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2459 #endif
2460 }
2461
2462 static void gpu_opcode_rorq(void)
2463 {
2464 #ifdef GPU_DIS_RORQ
2465         if (doGPUDis)
2466                 WriteLog("%06X: RORQ   #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2467 #endif
2468         uint32_t r1 = gpu_convert_zero[IMM_1 & 0x1F];
2469         uint32_t r2 = RN;
2470         uint32_t res = (r2 >> r1) | (r2 << (32 - r1));
2471         RN = res;
2472         SET_ZN(res); gpu_flag_c = (r2 >> 31) & 0x01;
2473 #ifdef GPU_DIS_RORQ
2474         if (doGPUDis)
2475                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2476 #endif
2477 }
2478
2479 static void gpu_opcode_sha(void)
2480 {
2481 /*      int dreg = jaguar.op & 31;
2482         int32_t r1 = (int32_t)jaguar.r[(jaguar.op >> 5) & 31];
2483         uint32_t r2 = jaguar.r[dreg];
2484         uint32_t res;
2485
2486         CLR_ZNC;
2487         if (r1 < 0)
2488         {
2489                 res = (r1 <= -32) ? 0 : (r2 << -r1);
2490                 jaguar.FLAGS |= (r2 >> 30) & 2;
2491         }
2492         else
2493         {
2494                 res = (r1 >= 32) ? ((int32_t)r2 >> 31) : ((int32_t)r2 >> r1);
2495                 jaguar.FLAGS |= (r2 << 1) & 2;
2496         }
2497         jaguar.r[dreg] = res;
2498         SET_ZN(res);*/
2499
2500 #ifdef GPU_DIS_SHA
2501         if (doGPUDis)
2502                 WriteLog("%06X: SHA    R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2503 #endif
2504         uint32_t res;
2505
2506         if ((int32_t)RM < 0)
2507         {
2508                 res = ((int32_t)RM <= -32) ? 0 : (RN << -(int32_t)RM);
2509                 gpu_flag_c = RN >> 31;
2510         }
2511         else
2512         {
2513                 res = ((int32_t)RM >= 32) ? ((int32_t)RN >> 31) : ((int32_t)RN >> (int32_t)RM);
2514                 gpu_flag_c = RN & 0x01;
2515         }
2516         RN = res;
2517         SET_ZN(res);
2518 #ifdef GPU_DIS_SHA
2519         if (doGPUDis)
2520                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2521 #endif
2522
2523 /*      int32_t sRM=(int32_t)RM;
2524         uint32_t _RN=RN;
2525
2526         if (sRM<0)
2527         {
2528                 uint32_t shift=-sRM;
2529                 if (shift>=32) shift=32;
2530                 gpu_flag_c=(_RN&0x80000000)>>31;
2531                 while (shift)
2532                 {
2533                         _RN<<=1;
2534                         shift--;
2535                 }
2536         }
2537         else
2538         {
2539                 uint32_t shift=sRM;
2540                 if (shift>=32) shift=32;
2541                 gpu_flag_c=_RN&0x1;
2542                 while (shift)
2543                 {
2544                         _RN=((int32_t)_RN)>>1;
2545                         shift--;
2546                 }
2547         }
2548         RN=_RN;
2549         SET_FLAG_Z(_RN);
2550         SET_FLAG_N(_RN);*/
2551 }
2552
2553 static void gpu_opcode_sharq(void)
2554 {
2555 #ifdef GPU_DIS_SHARQ
2556         if (doGPUDis)
2557                 WriteLog("%06X: SHARQ  #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2558 #endif
2559         uint32_t res = (int32_t)RN >> gpu_convert_zero[IMM_1];
2560         SET_ZN(res); gpu_flag_c = RN & 0x01;
2561         RN = res;
2562 #ifdef GPU_DIS_SHARQ
2563         if (doGPUDis)
2564                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2565 #endif
2566 }
2567
2568 static void gpu_opcode_sh(void)
2569 {
2570 #ifdef GPU_DIS_SH
2571         if (doGPUDis)
2572                 WriteLog("%06X: SH     R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2573 #endif
2574         if (RM & 0x80000000)            // Shift left
2575         {
2576                 gpu_flag_c = RN >> 31;
2577                 RN = ((int32_t)RM <= -32 ? 0 : RN << -(int32_t)RM);
2578         }
2579         else                                            // Shift right
2580         {
2581                 gpu_flag_c = RN & 0x01;
2582                 RN = (RM >= 32 ? 0 : RN >> RM);
2583         }
2584         SET_ZN(RN);
2585 #ifdef GPU_DIS_SH
2586         if (doGPUDis)
2587                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2588 #endif
2589 }
2590
2591 //Temporary: Testing only!
2592 //#include "gpu2.cpp"
2593 //#include "gpu3.cpp"
2594
2595 #else
2596
2597 // New thread-safe GPU core
2598
//
// Placeholder entry point for the new GPU core (not implemented yet).
//
// data: caller-supplied context pointer; currently ignored.
// Returns 0. The original stub fell off the end of a non-void function,
// which leaves the return value undefined.
//
int GPUCore(void * data)
{
	(void)data;		// Unused until the core is actually implemented
	return 0;
}
2602
2603 #endif