]> Shamusworld >> Repos - virtualjaguar/blob - src/gpu.cpp
UI cleanups, added RAM randomization for main RAM & GPU local RAM.
[virtualjaguar] / src / gpu.cpp
1 #if 1
2
3 //
4 // GPU Core
5 //
6 // Originally by David Raingeard (Cal2)
7 // GCC/SDL port by Niels Wagenaar (Linux/WIN32) and Caz (BeOS)
8 // Cleanups, endian wrongness, and bad ASM amelioration by James Hammons
9 // (C) 2010 Underground Software
10 //
11 // JLH = James Hammons <jlhamm@acm.org>
12 //
13 // Who  When        What
14 // ---  ----------  -------------------------------------------------------------
15 // JLH  01/16/2010  Created this log ;-)
16 // JLH  11/26/2011  Added fixes for LOAD/STORE alignment issues
17
18 //
19 // Note: Endian wrongness probably stems from the MAME origins of this emu and
20 //       the braindead way in which MAME handles memory. :-)
21 //
22 // Problem with not booting the BIOS was the incorrect way that the
23 // SUBC instruction set the carry when the carry was set going in...
24 // Same problem with ADDC...
25 //
26
27 #include "gpu.h"
28
29 #include <stdlib.h>
30 #include <string.h>                                                             // For memset
31 #include "dsp.h"
32 #include "jagdasm.h"
33 #include "jaguar.h"
34 #include "log.h"
35 #include "m68000/m68kinterface.h"
36 //#include "memory.h"
37 #include "tom.h"
38
39
40 // Seems alignment in loads & stores was off...
41 #define GPU_CORRECT_ALIGNMENT
42 //#define GPU_DEBUG
43
44 // For GPU disassembly...
45
46 #if 0
47 #define GPU_DIS_ABS
48 #define GPU_DIS_ADD
49 #define GPU_DIS_ADDC
50 #define GPU_DIS_ADDQ
51 #define GPU_DIS_ADDQT
52 #define GPU_DIS_AND
53 #define GPU_DIS_BCLR
54 #define GPU_DIS_BSET
55 #define GPU_DIS_BTST
56 #define GPU_DIS_CMP
57 #define GPU_DIS_CMPQ
58 #define GPU_DIS_DIV
59 #define GPU_DIS_IMULT
60 #define GPU_DIS_JUMP
61 #define GPU_DIS_JR
62 #define GPU_DIS_LOAD
63 #define GPU_DIS_LOADB
64 #define GPU_DIS_LOADW
65 #define GPU_DIS_LOAD14I
66 #define GPU_DIS_LOAD14R
67 #define GPU_DIS_LOAD15I
68 #define GPU_DIS_LOAD15R
69 #define GPU_DIS_MOVE
70 #define GPU_DIS_MOVEFA
71 #define GPU_DIS_MOVEI
72 #define GPU_DIS_MOVEPC
73 #define GPU_DIS_MOVETA
74 #define GPU_DIS_MOVEQ
75 #define GPU_DIS_MULT
76 #define GPU_DIS_NEG
77 #define GPU_DIS_NOP
78 #define GPU_DIS_NOT
79 #define GPU_DIS_OR
80 #define GPU_DIS_PACK
81 #define GPU_DIS_ROR
82 #define GPU_DIS_RORQ
83 #define GPU_DIS_SAT8
84 #define GPU_DIS_SH
85 #define GPU_DIS_SHA
86 #define GPU_DIS_SHARQ
87 #define GPU_DIS_SHLQ
88 #define GPU_DIS_SHRQ
89 #define GPU_DIS_STORE
90 #define GPU_DIS_STOREB
91 #define GPU_DIS_STOREW
92 #define GPU_DIS_STORE14I
93 #define GPU_DIS_STORE14R
94 #define GPU_DIS_STORE15I
95 #define GPU_DIS_STORE15R
96 #define GPU_DIS_SUB
97 #define GPU_DIS_SUBC
98 #define GPU_DIS_SUBQ
99 #define GPU_DIS_SUBQT
100 #define GPU_DIS_XOR
101
102 //bool doGPUDis = false;
103 bool doGPUDis = true;
104 #endif
105
106 /*
107 GPU opcodes use (BIOS flying ATARI logo):
108 +                     add 357416
109 +                    addq 538030
110 +                   addqt 6999
111 +                     sub 116663
112 +                    subq 188059
113 +                   subqt 15086
114 +                     neg 36097
115 +                     and 233993
116 +                      or 109332
117 +                     xor 1384
118 +                    btst 111924
119 +                    bset 25029
120 +                    bclr 10551
121 +                    mult 28147
122 +                   imult 69148
123 +                     div 64102
124 +                     abs 159394
125 +                    shlq 194690
126 +                    shrq 292587
127 +                   sharq 192649
128 +                    rorq 58672
129 +                     cmp 244963
130 +                    cmpq 114834
131 +                    move 833472
132 +                   moveq 56427
133 +                  moveta 220814
134 +                  movefa 170678
135 +                   movei 152025
136 +                   loadw 108220
137 +                    load 430936
138 +                  storew 3036
139 +                   store 372490
140 +                 move_pc 2330
141 +                    jump 349134
142 +                      jr 529171
143                     mmult 64904
144 +                     nop 432179
145 */
146
147 // Various bits
148
149 #define CINT0FLAG                       0x0200
150 #define CINT1FLAG                       0x0400
151 #define CINT2FLAG                       0x0800
152 #define CINT3FLAG                       0x1000
153 #define CINT4FLAG                       0x2000
154 #define CINT04FLAGS                     (CINT0FLAG | CINT1FLAG | CINT2FLAG | CINT3FLAG | CINT4FLAG)
155
156 // GPU_FLAGS bits
157
158 #define ZERO_FLAG               0x0001
159 #define CARRY_FLAG              0x0002
160 #define NEGA_FLAG               0x0004
161 #define IMASK                   0x0008
162 #define INT_ENA0                0x0010
163 #define INT_ENA1                0x0020
164 #define INT_ENA2                0x0040
165 #define INT_ENA3                0x0080
166 #define INT_ENA4                0x0100
167 #define INT_CLR0                0x0200
168 #define INT_CLR1                0x0400
169 #define INT_CLR2                0x0800
170 #define INT_CLR3                0x1000
171 #define INT_CLR4                0x2000
172 #define REGPAGE                 0x4000
173 #define DMAEN                   0x8000
174
175 // External global variables
176
177 extern int start_logging;
178 extern int gpu_start_log;
179
180 // Private function prototypes
181
182 void GPUUpdateRegisterBanks(void);
183 void GPUDumpDisassembly(void);
184 void GPUDumpRegisters(void);
185 void GPUDumpMemory(void);
186
187 static void gpu_opcode_add(void);
188 static void gpu_opcode_addc(void);
189 static void gpu_opcode_addq(void);
190 static void gpu_opcode_addqt(void);
191 static void gpu_opcode_sub(void);
192 static void gpu_opcode_subc(void);
193 static void gpu_opcode_subq(void);
194 static void gpu_opcode_subqt(void);
195 static void gpu_opcode_neg(void);
196 static void gpu_opcode_and(void);
197 static void gpu_opcode_or(void);
198 static void gpu_opcode_xor(void);
199 static void gpu_opcode_not(void);
200 static void gpu_opcode_btst(void);
201 static void gpu_opcode_bset(void);
202 static void gpu_opcode_bclr(void);
203 static void gpu_opcode_mult(void);
204 static void gpu_opcode_imult(void);
205 static void gpu_opcode_imultn(void);
206 static void gpu_opcode_resmac(void);
207 static void gpu_opcode_imacn(void);
208 static void gpu_opcode_div(void);
209 static void gpu_opcode_abs(void);
210 static void gpu_opcode_sh(void);
211 static void gpu_opcode_shlq(void);
212 static void gpu_opcode_shrq(void);
213 static void gpu_opcode_sha(void);
214 static void gpu_opcode_sharq(void);
215 static void gpu_opcode_ror(void);
216 static void gpu_opcode_rorq(void);
217 static void gpu_opcode_cmp(void);
218 static void gpu_opcode_cmpq(void);
219 static void gpu_opcode_sat8(void);
220 static void gpu_opcode_sat16(void);
221 static void gpu_opcode_move(void);
222 static void gpu_opcode_moveq(void);
223 static void gpu_opcode_moveta(void);
224 static void gpu_opcode_movefa(void);
225 static void gpu_opcode_movei(void);
226 static void gpu_opcode_loadb(void);
227 static void gpu_opcode_loadw(void);
228 static void gpu_opcode_load(void);
229 static void gpu_opcode_loadp(void);
230 static void gpu_opcode_load_r14_indexed(void);
231 static void gpu_opcode_load_r15_indexed(void);
232 static void gpu_opcode_storeb(void);
233 static void gpu_opcode_storew(void);
234 static void gpu_opcode_store(void);
235 static void gpu_opcode_storep(void);
236 static void gpu_opcode_store_r14_indexed(void);
237 static void gpu_opcode_store_r15_indexed(void);
238 static void gpu_opcode_move_pc(void);
239 static void gpu_opcode_jump(void);
240 static void gpu_opcode_jr(void);
241 static void gpu_opcode_mmult(void);
242 static void gpu_opcode_mtoi(void);
243 static void gpu_opcode_normi(void);
244 static void gpu_opcode_nop(void);
245 static void gpu_opcode_load_r14_ri(void);
246 static void gpu_opcode_load_r15_ri(void);
247 static void gpu_opcode_store_r14_ri(void);
248 static void gpu_opcode_store_r15_ri(void);
249 static void gpu_opcode_sat24(void);
250 static void gpu_opcode_pack(void);
251
252 // This is wrong, since it doesn't take pipeline effects into account. !!! FIX !!!
253 /*uint8_t gpu_opcode_cycles[64] =
254 {
255         3,  3,  3,  3,  3,  3,  3,  3,
256         3,  3,  3,  3,  3,  3,  3,  3,
257         3,  3,  1,  3,  1, 18,  3,  3,
258         3,  3,  3,  3,  3,  3,  3,  3,
259         3,  3,  2,  2,  2,  2,  3,  4,
260         5,  4,  5,  6,  6,  1,  1,  1,
261         1,  2,  2,  2,  1,  1,  9,  3,
262         3,  1,  6,  6,  2,  2,  3,  3
263 };//*/
264 //Here's a QnD kludge...
265 //This is wrong, wrong, WRONG, but it seems to work for the time being...
266 //(That is, it fixes Flip Out which relies on GPU timing rather than semaphores. Bad developers! Bad!)
267 //What's needed here is a way to take pipeline effects into account (including pipeline stalls!)...
268 /*uint8_t gpu_opcode_cycles[64] =
269 {
270         1,  1,  1,  1,  1,  1,  1,  1,
271         1,  1,  1,  1,  1,  1,  1,  1,
272         1,  1,  1,  1,  1,  9,  1,  1,
273         1,  1,  1,  1,  1,  1,  1,  1,
274         1,  1,  1,  1,  1,  1,  1,  2,
275         2,  2,  2,  3,  3,  1,  1,  1,
276         1,  1,  1,  1,  1,  1,  4,  1,
277         1,  1,  3,  3,  1,  1,  1,  1
278 };//*/
279 uint8_t gpu_opcode_cycles[64] =
280 {
281         1,  1,  1,  1,  1,  1,  1,  1,
282         1,  1,  1,  1,  1,  1,  1,  1,
283         1,  1,  1,  1,  1,  1,  1,  1,
284         1,  1,  1,  1,  1,  1,  1,  1,
285         1,  1,  1,  1,  1,  1,  1,  1,
286         1,  1,  1,  1,  1,  1,  1,  1,
287         1,  1,  1,  1,  1,  1,  1,  1,
288         1,  1,  1,  1,  1,  1,  1,  1
289 };//*/
290
291 void (*gpu_opcode[64])()=
292 {
293         gpu_opcode_add,                                 gpu_opcode_addc,                                gpu_opcode_addq,                                gpu_opcode_addqt,
294         gpu_opcode_sub,                                 gpu_opcode_subc,                                gpu_opcode_subq,                                gpu_opcode_subqt,
295         gpu_opcode_neg,                                 gpu_opcode_and,                                 gpu_opcode_or,                                  gpu_opcode_xor,
296         gpu_opcode_not,                                 gpu_opcode_btst,                                gpu_opcode_bset,                                gpu_opcode_bclr,
297         gpu_opcode_mult,                                gpu_opcode_imult,                               gpu_opcode_imultn,                              gpu_opcode_resmac,
298         gpu_opcode_imacn,                               gpu_opcode_div,                                 gpu_opcode_abs,                                 gpu_opcode_sh,
299         gpu_opcode_shlq,                                gpu_opcode_shrq,                                gpu_opcode_sha,                                 gpu_opcode_sharq,
300         gpu_opcode_ror,                                 gpu_opcode_rorq,                                gpu_opcode_cmp,                                 gpu_opcode_cmpq,
301         gpu_opcode_sat8,                                gpu_opcode_sat16,                               gpu_opcode_move,                                gpu_opcode_moveq,
302         gpu_opcode_moveta,                              gpu_opcode_movefa,                              gpu_opcode_movei,                               gpu_opcode_loadb,
303         gpu_opcode_loadw,                               gpu_opcode_load,                                gpu_opcode_loadp,                               gpu_opcode_load_r14_indexed,
304         gpu_opcode_load_r15_indexed,    gpu_opcode_storeb,                              gpu_opcode_storew,                              gpu_opcode_store,
305         gpu_opcode_storep,                              gpu_opcode_store_r14_indexed,   gpu_opcode_store_r15_indexed,   gpu_opcode_move_pc,
306         gpu_opcode_jump,                                gpu_opcode_jr,                                  gpu_opcode_mmult,                               gpu_opcode_mtoi,
307         gpu_opcode_normi,                               gpu_opcode_nop,                                 gpu_opcode_load_r14_ri,                 gpu_opcode_load_r15_ri,
308         gpu_opcode_store_r14_ri,                gpu_opcode_store_r15_ri,                gpu_opcode_sat24,                               gpu_opcode_pack,
309 };
310
311 static uint8_t gpu_ram_8[0x1000];
312 uint32_t gpu_pc;
313 static uint32_t gpu_acc;
314 static uint32_t gpu_remain;
315 static uint32_t gpu_hidata;
316 static uint32_t gpu_flags;
317 static uint32_t gpu_matrix_control;
318 static uint32_t gpu_pointer_to_matrix;
319 static uint32_t gpu_data_organization;
320 static uint32_t gpu_control;
321 static uint32_t gpu_div_control;
322 // There is a distinct advantage to having these separated out--there's no need to clear
323 // a bit before writing a result. I.e., if the result of an operation leaves a zero in
324 // the carry flag, you don't have to zero gpu_flag_c before you can write that zero!
325 static uint8_t gpu_flag_z, gpu_flag_n, gpu_flag_c;
326 uint32_t gpu_reg_bank_0[32];
327 uint32_t gpu_reg_bank_1[32];
328 static uint32_t * gpu_reg;
329 static uint32_t * gpu_alternate_reg;
330
331 static uint32_t gpu_instruction;
332 static uint32_t gpu_opcode_first_parameter;
333 static uint32_t gpu_opcode_second_parameter;
334
335 #define GPU_RUNNING             (gpu_control & 0x01)
336
337 #define RM                              gpu_reg[gpu_opcode_first_parameter]
338 #define RN                              gpu_reg[gpu_opcode_second_parameter]
339 #define ALTERNATE_RM    gpu_alternate_reg[gpu_opcode_first_parameter]
340 #define ALTERNATE_RN    gpu_alternate_reg[gpu_opcode_second_parameter]
341 #define IMM_1                   gpu_opcode_first_parameter
342 #define IMM_2                   gpu_opcode_second_parameter
343
// Flag helper macros. Each flag is kept in its own byte (gpu_flag_z / gpu_flag_n /
// gpu_flag_c), so a result can be written without first clearing the old value.
//
// NOTE: SET_FLAG_* and RESET_FLAG_* carry their own trailing semicolon. With the
//       caller's ';' they expand to a statement followed by an empty statement, so
//       don't use them as the sole body of an unbraced if that has an else.
#define SET_FLAG_Z(r)	(gpu_flag_z = ((r) == 0));
#define SET_FLAG_N(r)	(gpu_flag_n = (((uint32_t)(r) >> 31) & 0x01));

#define RESET_FLAG_Z()	gpu_flag_z = 0;
#define RESET_FLAG_N()	gpu_flag_n = 0;
#define RESET_FLAG_C()	gpu_flag_c = 0;

#define CLR_Z				(gpu_flag_z = 0)
#define CLR_ZN				(gpu_flag_z = gpu_flag_n = 0)
#define CLR_ZNC				(gpu_flag_z = gpu_flag_n = gpu_flag_c = 0)
// Z is set when the result is zero, N mirrors bit 31 of the result
#define SET_Z(r)			(gpu_flag_z = ((r) == 0))
#define SET_N(r)			(gpu_flag_n = (((uint32_t)(r) >> 31) & 0x01))
// Carry out of a + b: set when b is larger than the headroom left above a (~a)
#define SET_C_ADD(a,b)		(gpu_flag_c = ((uint32_t)(b) > (uint32_t)(~(a))))
// Borrow out of a - b: set when b is larger than a
#define SET_C_SUB(a,b)		(gpu_flag_c = ((uint32_t)(b) > (uint32_t)(a)))
// The combined forms are single comma expressions (evaluated N, then Z, then C) so
// that they behave as ONE statement even as the body of an unbraced if/else/loop.
// As bare "x; y; z" sequences only the first assignment was guarded.
#define SET_ZN(r)			(SET_N(r), SET_Z(r))
#define SET_ZNC_ADD(a,b,r)	(SET_N(r), SET_Z(r), SET_C_ADD(a,b))
#define SET_ZNC_SUB(a,b,r)	(SET_N(r), SET_Z(r), SET_C_SUB(a,b))
361
362 uint32_t gpu_convert_zero[32] =
363         { 32,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 };
364
365 uint8_t * branch_condition_table = 0;
366 #define BRANCH_CONDITION(x)     branch_condition_table[(x) + ((jaguar_flags & 7) << 5)]
367
368 uint32_t gpu_opcode_use[64];
369
370 const char * gpu_opcode_str[64]=
371 {
372         "add",                          "addc",                         "addq",                         "addqt",
373         "sub",                          "subc",                         "subq",                         "subqt",
374         "neg",                          "and",                          "or",                           "xor",
375         "not",                          "btst",                         "bset",                         "bclr",
376         "mult",                         "imult",                        "imultn",                       "resmac",
377         "imacn",                        "div",                          "abs",                          "sh",
378         "shlq",                         "shrq",                         "sha",                          "sharq",
379         "ror",                          "rorq",                         "cmp",                          "cmpq",
380         "sat8",                         "sat16",                        "move",                         "moveq",
381         "moveta",                       "movefa",                       "movei",                        "loadb",
382         "loadw",                        "load",                         "loadp",                        "load_r14_indexed",
383         "load_r15_indexed",     "storeb",                       "storew",                       "store",
384         "storep",                       "store_r14_indexed","store_r15_indexed","move_pc",
385         "jump",                         "jr",                           "mmult",                        "mtoi",
386         "normi",                        "nop",                          "load_r14_ri",          "load_r15_ri",
387         "store_r14_ri",         "store_r15_ri",         "sat24",                        "pack",
388 };
389
390 static uint32_t gpu_in_exec = 0;
391 static uint32_t gpu_releaseTimeSlice_flag = 0;
392
393 void GPUReleaseTimeslice(void)
394 {
395         gpu_releaseTimeSlice_flag = 1;
396 }
397
398 uint32_t GPUGetPC(void)
399 {
400         return gpu_pc;
401 }
402
403 void build_branch_condition_table(void)
404 {
405         if (!branch_condition_table)
406         {
407                 branch_condition_table = (uint8_t *)malloc(32 * 8 * sizeof(branch_condition_table[0]));
408
409                 if (branch_condition_table)
410                 {
411                         for(int i=0; i<8; i++)
412                         {
413                                 for(int j=0; j<32; j++)
414                                 {
415                                         int result = 1;
416                                         if (j & 1)
417                                                 if (i & ZERO_FLAG)
418                                                         result = 0;
419                                         if (j & 2)
420                                                 if (!(i & ZERO_FLAG))
421                                                         result = 0;
422                                         if (j & 4)
423                                                 if (i & (CARRY_FLAG << (j >> 4)))
424                                                         result = 0;
425                                         if (j & 8)
426                                                 if (!(i & (CARRY_FLAG << (j >> 4))))
427                                                         result = 0;
428                                         branch_condition_table[i * 32 + j] = result;
429                                 }
430                         }
431                 }
432         }
433 }
434
435 //
436 // GPU byte access (read)
437 //
438 uint8_t GPUReadByte(uint32_t offset, uint32_t who/*=UNKNOWN*/)
439 {
440         if (offset >= 0xF02000 && offset <= 0xF020FF)
441                 WriteLog("GPU: ReadByte--Attempt to read from GPU register file by %s!\n", whoName[who]);
442
443         if ((offset >= GPU_WORK_RAM_BASE) && (offset < GPU_WORK_RAM_BASE+0x1000))
444                 return gpu_ram_8[offset & 0xFFF];
445         else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset < GPU_CONTROL_RAM_BASE+0x20))
446         {
447                 uint32_t data = GPUReadLong(offset & 0xFFFFFFFC, who);
448
449                 if ((offset & 0x03) == 0)
450                         return data >> 24;
451                 else if ((offset & 0x03) == 1)
452                         return (data >> 16) & 0xFF;
453                 else if ((offset & 0x03) == 2)
454                         return (data >> 8) & 0xFF;
455                 else if ((offset & 0x03) == 3)
456                         return data & 0xFF;
457         }
458
459         return JaguarReadByte(offset, who);
460 }
461
462 //
463 // GPU word access (read)
464 //
465 uint16_t GPUReadWord(uint32_t offset, uint32_t who/*=UNKNOWN*/)
466 {
467         if (offset >= 0xF02000 && offset <= 0xF020FF)
468                 WriteLog("GPU: ReadWord--Attempt to read from GPU register file by %s!\n", whoName[who]);
469
470         if ((offset >= GPU_WORK_RAM_BASE) && (offset < GPU_WORK_RAM_BASE+0x1000))
471         {
472                 offset &= 0xFFF;
473                 uint16_t data = ((uint16_t)gpu_ram_8[offset] << 8) | (uint16_t)gpu_ram_8[offset+1];
474                 return data;
475         }
476         else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset < GPU_CONTROL_RAM_BASE+0x20))
477         {
478 // This looks and smells wrong...
479 // But it *might* be OK...
480                 if (offset & 0x01)                      // Catch cases 1 & 3... (unaligned read)
481                         return (GPUReadByte(offset, who) << 8) | GPUReadByte(offset+1, who);
482
483                 uint32_t data = GPUReadLong(offset & 0xFFFFFFFC, who);
484
485                 if (offset & 0x02)                      // Cases 0 & 2...
486                         return data & 0xFFFF;
487                 else
488                         return data >> 16;
489         }
490
491 //TEMP--Mirror of F03000? No. Writes only...
492 //if (offset >= 0xF0B000 && offset <= 0xF0BFFF)
493 //WriteLog("[GPUR16] --> Possible GPU RAM mirror access by %s!", whoName[who]);
494
495         return JaguarReadWord(offset, who);
496 }
497
498 //
499 // GPU dword access (read)
500 //
501 uint32_t GPUReadLong(uint32_t offset, uint32_t who/*=UNKNOWN*/)
502 {
503         if (offset >= 0xF02000 && offset <= 0xF020FF)
504         {
505                 WriteLog("GPU: ReadLong--Attempt to read from GPU register file (%X) by %s!\n", offset, whoName[who]);
506                 uint32_t reg = (offset & 0xFC) >> 2;
507                 return (reg < 32 ? gpu_reg_bank_0[reg] : gpu_reg_bank_1[reg - 32]); 
508         }
509
510 //      if ((offset >= GPU_WORK_RAM_BASE) && (offset < GPU_WORK_RAM_BASE + 0x1000))
511         if ((offset >= GPU_WORK_RAM_BASE) && (offset <= GPU_WORK_RAM_BASE + 0x0FFC))
512         {
513                 offset &= 0xFFF;
514                 return ((uint32_t)gpu_ram_8[offset] << 24) | ((uint32_t)gpu_ram_8[offset+1] << 16)
515                         | ((uint32_t)gpu_ram_8[offset+2] << 8) | (uint32_t)gpu_ram_8[offset+3];//*/
516 //              return GET32(gpu_ram_8, offset);
517         }
518 //      else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset < GPU_CONTROL_RAM_BASE+0x20))
519         else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset <= GPU_CONTROL_RAM_BASE + 0x1C))
520         {
521                 offset &= 0x1F;
522                 switch (offset)
523                 {
524                 case 0x00:
525                         gpu_flag_c = (gpu_flag_c ? 1 : 0);
526                         gpu_flag_z = (gpu_flag_z ? 1 : 0);
527                         gpu_flag_n = (gpu_flag_n ? 1 : 0);
528
529                         gpu_flags = (gpu_flags & 0xFFFFFFF8) | (gpu_flag_n << 2) | (gpu_flag_c << 1) | gpu_flag_z;
530
531                         return gpu_flags & 0xFFFFC1FF;
532                 case 0x04:
533                         return gpu_matrix_control;
534                 case 0x08:
535                         return gpu_pointer_to_matrix;
536                 case 0x0C:
537                         return gpu_data_organization;
538                 case 0x10:
539                         return gpu_pc;
540                 case 0x14:
541                         return gpu_control;
542                 case 0x18:
543                         return gpu_hidata;
544                 case 0x1C:
545                         return gpu_remain;
546                 default:                                                                // unaligned long read
547 #ifdef GPU_DEBUG
548                         WriteLog("GPU: Read32--unaligned 32 bit read at %08X by %s.\n", GPU_CONTROL_RAM_BASE + offset, whoName[who]);
549 #endif  // GPU_DEBUG
550                         return 0;
551                 }
552         }
553 //TEMP--Mirror of F03000? No. Writes only...
554 //if (offset >= 0xF0B000 && offset <= 0xF0BFFF)
555 //      WriteLog("[GPUR32] --> Possible GPU RAM mirror access by %s!\n", whoName[who]);
556 /*if (offset >= 0xF1D000 && offset <= 0xF1DFFF)
557         WriteLog("[GPUR32] --> Reading from Wavetable ROM!\n");//*/
558
559         return (JaguarReadWord(offset, who) << 16) | JaguarReadWord(offset + 2, who);
560 }
561
562 //
563 // GPU byte access (write)
564 //
565 void GPUWriteByte(uint32_t offset, uint8_t data, uint32_t who/*=UNKNOWN*/)
566 {
567         if (offset >= 0xF02000 && offset <= 0xF020FF)
568                 WriteLog("GPU: WriteByte--Attempt to write to GPU register file by %s!\n", whoName[who]);
569
570         if ((offset >= GPU_WORK_RAM_BASE) && (offset <= GPU_WORK_RAM_BASE + 0x0FFF))
571         {
572                 gpu_ram_8[offset & 0xFFF] = data;
573
574 //This is the same stupid worthless code that was in the DSP!!! AARRRGGGGHHHHH!!!!!!
575 /*              if (!gpu_in_exec)
576                 {
577                         m68k_end_timeslice();
578                         dsp_releaseTimeslice();
579                 }*/
580                 return;
581         }
582         else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset <= GPU_CONTROL_RAM_BASE + 0x1F))
583         {
584                 uint32_t reg = offset & 0x1C;
585                 int bytenum = offset & 0x03;
586
587 //This is definitely wrong!
588                 if ((reg >= 0x1C) && (reg <= 0x1F))
589                         gpu_div_control = (gpu_div_control & (~(0xFF << (bytenum << 3)))) | (data << (bytenum << 3));
590                 else
591                 {
592                         uint32_t old_data = GPUReadLong(offset & 0xFFFFFFC, who);
593                         bytenum = 3 - bytenum; // convention motorola !!!
594                         old_data = (old_data & (~(0xFF << (bytenum << 3)))) | (data << (bytenum << 3));
595                         GPUWriteLong(offset & 0xFFFFFFC, old_data, who);
596                 }
597                 return;
598         }
599 //      WriteLog("gpu: writing %.2x at 0x%.8x\n",data,offset);
600         JaguarWriteByte(offset, data, who);
601 }
602
603 //
604 // GPU word access (write)
605 //
606 void GPUWriteWord(uint32_t offset, uint16_t data, uint32_t who/*=UNKNOWN*/)
607 {
608         if (offset >= 0xF02000 && offset <= 0xF020FF)
609                 WriteLog("GPU: WriteWord--Attempt to write to GPU register file by %s!\n", whoName[who]);
610
611         if ((offset >= GPU_WORK_RAM_BASE) && (offset <= GPU_WORK_RAM_BASE + 0x0FFE))
612         {
613                 gpu_ram_8[offset & 0xFFF] = (data>>8) & 0xFF;
614                 gpu_ram_8[(offset+1) & 0xFFF] = data & 0xFF;//*/
615 /*              offset &= 0xFFF;
616                 SET16(gpu_ram_8, offset, data);//*/
617
618 /*if (offset >= 0xF03214 && offset < 0xF0321F)
619         WriteLog("GPU: Writing WORD (%04X) to GPU RAM (%08X)...\n", data, offset);//*/
620
621
622 //This is the same stupid worthless code that was in the DSP!!! AARRRGGGGHHHHH!!!!!!
623 /*              if (!gpu_in_exec)
624                 {
625                         m68k_end_timeslice();
626                         dsp_releaseTimeslice();
627                 }*/
628                 return;
629         }
630         else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset <= GPU_CONTROL_RAM_BASE + 0x1E))
631         {
632                 if (offset & 0x01)              // This is supposed to weed out unaligned writes, but does nothing...
633                 {
634 #ifdef GPU_DEBUG
635                         WriteLog("GPU: Write16--unaligned write @ %08X [%04X]\n", offset, data);
636                         GPUDumpRegisters();
637 #endif  // GPU_DEBUG
638                         return;
639                 }
640 //Dual locations in this range: $1C Divide unit remainder/Divide unit control (R/W)
641 //This just literally sucks.
642                 if ((offset & 0x1C) == 0x1C)
643                 {
644 //This doesn't look right either--handles cases 1, 2, & 3 all the same!
645                         if (offset & 0x02)
646                                 gpu_div_control = (gpu_div_control & 0xFFFF0000) | (data & 0xFFFF);
647                         else
648                                 gpu_div_control = (gpu_div_control & 0x0000FFFF) | ((data & 0xFFFF) << 16);
649                 }
650                 else
651                 {
652 //WriteLog("[GPU W16:%08X,%04X]", offset, data);
653                         uint32_t old_data = GPUReadLong(offset & 0xFFFFFFC, who);
654
655                         if (offset & 0x02)
656                                 old_data = (old_data & 0xFFFF0000) | (data & 0xFFFF);
657                         else
658                                 old_data = (old_data & 0x0000FFFF) | ((data & 0xFFFF) << 16);
659
660                         GPUWriteLong(offset & 0xFFFFFFC, old_data, who);
661                 }
662
663                 return;
664         }
665         else if ((offset == GPU_WORK_RAM_BASE + 0x0FFF) || (GPU_CONTROL_RAM_BASE + 0x1F))
666         {
667 #ifdef GPU_DEBUG
668                         WriteLog("GPU: Write16--unaligned write @ %08X by %s [%04X]!\n", offset, whoName[who], data);
669                         GPUDumpRegisters();
670 #endif  // GPU_DEBUG
671                 return;
672         }
673
674         // Have to be careful here--this can cause an infinite loop!
675         JaguarWriteWord(offset, data, who);
676 }
677
678 //
679 // GPU dword access (write)
680 //
681 void GPUWriteLong(uint32_t offset, uint32_t data, uint32_t who/*=UNKNOWN*/)
682 {
683         if (offset >= 0xF02000 && offset <= 0xF020FF)
684                 WriteLog("GPU: WriteLong--Attempt to write to GPU register file by %s!\n", whoName[who]);
685
686 //      if ((offset >= GPU_WORK_RAM_BASE) && (offset < GPU_WORK_RAM_BASE + 0x1000))
687         if ((offset >= GPU_WORK_RAM_BASE) && (offset <= GPU_WORK_RAM_BASE + 0x0FFC))
688         {
689 #ifdef GPU_DEBUG
690                 if (offset & 0x03)
691                 {
692                         WriteLog("GPU: Write32--unaligned write @ %08X [%08X] by %s\n", offset, data, whoName[who]);
693                         GPUDumpRegisters();
694                 }
695 #endif  // GPU_DEBUG
696
697                 offset &= 0xFFF;
698                 SET32(gpu_ram_8, offset, data);
699                 return;
700         }
701 //      else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset < GPU_CONTROL_RAM_BASE+0x20))
702         else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset <= GPU_CONTROL_RAM_BASE + 0x1C))
703         {
704                 offset &= 0x1F;
705                 switch (offset)
706                 {
707                 case 0x00:
708                 {
709                         bool IMASKCleared = (gpu_flags & IMASK) && !(data & IMASK);
710                         // NOTE: According to the JTRM, writing a 1 to IMASK has no effect; only the
711                         //       IRQ logic can set it. So we mask it out here to prevent problems...
712                         gpu_flags = data & (~IMASK);
713                         gpu_flag_z = gpu_flags & ZERO_FLAG;
714                         gpu_flag_c = (gpu_flags & CARRY_FLAG) >> 1;
715                         gpu_flag_n = (gpu_flags & NEGA_FLAG) >> 2;
716                         GPUUpdateRegisterBanks();
717                         gpu_control &= ~((gpu_flags & CINT04FLAGS) >> 3);       // Interrupt latch clear bits
718 //Writing here is only an interrupt enable--this approach is just plain wrong!
719 //                      GPUHandleIRQs();
720 //This, however, is A-OK! ;-)
721                         if (IMASKCleared)                                               // If IMASK was cleared,
722                                 GPUHandleIRQs();                                        // see if any other interrupts need servicing!
723 #ifdef GPU_DEBUG
724                         if (gpu_flags & (INT_ENA0 | INT_ENA1 | INT_ENA2 | INT_ENA3 | INT_ENA4))
725                                 WriteLog("GPU: Interrupt enable set by %s! Bits: %02X\n", whoName[who], (gpu_flags >> 4) & 0x1F);
726                         WriteLog("GPU: REGPAGE %s...\n", (gpu_flags & REGPAGE ? "set" : "cleared"));
727 #endif  // GPU_DEBUG
728                         break;
729                 }
730                 case 0x04:
731                         gpu_matrix_control = data;
732                         break;
733                 case 0x08:
734                         // This can only point to long aligned addresses
735                         gpu_pointer_to_matrix = data & 0xFFFFFFFC;
736                         break;
737                 case 0x0C:
738                         gpu_data_organization = data;
739                         break;
740                 case 0x10:
741                         gpu_pc = data;
742 #ifdef GPU_DEBUG
743 WriteLog("GPU: %s setting GPU PC to %08X %s\n", whoName[who], gpu_pc, (GPU_RUNNING ? "(GPU is RUNNING!)" : ""));//*/
744 #endif  // GPU_DEBUG
745                         break;
746                 case 0x14:
747                 {
748 //                      uint32_t gpu_was_running = GPU_RUNNING;
749                         data &= ~0xF7C0;                // Disable writes to INT_LAT0-4 & TOM version number
750
751                         // check for GPU -> CPU interrupt
752                         if (data & 0x02)
753                         {
754 //WriteLog("GPU->CPU interrupt\n");
755                                 if (TOMIRQEnabled(IRQ_GPU))
756                                 {
757 //This is the programmer's responsibility, to make sure the handler is valid, not ours!
758 //                                      if ((TOMIRQEnabled(IRQ_GPU))// && (JaguarInterruptHandlerIsValid(64)))
759                                         {
760                                                 TOMSetPendingGPUInt();
761                                                 m68k_set_irq(2);                        // Set 68000 IPL 2
762                                                 GPUReleaseTimeslice();
763                                         }
764                                 }
765                                 data &= ~0x02;
766                         }
767
768                         // check for CPU -> GPU interrupt #0
769                         if (data & 0x04)
770                         {
771 //WriteLog("CPU->GPU interrupt\n");
772                                 GPUSetIRQLine(0, ASSERT_LINE);
773                                 m68k_end_timeslice();
774                                 DSPReleaseTimeslice();
775                                 data &= ~0x04;
776                         }
777
778                         // single stepping
779                         if (data & 0x10)
780                         {
781                                 //WriteLog("asked to perform a single step (single step is %senabled)\n",(data&0x8)?"":"not ");
782                         }
783
784                         gpu_control = (gpu_control & 0xF7C0) | (data & (~0xF7C0));
785
786                         // if gpu wasn't running but is now running, execute a few cycles
787 #ifndef GPU_SINGLE_STEPPING
788 /*                      if (!gpu_was_running && GPU_RUNNING)
789 #ifdef GPU_DEBUG
790                         {
791                                 WriteLog("GPU: Write32--About to do stupid braindead GPU execution for 200 cycles.\n");
792 #endif  // GPU_DEBUG
793                                 GPUExec(200);
794 #ifdef GPU_DEBUG
795                         }
796 #endif  // GPU_DEBUG//*/
797 #else
798                         if (gpu_control & 0x18)
799                                 GPUExec(1);
800 #endif  // #ifndef GPU_SINGLE_STEPPING
801 #ifdef GPU_DEBUG
802 WriteLog("Write to GPU CTRL by %s: %08X ", whoName[who], data);
803 if (GPU_RUNNING)
804         WriteLog(" --> Starting to run at %08X by %s...", gpu_pc, whoName[who]);
805 else
806         WriteLog(" --> Stopped by %s! (GPU_PC: %08X)", whoName[who], gpu_pc);
807 WriteLog("\n");
808 #endif  // GPU_DEBUG
809 //if (GPU_RUNNING)
810 //      GPUDumpDisassembly();
811 /*if (GPU_RUNNING)
812 {
813         if (gpu_pc == 0xF035D8)
814         {
815 //              GPUDumpDisassembly();
816 //              log_done();
817 //              exit(1);
818                 gpu_control &= 0xFFFFFFFE;      // Don't run it and let's see what happens!
819 //Hmm. Seems to lock up when going into the demo...
820 //Try to disable the collision altogether!
821         }
822 }//*/
823 extern int effect_start5;
824 static bool finished = false;
825 //if (GPU_RUNNING && effect_start5 && !finished)
826 if (GPU_RUNNING && effect_start5 && gpu_pc == 0xF035D8)
827 {
828         // Let's do a dump of $6528!
829 /*      uint32_t numItems = JaguarReadWord(0x6BD6);
830         WriteLog("\nDump of $6528: %u items.\n\n", numItems);
831         for(int i=0; i<numItems*3*4; i+=3*4)
832         {
833                 WriteLog("\t%04X: %08X %08X %08X -> ", 0x6528+i, JaguarReadLong(0x6528+i),
834                         JaguarReadLong(0x6528+i+4), JaguarReadLong(0x6528+i+8));
835                 uint16_t link = JaguarReadWord(0x6528+i+8+2);
836                 for(int j=0; j<40; j+=4)
837                         WriteLog("%08X ", JaguarReadLong(link + j));
838                 WriteLog("\n");
839         }
840         WriteLog("\n");//*/
841         // Let's try a manual blit here...
842 //This isn't working the way it should! !!! FIX !!!
843 //Err, actually, it is.
844 // NOW, it works right! Problem solved!!! It's a blitter bug!
845 /*      uint32_t src = 0x4D54, dst = 0xF03000, width = 10 * 4;
846         for(int y=0; y<127; y++)
847         {
848                 for(int x=0; x<2; x++)
849                 {
850                         JaguarWriteLong(dst, JaguarReadLong(src));
851
852                         src += 4;
853                         dst += 4;
854                 }
855                 src += width - (2 * 4);
856         }//*/
857 /*      finished = true;
858         doGPUDis = true;
859         WriteLog("\nGPU: About to execute collision detection code.\n\n");//*/
860
861 /*      WriteLog("\nGPU: About to execute collision detection code. Data @ 4D54:\n\n");
862         int count = 0;
863         for(int i=0x004D54; i<0x004D54+2048; i++)
864         {
865                 WriteLog("%02X ", JaguarReadByte(i));
866                 count++;
867                 if (count == 32)
868                 {
869                         count = 0;
870                         WriteLog("\n");
871                 }
872         }
873         WriteLog("\n\nData @ F03000:\n\n");
874         count = 0;
875         for(int i=0xF03000; i<0xF03200; i++)
876         {
877                 WriteLog("%02X ", JaguarReadByte(i));
878                 count++;
879                 if (count == 32)
880                 {
881                         count = 0;
882                         WriteLog("\n");
883                 }
884         }
885         WriteLog("\n\n");
886         log_done();
887         exit(0);//*/
888 }
889 //if (!GPU_RUNNING)
890 //      doGPUDis = false;
891 /*if (!GPU_RUNNING && finished)
892 {
893         WriteLog("\nGPU: Finished collision detection code. Exiting!\n\n");
894         GPUDumpRegisters();
895         log_done();
896         exit(0);
897 }//*/
898                         // (?) If we're set running by the M68K (or DSP?) then end its timeslice to
899                         // allow the GPU a chance to run...
900                         // Yes! This partially fixed Trevor McFur...
901                         if (GPU_RUNNING)
902                                 m68k_end_timeslice();
903                         break;
904                 }
905                 case 0x18:
906                         gpu_hidata = data;
907                         break;
908                 case 0x1C:
909                         gpu_div_control = data;
910                         break;
911 //              default:   // unaligned long write
912                         //exit(0);
913                         //__asm int 3
914                 }
915                 return;
916         }
917
918 //      JaguarWriteWord(offset, (data >> 16) & 0xFFFF, who);
919 //      JaguarWriteWord(offset+2, data & 0xFFFF, who);
920 // We're a 32-bit processor, we can do a long write...!
921         JaguarWriteLong(offset, data, who);
922 }
923
924 //
925 // Change register banks if necessary
926 //
927 void GPUUpdateRegisterBanks(void)
928 {
929         int bank = (gpu_flags & REGPAGE);               // REGPAGE bit
930
931         if (gpu_flags & IMASK)                                  // IMASK bit
932                 bank = 0;                                                       // IMASK forces main bank to be bank 0
933
934         if (bank)
935                 gpu_reg = gpu_reg_bank_1, gpu_alternate_reg = gpu_reg_bank_0;
936         else
937                 gpu_reg = gpu_reg_bank_0, gpu_alternate_reg = gpu_reg_bank_1;
938 }
939
940 void GPUHandleIRQs(void)
941 {
942         // Bail out if we're already in an interrupt!
943         if (gpu_flags & IMASK)
944                 return;
945
946         // Get the interrupt latch & enable bits
947         uint32_t bits = (gpu_control >> 6) & 0x1F, mask = (gpu_flags >> 4) & 0x1F;
948
949         // Bail out if latched interrupts aren't enabled
950         bits &= mask;
951         if (!bits)
952                 return;
953
954         // Determine which interrupt to service
955         uint32_t which = 0; //Isn't there a #pragma to disable this warning???
956         if (bits & 0x01)
957                 which = 0;
958         if (bits & 0x02)
959                 which = 1;
960         if (bits & 0x04)
961                 which = 2;
962         if (bits & 0x08)
963                 which = 3;
964         if (bits & 0x10)
965                 which = 4;
966
967         if (start_logging)
968                 WriteLog("GPU: Generating IRQ #%i\n", which);
969
970         // set the interrupt flag
971         gpu_flags |= IMASK;
972         GPUUpdateRegisterBanks();
973
974         // subqt  #4,r31                ; pre-decrement stack pointer
975         // move  pc,r30                 ; address of interrupted code
976         // store  r30,(r31)     ; store return address
977         gpu_reg[31] -= 4;
978         GPUWriteLong(gpu_reg[31], gpu_pc - 2, GPU);
979
980         // movei  #service_address,r30  ; pointer to ISR entry
981         // jump  (r30)                                  ; jump to ISR
982         // nop
983         gpu_pc = gpu_reg[30] = GPU_WORK_RAM_BASE + (which * 0x10);
984 }
985
986 void GPUSetIRQLine(int irqline, int state)
987 {
988         if (start_logging)
989                 WriteLog("GPU: Setting GPU IRQ line #%i\n", irqline);
990
991         uint32_t mask = 0x0040 << irqline;
992         gpu_control &= ~mask;                           // Clear the interrupt latch
993
994         if (state)
995         {
996                 gpu_control |= mask;                    // Assert the interrupt latch
997                 GPUHandleIRQs();                                // And handle the interrupt...
998         }
999 }
1000
1001 //TEMPORARY: Testing only!
1002 //#include "gpu2.h"
1003 //#include "gpu3.h"
1004
1005 void GPUInit(void)
1006 {
1007 //      memory_malloc_secure((void **)&gpu_ram_8, 0x1000, "GPU work RAM");
1008 //      memory_malloc_secure((void **)&gpu_reg_bank_0, 32 * sizeof(int32_t), "GPU bank 0 regs");
1009 //      memory_malloc_secure((void **)&gpu_reg_bank_1, 32 * sizeof(int32_t), "GPU bank 1 regs");
1010
1011         build_branch_condition_table();
1012
1013         GPUReset();
1014
1015 //TEMPORARY: Testing only!
1016 //      gpu2_init();
1017 //      gpu3_init();
1018 }
1019
1020 void GPUReset(void)
1021 {
1022         // GPU registers (directly visible)
1023         gpu_flags                         = 0x00000000;
1024         gpu_matrix_control    = 0x00000000;
1025         gpu_pointer_to_matrix = 0x00000000;
1026         gpu_data_organization = 0xFFFFFFFF;
1027         gpu_pc                            = 0x00F03000;
1028         gpu_control                       = 0x00002800;                 // Correctly sets this as TOM Rev. 2
1029         gpu_hidata                        = 0x00000000;
1030         gpu_remain                        = 0x00000000;                 // These two registers are RO/WO
1031         gpu_div_control           = 0x00000000;
1032
1033         // GPU internal register
1034         gpu_acc                           = 0x00000000;
1035
1036         gpu_reg = gpu_reg_bank_0;
1037         gpu_alternate_reg = gpu_reg_bank_1;
1038
1039         for(int i=0; i<32; i++)
1040                 gpu_reg[i] = gpu_alternate_reg[i] = 0x00000000;
1041
1042         CLR_ZNC;
1043         memset(gpu_ram_8, 0xFF, 0x1000);
1044         gpu_in_exec = 0;
1045 //not needed    GPUInterruptPending = false;
1046         GPUResetStats();
1047
1048         // Contents of local RAM are quasi-stable; we simulate this by randomizing RAM contents
1049         for(uint32_t i=0; i<4096; i+=4)
1050                 *((uint32_t *)(&gpu_ram_8[i])) = rand();
1051 }
1052
1053 uint32_t GPUReadPC(void)
1054 {
1055         return gpu_pc;
1056 }
1057
1058 void GPUResetStats(void)
1059 {
1060         for(uint32_t i=0; i<64; i++)
1061                 gpu_opcode_use[i] = 0;
1062         WriteLog("--> GPU stats were reset!\n");
1063 }
1064
1065 void GPUDumpDisassembly(void)
1066 {
1067         char buffer[512];
1068
1069         WriteLog("\n---[GPU code at 00F03000]---------------------------\n");
1070         uint32_t j = 0xF03000;
1071         while (j <= 0xF03FFF)
1072         {
1073                 uint32_t oldj = j;
1074                 j += dasmjag(JAGUAR_GPU, buffer, j);
1075                 WriteLog("\t%08X: %s\n", oldj, buffer);
1076         }
1077 }
1078
1079 void GPUDumpRegisters(void)
1080 {
1081         WriteLog("\n---[GPU flags: NCZ %d%d%d]-----------------------\n", gpu_flag_n, gpu_flag_c, gpu_flag_z);
1082         WriteLog("\nRegisters bank 0\n");
1083         for(int j=0; j<8; j++)
1084         {
1085                 WriteLog("\tR%02i = %08X R%02i = %08X R%02i = %08X R%02i = %08X\n",
1086                                                   (j << 2) + 0, gpu_reg_bank_0[(j << 2) + 0],
1087                                                   (j << 2) + 1, gpu_reg_bank_0[(j << 2) + 1],
1088                                                   (j << 2) + 2, gpu_reg_bank_0[(j << 2) + 2],
1089                                                   (j << 2) + 3, gpu_reg_bank_0[(j << 2) + 3]);
1090         }
1091         WriteLog("Registers bank 1\n");
1092         for(int j=0; j<8; j++)
1093         {
1094                 WriteLog("\tR%02i = %08X R%02i = %08X R%02i = %08X R%02i = %08X\n",
1095                                                   (j << 2) + 0, gpu_reg_bank_1[(j << 2) + 0],
1096                                                   (j << 2) + 1, gpu_reg_bank_1[(j << 2) + 1],
1097                                                   (j << 2) + 2, gpu_reg_bank_1[(j << 2) + 2],
1098                                                   (j << 2) + 3, gpu_reg_bank_1[(j << 2) + 3]);
1099         }
1100 }
1101
1102 void GPUDumpMemory(void)
1103 {
1104         WriteLog("\n---[GPU data at 00F03000]---------------------------\n");
1105         for(int i=0; i<0xFFF; i+=4)
1106                 WriteLog("\t%08X: %02X %02X %02X %02X\n", 0xF03000+i, gpu_ram_8[i],
1107                         gpu_ram_8[i+1], gpu_ram_8[i+2], gpu_ram_8[i+3]);
1108 }
1109
1110 void GPUDone(void)
1111 {
1112         WriteLog("GPU: Stopped at PC=%08X (GPU %s running)\n", (unsigned int)gpu_pc, GPU_RUNNING ? "was" : "wasn't");
1113
1114         // Get the interrupt latch & enable bits
1115         uint8_t bits = (gpu_control >> 6) & 0x1F, mask = (gpu_flags >> 4) & 0x1F;
1116         WriteLog("GPU: Latch bits = %02X, enable bits = %02X\n", bits, mask);
1117
1118         GPUDumpRegisters();
1119         GPUDumpDisassembly();
1120
1121         WriteLog("\nGPU opcodes use:\n");
1122         for(int i=0; i<64; i++)
1123         {
1124                 if (gpu_opcode_use[i])
1125                         WriteLog("\t%17s %lu\n", gpu_opcode_str[i], gpu_opcode_use[i]);
1126         }
1127         WriteLog("\n");
1128
1129 //      memory_free(gpu_ram_8);
1130 //      memory_free(gpu_reg_bank_0);
1131 //      memory_free(gpu_reg_bank_1);
1132 }
1133
1134 //
1135 // Main GPU execution core
1136 //
1137 static int testCount = 1;
1138 static int len = 0;
1139 static bool tripwire = false;
1140 void GPUExec(int32_t cycles)
1141 {
1142         if (!GPU_RUNNING)
1143                 return;
1144
1145 #ifdef GPU_SINGLE_STEPPING
1146         if (gpu_control & 0x18)
1147         {
1148                 cycles = 1;
1149                 gpu_control &= ~0x10;
1150         }
1151 #endif
1152         GPUHandleIRQs();
1153         gpu_releaseTimeSlice_flag = 0;
1154         gpu_in_exec++;
1155
1156         while (cycles > 0 && GPU_RUNNING)
1157         {
1158 if (gpu_ram_8[0x054] == 0x98 && gpu_ram_8[0x055] == 0x0A && gpu_ram_8[0x056] == 0x03
1159         && gpu_ram_8[0x057] == 0x00 && gpu_ram_8[0x058] == 0x00 && gpu_ram_8[0x059] == 0x00)
1160 {
1161         if (gpu_pc == 0xF03000)
1162         {
1163                 extern uint32_t starCount;
1164                 starCount = 0;
1165 /*              WriteLog("GPU: Starting starfield generator... Dump of [R03=%08X]:\n", gpu_reg_bank_0[03]);
1166                 uint32_t base = gpu_reg_bank_0[3];
1167                 for(uint32_t i=0; i<0x100; i+=16)
1168                 {
1169                         WriteLog("%02X: ", i);
1170                         for(uint32_t j=0; j<16; j++)
1171                         {
1172                                 WriteLog("%02X ", JaguarReadByte(base + i + j));
1173                         }
1174                         WriteLog("\n");
1175                 }*/
1176         }
1177 //      if (gpu_pc == 0xF03)
1178         {
1179         }
1180 }//*/
1181 /*if (gpu_pc == 0xF03B9E && gpu_reg_bank_0[01] == 0)
1182 {
1183         GPUDumpRegisters();
1184         WriteLog("GPU: Starting disassembly log...\n");
1185         doGPUDis = true;
1186 }//*/
1187 /*if (gpu_pc == 0xF0359A)
1188 {
1189         doGPUDis = true;
1190         GPUDumpRegisters();
1191 }*/
1192 /*              gpu_flag_c = (gpu_flag_c ? 1 : 0);
1193                 gpu_flag_z = (gpu_flag_z ? 1 : 0);
1194                 gpu_flag_n = (gpu_flag_n ? 1 : 0);*/
1195
1196                 uint16_t opcode = GPUReadWord(gpu_pc, GPU);
1197                 uint32_t index = opcode >> 10;
1198                 gpu_instruction = opcode;                               // Added for GPU #3...
1199                 gpu_opcode_first_parameter = (opcode >> 5) & 0x1F;
1200                 gpu_opcode_second_parameter = opcode & 0x1F;
1201 /*if (gpu_pc == 0xF03BE8)
1202 WriteLog("Start of OP frame write...\n");
1203 if (gpu_pc == 0xF03EEE)
1204 WriteLog("--> Writing BRANCH object ---\n");
1205 if (gpu_pc == 0xF03F62)
1206 WriteLog("--> Writing BITMAP object ***\n");//*/
1207 /*if (gpu_pc == 0xF03546)
1208 {
1209         WriteLog("\n--> GPU PC: F03546\n");
1210         GPUDumpRegisters();
1211         GPUDumpDisassembly();
1212 }//*/
1213 /*if (gpu_pc == 0xF033F6)
1214 {
1215         WriteLog("\n--> GPU PC: F033F6\n");
1216         GPUDumpRegisters();
1217         GPUDumpDisassembly();
1218 }//*/
1219 /*if (gpu_pc == 0xF033CC)
1220 {
1221         WriteLog("\n--> GPU PC: F033CC\n");
1222         GPUDumpRegisters();
1223         GPUDumpDisassembly();
1224 }//*/
1225 /*if (gpu_pc == 0xF033D6)
1226 {
1227         WriteLog("\n--> GPU PC: F033D6 (#%d)\n", testCount++);
1228         GPUDumpRegisters();
1229         GPUDumpMemory();
1230 }//*/
1231 /*if (gpu_pc == 0xF033D8)
1232 {
1233         WriteLog("\n--> GPU PC: F033D8 (#%d)\n", testCount++);
1234         GPUDumpRegisters();
1235         GPUDumpMemory();
1236 }//*/
1237 /*if (gpu_pc == 0xF0358E)
1238 {
1239         WriteLog("\n--> GPU PC: F0358E (#%d)\n", testCount++);
1240         GPUDumpRegisters();
1241         GPUDumpMemory();
1242 }//*/
1243 /*if (gpu_pc == 0xF034CA)
1244 {
1245         WriteLog("\n--> GPU PC: F034CA (#%d)\n", testCount++);
1246         GPUDumpRegisters();
1247 }//*/
1248 /*if (gpu_pc == 0xF034CA)
1249 {
1250         len = gpu_reg[1] + 4;//, r9save = gpu_reg[9];
1251         WriteLog("\nAbout to subtract [#%d] (R14=%08X, R15=%08X, R9=%08X):\n   ", testCount++, gpu_reg[14], gpu_reg[15], gpu_reg[9]);
1252         for(int i=0; i<len; i+=4)
1253                 WriteLog(" %08X", GPUReadLong(gpu_reg[15]+i));
1254         WriteLog("\n   ");
1255         for(int i=0; i<len; i+=4)
1256                 WriteLog(" %08X", GPUReadLong(gpu_reg[14]+i));
1257         WriteLog("\n\n");
1258 }
1259 if (gpu_pc == 0xF034DE)
1260 {
1261         WriteLog("\nSubtracted! (R14=%08X, R15=%08X):\n   ", gpu_reg[14], gpu_reg[15]);
1262         for(int i=0; i<len; i+=4)
1263                 WriteLog(" %08X", GPUReadLong(gpu_reg[15]+i));
1264         WriteLog("\n   ");
1265         for(int i=0; i<len; i+=4)
1266                 WriteLog(" %08X", GPUReadLong(gpu_reg[14]+i));
1267         WriteLog("\n   ");
1268         for(int i=0; i<len; i+=4)
1269                 WriteLog(" --------");
1270         WriteLog("\n   ");
1271         for(int i=0; i<len; i+=4)
1272                 WriteLog(" %08X", GPUReadLong(gpu_reg[9]+4+i));
1273         WriteLog("\n\n");
1274 }//*/
1275 /*if (gpu_pc == 0xF035C8)
1276 {
1277         WriteLog("\n--> GPU PC: F035C8 (#%d)\n", testCount++);
1278         GPUDumpRegisters();
1279         GPUDumpDisassembly();
1280 }//*/
1281
1282 if (gpu_start_log)
1283 {
1284 //      gpu_reset_stats();
1285 static char buffer[512];
1286 dasmjag(JAGUAR_GPU, buffer, gpu_pc);
1287 WriteLog("GPU: [%08X] %s (RM=%08X, RN=%08X) -> ", gpu_pc, buffer, RM, RN);
1288 }//*/
1289 //$E400 -> 1110 01 -> $39 -> 57
1290 //GPU #1
1291                 gpu_pc += 2;
1292                 gpu_opcode[index]();
1293 //GPU #2
1294 //              gpu2_opcode[index]();
1295 //              gpu_pc += 2;
1296 //GPU #3                                (Doesn't show ATARI logo! #1 & #2 do...)
1297 //              gpu_pc += 2;
1298 //              gpu3_opcode[index]();
1299
1300 // BIOS hacking
1301 //GPU: [00F03548] jr      nz,00F03560 (0xd561) (RM=00F03114, RN=00000004) ->     --> JR: Branch taken.
1302 /*static bool firstTime = true;
1303 if (gpu_pc == 0xF03548 && firstTime)
1304 {
1305         gpu_flag_z = 1;
1306 //      firstTime = false;
1307
1308 //static char buffer[512];
1309 //int k=0xF03548;
1310 //while (k<0xF0356C)
1311 //{
1312 //int oldk = k;
1313 //k += dasmjag(JAGUAR_GPU, buffer, k);
1314 //WriteLog("GPU: [%08X] %s\n", oldk, buffer);
1315 //}
1316 //      gpu_start_log = 1;
1317 }//*/
1318 //GPU: [00F0354C] jump    nz,(r29) (0xd3a1) (RM=00F03314, RN=00000004) -> (RM=00F03314, RN=00000004)
1319 /*if (gpu_pc == 0xF0354C)
1320         gpu_flag_z = 0;//, gpu_start_log = 1;//*/
1321
1322                 cycles -= gpu_opcode_cycles[index];
1323                 gpu_opcode_use[index]++;
1324 if (gpu_start_log)
1325         WriteLog("(RM=%08X, RN=%08X)\n", RM, RN);//*/
1326 if ((gpu_pc < 0xF03000 || gpu_pc > 0xF03FFF) && !tripwire)
1327 {
1328         WriteLog("GPU: Executing outside local RAM! GPU_PC: %08X\n", gpu_pc);
1329         tripwire = true;
1330 }
1331         }
1332
1333         gpu_in_exec--;
1334 }
1335
1336 //
1337 // GPU opcodes
1338 //
1339
1340 /*
1341 GPU opcodes use (offset punch--vertically below bad guy):
1342                       add 18686
1343                      addq 32621
1344                       sub 7483
1345                      subq 10252
1346                       and 21229
1347                        or 15003
1348                      btst 1822
1349                      bset 2072
1350                      mult 141
1351                       div 2392
1352                      shlq 13449
1353                      shrq 10297
1354                     sharq 11104
1355                       cmp 6775
1356                      cmpq 5944
1357                      move 31259
1358                     moveq 4473
1359                     movei 23277
1360                     loadb 46
1361                     loadw 4201
1362                      load 28580
1363          load_r14_indexed 1183
1364          load_r15_indexed 1125
1365                    storew 178
1366                     store 10144
1367         store_r14_indexed 320
1368         store_r15_indexed 1
1369                   move_pc 1742
1370                      jump 24467
1371                        jr 18090
1372                       nop 41362
1373 */
1374
1375 static void gpu_opcode_jump(void)
1376 {
1377 #ifdef GPU_DIS_JUMP
1378 const char * condition[32] =
1379 {       "T", "nz", "z", "???", "nc", "nc nz", "nc z", "???", "c", "c nz",
1380         "c z", "???", "???", "???", "???", "???", "???", "???", "???",
1381         "???", "nn", "nn nz", "nn z", "???", "n", "n nz", "n z", "???",
1382         "???", "???", "???", "F" };
1383         if (doGPUDis)
1384                 WriteLog("%06X: JUMP   %s, (R%02u) [NCZ:%u%u%u, R%02u=%08X] ", gpu_pc-2, condition[IMM_2], IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM);
1385 #endif
1386         // normalize flags
1387 /*      gpu_flag_c = (gpu_flag_c ? 1 : 0);
1388         gpu_flag_z = (gpu_flag_z ? 1 : 0);
1389         gpu_flag_n = (gpu_flag_n ? 1 : 0);*/
1390         // KLUDGE: Used by BRANCH_CONDITION
1391         uint32_t jaguar_flags = (gpu_flag_n << 2) | (gpu_flag_c << 1) | gpu_flag_z;
1392
1393         if (BRANCH_CONDITION(IMM_2))
1394         {
1395 #ifdef GPU_DIS_JUMP
1396         if (doGPUDis)
1397                 WriteLog("Branched!\n");
1398 #endif
1399 if (gpu_start_log)
1400         WriteLog("    --> JUMP: Branch taken.\n");
1401                 uint32_t delayed_pc = RM;
1402                 GPUExec(1);
1403                 gpu_pc = delayed_pc;
1404 /*              uint16_t opcode = GPUReadWord(gpu_pc, GPU);
1405                 gpu_opcode_first_parameter = (opcode >> 5) & 0x1F;
1406                 gpu_opcode_second_parameter = opcode & 0x1F;
1407
1408                 gpu_pc = delayed_pc;
1409                 gpu_opcode[opcode>>10]();//*/
1410         }
1411 #ifdef GPU_DIS_JUMP
1412         else
1413                 if (doGPUDis)
1414                         WriteLog("Branch NOT taken.\n");
1415 #endif
1416 }
1417
1418 static void gpu_opcode_jr(void)
1419 {
1420 #ifdef GPU_DIS_JR
1421 const char * condition[32] =
1422 {       "T", "nz", "z", "???", "nc", "nc nz", "nc z", "???", "c", "c nz",
1423         "c z", "???", "???", "???", "???", "???", "???", "???", "???",
1424         "???", "nn", "nn nz", "nn z", "???", "n", "n nz", "n z", "???",
1425         "???", "???", "???", "F" };
1426         if (doGPUDis)
1427                 WriteLog("%06X: JR     %s, %06X [NCZ:%u%u%u] ", gpu_pc-2, condition[IMM_2], gpu_pc+((IMM_1 & 0x10 ? 0xFFFFFFF0 | IMM_1 : IMM_1) * 2), gpu_flag_n, gpu_flag_c, gpu_flag_z);
1428 #endif
1429 /*      if (CONDITION(jaguar.op & 31))
1430         {
1431                 int32_t r1 = (INT8)((jaguar.op >> 2) & 0xF8) >> 2;
1432                 uint32_t newpc = jaguar.PC + r1;
1433                 CALL_MAME_DEBUG;
1434                 jaguar.op = ROPCODE(jaguar.PC);
1435                 jaguar.PC = newpc;
1436                 (*jaguar.table[jaguar.op >> 10])();
1437
1438                 jaguar_icount -= 3;     // 3 wait states guaranteed
1439         }*/
1440         // normalize flags
1441 /*      gpu_flag_n = (gpu_flag_n ? 1 : 0);
1442         gpu_flag_c = (gpu_flag_c ? 1 : 0);
1443         gpu_flag_z = (gpu_flag_z ? 1 : 0);*/
1444         // KLUDGE: Used by BRANCH_CONDITION
1445         uint32_t jaguar_flags = (gpu_flag_n << 2) | (gpu_flag_c << 1) | gpu_flag_z;
1446
1447         if (BRANCH_CONDITION(IMM_2))
1448         {
1449 #ifdef GPU_DIS_JR
1450         if (doGPUDis)
1451                 WriteLog("Branched!\n");
1452 #endif
1453 if (gpu_start_log)
1454         WriteLog("    --> JR: Branch taken.\n");
1455                 int32_t offset = (IMM_1 & 0x10 ? 0xFFFFFFF0 | IMM_1 : IMM_1);           // Sign extend IMM_1
1456                 int32_t delayed_pc = gpu_pc + (offset * 2);
1457                 GPUExec(1);
1458                 gpu_pc = delayed_pc;
1459 /*              uint16_t opcode = GPUReadWord(gpu_pc, GPU);
1460                 gpu_opcode_first_parameter = (opcode >> 5) & 0x1F;
1461                 gpu_opcode_second_parameter = opcode & 0x1F;
1462
1463                 gpu_pc = delayed_pc;
1464                 gpu_opcode[opcode>>10]();//*/
1465         }
1466 #ifdef GPU_DIS_JR
1467         else
1468                 if (doGPUDis)
1469                         WriteLog("Branch NOT taken.\n");
1470 #endif
1471 }
1472
1473 static void gpu_opcode_add(void)
1474 {
1475 #ifdef GPU_DIS_ADD
1476         if (doGPUDis)
1477                 WriteLog("%06X: ADD    R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1478 #endif
1479         uint32_t res = RN + RM;
1480         CLR_ZNC; SET_ZNC_ADD(RN, RM, res);
1481         RN = res;
1482 #ifdef GPU_DIS_ADD
1483         if (doGPUDis)
1484                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1485 #endif
1486 }
1487
1488 static void gpu_opcode_addc(void)
1489 {
1490 #ifdef GPU_DIS_ADDC
1491         if (doGPUDis)
1492                 WriteLog("%06X: ADDC   R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1493 #endif
1494 /*      int dreg = jaguar.op & 31;
1495         uint32_t r1 = jaguar.r[(jaguar.op >> 5) & 31];
1496         uint32_t r2 = jaguar.r[dreg];
1497         uint32_t res = r2 + r1 + ((jaguar.FLAGS >> 1) & 1);
1498         jaguar.r[dreg] = res;
1499         CLR_ZNC; SET_ZNC_ADD(r2,r1,res);*/
1500
1501         uint32_t res = RN + RM + gpu_flag_c;
1502         uint32_t carry = gpu_flag_c;
1503 //      SET_ZNC_ADD(RN, RM, res); //???BUG??? Yes!
1504         SET_ZNC_ADD(RN + carry, RM, res);
1505 //      SET_ZNC_ADD(RN, RM + carry, res);
1506         RN = res;
1507 #ifdef GPU_DIS_ADDC
1508         if (doGPUDis)
1509                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1510 #endif
1511 }
1512
1513 static void gpu_opcode_addq(void)
1514 {
1515 #ifdef GPU_DIS_ADDQ
1516         if (doGPUDis)
1517                 WriteLog("%06X: ADDQ   #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1518 #endif
1519         uint32_t r1 = gpu_convert_zero[IMM_1];
1520         uint32_t res = RN + r1;
1521         CLR_ZNC; SET_ZNC_ADD(RN, r1, res);
1522         RN = res;
1523 #ifdef GPU_DIS_ADDQ
1524         if (doGPUDis)
1525                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1526 #endif
1527 }
1528
1529 static void gpu_opcode_addqt(void)
1530 {
1531 #ifdef GPU_DIS_ADDQT
1532         if (doGPUDis)
1533                 WriteLog("%06X: ADDQT  #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1534 #endif
1535         RN += gpu_convert_zero[IMM_1];
1536 #ifdef GPU_DIS_ADDQT
1537         if (doGPUDis)
1538                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1539 #endif
1540 }
1541
1542 static void gpu_opcode_sub(void)
1543 {
1544 #ifdef GPU_DIS_SUB
1545         if (doGPUDis)
1546                 WriteLog("%06X: SUB    R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1547 #endif
1548         uint32_t res = RN - RM;
1549         SET_ZNC_SUB(RN, RM, res);
1550         RN = res;
1551 #ifdef GPU_DIS_SUB
1552         if (doGPUDis)
1553                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1554 #endif
1555 }
1556
1557 static void gpu_opcode_subc(void)
1558 {
1559 #ifdef GPU_DIS_SUBC
1560         if (doGPUDis)
1561                 WriteLog("%06X: SUBC   R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1562 #endif
1563         uint32_t res = RN - RM - gpu_flag_c;
1564         uint32_t borrow = gpu_flag_c;
1565 //      SET_ZNC_SUB(RN, RM, res); //???BUG??? YES!!!
1566 //No matter how you do it, there is a problem. With below, it's 0-0 with carry,
1567 //and the one below it it's FFFFFFFF - FFFFFFFF with carry... !!! FIX !!!
1568 //      SET_ZNC_SUB(RN - borrow, RM, res);
1569         SET_ZNC_SUB(RN, RM + borrow, res);
1570         RN = res;
1571 #ifdef GPU_DIS_SUBC
1572         if (doGPUDis)
1573                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1574 #endif
1575 }
1576 /*
1577 N = 5, M = 3, 3 - 5 = -2, C = 1... Or, in our case:
1578 N = 0, M = 1, 0 - 1 = -1, C = 0!
1579
1580 #define SET_C_SUB(a,b)          (gpu_flag_c = ((uint32_t)(b) > (uint32_t)(a)))
1581 #define SET_ZN(r)                       SET_N(r); SET_Z(r)
1582 #define SET_ZNC_ADD(a,b,r)      SET_N(r); SET_Z(r); SET_C_ADD(a,b)
1583 #define SET_ZNC_SUB(a,b,r)      SET_N(r); SET_Z(r); SET_C_SUB(a,b)
1584 */
1585 static void gpu_opcode_subq(void)
1586 {
1587 #ifdef GPU_DIS_SUBQ
1588         if (doGPUDis)
1589                 WriteLog("%06X: SUBQ   #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1590 #endif
1591         uint32_t r1 = gpu_convert_zero[IMM_1];
1592         uint32_t res = RN - r1;
1593         SET_ZNC_SUB(RN, r1, res);
1594         RN = res;
1595 #ifdef GPU_DIS_SUBQ
1596         if (doGPUDis)
1597                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1598 #endif
1599 }
1600
1601 static void gpu_opcode_subqt(void)
1602 {
1603 #ifdef GPU_DIS_SUBQT
1604         if (doGPUDis)
1605                 WriteLog("%06X: SUBQT  #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1606 #endif
1607         RN -= gpu_convert_zero[IMM_1];
1608 #ifdef GPU_DIS_SUBQT
1609         if (doGPUDis)
1610                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1611 #endif
1612 }
1613
1614 static void gpu_opcode_cmp(void)
1615 {
1616 #ifdef GPU_DIS_CMP
1617         if (doGPUDis)
1618                 WriteLog("%06X: CMP    R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1619 #endif
1620         uint32_t res = RN - RM;
1621         SET_ZNC_SUB(RN, RM, res);
1622 #ifdef GPU_DIS_CMP
1623         if (doGPUDis)
1624                 WriteLog("[NCZ:%u%u%u]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z);
1625 #endif
1626 }
1627
1628 static void gpu_opcode_cmpq(void)
1629 {
1630         static int32_t sqtable[32] =
1631                 { 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,-16,-15,-14,-13,-12,-11,-10,-9,-8,-7,-6,-5,-4,-3,-2,-1 };
1632 #ifdef GPU_DIS_CMPQ
1633         if (doGPUDis)
1634                 WriteLog("%06X: CMPQ   #%d, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, sqtable[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1635 #endif
1636         uint32_t r1 = sqtable[IMM_1 & 0x1F]; // I like this better -> (INT8)(jaguar.op >> 2) >> 3;
1637         uint32_t res = RN - r1;
1638         SET_ZNC_SUB(RN, r1, res);
1639 #ifdef GPU_DIS_CMPQ
1640         if (doGPUDis)
1641                 WriteLog("[NCZ:%u%u%u]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z);
1642 #endif
1643 }
1644
1645 static void gpu_opcode_and(void)
1646 {
1647 #ifdef GPU_DIS_AND
1648         if (doGPUDis)
1649                 WriteLog("%06X: AND    R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1650 #endif
1651         RN = RN & RM;
1652         SET_ZN(RN);
1653 #ifdef GPU_DIS_AND
1654         if (doGPUDis)
1655                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1656 #endif
1657 }
1658
1659 static void gpu_opcode_or(void)
1660 {
1661 #ifdef GPU_DIS_OR
1662         if (doGPUDis)
1663                 WriteLog("%06X: OR     R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1664 #endif
1665         RN = RN | RM;
1666         SET_ZN(RN);
1667 #ifdef GPU_DIS_OR
1668         if (doGPUDis)
1669                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1670 #endif
1671 }
1672
1673 static void gpu_opcode_xor(void)
1674 {
1675 #ifdef GPU_DIS_XOR
1676         if (doGPUDis)
1677                 WriteLog("%06X: XOR    R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1678 #endif
1679         RN = RN ^ RM;
1680         SET_ZN(RN);
1681 #ifdef GPU_DIS_XOR
1682         if (doGPUDis)
1683                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1684 #endif
1685 }
1686
1687 static void gpu_opcode_not(void)
1688 {
1689 #ifdef GPU_DIS_NOT
1690         if (doGPUDis)
1691                 WriteLog("%06X: NOT    R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1692 #endif
1693         RN = ~RN;
1694         SET_ZN(RN);
1695 #ifdef GPU_DIS_NOT
1696         if (doGPUDis)
1697                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1698 #endif
1699 }
1700
1701 static void gpu_opcode_move_pc(void)
1702 {
1703 #ifdef GPU_DIS_MOVEPC
1704         if (doGPUDis)
1705                 WriteLog("%06X: MOVE   PC, R%02u [NCZ:%u%u%u, PC=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, gpu_pc-2, IMM_2, RN);
1706 #endif
1707         // Should be previous PC--this might not always be previous instruction!
1708         // Then again, this will point right at the *current* instruction, i.e., MOVE PC,R!
1709         RN = gpu_pc - 2;
1710 #ifdef GPU_DIS_MOVEPC
1711         if (doGPUDis)
1712                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1713 #endif
1714 }
1715
1716 static void gpu_opcode_sat8(void)
1717 {
1718 #ifdef GPU_DIS_SAT8
1719         if (doGPUDis)
1720                 WriteLog("%06X: SAT8   R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1721 #endif
1722         RN = ((int32_t)RN < 0 ? 0 : (RN > 0xFF ? 0xFF : RN));
1723         SET_ZN(RN);
1724 #ifdef GPU_DIS_SAT8
1725         if (doGPUDis)
1726                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1727 #endif
1728 }
1729
1730 static void gpu_opcode_sat16(void)
1731 {
1732         RN = ((int32_t)RN < 0 ? 0 : (RN > 0xFFFF ? 0xFFFF : RN));
1733         SET_ZN(RN);
1734 }
1735
1736 static void gpu_opcode_sat24(void)
1737 {
1738         RN = ((int32_t)RN < 0 ? 0 : (RN > 0xFFFFFF ? 0xFFFFFF : RN));
1739         SET_ZN(RN);
1740 }
1741
1742 static void gpu_opcode_store_r14_indexed(void)
1743 {
1744 #ifdef GPU_DIS_STORE14I
1745         if (doGPUDis)
1746                 WriteLog("%06X: STORE  R%02u, (R14+$%02X) [NCZ:%u%u%u, R%02u=%08X, R14+$%02X=%08X]\n", gpu_pc-2, IMM_2, gpu_convert_zero[IMM_1] << 2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, gpu_convert_zero[IMM_1] << 2, gpu_reg[14]+(gpu_convert_zero[IMM_1] << 2));
1747 #endif
1748 #ifdef GPU_CORRECT_ALIGNMENT
1749         uint32_t address = gpu_reg[14] + (gpu_convert_zero[IMM_1] << 2);
1750         
1751         if (address >= 0xF03000 && address <= 0xF03FFF)
1752                 GPUWriteLong(address & 0xFFFFFFFC, RN, GPU);
1753         else
1754                 GPUWriteLong(address, RN, GPU);
1755 #else
1756         GPUWriteLong(gpu_reg[14] + (gpu_convert_zero[IMM_1] << 2), RN, GPU);
1757 #endif
1758 }
1759
1760 static void gpu_opcode_store_r15_indexed(void)
1761 {
1762 #ifdef GPU_DIS_STORE15I
1763         if (doGPUDis)
1764                 WriteLog("%06X: STORE  R%02u, (R15+$%02X) [NCZ:%u%u%u, R%02u=%08X, R15+$%02X=%08X]\n", gpu_pc-2, IMM_2, gpu_convert_zero[IMM_1] << 2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, gpu_convert_zero[IMM_1] << 2, gpu_reg[15]+(gpu_convert_zero[IMM_1] << 2));
1765 #endif
1766 #ifdef GPU_CORRECT_ALIGNMENT
1767         uint32_t address = gpu_reg[15] + (gpu_convert_zero[IMM_1] << 2);
1768
1769         if (address >= 0xF03000 && address <= 0xF03FFF)
1770                 GPUWriteLong(address & 0xFFFFFFFC, RN, GPU);
1771         else
1772                 GPUWriteLong(address, RN, GPU);
1773 #else
1774         GPUWriteLong(gpu_reg[15] + (gpu_convert_zero[IMM_1] << 2), RN, GPU);
1775 #endif
1776 }
1777
1778 static void gpu_opcode_load_r14_ri(void)
1779 {
1780 #ifdef GPU_DIS_LOAD14R
1781         if (doGPUDis)
1782                 WriteLog("%06X: LOAD   (R14+R%02u), R%02u [NCZ:%u%u%u, R14+R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM+gpu_reg[14], IMM_2, RN);
1783 #endif
1784 #ifdef GPU_CORRECT_ALIGNMENT
1785         uint32_t address = gpu_reg[14] + RM;
1786
1787         if (address >= 0xF03000 && address <= 0xF03FFF)
1788                 RN = GPUReadLong(address & 0xFFFFFFFC, GPU);
1789         else
1790                 RN = GPUReadLong(address, GPU);
1791 #else
1792         RN = GPUReadLong(gpu_reg[14] + RM, GPU);
1793 #endif
1794 #ifdef GPU_DIS_LOAD14R
1795         if (doGPUDis)
1796                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1797 #endif
1798 }
1799
1800 static void gpu_opcode_load_r15_ri(void)
1801 {
1802 #ifdef GPU_DIS_LOAD15R
1803         if (doGPUDis)
1804                 WriteLog("%06X: LOAD   (R15+R%02u), R%02u [NCZ:%u%u%u, R15+R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM+gpu_reg[15], IMM_2, RN);
1805 #endif
1806 #ifdef GPU_CORRECT_ALIGNMENT
1807         uint32_t address = gpu_reg[15] + RM;
1808
1809         if (address >= 0xF03000 && address <= 0xF03FFF)
1810                 RN = GPUReadLong(address & 0xFFFFFFFC, GPU);
1811         else
1812                 RN = GPUReadLong(address, GPU);
1813 #else
1814         RN = GPUReadLong(gpu_reg[15] + RM, GPU);
1815 #endif
1816 #ifdef GPU_DIS_LOAD15R
1817         if (doGPUDis)
1818                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1819 #endif
1820 }
1821
1822 static void gpu_opcode_store_r14_ri(void)
1823 {
1824 #ifdef GPU_DIS_STORE14R
1825         if (doGPUDis)
1826                 WriteLog("%06X: STORE  R%02u, (R14+R%02u) [NCZ:%u%u%u, R%02u=%08X, R14+R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM+gpu_reg[14]);
1827 #endif
1828 #ifdef GPU_CORRECT_ALIGNMENT
1829         uint32_t address = gpu_reg[14] + RM;
1830
1831         if (address >= 0xF03000 && address <= 0xF03FFF)
1832                 GPUWriteLong(address & 0xFFFFFFFC, RN, GPU);
1833         else
1834                 GPUWriteLong(address, RN, GPU);
1835 #else
1836         GPUWriteLong(gpu_reg[14] + RM, RN, GPU);
1837 #endif
1838 }
1839
1840 static void gpu_opcode_store_r15_ri(void)
1841 {
1842 #ifdef GPU_DIS_STORE15R
1843         if (doGPUDis)
1844                 WriteLog("%06X: STORE  R%02u, (R15+R%02u) [NCZ:%u%u%u, R%02u=%08X, R15+R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM+gpu_reg[15]);
1845 #endif
1846 #ifdef GPU_CORRECT_ALIGNMENT_STORE
1847         uint32_t address = gpu_reg[15] + RM;
1848
1849         if (address >= 0xF03000 && address <= 0xF03FFF)
1850                 GPUWriteLong(address & 0xFFFFFFFC, RN, GPU);
1851         else
1852                 GPUWriteLong(address, RN, GPU);
1853 #else
1854         GPUWriteLong(gpu_reg[15] + RM, RN, GPU);
1855 #endif
1856 }
1857
// NOP: changes no state; with GPU_DIS_NOP defined it only emits a trace line.
static void gpu_opcode_nop(void)
{
#ifdef GPU_DIS_NOP
        if (doGPUDis)
        {
                WriteLog("%06X: NOP    [NCZ:%u%u%u]\n", gpu_pc-2, gpu_flag_n, gpu_flag_c, gpu_flag_z);
        }
#endif
}
1865
1866 static void gpu_opcode_pack(void)
1867 {
1868 #ifdef GPU_DIS_PACK
1869         if (doGPUDis)
1870                 WriteLog("%06X: %s R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, (!IMM_1 ? "PACK  " : "UNPACK"), IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1871 #endif
1872         uint32_t val = RN;
1873
1874 //BUG!  if (RM == 0)                            // Pack
1875         if (IMM_1 == 0)                         // Pack
1876                 RN = ((val >> 10) & 0x0000F000) | ((val >> 5) & 0x00000F00) | (val & 0x000000FF);
1877         else                                            // Unpack
1878                 RN = ((val & 0x0000F000) << 10) | ((val & 0x00000F00) << 5) | (val & 0x000000FF);
1879 #ifdef GPU_DIS_PACK
1880         if (doGPUDis)
1881                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1882 #endif
1883 }
1884
1885 static void gpu_opcode_storeb(void)
1886 {
1887 #ifdef GPU_DIS_STOREB
1888         if (doGPUDis)
1889                 WriteLog("%06X: STOREB R%02u, (R%02u) [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM);
1890 #endif
1891 //Is this right???
1892 // Would appear to be so...!
1893         if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1894                 GPUWriteLong(RM, RN & 0xFF, GPU);
1895         else
1896                 JaguarWriteByte(RM, RN, GPU);
1897 }
1898
1899 static void gpu_opcode_storew(void)
1900 {
1901 #ifdef GPU_DIS_STOREW
1902         if (doGPUDis)
1903                 WriteLog("%06X: STOREW R%02u, (R%02u) [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM);
1904 #endif
1905 #ifdef GPU_CORRECT_ALIGNMENT
1906         if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1907                 GPUWriteLong(RM & 0xFFFFFFFE, RN & 0xFFFF, GPU);
1908         else
1909                 JaguarWriteWord(RM, RN, GPU);
1910 #else
1911         if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1912                 GPUWriteLong(RM, RN & 0xFFFF, GPU);
1913         else
1914                 JaguarWriteWord(RM, RN, GPU);
1915 #endif
1916 }
1917
1918 static void gpu_opcode_store(void)
1919 {
1920 #ifdef GPU_DIS_STORE
1921         if (doGPUDis)
1922                 WriteLog("%06X: STORE  R%02u, (R%02u) [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM);
1923 #endif
1924 #ifdef GPU_CORRECT_ALIGNMENT
1925         if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1926                 GPUWriteLong(RM & 0xFFFFFFFC, RN, GPU);
1927         else
1928                 GPUWriteLong(RM, RN, GPU);
1929 #else
1930         GPUWriteLong(RM, RN, GPU);
1931 #endif
1932 }
1933
1934 static void gpu_opcode_storep(void)
1935 {
1936 #ifdef GPU_CORRECT_ALIGNMENT
1937         if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1938         {
1939                 GPUWriteLong((RM & 0xFFFFFFF8) + 0, gpu_hidata, GPU);
1940                 GPUWriteLong((RM & 0xFFFFFFF8) + 4, RN, GPU);
1941         }
1942         else
1943         {
1944                 GPUWriteLong(RM + 0, gpu_hidata, GPU);
1945                 GPUWriteLong(RM + 4, RN, GPU);
1946         }
1947 #else
1948         GPUWriteLong(RM + 0, gpu_hidata, GPU);
1949         GPUWriteLong(RM + 4, RN, GPU);
1950 #endif
1951 }
1952
1953 static void gpu_opcode_loadb(void)
1954 {
1955 #ifdef GPU_DIS_LOADB
1956         if (doGPUDis)
1957                 WriteLog("%06X: LOADB  (R%02u), R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1958 #endif
1959         if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1960                 RN = GPUReadLong(RM, GPU) & 0xFF;
1961         else
1962                 RN = JaguarReadByte(RM, GPU);
1963 #ifdef GPU_DIS_LOADB
1964         if (doGPUDis)
1965                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1966 #endif
1967 }
1968
1969 static void gpu_opcode_loadw(void)
1970 {
1971 #ifdef GPU_DIS_LOADW
1972         if (doGPUDis)
1973                 WriteLog("%06X: LOADW  (R%02u), R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1974 #endif
1975 #ifdef GPU_CORRECT_ALIGNMENT
1976         if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1977                 RN = GPUReadLong(RM & 0xFFFFFFFE, GPU) & 0xFFFF;
1978         else
1979                 RN = JaguarReadWord(RM, GPU);
1980 #else
1981         if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1982                 RN = GPUReadLong(RM, GPU) & 0xFFFF;
1983         else
1984                 RN = JaguarReadWord(RM, GPU);
1985 #endif
1986 #ifdef GPU_DIS_LOADW
1987         if (doGPUDis)
1988                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1989 #endif
1990 }
1991
1992 // According to the docs, & "Do The Same", this address is long aligned...
1993 // So let's try it:
1994 // And it works!!! Need to fix all instances...
1995 // Also, Power Drive Rally seems to contradict the idea that only LOADs in
1996 // the $F03000-$F03FFF range are aligned...
1997 #warning "!!! Alignment issues, need to find definitive final word on this !!!"
1998 /*
1999 Preliminary testing on real hardware seems to confirm that something strange goes on
2000 with unaligned reads in main memory. When the address is off by 1, the result is the
2001 same as the long address with the top byte replaced by something. So if the read is
2002 from $401, and $400 has 12 34 56 78, the value read will be $nn345678, where nn is a currently unknown value.
2003 When the address is off by 2, the result would be $nnnn5678, where nnnn is unknown.
2004 When the address is off by 3, the result would be $nnnnnn78, where nnnnnn is unknown.
2005 It may be that the "unknown" values come from the prefetch queue, but not sure how
2006 to test that. They seem to be stable, though, which would indicate such a mechanism.
2007 Sometimes, however, the off by 2 case returns $12345678!
2008 */
2009 static void gpu_opcode_load(void)
2010 {
2011 #ifdef GPU_DIS_LOAD
2012         if (doGPUDis)
2013                 WriteLog("%06X: LOAD   (R%02u), R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2014 #endif
2015 #ifdef GPU_CORRECT_ALIGNMENT
2016         uint32_t mask[4] = { 0x00000000, 0xFF000000, 0xFFFF0000, 0xFFFFFF00 };
2017 //      if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
2018                 RN = GPUReadLong(RM & 0xFFFFFFFC, GPU);
2019 //              RN = GPUReadLong(RM & 0x00FFFFFC, GPU);
2020 //      else
2021 //              RN = GPUReadLong(RM, GPU);
2022         // Simulate garbage in unaligned reads...
2023 //seems that this behavior is different in GPU mem vs. main mem...
2024 //      if ((RM < 0xF03000) || (RM > 0xF0BFFF))
2025 //              RN |= mask[RM & 0x03];
2026 #else
2027         RN = GPUReadLong(RM, GPU);
2028 #endif
2029 #ifdef GPU_DIS_LOAD
2030         if (doGPUDis)
2031                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2032 #endif
2033 }
2034
2035 static void gpu_opcode_loadp(void)
2036 {
2037 #ifdef GPU_CORRECT_ALIGNMENT
2038         if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
2039         {
2040                 gpu_hidata = GPUReadLong((RM & 0xFFFFFFF8) + 0, GPU);
2041                 RN                 = GPUReadLong((RM & 0xFFFFFFF8) + 4, GPU);
2042         }
2043         else
2044         {
2045                 gpu_hidata = GPUReadLong(RM + 0, GPU);
2046                 RN                 = GPUReadLong(RM + 4, GPU);
2047         }
2048 #else
2049         gpu_hidata = GPUReadLong(RM + 0, GPU);
2050         RN                 = GPUReadLong(RM + 4, GPU);
2051 #endif
2052 }
2053
2054 static void gpu_opcode_load_r14_indexed(void)
2055 {
2056 #ifdef GPU_DIS_LOAD14I
2057         if (doGPUDis)
2058                 WriteLog("%06X: LOAD   (R14+$%02X), R%02u [NCZ:%u%u%u, R14+$%02X=%08X, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1] << 2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, gpu_convert_zero[IMM_1] << 2, gpu_reg[14]+(gpu_convert_zero[IMM_1] << 2), IMM_2, RN);
2059 #endif
2060 #ifdef GPU_CORRECT_ALIGNMENT
2061         uint32_t address = gpu_reg[14] + (gpu_convert_zero[IMM_1] << 2);
2062
2063         if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
2064                 RN = GPUReadLong(address & 0xFFFFFFFC, GPU);
2065         else
2066                 RN = GPUReadLong(address, GPU);
2067 #else
2068         RN = GPUReadLong(gpu_reg[14] + (gpu_convert_zero[IMM_1] << 2), GPU);
2069 #endif
2070 #ifdef GPU_DIS_LOAD14I
2071         if (doGPUDis)
2072                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2073 #endif
2074 }
2075
2076 static void gpu_opcode_load_r15_indexed(void)
2077 {
2078 #ifdef GPU_DIS_LOAD15I
2079         if (doGPUDis)
2080                 WriteLog("%06X: LOAD   (R15+$%02X), R%02u [NCZ:%u%u%u, R15+$%02X=%08X, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1] << 2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, gpu_convert_zero[IMM_1] << 2, gpu_reg[15]+(gpu_convert_zero[IMM_1] << 2), IMM_2, RN);
2081 #endif
2082 #ifdef GPU_CORRECT_ALIGNMENT
2083         uint32_t address = gpu_reg[15] + (gpu_convert_zero[IMM_1] << 2);
2084
2085         if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
2086                 RN = GPUReadLong(address & 0xFFFFFFFC, GPU);
2087         else
2088                 RN = GPUReadLong(address, GPU);
2089 #else
2090         RN = GPUReadLong(gpu_reg[15] + (gpu_convert_zero[IMM_1] << 2), GPU);
2091 #endif
2092 #ifdef GPU_DIS_LOAD15I
2093         if (doGPUDis)
2094                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2095 #endif
2096 }
2097
2098 static void gpu_opcode_movei(void)
2099 {
2100 #ifdef GPU_DIS_MOVEI
2101         if (doGPUDis)
2102                 WriteLog("%06X: MOVEI  #$%08X, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, (uint32_t)GPUReadWord(gpu_pc) | ((uint32_t)GPUReadWord(gpu_pc + 2) << 16), IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2103 #endif
2104         // This instruction is followed by 32-bit value in LSW / MSW format...
2105         RN = (uint32_t)GPUReadWord(gpu_pc, GPU) | ((uint32_t)GPUReadWord(gpu_pc + 2, GPU) << 16);
2106         gpu_pc += 4;
2107 #ifdef GPU_DIS_MOVEI
2108         if (doGPUDis)
2109                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2110 #endif
2111 }
2112
2113 static void gpu_opcode_moveta(void)
2114 {
2115 #ifdef GPU_DIS_MOVETA
2116         if (doGPUDis)
2117                 WriteLog("%06X: MOVETA R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u(alt)=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, ALTERNATE_RN);
2118 #endif
2119         ALTERNATE_RN = RM;
2120 #ifdef GPU_DIS_MOVETA
2121         if (doGPUDis)
2122                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u(alt)=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, ALTERNATE_RN);
2123 #endif
2124 }
2125
2126 static void gpu_opcode_movefa(void)
2127 {
2128 #ifdef GPU_DIS_MOVEFA
2129         if (doGPUDis)
2130                 WriteLog("%06X: MOVEFA R%02u, R%02u [NCZ:%u%u%u, R%02u(alt)=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, ALTERNATE_RM, IMM_2, RN);
2131 #endif
2132         RN = ALTERNATE_RM;
2133 #ifdef GPU_DIS_MOVEFA
2134         if (doGPUDis)
2135                 WriteLog("[NCZ:%u%u%u, R%02u(alt)=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, ALTERNATE_RM, IMM_2, RN);
2136 #endif
2137 }
2138
2139 static void gpu_opcode_move(void)
2140 {
2141 #ifdef GPU_DIS_MOVE
2142         if (doGPUDis)
2143                 WriteLog("%06X: MOVE   R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2144 #endif
2145         RN = RM;
2146 #ifdef GPU_DIS_MOVE
2147         if (doGPUDis)
2148                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2149 #endif
2150 }
2151
2152 static void gpu_opcode_moveq(void)
2153 {
2154 #ifdef GPU_DIS_MOVEQ
2155         if (doGPUDis)
2156                 WriteLog("%06X: MOVEQ  #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2157 #endif
2158         RN = IMM_1;
2159 #ifdef GPU_DIS_MOVEQ
2160         if (doGPUDis)
2161                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2162 #endif
2163 }
2164
2165 static void gpu_opcode_resmac(void)
2166 {
2167         RN = gpu_acc;
2168 }
2169
2170 static void gpu_opcode_imult(void)
2171 {
2172 #ifdef GPU_DIS_IMULT
2173         if (doGPUDis)
2174                 WriteLog("%06X: IMULT  R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2175 #endif
2176         RN = (int16_t)RN * (int16_t)RM;
2177         SET_ZN(RN);
2178 #ifdef GPU_DIS_IMULT
2179         if (doGPUDis)
2180                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2181 #endif
2182 }
2183
2184 static void gpu_opcode_mult(void)
2185 {
2186 #ifdef GPU_DIS_MULT
2187         if (doGPUDis)
2188                 WriteLog("%06X: MULT   R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2189 #endif
2190         RN = (uint16_t)RM * (uint16_t)RN;
2191         SET_ZN(RN);
2192 #ifdef GPU_DIS_MULT
2193         if (doGPUDis)
2194                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2195 #endif
2196 }
2197
2198 static void gpu_opcode_bclr(void)
2199 {
2200 #ifdef GPU_DIS_BCLR
2201         if (doGPUDis)
2202                 WriteLog("%06X: BCLR   #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2203 #endif
2204         uint32_t res = RN & ~(1 << IMM_1);
2205         RN = res;
2206         SET_ZN(res);
2207 #ifdef GPU_DIS_BCLR
2208         if (doGPUDis)
2209                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2210 #endif
2211 }
2212
2213 static void gpu_opcode_btst(void)
2214 {
2215 #ifdef GPU_DIS_BTST
2216         if (doGPUDis)
2217                 WriteLog("%06X: BTST   #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2218 #endif
2219         gpu_flag_z = (~RN >> IMM_1) & 1;
2220 #ifdef GPU_DIS_BTST
2221         if (doGPUDis)
2222                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2223 #endif
2224 }
2225
2226 static void gpu_opcode_bset(void)
2227 {
2228 #ifdef GPU_DIS_BSET
2229         if (doGPUDis)
2230                 WriteLog("%06X: BSET   #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2231 #endif
2232         uint32_t res = RN | (1 << IMM_1);
2233         RN = res;
2234         SET_ZN(res);
2235 #ifdef GPU_DIS_BSET
2236         if (doGPUDis)
2237                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2238 #endif
2239 }
2240
2241 static void gpu_opcode_imacn(void)
2242 {
2243         uint32_t res = (int16_t)RM * (int16_t)(RN);
2244         gpu_acc += res;
2245 }
2246
2247 static void gpu_opcode_mtoi(void)
2248 {
2249         uint32_t _RM = RM;
2250         uint32_t res = RN = (((int32_t)_RM >> 8) & 0xFF800000) | (_RM & 0x007FFFFF);
2251         SET_ZN(res);
2252 }
2253
2254 static void gpu_opcode_normi(void)
2255 {
2256         uint32_t _RM = RM;
2257         uint32_t res = 0;
2258
2259         if (_RM)
2260         {
2261                 while ((_RM & 0xFFC00000) == 0)
2262                 {
2263                         _RM <<= 1;
2264                         res--;
2265                 }
2266                 while ((_RM & 0xFF800000) != 0)
2267                 {
2268                         _RM >>= 1;
2269                         res++;
2270                 }
2271         }
2272         RN = res;
2273         SET_ZN(res);
2274 }
2275
2276 static void gpu_opcode_mmult(void)
2277 {
2278         int count       = gpu_matrix_control & 0x0F;    // Matrix width
2279         uint32_t addr = gpu_pointer_to_matrix;          // In the GPU's RAM
2280         int64_t accum = 0;
2281         uint32_t res;
2282
2283         if (gpu_matrix_control & 0x10)                          // Column stepping
2284         {
2285                 for(int i=0; i<count; i++)
2286                 {
2287                         int16_t a;
2288                         if (i & 0x01)
2289                                 a = (int16_t)((gpu_alternate_reg[IMM_1 + (i >> 1)] >> 16) & 0xFFFF);
2290                         else
2291                                 a = (int16_t)(gpu_alternate_reg[IMM_1 + (i >> 1)] & 0xFFFF);
2292
2293                         int16_t b = ((int16_t)GPUReadWord(addr + 2, GPU));
2294                         accum += a * b;
2295                         addr += 4 * count;
2296                 }
2297         }
2298         else                                                                            // Row stepping
2299         {
2300                 for(int i=0; i<count; i++)
2301                 {
2302                         int16_t a;
2303                         if (i & 0x01)
2304                                 a = (int16_t)((gpu_alternate_reg[IMM_1 + (i >> 1)] >> 16) & 0xFFFF);
2305                         else
2306                                 a = (int16_t)(gpu_alternate_reg[IMM_1 + (i >> 1)] & 0xFFFF);
2307
2308                         int16_t b = ((int16_t)GPUReadWord(addr + 2, GPU));
2309                         accum += a * b;
2310                         addr += 4;
2311                 }
2312         }
2313         RN = res = (int32_t)accum;
2314         // carry flag to do (out of the last add)
2315         SET_ZN(res);
2316 }
2317
2318 static void gpu_opcode_abs(void)
2319 {
2320 #ifdef GPU_DIS_ABS
2321         if (doGPUDis)
2322                 WriteLog("%06X: ABS    R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2323 #endif
2324         gpu_flag_c = RN >> 31;
2325         if (RN == 0x80000000)
2326         //Is 0x80000000 a positive number? If so, then we need to set C to 0 as well!
2327                 gpu_flag_n = 1, gpu_flag_z = 0;
2328         else
2329         {
2330                 if (gpu_flag_c)
2331                         RN = -RN;
2332                 gpu_flag_n = 0; SET_FLAG_Z(RN);
2333         }
2334 #ifdef GPU_DIS_ABS
2335         if (doGPUDis)
2336                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2337 #endif
2338 }
2339
2340 static void gpu_opcode_div(void)        // RN / RM
2341 {
2342 #ifdef GPU_DIS_DIV
2343         if (doGPUDis)
2344                 WriteLog("%06X: DIV    R%02u, R%02u (%s) [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, (gpu_div_control & 0x01 ? "16.16" : "32"), gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2345 #endif
2346 // NOTE: remainder is NOT calculated correctly here!
2347 //       The original tried to get it right by checking to see if the
2348 //       remainder was negative, but that's too late...
2349 // The code there should do it now, but I'm not 100% sure...
2350
2351         if (RM)
2352         {
2353                 if (gpu_div_control & 0x01)             // 16.16 division
2354                 {
2355                         RN = ((uint64_t)RN << 16) / RM;
2356                         gpu_remain = ((uint64_t)RN << 16) % RM;
2357                 }
2358                 else
2359                 {
2360                         RN = RN / RM;
2361                         gpu_remain = RN % RM;
2362                 }
2363
2364                 if ((gpu_remain - RM) & 0x80000000)     // If the result would have been negative...
2365                         gpu_remain -= RM;                       // Then make it negative!
2366         }
2367         else
2368                 RN = 0xFFFFFFFF;
2369
2370 /*      uint32_t _RM=RM;
2371         uint32_t _RN=RN;
2372
2373         if (_RM)
2374         {
2375                 if (gpu_div_control & 1)
2376                 {
2377                         gpu_remain = (((uint64_t)_RN) << 16) % _RM;
2378                         if (gpu_remain&0x80000000)
2379                                 gpu_remain-=_RM;
2380                         RN = (((uint64_t)_RN) << 16) / _RM;
2381                 }
2382                 else
2383                 {
2384                         gpu_remain = _RN % _RM;
2385                         if (gpu_remain&0x80000000)
2386                                 gpu_remain-=_RM;
2387                         RN/=_RM;
2388                 }
2389         }
2390         else
2391                 RN=0xffffffff;*/
2392 #ifdef GPU_DIS_DIV
2393         if (doGPUDis)
2394                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] Remainder: %08X\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN, gpu_remain);
2395 #endif
2396 }
2397
2398 static void gpu_opcode_imultn(void)
2399 {
2400         uint32_t res = (int32_t)((int16_t)RN * (int16_t)RM);
2401         gpu_acc = (int32_t)res;
2402         SET_FLAG_Z(res);
2403         SET_FLAG_N(res);
2404 }
2405
2406 static void gpu_opcode_neg(void)
2407 {
2408 #ifdef GPU_DIS_NEG
2409         if (doGPUDis)
2410                 WriteLog("%06X: NEG    R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2411 #endif
2412         uint32_t res = -RN;
2413         SET_ZNC_SUB(0, RN, res);
2414         RN = res;
2415 #ifdef GPU_DIS_NEG
2416         if (doGPUDis)
2417                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2418 #endif
2419 }
2420
2421 static void gpu_opcode_shlq(void)
2422 {
2423 #ifdef GPU_DIS_SHLQ
2424         if (doGPUDis)
2425                 WriteLog("%06X: SHLQ   #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, 32 - IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2426 #endif
2427 // Was a bug here...
2428 // (Look at Aaron's code: If r1 = 32, then 32 - 32 = 0 which is wrong!)
2429         int32_t r1 = 32 - IMM_1;
2430         uint32_t res = RN << r1;
2431         SET_ZN(res); gpu_flag_c = (RN >> 31) & 1;
2432         RN = res;
2433 #ifdef GPU_DIS_SHLQ
2434         if (doGPUDis)
2435                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2436 #endif
2437 }
2438
2439 static void gpu_opcode_shrq(void)
2440 {
2441 #ifdef GPU_DIS_SHRQ
2442         if (doGPUDis)
2443                 WriteLog("%06X: SHRQ   #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2444 #endif
2445         int32_t r1 = gpu_convert_zero[IMM_1];
2446         uint32_t res = RN >> r1;
2447         SET_ZN(res); gpu_flag_c = RN & 1;
2448         RN = res;
2449 #ifdef GPU_DIS_SHRQ
2450         if (doGPUDis)
2451                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2452 #endif
2453 }
2454
2455 static void gpu_opcode_ror(void)
2456 {
2457 #ifdef GPU_DIS_ROR
2458         if (doGPUDis)
2459                 WriteLog("%06X: ROR    R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2460 #endif
2461         uint32_t r1 = RM & 0x1F;
2462         uint32_t res = (RN >> r1) | (RN << (32 - r1));
2463         SET_ZN(res); gpu_flag_c = (RN >> 31) & 1;
2464         RN = res;
2465 #ifdef GPU_DIS_ROR
2466         if (doGPUDis)
2467                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2468 #endif
2469 }
2470
2471 static void gpu_opcode_rorq(void)
2472 {
2473 #ifdef GPU_DIS_RORQ
2474         if (doGPUDis)
2475                 WriteLog("%06X: RORQ   #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2476 #endif
2477         uint32_t r1 = gpu_convert_zero[IMM_1 & 0x1F];
2478         uint32_t r2 = RN;
2479         uint32_t res = (r2 >> r1) | (r2 << (32 - r1));
2480         RN = res;
2481         SET_ZN(res); gpu_flag_c = (r2 >> 31) & 0x01;
2482 #ifdef GPU_DIS_RORQ
2483         if (doGPUDis)
2484                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2485 #endif
2486 }
2487
2488 static void gpu_opcode_sha(void)
2489 {
2490 /*      int dreg = jaguar.op & 31;
2491         int32_t r1 = (int32_t)jaguar.r[(jaguar.op >> 5) & 31];
2492         uint32_t r2 = jaguar.r[dreg];
2493         uint32_t res;
2494
2495         CLR_ZNC;
2496         if (r1 < 0)
2497         {
2498                 res = (r1 <= -32) ? 0 : (r2 << -r1);
2499                 jaguar.FLAGS |= (r2 >> 30) & 2;
2500         }
2501         else
2502         {
2503                 res = (r1 >= 32) ? ((int32_t)r2 >> 31) : ((int32_t)r2 >> r1);
2504                 jaguar.FLAGS |= (r2 << 1) & 2;
2505         }
2506         jaguar.r[dreg] = res;
2507         SET_ZN(res);*/
2508
2509 #ifdef GPU_DIS_SHA
2510         if (doGPUDis)
2511                 WriteLog("%06X: SHA    R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2512 #endif
2513         uint32_t res;
2514
2515         if ((int32_t)RM < 0)
2516         {
2517                 res = ((int32_t)RM <= -32) ? 0 : (RN << -(int32_t)RM);
2518                 gpu_flag_c = RN >> 31;
2519         }
2520         else
2521         {
2522                 res = ((int32_t)RM >= 32) ? ((int32_t)RN >> 31) : ((int32_t)RN >> (int32_t)RM);
2523                 gpu_flag_c = RN & 0x01;
2524         }
2525         RN = res;
2526         SET_ZN(res);
2527 #ifdef GPU_DIS_SHA
2528         if (doGPUDis)
2529                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2530 #endif
2531
2532 /*      int32_t sRM=(int32_t)RM;
2533         uint32_t _RN=RN;
2534
2535         if (sRM<0)
2536         {
2537                 uint32_t shift=-sRM;
2538                 if (shift>=32) shift=32;
2539                 gpu_flag_c=(_RN&0x80000000)>>31;
2540                 while (shift)
2541                 {
2542                         _RN<<=1;
2543                         shift--;
2544                 }
2545         }
2546         else
2547         {
2548                 uint32_t shift=sRM;
2549                 if (shift>=32) shift=32;
2550                 gpu_flag_c=_RN&0x1;
2551                 while (shift)
2552                 {
2553                         _RN=((int32_t)_RN)>>1;
2554                         shift--;
2555                 }
2556         }
2557         RN=_RN;
2558         SET_FLAG_Z(_RN);
2559         SET_FLAG_N(_RN);*/
2560 }
2561
2562 static void gpu_opcode_sharq(void)
2563 {
2564 #ifdef GPU_DIS_SHARQ
2565         if (doGPUDis)
2566                 WriteLog("%06X: SHARQ  #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2567 #endif
2568         uint32_t res = (int32_t)RN >> gpu_convert_zero[IMM_1];
2569         SET_ZN(res); gpu_flag_c = RN & 0x01;
2570         RN = res;
2571 #ifdef GPU_DIS_SHARQ
2572         if (doGPUDis)
2573                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2574 #endif
2575 }
2576
2577 static void gpu_opcode_sh(void)
2578 {
2579 #ifdef GPU_DIS_SH
2580         if (doGPUDis)
2581                 WriteLog("%06X: SH     R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2582 #endif
2583         if (RM & 0x80000000)            // Shift left
2584         {
2585                 gpu_flag_c = RN >> 31;
2586                 RN = ((int32_t)RM <= -32 ? 0 : RN << -(int32_t)RM);
2587         }
2588         else                                            // Shift right
2589         {
2590                 gpu_flag_c = RN & 0x01;
2591                 RN = (RM >= 32 ? 0 : RN >> RM);
2592         }
2593         SET_ZN(RN);
2594 #ifdef GPU_DIS_SH
2595         if (doGPUDis)
2596                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2597 #endif
2598 }
2599
2600 //Temporary: Testing only!
2601 //#include "gpu2.cpp"
2602 //#include "gpu3.cpp"
2603
2604 #else
2605
2606 // New thread-safe GPU core
2607
// Placeholder entry point for the planned thread-safe GPU core. It does no
// work yet; it returns 0 so that a caller never reads an indeterminate value
// (falling off the end of a non-void function is undefined behaviour).
int GPUCore(void * data)
{
	(void)data;								// Not used by the stub
	return 0;
}
2611
2612 #endif