]> Shamusworld >> Repos - virtualjaguar/blob - src/gpu.cpp
Removed some cruft and nonstandard int/uint types, added M series BIOS.
[virtualjaguar] / src / gpu.cpp
1 #if 1
2
3 //
4 // GPU Core
5 //
6 // Originally by David Raingeard (Cal2)
7 // GCC/SDL port by Niels Wagenaar (Linux/WIN32) and Caz (BeOS)
8 // Cleanups, endian wrongness, and bad ASM amelioration by James Hammons
9 // (C) 2010 Underground Software
10 //
11 // JLH = James Hammons <jlhamm@acm.org>
12 //
13 // Who  When        What
14 // ---  ----------  -------------------------------------------------------------
15 // JLH  01/16/2010  Created this log ;-)
16 // JLH  11/26/2011  Added fixes for LOAD/STORE alignment issues
17
18 //
19 // Note: Endian wrongness probably stems from the MAME origins of this emu and
20 //       the braindead way in which MAME handles memory. :-)
21 //
22 // Problem with not booting the BIOS was the incorrect way that the
23 // SUBC instruction set the carry when the carry was set going in...
24 // Same problem with ADDC...
25 //
26
27 #include "gpu.h"
28
29 #include <stdlib.h>
30 #include <string.h>                                                             // For memset
31 #include "dsp.h"
32 #include "jagdasm.h"
33 #include "jaguar.h"
34 #include "log.h"
35 #include "m68000/m68kinterface.h"
36 //#include "memory.h"
37 #include "tom.h"
38
39
40 // Seems alignment in loads & stores was off...
41 #define GPU_CORRECT_ALIGNMENT
42 //#define GPU_DEBUG
43
44 // For GPU disassembly...
45
46 #if 0
47 #define GPU_DIS_ABS
48 #define GPU_DIS_ADD
49 #define GPU_DIS_ADDC
50 #define GPU_DIS_ADDQ
51 #define GPU_DIS_ADDQT
52 #define GPU_DIS_AND
53 #define GPU_DIS_BCLR
54 #define GPU_DIS_BSET
55 #define GPU_DIS_BTST
56 #define GPU_DIS_CMP
57 #define GPU_DIS_CMPQ
58 #define GPU_DIS_DIV
59 #define GPU_DIS_IMULT
60 #define GPU_DIS_JUMP
61 #define GPU_DIS_JR
62 #define GPU_DIS_LOAD
63 #define GPU_DIS_LOADB
64 #define GPU_DIS_LOADW
65 #define GPU_DIS_LOAD14I
66 #define GPU_DIS_LOAD14R
67 #define GPU_DIS_LOAD15I
68 #define GPU_DIS_LOAD15R
69 #define GPU_DIS_MOVE
70 #define GPU_DIS_MOVEFA
71 #define GPU_DIS_MOVEI
72 #define GPU_DIS_MOVEPC
73 #define GPU_DIS_MOVETA
74 #define GPU_DIS_MOVEQ
75 #define GPU_DIS_MULT
76 #define GPU_DIS_NEG
77 #define GPU_DIS_NOP
78 #define GPU_DIS_NOT
79 #define GPU_DIS_OR
80 #define GPU_DIS_PACK
81 #define GPU_DIS_ROR
82 #define GPU_DIS_RORQ
83 #define GPU_DIS_SAT8
84 #define GPU_DIS_SH
85 #define GPU_DIS_SHA
86 #define GPU_DIS_SHARQ
87 #define GPU_DIS_SHLQ
88 #define GPU_DIS_SHRQ
89 #define GPU_DIS_STORE
90 #define GPU_DIS_STOREB
91 #define GPU_DIS_STOREW
92 #define GPU_DIS_STORE14I
93 #define GPU_DIS_STORE14R
94 #define GPU_DIS_STORE15I
95 #define GPU_DIS_STORE15R
96 #define GPU_DIS_SUB
97 #define GPU_DIS_SUBC
98 #define GPU_DIS_SUBQ
99 #define GPU_DIS_SUBQT
100 #define GPU_DIS_XOR
101
102 //bool doGPUDis = false;
103 bool doGPUDis = true;
104 #endif
105
106 /*
107 GPU opcodes use (BIOS flying ATARI logo):
108 +                     add 357416
109 +                    addq 538030
110 +                   addqt 6999
111 +                     sub 116663
112 +                    subq 188059
113 +                   subqt 15086
114 +                     neg 36097
115 +                     and 233993
116 +                      or 109332
117 +                     xor 1384
118 +                    btst 111924
119 +                    bset 25029
120 +                    bclr 10551
121 +                    mult 28147
122 +                   imult 69148
123 +                     div 64102
124 +                     abs 159394
125 +                    shlq 194690
126 +                    shrq 292587
127 +                   sharq 192649
128 +                    rorq 58672
129 +                     cmp 244963
130 +                    cmpq 114834
131 +                    move 833472
132 +                   moveq 56427
133 +                  moveta 220814
134 +                  movefa 170678
135 +                   movei 152025
136 +                   loadw 108220
137 +                    load 430936
138 +                  storew 3036
139 +                   store 372490
140 +                 move_pc 2330
141 +                    jump 349134
142 +                      jr 529171
143                     mmult 64904
144 +                     nop 432179
145 */
146
147 // Various bits
148
149 #define CINT0FLAG                       0x0200
150 #define CINT1FLAG                       0x0400
151 #define CINT2FLAG                       0x0800
152 #define CINT3FLAG                       0x1000
153 #define CINT4FLAG                       0x2000
154 #define CINT04FLAGS                     (CINT0FLAG | CINT1FLAG | CINT2FLAG | CINT3FLAG | CINT4FLAG)
155
156 // GPU_FLAGS bits
157
158 #define ZERO_FLAG               0x0001
159 #define CARRY_FLAG              0x0002
160 #define NEGA_FLAG               0x0004
161 #define IMASK                   0x0008
162 #define INT_ENA0                0x0010
163 #define INT_ENA1                0x0020
164 #define INT_ENA2                0x0040
165 #define INT_ENA3                0x0080
166 #define INT_ENA4                0x0100
167 #define INT_CLR0                0x0200
168 #define INT_CLR1                0x0400
169 #define INT_CLR2                0x0800
170 #define INT_CLR3                0x1000
171 #define INT_CLR4                0x2000
172 #define REGPAGE                 0x4000
173 #define DMAEN                   0x8000
174
175 // External global variables
176
177 extern int start_logging;
178 extern int gpu_start_log;
179
180 // Private function prototypes
181
182 void GPUUpdateRegisterBanks(void);
183 void GPUDumpDisassembly(void);
184 void GPUDumpRegisters(void);
185 void GPUDumpMemory(void);
186
187 static void gpu_opcode_add(void);
188 static void gpu_opcode_addc(void);
189 static void gpu_opcode_addq(void);
190 static void gpu_opcode_addqt(void);
191 static void gpu_opcode_sub(void);
192 static void gpu_opcode_subc(void);
193 static void gpu_opcode_subq(void);
194 static void gpu_opcode_subqt(void);
195 static void gpu_opcode_neg(void);
196 static void gpu_opcode_and(void);
197 static void gpu_opcode_or(void);
198 static void gpu_opcode_xor(void);
199 static void gpu_opcode_not(void);
200 static void gpu_opcode_btst(void);
201 static void gpu_opcode_bset(void);
202 static void gpu_opcode_bclr(void);
203 static void gpu_opcode_mult(void);
204 static void gpu_opcode_imult(void);
205 static void gpu_opcode_imultn(void);
206 static void gpu_opcode_resmac(void);
207 static void gpu_opcode_imacn(void);
208 static void gpu_opcode_div(void);
209 static void gpu_opcode_abs(void);
210 static void gpu_opcode_sh(void);
211 static void gpu_opcode_shlq(void);
212 static void gpu_opcode_shrq(void);
213 static void gpu_opcode_sha(void);
214 static void gpu_opcode_sharq(void);
215 static void gpu_opcode_ror(void);
216 static void gpu_opcode_rorq(void);
217 static void gpu_opcode_cmp(void);
218 static void gpu_opcode_cmpq(void);
219 static void gpu_opcode_sat8(void);
220 static void gpu_opcode_sat16(void);
221 static void gpu_opcode_move(void);
222 static void gpu_opcode_moveq(void);
223 static void gpu_opcode_moveta(void);
224 static void gpu_opcode_movefa(void);
225 static void gpu_opcode_movei(void);
226 static void gpu_opcode_loadb(void);
227 static void gpu_opcode_loadw(void);
228 static void gpu_opcode_load(void);
229 static void gpu_opcode_loadp(void);
230 static void gpu_opcode_load_r14_indexed(void);
231 static void gpu_opcode_load_r15_indexed(void);
232 static void gpu_opcode_storeb(void);
233 static void gpu_opcode_storew(void);
234 static void gpu_opcode_store(void);
235 static void gpu_opcode_storep(void);
236 static void gpu_opcode_store_r14_indexed(void);
237 static void gpu_opcode_store_r15_indexed(void);
238 static void gpu_opcode_move_pc(void);
239 static void gpu_opcode_jump(void);
240 static void gpu_opcode_jr(void);
241 static void gpu_opcode_mmult(void);
242 static void gpu_opcode_mtoi(void);
243 static void gpu_opcode_normi(void);
244 static void gpu_opcode_nop(void);
245 static void gpu_opcode_load_r14_ri(void);
246 static void gpu_opcode_load_r15_ri(void);
247 static void gpu_opcode_store_r14_ri(void);
248 static void gpu_opcode_store_r15_ri(void);
249 static void gpu_opcode_sat24(void);
250 static void gpu_opcode_pack(void);
251
252 // This is wrong, since it doesn't take pipeline effects into account. !!! FIX !!!
253 /*uint8_t gpu_opcode_cycles[64] =
254 {
255         3,  3,  3,  3,  3,  3,  3,  3,
256         3,  3,  3,  3,  3,  3,  3,  3,
257         3,  3,  1,  3,  1, 18,  3,  3,
258         3,  3,  3,  3,  3,  3,  3,  3,
259         3,  3,  2,  2,  2,  2,  3,  4,
260         5,  4,  5,  6,  6,  1,  1,  1,
261         1,  2,  2,  2,  1,  1,  9,  3,
262         3,  1,  6,  6,  2,  2,  3,  3
263 };//*/
264 //Here's a QnD kludge...
265 //This is wrong, wrong, WRONG, but it seems to work for the time being...
266 //(That is, it fixes Flip Out which relies on GPU timing rather than semaphores. Bad developers! Bad!)
267 //What's needed here is a way to take pipeline effects into account (including pipeline stalls!)...
268 /*uint8_t gpu_opcode_cycles[64] =
269 {
270         1,  1,  1,  1,  1,  1,  1,  1,
271         1,  1,  1,  1,  1,  1,  1,  1,
272         1,  1,  1,  1,  1,  9,  1,  1,
273         1,  1,  1,  1,  1,  1,  1,  1,
274         1,  1,  1,  1,  1,  1,  1,  2,
275         2,  2,  2,  3,  3,  1,  1,  1,
276         1,  1,  1,  1,  1,  1,  4,  1,
277         1,  1,  3,  3,  1,  1,  1,  1
278 };//*/
// Cycle cost per opcode, indexed by the 6-bit opcode number (0-63).
// Every entry is 1: no per-instruction timing differences and no pipeline
// effects (stalls included) are modelled by this table.
uint8_t gpu_opcode_cycles[64] =
{
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,		// opcodes  0-15
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,		// opcodes 16-31
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,		// opcodes 32-47
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1		// opcodes 48-63
};
290
291 void (*gpu_opcode[64])()=
292 {
293         gpu_opcode_add,                                 gpu_opcode_addc,                                gpu_opcode_addq,                                gpu_opcode_addqt,
294         gpu_opcode_sub,                                 gpu_opcode_subc,                                gpu_opcode_subq,                                gpu_opcode_subqt,
295         gpu_opcode_neg,                                 gpu_opcode_and,                                 gpu_opcode_or,                                  gpu_opcode_xor,
296         gpu_opcode_not,                                 gpu_opcode_btst,                                gpu_opcode_bset,                                gpu_opcode_bclr,
297         gpu_opcode_mult,                                gpu_opcode_imult,                               gpu_opcode_imultn,                              gpu_opcode_resmac,
298         gpu_opcode_imacn,                               gpu_opcode_div,                                 gpu_opcode_abs,                                 gpu_opcode_sh,
299         gpu_opcode_shlq,                                gpu_opcode_shrq,                                gpu_opcode_sha,                                 gpu_opcode_sharq,
300         gpu_opcode_ror,                                 gpu_opcode_rorq,                                gpu_opcode_cmp,                                 gpu_opcode_cmpq,
301         gpu_opcode_sat8,                                gpu_opcode_sat16,                               gpu_opcode_move,                                gpu_opcode_moveq,
302         gpu_opcode_moveta,                              gpu_opcode_movefa,                              gpu_opcode_movei,                               gpu_opcode_loadb,
303         gpu_opcode_loadw,                               gpu_opcode_load,                                gpu_opcode_loadp,                               gpu_opcode_load_r14_indexed,
304         gpu_opcode_load_r15_indexed,    gpu_opcode_storeb,                              gpu_opcode_storew,                              gpu_opcode_store,
305         gpu_opcode_storep,                              gpu_opcode_store_r14_indexed,   gpu_opcode_store_r15_indexed,   gpu_opcode_move_pc,
306         gpu_opcode_jump,                                gpu_opcode_jr,                                  gpu_opcode_mmult,                               gpu_opcode_mtoi,
307         gpu_opcode_normi,                               gpu_opcode_nop,                                 gpu_opcode_load_r14_ri,                 gpu_opcode_load_r15_ri,
308         gpu_opcode_store_r14_ri,                gpu_opcode_store_r15_ri,                gpu_opcode_sat24,                               gpu_opcode_pack,
309 };
310
311 static uint8_t gpu_ram_8[0x1000];
312 uint32_t gpu_pc;
313 static uint32_t gpu_acc;
314 static uint32_t gpu_remain;
315 static uint32_t gpu_hidata;
316 static uint32_t gpu_flags;
317 static uint32_t gpu_matrix_control;
318 static uint32_t gpu_pointer_to_matrix;
319 static uint32_t gpu_data_organization;
320 static uint32_t gpu_control;
321 static uint32_t gpu_div_control;
322 // There is a distinct advantage to having these separated out--there's no need to clear
323 // a bit before writing a result. I.e., if the result of an operation leaves a zero in
324 // the carry flag, you don't have to zero gpu_flag_c before you can write that zero!
325 static uint8_t gpu_flag_z, gpu_flag_n, gpu_flag_c;
326 uint32_t gpu_reg_bank_0[32];
327 uint32_t gpu_reg_bank_1[32];
328 static uint32_t * gpu_reg;
329 static uint32_t * gpu_alternate_reg;
330
331 static uint32_t gpu_instruction;
332 static uint32_t gpu_opcode_first_parameter;
333 static uint32_t gpu_opcode_second_parameter;
334
335 #define GPU_RUNNING             (gpu_control & 0x01)
336
337 #define RM                              gpu_reg[gpu_opcode_first_parameter]
338 #define RN                              gpu_reg[gpu_opcode_second_parameter]
339 #define ALTERNATE_RM    gpu_alternate_reg[gpu_opcode_first_parameter]
340 #define ALTERNATE_RN    gpu_alternate_reg[gpu_opcode_second_parameter]
341 #define IMM_1                   gpu_opcode_first_parameter
342 #define IMM_2                   gpu_opcode_second_parameter
343
344 #define SET_FLAG_Z(r)   (gpu_flag_z = ((r) == 0));
345 #define SET_FLAG_N(r)   (gpu_flag_n = (((uint32_t)(r) >> 31) & 0x01));
346
347 #define RESET_FLAG_Z()  gpu_flag_z = 0;
348 #define RESET_FLAG_N()  gpu_flag_n = 0;
349 #define RESET_FLAG_C()  gpu_flag_c = 0;
350
// Flag helper macros operating on the split-out gpu_flag_z/n/c bytes.
//
// Each macro expands to ONE parenthesised expression, so it behaves like a
// single statement wherever it is used (including as the body of an unbraced
// if/else). The compound forms previously expanded to "a; b; c", which left
// everything after the first assignment outside a guarding 'if'.
//
// NOTE: the arguments are still evaluated more than once (e.g. 'r' in
// SET_ZN), so do not pass expressions with side effects.
#define CLR_Z				(gpu_flag_z = 0)
#define CLR_ZN				(gpu_flag_z = gpu_flag_n = 0)
#define CLR_ZNC				(gpu_flag_z = gpu_flag_n = gpu_flag_c = 0)
// Z is set when the result is zero
#define SET_Z(r)			(gpu_flag_z = ((r) == 0))
// N mirrors bit 31 of the result
#define SET_N(r)			(gpu_flag_n = (((uint32_t)(r) >> 31) & 0x01))
// Carry out of a + b: happens exactly when b > ~a (unsigned, 32 bit)
#define SET_C_ADD(a,b)		(gpu_flag_c = ((uint32_t)(b) > (uint32_t)(~(a))))
// Borrow out of a - b: happens exactly when b > a (unsigned, 32 bit)
#define SET_C_SUB(a,b)		(gpu_flag_c = ((uint32_t)(b) > (uint32_t)(a)))
#define SET_ZN(r)			(SET_N(r), SET_Z(r))
#define SET_ZNC_ADD(a,b,r)	(SET_N(r), SET_Z(r), SET_C_ADD(a,b))
#define SET_ZNC_SUB(a,b,r)	(SET_N(r), SET_Z(r), SET_C_SUB(a,b))
361
// Lookup table mapping a 5-bit value to itself, except that 0 maps to 32.
// (Presumably used for "quick" immediates where an encoded 0 means 32 --
// confirm against the opcode handlers.)
uint32_t gpu_convert_zero[32] =
{
	32,  1,  2,  3,  4,  5,  6,  7,
	 8,  9, 10, 11, 12, 13, 14, 15,
	16, 17, 18, 19, 20, 21, 22, 23,
	24, 25, 26, 27, 28, 29, 30, 31
};
364
365 uint8_t * branch_condition_table = 0;
366 #define BRANCH_CONDITION(x)     branch_condition_table[(x) + ((jaguar_flags & 7) << 5)]
367
368 uint32_t gpu_opcode_use[64];
369
// Mnemonic for each opcode number (0-63), in the same order as the
// gpu_opcode[] dispatch table.
const char * gpu_opcode_str[64]=
{
	"add",					//  0
	"addc",					//  1
	"addq",					//  2
	"addqt",				//  3
	"sub",					//  4
	"subc",					//  5
	"subq",					//  6
	"subqt",				//  7
	"neg",					//  8
	"and",					//  9
	"or",					// 10
	"xor",					// 11
	"not",					// 12
	"btst",					// 13
	"bset",					// 14
	"bclr",					// 15
	"mult",					// 16
	"imult",				// 17
	"imultn",				// 18
	"resmac",				// 19
	"imacn",				// 20
	"div",					// 21
	"abs",					// 22
	"sh",					// 23
	"shlq",					// 24
	"shrq",					// 25
	"sha",					// 26
	"sharq",				// 27
	"ror",					// 28
	"rorq",					// 29
	"cmp",					// 30
	"cmpq",					// 31
	"sat8",					// 32
	"sat16",				// 33
	"move",					// 34
	"moveq",				// 35
	"moveta",				// 36
	"movefa",				// 37
	"movei",				// 38
	"loadb",				// 39
	"loadw",				// 40
	"load",					// 41
	"loadp",				// 42
	"load_r14_indexed",		// 43
	"load_r15_indexed",		// 44
	"storeb",				// 45
	"storew",				// 46
	"store",				// 47
	"storep",				// 48
	"store_r14_indexed",	// 49
	"store_r15_indexed",	// 50
	"move_pc",				// 51
	"jump",					// 52
	"jr",					// 53
	"mmult",				// 54
	"mtoi",					// 55
	"normi",				// 56
	"nop",					// 57
	"load_r14_ri",			// 58
	"load_r15_ri",			// 59
	"store_r14_ri",			// 60
	"store_r15_ri",			// 61
	"sat24",				// 62
	"pack",					// 63
};
389
// Execution bookkeeping. Both start out cleared.
static uint32_t gpu_in_exec = 0;
static uint32_t gpu_releaseTimeSlice_flag = 0;

//
// Raise the "release timeslice" flag.
// This only sets gpu_releaseTimeSlice_flag to 1; whoever polls the flag is
// responsible for acting on it and clearing it again.
//
void GPUReleaseTimeslice(void)
{
	gpu_releaseTimeSlice_flag = 1;
}
397
398 uint32_t GPUGetPC(void)
399 {
400         return gpu_pc;
401 }
402
403 void build_branch_condition_table(void)
404 {
405         if (!branch_condition_table)
406         {
407                 branch_condition_table = (uint8_t *)malloc(32 * 8 * sizeof(branch_condition_table[0]));
408
409                 if (branch_condition_table)
410                 {
411                         for(int i=0; i<8; i++)
412                         {
413                                 for(int j=0; j<32; j++)
414                                 {
415                                         int result = 1;
416                                         if (j & 1)
417                                                 if (i & ZERO_FLAG)
418                                                         result = 0;
419                                         if (j & 2)
420                                                 if (!(i & ZERO_FLAG))
421                                                         result = 0;
422                                         if (j & 4)
423                                                 if (i & (CARRY_FLAG << (j >> 4)))
424                                                         result = 0;
425                                         if (j & 8)
426                                                 if (!(i & (CARRY_FLAG << (j >> 4))))
427                                                         result = 0;
428                                         branch_condition_table[i * 32 + j] = result;
429                                 }
430                         }
431                 }
432         }
433 }
434
435 //
436 // GPU byte access (read)
437 //
438 uint8_t GPUReadByte(uint32_t offset, uint32_t who/*=UNKNOWN*/)
439 {
440         if (offset >= 0xF02000 && offset <= 0xF020FF)
441                 WriteLog("GPU: ReadByte--Attempt to read from GPU register file by %s!\n", whoName[who]);
442
443         if ((offset >= GPU_WORK_RAM_BASE) && (offset < GPU_WORK_RAM_BASE+0x1000))
444                 return gpu_ram_8[offset & 0xFFF];
445         else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset < GPU_CONTROL_RAM_BASE+0x20))
446         {
447                 uint32_t data = GPUReadLong(offset & 0xFFFFFFFC, who);
448
449                 if ((offset & 0x03) == 0)
450                         return data >> 24;
451                 else if ((offset & 0x03) == 1)
452                         return (data >> 16) & 0xFF;
453                 else if ((offset & 0x03) == 2)
454                         return (data >> 8) & 0xFF;
455                 else if ((offset & 0x03) == 3)
456                         return data & 0xFF;
457         }
458
459         return JaguarReadByte(offset, who);
460 }
461
462 //
463 // GPU word access (read)
464 //
465 uint16_t GPUReadWord(uint32_t offset, uint32_t who/*=UNKNOWN*/)
466 {
467         if (offset >= 0xF02000 && offset <= 0xF020FF)
468                 WriteLog("GPU: ReadWord--Attempt to read from GPU register file by %s!\n", whoName[who]);
469
470         if ((offset >= GPU_WORK_RAM_BASE) && (offset < GPU_WORK_RAM_BASE+0x1000))
471         {
472                 offset &= 0xFFF;
473                 uint16_t data = ((uint16_t)gpu_ram_8[offset] << 8) | (uint16_t)gpu_ram_8[offset+1];
474                 return data;
475         }
476         else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset < GPU_CONTROL_RAM_BASE+0x20))
477         {
478 // This looks and smells wrong...
479 // But it *might* be OK...
480                 if (offset & 0x01)                      // Catch cases 1 & 3... (unaligned read)
481                         return (GPUReadByte(offset, who) << 8) | GPUReadByte(offset+1, who);
482
483                 uint32_t data = GPUReadLong(offset & 0xFFFFFFFC, who);
484
485                 if (offset & 0x02)                      // Cases 0 & 2...
486                         return data & 0xFFFF;
487                 else
488                         return data >> 16;
489         }
490
491 //TEMP--Mirror of F03000? No. Writes only...
492 //if (offset >= 0xF0B000 && offset <= 0xF0BFFF)
493 //WriteLog("[GPUR16] --> Possible GPU RAM mirror access by %s!", whoName[who]);
494
495         return JaguarReadWord(offset, who);
496 }
497
498 //
499 // GPU dword access (read)
500 //
501 uint32_t GPUReadLong(uint32_t offset, uint32_t who/*=UNKNOWN*/)
502 {
503         if (offset >= 0xF02000 && offset <= 0xF020FF)
504         {
505                 WriteLog("GPU: ReadLong--Attempt to read from GPU register file (%X) by %s!\n", offset, whoName[who]);
506                 uint32_t reg = (offset & 0xFC) >> 2;
507                 return (reg < 32 ? gpu_reg_bank_0[reg] : gpu_reg_bank_1[reg - 32]); 
508         }
509
510 //      if ((offset >= GPU_WORK_RAM_BASE) && (offset < GPU_WORK_RAM_BASE + 0x1000))
511         if ((offset >= GPU_WORK_RAM_BASE) && (offset <= GPU_WORK_RAM_BASE + 0x0FFC))
512         {
513                 offset &= 0xFFF;
514                 return ((uint32_t)gpu_ram_8[offset] << 24) | ((uint32_t)gpu_ram_8[offset+1] << 16)
515                         | ((uint32_t)gpu_ram_8[offset+2] << 8) | (uint32_t)gpu_ram_8[offset+3];//*/
516 //              return GET32(gpu_ram_8, offset);
517         }
518 //      else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset < GPU_CONTROL_RAM_BASE+0x20))
519         else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset <= GPU_CONTROL_RAM_BASE + 0x1C))
520         {
521                 offset &= 0x1F;
522                 switch (offset)
523                 {
524                 case 0x00:
525                         gpu_flag_c = (gpu_flag_c ? 1 : 0);
526                         gpu_flag_z = (gpu_flag_z ? 1 : 0);
527                         gpu_flag_n = (gpu_flag_n ? 1 : 0);
528
529                         gpu_flags = (gpu_flags & 0xFFFFFFF8) | (gpu_flag_n << 2) | (gpu_flag_c << 1) | gpu_flag_z;
530
531                         return gpu_flags & 0xFFFFC1FF;
532                 case 0x04:
533                         return gpu_matrix_control;
534                 case 0x08:
535                         return gpu_pointer_to_matrix;
536                 case 0x0C:
537                         return gpu_data_organization;
538                 case 0x10:
539                         return gpu_pc;
540                 case 0x14:
541                         return gpu_control;
542                 case 0x18:
543                         return gpu_hidata;
544                 case 0x1C:
545                         return gpu_remain;
546                 default:                                                                // unaligned long read
547 #ifdef GPU_DEBUG
548                         WriteLog("GPU: Read32--unaligned 32 bit read at %08X by %s.\n", GPU_CONTROL_RAM_BASE + offset, whoName[who]);
549 #endif  // GPU_DEBUG
550                         return 0;
551                 }
552         }
553 //TEMP--Mirror of F03000? No. Writes only...
554 //if (offset >= 0xF0B000 && offset <= 0xF0BFFF)
555 //      WriteLog("[GPUR32] --> Possible GPU RAM mirror access by %s!\n", whoName[who]);
556 /*if (offset >= 0xF1D000 && offset <= 0xF1DFFF)
557         WriteLog("[GPUR32] --> Reading from Wavetable ROM!\n");//*/
558
559         return (JaguarReadWord(offset, who) << 16) | JaguarReadWord(offset + 2, who);
560 }
561
562 //
563 // GPU byte access (write)
564 //
565 void GPUWriteByte(uint32_t offset, uint8_t data, uint32_t who/*=UNKNOWN*/)
566 {
567         if (offset >= 0xF02000 && offset <= 0xF020FF)
568                 WriteLog("GPU: WriteByte--Attempt to write to GPU register file by %s!\n", whoName[who]);
569
570         if ((offset >= GPU_WORK_RAM_BASE) && (offset <= GPU_WORK_RAM_BASE + 0x0FFF))
571         {
572                 gpu_ram_8[offset & 0xFFF] = data;
573
574 //This is the same stupid worthless code that was in the DSP!!! AARRRGGGGHHHHH!!!!!!
575 /*              if (!gpu_in_exec)
576                 {
577                         m68k_end_timeslice();
578                         dsp_releaseTimeslice();
579                 }*/
580                 return;
581         }
582         else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset <= GPU_CONTROL_RAM_BASE + 0x1F))
583         {
584                 uint32_t reg = offset & 0x1C;
585                 int bytenum = offset & 0x03;
586
587 //This is definitely wrong!
588                 if ((reg >= 0x1C) && (reg <= 0x1F))
589                         gpu_div_control = (gpu_div_control & (~(0xFF << (bytenum << 3)))) | (data << (bytenum << 3));
590                 else
591                 {
592                         uint32_t old_data = GPUReadLong(offset & 0xFFFFFFC, who);
593                         bytenum = 3 - bytenum; // convention motorola !!!
594                         old_data = (old_data & (~(0xFF << (bytenum << 3)))) | (data << (bytenum << 3));
595                         GPUWriteLong(offset & 0xFFFFFFC, old_data, who);
596                 }
597                 return;
598         }
599 //      WriteLog("gpu: writing %.2x at 0x%.8x\n",data,offset);
600         JaguarWriteByte(offset, data, who);
601 }
602
603 //
604 // GPU word access (write)
605 //
606 void GPUWriteWord(uint32_t offset, uint16_t data, uint32_t who/*=UNKNOWN*/)
607 {
608         if (offset >= 0xF02000 && offset <= 0xF020FF)
609                 WriteLog("GPU: WriteWord--Attempt to write to GPU register file by %s!\n", whoName[who]);
610
611         if ((offset >= GPU_WORK_RAM_BASE) && (offset <= GPU_WORK_RAM_BASE + 0x0FFE))
612         {
613                 gpu_ram_8[offset & 0xFFF] = (data>>8) & 0xFF;
614                 gpu_ram_8[(offset+1) & 0xFFF] = data & 0xFF;//*/
615 /*              offset &= 0xFFF;
616                 SET16(gpu_ram_8, offset, data);//*/
617
618 /*if (offset >= 0xF03214 && offset < 0xF0321F)
619         WriteLog("GPU: Writing WORD (%04X) to GPU RAM (%08X)...\n", data, offset);//*/
620
621
622 //This is the same stupid worthless code that was in the DSP!!! AARRRGGGGHHHHH!!!!!!
623 /*              if (!gpu_in_exec)
624                 {
625                         m68k_end_timeslice();
626                         dsp_releaseTimeslice();
627                 }*/
628                 return;
629         }
630         else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset <= GPU_CONTROL_RAM_BASE + 0x1E))
631         {
632                 if (offset & 0x01)              // This is supposed to weed out unaligned writes, but does nothing...
633                 {
634 #ifdef GPU_DEBUG
635                         WriteLog("GPU: Write16--unaligned write @ %08X [%04X]\n", offset, data);
636                         GPUDumpRegisters();
637 #endif  // GPU_DEBUG
638                         return;
639                 }
640 //Dual locations in this range: $1C Divide unit remainder/Divide unit control (R/W)
641 //This just literally sucks.
642                 if ((offset & 0x1C) == 0x1C)
643                 {
644 //This doesn't look right either--handles cases 1, 2, & 3 all the same!
645                         if (offset & 0x02)
646                                 gpu_div_control = (gpu_div_control & 0xFFFF0000) | (data & 0xFFFF);
647                         else
648                                 gpu_div_control = (gpu_div_control & 0x0000FFFF) | ((data & 0xFFFF) << 16);
649                 }
650                 else
651                 {
652 //WriteLog("[GPU W16:%08X,%04X]", offset, data);
653                         uint32_t old_data = GPUReadLong(offset & 0xFFFFFFC, who);
654
655                         if (offset & 0x02)
656                                 old_data = (old_data & 0xFFFF0000) | (data & 0xFFFF);
657                         else
658                                 old_data = (old_data & 0x0000FFFF) | ((data & 0xFFFF) << 16);
659
660                         GPUWriteLong(offset & 0xFFFFFFC, old_data, who);
661                 }
662
663                 return;
664         }
665         else if ((offset == GPU_WORK_RAM_BASE + 0x0FFF) || (GPU_CONTROL_RAM_BASE + 0x1F))
666         {
667 #ifdef GPU_DEBUG
668                         WriteLog("GPU: Write16--unaligned write @ %08X by %s [%04X]!\n", offset, whoName[who], data);
669                         GPUDumpRegisters();
670 #endif  // GPU_DEBUG
671                 return;
672         }
673
674         // Have to be careful here--this can cause an infinite loop!
675         JaguarWriteWord(offset, data, who);
676 }
677
678 //
679 // GPU dword access (write)
680 //
681 void GPUWriteLong(uint32_t offset, uint32_t data, uint32_t who/*=UNKNOWN*/)
682 {
683         if (offset >= 0xF02000 && offset <= 0xF020FF)
684                 WriteLog("GPU: WriteLong--Attempt to write to GPU register file by %s!\n", whoName[who]);
685
686 //      if ((offset >= GPU_WORK_RAM_BASE) && (offset < GPU_WORK_RAM_BASE + 0x1000))
687         if ((offset >= GPU_WORK_RAM_BASE) && (offset <= GPU_WORK_RAM_BASE + 0x0FFC))
688         {
689 #ifdef GPU_DEBUG
690                 if (offset & 0x03)
691                 {
692                         WriteLog("GPU: Write32--unaligned write @ %08X [%08X] by %s\n", offset, data, whoName[who]);
693                         GPUDumpRegisters();
694                 }
695 #endif  // GPU_DEBUG
696
697                 offset &= 0xFFF;
698                 SET32(gpu_ram_8, offset, data);
699                 return;
700         }
701 //      else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset < GPU_CONTROL_RAM_BASE+0x20))
702         else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset <= GPU_CONTROL_RAM_BASE + 0x1C))
703         {
704                 offset &= 0x1F;
705                 switch (offset)
706                 {
707                 case 0x00:
708                 {
709                         bool IMASKCleared = (gpu_flags & IMASK) && !(data & IMASK);
710                         // NOTE: According to the JTRM, writing a 1 to IMASK has no effect; only the
711                         //       IRQ logic can set it. So we mask it out here to prevent problems...
712                         gpu_flags = data & (~IMASK);
713                         gpu_flag_z = gpu_flags & ZERO_FLAG;
714                         gpu_flag_c = (gpu_flags & CARRY_FLAG) >> 1;
715                         gpu_flag_n = (gpu_flags & NEGA_FLAG) >> 2;
716                         GPUUpdateRegisterBanks();
717                         gpu_control &= ~((gpu_flags & CINT04FLAGS) >> 3);       // Interrupt latch clear bits
718 //Writing here is only an interrupt enable--this approach is just plain wrong!
719 //                      GPUHandleIRQs();
720 //This, however, is A-OK! ;-)
721                         if (IMASKCleared)                                               // If IMASK was cleared,
722                                 GPUHandleIRQs();                                        // see if any other interrupts need servicing!
723 #ifdef GPU_DEBUG
724                         if (gpu_flags & (INT_ENA0 | INT_ENA1 | INT_ENA2 | INT_ENA3 | INT_ENA4))
725                                 WriteLog("GPU: Interrupt enable set by %s! Bits: %02X\n", whoName[who], (gpu_flags >> 4) & 0x1F);
726                         WriteLog("GPU: REGPAGE %s...\n", (gpu_flags & REGPAGE ? "set" : "cleared"));
727 #endif  // GPU_DEBUG
728                         break;
729                 }
730                 case 0x04:
731                         gpu_matrix_control = data;
732                         break;
733                 case 0x08:
734                         // This can only point to long aligned addresses
735                         gpu_pointer_to_matrix = data & 0xFFFFFFFC;
736                         break;
737                 case 0x0C:
738                         gpu_data_organization = data;
739                         break;
740                 case 0x10:
741                         gpu_pc = data;
742 #ifdef GPU_DEBUG
743 WriteLog("GPU: %s setting GPU PC to %08X %s\n", whoName[who], gpu_pc, (GPU_RUNNING ? "(GPU is RUNNING!)" : ""));//*/
744 #endif  // GPU_DEBUG
745                         break;
746                 case 0x14:
747                 {
748 //                      uint32_t gpu_was_running = GPU_RUNNING;
749                         data &= ~0xF7C0;                // Disable writes to INT_LAT0-4 & TOM version number
750
751                         // check for GPU -> CPU interrupt
752                         if (data & 0x02)
753                         {
754 //WriteLog("GPU->CPU interrupt\n");
755                                 if (TOMIRQEnabled(IRQ_GPU))
756                                 {
757 //This is the programmer's responsibility, to make sure the handler is valid, not ours!
758 //                                      if ((TOMIRQEnabled(IRQ_GPU))// && (JaguarInterruptHandlerIsValid(64)))
759                                         {
760                                                 TOMSetPendingGPUInt();
761                                                 m68k_set_irq(2);                        // Set 68000 IPL 2
762                                                 GPUReleaseTimeslice();
763                                         }
764                                 }
765                                 data &= ~0x02;
766                         }
767
768                         // check for CPU -> GPU interrupt #0
769                         if (data & 0x04)
770                         {
771 //WriteLog("CPU->GPU interrupt\n");
772                                 GPUSetIRQLine(0, ASSERT_LINE);
773                                 m68k_end_timeslice();
774                                 DSPReleaseTimeslice();
775                                 data &= ~0x04;
776                         }
777
778                         // single stepping
779                         if (data & 0x10)
780                         {
781                                 //WriteLog("asked to perform a single step (single step is %senabled)\n",(data&0x8)?"":"not ");
782                         }
783
784                         gpu_control = (gpu_control & 0xF7C0) | (data & (~0xF7C0));
785
786                         // if gpu wasn't running but is now running, execute a few cycles
787 #ifndef GPU_SINGLE_STEPPING
788 /*                      if (!gpu_was_running && GPU_RUNNING)
789 #ifdef GPU_DEBUG
790                         {
791                                 WriteLog("GPU: Write32--About to do stupid braindead GPU execution for 200 cycles.\n");
792 #endif  // GPU_DEBUG
793                                 GPUExec(200);
794 #ifdef GPU_DEBUG
795                         }
796 #endif  // GPU_DEBUG//*/
797 #else
798                         if (gpu_control & 0x18)
799                                 GPUExec(1);
800 #endif  // #ifndef GPU_SINGLE_STEPPING
801 #ifdef GPU_DEBUG
802 WriteLog("Write to GPU CTRL by %s: %08X ", whoName[who], data);
803 if (GPU_RUNNING)
804         WriteLog(" --> Starting to run at %08X by %s...", gpu_pc, whoName[who]);
805 else
806         WriteLog(" --> Stopped by %s! (GPU_PC: %08X)", whoName[who], gpu_pc);
807 WriteLog("\n");
808 #endif  // GPU_DEBUG
809 //if (GPU_RUNNING)
810 //      GPUDumpDisassembly();
811 /*if (GPU_RUNNING)
812 {
813         if (gpu_pc == 0xF035D8)
814         {
815 //              GPUDumpDisassembly();
816 //              log_done();
817 //              exit(1);
818                 gpu_control &= 0xFFFFFFFE;      // Don't run it and let's see what happens!
819 //Hmm. Seems to lock up when going into the demo...
820 //Try to disable the collision altogether!
821         }
822 }//*/
823 extern int effect_start5;
824 static bool finished = false;
825 //if (GPU_RUNNING && effect_start5 && !finished)
826 if (GPU_RUNNING && effect_start5 && gpu_pc == 0xF035D8)
827 {
828         // Let's do a dump of $6528!
829 /*      uint32_t numItems = JaguarReadWord(0x6BD6);
830         WriteLog("\nDump of $6528: %u items.\n\n", numItems);
831         for(int i=0; i<numItems*3*4; i+=3*4)
832         {
833                 WriteLog("\t%04X: %08X %08X %08X -> ", 0x6528+i, JaguarReadLong(0x6528+i),
834                         JaguarReadLong(0x6528+i+4), JaguarReadLong(0x6528+i+8));
835                 uint16_t link = JaguarReadWord(0x6528+i+8+2);
836                 for(int j=0; j<40; j+=4)
837                         WriteLog("%08X ", JaguarReadLong(link + j));
838                 WriteLog("\n");
839         }
840         WriteLog("\n");//*/
841         // Let's try a manual blit here...
842 //This isn't working the way it should! !!! FIX !!!
843 //Err, actually, it is.
844 // NOW, it works right! Problem solved!!! It's a blitter bug!
845 /*      uint32_t src = 0x4D54, dst = 0xF03000, width = 10 * 4;
846         for(int y=0; y<127; y++)
847         {
848                 for(int x=0; x<2; x++)
849                 {
850                         JaguarWriteLong(dst, JaguarReadLong(src));
851
852                         src += 4;
853                         dst += 4;
854                 }
855                 src += width - (2 * 4);
856         }//*/
857 /*      finished = true;
858         doGPUDis = true;
859         WriteLog("\nGPU: About to execute collision detection code.\n\n");//*/
860
861 /*      WriteLog("\nGPU: About to execute collision detection code. Data @ 4D54:\n\n");
862         int count = 0;
863         for(int i=0x004D54; i<0x004D54+2048; i++)
864         {
865                 WriteLog("%02X ", JaguarReadByte(i));
866                 count++;
867                 if (count == 32)
868                 {
869                         count = 0;
870                         WriteLog("\n");
871                 }
872         }
873         WriteLog("\n\nData @ F03000:\n\n");
874         count = 0;
875         for(int i=0xF03000; i<0xF03200; i++)
876         {
877                 WriteLog("%02X ", JaguarReadByte(i));
878                 count++;
879                 if (count == 32)
880                 {
881                         count = 0;
882                         WriteLog("\n");
883                 }
884         }
885         WriteLog("\n\n");
886         log_done();
887         exit(0);//*/
888 }
889 //if (!GPU_RUNNING)
890 //      doGPUDis = false;
891 /*if (!GPU_RUNNING && finished)
892 {
893         WriteLog("\nGPU: Finished collision detection code. Exiting!\n\n");
894         GPUDumpRegisters();
895         log_done();
896         exit(0);
897 }//*/
898                         // (?) If we're set running by the M68K (or DSP?) then end its timeslice to
899                         // allow the GPU a chance to run...
900                         // Yes! This partially fixed Trevor McFur...
901                         if (GPU_RUNNING)
902                                 m68k_end_timeslice();
903                         break;
904                 }
905                 case 0x18:
906                         gpu_hidata = data;
907                         break;
908                 case 0x1C:
909                         gpu_div_control = data;
910                         break;
911 //              default:   // unaligned long write
912                         //exit(0);
913                         //__asm int 3
914                 }
915                 return;
916         }
917
918 //      JaguarWriteWord(offset, (data >> 16) & 0xFFFF, who);
919 //      JaguarWriteWord(offset+2, data & 0xFFFF, who);
920 // We're a 32-bit processor, we can do a long write...!
921         JaguarWriteLong(offset, data, who);
922 }
923
924 //
925 // Change register banks if necessary
926 //
927 void GPUUpdateRegisterBanks(void)
928 {
929         int bank = (gpu_flags & REGPAGE);               // REGPAGE bit
930
931         if (gpu_flags & IMASK)                                  // IMASK bit
932                 bank = 0;                                                       // IMASK forces main bank to be bank 0
933
934         if (bank)
935                 gpu_reg = gpu_reg_bank_1, gpu_alternate_reg = gpu_reg_bank_0;
936         else
937                 gpu_reg = gpu_reg_bank_0, gpu_alternate_reg = gpu_reg_bank_1;
938 }
939
940 void GPUHandleIRQs(void)
941 {
942         // Bail out if we're already in an interrupt!
943         if (gpu_flags & IMASK)
944                 return;
945
946         // Get the interrupt latch & enable bits
947         uint32_t bits = (gpu_control >> 6) & 0x1F, mask = (gpu_flags >> 4) & 0x1F;
948
949         // Bail out if latched interrupts aren't enabled
950         bits &= mask;
951         if (!bits)
952                 return;
953
954         // Determine which interrupt to service
955         uint32_t which = 0; //Isn't there a #pragma to disable this warning???
956         if (bits & 0x01)
957                 which = 0;
958         if (bits & 0x02)
959                 which = 1;
960         if (bits & 0x04)
961                 which = 2;
962         if (bits & 0x08)
963                 which = 3;
964         if (bits & 0x10)
965                 which = 4;
966
967         if (start_logging)
968                 WriteLog("GPU: Generating IRQ #%i\n", which);
969
970         // set the interrupt flag
971         gpu_flags |= IMASK;
972         GPUUpdateRegisterBanks();
973
974         // subqt  #4,r31                ; pre-decrement stack pointer
975         // move  pc,r30                 ; address of interrupted code
976         // store  r30,(r31)     ; store return address
977         gpu_reg[31] -= 4;
978         GPUWriteLong(gpu_reg[31], gpu_pc - 2, GPU);
979
980         // movei  #service_address,r30  ; pointer to ISR entry
981         // jump  (r30)                                  ; jump to ISR
982         // nop
983         gpu_pc = gpu_reg[30] = GPU_WORK_RAM_BASE + (which * 0x10);
984 }
985
986 void GPUSetIRQLine(int irqline, int state)
987 {
988         if (start_logging)
989                 WriteLog("GPU: Setting GPU IRQ line #%i\n", irqline);
990
991         uint32_t mask = 0x0040 << irqline;
992         gpu_control &= ~mask;                           // Clear the interrupt latch
993
994         if (state)
995         {
996                 gpu_control |= mask;                    // Assert the interrupt latch
997                 GPUHandleIRQs();                                // And handle the interrupt...
998         }
999 }
1000
1001 //TEMPORARY: Testing only!
1002 //#include "gpu2.h"
1003 //#include "gpu3.h"
1004
1005 void GPUInit(void)
1006 {
1007 //      memory_malloc_secure((void **)&gpu_ram_8, 0x1000, "GPU work RAM");
1008 //      memory_malloc_secure((void **)&gpu_reg_bank_0, 32 * sizeof(int32_t), "GPU bank 0 regs");
1009 //      memory_malloc_secure((void **)&gpu_reg_bank_1, 32 * sizeof(int32_t), "GPU bank 1 regs");
1010
1011         build_branch_condition_table();
1012
1013         GPUReset();
1014
1015 //TEMPORARY: Testing only!
1016 //      gpu2_init();
1017 //      gpu3_init();
1018 }
1019
1020 void GPUReset(void)
1021 {
1022         // GPU registers (directly visible)
1023         gpu_flags                         = 0x00000000;
1024         gpu_matrix_control    = 0x00000000;
1025         gpu_pointer_to_matrix = 0x00000000;
1026         gpu_data_organization = 0xFFFFFFFF;
1027         gpu_pc                            = 0x00F03000;
1028         gpu_control                       = 0x00002800;                 // Correctly sets this as TOM Rev. 2
1029         gpu_hidata                        = 0x00000000;
1030         gpu_remain                        = 0x00000000;                 // These two registers are RO/WO
1031         gpu_div_control           = 0x00000000;
1032
1033         // GPU internal register
1034         gpu_acc                           = 0x00000000;
1035
1036         gpu_reg = gpu_reg_bank_0;
1037         gpu_alternate_reg = gpu_reg_bank_1;
1038
1039         for(int i=0; i<32; i++)
1040                 gpu_reg[i] = gpu_alternate_reg[i] = 0x00000000;
1041
1042         CLR_ZNC;
1043         memset(gpu_ram_8, 0xFF, 0x1000);
1044         gpu_in_exec = 0;
1045 //not needed    GPUInterruptPending = false;
1046         GPUResetStats();
1047 }
1048
1049 uint32_t GPUReadPC(void)
1050 {
1051         return gpu_pc;
1052 }
1053
1054 void GPUResetStats(void)
1055 {
1056         for(uint32_t i=0; i<64; i++)
1057                 gpu_opcode_use[i] = 0;
1058         WriteLog("--> GPU stats were reset!\n");
1059 }
1060
1061 void GPUDumpDisassembly(void)
1062 {
1063         char buffer[512];
1064
1065         WriteLog("\n---[GPU code at 00F03000]---------------------------\n");
1066         uint32_t j = 0xF03000;
1067         while (j <= 0xF03FFF)
1068         {
1069                 uint32_t oldj = j;
1070                 j += dasmjag(JAGUAR_GPU, buffer, j);
1071                 WriteLog("\t%08X: %s\n", oldj, buffer);
1072         }
1073 }
1074
1075 void GPUDumpRegisters(void)
1076 {
1077         WriteLog("\n---[GPU flags: NCZ %d%d%d]-----------------------\n", gpu_flag_n, gpu_flag_c, gpu_flag_z);
1078         WriteLog("\nRegisters bank 0\n");
1079         for(int j=0; j<8; j++)
1080         {
1081                 WriteLog("\tR%02i = %08X R%02i = %08X R%02i = %08X R%02i = %08X\n",
1082                                                   (j << 2) + 0, gpu_reg_bank_0[(j << 2) + 0],
1083                                                   (j << 2) + 1, gpu_reg_bank_0[(j << 2) + 1],
1084                                                   (j << 2) + 2, gpu_reg_bank_0[(j << 2) + 2],
1085                                                   (j << 2) + 3, gpu_reg_bank_0[(j << 2) + 3]);
1086         }
1087         WriteLog("Registers bank 1\n");
1088         for(int j=0; j<8; j++)
1089         {
1090                 WriteLog("\tR%02i = %08X R%02i = %08X R%02i = %08X R%02i = %08X\n",
1091                                                   (j << 2) + 0, gpu_reg_bank_1[(j << 2) + 0],
1092                                                   (j << 2) + 1, gpu_reg_bank_1[(j << 2) + 1],
1093                                                   (j << 2) + 2, gpu_reg_bank_1[(j << 2) + 2],
1094                                                   (j << 2) + 3, gpu_reg_bank_1[(j << 2) + 3]);
1095         }
1096 }
1097
1098 void GPUDumpMemory(void)
1099 {
1100         WriteLog("\n---[GPU data at 00F03000]---------------------------\n");
1101         for(int i=0; i<0xFFF; i+=4)
1102                 WriteLog("\t%08X: %02X %02X %02X %02X\n", 0xF03000+i, gpu_ram_8[i],
1103                         gpu_ram_8[i+1], gpu_ram_8[i+2], gpu_ram_8[i+3]);
1104 }
1105
1106 void GPUDone(void)
1107 {
1108         WriteLog("GPU: Stopped at PC=%08X (GPU %s running)\n", (unsigned int)gpu_pc, GPU_RUNNING ? "was" : "wasn't");
1109
1110         // Get the interrupt latch & enable bits
1111         uint8_t bits = (gpu_control >> 6) & 0x1F, mask = (gpu_flags >> 4) & 0x1F;
1112         WriteLog("GPU: Latch bits = %02X, enable bits = %02X\n", bits, mask);
1113
1114         GPUDumpRegisters();
1115         GPUDumpDisassembly();
1116
1117         WriteLog("\nGPU opcodes use:\n");
1118         for(int i=0; i<64; i++)
1119         {
1120                 if (gpu_opcode_use[i])
1121                         WriteLog("\t%17s %lu\n", gpu_opcode_str[i], gpu_opcode_use[i]);
1122         }
1123         WriteLog("\n");
1124
1125 //      memory_free(gpu_ram_8);
1126 //      memory_free(gpu_reg_bank_0);
1127 //      memory_free(gpu_reg_bank_1);
1128 }
1129
1130 //
1131 // Main GPU execution core
1132 //
1133 static int testCount = 1;
1134 static int len = 0;
1135 static bool tripwire = false;
1136 void GPUExec(int32_t cycles)
1137 {
1138         if (!GPU_RUNNING)
1139                 return;
1140
1141 #ifdef GPU_SINGLE_STEPPING
1142         if (gpu_control & 0x18)
1143         {
1144                 cycles = 1;
1145                 gpu_control &= ~0x10;
1146         }
1147 #endif
1148         GPUHandleIRQs();
1149         gpu_releaseTimeSlice_flag = 0;
1150         gpu_in_exec++;
1151
1152         while (cycles > 0 && GPU_RUNNING)
1153         {
1154 if (gpu_ram_8[0x054] == 0x98 && gpu_ram_8[0x055] == 0x0A && gpu_ram_8[0x056] == 0x03
1155         && gpu_ram_8[0x057] == 0x00 && gpu_ram_8[0x058] == 0x00 && gpu_ram_8[0x059] == 0x00)
1156 {
1157         if (gpu_pc == 0xF03000)
1158         {
1159                 extern uint32_t starCount;
1160                 starCount = 0;
1161 /*              WriteLog("GPU: Starting starfield generator... Dump of [R03=%08X]:\n", gpu_reg_bank_0[03]);
1162                 uint32_t base = gpu_reg_bank_0[3];
1163                 for(uint32_t i=0; i<0x100; i+=16)
1164                 {
1165                         WriteLog("%02X: ", i);
1166                         for(uint32_t j=0; j<16; j++)
1167                         {
1168                                 WriteLog("%02X ", JaguarReadByte(base + i + j));
1169                         }
1170                         WriteLog("\n");
1171                 }*/
1172         }
1173 //      if (gpu_pc == 0xF03)
1174         {
1175         }
1176 }//*/
1177 /*if (gpu_pc == 0xF03B9E && gpu_reg_bank_0[01] == 0)
1178 {
1179         GPUDumpRegisters();
1180         WriteLog("GPU: Starting disassembly log...\n");
1181         doGPUDis = true;
1182 }//*/
1183 /*if (gpu_pc == 0xF0359A)
1184 {
1185         doGPUDis = true;
1186         GPUDumpRegisters();
1187 }*/
1188 /*              gpu_flag_c = (gpu_flag_c ? 1 : 0);
1189                 gpu_flag_z = (gpu_flag_z ? 1 : 0);
1190                 gpu_flag_n = (gpu_flag_n ? 1 : 0);*/
1191
1192                 uint16_t opcode = GPUReadWord(gpu_pc, GPU);
1193                 uint32_t index = opcode >> 10;
1194                 gpu_instruction = opcode;                               // Added for GPU #3...
1195                 gpu_opcode_first_parameter = (opcode >> 5) & 0x1F;
1196                 gpu_opcode_second_parameter = opcode & 0x1F;
1197 /*if (gpu_pc == 0xF03BE8)
1198 WriteLog("Start of OP frame write...\n");
1199 if (gpu_pc == 0xF03EEE)
1200 WriteLog("--> Writing BRANCH object ---\n");
1201 if (gpu_pc == 0xF03F62)
1202 WriteLog("--> Writing BITMAP object ***\n");//*/
1203 /*if (gpu_pc == 0xF03546)
1204 {
1205         WriteLog("\n--> GPU PC: F03546\n");
1206         GPUDumpRegisters();
1207         GPUDumpDisassembly();
1208 }//*/
1209 /*if (gpu_pc == 0xF033F6)
1210 {
1211         WriteLog("\n--> GPU PC: F033F6\n");
1212         GPUDumpRegisters();
1213         GPUDumpDisassembly();
1214 }//*/
1215 /*if (gpu_pc == 0xF033CC)
1216 {
1217         WriteLog("\n--> GPU PC: F033CC\n");
1218         GPUDumpRegisters();
1219         GPUDumpDisassembly();
1220 }//*/
1221 /*if (gpu_pc == 0xF033D6)
1222 {
1223         WriteLog("\n--> GPU PC: F033D6 (#%d)\n", testCount++);
1224         GPUDumpRegisters();
1225         GPUDumpMemory();
1226 }//*/
1227 /*if (gpu_pc == 0xF033D8)
1228 {
1229         WriteLog("\n--> GPU PC: F033D8 (#%d)\n", testCount++);
1230         GPUDumpRegisters();
1231         GPUDumpMemory();
1232 }//*/
1233 /*if (gpu_pc == 0xF0358E)
1234 {
1235         WriteLog("\n--> GPU PC: F0358E (#%d)\n", testCount++);
1236         GPUDumpRegisters();
1237         GPUDumpMemory();
1238 }//*/
1239 /*if (gpu_pc == 0xF034CA)
1240 {
1241         WriteLog("\n--> GPU PC: F034CA (#%d)\n", testCount++);
1242         GPUDumpRegisters();
1243 }//*/
1244 /*if (gpu_pc == 0xF034CA)
1245 {
1246         len = gpu_reg[1] + 4;//, r9save = gpu_reg[9];
1247         WriteLog("\nAbout to subtract [#%d] (R14=%08X, R15=%08X, R9=%08X):\n   ", testCount++, gpu_reg[14], gpu_reg[15], gpu_reg[9]);
1248         for(int i=0; i<len; i+=4)
1249                 WriteLog(" %08X", GPUReadLong(gpu_reg[15]+i));
1250         WriteLog("\n   ");
1251         for(int i=0; i<len; i+=4)
1252                 WriteLog(" %08X", GPUReadLong(gpu_reg[14]+i));
1253         WriteLog("\n\n");
1254 }
1255 if (gpu_pc == 0xF034DE)
1256 {
1257         WriteLog("\nSubtracted! (R14=%08X, R15=%08X):\n   ", gpu_reg[14], gpu_reg[15]);
1258         for(int i=0; i<len; i+=4)
1259                 WriteLog(" %08X", GPUReadLong(gpu_reg[15]+i));
1260         WriteLog("\n   ");
1261         for(int i=0; i<len; i+=4)
1262                 WriteLog(" %08X", GPUReadLong(gpu_reg[14]+i));
1263         WriteLog("\n   ");
1264         for(int i=0; i<len; i+=4)
1265                 WriteLog(" --------");
1266         WriteLog("\n   ");
1267         for(int i=0; i<len; i+=4)
1268                 WriteLog(" %08X", GPUReadLong(gpu_reg[9]+4+i));
1269         WriteLog("\n\n");
1270 }//*/
1271 /*if (gpu_pc == 0xF035C8)
1272 {
1273         WriteLog("\n--> GPU PC: F035C8 (#%d)\n", testCount++);
1274         GPUDumpRegisters();
1275         GPUDumpDisassembly();
1276 }//*/
1277
1278 if (gpu_start_log)
1279 {
1280 //      gpu_reset_stats();
1281 static char buffer[512];
1282 dasmjag(JAGUAR_GPU, buffer, gpu_pc);
1283 WriteLog("GPU: [%08X] %s (RM=%08X, RN=%08X) -> ", gpu_pc, buffer, RM, RN);
1284 }//*/
1285 //$E400 -> 1110 01 -> $39 -> 57
1286 //GPU #1
1287                 gpu_pc += 2;
1288                 gpu_opcode[index]();
1289 //GPU #2
1290 //              gpu2_opcode[index]();
1291 //              gpu_pc += 2;
1292 //GPU #3                                (Doesn't show ATARI logo! #1 & #2 do...)
1293 //              gpu_pc += 2;
1294 //              gpu3_opcode[index]();
1295
1296 // BIOS hacking
1297 //GPU: [00F03548] jr      nz,00F03560 (0xd561) (RM=00F03114, RN=00000004) ->     --> JR: Branch taken.
1298 /*static bool firstTime = true;
1299 if (gpu_pc == 0xF03548 && firstTime)
1300 {
1301         gpu_flag_z = 1;
1302 //      firstTime = false;
1303
1304 //static char buffer[512];
1305 //int k=0xF03548;
1306 //while (k<0xF0356C)
1307 //{
1308 //int oldk = k;
1309 //k += dasmjag(JAGUAR_GPU, buffer, k);
1310 //WriteLog("GPU: [%08X] %s\n", oldk, buffer);
1311 //}
1312 //      gpu_start_log = 1;
1313 }//*/
1314 //GPU: [00F0354C] jump    nz,(r29) (0xd3a1) (RM=00F03314, RN=00000004) -> (RM=00F03314, RN=00000004)
1315 /*if (gpu_pc == 0xF0354C)
1316         gpu_flag_z = 0;//, gpu_start_log = 1;//*/
1317
1318                 cycles -= gpu_opcode_cycles[index];
1319                 gpu_opcode_use[index]++;
1320 if (gpu_start_log)
1321         WriteLog("(RM=%08X, RN=%08X)\n", RM, RN);//*/
1322 if ((gpu_pc < 0xF03000 || gpu_pc > 0xF03FFF) && !tripwire)
1323 {
1324         WriteLog("GPU: Executing outside local RAM! GPU_PC: %08X\n", gpu_pc);
1325         tripwire = true;
1326 }
1327         }
1328
1329         gpu_in_exec--;
1330 }
1331
1332 //
1333 // GPU opcodes
1334 //
1335
1336 /*
1337 GPU opcodes use (offset punch--vertically below bad guy):
1338                       add 18686
1339                      addq 32621
1340                       sub 7483
1341                      subq 10252
1342                       and 21229
1343                        or 15003
1344                      btst 1822
1345                      bset 2072
1346                      mult 141
1347                       div 2392
1348                      shlq 13449
1349                      shrq 10297
1350                     sharq 11104
1351                       cmp 6775
1352                      cmpq 5944
1353                      move 31259
1354                     moveq 4473
1355                     movei 23277
1356                     loadb 46
1357                     loadw 4201
1358                      load 28580
1359          load_r14_indexed 1183
1360          load_r15_indexed 1125
1361                    storew 178
1362                     store 10144
1363         store_r14_indexed 320
1364         store_r15_indexed 1
1365                   move_pc 1742
1366                      jump 24467
1367                        jr 18090
1368                       nop 41362
1369 */
1370
1371 static void gpu_opcode_jump(void)
1372 {
1373 #ifdef GPU_DIS_JUMP
1374 const char * condition[32] =
1375 {       "T", "nz", "z", "???", "nc", "nc nz", "nc z", "???", "c", "c nz",
1376         "c z", "???", "???", "???", "???", "???", "???", "???", "???",
1377         "???", "nn", "nn nz", "nn z", "???", "n", "n nz", "n z", "???",
1378         "???", "???", "???", "F" };
1379         if (doGPUDis)
1380                 WriteLog("%06X: JUMP   %s, (R%02u) [NCZ:%u%u%u, R%02u=%08X] ", gpu_pc-2, condition[IMM_2], IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM);
1381 #endif
1382         // normalize flags
1383 /*      gpu_flag_c = (gpu_flag_c ? 1 : 0);
1384         gpu_flag_z = (gpu_flag_z ? 1 : 0);
1385         gpu_flag_n = (gpu_flag_n ? 1 : 0);*/
1386         // KLUDGE: Used by BRANCH_CONDITION
1387         uint32_t jaguar_flags = (gpu_flag_n << 2) | (gpu_flag_c << 1) | gpu_flag_z;
1388
1389         if (BRANCH_CONDITION(IMM_2))
1390         {
1391 #ifdef GPU_DIS_JUMP
1392         if (doGPUDis)
1393                 WriteLog("Branched!\n");
1394 #endif
1395 if (gpu_start_log)
1396         WriteLog("    --> JUMP: Branch taken.\n");
1397                 uint32_t delayed_pc = RM;
1398                 GPUExec(1);
1399                 gpu_pc = delayed_pc;
1400 /*              uint16_t opcode = GPUReadWord(gpu_pc, GPU);
1401                 gpu_opcode_first_parameter = (opcode >> 5) & 0x1F;
1402                 gpu_opcode_second_parameter = opcode & 0x1F;
1403
1404                 gpu_pc = delayed_pc;
1405                 gpu_opcode[opcode>>10]();//*/
1406         }
1407 #ifdef GPU_DIS_JUMP
1408         else
1409                 if (doGPUDis)
1410                         WriteLog("Branch NOT taken.\n");
1411 #endif
1412 }
1413
1414 static void gpu_opcode_jr(void)
1415 {
1416 #ifdef GPU_DIS_JR
1417 const char * condition[32] =
1418 {       "T", "nz", "z", "???", "nc", "nc nz", "nc z", "???", "c", "c nz",
1419         "c z", "???", "???", "???", "???", "???", "???", "???", "???",
1420         "???", "nn", "nn nz", "nn z", "???", "n", "n nz", "n z", "???",
1421         "???", "???", "???", "F" };
1422         if (doGPUDis)
1423                 WriteLog("%06X: JR     %s, %06X [NCZ:%u%u%u] ", gpu_pc-2, condition[IMM_2], gpu_pc+((IMM_1 & 0x10 ? 0xFFFFFFF0 | IMM_1 : IMM_1) * 2), gpu_flag_n, gpu_flag_c, gpu_flag_z);
1424 #endif
1425 /*      if (CONDITION(jaguar.op & 31))
1426         {
1427                 int32_t r1 = (INT8)((jaguar.op >> 2) & 0xF8) >> 2;
1428                 uint32_t newpc = jaguar.PC + r1;
1429                 CALL_MAME_DEBUG;
1430                 jaguar.op = ROPCODE(jaguar.PC);
1431                 jaguar.PC = newpc;
1432                 (*jaguar.table[jaguar.op >> 10])();
1433
1434                 jaguar_icount -= 3;     // 3 wait states guaranteed
1435         }*/
1436         // normalize flags
1437 /*      gpu_flag_n = (gpu_flag_n ? 1 : 0);
1438         gpu_flag_c = (gpu_flag_c ? 1 : 0);
1439         gpu_flag_z = (gpu_flag_z ? 1 : 0);*/
1440         // KLUDGE: Used by BRANCH_CONDITION
1441         uint32_t jaguar_flags = (gpu_flag_n << 2) | (gpu_flag_c << 1) | gpu_flag_z;
1442
1443         if (BRANCH_CONDITION(IMM_2))
1444         {
1445 #ifdef GPU_DIS_JR
1446         if (doGPUDis)
1447                 WriteLog("Branched!\n");
1448 #endif
1449 if (gpu_start_log)
1450         WriteLog("    --> JR: Branch taken.\n");
1451                 int32_t offset = (IMM_1 & 0x10 ? 0xFFFFFFF0 | IMM_1 : IMM_1);           // Sign extend IMM_1
1452                 int32_t delayed_pc = gpu_pc + (offset * 2);
1453                 GPUExec(1);
1454                 gpu_pc = delayed_pc;
1455 /*              uint16_t opcode = GPUReadWord(gpu_pc, GPU);
1456                 gpu_opcode_first_parameter = (opcode >> 5) & 0x1F;
1457                 gpu_opcode_second_parameter = opcode & 0x1F;
1458
1459                 gpu_pc = delayed_pc;
1460                 gpu_opcode[opcode>>10]();//*/
1461         }
1462 #ifdef GPU_DIS_JR
1463         else
1464                 if (doGPUDis)
1465                         WriteLog("Branch NOT taken.\n");
1466 #endif
1467 }
1468
1469 static void gpu_opcode_add(void)
1470 {
1471 #ifdef GPU_DIS_ADD
1472         if (doGPUDis)
1473                 WriteLog("%06X: ADD    R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1474 #endif
1475         uint32_t res = RN + RM;
1476         CLR_ZNC; SET_ZNC_ADD(RN, RM, res);
1477         RN = res;
1478 #ifdef GPU_DIS_ADD
1479         if (doGPUDis)
1480                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1481 #endif
1482 }
1483
1484 static void gpu_opcode_addc(void)
1485 {
1486 #ifdef GPU_DIS_ADDC
1487         if (doGPUDis)
1488                 WriteLog("%06X: ADDC   R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1489 #endif
1490 /*      int dreg = jaguar.op & 31;
1491         uint32_t r1 = jaguar.r[(jaguar.op >> 5) & 31];
1492         uint32_t r2 = jaguar.r[dreg];
1493         uint32_t res = r2 + r1 + ((jaguar.FLAGS >> 1) & 1);
1494         jaguar.r[dreg] = res;
1495         CLR_ZNC; SET_ZNC_ADD(r2,r1,res);*/
1496
1497         uint32_t res = RN + RM + gpu_flag_c;
1498         uint32_t carry = gpu_flag_c;
1499 //      SET_ZNC_ADD(RN, RM, res); //???BUG??? Yes!
1500         SET_ZNC_ADD(RN + carry, RM, res);
1501 //      SET_ZNC_ADD(RN, RM + carry, res);
1502         RN = res;
1503 #ifdef GPU_DIS_ADDC
1504         if (doGPUDis)
1505                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1506 #endif
1507 }
1508
1509 static void gpu_opcode_addq(void)
1510 {
1511 #ifdef GPU_DIS_ADDQ
1512         if (doGPUDis)
1513                 WriteLog("%06X: ADDQ   #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1514 #endif
1515         uint32_t r1 = gpu_convert_zero[IMM_1];
1516         uint32_t res = RN + r1;
1517         CLR_ZNC; SET_ZNC_ADD(RN, r1, res);
1518         RN = res;
1519 #ifdef GPU_DIS_ADDQ
1520         if (doGPUDis)
1521                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1522 #endif
1523 }
1524
1525 static void gpu_opcode_addqt(void)
1526 {
1527 #ifdef GPU_DIS_ADDQT
1528         if (doGPUDis)
1529                 WriteLog("%06X: ADDQT  #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1530 #endif
1531         RN += gpu_convert_zero[IMM_1];
1532 #ifdef GPU_DIS_ADDQT
1533         if (doGPUDis)
1534                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1535 #endif
1536 }
1537
1538 static void gpu_opcode_sub(void)
1539 {
1540 #ifdef GPU_DIS_SUB
1541         if (doGPUDis)
1542                 WriteLog("%06X: SUB    R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1543 #endif
1544         uint32_t res = RN - RM;
1545         SET_ZNC_SUB(RN, RM, res);
1546         RN = res;
1547 #ifdef GPU_DIS_SUB
1548         if (doGPUDis)
1549                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1550 #endif
1551 }
1552
1553 static void gpu_opcode_subc(void)
1554 {
1555 #ifdef GPU_DIS_SUBC
1556         if (doGPUDis)
1557                 WriteLog("%06X: SUBC   R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1558 #endif
1559         uint32_t res = RN - RM - gpu_flag_c;
1560         uint32_t borrow = gpu_flag_c;
1561 //      SET_ZNC_SUB(RN, RM, res); //???BUG??? YES!!!
1562 //No matter how you do it, there is a problem. With below, it's 0-0 with carry,
1563 //and the one below it it's FFFFFFFF - FFFFFFFF with carry... !!! FIX !!!
1564 //      SET_ZNC_SUB(RN - borrow, RM, res);
1565         SET_ZNC_SUB(RN, RM + borrow, res);
1566         RN = res;
1567 #ifdef GPU_DIS_SUBC
1568         if (doGPUDis)
1569                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1570 #endif
1571 }
1572 /*
1573 N = 5, M = 3, 3 - 5 = -2, C = 1... Or, in our case:
1574 N = 0, M = 1, 0 - 1 = -1, C = 0!
1575
1576 #define SET_C_SUB(a,b)          (gpu_flag_c = ((uint32_t)(b) > (uint32_t)(a)))
1577 #define SET_ZN(r)                       SET_N(r); SET_Z(r)
1578 #define SET_ZNC_ADD(a,b,r)      SET_N(r); SET_Z(r); SET_C_ADD(a,b)
1579 #define SET_ZNC_SUB(a,b,r)      SET_N(r); SET_Z(r); SET_C_SUB(a,b)
1580 */
1581 static void gpu_opcode_subq(void)
1582 {
1583 #ifdef GPU_DIS_SUBQ
1584         if (doGPUDis)
1585                 WriteLog("%06X: SUBQ   #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1586 #endif
1587         uint32_t r1 = gpu_convert_zero[IMM_1];
1588         uint32_t res = RN - r1;
1589         SET_ZNC_SUB(RN, r1, res);
1590         RN = res;
1591 #ifdef GPU_DIS_SUBQ
1592         if (doGPUDis)
1593                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1594 #endif
1595 }
1596
1597 static void gpu_opcode_subqt(void)
1598 {
1599 #ifdef GPU_DIS_SUBQT
1600         if (doGPUDis)
1601                 WriteLog("%06X: SUBQT  #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1602 #endif
1603         RN -= gpu_convert_zero[IMM_1];
1604 #ifdef GPU_DIS_SUBQT
1605         if (doGPUDis)
1606                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1607 #endif
1608 }
1609
1610 static void gpu_opcode_cmp(void)
1611 {
1612 #ifdef GPU_DIS_CMP
1613         if (doGPUDis)
1614                 WriteLog("%06X: CMP    R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1615 #endif
1616         uint32_t res = RN - RM;
1617         SET_ZNC_SUB(RN, RM, res);
1618 #ifdef GPU_DIS_CMP
1619         if (doGPUDis)
1620                 WriteLog("[NCZ:%u%u%u]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z);
1621 #endif
1622 }
1623
1624 static void gpu_opcode_cmpq(void)
1625 {
1626         static int32_t sqtable[32] =
1627                 { 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,-16,-15,-14,-13,-12,-11,-10,-9,-8,-7,-6,-5,-4,-3,-2,-1 };
1628 #ifdef GPU_DIS_CMPQ
1629         if (doGPUDis)
1630                 WriteLog("%06X: CMPQ   #%d, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, sqtable[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1631 #endif
1632         uint32_t r1 = sqtable[IMM_1 & 0x1F]; // I like this better -> (INT8)(jaguar.op >> 2) >> 3;
1633         uint32_t res = RN - r1;
1634         SET_ZNC_SUB(RN, r1, res);
1635 #ifdef GPU_DIS_CMPQ
1636         if (doGPUDis)
1637                 WriteLog("[NCZ:%u%u%u]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z);
1638 #endif
1639 }
1640
1641 static void gpu_opcode_and(void)
1642 {
1643 #ifdef GPU_DIS_AND
1644         if (doGPUDis)
1645                 WriteLog("%06X: AND    R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1646 #endif
1647         RN = RN & RM;
1648         SET_ZN(RN);
1649 #ifdef GPU_DIS_AND
1650         if (doGPUDis)
1651                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1652 #endif
1653 }
1654
1655 static void gpu_opcode_or(void)
1656 {
1657 #ifdef GPU_DIS_OR
1658         if (doGPUDis)
1659                 WriteLog("%06X: OR     R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1660 #endif
1661         RN = RN | RM;
1662         SET_ZN(RN);
1663 #ifdef GPU_DIS_OR
1664         if (doGPUDis)
1665                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1666 #endif
1667 }
1668
1669 static void gpu_opcode_xor(void)
1670 {
1671 #ifdef GPU_DIS_XOR
1672         if (doGPUDis)
1673                 WriteLog("%06X: XOR    R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1674 #endif
1675         RN = RN ^ RM;
1676         SET_ZN(RN);
1677 #ifdef GPU_DIS_XOR
1678         if (doGPUDis)
1679                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1680 #endif
1681 }
1682
1683 static void gpu_opcode_not(void)
1684 {
1685 #ifdef GPU_DIS_NOT
1686         if (doGPUDis)
1687                 WriteLog("%06X: NOT    R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1688 #endif
1689         RN = ~RN;
1690         SET_ZN(RN);
1691 #ifdef GPU_DIS_NOT
1692         if (doGPUDis)
1693                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1694 #endif
1695 }
1696
1697 static void gpu_opcode_move_pc(void)
1698 {
1699 #ifdef GPU_DIS_MOVEPC
1700         if (doGPUDis)
1701                 WriteLog("%06X: MOVE   PC, R%02u [NCZ:%u%u%u, PC=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, gpu_pc-2, IMM_2, RN);
1702 #endif
1703         // Should be previous PC--this might not always be previous instruction!
1704         // Then again, this will point right at the *current* instruction, i.e., MOVE PC,R!
1705         RN = gpu_pc - 2;
1706 #ifdef GPU_DIS_MOVEPC
1707         if (doGPUDis)
1708                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1709 #endif
1710 }
1711
1712 static void gpu_opcode_sat8(void)
1713 {
1714 #ifdef GPU_DIS_SAT8
1715         if (doGPUDis)
1716                 WriteLog("%06X: SAT8   R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1717 #endif
1718         RN = ((int32_t)RN < 0 ? 0 : (RN > 0xFF ? 0xFF : RN));
1719         SET_ZN(RN);
1720 #ifdef GPU_DIS_SAT8
1721         if (doGPUDis)
1722                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1723 #endif
1724 }
1725
1726 static void gpu_opcode_sat16(void)
1727 {
1728         RN = ((int32_t)RN < 0 ? 0 : (RN > 0xFFFF ? 0xFFFF : RN));
1729         SET_ZN(RN);
1730 }
1731
1732 static void gpu_opcode_sat24(void)
1733 {
1734         RN = ((int32_t)RN < 0 ? 0 : (RN > 0xFFFFFF ? 0xFFFFFF : RN));
1735         SET_ZN(RN);
1736 }
1737
1738 static void gpu_opcode_store_r14_indexed(void)
1739 {
1740 #ifdef GPU_DIS_STORE14I
1741         if (doGPUDis)
1742                 WriteLog("%06X: STORE  R%02u, (R14+$%02X) [NCZ:%u%u%u, R%02u=%08X, R14+$%02X=%08X]\n", gpu_pc-2, IMM_2, gpu_convert_zero[IMM_1] << 2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, gpu_convert_zero[IMM_1] << 2, gpu_reg[14]+(gpu_convert_zero[IMM_1] << 2));
1743 #endif
1744 #ifdef GPU_CORRECT_ALIGNMENT
1745         uint32_t address = gpu_reg[14] + (gpu_convert_zero[IMM_1] << 2);
1746         
1747         if (address >= 0xF03000 && address <= 0xF03FFF)
1748                 GPUWriteLong(address & 0xFFFFFFFC, RN, GPU);
1749         else
1750                 GPUWriteLong(address, RN, GPU);
1751 #else
1752         GPUWriteLong(gpu_reg[14] + (gpu_convert_zero[IMM_1] << 2), RN, GPU);
1753 #endif
1754 }
1755
1756 static void gpu_opcode_store_r15_indexed(void)
1757 {
1758 #ifdef GPU_DIS_STORE15I
1759         if (doGPUDis)
1760                 WriteLog("%06X: STORE  R%02u, (R15+$%02X) [NCZ:%u%u%u, R%02u=%08X, R15+$%02X=%08X]\n", gpu_pc-2, IMM_2, gpu_convert_zero[IMM_1] << 2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, gpu_convert_zero[IMM_1] << 2, gpu_reg[15]+(gpu_convert_zero[IMM_1] << 2));
1761 #endif
1762 #ifdef GPU_CORRECT_ALIGNMENT
1763         uint32_t address = gpu_reg[15] + (gpu_convert_zero[IMM_1] << 2);
1764
1765         if (address >= 0xF03000 && address <= 0xF03FFF)
1766                 GPUWriteLong(address & 0xFFFFFFFC, RN, GPU);
1767         else
1768                 GPUWriteLong(address, RN, GPU);
1769 #else
1770         GPUWriteLong(gpu_reg[15] + (gpu_convert_zero[IMM_1] << 2), RN, GPU);
1771 #endif
1772 }
1773
1774 static void gpu_opcode_load_r14_ri(void)
1775 {
1776 #ifdef GPU_DIS_LOAD14R
1777         if (doGPUDis)
1778                 WriteLog("%06X: LOAD   (R14+R%02u), R%02u [NCZ:%u%u%u, R14+R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM+gpu_reg[14], IMM_2, RN);
1779 #endif
1780 #ifdef GPU_CORRECT_ALIGNMENT
1781         uint32_t address = gpu_reg[14] + RM;
1782
1783         if (address >= 0xF03000 && address <= 0xF03FFF)
1784                 RN = GPUReadLong(address & 0xFFFFFFFC, GPU);
1785         else
1786                 RN = GPUReadLong(address, GPU);
1787 #else
1788         RN = GPUReadLong(gpu_reg[14] + RM, GPU);
1789 #endif
1790 #ifdef GPU_DIS_LOAD14R
1791         if (doGPUDis)
1792                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1793 #endif
1794 }
1795
1796 static void gpu_opcode_load_r15_ri(void)
1797 {
1798 #ifdef GPU_DIS_LOAD15R
1799         if (doGPUDis)
1800                 WriteLog("%06X: LOAD   (R15+R%02u), R%02u [NCZ:%u%u%u, R15+R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM+gpu_reg[15], IMM_2, RN);
1801 #endif
1802 #ifdef GPU_CORRECT_ALIGNMENT
1803         uint32_t address = gpu_reg[15] + RM;
1804
1805         if (address >= 0xF03000 && address <= 0xF03FFF)
1806                 RN = GPUReadLong(address & 0xFFFFFFFC, GPU);
1807         else
1808                 RN = GPUReadLong(address, GPU);
1809 #else
1810         RN = GPUReadLong(gpu_reg[15] + RM, GPU);
1811 #endif
1812 #ifdef GPU_DIS_LOAD15R
1813         if (doGPUDis)
1814                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1815 #endif
1816 }
1817
1818 static void gpu_opcode_store_r14_ri(void)
1819 {
1820 #ifdef GPU_DIS_STORE14R
1821         if (doGPUDis)
1822                 WriteLog("%06X: STORE  R%02u, (R14+R%02u) [NCZ:%u%u%u, R%02u=%08X, R14+R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM+gpu_reg[14]);
1823 #endif
1824 #ifdef GPU_CORRECT_ALIGNMENT
1825         uint32_t address = gpu_reg[14] + RM;
1826
1827         if (address >= 0xF03000 && address <= 0xF03FFF)
1828                 GPUWriteLong(address & 0xFFFFFFFC, RN, GPU);
1829         else
1830                 GPUWriteLong(address, RN, GPU);
1831 #else
1832         GPUWriteLong(gpu_reg[14] + RM, RN, GPU);
1833 #endif
1834 }
1835
1836 static void gpu_opcode_store_r15_ri(void)
1837 {
1838 #ifdef GPU_DIS_STORE15R
1839         if (doGPUDis)
1840                 WriteLog("%06X: STORE  R%02u, (R15+R%02u) [NCZ:%u%u%u, R%02u=%08X, R15+R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM+gpu_reg[15]);
1841 #endif
1842 #ifdef GPU_CORRECT_ALIGNMENT_STORE
1843         uint32_t address = gpu_reg[15] + RM;
1844
1845         if (address >= 0xF03000 && address <= 0xF03FFF)
1846                 GPUWriteLong(address & 0xFFFFFFFC, RN, GPU);
1847         else
1848                 GPUWriteLong(address, RN, GPU);
1849 #else
1850         GPUWriteLong(gpu_reg[15] + RM, RN, GPU);
1851 #endif
1852 }
1853
// NOP: performs no operation. The only observable effect is the optional
// disassembly trace line.
static void gpu_opcode_nop(void)
{
#ifdef GPU_DIS_NOP
    if (doGPUDis)
    {
        WriteLog("%06X: NOP    [NCZ:%u%u%u]\n", gpu_pc-2, gpu_flag_n, gpu_flag_c, gpu_flag_z);
    }
#endif
}
1861
1862 static void gpu_opcode_pack(void)
1863 {
1864 #ifdef GPU_DIS_PACK
1865         if (doGPUDis)
1866                 WriteLog("%06X: %s R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, (!IMM_1 ? "PACK  " : "UNPACK"), IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1867 #endif
1868         uint32_t val = RN;
1869
1870 //BUG!  if (RM == 0)                            // Pack
1871         if (IMM_1 == 0)                         // Pack
1872                 RN = ((val >> 10) & 0x0000F000) | ((val >> 5) & 0x00000F00) | (val & 0x000000FF);
1873         else                                            // Unpack
1874                 RN = ((val & 0x0000F000) << 10) | ((val & 0x00000F00) << 5) | (val & 0x000000FF);
1875 #ifdef GPU_DIS_PACK
1876         if (doGPUDis)
1877                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1878 #endif
1879 }
1880
1881 static void gpu_opcode_storeb(void)
1882 {
1883 #ifdef GPU_DIS_STOREB
1884         if (doGPUDis)
1885                 WriteLog("%06X: STOREB R%02u, (R%02u) [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM);
1886 #endif
1887 //Is this right???
1888 // Would appear to be so...!
1889         if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1890                 GPUWriteLong(RM, RN & 0xFF, GPU);
1891         else
1892                 JaguarWriteByte(RM, RN, GPU);
1893 }
1894
1895 static void gpu_opcode_storew(void)
1896 {
1897 #ifdef GPU_DIS_STOREW
1898         if (doGPUDis)
1899                 WriteLog("%06X: STOREW R%02u, (R%02u) [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM);
1900 #endif
1901 #ifdef GPU_CORRECT_ALIGNMENT
1902         if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1903                 GPUWriteLong(RM & 0xFFFFFFFE, RN & 0xFFFF, GPU);
1904         else
1905                 JaguarWriteWord(RM, RN, GPU);
1906 #else
1907         if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1908                 GPUWriteLong(RM, RN & 0xFFFF, GPU);
1909         else
1910                 JaguarWriteWord(RM, RN, GPU);
1911 #endif
1912 }
1913
1914 static void gpu_opcode_store(void)
1915 {
1916 #ifdef GPU_DIS_STORE
1917         if (doGPUDis)
1918                 WriteLog("%06X: STORE  R%02u, (R%02u) [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM);
1919 #endif
1920 #ifdef GPU_CORRECT_ALIGNMENT
1921         if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1922                 GPUWriteLong(RM & 0xFFFFFFFC, RN, GPU);
1923         else
1924                 GPUWriteLong(RM, RN, GPU);
1925 #else
1926         GPUWriteLong(RM, RN, GPU);
1927 #endif
1928 }
1929
1930 static void gpu_opcode_storep(void)
1931 {
1932 #ifdef GPU_CORRECT_ALIGNMENT
1933         if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1934         {
1935                 GPUWriteLong((RM & 0xFFFFFFF8) + 0, gpu_hidata, GPU);
1936                 GPUWriteLong((RM & 0xFFFFFFF8) + 4, RN, GPU);
1937         }
1938         else
1939         {
1940                 GPUWriteLong(RM + 0, gpu_hidata, GPU);
1941                 GPUWriteLong(RM + 4, RN, GPU);
1942         }
1943 #else
1944         GPUWriteLong(RM + 0, gpu_hidata, GPU);
1945         GPUWriteLong(RM + 4, RN, GPU);
1946 #endif
1947 }
1948
1949 static void gpu_opcode_loadb(void)
1950 {
1951 #ifdef GPU_DIS_LOADB
1952         if (doGPUDis)
1953                 WriteLog("%06X: LOADB  (R%02u), R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1954 #endif
1955         if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1956                 RN = GPUReadLong(RM, GPU) & 0xFF;
1957         else
1958                 RN = JaguarReadByte(RM, GPU);
1959 #ifdef GPU_DIS_LOADB
1960         if (doGPUDis)
1961                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1962 #endif
1963 }
1964
1965 static void gpu_opcode_loadw(void)
1966 {
1967 #ifdef GPU_DIS_LOADW
1968         if (doGPUDis)
1969                 WriteLog("%06X: LOADW  (R%02u), R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1970 #endif
1971 #ifdef GPU_CORRECT_ALIGNMENT
1972         if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1973                 RN = GPUReadLong(RM & 0xFFFFFFFE, GPU) & 0xFFFF;
1974         else
1975                 RN = JaguarReadWord(RM, GPU);
1976 #else
1977         if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1978                 RN = GPUReadLong(RM, GPU) & 0xFFFF;
1979         else
1980                 RN = JaguarReadWord(RM, GPU);
1981 #endif
1982 #ifdef GPU_DIS_LOADW
1983         if (doGPUDis)
1984                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1985 #endif
1986 }
1987
1988 // According to the docs, & "Do The Same", this address is long aligned...
1989 // So let's try it:
1990 // And it works!!! Need to fix all instances...
1991 // Also, Power Drive Rally seems to contradict the idea that only LOADs in
1992 // the $F03000-$F03FFF range are aligned...
1993 #warning "!!! Alignment issues, need to find definitive final word on this !!!"
1994 /*
1995 Preliminary testing on real hardware seems to confirm that something strange goes on
1996 with unaligned reads in main memory. When the address is off by 1, the result is the
1997 same as the long address with the top byte replaced by something. So if the read is
1998 from $401, and $400 has 12 34 56 78, the value read will be $nn345678, where nn is a currently unknown value.
1999 When the address is off by 2, the result would be $nnnn5678, where nnnn is unknown.
2000 When the address is off by 3, the result would be $nnnnnn78, where nnnnnn is unknown.
2001 It may be that the "unknown" values come from the prefetch queue, but not sure how
2002 to test that. They seem to be stable, though, which would indicate such a mechanism.
2003 Sometimes, however, the off by 2 case returns $12345678!
2004 */
2005 static void gpu_opcode_load(void)
2006 {
2007 #ifdef GPU_DIS_LOAD
2008         if (doGPUDis)
2009                 WriteLog("%06X: LOAD   (R%02u), R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2010 #endif
2011 #ifdef GPU_CORRECT_ALIGNMENT
2012         uint32_t mask[4] = { 0x00000000, 0xFF000000, 0xFFFF0000, 0xFFFFFF00 };
2013 //      if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
2014                 RN = GPUReadLong(RM & 0xFFFFFFFC, GPU);
2015 //              RN = GPUReadLong(RM & 0x00FFFFFC, GPU);
2016 //      else
2017 //              RN = GPUReadLong(RM, GPU);
2018         // Simulate garbage in unaligned reads...
2019 //seems that this behavior is different in GPU mem vs. main mem...
2020 //      if ((RM < 0xF03000) || (RM > 0xF0BFFF))
2021 //              RN |= mask[RM & 0x03];
2022 #else
2023         RN = GPUReadLong(RM, GPU);
2024 #endif
2025 #ifdef GPU_DIS_LOAD
2026         if (doGPUDis)
2027                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2028 #endif
2029 }
2030
2031 static void gpu_opcode_loadp(void)
2032 {
2033 #ifdef GPU_CORRECT_ALIGNMENT
2034         if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
2035         {
2036                 gpu_hidata = GPUReadLong((RM & 0xFFFFFFF8) + 0, GPU);
2037                 RN                 = GPUReadLong((RM & 0xFFFFFFF8) + 4, GPU);
2038         }
2039         else
2040         {
2041                 gpu_hidata = GPUReadLong(RM + 0, GPU);
2042                 RN                 = GPUReadLong(RM + 4, GPU);
2043         }
2044 #else
2045         gpu_hidata = GPUReadLong(RM + 0, GPU);
2046         RN                 = GPUReadLong(RM + 4, GPU);
2047 #endif
2048 }
2049
2050 static void gpu_opcode_load_r14_indexed(void)
2051 {
2052 #ifdef GPU_DIS_LOAD14I
2053         if (doGPUDis)
2054                 WriteLog("%06X: LOAD   (R14+$%02X), R%02u [NCZ:%u%u%u, R14+$%02X=%08X, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1] << 2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, gpu_convert_zero[IMM_1] << 2, gpu_reg[14]+(gpu_convert_zero[IMM_1] << 2), IMM_2, RN);
2055 #endif
2056 #ifdef GPU_CORRECT_ALIGNMENT
2057         uint32_t address = gpu_reg[14] + (gpu_convert_zero[IMM_1] << 2);
2058
2059         if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
2060                 RN = GPUReadLong(address & 0xFFFFFFFC, GPU);
2061         else
2062                 RN = GPUReadLong(address, GPU);
2063 #else
2064         RN = GPUReadLong(gpu_reg[14] + (gpu_convert_zero[IMM_1] << 2), GPU);
2065 #endif
2066 #ifdef GPU_DIS_LOAD14I
2067         if (doGPUDis)
2068                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2069 #endif
2070 }
2071
2072 static void gpu_opcode_load_r15_indexed(void)
2073 {
2074 #ifdef GPU_DIS_LOAD15I
2075         if (doGPUDis)
2076                 WriteLog("%06X: LOAD   (R15+$%02X), R%02u [NCZ:%u%u%u, R15+$%02X=%08X, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1] << 2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, gpu_convert_zero[IMM_1] << 2, gpu_reg[15]+(gpu_convert_zero[IMM_1] << 2), IMM_2, RN);
2077 #endif
2078 #ifdef GPU_CORRECT_ALIGNMENT
2079         uint32_t address = gpu_reg[15] + (gpu_convert_zero[IMM_1] << 2);
2080
2081         if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
2082                 RN = GPUReadLong(address & 0xFFFFFFFC, GPU);
2083         else
2084                 RN = GPUReadLong(address, GPU);
2085 #else
2086         RN = GPUReadLong(gpu_reg[15] + (gpu_convert_zero[IMM_1] << 2), GPU);
2087 #endif
2088 #ifdef GPU_DIS_LOAD15I
2089         if (doGPUDis)
2090                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2091 #endif
2092 }
2093
2094 static void gpu_opcode_movei(void)
2095 {
2096 #ifdef GPU_DIS_MOVEI
2097         if (doGPUDis)
2098                 WriteLog("%06X: MOVEI  #$%08X, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, (uint32_t)GPUReadWord(gpu_pc) | ((uint32_t)GPUReadWord(gpu_pc + 2) << 16), IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2099 #endif
2100         // This instruction is followed by 32-bit value in LSW / MSW format...
2101         RN = (uint32_t)GPUReadWord(gpu_pc, GPU) | ((uint32_t)GPUReadWord(gpu_pc + 2, GPU) << 16);
2102         gpu_pc += 4;
2103 #ifdef GPU_DIS_MOVEI
2104         if (doGPUDis)
2105                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2106 #endif
2107 }
2108
2109 static void gpu_opcode_moveta(void)
2110 {
2111 #ifdef GPU_DIS_MOVETA
2112         if (doGPUDis)
2113                 WriteLog("%06X: MOVETA R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u(alt)=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, ALTERNATE_RN);
2114 #endif
2115         ALTERNATE_RN = RM;
2116 #ifdef GPU_DIS_MOVETA
2117         if (doGPUDis)
2118                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u(alt)=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, ALTERNATE_RN);
2119 #endif
2120 }
2121
2122 static void gpu_opcode_movefa(void)
2123 {
2124 #ifdef GPU_DIS_MOVEFA
2125         if (doGPUDis)
2126                 WriteLog("%06X: MOVEFA R%02u, R%02u [NCZ:%u%u%u, R%02u(alt)=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, ALTERNATE_RM, IMM_2, RN);
2127 #endif
2128         RN = ALTERNATE_RM;
2129 #ifdef GPU_DIS_MOVEFA
2130         if (doGPUDis)
2131                 WriteLog("[NCZ:%u%u%u, R%02u(alt)=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, ALTERNATE_RM, IMM_2, RN);
2132 #endif
2133 }
2134
2135 static void gpu_opcode_move(void)
2136 {
2137 #ifdef GPU_DIS_MOVE
2138         if (doGPUDis)
2139                 WriteLog("%06X: MOVE   R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2140 #endif
2141         RN = RM;
2142 #ifdef GPU_DIS_MOVE
2143         if (doGPUDis)
2144                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2145 #endif
2146 }
2147
2148 static void gpu_opcode_moveq(void)
2149 {
2150 #ifdef GPU_DIS_MOVEQ
2151         if (doGPUDis)
2152                 WriteLog("%06X: MOVEQ  #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2153 #endif
2154         RN = IMM_1;
2155 #ifdef GPU_DIS_MOVEQ
2156         if (doGPUDis)
2157                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2158 #endif
2159 }
2160
2161 static void gpu_opcode_resmac(void)
2162 {
2163         RN = gpu_acc;
2164 }
2165
2166 static void gpu_opcode_imult(void)
2167 {
2168 #ifdef GPU_DIS_IMULT
2169         if (doGPUDis)
2170                 WriteLog("%06X: IMULT  R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2171 #endif
2172         RN = (int16_t)RN * (int16_t)RM;
2173         SET_ZN(RN);
2174 #ifdef GPU_DIS_IMULT
2175         if (doGPUDis)
2176                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2177 #endif
2178 }
2179
2180 static void gpu_opcode_mult(void)
2181 {
2182 #ifdef GPU_DIS_MULT
2183         if (doGPUDis)
2184                 WriteLog("%06X: MULT   R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2185 #endif
2186         RN = (uint16_t)RM * (uint16_t)RN;
2187         SET_ZN(RN);
2188 #ifdef GPU_DIS_MULT
2189         if (doGPUDis)
2190                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2191 #endif
2192 }
2193
2194 static void gpu_opcode_bclr(void)
2195 {
2196 #ifdef GPU_DIS_BCLR
2197         if (doGPUDis)
2198                 WriteLog("%06X: BCLR   #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2199 #endif
2200         uint32_t res = RN & ~(1 << IMM_1);
2201         RN = res;
2202         SET_ZN(res);
2203 #ifdef GPU_DIS_BCLR
2204         if (doGPUDis)
2205                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2206 #endif
2207 }
2208
2209 static void gpu_opcode_btst(void)
2210 {
2211 #ifdef GPU_DIS_BTST
2212         if (doGPUDis)
2213                 WriteLog("%06X: BTST   #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2214 #endif
2215         gpu_flag_z = (~RN >> IMM_1) & 1;
2216 #ifdef GPU_DIS_BTST
2217         if (doGPUDis)
2218                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2219 #endif
2220 }
2221
2222 static void gpu_opcode_bset(void)
2223 {
2224 #ifdef GPU_DIS_BSET
2225         if (doGPUDis)
2226                 WriteLog("%06X: BSET   #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2227 #endif
2228         uint32_t res = RN | (1 << IMM_1);
2229         RN = res;
2230         SET_ZN(res);
2231 #ifdef GPU_DIS_BSET
2232         if (doGPUDis)
2233                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2234 #endif
2235 }
2236
2237 static void gpu_opcode_imacn(void)
2238 {
2239         uint32_t res = (int16_t)RM * (int16_t)(RN);
2240         gpu_acc += res;
2241 }
2242
2243 static void gpu_opcode_mtoi(void)
2244 {
2245         uint32_t _RM = RM;
2246         uint32_t res = RN = (((int32_t)_RM >> 8) & 0xFF800000) | (_RM & 0x007FFFFF);
2247         SET_ZN(res);
2248 }
2249
2250 static void gpu_opcode_normi(void)
2251 {
2252         uint32_t _RM = RM;
2253         uint32_t res = 0;
2254
2255         if (_RM)
2256         {
2257                 while ((_RM & 0xFFC00000) == 0)
2258                 {
2259                         _RM <<= 1;
2260                         res--;
2261                 }
2262                 while ((_RM & 0xFF800000) != 0)
2263                 {
2264                         _RM >>= 1;
2265                         res++;
2266                 }
2267         }
2268         RN = res;
2269         SET_ZN(res);
2270 }
2271
2272 static void gpu_opcode_mmult(void)
2273 {
2274         int count       = gpu_matrix_control & 0x0F;    // Matrix width
2275         uint32_t addr = gpu_pointer_to_matrix;          // In the GPU's RAM
2276         int64_t accum = 0;
2277         uint32_t res;
2278
2279         if (gpu_matrix_control & 0x10)                          // Column stepping
2280         {
2281                 for(int i=0; i<count; i++)
2282                 {
2283                         int16_t a;
2284                         if (i & 0x01)
2285                                 a = (int16_t)((gpu_alternate_reg[IMM_1 + (i >> 1)] >> 16) & 0xFFFF);
2286                         else
2287                                 a = (int16_t)(gpu_alternate_reg[IMM_1 + (i >> 1)] & 0xFFFF);
2288
2289                         int16_t b = ((int16_t)GPUReadWord(addr + 2, GPU));
2290                         accum += a * b;
2291                         addr += 4 * count;
2292                 }
2293         }
2294         else                                                                            // Row stepping
2295         {
2296                 for(int i=0; i<count; i++)
2297                 {
2298                         int16_t a;
2299                         if (i & 0x01)
2300                                 a = (int16_t)((gpu_alternate_reg[IMM_1 + (i >> 1)] >> 16) & 0xFFFF);
2301                         else
2302                                 a = (int16_t)(gpu_alternate_reg[IMM_1 + (i >> 1)] & 0xFFFF);
2303
2304                         int16_t b = ((int16_t)GPUReadWord(addr + 2, GPU));
2305                         accum += a * b;
2306                         addr += 4;
2307                 }
2308         }
2309         RN = res = (int32_t)accum;
2310         // carry flag to do (out of the last add)
2311         SET_ZN(res);
2312 }
2313
2314 static void gpu_opcode_abs(void)
2315 {
2316 #ifdef GPU_DIS_ABS
2317         if (doGPUDis)
2318                 WriteLog("%06X: ABS    R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2319 #endif
2320         gpu_flag_c = RN >> 31;
2321         if (RN == 0x80000000)
2322         //Is 0x80000000 a positive number? If so, then we need to set C to 0 as well!
2323                 gpu_flag_n = 1, gpu_flag_z = 0;
2324         else
2325         {
2326                 if (gpu_flag_c)
2327                         RN = -RN;
2328                 gpu_flag_n = 0; SET_FLAG_Z(RN);
2329         }
2330 #ifdef GPU_DIS_ABS
2331         if (doGPUDis)
2332                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2333 #endif
2334 }
2335
2336 static void gpu_opcode_div(void)        // RN / RM
2337 {
2338 #ifdef GPU_DIS_DIV
2339         if (doGPUDis)
2340                 WriteLog("%06X: DIV    R%02u, R%02u (%s) [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, (gpu_div_control & 0x01 ? "16.16" : "32"), gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2341 #endif
2342 // NOTE: remainder is NOT calculated correctly here!
2343 //       The original tried to get it right by checking to see if the
2344 //       remainder was negative, but that's too late...
2345 // The code there should do it now, but I'm not 100% sure...
2346
2347         if (RM)
2348         {
2349                 if (gpu_div_control & 0x01)             // 16.16 division
2350                 {
2351                         RN = ((uint64_t)RN << 16) / RM;
2352                         gpu_remain = ((uint64_t)RN << 16) % RM;
2353                 }
2354                 else
2355                 {
2356                         RN = RN / RM;
2357                         gpu_remain = RN % RM;
2358                 }
2359
2360                 if ((gpu_remain - RM) & 0x80000000)     // If the result would have been negative...
2361                         gpu_remain -= RM;                       // Then make it negative!
2362         }
2363         else
2364                 RN = 0xFFFFFFFF;
2365
2366 /*      uint32_t _RM=RM;
2367         uint32_t _RN=RN;
2368
2369         if (_RM)
2370         {
2371                 if (gpu_div_control & 1)
2372                 {
2373                         gpu_remain = (((uint64_t)_RN) << 16) % _RM;
2374                         if (gpu_remain&0x80000000)
2375                                 gpu_remain-=_RM;
2376                         RN = (((uint64_t)_RN) << 16) / _RM;
2377                 }
2378                 else
2379                 {
2380                         gpu_remain = _RN % _RM;
2381                         if (gpu_remain&0x80000000)
2382                                 gpu_remain-=_RM;
2383                         RN/=_RM;
2384                 }
2385         }
2386         else
2387                 RN=0xffffffff;*/
2388 #ifdef GPU_DIS_DIV
2389         if (doGPUDis)
2390                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] Remainder: %08X\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN, gpu_remain);
2391 #endif
2392 }
2393
2394 static void gpu_opcode_imultn(void)
2395 {
2396         uint32_t res = (int32_t)((int16_t)RN * (int16_t)RM);
2397         gpu_acc = (int32_t)res;
2398         SET_FLAG_Z(res);
2399         SET_FLAG_N(res);
2400 }
2401
2402 static void gpu_opcode_neg(void)
2403 {
2404 #ifdef GPU_DIS_NEG
2405         if (doGPUDis)
2406                 WriteLog("%06X: NEG    R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2407 #endif
2408         uint32_t res = -RN;
2409         SET_ZNC_SUB(0, RN, res);
2410         RN = res;
2411 #ifdef GPU_DIS_NEG
2412         if (doGPUDis)
2413                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2414 #endif
2415 }
2416
2417 static void gpu_opcode_shlq(void)
2418 {
2419 #ifdef GPU_DIS_SHLQ
2420         if (doGPUDis)
2421                 WriteLog("%06X: SHLQ   #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, 32 - IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2422 #endif
2423 // Was a bug here...
2424 // (Look at Aaron's code: If r1 = 32, then 32 - 32 = 0 which is wrong!)
2425         int32_t r1 = 32 - IMM_1;
2426         uint32_t res = RN << r1;
2427         SET_ZN(res); gpu_flag_c = (RN >> 31) & 1;
2428         RN = res;
2429 #ifdef GPU_DIS_SHLQ
2430         if (doGPUDis)
2431                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2432 #endif
2433 }
2434
2435 static void gpu_opcode_shrq(void)
2436 {
2437 #ifdef GPU_DIS_SHRQ
2438         if (doGPUDis)
2439                 WriteLog("%06X: SHRQ   #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2440 #endif
2441         int32_t r1 = gpu_convert_zero[IMM_1];
2442         uint32_t res = RN >> r1;
2443         SET_ZN(res); gpu_flag_c = RN & 1;
2444         RN = res;
2445 #ifdef GPU_DIS_SHRQ
2446         if (doGPUDis)
2447                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2448 #endif
2449 }
2450
2451 static void gpu_opcode_ror(void)
2452 {
2453 #ifdef GPU_DIS_ROR
2454         if (doGPUDis)
2455                 WriteLog("%06X: ROR    R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2456 #endif
2457         uint32_t r1 = RM & 0x1F;
2458         uint32_t res = (RN >> r1) | (RN << (32 - r1));
2459         SET_ZN(res); gpu_flag_c = (RN >> 31) & 1;
2460         RN = res;
2461 #ifdef GPU_DIS_ROR
2462         if (doGPUDis)
2463                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2464 #endif
2465 }
2466
2467 static void gpu_opcode_rorq(void)
2468 {
2469 #ifdef GPU_DIS_RORQ
2470         if (doGPUDis)
2471                 WriteLog("%06X: RORQ   #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2472 #endif
2473         uint32_t r1 = gpu_convert_zero[IMM_1 & 0x1F];
2474         uint32_t r2 = RN;
2475         uint32_t res = (r2 >> r1) | (r2 << (32 - r1));
2476         RN = res;
2477         SET_ZN(res); gpu_flag_c = (r2 >> 31) & 0x01;
2478 #ifdef GPU_DIS_RORQ
2479         if (doGPUDis)
2480                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2481 #endif
2482 }
2483
2484 static void gpu_opcode_sha(void)
2485 {
2486 /*      int dreg = jaguar.op & 31;
2487         int32_t r1 = (int32_t)jaguar.r[(jaguar.op >> 5) & 31];
2488         uint32_t r2 = jaguar.r[dreg];
2489         uint32_t res;
2490
2491         CLR_ZNC;
2492         if (r1 < 0)
2493         {
2494                 res = (r1 <= -32) ? 0 : (r2 << -r1);
2495                 jaguar.FLAGS |= (r2 >> 30) & 2;
2496         }
2497         else
2498         {
2499                 res = (r1 >= 32) ? ((int32_t)r2 >> 31) : ((int32_t)r2 >> r1);
2500                 jaguar.FLAGS |= (r2 << 1) & 2;
2501         }
2502         jaguar.r[dreg] = res;
2503         SET_ZN(res);*/
2504
2505 #ifdef GPU_DIS_SHA
2506         if (doGPUDis)
2507                 WriteLog("%06X: SHA    R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2508 #endif
2509         uint32_t res;
2510
2511         if ((int32_t)RM < 0)
2512         {
2513                 res = ((int32_t)RM <= -32) ? 0 : (RN << -(int32_t)RM);
2514                 gpu_flag_c = RN >> 31;
2515         }
2516         else
2517         {
2518                 res = ((int32_t)RM >= 32) ? ((int32_t)RN >> 31) : ((int32_t)RN >> (int32_t)RM);
2519                 gpu_flag_c = RN & 0x01;
2520         }
2521         RN = res;
2522         SET_ZN(res);
2523 #ifdef GPU_DIS_SHA
2524         if (doGPUDis)
2525                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2526 #endif
2527
2528 /*      int32_t sRM=(int32_t)RM;
2529         uint32_t _RN=RN;
2530
2531         if (sRM<0)
2532         {
2533                 uint32_t shift=-sRM;
2534                 if (shift>=32) shift=32;
2535                 gpu_flag_c=(_RN&0x80000000)>>31;
2536                 while (shift)
2537                 {
2538                         _RN<<=1;
2539                         shift--;
2540                 }
2541         }
2542         else
2543         {
2544                 uint32_t shift=sRM;
2545                 if (shift>=32) shift=32;
2546                 gpu_flag_c=_RN&0x1;
2547                 while (shift)
2548                 {
2549                         _RN=((int32_t)_RN)>>1;
2550                         shift--;
2551                 }
2552         }
2553         RN=_RN;
2554         SET_FLAG_Z(_RN);
2555         SET_FLAG_N(_RN);*/
2556 }
2557
2558 static void gpu_opcode_sharq(void)
2559 {
2560 #ifdef GPU_DIS_SHARQ
2561         if (doGPUDis)
2562                 WriteLog("%06X: SHARQ  #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2563 #endif
2564         uint32_t res = (int32_t)RN >> gpu_convert_zero[IMM_1];
2565         SET_ZN(res); gpu_flag_c = RN & 0x01;
2566         RN = res;
2567 #ifdef GPU_DIS_SHARQ
2568         if (doGPUDis)
2569                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2570 #endif
2571 }
2572
2573 static void gpu_opcode_sh(void)
2574 {
2575 #ifdef GPU_DIS_SH
2576         if (doGPUDis)
2577                 WriteLog("%06X: SH     R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2578 #endif
2579         if (RM & 0x80000000)            // Shift left
2580         {
2581                 gpu_flag_c = RN >> 31;
2582                 RN = ((int32_t)RM <= -32 ? 0 : RN << -(int32_t)RM);
2583         }
2584         else                                            // Shift right
2585         {
2586                 gpu_flag_c = RN & 0x01;
2587                 RN = (RM >= 32 ? 0 : RN >> RM);
2588         }
2589         SET_ZN(RN);
2590 #ifdef GPU_DIS_SH
2591         if (doGPUDis)
2592                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2593 #endif
2594 }
2595
2596 //Temporary: Testing only!
2597 //#include "gpu2.cpp"
2598 //#include "gpu3.cpp"
2599
2600 #else
2601
2602 // New thread-safe GPU core
2603
//
// Placeholder entry point for the new thread-safe GPU core.
//
// Not implemented yet: the argument is ignored and 0 is returned so that the
// function no longer flows off its end without producing a value.
//
int GPUCore(void * data)
{
        (void)data;                                                             // Unused until the core is implemented
        return 0;
}
2607
2608 #endif