]> Shamusworld >> Repos - virtualjaguar/blob - src/gpu.cpp
c57b948d5088751dd169e4d20f0d5e89a7a11a3c
[virtualjaguar] / src / gpu.cpp
1 #if 1
2
3 //
4 // GPU Core
5 //
6 // Originally by David Raingeard (Cal2)
7 // GCC/SDL port by Niels Wagenaar (Linux/WIN32) and Caz (BeOS)
8 // Cleanups, endian wrongness, and bad ASM amelioration by James Hammons
9 // (C) 2010 Underground Software
10 //
11 // JLH = James Hammons <jlhamm@acm.org>
12 //
13 // Who  When        What
14 // ---  ----------  -------------------------------------------------------------
15 // JLH  01/16/2010  Created this log ;-)
16 // JLH  11/26/2011  Added fixes for LOAD/STORE alignment issues
17
18 //
19 // Note: Endian wrongness probably stems from the MAME origins of this emu and
20 //       the braindead way in which MAME handles memory. :-)
21 //
22 // Problem with not booting the BIOS was the incorrect way that the
23 // SUBC instruction set the carry when the carry was set going in...
24 // Same problem with ADDC...
25 //
26
27 #include "gpu.h"
28
29 #include <stdlib.h>
30 #include <string.h>                                                             // For memset
31 #include "dsp.h"
32 #include "jagdasm.h"
33 #include "jaguar.h"
34 #include "log.h"
35 #include "m68000/m68kinterface.h"
36 //#include "memory.h"
37 #include "tom.h"
38
39
// Compile-time switches for this file.
//
// GPU_CORRECT_ALIGNMENT: Seems alignment in loads & stores was off... (the
// changelog entry of 11/26/2011 mentions fixes for LOAD/STORE alignment).
// NOTE(review): the code guarded by this switch is not in this part of the
// file -- confirm exactly what it changes.
#define GPU_CORRECT_ALIGNMENT
// GPU_DEBUG: when defined, the memory handlers below emit extra WriteLog()
// diagnostics (unaligned accesses, PC writes, interrupt enables).
//#define GPU_DEBUG
43
44 // For GPU disassembly...
45
46 #if 0
47 #define GPU_DIS_ABS
48 #define GPU_DIS_ADD
49 #define GPU_DIS_ADDC
50 #define GPU_DIS_ADDQ
51 #define GPU_DIS_ADDQT
52 #define GPU_DIS_AND
53 #define GPU_DIS_BCLR
54 #define GPU_DIS_BSET
55 #define GPU_DIS_BTST
56 #define GPU_DIS_CMP
57 #define GPU_DIS_CMPQ
58 #define GPU_DIS_DIV
59 #define GPU_DIS_IMULT
60 #define GPU_DIS_JUMP
61 #define GPU_DIS_JR
62 #define GPU_DIS_LOAD
63 #define GPU_DIS_LOADB
64 #define GPU_DIS_LOADW
65 #define GPU_DIS_LOAD14I
66 #define GPU_DIS_LOAD14R
67 #define GPU_DIS_LOAD15I
68 #define GPU_DIS_LOAD15R
69 #define GPU_DIS_MOVE
70 #define GPU_DIS_MOVEFA
71 #define GPU_DIS_MOVEI
72 #define GPU_DIS_MOVEPC
73 #define GPU_DIS_MOVETA
74 #define GPU_DIS_MOVEQ
75 #define GPU_DIS_MULT
76 #define GPU_DIS_NEG
77 #define GPU_DIS_NOP
78 #define GPU_DIS_NOT
79 #define GPU_DIS_OR
80 #define GPU_DIS_PACK
81 #define GPU_DIS_ROR
82 #define GPU_DIS_RORQ
83 #define GPU_DIS_SAT8
84 #define GPU_DIS_SH
85 #define GPU_DIS_SHA
86 #define GPU_DIS_SHARQ
87 #define GPU_DIS_SHLQ
88 #define GPU_DIS_SHRQ
89 #define GPU_DIS_STORE
90 #define GPU_DIS_STOREB
91 #define GPU_DIS_STOREW
92 #define GPU_DIS_STORE14I
93 #define GPU_DIS_STORE14R
94 #define GPU_DIS_STORE15I
95 #define GPU_DIS_STORE15R
96 #define GPU_DIS_SUB
97 #define GPU_DIS_SUBC
98 #define GPU_DIS_SUBQ
99 #define GPU_DIS_SUBQT
100 #define GPU_DIS_XOR
101
102 //bool doGPUDis = false;
103 bool doGPUDis = true;
104 #endif
105
106 /*
107 GPU opcodes use (BIOS flying ATARI logo):
108 +                     add 357416
109 +                    addq 538030
110 +                   addqt 6999
111 +                     sub 116663
112 +                    subq 188059
113 +                   subqt 15086
114 +                     neg 36097
115 +                     and 233993
116 +                      or 109332
117 +                     xor 1384
118 +                    btst 111924
119 +                    bset 25029
120 +                    bclr 10551
121 +                    mult 28147
122 +                   imult 69148
123 +                     div 64102
124 +                     abs 159394
125 +                    shlq 194690
126 +                    shrq 292587
127 +                   sharq 192649
128 +                    rorq 58672
129 +                     cmp 244963
130 +                    cmpq 114834
131 +                    move 833472
132 +                   moveq 56427
133 +                  moveta 220814
134 +                  movefa 170678
135 +                   movei 152025
136 +                   loadw 108220
137 +                    load 430936
138 +                  storew 3036
139 +                   store 372490
140 +                 move_pc 2330
141 +                    jump 349134
142 +                      jr 529171
143                     mmult 64904
144 +                     nop 432179
145 */
146
147 // Various bits
148
// Interrupt latch clear bits of the flags register. When the flags register
// is written, these bits are shifted right by 3 and the corresponding bits
// are cleared in gpu_control (see GPUWriteLong, register offset 0x00).
// They have the same values as INT_CLR0..INT_CLR4 below.
#define CINT0FLAG			0x0200
#define CINT1FLAG			0x0400
#define CINT2FLAG			0x0800
#define CINT3FLAG			0x1000
#define CINT4FLAG			0x2000
// All five interrupt latch clear bits together
#define CINT04FLAGS			(CINT0FLAG | CINT1FLAG | CINT2FLAG | CINT3FLAG | CINT4FLAG)

// GPU_FLAGS bits

// Bits 0-2 mirror the separately stored gpu_flag_z / gpu_flag_c / gpu_flag_n
#define ZERO_FLAG		0x0001
#define CARRY_FLAG		0x0002
#define NEGA_FLAG		0x0004
// Interrupt mask; writes of a 1 from software are ignored (masked out in
// GPUWriteLong), only the IRQ logic is meant to set it
#define IMASK			0x0008
// Interrupt enable bits 0-4
#define INT_ENA0		0x0010
#define INT_ENA1		0x0020
#define INT_ENA2		0x0040
#define INT_ENA3		0x0080
#define INT_ENA4		0x0100
// Interrupt latch clear bits 0-4 (same values as CINTnFLAG above)
#define INT_CLR0		0x0200
#define INT_CLR1		0x0400
#define INT_CLR2		0x0800
#define INT_CLR3		0x1000
#define INT_CLR4		0x2000
// Register page select (logged as "REGPAGE set/cleared" under GPU_DEBUG)
#define REGPAGE			0x4000
// NOTE(review): presumably the DMA enable bit, going by the name -- it is not
// used in the visible part of the file
#define DMAEN			0x8000
174
175 // External global variables
176
177 extern int start_logging;
178 extern int gpu_start_log;
179
180 // Private function prototypes
181
182 void GPUUpdateRegisterBanks(void);
183 void GPUDumpDisassembly(void);
184 void GPUDumpRegisters(void);
185 void GPUDumpMemory(void);
186
187 static void gpu_opcode_add(void);
188 static void gpu_opcode_addc(void);
189 static void gpu_opcode_addq(void);
190 static void gpu_opcode_addqt(void);
191 static void gpu_opcode_sub(void);
192 static void gpu_opcode_subc(void);
193 static void gpu_opcode_subq(void);
194 static void gpu_opcode_subqt(void);
195 static void gpu_opcode_neg(void);
196 static void gpu_opcode_and(void);
197 static void gpu_opcode_or(void);
198 static void gpu_opcode_xor(void);
199 static void gpu_opcode_not(void);
200 static void gpu_opcode_btst(void);
201 static void gpu_opcode_bset(void);
202 static void gpu_opcode_bclr(void);
203 static void gpu_opcode_mult(void);
204 static void gpu_opcode_imult(void);
205 static void gpu_opcode_imultn(void);
206 static void gpu_opcode_resmac(void);
207 static void gpu_opcode_imacn(void);
208 static void gpu_opcode_div(void);
209 static void gpu_opcode_abs(void);
210 static void gpu_opcode_sh(void);
211 static void gpu_opcode_shlq(void);
212 static void gpu_opcode_shrq(void);
213 static void gpu_opcode_sha(void);
214 static void gpu_opcode_sharq(void);
215 static void gpu_opcode_ror(void);
216 static void gpu_opcode_rorq(void);
217 static void gpu_opcode_cmp(void);
218 static void gpu_opcode_cmpq(void);
219 static void gpu_opcode_sat8(void);
220 static void gpu_opcode_sat16(void);
221 static void gpu_opcode_move(void);
222 static void gpu_opcode_moveq(void);
223 static void gpu_opcode_moveta(void);
224 static void gpu_opcode_movefa(void);
225 static void gpu_opcode_movei(void);
226 static void gpu_opcode_loadb(void);
227 static void gpu_opcode_loadw(void);
228 static void gpu_opcode_load(void);
229 static void gpu_opcode_loadp(void);
230 static void gpu_opcode_load_r14_indexed(void);
231 static void gpu_opcode_load_r15_indexed(void);
232 static void gpu_opcode_storeb(void);
233 static void gpu_opcode_storew(void);
234 static void gpu_opcode_store(void);
235 static void gpu_opcode_storep(void);
236 static void gpu_opcode_store_r14_indexed(void);
237 static void gpu_opcode_store_r15_indexed(void);
238 static void gpu_opcode_move_pc(void);
239 static void gpu_opcode_jump(void);
240 static void gpu_opcode_jr(void);
241 static void gpu_opcode_mmult(void);
242 static void gpu_opcode_mtoi(void);
243 static void gpu_opcode_normi(void);
244 static void gpu_opcode_nop(void);
245 static void gpu_opcode_load_r14_ri(void);
246 static void gpu_opcode_load_r15_ri(void);
247 static void gpu_opcode_store_r14_ri(void);
248 static void gpu_opcode_store_r15_ri(void);
249 static void gpu_opcode_sat24(void);
250 static void gpu_opcode_pack(void);
251
252 // This is wrong, since it doesn't take pipeline effects into account. !!! FIX !!!
253 /*uint8 gpu_opcode_cycles[64] =
254 {
255         3,  3,  3,  3,  3,  3,  3,  3,
256         3,  3,  3,  3,  3,  3,  3,  3,
257         3,  3,  1,  3,  1, 18,  3,  3,
258         3,  3,  3,  3,  3,  3,  3,  3,
259         3,  3,  2,  2,  2,  2,  3,  4,
260         5,  4,  5,  6,  6,  1,  1,  1,
261         1,  2,  2,  2,  1,  1,  9,  3,
262         3,  1,  6,  6,  2,  2,  3,  3
263 };//*/
264 //Here's a QnD kludge...
265 //This is wrong, wrong, WRONG, but it seems to work for the time being...
266 //(That is, it fixes Flip Out which relies on GPU timing rather than semaphores. Bad developers! Bad!)
267 //What's needed here is a way to take pipeline effects into account (including pipeline stalls!)...
268 /*uint8 gpu_opcode_cycles[64] =
269 {
270         1,  1,  1,  1,  1,  1,  1,  1,
271         1,  1,  1,  1,  1,  1,  1,  1,
272         1,  1,  1,  1,  1,  9,  1,  1,
273         1,  1,  1,  1,  1,  1,  1,  1,
274         1,  1,  1,  1,  1,  1,  1,  2,
275         2,  2,  2,  3,  3,  1,  1,  1,
276         1,  1,  1,  1,  1,  1,  4,  1,
277         1,  1,  3,  3,  1,  1,  1,  1
278 };//*/
279 uint8 gpu_opcode_cycles[64] =
280 {
281         1,  1,  1,  1,  1,  1,  1,  1,
282         1,  1,  1,  1,  1,  1,  1,  1,
283         1,  1,  1,  1,  1,  1,  1,  1,
284         1,  1,  1,  1,  1,  1,  1,  1,
285         1,  1,  1,  1,  1,  1,  1,  1,
286         1,  1,  1,  1,  1,  1,  1,  1,
287         1,  1,  1,  1,  1,  1,  1,  1,
288         1,  1,  1,  1,  1,  1,  1,  1
289 };//*/
290
291 void (*gpu_opcode[64])()=
292 {
293         gpu_opcode_add,                                 gpu_opcode_addc,                                gpu_opcode_addq,                                gpu_opcode_addqt,
294         gpu_opcode_sub,                                 gpu_opcode_subc,                                gpu_opcode_subq,                                gpu_opcode_subqt,
295         gpu_opcode_neg,                                 gpu_opcode_and,                                 gpu_opcode_or,                                  gpu_opcode_xor,
296         gpu_opcode_not,                                 gpu_opcode_btst,                                gpu_opcode_bset,                                gpu_opcode_bclr,
297         gpu_opcode_mult,                                gpu_opcode_imult,                               gpu_opcode_imultn,                              gpu_opcode_resmac,
298         gpu_opcode_imacn,                               gpu_opcode_div,                                 gpu_opcode_abs,                                 gpu_opcode_sh,
299         gpu_opcode_shlq,                                gpu_opcode_shrq,                                gpu_opcode_sha,                                 gpu_opcode_sharq,
300         gpu_opcode_ror,                                 gpu_opcode_rorq,                                gpu_opcode_cmp,                                 gpu_opcode_cmpq,
301         gpu_opcode_sat8,                                gpu_opcode_sat16,                               gpu_opcode_move,                                gpu_opcode_moveq,
302         gpu_opcode_moveta,                              gpu_opcode_movefa,                              gpu_opcode_movei,                               gpu_opcode_loadb,
303         gpu_opcode_loadw,                               gpu_opcode_load,                                gpu_opcode_loadp,                               gpu_opcode_load_r14_indexed,
304         gpu_opcode_load_r15_indexed,    gpu_opcode_storeb,                              gpu_opcode_storew,                              gpu_opcode_store,
305         gpu_opcode_storep,                              gpu_opcode_store_r14_indexed,   gpu_opcode_store_r15_indexed,   gpu_opcode_move_pc,
306         gpu_opcode_jump,                                gpu_opcode_jr,                                  gpu_opcode_mmult,                               gpu_opcode_mtoi,
307         gpu_opcode_normi,                               gpu_opcode_nop,                                 gpu_opcode_load_r14_ri,                 gpu_opcode_load_r15_ri,
308         gpu_opcode_store_r14_ri,                gpu_opcode_store_r15_ri,                gpu_opcode_sat24,                               gpu_opcode_pack,
309 };
310
// GPU local work RAM (0x1000 bytes), addressed as GPU_WORK_RAM_BASE + index
// and stored in big-endian byte order by the read/write handlers below.
static uint8 gpu_ram_8[0x1000];
// Program counter; read/written through control register offset 0x10
uint32 gpu_pc;
// NOTE(review): presumably the multiply/accumulate accumulator -- not used in
// the visible part of the file
static uint32 gpu_acc;
// Returned by long reads of control register offset 0x1C
static uint32 gpu_remain;
// Returned by long reads of control register offset 0x18
static uint32 gpu_hidata;
// Flags register (control offset 0x00). Bits 0-2 are refreshed from
// gpu_flag_z/c/n every time the register is read.
static uint32 gpu_flags;
// Control register offset 0x04
static uint32 gpu_matrix_control;
// Control register offset 0x08; always kept long aligned on write
static uint32 gpu_pointer_to_matrix;
// Control register offset 0x0C
static uint32 gpu_data_organization;
// Control register offset 0x14; bit 0 is tested by GPU_RUNNING
static uint32 gpu_control;
// Written through control register offset 0x1C by the byte and word write
// handlers (reads of that offset return gpu_remain instead)
static uint32 gpu_div_control;
// There is a distinct advantage to having these separated out--there's no need to clear
// a bit before writing a result. I.e., if the result of an operation leaves a zero in
// the carry flag, you don't have to zero gpu_flag_c before you can write that zero!
static uint8 gpu_flag_z, gpu_flag_n, gpu_flag_c;
// The two register banks, 32 registers each
static uint32 gpu_reg_bank_0[32];
static uint32 gpu_reg_bank_1[32];
// Bank accessed through the RM/RN macros
static uint32 * gpu_reg;
// Bank accessed through the ALTERNATE_RM/ALTERNATE_RN macros
static uint32 * gpu_alternate_reg;

// NOTE(review): presumably the instruction word being executed -- not used in
// the visible part of the file
static uint32 gpu_instruction;
// Operand fields of the current instruction; used as register indices by
// RM/RN and as immediates by IMM_1/IMM_2
static uint32 gpu_opcode_first_parameter;
static uint32 gpu_opcode_second_parameter;
334
// Nonzero while bit 0 of the control register is set (the debug log reports
// this state as "GPU is RUNNING")
#define GPU_RUNNING		(gpu_control & 0x01)

// Operand accessors: the two operand fields of the current instruction used
// either as register indices (current or alternate bank) or as immediates.
#define RM				gpu_reg[gpu_opcode_first_parameter]
#define RN				gpu_reg[gpu_opcode_second_parameter]
#define ALTERNATE_RM	gpu_alternate_reg[gpu_opcode_first_parameter]
#define ALTERNATE_RN	gpu_alternate_reg[gpu_opcode_second_parameter]
#define IMM_1			gpu_opcode_first_parameter
#define IMM_2			gpu_opcode_second_parameter

// Older flag helpers. NOTE: these carry their own trailing semicolon, so they
// are statements, not expressions; they are left untouched because call sites
// may rely on that.
#define SET_FLAG_Z(r)	(gpu_flag_z = ((r) == 0));
#define SET_FLAG_N(r)	(gpu_flag_n = (((uint32)(r) >> 31) & 0x01));

#define RESET_FLAG_Z()	gpu_flag_z = 0;
#define RESET_FLAG_N()	gpu_flag_n = 0;
#define RESET_FLAG_C()	gpu_flag_c = 0;

// Expression-style flag helpers (no trailing semicolon; use as MACRO(...);)
#define CLR_Z				(gpu_flag_z = 0)
#define CLR_ZN				(gpu_flag_z = gpu_flag_n = 0)
#define CLR_ZNC				(gpu_flag_z = gpu_flag_n = gpu_flag_c = 0)
// Z is set when the result is zero
#define SET_Z(r)			(gpu_flag_z = ((r) == 0))
// N is bit 31 of the result
#define SET_N(r)			(gpu_flag_n = (((uint32)(r) >> 31) & 0x01))
// Carry out of a + b: set when b > ~a, i.e. the unsigned sum wraps past 32 bits
#define SET_C_ADD(a,b)		(gpu_flag_c = ((uint32)(b) > (uint32)(~(a))))
// Borrow out of a - b: set when b > a (unsigned)
#define SET_C_SUB(a,b)		(gpu_flag_c = ((uint32)(b) > (uint32)(a)))
// The combined helpers are single comma expressions rather than a sequence of
// statements, so that "if (x) SET_ZN(r);" guards every flag update instead of
// only the first one. Evaluation order (N, then Z, then C) is unchanged.
#define SET_ZN(r)			(SET_N(r), SET_Z(r))
#define SET_ZNC_ADD(a,b,r)	(SET_N(r), SET_Z(r), SET_C_ADD(a,b))
#define SET_ZNC_SUB(a,b,r)	(SET_N(r), SET_Z(r), SET_C_SUB(a,b))
361
362 uint32 gpu_convert_zero[32] =
363         { 32,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 };
364
// Lookup table of 8 * 32 entries, allocated and filled in by
// build_branch_condition_table(); null until that function has run.
// Layout: index = (flag bits 0-2) * 32 + (5-bit condition code); an entry is
// 1 when the condition holds for those flags, otherwise 0.
uint8 * branch_condition_table = 0;
// Looks up condition code x against the low three flag bits.
// NOTE(review): relies on a variable named jaguar_flags being in scope at the
// point of use; it is not declared in the visible part of the file.
#define BRANCH_CONDITION(x)	branch_condition_table[(x) + ((jaguar_flags & 7) << 5)]

// NOTE(review): presumably per-opcode usage counters (cf. the opcode usage
// statistics quoted near the top of the file); not updated in the visible
// part of the file.
uint32 gpu_opcode_use[64];
369
// Printable opcode mnemonics, indexed by opcode number (same order as the
// gpu_opcode[] dispatch table).
const char * gpu_opcode_str[64] =
{
	/*  0 */ "add",              "addc",              "addq",              "addqt",
	/*  4 */ "sub",              "subc",              "subq",              "subqt",
	/*  8 */ "neg",              "and",               "or",                "xor",
	/* 12 */ "not",              "btst",              "bset",              "bclr",
	/* 16 */ "mult",             "imult",             "imultn",            "resmac",
	/* 20 */ "imacn",            "div",               "abs",               "sh",
	/* 24 */ "shlq",             "shrq",              "sha",               "sharq",
	/* 28 */ "ror",              "rorq",              "cmp",               "cmpq",
	/* 32 */ "sat8",             "sat16",             "move",              "moveq",
	/* 36 */ "moveta",           "movefa",            "movei",             "loadb",
	/* 40 */ "loadw",            "load",              "loadp",             "load_r14_indexed",
	/* 44 */ "load_r15_indexed", "storeb",            "storew",            "store",
	/* 48 */ "storep",           "store_r14_indexed", "store_r15_indexed", "move_pc",
	/* 52 */ "jump",             "jr",                "mmult",             "mtoi",
	/* 56 */ "normi",            "nop",               "load_r14_ri",       "load_r15_ri",
	/* 60 */ "store_r14_ri",     "store_r15_ri",      "sat24",             "pack",
};
389
390 static uint32 gpu_in_exec = 0;
391 static uint32 gpu_releaseTimeSlice_flag = 0;
392
393 void GPUReleaseTimeslice(void)
394 {
395         gpu_releaseTimeSlice_flag = 1;
396 }
397
398 uint32 GPUGetPC(void)
399 {
400         return gpu_pc;
401 }
402
403 void build_branch_condition_table(void)
404 {
405         if (!branch_condition_table)
406         {
407                 branch_condition_table = (uint8 *)malloc(32 * 8 * sizeof(branch_condition_table[0]));
408
409                 if (branch_condition_table)
410                 {
411                         for(int i=0; i<8; i++)
412                         {
413                                 for(int j=0; j<32; j++)
414                                 {
415                                         int result = 1;
416                                         if (j & 1)
417                                                 if (i & ZERO_FLAG)
418                                                         result = 0;
419                                         if (j & 2)
420                                                 if (!(i & ZERO_FLAG))
421                                                         result = 0;
422                                         if (j & 4)
423                                                 if (i & (CARRY_FLAG << (j >> 4)))
424                                                         result = 0;
425                                         if (j & 8)
426                                                 if (!(i & (CARRY_FLAG << (j >> 4))))
427                                                         result = 0;
428                                         branch_condition_table[i * 32 + j] = result;
429                                 }
430                         }
431                 }
432         }
433 }
434
435 //
436 // GPU byte access (read)
437 //
438 uint8 GPUReadByte(uint32 offset, uint32 who/*=UNKNOWN*/)
439 {
440         if (offset >= 0xF02000 && offset <= 0xF020FF)
441                 WriteLog("GPU: ReadByte--Attempt to read from GPU register file by %s!\n", whoName[who]);
442
443         if ((offset >= GPU_WORK_RAM_BASE) && (offset < GPU_WORK_RAM_BASE+0x1000))
444                 return gpu_ram_8[offset & 0xFFF];
445         else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset < GPU_CONTROL_RAM_BASE+0x20))
446         {
447                 uint32 data = GPUReadLong(offset & 0xFFFFFFFC, who);
448
449                 if ((offset & 0x03) == 0)
450                         return data >> 24;
451                 else if ((offset & 0x03) == 1)
452                         return (data >> 16) & 0xFF;
453                 else if ((offset & 0x03) == 2)
454                         return (data >> 8) & 0xFF;
455                 else if ((offset & 0x03) == 3)
456                         return data & 0xFF;
457         }
458
459         return JaguarReadByte(offset, who);
460 }
461
462 //
463 // GPU word access (read)
464 //
465 uint16 GPUReadWord(uint32 offset, uint32 who/*=UNKNOWN*/)
466 {
467         if (offset >= 0xF02000 && offset <= 0xF020FF)
468                 WriteLog("GPU: ReadWord--Attempt to read from GPU register file by %s!\n", whoName[who]);
469
470         if ((offset >= GPU_WORK_RAM_BASE) && (offset < GPU_WORK_RAM_BASE+0x1000))
471         {
472                 offset &= 0xFFF;
473                 uint16 data = ((uint16)gpu_ram_8[offset] << 8) | (uint16)gpu_ram_8[offset+1];
474                 return data;
475         }
476         else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset < GPU_CONTROL_RAM_BASE+0x20))
477         {
478 // This looks and smells wrong...
479 // But it *might* be OK...
480                 if (offset & 0x01)                      // Catch cases 1 & 3... (unaligned read)
481                         return (GPUReadByte(offset, who) << 8) | GPUReadByte(offset+1, who);
482
483                 uint32 data = GPUReadLong(offset & 0xFFFFFFFC, who);
484
485                 if (offset & 0x02)                      // Cases 0 & 2...
486                         return data & 0xFFFF;
487                 else
488                         return data >> 16;
489         }
490
491 //TEMP--Mirror of F03000? No. Writes only...
492 //if (offset >= 0xF0B000 && offset <= 0xF0BFFF)
493 //WriteLog("[GPUR16] --> Possible GPU RAM mirror access by %s!", whoName[who]);
494
495         return JaguarReadWord(offset, who);
496 }
497
498 //
499 // GPU dword access (read)
500 //
501 uint32 GPUReadLong(uint32 offset, uint32 who/*=UNKNOWN*/)
502 {
503         if (offset >= 0xF02000 && offset <= 0xF020FF)
504                 WriteLog("GPU: ReadLong--Attempt to read from GPU register file by %s!\n", whoName[who]);
505
506 //      if ((offset >= GPU_WORK_RAM_BASE) && (offset < GPU_WORK_RAM_BASE + 0x1000))
507         if ((offset >= GPU_WORK_RAM_BASE) && (offset <= GPU_WORK_RAM_BASE + 0x0FFC))
508         {
509                 offset &= 0xFFF;
510                 return ((uint32)gpu_ram_8[offset] << 24) | ((uint32)gpu_ram_8[offset+1] << 16)
511                         | ((uint32)gpu_ram_8[offset+2] << 8) | (uint32)gpu_ram_8[offset+3];//*/
512 //              return GET32(gpu_ram_8, offset);
513         }
514 //      else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset < GPU_CONTROL_RAM_BASE+0x20))
515         else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset <= GPU_CONTROL_RAM_BASE + 0x1C))
516         {
517                 offset &= 0x1F;
518                 switch (offset)
519                 {
520                 case 0x00:
521                         gpu_flag_c = (gpu_flag_c ? 1 : 0);
522                         gpu_flag_z = (gpu_flag_z ? 1 : 0);
523                         gpu_flag_n = (gpu_flag_n ? 1 : 0);
524
525                         gpu_flags = (gpu_flags & 0xFFFFFFF8) | (gpu_flag_n << 2) | (gpu_flag_c << 1) | gpu_flag_z;
526
527                         return gpu_flags & 0xFFFFC1FF;
528                 case 0x04:
529                         return gpu_matrix_control;
530                 case 0x08:
531                         return gpu_pointer_to_matrix;
532                 case 0x0C:
533                         return gpu_data_organization;
534                 case 0x10:
535                         return gpu_pc;
536                 case 0x14:
537                         return gpu_control;
538                 case 0x18:
539                         return gpu_hidata;
540                 case 0x1C:
541                         return gpu_remain;
542                 default:                                                                // unaligned long read
543 #ifdef GPU_DEBUG
544                         WriteLog("GPU: Read32--unaligned 32 bit read at %08X by %s.\n", GPU_CONTROL_RAM_BASE + offset, whoName[who]);
545 #endif  // GPU_DEBUG
546                         return 0;
547                 }
548         }
549 //TEMP--Mirror of F03000? No. Writes only...
550 //if (offset >= 0xF0B000 && offset <= 0xF0BFFF)
551 //      WriteLog("[GPUR32] --> Possible GPU RAM mirror access by %s!\n", whoName[who]);
552 /*if (offset >= 0xF1D000 && offset <= 0xF1DFFF)
553         WriteLog("[GPUR32] --> Reading from Wavetable ROM!\n");//*/
554
555         return (JaguarReadWord(offset, who) << 16) | JaguarReadWord(offset + 2, who);
556 }
557
558 //
559 // GPU byte access (write)
560 //
561 void GPUWriteByte(uint32 offset, uint8 data, uint32 who/*=UNKNOWN*/)
562 {
563         if (offset >= 0xF02000 && offset <= 0xF020FF)
564                 WriteLog("GPU: WriteByte--Attempt to write to GPU register file by %s!\n", whoName[who]);
565
566         if ((offset >= GPU_WORK_RAM_BASE) && (offset <= GPU_WORK_RAM_BASE + 0x0FFF))
567         {
568                 gpu_ram_8[offset & 0xFFF] = data;
569
570 //This is the same stupid worthless code that was in the DSP!!! AARRRGGGGHHHHH!!!!!!
571 /*              if (!gpu_in_exec)
572                 {
573                         m68k_end_timeslice();
574                         dsp_releaseTimeslice();
575                 }*/
576                 return;
577         }
578         else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset <= GPU_CONTROL_RAM_BASE + 0x1F))
579         {
580                 uint32 reg = offset & 0x1C;
581                 int bytenum = offset & 0x03;
582
583 //This is definitely wrong!
584                 if ((reg >= 0x1C) && (reg <= 0x1F))
585                         gpu_div_control = (gpu_div_control & (~(0xFF << (bytenum << 3)))) | (data << (bytenum << 3));
586                 else
587                 {
588                         uint32 old_data = GPUReadLong(offset & 0xFFFFFFC, who);
589                         bytenum = 3 - bytenum; // convention motorola !!!
590                         old_data = (old_data & (~(0xFF << (bytenum << 3)))) | (data << (bytenum << 3));
591                         GPUWriteLong(offset & 0xFFFFFFC, old_data, who);
592                 }
593                 return;
594         }
595 //      WriteLog("gpu: writing %.2x at 0x%.8x\n",data,offset);
596         JaguarWriteByte(offset, data, who);
597 }
598
599 //
600 // GPU word access (write)
601 //
602 void GPUWriteWord(uint32 offset, uint16 data, uint32 who/*=UNKNOWN*/)
603 {
604         if (offset >= 0xF02000 && offset <= 0xF020FF)
605                 WriteLog("GPU: WriteWord--Attempt to write to GPU register file by %s!\n", whoName[who]);
606
607         if ((offset >= GPU_WORK_RAM_BASE) && (offset <= GPU_WORK_RAM_BASE + 0x0FFE))
608         {
609                 gpu_ram_8[offset & 0xFFF] = (data>>8) & 0xFF;
610                 gpu_ram_8[(offset+1) & 0xFFF] = data & 0xFF;//*/
611 /*              offset &= 0xFFF;
612                 SET16(gpu_ram_8, offset, data);//*/
613
614 /*if (offset >= 0xF03214 && offset < 0xF0321F)
615         WriteLog("GPU: Writing WORD (%04X) to GPU RAM (%08X)...\n", data, offset);//*/
616
617
618 //This is the same stupid worthless code that was in the DSP!!! AARRRGGGGHHHHH!!!!!!
619 /*              if (!gpu_in_exec)
620                 {
621                         m68k_end_timeslice();
622                         dsp_releaseTimeslice();
623                 }*/
624                 return;
625         }
626         else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset <= GPU_CONTROL_RAM_BASE + 0x1E))
627         {
628                 if (offset & 0x01)              // This is supposed to weed out unaligned writes, but does nothing...
629                 {
630 #ifdef GPU_DEBUG
631                         WriteLog("GPU: Write16--unaligned write @ %08X [%04X]\n", offset, data);
632                         GPUDumpRegisters();
633 #endif  // GPU_DEBUG
634                         return;
635                 }
636 //Dual locations in this range: $1C Divide unit remainder/Divide unit control (R/W)
637 //This just literally sucks.
638                 if ((offset & 0x1C) == 0x1C)
639                 {
640 //This doesn't look right either--handles cases 1, 2, & 3 all the same!
641                         if (offset & 0x02)
642                                 gpu_div_control = (gpu_div_control & 0xFFFF0000) | (data & 0xFFFF);
643                         else
644                                 gpu_div_control = (gpu_div_control & 0x0000FFFF) | ((data & 0xFFFF) << 16);
645                 }
646                 else
647                 {
648 //WriteLog("[GPU W16:%08X,%04X]", offset, data);
649                         uint32 old_data = GPUReadLong(offset & 0xFFFFFFC, who);
650
651                         if (offset & 0x02)
652                                 old_data = (old_data & 0xFFFF0000) | (data & 0xFFFF);
653                         else
654                                 old_data = (old_data & 0x0000FFFF) | ((data & 0xFFFF) << 16);
655
656                         GPUWriteLong(offset & 0xFFFFFFC, old_data, who);
657                 }
658
659                 return;
660         }
661         else if ((offset == GPU_WORK_RAM_BASE + 0x0FFF) || (GPU_CONTROL_RAM_BASE + 0x1F))
662         {
663 #ifdef GPU_DEBUG
664                         WriteLog("GPU: Write16--unaligned write @ %08X by %s [%04X]!\n", offset, whoName[who], data);
665                         GPUDumpRegisters();
666 #endif  // GPU_DEBUG
667                 return;
668         }
669
670         // Have to be careful here--this can cause an infinite loop!
671         JaguarWriteWord(offset, data, who);
672 }
673
674 //
675 // GPU dword access (write)
676 //
677 void GPUWriteLong(uint32 offset, uint32 data, uint32 who/*=UNKNOWN*/)
678 {
679         if (offset >= 0xF02000 && offset <= 0xF020FF)
680                 WriteLog("GPU: WriteLong--Attempt to write to GPU register file by %s!\n", whoName[who]);
681
682 //      if ((offset >= GPU_WORK_RAM_BASE) && (offset < GPU_WORK_RAM_BASE + 0x1000))
683         if ((offset >= GPU_WORK_RAM_BASE) && (offset <= GPU_WORK_RAM_BASE + 0x0FFC))
684         {
685 #ifdef GPU_DEBUG
686                 if (offset & 0x03)
687                 {
688                         WriteLog("GPU: Write32--unaligned write @ %08X [%08X] by %s\n", offset, data, whoName[who]);
689                         GPUDumpRegisters();
690                 }
691 #endif  // GPU_DEBUG
692
693                 offset &= 0xFFF;
694                 SET32(gpu_ram_8, offset, data);
695                 return;
696         }
697 //      else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset < GPU_CONTROL_RAM_BASE+0x20))
698         else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset <= GPU_CONTROL_RAM_BASE + 0x1C))
699         {
700                 offset &= 0x1F;
701                 switch (offset)
702                 {
703                 case 0x00:
704                 {
705                         bool IMASKCleared = (gpu_flags & IMASK) && !(data & IMASK);
706                         // NOTE: According to the JTRM, writing a 1 to IMASK has no effect; only the
707                         //       IRQ logic can set it. So we mask it out here to prevent problems...
708                         gpu_flags = data & (~IMASK);
709                         gpu_flag_z = gpu_flags & ZERO_FLAG;
710                         gpu_flag_c = (gpu_flags & CARRY_FLAG) >> 1;
711                         gpu_flag_n = (gpu_flags & NEGA_FLAG) >> 2;
712                         GPUUpdateRegisterBanks();
713                         gpu_control &= ~((gpu_flags & CINT04FLAGS) >> 3);       // Interrupt latch clear bits
714 //Writing here is only an interrupt enable--this approach is just plain wrong!
715 //                      GPUHandleIRQs();
716 //This, however, is A-OK! ;-)
717                         if (IMASKCleared)                                               // If IMASK was cleared,
718                                 GPUHandleIRQs();                                        // see if any other interrupts need servicing!
719 #ifdef GPU_DEBUG
720                         if (gpu_flags & (INT_ENA0 | INT_ENA1 | INT_ENA2 | INT_ENA3 | INT_ENA4))
721                                 WriteLog("GPU: Interrupt enable set by %s! Bits: %02X\n", whoName[who], (gpu_flags >> 4) & 0x1F);
722                         WriteLog("GPU: REGPAGE %s...\n", (gpu_flags & REGPAGE ? "set" : "cleared"));
723 #endif  // GPU_DEBUG
724                         break;
725                 }
726                 case 0x04:
727                         gpu_matrix_control = data;
728                         break;
729                 case 0x08:
730                         // This can only point to long aligned addresses
731                         gpu_pointer_to_matrix = data & 0xFFFFFFFC;
732                         break;
733                 case 0x0C:
734                         gpu_data_organization = data;
735                         break;
736                 case 0x10:
737                         gpu_pc = data;
738 #ifdef GPU_DEBUG
739 WriteLog("GPU: %s setting GPU PC to %08X %s\n", whoName[who], gpu_pc, (GPU_RUNNING ? "(GPU is RUNNING!)" : ""));//*/
740 #endif  // GPU_DEBUG
741                         break;
742                 case 0x14:
743                 {
744 //                      uint32 gpu_was_running = GPU_RUNNING;
745                         data &= ~0xF7C0;                // Disable writes to INT_LAT0-4 & TOM version number
746
747                         // check for GPU -> CPU interrupt
748                         if (data & 0x02)
749                         {
750 //WriteLog("GPU->CPU interrupt\n");
751                                 if (TOMIRQEnabled(IRQ_GPU))
752                                 {
753 //This is the programmer's responsibility, to make sure the handler is valid, not ours!
754 //                                      if ((TOMIRQEnabled(IRQ_GPU))// && (JaguarInterruptHandlerIsValid(64)))
755                                         {
756                                                 TOMSetPendingGPUInt();
757                                                 m68k_set_irq(2);                        // Set 68000 IPL 2
758                                                 GPUReleaseTimeslice();
759                                         }
760                                 }
761                                 data &= ~0x02;
762                         }
763
764                         // check for CPU -> GPU interrupt #0
765                         if (data & 0x04)
766                         {
767 //WriteLog("CPU->GPU interrupt\n");
768                                 GPUSetIRQLine(0, ASSERT_LINE);
769                                 m68k_end_timeslice();
770                                 DSPReleaseTimeslice();
771                                 data &= ~0x04;
772                         }
773
774                         // single stepping
775                         if (data & 0x10)
776                         {
777                                 //WriteLog("asked to perform a single step (single step is %senabled)\n",(data&0x8)?"":"not ");
778                         }
779                         gpu_control = (gpu_control & 0xF7C0) | (data & (~0xF7C0));
780
781                         // if gpu wasn't running but is now running, execute a few cycles
782 #ifndef GPU_SINGLE_STEPPING
783 /*                      if (!gpu_was_running && GPU_RUNNING)
784 #ifdef GPU_DEBUG
785                         {
786                                 WriteLog("GPU: Write32--About to do stupid braindead GPU execution for 200 cycles.\n");
787 #endif  // GPU_DEBUG
788                                 GPUExec(200);
789 #ifdef GPU_DEBUG
790                         }
791 #endif  // GPU_DEBUG//*/
792 #else
793                         if (gpu_control & 0x18)
794                                 GPUExec(1);
795 #endif  // #ifndef GPU_SINGLE_STEPPING
796 #ifdef GPU_DEBUG
797 WriteLog("Write to GPU CTRL by %s: %08X ", whoName[who], data);
798 if (GPU_RUNNING)
799         WriteLog(" --> Starting to run at %08X by %s...", gpu_pc, whoName[who]);
800 else
801         WriteLog(" --> Stopped by %s! (GPU_PC: %08X)", whoName[who], gpu_pc);
802 WriteLog("\n");
803 #endif  // GPU_DEBUG
804 //if (GPU_RUNNING)
805 //      GPUDumpDisassembly();
806 /*if (GPU_RUNNING)
807 {
808         if (gpu_pc == 0xF035D8)
809         {
810 //              GPUDumpDisassembly();
811 //              log_done();
812 //              exit(1);
813                 gpu_control &= 0xFFFFFFFE;      // Don't run it and let's see what happens!
814 //Hmm. Seems to lock up when going into the demo...
815 //Try to disable the collision altogether!
816         }
817 }//*/
818 extern int effect_start5;
819 static bool finished = false;
820 //if (GPU_RUNNING && effect_start5 && !finished)
821 if (GPU_RUNNING && effect_start5 && gpu_pc == 0xF035D8)
822 {
823         // Let's do a dump of $6528!
824 /*      uint32 numItems = JaguarReadWord(0x6BD6);
825         WriteLog("\nDump of $6528: %u items.\n\n", numItems);
826         for(int i=0; i<numItems*3*4; i+=3*4)
827         {
828                 WriteLog("\t%04X: %08X %08X %08X -> ", 0x6528+i, JaguarReadLong(0x6528+i),
829                         JaguarReadLong(0x6528+i+4), JaguarReadLong(0x6528+i+8));
830                 uint16 link = JaguarReadWord(0x6528+i+8+2);
831                 for(int j=0; j<40; j+=4)
832                         WriteLog("%08X ", JaguarReadLong(link + j));
833                 WriteLog("\n");
834         }
835         WriteLog("\n");//*/
836         // Let's try a manual blit here...
837 //This isn't working the way it should! !!! FIX !!!
838 //Err, actually, it is.
839 // NOW, it works right! Problem solved!!! It's a blitter bug!
840 /*      uint32 src = 0x4D54, dst = 0xF03000, width = 10 * 4;
841         for(int y=0; y<127; y++)
842         {
843                 for(int x=0; x<2; x++)
844                 {
845                         JaguarWriteLong(dst, JaguarReadLong(src));
846
847                         src += 4;
848                         dst += 4;
849                 }
850                 src += width - (2 * 4);
851         }//*/
852 /*      finished = true;
853         doGPUDis = true;
854         WriteLog("\nGPU: About to execute collision detection code.\n\n");//*/
855
856 /*      WriteLog("\nGPU: About to execute collision detection code. Data @ 4D54:\n\n");
857         int count = 0;
858         for(int i=0x004D54; i<0x004D54+2048; i++)
859         {
860                 WriteLog("%02X ", JaguarReadByte(i));
861                 count++;
862                 if (count == 32)
863                 {
864                         count = 0;
865                         WriteLog("\n");
866                 }
867         }
868         WriteLog("\n\nData @ F03000:\n\n");
869         count = 0;
870         for(int i=0xF03000; i<0xF03200; i++)
871         {
872                 WriteLog("%02X ", JaguarReadByte(i));
873                 count++;
874                 if (count == 32)
875                 {
876                         count = 0;
877                         WriteLog("\n");
878                 }
879         }
880         WriteLog("\n\n");
881         log_done();
882         exit(0);//*/
883 }
884 //if (!GPU_RUNNING)
885 //      doGPUDis = false;
886 /*if (!GPU_RUNNING && finished)
887 {
888         WriteLog("\nGPU: Finished collision detection code. Exiting!\n\n");
889         GPUDumpRegisters();
890         log_done();
891         exit(0);
892 }//*/
893                         // (?) If we're set running by the M68K (or DSP?) then end its timeslice to
894                         // allow the GPU a chance to run...
895                         // Yes! This partially fixed Trevor McFur...
896                         if (GPU_RUNNING)
897                                 m68k_end_timeslice();
898                         break;
899                 }
900                 case 0x18:
901                         gpu_hidata = data;
902                         break;
903                 case 0x1C:
904                         gpu_div_control = data;
905                         break;
906 //              default:   // unaligned long write
907                         //exit(0);
908                         //__asm int 3
909                 }
910                 return;
911         }
912
913 //      JaguarWriteWord(offset, (data >> 16) & 0xFFFF, who);
914 //      JaguarWriteWord(offset+2, data & 0xFFFF, who);
915 // We're a 32-bit processor, we can do a long write...!
916         JaguarWriteLong(offset, data, who);
917 }
918
919 //
920 // Change register banks if necessary
921 //
922 void GPUUpdateRegisterBanks(void)
923 {
924         int bank = (gpu_flags & REGPAGE);               // REGPAGE bit
925
926         if (gpu_flags & IMASK)                                  // IMASK bit
927                 bank = 0;                                                       // IMASK forces main bank to be bank 0
928
929         if (bank)
930                 gpu_reg = gpu_reg_bank_1, gpu_alternate_reg = gpu_reg_bank_0;
931         else
932                 gpu_reg = gpu_reg_bank_0, gpu_alternate_reg = gpu_reg_bank_1;
933 }
934
935 void GPUHandleIRQs(void)
936 {
937         // Bail out if we're already in an interrupt!
938         if (gpu_flags & IMASK)
939                 return;
940
941         // Get the interrupt latch & enable bits
942         uint32 bits = (gpu_control >> 6) & 0x1F, mask = (gpu_flags >> 4) & 0x1F;
943
944         // Bail out if latched interrupts aren't enabled
945         bits &= mask;
946         if (!bits)
947                 return;
948
949         // Determine which interrupt to service
950         uint32 which = 0; //Isn't there a #pragma to disable this warning???
951         if (bits & 0x01)
952                 which = 0;
953         if (bits & 0x02)
954                 which = 1;
955         if (bits & 0x04)
956                 which = 2;
957         if (bits & 0x08)
958                 which = 3;
959         if (bits & 0x10)
960                 which = 4;
961
962         if (start_logging)
963                 WriteLog("GPU: Generating IRQ #%i\n", which);
964
965         // set the interrupt flag
966         gpu_flags |= IMASK;
967         GPUUpdateRegisterBanks();
968
969         // subqt  #4,r31                ; pre-decrement stack pointer
970         // move  pc,r30                 ; address of interrupted code
971         // store  r30,(r31)     ; store return address
972         gpu_reg[31] -= 4;
973         GPUWriteLong(gpu_reg[31], gpu_pc - 2, GPU);
974
975         // movei  #service_address,r30  ; pointer to ISR entry
976         // jump  (r30)                                  ; jump to ISR
977         // nop
978         gpu_pc = gpu_reg[30] = GPU_WORK_RAM_BASE + (which * 0x10);
979 }
980
981 void GPUSetIRQLine(int irqline, int state)
982 {
983         if (start_logging)
984                 WriteLog("GPU: Setting GPU IRQ line #%i\n", irqline);
985
986         uint32 mask = 0x0040 << irqline;
987         gpu_control &= ~mask;                           // Clear the interrupt latch
988
989         if (state)
990         {
991                 gpu_control |= mask;                    // Assert the interrupt latch
992                 GPUHandleIRQs();                                // And handle the interrupt...
993         }
994 }
995
996 //TEMPORARY: Testing only!
997 //#include "gpu2.h"
998 //#include "gpu3.h"
999
1000 void GPUInit(void)
1001 {
1002 //      memory_malloc_secure((void **)&gpu_ram_8, 0x1000, "GPU work RAM");
1003 //      memory_malloc_secure((void **)&gpu_reg_bank_0, 32 * sizeof(int32), "GPU bank 0 regs");
1004 //      memory_malloc_secure((void **)&gpu_reg_bank_1, 32 * sizeof(int32), "GPU bank 1 regs");
1005
1006         build_branch_condition_table();
1007
1008         GPUReset();
1009
1010 //TEMPORARY: Testing only!
1011 //      gpu2_init();
1012 //      gpu3_init();
1013 }
1014
1015 void GPUReset(void)
1016 {
1017         // GPU registers (directly visible)
1018         gpu_flags                         = 0x00000000;
1019         gpu_matrix_control    = 0x00000000;
1020         gpu_pointer_to_matrix = 0x00000000;
1021         gpu_data_organization = 0xFFFFFFFF;
1022         gpu_pc                            = 0x00F03000;
1023         gpu_control                       = 0x00002800;                 // Correctly sets this as TOM Rev. 2
1024         gpu_hidata                        = 0x00000000;
1025         gpu_remain                        = 0x00000000;                 // These two registers are RO/WO
1026         gpu_div_control           = 0x00000000;
1027
1028         // GPU internal register
1029         gpu_acc                           = 0x00000000;
1030
1031         gpu_reg = gpu_reg_bank_0;
1032         gpu_alternate_reg = gpu_reg_bank_1;
1033
1034         for(int i=0; i<32; i++)
1035                 gpu_reg[i] = gpu_alternate_reg[i] = 0x00000000;
1036
1037         CLR_ZNC;
1038         memset(gpu_ram_8, 0xFF, 0x1000);
1039         gpu_in_exec = 0;
1040 //not needed    GPUInterruptPending = false;
1041         GPUResetStats();
1042 }
1043
1044 uint32 GPUReadPC(void)
1045 {
1046         return gpu_pc;
1047 }
1048
1049 void GPUResetStats(void)
1050 {
1051         for(uint32 i=0; i<64; i++)
1052                 gpu_opcode_use[i] = 0;
1053         WriteLog("--> GPU stats were reset!\n");
1054 }
1055
1056 void GPUDumpDisassembly(void)
1057 {
1058         char buffer[512];
1059
1060         WriteLog("\n---[GPU code at 00F03000]---------------------------\n");
1061         uint32 j = 0xF03000;
1062         while (j <= 0xF03FFF)
1063         {
1064                 uint32 oldj = j;
1065                 j += dasmjag(JAGUAR_GPU, buffer, j);
1066                 WriteLog("\t%08X: %s\n", oldj, buffer);
1067         }
1068 }
1069
1070 void GPUDumpRegisters(void)
1071 {
1072         WriteLog("\n---[GPU flags: NCZ %d%d%d]-----------------------\n", gpu_flag_n, gpu_flag_c, gpu_flag_z);
1073         WriteLog("\nRegisters bank 0\n");
1074         for(int j=0; j<8; j++)
1075         {
1076                 WriteLog("\tR%02i = %08X R%02i = %08X R%02i = %08X R%02i = %08X\n",
1077                                                   (j << 2) + 0, gpu_reg_bank_0[(j << 2) + 0],
1078                                                   (j << 2) + 1, gpu_reg_bank_0[(j << 2) + 1],
1079                                                   (j << 2) + 2, gpu_reg_bank_0[(j << 2) + 2],
1080                                                   (j << 2) + 3, gpu_reg_bank_0[(j << 2) + 3]);
1081         }
1082         WriteLog("Registers bank 1\n");
1083         for(int j=0; j<8; j++)
1084         {
1085                 WriteLog("\tR%02i = %08X R%02i = %08X R%02i = %08X R%02i = %08X\n",
1086                                                   (j << 2) + 0, gpu_reg_bank_1[(j << 2) + 0],
1087                                                   (j << 2) + 1, gpu_reg_bank_1[(j << 2) + 1],
1088                                                   (j << 2) + 2, gpu_reg_bank_1[(j << 2) + 2],
1089                                                   (j << 2) + 3, gpu_reg_bank_1[(j << 2) + 3]);
1090         }
1091 }
1092
1093 void GPUDumpMemory(void)
1094 {
1095         WriteLog("\n---[GPU data at 00F03000]---------------------------\n");
1096         for(int i=0; i<0xFFF; i+=4)
1097                 WriteLog("\t%08X: %02X %02X %02X %02X\n", 0xF03000+i, gpu_ram_8[i],
1098                         gpu_ram_8[i+1], gpu_ram_8[i+2], gpu_ram_8[i+3]);
1099 }
1100
1101 void GPUDone(void)
1102 {
1103         WriteLog("GPU: Stopped at PC=%08X (GPU %s running)\n", (unsigned int)gpu_pc, GPU_RUNNING ? "was" : "wasn't");
1104
1105         // Get the interrupt latch & enable bits
1106         uint8 bits = (gpu_control >> 6) & 0x1F, mask = (gpu_flags >> 4) & 0x1F;
1107         WriteLog("GPU: Latch bits = %02X, enable bits = %02X\n", bits, mask);
1108
1109         GPUDumpRegisters();
1110         GPUDumpDisassembly();
1111
1112         WriteLog("\nGPU opcodes use:\n");
1113         for(int i=0; i<64; i++)
1114         {
1115                 if (gpu_opcode_use[i])
1116                         WriteLog("\t%17s %lu\n", gpu_opcode_str[i], gpu_opcode_use[i]);
1117         }
1118         WriteLog("\n");
1119
1120 //      memory_free(gpu_ram_8);
1121 //      memory_free(gpu_reg_bank_0);
1122 //      memory_free(gpu_reg_bank_1);
1123 }
1124
1125 //
1126 // Main GPU execution core
1127 //
1128 static int testCount = 1;
1129 static int len = 0;
1130 static bool tripwire = false;
1131 void GPUExec(int32 cycles)
1132 {
1133         if (!GPU_RUNNING)
1134                 return;
1135
1136 #ifdef GPU_SINGLE_STEPPING
1137         if (gpu_control & 0x18)
1138         {
1139                 cycles = 1;
1140                 gpu_control &= ~0x10;
1141         }
1142 #endif
1143         GPUHandleIRQs();
1144         gpu_releaseTimeSlice_flag = 0;
1145         gpu_in_exec++;
1146
1147         while (cycles > 0 && GPU_RUNNING)
1148         {
1149 if (gpu_ram_8[0x054] == 0x98 && gpu_ram_8[0x055] == 0x0A && gpu_ram_8[0x056] == 0x03
1150         && gpu_ram_8[0x057] == 0x00 && gpu_ram_8[0x058] == 0x00 && gpu_ram_8[0x059] == 0x00)
1151 {
1152         if (gpu_pc == 0xF03000)
1153         {
1154                 extern uint32 starCount;
1155                 starCount = 0;
1156 /*              WriteLog("GPU: Starting starfield generator... Dump of [R03=%08X]:\n", gpu_reg_bank_0[03]);
1157                 uint32 base = gpu_reg_bank_0[3];
1158                 for(uint32 i=0; i<0x100; i+=16)
1159                 {
1160                         WriteLog("%02X: ", i);
1161                         for(uint32 j=0; j<16; j++)
1162                         {
1163                                 WriteLog("%02X ", JaguarReadByte(base + i + j));
1164                         }
1165                         WriteLog("\n");
1166                 }*/
1167         }
1168 //      if (gpu_pc == 0xF03)
1169         {
1170         }
1171 }//*/
1172 /*if (gpu_pc == 0xF03B9E && gpu_reg_bank_0[01] == 0)
1173 {
1174         GPUDumpRegisters();
1175         WriteLog("GPU: Starting disassembly log...\n");
1176         doGPUDis = true;
1177 }//*/
1178 /*if (gpu_pc == 0xF0359A)
1179 {
1180         doGPUDis = true;
1181         GPUDumpRegisters();
1182 }*/
1183 /*              gpu_flag_c = (gpu_flag_c ? 1 : 0);
1184                 gpu_flag_z = (gpu_flag_z ? 1 : 0);
1185                 gpu_flag_n = (gpu_flag_n ? 1 : 0);*/
1186
1187                 uint16 opcode = GPUReadWord(gpu_pc, GPU);
1188                 uint32 index = opcode >> 10;
1189                 gpu_instruction = opcode;                               // Added for GPU #3...
1190                 gpu_opcode_first_parameter = (opcode >> 5) & 0x1F;
1191                 gpu_opcode_second_parameter = opcode & 0x1F;
1192 /*if (gpu_pc == 0xF03BE8)
1193 WriteLog("Start of OP frame write...\n");
1194 if (gpu_pc == 0xF03EEE)
1195 WriteLog("--> Writing BRANCH object ---\n");
1196 if (gpu_pc == 0xF03F62)
1197 WriteLog("--> Writing BITMAP object ***\n");//*/
1198 /*if (gpu_pc == 0xF03546)
1199 {
1200         WriteLog("\n--> GPU PC: F03546\n");
1201         GPUDumpRegisters();
1202         GPUDumpDisassembly();
1203 }//*/
1204 /*if (gpu_pc == 0xF033F6)
1205 {
1206         WriteLog("\n--> GPU PC: F033F6\n");
1207         GPUDumpRegisters();
1208         GPUDumpDisassembly();
1209 }//*/
1210 /*if (gpu_pc == 0xF033CC)
1211 {
1212         WriteLog("\n--> GPU PC: F033CC\n");
1213         GPUDumpRegisters();
1214         GPUDumpDisassembly();
1215 }//*/
1216 /*if (gpu_pc == 0xF033D6)
1217 {
1218         WriteLog("\n--> GPU PC: F033D6 (#%d)\n", testCount++);
1219         GPUDumpRegisters();
1220         GPUDumpMemory();
1221 }//*/
1222 /*if (gpu_pc == 0xF033D8)
1223 {
1224         WriteLog("\n--> GPU PC: F033D8 (#%d)\n", testCount++);
1225         GPUDumpRegisters();
1226         GPUDumpMemory();
1227 }//*/
1228 /*if (gpu_pc == 0xF0358E)
1229 {
1230         WriteLog("\n--> GPU PC: F0358E (#%d)\n", testCount++);
1231         GPUDumpRegisters();
1232         GPUDumpMemory();
1233 }//*/
1234 /*if (gpu_pc == 0xF034CA)
1235 {
1236         WriteLog("\n--> GPU PC: F034CA (#%d)\n", testCount++);
1237         GPUDumpRegisters();
1238 }//*/
1239 /*if (gpu_pc == 0xF034CA)
1240 {
1241         len = gpu_reg[1] + 4;//, r9save = gpu_reg[9];
1242         WriteLog("\nAbout to subtract [#%d] (R14=%08X, R15=%08X, R9=%08X):\n   ", testCount++, gpu_reg[14], gpu_reg[15], gpu_reg[9]);
1243         for(int i=0; i<len; i+=4)
1244                 WriteLog(" %08X", GPUReadLong(gpu_reg[15]+i));
1245         WriteLog("\n   ");
1246         for(int i=0; i<len; i+=4)
1247                 WriteLog(" %08X", GPUReadLong(gpu_reg[14]+i));
1248         WriteLog("\n\n");
1249 }
1250 if (gpu_pc == 0xF034DE)
1251 {
1252         WriteLog("\nSubtracted! (R14=%08X, R15=%08X):\n   ", gpu_reg[14], gpu_reg[15]);
1253         for(int i=0; i<len; i+=4)
1254                 WriteLog(" %08X", GPUReadLong(gpu_reg[15]+i));
1255         WriteLog("\n   ");
1256         for(int i=0; i<len; i+=4)
1257                 WriteLog(" %08X", GPUReadLong(gpu_reg[14]+i));
1258         WriteLog("\n   ");
1259         for(int i=0; i<len; i+=4)
1260                 WriteLog(" --------");
1261         WriteLog("\n   ");
1262         for(int i=0; i<len; i+=4)
1263                 WriteLog(" %08X", GPUReadLong(gpu_reg[9]+4+i));
1264         WriteLog("\n\n");
1265 }//*/
1266 /*if (gpu_pc == 0xF035C8)
1267 {
1268         WriteLog("\n--> GPU PC: F035C8 (#%d)\n", testCount++);
1269         GPUDumpRegisters();
1270         GPUDumpDisassembly();
1271 }//*/
1272
1273 if (gpu_start_log)
1274 {
1275 //      gpu_reset_stats();
1276 static char buffer[512];
1277 dasmjag(JAGUAR_GPU, buffer, gpu_pc);
1278 WriteLog("GPU: [%08X] %s (RM=%08X, RN=%08X) -> ", gpu_pc, buffer, RM, RN);
1279 }//*/
1280 //$E400 -> 1110 01 -> $39 -> 57
1281 //GPU #1
1282                 gpu_pc += 2;
1283                 gpu_opcode[index]();
1284 //GPU #2
1285 //              gpu2_opcode[index]();
1286 //              gpu_pc += 2;
1287 //GPU #3                                (Doesn't show ATARI logo! #1 & #2 do...)
1288 //              gpu_pc += 2;
1289 //              gpu3_opcode[index]();
1290
1291 // BIOS hacking
1292 //GPU: [00F03548] jr      nz,00F03560 (0xd561) (RM=00F03114, RN=00000004) ->     --> JR: Branch taken.
1293 /*static bool firstTime = true;
1294 if (gpu_pc == 0xF03548 && firstTime)
1295 {
1296         gpu_flag_z = 1;
1297 //      firstTime = false;
1298
1299 //static char buffer[512];
1300 //int k=0xF03548;
1301 //while (k<0xF0356C)
1302 //{
1303 //int oldk = k;
1304 //k += dasmjag(JAGUAR_GPU, buffer, k);
1305 //WriteLog("GPU: [%08X] %s\n", oldk, buffer);
1306 //}
1307 //      gpu_start_log = 1;
1308 }//*/
1309 //GPU: [00F0354C] jump    nz,(r29) (0xd3a1) (RM=00F03314, RN=00000004) -> (RM=00F03314, RN=00000004)
1310 /*if (gpu_pc == 0xF0354C)
1311         gpu_flag_z = 0;//, gpu_start_log = 1;//*/
1312
1313                 cycles -= gpu_opcode_cycles[index];
1314                 gpu_opcode_use[index]++;
1315 if (gpu_start_log)
1316         WriteLog("(RM=%08X, RN=%08X)\n", RM, RN);//*/
1317 if ((gpu_pc < 0xF03000 || gpu_pc > 0xF03FFF) && !tripwire)
1318 {
1319         WriteLog("GPU: Executing outside local RAM! GPU_PC: %08X\n", gpu_pc);
1320         tripwire = true;
1321 }
1322         }
1323
1324         gpu_in_exec--;
1325 }
1326
1327 //
1328 // GPU opcodes
1329 //
1330
1331 /*
1332 GPU opcodes use (offset punch--vertically below bad guy):
1333                       add 18686
1334                      addq 32621
1335                       sub 7483
1336                      subq 10252
1337                       and 21229
1338                        or 15003
1339                      btst 1822
1340                      bset 2072
1341                      mult 141
1342                       div 2392
1343                      shlq 13449
1344                      shrq 10297
1345                     sharq 11104
1346                       cmp 6775
1347                      cmpq 5944
1348                      move 31259
1349                     moveq 4473
1350                     movei 23277
1351                     loadb 46
1352                     loadw 4201
1353                      load 28580
1354          load_r14_indexed 1183
1355          load_r15_indexed 1125
1356                    storew 178
1357                     store 10144
1358         store_r14_indexed 320
1359         store_r15_indexed 1
1360                   move_pc 1742
1361                      jump 24467
1362                        jr 18090
1363                       nop 41362
1364 */
1365
1366 static void gpu_opcode_jump(void)
1367 {
1368 #ifdef GPU_DIS_JUMP
1369 const char * condition[32] =
1370 {       "T", "nz", "z", "???", "nc", "nc nz", "nc z", "???", "c", "c nz",
1371         "c z", "???", "???", "???", "???", "???", "???", "???", "???",
1372         "???", "nn", "nn nz", "nn z", "???", "n", "n nz", "n z", "???",
1373         "???", "???", "???", "F" };
1374         if (doGPUDis)
1375                 WriteLog("%06X: JUMP   %s, (R%02u) [NCZ:%u%u%u, R%02u=%08X] ", gpu_pc-2, condition[IMM_2], IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM);
1376 #endif
1377         // normalize flags
1378 /*      gpu_flag_c = (gpu_flag_c ? 1 : 0);
1379         gpu_flag_z = (gpu_flag_z ? 1 : 0);
1380         gpu_flag_n = (gpu_flag_n ? 1 : 0);*/
1381         // KLUDGE: Used by BRANCH_CONDITION
1382         uint32 jaguar_flags = (gpu_flag_n << 2) | (gpu_flag_c << 1) | gpu_flag_z;
1383
1384         if (BRANCH_CONDITION(IMM_2))
1385         {
1386 #ifdef GPU_DIS_JUMP
1387         if (doGPUDis)
1388                 WriteLog("Branched!\n");
1389 #endif
1390 if (gpu_start_log)
1391         WriteLog("    --> JUMP: Branch taken.\n");
1392                 uint32 delayed_pc = RM;
1393                 GPUExec(1);
1394                 gpu_pc = delayed_pc;
1395 /*              uint16 opcode = GPUReadWord(gpu_pc, GPU);
1396                 gpu_opcode_first_parameter = (opcode >> 5) & 0x1F;
1397                 gpu_opcode_second_parameter = opcode & 0x1F;
1398
1399                 gpu_pc = delayed_pc;
1400                 gpu_opcode[opcode>>10]();//*/
1401         }
1402 #ifdef GPU_DIS_JUMP
1403         else
1404                 if (doGPUDis)
1405                         WriteLog("Branch NOT taken.\n");
1406 #endif
1407 }
1408
1409 static void gpu_opcode_jr(void)
1410 {
1411 #ifdef GPU_DIS_JR
1412 const char * condition[32] =
1413 {       "T", "nz", "z", "???", "nc", "nc nz", "nc z", "???", "c", "c nz",
1414         "c z", "???", "???", "???", "???", "???", "???", "???", "???",
1415         "???", "nn", "nn nz", "nn z", "???", "n", "n nz", "n z", "???",
1416         "???", "???", "???", "F" };
1417         if (doGPUDis)
1418                 WriteLog("%06X: JR     %s, %06X [NCZ:%u%u%u] ", gpu_pc-2, condition[IMM_2], gpu_pc+((IMM_1 & 0x10 ? 0xFFFFFFF0 | IMM_1 : IMM_1) * 2), gpu_flag_n, gpu_flag_c, gpu_flag_z);
1419 #endif
1420 /*      if (CONDITION(jaguar.op & 31))
1421         {
1422                 int32 r1 = (INT8)((jaguar.op >> 2) & 0xF8) >> 2;
1423                 uint32 newpc = jaguar.PC + r1;
1424                 CALL_MAME_DEBUG;
1425                 jaguar.op = ROPCODE(jaguar.PC);
1426                 jaguar.PC = newpc;
1427                 (*jaguar.table[jaguar.op >> 10])();
1428
1429                 jaguar_icount -= 3;     // 3 wait states guaranteed
1430         }*/
1431         // normalize flags
1432 /*      gpu_flag_n = (gpu_flag_n ? 1 : 0);
1433         gpu_flag_c = (gpu_flag_c ? 1 : 0);
1434         gpu_flag_z = (gpu_flag_z ? 1 : 0);*/
1435         // KLUDGE: Used by BRANCH_CONDITION
1436         uint32 jaguar_flags = (gpu_flag_n << 2) | (gpu_flag_c << 1) | gpu_flag_z;
1437
1438         if (BRANCH_CONDITION(IMM_2))
1439         {
1440 #ifdef GPU_DIS_JR
1441         if (doGPUDis)
1442                 WriteLog("Branched!\n");
1443 #endif
1444 if (gpu_start_log)
1445         WriteLog("    --> JR: Branch taken.\n");
1446                 int32 offset = (IMM_1 & 0x10 ? 0xFFFFFFF0 | IMM_1 : IMM_1);             // Sign extend IMM_1
1447                 int32 delayed_pc = gpu_pc + (offset * 2);
1448                 GPUExec(1);
1449                 gpu_pc = delayed_pc;
1450 /*              uint16 opcode = GPUReadWord(gpu_pc, GPU);
1451                 gpu_opcode_first_parameter = (opcode >> 5) & 0x1F;
1452                 gpu_opcode_second_parameter = opcode & 0x1F;
1453
1454                 gpu_pc = delayed_pc;
1455                 gpu_opcode[opcode>>10]();//*/
1456         }
1457 #ifdef GPU_DIS_JR
1458         else
1459                 if (doGPUDis)
1460                         WriteLog("Branch NOT taken.\n");
1461 #endif
1462 }
1463
1464 static void gpu_opcode_add(void)
1465 {
1466 #ifdef GPU_DIS_ADD
1467         if (doGPUDis)
1468                 WriteLog("%06X: ADD    R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1469 #endif
1470         uint32 res = RN + RM;
1471         CLR_ZNC; SET_ZNC_ADD(RN, RM, res);
1472         RN = res;
1473 #ifdef GPU_DIS_ADD
1474         if (doGPUDis)
1475                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1476 #endif
1477 }
1478
1479 static void gpu_opcode_addc(void)
1480 {
1481 #ifdef GPU_DIS_ADDC
1482         if (doGPUDis)
1483                 WriteLog("%06X: ADDC   R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1484 #endif
1485 /*      int dreg = jaguar.op & 31;
1486         uint32 r1 = jaguar.r[(jaguar.op >> 5) & 31];
1487         uint32 r2 = jaguar.r[dreg];
1488         uint32 res = r2 + r1 + ((jaguar.FLAGS >> 1) & 1);
1489         jaguar.r[dreg] = res;
1490         CLR_ZNC; SET_ZNC_ADD(r2,r1,res);*/
1491
1492         uint32 res = RN + RM + gpu_flag_c;
1493         uint32 carry = gpu_flag_c;
1494 //      SET_ZNC_ADD(RN, RM, res); //???BUG??? Yes!
1495         SET_ZNC_ADD(RN + carry, RM, res);
1496 //      SET_ZNC_ADD(RN, RM + carry, res);
1497         RN = res;
1498 #ifdef GPU_DIS_ADDC
1499         if (doGPUDis)
1500                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1501 #endif
1502 }
1503
1504 static void gpu_opcode_addq(void)
1505 {
1506 #ifdef GPU_DIS_ADDQ
1507         if (doGPUDis)
1508                 WriteLog("%06X: ADDQ   #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1509 #endif
1510         uint32 r1 = gpu_convert_zero[IMM_1];
1511         uint32 res = RN + r1;
1512         CLR_ZNC; SET_ZNC_ADD(RN, r1, res);
1513         RN = res;
1514 #ifdef GPU_DIS_ADDQ
1515         if (doGPUDis)
1516                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1517 #endif
1518 }
1519
1520 static void gpu_opcode_addqt(void)
1521 {
1522 #ifdef GPU_DIS_ADDQT
1523         if (doGPUDis)
1524                 WriteLog("%06X: ADDQT  #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1525 #endif
1526         RN += gpu_convert_zero[IMM_1];
1527 #ifdef GPU_DIS_ADDQT
1528         if (doGPUDis)
1529                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1530 #endif
1531 }
1532
1533 static void gpu_opcode_sub(void)
1534 {
1535 #ifdef GPU_DIS_SUB
1536         if (doGPUDis)
1537                 WriteLog("%06X: SUB    R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1538 #endif
1539         uint32 res = RN - RM;
1540         SET_ZNC_SUB(RN, RM, res);
1541         RN = res;
1542 #ifdef GPU_DIS_SUB
1543         if (doGPUDis)
1544                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1545 #endif
1546 }
1547
1548 static void gpu_opcode_subc(void)
1549 {
1550 #ifdef GPU_DIS_SUBC
1551         if (doGPUDis)
1552                 WriteLog("%06X: SUBC   R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1553 #endif
1554         uint32 res = RN - RM - gpu_flag_c;
1555         uint32 borrow = gpu_flag_c;
1556 //      SET_ZNC_SUB(RN, RM, res); //???BUG??? YES!!!
1557 //No matter how you do it, there is a problem. With below, it's 0-0 with carry,
1558 //and the one below it it's FFFFFFFF - FFFFFFFF with carry... !!! FIX !!!
1559 //      SET_ZNC_SUB(RN - borrow, RM, res);
1560         SET_ZNC_SUB(RN, RM + borrow, res);
1561         RN = res;
1562 #ifdef GPU_DIS_SUBC
1563         if (doGPUDis)
1564                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1565 #endif
1566 }
1567 /*
1568 N = 5, M = 3, 3 - 5 = -2, C = 1... Or, in our case:
1569 N = 0, M = 1, 0 - 1 = -1, C = 0!
1570
1571 #define SET_C_SUB(a,b)          (gpu_flag_c = ((uint32)(b) > (uint32)(a)))
1572 #define SET_ZN(r)                       SET_N(r); SET_Z(r)
1573 #define SET_ZNC_ADD(a,b,r)      SET_N(r); SET_Z(r); SET_C_ADD(a,b)
1574 #define SET_ZNC_SUB(a,b,r)      SET_N(r); SET_Z(r); SET_C_SUB(a,b)
1575 */
1576 static void gpu_opcode_subq(void)
1577 {
1578 #ifdef GPU_DIS_SUBQ
1579         if (doGPUDis)
1580                 WriteLog("%06X: SUBQ   #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1581 #endif
1582         uint32 r1 = gpu_convert_zero[IMM_1];
1583         uint32 res = RN - r1;
1584         SET_ZNC_SUB(RN, r1, res);
1585         RN = res;
1586 #ifdef GPU_DIS_SUBQ
1587         if (doGPUDis)
1588                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1589 #endif
1590 }
1591
1592 static void gpu_opcode_subqt(void)
1593 {
1594 #ifdef GPU_DIS_SUBQT
1595         if (doGPUDis)
1596                 WriteLog("%06X: SUBQT  #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1597 #endif
1598         RN -= gpu_convert_zero[IMM_1];
1599 #ifdef GPU_DIS_SUBQT
1600         if (doGPUDis)
1601                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1602 #endif
1603 }
1604
1605 static void gpu_opcode_cmp(void)
1606 {
1607 #ifdef GPU_DIS_CMP
1608         if (doGPUDis)
1609                 WriteLog("%06X: CMP    R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1610 #endif
1611         uint32 res = RN - RM;
1612         SET_ZNC_SUB(RN, RM, res);
1613 #ifdef GPU_DIS_CMP
1614         if (doGPUDis)
1615                 WriteLog("[NCZ:%u%u%u]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z);
1616 #endif
1617 }
1618
1619 static void gpu_opcode_cmpq(void)
1620 {
1621         static int32 sqtable[32] =
1622                 { 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,-16,-15,-14,-13,-12,-11,-10,-9,-8,-7,-6,-5,-4,-3,-2,-1 };
1623 #ifdef GPU_DIS_CMPQ
1624         if (doGPUDis)
1625                 WriteLog("%06X: CMPQ   #%d, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, sqtable[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1626 #endif
1627         uint32 r1 = sqtable[IMM_1 & 0x1F]; // I like this better -> (INT8)(jaguar.op >> 2) >> 3;
1628         uint32 res = RN - r1;
1629         SET_ZNC_SUB(RN, r1, res);
1630 #ifdef GPU_DIS_CMPQ
1631         if (doGPUDis)
1632                 WriteLog("[NCZ:%u%u%u]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z);
1633 #endif
1634 }
1635
1636 static void gpu_opcode_and(void)
1637 {
1638 #ifdef GPU_DIS_AND
1639         if (doGPUDis)
1640                 WriteLog("%06X: AND    R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1641 #endif
1642         RN = RN & RM;
1643         SET_ZN(RN);
1644 #ifdef GPU_DIS_AND
1645         if (doGPUDis)
1646                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1647 #endif
1648 }
1649
1650 static void gpu_opcode_or(void)
1651 {
1652 #ifdef GPU_DIS_OR
1653         if (doGPUDis)
1654                 WriteLog("%06X: OR     R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1655 #endif
1656         RN = RN | RM;
1657         SET_ZN(RN);
1658 #ifdef GPU_DIS_OR
1659         if (doGPUDis)
1660                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1661 #endif
1662 }
1663
1664 static void gpu_opcode_xor(void)
1665 {
1666 #ifdef GPU_DIS_XOR
1667         if (doGPUDis)
1668                 WriteLog("%06X: XOR    R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1669 #endif
1670         RN = RN ^ RM;
1671         SET_ZN(RN);
1672 #ifdef GPU_DIS_XOR
1673         if (doGPUDis)
1674                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1675 #endif
1676 }
1677
1678 static void gpu_opcode_not(void)
1679 {
1680 #ifdef GPU_DIS_NOT
1681         if (doGPUDis)
1682                 WriteLog("%06X: NOT    R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1683 #endif
1684         RN = ~RN;
1685         SET_ZN(RN);
1686 #ifdef GPU_DIS_NOT
1687         if (doGPUDis)
1688                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1689 #endif
1690 }
1691
1692 static void gpu_opcode_move_pc(void)
1693 {
1694 #ifdef GPU_DIS_MOVEPC
1695         if (doGPUDis)
1696                 WriteLog("%06X: MOVE   PC, R%02u [NCZ:%u%u%u, PC=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, gpu_pc-2, IMM_2, RN);
1697 #endif
1698         // Should be previous PC--this might not always be previous instruction!
1699         // Then again, this will point right at the *current* instruction, i.e., MOVE PC,R!
1700         RN = gpu_pc - 2;
1701 #ifdef GPU_DIS_MOVEPC
1702         if (doGPUDis)
1703                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1704 #endif
1705 }
1706
1707 static void gpu_opcode_sat8(void)
1708 {
1709 #ifdef GPU_DIS_SAT8
1710         if (doGPUDis)
1711                 WriteLog("%06X: SAT8   R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1712 #endif
1713         RN = ((int32)RN < 0 ? 0 : (RN > 0xFF ? 0xFF : RN));
1714         SET_ZN(RN);
1715 #ifdef GPU_DIS_SAT8
1716         if (doGPUDis)
1717                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1718 #endif
1719 }
1720
1721 static void gpu_opcode_sat16(void)
1722 {
1723         RN = ((int32)RN < 0 ? 0 : (RN > 0xFFFF ? 0xFFFF : RN));
1724         SET_ZN(RN);
1725 }
1726
1727 static void gpu_opcode_sat24(void)
1728 {
1729         RN = ((int32)RN < 0 ? 0 : (RN > 0xFFFFFF ? 0xFFFFFF : RN));
1730         SET_ZN(RN);
1731 }
1732
1733 static void gpu_opcode_store_r14_indexed(void)
1734 {
1735 #ifdef GPU_DIS_STORE14I
1736         if (doGPUDis)
1737                 WriteLog("%06X: STORE  R%02u, (R14+$%02X) [NCZ:%u%u%u, R%02u=%08X, R14+$%02X=%08X]\n", gpu_pc-2, IMM_2, gpu_convert_zero[IMM_1] << 2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, gpu_convert_zero[IMM_1] << 2, gpu_reg[14]+(gpu_convert_zero[IMM_1] << 2));
1738 #endif
1739 #ifdef GPU_CORRECT_ALIGNMENT
1740         uint32 address = gpu_reg[14] + (gpu_convert_zero[IMM_1] << 2);
1741         
1742         if (address >= 0xF03000 && address <= 0xF03FFF)
1743                 GPUWriteLong(address & 0xFFFFFFFC, RN, GPU);
1744         else
1745                 GPUWriteLong(address, RN, GPU);
1746 #else
1747         GPUWriteLong(gpu_reg[14] + (gpu_convert_zero[IMM_1] << 2), RN, GPU);
1748 #endif
1749 }
1750
1751 static void gpu_opcode_store_r15_indexed(void)
1752 {
1753 #ifdef GPU_DIS_STORE15I
1754         if (doGPUDis)
1755                 WriteLog("%06X: STORE  R%02u, (R15+$%02X) [NCZ:%u%u%u, R%02u=%08X, R15+$%02X=%08X]\n", gpu_pc-2, IMM_2, gpu_convert_zero[IMM_1] << 2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, gpu_convert_zero[IMM_1] << 2, gpu_reg[15]+(gpu_convert_zero[IMM_1] << 2));
1756 #endif
1757 #ifdef GPU_CORRECT_ALIGNMENT
1758         uint32 address = gpu_reg[15] + (gpu_convert_zero[IMM_1] << 2);
1759
1760         if (address >= 0xF03000 && address <= 0xF03FFF)
1761                 GPUWriteLong(address & 0xFFFFFFFC, RN, GPU);
1762         else
1763                 GPUWriteLong(address, RN, GPU);
1764 #else
1765         GPUWriteLong(gpu_reg[15] + (gpu_convert_zero[IMM_1] << 2), RN, GPU);
1766 #endif
1767 }
1768
1769 static void gpu_opcode_load_r14_ri(void)
1770 {
1771 #ifdef GPU_DIS_LOAD14R
1772         if (doGPUDis)
1773                 WriteLog("%06X: LOAD   (R14+R%02u), R%02u [NCZ:%u%u%u, R14+R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM+gpu_reg[14], IMM_2, RN);
1774 #endif
1775 #ifdef GPU_CORRECT_ALIGNMENT
1776         uint32 address = gpu_reg[14] + RM;
1777
1778         if (address >= 0xF03000 && address <= 0xF03FFF)
1779                 RN = GPUReadLong(address & 0xFFFFFFFC, GPU);
1780         else
1781                 RN = GPUReadLong(address, GPU);
1782 #else
1783         RN = GPUReadLong(gpu_reg[14] + RM, GPU);
1784 #endif
1785 #ifdef GPU_DIS_LOAD14R
1786         if (doGPUDis)
1787                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1788 #endif
1789 }
1790
1791 static void gpu_opcode_load_r15_ri(void)
1792 {
1793 #ifdef GPU_DIS_LOAD15R
1794         if (doGPUDis)
1795                 WriteLog("%06X: LOAD   (R15+R%02u), R%02u [NCZ:%u%u%u, R15+R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM+gpu_reg[15], IMM_2, RN);
1796 #endif
1797 #ifdef GPU_CORRECT_ALIGNMENT
1798         uint32 address = gpu_reg[15] + RM;
1799
1800         if (address >= 0xF03000 && address <= 0xF03FFF)
1801                 RN = GPUReadLong(address & 0xFFFFFFFC, GPU);
1802         else
1803                 RN = GPUReadLong(address, GPU);
1804 #else
1805         RN = GPUReadLong(gpu_reg[15] + RM, GPU);
1806 #endif
1807 #ifdef GPU_DIS_LOAD15R
1808         if (doGPUDis)
1809                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1810 #endif
1811 }
1812
1813 static void gpu_opcode_store_r14_ri(void)
1814 {
1815 #ifdef GPU_DIS_STORE14R
1816         if (doGPUDis)
1817                 WriteLog("%06X: STORE  R%02u, (R14+R%02u) [NCZ:%u%u%u, R%02u=%08X, R14+R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM+gpu_reg[14]);
1818 #endif
1819 #ifdef GPU_CORRECT_ALIGNMENT
1820         uint32 address = gpu_reg[14] + RM;
1821
1822         if (address >= 0xF03000 && address <= 0xF03FFF)
1823                 GPUWriteLong(address & 0xFFFFFFFC, RN, GPU);
1824         else
1825                 GPUWriteLong(address, RN, GPU);
1826 #else
1827         GPUWriteLong(gpu_reg[14] + RM, RN, GPU);
1828 #endif
1829 }
1830
1831 static void gpu_opcode_store_r15_ri(void)
1832 {
1833 #ifdef GPU_DIS_STORE15R
1834         if (doGPUDis)
1835                 WriteLog("%06X: STORE  R%02u, (R15+R%02u) [NCZ:%u%u%u, R%02u=%08X, R15+R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM+gpu_reg[15]);
1836 #endif
1837 #ifdef GPU_CORRECT_ALIGNMENT_STORE
1838         uint32 address = gpu_reg[15] + RM;
1839
1840         if (address >= 0xF03000 && address <= 0xF03FFF)
1841                 GPUWriteLong(address & 0xFFFFFFFC, RN, GPU);
1842         else
1843                 GPUWriteLong(address, RN, GPU);
1844 #else
1845         GPUWriteLong(gpu_reg[15] + RM, RN, GPU);
1846 #endif
1847 }
1848
//
// NOP
// Does nothing apart from emitting a disassembly line when GPU_DIS_NOP is
// compiled in and doGPUDis is set.
//
static void gpu_opcode_nop(void)
{
#ifdef GPU_DIS_NOP
	if (doGPUDis)
	{
		WriteLog("%06X: NOP    [NCZ:%u%u%u]\n", gpu_pc-2, gpu_flag_n, gpu_flag_c, gpu_flag_z);
	}
#endif
}
1856
1857 static void gpu_opcode_pack(void)
1858 {
1859 #ifdef GPU_DIS_PACK
1860         if (doGPUDis)
1861                 WriteLog("%06X: %s R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, (!IMM_1 ? "PACK  " : "UNPACK"), IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1862 #endif
1863         uint32 val = RN;
1864
1865 //BUG!  if (RM == 0)                            // Pack
1866         if (IMM_1 == 0)                         // Pack
1867                 RN = ((val >> 10) & 0x0000F000) | ((val >> 5) & 0x00000F00) | (val & 0x000000FF);
1868         else                                            // Unpack
1869                 RN = ((val & 0x0000F000) << 10) | ((val & 0x00000F00) << 5) | (val & 0x000000FF);
1870 #ifdef GPU_DIS_PACK
1871         if (doGPUDis)
1872                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1873 #endif
1874 }
1875
1876 static void gpu_opcode_storeb(void)
1877 {
1878 #ifdef GPU_DIS_STOREB
1879         if (doGPUDis)
1880                 WriteLog("%06X: STOREB R%02u, (R%02u) [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM);
1881 #endif
1882 //Is this right???
1883 // Would appear to be so...!
1884         if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1885                 GPUWriteLong(RM, RN & 0xFF, GPU);
1886         else
1887                 JaguarWriteByte(RM, RN, GPU);
1888 }
1889
1890 static void gpu_opcode_storew(void)
1891 {
1892 #ifdef GPU_DIS_STOREW
1893         if (doGPUDis)
1894                 WriteLog("%06X: STOREW R%02u, (R%02u) [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM);
1895 #endif
1896 #ifdef GPU_CORRECT_ALIGNMENT
1897         if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1898                 GPUWriteLong(RM & 0xFFFFFFFE, RN & 0xFFFF, GPU);
1899         else
1900                 JaguarWriteWord(RM, RN, GPU);
1901 #else
1902         if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1903                 GPUWriteLong(RM, RN & 0xFFFF, GPU);
1904         else
1905                 JaguarWriteWord(RM, RN, GPU);
1906 #endif
1907 }
1908
1909 static void gpu_opcode_store(void)
1910 {
1911 #ifdef GPU_DIS_STORE
1912         if (doGPUDis)
1913                 WriteLog("%06X: STORE  R%02u, (R%02u) [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM);
1914 #endif
1915 #ifdef GPU_CORRECT_ALIGNMENT
1916         if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1917                 GPUWriteLong(RM & 0xFFFFFFFC, RN, GPU);
1918         else
1919                 GPUWriteLong(RM, RN, GPU);
1920 #else
1921         GPUWriteLong(RM, RN, GPU);
1922 #endif
1923 }
1924
1925 static void gpu_opcode_storep(void)
1926 {
1927 #ifdef GPU_CORRECT_ALIGNMENT
1928         if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1929         {
1930                 GPUWriteLong((RM & 0xFFFFFFF8) + 0, gpu_hidata, GPU);
1931                 GPUWriteLong((RM & 0xFFFFFFF8) + 4, RN, GPU);
1932         }
1933         else
1934         {
1935                 GPUWriteLong(RM + 0, gpu_hidata, GPU);
1936                 GPUWriteLong(RM + 4, RN, GPU);
1937         }
1938 #else
1939         GPUWriteLong(RM + 0, gpu_hidata, GPU);
1940         GPUWriteLong(RM + 4, RN, GPU);
1941 #endif
1942 }
1943
1944 static void gpu_opcode_loadb(void)
1945 {
1946 #ifdef GPU_DIS_LOADB
1947         if (doGPUDis)
1948                 WriteLog("%06X: LOADB  (R%02u), R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1949 #endif
1950         if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1951                 RN = GPUReadLong(RM, GPU) & 0xFF;
1952         else
1953                 RN = JaguarReadByte(RM, GPU);
1954 #ifdef GPU_DIS_LOADB
1955         if (doGPUDis)
1956                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1957 #endif
1958 }
1959
1960 static void gpu_opcode_loadw(void)
1961 {
1962 #ifdef GPU_DIS_LOADW
1963         if (doGPUDis)
1964                 WriteLog("%06X: LOADW  (R%02u), R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1965 #endif
1966 #ifdef GPU_CORRECT_ALIGNMENT
1967         if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1968                 RN = GPUReadLong(RM & 0xFFFFFFFE, GPU) & 0xFFFF;
1969         else
1970                 RN = JaguarReadWord(RM, GPU);
1971 #else
1972         if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1973                 RN = GPUReadLong(RM, GPU) & 0xFFFF;
1974         else
1975                 RN = JaguarReadWord(RM, GPU);
1976 #endif
1977 #ifdef GPU_DIS_LOADW
1978         if (doGPUDis)
1979                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1980 #endif
1981 }
1982
1983 // According to the docs, & "Do The Same", this address is long aligned...
1984 // So let's try it:
1985 // And it works!!! Need to fix all instances...
1986 // Also, Power Drive Rally seems to contradict the idea that only LOADs in
1987 // the $F03000-$F03FFF range are aligned...
1988 #warning "!!! Alignment issues, need to find definitive final word on this !!!"
1989 /*
1990 Preliminary testing on real hardware seems to confirm that something strange goes on
1991 with unaligned reads in main memory. When the address is off by 1, the result is the
1992 same as the long address with the top byte replaced by something. So if the read is
1993 from $401, and $400 has 12 34 56 78, the value read will be $nn345678, where nn is a currently unknown value.
1994 When the address is off by 2, the result would be $nnnn5678, where nnnn is unknown.
1995 When the address is off by 3, the result would be $nnnnnn78, where nnnnnn is unknown.
1996 It may be that the "unknown" values come from the prefetch queue, but not sure how
1997 to test that. They seem to be stable, though, which would indicate such a mechanism.
1998 Sometimes, however, the off by 2 case returns $12345678!
1999 */
2000 static void gpu_opcode_load(void)
2001 {
2002 #ifdef GPU_DIS_LOAD
2003         if (doGPUDis)
2004                 WriteLog("%06X: LOAD   (R%02u), R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2005 #endif
2006 #ifdef GPU_CORRECT_ALIGNMENT
2007         uint32 mask[4] = { 0x00000000, 0xFF000000, 0xFFFF0000, 0xFFFFFF00 };
2008 //      if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
2009                 RN = GPUReadLong(RM & 0xFFFFFFFC, GPU);
2010 //              RN = GPUReadLong(RM & 0x00FFFFFC, GPU);
2011 //      else
2012 //              RN = GPUReadLong(RM, GPU);
2013         // Simulate garbage in unaligned reads...
2014 //seems that this behavior is different in GPU mem vs. main mem...
2015 //      if ((RM < 0xF03000) || (RM > 0xF0BFFF))
2016 //              RN |= mask[RM & 0x03];
2017 #else
2018         RN = GPUReadLong(RM, GPU);
2019 #endif
2020 #ifdef GPU_DIS_LOAD
2021         if (doGPUDis)
2022                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2023 #endif
2024 }
2025
2026 static void gpu_opcode_loadp(void)
2027 {
2028 #ifdef GPU_CORRECT_ALIGNMENT
2029         if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
2030         {
2031                 gpu_hidata = GPUReadLong((RM & 0xFFFFFFF8) + 0, GPU);
2032                 RN                 = GPUReadLong((RM & 0xFFFFFFF8) + 4, GPU);
2033         }
2034         else
2035         {
2036                 gpu_hidata = GPUReadLong(RM + 0, GPU);
2037                 RN                 = GPUReadLong(RM + 4, GPU);
2038         }
2039 #else
2040         gpu_hidata = GPUReadLong(RM + 0, GPU);
2041         RN                 = GPUReadLong(RM + 4, GPU);
2042 #endif
2043 }
2044
2045 static void gpu_opcode_load_r14_indexed(void)
2046 {
2047 #ifdef GPU_DIS_LOAD14I
2048         if (doGPUDis)
2049                 WriteLog("%06X: LOAD   (R14+$%02X), R%02u [NCZ:%u%u%u, R14+$%02X=%08X, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1] << 2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, gpu_convert_zero[IMM_1] << 2, gpu_reg[14]+(gpu_convert_zero[IMM_1] << 2), IMM_2, RN);
2050 #endif
2051 #ifdef GPU_CORRECT_ALIGNMENT
2052         uint32 address = gpu_reg[14] + (gpu_convert_zero[IMM_1] << 2);
2053
2054         if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
2055                 RN = GPUReadLong(address & 0xFFFFFFFC, GPU);
2056         else
2057                 RN = GPUReadLong(address, GPU);
2058 #else
2059         RN = GPUReadLong(gpu_reg[14] + (gpu_convert_zero[IMM_1] << 2), GPU);
2060 #endif
2061 #ifdef GPU_DIS_LOAD14I
2062         if (doGPUDis)
2063                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2064 #endif
2065 }
2066
2067 static void gpu_opcode_load_r15_indexed(void)
2068 {
2069 #ifdef GPU_DIS_LOAD15I
2070         if (doGPUDis)
2071                 WriteLog("%06X: LOAD   (R15+$%02X), R%02u [NCZ:%u%u%u, R15+$%02X=%08X, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1] << 2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, gpu_convert_zero[IMM_1] << 2, gpu_reg[15]+(gpu_convert_zero[IMM_1] << 2), IMM_2, RN);
2072 #endif
2073 #ifdef GPU_CORRECT_ALIGNMENT
2074         uint32 address = gpu_reg[15] + (gpu_convert_zero[IMM_1] << 2);
2075
2076         if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
2077                 RN = GPUReadLong(address & 0xFFFFFFFC, GPU);
2078         else
2079                 RN = GPUReadLong(address, GPU);
2080 #else
2081         RN = GPUReadLong(gpu_reg[15] + (gpu_convert_zero[IMM_1] << 2), GPU);
2082 #endif
2083 #ifdef GPU_DIS_LOAD15I
2084         if (doGPUDis)
2085                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2086 #endif
2087 }
2088
2089 static void gpu_opcode_movei(void)
2090 {
2091 #ifdef GPU_DIS_MOVEI
2092         if (doGPUDis)
2093                 WriteLog("%06X: MOVEI  #$%08X, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, (uint32)GPUReadWord(gpu_pc) | ((uint32)GPUReadWord(gpu_pc + 2) << 16), IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2094 #endif
2095         // This instruction is followed by 32-bit value in LSW / MSW format...
2096         RN = (uint32)GPUReadWord(gpu_pc, GPU) | ((uint32)GPUReadWord(gpu_pc + 2, GPU) << 16);
2097         gpu_pc += 4;
2098 #ifdef GPU_DIS_MOVEI
2099         if (doGPUDis)
2100                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2101 #endif
2102 }
2103
2104 static void gpu_opcode_moveta(void)
2105 {
2106 #ifdef GPU_DIS_MOVETA
2107         if (doGPUDis)
2108                 WriteLog("%06X: MOVETA R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u(alt)=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, ALTERNATE_RN);
2109 #endif
2110         ALTERNATE_RN = RM;
2111 #ifdef GPU_DIS_MOVETA
2112         if (doGPUDis)
2113                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u(alt)=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, ALTERNATE_RN);
2114 #endif
2115 }
2116
2117 static void gpu_opcode_movefa(void)
2118 {
2119 #ifdef GPU_DIS_MOVEFA
2120         if (doGPUDis)
2121                 WriteLog("%06X: MOVEFA R%02u, R%02u [NCZ:%u%u%u, R%02u(alt)=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, ALTERNATE_RM, IMM_2, RN);
2122 #endif
2123         RN = ALTERNATE_RM;
2124 #ifdef GPU_DIS_MOVEFA
2125         if (doGPUDis)
2126                 WriteLog("[NCZ:%u%u%u, R%02u(alt)=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, ALTERNATE_RM, IMM_2, RN);
2127 #endif
2128 }
2129
2130 static void gpu_opcode_move(void)
2131 {
2132 #ifdef GPU_DIS_MOVE
2133         if (doGPUDis)
2134                 WriteLog("%06X: MOVE   R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2135 #endif
2136         RN = RM;
2137 #ifdef GPU_DIS_MOVE
2138         if (doGPUDis)
2139                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2140 #endif
2141 }
2142
2143 static void gpu_opcode_moveq(void)
2144 {
2145 #ifdef GPU_DIS_MOVEQ
2146         if (doGPUDis)
2147                 WriteLog("%06X: MOVEQ  #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2148 #endif
2149         RN = IMM_1;
2150 #ifdef GPU_DIS_MOVEQ
2151         if (doGPUDis)
2152                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2153 #endif
2154 }
2155
2156 static void gpu_opcode_resmac(void)
2157 {
2158         RN = gpu_acc;
2159 }
2160
2161 static void gpu_opcode_imult(void)
2162 {
2163 #ifdef GPU_DIS_IMULT
2164         if (doGPUDis)
2165                 WriteLog("%06X: IMULT  R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2166 #endif
2167         RN = (int16)RN * (int16)RM;
2168         SET_ZN(RN);
2169 #ifdef GPU_DIS_IMULT
2170         if (doGPUDis)
2171                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2172 #endif
2173 }
2174
2175 static void gpu_opcode_mult(void)
2176 {
2177 #ifdef GPU_DIS_MULT
2178         if (doGPUDis)
2179                 WriteLog("%06X: MULT   R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2180 #endif
2181         RN = (uint16)RM * (uint16)RN;
2182         SET_ZN(RN);
2183 #ifdef GPU_DIS_MULT
2184         if (doGPUDis)
2185                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2186 #endif
2187 }
2188
2189 static void gpu_opcode_bclr(void)
2190 {
2191 #ifdef GPU_DIS_BCLR
2192         if (doGPUDis)
2193                 WriteLog("%06X: BCLR   #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2194 #endif
2195         uint32 res = RN & ~(1 << IMM_1);
2196         RN = res;
2197         SET_ZN(res);
2198 #ifdef GPU_DIS_BCLR
2199         if (doGPUDis)
2200                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2201 #endif
2202 }
2203
2204 static void gpu_opcode_btst(void)
2205 {
2206 #ifdef GPU_DIS_BTST
2207         if (doGPUDis)
2208                 WriteLog("%06X: BTST   #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2209 #endif
2210         gpu_flag_z = (~RN >> IMM_1) & 1;
2211 #ifdef GPU_DIS_BTST
2212         if (doGPUDis)
2213                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2214 #endif
2215 }
2216
2217 static void gpu_opcode_bset(void)
2218 {
2219 #ifdef GPU_DIS_BSET
2220         if (doGPUDis)
2221                 WriteLog("%06X: BSET   #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2222 #endif
2223         uint32 res = RN | (1 << IMM_1);
2224         RN = res;
2225         SET_ZN(res);
2226 #ifdef GPU_DIS_BSET
2227         if (doGPUDis)
2228                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2229 #endif
2230 }
2231
2232 static void gpu_opcode_imacn(void)
2233 {
2234         uint32 res = (int16)RM * (int16)(RN);
2235         gpu_acc += res;
2236 }
2237
2238 static void gpu_opcode_mtoi(void)
2239 {
2240         uint32 _RM = RM;
2241         uint32 res = RN = (((int32)_RM >> 8) & 0xFF800000) | (_RM & 0x007FFFFF);
2242         SET_ZN(res);
2243 }
2244
2245 static void gpu_opcode_normi(void)
2246 {
2247         uint32 _RM = RM;
2248         uint32 res = 0;
2249
2250         if (_RM)
2251         {
2252                 while ((_RM & 0xFFC00000) == 0)
2253                 {
2254                         _RM <<= 1;
2255                         res--;
2256                 }
2257                 while ((_RM & 0xFF800000) != 0)
2258                 {
2259                         _RM >>= 1;
2260                         res++;
2261                 }
2262         }
2263         RN = res;
2264         SET_ZN(res);
2265 }
2266
2267 static void gpu_opcode_mmult(void)
2268 {
2269         int count       = gpu_matrix_control & 0x0F;    // Matrix width
2270         uint32 addr = gpu_pointer_to_matrix;            // In the GPU's RAM
2271         int64 accum = 0;
2272         uint32 res;
2273
2274         if (gpu_matrix_control & 0x10)                          // Column stepping
2275         {
2276                 for(int i=0; i<count; i++)
2277                 {
2278                         int16 a;
2279                         if (i & 0x01)
2280                                 a = (int16)((gpu_alternate_reg[IMM_1 + (i >> 1)] >> 16) & 0xFFFF);
2281                         else
2282                                 a = (int16)(gpu_alternate_reg[IMM_1 + (i >> 1)] & 0xFFFF);
2283
2284                         int16 b = ((int16)GPUReadWord(addr + 2, GPU));
2285                         accum += a * b;
2286                         addr += 4 * count;
2287                 }
2288         }
2289         else                                                                            // Row stepping
2290         {
2291                 for(int i=0; i<count; i++)
2292                 {
2293                         int16 a;
2294                         if (i & 0x01)
2295                                 a = (int16)((gpu_alternate_reg[IMM_1 + (i >> 1)] >> 16) & 0xFFFF);
2296                         else
2297                                 a = (int16)(gpu_alternate_reg[IMM_1 + (i >> 1)] & 0xFFFF);
2298
2299                         int16 b = ((int16)GPUReadWord(addr + 2, GPU));
2300                         accum += a * b;
2301                         addr += 4;
2302                 }
2303         }
2304         RN = res = (int32)accum;
2305         // carry flag to do (out of the last add)
2306         SET_ZN(res);
2307 }
2308
2309 static void gpu_opcode_abs(void)
2310 {
2311 #ifdef GPU_DIS_ABS
2312         if (doGPUDis)
2313                 WriteLog("%06X: ABS    R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2314 #endif
2315         gpu_flag_c = RN >> 31;
2316         if (RN == 0x80000000)
2317         //Is 0x80000000 a positive number? If so, then we need to set C to 0 as well!
2318                 gpu_flag_n = 1, gpu_flag_z = 0;
2319         else
2320         {
2321                 if (gpu_flag_c)
2322                         RN = -RN;
2323                 gpu_flag_n = 0; SET_FLAG_Z(RN);
2324         }
2325 #ifdef GPU_DIS_ABS
2326         if (doGPUDis)
2327                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2328 #endif
2329 }
2330
2331 static void gpu_opcode_div(void)        // RN / RM
2332 {
2333 #ifdef GPU_DIS_DIV
2334         if (doGPUDis)
2335                 WriteLog("%06X: DIV    R%02u, R%02u (%s) [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, (gpu_div_control & 0x01 ? "16.16" : "32"), gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2336 #endif
2337 // NOTE: remainder is NOT calculated correctly here!
2338 //       The original tried to get it right by checking to see if the
2339 //       remainder was negative, but that's too late...
2340 // The code there should do it now, but I'm not 100% sure...
2341
2342         if (RM)
2343         {
2344                 if (gpu_div_control & 0x01)             // 16.16 division
2345                 {
2346                         RN = ((uint64)RN << 16) / RM;
2347                         gpu_remain = ((uint64)RN << 16) % RM;
2348                 }
2349                 else
2350                 {
2351                         RN = RN / RM;
2352                         gpu_remain = RN % RM;
2353                 }
2354
2355                 if ((gpu_remain - RM) & 0x80000000)     // If the result would have been negative...
2356                         gpu_remain -= RM;                       // Then make it negative!
2357         }
2358         else
2359                 RN = 0xFFFFFFFF;
2360
2361 /*      uint32 _RM=RM;
2362         uint32 _RN=RN;
2363
2364         if (_RM)
2365         {
2366                 if (gpu_div_control & 1)
2367                 {
2368                         gpu_remain = (((uint64)_RN) << 16) % _RM;
2369                         if (gpu_remain&0x80000000)
2370                                 gpu_remain-=_RM;
2371                         RN = (((uint64)_RN) << 16) / _RM;
2372                 }
2373                 else
2374                 {
2375                         gpu_remain = _RN % _RM;
2376                         if (gpu_remain&0x80000000)
2377                                 gpu_remain-=_RM;
2378                         RN/=_RM;
2379                 }
2380         }
2381         else
2382                 RN=0xffffffff;*/
2383 #ifdef GPU_DIS_DIV
2384         if (doGPUDis)
2385                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] Remainder: %08X\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN, gpu_remain);
2386 #endif
2387 }
2388
2389 static void gpu_opcode_imultn(void)
2390 {
2391         uint32 res = (int32)((int16)RN * (int16)RM);
2392         gpu_acc = (int32)res;
2393         SET_FLAG_Z(res);
2394         SET_FLAG_N(res);
2395 }
2396
2397 static void gpu_opcode_neg(void)
2398 {
2399 #ifdef GPU_DIS_NEG
2400         if (doGPUDis)
2401                 WriteLog("%06X: NEG    R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2402 #endif
2403         uint32 res = -RN;
2404         SET_ZNC_SUB(0, RN, res);
2405         RN = res;
2406 #ifdef GPU_DIS_NEG
2407         if (doGPUDis)
2408                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2409 #endif
2410 }
2411
2412 static void gpu_opcode_shlq(void)
2413 {
2414 #ifdef GPU_DIS_SHLQ
2415         if (doGPUDis)
2416                 WriteLog("%06X: SHLQ   #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, 32 - IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2417 #endif
2418 // Was a bug here...
2419 // (Look at Aaron's code: If r1 = 32, then 32 - 32 = 0 which is wrong!)
2420         int32 r1 = 32 - IMM_1;
2421         uint32 res = RN << r1;
2422         SET_ZN(res); gpu_flag_c = (RN >> 31) & 1;
2423         RN = res;
2424 #ifdef GPU_DIS_SHLQ
2425         if (doGPUDis)
2426                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2427 #endif
2428 }
2429
2430 static void gpu_opcode_shrq(void)
2431 {
2432 #ifdef GPU_DIS_SHRQ
2433         if (doGPUDis)
2434                 WriteLog("%06X: SHRQ   #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2435 #endif
2436         int32 r1 = gpu_convert_zero[IMM_1];
2437         uint32 res = RN >> r1;
2438         SET_ZN(res); gpu_flag_c = RN & 1;
2439         RN = res;
2440 #ifdef GPU_DIS_SHRQ
2441         if (doGPUDis)
2442                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2443 #endif
2444 }
2445
2446 static void gpu_opcode_ror(void)
2447 {
2448 #ifdef GPU_DIS_ROR
2449         if (doGPUDis)
2450                 WriteLog("%06X: ROR    R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2451 #endif
2452         uint32 r1 = RM & 0x1F;
2453         uint32 res = (RN >> r1) | (RN << (32 - r1));
2454         SET_ZN(res); gpu_flag_c = (RN >> 31) & 1;
2455         RN = res;
2456 #ifdef GPU_DIS_ROR
2457         if (doGPUDis)
2458                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2459 #endif
2460 }
2461
2462 static void gpu_opcode_rorq(void)
2463 {
2464 #ifdef GPU_DIS_RORQ
2465         if (doGPUDis)
2466                 WriteLog("%06X: RORQ   #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2467 #endif
2468         uint32 r1 = gpu_convert_zero[IMM_1 & 0x1F];
2469         uint32 r2 = RN;
2470         uint32 res = (r2 >> r1) | (r2 << (32 - r1));
2471         RN = res;
2472         SET_ZN(res); gpu_flag_c = (r2 >> 31) & 0x01;
2473 #ifdef GPU_DIS_RORQ
2474         if (doGPUDis)
2475                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2476 #endif
2477 }
2478
2479 static void gpu_opcode_sha(void)
2480 {
2481 /*      int dreg = jaguar.op & 31;
2482         int32 r1 = (int32)jaguar.r[(jaguar.op >> 5) & 31];
2483         uint32 r2 = jaguar.r[dreg];
2484         uint32 res;
2485
2486         CLR_ZNC;
2487         if (r1 < 0)
2488         {
2489                 res = (r1 <= -32) ? 0 : (r2 << -r1);
2490                 jaguar.FLAGS |= (r2 >> 30) & 2;
2491         }
2492         else
2493         {
2494                 res = (r1 >= 32) ? ((int32)r2 >> 31) : ((int32)r2 >> r1);
2495                 jaguar.FLAGS |= (r2 << 1) & 2;
2496         }
2497         jaguar.r[dreg] = res;
2498         SET_ZN(res);*/
2499
2500 #ifdef GPU_DIS_SHA
2501         if (doGPUDis)
2502                 WriteLog("%06X: SHA    R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2503 #endif
2504         uint32 res;
2505
2506         if ((int32)RM < 0)
2507         {
2508                 res = ((int32)RM <= -32) ? 0 : (RN << -(int32)RM);
2509                 gpu_flag_c = RN >> 31;
2510         }
2511         else
2512         {
2513                 res = ((int32)RM >= 32) ? ((int32)RN >> 31) : ((int32)RN >> (int32)RM);
2514                 gpu_flag_c = RN & 0x01;
2515         }
2516         RN = res;
2517         SET_ZN(res);
2518 #ifdef GPU_DIS_SHA
2519         if (doGPUDis)
2520                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2521 #endif
2522
2523 /*      int32 sRM=(int32)RM;
2524         uint32 _RN=RN;
2525
2526         if (sRM<0)
2527         {
2528                 uint32 shift=-sRM;
2529                 if (shift>=32) shift=32;
2530                 gpu_flag_c=(_RN&0x80000000)>>31;
2531                 while (shift)
2532                 {
2533                         _RN<<=1;
2534                         shift--;
2535                 }
2536         }
2537         else
2538         {
2539                 uint32 shift=sRM;
2540                 if (shift>=32) shift=32;
2541                 gpu_flag_c=_RN&0x1;
2542                 while (shift)
2543                 {
2544                         _RN=((int32)_RN)>>1;
2545                         shift--;
2546                 }
2547         }
2548         RN=_RN;
2549         SET_FLAG_Z(_RN);
2550         SET_FLAG_N(_RN);*/
2551 }
2552
2553 static void gpu_opcode_sharq(void)
2554 {
2555 #ifdef GPU_DIS_SHARQ
2556         if (doGPUDis)
2557                 WriteLog("%06X: SHARQ  #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2558 #endif
2559         uint32 res = (int32)RN >> gpu_convert_zero[IMM_1];
2560         SET_ZN(res); gpu_flag_c = RN & 0x01;
2561         RN = res;
2562 #ifdef GPU_DIS_SHARQ
2563         if (doGPUDis)
2564                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2565 #endif
2566 }
2567
2568 static void gpu_opcode_sh(void)
2569 {
2570 #ifdef GPU_DIS_SH
2571         if (doGPUDis)
2572                 WriteLog("%06X: SH     R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2573 #endif
2574         if (RM & 0x80000000)            // Shift left
2575         {
2576                 gpu_flag_c = RN >> 31;
2577                 RN = ((int32)RM <= -32 ? 0 : RN << -(int32)RM);
2578         }
2579         else                                            // Shift right
2580         {
2581                 gpu_flag_c = RN & 0x01;
2582                 RN = (RM >= 32 ? 0 : RN >> RM);
2583         }
2584         SET_ZN(RN);
2585 #ifdef GPU_DIS_SH
2586         if (doGPUDis)
2587                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2588 #endif
2589 }
2590
2591 //Temporary: Testing only!
2592 //#include "gpu2.cpp"
2593 //#include "gpu3.cpp"
2594
2595 #else
2596
2597 // New thread-safe GPU core
2598
//
// Placeholder entry point for the planned thread-safe GPU core.
// Does no work yet. It returns 0 explicitly: falling off the end of a
// function declared to return int is undefined behaviour in C++ as soon as
// the function is called.
//
// data: opaque argument; currently unused.
//
int GPUCore(void * data)
{
        (void)data;                             // Not used by the stub
        return 0;
}
2602
2603 #endif