]> Shamusworld >> Repos - virtualjaguar/blob - src/gpu.cpp
e1fdd41e8751be298c26804e4a198a37de0c4f7d
[virtualjaguar] / src / gpu.cpp
1 #if 1
2
3 //
4 // GPU Core
5 //
6 // Originally by David Raingeard (Cal2)
7 // GCC/SDL port by Niels Wagenaar (Linux/WIN32) and Caz (BeOS)
8 // Cleanups, endian wrongness, and bad ASM amelioration by James Hammons
9 // (C) 2010 Underground Software
10 //
11 // JLH = James Hammons <jlhamm@acm.org>
12 //
13 // Who  When        What
14 // ---  ----------  -------------------------------------------------------------
15 // JLH  01/16/2010  Created this log ;-)
16 // JLH  11/26/2011  Added fixes for LOAD/STORE alignment issues
17
18 //
19 // Note: Endian wrongness probably stems from the MAME origins of this emu and
20 //       the braindead way in which MAME handles memory. :-)
21 //
22 // Problem with not booting the BIOS was the incorrect way that the
23 // SUBC instruction set the carry when the carry was set going in...
24 // Same problem with ADDC...
25 //
26
27 #include "gpu.h"
28
29 #include <stdlib.h>
30 #include <string.h>                                                             // For memset
31 #include "dsp.h"
32 #include "jagdasm.h"
33 #include "jaguar.h"
34 #include "log.h"
35 #include "m68000/m68kinterface.h"
36 //#include "memory.h"
37 #include "tom.h"
38
39
40 // Seems alignment in loads & stores was off...
41 #define GPU_CORRECT_ALIGNMENT
42 //#define GPU_DEBUG
43
44 // For GPU disassembly...
45
46 #if 0
47 #define GPU_DIS_ABS
48 #define GPU_DIS_ADD
49 #define GPU_DIS_ADDC
50 #define GPU_DIS_ADDQ
51 #define GPU_DIS_ADDQT
52 #define GPU_DIS_AND
53 #define GPU_DIS_BCLR
54 #define GPU_DIS_BSET
55 #define GPU_DIS_BTST
56 #define GPU_DIS_CMP
57 #define GPU_DIS_CMPQ
58 #define GPU_DIS_DIV
59 #define GPU_DIS_IMULT
60 #define GPU_DIS_JUMP
61 #define GPU_DIS_JR
62 #define GPU_DIS_LOAD
63 #define GPU_DIS_LOADB
64 #define GPU_DIS_LOADW
65 #define GPU_DIS_LOAD14I
66 #define GPU_DIS_LOAD14R
67 #define GPU_DIS_LOAD15I
68 #define GPU_DIS_LOAD15R
69 #define GPU_DIS_MOVE
70 #define GPU_DIS_MOVEFA
71 #define GPU_DIS_MOVEI
72 #define GPU_DIS_MOVEPC
73 #define GPU_DIS_MOVETA
74 #define GPU_DIS_MOVEQ
75 #define GPU_DIS_MULT
76 #define GPU_DIS_NEG
77 #define GPU_DIS_NOP
78 #define GPU_DIS_NOT
79 #define GPU_DIS_OR
80 #define GPU_DIS_PACK
81 #define GPU_DIS_ROR
82 #define GPU_DIS_RORQ
83 #define GPU_DIS_SAT8
84 #define GPU_DIS_SH
85 #define GPU_DIS_SHA
86 #define GPU_DIS_SHARQ
87 #define GPU_DIS_SHLQ
88 #define GPU_DIS_SHRQ
89 #define GPU_DIS_STORE
90 #define GPU_DIS_STOREB
91 #define GPU_DIS_STOREW
92 #define GPU_DIS_STORE14I
93 #define GPU_DIS_STORE14R
94 #define GPU_DIS_STORE15I
95 #define GPU_DIS_STORE15R
96 #define GPU_DIS_SUB
97 #define GPU_DIS_SUBC
98 #define GPU_DIS_SUBQ
99 #define GPU_DIS_SUBQT
100 #define GPU_DIS_XOR
101
102 //bool doGPUDis = false;
103 bool doGPUDis = true;
104 #endif
105
106 /*
107 GPU opcodes use (BIOS flying ATARI logo):
108 +                     add 357416
109 +                    addq 538030
110 +                   addqt 6999
111 +                     sub 116663
112 +                    subq 188059
113 +                   subqt 15086
114 +                     neg 36097
115 +                     and 233993
116 +                      or 109332
117 +                     xor 1384
118 +                    btst 111924
119 +                    bset 25029
120 +                    bclr 10551
121 +                    mult 28147
122 +                   imult 69148
123 +                     div 64102
124 +                     abs 159394
125 +                    shlq 194690
126 +                    shrq 292587
127 +                   sharq 192649
128 +                    rorq 58672
129 +                     cmp 244963
130 +                    cmpq 114834
131 +                    move 833472
132 +                   moveq 56427
133 +                  moveta 220814
134 +                  movefa 170678
135 +                   movei 152025
136 +                   loadw 108220
137 +                    load 430936
138 +                  storew 3036
139 +                   store 372490
140 +                 move_pc 2330
141 +                    jump 349134
142 +                      jr 529171
143                     mmult 64904
144 +                     nop 432179
145 */
146
147 // Various bits
148
149 #define CINT0FLAG                       0x0200
150 #define CINT1FLAG                       0x0400
151 #define CINT2FLAG                       0x0800
152 #define CINT3FLAG                       0x1000
153 #define CINT4FLAG                       0x2000
154 #define CINT04FLAGS                     (CINT0FLAG | CINT1FLAG | CINT2FLAG | CINT3FLAG | CINT4FLAG)
155
156 // GPU_FLAGS bits
157
158 #define ZERO_FLAG               0x0001
159 #define CARRY_FLAG              0x0002
160 #define NEGA_FLAG               0x0004
161 #define IMASK                   0x0008
162 #define INT_ENA0                0x0010
163 #define INT_ENA1                0x0020
164 #define INT_ENA2                0x0040
165 #define INT_ENA3                0x0080
166 #define INT_ENA4                0x0100
167 #define INT_CLR0                0x0200
168 #define INT_CLR1                0x0400
169 #define INT_CLR2                0x0800
170 #define INT_CLR3                0x1000
171 #define INT_CLR4                0x2000
172 #define REGPAGE                 0x4000
173 #define DMAEN                   0x8000
174
175 // External global variables
176
177 extern int start_logging;
178 extern int gpu_start_log;
179
180 // Private function prototypes
181
182 void GPUUpdateRegisterBanks(void);
183 void GPUDumpDisassembly(void);
184 void GPUDumpRegisters(void);
185 void GPUDumpMemory(void);
186
187 static void gpu_opcode_add(void);
188 static void gpu_opcode_addc(void);
189 static void gpu_opcode_addq(void);
190 static void gpu_opcode_addqt(void);
191 static void gpu_opcode_sub(void);
192 static void gpu_opcode_subc(void);
193 static void gpu_opcode_subq(void);
194 static void gpu_opcode_subqt(void);
195 static void gpu_opcode_neg(void);
196 static void gpu_opcode_and(void);
197 static void gpu_opcode_or(void);
198 static void gpu_opcode_xor(void);
199 static void gpu_opcode_not(void);
200 static void gpu_opcode_btst(void);
201 static void gpu_opcode_bset(void);
202 static void gpu_opcode_bclr(void);
203 static void gpu_opcode_mult(void);
204 static void gpu_opcode_imult(void);
205 static void gpu_opcode_imultn(void);
206 static void gpu_opcode_resmac(void);
207 static void gpu_opcode_imacn(void);
208 static void gpu_opcode_div(void);
209 static void gpu_opcode_abs(void);
210 static void gpu_opcode_sh(void);
211 static void gpu_opcode_shlq(void);
212 static void gpu_opcode_shrq(void);
213 static void gpu_opcode_sha(void);
214 static void gpu_opcode_sharq(void);
215 static void gpu_opcode_ror(void);
216 static void gpu_opcode_rorq(void);
217 static void gpu_opcode_cmp(void);
218 static void gpu_opcode_cmpq(void);
219 static void gpu_opcode_sat8(void);
220 static void gpu_opcode_sat16(void);
221 static void gpu_opcode_move(void);
222 static void gpu_opcode_moveq(void);
223 static void gpu_opcode_moveta(void);
224 static void gpu_opcode_movefa(void);
225 static void gpu_opcode_movei(void);
226 static void gpu_opcode_loadb(void);
227 static void gpu_opcode_loadw(void);
228 static void gpu_opcode_load(void);
229 static void gpu_opcode_loadp(void);
230 static void gpu_opcode_load_r14_indexed(void);
231 static void gpu_opcode_load_r15_indexed(void);
232 static void gpu_opcode_storeb(void);
233 static void gpu_opcode_storew(void);
234 static void gpu_opcode_store(void);
235 static void gpu_opcode_storep(void);
236 static void gpu_opcode_store_r14_indexed(void);
237 static void gpu_opcode_store_r15_indexed(void);
238 static void gpu_opcode_move_pc(void);
239 static void gpu_opcode_jump(void);
240 static void gpu_opcode_jr(void);
241 static void gpu_opcode_mmult(void);
242 static void gpu_opcode_mtoi(void);
243 static void gpu_opcode_normi(void);
244 static void gpu_opcode_nop(void);
245 static void gpu_opcode_load_r14_ri(void);
246 static void gpu_opcode_load_r15_ri(void);
247 static void gpu_opcode_store_r14_ri(void);
248 static void gpu_opcode_store_r15_ri(void);
249 static void gpu_opcode_sat24(void);
250 static void gpu_opcode_pack(void);
251
252 // This is wrong, since it doesn't take pipeline effects into account. !!! FIX !!!
253 /*uint8 gpu_opcode_cycles[64] =
254 {
255         3,  3,  3,  3,  3,  3,  3,  3,
256         3,  3,  3,  3,  3,  3,  3,  3,
257         3,  3,  1,  3,  1, 18,  3,  3,
258         3,  3,  3,  3,  3,  3,  3,  3,
259         3,  3,  2,  2,  2,  2,  3,  4,
260         5,  4,  5,  6,  6,  1,  1,  1,
261         1,  2,  2,  2,  1,  1,  9,  3,
262         3,  1,  6,  6,  2,  2,  3,  3
263 };//*/
264 //Here's a QnD kludge...
265 //This is wrong, wrong, WRONG, but it seems to work for the time being...
266 //(That is, it fixes Flip Out which relies on GPU timing rather than semaphores. Bad developers! Bad!)
267 //What's needed here is a way to take pipeline effects into account (including pipeline stalls!)...
268 /*uint8 gpu_opcode_cycles[64] =
269 {
270         1,  1,  1,  1,  1,  1,  1,  1,
271         1,  1,  1,  1,  1,  1,  1,  1,
272         1,  1,  1,  1,  1,  9,  1,  1,
273         1,  1,  1,  1,  1,  1,  1,  1,
274         1,  1,  1,  1,  1,  1,  1,  2,
275         2,  2,  2,  3,  3,  1,  1,  1,
276         1,  1,  1,  1,  1,  1,  4,  1,
277         1,  1,  3,  3,  1,  1,  1,  1
278 };//*/
279 uint8 gpu_opcode_cycles[64] =
280 {
281         1,  1,  1,  1,  1,  1,  1,  1,
282         1,  1,  1,  1,  1,  1,  1,  1,
283         1,  1,  1,  1,  1,  1,  1,  1,
284         1,  1,  1,  1,  1,  1,  1,  1,
285         1,  1,  1,  1,  1,  1,  1,  1,
286         1,  1,  1,  1,  1,  1,  1,  1,
287         1,  1,  1,  1,  1,  1,  1,  1,
288         1,  1,  1,  1,  1,  1,  1,  1
289 };//*/
290
291 void (*gpu_opcode[64])()=
292 {
293         gpu_opcode_add,                                 gpu_opcode_addc,                                gpu_opcode_addq,                                gpu_opcode_addqt,
294         gpu_opcode_sub,                                 gpu_opcode_subc,                                gpu_opcode_subq,                                gpu_opcode_subqt,
295         gpu_opcode_neg,                                 gpu_opcode_and,                                 gpu_opcode_or,                                  gpu_opcode_xor,
296         gpu_opcode_not,                                 gpu_opcode_btst,                                gpu_opcode_bset,                                gpu_opcode_bclr,
297         gpu_opcode_mult,                                gpu_opcode_imult,                               gpu_opcode_imultn,                              gpu_opcode_resmac,
298         gpu_opcode_imacn,                               gpu_opcode_div,                                 gpu_opcode_abs,                                 gpu_opcode_sh,
299         gpu_opcode_shlq,                                gpu_opcode_shrq,                                gpu_opcode_sha,                                 gpu_opcode_sharq,
300         gpu_opcode_ror,                                 gpu_opcode_rorq,                                gpu_opcode_cmp,                                 gpu_opcode_cmpq,
301         gpu_opcode_sat8,                                gpu_opcode_sat16,                               gpu_opcode_move,                                gpu_opcode_moveq,
302         gpu_opcode_moveta,                              gpu_opcode_movefa,                              gpu_opcode_movei,                               gpu_opcode_loadb,
303         gpu_opcode_loadw,                               gpu_opcode_load,                                gpu_opcode_loadp,                               gpu_opcode_load_r14_indexed,
304         gpu_opcode_load_r15_indexed,    gpu_opcode_storeb,                              gpu_opcode_storew,                              gpu_opcode_store,
305         gpu_opcode_storep,                              gpu_opcode_store_r14_indexed,   gpu_opcode_store_r15_indexed,   gpu_opcode_move_pc,
306         gpu_opcode_jump,                                gpu_opcode_jr,                                  gpu_opcode_mmult,                               gpu_opcode_mtoi,
307         gpu_opcode_normi,                               gpu_opcode_nop,                                 gpu_opcode_load_r14_ri,                 gpu_opcode_load_r15_ri,
308         gpu_opcode_store_r14_ri,                gpu_opcode_store_r15_ri,                gpu_opcode_sat24,                               gpu_opcode_pack,
309 };
310
311 static uint8 gpu_ram_8[0x1000];
312 uint32 gpu_pc;
313 static uint32 gpu_acc;
314 static uint32 gpu_remain;
315 static uint32 gpu_hidata;
316 static uint32 gpu_flags;
317 static uint32 gpu_matrix_control;
318 static uint32 gpu_pointer_to_matrix;
319 static uint32 gpu_data_organization;
320 static uint32 gpu_control;
321 static uint32 gpu_div_control;
322 // There is a distinct advantage to having these separated out--there's no need to clear
323 // a bit before writing a result. I.e., if the result of an operation leaves a zero in
324 // the carry flag, you don't have to zero gpu_flag_c before you can write that zero!
325 static uint8 gpu_flag_z, gpu_flag_n, gpu_flag_c;
326 static uint32 gpu_reg_bank_0[32];
327 static uint32 gpu_reg_bank_1[32];
328 static uint32 * gpu_reg;
329 static uint32 * gpu_alternate_reg;
330
331 static uint32 gpu_instruction;
332 static uint32 gpu_opcode_first_parameter;
333 static uint32 gpu_opcode_second_parameter;
334
335 #define GPU_RUNNING             (gpu_control & 0x01)
336
337 #define RM                              gpu_reg[gpu_opcode_first_parameter]
338 #define RN                              gpu_reg[gpu_opcode_second_parameter]
339 #define ALTERNATE_RM    gpu_alternate_reg[gpu_opcode_first_parameter]
340 #define ALTERNATE_RN    gpu_alternate_reg[gpu_opcode_second_parameter]
341 #define IMM_1                   gpu_opcode_first_parameter
342 #define IMM_2                   gpu_opcode_second_parameter
343
344 #define SET_FLAG_Z(r)   (gpu_flag_z = ((r) == 0));
345 #define SET_FLAG_N(r)   (gpu_flag_n = (((uint32)(r) >> 31) & 0x01));
346
347 #define RESET_FLAG_Z()  gpu_flag_z = 0;
348 #define RESET_FLAG_N()  gpu_flag_n = 0;
349 #define RESET_FLAG_C()  gpu_flag_c = 0;
350
// Flag helpers. Each macro writes the separate gpu_flag_z / gpu_flag_n /
// gpu_flag_c bytes directly.
//
// Every macro below expands to exactly ONE expression, so it behaves as a
// single statement when followed by ';'--including as the body of an unbraced
// if/else. (The compound SET_ZN / SET_ZNC_* forms used to expand to several
// ';'-separated statements, of which only the first would have been guarded.)

// Clear flags.
#define CLR_Z				(gpu_flag_z = 0)
#define CLR_ZN				(gpu_flag_z = gpu_flag_n = 0)
#define CLR_ZNC				(gpu_flag_z = gpu_flag_n = gpu_flag_c = 0)

// Z is set when the result is zero; N mirrors bit 31 of the result.
#define SET_Z(r)			(gpu_flag_z = ((r) == 0))
#define SET_N(r)			(gpu_flag_n = (((uint32)(r) >> 31) & 0x01))

// Carry out of a + b: set when b is larger than the one's complement of a.
#define SET_C_ADD(a,b)		(gpu_flag_c = ((uint32)(b) > (uint32)(~(a))))
// Borrow out of a - b: set when b is larger than a (unsigned compare).
#define SET_C_SUB(a,b)		(gpu_flag_c = ((uint32)(b) > (uint32)(a)))

// Compound forms, evaluated left to right via the comma operator.
#define SET_ZN(r)			(SET_N(r), SET_Z(r))
#define SET_ZNC_ADD(a,b,r)	(SET_N(r), SET_Z(r), SET_C_ADD(a,b))
#define SET_ZNC_SUB(a,b,r)	(SET_N(r), SET_Z(r), SET_C_SUB(a,b))
361
362 uint32 gpu_convert_zero[32] =
363         { 32,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 };
364
365 uint8 * branch_condition_table = 0;
366 #define BRANCH_CONDITION(x)     branch_condition_table[(x) + ((jaguar_flags & 7) << 5)]
367
368 uint32 gpu_opcode_use[64];
369
370 const char * gpu_opcode_str[64]=
371 {
372         "add",                          "addc",                         "addq",                         "addqt",
373         "sub",                          "subc",                         "subq",                         "subqt",
374         "neg",                          "and",                          "or",                           "xor",
375         "not",                          "btst",                         "bset",                         "bclr",
376         "mult",                         "imult",                        "imultn",                       "resmac",
377         "imacn",                        "div",                          "abs",                          "sh",
378         "shlq",                         "shrq",                         "sha",                          "sharq",
379         "ror",                          "rorq",                         "cmp",                          "cmpq",
380         "sat8",                         "sat16",                        "move",                         "moveq",
381         "moveta",                       "movefa",                       "movei",                        "loadb",
382         "loadw",                        "load",                         "loadp",                        "load_r14_indexed",
383         "load_r15_indexed",     "storeb",                       "storew",                       "store",
384         "storep",                       "store_r14_indexed","store_r15_indexed","move_pc",
385         "jump",                         "jr",                           "mmult",                        "mtoi",
386         "normi",                        "nop",                          "load_r14_ri",          "load_r15_ri",
387         "store_r14_ri",         "store_r15_ri",         "sat24",                        "pack",
388 };
389
390 static uint32 gpu_in_exec = 0;
391 static uint32 gpu_releaseTimeSlice_flag = 0;
392
393 void GPUReleaseTimeslice(void)
394 {
395         gpu_releaseTimeSlice_flag = 1;
396 }
397
398 uint32 GPUGetPC(void)
399 {
400         return gpu_pc;
401 }
402
403 void build_branch_condition_table(void)
404 {
405         if (!branch_condition_table)
406         {
407                 branch_condition_table = (uint8 *)malloc(32 * 8 * sizeof(branch_condition_table[0]));
408
409                 if (branch_condition_table)
410                 {
411                         for(int i=0; i<8; i++)
412                         {
413                                 for(int j=0; j<32; j++)
414                                 {
415                                         int result = 1;
416                                         if (j & 1)
417                                                 if (i & ZERO_FLAG)
418                                                         result = 0;
419                                         if (j & 2)
420                                                 if (!(i & ZERO_FLAG))
421                                                         result = 0;
422                                         if (j & 4)
423                                                 if (i & (CARRY_FLAG << (j >> 4)))
424                                                         result = 0;
425                                         if (j & 8)
426                                                 if (!(i & (CARRY_FLAG << (j >> 4))))
427                                                         result = 0;
428                                         branch_condition_table[i * 32 + j] = result;
429                                 }
430                         }
431                 }
432         }
433 }
434
435 //
436 // GPU byte access (read)
437 //
438 uint8 GPUReadByte(uint32 offset, uint32 who/*=UNKNOWN*/)
439 {
440         if (offset >= 0xF02000 && offset <= 0xF020FF)
441                 WriteLog("GPU: ReadByte--Attempt to read from GPU register file by %s!\n", whoName[who]);
442
443         if ((offset >= GPU_WORK_RAM_BASE) && (offset < GPU_WORK_RAM_BASE+0x1000))
444                 return gpu_ram_8[offset & 0xFFF];
445         else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset < GPU_CONTROL_RAM_BASE+0x20))
446         {
447                 uint32 data = GPUReadLong(offset & 0xFFFFFFFC, who);
448
449                 if ((offset & 0x03) == 0)
450                         return data >> 24;
451                 else if ((offset & 0x03) == 1)
452                         return (data >> 16) & 0xFF;
453                 else if ((offset & 0x03) == 2)
454                         return (data >> 8) & 0xFF;
455                 else if ((offset & 0x03) == 3)
456                         return data & 0xFF;
457         }
458
459         return JaguarReadByte(offset, who);
460 }
461
462 //
463 // GPU word access (read)
464 //
465 uint16 GPUReadWord(uint32 offset, uint32 who/*=UNKNOWN*/)
466 {
467         if (offset >= 0xF02000 && offset <= 0xF020FF)
468                 WriteLog("GPU: ReadWord--Attempt to read from GPU register file by %s!\n", whoName[who]);
469
470         if ((offset >= GPU_WORK_RAM_BASE) && (offset < GPU_WORK_RAM_BASE+0x1000))
471         {
472                 offset &= 0xFFF;
473                 uint16 data = ((uint16)gpu_ram_8[offset] << 8) | (uint16)gpu_ram_8[offset+1];
474                 return data;
475         }
476         else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset < GPU_CONTROL_RAM_BASE+0x20))
477         {
478 // This looks and smells wrong...
479 // But it *might* be OK...
480                 if (offset & 0x01)                      // Catch cases 1 & 3... (unaligned read)
481                         return (GPUReadByte(offset, who) << 8) | GPUReadByte(offset+1, who);
482
483                 uint32 data = GPUReadLong(offset & 0xFFFFFFFC, who);
484
485                 if (offset & 0x02)                      // Cases 0 & 2...
486                         return data & 0xFFFF;
487                 else
488                         return data >> 16;
489         }
490
491 //TEMP--Mirror of F03000? No. Writes only...
492 //if (offset >= 0xF0B000 && offset <= 0xF0BFFF)
493 //WriteLog("[GPUR16] --> Possible GPU RAM mirror access by %s!", whoName[who]);
494
495         return JaguarReadWord(offset, who);
496 }
497
498 //
499 // GPU dword access (read)
500 //
501 uint32 GPUReadLong(uint32 offset, uint32 who/*=UNKNOWN*/)
502 {
503         if (offset >= 0xF02000 && offset <= 0xF020FF)
504                 WriteLog("GPU: ReadLong--Attempt to read from GPU register file by %s!\n", whoName[who]);
505
506 //      if ((offset >= GPU_WORK_RAM_BASE) && (offset < GPU_WORK_RAM_BASE + 0x1000))
507         if ((offset >= GPU_WORK_RAM_BASE) && (offset <= GPU_WORK_RAM_BASE + 0x0FFC))
508         {
509                 offset &= 0xFFF;
510                 return ((uint32)gpu_ram_8[offset] << 24) | ((uint32)gpu_ram_8[offset+1] << 16)
511                         | ((uint32)gpu_ram_8[offset+2] << 8) | (uint32)gpu_ram_8[offset+3];//*/
512 //              return GET32(gpu_ram_8, offset);
513         }
514 //      else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset < GPU_CONTROL_RAM_BASE+0x20))
515         else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset <= GPU_CONTROL_RAM_BASE + 0x1C))
516         {
517                 offset &= 0x1F;
518                 switch (offset)
519                 {
520                 case 0x00:
521                         gpu_flag_c = (gpu_flag_c ? 1 : 0);
522                         gpu_flag_z = (gpu_flag_z ? 1 : 0);
523                         gpu_flag_n = (gpu_flag_n ? 1 : 0);
524
525                         gpu_flags = (gpu_flags & 0xFFFFFFF8) | (gpu_flag_n << 2) | (gpu_flag_c << 1) | gpu_flag_z;
526
527                         return gpu_flags & 0xFFFFC1FF;
528                 case 0x04:
529                         return gpu_matrix_control;
530                 case 0x08:
531                         return gpu_pointer_to_matrix;
532                 case 0x0C:
533                         return gpu_data_organization;
534                 case 0x10:
535                         return gpu_pc;
536                 case 0x14:
537                         return gpu_control;
538                 case 0x18:
539                         return gpu_hidata;
540                 case 0x1C:
541                         return gpu_remain;
542                 default:                                                                // unaligned long read
543 #ifdef GPU_DEBUG
544                         WriteLog("GPU: Read32--unaligned 32 bit read at %08X by %s.\n", GPU_CONTROL_RAM_BASE + offset, whoName[who]);
545 #endif  // GPU_DEBUG
546                         return 0;
547                 }
548         }
549 //TEMP--Mirror of F03000? No. Writes only...
550 //if (offset >= 0xF0B000 && offset <= 0xF0BFFF)
551 //      WriteLog("[GPUR32] --> Possible GPU RAM mirror access by %s!\n", whoName[who]);
552 /*if (offset >= 0xF1D000 && offset <= 0xF1DFFF)
553         WriteLog("[GPUR32] --> Reading from Wavetable ROM!\n");//*/
554
555         return (JaguarReadWord(offset, who) << 16) | JaguarReadWord(offset + 2, who);
556 }
557
558 //
559 // GPU byte access (write)
560 //
561 void GPUWriteByte(uint32 offset, uint8 data, uint32 who/*=UNKNOWN*/)
562 {
563         if (offset >= 0xF02000 && offset <= 0xF020FF)
564                 WriteLog("GPU: WriteByte--Attempt to write to GPU register file by %s!\n", whoName[who]);
565
566         if ((offset >= GPU_WORK_RAM_BASE) && (offset <= GPU_WORK_RAM_BASE + 0x0FFF))
567         {
568                 gpu_ram_8[offset & 0xFFF] = data;
569
570 //This is the same stupid worthless code that was in the DSP!!! AARRRGGGGHHHHH!!!!!!
571 /*              if (!gpu_in_exec)
572                 {
573                         m68k_end_timeslice();
574                         dsp_releaseTimeslice();
575                 }*/
576                 return;
577         }
578         else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset <= GPU_CONTROL_RAM_BASE + 0x1F))
579         {
580                 uint32 reg = offset & 0x1C;
581                 int bytenum = offset & 0x03;
582
583 //This is definitely wrong!
584                 if ((reg >= 0x1C) && (reg <= 0x1F))
585                         gpu_div_control = (gpu_div_control & (~(0xFF << (bytenum << 3)))) | (data << (bytenum << 3));
586                 else
587                 {
588                         uint32 old_data = GPUReadLong(offset & 0xFFFFFFC, who);
589                         bytenum = 3 - bytenum; // convention motorola !!!
590                         old_data = (old_data & (~(0xFF << (bytenum << 3)))) | (data << (bytenum << 3));
591                         GPUWriteLong(offset & 0xFFFFFFC, old_data, who);
592                 }
593                 return;
594         }
595 //      WriteLog("gpu: writing %.2x at 0x%.8x\n",data,offset);
596         JaguarWriteByte(offset, data, who);
597 }
598
599 //
600 // GPU word access (write)
601 //
602 void GPUWriteWord(uint32 offset, uint16 data, uint32 who/*=UNKNOWN*/)
603 {
604         if (offset >= 0xF02000 && offset <= 0xF020FF)
605                 WriteLog("GPU: WriteWord--Attempt to write to GPU register file by %s!\n", whoName[who]);
606
607         if ((offset >= GPU_WORK_RAM_BASE) && (offset <= GPU_WORK_RAM_BASE + 0x0FFE))
608         {
609                 gpu_ram_8[offset & 0xFFF] = (data>>8) & 0xFF;
610                 gpu_ram_8[(offset+1) & 0xFFF] = data & 0xFF;//*/
611 /*              offset &= 0xFFF;
612                 SET16(gpu_ram_8, offset, data);//*/
613
614 /*if (offset >= 0xF03214 && offset < 0xF0321F)
615         WriteLog("GPU: Writing WORD (%04X) to GPU RAM (%08X)...\n", data, offset);//*/
616
617
618 //This is the same stupid worthless code that was in the DSP!!! AARRRGGGGHHHHH!!!!!!
619 /*              if (!gpu_in_exec)
620                 {
621                         m68k_end_timeslice();
622                         dsp_releaseTimeslice();
623                 }*/
624                 return;
625         }
626         else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset <= GPU_CONTROL_RAM_BASE + 0x1E))
627         {
628                 if (offset & 0x01)              // This is supposed to weed out unaligned writes, but does nothing...
629                 {
630 #ifdef GPU_DEBUG
631                         WriteLog("GPU: Write16--unaligned write @ %08X [%04X]\n", offset, data);
632                         GPUDumpRegisters();
633 #endif  // GPU_DEBUG
634                         return;
635                 }
636 //Dual locations in this range: $1C Divide unit remainder/Divide unit control (R/W)
637 //This just literally sucks.
638                 if ((offset & 0x1C) == 0x1C)
639                 {
640 //This doesn't look right either--handles cases 1, 2, & 3 all the same!
641                         if (offset & 0x02)
642                                 gpu_div_control = (gpu_div_control & 0xFFFF0000) | (data & 0xFFFF);
643                         else
644                                 gpu_div_control = (gpu_div_control & 0x0000FFFF) | ((data & 0xFFFF) << 16);
645                 }
646                 else
647                 {
648 //WriteLog("[GPU W16:%08X,%04X]", offset, data);
649                         uint32 old_data = GPUReadLong(offset & 0xFFFFFFC, who);
650                         if (offset & 0x02)
651                                 old_data = (old_data & 0xFFFF0000) | (data & 0xFFFF);
652                         else
653                                 old_data = (old_data & 0x0000FFFF) | ((data & 0xFFFF) << 16);
654                         GPUWriteLong(offset & 0xFFFFFFC, old_data, who);
655                 }
656                 return;
657         }
658         else if ((offset == GPU_WORK_RAM_BASE + 0x0FFF) || (GPU_CONTROL_RAM_BASE + 0x1F))
659         {
660 #ifdef GPU_DEBUG
661                         WriteLog("GPU: Write16--unaligned write @ %08X by %s [%04X]!\n", offset, whoName[who], data);
662                         GPUDumpRegisters();
663 #endif  // GPU_DEBUG
664                 return;
665         }
666
667         // Have to be careful here--this can cause an infinite loop!
668         JaguarWriteWord(offset, data, who);
669 }
670
671 //
672 // GPU dword access (write)
673 //
674 void GPUWriteLong(uint32 offset, uint32 data, uint32 who/*=UNKNOWN*/)
675 {
676         if (offset >= 0xF02000 && offset <= 0xF020FF)
677                 WriteLog("GPU: WriteLong--Attempt to write to GPU register file by %s!\n", whoName[who]);
678
679 //      if ((offset >= GPU_WORK_RAM_BASE) && (offset < GPU_WORK_RAM_BASE + 0x1000))
680         if ((offset >= GPU_WORK_RAM_BASE) && (offset <= GPU_WORK_RAM_BASE + 0x0FFC))
681         {
682 #ifdef GPU_DEBUG
683                 if (offset & 0x03)
684                 {
685                         WriteLog("GPU: Write32--unaligned write @ %08X [%08X] by %s\n", offset, data, whoName[who]);
686                         GPUDumpRegisters();
687                 }
688 #endif  // GPU_DEBUG
689
690                 offset &= 0xFFF;
691                 SET32(gpu_ram_8, offset, data);
692                 return;
693         }
694 //      else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset < GPU_CONTROL_RAM_BASE+0x20))
695         else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset <= GPU_CONTROL_RAM_BASE + 0x1C))
696         {
697                 offset &= 0x1F;
698                 switch (offset)
699                 {
700                 case 0x00:
701                 {
702                         bool IMASKCleared = (gpu_flags & IMASK) && !(data & IMASK);
703                         gpu_flags = data;
704                         gpu_flag_z = gpu_flags & ZERO_FLAG;
705                         gpu_flag_c = (gpu_flags & CARRY_FLAG) >> 1;
706                         gpu_flag_n = (gpu_flags & NEGA_FLAG) >> 2;
707                         GPUUpdateRegisterBanks();
708                         gpu_control &= ~((gpu_flags & CINT04FLAGS) >> 3);       // Interrupt latch clear bits
709 //Writing here is only an interrupt enable--this approach is just plain wrong!
710 //                      GPUHandleIRQs();
711 //This, however, is A-OK! ;-)
712                         if (IMASKCleared)                                               // If IMASK was cleared,
713                                 GPUHandleIRQs();                                        // see if any other interrupts need servicing!
714 #ifdef GPU_DEBUG
715                         if (gpu_flags & (INT_ENA0 | INT_ENA1 | INT_ENA2 | INT_ENA3 | INT_ENA4))
716                                 WriteLog("GPU: Interrupt enable set by %s! Bits: %02X\n", whoName[who], (gpu_flags >> 4) & 0x1F);
717                         WriteLog("GPU: REGPAGE %s...\n", (gpu_flags & REGPAGE ? "set" : "cleared"));
718 #endif  // GPU_DEBUG
719                         break;
720                 }
721                 case 0x04:
722                         gpu_matrix_control = data;
723                         break;
724                 case 0x08:
725                         // This can only point to long aligned addresses
726                         gpu_pointer_to_matrix = data & 0xFFFFFFFC;
727                         break;
728                 case 0x0C:
729                         gpu_data_organization = data;
730                         break;
731                 case 0x10:
732                         gpu_pc = data;
733 #ifdef GPU_DEBUG
734 WriteLog("GPU: %s setting GPU PC to %08X %s\n", whoName[who], gpu_pc, (GPU_RUNNING ? "(GPU is RUNNING!)" : ""));//*/
735 #endif  // GPU_DEBUG
736                         break;
737                 case 0x14:
738                 {
739 //                      uint32 gpu_was_running = GPU_RUNNING;
740                         data &= ~0xF7C0;                // Disable writes to INT_LAT0-4 & TOM version number
741
742                         // check for GPU -> CPU interrupt
743                         if (data & 0x02)
744                         {
745 //WriteLog("GPU->CPU interrupt\n");
746                                 if (TOMIRQEnabled(IRQ_GPU))
747                                 {
748 //This is the programmer's responsibility, to make sure the handler is valid, not ours!
749 //                                      if ((TOMIRQEnabled(IRQ_GPU))// && (JaguarInterruptHandlerIsValid(64)))
750                                         {
751                                                 TOMSetPendingGPUInt();
752                                                 m68k_set_irq(2);                        // Set 68000 IPL 2
753                                                 GPUReleaseTimeslice();
754                                         }
755                                 }
756                                 data &= ~0x02;
757                         }
758
759                         // check for CPU -> GPU interrupt #0
760                         if (data & 0x04)
761                         {
762 //WriteLog("CPU->GPU interrupt\n");
763                                 GPUSetIRQLine(0, ASSERT_LINE);
764                                 m68k_end_timeslice();
765                                 DSPReleaseTimeslice();
766                                 data &= ~0x04;
767                         }
768
769                         // single stepping
770                         if (data & 0x10)
771                         {
772                                 //WriteLog("asked to perform a single step (single step is %senabled)\n",(data&0x8)?"":"not ");
773                         }
774                         gpu_control = (gpu_control & 0xF7C0) | (data & (~0xF7C0));
775
776                         // if gpu wasn't running but is now running, execute a few cycles
777 #ifndef GPU_SINGLE_STEPPING
778 /*                      if (!gpu_was_running && GPU_RUNNING)
779 #ifdef GPU_DEBUG
780                         {
781                                 WriteLog("GPU: Write32--About to do stupid braindead GPU execution for 200 cycles.\n");
782 #endif  // GPU_DEBUG
783                                 GPUExec(200);
784 #ifdef GPU_DEBUG
785                         }
786 #endif  // GPU_DEBUG//*/
787 #else
788                         if (gpu_control & 0x18)
789                                 GPUExec(1);
790 #endif  // #ifndef GPU_SINGLE_STEPPING
791 #ifdef GPU_DEBUG
792 WriteLog("Write to GPU CTRL by %s: %08X ", whoName[who], data);
793 if (GPU_RUNNING)
794         WriteLog(" --> Starting to run at %08X by %s...", gpu_pc, whoName[who]);
795 else
796         WriteLog(" --> Stopped by %s! (GPU_PC: %08X)", whoName[who], gpu_pc);
797 WriteLog("\n");
798 #endif  // GPU_DEBUG
799 //if (GPU_RUNNING)
800 //      GPUDumpDisassembly();
801 /*if (GPU_RUNNING)
802 {
803         if (gpu_pc == 0xF035D8)
804         {
805 //              GPUDumpDisassembly();
806 //              log_done();
807 //              exit(1);
808                 gpu_control &= 0xFFFFFFFE;      // Don't run it and let's see what happens!
809 //Hmm. Seems to lock up when going into the demo...
810 //Try to disable the collision altogether!
811         }
812 }//*/
813 extern int effect_start5;
814 static bool finished = false;
815 //if (GPU_RUNNING && effect_start5 && !finished)
816 if (GPU_RUNNING && effect_start5 && gpu_pc == 0xF035D8)
817 {
818         // Let's do a dump of $6528!
819 /*      uint32 numItems = JaguarReadWord(0x6BD6);
820         WriteLog("\nDump of $6528: %u items.\n\n", numItems);
821         for(int i=0; i<numItems*3*4; i+=3*4)
822         {
823                 WriteLog("\t%04X: %08X %08X %08X -> ", 0x6528+i, JaguarReadLong(0x6528+i),
824                         JaguarReadLong(0x6528+i+4), JaguarReadLong(0x6528+i+8));
825                 uint16 link = JaguarReadWord(0x6528+i+8+2);
826                 for(int j=0; j<40; j+=4)
827                         WriteLog("%08X ", JaguarReadLong(link + j));
828                 WriteLog("\n");
829         }
830         WriteLog("\n");//*/
831         // Let's try a manual blit here...
832 //This isn't working the way it should! !!! FIX !!!
833 //Err, actually, it is.
834 // NOW, it works right! Problem solved!!! It's a blitter bug!
835 /*      uint32 src = 0x4D54, dst = 0xF03000, width = 10 * 4;
836         for(int y=0; y<127; y++)
837         {
838                 for(int x=0; x<2; x++)
839                 {
840                         JaguarWriteLong(dst, JaguarReadLong(src));
841
842                         src += 4;
843                         dst += 4;
844                 }
845                 src += width - (2 * 4);
846         }//*/
847 /*      finished = true;
848         doGPUDis = true;
849         WriteLog("\nGPU: About to execute collision detection code.\n\n");//*/
850
851 /*      WriteLog("\nGPU: About to execute collision detection code. Data @ 4D54:\n\n");
852         int count = 0;
853         for(int i=0x004D54; i<0x004D54+2048; i++)
854         {
855                 WriteLog("%02X ", JaguarReadByte(i));
856                 count++;
857                 if (count == 32)
858                 {
859                         count = 0;
860                         WriteLog("\n");
861                 }
862         }
863         WriteLog("\n\nData @ F03000:\n\n");
864         count = 0;
865         for(int i=0xF03000; i<0xF03200; i++)
866         {
867                 WriteLog("%02X ", JaguarReadByte(i));
868                 count++;
869                 if (count == 32)
870                 {
871                         count = 0;
872                         WriteLog("\n");
873                 }
874         }
875         WriteLog("\n\n");
876         log_done();
877         exit(0);//*/
878 }
879 //if (!GPU_RUNNING)
880 //      doGPUDis = false;
881 /*if (!GPU_RUNNING && finished)
882 {
883         WriteLog("\nGPU: Finished collision detection code. Exiting!\n\n");
884         GPUDumpRegisters();
885         log_done();
886         exit(0);
887 }//*/
888                         // (?) If we're set running by the M68K (or DSP?) then end its timeslice to
889                         // allow the GPU a chance to run...
890                         // Yes! This partially fixed Trevor McFur...
891                         if (GPU_RUNNING)
892                                 m68k_end_timeslice();
893                         break;
894                 }
895                 case 0x18:
896                         gpu_hidata = data;
897                         break;
898                 case 0x1C:
899                         gpu_div_control = data;
900                         break;
901 //              default:   // unaligned long write
902                         //exit(0);
903                         //__asm int 3
904                 }
905                 return;
906         }
907
908 //      JaguarWriteWord(offset, (data >> 16) & 0xFFFF, who);
909 //      JaguarWriteWord(offset+2, data & 0xFFFF, who);
910 // We're a 32-bit processor, we can do a long write...!
911         JaguarWriteLong(offset, data, who);
912 }
913
914 //
915 // Change register banks if necessary
916 //
917 void GPUUpdateRegisterBanks(void)
918 {
919         int bank = (gpu_flags & REGPAGE);               // REGPAGE bit
920
921         if (gpu_flags & IMASK)                                  // IMASK bit
922                 bank = 0;                                                       // IMASK forces main bank to be bank 0
923
924         if (bank)
925                 gpu_reg = gpu_reg_bank_1, gpu_alternate_reg = gpu_reg_bank_0;
926         else
927                 gpu_reg = gpu_reg_bank_0, gpu_alternate_reg = gpu_reg_bank_1;
928 }
929
930 void GPUHandleIRQs(void)
931 {
932         // Bail out if we're already in an interrupt!
933         if (gpu_flags & IMASK)
934                 return;
935
936         // Get the interrupt latch & enable bits
937         uint32 bits = (gpu_control >> 6) & 0x1F, mask = (gpu_flags >> 4) & 0x1F;
938
939         // Bail out if latched interrupts aren't enabled
940         bits &= mask;
941         if (!bits)
942                 return;
943
944         // Determine which interrupt to service
945         uint32 which = 0; //Isn't there a #pragma to disable this warning???
946         if (bits & 0x01)
947                 which = 0;
948         if (bits & 0x02)
949                 which = 1;
950         if (bits & 0x04)
951                 which = 2;
952         if (bits & 0x08)
953                 which = 3;
954         if (bits & 0x10)
955                 which = 4;
956
957         if (start_logging)
958                 WriteLog("GPU: Generating IRQ #%i\n", which);
959
960         // set the interrupt flag
961         gpu_flags |= IMASK;
962         GPUUpdateRegisterBanks();
963
964         // subqt  #4,r31                ; pre-decrement stack pointer
965         // move  pc,r30                 ; address of interrupted code
966         // store  r30,(r31)     ; store return address
967         gpu_reg[31] -= 4;
968         GPUWriteLong(gpu_reg[31], gpu_pc - 2, GPU);
969
970         // movei  #service_address,r30  ; pointer to ISR entry
971         // jump  (r30)                                  ; jump to ISR
972         // nop
973         gpu_pc = gpu_reg[30] = GPU_WORK_RAM_BASE + (which * 0x10);
974 }
975
976 void GPUSetIRQLine(int irqline, int state)
977 {
978         if (start_logging)
979                 WriteLog("GPU: Setting GPU IRQ line #%i\n", irqline);
980
981         uint32 mask = 0x0040 << irqline;
982         gpu_control &= ~mask;                           // Clear the interrupt latch
983
984         if (state)
985         {
986                 gpu_control |= mask;                    // Assert the interrupt latch
987                 GPUHandleIRQs();                                // And handle the interrupt...
988         }
989 }
990
991 //TEMPORARY: Testing only!
992 //#include "gpu2.h"
993 //#include "gpu3.h"
994
995 void GPUInit(void)
996 {
997 //      memory_malloc_secure((void **)&gpu_ram_8, 0x1000, "GPU work RAM");
998 //      memory_malloc_secure((void **)&gpu_reg_bank_0, 32 * sizeof(int32), "GPU bank 0 regs");
999 //      memory_malloc_secure((void **)&gpu_reg_bank_1, 32 * sizeof(int32), "GPU bank 1 regs");
1000
1001         build_branch_condition_table();
1002
1003         GPUReset();
1004
1005 //TEMPORARY: Testing only!
1006 //      gpu2_init();
1007 //      gpu3_init();
1008 }
1009
1010 void GPUReset(void)
1011 {
1012         // GPU registers (directly visible)
1013         gpu_flags                         = 0x00000000;
1014         gpu_matrix_control    = 0x00000000;
1015         gpu_pointer_to_matrix = 0x00000000;
1016         gpu_data_organization = 0xFFFFFFFF;
1017         gpu_pc                            = 0x00F03000;
1018         gpu_control                       = 0x00002800;                 // Correctly sets this as TOM Rev. 2
1019         gpu_hidata                        = 0x00000000;
1020         gpu_remain                        = 0x00000000;                 // These two registers are RO/WO
1021         gpu_div_control           = 0x00000000;
1022
1023         // GPU internal register
1024         gpu_acc                           = 0x00000000;
1025
1026         gpu_reg = gpu_reg_bank_0;
1027         gpu_alternate_reg = gpu_reg_bank_1;
1028
1029         for(int i=0; i<32; i++)
1030                 gpu_reg[i] = gpu_alternate_reg[i] = 0x00000000;
1031
1032         CLR_ZNC;
1033         memset(gpu_ram_8, 0xFF, 0x1000);
1034         gpu_in_exec = 0;
1035 //not needed    GPUInterruptPending = false;
1036         GPUResetStats();
1037 }
1038
1039 uint32 GPUReadPC(void)
1040 {
1041         return gpu_pc;
1042 }
1043
1044 void GPUResetStats(void)
1045 {
1046         for(uint32 i=0; i<64; i++)
1047                 gpu_opcode_use[i] = 0;
1048         WriteLog("--> GPU stats were reset!\n");
1049 }
1050
1051 void GPUDumpDisassembly(void)
1052 {
1053         char buffer[512];
1054
1055         WriteLog("\n---[GPU code at 00F03000]---------------------------\n");
1056         uint32 j = 0xF03000;
1057         while (j <= 0xF03FFF)
1058         {
1059                 uint32 oldj = j;
1060                 j += dasmjag(JAGUAR_GPU, buffer, j);
1061                 WriteLog("\t%08X: %s\n", oldj, buffer);
1062         }
1063 }
1064
1065 void GPUDumpRegisters(void)
1066 {
1067         WriteLog("\n---[GPU flags: NCZ %d%d%d]-----------------------\n", gpu_flag_n, gpu_flag_c, gpu_flag_z);
1068         WriteLog("\nRegisters bank 0\n");
1069         for(int j=0; j<8; j++)
1070         {
1071                 WriteLog("\tR%02i = %08X R%02i = %08X R%02i = %08X R%02i = %08X\n",
1072                                                   (j << 2) + 0, gpu_reg_bank_0[(j << 2) + 0],
1073                                                   (j << 2) + 1, gpu_reg_bank_0[(j << 2) + 1],
1074                                                   (j << 2) + 2, gpu_reg_bank_0[(j << 2) + 2],
1075                                                   (j << 2) + 3, gpu_reg_bank_0[(j << 2) + 3]);
1076         }
1077         WriteLog("Registers bank 1\n");
1078         for(int j=0; j<8; j++)
1079         {
1080                 WriteLog("\tR%02i = %08X R%02i = %08X R%02i = %08X R%02i = %08X\n",
1081                                                   (j << 2) + 0, gpu_reg_bank_1[(j << 2) + 0],
1082                                                   (j << 2) + 1, gpu_reg_bank_1[(j << 2) + 1],
1083                                                   (j << 2) + 2, gpu_reg_bank_1[(j << 2) + 2],
1084                                                   (j << 2) + 3, gpu_reg_bank_1[(j << 2) + 3]);
1085         }
1086 }
1087
1088 void GPUDumpMemory(void)
1089 {
1090         WriteLog("\n---[GPU data at 00F03000]---------------------------\n");
1091         for(int i=0; i<0xFFF; i+=4)
1092                 WriteLog("\t%08X: %02X %02X %02X %02X\n", 0xF03000+i, gpu_ram_8[i],
1093                         gpu_ram_8[i+1], gpu_ram_8[i+2], gpu_ram_8[i+3]);
1094 }
1095
1096 void GPUDone(void)
1097 {
1098         WriteLog("GPU: Stopped at PC=%08X (GPU %s running)\n", (unsigned int)gpu_pc, GPU_RUNNING ? "was" : "wasn't");
1099
1100         // Get the interrupt latch & enable bits
1101         uint8 bits = (gpu_control >> 6) & 0x1F, mask = (gpu_flags >> 4) & 0x1F;
1102         WriteLog("GPU: Latch bits = %02X, enable bits = %02X\n", bits, mask);
1103
1104         GPUDumpRegisters();
1105         GPUDumpDisassembly();
1106
1107         WriteLog("\nGPU opcodes use:\n");
1108         for(int i=0; i<64; i++)
1109         {
1110                 if (gpu_opcode_use[i])
1111                         WriteLog("\t%17s %lu\n", gpu_opcode_str[i], gpu_opcode_use[i]);
1112         }
1113         WriteLog("\n");
1114
1115 //      memory_free(gpu_ram_8);
1116 //      memory_free(gpu_reg_bank_0);
1117 //      memory_free(gpu_reg_bank_1);
1118 }
1119
1120 //
1121 // Main GPU execution core
1122 //
1123 static int testCount = 1;
1124 static int len = 0;
1125 static bool tripwire = false;
1126 void GPUExec(int32 cycles)
1127 {
1128         if (!GPU_RUNNING)
1129                 return;
1130
1131 #ifdef GPU_SINGLE_STEPPING
1132         if (gpu_control & 0x18)
1133         {
1134                 cycles = 1;
1135                 gpu_control &= ~0x10;
1136         }
1137 #endif
1138         GPUHandleIRQs();
1139         gpu_releaseTimeSlice_flag = 0;
1140         gpu_in_exec++;
1141
1142         while (cycles > 0 && GPU_RUNNING)
1143         {
1144 if (gpu_ram_8[0x054] == 0x98 && gpu_ram_8[0x055] == 0x0A && gpu_ram_8[0x056] == 0x03
1145         && gpu_ram_8[0x057] == 0x00 && gpu_ram_8[0x058] == 0x00 && gpu_ram_8[0x059] == 0x00)
1146 {
1147         if (gpu_pc == 0xF03000)
1148         {
1149                 extern uint32 starCount;
1150                 starCount = 0;
1151 /*              WriteLog("GPU: Starting starfield generator... Dump of [R03=%08X]:\n", gpu_reg_bank_0[03]);
1152                 uint32 base = gpu_reg_bank_0[3];
1153                 for(uint32 i=0; i<0x100; i+=16)
1154                 {
1155                         WriteLog("%02X: ", i);
1156                         for(uint32 j=0; j<16; j++)
1157                         {
1158                                 WriteLog("%02X ", JaguarReadByte(base + i + j));
1159                         }
1160                         WriteLog("\n");
1161                 }*/
1162         }
1163 //      if (gpu_pc == 0xF03)
1164         {
1165         }
1166 }//*/
1167 /*if (gpu_pc == 0xF03B9E && gpu_reg_bank_0[01] == 0)
1168 {
1169         GPUDumpRegisters();
1170         WriteLog("GPU: Starting disassembly log...\n");
1171         doGPUDis = true;
1172 }//*/
1173 /*if (gpu_pc == 0xF0359A)
1174 {
1175         doGPUDis = true;
1176         GPUDumpRegisters();
1177 }*/
1178 /*              gpu_flag_c = (gpu_flag_c ? 1 : 0);
1179                 gpu_flag_z = (gpu_flag_z ? 1 : 0);
1180                 gpu_flag_n = (gpu_flag_n ? 1 : 0);*/
1181
1182                 uint16 opcode = GPUReadWord(gpu_pc, GPU);
1183                 uint32 index = opcode >> 10;
1184                 gpu_instruction = opcode;                               // Added for GPU #3...
1185                 gpu_opcode_first_parameter = (opcode >> 5) & 0x1F;
1186                 gpu_opcode_second_parameter = opcode & 0x1F;
1187 /*if (gpu_pc == 0xF03BE8)
1188 WriteLog("Start of OP frame write...\n");
1189 if (gpu_pc == 0xF03EEE)
1190 WriteLog("--> Writing BRANCH object ---\n");
1191 if (gpu_pc == 0xF03F62)
1192 WriteLog("--> Writing BITMAP object ***\n");//*/
1193 /*if (gpu_pc == 0xF03546)
1194 {
1195         WriteLog("\n--> GPU PC: F03546\n");
1196         GPUDumpRegisters();
1197         GPUDumpDisassembly();
1198 }//*/
1199 /*if (gpu_pc == 0xF033F6)
1200 {
1201         WriteLog("\n--> GPU PC: F033F6\n");
1202         GPUDumpRegisters();
1203         GPUDumpDisassembly();
1204 }//*/
1205 /*if (gpu_pc == 0xF033CC)
1206 {
1207         WriteLog("\n--> GPU PC: F033CC\n");
1208         GPUDumpRegisters();
1209         GPUDumpDisassembly();
1210 }//*/
1211 /*if (gpu_pc == 0xF033D6)
1212 {
1213         WriteLog("\n--> GPU PC: F033D6 (#%d)\n", testCount++);
1214         GPUDumpRegisters();
1215         GPUDumpMemory();
1216 }//*/
1217 /*if (gpu_pc == 0xF033D8)
1218 {
1219         WriteLog("\n--> GPU PC: F033D8 (#%d)\n", testCount++);
1220         GPUDumpRegisters();
1221         GPUDumpMemory();
1222 }//*/
1223 /*if (gpu_pc == 0xF0358E)
1224 {
1225         WriteLog("\n--> GPU PC: F0358E (#%d)\n", testCount++);
1226         GPUDumpRegisters();
1227         GPUDumpMemory();
1228 }//*/
1229 /*if (gpu_pc == 0xF034CA)
1230 {
1231         WriteLog("\n--> GPU PC: F034CA (#%d)\n", testCount++);
1232         GPUDumpRegisters();
1233 }//*/
1234 /*if (gpu_pc == 0xF034CA)
1235 {
1236         len = gpu_reg[1] + 4;//, r9save = gpu_reg[9];
1237         WriteLog("\nAbout to subtract [#%d] (R14=%08X, R15=%08X, R9=%08X):\n   ", testCount++, gpu_reg[14], gpu_reg[15], gpu_reg[9]);
1238         for(int i=0; i<len; i+=4)
1239                 WriteLog(" %08X", GPUReadLong(gpu_reg[15]+i));
1240         WriteLog("\n   ");
1241         for(int i=0; i<len; i+=4)
1242                 WriteLog(" %08X", GPUReadLong(gpu_reg[14]+i));
1243         WriteLog("\n\n");
1244 }
1245 if (gpu_pc == 0xF034DE)
1246 {
1247         WriteLog("\nSubtracted! (R14=%08X, R15=%08X):\n   ", gpu_reg[14], gpu_reg[15]);
1248         for(int i=0; i<len; i+=4)
1249                 WriteLog(" %08X", GPUReadLong(gpu_reg[15]+i));
1250         WriteLog("\n   ");
1251         for(int i=0; i<len; i+=4)
1252                 WriteLog(" %08X", GPUReadLong(gpu_reg[14]+i));
1253         WriteLog("\n   ");
1254         for(int i=0; i<len; i+=4)
1255                 WriteLog(" --------");
1256         WriteLog("\n   ");
1257         for(int i=0; i<len; i+=4)
1258                 WriteLog(" %08X", GPUReadLong(gpu_reg[9]+4+i));
1259         WriteLog("\n\n");
1260 }//*/
1261 /*if (gpu_pc == 0xF035C8)
1262 {
1263         WriteLog("\n--> GPU PC: F035C8 (#%d)\n", testCount++);
1264         GPUDumpRegisters();
1265         GPUDumpDisassembly();
1266 }//*/
1267
1268 if (gpu_start_log)
1269 {
1270 //      gpu_reset_stats();
1271 static char buffer[512];
1272 dasmjag(JAGUAR_GPU, buffer, gpu_pc);
1273 WriteLog("GPU: [%08X] %s (RM=%08X, RN=%08X) -> ", gpu_pc, buffer, RM, RN);
1274 }//*/
1275 //$E400 -> 1110 01 -> $39 -> 57
1276 //GPU #1
1277                 gpu_pc += 2;
1278                 gpu_opcode[index]();
1279 //GPU #2
1280 //              gpu2_opcode[index]();
1281 //              gpu_pc += 2;
1282 //GPU #3                                (Doesn't show ATARI logo! #1 & #2 do...)
1283 //              gpu_pc += 2;
1284 //              gpu3_opcode[index]();
1285
1286 // BIOS hacking
1287 //GPU: [00F03548] jr      nz,00F03560 (0xd561) (RM=00F03114, RN=00000004) ->     --> JR: Branch taken.
1288 /*static bool firstTime = true;
1289 if (gpu_pc == 0xF03548 && firstTime)
1290 {
1291         gpu_flag_z = 1;
1292 //      firstTime = false;
1293
1294 //static char buffer[512];
1295 //int k=0xF03548;
1296 //while (k<0xF0356C)
1297 //{
1298 //int oldk = k;
1299 //k += dasmjag(JAGUAR_GPU, buffer, k);
1300 //WriteLog("GPU: [%08X] %s\n", oldk, buffer);
1301 //}
1302 //      gpu_start_log = 1;
1303 }//*/
1304 //GPU: [00F0354C] jump    nz,(r29) (0xd3a1) (RM=00F03314, RN=00000004) -> (RM=00F03314, RN=00000004)
1305 /*if (gpu_pc == 0xF0354C)
1306         gpu_flag_z = 0;//, gpu_start_log = 1;//*/
1307
1308                 cycles -= gpu_opcode_cycles[index];
1309                 gpu_opcode_use[index]++;
1310 if (gpu_start_log)
1311         WriteLog("(RM=%08X, RN=%08X)\n", RM, RN);//*/
1312 if ((gpu_pc < 0xF03000 || gpu_pc > 0xF03FFF) && !tripwire)
1313 {
1314         WriteLog("GPU: Executing outside local RAM! GPU_PC: %08X\n", gpu_pc);
1315         tripwire = true;
1316 }
1317         }
1318
1319         gpu_in_exec--;
1320 }
1321
1322 //
1323 // GPU opcodes
1324 //
1325
1326 /*
1327 GPU opcodes use (offset punch--vertically below bad guy):
1328                       add 18686
1329                      addq 32621
1330                       sub 7483
1331                      subq 10252
1332                       and 21229
1333                        or 15003
1334                      btst 1822
1335                      bset 2072
1336                      mult 141
1337                       div 2392
1338                      shlq 13449
1339                      shrq 10297
1340                     sharq 11104
1341                       cmp 6775
1342                      cmpq 5944
1343                      move 31259
1344                     moveq 4473
1345                     movei 23277
1346                     loadb 46
1347                     loadw 4201
1348                      load 28580
1349          load_r14_indexed 1183
1350          load_r15_indexed 1125
1351                    storew 178
1352                     store 10144
1353         store_r14_indexed 320
1354         store_r15_indexed 1
1355                   move_pc 1742
1356                      jump 24467
1357                        jr 18090
1358                       nop 41362
1359 */
1360
1361 static void gpu_opcode_jump(void)
1362 {
1363 #ifdef GPU_DIS_JUMP
1364 const char * condition[32] =
1365 {       "T", "nz", "z", "???", "nc", "nc nz", "nc z", "???", "c", "c nz",
1366         "c z", "???", "???", "???", "???", "???", "???", "???", "???",
1367         "???", "nn", "nn nz", "nn z", "???", "n", "n nz", "n z", "???",
1368         "???", "???", "???", "F" };
1369         if (doGPUDis)
1370                 WriteLog("%06X: JUMP   %s, (R%02u) [NCZ:%u%u%u, R%02u=%08X] ", gpu_pc-2, condition[IMM_2], IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM);
1371 #endif
1372         // normalize flags
1373 /*      gpu_flag_c = (gpu_flag_c ? 1 : 0);
1374         gpu_flag_z = (gpu_flag_z ? 1 : 0);
1375         gpu_flag_n = (gpu_flag_n ? 1 : 0);*/
1376         // KLUDGE: Used by BRANCH_CONDITION
1377         uint32 jaguar_flags = (gpu_flag_n << 2) | (gpu_flag_c << 1) | gpu_flag_z;
1378
1379         if (BRANCH_CONDITION(IMM_2))
1380         {
1381 #ifdef GPU_DIS_JUMP
1382         if (doGPUDis)
1383                 WriteLog("Branched!\n");
1384 #endif
1385 if (gpu_start_log)
1386         WriteLog("    --> JUMP: Branch taken.\n");
1387                 uint32 delayed_pc = RM;
1388                 GPUExec(1);
1389                 gpu_pc = delayed_pc;
1390 /*              uint16 opcode = GPUReadWord(gpu_pc, GPU);
1391                 gpu_opcode_first_parameter = (opcode >> 5) & 0x1F;
1392                 gpu_opcode_second_parameter = opcode & 0x1F;
1393
1394                 gpu_pc = delayed_pc;
1395                 gpu_opcode[opcode>>10]();//*/
1396         }
1397 #ifdef GPU_DIS_JUMP
1398         else
1399                 if (doGPUDis)
1400                         WriteLog("Branch NOT taken.\n");
1401 #endif
1402 }
1403
1404 static void gpu_opcode_jr(void)
1405 {
1406 #ifdef GPU_DIS_JR
1407 const char * condition[32] =
1408 {       "T", "nz", "z", "???", "nc", "nc nz", "nc z", "???", "c", "c nz",
1409         "c z", "???", "???", "???", "???", "???", "???", "???", "???",
1410         "???", "nn", "nn nz", "nn z", "???", "n", "n nz", "n z", "???",
1411         "???", "???", "???", "F" };
1412         if (doGPUDis)
1413                 WriteLog("%06X: JR     %s, %06X [NCZ:%u%u%u] ", gpu_pc-2, condition[IMM_2], gpu_pc+((IMM_1 & 0x10 ? 0xFFFFFFF0 | IMM_1 : IMM_1) * 2), gpu_flag_n, gpu_flag_c, gpu_flag_z);
1414 #endif
1415 /*      if (CONDITION(jaguar.op & 31))
1416         {
1417                 int32 r1 = (INT8)((jaguar.op >> 2) & 0xF8) >> 2;
1418                 uint32 newpc = jaguar.PC + r1;
1419                 CALL_MAME_DEBUG;
1420                 jaguar.op = ROPCODE(jaguar.PC);
1421                 jaguar.PC = newpc;
1422                 (*jaguar.table[jaguar.op >> 10])();
1423
1424                 jaguar_icount -= 3;     // 3 wait states guaranteed
1425         }*/
1426         // normalize flags
1427 /*      gpu_flag_n = (gpu_flag_n ? 1 : 0);
1428         gpu_flag_c = (gpu_flag_c ? 1 : 0);
1429         gpu_flag_z = (gpu_flag_z ? 1 : 0);*/
1430         // KLUDGE: Used by BRANCH_CONDITION
1431         uint32 jaguar_flags = (gpu_flag_n << 2) | (gpu_flag_c << 1) | gpu_flag_z;
1432
1433         if (BRANCH_CONDITION(IMM_2))
1434         {
1435 #ifdef GPU_DIS_JR
1436         if (doGPUDis)
1437                 WriteLog("Branched!\n");
1438 #endif
1439 if (gpu_start_log)
1440         WriteLog("    --> JR: Branch taken.\n");
1441                 int32 offset = (IMM_1 & 0x10 ? 0xFFFFFFF0 | IMM_1 : IMM_1);             // Sign extend IMM_1
1442                 int32 delayed_pc = gpu_pc + (offset * 2);
1443                 GPUExec(1);
1444                 gpu_pc = delayed_pc;
1445 /*              uint16 opcode = GPUReadWord(gpu_pc, GPU);
1446                 gpu_opcode_first_parameter = (opcode >> 5) & 0x1F;
1447                 gpu_opcode_second_parameter = opcode & 0x1F;
1448
1449                 gpu_pc = delayed_pc;
1450                 gpu_opcode[opcode>>10]();//*/
1451         }
1452 #ifdef GPU_DIS_JR
1453         else
1454                 if (doGPUDis)
1455                         WriteLog("Branch NOT taken.\n");
1456 #endif
1457 }
1458
1459 static void gpu_opcode_add(void)
1460 {
1461 #ifdef GPU_DIS_ADD
1462         if (doGPUDis)
1463                 WriteLog("%06X: ADD    R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1464 #endif
1465         uint32 res = RN + RM;
1466         CLR_ZNC; SET_ZNC_ADD(RN, RM, res);
1467         RN = res;
1468 #ifdef GPU_DIS_ADD
1469         if (doGPUDis)
1470                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1471 #endif
1472 }
1473
1474 static void gpu_opcode_addc(void)
1475 {
1476 #ifdef GPU_DIS_ADDC
1477         if (doGPUDis)
1478                 WriteLog("%06X: ADDC   R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1479 #endif
1480 /*      int dreg = jaguar.op & 31;
1481         uint32 r1 = jaguar.r[(jaguar.op >> 5) & 31];
1482         uint32 r2 = jaguar.r[dreg];
1483         uint32 res = r2 + r1 + ((jaguar.FLAGS >> 1) & 1);
1484         jaguar.r[dreg] = res;
1485         CLR_ZNC; SET_ZNC_ADD(r2,r1,res);*/
1486
1487         uint32 res = RN + RM + gpu_flag_c;
1488         uint32 carry = gpu_flag_c;
1489 //      SET_ZNC_ADD(RN, RM, res); //???BUG??? Yes!
1490         SET_ZNC_ADD(RN + carry, RM, res);
1491 //      SET_ZNC_ADD(RN, RM + carry, res);
1492         RN = res;
1493 #ifdef GPU_DIS_ADDC
1494         if (doGPUDis)
1495                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1496 #endif
1497 }
1498
1499 static void gpu_opcode_addq(void)
1500 {
1501 #ifdef GPU_DIS_ADDQ
1502         if (doGPUDis)
1503                 WriteLog("%06X: ADDQ   #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1504 #endif
1505         uint32 r1 = gpu_convert_zero[IMM_1];
1506         uint32 res = RN + r1;
1507         CLR_ZNC; SET_ZNC_ADD(RN, r1, res);
1508         RN = res;
1509 #ifdef GPU_DIS_ADDQ
1510         if (doGPUDis)
1511                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1512 #endif
1513 }
1514
1515 static void gpu_opcode_addqt(void)
1516 {
1517 #ifdef GPU_DIS_ADDQT
1518         if (doGPUDis)
1519                 WriteLog("%06X: ADDQT  #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1520 #endif
1521         RN += gpu_convert_zero[IMM_1];
1522 #ifdef GPU_DIS_ADDQT
1523         if (doGPUDis)
1524                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1525 #endif
1526 }
1527
1528 static void gpu_opcode_sub(void)
1529 {
1530 #ifdef GPU_DIS_SUB
1531         if (doGPUDis)
1532                 WriteLog("%06X: SUB    R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1533 #endif
1534         uint32 res = RN - RM;
1535         SET_ZNC_SUB(RN, RM, res);
1536         RN = res;
1537 #ifdef GPU_DIS_SUB
1538         if (doGPUDis)
1539                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1540 #endif
1541 }
1542
1543 static void gpu_opcode_subc(void)
1544 {
1545 #ifdef GPU_DIS_SUBC
1546         if (doGPUDis)
1547                 WriteLog("%06X: SUBC   R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1548 #endif
1549         uint32 res = RN - RM - gpu_flag_c;
1550         uint32 borrow = gpu_flag_c;
1551 //      SET_ZNC_SUB(RN, RM, res); //???BUG??? YES!!!
1552 //No matter how you do it, there is a problem. With below, it's 0-0 with carry,
1553 //and the one below it it's FFFFFFFF - FFFFFFFF with carry... !!! FIX !!!
1554 //      SET_ZNC_SUB(RN - borrow, RM, res);
1555         SET_ZNC_SUB(RN, RM + borrow, res);
1556         RN = res;
1557 #ifdef GPU_DIS_SUBC
1558         if (doGPUDis)
1559                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1560 #endif
1561 }
1562 /*
1563 N = 5, M = 3, 3 - 5 = -2, C = 1... Or, in our case:
1564 N = 0, M = 1, 0 - 1 = -1, C = 0!
1565
1566 #define SET_C_SUB(a,b)          (gpu_flag_c = ((uint32)(b) > (uint32)(a)))
1567 #define SET_ZN(r)                       SET_N(r); SET_Z(r)
1568 #define SET_ZNC_ADD(a,b,r)      SET_N(r); SET_Z(r); SET_C_ADD(a,b)
1569 #define SET_ZNC_SUB(a,b,r)      SET_N(r); SET_Z(r); SET_C_SUB(a,b)
1570 */
1571 static void gpu_opcode_subq(void)
1572 {
1573 #ifdef GPU_DIS_SUBQ
1574         if (doGPUDis)
1575                 WriteLog("%06X: SUBQ   #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1576 #endif
1577         uint32 r1 = gpu_convert_zero[IMM_1];
1578         uint32 res = RN - r1;
1579         SET_ZNC_SUB(RN, r1, res);
1580         RN = res;
1581 #ifdef GPU_DIS_SUBQ
1582         if (doGPUDis)
1583                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1584 #endif
1585 }
1586
1587 static void gpu_opcode_subqt(void)
1588 {
1589 #ifdef GPU_DIS_SUBQT
1590         if (doGPUDis)
1591                 WriteLog("%06X: SUBQT  #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1592 #endif
1593         RN -= gpu_convert_zero[IMM_1];
1594 #ifdef GPU_DIS_SUBQT
1595         if (doGPUDis)
1596                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1597 #endif
1598 }
1599
1600 static void gpu_opcode_cmp(void)
1601 {
1602 #ifdef GPU_DIS_CMP
1603         if (doGPUDis)
1604                 WriteLog("%06X: CMP    R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1605 #endif
1606         uint32 res = RN - RM;
1607         SET_ZNC_SUB(RN, RM, res);
1608 #ifdef GPU_DIS_CMP
1609         if (doGPUDis)
1610                 WriteLog("[NCZ:%u%u%u]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z);
1611 #endif
1612 }
1613
1614 static void gpu_opcode_cmpq(void)
1615 {
1616         static int32 sqtable[32] =
1617                 { 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,-16,-15,-14,-13,-12,-11,-10,-9,-8,-7,-6,-5,-4,-3,-2,-1 };
1618 #ifdef GPU_DIS_CMPQ
1619         if (doGPUDis)
1620                 WriteLog("%06X: CMPQ   #%d, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, sqtable[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1621 #endif
1622         uint32 r1 = sqtable[IMM_1 & 0x1F]; // I like this better -> (INT8)(jaguar.op >> 2) >> 3;
1623         uint32 res = RN - r1;
1624         SET_ZNC_SUB(RN, r1, res);
1625 #ifdef GPU_DIS_CMPQ
1626         if (doGPUDis)
1627                 WriteLog("[NCZ:%u%u%u]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z);
1628 #endif
1629 }
1630
1631 static void gpu_opcode_and(void)
1632 {
1633 #ifdef GPU_DIS_AND
1634         if (doGPUDis)
1635                 WriteLog("%06X: AND    R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1636 #endif
1637         RN = RN & RM;
1638         SET_ZN(RN);
1639 #ifdef GPU_DIS_AND
1640         if (doGPUDis)
1641                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1642 #endif
1643 }
1644
1645 static void gpu_opcode_or(void)
1646 {
1647 #ifdef GPU_DIS_OR
1648         if (doGPUDis)
1649                 WriteLog("%06X: OR     R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1650 #endif
1651         RN = RN | RM;
1652         SET_ZN(RN);
1653 #ifdef GPU_DIS_OR
1654         if (doGPUDis)
1655                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1656 #endif
1657 }
1658
1659 static void gpu_opcode_xor(void)
1660 {
1661 #ifdef GPU_DIS_XOR
1662         if (doGPUDis)
1663                 WriteLog("%06X: XOR    R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1664 #endif
1665         RN = RN ^ RM;
1666         SET_ZN(RN);
1667 #ifdef GPU_DIS_XOR
1668         if (doGPUDis)
1669                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1670 #endif
1671 }
1672
1673 static void gpu_opcode_not(void)
1674 {
1675 #ifdef GPU_DIS_NOT
1676         if (doGPUDis)
1677                 WriteLog("%06X: NOT    R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1678 #endif
1679         RN = ~RN;
1680         SET_ZN(RN);
1681 #ifdef GPU_DIS_NOT
1682         if (doGPUDis)
1683                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1684 #endif
1685 }
1686
1687 static void gpu_opcode_move_pc(void)
1688 {
1689 #ifdef GPU_DIS_MOVEPC
1690         if (doGPUDis)
1691                 WriteLog("%06X: MOVE   PC, R%02u [NCZ:%u%u%u, PC=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, gpu_pc-2, IMM_2, RN);
1692 #endif
1693         // Should be previous PC--this might not always be previous instruction!
1694         // Then again, this will point right at the *current* instruction, i.e., MOVE PC,R!
1695         RN = gpu_pc - 2;
1696 #ifdef GPU_DIS_MOVEPC
1697         if (doGPUDis)
1698                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1699 #endif
1700 }
1701
1702 static void gpu_opcode_sat8(void)
1703 {
1704 #ifdef GPU_DIS_SAT8
1705         if (doGPUDis)
1706                 WriteLog("%06X: SAT8   R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1707 #endif
1708         RN = ((int32)RN < 0 ? 0 : (RN > 0xFF ? 0xFF : RN));
1709         SET_ZN(RN);
1710 #ifdef GPU_DIS_SAT8
1711         if (doGPUDis)
1712                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1713 #endif
1714 }
1715
1716 static void gpu_opcode_sat16(void)
1717 {
1718         RN = ((int32)RN < 0 ? 0 : (RN > 0xFFFF ? 0xFFFF : RN));
1719         SET_ZN(RN);
1720 }
1721
1722 static void gpu_opcode_sat24(void)
1723 {
1724         RN = ((int32)RN < 0 ? 0 : (RN > 0xFFFFFF ? 0xFFFFFF : RN));
1725         SET_ZN(RN);
1726 }
1727
1728 static void gpu_opcode_store_r14_indexed(void)
1729 {
1730 #ifdef GPU_DIS_STORE14I
1731         if (doGPUDis)
1732                 WriteLog("%06X: STORE  R%02u, (R14+$%02X) [NCZ:%u%u%u, R%02u=%08X, R14+$%02X=%08X]\n", gpu_pc-2, IMM_2, gpu_convert_zero[IMM_1] << 2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, gpu_convert_zero[IMM_1] << 2, gpu_reg[14]+(gpu_convert_zero[IMM_1] << 2));
1733 #endif
1734 #ifdef GPU_CORRECT_ALIGNMENT
1735         uint32 address = gpu_reg[14] + (gpu_convert_zero[IMM_1] << 2);
1736         
1737         if (address >= 0xF03000 && address <= 0xF03FFF)
1738                 GPUWriteLong(address & 0xFFFFFFFC, RN, GPU);
1739         else
1740                 GPUWriteLong(address, RN, GPU);
1741 #else
1742         GPUWriteLong(gpu_reg[14] + (gpu_convert_zero[IMM_1] << 2), RN, GPU);
1743 #endif
1744 }
1745
1746 static void gpu_opcode_store_r15_indexed(void)
1747 {
1748 #ifdef GPU_DIS_STORE15I
1749         if (doGPUDis)
1750                 WriteLog("%06X: STORE  R%02u, (R15+$%02X) [NCZ:%u%u%u, R%02u=%08X, R15+$%02X=%08X]\n", gpu_pc-2, IMM_2, gpu_convert_zero[IMM_1] << 2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, gpu_convert_zero[IMM_1] << 2, gpu_reg[15]+(gpu_convert_zero[IMM_1] << 2));
1751 #endif
1752 #ifdef GPU_CORRECT_ALIGNMENT
1753         uint32 address = gpu_reg[15] + (gpu_convert_zero[IMM_1] << 2);
1754
1755         if (address >= 0xF03000 && address <= 0xF03FFF)
1756                 GPUWriteLong(address & 0xFFFFFFFC, RN, GPU);
1757         else
1758                 GPUWriteLong(address, RN, GPU);
1759 #else
1760         GPUWriteLong(gpu_reg[15] + (gpu_convert_zero[IMM_1] << 2), RN, GPU);
1761 #endif
1762 }
1763
1764 static void gpu_opcode_load_r14_ri(void)
1765 {
1766 #ifdef GPU_DIS_LOAD14R
1767         if (doGPUDis)
1768                 WriteLog("%06X: LOAD   (R14+R%02u), R%02u [NCZ:%u%u%u, R14+R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM+gpu_reg[14], IMM_2, RN);
1769 #endif
1770 #ifdef GPU_CORRECT_ALIGNMENT
1771         uint32 address = gpu_reg[14] + RM;
1772
1773         if (address >= 0xF03000 && address <= 0xF03FFF)
1774                 RN = GPUReadLong(address & 0xFFFFFFFC, GPU);
1775         else
1776                 RN = GPUReadLong(address, GPU);
1777 #else
1778         RN = GPUReadLong(gpu_reg[14] + RM, GPU);
1779 #endif
1780 #ifdef GPU_DIS_LOAD14R
1781         if (doGPUDis)
1782                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1783 #endif
1784 }
1785
1786 static void gpu_opcode_load_r15_ri(void)
1787 {
1788 #ifdef GPU_DIS_LOAD15R
1789         if (doGPUDis)
1790                 WriteLog("%06X: LOAD   (R15+R%02u), R%02u [NCZ:%u%u%u, R15+R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM+gpu_reg[15], IMM_2, RN);
1791 #endif
1792 #ifdef GPU_CORRECT_ALIGNMENT
1793         uint32 address = gpu_reg[15] + RM;
1794
1795         if (address >= 0xF03000 && address <= 0xF03FFF)
1796                 RN = GPUReadLong(address & 0xFFFFFFFC, GPU);
1797         else
1798                 RN = GPUReadLong(address, GPU);
1799 #else
1800         RN = GPUReadLong(gpu_reg[15] + RM, GPU);
1801 #endif
1802 #ifdef GPU_DIS_LOAD15R
1803         if (doGPUDis)
1804                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1805 #endif
1806 }
1807
1808 static void gpu_opcode_store_r14_ri(void)
1809 {
1810 #ifdef GPU_DIS_STORE14R
1811         if (doGPUDis)
1812                 WriteLog("%06X: STORE  R%02u, (R14+R%02u) [NCZ:%u%u%u, R%02u=%08X, R14+R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM+gpu_reg[14]);
1813 #endif
1814 #ifdef GPU_CORRECT_ALIGNMENT
1815         uint32 address = gpu_reg[14] + RM;
1816
1817         if (address >= 0xF03000 && address <= 0xF03FFF)
1818                 GPUWriteLong(address & 0xFFFFFFFC, RN, GPU);
1819         else
1820                 GPUWriteLong(address, RN, GPU);
1821 #else
1822         GPUWriteLong(gpu_reg[14] + RM, RN, GPU);
1823 #endif
1824 }
1825
1826 static void gpu_opcode_store_r15_ri(void)
1827 {
1828 #ifdef GPU_DIS_STORE15R
1829         if (doGPUDis)
1830                 WriteLog("%06X: STORE  R%02u, (R15+R%02u) [NCZ:%u%u%u, R%02u=%08X, R15+R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM+gpu_reg[15]);
1831 #endif
1832 #ifdef GPU_CORRECT_ALIGNMENT_STORE
1833         uint32 address = gpu_reg[15] + RM;
1834
1835         if (address >= 0xF03000 && address <= 0xF03FFF)
1836                 GPUWriteLong(address & 0xFFFFFFFC, RN, GPU);
1837         else
1838                 GPUWriteLong(address, RN, GPU);
1839 #else
1840         GPUWriteLong(gpu_reg[15] + RM, RN, GPU);
1841 #endif
1842 }
1843
// NOP: changes no register or flag; only emits a disassembly line when
// GPU_DIS_NOP tracing is compiled in and enabled.
static void gpu_opcode_nop(void)
{
#ifdef GPU_DIS_NOP
	if (doGPUDis)
		WriteLog("%06X: NOP    [NCZ:%u%u%u]\n",
			gpu_pc-2, gpu_flag_n, gpu_flag_c, gpu_flag_z);
#endif
}
1851
1852 static void gpu_opcode_pack(void)
1853 {
1854 #ifdef GPU_DIS_PACK
1855         if (doGPUDis)
1856                 WriteLog("%06X: %s R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, (!IMM_1 ? "PACK  " : "UNPACK"), IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1857 #endif
1858         uint32 val = RN;
1859
1860 //BUG!  if (RM == 0)                            // Pack
1861         if (IMM_1 == 0)                         // Pack
1862                 RN = ((val >> 10) & 0x0000F000) | ((val >> 5) & 0x00000F00) | (val & 0x000000FF);
1863         else                                            // Unpack
1864                 RN = ((val & 0x0000F000) << 10) | ((val & 0x00000F00) << 5) | (val & 0x000000FF);
1865 #ifdef GPU_DIS_PACK
1866         if (doGPUDis)
1867                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1868 #endif
1869 }
1870
1871 static void gpu_opcode_storeb(void)
1872 {
1873 #ifdef GPU_DIS_STOREB
1874         if (doGPUDis)
1875                 WriteLog("%06X: STOREB R%02u, (R%02u) [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM);
1876 #endif
1877 //Is this right???
1878 // Would appear to be so...!
1879         if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1880                 GPUWriteLong(RM, RN & 0xFF, GPU);
1881         else
1882                 JaguarWriteByte(RM, RN, GPU);
1883 }
1884
1885 static void gpu_opcode_storew(void)
1886 {
1887 #ifdef GPU_DIS_STOREW
1888         if (doGPUDis)
1889                 WriteLog("%06X: STOREW R%02u, (R%02u) [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM);
1890 #endif
1891 #ifdef GPU_CORRECT_ALIGNMENT
1892         if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1893                 GPUWriteLong(RM & 0xFFFFFFFE, RN & 0xFFFF, GPU);
1894         else
1895                 JaguarWriteWord(RM, RN, GPU);
1896 #else
1897         if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1898                 GPUWriteLong(RM, RN & 0xFFFF, GPU);
1899         else
1900                 JaguarWriteWord(RM, RN, GPU);
1901 #endif
1902 }
1903
1904 static void gpu_opcode_store(void)
1905 {
1906 #ifdef GPU_DIS_STORE
1907         if (doGPUDis)
1908                 WriteLog("%06X: STORE  R%02u, (R%02u) [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM);
1909 #endif
1910 #ifdef GPU_CORRECT_ALIGNMENT
1911         if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1912                 GPUWriteLong(RM & 0xFFFFFFFC, RN, GPU);
1913         else
1914                 GPUWriteLong(RM, RN, GPU);
1915 #else
1916         GPUWriteLong(RM, RN, GPU);
1917 #endif
1918 }
1919
1920 static void gpu_opcode_storep(void)
1921 {
1922 #ifdef GPU_CORRECT_ALIGNMENT
1923         if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1924         {
1925                 GPUWriteLong((RM & 0xFFFFFFF8) + 0, gpu_hidata, GPU);
1926                 GPUWriteLong((RM & 0xFFFFFFF8) + 4, RN, GPU);
1927         }
1928         else
1929         {
1930                 GPUWriteLong(RM + 0, gpu_hidata, GPU);
1931                 GPUWriteLong(RM + 4, RN, GPU);
1932         }
1933 #else
1934         GPUWriteLong(RM + 0, gpu_hidata, GPU);
1935         GPUWriteLong(RM + 4, RN, GPU);
1936 #endif
1937 }
1938
1939 static void gpu_opcode_loadb(void)
1940 {
1941 #ifdef GPU_DIS_LOADB
1942         if (doGPUDis)
1943                 WriteLog("%06X: LOADB  (R%02u), R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1944 #endif
1945         if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1946                 RN = GPUReadLong(RM, GPU) & 0xFF;
1947         else
1948                 RN = JaguarReadByte(RM, GPU);
1949 #ifdef GPU_DIS_LOADB
1950         if (doGPUDis)
1951                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1952 #endif
1953 }
1954
1955 static void gpu_opcode_loadw(void)
1956 {
1957 #ifdef GPU_DIS_LOADW
1958         if (doGPUDis)
1959                 WriteLog("%06X: LOADW  (R%02u), R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1960 #endif
1961 #ifdef GPU_CORRECT_ALIGNMENT
1962         if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1963                 RN = GPUReadLong(RM & 0xFFFFFFFE, GPU) & 0xFFFF;
1964         else
1965                 RN = JaguarReadWord(RM, GPU);
1966 #else
1967         if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1968                 RN = GPUReadLong(RM, GPU) & 0xFFFF;
1969         else
1970                 RN = JaguarReadWord(RM, GPU);
1971 #endif
1972 #ifdef GPU_DIS_LOADW
1973         if (doGPUDis)
1974                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1975 #endif
1976 }
1977
1978 // According to the docs, & "Do The Same", this address is long aligned...
1979 // So let's try it:
1980 // And it works!!! Need to fix all instances...
1981 // Also, Power Drive Rally seems to contradict the idea that only LOADs in
1982 // the $F03000-$F03FFF range are aligned...
1983 #warning "!!! Alignment issues, need to find definitive final word on this !!!"
1984 /*
1985 Preliminary testing on real hardware seems to confirm that something strange goes on
1986 with unaligned reads in main memory. When the address is off by 1, the result is the
1987 same as the long address with the top byte replaced by something. So if the read is
from $401, and $400 has 12 34 56 78, the value read will be $nn345678, where nn is a currently unknown value.
1989 When the address is off by 2, the result would be $nnnn5678, where nnnn is unknown.
1990 When the address is off by 3, the result would be $nnnnnn78, where nnnnnn is unknown.
1991 It may be that the "unknown" values come from the prefetch queue, but not sure how
1992 to test that. They seem to be stable, though, which would indicate such a mechanism.
1993 Sometimes, however, the off by 2 case returns $12345678!
1994 */
1995 static void gpu_opcode_load(void)
1996 {
1997 #ifdef GPU_DIS_LOAD
1998         if (doGPUDis)
1999                 WriteLog("%06X: LOAD   (R%02u), R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2000 #endif
2001 #ifdef GPU_CORRECT_ALIGNMENT
2002         uint32 mask[4] = { 0x00000000, 0xFF000000, 0xFFFF0000, 0xFFFFFF00 };
2003 //      if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
2004                 RN = GPUReadLong(RM & 0xFFFFFFFC, GPU);
2005 //              RN = GPUReadLong(RM & 0x00FFFFFC, GPU);
2006 //      else
2007 //              RN = GPUReadLong(RM, GPU);
2008         // Simulate garbage in unaligned reads...
2009 //seems that this behavior is different in GPU mem vs. main mem...
2010 //      if ((RM < 0xF03000) || (RM > 0xF0BFFF))
2011 //              RN |= mask[RM & 0x03];
2012 #else
2013         RN = GPUReadLong(RM, GPU);
2014 #endif
2015 #ifdef GPU_DIS_LOAD
2016         if (doGPUDis)
2017                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2018 #endif
2019 }
2020
2021 static void gpu_opcode_loadp(void)
2022 {
2023 #ifdef GPU_CORRECT_ALIGNMENT
2024         if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
2025         {
2026                 gpu_hidata = GPUReadLong((RM & 0xFFFFFFF8) + 0, GPU);
2027                 RN                 = GPUReadLong((RM & 0xFFFFFFF8) + 4, GPU);
2028         }
2029         else
2030         {
2031                 gpu_hidata = GPUReadLong(RM + 0, GPU);
2032                 RN                 = GPUReadLong(RM + 4, GPU);
2033         }
2034 #else
2035         gpu_hidata = GPUReadLong(RM + 0, GPU);
2036         RN                 = GPUReadLong(RM + 4, GPU);
2037 #endif
2038 }
2039
2040 static void gpu_opcode_load_r14_indexed(void)
2041 {
2042 #ifdef GPU_DIS_LOAD14I
2043         if (doGPUDis)
2044                 WriteLog("%06X: LOAD   (R14+$%02X), R%02u [NCZ:%u%u%u, R14+$%02X=%08X, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1] << 2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, gpu_convert_zero[IMM_1] << 2, gpu_reg[14]+(gpu_convert_zero[IMM_1] << 2), IMM_2, RN);
2045 #endif
2046 #ifdef GPU_CORRECT_ALIGNMENT
2047         uint32 address = gpu_reg[14] + (gpu_convert_zero[IMM_1] << 2);
2048
2049         if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
2050                 RN = GPUReadLong(address & 0xFFFFFFFC, GPU);
2051         else
2052                 RN = GPUReadLong(address, GPU);
2053 #else
2054         RN = GPUReadLong(gpu_reg[14] + (gpu_convert_zero[IMM_1] << 2), GPU);
2055 #endif
2056 #ifdef GPU_DIS_LOAD14I
2057         if (doGPUDis)
2058                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2059 #endif
2060 }
2061
2062 static void gpu_opcode_load_r15_indexed(void)
2063 {
2064 #ifdef GPU_DIS_LOAD15I
2065         if (doGPUDis)
2066                 WriteLog("%06X: LOAD   (R15+$%02X), R%02u [NCZ:%u%u%u, R15+$%02X=%08X, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1] << 2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, gpu_convert_zero[IMM_1] << 2, gpu_reg[15]+(gpu_convert_zero[IMM_1] << 2), IMM_2, RN);
2067 #endif
2068 #ifdef GPU_CORRECT_ALIGNMENT
2069         uint32 address = gpu_reg[15] + (gpu_convert_zero[IMM_1] << 2);
2070
2071         if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
2072                 RN = GPUReadLong(address & 0xFFFFFFFC, GPU);
2073         else
2074                 RN = GPUReadLong(address, GPU);
2075 #else
2076         RN = GPUReadLong(gpu_reg[15] + (gpu_convert_zero[IMM_1] << 2), GPU);
2077 #endif
2078 #ifdef GPU_DIS_LOAD15I
2079         if (doGPUDis)
2080                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2081 #endif
2082 }
2083
2084 static void gpu_opcode_movei(void)
2085 {
2086 #ifdef GPU_DIS_MOVEI
2087         if (doGPUDis)
2088                 WriteLog("%06X: MOVEI  #$%08X, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, (uint32)GPUReadWord(gpu_pc) | ((uint32)GPUReadWord(gpu_pc + 2) << 16), IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2089 #endif
2090         // This instruction is followed by 32-bit value in LSW / MSW format...
2091         RN = (uint32)GPUReadWord(gpu_pc, GPU) | ((uint32)GPUReadWord(gpu_pc + 2, GPU) << 16);
2092         gpu_pc += 4;
2093 #ifdef GPU_DIS_MOVEI
2094         if (doGPUDis)
2095                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2096 #endif
2097 }
2098
2099 static void gpu_opcode_moveta(void)
2100 {
2101 #ifdef GPU_DIS_MOVETA
2102         if (doGPUDis)
2103                 WriteLog("%06X: MOVETA R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u(alt)=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, ALTERNATE_RN);
2104 #endif
2105         ALTERNATE_RN = RM;
2106 #ifdef GPU_DIS_MOVETA
2107         if (doGPUDis)
2108                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u(alt)=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, ALTERNATE_RN);
2109 #endif
2110 }
2111
2112 static void gpu_opcode_movefa(void)
2113 {
2114 #ifdef GPU_DIS_MOVEFA
2115         if (doGPUDis)
2116                 WriteLog("%06X: MOVEFA R%02u, R%02u [NCZ:%u%u%u, R%02u(alt)=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, ALTERNATE_RM, IMM_2, RN);
2117 #endif
2118         RN = ALTERNATE_RM;
2119 #ifdef GPU_DIS_MOVEFA
2120         if (doGPUDis)
2121                 WriteLog("[NCZ:%u%u%u, R%02u(alt)=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, ALTERNATE_RM, IMM_2, RN);
2122 #endif
2123 }
2124
2125 static void gpu_opcode_move(void)
2126 {
2127 #ifdef GPU_DIS_MOVE
2128         if (doGPUDis)
2129                 WriteLog("%06X: MOVE   R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2130 #endif
2131         RN = RM;
2132 #ifdef GPU_DIS_MOVE
2133         if (doGPUDis)
2134                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2135 #endif
2136 }
2137
2138 static void gpu_opcode_moveq(void)
2139 {
2140 #ifdef GPU_DIS_MOVEQ
2141         if (doGPUDis)
2142                 WriteLog("%06X: MOVEQ  #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2143 #endif
2144         RN = IMM_1;
2145 #ifdef GPU_DIS_MOVEQ
2146         if (doGPUDis)
2147                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2148 #endif
2149 }
2150
2151 static void gpu_opcode_resmac(void)
2152 {
2153         RN = gpu_acc;
2154 }
2155
2156 static void gpu_opcode_imult(void)
2157 {
2158 #ifdef GPU_DIS_IMULT
2159         if (doGPUDis)
2160                 WriteLog("%06X: IMULT  R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2161 #endif
2162         RN = (int16)RN * (int16)RM;
2163         SET_ZN(RN);
2164 #ifdef GPU_DIS_IMULT
2165         if (doGPUDis)
2166                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2167 #endif
2168 }
2169
2170 static void gpu_opcode_mult(void)
2171 {
2172 #ifdef GPU_DIS_MULT
2173         if (doGPUDis)
2174                 WriteLog("%06X: MULT   R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2175 #endif
2176         RN = (uint16)RM * (uint16)RN;
2177         SET_ZN(RN);
2178 #ifdef GPU_DIS_MULT
2179         if (doGPUDis)
2180                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2181 #endif
2182 }
2183
2184 static void gpu_opcode_bclr(void)
2185 {
2186 #ifdef GPU_DIS_BCLR
2187         if (doGPUDis)
2188                 WriteLog("%06X: BCLR   #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2189 #endif
2190         uint32 res = RN & ~(1 << IMM_1);
2191         RN = res;
2192         SET_ZN(res);
2193 #ifdef GPU_DIS_BCLR
2194         if (doGPUDis)
2195                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2196 #endif
2197 }
2198
2199 static void gpu_opcode_btst(void)
2200 {
2201 #ifdef GPU_DIS_BTST
2202         if (doGPUDis)
2203                 WriteLog("%06X: BTST   #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2204 #endif
2205         gpu_flag_z = (~RN >> IMM_1) & 1;
2206 #ifdef GPU_DIS_BTST
2207         if (doGPUDis)
2208                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2209 #endif
2210 }
2211
2212 static void gpu_opcode_bset(void)
2213 {
2214 #ifdef GPU_DIS_BSET
2215         if (doGPUDis)
2216                 WriteLog("%06X: BSET   #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2217 #endif
2218         uint32 res = RN | (1 << IMM_1);
2219         RN = res;
2220         SET_ZN(res);
2221 #ifdef GPU_DIS_BSET
2222         if (doGPUDis)
2223                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2224 #endif
2225 }
2226
2227 static void gpu_opcode_imacn(void)
2228 {
2229         uint32 res = (int16)RM * (int16)(RN);
2230         gpu_acc += res;
2231 }
2232
2233 static void gpu_opcode_mtoi(void)
2234 {
2235         uint32 _RM = RM;
2236         uint32 res = RN = (((int32)_RM >> 8) & 0xFF800000) | (_RM & 0x007FFFFF);
2237         SET_ZN(res);
2238 }
2239
2240 static void gpu_opcode_normi(void)
2241 {
2242         uint32 _RM = RM;
2243         uint32 res = 0;
2244
2245         if (_RM)
2246         {
2247                 while ((_RM & 0xFFC00000) == 0)
2248                 {
2249                         _RM <<= 1;
2250                         res--;
2251                 }
2252                 while ((_RM & 0xFF800000) != 0)
2253                 {
2254                         _RM >>= 1;
2255                         res++;
2256                 }
2257         }
2258         RN = res;
2259         SET_ZN(res);
2260 }
2261
2262 static void gpu_opcode_mmult(void)
2263 {
2264         int count       = gpu_matrix_control & 0x0F;    // Matrix width
2265         uint32 addr = gpu_pointer_to_matrix;            // In the GPU's RAM
2266         int64 accum = 0;
2267         uint32 res;
2268
2269         if (gpu_matrix_control & 0x10)                          // Column stepping
2270         {
2271                 for(int i=0; i<count; i++)
2272                 {
2273                         int16 a;
2274                         if (i & 0x01)
2275                                 a = (int16)((gpu_alternate_reg[IMM_1 + (i >> 1)] >> 16) & 0xFFFF);
2276                         else
2277                                 a = (int16)(gpu_alternate_reg[IMM_1 + (i >> 1)] & 0xFFFF);
2278
2279                         int16 b = ((int16)GPUReadWord(addr + 2, GPU));
2280                         accum += a * b;
2281                         addr += 4 * count;
2282                 }
2283         }
2284         else                                                                            // Row stepping
2285         {
2286                 for(int i=0; i<count; i++)
2287                 {
2288                         int16 a;
2289                         if (i & 0x01)
2290                                 a = (int16)((gpu_alternate_reg[IMM_1 + (i >> 1)] >> 16) & 0xFFFF);
2291                         else
2292                                 a = (int16)(gpu_alternate_reg[IMM_1 + (i >> 1)] & 0xFFFF);
2293
2294                         int16 b = ((int16)GPUReadWord(addr + 2, GPU));
2295                         accum += a * b;
2296                         addr += 4;
2297                 }
2298         }
2299         RN = res = (int32)accum;
2300         // carry flag to do (out of the last add)
2301         SET_ZN(res);
2302 }
2303
2304 static void gpu_opcode_abs(void)
2305 {
2306 #ifdef GPU_DIS_ABS
2307         if (doGPUDis)
2308                 WriteLog("%06X: ABS    R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2309 #endif
2310         gpu_flag_c = RN >> 31;
2311         if (RN == 0x80000000)
2312         //Is 0x80000000 a positive number? If so, then we need to set C to 0 as well!
2313                 gpu_flag_n = 1, gpu_flag_z = 0;
2314         else
2315         {
2316                 if (gpu_flag_c)
2317                         RN = -RN;
2318                 gpu_flag_n = 0; SET_FLAG_Z(RN);
2319         }
2320 #ifdef GPU_DIS_ABS
2321         if (doGPUDis)
2322                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2323 #endif
2324 }
2325
2326 static void gpu_opcode_div(void)        // RN / RM
2327 {
2328 #ifdef GPU_DIS_DIV
2329         if (doGPUDis)
2330                 WriteLog("%06X: DIV    R%02u, R%02u (%s) [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, (gpu_div_control & 0x01 ? "16.16" : "32"), gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2331 #endif
2332 // NOTE: remainder is NOT calculated correctly here!
2333 //       The original tried to get it right by checking to see if the
2334 //       remainder was negative, but that's too late...
2335 // The code there should do it now, but I'm not 100% sure...
2336
2337         if (RM)
2338         {
2339                 if (gpu_div_control & 0x01)             // 16.16 division
2340                 {
2341                         RN = ((uint64)RN << 16) / RM;
2342                         gpu_remain = ((uint64)RN << 16) % RM;
2343                 }
2344                 else
2345                 {
2346                         RN = RN / RM;
2347                         gpu_remain = RN % RM;
2348                 }
2349
2350                 if ((gpu_remain - RM) & 0x80000000)     // If the result would have been negative...
2351                         gpu_remain -= RM;                       // Then make it negative!
2352         }
2353         else
2354                 RN = 0xFFFFFFFF;
2355
2356 /*      uint32 _RM=RM;
2357         uint32 _RN=RN;
2358
2359         if (_RM)
2360         {
2361                 if (gpu_div_control & 1)
2362                 {
2363                         gpu_remain = (((uint64)_RN) << 16) % _RM;
2364                         if (gpu_remain&0x80000000)
2365                                 gpu_remain-=_RM;
2366                         RN = (((uint64)_RN) << 16) / _RM;
2367                 }
2368                 else
2369                 {
2370                         gpu_remain = _RN % _RM;
2371                         if (gpu_remain&0x80000000)
2372                                 gpu_remain-=_RM;
2373                         RN/=_RM;
2374                 }
2375         }
2376         else
2377                 RN=0xffffffff;*/
2378 #ifdef GPU_DIS_DIV
2379         if (doGPUDis)
2380                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] Remainder: %08X\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN, gpu_remain);
2381 #endif
2382 }
2383
2384 static void gpu_opcode_imultn(void)
2385 {
2386         uint32 res = (int32)((int16)RN * (int16)RM);
2387         gpu_acc = (int32)res;
2388         SET_FLAG_Z(res);
2389         SET_FLAG_N(res);
2390 }
2391
2392 static void gpu_opcode_neg(void)
2393 {
2394 #ifdef GPU_DIS_NEG
2395         if (doGPUDis)
2396                 WriteLog("%06X: NEG    R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2397 #endif
2398         uint32 res = -RN;
2399         SET_ZNC_SUB(0, RN, res);
2400         RN = res;
2401 #ifdef GPU_DIS_NEG
2402         if (doGPUDis)
2403                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2404 #endif
2405 }
2406
2407 static void gpu_opcode_shlq(void)
2408 {
2409 #ifdef GPU_DIS_SHLQ
2410         if (doGPUDis)
2411                 WriteLog("%06X: SHLQ   #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, 32 - IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2412 #endif
2413 // Was a bug here...
2414 // (Look at Aaron's code: If r1 = 32, then 32 - 32 = 0 which is wrong!)
2415         int32 r1 = 32 - IMM_1;
2416         uint32 res = RN << r1;
2417         SET_ZN(res); gpu_flag_c = (RN >> 31) & 1;
2418         RN = res;
2419 #ifdef GPU_DIS_SHLQ
2420         if (doGPUDis)
2421                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2422 #endif
2423 }
2424
2425 static void gpu_opcode_shrq(void)
2426 {
2427 #ifdef GPU_DIS_SHRQ
2428         if (doGPUDis)
2429                 WriteLog("%06X: SHRQ   #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2430 #endif
2431         int32 r1 = gpu_convert_zero[IMM_1];
2432         uint32 res = RN >> r1;
2433         SET_ZN(res); gpu_flag_c = RN & 1;
2434         RN = res;
2435 #ifdef GPU_DIS_SHRQ
2436         if (doGPUDis)
2437                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2438 #endif
2439 }
2440
2441 static void gpu_opcode_ror(void)
2442 {
2443 #ifdef GPU_DIS_ROR
2444         if (doGPUDis)
2445                 WriteLog("%06X: ROR    R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2446 #endif
2447         uint32 r1 = RM & 0x1F;
2448         uint32 res = (RN >> r1) | (RN << (32 - r1));
2449         SET_ZN(res); gpu_flag_c = (RN >> 31) & 1;
2450         RN = res;
2451 #ifdef GPU_DIS_ROR
2452         if (doGPUDis)
2453                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2454 #endif
2455 }
2456
2457 static void gpu_opcode_rorq(void)
2458 {
2459 #ifdef GPU_DIS_RORQ
2460         if (doGPUDis)
2461                 WriteLog("%06X: RORQ   #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2462 #endif
2463         uint32 r1 = gpu_convert_zero[IMM_1 & 0x1F];
2464         uint32 r2 = RN;
2465         uint32 res = (r2 >> r1) | (r2 << (32 - r1));
2466         RN = res;
2467         SET_ZN(res); gpu_flag_c = (r2 >> 31) & 0x01;
2468 #ifdef GPU_DIS_RORQ
2469         if (doGPUDis)
2470                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2471 #endif
2472 }
2473
2474 static void gpu_opcode_sha(void)
2475 {
2476 /*      int dreg = jaguar.op & 31;
2477         int32 r1 = (int32)jaguar.r[(jaguar.op >> 5) & 31];
2478         uint32 r2 = jaguar.r[dreg];
2479         uint32 res;
2480
2481         CLR_ZNC;
2482         if (r1 < 0)
2483         {
2484                 res = (r1 <= -32) ? 0 : (r2 << -r1);
2485                 jaguar.FLAGS |= (r2 >> 30) & 2;
2486         }
2487         else
2488         {
2489                 res = (r1 >= 32) ? ((int32)r2 >> 31) : ((int32)r2 >> r1);
2490                 jaguar.FLAGS |= (r2 << 1) & 2;
2491         }
2492         jaguar.r[dreg] = res;
2493         SET_ZN(res);*/
2494
2495 #ifdef GPU_DIS_SHA
2496         if (doGPUDis)
2497                 WriteLog("%06X: SHA    R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2498 #endif
2499         uint32 res;
2500
2501         if ((int32)RM < 0)
2502         {
2503                 res = ((int32)RM <= -32) ? 0 : (RN << -(int32)RM);
2504                 gpu_flag_c = RN >> 31;
2505         }
2506         else
2507         {
2508                 res = ((int32)RM >= 32) ? ((int32)RN >> 31) : ((int32)RN >> (int32)RM);
2509                 gpu_flag_c = RN & 0x01;
2510         }
2511         RN = res;
2512         SET_ZN(res);
2513 #ifdef GPU_DIS_SHA
2514         if (doGPUDis)
2515                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2516 #endif
2517
2518 /*      int32 sRM=(int32)RM;
2519         uint32 _RN=RN;
2520
2521         if (sRM<0)
2522         {
2523                 uint32 shift=-sRM;
2524                 if (shift>=32) shift=32;
2525                 gpu_flag_c=(_RN&0x80000000)>>31;
2526                 while (shift)
2527                 {
2528                         _RN<<=1;
2529                         shift--;
2530                 }
2531         }
2532         else
2533         {
2534                 uint32 shift=sRM;
2535                 if (shift>=32) shift=32;
2536                 gpu_flag_c=_RN&0x1;
2537                 while (shift)
2538                 {
2539                         _RN=((int32)_RN)>>1;
2540                         shift--;
2541                 }
2542         }
2543         RN=_RN;
2544         SET_FLAG_Z(_RN);
2545         SET_FLAG_N(_RN);*/
2546 }
2547
2548 static void gpu_opcode_sharq(void)
2549 {
2550 #ifdef GPU_DIS_SHARQ
2551         if (doGPUDis)
2552                 WriteLog("%06X: SHARQ  #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2553 #endif
2554         uint32 res = (int32)RN >> gpu_convert_zero[IMM_1];
2555         SET_ZN(res); gpu_flag_c = RN & 0x01;
2556         RN = res;
2557 #ifdef GPU_DIS_SHARQ
2558         if (doGPUDis)
2559                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2560 #endif
2561 }
2562
2563 static void gpu_opcode_sh(void)
2564 {
2565 #ifdef GPU_DIS_SH
2566         if (doGPUDis)
2567                 WriteLog("%06X: SH     R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2568 #endif
2569         if (RM & 0x80000000)            // Shift left
2570         {
2571                 gpu_flag_c = RN >> 31;
2572                 RN = ((int32)RM <= -32 ? 0 : RN << -(int32)RM);
2573         }
2574         else                                            // Shift right
2575         {
2576                 gpu_flag_c = RN & 0x01;
2577                 RN = (RM >= 32 ? 0 : RN >> RM);
2578         }
2579         SET_ZN(RN);
2580 #ifdef GPU_DIS_SH
2581         if (doGPUDis)
2582                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2583 #endif
2584 }
2585
2586 //Temporary: Testing only!
2587 //#include "gpu2.cpp"
2588 //#include "gpu3.cpp"
2589
2590 #else
2591
2592 // New thread-safe GPU core
2593
// Placeholder entry point for the not-yet-written thread-safe GPU core.
// 'data' is currently unused. Always returns 0; flowing off the end of a
// non-void function, as the empty body did, is undefined behaviour.
int GPUCore(void * data)
{
	(void)data;
	return 0;
}
2597
2598 #endif