]> Shamusworld >> Repos - virtualjaguar/blob - src/gpu.cpp
fbf26a0e6846fba758246ee0cec664eaf677f044
[virtualjaguar] / src / gpu.cpp
1 #if 1
2
3 //
4 // GPU Core
5 //
6 // Originally by David Raingeard (Cal2)
7 // GCC/SDL port by Niels Wagenaar (Linux/WIN32) and Caz (BeOS)
8 // Cleanups, endian wrongness, and bad ASM amelioration by James Hammons
9 // (C) 2010 Underground Software
10 //
11 // JLH = James Hammons <jlhamm@acm.org>
12 //
13 // Who  When        What
14 // ---  ----------  -------------------------------------------------------------
15 // JLH  01/16/2010  Created this log ;-)
16 // JLH  11/26/2011  Added fixes for LOAD/STORE alignment issues
17
18 //
19 // Note: Endian wrongness probably stems from the MAME origins of this emu and
20 //       the braindead way in which MAME handles memory. :-)
21 //
22 // Problem with not booting the BIOS was the incorrect way that the
23 // SUBC instruction set the carry when the carry was set going in...
24 // Same problem with ADDC...
25 //
26
27 #include "gpu.h"
28
29 #include <stdlib.h>
30 #include <string.h>                                                             // For memset
31 #include "dsp.h"
32 #include "jagdasm.h"
33 #include "jaguar.h"
34 #include "log.h"
35 #include "m68000/m68kinterface.h"
36 //#include "memory.h"
37 #include "tom.h"
38
39
40 // Seems alignment in loads & stores was off...
41 #define GPU_CORRECT_ALIGNMENT
42 //#define GPU_DEBUG
43
44 // For GPU disassembly...
45
46 #if 0
47 #define GPU_DIS_ABS
48 #define GPU_DIS_ADD
49 #define GPU_DIS_ADDC
50 #define GPU_DIS_ADDQ
51 #define GPU_DIS_ADDQT
52 #define GPU_DIS_AND
53 #define GPU_DIS_BCLR
54 #define GPU_DIS_BSET
55 #define GPU_DIS_BTST
56 #define GPU_DIS_CMP
57 #define GPU_DIS_CMPQ
58 #define GPU_DIS_DIV
59 #define GPU_DIS_IMULT
60 #define GPU_DIS_JUMP
61 #define GPU_DIS_JR
62 #define GPU_DIS_LOAD
63 #define GPU_DIS_LOADB
64 #define GPU_DIS_LOADW
65 #define GPU_DIS_LOAD14I
66 #define GPU_DIS_LOAD14R
67 #define GPU_DIS_LOAD15I
68 #define GPU_DIS_LOAD15R
69 #define GPU_DIS_MOVE
70 #define GPU_DIS_MOVEFA
71 #define GPU_DIS_MOVEI
72 #define GPU_DIS_MOVEPC
73 #define GPU_DIS_MOVETA
74 #define GPU_DIS_MOVEQ
75 #define GPU_DIS_MULT
76 #define GPU_DIS_NEG
77 #define GPU_DIS_NOP
78 #define GPU_DIS_NOT
79 #define GPU_DIS_OR
80 #define GPU_DIS_PACK
81 #define GPU_DIS_ROR
82 #define GPU_DIS_RORQ
83 #define GPU_DIS_SAT8
84 #define GPU_DIS_SH
85 #define GPU_DIS_SHA
86 #define GPU_DIS_SHARQ
87 #define GPU_DIS_SHLQ
88 #define GPU_DIS_SHRQ
89 #define GPU_DIS_STORE
90 #define GPU_DIS_STOREB
91 #define GPU_DIS_STOREW
92 #define GPU_DIS_STORE14I
93 #define GPU_DIS_STORE14R
94 #define GPU_DIS_STORE15I
95 #define GPU_DIS_STORE15R
96 #define GPU_DIS_SUB
97 #define GPU_DIS_SUBC
98 #define GPU_DIS_SUBQ
99 #define GPU_DIS_SUBQT
100 #define GPU_DIS_XOR
101
102 bool doGPUDis = false;
103 //bool doGPUDis = true;
104 #endif
105
106 /*
107 GPU opcodes use (BIOS flying ATARI logo):
108 +                     add 357416
109 +                    addq 538030
110 +                   addqt 6999
111 +                     sub 116663
112 +                    subq 188059
113 +                   subqt 15086
114 +                     neg 36097
115 +                     and 233993
116 +                      or 109332
117 +                     xor 1384
118 +                    btst 111924
119 +                    bset 25029
120 +                    bclr 10551
121 +                    mult 28147
122 +                   imult 69148
123 +                     div 64102
124 +                     abs 159394
125 +                    shlq 194690
126 +                    shrq 292587
127 +                   sharq 192649
128 +                    rorq 58672
129 +                     cmp 244963
130 +                    cmpq 114834
131 +                    move 833472
132 +                   moveq 56427
133 +                  moveta 220814
134 +                  movefa 170678
135 +                   movei 152025
136 +                   loadw 108220
137 +                    load 430936
138 +                  storew 3036
139 +                   store 372490
140 +                 move_pc 2330
141 +                    jump 349134
142 +                      jr 529171
143                     mmult 64904
144 +                     nop 432179
145 */
146
147 // Various bits
148
149 #define CINT0FLAG                       0x0200
150 #define CINT1FLAG                       0x0400
151 #define CINT2FLAG                       0x0800
152 #define CINT3FLAG                       0x1000
153 #define CINT4FLAG                       0x2000
154 #define CINT04FLAGS                     (CINT0FLAG | CINT1FLAG | CINT2FLAG | CINT3FLAG | CINT4FLAG)
155
156 // GPU_FLAGS bits
157
158 #define ZERO_FLAG               0x0001
159 #define CARRY_FLAG              0x0002
160 #define NEGA_FLAG               0x0004
161 #define IMASK                   0x0008
162 #define INT_ENA0                0x0010
163 #define INT_ENA1                0x0020
164 #define INT_ENA2                0x0040
165 #define INT_ENA3                0x0080
166 #define INT_ENA4                0x0100
167 #define INT_CLR0                0x0200
168 #define INT_CLR1                0x0400
169 #define INT_CLR2                0x0800
170 #define INT_CLR3                0x1000
171 #define INT_CLR4                0x2000
172 #define REGPAGE                 0x4000
173 #define DMAEN                   0x8000
174
175 // External global variables
176
177 extern int start_logging;
178 extern int gpu_start_log;
179
180 // Private function prototypes
181
182 void GPUUpdateRegisterBanks(void);
183 void GPUDumpDisassembly(void);
184 void GPUDumpRegisters(void);
185 void GPUDumpMemory(void);
186
187 static void gpu_opcode_add(void);
188 static void gpu_opcode_addc(void);
189 static void gpu_opcode_addq(void);
190 static void gpu_opcode_addqt(void);
191 static void gpu_opcode_sub(void);
192 static void gpu_opcode_subc(void);
193 static void gpu_opcode_subq(void);
194 static void gpu_opcode_subqt(void);
195 static void gpu_opcode_neg(void);
196 static void gpu_opcode_and(void);
197 static void gpu_opcode_or(void);
198 static void gpu_opcode_xor(void);
199 static void gpu_opcode_not(void);
200 static void gpu_opcode_btst(void);
201 static void gpu_opcode_bset(void);
202 static void gpu_opcode_bclr(void);
203 static void gpu_opcode_mult(void);
204 static void gpu_opcode_imult(void);
205 static void gpu_opcode_imultn(void);
206 static void gpu_opcode_resmac(void);
207 static void gpu_opcode_imacn(void);
208 static void gpu_opcode_div(void);
209 static void gpu_opcode_abs(void);
210 static void gpu_opcode_sh(void);
211 static void gpu_opcode_shlq(void);
212 static void gpu_opcode_shrq(void);
213 static void gpu_opcode_sha(void);
214 static void gpu_opcode_sharq(void);
215 static void gpu_opcode_ror(void);
216 static void gpu_opcode_rorq(void);
217 static void gpu_opcode_cmp(void);
218 static void gpu_opcode_cmpq(void);
219 static void gpu_opcode_sat8(void);
220 static void gpu_opcode_sat16(void);
221 static void gpu_opcode_move(void);
222 static void gpu_opcode_moveq(void);
223 static void gpu_opcode_moveta(void);
224 static void gpu_opcode_movefa(void);
225 static void gpu_opcode_movei(void);
226 static void gpu_opcode_loadb(void);
227 static void gpu_opcode_loadw(void);
228 static void gpu_opcode_load(void);
229 static void gpu_opcode_loadp(void);
230 static void gpu_opcode_load_r14_indexed(void);
231 static void gpu_opcode_load_r15_indexed(void);
232 static void gpu_opcode_storeb(void);
233 static void gpu_opcode_storew(void);
234 static void gpu_opcode_store(void);
235 static void gpu_opcode_storep(void);
236 static void gpu_opcode_store_r14_indexed(void);
237 static void gpu_opcode_store_r15_indexed(void);
238 static void gpu_opcode_move_pc(void);
239 static void gpu_opcode_jump(void);
240 static void gpu_opcode_jr(void);
241 static void gpu_opcode_mmult(void);
242 static void gpu_opcode_mtoi(void);
243 static void gpu_opcode_normi(void);
244 static void gpu_opcode_nop(void);
245 static void gpu_opcode_load_r14_ri(void);
246 static void gpu_opcode_load_r15_ri(void);
247 static void gpu_opcode_store_r14_ri(void);
248 static void gpu_opcode_store_r15_ri(void);
249 static void gpu_opcode_sat24(void);
250 static void gpu_opcode_pack(void);
251
252 // This is wrong, since it doesn't take pipeline effects into account. !!! FIX !!!
253 /*uint8_t gpu_opcode_cycles[64] =
254 {
255         3,  3,  3,  3,  3,  3,  3,  3,
256         3,  3,  3,  3,  3,  3,  3,  3,
257         3,  3,  1,  3,  1, 18,  3,  3,
258         3,  3,  3,  3,  3,  3,  3,  3,
259         3,  3,  2,  2,  2,  2,  3,  4,
260         5,  4,  5,  6,  6,  1,  1,  1,
261         1,  2,  2,  2,  1,  1,  9,  3,
262         3,  1,  6,  6,  2,  2,  3,  3
263 };//*/
264 //Here's a QnD kludge...
265 //This is wrong, wrong, WRONG, but it seems to work for the time being...
266 //(That is, it fixes Flip Out which relies on GPU timing rather than semaphores. Bad developers! Bad!)
267 //What's needed here is a way to take pipeline effects into account (including pipeline stalls!)...
268 /*uint8_t gpu_opcode_cycles[64] =
269 {
270         1,  1,  1,  1,  1,  1,  1,  1,
271         1,  1,  1,  1,  1,  1,  1,  1,
272         1,  1,  1,  1,  1,  9,  1,  1,
273         1,  1,  1,  1,  1,  1,  1,  1,
274         1,  1,  1,  1,  1,  1,  1,  2,
275         2,  2,  2,  3,  3,  1,  1,  1,
276         1,  1,  1,  1,  1,  1,  4,  1,
277         1,  1,  3,  3,  1,  1,  1,  1
278 };//*/
// Cycle cost table with one entry for each of the 64 opcode slots (presumably
// indexed by opcode number, like the other 64-entry tables in this file).
// Every entry is currently 1: pipeline effects and stalls are not modelled.
uint8_t gpu_opcode_cycles[64] =
{
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
};
290
291 void (*gpu_opcode[64])()=
292 {
293         gpu_opcode_add,                                 gpu_opcode_addc,                                gpu_opcode_addq,                                gpu_opcode_addqt,
294         gpu_opcode_sub,                                 gpu_opcode_subc,                                gpu_opcode_subq,                                gpu_opcode_subqt,
295         gpu_opcode_neg,                                 gpu_opcode_and,                                 gpu_opcode_or,                                  gpu_opcode_xor,
296         gpu_opcode_not,                                 gpu_opcode_btst,                                gpu_opcode_bset,                                gpu_opcode_bclr,
297         gpu_opcode_mult,                                gpu_opcode_imult,                               gpu_opcode_imultn,                              gpu_opcode_resmac,
298         gpu_opcode_imacn,                               gpu_opcode_div,                                 gpu_opcode_abs,                                 gpu_opcode_sh,
299         gpu_opcode_shlq,                                gpu_opcode_shrq,                                gpu_opcode_sha,                                 gpu_opcode_sharq,
300         gpu_opcode_ror,                                 gpu_opcode_rorq,                                gpu_opcode_cmp,                                 gpu_opcode_cmpq,
301         gpu_opcode_sat8,                                gpu_opcode_sat16,                               gpu_opcode_move,                                gpu_opcode_moveq,
302         gpu_opcode_moveta,                              gpu_opcode_movefa,                              gpu_opcode_movei,                               gpu_opcode_loadb,
303         gpu_opcode_loadw,                               gpu_opcode_load,                                gpu_opcode_loadp,                               gpu_opcode_load_r14_indexed,
304         gpu_opcode_load_r15_indexed,    gpu_opcode_storeb,                              gpu_opcode_storew,                              gpu_opcode_store,
305         gpu_opcode_storep,                              gpu_opcode_store_r14_indexed,   gpu_opcode_store_r15_indexed,   gpu_opcode_move_pc,
306         gpu_opcode_jump,                                gpu_opcode_jr,                                  gpu_opcode_mmult,                               gpu_opcode_mtoi,
307         gpu_opcode_normi,                               gpu_opcode_nop,                                 gpu_opcode_load_r14_ri,                 gpu_opcode_load_r15_ri,
308         gpu_opcode_store_r14_ri,                gpu_opcode_store_r15_ri,                gpu_opcode_sat24,                               gpu_opcode_pack,
309 };
310
311 static uint8_t gpu_ram_8[0x1000];
312 uint32_t gpu_pc;
313 static uint32_t gpu_acc;
314 static uint32_t gpu_remain;
315 static uint32_t gpu_hidata;
316 static uint32_t gpu_flags;
317 static uint32_t gpu_matrix_control;
318 static uint32_t gpu_pointer_to_matrix;
319 static uint32_t gpu_data_organization;
320 static uint32_t gpu_control;
321 static uint32_t gpu_div_control;
322 // There is a distinct advantage to having these separated out--there's no need to clear
323 // a bit before writing a result. I.e., if the result of an operation leaves a zero in
324 // the carry flag, you don't have to zero gpu_flag_c before you can write that zero!
325 static uint8_t gpu_flag_z, gpu_flag_n, gpu_flag_c;
326 uint32_t gpu_reg_bank_0[32];
327 uint32_t gpu_reg_bank_1[32];
328 static uint32_t * gpu_reg;
329 static uint32_t * gpu_alternate_reg;
330
331 static uint32_t gpu_instruction;
332 static uint32_t gpu_opcode_first_parameter;
333 static uint32_t gpu_opcode_second_parameter;
334
335 #define GPU_RUNNING             (gpu_control & 0x01)
336
337 #define RM                              gpu_reg[gpu_opcode_first_parameter]
338 #define RN                              gpu_reg[gpu_opcode_second_parameter]
339 #define ALTERNATE_RM    gpu_alternate_reg[gpu_opcode_first_parameter]
340 #define ALTERNATE_RN    gpu_alternate_reg[gpu_opcode_second_parameter]
341 #define IMM_1                   gpu_opcode_first_parameter
342 #define IMM_2                   gpu_opcode_second_parameter
343
344 #define SET_FLAG_Z(r)   (gpu_flag_z = ((r) == 0));
345 #define SET_FLAG_N(r)   (gpu_flag_n = (((uint32_t)(r) >> 31) & 0x01));
346
347 #define RESET_FLAG_Z()  gpu_flag_z = 0;
348 #define RESET_FLAG_N()  gpu_flag_n = 0;
349 #define RESET_FLAG_C()  gpu_flag_c = 0;
350
// Flag helper macros operating on gpu_flag_z / gpu_flag_n / gpu_flag_c.
//
// Every macro expands to ONE parenthesized expression, so each behaves as a
// single statement even as the body of an unbraced if/else. (The compound
// macros used to expand to several ';'-separated statements, of which only the
// first would have been guarded in that situation.)
//
// NOTE: arguments may be evaluated more than once by the compound macros--do
// not pass expressions with side effects.
#define CLR_Z				(gpu_flag_z = 0)
#define CLR_ZN				(gpu_flag_z = gpu_flag_n = 0)
#define CLR_ZNC				(gpu_flag_z = gpu_flag_n = gpu_flag_c = 0)
// Z is set when the result is zero
#define SET_Z(r)			(gpu_flag_z = ((r) == 0))
// N is a copy of bit 31 of the result
#define SET_N(r)			(gpu_flag_n = (((uint32_t)(r) >> 31) & 0x01))
// Carry out of a + b: set when b is greater than the one's complement of a
#define SET_C_ADD(a,b)		(gpu_flag_c = ((uint32_t)(b) > (uint32_t)(~(a))))
// Borrow out of a - b: set when b is greater than a (unsigned compare)
#define SET_C_SUB(a,b)		(gpu_flag_c = ((uint32_t)(b) > (uint32_t)(a)))
// Compound forms; the comma operator keeps the original N, Z, C update order
#define SET_ZN(r)			(SET_N(r), SET_Z(r))
#define SET_ZNC_ADD(a,b,r)	(SET_N(r), SET_Z(r), SET_C_ADD(a,b))
#define SET_ZNC_SUB(a,b,r)	(SET_N(r), SET_Z(r), SET_C_SUB(a,b))
361
// Lookup table over 0..31: entry 0 yields 32, every other entry yields its own
// index.
uint32_t gpu_convert_zero[32] =
{
	32,  1,  2,  3,  4,  5,  6,  7,
	 8,  9, 10, 11, 12, 13, 14, 15,
	16, 17, 18, 19, 20, 21, 22, 23,
	24, 25, 26, 27, 28, 29, 30, 31
};
364
365 uint8_t * branch_condition_table = 0;
366 #define BRANCH_CONDITION(x)     branch_condition_table[(x) + ((jaguar_flags & 7) << 5)]
367
368 uint32_t gpu_opcode_use[64];
369
// Mnemonic for each of the 64 opcode slots, in the same order as the
// gpu_opcode[] dispatch table.
const char * gpu_opcode_str[64] =
{
	/*  0 */ "add",					"addc",
	/*  2 */ "addq",				"addqt",
	/*  4 */ "sub",					"subc",
	/*  6 */ "subq",				"subqt",
	/*  8 */ "neg",					"and",
	/* 10 */ "or",					"xor",
	/* 12 */ "not",					"btst",
	/* 14 */ "bset",				"bclr",
	/* 16 */ "mult",				"imult",
	/* 18 */ "imultn",				"resmac",
	/* 20 */ "imacn",				"div",
	/* 22 */ "abs",					"sh",
	/* 24 */ "shlq",				"shrq",
	/* 26 */ "sha",					"sharq",
	/* 28 */ "ror",					"rorq",
	/* 30 */ "cmp",					"cmpq",
	/* 32 */ "sat8",				"sat16",
	/* 34 */ "move",				"moveq",
	/* 36 */ "moveta",				"movefa",
	/* 38 */ "movei",				"loadb",
	/* 40 */ "loadw",				"load",
	/* 42 */ "loadp",				"load_r14_indexed",
	/* 44 */ "load_r15_indexed",	"storeb",
	/* 46 */ "storew",				"store",
	/* 48 */ "storep",				"store_r14_indexed",
	/* 50 */ "store_r15_indexed",	"move_pc",
	/* 52 */ "jump",				"jr",
	/* 54 */ "mmult",				"mtoi",
	/* 56 */ "normi",				"nop",
	/* 58 */ "load_r14_ri",			"load_r15_ri",
	/* 60 */ "store_r14_ri",		"store_r15_ri",
	/* 62 */ "sat24",				"pack",
};
389
390 static uint32_t gpu_in_exec = 0;
391 static uint32_t gpu_releaseTimeSlice_flag = 0;
392
393 void GPUReleaseTimeslice(void)
394 {
395         gpu_releaseTimeSlice_flag = 1;
396 }
397
398 uint32_t GPUGetPC(void)
399 {
400         return gpu_pc;
401 }
402
403 void build_branch_condition_table(void)
404 {
405         if (!branch_condition_table)
406         {
407                 branch_condition_table = (uint8_t *)malloc(32 * 8 * sizeof(branch_condition_table[0]));
408
409                 if (branch_condition_table)
410                 {
411                         for(int i=0; i<8; i++)
412                         {
413                                 for(int j=0; j<32; j++)
414                                 {
415                                         int result = 1;
416                                         if (j & 1)
417                                                 if (i & ZERO_FLAG)
418                                                         result = 0;
419                                         if (j & 2)
420                                                 if (!(i & ZERO_FLAG))
421                                                         result = 0;
422                                         if (j & 4)
423                                                 if (i & (CARRY_FLAG << (j >> 4)))
424                                                         result = 0;
425                                         if (j & 8)
426                                                 if (!(i & (CARRY_FLAG << (j >> 4))))
427                                                         result = 0;
428                                         branch_condition_table[i * 32 + j] = result;
429                                 }
430                         }
431                 }
432         }
433 }
434
435 //
436 // GPU byte access (read)
437 //
438 uint8_t GPUReadByte(uint32_t offset, uint32_t who/*=UNKNOWN*/)
439 {
440         if (offset >= 0xF02000 && offset <= 0xF020FF)
441                 WriteLog("GPU: ReadByte--Attempt to read from GPU register file by %s!\n", whoName[who]);
442
443         if ((offset >= GPU_WORK_RAM_BASE) && (offset < GPU_WORK_RAM_BASE+0x1000))
444                 return gpu_ram_8[offset & 0xFFF];
445         else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset < GPU_CONTROL_RAM_BASE+0x20))
446         {
447                 uint32_t data = GPUReadLong(offset & 0xFFFFFFFC, who);
448
449                 if ((offset & 0x03) == 0)
450                         return data >> 24;
451                 else if ((offset & 0x03) == 1)
452                         return (data >> 16) & 0xFF;
453                 else if ((offset & 0x03) == 2)
454                         return (data >> 8) & 0xFF;
455                 else if ((offset & 0x03) == 3)
456                         return data & 0xFF;
457         }
458
459         return JaguarReadByte(offset, who);
460 }
461
462 //
463 // GPU word access (read)
464 //
465 uint16_t GPUReadWord(uint32_t offset, uint32_t who/*=UNKNOWN*/)
466 {
467         if (offset >= 0xF02000 && offset <= 0xF020FF)
468                 WriteLog("GPU: ReadWord--Attempt to read from GPU register file by %s!\n", whoName[who]);
469
470         if ((offset >= GPU_WORK_RAM_BASE) && (offset < GPU_WORK_RAM_BASE+0x1000))
471         {
472                 offset &= 0xFFF;
473                 uint16_t data = ((uint16_t)gpu_ram_8[offset] << 8) | (uint16_t)gpu_ram_8[offset+1];
474                 return data;
475         }
476         else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset < GPU_CONTROL_RAM_BASE+0x20))
477         {
478 // This looks and smells wrong...
479 // But it *might* be OK...
480                 if (offset & 0x01)                      // Catch cases 1 & 3... (unaligned read)
481                         return (GPUReadByte(offset, who) << 8) | GPUReadByte(offset+1, who);
482
483                 uint32_t data = GPUReadLong(offset & 0xFFFFFFFC, who);
484
485                 if (offset & 0x02)                      // Cases 0 & 2...
486                         return data & 0xFFFF;
487                 else
488                         return data >> 16;
489         }
490
491 //TEMP--Mirror of F03000? No. Writes only...
492 //if (offset >= 0xF0B000 && offset <= 0xF0BFFF)
493 //WriteLog("[GPUR16] --> Possible GPU RAM mirror access by %s!", whoName[who]);
494
495         return JaguarReadWord(offset, who);
496 }
497
498 //
499 // GPU dword access (read)
500 //
501 uint32_t GPUReadLong(uint32_t offset, uint32_t who/*=UNKNOWN*/)
502 {
503         if (offset >= 0xF02000 && offset <= 0xF020FF)
504         {
505                 WriteLog("GPU: ReadLong--Attempt to read from GPU register file (%X) by %s!\n", offset, whoName[who]);
506                 uint32_t reg = (offset & 0xFC) >> 2;
507                 return (reg < 32 ? gpu_reg_bank_0[reg] : gpu_reg_bank_1[reg - 32]); 
508         }
509
510 //      if ((offset >= GPU_WORK_RAM_BASE) && (offset < GPU_WORK_RAM_BASE + 0x1000))
511         if ((offset >= GPU_WORK_RAM_BASE) && (offset <= GPU_WORK_RAM_BASE + 0x0FFC))
512         {
513                 offset &= 0xFFF;
514                 return ((uint32_t)gpu_ram_8[offset] << 24) | ((uint32_t)gpu_ram_8[offset+1] << 16)
515                         | ((uint32_t)gpu_ram_8[offset+2] << 8) | (uint32_t)gpu_ram_8[offset+3];//*/
516 //              return GET32(gpu_ram_8, offset);
517         }
518 //      else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset < GPU_CONTROL_RAM_BASE+0x20))
519         else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset <= GPU_CONTROL_RAM_BASE + 0x1C))
520         {
521                 offset &= 0x1F;
522                 switch (offset)
523                 {
524                 case 0x00:
525                         gpu_flag_c = (gpu_flag_c ? 1 : 0);
526                         gpu_flag_z = (gpu_flag_z ? 1 : 0);
527                         gpu_flag_n = (gpu_flag_n ? 1 : 0);
528
529                         gpu_flags = (gpu_flags & 0xFFFFFFF8) | (gpu_flag_n << 2) | (gpu_flag_c << 1) | gpu_flag_z;
530
531                         return gpu_flags & 0xFFFFC1FF;
532                 case 0x04:
533                         return gpu_matrix_control;
534                 case 0x08:
535                         return gpu_pointer_to_matrix;
536                 case 0x0C:
537                         return gpu_data_organization;
538                 case 0x10:
539                         return gpu_pc;
540                 case 0x14:
541                         return gpu_control;
542                 case 0x18:
543                         return gpu_hidata;
544                 case 0x1C:
545                         return gpu_remain;
546                 default:                                                                // unaligned long read
547 #ifdef GPU_DEBUG
548                         WriteLog("GPU: Read32--unaligned 32 bit read at %08X by %s.\n", GPU_CONTROL_RAM_BASE + offset, whoName[who]);
549 #endif  // GPU_DEBUG
550                         return 0;
551                 }
552         }
553 //TEMP--Mirror of F03000? No. Writes only...
554 //if (offset >= 0xF0B000 && offset <= 0xF0BFFF)
555 //      WriteLog("[GPUR32] --> Possible GPU RAM mirror access by %s!\n", whoName[who]);
556 /*if (offset >= 0xF1D000 && offset <= 0xF1DFFF)
557         WriteLog("[GPUR32] --> Reading from Wavetable ROM!\n");//*/
558
559         return (JaguarReadWord(offset, who) << 16) | JaguarReadWord(offset + 2, who);
560 }
561
562 //
563 // GPU byte access (write)
564 //
565 void GPUWriteByte(uint32_t offset, uint8_t data, uint32_t who/*=UNKNOWN*/)
566 {
567         if (offset >= 0xF02000 && offset <= 0xF020FF)
568                 WriteLog("GPU: WriteByte--Attempt to write to GPU register file by %s!\n", whoName[who]);
569
570         if ((offset >= GPU_WORK_RAM_BASE) && (offset <= GPU_WORK_RAM_BASE + 0x0FFF))
571         {
572                 gpu_ram_8[offset & 0xFFF] = data;
573
574 //This is the same stupid worthless code that was in the DSP!!! AARRRGGGGHHHHH!!!!!!
575 /*              if (!gpu_in_exec)
576                 {
577                         m68k_end_timeslice();
578                         dsp_releaseTimeslice();
579                 }*/
580                 return;
581         }
582         else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset <= GPU_CONTROL_RAM_BASE + 0x1F))
583         {
584                 uint32_t reg = offset & 0x1C;
585                 int bytenum = offset & 0x03;
586
587 //This is definitely wrong!
588                 if ((reg >= 0x1C) && (reg <= 0x1F))
589                         gpu_div_control = (gpu_div_control & (~(0xFF << (bytenum << 3)))) | (data << (bytenum << 3));
590                 else
591                 {
592                         uint32_t old_data = GPUReadLong(offset & 0xFFFFFFC, who);
593                         bytenum = 3 - bytenum; // convention motorola !!!
594                         old_data = (old_data & (~(0xFF << (bytenum << 3)))) | (data << (bytenum << 3));
595                         GPUWriteLong(offset & 0xFFFFFFC, old_data, who);
596                 }
597                 return;
598         }
599 //      WriteLog("gpu: writing %.2x at 0x%.8x\n",data,offset);
600         JaguarWriteByte(offset, data, who);
601 }
602
603 //
604 // GPU word access (write)
605 //
606 void GPUWriteWord(uint32_t offset, uint16_t data, uint32_t who/*=UNKNOWN*/)
607 {
608         if (offset >= 0xF02000 && offset <= 0xF020FF)
609                 WriteLog("GPU: WriteWord--Attempt to write to GPU register file by %s!\n", whoName[who]);
610
611         if ((offset >= GPU_WORK_RAM_BASE) && (offset <= GPU_WORK_RAM_BASE + 0x0FFE))
612         {
613                 gpu_ram_8[offset & 0xFFF] = (data>>8) & 0xFF;
614                 gpu_ram_8[(offset+1) & 0xFFF] = data & 0xFF;//*/
615 /*              offset &= 0xFFF;
616                 SET16(gpu_ram_8, offset, data);//*/
617
618 /*if (offset >= 0xF03214 && offset < 0xF0321F)
619         WriteLog("GPU: Writing WORD (%04X) to GPU RAM (%08X)...\n", data, offset);//*/
620
621
622 //This is the same stupid worthless code that was in the DSP!!! AARRRGGGGHHHHH!!!!!!
623 /*              if (!gpu_in_exec)
624                 {
625                         m68k_end_timeslice();
626                         dsp_releaseTimeslice();
627                 }*/
628                 return;
629         }
630         else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset <= GPU_CONTROL_RAM_BASE + 0x1E))
631         {
632                 if (offset & 0x01)              // This is supposed to weed out unaligned writes, but does nothing...
633                 {
634 #ifdef GPU_DEBUG
635                         WriteLog("GPU: Write16--unaligned write @ %08X [%04X]\n", offset, data);
636                         GPUDumpRegisters();
637 #endif  // GPU_DEBUG
638                         return;
639                 }
640 //Dual locations in this range: $1C Divide unit remainder/Divide unit control (R/W)
641 //This just literally sucks.
642                 if ((offset & 0x1C) == 0x1C)
643                 {
644 //This doesn't look right either--handles cases 1, 2, & 3 all the same!
645                         if (offset & 0x02)
646                                 gpu_div_control = (gpu_div_control & 0xFFFF0000) | (data & 0xFFFF);
647                         else
648                                 gpu_div_control = (gpu_div_control & 0x0000FFFF) | ((data & 0xFFFF) << 16);
649                 }
650                 else
651                 {
652 //WriteLog("[GPU W16:%08X,%04X]", offset, data);
653                         uint32_t old_data = GPUReadLong(offset & 0xFFFFFFC, who);
654
655                         if (offset & 0x02)
656                                 old_data = (old_data & 0xFFFF0000) | (data & 0xFFFF);
657                         else
658                                 old_data = (old_data & 0x0000FFFF) | ((data & 0xFFFF) << 16);
659
660                         GPUWriteLong(offset & 0xFFFFFFC, old_data, who);
661                 }
662
663                 return;
664         }
665         else if ((offset == GPU_WORK_RAM_BASE + 0x0FFF) || (GPU_CONTROL_RAM_BASE + 0x1F))
666         {
667 #ifdef GPU_DEBUG
668                         WriteLog("GPU: Write16--unaligned write @ %08X by %s [%04X]!\n", offset, whoName[who], data);
669                         GPUDumpRegisters();
670 #endif  // GPU_DEBUG
671                 return;
672         }
673
674         // Have to be careful here--this can cause an infinite loop!
675         JaguarWriteWord(offset, data, who);
676 }
677
678 //
679 // GPU dword access (write)
680 //
681 void GPUWriteLong(uint32_t offset, uint32_t data, uint32_t who/*=UNKNOWN*/)
682 {
683         if (offset >= 0xF02000 && offset <= 0xF020FF)
684                 WriteLog("GPU: WriteLong--Attempt to write to GPU register file by %s!\n", whoName[who]);
685
686 //      if ((offset >= GPU_WORK_RAM_BASE) && (offset < GPU_WORK_RAM_BASE + 0x1000))
687         if ((offset >= GPU_WORK_RAM_BASE) && (offset <= GPU_WORK_RAM_BASE + 0x0FFC))
688         {
689 #ifdef GPU_DEBUG
690                 if (offset & 0x03)
691                 {
692                         WriteLog("GPU: Write32--unaligned write @ %08X [%08X] by %s\n", offset, data, whoName[who]);
693                         GPUDumpRegisters();
694                 }
695 #endif  // GPU_DEBUG
696
697                 offset &= 0xFFF;
698                 SET32(gpu_ram_8, offset, data);
699                 return;
700         }
701 //      else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset < GPU_CONTROL_RAM_BASE+0x20))
702         else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset <= GPU_CONTROL_RAM_BASE + 0x1C))
703         {
704                 offset &= 0x1F;
705                 switch (offset)
706                 {
707                 case 0x00:
708                 {
709                         bool IMASKCleared = (gpu_flags & IMASK) && !(data & IMASK);
710                         // NOTE: According to the JTRM, writing a 1 to IMASK has no effect; only the
711                         //       IRQ logic can set it. So we mask it out here to prevent problems...
712                         gpu_flags = data & (~IMASK);
713                         gpu_flag_z = gpu_flags & ZERO_FLAG;
714                         gpu_flag_c = (gpu_flags & CARRY_FLAG) >> 1;
715                         gpu_flag_n = (gpu_flags & NEGA_FLAG) >> 2;
716                         GPUUpdateRegisterBanks();
717                         gpu_control &= ~((gpu_flags & CINT04FLAGS) >> 3);       // Interrupt latch clear bits
718 //Writing here is only an interrupt enable--this approach is just plain wrong!
719 //                      GPUHandleIRQs();
720 //This, however, is A-OK! ;-)
721                         if (IMASKCleared)                                               // If IMASK was cleared,
722                                 GPUHandleIRQs();                                        // see if any other interrupts need servicing!
723 #ifdef GPU_DEBUG
724                         if (gpu_flags & (INT_ENA0 | INT_ENA1 | INT_ENA2 | INT_ENA3 | INT_ENA4))
725                                 WriteLog("GPU: Interrupt enable set by %s! Bits: %02X\n", whoName[who], (gpu_flags >> 4) & 0x1F);
726                         WriteLog("GPU: REGPAGE %s...\n", (gpu_flags & REGPAGE ? "set" : "cleared"));
727 #endif  // GPU_DEBUG
728                         break;
729                 }
730                 case 0x04:
731                         gpu_matrix_control = data;
732                         break;
733                 case 0x08:
734                         // This can only point to long aligned addresses
735                         gpu_pointer_to_matrix = data & 0xFFFFFFFC;
736                         break;
737                 case 0x0C:
738                         gpu_data_organization = data;
739                         break;
740                 case 0x10:
741                         gpu_pc = data;
742 #ifdef GPU_DEBUG
743 WriteLog("GPU: %s setting GPU PC to %08X %s\n", whoName[who], gpu_pc, (GPU_RUNNING ? "(GPU is RUNNING!)" : ""));//*/
744 #endif  // GPU_DEBUG
745                         break;
746                 case 0x14:
747                 {
748 //                      uint32_t gpu_was_running = GPU_RUNNING;
749                         data &= ~0xF7C0;                // Disable writes to INT_LAT0-4 & TOM version number
750
751                         // check for GPU -> CPU interrupt
752                         if (data & 0x02)
753                         {
754 //WriteLog("GPU->CPU interrupt\n");
755                                 if (TOMIRQEnabled(IRQ_GPU))
756                                 {
757 //This is the programmer's responsibility, to make sure the handler is valid, not ours!
758 //                                      if ((TOMIRQEnabled(IRQ_GPU))// && (JaguarInterruptHandlerIsValid(64)))
759                                         {
760                                                 TOMSetPendingGPUInt();
761                                                 m68k_set_irq(2);                        // Set 68000 IPL 2
762                                                 GPUReleaseTimeslice();
763                                         }
764                                 }
765                                 data &= ~0x02;
766                         }
767
768                         // check for CPU -> GPU interrupt #0
769                         if (data & 0x04)
770                         {
771 //WriteLog("CPU->GPU interrupt\n");
772                                 GPUSetIRQLine(0, ASSERT_LINE);
773                                 m68k_end_timeslice();
774                                 DSPReleaseTimeslice();
775                                 data &= ~0x04;
776                         }
777
778                         // single stepping
779                         if (data & 0x10)
780                         {
781                                 //WriteLog("asked to perform a single step (single step is %senabled)\n",(data&0x8)?"":"not ");
782                         }
783
784                         gpu_control = (gpu_control & 0xF7C0) | (data & (~0xF7C0));
785
786                         // if gpu wasn't running but is now running, execute a few cycles
787 #ifndef GPU_SINGLE_STEPPING
788 /*                      if (!gpu_was_running && GPU_RUNNING)
789 #ifdef GPU_DEBUG
790                         {
791                                 WriteLog("GPU: Write32--About to do stupid braindead GPU execution for 200 cycles.\n");
792 #endif  // GPU_DEBUG
793                                 GPUExec(200);
794 #ifdef GPU_DEBUG
795                         }
796 #endif  // GPU_DEBUG//*/
797 #else
798                         if (gpu_control & 0x18)
799                                 GPUExec(1);
800 #endif  // #ifndef GPU_SINGLE_STEPPING
801 #ifdef GPU_DEBUG
802 WriteLog("Write to GPU CTRL by %s: %08X ", whoName[who], data);
803 if (GPU_RUNNING)
804         WriteLog(" --> Starting to run at %08X by %s...", gpu_pc, whoName[who]);
805 else
806         WriteLog(" --> Stopped by %s! (GPU_PC: %08X)", whoName[who], gpu_pc);
807 WriteLog("\n");
808 #endif  // GPU_DEBUG
809 //if (GPU_RUNNING)
810 //      GPUDumpDisassembly();
811 /*if (GPU_RUNNING)
812 {
813         if (gpu_pc == 0xF035D8)
814         {
815 //              GPUDumpDisassembly();
816 //              log_done();
817 //              exit(1);
818                 gpu_control &= 0xFFFFFFFE;      // Don't run it and let's see what happens!
819 //Hmm. Seems to lock up when going into the demo...
820 //Try to disable the collision altogether!
821         }
822 }//*/
823 extern int effect_start5;
824 static bool finished = false;
825 //if (GPU_RUNNING && effect_start5 && !finished)
826 if (GPU_RUNNING && effect_start5 && gpu_pc == 0xF035D8)
827 {
828         // Let's do a dump of $6528!
829 /*      uint32_t numItems = JaguarReadWord(0x6BD6);
830         WriteLog("\nDump of $6528: %u items.\n\n", numItems);
831         for(int i=0; i<numItems*3*4; i+=3*4)
832         {
833                 WriteLog("\t%04X: %08X %08X %08X -> ", 0x6528+i, JaguarReadLong(0x6528+i),
834                         JaguarReadLong(0x6528+i+4), JaguarReadLong(0x6528+i+8));
835                 uint16_t link = JaguarReadWord(0x6528+i+8+2);
836                 for(int j=0; j<40; j+=4)
837                         WriteLog("%08X ", JaguarReadLong(link + j));
838                 WriteLog("\n");
839         }
840         WriteLog("\n");//*/
841         // Let's try a manual blit here...
842 //This isn't working the way it should! !!! FIX !!!
843 //Err, actually, it is.
844 // NOW, it works right! Problem solved!!! It's a blitter bug!
845 /*      uint32_t src = 0x4D54, dst = 0xF03000, width = 10 * 4;
846         for(int y=0; y<127; y++)
847         {
848                 for(int x=0; x<2; x++)
849                 {
850                         JaguarWriteLong(dst, JaguarReadLong(src));
851
852                         src += 4;
853                         dst += 4;
854                 }
855                 src += width - (2 * 4);
856         }//*/
857 /*      finished = true;
858         doGPUDis = true;
859         WriteLog("\nGPU: About to execute collision detection code.\n\n");//*/
860
861 /*      WriteLog("\nGPU: About to execute collision detection code. Data @ 4D54:\n\n");
862         int count = 0;
863         for(int i=0x004D54; i<0x004D54+2048; i++)
864         {
865                 WriteLog("%02X ", JaguarReadByte(i));
866                 count++;
867                 if (count == 32)
868                 {
869                         count = 0;
870                         WriteLog("\n");
871                 }
872         }
873         WriteLog("\n\nData @ F03000:\n\n");
874         count = 0;
875         for(int i=0xF03000; i<0xF03200; i++)
876         {
877                 WriteLog("%02X ", JaguarReadByte(i));
878                 count++;
879                 if (count == 32)
880                 {
881                         count = 0;
882                         WriteLog("\n");
883                 }
884         }
885         WriteLog("\n\n");
886         log_done();
887         exit(0);//*/
888 }
889 //if (!GPU_RUNNING)
890 //      doGPUDis = false;
891 /*if (!GPU_RUNNING && finished)
892 {
893         WriteLog("\nGPU: Finished collision detection code. Exiting!\n\n");
894         GPUDumpRegisters();
895         log_done();
896         exit(0);
897 }//*/
898                         // (?) If we're set running by the M68K (or DSP?) then end its timeslice to
899                         // allow the GPU a chance to run...
900                         // Yes! This partially fixed Trevor McFur...
901                         if (GPU_RUNNING)
902                                 m68k_end_timeslice();
903                         break;
904                 }
905                 case 0x18:
906                         gpu_hidata = data;
907                         break;
908                 case 0x1C:
909                         gpu_div_control = data;
910                         break;
911 //              default:   // unaligned long write
912                         //exit(0);
913                         //__asm int 3
914                 }
915                 return;
916         }
917
918 //      JaguarWriteWord(offset, (data >> 16) & 0xFFFF, who);
919 //      JaguarWriteWord(offset+2, data & 0xFFFF, who);
920 // We're a 32-bit processor, we can do a long write...!
921         JaguarWriteLong(offset, data, who);
922 }
923
924 //
925 // Change register banks if necessary
926 //
927 void GPUUpdateRegisterBanks(void)
928 {
929         int bank = (gpu_flags & REGPAGE);               // REGPAGE bit
930
931         if (gpu_flags & IMASK)                                  // IMASK bit
932                 bank = 0;                                                       // IMASK forces main bank to be bank 0
933
934         if (bank)
935                 gpu_reg = gpu_reg_bank_1, gpu_alternate_reg = gpu_reg_bank_0;
936         else
937                 gpu_reg = gpu_reg_bank_0, gpu_alternate_reg = gpu_reg_bank_1;
938 }
939
940 void GPUHandleIRQs(void)
941 {
942         // Bail out if we're already in an interrupt!
943         if (gpu_flags & IMASK)
944                 return;
945
946         // Get the interrupt latch & enable bits
947         uint32_t bits = (gpu_control >> 6) & 0x1F, mask = (gpu_flags >> 4) & 0x1F;
948
949         // Bail out if latched interrupts aren't enabled
950         bits &= mask;
951         if (!bits)
952                 return;
953
954         // Determine which interrupt to service
955         uint32_t which = 0; //Isn't there a #pragma to disable this warning???
956         if (bits & 0x01)
957                 which = 0;
958         if (bits & 0x02)
959                 which = 1;
960         if (bits & 0x04)
961                 which = 2;
962         if (bits & 0x08)
963                 which = 3;
964         if (bits & 0x10)
965                 which = 4;
966
967         if (start_logging)
968                 WriteLog("GPU: Generating IRQ #%i\n", which);
969
970         // set the interrupt flag
971         gpu_flags |= IMASK;
972         GPUUpdateRegisterBanks();
973
974         // subqt  #4,r31                ; pre-decrement stack pointer
975         // move  pc,r30                 ; address of interrupted code
976         // store  r30,(r31)     ; store return address
977         gpu_reg[31] -= 4;
978         GPUWriteLong(gpu_reg[31], gpu_pc - 2, GPU);
979
980         // movei  #service_address,r30  ; pointer to ISR entry
981         // jump  (r30)                                  ; jump to ISR
982         // nop
983         gpu_pc = gpu_reg[30] = GPU_WORK_RAM_BASE + (which * 0x10);
984 }
985
986 void GPUSetIRQLine(int irqline, int state)
987 {
988         if (start_logging)
989                 WriteLog("GPU: Setting GPU IRQ line #%i\n", irqline);
990
991         uint32_t mask = 0x0040 << irqline;
992         gpu_control &= ~mask;                           // Clear the interrupt latch
993
994         if (state)
995         {
996                 gpu_control |= mask;                    // Assert the interrupt latch
997                 GPUHandleIRQs();                                // And handle the interrupt...
998         }
999 }
1000
1001 //TEMPORARY: Testing only!
1002 //#include "gpu2.h"
1003 //#include "gpu3.h"
1004
1005 void GPUInit(void)
1006 {
1007 //      memory_malloc_secure((void **)&gpu_ram_8, 0x1000, "GPU work RAM");
1008 //      memory_malloc_secure((void **)&gpu_reg_bank_0, 32 * sizeof(int32_t), "GPU bank 0 regs");
1009 //      memory_malloc_secure((void **)&gpu_reg_bank_1, 32 * sizeof(int32_t), "GPU bank 1 regs");
1010
1011         build_branch_condition_table();
1012
1013         GPUReset();
1014
1015 //TEMPORARY: Testing only!
1016 //      gpu2_init();
1017 //      gpu3_init();
1018 }
1019
1020 void GPUReset(void)
1021 {
1022         // GPU registers (directly visible)
1023         gpu_flags                         = 0x00000000;
1024         gpu_matrix_control    = 0x00000000;
1025         gpu_pointer_to_matrix = 0x00000000;
1026         gpu_data_organization = 0xFFFFFFFF;
1027         gpu_pc                            = 0x00F03000;
1028         gpu_control                       = 0x00002800;                 // Correctly sets this as TOM Rev. 2
1029         gpu_hidata                        = 0x00000000;
1030         gpu_remain                        = 0x00000000;                 // These two registers are RO/WO
1031         gpu_div_control           = 0x00000000;
1032
1033         // GPU internal register
1034         gpu_acc                           = 0x00000000;
1035
1036         gpu_reg = gpu_reg_bank_0;
1037         gpu_alternate_reg = gpu_reg_bank_1;
1038
1039         for(int i=0; i<32; i++)
1040                 gpu_reg[i] = gpu_alternate_reg[i] = 0x00000000;
1041
1042         CLR_ZNC;
1043         memset(gpu_ram_8, 0xFF, 0x1000);
1044         gpu_in_exec = 0;
1045 //not needed    GPUInterruptPending = false;
1046         GPUResetStats();
1047
1048         // Contents of local RAM are quasi-stable; we simulate this by randomizing RAM contents
1049         for(uint32_t i=0; i<4096; i+=4)
1050                 *((uint32_t *)(&gpu_ram_8[i])) = rand();
1051 }
1052
1053 uint32_t GPUReadPC(void)
1054 {
1055         return gpu_pc;
1056 }
1057
1058 void GPUResetStats(void)
1059 {
1060         for(uint32_t i=0; i<64; i++)
1061                 gpu_opcode_use[i] = 0;
1062         WriteLog("--> GPU stats were reset!\n");
1063 }
1064
1065 void GPUDumpDisassembly(void)
1066 {
1067         char buffer[512];
1068
1069         WriteLog("\n---[GPU code at 00F03000]---------------------------\n");
1070         uint32_t j = 0xF03000;
1071         while (j <= 0xF03FFF)
1072         {
1073                 uint32_t oldj = j;
1074                 j += dasmjag(JAGUAR_GPU, buffer, j);
1075                 WriteLog("\t%08X: %s\n", oldj, buffer);
1076         }
1077 }
1078
1079 void GPUDumpRegisters(void)
1080 {
1081         WriteLog("\n---[GPU flags: NCZ %d%d%d]-----------------------\n", gpu_flag_n, gpu_flag_c, gpu_flag_z);
1082         WriteLog("\nRegisters bank 0\n");
1083         for(int j=0; j<8; j++)
1084         {
1085                 WriteLog("\tR%02i = %08X R%02i = %08X R%02i = %08X R%02i = %08X\n",
1086                                                   (j << 2) + 0, gpu_reg_bank_0[(j << 2) + 0],
1087                                                   (j << 2) + 1, gpu_reg_bank_0[(j << 2) + 1],
1088                                                   (j << 2) + 2, gpu_reg_bank_0[(j << 2) + 2],
1089                                                   (j << 2) + 3, gpu_reg_bank_0[(j << 2) + 3]);
1090         }
1091         WriteLog("Registers bank 1\n");
1092         for(int j=0; j<8; j++)
1093         {
1094                 WriteLog("\tR%02i = %08X R%02i = %08X R%02i = %08X R%02i = %08X\n",
1095                                                   (j << 2) + 0, gpu_reg_bank_1[(j << 2) + 0],
1096                                                   (j << 2) + 1, gpu_reg_bank_1[(j << 2) + 1],
1097                                                   (j << 2) + 2, gpu_reg_bank_1[(j << 2) + 2],
1098                                                   (j << 2) + 3, gpu_reg_bank_1[(j << 2) + 3]);
1099         }
1100 }
1101
1102 void GPUDumpMemory(void)
1103 {
1104         WriteLog("\n---[GPU data at 00F03000]---------------------------\n");
1105         for(int i=0; i<0xFFF; i+=4)
1106                 WriteLog("\t%08X: %02X %02X %02X %02X\n", 0xF03000+i, gpu_ram_8[i],
1107                         gpu_ram_8[i+1], gpu_ram_8[i+2], gpu_ram_8[i+3]);
1108 }
1109
1110 void GPUDone(void)
1111 {
1112         WriteLog("GPU: Stopped at PC=%08X (GPU %s running)\n", (unsigned int)gpu_pc, GPU_RUNNING ? "was" : "wasn't");
1113
1114         // Get the interrupt latch & enable bits
1115         uint8_t bits = (gpu_control >> 6) & 0x1F, mask = (gpu_flags >> 4) & 0x1F;
1116         WriteLog("GPU: Latch bits = %02X, enable bits = %02X\n", bits, mask);
1117
1118         GPUDumpRegisters();
1119         GPUDumpDisassembly();
1120
1121         WriteLog("\nGPU opcodes use:\n");
1122         for(int i=0; i<64; i++)
1123         {
1124                 if (gpu_opcode_use[i])
1125                         WriteLog("\t%17s %lu\n", gpu_opcode_str[i], gpu_opcode_use[i]);
1126         }
1127         WriteLog("\n");
1128
1129 //      memory_free(gpu_ram_8);
1130 //      memory_free(gpu_reg_bank_0);
1131 //      memory_free(gpu_reg_bank_1);
1132 }
1133
1134 //
1135 // Main GPU execution core
1136 //
1137 static int testCount = 1;
1138 static int len = 0;
1139 static bool tripwire = false;
1140 void GPUExec(int32_t cycles)
1141 {
1142         if (!GPU_RUNNING)
1143                 return;
1144
1145 #ifdef GPU_SINGLE_STEPPING
1146         if (gpu_control & 0x18)
1147         {
1148                 cycles = 1;
1149                 gpu_control &= ~0x10;
1150         }
1151 #endif
1152         GPUHandleIRQs();
1153         gpu_releaseTimeSlice_flag = 0;
1154         gpu_in_exec++;
1155
1156         while (cycles > 0 && GPU_RUNNING)
1157         {
1158 if (gpu_ram_8[0x054] == 0x98 && gpu_ram_8[0x055] == 0x0A && gpu_ram_8[0x056] == 0x03
1159         && gpu_ram_8[0x057] == 0x00 && gpu_ram_8[0x058] == 0x00 && gpu_ram_8[0x059] == 0x00)
1160 {
1161         if (gpu_pc == 0xF03000)
1162         {
1163                 extern uint32_t starCount;
1164                 starCount = 0;
1165 /*              WriteLog("GPU: Starting starfield generator... Dump of [R03=%08X]:\n", gpu_reg_bank_0[03]);
1166                 uint32_t base = gpu_reg_bank_0[3];
1167                 for(uint32_t i=0; i<0x100; i+=16)
1168                 {
1169                         WriteLog("%02X: ", i);
1170                         for(uint32_t j=0; j<16; j++)
1171                         {
1172                                 WriteLog("%02X ", JaguarReadByte(base + i + j));
1173                         }
1174                         WriteLog("\n");
1175                 }*/
1176         }
1177 //      if (gpu_pc == 0xF03)
1178         {
1179         }
1180 }//*/
1181 /*if (gpu_pc == 0xF03B9E && gpu_reg_bank_0[01] == 0)
1182 {
1183         GPUDumpRegisters();
1184         WriteLog("GPU: Starting disassembly log...\n");
1185         doGPUDis = true;
1186 }//*/
1187 /*if (gpu_pc == 0xF0359A)
1188 {
1189         doGPUDis = true;
1190         GPUDumpRegisters();
1191 }*/
1192 /*              gpu_flag_c = (gpu_flag_c ? 1 : 0);
1193                 gpu_flag_z = (gpu_flag_z ? 1 : 0);
1194                 gpu_flag_n = (gpu_flag_n ? 1 : 0);*/
1195 #if 0
1196 if (gpu_pc == 0xF03200)
1197         doGPUDis = true;
1198 #endif
1199
1200                 uint16_t opcode = GPUReadWord(gpu_pc, GPU);
1201                 uint32_t index = opcode >> 10;
1202                 gpu_instruction = opcode;                               // Added for GPU #3...
1203                 gpu_opcode_first_parameter = (opcode >> 5) & 0x1F;
1204                 gpu_opcode_second_parameter = opcode & 0x1F;
1205 /*if (gpu_pc == 0xF03BE8)
1206 WriteLog("Start of OP frame write...\n");
1207 if (gpu_pc == 0xF03EEE)
1208 WriteLog("--> Writing BRANCH object ---\n");
1209 if (gpu_pc == 0xF03F62)
1210 WriteLog("--> Writing BITMAP object ***\n");//*/
1211 /*if (gpu_pc == 0xF03546)
1212 {
1213         WriteLog("\n--> GPU PC: F03546\n");
1214         GPUDumpRegisters();
1215         GPUDumpDisassembly();
1216 }//*/
1217 /*if (gpu_pc == 0xF033F6)
1218 {
1219         WriteLog("\n--> GPU PC: F033F6\n");
1220         GPUDumpRegisters();
1221         GPUDumpDisassembly();
1222 }//*/
1223 /*if (gpu_pc == 0xF033CC)
1224 {
1225         WriteLog("\n--> GPU PC: F033CC\n");
1226         GPUDumpRegisters();
1227         GPUDumpDisassembly();
1228 }//*/
1229 /*if (gpu_pc == 0xF033D6)
1230 {
1231         WriteLog("\n--> GPU PC: F033D6 (#%d)\n", testCount++);
1232         GPUDumpRegisters();
1233         GPUDumpMemory();
1234 }//*/
1235 /*if (gpu_pc == 0xF033D8)
1236 {
1237         WriteLog("\n--> GPU PC: F033D8 (#%d)\n", testCount++);
1238         GPUDumpRegisters();
1239         GPUDumpMemory();
1240 }//*/
1241 /*if (gpu_pc == 0xF0358E)
1242 {
1243         WriteLog("\n--> GPU PC: F0358E (#%d)\n", testCount++);
1244         GPUDumpRegisters();
1245         GPUDumpMemory();
1246 }//*/
1247 /*if (gpu_pc == 0xF034CA)
1248 {
1249         WriteLog("\n--> GPU PC: F034CA (#%d)\n", testCount++);
1250         GPUDumpRegisters();
1251 }//*/
1252 /*if (gpu_pc == 0xF034CA)
1253 {
1254         len = gpu_reg[1] + 4;//, r9save = gpu_reg[9];
1255         WriteLog("\nAbout to subtract [#%d] (R14=%08X, R15=%08X, R9=%08X):\n   ", testCount++, gpu_reg[14], gpu_reg[15], gpu_reg[9]);
1256         for(int i=0; i<len; i+=4)
1257                 WriteLog(" %08X", GPUReadLong(gpu_reg[15]+i));
1258         WriteLog("\n   ");
1259         for(int i=0; i<len; i+=4)
1260                 WriteLog(" %08X", GPUReadLong(gpu_reg[14]+i));
1261         WriteLog("\n\n");
1262 }
1263 if (gpu_pc == 0xF034DE)
1264 {
1265         WriteLog("\nSubtracted! (R14=%08X, R15=%08X):\n   ", gpu_reg[14], gpu_reg[15]);
1266         for(int i=0; i<len; i+=4)
1267                 WriteLog(" %08X", GPUReadLong(gpu_reg[15]+i));
1268         WriteLog("\n   ");
1269         for(int i=0; i<len; i+=4)
1270                 WriteLog(" %08X", GPUReadLong(gpu_reg[14]+i));
1271         WriteLog("\n   ");
1272         for(int i=0; i<len; i+=4)
1273                 WriteLog(" --------");
1274         WriteLog("\n   ");
1275         for(int i=0; i<len; i+=4)
1276                 WriteLog(" %08X", GPUReadLong(gpu_reg[9]+4+i));
1277         WriteLog("\n\n");
1278 }//*/
1279 /*if (gpu_pc == 0xF035C8)
1280 {
1281         WriteLog("\n--> GPU PC: F035C8 (#%d)\n", testCount++);
1282         GPUDumpRegisters();
1283         GPUDumpDisassembly();
1284 }//*/
1285
1286 if (gpu_start_log)
1287 {
1288 //      gpu_reset_stats();
1289 static char buffer[512];
1290 dasmjag(JAGUAR_GPU, buffer, gpu_pc);
1291 WriteLog("GPU: [%08X] %s (RM=%08X, RN=%08X) -> ", gpu_pc, buffer, RM, RN);
1292 }//*/
1293 //$E400 -> 1110 01 -> $39 -> 57
1294 //GPU #1
1295                 gpu_pc += 2;
1296                 gpu_opcode[index]();
1297 //GPU #2
1298 //              gpu2_opcode[index]();
1299 //              gpu_pc += 2;
1300 //GPU #3                                (Doesn't show ATARI logo! #1 & #2 do...)
1301 //              gpu_pc += 2;
1302 //              gpu3_opcode[index]();
1303
1304 // BIOS hacking
1305 //GPU: [00F03548] jr      nz,00F03560 (0xd561) (RM=00F03114, RN=00000004) ->     --> JR: Branch taken.
1306 /*static bool firstTime = true;
1307 if (gpu_pc == 0xF03548 && firstTime)
1308 {
1309         gpu_flag_z = 1;
1310 //      firstTime = false;
1311
1312 //static char buffer[512];
1313 //int k=0xF03548;
1314 //while (k<0xF0356C)
1315 //{
1316 //int oldk = k;
1317 //k += dasmjag(JAGUAR_GPU, buffer, k);
1318 //WriteLog("GPU: [%08X] %s\n", oldk, buffer);
1319 //}
1320 //      gpu_start_log = 1;
1321 }//*/
1322 //GPU: [00F0354C] jump    nz,(r29) (0xd3a1) (RM=00F03314, RN=00000004) -> (RM=00F03314, RN=00000004)
1323 /*if (gpu_pc == 0xF0354C)
1324         gpu_flag_z = 0;//, gpu_start_log = 1;//*/
1325
1326                 cycles -= gpu_opcode_cycles[index];
1327                 gpu_opcode_use[index]++;
1328 if (gpu_start_log)
1329         WriteLog("(RM=%08X, RN=%08X)\n", RM, RN);//*/
1330 if ((gpu_pc < 0xF03000 || gpu_pc > 0xF03FFF) && !tripwire)
1331 {
1332         WriteLog("GPU: Executing outside local RAM! GPU_PC: %08X\n", gpu_pc);
1333         tripwire = true;
1334 }
1335         }
1336
1337         gpu_in_exec--;
1338 }
1339
1340 //
1341 // GPU opcodes
1342 //
1343
1344 /*
1345 GPU opcodes use (offset punch--vertically below bad guy):
1346                       add 18686
1347                      addq 32621
1348                       sub 7483
1349                      subq 10252
1350                       and 21229
1351                        or 15003
1352                      btst 1822
1353                      bset 2072
1354                      mult 141
1355                       div 2392
1356                      shlq 13449
1357                      shrq 10297
1358                     sharq 11104
1359                       cmp 6775
1360                      cmpq 5944
1361                      move 31259
1362                     moveq 4473
1363                     movei 23277
1364                     loadb 46
1365                     loadw 4201
1366                      load 28580
1367          load_r14_indexed 1183
1368          load_r15_indexed 1125
1369                    storew 178
1370                     store 10144
1371         store_r14_indexed 320
1372         store_r15_indexed 1
1373                   move_pc 1742
1374                      jump 24467
1375                        jr 18090
1376                       nop 41362
1377 */
1378
1379
1380 static void gpu_opcode_jump(void)
1381 {
1382 #ifdef GPU_DIS_JUMP
1383 const char * condition[32] =
1384 {       "T", "nz", "z", "???", "nc", "nc nz", "nc z", "???", "c", "c nz",
1385         "c z", "???", "???", "???", "???", "???", "???", "???", "???",
1386         "???", "nn", "nn nz", "nn z", "???", "n", "n nz", "n z", "???",
1387         "???", "???", "???", "F" };
1388         if (doGPUDis)
1389                 WriteLog("%06X: JUMP   %s, (R%02u) [NCZ:%u%u%u, R%02u=%08X] ", gpu_pc-2, condition[IMM_2], IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM);
1390 #endif
1391         // normalize flags
1392 /*      gpu_flag_c = (gpu_flag_c ? 1 : 0);
1393         gpu_flag_z = (gpu_flag_z ? 1 : 0);
1394         gpu_flag_n = (gpu_flag_n ? 1 : 0);*/
1395         // KLUDGE: Used by BRANCH_CONDITION
1396         uint32_t jaguar_flags = (gpu_flag_n << 2) | (gpu_flag_c << 1) | gpu_flag_z;
1397
1398         if (BRANCH_CONDITION(IMM_2))
1399         {
1400 #ifdef GPU_DIS_JUMP
1401         if (doGPUDis)
1402                 WriteLog("Branched!\n");
1403 #endif
1404 if (gpu_start_log)
1405         WriteLog("    --> JUMP: Branch taken.\n");
1406                 uint32_t delayed_pc = RM;
1407                 GPUExec(1);
1408                 gpu_pc = delayed_pc;
1409 /*              uint16_t opcode = GPUReadWord(gpu_pc, GPU);
1410                 gpu_opcode_first_parameter = (opcode >> 5) & 0x1F;
1411                 gpu_opcode_second_parameter = opcode & 0x1F;
1412
1413                 gpu_pc = delayed_pc;
1414                 gpu_opcode[opcode>>10]();//*/
1415         }
1416 #ifdef GPU_DIS_JUMP
1417         else
1418                 if (doGPUDis)
1419                         WriteLog("Branch NOT taken.\n");
1420 #endif
1421 }
1422
1423
1424 static void gpu_opcode_jr(void)
1425 {
1426 #ifdef GPU_DIS_JR
1427 const char * condition[32] =
1428 {       "T", "nz", "z", "???", "nc", "nc nz", "nc z", "???", "c", "c nz",
1429         "c z", "???", "???", "???", "???", "???", "???", "???", "???",
1430         "???", "nn", "nn nz", "nn z", "???", "n", "n nz", "n z", "???",
1431         "???", "???", "???", "F" };
1432         if (doGPUDis)
1433                 WriteLog("%06X: JR     %s, %06X [NCZ:%u%u%u] ", gpu_pc-2, condition[IMM_2], gpu_pc+((IMM_1 & 0x10 ? 0xFFFFFFF0 | IMM_1 : IMM_1) * 2), gpu_flag_n, gpu_flag_c, gpu_flag_z);
1434 #endif
1435 /*      if (CONDITION(jaguar.op & 31))
1436         {
1437                 int32_t r1 = (INT8)((jaguar.op >> 2) & 0xF8) >> 2;
1438                 uint32_t newpc = jaguar.PC + r1;
1439                 CALL_MAME_DEBUG;
1440                 jaguar.op = ROPCODE(jaguar.PC);
1441                 jaguar.PC = newpc;
1442                 (*jaguar.table[jaguar.op >> 10])();
1443
1444                 jaguar_icount -= 3;     // 3 wait states guaranteed
1445         }*/
1446         // normalize flags
1447 /*      gpu_flag_n = (gpu_flag_n ? 1 : 0);
1448         gpu_flag_c = (gpu_flag_c ? 1 : 0);
1449         gpu_flag_z = (gpu_flag_z ? 1 : 0);*/
1450         // KLUDGE: Used by BRANCH_CONDITION
1451         uint32_t jaguar_flags = (gpu_flag_n << 2) | (gpu_flag_c << 1) | gpu_flag_z;
1452
1453         if (BRANCH_CONDITION(IMM_2))
1454         {
1455 #ifdef GPU_DIS_JR
1456         if (doGPUDis)
1457                 WriteLog("Branched!\n");
1458 #endif
1459 if (gpu_start_log)
1460         WriteLog("    --> JR: Branch taken.\n");
1461                 int32_t offset = (IMM_1 & 0x10 ? 0xFFFFFFF0 | IMM_1 : IMM_1);           // Sign extend IMM_1
1462                 int32_t delayed_pc = gpu_pc + (offset * 2);
1463                 GPUExec(1);
1464                 gpu_pc = delayed_pc;
1465 /*              uint16_t opcode = GPUReadWord(gpu_pc, GPU);
1466                 gpu_opcode_first_parameter = (opcode >> 5) & 0x1F;
1467                 gpu_opcode_second_parameter = opcode & 0x1F;
1468
1469                 gpu_pc = delayed_pc;
1470                 gpu_opcode[opcode>>10]();//*/
1471         }
1472 #ifdef GPU_DIS_JR
1473         else
1474                 if (doGPUDis)
1475                         WriteLog("Branch NOT taken.\n");
1476 #endif
1477 }
1478
1479
1480 static void gpu_opcode_add(void)
1481 {
1482 #ifdef GPU_DIS_ADD
1483         if (doGPUDis)
1484                 WriteLog("%06X: ADD    R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1485 #endif
1486         uint32_t res = RN + RM;
1487         CLR_ZNC; SET_ZNC_ADD(RN, RM, res);
1488         RN = res;
1489 #ifdef GPU_DIS_ADD
1490         if (doGPUDis)
1491                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1492 #endif
1493 }
1494
1495
1496 static void gpu_opcode_addc(void)
1497 {
1498 #ifdef GPU_DIS_ADDC
1499         if (doGPUDis)
1500                 WriteLog("%06X: ADDC   R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1501 #endif
1502 /*      int dreg = jaguar.op & 31;
1503         uint32_t r1 = jaguar.r[(jaguar.op >> 5) & 31];
1504         uint32_t r2 = jaguar.r[dreg];
1505         uint32_t res = r2 + r1 + ((jaguar.FLAGS >> 1) & 1);
1506         jaguar.r[dreg] = res;
1507         CLR_ZNC; SET_ZNC_ADD(r2,r1,res);*/
1508
1509         uint32_t res = RN + RM + gpu_flag_c;
1510         uint32_t carry = gpu_flag_c;
1511 //      SET_ZNC_ADD(RN, RM, res); //???BUG??? Yes!
1512         SET_ZNC_ADD(RN + carry, RM, res);
1513 //      SET_ZNC_ADD(RN, RM + carry, res);
1514         RN = res;
1515 #ifdef GPU_DIS_ADDC
1516         if (doGPUDis)
1517                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1518 #endif
1519 }
1520
1521
1522 static void gpu_opcode_addq(void)
1523 {
1524 #ifdef GPU_DIS_ADDQ
1525         if (doGPUDis)
1526                 WriteLog("%06X: ADDQ   #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1527 #endif
1528         uint32_t r1 = gpu_convert_zero[IMM_1];
1529         uint32_t res = RN + r1;
1530         CLR_ZNC; SET_ZNC_ADD(RN, r1, res);
1531         RN = res;
1532 #ifdef GPU_DIS_ADDQ
1533         if (doGPUDis)
1534                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1535 #endif
1536 }
1537
1538
1539 static void gpu_opcode_addqt(void)
1540 {
1541 #ifdef GPU_DIS_ADDQT
1542         if (doGPUDis)
1543                 WriteLog("%06X: ADDQT  #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1544 #endif
1545         RN += gpu_convert_zero[IMM_1];
1546 #ifdef GPU_DIS_ADDQT
1547         if (doGPUDis)
1548                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1549 #endif
1550 }
1551
1552
1553 static void gpu_opcode_sub(void)
1554 {
1555 #ifdef GPU_DIS_SUB
1556         if (doGPUDis)
1557                 WriteLog("%06X: SUB    R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1558 #endif
1559         uint32_t res = RN - RM;
1560         SET_ZNC_SUB(RN, RM, res);
1561         RN = res;
1562 #ifdef GPU_DIS_SUB
1563         if (doGPUDis)
1564                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1565 #endif
1566 }
1567
1568
1569 static void gpu_opcode_subc(void)
1570 {
1571 #ifdef GPU_DIS_SUBC
1572         if (doGPUDis)
1573                 WriteLog("%06X: SUBC   R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1574 #endif
1575         // This is how the GPU ALU does it--Two's complement with inverted carry
1576         uint64_t res = (uint64_t)RN + (uint64_t)(RM ^ 0xFFFFFFFF) + (gpu_flag_c ^ 1);
1577         // Carry out of the result is inverted too
1578         gpu_flag_c = ((res >> 32) & 0x01) ^ 1;
1579         RN = (res & 0xFFFFFFFF);
1580         SET_ZN(RN);
1581 #ifdef GPU_DIS_SUBC
1582         if (doGPUDis)
1583                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1584 #endif
1585 }
1586
1587
1588 static void gpu_opcode_subq(void)
1589 {
1590 #ifdef GPU_DIS_SUBQ
1591         if (doGPUDis)
1592                 WriteLog("%06X: SUBQ   #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1593 #endif
1594         uint32_t r1 = gpu_convert_zero[IMM_1];
1595         uint32_t res = RN - r1;
1596         SET_ZNC_SUB(RN, r1, res);
1597         RN = res;
1598 #ifdef GPU_DIS_SUBQ
1599         if (doGPUDis)
1600                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1601 #endif
1602 }
1603
1604
1605 static void gpu_opcode_subqt(void)
1606 {
1607 #ifdef GPU_DIS_SUBQT
1608         if (doGPUDis)
1609                 WriteLog("%06X: SUBQT  #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1610 #endif
1611         RN -= gpu_convert_zero[IMM_1];
1612 #ifdef GPU_DIS_SUBQT
1613         if (doGPUDis)
1614                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1615 #endif
1616 }
1617
1618
1619 static void gpu_opcode_cmp(void)
1620 {
1621 #ifdef GPU_DIS_CMP
1622         if (doGPUDis)
1623                 WriteLog("%06X: CMP    R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1624 #endif
1625         uint32_t res = RN - RM;
1626         SET_ZNC_SUB(RN, RM, res);
1627 #ifdef GPU_DIS_CMP
1628         if (doGPUDis)
1629                 WriteLog("[NCZ:%u%u%u]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z);
1630 #endif
1631 }
1632
1633
1634 static void gpu_opcode_cmpq(void)
1635 {
1636         static int32_t sqtable[32] =
1637                 { 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,-16,-15,-14,-13,-12,-11,-10,-9,-8,-7,-6,-5,-4,-3,-2,-1 };
1638 #ifdef GPU_DIS_CMPQ
1639         if (doGPUDis)
1640                 WriteLog("%06X: CMPQ   #%d, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, sqtable[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1641 #endif
1642         uint32_t r1 = sqtable[IMM_1 & 0x1F]; // I like this better -> (INT8)(jaguar.op >> 2) >> 3;
1643         uint32_t res = RN - r1;
1644         SET_ZNC_SUB(RN, r1, res);
1645 #ifdef GPU_DIS_CMPQ
1646         if (doGPUDis)
1647                 WriteLog("[NCZ:%u%u%u]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z);
1648 #endif
1649 }
1650
1651
1652 static void gpu_opcode_and(void)
1653 {
1654 #ifdef GPU_DIS_AND
1655         if (doGPUDis)
1656                 WriteLog("%06X: AND    R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1657 #endif
1658         RN = RN & RM;
1659         SET_ZN(RN);
1660 #ifdef GPU_DIS_AND
1661         if (doGPUDis)
1662                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1663 #endif
1664 }
1665
1666
1667 static void gpu_opcode_or(void)
1668 {
1669 #ifdef GPU_DIS_OR
1670         if (doGPUDis)
1671                 WriteLog("%06X: OR     R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1672 #endif
1673         RN = RN | RM;
1674         SET_ZN(RN);
1675 #ifdef GPU_DIS_OR
1676         if (doGPUDis)
1677                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1678 #endif
1679 }
1680
1681
1682 static void gpu_opcode_xor(void)
1683 {
1684 #ifdef GPU_DIS_XOR
1685         if (doGPUDis)
1686                 WriteLog("%06X: XOR    R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1687 #endif
1688         RN = RN ^ RM;
1689         SET_ZN(RN);
1690 #ifdef GPU_DIS_XOR
1691         if (doGPUDis)
1692                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1693 #endif
1694 }
1695
1696
1697 static void gpu_opcode_not(void)
1698 {
1699 #ifdef GPU_DIS_NOT
1700         if (doGPUDis)
1701                 WriteLog("%06X: NOT    R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1702 #endif
1703         RN = ~RN;
1704         SET_ZN(RN);
1705 #ifdef GPU_DIS_NOT
1706         if (doGPUDis)
1707                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1708 #endif
1709 }
1710
1711
1712 static void gpu_opcode_move_pc(void)
1713 {
1714 #ifdef GPU_DIS_MOVEPC
1715         if (doGPUDis)
1716                 WriteLog("%06X: MOVE   PC, R%02u [NCZ:%u%u%u, PC=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, gpu_pc-2, IMM_2, RN);
1717 #endif
1718         // Should be previous PC--this might not always be previous instruction!
1719         // Then again, this will point right at the *current* instruction, i.e., MOVE PC,R!
1720         RN = gpu_pc - 2;
1721 #ifdef GPU_DIS_MOVEPC
1722         if (doGPUDis)
1723                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1724 #endif
1725 }
1726
1727
1728 static void gpu_opcode_sat8(void)
1729 {
1730 #ifdef GPU_DIS_SAT8
1731         if (doGPUDis)
1732                 WriteLog("%06X: SAT8   R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1733 #endif
1734         RN = ((int32_t)RN < 0 ? 0 : (RN > 0xFF ? 0xFF : RN));
1735         SET_ZN(RN);
1736 #ifdef GPU_DIS_SAT8
1737         if (doGPUDis)
1738                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1739 #endif
1740 }
1741
1742
1743 static void gpu_opcode_sat16(void)
1744 {
1745         RN = ((int32_t)RN < 0 ? 0 : (RN > 0xFFFF ? 0xFFFF : RN));
1746         SET_ZN(RN);
1747 }
1748
1749 static void gpu_opcode_sat24(void)
1750 {
1751         RN = ((int32_t)RN < 0 ? 0 : (RN > 0xFFFFFF ? 0xFFFFFF : RN));
1752         SET_ZN(RN);
1753 }
1754
1755
1756 static void gpu_opcode_store_r14_indexed(void)
1757 {
1758 #ifdef GPU_DIS_STORE14I
1759         if (doGPUDis)
1760                 WriteLog("%06X: STORE  R%02u, (R14+$%02X) [NCZ:%u%u%u, R%02u=%08X, R14+$%02X=%08X]\n", gpu_pc-2, IMM_2, gpu_convert_zero[IMM_1] << 2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, gpu_convert_zero[IMM_1] << 2, gpu_reg[14]+(gpu_convert_zero[IMM_1] << 2));
1761 #endif
1762 #ifdef GPU_CORRECT_ALIGNMENT
1763         uint32_t address = gpu_reg[14] + (gpu_convert_zero[IMM_1] << 2);
1764         
1765         if (address >= 0xF03000 && address <= 0xF03FFF)
1766                 GPUWriteLong(address & 0xFFFFFFFC, RN, GPU);
1767         else
1768                 GPUWriteLong(address, RN, GPU);
1769 #else
1770         GPUWriteLong(gpu_reg[14] + (gpu_convert_zero[IMM_1] << 2), RN, GPU);
1771 #endif
1772 }
1773
1774
1775 static void gpu_opcode_store_r15_indexed(void)
1776 {
1777 #ifdef GPU_DIS_STORE15I
1778         if (doGPUDis)
1779                 WriteLog("%06X: STORE  R%02u, (R15+$%02X) [NCZ:%u%u%u, R%02u=%08X, R15+$%02X=%08X]\n", gpu_pc-2, IMM_2, gpu_convert_zero[IMM_1] << 2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, gpu_convert_zero[IMM_1] << 2, gpu_reg[15]+(gpu_convert_zero[IMM_1] << 2));
1780 #endif
1781 #ifdef GPU_CORRECT_ALIGNMENT
1782         uint32_t address = gpu_reg[15] + (gpu_convert_zero[IMM_1] << 2);
1783
1784         if (address >= 0xF03000 && address <= 0xF03FFF)
1785                 GPUWriteLong(address & 0xFFFFFFFC, RN, GPU);
1786         else
1787                 GPUWriteLong(address, RN, GPU);
1788 #else
1789         GPUWriteLong(gpu_reg[15] + (gpu_convert_zero[IMM_1] << 2), RN, GPU);
1790 #endif
1791 }
1792
1793
1794 static void gpu_opcode_load_r14_ri(void)
1795 {
1796 #ifdef GPU_DIS_LOAD14R
1797         if (doGPUDis)
1798                 WriteLog("%06X: LOAD   (R14+R%02u), R%02u [NCZ:%u%u%u, R14+R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM+gpu_reg[14], IMM_2, RN);
1799 #endif
1800 #ifdef GPU_CORRECT_ALIGNMENT
1801         uint32_t address = gpu_reg[14] + RM;
1802
1803         if (address >= 0xF03000 && address <= 0xF03FFF)
1804                 RN = GPUReadLong(address & 0xFFFFFFFC, GPU);
1805         else
1806                 RN = GPUReadLong(address, GPU);
1807 #else
1808         RN = GPUReadLong(gpu_reg[14] + RM, GPU);
1809 #endif
1810 #ifdef GPU_DIS_LOAD14R
1811         if (doGPUDis)
1812                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1813 #endif
1814 }
1815
1816
1817 static void gpu_opcode_load_r15_ri(void)
1818 {
1819 #ifdef GPU_DIS_LOAD15R
1820         if (doGPUDis)
1821                 WriteLog("%06X: LOAD   (R15+R%02u), R%02u [NCZ:%u%u%u, R15+R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM+gpu_reg[15], IMM_2, RN);
1822 #endif
1823 #ifdef GPU_CORRECT_ALIGNMENT
1824         uint32_t address = gpu_reg[15] + RM;
1825
1826         if (address >= 0xF03000 && address <= 0xF03FFF)
1827                 RN = GPUReadLong(address & 0xFFFFFFFC, GPU);
1828         else
1829                 RN = GPUReadLong(address, GPU);
1830 #else
1831         RN = GPUReadLong(gpu_reg[15] + RM, GPU);
1832 #endif
1833 #ifdef GPU_DIS_LOAD15R
1834         if (doGPUDis)
1835                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1836 #endif
1837 }
1838
1839
1840 static void gpu_opcode_store_r14_ri(void)
1841 {
1842 #ifdef GPU_DIS_STORE14R
1843         if (doGPUDis)
1844                 WriteLog("%06X: STORE  R%02u, (R14+R%02u) [NCZ:%u%u%u, R%02u=%08X, R14+R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM+gpu_reg[14]);
1845 #endif
1846 #ifdef GPU_CORRECT_ALIGNMENT
1847         uint32_t address = gpu_reg[14] + RM;
1848
1849         if (address >= 0xF03000 && address <= 0xF03FFF)
1850                 GPUWriteLong(address & 0xFFFFFFFC, RN, GPU);
1851         else
1852                 GPUWriteLong(address, RN, GPU);
1853 #else
1854         GPUWriteLong(gpu_reg[14] + RM, RN, GPU);
1855 #endif
1856 }
1857
1858
1859 static void gpu_opcode_store_r15_ri(void)
1860 {
1861 #ifdef GPU_DIS_STORE15R
1862         if (doGPUDis)
1863                 WriteLog("%06X: STORE  R%02u, (R15+R%02u) [NCZ:%u%u%u, R%02u=%08X, R15+R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM+gpu_reg[15]);
1864 #endif
1865 #ifdef GPU_CORRECT_ALIGNMENT_STORE
1866         uint32_t address = gpu_reg[15] + RM;
1867
1868         if (address >= 0xF03000 && address <= 0xF03FFF)
1869                 GPUWriteLong(address & 0xFFFFFFFC, RN, GPU);
1870         else
1871                 GPUWriteLong(address, RN, GPU);
1872 #else
1873         GPUWriteLong(gpu_reg[15] + RM, RN, GPU);
1874 #endif
1875 }
1876
1877
// NOP: changes no state. The only effect is an optional disassembly line
// when GPU_DIS_NOP is compiled in and doGPUDis is set.
static void gpu_opcode_nop(void)
{
#ifdef GPU_DIS_NOP
        if (!doGPUDis)
                return;

        WriteLog("%06X: NOP    [NCZ:%u%u%u]\n", gpu_pc-2, gpu_flag_n, gpu_flag_c, gpu_flag_z);
#endif
}
1885
1886
1887 static void gpu_opcode_pack(void)
1888 {
1889 #ifdef GPU_DIS_PACK
1890         if (doGPUDis)
1891                 WriteLog("%06X: %s R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, (!IMM_1 ? "PACK  " : "UNPACK"), IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1892 #endif
1893         uint32_t val = RN;
1894
1895 //BUG!  if (RM == 0)                            // Pack
1896         if (IMM_1 == 0)                         // Pack
1897                 RN = ((val >> 10) & 0x0000F000) | ((val >> 5) & 0x00000F00) | (val & 0x000000FF);
1898         else                                            // Unpack
1899                 RN = ((val & 0x0000F000) << 10) | ((val & 0x00000F00) << 5) | (val & 0x000000FF);
1900 #ifdef GPU_DIS_PACK
1901         if (doGPUDis)
1902                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1903 #endif
1904 }
1905
1906
1907 static void gpu_opcode_storeb(void)
1908 {
1909 #ifdef GPU_DIS_STOREB
1910         if (doGPUDis)
1911                 WriteLog("%06X: STOREB R%02u, (R%02u) [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM);
1912 #endif
1913 //Is this right???
1914 // Would appear to be so...!
1915         if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1916                 GPUWriteLong(RM, RN & 0xFF, GPU);
1917         else
1918                 JaguarWriteByte(RM, RN, GPU);
1919 }
1920
1921
1922 static void gpu_opcode_storew(void)
1923 {
1924 #ifdef GPU_DIS_STOREW
1925         if (doGPUDis)
1926                 WriteLog("%06X: STOREW R%02u, (R%02u) [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM);
1927 #endif
1928 #ifdef GPU_CORRECT_ALIGNMENT
1929         if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1930                 GPUWriteLong(RM & 0xFFFFFFFE, RN & 0xFFFF, GPU);
1931         else
1932                 JaguarWriteWord(RM, RN, GPU);
1933 #else
1934         if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1935                 GPUWriteLong(RM, RN & 0xFFFF, GPU);
1936         else
1937                 JaguarWriteWord(RM, RN, GPU);
1938 #endif
1939 }
1940
1941
1942 static void gpu_opcode_store(void)
1943 {
1944 #ifdef GPU_DIS_STORE
1945         if (doGPUDis)
1946                 WriteLog("%06X: STORE  R%02u, (R%02u) [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM);
1947 #endif
1948 #ifdef GPU_CORRECT_ALIGNMENT
1949         if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1950                 GPUWriteLong(RM & 0xFFFFFFFC, RN, GPU);
1951         else
1952                 GPUWriteLong(RM, RN, GPU);
1953 #else
1954         GPUWriteLong(RM, RN, GPU);
1955 #endif
1956 }
1957
1958
1959 static void gpu_opcode_storep(void)
1960 {
1961 #ifdef GPU_CORRECT_ALIGNMENT
1962         if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1963         {
1964                 GPUWriteLong((RM & 0xFFFFFFF8) + 0, gpu_hidata, GPU);
1965                 GPUWriteLong((RM & 0xFFFFFFF8) + 4, RN, GPU);
1966         }
1967         else
1968         {
1969                 GPUWriteLong(RM + 0, gpu_hidata, GPU);
1970                 GPUWriteLong(RM + 4, RN, GPU);
1971         }
1972 #else
1973         GPUWriteLong(RM + 0, gpu_hidata, GPU);
1974         GPUWriteLong(RM + 4, RN, GPU);
1975 #endif
1976 }
1977
1978 static void gpu_opcode_loadb(void)
1979 {
1980 #ifdef GPU_DIS_LOADB
1981         if (doGPUDis)
1982                 WriteLog("%06X: LOADB  (R%02u), R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1983 #endif
1984         if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1985                 RN = GPUReadLong(RM, GPU) & 0xFF;
1986         else
1987                 RN = JaguarReadByte(RM, GPU);
1988 #ifdef GPU_DIS_LOADB
1989         if (doGPUDis)
1990                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1991 #endif
1992 }
1993
1994
1995 static void gpu_opcode_loadw(void)
1996 {
1997 #ifdef GPU_DIS_LOADW
1998         if (doGPUDis)
1999                 WriteLog("%06X: LOADW  (R%02u), R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2000 #endif
2001 #ifdef GPU_CORRECT_ALIGNMENT
2002         if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
2003                 RN = GPUReadLong(RM & 0xFFFFFFFE, GPU) & 0xFFFF;
2004         else
2005                 RN = JaguarReadWord(RM, GPU);
2006 #else
2007         if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
2008                 RN = GPUReadLong(RM, GPU) & 0xFFFF;
2009         else
2010                 RN = JaguarReadWord(RM, GPU);
2011 #endif
2012 #ifdef GPU_DIS_LOADW
2013         if (doGPUDis)
2014                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2015 #endif
2016 }
2017
2018
2019 // According to the docs, & "Do The Same", this address is long aligned...
2020 // So let's try it:
2021 // And it works!!! Need to fix all instances...
2022 // Also, Power Drive Rally seems to contradict the idea that only LOADs in
2023 // the $F03000-$F03FFF range are aligned...
2024 #warning "!!! Alignment issues, need to find definitive final word on this !!!"
2025 /*
2026 Preliminary testing on real hardware seems to confirm that something strange goes on
2027 with unaligned reads in main memory. When the address is off by 1, the result is the
2028 same as the long address with the top byte replaced by something. So if the read is
from $401, and $400 has 12 34 56 78, the value read will be $nn345678, where nn is a currently unknown value.
2030 When the address is off by 2, the result would be $nnnn5678, where nnnn is unknown.
2031 When the address is off by 3, the result would be $nnnnnn78, where nnnnnn is unknown.
2032 It may be that the "unknown" values come from the prefetch queue, but not sure how
2033 to test that. They seem to be stable, though, which would indicate such a mechanism.
2034 Sometimes, however, the off by 2 case returns $12345678!
2035 */
2036 static void gpu_opcode_load(void)
2037 {
2038 #ifdef GPU_DIS_LOAD
2039         if (doGPUDis)
2040                 WriteLog("%06X: LOAD   (R%02u), R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2041 #endif
2042 #ifdef GPU_CORRECT_ALIGNMENT
2043         uint32_t mask[4] = { 0x00000000, 0xFF000000, 0xFFFF0000, 0xFFFFFF00 };
2044 //      if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
2045                 RN = GPUReadLong(RM & 0xFFFFFFFC, GPU);
2046 //              RN = GPUReadLong(RM & 0x00FFFFFC, GPU);
2047 //      else
2048 //              RN = GPUReadLong(RM, GPU);
2049         // Simulate garbage in unaligned reads...
2050 //seems that this behavior is different in GPU mem vs. main mem...
2051 //      if ((RM < 0xF03000) || (RM > 0xF0BFFF))
2052 //              RN |= mask[RM & 0x03];
2053 #else
2054         RN = GPUReadLong(RM, GPU);
2055 #endif
2056 #ifdef GPU_DIS_LOAD
2057         if (doGPUDis)
2058                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2059 #endif
2060 }
2061
2062
2063 static void gpu_opcode_loadp(void)
2064 {
2065 #ifdef GPU_CORRECT_ALIGNMENT
2066         if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
2067         {
2068                 gpu_hidata = GPUReadLong((RM & 0xFFFFFFF8) + 0, GPU);
2069                 RN                 = GPUReadLong((RM & 0xFFFFFFF8) + 4, GPU);
2070         }
2071         else
2072         {
2073                 gpu_hidata = GPUReadLong(RM + 0, GPU);
2074                 RN                 = GPUReadLong(RM + 4, GPU);
2075         }
2076 #else
2077         gpu_hidata = GPUReadLong(RM + 0, GPU);
2078         RN                 = GPUReadLong(RM + 4, GPU);
2079 #endif
2080 }
2081
2082
2083 static void gpu_opcode_load_r14_indexed(void)
2084 {
2085 #ifdef GPU_DIS_LOAD14I
2086         if (doGPUDis)
2087                 WriteLog("%06X: LOAD   (R14+$%02X), R%02u [NCZ:%u%u%u, R14+$%02X=%08X, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1] << 2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, gpu_convert_zero[IMM_1] << 2, gpu_reg[14]+(gpu_convert_zero[IMM_1] << 2), IMM_2, RN);
2088 #endif
2089 #ifdef GPU_CORRECT_ALIGNMENT
2090         uint32_t address = gpu_reg[14] + (gpu_convert_zero[IMM_1] << 2);
2091
2092         if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
2093                 RN = GPUReadLong(address & 0xFFFFFFFC, GPU);
2094         else
2095                 RN = GPUReadLong(address, GPU);
2096 #else
2097         RN = GPUReadLong(gpu_reg[14] + (gpu_convert_zero[IMM_1] << 2), GPU);
2098 #endif
2099 #ifdef GPU_DIS_LOAD14I
2100         if (doGPUDis)
2101                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2102 #endif
2103 }
2104
2105
2106 static void gpu_opcode_load_r15_indexed(void)
2107 {
2108 #ifdef GPU_DIS_LOAD15I
2109         if (doGPUDis)
2110                 WriteLog("%06X: LOAD   (R15+$%02X), R%02u [NCZ:%u%u%u, R15+$%02X=%08X, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1] << 2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, gpu_convert_zero[IMM_1] << 2, gpu_reg[15]+(gpu_convert_zero[IMM_1] << 2), IMM_2, RN);
2111 #endif
2112 #ifdef GPU_CORRECT_ALIGNMENT
2113         uint32_t address = gpu_reg[15] + (gpu_convert_zero[IMM_1] << 2);
2114
2115         if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
2116                 RN = GPUReadLong(address & 0xFFFFFFFC, GPU);
2117         else
2118                 RN = GPUReadLong(address, GPU);
2119 #else
2120         RN = GPUReadLong(gpu_reg[15] + (gpu_convert_zero[IMM_1] << 2), GPU);
2121 #endif
2122 #ifdef GPU_DIS_LOAD15I
2123         if (doGPUDis)
2124                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2125 #endif
2126 }
2127
2128
2129 static void gpu_opcode_movei(void)
2130 {
2131 #ifdef GPU_DIS_MOVEI
2132         if (doGPUDis)
2133                 WriteLog("%06X: MOVEI  #$%08X, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, (uint32_t)GPUReadWord(gpu_pc) | ((uint32_t)GPUReadWord(gpu_pc + 2) << 16), IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2134 #endif
2135         // This instruction is followed by 32-bit value in LSW / MSW format...
2136         RN = (uint32_t)GPUReadWord(gpu_pc, GPU) | ((uint32_t)GPUReadWord(gpu_pc + 2, GPU) << 16);
2137         gpu_pc += 4;
2138 #ifdef GPU_DIS_MOVEI
2139         if (doGPUDis)
2140                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2141 #endif
2142 }
2143
2144
2145 static void gpu_opcode_moveta(void)
2146 {
2147 #ifdef GPU_DIS_MOVETA
2148         if (doGPUDis)
2149                 WriteLog("%06X: MOVETA R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u(alt)=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, ALTERNATE_RN);
2150 #endif
2151         ALTERNATE_RN = RM;
2152 #ifdef GPU_DIS_MOVETA
2153         if (doGPUDis)
2154                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u(alt)=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, ALTERNATE_RN);
2155 #endif
2156 }
2157
2158
2159 static void gpu_opcode_movefa(void)
2160 {
2161 #ifdef GPU_DIS_MOVEFA
2162         if (doGPUDis)
2163                 WriteLog("%06X: MOVEFA R%02u, R%02u [NCZ:%u%u%u, R%02u(alt)=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, ALTERNATE_RM, IMM_2, RN);
2164 #endif
2165         RN = ALTERNATE_RM;
2166 #ifdef GPU_DIS_MOVEFA
2167         if (doGPUDis)
2168                 WriteLog("[NCZ:%u%u%u, R%02u(alt)=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, ALTERNATE_RM, IMM_2, RN);
2169 #endif
2170 }
2171
2172
2173 static void gpu_opcode_move(void)
2174 {
2175 #ifdef GPU_DIS_MOVE
2176         if (doGPUDis)
2177                 WriteLog("%06X: MOVE   R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2178 #endif
2179         RN = RM;
2180 #ifdef GPU_DIS_MOVE
2181         if (doGPUDis)
2182                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2183 #endif
2184 }
2185
2186
2187 static void gpu_opcode_moveq(void)
2188 {
2189 #ifdef GPU_DIS_MOVEQ
2190         if (doGPUDis)
2191                 WriteLog("%06X: MOVEQ  #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2192 #endif
2193         RN = IMM_1;
2194 #ifdef GPU_DIS_MOVEQ
2195         if (doGPUDis)
2196                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2197 #endif
2198 }
2199
2200
2201 static void gpu_opcode_resmac(void)
2202 {
2203         RN = gpu_acc;
2204 }
2205
2206
2207 static void gpu_opcode_imult(void)
2208 {
2209 #ifdef GPU_DIS_IMULT
2210         if (doGPUDis)
2211                 WriteLog("%06X: IMULT  R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2212 #endif
2213         RN = (int16_t)RN * (int16_t)RM;
2214         SET_ZN(RN);
2215 #ifdef GPU_DIS_IMULT
2216         if (doGPUDis)
2217                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2218 #endif
2219 }
2220
2221
2222 static void gpu_opcode_mult(void)
2223 {
2224 #ifdef GPU_DIS_MULT
2225         if (doGPUDis)
2226                 WriteLog("%06X: MULT   R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2227 #endif
2228         RN = (uint16_t)RM * (uint16_t)RN;
2229 //      RN = (RM & 0xFFFF) * (RN & 0xFFFF);
2230         SET_ZN(RN);
2231 #ifdef GPU_DIS_MULT
2232         if (doGPUDis)
2233                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2234 #endif
2235 }
2236
2237
2238 static void gpu_opcode_bclr(void)
2239 {
2240 #ifdef GPU_DIS_BCLR
2241         if (doGPUDis)
2242                 WriteLog("%06X: BCLR   #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2243 #endif
2244         uint32_t res = RN & ~(1 << IMM_1);
2245         RN = res;
2246         SET_ZN(res);
2247 #ifdef GPU_DIS_BCLR
2248         if (doGPUDis)
2249                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2250 #endif
2251 }
2252
2253
2254 static void gpu_opcode_btst(void)
2255 {
2256 #ifdef GPU_DIS_BTST
2257         if (doGPUDis)
2258                 WriteLog("%06X: BTST   #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2259 #endif
2260         gpu_flag_z = (~RN >> IMM_1) & 1;
2261 #ifdef GPU_DIS_BTST
2262         if (doGPUDis)
2263                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2264 #endif
2265 }
2266
2267
2268 static void gpu_opcode_bset(void)
2269 {
2270 #ifdef GPU_DIS_BSET
2271         if (doGPUDis)
2272                 WriteLog("%06X: BSET   #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2273 #endif
2274         uint32_t res = RN | (1 << IMM_1);
2275         RN = res;
2276         SET_ZN(res);
2277 #ifdef GPU_DIS_BSET
2278         if (doGPUDis)
2279                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2280 #endif
2281 }
2282
2283
2284 static void gpu_opcode_imacn(void)
2285 {
2286         uint32_t res = (int16_t)RM * (int16_t)(RN);
2287         gpu_acc += res;
2288 }
2289
2290
2291 static void gpu_opcode_mtoi(void)
2292 {
2293         uint32_t _RM = RM;
2294         uint32_t res = RN = (((int32_t)_RM >> 8) & 0xFF800000) | (_RM & 0x007FFFFF);
2295         SET_ZN(res);
2296 }
2297
2298
2299 static void gpu_opcode_normi(void)
2300 {
2301         uint32_t _RM = RM;
2302         uint32_t res = 0;
2303
2304         if (_RM)
2305         {
2306                 while ((_RM & 0xFFC00000) == 0)
2307                 {
2308                         _RM <<= 1;
2309                         res--;
2310                 }
2311                 while ((_RM & 0xFF800000) != 0)
2312                 {
2313                         _RM >>= 1;
2314                         res++;
2315                 }
2316         }
2317         RN = res;
2318         SET_ZN(res);
2319 }
2320
2321 static void gpu_opcode_mmult(void)
2322 {
2323         int count       = gpu_matrix_control & 0x0F;    // Matrix width
2324         uint32_t addr = gpu_pointer_to_matrix;          // In the GPU's RAM
2325         int64_t accum = 0;
2326         uint32_t res;
2327
2328         if (gpu_matrix_control & 0x10)                          // Column stepping
2329         {
2330                 for(int i=0; i<count; i++)
2331                 {
2332                         int16_t a;
2333                         if (i & 0x01)
2334                                 a = (int16_t)((gpu_alternate_reg[IMM_1 + (i >> 1)] >> 16) & 0xFFFF);
2335                         else
2336                                 a = (int16_t)(gpu_alternate_reg[IMM_1 + (i >> 1)] & 0xFFFF);
2337
2338                         int16_t b = ((int16_t)GPUReadWord(addr + 2, GPU));
2339                         accum += a * b;
2340                         addr += 4 * count;
2341                 }
2342         }
2343         else                                                                            // Row stepping
2344         {
2345                 for(int i=0; i<count; i++)
2346                 {
2347                         int16_t a;
2348                         if (i & 0x01)
2349                                 a = (int16_t)((gpu_alternate_reg[IMM_1 + (i >> 1)] >> 16) & 0xFFFF);
2350                         else
2351                                 a = (int16_t)(gpu_alternate_reg[IMM_1 + (i >> 1)] & 0xFFFF);
2352
2353                         int16_t b = ((int16_t)GPUReadWord(addr + 2, GPU));
2354                         accum += a * b;
2355                         addr += 4;
2356                 }
2357         }
2358         RN = res = (int32_t)accum;
2359         // carry flag to do (out of the last add)
2360         SET_ZN(res);
2361 }
2362
2363
2364 static void gpu_opcode_abs(void)
2365 {
2366 #ifdef GPU_DIS_ABS
2367         if (doGPUDis)
2368                 WriteLog("%06X: ABS    R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2369 #endif
2370         gpu_flag_c = RN >> 31;
2371         if (RN == 0x80000000)
2372         //Is 0x80000000 a positive number? If so, then we need to set C to 0 as well!
2373                 gpu_flag_n = 1, gpu_flag_z = 0;
2374         else
2375         {
2376                 if (gpu_flag_c)
2377                         RN = -RN;
2378                 gpu_flag_n = 0; SET_FLAG_Z(RN);
2379         }
2380 #ifdef GPU_DIS_ABS
2381         if (doGPUDis)
2382                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2383 #endif
2384 }
2385
2386
2387 static void gpu_opcode_div(void)        // RN / RM
2388 {
2389 #ifdef GPU_DIS_DIV
2390         if (doGPUDis)
2391                 WriteLog("%06X: DIV    R%02u, R%02u (%s) [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, (gpu_div_control & 0x01 ? "16.16" : "32"), gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2392 #endif
2393 #if 0
2394         if (RM)
2395         {
2396                 if (gpu_div_control & 0x01)             // 16.16 division
2397                 {
2398                         gpu_remain = ((uint64_t)RN << 16) % RM;
2399                         RN = ((uint64_t)RN << 16) / RM;
2400                 }
2401                 else
2402                 {
2403                         // We calculate the remainder first because we destroy RN after
2404                         // this by assigning it to itself.
2405                         gpu_remain = RN % RM;
2406                         RN = RN / RM;
2407                 }
2408         }
2409         else
2410         {
2411                 // This is what happens according to SCPCD. NYAN!
2412                 RN = 0xFFFFFFFF;
2413                 gpu_remain = 0;
2414         }
2415 #else
2416         // Real algorithm, courtesy of SCPCD: NYAN!
2417         uint32_t q = RN;
2418         uint32_t r = 0;
2419
2420         // If 16.16 division, stuff top 16 bits of RN into remainder and put the
2421         // bottom 16 of RN in top 16 of quotient
2422         if (gpu_div_control & 0x01)
2423                 q <<= 16, r = RN >> 16;
2424
2425         for(int i=0; i<32; i++)
2426         {
2427 //              uint32_t sign = (r >> 31) & 0x01;
2428                 uint32_t sign = r & 0x80000000;
2429                 r = (r << 1) | ((q >> 31) & 0x01);
2430                 r += (sign ? RM : -RM);
2431                 q = (q << 1) | (((~r) >> 31) & 0x01);
2432         }
2433
2434         RN = q;
2435         gpu_remain = r;
2436 #endif
2437
2438 #ifdef GPU_DIS_DIV
2439         if (doGPUDis)
2440                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] Remainder: %08X\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN, gpu_remain);
2441 #endif
2442 }
2443
2444
2445 static void gpu_opcode_imultn(void)
2446 {
2447         uint32_t res = (int32_t)((int16_t)RN * (int16_t)RM);
2448         gpu_acc = (int32_t)res;
2449         SET_FLAG_Z(res);
2450         SET_FLAG_N(res);
2451 }
2452
2453
2454 static void gpu_opcode_neg(void)
2455 {
2456 #ifdef GPU_DIS_NEG
2457         if (doGPUDis)
2458                 WriteLog("%06X: NEG    R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2459 #endif
2460         uint32_t res = -RN;
2461         SET_ZNC_SUB(0, RN, res);
2462         RN = res;
2463 #ifdef GPU_DIS_NEG
2464         if (doGPUDis)
2465                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2466 #endif
2467 }
2468
2469
2470 static void gpu_opcode_shlq(void)
2471 {
2472 #ifdef GPU_DIS_SHLQ
2473         if (doGPUDis)
2474                 WriteLog("%06X: SHLQ   #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, 32 - IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2475 #endif
2476 // Was a bug here...
2477 // (Look at Aaron's code: If r1 = 32, then 32 - 32 = 0 which is wrong!)
2478         int32_t r1 = 32 - IMM_1;
2479         uint32_t res = RN << r1;
2480         SET_ZN(res); gpu_flag_c = (RN >> 31) & 1;
2481         RN = res;
2482 #ifdef GPU_DIS_SHLQ
2483         if (doGPUDis)
2484                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2485 #endif
2486 }
2487
2488
2489 static void gpu_opcode_shrq(void)
2490 {
2491 #ifdef GPU_DIS_SHRQ
2492         if (doGPUDis)
2493                 WriteLog("%06X: SHRQ   #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2494 #endif
2495         int32_t r1 = gpu_convert_zero[IMM_1];
2496         uint32_t res = RN >> r1;
2497         SET_ZN(res); gpu_flag_c = RN & 1;
2498         RN = res;
2499 #ifdef GPU_DIS_SHRQ
2500         if (doGPUDis)
2501                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2502 #endif
2503 }
2504
2505
2506 static void gpu_opcode_ror(void)
2507 {
2508 #ifdef GPU_DIS_ROR
2509         if (doGPUDis)
2510                 WriteLog("%06X: ROR    R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2511 #endif
2512         uint32_t r1 = RM & 0x1F;
2513         uint32_t res = (RN >> r1) | (RN << (32 - r1));
2514         SET_ZN(res); gpu_flag_c = (RN >> 31) & 1;
2515         RN = res;
2516 #ifdef GPU_DIS_ROR
2517         if (doGPUDis)
2518                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2519 #endif
2520 }
2521
2522
2523 static void gpu_opcode_rorq(void)
2524 {
2525 #ifdef GPU_DIS_RORQ
2526         if (doGPUDis)
2527                 WriteLog("%06X: RORQ   #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2528 #endif
2529         uint32_t r1 = gpu_convert_zero[IMM_1 & 0x1F];
2530         uint32_t r2 = RN;
2531         uint32_t res = (r2 >> r1) | (r2 << (32 - r1));
2532         RN = res;
2533         SET_ZN(res); gpu_flag_c = (r2 >> 31) & 0x01;
2534 #ifdef GPU_DIS_RORQ
2535         if (doGPUDis)
2536                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2537 #endif
2538 }
2539
2540
2541 static void gpu_opcode_sha(void)
2542 {
2543 /*      int dreg = jaguar.op & 31;
2544         int32_t r1 = (int32_t)jaguar.r[(jaguar.op >> 5) & 31];
2545         uint32_t r2 = jaguar.r[dreg];
2546         uint32_t res;
2547
2548         CLR_ZNC;
2549         if (r1 < 0)
2550         {
2551                 res = (r1 <= -32) ? 0 : (r2 << -r1);
2552                 jaguar.FLAGS |= (r2 >> 30) & 2;
2553         }
2554         else
2555         {
2556                 res = (r1 >= 32) ? ((int32_t)r2 >> 31) : ((int32_t)r2 >> r1);
2557                 jaguar.FLAGS |= (r2 << 1) & 2;
2558         }
2559         jaguar.r[dreg] = res;
2560         SET_ZN(res);*/
2561
2562 #ifdef GPU_DIS_SHA
2563         if (doGPUDis)
2564                 WriteLog("%06X: SHA    R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2565 #endif
2566         uint32_t res;
2567
2568         if ((int32_t)RM < 0)
2569         {
2570                 res = ((int32_t)RM <= -32) ? 0 : (RN << -(int32_t)RM);
2571                 gpu_flag_c = RN >> 31;
2572         }
2573         else
2574         {
2575                 res = ((int32_t)RM >= 32) ? ((int32_t)RN >> 31) : ((int32_t)RN >> (int32_t)RM);
2576                 gpu_flag_c = RN & 0x01;
2577         }
2578         RN = res;
2579         SET_ZN(res);
2580 #ifdef GPU_DIS_SHA
2581         if (doGPUDis)
2582                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2583 #endif
2584
2585 /*      int32_t sRM=(int32_t)RM;
2586         uint32_t _RN=RN;
2587
2588         if (sRM<0)
2589         {
2590                 uint32_t shift=-sRM;
2591                 if (shift>=32) shift=32;
2592                 gpu_flag_c=(_RN&0x80000000)>>31;
2593                 while (shift)
2594                 {
2595                         _RN<<=1;
2596                         shift--;
2597                 }
2598         }
2599         else
2600         {
2601                 uint32_t shift=sRM;
2602                 if (shift>=32) shift=32;
2603                 gpu_flag_c=_RN&0x1;
2604                 while (shift)
2605                 {
2606                         _RN=((int32_t)_RN)>>1;
2607                         shift--;
2608                 }
2609         }
2610         RN=_RN;
2611         SET_FLAG_Z(_RN);
2612         SET_FLAG_N(_RN);*/
2613 }
2614
2615
2616 static void gpu_opcode_sharq(void)
2617 {
2618 #ifdef GPU_DIS_SHARQ
2619         if (doGPUDis)
2620                 WriteLog("%06X: SHARQ  #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2621 #endif
2622         uint32_t res = (int32_t)RN >> gpu_convert_zero[IMM_1];
2623         SET_ZN(res); gpu_flag_c = RN & 0x01;
2624         RN = res;
2625 #ifdef GPU_DIS_SHARQ
2626         if (doGPUDis)
2627                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2628 #endif
2629 }
2630
2631
2632 static void gpu_opcode_sh(void)
2633 {
2634 #ifdef GPU_DIS_SH
2635         if (doGPUDis)
2636                 WriteLog("%06X: SH     R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2637 #endif
2638         if (RM & 0x80000000)            // Shift left
2639         {
2640                 gpu_flag_c = RN >> 31;
2641                 RN = ((int32_t)RM <= -32 ? 0 : RN << -(int32_t)RM);
2642         }
2643         else                                            // Shift right
2644         {
2645                 gpu_flag_c = RN & 0x01;
2646                 RN = (RM >= 32 ? 0 : RN >> RM);
2647         }
2648         SET_ZN(RN);
2649 #ifdef GPU_DIS_SH
2650         if (doGPUDis)
2651                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2652 #endif
2653 }
2654
2655
2656 //Temporary: Testing only!
2657 //#include "gpu2.cpp"
2658 //#include "gpu3.cpp"
2659
2660 #else
2661
2662
2663 // New thread-safe GPU core
2664
//
// Placeholder entry point for the new thread-safe GPU core. Nothing is
// implemented yet: the argument is ignored and 0 is returned, so that the
// function no longer falls off the end of a non-void function.
//
int GPUCore(void * data)
{
	(void)data;		// Unused until the core is actually written

	return 0;
}
2668
2669 #endif
2670