]> Shamusworld >> Repos - virtualjaguar/blob - src/gpu.cpp
02fa112a37281ca310b7925de28b3b550b3458bb
[virtualjaguar] / src / gpu.cpp
1 #if 1
2
3 //
4 // GPU Core
5 //
6 // Originally by David Raingeard (Cal2)
7 // GCC/SDL port by Niels Wagenaar (Linux/WIN32) and Caz (BeOS)
8 // Cleanups, endian wrongness, and bad ASM amelioration by James Hammons
9 // (C) 2010 Underground Software
10 //
11 // JLH = James Hammons <jlhamm@acm.org>
12 //
13 // Who  When        What
14 // ---  ----------  -------------------------------------------------------------
15 // JLH  01/16/2010  Created this log ;-)
16 // JLH  11/26/2011  Added fixes for LOAD/STORE alignment issues
17
18 //
19 // Note: Endian wrongness probably stems from the MAME origins of this emu and
20 //       the braindead way in which MAME handles memory. :-)
21 //
22 // Problem with not booting the BIOS was the incorrect way that the
23 // SUBC instruction set the carry when the carry was set going in...
24 // Same problem with ADDC...
25 //
26
27 #include "gpu.h"
28
29 #include <stdlib.h>
30 #include <string.h>                                                             // For memset
31 #include "dsp.h"
32 #include "jagdasm.h"
33 #include "jaguar.h"
34 #include "log.h"
35 #include "m68000/m68kinterface.h"
36 //#include "memory.h"
37 #include "tom.h"
38
39
40 // Seems alignment in loads & stores was off...
41 #define GPU_CORRECT_ALIGNMENT
42 //#define GPU_DEBUG
43
44 // For GPU disassembly...
45
46 #if 0
47 #define GPU_DIS_ABS
48 #define GPU_DIS_ADD
49 #define GPU_DIS_ADDC
50 #define GPU_DIS_ADDQ
51 #define GPU_DIS_ADDQT
52 #define GPU_DIS_AND
53 #define GPU_DIS_BCLR
54 #define GPU_DIS_BSET
55 #define GPU_DIS_BTST
56 #define GPU_DIS_CMP
57 #define GPU_DIS_CMPQ
58 #define GPU_DIS_DIV
59 #define GPU_DIS_IMULT
60 #define GPU_DIS_JUMP
61 #define GPU_DIS_JR
62 #define GPU_DIS_LOAD
63 #define GPU_DIS_LOADB
64 #define GPU_DIS_LOADW
65 #define GPU_DIS_LOAD14I
66 #define GPU_DIS_LOAD14R
67 #define GPU_DIS_LOAD15I
68 #define GPU_DIS_LOAD15R
69 #define GPU_DIS_MOVE
70 #define GPU_DIS_MOVEFA
71 #define GPU_DIS_MOVEI
72 #define GPU_DIS_MOVEPC
73 #define GPU_DIS_MOVETA
74 #define GPU_DIS_MOVEQ
75 #define GPU_DIS_MULT
76 #define GPU_DIS_NEG
77 #define GPU_DIS_NOP
78 #define GPU_DIS_NOT
79 #define GPU_DIS_OR
80 #define GPU_DIS_PACK
81 #define GPU_DIS_ROR
82 #define GPU_DIS_RORQ
83 #define GPU_DIS_SAT8
84 #define GPU_DIS_SH
85 #define GPU_DIS_SHA
86 #define GPU_DIS_SHARQ
87 #define GPU_DIS_SHLQ
88 #define GPU_DIS_SHRQ
89 #define GPU_DIS_STORE
90 #define GPU_DIS_STOREB
91 #define GPU_DIS_STOREW
92 #define GPU_DIS_STORE14I
93 #define GPU_DIS_STORE14R
94 #define GPU_DIS_STORE15I
95 #define GPU_DIS_STORE15R
96 #define GPU_DIS_SUB
97 #define GPU_DIS_SUBC
98 #define GPU_DIS_SUBQ
99 #define GPU_DIS_SUBQT
100 #define GPU_DIS_XOR
101
102 //bool doGPUDis = false;
103 bool doGPUDis = true;
104 #endif
105
106 /*
107 GPU opcodes use (BIOS flying ATARI logo):
108 +                     add 357416
109 +                    addq 538030
110 +                   addqt 6999
111 +                     sub 116663
112 +                    subq 188059
113 +                   subqt 15086
114 +                     neg 36097
115 +                     and 233993
116 +                      or 109332
117 +                     xor 1384
118 +                    btst 111924
119 +                    bset 25029
120 +                    bclr 10551
121 +                    mult 28147
122 +                   imult 69148
123 +                     div 64102
124 +                     abs 159394
125 +                    shlq 194690
126 +                    shrq 292587
127 +                   sharq 192649
128 +                    rorq 58672
129 +                     cmp 244963
130 +                    cmpq 114834
131 +                    move 833472
132 +                   moveq 56427
133 +                  moveta 220814
134 +                  movefa 170678
135 +                   movei 152025
136 +                   loadw 108220
137 +                    load 430936
138 +                  storew 3036
139 +                   store 372490
140 +                 move_pc 2330
141 +                    jump 349134
142 +                      jr 529171
143                     mmult 64904
144 +                     nop 432179
145 */
146
147 // Various bits
148
// Masks for bits 9-13. GPUWriteLong applies CINT04FLAGS to the value written
// to the flags register (control offset $00) and shifts the result right by 3
// to clear the corresponding bits in gpu_control. The values are the same as
// INT_CLR0..INT_CLR4 below.
149 #define CINT0FLAG                       0x0200
150 #define CINT1FLAG                       0x0400
151 #define CINT2FLAG                       0x0800
152 #define CINT3FLAG                       0x1000
153 #define CINT4FLAG                       0x2000
154 #define CINT04FLAGS                     (CINT0FLAG | CINT1FLAG | CINT2FLAG | CINT3FLAG | CINT4FLAG)
155
156 // GPU_FLAGS bits
// Bits 0-2 (Z, C, N) are mirrored in gpu_flag_z / gpu_flag_c / gpu_flag_n and
// merged back into gpu_flags when the register is read. IMASK is stripped
// from any value written through GPUWriteLong. Bits 9-13 (INT_CLRx) are masked
// out of the value returned on a read (0xFFFFC1FF).
157
158 #define ZERO_FLAG               0x0001
159 #define CARRY_FLAG              0x0002
160 #define NEGA_FLAG               0x0004
161 #define IMASK                   0x0008
162 #define INT_ENA0                0x0010
163 #define INT_ENA1                0x0020
164 #define INT_ENA2                0x0040
165 #define INT_ENA3                0x0080
166 #define INT_ENA4                0x0100
167 #define INT_CLR0                0x0200
168 #define INT_CLR1                0x0400
169 #define INT_CLR2                0x0800
170 #define INT_CLR3                0x1000
171 #define INT_CLR4                0x2000
172 #define REGPAGE                 0x4000
173 #define DMAEN                   0x8000
174
175 // External global variables
176
177 extern int start_logging;
178 extern int gpu_start_log;
179
180 // Private function prototypes
181
182 void GPUUpdateRegisterBanks(void);
183 void GPUDumpDisassembly(void);
184 void GPUDumpRegisters(void);
185 void GPUDumpMemory(void);
186
187 static void gpu_opcode_add(void);
188 static void gpu_opcode_addc(void);
189 static void gpu_opcode_addq(void);
190 static void gpu_opcode_addqt(void);
191 static void gpu_opcode_sub(void);
192 static void gpu_opcode_subc(void);
193 static void gpu_opcode_subq(void);
194 static void gpu_opcode_subqt(void);
195 static void gpu_opcode_neg(void);
196 static void gpu_opcode_and(void);
197 static void gpu_opcode_or(void);
198 static void gpu_opcode_xor(void);
199 static void gpu_opcode_not(void);
200 static void gpu_opcode_btst(void);
201 static void gpu_opcode_bset(void);
202 static void gpu_opcode_bclr(void);
203 static void gpu_opcode_mult(void);
204 static void gpu_opcode_imult(void);
205 static void gpu_opcode_imultn(void);
206 static void gpu_opcode_resmac(void);
207 static void gpu_opcode_imacn(void);
208 static void gpu_opcode_div(void);
209 static void gpu_opcode_abs(void);
210 static void gpu_opcode_sh(void);
211 static void gpu_opcode_shlq(void);
212 static void gpu_opcode_shrq(void);
213 static void gpu_opcode_sha(void);
214 static void gpu_opcode_sharq(void);
215 static void gpu_opcode_ror(void);
216 static void gpu_opcode_rorq(void);
217 static void gpu_opcode_cmp(void);
218 static void gpu_opcode_cmpq(void);
219 static void gpu_opcode_sat8(void);
220 static void gpu_opcode_sat16(void);
221 static void gpu_opcode_move(void);
222 static void gpu_opcode_moveq(void);
223 static void gpu_opcode_moveta(void);
224 static void gpu_opcode_movefa(void);
225 static void gpu_opcode_movei(void);
226 static void gpu_opcode_loadb(void);
227 static void gpu_opcode_loadw(void);
228 static void gpu_opcode_load(void);
229 static void gpu_opcode_loadp(void);
230 static void gpu_opcode_load_r14_indexed(void);
231 static void gpu_opcode_load_r15_indexed(void);
232 static void gpu_opcode_storeb(void);
233 static void gpu_opcode_storew(void);
234 static void gpu_opcode_store(void);
235 static void gpu_opcode_storep(void);
236 static void gpu_opcode_store_r14_indexed(void);
237 static void gpu_opcode_store_r15_indexed(void);
238 static void gpu_opcode_move_pc(void);
239 static void gpu_opcode_jump(void);
240 static void gpu_opcode_jr(void);
241 static void gpu_opcode_mmult(void);
242 static void gpu_opcode_mtoi(void);
243 static void gpu_opcode_normi(void);
244 static void gpu_opcode_nop(void);
245 static void gpu_opcode_load_r14_ri(void);
246 static void gpu_opcode_load_r15_ri(void);
247 static void gpu_opcode_store_r14_ri(void);
248 static void gpu_opcode_store_r15_ri(void);
249 static void gpu_opcode_sat24(void);
250 static void gpu_opcode_pack(void);
251
252 // This is wrong, since it doesn't take pipeline effects into account. !!! FIX !!!
253 /*uint8 gpu_opcode_cycles[64] =
254 {
255         3,  3,  3,  3,  3,  3,  3,  3,
256         3,  3,  3,  3,  3,  3,  3,  3,
257         3,  3,  1,  3,  1, 18,  3,  3,
258         3,  3,  3,  3,  3,  3,  3,  3,
259         3,  3,  2,  2,  2,  2,  3,  4,
260         5,  4,  5,  6,  6,  1,  1,  1,
261         1,  2,  2,  2,  1,  1,  9,  3,
262         3,  1,  6,  6,  2,  2,  3,  3
263 };//*/
264 //Here's a QnD kludge...
265 //This is wrong, wrong, WRONG, but it seems to work for the time being...
266 //(That is, it fixes Flip Out which relies on GPU timing rather than semaphores. Bad developers! Bad!)
267 //What's needed here is a way to take pipeline effects into account (including pipeline stalls!)...
268 /*uint8 gpu_opcode_cycles[64] =
269 {
270         1,  1,  1,  1,  1,  1,  1,  1,
271         1,  1,  1,  1,  1,  1,  1,  1,
272         1,  1,  1,  1,  1,  9,  1,  1,
273         1,  1,  1,  1,  1,  1,  1,  1,
274         1,  1,  1,  1,  1,  1,  1,  2,
275         2,  2,  2,  3,  3,  1,  1,  1,
276         1,  1,  1,  1,  1,  1,  4,  1,
277         1,  1,  3,  3,  1,  1,  1,  1
278 };//*/
279 uint8 gpu_opcode_cycles[64] =
280 {
281         1,  1,  1,  1,  1,  1,  1,  1,
282         1,  1,  1,  1,  1,  1,  1,  1,
283         1,  1,  1,  1,  1,  1,  1,  1,
284         1,  1,  1,  1,  1,  1,  1,  1,
285         1,  1,  1,  1,  1,  1,  1,  1,
286         1,  1,  1,  1,  1,  1,  1,  1,
287         1,  1,  1,  1,  1,  1,  1,  1,
288         1,  1,  1,  1,  1,  1,  1,  1
289 };//*/
290
291 void (*gpu_opcode[64])()=
292 {
293         gpu_opcode_add,                                 gpu_opcode_addc,                                gpu_opcode_addq,                                gpu_opcode_addqt,
294         gpu_opcode_sub,                                 gpu_opcode_subc,                                gpu_opcode_subq,                                gpu_opcode_subqt,
295         gpu_opcode_neg,                                 gpu_opcode_and,                                 gpu_opcode_or,                                  gpu_opcode_xor,
296         gpu_opcode_not,                                 gpu_opcode_btst,                                gpu_opcode_bset,                                gpu_opcode_bclr,
297         gpu_opcode_mult,                                gpu_opcode_imult,                               gpu_opcode_imultn,                              gpu_opcode_resmac,
298         gpu_opcode_imacn,                               gpu_opcode_div,                                 gpu_opcode_abs,                                 gpu_opcode_sh,
299         gpu_opcode_shlq,                                gpu_opcode_shrq,                                gpu_opcode_sha,                                 gpu_opcode_sharq,
300         gpu_opcode_ror,                                 gpu_opcode_rorq,                                gpu_opcode_cmp,                                 gpu_opcode_cmpq,
301         gpu_opcode_sat8,                                gpu_opcode_sat16,                               gpu_opcode_move,                                gpu_opcode_moveq,
302         gpu_opcode_moveta,                              gpu_opcode_movefa,                              gpu_opcode_movei,                               gpu_opcode_loadb,
303         gpu_opcode_loadw,                               gpu_opcode_load,                                gpu_opcode_loadp,                               gpu_opcode_load_r14_indexed,
304         gpu_opcode_load_r15_indexed,    gpu_opcode_storeb,                              gpu_opcode_storew,                              gpu_opcode_store,
305         gpu_opcode_storep,                              gpu_opcode_store_r14_indexed,   gpu_opcode_store_r15_indexed,   gpu_opcode_move_pc,
306         gpu_opcode_jump,                                gpu_opcode_jr,                                  gpu_opcode_mmult,                               gpu_opcode_mtoi,
307         gpu_opcode_normi,                               gpu_opcode_nop,                                 gpu_opcode_load_r14_ri,                 gpu_opcode_load_r15_ri,
308         gpu_opcode_store_r14_ri,                gpu_opcode_store_r15_ri,                gpu_opcode_sat24,                               gpu_opcode_pack,
309 };
310
// GPU-local state. The byte array below is the 4K ($1000 byte) work RAM that
// the GPURead*/GPUWrite* accessors serve for the GPU_WORK_RAM_BASE range; it is
// kept in big-endian byte order (see the shifts in GPUReadWord/GPUReadLong).
//
// Control registers as served by GPUReadLong (offsets from
// GPU_CONTROL_RAM_BASE):
//   $00 gpu_flags              $04 gpu_matrix_control
//   $08 gpu_pointer_to_matrix  $0C gpu_data_organization
//   $10 gpu_pc                 $14 gpu_control
//   $18 gpu_hidata             $1C gpu_remain (reads)
// Byte and word writes to $1C-$1F go to gpu_div_control instead.
311 static uint8 gpu_ram_8[0x1000];
312 uint32 gpu_pc;
313 static uint32 gpu_acc;
314 static uint32 gpu_remain;
315 static uint32 gpu_hidata;
316 static uint32 gpu_flags;
317 static uint32 gpu_matrix_control;
318 static uint32 gpu_pointer_to_matrix;
319 static uint32 gpu_data_organization;
320 static uint32 gpu_control;
321 static uint32 gpu_div_control;
322 // There is a distinct advantage to having these separated out--there's no need to clear
323 // a bit before writing a result. I.e., if the result of an operation leaves a zero in
324 // the carry flag, you don't have to zero gpu_flag_c before you can write that zero!
325 static uint8 gpu_flag_z, gpu_flag_n, gpu_flag_c;
// Two banks of 32 registers each. GPUReadLong exposes them at $F02000-$F020FF,
// bank 0 first. gpu_reg / gpu_alternate_reg are what the RM/RN and
// ALTERNATE_RM/ALTERNATE_RN macros index; presumably GPUUpdateRegisterBanks()
// points them at the two banks -- confirm in that function.
326 static uint32 gpu_reg_bank_0[32];
327 static uint32 gpu_reg_bank_1[32];
328 static uint32 * gpu_reg;
329 static uint32 * gpu_alternate_reg;
330
// Decode state for the current instruction: the two values below are the
// indices/immediates that the RM/RN and IMM_1/IMM_2 macros expand to.
331 static uint32 gpu_instruction;
332 static uint32 gpu_opcode_first_parameter;
333 static uint32 gpu_opcode_second_parameter;
334
// Non-zero when bit 0 of gpu_control is set.
335 #define GPU_RUNNING             (gpu_control & 0x01)
336
// Operand accessors. RM/RN index the current register bank with the first and
// second instruction parameter; the ALTERNATE_ forms index the other bank;
// IMM_1/IMM_2 use the same parameters as immediate values.
337 #define RM                              gpu_reg[gpu_opcode_first_parameter]
338 #define RN                              gpu_reg[gpu_opcode_second_parameter]
339 #define ALTERNATE_RM    gpu_alternate_reg[gpu_opcode_first_parameter]
340 #define ALTERNATE_RN    gpu_alternate_reg[gpu_opcode_second_parameter]
341 #define IMM_1                   gpu_opcode_first_parameter
342 #define IMM_2                   gpu_opcode_second_parameter
343
// NOTE(review): these two and the RESET_FLAG_* macros below carry their own
// trailing ';', so "MACRO(x);" expands to an extra empty statement and they
// cannot be used inside an expression or directly before an 'else'.
344 #define SET_FLAG_Z(r)   (gpu_flag_z = ((r) == 0));
345 #define SET_FLAG_N(r)   (gpu_flag_n = (((uint32)(r) >> 31) & 0x01));
346
347 #define RESET_FLAG_Z()  gpu_flag_z = 0;
348 #define RESET_FLAG_N()  gpu_flag_n = 0;
349 #define RESET_FLAG_C()  gpu_flag_c = 0;
350
// Expression-style flag helpers (no trailing ';').
//   SET_Z: Z = 1 when r is zero.
//   SET_N: N = bit 31 of r.
//   SET_C_ADD: C = 1 when b > ~a, i.e. when a + b does not fit in 32 bits.
//   SET_C_SUB: C = 1 when b > a (unsigned), i.e. when a - b borrows.
351 #define CLR_Z                           (gpu_flag_z = 0)
352 #define CLR_ZN                          (gpu_flag_z = gpu_flag_n = 0)
353 #define CLR_ZNC                         (gpu_flag_z = gpu_flag_n = gpu_flag_c = 0)
354 #define SET_Z(r)                        (gpu_flag_z = ((r) == 0))
355 #define SET_N(r)                        (gpu_flag_n = (((uint32)(r) >> 31) & 0x01))
356 #define SET_C_ADD(a,b)          (gpu_flag_c = ((uint32)(b) > (uint32)(~(a))))
357 #define SET_C_SUB(a,b)          (gpu_flag_c = ((uint32)(b) > (uint32)(a)))
// NOTE(review): the three macros below expand to several statements. Used as
// the body of an unbraced if/else/loop, only the first statement would be
// governed by the condition -- always wrap their use in braces.
358 #define SET_ZN(r)                       SET_N(r); SET_Z(r)
359 #define SET_ZNC_ADD(a,b,r)      SET_N(r); SET_Z(r); SET_C_ADD(a,b)
360 #define SET_ZNC_SUB(a,b,r)      SET_N(r); SET_Z(r); SET_C_SUB(a,b)
361
362 uint32 gpu_convert_zero[32] =
363         { 32,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 };
364
// Lazily allocated by build_branch_condition_table(): 8 flag combinations x 32
// condition codes, one byte each (1 = condition satisfied, 0 = not).
365 uint8 * branch_condition_table = 0;
// Looks up condition code x for the current flags. The low three bits of
// 'jaguar_flags' select the 32-entry row. NOTE(review): 'jaguar_flags' is not
// declared in the visible part of this file -- users of this macro presumably
// provide it; confirm.
366 #define BRANCH_CONDITION(x)     branch_condition_table[(x) + ((jaguar_flags & 7) << 5)]
367
368 uint32 gpu_opcode_use[64];
369
// Mnemonic for each opcode number (0-63); kept in the same order as the
// gpu_opcode[] dispatch table.
const char * gpu_opcode_str[64]=
{
	/*  0 */ "add", "addc", "addq", "addqt",
	/*  4 */ "sub", "subc", "subq", "subqt",
	/*  8 */ "neg", "and", "or", "xor",
	/* 12 */ "not", "btst", "bset", "bclr",
	/* 16 */ "mult", "imult", "imultn", "resmac",
	/* 20 */ "imacn", "div", "abs", "sh",
	/* 24 */ "shlq", "shrq", "sha", "sharq",
	/* 28 */ "ror", "rorq", "cmp", "cmpq",
	/* 32 */ "sat8", "sat16", "move", "moveq",
	/* 36 */ "moveta", "movefa", "movei", "loadb",
	/* 40 */ "loadw", "load", "loadp", "load_r14_indexed",
	/* 44 */ "load_r15_indexed", "storeb", "storew", "store",
	/* 48 */ "storep", "store_r14_indexed", "store_r15_indexed", "move_pc",
	/* 52 */ "jump", "jr", "mmult", "mtoi",
	/* 56 */ "normi", "nop", "load_r14_ri", "load_r15_ri",
	/* 60 */ "store_r14_ri", "store_r15_ri", "sat24", "pack",
};
389
390 static uint32 gpu_in_exec = 0;
391 static uint32 gpu_releaseTimeSlice_flag = 0;
392
393 void GPUReleaseTimeslice(void)
394 {
395         gpu_releaseTimeSlice_flag = 1;
396 }
397
398 uint32 GPUGetPC(void)
399 {
400         return gpu_pc;
401 }
402
403 void build_branch_condition_table(void)
404 {
405         if (!branch_condition_table)
406         {
407                 branch_condition_table = (uint8 *)malloc(32 * 8 * sizeof(branch_condition_table[0]));
408
409                 if (branch_condition_table)
410                 {
411                         for(int i=0; i<8; i++)
412                         {
413                                 for(int j=0; j<32; j++)
414                                 {
415                                         int result = 1;
416                                         if (j & 1)
417                                                 if (i & ZERO_FLAG)
418                                                         result = 0;
419                                         if (j & 2)
420                                                 if (!(i & ZERO_FLAG))
421                                                         result = 0;
422                                         if (j & 4)
423                                                 if (i & (CARRY_FLAG << (j >> 4)))
424                                                         result = 0;
425                                         if (j & 8)
426                                                 if (!(i & (CARRY_FLAG << (j >> 4))))
427                                                         result = 0;
428                                         branch_condition_table[i * 32 + j] = result;
429                                 }
430                         }
431                 }
432         }
433 }
434
435 //
436 // GPU byte access (read)
437 //
438 uint8 GPUReadByte(uint32 offset, uint32 who/*=UNKNOWN*/)
439 {
440         if (offset >= 0xF02000 && offset <= 0xF020FF)
441                 WriteLog("GPU: ReadByte--Attempt to read from GPU register file by %s!\n", whoName[who]);
442
443         if ((offset >= GPU_WORK_RAM_BASE) && (offset < GPU_WORK_RAM_BASE+0x1000))
444                 return gpu_ram_8[offset & 0xFFF];
445         else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset < GPU_CONTROL_RAM_BASE+0x20))
446         {
447                 uint32 data = GPUReadLong(offset & 0xFFFFFFFC, who);
448
449                 if ((offset & 0x03) == 0)
450                         return data >> 24;
451                 else if ((offset & 0x03) == 1)
452                         return (data >> 16) & 0xFF;
453                 else if ((offset & 0x03) == 2)
454                         return (data >> 8) & 0xFF;
455                 else if ((offset & 0x03) == 3)
456                         return data & 0xFF;
457         }
458
459         return JaguarReadByte(offset, who);
460 }
461
462 //
463 // GPU word access (read)
464 //
465 uint16 GPUReadWord(uint32 offset, uint32 who/*=UNKNOWN*/)
466 {
467         if (offset >= 0xF02000 && offset <= 0xF020FF)
468                 WriteLog("GPU: ReadWord--Attempt to read from GPU register file by %s!\n", whoName[who]);
469
470         if ((offset >= GPU_WORK_RAM_BASE) && (offset < GPU_WORK_RAM_BASE+0x1000))
471         {
472                 offset &= 0xFFF;
473                 uint16 data = ((uint16)gpu_ram_8[offset] << 8) | (uint16)gpu_ram_8[offset+1];
474                 return data;
475         }
476         else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset < GPU_CONTROL_RAM_BASE+0x20))
477         {
478 // This looks and smells wrong...
479 // But it *might* be OK...
480                 if (offset & 0x01)                      // Catch cases 1 & 3... (unaligned read)
481                         return (GPUReadByte(offset, who) << 8) | GPUReadByte(offset+1, who);
482
483                 uint32 data = GPUReadLong(offset & 0xFFFFFFFC, who);
484
485                 if (offset & 0x02)                      // Cases 0 & 2...
486                         return data & 0xFFFF;
487                 else
488                         return data >> 16;
489         }
490
491 //TEMP--Mirror of F03000? No. Writes only...
492 //if (offset >= 0xF0B000 && offset <= 0xF0BFFF)
493 //WriteLog("[GPUR16] --> Possible GPU RAM mirror access by %s!", whoName[who]);
494
495         return JaguarReadWord(offset, who);
496 }
497
498 //
499 // GPU dword access (read)
500 //
501 uint32 GPUReadLong(uint32 offset, uint32 who/*=UNKNOWN*/)
502 {
503         if (offset >= 0xF02000 && offset <= 0xF020FF)
504         {
505                 WriteLog("GPU: ReadLong--Attempt to read from GPU register file (%X) by %s!\n", offset, whoName[who]);
506                 uint32 reg = (offset & 0xFC) >> 2;
507                 return (reg < 32 ? gpu_reg_bank_0[reg] : gpu_reg_bank_1[reg - 32]); 
508         }
509
510 //      if ((offset >= GPU_WORK_RAM_BASE) && (offset < GPU_WORK_RAM_BASE + 0x1000))
511         if ((offset >= GPU_WORK_RAM_BASE) && (offset <= GPU_WORK_RAM_BASE + 0x0FFC))
512         {
513                 offset &= 0xFFF;
514                 return ((uint32)gpu_ram_8[offset] << 24) | ((uint32)gpu_ram_8[offset+1] << 16)
515                         | ((uint32)gpu_ram_8[offset+2] << 8) | (uint32)gpu_ram_8[offset+3];//*/
516 //              return GET32(gpu_ram_8, offset);
517         }
518 //      else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset < GPU_CONTROL_RAM_BASE+0x20))
519         else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset <= GPU_CONTROL_RAM_BASE + 0x1C))
520         {
521                 offset &= 0x1F;
522                 switch (offset)
523                 {
524                 case 0x00:
525                         gpu_flag_c = (gpu_flag_c ? 1 : 0);
526                         gpu_flag_z = (gpu_flag_z ? 1 : 0);
527                         gpu_flag_n = (gpu_flag_n ? 1 : 0);
528
529                         gpu_flags = (gpu_flags & 0xFFFFFFF8) | (gpu_flag_n << 2) | (gpu_flag_c << 1) | gpu_flag_z;
530
531                         return gpu_flags & 0xFFFFC1FF;
532                 case 0x04:
533                         return gpu_matrix_control;
534                 case 0x08:
535                         return gpu_pointer_to_matrix;
536                 case 0x0C:
537                         return gpu_data_organization;
538                 case 0x10:
539                         return gpu_pc;
540                 case 0x14:
541                         return gpu_control;
542                 case 0x18:
543                         return gpu_hidata;
544                 case 0x1C:
545                         return gpu_remain;
546                 default:                                                                // unaligned long read
547 #ifdef GPU_DEBUG
548                         WriteLog("GPU: Read32--unaligned 32 bit read at %08X by %s.\n", GPU_CONTROL_RAM_BASE + offset, whoName[who]);
549 #endif  // GPU_DEBUG
550                         return 0;
551                 }
552         }
553 //TEMP--Mirror of F03000? No. Writes only...
554 //if (offset >= 0xF0B000 && offset <= 0xF0BFFF)
555 //      WriteLog("[GPUR32] --> Possible GPU RAM mirror access by %s!\n", whoName[who]);
556 /*if (offset >= 0xF1D000 && offset <= 0xF1DFFF)
557         WriteLog("[GPUR32] --> Reading from Wavetable ROM!\n");//*/
558
559         return (JaguarReadWord(offset, who) << 16) | JaguarReadWord(offset + 2, who);
560 }
561
562 //
563 // GPU byte access (write)
564 //
565 void GPUWriteByte(uint32 offset, uint8 data, uint32 who/*=UNKNOWN*/)
566 {
567         if (offset >= 0xF02000 && offset <= 0xF020FF)
568                 WriteLog("GPU: WriteByte--Attempt to write to GPU register file by %s!\n", whoName[who]);
569
570         if ((offset >= GPU_WORK_RAM_BASE) && (offset <= GPU_WORK_RAM_BASE + 0x0FFF))
571         {
572                 gpu_ram_8[offset & 0xFFF] = data;
573
574 //This is the same stupid worthless code that was in the DSP!!! AARRRGGGGHHHHH!!!!!!
575 /*              if (!gpu_in_exec)
576                 {
577                         m68k_end_timeslice();
578                         dsp_releaseTimeslice();
579                 }*/
580                 return;
581         }
582         else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset <= GPU_CONTROL_RAM_BASE + 0x1F))
583         {
584                 uint32 reg = offset & 0x1C;
585                 int bytenum = offset & 0x03;
586
587 //This is definitely wrong!
588                 if ((reg >= 0x1C) && (reg <= 0x1F))
589                         gpu_div_control = (gpu_div_control & (~(0xFF << (bytenum << 3)))) | (data << (bytenum << 3));
590                 else
591                 {
592                         uint32 old_data = GPUReadLong(offset & 0xFFFFFFC, who);
593                         bytenum = 3 - bytenum; // convention motorola !!!
594                         old_data = (old_data & (~(0xFF << (bytenum << 3)))) | (data << (bytenum << 3));
595                         GPUWriteLong(offset & 0xFFFFFFC, old_data, who);
596                 }
597                 return;
598         }
599 //      WriteLog("gpu: writing %.2x at 0x%.8x\n",data,offset);
600         JaguarWriteByte(offset, data, who);
601 }
602
603 //
604 // GPU word access (write)
605 //
606 void GPUWriteWord(uint32 offset, uint16 data, uint32 who/*=UNKNOWN*/)
607 {
608         if (offset >= 0xF02000 && offset <= 0xF020FF)
609                 WriteLog("GPU: WriteWord--Attempt to write to GPU register file by %s!\n", whoName[who]);
610
611         if ((offset >= GPU_WORK_RAM_BASE) && (offset <= GPU_WORK_RAM_BASE + 0x0FFE))
612         {
613                 gpu_ram_8[offset & 0xFFF] = (data>>8) & 0xFF;
614                 gpu_ram_8[(offset+1) & 0xFFF] = data & 0xFF;//*/
615 /*              offset &= 0xFFF;
616                 SET16(gpu_ram_8, offset, data);//*/
617
618 /*if (offset >= 0xF03214 && offset < 0xF0321F)
619         WriteLog("GPU: Writing WORD (%04X) to GPU RAM (%08X)...\n", data, offset);//*/
620
621
622 //This is the same stupid worthless code that was in the DSP!!! AARRRGGGGHHHHH!!!!!!
623 /*              if (!gpu_in_exec)
624                 {
625                         m68k_end_timeslice();
626                         dsp_releaseTimeslice();
627                 }*/
628                 return;
629         }
630         else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset <= GPU_CONTROL_RAM_BASE + 0x1E))
631         {
632                 if (offset & 0x01)              // This is supposed to weed out unaligned writes, but does nothing...
633                 {
634 #ifdef GPU_DEBUG
635                         WriteLog("GPU: Write16--unaligned write @ %08X [%04X]\n", offset, data);
636                         GPUDumpRegisters();
637 #endif  // GPU_DEBUG
638                         return;
639                 }
640 //Dual locations in this range: $1C Divide unit remainder/Divide unit control (R/W)
641 //This just literally sucks.
642                 if ((offset & 0x1C) == 0x1C)
643                 {
644 //This doesn't look right either--handles cases 1, 2, & 3 all the same!
645                         if (offset & 0x02)
646                                 gpu_div_control = (gpu_div_control & 0xFFFF0000) | (data & 0xFFFF);
647                         else
648                                 gpu_div_control = (gpu_div_control & 0x0000FFFF) | ((data & 0xFFFF) << 16);
649                 }
650                 else
651                 {
652 //WriteLog("[GPU W16:%08X,%04X]", offset, data);
653                         uint32 old_data = GPUReadLong(offset & 0xFFFFFFC, who);
654
655                         if (offset & 0x02)
656                                 old_data = (old_data & 0xFFFF0000) | (data & 0xFFFF);
657                         else
658                                 old_data = (old_data & 0x0000FFFF) | ((data & 0xFFFF) << 16);
659
660                         GPUWriteLong(offset & 0xFFFFFFC, old_data, who);
661                 }
662
663                 return;
664         }
665         else if ((offset == GPU_WORK_RAM_BASE + 0x0FFF) || (GPU_CONTROL_RAM_BASE + 0x1F))
666         {
667 #ifdef GPU_DEBUG
668                         WriteLog("GPU: Write16--unaligned write @ %08X by %s [%04X]!\n", offset, whoName[who], data);
669                         GPUDumpRegisters();
670 #endif  // GPU_DEBUG
671                 return;
672         }
673
674         // Have to be careful here--this can cause an infinite loop!
675         JaguarWriteWord(offset, data, who);
676 }
677
678 //
679 // GPU dword access (write)
680 //
681 void GPUWriteLong(uint32 offset, uint32 data, uint32 who/*=UNKNOWN*/)
682 {
683         if (offset >= 0xF02000 && offset <= 0xF020FF)
684                 WriteLog("GPU: WriteLong--Attempt to write to GPU register file by %s!\n", whoName[who]);
685
686 //      if ((offset >= GPU_WORK_RAM_BASE) && (offset < GPU_WORK_RAM_BASE + 0x1000))
687         if ((offset >= GPU_WORK_RAM_BASE) && (offset <= GPU_WORK_RAM_BASE + 0x0FFC))
688         {
689 #ifdef GPU_DEBUG
690                 if (offset & 0x03)
691                 {
692                         WriteLog("GPU: Write32--unaligned write @ %08X [%08X] by %s\n", offset, data, whoName[who]);
693                         GPUDumpRegisters();
694                 }
695 #endif  // GPU_DEBUG
696
697                 offset &= 0xFFF;
698                 SET32(gpu_ram_8, offset, data);
699                 return;
700         }
701 //      else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset < GPU_CONTROL_RAM_BASE+0x20))
702         else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset <= GPU_CONTROL_RAM_BASE + 0x1C))
703         {
704                 offset &= 0x1F;
705                 switch (offset)
706                 {
707                 case 0x00:
708                 {
709                         bool IMASKCleared = (gpu_flags & IMASK) && !(data & IMASK);
710                         // NOTE: According to the JTRM, writing a 1 to IMASK has no effect; only the
711                         //       IRQ logic can set it. So we mask it out here to prevent problems...
712                         gpu_flags = data & (~IMASK);
713                         gpu_flag_z = gpu_flags & ZERO_FLAG;
714                         gpu_flag_c = (gpu_flags & CARRY_FLAG) >> 1;
715                         gpu_flag_n = (gpu_flags & NEGA_FLAG) >> 2;
716                         GPUUpdateRegisterBanks();
717                         gpu_control &= ~((gpu_flags & CINT04FLAGS) >> 3);       // Interrupt latch clear bits
718 //Writing here is only an interrupt enable--this approach is just plain wrong!
719 //                      GPUHandleIRQs();
720 //This, however, is A-OK! ;-)
721                         if (IMASKCleared)                                               // If IMASK was cleared,
722                                 GPUHandleIRQs();                                        // see if any other interrupts need servicing!
723 #ifdef GPU_DEBUG
724                         if (gpu_flags & (INT_ENA0 | INT_ENA1 | INT_ENA2 | INT_ENA3 | INT_ENA4))
725                                 WriteLog("GPU: Interrupt enable set by %s! Bits: %02X\n", whoName[who], (gpu_flags >> 4) & 0x1F);
726                         WriteLog("GPU: REGPAGE %s...\n", (gpu_flags & REGPAGE ? "set" : "cleared"));
727 #endif  // GPU_DEBUG
728                         break;
729                 }
730                 case 0x04:
731                         gpu_matrix_control = data;
732                         break;
733                 case 0x08:
734                         // This can only point to long aligned addresses
735                         gpu_pointer_to_matrix = data & 0xFFFFFFFC;
736                         break;
737                 case 0x0C:
738                         gpu_data_organization = data;
739                         break;
740                 case 0x10:
741                         gpu_pc = data;
742 #ifdef GPU_DEBUG
743 WriteLog("GPU: %s setting GPU PC to %08X %s\n", whoName[who], gpu_pc, (GPU_RUNNING ? "(GPU is RUNNING!)" : ""));//*/
744 #endif  // GPU_DEBUG
745                         break;
746                 case 0x14:
747                 {
748 //                      uint32 gpu_was_running = GPU_RUNNING;
749                         data &= ~0xF7C0;                // Disable writes to INT_LAT0-4 & TOM version number
750
751                         // check for GPU -> CPU interrupt
752                         if (data & 0x02)
753                         {
754 //WriteLog("GPU->CPU interrupt\n");
755                                 if (TOMIRQEnabled(IRQ_GPU))
756                                 {
757 //This is the programmer's responsibility, to make sure the handler is valid, not ours!
758 //                                      if ((TOMIRQEnabled(IRQ_GPU))// && (JaguarInterruptHandlerIsValid(64)))
759                                         {
760                                                 TOMSetPendingGPUInt();
761                                                 m68k_set_irq(2);                        // Set 68000 IPL 2
762                                                 GPUReleaseTimeslice();
763                                         }
764                                 }
765                                 data &= ~0x02;
766                         }
767
768                         // check for CPU -> GPU interrupt #0
769                         if (data & 0x04)
770                         {
771 //WriteLog("CPU->GPU interrupt\n");
772                                 GPUSetIRQLine(0, ASSERT_LINE);
773                                 m68k_end_timeslice();
774                                 DSPReleaseTimeslice();
775                                 data &= ~0x04;
776                         }
777
778                         // single stepping
779                         if (data & 0x10)
780                         {
781                                 //WriteLog("asked to perform a single step (single step is %senabled)\n",(data&0x8)?"":"not ");
782                         }
783                         gpu_control = (gpu_control & 0xF7C0) | (data & (~0xF7C0));
784
785                         // if gpu wasn't running but is now running, execute a few cycles
786 #ifndef GPU_SINGLE_STEPPING
787 /*                      if (!gpu_was_running && GPU_RUNNING)
788 #ifdef GPU_DEBUG
789                         {
790                                 WriteLog("GPU: Write32--About to do stupid braindead GPU execution for 200 cycles.\n");
791 #endif  // GPU_DEBUG
792                                 GPUExec(200);
793 #ifdef GPU_DEBUG
794                         }
795 #endif  // GPU_DEBUG//*/
796 #else
797                         if (gpu_control & 0x18)
798                                 GPUExec(1);
799 #endif  // #ifndef GPU_SINGLE_STEPPING
800 #ifdef GPU_DEBUG
801 WriteLog("Write to GPU CTRL by %s: %08X ", whoName[who], data);
802 if (GPU_RUNNING)
803         WriteLog(" --> Starting to run at %08X by %s...", gpu_pc, whoName[who]);
804 else
805         WriteLog(" --> Stopped by %s! (GPU_PC: %08X)", whoName[who], gpu_pc);
806 WriteLog("\n");
807 #endif  // GPU_DEBUG
808 //if (GPU_RUNNING)
809 //      GPUDumpDisassembly();
810 /*if (GPU_RUNNING)
811 {
812         if (gpu_pc == 0xF035D8)
813         {
814 //              GPUDumpDisassembly();
815 //              log_done();
816 //              exit(1);
817                 gpu_control &= 0xFFFFFFFE;      // Don't run it and let's see what happens!
818 //Hmm. Seems to lock up when going into the demo...
819 //Try to disable the collision altogether!
820         }
821 }//*/
822 extern int effect_start5;
823 static bool finished = false;
824 //if (GPU_RUNNING && effect_start5 && !finished)
825 if (GPU_RUNNING && effect_start5 && gpu_pc == 0xF035D8)
826 {
827         // Let's do a dump of $6528!
828 /*      uint32 numItems = JaguarReadWord(0x6BD6);
829         WriteLog("\nDump of $6528: %u items.\n\n", numItems);
830         for(int i=0; i<numItems*3*4; i+=3*4)
831         {
832                 WriteLog("\t%04X: %08X %08X %08X -> ", 0x6528+i, JaguarReadLong(0x6528+i),
833                         JaguarReadLong(0x6528+i+4), JaguarReadLong(0x6528+i+8));
834                 uint16 link = JaguarReadWord(0x6528+i+8+2);
835                 for(int j=0; j<40; j+=4)
836                         WriteLog("%08X ", JaguarReadLong(link + j));
837                 WriteLog("\n");
838         }
839         WriteLog("\n");//*/
840         // Let's try a manual blit here...
841 //This isn't working the way it should! !!! FIX !!!
842 //Err, actually, it is.
843 // NOW, it works right! Problem solved!!! It's a blitter bug!
844 /*      uint32 src = 0x4D54, dst = 0xF03000, width = 10 * 4;
845         for(int y=0; y<127; y++)
846         {
847                 for(int x=0; x<2; x++)
848                 {
849                         JaguarWriteLong(dst, JaguarReadLong(src));
850
851                         src += 4;
852                         dst += 4;
853                 }
854                 src += width - (2 * 4);
855         }//*/
856 /*      finished = true;
857         doGPUDis = true;
858         WriteLog("\nGPU: About to execute collision detection code.\n\n");//*/
859
860 /*      WriteLog("\nGPU: About to execute collision detection code. Data @ 4D54:\n\n");
861         int count = 0;
862         for(int i=0x004D54; i<0x004D54+2048; i++)
863         {
864                 WriteLog("%02X ", JaguarReadByte(i));
865                 count++;
866                 if (count == 32)
867                 {
868                         count = 0;
869                         WriteLog("\n");
870                 }
871         }
872         WriteLog("\n\nData @ F03000:\n\n");
873         count = 0;
874         for(int i=0xF03000; i<0xF03200; i++)
875         {
876                 WriteLog("%02X ", JaguarReadByte(i));
877                 count++;
878                 if (count == 32)
879                 {
880                         count = 0;
881                         WriteLog("\n");
882                 }
883         }
884         WriteLog("\n\n");
885         log_done();
886         exit(0);//*/
887 }
888 //if (!GPU_RUNNING)
889 //      doGPUDis = false;
890 /*if (!GPU_RUNNING && finished)
891 {
892         WriteLog("\nGPU: Finished collision detection code. Exiting!\n\n");
893         GPUDumpRegisters();
894         log_done();
895         exit(0);
896 }//*/
897                         // (?) If we're set running by the M68K (or DSP?) then end its timeslice to
898                         // allow the GPU a chance to run...
899                         // Yes! This partially fixed Trevor McFur...
900                         if (GPU_RUNNING)
901                                 m68k_end_timeslice();
902                         break;
903                 }
904                 case 0x18:
905                         gpu_hidata = data;
906                         break;
907                 case 0x1C:
908                         gpu_div_control = data;
909                         break;
910 //              default:   // unaligned long write
911                         //exit(0);
912                         //__asm int 3
913                 }
914                 return;
915         }
916
917 //      JaguarWriteWord(offset, (data >> 16) & 0xFFFF, who);
918 //      JaguarWriteWord(offset+2, data & 0xFFFF, who);
919 // We're a 32-bit processor, we can do a long write...!
920         JaguarWriteLong(offset, data, who);
921 }
922
923 //
924 // Change register banks if necessary
925 //
926 void GPUUpdateRegisterBanks(void)
927 {
928         int bank = (gpu_flags & REGPAGE);               // REGPAGE bit
929
930         if (gpu_flags & IMASK)                                  // IMASK bit
931                 bank = 0;                                                       // IMASK forces main bank to be bank 0
932
933         if (bank)
934                 gpu_reg = gpu_reg_bank_1, gpu_alternate_reg = gpu_reg_bank_0;
935         else
936                 gpu_reg = gpu_reg_bank_0, gpu_alternate_reg = gpu_reg_bank_1;
937 }
938
939 void GPUHandleIRQs(void)
940 {
941         // Bail out if we're already in an interrupt!
942         if (gpu_flags & IMASK)
943                 return;
944
945         // Get the interrupt latch & enable bits
946         uint32 bits = (gpu_control >> 6) & 0x1F, mask = (gpu_flags >> 4) & 0x1F;
947
948         // Bail out if latched interrupts aren't enabled
949         bits &= mask;
950         if (!bits)
951                 return;
952
953         // Determine which interrupt to service
954         uint32 which = 0; //Isn't there a #pragma to disable this warning???
955         if (bits & 0x01)
956                 which = 0;
957         if (bits & 0x02)
958                 which = 1;
959         if (bits & 0x04)
960                 which = 2;
961         if (bits & 0x08)
962                 which = 3;
963         if (bits & 0x10)
964                 which = 4;
965
966         if (start_logging)
967                 WriteLog("GPU: Generating IRQ #%i\n", which);
968
969         // set the interrupt flag
970         gpu_flags |= IMASK;
971         GPUUpdateRegisterBanks();
972
973         // subqt  #4,r31                ; pre-decrement stack pointer
974         // move  pc,r30                 ; address of interrupted code
975         // store  r30,(r31)     ; store return address
976         gpu_reg[31] -= 4;
977         GPUWriteLong(gpu_reg[31], gpu_pc - 2, GPU);
978
979         // movei  #service_address,r30  ; pointer to ISR entry
980         // jump  (r30)                                  ; jump to ISR
981         // nop
982         gpu_pc = gpu_reg[30] = GPU_WORK_RAM_BASE + (which * 0x10);
983 }
984
985 void GPUSetIRQLine(int irqline, int state)
986 {
987         if (start_logging)
988                 WriteLog("GPU: Setting GPU IRQ line #%i\n", irqline);
989
990         uint32 mask = 0x0040 << irqline;
991         gpu_control &= ~mask;                           // Clear the interrupt latch
992
993         if (state)
994         {
995                 gpu_control |= mask;                    // Assert the interrupt latch
996                 GPUHandleIRQs();                                // And handle the interrupt...
997         }
998 }
999
1000 //TEMPORARY: Testing only!
1001 //#include "gpu2.h"
1002 //#include "gpu3.h"
1003
1004 void GPUInit(void)
1005 {
1006 //      memory_malloc_secure((void **)&gpu_ram_8, 0x1000, "GPU work RAM");
1007 //      memory_malloc_secure((void **)&gpu_reg_bank_0, 32 * sizeof(int32), "GPU bank 0 regs");
1008 //      memory_malloc_secure((void **)&gpu_reg_bank_1, 32 * sizeof(int32), "GPU bank 1 regs");
1009
1010         build_branch_condition_table();
1011
1012         GPUReset();
1013
1014 //TEMPORARY: Testing only!
1015 //      gpu2_init();
1016 //      gpu3_init();
1017 }
1018
1019 void GPUReset(void)
1020 {
1021         // GPU registers (directly visible)
1022         gpu_flags                         = 0x00000000;
1023         gpu_matrix_control    = 0x00000000;
1024         gpu_pointer_to_matrix = 0x00000000;
1025         gpu_data_organization = 0xFFFFFFFF;
1026         gpu_pc                            = 0x00F03000;
1027         gpu_control                       = 0x00002800;                 // Correctly sets this as TOM Rev. 2
1028         gpu_hidata                        = 0x00000000;
1029         gpu_remain                        = 0x00000000;                 // These two registers are RO/WO
1030         gpu_div_control           = 0x00000000;
1031
1032         // GPU internal register
1033         gpu_acc                           = 0x00000000;
1034
1035         gpu_reg = gpu_reg_bank_0;
1036         gpu_alternate_reg = gpu_reg_bank_1;
1037
1038         for(int i=0; i<32; i++)
1039                 gpu_reg[i] = gpu_alternate_reg[i] = 0x00000000;
1040
1041         CLR_ZNC;
1042         memset(gpu_ram_8, 0xFF, 0x1000);
1043         gpu_in_exec = 0;
1044 //not needed    GPUInterruptPending = false;
1045         GPUResetStats();
1046 }
1047
1048 uint32 GPUReadPC(void)
1049 {
1050         return gpu_pc;
1051 }
1052
1053 void GPUResetStats(void)
1054 {
1055         for(uint32 i=0; i<64; i++)
1056                 gpu_opcode_use[i] = 0;
1057         WriteLog("--> GPU stats were reset!\n");
1058 }
1059
1060 void GPUDumpDisassembly(void)
1061 {
1062         char buffer[512];
1063
1064         WriteLog("\n---[GPU code at 00F03000]---------------------------\n");
1065         uint32 j = 0xF03000;
1066         while (j <= 0xF03FFF)
1067         {
1068                 uint32 oldj = j;
1069                 j += dasmjag(JAGUAR_GPU, buffer, j);
1070                 WriteLog("\t%08X: %s\n", oldj, buffer);
1071         }
1072 }
1073
1074 void GPUDumpRegisters(void)
1075 {
1076         WriteLog("\n---[GPU flags: NCZ %d%d%d]-----------------------\n", gpu_flag_n, gpu_flag_c, gpu_flag_z);
1077         WriteLog("\nRegisters bank 0\n");
1078         for(int j=0; j<8; j++)
1079         {
1080                 WriteLog("\tR%02i = %08X R%02i = %08X R%02i = %08X R%02i = %08X\n",
1081                                                   (j << 2) + 0, gpu_reg_bank_0[(j << 2) + 0],
1082                                                   (j << 2) + 1, gpu_reg_bank_0[(j << 2) + 1],
1083                                                   (j << 2) + 2, gpu_reg_bank_0[(j << 2) + 2],
1084                                                   (j << 2) + 3, gpu_reg_bank_0[(j << 2) + 3]);
1085         }
1086         WriteLog("Registers bank 1\n");
1087         for(int j=0; j<8; j++)
1088         {
1089                 WriteLog("\tR%02i = %08X R%02i = %08X R%02i = %08X R%02i = %08X\n",
1090                                                   (j << 2) + 0, gpu_reg_bank_1[(j << 2) + 0],
1091                                                   (j << 2) + 1, gpu_reg_bank_1[(j << 2) + 1],
1092                                                   (j << 2) + 2, gpu_reg_bank_1[(j << 2) + 2],
1093                                                   (j << 2) + 3, gpu_reg_bank_1[(j << 2) + 3]);
1094         }
1095 }
1096
1097 void GPUDumpMemory(void)
1098 {
1099         WriteLog("\n---[GPU data at 00F03000]---------------------------\n");
1100         for(int i=0; i<0xFFF; i+=4)
1101                 WriteLog("\t%08X: %02X %02X %02X %02X\n", 0xF03000+i, gpu_ram_8[i],
1102                         gpu_ram_8[i+1], gpu_ram_8[i+2], gpu_ram_8[i+3]);
1103 }
1104
1105 void GPUDone(void)
1106 {
1107         WriteLog("GPU: Stopped at PC=%08X (GPU %s running)\n", (unsigned int)gpu_pc, GPU_RUNNING ? "was" : "wasn't");
1108
1109         // Get the interrupt latch & enable bits
1110         uint8 bits = (gpu_control >> 6) & 0x1F, mask = (gpu_flags >> 4) & 0x1F;
1111         WriteLog("GPU: Latch bits = %02X, enable bits = %02X\n", bits, mask);
1112
1113         GPUDumpRegisters();
1114         GPUDumpDisassembly();
1115
1116         WriteLog("\nGPU opcodes use:\n");
1117         for(int i=0; i<64; i++)
1118         {
1119                 if (gpu_opcode_use[i])
1120                         WriteLog("\t%17s %lu\n", gpu_opcode_str[i], gpu_opcode_use[i]);
1121         }
1122         WriteLog("\n");
1123
1124 //      memory_free(gpu_ram_8);
1125 //      memory_free(gpu_reg_bank_0);
1126 //      memory_free(gpu_reg_bank_1);
1127 }
1128
1129 //
1130 // Main GPU execution core
1131 //
1132 static int testCount = 1;
1133 static int len = 0;
1134 static bool tripwire = false;
1135 void GPUExec(int32 cycles)
1136 {
1137         if (!GPU_RUNNING)
1138                 return;
1139
1140 #ifdef GPU_SINGLE_STEPPING
1141         if (gpu_control & 0x18)
1142         {
1143                 cycles = 1;
1144                 gpu_control &= ~0x10;
1145         }
1146 #endif
1147         GPUHandleIRQs();
1148         gpu_releaseTimeSlice_flag = 0;
1149         gpu_in_exec++;
1150
1151         while (cycles > 0 && GPU_RUNNING)
1152         {
1153 if (gpu_ram_8[0x054] == 0x98 && gpu_ram_8[0x055] == 0x0A && gpu_ram_8[0x056] == 0x03
1154         && gpu_ram_8[0x057] == 0x00 && gpu_ram_8[0x058] == 0x00 && gpu_ram_8[0x059] == 0x00)
1155 {
1156         if (gpu_pc == 0xF03000)
1157         {
1158                 extern uint32 starCount;
1159                 starCount = 0;
1160 /*              WriteLog("GPU: Starting starfield generator... Dump of [R03=%08X]:\n", gpu_reg_bank_0[03]);
1161                 uint32 base = gpu_reg_bank_0[3];
1162                 for(uint32 i=0; i<0x100; i+=16)
1163                 {
1164                         WriteLog("%02X: ", i);
1165                         for(uint32 j=0; j<16; j++)
1166                         {
1167                                 WriteLog("%02X ", JaguarReadByte(base + i + j));
1168                         }
1169                         WriteLog("\n");
1170                 }*/
1171         }
1172 //      if (gpu_pc == 0xF03)
1173         {
1174         }
1175 }//*/
1176 /*if (gpu_pc == 0xF03B9E && gpu_reg_bank_0[01] == 0)
1177 {
1178         GPUDumpRegisters();
1179         WriteLog("GPU: Starting disassembly log...\n");
1180         doGPUDis = true;
1181 }//*/
1182 /*if (gpu_pc == 0xF0359A)
1183 {
1184         doGPUDis = true;
1185         GPUDumpRegisters();
1186 }*/
1187 /*              gpu_flag_c = (gpu_flag_c ? 1 : 0);
1188                 gpu_flag_z = (gpu_flag_z ? 1 : 0);
1189                 gpu_flag_n = (gpu_flag_n ? 1 : 0);*/
1190
1191                 uint16 opcode = GPUReadWord(gpu_pc, GPU);
1192                 uint32 index = opcode >> 10;
1193                 gpu_instruction = opcode;                               // Added for GPU #3...
1194                 gpu_opcode_first_parameter = (opcode >> 5) & 0x1F;
1195                 gpu_opcode_second_parameter = opcode & 0x1F;
1196 /*if (gpu_pc == 0xF03BE8)
1197 WriteLog("Start of OP frame write...\n");
1198 if (gpu_pc == 0xF03EEE)
1199 WriteLog("--> Writing BRANCH object ---\n");
1200 if (gpu_pc == 0xF03F62)
1201 WriteLog("--> Writing BITMAP object ***\n");//*/
1202 /*if (gpu_pc == 0xF03546)
1203 {
1204         WriteLog("\n--> GPU PC: F03546\n");
1205         GPUDumpRegisters();
1206         GPUDumpDisassembly();
1207 }//*/
1208 /*if (gpu_pc == 0xF033F6)
1209 {
1210         WriteLog("\n--> GPU PC: F033F6\n");
1211         GPUDumpRegisters();
1212         GPUDumpDisassembly();
1213 }//*/
1214 /*if (gpu_pc == 0xF033CC)
1215 {
1216         WriteLog("\n--> GPU PC: F033CC\n");
1217         GPUDumpRegisters();
1218         GPUDumpDisassembly();
1219 }//*/
1220 /*if (gpu_pc == 0xF033D6)
1221 {
1222         WriteLog("\n--> GPU PC: F033D6 (#%d)\n", testCount++);
1223         GPUDumpRegisters();
1224         GPUDumpMemory();
1225 }//*/
1226 /*if (gpu_pc == 0xF033D8)
1227 {
1228         WriteLog("\n--> GPU PC: F033D8 (#%d)\n", testCount++);
1229         GPUDumpRegisters();
1230         GPUDumpMemory();
1231 }//*/
1232 /*if (gpu_pc == 0xF0358E)
1233 {
1234         WriteLog("\n--> GPU PC: F0358E (#%d)\n", testCount++);
1235         GPUDumpRegisters();
1236         GPUDumpMemory();
1237 }//*/
1238 /*if (gpu_pc == 0xF034CA)
1239 {
1240         WriteLog("\n--> GPU PC: F034CA (#%d)\n", testCount++);
1241         GPUDumpRegisters();
1242 }//*/
1243 /*if (gpu_pc == 0xF034CA)
1244 {
1245         len = gpu_reg[1] + 4;//, r9save = gpu_reg[9];
1246         WriteLog("\nAbout to subtract [#%d] (R14=%08X, R15=%08X, R9=%08X):\n   ", testCount++, gpu_reg[14], gpu_reg[15], gpu_reg[9]);
1247         for(int i=0; i<len; i+=4)
1248                 WriteLog(" %08X", GPUReadLong(gpu_reg[15]+i));
1249         WriteLog("\n   ");
1250         for(int i=0; i<len; i+=4)
1251                 WriteLog(" %08X", GPUReadLong(gpu_reg[14]+i));
1252         WriteLog("\n\n");
1253 }
1254 if (gpu_pc == 0xF034DE)
1255 {
1256         WriteLog("\nSubtracted! (R14=%08X, R15=%08X):\n   ", gpu_reg[14], gpu_reg[15]);
1257         for(int i=0; i<len; i+=4)
1258                 WriteLog(" %08X", GPUReadLong(gpu_reg[15]+i));
1259         WriteLog("\n   ");
1260         for(int i=0; i<len; i+=4)
1261                 WriteLog(" %08X", GPUReadLong(gpu_reg[14]+i));
1262         WriteLog("\n   ");
1263         for(int i=0; i<len; i+=4)
1264                 WriteLog(" --------");
1265         WriteLog("\n   ");
1266         for(int i=0; i<len; i+=4)
1267                 WriteLog(" %08X", GPUReadLong(gpu_reg[9]+4+i));
1268         WriteLog("\n\n");
1269 }//*/
1270 /*if (gpu_pc == 0xF035C8)
1271 {
1272         WriteLog("\n--> GPU PC: F035C8 (#%d)\n", testCount++);
1273         GPUDumpRegisters();
1274         GPUDumpDisassembly();
1275 }//*/
1276
1277 if (gpu_start_log)
1278 {
1279 //      gpu_reset_stats();
1280 static char buffer[512];
1281 dasmjag(JAGUAR_GPU, buffer, gpu_pc);
1282 WriteLog("GPU: [%08X] %s (RM=%08X, RN=%08X) -> ", gpu_pc, buffer, RM, RN);
1283 }//*/
1284 //$E400 -> 1110 01 -> $39 -> 57
1285 //GPU #1
1286                 gpu_pc += 2;
1287                 gpu_opcode[index]();
1288 //GPU #2
1289 //              gpu2_opcode[index]();
1290 //              gpu_pc += 2;
1291 //GPU #3                                (Doesn't show ATARI logo! #1 & #2 do...)
1292 //              gpu_pc += 2;
1293 //              gpu3_opcode[index]();
1294
1295 // BIOS hacking
1296 //GPU: [00F03548] jr      nz,00F03560 (0xd561) (RM=00F03114, RN=00000004) ->     --> JR: Branch taken.
1297 /*static bool firstTime = true;
1298 if (gpu_pc == 0xF03548 && firstTime)
1299 {
1300         gpu_flag_z = 1;
1301 //      firstTime = false;
1302
1303 //static char buffer[512];
1304 //int k=0xF03548;
1305 //while (k<0xF0356C)
1306 //{
1307 //int oldk = k;
1308 //k += dasmjag(JAGUAR_GPU, buffer, k);
1309 //WriteLog("GPU: [%08X] %s\n", oldk, buffer);
1310 //}
1311 //      gpu_start_log = 1;
1312 }//*/
1313 //GPU: [00F0354C] jump    nz,(r29) (0xd3a1) (RM=00F03314, RN=00000004) -> (RM=00F03314, RN=00000004)
1314 /*if (gpu_pc == 0xF0354C)
1315         gpu_flag_z = 0;//, gpu_start_log = 1;//*/
1316
1317                 cycles -= gpu_opcode_cycles[index];
1318                 gpu_opcode_use[index]++;
1319 if (gpu_start_log)
1320         WriteLog("(RM=%08X, RN=%08X)\n", RM, RN);//*/
1321 if ((gpu_pc < 0xF03000 || gpu_pc > 0xF03FFF) && !tripwire)
1322 {
1323         WriteLog("GPU: Executing outside local RAM! GPU_PC: %08X\n", gpu_pc);
1324         tripwire = true;
1325 }
1326         }
1327
1328         gpu_in_exec--;
1329 }
1330
1331 //
1332 // GPU opcodes
1333 //
1334
1335 /*
1336 GPU opcodes use (offset punch--vertically below bad guy):
1337                       add 18686
1338                      addq 32621
1339                       sub 7483
1340                      subq 10252
1341                       and 21229
1342                        or 15003
1343                      btst 1822
1344                      bset 2072
1345                      mult 141
1346                       div 2392
1347                      shlq 13449
1348                      shrq 10297
1349                     sharq 11104
1350                       cmp 6775
1351                      cmpq 5944
1352                      move 31259
1353                     moveq 4473
1354                     movei 23277
1355                     loadb 46
1356                     loadw 4201
1357                      load 28580
1358          load_r14_indexed 1183
1359          load_r15_indexed 1125
1360                    storew 178
1361                     store 10144
1362         store_r14_indexed 320
1363         store_r15_indexed 1
1364                   move_pc 1742
1365                      jump 24467
1366                        jr 18090
1367                       nop 41362
1368 */
1369
1370 static void gpu_opcode_jump(void)
1371 {
1372 #ifdef GPU_DIS_JUMP
1373 const char * condition[32] =
1374 {       "T", "nz", "z", "???", "nc", "nc nz", "nc z", "???", "c", "c nz",
1375         "c z", "???", "???", "???", "???", "???", "???", "???", "???",
1376         "???", "nn", "nn nz", "nn z", "???", "n", "n nz", "n z", "???",
1377         "???", "???", "???", "F" };
1378         if (doGPUDis)
1379                 WriteLog("%06X: JUMP   %s, (R%02u) [NCZ:%u%u%u, R%02u=%08X] ", gpu_pc-2, condition[IMM_2], IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM);
1380 #endif
1381         // normalize flags
1382 /*      gpu_flag_c = (gpu_flag_c ? 1 : 0);
1383         gpu_flag_z = (gpu_flag_z ? 1 : 0);
1384         gpu_flag_n = (gpu_flag_n ? 1 : 0);*/
1385         // KLUDGE: Used by BRANCH_CONDITION
1386         uint32 jaguar_flags = (gpu_flag_n << 2) | (gpu_flag_c << 1) | gpu_flag_z;
1387
1388         if (BRANCH_CONDITION(IMM_2))
1389         {
1390 #ifdef GPU_DIS_JUMP
1391         if (doGPUDis)
1392                 WriteLog("Branched!\n");
1393 #endif
1394 if (gpu_start_log)
1395         WriteLog("    --> JUMP: Branch taken.\n");
1396                 uint32 delayed_pc = RM;
1397                 GPUExec(1);
1398                 gpu_pc = delayed_pc;
1399 /*              uint16 opcode = GPUReadWord(gpu_pc, GPU);
1400                 gpu_opcode_first_parameter = (opcode >> 5) & 0x1F;
1401                 gpu_opcode_second_parameter = opcode & 0x1F;
1402
1403                 gpu_pc = delayed_pc;
1404                 gpu_opcode[opcode>>10]();//*/
1405         }
1406 #ifdef GPU_DIS_JUMP
1407         else
1408                 if (doGPUDis)
1409                         WriteLog("Branch NOT taken.\n");
1410 #endif
1411 }
1412
1413 static void gpu_opcode_jr(void)
1414 {
1415 #ifdef GPU_DIS_JR
1416 const char * condition[32] =
1417 {       "T", "nz", "z", "???", "nc", "nc nz", "nc z", "???", "c", "c nz",
1418         "c z", "???", "???", "???", "???", "???", "???", "???", "???",
1419         "???", "nn", "nn nz", "nn z", "???", "n", "n nz", "n z", "???",
1420         "???", "???", "???", "F" };
1421         if (doGPUDis)
1422                 WriteLog("%06X: JR     %s, %06X [NCZ:%u%u%u] ", gpu_pc-2, condition[IMM_2], gpu_pc+((IMM_1 & 0x10 ? 0xFFFFFFF0 | IMM_1 : IMM_1) * 2), gpu_flag_n, gpu_flag_c, gpu_flag_z);
1423 #endif
1424 /*      if (CONDITION(jaguar.op & 31))
1425         {
1426                 int32 r1 = (INT8)((jaguar.op >> 2) & 0xF8) >> 2;
1427                 uint32 newpc = jaguar.PC + r1;
1428                 CALL_MAME_DEBUG;
1429                 jaguar.op = ROPCODE(jaguar.PC);
1430                 jaguar.PC = newpc;
1431                 (*jaguar.table[jaguar.op >> 10])();
1432
1433                 jaguar_icount -= 3;     // 3 wait states guaranteed
1434         }*/
1435         // normalize flags
1436 /*      gpu_flag_n = (gpu_flag_n ? 1 : 0);
1437         gpu_flag_c = (gpu_flag_c ? 1 : 0);
1438         gpu_flag_z = (gpu_flag_z ? 1 : 0);*/
1439         // KLUDGE: Used by BRANCH_CONDITION
1440         uint32 jaguar_flags = (gpu_flag_n << 2) | (gpu_flag_c << 1) | gpu_flag_z;
1441
1442         if (BRANCH_CONDITION(IMM_2))
1443         {
1444 #ifdef GPU_DIS_JR
1445         if (doGPUDis)
1446                 WriteLog("Branched!\n");
1447 #endif
1448 if (gpu_start_log)
1449         WriteLog("    --> JR: Branch taken.\n");
1450                 int32 offset = (IMM_1 & 0x10 ? 0xFFFFFFF0 | IMM_1 : IMM_1);             // Sign extend IMM_1
1451                 int32 delayed_pc = gpu_pc + (offset * 2);
1452                 GPUExec(1);
1453                 gpu_pc = delayed_pc;
1454 /*              uint16 opcode = GPUReadWord(gpu_pc, GPU);
1455                 gpu_opcode_first_parameter = (opcode >> 5) & 0x1F;
1456                 gpu_opcode_second_parameter = opcode & 0x1F;
1457
1458                 gpu_pc = delayed_pc;
1459                 gpu_opcode[opcode>>10]();//*/
1460         }
1461 #ifdef GPU_DIS_JR
1462         else
1463                 if (doGPUDis)
1464                         WriteLog("Branch NOT taken.\n");
1465 #endif
1466 }
1467
1468 static void gpu_opcode_add(void)
1469 {
1470 #ifdef GPU_DIS_ADD
1471         if (doGPUDis)
1472                 WriteLog("%06X: ADD    R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1473 #endif
1474         uint32 res = RN + RM;
1475         CLR_ZNC; SET_ZNC_ADD(RN, RM, res);
1476         RN = res;
1477 #ifdef GPU_DIS_ADD
1478         if (doGPUDis)
1479                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1480 #endif
1481 }
1482
1483 static void gpu_opcode_addc(void)
1484 {
1485 #ifdef GPU_DIS_ADDC
1486         if (doGPUDis)
1487                 WriteLog("%06X: ADDC   R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1488 #endif
1489 /*      int dreg = jaguar.op & 31;
1490         uint32 r1 = jaguar.r[(jaguar.op >> 5) & 31];
1491         uint32 r2 = jaguar.r[dreg];
1492         uint32 res = r2 + r1 + ((jaguar.FLAGS >> 1) & 1);
1493         jaguar.r[dreg] = res;
1494         CLR_ZNC; SET_ZNC_ADD(r2,r1,res);*/
1495
1496         uint32 res = RN + RM + gpu_flag_c;
1497         uint32 carry = gpu_flag_c;
1498 //      SET_ZNC_ADD(RN, RM, res); //???BUG??? Yes!
1499         SET_ZNC_ADD(RN + carry, RM, res);
1500 //      SET_ZNC_ADD(RN, RM + carry, res);
1501         RN = res;
1502 #ifdef GPU_DIS_ADDC
1503         if (doGPUDis)
1504                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1505 #endif
1506 }
1507
1508 static void gpu_opcode_addq(void)
1509 {
1510 #ifdef GPU_DIS_ADDQ
1511         if (doGPUDis)
1512                 WriteLog("%06X: ADDQ   #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1513 #endif
1514         uint32 r1 = gpu_convert_zero[IMM_1];
1515         uint32 res = RN + r1;
1516         CLR_ZNC; SET_ZNC_ADD(RN, r1, res);
1517         RN = res;
1518 #ifdef GPU_DIS_ADDQ
1519         if (doGPUDis)
1520                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1521 #endif
1522 }
1523
1524 static void gpu_opcode_addqt(void)
1525 {
1526 #ifdef GPU_DIS_ADDQT
1527         if (doGPUDis)
1528                 WriteLog("%06X: ADDQT  #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1529 #endif
1530         RN += gpu_convert_zero[IMM_1];
1531 #ifdef GPU_DIS_ADDQT
1532         if (doGPUDis)
1533                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1534 #endif
1535 }
1536
1537 static void gpu_opcode_sub(void)
1538 {
1539 #ifdef GPU_DIS_SUB
1540         if (doGPUDis)
1541                 WriteLog("%06X: SUB    R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1542 #endif
1543         uint32 res = RN - RM;
1544         SET_ZNC_SUB(RN, RM, res);
1545         RN = res;
1546 #ifdef GPU_DIS_SUB
1547         if (doGPUDis)
1548                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1549 #endif
1550 }
1551
1552 static void gpu_opcode_subc(void)
1553 {
1554 #ifdef GPU_DIS_SUBC
1555         if (doGPUDis)
1556                 WriteLog("%06X: SUBC   R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1557 #endif
1558         uint32 res = RN - RM - gpu_flag_c;
1559         uint32 borrow = gpu_flag_c;
1560 //      SET_ZNC_SUB(RN, RM, res); //???BUG??? YES!!!
1561 //No matter how you do it, there is a problem. With below, it's 0-0 with carry,
1562 //and the one below it it's FFFFFFFF - FFFFFFFF with carry... !!! FIX !!!
1563 //      SET_ZNC_SUB(RN - borrow, RM, res);
1564         SET_ZNC_SUB(RN, RM + borrow, res);
1565         RN = res;
1566 #ifdef GPU_DIS_SUBC
1567         if (doGPUDis)
1568                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1569 #endif
1570 }
1571 /*
1572 N = 5, M = 3, 3 - 5 = -2, C = 1... Or, in our case:
1573 N = 0, M = 1, 0 - 1 = -1, C = 0!
1574
1575 #define SET_C_SUB(a,b)          (gpu_flag_c = ((uint32)(b) > (uint32)(a)))
1576 #define SET_ZN(r)                       SET_N(r); SET_Z(r)
1577 #define SET_ZNC_ADD(a,b,r)      SET_N(r); SET_Z(r); SET_C_ADD(a,b)
1578 #define SET_ZNC_SUB(a,b,r)      SET_N(r); SET_Z(r); SET_C_SUB(a,b)
1579 */
1580 static void gpu_opcode_subq(void)
1581 {
1582 #ifdef GPU_DIS_SUBQ
1583         if (doGPUDis)
1584                 WriteLog("%06X: SUBQ   #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1585 #endif
1586         uint32 r1 = gpu_convert_zero[IMM_1];
1587         uint32 res = RN - r1;
1588         SET_ZNC_SUB(RN, r1, res);
1589         RN = res;
1590 #ifdef GPU_DIS_SUBQ
1591         if (doGPUDis)
1592                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1593 #endif
1594 }
1595
1596 static void gpu_opcode_subqt(void)
1597 {
1598 #ifdef GPU_DIS_SUBQT
1599         if (doGPUDis)
1600                 WriteLog("%06X: SUBQT  #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1601 #endif
1602         RN -= gpu_convert_zero[IMM_1];
1603 #ifdef GPU_DIS_SUBQT
1604         if (doGPUDis)
1605                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1606 #endif
1607 }
1608
1609 static void gpu_opcode_cmp(void)
1610 {
1611 #ifdef GPU_DIS_CMP
1612         if (doGPUDis)
1613                 WriteLog("%06X: CMP    R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1614 #endif
1615         uint32 res = RN - RM;
1616         SET_ZNC_SUB(RN, RM, res);
1617 #ifdef GPU_DIS_CMP
1618         if (doGPUDis)
1619                 WriteLog("[NCZ:%u%u%u]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z);
1620 #endif
1621 }
1622
1623 static void gpu_opcode_cmpq(void)
1624 {
1625         static int32 sqtable[32] =
1626                 { 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,-16,-15,-14,-13,-12,-11,-10,-9,-8,-7,-6,-5,-4,-3,-2,-1 };
1627 #ifdef GPU_DIS_CMPQ
1628         if (doGPUDis)
1629                 WriteLog("%06X: CMPQ   #%d, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, sqtable[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1630 #endif
1631         uint32 r1 = sqtable[IMM_1 & 0x1F]; // I like this better -> (INT8)(jaguar.op >> 2) >> 3;
1632         uint32 res = RN - r1;
1633         SET_ZNC_SUB(RN, r1, res);
1634 #ifdef GPU_DIS_CMPQ
1635         if (doGPUDis)
1636                 WriteLog("[NCZ:%u%u%u]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z);
1637 #endif
1638 }
1639
1640 static void gpu_opcode_and(void)
1641 {
1642 #ifdef GPU_DIS_AND
1643         if (doGPUDis)
1644                 WriteLog("%06X: AND    R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1645 #endif
1646         RN = RN & RM;
1647         SET_ZN(RN);
1648 #ifdef GPU_DIS_AND
1649         if (doGPUDis)
1650                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1651 #endif
1652 }
1653
1654 static void gpu_opcode_or(void)
1655 {
1656 #ifdef GPU_DIS_OR
1657         if (doGPUDis)
1658                 WriteLog("%06X: OR     R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1659 #endif
1660         RN = RN | RM;
1661         SET_ZN(RN);
1662 #ifdef GPU_DIS_OR
1663         if (doGPUDis)
1664                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1665 #endif
1666 }
1667
1668 static void gpu_opcode_xor(void)
1669 {
1670 #ifdef GPU_DIS_XOR
1671         if (doGPUDis)
1672                 WriteLog("%06X: XOR    R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1673 #endif
1674         RN = RN ^ RM;
1675         SET_ZN(RN);
1676 #ifdef GPU_DIS_XOR
1677         if (doGPUDis)
1678                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1679 #endif
1680 }
1681
1682 static void gpu_opcode_not(void)
1683 {
1684 #ifdef GPU_DIS_NOT
1685         if (doGPUDis)
1686                 WriteLog("%06X: NOT    R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1687 #endif
1688         RN = ~RN;
1689         SET_ZN(RN);
1690 #ifdef GPU_DIS_NOT
1691         if (doGPUDis)
1692                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1693 #endif
1694 }
1695
1696 static void gpu_opcode_move_pc(void)
1697 {
1698 #ifdef GPU_DIS_MOVEPC
1699         if (doGPUDis)
1700                 WriteLog("%06X: MOVE   PC, R%02u [NCZ:%u%u%u, PC=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, gpu_pc-2, IMM_2, RN);
1701 #endif
1702         // Should be previous PC--this might not always be previous instruction!
1703         // Then again, this will point right at the *current* instruction, i.e., MOVE PC,R!
1704         RN = gpu_pc - 2;
1705 #ifdef GPU_DIS_MOVEPC
1706         if (doGPUDis)
1707                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1708 #endif
1709 }
1710
1711 static void gpu_opcode_sat8(void)
1712 {
1713 #ifdef GPU_DIS_SAT8
1714         if (doGPUDis)
1715                 WriteLog("%06X: SAT8   R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1716 #endif
1717         RN = ((int32)RN < 0 ? 0 : (RN > 0xFF ? 0xFF : RN));
1718         SET_ZN(RN);
1719 #ifdef GPU_DIS_SAT8
1720         if (doGPUDis)
1721                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1722 #endif
1723 }
1724
1725 static void gpu_opcode_sat16(void)
1726 {
1727         RN = ((int32)RN < 0 ? 0 : (RN > 0xFFFF ? 0xFFFF : RN));
1728         SET_ZN(RN);
1729 }
1730
1731 static void gpu_opcode_sat24(void)
1732 {
1733         RN = ((int32)RN < 0 ? 0 : (RN > 0xFFFFFF ? 0xFFFFFF : RN));
1734         SET_ZN(RN);
1735 }
1736
1737 static void gpu_opcode_store_r14_indexed(void)
1738 {
1739 #ifdef GPU_DIS_STORE14I
1740         if (doGPUDis)
1741                 WriteLog("%06X: STORE  R%02u, (R14+$%02X) [NCZ:%u%u%u, R%02u=%08X, R14+$%02X=%08X]\n", gpu_pc-2, IMM_2, gpu_convert_zero[IMM_1] << 2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, gpu_convert_zero[IMM_1] << 2, gpu_reg[14]+(gpu_convert_zero[IMM_1] << 2));
1742 #endif
1743 #ifdef GPU_CORRECT_ALIGNMENT
1744         uint32 address = gpu_reg[14] + (gpu_convert_zero[IMM_1] << 2);
1745         
1746         if (address >= 0xF03000 && address <= 0xF03FFF)
1747                 GPUWriteLong(address & 0xFFFFFFFC, RN, GPU);
1748         else
1749                 GPUWriteLong(address, RN, GPU);
1750 #else
1751         GPUWriteLong(gpu_reg[14] + (gpu_convert_zero[IMM_1] << 2), RN, GPU);
1752 #endif
1753 }
1754
1755 static void gpu_opcode_store_r15_indexed(void)
1756 {
1757 #ifdef GPU_DIS_STORE15I
1758         if (doGPUDis)
1759                 WriteLog("%06X: STORE  R%02u, (R15+$%02X) [NCZ:%u%u%u, R%02u=%08X, R15+$%02X=%08X]\n", gpu_pc-2, IMM_2, gpu_convert_zero[IMM_1] << 2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, gpu_convert_zero[IMM_1] << 2, gpu_reg[15]+(gpu_convert_zero[IMM_1] << 2));
1760 #endif
1761 #ifdef GPU_CORRECT_ALIGNMENT
1762         uint32 address = gpu_reg[15] + (gpu_convert_zero[IMM_1] << 2);
1763
1764         if (address >= 0xF03000 && address <= 0xF03FFF)
1765                 GPUWriteLong(address & 0xFFFFFFFC, RN, GPU);
1766         else
1767                 GPUWriteLong(address, RN, GPU);
1768 #else
1769         GPUWriteLong(gpu_reg[15] + (gpu_convert_zero[IMM_1] << 2), RN, GPU);
1770 #endif
1771 }
1772
1773 static void gpu_opcode_load_r14_ri(void)
1774 {
1775 #ifdef GPU_DIS_LOAD14R
1776         if (doGPUDis)
1777                 WriteLog("%06X: LOAD   (R14+R%02u), R%02u [NCZ:%u%u%u, R14+R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM+gpu_reg[14], IMM_2, RN);
1778 #endif
1779 #ifdef GPU_CORRECT_ALIGNMENT
1780         uint32 address = gpu_reg[14] + RM;
1781
1782         if (address >= 0xF03000 && address <= 0xF03FFF)
1783                 RN = GPUReadLong(address & 0xFFFFFFFC, GPU);
1784         else
1785                 RN = GPUReadLong(address, GPU);
1786 #else
1787         RN = GPUReadLong(gpu_reg[14] + RM, GPU);
1788 #endif
1789 #ifdef GPU_DIS_LOAD14R
1790         if (doGPUDis)
1791                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1792 #endif
1793 }
1794
1795 static void gpu_opcode_load_r15_ri(void)
1796 {
1797 #ifdef GPU_DIS_LOAD15R
1798         if (doGPUDis)
1799                 WriteLog("%06X: LOAD   (R15+R%02u), R%02u [NCZ:%u%u%u, R15+R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM+gpu_reg[15], IMM_2, RN);
1800 #endif
1801 #ifdef GPU_CORRECT_ALIGNMENT
1802         uint32 address = gpu_reg[15] + RM;
1803
1804         if (address >= 0xF03000 && address <= 0xF03FFF)
1805                 RN = GPUReadLong(address & 0xFFFFFFFC, GPU);
1806         else
1807                 RN = GPUReadLong(address, GPU);
1808 #else
1809         RN = GPUReadLong(gpu_reg[15] + RM, GPU);
1810 #endif
1811 #ifdef GPU_DIS_LOAD15R
1812         if (doGPUDis)
1813                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1814 #endif
1815 }
1816
1817 static void gpu_opcode_store_r14_ri(void)
1818 {
1819 #ifdef GPU_DIS_STORE14R
1820         if (doGPUDis)
1821                 WriteLog("%06X: STORE  R%02u, (R14+R%02u) [NCZ:%u%u%u, R%02u=%08X, R14+R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM+gpu_reg[14]);
1822 #endif
1823 #ifdef GPU_CORRECT_ALIGNMENT
1824         uint32 address = gpu_reg[14] + RM;
1825
1826         if (address >= 0xF03000 && address <= 0xF03FFF)
1827                 GPUWriteLong(address & 0xFFFFFFFC, RN, GPU);
1828         else
1829                 GPUWriteLong(address, RN, GPU);
1830 #else
1831         GPUWriteLong(gpu_reg[14] + RM, RN, GPU);
1832 #endif
1833 }
1834
1835 static void gpu_opcode_store_r15_ri(void)
1836 {
1837 #ifdef GPU_DIS_STORE15R
1838         if (doGPUDis)
1839                 WriteLog("%06X: STORE  R%02u, (R15+R%02u) [NCZ:%u%u%u, R%02u=%08X, R15+R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM+gpu_reg[15]);
1840 #endif
1841 #ifdef GPU_CORRECT_ALIGNMENT_STORE
1842         uint32 address = gpu_reg[15] + RM;
1843
1844         if (address >= 0xF03000 && address <= 0xF03FFF)
1845                 GPUWriteLong(address & 0xFFFFFFFC, RN, GPU);
1846         else
1847                 GPUWriteLong(address, RN, GPU);
1848 #else
1849         GPUWriteLong(gpu_reg[15] + RM, RN, GPU);
1850 #endif
1851 }
1852
// NOP
// Does nothing apart from emitting a trace line when disassembly is enabled.
static void gpu_opcode_nop(void)
{
#ifdef GPU_DIS_NOP
	if (doGPUDis)
	{
		WriteLog("%06X: NOP    [NCZ:%u%u%u]\n",
			gpu_pc-2, gpu_flag_n, gpu_flag_c, gpu_flag_z);
	}
#endif
}
1860
1861 static void gpu_opcode_pack(void)
1862 {
1863 #ifdef GPU_DIS_PACK
1864         if (doGPUDis)
1865                 WriteLog("%06X: %s R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, (!IMM_1 ? "PACK  " : "UNPACK"), IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1866 #endif
1867         uint32 val = RN;
1868
1869 //BUG!  if (RM == 0)                            // Pack
1870         if (IMM_1 == 0)                         // Pack
1871                 RN = ((val >> 10) & 0x0000F000) | ((val >> 5) & 0x00000F00) | (val & 0x000000FF);
1872         else                                            // Unpack
1873                 RN = ((val & 0x0000F000) << 10) | ((val & 0x00000F00) << 5) | (val & 0x000000FF);
1874 #ifdef GPU_DIS_PACK
1875         if (doGPUDis)
1876                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1877 #endif
1878 }
1879
1880 static void gpu_opcode_storeb(void)
1881 {
1882 #ifdef GPU_DIS_STOREB
1883         if (doGPUDis)
1884                 WriteLog("%06X: STOREB R%02u, (R%02u) [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM);
1885 #endif
1886 //Is this right???
1887 // Would appear to be so...!
1888         if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1889                 GPUWriteLong(RM, RN & 0xFF, GPU);
1890         else
1891                 JaguarWriteByte(RM, RN, GPU);
1892 }
1893
1894 static void gpu_opcode_storew(void)
1895 {
1896 #ifdef GPU_DIS_STOREW
1897         if (doGPUDis)
1898                 WriteLog("%06X: STOREW R%02u, (R%02u) [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM);
1899 #endif
1900 #ifdef GPU_CORRECT_ALIGNMENT
1901         if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1902                 GPUWriteLong(RM & 0xFFFFFFFE, RN & 0xFFFF, GPU);
1903         else
1904                 JaguarWriteWord(RM, RN, GPU);
1905 #else
1906         if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1907                 GPUWriteLong(RM, RN & 0xFFFF, GPU);
1908         else
1909                 JaguarWriteWord(RM, RN, GPU);
1910 #endif
1911 }
1912
1913 static void gpu_opcode_store(void)
1914 {
1915 #ifdef GPU_DIS_STORE
1916         if (doGPUDis)
1917                 WriteLog("%06X: STORE  R%02u, (R%02u) [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM);
1918 #endif
1919 #ifdef GPU_CORRECT_ALIGNMENT
1920         if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1921                 GPUWriteLong(RM & 0xFFFFFFFC, RN, GPU);
1922         else
1923                 GPUWriteLong(RM, RN, GPU);
1924 #else
1925         GPUWriteLong(RM, RN, GPU);
1926 #endif
1927 }
1928
1929 static void gpu_opcode_storep(void)
1930 {
1931 #ifdef GPU_CORRECT_ALIGNMENT
1932         if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1933         {
1934                 GPUWriteLong((RM & 0xFFFFFFF8) + 0, gpu_hidata, GPU);
1935                 GPUWriteLong((RM & 0xFFFFFFF8) + 4, RN, GPU);
1936         }
1937         else
1938         {
1939                 GPUWriteLong(RM + 0, gpu_hidata, GPU);
1940                 GPUWriteLong(RM + 4, RN, GPU);
1941         }
1942 #else
1943         GPUWriteLong(RM + 0, gpu_hidata, GPU);
1944         GPUWriteLong(RM + 4, RN, GPU);
1945 #endif
1946 }
1947
1948 static void gpu_opcode_loadb(void)
1949 {
1950 #ifdef GPU_DIS_LOADB
1951         if (doGPUDis)
1952                 WriteLog("%06X: LOADB  (R%02u), R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1953 #endif
1954         if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1955                 RN = GPUReadLong(RM, GPU) & 0xFF;
1956         else
1957                 RN = JaguarReadByte(RM, GPU);
1958 #ifdef GPU_DIS_LOADB
1959         if (doGPUDis)
1960                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1961 #endif
1962 }
1963
1964 static void gpu_opcode_loadw(void)
1965 {
1966 #ifdef GPU_DIS_LOADW
1967         if (doGPUDis)
1968                 WriteLog("%06X: LOADW  (R%02u), R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1969 #endif
1970 #ifdef GPU_CORRECT_ALIGNMENT
1971         if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1972                 RN = GPUReadLong(RM & 0xFFFFFFFE, GPU) & 0xFFFF;
1973         else
1974                 RN = JaguarReadWord(RM, GPU);
1975 #else
1976         if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1977                 RN = GPUReadLong(RM, GPU) & 0xFFFF;
1978         else
1979                 RN = JaguarReadWord(RM, GPU);
1980 #endif
1981 #ifdef GPU_DIS_LOADW
1982         if (doGPUDis)
1983                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1984 #endif
1985 }
1986
1987 // According to the docs, & "Do The Same", this address is long aligned...
1988 // So let's try it:
1989 // And it works!!! Need to fix all instances...
1990 // Also, Power Drive Rally seems to contradict the idea that only LOADs in
1991 // the $F03000-$F03FFF range are aligned...
1992 #warning "!!! Alignment issues, need to find definitive final word on this !!!"
1993 /*
1994 Preliminary testing on real hardware seems to confirm that something strange goes on
1995 with unaligned reads in main memory. When the address is off by 1, the result is the
1996 same as the long address with the top byte replaced by something. So if the read is
1997 from $401, and $400 has 12 34 56 78, the value read will be $nn345678, where nn is a currently unknown value.
1998 When the address is off by 2, the result would be $nnnn5678, where nnnn is unknown.
1999 When the address is off by 3, the result would be $nnnnnn78, where nnnnnn is unknown.
2000 It may be that the "unknown" values come from the prefetch queue, but not sure how
2001 to test that. They seem to be stable, though, which would indicate such a mechanism.
2002 Sometimes, however, the off by 2 case returns $12345678!
2003 */
2004 static void gpu_opcode_load(void)
2005 {
2006 #ifdef GPU_DIS_LOAD
2007         if (doGPUDis)
2008                 WriteLog("%06X: LOAD   (R%02u), R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2009 #endif
2010 #ifdef GPU_CORRECT_ALIGNMENT
2011         uint32 mask[4] = { 0x00000000, 0xFF000000, 0xFFFF0000, 0xFFFFFF00 };
2012 //      if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
2013                 RN = GPUReadLong(RM & 0xFFFFFFFC, GPU);
2014 //              RN = GPUReadLong(RM & 0x00FFFFFC, GPU);
2015 //      else
2016 //              RN = GPUReadLong(RM, GPU);
2017         // Simulate garbage in unaligned reads...
2018 //seems that this behavior is different in GPU mem vs. main mem...
2019 //      if ((RM < 0xF03000) || (RM > 0xF0BFFF))
2020 //              RN |= mask[RM & 0x03];
2021 #else
2022         RN = GPUReadLong(RM, GPU);
2023 #endif
2024 #ifdef GPU_DIS_LOAD
2025         if (doGPUDis)
2026                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2027 #endif
2028 }
2029
2030 static void gpu_opcode_loadp(void)
2031 {
2032 #ifdef GPU_CORRECT_ALIGNMENT
2033         if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
2034         {
2035                 gpu_hidata = GPUReadLong((RM & 0xFFFFFFF8) + 0, GPU);
2036                 RN                 = GPUReadLong((RM & 0xFFFFFFF8) + 4, GPU);
2037         }
2038         else
2039         {
2040                 gpu_hidata = GPUReadLong(RM + 0, GPU);
2041                 RN                 = GPUReadLong(RM + 4, GPU);
2042         }
2043 #else
2044         gpu_hidata = GPUReadLong(RM + 0, GPU);
2045         RN                 = GPUReadLong(RM + 4, GPU);
2046 #endif
2047 }
2048
2049 static void gpu_opcode_load_r14_indexed(void)
2050 {
2051 #ifdef GPU_DIS_LOAD14I
2052         if (doGPUDis)
2053                 WriteLog("%06X: LOAD   (R14+$%02X), R%02u [NCZ:%u%u%u, R14+$%02X=%08X, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1] << 2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, gpu_convert_zero[IMM_1] << 2, gpu_reg[14]+(gpu_convert_zero[IMM_1] << 2), IMM_2, RN);
2054 #endif
2055 #ifdef GPU_CORRECT_ALIGNMENT
2056         uint32 address = gpu_reg[14] + (gpu_convert_zero[IMM_1] << 2);
2057
2058         if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
2059                 RN = GPUReadLong(address & 0xFFFFFFFC, GPU);
2060         else
2061                 RN = GPUReadLong(address, GPU);
2062 #else
2063         RN = GPUReadLong(gpu_reg[14] + (gpu_convert_zero[IMM_1] << 2), GPU);
2064 #endif
2065 #ifdef GPU_DIS_LOAD14I
2066         if (doGPUDis)
2067                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2068 #endif
2069 }
2070
2071 static void gpu_opcode_load_r15_indexed(void)
2072 {
2073 #ifdef GPU_DIS_LOAD15I
2074         if (doGPUDis)
2075                 WriteLog("%06X: LOAD   (R15+$%02X), R%02u [NCZ:%u%u%u, R15+$%02X=%08X, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1] << 2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, gpu_convert_zero[IMM_1] << 2, gpu_reg[15]+(gpu_convert_zero[IMM_1] << 2), IMM_2, RN);
2076 #endif
2077 #ifdef GPU_CORRECT_ALIGNMENT
2078         uint32 address = gpu_reg[15] + (gpu_convert_zero[IMM_1] << 2);
2079
2080         if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
2081                 RN = GPUReadLong(address & 0xFFFFFFFC, GPU);
2082         else
2083                 RN = GPUReadLong(address, GPU);
2084 #else
2085         RN = GPUReadLong(gpu_reg[15] + (gpu_convert_zero[IMM_1] << 2), GPU);
2086 #endif
2087 #ifdef GPU_DIS_LOAD15I
2088         if (doGPUDis)
2089                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2090 #endif
2091 }
2092
2093 static void gpu_opcode_movei(void)
2094 {
2095 #ifdef GPU_DIS_MOVEI
2096         if (doGPUDis)
2097                 WriteLog("%06X: MOVEI  #$%08X, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, (uint32)GPUReadWord(gpu_pc) | ((uint32)GPUReadWord(gpu_pc + 2) << 16), IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2098 #endif
2099         // This instruction is followed by 32-bit value in LSW / MSW format...
2100         RN = (uint32)GPUReadWord(gpu_pc, GPU) | ((uint32)GPUReadWord(gpu_pc + 2, GPU) << 16);
2101         gpu_pc += 4;
2102 #ifdef GPU_DIS_MOVEI
2103         if (doGPUDis)
2104                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2105 #endif
2106 }
2107
2108 static void gpu_opcode_moveta(void)
2109 {
2110 #ifdef GPU_DIS_MOVETA
2111         if (doGPUDis)
2112                 WriteLog("%06X: MOVETA R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u(alt)=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, ALTERNATE_RN);
2113 #endif
2114         ALTERNATE_RN = RM;
2115 #ifdef GPU_DIS_MOVETA
2116         if (doGPUDis)
2117                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u(alt)=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, ALTERNATE_RN);
2118 #endif
2119 }
2120
2121 static void gpu_opcode_movefa(void)
2122 {
2123 #ifdef GPU_DIS_MOVEFA
2124         if (doGPUDis)
2125                 WriteLog("%06X: MOVEFA R%02u, R%02u [NCZ:%u%u%u, R%02u(alt)=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, ALTERNATE_RM, IMM_2, RN);
2126 #endif
2127         RN = ALTERNATE_RM;
2128 #ifdef GPU_DIS_MOVEFA
2129         if (doGPUDis)
2130                 WriteLog("[NCZ:%u%u%u, R%02u(alt)=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, ALTERNATE_RM, IMM_2, RN);
2131 #endif
2132 }
2133
2134 static void gpu_opcode_move(void)
2135 {
2136 #ifdef GPU_DIS_MOVE
2137         if (doGPUDis)
2138                 WriteLog("%06X: MOVE   R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2139 #endif
2140         RN = RM;
2141 #ifdef GPU_DIS_MOVE
2142         if (doGPUDis)
2143                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2144 #endif
2145 }
2146
2147 static void gpu_opcode_moveq(void)
2148 {
2149 #ifdef GPU_DIS_MOVEQ
2150         if (doGPUDis)
2151                 WriteLog("%06X: MOVEQ  #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2152 #endif
2153         RN = IMM_1;
2154 #ifdef GPU_DIS_MOVEQ
2155         if (doGPUDis)
2156                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2157 #endif
2158 }
2159
2160 static void gpu_opcode_resmac(void)
2161 {
2162         RN = gpu_acc;
2163 }
2164
2165 static void gpu_opcode_imult(void)
2166 {
2167 #ifdef GPU_DIS_IMULT
2168         if (doGPUDis)
2169                 WriteLog("%06X: IMULT  R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2170 #endif
2171         RN = (int16)RN * (int16)RM;
2172         SET_ZN(RN);
2173 #ifdef GPU_DIS_IMULT
2174         if (doGPUDis)
2175                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2176 #endif
2177 }
2178
2179 static void gpu_opcode_mult(void)
2180 {
2181 #ifdef GPU_DIS_MULT
2182         if (doGPUDis)
2183                 WriteLog("%06X: MULT   R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2184 #endif
2185         RN = (uint16)RM * (uint16)RN;
2186         SET_ZN(RN);
2187 #ifdef GPU_DIS_MULT
2188         if (doGPUDis)
2189                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2190 #endif
2191 }
2192
2193 static void gpu_opcode_bclr(void)
2194 {
2195 #ifdef GPU_DIS_BCLR
2196         if (doGPUDis)
2197                 WriteLog("%06X: BCLR   #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2198 #endif
2199         uint32 res = RN & ~(1 << IMM_1);
2200         RN = res;
2201         SET_ZN(res);
2202 #ifdef GPU_DIS_BCLR
2203         if (doGPUDis)
2204                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2205 #endif
2206 }
2207
2208 static void gpu_opcode_btst(void)
2209 {
2210 #ifdef GPU_DIS_BTST
2211         if (doGPUDis)
2212                 WriteLog("%06X: BTST   #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2213 #endif
2214         gpu_flag_z = (~RN >> IMM_1) & 1;
2215 #ifdef GPU_DIS_BTST
2216         if (doGPUDis)
2217                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2218 #endif
2219 }
2220
2221 static void gpu_opcode_bset(void)
2222 {
2223 #ifdef GPU_DIS_BSET
2224         if (doGPUDis)
2225                 WriteLog("%06X: BSET   #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2226 #endif
2227         uint32 res = RN | (1 << IMM_1);
2228         RN = res;
2229         SET_ZN(res);
2230 #ifdef GPU_DIS_BSET
2231         if (doGPUDis)
2232                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2233 #endif
2234 }
2235
2236 static void gpu_opcode_imacn(void)
2237 {
2238         uint32 res = (int16)RM * (int16)(RN);
2239         gpu_acc += res;
2240 }
2241
2242 static void gpu_opcode_mtoi(void)
2243 {
2244         uint32 _RM = RM;
2245         uint32 res = RN = (((int32)_RM >> 8) & 0xFF800000) | (_RM & 0x007FFFFF);
2246         SET_ZN(res);
2247 }
2248
2249 static void gpu_opcode_normi(void)
2250 {
2251         uint32 _RM = RM;
2252         uint32 res = 0;
2253
2254         if (_RM)
2255         {
2256                 while ((_RM & 0xFFC00000) == 0)
2257                 {
2258                         _RM <<= 1;
2259                         res--;
2260                 }
2261                 while ((_RM & 0xFF800000) != 0)
2262                 {
2263                         _RM >>= 1;
2264                         res++;
2265                 }
2266         }
2267         RN = res;
2268         SET_ZN(res);
2269 }
2270
2271 static void gpu_opcode_mmult(void)
2272 {
2273         int count       = gpu_matrix_control & 0x0F;    // Matrix width
2274         uint32 addr = gpu_pointer_to_matrix;            // In the GPU's RAM
2275         int64 accum = 0;
2276         uint32 res;
2277
2278         if (gpu_matrix_control & 0x10)                          // Column stepping
2279         {
2280                 for(int i=0; i<count; i++)
2281                 {
2282                         int16 a;
2283                         if (i & 0x01)
2284                                 a = (int16)((gpu_alternate_reg[IMM_1 + (i >> 1)] >> 16) & 0xFFFF);
2285                         else
2286                                 a = (int16)(gpu_alternate_reg[IMM_1 + (i >> 1)] & 0xFFFF);
2287
2288                         int16 b = ((int16)GPUReadWord(addr + 2, GPU));
2289                         accum += a * b;
2290                         addr += 4 * count;
2291                 }
2292         }
2293         else                                                                            // Row stepping
2294         {
2295                 for(int i=0; i<count; i++)
2296                 {
2297                         int16 a;
2298                         if (i & 0x01)
2299                                 a = (int16)((gpu_alternate_reg[IMM_1 + (i >> 1)] >> 16) & 0xFFFF);
2300                         else
2301                                 a = (int16)(gpu_alternate_reg[IMM_1 + (i >> 1)] & 0xFFFF);
2302
2303                         int16 b = ((int16)GPUReadWord(addr + 2, GPU));
2304                         accum += a * b;
2305                         addr += 4;
2306                 }
2307         }
2308         RN = res = (int32)accum;
2309         // carry flag to do (out of the last add)
2310         SET_ZN(res);
2311 }
2312
2313 static void gpu_opcode_abs(void)
2314 {
2315 #ifdef GPU_DIS_ABS
2316         if (doGPUDis)
2317                 WriteLog("%06X: ABS    R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2318 #endif
2319         gpu_flag_c = RN >> 31;
2320         if (RN == 0x80000000)
2321         //Is 0x80000000 a positive number? If so, then we need to set C to 0 as well!
2322                 gpu_flag_n = 1, gpu_flag_z = 0;
2323         else
2324         {
2325                 if (gpu_flag_c)
2326                         RN = -RN;
2327                 gpu_flag_n = 0; SET_FLAG_Z(RN);
2328         }
2329 #ifdef GPU_DIS_ABS
2330         if (doGPUDis)
2331                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2332 #endif
2333 }
2334
2335 static void gpu_opcode_div(void)        // RN / RM
2336 {
2337 #ifdef GPU_DIS_DIV
2338         if (doGPUDis)
2339                 WriteLog("%06X: DIV    R%02u, R%02u (%s) [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, (gpu_div_control & 0x01 ? "16.16" : "32"), gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2340 #endif
2341 // NOTE: remainder is NOT calculated correctly here!
2342 //       The original tried to get it right by checking to see if the
2343 //       remainder was negative, but that's too late...
2344 // The code there should do it now, but I'm not 100% sure...
2345
2346         if (RM)
2347         {
2348                 if (gpu_div_control & 0x01)             // 16.16 division
2349                 {
2350                         RN = ((uint64)RN << 16) / RM;
2351                         gpu_remain = ((uint64)RN << 16) % RM;
2352                 }
2353                 else
2354                 {
2355                         RN = RN / RM;
2356                         gpu_remain = RN % RM;
2357                 }
2358
2359                 if ((gpu_remain - RM) & 0x80000000)     // If the result would have been negative...
2360                         gpu_remain -= RM;                       // Then make it negative!
2361         }
2362         else
2363                 RN = 0xFFFFFFFF;
2364
2365 /*      uint32 _RM=RM;
2366         uint32 _RN=RN;
2367
2368         if (_RM)
2369         {
2370                 if (gpu_div_control & 1)
2371                 {
2372                         gpu_remain = (((uint64)_RN) << 16) % _RM;
2373                         if (gpu_remain&0x80000000)
2374                                 gpu_remain-=_RM;
2375                         RN = (((uint64)_RN) << 16) / _RM;
2376                 }
2377                 else
2378                 {
2379                         gpu_remain = _RN % _RM;
2380                         if (gpu_remain&0x80000000)
2381                                 gpu_remain-=_RM;
2382                         RN/=_RM;
2383                 }
2384         }
2385         else
2386                 RN=0xffffffff;*/
2387 #ifdef GPU_DIS_DIV
2388         if (doGPUDis)
2389                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] Remainder: %08X\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN, gpu_remain);
2390 #endif
2391 }
2392
2393 static void gpu_opcode_imultn(void)
2394 {
2395         uint32 res = (int32)((int16)RN * (int16)RM);
2396         gpu_acc = (int32)res;
2397         SET_FLAG_Z(res);
2398         SET_FLAG_N(res);
2399 }
2400
2401 static void gpu_opcode_neg(void)
2402 {
2403 #ifdef GPU_DIS_NEG
2404         if (doGPUDis)
2405                 WriteLog("%06X: NEG    R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2406 #endif
2407         uint32 res = -RN;
2408         SET_ZNC_SUB(0, RN, res);
2409         RN = res;
2410 #ifdef GPU_DIS_NEG
2411         if (doGPUDis)
2412                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2413 #endif
2414 }
2415
2416 static void gpu_opcode_shlq(void)
2417 {
2418 #ifdef GPU_DIS_SHLQ
2419         if (doGPUDis)
2420                 WriteLog("%06X: SHLQ   #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, 32 - IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2421 #endif
2422 // Was a bug here...
2423 // (Look at Aaron's code: If r1 = 32, then 32 - 32 = 0 which is wrong!)
2424         int32 r1 = 32 - IMM_1;
2425         uint32 res = RN << r1;
2426         SET_ZN(res); gpu_flag_c = (RN >> 31) & 1;
2427         RN = res;
2428 #ifdef GPU_DIS_SHLQ
2429         if (doGPUDis)
2430                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2431 #endif
2432 }
2433
2434 static void gpu_opcode_shrq(void)
2435 {
2436 #ifdef GPU_DIS_SHRQ
2437         if (doGPUDis)
2438                 WriteLog("%06X: SHRQ   #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2439 #endif
2440         int32 r1 = gpu_convert_zero[IMM_1];
2441         uint32 res = RN >> r1;
2442         SET_ZN(res); gpu_flag_c = RN & 1;
2443         RN = res;
2444 #ifdef GPU_DIS_SHRQ
2445         if (doGPUDis)
2446                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2447 #endif
2448 }
2449
2450 static void gpu_opcode_ror(void)
2451 {
2452 #ifdef GPU_DIS_ROR
2453         if (doGPUDis)
2454                 WriteLog("%06X: ROR    R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2455 #endif
2456         uint32 r1 = RM & 0x1F;
2457         uint32 res = (RN >> r1) | (RN << (32 - r1));
2458         SET_ZN(res); gpu_flag_c = (RN >> 31) & 1;
2459         RN = res;
2460 #ifdef GPU_DIS_ROR
2461         if (doGPUDis)
2462                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2463 #endif
2464 }
2465
2466 static void gpu_opcode_rorq(void)
2467 {
2468 #ifdef GPU_DIS_RORQ
2469         if (doGPUDis)
2470                 WriteLog("%06X: RORQ   #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2471 #endif
2472         uint32 r1 = gpu_convert_zero[IMM_1 & 0x1F];
2473         uint32 r2 = RN;
2474         uint32 res = (r2 >> r1) | (r2 << (32 - r1));
2475         RN = res;
2476         SET_ZN(res); gpu_flag_c = (r2 >> 31) & 0x01;
2477 #ifdef GPU_DIS_RORQ
2478         if (doGPUDis)
2479                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2480 #endif
2481 }
2482
2483 static void gpu_opcode_sha(void)
2484 {
2485 /*      int dreg = jaguar.op & 31;
2486         int32 r1 = (int32)jaguar.r[(jaguar.op >> 5) & 31];
2487         uint32 r2 = jaguar.r[dreg];
2488         uint32 res;
2489
2490         CLR_ZNC;
2491         if (r1 < 0)
2492         {
2493                 res = (r1 <= -32) ? 0 : (r2 << -r1);
2494                 jaguar.FLAGS |= (r2 >> 30) & 2;
2495         }
2496         else
2497         {
2498                 res = (r1 >= 32) ? ((int32)r2 >> 31) : ((int32)r2 >> r1);
2499                 jaguar.FLAGS |= (r2 << 1) & 2;
2500         }
2501         jaguar.r[dreg] = res;
2502         SET_ZN(res);*/
2503
2504 #ifdef GPU_DIS_SHA
2505         if (doGPUDis)
2506                 WriteLog("%06X: SHA    R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2507 #endif
2508         uint32 res;
2509
2510         if ((int32)RM < 0)
2511         {
2512                 res = ((int32)RM <= -32) ? 0 : (RN << -(int32)RM);
2513                 gpu_flag_c = RN >> 31;
2514         }
2515         else
2516         {
2517                 res = ((int32)RM >= 32) ? ((int32)RN >> 31) : ((int32)RN >> (int32)RM);
2518                 gpu_flag_c = RN & 0x01;
2519         }
2520         RN = res;
2521         SET_ZN(res);
2522 #ifdef GPU_DIS_SHA
2523         if (doGPUDis)
2524                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2525 #endif
2526
2527 /*      int32 sRM=(int32)RM;
2528         uint32 _RN=RN;
2529
2530         if (sRM<0)
2531         {
2532                 uint32 shift=-sRM;
2533                 if (shift>=32) shift=32;
2534                 gpu_flag_c=(_RN&0x80000000)>>31;
2535                 while (shift)
2536                 {
2537                         _RN<<=1;
2538                         shift--;
2539                 }
2540         }
2541         else
2542         {
2543                 uint32 shift=sRM;
2544                 if (shift>=32) shift=32;
2545                 gpu_flag_c=_RN&0x1;
2546                 while (shift)
2547                 {
2548                         _RN=((int32)_RN)>>1;
2549                         shift--;
2550                 }
2551         }
2552         RN=_RN;
2553         SET_FLAG_Z(_RN);
2554         SET_FLAG_N(_RN);*/
2555 }
2556
2557 static void gpu_opcode_sharq(void)
2558 {
2559 #ifdef GPU_DIS_SHARQ
2560         if (doGPUDis)
2561                 WriteLog("%06X: SHARQ  #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2562 #endif
2563         uint32 res = (int32)RN >> gpu_convert_zero[IMM_1];
2564         SET_ZN(res); gpu_flag_c = RN & 0x01;
2565         RN = res;
2566 #ifdef GPU_DIS_SHARQ
2567         if (doGPUDis)
2568                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2569 #endif
2570 }
2571
2572 static void gpu_opcode_sh(void)
2573 {
2574 #ifdef GPU_DIS_SH
2575         if (doGPUDis)
2576                 WriteLog("%06X: SH     R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2577 #endif
2578         if (RM & 0x80000000)            // Shift left
2579         {
2580                 gpu_flag_c = RN >> 31;
2581                 RN = ((int32)RM <= -32 ? 0 : RN << -(int32)RM);
2582         }
2583         else                                            // Shift right
2584         {
2585                 gpu_flag_c = RN & 0x01;
2586                 RN = (RM >= 32 ? 0 : RN >> RM);
2587         }
2588         SET_ZN(RN);
2589 #ifdef GPU_DIS_SH
2590         if (doGPUDis)
2591                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2592 #endif
2593 }
2594
2595 //Temporary: Testing only!
2596 //#include "gpu2.cpp"
2597 //#include "gpu3.cpp"
2598
2599 #else
2600
2601 // New thread-safe GPU core
2602
//
// Placeholder entry point for the new thread-safe GPU core. It does no work
// yet: the argument is ignored and 0 is always returned, so that the function
// no longer falls off the end of a non-void body (undefined behaviour if the
// caller uses the result).
//
int GPUCore(void * data)
{
	(void)data;						// Not used by the stub
	return 0;
}
2606
2607 #endif