]> Shamusworld >> Repos - virtualjaguar/blob - src/gpu.cpp
Changed cycles to reflect pipelined architecture (still not right)
[virtualjaguar] / src / gpu.cpp
1 //
2 // GPU Core
3 //
4 // Originally by David Raingeard (Cal2)
5 // GCC/SDL port by Niels Wagenaar (Linux/WIN32) and Caz (BeOS)
6 // Cleanups, endian wrongness, and bad ASM amelioration by James L. Hammons
7 // Note: Endian wrongness probably stems from the MAME origins of this emu and
8 //       the braindead way in which MAME handles memory. :-)
9 //
10 // Problem with not booting the BIOS was the incorrect way that the
11 // SUBC instruction set the carry when the carry was set going in...
12 // Same problem with ADDC...
13 //
14
15 #include "gpu.h"
16
//#define GPU_DEBUG

// For GPU disassembly...
//
// One GPU_DIS_* switch per instruction family; all of them are defined here.
// NOTE(review): presumably each one is tested with #ifdef inside the matching
// opcode handler further down the file -- confirm there.

// Arithmetic, compare and saturate
#define GPU_DIS_ABS
#define GPU_DIS_ADD
#define GPU_DIS_ADDC
#define GPU_DIS_ADDQ
#define GPU_DIS_ADDQT
#define GPU_DIS_CMP
#define GPU_DIS_CMPQ
#define GPU_DIS_DIV
#define GPU_DIS_IMULT
#define GPU_DIS_MULT
#define GPU_DIS_NEG
#define GPU_DIS_SAT8
#define GPU_DIS_SUB
#define GPU_DIS_SUBC
#define GPU_DIS_SUBQ
#define GPU_DIS_SUBQT

// Logic and bit manipulation
#define GPU_DIS_AND
#define GPU_DIS_BCLR
#define GPU_DIS_BSET
#define GPU_DIS_BTST
#define GPU_DIS_NOT
#define GPU_DIS_OR
#define GPU_DIS_XOR

// Shifts and rotates
#define GPU_DIS_ROR
#define GPU_DIS_RORQ
#define GPU_DIS_SH
#define GPU_DIS_SHA
#define GPU_DIS_SHARQ
#define GPU_DIS_SHLQ
#define GPU_DIS_SHRQ

// Register moves and miscellaneous
#define GPU_DIS_MOVE
#define GPU_DIS_MOVEFA
#define GPU_DIS_MOVEI
#define GPU_DIS_MOVEPC
#define GPU_DIS_MOVETA
#define GPU_DIS_MOVEQ
#define GPU_DIS_NOP
#define GPU_DIS_PACK

// Loads
#define GPU_DIS_LOAD
#define GPU_DIS_LOADB
#define GPU_DIS_LOADW
#define GPU_DIS_LOAD14I
#define GPU_DIS_LOAD14R
#define GPU_DIS_LOAD15I
#define GPU_DIS_LOAD15R

// Stores
#define GPU_DIS_STORE
#define GPU_DIS_STOREB
#define GPU_DIS_STOREW
#define GPU_DIS_STORE14I
#define GPU_DIS_STORE14R
#define GPU_DIS_STORE15I
#define GPU_DIS_STORE15R

// Flow control
#define GPU_DIS_JUMP
#define GPU_DIS_JR

// Run-time disassembly switch; it starts out false. Initialise it to true
// instead to start with it switched on.
bool doGPUDis = false;
78 //*/
79 /*
80 GPU opcodes use (BIOS flying ATARI logo):
81 +                     add 357416
82 +                    addq 538030
83 +                   addqt 6999
84 +                     sub 116663
85 +                    subq 188059
86 +                   subqt 15086
87 +                     neg 36097
88 +                     and 233993
89 +                      or 109332
90 +                     xor 1384
91 +                    btst 111924
92 +                    bset 25029
93 +                    bclr 10551
94 +                    mult 28147
95 +                   imult 69148
96 +                     div 64102
97 +                     abs 159394
98 +                    shlq 194690
99 +                    shrq 292587
100 +                   sharq 192649
101 +                    rorq 58672
102 +                     cmp 244963
103 +                    cmpq 114834
104 +                    move 833472
105 +                   moveq 56427
106 +                  moveta 220814
107 +                  movefa 170678
108 +                   movei 152025
109 +                   loadw 108220
110 +                    load 430936
111 +                  storew 3036
112 +                   store 372490
113 +                 move_pc 2330
114 +                    jump 349134
115 +                      jr 529171
116                     mmult 64904
117 +                     nop 432179
118 */
119
// Various bits
//
// CINT0FLAG..CINT4FLAG are the five interrupt latch clear bits of a value
// written to the flags register; shifted right by three they line up with
// the latch bits that get cleared in gpu_control.

#define CINT0FLAG			(1 << 9)
#define CINT1FLAG			(1 << 10)
#define CINT2FLAG			(1 << 11)
#define CINT3FLAG			(1 << 12)
#define CINT4FLAG			(1 << 13)
#define CINT04FLAGS			(CINT0FLAG | CINT1FLAG | CINT2FLAG | CINT3FLAG | CINT4FLAG)

// GPU_FLAGS bits, lowest bit first. INT_CLR0..INT_CLR4 occupy the same bit
// positions as CINT0FLAG..CINT4FLAG above.

#define ZERO_FLAG		(1 << 0)
#define CARRY_FLAG		(1 << 1)
#define NEGA_FLAG		(1 << 2)
#define IMASK			(1 << 3)
#define INT_ENA0		(1 << 4)
#define INT_ENA1		(1 << 5)
#define INT_ENA2		(1 << 6)
#define INT_ENA3		(1 << 7)
#define INT_ENA4		(1 << 8)
#define INT_CLR0		(1 << 9)
#define INT_CLR1		(1 << 10)
#define INT_CLR2		(1 << 11)
#define INT_CLR3		(1 << 12)
#define INT_CLR4		(1 << 13)
#define REGPAGE			(1 << 14)
#define DMAEN			(1 << 15)
147
// External global variables

extern int start_logging;
extern int gpu_start_log;

// Private function prototypes

void GPUUpdateRegisterBanks(void);

void GPUDumpDisassembly(void);
void GPUDumpRegisters(void);
void GPUDumpMemory(void);

// Opcode handlers, declared in the same order as the gpu_opcode[] dispatch
// table (four per group, matching that table's rows).

// Opcodes 0-3
static void gpu_opcode_add(void);
static void gpu_opcode_addc(void);
static void gpu_opcode_addq(void);
static void gpu_opcode_addqt(void);

// Opcodes 4-7
static void gpu_opcode_sub(void);
static void gpu_opcode_subc(void);
static void gpu_opcode_subq(void);
static void gpu_opcode_subqt(void);

// Opcodes 8-11
static void gpu_opcode_neg(void);
static void gpu_opcode_and(void);
static void gpu_opcode_or(void);
static void gpu_opcode_xor(void);

// Opcodes 12-15
static void gpu_opcode_not(void);
static void gpu_opcode_btst(void);
static void gpu_opcode_bset(void);
static void gpu_opcode_bclr(void);

// Opcodes 16-19
static void gpu_opcode_mult(void);
static void gpu_opcode_imult(void);
static void gpu_opcode_imultn(void);
static void gpu_opcode_resmac(void);

// Opcodes 20-23
static void gpu_opcode_imacn(void);
static void gpu_opcode_div(void);
static void gpu_opcode_abs(void);
static void gpu_opcode_sh(void);

// Opcodes 24-27
static void gpu_opcode_shlq(void);
static void gpu_opcode_shrq(void);
static void gpu_opcode_sha(void);
static void gpu_opcode_sharq(void);

// Opcodes 28-31
static void gpu_opcode_ror(void);
static void gpu_opcode_rorq(void);
static void gpu_opcode_cmp(void);
static void gpu_opcode_cmpq(void);

// Opcodes 32-35
static void gpu_opcode_sat8(void);
static void gpu_opcode_sat16(void);
static void gpu_opcode_move(void);
static void gpu_opcode_moveq(void);

// Opcodes 36-39
static void gpu_opcode_moveta(void);
static void gpu_opcode_movefa(void);
static void gpu_opcode_movei(void);
static void gpu_opcode_loadb(void);

// Opcodes 40-43
static void gpu_opcode_loadw(void);
static void gpu_opcode_load(void);
static void gpu_opcode_loadp(void);
static void gpu_opcode_load_r14_indexed(void);

// Opcodes 44-47
static void gpu_opcode_load_r15_indexed(void);
static void gpu_opcode_storeb(void);
static void gpu_opcode_storew(void);
static void gpu_opcode_store(void);

// Opcodes 48-51
static void gpu_opcode_storep(void);
static void gpu_opcode_store_r14_indexed(void);
static void gpu_opcode_store_r15_indexed(void);
static void gpu_opcode_move_pc(void);

// Opcodes 52-55
static void gpu_opcode_jump(void);
static void gpu_opcode_jr(void);
static void gpu_opcode_mmult(void);
static void gpu_opcode_mtoi(void);

// Opcodes 56-59
static void gpu_opcode_normi(void);
static void gpu_opcode_nop(void);
static void gpu_opcode_load_r14_ri(void);
static void gpu_opcode_load_r15_ri(void);

// Opcodes 60-63
static void gpu_opcode_store_r14_ri(void);
static void gpu_opcode_store_r15_ri(void);
static void gpu_opcode_sat24(void);
static void gpu_opcode_pack(void);
225
226 // This is wrong, since it doesn't take pipeline effects into account. !!! FIX !!!
227 /*uint8 gpu_opcode_cycles[64] = 
228 {
229         3,  3,  3,  3,  3,  3,  3,  3,
230         3,  3,  3,  3,  3,  3,  3,  3,
231         3,  3,  1,  3,  1, 18,  3,  3,
232         3,  3,  3,  3,  3,  3,  3,  3,
233         3,  3,  2,  2,  2,  2,  3,  4,
234         5,  4,  5,  6,  6,  1,  1,  1,
235         1,  2,  2,  2,  1,  1,  9,  3,
236         3,  1,  6,  6,  2,  2,  3,  3
237 };//*/
238 //Here's a QnD kludge...
239 //This is wrong, wrong, WRONG, but it seems to work for the time being...
240 //(That is, it fixes Flip Out which relies on GPU timing rather than semaphores. Bad developers! Bad!)
241 //What's needed here is a way to take pipeline effects into account (including pipeline stalls!)...
242 /*uint8 gpu_opcode_cycles[64] = 
243 {
244         1,  1,  1,  1,  1,  1,  1,  1,
245         1,  1,  1,  1,  1,  1,  1,  1,
246         1,  1,  1,  1,  1,  9,  1,  1,
247         1,  1,  1,  1,  1,  1,  1,  1,
248         1,  1,  1,  1,  1,  1,  1,  2,
249         2,  2,  2,  3,  3,  1,  1,  1,
250         1,  1,  1,  1,  1,  1,  4,  1,
251         1,  1,  3,  3,  1,  1,  1,  1
252 };//*/
253 uint8 gpu_opcode_cycles[64] = 
254 {
255         1,  1,  1,  1,  1,  1,  1,  1,
256         1,  1,  1,  1,  1,  1,  1,  1,
257         1,  1,  1,  1,  1,  1,  1,  1,
258         1,  1,  1,  1,  1,  1,  1,  1,
259         1,  1,  1,  1,  1,  1,  1,  1,
260         1,  1,  1,  1,  1,  1,  1,  1,
261         1,  1,  1,  1,  1,  1,  1,  1,
262         1,  1,  1,  1,  1,  1,  1,  1
263 };//*/
264
265 void (*gpu_opcode[64])()= 
266 {       
267         gpu_opcode_add,                                 gpu_opcode_addc,                                gpu_opcode_addq,                                gpu_opcode_addqt,
268         gpu_opcode_sub,                                 gpu_opcode_subc,                                gpu_opcode_subq,                                gpu_opcode_subqt,
269         gpu_opcode_neg,                                 gpu_opcode_and,                                 gpu_opcode_or,                                  gpu_opcode_xor,
270         gpu_opcode_not,                                 gpu_opcode_btst,                                gpu_opcode_bset,                                gpu_opcode_bclr,
271         gpu_opcode_mult,                                gpu_opcode_imult,                               gpu_opcode_imultn,                              gpu_opcode_resmac,
272         gpu_opcode_imacn,                               gpu_opcode_div,                                 gpu_opcode_abs,                                 gpu_opcode_sh,
273         gpu_opcode_shlq,                                gpu_opcode_shrq,                                gpu_opcode_sha,                                 gpu_opcode_sharq,
274         gpu_opcode_ror,                                 gpu_opcode_rorq,                                gpu_opcode_cmp,                                 gpu_opcode_cmpq,
275         gpu_opcode_sat8,                                gpu_opcode_sat16,                               gpu_opcode_move,                                gpu_opcode_moveq,
276         gpu_opcode_moveta,                              gpu_opcode_movefa,                              gpu_opcode_movei,                               gpu_opcode_loadb,
277         gpu_opcode_loadw,                               gpu_opcode_load,                                gpu_opcode_loadp,                               gpu_opcode_load_r14_indexed,
278         gpu_opcode_load_r15_indexed,    gpu_opcode_storeb,                              gpu_opcode_storew,                              gpu_opcode_store,
279         gpu_opcode_storep,                              gpu_opcode_store_r14_indexed,   gpu_opcode_store_r15_indexed,   gpu_opcode_move_pc,
280         gpu_opcode_jump,                                gpu_opcode_jr,                                  gpu_opcode_mmult,                               gpu_opcode_mtoi,
281         gpu_opcode_normi,                               gpu_opcode_nop,                                 gpu_opcode_load_r14_ri,                 gpu_opcode_load_r15_ri,
282         gpu_opcode_store_r14_ri,                gpu_opcode_store_r15_ri,                gpu_opcode_sat24,                               gpu_opcode_pack,
283 };
284
285 static uint8 * gpu_ram_8;
286 uint32 gpu_pc;
287 static uint32 gpu_acc;
288 static uint32 gpu_remain;
289 static uint32 gpu_hidata;
290 static uint32 gpu_flags;
291 static uint32 gpu_matrix_control;
292 static uint32 gpu_pointer_to_matrix;
293 static uint32 gpu_data_organization;
294 static uint32 gpu_control;
295 static uint32 gpu_div_control;
296 // There is a distinct advantage to having these separated out--there's no need to clear
297 // a bit before writing a result. I.e., if the result of an operation leaves a zero in
298 // the carry flag, you don't have to zero gpu_flag_c before you can write that zero!
299 static uint8 gpu_flag_z, gpu_flag_n, gpu_flag_c;
300 static uint32 * gpu_reg_bank_0;
301 static uint32 * gpu_reg_bank_1;
302 static uint32 * gpu_reg;
303 static uint32 * gpu_alternate_reg;
304
305 static uint32 gpu_instruction;
306 static uint32 gpu_opcode_first_parameter;
307 static uint32 gpu_opcode_second_parameter;
308
// Non-zero while bit 0 of the control register is set
#define GPU_RUNNING		(gpu_control & 0x01)

// Operand access: the two operand fields of the current instruction used
// either as register numbers (current or alternate bank) or as immediates.
#define RM				gpu_reg[gpu_opcode_first_parameter]
#define RN				gpu_reg[gpu_opcode_second_parameter]
#define ALTERNATE_RM	gpu_alternate_reg[gpu_opcode_first_parameter]
#define ALTERNATE_RN	gpu_alternate_reg[gpu_opcode_second_parameter]
#define IMM_1			gpu_opcode_first_parameter
#define IMM_2			gpu_opcode_second_parameter

// Older flag helpers. NOTE: each of these expands with a trailing semicolon
// of its own, which is kept so that existing uses keep compiling; because of
// it they must not be used as the body of an unbraced if/else. Prefer the
// SET_*/CLR_* forms below.
#define SET_FLAG_Z(r)	(gpu_flag_z = ((r) == 0));
#define SET_FLAG_N(r)	(gpu_flag_n = (((uint32)(r) >> 31) & 0x01));

#define RESET_FLAG_Z()	gpu_flag_z = 0;
#define RESET_FLAG_N()	gpu_flag_n = 0;
#define RESET_FLAG_C()	gpu_flag_c = 0;

// Flag helpers. Each one is a single parenthesized expression, so it is safe
// anywhere one statement is expected (e.g. "if (x) SET_ZN(r);").
#define CLR_Z				(gpu_flag_z = 0)
#define CLR_ZN				(gpu_flag_z = gpu_flag_n = 0)
#define CLR_ZNC				(gpu_flag_z = gpu_flag_n = gpu_flag_c = 0)
#define SET_Z(r)			(gpu_flag_z = ((r) == 0))
#define SET_N(r)			(gpu_flag_n = (((uint32)(r) >> 31) & 0x01))
// Carry out of a + b: set when b is larger than the headroom left above a
#define SET_C_ADD(a,b)		(gpu_flag_c = ((uint32)(b) > (uint32)(~(a))))
// Borrow out of a - b: set when b is larger than a
#define SET_C_SUB(a,b)		(gpu_flag_c = ((uint32)(b) > (uint32)(a)))
// The combined forms used to expand to several ';'-separated statements, so
// under an unbraced "if" only the first was conditional. Comma expressions
// evaluate the same assignments in the same order as one expression.
#define SET_ZN(r)			(SET_N(r), SET_Z(r))
#define SET_ZNC_ADD(a,b,r)	(SET_N(r), SET_Z(r), SET_C_ADD(a,b))
#define SET_ZNC_SUB(a,b,r)	(SET_N(r), SET_Z(r), SET_C_SUB(a,b))
335
336 uint32 gpu_convert_zero[32] =
337         { 32,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 };
338
339 uint8 * branch_condition_table = 0;
340 #define BRANCH_CONDITION(x)     branch_condition_table[(x) + ((jaguar_flags & 7) << 5)]
341
342 uint32 gpu_opcode_use[64];
343
// Mnemonic for each opcode number; same ordering as the gpu_opcode[] table.
char * gpu_opcode_str[64] =
{
	/*  0 */ "add", "addc", "addq", "addqt",
	/*  4 */ "sub", "subc", "subq", "subqt",
	/*  8 */ "neg", "and", "or", "xor",
	/* 12 */ "not", "btst", "bset", "bclr",
	/* 16 */ "mult", "imult", "imultn", "resmac",
	/* 20 */ "imacn", "div", "abs", "sh",
	/* 24 */ "shlq", "shrq", "sha", "sharq",
	/* 28 */ "ror", "rorq", "cmp", "cmpq",
	/* 32 */ "sat8", "sat16", "move", "moveq",
	/* 36 */ "moveta", "movefa", "movei", "loadb",
	/* 40 */ "loadw", "load", "loadp", "load_r14_indexed",
	/* 44 */ "load_r15_indexed", "storeb", "storew", "store",
	/* 48 */ "storep", "store_r14_indexed", "store_r15_indexed", "move_pc",
	/* 52 */ "jump", "jr", "mmult", "mtoi",
	/* 56 */ "normi", "nop", "load_r14_ri", "load_r15_ri",
	/* 60 */ "store_r14_ri", "store_r15_ri", "sat24", "pack",
};
363
364 static uint32 gpu_in_exec = 0;
365 static uint32 gpu_releaseTimeSlice_flag = 0;
366
367 void gpu_releaseTimeslice(void)
368 {
369         gpu_releaseTimeSlice_flag = 1;
370 }
371
372 uint32 gpu_get_pc(void)
373 {
374         return gpu_pc;
375 }
376
377 void build_branch_condition_table(void)
378 {
379         if (!branch_condition_table)
380         {
381                 branch_condition_table = (uint8 *)malloc(32 * 8 * sizeof(branch_condition_table[0]));
382
383                 if (branch_condition_table)
384                 {
385                         for(int i=0; i<8; i++)
386                         {
387                                 for(int j=0; j<32; j++)
388                                 {
389                                         int result = 1;
390                                         if (j & 1)
391                                                 if (i & ZERO_FLAG)
392                                                         result = 0;
393                                         if (j & 2)
394                                                 if (!(i & ZERO_FLAG))
395                                                         result = 0;
396                                         if (j & 4)
397                                                 if (i & (CARRY_FLAG << (j >> 4)))
398                                                         result = 0;
399                                         if (j & 8)
400                                                 if (!(i & (CARRY_FLAG << (j >> 4))))
401                                                         result = 0;
402                                         branch_condition_table[i * 32 + j] = result;
403                                 }
404                         }
405                 }
406         }
407 }
408
409 //
410 // GPU byte access (read)
411 //
412 uint8 GPUReadByte(uint32 offset, uint32 who/*=UNKNOWN*/)
413 {
414         if (offset >= 0xF02000 && offset <= 0xF020FF)
415                 WriteLog("GPU: ReadByte--Attempt to read from GPU register file by %s!\n", whoName[who]);
416
417         if ((offset >= GPU_WORK_RAM_BASE) && (offset < GPU_WORK_RAM_BASE+0x1000))
418                 return gpu_ram_8[offset & 0xFFF];
419         else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset < GPU_CONTROL_RAM_BASE+0x20))
420         {
421                 uint32 data = GPUReadLong(offset & 0xFFFFFFFC, who);
422
423                 if ((offset & 0x03) == 0)
424                         return data >> 24;
425                 else if ((offset & 0x03) == 1)
426                         return (data >> 16) & 0xFF;
427                 else if ((offset & 0x03) == 2)
428                         return (data >> 8) & 0xFF;
429                 else if ((offset & 0x03) == 3)
430                         return data & 0xFF;
431         }
432
433         return JaguarReadByte(offset, who);
434 }
435
436 //
437 // GPU word access (read)
438 //
439 uint16 GPUReadWord(uint32 offset, uint32 who/*=UNKNOWN*/)
440 {
441         if (offset >= 0xF02000 && offset <= 0xF020FF)
442                 WriteLog("GPU: ReadWord--Attempt to read from GPU register file by %s!\n", whoName[who]);
443
444         if ((offset >= GPU_WORK_RAM_BASE) && (offset < GPU_WORK_RAM_BASE+0x1000))
445         {
446                 offset &= 0xFFF;
447                 uint16 data = ((uint16)gpu_ram_8[offset] << 8) | (uint16)gpu_ram_8[offset+1];
448                 return data;
449         }
450         else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset < GPU_CONTROL_RAM_BASE+0x20))
451         {
452 // This looks and smells wrong...
453 // But it *might* be OK...
454                 if (offset & 0x01)                      // Catch cases 1 & 3... (unaligned read)
455                         return (GPUReadByte(offset, who) << 8) | GPUReadByte(offset+1, who);
456
457                 uint32 data = GPUReadLong(offset & 0xFFFFFFFC, who);
458
459                 if (offset & 0x02)                      // Cases 0 & 2...
460                         return data & 0xFFFF;
461                 else
462                         return data >> 16;
463         }
464
465 //TEMP--Mirror of F03000? No. Writes only...
466 //if (offset >= 0xF0B000 && offset <= 0xF0BFFF)
467 //WriteLog("[GPUR16] --> Possible GPU RAM mirror access by %s!", whoName[who]);
468
469         return JaguarReadWord(offset, who);
470 }
471
472 //
473 // GPU dword access (read)
474 //
475 uint32 GPUReadLong(uint32 offset, uint32 who/*=UNKNOWN*/)
476 {
477         if (offset >= 0xF02000 && offset <= 0xF020FF)
478                 WriteLog("GPU: ReadLong--Attempt to read from GPU register file by %s!\n", whoName[who]);
479
480 //      if ((offset >= GPU_WORK_RAM_BASE) && (offset < GPU_WORK_RAM_BASE + 0x1000))
481         if ((offset >= GPU_WORK_RAM_BASE) && (offset <= GPU_WORK_RAM_BASE + 0x0FFC))
482         {
483                 offset &= 0xFFF;
484                 return ((uint32)gpu_ram_8[offset] << 24) | ((uint32)gpu_ram_8[offset+1] << 16)
485                         | ((uint32)gpu_ram_8[offset+2] << 8) | (uint32)gpu_ram_8[offset+3];//*/
486 //              return GET32(gpu_ram_8, offset);
487         }
488 //      else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset < GPU_CONTROL_RAM_BASE+0x20))
489         else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset <= GPU_CONTROL_RAM_BASE + 0x1C))
490         {
491                 offset &= 0x1F;
492                 switch (offset)
493                 {
494                 case 0x00:
495                         gpu_flag_c = (gpu_flag_c ? 1 : 0);
496                         gpu_flag_z = (gpu_flag_z ? 1 : 0);
497                         gpu_flag_n = (gpu_flag_n ? 1 : 0);
498
499                         gpu_flags = (gpu_flags & 0xFFFFFFF8) | (gpu_flag_n << 2) | (gpu_flag_c << 1) | gpu_flag_z;
500                                         
501                         return gpu_flags & 0xFFFFC1FF;
502                 case 0x04:
503                         return gpu_matrix_control;
504                 case 0x08:
505                         return gpu_pointer_to_matrix;
506                 case 0x0C:
507                         return gpu_data_organization;
508                 case 0x10:
509                         return gpu_pc;
510                 case 0x14:
511                         return gpu_control;
512                 case 0x18:
513                         return gpu_hidata;
514                 case 0x1C:
515                         return gpu_remain;
516                 default:                                                                // unaligned long read
517 #ifdef GPU_DEBUG
518                         WriteLog("GPU: Read32--unaligned 32 bit read at %08X by %s.\n", GPU_CONTROL_RAM_BASE + offset, whoName[who]);
519 #endif  // GPU_DEBUG
520                         return 0;
521                 }
522         }
523 //TEMP--Mirror of F03000? No. Writes only...
524 //if (offset >= 0xF0B000 && offset <= 0xF0BFFF)
525 //      WriteLog("[GPUR32] --> Possible GPU RAM mirror access by %s!\n", whoName[who]);
526 /*if (offset >= 0xF1D000 && offset <= 0xF1DFFF)
527         WriteLog("[GPUR32] --> Reading from Wavetable ROM!\n");//*/
528
529         return (JaguarReadWord(offset, who) << 16) | JaguarReadWord(offset + 2, who);
530 }
531
532 //
533 // GPU byte access (write)
534 //
535 void GPUWriteByte(uint32 offset, uint8 data, uint32 who/*=UNKNOWN*/)
536 {
537         if (offset >= 0xF02000 && offset <= 0xF020FF)
538                 WriteLog("GPU: WriteByte--Attempt to write to GPU register file by %s!\n", whoName[who]);
539
540         if ((offset >= GPU_WORK_RAM_BASE) && (offset <= GPU_WORK_RAM_BASE + 0x0FFF))
541         {
542                 gpu_ram_8[offset & 0xFFF] = data;
543
544 //This is the same stupid worthless code that was in the DSP!!! AARRRGGGGHHHHH!!!!!!
545 /*              if (!gpu_in_exec)
546                 {
547                         m68k_end_timeslice();
548                         dsp_releaseTimeslice();
549                 }*/
550                 return;
551         }
552         else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset <= GPU_CONTROL_RAM_BASE + 0x1F))
553         {
554                 uint32 reg = offset & 0x1C;
555                 int bytenum = offset & 0x03;
556
557 //This is definitely wrong!
558                 if ((reg >= 0x1C) && (reg <= 0x1F))
559                         gpu_div_control = (gpu_div_control & (~(0xFF << (bytenum << 3)))) | (data << (bytenum << 3));
560                 else
561                 {
562                         uint32 old_data = GPUReadLong(offset & 0xFFFFFFC, who);
563                         bytenum = 3 - bytenum; // convention motorola !!!
564                         old_data = (old_data & (~(0xFF << (bytenum << 3)))) | (data << (bytenum << 3));
565                         GPUWriteLong(offset & 0xFFFFFFC, old_data, who);
566                 }
567                 return;
568         }
569 //      WriteLog("gpu: writing %.2x at 0x%.8x\n",data,offset);
570         JaguarWriteByte(offset, data, who);
571 }
572
573 //
574 // GPU word access (write)
575 //
576 void GPUWriteWord(uint32 offset, uint16 data, uint32 who/*=UNKNOWN*/)
577 {
578         if (offset >= 0xF02000 && offset <= 0xF020FF)
579                 WriteLog("GPU: WriteWord--Attempt to write to GPU register file by %s!\n", whoName[who]);
580
581         if ((offset >= GPU_WORK_RAM_BASE) && (offset <= GPU_WORK_RAM_BASE + 0x0FFE))
582         {
583                 gpu_ram_8[offset & 0xFFF] = (data>>8) & 0xFF;
584                 gpu_ram_8[(offset+1) & 0xFFF] = data & 0xFF;//*/
585 /*              offset &= 0xFFF;
586                 SET16(gpu_ram_8, offset, data);//*/
587
588 /*if (offset >= 0xF03214 && offset < 0xF0321F)
589         WriteLog("GPU: Writing WORD (%04X) to GPU RAM (%08X)...\n", data, offset);//*/
590
591
592 //This is the same stupid worthless code that was in the DSP!!! AARRRGGGGHHHHH!!!!!!
593 /*              if (!gpu_in_exec)
594                 {
595                         m68k_end_timeslice();
596                         dsp_releaseTimeslice();
597                 }*/
598                 return;
599         }
600         else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset <= GPU_CONTROL_RAM_BASE + 0x1E))
601         {
602                 if (offset & 0x01)              // This is supposed to weed out unaligned writes, but does nothing...
603                 {
604 #ifdef GPU_DEBUG
605                         WriteLog("GPU: Write16--unaligned write @ %08X [%04X]\n", offset, data);
606                         GPUDumpRegisters();
607 #endif  // GPU_DEBUG
608                         return;
609                 }
610 //Dual locations in this range: $1C Divide unit remainder/Divide unit control (R/W)
611 //This just literally sucks.
612                 if ((offset & 0x1C) == 0x1C)
613                 {
614 //This doesn't look right either--handles cases 1, 2, & 3 all the same!
615                         if (offset & 0x02)
616                                 gpu_div_control = (gpu_div_control & 0xFFFF0000) | (data & 0xFFFF);
617                         else
618                                 gpu_div_control = (gpu_div_control & 0x0000FFFF) | ((data & 0xFFFF) << 16);
619                 }
620                 else 
621                 {
622 //WriteLog("[GPU W16:%08X,%04X]", offset, data);
623                         uint32 old_data = GPUReadLong(offset & 0xFFFFFFC, who);
624                         if (offset & 0x02)
625                                 old_data = (old_data & 0xFFFF0000) | (data & 0xFFFF);
626                         else
627                                 old_data = (old_data & 0x0000FFFF) | ((data & 0xFFFF) << 16);
628                         GPUWriteLong(offset & 0xFFFFFFC, old_data, who);
629                 }
630                 return;
631         }
632         else if ((offset == GPU_WORK_RAM_BASE + 0x0FFF) || (GPU_CONTROL_RAM_BASE + 0x1F))
633         {
634 #ifdef GPU_DEBUG
635                         WriteLog("GPU: Write16--unaligned write @ %08X by %s [%04X]!\n", offset, whoName[who], data);
636                         GPUDumpRegisters();
637 #endif  // GPU_DEBUG
638                 return;
639         }
640
641         // Have to be careful here--this can cause an infinite loop!
642         JaguarWriteWord(offset, data, who);
643 }
644
645 //
646 // GPU dword access (write)
647 //
648 void GPUWriteLong(uint32 offset, uint32 data, uint32 who/*=UNKNOWN*/)
649 {
650         if (offset >= 0xF02000 && offset <= 0xF020FF)
651                 WriteLog("GPU: WriteLong--Attempt to write to GPU register file by %s!\n", whoName[who]);
652
653 //      if ((offset >= GPU_WORK_RAM_BASE) && (offset < GPU_WORK_RAM_BASE + 0x1000))
654         if ((offset >= GPU_WORK_RAM_BASE) && (offset <= GPU_WORK_RAM_BASE + 0x0FFC))
655         {
656 #ifdef GPU_DEBUG
657                 if (offset & 0x03)
658                 {
659                         WriteLog("GPU: Write32--unaligned write @ %08X [%08X] by %s\n", offset, data, whoName[who]);
660                         GPUDumpRegisters();
661                 }
662 #endif  // GPU_DEBUG
663
664                 offset &= 0xFFF;
665                 SET32(gpu_ram_8, offset, data);
666                 return;
667         }
668 //      else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset < GPU_CONTROL_RAM_BASE+0x20))
669         else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset <= GPU_CONTROL_RAM_BASE + 0x1C))
670         {
671                 offset &= 0x1F;
672                 switch (offset)
673                 {
674                 case 0x00:
675                 {
676                         bool IMASKCleared = (gpu_flags & IMASK) && !(data & IMASK);
677                         gpu_flags = data;
678                         gpu_flag_z = gpu_flags & ZERO_FLAG;
679                         gpu_flag_c = (gpu_flags & CARRY_FLAG) >> 1;
680                         gpu_flag_n = (gpu_flags & NEGA_FLAG) >> 2;
681                         GPUUpdateRegisterBanks();
682                         gpu_control &= ~((gpu_flags & CINT04FLAGS) >> 3);       // Interrupt latch clear bits
683 //Writing here is only an interrupt enable--this approach is just plain wrong!
684 //                      GPUHandleIRQs();
685 //This, however, is A-OK! ;-)
686                         if (IMASKCleared)                                               // If IMASK was cleared,
687                                 GPUHandleIRQs();                                        // see if any other interrupts need servicing!
688 #ifdef GPU_DEBUG
689                         if (gpu_flags & (INT_ENA0 | INT_ENA1 | INT_ENA2 | INT_ENA3 | INT_ENA4))
690                                 WriteLog("GPU: Interrupt enable set by %s! Bits: %02X\n", whoName[who], (gpu_flags >> 4) & 0x1F);
691                         WriteLog("GPU: REGPAGE %s...\n", (gpu_flags & REGPAGE ? "set" : "cleared"));
692 #endif  // GPU_DEBUG
693                         break;
694                 }
695                 case 0x04:
696                         gpu_matrix_control = data;
697                         break;
698                 case 0x08:
699                         // This can only point to long aligned addresses
700                         gpu_pointer_to_matrix = data & 0xFFFFFFFC;
701                         break;
702                 case 0x0C:
703                         gpu_data_organization = data;
704                         break;
705                 case 0x10:
706                         gpu_pc = data;
707 #ifdef GPU_DEBUG
708 WriteLog("GPU: %s setting GPU PC to %08X %s\n", whoName[who], gpu_pc, (GPU_RUNNING ? "(GPU is RUNNING!)" : ""));//*/
709 #endif  // GPU_DEBUG
710                         break;
711                 case 0x14:
712                 {       
713 //                      uint32 gpu_was_running = GPU_RUNNING;
714                         data &= ~0xF7C0;                // Disable writes to INT_LAT0-4 & TOM version number
715
716                         // check for GPU -> CPU interrupt
717                         if (data & 0x02)
718                         {
719 //WriteLog("GPU->CPU interrupt\n");
720                                 if (tom_irq_enabled(IRQ_GPU))
721                                 {
722                                         if ((tom_irq_enabled(IRQ_GPU)) && (jaguar_interrupt_handler_is_valid(64)))
723                                         {
724                                                 tom_set_pending_gpu_int();
725                                                 m68k_set_irq(7);                        // Set 68000 NMI
726                                                 gpu_releaseTimeslice();
727                                         }
728                                 }
729                                 data &= ~0x02;
730                         }
731
732                         // check for CPU -> GPU interrupt #0
733                         if (data & 0x04)
734                         {
735 //WriteLog("CPU->GPU interrupt\n");
736                                 GPUSetIRQLine(0, ASSERT_LINE);
737                                 m68k_end_timeslice();
738                                 dsp_releaseTimeslice();
739                                 data &= ~0x04;
740                         }
741
742                         // single stepping
743                         if (data & 0x10)
744                         {
745                                 //WriteLog("asked to perform a single step (single step is %senabled)\n",(data&0x8)?"":"not ");
746                         }
747                         gpu_control = (gpu_control & 0xF7C0) | (data & (~0xF7C0));
748
749                         // if gpu wasn't running but is now running, execute a few cycles
750 #ifndef GPU_SINGLE_STEPPING
751 /*                      if (!gpu_was_running && GPU_RUNNING)
752 #ifdef GPU_DEBUG
753                         {
754                                 WriteLog("GPU: Write32--About to do stupid braindead GPU execution for 200 cycles.\n");
755 #endif  // GPU_DEBUG
756                                 gpu_exec(200);
757 #ifdef GPU_DEBUG
758                         }
759 #endif  // GPU_DEBUG//*/
760 #else
761                         if (gpu_control & 0x18)
762                                 gpu_exec(1);
763 #endif  // #ifndef GPU_SINGLE_STEPPING
764 #ifdef GPU_DEBUG
765 WriteLog("Write to GPU CTRL by %s: %08X ", whoName[who], data);
766 if (GPU_RUNNING)
767         WriteLog(" --> Starting to run at %08X by %s...", gpu_pc, whoName[who]);
768 else
769         WriteLog(" --> Stopped by %s! (GPU_PC: %08X)", whoName[who], gpu_pc);
770 WriteLog("\n");
771 #endif  // GPU_DEBUG
772 //if (GPU_RUNNING)
773 //      GPUDumpDisassembly();
774 /*if (GPU_RUNNING)
775 {
776         if (gpu_pc == 0xF035D8)
777         {
778 //              GPUDumpDisassembly();
779 //              log_done();
780 //              exit(1);
781                 gpu_control &= 0xFFFFFFFE;      // Don't run it and let's see what happens!
782 //Hmm. Seems to lock up when going into the demo...
783 //Try to disable the collision altogether!
784         }
785 }//*/
786 extern int effect_start5;
787 static bool finished = false;
788 //if (GPU_RUNNING && effect_start5 && !finished)
789 if (GPU_RUNNING && effect_start5 && gpu_pc == 0xF035D8)
790 {
791         // Let's do a dump of $6528!
792 /*      uint32 numItems = JaguarReadWord(0x6BD6);
793         WriteLog("\nDump of $6528: %u items.\n\n", numItems);
794         for(int i=0; i<numItems*3*4; i+=3*4)
795         {
796                 WriteLog("\t%04X: %08X %08X %08X -> ", 0x6528+i, JaguarReadLong(0x6528+i),
797                         JaguarReadLong(0x6528+i+4), JaguarReadLong(0x6528+i+8));
798                 uint16 link = JaguarReadWord(0x6528+i+8+2);
799                 for(int j=0; j<40; j+=4)
800                         WriteLog("%08X ", JaguarReadLong(link + j));
801                 WriteLog("\n");
802         }
803         WriteLog("\n");//*/
804         // Let's try a manual blit here...
805 //This isn't working the way it should! !!! FIX !!!
806 //Err, actually, it is.
807 // NOW, it works right! Problem solved!!! It's a blitter bug!
808 /*      uint32 src = 0x4D54, dst = 0xF03000, width = 10 * 4;
809         for(int y=0; y<127; y++)
810         {
811                 for(int x=0; x<2; x++)
812                 {
813                         JaguarWriteLong(dst, JaguarReadLong(src));
814                         
815                         src += 4;
816                         dst += 4;
817                 }
818                 src += width - (2 * 4);
819         }//*/
820 /*      finished = true;
821         doGPUDis = true;
822         WriteLog("\nGPU: About to execute collision detection code.\n\n");//*/
823
824 /*      WriteLog("\nGPU: About to execute collision detection code. Data @ 4D54:\n\n");
825         int count = 0;
826         for(int i=0x004D54; i<0x004D54+2048; i++)
827         {
828                 WriteLog("%02X ", JaguarReadByte(i));
829                 count++;
830                 if (count == 32)
831                 {
832                         count = 0;
833                         WriteLog("\n");
834                 }
835         }
836         WriteLog("\n\nData @ F03000:\n\n");
837         count = 0;
838         for(int i=0xF03000; i<0xF03200; i++)
839         {
840                 WriteLog("%02X ", JaguarReadByte(i));
841                 count++;
842                 if (count == 32)
843                 {
844                         count = 0;
845                         WriteLog("\n");
846                 }
847         }
848         WriteLog("\n\n");
849         log_done();
850         exit(0);//*/
851 }
852 //if (!GPU_RUNNING)
853 //      doGPUDis = false;
854 /*if (!GPU_RUNNING && finished)
855 {
856         WriteLog("\nGPU: Finished collision detection code. Exiting!\n\n");
857         GPUDumpRegisters();
858         log_done();
859         exit(0);
860 }//*/
861                         // (?) If we're set running by the M68K (or DSP?) then end its timeslice to
862                         // allow the GPU a chance to run...
863                         // Yes! This partially fixed Trevor McFur...
864                         if (GPU_RUNNING)
865                                 m68k_end_timeslice();
866                         break;
867                 }
868                 case 0x18:
869                         gpu_hidata = data;
870                         break;
871                 case 0x1C:
872                         gpu_div_control = data;
873                         break;
874 //              default:   // unaligned long write
875                         //exit(0);
876                         //__asm int 3
877                 }
878                 return;
879         }
880
881 //      JaguarWriteWord(offset, (data >> 16) & 0xFFFF, who);
882 //      JaguarWriteWord(offset+2, data & 0xFFFF, who);
883 // We're a 32-bit processor, we can do a long write...!
884         JaguarWriteLong(offset, data, who);
885 }
886
887 //
888 // Change register banks if necessary
889 //
890 void GPUUpdateRegisterBanks(void)
891 {
892         int bank = (gpu_flags & REGPAGE);               // REGPAGE bit
893
894         if (gpu_flags & IMASK)                                  // IMASK bit
895                 bank = 0;                                                       // IMASK forces main bank to be bank 0
896
897         if (bank)
898                 gpu_reg = gpu_reg_bank_1, gpu_alternate_reg = gpu_reg_bank_0;
899         else
900                 gpu_reg = gpu_reg_bank_0, gpu_alternate_reg = gpu_reg_bank_1;
901 }
902
903 void GPUHandleIRQs(void)
904 {
905         // Bail out if we're already in an interrupt!
906         if (gpu_flags & IMASK)
907                 return;
908
909         // Get the interrupt latch & enable bits
910         uint32 bits = (gpu_control >> 6) & 0x1F, mask = (gpu_flags >> 4) & 0x1F;
911         
912         // Bail out if latched interrupts aren't enabled
913         bits &= mask;
914         if (!bits)
915                 return;
916         
917         // Determine which interrupt to service
918         uint32 which = 0; //Isn't there a #pragma to disable this warning???
919         if (bits & 0x01)
920                 which = 0;
921         if (bits & 0x02)
922                 which = 1;
923         if (bits & 0x04)
924                 which = 2;
925         if (bits & 0x08)
926                 which = 3;
927         if (bits & 0x10)
928                 which = 4;
929
930         if (start_logging)
931                 WriteLog("GPU: Generating IRQ #%i\n", which);
932
933         // set the interrupt flag 
934         gpu_flags |= IMASK;
935         GPUUpdateRegisterBanks();
936
937         // subqt  #4,r31                ; pre-decrement stack pointer 
938         // move  pc,r30                 ; address of interrupted code 
939         // store  r30,(r31)     ; store return address
940         gpu_reg[31] -= 4;
941         GPUWriteLong(gpu_reg[31], gpu_pc - 2, GPU);
942         
943         // movei  #service_address,r30  ; pointer to ISR entry 
944         // jump  (r30)                                  ; jump to ISR 
945         // nop
946         gpu_pc = gpu_reg[30] = GPU_WORK_RAM_BASE + (which * 0x10);
947 }
948
949 void GPUSetIRQLine(int irqline, int state)
950 {
951         if (start_logging)
952                 WriteLog("GPU: Setting GPU IRQ line #%i\n", irqline);
953
954         uint32 mask = 0x0040 << irqline;
955         gpu_control &= ~mask;                           // Clear the interrupt latch
956
957         if (state)
958         {
959                 gpu_control |= mask;                    // Assert the interrupt latch
960                 GPUHandleIRQs();                                // And handle the interrupt...
961         }
962 }
963
964 //TEMPORARY: Testing only!
965 //#include "gpu2.h"
966 //#include "gpu3.h"
967
968 void gpu_init(void)
969 {
970         memory_malloc_secure((void **)&gpu_ram_8, 0x1000, "GPU work RAM");
971         memory_malloc_secure((void **)&gpu_reg_bank_0, 32 * sizeof(int32), "GPU bank 0 regs");
972         memory_malloc_secure((void **)&gpu_reg_bank_1, 32 * sizeof(int32), "GPU bank 1 regs");
973
974         build_branch_condition_table();
975
976         gpu_reset();
977
978 //TEMPORARY: Testing only!
979 //      gpu2_init();
980 //      gpu3_init();
981 }
982
983 void gpu_reset(void)
984 {
985         // GPU registers (directly visible)
986         gpu_flags                         = 0x00000000;
987         gpu_matrix_control    = 0x00000000;
988         gpu_pointer_to_matrix = 0x00000000;
989         gpu_data_organization = 0xFFFFFFFF;
990         gpu_pc                            = 0x00F03000;
991         gpu_control                       = 0x00002800;                 // Correctly sets this as TOM Rev. 2
992         gpu_hidata                        = 0x00000000;
993         gpu_remain                        = 0x00000000;                 // These two registers are RO/WO
994         gpu_div_control           = 0x00000000;
995
996         // GPU internal register
997         gpu_acc                           = 0x00000000;
998
999         gpu_reg = gpu_reg_bank_0;
1000         gpu_alternate_reg = gpu_reg_bank_1;
1001
1002         for(int i=0; i<32; i++)
1003                 gpu_reg[i] = gpu_alternate_reg[i] = 0x00000000;
1004
1005         CLR_ZNC;
1006         memset(gpu_ram_8, 0xFF, 0x1000);
1007         gpu_in_exec = 0;
1008 //not needed    GPUInterruptPending = false;
1009         gpu_reset_stats();
1010 }
1011
// Returns the current value of the GPU program counter (gpu_pc).
uint32 gpu_read_pc(void)
{
	return gpu_pc;
}
1016
1017 void gpu_reset_stats(void)
1018 {
1019         for(uint32 i=0; i<64; i++)
1020                 gpu_opcode_use[i] = 0;
1021         WriteLog("--> GPU stats were reset!\n");
1022 }
1023
1024 void GPUDumpDisassembly(void)
1025 {
1026         char buffer[512];
1027
1028         WriteLog("\n---[GPU code at 00F03000]---------------------------\n");
1029         uint32 j = 0xF03000;
1030         while (j <= 0xF03FFF)
1031         {
1032                 uint32 oldj = j;
1033                 j += dasmjag(JAGUAR_GPU, buffer, j);
1034                 WriteLog("\t%08X: %s\n", oldj, buffer);
1035         }
1036 }
1037
1038 void GPUDumpRegisters(void)
1039 {
1040         WriteLog("\n---[GPU flags: NCZ %d%d%d]-----------------------\n", gpu_flag_n, gpu_flag_c, gpu_flag_z);
1041         WriteLog("\nRegisters bank 0\n");
1042         for(int j=0; j<8; j++)
1043         {
1044                 WriteLog("\tR%02i = %08X R%02i = %08X R%02i = %08X R%02i = %08X\n",
1045                                                   (j << 2) + 0, gpu_reg_bank_0[(j << 2) + 0],
1046                                                   (j << 2) + 1, gpu_reg_bank_0[(j << 2) + 1],
1047                                                   (j << 2) + 2, gpu_reg_bank_0[(j << 2) + 2],
1048                                                   (j << 2) + 3, gpu_reg_bank_0[(j << 2) + 3]);
1049         }
1050         WriteLog("Registers bank 1\n");
1051         for(int j=0; j<8; j++)
1052         {
1053                 WriteLog("\tR%02i = %08X R%02i = %08X R%02i = %08X R%02i = %08X\n",
1054                                                   (j << 2) + 0, gpu_reg_bank_1[(j << 2) + 0],
1055                                                   (j << 2) + 1, gpu_reg_bank_1[(j << 2) + 1],
1056                                                   (j << 2) + 2, gpu_reg_bank_1[(j << 2) + 2],
1057                                                   (j << 2) + 3, gpu_reg_bank_1[(j << 2) + 3]);
1058         }
1059 }
1060
1061 void GPUDumpMemory(void)
1062 {
1063         WriteLog("\n---[GPU data at 00F03000]---------------------------\n");
1064         for(int i=0; i<0xFFF; i+=4)
1065                 WriteLog("\t%08X: %02X %02X %02X %02X\n", 0xF03000+i, gpu_ram_8[i],
1066                         gpu_ram_8[i+1], gpu_ram_8[i+2], gpu_ram_8[i+3]);
1067 }
1068
1069 void gpu_done(void)
1070
1071         WriteLog("GPU: Stopped at PC=%08X (GPU %s running)\n", (unsigned int)gpu_pc, GPU_RUNNING ? "was" : "wasn't");
1072
1073         // Get the interrupt latch & enable bits 
1074         uint8 bits = (gpu_control >> 6) & 0x1F, mask = (gpu_flags >> 4) & 0x1F;
1075         WriteLog("GPU: Latch bits = %02X, enable bits = %02X\n", bits, mask);
1076
1077         GPUDumpRegisters();
1078         GPUDumpDisassembly();
1079
1080         WriteLog("\nGPU opcodes use:\n");
1081         for(int i=0; i<64; i++)
1082         {
1083                 if (gpu_opcode_use[i])
1084                         WriteLog("\t%17s %lu\n", gpu_opcode_str[i], gpu_opcode_use[i]);
1085         }
1086         WriteLog("\n");
1087
1088         memory_free(gpu_ram_8);
1089         memory_free(gpu_reg_bank_0);
1090         memory_free(gpu_reg_bank_1);
1091 }
1092
1093 //
1094 // Main GPU execution core
1095 //
1096 static int testCount = 1;
1097 static int len = 0;
1098 static bool tripwire = false;
1099 void gpu_exec(int32 cycles)
1100 {
1101         if (!GPU_RUNNING)
1102                 return;
1103
1104 #ifdef GPU_SINGLE_STEPPING
1105         if (gpu_control & 0x18)
1106         {
1107                 cycles = 1;
1108                 gpu_control &= ~0x10;
1109         }
1110 #endif
1111         GPUHandleIRQs();
1112         gpu_releaseTimeSlice_flag = 0;
1113         gpu_in_exec++;
1114
1115         while (cycles > 0 && GPU_RUNNING)
1116         {
1117 if (gpu_ram_8[0x054] == 0x98 && gpu_ram_8[0x055] == 0x0A && gpu_ram_8[0x056] == 0x03
1118         && gpu_ram_8[0x057] == 0x00 && gpu_ram_8[0x058] == 0x00 && gpu_ram_8[0x059] == 0x00)
1119 {
1120         if (gpu_pc == 0xF03000)
1121         {
1122                 extern uint32 starCount;
1123                 starCount = 0;
1124 /*              WriteLog("GPU: Starting starfield generator... Dump of [R03=%08X]:\n", gpu_reg_bank_0[03]);
1125                 uint32 base = gpu_reg_bank_0[3];
1126                 for(uint32 i=0; i<0x100; i+=16)
1127                 {
1128                         WriteLog("%02X: ", i);
1129                         for(uint32 j=0; j<16; j++)
1130                         {
1131                                 WriteLog("%02X ", JaguarReadByte(base + i + j));
1132                         }
1133                         WriteLog("\n");
1134                 }*/
1135         }
1136 //      if (gpu_pc == 0xF03)
1137         {
1138         }
1139 }//*/
1140 /*if (gpu_pc == 0xF03B9E && gpu_reg_bank_0[01] == 0)
1141 {
1142         GPUDumpRegisters();
1143         WriteLog("GPU: Starting disassembly log...\n");
1144         doGPUDis = true;
1145 }//*/
1146 /*if (gpu_pc == 0xF0359A)
1147 {
1148         doGPUDis = true;
1149         GPUDumpRegisters();
1150 }*/
1151 /*              gpu_flag_c = (gpu_flag_c ? 1 : 0);
1152                 gpu_flag_z = (gpu_flag_z ? 1 : 0);
1153                 gpu_flag_n = (gpu_flag_n ? 1 : 0);*/
1154         
1155                 uint16 opcode = GPUReadWord(gpu_pc, GPU);
1156                 uint32 index = opcode >> 10;
1157                 gpu_instruction = opcode;                               // Added for GPU #3...
1158                 gpu_opcode_first_parameter = (opcode >> 5) & 0x1F;
1159                 gpu_opcode_second_parameter = opcode & 0x1F;
1160 /*if (gpu_pc == 0xF03BE8)
1161 WriteLog("Start of OP frame write...\n");
1162 if (gpu_pc == 0xF03EEE)
1163 WriteLog("--> Writing BRANCH object ---\n");
1164 if (gpu_pc == 0xF03F62)
1165 WriteLog("--> Writing BITMAP object ***\n");//*/
1166 /*if (gpu_pc == 0xF03546)
1167 {
1168         WriteLog("\n--> GPU PC: F03546\n");
1169         GPUDumpRegisters();
1170         GPUDumpDisassembly();
1171 }//*/
1172 /*if (gpu_pc == 0xF033F6)
1173 {
1174         WriteLog("\n--> GPU PC: F033F6\n");
1175         GPUDumpRegisters();
1176         GPUDumpDisassembly();
1177 }//*/
1178 /*if (gpu_pc == 0xF033CC)
1179 {
1180         WriteLog("\n--> GPU PC: F033CC\n");
1181         GPUDumpRegisters();
1182         GPUDumpDisassembly();
1183 }//*/
1184 /*if (gpu_pc == 0xF033D6)
1185 {
1186         WriteLog("\n--> GPU PC: F033D6 (#%d)\n", testCount++);
1187         GPUDumpRegisters();
1188         GPUDumpMemory();
1189 }//*/
1190 /*if (gpu_pc == 0xF033D8)
1191 {
1192         WriteLog("\n--> GPU PC: F033D8 (#%d)\n", testCount++);
1193         GPUDumpRegisters();
1194         GPUDumpMemory();
1195 }//*/
1196 /*if (gpu_pc == 0xF0358E)
1197 {
1198         WriteLog("\n--> GPU PC: F0358E (#%d)\n", testCount++);
1199         GPUDumpRegisters();
1200         GPUDumpMemory();
1201 }//*/
1202 /*if (gpu_pc == 0xF034CA)
1203 {
1204         WriteLog("\n--> GPU PC: F034CA (#%d)\n", testCount++);
1205         GPUDumpRegisters();
1206 }//*/
1207 /*if (gpu_pc == 0xF034CA)
1208 {
1209         len = gpu_reg[1] + 4;//, r9save = gpu_reg[9];
1210         WriteLog("\nAbout to subtract [#%d] (R14=%08X, R15=%08X, R9=%08X):\n   ", testCount++, gpu_reg[14], gpu_reg[15], gpu_reg[9]);
1211         for(int i=0; i<len; i+=4)
1212                 WriteLog(" %08X", GPUReadLong(gpu_reg[15]+i));
1213         WriteLog("\n   ");
1214         for(int i=0; i<len; i+=4)
1215                 WriteLog(" %08X", GPUReadLong(gpu_reg[14]+i));
1216         WriteLog("\n\n");
1217 }
1218 if (gpu_pc == 0xF034DE)
1219 {
1220         WriteLog("\nSubtracted! (R14=%08X, R15=%08X):\n   ", gpu_reg[14], gpu_reg[15]);
1221         for(int i=0; i<len; i+=4)
1222                 WriteLog(" %08X", GPUReadLong(gpu_reg[15]+i));
1223         WriteLog("\n   ");
1224         for(int i=0; i<len; i+=4)
1225                 WriteLog(" %08X", GPUReadLong(gpu_reg[14]+i));
1226         WriteLog("\n   ");
1227         for(int i=0; i<len; i+=4)
1228                 WriteLog(" --------");
1229         WriteLog("\n   ");
1230         for(int i=0; i<len; i+=4)
1231                 WriteLog(" %08X", GPUReadLong(gpu_reg[9]+4+i));
1232         WriteLog("\n\n");
1233 }//*/
1234 /*if (gpu_pc == 0xF035C8)
1235 {
1236         WriteLog("\n--> GPU PC: F035C8 (#%d)\n", testCount++);
1237         GPUDumpRegisters();
1238         GPUDumpDisassembly();
1239 }//*/
1240
1241 if (gpu_start_log)
1242 {
1243 //      gpu_reset_stats();
1244 static char buffer[512];
1245 dasmjag(JAGUAR_GPU, buffer, gpu_pc);
1246 WriteLog("GPU: [%08X] %s (RM=%08X, RN=%08X) -> ", gpu_pc, buffer, RM, RN);
1247 }//*/
1248 //$E400 -> 1110 01 -> $39 -> 57
1249 //GPU #1
1250                 gpu_pc += 2;
1251                 gpu_opcode[index]();
1252 //GPU #2
1253 //              gpu2_opcode[index]();
1254 //              gpu_pc += 2;
1255 //GPU #3                                (Doesn't show ATARI logo! #1 & #2 do...)
1256 //              gpu_pc += 2;
1257 //              gpu3_opcode[index]();
1258
1259 // BIOS hacking
1260 //GPU: [00F03548] jr      nz,00F03560 (0xd561) (RM=00F03114, RN=00000004) ->     --> JR: Branch taken. 
1261 /*static bool firstTime = true;
1262 if (gpu_pc == 0xF03548 && firstTime)
1263 {
1264         gpu_flag_z = 1;
1265 //      firstTime = false;
1266
1267 //static char buffer[512];
1268 //int k=0xF03548;
1269 //while (k<0xF0356C)
1270 //{
1271 //int oldk = k;
1272 //k += dasmjag(JAGUAR_GPU, buffer, k);
1273 //WriteLog("GPU: [%08X] %s\n", oldk, buffer);
1274 //}
1275 //      gpu_start_log = 1;
1276 }//*/
1277 //GPU: [00F0354C] jump    nz,(r29) (0xd3a1) (RM=00F03314, RN=00000004) -> (RM=00F03314, RN=00000004)
1278 /*if (gpu_pc == 0xF0354C)
1279         gpu_flag_z = 0;//, gpu_start_log = 1;//*/
1280
1281                 cycles -= gpu_opcode_cycles[index];
1282                 gpu_opcode_use[index]++;
1283 if (gpu_start_log)
1284         WriteLog("(RM=%08X, RN=%08X)\n", RM, RN);//*/
1285 if ((gpu_pc < 0xF03000 || gpu_pc > 0xF03FFF) && !tripwire)
1286 {
1287         WriteLog("GPU: Executing outside local RAM! GPU_PC: %08X\n", gpu_pc);
1288         tripwire = true;
1289 }
1290         }
1291
1292         gpu_in_exec--;
1293 }
1294
1295 //
1296 // GPU opcodes
1297 //
1298
1299 /*
1300 GPU opcodes use (offset punch--vertically below bad guy):
1301                       add 18686
1302                      addq 32621
1303                       sub 7483
1304                      subq 10252
1305                       and 21229
1306                        or 15003
1307                      btst 1822
1308                      bset 2072
1309                      mult 141
1310                       div 2392
1311                      shlq 13449
1312                      shrq 10297
1313                     sharq 11104
1314                       cmp 6775
1315                      cmpq 5944
1316                      move 31259
1317                     moveq 4473
1318                     movei 23277
1319                     loadb 46
1320                     loadw 4201
1321                      load 28580
1322          load_r14_indexed 1183
1323          load_r15_indexed 1125
1324                    storew 178
1325                     store 10144
1326         store_r14_indexed 320
1327         store_r15_indexed 1
1328                   move_pc 1742
1329                      jump 24467
1330                        jr 18090
1331                       nop 41362
1332 */
1333
1334 static void gpu_opcode_jump(void)
1335 {
1336 #ifdef GPU_DIS_JUMP
1337 char * condition[32] =
1338 {       "T", "nz", "z", "???", "nc", "nc nz", "nc z", "???", "c", "c nz",
1339         "c z", "???", "???", "???", "???", "???", "???", "???", "???",
1340         "???", "nn", "nn nz", "nn z", "???", "n", "n nz", "n z", "???",
1341         "???", "???", "???", "F" };
1342         if (doGPUDis)
1343                 WriteLog("%06X: JUMP   %s, (R%02u) [NCZ:%u%u%u, R%02u=%08X] ", gpu_pc-2, condition[IMM_2], IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM);
1344 #endif
1345         // normalize flags
1346 /*      gpu_flag_c = (gpu_flag_c ? 1 : 0);
1347         gpu_flag_z = (gpu_flag_z ? 1 : 0);
1348         gpu_flag_n = (gpu_flag_n ? 1 : 0);*/
1349         // KLUDGE: Used by BRANCH_CONDITION
1350         uint32 jaguar_flags = (gpu_flag_n << 2) | (gpu_flag_c << 1) | gpu_flag_z;
1351
1352         if (BRANCH_CONDITION(IMM_2))
1353         {
1354 #ifdef GPU_DIS_JUMP
1355         if (doGPUDis)
1356                 WriteLog("Branched!\n");
1357 #endif
1358 if (gpu_start_log)
1359         WriteLog("    --> JUMP: Branch taken.\n");
1360                 uint32 delayed_pc = RM;
1361                 gpu_exec(1);
1362                 gpu_pc = delayed_pc;
1363 /*              uint16 opcode = GPUReadWord(gpu_pc, GPU);
1364                 gpu_opcode_first_parameter = (opcode >> 5) & 0x1F;
1365                 gpu_opcode_second_parameter = opcode & 0x1F;
1366
1367                 gpu_pc = delayed_pc;
1368                 gpu_opcode[opcode>>10]();//*/
1369         }
1370 #ifdef GPU_DIS_JUMP
1371         else
1372                 if (doGPUDis)
1373                         WriteLog("Branch NOT taken.\n");
1374 #endif
1375 }
1376
1377 static void gpu_opcode_jr(void)
1378 {
1379 #ifdef GPU_DIS_JR
1380 char * condition[32] =
1381 {       "T", "nz", "z", "???", "nc", "nc nz", "nc z", "???", "c", "c nz",
1382         "c z", "???", "???", "???", "???", "???", "???", "???", "???",
1383         "???", "nn", "nn nz", "nn z", "???", "n", "n nz", "n z", "???",
1384         "???", "???", "???", "F" };
1385         if (doGPUDis)
1386                 WriteLog("%06X: JR     %s, %06X [NCZ:%u%u%u] ", gpu_pc-2, condition[IMM_2], gpu_pc+((IMM_1 & 0x10 ? 0xFFFFFFF0 | IMM_1 : IMM_1) * 2), gpu_flag_n, gpu_flag_c, gpu_flag_z);
1387 #endif
1388 /*      if (CONDITION(jaguar.op & 31))
1389         {
1390                 INT32 r1 = (INT8)((jaguar.op >> 2) & 0xF8) >> 2;
1391                 UINT32 newpc = jaguar.PC + r1;
1392                 CALL_MAME_DEBUG;
1393                 jaguar.op = ROPCODE(jaguar.PC);
1394                 jaguar.PC = newpc;
1395                 (*jaguar.table[jaguar.op >> 10])();
1396
1397                 jaguar_icount -= 3;     // 3 wait states guaranteed
1398         }*/
1399         // normalize flags
1400 /*      gpu_flag_n = (gpu_flag_n ? 1 : 0);
1401         gpu_flag_c = (gpu_flag_c ? 1 : 0);
1402         gpu_flag_z = (gpu_flag_z ? 1 : 0);*/
1403         // KLUDGE: Used by BRANCH_CONDITION
1404         uint32 jaguar_flags = (gpu_flag_n << 2) | (gpu_flag_c << 1) | gpu_flag_z;
1405
1406         if (BRANCH_CONDITION(IMM_2))
1407         {
1408 #ifdef GPU_DIS_JR
1409         if (doGPUDis)
1410                 WriteLog("Branched!\n");
1411 #endif
1412 if (gpu_start_log)
1413         WriteLog("    --> JR: Branch taken.\n");
1414                 int32 offset = (IMM_1 & 0x10 ? 0xFFFFFFF0 | IMM_1 : IMM_1);             // Sign extend IMM_1
1415                 int32 delayed_pc = gpu_pc + (offset * 2);
1416                 gpu_exec(1);
1417                 gpu_pc = delayed_pc;
1418 /*              uint16 opcode = GPUReadWord(gpu_pc, GPU);
1419                 gpu_opcode_first_parameter = (opcode >> 5) & 0x1F;
1420                 gpu_opcode_second_parameter = opcode & 0x1F;
1421
1422                 gpu_pc = delayed_pc;
1423                 gpu_opcode[opcode>>10]();//*/
1424         }
1425 #ifdef GPU_DIS_JR
1426         else
1427                 if (doGPUDis)
1428                         WriteLog("Branch NOT taken.\n");
1429 #endif
1430 }
1431
1432 static void gpu_opcode_add(void)
1433 {
1434 #ifdef GPU_DIS_ADD
1435         if (doGPUDis)
1436                 WriteLog("%06X: ADD    R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1437 #endif
1438         UINT32 res = RN + RM;
1439         CLR_ZNC; SET_ZNC_ADD(RN, RM, res);
1440         RN = res;
1441 #ifdef GPU_DIS_ADD
1442         if (doGPUDis)
1443                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1444 #endif
1445 }
1446
1447 static void gpu_opcode_addc(void)
1448 {
1449 #ifdef GPU_DIS_ADDC
1450         if (doGPUDis)
1451                 WriteLog("%06X: ADDC   R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1452 #endif
1453 /*      int dreg = jaguar.op & 31;
1454         UINT32 r1 = jaguar.r[(jaguar.op >> 5) & 31];
1455         UINT32 r2 = jaguar.r[dreg];
1456         UINT32 res = r2 + r1 + ((jaguar.FLAGS >> 1) & 1);
1457         jaguar.r[dreg] = res;
1458         CLR_ZNC; SET_ZNC_ADD(r2,r1,res);*/
1459
1460         UINT32 res = RN + RM + gpu_flag_c;
1461         UINT32 carry = gpu_flag_c;
1462 //      SET_ZNC_ADD(RN, RM, res); //???BUG??? Yes!
1463         SET_ZNC_ADD(RN + carry, RM, res);
1464 //      SET_ZNC_ADD(RN, RM + carry, res);
1465         RN = res;
1466 #ifdef GPU_DIS_ADDC
1467         if (doGPUDis)
1468                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1469 #endif
1470 }
1471
1472 static void gpu_opcode_addq(void)
1473 {
1474 #ifdef GPU_DIS_ADDQ
1475         if (doGPUDis)
1476                 WriteLog("%06X: ADDQ   #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1477 #endif
1478         UINT32 r1 = gpu_convert_zero[IMM_1];
1479         UINT32 res = RN + r1;
1480         CLR_ZNC; SET_ZNC_ADD(RN, r1, res);
1481         RN = res;
1482 #ifdef GPU_DIS_ADDQ
1483         if (doGPUDis)
1484                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1485 #endif
1486 }
1487
1488 static void gpu_opcode_addqt(void)
1489 {
1490 #ifdef GPU_DIS_ADDQT
1491         if (doGPUDis)
1492                 WriteLog("%06X: ADDQT  #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1493 #endif
1494         RN += gpu_convert_zero[IMM_1];
1495 #ifdef GPU_DIS_ADDQT
1496         if (doGPUDis)
1497                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1498 #endif
1499 }
1500
1501 static void gpu_opcode_sub(void)
1502 {
1503 #ifdef GPU_DIS_SUB
1504         if (doGPUDis)
1505                 WriteLog("%06X: SUB    R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1506 #endif
1507         UINT32 res = RN - RM;
1508         SET_ZNC_SUB(RN, RM, res);
1509         RN = res;
1510 #ifdef GPU_DIS_SUB
1511         if (doGPUDis)
1512                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1513 #endif
1514 }
1515
1516 static void gpu_opcode_subc(void)
1517 {
1518 #ifdef GPU_DIS_SUBC
1519         if (doGPUDis)
1520                 WriteLog("%06X: SUBC   R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1521 #endif
1522         UINT32 res = RN - RM - gpu_flag_c;
1523         UINT32 borrow = gpu_flag_c;
1524 //      SET_ZNC_SUB(RN, RM, res); //???BUG??? YES!!!
1525 //No matter how you do it, there is a problem. With below, it's 0-0 with carry,
1526 //and the one below it it's FFFFFFFF - FFFFFFFF with carry... !!! FIX !!!
1527 //      SET_ZNC_SUB(RN - borrow, RM, res);
1528         SET_ZNC_SUB(RN, RM + borrow, res);
1529         RN = res;
1530 #ifdef GPU_DIS_SUBC
1531         if (doGPUDis)
1532                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1533 #endif
1534 }
1535 /*
1536 N = 5, M = 3, 3 - 5 = -2, C = 1... Or, in our case:
1537 N = 0, M = 1, 0 - 1 = -1, C = 0!
1538
1539 #define SET_C_SUB(a,b)          (gpu_flag_c = ((UINT32)(b) > (UINT32)(a)))
1540 #define SET_ZN(r)                       SET_N(r); SET_Z(r)
1541 #define SET_ZNC_ADD(a,b,r)      SET_N(r); SET_Z(r); SET_C_ADD(a,b)
1542 #define SET_ZNC_SUB(a,b,r)      SET_N(r); SET_Z(r); SET_C_SUB(a,b)
1543 */
1544 static void gpu_opcode_subq(void)
1545 {
1546 #ifdef GPU_DIS_SUBQ
1547         if (doGPUDis)
1548                 WriteLog("%06X: SUBQ   #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1549 #endif
1550         UINT32 r1 = gpu_convert_zero[IMM_1];
1551         UINT32 res = RN - r1;
1552         SET_ZNC_SUB(RN, r1, res);
1553         RN = res;
1554 #ifdef GPU_DIS_SUBQ
1555         if (doGPUDis)
1556                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1557 #endif
1558 }
1559
1560 static void gpu_opcode_subqt(void)
1561 {
1562 #ifdef GPU_DIS_SUBQT
1563         if (doGPUDis)
1564                 WriteLog("%06X: SUBQT  #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1565 #endif
1566         RN -= gpu_convert_zero[IMM_1];
1567 #ifdef GPU_DIS_SUBQT
1568         if (doGPUDis)
1569                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1570 #endif
1571 }
1572
1573 static void gpu_opcode_cmp(void)
1574 {
1575 #ifdef GPU_DIS_CMP
1576         if (doGPUDis)
1577                 WriteLog("%06X: CMP    R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1578 #endif
1579         UINT32 res = RN - RM;
1580         SET_ZNC_SUB(RN, RM, res);
1581 #ifdef GPU_DIS_CMP
1582         if (doGPUDis)
1583                 WriteLog("[NCZ:%u%u%u]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z);
1584 #endif
1585 }
1586
1587 static void gpu_opcode_cmpq(void)
1588 {
1589         static int32 sqtable[32] =
1590                 { 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,-16,-15,-14,-13,-12,-11,-10,-9,-8,-7,-6,-5,-4,-3,-2,-1 };
1591 #ifdef GPU_DIS_CMPQ
1592         if (doGPUDis)
1593                 WriteLog("%06X: CMPQ   #%d, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, sqtable[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1594 #endif
1595         UINT32 r1 = sqtable[IMM_1 & 0x1F]; // I like this better -> (INT8)(jaguar.op >> 2) >> 3;
1596         UINT32 res = RN - r1;
1597         SET_ZNC_SUB(RN, r1, res);
1598 #ifdef GPU_DIS_CMPQ
1599         if (doGPUDis)
1600                 WriteLog("[NCZ:%u%u%u]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z);
1601 #endif
1602 }
1603
1604 static void gpu_opcode_and(void)
1605 {
1606 #ifdef GPU_DIS_AND
1607         if (doGPUDis)
1608                 WriteLog("%06X: AND    R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1609 #endif
1610         RN = RN & RM;
1611         SET_ZN(RN);
1612 #ifdef GPU_DIS_AND
1613         if (doGPUDis)
1614                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1615 #endif
1616 }
1617
1618 static void gpu_opcode_or(void)
1619 {
1620 #ifdef GPU_DIS_OR
1621         if (doGPUDis)
1622                 WriteLog("%06X: OR     R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1623 #endif
1624         RN = RN | RM;
1625         SET_ZN(RN);
1626 #ifdef GPU_DIS_OR
1627         if (doGPUDis)
1628                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1629 #endif
1630 }
1631
1632 static void gpu_opcode_xor(void)
1633 {
1634 #ifdef GPU_DIS_XOR
1635         if (doGPUDis)
1636                 WriteLog("%06X: XOR    R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1637 #endif
1638         RN = RN ^ RM;
1639         SET_ZN(RN);
1640 #ifdef GPU_DIS_XOR
1641         if (doGPUDis)
1642                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1643 #endif
1644 }
1645
1646 static void gpu_opcode_not(void)
1647 {
1648 #ifdef GPU_DIS_NOT
1649         if (doGPUDis)
1650                 WriteLog("%06X: NOT    R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1651 #endif
1652         RN = ~RN;
1653         SET_ZN(RN);
1654 #ifdef GPU_DIS_NOT
1655         if (doGPUDis)
1656                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1657 #endif
1658 }
1659
1660 static void gpu_opcode_move_pc(void)
1661 {
1662 #ifdef GPU_DIS_MOVEPC
1663         if (doGPUDis)
1664                 WriteLog("%06X: MOVE   PC, R%02u [NCZ:%u%u%u, PC=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, gpu_pc-2, IMM_2, RN);
1665 #endif
1666         // Should be previous PC--this might not always be previous instruction!
1667         // Then again, this will point right at the *current* instruction, i.e., MOVE PC,R!
1668         RN = gpu_pc - 2;
1669 #ifdef GPU_DIS_MOVEPC
1670         if (doGPUDis)
1671                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1672 #endif
1673 }
1674
1675 static void gpu_opcode_sat8(void)
1676 {
1677 #ifdef GPU_DIS_SAT8
1678         if (doGPUDis)
1679                 WriteLog("%06X: SAT8   R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1680 #endif
1681         RN = ((int32)RN < 0 ? 0 : (RN > 0xFF ? 0xFF : RN));
1682         SET_ZN(RN);
1683 #ifdef GPU_DIS_SAT8
1684         if (doGPUDis)
1685                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1686 #endif
1687 }
1688
1689 static void gpu_opcode_sat16(void)
1690 {
1691         RN = ((int32)RN < 0 ? 0 : (RN > 0xFFFF ? 0xFFFF : RN));
1692         SET_ZN(RN);
1693 }
1694
1695 static void gpu_opcode_sat24(void)
1696 {
1697         RN = ((int32)RN < 0 ? 0 : (RN > 0xFFFFFF ? 0xFFFFFF : RN));
1698         SET_ZN(RN);
1699 }
1700
1701 static void gpu_opcode_store_r14_indexed(void)
1702 {
1703 #ifdef GPU_DIS_STORE14I
1704         if (doGPUDis)
1705                 WriteLog("%06X: STORE  R%02u, (R14+$%02X) [NCZ:%u%u%u, R%02u=%08X, R14+$%02X=%08X]\n", gpu_pc-2, IMM_2, gpu_convert_zero[IMM_1] << 2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, gpu_convert_zero[IMM_1] << 2, gpu_reg[14]+(gpu_convert_zero[IMM_1] << 2));
1706 #endif
1707         GPUWriteLong(gpu_reg[14] + (gpu_convert_zero[IMM_1] << 2), RN, GPU);
1708 }
1709
1710 static void gpu_opcode_store_r15_indexed(void)
1711 {
1712 #ifdef GPU_DIS_STORE15I
1713         if (doGPUDis)
1714                 WriteLog("%06X: STORE  R%02u, (R15+$%02X) [NCZ:%u%u%u, R%02u=%08X, R15+$%02X=%08X]\n", gpu_pc-2, IMM_2, gpu_convert_zero[IMM_1] << 2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, gpu_convert_zero[IMM_1] << 2, gpu_reg[15]+(gpu_convert_zero[IMM_1] << 2));
1715 #endif
1716         GPUWriteLong(gpu_reg[15] + (gpu_convert_zero[IMM_1] << 2), RN, GPU);
1717 }
1718
1719 static void gpu_opcode_load_r14_ri(void)
1720 {
1721 #ifdef GPU_DIS_LOAD14R
1722         if (doGPUDis)
1723                 WriteLog("%06X: LOAD   (R14+R%02u), R%02u [NCZ:%u%u%u, R14+R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM+gpu_reg[14], IMM_2, RN);
1724 #endif
1725         RN = GPUReadLong(gpu_reg[14] + RM, GPU);
1726 #ifdef GPU_DIS_LOAD14R
1727         if (doGPUDis)
1728                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1729 #endif
1730 }
1731
1732 static void gpu_opcode_load_r15_ri(void)
1733 {
1734 #ifdef GPU_DIS_LOAD15R
1735         if (doGPUDis)
1736                 WriteLog("%06X: LOAD   (R15+R%02u), R%02u [NCZ:%u%u%u, R15+R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM+gpu_reg[15], IMM_2, RN);
1737 #endif
1738         RN = GPUReadLong(gpu_reg[15] + RM, GPU);
1739 #ifdef GPU_DIS_LOAD15R
1740         if (doGPUDis)
1741                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1742 #endif
1743 }
1744
1745 static void gpu_opcode_store_r14_ri(void)
1746 {
1747 #ifdef GPU_DIS_STORE14R
1748         if (doGPUDis)
1749                 WriteLog("%06X: STORE  R%02u, (R14+R%02u) [NCZ:%u%u%u, R%02u=%08X, R14+R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM+gpu_reg[14]);
1750 #endif
1751         GPUWriteLong(gpu_reg[14] + RM, RN, GPU);
1752 }
1753
1754 static void gpu_opcode_store_r15_ri(void)
1755 {
1756 #ifdef GPU_DIS_STORE15R
1757         if (doGPUDis)
1758                 WriteLog("%06X: STORE  R%02u, (R15+R%02u) [NCZ:%u%u%u, R%02u=%08X, R15+R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM+gpu_reg[15]);
1759 #endif
1760         GPUWriteLong(gpu_reg[15] + RM, RN, GPU);
1761 }
1762
// NOP: changes no state; only emits a trace line when disassembly is on.
static void gpu_opcode_nop(void)
{
#ifdef GPU_DIS_NOP
	if (doGPUDis)
	{
		WriteLog("%06X: NOP    [NCZ:%u%u%u]\n", gpu_pc-2, gpu_flag_n, gpu_flag_c, gpu_flag_z);
	}
#endif
}
1770
1771 static void gpu_opcode_pack(void)
1772 {
1773 #ifdef GPU_DIS_PACK
1774         if (doGPUDis)
1775                 WriteLog("%06X: %s R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, (!IMM_1 ? "PACK  " : "UNPACK"), IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1776 #endif
1777         uint32 val = RN;
1778
1779 //BUG!  if (RM == 0)                            // Pack
1780         if (IMM_1 == 0)                         // Pack
1781                 RN = ((val >> 10) & 0x0000F000) | ((val >> 5) & 0x00000F00) | (val & 0x000000FF);
1782         else                                            // Unpack
1783                 RN = ((val & 0x0000F000) << 10) | ((val & 0x00000F00) << 5) | (val & 0x000000FF);
1784 #ifdef GPU_DIS_PACK
1785         if (doGPUDis)
1786                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1787 #endif
1788 }
1789
1790 static void gpu_opcode_storeb(void)
1791 {
1792 #ifdef GPU_DIS_STOREB
1793         if (doGPUDis)
1794                 WriteLog("%06X: STOREB R%02u, (R%02u) [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM);
1795 #endif
1796 //Is this right???
1797 // Would appear to be so...!
1798         if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1799                 GPUWriteLong(RM, RN & 0xFF, GPU);
1800         else
1801                 JaguarWriteByte(RM, RN, GPU);
1802 }
1803
1804 static void gpu_opcode_storew(void)
1805 {
1806 #ifdef GPU_DIS_STOREW
1807         if (doGPUDis)
1808                 WriteLog("%06X: STOREW R%02u, (R%02u) [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM);
1809 #endif
1810         if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1811                 GPUWriteLong(RM, RN & 0xFFFF, GPU);
1812         else
1813                 JaguarWriteWord(RM, RN, GPU);
1814 }
1815
1816 static void gpu_opcode_store(void)
1817 {
1818 #ifdef GPU_DIS_STORE
1819         if (doGPUDis)
1820                 WriteLog("%06X: STORE  R%02u, (R%02u) [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM);
1821 #endif
1822         GPUWriteLong(RM, RN, GPU);
1823 }
1824
1825 static void gpu_opcode_storep(void)
1826 {
1827         GPUWriteLong(RM + 0, gpu_hidata, GPU);
1828         GPUWriteLong(RM + 4, RN, GPU);
1829 }
1830
1831 static void gpu_opcode_loadb(void)
1832 {
1833 #ifdef GPU_DIS_LOADB
1834         if (doGPUDis)
1835                 WriteLog("%06X: LOADB  (R%02u), R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1836 #endif
1837         if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1838                 RN = GPUReadLong(RM, GPU) & 0xFF;
1839         else
1840                 RN = JaguarReadByte(RM, GPU);
1841 #ifdef GPU_DIS_LOADB
1842         if (doGPUDis)
1843                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1844 #endif
1845 }
1846
1847 static void gpu_opcode_loadw(void)
1848 {
1849 #ifdef GPU_DIS_LOADW
1850         if (doGPUDis)
1851                 WriteLog("%06X: LOADW  (R%02u), R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1852 #endif
1853         if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1854                 RN = GPUReadLong(RM, GPU) & 0xFFFF;
1855         else
1856                 RN = JaguarReadWord(RM, GPU);
1857 #ifdef GPU_DIS_LOADW
1858         if (doGPUDis)
1859                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1860 #endif
1861 }
1862
1863 static void gpu_opcode_load(void)
1864 {
1865 #ifdef GPU_DIS_LOAD
1866         if (doGPUDis)
1867                 WriteLog("%06X: LOAD   (R%02u), R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1868 #endif
1869         RN = GPUReadLong(RM, GPU);
1870 #ifdef GPU_DIS_LOAD
1871         if (doGPUDis)
1872                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1873 #endif
1874 }
1875
1876 static void gpu_opcode_loadp(void)
1877 {
1878         gpu_hidata = GPUReadLong(RM + 0, GPU);
1879         RN                 = GPUReadLong(RM + 4, GPU);
1880 }
1881
1882 static void gpu_opcode_load_r14_indexed(void)
1883 {
1884 #ifdef GPU_DIS_LOAD14I
1885         if (doGPUDis)
1886                 WriteLog("%06X: LOAD   (R14+$%02X), R%02u [NCZ:%u%u%u, R14+$%02X=%08X, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1] << 2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, gpu_convert_zero[IMM_1] << 2, gpu_reg[14]+(gpu_convert_zero[IMM_1] << 2), IMM_2, RN);
1887 #endif
1888         RN = GPUReadLong(gpu_reg[14] + (gpu_convert_zero[IMM_1] << 2), GPU);
1889 #ifdef GPU_DIS_LOAD14I
1890         if (doGPUDis)
1891                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1892 #endif
1893 }
1894
1895 static void gpu_opcode_load_r15_indexed(void)
1896 {
1897 #ifdef GPU_DIS_LOAD15I
1898         if (doGPUDis)
1899                 WriteLog("%06X: LOAD   (R15+$%02X), R%02u [NCZ:%u%u%u, R15+$%02X=%08X, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1] << 2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, gpu_convert_zero[IMM_1] << 2, gpu_reg[15]+(gpu_convert_zero[IMM_1] << 2), IMM_2, RN);
1900 #endif
1901         RN = GPUReadLong(gpu_reg[15] + (gpu_convert_zero[IMM_1] << 2), GPU);
1902 #ifdef GPU_DIS_LOAD15I
1903         if (doGPUDis)
1904                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1905 #endif
1906 }
1907
1908 static void gpu_opcode_movei(void)
1909 {
1910 #ifdef GPU_DIS_MOVEI
1911         if (doGPUDis)
1912                 WriteLog("%06X: MOVEI  #$%08X, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, (uint32)GPUReadWord(gpu_pc) | ((uint32)GPUReadWord(gpu_pc + 2) << 16), IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1913 #endif
1914         // This instruction is followed by 32-bit value in LSW / MSW format...
1915         RN = (uint32)GPUReadWord(gpu_pc, GPU) | ((uint32)GPUReadWord(gpu_pc + 2, GPU) << 16);
1916         gpu_pc += 4;
1917 #ifdef GPU_DIS_MOVEI
1918         if (doGPUDis)
1919                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1920 #endif
1921 }
1922
1923 static void gpu_opcode_moveta(void)
1924 {
1925 #ifdef GPU_DIS_MOVETA
1926         if (doGPUDis)
1927                 WriteLog("%06X: MOVETA R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u(alt)=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, ALTERNATE_RN);
1928 #endif
1929         ALTERNATE_RN = RM;
1930 #ifdef GPU_DIS_MOVETA
1931         if (doGPUDis)
1932                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u(alt)=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, ALTERNATE_RN);
1933 #endif
1934 }
1935
1936 static void gpu_opcode_movefa(void)
1937 {
1938 #ifdef GPU_DIS_MOVEFA
1939         if (doGPUDis)
1940                 WriteLog("%06X: MOVEFA R%02u, R%02u [NCZ:%u%u%u, R%02u(alt)=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, ALTERNATE_RM, IMM_2, RN);
1941 #endif
1942         RN = ALTERNATE_RM;
1943 #ifdef GPU_DIS_MOVEFA
1944         if (doGPUDis)
1945                 WriteLog("[NCZ:%u%u%u, R%02u(alt)=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, ALTERNATE_RM, IMM_2, RN);
1946 #endif
1947 }
1948
1949 static void gpu_opcode_move(void)
1950 {
1951 #ifdef GPU_DIS_MOVE
1952         if (doGPUDis)
1953                 WriteLog("%06X: MOVE   R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1954 #endif
1955         RN = RM;
1956 #ifdef GPU_DIS_MOVE
1957         if (doGPUDis)
1958                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1959 #endif
1960 }
1961
1962 static void gpu_opcode_moveq(void)
1963 {
1964 #ifdef GPU_DIS_MOVEQ
1965         if (doGPUDis)
1966                 WriteLog("%06X: MOVEQ  #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1967 #endif
1968         RN = IMM_1;
1969 #ifdef GPU_DIS_MOVEQ
1970         if (doGPUDis)
1971                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1972 #endif
1973 }
1974
1975 static void gpu_opcode_resmac(void)
1976 {
1977         RN = gpu_acc;
1978 }
1979
1980 static void gpu_opcode_imult(void)
1981 {
1982 #ifdef GPU_DIS_IMULT
1983         if (doGPUDis)
1984                 WriteLog("%06X: IMULT  R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1985 #endif
1986         RN = (int16)RN * (int16)RM;
1987         SET_ZN(RN);
1988 #ifdef GPU_DIS_IMULT
1989         if (doGPUDis)
1990                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1991 #endif
1992 }
1993
1994 static void gpu_opcode_mult(void)
1995 {
1996 #ifdef GPU_DIS_MULT
1997         if (doGPUDis)
1998                 WriteLog("%06X: MULT   R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1999 #endif
2000         RN = (uint16)RM * (uint16)RN;
2001         SET_ZN(RN);
2002 #ifdef GPU_DIS_MULT
2003         if (doGPUDis)
2004                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2005 #endif
2006 }
2007
2008 static void gpu_opcode_bclr(void)
2009 {
2010 #ifdef GPU_DIS_BCLR
2011         if (doGPUDis)
2012                 WriteLog("%06X: BCLR   #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2013 #endif
2014         UINT32 res = RN & ~(1 << IMM_1);
2015         RN = res;
2016         SET_ZN(res);
2017 #ifdef GPU_DIS_BCLR
2018         if (doGPUDis)
2019                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2020 #endif
2021 }
2022
2023 static void gpu_opcode_btst(void)
2024 {
2025 #ifdef GPU_DIS_BTST
2026         if (doGPUDis)
2027                 WriteLog("%06X: BTST   #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2028 #endif
2029         gpu_flag_z = (~RN >> IMM_1) & 1;
2030 #ifdef GPU_DIS_BTST
2031         if (doGPUDis)
2032                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2033 #endif
2034 }
2035
2036 static void gpu_opcode_bset(void)
2037 {
2038 #ifdef GPU_DIS_BSET
2039         if (doGPUDis)
2040                 WriteLog("%06X: BSET   #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2041 #endif
2042         UINT32 res = RN | (1 << IMM_1);
2043         RN = res;
2044         SET_ZN(res);
2045 #ifdef GPU_DIS_BSET
2046         if (doGPUDis)
2047                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2048 #endif
2049 }
2050
2051 static void gpu_opcode_imacn(void)
2052 {
2053         uint32 res = (int16)RM * (int16)(RN);
2054         gpu_acc += res;
2055 }
2056
2057 static void gpu_opcode_mtoi(void)
2058 {
2059         uint32 _RM = RM;
2060         uint32 res = RN = (((INT32)_RM >> 8) & 0xFF800000) | (_RM & 0x007FFFFF);
2061         SET_ZN(res);
2062 }
2063
2064 static void gpu_opcode_normi(void)
2065 {
2066         uint32 _RM = RM;
2067         uint32 res = 0;
2068
2069         if (_RM)
2070         {
2071                 while ((_RM & 0xFFC00000) == 0)
2072                 {
2073                         _RM <<= 1;
2074                         res--;
2075                 }
2076                 while ((_RM & 0xFF800000) != 0)
2077                 {
2078                         _RM >>= 1;
2079                         res++;
2080                 }
2081         }
2082         RN = res;
2083         SET_ZN(res);
2084 }
2085
2086 static void gpu_opcode_mmult(void)
2087 {
2088         int count       = gpu_matrix_control & 0x0F;    // Matrix width
2089         uint32 addr = gpu_pointer_to_matrix;            // In the GPU's RAM
2090         int64 accum = 0;
2091         uint32 res;
2092
2093         if (gpu_matrix_control & 0x10)                          // Column stepping
2094         {
2095                 for(int i=0; i<count; i++)
2096                 { 
2097                         int16 a;
2098                         if (i & 0x01)
2099                                 a = (int16)((gpu_alternate_reg[IMM_1 + (i >> 1)] >> 16) & 0xFFFF);
2100                         else
2101                                 a = (int16)(gpu_alternate_reg[IMM_1 + (i >> 1)] & 0xFFFF);
2102
2103                         int16 b = ((int16)GPUReadWord(addr + 2, GPU));
2104                         accum += a * b;
2105                         addr += 4 * count;
2106                 }
2107         }
2108         else                                                                            // Row stepping
2109         {
2110                 for(int i=0; i<count; i++)
2111                 {
2112                         int16 a;
2113                         if (i & 0x01)
2114                                 a = (int16)((gpu_alternate_reg[IMM_1 + (i >> 1)] >> 16) & 0xFFFF);
2115                         else
2116                                 a = (int16)(gpu_alternate_reg[IMM_1 + (i >> 1)] & 0xFFFF);
2117
2118                         int16 b = ((int16)GPUReadWord(addr + 2, GPU));
2119                         accum += a * b;
2120                         addr += 4;
2121                 }
2122         }
2123         RN = res = (int32)accum;
2124         // carry flag to do (out of the last add)
2125         SET_ZN(res);
2126 }
2127
2128 static void gpu_opcode_abs(void)
2129 {
2130 #ifdef GPU_DIS_ABS
2131         if (doGPUDis)
2132                 WriteLog("%06X: ABS    R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2133 #endif
2134         gpu_flag_c = RN >> 31;
2135         if (RN == 0x80000000)
2136         //Is 0x80000000 a positive number? If so, then we need to set C to 0 as well!
2137                 gpu_flag_n = 1, gpu_flag_z = 0;
2138         else
2139         {
2140                 if (gpu_flag_c)
2141                         RN = -RN;
2142                 gpu_flag_n = 0; SET_FLAG_Z(RN);
2143         }
2144 #ifdef GPU_DIS_ABS
2145         if (doGPUDis)
2146                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2147 #endif
2148 }
2149
2150 static void gpu_opcode_div(void)        // RN / RM
2151 {
2152 #ifdef GPU_DIS_DIV
2153         if (doGPUDis)
2154                 WriteLog("%06X: DIV    R%02u, R%02u (%s) [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, (gpu_div_control & 0x01 ? "16.16" : "32"), gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2155 #endif
2156 // NOTE: remainder is NOT calculated correctly here!
2157 //       The original tried to get it right by checking to see if the
2158 //       remainder was negative, but that's too late...
2159 // The code there should do it now, but I'm not 100% sure...
2160
2161         if (RM)
2162         {
2163                 if (gpu_div_control & 0x01)             // 16.16 division
2164                 {
2165                         RN = ((UINT64)RN << 16) / RM;
2166                         gpu_remain = ((UINT64)RN << 16) % RM;
2167                 }
2168                 else
2169                 {
2170                         RN = RN / RM;
2171                         gpu_remain = RN % RM;
2172                 }
2173
2174                 if ((gpu_remain - RM) & 0x80000000)     // If the result would have been negative...
2175                         gpu_remain -= RM;                       // Then make it negative!
2176         }
2177         else
2178                 RN = 0xFFFFFFFF;
2179
2180 /*      uint32 _RM=RM;
2181         uint32 _RN=RN;
2182
2183         if (_RM)
2184         {
2185                 if (gpu_div_control & 1)
2186                 {
2187                         gpu_remain = (((uint64)_RN) << 16) % _RM;
2188                         if (gpu_remain&0x80000000)
2189                                 gpu_remain-=_RM;
2190                         RN = (((uint64)_RN) << 16) / _RM;
2191                 }
2192                 else
2193                 {
2194                         gpu_remain = _RN % _RM;
2195                         if (gpu_remain&0x80000000)
2196                                 gpu_remain-=_RM;
2197                         RN/=_RM;
2198                 }
2199         }
2200         else
2201                 RN=0xffffffff;*/
2202 #ifdef GPU_DIS_DIV
2203         if (doGPUDis)
2204                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] Remainder: %08X\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN, gpu_remain);
2205 #endif
2206 }
2207
2208 static void gpu_opcode_imultn(void)
2209 {
2210         uint32 res = (int32)((int16)RN * (int16)RM);
2211         gpu_acc = (int32)res;
2212         SET_FLAG_Z(res);
2213         SET_FLAG_N(res);
2214 }
2215
2216 static void gpu_opcode_neg(void)
2217 {
2218 #ifdef GPU_DIS_NEG
2219         if (doGPUDis)
2220                 WriteLog("%06X: NEG    R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2221 #endif
2222         UINT32 res = -RN;
2223         SET_ZNC_SUB(0, RN, res);
2224         RN = res;
2225 #ifdef GPU_DIS_NEG
2226         if (doGPUDis)
2227                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2228 #endif
2229 }
2230
2231 static void gpu_opcode_shlq(void)
2232 {
2233 #ifdef GPU_DIS_SHLQ
2234         if (doGPUDis)
2235                 WriteLog("%06X: SHLQ   #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, 32 - IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2236 #endif
2237 // Was a bug here...
2238 // (Look at Aaron's code: If r1 = 32, then 32 - 32 = 0 which is wrong!)
2239         INT32 r1 = 32 - IMM_1;
2240         UINT32 res = RN << r1;
2241         SET_ZN(res); gpu_flag_c = (RN >> 31) & 1;
2242         RN = res;
2243 #ifdef GPU_DIS_SHLQ
2244         if (doGPUDis)
2245                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2246 #endif
2247 }
2248
2249 static void gpu_opcode_shrq(void)
2250 {
2251 #ifdef GPU_DIS_SHRQ
2252         if (doGPUDis)
2253                 WriteLog("%06X: SHRQ   #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2254 #endif
2255         INT32 r1 = gpu_convert_zero[IMM_1];
2256         UINT32 res = RN >> r1;
2257         SET_ZN(res); gpu_flag_c = RN & 1;
2258         RN = res;
2259 #ifdef GPU_DIS_SHRQ
2260         if (doGPUDis)
2261                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2262 #endif
2263 }
2264
2265 static void gpu_opcode_ror(void)
2266 {
2267 #ifdef GPU_DIS_ROR
2268         if (doGPUDis)
2269                 WriteLog("%06X: ROR    R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2270 #endif
2271         UINT32 r1 = RM & 0x1F;
2272         UINT32 res = (RN >> r1) | (RN << (32 - r1));
2273         SET_ZN(res); gpu_flag_c = (RN >> 31) & 1;
2274         RN = res;
2275 #ifdef GPU_DIS_ROR
2276         if (doGPUDis)
2277                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2278 #endif
2279 }
2280
2281 static void gpu_opcode_rorq(void)
2282 {
2283 #ifdef GPU_DIS_RORQ
2284         if (doGPUDis)
2285                 WriteLog("%06X: RORQ   #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2286 #endif
2287         UINT32 r1 = gpu_convert_zero[IMM_1 & 0x1F];
2288         UINT32 r2 = RN;
2289         UINT32 res = (r2 >> r1) | (r2 << (32 - r1));
2290         RN = res;
2291         SET_ZN(res); gpu_flag_c = (r2 >> 31) & 0x01;
2292 #ifdef GPU_DIS_RORQ
2293         if (doGPUDis)
2294                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2295 #endif
2296 }
2297
2298 static void gpu_opcode_sha(void)
2299 {
2300 /*      int dreg = jaguar.op & 31;
2301         INT32 r1 = (INT32)jaguar.r[(jaguar.op >> 5) & 31];
2302         UINT32 r2 = jaguar.r[dreg];
2303         UINT32 res;
2304
2305         CLR_ZNC;
2306         if (r1 < 0)
2307         {
2308                 res = (r1 <= -32) ? 0 : (r2 << -r1);
2309                 jaguar.FLAGS |= (r2 >> 30) & 2;
2310         }
2311         else
2312         {
2313                 res = (r1 >= 32) ? ((INT32)r2 >> 31) : ((INT32)r2 >> r1);
2314                 jaguar.FLAGS |= (r2 << 1) & 2;
2315         }
2316         jaguar.r[dreg] = res;
2317         SET_ZN(res);*/
2318
2319 #ifdef GPU_DIS_SHA
2320         if (doGPUDis)
2321                 WriteLog("%06X: SHA    R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2322 #endif
2323         UINT32 res;
2324
2325         if ((INT32)RM < 0)
2326         {
2327                 res = ((INT32)RM <= -32) ? 0 : (RN << -(INT32)RM);
2328                 gpu_flag_c = RN >> 31;
2329         }
2330         else
2331         {
2332                 res = ((INT32)RM >= 32) ? ((INT32)RN >> 31) : ((INT32)RN >> (INT32)RM);
2333                 gpu_flag_c = RN & 0x01;
2334         }
2335         RN = res;
2336         SET_ZN(res);
2337 #ifdef GPU_DIS_SHA
2338         if (doGPUDis)
2339                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2340 #endif
2341
2342 /*      int32 sRM=(int32)RM;
2343         uint32 _RN=RN;
2344
2345         if (sRM<0)
2346         {
2347                 uint32 shift=-sRM;
2348                 if (shift>=32) shift=32;
2349                 gpu_flag_c=(_RN&0x80000000)>>31;
2350                 while (shift)
2351                 {
2352                         _RN<<=1;
2353                         shift--;
2354                 }
2355         }
2356         else
2357         {
2358                 uint32 shift=sRM;
2359                 if (shift>=32) shift=32;
2360                 gpu_flag_c=_RN&0x1;
2361                 while (shift)
2362                 {
2363                         _RN=((int32)_RN)>>1;
2364                         shift--;
2365                 }
2366         }
2367         RN=_RN;
2368         SET_FLAG_Z(_RN);
2369         SET_FLAG_N(_RN);*/
2370 }
2371
2372 static void gpu_opcode_sharq(void)
2373 {
2374 #ifdef GPU_DIS_SHARQ
2375         if (doGPUDis)
2376                 WriteLog("%06X: SHARQ  #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2377 #endif
2378         UINT32 res = (INT32)RN >> gpu_convert_zero[IMM_1];
2379         SET_ZN(res); gpu_flag_c = RN & 0x01;
2380         RN = res;
2381 #ifdef GPU_DIS_SHARQ
2382         if (doGPUDis)
2383                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2384 #endif
2385 }
2386
2387 static void gpu_opcode_sh(void)
2388 {
2389 #ifdef GPU_DIS_SH
2390         if (doGPUDis)
2391                 WriteLog("%06X: SH     R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2392 #endif
2393         if (RM & 0x80000000)            // Shift left
2394         {
2395                 gpu_flag_c = RN >> 31;
2396                 RN = ((int32)RM <= -32 ? 0 : RN << -(int32)RM);
2397         }
2398         else                                            // Shift right
2399         {
2400                 gpu_flag_c = RN & 0x01;
2401                 RN = (RM >= 32 ? 0 : RN >> RM);
2402         }
2403         SET_ZN(RN);
2404 #ifdef GPU_DIS_SH
2405         if (doGPUDis)
2406                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2407 #endif
2408 }
2409
2410 //Temporary: Testing only!
2411 //#include "gpu2.cpp"
2412 //#include "gpu3.cpp"