]> Shamusworld >> Repos - virtualjaguar/blob - src/gpu.cpp
Initial changeset to experimental branch
[virtualjaguar] / src / gpu.cpp
1 #if 1
2
3 //
4 // GPU Core
5 //
6 // Originally by David Raingeard (Cal2)
7 // GCC/SDL port by Niels Wagenaar (Linux/WIN32) and Caz (BeOS)
8 // Cleanups, endian wrongness, and bad ASM amelioration by James L. Hammons
9 // (C) 2010 Underground Software
10 //
11 // JLH = James L. Hammons <jlhamm@acm.org>
12 //
13 // Who  When        What
14 // ---  ----------  -------------------------------------------------------------
15 // JLH  01/16/2010  Created this log ;-)
16
17 //
18 // Note: Endian wrongness probably stems from the MAME origins of this emu and
19 //       the braindead way in which MAME handles memory. :-)
20 //
21 // Problem with not booting the BIOS was the incorrect way that the
22 // SUBC instruction set the carry when the carry was set going in...
23 // Same problem with ADDC...
24 //
25
26 #include "gpu.h"
27
28 #include <stdlib.h>
29 #include <string.h>                                                             // For memset
30 #include "dsp.h"
31 #include "jagdasm.h"
32 #include "jaguar.h"
33 #include "log.h"
34 #include "m68k.h"
35 //#include "memory.h"
36 #include "tom.h"
37
38 //#define GPU_DEBUG
39
40 // For GPU disassembly...
41
// Per-opcode disassembly switches. Every symbol is defined with an empty
// value, so only its presence matters. (Presumably each opcode handler tests
// its own symbol with #ifdef -- the handlers are not visible in this part of
// the file, so confirm there.)

// Arithmetic
#define GPU_DIS_ABS
#define GPU_DIS_ADD
#define GPU_DIS_ADDC
#define GPU_DIS_ADDQ
#define GPU_DIS_ADDQT
#define GPU_DIS_DIV
#define GPU_DIS_IMULT
#define GPU_DIS_MULT
#define GPU_DIS_NEG
#define GPU_DIS_SUB
#define GPU_DIS_SUBC
#define GPU_DIS_SUBQ
#define GPU_DIS_SUBQT

// Logic and single-bit operations
#define GPU_DIS_AND
#define GPU_DIS_BCLR
#define GPU_DIS_BSET
#define GPU_DIS_BTST
#define GPU_DIS_NOT
#define GPU_DIS_OR
#define GPU_DIS_XOR

// Shifts and rotates
#define GPU_DIS_ROR
#define GPU_DIS_RORQ
#define GPU_DIS_SH
#define GPU_DIS_SHA
#define GPU_DIS_SHARQ
#define GPU_DIS_SHLQ
#define GPU_DIS_SHRQ

// Compares
#define GPU_DIS_CMP
#define GPU_DIS_CMPQ

// Register moves
#define GPU_DIS_MOVE
#define GPU_DIS_MOVEFA
#define GPU_DIS_MOVEI
#define GPU_DIS_MOVEPC
#define GPU_DIS_MOVETA
#define GPU_DIS_MOVEQ

// Loads
#define GPU_DIS_LOAD
#define GPU_DIS_LOADB
#define GPU_DIS_LOADW
#define GPU_DIS_LOAD14I
#define GPU_DIS_LOAD14R
#define GPU_DIS_LOAD15I
#define GPU_DIS_LOAD15R

// Stores
#define GPU_DIS_STORE
#define GPU_DIS_STOREB
#define GPU_DIS_STOREW
#define GPU_DIS_STORE14I
#define GPU_DIS_STORE14R
#define GPU_DIS_STORE15I
#define GPU_DIS_STORE15R

// Flow control
#define GPU_DIS_JUMP
#define GPU_DIS_JR
#define GPU_DIS_NOP

// Miscellaneous
#define GPU_DIS_PACK
#define GPU_DIS_SAT8

// Global flag, initially false. Presumably it gates the disassembly output at
// run time -- confirm against its users, which are outside this chunk.
bool doGPUDis = false;
98 //bool doGPUDis = true;
99 //*/
100 /*
101 GPU opcodes use (BIOS flying ATARI logo):
102 +                     add 357416
103 +                    addq 538030
104 +                   addqt 6999
105 +                     sub 116663
106 +                    subq 188059
107 +                   subqt 15086
108 +                     neg 36097
109 +                     and 233993
110 +                      or 109332
111 +                     xor 1384
112 +                    btst 111924
113 +                    bset 25029
114 +                    bclr 10551
115 +                    mult 28147
116 +                   imult 69148
117 +                     div 64102
118 +                     abs 159394
119 +                    shlq 194690
120 +                    shrq 292587
121 +                   sharq 192649
122 +                    rorq 58672
123 +                     cmp 244963
124 +                    cmpq 114834
125 +                    move 833472
126 +                   moveq 56427
127 +                  moveta 220814
128 +                  movefa 170678
129 +                   movei 152025
130 +                   loadw 108220
131 +                    load 430936
132 +                  storew 3036
133 +                   store 372490
134 +                 move_pc 2330
135 +                    jump 349134
136 +                      jr 529171
137                     mmult 64904
138 +                     nop 432179
139 */
140
141 // Various bits
142
// Bits 9..13, under the CINTn names. They have the same values as
// INT_CLR0..INT_CLR4 below. GPUWriteLong shifts (flags & CINT04FLAGS) right by
// three and clears the resulting bits in gpu_control.
#define CINT0FLAG		(1 << 9)
#define CINT1FLAG		(1 << 10)
#define CINT2FLAG		(1 << 11)
#define CINT3FLAG		(1 << 12)
#define CINT4FLAG		(1 << 13)
#define CINT04FLAGS		(CINT0FLAG | CINT1FLAG | CINT2FLAG | CINT3FLAG | CINT4FLAG)

// GPU_FLAGS bits, one mask per bit position (bit 0 is the least significant)

#define ZERO_FLAG		(1 << 0)
#define CARRY_FLAG		(1 << 1)
#define NEGA_FLAG		(1 << 2)
#define IMASK			(1 << 3)
#define INT_ENA0		(1 << 4)
#define INT_ENA1		(1 << 5)
#define INT_ENA2		(1 << 6)
#define INT_ENA3		(1 << 7)
#define INT_ENA4		(1 << 8)
#define INT_CLR0		(1 << 9)
#define INT_CLR1		(1 << 10)
#define INT_CLR2		(1 << 11)
#define INT_CLR3		(1 << 12)
#define INT_CLR4		(1 << 13)
#define REGPAGE			(1 << 14)
#define DMAEN			(1 << 15)
168
169 // External global variables
170
171 extern int start_logging;
172 extern int gpu_start_log;
173
174 // Private function prototypes
175
176 void GPUUpdateRegisterBanks(void);
177 void GPUDumpDisassembly(void);
178 void GPUDumpRegisters(void);
179 void GPUDumpMemory(void);
180
181 static void gpu_opcode_add(void);
182 static void gpu_opcode_addc(void);
183 static void gpu_opcode_addq(void);
184 static void gpu_opcode_addqt(void);
185 static void gpu_opcode_sub(void);
186 static void gpu_opcode_subc(void);
187 static void gpu_opcode_subq(void);
188 static void gpu_opcode_subqt(void);
189 static void gpu_opcode_neg(void);
190 static void gpu_opcode_and(void);
191 static void gpu_opcode_or(void);
192 static void gpu_opcode_xor(void);
193 static void gpu_opcode_not(void);
194 static void gpu_opcode_btst(void);
195 static void gpu_opcode_bset(void);
196 static void gpu_opcode_bclr(void);
197 static void gpu_opcode_mult(void);
198 static void gpu_opcode_imult(void);
199 static void gpu_opcode_imultn(void);
200 static void gpu_opcode_resmac(void);
201 static void gpu_opcode_imacn(void);
202 static void gpu_opcode_div(void);
203 static void gpu_opcode_abs(void);
204 static void gpu_opcode_sh(void);
205 static void gpu_opcode_shlq(void);
206 static void gpu_opcode_shrq(void);
207 static void gpu_opcode_sha(void);
208 static void gpu_opcode_sharq(void);
209 static void gpu_opcode_ror(void);
210 static void gpu_opcode_rorq(void);
211 static void gpu_opcode_cmp(void);
212 static void gpu_opcode_cmpq(void);
213 static void gpu_opcode_sat8(void);
214 static void gpu_opcode_sat16(void);
215 static void gpu_opcode_move(void);
216 static void gpu_opcode_moveq(void);
217 static void gpu_opcode_moveta(void);
218 static void gpu_opcode_movefa(void);
219 static void gpu_opcode_movei(void);
220 static void gpu_opcode_loadb(void);
221 static void gpu_opcode_loadw(void);
222 static void gpu_opcode_load(void);
223 static void gpu_opcode_loadp(void);
224 static void gpu_opcode_load_r14_indexed(void);
225 static void gpu_opcode_load_r15_indexed(void);
226 static void gpu_opcode_storeb(void);
227 static void gpu_opcode_storew(void);
228 static void gpu_opcode_store(void);
229 static void gpu_opcode_storep(void);
230 static void gpu_opcode_store_r14_indexed(void);
231 static void gpu_opcode_store_r15_indexed(void);
232 static void gpu_opcode_move_pc(void);
233 static void gpu_opcode_jump(void);
234 static void gpu_opcode_jr(void);
235 static void gpu_opcode_mmult(void);
236 static void gpu_opcode_mtoi(void);
237 static void gpu_opcode_normi(void);
238 static void gpu_opcode_nop(void);
239 static void gpu_opcode_load_r14_ri(void);
240 static void gpu_opcode_load_r15_ri(void);
241 static void gpu_opcode_store_r14_ri(void);
242 static void gpu_opcode_store_r15_ri(void);
243 static void gpu_opcode_sat24(void);
244 static void gpu_opcode_pack(void);
245
246 // This is wrong, since it doesn't take pipeline effects into account. !!! FIX !!!
247 /*uint8 gpu_opcode_cycles[64] =
248 {
249         3,  3,  3,  3,  3,  3,  3,  3,
250         3,  3,  3,  3,  3,  3,  3,  3,
251         3,  3,  1,  3,  1, 18,  3,  3,
252         3,  3,  3,  3,  3,  3,  3,  3,
253         3,  3,  2,  2,  2,  2,  3,  4,
254         5,  4,  5,  6,  6,  1,  1,  1,
255         1,  2,  2,  2,  1,  1,  9,  3,
256         3,  1,  6,  6,  2,  2,  3,  3
257 };//*/
258 //Here's a QnD kludge...
259 //This is wrong, wrong, WRONG, but it seems to work for the time being...
260 //(That is, it fixes Flip Out which relies on GPU timing rather than semaphores. Bad developers! Bad!)
261 //What's needed here is a way to take pipeline effects into account (including pipeline stalls!)...
262 /*uint8 gpu_opcode_cycles[64] =
263 {
264         1,  1,  1,  1,  1,  1,  1,  1,
265         1,  1,  1,  1,  1,  1,  1,  1,
266         1,  1,  1,  1,  1,  9,  1,  1,
267         1,  1,  1,  1,  1,  1,  1,  1,
268         1,  1,  1,  1,  1,  1,  1,  2,
269         2,  2,  2,  3,  3,  1,  1,  1,
270         1,  1,  1,  1,  1,  1,  4,  1,
271         1,  1,  3,  3,  1,  1,  1,  1
272 };//*/
273 uint8 gpu_opcode_cycles[64] =
274 {
275         1,  1,  1,  1,  1,  1,  1,  1,
276         1,  1,  1,  1,  1,  1,  1,  1,
277         1,  1,  1,  1,  1,  1,  1,  1,
278         1,  1,  1,  1,  1,  1,  1,  1,
279         1,  1,  1,  1,  1,  1,  1,  1,
280         1,  1,  1,  1,  1,  1,  1,  1,
281         1,  1,  1,  1,  1,  1,  1,  1,
282         1,  1,  1,  1,  1,  1,  1,  1
283 };//*/
284
285 void (*gpu_opcode[64])()=
286 {
287         gpu_opcode_add,                                 gpu_opcode_addc,                                gpu_opcode_addq,                                gpu_opcode_addqt,
288         gpu_opcode_sub,                                 gpu_opcode_subc,                                gpu_opcode_subq,                                gpu_opcode_subqt,
289         gpu_opcode_neg,                                 gpu_opcode_and,                                 gpu_opcode_or,                                  gpu_opcode_xor,
290         gpu_opcode_not,                                 gpu_opcode_btst,                                gpu_opcode_bset,                                gpu_opcode_bclr,
291         gpu_opcode_mult,                                gpu_opcode_imult,                               gpu_opcode_imultn,                              gpu_opcode_resmac,
292         gpu_opcode_imacn,                               gpu_opcode_div,                                 gpu_opcode_abs,                                 gpu_opcode_sh,
293         gpu_opcode_shlq,                                gpu_opcode_shrq,                                gpu_opcode_sha,                                 gpu_opcode_sharq,
294         gpu_opcode_ror,                                 gpu_opcode_rorq,                                gpu_opcode_cmp,                                 gpu_opcode_cmpq,
295         gpu_opcode_sat8,                                gpu_opcode_sat16,                               gpu_opcode_move,                                gpu_opcode_moveq,
296         gpu_opcode_moveta,                              gpu_opcode_movefa,                              gpu_opcode_movei,                               gpu_opcode_loadb,
297         gpu_opcode_loadw,                               gpu_opcode_load,                                gpu_opcode_loadp,                               gpu_opcode_load_r14_indexed,
298         gpu_opcode_load_r15_indexed,    gpu_opcode_storeb,                              gpu_opcode_storew,                              gpu_opcode_store,
299         gpu_opcode_storep,                              gpu_opcode_store_r14_indexed,   gpu_opcode_store_r15_indexed,   gpu_opcode_move_pc,
300         gpu_opcode_jump,                                gpu_opcode_jr,                                  gpu_opcode_mmult,                               gpu_opcode_mtoi,
301         gpu_opcode_normi,                               gpu_opcode_nop,                                 gpu_opcode_load_r14_ri,                 gpu_opcode_load_r15_ri,
302         gpu_opcode_store_r14_ri,                gpu_opcode_store_r15_ri,                gpu_opcode_sat24,                               gpu_opcode_pack,
303 };
304
305 static uint8 gpu_ram_8[0x1000];
306 uint32 gpu_pc;
307 static uint32 gpu_acc;
308 static uint32 gpu_remain;
309 static uint32 gpu_hidata;
310 static uint32 gpu_flags;
311 static uint32 gpu_matrix_control;
312 static uint32 gpu_pointer_to_matrix;
313 static uint32 gpu_data_organization;
314 static uint32 gpu_control;
315 static uint32 gpu_div_control;
316 // There is a distinct advantage to having these separated out--there's no need to clear
317 // a bit before writing a result. I.e., if the result of an operation leaves a zero in
318 // the carry flag, you don't have to zero gpu_flag_c before you can write that zero!
319 static uint8 gpu_flag_z, gpu_flag_n, gpu_flag_c;
320 static uint32 gpu_reg_bank_0[32];
321 static uint32 gpu_reg_bank_1[32];
322 static uint32 * gpu_reg;
323 static uint32 * gpu_alternate_reg;
324
325 static uint32 gpu_instruction;
326 static uint32 gpu_opcode_first_parameter;
327 static uint32 gpu_opcode_second_parameter;
328
// Non-zero while bit 0 of gpu_control is set.
#define GPU_RUNNING		(gpu_control & 0x01)

// Operand access. The two decoded parameter fields serve either as register
// indices (current bank or alternate bank) or directly as immediates.
#define RM				gpu_reg[gpu_opcode_first_parameter]
#define RN				gpu_reg[gpu_opcode_second_parameter]
#define ALTERNATE_RM	gpu_alternate_reg[gpu_opcode_first_parameter]
#define ALTERNATE_RN	gpu_alternate_reg[gpu_opcode_second_parameter]
#define IMM_1			gpu_opcode_first_parameter
#define IMM_2			gpu_opcode_second_parameter

// Older flag helpers. Their expansions END WITH A SEMICOLON, so they are only
// safe as stand-alone statements (never as the body of an unbraced if/else).
// The semicolons are kept on purpose: callers outside this part of the file
// may invoke them without writing one. Prefer SET_Z/SET_N/CLR_* below.
#define SET_FLAG_Z(r)	(gpu_flag_z = ((r) == 0));
#define SET_FLAG_N(r)	(gpu_flag_n = (((uint32)(r) >> 31) & 0x01));

#define RESET_FLAG_Z()	gpu_flag_z = 0;
#define RESET_FLAG_N()	gpu_flag_n = 0;
#define RESET_FLAG_C()	gpu_flag_c = 0;

// Flag helpers that expand to a single parenthesized expression.
//   SET_Z(r)       Z = 1 when r is zero
//   SET_N(r)       N = bit 31 of r
//   SET_C_ADD(a,b) C = 1 when the 32-bit sum a + b carries out (b > ~a)
//   SET_C_SUB(a,b) C = 1 when the 32-bit difference a - b borrows (b > a)
// Macro arguments may be evaluated more than once; pass side-effect-free
// expressions only.
#define CLR_Z				(gpu_flag_z = 0)
#define CLR_ZN				(gpu_flag_z = gpu_flag_n = 0)
#define CLR_ZNC				(gpu_flag_z = gpu_flag_n = gpu_flag_c = 0)
#define SET_Z(r)			(gpu_flag_z = ((r) == 0))
#define SET_N(r)			(gpu_flag_n = (((uint32)(r) >> 31) & 0x01))
#define SET_C_ADD(a,b)		(gpu_flag_c = ((uint32)(b) > (uint32)(~(a))))
#define SET_C_SUB(a,b)		(gpu_flag_c = ((uint32)(b) > (uint32)(a)))

// The combined setters are comma expressions instead of statement sequences.
// As separate statements, `if (cond) SET_ZN(x);` guarded only the first
// assignment and ran the rest unconditionally.
#define SET_ZN(r)			(SET_N(r), SET_Z(r))
#define SET_ZNC_ADD(a,b,r)	(SET_N(r), SET_Z(r), SET_C_ADD(a,b))
#define SET_ZNC_SUB(a,b,r)	(SET_N(r), SET_Z(r), SET_C_SUB(a,b))
355
356 uint32 gpu_convert_zero[32] =
357         { 32,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 };
358
359 uint8 * branch_condition_table = 0;
360 #define BRANCH_CONDITION(x)     branch_condition_table[(x) + ((jaguar_flags & 7) << 5)]
361
362 uint32 gpu_opcode_use[64];
363
// Mnemonic for each opcode number, in the same order as the gpu_opcode[]
// dispatch table.
const char * gpu_opcode_str[64]=
{
	/*  0 */ "add", "addc", "addq", "addqt",
	/*  4 */ "sub", "subc", "subq", "subqt",
	/*  8 */ "neg", "and", "or", "xor",
	/* 12 */ "not", "btst", "bset", "bclr",
	/* 16 */ "mult", "imult", "imultn", "resmac",
	/* 20 */ "imacn", "div", "abs", "sh",
	/* 24 */ "shlq", "shrq", "sha", "sharq",
	/* 28 */ "ror", "rorq", "cmp", "cmpq",
	/* 32 */ "sat8", "sat16", "move", "moveq",
	/* 36 */ "moveta", "movefa", "movei", "loadb",
	/* 40 */ "loadw", "load", "loadp", "load_r14_indexed",
	/* 44 */ "load_r15_indexed", "storeb", "storew", "store",
	/* 48 */ "storep", "store_r14_indexed", "store_r15_indexed", "move_pc",
	/* 52 */ "jump", "jr", "mmult", "mtoi",
	/* 56 */ "normi", "nop", "load_r14_ri", "load_r15_ri",
	/* 60 */ "store_r14_ri", "store_r15_ri", "sat24", "pack"
};
383
384 static uint32 gpu_in_exec = 0;
385 static uint32 gpu_releaseTimeSlice_flag = 0;
386
387 void GPUReleaseTimeslice(void)
388 {
389         gpu_releaseTimeSlice_flag = 1;
390 }
391
392 uint32 GPUGetPC(void)
393 {
394         return gpu_pc;
395 }
396
397 void build_branch_condition_table(void)
398 {
399         if (!branch_condition_table)
400         {
401                 branch_condition_table = (uint8 *)malloc(32 * 8 * sizeof(branch_condition_table[0]));
402
403                 if (branch_condition_table)
404                 {
405                         for(int i=0; i<8; i++)
406                         {
407                                 for(int j=0; j<32; j++)
408                                 {
409                                         int result = 1;
410                                         if (j & 1)
411                                                 if (i & ZERO_FLAG)
412                                                         result = 0;
413                                         if (j & 2)
414                                                 if (!(i & ZERO_FLAG))
415                                                         result = 0;
416                                         if (j & 4)
417                                                 if (i & (CARRY_FLAG << (j >> 4)))
418                                                         result = 0;
419                                         if (j & 8)
420                                                 if (!(i & (CARRY_FLAG << (j >> 4))))
421                                                         result = 0;
422                                         branch_condition_table[i * 32 + j] = result;
423                                 }
424                         }
425                 }
426         }
427 }
428
429 //
430 // GPU byte access (read)
431 //
432 uint8 GPUReadByte(uint32 offset, uint32 who/*=UNKNOWN*/)
433 {
434         if (offset >= 0xF02000 && offset <= 0xF020FF)
435                 WriteLog("GPU: ReadByte--Attempt to read from GPU register file by %s!\n", whoName[who]);
436
437         if ((offset >= GPU_WORK_RAM_BASE) && (offset < GPU_WORK_RAM_BASE+0x1000))
438                 return gpu_ram_8[offset & 0xFFF];
439         else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset < GPU_CONTROL_RAM_BASE+0x20))
440         {
441                 uint32 data = GPUReadLong(offset & 0xFFFFFFFC, who);
442
443                 if ((offset & 0x03) == 0)
444                         return data >> 24;
445                 else if ((offset & 0x03) == 1)
446                         return (data >> 16) & 0xFF;
447                 else if ((offset & 0x03) == 2)
448                         return (data >> 8) & 0xFF;
449                 else if ((offset & 0x03) == 3)
450                         return data & 0xFF;
451         }
452
453         return JaguarReadByte(offset, who);
454 }
455
456 //
457 // GPU word access (read)
458 //
459 uint16 GPUReadWord(uint32 offset, uint32 who/*=UNKNOWN*/)
460 {
461         if (offset >= 0xF02000 && offset <= 0xF020FF)
462                 WriteLog("GPU: ReadWord--Attempt to read from GPU register file by %s!\n", whoName[who]);
463
464         if ((offset >= GPU_WORK_RAM_BASE) && (offset < GPU_WORK_RAM_BASE+0x1000))
465         {
466                 offset &= 0xFFF;
467                 uint16 data = ((uint16)gpu_ram_8[offset] << 8) | (uint16)gpu_ram_8[offset+1];
468                 return data;
469         }
470         else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset < GPU_CONTROL_RAM_BASE+0x20))
471         {
472 // This looks and smells wrong...
473 // But it *might* be OK...
474                 if (offset & 0x01)                      // Catch cases 1 & 3... (unaligned read)
475                         return (GPUReadByte(offset, who) << 8) | GPUReadByte(offset+1, who);
476
477                 uint32 data = GPUReadLong(offset & 0xFFFFFFFC, who);
478
479                 if (offset & 0x02)                      // Cases 0 & 2...
480                         return data & 0xFFFF;
481                 else
482                         return data >> 16;
483         }
484
485 //TEMP--Mirror of F03000? No. Writes only...
486 //if (offset >= 0xF0B000 && offset <= 0xF0BFFF)
487 //WriteLog("[GPUR16] --> Possible GPU RAM mirror access by %s!", whoName[who]);
488
489         return JaguarReadWord(offset, who);
490 }
491
492 //
493 // GPU dword access (read)
494 //
495 uint32 GPUReadLong(uint32 offset, uint32 who/*=UNKNOWN*/)
496 {
497         if (offset >= 0xF02000 && offset <= 0xF020FF)
498                 WriteLog("GPU: ReadLong--Attempt to read from GPU register file by %s!\n", whoName[who]);
499
500 //      if ((offset >= GPU_WORK_RAM_BASE) && (offset < GPU_WORK_RAM_BASE + 0x1000))
501         if ((offset >= GPU_WORK_RAM_BASE) && (offset <= GPU_WORK_RAM_BASE + 0x0FFC))
502         {
503                 offset &= 0xFFF;
504                 return ((uint32)gpu_ram_8[offset] << 24) | ((uint32)gpu_ram_8[offset+1] << 16)
505                         | ((uint32)gpu_ram_8[offset+2] << 8) | (uint32)gpu_ram_8[offset+3];//*/
506 //              return GET32(gpu_ram_8, offset);
507         }
508 //      else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset < GPU_CONTROL_RAM_BASE+0x20))
509         else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset <= GPU_CONTROL_RAM_BASE + 0x1C))
510         {
511                 offset &= 0x1F;
512                 switch (offset)
513                 {
514                 case 0x00:
515                         gpu_flag_c = (gpu_flag_c ? 1 : 0);
516                         gpu_flag_z = (gpu_flag_z ? 1 : 0);
517                         gpu_flag_n = (gpu_flag_n ? 1 : 0);
518
519                         gpu_flags = (gpu_flags & 0xFFFFFFF8) | (gpu_flag_n << 2) | (gpu_flag_c << 1) | gpu_flag_z;
520
521                         return gpu_flags & 0xFFFFC1FF;
522                 case 0x04:
523                         return gpu_matrix_control;
524                 case 0x08:
525                         return gpu_pointer_to_matrix;
526                 case 0x0C:
527                         return gpu_data_organization;
528                 case 0x10:
529                         return gpu_pc;
530                 case 0x14:
531                         return gpu_control;
532                 case 0x18:
533                         return gpu_hidata;
534                 case 0x1C:
535                         return gpu_remain;
536                 default:                                                                // unaligned long read
537 #ifdef GPU_DEBUG
538                         WriteLog("GPU: Read32--unaligned 32 bit read at %08X by %s.\n", GPU_CONTROL_RAM_BASE + offset, whoName[who]);
539 #endif  // GPU_DEBUG
540                         return 0;
541                 }
542         }
543 //TEMP--Mirror of F03000? No. Writes only...
544 //if (offset >= 0xF0B000 && offset <= 0xF0BFFF)
545 //      WriteLog("[GPUR32] --> Possible GPU RAM mirror access by %s!\n", whoName[who]);
546 /*if (offset >= 0xF1D000 && offset <= 0xF1DFFF)
547         WriteLog("[GPUR32] --> Reading from Wavetable ROM!\n");//*/
548
549         return (JaguarReadWord(offset, who) << 16) | JaguarReadWord(offset + 2, who);
550 }
551
552 //
553 // GPU byte access (write)
554 //
555 void GPUWriteByte(uint32 offset, uint8 data, uint32 who/*=UNKNOWN*/)
556 {
557         if (offset >= 0xF02000 && offset <= 0xF020FF)
558                 WriteLog("GPU: WriteByte--Attempt to write to GPU register file by %s!\n", whoName[who]);
559
560         if ((offset >= GPU_WORK_RAM_BASE) && (offset <= GPU_WORK_RAM_BASE + 0x0FFF))
561         {
562                 gpu_ram_8[offset & 0xFFF] = data;
563
564 //This is the same stupid worthless code that was in the DSP!!! AARRRGGGGHHHHH!!!!!!
565 /*              if (!gpu_in_exec)
566                 {
567                         m68k_end_timeslice();
568                         dsp_releaseTimeslice();
569                 }*/
570                 return;
571         }
572         else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset <= GPU_CONTROL_RAM_BASE + 0x1F))
573         {
574                 uint32 reg = offset & 0x1C;
575                 int bytenum = offset & 0x03;
576
577 //This is definitely wrong!
578                 if ((reg >= 0x1C) && (reg <= 0x1F))
579                         gpu_div_control = (gpu_div_control & (~(0xFF << (bytenum << 3)))) | (data << (bytenum << 3));
580                 else
581                 {
582                         uint32 old_data = GPUReadLong(offset & 0xFFFFFFC, who);
583                         bytenum = 3 - bytenum; // convention motorola !!!
584                         old_data = (old_data & (~(0xFF << (bytenum << 3)))) | (data << (bytenum << 3));
585                         GPUWriteLong(offset & 0xFFFFFFC, old_data, who);
586                 }
587                 return;
588         }
589 //      WriteLog("gpu: writing %.2x at 0x%.8x\n",data,offset);
590         JaguarWriteByte(offset, data, who);
591 }
592
593 //
594 // GPU word access (write)
595 //
596 void GPUWriteWord(uint32 offset, uint16 data, uint32 who/*=UNKNOWN*/)
597 {
598         if (offset >= 0xF02000 && offset <= 0xF020FF)
599                 WriteLog("GPU: WriteWord--Attempt to write to GPU register file by %s!\n", whoName[who]);
600
601         if ((offset >= GPU_WORK_RAM_BASE) && (offset <= GPU_WORK_RAM_BASE + 0x0FFE))
602         {
603                 gpu_ram_8[offset & 0xFFF] = (data>>8) & 0xFF;
604                 gpu_ram_8[(offset+1) & 0xFFF] = data & 0xFF;//*/
605 /*              offset &= 0xFFF;
606                 SET16(gpu_ram_8, offset, data);//*/
607
608 /*if (offset >= 0xF03214 && offset < 0xF0321F)
609         WriteLog("GPU: Writing WORD (%04X) to GPU RAM (%08X)...\n", data, offset);//*/
610
611
612 //This is the same stupid worthless code that was in the DSP!!! AARRRGGGGHHHHH!!!!!!
613 /*              if (!gpu_in_exec)
614                 {
615                         m68k_end_timeslice();
616                         dsp_releaseTimeslice();
617                 }*/
618                 return;
619         }
620         else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset <= GPU_CONTROL_RAM_BASE + 0x1E))
621         {
622                 if (offset & 0x01)              // This is supposed to weed out unaligned writes, but does nothing...
623                 {
624 #ifdef GPU_DEBUG
625                         WriteLog("GPU: Write16--unaligned write @ %08X [%04X]\n", offset, data);
626                         GPUDumpRegisters();
627 #endif  // GPU_DEBUG
628                         return;
629                 }
630 //Dual locations in this range: $1C Divide unit remainder/Divide unit control (R/W)
631 //This just literally sucks.
632                 if ((offset & 0x1C) == 0x1C)
633                 {
634 //This doesn't look right either--handles cases 1, 2, & 3 all the same!
635                         if (offset & 0x02)
636                                 gpu_div_control = (gpu_div_control & 0xFFFF0000) | (data & 0xFFFF);
637                         else
638                                 gpu_div_control = (gpu_div_control & 0x0000FFFF) | ((data & 0xFFFF) << 16);
639                 }
640                 else
641                 {
642 //WriteLog("[GPU W16:%08X,%04X]", offset, data);
643                         uint32 old_data = GPUReadLong(offset & 0xFFFFFFC, who);
644                         if (offset & 0x02)
645                                 old_data = (old_data & 0xFFFF0000) | (data & 0xFFFF);
646                         else
647                                 old_data = (old_data & 0x0000FFFF) | ((data & 0xFFFF) << 16);
648                         GPUWriteLong(offset & 0xFFFFFFC, old_data, who);
649                 }
650                 return;
651         }
652         else if ((offset == GPU_WORK_RAM_BASE + 0x0FFF) || (GPU_CONTROL_RAM_BASE + 0x1F))
653         {
654 #ifdef GPU_DEBUG
655                         WriteLog("GPU: Write16--unaligned write @ %08X by %s [%04X]!\n", offset, whoName[who], data);
656                         GPUDumpRegisters();
657 #endif  // GPU_DEBUG
658                 return;
659         }
660
661         // Have to be careful here--this can cause an infinite loop!
662         JaguarWriteWord(offset, data, who);
663 }
664
665 //
666 // GPU dword access (write)
667 //
668 void GPUWriteLong(uint32 offset, uint32 data, uint32 who/*=UNKNOWN*/)
669 {
670         if (offset >= 0xF02000 && offset <= 0xF020FF)
671                 WriteLog("GPU: WriteLong--Attempt to write to GPU register file by %s!\n", whoName[who]);
672
673 //      if ((offset >= GPU_WORK_RAM_BASE) && (offset < GPU_WORK_RAM_BASE + 0x1000))
674         if ((offset >= GPU_WORK_RAM_BASE) && (offset <= GPU_WORK_RAM_BASE + 0x0FFC))
675         {
676 #ifdef GPU_DEBUG
677                 if (offset & 0x03)
678                 {
679                         WriteLog("GPU: Write32--unaligned write @ %08X [%08X] by %s\n", offset, data, whoName[who]);
680                         GPUDumpRegisters();
681                 }
682 #endif  // GPU_DEBUG
683
684                 offset &= 0xFFF;
685                 SET32(gpu_ram_8, offset, data);
686                 return;
687         }
688 //      else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset < GPU_CONTROL_RAM_BASE+0x20))
689         else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset <= GPU_CONTROL_RAM_BASE + 0x1C))
690         {
691                 offset &= 0x1F;
692                 switch (offset)
693                 {
694                 case 0x00:
695                 {
696                         bool IMASKCleared = (gpu_flags & IMASK) && !(data & IMASK);
697                         gpu_flags = data;
698                         gpu_flag_z = gpu_flags & ZERO_FLAG;
699                         gpu_flag_c = (gpu_flags & CARRY_FLAG) >> 1;
700                         gpu_flag_n = (gpu_flags & NEGA_FLAG) >> 2;
701                         GPUUpdateRegisterBanks();
702                         gpu_control &= ~((gpu_flags & CINT04FLAGS) >> 3);       // Interrupt latch clear bits
703 //Writing here is only an interrupt enable--this approach is just plain wrong!
704 //                      GPUHandleIRQs();
705 //This, however, is A-OK! ;-)
706                         if (IMASKCleared)                                               // If IMASK was cleared,
707                                 GPUHandleIRQs();                                        // see if any other interrupts need servicing!
708 #ifdef GPU_DEBUG
709                         if (gpu_flags & (INT_ENA0 | INT_ENA1 | INT_ENA2 | INT_ENA3 | INT_ENA4))
710                                 WriteLog("GPU: Interrupt enable set by %s! Bits: %02X\n", whoName[who], (gpu_flags >> 4) & 0x1F);
711                         WriteLog("GPU: REGPAGE %s...\n", (gpu_flags & REGPAGE ? "set" : "cleared"));
712 #endif  // GPU_DEBUG
713                         break;
714                 }
715                 case 0x04:
716                         gpu_matrix_control = data;
717                         break;
718                 case 0x08:
719                         // This can only point to long aligned addresses
720                         gpu_pointer_to_matrix = data & 0xFFFFFFFC;
721                         break;
722                 case 0x0C:
723                         gpu_data_organization = data;
724                         break;
725                 case 0x10:
726                         gpu_pc = data;
727 #ifdef GPU_DEBUG
728 WriteLog("GPU: %s setting GPU PC to %08X %s\n", whoName[who], gpu_pc, (GPU_RUNNING ? "(GPU is RUNNING!)" : ""));//*/
729 #endif  // GPU_DEBUG
730                         break;
731                 case 0x14:
732                 {
733 //                      uint32 gpu_was_running = GPU_RUNNING;
734                         data &= ~0xF7C0;                // Disable writes to INT_LAT0-4 & TOM version number
735
736                         // check for GPU -> CPU interrupt
737                         if (data & 0x02)
738                         {
739 //WriteLog("GPU->CPU interrupt\n");
740                                 if (TOMIRQEnabled(IRQ_GPU))
741                                 {
742                                         if ((TOMIRQEnabled(IRQ_GPU)) && (JaguarInterruptHandlerIsValid(64)))
743                                         {
744                                                 TOMSetPendingGPUInt();
745                                                 m68k_set_irq(7);                        // Set 68000 NMI
746                                                 GPUReleaseTimeslice();
747                                         }
748                                 }
749                                 data &= ~0x02;
750                         }
751
752                         // check for CPU -> GPU interrupt #0
753                         if (data & 0x04)
754                         {
755 //WriteLog("CPU->GPU interrupt\n");
756                                 GPUSetIRQLine(0, ASSERT_LINE);
757                                 m68k_end_timeslice();
758                                 DSPReleaseTimeslice();
759                                 data &= ~0x04;
760                         }
761
762                         // single stepping
763                         if (data & 0x10)
764                         {
765                                 //WriteLog("asked to perform a single step (single step is %senabled)\n",(data&0x8)?"":"not ");
766                         }
767                         gpu_control = (gpu_control & 0xF7C0) | (data & (~0xF7C0));
768
769                         // if gpu wasn't running but is now running, execute a few cycles
770 #ifndef GPU_SINGLE_STEPPING
771 /*                      if (!gpu_was_running && GPU_RUNNING)
772 #ifdef GPU_DEBUG
773                         {
774                                 WriteLog("GPU: Write32--About to do stupid braindead GPU execution for 200 cycles.\n");
775 #endif  // GPU_DEBUG
776                                 GPUExec(200);
777 #ifdef GPU_DEBUG
778                         }
779 #endif  // GPU_DEBUG//*/
780 #else
781                         if (gpu_control & 0x18)
782                                 GPUExec(1);
783 #endif  // #ifndef GPU_SINGLE_STEPPING
784 #ifdef GPU_DEBUG
785 WriteLog("Write to GPU CTRL by %s: %08X ", whoName[who], data);
786 if (GPU_RUNNING)
787         WriteLog(" --> Starting to run at %08X by %s...", gpu_pc, whoName[who]);
788 else
789         WriteLog(" --> Stopped by %s! (GPU_PC: %08X)", whoName[who], gpu_pc);
790 WriteLog("\n");
791 #endif  // GPU_DEBUG
792 //if (GPU_RUNNING)
793 //      GPUDumpDisassembly();
794 /*if (GPU_RUNNING)
795 {
796         if (gpu_pc == 0xF035D8)
797         {
798 //              GPUDumpDisassembly();
799 //              log_done();
800 //              exit(1);
801                 gpu_control &= 0xFFFFFFFE;      // Don't run it and let's see what happens!
802 //Hmm. Seems to lock up when going into the demo...
803 //Try to disable the collision altogether!
804         }
805 }//*/
806 extern int effect_start5;
807 static bool finished = false;
808 //if (GPU_RUNNING && effect_start5 && !finished)
809 if (GPU_RUNNING && effect_start5 && gpu_pc == 0xF035D8)
810 {
811         // Let's do a dump of $6528!
812 /*      uint32 numItems = JaguarReadWord(0x6BD6);
813         WriteLog("\nDump of $6528: %u items.\n\n", numItems);
814         for(int i=0; i<numItems*3*4; i+=3*4)
815         {
816                 WriteLog("\t%04X: %08X %08X %08X -> ", 0x6528+i, JaguarReadLong(0x6528+i),
817                         JaguarReadLong(0x6528+i+4), JaguarReadLong(0x6528+i+8));
818                 uint16 link = JaguarReadWord(0x6528+i+8+2);
819                 for(int j=0; j<40; j+=4)
820                         WriteLog("%08X ", JaguarReadLong(link + j));
821                 WriteLog("\n");
822         }
823         WriteLog("\n");//*/
824         // Let's try a manual blit here...
825 //This isn't working the way it should! !!! FIX !!!
826 //Err, actually, it is.
827 // NOW, it works right! Problem solved!!! It's a blitter bug!
828 /*      uint32 src = 0x4D54, dst = 0xF03000, width = 10 * 4;
829         for(int y=0; y<127; y++)
830         {
831                 for(int x=0; x<2; x++)
832                 {
833                         JaguarWriteLong(dst, JaguarReadLong(src));
834
835                         src += 4;
836                         dst += 4;
837                 }
838                 src += width - (2 * 4);
839         }//*/
840 /*      finished = true;
841         doGPUDis = true;
842         WriteLog("\nGPU: About to execute collision detection code.\n\n");//*/
843
844 /*      WriteLog("\nGPU: About to execute collision detection code. Data @ 4D54:\n\n");
845         int count = 0;
846         for(int i=0x004D54; i<0x004D54+2048; i++)
847         {
848                 WriteLog("%02X ", JaguarReadByte(i));
849                 count++;
850                 if (count == 32)
851                 {
852                         count = 0;
853                         WriteLog("\n");
854                 }
855         }
856         WriteLog("\n\nData @ F03000:\n\n");
857         count = 0;
858         for(int i=0xF03000; i<0xF03200; i++)
859         {
860                 WriteLog("%02X ", JaguarReadByte(i));
861                 count++;
862                 if (count == 32)
863                 {
864                         count = 0;
865                         WriteLog("\n");
866                 }
867         }
868         WriteLog("\n\n");
869         log_done();
870         exit(0);//*/
871 }
872 //if (!GPU_RUNNING)
873 //      doGPUDis = false;
874 /*if (!GPU_RUNNING && finished)
875 {
876         WriteLog("\nGPU: Finished collision detection code. Exiting!\n\n");
877         GPUDumpRegisters();
878         log_done();
879         exit(0);
880 }//*/
881                         // (?) If we're set running by the M68K (or DSP?) then end its timeslice to
882                         // allow the GPU a chance to run...
883                         // Yes! This partially fixed Trevor McFur...
884                         if (GPU_RUNNING)
885                                 m68k_end_timeslice();
886                         break;
887                 }
888                 case 0x18:
889                         gpu_hidata = data;
890                         break;
891                 case 0x1C:
892                         gpu_div_control = data;
893                         break;
894 //              default:   // unaligned long write
895                         //exit(0);
896                         //__asm int 3
897                 }
898                 return;
899         }
900
901 //      JaguarWriteWord(offset, (data >> 16) & 0xFFFF, who);
902 //      JaguarWriteWord(offset+2, data & 0xFFFF, who);
903 // We're a 32-bit processor, we can do a long write...!
904         JaguarWriteLong(offset, data, who);
905 }
906
907 //
908 // Change register banks if necessary
909 //
910 void GPUUpdateRegisterBanks(void)
911 {
912         int bank = (gpu_flags & REGPAGE);               // REGPAGE bit
913
914         if (gpu_flags & IMASK)                                  // IMASK bit
915                 bank = 0;                                                       // IMASK forces main bank to be bank 0
916
917         if (bank)
918                 gpu_reg = gpu_reg_bank_1, gpu_alternate_reg = gpu_reg_bank_0;
919         else
920                 gpu_reg = gpu_reg_bank_0, gpu_alternate_reg = gpu_reg_bank_1;
921 }
922
923 void GPUHandleIRQs(void)
924 {
925         // Bail out if we're already in an interrupt!
926         if (gpu_flags & IMASK)
927                 return;
928
929         // Get the interrupt latch & enable bits
930         uint32 bits = (gpu_control >> 6) & 0x1F, mask = (gpu_flags >> 4) & 0x1F;
931
932         // Bail out if latched interrupts aren't enabled
933         bits &= mask;
934         if (!bits)
935                 return;
936
937         // Determine which interrupt to service
938         uint32 which = 0; //Isn't there a #pragma to disable this warning???
939         if (bits & 0x01)
940                 which = 0;
941         if (bits & 0x02)
942                 which = 1;
943         if (bits & 0x04)
944                 which = 2;
945         if (bits & 0x08)
946                 which = 3;
947         if (bits & 0x10)
948                 which = 4;
949
950         if (start_logging)
951                 WriteLog("GPU: Generating IRQ #%i\n", which);
952
953         // set the interrupt flag
954         gpu_flags |= IMASK;
955         GPUUpdateRegisterBanks();
956
957         // subqt  #4,r31                ; pre-decrement stack pointer
958         // move  pc,r30                 ; address of interrupted code
959         // store  r30,(r31)     ; store return address
960         gpu_reg[31] -= 4;
961         GPUWriteLong(gpu_reg[31], gpu_pc - 2, GPU);
962
963         // movei  #service_address,r30  ; pointer to ISR entry
964         // jump  (r30)                                  ; jump to ISR
965         // nop
966         gpu_pc = gpu_reg[30] = GPU_WORK_RAM_BASE + (which * 0x10);
967 }
968
969 void GPUSetIRQLine(int irqline, int state)
970 {
971         if (start_logging)
972                 WriteLog("GPU: Setting GPU IRQ line #%i\n", irqline);
973
974         uint32 mask = 0x0040 << irqline;
975         gpu_control &= ~mask;                           // Clear the interrupt latch
976
977         if (state)
978         {
979                 gpu_control |= mask;                    // Assert the interrupt latch
980                 GPUHandleIRQs();                                // And handle the interrupt...
981         }
982 }
983
984 //TEMPORARY: Testing only!
985 //#include "gpu2.h"
986 //#include "gpu3.h"
987
988 void GPUInit(void)
989 {
990 //      memory_malloc_secure((void **)&gpu_ram_8, 0x1000, "GPU work RAM");
991 //      memory_malloc_secure((void **)&gpu_reg_bank_0, 32 * sizeof(int32), "GPU bank 0 regs");
992 //      memory_malloc_secure((void **)&gpu_reg_bank_1, 32 * sizeof(int32), "GPU bank 1 regs");
993
994         build_branch_condition_table();
995
996         GPUReset();
997
998 //TEMPORARY: Testing only!
999 //      gpu2_init();
1000 //      gpu3_init();
1001 }
1002
1003 void GPUReset(void)
1004 {
1005         // GPU registers (directly visible)
1006         gpu_flags                         = 0x00000000;
1007         gpu_matrix_control    = 0x00000000;
1008         gpu_pointer_to_matrix = 0x00000000;
1009         gpu_data_organization = 0xFFFFFFFF;
1010         gpu_pc                            = 0x00F03000;
1011         gpu_control                       = 0x00002800;                 // Correctly sets this as TOM Rev. 2
1012         gpu_hidata                        = 0x00000000;
1013         gpu_remain                        = 0x00000000;                 // These two registers are RO/WO
1014         gpu_div_control           = 0x00000000;
1015
1016         // GPU internal register
1017         gpu_acc                           = 0x00000000;
1018
1019         gpu_reg = gpu_reg_bank_0;
1020         gpu_alternate_reg = gpu_reg_bank_1;
1021
1022         for(int i=0; i<32; i++)
1023                 gpu_reg[i] = gpu_alternate_reg[i] = 0x00000000;
1024
1025         CLR_ZNC;
1026         memset(gpu_ram_8, 0xFF, 0x1000);
1027         gpu_in_exec = 0;
1028 //not needed    GPUInterruptPending = false;
1029         GPUResetStats();
1030 }
1031
1032 uint32 GPUReadPC(void)
1033 {
1034         return gpu_pc;
1035 }
1036
1037 void GPUResetStats(void)
1038 {
1039         for(uint32 i=0; i<64; i++)
1040                 gpu_opcode_use[i] = 0;
1041         WriteLog("--> GPU stats were reset!\n");
1042 }
1043
1044 void GPUDumpDisassembly(void)
1045 {
1046         char buffer[512];
1047
1048         WriteLog("\n---[GPU code at 00F03000]---------------------------\n");
1049         uint32 j = 0xF03000;
1050         while (j <= 0xF03FFF)
1051         {
1052                 uint32 oldj = j;
1053                 j += dasmjag(JAGUAR_GPU, buffer, j);
1054                 WriteLog("\t%08X: %s\n", oldj, buffer);
1055         }
1056 }
1057
1058 void GPUDumpRegisters(void)
1059 {
1060         WriteLog("\n---[GPU flags: NCZ %d%d%d]-----------------------\n", gpu_flag_n, gpu_flag_c, gpu_flag_z);
1061         WriteLog("\nRegisters bank 0\n");
1062         for(int j=0; j<8; j++)
1063         {
1064                 WriteLog("\tR%02i = %08X R%02i = %08X R%02i = %08X R%02i = %08X\n",
1065                                                   (j << 2) + 0, gpu_reg_bank_0[(j << 2) + 0],
1066                                                   (j << 2) + 1, gpu_reg_bank_0[(j << 2) + 1],
1067                                                   (j << 2) + 2, gpu_reg_bank_0[(j << 2) + 2],
1068                                                   (j << 2) + 3, gpu_reg_bank_0[(j << 2) + 3]);
1069         }
1070         WriteLog("Registers bank 1\n");
1071         for(int j=0; j<8; j++)
1072         {
1073                 WriteLog("\tR%02i = %08X R%02i = %08X R%02i = %08X R%02i = %08X\n",
1074                                                   (j << 2) + 0, gpu_reg_bank_1[(j << 2) + 0],
1075                                                   (j << 2) + 1, gpu_reg_bank_1[(j << 2) + 1],
1076                                                   (j << 2) + 2, gpu_reg_bank_1[(j << 2) + 2],
1077                                                   (j << 2) + 3, gpu_reg_bank_1[(j << 2) + 3]);
1078         }
1079 }
1080
1081 void GPUDumpMemory(void)
1082 {
1083         WriteLog("\n---[GPU data at 00F03000]---------------------------\n");
1084         for(int i=0; i<0xFFF; i+=4)
1085                 WriteLog("\t%08X: %02X %02X %02X %02X\n", 0xF03000+i, gpu_ram_8[i],
1086                         gpu_ram_8[i+1], gpu_ram_8[i+2], gpu_ram_8[i+3]);
1087 }
1088
1089 void GPUDone(void)
1090 {
1091         WriteLog("GPU: Stopped at PC=%08X (GPU %s running)\n", (unsigned int)gpu_pc, GPU_RUNNING ? "was" : "wasn't");
1092
1093         // Get the interrupt latch & enable bits
1094         uint8 bits = (gpu_control >> 6) & 0x1F, mask = (gpu_flags >> 4) & 0x1F;
1095         WriteLog("GPU: Latch bits = %02X, enable bits = %02X\n", bits, mask);
1096
1097         GPUDumpRegisters();
1098         GPUDumpDisassembly();
1099
1100         WriteLog("\nGPU opcodes use:\n");
1101         for(int i=0; i<64; i++)
1102         {
1103                 if (gpu_opcode_use[i])
1104                         WriteLog("\t%17s %lu\n", gpu_opcode_str[i], gpu_opcode_use[i]);
1105         }
1106         WriteLog("\n");
1107
1108 //      memory_free(gpu_ram_8);
1109 //      memory_free(gpu_reg_bank_0);
1110 //      memory_free(gpu_reg_bank_1);
1111 }
1112
1113 //
1114 // Main GPU execution core
1115 //
1116 static int testCount = 1;
1117 static int len = 0;
1118 static bool tripwire = false;
1119 void GPUExec(int32 cycles)
1120 {
1121         if (!GPU_RUNNING)
1122                 return;
1123
1124 #ifdef GPU_SINGLE_STEPPING
1125         if (gpu_control & 0x18)
1126         {
1127                 cycles = 1;
1128                 gpu_control &= ~0x10;
1129         }
1130 #endif
1131         GPUHandleIRQs();
1132         gpu_releaseTimeSlice_flag = 0;
1133         gpu_in_exec++;
1134
1135         while (cycles > 0 && GPU_RUNNING)
1136         {
1137 if (gpu_ram_8[0x054] == 0x98 && gpu_ram_8[0x055] == 0x0A && gpu_ram_8[0x056] == 0x03
1138         && gpu_ram_8[0x057] == 0x00 && gpu_ram_8[0x058] == 0x00 && gpu_ram_8[0x059] == 0x00)
1139 {
1140         if (gpu_pc == 0xF03000)
1141         {
1142                 extern uint32 starCount;
1143                 starCount = 0;
1144 /*              WriteLog("GPU: Starting starfield generator... Dump of [R03=%08X]:\n", gpu_reg_bank_0[03]);
1145                 uint32 base = gpu_reg_bank_0[3];
1146                 for(uint32 i=0; i<0x100; i+=16)
1147                 {
1148                         WriteLog("%02X: ", i);
1149                         for(uint32 j=0; j<16; j++)
1150                         {
1151                                 WriteLog("%02X ", JaguarReadByte(base + i + j));
1152                         }
1153                         WriteLog("\n");
1154                 }*/
1155         }
1156 //      if (gpu_pc == 0xF03)
1157         {
1158         }
1159 }//*/
1160 /*if (gpu_pc == 0xF03B9E && gpu_reg_bank_0[01] == 0)
1161 {
1162         GPUDumpRegisters();
1163         WriteLog("GPU: Starting disassembly log...\n");
1164         doGPUDis = true;
1165 }//*/
1166 /*if (gpu_pc == 0xF0359A)
1167 {
1168         doGPUDis = true;
1169         GPUDumpRegisters();
1170 }*/
1171 /*              gpu_flag_c = (gpu_flag_c ? 1 : 0);
1172                 gpu_flag_z = (gpu_flag_z ? 1 : 0);
1173                 gpu_flag_n = (gpu_flag_n ? 1 : 0);*/
1174
1175                 uint16 opcode = GPUReadWord(gpu_pc, GPU);
1176                 uint32 index = opcode >> 10;
1177                 gpu_instruction = opcode;                               // Added for GPU #3...
1178                 gpu_opcode_first_parameter = (opcode >> 5) & 0x1F;
1179                 gpu_opcode_second_parameter = opcode & 0x1F;
1180 /*if (gpu_pc == 0xF03BE8)
1181 WriteLog("Start of OP frame write...\n");
1182 if (gpu_pc == 0xF03EEE)
1183 WriteLog("--> Writing BRANCH object ---\n");
1184 if (gpu_pc == 0xF03F62)
1185 WriteLog("--> Writing BITMAP object ***\n");//*/
1186 /*if (gpu_pc == 0xF03546)
1187 {
1188         WriteLog("\n--> GPU PC: F03546\n");
1189         GPUDumpRegisters();
1190         GPUDumpDisassembly();
1191 }//*/
1192 /*if (gpu_pc == 0xF033F6)
1193 {
1194         WriteLog("\n--> GPU PC: F033F6\n");
1195         GPUDumpRegisters();
1196         GPUDumpDisassembly();
1197 }//*/
1198 /*if (gpu_pc == 0xF033CC)
1199 {
1200         WriteLog("\n--> GPU PC: F033CC\n");
1201         GPUDumpRegisters();
1202         GPUDumpDisassembly();
1203 }//*/
1204 /*if (gpu_pc == 0xF033D6)
1205 {
1206         WriteLog("\n--> GPU PC: F033D6 (#%d)\n", testCount++);
1207         GPUDumpRegisters();
1208         GPUDumpMemory();
1209 }//*/
1210 /*if (gpu_pc == 0xF033D8)
1211 {
1212         WriteLog("\n--> GPU PC: F033D8 (#%d)\n", testCount++);
1213         GPUDumpRegisters();
1214         GPUDumpMemory();
1215 }//*/
1216 /*if (gpu_pc == 0xF0358E)
1217 {
1218         WriteLog("\n--> GPU PC: F0358E (#%d)\n", testCount++);
1219         GPUDumpRegisters();
1220         GPUDumpMemory();
1221 }//*/
1222 /*if (gpu_pc == 0xF034CA)
1223 {
1224         WriteLog("\n--> GPU PC: F034CA (#%d)\n", testCount++);
1225         GPUDumpRegisters();
1226 }//*/
1227 /*if (gpu_pc == 0xF034CA)
1228 {
1229         len = gpu_reg[1] + 4;//, r9save = gpu_reg[9];
1230         WriteLog("\nAbout to subtract [#%d] (R14=%08X, R15=%08X, R9=%08X):\n   ", testCount++, gpu_reg[14], gpu_reg[15], gpu_reg[9]);
1231         for(int i=0; i<len; i+=4)
1232                 WriteLog(" %08X", GPUReadLong(gpu_reg[15]+i));
1233         WriteLog("\n   ");
1234         for(int i=0; i<len; i+=4)
1235                 WriteLog(" %08X", GPUReadLong(gpu_reg[14]+i));
1236         WriteLog("\n\n");
1237 }
1238 if (gpu_pc == 0xF034DE)
1239 {
1240         WriteLog("\nSubtracted! (R14=%08X, R15=%08X):\n   ", gpu_reg[14], gpu_reg[15]);
1241         for(int i=0; i<len; i+=4)
1242                 WriteLog(" %08X", GPUReadLong(gpu_reg[15]+i));
1243         WriteLog("\n   ");
1244         for(int i=0; i<len; i+=4)
1245                 WriteLog(" %08X", GPUReadLong(gpu_reg[14]+i));
1246         WriteLog("\n   ");
1247         for(int i=0; i<len; i+=4)
1248                 WriteLog(" --------");
1249         WriteLog("\n   ");
1250         for(int i=0; i<len; i+=4)
1251                 WriteLog(" %08X", GPUReadLong(gpu_reg[9]+4+i));
1252         WriteLog("\n\n");
1253 }//*/
1254 /*if (gpu_pc == 0xF035C8)
1255 {
1256         WriteLog("\n--> GPU PC: F035C8 (#%d)\n", testCount++);
1257         GPUDumpRegisters();
1258         GPUDumpDisassembly();
1259 }//*/
1260
1261 if (gpu_start_log)
1262 {
1263 //      gpu_reset_stats();
1264 static char buffer[512];
1265 dasmjag(JAGUAR_GPU, buffer, gpu_pc);
1266 WriteLog("GPU: [%08X] %s (RM=%08X, RN=%08X) -> ", gpu_pc, buffer, RM, RN);
1267 }//*/
1268 //$E400 -> 1110 01 -> $39 -> 57
1269 //GPU #1
1270                 gpu_pc += 2;
1271                 gpu_opcode[index]();
1272 //GPU #2
1273 //              gpu2_opcode[index]();
1274 //              gpu_pc += 2;
1275 //GPU #3                                (Doesn't show ATARI logo! #1 & #2 do...)
1276 //              gpu_pc += 2;
1277 //              gpu3_opcode[index]();
1278
1279 // BIOS hacking
1280 //GPU: [00F03548] jr      nz,00F03560 (0xd561) (RM=00F03114, RN=00000004) ->     --> JR: Branch taken.
1281 /*static bool firstTime = true;
1282 if (gpu_pc == 0xF03548 && firstTime)
1283 {
1284         gpu_flag_z = 1;
1285 //      firstTime = false;
1286
1287 //static char buffer[512];
1288 //int k=0xF03548;
1289 //while (k<0xF0356C)
1290 //{
1291 //int oldk = k;
1292 //k += dasmjag(JAGUAR_GPU, buffer, k);
1293 //WriteLog("GPU: [%08X] %s\n", oldk, buffer);
1294 //}
1295 //      gpu_start_log = 1;
1296 }//*/
1297 //GPU: [00F0354C] jump    nz,(r29) (0xd3a1) (RM=00F03314, RN=00000004) -> (RM=00F03314, RN=00000004)
1298 /*if (gpu_pc == 0xF0354C)
1299         gpu_flag_z = 0;//, gpu_start_log = 1;//*/
1300
1301                 cycles -= gpu_opcode_cycles[index];
1302                 gpu_opcode_use[index]++;
1303 if (gpu_start_log)
1304         WriteLog("(RM=%08X, RN=%08X)\n", RM, RN);//*/
1305 if ((gpu_pc < 0xF03000 || gpu_pc > 0xF03FFF) && !tripwire)
1306 {
1307         WriteLog("GPU: Executing outside local RAM! GPU_PC: %08X\n", gpu_pc);
1308         tripwire = true;
1309 }
1310         }
1311
1312         gpu_in_exec--;
1313 }
1314
1315 //
1316 // GPU opcodes
1317 //
1318
1319 /*
1320 GPU opcodes use (offset punch--vertically below bad guy):
1321                       add 18686
1322                      addq 32621
1323                       sub 7483
1324                      subq 10252
1325                       and 21229
1326                        or 15003
1327                      btst 1822
1328                      bset 2072
1329                      mult 141
1330                       div 2392
1331                      shlq 13449
1332                      shrq 10297
1333                     sharq 11104
1334                       cmp 6775
1335                      cmpq 5944
1336                      move 31259
1337                     moveq 4473
1338                     movei 23277
1339                     loadb 46
1340                     loadw 4201
1341                      load 28580
1342          load_r14_indexed 1183
1343          load_r15_indexed 1125
1344                    storew 178
1345                     store 10144
1346         store_r14_indexed 320
1347         store_r15_indexed 1
1348                   move_pc 1742
1349                      jump 24467
1350                        jr 18090
1351                       nop 41362
1352 */
1353
1354 static void gpu_opcode_jump(void)
1355 {
1356 #ifdef GPU_DIS_JUMP
1357 const char * condition[32] =
1358 {       "T", "nz", "z", "???", "nc", "nc nz", "nc z", "???", "c", "c nz",
1359         "c z", "???", "???", "???", "???", "???", "???", "???", "???",
1360         "???", "nn", "nn nz", "nn z", "???", "n", "n nz", "n z", "???",
1361         "???", "???", "???", "F" };
1362         if (doGPUDis)
1363                 WriteLog("%06X: JUMP   %s, (R%02u) [NCZ:%u%u%u, R%02u=%08X] ", gpu_pc-2, condition[IMM_2], IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM);
1364 #endif
1365         // normalize flags
1366 /*      gpu_flag_c = (gpu_flag_c ? 1 : 0);
1367         gpu_flag_z = (gpu_flag_z ? 1 : 0);
1368         gpu_flag_n = (gpu_flag_n ? 1 : 0);*/
1369         // KLUDGE: Used by BRANCH_CONDITION
1370         uint32 jaguar_flags = (gpu_flag_n << 2) | (gpu_flag_c << 1) | gpu_flag_z;
1371
1372         if (BRANCH_CONDITION(IMM_2))
1373         {
1374 #ifdef GPU_DIS_JUMP
1375         if (doGPUDis)
1376                 WriteLog("Branched!\n");
1377 #endif
1378 if (gpu_start_log)
1379         WriteLog("    --> JUMP: Branch taken.\n");
1380                 uint32 delayed_pc = RM;
1381                 GPUExec(1);
1382                 gpu_pc = delayed_pc;
1383 /*              uint16 opcode = GPUReadWord(gpu_pc, GPU);
1384                 gpu_opcode_first_parameter = (opcode >> 5) & 0x1F;
1385                 gpu_opcode_second_parameter = opcode & 0x1F;
1386
1387                 gpu_pc = delayed_pc;
1388                 gpu_opcode[opcode>>10]();//*/
1389         }
1390 #ifdef GPU_DIS_JUMP
1391         else
1392                 if (doGPUDis)
1393                         WriteLog("Branch NOT taken.\n");
1394 #endif
1395 }
1396
1397 static void gpu_opcode_jr(void)
1398 {
1399 #ifdef GPU_DIS_JR
1400 const char * condition[32] =
1401 {       "T", "nz", "z", "???", "nc", "nc nz", "nc z", "???", "c", "c nz",
1402         "c z", "???", "???", "???", "???", "???", "???", "???", "???",
1403         "???", "nn", "nn nz", "nn z", "???", "n", "n nz", "n z", "???",
1404         "???", "???", "???", "F" };
1405         if (doGPUDis)
1406                 WriteLog("%06X: JR     %s, %06X [NCZ:%u%u%u] ", gpu_pc-2, condition[IMM_2], gpu_pc+((IMM_1 & 0x10 ? 0xFFFFFFF0 | IMM_1 : IMM_1) * 2), gpu_flag_n, gpu_flag_c, gpu_flag_z);
1407 #endif
1408 /*      if (CONDITION(jaguar.op & 31))
1409         {
1410                 int32 r1 = (INT8)((jaguar.op >> 2) & 0xF8) >> 2;
1411                 uint32 newpc = jaguar.PC + r1;
1412                 CALL_MAME_DEBUG;
1413                 jaguar.op = ROPCODE(jaguar.PC);
1414                 jaguar.PC = newpc;
1415                 (*jaguar.table[jaguar.op >> 10])();
1416
1417                 jaguar_icount -= 3;     // 3 wait states guaranteed
1418         }*/
1419         // normalize flags
1420 /*      gpu_flag_n = (gpu_flag_n ? 1 : 0);
1421         gpu_flag_c = (gpu_flag_c ? 1 : 0);
1422         gpu_flag_z = (gpu_flag_z ? 1 : 0);*/
1423         // KLUDGE: Used by BRANCH_CONDITION
1424         uint32 jaguar_flags = (gpu_flag_n << 2) | (gpu_flag_c << 1) | gpu_flag_z;
1425
1426         if (BRANCH_CONDITION(IMM_2))
1427         {
1428 #ifdef GPU_DIS_JR
1429         if (doGPUDis)
1430                 WriteLog("Branched!\n");
1431 #endif
1432 if (gpu_start_log)
1433         WriteLog("    --> JR: Branch taken.\n");
1434                 int32 offset = (IMM_1 & 0x10 ? 0xFFFFFFF0 | IMM_1 : IMM_1);             // Sign extend IMM_1
1435                 int32 delayed_pc = gpu_pc + (offset * 2);
1436                 GPUExec(1);
1437                 gpu_pc = delayed_pc;
1438 /*              uint16 opcode = GPUReadWord(gpu_pc, GPU);
1439                 gpu_opcode_first_parameter = (opcode >> 5) & 0x1F;
1440                 gpu_opcode_second_parameter = opcode & 0x1F;
1441
1442                 gpu_pc = delayed_pc;
1443                 gpu_opcode[opcode>>10]();//*/
1444         }
1445 #ifdef GPU_DIS_JR
1446         else
1447                 if (doGPUDis)
1448                         WriteLog("Branch NOT taken.\n");
1449 #endif
1450 }
1451
1452 static void gpu_opcode_add(void)
1453 {
1454 #ifdef GPU_DIS_ADD
1455         if (doGPUDis)
1456                 WriteLog("%06X: ADD    R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1457 #endif
1458         uint32 res = RN + RM;
1459         CLR_ZNC; SET_ZNC_ADD(RN, RM, res);
1460         RN = res;
1461 #ifdef GPU_DIS_ADD
1462         if (doGPUDis)
1463                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1464 #endif
1465 }
1466
1467 static void gpu_opcode_addc(void)
1468 {
1469 #ifdef GPU_DIS_ADDC
1470         if (doGPUDis)
1471                 WriteLog("%06X: ADDC   R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1472 #endif
1473 /*      int dreg = jaguar.op & 31;
1474         uint32 r1 = jaguar.r[(jaguar.op >> 5) & 31];
1475         uint32 r2 = jaguar.r[dreg];
1476         uint32 res = r2 + r1 + ((jaguar.FLAGS >> 1) & 1);
1477         jaguar.r[dreg] = res;
1478         CLR_ZNC; SET_ZNC_ADD(r2,r1,res);*/
1479
1480         uint32 res = RN + RM + gpu_flag_c;
1481         uint32 carry = gpu_flag_c;
1482 //      SET_ZNC_ADD(RN, RM, res); //???BUG??? Yes!
1483         SET_ZNC_ADD(RN + carry, RM, res);
1484 //      SET_ZNC_ADD(RN, RM + carry, res);
1485         RN = res;
1486 #ifdef GPU_DIS_ADDC
1487         if (doGPUDis)
1488                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1489 #endif
1490 }
1491
1492 static void gpu_opcode_addq(void)
1493 {
1494 #ifdef GPU_DIS_ADDQ
1495         if (doGPUDis)
1496                 WriteLog("%06X: ADDQ   #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1497 #endif
1498         uint32 r1 = gpu_convert_zero[IMM_1];
1499         uint32 res = RN + r1;
1500         CLR_ZNC; SET_ZNC_ADD(RN, r1, res);
1501         RN = res;
1502 #ifdef GPU_DIS_ADDQ
1503         if (doGPUDis)
1504                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1505 #endif
1506 }
1507
1508 static void gpu_opcode_addqt(void)
1509 {
1510 #ifdef GPU_DIS_ADDQT
1511         if (doGPUDis)
1512                 WriteLog("%06X: ADDQT  #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1513 #endif
1514         RN += gpu_convert_zero[IMM_1];
1515 #ifdef GPU_DIS_ADDQT
1516         if (doGPUDis)
1517                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1518 #endif
1519 }
1520
1521 static void gpu_opcode_sub(void)
1522 {
1523 #ifdef GPU_DIS_SUB
1524         if (doGPUDis)
1525                 WriteLog("%06X: SUB    R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1526 #endif
1527         uint32 res = RN - RM;
1528         SET_ZNC_SUB(RN, RM, res);
1529         RN = res;
1530 #ifdef GPU_DIS_SUB
1531         if (doGPUDis)
1532                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1533 #endif
1534 }
1535
1536 static void gpu_opcode_subc(void)
1537 {
1538 #ifdef GPU_DIS_SUBC
1539         if (doGPUDis)
1540                 WriteLog("%06X: SUBC   R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1541 #endif
1542         uint32 res = RN - RM - gpu_flag_c;
1543         uint32 borrow = gpu_flag_c;
1544 //      SET_ZNC_SUB(RN, RM, res); //???BUG??? YES!!!
1545 //No matter how you do it, there is a problem. With below, it's 0-0 with carry,
1546 //and the one below it it's FFFFFFFF - FFFFFFFF with carry... !!! FIX !!!
1547 //      SET_ZNC_SUB(RN - borrow, RM, res);
1548         SET_ZNC_SUB(RN, RM + borrow, res);
1549         RN = res;
1550 #ifdef GPU_DIS_SUBC
1551         if (doGPUDis)
1552                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1553 #endif
1554 }
1555 /*
1556 N = 5, M = 3, 3 - 5 = -2, C = 1... Or, in our case:
1557 N = 0, M = 1, 0 - 1 = -1, C = 0!
1558
1559 #define SET_C_SUB(a,b)          (gpu_flag_c = ((uint32)(b) > (uint32)(a)))
1560 #define SET_ZN(r)                       SET_N(r); SET_Z(r)
1561 #define SET_ZNC_ADD(a,b,r)      SET_N(r); SET_Z(r); SET_C_ADD(a,b)
1562 #define SET_ZNC_SUB(a,b,r)      SET_N(r); SET_Z(r); SET_C_SUB(a,b)
1563 */
1564 static void gpu_opcode_subq(void)
1565 {
1566 #ifdef GPU_DIS_SUBQ
1567         if (doGPUDis)
1568                 WriteLog("%06X: SUBQ   #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1569 #endif
1570         uint32 r1 = gpu_convert_zero[IMM_1];
1571         uint32 res = RN - r1;
1572         SET_ZNC_SUB(RN, r1, res);
1573         RN = res;
1574 #ifdef GPU_DIS_SUBQ
1575         if (doGPUDis)
1576                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1577 #endif
1578 }
1579
1580 static void gpu_opcode_subqt(void)
1581 {
1582 #ifdef GPU_DIS_SUBQT
1583         if (doGPUDis)
1584                 WriteLog("%06X: SUBQT  #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1585 #endif
1586         RN -= gpu_convert_zero[IMM_1];
1587 #ifdef GPU_DIS_SUBQT
1588         if (doGPUDis)
1589                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1590 #endif
1591 }
1592
1593 static void gpu_opcode_cmp(void)
1594 {
1595 #ifdef GPU_DIS_CMP
1596         if (doGPUDis)
1597                 WriteLog("%06X: CMP    R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1598 #endif
1599         uint32 res = RN - RM;
1600         SET_ZNC_SUB(RN, RM, res);
1601 #ifdef GPU_DIS_CMP
1602         if (doGPUDis)
1603                 WriteLog("[NCZ:%u%u%u]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z);
1604 #endif
1605 }
1606
1607 static void gpu_opcode_cmpq(void)
1608 {
1609         static int32 sqtable[32] =
1610                 { 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,-16,-15,-14,-13,-12,-11,-10,-9,-8,-7,-6,-5,-4,-3,-2,-1 };
1611 #ifdef GPU_DIS_CMPQ
1612         if (doGPUDis)
1613                 WriteLog("%06X: CMPQ   #%d, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, sqtable[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1614 #endif
1615         uint32 r1 = sqtable[IMM_1 & 0x1F]; // I like this better -> (INT8)(jaguar.op >> 2) >> 3;
1616         uint32 res = RN - r1;
1617         SET_ZNC_SUB(RN, r1, res);
1618 #ifdef GPU_DIS_CMPQ
1619         if (doGPUDis)
1620                 WriteLog("[NCZ:%u%u%u]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z);
1621 #endif
1622 }
1623
1624 static void gpu_opcode_and(void)
1625 {
1626 #ifdef GPU_DIS_AND
1627         if (doGPUDis)
1628                 WriteLog("%06X: AND    R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1629 #endif
1630         RN = RN & RM;
1631         SET_ZN(RN);
1632 #ifdef GPU_DIS_AND
1633         if (doGPUDis)
1634                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1635 #endif
1636 }
1637
1638 static void gpu_opcode_or(void)
1639 {
1640 #ifdef GPU_DIS_OR
1641         if (doGPUDis)
1642                 WriteLog("%06X: OR     R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1643 #endif
1644         RN = RN | RM;
1645         SET_ZN(RN);
1646 #ifdef GPU_DIS_OR
1647         if (doGPUDis)
1648                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1649 #endif
1650 }
1651
1652 static void gpu_opcode_xor(void)
1653 {
1654 #ifdef GPU_DIS_XOR
1655         if (doGPUDis)
1656                 WriteLog("%06X: XOR    R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1657 #endif
1658         RN = RN ^ RM;
1659         SET_ZN(RN);
1660 #ifdef GPU_DIS_XOR
1661         if (doGPUDis)
1662                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1663 #endif
1664 }
1665
1666 static void gpu_opcode_not(void)
1667 {
1668 #ifdef GPU_DIS_NOT
1669         if (doGPUDis)
1670                 WriteLog("%06X: NOT    R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1671 #endif
1672         RN = ~RN;
1673         SET_ZN(RN);
1674 #ifdef GPU_DIS_NOT
1675         if (doGPUDis)
1676                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1677 #endif
1678 }
1679
1680 static void gpu_opcode_move_pc(void)
1681 {
1682 #ifdef GPU_DIS_MOVEPC
1683         if (doGPUDis)
1684                 WriteLog("%06X: MOVE   PC, R%02u [NCZ:%u%u%u, PC=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, gpu_pc-2, IMM_2, RN);
1685 #endif
1686         // Should be previous PC--this might not always be previous instruction!
1687         // Then again, this will point right at the *current* instruction, i.e., MOVE PC,R!
1688         RN = gpu_pc - 2;
1689 #ifdef GPU_DIS_MOVEPC
1690         if (doGPUDis)
1691                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1692 #endif
1693 }
1694
1695 static void gpu_opcode_sat8(void)
1696 {
1697 #ifdef GPU_DIS_SAT8
1698         if (doGPUDis)
1699                 WriteLog("%06X: SAT8   R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1700 #endif
1701         RN = ((int32)RN < 0 ? 0 : (RN > 0xFF ? 0xFF : RN));
1702         SET_ZN(RN);
1703 #ifdef GPU_DIS_SAT8
1704         if (doGPUDis)
1705                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1706 #endif
1707 }
1708
1709 static void gpu_opcode_sat16(void)
1710 {
1711         RN = ((int32)RN < 0 ? 0 : (RN > 0xFFFF ? 0xFFFF : RN));
1712         SET_ZN(RN);
1713 }
1714
1715 static void gpu_opcode_sat24(void)
1716 {
1717         RN = ((int32)RN < 0 ? 0 : (RN > 0xFFFFFF ? 0xFFFFFF : RN));
1718         SET_ZN(RN);
1719 }
1720
1721 static void gpu_opcode_store_r14_indexed(void)
1722 {
1723 #ifdef GPU_DIS_STORE14I
1724         if (doGPUDis)
1725                 WriteLog("%06X: STORE  R%02u, (R14+$%02X) [NCZ:%u%u%u, R%02u=%08X, R14+$%02X=%08X]\n", gpu_pc-2, IMM_2, gpu_convert_zero[IMM_1] << 2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, gpu_convert_zero[IMM_1] << 2, gpu_reg[14]+(gpu_convert_zero[IMM_1] << 2));
1726 #endif
1727         GPUWriteLong(gpu_reg[14] + (gpu_convert_zero[IMM_1] << 2), RN, GPU);
1728 }
1729
1730 static void gpu_opcode_store_r15_indexed(void)
1731 {
1732 #ifdef GPU_DIS_STORE15I
1733         if (doGPUDis)
1734                 WriteLog("%06X: STORE  R%02u, (R15+$%02X) [NCZ:%u%u%u, R%02u=%08X, R15+$%02X=%08X]\n", gpu_pc-2, IMM_2, gpu_convert_zero[IMM_1] << 2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, gpu_convert_zero[IMM_1] << 2, gpu_reg[15]+(gpu_convert_zero[IMM_1] << 2));
1735 #endif
1736         GPUWriteLong(gpu_reg[15] + (gpu_convert_zero[IMM_1] << 2), RN, GPU);
1737 }
1738
1739 static void gpu_opcode_load_r14_ri(void)
1740 {
1741 #ifdef GPU_DIS_LOAD14R
1742         if (doGPUDis)
1743                 WriteLog("%06X: LOAD   (R14+R%02u), R%02u [NCZ:%u%u%u, R14+R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM+gpu_reg[14], IMM_2, RN);
1744 #endif
1745         RN = GPUReadLong(gpu_reg[14] + RM, GPU);
1746 #ifdef GPU_DIS_LOAD14R
1747         if (doGPUDis)
1748                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1749 #endif
1750 }
1751
1752 static void gpu_opcode_load_r15_ri(void)
1753 {
1754 #ifdef GPU_DIS_LOAD15R
1755         if (doGPUDis)
1756                 WriteLog("%06X: LOAD   (R15+R%02u), R%02u [NCZ:%u%u%u, R15+R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM+gpu_reg[15], IMM_2, RN);
1757 #endif
1758         RN = GPUReadLong(gpu_reg[15] + RM, GPU);
1759 #ifdef GPU_DIS_LOAD15R
1760         if (doGPUDis)
1761                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1762 #endif
1763 }
1764
1765 static void gpu_opcode_store_r14_ri(void)
1766 {
1767 #ifdef GPU_DIS_STORE14R
1768         if (doGPUDis)
1769                 WriteLog("%06X: STORE  R%02u, (R14+R%02u) [NCZ:%u%u%u, R%02u=%08X, R14+R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM+gpu_reg[14]);
1770 #endif
1771         GPUWriteLong(gpu_reg[14] + RM, RN, GPU);
1772 }
1773
1774 static void gpu_opcode_store_r15_ri(void)
1775 {
1776 #ifdef GPU_DIS_STORE15R
1777         if (doGPUDis)
1778                 WriteLog("%06X: STORE  R%02u, (R15+R%02u) [NCZ:%u%u%u, R%02u=%08X, R15+R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM+gpu_reg[15]);
1779 #endif
1780         GPUWriteLong(gpu_reg[15] + RM, RN, GPU);
1781 }
1782
// NOP: does nothing apart from the optional disassembly trace line.
static void gpu_opcode_nop(void)
{
#ifdef GPU_DIS_NOP
	if (doGPUDis)
	{
		WriteLog("%06X: NOP    [NCZ:%u%u%u]\n", gpu_pc-2, gpu_flag_n, gpu_flag_c, gpu_flag_z);
	}
#endif
}
1790
1791 static void gpu_opcode_pack(void)
1792 {
1793 #ifdef GPU_DIS_PACK
1794         if (doGPUDis)
1795                 WriteLog("%06X: %s R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, (!IMM_1 ? "PACK  " : "UNPACK"), IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1796 #endif
1797         uint32 val = RN;
1798
1799 //BUG!  if (RM == 0)                            // Pack
1800         if (IMM_1 == 0)                         // Pack
1801                 RN = ((val >> 10) & 0x0000F000) | ((val >> 5) & 0x00000F00) | (val & 0x000000FF);
1802         else                                            // Unpack
1803                 RN = ((val & 0x0000F000) << 10) | ((val & 0x00000F00) << 5) | (val & 0x000000FF);
1804 #ifdef GPU_DIS_PACK
1805         if (doGPUDis)
1806                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1807 #endif
1808 }
1809
1810 static void gpu_opcode_storeb(void)
1811 {
1812 #ifdef GPU_DIS_STOREB
1813         if (doGPUDis)
1814                 WriteLog("%06X: STOREB R%02u, (R%02u) [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM);
1815 #endif
1816 //Is this right???
1817 // Would appear to be so...!
1818         if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1819                 GPUWriteLong(RM, RN & 0xFF, GPU);
1820         else
1821                 JaguarWriteByte(RM, RN, GPU);
1822 }
1823
1824 static void gpu_opcode_storew(void)
1825 {
1826 #ifdef GPU_DIS_STOREW
1827         if (doGPUDis)
1828                 WriteLog("%06X: STOREW R%02u, (R%02u) [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM);
1829 #endif
1830         if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1831                 GPUWriteLong(RM, RN & 0xFFFF, GPU);
1832         else
1833                 JaguarWriteWord(RM, RN, GPU);
1834 }
1835
1836 static void gpu_opcode_store(void)
1837 {
1838 #ifdef GPU_DIS_STORE
1839         if (doGPUDis)
1840                 WriteLog("%06X: STORE  R%02u, (R%02u) [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM);
1841 #endif
1842         GPUWriteLong(RM, RN, GPU);
1843 }
1844
1845 static void gpu_opcode_storep(void)
1846 {
1847         GPUWriteLong(RM + 0, gpu_hidata, GPU);
1848         GPUWriteLong(RM + 4, RN, GPU);
1849 }
1850
1851 static void gpu_opcode_loadb(void)
1852 {
1853 #ifdef GPU_DIS_LOADB
1854         if (doGPUDis)
1855                 WriteLog("%06X: LOADB  (R%02u), R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1856 #endif
1857         if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1858                 RN = GPUReadLong(RM, GPU) & 0xFF;
1859         else
1860                 RN = JaguarReadByte(RM, GPU);
1861 #ifdef GPU_DIS_LOADB
1862         if (doGPUDis)
1863                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1864 #endif
1865 }
1866
1867 static void gpu_opcode_loadw(void)
1868 {
1869 #ifdef GPU_DIS_LOADW
1870         if (doGPUDis)
1871                 WriteLog("%06X: LOADW  (R%02u), R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1872 #endif
1873         if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1874                 RN = GPUReadLong(RM, GPU) & 0xFFFF;
1875         else
1876                 RN = JaguarReadWord(RM, GPU);
1877 #ifdef GPU_DIS_LOADW
1878         if (doGPUDis)
1879                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1880 #endif
1881 }
1882
1883 static void gpu_opcode_load(void)
1884 {
1885 #ifdef GPU_DIS_LOAD
1886         if (doGPUDis)
1887                 WriteLog("%06X: LOAD   (R%02u), R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1888 #endif
1889         RN = GPUReadLong(RM, GPU);
1890 #ifdef GPU_DIS_LOAD
1891         if (doGPUDis)
1892                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1893 #endif
1894 }
1895
1896 static void gpu_opcode_loadp(void)
1897 {
1898         gpu_hidata = GPUReadLong(RM + 0, GPU);
1899         RN                 = GPUReadLong(RM + 4, GPU);
1900 }
1901
1902 static void gpu_opcode_load_r14_indexed(void)
1903 {
1904 #ifdef GPU_DIS_LOAD14I
1905         if (doGPUDis)
1906                 WriteLog("%06X: LOAD   (R14+$%02X), R%02u [NCZ:%u%u%u, R14+$%02X=%08X, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1] << 2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, gpu_convert_zero[IMM_1] << 2, gpu_reg[14]+(gpu_convert_zero[IMM_1] << 2), IMM_2, RN);
1907 #endif
1908         RN = GPUReadLong(gpu_reg[14] + (gpu_convert_zero[IMM_1] << 2), GPU);
1909 #ifdef GPU_DIS_LOAD14I
1910         if (doGPUDis)
1911                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1912 #endif
1913 }
1914
1915 static void gpu_opcode_load_r15_indexed(void)
1916 {
1917 #ifdef GPU_DIS_LOAD15I
1918         if (doGPUDis)
1919                 WriteLog("%06X: LOAD   (R15+$%02X), R%02u [NCZ:%u%u%u, R15+$%02X=%08X, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1] << 2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, gpu_convert_zero[IMM_1] << 2, gpu_reg[15]+(gpu_convert_zero[IMM_1] << 2), IMM_2, RN);
1920 #endif
1921         RN = GPUReadLong(gpu_reg[15] + (gpu_convert_zero[IMM_1] << 2), GPU);
1922 #ifdef GPU_DIS_LOAD15I
1923         if (doGPUDis)
1924                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1925 #endif
1926 }
1927
1928 static void gpu_opcode_movei(void)
1929 {
1930 #ifdef GPU_DIS_MOVEI
1931         if (doGPUDis)
1932                 WriteLog("%06X: MOVEI  #$%08X, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, (uint32)GPUReadWord(gpu_pc) | ((uint32)GPUReadWord(gpu_pc + 2) << 16), IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1933 #endif
1934         // This instruction is followed by 32-bit value in LSW / MSW format...
1935         RN = (uint32)GPUReadWord(gpu_pc, GPU) | ((uint32)GPUReadWord(gpu_pc + 2, GPU) << 16);
1936         gpu_pc += 4;
1937 #ifdef GPU_DIS_MOVEI
1938         if (doGPUDis)
1939                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1940 #endif
1941 }
1942
1943 static void gpu_opcode_moveta(void)
1944 {
1945 #ifdef GPU_DIS_MOVETA
1946         if (doGPUDis)
1947                 WriteLog("%06X: MOVETA R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u(alt)=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, ALTERNATE_RN);
1948 #endif
1949         ALTERNATE_RN = RM;
1950 #ifdef GPU_DIS_MOVETA
1951         if (doGPUDis)
1952                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u(alt)=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, ALTERNATE_RN);
1953 #endif
1954 }
1955
1956 static void gpu_opcode_movefa(void)
1957 {
1958 #ifdef GPU_DIS_MOVEFA
1959         if (doGPUDis)
1960                 WriteLog("%06X: MOVEFA R%02u, R%02u [NCZ:%u%u%u, R%02u(alt)=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, ALTERNATE_RM, IMM_2, RN);
1961 #endif
1962         RN = ALTERNATE_RM;
1963 #ifdef GPU_DIS_MOVEFA
1964         if (doGPUDis)
1965                 WriteLog("[NCZ:%u%u%u, R%02u(alt)=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, ALTERNATE_RM, IMM_2, RN);
1966 #endif
1967 }
1968
1969 static void gpu_opcode_move(void)
1970 {
1971 #ifdef GPU_DIS_MOVE
1972         if (doGPUDis)
1973                 WriteLog("%06X: MOVE   R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1974 #endif
1975         RN = RM;
1976 #ifdef GPU_DIS_MOVE
1977         if (doGPUDis)
1978                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1979 #endif
1980 }
1981
1982 static void gpu_opcode_moveq(void)
1983 {
1984 #ifdef GPU_DIS_MOVEQ
1985         if (doGPUDis)
1986                 WriteLog("%06X: MOVEQ  #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1987 #endif
1988         RN = IMM_1;
1989 #ifdef GPU_DIS_MOVEQ
1990         if (doGPUDis)
1991                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1992 #endif
1993 }
1994
1995 static void gpu_opcode_resmac(void)
1996 {
1997         RN = gpu_acc;
1998 }
1999
2000 static void gpu_opcode_imult(void)
2001 {
2002 #ifdef GPU_DIS_IMULT
2003         if (doGPUDis)
2004                 WriteLog("%06X: IMULT  R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2005 #endif
2006         RN = (int16)RN * (int16)RM;
2007         SET_ZN(RN);
2008 #ifdef GPU_DIS_IMULT
2009         if (doGPUDis)
2010                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2011 #endif
2012 }
2013
2014 static void gpu_opcode_mult(void)
2015 {
2016 #ifdef GPU_DIS_MULT
2017         if (doGPUDis)
2018                 WriteLog("%06X: MULT   R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2019 #endif
2020         RN = (uint16)RM * (uint16)RN;
2021         SET_ZN(RN);
2022 #ifdef GPU_DIS_MULT
2023         if (doGPUDis)
2024                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2025 #endif
2026 }
2027
2028 static void gpu_opcode_bclr(void)
2029 {
2030 #ifdef GPU_DIS_BCLR
2031         if (doGPUDis)
2032                 WriteLog("%06X: BCLR   #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2033 #endif
2034         uint32 res = RN & ~(1 << IMM_1);
2035         RN = res;
2036         SET_ZN(res);
2037 #ifdef GPU_DIS_BCLR
2038         if (doGPUDis)
2039                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2040 #endif
2041 }
2042
2043 static void gpu_opcode_btst(void)
2044 {
2045 #ifdef GPU_DIS_BTST
2046         if (doGPUDis)
2047                 WriteLog("%06X: BTST   #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2048 #endif
2049         gpu_flag_z = (~RN >> IMM_1) & 1;
2050 #ifdef GPU_DIS_BTST
2051         if (doGPUDis)
2052                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2053 #endif
2054 }
2055
2056 static void gpu_opcode_bset(void)
2057 {
2058 #ifdef GPU_DIS_BSET
2059         if (doGPUDis)
2060                 WriteLog("%06X: BSET   #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2061 #endif
2062         uint32 res = RN | (1 << IMM_1);
2063         RN = res;
2064         SET_ZN(res);
2065 #ifdef GPU_DIS_BSET
2066         if (doGPUDis)
2067                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2068 #endif
2069 }
2070
2071 static void gpu_opcode_imacn(void)
2072 {
2073         uint32 res = (int16)RM * (int16)(RN);
2074         gpu_acc += res;
2075 }
2076
2077 static void gpu_opcode_mtoi(void)
2078 {
2079         uint32 _RM = RM;
2080         uint32 res = RN = (((int32)_RM >> 8) & 0xFF800000) | (_RM & 0x007FFFFF);
2081         SET_ZN(res);
2082 }
2083
2084 static void gpu_opcode_normi(void)
2085 {
2086         uint32 _RM = RM;
2087         uint32 res = 0;
2088
2089         if (_RM)
2090         {
2091                 while ((_RM & 0xFFC00000) == 0)
2092                 {
2093                         _RM <<= 1;
2094                         res--;
2095                 }
2096                 while ((_RM & 0xFF800000) != 0)
2097                 {
2098                         _RM >>= 1;
2099                         res++;
2100                 }
2101         }
2102         RN = res;
2103         SET_ZN(res);
2104 }
2105
2106 static void gpu_opcode_mmult(void)
2107 {
2108         int count       = gpu_matrix_control & 0x0F;    // Matrix width
2109         uint32 addr = gpu_pointer_to_matrix;            // In the GPU's RAM
2110         int64 accum = 0;
2111         uint32 res;
2112
2113         if (gpu_matrix_control & 0x10)                          // Column stepping
2114         {
2115                 for(int i=0; i<count; i++)
2116                 {
2117                         int16 a;
2118                         if (i & 0x01)
2119                                 a = (int16)((gpu_alternate_reg[IMM_1 + (i >> 1)] >> 16) & 0xFFFF);
2120                         else
2121                                 a = (int16)(gpu_alternate_reg[IMM_1 + (i >> 1)] & 0xFFFF);
2122
2123                         int16 b = ((int16)GPUReadWord(addr + 2, GPU));
2124                         accum += a * b;
2125                         addr += 4 * count;
2126                 }
2127         }
2128         else                                                                            // Row stepping
2129         {
2130                 for(int i=0; i<count; i++)
2131                 {
2132                         int16 a;
2133                         if (i & 0x01)
2134                                 a = (int16)((gpu_alternate_reg[IMM_1 + (i >> 1)] >> 16) & 0xFFFF);
2135                         else
2136                                 a = (int16)(gpu_alternate_reg[IMM_1 + (i >> 1)] & 0xFFFF);
2137
2138                         int16 b = ((int16)GPUReadWord(addr + 2, GPU));
2139                         accum += a * b;
2140                         addr += 4;
2141                 }
2142         }
2143         RN = res = (int32)accum;
2144         // carry flag to do (out of the last add)
2145         SET_ZN(res);
2146 }
2147
2148 static void gpu_opcode_abs(void)
2149 {
2150 #ifdef GPU_DIS_ABS
2151         if (doGPUDis)
2152                 WriteLog("%06X: ABS    R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2153 #endif
2154         gpu_flag_c = RN >> 31;
2155         if (RN == 0x80000000)
2156         //Is 0x80000000 a positive number? If so, then we need to set C to 0 as well!
2157                 gpu_flag_n = 1, gpu_flag_z = 0;
2158         else
2159         {
2160                 if (gpu_flag_c)
2161                         RN = -RN;
2162                 gpu_flag_n = 0; SET_FLAG_Z(RN);
2163         }
2164 #ifdef GPU_DIS_ABS
2165         if (doGPUDis)
2166                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2167 #endif
2168 }
2169
2170 static void gpu_opcode_div(void)        // RN / RM
2171 {
2172 #ifdef GPU_DIS_DIV
2173         if (doGPUDis)
2174                 WriteLog("%06X: DIV    R%02u, R%02u (%s) [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, (gpu_div_control & 0x01 ? "16.16" : "32"), gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2175 #endif
2176 // NOTE: remainder is NOT calculated correctly here!
2177 //       The original tried to get it right by checking to see if the
2178 //       remainder was negative, but that's too late...
2179 // The code there should do it now, but I'm not 100% sure...
2180
2181         if (RM)
2182         {
2183                 if (gpu_div_control & 0x01)             // 16.16 division
2184                 {
2185                         RN = ((uint64)RN << 16) / RM;
2186                         gpu_remain = ((uint64)RN << 16) % RM;
2187                 }
2188                 else
2189                 {
2190                         RN = RN / RM;
2191                         gpu_remain = RN % RM;
2192                 }
2193
2194                 if ((gpu_remain - RM) & 0x80000000)     // If the result would have been negative...
2195                         gpu_remain -= RM;                       // Then make it negative!
2196         }
2197         else
2198                 RN = 0xFFFFFFFF;
2199
2200 /*      uint32 _RM=RM;
2201         uint32 _RN=RN;
2202
2203         if (_RM)
2204         {
2205                 if (gpu_div_control & 1)
2206                 {
2207                         gpu_remain = (((uint64)_RN) << 16) % _RM;
2208                         if (gpu_remain&0x80000000)
2209                                 gpu_remain-=_RM;
2210                         RN = (((uint64)_RN) << 16) / _RM;
2211                 }
2212                 else
2213                 {
2214                         gpu_remain = _RN % _RM;
2215                         if (gpu_remain&0x80000000)
2216                                 gpu_remain-=_RM;
2217                         RN/=_RM;
2218                 }
2219         }
2220         else
2221                 RN=0xffffffff;*/
2222 #ifdef GPU_DIS_DIV
2223         if (doGPUDis)
2224                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] Remainder: %08X\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN, gpu_remain);
2225 #endif
2226 }
2227
2228 static void gpu_opcode_imultn(void)
2229 {
2230         uint32 res = (int32)((int16)RN * (int16)RM);
2231         gpu_acc = (int32)res;
2232         SET_FLAG_Z(res);
2233         SET_FLAG_N(res);
2234 }
2235
2236 static void gpu_opcode_neg(void)
2237 {
2238 #ifdef GPU_DIS_NEG
2239         if (doGPUDis)
2240                 WriteLog("%06X: NEG    R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2241 #endif
2242         uint32 res = -RN;
2243         SET_ZNC_SUB(0, RN, res);
2244         RN = res;
2245 #ifdef GPU_DIS_NEG
2246         if (doGPUDis)
2247                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2248 #endif
2249 }
2250
2251 static void gpu_opcode_shlq(void)
2252 {
2253 #ifdef GPU_DIS_SHLQ
2254         if (doGPUDis)
2255                 WriteLog("%06X: SHLQ   #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, 32 - IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2256 #endif
2257 // Was a bug here...
2258 // (Look at Aaron's code: If r1 = 32, then 32 - 32 = 0 which is wrong!)
2259         int32 r1 = 32 - IMM_1;
2260         uint32 res = RN << r1;
2261         SET_ZN(res); gpu_flag_c = (RN >> 31) & 1;
2262         RN = res;
2263 #ifdef GPU_DIS_SHLQ
2264         if (doGPUDis)
2265                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2266 #endif
2267 }
2268
2269 static void gpu_opcode_shrq(void)
2270 {
2271 #ifdef GPU_DIS_SHRQ
2272         if (doGPUDis)
2273                 WriteLog("%06X: SHRQ   #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2274 #endif
2275         int32 r1 = gpu_convert_zero[IMM_1];
2276         uint32 res = RN >> r1;
2277         SET_ZN(res); gpu_flag_c = RN & 1;
2278         RN = res;
2279 #ifdef GPU_DIS_SHRQ
2280         if (doGPUDis)
2281                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2282 #endif
2283 }
2284
2285 static void gpu_opcode_ror(void)
2286 {
2287 #ifdef GPU_DIS_ROR
2288         if (doGPUDis)
2289                 WriteLog("%06X: ROR    R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2290 #endif
2291         uint32 r1 = RM & 0x1F;
2292         uint32 res = (RN >> r1) | (RN << (32 - r1));
2293         SET_ZN(res); gpu_flag_c = (RN >> 31) & 1;
2294         RN = res;
2295 #ifdef GPU_DIS_ROR
2296         if (doGPUDis)
2297                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2298 #endif
2299 }
2300
2301 static void gpu_opcode_rorq(void)
2302 {
2303 #ifdef GPU_DIS_RORQ
2304         if (doGPUDis)
2305                 WriteLog("%06X: RORQ   #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2306 #endif
2307         uint32 r1 = gpu_convert_zero[IMM_1 & 0x1F];
2308         uint32 r2 = RN;
2309         uint32 res = (r2 >> r1) | (r2 << (32 - r1));
2310         RN = res;
2311         SET_ZN(res); gpu_flag_c = (r2 >> 31) & 0x01;
2312 #ifdef GPU_DIS_RORQ
2313         if (doGPUDis)
2314                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2315 #endif
2316 }
2317
2318 static void gpu_opcode_sha(void)
2319 {
2320 /*      int dreg = jaguar.op & 31;
2321         int32 r1 = (int32)jaguar.r[(jaguar.op >> 5) & 31];
2322         uint32 r2 = jaguar.r[dreg];
2323         uint32 res;
2324
2325         CLR_ZNC;
2326         if (r1 < 0)
2327         {
2328                 res = (r1 <= -32) ? 0 : (r2 << -r1);
2329                 jaguar.FLAGS |= (r2 >> 30) & 2;
2330         }
2331         else
2332         {
2333                 res = (r1 >= 32) ? ((int32)r2 >> 31) : ((int32)r2 >> r1);
2334                 jaguar.FLAGS |= (r2 << 1) & 2;
2335         }
2336         jaguar.r[dreg] = res;
2337         SET_ZN(res);*/
2338
2339 #ifdef GPU_DIS_SHA
2340         if (doGPUDis)
2341                 WriteLog("%06X: SHA    R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2342 #endif
2343         uint32 res;
2344
2345         if ((int32)RM < 0)
2346         {
2347                 res = ((int32)RM <= -32) ? 0 : (RN << -(int32)RM);
2348                 gpu_flag_c = RN >> 31;
2349         }
2350         else
2351         {
2352                 res = ((int32)RM >= 32) ? ((int32)RN >> 31) : ((int32)RN >> (int32)RM);
2353                 gpu_flag_c = RN & 0x01;
2354         }
2355         RN = res;
2356         SET_ZN(res);
2357 #ifdef GPU_DIS_SHA
2358         if (doGPUDis)
2359                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2360 #endif
2361
2362 /*      int32 sRM=(int32)RM;
2363         uint32 _RN=RN;
2364
2365         if (sRM<0)
2366         {
2367                 uint32 shift=-sRM;
2368                 if (shift>=32) shift=32;
2369                 gpu_flag_c=(_RN&0x80000000)>>31;
2370                 while (shift)
2371                 {
2372                         _RN<<=1;
2373                         shift--;
2374                 }
2375         }
2376         else
2377         {
2378                 uint32 shift=sRM;
2379                 if (shift>=32) shift=32;
2380                 gpu_flag_c=_RN&0x1;
2381                 while (shift)
2382                 {
2383                         _RN=((int32)_RN)>>1;
2384                         shift--;
2385                 }
2386         }
2387         RN=_RN;
2388         SET_FLAG_Z(_RN);
2389         SET_FLAG_N(_RN);*/
2390 }
2391
2392 static void gpu_opcode_sharq(void)
2393 {
2394 #ifdef GPU_DIS_SHARQ
2395         if (doGPUDis)
2396                 WriteLog("%06X: SHARQ  #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2397 #endif
2398         uint32 res = (int32)RN >> gpu_convert_zero[IMM_1];
2399         SET_ZN(res); gpu_flag_c = RN & 0x01;
2400         RN = res;
2401 #ifdef GPU_DIS_SHARQ
2402         if (doGPUDis)
2403                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2404 #endif
2405 }
2406
2407 static void gpu_opcode_sh(void)
2408 {
2409 #ifdef GPU_DIS_SH
2410         if (doGPUDis)
2411                 WriteLog("%06X: SH     R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2412 #endif
2413         if (RM & 0x80000000)            // Shift left
2414         {
2415                 gpu_flag_c = RN >> 31;
2416                 RN = ((int32)RM <= -32 ? 0 : RN << -(int32)RM);
2417         }
2418         else                                            // Shift right
2419         {
2420                 gpu_flag_c = RN & 0x01;
2421                 RN = (RM >= 32 ? 0 : RN >> RM);
2422         }
2423         SET_ZN(RN);
2424 #ifdef GPU_DIS_SH
2425         if (doGPUDis)
2426                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2427 #endif
2428 }
2429
2430 //Temporary: Testing only!
2431 //#include "gpu2.cpp"
2432 //#include "gpu3.cpp"
2433
2434 #else
2435
2436 // New thread-safe GPU core
2437
//
// Placeholder entry point for the new GPU core; no work is implemented yet.
//
// data: opaque pointer supplied by the caller; currently unused.
// Returns 0 unconditionally.
//
int GPUCore(void * data)
{
	(void)data;				// Not used by the stub; silences unused-parameter warnings

	// A function declared to return int must return a value: flowing off the
	// end of a non-void function is undefined behaviour in C++.
	return 0;
}
2441
2442 #endif