]> Shamusworld >> Repos - virtualjaguar/blob - src/gpu.cpp
Fixes for the 68K IRQ system. There's probably a little more to do though.
[virtualjaguar] / src / gpu.cpp
1 #if 1
2
3 //
4 // GPU Core
5 //
6 // Originally by David Raingeard (Cal2)
7 // GCC/SDL port by Niels Wagenaar (Linux/WIN32) and Caz (BeOS)
8 // Cleanups, endian wrongness, and bad ASM amelioration by James L. Hammons
9 // (C) 2010 Underground Software
10 //
11 // JLH = James L. Hammons <jlhamm@acm.org>
12 //
13 // Who  When        What
14 // ---  ----------  -------------------------------------------------------------
15 // JLH  01/16/2010  Created this log ;-)
16
17 //
18 // Note: Endian wrongness probably stems from the MAME origins of this emu and
19 //       the braindead way in which MAME handles memory. :-)
20 //
21 // Problem with not booting the BIOS was the incorrect way that the
22 // SUBC instruction set the carry when the carry was set going in...
23 // Same problem with ADDC...
24 //
25
26 #include "gpu.h"
27
28 #include <stdlib.h>
29 #include <string.h>                                                             // For memset
30 #include "dsp.h"
31 #include "jagdasm.h"
32 #include "jaguar.h"
33 #include "log.h"
34 #include "m68k.h"
35 //#include "memory.h"
36 #include "tom.h"
37
38 //#define GPU_DEBUG
39
40 // For GPU disassembly...
41
42 #define GPU_DIS_ABS
43 #define GPU_DIS_ADD
44 #define GPU_DIS_ADDC
45 #define GPU_DIS_ADDQ
46 #define GPU_DIS_ADDQT
47 #define GPU_DIS_AND
48 #define GPU_DIS_BCLR
49 #define GPU_DIS_BSET
50 #define GPU_DIS_BTST
51 #define GPU_DIS_CMP
52 #define GPU_DIS_CMPQ
53 #define GPU_DIS_DIV
54 #define GPU_DIS_IMULT
55 #define GPU_DIS_JUMP
56 #define GPU_DIS_JR
57 #define GPU_DIS_LOAD
58 #define GPU_DIS_LOADB
59 #define GPU_DIS_LOADW
60 #define GPU_DIS_LOAD14I
61 #define GPU_DIS_LOAD14R
62 #define GPU_DIS_LOAD15I
63 #define GPU_DIS_LOAD15R
64 #define GPU_DIS_MOVE
65 #define GPU_DIS_MOVEFA
66 #define GPU_DIS_MOVEI
67 #define GPU_DIS_MOVEPC
68 #define GPU_DIS_MOVETA
69 #define GPU_DIS_MOVEQ
70 #define GPU_DIS_MULT
71 #define GPU_DIS_NEG
72 #define GPU_DIS_NOP
73 #define GPU_DIS_NOT
74 #define GPU_DIS_OR
75 #define GPU_DIS_PACK
76 #define GPU_DIS_ROR
77 #define GPU_DIS_RORQ
78 #define GPU_DIS_SAT8
79 #define GPU_DIS_SH
80 #define GPU_DIS_SHA
81 #define GPU_DIS_SHARQ
82 #define GPU_DIS_SHLQ
83 #define GPU_DIS_SHRQ
84 #define GPU_DIS_STORE
85 #define GPU_DIS_STOREB
86 #define GPU_DIS_STOREW
87 #define GPU_DIS_STORE14I
88 #define GPU_DIS_STORE14R
89 #define GPU_DIS_STORE15I
90 #define GPU_DIS_STORE15R
91 #define GPU_DIS_SUB
92 #define GPU_DIS_SUBC
93 #define GPU_DIS_SUBQ
94 #define GPU_DIS_SUBQT
95 #define GPU_DIS_XOR
96
97 bool doGPUDis = false;
98 //bool doGPUDis = true;
99 //*/
100 /*
101 GPU opcodes use (BIOS flying ATARI logo):
102 +                     add 357416
103 +                    addq 538030
104 +                   addqt 6999
105 +                     sub 116663
106 +                    subq 188059
107 +                   subqt 15086
108 +                     neg 36097
109 +                     and 233993
110 +                      or 109332
111 +                     xor 1384
112 +                    btst 111924
113 +                    bset 25029
114 +                    bclr 10551
115 +                    mult 28147
116 +                   imult 69148
117 +                     div 64102
118 +                     abs 159394
119 +                    shlq 194690
120 +                    shrq 292587
121 +                   sharq 192649
122 +                    rorq 58672
123 +                     cmp 244963
124 +                    cmpq 114834
125 +                    move 833472
126 +                   moveq 56427
127 +                  moveta 220814
128 +                  movefa 170678
129 +                   movei 152025
130 +                   loadw 108220
131 +                    load 430936
132 +                  storew 3036
133 +                   store 372490
134 +                 move_pc 2330
135 +                    jump 349134
136 +                      jr 529171
137                     mmult 64904
138 +                     nop 432179
139 */
140
141 // Various bits
142
143 #define CINT0FLAG                       0x0200
144 #define CINT1FLAG                       0x0400
145 #define CINT2FLAG                       0x0800
146 #define CINT3FLAG                       0x1000
147 #define CINT4FLAG                       0x2000
148 #define CINT04FLAGS                     (CINT0FLAG | CINT1FLAG | CINT2FLAG | CINT3FLAG | CINT4FLAG)
149
150 // GPU_FLAGS bits
151
152 #define ZERO_FLAG               0x0001
153 #define CARRY_FLAG              0x0002
154 #define NEGA_FLAG               0x0004
155 #define IMASK                   0x0008
156 #define INT_ENA0                0x0010
157 #define INT_ENA1                0x0020
158 #define INT_ENA2                0x0040
159 #define INT_ENA3                0x0080
160 #define INT_ENA4                0x0100
161 #define INT_CLR0                0x0200
162 #define INT_CLR1                0x0400
163 #define INT_CLR2                0x0800
164 #define INT_CLR3                0x1000
165 #define INT_CLR4                0x2000
166 #define REGPAGE                 0x4000
167 #define DMAEN                   0x8000
168
169 // External global variables
170
171 extern int start_logging;
172 extern int gpu_start_log;
173
174 // Private function prototypes
175
176 void GPUUpdateRegisterBanks(void);
177 void GPUDumpDisassembly(void);
178 void GPUDumpRegisters(void);
179 void GPUDumpMemory(void);
180
181 static void gpu_opcode_add(void);
182 static void gpu_opcode_addc(void);
183 static void gpu_opcode_addq(void);
184 static void gpu_opcode_addqt(void);
185 static void gpu_opcode_sub(void);
186 static void gpu_opcode_subc(void);
187 static void gpu_opcode_subq(void);
188 static void gpu_opcode_subqt(void);
189 static void gpu_opcode_neg(void);
190 static void gpu_opcode_and(void);
191 static void gpu_opcode_or(void);
192 static void gpu_opcode_xor(void);
193 static void gpu_opcode_not(void);
194 static void gpu_opcode_btst(void);
195 static void gpu_opcode_bset(void);
196 static void gpu_opcode_bclr(void);
197 static void gpu_opcode_mult(void);
198 static void gpu_opcode_imult(void);
199 static void gpu_opcode_imultn(void);
200 static void gpu_opcode_resmac(void);
201 static void gpu_opcode_imacn(void);
202 static void gpu_opcode_div(void);
203 static void gpu_opcode_abs(void);
204 static void gpu_opcode_sh(void);
205 static void gpu_opcode_shlq(void);
206 static void gpu_opcode_shrq(void);
207 static void gpu_opcode_sha(void);
208 static void gpu_opcode_sharq(void);
209 static void gpu_opcode_ror(void);
210 static void gpu_opcode_rorq(void);
211 static void gpu_opcode_cmp(void);
212 static void gpu_opcode_cmpq(void);
213 static void gpu_opcode_sat8(void);
214 static void gpu_opcode_sat16(void);
215 static void gpu_opcode_move(void);
216 static void gpu_opcode_moveq(void);
217 static void gpu_opcode_moveta(void);
218 static void gpu_opcode_movefa(void);
219 static void gpu_opcode_movei(void);
220 static void gpu_opcode_loadb(void);
221 static void gpu_opcode_loadw(void);
222 static void gpu_opcode_load(void);
223 static void gpu_opcode_loadp(void);
224 static void gpu_opcode_load_r14_indexed(void);
225 static void gpu_opcode_load_r15_indexed(void);
226 static void gpu_opcode_storeb(void);
227 static void gpu_opcode_storew(void);
228 static void gpu_opcode_store(void);
229 static void gpu_opcode_storep(void);
230 static void gpu_opcode_store_r14_indexed(void);
231 static void gpu_opcode_store_r15_indexed(void);
232 static void gpu_opcode_move_pc(void);
233 static void gpu_opcode_jump(void);
234 static void gpu_opcode_jr(void);
235 static void gpu_opcode_mmult(void);
236 static void gpu_opcode_mtoi(void);
237 static void gpu_opcode_normi(void);
238 static void gpu_opcode_nop(void);
239 static void gpu_opcode_load_r14_ri(void);
240 static void gpu_opcode_load_r15_ri(void);
241 static void gpu_opcode_store_r14_ri(void);
242 static void gpu_opcode_store_r15_ri(void);
243 static void gpu_opcode_sat24(void);
244 static void gpu_opcode_pack(void);
245
246 // This is wrong, since it doesn't take pipeline effects into account. !!! FIX !!!
247 /*uint8 gpu_opcode_cycles[64] =
248 {
249         3,  3,  3,  3,  3,  3,  3,  3,
250         3,  3,  3,  3,  3,  3,  3,  3,
251         3,  3,  1,  3,  1, 18,  3,  3,
252         3,  3,  3,  3,  3,  3,  3,  3,
253         3,  3,  2,  2,  2,  2,  3,  4,
254         5,  4,  5,  6,  6,  1,  1,  1,
255         1,  2,  2,  2,  1,  1,  9,  3,
256         3,  1,  6,  6,  2,  2,  3,  3
257 };//*/
258 //Here's a QnD kludge...
259 //This is wrong, wrong, WRONG, but it seems to work for the time being...
260 //(That is, it fixes Flip Out which relies on GPU timing rather than semaphores. Bad developers! Bad!)
261 //What's needed here is a way to take pipeline effects into account (including pipeline stalls!)...
262 /*uint8 gpu_opcode_cycles[64] =
263 {
264         1,  1,  1,  1,  1,  1,  1,  1,
265         1,  1,  1,  1,  1,  1,  1,  1,
266         1,  1,  1,  1,  1,  9,  1,  1,
267         1,  1,  1,  1,  1,  1,  1,  1,
268         1,  1,  1,  1,  1,  1,  1,  2,
269         2,  2,  2,  3,  3,  1,  1,  1,
270         1,  1,  1,  1,  1,  1,  4,  1,
271         1,  1,  3,  3,  1,  1,  1,  1
272 };//*/
273 uint8 gpu_opcode_cycles[64] =
274 {
275         1,  1,  1,  1,  1,  1,  1,  1,
276         1,  1,  1,  1,  1,  1,  1,  1,
277         1,  1,  1,  1,  1,  1,  1,  1,
278         1,  1,  1,  1,  1,  1,  1,  1,
279         1,  1,  1,  1,  1,  1,  1,  1,
280         1,  1,  1,  1,  1,  1,  1,  1,
281         1,  1,  1,  1,  1,  1,  1,  1,
282         1,  1,  1,  1,  1,  1,  1,  1
283 };//*/
284
285 void (*gpu_opcode[64])()=
286 {
287         gpu_opcode_add,                                 gpu_opcode_addc,                                gpu_opcode_addq,                                gpu_opcode_addqt,
288         gpu_opcode_sub,                                 gpu_opcode_subc,                                gpu_opcode_subq,                                gpu_opcode_subqt,
289         gpu_opcode_neg,                                 gpu_opcode_and,                                 gpu_opcode_or,                                  gpu_opcode_xor,
290         gpu_opcode_not,                                 gpu_opcode_btst,                                gpu_opcode_bset,                                gpu_opcode_bclr,
291         gpu_opcode_mult,                                gpu_opcode_imult,                               gpu_opcode_imultn,                              gpu_opcode_resmac,
292         gpu_opcode_imacn,                               gpu_opcode_div,                                 gpu_opcode_abs,                                 gpu_opcode_sh,
293         gpu_opcode_shlq,                                gpu_opcode_shrq,                                gpu_opcode_sha,                                 gpu_opcode_sharq,
294         gpu_opcode_ror,                                 gpu_opcode_rorq,                                gpu_opcode_cmp,                                 gpu_opcode_cmpq,
295         gpu_opcode_sat8,                                gpu_opcode_sat16,                               gpu_opcode_move,                                gpu_opcode_moveq,
296         gpu_opcode_moveta,                              gpu_opcode_movefa,                              gpu_opcode_movei,                               gpu_opcode_loadb,
297         gpu_opcode_loadw,                               gpu_opcode_load,                                gpu_opcode_loadp,                               gpu_opcode_load_r14_indexed,
298         gpu_opcode_load_r15_indexed,    gpu_opcode_storeb,                              gpu_opcode_storew,                              gpu_opcode_store,
299         gpu_opcode_storep,                              gpu_opcode_store_r14_indexed,   gpu_opcode_store_r15_indexed,   gpu_opcode_move_pc,
300         gpu_opcode_jump,                                gpu_opcode_jr,                                  gpu_opcode_mmult,                               gpu_opcode_mtoi,
301         gpu_opcode_normi,                               gpu_opcode_nop,                                 gpu_opcode_load_r14_ri,                 gpu_opcode_load_r15_ri,
302         gpu_opcode_store_r14_ri,                gpu_opcode_store_r15_ri,                gpu_opcode_sat24,                               gpu_opcode_pack,
303 };
304
305 static uint8 gpu_ram_8[0x1000];
306 uint32 gpu_pc;
307 static uint32 gpu_acc;
308 static uint32 gpu_remain;
309 static uint32 gpu_hidata;
310 static uint32 gpu_flags;
311 static uint32 gpu_matrix_control;
312 static uint32 gpu_pointer_to_matrix;
313 static uint32 gpu_data_organization;
314 static uint32 gpu_control;
315 static uint32 gpu_div_control;
316 // There is a distinct advantage to having these separated out--there's no need to clear
317 // a bit before writing a result. I.e., if the result of an operation leaves a zero in
318 // the carry flag, you don't have to zero gpu_flag_c before you can write that zero!
319 static uint8 gpu_flag_z, gpu_flag_n, gpu_flag_c;
320 static uint32 gpu_reg_bank_0[32];
321 static uint32 gpu_reg_bank_1[32];
322 static uint32 * gpu_reg;
323 static uint32 * gpu_alternate_reg;
324
325 static uint32 gpu_instruction;
326 static uint32 gpu_opcode_first_parameter;
327 static uint32 gpu_opcode_second_parameter;
328
// Non-zero while bit 0 of the GPU control register is set
#define GPU_RUNNING		(gpu_control & 0x01)

// Operand accessors for the instruction currently being decoded. The first and
// second parameter fields double as register indices (RM/RN, in the active or
// the alternate bank) and as immediate values (IMM_1/IMM_2).
#define RM				gpu_reg[gpu_opcode_first_parameter]
#define RN				gpu_reg[gpu_opcode_second_parameter]
#define ALTERNATE_RM	gpu_alternate_reg[gpu_opcode_first_parameter]
#define ALTERNATE_RN	gpu_alternate_reg[gpu_opcode_second_parameter]
#define IMM_1			gpu_opcode_first_parameter
#define IMM_2			gpu_opcode_second_parameter

// NOTE: these two carry their own trailing semicolon. That is kept as-is so
// that existing call sites (with or without a semicolon) keep compiling.
#define SET_FLAG_Z(r)	(gpu_flag_z = ((r) == 0));
#define SET_FLAG_N(r)	(gpu_flag_n = (((uint32)(r) >> 31) & 0x01));

// Same caveat: the semicolon is part of the expansion.
#define RESET_FLAG_Z()	gpu_flag_z = 0;
#define RESET_FLAG_N()	gpu_flag_n = 0;
#define RESET_FLAG_C()	gpu_flag_c = 0;

// Single-expression flag helpers
#define CLR_Z				(gpu_flag_z = 0)
#define CLR_ZN				(gpu_flag_z = gpu_flag_n = 0)
#define CLR_ZNC				(gpu_flag_z = gpu_flag_n = gpu_flag_c = 0)
#define SET_Z(r)			(gpu_flag_z = ((r) == 0))
#define SET_N(r)			(gpu_flag_n = (((uint32)(r) >> 31) & 0x01))
// Carry out of the unsigned 32-bit sum a + b (true exactly when b > ~a)
#define SET_C_ADD(a,b)		(gpu_flag_c = ((uint32)(b) > (uint32)(~(a))))
// Borrow out of the unsigned 32-bit difference a - b (true exactly when b > a)
#define SET_C_SUB(a,b)		(gpu_flag_c = ((uint32)(b) > (uint32)(a)))

// Multi-flag helpers. These are wrapped in do { } while (0) so that each one
// behaves as a single statement; as bare "x; y; z" sequences only the first
// assignment would be controlled by an unbraced if/else/for at the call site.
#define SET_ZN(r)			do { SET_N(r); SET_Z(r); } while (0)
#define SET_ZNC_ADD(a,b,r)	do { SET_N(r); SET_Z(r); SET_C_ADD(a,b); } while (0)
#define SET_ZNC_SUB(a,b,r)	do { SET_N(r); SET_Z(r); SET_C_SUB(a,b); } while (0)
355
356 uint32 gpu_convert_zero[32] =
357         { 32,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 };
358
359 uint8 * branch_condition_table = 0;
360 #define BRANCH_CONDITION(x)     branch_condition_table[(x) + ((jaguar_flags & 7) << 5)]
361
362 uint32 gpu_opcode_use[64];
363
364 const char * gpu_opcode_str[64]=
365 {
366         "add",                          "addc",                         "addq",                         "addqt",
367         "sub",                          "subc",                         "subq",                         "subqt",
368         "neg",                          "and",                          "or",                           "xor",
369         "not",                          "btst",                         "bset",                         "bclr",
370         "mult",                         "imult",                        "imultn",                       "resmac",
371         "imacn",                        "div",                          "abs",                          "sh",
372         "shlq",                         "shrq",                         "sha",                          "sharq",
373         "ror",                          "rorq",                         "cmp",                          "cmpq",
374         "sat8",                         "sat16",                        "move",                         "moveq",
375         "moveta",                       "movefa",                       "movei",                        "loadb",
376         "loadw",                        "load",                         "loadp",                        "load_r14_indexed",
377         "load_r15_indexed",     "storeb",                       "storew",                       "store",
378         "storep",                       "store_r14_indexed","store_r15_indexed","move_pc",
379         "jump",                         "jr",                           "mmult",                        "mtoi",
380         "normi",                        "nop",                          "load_r14_ri",          "load_r15_ri",
381         "store_r14_ri",         "store_r15_ri",         "sat24",                        "pack",
382 };
383
384 static uint32 gpu_in_exec = 0;
385 static uint32 gpu_releaseTimeSlice_flag = 0;
386
387 void GPUReleaseTimeslice(void)
388 {
389         gpu_releaseTimeSlice_flag = 1;
390 }
391
392 uint32 GPUGetPC(void)
393 {
394         return gpu_pc;
395 }
396
397 void build_branch_condition_table(void)
398 {
399         if (!branch_condition_table)
400         {
401                 branch_condition_table = (uint8 *)malloc(32 * 8 * sizeof(branch_condition_table[0]));
402
403                 if (branch_condition_table)
404                 {
405                         for(int i=0; i<8; i++)
406                         {
407                                 for(int j=0; j<32; j++)
408                                 {
409                                         int result = 1;
410                                         if (j & 1)
411                                                 if (i & ZERO_FLAG)
412                                                         result = 0;
413                                         if (j & 2)
414                                                 if (!(i & ZERO_FLAG))
415                                                         result = 0;
416                                         if (j & 4)
417                                                 if (i & (CARRY_FLAG << (j >> 4)))
418                                                         result = 0;
419                                         if (j & 8)
420                                                 if (!(i & (CARRY_FLAG << (j >> 4))))
421                                                         result = 0;
422                                         branch_condition_table[i * 32 + j] = result;
423                                 }
424                         }
425                 }
426         }
427 }
428
429 //
430 // GPU byte access (read)
431 //
432 uint8 GPUReadByte(uint32 offset, uint32 who/*=UNKNOWN*/)
433 {
434         if (offset >= 0xF02000 && offset <= 0xF020FF)
435                 WriteLog("GPU: ReadByte--Attempt to read from GPU register file by %s!\n", whoName[who]);
436
437         if ((offset >= GPU_WORK_RAM_BASE) && (offset < GPU_WORK_RAM_BASE+0x1000))
438                 return gpu_ram_8[offset & 0xFFF];
439         else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset < GPU_CONTROL_RAM_BASE+0x20))
440         {
441                 uint32 data = GPUReadLong(offset & 0xFFFFFFFC, who);
442
443                 if ((offset & 0x03) == 0)
444                         return data >> 24;
445                 else if ((offset & 0x03) == 1)
446                         return (data >> 16) & 0xFF;
447                 else if ((offset & 0x03) == 2)
448                         return (data >> 8) & 0xFF;
449                 else if ((offset & 0x03) == 3)
450                         return data & 0xFF;
451         }
452
453         return JaguarReadByte(offset, who);
454 }
455
456 //
457 // GPU word access (read)
458 //
459 uint16 GPUReadWord(uint32 offset, uint32 who/*=UNKNOWN*/)
460 {
461         if (offset >= 0xF02000 && offset <= 0xF020FF)
462                 WriteLog("GPU: ReadWord--Attempt to read from GPU register file by %s!\n", whoName[who]);
463
464         if ((offset >= GPU_WORK_RAM_BASE) && (offset < GPU_WORK_RAM_BASE+0x1000))
465         {
466                 offset &= 0xFFF;
467                 uint16 data = ((uint16)gpu_ram_8[offset] << 8) | (uint16)gpu_ram_8[offset+1];
468                 return data;
469         }
470         else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset < GPU_CONTROL_RAM_BASE+0x20))
471         {
472 // This looks and smells wrong...
473 // But it *might* be OK...
474                 if (offset & 0x01)                      // Catch cases 1 & 3... (unaligned read)
475                         return (GPUReadByte(offset, who) << 8) | GPUReadByte(offset+1, who);
476
477                 uint32 data = GPUReadLong(offset & 0xFFFFFFFC, who);
478
479                 if (offset & 0x02)                      // Cases 0 & 2...
480                         return data & 0xFFFF;
481                 else
482                         return data >> 16;
483         }
484
485 //TEMP--Mirror of F03000? No. Writes only...
486 //if (offset >= 0xF0B000 && offset <= 0xF0BFFF)
487 //WriteLog("[GPUR16] --> Possible GPU RAM mirror access by %s!", whoName[who]);
488
489         return JaguarReadWord(offset, who);
490 }
491
492 //
493 // GPU dword access (read)
494 //
495 uint32 GPUReadLong(uint32 offset, uint32 who/*=UNKNOWN*/)
496 {
497         if (offset >= 0xF02000 && offset <= 0xF020FF)
498                 WriteLog("GPU: ReadLong--Attempt to read from GPU register file by %s!\n", whoName[who]);
499
500 //      if ((offset >= GPU_WORK_RAM_BASE) && (offset < GPU_WORK_RAM_BASE + 0x1000))
501         if ((offset >= GPU_WORK_RAM_BASE) && (offset <= GPU_WORK_RAM_BASE + 0x0FFC))
502         {
503                 offset &= 0xFFF;
504                 return ((uint32)gpu_ram_8[offset] << 24) | ((uint32)gpu_ram_8[offset+1] << 16)
505                         | ((uint32)gpu_ram_8[offset+2] << 8) | (uint32)gpu_ram_8[offset+3];//*/
506 //              return GET32(gpu_ram_8, offset);
507         }
508 //      else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset < GPU_CONTROL_RAM_BASE+0x20))
509         else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset <= GPU_CONTROL_RAM_BASE + 0x1C))
510         {
511                 offset &= 0x1F;
512                 switch (offset)
513                 {
514                 case 0x00:
515                         gpu_flag_c = (gpu_flag_c ? 1 : 0);
516                         gpu_flag_z = (gpu_flag_z ? 1 : 0);
517                         gpu_flag_n = (gpu_flag_n ? 1 : 0);
518
519                         gpu_flags = (gpu_flags & 0xFFFFFFF8) | (gpu_flag_n << 2) | (gpu_flag_c << 1) | gpu_flag_z;
520
521                         return gpu_flags & 0xFFFFC1FF;
522                 case 0x04:
523                         return gpu_matrix_control;
524                 case 0x08:
525                         return gpu_pointer_to_matrix;
526                 case 0x0C:
527                         return gpu_data_organization;
528                 case 0x10:
529                         return gpu_pc;
530                 case 0x14:
531                         return gpu_control;
532                 case 0x18:
533                         return gpu_hidata;
534                 case 0x1C:
535                         return gpu_remain;
536                 default:                                                                // unaligned long read
537 #ifdef GPU_DEBUG
538                         WriteLog("GPU: Read32--unaligned 32 bit read at %08X by %s.\n", GPU_CONTROL_RAM_BASE + offset, whoName[who]);
539 #endif  // GPU_DEBUG
540                         return 0;
541                 }
542         }
543 //TEMP--Mirror of F03000? No. Writes only...
544 //if (offset >= 0xF0B000 && offset <= 0xF0BFFF)
545 //      WriteLog("[GPUR32] --> Possible GPU RAM mirror access by %s!\n", whoName[who]);
546 /*if (offset >= 0xF1D000 && offset <= 0xF1DFFF)
547         WriteLog("[GPUR32] --> Reading from Wavetable ROM!\n");//*/
548
549         return (JaguarReadWord(offset, who) << 16) | JaguarReadWord(offset + 2, who);
550 }
551
552 //
553 // GPU byte access (write)
554 //
555 void GPUWriteByte(uint32 offset, uint8 data, uint32 who/*=UNKNOWN*/)
556 {
557         if (offset >= 0xF02000 && offset <= 0xF020FF)
558                 WriteLog("GPU: WriteByte--Attempt to write to GPU register file by %s!\n", whoName[who]);
559
560         if ((offset >= GPU_WORK_RAM_BASE) && (offset <= GPU_WORK_RAM_BASE + 0x0FFF))
561         {
562                 gpu_ram_8[offset & 0xFFF] = data;
563
564 //This is the same stupid worthless code that was in the DSP!!! AARRRGGGGHHHHH!!!!!!
565 /*              if (!gpu_in_exec)
566                 {
567                         m68k_end_timeslice();
568                         dsp_releaseTimeslice();
569                 }*/
570                 return;
571         }
572         else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset <= GPU_CONTROL_RAM_BASE + 0x1F))
573         {
574                 uint32 reg = offset & 0x1C;
575                 int bytenum = offset & 0x03;
576
577 //This is definitely wrong!
578                 if ((reg >= 0x1C) && (reg <= 0x1F))
579                         gpu_div_control = (gpu_div_control & (~(0xFF << (bytenum << 3)))) | (data << (bytenum << 3));
580                 else
581                 {
582                         uint32 old_data = GPUReadLong(offset & 0xFFFFFFC, who);
583                         bytenum = 3 - bytenum; // convention motorola !!!
584                         old_data = (old_data & (~(0xFF << (bytenum << 3)))) | (data << (bytenum << 3));
585                         GPUWriteLong(offset & 0xFFFFFFC, old_data, who);
586                 }
587                 return;
588         }
589 //      WriteLog("gpu: writing %.2x at 0x%.8x\n",data,offset);
590         JaguarWriteByte(offset, data, who);
591 }
592
593 //
594 // GPU word access (write)
595 //
596 void GPUWriteWord(uint32 offset, uint16 data, uint32 who/*=UNKNOWN*/)
597 {
598         if (offset >= 0xF02000 && offset <= 0xF020FF)
599                 WriteLog("GPU: WriteWord--Attempt to write to GPU register file by %s!\n", whoName[who]);
600
601         if ((offset >= GPU_WORK_RAM_BASE) && (offset <= GPU_WORK_RAM_BASE + 0x0FFE))
602         {
603                 gpu_ram_8[offset & 0xFFF] = (data>>8) & 0xFF;
604                 gpu_ram_8[(offset+1) & 0xFFF] = data & 0xFF;//*/
605 /*              offset &= 0xFFF;
606                 SET16(gpu_ram_8, offset, data);//*/
607
608 /*if (offset >= 0xF03214 && offset < 0xF0321F)
609         WriteLog("GPU: Writing WORD (%04X) to GPU RAM (%08X)...\n", data, offset);//*/
610
611
612 //This is the same stupid worthless code that was in the DSP!!! AARRRGGGGHHHHH!!!!!!
613 /*              if (!gpu_in_exec)
614                 {
615                         m68k_end_timeslice();
616                         dsp_releaseTimeslice();
617                 }*/
618                 return;
619         }
620         else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset <= GPU_CONTROL_RAM_BASE + 0x1E))
621         {
622                 if (offset & 0x01)              // This is supposed to weed out unaligned writes, but does nothing...
623                 {
624 #ifdef GPU_DEBUG
625                         WriteLog("GPU: Write16--unaligned write @ %08X [%04X]\n", offset, data);
626                         GPUDumpRegisters();
627 #endif  // GPU_DEBUG
628                         return;
629                 }
630 //Dual locations in this range: $1C Divide unit remainder/Divide unit control (R/W)
631 //This just literally sucks.
632                 if ((offset & 0x1C) == 0x1C)
633                 {
634 //This doesn't look right either--handles cases 1, 2, & 3 all the same!
635                         if (offset & 0x02)
636                                 gpu_div_control = (gpu_div_control & 0xFFFF0000) | (data & 0xFFFF);
637                         else
638                                 gpu_div_control = (gpu_div_control & 0x0000FFFF) | ((data & 0xFFFF) << 16);
639                 }
640                 else
641                 {
642 //WriteLog("[GPU W16:%08X,%04X]", offset, data);
643                         uint32 old_data = GPUReadLong(offset & 0xFFFFFFC, who);
644                         if (offset & 0x02)
645                                 old_data = (old_data & 0xFFFF0000) | (data & 0xFFFF);
646                         else
647                                 old_data = (old_data & 0x0000FFFF) | ((data & 0xFFFF) << 16);
648                         GPUWriteLong(offset & 0xFFFFFFC, old_data, who);
649                 }
650                 return;
651         }
652         else if ((offset == GPU_WORK_RAM_BASE + 0x0FFF) || (GPU_CONTROL_RAM_BASE + 0x1F))
653         {
654 #ifdef GPU_DEBUG
655                         WriteLog("GPU: Write16--unaligned write @ %08X by %s [%04X]!\n", offset, whoName[who], data);
656                         GPUDumpRegisters();
657 #endif  // GPU_DEBUG
658                 return;
659         }
660
661         // Have to be careful here--this can cause an infinite loop!
662         JaguarWriteWord(offset, data, who);
663 }
664
665 //
666 // GPU dword access (write)
667 //
668 void GPUWriteLong(uint32 offset, uint32 data, uint32 who/*=UNKNOWN*/)
669 {
670         if (offset >= 0xF02000 && offset <= 0xF020FF)
671                 WriteLog("GPU: WriteLong--Attempt to write to GPU register file by %s!\n", whoName[who]);
672
673 //      if ((offset >= GPU_WORK_RAM_BASE) && (offset < GPU_WORK_RAM_BASE + 0x1000))
674         if ((offset >= GPU_WORK_RAM_BASE) && (offset <= GPU_WORK_RAM_BASE + 0x0FFC))
675         {
676 #ifdef GPU_DEBUG
677                 if (offset & 0x03)
678                 {
679                         WriteLog("GPU: Write32--unaligned write @ %08X [%08X] by %s\n", offset, data, whoName[who]);
680                         GPUDumpRegisters();
681                 }
682 #endif  // GPU_DEBUG
683
684                 offset &= 0xFFF;
685                 SET32(gpu_ram_8, offset, data);
686                 return;
687         }
688 //      else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset < GPU_CONTROL_RAM_BASE+0x20))
689         else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset <= GPU_CONTROL_RAM_BASE + 0x1C))
690         {
691                 offset &= 0x1F;
692                 switch (offset)
693                 {
694                 case 0x00:
695                 {
696                         bool IMASKCleared = (gpu_flags & IMASK) && !(data & IMASK);
697                         gpu_flags = data;
698                         gpu_flag_z = gpu_flags & ZERO_FLAG;
699                         gpu_flag_c = (gpu_flags & CARRY_FLAG) >> 1;
700                         gpu_flag_n = (gpu_flags & NEGA_FLAG) >> 2;
701                         GPUUpdateRegisterBanks();
702                         gpu_control &= ~((gpu_flags & CINT04FLAGS) >> 3);       // Interrupt latch clear bits
703 //Writing here is only an interrupt enable--this approach is just plain wrong!
704 //                      GPUHandleIRQs();
705 //This, however, is A-OK! ;-)
706                         if (IMASKCleared)                                               // If IMASK was cleared,
707                                 GPUHandleIRQs();                                        // see if any other interrupts need servicing!
708 #ifdef GPU_DEBUG
709                         if (gpu_flags & (INT_ENA0 | INT_ENA1 | INT_ENA2 | INT_ENA3 | INT_ENA4))
710                                 WriteLog("GPU: Interrupt enable set by %s! Bits: %02X\n", whoName[who], (gpu_flags >> 4) & 0x1F);
711                         WriteLog("GPU: REGPAGE %s...\n", (gpu_flags & REGPAGE ? "set" : "cleared"));
712 #endif  // GPU_DEBUG
713                         break;
714                 }
715                 case 0x04:
716                         gpu_matrix_control = data;
717                         break;
718                 case 0x08:
719                         // This can only point to long aligned addresses
720                         gpu_pointer_to_matrix = data & 0xFFFFFFFC;
721                         break;
722                 case 0x0C:
723                         gpu_data_organization = data;
724                         break;
725                 case 0x10:
726                         gpu_pc = data;
727 #ifdef GPU_DEBUG
728 WriteLog("GPU: %s setting GPU PC to %08X %s\n", whoName[who], gpu_pc, (GPU_RUNNING ? "(GPU is RUNNING!)" : ""));//*/
729 #endif  // GPU_DEBUG
730                         break;
731                 case 0x14:
732                 {
733 //                      uint32 gpu_was_running = GPU_RUNNING;
734                         data &= ~0xF7C0;                // Disable writes to INT_LAT0-4 & TOM version number
735
736                         // check for GPU -> CPU interrupt
737                         if (data & 0x02)
738                         {
739 //WriteLog("GPU->CPU interrupt\n");
740                                 if (TOMIRQEnabled(IRQ_GPU))
741                                 {
742 //This is the programmer's responsibility, to make sure the handler is valid, not ours!
743 //                                      if ((TOMIRQEnabled(IRQ_GPU))// && (JaguarInterruptHandlerIsValid(64)))
744                                         {
745                                                 TOMSetPendingGPUInt();
746                                                 m68k_set_irq(2);                        // Set 68000 IPL 2
747                                                 GPUReleaseTimeslice();
748                                         }
749                                 }
750                                 data &= ~0x02;
751                         }
752
753                         // check for CPU -> GPU interrupt #0
754                         if (data & 0x04)
755                         {
756 //WriteLog("CPU->GPU interrupt\n");
757                                 GPUSetIRQLine(0, ASSERT_LINE);
758                                 m68k_end_timeslice();
759                                 DSPReleaseTimeslice();
760                                 data &= ~0x04;
761                         }
762
763                         // single stepping
764                         if (data & 0x10)
765                         {
766                                 //WriteLog("asked to perform a single step (single step is %senabled)\n",(data&0x8)?"":"not ");
767                         }
768                         gpu_control = (gpu_control & 0xF7C0) | (data & (~0xF7C0));
769
770                         // if gpu wasn't running but is now running, execute a few cycles
771 #ifndef GPU_SINGLE_STEPPING
772 /*                      if (!gpu_was_running && GPU_RUNNING)
773 #ifdef GPU_DEBUG
774                         {
775                                 WriteLog("GPU: Write32--About to do stupid braindead GPU execution for 200 cycles.\n");
776 #endif  // GPU_DEBUG
777                                 GPUExec(200);
778 #ifdef GPU_DEBUG
779                         }
780 #endif  // GPU_DEBUG//*/
781 #else
782                         if (gpu_control & 0x18)
783                                 GPUExec(1);
784 #endif  // #ifndef GPU_SINGLE_STEPPING
785 #ifdef GPU_DEBUG
786 WriteLog("Write to GPU CTRL by %s: %08X ", whoName[who], data);
787 if (GPU_RUNNING)
788         WriteLog(" --> Starting to run at %08X by %s...", gpu_pc, whoName[who]);
789 else
790         WriteLog(" --> Stopped by %s! (GPU_PC: %08X)", whoName[who], gpu_pc);
791 WriteLog("\n");
792 #endif  // GPU_DEBUG
793 //if (GPU_RUNNING)
794 //      GPUDumpDisassembly();
795 /*if (GPU_RUNNING)
796 {
797         if (gpu_pc == 0xF035D8)
798         {
799 //              GPUDumpDisassembly();
800 //              log_done();
801 //              exit(1);
802                 gpu_control &= 0xFFFFFFFE;      // Don't run it and let's see what happens!
803 //Hmm. Seems to lock up when going into the demo...
804 //Try to disable the collision altogether!
805         }
806 }//*/
807 extern int effect_start5;
808 static bool finished = false;
809 //if (GPU_RUNNING && effect_start5 && !finished)
810 if (GPU_RUNNING && effect_start5 && gpu_pc == 0xF035D8)
811 {
812         // Let's do a dump of $6528!
813 /*      uint32 numItems = JaguarReadWord(0x6BD6);
814         WriteLog("\nDump of $6528: %u items.\n\n", numItems);
815         for(int i=0; i<numItems*3*4; i+=3*4)
816         {
817                 WriteLog("\t%04X: %08X %08X %08X -> ", 0x6528+i, JaguarReadLong(0x6528+i),
818                         JaguarReadLong(0x6528+i+4), JaguarReadLong(0x6528+i+8));
819                 uint16 link = JaguarReadWord(0x6528+i+8+2);
820                 for(int j=0; j<40; j+=4)
821                         WriteLog("%08X ", JaguarReadLong(link + j));
822                 WriteLog("\n");
823         }
824         WriteLog("\n");//*/
825         // Let's try a manual blit here...
826 //This isn't working the way it should! !!! FIX !!!
827 //Err, actually, it is.
828 // NOW, it works right! Problem solved!!! It's a blitter bug!
829 /*      uint32 src = 0x4D54, dst = 0xF03000, width = 10 * 4;
830         for(int y=0; y<127; y++)
831         {
832                 for(int x=0; x<2; x++)
833                 {
834                         JaguarWriteLong(dst, JaguarReadLong(src));
835
836                         src += 4;
837                         dst += 4;
838                 }
839                 src += width - (2 * 4);
840         }//*/
841 /*      finished = true;
842         doGPUDis = true;
843         WriteLog("\nGPU: About to execute collision detection code.\n\n");//*/
844
845 /*      WriteLog("\nGPU: About to execute collision detection code. Data @ 4D54:\n\n");
846         int count = 0;
847         for(int i=0x004D54; i<0x004D54+2048; i++)
848         {
849                 WriteLog("%02X ", JaguarReadByte(i));
850                 count++;
851                 if (count == 32)
852                 {
853                         count = 0;
854                         WriteLog("\n");
855                 }
856         }
857         WriteLog("\n\nData @ F03000:\n\n");
858         count = 0;
859         for(int i=0xF03000; i<0xF03200; i++)
860         {
861                 WriteLog("%02X ", JaguarReadByte(i));
862                 count++;
863                 if (count == 32)
864                 {
865                         count = 0;
866                         WriteLog("\n");
867                 }
868         }
869         WriteLog("\n\n");
870         log_done();
871         exit(0);//*/
872 }
873 //if (!GPU_RUNNING)
874 //      doGPUDis = false;
875 /*if (!GPU_RUNNING && finished)
876 {
877         WriteLog("\nGPU: Finished collision detection code. Exiting!\n\n");
878         GPUDumpRegisters();
879         log_done();
880         exit(0);
881 }//*/
882                         // (?) If we're set running by the M68K (or DSP?) then end its timeslice to
883                         // allow the GPU a chance to run...
884                         // Yes! This partially fixed Trevor McFur...
885                         if (GPU_RUNNING)
886                                 m68k_end_timeslice();
887                         break;
888                 }
889                 case 0x18:
890                         gpu_hidata = data;
891                         break;
892                 case 0x1C:
893                         gpu_div_control = data;
894                         break;
895 //              default:   // unaligned long write
896                         //exit(0);
897                         //__asm int 3
898                 }
899                 return;
900         }
901
902 //      JaguarWriteWord(offset, (data >> 16) & 0xFFFF, who);
903 //      JaguarWriteWord(offset+2, data & 0xFFFF, who);
904 // We're a 32-bit processor, we can do a long write...!
905         JaguarWriteLong(offset, data, who);
906 }
907
908 //
909 // Change register banks if necessary
910 //
911 void GPUUpdateRegisterBanks(void)
912 {
913         int bank = (gpu_flags & REGPAGE);               // REGPAGE bit
914
915         if (gpu_flags & IMASK)                                  // IMASK bit
916                 bank = 0;                                                       // IMASK forces main bank to be bank 0
917
918         if (bank)
919                 gpu_reg = gpu_reg_bank_1, gpu_alternate_reg = gpu_reg_bank_0;
920         else
921                 gpu_reg = gpu_reg_bank_0, gpu_alternate_reg = gpu_reg_bank_1;
922 }
923
924 void GPUHandleIRQs(void)
925 {
926         // Bail out if we're already in an interrupt!
927         if (gpu_flags & IMASK)
928                 return;
929
930         // Get the interrupt latch & enable bits
931         uint32 bits = (gpu_control >> 6) & 0x1F, mask = (gpu_flags >> 4) & 0x1F;
932
933         // Bail out if latched interrupts aren't enabled
934         bits &= mask;
935         if (!bits)
936                 return;
937
938         // Determine which interrupt to service
939         uint32 which = 0; //Isn't there a #pragma to disable this warning???
940         if (bits & 0x01)
941                 which = 0;
942         if (bits & 0x02)
943                 which = 1;
944         if (bits & 0x04)
945                 which = 2;
946         if (bits & 0x08)
947                 which = 3;
948         if (bits & 0x10)
949                 which = 4;
950
951         if (start_logging)
952                 WriteLog("GPU: Generating IRQ #%i\n", which);
953
954         // set the interrupt flag
955         gpu_flags |= IMASK;
956         GPUUpdateRegisterBanks();
957
958         // subqt  #4,r31                ; pre-decrement stack pointer
959         // move  pc,r30                 ; address of interrupted code
960         // store  r30,(r31)     ; store return address
961         gpu_reg[31] -= 4;
962         GPUWriteLong(gpu_reg[31], gpu_pc - 2, GPU);
963
964         // movei  #service_address,r30  ; pointer to ISR entry
965         // jump  (r30)                                  ; jump to ISR
966         // nop
967         gpu_pc = gpu_reg[30] = GPU_WORK_RAM_BASE + (which * 0x10);
968 }
969
970 void GPUSetIRQLine(int irqline, int state)
971 {
972         if (start_logging)
973                 WriteLog("GPU: Setting GPU IRQ line #%i\n", irqline);
974
975         uint32 mask = 0x0040 << irqline;
976         gpu_control &= ~mask;                           // Clear the interrupt latch
977
978         if (state)
979         {
980                 gpu_control |= mask;                    // Assert the interrupt latch
981                 GPUHandleIRQs();                                // And handle the interrupt...
982         }
983 }
984
985 //TEMPORARY: Testing only!
986 //#include "gpu2.h"
987 //#include "gpu3.h"
988
989 void GPUInit(void)
990 {
991 //      memory_malloc_secure((void **)&gpu_ram_8, 0x1000, "GPU work RAM");
992 //      memory_malloc_secure((void **)&gpu_reg_bank_0, 32 * sizeof(int32), "GPU bank 0 regs");
993 //      memory_malloc_secure((void **)&gpu_reg_bank_1, 32 * sizeof(int32), "GPU bank 1 regs");
994
995         build_branch_condition_table();
996
997         GPUReset();
998
999 //TEMPORARY: Testing only!
1000 //      gpu2_init();
1001 //      gpu3_init();
1002 }
1003
1004 void GPUReset(void)
1005 {
1006         // GPU registers (directly visible)
1007         gpu_flags                         = 0x00000000;
1008         gpu_matrix_control    = 0x00000000;
1009         gpu_pointer_to_matrix = 0x00000000;
1010         gpu_data_organization = 0xFFFFFFFF;
1011         gpu_pc                            = 0x00F03000;
1012         gpu_control                       = 0x00002800;                 // Correctly sets this as TOM Rev. 2
1013         gpu_hidata                        = 0x00000000;
1014         gpu_remain                        = 0x00000000;                 // These two registers are RO/WO
1015         gpu_div_control           = 0x00000000;
1016
1017         // GPU internal register
1018         gpu_acc                           = 0x00000000;
1019
1020         gpu_reg = gpu_reg_bank_0;
1021         gpu_alternate_reg = gpu_reg_bank_1;
1022
1023         for(int i=0; i<32; i++)
1024                 gpu_reg[i] = gpu_alternate_reg[i] = 0x00000000;
1025
1026         CLR_ZNC;
1027         memset(gpu_ram_8, 0xFF, 0x1000);
1028         gpu_in_exec = 0;
1029 //not needed    GPUInterruptPending = false;
1030         GPUResetStats();
1031 }
1032
1033 uint32 GPUReadPC(void)
1034 {
1035         return gpu_pc;
1036 }
1037
1038 void GPUResetStats(void)
1039 {
1040         for(uint32 i=0; i<64; i++)
1041                 gpu_opcode_use[i] = 0;
1042         WriteLog("--> GPU stats were reset!\n");
1043 }
1044
1045 void GPUDumpDisassembly(void)
1046 {
1047         char buffer[512];
1048
1049         WriteLog("\n---[GPU code at 00F03000]---------------------------\n");
1050         uint32 j = 0xF03000;
1051         while (j <= 0xF03FFF)
1052         {
1053                 uint32 oldj = j;
1054                 j += dasmjag(JAGUAR_GPU, buffer, j);
1055                 WriteLog("\t%08X: %s\n", oldj, buffer);
1056         }
1057 }
1058
1059 void GPUDumpRegisters(void)
1060 {
1061         WriteLog("\n---[GPU flags: NCZ %d%d%d]-----------------------\n", gpu_flag_n, gpu_flag_c, gpu_flag_z);
1062         WriteLog("\nRegisters bank 0\n");
1063         for(int j=0; j<8; j++)
1064         {
1065                 WriteLog("\tR%02i = %08X R%02i = %08X R%02i = %08X R%02i = %08X\n",
1066                                                   (j << 2) + 0, gpu_reg_bank_0[(j << 2) + 0],
1067                                                   (j << 2) + 1, gpu_reg_bank_0[(j << 2) + 1],
1068                                                   (j << 2) + 2, gpu_reg_bank_0[(j << 2) + 2],
1069                                                   (j << 2) + 3, gpu_reg_bank_0[(j << 2) + 3]);
1070         }
1071         WriteLog("Registers bank 1\n");
1072         for(int j=0; j<8; j++)
1073         {
1074                 WriteLog("\tR%02i = %08X R%02i = %08X R%02i = %08X R%02i = %08X\n",
1075                                                   (j << 2) + 0, gpu_reg_bank_1[(j << 2) + 0],
1076                                                   (j << 2) + 1, gpu_reg_bank_1[(j << 2) + 1],
1077                                                   (j << 2) + 2, gpu_reg_bank_1[(j << 2) + 2],
1078                                                   (j << 2) + 3, gpu_reg_bank_1[(j << 2) + 3]);
1079         }
1080 }
1081
1082 void GPUDumpMemory(void)
1083 {
1084         WriteLog("\n---[GPU data at 00F03000]---------------------------\n");
1085         for(int i=0; i<0xFFF; i+=4)
1086                 WriteLog("\t%08X: %02X %02X %02X %02X\n", 0xF03000+i, gpu_ram_8[i],
1087                         gpu_ram_8[i+1], gpu_ram_8[i+2], gpu_ram_8[i+3]);
1088 }
1089
1090 void GPUDone(void)
1091 {
1092         WriteLog("GPU: Stopped at PC=%08X (GPU %s running)\n", (unsigned int)gpu_pc, GPU_RUNNING ? "was" : "wasn't");
1093
1094         // Get the interrupt latch & enable bits
1095         uint8 bits = (gpu_control >> 6) & 0x1F, mask = (gpu_flags >> 4) & 0x1F;
1096         WriteLog("GPU: Latch bits = %02X, enable bits = %02X\n", bits, mask);
1097
1098         GPUDumpRegisters();
1099         GPUDumpDisassembly();
1100
1101         WriteLog("\nGPU opcodes use:\n");
1102         for(int i=0; i<64; i++)
1103         {
1104                 if (gpu_opcode_use[i])
1105                         WriteLog("\t%17s %lu\n", gpu_opcode_str[i], gpu_opcode_use[i]);
1106         }
1107         WriteLog("\n");
1108
1109 //      memory_free(gpu_ram_8);
1110 //      memory_free(gpu_reg_bank_0);
1111 //      memory_free(gpu_reg_bank_1);
1112 }
1113
1114 //
1115 // Main GPU execution core
1116 //
1117 static int testCount = 1;
1118 static int len = 0;
1119 static bool tripwire = false;
1120 void GPUExec(int32 cycles)
1121 {
1122         if (!GPU_RUNNING)
1123                 return;
1124
1125 #ifdef GPU_SINGLE_STEPPING
1126         if (gpu_control & 0x18)
1127         {
1128                 cycles = 1;
1129                 gpu_control &= ~0x10;
1130         }
1131 #endif
1132         GPUHandleIRQs();
1133         gpu_releaseTimeSlice_flag = 0;
1134         gpu_in_exec++;
1135
1136         while (cycles > 0 && GPU_RUNNING)
1137         {
1138 if (gpu_ram_8[0x054] == 0x98 && gpu_ram_8[0x055] == 0x0A && gpu_ram_8[0x056] == 0x03
1139         && gpu_ram_8[0x057] == 0x00 && gpu_ram_8[0x058] == 0x00 && gpu_ram_8[0x059] == 0x00)
1140 {
1141         if (gpu_pc == 0xF03000)
1142         {
1143                 extern uint32 starCount;
1144                 starCount = 0;
1145 /*              WriteLog("GPU: Starting starfield generator... Dump of [R03=%08X]:\n", gpu_reg_bank_0[03]);
1146                 uint32 base = gpu_reg_bank_0[3];
1147                 for(uint32 i=0; i<0x100; i+=16)
1148                 {
1149                         WriteLog("%02X: ", i);
1150                         for(uint32 j=0; j<16; j++)
1151                         {
1152                                 WriteLog("%02X ", JaguarReadByte(base + i + j));
1153                         }
1154                         WriteLog("\n");
1155                 }*/
1156         }
1157 //      if (gpu_pc == 0xF03)
1158         {
1159         }
1160 }//*/
1161 /*if (gpu_pc == 0xF03B9E && gpu_reg_bank_0[01] == 0)
1162 {
1163         GPUDumpRegisters();
1164         WriteLog("GPU: Starting disassembly log...\n");
1165         doGPUDis = true;
1166 }//*/
1167 /*if (gpu_pc == 0xF0359A)
1168 {
1169         doGPUDis = true;
1170         GPUDumpRegisters();
1171 }*/
1172 /*              gpu_flag_c = (gpu_flag_c ? 1 : 0);
1173                 gpu_flag_z = (gpu_flag_z ? 1 : 0);
1174                 gpu_flag_n = (gpu_flag_n ? 1 : 0);*/
1175
1176                 uint16 opcode = GPUReadWord(gpu_pc, GPU);
1177                 uint32 index = opcode >> 10;
1178                 gpu_instruction = opcode;                               // Added for GPU #3...
1179                 gpu_opcode_first_parameter = (opcode >> 5) & 0x1F;
1180                 gpu_opcode_second_parameter = opcode & 0x1F;
1181 /*if (gpu_pc == 0xF03BE8)
1182 WriteLog("Start of OP frame write...\n");
1183 if (gpu_pc == 0xF03EEE)
1184 WriteLog("--> Writing BRANCH object ---\n");
1185 if (gpu_pc == 0xF03F62)
1186 WriteLog("--> Writing BITMAP object ***\n");//*/
1187 /*if (gpu_pc == 0xF03546)
1188 {
1189         WriteLog("\n--> GPU PC: F03546\n");
1190         GPUDumpRegisters();
1191         GPUDumpDisassembly();
1192 }//*/
1193 /*if (gpu_pc == 0xF033F6)
1194 {
1195         WriteLog("\n--> GPU PC: F033F6\n");
1196         GPUDumpRegisters();
1197         GPUDumpDisassembly();
1198 }//*/
1199 /*if (gpu_pc == 0xF033CC)
1200 {
1201         WriteLog("\n--> GPU PC: F033CC\n");
1202         GPUDumpRegisters();
1203         GPUDumpDisassembly();
1204 }//*/
1205 /*if (gpu_pc == 0xF033D6)
1206 {
1207         WriteLog("\n--> GPU PC: F033D6 (#%d)\n", testCount++);
1208         GPUDumpRegisters();
1209         GPUDumpMemory();
1210 }//*/
1211 /*if (gpu_pc == 0xF033D8)
1212 {
1213         WriteLog("\n--> GPU PC: F033D8 (#%d)\n", testCount++);
1214         GPUDumpRegisters();
1215         GPUDumpMemory();
1216 }//*/
1217 /*if (gpu_pc == 0xF0358E)
1218 {
1219         WriteLog("\n--> GPU PC: F0358E (#%d)\n", testCount++);
1220         GPUDumpRegisters();
1221         GPUDumpMemory();
1222 }//*/
1223 /*if (gpu_pc == 0xF034CA)
1224 {
1225         WriteLog("\n--> GPU PC: F034CA (#%d)\n", testCount++);
1226         GPUDumpRegisters();
1227 }//*/
1228 /*if (gpu_pc == 0xF034CA)
1229 {
1230         len = gpu_reg[1] + 4;//, r9save = gpu_reg[9];
1231         WriteLog("\nAbout to subtract [#%d] (R14=%08X, R15=%08X, R9=%08X):\n   ", testCount++, gpu_reg[14], gpu_reg[15], gpu_reg[9]);
1232         for(int i=0; i<len; i+=4)
1233                 WriteLog(" %08X", GPUReadLong(gpu_reg[15]+i));
1234         WriteLog("\n   ");
1235         for(int i=0; i<len; i+=4)
1236                 WriteLog(" %08X", GPUReadLong(gpu_reg[14]+i));
1237         WriteLog("\n\n");
1238 }
1239 if (gpu_pc == 0xF034DE)
1240 {
1241         WriteLog("\nSubtracted! (R14=%08X, R15=%08X):\n   ", gpu_reg[14], gpu_reg[15]);
1242         for(int i=0; i<len; i+=4)
1243                 WriteLog(" %08X", GPUReadLong(gpu_reg[15]+i));
1244         WriteLog("\n   ");
1245         for(int i=0; i<len; i+=4)
1246                 WriteLog(" %08X", GPUReadLong(gpu_reg[14]+i));
1247         WriteLog("\n   ");
1248         for(int i=0; i<len; i+=4)
1249                 WriteLog(" --------");
1250         WriteLog("\n   ");
1251         for(int i=0; i<len; i+=4)
1252                 WriteLog(" %08X", GPUReadLong(gpu_reg[9]+4+i));
1253         WriteLog("\n\n");
1254 }//*/
1255 /*if (gpu_pc == 0xF035C8)
1256 {
1257         WriteLog("\n--> GPU PC: F035C8 (#%d)\n", testCount++);
1258         GPUDumpRegisters();
1259         GPUDumpDisassembly();
1260 }//*/
1261
1262 if (gpu_start_log)
1263 {
1264 //      gpu_reset_stats();
1265 static char buffer[512];
1266 dasmjag(JAGUAR_GPU, buffer, gpu_pc);
1267 WriteLog("GPU: [%08X] %s (RM=%08X, RN=%08X) -> ", gpu_pc, buffer, RM, RN);
1268 }//*/
1269 //$E400 -> 1110 01 -> $39 -> 57
1270 //GPU #1
1271                 gpu_pc += 2;
1272                 gpu_opcode[index]();
1273 //GPU #2
1274 //              gpu2_opcode[index]();
1275 //              gpu_pc += 2;
1276 //GPU #3                                (Doesn't show ATARI logo! #1 & #2 do...)
1277 //              gpu_pc += 2;
1278 //              gpu3_opcode[index]();
1279
1280 // BIOS hacking
1281 //GPU: [00F03548] jr      nz,00F03560 (0xd561) (RM=00F03114, RN=00000004) ->     --> JR: Branch taken.
1282 /*static bool firstTime = true;
1283 if (gpu_pc == 0xF03548 && firstTime)
1284 {
1285         gpu_flag_z = 1;
1286 //      firstTime = false;
1287
1288 //static char buffer[512];
1289 //int k=0xF03548;
1290 //while (k<0xF0356C)
1291 //{
1292 //int oldk = k;
1293 //k += dasmjag(JAGUAR_GPU, buffer, k);
1294 //WriteLog("GPU: [%08X] %s\n", oldk, buffer);
1295 //}
1296 //      gpu_start_log = 1;
1297 }//*/
1298 //GPU: [00F0354C] jump    nz,(r29) (0xd3a1) (RM=00F03314, RN=00000004) -> (RM=00F03314, RN=00000004)
1299 /*if (gpu_pc == 0xF0354C)
1300         gpu_flag_z = 0;//, gpu_start_log = 1;//*/
1301
1302                 cycles -= gpu_opcode_cycles[index];
1303                 gpu_opcode_use[index]++;
1304 if (gpu_start_log)
1305         WriteLog("(RM=%08X, RN=%08X)\n", RM, RN);//*/
1306 if ((gpu_pc < 0xF03000 || gpu_pc > 0xF03FFF) && !tripwire)
1307 {
1308         WriteLog("GPU: Executing outside local RAM! GPU_PC: %08X\n", gpu_pc);
1309         tripwire = true;
1310 }
1311         }
1312
1313         gpu_in_exec--;
1314 }
1315
1316 //
1317 // GPU opcodes
1318 //
1319
1320 /*
1321 GPU opcodes use (offset punch--vertically below bad guy):
1322                       add 18686
1323                      addq 32621
1324                       sub 7483
1325                      subq 10252
1326                       and 21229
1327                        or 15003
1328                      btst 1822
1329                      bset 2072
1330                      mult 141
1331                       div 2392
1332                      shlq 13449
1333                      shrq 10297
1334                     sharq 11104
1335                       cmp 6775
1336                      cmpq 5944
1337                      move 31259
1338                     moveq 4473
1339                     movei 23277
1340                     loadb 46
1341                     loadw 4201
1342                      load 28580
1343          load_r14_indexed 1183
1344          load_r15_indexed 1125
1345                    storew 178
1346                     store 10144
1347         store_r14_indexed 320
1348         store_r15_indexed 1
1349                   move_pc 1742
1350                      jump 24467
1351                        jr 18090
1352                       nop 41362
1353 */
1354
1355 static void gpu_opcode_jump(void)
1356 {
1357 #ifdef GPU_DIS_JUMP
1358 const char * condition[32] =
1359 {       "T", "nz", "z", "???", "nc", "nc nz", "nc z", "???", "c", "c nz",
1360         "c z", "???", "???", "???", "???", "???", "???", "???", "???",
1361         "???", "nn", "nn nz", "nn z", "???", "n", "n nz", "n z", "???",
1362         "???", "???", "???", "F" };
1363         if (doGPUDis)
1364                 WriteLog("%06X: JUMP   %s, (R%02u) [NCZ:%u%u%u, R%02u=%08X] ", gpu_pc-2, condition[IMM_2], IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM);
1365 #endif
1366         // normalize flags
1367 /*      gpu_flag_c = (gpu_flag_c ? 1 : 0);
1368         gpu_flag_z = (gpu_flag_z ? 1 : 0);
1369         gpu_flag_n = (gpu_flag_n ? 1 : 0);*/
1370         // KLUDGE: Used by BRANCH_CONDITION
1371         uint32 jaguar_flags = (gpu_flag_n << 2) | (gpu_flag_c << 1) | gpu_flag_z;
1372
1373         if (BRANCH_CONDITION(IMM_2))
1374         {
1375 #ifdef GPU_DIS_JUMP
1376         if (doGPUDis)
1377                 WriteLog("Branched!\n");
1378 #endif
1379 if (gpu_start_log)
1380         WriteLog("    --> JUMP: Branch taken.\n");
1381                 uint32 delayed_pc = RM;
1382                 GPUExec(1);
1383                 gpu_pc = delayed_pc;
1384 /*              uint16 opcode = GPUReadWord(gpu_pc, GPU);
1385                 gpu_opcode_first_parameter = (opcode >> 5) & 0x1F;
1386                 gpu_opcode_second_parameter = opcode & 0x1F;
1387
1388                 gpu_pc = delayed_pc;
1389                 gpu_opcode[opcode>>10]();//*/
1390         }
1391 #ifdef GPU_DIS_JUMP
1392         else
1393                 if (doGPUDis)
1394                         WriteLog("Branch NOT taken.\n");
1395 #endif
1396 }
1397
1398 static void gpu_opcode_jr(void)
1399 {
1400 #ifdef GPU_DIS_JR
1401 const char * condition[32] =
1402 {       "T", "nz", "z", "???", "nc", "nc nz", "nc z", "???", "c", "c nz",
1403         "c z", "???", "???", "???", "???", "???", "???", "???", "???",
1404         "???", "nn", "nn nz", "nn z", "???", "n", "n nz", "n z", "???",
1405         "???", "???", "???", "F" };
1406         if (doGPUDis)
1407                 WriteLog("%06X: JR     %s, %06X [NCZ:%u%u%u] ", gpu_pc-2, condition[IMM_2], gpu_pc+((IMM_1 & 0x10 ? 0xFFFFFFF0 | IMM_1 : IMM_1) * 2), gpu_flag_n, gpu_flag_c, gpu_flag_z);
1408 #endif
1409 /*      if (CONDITION(jaguar.op & 31))
1410         {
1411                 int32 r1 = (INT8)((jaguar.op >> 2) & 0xF8) >> 2;
1412                 uint32 newpc = jaguar.PC + r1;
1413                 CALL_MAME_DEBUG;
1414                 jaguar.op = ROPCODE(jaguar.PC);
1415                 jaguar.PC = newpc;
1416                 (*jaguar.table[jaguar.op >> 10])();
1417
1418                 jaguar_icount -= 3;     // 3 wait states guaranteed
1419         }*/
1420         // normalize flags
1421 /*      gpu_flag_n = (gpu_flag_n ? 1 : 0);
1422         gpu_flag_c = (gpu_flag_c ? 1 : 0);
1423         gpu_flag_z = (gpu_flag_z ? 1 : 0);*/
1424         // KLUDGE: Used by BRANCH_CONDITION
1425         uint32 jaguar_flags = (gpu_flag_n << 2) | (gpu_flag_c << 1) | gpu_flag_z;
1426
1427         if (BRANCH_CONDITION(IMM_2))
1428         {
1429 #ifdef GPU_DIS_JR
1430         if (doGPUDis)
1431                 WriteLog("Branched!\n");
1432 #endif
1433 if (gpu_start_log)
1434         WriteLog("    --> JR: Branch taken.\n");
1435                 int32 offset = (IMM_1 & 0x10 ? 0xFFFFFFF0 | IMM_1 : IMM_1);             // Sign extend IMM_1
1436                 int32 delayed_pc = gpu_pc + (offset * 2);
1437                 GPUExec(1);
1438                 gpu_pc = delayed_pc;
1439 /*              uint16 opcode = GPUReadWord(gpu_pc, GPU);
1440                 gpu_opcode_first_parameter = (opcode >> 5) & 0x1F;
1441                 gpu_opcode_second_parameter = opcode & 0x1F;
1442
1443                 gpu_pc = delayed_pc;
1444                 gpu_opcode[opcode>>10]();//*/
1445         }
1446 #ifdef GPU_DIS_JR
1447         else
1448                 if (doGPUDis)
1449                         WriteLog("Branch NOT taken.\n");
1450 #endif
1451 }
1452
1453 static void gpu_opcode_add(void)
1454 {
1455 #ifdef GPU_DIS_ADD
1456         if (doGPUDis)
1457                 WriteLog("%06X: ADD    R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1458 #endif
1459         uint32 res = RN + RM;
1460         CLR_ZNC; SET_ZNC_ADD(RN, RM, res);
1461         RN = res;
1462 #ifdef GPU_DIS_ADD
1463         if (doGPUDis)
1464                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1465 #endif
1466 }
1467
1468 static void gpu_opcode_addc(void)
1469 {
1470 #ifdef GPU_DIS_ADDC
1471         if (doGPUDis)
1472                 WriteLog("%06X: ADDC   R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1473 #endif
1474 /*      int dreg = jaguar.op & 31;
1475         uint32 r1 = jaguar.r[(jaguar.op >> 5) & 31];
1476         uint32 r2 = jaguar.r[dreg];
1477         uint32 res = r2 + r1 + ((jaguar.FLAGS >> 1) & 1);
1478         jaguar.r[dreg] = res;
1479         CLR_ZNC; SET_ZNC_ADD(r2,r1,res);*/
1480
1481         uint32 res = RN + RM + gpu_flag_c;
1482         uint32 carry = gpu_flag_c;
1483 //      SET_ZNC_ADD(RN, RM, res); //???BUG??? Yes!
1484         SET_ZNC_ADD(RN + carry, RM, res);
1485 //      SET_ZNC_ADD(RN, RM + carry, res);
1486         RN = res;
1487 #ifdef GPU_DIS_ADDC
1488         if (doGPUDis)
1489                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1490 #endif
1491 }
1492
1493 static void gpu_opcode_addq(void)
1494 {
1495 #ifdef GPU_DIS_ADDQ
1496         if (doGPUDis)
1497                 WriteLog("%06X: ADDQ   #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1498 #endif
1499         uint32 r1 = gpu_convert_zero[IMM_1];
1500         uint32 res = RN + r1;
1501         CLR_ZNC; SET_ZNC_ADD(RN, r1, res);
1502         RN = res;
1503 #ifdef GPU_DIS_ADDQ
1504         if (doGPUDis)
1505                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1506 #endif
1507 }
1508
1509 static void gpu_opcode_addqt(void)
1510 {
1511 #ifdef GPU_DIS_ADDQT
1512         if (doGPUDis)
1513                 WriteLog("%06X: ADDQT  #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1514 #endif
1515         RN += gpu_convert_zero[IMM_1];
1516 #ifdef GPU_DIS_ADDQT
1517         if (doGPUDis)
1518                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1519 #endif
1520 }
1521
1522 static void gpu_opcode_sub(void)
1523 {
1524 #ifdef GPU_DIS_SUB
1525         if (doGPUDis)
1526                 WriteLog("%06X: SUB    R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1527 #endif
1528         uint32 res = RN - RM;
1529         SET_ZNC_SUB(RN, RM, res);
1530         RN = res;
1531 #ifdef GPU_DIS_SUB
1532         if (doGPUDis)
1533                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1534 #endif
1535 }
1536
1537 static void gpu_opcode_subc(void)
1538 {
1539 #ifdef GPU_DIS_SUBC
1540         if (doGPUDis)
1541                 WriteLog("%06X: SUBC   R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1542 #endif
1543         uint32 res = RN - RM - gpu_flag_c;
1544         uint32 borrow = gpu_flag_c;
1545 //      SET_ZNC_SUB(RN, RM, res); //???BUG??? YES!!!
1546 //No matter how you do it, there is a problem. With below, it's 0-0 with carry,
1547 //and the one below it it's FFFFFFFF - FFFFFFFF with carry... !!! FIX !!!
1548 //      SET_ZNC_SUB(RN - borrow, RM, res);
1549         SET_ZNC_SUB(RN, RM + borrow, res);
1550         RN = res;
1551 #ifdef GPU_DIS_SUBC
1552         if (doGPUDis)
1553                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1554 #endif
1555 }
1556 /*
1557 N = 5, M = 3, 3 - 5 = -2, C = 1... Or, in our case:
1558 N = 0, M = 1, 0 - 1 = -1, C = 0!
1559
1560 #define SET_C_SUB(a,b)          (gpu_flag_c = ((uint32)(b) > (uint32)(a)))
1561 #define SET_ZN(r)                       SET_N(r); SET_Z(r)
1562 #define SET_ZNC_ADD(a,b,r)      SET_N(r); SET_Z(r); SET_C_ADD(a,b)
1563 #define SET_ZNC_SUB(a,b,r)      SET_N(r); SET_Z(r); SET_C_SUB(a,b)
1564 */
1565 static void gpu_opcode_subq(void)
1566 {
1567 #ifdef GPU_DIS_SUBQ
1568         if (doGPUDis)
1569                 WriteLog("%06X: SUBQ   #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1570 #endif
1571         uint32 r1 = gpu_convert_zero[IMM_1];
1572         uint32 res = RN - r1;
1573         SET_ZNC_SUB(RN, r1, res);
1574         RN = res;
1575 #ifdef GPU_DIS_SUBQ
1576         if (doGPUDis)
1577                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1578 #endif
1579 }
1580
1581 static void gpu_opcode_subqt(void)
1582 {
1583 #ifdef GPU_DIS_SUBQT
1584         if (doGPUDis)
1585                 WriteLog("%06X: SUBQT  #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1586 #endif
1587         RN -= gpu_convert_zero[IMM_1];
1588 #ifdef GPU_DIS_SUBQT
1589         if (doGPUDis)
1590                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1591 #endif
1592 }
1593
1594 static void gpu_opcode_cmp(void)
1595 {
1596 #ifdef GPU_DIS_CMP
1597         if (doGPUDis)
1598                 WriteLog("%06X: CMP    R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1599 #endif
1600         uint32 res = RN - RM;
1601         SET_ZNC_SUB(RN, RM, res);
1602 #ifdef GPU_DIS_CMP
1603         if (doGPUDis)
1604                 WriteLog("[NCZ:%u%u%u]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z);
1605 #endif
1606 }
1607
1608 static void gpu_opcode_cmpq(void)
1609 {
1610         static int32 sqtable[32] =
1611                 { 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,-16,-15,-14,-13,-12,-11,-10,-9,-8,-7,-6,-5,-4,-3,-2,-1 };
1612 #ifdef GPU_DIS_CMPQ
1613         if (doGPUDis)
1614                 WriteLog("%06X: CMPQ   #%d, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, sqtable[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1615 #endif
1616         uint32 r1 = sqtable[IMM_1 & 0x1F]; // I like this better -> (INT8)(jaguar.op >> 2) >> 3;
1617         uint32 res = RN - r1;
1618         SET_ZNC_SUB(RN, r1, res);
1619 #ifdef GPU_DIS_CMPQ
1620         if (doGPUDis)
1621                 WriteLog("[NCZ:%u%u%u]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z);
1622 #endif
1623 }
1624
1625 static void gpu_opcode_and(void)
1626 {
1627 #ifdef GPU_DIS_AND
1628         if (doGPUDis)
1629                 WriteLog("%06X: AND    R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1630 #endif
1631         RN = RN & RM;
1632         SET_ZN(RN);
1633 #ifdef GPU_DIS_AND
1634         if (doGPUDis)
1635                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1636 #endif
1637 }
1638
1639 static void gpu_opcode_or(void)
1640 {
1641 #ifdef GPU_DIS_OR
1642         if (doGPUDis)
1643                 WriteLog("%06X: OR     R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1644 #endif
1645         RN = RN | RM;
1646         SET_ZN(RN);
1647 #ifdef GPU_DIS_OR
1648         if (doGPUDis)
1649                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1650 #endif
1651 }
1652
1653 static void gpu_opcode_xor(void)
1654 {
1655 #ifdef GPU_DIS_XOR
1656         if (doGPUDis)
1657                 WriteLog("%06X: XOR    R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1658 #endif
1659         RN = RN ^ RM;
1660         SET_ZN(RN);
1661 #ifdef GPU_DIS_XOR
1662         if (doGPUDis)
1663                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1664 #endif
1665 }
1666
1667 static void gpu_opcode_not(void)
1668 {
1669 #ifdef GPU_DIS_NOT
1670         if (doGPUDis)
1671                 WriteLog("%06X: NOT    R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1672 #endif
1673         RN = ~RN;
1674         SET_ZN(RN);
1675 #ifdef GPU_DIS_NOT
1676         if (doGPUDis)
1677                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1678 #endif
1679 }
1680
1681 static void gpu_opcode_move_pc(void)
1682 {
1683 #ifdef GPU_DIS_MOVEPC
1684         if (doGPUDis)
1685                 WriteLog("%06X: MOVE   PC, R%02u [NCZ:%u%u%u, PC=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, gpu_pc-2, IMM_2, RN);
1686 #endif
1687         // Should be previous PC--this might not always be previous instruction!
1688         // Then again, this will point right at the *current* instruction, i.e., MOVE PC,R!
1689         RN = gpu_pc - 2;
1690 #ifdef GPU_DIS_MOVEPC
1691         if (doGPUDis)
1692                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1693 #endif
1694 }
1695
1696 static void gpu_opcode_sat8(void)
1697 {
1698 #ifdef GPU_DIS_SAT8
1699         if (doGPUDis)
1700                 WriteLog("%06X: SAT8   R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1701 #endif
1702         RN = ((int32)RN < 0 ? 0 : (RN > 0xFF ? 0xFF : RN));
1703         SET_ZN(RN);
1704 #ifdef GPU_DIS_SAT8
1705         if (doGPUDis)
1706                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1707 #endif
1708 }
1709
1710 static void gpu_opcode_sat16(void)
1711 {
1712         RN = ((int32)RN < 0 ? 0 : (RN > 0xFFFF ? 0xFFFF : RN));
1713         SET_ZN(RN);
1714 }
1715
1716 static void gpu_opcode_sat24(void)
1717 {
1718         RN = ((int32)RN < 0 ? 0 : (RN > 0xFFFFFF ? 0xFFFFFF : RN));
1719         SET_ZN(RN);
1720 }
1721
1722 static void gpu_opcode_store_r14_indexed(void)
1723 {
1724 #ifdef GPU_DIS_STORE14I
1725         if (doGPUDis)
1726                 WriteLog("%06X: STORE  R%02u, (R14+$%02X) [NCZ:%u%u%u, R%02u=%08X, R14+$%02X=%08X]\n", gpu_pc-2, IMM_2, gpu_convert_zero[IMM_1] << 2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, gpu_convert_zero[IMM_1] << 2, gpu_reg[14]+(gpu_convert_zero[IMM_1] << 2));
1727 #endif
1728         GPUWriteLong(gpu_reg[14] + (gpu_convert_zero[IMM_1] << 2), RN, GPU);
1729 }
1730
1731 static void gpu_opcode_store_r15_indexed(void)
1732 {
1733 #ifdef GPU_DIS_STORE15I
1734         if (doGPUDis)
1735                 WriteLog("%06X: STORE  R%02u, (R15+$%02X) [NCZ:%u%u%u, R%02u=%08X, R15+$%02X=%08X]\n", gpu_pc-2, IMM_2, gpu_convert_zero[IMM_1] << 2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, gpu_convert_zero[IMM_1] << 2, gpu_reg[15]+(gpu_convert_zero[IMM_1] << 2));
1736 #endif
1737         GPUWriteLong(gpu_reg[15] + (gpu_convert_zero[IMM_1] << 2), RN, GPU);
1738 }
1739
1740 static void gpu_opcode_load_r14_ri(void)
1741 {
1742 #ifdef GPU_DIS_LOAD14R
1743         if (doGPUDis)
1744                 WriteLog("%06X: LOAD   (R14+R%02u), R%02u [NCZ:%u%u%u, R14+R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM+gpu_reg[14], IMM_2, RN);
1745 #endif
1746         RN = GPUReadLong(gpu_reg[14] + RM, GPU);
1747 #ifdef GPU_DIS_LOAD14R
1748         if (doGPUDis)
1749                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1750 #endif
1751 }
1752
1753 static void gpu_opcode_load_r15_ri(void)
1754 {
1755 #ifdef GPU_DIS_LOAD15R
1756         if (doGPUDis)
1757                 WriteLog("%06X: LOAD   (R15+R%02u), R%02u [NCZ:%u%u%u, R15+R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM+gpu_reg[15], IMM_2, RN);
1758 #endif
1759         RN = GPUReadLong(gpu_reg[15] + RM, GPU);
1760 #ifdef GPU_DIS_LOAD15R
1761         if (doGPUDis)
1762                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1763 #endif
1764 }
1765
1766 static void gpu_opcode_store_r14_ri(void)
1767 {
1768 #ifdef GPU_DIS_STORE14R
1769         if (doGPUDis)
1770                 WriteLog("%06X: STORE  R%02u, (R14+R%02u) [NCZ:%u%u%u, R%02u=%08X, R14+R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM+gpu_reg[14]);
1771 #endif
1772         GPUWriteLong(gpu_reg[14] + RM, RN, GPU);
1773 }
1774
1775 static void gpu_opcode_store_r15_ri(void)
1776 {
1777 #ifdef GPU_DIS_STORE15R
1778         if (doGPUDis)
1779                 WriteLog("%06X: STORE  R%02u, (R15+R%02u) [NCZ:%u%u%u, R%02u=%08X, R15+R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM+gpu_reg[15]);
1780 #endif
1781         GPUWriteLong(gpu_reg[15] + RM, RN, GPU);
1782 }
1783
// NOP
// Does nothing apart from (optionally) emitting a disassembly line.
static void gpu_opcode_nop(void)
{
#ifdef GPU_DIS_NOP
	if (doGPUDis)
	{
		WriteLog("%06X: NOP    [NCZ:%u%u%u]\n", gpu_pc-2, gpu_flag_n, gpu_flag_c, gpu_flag_z);
	}
#endif
}
1791
1792 static void gpu_opcode_pack(void)
1793 {
1794 #ifdef GPU_DIS_PACK
1795         if (doGPUDis)
1796                 WriteLog("%06X: %s R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, (!IMM_1 ? "PACK  " : "UNPACK"), IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1797 #endif
1798         uint32 val = RN;
1799
1800 //BUG!  if (RM == 0)                            // Pack
1801         if (IMM_1 == 0)                         // Pack
1802                 RN = ((val >> 10) & 0x0000F000) | ((val >> 5) & 0x00000F00) | (val & 0x000000FF);
1803         else                                            // Unpack
1804                 RN = ((val & 0x0000F000) << 10) | ((val & 0x00000F00) << 5) | (val & 0x000000FF);
1805 #ifdef GPU_DIS_PACK
1806         if (doGPUDis)
1807                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1808 #endif
1809 }
1810
1811 static void gpu_opcode_storeb(void)
1812 {
1813 #ifdef GPU_DIS_STOREB
1814         if (doGPUDis)
1815                 WriteLog("%06X: STOREB R%02u, (R%02u) [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM);
1816 #endif
1817 //Is this right???
1818 // Would appear to be so...!
1819         if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1820                 GPUWriteLong(RM, RN & 0xFF, GPU);
1821         else
1822                 JaguarWriteByte(RM, RN, GPU);
1823 }
1824
1825 static void gpu_opcode_storew(void)
1826 {
1827 #ifdef GPU_DIS_STOREW
1828         if (doGPUDis)
1829                 WriteLog("%06X: STOREW R%02u, (R%02u) [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM);
1830 #endif
1831         if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1832                 GPUWriteLong(RM, RN & 0xFFFF, GPU);
1833         else
1834                 JaguarWriteWord(RM, RN, GPU);
1835 }
1836
1837 static void gpu_opcode_store(void)
1838 {
1839 #ifdef GPU_DIS_STORE
1840         if (doGPUDis)
1841                 WriteLog("%06X: STORE  R%02u, (R%02u) [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM);
1842 #endif
1843         GPUWriteLong(RM, RN, GPU);
1844 }
1845
1846 static void gpu_opcode_storep(void)
1847 {
1848         GPUWriteLong(RM + 0, gpu_hidata, GPU);
1849         GPUWriteLong(RM + 4, RN, GPU);
1850 }
1851
1852 static void gpu_opcode_loadb(void)
1853 {
1854 #ifdef GPU_DIS_LOADB
1855         if (doGPUDis)
1856                 WriteLog("%06X: LOADB  (R%02u), R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1857 #endif
1858         if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1859                 RN = GPUReadLong(RM, GPU) & 0xFF;
1860         else
1861                 RN = JaguarReadByte(RM, GPU);
1862 #ifdef GPU_DIS_LOADB
1863         if (doGPUDis)
1864                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1865 #endif
1866 }
1867
1868 static void gpu_opcode_loadw(void)
1869 {
1870 #ifdef GPU_DIS_LOADW
1871         if (doGPUDis)
1872                 WriteLog("%06X: LOADW  (R%02u), R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1873 #endif
1874         if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1875                 RN = GPUReadLong(RM, GPU) & 0xFFFF;
1876         else
1877                 RN = JaguarReadWord(RM, GPU);
1878 #ifdef GPU_DIS_LOADW
1879         if (doGPUDis)
1880                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1881 #endif
1882 }
1883
1884 static void gpu_opcode_load(void)
1885 {
1886 #ifdef GPU_DIS_LOAD
1887         if (doGPUDis)
1888                 WriteLog("%06X: LOAD   (R%02u), R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1889 #endif
1890         RN = GPUReadLong(RM, GPU);
1891 #ifdef GPU_DIS_LOAD
1892         if (doGPUDis)
1893                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1894 #endif
1895 }
1896
1897 static void gpu_opcode_loadp(void)
1898 {
1899         gpu_hidata = GPUReadLong(RM + 0, GPU);
1900         RN                 = GPUReadLong(RM + 4, GPU);
1901 }
1902
1903 static void gpu_opcode_load_r14_indexed(void)
1904 {
1905 #ifdef GPU_DIS_LOAD14I
1906         if (doGPUDis)
1907                 WriteLog("%06X: LOAD   (R14+$%02X), R%02u [NCZ:%u%u%u, R14+$%02X=%08X, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1] << 2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, gpu_convert_zero[IMM_1] << 2, gpu_reg[14]+(gpu_convert_zero[IMM_1] << 2), IMM_2, RN);
1908 #endif
1909         RN = GPUReadLong(gpu_reg[14] + (gpu_convert_zero[IMM_1] << 2), GPU);
1910 #ifdef GPU_DIS_LOAD14I
1911         if (doGPUDis)
1912                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1913 #endif
1914 }
1915
1916 static void gpu_opcode_load_r15_indexed(void)
1917 {
1918 #ifdef GPU_DIS_LOAD15I
1919         if (doGPUDis)
1920                 WriteLog("%06X: LOAD   (R15+$%02X), R%02u [NCZ:%u%u%u, R15+$%02X=%08X, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1] << 2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, gpu_convert_zero[IMM_1] << 2, gpu_reg[15]+(gpu_convert_zero[IMM_1] << 2), IMM_2, RN);
1921 #endif
1922         RN = GPUReadLong(gpu_reg[15] + (gpu_convert_zero[IMM_1] << 2), GPU);
1923 #ifdef GPU_DIS_LOAD15I
1924         if (doGPUDis)
1925                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1926 #endif
1927 }
1928
1929 static void gpu_opcode_movei(void)
1930 {
1931 #ifdef GPU_DIS_MOVEI
1932         if (doGPUDis)
1933                 WriteLog("%06X: MOVEI  #$%08X, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, (uint32)GPUReadWord(gpu_pc) | ((uint32)GPUReadWord(gpu_pc + 2) << 16), IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1934 #endif
1935         // This instruction is followed by 32-bit value in LSW / MSW format...
1936         RN = (uint32)GPUReadWord(gpu_pc, GPU) | ((uint32)GPUReadWord(gpu_pc + 2, GPU) << 16);
1937         gpu_pc += 4;
1938 #ifdef GPU_DIS_MOVEI
1939         if (doGPUDis)
1940                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1941 #endif
1942 }
1943
1944 static void gpu_opcode_moveta(void)
1945 {
1946 #ifdef GPU_DIS_MOVETA
1947         if (doGPUDis)
1948                 WriteLog("%06X: MOVETA R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u(alt)=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, ALTERNATE_RN);
1949 #endif
1950         ALTERNATE_RN = RM;
1951 #ifdef GPU_DIS_MOVETA
1952         if (doGPUDis)
1953                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u(alt)=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, ALTERNATE_RN);
1954 #endif
1955 }
1956
1957 static void gpu_opcode_movefa(void)
1958 {
1959 #ifdef GPU_DIS_MOVEFA
1960         if (doGPUDis)
1961                 WriteLog("%06X: MOVEFA R%02u, R%02u [NCZ:%u%u%u, R%02u(alt)=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, ALTERNATE_RM, IMM_2, RN);
1962 #endif
1963         RN = ALTERNATE_RM;
1964 #ifdef GPU_DIS_MOVEFA
1965         if (doGPUDis)
1966                 WriteLog("[NCZ:%u%u%u, R%02u(alt)=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, ALTERNATE_RM, IMM_2, RN);
1967 #endif
1968 }
1969
1970 static void gpu_opcode_move(void)
1971 {
1972 #ifdef GPU_DIS_MOVE
1973         if (doGPUDis)
1974                 WriteLog("%06X: MOVE   R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1975 #endif
1976         RN = RM;
1977 #ifdef GPU_DIS_MOVE
1978         if (doGPUDis)
1979                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1980 #endif
1981 }
1982
1983 static void gpu_opcode_moveq(void)
1984 {
1985 #ifdef GPU_DIS_MOVEQ
1986         if (doGPUDis)
1987                 WriteLog("%06X: MOVEQ  #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1988 #endif
1989         RN = IMM_1;
1990 #ifdef GPU_DIS_MOVEQ
1991         if (doGPUDis)
1992                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1993 #endif
1994 }
1995
1996 static void gpu_opcode_resmac(void)
1997 {
1998         RN = gpu_acc;
1999 }
2000
2001 static void gpu_opcode_imult(void)
2002 {
2003 #ifdef GPU_DIS_IMULT
2004         if (doGPUDis)
2005                 WriteLog("%06X: IMULT  R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2006 #endif
2007         RN = (int16)RN * (int16)RM;
2008         SET_ZN(RN);
2009 #ifdef GPU_DIS_IMULT
2010         if (doGPUDis)
2011                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2012 #endif
2013 }
2014
2015 static void gpu_opcode_mult(void)
2016 {
2017 #ifdef GPU_DIS_MULT
2018         if (doGPUDis)
2019                 WriteLog("%06X: MULT   R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2020 #endif
2021         RN = (uint16)RM * (uint16)RN;
2022         SET_ZN(RN);
2023 #ifdef GPU_DIS_MULT
2024         if (doGPUDis)
2025                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2026 #endif
2027 }
2028
2029 static void gpu_opcode_bclr(void)
2030 {
2031 #ifdef GPU_DIS_BCLR
2032         if (doGPUDis)
2033                 WriteLog("%06X: BCLR   #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2034 #endif
2035         uint32 res = RN & ~(1 << IMM_1);
2036         RN = res;
2037         SET_ZN(res);
2038 #ifdef GPU_DIS_BCLR
2039         if (doGPUDis)
2040                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2041 #endif
2042 }
2043
2044 static void gpu_opcode_btst(void)
2045 {
2046 #ifdef GPU_DIS_BTST
2047         if (doGPUDis)
2048                 WriteLog("%06X: BTST   #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2049 #endif
2050         gpu_flag_z = (~RN >> IMM_1) & 1;
2051 #ifdef GPU_DIS_BTST
2052         if (doGPUDis)
2053                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2054 #endif
2055 }
2056
2057 static void gpu_opcode_bset(void)
2058 {
2059 #ifdef GPU_DIS_BSET
2060         if (doGPUDis)
2061                 WriteLog("%06X: BSET   #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2062 #endif
2063         uint32 res = RN | (1 << IMM_1);
2064         RN = res;
2065         SET_ZN(res);
2066 #ifdef GPU_DIS_BSET
2067         if (doGPUDis)
2068                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2069 #endif
2070 }
2071
2072 static void gpu_opcode_imacn(void)
2073 {
2074         uint32 res = (int16)RM * (int16)(RN);
2075         gpu_acc += res;
2076 }
2077
2078 static void gpu_opcode_mtoi(void)
2079 {
2080         uint32 _RM = RM;
2081         uint32 res = RN = (((int32)_RM >> 8) & 0xFF800000) | (_RM & 0x007FFFFF);
2082         SET_ZN(res);
2083 }
2084
2085 static void gpu_opcode_normi(void)
2086 {
2087         uint32 _RM = RM;
2088         uint32 res = 0;
2089
2090         if (_RM)
2091         {
2092                 while ((_RM & 0xFFC00000) == 0)
2093                 {
2094                         _RM <<= 1;
2095                         res--;
2096                 }
2097                 while ((_RM & 0xFF800000) != 0)
2098                 {
2099                         _RM >>= 1;
2100                         res++;
2101                 }
2102         }
2103         RN = res;
2104         SET_ZN(res);
2105 }
2106
2107 static void gpu_opcode_mmult(void)
2108 {
2109         int count       = gpu_matrix_control & 0x0F;    // Matrix width
2110         uint32 addr = gpu_pointer_to_matrix;            // In the GPU's RAM
2111         int64 accum = 0;
2112         uint32 res;
2113
2114         if (gpu_matrix_control & 0x10)                          // Column stepping
2115         {
2116                 for(int i=0; i<count; i++)
2117                 {
2118                         int16 a;
2119                         if (i & 0x01)
2120                                 a = (int16)((gpu_alternate_reg[IMM_1 + (i >> 1)] >> 16) & 0xFFFF);
2121                         else
2122                                 a = (int16)(gpu_alternate_reg[IMM_1 + (i >> 1)] & 0xFFFF);
2123
2124                         int16 b = ((int16)GPUReadWord(addr + 2, GPU));
2125                         accum += a * b;
2126                         addr += 4 * count;
2127                 }
2128         }
2129         else                                                                            // Row stepping
2130         {
2131                 for(int i=0; i<count; i++)
2132                 {
2133                         int16 a;
2134                         if (i & 0x01)
2135                                 a = (int16)((gpu_alternate_reg[IMM_1 + (i >> 1)] >> 16) & 0xFFFF);
2136                         else
2137                                 a = (int16)(gpu_alternate_reg[IMM_1 + (i >> 1)] & 0xFFFF);
2138
2139                         int16 b = ((int16)GPUReadWord(addr + 2, GPU));
2140                         accum += a * b;
2141                         addr += 4;
2142                 }
2143         }
2144         RN = res = (int32)accum;
2145         // carry flag to do (out of the last add)
2146         SET_ZN(res);
2147 }
2148
2149 static void gpu_opcode_abs(void)
2150 {
2151 #ifdef GPU_DIS_ABS
2152         if (doGPUDis)
2153                 WriteLog("%06X: ABS    R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2154 #endif
2155         gpu_flag_c = RN >> 31;
2156         if (RN == 0x80000000)
2157         //Is 0x80000000 a positive number? If so, then we need to set C to 0 as well!
2158                 gpu_flag_n = 1, gpu_flag_z = 0;
2159         else
2160         {
2161                 if (gpu_flag_c)
2162                         RN = -RN;
2163                 gpu_flag_n = 0; SET_FLAG_Z(RN);
2164         }
2165 #ifdef GPU_DIS_ABS
2166         if (doGPUDis)
2167                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2168 #endif
2169 }
2170
2171 static void gpu_opcode_div(void)        // RN / RM
2172 {
2173 #ifdef GPU_DIS_DIV
2174         if (doGPUDis)
2175                 WriteLog("%06X: DIV    R%02u, R%02u (%s) [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, (gpu_div_control & 0x01 ? "16.16" : "32"), gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2176 #endif
2177 // NOTE: remainder is NOT calculated correctly here!
2178 //       The original tried to get it right by checking to see if the
2179 //       remainder was negative, but that's too late...
2180 // The code there should do it now, but I'm not 100% sure...
2181
2182         if (RM)
2183         {
2184                 if (gpu_div_control & 0x01)             // 16.16 division
2185                 {
2186                         RN = ((uint64)RN << 16) / RM;
2187                         gpu_remain = ((uint64)RN << 16) % RM;
2188                 }
2189                 else
2190                 {
2191                         RN = RN / RM;
2192                         gpu_remain = RN % RM;
2193                 }
2194
2195                 if ((gpu_remain - RM) & 0x80000000)     // If the result would have been negative...
2196                         gpu_remain -= RM;                       // Then make it negative!
2197         }
2198         else
2199                 RN = 0xFFFFFFFF;
2200
2201 /*      uint32 _RM=RM;
2202         uint32 _RN=RN;
2203
2204         if (_RM)
2205         {
2206                 if (gpu_div_control & 1)
2207                 {
2208                         gpu_remain = (((uint64)_RN) << 16) % _RM;
2209                         if (gpu_remain&0x80000000)
2210                                 gpu_remain-=_RM;
2211                         RN = (((uint64)_RN) << 16) / _RM;
2212                 }
2213                 else
2214                 {
2215                         gpu_remain = _RN % _RM;
2216                         if (gpu_remain&0x80000000)
2217                                 gpu_remain-=_RM;
2218                         RN/=_RM;
2219                 }
2220         }
2221         else
2222                 RN=0xffffffff;*/
2223 #ifdef GPU_DIS_DIV
2224         if (doGPUDis)
2225                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] Remainder: %08X\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN, gpu_remain);
2226 #endif
2227 }
2228
2229 static void gpu_opcode_imultn(void)
2230 {
2231         uint32 res = (int32)((int16)RN * (int16)RM);
2232         gpu_acc = (int32)res;
2233         SET_FLAG_Z(res);
2234         SET_FLAG_N(res);
2235 }
2236
2237 static void gpu_opcode_neg(void)
2238 {
2239 #ifdef GPU_DIS_NEG
2240         if (doGPUDis)
2241                 WriteLog("%06X: NEG    R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2242 #endif
2243         uint32 res = -RN;
2244         SET_ZNC_SUB(0, RN, res);
2245         RN = res;
2246 #ifdef GPU_DIS_NEG
2247         if (doGPUDis)
2248                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2249 #endif
2250 }
2251
2252 static void gpu_opcode_shlq(void)
2253 {
2254 #ifdef GPU_DIS_SHLQ
2255         if (doGPUDis)
2256                 WriteLog("%06X: SHLQ   #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, 32 - IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2257 #endif
2258 // Was a bug here...
2259 // (Look at Aaron's code: If r1 = 32, then 32 - 32 = 0 which is wrong!)
2260         int32 r1 = 32 - IMM_1;
2261         uint32 res = RN << r1;
2262         SET_ZN(res); gpu_flag_c = (RN >> 31) & 1;
2263         RN = res;
2264 #ifdef GPU_DIS_SHLQ
2265         if (doGPUDis)
2266                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2267 #endif
2268 }
2269
2270 static void gpu_opcode_shrq(void)
2271 {
2272 #ifdef GPU_DIS_SHRQ
2273         if (doGPUDis)
2274                 WriteLog("%06X: SHRQ   #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2275 #endif
2276         int32 r1 = gpu_convert_zero[IMM_1];
2277         uint32 res = RN >> r1;
2278         SET_ZN(res); gpu_flag_c = RN & 1;
2279         RN = res;
2280 #ifdef GPU_DIS_SHRQ
2281         if (doGPUDis)
2282                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2283 #endif
2284 }
2285
2286 static void gpu_opcode_ror(void)
2287 {
2288 #ifdef GPU_DIS_ROR
2289         if (doGPUDis)
2290                 WriteLog("%06X: ROR    R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2291 #endif
2292         uint32 r1 = RM & 0x1F;
2293         uint32 res = (RN >> r1) | (RN << (32 - r1));
2294         SET_ZN(res); gpu_flag_c = (RN >> 31) & 1;
2295         RN = res;
2296 #ifdef GPU_DIS_ROR
2297         if (doGPUDis)
2298                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2299 #endif
2300 }
2301
2302 static void gpu_opcode_rorq(void)
2303 {
2304 #ifdef GPU_DIS_RORQ
2305         if (doGPUDis)
2306                 WriteLog("%06X: RORQ   #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2307 #endif
2308         uint32 r1 = gpu_convert_zero[IMM_1 & 0x1F];
2309         uint32 r2 = RN;
2310         uint32 res = (r2 >> r1) | (r2 << (32 - r1));
2311         RN = res;
2312         SET_ZN(res); gpu_flag_c = (r2 >> 31) & 0x01;
2313 #ifdef GPU_DIS_RORQ
2314         if (doGPUDis)
2315                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2316 #endif
2317 }
2318
2319 static void gpu_opcode_sha(void)
2320 {
2321 /*      int dreg = jaguar.op & 31;
2322         int32 r1 = (int32)jaguar.r[(jaguar.op >> 5) & 31];
2323         uint32 r2 = jaguar.r[dreg];
2324         uint32 res;
2325
2326         CLR_ZNC;
2327         if (r1 < 0)
2328         {
2329                 res = (r1 <= -32) ? 0 : (r2 << -r1);
2330                 jaguar.FLAGS |= (r2 >> 30) & 2;
2331         }
2332         else
2333         {
2334                 res = (r1 >= 32) ? ((int32)r2 >> 31) : ((int32)r2 >> r1);
2335                 jaguar.FLAGS |= (r2 << 1) & 2;
2336         }
2337         jaguar.r[dreg] = res;
2338         SET_ZN(res);*/
2339
2340 #ifdef GPU_DIS_SHA
2341         if (doGPUDis)
2342                 WriteLog("%06X: SHA    R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2343 #endif
2344         uint32 res;
2345
2346         if ((int32)RM < 0)
2347         {
2348                 res = ((int32)RM <= -32) ? 0 : (RN << -(int32)RM);
2349                 gpu_flag_c = RN >> 31;
2350         }
2351         else
2352         {
2353                 res = ((int32)RM >= 32) ? ((int32)RN >> 31) : ((int32)RN >> (int32)RM);
2354                 gpu_flag_c = RN & 0x01;
2355         }
2356         RN = res;
2357         SET_ZN(res);
2358 #ifdef GPU_DIS_SHA
2359         if (doGPUDis)
2360                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2361 #endif
2362
2363 /*      int32 sRM=(int32)RM;
2364         uint32 _RN=RN;
2365
2366         if (sRM<0)
2367         {
2368                 uint32 shift=-sRM;
2369                 if (shift>=32) shift=32;
2370                 gpu_flag_c=(_RN&0x80000000)>>31;
2371                 while (shift)
2372                 {
2373                         _RN<<=1;
2374                         shift--;
2375                 }
2376         }
2377         else
2378         {
2379                 uint32 shift=sRM;
2380                 if (shift>=32) shift=32;
2381                 gpu_flag_c=_RN&0x1;
2382                 while (shift)
2383                 {
2384                         _RN=((int32)_RN)>>1;
2385                         shift--;
2386                 }
2387         }
2388         RN=_RN;
2389         SET_FLAG_Z(_RN);
2390         SET_FLAG_N(_RN);*/
2391 }
2392
2393 static void gpu_opcode_sharq(void)
2394 {
2395 #ifdef GPU_DIS_SHARQ
2396         if (doGPUDis)
2397                 WriteLog("%06X: SHARQ  #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2398 #endif
2399         uint32 res = (int32)RN >> gpu_convert_zero[IMM_1];
2400         SET_ZN(res); gpu_flag_c = RN & 0x01;
2401         RN = res;
2402 #ifdef GPU_DIS_SHARQ
2403         if (doGPUDis)
2404                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2405 #endif
2406 }
2407
2408 static void gpu_opcode_sh(void)
2409 {
2410 #ifdef GPU_DIS_SH
2411         if (doGPUDis)
2412                 WriteLog("%06X: SH     R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2413 #endif
2414         if (RM & 0x80000000)            // Shift left
2415         {
2416                 gpu_flag_c = RN >> 31;
2417                 RN = ((int32)RM <= -32 ? 0 : RN << -(int32)RM);
2418         }
2419         else                                            // Shift right
2420         {
2421                 gpu_flag_c = RN & 0x01;
2422                 RN = (RM >= 32 ? 0 : RN >> RM);
2423         }
2424         SET_ZN(RN);
2425 #ifdef GPU_DIS_SH
2426         if (doGPUDis)
2427                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2428 #endif
2429 }
2430
2431 //Temporary: Testing only!
2432 //#include "gpu2.cpp"
2433 //#include "gpu3.cpp"
2434
2435 #else
2436
2437 // New thread-safe GPU core
2438
//
// Placeholder entry point for the new thread-safe GPU core.
//
// The core is not implemented yet, so this does no work. It returns 0
// explicitly: flowing off the end of a non-void function is undefined
// behaviour in C++.
//
// data: opaque argument supplied by the caller; currently unused.
//
int GPUCore(void * data)
{
        (void)data;                     // Nothing to consume until the core exists

        return 0;
}
2442
2443 #endif