]> Shamusworld >> Repos - virtualjaguar/blob - src/gpu.cpp
Fixed memory leak/timing tweaking
[virtualjaguar] / src / gpu.cpp
1 //
2 // GPU Core
3 //
4 // Originally by David Raingeard (Cal2)
5 // GCC/SDL port by Niels Wagenaar (Linux/WIN32) and Caz (BeOS)
6 // Cleanups, endian wrongness, and bad ASM amelioration by James L. Hammons
7 // Note: Endian wrongness probably stems from the MAME origins of this emu and
8 //       the braindead way in which MAME handles memory. :-)
9 //
10 // Problem with not booting the BIOS was the incorrect way that the
11 // SUBC instruction set the carry when the carry was set going in...
12 // Same problem with ADDC...
13 //
14
15 #include "gpu.h"
16
17 //#define GPU_DEBUG
18
// For GPU disassembly...
20
// Per-opcode disassembly switches. Every symbol is defined empty, so only its
// presence matters. NOTE(review): presumably each one gates the disassembly
// trace inside the matching opcode handler; the handlers are outside this
// view, confirm there.

// Arithmetic & compare
#define GPU_DIS_ABS
#define GPU_DIS_ADD
#define GPU_DIS_ADDC
#define GPU_DIS_ADDQ
#define GPU_DIS_ADDQT
#define GPU_DIS_CMP
#define GPU_DIS_CMPQ
#define GPU_DIS_DIV
#define GPU_DIS_IMULT
#define GPU_DIS_MULT
#define GPU_DIS_NEG
#define GPU_DIS_SAT8
#define GPU_DIS_SUB
#define GPU_DIS_SUBC
#define GPU_DIS_SUBQ
#define GPU_DIS_SUBQT

// Logic & bit manipulation
#define GPU_DIS_AND
#define GPU_DIS_BCLR
#define GPU_DIS_BSET
#define GPU_DIS_BTST
#define GPU_DIS_NOT
#define GPU_DIS_OR
#define GPU_DIS_XOR

// Shifts & rotates
#define GPU_DIS_ROR
#define GPU_DIS_RORQ
#define GPU_DIS_SH
#define GPU_DIS_SHA
#define GPU_DIS_SHARQ
#define GPU_DIS_SHLQ
#define GPU_DIS_SHRQ

// Flow control
#define GPU_DIS_JUMP
#define GPU_DIS_JR
#define GPU_DIS_NOP

// Loads
#define GPU_DIS_LOAD
#define GPU_DIS_LOADB
#define GPU_DIS_LOADW
#define GPU_DIS_LOAD14I
#define GPU_DIS_LOAD14R
#define GPU_DIS_LOAD15I
#define GPU_DIS_LOAD15R

// Register moves & packing
#define GPU_DIS_MOVE
#define GPU_DIS_MOVEFA
#define GPU_DIS_MOVEI
#define GPU_DIS_MOVEPC
#define GPU_DIS_MOVETA
#define GPU_DIS_MOVEQ
#define GPU_DIS_PACK

// Stores
#define GPU_DIS_STORE
#define GPU_DIS_STOREB
#define GPU_DIS_STOREW
#define GPU_DIS_STORE14I
#define GPU_DIS_STORE14R
#define GPU_DIS_STORE15I
#define GPU_DIS_STORE15R

// Global disassembly flag; starts out false.
bool doGPUDis = false;
77 //bool doGPUDis = true;
78 //*/
79 /*
80 GPU opcodes use (BIOS flying ATARI logo):
81 +                     add 357416
82 +                    addq 538030
83 +                   addqt 6999
84 +                     sub 116663
85 +                    subq 188059
86 +                   subqt 15086
87 +                     neg 36097
88 +                     and 233993
89 +                      or 109332
90 +                     xor 1384
91 +                    btst 111924
92 +                    bset 25029
93 +                    bclr 10551
94 +                    mult 28147
95 +                   imult 69148
96 +                     div 64102
97 +                     abs 159394
98 +                    shlq 194690
99 +                    shrq 292587
100 +                   sharq 192649
101 +                    rorq 58672
102 +                     cmp 244963
103 +                    cmpq 114834
104 +                    move 833472
105 +                   moveq 56427
106 +                  moveta 220814
107 +                  movefa 170678
108 +                   movei 152025
109 +                   loadw 108220
110 +                    load 430936
111 +                  storew 3036
112 +                   store 372490
113 +                 move_pc 2330
114 +                    jump 349134
115 +                      jr 529171
116                     mmult 64904
117 +                     nop 432179
118 */
119
120 // Various bits
121
// Interrupt latch clear bits as they appear in the flags word (bits 9-13).
// GPUWriteLong shifts these right by 3 to clear the matching latch bits in
// gpu_control.

#define CINT0FLAG			(1 << 9)
#define CINT1FLAG			(1 << 10)
#define CINT2FLAG			(1 << 11)
#define CINT3FLAG			(1 << 12)
#define CINT4FLAG			(1 << 13)
#define CINT04FLAGS			(CINT0FLAG | CINT1FLAG | CINT2FLAG | CINT3FLAG | CINT4FLAG)

// GPU_FLAGS bits, least significant first

#define ZERO_FLAG		(1 << 0)		// ALU zero
#define CARRY_FLAG		(1 << 1)		// ALU carry
#define NEGA_FLAG		(1 << 2)		// ALU negative
#define IMASK			(1 << 3)		// Interrupt mask
#define INT_ENA0		(1 << 4)		// Interrupt enables 0-4
#define INT_ENA1		(1 << 5)
#define INT_ENA2		(1 << 6)
#define INT_ENA3		(1 << 7)
#define INT_ENA4		(1 << 8)
#define INT_CLR0		(1 << 9)		// Interrupt latch clears 0-4 (same bits as CINTxFLAG)
#define INT_CLR1		(1 << 10)
#define INT_CLR2		(1 << 11)
#define INT_CLR3		(1 << 12)
#define INT_CLR4		(1 << 13)
#define REGPAGE			(1 << 14)		// Register page bit
#define DMAEN			(1 << 15)
147
148 // External global variables
149
// Logging switches owned by another translation unit

extern int start_logging;
extern int gpu_start_log;

// Private function prototypes

void GPUUpdateRegisterBanks(void);

// Debug dump helpers
void GPUDumpDisassembly(void);
void GPUDumpRegisters(void);
void GPUDumpMemory(void);

// Opcode handlers: one per entry of the 64-slot gpu_opcode[] dispatch table.
// They are grouped by instruction family here; the table itself fixes the
// opcode numbering.

// Add / subtract / compare
static void gpu_opcode_add(void);
static void gpu_opcode_addc(void);
static void gpu_opcode_addq(void);
static void gpu_opcode_addqt(void);
static void gpu_opcode_sub(void);
static void gpu_opcode_subc(void);
static void gpu_opcode_subq(void);
static void gpu_opcode_subqt(void);
static void gpu_opcode_neg(void);
static void gpu_opcode_abs(void);
static void gpu_opcode_cmp(void);
static void gpu_opcode_cmpq(void);

// Multiply / divide / multiply-accumulate
static void gpu_opcode_mult(void);
static void gpu_opcode_imult(void);
static void gpu_opcode_imultn(void);
static void gpu_opcode_resmac(void);
static void gpu_opcode_imacn(void);
static void gpu_opcode_div(void);
static void gpu_opcode_mmult(void);

// Logic / bit manipulation
static void gpu_opcode_and(void);
static void gpu_opcode_or(void);
static void gpu_opcode_xor(void);
static void gpu_opcode_not(void);
static void gpu_opcode_btst(void);
static void gpu_opcode_bset(void);
static void gpu_opcode_bclr(void);

// Shifts / rotates
static void gpu_opcode_sh(void);
static void gpu_opcode_shlq(void);
static void gpu_opcode_shrq(void);
static void gpu_opcode_sha(void);
static void gpu_opcode_sharq(void);
static void gpu_opcode_ror(void);
static void gpu_opcode_rorq(void);

// Saturation / packing / conversion
static void gpu_opcode_sat8(void);
static void gpu_opcode_sat16(void);
static void gpu_opcode_sat24(void);
static void gpu_opcode_pack(void);
static void gpu_opcode_mtoi(void);
static void gpu_opcode_normi(void);

// Register moves
static void gpu_opcode_move(void);
static void gpu_opcode_moveq(void);
static void gpu_opcode_moveta(void);
static void gpu_opcode_movefa(void);
static void gpu_opcode_movei(void);
static void gpu_opcode_move_pc(void);

// Loads
static void gpu_opcode_loadb(void);
static void gpu_opcode_loadw(void);
static void gpu_opcode_load(void);
static void gpu_opcode_loadp(void);
static void gpu_opcode_load_r14_indexed(void);
static void gpu_opcode_load_r15_indexed(void);
static void gpu_opcode_load_r14_ri(void);
static void gpu_opcode_load_r15_ri(void);

// Stores
static void gpu_opcode_storeb(void);
static void gpu_opcode_storew(void);
static void gpu_opcode_store(void);
static void gpu_opcode_storep(void);
static void gpu_opcode_store_r14_indexed(void);
static void gpu_opcode_store_r15_indexed(void);
static void gpu_opcode_store_r14_ri(void);
static void gpu_opcode_store_r15_ri(void);

// Flow control
static void gpu_opcode_jump(void);
static void gpu_opcode_jr(void);
static void gpu_opcode_nop(void);
225
226 // This is wrong, since it doesn't take pipeline effects into account. !!! FIX !!!
227 /*uint8 gpu_opcode_cycles[64] = 
228 {
229         3,  3,  3,  3,  3,  3,  3,  3,
230         3,  3,  3,  3,  3,  3,  3,  3,
231         3,  3,  1,  3,  1, 18,  3,  3,
232         3,  3,  3,  3,  3,  3,  3,  3,
233         3,  3,  2,  2,  2,  2,  3,  4,
234         5,  4,  5,  6,  6,  1,  1,  1,
235         1,  2,  2,  2,  1,  1,  9,  3,
236         3,  1,  6,  6,  2,  2,  3,  3
237 };//*/
238 //Here's a QnD kludge...
239 //This is wrong, wrong, WRONG, but it seems to work for the time being...
240 //(That is, it fixes Flip Out which relies on GPU timing rather than semaphores. Bad developers! Bad!)
241 //What's needed here is a way to take pipeline effects into account (including pipeline stalls!)...
242 uint8 gpu_opcode_cycles[64] = 
243 {
244         1,  1,  1,  1,  1,  1,  1,  1,
245         1,  1,  1,  1,  1,  1,  1,  1,
246         1,  1,  1,  1,  1,  9,  1,  1,
247         1,  1,  1,  1,  1,  1,  1,  1,
248         1,  1,  1,  1,  1,  1,  1,  2,
249         2,  2,  2,  3,  3,  1,  1,  1,
250         1,  1,  1,  1,  1,  1,  4,  1,
251         1,  1,  3,  3,  1,  1,  1,  1
252 };
253
254 void (*gpu_opcode[64])()= 
255 {       
256         gpu_opcode_add,                                 gpu_opcode_addc,                                gpu_opcode_addq,                                gpu_opcode_addqt,
257         gpu_opcode_sub,                                 gpu_opcode_subc,                                gpu_opcode_subq,                                gpu_opcode_subqt,
258         gpu_opcode_neg,                                 gpu_opcode_and,                                 gpu_opcode_or,                                  gpu_opcode_xor,
259         gpu_opcode_not,                                 gpu_opcode_btst,                                gpu_opcode_bset,                                gpu_opcode_bclr,
260         gpu_opcode_mult,                                gpu_opcode_imult,                               gpu_opcode_imultn,                              gpu_opcode_resmac,
261         gpu_opcode_imacn,                               gpu_opcode_div,                                 gpu_opcode_abs,                                 gpu_opcode_sh,
262         gpu_opcode_shlq,                                gpu_opcode_shrq,                                gpu_opcode_sha,                                 gpu_opcode_sharq,
263         gpu_opcode_ror,                                 gpu_opcode_rorq,                                gpu_opcode_cmp,                                 gpu_opcode_cmpq,
264         gpu_opcode_sat8,                                gpu_opcode_sat16,                               gpu_opcode_move,                                gpu_opcode_moveq,
265         gpu_opcode_moveta,                              gpu_opcode_movefa,                              gpu_opcode_movei,                               gpu_opcode_loadb,
266         gpu_opcode_loadw,                               gpu_opcode_load,                                gpu_opcode_loadp,                               gpu_opcode_load_r14_indexed,
267         gpu_opcode_load_r15_indexed,    gpu_opcode_storeb,                              gpu_opcode_storew,                              gpu_opcode_store,
268         gpu_opcode_storep,                              gpu_opcode_store_r14_indexed,   gpu_opcode_store_r15_indexed,   gpu_opcode_move_pc,
269         gpu_opcode_jump,                                gpu_opcode_jr,                                  gpu_opcode_mmult,                               gpu_opcode_mtoi,
270         gpu_opcode_normi,                               gpu_opcode_nop,                                 gpu_opcode_load_r14_ri,                 gpu_opcode_load_r15_ri,
271         gpu_opcode_store_r14_ri,                gpu_opcode_store_r15_ri,                gpu_opcode_sat24,                               gpu_opcode_pack,
272 };
273
274 static uint8 * gpu_ram_8;
275 uint32 gpu_pc;
276 static uint32 gpu_acc;
277 static uint32 gpu_remain;
278 static uint32 gpu_hidata;
279 static uint32 gpu_flags;
280 static uint32 gpu_matrix_control;
281 static uint32 gpu_pointer_to_matrix;
282 static uint32 gpu_data_organization;
283 static uint32 gpu_control;
284 static uint32 gpu_div_control;
285 // There is a distinct advantage to having these separated out--there's no need to clear
286 // a bit before writing a result. I.e., if the result of an operation leaves a zero in
287 // the carry flag, you don't have to zero gpu_flag_c before you can write that zero!
288 static uint8 gpu_flag_z, gpu_flag_n, gpu_flag_c;
289 static uint32 * gpu_reg_bank_0;
290 static uint32 * gpu_reg_bank_1;
291 static uint32 * gpu_reg;
292 static uint32 * gpu_alternate_reg;
293
294 static uint32 gpu_instruction;
295 static uint32 gpu_opcode_first_parameter;
296 static uint32 gpu_opcode_second_parameter;
297
// Bit 0 of the control register: nonzero while the GPU is running
#define GPU_RUNNING		(gpu_control & 0x01)

// Operand accessors for the instruction currently being executed
#define RM				gpu_reg[gpu_opcode_first_parameter]
#define RN				gpu_reg[gpu_opcode_second_parameter]
#define ALTERNATE_RM	gpu_alternate_reg[gpu_opcode_first_parameter]
#define ALTERNATE_RN	gpu_alternate_reg[gpu_opcode_second_parameter]
#define IMM_1			gpu_opcode_first_parameter
#define IMM_2			gpu_opcode_second_parameter

// Older flag helpers. These carry their own trailing semicolon, which is kept
// because existing call sites may rely on it; do not use them as the body of
// an unbraced if/else.
#define SET_FLAG_Z(r)	(gpu_flag_z = ((r) == 0));
#define SET_FLAG_N(r)	(gpu_flag_n = (((UINT32)(r) >> 31) & 0x01));

#define RESET_FLAG_Z()	gpu_flag_z = 0;
#define RESET_FLAG_N()	gpu_flag_n = 0;
#define RESET_FLAG_C()	gpu_flag_c = 0;

// Expression-style flag helpers
#define CLR_Z				(gpu_flag_z = 0)
#define CLR_ZN				(gpu_flag_z = gpu_flag_n = 0)
#define CLR_ZNC				(gpu_flag_z = gpu_flag_n = gpu_flag_c = 0)
#define SET_Z(r)			(gpu_flag_z = ((r) == 0))
#define SET_N(r)			(gpu_flag_n = (((UINT32)(r) >> 31) & 0x01))
// Carry out of a + b: set when b is larger than the headroom left above a
#define SET_C_ADD(a,b)		(gpu_flag_c = ((UINT32)(b) > (UINT32)(~(a))))
// Borrow out of a - b: set when b is larger than a
#define SET_C_SUB(a,b)		(gpu_flag_c = ((UINT32)(b) > (UINT32)(a)))
// The combined setters are single comma expressions, so they behave as one
// statement; as "x; y; z" only the first part was governed by an unbraced if.
#define SET_ZN(r)			(SET_N(r), SET_Z(r))
#define SET_ZNC_ADD(a,b,r)	(SET_N(r), SET_Z(r), SET_C_ADD(a,b))
#define SET_ZNC_SUB(a,b,r)	(SET_N(r), SET_Z(r), SET_C_SUB(a,b))
324
325 uint32 gpu_convert_zero[32] =
326         { 32,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 };
327
328 uint8 * branch_condition_table = 0;
329 #define BRANCH_CONDITION(x)     branch_condition_table[(x) + ((jaguar_flags & 7) << 5)]
330
331 uint32 gpu_opcode_use[64];
332
// Mnemonic for each opcode number, in the same order as gpu_opcode[]
char * gpu_opcode_str[64] =
{
	/* 00 */ "add",              "addc",              "addq",              "addqt",
	/* 04 */ "sub",              "subc",              "subq",              "subqt",
	/* 08 */ "neg",              "and",               "or",                "xor",
	/* 12 */ "not",              "btst",              "bset",              "bclr",
	/* 16 */ "mult",             "imult",             "imultn",            "resmac",
	/* 20 */ "imacn",            "div",               "abs",               "sh",
	/* 24 */ "shlq",             "shrq",              "sha",               "sharq",
	/* 28 */ "ror",              "rorq",              "cmp",               "cmpq",
	/* 32 */ "sat8",             "sat16",             "move",              "moveq",
	/* 36 */ "moveta",           "movefa",            "movei",             "loadb",
	/* 40 */ "loadw",            "load",              "loadp",             "load_r14_indexed",
	/* 44 */ "load_r15_indexed", "storeb",            "storew",            "store",
	/* 48 */ "storep",           "store_r14_indexed", "store_r15_indexed", "move_pc",
	/* 52 */ "jump",             "jr",                "mmult",             "mtoi",
	/* 56 */ "normi",            "nop",               "load_r14_ri",       "load_r15_ri",
	/* 60 */ "store_r14_ri",     "store_r15_ri",      "sat24",             "pack",
};
352
353 static uint32 gpu_in_exec = 0;
354 static uint32 gpu_releaseTimeSlice_flag = 0;
355
356 void gpu_releaseTimeslice(void)
357 {
358         gpu_releaseTimeSlice_flag = 1;
359 }
360
361 uint32 gpu_get_pc(void)
362 {
363         return gpu_pc;
364 }
365
366 void build_branch_condition_table(void)
367 {
368         if (!branch_condition_table)
369         {
370                 branch_condition_table = (uint8 *)malloc(32 * 8 * sizeof(branch_condition_table[0]));
371
372                 if (branch_condition_table)
373                 {
374                         for(int i=0; i<8; i++)
375                         {
376                                 for(int j=0; j<32; j++)
377                                 {
378                                         int result = 1;
379                                         if (j & 1)
380                                                 if (i & ZERO_FLAG)
381                                                         result = 0;
382                                         if (j & 2)
383                                                 if (!(i & ZERO_FLAG))
384                                                         result = 0;
385                                         if (j & 4)
386                                                 if (i & (CARRY_FLAG << (j >> 4)))
387                                                         result = 0;
388                                         if (j & 8)
389                                                 if (!(i & (CARRY_FLAG << (j >> 4))))
390                                                         result = 0;
391                                         branch_condition_table[i * 32 + j] = result;
392                                 }
393                         }
394                 }
395         }
396 }
397
398 //
399 // GPU byte access (read)
400 //
401 uint8 GPUReadByte(uint32 offset, uint32 who/*=UNKNOWN*/)
402 {
403         if (offset >= 0xF02000 && offset <= 0xF020FF)
404                 WriteLog("GPU: ReadByte--Attempt to read from GPU register file by %s!\n", whoName[who]);
405
406         if ((offset >= GPU_WORK_RAM_BASE) && (offset < GPU_WORK_RAM_BASE+0x1000))
407                 return gpu_ram_8[offset & 0xFFF];
408         else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset < GPU_CONTROL_RAM_BASE+0x20))
409         {
410                 uint32 data = GPUReadLong(offset & 0xFFFFFFFC, who);
411
412                 if ((offset & 0x03) == 0)
413                         return data >> 24;
414                 else if ((offset & 0x03) == 1)
415                         return (data >> 16) & 0xFF;
416                 else if ((offset & 0x03) == 2)
417                         return (data >> 8) & 0xFF;
418                 else if ((offset & 0x03) == 3)
419                         return data & 0xFF;
420         }
421
422         return JaguarReadByte(offset, who);
423 }
424
425 //
426 // GPU word access (read)
427 //
428 uint16 GPUReadWord(uint32 offset, uint32 who/*=UNKNOWN*/)
429 {
430         if (offset >= 0xF02000 && offset <= 0xF020FF)
431                 WriteLog("GPU: ReadWord--Attempt to read from GPU register file by %s!\n", whoName[who]);
432
433         if ((offset >= GPU_WORK_RAM_BASE) && (offset < GPU_WORK_RAM_BASE+0x1000))
434         {
435                 offset &= 0xFFF;
436                 uint16 data = ((uint16)gpu_ram_8[offset] << 8) | (uint16)gpu_ram_8[offset+1];
437                 return data;
438         }
439         else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset < GPU_CONTROL_RAM_BASE+0x20))
440         {
441 // This looks and smells wrong...
442 // But it *might* be OK...
443                 if (offset & 0x01)                      // Catch cases 1 & 3... (unaligned read)
444                         return (GPUReadByte(offset, who) << 8) | GPUReadByte(offset+1, who);
445
446                 uint32 data = GPUReadLong(offset & 0xFFFFFFFC, who);
447
448                 if (offset & 0x02)                      // Cases 0 & 2...
449                         return data & 0xFFFF;
450                 else
451                         return data >> 16;
452         }
453
454 //TEMP--Mirror of F03000? No. Writes only...
455 //if (offset >= 0xF0B000 && offset <= 0xF0BFFF)
456 //WriteLog("[GPUR16] --> Possible GPU RAM mirror access by %s!", whoName[who]);
457
458         return JaguarReadWord(offset, who);
459 }
460
461 //
462 // GPU dword access (read)
463 //
464 uint32 GPUReadLong(uint32 offset, uint32 who/*=UNKNOWN*/)
465 {
466         if (offset >= 0xF02000 && offset <= 0xF020FF)
467                 WriteLog("GPU: ReadLong--Attempt to read from GPU register file by %s!\n", whoName[who]);
468
469 //      if ((offset >= GPU_WORK_RAM_BASE) && (offset < GPU_WORK_RAM_BASE + 0x1000))
470         if ((offset >= GPU_WORK_RAM_BASE) && (offset <= GPU_WORK_RAM_BASE + 0x0FFC))
471         {
472                 offset &= 0xFFF;
473                 return ((uint32)gpu_ram_8[offset] << 24) | ((uint32)gpu_ram_8[offset+1] << 16)
474                         | ((uint32)gpu_ram_8[offset+2] << 8) | (uint32)gpu_ram_8[offset+3];//*/
475 //              return GET32(gpu_ram_8, offset);
476         }
477 //      else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset < GPU_CONTROL_RAM_BASE+0x20))
478         else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset <= GPU_CONTROL_RAM_BASE + 0x1C))
479         {
480                 offset &= 0x1F;
481                 switch (offset)
482                 {
483                 case 0x00:
484                         gpu_flag_c = (gpu_flag_c ? 1 : 0);
485                         gpu_flag_z = (gpu_flag_z ? 1 : 0);
486                         gpu_flag_n = (gpu_flag_n ? 1 : 0);
487
488                         gpu_flags = (gpu_flags & 0xFFFFFFF8) | (gpu_flag_n << 2) | (gpu_flag_c << 1) | gpu_flag_z;
489                                         
490                         return gpu_flags & 0xFFFFC1FF;
491                 case 0x04:
492                         return gpu_matrix_control;
493                 case 0x08:
494                         return gpu_pointer_to_matrix;
495                 case 0x0C:
496                         return gpu_data_organization;
497                 case 0x10:
498                         return gpu_pc;
499                 case 0x14:
500                         return gpu_control;
501                 case 0x18:
502                         return gpu_hidata;
503                 case 0x1C:
504                         return gpu_remain;
505                 default:                                                                // unaligned long read
506 #ifdef GPU_DEBUG
507                         WriteLog("GPU: Read32--unaligned 32 bit read at %08X by %s.\n", GPU_CONTROL_RAM_BASE + offset, whoName[who]);
508 #endif  // GPU_DEBUG
509                         return 0;
510                 }
511         }
512 //TEMP--Mirror of F03000? No. Writes only...
513 //if (offset >= 0xF0B000 && offset <= 0xF0BFFF)
514 //      WriteLog("[GPUR32] --> Possible GPU RAM mirror access by %s!\n", whoName[who]);
515 /*if (offset >= 0xF1D000 && offset <= 0xF1DFFF)
516         WriteLog("[GPUR32] --> Reading from Wavetable ROM!\n");//*/
517
518         return (JaguarReadWord(offset, who) << 16) | JaguarReadWord(offset + 2, who);
519 }
520
521 //
522 // GPU byte access (write)
523 //
524 void GPUWriteByte(uint32 offset, uint8 data, uint32 who/*=UNKNOWN*/)
525 {
526         if (offset >= 0xF02000 && offset <= 0xF020FF)
527                 WriteLog("GPU: WriteByte--Attempt to write to GPU register file by %s!\n", whoName[who]);
528
529         if ((offset >= GPU_WORK_RAM_BASE) && (offset <= GPU_WORK_RAM_BASE + 0x0FFF))
530         {
531                 gpu_ram_8[offset & 0xFFF] = data;
532
533 //This is the same stupid worthless code that was in the DSP!!! AARRRGGGGHHHHH!!!!!!
534 /*              if (!gpu_in_exec)
535                 {
536                         m68k_end_timeslice();
537                         dsp_releaseTimeslice();
538                 }*/
539                 return;
540         }
541         else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset <= GPU_CONTROL_RAM_BASE + 0x1F))
542         {
543                 uint32 reg = offset & 0x1C;
544                 int bytenum = offset & 0x03;
545
546 //This is definitely wrong!
547                 if ((reg >= 0x1C) && (reg <= 0x1F))
548                         gpu_div_control = (gpu_div_control & (~(0xFF << (bytenum << 3)))) | (data << (bytenum << 3));
549                 else
550                 {
551                         uint32 old_data = GPUReadLong(offset & 0xFFFFFFC, who);
552                         bytenum = 3 - bytenum; // convention motorola !!!
553                         old_data = (old_data & (~(0xFF << (bytenum << 3)))) | (data << (bytenum << 3));
554                         GPUWriteLong(offset & 0xFFFFFFC, old_data, who);
555                 }
556                 return;
557         }
558 //      WriteLog("gpu: writing %.2x at 0x%.8x\n",data,offset);
559         JaguarWriteByte(offset, data, who);
560 }
561
562 //
563 // GPU word access (write)
564 //
565 void GPUWriteWord(uint32 offset, uint16 data, uint32 who/*=UNKNOWN*/)
566 {
567         if (offset >= 0xF02000 && offset <= 0xF020FF)
568                 WriteLog("GPU: WriteWord--Attempt to write to GPU register file by %s!\n", whoName[who]);
569
570         if ((offset >= GPU_WORK_RAM_BASE) && (offset <= GPU_WORK_RAM_BASE + 0x0FFE))
571         {
572                 gpu_ram_8[offset & 0xFFF] = (data>>8) & 0xFF;
573                 gpu_ram_8[(offset+1) & 0xFFF] = data & 0xFF;//*/
574 /*              offset &= 0xFFF;
575                 SET16(gpu_ram_8, offset, data);//*/
576
577 /*if (offset >= 0xF03214 && offset < 0xF0321F)
578         WriteLog("GPU: Writing WORD (%04X) to GPU RAM (%08X)...\n", data, offset);//*/
579
580
581 //This is the same stupid worthless code that was in the DSP!!! AARRRGGGGHHHHH!!!!!!
582 /*              if (!gpu_in_exec)
583                 {
584                         m68k_end_timeslice();
585                         dsp_releaseTimeslice();
586                 }*/
587                 return;
588         }
589         else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset <= GPU_CONTROL_RAM_BASE + 0x1E))
590         {
591                 if (offset & 0x01)              // This is supposed to weed out unaligned writes, but does nothing...
592                 {
593 #ifdef GPU_DEBUG
594                         WriteLog("GPU: Write16--unaligned write @ %08X [%04X]\n", offset, data);
595                         GPUDumpRegisters();
596 #endif  // GPU_DEBUG
597                         return;
598                 }
599 //Dual locations in this range: $1C Divide unit remainder/Divide unit control (R/W)
600 //This just literally sucks.
601                 if ((offset & 0x1C) == 0x1C)
602                 {
603 //This doesn't look right either--handles cases 1, 2, & 3 all the same!
604                         if (offset & 0x02)
605                                 gpu_div_control = (gpu_div_control & 0xFFFF0000) | (data & 0xFFFF);
606                         else
607                                 gpu_div_control = (gpu_div_control & 0x0000FFFF) | ((data & 0xFFFF) << 16);
608                 }
609                 else 
610                 {
611 //WriteLog("[GPU W16:%08X,%04X]", offset, data);
612                         uint32 old_data = GPUReadLong(offset & 0xFFFFFFC, who);
613                         if (offset & 0x02)
614                                 old_data = (old_data & 0xFFFF0000) | (data & 0xFFFF);
615                         else
616                                 old_data = (old_data & 0x0000FFFF) | ((data & 0xFFFF) << 16);
617                         GPUWriteLong(offset & 0xFFFFFFC, old_data, who);
618                 }
619                 return;
620         }
621         else if ((offset == GPU_WORK_RAM_BASE + 0x0FFF) || (GPU_CONTROL_RAM_BASE + 0x1F))
622         {
623 #ifdef GPU_DEBUG
624                         WriteLog("GPU: Write16--unaligned write @ %08X by %s [%04X]!\n", offset, whoName[who], data);
625                         GPUDumpRegisters();
626 #endif  // GPU_DEBUG
627                 return;
628         }
629
630         // Have to be careful here--this can cause an infinite loop!
631         JaguarWriteWord(offset, data, who);
632 }
633
634 //
635 // GPU dword access (write)
636 //
637 void GPUWriteLong(uint32 offset, uint32 data, uint32 who/*=UNKNOWN*/)
638 {
639         if (offset >= 0xF02000 && offset <= 0xF020FF)
640                 WriteLog("GPU: WriteLong--Attempt to write to GPU register file by %s!\n", whoName[who]);
641
642 //      if ((offset >= GPU_WORK_RAM_BASE) && (offset < GPU_WORK_RAM_BASE + 0x1000))
643         if ((offset >= GPU_WORK_RAM_BASE) && (offset <= GPU_WORK_RAM_BASE + 0x0FFC))
644         {
645 #ifdef GPU_DEBUG
646                 if (offset & 0x03)
647                 {
648                         WriteLog("GPU: Write32--unaligned write @ %08X [%08X] by %s\n", offset, data, whoName[who]);
649                         GPUDumpRegisters();
650                 }
651 #endif  // GPU_DEBUG
652
653                 offset &= 0xFFF;
654                 SET32(gpu_ram_8, offset, data);
655                 return;
656         }
657 //      else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset < GPU_CONTROL_RAM_BASE+0x20))
658         else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset <= GPU_CONTROL_RAM_BASE + 0x1C))
659         {
660                 offset &= 0x1F;
661                 switch (offset)
662                 {
663                 case 0x00:
664                 {
665                         bool IMASKCleared = (gpu_flags & IMASK) && !(data & IMASK);
666                         gpu_flags = data;
667                         gpu_flag_z = gpu_flags & ZERO_FLAG;
668                         gpu_flag_c = (gpu_flags & CARRY_FLAG) >> 1;
669                         gpu_flag_n = (gpu_flags & NEGA_FLAG) >> 2;
670                         GPUUpdateRegisterBanks();
671                         gpu_control &= ~((gpu_flags & CINT04FLAGS) >> 3);       // Interrupt latch clear bits
672 //Writing here is only an interrupt enable--this approach is just plain wrong!
673 //                      GPUHandleIRQs();
674 //This, however, is A-OK! ;-)
675                         if (IMASKCleared)                                               // If IMASK was cleared,
676                                 GPUHandleIRQs();                                        // see if any other interrupts need servicing!
677 #ifdef GPU_DEBUG
678                         if (gpu_flags & (INT_ENA0 | INT_ENA1 | INT_ENA2 | INT_ENA3 | INT_ENA4))
679                                 WriteLog("GPU: Interrupt enable set by %s! Bits: %02X\n", whoName[who], (gpu_flags >> 4) & 0x1F);
680                         WriteLog("GPU: REGPAGE %s...\n", (gpu_flags & REGPAGE ? "set" : "cleared"));
681 #endif  // GPU_DEBUG
682                         break;
683                 }
684                 case 0x04:
685                         gpu_matrix_control = data;
686                         break;
687                 case 0x08:
688                         // This can only point to long aligned addresses
689                         gpu_pointer_to_matrix = data & 0xFFFFFFFC;
690                         break;
691                 case 0x0C:
692                         gpu_data_organization = data;
693                         break;
694                 case 0x10:
695                         gpu_pc = data;
696 #ifdef GPU_DEBUG
697 WriteLog("GPU: %s setting GPU PC to %08X %s\n", whoName[who], gpu_pc, (GPU_RUNNING ? "(GPU is RUNNING!)" : ""));//*/
698 #endif  // GPU_DEBUG
699                         break;
700                 case 0x14:
701                 {       
702 //                      uint32 gpu_was_running = GPU_RUNNING;
703                         data &= ~0xF7C0;                // Disable writes to INT_LAT0-4 & TOM version number
704
705                         // check for GPU -> CPU interrupt
706                         if (data & 0x02)
707                         {
708 //WriteLog("GPU->CPU interrupt\n");
709                                 if (tom_irq_enabled(IRQ_GPU))
710                                 {
711                                         if ((tom_irq_enabled(IRQ_GPU)) && (jaguar_interrupt_handler_is_valid(64)))
712                                         {
713                                                 tom_set_pending_gpu_int();
714                                                 m68k_set_irq(7);                        // Set 68000 NMI
715                                                 gpu_releaseTimeslice();
716                                         }
717                                 }
718                                 data &= ~0x02;
719                         }
720
721                         // check for CPU -> GPU interrupt #0
722                         if (data & 0x04)
723                         {
724 //WriteLog("CPU->GPU interrupt\n");
725                                 GPUSetIRQLine(0, ASSERT_LINE);
726                                 m68k_end_timeslice();
727                                 dsp_releaseTimeslice();
728                                 data &= ~0x04;
729                         }
730
731                         // single stepping
732                         if (data & 0x10)
733                         {
734                                 //WriteLog("asked to perform a single step (single step is %senabled)\n",(data&0x8)?"":"not ");
735                         }
736                         gpu_control = (gpu_control & 0xF7C0) | (data & (~0xF7C0));
737
738                         // if gpu wasn't running but is now running, execute a few cycles
739 #ifndef GPU_SINGLE_STEPPING
740 /*                      if (!gpu_was_running && GPU_RUNNING)
741 #ifdef GPU_DEBUG
742                         {
743                                 WriteLog("GPU: Write32--About to do stupid braindead GPU execution for 200 cycles.\n");
744 #endif  // GPU_DEBUG
745                                 gpu_exec(200);
746 #ifdef GPU_DEBUG
747                         }
748 #endif  // GPU_DEBUG//*/
749 #else
750                         if (gpu_control & 0x18)
751                                 gpu_exec(1);
752 #endif  // #ifndef GPU_SINGLE_STEPPING
753 #ifdef GPU_DEBUG
754 WriteLog("Write to GPU CTRL by %s: %08X ", whoName[who], data);
755 if (GPU_RUNNING)
756         WriteLog(" --> Starting to run at %08X by %s...", gpu_pc, whoName[who]);
757 else
758         WriteLog(" --> Stopped by %s! (GPU_PC: %08X)", whoName[who], gpu_pc);
759 WriteLog("\n");
760 #endif  // GPU_DEBUG
761 //if (GPU_RUNNING)
762 //      GPUDumpDisassembly();
763 /*if (GPU_RUNNING)
764 {
765         if (gpu_pc == 0xF035D8)
766         {
767 //              GPUDumpDisassembly();
768 //              log_done();
769 //              exit(1);
770                 gpu_control &= 0xFFFFFFFE;      // Don't run it and let's see what happens!
771 //Hmm. Seems to lock up when going into the demo...
772 //Try to disable the collision altogether!
773         }
774 }//*/
775 extern int effect_start5;
776 static bool finished = false;
777 //if (GPU_RUNNING && effect_start5 && !finished)
778 if (GPU_RUNNING && effect_start5 && gpu_pc == 0xF035D8)
779 {
780         // Let's do a dump of $6528!
781 /*      uint32 numItems = JaguarReadWord(0x6BD6);
782         WriteLog("\nDump of $6528: %u items.\n\n", numItems);
783         for(int i=0; i<numItems*3*4; i+=3*4)
784         {
785                 WriteLog("\t%04X: %08X %08X %08X -> ", 0x6528+i, JaguarReadLong(0x6528+i),
786                         JaguarReadLong(0x6528+i+4), JaguarReadLong(0x6528+i+8));
787                 uint16 link = JaguarReadWord(0x6528+i+8+2);
788                 for(int j=0; j<40; j+=4)
789                         WriteLog("%08X ", JaguarReadLong(link + j));
790                 WriteLog("\n");
791         }
792         WriteLog("\n");//*/
793         // Let's try a manual blit here...
794 //This isn't working the way it should! !!! FIX !!!
795 //Err, actually, it is.
796 // NOW, it works right! Problem solved!!! It's a blitter bug!
797 /*      uint32 src = 0x4D54, dst = 0xF03000, width = 10 * 4;
798         for(int y=0; y<127; y++)
799         {
800                 for(int x=0; x<2; x++)
801                 {
802                         JaguarWriteLong(dst, JaguarReadLong(src));
803                         
804                         src += 4;
805                         dst += 4;
806                 }
807                 src += width - (2 * 4);
808         }//*/
809 /*      finished = true;
810         doGPUDis = true;
811         WriteLog("\nGPU: About to execute collision detection code.\n\n");//*/
812
813 /*      WriteLog("\nGPU: About to execute collision detection code. Data @ 4D54:\n\n");
814         int count = 0;
815         for(int i=0x004D54; i<0x004D54+2048; i++)
816         {
817                 WriteLog("%02X ", JaguarReadByte(i));
818                 count++;
819                 if (count == 32)
820                 {
821                         count = 0;
822                         WriteLog("\n");
823                 }
824         }
825         WriteLog("\n\nData @ F03000:\n\n");
826         count = 0;
827         for(int i=0xF03000; i<0xF03200; i++)
828         {
829                 WriteLog("%02X ", JaguarReadByte(i));
830                 count++;
831                 if (count == 32)
832                 {
833                         count = 0;
834                         WriteLog("\n");
835                 }
836         }
837         WriteLog("\n\n");
838         log_done();
839         exit(0);//*/
840 }
841 //if (!GPU_RUNNING)
842 //      doGPUDis = false;
843 /*if (!GPU_RUNNING && finished)
844 {
845         WriteLog("\nGPU: Finished collision detection code. Exiting!\n\n");
846         GPUDumpRegisters();
847         log_done();
848         exit(0);
849 }//*/
850                         // (?) If we're set running by the M68K (or DSP?) then end its timeslice to
851                         // allow the GPU a chance to run...
852                         // Yes! This partially fixed Trevor McFur...
853                         if (GPU_RUNNING)
854                                 m68k_end_timeslice();
855                         break;
856                 }
857                 case 0x18:
858                         gpu_hidata = data;
859                         break;
860                 case 0x1C:
861                         gpu_div_control = data;
862                         break;
863 //              default:   // unaligned long write
864                         //exit(0);
865                         //__asm int 3
866                 }
867                 return;
868         }
869
870 //      JaguarWriteWord(offset, (data >> 16) & 0xFFFF, who);
871 //      JaguarWriteWord(offset+2, data & 0xFFFF, who);
872 // We're a 32-bit processor, we can do a long write...!
873         JaguarWriteLong(offset, data, who);
874 }
875
876 //
877 // Change register banks if necessary
878 //
879 void GPUUpdateRegisterBanks(void)
880 {
881         int bank = (gpu_flags & REGPAGE);               // REGPAGE bit
882
883         if (gpu_flags & IMASK)                                  // IMASK bit
884                 bank = 0;                                                       // IMASK forces main bank to be bank 0
885
886         if (bank)
887                 gpu_reg = gpu_reg_bank_1, gpu_alternate_reg = gpu_reg_bank_0;
888         else
889                 gpu_reg = gpu_reg_bank_0, gpu_alternate_reg = gpu_reg_bank_1;
890 }
891
892 void GPUHandleIRQs(void)
893 {
894         // Bail out if we're already in an interrupt!
895         if (gpu_flags & IMASK)
896                 return;
897
898         // Get the interrupt latch & enable bits
899         uint32 bits = (gpu_control >> 6) & 0x1F, mask = (gpu_flags >> 4) & 0x1F;
900         
901         // Bail out if latched interrupts aren't enabled
902         bits &= mask;
903         if (!bits)
904                 return;
905         
906         // Determine which interrupt to service
907         uint32 which = 0; //Isn't there a #pragma to disable this warning???
908         if (bits & 0x01)
909                 which = 0;
910         if (bits & 0x02)
911                 which = 1;
912         if (bits & 0x04)
913                 which = 2;
914         if (bits & 0x08)
915                 which = 3;
916         if (bits & 0x10)
917                 which = 4;
918
919         if (start_logging)
920                 WriteLog("GPU: Generating IRQ #%i\n", which);
921
922         // set the interrupt flag 
923         gpu_flags |= IMASK;
924         GPUUpdateRegisterBanks();
925
926         // subqt  #4,r31                ; pre-decrement stack pointer 
927         // move  pc,r30                 ; address of interrupted code 
928         // store  r30,(r31)     ; store return address
929         gpu_reg[31] -= 4;
930         GPUWriteLong(gpu_reg[31], gpu_pc - 2, GPU);
931         
932         // movei  #service_address,r30  ; pointer to ISR entry 
933         // jump  (r30)                                  ; jump to ISR 
934         // nop
935         gpu_pc = gpu_reg[30] = GPU_WORK_RAM_BASE + (which * 0x10);
936 }
937
938 void GPUSetIRQLine(int irqline, int state)
939 {
940         if (start_logging)
941                 WriteLog("GPU: Setting GPU IRQ line #%i\n", irqline);
942
943         uint32 mask = 0x0040 << irqline;
944         gpu_control &= ~mask;                           // Clear the interrupt latch
945
946         if (state)
947         {
948                 gpu_control |= mask;                    // Assert the interrupt latch
949                 GPUHandleIRQs();                                // And handle the interrupt...
950         }
951 }
952
953 //TEMPORARY: Testing only!
954 //#include "gpu2.h"
955 //#include "gpu3.h"
956
957 void gpu_init(void)
958 {
959         memory_malloc_secure((void **)&gpu_ram_8, 0x1000, "GPU work RAM");
960         memory_malloc_secure((void **)&gpu_reg_bank_0, 32 * sizeof(int32), "GPU bank 0 regs");
961         memory_malloc_secure((void **)&gpu_reg_bank_1, 32 * sizeof(int32), "GPU bank 1 regs");
962
963         build_branch_condition_table();
964
965         gpu_reset();
966
967 //TEMPORARY: Testing only!
968 //      gpu2_init();
969 //      gpu3_init();
970 }
971
972 void gpu_reset(void)
973 {
974         // GPU registers (directly visible)
975         gpu_flags                         = 0x00000000;
976         gpu_matrix_control    = 0x00000000;
977         gpu_pointer_to_matrix = 0x00000000;
978         gpu_data_organization = 0xFFFFFFFF;
979         gpu_pc                            = 0x00F03000;
980         gpu_control                       = 0x00002800;                 // Correctly sets this as TOM Rev. 2
981         gpu_hidata                        = 0x00000000;
982         gpu_remain                        = 0x00000000;                 // These two registers are RO/WO
983         gpu_div_control           = 0x00000000;
984
985         // GPU internal register
986         gpu_acc                           = 0x00000000;
987
988         gpu_reg = gpu_reg_bank_0;
989         gpu_alternate_reg = gpu_reg_bank_1;
990
991         for(int i=0; i<32; i++)
992                 gpu_reg[i] = gpu_alternate_reg[i] = 0x00000000;
993
994         CLR_ZNC;
995         memset(gpu_ram_8, 0xFF, 0x1000);
996         gpu_in_exec = 0;
997 //not needed    GPUInterruptPending = false;
998         gpu_reset_stats();
999 }
1000
1001 uint32 gpu_read_pc(void)
1002 {
1003         return gpu_pc;
1004 }
1005
1006 void gpu_reset_stats(void)
1007 {
1008         for(uint32 i=0; i<64; i++)
1009                 gpu_opcode_use[i] = 0;
1010         WriteLog("--> GPU stats were reset!\n");
1011 }
1012
1013 void GPUDumpDisassembly(void)
1014 {
1015         char buffer[512];
1016
1017         WriteLog("\n---[GPU code at 00F03000]---------------------------\n");
1018         uint32 j = 0xF03000;
1019         while (j <= 0xF03FFF)
1020         {
1021                 uint32 oldj = j;
1022                 j += dasmjag(JAGUAR_GPU, buffer, j);
1023                 WriteLog("\t%08X: %s\n", oldj, buffer);
1024         }
1025 }
1026
1027 void GPUDumpRegisters(void)
1028 {
1029         WriteLog("\n---[GPU flags: NCZ %d%d%d]-----------------------\n", gpu_flag_n, gpu_flag_c, gpu_flag_z);
1030         WriteLog("\nRegisters bank 0\n");
1031         for(int j=0; j<8; j++)
1032         {
1033                 WriteLog("\tR%02i = %08X R%02i = %08X R%02i = %08X R%02i = %08X\n",
1034                                                   (j << 2) + 0, gpu_reg_bank_0[(j << 2) + 0],
1035                                                   (j << 2) + 1, gpu_reg_bank_0[(j << 2) + 1],
1036                                                   (j << 2) + 2, gpu_reg_bank_0[(j << 2) + 2],
1037                                                   (j << 2) + 3, gpu_reg_bank_0[(j << 2) + 3]);
1038         }
1039         WriteLog("Registers bank 1\n");
1040         for(int j=0; j<8; j++)
1041         {
1042                 WriteLog("\tR%02i = %08X R%02i = %08X R%02i = %08X R%02i = %08X\n",
1043                                                   (j << 2) + 0, gpu_reg_bank_1[(j << 2) + 0],
1044                                                   (j << 2) + 1, gpu_reg_bank_1[(j << 2) + 1],
1045                                                   (j << 2) + 2, gpu_reg_bank_1[(j << 2) + 2],
1046                                                   (j << 2) + 3, gpu_reg_bank_1[(j << 2) + 3]);
1047         }
1048 }
1049
1050 void GPUDumpMemory(void)
1051 {
1052         WriteLog("\n---[GPU data at 00F03000]---------------------------\n");
1053         for(int i=0; i<0xFFF; i+=4)
1054                 WriteLog("\t%08X: %02X %02X %02X %02X\n", 0xF03000+i, gpu_ram_8[i],
1055                         gpu_ram_8[i+1], gpu_ram_8[i+2], gpu_ram_8[i+3]);
1056 }
1057
1058 void gpu_done(void)
1059
1060         WriteLog("GPU: Stopped at PC=%08X (GPU %s running)\n", (unsigned int)gpu_pc, GPU_RUNNING ? "was" : "wasn't");
1061
1062         // Get the interrupt latch & enable bits 
1063         uint8 bits = (gpu_control >> 6) & 0x1F, mask = (gpu_flags >> 4) & 0x1F;
1064         WriteLog("GPU: Latch bits = %02X, enable bits = %02X\n", bits, mask);
1065
1066         GPUDumpRegisters();
1067         GPUDumpDisassembly();
1068
1069         WriteLog("\nGPU opcodes use:\n");
1070         for(int i=0; i<64; i++)
1071         {
1072                 if (gpu_opcode_use[i])
1073                         WriteLog("\t%17s %lu\n", gpu_opcode_str[i], gpu_opcode_use[i]);
1074         }
1075         WriteLog("\n");
1076
1077         memory_free(gpu_ram_8);
1078         memory_free(gpu_reg_bank_0);
1079         memory_free(gpu_reg_bank_1);
1080 }
1081
1082 //
1083 // Main GPU execution core
1084 //
1085 static int testCount = 1;
1086 static int len = 0;
1087 static bool tripwire = false;
1088 void gpu_exec(int32 cycles)
1089 {
1090         if (!GPU_RUNNING)
1091                 return;
1092
1093 #ifdef GPU_SINGLE_STEPPING
1094         if (gpu_control & 0x18)
1095         {
1096                 cycles = 1;
1097                 gpu_control &= ~0x10;
1098         }
1099 #endif
1100         GPUHandleIRQs();
1101         gpu_releaseTimeSlice_flag = 0;
1102         gpu_in_exec++;
1103
1104         while (cycles > 0 && GPU_RUNNING)
1105         {
1106 if (gpu_ram_8[0x054] == 0x98 && gpu_ram_8[0x055] == 0x0A && gpu_ram_8[0x056] == 0x03
1107         && gpu_ram_8[0x057] == 0x00 && gpu_ram_8[0x058] == 0x00 && gpu_ram_8[0x059] == 0x00)
1108 {
1109         if (gpu_pc == 0xF03000)
1110         {
1111                 extern uint32 starCount;
1112                 starCount = 0;
1113 /*              WriteLog("GPU: Starting starfield generator... Dump of [R03=%08X]:\n", gpu_reg_bank_0[03]);
1114                 uint32 base = gpu_reg_bank_0[3];
1115                 for(uint32 i=0; i<0x100; i+=16)
1116                 {
1117                         WriteLog("%02X: ", i);
1118                         for(uint32 j=0; j<16; j++)
1119                         {
1120                                 WriteLog("%02X ", JaguarReadByte(base + i + j));
1121                         }
1122                         WriteLog("\n");
1123                 }*/
1124         }
1125 //      if (gpu_pc == 0xF03)
1126         {
1127         }
1128 }//*/
1129 /*if (gpu_pc == 0xF03B9E && gpu_reg_bank_0[01] == 0)
1130 {
1131         GPUDumpRegisters();
1132         WriteLog("GPU: Starting disassembly log...\n");
1133         doGPUDis = true;
1134 }//*/
1135 /*if (gpu_pc == 0xF0359A)
1136 {
1137         doGPUDis = true;
1138         GPUDumpRegisters();
1139 }*/
1140 /*              gpu_flag_c = (gpu_flag_c ? 1 : 0);
1141                 gpu_flag_z = (gpu_flag_z ? 1 : 0);
1142                 gpu_flag_n = (gpu_flag_n ? 1 : 0);*/
1143         
1144                 uint16 opcode = GPUReadWord(gpu_pc, GPU);
1145                 uint32 index = opcode >> 10;
1146                 gpu_instruction = opcode;                               // Added for GPU #3...
1147                 gpu_opcode_first_parameter = (opcode >> 5) & 0x1F;
1148                 gpu_opcode_second_parameter = opcode & 0x1F;
1149 /*if (gpu_pc == 0xF03BE8)
1150 WriteLog("Start of OP frame write...\n");
1151 if (gpu_pc == 0xF03EEE)
1152 WriteLog("--> Writing BRANCH object ---\n");
1153 if (gpu_pc == 0xF03F62)
1154 WriteLog("--> Writing BITMAP object ***\n");//*/
1155 /*if (gpu_pc == 0xF03546)
1156 {
1157         WriteLog("\n--> GPU PC: F03546\n");
1158         GPUDumpRegisters();
1159         GPUDumpDisassembly();
1160 }//*/
1161 /*if (gpu_pc == 0xF033F6)
1162 {
1163         WriteLog("\n--> GPU PC: F033F6\n");
1164         GPUDumpRegisters();
1165         GPUDumpDisassembly();
1166 }//*/
1167 /*if (gpu_pc == 0xF033CC)
1168 {
1169         WriteLog("\n--> GPU PC: F033CC\n");
1170         GPUDumpRegisters();
1171         GPUDumpDisassembly();
1172 }//*/
1173 /*if (gpu_pc == 0xF033D6)
1174 {
1175         WriteLog("\n--> GPU PC: F033D6 (#%d)\n", testCount++);
1176         GPUDumpRegisters();
1177         GPUDumpMemory();
1178 }//*/
1179 /*if (gpu_pc == 0xF033D8)
1180 {
1181         WriteLog("\n--> GPU PC: F033D8 (#%d)\n", testCount++);
1182         GPUDumpRegisters();
1183         GPUDumpMemory();
1184 }//*/
1185 /*if (gpu_pc == 0xF0358E)
1186 {
1187         WriteLog("\n--> GPU PC: F0358E (#%d)\n", testCount++);
1188         GPUDumpRegisters();
1189         GPUDumpMemory();
1190 }//*/
1191 /*if (gpu_pc == 0xF034CA)
1192 {
1193         WriteLog("\n--> GPU PC: F034CA (#%d)\n", testCount++);
1194         GPUDumpRegisters();
1195 }//*/
1196 /*if (gpu_pc == 0xF034CA)
1197 {
1198         len = gpu_reg[1] + 4;//, r9save = gpu_reg[9];
1199         WriteLog("\nAbout to subtract [#%d] (R14=%08X, R15=%08X, R9=%08X):\n   ", testCount++, gpu_reg[14], gpu_reg[15], gpu_reg[9]);
1200         for(int i=0; i<len; i+=4)
1201                 WriteLog(" %08X", GPUReadLong(gpu_reg[15]+i));
1202         WriteLog("\n   ");
1203         for(int i=0; i<len; i+=4)
1204                 WriteLog(" %08X", GPUReadLong(gpu_reg[14]+i));
1205         WriteLog("\n\n");
1206 }
1207 if (gpu_pc == 0xF034DE)
1208 {
1209         WriteLog("\nSubtracted! (R14=%08X, R15=%08X):\n   ", gpu_reg[14], gpu_reg[15]);
1210         for(int i=0; i<len; i+=4)
1211                 WriteLog(" %08X", GPUReadLong(gpu_reg[15]+i));
1212         WriteLog("\n   ");
1213         for(int i=0; i<len; i+=4)
1214                 WriteLog(" %08X", GPUReadLong(gpu_reg[14]+i));
1215         WriteLog("\n   ");
1216         for(int i=0; i<len; i+=4)
1217                 WriteLog(" --------");
1218         WriteLog("\n   ");
1219         for(int i=0; i<len; i+=4)
1220                 WriteLog(" %08X", GPUReadLong(gpu_reg[9]+4+i));
1221         WriteLog("\n\n");
1222 }//*/
1223 /*if (gpu_pc == 0xF035C8)
1224 {
1225         WriteLog("\n--> GPU PC: F035C8 (#%d)\n", testCount++);
1226         GPUDumpRegisters();
1227         GPUDumpDisassembly();
1228 }//*/
1229
1230 if (gpu_start_log)
1231 {
1232 //      gpu_reset_stats();
1233 static char buffer[512];
1234 dasmjag(JAGUAR_GPU, buffer, gpu_pc);
1235 WriteLog("GPU: [%08X] %s (RM=%08X, RN=%08X) -> ", gpu_pc, buffer, RM, RN);
1236 }//*/
1237 //$E400 -> 1110 01 -> $39 -> 57
1238 //GPU #1
1239                 gpu_pc += 2;
1240                 gpu_opcode[index]();
1241 //GPU #2
1242 //              gpu2_opcode[index]();
1243 //              gpu_pc += 2;
1244 //GPU #3                                (Doesn't show ATARI logo! #1 & #2 do...)
1245 //              gpu_pc += 2;
1246 //              gpu3_opcode[index]();
1247
1248 // BIOS hacking
1249 //GPU: [00F03548] jr      nz,00F03560 (0xd561) (RM=00F03114, RN=00000004) ->     --> JR: Branch taken. 
1250 /*static bool firstTime = true;
1251 if (gpu_pc == 0xF03548 && firstTime)
1252 {
1253         gpu_flag_z = 1;
1254 //      firstTime = false;
1255
1256 //static char buffer[512];
1257 //int k=0xF03548;
1258 //while (k<0xF0356C)
1259 //{
1260 //int oldk = k;
1261 //k += dasmjag(JAGUAR_GPU, buffer, k);
1262 //WriteLog("GPU: [%08X] %s\n", oldk, buffer);
1263 //}
1264 //      gpu_start_log = 1;
1265 }//*/
1266 //GPU: [00F0354C] jump    nz,(r29) (0xd3a1) (RM=00F03314, RN=00000004) -> (RM=00F03314, RN=00000004)
1267 /*if (gpu_pc == 0xF0354C)
1268         gpu_flag_z = 0;//, gpu_start_log = 1;//*/
1269
1270                 cycles -= gpu_opcode_cycles[index];
1271                 gpu_opcode_use[index]++;
1272 if (gpu_start_log)
1273         WriteLog("(RM=%08X, RN=%08X)\n", RM, RN);//*/
1274 if ((gpu_pc < 0xF03000 || gpu_pc > 0xF03FFF) && !tripwire)
1275 {
1276         WriteLog("GPU: Executing outside local RAM! GPU_PC: %08X\n", gpu_pc);
1277         tripwire = true;
1278 }
1279         }
1280
1281         gpu_in_exec--;
1282 }
1283
1284 //
1285 // GPU opcodes
1286 //
1287
1288 /*
1289 GPU opcodes use (offset punch--vertically below bad guy):
1290                       add 18686
1291                      addq 32621
1292                       sub 7483
1293                      subq 10252
1294                       and 21229
1295                        or 15003
1296                      btst 1822
1297                      bset 2072
1298                      mult 141
1299                       div 2392
1300                      shlq 13449
1301                      shrq 10297
1302                     sharq 11104
1303                       cmp 6775
1304                      cmpq 5944
1305                      move 31259
1306                     moveq 4473
1307                     movei 23277
1308                     loadb 46
1309                     loadw 4201
1310                      load 28580
1311          load_r14_indexed 1183
1312          load_r15_indexed 1125
1313                    storew 178
1314                     store 10144
1315         store_r14_indexed 320
1316         store_r15_indexed 1
1317                   move_pc 1742
1318                      jump 24467
1319                        jr 18090
1320                       nop 41362
1321 */
1322
1323 static void gpu_opcode_jump(void)
1324 {
1325 #ifdef GPU_DIS_JUMP
1326 char * condition[32] =
1327 {       "T", "nz", "z", "???", "nc", "nc nz", "nc z", "???", "c", "c nz",
1328         "c z", "???", "???", "???", "???", "???", "???", "???", "???",
1329         "???", "nn", "nn nz", "nn z", "???", "n", "n nz", "n z", "???",
1330         "???", "???", "???", "F" };
1331         if (doGPUDis)
1332                 WriteLog("%06X: JUMP   %s, (R%02u) [NCZ:%u%u%u, R%02u=%08X] ", gpu_pc-2, condition[IMM_2], IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM);
1333 #endif
1334         // normalize flags
1335 /*      gpu_flag_c = (gpu_flag_c ? 1 : 0);
1336         gpu_flag_z = (gpu_flag_z ? 1 : 0);
1337         gpu_flag_n = (gpu_flag_n ? 1 : 0);*/
1338         // KLUDGE: Used by BRANCH_CONDITION
1339         uint32 jaguar_flags = (gpu_flag_n << 2) | (gpu_flag_c << 1) | gpu_flag_z;
1340
1341         if (BRANCH_CONDITION(IMM_2))
1342         {
1343 #ifdef GPU_DIS_JUMP
1344         if (doGPUDis)
1345                 WriteLog("Branched!\n");
1346 #endif
1347 if (gpu_start_log)
1348         WriteLog("    --> JUMP: Branch taken.\n");
1349                 uint32 delayed_pc = RM;
1350                 gpu_exec(1);
1351                 gpu_pc = delayed_pc;
1352 /*              uint16 opcode = GPUReadWord(gpu_pc, GPU);
1353                 gpu_opcode_first_parameter = (opcode >> 5) & 0x1F;
1354                 gpu_opcode_second_parameter = opcode & 0x1F;
1355
1356                 gpu_pc = delayed_pc;
1357                 gpu_opcode[opcode>>10]();//*/
1358         }
1359 #ifdef GPU_DIS_JUMP
1360         else
1361                 if (doGPUDis)
1362                         WriteLog("Branch NOT taken.\n");
1363 #endif
1364 }
1365
1366 static void gpu_opcode_jr(void)
1367 {
1368 #ifdef GPU_DIS_JR
1369 char * condition[32] =
1370 {       "T", "nz", "z", "???", "nc", "nc nz", "nc z", "???", "c", "c nz",
1371         "c z", "???", "???", "???", "???", "???", "???", "???", "???",
1372         "???", "nn", "nn nz", "nn z", "???", "n", "n nz", "n z", "???",
1373         "???", "???", "???", "F" };
1374         if (doGPUDis)
1375                 WriteLog("%06X: JR     %s, %06X [NCZ:%u%u%u] ", gpu_pc-2, condition[IMM_2], gpu_pc+((IMM_1 & 0x10 ? 0xFFFFFFF0 | IMM_1 : IMM_1) * 2), gpu_flag_n, gpu_flag_c, gpu_flag_z);
1376 #endif
1377 /*      if (CONDITION(jaguar.op & 31))
1378         {
1379                 INT32 r1 = (INT8)((jaguar.op >> 2) & 0xF8) >> 2;
1380                 UINT32 newpc = jaguar.PC + r1;
1381                 CALL_MAME_DEBUG;
1382                 jaguar.op = ROPCODE(jaguar.PC);
1383                 jaguar.PC = newpc;
1384                 (*jaguar.table[jaguar.op >> 10])();
1385
1386                 jaguar_icount -= 3;     // 3 wait states guaranteed
1387         }*/
1388         // normalize flags
1389 /*      gpu_flag_n = (gpu_flag_n ? 1 : 0);
1390         gpu_flag_c = (gpu_flag_c ? 1 : 0);
1391         gpu_flag_z = (gpu_flag_z ? 1 : 0);*/
1392         // KLUDGE: Used by BRANCH_CONDITION
1393         uint32 jaguar_flags = (gpu_flag_n << 2) | (gpu_flag_c << 1) | gpu_flag_z;
1394
1395         if (BRANCH_CONDITION(IMM_2))
1396         {
1397 #ifdef GPU_DIS_JR
1398         if (doGPUDis)
1399                 WriteLog("Branched!\n");
1400 #endif
1401 if (gpu_start_log)
1402         WriteLog("    --> JR: Branch taken.\n");
1403                 int32 offset = (IMM_1 & 0x10 ? 0xFFFFFFF0 | IMM_1 : IMM_1);             // Sign extend IMM_1
1404                 int32 delayed_pc = gpu_pc + (offset * 2);
1405                 gpu_exec(1);
1406                 gpu_pc = delayed_pc;
1407 /*              uint16 opcode = GPUReadWord(gpu_pc, GPU);
1408                 gpu_opcode_first_parameter = (opcode >> 5) & 0x1F;
1409                 gpu_opcode_second_parameter = opcode & 0x1F;
1410
1411                 gpu_pc = delayed_pc;
1412                 gpu_opcode[opcode>>10]();//*/
1413         }
1414 #ifdef GPU_DIS_JR
1415         else
1416                 if (doGPUDis)
1417                         WriteLog("Branch NOT taken.\n");
1418 #endif
1419 }
1420
1421 static void gpu_opcode_add(void)
1422 {
1423 #ifdef GPU_DIS_ADD
1424         if (doGPUDis)
1425                 WriteLog("%06X: ADD    R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1426 #endif
1427         UINT32 res = RN + RM;
1428         CLR_ZNC; SET_ZNC_ADD(RN, RM, res);
1429         RN = res;
1430 #ifdef GPU_DIS_ADD
1431         if (doGPUDis)
1432                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1433 #endif
1434 }
1435
1436 static void gpu_opcode_addc(void)
1437 {
1438 #ifdef GPU_DIS_ADDC
1439         if (doGPUDis)
1440                 WriteLog("%06X: ADDC   R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1441 #endif
1442 /*      int dreg = jaguar.op & 31;
1443         UINT32 r1 = jaguar.r[(jaguar.op >> 5) & 31];
1444         UINT32 r2 = jaguar.r[dreg];
1445         UINT32 res = r2 + r1 + ((jaguar.FLAGS >> 1) & 1);
1446         jaguar.r[dreg] = res;
1447         CLR_ZNC; SET_ZNC_ADD(r2,r1,res);*/
1448
1449         UINT32 res = RN + RM + gpu_flag_c;
1450         UINT32 carry = gpu_flag_c;
1451 //      SET_ZNC_ADD(RN, RM, res); //???BUG??? Yes!
1452         SET_ZNC_ADD(RN + carry, RM, res);
1453 //      SET_ZNC_ADD(RN, RM + carry, res);
1454         RN = res;
1455 #ifdef GPU_DIS_ADDC
1456         if (doGPUDis)
1457                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1458 #endif
1459 }
1460
1461 static void gpu_opcode_addq(void)
1462 {
1463 #ifdef GPU_DIS_ADDQ
1464         if (doGPUDis)
1465                 WriteLog("%06X: ADDQ   #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1466 #endif
1467         UINT32 r1 = gpu_convert_zero[IMM_1];
1468         UINT32 res = RN + r1;
1469         CLR_ZNC; SET_ZNC_ADD(RN, r1, res);
1470         RN = res;
1471 #ifdef GPU_DIS_ADDQ
1472         if (doGPUDis)
1473                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1474 #endif
1475 }
1476
1477 static void gpu_opcode_addqt(void)
1478 {
1479 #ifdef GPU_DIS_ADDQT
1480         if (doGPUDis)
1481                 WriteLog("%06X: ADDQT  #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1482 #endif
1483         RN += gpu_convert_zero[IMM_1];
1484 #ifdef GPU_DIS_ADDQT
1485         if (doGPUDis)
1486                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1487 #endif
1488 }
1489
1490 static void gpu_opcode_sub(void)
1491 {
1492 #ifdef GPU_DIS_SUB
1493         if (doGPUDis)
1494                 WriteLog("%06X: SUB    R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1495 #endif
1496         UINT32 res = RN - RM;
1497         SET_ZNC_SUB(RN, RM, res);
1498         RN = res;
1499 #ifdef GPU_DIS_SUB
1500         if (doGPUDis)
1501                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1502 #endif
1503 }
1504
1505 static void gpu_opcode_subc(void)
1506 {
1507 #ifdef GPU_DIS_SUBC
1508         if (doGPUDis)
1509                 WriteLog("%06X: SUBC   R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1510 #endif
1511         UINT32 res = RN - RM - gpu_flag_c;
1512         UINT32 borrow = gpu_flag_c;
1513 //      SET_ZNC_SUB(RN, RM, res); //???BUG??? YES!!!
1514 //No matter how you do it, there is a problem. With below, it's 0-0 with carry,
1515 //and the one below it it's FFFFFFFF - FFFFFFFF with carry... !!! FIX !!!
1516 //      SET_ZNC_SUB(RN - borrow, RM, res);
1517         SET_ZNC_SUB(RN, RM + borrow, res);
1518         RN = res;
1519 #ifdef GPU_DIS_SUBC
1520         if (doGPUDis)
1521                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1522 #endif
1523 }
1524 /*
1525 N = 5, M = 3, 3 - 5 = -2, C = 1... Or, in our case:
1526 N = 0, M = 1, 0 - 1 = -1, C = 0!
1527
1528 #define SET_C_SUB(a,b)          (gpu_flag_c = ((UINT32)(b) > (UINT32)(a)))
1529 #define SET_ZN(r)                       SET_N(r); SET_Z(r)
1530 #define SET_ZNC_ADD(a,b,r)      SET_N(r); SET_Z(r); SET_C_ADD(a,b)
1531 #define SET_ZNC_SUB(a,b,r)      SET_N(r); SET_Z(r); SET_C_SUB(a,b)
1532 */
1533 static void gpu_opcode_subq(void)
1534 {
1535 #ifdef GPU_DIS_SUBQ
1536         if (doGPUDis)
1537                 WriteLog("%06X: SUBQ   #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1538 #endif
1539         UINT32 r1 = gpu_convert_zero[IMM_1];
1540         UINT32 res = RN - r1;
1541         SET_ZNC_SUB(RN, r1, res);
1542         RN = res;
1543 #ifdef GPU_DIS_SUBQ
1544         if (doGPUDis)
1545                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1546 #endif
1547 }
1548
1549 static void gpu_opcode_subqt(void)
1550 {
1551 #ifdef GPU_DIS_SUBQT
1552         if (doGPUDis)
1553                 WriteLog("%06X: SUBQT  #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1554 #endif
1555         RN -= gpu_convert_zero[IMM_1];
1556 #ifdef GPU_DIS_SUBQT
1557         if (doGPUDis)
1558                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1559 #endif
1560 }
1561
1562 static void gpu_opcode_cmp(void)
1563 {
1564 #ifdef GPU_DIS_CMP
1565         if (doGPUDis)
1566                 WriteLog("%06X: CMP    R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1567 #endif
1568         UINT32 res = RN - RM;
1569         SET_ZNC_SUB(RN, RM, res);
1570 #ifdef GPU_DIS_CMP
1571         if (doGPUDis)
1572                 WriteLog("[NCZ:%u%u%u]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z);
1573 #endif
1574 }
1575
1576 static void gpu_opcode_cmpq(void)
1577 {
1578         static int32 sqtable[32] =
1579                 { 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,-16,-15,-14,-13,-12,-11,-10,-9,-8,-7,-6,-5,-4,-3,-2,-1 };
1580 #ifdef GPU_DIS_CMPQ
1581         if (doGPUDis)
1582                 WriteLog("%06X: CMPQ   #%d, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, sqtable[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1583 #endif
1584         UINT32 r1 = sqtable[IMM_1 & 0x1F]; // I like this better -> (INT8)(jaguar.op >> 2) >> 3;
1585         UINT32 res = RN - r1;
1586         SET_ZNC_SUB(RN, r1, res);
1587 #ifdef GPU_DIS_CMPQ
1588         if (doGPUDis)
1589                 WriteLog("[NCZ:%u%u%u]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z);
1590 #endif
1591 }
1592
1593 static void gpu_opcode_and(void)
1594 {
1595 #ifdef GPU_DIS_AND
1596         if (doGPUDis)
1597                 WriteLog("%06X: AND    R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1598 #endif
1599         RN = RN & RM;
1600         SET_ZN(RN);
1601 #ifdef GPU_DIS_AND
1602         if (doGPUDis)
1603                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1604 #endif
1605 }
1606
1607 static void gpu_opcode_or(void)
1608 {
1609 #ifdef GPU_DIS_OR
1610         if (doGPUDis)
1611                 WriteLog("%06X: OR     R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1612 #endif
1613         RN = RN | RM;
1614         SET_ZN(RN);
1615 #ifdef GPU_DIS_OR
1616         if (doGPUDis)
1617                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1618 #endif
1619 }
1620
1621 static void gpu_opcode_xor(void)
1622 {
1623 #ifdef GPU_DIS_XOR
1624         if (doGPUDis)
1625                 WriteLog("%06X: XOR    R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1626 #endif
1627         RN = RN ^ RM;
1628         SET_ZN(RN);
1629 #ifdef GPU_DIS_XOR
1630         if (doGPUDis)
1631                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1632 #endif
1633 }
1634
1635 static void gpu_opcode_not(void)
1636 {
1637 #ifdef GPU_DIS_NOT
1638         if (doGPUDis)
1639                 WriteLog("%06X: NOT    R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1640 #endif
1641         RN = ~RN;
1642         SET_ZN(RN);
1643 #ifdef GPU_DIS_NOT
1644         if (doGPUDis)
1645                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1646 #endif
1647 }
1648
1649 static void gpu_opcode_move_pc(void)
1650 {
1651 #ifdef GPU_DIS_MOVEPC
1652         if (doGPUDis)
1653                 WriteLog("%06X: MOVE   PC, R%02u [NCZ:%u%u%u, PC=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, gpu_pc-2, IMM_2, RN);
1654 #endif
1655         // Should be previous PC--this might not always be previous instruction!
1656         // Then again, this will point right at the *current* instruction, i.e., MOVE PC,R!
1657         RN = gpu_pc - 2;
1658 #ifdef GPU_DIS_MOVEPC
1659         if (doGPUDis)
1660                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1661 #endif
1662 }
1663
1664 static void gpu_opcode_sat8(void)
1665 {
1666 #ifdef GPU_DIS_SAT8
1667         if (doGPUDis)
1668                 WriteLog("%06X: SAT8   R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1669 #endif
1670         RN = ((int32)RN < 0 ? 0 : (RN > 0xFF ? 0xFF : RN));
1671         SET_ZN(RN);
1672 #ifdef GPU_DIS_SAT8
1673         if (doGPUDis)
1674                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1675 #endif
1676 }
1677
1678 static void gpu_opcode_sat16(void)
1679 {
1680         RN = ((int32)RN < 0 ? 0 : (RN > 0xFFFF ? 0xFFFF : RN));
1681         SET_ZN(RN);
1682 }
1683
1684 static void gpu_opcode_sat24(void)
1685 {
1686         RN = ((int32)RN < 0 ? 0 : (RN > 0xFFFFFF ? 0xFFFFFF : RN));
1687         SET_ZN(RN);
1688 }
1689
1690 static void gpu_opcode_store_r14_indexed(void)
1691 {
1692 #ifdef GPU_DIS_STORE14I
1693         if (doGPUDis)
1694                 WriteLog("%06X: STORE  R%02u, (R14+$%02X) [NCZ:%u%u%u, R%02u=%08X, R14+$%02X=%08X]\n", gpu_pc-2, IMM_2, gpu_convert_zero[IMM_1] << 2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, gpu_convert_zero[IMM_1] << 2, gpu_reg[14]+(gpu_convert_zero[IMM_1] << 2));
1695 #endif
1696         GPUWriteLong(gpu_reg[14] + (gpu_convert_zero[IMM_1] << 2), RN, GPU);
1697 }
1698
1699 static void gpu_opcode_store_r15_indexed(void)
1700 {
1701 #ifdef GPU_DIS_STORE15I
1702         if (doGPUDis)
1703                 WriteLog("%06X: STORE  R%02u, (R15+$%02X) [NCZ:%u%u%u, R%02u=%08X, R15+$%02X=%08X]\n", gpu_pc-2, IMM_2, gpu_convert_zero[IMM_1] << 2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, gpu_convert_zero[IMM_1] << 2, gpu_reg[15]+(gpu_convert_zero[IMM_1] << 2));
1704 #endif
1705         GPUWriteLong(gpu_reg[15] + (gpu_convert_zero[IMM_1] << 2), RN, GPU);
1706 }
1707
1708 static void gpu_opcode_load_r14_ri(void)
1709 {
1710 #ifdef GPU_DIS_LOAD14R
1711         if (doGPUDis)
1712                 WriteLog("%06X: LOAD   (R14+R%02u), R%02u [NCZ:%u%u%u, R14+R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM+gpu_reg[14], IMM_2, RN);
1713 #endif
1714         RN = GPUReadLong(gpu_reg[14] + RM, GPU);
1715 #ifdef GPU_DIS_LOAD14R
1716         if (doGPUDis)
1717                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1718 #endif
1719 }
1720
1721 static void gpu_opcode_load_r15_ri(void)
1722 {
1723 #ifdef GPU_DIS_LOAD15R
1724         if (doGPUDis)
1725                 WriteLog("%06X: LOAD   (R15+R%02u), R%02u [NCZ:%u%u%u, R15+R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM+gpu_reg[15], IMM_2, RN);
1726 #endif
1727         RN = GPUReadLong(gpu_reg[15] + RM, GPU);
1728 #ifdef GPU_DIS_LOAD15R
1729         if (doGPUDis)
1730                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1731 #endif
1732 }
1733
1734 static void gpu_opcode_store_r14_ri(void)
1735 {
1736 #ifdef GPU_DIS_STORE14R
1737         if (doGPUDis)
1738                 WriteLog("%06X: STORE  R%02u, (R14+R%02u) [NCZ:%u%u%u, R%02u=%08X, R14+R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM+gpu_reg[14]);
1739 #endif
1740         GPUWriteLong(gpu_reg[14] + RM, RN, GPU);
1741 }
1742
1743 static void gpu_opcode_store_r15_ri(void)
1744 {
1745 #ifdef GPU_DIS_STORE15R
1746         if (doGPUDis)
1747                 WriteLog("%06X: STORE  R%02u, (R15+R%02u) [NCZ:%u%u%u, R%02u=%08X, R15+R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM+gpu_reg[15]);
1748 #endif
1749         GPUWriteLong(gpu_reg[15] + RM, RN, GPU);
1750 }
1751
// NOP -- changes no state; only emits a trace line when disassembly is enabled.
static void gpu_opcode_nop(void)
{
#ifdef GPU_DIS_NOP
	if (doGPUDis)
	{
		WriteLog("%06X: NOP    [NCZ:%u%u%u]\n", gpu_pc-2, gpu_flag_n, gpu_flag_c, gpu_flag_z);
	}
#endif
}
1759
1760 static void gpu_opcode_pack(void)
1761 {
1762 #ifdef GPU_DIS_PACK
1763         if (doGPUDis)
1764                 WriteLog("%06X: %s R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, (!IMM_1 ? "PACK  " : "UNPACK"), IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1765 #endif
1766         uint32 val = RN;
1767
1768 //BUG!  if (RM == 0)                            // Pack
1769         if (IMM_1 == 0)                         // Pack
1770                 RN = ((val >> 10) & 0x0000F000) | ((val >> 5) & 0x00000F00) | (val & 0x000000FF);
1771         else                                            // Unpack
1772                 RN = ((val & 0x0000F000) << 10) | ((val & 0x00000F00) << 5) | (val & 0x000000FF);
1773 #ifdef GPU_DIS_PACK
1774         if (doGPUDis)
1775                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1776 #endif
1777 }
1778
1779 static void gpu_opcode_storeb(void)
1780 {
1781 #ifdef GPU_DIS_STOREB
1782         if (doGPUDis)
1783                 WriteLog("%06X: STOREB R%02u, (R%02u) [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM);
1784 #endif
1785 //Is this right???
1786 // Would appear to be so...!
1787         if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1788                 GPUWriteLong(RM, RN & 0xFF, GPU);
1789         else
1790                 JaguarWriteByte(RM, RN, GPU);
1791 }
1792
1793 static void gpu_opcode_storew(void)
1794 {
1795 #ifdef GPU_DIS_STOREW
1796         if (doGPUDis)
1797                 WriteLog("%06X: STOREW R%02u, (R%02u) [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM);
1798 #endif
1799         if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1800                 GPUWriteLong(RM, RN & 0xFFFF, GPU);
1801         else
1802                 JaguarWriteWord(RM, RN, GPU);
1803 }
1804
1805 static void gpu_opcode_store(void)
1806 {
1807 #ifdef GPU_DIS_STORE
1808         if (doGPUDis)
1809                 WriteLog("%06X: STORE  R%02u, (R%02u) [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM);
1810 #endif
1811         GPUWriteLong(RM, RN, GPU);
1812 }
1813
1814 static void gpu_opcode_storep(void)
1815 {
1816         GPUWriteLong(RM + 0, gpu_hidata, GPU);
1817         GPUWriteLong(RM + 4, RN, GPU);
1818 }
1819
1820 static void gpu_opcode_loadb(void)
1821 {
1822 #ifdef GPU_DIS_LOADB
1823         if (doGPUDis)
1824                 WriteLog("%06X: LOADB  (R%02u), R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1825 #endif
1826         if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1827                 RN = GPUReadLong(RM, GPU) & 0xFF;
1828         else
1829                 RN = JaguarReadByte(RM, GPU);
1830 #ifdef GPU_DIS_LOADB
1831         if (doGPUDis)
1832                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1833 #endif
1834 }
1835
1836 static void gpu_opcode_loadw(void)
1837 {
1838 #ifdef GPU_DIS_LOADW
1839         if (doGPUDis)
1840                 WriteLog("%06X: LOADW  (R%02u), R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1841 #endif
1842         if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1843                 RN = GPUReadLong(RM, GPU) & 0xFFFF;
1844         else
1845                 RN = JaguarReadWord(RM, GPU);
1846 #ifdef GPU_DIS_LOADW
1847         if (doGPUDis)
1848                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1849 #endif
1850 }
1851
1852 static void gpu_opcode_load(void)
1853 {
1854 #ifdef GPU_DIS_LOAD
1855         if (doGPUDis)
1856                 WriteLog("%06X: LOAD   (R%02u), R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1857 #endif
1858         RN = GPUReadLong(RM, GPU);
1859 #ifdef GPU_DIS_LOAD
1860         if (doGPUDis)
1861                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1862 #endif
1863 }
1864
1865 static void gpu_opcode_loadp(void)
1866 {
1867         gpu_hidata = GPUReadLong(RM + 0, GPU);
1868         RN                 = GPUReadLong(RM + 4, GPU);
1869 }
1870
1871 static void gpu_opcode_load_r14_indexed(void)
1872 {
1873 #ifdef GPU_DIS_LOAD14I
1874         if (doGPUDis)
1875                 WriteLog("%06X: LOAD   (R14+$%02X), R%02u [NCZ:%u%u%u, R14+$%02X=%08X, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1] << 2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, gpu_convert_zero[IMM_1] << 2, gpu_reg[14]+(gpu_convert_zero[IMM_1] << 2), IMM_2, RN);
1876 #endif
1877         RN = GPUReadLong(gpu_reg[14] + (gpu_convert_zero[IMM_1] << 2), GPU);
1878 #ifdef GPU_DIS_LOAD14I
1879         if (doGPUDis)
1880                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1881 #endif
1882 }
1883
1884 static void gpu_opcode_load_r15_indexed(void)
1885 {
1886 #ifdef GPU_DIS_LOAD15I
1887         if (doGPUDis)
1888                 WriteLog("%06X: LOAD   (R15+$%02X), R%02u [NCZ:%u%u%u, R15+$%02X=%08X, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1] << 2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, gpu_convert_zero[IMM_1] << 2, gpu_reg[15]+(gpu_convert_zero[IMM_1] << 2), IMM_2, RN);
1889 #endif
1890         RN = GPUReadLong(gpu_reg[15] + (gpu_convert_zero[IMM_1] << 2), GPU);
1891 #ifdef GPU_DIS_LOAD15I
1892         if (doGPUDis)
1893                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1894 #endif
1895 }
1896
1897 static void gpu_opcode_movei(void)
1898 {
1899 #ifdef GPU_DIS_MOVEI
1900         if (doGPUDis)
1901                 WriteLog("%06X: MOVEI  #$%08X, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, (uint32)GPUReadWord(gpu_pc) | ((uint32)GPUReadWord(gpu_pc + 2) << 16), IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1902 #endif
1903         // This instruction is followed by 32-bit value in LSW / MSW format...
1904         RN = (uint32)GPUReadWord(gpu_pc, GPU) | ((uint32)GPUReadWord(gpu_pc + 2, GPU) << 16);
1905         gpu_pc += 4;
1906 #ifdef GPU_DIS_MOVEI
1907         if (doGPUDis)
1908                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1909 #endif
1910 }
1911
1912 static void gpu_opcode_moveta(void)
1913 {
1914 #ifdef GPU_DIS_MOVETA
1915         if (doGPUDis)
1916                 WriteLog("%06X: MOVETA R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u(alt)=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, ALTERNATE_RN);
1917 #endif
1918         ALTERNATE_RN = RM;
1919 #ifdef GPU_DIS_MOVETA
1920         if (doGPUDis)
1921                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u(alt)=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, ALTERNATE_RN);
1922 #endif
1923 }
1924
1925 static void gpu_opcode_movefa(void)
1926 {
1927 #ifdef GPU_DIS_MOVEFA
1928         if (doGPUDis)
1929                 WriteLog("%06X: MOVEFA R%02u, R%02u [NCZ:%u%u%u, R%02u(alt)=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, ALTERNATE_RM, IMM_2, RN);
1930 #endif
1931         RN = ALTERNATE_RM;
1932 #ifdef GPU_DIS_MOVEFA
1933         if (doGPUDis)
1934                 WriteLog("[NCZ:%u%u%u, R%02u(alt)=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, ALTERNATE_RM, IMM_2, RN);
1935 #endif
1936 }
1937
1938 static void gpu_opcode_move(void)
1939 {
1940 #ifdef GPU_DIS_MOVE
1941         if (doGPUDis)
1942                 WriteLog("%06X: MOVE   R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1943 #endif
1944         RN = RM;
1945 #ifdef GPU_DIS_MOVE
1946         if (doGPUDis)
1947                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1948 #endif
1949 }
1950
1951 static void gpu_opcode_moveq(void)
1952 {
1953 #ifdef GPU_DIS_MOVEQ
1954         if (doGPUDis)
1955                 WriteLog("%06X: MOVEQ  #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1956 #endif
1957         RN = IMM_1;
1958 #ifdef GPU_DIS_MOVEQ
1959         if (doGPUDis)
1960                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1961 #endif
1962 }
1963
1964 static void gpu_opcode_resmac(void)
1965 {
1966         RN = gpu_acc;
1967 }
1968
1969 static void gpu_opcode_imult(void)
1970 {
1971 #ifdef GPU_DIS_IMULT
1972         if (doGPUDis)
1973                 WriteLog("%06X: IMULT  R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1974 #endif
1975         RN = (int16)RN * (int16)RM;
1976         SET_ZN(RN);
1977 #ifdef GPU_DIS_IMULT
1978         if (doGPUDis)
1979                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1980 #endif
1981 }
1982
1983 static void gpu_opcode_mult(void)
1984 {
1985 #ifdef GPU_DIS_MULT
1986         if (doGPUDis)
1987                 WriteLog("%06X: MULT   R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1988 #endif
1989         RN = (uint16)RM * (uint16)RN;
1990         SET_ZN(RN);
1991 #ifdef GPU_DIS_MULT
1992         if (doGPUDis)
1993                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1994 #endif
1995 }
1996
1997 static void gpu_opcode_bclr(void)
1998 {
1999 #ifdef GPU_DIS_BCLR
2000         if (doGPUDis)
2001                 WriteLog("%06X: BCLR   #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2002 #endif
2003         UINT32 res = RN & ~(1 << IMM_1);
2004         RN = res;
2005         SET_ZN(res);
2006 #ifdef GPU_DIS_BCLR
2007         if (doGPUDis)
2008                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2009 #endif
2010 }
2011
2012 static void gpu_opcode_btst(void)
2013 {
2014 #ifdef GPU_DIS_BTST
2015         if (doGPUDis)
2016                 WriteLog("%06X: BTST   #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2017 #endif
2018         gpu_flag_z = (~RN >> IMM_1) & 1;
2019 #ifdef GPU_DIS_BTST
2020         if (doGPUDis)
2021                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2022 #endif
2023 }
2024
2025 static void gpu_opcode_bset(void)
2026 {
2027 #ifdef GPU_DIS_BSET
2028         if (doGPUDis)
2029                 WriteLog("%06X: BSET   #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2030 #endif
2031         UINT32 res = RN | (1 << IMM_1);
2032         RN = res;
2033         SET_ZN(res);
2034 #ifdef GPU_DIS_BSET
2035         if (doGPUDis)
2036                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2037 #endif
2038 }
2039
2040 static void gpu_opcode_imacn(void)
2041 {
2042         uint32 res = (int16)RM * (int16)(RN);
2043         gpu_acc += res;
2044 }
2045
2046 static void gpu_opcode_mtoi(void)
2047 {
2048         uint32 _RM = RM;
2049         uint32 res = RN = (((INT32)_RM >> 8) & 0xFF800000) | (_RM & 0x007FFFFF);
2050         SET_ZN(res);
2051 }
2052
2053 static void gpu_opcode_normi(void)
2054 {
2055         uint32 _RM = RM;
2056         uint32 res = 0;
2057
2058         if (_RM)
2059         {
2060                 while ((_RM & 0xFFC00000) == 0)
2061                 {
2062                         _RM <<= 1;
2063                         res--;
2064                 }
2065                 while ((_RM & 0xFF800000) != 0)
2066                 {
2067                         _RM >>= 1;
2068                         res++;
2069                 }
2070         }
2071         RN = res;
2072         SET_ZN(res);
2073 }
2074
2075 static void gpu_opcode_mmult(void)
2076 {
2077         int count       = gpu_matrix_control & 0x0F;    // Matrix width
2078         uint32 addr = gpu_pointer_to_matrix;            // In the GPU's RAM
2079         int64 accum = 0;
2080         uint32 res;
2081
2082         if (gpu_matrix_control & 0x10)                          // Column stepping
2083         {
2084                 for(int i=0; i<count; i++)
2085                 { 
2086                         int16 a;
2087                         if (i & 0x01)
2088                                 a = (int16)((gpu_alternate_reg[IMM_1 + (i >> 1)] >> 16) & 0xFFFF);
2089                         else
2090                                 a = (int16)(gpu_alternate_reg[IMM_1 + (i >> 1)] & 0xFFFF);
2091
2092                         int16 b = ((int16)GPUReadWord(addr + 2, GPU));
2093                         accum += a * b;
2094                         addr += 4 * count;
2095                 }
2096         }
2097         else                                                                            // Row stepping
2098         {
2099                 for(int i=0; i<count; i++)
2100                 {
2101                         int16 a;
2102                         if (i & 0x01)
2103                                 a = (int16)((gpu_alternate_reg[IMM_1 + (i >> 1)] >> 16) & 0xFFFF);
2104                         else
2105                                 a = (int16)(gpu_alternate_reg[IMM_1 + (i >> 1)] & 0xFFFF);
2106
2107                         int16 b = ((int16)GPUReadWord(addr + 2, GPU));
2108                         accum += a * b;
2109                         addr += 4;
2110                 }
2111         }
2112         RN = res = (int32)accum;
2113         // carry flag to do (out of the last add)
2114         SET_ZN(res);
2115 }
2116
2117 static void gpu_opcode_abs(void)
2118 {
2119 #ifdef GPU_DIS_ABS
2120         if (doGPUDis)
2121                 WriteLog("%06X: ABS    R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2122 #endif
2123         gpu_flag_c = RN >> 31;
2124         if (RN == 0x80000000)
2125         //Is 0x80000000 a positive number? If so, then we need to set C to 0 as well!
2126                 gpu_flag_n = 1, gpu_flag_z = 0;
2127         else
2128         {
2129                 if (gpu_flag_c)
2130                         RN = -RN;
2131                 gpu_flag_n = 0; SET_FLAG_Z(RN);
2132         }
2133 #ifdef GPU_DIS_ABS
2134         if (doGPUDis)
2135                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2136 #endif
2137 }
2138
2139 static void gpu_opcode_div(void)        // RN / RM
2140 {
2141 #ifdef GPU_DIS_DIV
2142         if (doGPUDis)
2143                 WriteLog("%06X: DIV    R%02u, R%02u (%s) [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, (gpu_div_control & 0x01 ? "16.16" : "32"), gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2144 #endif
2145 // NOTE: remainder is NOT calculated correctly here!
2146 //       The original tried to get it right by checking to see if the
2147 //       remainder was negative, but that's too late...
2148 // The code there should do it now, but I'm not 100% sure...
2149
2150         if (RM)
2151         {
2152                 if (gpu_div_control & 0x01)             // 16.16 division
2153                 {
2154                         RN = ((UINT64)RN << 16) / RM;
2155                         gpu_remain = ((UINT64)RN << 16) % RM;
2156                 }
2157                 else
2158                 {
2159                         RN = RN / RM;
2160                         gpu_remain = RN % RM;
2161                 }
2162
2163                 if ((gpu_remain - RM) & 0x80000000)     // If the result would have been negative...
2164                         gpu_remain -= RM;                       // Then make it negative!
2165         }
2166         else
2167                 RN = 0xFFFFFFFF;
2168
2169 /*      uint32 _RM=RM;
2170         uint32 _RN=RN;
2171
2172         if (_RM)
2173         {
2174                 if (gpu_div_control & 1)
2175                 {
2176                         gpu_remain = (((uint64)_RN) << 16) % _RM;
2177                         if (gpu_remain&0x80000000)
2178                                 gpu_remain-=_RM;
2179                         RN = (((uint64)_RN) << 16) / _RM;
2180                 }
2181                 else
2182                 {
2183                         gpu_remain = _RN % _RM;
2184                         if (gpu_remain&0x80000000)
2185                                 gpu_remain-=_RM;
2186                         RN/=_RM;
2187                 }
2188         }
2189         else
2190                 RN=0xffffffff;*/
2191 #ifdef GPU_DIS_DIV
2192         if (doGPUDis)
2193                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] Remainder: %08X\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN, gpu_remain);
2194 #endif
2195 }
2196
2197 static void gpu_opcode_imultn(void)
2198 {
2199         uint32 res = (int32)((int16)RN * (int16)RM);
2200         gpu_acc = (int32)res;
2201         SET_FLAG_Z(res);
2202         SET_FLAG_N(res);
2203 }
2204
2205 static void gpu_opcode_neg(void)
2206 {
2207 #ifdef GPU_DIS_NEG
2208         if (doGPUDis)
2209                 WriteLog("%06X: NEG    R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2210 #endif
2211         UINT32 res = -RN;
2212         SET_ZNC_SUB(0, RN, res);
2213         RN = res;
2214 #ifdef GPU_DIS_NEG
2215         if (doGPUDis)
2216                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2217 #endif
2218 }
2219
2220 static void gpu_opcode_shlq(void)
2221 {
2222 #ifdef GPU_DIS_SHLQ
2223         if (doGPUDis)
2224                 WriteLog("%06X: SHLQ   #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, 32 - IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2225 #endif
2226 // Was a bug here...
2227 // (Look at Aaron's code: If r1 = 32, then 32 - 32 = 0 which is wrong!)
2228         INT32 r1 = 32 - IMM_1;
2229         UINT32 res = RN << r1;
2230         SET_ZN(res); gpu_flag_c = (RN >> 31) & 1;
2231         RN = res;
2232 #ifdef GPU_DIS_SHLQ
2233         if (doGPUDis)
2234                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2235 #endif
2236 }
2237
2238 static void gpu_opcode_shrq(void)
2239 {
2240 #ifdef GPU_DIS_SHRQ
2241         if (doGPUDis)
2242                 WriteLog("%06X: SHRQ   #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2243 #endif
2244         INT32 r1 = gpu_convert_zero[IMM_1];
2245         UINT32 res = RN >> r1;
2246         SET_ZN(res); gpu_flag_c = RN & 1;
2247         RN = res;
2248 #ifdef GPU_DIS_SHRQ
2249         if (doGPUDis)
2250                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2251 #endif
2252 }
2253
2254 static void gpu_opcode_ror(void)
2255 {
2256 #ifdef GPU_DIS_ROR
2257         if (doGPUDis)
2258                 WriteLog("%06X: ROR    R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2259 #endif
2260         UINT32 r1 = RM & 0x1F;
2261         UINT32 res = (RN >> r1) | (RN << (32 - r1));
2262         SET_ZN(res); gpu_flag_c = (RN >> 31) & 1;
2263         RN = res;
2264 #ifdef GPU_DIS_ROR
2265         if (doGPUDis)
2266                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2267 #endif
2268 }
2269
2270 static void gpu_opcode_rorq(void)
2271 {
2272 #ifdef GPU_DIS_RORQ
2273         if (doGPUDis)
2274                 WriteLog("%06X: RORQ   #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2275 #endif
2276         UINT32 r1 = gpu_convert_zero[IMM_1 & 0x1F];
2277         UINT32 r2 = RN;
2278         UINT32 res = (r2 >> r1) | (r2 << (32 - r1));
2279         RN = res;
2280         SET_ZN(res); gpu_flag_c = (r2 >> 31) & 0x01;
2281 #ifdef GPU_DIS_RORQ
2282         if (doGPUDis)
2283                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2284 #endif
2285 }
2286
2287 static void gpu_opcode_sha(void)
2288 {
2289 /*      int dreg = jaguar.op & 31;
2290         INT32 r1 = (INT32)jaguar.r[(jaguar.op >> 5) & 31];
2291         UINT32 r2 = jaguar.r[dreg];
2292         UINT32 res;
2293
2294         CLR_ZNC;
2295         if (r1 < 0)
2296         {
2297                 res = (r1 <= -32) ? 0 : (r2 << -r1);
2298                 jaguar.FLAGS |= (r2 >> 30) & 2;
2299         }
2300         else
2301         {
2302                 res = (r1 >= 32) ? ((INT32)r2 >> 31) : ((INT32)r2 >> r1);
2303                 jaguar.FLAGS |= (r2 << 1) & 2;
2304         }
2305         jaguar.r[dreg] = res;
2306         SET_ZN(res);*/
2307
2308 #ifdef GPU_DIS_SHA
2309         if (doGPUDis)
2310                 WriteLog("%06X: SHA    R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2311 #endif
2312         UINT32 res;
2313
2314         if ((INT32)RM < 0)
2315         {
2316                 res = ((INT32)RM <= -32) ? 0 : (RN << -(INT32)RM);
2317                 gpu_flag_c = RN >> 31;
2318         }
2319         else
2320         {
2321                 res = ((INT32)RM >= 32) ? ((INT32)RN >> 31) : ((INT32)RN >> (INT32)RM);
2322                 gpu_flag_c = RN & 0x01;
2323         }
2324         RN = res;
2325         SET_ZN(res);
2326 #ifdef GPU_DIS_SHA
2327         if (doGPUDis)
2328                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2329 #endif
2330
2331 /*      int32 sRM=(int32)RM;
2332         uint32 _RN=RN;
2333
2334         if (sRM<0)
2335         {
2336                 uint32 shift=-sRM;
2337                 if (shift>=32) shift=32;
2338                 gpu_flag_c=(_RN&0x80000000)>>31;
2339                 while (shift)
2340                 {
2341                         _RN<<=1;
2342                         shift--;
2343                 }
2344         }
2345         else
2346         {
2347                 uint32 shift=sRM;
2348                 if (shift>=32) shift=32;
2349                 gpu_flag_c=_RN&0x1;
2350                 while (shift)
2351                 {
2352                         _RN=((int32)_RN)>>1;
2353                         shift--;
2354                 }
2355         }
2356         RN=_RN;
2357         SET_FLAG_Z(_RN);
2358         SET_FLAG_N(_RN);*/
2359 }
2360
2361 static void gpu_opcode_sharq(void)
2362 {
2363 #ifdef GPU_DIS_SHARQ
2364         if (doGPUDis)
2365                 WriteLog("%06X: SHARQ  #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2366 #endif
2367         UINT32 res = (INT32)RN >> gpu_convert_zero[IMM_1];
2368         SET_ZN(res); gpu_flag_c = RN & 0x01;
2369         RN = res;
2370 #ifdef GPU_DIS_SHARQ
2371         if (doGPUDis)
2372                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2373 #endif
2374 }
2375
2376 static void gpu_opcode_sh(void)
2377 {
2378 #ifdef GPU_DIS_SH
2379         if (doGPUDis)
2380                 WriteLog("%06X: SH     R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2381 #endif
2382         if (RM & 0x80000000)            // Shift left
2383         {
2384                 gpu_flag_c = RN >> 31;
2385                 RN = ((int32)RM <= -32 ? 0 : RN << -(int32)RM);
2386         }
2387         else                                            // Shift right
2388         {
2389                 gpu_flag_c = RN & 0x01;
2390                 RN = (RM >= 32 ? 0 : RN >> RM);
2391         }
2392         SET_ZN(RN);
2393 #ifdef GPU_DIS_SH
2394         if (doGPUDis)
2395                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2396 #endif
2397 }
2398
2399 //Temporary: Testing only!
2400 //#include "gpu2.cpp"
2401 //#include "gpu3.cpp"