]> Shamusworld >> Repos - virtualjaguar/blob - src/gpu.cpp
Added logging of GPU/DSP states on exit.
[virtualjaguar] / src / gpu.cpp
1 #if 1
2
3 //
4 // GPU Core
5 //
6 // Originally by David Raingeard (Cal2)
7 // GCC/SDL port by Niels Wagenaar (Linux/WIN32) and Caz (BeOS)
8 // Cleanups, endian wrongness, and bad ASM amelioration by James Hammons
9 // (C) 2010 Underground Software
10 //
11 // JLH = James Hammons <jlhamm@acm.org>
12 //
13 // Who  When        What
14 // ---  ----------  -------------------------------------------------------------
15 // JLH  01/16/2010  Created this log ;-)
16 // JLH  11/26/2011  Added fixes for LOAD/STORE alignment issues
17
18 //
19 // Note: Endian wrongness probably stems from the MAME origins of this emu and
20 //       the braindead way in which MAME handles memory. :-)
21 //
22 // Problem with not booting the BIOS was the incorrect way that the
23 // SUBC instruction set the carry when the carry was set going in...
24 // Same problem with ADDC...
25 //
26
27 #include "gpu.h"
28
29 #include <stdlib.h>
30 #include <string.h>                                                             // For memset
31 #include "dsp.h"
32 #include "jagdasm.h"
33 #include "jaguar.h"
34 #include "log.h"
35 #include "m68000/m68kinterface.h"
36 //#include "memory.h"
37 #include "tom.h"
38
39
40 // Seems alignment in loads & stores was off...
41 #define GPU_CORRECT_ALIGNMENT
42 //#define GPU_DEBUG
43
44 // For GPU disassembly...
45
46 #if 0
47 #define GPU_DIS_ABS
48 #define GPU_DIS_ADD
49 #define GPU_DIS_ADDC
50 #define GPU_DIS_ADDQ
51 #define GPU_DIS_ADDQT
52 #define GPU_DIS_AND
53 #define GPU_DIS_BCLR
54 #define GPU_DIS_BSET
55 #define GPU_DIS_BTST
56 #define GPU_DIS_CMP
57 #define GPU_DIS_CMPQ
58 #define GPU_DIS_DIV
59 #define GPU_DIS_IMULT
60 #define GPU_DIS_JUMP
61 #define GPU_DIS_JR
62 #define GPU_DIS_LOAD
63 #define GPU_DIS_LOADB
64 #define GPU_DIS_LOADW
65 #define GPU_DIS_LOAD14I
66 #define GPU_DIS_LOAD14R
67 #define GPU_DIS_LOAD15I
68 #define GPU_DIS_LOAD15R
69 #define GPU_DIS_MOVE
70 #define GPU_DIS_MOVEFA
71 #define GPU_DIS_MOVEI
72 #define GPU_DIS_MOVEPC
73 #define GPU_DIS_MOVETA
74 #define GPU_DIS_MOVEQ
75 #define GPU_DIS_MULT
76 #define GPU_DIS_NEG
77 #define GPU_DIS_NOP
78 #define GPU_DIS_NOT
79 #define GPU_DIS_OR
80 #define GPU_DIS_PACK
81 #define GPU_DIS_ROR
82 #define GPU_DIS_RORQ
83 #define GPU_DIS_SAT8
84 #define GPU_DIS_SH
85 #define GPU_DIS_SHA
86 #define GPU_DIS_SHARQ
87 #define GPU_DIS_SHLQ
88 #define GPU_DIS_SHRQ
89 #define GPU_DIS_STORE
90 #define GPU_DIS_STOREB
91 #define GPU_DIS_STOREW
92 #define GPU_DIS_STORE14I
93 #define GPU_DIS_STORE14R
94 #define GPU_DIS_STORE15I
95 #define GPU_DIS_STORE15R
96 #define GPU_DIS_SUB
97 #define GPU_DIS_SUBC
98 #define GPU_DIS_SUBQ
99 #define GPU_DIS_SUBQT
100 #define GPU_DIS_XOR
101
102 bool doGPUDis = false;
103 //bool doGPUDis = true;
104 #endif
105
106 /*
107 GPU opcodes use (BIOS flying ATARI logo):
108 +                     add 357416
109 +                    addq 538030
110 +                   addqt 6999
111 +                     sub 116663
112 +                    subq 188059
113 +                   subqt 15086
114 +                     neg 36097
115 +                     and 233993
116 +                      or 109332
117 +                     xor 1384
118 +                    btst 111924
119 +                    bset 25029
120 +                    bclr 10551
121 +                    mult 28147
122 +                   imult 69148
123 +                     div 64102
124 +                     abs 159394
125 +                    shlq 194690
126 +                    shrq 292587
127 +                   sharq 192649
128 +                    rorq 58672
129 +                     cmp 244963
130 +                    cmpq 114834
131 +                    move 833472
132 +                   moveq 56427
133 +                  moveta 220814
134 +                  movefa 170678
135 +                   movei 152025
136 +                   loadw 108220
137 +                    load 430936
138 +                  storew 3036
139 +                   store 372490
140 +                 move_pc 2330
141 +                    jump 349134
142 +                      jr 529171
143                     mmult 64904
144 +                     nop 432179
145 */
146
147 // Various bits
148
149 #define CINT0FLAG                       0x0200
150 #define CINT1FLAG                       0x0400
151 #define CINT2FLAG                       0x0800
152 #define CINT3FLAG                       0x1000
153 #define CINT4FLAG                       0x2000
154 #define CINT04FLAGS                     (CINT0FLAG | CINT1FLAG | CINT2FLAG | CINT3FLAG | CINT4FLAG)
155
156 // GPU_FLAGS bits
157
158 #define ZERO_FLAG               0x0001
159 #define CARRY_FLAG              0x0002
160 #define NEGA_FLAG               0x0004
161 #define IMASK                   0x0008
162 #define INT_ENA0                0x0010
163 #define INT_ENA1                0x0020
164 #define INT_ENA2                0x0040
165 #define INT_ENA3                0x0080
166 #define INT_ENA4                0x0100
167 #define INT_CLR0                0x0200
168 #define INT_CLR1                0x0400
169 #define INT_CLR2                0x0800
170 #define INT_CLR3                0x1000
171 #define INT_CLR4                0x2000
172 #define REGPAGE                 0x4000
173 #define DMAEN                   0x8000
174
175 // External global variables
176
177 extern int start_logging;
178 extern int gpu_start_log;
179
180 // Private function prototypes
181
182 void GPUUpdateRegisterBanks(void);
183 void GPUDumpDisassembly(void);
184 void GPUDumpRegisters(void);
185 void GPUDumpMemory(void);
186
187 static void gpu_opcode_add(void);
188 static void gpu_opcode_addc(void);
189 static void gpu_opcode_addq(void);
190 static void gpu_opcode_addqt(void);
191 static void gpu_opcode_sub(void);
192 static void gpu_opcode_subc(void);
193 static void gpu_opcode_subq(void);
194 static void gpu_opcode_subqt(void);
195 static void gpu_opcode_neg(void);
196 static void gpu_opcode_and(void);
197 static void gpu_opcode_or(void);
198 static void gpu_opcode_xor(void);
199 static void gpu_opcode_not(void);
200 static void gpu_opcode_btst(void);
201 static void gpu_opcode_bset(void);
202 static void gpu_opcode_bclr(void);
203 static void gpu_opcode_mult(void);
204 static void gpu_opcode_imult(void);
205 static void gpu_opcode_imultn(void);
206 static void gpu_opcode_resmac(void);
207 static void gpu_opcode_imacn(void);
208 static void gpu_opcode_div(void);
209 static void gpu_opcode_abs(void);
210 static void gpu_opcode_sh(void);
211 static void gpu_opcode_shlq(void);
212 static void gpu_opcode_shrq(void);
213 static void gpu_opcode_sha(void);
214 static void gpu_opcode_sharq(void);
215 static void gpu_opcode_ror(void);
216 static void gpu_opcode_rorq(void);
217 static void gpu_opcode_cmp(void);
218 static void gpu_opcode_cmpq(void);
219 static void gpu_opcode_sat8(void);
220 static void gpu_opcode_sat16(void);
221 static void gpu_opcode_move(void);
222 static void gpu_opcode_moveq(void);
223 static void gpu_opcode_moveta(void);
224 static void gpu_opcode_movefa(void);
225 static void gpu_opcode_movei(void);
226 static void gpu_opcode_loadb(void);
227 static void gpu_opcode_loadw(void);
228 static void gpu_opcode_load(void);
229 static void gpu_opcode_loadp(void);
230 static void gpu_opcode_load_r14_indexed(void);
231 static void gpu_opcode_load_r15_indexed(void);
232 static void gpu_opcode_storeb(void);
233 static void gpu_opcode_storew(void);
234 static void gpu_opcode_store(void);
235 static void gpu_opcode_storep(void);
236 static void gpu_opcode_store_r14_indexed(void);
237 static void gpu_opcode_store_r15_indexed(void);
238 static void gpu_opcode_move_pc(void);
239 static void gpu_opcode_jump(void);
240 static void gpu_opcode_jr(void);
241 static void gpu_opcode_mmult(void);
242 static void gpu_opcode_mtoi(void);
243 static void gpu_opcode_normi(void);
244 static void gpu_opcode_nop(void);
245 static void gpu_opcode_load_r14_ri(void);
246 static void gpu_opcode_load_r15_ri(void);
247 static void gpu_opcode_store_r14_ri(void);
248 static void gpu_opcode_store_r15_ri(void);
249 static void gpu_opcode_sat24(void);
250 static void gpu_opcode_pack(void);
251
252 // This is wrong, since it doesn't take pipeline effects into account. !!! FIX !!!
253 /*uint8_t gpu_opcode_cycles[64] =
254 {
255         3,  3,  3,  3,  3,  3,  3,  3,
256         3,  3,  3,  3,  3,  3,  3,  3,
257         3,  3,  1,  3,  1, 18,  3,  3,
258         3,  3,  3,  3,  3,  3,  3,  3,
259         3,  3,  2,  2,  2,  2,  3,  4,
260         5,  4,  5,  6,  6,  1,  1,  1,
261         1,  2,  2,  2,  1,  1,  9,  3,
262         3,  1,  6,  6,  2,  2,  3,  3
263 };//*/
264 //Here's a QnD kludge...
265 //This is wrong, wrong, WRONG, but it seems to work for the time being...
266 //(That is, it fixes Flip Out which relies on GPU timing rather than semaphores. Bad developers! Bad!)
267 //What's needed here is a way to take pipeline effects into account (including pipeline stalls!)...
268 /*uint8_t gpu_opcode_cycles[64] =
269 {
270         1,  1,  1,  1,  1,  1,  1,  1,
271         1,  1,  1,  1,  1,  1,  1,  1,
272         1,  1,  1,  1,  1,  9,  1,  1,
273         1,  1,  1,  1,  1,  1,  1,  1,
274         1,  1,  1,  1,  1,  1,  1,  2,
275         2,  2,  2,  3,  3,  1,  1,  1,
276         1,  1,  1,  1,  1,  1,  4,  1,
277         1,  1,  3,  3,  1,  1,  1,  1
278 };//*/
// Cycle cost table with one entry per 6-bit opcode value (64 entries, the same
// size as the gpu_opcode[] dispatch table). Every opcode is currently charged
// a single cycle; as the notes above say, this ignores pipeline effects.
uint8_t gpu_opcode_cycles[64] =
{
	/* 0x00-0x0F */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x10-0x1F */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x20-0x2F */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x30-0x3F */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
};
290
291 void (*gpu_opcode[64])()=
292 {
293         gpu_opcode_add,                                 gpu_opcode_addc,                                gpu_opcode_addq,                                gpu_opcode_addqt,
294         gpu_opcode_sub,                                 gpu_opcode_subc,                                gpu_opcode_subq,                                gpu_opcode_subqt,
295         gpu_opcode_neg,                                 gpu_opcode_and,                                 gpu_opcode_or,                                  gpu_opcode_xor,
296         gpu_opcode_not,                                 gpu_opcode_btst,                                gpu_opcode_bset,                                gpu_opcode_bclr,
297         gpu_opcode_mult,                                gpu_opcode_imult,                               gpu_opcode_imultn,                              gpu_opcode_resmac,
298         gpu_opcode_imacn,                               gpu_opcode_div,                                 gpu_opcode_abs,                                 gpu_opcode_sh,
299         gpu_opcode_shlq,                                gpu_opcode_shrq,                                gpu_opcode_sha,                                 gpu_opcode_sharq,
300         gpu_opcode_ror,                                 gpu_opcode_rorq,                                gpu_opcode_cmp,                                 gpu_opcode_cmpq,
301         gpu_opcode_sat8,                                gpu_opcode_sat16,                               gpu_opcode_move,                                gpu_opcode_moveq,
302         gpu_opcode_moveta,                              gpu_opcode_movefa,                              gpu_opcode_movei,                               gpu_opcode_loadb,
303         gpu_opcode_loadw,                               gpu_opcode_load,                                gpu_opcode_loadp,                               gpu_opcode_load_r14_indexed,
304         gpu_opcode_load_r15_indexed,    gpu_opcode_storeb,                              gpu_opcode_storew,                              gpu_opcode_store,
305         gpu_opcode_storep,                              gpu_opcode_store_r14_indexed,   gpu_opcode_store_r15_indexed,   gpu_opcode_move_pc,
306         gpu_opcode_jump,                                gpu_opcode_jr,                                  gpu_opcode_mmult,                               gpu_opcode_mtoi,
307         gpu_opcode_normi,                               gpu_opcode_nop,                                 gpu_opcode_load_r14_ri,                 gpu_opcode_load_r15_ri,
308         gpu_opcode_store_r14_ri,                gpu_opcode_store_r15_ri,                gpu_opcode_sat24,                               gpu_opcode_pack,
309 };
310
311 static uint8_t gpu_ram_8[0x1000];
312 uint32_t gpu_pc;
313 static uint32_t gpu_acc;
314 static uint32_t gpu_remain;
315 static uint32_t gpu_hidata;
316 static uint32_t gpu_flags;
317 static uint32_t gpu_matrix_control;
318 static uint32_t gpu_pointer_to_matrix;
319 static uint32_t gpu_data_organization;
320 static uint32_t gpu_control;
321 static uint32_t gpu_div_control;
322 // There is a distinct advantage to having these separated out--there's no need
323 // to clear a bit before writing a result. I.e., if the result of an operation
324 // leaves a zero in the carry flag, you don't have to zero gpu_flag_c before
325 // you can write that zero!
326 static uint8_t gpu_flag_z, gpu_flag_n, gpu_flag_c;
327 uint32_t gpu_reg_bank_0[32];
328 uint32_t gpu_reg_bank_1[32];
329 static uint32_t * gpu_reg;
330 static uint32_t * gpu_alternate_reg;
331
332 static uint32_t gpu_instruction;
333 static uint32_t gpu_opcode_first_parameter;
334 static uint32_t gpu_opcode_second_parameter;
335
336 #define GPU_RUNNING             (gpu_control & 0x01)
337
338 #define RM                              gpu_reg[gpu_opcode_first_parameter]
339 #define RN                              gpu_reg[gpu_opcode_second_parameter]
340 #define ALTERNATE_RM    gpu_alternate_reg[gpu_opcode_first_parameter]
341 #define ALTERNATE_RN    gpu_alternate_reg[gpu_opcode_second_parameter]
342 #define IMM_1                   gpu_opcode_first_parameter
343 #define IMM_2                   gpu_opcode_second_parameter
344
345 #define SET_FLAG_Z(r)   (gpu_flag_z = ((r) == 0));
346 #define SET_FLAG_N(r)   (gpu_flag_n = (((uint32_t)(r) >> 31) & 0x01));
347
348 #define RESET_FLAG_Z()  gpu_flag_z = 0;
349 #define RESET_FLAG_N()  gpu_flag_n = 0;
350 #define RESET_FLAG_C()  gpu_flag_c = 0;
351
// Flag helper macros operating on gpu_flag_z / gpu_flag_n / gpu_flag_c.
//
// Every macro expands to ONE parenthesized expression, so each behaves as a
// single statement even as the body of an unbraced if/else. (The compound
// forms used to expand to several ';'-separated statements, of which only the
// first was governed by a preceding 'if'.)
//
// The compound forms evaluate their arguments more than once, so do not pass
// expressions with side effects.
#define CLR_Z				(gpu_flag_z = 0)
#define CLR_ZN				(gpu_flag_z = gpu_flag_n = 0)
#define CLR_ZNC				(gpu_flag_z = gpu_flag_n = gpu_flag_c = 0)
// Z := 1 when the result is zero
#define SET_Z(r)			(gpu_flag_z = ((r) == 0))
// N := bit 31 of the result
#define SET_N(r)			(gpu_flag_n = (((uint32_t)(r) >> 31) & 0x01))
// C := 1 when b > ~a, i.e. when a + b does not fit in 32 bits
#define SET_C_ADD(a,b)		(gpu_flag_c = ((uint32_t)(b) > (uint32_t)(~(a))))
// C := 1 when b > a (unsigned), i.e. when a - b borrows
#define SET_C_SUB(a,b)		(gpu_flag_c = ((uint32_t)(b) > (uint32_t)(a)))
#define SET_ZN(r)			(SET_N(r), SET_Z(r))
#define SET_ZNC_ADD(a,b,r)	(SET_N(r), SET_Z(r), SET_C_ADD(a,b))
#define SET_ZNC_SUB(a,b,r)	(SET_N(r), SET_Z(r), SET_C_SUB(a,b))
362
// Lookup table that is the identity for 1..31 but maps index 0 to 32.
// (Presumably for 5-bit immediate fields where an encoded 0 means 32; the
// users of this table are not visible in this part of the file.)
uint32_t gpu_convert_zero[32] =
{
	32,  1,  2,  3,  4,  5,  6,  7,
	 8,  9, 10, 11, 12, 13, 14, 15,
	16, 17, 18, 19, 20, 21, 22, 23,
	24, 25, 26, 27, 28, 29, 30, 31
};
365
366 uint8_t * branch_condition_table = 0;
367 #define BRANCH_CONDITION(x)     branch_condition_table[(x) + ((jaguar_flags & 7) << 5)]
368
369 uint32_t gpu_opcode_use[64];
370
// Mnemonic for each opcode number; same ordering as the gpu_opcode[] dispatch
// table.
const char * gpu_opcode_str[64]=
{
	/* 00 */ "add",              "addc",              "addq",              "addqt",
	/* 04 */ "sub",              "subc",              "subq",              "subqt",
	/* 08 */ "neg",              "and",               "or",                "xor",
	/* 12 */ "not",              "btst",              "bset",              "bclr",
	/* 16 */ "mult",             "imult",             "imultn",            "resmac",
	/* 20 */ "imacn",            "div",               "abs",               "sh",
	/* 24 */ "shlq",             "shrq",              "sha",               "sharq",
	/* 28 */ "ror",              "rorq",              "cmp",               "cmpq",
	/* 32 */ "sat8",             "sat16",             "move",              "moveq",
	/* 36 */ "moveta",           "movefa",            "movei",             "loadb",
	/* 40 */ "loadw",            "load",              "loadp",             "load_r14_indexed",
	/* 44 */ "load_r15_indexed", "storeb",            "storew",            "store",
	/* 48 */ "storep",           "store_r14_indexed", "store_r15_indexed", "move_pc",
	/* 52 */ "jump",             "jr",                "mmult",             "mtoi",
	/* 56 */ "normi",            "nop",               "load_r14_ri",       "load_r15_ri",
	/* 60 */ "store_r14_ri",     "store_r15_ri",      "sat24",             "pack",
};
390
391 static uint32_t gpu_in_exec = 0;
392 static uint32_t gpu_releaseTimeSlice_flag = 0;
393
394 void GPUReleaseTimeslice(void)
395 {
396         gpu_releaseTimeSlice_flag = 1;
397 }
398
399 uint32_t GPUGetPC(void)
400 {
401         return gpu_pc;
402 }
403
404 void build_branch_condition_table(void)
405 {
406         if (!branch_condition_table)
407         {
408                 branch_condition_table = (uint8_t *)malloc(32 * 8 * sizeof(branch_condition_table[0]));
409
410                 if (branch_condition_table)
411                 {
412                         for(int i=0; i<8; i++)
413                         {
414                                 for(int j=0; j<32; j++)
415                                 {
416                                         int result = 1;
417                                         if (j & 1)
418                                                 if (i & ZERO_FLAG)
419                                                         result = 0;
420                                         if (j & 2)
421                                                 if (!(i & ZERO_FLAG))
422                                                         result = 0;
423                                         if (j & 4)
424                                                 if (i & (CARRY_FLAG << (j >> 4)))
425                                                         result = 0;
426                                         if (j & 8)
427                                                 if (!(i & (CARRY_FLAG << (j >> 4))))
428                                                         result = 0;
429                                         branch_condition_table[i * 32 + j] = result;
430                                 }
431                         }
432                 }
433         }
434 }
435
436 //
437 // GPU byte access (read)
438 //
439 uint8_t GPUReadByte(uint32_t offset, uint32_t who/*=UNKNOWN*/)
440 {
441         if (offset >= 0xF02000 && offset <= 0xF020FF)
442                 WriteLog("GPU: ReadByte--Attempt to read from GPU register file by %s!\n", whoName[who]);
443
444         if ((offset >= GPU_WORK_RAM_BASE) && (offset < GPU_WORK_RAM_BASE+0x1000))
445                 return gpu_ram_8[offset & 0xFFF];
446         else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset < GPU_CONTROL_RAM_BASE+0x20))
447         {
448                 uint32_t data = GPUReadLong(offset & 0xFFFFFFFC, who);
449
450                 if ((offset & 0x03) == 0)
451                         return data >> 24;
452                 else if ((offset & 0x03) == 1)
453                         return (data >> 16) & 0xFF;
454                 else if ((offset & 0x03) == 2)
455                         return (data >> 8) & 0xFF;
456                 else if ((offset & 0x03) == 3)
457                         return data & 0xFF;
458         }
459
460         return JaguarReadByte(offset, who);
461 }
462
463 //
464 // GPU word access (read)
465 //
466 uint16_t GPUReadWord(uint32_t offset, uint32_t who/*=UNKNOWN*/)
467 {
468         if (offset >= 0xF02000 && offset <= 0xF020FF)
469                 WriteLog("GPU: ReadWord--Attempt to read from GPU register file by %s!\n", whoName[who]);
470
471         if ((offset >= GPU_WORK_RAM_BASE) && (offset < GPU_WORK_RAM_BASE+0x1000))
472         {
473                 offset &= 0xFFF;
474                 uint16_t data = ((uint16_t)gpu_ram_8[offset] << 8) | (uint16_t)gpu_ram_8[offset+1];
475                 return data;
476         }
477         else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset < GPU_CONTROL_RAM_BASE+0x20))
478         {
479 // This looks and smells wrong...
480 // But it *might* be OK...
481                 if (offset & 0x01)                      // Catch cases 1 & 3... (unaligned read)
482                         return (GPUReadByte(offset, who) << 8) | GPUReadByte(offset+1, who);
483
484                 uint32_t data = GPUReadLong(offset & 0xFFFFFFFC, who);
485
486                 if (offset & 0x02)                      // Cases 0 & 2...
487                         return data & 0xFFFF;
488                 else
489                         return data >> 16;
490         }
491
492 //TEMP--Mirror of F03000? No. Writes only...
493 //if (offset >= 0xF0B000 && offset <= 0xF0BFFF)
494 //WriteLog("[GPUR16] --> Possible GPU RAM mirror access by %s!", whoName[who]);
495
496         return JaguarReadWord(offset, who);
497 }
498
499 //
500 // GPU dword access (read)
501 //
502 uint32_t GPUReadLong(uint32_t offset, uint32_t who/*=UNKNOWN*/)
503 {
504         if (offset >= 0xF02000 && offset <= 0xF020FF)
505         {
506                 WriteLog("GPU: ReadLong--Attempt to read from GPU register file (%X) by %s!\n", offset, whoName[who]);
507                 uint32_t reg = (offset & 0xFC) >> 2;
508                 return (reg < 32 ? gpu_reg_bank_0[reg] : gpu_reg_bank_1[reg - 32]); 
509         }
510
511 //      if ((offset >= GPU_WORK_RAM_BASE) && (offset < GPU_WORK_RAM_BASE + 0x1000))
512         if ((offset >= GPU_WORK_RAM_BASE) && (offset <= GPU_WORK_RAM_BASE + 0x0FFC))
513         {
514                 offset &= 0xFFF;
515                 return ((uint32_t)gpu_ram_8[offset] << 24) | ((uint32_t)gpu_ram_8[offset+1] << 16)
516                         | ((uint32_t)gpu_ram_8[offset+2] << 8) | (uint32_t)gpu_ram_8[offset+3];//*/
517 //              return GET32(gpu_ram_8, offset);
518         }
519 //      else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset < GPU_CONTROL_RAM_BASE+0x20))
520         else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset <= GPU_CONTROL_RAM_BASE + 0x1C))
521         {
522                 offset &= 0x1F;
523                 switch (offset)
524                 {
525                 case 0x00:
526                         gpu_flag_c = (gpu_flag_c ? 1 : 0);
527                         gpu_flag_z = (gpu_flag_z ? 1 : 0);
528                         gpu_flag_n = (gpu_flag_n ? 1 : 0);
529
530                         gpu_flags = (gpu_flags & 0xFFFFFFF8) | (gpu_flag_n << 2) | (gpu_flag_c << 1) | gpu_flag_z;
531
532                         return gpu_flags & 0xFFFFC1FF;
533                 case 0x04:
534                         return gpu_matrix_control;
535                 case 0x08:
536                         return gpu_pointer_to_matrix;
537                 case 0x0C:
538                         return gpu_data_organization;
539                 case 0x10:
540                         return gpu_pc;
541                 case 0x14:
542                         return gpu_control;
543                 case 0x18:
544                         return gpu_hidata;
545                 case 0x1C:
546                         return gpu_remain;
547                 default:                                                                // unaligned long read
548 #ifdef GPU_DEBUG
549                         WriteLog("GPU: Read32--unaligned 32 bit read at %08X by %s.\n", GPU_CONTROL_RAM_BASE + offset, whoName[who]);
550 #endif  // GPU_DEBUG
551                         return 0;
552                 }
553         }
554 //TEMP--Mirror of F03000? No. Writes only...
555 //if (offset >= 0xF0B000 && offset <= 0xF0BFFF)
556 //      WriteLog("[GPUR32] --> Possible GPU RAM mirror access by %s!\n", whoName[who]);
557 /*if (offset >= 0xF1D000 && offset <= 0xF1DFFF)
558         WriteLog("[GPUR32] --> Reading from Wavetable ROM!\n");//*/
559
560         return (JaguarReadWord(offset, who) << 16) | JaguarReadWord(offset + 2, who);
561 }
562
563 //
564 // GPU byte access (write)
565 //
566 void GPUWriteByte(uint32_t offset, uint8_t data, uint32_t who/*=UNKNOWN*/)
567 {
568         if (offset >= 0xF02000 && offset <= 0xF020FF)
569                 WriteLog("GPU: WriteByte--Attempt to write to GPU register file by %s!\n", whoName[who]);
570
571         if ((offset >= GPU_WORK_RAM_BASE) && (offset <= GPU_WORK_RAM_BASE + 0x0FFF))
572         {
573                 gpu_ram_8[offset & 0xFFF] = data;
574
575 //This is the same stupid worthless code that was in the DSP!!! AARRRGGGGHHHHH!!!!!!
576 /*              if (!gpu_in_exec)
577                 {
578                         m68k_end_timeslice();
579                         dsp_releaseTimeslice();
580                 }*/
581                 return;
582         }
583         else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset <= GPU_CONTROL_RAM_BASE + 0x1F))
584         {
585                 uint32_t reg = offset & 0x1C;
586                 int bytenum = offset & 0x03;
587
588 //This is definitely wrong!
589                 if ((reg >= 0x1C) && (reg <= 0x1F))
590                         gpu_div_control = (gpu_div_control & (~(0xFF << (bytenum << 3)))) | (data << (bytenum << 3));
591                 else
592                 {
593                         uint32_t old_data = GPUReadLong(offset & 0xFFFFFFC, who);
594                         bytenum = 3 - bytenum; // convention motorola !!!
595                         old_data = (old_data & (~(0xFF << (bytenum << 3)))) | (data << (bytenum << 3));
596                         GPUWriteLong(offset & 0xFFFFFFC, old_data, who);
597                 }
598                 return;
599         }
600 //      WriteLog("gpu: writing %.2x at 0x%.8x\n",data,offset);
601         JaguarWriteByte(offset, data, who);
602 }
603
604 //
605 // GPU word access (write)
606 //
607 void GPUWriteWord(uint32_t offset, uint16_t data, uint32_t who/*=UNKNOWN*/)
608 {
609         if (offset >= 0xF02000 && offset <= 0xF020FF)
610                 WriteLog("GPU: WriteWord--Attempt to write to GPU register file by %s!\n", whoName[who]);
611
612         if ((offset >= GPU_WORK_RAM_BASE) && (offset <= GPU_WORK_RAM_BASE + 0x0FFE))
613         {
614                 gpu_ram_8[offset & 0xFFF] = (data>>8) & 0xFF;
615                 gpu_ram_8[(offset+1) & 0xFFF] = data & 0xFF;//*/
616 /*              offset &= 0xFFF;
617                 SET16(gpu_ram_8, offset, data);//*/
618
619 /*if (offset >= 0xF03214 && offset < 0xF0321F)
620         WriteLog("GPU: Writing WORD (%04X) to GPU RAM (%08X)...\n", data, offset);//*/
621
622
623 //This is the same stupid worthless code that was in the DSP!!! AARRRGGGGHHHHH!!!!!!
624 /*              if (!gpu_in_exec)
625                 {
626                         m68k_end_timeslice();
627                         dsp_releaseTimeslice();
628                 }*/
629                 return;
630         }
631         else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset <= GPU_CONTROL_RAM_BASE + 0x1E))
632         {
633                 if (offset & 0x01)              // This is supposed to weed out unaligned writes, but does nothing...
634                 {
635 #ifdef GPU_DEBUG
636                         WriteLog("GPU: Write16--unaligned write @ %08X [%04X]\n", offset, data);
637                         GPUDumpRegisters();
638 #endif  // GPU_DEBUG
639                         return;
640                 }
641 //Dual locations in this range: $1C Divide unit remainder/Divide unit control (R/W)
642 //This just literally sucks.
643                 if ((offset & 0x1C) == 0x1C)
644                 {
645 //This doesn't look right either--handles cases 1, 2, & 3 all the same!
646                         if (offset & 0x02)
647                                 gpu_div_control = (gpu_div_control & 0xFFFF0000) | (data & 0xFFFF);
648                         else
649                                 gpu_div_control = (gpu_div_control & 0x0000FFFF) | ((data & 0xFFFF) << 16);
650                 }
651                 else
652                 {
653 //WriteLog("[GPU W16:%08X,%04X]", offset, data);
654                         uint32_t old_data = GPUReadLong(offset & 0xFFFFFFC, who);
655
656                         if (offset & 0x02)
657                                 old_data = (old_data & 0xFFFF0000) | (data & 0xFFFF);
658                         else
659                                 old_data = (old_data & 0x0000FFFF) | ((data & 0xFFFF) << 16);
660
661                         GPUWriteLong(offset & 0xFFFFFFC, old_data, who);
662                 }
663
664                 return;
665         }
666         else if ((offset == GPU_WORK_RAM_BASE + 0x0FFF) || (GPU_CONTROL_RAM_BASE + 0x1F))
667         {
668 #ifdef GPU_DEBUG
669                         WriteLog("GPU: Write16--unaligned write @ %08X by %s [%04X]!\n", offset, whoName[who], data);
670                         GPUDumpRegisters();
671 #endif  // GPU_DEBUG
672                 return;
673         }
674
675         // Have to be careful here--this can cause an infinite loop!
676         JaguarWriteWord(offset, data, who);
677 }
678
679 //
680 // GPU dword access (write)
681 //
682 void GPUWriteLong(uint32_t offset, uint32_t data, uint32_t who/*=UNKNOWN*/)
683 {
684         if (offset >= 0xF02000 && offset <= 0xF020FF)
685                 WriteLog("GPU: WriteLong--Attempt to write to GPU register file by %s!\n", whoName[who]);
686
687 //      if ((offset >= GPU_WORK_RAM_BASE) && (offset < GPU_WORK_RAM_BASE + 0x1000))
688         if ((offset >= GPU_WORK_RAM_BASE) && (offset <= GPU_WORK_RAM_BASE + 0x0FFC))
689         {
690 #ifdef GPU_DEBUG
691                 if (offset & 0x03)
692                 {
693                         WriteLog("GPU: Write32--unaligned write @ %08X [%08X] by %s\n", offset, data, whoName[who]);
694                         GPUDumpRegisters();
695                 }
696 #endif  // GPU_DEBUG
697
698                 offset &= 0xFFF;
699                 SET32(gpu_ram_8, offset, data);
700                 return;
701         }
702 //      else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset < GPU_CONTROL_RAM_BASE+0x20))
703         else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset <= GPU_CONTROL_RAM_BASE + 0x1C))
704         {
705                 offset &= 0x1F;
706                 switch (offset)
707                 {
708                 case 0x00:
709                 {
710                         bool IMASKCleared = (gpu_flags & IMASK) && !(data & IMASK);
711                         // NOTE: According to the JTRM, writing a 1 to IMASK has no effect; only the
712                         //       IRQ logic can set it. So we mask it out here to prevent problems...
713                         gpu_flags = data & (~IMASK);
714                         gpu_flag_z = gpu_flags & ZERO_FLAG;
715                         gpu_flag_c = (gpu_flags & CARRY_FLAG) >> 1;
716                         gpu_flag_n = (gpu_flags & NEGA_FLAG) >> 2;
717                         GPUUpdateRegisterBanks();
718                         gpu_control &= ~((gpu_flags & CINT04FLAGS) >> 3);       // Interrupt latch clear bits
719 //Writing here is only an interrupt enable--this approach is just plain wrong!
720 //                      GPUHandleIRQs();
721 //This, however, is A-OK! ;-)
722                         if (IMASKCleared)                                               // If IMASK was cleared,
723                                 GPUHandleIRQs();                                        // see if any other interrupts need servicing!
724 #ifdef GPU_DEBUG
725                         if (gpu_flags & (INT_ENA0 | INT_ENA1 | INT_ENA2 | INT_ENA3 | INT_ENA4))
726                                 WriteLog("GPU: Interrupt enable set by %s! Bits: %02X\n", whoName[who], (gpu_flags >> 4) & 0x1F);
727                         WriteLog("GPU: REGPAGE %s...\n", (gpu_flags & REGPAGE ? "set" : "cleared"));
728 #endif  // GPU_DEBUG
729                         break;
730                 }
731                 case 0x04:
732                         gpu_matrix_control = data;
733                         break;
734                 case 0x08:
735                         // This can only point to long aligned addresses
736                         gpu_pointer_to_matrix = data & 0xFFFFFFFC;
737                         break;
738                 case 0x0C:
739                         gpu_data_organization = data;
740                         break;
741                 case 0x10:
742                         gpu_pc = data;
743 #ifdef GPU_DEBUG
744 WriteLog("GPU: %s setting GPU PC to %08X %s\n", whoName[who], gpu_pc, (GPU_RUNNING ? "(GPU is RUNNING!)" : ""));//*/
745 #endif  // GPU_DEBUG
746                         break;
747                 case 0x14:
748                 {
749 //                      uint32_t gpu_was_running = GPU_RUNNING;
750                         data &= ~0xF7C0;                // Disable writes to INT_LAT0-4 & TOM version number
751
752                         // check for GPU -> CPU interrupt
753                         if (data & 0x02)
754                         {
755 //WriteLog("GPU->CPU interrupt\n");
756                                 if (TOMIRQEnabled(IRQ_GPU))
757                                 {
758 //This is the programmer's responsibility, to make sure the handler is valid, not ours!
759 //                                      if ((TOMIRQEnabled(IRQ_GPU))// && (JaguarInterruptHandlerIsValid(64)))
760                                         {
761                                                 TOMSetPendingGPUInt();
762                                                 m68k_set_irq(2);                        // Set 68000 IPL 2
763                                                 GPUReleaseTimeslice();
764                                         }
765                                 }
766                                 data &= ~0x02;
767                         }
768
769                         // check for CPU -> GPU interrupt #0
770                         if (data & 0x04)
771                         {
772 //WriteLog("CPU->GPU interrupt\n");
773                                 GPUSetIRQLine(0, ASSERT_LINE);
774                                 m68k_end_timeslice();
775                                 DSPReleaseTimeslice();
776                                 data &= ~0x04;
777                         }
778
779                         // single stepping
780                         if (data & 0x10)
781                         {
782                                 //WriteLog("asked to perform a single step (single step is %senabled)\n",(data&0x8)?"":"not ");
783                         }
784
785                         gpu_control = (gpu_control & 0xF7C0) | (data & (~0xF7C0));
786
787                         // if gpu wasn't running but is now running, execute a few cycles
788 #ifndef GPU_SINGLE_STEPPING
789 /*                      if (!gpu_was_running && GPU_RUNNING)
790 #ifdef GPU_DEBUG
791                         {
792                                 WriteLog("GPU: Write32--About to do stupid braindead GPU execution for 200 cycles.\n");
793 #endif  // GPU_DEBUG
794                                 GPUExec(200);
795 #ifdef GPU_DEBUG
796                         }
797 #endif  // GPU_DEBUG//*/
798 #else
799                         if (gpu_control & 0x18)
800                                 GPUExec(1);
801 #endif  // #ifndef GPU_SINGLE_STEPPING
802 #ifdef GPU_DEBUG
803 WriteLog("Write to GPU CTRL by %s: %08X ", whoName[who], data);
804 if (GPU_RUNNING)
805         WriteLog(" --> Starting to run at %08X by %s...", gpu_pc, whoName[who]);
806 else
807         WriteLog(" --> Stopped by %s! (GPU_PC: %08X)", whoName[who], gpu_pc);
808 WriteLog("\n");
809 #endif  // GPU_DEBUG
810 //if (GPU_RUNNING)
811 //      GPUDumpDisassembly();
812 /*if (GPU_RUNNING)
813 {
814         if (gpu_pc == 0xF035D8)
815         {
816 //              GPUDumpDisassembly();
817 //              log_done();
818 //              exit(1);
819                 gpu_control &= 0xFFFFFFFE;      // Don't run it and let's see what happens!
820 //Hmm. Seems to lock up when going into the demo...
821 //Try to disable the collision altogether!
822         }
823 }//*/
824 extern int effect_start5;
825 static bool finished = false;
826 //if (GPU_RUNNING && effect_start5 && !finished)
827 if (GPU_RUNNING && effect_start5 && gpu_pc == 0xF035D8)
828 {
829         // Let's do a dump of $6528!
830 /*      uint32_t numItems = JaguarReadWord(0x6BD6);
831         WriteLog("\nDump of $6528: %u items.\n\n", numItems);
832         for(int i=0; i<numItems*3*4; i+=3*4)
833         {
834                 WriteLog("\t%04X: %08X %08X %08X -> ", 0x6528+i, JaguarReadLong(0x6528+i),
835                         JaguarReadLong(0x6528+i+4), JaguarReadLong(0x6528+i+8));
836                 uint16_t link = JaguarReadWord(0x6528+i+8+2);
837                 for(int j=0; j<40; j+=4)
838                         WriteLog("%08X ", JaguarReadLong(link + j));
839                 WriteLog("\n");
840         }
841         WriteLog("\n");//*/
842         // Let's try a manual blit here...
843 //This isn't working the way it should! !!! FIX !!!
844 //Err, actually, it is.
845 // NOW, it works right! Problem solved!!! It's a blitter bug!
846 /*      uint32_t src = 0x4D54, dst = 0xF03000, width = 10 * 4;
847         for(int y=0; y<127; y++)
848         {
849                 for(int x=0; x<2; x++)
850                 {
851                         JaguarWriteLong(dst, JaguarReadLong(src));
852
853                         src += 4;
854                         dst += 4;
855                 }
856                 src += width - (2 * 4);
857         }//*/
858 /*      finished = true;
859         doGPUDis = true;
860         WriteLog("\nGPU: About to execute collision detection code.\n\n");//*/
861
862 /*      WriteLog("\nGPU: About to execute collision detection code. Data @ 4D54:\n\n");
863         int count = 0;
864         for(int i=0x004D54; i<0x004D54+2048; i++)
865         {
866                 WriteLog("%02X ", JaguarReadByte(i));
867                 count++;
868                 if (count == 32)
869                 {
870                         count = 0;
871                         WriteLog("\n");
872                 }
873         }
874         WriteLog("\n\nData @ F03000:\n\n");
875         count = 0;
876         for(int i=0xF03000; i<0xF03200; i++)
877         {
878                 WriteLog("%02X ", JaguarReadByte(i));
879                 count++;
880                 if (count == 32)
881                 {
882                         count = 0;
883                         WriteLog("\n");
884                 }
885         }
886         WriteLog("\n\n");
887         log_done();
888         exit(0);//*/
889 }
890 //if (!GPU_RUNNING)
891 //      doGPUDis = false;
892 /*if (!GPU_RUNNING && finished)
893 {
894         WriteLog("\nGPU: Finished collision detection code. Exiting!\n\n");
895         GPUDumpRegisters();
896         log_done();
897         exit(0);
898 }//*/
899                         // (?) If we're set running by the M68K (or DSP?) then end its timeslice to
900                         // allow the GPU a chance to run...
901                         // Yes! This partially fixed Trevor McFur...
902                         if (GPU_RUNNING)
903                                 m68k_end_timeslice();
904                         break;
905                 }
906                 case 0x18:
907                         gpu_hidata = data;
908                         break;
909                 case 0x1C:
910                         gpu_div_control = data;
911                         break;
912 //              default:   // unaligned long write
913                         //exit(0);
914                         //__asm int 3
915                 }
916                 return;
917         }
918
919 //      JaguarWriteWord(offset, (data >> 16) & 0xFFFF, who);
920 //      JaguarWriteWord(offset+2, data & 0xFFFF, who);
921 // We're a 32-bit processor, we can do a long write...!
922         JaguarWriteLong(offset, data, who);
923 }
924
925 //
926 // Change register banks if necessary
927 //
928 void GPUUpdateRegisterBanks(void)
929 {
930         int bank = (gpu_flags & REGPAGE);               // REGPAGE bit
931
932         if (gpu_flags & IMASK)                                  // IMASK bit
933                 bank = 0;                                                       // IMASK forces main bank to be bank 0
934
935         if (bank)
936                 gpu_reg = gpu_reg_bank_1, gpu_alternate_reg = gpu_reg_bank_0;
937         else
938                 gpu_reg = gpu_reg_bank_0, gpu_alternate_reg = gpu_reg_bank_1;
939 }
940
941 void GPUHandleIRQs(void)
942 {
943         // Bail out if we're already in an interrupt!
944         if (gpu_flags & IMASK)
945                 return;
946
947         // Get the interrupt latch & enable bits
948         uint32_t bits = (gpu_control >> 6) & 0x1F, mask = (gpu_flags >> 4) & 0x1F;
949
950         // Bail out if latched interrupts aren't enabled
951         bits &= mask;
952         if (!bits)
953                 return;
954
955         // Determine which interrupt to service
956         uint32_t which = 0; //Isn't there a #pragma to disable this warning???
957         if (bits & 0x01)
958                 which = 0;
959         if (bits & 0x02)
960                 which = 1;
961         if (bits & 0x04)
962                 which = 2;
963         if (bits & 0x08)
964                 which = 3;
965         if (bits & 0x10)
966                 which = 4;
967
968         if (start_logging)
969                 WriteLog("GPU: Generating IRQ #%i\n", which);
970
971         // set the interrupt flag
972         gpu_flags |= IMASK;
973         GPUUpdateRegisterBanks();
974
975         // subqt  #4,r31                ; pre-decrement stack pointer
976         // move  pc,r30                 ; address of interrupted code
977         // store  r30,(r31)     ; store return address
978         gpu_reg[31] -= 4;
979         GPUWriteLong(gpu_reg[31], gpu_pc - 2, GPU);
980
981         // movei  #service_address,r30  ; pointer to ISR entry
982         // jump  (r30)                                  ; jump to ISR
983         // nop
984         gpu_pc = gpu_reg[30] = GPU_WORK_RAM_BASE + (which * 0x10);
985 }
986
987 void GPUSetIRQLine(int irqline, int state)
988 {
989         if (start_logging)
990                 WriteLog("GPU: Setting GPU IRQ line #%i\n", irqline);
991
992         uint32_t mask = 0x0040 << irqline;
993         gpu_control &= ~mask;                           // Clear the interrupt latch
994
995         if (state)
996         {
997                 gpu_control |= mask;                    // Assert the interrupt latch
998                 GPUHandleIRQs();                                // And handle the interrupt...
999         }
1000 }
1001
1002 //TEMPORARY: Testing only!
1003 //#include "gpu2.h"
1004 //#include "gpu3.h"
1005
1006 void GPUInit(void)
1007 {
1008 //      memory_malloc_secure((void **)&gpu_ram_8, 0x1000, "GPU work RAM");
1009 //      memory_malloc_secure((void **)&gpu_reg_bank_0, 32 * sizeof(int32_t), "GPU bank 0 regs");
1010 //      memory_malloc_secure((void **)&gpu_reg_bank_1, 32 * sizeof(int32_t), "GPU bank 1 regs");
1011
1012         build_branch_condition_table();
1013
1014         GPUReset();
1015
1016 //TEMPORARY: Testing only!
1017 //      gpu2_init();
1018 //      gpu3_init();
1019 }
1020
1021 void GPUReset(void)
1022 {
1023         // GPU registers (directly visible)
1024         gpu_flags                         = 0x00000000;
1025         gpu_matrix_control    = 0x00000000;
1026         gpu_pointer_to_matrix = 0x00000000;
1027         gpu_data_organization = 0xFFFFFFFF;
1028         gpu_pc                            = 0x00F03000;
1029         gpu_control                       = 0x00002800;                 // Correctly sets this as TOM Rev. 2
1030         gpu_hidata                        = 0x00000000;
1031         gpu_remain                        = 0x00000000;                 // These two registers are RO/WO
1032         gpu_div_control           = 0x00000000;
1033
1034         // GPU internal register
1035         gpu_acc                           = 0x00000000;
1036
1037         gpu_reg = gpu_reg_bank_0;
1038         gpu_alternate_reg = gpu_reg_bank_1;
1039
1040         for(int i=0; i<32; i++)
1041                 gpu_reg[i] = gpu_alternate_reg[i] = 0x00000000;
1042
1043         CLR_ZNC;
1044         memset(gpu_ram_8, 0xFF, 0x1000);
1045         gpu_in_exec = 0;
1046 //not needed    GPUInterruptPending = false;
1047         GPUResetStats();
1048
1049         // Contents of local RAM are quasi-stable; we simulate this by randomizing RAM contents
1050         for(uint32_t i=0; i<4096; i+=4)
1051                 *((uint32_t *)(&gpu_ram_8[i])) = rand();
1052 }
1053
1054
1055 uint32_t GPUReadPC(void)
1056 {
1057         return gpu_pc;
1058 }
1059
1060
1061 void GPUResetStats(void)
1062 {
1063         for(uint32_t i=0; i<64; i++)
1064                 gpu_opcode_use[i] = 0;
1065         WriteLog("--> GPU stats were reset!\n");
1066 }
1067
1068
1069 void GPUDumpDisassembly(void)
1070 {
1071         char buffer[512];
1072
1073         WriteLog("\n---[GPU code at 00F03000]---------------------------\n");
1074         uint32_t j = 0xF03000;
1075         while (j <= 0xF03FFF)
1076         {
1077                 uint32_t oldj = j;
1078                 j += dasmjag(JAGUAR_GPU, buffer, j);
1079                 WriteLog("\t%08X: %s\n", oldj, buffer);
1080         }
1081 }
1082
1083
1084 void GPUDumpRegisters(void)
1085 {
1086         WriteLog("\n---[GPU flags: NCZ %d%d%d]-----------------------\n", gpu_flag_n, gpu_flag_c, gpu_flag_z);
1087         WriteLog("\nRegisters bank 0\n");
1088         for(int j=0; j<8; j++)
1089         {
1090                 WriteLog("\tR%02i = %08X R%02i = %08X R%02i = %08X R%02i = %08X\n",
1091                                                   (j << 2) + 0, gpu_reg_bank_0[(j << 2) + 0],
1092                                                   (j << 2) + 1, gpu_reg_bank_0[(j << 2) + 1],
1093                                                   (j << 2) + 2, gpu_reg_bank_0[(j << 2) + 2],
1094                                                   (j << 2) + 3, gpu_reg_bank_0[(j << 2) + 3]);
1095         }
1096         WriteLog("Registers bank 1\n");
1097         for(int j=0; j<8; j++)
1098         {
1099                 WriteLog("\tR%02i = %08X R%02i = %08X R%02i = %08X R%02i = %08X\n",
1100                                                   (j << 2) + 0, gpu_reg_bank_1[(j << 2) + 0],
1101                                                   (j << 2) + 1, gpu_reg_bank_1[(j << 2) + 1],
1102                                                   (j << 2) + 2, gpu_reg_bank_1[(j << 2) + 2],
1103                                                   (j << 2) + 3, gpu_reg_bank_1[(j << 2) + 3]);
1104         }
1105 }
1106
1107
1108 void GPUDumpMemory(void)
1109 {
1110         WriteLog("\n---[GPU data at 00F03000]---------------------------\n");
1111         for(int i=0; i<0xFFF; i+=4)
1112                 WriteLog("\t%08X: %02X %02X %02X %02X\n", 0xF03000+i, gpu_ram_8[i],
1113                         gpu_ram_8[i+1], gpu_ram_8[i+2], gpu_ram_8[i+3]);
1114 }
1115
1116
1117 void GPUDone(void)
1118 {
1119         WriteLog("\n\n---------------------------------------------------------------------\n");
1120         WriteLog("GPU I/O Registers\n");
1121         WriteLog("---------------------------------------------------------------------\n");
1122         WriteLog("F0%04X   (G_FLAGS): $%06X\n", 0x2100, (gpu_flags & 0xFFFFFFF8) | (gpu_flag_n << 2) | (gpu_flag_c << 1) | gpu_flag_z);
1123         WriteLog("F0%04X    (G_MTXC): $%04X\n", 0x2104, gpu_matrix_control);
1124         WriteLog("F0%04X    (G_MTXA): $%04X\n", 0x2108, gpu_pointer_to_matrix);
1125         WriteLog("F0%04X     (G_END): $%02X\n", 0x210C, gpu_data_organization);
1126         WriteLog("F0%04X      (G_PC): $%06X\n", 0x2110, gpu_pc);
1127         WriteLog("F0%04X    (G_CTRL): $%06X\n", 0x2114, gpu_control);
1128         WriteLog("F0%04X  (G_HIDATA): $%08X\n", 0x2118, gpu_hidata);
1129         WriteLog("F0%04X  (G_REMAIN): $%08X\n", 0x211C, gpu_remain);
1130         WriteLog("F0%04X (G_DIVCTRL): $%02X\n", 0x211C, gpu_div_control);
1131         WriteLog("---------------------------------------------------------------------\n\n\n");
1132
1133         WriteLog("GPU: Stopped at PC=%08X (GPU %s running)\n", (unsigned int)gpu_pc, GPU_RUNNING ? "was" : "wasn't");
1134
1135         // Get the interrupt latch & enable bits
1136         uint8_t bits = (gpu_control >> 6) & 0x1F, mask = (gpu_flags >> 4) & 0x1F;
1137         WriteLog("GPU: Latch bits = %02X, enable bits = %02X\n", bits, mask);
1138
1139         GPUDumpRegisters();
1140         GPUDumpDisassembly();
1141
1142         WriteLog("\nGPU opcodes use:\n");
1143         for(int i=0; i<64; i++)
1144         {
1145                 if (gpu_opcode_use[i])
1146                         WriteLog("\t%17s %lu\n", gpu_opcode_str[i], gpu_opcode_use[i]);
1147         }
1148         WriteLog("\n");
1149 }
1150
1151
1152 //
1153 // Main GPU execution core
1154 //
1155 static int testCount = 1;
1156 static int len = 0;
1157 static bool tripwire = false;
1158 void GPUExec(int32_t cycles)
1159 {
1160         if (!GPU_RUNNING)
1161                 return;
1162
1163 #ifdef GPU_SINGLE_STEPPING
1164         if (gpu_control & 0x18)
1165         {
1166                 cycles = 1;
1167                 gpu_control &= ~0x10;
1168         }
1169 #endif
1170         GPUHandleIRQs();
1171         gpu_releaseTimeSlice_flag = 0;
1172         gpu_in_exec++;
1173
1174         while (cycles > 0 && GPU_RUNNING)
1175         {
1176 if (gpu_ram_8[0x054] == 0x98 && gpu_ram_8[0x055] == 0x0A && gpu_ram_8[0x056] == 0x03
1177         && gpu_ram_8[0x057] == 0x00 && gpu_ram_8[0x058] == 0x00 && gpu_ram_8[0x059] == 0x00)
1178 {
1179         if (gpu_pc == 0xF03000)
1180         {
1181                 extern uint32_t starCount;
1182                 starCount = 0;
1183 /*              WriteLog("GPU: Starting starfield generator... Dump of [R03=%08X]:\n", gpu_reg_bank_0[03]);
1184                 uint32_t base = gpu_reg_bank_0[3];
1185                 for(uint32_t i=0; i<0x100; i+=16)
1186                 {
1187                         WriteLog("%02X: ", i);
1188                         for(uint32_t j=0; j<16; j++)
1189                         {
1190                                 WriteLog("%02X ", JaguarReadByte(base + i + j));
1191                         }
1192                         WriteLog("\n");
1193                 }*/
1194         }
1195 //      if (gpu_pc == 0xF03)
1196         {
1197         }
1198 }//*/
1199 /*if (gpu_pc == 0xF03B9E && gpu_reg_bank_0[01] == 0)
1200 {
1201         GPUDumpRegisters();
1202         WriteLog("GPU: Starting disassembly log...\n");
1203         doGPUDis = true;
1204 }//*/
1205 /*if (gpu_pc == 0xF0359A)
1206 {
1207         doGPUDis = true;
1208         GPUDumpRegisters();
1209 }*/
1210 /*              gpu_flag_c = (gpu_flag_c ? 1 : 0);
1211                 gpu_flag_z = (gpu_flag_z ? 1 : 0);
1212                 gpu_flag_n = (gpu_flag_n ? 1 : 0);*/
1213 #if 0
1214 if (gpu_pc == 0xF03200)
1215         doGPUDis = true;
1216 #endif
1217
1218                 uint16_t opcode = GPUReadWord(gpu_pc, GPU);
1219                 uint32_t index = opcode >> 10;
1220                 gpu_instruction = opcode;                               // Added for GPU #3...
1221                 gpu_opcode_first_parameter = (opcode >> 5) & 0x1F;
1222                 gpu_opcode_second_parameter = opcode & 0x1F;
1223 /*if (gpu_pc == 0xF03BE8)
1224 WriteLog("Start of OP frame write...\n");
1225 if (gpu_pc == 0xF03EEE)
1226 WriteLog("--> Writing BRANCH object ---\n");
1227 if (gpu_pc == 0xF03F62)
1228 WriteLog("--> Writing BITMAP object ***\n");//*/
1229 /*if (gpu_pc == 0xF03546)
1230 {
1231         WriteLog("\n--> GPU PC: F03546\n");
1232         GPUDumpRegisters();
1233         GPUDumpDisassembly();
1234 }//*/
1235 /*if (gpu_pc == 0xF033F6)
1236 {
1237         WriteLog("\n--> GPU PC: F033F6\n");
1238         GPUDumpRegisters();
1239         GPUDumpDisassembly();
1240 }//*/
1241 /*if (gpu_pc == 0xF033CC)
1242 {
1243         WriteLog("\n--> GPU PC: F033CC\n");
1244         GPUDumpRegisters();
1245         GPUDumpDisassembly();
1246 }//*/
1247 /*if (gpu_pc == 0xF033D6)
1248 {
1249         WriteLog("\n--> GPU PC: F033D6 (#%d)\n", testCount++);
1250         GPUDumpRegisters();
1251         GPUDumpMemory();
1252 }//*/
1253 /*if (gpu_pc == 0xF033D8)
1254 {
1255         WriteLog("\n--> GPU PC: F033D8 (#%d)\n", testCount++);
1256         GPUDumpRegisters();
1257         GPUDumpMemory();
1258 }//*/
1259 /*if (gpu_pc == 0xF0358E)
1260 {
1261         WriteLog("\n--> GPU PC: F0358E (#%d)\n", testCount++);
1262         GPUDumpRegisters();
1263         GPUDumpMemory();
1264 }//*/
1265 /*if (gpu_pc == 0xF034CA)
1266 {
1267         WriteLog("\n--> GPU PC: F034CA (#%d)\n", testCount++);
1268         GPUDumpRegisters();
1269 }//*/
1270 /*if (gpu_pc == 0xF034CA)
1271 {
1272         len = gpu_reg[1] + 4;//, r9save = gpu_reg[9];
1273         WriteLog("\nAbout to subtract [#%d] (R14=%08X, R15=%08X, R9=%08X):\n   ", testCount++, gpu_reg[14], gpu_reg[15], gpu_reg[9]);
1274         for(int i=0; i<len; i+=4)
1275                 WriteLog(" %08X", GPUReadLong(gpu_reg[15]+i));
1276         WriteLog("\n   ");
1277         for(int i=0; i<len; i+=4)
1278                 WriteLog(" %08X", GPUReadLong(gpu_reg[14]+i));
1279         WriteLog("\n\n");
1280 }
1281 if (gpu_pc == 0xF034DE)
1282 {
1283         WriteLog("\nSubtracted! (R14=%08X, R15=%08X):\n   ", gpu_reg[14], gpu_reg[15]);
1284         for(int i=0; i<len; i+=4)
1285                 WriteLog(" %08X", GPUReadLong(gpu_reg[15]+i));
1286         WriteLog("\n   ");
1287         for(int i=0; i<len; i+=4)
1288                 WriteLog(" %08X", GPUReadLong(gpu_reg[14]+i));
1289         WriteLog("\n   ");
1290         for(int i=0; i<len; i+=4)
1291                 WriteLog(" --------");
1292         WriteLog("\n   ");
1293         for(int i=0; i<len; i+=4)
1294                 WriteLog(" %08X", GPUReadLong(gpu_reg[9]+4+i));
1295         WriteLog("\n\n");
1296 }//*/
1297 /*if (gpu_pc == 0xF035C8)
1298 {
1299         WriteLog("\n--> GPU PC: F035C8 (#%d)\n", testCount++);
1300         GPUDumpRegisters();
1301         GPUDumpDisassembly();
1302 }//*/
1303
1304 if (gpu_start_log)
1305 {
1306 //      gpu_reset_stats();
1307 static char buffer[512];
1308 dasmjag(JAGUAR_GPU, buffer, gpu_pc);
1309 WriteLog("GPU: [%08X] %s (RM=%08X, RN=%08X) -> ", gpu_pc, buffer, RM, RN);
1310 }//*/
1311 //$E400 -> 1110 01 -> $39 -> 57
1312 //GPU #1
1313                 gpu_pc += 2;
1314                 gpu_opcode[index]();
1315 //GPU #2
1316 //              gpu2_opcode[index]();
1317 //              gpu_pc += 2;
1318 //GPU #3                                (Doesn't show ATARI logo! #1 & #2 do...)
1319 //              gpu_pc += 2;
1320 //              gpu3_opcode[index]();
1321
1322 // BIOS hacking
1323 //GPU: [00F03548] jr      nz,00F03560 (0xd561) (RM=00F03114, RN=00000004) ->     --> JR: Branch taken.
1324 /*static bool firstTime = true;
1325 if (gpu_pc == 0xF03548 && firstTime)
1326 {
1327         gpu_flag_z = 1;
1328 //      firstTime = false;
1329
1330 //static char buffer[512];
1331 //int k=0xF03548;
1332 //while (k<0xF0356C)
1333 //{
1334 //int oldk = k;
1335 //k += dasmjag(JAGUAR_GPU, buffer, k);
1336 //WriteLog("GPU: [%08X] %s\n", oldk, buffer);
1337 //}
1338 //      gpu_start_log = 1;
1339 }//*/
1340 //GPU: [00F0354C] jump    nz,(r29) (0xd3a1) (RM=00F03314, RN=00000004) -> (RM=00F03314, RN=00000004)
1341 /*if (gpu_pc == 0xF0354C)
1342         gpu_flag_z = 0;//, gpu_start_log = 1;//*/
1343
1344                 cycles -= gpu_opcode_cycles[index];
1345                 gpu_opcode_use[index]++;
1346 if (gpu_start_log)
1347         WriteLog("(RM=%08X, RN=%08X)\n", RM, RN);//*/
1348 if ((gpu_pc < 0xF03000 || gpu_pc > 0xF03FFF) && !tripwire)
1349 {
1350         WriteLog("GPU: Executing outside local RAM! GPU_PC: %08X\n", gpu_pc);
1351         tripwire = true;
1352 }
1353         }
1354
1355         gpu_in_exec--;
1356 }
1357
1358 //
1359 // GPU opcodes
1360 //
1361
1362 /*
1363 GPU opcodes use (offset punch--vertically below bad guy):
1364                       add 18686
1365                      addq 32621
1366                       sub 7483
1367                      subq 10252
1368                       and 21229
1369                        or 15003
1370                      btst 1822
1371                      bset 2072
1372                      mult 141
1373                       div 2392
1374                      shlq 13449
1375                      shrq 10297
1376                     sharq 11104
1377                       cmp 6775
1378                      cmpq 5944
1379                      move 31259
1380                     moveq 4473
1381                     movei 23277
1382                     loadb 46
1383                     loadw 4201
1384                      load 28580
1385          load_r14_indexed 1183
1386          load_r15_indexed 1125
1387                    storew 178
1388                     store 10144
1389         store_r14_indexed 320
1390         store_r15_indexed 1
1391                   move_pc 1742
1392                      jump 24467
1393                        jr 18090
1394                       nop 41362
1395 */
1396
1397
1398 static void gpu_opcode_jump(void)
1399 {
1400 #ifdef GPU_DIS_JUMP
1401 const char * condition[32] =
1402 {       "T", "nz", "z", "???", "nc", "nc nz", "nc z", "???", "c", "c nz",
1403         "c z", "???", "???", "???", "???", "???", "???", "???", "???",
1404         "???", "nn", "nn nz", "nn z", "???", "n", "n nz", "n z", "???",
1405         "???", "???", "???", "F" };
1406         if (doGPUDis)
1407                 WriteLog("%06X: JUMP   %s, (R%02u) [NCZ:%u%u%u, R%02u=%08X] ", gpu_pc-2, condition[IMM_2], IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM);
1408 #endif
1409         // normalize flags
1410 /*      gpu_flag_c = (gpu_flag_c ? 1 : 0);
1411         gpu_flag_z = (gpu_flag_z ? 1 : 0);
1412         gpu_flag_n = (gpu_flag_n ? 1 : 0);*/
1413         // KLUDGE: Used by BRANCH_CONDITION
1414         uint32_t jaguar_flags = (gpu_flag_n << 2) | (gpu_flag_c << 1) | gpu_flag_z;
1415
1416         if (BRANCH_CONDITION(IMM_2))
1417         {
1418 #ifdef GPU_DIS_JUMP
1419         if (doGPUDis)
1420                 WriteLog("Branched!\n");
1421 #endif
1422 if (gpu_start_log)
1423         WriteLog("    --> JUMP: Branch taken.\n");
1424                 uint32_t delayed_pc = RM;
1425                 GPUExec(1);
1426                 gpu_pc = delayed_pc;
1427 /*              uint16_t opcode = GPUReadWord(gpu_pc, GPU);
1428                 gpu_opcode_first_parameter = (opcode >> 5) & 0x1F;
1429                 gpu_opcode_second_parameter = opcode & 0x1F;
1430
1431                 gpu_pc = delayed_pc;
1432                 gpu_opcode[opcode>>10]();//*/
1433         }
1434 #ifdef GPU_DIS_JUMP
1435         else
1436                 if (doGPUDis)
1437                         WriteLog("Branch NOT taken.\n");
1438 #endif
1439 }
1440
1441
1442 static void gpu_opcode_jr(void)
1443 {
1444 #ifdef GPU_DIS_JR
1445 const char * condition[32] =
1446 {       "T", "nz", "z", "???", "nc", "nc nz", "nc z", "???", "c", "c nz",
1447         "c z", "???", "???", "???", "???", "???", "???", "???", "???",
1448         "???", "nn", "nn nz", "nn z", "???", "n", "n nz", "n z", "???",
1449         "???", "???", "???", "F" };
1450         if (doGPUDis)
1451                 WriteLog("%06X: JR     %s, %06X [NCZ:%u%u%u] ", gpu_pc-2, condition[IMM_2], gpu_pc+((IMM_1 & 0x10 ? 0xFFFFFFF0 | IMM_1 : IMM_1) * 2), gpu_flag_n, gpu_flag_c, gpu_flag_z);
1452 #endif
1453 /*      if (CONDITION(jaguar.op & 31))
1454         {
1455                 int32_t r1 = (INT8)((jaguar.op >> 2) & 0xF8) >> 2;
1456                 uint32_t newpc = jaguar.PC + r1;
1457                 CALL_MAME_DEBUG;
1458                 jaguar.op = ROPCODE(jaguar.PC);
1459                 jaguar.PC = newpc;
1460                 (*jaguar.table[jaguar.op >> 10])();
1461
1462                 jaguar_icount -= 3;     // 3 wait states guaranteed
1463         }*/
1464         // normalize flags
1465 /*      gpu_flag_n = (gpu_flag_n ? 1 : 0);
1466         gpu_flag_c = (gpu_flag_c ? 1 : 0);
1467         gpu_flag_z = (gpu_flag_z ? 1 : 0);*/
1468         // KLUDGE: Used by BRANCH_CONDITION
1469         uint32_t jaguar_flags = (gpu_flag_n << 2) | (gpu_flag_c << 1) | gpu_flag_z;
1470
1471         if (BRANCH_CONDITION(IMM_2))
1472         {
1473 #ifdef GPU_DIS_JR
1474         if (doGPUDis)
1475                 WriteLog("Branched!\n");
1476 #endif
1477 if (gpu_start_log)
1478         WriteLog("    --> JR: Branch taken.\n");
1479                 int32_t offset = (IMM_1 & 0x10 ? 0xFFFFFFF0 | IMM_1 : IMM_1);           // Sign extend IMM_1
1480                 int32_t delayed_pc = gpu_pc + (offset * 2);
1481                 GPUExec(1);
1482                 gpu_pc = delayed_pc;
1483 /*              uint16_t opcode = GPUReadWord(gpu_pc, GPU);
1484                 gpu_opcode_first_parameter = (opcode >> 5) & 0x1F;
1485                 gpu_opcode_second_parameter = opcode & 0x1F;
1486
1487                 gpu_pc = delayed_pc;
1488                 gpu_opcode[opcode>>10]();//*/
1489         }
1490 #ifdef GPU_DIS_JR
1491         else
1492                 if (doGPUDis)
1493                         WriteLog("Branch NOT taken.\n");
1494 #endif
1495 }
1496
1497
1498 static void gpu_opcode_add(void)
1499 {
1500 #ifdef GPU_DIS_ADD
1501         if (doGPUDis)
1502                 WriteLog("%06X: ADD    R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1503 #endif
1504         uint32_t res = RN + RM;
1505         CLR_ZNC; SET_ZNC_ADD(RN, RM, res);
1506         RN = res;
1507 #ifdef GPU_DIS_ADD
1508         if (doGPUDis)
1509                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1510 #endif
1511 }
1512
1513
1514 static void gpu_opcode_addc(void)
1515 {
1516 #ifdef GPU_DIS_ADDC
1517         if (doGPUDis)
1518                 WriteLog("%06X: ADDC   R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1519 #endif
1520 /*      int dreg = jaguar.op & 31;
1521         uint32_t r1 = jaguar.r[(jaguar.op >> 5) & 31];
1522         uint32_t r2 = jaguar.r[dreg];
1523         uint32_t res = r2 + r1 + ((jaguar.FLAGS >> 1) & 1);
1524         jaguar.r[dreg] = res;
1525         CLR_ZNC; SET_ZNC_ADD(r2,r1,res);*/
1526
1527         uint32_t res = RN + RM + gpu_flag_c;
1528         uint32_t carry = gpu_flag_c;
1529 //      SET_ZNC_ADD(RN, RM, res); //???BUG??? Yes!
1530         SET_ZNC_ADD(RN + carry, RM, res);
1531 //      SET_ZNC_ADD(RN, RM + carry, res);
1532         RN = res;
1533 #ifdef GPU_DIS_ADDC
1534         if (doGPUDis)
1535                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1536 #endif
1537 }
1538
1539
1540 static void gpu_opcode_addq(void)
1541 {
1542 #ifdef GPU_DIS_ADDQ
1543         if (doGPUDis)
1544                 WriteLog("%06X: ADDQ   #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1545 #endif
1546         uint32_t r1 = gpu_convert_zero[IMM_1];
1547         uint32_t res = RN + r1;
1548         CLR_ZNC; SET_ZNC_ADD(RN, r1, res);
1549         RN = res;
1550 #ifdef GPU_DIS_ADDQ
1551         if (doGPUDis)
1552                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1553 #endif
1554 }
1555
1556
1557 static void gpu_opcode_addqt(void)
1558 {
1559 #ifdef GPU_DIS_ADDQT
1560         if (doGPUDis)
1561                 WriteLog("%06X: ADDQT  #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1562 #endif
1563         RN += gpu_convert_zero[IMM_1];
1564 #ifdef GPU_DIS_ADDQT
1565         if (doGPUDis)
1566                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1567 #endif
1568 }
1569
1570
1571 static void gpu_opcode_sub(void)
1572 {
1573 #ifdef GPU_DIS_SUB
1574         if (doGPUDis)
1575                 WriteLog("%06X: SUB    R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1576 #endif
1577         uint32_t res = RN - RM;
1578         SET_ZNC_SUB(RN, RM, res);
1579         RN = res;
1580 #ifdef GPU_DIS_SUB
1581         if (doGPUDis)
1582                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1583 #endif
1584 }
1585
1586
1587 static void gpu_opcode_subc(void)
1588 {
1589 #ifdef GPU_DIS_SUBC
1590         if (doGPUDis)
1591                 WriteLog("%06X: SUBC   R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1592 #endif
1593         // This is how the GPU ALU does it--Two's complement with inverted carry
1594         uint64_t res = (uint64_t)RN + (uint64_t)(RM ^ 0xFFFFFFFF) + (gpu_flag_c ^ 1);
1595         // Carry out of the result is inverted too
1596         gpu_flag_c = ((res >> 32) & 0x01) ^ 1;
1597         RN = (res & 0xFFFFFFFF);
1598         SET_ZN(RN);
1599 #ifdef GPU_DIS_SUBC
1600         if (doGPUDis)
1601                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1602 #endif
1603 }
1604
1605
1606 static void gpu_opcode_subq(void)
1607 {
1608 #ifdef GPU_DIS_SUBQ
1609         if (doGPUDis)
1610                 WriteLog("%06X: SUBQ   #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1611 #endif
1612         uint32_t r1 = gpu_convert_zero[IMM_1];
1613         uint32_t res = RN - r1;
1614         SET_ZNC_SUB(RN, r1, res);
1615         RN = res;
1616 #ifdef GPU_DIS_SUBQ
1617         if (doGPUDis)
1618                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1619 #endif
1620 }
1621
1622
1623 static void gpu_opcode_subqt(void)
1624 {
1625 #ifdef GPU_DIS_SUBQT
1626         if (doGPUDis)
1627                 WriteLog("%06X: SUBQT  #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1628 #endif
1629         RN -= gpu_convert_zero[IMM_1];
1630 #ifdef GPU_DIS_SUBQT
1631         if (doGPUDis)
1632                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1633 #endif
1634 }
1635
1636
1637 static void gpu_opcode_cmp(void)
1638 {
1639 #ifdef GPU_DIS_CMP
1640         if (doGPUDis)
1641                 WriteLog("%06X: CMP    R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1642 #endif
1643         uint32_t res = RN - RM;
1644         SET_ZNC_SUB(RN, RM, res);
1645 #ifdef GPU_DIS_CMP
1646         if (doGPUDis)
1647                 WriteLog("[NCZ:%u%u%u]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z);
1648 #endif
1649 }
1650
1651
1652 static void gpu_opcode_cmpq(void)
1653 {
1654         static int32_t sqtable[32] =
1655                 { 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,-16,-15,-14,-13,-12,-11,-10,-9,-8,-7,-6,-5,-4,-3,-2,-1 };
1656 #ifdef GPU_DIS_CMPQ
1657         if (doGPUDis)
1658                 WriteLog("%06X: CMPQ   #%d, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, sqtable[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1659 #endif
1660         uint32_t r1 = sqtable[IMM_1 & 0x1F]; // I like this better -> (INT8)(jaguar.op >> 2) >> 3;
1661         uint32_t res = RN - r1;
1662         SET_ZNC_SUB(RN, r1, res);
1663 #ifdef GPU_DIS_CMPQ
1664         if (doGPUDis)
1665                 WriteLog("[NCZ:%u%u%u]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z);
1666 #endif
1667 }
1668
1669
1670 static void gpu_opcode_and(void)
1671 {
1672 #ifdef GPU_DIS_AND
1673         if (doGPUDis)
1674                 WriteLog("%06X: AND    R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1675 #endif
1676         RN = RN & RM;
1677         SET_ZN(RN);
1678 #ifdef GPU_DIS_AND
1679         if (doGPUDis)
1680                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1681 #endif
1682 }
1683
1684
1685 static void gpu_opcode_or(void)
1686 {
1687 #ifdef GPU_DIS_OR
1688         if (doGPUDis)
1689                 WriteLog("%06X: OR     R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1690 #endif
1691         RN = RN | RM;
1692         SET_ZN(RN);
1693 #ifdef GPU_DIS_OR
1694         if (doGPUDis)
1695                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1696 #endif
1697 }
1698
1699
1700 static void gpu_opcode_xor(void)
1701 {
1702 #ifdef GPU_DIS_XOR
1703         if (doGPUDis)
1704                 WriteLog("%06X: XOR    R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1705 #endif
1706         RN = RN ^ RM;
1707         SET_ZN(RN);
1708 #ifdef GPU_DIS_XOR
1709         if (doGPUDis)
1710                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1711 #endif
1712 }
1713
1714
1715 static void gpu_opcode_not(void)
1716 {
1717 #ifdef GPU_DIS_NOT
1718         if (doGPUDis)
1719                 WriteLog("%06X: NOT    R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1720 #endif
1721         RN = ~RN;
1722         SET_ZN(RN);
1723 #ifdef GPU_DIS_NOT
1724         if (doGPUDis)
1725                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
1726 #endif
1727 }
1728
1729
1730 static void gpu_opcode_move_pc(void)
1731 {
1732 #ifdef GPU_DIS_MOVEPC
1733         if (doGPUDis)
1734                 WriteLog("%06X: MOVE   PC, R%02u [NCZ:%u%u%u, PC=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, gpu_pc-2, IMM_2, RN);
1735 #endif
1736         // Should be previous PC--this might not always be previous instruction!
1737         // Then again, this will point right at the *current* instruction, i.e., MOVE PC,R!
1738         RN = gpu_pc - 2;
1739 #ifdef GPU_DIS_MOVEPC
1740         if (doGPUDis)
1741                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1742 #endif
1743 }
1744
1745
1746 static void gpu_opcode_sat8(void)
1747 {
1748 #ifdef GPU_DIS_SAT8
1749         if (doGPUDis)
1750                 WriteLog("%06X: SAT8   R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1751 #endif
1752         RN = ((int32_t)RN < 0 ? 0 : (RN > 0xFF ? 0xFF : RN));
1753         SET_ZN(RN);
1754 #ifdef GPU_DIS_SAT8
1755         if (doGPUDis)
1756                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1757 #endif
1758 }
1759
1760
1761 static void gpu_opcode_sat16(void)
1762 {
1763         RN = ((int32_t)RN < 0 ? 0 : (RN > 0xFFFF ? 0xFFFF : RN));
1764         SET_ZN(RN);
1765 }
1766
1767 static void gpu_opcode_sat24(void)
1768 {
1769         RN = ((int32_t)RN < 0 ? 0 : (RN > 0xFFFFFF ? 0xFFFFFF : RN));
1770         SET_ZN(RN);
1771 }
1772
1773
1774 static void gpu_opcode_store_r14_indexed(void)
1775 {
1776 #ifdef GPU_DIS_STORE14I
1777         if (doGPUDis)
1778                 WriteLog("%06X: STORE  R%02u, (R14+$%02X) [NCZ:%u%u%u, R%02u=%08X, R14+$%02X=%08X]\n", gpu_pc-2, IMM_2, gpu_convert_zero[IMM_1] << 2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, gpu_convert_zero[IMM_1] << 2, gpu_reg[14]+(gpu_convert_zero[IMM_1] << 2));
1779 #endif
1780 #ifdef GPU_CORRECT_ALIGNMENT
1781         uint32_t address = gpu_reg[14] + (gpu_convert_zero[IMM_1] << 2);
1782         
1783         if (address >= 0xF03000 && address <= 0xF03FFF)
1784                 GPUWriteLong(address & 0xFFFFFFFC, RN, GPU);
1785         else
1786                 GPUWriteLong(address, RN, GPU);
1787 #else
1788         GPUWriteLong(gpu_reg[14] + (gpu_convert_zero[IMM_1] << 2), RN, GPU);
1789 #endif
1790 }
1791
1792
1793 static void gpu_opcode_store_r15_indexed(void)
1794 {
1795 #ifdef GPU_DIS_STORE15I
1796         if (doGPUDis)
1797                 WriteLog("%06X: STORE  R%02u, (R15+$%02X) [NCZ:%u%u%u, R%02u=%08X, R15+$%02X=%08X]\n", gpu_pc-2, IMM_2, gpu_convert_zero[IMM_1] << 2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, gpu_convert_zero[IMM_1] << 2, gpu_reg[15]+(gpu_convert_zero[IMM_1] << 2));
1798 #endif
1799 #ifdef GPU_CORRECT_ALIGNMENT
1800         uint32_t address = gpu_reg[15] + (gpu_convert_zero[IMM_1] << 2);
1801
1802         if (address >= 0xF03000 && address <= 0xF03FFF)
1803                 GPUWriteLong(address & 0xFFFFFFFC, RN, GPU);
1804         else
1805                 GPUWriteLong(address, RN, GPU);
1806 #else
1807         GPUWriteLong(gpu_reg[15] + (gpu_convert_zero[IMM_1] << 2), RN, GPU);
1808 #endif
1809 }
1810
1811
1812 static void gpu_opcode_load_r14_ri(void)
1813 {
1814 #ifdef GPU_DIS_LOAD14R
1815         if (doGPUDis)
1816                 WriteLog("%06X: LOAD   (R14+R%02u), R%02u [NCZ:%u%u%u, R14+R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM+gpu_reg[14], IMM_2, RN);
1817 #endif
1818 #ifdef GPU_CORRECT_ALIGNMENT
1819         uint32_t address = gpu_reg[14] + RM;
1820
1821         if (address >= 0xF03000 && address <= 0xF03FFF)
1822                 RN = GPUReadLong(address & 0xFFFFFFFC, GPU);
1823         else
1824                 RN = GPUReadLong(address, GPU);
1825 #else
1826         RN = GPUReadLong(gpu_reg[14] + RM, GPU);
1827 #endif
1828 #ifdef GPU_DIS_LOAD14R
1829         if (doGPUDis)
1830                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1831 #endif
1832 }
1833
1834
1835 static void gpu_opcode_load_r15_ri(void)
1836 {
1837 #ifdef GPU_DIS_LOAD15R
1838         if (doGPUDis)
1839                 WriteLog("%06X: LOAD   (R15+R%02u), R%02u [NCZ:%u%u%u, R15+R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM+gpu_reg[15], IMM_2, RN);
1840 #endif
1841 #ifdef GPU_CORRECT_ALIGNMENT
1842         uint32_t address = gpu_reg[15] + RM;
1843
1844         if (address >= 0xF03000 && address <= 0xF03FFF)
1845                 RN = GPUReadLong(address & 0xFFFFFFFC, GPU);
1846         else
1847                 RN = GPUReadLong(address, GPU);
1848 #else
1849         RN = GPUReadLong(gpu_reg[15] + RM, GPU);
1850 #endif
1851 #ifdef GPU_DIS_LOAD15R
1852         if (doGPUDis)
1853                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1854 #endif
1855 }
1856
1857
1858 static void gpu_opcode_store_r14_ri(void)
1859 {
1860 #ifdef GPU_DIS_STORE14R
1861         if (doGPUDis)
1862                 WriteLog("%06X: STORE  R%02u, (R14+R%02u) [NCZ:%u%u%u, R%02u=%08X, R14+R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM+gpu_reg[14]);
1863 #endif
1864 #ifdef GPU_CORRECT_ALIGNMENT
1865         uint32_t address = gpu_reg[14] + RM;
1866
1867         if (address >= 0xF03000 && address <= 0xF03FFF)
1868                 GPUWriteLong(address & 0xFFFFFFFC, RN, GPU);
1869         else
1870                 GPUWriteLong(address, RN, GPU);
1871 #else
1872         GPUWriteLong(gpu_reg[14] + RM, RN, GPU);
1873 #endif
1874 }
1875
1876
1877 static void gpu_opcode_store_r15_ri(void)
1878 {
1879 #ifdef GPU_DIS_STORE15R
1880         if (doGPUDis)
1881                 WriteLog("%06X: STORE  R%02u, (R15+R%02u) [NCZ:%u%u%u, R%02u=%08X, R15+R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM+gpu_reg[15]);
1882 #endif
1883 #ifdef GPU_CORRECT_ALIGNMENT_STORE
1884         uint32_t address = gpu_reg[15] + RM;
1885
1886         if (address >= 0xF03000 && address <= 0xF03FFF)
1887                 GPUWriteLong(address & 0xFFFFFFFC, RN, GPU);
1888         else
1889                 GPUWriteLong(address, RN, GPU);
1890 #else
1891         GPUWriteLong(gpu_reg[15] + RM, RN, GPU);
1892 #endif
1893 }
1894
1895
// NOP: changes no register or flag. The only observable effect is the
// optional disassembly trace.
static void gpu_opcode_nop(void)
{
#ifdef GPU_DIS_NOP
	if (doGPUDis)
	{
		WriteLog("%06X: NOP    [NCZ:%u%u%u]\n", gpu_pc-2, gpu_flag_n, gpu_flag_c, gpu_flag_z);
	}
#endif
}
1903
1904
1905 static void gpu_opcode_pack(void)
1906 {
1907 #ifdef GPU_DIS_PACK
1908         if (doGPUDis)
1909                 WriteLog("%06X: %s R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, (!IMM_1 ? "PACK  " : "UNPACK"), IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1910 #endif
1911         uint32_t val = RN;
1912
1913 //BUG!  if (RM == 0)                            // Pack
1914         if (IMM_1 == 0)                         // Pack
1915                 RN = ((val >> 10) & 0x0000F000) | ((val >> 5) & 0x00000F00) | (val & 0x000000FF);
1916         else                                            // Unpack
1917                 RN = ((val & 0x0000F000) << 10) | ((val & 0x00000F00) << 5) | (val & 0x000000FF);
1918 #ifdef GPU_DIS_PACK
1919         if (doGPUDis)
1920                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
1921 #endif
1922 }
1923
1924
1925 static void gpu_opcode_storeb(void)
1926 {
1927 #ifdef GPU_DIS_STOREB
1928         if (doGPUDis)
1929                 WriteLog("%06X: STOREB R%02u, (R%02u) [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM);
1930 #endif
1931 //Is this right???
1932 // Would appear to be so...!
1933         if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1934                 GPUWriteLong(RM, RN & 0xFF, GPU);
1935         else
1936                 JaguarWriteByte(RM, RN, GPU);
1937 }
1938
1939
1940 static void gpu_opcode_storew(void)
1941 {
1942 #ifdef GPU_DIS_STOREW
1943         if (doGPUDis)
1944                 WriteLog("%06X: STOREW R%02u, (R%02u) [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM);
1945 #endif
1946 #ifdef GPU_CORRECT_ALIGNMENT
1947         if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1948                 GPUWriteLong(RM & 0xFFFFFFFE, RN & 0xFFFF, GPU);
1949         else
1950                 JaguarWriteWord(RM, RN, GPU);
1951 #else
1952         if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1953                 GPUWriteLong(RM, RN & 0xFFFF, GPU);
1954         else
1955                 JaguarWriteWord(RM, RN, GPU);
1956 #endif
1957 }
1958
1959
1960 static void gpu_opcode_store(void)
1961 {
1962 #ifdef GPU_DIS_STORE
1963         if (doGPUDis)
1964                 WriteLog("%06X: STORE  R%02u, (R%02u) [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_pc-2, IMM_2, IMM_1, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN, IMM_1, RM);
1965 #endif
1966 #ifdef GPU_CORRECT_ALIGNMENT
1967         if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1968                 GPUWriteLong(RM & 0xFFFFFFFC, RN, GPU);
1969         else
1970                 GPUWriteLong(RM, RN, GPU);
1971 #else
1972         GPUWriteLong(RM, RN, GPU);
1973 #endif
1974 }
1975
1976
1977 static void gpu_opcode_storep(void)
1978 {
1979 #ifdef GPU_CORRECT_ALIGNMENT
1980         if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1981         {
1982                 GPUWriteLong((RM & 0xFFFFFFF8) + 0, gpu_hidata, GPU);
1983                 GPUWriteLong((RM & 0xFFFFFFF8) + 4, RN, GPU);
1984         }
1985         else
1986         {
1987                 GPUWriteLong(RM + 0, gpu_hidata, GPU);
1988                 GPUWriteLong(RM + 4, RN, GPU);
1989         }
1990 #else
1991         GPUWriteLong(RM + 0, gpu_hidata, GPU);
1992         GPUWriteLong(RM + 4, RN, GPU);
1993 #endif
1994 }
1995
1996 static void gpu_opcode_loadb(void)
1997 {
1998 #ifdef GPU_DIS_LOADB
1999         if (doGPUDis)
2000                 WriteLog("%06X: LOADB  (R%02u), R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2001 #endif
2002         if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
2003                 RN = GPUReadLong(RM, GPU) & 0xFF;
2004         else
2005                 RN = JaguarReadByte(RM, GPU);
2006 #ifdef GPU_DIS_LOADB
2007         if (doGPUDis)
2008                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2009 #endif
2010 }
2011
2012
2013 static void gpu_opcode_loadw(void)
2014 {
2015 #ifdef GPU_DIS_LOADW
2016         if (doGPUDis)
2017                 WriteLog("%06X: LOADW  (R%02u), R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2018 #endif
2019 #ifdef GPU_CORRECT_ALIGNMENT
2020         if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
2021                 RN = GPUReadLong(RM & 0xFFFFFFFE, GPU) & 0xFFFF;
2022         else
2023                 RN = JaguarReadWord(RM, GPU);
2024 #else
2025         if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
2026                 RN = GPUReadLong(RM, GPU) & 0xFFFF;
2027         else
2028                 RN = JaguarReadWord(RM, GPU);
2029 #endif
2030 #ifdef GPU_DIS_LOADW
2031         if (doGPUDis)
2032                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2033 #endif
2034 }
2035
2036
2037 // According to the docs, & "Do The Same", this address is long aligned...
2038 // So let's try it:
2039 // And it works!!! Need to fix all instances...
2040 // Also, Power Drive Rally seems to contradict the idea that only LOADs in
2041 // the $F03000-$F03FFF range are aligned...
2042 #warning "!!! Alignment issues, need to find definitive final word on this !!!"
2043 /*
2044 Preliminary testing on real hardware seems to confirm that something strange goes on
2045 with unaligned reads in main memory. When the address is off by 1, the result is the
2046 same as the long address with the top byte replaced by something. So if the read is
2047 from $401, and $400 has 12 34 56 78, the value read will be $nn345678, where nn is a currently unknown value.
2048 When the address is off by 2, the result would be $nnnn5678, where nnnn is unknown.
2049 When the address is off by 3, the result would be $nnnnnn78, where nnnnnn is unknown.
2050 It may be that the "unknown" values come from the prefetch queue, but not sure how
2051 to test that. They seem to be stable, though, which would indicate such a mechanism.
2052 Sometimes, however, the off by 2 case returns $12345678!
2053 */
2054 static void gpu_opcode_load(void)
2055 {
2056 #ifdef GPU_DIS_LOAD
2057         if (doGPUDis)
2058                 WriteLog("%06X: LOAD   (R%02u), R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2059 #endif
2060 #ifdef GPU_CORRECT_ALIGNMENT
2061         uint32_t mask[4] = { 0x00000000, 0xFF000000, 0xFFFF0000, 0xFFFFFF00 };
2062 //      if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
2063                 RN = GPUReadLong(RM & 0xFFFFFFFC, GPU);
2064 //              RN = GPUReadLong(RM & 0x00FFFFFC, GPU);
2065 //      else
2066 //              RN = GPUReadLong(RM, GPU);
2067         // Simulate garbage in unaligned reads...
2068 //seems that this behavior is different in GPU mem vs. main mem...
2069 //      if ((RM < 0xF03000) || (RM > 0xF0BFFF))
2070 //              RN |= mask[RM & 0x03];
2071 #else
2072         RN = GPUReadLong(RM, GPU);
2073 #endif
2074 #ifdef GPU_DIS_LOAD
2075         if (doGPUDis)
2076                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2077 #endif
2078 }
2079
2080
2081 static void gpu_opcode_loadp(void)
2082 {
2083 #ifdef GPU_CORRECT_ALIGNMENT
2084         if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
2085         {
2086                 gpu_hidata = GPUReadLong((RM & 0xFFFFFFF8) + 0, GPU);
2087                 RN                 = GPUReadLong((RM & 0xFFFFFFF8) + 4, GPU);
2088         }
2089         else
2090         {
2091                 gpu_hidata = GPUReadLong(RM + 0, GPU);
2092                 RN                 = GPUReadLong(RM + 4, GPU);
2093         }
2094 #else
2095         gpu_hidata = GPUReadLong(RM + 0, GPU);
2096         RN                 = GPUReadLong(RM + 4, GPU);
2097 #endif
2098 }
2099
2100
2101 static void gpu_opcode_load_r14_indexed(void)
2102 {
2103 #ifdef GPU_DIS_LOAD14I
2104         if (doGPUDis)
2105                 WriteLog("%06X: LOAD   (R14+$%02X), R%02u [NCZ:%u%u%u, R14+$%02X=%08X, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1] << 2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, gpu_convert_zero[IMM_1] << 2, gpu_reg[14]+(gpu_convert_zero[IMM_1] << 2), IMM_2, RN);
2106 #endif
2107 #ifdef GPU_CORRECT_ALIGNMENT
2108         uint32_t address = gpu_reg[14] + (gpu_convert_zero[IMM_1] << 2);
2109
2110         if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
2111                 RN = GPUReadLong(address & 0xFFFFFFFC, GPU);
2112         else
2113                 RN = GPUReadLong(address, GPU);
2114 #else
2115         RN = GPUReadLong(gpu_reg[14] + (gpu_convert_zero[IMM_1] << 2), GPU);
2116 #endif
2117 #ifdef GPU_DIS_LOAD14I
2118         if (doGPUDis)
2119                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2120 #endif
2121 }
2122
2123
2124 static void gpu_opcode_load_r15_indexed(void)
2125 {
2126 #ifdef GPU_DIS_LOAD15I
2127         if (doGPUDis)
2128                 WriteLog("%06X: LOAD   (R15+$%02X), R%02u [NCZ:%u%u%u, R15+$%02X=%08X, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1] << 2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, gpu_convert_zero[IMM_1] << 2, gpu_reg[15]+(gpu_convert_zero[IMM_1] << 2), IMM_2, RN);
2129 #endif
2130 #ifdef GPU_CORRECT_ALIGNMENT
2131         uint32_t address = gpu_reg[15] + (gpu_convert_zero[IMM_1] << 2);
2132
2133         if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
2134                 RN = GPUReadLong(address & 0xFFFFFFFC, GPU);
2135         else
2136                 RN = GPUReadLong(address, GPU);
2137 #else
2138         RN = GPUReadLong(gpu_reg[15] + (gpu_convert_zero[IMM_1] << 2), GPU);
2139 #endif
2140 #ifdef GPU_DIS_LOAD15I
2141         if (doGPUDis)
2142                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2143 #endif
2144 }
2145
2146
2147 static void gpu_opcode_movei(void)
2148 {
2149 #ifdef GPU_DIS_MOVEI
2150         if (doGPUDis)
2151                 WriteLog("%06X: MOVEI  #$%08X, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, (uint32_t)GPUReadWord(gpu_pc) | ((uint32_t)GPUReadWord(gpu_pc + 2) << 16), IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2152 #endif
2153         // This instruction is followed by 32-bit value in LSW / MSW format...
2154         RN = (uint32_t)GPUReadWord(gpu_pc, GPU) | ((uint32_t)GPUReadWord(gpu_pc + 2, GPU) << 16);
2155         gpu_pc += 4;
2156 #ifdef GPU_DIS_MOVEI
2157         if (doGPUDis)
2158                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2159 #endif
2160 }
2161
2162
2163 static void gpu_opcode_moveta(void)
2164 {
2165 #ifdef GPU_DIS_MOVETA
2166         if (doGPUDis)
2167                 WriteLog("%06X: MOVETA R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u(alt)=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, ALTERNATE_RN);
2168 #endif
2169         ALTERNATE_RN = RM;
2170 #ifdef GPU_DIS_MOVETA
2171         if (doGPUDis)
2172                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u(alt)=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, ALTERNATE_RN);
2173 #endif
2174 }
2175
2176
2177 static void gpu_opcode_movefa(void)
2178 {
2179 #ifdef GPU_DIS_MOVEFA
2180         if (doGPUDis)
2181                 WriteLog("%06X: MOVEFA R%02u, R%02u [NCZ:%u%u%u, R%02u(alt)=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, ALTERNATE_RM, IMM_2, RN);
2182 #endif
2183         RN = ALTERNATE_RM;
2184 #ifdef GPU_DIS_MOVEFA
2185         if (doGPUDis)
2186                 WriteLog("[NCZ:%u%u%u, R%02u(alt)=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, ALTERNATE_RM, IMM_2, RN);
2187 #endif
2188 }
2189
2190
2191 static void gpu_opcode_move(void)
2192 {
2193 #ifdef GPU_DIS_MOVE
2194         if (doGPUDis)
2195                 WriteLog("%06X: MOVE   R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2196 #endif
2197         RN = RM;
2198 #ifdef GPU_DIS_MOVE
2199         if (doGPUDis)
2200                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2201 #endif
2202 }
2203
2204
2205 static void gpu_opcode_moveq(void)
2206 {
2207 #ifdef GPU_DIS_MOVEQ
2208         if (doGPUDis)
2209                 WriteLog("%06X: MOVEQ  #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2210 #endif
2211         RN = IMM_1;
2212 #ifdef GPU_DIS_MOVEQ
2213         if (doGPUDis)
2214                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2215 #endif
2216 }
2217
2218
2219 static void gpu_opcode_resmac(void)
2220 {
2221         RN = gpu_acc;
2222 }
2223
2224
2225 static void gpu_opcode_imult(void)
2226 {
2227 #ifdef GPU_DIS_IMULT
2228         if (doGPUDis)
2229                 WriteLog("%06X: IMULT  R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2230 #endif
2231         RN = (int16_t)RN * (int16_t)RM;
2232         SET_ZN(RN);
2233 #ifdef GPU_DIS_IMULT
2234         if (doGPUDis)
2235                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2236 #endif
2237 }
2238
2239
2240 static void gpu_opcode_mult(void)
2241 {
2242 #ifdef GPU_DIS_MULT
2243         if (doGPUDis)
2244                 WriteLog("%06X: MULT   R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2245 #endif
2246         RN = (uint16_t)RM * (uint16_t)RN;
2247 //      RN = (RM & 0xFFFF) * (RN & 0xFFFF);
2248         SET_ZN(RN);
2249 #ifdef GPU_DIS_MULT
2250         if (doGPUDis)
2251                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2252 #endif
2253 }
2254
2255
2256 static void gpu_opcode_bclr(void)
2257 {
2258 #ifdef GPU_DIS_BCLR
2259         if (doGPUDis)
2260                 WriteLog("%06X: BCLR   #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2261 #endif
2262         uint32_t res = RN & ~(1 << IMM_1);
2263         RN = res;
2264         SET_ZN(res);
2265 #ifdef GPU_DIS_BCLR
2266         if (doGPUDis)
2267                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2268 #endif
2269 }
2270
2271
2272 static void gpu_opcode_btst(void)
2273 {
2274 #ifdef GPU_DIS_BTST
2275         if (doGPUDis)
2276                 WriteLog("%06X: BTST   #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2277 #endif
2278         gpu_flag_z = (~RN >> IMM_1) & 1;
2279 #ifdef GPU_DIS_BTST
2280         if (doGPUDis)
2281                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2282 #endif
2283 }
2284
2285
2286 static void gpu_opcode_bset(void)
2287 {
2288 #ifdef GPU_DIS_BSET
2289         if (doGPUDis)
2290                 WriteLog("%06X: BSET   #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2291 #endif
2292         uint32_t res = RN | (1 << IMM_1);
2293         RN = res;
2294         SET_ZN(res);
2295 #ifdef GPU_DIS_BSET
2296         if (doGPUDis)
2297                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2298 #endif
2299 }
2300
2301
2302 static void gpu_opcode_imacn(void)
2303 {
2304         uint32_t res = (int16_t)RM * (int16_t)(RN);
2305         gpu_acc += res;
2306 }
2307
2308
2309 static void gpu_opcode_mtoi(void)
2310 {
2311         uint32_t _RM = RM;
2312         uint32_t res = RN = (((int32_t)_RM >> 8) & 0xFF800000) | (_RM & 0x007FFFFF);
2313         SET_ZN(res);
2314 }
2315
2316
2317 static void gpu_opcode_normi(void)
2318 {
2319         uint32_t _RM = RM;
2320         uint32_t res = 0;
2321
2322         if (_RM)
2323         {
2324                 while ((_RM & 0xFFC00000) == 0)
2325                 {
2326                         _RM <<= 1;
2327                         res--;
2328                 }
2329                 while ((_RM & 0xFF800000) != 0)
2330                 {
2331                         _RM >>= 1;
2332                         res++;
2333                 }
2334         }
2335         RN = res;
2336         SET_ZN(res);
2337 }
2338
2339 static void gpu_opcode_mmult(void)
2340 {
2341         int count       = gpu_matrix_control & 0x0F;    // Matrix width
2342         uint32_t addr = gpu_pointer_to_matrix;          // In the GPU's RAM
2343         int64_t accum = 0;
2344         uint32_t res;
2345
2346         if (gpu_matrix_control & 0x10)                          // Column stepping
2347         {
2348                 for(int i=0; i<count; i++)
2349                 {
2350                         int16_t a;
2351                         if (i & 0x01)
2352                                 a = (int16_t)((gpu_alternate_reg[IMM_1 + (i >> 1)] >> 16) & 0xFFFF);
2353                         else
2354                                 a = (int16_t)(gpu_alternate_reg[IMM_1 + (i >> 1)] & 0xFFFF);
2355
2356                         int16_t b = ((int16_t)GPUReadWord(addr + 2, GPU));
2357                         accum += a * b;
2358                         addr += 4 * count;
2359                 }
2360         }
2361         else                                                                            // Row stepping
2362         {
2363                 for(int i=0; i<count; i++)
2364                 {
2365                         int16_t a;
2366                         if (i & 0x01)
2367                                 a = (int16_t)((gpu_alternate_reg[IMM_1 + (i >> 1)] >> 16) & 0xFFFF);
2368                         else
2369                                 a = (int16_t)(gpu_alternate_reg[IMM_1 + (i >> 1)] & 0xFFFF);
2370
2371                         int16_t b = ((int16_t)GPUReadWord(addr + 2, GPU));
2372                         accum += a * b;
2373                         addr += 4;
2374                 }
2375         }
2376         RN = res = (int32_t)accum;
2377         // carry flag to do (out of the last add)
2378         SET_ZN(res);
2379 }
2380
2381
2382 static void gpu_opcode_abs(void)
2383 {
2384 #ifdef GPU_DIS_ABS
2385         if (doGPUDis)
2386                 WriteLog("%06X: ABS    R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2387 #endif
2388         gpu_flag_c = RN >> 31;
2389         if (RN == 0x80000000)
2390         //Is 0x80000000 a positive number? If so, then we need to set C to 0 as well!
2391                 gpu_flag_n = 1, gpu_flag_z = 0;
2392         else
2393         {
2394                 if (gpu_flag_c)
2395                         RN = -RN;
2396                 gpu_flag_n = 0; SET_FLAG_Z(RN);
2397         }
2398 #ifdef GPU_DIS_ABS
2399         if (doGPUDis)
2400                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2401 #endif
2402 }
2403
2404
2405 static void gpu_opcode_div(void)        // RN / RM
2406 {
2407 #ifdef GPU_DIS_DIV
2408         if (doGPUDis)
2409                 WriteLog("%06X: DIV    R%02u, R%02u (%s) [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, (gpu_div_control & 0x01 ? "16.16" : "32"), gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2410 #endif
2411 #if 0
2412         if (RM)
2413         {
2414                 if (gpu_div_control & 0x01)             // 16.16 division
2415                 {
2416                         gpu_remain = ((uint64_t)RN << 16) % RM;
2417                         RN = ((uint64_t)RN << 16) / RM;
2418                 }
2419                 else
2420                 {
2421                         // We calculate the remainder first because we destroy RN after
2422                         // this by assigning it to itself.
2423                         gpu_remain = RN % RM;
2424                         RN = RN / RM;
2425                 }
2426         }
2427         else
2428         {
2429                 // This is what happens according to SCPCD. NYAN!
2430                 RN = 0xFFFFFFFF;
2431                 gpu_remain = 0;
2432         }
2433 #else
2434         // Real algorithm, courtesy of SCPCD: NYAN!
2435         uint32_t q = RN;
2436         uint32_t r = 0;
2437
2438         // If 16.16 division, stuff top 16 bits of RN into remainder and put the
2439         // bottom 16 of RN in top 16 of quotient
2440         if (gpu_div_control & 0x01)
2441                 q <<= 16, r = RN >> 16;
2442
2443         for(int i=0; i<32; i++)
2444         {
2445 //              uint32_t sign = (r >> 31) & 0x01;
2446                 uint32_t sign = r & 0x80000000;
2447                 r = (r << 1) | ((q >> 31) & 0x01);
2448                 r += (sign ? RM : -RM);
2449                 q = (q << 1) | (((~r) >> 31) & 0x01);
2450         }
2451
2452         RN = q;
2453         gpu_remain = r;
2454 #endif
2455
2456 #ifdef GPU_DIS_DIV
2457         if (doGPUDis)
2458                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] Remainder: %08X\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN, gpu_remain);
2459 #endif
2460 }
2461
2462
2463 static void gpu_opcode_imultn(void)
2464 {
2465         uint32_t res = (int32_t)((int16_t)RN * (int16_t)RM);
2466         gpu_acc = (int32_t)res;
2467         SET_FLAG_Z(res);
2468         SET_FLAG_N(res);
2469 }
2470
2471
2472 static void gpu_opcode_neg(void)
2473 {
2474 #ifdef GPU_DIS_NEG
2475         if (doGPUDis)
2476                 WriteLog("%06X: NEG    R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2477 #endif
2478         uint32_t res = -RN;
2479         SET_ZNC_SUB(0, RN, res);
2480         RN = res;
2481 #ifdef GPU_DIS_NEG
2482         if (doGPUDis)
2483                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2484 #endif
2485 }
2486
2487
2488 static void gpu_opcode_shlq(void)
2489 {
2490 #ifdef GPU_DIS_SHLQ
2491         if (doGPUDis)
2492                 WriteLog("%06X: SHLQ   #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, 32 - IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2493 #endif
2494 // Was a bug here...
2495 // (Look at Aaron's code: If r1 = 32, then 32 - 32 = 0 which is wrong!)
2496         int32_t r1 = 32 - IMM_1;
2497         uint32_t res = RN << r1;
2498         SET_ZN(res); gpu_flag_c = (RN >> 31) & 1;
2499         RN = res;
2500 #ifdef GPU_DIS_SHLQ
2501         if (doGPUDis)
2502                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2503 #endif
2504 }
2505
2506
2507 static void gpu_opcode_shrq(void)
2508 {
2509 #ifdef GPU_DIS_SHRQ
2510         if (doGPUDis)
2511                 WriteLog("%06X: SHRQ   #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2512 #endif
2513         int32_t r1 = gpu_convert_zero[IMM_1];
2514         uint32_t res = RN >> r1;
2515         SET_ZN(res); gpu_flag_c = RN & 1;
2516         RN = res;
2517 #ifdef GPU_DIS_SHRQ
2518         if (doGPUDis)
2519                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2520 #endif
2521 }
2522
2523
2524 static void gpu_opcode_ror(void)
2525 {
2526 #ifdef GPU_DIS_ROR
2527         if (doGPUDis)
2528                 WriteLog("%06X: ROR    R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2529 #endif
2530         uint32_t r1 = RM & 0x1F;
2531         uint32_t res = (RN >> r1) | (RN << (32 - r1));
2532         SET_ZN(res); gpu_flag_c = (RN >> 31) & 1;
2533         RN = res;
2534 #ifdef GPU_DIS_ROR
2535         if (doGPUDis)
2536                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2537 #endif
2538 }
2539
2540
2541 static void gpu_opcode_rorq(void)
2542 {
2543 #ifdef GPU_DIS_RORQ
2544         if (doGPUDis)
2545                 WriteLog("%06X: RORQ   #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2546 #endif
2547         uint32_t r1 = gpu_convert_zero[IMM_1 & 0x1F];
2548         uint32_t r2 = RN;
2549         uint32_t res = (r2 >> r1) | (r2 << (32 - r1));
2550         RN = res;
2551         SET_ZN(res); gpu_flag_c = (r2 >> 31) & 0x01;
2552 #ifdef GPU_DIS_RORQ
2553         if (doGPUDis)
2554                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2555 #endif
2556 }
2557
2558
2559 static void gpu_opcode_sha(void)
2560 {
2561 /*      int dreg = jaguar.op & 31;
2562         int32_t r1 = (int32_t)jaguar.r[(jaguar.op >> 5) & 31];
2563         uint32_t r2 = jaguar.r[dreg];
2564         uint32_t res;
2565
2566         CLR_ZNC;
2567         if (r1 < 0)
2568         {
2569                 res = (r1 <= -32) ? 0 : (r2 << -r1);
2570                 jaguar.FLAGS |= (r2 >> 30) & 2;
2571         }
2572         else
2573         {
2574                 res = (r1 >= 32) ? ((int32_t)r2 >> 31) : ((int32_t)r2 >> r1);
2575                 jaguar.FLAGS |= (r2 << 1) & 2;
2576         }
2577         jaguar.r[dreg] = res;
2578         SET_ZN(res);*/
2579
2580 #ifdef GPU_DIS_SHA
2581         if (doGPUDis)
2582                 WriteLog("%06X: SHA    R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2583 #endif
2584         uint32_t res;
2585
2586         if ((int32_t)RM < 0)
2587         {
2588                 res = ((int32_t)RM <= -32) ? 0 : (RN << -(int32_t)RM);
2589                 gpu_flag_c = RN >> 31;
2590         }
2591         else
2592         {
2593                 res = ((int32_t)RM >= 32) ? ((int32_t)RN >> 31) : ((int32_t)RN >> (int32_t)RM);
2594                 gpu_flag_c = RN & 0x01;
2595         }
2596         RN = res;
2597         SET_ZN(res);
2598 #ifdef GPU_DIS_SHA
2599         if (doGPUDis)
2600                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2601 #endif
2602
2603 /*      int32_t sRM=(int32_t)RM;
2604         uint32_t _RN=RN;
2605
2606         if (sRM<0)
2607         {
2608                 uint32_t shift=-sRM;
2609                 if (shift>=32) shift=32;
2610                 gpu_flag_c=(_RN&0x80000000)>>31;
2611                 while (shift)
2612                 {
2613                         _RN<<=1;
2614                         shift--;
2615                 }
2616         }
2617         else
2618         {
2619                 uint32_t shift=sRM;
2620                 if (shift>=32) shift=32;
2621                 gpu_flag_c=_RN&0x1;
2622                 while (shift)
2623                 {
2624                         _RN=((int32_t)_RN)>>1;
2625                         shift--;
2626                 }
2627         }
2628         RN=_RN;
2629         SET_FLAG_Z(_RN);
2630         SET_FLAG_N(_RN);*/
2631 }
2632
2633
2634 static void gpu_opcode_sharq(void)
2635 {
2636 #ifdef GPU_DIS_SHARQ
2637         if (doGPUDis)
2638                 WriteLog("%06X: SHARQ  #%u, R%02u [NCZ:%u%u%u, R%02u=%08X] -> ", gpu_pc-2, gpu_convert_zero[IMM_1], IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2639 #endif
2640         uint32_t res = (int32_t)RN >> gpu_convert_zero[IMM_1];
2641         SET_ZN(res); gpu_flag_c = RN & 0x01;
2642         RN = res;
2643 #ifdef GPU_DIS_SHARQ
2644         if (doGPUDis)
2645                 WriteLog("[NCZ:%u%u%u, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_2, RN);
2646 #endif
2647 }
2648
2649
2650 static void gpu_opcode_sh(void)
2651 {
2652 #ifdef GPU_DIS_SH
2653         if (doGPUDis)
2654                 WriteLog("%06X: SH     R%02u, R%02u [NCZ:%u%u%u, R%02u=%08X, R%02u=%08X] -> ", gpu_pc-2, IMM_1, IMM_2, gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2655 #endif
2656         if (RM & 0x80000000)            // Shift left
2657         {
2658                 gpu_flag_c = RN >> 31;
2659                 RN = ((int32_t)RM <= -32 ? 0 : RN << -(int32_t)RM);
2660         }
2661         else                                            // Shift right
2662         {
2663                 gpu_flag_c = RN & 0x01;
2664                 RN = (RM >= 32 ? 0 : RN >> RM);
2665         }
2666         SET_ZN(RN);
2667 #ifdef GPU_DIS_SH
2668         if (doGPUDis)
2669                 WriteLog("[NCZ:%u%u%u, R%02u=%08X, R%02u=%08X]\n", gpu_flag_n, gpu_flag_c, gpu_flag_z, IMM_1, RM, IMM_2, RN);
2670 #endif
2671 }
2672
2673
2674 //Temporary: Testing only!
2675 //#include "gpu2.cpp"
2676 //#include "gpu3.cpp"
2677
2678 #else
2679
2680
2681 // New thread-safe GPU core
2682
// Placeholder entry point for the new GPU core; it does no work yet.
// 'data' is currently unused. Always returns 0.
int GPUCore(void * data)
{
	(void)data;

	// A non-void function must return a value; falling off the end is
	// undefined behaviour in C++.
	return 0;
}
2686
2687 #endif
2688