]> Shamusworld >> Repos - virtualjaguar/blob - src/gpu.cpp
This commit was generated by cvs2svn to compensate for changes in r8,
[virtualjaguar] / src / gpu.cpp
1 //
2 // GPU Core
3 //
4 // by cal2
5 // GCC/SDL port by Niels Wagenaar (Linux/WIN32) and Caz (BeOS)
6 // Cleanups, endian wrongness, and bad ASM amelioration by James L. Hammons
7 // Note: Endian wrongness probably stems from the MAME origins of this emu and
8 //       the braindead way in which MAME handles memory. :-)
9 //
10
11 #include "gpu.h"
12
// Interrupt-clear bits of the GPU flags register, one per interrupt source.
// Writing them to the flags register clears the matching pending bit
// (bits 6-10) in gpu_control; they always read back as zero.
#define CINT0FLAG			(1 << 9)
#define CINT1FLAG			(1 << 10)
#define CINT2FLAG			(1 << 11)
#define CINT3FLAG			(1 << 12)
#define CINT4FLAG			(1 << 13)
// All five interrupt-clear bits together
#define CINT04FLAGS			(CINT0FLAG | CINT1FLAG | CINT2FLAG | CINT3FLAG | CINT4FLAG)
19
// Set elsewhere; when non-zero the interrupt code in this file logs its activity
extern int start_logging;

// Opcode handlers, declared in opcode-number order (same order as gpu_opcode[])

// Opcodes 0-7
static void gpu_opcode_add(void);
static void gpu_opcode_addc(void);
static void gpu_opcode_addq(void);
static void gpu_opcode_addqt(void);
static void gpu_opcode_sub(void);
static void gpu_opcode_subc(void);
static void gpu_opcode_subq(void);
static void gpu_opcode_subqt(void);

// Opcodes 8-15
static void gpu_opcode_neg(void);
static void gpu_opcode_and(void);
static void gpu_opcode_or(void);
static void gpu_opcode_xor(void);
static void gpu_opcode_not(void);
static void gpu_opcode_btst(void);
static void gpu_opcode_bset(void);
static void gpu_opcode_bclr(void);

// Opcodes 16-23
static void gpu_opcode_mult(void);
static void gpu_opcode_imult(void);
static void gpu_opcode_imultn(void);
static void gpu_opcode_resmac(void);
static void gpu_opcode_imacn(void);
static void gpu_opcode_div(void);
static void gpu_opcode_abs(void);
static void gpu_opcode_sh(void);

// Opcodes 24-31
static void gpu_opcode_shlq(void);
static void gpu_opcode_shrq(void);
static void gpu_opcode_sha(void);
static void gpu_opcode_sharq(void);
static void gpu_opcode_ror(void);
static void gpu_opcode_rorq(void);
static void gpu_opcode_cmp(void);
static void gpu_opcode_cmpq(void);

// Opcodes 32-39
static void gpu_opcode_sat8(void);
static void gpu_opcode_sat16(void);
static void gpu_opcode_move(void);
static void gpu_opcode_moveq(void);
static void gpu_opcode_moveta(void);
static void gpu_opcode_movefa(void);
static void gpu_opcode_movei(void);
static void gpu_opcode_loadb(void);

// Opcodes 40-47
static void gpu_opcode_loadw(void);
static void gpu_opcode_load(void);
static void gpu_opcode_loadp(void);
static void gpu_opcode_load_r14_indexed(void);
static void gpu_opcode_load_r15_indexed(void);
static void gpu_opcode_storeb(void);
static void gpu_opcode_storew(void);
static void gpu_opcode_store(void);

// Opcodes 48-55
static void gpu_opcode_storep(void);
static void gpu_opcode_store_r14_indexed(void);
static void gpu_opcode_store_r15_indexed(void);
static void gpu_opcode_move_pc(void);
static void gpu_opcode_jump(void);
static void gpu_opcode_jr(void);
static void gpu_opcode_mmult(void);
static void gpu_opcode_mtoi(void);

// Opcodes 56-63
static void gpu_opcode_normi(void);
static void gpu_opcode_nop(void);
static void gpu_opcode_load_r14_ri(void);
static void gpu_opcode_load_r15_ri(void);
static void gpu_opcode_store_r14_ri(void);
static void gpu_opcode_store_r15_ri(void);
static void gpu_opcode_sat24(void);
static void gpu_opcode_pack(void);
86
87 uint8 gpu_opcode_cycles[64] = 
88 {
89         3,  3,  3,  3,  
90         3,  3,  3,  3,  
91         3,  3,  3,  3,  
92         3,  3,  3,  3,
93         3,  3,  1,  3,  
94         1, 18,  3,  3,  
95         3,  3,  3,  3,  
96         3,  3,  3,  3,
97         3,  3,  2,  2,  
98         2,  2,  3,  4,  
99         5,  4,  5,  6,  
100         6,  1,  1,  1,
101         1,  2,  2,  2,  
102         1,  1,  9,  3,  
103         3,  1,  6,  6,  
104         2,  2,  3,  3
105 };
106
107 void (*gpu_opcode[64])()= 
108 {       
109         gpu_opcode_add,                                 gpu_opcode_addc,                                gpu_opcode_addq,                                gpu_opcode_addqt,
110         gpu_opcode_sub,                                 gpu_opcode_subc,                                gpu_opcode_subq,                                gpu_opcode_subqt,
111         gpu_opcode_neg,                                 gpu_opcode_and,                                 gpu_opcode_or,                                  gpu_opcode_xor,
112         gpu_opcode_not,                                 gpu_opcode_btst,                                gpu_opcode_bset,                                gpu_opcode_bclr,
113         gpu_opcode_mult,                                gpu_opcode_imult,                               gpu_opcode_imultn,                              gpu_opcode_resmac,
114         gpu_opcode_imacn,                               gpu_opcode_div,                                 gpu_opcode_abs,                                 gpu_opcode_sh,
115         gpu_opcode_shlq,                                gpu_opcode_shrq,                                gpu_opcode_sha,                                 gpu_opcode_sharq,
116         gpu_opcode_ror,                                 gpu_opcode_rorq,                                gpu_opcode_cmp,                                 gpu_opcode_cmpq,
117         gpu_opcode_sat8,                                gpu_opcode_sat16,                               gpu_opcode_move,                                gpu_opcode_moveq,
118         gpu_opcode_moveta,                              gpu_opcode_movefa,                              gpu_opcode_movei,                               gpu_opcode_loadb,
119         gpu_opcode_loadw,                               gpu_opcode_load,                                gpu_opcode_loadp,                               gpu_opcode_load_r14_indexed,
120         gpu_opcode_load_r15_indexed,    gpu_opcode_storeb,                              gpu_opcode_storew,                              gpu_opcode_store,
121         gpu_opcode_storep,                              gpu_opcode_store_r14_indexed,   gpu_opcode_store_r15_indexed,   gpu_opcode_move_pc,
122         gpu_opcode_jump,                                gpu_opcode_jr,                                  gpu_opcode_mmult,                               gpu_opcode_mtoi,
123         gpu_opcode_normi,                               gpu_opcode_nop,                                 gpu_opcode_load_r14_ri,                 gpu_opcode_load_r15_ri,
124         gpu_opcode_store_r14_ri,                gpu_opcode_store_r15_ri,                gpu_opcode_sat24,                               gpu_opcode_pack,
125 };
126
127 static uint8 * gpu_ram_8;
128 //static uint16 *gpu_ram_16;
129 //static uint32 *gpu_ram_32;
130
131
132 static uint32 gpu_pc;
133 static uint32 gpu_acc;
134 static uint32 gpu_remain;
135 static uint32 gpu_hidata;
136 static uint32 gpu_flags;
137 static uint32 gpu_matrix_control;
138 static uint32 gpu_pointer_to_matrix;
139 static uint32 gpu_data_organization;
140 static uint32 gpu_control;
141 static uint32 gpu_div_control;
142 static uint8 gpu_flag_z;
143 static uint8 gpu_flag_n;
144 static uint8 gpu_flag_c;    
145 static uint8 gpu_alternate_flag_z;
146 static uint8 gpu_alternate_flag_n;
147 static uint8 gpu_alternate_flag_c;    
148 static uint32 * gpu_reg;
149 static uint32 * gpu_alternate_reg;
150 static uint32 * gpu_reg_bank_0;
151 static uint32 * gpu_reg_bank_1;
152
153 static uint32 gpu_opcode_first_parameter;
154 static uint32 gpu_opcode_second_parameter;
155
// Non-zero while the GPU "go" bit (bit 0 of the control register) is set
#define GPU_RUNNING		(gpu_control & 0x01)

// Operand accessors for the opcode currently being handled
#define Rm				gpu_reg[gpu_opcode_first_parameter]
#define Rn				gpu_reg[gpu_opcode_second_parameter]
#define alternate_Rm	gpu_alternate_reg[gpu_opcode_first_parameter]
#define alternate_Rn	gpu_alternate_reg[gpu_opcode_second_parameter]
#define imm_1			gpu_opcode_first_parameter
#define imm_2			gpu_opcode_second_parameter

// Legacy flag helpers. These expand to a complete statement (the trailing
// semicolon is kept for existing callers). The argument is now parenthesized so
// that expressions such as `a & b` are evaluated before the comparison/mask.
#define set_flag_z(r)	gpu_flag_z = ((r) == 0);
#define set_flag_n(r)	gpu_flag_n = (((r) & 0x80000000) >> 31);

#define reset_flag_z()	gpu_flag_z = 0;
#define reset_flag_n()	gpu_flag_n = 0;
#define reset_flag_c()	gpu_flag_c = 0;

// Expression-style flag helpers
#define CLR_Z				(gpu_flag_z = 0)
#define CLR_ZN				(gpu_flag_z = gpu_flag_n = 0)
#define CLR_ZNC				(gpu_flag_z = gpu_flag_n = gpu_flag_c = 0)
#define SET_Z(r)			(gpu_flag_z = ((r) == 0))
#define SET_N(r)			(gpu_flag_n = (((UINT32)(r) >> 31) & 0x01))
// Carry out of a + b: set when b exceeds the headroom left above a
#define SET_C_ADD(a,b)		(gpu_flag_c = ((UINT32)(b) > (UINT32)(~(a))))
// Borrow out of a - b
#define SET_C_SUB(a,b)		(gpu_flag_c = ((UINT32)(b) > (UINT32)(a)))
// Combined helpers are single comma expressions so that they stay one statement
// when used under an unbraced if/else. Note that r is evaluated more than once.
#define SET_ZN(r)			(SET_N(r), SET_Z(r))
#define SET_ZNC_ADD(a,b,r)	(SET_N(r), SET_Z(r), SET_C_ADD(a,b))
#define SET_ZNC_SUB(a,b,r)	(SET_N(r), SET_Z(r), SET_C_SUB(a,b))
182
183 uint32 gpu_convert_zero[32] = { 32,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 };
184
185 uint8 * branch_condition_table = 0;
186 #define branch_condition(x)     branch_condition_table[(x) + ((jaguar_flags & 7) << 5)]
187
188 uint32 gpu_opcode_use[64];
189
190 void gpu_update_register_banks(void);
191
// Opcode names, in the same order as the gpu_opcode[] handler table
char *gpu_opcode_str[64]=
{
	"add",				"addc",					// 0-1
	"addq",				"addqt",				// 2-3
	"sub",				"subc",					// 4-5
	"subq",				"subqt",				// 6-7
	"neg",				"and",					// 8-9
	"or",				"xor",					// 10-11
	"not",				"btst",					// 12-13
	"bset",				"bclr",					// 14-15
	"mult",				"imult",				// 16-17
	"imultn",			"resmac",				// 18-19
	"imacn",			"div",					// 20-21
	"abs",				"sh",					// 22-23
	"shlq",				"shrq",					// 24-25
	"sha",				"sharq",				// 26-27
	"ror",				"rorq",					// 28-29
	"cmp",				"cmpq",					// 30-31
	"sat8",				"sat16",				// 32-33
	"move",				"moveq",				// 34-35
	"moveta",			"movefa",				// 36-37
	"movei",			"loadb",				// 38-39
	"loadw",			"load",					// 40-41
	"loadp",			"load_r14_indexed",		// 42-43
	"load_r15_indexed",	"storeb",				// 44-45
	"storew",			"store",				// 46-47
	"storep",			"store_r14_indexed",	// 48-49
	"store_r15_indexed","move_pc",				// 50-51
	"jump",				"jr",					// 52-53
	"mmult",			"mtoi",					// 54-55
	"normi",			"nop",					// 56-57
	"load_r14_ri",		"load_r15_ri",			// 58-59
	"store_r14_ri",		"store_r15_ri",			// 60-61
	"sat24",			"pack",					// 62-63
};
211
212 static uint32 gpu_in_exec = 0;
213 static uint32 gpu_releaseTimeSlice_flag = 0;
214
215
216 void gpu_releaseTimeslice(void)
217 {
218         gpu_releaseTimeSlice_flag = 1;
219 }
220
221 uint32 gpu_get_pc(void)
222 {
223         return gpu_pc;
224 }
225
226 void build_branch_condition_table(void)
227 {
228 #define ZFLAG   0x00001
229 #define CFLAG   0x00002
230 #define NFLAG   0x00004
231         
232         if (!branch_condition_table)
233         {
234                 branch_condition_table = (uint8*)malloc(32 * 8 * sizeof(branch_condition_table[0]));
235
236                 if (branch_condition_table)
237                 {
238                         for(int i=0; i<8; i++)
239                         {
240                                 for(int j=0; j<32; j++)
241                                 {
242                                         int result = 1;
243                                         if (j & 1)
244                                                 if (i & ZFLAG)
245                                                         result = 0;
246                                         if (j & 2)
247                                                 if (!(i & ZFLAG))
248                                                         result = 0;
249                                         if (j & 4)
250                                                 if (i & (CFLAG << (j >> 4)))
251                                                         result = 0;
252                                         if (j & 8)
253                                                 if (!(i & (CFLAG << (j >> 4))))
254                                                         result = 0;
255                                         branch_condition_table[i * 32 + j] = result;
256                                 }
257                         }
258                 }
259         }
260 }
261
262 //
263 // GPU byte access (read)
264 //
265
266 unsigned gpu_byte_read(unsigned int offset)
267 {       
268         if ((offset >= gpu_work_ram_base) && (offset < gpu_work_ram_base+0x1000))
269                 return gpu_ram_8[offset & 0xFFF];
270         else if ((offset >= gpu_control_ram_base) && (offset < gpu_control_ram_base+0x20))
271         {
272                 uint32 data = gpu_long_read(offset & 0xFFFFFFFC);
273
274                 if ((offset & 0x03) == 0)
275                         return data >> 24;
276                 else if ((offset & 0x03) == 1)
277                         return (data >> 16) & 0xFF;
278                 else if ((offset & 0x03) == 2)
279                         return (data >> 8) & 0xFF;
280                 else if ((offset & 0x03) == 3)
281                         return data & 0xFF;
282         }
283
284         return jaguar_byte_read(offset);
285 }
286
287 //
288 // GPU word access (read)
289 //
290
291 unsigned gpu_word_read(unsigned int offset)
292 {
293         if ((offset >= gpu_work_ram_base) && (offset < gpu_work_ram_base+0x1000))
294         {
295                 offset &= 0xFFF;
296                 uint16 data = ((uint16)gpu_ram_8[offset] << 8) | (uint16)gpu_ram_8[offset+1];
297                 return data;
298         }
299         else if ((offset >= gpu_control_ram_base) && (offset < gpu_control_ram_base+0x20))
300         {
301 // This looks and smells wrong...
302 // But it *might* be OK...
303                 if (offset & 0x01)                      // Catch cases 1 & 3... (unaligned read)
304                         return (gpu_byte_read(offset) << 8) | gpu_byte_read(offset+1);
305
306                 uint32 data = gpu_long_read(offset & 0xFFFFFFFC);
307
308                 if (offset & 0x02)                      // Cases 0 & 2...
309                         return data & 0xFFFF;
310                 else
311                         return data >> 16;
312         }
313
314         return jaguar_word_read(offset);
315 }
316
317 //
318 // GPU dword access (read)
319 //
320
321 unsigned gpu_long_read(unsigned int offset)
322 {
323
324         if ((offset >= gpu_work_ram_base) && (offset < gpu_work_ram_base+0x1000))
325         {
326                 offset &= 0xFFF;
327                 return ((uint32)gpu_ram_8[offset] << 24) | ((uint32)gpu_ram_8[offset+1] << 16)
328                         | ((uint32)gpu_ram_8[offset+2] << 8) | (uint32)gpu_ram_8[offset+3];
329         }
330         else if ((offset >= gpu_control_ram_base) && (offset < gpu_control_ram_base+0x20))
331         {
332                 offset &= 0x1F;
333                 switch (offset)
334                 {
335                 case 0x00:
336                         gpu_flag_c = (gpu_flag_c ? 1 : 0);
337                         gpu_flag_z = (gpu_flag_z ? 1 : 0);
338                         gpu_flag_n = (gpu_flag_n ? 1 : 0);
339
340                         gpu_flags = (gpu_flags & 0xFFFFFFF8) | (gpu_flag_n << 2) | (gpu_flag_c << 1) | gpu_flag_z;
341                                         
342                         return gpu_flags & 0xFFFFC1FF;
343                 case 0x04:
344                         return gpu_matrix_control;
345                 case 0x08:
346                         return gpu_pointer_to_matrix;
347                 case 0x0C:
348                         return gpu_data_organization;
349                 case 0x10:
350                         return gpu_pc;
351                 case 0x14:
352                         return gpu_control;
353                 case 0x18:
354                         return gpu_hidata;
355                 case 0x1C:
356                         return gpu_remain;
357                 default:                                                                // unaligned long read
358                         return 0;
359                         //exit(0);
360                         //         __asm int 3
361                         //                 }
362                 }
363                 // to prevent any lock-ups
364         }
365
366         return (jaguar_word_read(offset) << 16) | jaguar_word_read(offset+2);
367 }
368
369 //
370 // GPU byte access (write)
371 //
372
373 void gpu_byte_write(unsigned offset, unsigned data)
374 {
375         if ((offset >= gpu_work_ram_base) && (offset < gpu_work_ram_base+0x1000))
376         {
377                 gpu_ram_8[offset & 0xFFF] = data;
378                 if (gpu_in_exec == 0)
379                 {
380 //                      s68000releaseTimeslice();
381                         m68k_end_timeslice();
382                         dsp_releaseTimeslice();
383                 }
384                 return;
385         }
386         else if ((offset >= gpu_control_ram_base) && (offset < gpu_control_ram_base+0x20))
387         {
388                 uint32 reg = offset & 0x1C;
389                 int bytenum = offset & 0x03;
390
391 //This is definitely wrong!
392                 if ((reg >= 0x1C) && (reg <= 0x1F))
393                         gpu_div_control = (gpu_div_control & (~(0xFF << (bytenum << 3)))) | (data << (bytenum << 3));                           
394                 else
395                 {
396                         uint32 old_data = gpu_long_read(offset & 0xFFFFFFC);
397                         bytenum = 3 - bytenum; // convention motorola !!!
398                         old_data = (old_data & (~(0xFF << (bytenum << 3)))) | (data << (bytenum << 3)); 
399                         gpu_long_write(offset & 0xFFFFFFC, old_data);
400                 }
401                 return;
402         }
403 //      fprintf(log_get(),"gpu: writing %.2x at 0x%.8x\n",data,offset);
404         jaguar_byte_write(offset, data);
405 }
406
407 //
408 // GPU word access (write)
409 //
410
411 void gpu_word_write(unsigned offset, unsigned data)
412 {
413
414         if ((offset >= gpu_work_ram_base) && (offset < gpu_work_ram_base+0x1000))
415         {
416
417                 gpu_ram_8[offset & 0xFFF] = (data>>8) & 0xFF;
418                 gpu_ram_8[(offset+1) & 0xFFF] = data & 0xFF;
419                 if (gpu_in_exec == 0)
420                 {
421 //                      s68000releaseTimeslice();
422                         m68k_end_timeslice();
423                         dsp_releaseTimeslice();
424                 }
425                 return;
426         }
427         if ((offset >= gpu_control_ram_base) && (offset < gpu_control_ram_base+0x20))
428         {
429                 if (offset & 0x01)              // This is supposed to weed out unaligned writes, but does nothing...
430                 {
431                         //exit(0);
432                         //__asm int 3
433                 }
434                 if ((offset & 0x1C) == 0x1C)
435                 {
436 //This doesn't look right either--handles cases 1, 2, & 3 all the same!
437                         if (offset & 0x03)
438                                 gpu_div_control = (gpu_div_control&0xFFFF0000) | (data&0xFFFF);
439                         else
440                                 gpu_div_control = (gpu_div_control&0xFFFF) | ((data&0xFFFF)<<16);
441                 }
442                 else 
443                 {
444                         uint32 old_data = gpu_long_read(offset & 0xFFFFFFC);
445                         if (offset & 0x03)
446                                 old_data = (old_data & 0xFFFF0000) | (data & 0xFFFF);
447                         else
448                                 old_data = (old_data & 0xFFFF) | ((data & 0xFFFF) << 16);
449                         gpu_long_write(offset & 0xFFFFFFC, old_data);
450                 }
451                 return;
452         }
453 //      fprintf(log_get(),"gpu: writing %.4x at 0x%.8x\n",data,offset);
454         jaguar_word_write(offset, data);
455 }
456
457 //
458 // GPU dword access (write)
459 //
460
461 void gpu_long_write(unsigned offset, unsigned data)
462 {
463
464         if ((offset >= gpu_work_ram_base) && (offset < gpu_work_ram_base+0x1000))
465         {
466                 gpu_ram_8[offset & 0xFFF] = (data >> 24) & 0xFF;
467                 gpu_ram_8[(offset+1) & 0xFFF] = (data >> 16) & 0xFF;
468                 gpu_ram_8[(offset+2) & 0xFFF] = (data >> 8) & 0xFF;
469                 gpu_ram_8[(offset+3) & 0xFFF] = data & 0xFF;
470                 return;
471         }
472         else if ((offset >= gpu_control_ram_base) && (offset < gpu_control_ram_base+0x20))
473         {
474                 offset &= 0x1F;
475                 switch (offset)
476                 {
477                 case 0x00:
478                         /*if (data&0x8)
479                                 gpu_flags=(data&(~0x08))|(gpu_flags&0x08); // update dsp_flags, but keep imask unchanged
480                         else*/
481                         gpu_flags = data;
482                         gpu_flag_z = gpu_flags & 0x01;
483                         gpu_flag_c = (gpu_flags>>1) & 0x01;
484                         gpu_flag_n = (gpu_flags>>2) & 0x01;
485                         gpu_update_register_banks();
486                         gpu_control &= ~((gpu_flags & CINT04FLAGS) >> 3);
487                         gpu_check_irqs();
488                         break;
489                 case 0x04:
490                         gpu_matrix_control = data;
491                         break;
492                 case 0x08:
493                         gpu_pointer_to_matrix=data;
494                         break;
495                 case 0x0C:
496                         gpu_data_organization=data;
497                         break;
498                 case 0x10:
499                         gpu_pc = data;  /*fprintf(log_get(),"setting gpu pc to 0x%.8x\n",gpu_pc);*/
500                         break;
501                 case 0x14:
502                 {       
503                         uint32 gpu_was_running = GPU_RUNNING;
504                                                 
505                         data &= (~0x7C0); // disable writes to irq pending
506                         /*if (GPU_RUNNING)
507                         {
508                                 fprintf(log_get(),"gpu pc is 0x%.8x\n",gpu_pc);
509                                 fclose(log_get());
510                                 exit(0);
511                         }*/
512                         // check for GPU->CPU interrupt
513                         if (data & 0x02)
514                         {
515 //                              fprintf(log_get(),"GPU->CPU interrupt\n");
516                                 if (tom_irq_enabled(IRQ_GPU))
517                                 {
518                                         if ((tom_irq_enabled(IRQ_GPU)) && (jaguar_interrupt_handler_is_valid(64)))
519                                         {
520                                                 tom_set_pending_gpu_int();
521 //                                              s68000interrupt(7,64);
522 //                                              s68000flushInterrupts();
523                                                 m68k_set_irq(7);                        // Set 68000 NMI
524                                                 gpu_releaseTimeslice();
525                                         }
526 /*
527                                         uint32 addr=jaguar_word_read(((IRQ_GPU+64)<<2)+0);
528                                         addr<<=16;
529                                         addr|=jaguar_word_read(((IRQ_GPU+64)<<2)+2);
530                                         if ((addr)&&(jaguar_interrupt_handler_is_valid(IRQ_GPU+64)))
531                                         {
532                                                 s68000interrupt(7,IRQ_GPU+64);
533                                                 s68000flushInterrupts();
534                                         }
535 */
536                                 }
537                                 data &= ~(0x02);
538                         }
539                         // check for CPU->GPU interrupt
540                         if (data & 0x04)
541                         {
542                                 //fprintf(log_get(),"CPU->GPU interrupt\n");
543                                 gpu_set_irq_line(0, 1);
544 //                              s68000releaseTimeslice();
545                                 m68k_end_timeslice();
546                                 dsp_releaseTimeslice();
547                                 data &= ~(0x04);
548                         }
549                         // single stepping
550                         if (data & 0x10)
551                         {
552                                 //fprintf(log_get(),"asked to perform a single step (single step is %senabled)\n",(data&0x8)?"":"not ");
553                         }
554                         gpu_control = (gpu_control & 0x107C0) | (data & (~0x107C0));
555
556                         // if gpu wasn't running but is now running, execute a few cycles
557 #ifndef GPU_SINGLE_STEPPING
558                         if ((!gpu_was_running) && (GPU_RUNNING))
559                                 gpu_exec(200);
560 #else
561                         if (gpu_control & 0x18)
562                                 gpu_exec(1);
563 #endif  // #ifndef GPU_SINGLE_STEPPING
564 #ifdef GPU_DEBUG
565 fprintf(log_get(), "Write to GPU CTRL: %08X ", data);
566 if (GPU_RUNNING)
567         fprintf(log_get(), "-- Starting to run at %08X...", gpu_pc);
568 fprintf(log_get(), "\n");
569 #endif  // #ifdef GPU_DEBUG
570                         break;
571                 }
572                 case 0x18:
573                         gpu_hidata = data;
574                         break;
575                 case 0x1C:
576                         gpu_div_control = data;
577                         break;
578 //              default:   // unaligned long write
579                         //exit(0);
580                         //__asm int 3
581                 }
582                 return;
583         }
584 //      fprintf(log_get(),"gpu: writing %.8x at 0x%.8x\n",data,offset);
585         jaguar_word_write(offset, (data >> 16) & 0xFFFF);
586         jaguar_word_write(offset+2, data & 0xFFFF);
587 }
588
589 void gpu_update_register_banks(void)
590 {
591         uint32 temp;
592         int bank = (gpu_flags & 0x4000);
593
594 //      fprintf(log_get(),"gpu_update_register_banks at gpu pc 0x%.8x bank=%i iflag=%i\n",gpu_pc,bank?1:0,(gpu_flags&0x8)?1:0);
595
596         if (gpu_flags & 0x8) 
597                 bank = 0;
598
599         if ((!bank && (gpu_reg_bank_0 != gpu_reg)) || (bank && (gpu_reg_bank_1 != gpu_reg)))
600         {
601 //              fprintf(log_get(),"\tswitching to bank %i\n",bank?1:0);
602                 for(int i=0; i<32; i++)
603                 {
604                         temp = gpu_reg[i];
605                         gpu_reg[i] = gpu_alternate_reg[i];
606                         gpu_alternate_reg[i] = temp;
607                 }
608
609                 // switch flags
610                 temp = gpu_flag_z;
611                 gpu_flag_z = gpu_alternate_flag_z;
612                 gpu_alternate_flag_z = temp;
613
614                 temp = gpu_flag_n;
615                 gpu_flag_n = gpu_alternate_flag_n;
616                 gpu_alternate_flag_n = temp;
617
618                 temp = gpu_flag_c;
619                 gpu_flag_c = gpu_alternate_flag_c;
620                 gpu_alternate_flag_c = temp;
621
622                 if (!bank)
623                 {
624                         gpu_reg_bank_0 = gpu_reg;
625                         gpu_reg_bank_1 = gpu_alternate_reg;
626                 }
627                 else
628                 {
629                         gpu_reg_bank_0 = gpu_alternate_reg;
630                         gpu_reg_bank_1 = gpu_reg;
631                 }
632         }
633 //      else
634 //      {
635 //              fprintf(log_get(),"\tnot switching banks\n");
636 //      }
637 }
638
639 void gpu_check_irqs(void)
640 {
641         int bits, mask, which = 0;
642
643         // get the active interrupt bits 
644         bits = (gpu_control >> 6) & 0x1F;
645         bits |= (gpu_control >> 10) & 0x20;
646
647         // get the interrupt mask 
648         mask = (gpu_flags >> 4) & 0x1F;
649         mask |= (gpu_flags >> 11) & 0x20;
650         
651         // bail if nothing is available
652         bits &= mask;
653         if (!bits)
654                 return;
655         
656         // determine which interrupt 
657         if (bits & 0x01) which = 0;
658         if (bits & 0x02) which = 1;
659         if (bits & 0x04) which = 2;
660         if (bits & 0x08) which = 3;
661         if (bits & 0x10) which = 4;
662         if (bits & 0x20) which = 5;
663
664         if (gpu_flags & 0x8) 
665                 return;
666
667         if (start_logging)
668                 fprintf(log_get(),"gpu: generating irg  %i\n",which);
669
670         // set the interrupt flag 
671         gpu_flags |= 0x08;
672         gpu_update_register_banks();
673
674         // subqt  #4,r31                ; pre-decrement stack pointer 
675         // move  pc,r30                 ; address of interrupted code 
676         // store  r30,(r31)     ; store return address
677         gpu_reg[31] -= 4;
678         gpu_reg[30] = gpu_pc - 2;
679         gpu_long_write(gpu_reg[31], gpu_pc - 2);
680         
681         // movei  #service_address,r30  ; pointer to ISR entry 
682         // jump  (r30)                                  ; jump to ISR 
683         // nop
684         gpu_pc = gpu_work_ram_base;
685         gpu_pc += which * 0x10;
686         gpu_reg[30] = gpu_pc;
687 }
688
689 void gpu_set_irq_line(int irqline, int state)
690 {
691         if (start_logging)
692                 fprintf(log_get(),"gpu: setting irg line %i\n",irqline);
693         int mask = 0x40 << irqline;
694         gpu_control &= ~mask;
695
696         if (state)
697         {
698                 gpu_control |= mask;
699                 gpu_check_irqs();
700         }
701 }
702
703 void gpu_init(void)
704 {
705         memory_malloc_secure((void **)&gpu_ram_8, 0x1000, "GPU work ram");
706 //      gpu_ram_16=(uint16*)gpu_ram_8;
707 //      gpu_ram_32=(uint32*)gpu_ram_8;
708
709         memory_malloc_secure((void **)&gpu_reg, 32*sizeof(int32), "GPU bank 0 regs");
710         memory_malloc_secure((void **)&gpu_alternate_reg, 32*sizeof(int32), "GPU bank 1 regs");
711         
712         build_branch_condition_table();
713
714         gpu_reset();
715 }
716
717 void gpu_reset(void)
718 {
719         gpu_pc                            = 0x00F03000;
720         gpu_acc                           = 0x00000000;
721         gpu_remain                        = 0x00000000;
722         gpu_hidata                        = 0x00000000;
723         gpu_flags                         = 0x00040000;
724         gpu_matrix_control    = 0x00000000;
725         gpu_pointer_to_matrix = 0x00000000;
726         gpu_data_organization = 0xFFFFFFFF;
727         gpu_control                       = 0x00012800;
728         gpu_div_control           = 0x00000000;
729         gpu_in_exec                       = 0;
730
731         for(int i=0; i<32; i++)
732         {
733                 gpu_reg[i]           = 0x00000000;
734                 gpu_alternate_reg[i] = 0x00000000;
735         }
736         
737         gpu_reg_bank_0 = gpu_reg;
738         gpu_reg_bank_1 = gpu_alternate_reg;
739 //      gpu_reg_bank_1 = gpu_reg;
740 //      gpu_reg_bank_0 = gpu_alternate_reg;
741
742         reset_flag_z();
743         reset_flag_n();
744         reset_flag_c();
745
746         gpu_alternate_flag_z = 0;
747         gpu_alternate_flag_n = 0;
748         gpu_alternate_flag_c = 0;
749
750         memset(gpu_ram_8, 0xFF, 0x1000);
751
752         gpu_reset_stats();
753 }
754
755 uint32 gpu_read_pc(void)
756 {
757         return gpu_pc;
758 }
759
760 void gpu_reset_stats(void)
761 {
762         for(uint32 i=0; i<64; i++)
763                 gpu_opcode_use[i] = 0;
764 }
765
766 void gpu_done(void)
767
768         fprintf(log_get(), "GPU: stopped at PC=%08X (GPU %s running)\n", gpu_pc, GPU_RUNNING ? "was" : "wasn't");
769
770         // get the active interrupt bits 
771         int bits = (gpu_control >> 6) & 0x1F;
772         bits |= (gpu_control >> 10) & 0x20;
773
774         // get the interrupt mask 
775         int mask = (gpu_flags >> 4) & 0x1F;
776         mask |= (gpu_flags >> 11) & 0x20;
777         
778
779         fprintf(log_get(), "GPU: ibits=0x%.8x imask=0x%.8x\n", bits, mask);
780 //      fprintf(log_get(),"\nregisters bank 0\n");
781 //      for (int j=0;j<8;j++)
782 //      {
783 //              fprintf(log_get(),"\tr%2i=0x%.8x r%2i=0x%.8x r%2i=0x%.8x r%2i=0x%.8x\n",
784 //                                                (j<<2)+0,gpu_reg[(j<<2)+0],
785 //                                                (j<<2)+1,gpu_reg[(j<<2)+1],
786 //                                                (j<<2)+2,gpu_reg[(j<<2)+2],
787 //                                                (j<<2)+3,gpu_reg[(j<<2)+3]);
788 //
789 //      }
790 //      fprintf(log_get(),"registers bank 1\n");
791 //      for (j=0;j<8;j++)
792 //      {
793 //              fprintf(log_get(),"\tr%2i=0x%.8x r%2i=0x%.8x r%2i=0x%.8x r%2i=0x%.8x\n",
794 //                                                (j<<2)+0,gpu_alternate_reg[(j<<2)+0],
795 //                                                (j<<2)+1,gpu_alternate_reg[(j<<2)+1],
796 //                                                (j<<2)+2,gpu_alternate_reg[(j<<2)+2],
797 //                                                (j<<2)+3,gpu_alternate_reg[(j<<2)+3]);
798 //
799 //      }
800         fprintf(log_get(),"---[GPU code at 00F03000]---------------------------\n");
801         static char buffer[512];
802         int j = 0xF03000;
803         for(int i=0; i<4096; i++)
804         {
805                 uint32 oldj = j;
806                 j += dasmjag(JAGUAR_GPU, buffer, j);
807                 fprintf(log_get(),"\t%08X: %s\n", oldj, buffer);
808         }
809
810         fprintf(log_get(), "---[GPU code at %08X]---------------------------\n", gpu_pc);
811         j = gpu_pc - 64;
812         for(int i=0; i<4096; i++)
813         {
814                 uint32 oldj = j;
815                 j += dasmjag(JAGUAR_GPU, buffer, j);
816                 fprintf(log_get(), "\t%08X: %s\n", oldj, buffer);
817         }
818
819         fprintf(log_get(), "gpu opcodes use:\n");
820         for(int i=0; i<64; i++)
821         {
822                 if (gpu_opcode_use[i])
823                         fprintf(log_get(), "\t%s %lu\n", gpu_opcode_str[i], gpu_opcode_use[i]);
824         }
825         memory_free(gpu_ram_8);
826 }
827
828 //
829 // Main GPU execution core
830 //
831
832 void gpu_exec(int32 cycles)
833 {
834         if (!GPU_RUNNING)
835                 return;
836
837 #ifdef GPU_SINGLE_STEPPING
838         if (gpu_control & 0x18)
839         {
840                 cycles = 1;
841                 gpu_control &= ~0x10;
842         }
843 #endif
844         gpu_check_irqs();
845         gpu_releaseTimeSlice_flag = 0;
846         gpu_in_exec++;
847
848         while ((cycles > 0) && GPU_RUNNING)
849         {
850                 gpu_flag_c = (gpu_flag_c ? 1 : 0);
851                 gpu_flag_z = (gpu_flag_z ? 1 : 0);
852                 gpu_flag_n = (gpu_flag_n ? 1 : 0);
853         
854                 uint16 opcode = gpu_word_read(gpu_pc);
855 /*static char buffer[512];
856 dasmjag(JAGUAR_GPU, buffer, gpu_pc);
857 fprintf(log_get(), "GPU: [%08X] %s\n", gpu_pc, buffer);*/
858
859                 uint32 index = opcode >> 10;            
860                 gpu_opcode_first_parameter = (opcode >> 5) & 0x1F;
861                 gpu_opcode_second_parameter = opcode & 0x1F;
862                 gpu_pc += 2;
863                 gpu_opcode[index]();
864                 cycles -= gpu_opcode_cycles[index];
865                 gpu_opcode_use[index]++;
866         }
867
868         gpu_in_exec--;
869 }
870
871 //
872 // GPU opcodes
873 //
874
875 static void gpu_opcode_jump(void)
876 {
877         uint32 delayed_pc = Rm;
878         uint32 jaguar_flags;
879
880         // normalize flags
881         gpu_flag_c = (gpu_flag_c ? 1 : 0);
882         gpu_flag_z = (gpu_flag_z ? 1 : 0);
883         gpu_flag_n = (gpu_flag_n ? 1 : 0);
884
885         jaguar_flags = (gpu_flag_n << 2) | (gpu_flag_c << 1) | gpu_flag_z;
886
887         if (branch_condition(imm_2))
888         {
889                 gpu_exec(1);
890                 gpu_pc = delayed_pc;
891         }
892 }
893
894 static void gpu_opcode_jr(void)
895 {
896         int32 offset=(imm_1&0x10) ? (0xFFFFFFF0|imm_1) : imm_1;
897
898         int32 delayed_pc = gpu_pc + (offset * 2);
899         uint32 jaguar_flags;
900
901         // normalize flags
902         gpu_flag_c=gpu_flag_c?1:0;
903         gpu_flag_z=gpu_flag_z?1:0;
904         gpu_flag_n=gpu_flag_n?1:0;
905         
906         jaguar_flags=(gpu_flag_n<<2)|(gpu_flag_c<<1)|gpu_flag_z;
907
908         if (branch_condition(imm_2))
909         {
910                 gpu_exec(1);
911                 gpu_pc=delayed_pc;
912         }
913 }
914
915 static void gpu_opcode_add(void)
916 {
917         uint32 _Rm=Rm;
918         uint32 _Rn=Rn;
919         uint32 res;
920 #ifdef __PORT__
921 #ifndef USE_ASSEMBLY
922 {
923 /*              uint32 index = opcode >> 10;            
924                 gpu_opcode_first_parameter = (opcode & 0x3E0) >> 5;
925                 gpu_opcode_second_parameter = (opcode & 0x1F);
926                 gpu_pc += 2;
927                 gpu_opcode[index]();
928                 cycles -= gpu_opcode_cycles[index];
929                 gpu_opcode_use[index]++;*/
930 /*      int dreg = jaguar.op & 31;
931         UINT32 r1 = jaguar.r[(jaguar.op >> 5) & 31];
932         UINT32 r2 = jaguar.r[dreg];
933         UINT32 res = r2 + r1;
934         jaguar.r[dreg] = res;
935         CLR_ZNC; SET_ZNC_ADD(r2,r1,res);*/
936
937         UINT32 res = Rn + Rm;
938         CLR_ZNC; SET_ZNC_ADD(Rn, Rm, res);
939         Rn = res;
940         return;
941 }
942 #else
943     /*
944        GCC on WIN32 (more importantly mingw) doesn't know the declared
945        variables in asm until we put a _ before it.
946        
947        So the declaration dsp_flag_c needs to be _dsp_flag_c on mingw.
948     */
949  
950 #ifdef __GCCWIN32__
951
952         asm(
953         "addl %1, %2                                    \n\
954         setc  _gpu_flag_c                               \n\
955         setz  _gpu_flag_z                               \n\
956         sets  _gpu_flag_n                               \n\
957         movl %%eax, %0                                  \n\
958         "
959         : "=m"(res)
960         : "d"(_Rm), "a"(_Rn));
961         
962 #else
963
964         asm(
965         "addl %1, %2                                    \n\
966         setc  gpu_flag_c                                \n\
967         setz  gpu_flag_z                                \n\
968         sets  gpu_flag_n                                \n\
969         movl %%eax, %0                                  \n\
970         "
971         : "=m"(res)
972         : "d"(_Rm), "a"(_Rn));
973         
974 #endif  // #ifdef __GCCWIN32__
975 #endif  // #ifndef USE_ASSEMBLY
976         
977 #else
978         __asm 
979         {
980                 mov   edx,_Rm
981                 mov   eax,_Rn
982                 add   eax,edx
983                 setc  [gpu_flag_c]
984                 setz  [gpu_flag_z]
985                 sets  [gpu_flag_n]
986                 mov       res,eax
987         };
988 #endif  // #ifdef __PORT__
989         Rn=res;
990 }
991
992 static void gpu_opcode_addc(void)
993 {
994         uint32 _Rm=Rm;
995         uint32 _Rn=Rn;
996         uint32 res;
997 #ifdef __PORT__
998 #ifndef USE_ASSEMBLY
999 {
1000 /*      int dreg = jaguar.op & 31;
1001         UINT32 r1 = jaguar.r[(jaguar.op >> 5) & 31];
1002         UINT32 r2 = jaguar.r[dreg];
1003         UINT32 res = r2 + r1 + ((jaguar.FLAGS >> 1) & 1);
1004         jaguar.r[dreg] = res;
1005         CLR_ZNC; SET_ZNC_ADD(r2,r1,res);*/
1006
1007         UINT32 res = Rn + Rm + gpu_flag_c;
1008         CLR_ZNC; SET_ZNC_ADD(Rn, Rm, res);
1009         Rn = res;
1010         return;
1011 }
1012 #else
1013     /*
1014        GCC on WIN32 (more importantly mingw) doesn't know the declared
1015        variables in asm until we put a _ before it.
1016        
1017        So the declaration dsp_flag_c needs to be _dsp_flag_c on mingw.
1018     */
1019
1020 #ifdef __GCCWIN32__
1021
1022         asm(
1023         "addl %1, %2                                    \n\
1024         cmp       $0, _gpu_flag_c                       \n\
1025         clc                                                             \n\
1026         jz 1f                                                   \n\
1027         stc                                                             \n\
1028         1:                                                              \n\
1029         adc %1, %2                                              \n\
1030         setc  _gpu_flag_c                               \n\
1031         setz  _gpu_flag_z                               \n\
1032         sets  _gpu_flag_n                               \n\
1033         movl %%eax, %0                                  \n\
1034         "
1035         : "=m"(res)
1036         : "d"(_Rm), "a"(_Rn));
1037
1038 #else
1039         
1040         asm(
1041         "addl %1, %2                                    \n\
1042         cmp       $0, gpu_flag_c                        \n\
1043         clc                                                             \n\
1044         jz 1f                                                   \n\
1045         stc                                                             \n\
1046         1:                                                              \n\
1047         adc %1, %2                                              \n\
1048         setc  gpu_flag_c                                \n\
1049         setz  gpu_flag_z                                \n\
1050         sets  gpu_flag_n                                \n\
1051         movl %%eax, %0                                  \n\
1052         "
1053         : "=m"(res)
1054         : "d"(_Rm), "a"(_Rn));
1055
1056 #endif  // #ifdef __GCCWIN32__
1057 #endif  // #ifndef USE_ASSEMBLY
1058         
1059 #else
1060         __asm 
1061         {
1062                 mov   edx,_Rm
1063                 mov   eax,_Rn
1064                 cmp       [gpu_flag_c],0
1065                 clc
1066                 jz        gpu_opcode_addc_no_carry
1067                 stc
1068 gpu_opcode_addc_no_carry:
1069                 adc   eax,edx
1070                 setc  [gpu_flag_c]
1071                 setz  [gpu_flag_z]
1072                 sets  [gpu_flag_n]
1073                 mov       res,eax
1074         };
1075 #endif
1076         Rn=res;
1077 }
1078
1079 static void gpu_opcode_addq(void)
1080 {
1081         uint32 _Rn=Rn;
1082         uint32 _Rm=gpu_convert_zero[imm_1];
1083         uint32 res;
1084 #ifdef __PORT__
1085 #ifndef USE_ASSEMBLY
1086 {
1087 /*      int dreg = jaguar.op & 31;
1088         UINT32 r1 = convert_zero[(jaguar.op >> 5) & 31];
1089         UINT32 r2 = jaguar.r[dreg];
1090         UINT32 res = r2 + r1;
1091         jaguar.r[dreg] = res;
1092         CLR_ZNC; SET_ZNC_ADD(r2,r1,res);*/
1093         UINT32 r1 = gpu_convert_zero[imm_1];
1094         UINT32 res = Rn + r1;
1095         CLR_ZNC; SET_ZNC_ADD(Rn, r1, res);
1096         Rn = res;
1097         return;
1098 }
1099 #else
1100     /*
1101        GCC on WIN32 (more importantly mingw) doesn't know the declared
1102        variables in asm until we put a _ before it.
1103        
1104        So the declaration dsp_flag_c needs to be _dsp_flag_c on mingw.
1105     */
1106
1107 #ifdef __GCCWIN32__
1108
1109         asm(
1110         "addl %1, %2                                    \n\
1111         setc  _gpu_flag_c                               \n\
1112         setz  _gpu_flag_z                               \n\
1113         sets  _gpu_flag_n                               \n\
1114         movl %%eax, %0                                  \n\
1115         "
1116         : "=m"(res)
1117         : "d"(_Rm), "a"(_Rn));
1118         
1119 #else
1120
1121         asm(
1122         "addl %1, %2                                    \n\
1123         setc  gpu_flag_c                                \n\
1124         setz  gpu_flag_z                                \n\
1125         sets  gpu_flag_n                                \n\
1126         movl %%eax, %0                                  \n\
1127         "
1128         : "=m"(res)
1129         : "d"(_Rm), "a"(_Rn));
1130
1131 #endif  // #ifdef __GCCWIN32__
1132 #endif  // #ifndef USE_ASSEMBLY
1133         
1134 #else
1135         __asm 
1136         {
1137                 mov   edx,_Rm
1138                 mov   eax,_Rn
1139                 add   eax,edx
1140                 setc  [gpu_flag_c]
1141                 setz  [gpu_flag_z]
1142                 sets  [gpu_flag_n]
1143                 mov       res,eax
1144         };
1145 #endif
1146         Rn=res;
1147 }
1148
1149 static void gpu_opcode_addqt(void)
1150 {
1151         Rn += gpu_convert_zero[imm_1];
1152 }
1153
1154 static void gpu_opcode_sub(void)
1155 {
1156         uint32 _Rm=Rm;
1157         uint32 _Rn=Rn;
1158         uint32 res;
1159 #ifdef __PORT__
1160 #ifndef USE_ASSEMBLY
1161 {
1162 /*      int dreg = jaguar.op & 31;
1163         UINT32 r1 = jaguar.r[(jaguar.op >> 5) & 31];
1164         UINT32 r2 = jaguar.r[dreg];
1165         UINT32 res = r2 - r1;
1166         jaguar.r[dreg] = res;
1167         CLR_ZNC; SET_ZNC_SUB(r2,r1,res);*/
1168         UINT32 res = Rn - Rm;
1169         CLR_ZNC; SET_ZNC_SUB(Rn, Rm, res);
1170         Rn = res;
1171         return;
1172 }
1173 #else
1174     /*
1175        GCC on WIN32 (more importantly mingw) doesn't know the declared
1176        variables in asm until we put a _ before it.
1177        
1178        So the declaration dsp_flag_c needs to be _dsp_flag_c on mingw.
1179     */
1180
1181 #ifdef __GCCWIN32__
1182
1183         asm(
1184         "subl %1, %2                                    \n\
1185         setc  _gpu_flag_c                               \n\
1186         setz  _gpu_flag_z                               \n\
1187         sets  _gpu_flag_n                               \n\
1188         movl %%eax, %0                                  \n\
1189         "
1190         : "=m"(res)
1191         : "d"(_Rm), "a"(_Rn));
1192         
1193 #else
1194         
1195         asm(
1196         "subl %1, %2                                    \n\
1197         setc  gpu_flag_c                                \n\
1198         setz  gpu_flag_z                                \n\
1199         sets  gpu_flag_n                                \n\
1200         movl %%eax, %0                                  \n\
1201         "
1202         : "=m"(res)
1203         : "d"(_Rm), "a"(_Rn));
1204
1205 #endif  // #ifdef __GCCWIN32__
1206 #endif  // #ifndef USE_ASSEMBLY
1207         
1208 #else
1209         __asm 
1210         {
1211                 mov   eax,_Rn
1212                 mov   edx,_Rm
1213                 sub   eax,edx
1214                 setc  [gpu_flag_c]
1215                 setz  [gpu_flag_z]
1216                 sets  [gpu_flag_n]
1217                 mov       res,eax
1218         };
1219 #endif
1220         Rn=res;
1221 }
1222
1223 static void gpu_opcode_subc(void)
1224 {
1225         uint32 _Rm=Rm;
1226         uint32 _Rn=Rn;
1227         uint32 res;
1228 #ifdef __PORT__
1229 #ifndef USE_ASSEMBLY
1230 {
1231 /*      int dreg = jaguar.op & 31;
1232         UINT32 r1 = jaguar.r[(jaguar.op >> 5) & 31];
1233         UINT32 r2 = jaguar.r[dreg];
1234         UINT32 res = r2 - r1 - ((jaguar.FLAGS >> 1) & 1);
1235         jaguar.r[dreg] = res;
1236         CLR_ZNC; SET_ZNC_SUB(r2,r1,res);*/
1237         UINT32 res = Rn - Rm - gpu_flag_c;
1238         CLR_ZNC; SET_ZNC_SUB(Rn, Rm, res);
1239         Rn = res;
1240         return;
1241 }
1242 #else
1243     /*
1244        GCC on WIN32 (more importantly mingw) doesn't know the declared
1245        variables in asm until we put a _ before it.
1246        
1247        So the declaration dsp_flag_c needs to be _dsp_flag_c on mingw.
1248     */
1249
1250 #ifdef __GCCWIN32__
1251
1252         asm(
1253         "addl %1, %2                                    \n\
1254         cmp       $0, _gpu_flag_c                       \n\
1255         clc                                                             \n\
1256         jz 1f                                                   \n\
1257         stc                                                             \n\
1258         1:                                                              \n\
1259         sbb %1, %2                                              \n\
1260         setc  _gpu_flag_c                               \n\
1261         setz  _gpu_flag_z                               \n\
1262         sets  _gpu_flag_n                               \n\
1263         movl %%eax, %0                                  \n\
1264         "
1265         : "=m"(res)
1266         : "d"(_Rm), "a"(_Rn));
1267         
1268 #else
1269         
1270         asm(
1271         "addl %1, %2                                    \n\
1272         cmp       $0, gpu_flag_c                        \n\
1273         clc                                                             \n\
1274         jz 1f                                                   \n\
1275         stc                                                             \n\
1276         1:                                                              \n\
1277         sbb %1, %2                                              \n\
1278         setc  gpu_flag_c                                \n\
1279         setz  gpu_flag_z                                \n\
1280         sets  gpu_flag_n                                \n\
1281         movl %%eax, %0                                  \n\
1282         "
1283         : "=m"(res)
1284         : "d"(_Rm), "a"(_Rn));
1285
1286 #endif  // #ifdef __GCCWIN32__
1287 #endif  // #ifndef USE_ASSEMBLY
1288         
1289 #else
1290         __asm 
1291         {
1292                 cmp       [gpu_flag_c],0
1293                 clc
1294                 jz        gpu_opcode_subc_no_carry
1295                 stc
1296 gpu_opcode_subc_no_carry:
1297                 mov   edx,_Rm
1298                 mov   eax,_Rn
1299                 sbb   eax,edx
1300                 setc  [gpu_flag_c]
1301                 setz  [gpu_flag_z]
1302                 sets  [gpu_flag_n]
1303                 mov       res,eax
1304         };
1305 #endif
1306         Rn=res;
1307 }
1308
1309 static void gpu_opcode_subq(void)
1310 {
1311         uint32 _Rm=gpu_convert_zero[imm_1];
1312         uint32 _Rn=Rn;
1313         uint32 res;
1314 #ifdef __PORT__
1315 #ifndef USE_ASSEMBLY
1316 {
1317 /*      int dreg = jaguar.op & 31;
1318         UINT32 r1 = convert_zero[(jaguar.op >> 5) & 31];
1319         UINT32 r2 = jaguar.r[dreg];
1320         UINT32 res = r2 - r1;
1321         jaguar.r[dreg] = res;
1322         CLR_ZNC; SET_ZNC_SUB(r2,r1,res);*/
1323         UINT32 r1 = gpu_convert_zero[imm_1];
1324         UINT32 res = Rn - r1;
1325         CLR_ZNC; SET_ZNC_SUB(Rn, r1, res);
1326         Rn = res;
1327         return;
1328 }
1329 #else
1330     /*
1331        GCC on WIN32 (more importantly mingw) doesn't know the declared
1332        variables in asm until we put a _ before it.
1333        
1334        So the declaration dsp_flag_c needs to be _dsp_flag_c on mingw.
1335     */
1336
1337 #ifdef __GCCWIN32__
1338
1339         asm(
1340         "subl %1, %2                                    \n\
1341         setc  _gpu_flag_c                               \n\
1342         setz  _gpu_flag_z                               \n\
1343         sets  _gpu_flag_n                               \n\
1344         movl %%eax, %0                                  \n\
1345         "
1346         : "=m"(res)
1347         : "d"(_Rm), "a"(_Rn));
1348         
1349 #else
1350         
1351         asm(
1352         "subl %1, %2                                    \n\
1353         setc  gpu_flag_c                                \n\
1354         setz  gpu_flag_z                                \n\
1355         sets  gpu_flag_n                                \n\
1356         movl %%eax, %0                                  \n\
1357         "
1358         : "=m"(res)
1359         : "d"(_Rm), "a"(_Rn));
1360
1361 #endif  // #ifdef __GCCWIN32__
1362 #endif  // #ifndef USE_ASSEMBLY
1363         
1364 #else
1365         __asm 
1366         {
1367                 mov   eax,_Rn
1368                 mov   edx,_Rm
1369                 sub   eax,edx
1370                 setc  [gpu_flag_c]
1371                 setz  [gpu_flag_z]
1372                 sets  [gpu_flag_n]
1373                 mov       res,eax
1374         };
1375 #endif
1376         Rn=res;
1377 }
1378
1379 static void gpu_opcode_subqt(void)
1380 {
1381         Rn -= gpu_convert_zero[imm_1];
1382 }
1383
1384 static void gpu_opcode_cmp(void)
1385 {
1386         uint32 _Rm=Rm;
1387         uint32 _Rn=Rn;
1388 #ifdef __PORT__
1389 #ifndef USE_ASSEMBLY
1390 {
1391 /*      UINT32 r1 = jaguar.r[(jaguar.op >> 5) & 31];
1392         UINT32 r2 = jaguar.r[jaguar.op & 31];
1393         UINT32 res = r2 - r1;
1394         CLR_ZNC; SET_ZNC_SUB(r2,r1,res);*/
1395         UINT32 res = Rn - Rm;
1396         CLR_ZNC; SET_ZNC_SUB(Rn, Rm, res);
1397         return;
1398 }
1399 #else
1400
1401     /*
1402        GCC on WIN32 (more importantly mingw) doesn't know the declared
1403        variables in asm until we put a _ before it.
1404        
1405        So the declaration dsp_flag_c needs to be _dsp_flag_c on mingw.
1406     */
1407
1408 #ifdef __GCCWIN32__
1409
1410         asm(
1411         "cmpl %0, %1                                    \n\
1412         setc  _gpu_flag_c                               \n\
1413         setz  _gpu_flag_z                               \n\
1414         sets  _gpu_flag_n                               \n\
1415         "
1416         :
1417         : "d"(_Rm), "a"(_Rn));
1418         
1419 #else
1420         
1421         asm(
1422         "cmpl %0, %1                                    \n\
1423         setc  gpu_flag_c                                \n\
1424         setz  gpu_flag_z                                \n\
1425         sets  gpu_flag_n                                \n\
1426         "
1427         :
1428         : "d"(_Rm), "a"(_Rn));
1429
1430 #endif  // #ifdef __GCCWIN32__
1431 #endif  // #ifndef USE_ASSEMBLY
1432         
1433 #else
1434         __asm 
1435         {
1436                 mov   eax,_Rn
1437                 mov   edx,_Rm
1438                 cmp   eax,edx
1439                 setc  [gpu_flag_c]
1440                 setz  [gpu_flag_z]
1441                 sets  [gpu_flag_n]
1442         };
1443 #endif
1444 }
1445
1446 static void gpu_opcode_cmpq(void)
1447 {
1448         static int32 sqtable[32] =
1449                 { 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,-16,-15,-14,-13,-12,-11,-10,-9,-8,-7,-6,-5,-4,-3,-2,-1 };
1450         int32 _Rm=sqtable[imm_1&0x1f];
1451         uint32 _Rn=Rn;
1452 #ifdef __PORT__
1453 #ifndef USE_ASSEMBLY
1454 {
1455 /*      UINT32 r1 = (INT8)(jaguar.op >> 2) >> 3;
1456         UINT32 r2 = jaguar.r[jaguar.op & 31];
1457         UINT32 res = r2 - r1;
1458         CLR_ZNC; SET_ZNC_SUB(r2,r1,res);*/
1459         UINT32 r1 = sqtable[imm_1 & 0x1F]; // I like this better -> (INT8)(jaguar.op >> 2) >> 3;
1460         UINT32 res = Rn - r1;
1461         CLR_ZNC; SET_ZNC_SUB(Rn, r1, res);
1462         return;
1463 }
1464 #else
1465     /*
1466        GCC on WIN32 (more importantly mingw) doesn't know the declared
1467        variables in asm until we put a _ before it.
1468        
1469        So the declaration dsp_flag_c needs to be _dsp_flag_c on mingw.
1470     */
1471
1472 #ifdef __GCCWIN32__
1473
1474         asm(
1475         "cmpl %0, %1                                    \n\
1476         setc  _gpu_flag_c                               \n\
1477         setz  _gpu_flag_z                               \n\
1478         sets  _gpu_flag_n                               \n\
1479         "
1480         :
1481         : "d"(_Rm), "a"(_Rn));
1482         
1483 #else
1484
1485         asm(
1486         "cmpl %0, %1                                    \n\
1487         setc  gpu_flag_c                                \n\
1488         setz  gpu_flag_z                                \n\
1489         sets  gpu_flag_n                                \n\
1490         "
1491         :
1492         : "d"(_Rm), "a"(_Rn));
1493         
1494 #endif  // #ifdef __GCCWIN32__
1495 #endif  // #ifndef USE_ASSEMBLY
1496
1497 #else
1498         __asm 
1499         {
1500                 mov   eax,_Rn
1501                 mov   edx,_Rm
1502                 cmp   eax,edx
1503                 setc  [gpu_flag_c]
1504                 setz  [gpu_flag_z]
1505                 sets  [gpu_flag_n]
1506         };
1507 #endif
1508 }
1509
1510 static void gpu_opcode_and(void)
1511 {
1512         uint32 _Rm=Rm;
1513         uint32 _Rn=Rn;
1514         uint32 res;
1515 #ifdef __PORT__
1516 #ifndef USE_ASSEMBLY
1517 {
1518 /*      int dreg = jaguar.op & 31;
1519         UINT32 r1 = jaguar.r[(jaguar.op >> 5) & 31];
1520         UINT32 r2 = jaguar.r[dreg];
1521         UINT32 res = r2 & r1;
1522         jaguar.r[dreg] = res;
1523         CLR_ZN; SET_ZN(res);*/
1524         UINT32 res = Rn & Rm;
1525         Rn = res;
1526         CLR_ZN; SET_ZN(res);
1527         return;
1528 }
1529 #else
1530
1531     /*
1532        GCC on WIN32 (more importantly mingw) doesn't know the declared
1533        variables in asm until we put a _ before it.
1534        
1535        So the declaration dsp_flag_c needs to be _dsp_flag_c on mingw.
1536     */
1537
1538 #ifdef __GCCWIN32__
1539
1540         asm(
1541         "andl %1, %2                                    \n\
1542         setz  _gpu_flag_z                               \n\
1543         sets  _gpu_flag_n                               \n\
1544         movl %%eax, %0                                  \n\
1545         "
1546         : "=m"(res)
1547         : "d"(_Rm), "a"(_Rn));
1548         
1549 #else
1550
1551         asm(
1552         "andl %1, %2                                    \n\
1553         setz  gpu_flag_z                                \n\
1554         sets  gpu_flag_n                                \n\
1555         movl %%eax, %0                                  \n\
1556         "
1557         : "=m"(res)
1558         : "d"(_Rm), "a"(_Rn));
1559         
1560 #endif  // #ifdef __GCCWIN32__
1561 #endif  // #ifndef USE_ASSEMBLY
1562
1563 #else
1564         __asm 
1565         {
1566                 mov   eax,_Rn
1567                 mov   edx,_Rm
1568                 and   eax,edx
1569                 setz  [gpu_flag_z]
1570                 sets  [gpu_flag_n]
1571                 mov   res,eax
1572         };
1573 #endif
1574         Rn=res;
1575 }
1576
1577 static void gpu_opcode_or(void)
1578 {
1579         uint32 _Rm=Rm;
1580         uint32 _Rn=Rn;
1581         uint32 res;
1582 #ifdef __PORT__
1583 #ifndef USE_ASSEMBLY
1584 {
1585 /*      int dreg = jaguar.op & 31;
1586         UINT32 r1 = jaguar.r[(jaguar.op >> 5) & 31];
1587         UINT32 r2 = jaguar.r[dreg];
1588         UINT32 res = r1 | r2;
1589         jaguar.r[dreg] = res;
1590         CLR_ZN; SET_ZN(res);*/
1591         UINT32 res = Rn | Rm;
1592         Rn = res;
1593         CLR_ZN; SET_ZN(res);
1594         return;
1595 }
1596 #else
1597     /*
1598        GCC on WIN32 (more importantly mingw) doesn't know the declared
1599        variables in asm until we put a _ before it.
1600        
1601        So the declaration dsp_flag_c needs to be _dsp_flag_c on mingw.
1602     */
1603
1604 #ifdef __GCCWIN32__
1605
1606         asm(
1607         "orl %1, %2                                             \n\
1608         setz  _gpu_flag_z                               \n\
1609         sets  _gpu_flag_n                               \n\
1610         movl %%eax, %0                                  \n\
1611         "
1612         : "=m"(res)
1613         : "d"(_Rm), "a"(_Rn));
1614         
1615 #else
1616
1617         asm(
1618         "orl %1, %2                                             \n\
1619         setz  gpu_flag_z                                \n\
1620         sets  gpu_flag_n                                \n\
1621         movl %%eax, %0                                  \n\
1622         "
1623         : "=m"(res)
1624         : "d"(_Rm), "a"(_Rn));
1625         
1626 #endif  // #ifdef __GCCWIN32__
1627 #endif  // #ifndef USE_ASSEMBLY
1628
1629 #else
1630         __asm 
1631         {
1632                 mov   eax,_Rn
1633                 mov   edx,_Rm
1634                 or    eax,edx
1635                 setz  [gpu_flag_z]
1636                 sets  [gpu_flag_n]
1637                 mov   res,eax
1638         };
1639 #endif
1640         Rn=res;
1641 }
1642
1643 static void gpu_opcode_xor(void)
1644 {
1645         uint32 _Rm=Rm;
1646         uint32 _Rn=Rn;
1647         uint32 res;
1648 #ifdef __PORT__
1649 #ifndef USE_ASSEMBLY
1650 {
1651 /*      int dreg = jaguar.op & 31;
1652         UINT32 r1 = jaguar.r[(jaguar.op >> 5) & 31];
1653         UINT32 r2 = jaguar.r[dreg];
1654         UINT32 res = r1 ^ r2;
1655         jaguar.r[dreg] = res;
1656         CLR_ZN; SET_ZN(res);*/
1657         UINT32 res = Rn ^ Rm;
1658         Rn = res;
1659         CLR_ZN; SET_ZN(res);
1660         return;
1661 }
1662 #else
1663     /*
1664        GCC on WIN32 (more importantly mingw) doesn't know the declared
1665        variables in asm until we put a _ before it.
1666        
1667        So the declaration dsp_flag_c needs to be _dsp_flag_c on mingw.
1668     */
1669
1670 #ifdef __GCCWIN32__
1671
1672         asm(
1673         "xorl %1, %2                                    \n\
1674         setz  _gpu_flag_z                               \n\
1675         sets  _gpu_flag_n                               \n\
1676         movl %%eax, %0                                  \n\
1677         "
1678         : "=m"(res)
1679         : "d"(_Rm), "a"(_Rn));
1680         
1681 #else
1682
1683         asm(
1684         "xorl %1, %2                                    \n\
1685         setz  gpu_flag_z                                \n\
1686         sets  gpu_flag_n                                \n\
1687         movl %%eax, %0                                  \n\
1688         "
1689         : "=m"(res)
1690         : "d"(_Rm), "a"(_Rn));
1691
1692 #endif  // #ifdef __GCCWIN32__
1693 #endif  // #ifndef USE_ASSEMBLY
1694
1695 #else
1696         __asm 
1697         {
1698                 mov   eax,_Rn
1699                 mov   edx,_Rm
1700                 xor   eax,edx
1701                 setz  [gpu_flag_z]
1702                 sets  [gpu_flag_n]
1703                 mov   res,eax
1704         };
1705 #endif
1706         Rn=res;
1707 }
1708
1709 static void gpu_opcode_not(void)
1710 {
1711         uint32 _Rn=Rn;
1712         uint32 res;
1713 #ifdef __PORT__
1714 #ifndef USE_ASSEMBLY
1715 {
1716 /*      int dreg = jaguar.op & 31;
1717         UINT32 res = ~jaguar.r[dreg];
1718         jaguar.r[dreg] = res;
1719         CLR_ZN; SET_ZN(res);*/
1720         UINT32 res = ~Rn;
1721         Rn = res;
1722         CLR_ZN; SET_ZN(res);
1723         return;
1724 }
1725 #else
1726     /*
1727        GCC on WIN32 (more importantly mingw) doesn't know the declared
1728        variables in asm until we put a _ before it.
1729        
1730        So the declaration dsp_flag_c needs to be _dsp_flag_c on mingw.
1731     */
1732
1733 #ifdef __GCCWIN32__
1734
1735         asm(
1736         "notl %1                                                \n\
1737         setz  _gpu_flag_z                               \n\
1738         sets  _gpu_flag_n                               \n\
1739         movl %%eax, %0                                  \n\
1740         "
1741         : "=m"(res)
1742         : "a"(_Rn));
1743         
1744 #else
1745
1746         asm(
1747         "notl %1                                                \n\
1748         setz  gpu_flag_z                                \n\
1749         sets  gpu_flag_n                                \n\
1750         movl %%eax, %0                                  \n\
1751         "
1752         : "=m"(res)
1753         : "a"(_Rn));
1754         
1755 #endif  // #ifdef __GCCWIN32__
1756 #endif  // #ifndef USE_ASSEMBLY
1757
1758 #else
1759         __asm 
1760         {
1761                 mov   eax,_Rn
1762                 not   eax
1763                 setz  [gpu_flag_z]
1764                 sets  [gpu_flag_n]
1765                 mov   res,eax
1766         };
1767 #endif
1768         Rn=res;
1769 }
1770
1771 static void gpu_opcode_move_pc(void)
1772 {
1773         Rn = gpu_pc-2; 
1774 }
1775
1776 static void gpu_opcode_sat8(void)
1777 {
1778         int32 _Rn=(int32)Rn;
1779
1780         uint32 res= Rn = (_Rn<0) ? 0 : (_Rn > 0xff ? 0xff : _Rn);
1781         set_flag_z(res);
1782         reset_flag_n();
1783 }
1784
1785 static void gpu_opcode_sat16(void)
1786 {
1787         int32 _Rn=(int32)Rn;
1788         uint32 res= Rn = (_Rn<0) ? 0 : (_Rn > 0xFFFF ? 0xFFFF : _Rn);
1789         set_flag_z(res);
1790         reset_flag_n();
1791 }
1792
1793 static void gpu_opcode_sat24(void)
1794 {
1795         int32 _Rn=(int32)Rn;
1796
1797         uint32 res= Rn = (_Rn<0) ? 0 : (_Rn > 0xFFFFFF ? 0xFFFFFF : _Rn);
1798         set_flag_z(res);
1799         reset_flag_n();
1800 }
1801
1802 static void gpu_opcode_store_r14_indexed(void)
1803 {
1804         gpu_long_write( gpu_reg[14] + (gpu_convert_zero[imm_1] << 2),Rn);
1805 }
1806
1807 static void gpu_opcode_store_r15_indexed(void)
1808 {
1809         gpu_long_write( gpu_reg[15] + (gpu_convert_zero[imm_1] << 2),Rn);
1810 }
1811
1812 static void gpu_opcode_load_r14_ri(void)
1813 {
1814         Rn=gpu_long_read(gpu_reg[14] + Rm);
1815 }
1816
1817 static void gpu_opcode_load_r15_ri(void)
1818 {
1819         Rn=gpu_long_read(gpu_reg[15] + Rm);
1820 }
1821
1822 static void gpu_opcode_store_r14_ri(void)
1823 {
1824         gpu_long_write(gpu_reg[14] + Rm,Rn);
1825 }
1826
1827 static void gpu_opcode_store_r15_ri(void)
1828 {
1829         gpu_long_write(gpu_reg[15] + Rm,Rn);
1830 }
1831
//
// NOP: intentionally does nothing; no register or flag is touched here.
//
static void gpu_opcode_nop(void)
{
	// Nothing to do.
}
1835
1836 static void gpu_opcode_pack(void)
1837 {
1838         uint32 _Rn=Rn;
1839
1840         if (Rm==0)
1841         {
1842                 Rn =((_Rn & 0x03C00000) >> 10) |
1843                         ((_Rn & 0x0001E000) >> 5)  |
1844                         ((_Rn & 0x000000FF));
1845         }
1846         else
1847         {
1848                 Rn =((_Rn & 0x0000F000) << 10) |
1849                         ((_Rn & 0x00000F00) << 5)  |
1850                         ((_Rn & 0x000000FF));
1851         }
1852         reset_flag_z();
1853         reset_flag_n();
1854         set_flag_z(Rn);
1855         set_flag_n(Rn);
1856 }
1857
1858 static void gpu_opcode_storeb(void)
1859 {
1860         if ((Rm >= 0xF03000) && (Rm < 0xF04000))
1861                 gpu_long_write(Rm,Rn&0xff);
1862         else
1863                 jaguar_byte_write(Rm,Rn);
1864 }
1865
1866 static void gpu_opcode_storew(void)
1867 {
1868         if ((Rm >= 0xF03000) && (Rm < 0xF04000))
1869                 gpu_long_write(Rm,Rn&0xffff);
1870         else
1871                 jaguar_word_write(Rm,Rn);
1872 }
1873
1874 static void gpu_opcode_store(void)
1875 {
1876         gpu_long_write(Rm,Rn);
1877 }
1878
1879 static void gpu_opcode_storep(void)
1880 {
1881         uint32 _Rm=Rm;
1882         gpu_long_write(_Rm,      gpu_hidata);
1883         gpu_long_write(_Rm+4, Rn);
1884 }
1885
1886 static void gpu_opcode_loadb(void)
1887 {
1888         if ((Rm >= 0xF03000) && (Rm < 0xF04000))
1889                 Rn=gpu_long_read(Rm)&0xff;
1890         else
1891                 Rn=jaguar_byte_read(Rm);
1892 }
1893
1894 static void gpu_opcode_loadw(void)
1895 {
1896         if ((Rm >= 0xF03000) && (Rm < 0xF04000))
1897                 Rn=gpu_long_read(Rm)&0xffff;
1898         else
1899                 Rn=jaguar_word_read(Rm);
1900 }
1901
1902 static void gpu_opcode_load(void)
1903 {
1904         Rn = gpu_long_read(Rm);
1905 }
1906
1907 static void gpu_opcode_loadp(void)
1908 {
1909         uint32 _Rm=Rm;
1910
1911         gpu_hidata = gpu_long_read(_Rm);
1912         Rn                 = gpu_long_read(_Rm+4);
1913 }
1914
1915 static void gpu_opcode_load_r14_indexed(void)
1916 {
1917         Rn = gpu_long_read( gpu_reg[14] + (gpu_convert_zero[imm_1] << 2));
1918 }
1919
1920 static void gpu_opcode_load_r15_indexed(void)
1921 {
1922         Rn = gpu_long_read( gpu_reg[15] + (gpu_convert_zero[imm_1] << 2));
1923 }
1924
1925 static void gpu_opcode_movei(void)
1926 {
1927         Rn = (uint32)gpu_word_read(gpu_pc) | ((uint32)gpu_word_read(gpu_pc + 2) << 16);
1928         gpu_pc += 4;
1929 }
1930
1931 static void gpu_opcode_moveta(void)
1932 {
1933         alternate_Rn = Rm;
1934 }
1935
1936 static void gpu_opcode_movefa(void)
1937 {
1938         Rn = alternate_Rm;
1939 }
1940
1941 static void gpu_opcode_move(void)
1942 {
1943         Rn = Rm;
1944 }
1945
1946 static void gpu_opcode_moveq(void)
1947 {
1948         Rn = imm_1;    
1949 }
1950
1951 static void gpu_opcode_resmac(void)
1952 {
1953         Rn = gpu_acc;
1954 }
1955
1956 static void gpu_opcode_imult(void)
1957 {
1958         uint32 res=Rn=((int16)Rn)*((int16)Rm);
1959         set_flag_z(res);
1960         set_flag_n(res);
1961 }
1962
1963 static void gpu_opcode_mult(void)
1964 {
1965         uint32 res=Rn =  ((uint16)Rm) * ((uint16)Rn);
1966         set_flag_z(res);
1967         set_flag_n(res);
1968 }
1969
1970 static void gpu_opcode_bclr(void)
1971 {
1972         uint32 _Rm=imm_1;
1973         uint32 _Rn=Rn;
1974         uint32 res;
1975 #ifdef __PORT__
1976 #ifndef USE_ASSEMBLY
1977 {
1978 /*      int dreg = jaguar.op & 31;
1979         UINT32 r1 = (jaguar.op >> 5) & 31;
1980         UINT32 r2 = jaguar.r[dreg];
1981         UINT32 res = r2 & ~(1 << r1);
1982         jaguar.r[dreg] = res;
1983         CLR_ZN; SET_ZN(res);*/
1984         UINT32 res = Rn & ~(1 << imm_1);
1985         Rn = res;
1986         CLR_ZN; SET_ZN(res);
1987         return;
1988 }
1989 #else
1990     /*
1991        GCC on WIN32 (more importantly mingw) doesn't know the declared
1992        variables in asm until we put a _ before it.
1993        
1994        So the declaration dsp_flag_c needs to be _dsp_flag_c on mingw.
1995     */
1996
1997 #ifdef __GCCWIN32__
1998
1999         asm(
2000         "btrl %1, %2                                    \n\
2001         cmpl $0, %2                                             \n\
2002         setz  _gpu_flag_z                               \n\
2003         sets  _gpu_flag_n                               \n\
2004         movl %%eax, %0                                  \n\
2005         "
2006         : "=m"(res)
2007         : "c"(_Rm), "a"(_Rn));
2008         
2009 #else
2010         
2011         asm(
2012         "btrl %1, %2                                    \n\
2013         cmpl $0, %2                                             \n\
2014         setz  gpu_flag_z                                \n\
2015         sets  gpu_flag_n                                \n\
2016         movl %%eax, %0                                  \n\
2017         "
2018         : "=m"(res)
2019         : "c"(_Rm), "a"(_Rn));
2020
2021 #endif  // #ifdef __GCCWIN32__
2022 #endif  // #ifndef USE_ASSEMBLY
2023         
2024 #else
2025         __asm 
2026         {
2027                 mov   eax,_Rn
2028                 mov   ecx,_Rm
2029                 btr       eax,ecx
2030                 cmp   eax,0
2031                 setz  [gpu_flag_z]
2032                 sets  [gpu_flag_n]
2033                 mov   res,eax
2034         };
2035 #endif
2036         Rn=res;
2037 }
2038
2039 static void gpu_opcode_btst(void)
2040 {
2041         uint32 _Rm=imm_1;
2042         uint32 _Rn=Rn;
2043 #ifdef __PORT__
2044 #ifndef USE_ASSEMBLY
2045 {
2046 /*      UINT32 r1 = (jaguar.op >> 5) & 31;
2047         UINT32 r2 = jaguar.r[jaguar.op & 31];
2048         CLR_Z; jaguar.FLAGS |= (~r2 >> r1) & 1;*/
2049         CLR_Z; gpu_flag_z = (~Rn >> imm_1) & 1;
2050         return;
2051 }
2052 #else
2053     /*
2054        GCC on WIN32 (more importantly mingw) doesn't know the declared
2055        variables in asm until we put a _ before it.
2056        
2057        So the declaration dsp_flag_c needs to be _dsp_flag_c on mingw.
2058     */
2059
2060 #ifdef __GCCWIN32__
2061
2062         asm(
2063         "bt %0, %1                                              \n\
2064         setnc _gpu_flag_z                               \n\
2065         "
2066         :
2067         : "c"(_Rm), "a"(_Rn));
2068         
2069 #else
2070
2071         asm(
2072         "bt %0, %1                                              \n\
2073         setnc gpu_flag_z                                \n\
2074         "
2075         :
2076         : "c"(_Rm), "a"(_Rn));
2077         
2078 #endif  // #ifdef __GCCWIN32__
2079 #endif  // #ifndef USE_ASSEMBLY
2080
2081 #else
2082         __asm 
2083         {
2084                 mov   eax,_Rn
2085                 mov   ecx,_Rm
2086                 bt        eax,ecx
2087                 setnc [gpu_flag_z]
2088         };
2089 #endif
2090 }
2091
2092 static void gpu_opcode_bset(void)
2093 {
2094         uint32 _Rm=imm_1;
2095         uint32 _Rn=Rn;
2096         uint32 res;
2097 #ifdef __PORT__
2098 #ifndef USE_ASSEMBLY
2099 {
2100 /*      int dreg = jaguar.op & 31;
2101         UINT32 r1 = (jaguar.op >> 5) & 31;
2102         UINT32 r2 = jaguar.r[dreg];
2103         UINT32 res = r2 | (1 << r1);
2104         jaguar.r[dreg] = res;
2105         CLR_ZN; SET_ZN(res);*/
2106         UINT32 res = Rn | (1 << imm_1);
2107         Rn = res;
2108         CLR_ZN; SET_ZN(res);
2109         return;
2110 }
2111 #else
2112     /*
2113        GCC on WIN32 (more importantly mingw) doesn't know the declared
2114        variables in asm until we put a _ before it.
2115        
2116        So the declaration dsp_flag_c needs to be _dsp_flag_c on mingw.
2117     */
2118
2119 #ifdef __GCCWIN32__
2120
2121         asm(
2122         "btsl %1, %2                                    \n\
2123         cmpl $0, %2                                             \n\
2124         setz  _gpu_flag_z                               \n\
2125         sets  _gpu_flag_n                               \n\
2126         movl %%eax, %0                                  \n\
2127         "
2128         : "=m"(res)
2129         : "c"(_Rm), "a"(_Rn));
2130         
2131 #else
2132         
2133         asm(
2134         "btsl %1, %2                                    \n\
2135         cmpl $0, %2                                             \n\
2136         setz  gpu_flag_z                                \n\
2137         sets  gpu_flag_n                                \n\
2138         movl %%eax, %0                                  \n\
2139         "
2140         : "=m"(res)
2141         : "c"(_Rm), "a"(_Rn));
2142
2143 #endif  // #ifdef __GCCWIN32__
2144 #endif  // #ifndef USE_ASSEMBLY
2145         
2146 #else
2147         __asm 
2148         {
2149                 mov   eax,_Rn
2150                 mov   ecx,_Rm
2151                 bts       eax,ecx
2152                 cmp   eax,0
2153                 setz  [gpu_flag_z]
2154                 sets  [gpu_flag_n]
2155                 mov   res,eax
2156         };
2157 #endif
2158         Rn=res;
2159 }
2160
2161 static void gpu_opcode_imacn(void)
2162 {
2163         uint32 res = ((int16)Rm) * ((int16)(Rn));
2164         gpu_acc += res;
2165 }
2166
2167 static void gpu_opcode_mtoi(void)
2168 {
2169         uint32 _Rm=Rm;
2170         uint32 res=Rn=(((INT32)_Rm >> 8) & 0xff800000) | (_Rm & 0x007fffff);
2171         set_flag_z(res);
2172         set_flag_n(res);
2173 }
2174
2175 static void gpu_opcode_normi(void)
2176 {
2177         uint32 _Rm = Rm;
2178         uint32 res = 0;
2179
2180         if (_Rm)
2181         {
2182                 while ((_Rm & 0xFFC00000) == 0)
2183                 {
2184                         _Rm <<= 1;
2185                         res--;
2186                 }
2187                 while ((_Rm & 0xFF800000) != 0)
2188                 {
2189                         _Rm >>= 1;
2190                         res++;
2191                 }
2192         }
2193         Rn = res;
2194         set_flag_z(res);
2195         set_flag_n(res);
2196 }
2197
2198 static void gpu_opcode_mmult(void)
2199 {
2200         int count       = gpu_matrix_control & 0x0F;
2201         uint32 addr = gpu_pointer_to_matrix; // in the gpu ram
2202         int64 accum = 0;
2203         uint32 res;
2204
2205         if (!(gpu_matrix_control & 0x10))
2206         {
2207                 for (int i = 0; i < count; i++)
2208                 { 
2209                         int16 a;
2210                         if (i&0x01)
2211                                 a=(int16)((gpu_alternate_reg[gpu_opcode_first_parameter + (i>>1)]>>16)&0xffff);
2212                         else
2213                                 a=(int16)(gpu_alternate_reg[gpu_opcode_first_parameter + (i>>1)]&0xffff);
2214
2215                         int16 b=((int16)gpu_word_read(addr+2));
2216                         accum += a*b;
2217                         addr += 4;
2218                 }
2219         }
2220         else
2221         {
2222                 for (int i = 0; i < count; i++)
2223                 {
2224                         int16 a;
2225                         if (i&0x01)
2226                                 a=(int16)((gpu_alternate_reg[gpu_opcode_first_parameter + (i>>1)]>>16)&0xffff);
2227                         else
2228                                 a=(int16)(gpu_alternate_reg[gpu_opcode_first_parameter + (i>>1)]&0xffff);
2229
2230                         int16 b=((int16)gpu_word_read(addr+2));
2231                         accum += a*b;
2232                         addr += 4 * count;
2233                 }
2234         }
2235         Rn = res = (int32)accum;
2236         // carry flag to do
2237         set_flag_z(res);
2238         set_flag_n(res);
2239 }
2240
2241 static void gpu_opcode_abs(void)
2242 {
2243         uint32 _Rn=Rn;
2244         uint32 res;
2245         
2246         if (_Rn==0x80000000)
2247         {
2248                 set_flag_n(1);
2249         }
2250         else
2251         {
2252                 gpu_flag_c  = ((_Rn&0x80000000)>>31);
2253                 res= Rn =  (((int32)_Rn)<0) ? -_Rn : _Rn;
2254                 reset_flag_n();
2255                 set_flag_z(res);
2256         }
2257 }
2258
2259 static void gpu_opcode_div(void)
2260 {
2261         uint32 _Rm=Rm;
2262         uint32 _Rn=Rn;
2263
2264         if (_Rm)
2265         {
2266                 if (gpu_div_control & 1)
2267                 {
2268                         gpu_remain = (((uint64)_Rn) << 16) % _Rm;
2269                         if (gpu_remain&0x80000000)
2270                                 gpu_remain-=_Rm;
2271                         Rn = (((uint64)_Rn) << 16) / _Rm;
2272                 }
2273                 else
2274                 {
2275                         gpu_remain = _Rn % _Rm;
2276                         if (gpu_remain&0x80000000)
2277                                 gpu_remain-=_Rm;
2278                         Rn/=_Rm;
2279                 }
2280         }
2281         else
2282                 Rn=0xffffffff;
2283 }
2284
2285 static void gpu_opcode_imultn(void)
2286 {
2287         uint32 res = (int32)((int16)Rn * (int16)Rm);
2288         gpu_acc = (int32)res;
2289         set_flag_z(res);
2290         set_flag_n(res);
2291 }
2292
2293 static void gpu_opcode_neg(void)
2294 {
2295         uint32 _Rn=Rn;
2296         uint32 res;
2297 #ifdef __PORT__
2298 #ifndef USE_ASSEMBLY
2299 {
2300 /*      int dreg = jaguar.op & 31;
2301         UINT32 r2 = jaguar.r[dreg];
2302         UINT32 res = -r2;
2303         jaguar.r[dreg] = res;
2304         CLR_ZNC; SET_ZNC_SUB(0,r2,res);*/
2305         UINT32 res = -Rn;
2306         CLR_ZNC; SET_ZNC_SUB(0, Rn, res);
2307         Rn = res;
2308         return;
2309 }
2310 #else
2311     /*
2312        GCC on WIN32 (more importantly mingw) doesn't know the declared
2313        variables in asm until we put a _ before it.
2314        
2315        So the declaration dsp_flag_c needs to be _dsp_flag_c on mingw.
2316     */
2317
2318 #ifdef __GCCWIN32__
2319
2320         asm(
2321         "subl %1, %2                                    \n\
2322         setc  _gpu_flag_c                               \n\
2323         setz  _gpu_flag_z                               \n\
2324         sets  _gpu_flag_n                               \n\
2325         movl %%eax, %0                                  \n\
2326         "
2327         : "=m"(res)
2328         : "d"(_Rn), "a"(0));
2329         
2330 #else
2331
2332         asm(
2333         "subl %1, %2                                    \n\
2334         setc  gpu_flag_c                                \n\
2335         setz  gpu_flag_z                                \n\
2336         sets  gpu_flag_n                                \n\
2337         movl %%eax, %0                                  \n\
2338         "
2339         : "=m"(res)
2340         : "d"(_Rn), "a"(0));
2341         
2342 #endif  // #ifdef __GCCWIN32__
2343 #endif  // #ifndef USE_ASSEMBLY
2344
2345 #else
2346         __asm 
2347         {
2348                 xor       eax,eax
2349                 mov   edx,_Rn
2350                 sub   eax,edx
2351                 setc  [gpu_flag_c]
2352                 setz  [gpu_flag_z]
2353                 sets  [gpu_flag_n]
2354                 mov       res,eax
2355         };
2356 #endif
2357         Rn=res;
2358 }
2359
2360 static void gpu_opcode_shlq(void)
2361 {
2362         uint32 shift=(32-gpu_convert_zero[imm_1]);
2363         uint32 _Rn=Rn;
2364         uint32 res;
2365 #ifdef __PORT__
2366 #ifndef USE_ASSEMBLY
2367 /*      int dreg = jaguar.op & 31;
2368         INT32 r1 = convert_zero[(jaguar.op >> 5) & 31];
2369         UINT32 r2 = jaguar.r[dreg];
2370         UINT32 res = r2 << (32 - r1);
2371         jaguar.r[dreg] = res;
2372         CLR_ZNC; SET_ZN(res); jaguar.FLAGS |= (r2 >> 30) & 2;*/
2373 {
2374         INT32 r1 = gpu_convert_zero[imm_1];
2375         UINT32 res = Rn << (32 - r1);
2376         CLR_ZNC; SET_ZN(res); gpu_flag_c = (Rn >> 31) & 1;
2377         Rn = res;
2378         return;
2379 }
2380 #else
2381     /*
2382        GCC on WIN32 (more importantly mingw) doesn't know the declared
2383        variables in asm until we put a _ before it.
2384        
2385        So the declaration dsp_flag_c needs to be _dsp_flag_c on mingw.
2386     */
2387
2388 #ifdef __GCCWIN32__
2389
2390         asm(
2391         "testl $0x80000000, %2                  \n\
2392         setnz _gpu_flag_c                               \n\
2393         shl %%cl, %2                                    \n\
2394         cmpl $0, %2                                             \n\
2395         setz  _gpu_flag_z                               \n\
2396         sets  _gpu_flag_n                               \n\
2397         movl %%eax, %0                                  \n\
2398         "
2399         : "=m"(res)
2400         : "c"(shift), "a"(_Rn));
2401         
2402 #else
2403         
2404         asm(
2405         "testl $0x80000000, %2                  \n\
2406         setnz gpu_flag_c                                \n\
2407         shl %%cl, %2                                    \n\
2408         cmpl $0, %2                                             \n\
2409         setz  gpu_flag_z                                \n\
2410         sets  gpu_flag_n                                \n\
2411         movl %%eax, %0                                  \n\
2412         "
2413         : "=m"(res)
2414         : "c"(shift), "a"(_Rn));
2415
2416 #endif  // #ifdef __GCCWIN32__
2417 #endif  // #ifndef USE_ASSEMBLY
2418         
2419 #else
2420         __asm 
2421         {
2422                 mov ecx,shift
2423                 mov eax,_Rn
2424                 test eax,0x80000000
2425                 setnz [gpu_flag_c]
2426                 shl eax,cl
2427                 cmp eax,0
2428                 setz [gpu_flag_z]
2429                 sets [gpu_flag_n]
2430                 mov res,eax
2431         }
2432 #endif
2433         Rn=res;
2434 }
2435
2436 static void gpu_opcode_shrq(void)
2437 {
2438         uint32 shift=gpu_convert_zero[imm_1];
2439         uint32 _Rn=Rn;
2440         
2441         uint32 res;
2442 #ifdef __PORT__
2443 #ifndef USE_ASSEMBLY
2444 /*      int dreg = jaguar.op & 31;
2445         INT32 r1 = convert_zero[(jaguar.op >> 5) & 31];
2446         UINT32 r2 = jaguar.r[dreg];
2447         UINT32 res = r2 >> r1;
2448         jaguar.r[dreg] = res;
2449         CLR_ZNC; SET_ZN(res); jaguar.FLAGS |= (r2 << 1) & 2;*/
2450 {
2451         INT32 r1 = gpu_convert_zero[imm_1];
2452         UINT32 res = Rn >> r1;
2453         CLR_ZNC; SET_ZN(res); gpu_flag_c = Rn & 1;
2454         Rn = res;
2455         return;
2456 }
2457 #else
2458     /*
2459        GCC on WIN32 (more importantly mingw) doesn't know the declared
2460        variables in asm until we put a _ before it.
2461        
2462        So the declaration dsp_flag_c needs to be _dsp_flag_c on mingw.
2463     */
2464
2465 #ifdef __GCCWIN32__
2466
2467         asm(
2468         "testl $0x00000001, %2                  \n\
2469         setnz _gpu_flag_c                               \n\
2470         shr %%cl, %2                                    \n\
2471         cmpl $0, %2                                             \n\
2472         setz  _gpu_flag_z                               \n\
2473         sets  _gpu_flag_n                               \n\
2474         movl %%eax, %0                                  \n\
2475         "
2476         : "=m"(res)
2477         : "c"(shift), "a"(_Rn));
2478         
2479 #else
2480
2481         asm(
2482         "testl $0x00000001, %2                  \n\
2483         setnz gpu_flag_c                                \n\
2484         shr %%cl, %2                                    \n\
2485         cmpl $0, %2                                             \n\
2486         setz  gpu_flag_z                                \n\
2487         sets  gpu_flag_n                                \n\
2488         movl %%eax, %0                                  \n\
2489         "
2490         : "=m"(res)
2491         : "c"(shift), "a"(_Rn));
2492         
2493 #endif  // #ifdef __GCCWIN32__
2494 #endif  // #ifndef USE_ASSEMBLY
2495
2496 #else
2497         __asm 
2498         {
2499                 mov ecx,shift
2500                 mov eax,_Rn
2501                 test eax,0x00000001
2502                 setnz [gpu_flag_c]
2503                 shr eax,cl
2504                 cmp eax,0
2505                 setz [gpu_flag_z]
2506                 sets [gpu_flag_n]
2507                 mov res,eax
2508         }
2509 #endif
2510         Rn=res;
2511 }
2512
2513 static void gpu_opcode_ror(void)
2514 {
2515         uint32 shift=Rm;
2516         uint32 _Rn=Rn;
2517         uint32 res;
2518 #ifdef __PORT__
2519 #ifndef USE_ASSEMBLY
2520 //#ifndef __PORT__      // For testing...
2521 /*      int dreg = jaguar.op & 31;
2522         UINT32 r1 = jaguar.r[(jaguar.op >> 5) & 31] & 31;
2523         UINT32 r2 = jaguar.r[dreg];
2524         UINT32 res = (r2 >> r1) | (r2 << (32 - r1));
2525         jaguar.r[dreg] = res;
2526         CLR_ZNC; SET_ZN(res); jaguar.FLAGS |= (r2 >> 30) & 2;*/
2527 {
2528         UINT32 r1 = Rm & 0x1F;
2529         UINT32 res = (Rn >> r1) | (Rn << (32 - r1));
2530         CLR_ZNC; SET_ZN(res); gpu_flag_c = (Rn >> 31) & 1;
2531         Rn = res;
2532         return;
2533 }
2534 #else
2535     /*
2536        GCC on WIN32 (more importantly mingw) doesn't know the declared
2537        variables in asm until we put a _ before it.
2538        
2539        So the declaration dsp_flag_c needs to be _dsp_flag_c on mingw.
2540     */
2541
2542 #ifdef __GCCWIN32__
2543
2544         asm(
2545         "testl $0x80000000, %2                  \n\
2546         setnz _gpu_flag_c                               \n\
2547         ror %%cl, %2                                    \n\
2548         cmpl $0, %2                                             \n\
2549         setz  _gpu_flag_z                               \n\
2550         sets  _gpu_flag_n                               \n\
2551         movl %%eax, %0                                  \n\
2552         "
2553         : "=m"(res)
2554         : "c"(shift), "a"(_Rn));
2555         
2556 #else
2557
2558         asm(
2559         "testl $0x80000000, %2                  \n\
2560         setnz gpu_flag_c                                \n\
2561         ror %%cl, %2                                    \n\
2562         cmpl $0, %2                                             \n\
2563         setz  gpu_flag_z                                \n\
2564         sets  gpu_flag_n                                \n\
2565         movl %%eax, %0                                  \n\
2566         "
2567         : "=m"(res)
2568         : "c"(shift), "a"(_Rn));
2569
2570 #endif  // #ifdef __GCCWIN32__
2571 #endif  // #ifndef USE_ASSEMBLY
2572         
2573 #else
2574         __asm 
2575         {
2576                 mov ecx,shift
2577                 mov eax,_Rn
2578                 test eax,0x80000000
2579                 setnz [gpu_flag_c]
2580                 ror eax,cl
2581                 cmp eax,0
2582                 setz [gpu_flag_z]
2583                 sets [gpu_flag_n]
2584                 mov res,eax
2585         }
2586 #endif
2587         Rn=res;
2588 }
2589
2590 static void gpu_opcode_rorq(void)
2591 {
2592         uint32 shift = gpu_convert_zero[imm_1 & 0x1F];
2593         uint32 _Rn = Rn;
2594         uint32 res;
2595 #ifdef __PORT__
2596 #ifndef USE_ASSEMBLY
2597 /*              uint32 index = opcode >> 10;            
2598                 gpu_opcode_first_parameter = (opcode & 0x3E0) >> 5;
2599                 gpu_opcode_second_parameter = (opcode & 0x1F);
2600                 gpu_pc += 2;
2601                 gpu_opcode[index]();
2602                 cycles -= gpu_opcode_cycles[index];
2603                 gpu_opcode_use[index]++;*/
2604
2605 /*      int dreg = jaguar.op & 31;
2606         UINT32 r1 = convert_zero[(jaguar.op >> 5) & 31];
2607         UINT32 r2 = jaguar.r[dreg];
2608         UINT32 res = (r2 >> r1) | (r2 << (32 - r1));
2609         jaguar.r[dreg] = res;
2610         CLR_ZNC; SET_ZN(res); jaguar.FLAGS |= (r2 >> 30) & 2;*/
2611 {
2612         UINT32 r1 = gpu_convert_zero[imm_1 & 0x1F];
2613         UINT32 r2 = Rn;
2614         UINT32 res = (r2 >> r1) | (r2 << (32 - r1));
2615         Rn = res;
2616         CLR_ZNC; SET_ZN(res); gpu_flag_c = (r2 >> 31) & 0x01;
2617         return;
2618 }
2619 #else
2620
2621     /*
2622        GCC on WIN32 (more importantly mingw) doesn't know the declared
2623        variables in asm until we put a _ before it.
2624        
2625        So the declaration dsp_flag_c needs to be _dsp_flag_c on mingw.
2626     */
2627
2628 #ifdef __GCCWIN32__
2629
2630         asm(
2631         "testl $0x80000000, %2                  \n\
2632         setnz _gpu_flag_c                               \n\
2633         ror %%cl, %2                                    \n\
2634         cmpl $0, %2                                             \n\
2635         setz  _gpu_flag_z                               \n\
2636         sets  _gpu_flag_n                               \n\
2637         movl %%eax, %0                                  \n\
2638         "
2639         : "=m"(res)
2640         : "c"(shift), "a"(_Rn));
2641         
2642 #else
2643         
2644         asm(
2645         "testl $0x80000000, %2                  \n\
2646         setnz gpu_flag_c                                \n\
2647         ror %%cl, %2                                    \n\
2648         cmpl $0, %2                                             \n\
2649         setz  gpu_flag_z                                \n\
2650         sets  gpu_flag_n                                \n\
2651         movl %%eax, %0                                  \n\
2652         "
2653         : "=m"(res)
2654         : "c"(shift), "a"(_Rn));
2655
2656 #endif  // #ifdef __GCCWIN32__
2657 #endif  // #ifndef USE_ASSEMBLY
2658
2659 #else
2660         __asm 
2661         {
2662                 mov ecx,shift
2663                 mov eax,_Rn
2664                 test eax,0x80000000
2665                 setnz [gpu_flag_c]
2666                 ror eax,cl
2667                 cmp eax,0
2668                 setz [gpu_flag_z]
2669                 sets [gpu_flag_n]
2670                 mov res,eax
2671         }
2672 #endif  // #ifdef __PORT__
2673         Rn = res;
2674 }
2675
2676 static void gpu_opcode_sha(void)
2677 {
2678         int32 sRm=(int32)Rm;
2679         uint32 _Rn=Rn;
2680
2681         if (sRm<0)
2682         {
2683                 uint32 shift=-sRm;
2684                 if (shift>=32) shift=32;
2685                 gpu_flag_c=(_Rn&0x80000000)>>31;
2686                 while (shift)
2687                 {
2688                         _Rn<<=1;
2689                         shift--;
2690                 }
2691         }
2692         else
2693         {
2694                 uint32 shift=sRm;
2695                 if (shift>=32) shift=32;
2696                 gpu_flag_c=_Rn&0x1;
2697                 while (shift)
2698                 {
2699                         _Rn=((int32)_Rn)>>1;
2700                         shift--;
2701                 }
2702         }
2703         Rn=_Rn;
2704         set_flag_z(_Rn);
2705         set_flag_n(_Rn);
2706 }
2707
2708 static void gpu_opcode_sharq(void)
2709 {
2710         uint32 shift=gpu_convert_zero[imm_1];
2711         uint32 _Rn=Rn;
2712
2713         gpu_flag_c  = (_Rn & 0x1);
2714         while (shift)
2715         {
2716                 _Rn=((int32)_Rn)>>1;
2717                 shift--;
2718         }
2719         Rn=_Rn;
2720         set_flag_z(_Rn);
2721         set_flag_n(_Rn);
2722 }
2723
2724 static void gpu_opcode_sh(void)
2725 {
2726         int32 sRm=(int32)Rm;
2727         uint32 _Rn=Rn;  
2728
2729         if (sRm<0)
2730         {
2731                 uint32 shift=(-sRm);
2732                 if (shift>=32) shift=32;
2733                 gpu_flag_c=(_Rn&0x80000000)>>31;
2734                 while (shift)
2735                 {
2736                         _Rn<<=1;
2737                         shift--;
2738                 }
2739         }
2740         else
2741         {
2742                 uint32 shift=sRm;
2743                 if (shift>=32) shift=32;
2744                 gpu_flag_c=_Rn&0x1;
2745                 while (shift)
2746                 {
2747                         _Rn>>=1;
2748                         shift--;
2749                 }
2750         }
2751         Rn=_Rn;
2752         set_flag_z(_Rn);
2753         set_flag_n(_Rn);
2754 }