]> Shamusworld >> Repos - virtualjaguar/blob - src/objectp.cpp
Fixed off-by-one error in scaled bitmaps
[virtualjaguar] / src / objectp.cpp
1 //
2 // Object Processor
3 //
4 // Original source by Cal2
5 // GCC/SDL port by Niels Wagenaar (Linux/WIN32) and Caz (BeOS)
6 // Extensive cleanups/fixes/rewrites by James L. Hammons
7 //
8
9 #include <stdio.h>
10 #include <stdlib.h>
11 #include <string.h>
12 #include "jaguar.h"
13
14 //#define OP_DEBUG
15 //#define OP_DEBUG_BMP
16
// Blend table lookups. The destination value selects the high byte of the
// 16-bit table index and the source (delta) value selects the low byte.
// Both arguments are fully parenthesized so that an expression argument
// (e.g. "a | b") is cast and shifted as a whole instead of piecewise.
#define BLEND_Y(dst, src)       op_blend_y[(((uint16)(dst)) << 8) | ((uint16)(src))]
#define BLEND_CR(dst, src)      op_blend_cr[(((uint16)(dst)) << 8) | ((uint16)(src))]
19
// Object types: the low three bits of an object's first phrase
#define OBJECT_TYPE_BITMAP              (0)             // %000
#define OBJECT_TYPE_SCALE               (1)             // %001
#define OBJECT_TYPE_GPU                 (2)             // %010
#define OBJECT_TYPE_BRANCH              (3)             // %011
#define OBJECT_TYPE_STOP                (4)             // %100

// Condition codes tested by BRANCH objects
#define CONDITION_EQUAL                 (0)
#define CONDITION_LESS_THAN             (1)
#define CONDITION_GREATER_THAN          (2)
#define CONDITION_OP_FLAG_SET           (3)
#define CONDITION_SECOND_HALF_LINE      (4)

// Bits of the four-bit flags field found in a bitmap object's second phrase
#define OPFLAG_REFLECT                  (1 << 0)        // Horizontal mirror bit
#define OPFLAG_RMW                      (1 << 1)        // Read-Modify-Write bit
#define OPFLAG_TRANS                    (1 << 2)        // Transparency bit
#define OPFLAG_RELEASE                  (1 << 3)        // Bus release bit
36
37 // Private function prototypes
38
39 void OPProcessFixedBitmap(uint64 p0, uint64 p1, bool render);
40 void OPProcessScaledBitmap(uint64 p0, uint64 p1, uint64 p2, bool render);
41 void DumpScaledObject(uint64 p0, uint64 p1, uint64 p2);
42 void DumpFixedObject(uint64 p0, uint64 p1);
43 uint64 op_load_phrase(uint32 offset);
44
45 // Local global variables
46
47 static uint8 * op_blend_y;
48 static uint8 * op_blend_cr;
49 // There may be a problem with this "RAM" overlapping (and thus being independent of)
50 // some of the regular TOM RAM...
51 static uint8 objectp_ram[0x40];                 // This is based at $F00000
52 uint8 objectp_running;
53 //bool objectp_stop_reading_list;
54
55 static uint8 op_bitmap_bit_depth[8] = { 1, 2, 4, 8, 16, 24, 32, 0 };
56 //static uint32 op_bitmap_bit_size[8] =
57 //      { (uint32)(0.125*65536), (uint32)(0.25*65536), (uint32)(0.5*65536), (uint32)(1*65536),
58 //        (uint32)(2*65536),     (uint32)(1*65536),    (uint32)(1*65536),   (uint32)(1*65536) };
59 static uint32 op_pointer;
60
61 int32 phraseWidthToPixels[8] = { 64, 32, 16, 8, 4, 2, 0, 0 };
62
63
64 //
65 // Object Processor initialization
66 //
67 void op_init(void)
68 {
69         // Blend tables (64K each)
70         memory_malloc_secure((void **)&op_blend_y, 0x10000, "Jaguar Object processor Y blend lookup table");
71         memory_malloc_secure((void **)&op_blend_cr, 0x10000, "Jaguar Object processor CR blend lookup table");
72
73         // Here we calculate the saturating blend of a signed 4-bit value and an
74         // existing Cyan/Red value as well as a signed 8-bit value and an existing intensity...
75         // Note: CRY is 4 bits Cyan, 4 bits Red, 16 bits intensitY
76         for(int i=0; i<256*256; i++)
77         {
78                 int y = (i >> 8) & 0xFF;
79                 int dy = (INT8)i;                                       // Sign extend the Y index
80                 int c1 = (i >> 8) & 0x0F;
81                 int dc1 = (INT8)(i << 4) >> 4;          // Sign extend the R index
82                 int c2 = (i >> 12) & 0x0F;
83                 int dc2 = (INT8)(i & 0xF0) >> 4;        // Sign extend the C index
84
85                 y += dy;
86                 if (y < 0)
87                         y = 0;
88                 else if (y > 0xFF)
89                         y = 0xFF;
90                 op_blend_y[i] = y;
91
92                 c1 += dc1;
93                 if (c1 < 0)
94                         c1 = 0;
95                 else if (c1 > 0x0F)
96                         c1 = 0x0F;
97                 c2 += dc2;
98
99                 if (c2 < 0)
100                         c2 = 0;
101                 else if (c2 > 0x0F)
102                         c2 = 0x0F;
103                 op_blend_cr[i] = (c2 << 4) | c1;
104         }
105
106         op_reset();
107 }
108
109 //
110 // Object Processor reset
111 //
112 void op_reset(void)
113 {
114         memset(objectp_ram, 0x00, 0x40);
115         objectp_running = 0;
116 }
117
118 void op_done(void)
119 {
120         char * opType[8] =
121         { "(BITMAP)", "(SCALED BITMAP)", "(GPU INT)", "(BRANCH)", "(STOP)", "???", "???", "???" };
122         char * ccType[8] =
123                 { "\"==\"", "\"<\"", "\">\"", "(opflag set)", "(second half line)", "?", "?", "?" };
124
125         uint32 olp = op_get_list_pointer();
126         WriteLog("OP: OLP = %08X\n", olp);
127         WriteLog("OP: Phrase dump\n    ----------\n");
128         for(uint32 i=0; i<0x100; i+=8)
129         {
130                 uint32 hi = JaguarReadLong(olp + i, OP), lo = JaguarReadLong(olp + i + 4, OP);
131                 WriteLog("\t%08X: %08X %08X %s", olp + i, hi, lo, opType[lo & 0x07]);
132                 if ((lo & 0x07) == 3)
133                 {
134                         uint16 ypos = (lo >> 3) & 0x7FF;
135                         uint8  cc   = (lo >> 14) & 0x03;
136                         uint32 link = ((hi << 11) | (lo >> 21)) & 0x3FFFF8;
137                         WriteLog(" YPOS=%u, CC=%s, link=%08X", ypos, ccType[cc], link);
138                 }
139                 WriteLog("\n");
140                 if ((lo & 0x07) == 0)
141                         DumpFixedObject(op_load_phrase(olp+i), op_load_phrase(olp+i+8));
142                 if ((lo & 0x07) == 1)
143                         DumpScaledObject(op_load_phrase(olp+i), op_load_phrase(olp+i+8), op_load_phrase(olp+i+16));
144         }
145         WriteLog("\n");
146 }
147
148 //
149 // Object Processor memory access
150 // Memory range: F00010 - F00027
151 //
152 //      F00010-F00017   R     xxxxxxxx xxxxxxxx   OB - current object code from the graphics processor
153 //      F00020-F00023     W   xxxxxxxx xxxxxxxx   OLP - start of the object list
154 //      F00026            W   -------- -------x   OBF - object processor flag
155 //
156
157 uint8 OPReadByte(uint32 offset, uint32 who/*=UNKNOWN*/)
158 {
159         offset &= 0x3F;
160         return objectp_ram[offset];
161 }
162
163 uint16 OPReadWord(uint32 offset, uint32 who/*=UNKNOWN*/)
164 {
165         offset &= 0x3F;
166         return GET16(objectp_ram, offset);
167 }
168
169 void OPWriteByte(uint32 offset, uint8 data, uint32 who/*=UNKNOWN*/)
170 {
171         offset &= 0x3F;
172         objectp_ram[offset] = data;
173 }
174
175 void OPWriteWord(uint32 offset, uint16 data, uint32 who/*=UNKNOWN*/)
176 {
177         offset &= 0x3F;
178         SET16(objectp_ram, offset, data);
179
180 /*if (offset == 0x20)
181 WriteLog("OP: Setting lo list pointer: %04X\n", data);
182 if (offset == 0x22)
183 WriteLog("OP: Setting hi list pointer: %04X\n", data);//*/
184 }
185
186 uint32 op_get_list_pointer(void)
187 {
188         // Note: This register is LO / HI WORD, hence the funky look of this...
189 //      return (objectp_ram[0x22] << 24) | (objectp_ram[0x23] << 16) | (objectp_ram[0x20] << 8) | objectp_ram[0x21];
190         return GET16(objectp_ram, 0x20) | (GET16(objectp_ram, 0x22) << 16);
191 }
192
193 // This is WRONG, since the OBF is only 16 bits wide!!! [FIXED]
194
195 uint32 op_get_status_register(void)
196 {
197 //      return (objectp_ram[0x26] << 24) | (objectp_ram[0x27] << 16) | (objectp_ram[0x28] << 8) | objectp_ram[0x29];
198 //      return GET32(objectp_ram, 0x26);
199         return GET16(objectp_ram, 0x26);
200 }
201
202 // This is WRONG, since the OBF is only 16 bits wide!!! [FIXED]
203
204 void op_set_status_register(uint32 data)
205 {
206 /*      objectp_ram[0x26] = (data & 0xFF000000) >> 24;
207         objectp_ram[0x27] = (data & 0x00FF0000) >> 16;
208         objectp_ram[0x28] = (data & 0x0000FF00) >> 8;
209         objectp_ram[0x29] |= (data & 0xFE);*/
210         objectp_ram[0x26] = (data & 0x0000FF00) >> 8;
211         objectp_ram[0x27] |= (data & 0xFE);
212 }
213
214 void op_set_current_object(uint64 object)
215 {
216 //Not sure this is right... Wouldn't it just be stored 64 bit BE?
217         // Stored as least significant 32 bits first, ms32 last in big endian
218 /*      objectp_ram[0x13] = object & 0xFF; object >>= 8;
219         objectp_ram[0x12] = object & 0xFF; object >>= 8;
220         objectp_ram[0x11] = object & 0xFF; object >>= 8;
221         objectp_ram[0x10] = object & 0xFF; object >>= 8;
222
223         objectp_ram[0x17] = object & 0xFF; object >>= 8;
224         objectp_ram[0x16] = object & 0xFF; object >>= 8;
225         objectp_ram[0x15] = object & 0xFF; object >>= 8;
226         objectp_ram[0x14] = object & 0xFF;*/
227 // Let's try regular good old big endian...
228         objectp_ram[0x17] = object & 0xFF; object >>= 8;
229         objectp_ram[0x16] = object & 0xFF; object >>= 8;
230         objectp_ram[0x15] = object & 0xFF; object >>= 8;
231         objectp_ram[0x14] = object & 0xFF; object >>= 8;
232
233         objectp_ram[0x13] = object & 0xFF; object >>= 8;
234         objectp_ram[0x12] = object & 0xFF; object >>= 8;
235         objectp_ram[0x11] = object & 0xFF; object >>= 8;
236         objectp_ram[0x10] = object & 0xFF;
237 }
238
239 uint64 op_load_phrase(uint32 offset)
240 {
241         offset &= ~0x07;                                                // 8 byte alignment
242         return ((uint64)JaguarReadLong(offset, OP) << 32) | (uint64)JaguarReadLong(offset+4, OP);
243 }
244
245 void OPStorePhrase(uint32 offset, uint64 p)
246 {
247         offset &= ~0x07;                                                // 8 byte alignment
248         JaguarWriteLong(offset, p >> 32, OP);
249         JaguarWriteLong(offset + 4, p & 0xFFFFFFFF, OP);
250 }
251
252 //
253 // Debugging routines
254 //
255 void DumpScaledObject(uint64 p0, uint64 p1, uint64 p2)
256 {
257         WriteLog(" (SCALED BITMAP)");
258         WriteLog(" %08X --> phrase %08X %08X\n", op_pointer, (uint32)(p1>>32), (uint32)(p1&0xFFFFFFFF));
259         WriteLog("                 %08X --> phrase %08X %08X ", op_pointer+8, (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
260         uint8 bitdepth = (p1 >> 12) & 0x07;
261         int16 ypos = ((p0 >> 3) & 0x3FF);                       // ??? What if not interlaced (/2)?
262         int32 xpos = p1 & 0xFFF;
263         xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);
264         uint32 iwidth = ((p1 >> 28) & 0x3FF);
265         uint32 dwidth = ((p1 >> 18) & 0x3FF);           // Unsigned!
266         uint16 height = ((p0 >> 14) & 0x3FF);
267         uint32 link = ((p0 >> 24) & 0x7FFFF) << 3;
268         uint32 ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
269         uint32 firstPix = (p1 >> 49) & 0x3F;
270         uint8 flags = (p1 >> 45) & 0x0F;
271         uint8 idx = (p1 >> 38) & 0x7F;
272         uint32 pitch = (p1 >> 15) & 0x07;
273         WriteLog("\n    [%u (%u) x %u @ (%i, %u) (%u bpp), l: %08X, p: %08X fp: %02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
274                 iwidth, dwidth, height, xpos, ypos, op_bitmap_bit_depth[bitdepth], link, ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""), (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""), (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
275         uint32 hscale = p2 & 0xFF;
276         uint32 vscale = (p2 >> 8) & 0xFF;
277         uint32 remainder = (p2 >> 16) & 0xFF;
278         WriteLog("    [hsc: %02X, vsc: %02X, rem: %02X]\n", hscale, vscale, remainder);
279 }
280
281 void DumpFixedObject(uint64 p0, uint64 p1)
282 {
283         WriteLog(" (BITMAP)");
284         WriteLog(" %08X --> phrase %08X %08X\n", op_pointer, (uint32)(p1>>32), (uint32)(p1&0xFFFFFFFF));
285         uint8 bitdepth = (p1 >> 12) & 0x07;
286         int16 ypos = ((p0 >> 3) & 0x3FF);                       // ??? What if not interlaced (/2)?
287         int32 xpos = p1 & 0xFFF;
288         xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);
289         uint32 iwidth = ((p1 >> 28) & 0x3FF);
290         uint32 dwidth = ((p1 >> 18) & 0x3FF);           // Unsigned!
291         uint16 height = ((p0 >> 14) & 0x3FF);
292         uint32 link = ((p0 >> 24) & 0x7FFFF) << 3;
293         uint32 ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
294         uint32 firstPix = (p1 >> 49) & 0x3F;
295         uint8 flags = (p1 >> 45) & 0x0F;
296         uint8 idx = (p1 >> 38) & 0x7F;
297         uint32 pitch = (p1 >> 15) & 0x07;
298         WriteLog("    [%u (%u) x %u @ (%i, %u) (%u bpp), l: %08X, p: %08X fp: %02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
299                 iwidth, dwidth, height, xpos, ypos, op_bitmap_bit_depth[bitdepth], link, ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""), (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""), (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
300 }
301
302 //
303 // Object Processor main routine
304 //
305 //Need to fix this so that when an GPU object IRQ happens, we can pick up OP processing
306 //where we left off. !!! FIX !!!
307 void OPProcessList(int scanline, bool render)
308 {
309 extern int op_start_log;
310 //      char * condition_to_str[8] =
311 //              { "==", "<", ">", "(opflag set)", "(second half line)", "?", "?", "?" };
312
313         op_pointer = op_get_list_pointer();
314
315 //      objectp_stop_reading_list = false;
316
317 // *** BEGIN OP PROCESSOR TESTING ONLY ***
318 extern bool interactiveMode;
319 extern bool iToggle;
320 extern int objectPtr;
321 bool inhibit;
322 int bitmapCounter = 0;
323 // *** END OP PROCESSOR TESTING ONLY ***
324
325 //      if (op_pointer) WriteLog(" new op list at 0x%.8x scanline %i\n",op_pointer,scanline);
326         while (op_pointer)
327         {
328 // *** BEGIN OP PROCESSOR TESTING ONLY ***
329 if (interactiveMode && bitmapCounter == objectPtr)
330         inhibit = iToggle;
331 else
332         inhibit = false;
333 // *** END OP PROCESSOR TESTING ONLY ***
334 //              if (objectp_stop_reading_list)
335 //                      return;
336                         
337                 uint64 p0 = op_load_phrase(op_pointer);
338                 op_pointer += 8;
339 if (scanline == tom_get_vdb() + 1 && op_start_log)
340 //if (scanline == 215 && op_start_log)
341 {
342 WriteLog("%08X --> phrase %08X %08X", op_pointer - 8, (int)(p0>>32), (int)(p0&0xFFFFFFFF));
343 if ((p0 & 0x07) == OBJECT_TYPE_BITMAP)
344 {
345 WriteLog(" (BITMAP) ");
346 uint64 p1 = op_load_phrase(op_pointer);
347 WriteLog("\n%08X --> phrase %08X %08X ", op_pointer, (int)(p1>>32), (int)(p1&0xFFFFFFFF));
348         uint8 bitdepth = (p1 >> 12) & 0x07;
349         int16 ypos = ((p0 >> 3) & 0x3FF);                       // ??? What if not interlaced (/2)?
350 int32 xpos = p1 & 0xFFF;
351 xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);
352         uint32 iwidth = ((p1 >> 28) & 0x3FF);
353         uint32 dwidth = ((p1 >> 18) & 0x3FF);           // Unsigned!
354         uint16 height = ((p0 >> 14) & 0x3FF);
355         uint32 link = ((p0 >> 24) & 0x7FFFF) << 3;
356         uint32 ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
357         uint32 firstPix = (p1 >> 49) & 0x3F;
358         uint8 flags = (p1 >> 45) & 0x0F;
359         uint8 idx = (p1 >> 38) & 0x7F;
360         uint32 pitch = (p1 >> 15) & 0x07;
361 WriteLog("\n    [%u (%u) x %u @ (%i, %u) (%u bpp), l: %08X, p: %08X fp: %02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
362         iwidth, dwidth, height, xpos, ypos, op_bitmap_bit_depth[bitdepth], link, ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""), (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""), (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
363 }
364 if ((p0 & 0x07) == OBJECT_TYPE_SCALE)
365 {
366 WriteLog(" (SCALED BITMAP)");
367 uint64 p1 = op_load_phrase(op_pointer), p2 = op_load_phrase(op_pointer+8);
368 WriteLog("\n%08X --> phrase %08X %08X ", op_pointer, (int)(p1>>32), (int)(p1&0xFFFFFFFF));
369 WriteLog("\n%08X --> phrase %08X %08X ", op_pointer+8, (int)(p2>>32), (int)(p2&0xFFFFFFFF));
370         uint8 bitdepth = (p1 >> 12) & 0x07;
371         int16 ypos = ((p0 >> 3) & 0x3FF);                       // ??? What if not interlaced (/2)?
372 int32 xpos = p1 & 0xFFF;
373 xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);
374         uint32 iwidth = ((p1 >> 28) & 0x3FF);
375         uint32 dwidth = ((p1 >> 18) & 0x3FF);           // Unsigned!
376         uint16 height = ((p0 >> 14) & 0x3FF);
377         uint32 link = ((p0 >> 24) & 0x7FFFF) << 3;
378         uint32 ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
379         uint32 firstPix = (p1 >> 49) & 0x3F;
380         uint8 flags = (p1 >> 45) & 0x0F;
381         uint8 idx = (p1 >> 38) & 0x7F;
382         uint32 pitch = (p1 >> 15) & 0x07;
383 WriteLog("\n    [%u (%u) x %u @ (%i, %u) (%u bpp), l: %08X, p: %08X fp: %02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
384         iwidth, dwidth, height, xpos, ypos, op_bitmap_bit_depth[bitdepth], link, ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""), (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""), (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
385         uint32 hscale = p2 & 0xFF;
386         uint32 vscale = (p2 >> 8) & 0xFF;
387         uint32 remainder = (p2 >> 16) & 0xFF;
388 WriteLog("    [hsc: %02X, vsc: %02X, rem: %02X]\n", hscale, vscale, remainder);
389 }
390 if ((p0 & 0x07) == OBJECT_TYPE_GPU)
391 WriteLog(" (GPU)\n");
392 if ((p0 & 0x07) == OBJECT_TYPE_BRANCH)
393 {
394 WriteLog(" (BRANCH)\n");
395 uint8 * jaguar_mainRam = GetRamPtr();
396 WriteLog("[RAM] --> ");
397 for(int k=0; k<8; k++)
398         WriteLog("%02X ", jaguar_mainRam[op_pointer-8 + k]);
399 WriteLog("\n");
400 }
401 if ((p0 & 0x07) == OBJECT_TYPE_STOP)
402 WriteLog("    --> List end\n");
403 }//*/
404                 
405 //              WriteLog("%08X type %i\n", op_pointer, (uint8)p0 & 0x07);               
406                 switch ((uint8)p0 & 0x07)
407                 {
408                 case OBJECT_TYPE_BITMAP:
409                 {
410                         uint16 ypos = (p0 >> 3) & 0x3FF;
411 // This is only theory implied by Rayman...!
412 // It seems that if the YPOS is zero, then bump the YPOS value so that it coincides with
413 // the VDB value. With interlacing, this would be slightly more tricky.
414 // There's probably another bit somewhere that enables this mode--but so far, doesn't seem
415 // to affect any other game in a negative way (that I've seen).
416 // Either that, or it's an undocumented bug...
417
418 //No, the reason this was needed is that the OP code before was wrong. Any value
419 //less than VDB will get written to the top line of the display!
420 //                      if (ypos == 0)
421 //                              ypos = TOMReadWord(0xF00046, OP) / 2;                   // Get the VDB value
422                         uint32 height = (p0 & 0xFFC000) >> 14;
423                         uint32 oldOPP = op_pointer - 8;
424 // *** BEGIN OP PROCESSOR TESTING ONLY ***
425 if (inhibit && op_start_log)
426         WriteLog("!!! ^^^ This object is INHIBITED! ^^^ !!!\n");
427 bitmapCounter++;
428 if (!inhibit)   // For OP testing only!
429 // *** END OP PROCESSOR TESTING ONLY ***
430                         if (scanline >= ypos && height > 0)
431                         {
432                                 uint64 p1 = op_load_phrase(op_pointer);
433                                 op_pointer += 8;
434 //WriteLog("OP: Writing scanline %d with ypos == %d...\n", scanline, ypos);
435 //WriteLog("--> Writing %u BPP bitmap...\n", op_bitmap_bit_depth[(p1 >> 12) & 0x07]);
436 //                              OPProcessFixedBitmap(scanline, p0, p1, render);
437                                 OPProcessFixedBitmap(p0, p1, render);
438
439                                 // OP write-backs
440
441 //???Does this really happen??? Doesn't seem to work if you do this...!
442 //Probably not. Must be a bug in the documentation...!
443 //                              uint32 link = (p0 & 0x7FFFF000000) >> 21;
444 //                              SET16(objectp_ram, 0x20, link & 0xFFFF);        // OLP
445 //                              SET16(objectp_ram, 0x22, link >> 16);
446 /*                              uint32 height = (p0 & 0xFFC000) >> 14;
447                                 if (height - 1 > 0)
448                                         height--;*/
449                                 // NOTE: Would subtract 2 if in interlaced mode...!
450 //                              uint64 height = ((p0 & 0xFFC000) - 0x4000) & 0xFFC000;
451 //                              if (height)
452                                         height--;
453
454                                 uint64 data = (p0 & 0xFFFFF80000000000) >> 40;
455                                 uint64 dwidth = (p1 & 0xFFC0000) >> 15;
456                                 data += dwidth;
457
458                                 p0 &= ~0xFFFFF80000FFC000;                      // Mask out old data...
459                                 p0 |= (uint64)height << 14;
460                                 p0 |= data << 40;
461                                 OPStorePhrase(oldOPP, p0);
462                         }
463                         op_pointer = (p0 & 0x000007FFFF000000) >> 21;
464                         break;
465                 }
466                 case OBJECT_TYPE_SCALE:
467                 {
468                         uint16 ypos = (p0 >> 3) & 0x3FF;
469                         uint32 height = (p0 & 0xFFC000) >> 14;
470                         uint32 oldOPP = op_pointer - 8;
471 // *** BEGIN OP PROCESSOR TESTING ONLY ***
472 if (inhibit && op_start_log)
473 {
474         WriteLog("!!! ^^^ This object is INHIBITED! ^^^ !!! (scanline=%u, ypos=%u, height=%u)\n", scanline, ypos, height);
475         DumpScaledObject(p0, op_load_phrase(op_pointer), op_load_phrase(op_pointer+8));
476 }
477 bitmapCounter++;
478 if (!inhibit)   // For OP testing only!
479 // *** END OP PROCESSOR TESTING ONLY ***
480                         if (scanline >= ypos && height > 0)
481                         {
482                                 uint64 p1 = op_load_phrase(op_pointer);
483                                 op_pointer += 8;
484                                 uint64 p2 = op_load_phrase(op_pointer);
485                                 op_pointer += 8;
486 //WriteLog("OP: %08X (%d) %08X%08X %08X%08X %08X%08X\n", oldOPP, scanline, (uint32)(p0>>32), (uint32)(p0&0xFFFFFFFF), (uint32)(p1>>32), (uint32)(p1&0xFFFFFFFF), (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
487                                 OPProcessScaledBitmap(p0, p1, p2, render);
488
489                                 // OP write-backs
490
491                                 uint8 remainder = p2 >> 16, vscale = p2 >> 8;
492 //Actually, we should skip this object if it has a vscale of zero.
493 //Or do we? Not sure... Atari Karts has a few lines that look like:
494 // (SCALED BITMAP)
495 //000E8268 --> phrase 00010000 7000B00D 
496 //    [7 (0) x 1 @ (13, 0) (8 bpp), l: 000E82A0, p: 000E0FC0 fp: 00, fl:RELEASE, idx:00, pt:01]
497 //    [hsc: 9A, vsc: 00, rem: 00]
498 // Could it be the vscale is overridden if the DWIDTH is zero? Hmm...
499
500                                 if (vscale == 0)
501                                         vscale = 0x20;                                  // OP bug??? Nope, it isn't...! Or is it?
502
503                                 remainder -= 0x20;                                      // 1.0f in [3.5] fixed point format
504 //                              if (remainder & 0x80)                           // I.e., it's negative
505                                 if ((remainder & 0x80) || remainder == 0)       // I.e., it's <= 0
506                                 {
507                                         uint64 data = (p0 & 0xFFFFF80000000000) >> 40;
508                                         uint64 dwidth = (p1 & 0xFFC0000) >> 15;
509
510 //                                      while (remainder & 0x80)
511                                         while ((remainder & 0x80) || remainder == 0)
512                                         {
513                                                 remainder += vscale;
514                                                 if (height)
515                                                         height--;
516
517                                                 data += dwidth;
518                                         }
519                                         p0 &= ~0xFFFFF80000FFC000;              // Mask out old data...
520                                         p0 |= (uint64)height << 14;
521                                         p0 |= data << 40;
522                                         OPStorePhrase(oldOPP, p0);
523                                 }
524
525 //WriteLog(" [%08X%08X -> ", (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
526                                 p2 &= ~0x0000000000FF0000;
527                                 p2 |= (uint64)remainder << 16;
528 //WriteLog("%08X%08X]\n", (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
529                                 OPStorePhrase(oldOPP+16, p2);
530 //remainder = (uint8)(p2 >> 16), vscale = (uint8)(p2 >> 8);
531 //WriteLog(" [after]: rem=%02X, vscale=%02X\n", remainder, vscale);
532                         }
533                         op_pointer = (p0 & 0x000007FFFF000000) >> 21;
534                         break;
535                 }
536                 case OBJECT_TYPE_GPU:
537                 {
538 //WriteLog("OP: Asserting GPU IRQ #3...\n");
539                         op_set_current_object(p0);
540                         GPUSetIRQLine(3, ASSERT_LINE);
541 //Also, OP processing is suspended from this point until OBF (F00026) is written to...
542 // !!! FIX !!!
543 //Do something like:
544 //OPSuspendedByGPU = true;
545 //Dunno if the OP keeps processing from where it was interrupted, or if it just continues
546 //on the next scanline...
547 // --> It continues from where it was interrupted! !!! FIX !!!
548                         break;
549                 }
550                 case OBJECT_TYPE_BRANCH:
551                 {
552                         uint16 ypos = (p0 >> 3) & 0x7FF;
553                         uint8  cc   = (p0 >> 14) & 0x03;
554                         uint32 link = (p0 >> 21) & 0x3FFFF8;
555                         
556 //                      if ((ypos!=507)&&(ypos!=25))
557 //                              WriteLog("\t%i%s%i link=0x%.8x\n",scanline,condition_to_str[cc],ypos>>1,link);
558                         switch (cc)
559                         {
560                         case CONDITION_EQUAL:
561                                 if (TOMReadWord(0xF00006, OP) == ypos || ypos == 0x7FF)
562                                         op_pointer = link;
563                                 break;
564                         case CONDITION_LESS_THAN:
565                                 if (TOMReadWord(0xF00006, OP) < ypos)
566                                         op_pointer = link;
567                                 break;
568                         case CONDITION_GREATER_THAN:
569                                 if (TOMReadWord(0xF00006, OP) > ypos)
570                                         op_pointer = link;
571                                 break;
572                         case CONDITION_OP_FLAG_SET:
573                                 if (op_get_status_register() & 0x01)
574                                         op_pointer = link;
575                                 break;
576                         case CONDITION_SECOND_HALF_LINE:
577                                 // This basically means branch if bit 10 of HC is set
578                                 WriteLog("OP: Unexpected CONDITION_SECOND_HALF_LINE in BRANCH object\nOP: shuting down\n");
579                                 fclose(log_get());
580                                 exit(0);
581                                 break;
582                         default:
583                                 WriteLog("OP: Unimplemented branch condition %i\n", cc);
584                         }
585                         break;
586                 }
587                 case OBJECT_TYPE_STOP:
588                 {
589 //op_start_log = 0;
590                         // unsure
591 //WriteLog("OP: --> STOP\n");
592 //                      op_set_status_register(((p0>>3) & 0xFFFFFFFF));
593 //This seems more likely...
594                         op_set_current_object(p0);
595                         
596                         if (p0 & 0x08)
597                         {
598                                 tom_set_pending_object_int();
599                                 if (tom_irq_enabled(IRQ_OPFLAG))// && jaguar_interrupt_handler_is_valid(64))
600                                         m68k_set_irq(7);                                // Cause an NMI to occur...
601                         }
602
603                         return;
604 //                      break;
605                 }
606                 default:
607                         WriteLog("op: unknown object type %i\n", ((uint8)p0 & 0x07)); 
608                         return;
609                 }
610         }
611 }
612
613 //
614 // Store fixed size bitmap in line buffer
615 //
616 void OPProcessFixedBitmap(uint64 p0, uint64 p1, bool render)
617 {
618 // Need to make sure that when writing that it stays within the line buffer...
619 // LBUF ($F01800 - $F01D9E) 360 x 32-bit RAM
620         uint8 depth = (p1 >> 12) & 0x07;                                // Color depth of image
621         int32 xpos = ((int16)((p1 << 4) & 0xFFFF)) >> 4;// Image xpos in LBUF
622         uint32 iwidth = (p1 >> 28) & 0x3FF;                             // Image width in *phrases*
623         uint32 data = (p0 >> 40) & 0xFFFFF8;                    // Pixel data address
624 //#ifdef OP_DEBUG_BMP
625         uint32  firstPix = (p1 >> 49) & 0x3F;
626         // "The LSB is significant only for scaled objects..." -JTRM
627         // "In 1 BPP mode, all five bits are significant. In 2 BPP mode, the top four are significant..."
628         firstPix &= 0x3E;
629 //#endif
630 // We can ignore the RELEASE (high order) bit for now--probably forever...!
631 //      uint8 flags = (p1 >> 45) & 0x0F;        // REFLECT, RMW, TRANS, RELEASE
632 //Optimize: break these out to their own BOOL values
633         uint8 flags = (p1 >> 45) & 0x07;                                // REFLECT (0), RMW (1), TRANS (2)
634         bool flagREFLECT = (flags & OPFLAG_REFLECT ? true : false),
635                 flagRMW = (flags & OPFLAG_RMW ? true : false),
636                 flagTRANS = (flags & OPFLAG_TRANS ? true : false);
637 // "For images with 1 to 4 bits/pixel the top 7 to 4 bits of the index
638 //  provide the most significant bits of the palette address."
639         uint8 index = (p1 >> 37) & 0xFE;                                // CLUT index offset (upper pix, 1-4 bpp)
640         uint32 pitch = (p1 >> 15) & 0x07;                               // Phrase pitch
641         pitch <<= 3;                                                                    // Optimization: Multiply pitch by 8
642
643 //      int16 scanlineWidth = tom_getVideoModeWidth();
644         uint8 * tom_ram_8 = tom_get_ram_pointer();
645         uint8 * paletteRAM = &tom_ram_8[0x400];
646         // This is OK as long as it's used correctly: For 16-bit RAM to RAM direct copies--NOT
647         // for use when using endian-corrected data (i.e., any of the *_word_read functions!)
648         uint16 * paletteRAM16 = (uint16 *)paletteRAM;
649
650 //      WriteLog("bitmap %ix? %ibpp at %i,? firstpix=? data=0x%.8x pitch %i hflipped=%s dwidth=? (linked to ?) RMW=%s Tranparent=%s\n",
651 //              iwidth, op_bitmap_bit_depth[bitdepth], xpos, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), (flags&OPFLAG_RMW ? "yes" : "no"), (flags&OPFLAG_TRANS ? "yes" : "no"));
652
653 // Is it OK to have a 0 for the data width??? (i.e., undocumented?)
654 // Seems to be... Seems that dwidth *can* be zero (i.e., reuse same line) as well.
655 // Pitch == 0 is OK too...
656 //      if (!render || op_pointer == 0 || ptr == 0 || pitch == 0)
657 //I'm not convinced that we need to concern ourselves with data & op_pointer here either!
658         if (!render || iwidth == 0)
659                 return;
660
661 //#define OP_DEBUG_BMP
662 //#ifdef OP_DEBUG_BMP
663 //      WriteLog("bitmap %ix%i %ibpp at %i,%i firstpix=%i data=0x%.8x pitch %i hflipped=%s dwidth=%i (linked to 0x%.8x) Transluency=%s\n",
664 //              iwidth, height, op_bitmap_bit_depth[bitdepth], xpos, ypos, firstPix, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), dwidth, op_pointer, (flags&OPFLAG_RMW ? "yes" : "no"));
665 //#endif
666
667 //      int32 leftMargin = xpos, rightMargin = (xpos + (phraseWidthToPixels[depth] * iwidth)) - 1;
668         int32 startPos = xpos, endPos = xpos +
669                 (!flagREFLECT ? (phraseWidthToPixels[depth] * iwidth) - 1
670                 : -((phraseWidthToPixels[depth] * iwidth) + 1));
671         uint32 clippedWidth = 0, phraseClippedWidth = 0, dataClippedWidth = 0;//, phrasePixel = 0;
672         bool in24BPPMode = (((GET16(tom_ram_8, 0x0028) >> 1) & 0x03) == 1 ? true : false);      // VMODE
673         // Not sure if this is Jaguar Two only location or what...
674         // From the docs, it is... If we want to limit here we should think of something else.
675 //      int32 limit = GET16(tom_ram_8, 0x0008);                 // LIMIT
676         int32 limit = 720;
677         int32 lbufWidth = (!in24BPPMode ? limit - 1 : (limit / 2) - 1); // Zero based limit...
678
679         // If the image is completely to the left or right of the line buffer, then bail.
680 //If in REFLECT mode, then these values are swapped! !!! FIX !!! [DONE]
681 //There are four possibilities:
682 //  1. image sits on left edge and no REFLECT; starts out of bounds but ends in bounds.
683 //  2. image sits on left edge and REFLECT; starts in bounds but ends out of bounds.
684 //  3. image sits on right edge and REFLECT; starts out of bounds but ends in bounds.
685 //  4. image sits on right edge and no REFLECT; starts in bounds but ends out of bounds.
686 //Numbers 2 & 4 can be caught by checking the LBUF clip while in the inner loop,
687 // numbers 1 & 3 are of concern.
688 // This *indirectly* handles only cases 2 & 4! And is WRONG if REFLECT is set...!
689 //      if (rightMargin < 0 || leftMargin > lbufWidth)
690
691 // It might be easier to swap these (if REFLECTed) and just use XPOS down below...
692 // That way, you could simply set XPOS to leftMargin if !REFLECT and to rightMargin otherwise.
693 // Still have to be careful with the DATA and IWIDTH values though...
694
695 //      if ((!flagREFLECT && (rightMargin < 0 || leftMargin > lbufWidth))
696 //              || (flagREFLECT && (leftMargin < 0 || rightMargin > lbufWidth)))
697 //              return;
698         if ((!flagREFLECT && (endPos < 0 || startPos > lbufWidth))
699                 || (flagREFLECT && (startPos < 0 || endPos > lbufWidth)))
700                 return;
701
702         // Otherwise, find the clip limits and clip the phrase as well...
703         // NOTE: I'm fudging here by letting the actual blit overstep the bounds of the
704         //       line buffer, but it shouldn't matter since there are two unused line
705         //       buffers below and nothing above and I'll at most write 8 bytes outside
706         //       the line buffer... I could use a fractional clip begin/end value, but
707         //       this makes the blit a *lot* more hairy. I might fix this in the future
708         //       if it becomes necessary. (JLH)
709         //       Probably wouldn't be *that* hairy. Just use a delta that tells the inner loop
710         //       which pixel in the phrase is being written, and quit when either end of phrases
711         //       is reached or line buffer extents are surpassed.
712
713 //This stuff is probably wrong as well... !!! FIX !!!
714 //The strange thing is that it seems to work, but that's no guarantee that it's bulletproof!
715 //Yup. Seems that JagMania doesn't work correctly with this...
716 //Dunno if this is the problem, but Atari Karts is showing *some* of the road now...
717 //      if (!flagREFLECT)
718
719 /*
720         if (leftMargin < 0)
721                 clippedWidth = 0 - leftMargin,
722                 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth],
723                 leftMargin = 0 - (clippedWidth % phraseWidthToPixels[depth]);
724 //              leftMargin = 0;
725
726         if (rightMargin > lbufWidth)
727                 clippedWidth = rightMargin - lbufWidth,
728                 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];//,
729 //              rightMargin = lbufWidth + (clippedWidth % phraseWidthToPixels[depth]);
730 //              rightMargin = lbufWidth;
731 */
732 if (depth > 5)
733         WriteLog("OP: We're about to encounter a divide by zero error!\n");
734         // NOTE: We're just using endPos to figure out how much, if any, to clip by.
735         // ALSO: There may be another case where we start out of bounds and end out of bounds...!
736         // !!! FIX !!!
737         if (startPos < 0)                       // Case #1: Begin out, end in, L to R
738                 clippedWidth = 0 - startPos,
739                 dataClippedWidth = phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth],
740                 startPos = 0 - (clippedWidth % phraseWidthToPixels[depth]);
741
742         if (endPos < 0)                         // Case #2: Begin in, end out, R to L
743                 clippedWidth = 0 - endPos,
744                 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];
745
746         if (endPos > lbufWidth)         // Case #3: Begin in, end out, L to R
747                 clippedWidth = endPos - lbufWidth,
748                 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];
749
750         if (startPos > lbufWidth)       // Case #4: Begin out, end in, R to L
751                 clippedWidth = startPos - lbufWidth,
752                 dataClippedWidth = phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth],
753                 startPos = lbufWidth + (clippedWidth % phraseWidthToPixels[depth]);
754
755         // If the image is sitting on the line buffer left or right edge, we need to compensate
756         // by decreasing the image phrase width accordingly.
757         iwidth -= phraseClippedWidth;
758
759         // Also, if we're clipping the phrase we need to make sure we're in the correct part of
760         // the pixel data.
761 //      data += phraseClippedWidth * (pitch << 3);
762         data += dataClippedWidth * pitch;
763
764         // NOTE: When the bitmap is in REFLECT mode, the XPOS marks the *right* side of the
765         //       bitmap! This makes clipping & etc. MUCH, much easier...!
766 //      uint32 lbufAddress = 0x1800 + (!in24BPPMode ? leftMargin * 2 : leftMargin * 4);
767 //Why does this work right when multiplying startPos by 2 (instead of 4) for 24 BPP mode?
768 //Is this a bug in the OP?
769         uint32 lbufAddress = 0x1800 + (!in24BPPMode ? startPos * 2 : startPos * 2);
770         uint8 * currentLineBuffer = &tom_ram_8[lbufAddress];
771
772         // Render.
773
774 // Hmm. We check above for 24 BPP mode, but don't do anything about it below...
775 // If we *were* in 24 BPP mode, how would you convert CRY to RGB24? Seems to me
776 // that if you're in CRY mode then you wouldn't be able to use 24 BPP bitmaps
777 // anyway.
778 // This seems to be the case (at least according to the Midsummer docs)...!
779
780         if (depth == 0)                                                                 // 1 BPP
781         {
782                 // The LSB of flags is OPFLAG_REFLECT, so sign extend it and or 2 into it.
783                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
784
785                 // Fetch 1st phrase...
786                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
787 //Note that firstPix should only be honored *if* we start with the 1st phrase of the bitmap
788 //i.e., we didn't clip on the margin... !!! FIX !!!
789                 pixels <<= firstPix;                                            // Skip first N pixels (N=firstPix)...
790                 int i = firstPix;                                                       // Start counter at right spot...
791
792                 while (iwidth--)
793                 {
794                         while (i++ < 64)
795                         {
796                                 uint8 bit = pixels >> 63;
797                                 if (flagTRANS && bit == 0)
798                                         ;       // Do nothing...
799                                 else
800                                 {
801                                         if (!flagRMW)
802 //Optimize: Set paletteRAM16 to beginning of palette RAM + index*2 and use only [bit] as index...
803 //Won't optimize RMW case though...
804                                                 // This is the *only* correct use of endian-dependent code
805                                                 // (i.e., mem-to-mem direct copying)!
806                                                 *(uint16 *)currentLineBuffer = paletteRAM16[index | bit];
807                                         else
808                                                 *currentLineBuffer = 
809                                                         BLEND_CR(*currentLineBuffer, paletteRAM[(index | bit) << 1]),
810                                                 *(currentLineBuffer + 1) = 
811                                                         BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bit) << 1) + 1]);
812                                 }
813
814                                 currentLineBuffer += lbufDelta;
815                                 pixels <<= 1;
816                         }
817                         i = 0;
818                         // Fetch next phrase...
819                         data += pitch;
820                         pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
821                 }
822         }
823         else if (depth == 1)                                                    // 2 BPP
824         {
825 if (firstPix)
826         WriteLog("OP: Fixed bitmap @ 2 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
827                 index &= 0xFC;                                                          // Top six bits form CLUT index
828                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
829                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
830
831                 while (iwidth--)
832                 {
833                         // Fetch phrase...
834                         uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
835                         data += pitch;
836
837                         for(int i=0; i<32; i++)
838                         {
839                                 uint8 bits = pixels >> 62;
840 // Seems to me that both of these are in the same endian, so we could cast it as
841 // uint16 * and do straight across copies (what about 24 bpp? Treat it differently...)
842 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
843 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
844 // No, it isn't because we read the memory in an endian safe way--this *won't* work...
845                                 if (flagTRANS && bits == 0)
846                                         ;       // Do nothing...
847                                 else
848                                 {
849                                         if (!flagRMW)
850                                                 *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
851                                         else
852                                                 *currentLineBuffer = 
853                                                         BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
854                                                 *(currentLineBuffer + 1) = 
855                                                         BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
856                                 }
857
858                                 currentLineBuffer += lbufDelta;
859                                 pixels <<= 2;
860                         }
861                 }
862         }
863         else if (depth == 2)                                                    // 4 BPP
864         {
865 if (firstPix)
866         WriteLog("OP: Fixed bitmap @ 4 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
867                 index &= 0xF0;                                                          // Top four bits form CLUT index
868                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
869                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
870
871                 while (iwidth--)
872                 {
873                         // Fetch phrase...
874                         uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
875                         data += pitch;
876
877                         for(int i=0; i<16; i++)
878                         {
879                                 uint8 bits = pixels >> 60;
880 // Seems to me that both of these are in the same endian, so we could cast it as
881 // uint16 * and do straight across copies (what about 24 bpp? Treat it differently...)
882 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
883 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
884 // No, it isn't because we read the memory in an endian safe way--this *won't* work...
885                                 if (flagTRANS && bits == 0)
886                                         ;       // Do nothing...
887                                 else
888                                 {
889                                         if (!flagRMW)
890                                                 *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
891                                         else
892                                                 *currentLineBuffer = 
893                                                         BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
894                                                 *(currentLineBuffer + 1) = 
895                                                         BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
896                                 }
897
898                                 currentLineBuffer += lbufDelta;
899                                 pixels <<= 4;
900                         }
901                 }
902         }
903         else if (depth == 3)                                                    // 8 BPP
904         {
905                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
906                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
907
908                 // Fetch 1st phrase...
909                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
910 //Note that firstPix should only be honored *if* we start with the 1st phrase of the bitmap
911 //i.e., we didn't clip on the margin... !!! FIX !!!
912                 firstPix &= 0x30;                                                       // Only top two bits are valid for 8 BPP
913                 pixels <<= firstPix;                                            // Skip first N pixels (N=firstPix)...
914                 int i = firstPix >> 3;                                          // Start counter at right spot...
915
916                 while (iwidth--)
917                 {
918                         while (i++ < 8)
919                         {
920                                 uint8 bits = pixels >> 56;
921 // Seems to me that both of these are in the same endian, so we could cast it as
922 // uint16 * and do straight across copies (what about 24 bpp? Treat it differently...)
923 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
924 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
925 // No, it isn't because we read the memory in an endian safe way--this *won't* work...
926                                 if (flagTRANS && bits == 0)
927                                         ;       // Do nothing...
928                                 else
929                                 {
930                                         if (!flagRMW)
931                                                 *(uint16 *)currentLineBuffer = paletteRAM16[bits];
932                                         else
933                                                 *currentLineBuffer = 
934                                                         BLEND_CR(*currentLineBuffer, paletteRAM[bits << 1]),
935                                                 *(currentLineBuffer + 1) = 
936                                                         BLEND_Y(*(currentLineBuffer + 1), paletteRAM[(bits << 1) + 1]);
937                                 }
938
939                                 currentLineBuffer += lbufDelta;
940                                 pixels <<= 8;
941                         }
942                         i = 0;
943                         // Fetch next phrase...
944                         data += pitch;
945                         pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
946                 }
947         }
948         else if (depth == 4)                                                    // 16 BPP
949         {
950 if (firstPix)
951         WriteLog("OP: Fixed bitmap @ 16 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
952                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
953                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
954
955                 while (iwidth--)
956                 {
957                         // Fetch phrase...
958                         uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
959                         data += pitch;
960
961                         for(int i=0; i<4; i++)
962                         {
963                                 uint8 bitsHi = pixels >> 56, bitsLo = pixels >> 48;
964 // Seems to me that both of these are in the same endian, so we could cast it as
965 // uint16 * and do straight across copies (what about 24 bpp? Treat it differently...)
966 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
967 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
968 // No, it isn't because we read the memory in an endian safe way--it *won't* work...
969                                 if (flagTRANS && (bitsLo | bitsHi) == 0)
970                                         ;       // Do nothing...
971                                 else
972                                 {
973                                         if (!flagRMW)
974                                                 *currentLineBuffer = bitsHi,
975                                                 *(currentLineBuffer + 1) = bitsLo;
976                                         else
977                                                 *currentLineBuffer = 
978                                                         BLEND_CR(*currentLineBuffer, bitsHi),
979                                                 *(currentLineBuffer + 1) = 
980                                                         BLEND_Y(*(currentLineBuffer + 1), bitsLo);
981                                 }
982
983                                 currentLineBuffer += lbufDelta;
984                                 pixels <<= 16;
985                         }
986                 }
987         }
988         else if (depth == 5)                                                    // 24 BPP
989         {
990 //Looks like Iron Soldier is the only game that uses 24BPP mode...
991 //There *might* be others...
992 //WriteLog("OP: Writing 24 BPP bitmap!\n");
993 if (firstPix)
994         WriteLog("OP: Fixed bitmap @ 24 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
995                 // Not sure, but I think RMW only works with 16 BPP and below, and only in CRY mode...
996                 // The LSB of flags is OPFLAG_REFLECT, so sign extend it and OR 4 into it.
997                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 4) | 0x04;
998
999                 while (iwidth--)
1000                 {
1001                         // Fetch phrase...
1002                         uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1003                         data += pitch;
1004
1005                         for(int i=0; i<2; i++)
1006                         {
1007                                 // We don't use a 32-bit var here because of endian issues...!
1008                                 uint8 bits3 = pixels >> 56, bits2 = pixels >> 48,
1009                                         bits1 = pixels >> 40, bits0 = pixels >> 32;
1010
1011                                 if (flagTRANS && (bits3 | bits2 | bits1 | bits0) == 0)
1012                                         ;       // Do nothing...
1013                                 else
1014                                         *currentLineBuffer = bits3,
1015                                         *(currentLineBuffer + 1) = bits2,
1016                                         *(currentLineBuffer + 2) = bits1,
1017                                         *(currentLineBuffer + 3) = bits0;
1018
1019                                 currentLineBuffer += lbufDelta;
1020                                 pixels <<= 32;
1021                         }
1022                 }
1023         }
1024 }
1025
1026 //
1027 // Store scaled bitmap in line buffer
1028 //
1029 void OPProcessScaledBitmap(uint64 p0, uint64 p1, uint64 p2, bool render)
1030 {
1031 // Need to make sure that when writing that it stays within the line buffer...
1032 // LBUF ($F01800 - $F01D9E) 360 x 32-bit RAM
1033         uint8 depth = (p1 >> 12) & 0x07;                                // Color depth of image
1034         int32 xpos = ((int16)((p1 << 4) & 0xFFFF)) >> 4;// Image xpos in LBUF
1035         uint32 iwidth = (p1 >> 28) & 0x3FF;                             // Image width in *phrases*
1036         uint32 data = (p0 >> 40) & 0xFFFFF8;                    // Pixel data address
1037 //#ifdef OP_DEBUG_BMP
1038 // Prolly should use this... Though not sure exactly how.
1039 //Use the upper bits as an offset into the phrase depending on the BPP. That's how!
1040         uint32 firstPix = (p1 >> 49) & 0x3F;
1041 //This is WEIRD! I'm sure I saw Atari Karts request 8 BPP FIRSTPIX! What happened???
1042 if (firstPix)
1043         WriteLog("OP: FIRSTPIX != 0! (Scaled BM)\n");
1044 //#endif
1045 // We can ignore the RELEASE (high order) bit for now--probably forever...!
1046 //      uint8 flags = (p1 >> 45) & 0x0F;        // REFLECT, RMW, TRANS, RELEASE
1047 //Optimize: break these out to their own BOOL values
1048         uint8 flags = (p1 >> 45) & 0x07;                                // REFLECT (0), RMW (1), TRANS (2)
1049         bool flagREFLECT = (flags & OPFLAG_REFLECT ? true : false),
1050                 flagRMW = (flags & OPFLAG_RMW ? true : false),
1051                 flagTRANS = (flags & OPFLAG_TRANS ? true : false);
1052         uint8 index = (p1 >> 37) & 0xFE;                                // CLUT index offset (upper pix, 1-4 bpp)
1053         uint32 pitch = (p1 >> 15) & 0x07;                               // Phrase pitch
1054
1055         uint8 * tom_ram_8 = tom_get_ram_pointer();
1056         uint8 * paletteRAM = &tom_ram_8[0x400];
1057         // This is OK as long as it's used correctly: For 16-bit RAM to RAM direct copies--NOT
1058         // for use when using endian-corrected data (i.e., any of the *ReadWord functions!)
1059         uint16 * paletteRAM16 = (uint16 *)paletteRAM;
1060
1061         uint8 hscale = p2 & 0xFF;
1062 //      uint8 horizontalRemainder = hscale;                             // Not sure if it starts full, but seems reasonable [It's not!]
1063         uint8 horizontalRemainder = 0;                                  // Let's try zero! Seems to work! Yay!
1064         int32 scaledWidthInPixels = (iwidth * phraseWidthToPixels[depth] * hscale) >> 5;
1065         uint32 scaledPhrasePixels = (phraseWidthToPixels[depth] * hscale) >> 5;
1066
1067 //      WriteLog("bitmap %ix? %ibpp at %i,? firstpix=? data=0x%.8x pitch %i hflipped=%s dwidth=? (linked to ?) RMW=%s Tranparent=%s\n",
1068 //              iwidth, op_bitmap_bit_depth[bitdepth], xpos, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), (flags&OPFLAG_RMW ? "yes" : "no"), (flags&OPFLAG_TRANS ? "yes" : "no"));
1069
1070 // Looks like an hscale of zero means don't draw!
1071         if (!render || iwidth == 0 || hscale == 0)
1072                 return;
1073
1074 //#define OP_DEBUG_BMP
1075 //#ifdef OP_DEBUG_BMP
1076 //      WriteLog("bitmap %ix%i %ibpp at %i,%i firstpix=%i data=0x%.8x pitch %i hflipped=%s dwidth=%i (linked to 0x%.8x) Transluency=%s\n",
1077 //              iwidth, height, op_bitmap_bit_depth[bitdepth], xpos, ypos, firstPix, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), dwidth, op_pointer, (flags&OPFLAG_RMW ? "yes" : "no"));
1078 //#endif
1079
1080         int32 startPos = xpos, endPos = xpos +
1081                 (!flagREFLECT ? scaledWidthInPixels - 1 : -(scaledWidthInPixels + 1));
1082         uint32 clippedWidth = 0, phraseClippedWidth = 0, dataClippedWidth = 0;
1083         bool in24BPPMode = (((GET16(tom_ram_8, 0x0028) >> 1) & 0x03) == 1 ? true : false);      // VMODE
1084         // Not sure if this is Jaguar Two only location or what...
1085         // From the docs, it is... If we want to limit here we should think of something else.
1086 //      int32 limit = GET16(tom_ram_8, 0x0008);                 // LIMIT
1087         int32 limit = 720;
1088         int32 lbufWidth = (!in24BPPMode ? limit - 1 : (limit / 2) - 1); // Zero based limit...
1089
1090         // If the image is completely to the left or right of the line buffer, then bail.
1091 //If in REFLECT mode, then these values are swapped! !!! FIX !!! [DONE]
1092 //There are four possibilities:
1093 //  1. image sits on left edge and no REFLECT; starts out of bounds but ends in bounds.
1094 //  2. image sits on left edge and REFLECT; starts in bounds but ends out of bounds.
1095 //  3. image sits on right edge and REFLECT; starts out of bounds but ends in bounds.
1096 //  4. image sits on right edge and no REFLECT; starts in bounds but ends out of bounds.
1097 //Numbers 2 & 4 can be caught by checking the LBUF clip while in the inner loop,
1098 // numbers 1 & 3 are of concern.
1099 // This *indirectly* handles only cases 2 & 4! And is WRONG if REFLECT is set...!
1100 //      if (rightMargin < 0 || leftMargin > lbufWidth)
1101
1102 // It might be easier to swap these (if REFLECTed) and just use XPOS down below...
1103 // That way, you could simply set XPOS to leftMargin if !REFLECT and to rightMargin otherwise.
1104 // Still have to be careful with the DATA and IWIDTH values though...
1105
1106         if ((!flagREFLECT && (endPos < 0 || startPos > lbufWidth))
1107                 || (flagREFLECT && (startPos < 0 || endPos > lbufWidth)))
1108                 return;
1109
1110         // Otherwise, find the clip limits and clip the phrase as well...
1111         // NOTE: I'm fudging here by letting the actual blit overstep the bounds of the
1112         //       line buffer, but it shouldn't matter since there are two unused line
1113         //       buffers below and nothing above and I'll at most write 40 bytes outside
1114         //       the line buffer... I could use a fractional clip begin/end value, but
1115         //       this makes the blit a *lot* more hairy. I might fix this in the future
1116         //       if it becomes necessary. (JLH)
1117         //       Probably wouldn't be *that* hairy. Just use a delta that tells the inner loop
1118         //       which pixel in the phrase is being written, and quit when either end of phrases
1119         //       is reached or line buffer extents are surpassed.
1120
1121 //This stuff is probably wrong as well... !!! FIX !!!
1122 //The strange thing is that it seems to work, but that's no guarantee that it's bulletproof!
1123 //Yup. Seems that JagMania doesn't work correctly with this...
1124 //Dunno if this is the problem, but Atari Karts is showing *some* of the road now...
1125 //Actually, it is! Or, it was. It doesn't seem to be clipping here, so the problem lies
1126 //elsewhere! Hmm. Putting the scaling code into the 1/2/8 BPP cases seems to draw the ground
1127 // a bit more accurately... Strange!
1128 //It's probably a case of the REFLECT flag being set and the background being written
1129 //from the right side of the screen...
1130 //But no, it isn't... At least if the diagnostics are telling the truth!
1131
1132         // NOTE: We're just using endPos to figure out how much, if any, to clip by.
1133         // ALSO: There may be another case where we start out of bounds and end out of bounds...!
1134         // !!! FIX !!!
1135
1136 //There's a problem here with scaledPhrasePixels in that it can be forced to zero when
1137 //the scaling factor is small. So fix it already! !!! FIX !!!
1138 /*if (scaledPhrasePixels == 0)
1139 {
1140         WriteLog("OP: [Scaled] We're about to encounter a divide by zero error!\n");
1141         DumpScaledObject(p0, p1, p2);
1142 }//*/
1143 //NOTE: I'm almost 100% sure that this is wrong... And it is! :-p
1144         if (startPos < 0)                       // Case #1: Begin out, end in, L to R
1145 /*              clippedWidth = 0 - startPos,
1146                 dataClippedWidth = phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth],
1147                 startPos = 0 - (clippedWidth % phraseWidthToPixels[depth]);*/
1148                 clippedWidth = 0 - startPos,
1149                 dataClippedWidth = phraseClippedWidth = clippedWidth / scaledPhrasePixels,
1150                 startPos = 0 - (clippedWidth % scaledPhrasePixels);
1151
1152         if (endPos < 0)                         // Case #2: Begin in, end out, R to L
1153 /*              clippedWidth = 0 - endPos,
1154                 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];*/
1155                 clippedWidth = 0 - endPos,
1156                 phraseClippedWidth = clippedWidth / scaledPhrasePixels;
1157
1158         if (endPos > lbufWidth)         // Case #3: Begin in, end out, L to R
1159 /*              clippedWidth = endPos - lbufWidth,
1160                 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];*/
1161                 clippedWidth = endPos - lbufWidth,
1162                 phraseClippedWidth = clippedWidth / scaledPhrasePixels;
1163
1164         if (startPos > lbufWidth)       // Case #4: Begin out, end in, R to L
1165 /*              clippedWidth = startPos - lbufWidth,
1166                 dataClippedWidth = phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth],
1167                 startPos = lbufWidth + (clippedWidth % phraseWidthToPixels[depth]);*/
1168                 clippedWidth = startPos - lbufWidth,
1169                 dataClippedWidth = phraseClippedWidth = clippedWidth / scaledPhrasePixels,
1170                 startPos = lbufWidth + (clippedWidth % scaledPhrasePixels);
1171
1172 extern int op_start_log;
1173 if (op_start_log && clippedWidth != 0)
1174         WriteLog("OP: Clipped line. SP=%i, EP=%i, clip=%u, iwidth=%u, hscale=%02X\n", startPos, endPos, clippedWidth, iwidth, hscale);
1175 if (op_start_log && startPos == 13)
1176 {
1177         WriteLog("OP: Scaled line. SP=%i, EP=%i, clip=%u, iwidth=%u, hscale=%02X, depth=%u, firstPix=%u\n", startPos, endPos, clippedWidth, iwidth, hscale, depth, firstPix);
1178         DumpScaledObject(p0, p1, p2);
1179 }
1180         // If the image is sitting on the line buffer left or right edge, we need to compensate
1181         // by decreasing the image phrase width accordingly.
1182         iwidth -= phraseClippedWidth;
1183
1184         // Also, if we're clipping the phrase we need to make sure we're in the correct part of
1185         // the pixel data.
1186 //      data += phraseClippedWidth * (pitch << 3);
1187         data += dataClippedWidth * (pitch << 3);
1188
1189         // NOTE: When the bitmap is in REFLECT mode, the XPOS marks the *right* side of the
1190         //       bitmap! This makes clipping & etc. MUCH, much easier...!
1191 //      uint32 lbufAddress = 0x1800 + (!in24BPPMode ? leftMargin * 2 : leftMargin * 4);
1192 //      uint32 lbufAddress = 0x1800 + (!in24BPPMode ? startPos * 2 : startPos * 4);
1193         uint32 lbufAddress = 0x1800 + (!in24BPPMode ? startPos * 2 : startPos * 2);
1194         uint8 * currentLineBuffer = &tom_ram_8[lbufAddress];
1195
1196         // Render.
1197
1198 // Hmm. We check above for 24 BPP mode, but don't do anything about it below...
1199 // If we *were* in 24 BPP mode, how would you convert CRY to RGB24? Seems to me
1200 // that if you're in CRY mode then you wouldn't be able to use 24 BPP bitmaps
1201 // anyway.
1202 // This seems to be the case (at least according to the Midsummer docs)...!
1203
1204         if (depth == 0)                                                                 // 1 BPP
1205         {
1206 if (firstPix != 0)
1207         WriteLog("OP: Scaled bitmap @ 1 BPP requesting FIRSTPIX!\n");
1208                 // The LSB of flags is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1209                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1210
1211                 int pixCount = 0;
1212                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1213
1214                 while ((int32)iwidth > 0)
1215                 {
1216                         uint8 bits = pixels >> 63;
1217
1218                         if (flagTRANS && bits == 0)
1219                                 ;       // Do nothing...
1220                         else
1221                         {
1222                                 if (!flagRMW)
1223                                         // This is the *only* correct use of endian-dependent code
1224                                         // (i.e., mem-to-mem direct copying)!
1225                                         *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1226                                 else
1227                                         *currentLineBuffer = 
1228                                                 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1229                                         *(currentLineBuffer + 1) = 
1230                                                 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1231                         }
1232
1233                         currentLineBuffer += lbufDelta;
1234
1235                         horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1236                         while (horizontalRemainder & 0x80)
1237                         {
1238                                 horizontalRemainder += hscale;
1239                                 pixCount++;
1240                                 pixels <<= 1;
1241                         }
1242
1243                         if (pixCount > 63)
1244                         {
1245                                 int phrasesToSkip = pixCount / 64, pixelShift = pixCount % 64;
1246
1247                                 data += (pitch << 3) * phrasesToSkip;
1248                                 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1249                                 pixels <<= 1 * pixelShift;
1250                                 iwidth -= phrasesToSkip;
1251                                 pixCount = pixelShift;
1252                         }
1253                 }
1254         }
1255         else if (depth == 1)                                                    // 2 BPP
1256         {
1257 if (firstPix != 0)
1258         WriteLog("OP: Scaled bitmap @ 2 BPP requesting FIRSTPIX!\n");
1259                 index &= 0xFC;                                                          // Top six bits form CLUT index
1260                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1261                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1262
1263                 int pixCount = 0;
1264                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1265
1266                 while ((int32)iwidth > 0)
1267                 {
1268                         uint8 bits = pixels >> 62;
1269
1270                         if (flagTRANS && bits == 0)
1271                                 ;       // Do nothing...
1272                         else
1273                         {
1274                                 if (!flagRMW)
1275                                         // This is the *only* correct use of endian-dependent code
1276                                         // (i.e., mem-to-mem direct copying)!
1277                                         *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1278                                 else
1279                                         *currentLineBuffer = 
1280                                                 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1281                                         *(currentLineBuffer + 1) = 
1282                                                 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1283                         }
1284
1285                         currentLineBuffer += lbufDelta;
1286
1287                         horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1288                         while (horizontalRemainder & 0x80)
1289                         {
1290                                 horizontalRemainder += hscale;
1291                                 pixCount++;
1292                                 pixels <<= 2;
1293                         }
1294
1295                         if (pixCount > 31)
1296                         {
1297                                 int phrasesToSkip = pixCount / 32, pixelShift = pixCount % 32;
1298
1299                                 data += (pitch << 3) * phrasesToSkip;
1300                                 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1301                                 pixels <<= 2 * pixelShift;
1302                                 iwidth -= phrasesToSkip;
1303                                 pixCount = pixelShift;
1304                         }
1305                 }
1306         }
1307         else if (depth == 2)                                                    // 4 BPP
1308         {
1309 if (firstPix != 0)
1310         WriteLog("OP: Scaled bitmap @ 4 BPP requesting FIRSTPIX!\n");
1311                 index &= 0xF0;                                                          // Top four bits form CLUT index
1312                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1313                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1314
1315                 int pixCount = 0;
1316                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1317
1318                 while ((int32)iwidth > 0)
1319                 {
1320                         uint8 bits = pixels >> 60;
1321
1322                         if (flagTRANS && bits == 0)
1323                                 ;       // Do nothing...
1324                         else
1325                         {
1326                                 if (!flagRMW)
1327                                         // This is the *only* correct use of endian-dependent code
1328                                         // (i.e., mem-to-mem direct copying)!
1329                                         *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1330                                 else
1331                                         *currentLineBuffer = 
1332                                                 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1333                                         *(currentLineBuffer + 1) = 
1334                                                 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1335                         }
1336
1337                         currentLineBuffer += lbufDelta;
1338
1339                         horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1340                         while (horizontalRemainder & 0x80)
1341                         {
1342                                 horizontalRemainder += hscale;
1343                                 pixCount++;
1344                                 pixels <<= 4;
1345                         }
1346
1347                         if (pixCount > 15)
1348                         {
1349                                 int phrasesToSkip = pixCount / 16, pixelShift = pixCount % 16;
1350
1351                                 data += (pitch << 3) * phrasesToSkip;
1352                                 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1353                                 pixels <<= 4 * pixelShift;
1354                                 iwidth -= phrasesToSkip;
1355                                 pixCount = pixelShift;
1356                         }
1357                 }
1358         }
1359         else if (depth == 3)                                                    // 8 BPP
1360         {
1361 if (firstPix)
1362         WriteLog("OP: Scaled bitmap @ 8 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1363                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1364                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1365
1366                 int pixCount = 0;
1367                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1368
1369                 while ((int32)iwidth > 0)
1370                 {
1371                         uint8 bits = pixels >> 56;
1372
1373                         if (flagTRANS && bits == 0)
1374                                 ;       // Do nothing...
1375                         else
1376                         {
1377                                 if (!flagRMW)
1378                                         // This is the *only* correct use of endian-dependent code
1379                                         // (i.e., mem-to-mem direct copying)!
1380                                         *(uint16 *)currentLineBuffer = paletteRAM16[bits];
1381                                 else
1382                                         *currentLineBuffer = 
1383                                                 BLEND_CR(*currentLineBuffer, paletteRAM[bits << 1]),
1384                                         *(currentLineBuffer + 1) = 
1385                                                 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[(bits << 1) + 1]);
1386                         }
1387
1388                         currentLineBuffer += lbufDelta;
1389
1390                         horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1391                         while (horizontalRemainder & 0x80)
1392                         {
1393                                 horizontalRemainder += hscale;
1394                                 pixCount++;
1395                                 pixels <<= 8;
1396                         }
1397
1398                         if (pixCount > 7)
1399                         {
1400                                 int phrasesToSkip = pixCount / 8, pixelShift = pixCount % 8;
1401
1402                                 data += (pitch << 3) * phrasesToSkip;
1403                                 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1404                                 pixels <<= 8 * pixelShift;
1405                                 iwidth -= phrasesToSkip;
1406                                 pixCount = pixelShift;
1407                         }
1408                 }
1409         }
1410         else if (depth == 4)                                                    // 16 BPP
1411         {
1412 if (firstPix != 0)
1413         WriteLog("OP: Scaled bitmap @ 16 BPP requesting FIRSTPIX!\n");
1414                 // The LSB is OPFLAG_REFLECT, so sign extend it and OR 2 into it.
1415                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1416
1417                 int pixCount = 0;
1418                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1419
1420                 while ((int32)iwidth > 0)
1421                 {
1422                         uint8 bitsHi = pixels >> 56, bitsLo = pixels >> 48;
1423
1424                         if (flagTRANS && (bitsLo | bitsHi) == 0)
1425                                 ;       // Do nothing...
1426                         else
1427                         {
1428                                 if (!flagRMW)
1429                                         *currentLineBuffer = bitsHi,
1430                                         *(currentLineBuffer + 1) = bitsLo;
1431                                 else
1432                                         *currentLineBuffer = 
1433                                                 BLEND_CR(*currentLineBuffer, bitsHi),
1434                                         *(currentLineBuffer + 1) = 
1435                                                 BLEND_Y(*(currentLineBuffer + 1), bitsLo);
1436                         }
1437
1438                         currentLineBuffer += lbufDelta;
1439
1440                         horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1441                         while (horizontalRemainder & 0x80)
1442                         {
1443                                 horizontalRemainder += hscale;
1444                                 pixCount++;
1445                                 pixels <<= 16;
1446                         }
1447
1448                         if (pixCount > 3)
1449                         {
1450                                 int phrasesToSkip = pixCount / 4, pixelShift = pixCount % 4;
1451
1452                                 data += (pitch << 3) * phrasesToSkip;
1453                                 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1454                                 pixels <<= 16 * pixelShift;
1455
1456                                 iwidth -= phrasesToSkip;
1457
1458                                 pixCount = pixelShift;
1459                         }
1460                 }
1461         }
1462         else if (depth == 5)                                                    // 24 BPP
1463         {
1464 //I'm not sure that you can scale a 24 BPP bitmap properly--the JTRM seems to indicate as much.
1465 WriteLog("OP: Writing 24 BPP scaled bitmap!\n");
1466 if (firstPix != 0)
1467         WriteLog("OP: Scaled bitmap @ 24 BPP requesting FIRSTPIX!\n");
1468                 // Not sure, but I think RMW only works with 16 BPP and below, and only in CRY mode...
1469                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 4 into it.
1470                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 4) | 0x04;
1471
1472                 while (iwidth--)
1473                 {
1474                         // Fetch phrase...
1475                         uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1476                         data += pitch << 3;                                             // Multiply pitch * 8 (optimize: precompute this value)
1477
1478                         for(int i=0; i<2; i++)
1479                         {
1480                                 uint8 bits3 = pixels >> 56, bits2 = pixels >> 48,
1481                                         bits1 = pixels >> 40, bits0 = pixels >> 32;
1482
1483                                 if (flagTRANS && (bits3 | bits2 | bits1 | bits0) == 0)
1484                                         ;       // Do nothing...
1485                                 else
1486                                         *currentLineBuffer = bits3,
1487                                         *(currentLineBuffer + 1) = bits2,
1488                                         *(currentLineBuffer + 2) = bits1,
1489                                         *(currentLineBuffer + 3) = bits0;
1490
1491                                 currentLineBuffer += lbufDelta;
1492                                 pixels <<= 32;
1493                         }
1494                 }
1495         }
1496 /*if (depth == 3 && startPos == 13)
1497 {
1498 if (op_start_log)
1499 WriteLog("OP: Writing in the margins...\n");
1500         for(int i=0; i<100*2; i+=2)
1501 //      for(int i=0; i<14*2; i+=2)
1502                 tom_ram_8[0x1800 + i] = 0xFF,
1503                 tom_ram_8[0x1800 + i + 1] = 0xFF;
1504 }*/
1505 //      uint32 lbufAddress = 0x1800 + (!in24BPPMode ? startPos * 2 : startPos * 4);
1506 //      uint8 * currentLineBuffer = &tom_ram_8[lbufAddress];
1507 }