]> Shamusworld >> Repos - virtualjaguar/blob - src/objectp.cpp
cee49356ccdd92889d84d60e7398e291343c8686
[virtualjaguar] / src / objectp.cpp
1 //
2 // Object Processor
3 //
4 // Original source by Cal2
5 // GCC/SDL port by Niels Wagenaar (Linux/WIN32) and Caz (BeOS)
6 // Extensive cleanups/fixes/rewrites by James L. Hammons
7 //
8
9 #include <stdio.h>
10 #include <stdlib.h>
11 #include <string.h>
12 #include "jaguar.h"
13
14 //#define OP_DEBUG
15 //#define OP_DEBUG_BMP
16
// Blend-table lookups. The destination byte forms the high 8 bits of the table
// index and the source byte the low 8 bits. Both macro arguments are fully
// parenthesized so that a compound expression (e.g. "a | b") is cast and
// shifted as a whole rather than having the cast bind to its first operand only.
#define BLEND_Y(dst, src)       op_blend_y[(((uint16)(dst)) << 8) | ((uint16)(src))]
#define BLEND_CR(dst, src)      op_blend_cr[(((uint16)(dst)) << 8) | ((uint16)(src))]
19
// Object types (compared against the low three bits of an object's first phrase)
#define OBJECT_TYPE_BITMAP      0x00                    // 000
#define OBJECT_TYPE_SCALE       0x01                    // 001
#define OBJECT_TYPE_GPU         0x02                    // 010
#define OBJECT_TYPE_BRANCH      0x03                    // 011
#define OBJECT_TYPE_STOP        0x04                    // 100

// Branch object condition codes
#define CONDITION_EQUAL                 0x00
#define CONDITION_LESS_THAN             0x01
#define CONDITION_GREATER_THAN          0x02
#define CONDITION_OP_FLAG_SET           0x03
#define CONDITION_SECOND_HALF_LINE      0x04

// Bits of the 4-bit flags field taken from a bitmap object's second phrase
#define OPFLAG_RELEASE          0x08                    // Bus release bit
#define OPFLAG_TRANS            0x04                    // Transparency bit
#define OPFLAG_RMW              0x02                    // Read-Modify-Write bit
#define OPFLAG_REFLECT          0x01                    // Horizontal mirror bit
36
37 // Private function prototypes
38
39 void OPProcessFixedBitmap(uint64 p0, uint64 p1, bool render);
40 void OPProcessScaledBitmap(uint64 p0, uint64 p1, uint64 p2, bool render);
41 void DumpScaledObject(uint64 p0, uint64 p1, uint64 p2);
42 void DumpFixedObject(uint64 p0, uint64 p1);
43 uint64 op_load_phrase(uint32 offset);
44
45 // Local global variables
46
47 static uint8 * op_blend_y;
48 static uint8 * op_blend_cr;
49 // There may be a problem with this "RAM" overlapping (and thus being independent of)
50 // some of the regular TOM RAM...
51 static uint8 objectp_ram[0x40];                 // This is based at $F00000
52 uint8 objectp_running;
53 //bool objectp_stop_reading_list;
54
55 static uint8 op_bitmap_bit_depth[8] = { 1, 2, 4, 8, 16, 24, 32, 0 };
56 //static uint32 op_bitmap_bit_size[8] =
57 //      { (uint32)(0.125*65536), (uint32)(0.25*65536), (uint32)(0.5*65536), (uint32)(1*65536),
58 //        (uint32)(2*65536),     (uint32)(1*65536),    (uint32)(1*65536),   (uint32)(1*65536) };
59 static uint32 op_pointer;
60
61 int32 phraseWidthToPixels[8] = { 64, 32, 16, 8, 4, 2, 0, 0 };
62
63
64 //
65 // Object Processor initialization
66 //
67 void op_init(void)
68 {
69         // Blend tables (64K each)
70         memory_malloc_secure((void **)&op_blend_y, 0x10000, "Jaguar Object processor Y blend lookup table");
71         memory_malloc_secure((void **)&op_blend_cr, 0x10000, "Jaguar Object processor CR blend lookup table");
72
73         // Here we calculate the saturating blend of a signed 4-bit value and an
74         // existing Cyan/Red value as well as a signed 8-bit value and an existing intensity...
75         // Note: CRY is 4 bits Cyan, 4 bits Red, 16 bits intensitY
76         for(int i=0; i<256*256; i++)
77         {
78                 int y = (i >> 8) & 0xFF;
79                 int dy = (INT8)i;                                       // Sign extend the Y index
80                 int c1 = (i >> 8) & 0x0F;
81                 int dc1 = (INT8)(i << 4) >> 4;          // Sign extend the R index
82                 int c2 = (i >> 12) & 0x0F;
83                 int dc2 = (INT8)(i & 0xF0) >> 4;        // Sign extend the C index
84
85                 y += dy;
86                 if (y < 0)
87                         y = 0;
88                 else if (y > 0xFF)
89                         y = 0xFF;
90                 op_blend_y[i] = y;
91
92                 c1 += dc1;
93                 if (c1 < 0)
94                         c1 = 0;
95                 else if (c1 > 0x0F)
96                         c1 = 0x0F;
97                 c2 += dc2;
98
99                 if (c2 < 0)
100                         c2 = 0;
101                 else if (c2 > 0x0F)
102                         c2 = 0x0F;
103                 op_blend_cr[i] = (c2 << 4) | c1;
104         }
105
106         op_reset();
107 }
108
109 //
110 // Object Processor reset
111 //
112 void op_reset(void)
113 {
114         memset(objectp_ram, 0x00, 0x40);
115         objectp_running = 0;
116 }
117
118 void op_done(void)
119 {
120         char * opType[8] =
121         { "(BITMAP)", "(SCALED BITMAP)", "(GPU INT)", "(BRANCH)", "(STOP)", "???", "???", "???" };
122         char * ccType[8] =
123                 { "\"==\"", "\"<\"", "\">\"", "(opflag set)", "(second half line)", "?", "?", "?" };
124
125         uint32 olp = op_get_list_pointer();
126         WriteLog("OP: OLP = %08X\n", olp);
127         WriteLog("OP: Phrase dump\n    ----------\n");
128         for(uint32 i=0; i<0x100; i+=8)
129         {
130                 uint32 hi = JaguarReadLong(olp + i, OP), lo = JaguarReadLong(olp + i + 4, OP);
131                 WriteLog("\t%08X: %08X %08X %s", olp + i, hi, lo, opType[lo & 0x07]);
132                 if ((lo & 0x07) == 3)
133                 {
134                         uint16 ypos = (lo >> 3) & 0x7FF;
135                         uint8  cc   = (lo >> 14) & 0x03;
136                         uint32 link = ((hi << 11) | (lo >> 21)) & 0x3FFFF8;
137                         WriteLog(" YPOS=%u, CC=%s, link=%08X", ypos, ccType[cc], link);
138                 }
139                 WriteLog("\n");
140                 if ((lo & 0x07) == 0)
141                         DumpFixedObject(op_load_phrase(olp+i), op_load_phrase(olp+i+8));
142                 if ((lo & 0x07) == 1)
143                         DumpScaledObject(op_load_phrase(olp+i), op_load_phrase(olp+i+8), op_load_phrase(olp+i+16));
144         }
145         WriteLog("\n");
146 }
147
148 //
149 // Object Processor memory access
150 // Memory range: F00010 - F00027
151 //
152 //      F00010-F00017   R     xxxxxxxx xxxxxxxx   OB - current object code from the graphics processor
153 //      F00020-F00023     W   xxxxxxxx xxxxxxxx   OLP - start of the object list
154 //      F00026            W   -------- -------x   OBF - object processor flag
155 //
156
157 uint8 OPReadByte(uint32 offset, uint32 who/*=UNKNOWN*/)
158 {
159         offset &= 0x3F;
160         return objectp_ram[offset];
161 }
162
163 uint16 OPReadWord(uint32 offset, uint32 who/*=UNKNOWN*/)
164 {
165         offset &= 0x3F;
166         return GET16(objectp_ram, offset);
167 }
168
169 void OPWriteByte(uint32 offset, uint8 data, uint32 who/*=UNKNOWN*/)
170 {
171         offset &= 0x3F;
172         objectp_ram[offset] = data;
173 }
174
175 void OPWriteWord(uint32 offset, uint16 data, uint32 who/*=UNKNOWN*/)
176 {
177         offset &= 0x3F;
178         SET16(objectp_ram, offset, data);
179
180 /*if (offset == 0x20)
181 WriteLog("OP: Setting lo list pointer: %04X\n", data);
182 if (offset == 0x22)
183 WriteLog("OP: Setting hi list pointer: %04X\n", data);//*/
184 }
185
186 uint32 op_get_list_pointer(void)
187 {
188         // Note: This register is LO / HI WORD, hence the funky look of this...
189 //      return (objectp_ram[0x22] << 24) | (objectp_ram[0x23] << 16) | (objectp_ram[0x20] << 8) | objectp_ram[0x21];
190         return GET16(objectp_ram, 0x20) | (GET16(objectp_ram, 0x22) << 16);
191 }
192
193 // This is WRONG, since the OBF is only 16 bits wide!!! [FIXED]
194
195 uint32 op_get_status_register(void)
196 {
197 //      return (objectp_ram[0x26] << 24) | (objectp_ram[0x27] << 16) | (objectp_ram[0x28] << 8) | objectp_ram[0x29];
198 //      return GET32(objectp_ram, 0x26);
199         return GET16(objectp_ram, 0x26);
200 }
201
202 // This is WRONG, since the OBF is only 16 bits wide!!! [FIXED]
203
204 void op_set_status_register(uint32 data)
205 {
206 /*      objectp_ram[0x26] = (data & 0xFF000000) >> 24;
207         objectp_ram[0x27] = (data & 0x00FF0000) >> 16;
208         objectp_ram[0x28] = (data & 0x0000FF00) >> 8;
209         objectp_ram[0x29] |= (data & 0xFE);*/
210         objectp_ram[0x26] = (data & 0x0000FF00) >> 8;
211         objectp_ram[0x27] |= (data & 0xFE);
212 }
213
214 void op_set_current_object(uint64 object)
215 {
216 //Not sure this is right... Wouldn't it just be stored 64 bit BE?
217         // Stored as least significant 32 bits first, ms32 last in big endian
218 /*      objectp_ram[0x13] = object & 0xFF; object >>= 8;
219         objectp_ram[0x12] = object & 0xFF; object >>= 8;
220         objectp_ram[0x11] = object & 0xFF; object >>= 8;
221         objectp_ram[0x10] = object & 0xFF; object >>= 8;
222
223         objectp_ram[0x17] = object & 0xFF; object >>= 8;
224         objectp_ram[0x16] = object & 0xFF; object >>= 8;
225         objectp_ram[0x15] = object & 0xFF; object >>= 8;
226         objectp_ram[0x14] = object & 0xFF;*/
227 // Let's try regular good old big endian...
228         objectp_ram[0x17] = object & 0xFF; object >>= 8;
229         objectp_ram[0x16] = object & 0xFF; object >>= 8;
230         objectp_ram[0x15] = object & 0xFF; object >>= 8;
231         objectp_ram[0x14] = object & 0xFF; object >>= 8;
232
233         objectp_ram[0x13] = object & 0xFF; object >>= 8;
234         objectp_ram[0x12] = object & 0xFF; object >>= 8;
235         objectp_ram[0x11] = object & 0xFF; object >>= 8;
236         objectp_ram[0x10] = object & 0xFF;
237 }
238
239 uint64 op_load_phrase(uint32 offset)
240 {
241         offset &= ~0x07;                                                // 8 byte alignment
242         return ((uint64)JaguarReadLong(offset, OP) << 32) | (uint64)JaguarReadLong(offset+4, OP);
243 }
244
245 void OPStorePhrase(uint32 offset, uint64 p)
246 {
247         offset &= ~0x07;                                                // 8 byte alignment
248         JaguarWriteLong(offset, p >> 32, OP);
249         JaguarWriteLong(offset + 4, p & 0xFFFFFFFF, OP);
250 }
251
252 //
253 // Debugging routines
254 //
255 void DumpScaledObject(uint64 p0, uint64 p1, uint64 p2)
256 {
257         WriteLog(" (SCALED BITMAP)");
258         WriteLog(" %08X --> phrase %08X %08X\n", op_pointer, (uint32)(p1>>32), (uint32)(p1&0xFFFFFFFF));
259         WriteLog("                 %08X --> phrase %08X %08X ", op_pointer+8, (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
260         uint8 bitdepth = (p1 >> 12) & 0x07;
261 //WAS:  int16 ypos = ((p0 >> 3) & 0x3FF);                       // ??? What if not interlaced (/2)?
262         int16 ypos = ((p0 >> 3) & 0x7FF);                       // ??? What if not interlaced (/2)?
263         int32 xpos = p1 & 0xFFF;
264         xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);
265         uint32 iwidth = ((p1 >> 28) & 0x3FF);
266         uint32 dwidth = ((p1 >> 18) & 0x3FF);           // Unsigned!
267         uint16 height = ((p0 >> 14) & 0x3FF);
268         uint32 link = ((p0 >> 24) & 0x7FFFF) << 3;
269         uint32 ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
270         uint32 firstPix = (p1 >> 49) & 0x3F;
271         uint8 flags = (p1 >> 45) & 0x0F;
272         uint8 idx = (p1 >> 38) & 0x7F;
273         uint32 pitch = (p1 >> 15) & 0x07;
274         WriteLog("\n    [%u (%u) x %u @ (%i, %u) (%u bpp), l: %08X, p: %08X fp: %02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
275                 iwidth, dwidth, height, xpos, ypos, op_bitmap_bit_depth[bitdepth], link, ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""), (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""), (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
276         uint32 hscale = p2 & 0xFF;
277         uint32 vscale = (p2 >> 8) & 0xFF;
278         uint32 remainder = (p2 >> 16) & 0xFF;
279         WriteLog("    [hsc: %02X, vsc: %02X, rem: %02X]\n", hscale, vscale, remainder);
280 }
281
282 void DumpFixedObject(uint64 p0, uint64 p1)
283 {
284         WriteLog(" (BITMAP)");
285         WriteLog(" %08X --> phrase %08X %08X\n", op_pointer, (uint32)(p1>>32), (uint32)(p1&0xFFFFFFFF));
286         uint8 bitdepth = (p1 >> 12) & 0x07;
287 //WAS:  int16 ypos = ((p0 >> 3) & 0x3FF);                       // ??? What if not interlaced (/2)?
288         int16 ypos = ((p0 >> 3) & 0x7FF);                       // ??? What if not interlaced (/2)?
289         int32 xpos = p1 & 0xFFF;
290         xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);
291         uint32 iwidth = ((p1 >> 28) & 0x3FF);
292         uint32 dwidth = ((p1 >> 18) & 0x3FF);           // Unsigned!
293         uint16 height = ((p0 >> 14) & 0x3FF);
294         uint32 link = ((p0 >> 24) & 0x7FFFF) << 3;
295         uint32 ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
296         uint32 firstPix = (p1 >> 49) & 0x3F;
297         uint8 flags = (p1 >> 45) & 0x0F;
298         uint8 idx = (p1 >> 38) & 0x7F;
299         uint32 pitch = (p1 >> 15) & 0x07;
300         WriteLog("    [%u (%u) x %u @ (%i, %u) (%u bpp), l: %08X, p: %08X fp: %02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
301                 iwidth, dwidth, height, xpos, ypos, op_bitmap_bit_depth[bitdepth], link, ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""), (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""), (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
302 }
303
304 //
305 // Object Processor main routine
306 //
307 //Need to fix this so that when an GPU object IRQ happens, we can pick up OP processing
308 //where we left off. !!! FIX !!!
309 void OPProcessList(int scanline, bool render)
310 {
311 extern int op_start_log;
312 //      char * condition_to_str[8] =
313 //              { "==", "<", ">", "(opflag set)", "(second half line)", "?", "?", "?" };
314
315         op_pointer = op_get_list_pointer();
316
317 //      objectp_stop_reading_list = false;
318
319 //WriteLog("OP: Processing line #%u (OLP=%08X)...\n", scanline, op_pointer);
320 //op_done();
321
322 // *** BEGIN OP PROCESSOR TESTING ONLY ***
323 extern bool interactiveMode;
324 extern bool iToggle;
325 extern int objectPtr;
326 bool inhibit;
327 int bitmapCounter = 0;
328 // *** END OP PROCESSOR TESTING ONLY ***
329
330 //      if (op_pointer) WriteLog(" new op list at 0x%.8x scanline %i\n",op_pointer,scanline);
331         while (op_pointer)
332         {
333 // *** BEGIN OP PROCESSOR TESTING ONLY ***
334 if (interactiveMode && bitmapCounter == objectPtr)
335         inhibit = iToggle;
336 else
337         inhibit = false;
338 // *** END OP PROCESSOR TESTING ONLY ***
339 //              if (objectp_stop_reading_list)
340 //                      return;
341                         
342                 uint64 p0 = op_load_phrase(op_pointer);
343 //WriteLog("\t%08X type %i\n", op_pointer, (uint8)p0 & 0x07);
344                 op_pointer += 8;
345 if (scanline == tom_get_vdb() && op_start_log)
346 //if (scanline == 215 && op_start_log)
347 {
348 WriteLog("%08X --> phrase %08X %08X", op_pointer - 8, (int)(p0>>32), (int)(p0&0xFFFFFFFF));
349 if ((p0 & 0x07) == OBJECT_TYPE_BITMAP)
350 {
351 WriteLog(" (BITMAP) ");
352 uint64 p1 = op_load_phrase(op_pointer);
353 WriteLog("\n%08X --> phrase %08X %08X ", op_pointer, (int)(p1>>32), (int)(p1&0xFFFFFFFF));
354         uint8 bitdepth = (p1 >> 12) & 0x07;
355 //WAS:  int16 ypos = ((p0 >> 3) & 0x3FF);                       // ??? What if not interlaced (/2)?
356         int16 ypos = ((p0 >> 3) & 0x7FF);                       // ??? What if not interlaced (/2)?
357 int32 xpos = p1 & 0xFFF;
358 xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);
359         uint32 iwidth = ((p1 >> 28) & 0x3FF);
360         uint32 dwidth = ((p1 >> 18) & 0x3FF);           // Unsigned!
361         uint16 height = ((p0 >> 14) & 0x3FF);
362         uint32 link = ((p0 >> 24) & 0x7FFFF) << 3;
363         uint32 ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
364         uint32 firstPix = (p1 >> 49) & 0x3F;
365         uint8 flags = (p1 >> 45) & 0x0F;
366         uint8 idx = (p1 >> 38) & 0x7F;
367         uint32 pitch = (p1 >> 15) & 0x07;
368 WriteLog("\n    [%u (%u) x %u @ (%i, %u) (%u bpp), l: %08X, p: %08X fp: %02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
369         iwidth, dwidth, height, xpos, ypos, op_bitmap_bit_depth[bitdepth], link, ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""), (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""), (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
370 }
371 if ((p0 & 0x07) == OBJECT_TYPE_SCALE)
372 {
373 WriteLog(" (SCALED BITMAP)");
374 uint64 p1 = op_load_phrase(op_pointer), p2 = op_load_phrase(op_pointer+8);
375 WriteLog("\n%08X --> phrase %08X %08X ", op_pointer, (int)(p1>>32), (int)(p1&0xFFFFFFFF));
376 WriteLog("\n%08X --> phrase %08X %08X ", op_pointer+8, (int)(p2>>32), (int)(p2&0xFFFFFFFF));
377         uint8 bitdepth = (p1 >> 12) & 0x07;
378 //WAS:  int16 ypos = ((p0 >> 3) & 0x3FF);                       // ??? What if not interlaced (/2)?
379         int16 ypos = ((p0 >> 3) & 0x7FF);                       // ??? What if not interlaced (/2)?
380 int32 xpos = p1 & 0xFFF;
381 xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);
382         uint32 iwidth = ((p1 >> 28) & 0x3FF);
383         uint32 dwidth = ((p1 >> 18) & 0x3FF);           // Unsigned!
384         uint16 height = ((p0 >> 14) & 0x3FF);
385         uint32 link = ((p0 >> 24) & 0x7FFFF) << 3;
386         uint32 ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
387         uint32 firstPix = (p1 >> 49) & 0x3F;
388         uint8 flags = (p1 >> 45) & 0x0F;
389         uint8 idx = (p1 >> 38) & 0x7F;
390         uint32 pitch = (p1 >> 15) & 0x07;
391 WriteLog("\n    [%u (%u) x %u @ (%i, %u) (%u bpp), l: %08X, p: %08X fp: %02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
392         iwidth, dwidth, height, xpos, ypos, op_bitmap_bit_depth[bitdepth], link, ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""), (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""), (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
393         uint32 hscale = p2 & 0xFF;
394         uint32 vscale = (p2 >> 8) & 0xFF;
395         uint32 remainder = (p2 >> 16) & 0xFF;
396 WriteLog("    [hsc: %02X, vsc: %02X, rem: %02X]\n", hscale, vscale, remainder);
397 }
398 if ((p0 & 0x07) == OBJECT_TYPE_GPU)
399 WriteLog(" (GPU)\n");
400 if ((p0 & 0x07) == OBJECT_TYPE_BRANCH)
401 {
402 WriteLog(" (BRANCH)\n");
403 uint8 * jaguar_mainRam = GetRamPtr();
404 WriteLog("[RAM] --> ");
405 for(int k=0; k<8; k++)
406         WriteLog("%02X ", jaguar_mainRam[op_pointer-8 + k]);
407 WriteLog("\n");
408 }
409 if ((p0 & 0x07) == OBJECT_TYPE_STOP)
410 WriteLog("    --> List end\n");
411 }//*/
412                 
413                 switch ((uint8)p0 & 0x07)
414                 {
415                 case OBJECT_TYPE_BITMAP:
416                 {
417 //WAS:                  uint16 ypos = (p0 >> 3) & 0x3FF;
418                         uint16 ypos = (p0 >> 3) & 0x7FF;
419 // This is only theory implied by Rayman...!
420 // It seems that if the YPOS is zero, then bump the YPOS value so that it coincides with
421 // the VDB value. With interlacing, this would be slightly more tricky.
422 // There's probably another bit somewhere that enables this mode--but so far, doesn't seem
423 // to affect any other game in a negative way (that I've seen).
424 // Either that, or it's an undocumented bug...
425
426 //No, the reason this was needed is that the OP code before was wrong. Any value
427 //less than VDB will get written to the top line of the display!
428 //                      if (ypos == 0)
429 //                              ypos = TOMReadWord(0xF00046, OP) / 2;                   // Get the VDB value
430                         uint32 height = (p0 & 0xFFC000) >> 14;
431                         uint32 oldOPP = op_pointer - 8;
432 // *** BEGIN OP PROCESSOR TESTING ONLY ***
433 if (inhibit && op_start_log)
434         WriteLog("!!! ^^^ This object is INHIBITED! ^^^ !!!\n");
435 bitmapCounter++;
436 if (!inhibit)   // For OP testing only!
437 // *** END OP PROCESSOR TESTING ONLY ***
438                         if (scanline >= ypos && height > 0)
439                         {
440                                 uint64 p1 = op_load_phrase(op_pointer);
441                                 op_pointer += 8;
442 //WriteLog("OP: Writing scanline %d with ypos == %d...\n", scanline, ypos);
443 //WriteLog("--> Writing %u BPP bitmap...\n", op_bitmap_bit_depth[(p1 >> 12) & 0x07]);
444 //                              OPProcessFixedBitmap(scanline, p0, p1, render);
445                                 OPProcessFixedBitmap(p0, p1, render);
446
447                                 // OP write-backs
448
449 //???Does this really happen??? Doesn't seem to work if you do this...!
450 //Probably not. Must be a bug in the documentation...!
451 //                              uint32 link = (p0 & 0x7FFFF000000) >> 21;
452 //                              SET16(objectp_ram, 0x20, link & 0xFFFF);        // OLP
453 //                              SET16(objectp_ram, 0x22, link >> 16);
454 /*                              uint32 height = (p0 & 0xFFC000) >> 14;
455                                 if (height - 1 > 0)
456                                         height--;*/
457                                 // NOTE: Would subtract 2 if in interlaced mode...!
458 //                              uint64 height = ((p0 & 0xFFC000) - 0x4000) & 0xFFC000;
459 //                              if (height)
460                                 height--;
461
462                                 uint64 data = (p0 & 0xFFFFF80000000000LL) >> 40;
463                                 uint64 dwidth = (p1 & 0xFFC0000) >> 15;
464                                 data += dwidth;
465
466                                 p0 &= ~0xFFFFF80000FFC000LL;            // Mask out old data...
467                                 p0 |= (uint64)height << 14;
468                                 p0 |= data << 40;
469                                 OPStorePhrase(oldOPP, p0);
470                         }
471 //WriteLog("\t\tOld OP: %08X -> ", op_pointer);
472 //Temp, for testing...
473 //No doubt, this type of check will break all kinds of stuff... !!! FIX !!!
474 //And it does! !!! FIX !!!
475         if (op_pointer > ((p0 & 0x000007FFFF000000LL) >> 21))
476                 return;
477
478                         op_pointer = (p0 & 0x000007FFFF000000LL) >> 21;
479 //WriteLog("New OP: %08X\n", op_pointer);
480                         break;
481                 }
482                 case OBJECT_TYPE_SCALE:
483                 {
484 //WAS:                  uint16 ypos = (p0 >> 3) & 0x3FF;
485                         uint16 ypos = (p0 >> 3) & 0x7FF;
486                         uint32 height = (p0 & 0xFFC000) >> 14;
487                         uint32 oldOPP = op_pointer - 8;
488 // *** BEGIN OP PROCESSOR TESTING ONLY ***
489 if (inhibit && op_start_log)
490 {
491         WriteLog("!!! ^^^ This object is INHIBITED! ^^^ !!! (scanline=%u, ypos=%u, height=%u)\n", scanline, ypos, height);
492         DumpScaledObject(p0, op_load_phrase(op_pointer), op_load_phrase(op_pointer+8));
493 }
494 bitmapCounter++;
495 if (!inhibit)   // For OP testing only!
496 // *** END OP PROCESSOR TESTING ONLY ***
497                         if (scanline >= ypos && height > 0)
498                         {
499                                 uint64 p1 = op_load_phrase(op_pointer);
500                                 op_pointer += 8;
501                                 uint64 p2 = op_load_phrase(op_pointer);
502                                 op_pointer += 8;
503 //WriteLog("OP: %08X (%d) %08X%08X %08X%08X %08X%08X\n", oldOPP, scanline, (uint32)(p0>>32), (uint32)(p0&0xFFFFFFFF), (uint32)(p1>>32), (uint32)(p1&0xFFFFFFFF), (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
504                                 OPProcessScaledBitmap(p0, p1, p2, render);
505
506                                 // OP write-backs
507
508                                 uint8 remainder = p2 >> 16, vscale = p2 >> 8;
509 //Actually, we should skip this object if it has a vscale of zero.
510 //Or do we? Not sure... Atari Karts has a few lines that look like:
511 // (SCALED BITMAP)
512 //000E8268 --> phrase 00010000 7000B00D 
513 //    [7 (0) x 1 @ (13, 0) (8 bpp), l: 000E82A0, p: 000E0FC0 fp: 00, fl:RELEASE, idx:00, pt:01]
514 //    [hsc: 9A, vsc: 00, rem: 00]
515 // Could it be the vscale is overridden if the DWIDTH is zero? Hmm...
516
517                                 if (vscale == 0)
518                                         vscale = 0x20;                                  // OP bug??? Nope, it isn't...! Or is it?
519
520 //extern int start_logging;
521 //if (start_logging)
522 //      WriteLog("--> Returned from scaled bitmap processing (rem=%02X, vscale=%02X)...\n", remainder, vscale);//*/
523 //Locks up here:
524 //--> Returned from scaled bitmap processing (rem=20, vscale=80)...
525 //There are other problems here, it looks like...
526 //Another lock up:
527 //About to execute OP (508)...
528 /*
529 OP: Scaled bitmap 4x? 4bpp at 38,? hscale=7C fpix=0 data=00075E28 pitch 1 hflipped=no dwidth=? (linked to 00071118) Transluency=no
530 --> Returned from scaled bitmap processing (rem=50, vscale=7C)...
531 OP: Scaled bitmap 4x? 4bpp at 38,? hscale=7C fpix=0 data=00075E28 pitch 1 hflipped=no dwidth=? (linked to 00071118) Transluency=no
532 --> Returned from scaled bitmap processing (rem=30, vscale=7C)...
533 OP: Scaled bitmap 4x? 4bpp at 38,? hscale=7C fpix=0 data=00075E28 pitch 1 hflipped=no dwidth=? (linked to 00071118) Transluency=no
534 --> Returned from scaled bitmap processing (rem=10, vscale=7C)...
535 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756A8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
536 --> Returned from scaled bitmap processing (rem=00, vscale=7E)...
537 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
538 --> Returned from scaled bitmap processing (rem=00, vscale=80)...
539 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
540 --> Returned from scaled bitmap processing (rem=5E, vscale=7E)...
541 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756E8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
542 --> Returned from scaled bitmap processing (rem=60, vscale=80)...
543 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
544 --> Returned from scaled bitmap processing (rem=3E, vscale=7E)...
545 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756E8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
546 --> Returned from scaled bitmap processing (rem=40, vscale=80)...
547 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
548 --> Returned from scaled bitmap processing (rem=1E, vscale=7E)...
549 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756E8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
550 --> Returned from scaled bitmap processing (rem=20, vscale=80)...
551 */
552 //Here's another problem:
553 //    [hsc: 20, vsc: 20, rem: 00]
554 // Since we're not checking for $E0 (but that's what we get from the above), we end
555 // up repeating this scanline unnecessarily... !!! FIX !!! [DONE, but... still not quite
556 // right. Either that, or the Accolade team that wrote Bubsy screwed up royal.]
557 //Also note: $E0 = 7.0 which IS a legal vscale value...
558
559 //                              if (remainder & 0x80)                           // I.e., it's negative
560 //                              if ((remainder & 0x80) || remainder == 0)       // I.e., it's <= 0
561 //                              if ((remainder - 1) >= 0xE0)            // I.e., it's <= 0
562 //                              if ((remainder >= 0xE1) || remainder == 0)// I.e., it's <= 0
563 //                              if ((remainder >= 0xE1 && remainder <= 0xFF) || remainder == 0)// I.e., it's <= 0
564                                 if (remainder <= 0x20)                          // I.e., it's <= 0
565                                 {
566                                         uint64 data = (p0 & 0xFFFFF80000000000LL) >> 40;
567                                         uint64 dwidth = (p1 & 0xFFC0000) >> 15;
568
569 //                                      while (remainder & 0x80)
570 //                                      while ((remainder & 0x80) || remainder == 0)
571 //                                      while ((remainder - 1) >= 0xE0)
572 //                                      while ((remainder >= 0xE1) || remainder == 0)
573 //                                      while ((remainder >= 0xE1 && remainder <= 0xFF) || remainder == 0)
574                                         while (remainder <= 0x20)
575                                         {
576                                                 remainder += vscale;
577
578                                                 if (height)
579                                                         height--;
580
581                                                 data += dwidth;
582                                         }
583
584                                         p0 &= ~0xFFFFF80000FFC000LL;    // Mask out old data...
585                                         p0 |= (uint64)height << 14;
586                                         p0 |= data << 40;
587                                         OPStorePhrase(oldOPP, p0);
588                                 }
589
590                                 remainder -= 0x20;                                      // 1.0f in [3.5] fixed point format
591
592 //if (start_logging)
593 //      WriteLog("--> Finished writebacks...\n");//*/
594
595 //WriteLog(" [%08X%08X -> ", (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
596                                 p2 &= ~0x0000000000FF0000LL;
597                                 p2 |= (uint64)remainder << 16;
598 //WriteLog("%08X%08X]\n", (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
599                                 OPStorePhrase(oldOPP+16, p2);
600 //remainder = (uint8)(p2 >> 16), vscale = (uint8)(p2 >> 8);
601 //WriteLog(" [after]: rem=%02X, vscale=%02X\n", remainder, vscale);
602                         }
603                         op_pointer = (p0 & 0x000007FFFF000000LL) >> 21;
604                         break;
605                 }
606                 case OBJECT_TYPE_GPU:
607                 {
608 //WriteLog("OP: Asserting GPU IRQ #3...\n");
609                         op_set_current_object(p0);
610                         GPUSetIRQLine(3, ASSERT_LINE);
611 //Also, OP processing is suspended from this point until OBF (F00026) is written to...
612 // !!! FIX !!!
613 //Do something like:
614 //OPSuspendedByGPU = true;
615 //Dunno if the OP keeps processing from where it was interrupted, or if it just continues
616 //on the next scanline...
617 // --> It continues from where it was interrupted! !!! FIX !!!
618                         break;
619                 }
620                 case OBJECT_TYPE_BRANCH:
621                 {
622                         uint16 ypos = (p0 >> 3) & 0x7FF;
623                         uint8  cc   = (p0 >> 14) & 0x03;
624                         uint32 link = (p0 >> 21) & 0x3FFFF8;
625                         
626 //                      if ((ypos!=507)&&(ypos!=25))
627 //                              WriteLog("\t%i%s%i link=0x%.8x\n",scanline,condition_to_str[cc],ypos>>1,link);
628                         switch (cc)
629                         {
630                         case CONDITION_EQUAL:
631                                 if (TOMReadWord(0xF00006, OP) == ypos || ypos == 0x7FF)
632                                         op_pointer = link;
633                                 break;
634                         case CONDITION_LESS_THAN:
635                                 if (TOMReadWord(0xF00006, OP) < ypos)
636                                         op_pointer = link;
637                                 break;
638                         case CONDITION_GREATER_THAN:
639                                 if (TOMReadWord(0xF00006, OP) > ypos)
640                                         op_pointer = link;
641                                 break;
642                         case CONDITION_OP_FLAG_SET:
643                                 if (op_get_status_register() & 0x01)
644                                         op_pointer = link;
645                                 break;
646                         case CONDITION_SECOND_HALF_LINE:
647                                 // This basically means branch if bit 10 of HC is set
648                                 WriteLog("OP: Unexpected CONDITION_SECOND_HALF_LINE in BRANCH object\nOP: shuting down\n");
649                                 fclose(log_get());
650                                 exit(0);
651                                 break;
652                         default:
653                                 WriteLog("OP: Unimplemented branch condition %i\n", cc);
654                         }
655                         break;
656                 }
657                 case OBJECT_TYPE_STOP:
658                 {
659 //op_start_log = 0;
660                         // unsure
661 //WriteLog("OP: --> STOP\n");
662 //                      op_set_status_register(((p0>>3) & 0xFFFFFFFF));
663 //This seems more likely...
664                         op_set_current_object(p0);
665                         
666                         if (p0 & 0x08)
667                         {
668                                 tom_set_pending_object_int();
669                                 if (tom_irq_enabled(IRQ_OPFLAG))// && jaguar_interrupt_handler_is_valid(64))
670                                         m68k_set_irq(7);                                // Cause an NMI to occur...
671                         }
672
673                         return;
674 //                      break;
675                 }
676                 default:
677                         WriteLog("op: unknown object type %i\n", ((uint8)p0 & 0x07)); 
678                         return;
679                 }
680         }
681 }
682
683 //
684 // Store fixed size bitmap in line buffer
685 //
686 void OPProcessFixedBitmap(uint64 p0, uint64 p1, bool render)
687 {
688 // Need to make sure that when writing that it stays within the line buffer...
689 // LBUF ($F01800 - $F01D9E) 360 x 32-bit RAM
690         uint8 depth = (p1 >> 12) & 0x07;                                // Color depth of image
691         int32 xpos = ((int16)((p1 << 4) & 0xFFFF)) >> 4;// Image xpos in LBUF
692         uint32 iwidth = (p1 >> 28) & 0x3FF;                             // Image width in *phrases*
693         uint32 data = (p0 >> 40) & 0xFFFFF8;                    // Pixel data address
694 //#ifdef OP_DEBUG_BMP
695         uint32  firstPix = (p1 >> 49) & 0x3F;
696         // "The LSB is significant only for scaled objects..." -JTRM
697         // "In 1 BPP mode, all five bits are significant. In 2 BPP mode, the top four are significant..."
698         firstPix &= 0x3E;
699 //#endif
700 // We can ignore the RELEASE (high order) bit for now--probably forever...!
701 //      uint8 flags = (p1 >> 45) & 0x0F;        // REFLECT, RMW, TRANS, RELEASE
702 //Optimize: break these out to their own BOOL values
703         uint8 flags = (p1 >> 45) & 0x07;                                // REFLECT (0), RMW (1), TRANS (2)
704         bool flagREFLECT = (flags & OPFLAG_REFLECT ? true : false),
705                 flagRMW = (flags & OPFLAG_RMW ? true : false),
706                 flagTRANS = (flags & OPFLAG_TRANS ? true : false);
707 // "For images with 1 to 4 bits/pixel the top 7 to 4 bits of the index
708 //  provide the most significant bits of the palette address."
709         uint8 index = (p1 >> 37) & 0xFE;                                // CLUT index offset (upper pix, 1-4 bpp)
710         uint32 pitch = (p1 >> 15) & 0x07;                               // Phrase pitch
711         pitch <<= 3;                                                                    // Optimization: Multiply pitch by 8
712
713 //      int16 scanlineWidth = tom_getVideoModeWidth();
714         uint8 * tom_ram_8 = tom_get_ram_pointer();
715         uint8 * paletteRAM = &tom_ram_8[0x400];
716         // This is OK as long as it's used correctly: For 16-bit RAM to RAM direct copies--NOT
717         // for use when using endian-corrected data (i.e., any of the *_word_read functions!)
718         uint16 * paletteRAM16 = (uint16 *)paletteRAM;
719
720 //      WriteLog("bitmap %ix? %ibpp at %i,? firstpix=? data=0x%.8x pitch %i hflipped=%s dwidth=? (linked to ?) RMW=%s Tranparent=%s\n",
721 //              iwidth, op_bitmap_bit_depth[bitdepth], xpos, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), (flags&OPFLAG_RMW ? "yes" : "no"), (flags&OPFLAG_TRANS ? "yes" : "no"));
722
723 // Is it OK to have a 0 for the data width??? (i.e., undocumented?)
724 // Seems to be... Seems that dwidth *can* be zero (i.e., reuse same line) as well.
725 // Pitch == 0 is OK too...
726 //      if (!render || op_pointer == 0 || ptr == 0 || pitch == 0)
727 //I'm not convinced that we need to concern ourselves with data & op_pointer here either!
728         if (!render || iwidth == 0)
729                 return;
730
731 //#define OP_DEBUG_BMP
732 //#ifdef OP_DEBUG_BMP
733 //      WriteLog("bitmap %ix%i %ibpp at %i,%i firstpix=%i data=0x%.8x pitch %i hflipped=%s dwidth=%i (linked to 0x%.8x) Transluency=%s\n",
734 //              iwidth, height, op_bitmap_bit_depth[bitdepth], xpos, ypos, firstPix, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), dwidth, op_pointer, (flags&OPFLAG_RMW ? "yes" : "no"));
735 //#endif
736
737 //      int32 leftMargin = xpos, rightMargin = (xpos + (phraseWidthToPixels[depth] * iwidth)) - 1;
738         int32 startPos = xpos, endPos = xpos +
739                 (!flagREFLECT ? (phraseWidthToPixels[depth] * iwidth) - 1
740                 : -((phraseWidthToPixels[depth] * iwidth) + 1));
741         uint32 clippedWidth = 0, phraseClippedWidth = 0, dataClippedWidth = 0;//, phrasePixel = 0;
742         bool in24BPPMode = (((GET16(tom_ram_8, 0x0028) >> 1) & 0x03) == 1 ? true : false);      // VMODE
743         // Not sure if this is Jaguar Two only location or what...
744         // From the docs, it is... If we want to limit here we should think of something else.
745 //      int32 limit = GET16(tom_ram_8, 0x0008);                 // LIMIT
746         int32 limit = 720;
747         int32 lbufWidth = (!in24BPPMode ? limit - 1 : (limit / 2) - 1); // Zero based limit...
748
749         // If the image is completely to the left or right of the line buffer, then bail.
750 //If in REFLECT mode, then these values are swapped! !!! FIX !!! [DONE]
751 //There are four possibilities:
752 //  1. image sits on left edge and no REFLECT; starts out of bounds but ends in bounds.
753 //  2. image sits on left edge and REFLECT; starts in bounds but ends out of bounds.
754 //  3. image sits on right edge and REFLECT; starts out of bounds but ends in bounds.
755 //  4. image sits on right edge and no REFLECT; starts in bounds but ends out of bounds.
756 //Numbers 2 & 4 can be caught by checking the LBUF clip while in the inner loop,
757 // numbers 1 & 3 are of concern.
758 // This *indirectly* handles only cases 2 & 4! And is WRONG is REFLECT is set...!
759 //      if (rightMargin < 0 || leftMargin > lbufWidth)
760
761 // It might be easier to swap these (if REFLECTed) and just use XPOS down below...
762 // That way, you could simply set XPOS to leftMargin if !REFLECT and to rightMargin otherwise.
763 // Still have to be careful with the DATA and IWIDTH values though...
764
765 //      if ((!flagREFLECT && (rightMargin < 0 || leftMargin > lbufWidth))
766 //              || (flagREFLECT && (leftMargin < 0 || rightMargin > lbufWidth)))
767 //              return;
768         if ((!flagREFLECT && (endPos < 0 || startPos > lbufWidth))
769                 || (flagREFLECT && (startPos < 0 || endPos > lbufWidth)))
770                 return;
771
772         // Otherwise, find the clip limits and clip the phrase as well...
773         // NOTE: I'm fudging here by letting the actual blit overstep the bounds of the
774         //       line buffer, but it shouldn't matter since there are two unused line
775         //       buffers below and nothing above and I'll at most write 8 bytes outside
776         //       the line buffer... I could use a fractional clip begin/end value, but
777         //       this makes the blit a *lot* more hairy. I might fix this in the future
778         //       if it becomes necessary. (JLH)
779         //       Probably wouldn't be *that* hairy. Just use a delta that tells the inner loop
780         //       which pixel in the phrase is being written, and quit when either end of phrases
781         //       is reached or line buffer extents are surpassed.
782
783 //This stuff is probably wrong as well... !!! FIX !!!
784 //The strange thing is that it seems to work, but that's no guarantee that it's bulletproof!
785 //Yup. Seems that JagMania doesn't work correctly with this...
786 //Dunno if this is the problem, but Atari Karts is showing *some* of the road now...
787 //      if (!flagREFLECT)
788
789 /*
790         if (leftMargin < 0)
791                 clippedWidth = 0 - leftMargin,
792                 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth],
793                 leftMargin = 0 - (clippedWidth % phraseWidthToPixels[depth]);
794 //              leftMargin = 0;
795
796         if (rightMargin > lbufWidth)
797                 clippedWidth = rightMargin - lbufWidth,
798                 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];//,
799 //              rightMargin = lbufWidth + (clippedWidth % phraseWidthToPixels[depth]);
800 //              rightMargin = lbufWidth;
801 */
802 if (depth > 5)
803         WriteLog("OP: We're about to encounter a divide by zero error!\n");
804         // NOTE: We're just using endPos to figure out how much, if any, to clip by.
805         // ALSO: There may be another case where we start out of bounds and end out of bounds...!
806         // !!! FIX !!!
807         if (startPos < 0)                       // Case #1: Begin out, end in, L to R
808                 clippedWidth = 0 - startPos,
809                 dataClippedWidth = phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth],
810                 startPos = 0 - (clippedWidth % phraseWidthToPixels[depth]);
811
812         if (endPos < 0)                         // Case #2: Begin in, end out, R to L
813                 clippedWidth = 0 - endPos,
814                 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];
815
816         if (endPos > lbufWidth)         // Case #3: Begin in, end out, L to R
817                 clippedWidth = endPos - lbufWidth,
818                 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];
819
820         if (startPos > lbufWidth)       // Case #4: Begin out, end in, R to L
821                 clippedWidth = startPos - lbufWidth,
822                 dataClippedWidth = phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth],
823                 startPos = lbufWidth + (clippedWidth % phraseWidthToPixels[depth]);
824
825         // If the image is sitting on the line buffer left or right edge, we need to compensate
826         // by decreasing the image phrase width accordingly.
827         iwidth -= phraseClippedWidth;
828
829         // Also, if we're clipping the phrase we need to make sure we're in the correct part of
830         // the pixel data.
831 //      data += phraseClippedWidth * (pitch << 3);
832         data += dataClippedWidth * pitch;
833
834         // NOTE: When the bitmap is in REFLECT mode, the XPOS marks the *right* side of the
835         //       bitmap! This makes clipping & etc. MUCH, much easier...!
836 //      uint32 lbufAddress = 0x1800 + (!in24BPPMode ? leftMargin * 2 : leftMargin * 4);
837 //Why does this work right when multiplying startPos by 2 (instead of 4) for 24 BPP mode?
838 //Is this a bug in the OP?
839         uint32 lbufAddress = 0x1800 + (!in24BPPMode ? startPos * 2 : startPos * 2);
840         uint8 * currentLineBuffer = &tom_ram_8[lbufAddress];
841
842         // Render.
843
844 // Hmm. We check above for 24 BPP mode, but don't do anything about it below...
845 // If we *were* in 24 BPP mode, how would you convert CRY to RGB24? Seems to me
846 // that if you're in CRY mode then you wouldn't be able to use 24 BPP bitmaps
847 // anyway.
848 // This seems to be the case (at least according to the Midsummer docs)...!
849
850         if (depth == 0)                                                                 // 1 BPP
851         {
852                 // The LSB of flags is OPFLAG_REFLECT, so sign extend it and or 2 into it.
853                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
854
855                 // Fetch 1st phrase...
856                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
857 //Note that firstPix should only be honored *if* we start with the 1st phrase of the bitmap
858 //i.e., we didn't clip on the margin... !!! FIX !!!
859                 pixels <<= firstPix;                                            // Skip first N pixels (N=firstPix)...
860                 int i = firstPix;                                                       // Start counter at right spot...
861
862                 while (iwidth--)
863                 {
864                         while (i++ < 64)
865                         {
866                                 uint8 bit = pixels >> 63;
867                                 if (flagTRANS && bit == 0)
868                                         ;       // Do nothing...
869                                 else
870                                 {
871                                         if (!flagRMW)
872 //Optimize: Set palleteRAM16 to beginning of palette RAM + index*2 and use only [bit] as index...
873 //Won't optimize RMW case though...
874                                                 // This is the *only* correct use of endian-dependent code
875                                                 // (i.e., mem-to-mem direct copying)!
876                                                 *(uint16 *)currentLineBuffer = paletteRAM16[index | bit];
877                                         else
878                                                 *currentLineBuffer = 
879                                                         BLEND_CR(*currentLineBuffer, paletteRAM[(index | bit) << 1]),
880                                                 *(currentLineBuffer + 1) = 
881                                                         BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bit) << 1) + 1]);
882                                 }
883
884                                 currentLineBuffer += lbufDelta;
885                                 pixels <<= 1;
886                         }
887                         i = 0;
888                         // Fetch next phrase...
889                         data += pitch;
890                         pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
891                 }
892         }
893         else if (depth == 1)                                                    // 2 BPP
894         {
895 if (firstPix)
896         WriteLog("OP: Fixed bitmap @ 2 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
897                 index &= 0xFC;                                                          // Top six bits form CLUT index
898                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
899                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
900
901                 while (iwidth--)
902                 {
903                         // Fetch phrase...
904                         uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
905                         data += pitch;
906
907                         for(int i=0; i<32; i++)
908                         {
909                                 uint8 bits = pixels >> 62;
910 // Seems to me that both of these are in the same endian, so we could cast it as
911 // uint16 * and do straight across copies (what about 24 bpp? Treat it differently...)
912 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
913 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
914 // No, it isn't because we read the memory in an endian safe way--this *won't* work...
915                                 if (flagTRANS && bits == 0)
916                                         ;       // Do nothing...
917                                 else
918                                 {
919                                         if (!flagRMW)
920                                                 *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
921                                         else
922                                                 *currentLineBuffer = 
923                                                         BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
924                                                 *(currentLineBuffer + 1) = 
925                                                         BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
926                                 }
927
928                                 currentLineBuffer += lbufDelta;
929                                 pixels <<= 2;
930                         }
931                 }
932         }
933         else if (depth == 2)                                                    // 4 BPP
934         {
935 if (firstPix)
936         WriteLog("OP: Fixed bitmap @ 4 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
937                 index &= 0xF0;                                                          // Top four bits form CLUT index
938                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
939                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
940
941                 while (iwidth--)
942                 {
943                         // Fetch phrase...
944                         uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
945                         data += pitch;
946
947                         for(int i=0; i<16; i++)
948                         {
949                                 uint8 bits = pixels >> 60;
950 // Seems to me that both of these are in the same endian, so we could cast it as
951 // uint16 * and do straight across copies (what about 24 bpp? Treat it differently...)
952 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
953 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
954 // No, it isn't because we read the memory in an endian safe way--this *won't* work...
955                                 if (flagTRANS && bits == 0)
956                                         ;       // Do nothing...
957                                 else
958                                 {
959                                         if (!flagRMW)
960                                                 *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
961                                         else
962                                                 *currentLineBuffer = 
963                                                         BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
964                                                 *(currentLineBuffer + 1) = 
965                                                         BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
966                                 }
967
968                                 currentLineBuffer += lbufDelta;
969                                 pixels <<= 4;
970                         }
971                 }
972         }
973         else if (depth == 3)                                                    // 8 BPP
974         {
975                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
976                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
977
978                 // Fetch 1st phrase...
979                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
980 //Note that firstPix should only be honored *if* we start with the 1st phrase of the bitmap
981 //i.e., we didn't clip on the margin... !!! FIX !!!
982                 firstPix &= 0x30;                                                       // Only top two bits are valid for 8 BPP
983                 pixels <<= firstPix;                                            // Skip first N pixels (N=firstPix)...
984                 int i = firstPix >> 3;                                          // Start counter at right spot...
985
986                 while (iwidth--)
987                 {
988                         while (i++ < 8)
989                         {
990                                 uint8 bits = pixels >> 56;
991 // Seems to me that both of these are in the same endian, so we could cast it as
992 // uint16 * and do straight across copies (what about 24 bpp? Treat it differently...)
993 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
994 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
995 // No, it isn't because we read the memory in an endian safe way--this *won't* work...
996                                 if (flagTRANS && bits == 0)
997                                         ;       // Do nothing...
998                                 else
999                                 {
1000                                         if (!flagRMW)
1001                                                 *(uint16 *)currentLineBuffer = paletteRAM16[bits];
1002                                         else
1003                                                 *currentLineBuffer = 
1004                                                         BLEND_CR(*currentLineBuffer, paletteRAM[bits << 1]),
1005                                                 *(currentLineBuffer + 1) = 
1006                                                         BLEND_Y(*(currentLineBuffer + 1), paletteRAM[(bits << 1) + 1]);
1007                                 }
1008
1009                                 currentLineBuffer += lbufDelta;
1010                                 pixels <<= 8;
1011                         }
1012                         i = 0;
1013                         // Fetch next phrase...
1014                         data += pitch;
1015                         pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1016                 }
1017         }
1018         else if (depth == 4)                                                    // 16 BPP
1019         {
1020 if (firstPix)
1021         WriteLog("OP: Fixed bitmap @ 16 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1022                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1023                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1024
1025                 while (iwidth--)
1026                 {
1027                         // Fetch phrase...
1028                         uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1029                         data += pitch;
1030
1031                         for(int i=0; i<4; i++)
1032                         {
1033                                 uint8 bitsHi = pixels >> 56, bitsLo = pixels >> 48;
1034 // Seems to me that both of these are in the same endian, so we could cast it as
1035 // uint16 * and do straight across copies (what about 24 bpp? Treat it differently...)
1036 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
1037 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
1038 // No, it isn't because we read the memory in an endian safe way--it *won't* work...
1039                                 if (flagTRANS && (bitsLo | bitsHi) == 0)
1040                                         ;       // Do nothing...
1041                                 else
1042                                 {
1043                                         if (!flagRMW)
1044                                                 *currentLineBuffer = bitsHi,
1045                                                 *(currentLineBuffer + 1) = bitsLo;
1046                                         else
1047                                                 *currentLineBuffer = 
1048                                                         BLEND_CR(*currentLineBuffer, bitsHi),
1049                                                 *(currentLineBuffer + 1) = 
1050                                                         BLEND_Y(*(currentLineBuffer + 1), bitsLo);
1051                                 }
1052
1053                                 currentLineBuffer += lbufDelta;
1054                                 pixels <<= 16;
1055                         }
1056                 }
1057         }
1058         else if (depth == 5)                                                    // 24 BPP
1059         {
1060 //Looks like Iron Soldier is the only game that uses 24BPP mode...
1061 //There *might* be others...
1062 //WriteLog("OP: Writing 24 BPP bitmap!\n");
1063 if (firstPix)
1064         WriteLog("OP: Fixed bitmap @ 24 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1065                 // Not sure, but I think RMW only works with 16 BPP and below, and only in CRY mode...
1066                 // The LSB of flags is OPFLAG_REFLECT, so sign extend it and OR 4 into it.
1067                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 4) | 0x04;
1068
1069                 while (iwidth--)
1070                 {
1071                         // Fetch phrase...
1072                         uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1073                         data += pitch;
1074
1075                         for(int i=0; i<2; i++)
1076                         {
1077                                 // We don't use a 32-bit var here because of endian issues...!
1078                                 uint8 bits3 = pixels >> 56, bits2 = pixels >> 48,
1079                                         bits1 = pixels >> 40, bits0 = pixels >> 32;
1080
1081                                 if (flagTRANS && (bits3 | bits2 | bits1 | bits0) == 0)
1082                                         ;       // Do nothing...
1083                                 else
1084                                         *currentLineBuffer = bits3,
1085                                         *(currentLineBuffer + 1) = bits2,
1086                                         *(currentLineBuffer + 2) = bits1,
1087                                         *(currentLineBuffer + 3) = bits0;
1088
1089                                 currentLineBuffer += lbufDelta;
1090                                 pixels <<= 32;
1091                         }
1092                 }
1093         }
1094 }
1095
1096 //
1097 // Store scaled bitmap in line buffer
1098 //
1099 void OPProcessScaledBitmap(uint64 p0, uint64 p1, uint64 p2, bool render)
1100 {
1101 // Need to make sure that when writing that it stays within the line buffer...
1102 // LBUF ($F01800 - $F01D9E) 360 x 32-bit RAM
1103         uint8 depth = (p1 >> 12) & 0x07;                                // Color depth of image
1104         int32 xpos = ((int16)((p1 << 4) & 0xFFFF)) >> 4;// Image xpos in LBUF
1105         uint32 iwidth = (p1 >> 28) & 0x3FF;                             // Image width in *phrases*
1106         uint32 data = (p0 >> 40) & 0xFFFFF8;                    // Pixel data address
1107 //#ifdef OP_DEBUG_BMP
1108 // Prolly should use this... Though not sure exactly how.
1109 //Use the upper bits as an offset into the phrase depending on the BPP. That's how!
1110         uint32 firstPix = (p1 >> 49) & 0x3F;
1111 //This is WEIRD! I'm sure I saw Atari Karts request 8 BPP FIRSTPIX! What happened???
1112 if (firstPix)
1113         WriteLog("OP: FIRSTPIX != 0! (Scaled BM)\n");
1114 //#endif
1115 // We can ignore the RELEASE (high order) bit for now--probably forever...!
1116 //      uint8 flags = (p1 >> 45) & 0x0F;        // REFLECT, RMW, TRANS, RELEASE
1117 //Optimize: break these out to their own BOOL values [DONE]
1118         uint8 flags = (p1 >> 45) & 0x07;                                // REFLECT (0), RMW (1), TRANS (2)
1119         bool flagREFLECT = (flags & OPFLAG_REFLECT ? true : false),
1120                 flagRMW = (flags & OPFLAG_RMW ? true : false),
1121                 flagTRANS = (flags & OPFLAG_TRANS ? true : false);
1122         uint8 index = (p1 >> 37) & 0xFE;                                // CLUT index offset (upper pix, 1-4 bpp)
1123         uint32 pitch = (p1 >> 15) & 0x07;                               // Phrase pitch
1124
1125         uint8 * tom_ram_8 = tom_get_ram_pointer();
1126         uint8 * paletteRAM = &tom_ram_8[0x400];
1127         // This is OK as long as it's used correctly: For 16-bit RAM to RAM direct copies--NOT
1128         // for use when using endian-corrected data (i.e., any of the *ReadWord functions!)
1129         uint16 * paletteRAM16 = (uint16 *)paletteRAM;
1130
1131         uint8 hscale = p2 & 0xFF;
1132 //      uint8 horizontalRemainder = hscale;                             // Not sure if it starts full, but seems reasonable [It's not!]
1133         uint8 horizontalRemainder = 0;                                  // Let's try zero! Seems to work! Yay!
1134         int32 scaledWidthInPixels = (iwidth * phraseWidthToPixels[depth] * hscale) >> 5;
1135         uint32 scaledPhrasePixels = (phraseWidthToPixels[depth] * hscale) >> 5;
1136
1137 //      WriteLog("bitmap %ix? %ibpp at %i,? firstpix=? data=0x%.8x pitch %i hflipped=%s dwidth=? (linked to ?) RMW=%s Tranparent=%s\n",
1138 //              iwidth, op_bitmap_bit_depth[bitdepth], xpos, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), (flags&OPFLAG_RMW ? "yes" : "no"), (flags&OPFLAG_TRANS ? "yes" : "no"));
1139
1140 // Looks like an hscale of zero means don't draw!
1141         if (!render || iwidth == 0 || hscale == 0)
1142                 return;
1143
1144 /*extern int start_logging;
1145 if (start_logging)
1146         WriteLog("OP: Scaled bitmap %ix? %ibpp at %i,? hscale=%02X fpix=%i data=%08X pitch %i hflipped=%s dwidth=? (linked to %08X) Transluency=%s\n",
1147                 iwidth, op_bitmap_bit_depth[depth], xpos, hscale, firstPix, data, pitch, (flagREFLECT ? "yes" : "no"), op_pointer, (flagRMW ? "yes" : "no"));*/
1148 //#define OP_DEBUG_BMP
1149 //#ifdef OP_DEBUG_BMP
1150 //      WriteLog("OP: Scaled bitmap %ix%i %ibpp at %i,%i firstpix=%i data=0x%.8x pitch %i hflipped=%s dwidth=%i (linked to 0x%.8x) Transluency=%s\n",
1151 //              iwidth, height, op_bitmap_bit_depth[bitdepth], xpos, ypos, firstPix, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), dwidth, op_pointer, (flags&OPFLAG_RMW ? "yes" : "no"));
1152 //#endif
1153
1154         int32 startPos = xpos, endPos = xpos +
1155                 (!flagREFLECT ? scaledWidthInPixels - 1 : -(scaledWidthInPixels + 1));
1156         uint32 clippedWidth = 0, phraseClippedWidth = 0, dataClippedWidth = 0;
1157         bool in24BPPMode = (((GET16(tom_ram_8, 0x0028) >> 1) & 0x03) == 1 ? true : false);      // VMODE
1158         // Not sure if this is a Jaguar Two-only location or what...
1159         // From the docs, it is... If we want to limit here we should think of something else.
1160 //      int32 limit = GET16(tom_ram_8, 0x0008);                 // LIMIT
1161         int32 limit = 720;
1162         int32 lbufWidth = (!in24BPPMode ? limit - 1 : (limit / 2) - 1); // Zero based limit...
1163
1164         // If the image is completely to the left or right of the line buffer, then bail.
1165 //If in REFLECT mode, then these values are swapped! !!! FIX !!! [DONE]
1166 //There are four possibilities:
1167 //  1. image sits on left edge and no REFLECT; starts out of bounds but ends in bounds.
1168 //  2. image sits on left edge and REFLECT; starts in bounds but ends out of bounds.
1169 //  3. image sits on right edge and REFLECT; starts out of bounds but ends in bounds.
1170 //  4. image sits on right edge and no REFLECT; starts in bounds but ends out of bounds.
1171 //Numbers 2 & 4 can be caught by checking the LBUF clip while in the inner loop,
1172 // numbers 1 & 3 are of concern.
1173 // This *indirectly* handles only cases 2 & 4! And is WRONG if REFLECT is set...!
1174 //      if (rightMargin < 0 || leftMargin > lbufWidth)
1175
1176 // It might be easier to swap these (if REFLECTed) and just use XPOS down below...
1177 // That way, you could simply set XPOS to leftMargin if !REFLECT and to rightMargin otherwise.
1178 // Still have to be careful with the DATA and IWIDTH values though...
1179
1180         if ((!flagREFLECT && (endPos < 0 || startPos > lbufWidth))
1181                 || (flagREFLECT && (startPos < 0 || endPos > lbufWidth)))
1182                 return;
1183
1184         // Otherwise, find the clip limits and clip the phrase as well...
1185         // NOTE: I'm fudging here by letting the actual blit overstep the bounds of the
1186         //       line buffer, but it shouldn't matter since there are two unused line
1187         //       buffers below and nothing above and I'll at most write 40 bytes outside
1188         //       the line buffer... I could use a fractional clip begin/end value, but
1189         //       this makes the blit a *lot* more hairy. I might fix this in the future
1190         //       if it becomes necessary. (JLH)
1191         //       Probably wouldn't be *that* hairy. Just use a delta that tells the inner loop
1192         //       which pixel in the phrase is being written, and quit when either the end of the
1193         //       phrases is reached or the line buffer extents are surpassed.
1194
1195 //This stuff is probably wrong as well... !!! FIX !!!
1196 //The strange thing is that it seems to work, but that's no guarantee that it's bulletproof!
1197 //Yup. Seems that JagMania doesn't work correctly with this...
1198 //Dunno if this is the problem, but Atari Karts is showing *some* of the road now...
1199 //Actually, it is! Or, it was. It doesn't seem to be clipping here, so the problem lies
1200 //elsewhere! Hmm. Putting the scaling code into the 1/2/8 BPP cases seems to draw the ground
1201 // a bit more accurately... Strange!
1202 //It's probably a case of the REFLECT flag being set and the background being written
1203 //from the right side of the screen...
1204 //But no, it isn't... At least if the diagnostics are telling the truth!
1205
1206         // NOTE: We're just using endPos to figure out how much, if any, to clip by.
1207         // ALSO: There may be another case where we start out of bounds and end out of bounds...!
1208         // !!! FIX !!!
1209
1210 //There's a problem here with scaledPhrasePixels in that it can be forced to zero when
1211 //the scaling factor is small. So fix it already! !!! FIX !!!
1212 /*if (scaledPhrasePixels == 0)
1213 {
1214         WriteLog("OP: [Scaled] We're about to encounter a divide by zero error!\n");
1215         DumpScaledObject(p0, p1, p2);
1216 }//*/
1217 //NOTE: I'm almost 100% sure that this is wrong... And it is! :-p
1218
1219 //Try a simple example...
1220 // Let's say we have an 8 BPP scanline with an hscale of $80 (4.0 in 3.5 fixed point). Our xpos is -10,
1221 // non-flipped. Pixels in the bitmap are XYZXYZXYZXYZXYZ.
1222 // Scaled up, they would be XXXXYYYYZZZZXXXXYYYYZZZZXXXXYYYYZZZZ...
1223 //
1224 // Normally, we would expect this in the line buffer:
1225 // ZZXXXXYYYYZZZZXXXXYYYYZZZZ...
1226 //
1227 // But instead we're getting:
1228 // XXXXYYYYZZZZXXXXYYYYZZZZ...
1229 //
1230 // or are we??? It would seem so, simply by virtue of the fact that we're NOT starting
1231 // on negative boundary--or are we? Hmm...
1232 // cw = 10, dcw = pcw = 10 / ([8 * 4 = 32] 32) = 0, sp = -10
1233 //
1234 // Let's try a real world example:
1235 //
1236 //OP: Scaled bitmap (70, 8 BPP, spp=28) sp (-400) < 0... [new sp=-8, cw=400, dcw=pcw=14]
1237 //OP: Scaled bitmap (6F, 8 BPP, spp=27) sp (-395) < 0... [new sp=-17, cw=395, dcw=pcw=14]
1238 //
1239 // Really, spp is 27.75 in the second case...
1240 // So... If we do 395 / 27.75, we get 14 (rounded down). OK so far... If we scale that back
1241 // up and add it to the start position (-395 + 14 * 27.75 = -395 + 388.5), we get -6.5... NOT -17!
1242
1243 //Now it seems we're working OK, at least for the first case...
1244 uint32 scaledPhrasePixelsUS = phraseWidthToPixels[depth] * hscale;
1245
1246         if (startPos < 0)                       // Case #1: Begin out, end in, L to R
1247 {
1248 extern int start_logging;
1249 if (start_logging)
1250         WriteLog("OP: Scaled bitmap (%02X, %u BPP, spp=%u) start pos (%i) < 0...", hscale, op_bitmap_bit_depth[depth], scaledPhrasePixels, startPos);
1251 //              clippedWidth = 0 - startPos,
1252                 clippedWidth = (0 - startPos) << 5,
1253 //              dataClippedWidth = phraseClippedWidth = clippedWidth / scaledPhrasePixels,
1254                 dataClippedWidth = phraseClippedWidth = (clippedWidth / scaledPhrasePixelsUS) >> 5,
1255 //              startPos = 0 - (clippedWidth % scaledPhrasePixels);
1256                 startPos += (dataClippedWidth * scaledPhrasePixelsUS) >> 5;
1257 if (start_logging)
1258         WriteLog(" [new sp=%i, cw=%i, dcw=pcw=%i]\n", startPos, clippedWidth, dataClippedWidth);
1259 }
1260
1261         if (endPos < 0)                         // Case #2: Begin in, end out, R to L
1262                 clippedWidth = 0 - endPos,
1263                 phraseClippedWidth = clippedWidth / scaledPhrasePixels;
1264
1265         if (endPos > lbufWidth)         // Case #3: Begin in, end out, L to R
1266                 clippedWidth = endPos - lbufWidth,
1267                 phraseClippedWidth = clippedWidth / scaledPhrasePixels;
1268
1269         if (startPos > lbufWidth)       // Case #4: Begin out, end in, R to L
1270                 clippedWidth = startPos - lbufWidth,
1271                 dataClippedWidth = phraseClippedWidth = clippedWidth / scaledPhrasePixels,
1272                 startPos = lbufWidth + (clippedWidth % scaledPhrasePixels);
1273
1274 extern int op_start_log;
1275 if (op_start_log && clippedWidth != 0)
1276         WriteLog("OP: Clipped line. SP=%i, EP=%i, clip=%u, iwidth=%u, hscale=%02X\n", startPos, endPos, clippedWidth, iwidth, hscale);
1277 if (op_start_log && startPos == 13)
1278 {
1279         WriteLog("OP: Scaled line. SP=%i, EP=%i, clip=%u, iwidth=%u, hscale=%02X, depth=%u, firstPix=%u\n", startPos, endPos, clippedWidth, iwidth, hscale, depth, firstPix);
1280         DumpScaledObject(p0, p1, p2);
1281         if (iwidth == 7)
1282         {
1283                 WriteLog("    %08X: ", data);
1284                 for(int i=0; i<7*8; i++)
1285                         WriteLog("%02X ", JaguarReadByte(data+i));
1286                 WriteLog("\n");
1287         }
1288 }
1289         // If the image is sitting on the line buffer left or right edge, we need to compensate
1290         // by decreasing the image phrase width accordingly.
1291         iwidth -= phraseClippedWidth;
1292
1293         // Also, if we're clipping the phrase we need to make sure we're in the correct part of
1294         // the pixel data.
1295 //      data += phraseClippedWidth * (pitch << 3);
1296         data += dataClippedWidth * (pitch << 3);
1297
1298         // NOTE: When the bitmap is in REFLECT mode, the XPOS marks the *right* side of the
1299         //       bitmap! This makes clipping & etc. MUCH, much easier...!
1300 //      uint32 lbufAddress = 0x1800 + (!in24BPPMode ? leftMargin * 2 : leftMargin * 4);
1301 //      uint32 lbufAddress = 0x1800 + (!in24BPPMode ? startPos * 2 : startPos * 4);
1302         uint32 lbufAddress = 0x1800 + startPos * 2;
1303         uint8 * currentLineBuffer = &tom_ram_8[lbufAddress];
1304 //uint8 * lineBufferLowerLimit = &tom_ram_8[0x1800],
1305 //      * lineBufferUpperLimit = &tom_ram_8[0x1800 + 719];
1306
1307         // Render.
1308
1309 // Hmm. We check above for 24 BPP mode, but don't do anything about it below...
1310 // If we *were* in 24 BPP mode, how would you convert CRY to RGB24? Seems to me
1311 // that if you're in CRY mode then you wouldn't be able to use 24 BPP bitmaps
1312 // anyway.
1313 // This seems to be the case (at least according to the Midsummer docs)...!
1314
1315         if (depth == 0)                                                                 // 1 BPP
1316         {
1317 if (firstPix != 0)
1318         WriteLog("OP: Scaled bitmap @ 1 BPP requesting FIRSTPIX!\n");
1319                 // The LSB of flags is OPFLAG_REFLECT, so sign extend it and OR 2 into it.
1320                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1321
1322                 int pixCount = 0;
1323                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1324
1325                 while ((int32)iwidth > 0)
1326                 {
1327                         uint8 bits = pixels >> 63;
1328
1329                         if (flagTRANS && bits == 0)
1330                                 ;       // Do nothing...
1331                         else
1332                         {
1333                                 if (!flagRMW)
1334                                         // This is the *only* correct use of endian-dependent code
1335                                         // (i.e., mem-to-mem direct copying)!
1336                                         *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1337                                 else
1338                                         *currentLineBuffer = 
1339                                                 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1340                                         *(currentLineBuffer + 1) = 
1341                                                 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1342                         }
1343
1344                         currentLineBuffer += lbufDelta;
1345
1346 /*                      horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1347                         while (horizontalRemainder & 0x80)
1348                         {
1349                                 horizontalRemainder += hscale;
1350                                 pixCount++;
1351                                 pixels <<= 1;
1352                         }//*/
1353                         while (horizontalRemainder <= 0x20)             // I.e., it's <= 0 (*before* subtraction)
1354                         {
1355                                 horizontalRemainder += hscale;
1356                                 pixCount++;
1357                                 pixels <<= 1;
1358                         }
1359                         horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1360
1361                         if (pixCount > 63)
1362                         {
1363                                 int phrasesToSkip = pixCount / 64, pixelShift = pixCount % 64;
1364
1365                                 data += (pitch << 3) * phrasesToSkip;
1366                                 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1367                                 pixels <<= 1 * pixelShift;
1368                                 iwidth -= phrasesToSkip;
1369                                 pixCount = pixelShift;
1370                         }
1371                 }
1372         }
1373         else if (depth == 1)                                                    // 2 BPP
1374         {
1375 if (firstPix != 0)
1376         WriteLog("OP: Scaled bitmap @ 2 BPP requesting FIRSTPIX!\n");
1377                 index &= 0xFC;                                                          // Top six bits form CLUT index
1378                 // The LSB is OPFLAG_REFLECT, so sign extend it and OR 2 into it.
1379                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1380
1381                 int pixCount = 0;
1382                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1383
1384                 while ((int32)iwidth > 0)
1385                 {
1386                         uint8 bits = pixels >> 62;
1387
1388                         if (flagTRANS && bits == 0)
1389                                 ;       // Do nothing...
1390                         else
1391                         {
1392                                 if (!flagRMW)
1393                                         // This is the *only* correct use of endian-dependent code
1394                                         // (i.e., mem-to-mem direct copying)!
1395                                         *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1396                                 else
1397                                         *currentLineBuffer = 
1398                                                 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1399                                         *(currentLineBuffer + 1) = 
1400                                                 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1401                         }
1402
1403                         currentLineBuffer += lbufDelta;
1404
1405 /*                      horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1406                         while (horizontalRemainder & 0x80)
1407                         {
1408                                 horizontalRemainder += hscale;
1409                                 pixCount++;
1410                                 pixels <<= 2;
1411                         }//*/
1412                         while (horizontalRemainder <= 0x20)             // I.e., it's <= 0 (*before* subtraction)
1413                         {
1414                                 horizontalRemainder += hscale;
1415                                 pixCount++;
1416                                 pixels <<= 2;
1417                         }
1418                         horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1419
1420                         if (pixCount > 31)
1421                         {
1422                                 int phrasesToSkip = pixCount / 32, pixelShift = pixCount % 32;
1423
1424                                 data += (pitch << 3) * phrasesToSkip;
1425                                 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1426                                 pixels <<= 2 * pixelShift;
1427                                 iwidth -= phrasesToSkip;
1428                                 pixCount = pixelShift;
1429                         }
1430                 }
1431         }
1432         else if (depth == 2)                                                    // 4 BPP
1433         {
1434 if (firstPix != 0)
1435         WriteLog("OP: Scaled bitmap @ 4 BPP requesting FIRSTPIX!\n");
1436                 index &= 0xF0;                                                          // Top four bits form CLUT index
1437                 // The LSB is OPFLAG_REFLECT, so sign extend it and OR 2 into it.
1438                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1439
1440                 int pixCount = 0;
1441                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1442
1443                 while ((int32)iwidth > 0)
1444                 {
1445                         uint8 bits = pixels >> 60;
1446
1447                         if (flagTRANS && bits == 0)
1448                                 ;       // Do nothing...
1449                         else
1450                         {
1451                                 if (!flagRMW)
1452                                         // This is the *only* correct use of endian-dependent code
1453                                         // (i.e., mem-to-mem direct copying)!
1454                                         *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1455                                 else
1456                                         *currentLineBuffer = 
1457                                                 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1458                                         *(currentLineBuffer + 1) = 
1459                                                 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1460                         }
1461
1462                         currentLineBuffer += lbufDelta;
1463
1464 /*                      horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1465                         while (horizontalRemainder & 0x80)
1466                         {
1467                                 horizontalRemainder += hscale;
1468                                 pixCount++;
1469                                 pixels <<= 4;
1470                         }//*/
1471                         while (horizontalRemainder <= 0x20)             // I.e., it's <= 0 (*before* subtraction)
1472                         {
1473                                 horizontalRemainder += hscale;
1474                                 pixCount++;
1475                                 pixels <<= 4;
1476                         }
1477                         horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1478
1479                         if (pixCount > 15)
1480                         {
1481                                 int phrasesToSkip = pixCount / 16, pixelShift = pixCount % 16;
1482
1483                                 data += (pitch << 3) * phrasesToSkip;
1484                                 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1485                                 pixels <<= 4 * pixelShift;
1486                                 iwidth -= phrasesToSkip;
1487                                 pixCount = pixelShift;
1488                         }
1489                 }
1490         }
1491         else if (depth == 3)                                                    // 8 BPP
1492         {
1493 if (firstPix)
1494         WriteLog("OP: Scaled bitmap @ 8 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1495                 // The LSB is OPFLAG_REFLECT, so sign extend it and OR 2 into it.
1496                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1497
1498                 int pixCount = 0;
1499                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1500
1501                 while ((int32)iwidth > 0)
1502                 {
1503                         uint8 bits = pixels >> 56;
1504
1505                         if (flagTRANS && bits == 0)
1506                                 ;       // Do nothing...
1507                         else
1508                         {
1509                                 if (!flagRMW)
1510                                         // This is the *only* correct use of endian-dependent code
1511                                         // (i.e., mem-to-mem direct copying)!
1512                                         *(uint16 *)currentLineBuffer = paletteRAM16[bits];
1513 /*                              {
1514                                         if (currentLineBuffer >= lineBufferLowerLimit && currentLineBuffer <= lineBufferUpperLimit)
1515                                                 *(uint16 *)currentLineBuffer = paletteRAM16[bits];
1516                                 }*/
1517                                 else
1518                                         *currentLineBuffer = 
1519                                                 BLEND_CR(*currentLineBuffer, paletteRAM[bits << 1]),
1520                                         *(currentLineBuffer + 1) = 
1521                                                 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[(bits << 1) + 1]);
1522                         }
1523
1524                         currentLineBuffer += lbufDelta;
1525
1526                         while (horizontalRemainder <= 0x20)             // I.e., it's <= 0 (*before* subtraction)
1527                         {
1528                                 horizontalRemainder += hscale;
1529                                 pixCount++;
1530                                 pixels <<= 8;
1531                         }
1532                         horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1533
1534                         if (pixCount > 7)
1535                         {
1536                                 int phrasesToSkip = pixCount / 8, pixelShift = pixCount % 8;
1537
1538                                 data += (pitch << 3) * phrasesToSkip;
1539                                 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1540                                 pixels <<= 8 * pixelShift;
1541                                 iwidth -= phrasesToSkip;
1542                                 pixCount = pixelShift;
1543                         }
1544                 }
1545         }
1546         else if (depth == 4)                                                    // 16 BPP
1547         {
1548 if (firstPix != 0)
1549         WriteLog("OP: Scaled bitmap @ 16 BPP requesting FIRSTPIX!\n");
1550                 // The LSB is OPFLAG_REFLECT, so sign extend it and OR 2 into it.
1551                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1552
1553                 int pixCount = 0;
1554                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1555
1556                 while ((int32)iwidth > 0)
1557                 {
1558                         uint8 bitsHi = pixels >> 56, bitsLo = pixels >> 48;
1559
1560                         if (flagTRANS && (bitsLo | bitsHi) == 0)
1561                                 ;       // Do nothing...
1562                         else
1563                         {
1564                                 if (!flagRMW)
1565                                         *currentLineBuffer = bitsHi,
1566                                         *(currentLineBuffer + 1) = bitsLo;
1567                                 else
1568                                         *currentLineBuffer = 
1569                                                 BLEND_CR(*currentLineBuffer, bitsHi),
1570                                         *(currentLineBuffer + 1) = 
1571                                                 BLEND_Y(*(currentLineBuffer + 1), bitsLo);
1572                         }
1573
1574                         currentLineBuffer += lbufDelta;
1575
1576 /*                      horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1577                         while (horizontalRemainder & 0x80)
1578                         {
1579                                 horizontalRemainder += hscale;
1580                                 pixCount++;
1581                                 pixels <<= 16;
1582                         }//*/
1583                         while (horizontalRemainder <= 0x20)             // I.e., it's <= 0 (*before* subtraction)
1584                         {
1585                                 horizontalRemainder += hscale;
1586                                 pixCount++;
1587                                 pixels <<= 16;
1588                         }
1589                         horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1590
1591                         if (pixCount > 3)
1592                         {
1593                                 int phrasesToSkip = pixCount / 4, pixelShift = pixCount % 4;
1594
1595                                 data += (pitch << 3) * phrasesToSkip;
1596                                 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1597                                 pixels <<= 16 * pixelShift;
1598
1599                                 iwidth -= phrasesToSkip;
1600
1601                                 pixCount = pixelShift;
1602                         }
1603                 }
1604         }
1605         else if (depth == 5)                                                    // 24 BPP
1606         {
1607 //I'm not sure that you can scale a 24 BPP bitmap properly--the JTRM seems to indicate as much.
1608 WriteLog("OP: Writing 24 BPP scaled bitmap!\n");
1609 if (firstPix != 0)
1610         WriteLog("OP: Scaled bitmap @ 24 BPP requesting FIRSTPIX!\n");
1611                 // Not sure, but I think RMW only works with 16 BPP and below, and only in CRY mode...
1612                 // The LSB is OPFLAG_REFLECT, so sign extend it and OR 4 into it.
1613                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 4) | 0x04;
1614
1615                 while (iwidth--)
1616                 {
1617                         // Fetch phrase...
1618                         uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1619                         data += pitch << 3;                                             // Multiply pitch * 8 (optimize: precompute this value)
1620
1621                         for(int i=0; i<2; i++)
1622                         {
1623                                 uint8 bits3 = pixels >> 56, bits2 = pixels >> 48,
1624                                         bits1 = pixels >> 40, bits0 = pixels >> 32;
1625
1626                                 if (flagTRANS && (bits3 | bits2 | bits1 | bits0) == 0)
1627                                         ;       // Do nothing...
1628                                 else
1629                                         *currentLineBuffer = bits3,
1630                                         *(currentLineBuffer + 1) = bits2,
1631                                         *(currentLineBuffer + 2) = bits1,
1632                                         *(currentLineBuffer + 3) = bits0;
1633
1634                                 currentLineBuffer += lbufDelta;
1635                                 pixels <<= 32;
1636                         }
1637                 }
1638         }
1639 }