]> Shamusworld >> Repos - virtualjaguar/blob - src/op.cpp
38322d8e432a0fd9e6d8678d10b48024bc1d6296
[virtualjaguar] / src / op.cpp
1 //
2 // Object Processor
3 //
4 // Original source by David Raingeard (Cal2)
5 // GCC/SDL port by Niels Wagenaar (Linux/WIN32) and Caz (BeOS)
6 // Extensive cleanups/fixes/rewrites by James L. Hammons
7 // (C) 2010 Underground Software
8 //
9 // JLH = James L. Hammons <jlhamm@acm.org>
10 //
11 // Who  When        What
12 // ---  ----------  -------------------------------------------------------------
13 // JLH  01/16/2010  Created this log ;-)
14 //
15
16 #include "op.h"
17
18 #include <stdlib.h>
19 #include <string.h>
20 #include "gpu.h"
21 #include "jaguar.h"
22 #include "log.h"
23 #include "m68k.h"
24 #include "memory.h"
25 #include "tom.h"
26
27 //#define OP_DEBUG
28 //#define OP_DEBUG_BMP
29
// Blend table lookups. The existing (destination) component forms the high byte of
// the table index and the source delta forms the low byte, matching the layout that
// OPInit() uses when it fills op_blend_y / op_blend_cr.
// Both arguments are fully parenthesized before being cast so that expression
// arguments (e.g. "x & 0xFF") expand with the intended precedence.
#define BLEND_Y(dst, src)       op_blend_y[(((uint16)(dst)) << 8) | ((uint16)(src))]
#define BLEND_CR(dst, src)      op_blend_cr[(((uint16)(dst)) << 8) | ((uint16)(src))]
32
// Object types: the low three bits of an object's first phrase
#define OBJECT_TYPE_BITMAP              0       // 000
#define OBJECT_TYPE_SCALE               1       // 001
#define OBJECT_TYPE_GPU                 2       // 010
#define OBJECT_TYPE_BRANCH              3       // 011
#define OBJECT_TYPE_STOP                4       // 100

// Condition codes carried by branch objects
#define CONDITION_EQUAL                 0
#define CONDITION_LESS_THAN             1
#define CONDITION_GREATER_THAN          2
#define CONDITION_OP_FLAG_SET           3
#define CONDITION_SECOND_HALF_LINE      4

// Bits of the 4-bit bitmap object flags field
#define OPFLAG_REFLECT                  1       // Horizontal mirror bit
#define OPFLAG_RMW                      2       // Read-Modify-Write bit
#define OPFLAG_TRANS                    4       // Transparency bit
#define OPFLAG_RELEASE                  8       // Bus release bit
49
50 // Private function prototypes
51
52 void OPProcessFixedBitmap(uint64 p0, uint64 p1, bool render);
53 void OPProcessScaledBitmap(uint64 p0, uint64 p1, uint64 p2, bool render);
54 void OPDumpObjectList(uint32 address);
55 void DumpScaledObject(uint64 p0, uint64 p1, uint64 p2);
56 void DumpFixedObject(uint64 p0, uint64 p1);
57 void DumpBitmapCore(uint64 p0, uint64 p1);
58 uint64 OPLoadPhrase(uint32 offset);
59
60 // Local global variables
61
62 // Blend tables (64K each)
63 static uint8 op_blend_y[0x10000];
64 static uint8 op_blend_cr[0x10000];
65 // There may be a problem with this "RAM" overlapping (and thus being independent of)
66 // some of the regular TOM RAM...
67 //#warning objectp_ram is separated from TOM RAM--need to fix that!
68 //static uint8 objectp_ram[0x40];                       // This is based at $F00000
69 uint8 objectp_running = 0;
70 //bool objectp_stop_reading_list;
71
72 static uint8 op_bitmap_bit_depth[8] = { 1, 2, 4, 8, 16, 24, 32, 0 };
73 //static uint32 op_bitmap_bit_size[8] =
74 //      { (uint32)(0.125*65536), (uint32)(0.25*65536), (uint32)(0.5*65536), (uint32)(1*65536),
75 //        (uint32)(2*65536),     (uint32)(1*65536),    (uint32)(1*65536),   (uint32)(1*65536) };
76 static uint32 op_pointer;
77
78 int32 phraseWidthToPixels[8] = { 64, 32, 16, 8, 4, 2, 0, 0 };
79
80
81 //
82 // Object Processor initialization
83 //
84 void OPInit(void)
85 {
86         // Here we calculate the saturating blend of a signed 4-bit value and an
87         // existing Cyan/Red value as well as a signed 8-bit value and an existing intensity...
88         // Note: CRY is 4 bits Cyan, 4 bits Red, 16 bits intensitY
89         for(int i=0; i<256*256; i++)
90         {
91                 int y = (i >> 8) & 0xFF;
92                 int dy = (int8)i;                                       // Sign extend the Y index
93                 int c1 = (i >> 8) & 0x0F;
94                 int dc1 = (int8)(i << 4) >> 4;          // Sign extend the R index
95                 int c2 = (i >> 12) & 0x0F;
96                 int dc2 = (int8)(i & 0xF0) >> 4;        // Sign extend the C index
97
98                 y += dy;
99
100                 if (y < 0)
101                         y = 0;
102                 else if (y > 0xFF)
103                         y = 0xFF;
104
105                 op_blend_y[i] = y;
106
107                 c1 += dc1;
108
109                 if (c1 < 0)
110                         c1 = 0;
111                 else if (c1 > 0x0F)
112                         c1 = 0x0F;
113
114                 c2 += dc2;
115
116                 if (c2 < 0)
117                         c2 = 0;
118                 else if (c2 > 0x0F)
119                         c2 = 0x0F;
120
121                 op_blend_cr[i] = (c2 << 4) | c1;
122         }
123
124         OPReset();
125 }
126
127 //
128 // Object Processor reset
129 //
130 void OPReset(void)
131 {
132 //      memset(objectp_ram, 0x00, 0x40);
133         objectp_running = 0;
134 }
135
136 static const char * opType[8] =
137 { "(BITMAP)", "(SCALED BITMAP)", "(GPU INT)", "(BRANCH)", "(STOP)", "???", "???", "???" };
138 static const char * ccType[8] =
139         { "\"==\"", "\"<\"", "\">\"", "(opflag set)", "(second half line)", "?", "?", "?" };
140 static uint32 objectLink[8192];
141 static uint32 numberOfLinks;
142
143 void OPDone(void)
144 {
145 //#warning "!!! Fix OL dump so that it follows links !!!"
146 //      const char * opType[8] =
147 //      { "(BITMAP)", "(SCALED BITMAP)", "(GPU INT)", "(BRANCH)", "(STOP)", "???", "???", "???" };
148 //      const char * ccType[8] =
149 //              { "\"==\"", "\"<\"", "\">\"", "(opflag set)", "(second half line)", "?", "?", "?" };
150
151         uint32 olp = OPGetListPointer();
152         WriteLog("\nOP: OLP = $%08X\n", olp);
153         WriteLog("OP: Phrase dump\n    ----------\n");
154
155 #if 0
156         for(uint32 i=0; i<0x100; i+=8)
157         {
158                 uint32 hi = JaguarReadLong(olp + i, OP), lo = JaguarReadLong(olp + i + 4, OP);
159                 WriteLog("\t%08X: %08X %08X %s", olp + i, hi, lo, opType[lo & 0x07]);
160
161                 if ((lo & 0x07) == 3)
162                 {
163                         uint16 ypos = (lo >> 3) & 0x7FF;
164                         uint8  cc   = (lo >> 14) & 0x03;
165                         uint32 link = ((hi << 11) | (lo >> 21)) & 0x3FFFF8;
166                         WriteLog(" YPOS=%u, CC=%s, link=%08X", ypos, ccType[cc], link);
167                 }
168
169                 WriteLog("\n");
170
171                 if ((lo & 0x07) == 0)
172                         DumpFixedObject(OPLoadPhrase(olp+i), OPLoadPhrase(olp+i+8));
173
174                 if ((lo & 0x07) == 1)
175                         DumpScaledObject(OPLoadPhrase(olp+i), OPLoadPhrase(olp+i+8), OPLoadPhrase(olp+i+16));
176         }
177
178         WriteLog("\n");
179 #else
180         numberOfLinks = 0;
181
182         OPDumpObjectList(olp);
183 #endif
184 }
185
186
187 // To do this properly, we have to use recursion...
188 void OPDumpObjectList(uint32 address)
189 {
190         // Sanity checking: If we've already visited this link, bail out!
191         for(uint32 i=0; i<numberOfLinks; i++)
192         {
193                 if (address == objectLink[i])
194                         return;
195         }
196
197         objectLink[numberOfLinks++] = address;
198         uint8 objectType = 0;
199
200         do
201         {
202                 uint32 hi = JaguarReadLong(address + 0, OP);
203                 uint32 lo = JaguarReadLong(address + 4, OP);
204                 objectType = lo & 0x07;
205                 uint32 link = ((hi << 11) | (lo >> 21)) & 0x3FFFF8;
206                 WriteLog("%08X: %08X %08X %s", address, hi, lo, opType[objectType]);
207
208                 if (objectType == 3)
209                 {
210                         uint16 ypos = (lo >> 3) & 0x7FF;
211                         uint8  cc   = (lo >> 14) & 0x07;        // Proper # of bits == 3
212                         WriteLog(" YPOS=%u, CC=%s, link=$%08X", ypos, ccType[cc], link);
213
214                         // Recursion needed to follow all links!
215                         WriteLog("\n");
216                         OPDumpObjectList(address + 8);
217
218                         // Do the sanity check after recursive call: We may have already seen this...
219                         // Sanity checking: If we've already visited this link, bail out!
220 //disnowok: we added ourself above
221 //                      for(uint32 i=0; i<numberOfLinks; i++)
222 //                      {
223 //                              if (address == objectLink[i])
224 //                                      return;
225 //                      }
226                 }
227
228                 WriteLog("\n");
229
230                 if (objectType == 0)
231                         DumpFixedObject(OPLoadPhrase(address + 0), OPLoadPhrase(address + 8));
232
233                 if (objectType == 1)
234                         DumpScaledObject(OPLoadPhrase(address + 0), OPLoadPhrase(address + 8),
235                                 OPLoadPhrase(address + 16));
236
237                 if (address == link)    // Ruh roh...
238                 {
239                         // Runaway recursive link is bad!
240                         WriteLog("***** SELF REFERENTIAL LINK *****\n\n");
241                         return;
242                 }
243
244                 address = link;
245                 objectLink[numberOfLinks++] = address;
246         }
247         while (objectType != 4);
248
249         WriteLog("\n");
250 }
251
252
253
254 //
255 // Object Processor memory access
256 // Memory range: F00010 - F00027
257 //
258 //      F00010-F00017   R     xxxxxxxx xxxxxxxx   OB - current object code from the graphics processor
259 //      F00020-F00023     W   xxxxxxxx xxxxxxxx   OLP - start of the object list
260 //      F00026            W   -------- -------x   OBF - object processor flag
261 //
262
263 #if 0
264 uint8 OPReadByte(uint32 offset, uint32 who/*=UNKNOWN*/)
265 {
266         offset &= 0x3F;
267         return objectp_ram[offset];
268 }
269
270 uint16 OPReadWord(uint32 offset, uint32 who/*=UNKNOWN*/)
271 {
272         offset &= 0x3F;
273         return GET16(objectp_ram, offset);
274 }
275
276 void OPWriteByte(uint32 offset, uint8 data, uint32 who/*=UNKNOWN*/)
277 {
278         offset &= 0x3F;
279         objectp_ram[offset] = data;
280 }
281
282 void OPWriteWord(uint32 offset, uint16 data, uint32 who/*=UNKNOWN*/)
283 {
284         offset &= 0x3F;
285         SET16(objectp_ram, offset, data);
286
287 /*if (offset == 0x20)
288 WriteLog("OP: Setting lo list pointer: %04X\n", data);
289 if (offset == 0x22)
290 WriteLog("OP: Setting hi list pointer: %04X\n", data);//*/
291 }
292 #endif
293
294 uint32 OPGetListPointer(void)
295 {
296         // Note: This register is LO / HI WORD, hence the funky look of this...
297         return GET16(tomRam8, 0x20) | (GET16(tomRam8, 0x22) << 16);
298 }
299
300 // This is WRONG, since the OBF is only 16 bits wide!!! [FIXED]
301
302 uint32 OPGetStatusRegister(void)
303 {
304         return GET16(tomRam8, 0x26);
305 }
306
307 // This is WRONG, since the OBF is only 16 bits wide!!! [FIXED]
308
309 void OPSetStatusRegister(uint32 data)
310 {
311         tomRam8[0x26] = (data & 0x0000FF00) >> 8;
312         tomRam8[0x27] |= (data & 0xFE);
313 }
314
315 void OPSetCurrentObject(uint64 object)
316 {
317 //Not sure this is right... Wouldn't it just be stored 64 bit BE?
318         // Stored as least significant 32 bits first, ms32 last in big endian
319 /*      objectp_ram[0x13] = object & 0xFF; object >>= 8;
320         objectp_ram[0x12] = object & 0xFF; object >>= 8;
321         objectp_ram[0x11] = object & 0xFF; object >>= 8;
322         objectp_ram[0x10] = object & 0xFF; object >>= 8;
323
324         objectp_ram[0x17] = object & 0xFF; object >>= 8;
325         objectp_ram[0x16] = object & 0xFF; object >>= 8;
326         objectp_ram[0x15] = object & 0xFF; object >>= 8;
327         objectp_ram[0x14] = object & 0xFF;*/
328 // Let's try regular good old big endian...
329         tomRam8[0x17] = object & 0xFF; object >>= 8;
330         tomRam8[0x16] = object & 0xFF; object >>= 8;
331         tomRam8[0x15] = object & 0xFF; object >>= 8;
332         tomRam8[0x14] = object & 0xFF; object >>= 8;
333
334         tomRam8[0x13] = object & 0xFF; object >>= 8;
335         tomRam8[0x12] = object & 0xFF; object >>= 8;
336         tomRam8[0x11] = object & 0xFF; object >>= 8;
337         tomRam8[0x10] = object & 0xFF;
338 }
339
340 uint64 OPLoadPhrase(uint32 offset)
341 {
342         offset &= ~0x07;                                                // 8 byte alignment
343         return ((uint64)JaguarReadLong(offset, OP) << 32) | (uint64)JaguarReadLong(offset+4, OP);
344 }
345
346 void OPStorePhrase(uint32 offset, uint64 p)
347 {
348         offset &= ~0x07;                                                // 8 byte alignment
349         JaguarWriteLong(offset, p >> 32, OP);
350         JaguarWriteLong(offset + 4, p & 0xFFFFFFFF, OP);
351 }
352
353 //
354 // Debugging routines
355 //
356 void DumpScaledObject(uint64 p0, uint64 p1, uint64 p2)
357 {
358         WriteLog("          %08X %08X\n", (uint32)(p1>>32), (uint32)(p1&0xFFFFFFFF));
359         WriteLog("          %08X %08X\n", (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
360         DumpBitmapCore(p0, p1);
361         uint32 hscale = p2 & 0xFF;
362         uint32 vscale = (p2 >> 8) & 0xFF;
363         uint32 remainder = (p2 >> 16) & 0xFF;
364         WriteLog("    [hsc: %02X, vsc: %02X, rem: %02X]\n", hscale, vscale, remainder);
365 }
366
367 void DumpFixedObject(uint64 p0, uint64 p1)
368 {
369         WriteLog("          %08X %08X\n", (uint32)(p1>>32), (uint32)(p1&0xFFFFFFFF));
370         DumpBitmapCore(p0, p1);
371 }
372
373 void DumpBitmapCore(uint64 p0, uint64 p1)
374 {
375         uint8 bitdepth = (p1 >> 12) & 0x07;
376 //WAS:  int16 ypos = ((p0 >> 3) & 0x3FF);                       // ??? What if not interlaced (/2)?
377         int16 ypos = ((p0 >> 3) & 0x7FF);                       // ??? What if not interlaced (/2)?
378         int32 xpos = p1 & 0xFFF;
379         xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);       // Sign extend that mutha!
380         uint32 iwidth = ((p1 >> 28) & 0x3FF);
381         uint32 dwidth = ((p1 >> 18) & 0x3FF);           // Unsigned!
382         uint16 height = ((p0 >> 14) & 0x3FF);
383         uint32 link = ((p0 >> 24) & 0x7FFFF) << 3;
384         uint32 ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
385         uint32 firstPix = (p1 >> 49) & 0x3F;
386         uint8 flags = (p1 >> 45) & 0x0F;
387         uint8 idx = (p1 >> 38) & 0x7F;
388         uint32 pitch = (p1 >> 15) & 0x07;
389         WriteLog("    [%u (%u) x %u @ (%i, %u) (%u bpp), l: %08X, p: %08X fp: %02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
390                 iwidth, dwidth, height, xpos, ypos, op_bitmap_bit_depth[bitdepth], link,
391                 ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""),
392                 (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""),
393                 (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
394 }
395
396 //
397 // Object Processor main routine
398 //
399 #warning "Need to fix this so that when an GPU object IRQ happens, we can pick up OP processing where we left off. !!! FIX !!!"
400 void OPProcessList(int halfline, bool render)
401 {
402 extern int op_start_log;
403 //      char * condition_to_str[8] =
404 //              { "==", "<", ">", "(opflag set)", "(second half line)", "?", "?", "?" };
405
406         op_pointer = OPGetListPointer();
407
408 //      objectp_stop_reading_list = false;
409
410 //WriteLog("OP: Processing line #%u (OLP=%08X)...\n", halfline, op_pointer);
411 //op_done();
412
413 // *** BEGIN OP PROCESSOR TESTING ONLY ***
414 extern bool interactiveMode;
415 extern bool iToggle;
416 extern int objectPtr;
417 bool inhibit;
418 int bitmapCounter = 0;
419 // *** END OP PROCESSOR TESTING ONLY ***
420
421         uint32 opCyclesToRun = 30000;                                   // This is a pulled-out-of-the-air value (will need to be fixed, obviously!)
422
423 //      if (op_pointer) WriteLog(" new op list at 0x%.8x halfline %i\n",op_pointer,halfline);
424         while (op_pointer)
425         {
426 // *** BEGIN OP PROCESSOR TESTING ONLY ***
427 if (interactiveMode && bitmapCounter == objectPtr)
428         inhibit = iToggle;
429 else
430         inhibit = false;
431 // *** END OP PROCESSOR TESTING ONLY ***
432 //              if (objectp_stop_reading_list)
433 //                      return;
434
435                 uint64 p0 = OPLoadPhrase(op_pointer);
436                 op_pointer += 8;
437 //WriteLog("\t%08X type %i\n", op_pointer, (uint8)p0 & 0x07);
438
439 #if 1
440 if (halfline == TOMGetVDB() && op_start_log)
441 //if (halfline == 215 && op_start_log)
442 //if (halfline == 28 && op_start_log)
443 //if (halfline == 0)
444 {
445 WriteLog("%08X --> phrase %08X %08X", op_pointer - 8, (int)(p0>>32), (int)(p0&0xFFFFFFFF));
446 if ((p0 & 0x07) == OBJECT_TYPE_BITMAP)
447 {
448 WriteLog(" (BITMAP) ");
449 uint64 p1 = OPLoadPhrase(op_pointer);
450 WriteLog("\n%08X --> phrase %08X %08X ", op_pointer, (int)(p1>>32), (int)(p1&0xFFFFFFFF));
451         uint8 bitdepth = (p1 >> 12) & 0x07;
452 //WAS:  int16 ypos = ((p0 >> 3) & 0x3FF);                       // ??? What if not interlaced (/2)?
453         int16 ypos = ((p0 >> 3) & 0x7FF);                       // ??? What if not interlaced (/2)?
454 int32 xpos = p1 & 0xFFF;
455 xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);
456         uint32 iwidth = ((p1 >> 28) & 0x3FF);
457         uint32 dwidth = ((p1 >> 18) & 0x3FF);           // Unsigned!
458         uint16 height = ((p0 >> 14) & 0x3FF);
459         uint32 link = ((p0 >> 24) & 0x7FFFF) << 3;
460         uint32 ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
461         uint32 firstPix = (p1 >> 49) & 0x3F;
462         uint8 flags = (p1 >> 45) & 0x0F;
463         uint8 idx = (p1 >> 38) & 0x7F;
464         uint32 pitch = (p1 >> 15) & 0x07;
465 WriteLog("\n    [%u (%u) x %u @ (%i, %u) (%u bpp), l: %08X, p: %08X fp: %02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
466         iwidth, dwidth, height, xpos, ypos, op_bitmap_bit_depth[bitdepth], link, ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""), (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""), (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
467 }
468 if ((p0 & 0x07) == OBJECT_TYPE_SCALE)
469 {
470 WriteLog(" (SCALED BITMAP)");
471 uint64 p1 = OPLoadPhrase(op_pointer), p2 = OPLoadPhrase(op_pointer+8);
472 WriteLog("\n%08X --> phrase %08X %08X ", op_pointer, (int)(p1>>32), (int)(p1&0xFFFFFFFF));
473 WriteLog("\n%08X --> phrase %08X %08X ", op_pointer+8, (int)(p2>>32), (int)(p2&0xFFFFFFFF));
474         uint8 bitdepth = (p1 >> 12) & 0x07;
475 //WAS:  int16 ypos = ((p0 >> 3) & 0x3FF);                       // ??? What if not interlaced (/2)?
476         int16 ypos = ((p0 >> 3) & 0x7FF);                       // ??? What if not interlaced (/2)?
477 int32 xpos = p1 & 0xFFF;
478 xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);
479         uint32 iwidth = ((p1 >> 28) & 0x3FF);
480         uint32 dwidth = ((p1 >> 18) & 0x3FF);           // Unsigned!
481         uint16 height = ((p0 >> 14) & 0x3FF);
482         uint32 link = ((p0 >> 24) & 0x7FFFF) << 3;
483         uint32 ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
484         uint32 firstPix = (p1 >> 49) & 0x3F;
485         uint8 flags = (p1 >> 45) & 0x0F;
486         uint8 idx = (p1 >> 38) & 0x7F;
487         uint32 pitch = (p1 >> 15) & 0x07;
488 WriteLog("\n    [%u (%u) x %u @ (%i, %u) (%u bpp), l: %08X, p: %08X fp: %02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
489         iwidth, dwidth, height, xpos, ypos, op_bitmap_bit_depth[bitdepth], link, ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""), (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""), (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
490         uint32 hscale = p2 & 0xFF;
491         uint32 vscale = (p2 >> 8) & 0xFF;
492         uint32 remainder = (p2 >> 16) & 0xFF;
493 WriteLog("    [hsc: %02X, vsc: %02X, rem: %02X]\n", hscale, vscale, remainder);
494 }
495 if ((p0 & 0x07) == OBJECT_TYPE_GPU)
496 WriteLog(" (GPU)\n");
497 if ((p0 & 0x07) == OBJECT_TYPE_BRANCH)
498 {
499 WriteLog(" (BRANCH)\n");
500 uint8 * jaguarMainRam = GetRamPtr();
501 WriteLog("[RAM] --> ");
502 for(int k=0; k<8; k++)
503         WriteLog("%02X ", jaguarMainRam[op_pointer-8 + k]);
504 WriteLog("\n");
505 }
506 if ((p0 & 0x07) == OBJECT_TYPE_STOP)
507 WriteLog("    --> List end\n\n");
508 }
509 #endif
510
511                 switch ((uint8)p0 & 0x07)
512                 {
513                 case OBJECT_TYPE_BITMAP:
514                 {
515 //WAS:                  uint16 ypos = (p0 >> 3) & 0x3FF;
516                         uint16 ypos = (p0 >> 3) & 0x7FF;
517 // This is only theory implied by Rayman...!
518 // It seems that if the YPOS is zero, then bump the YPOS value so that it coincides with
519 // the VDB value. With interlacing, this would be slightly more tricky.
520 // There's probably another bit somewhere that enables this mode--but so far, doesn't seem
521 // to affect any other game in a negative way (that I've seen).
522 // Either that, or it's an undocumented bug...
523
524 //No, the reason this was needed is that the OP code before was wrong. Any value
525 //less than VDB will get written to the top line of the display!
526 #if 0
527 // Not so sure... Let's see what happens here...
528 // No change...
529                         if (ypos == 0)
530                                 ypos = TOMReadWord(0xF00046, OP) / 2;                   // Get the VDB value
531 #endif
532 // Actually, no. Any item less than VDB will get only the lines that hang over VDB displayed.
533 // Actually, this is incorrect. It seems that VDB value is wrong somewhere and that's
534 // what's causing things to fuck up. Still no idea why.
535
536                         uint32 height = (p0 & 0xFFC000) >> 14;
537                         uint32 oldOPP = op_pointer - 8;
538 // *** BEGIN OP PROCESSOR TESTING ONLY ***
539 if (inhibit && op_start_log)
540         WriteLog("!!! ^^^ This object is INHIBITED! ^^^ !!!\n");
541 bitmapCounter++;
542 if (!inhibit)   // For OP testing only!
543 // *** END OP PROCESSOR TESTING ONLY ***
544                         if (halfline >= ypos && height > 0)
545                         {
546                                 uint64 p1 = OPLoadPhrase(op_pointer);
547                                 op_pointer += 8;
548 //WriteLog("OP: Writing halfline %d with ypos == %d...\n", halfline, ypos);
549 //WriteLog("--> Writing %u BPP bitmap...\n", op_bitmap_bit_depth[(p1 >> 12) & 0x07]);
550 //                              OPProcessFixedBitmap(halfline, p0, p1, render);
551                                 OPProcessFixedBitmap(p0, p1, render);
552
553                                 // OP write-backs
554
555 //???Does this really happen??? Doesn't seem to work if you do this...!
556 //Probably not. Must be a bug in the documentation...!
557 //                              uint32 link = (p0 & 0x7FFFF000000) >> 21;
558 //                              SET16(tom_ram_8, 0x20, link & 0xFFFF);  // OLP
559 //                              SET16(tom_ram_8, 0x22, link >> 16);
560 /*                              uint32 height = (p0 & 0xFFC000) >> 14;
561                                 if (height - 1 > 0)
562                                         height--;*/
563                                 // NOTE: Would subtract 2 if in interlaced mode...!
564 //                              uint64 height = ((p0 & 0xFFC000) - 0x4000) & 0xFFC000;
565 //                              if (height)
566                                 height--;
567
568                                 uint64 data = (p0 & 0xFFFFF80000000000LL) >> 40;
569                                 uint64 dwidth = (p1 & 0xFFC0000) >> 15;
570                                 data += dwidth;
571
572                                 p0 &= ~0xFFFFF80000FFC000LL;            // Mask out old data...
573                                 p0 |= (uint64)height << 14;
574                                 p0 |= data << 40;
575                                 OPStorePhrase(oldOPP, p0);
576                         }
577 //WriteLog("\t\tOld OP: %08X -> ", op_pointer);
578 //Temp, for testing...
579 //No doubt, this type of check will break all kinds of stuff... !!! FIX !!!
580 //And it does! !!! FIX !!!
581 //Let's remove this "fix" since it screws up more than it fixes.
582 /*      if (op_pointer > ((p0 & 0x000007FFFF000000LL) >> 21))
583                 return;*/
584
585                         op_pointer = (p0 & 0x000007FFFF000000LL) >> 21;
586 //WriteLog("New OP: %08X\n", op_pointer);
587                         break;
588                 }
589                 case OBJECT_TYPE_SCALE:
590                 {
591 //WAS:                  uint16 ypos = (p0 >> 3) & 0x3FF;
592                         uint16 ypos = (p0 >> 3) & 0x7FF;
593                         uint32 height = (p0 & 0xFFC000) >> 14;
594                         uint32 oldOPP = op_pointer - 8;
595 //WriteLog("OP: Scaled Object (ypos=%04X, height=%04X", ypos, height);
596 // *** BEGIN OP PROCESSOR TESTING ONLY ***
597 if (inhibit && op_start_log)
598 {
599         WriteLog("!!! ^^^ This object is INHIBITED! ^^^ !!! (halfline=%u, ypos=%u, height=%u)\n", halfline, ypos, height);
600         DumpScaledObject(p0, OPLoadPhrase(op_pointer), OPLoadPhrase(op_pointer+8));
601 }
602 bitmapCounter++;
603 if (!inhibit)   // For OP testing only!
604 // *** END OP PROCESSOR TESTING ONLY ***
605                         if (halfline >= ypos && height > 0)
606                         {
607                                 uint64 p1 = OPLoadPhrase(op_pointer);
608                                 op_pointer += 8;
609                                 uint64 p2 = OPLoadPhrase(op_pointer);
610                                 op_pointer += 8;
611 //WriteLog("OP: %08X (%d) %08X%08X %08X%08X %08X%08X\n", oldOPP, halfline, (uint32)(p0>>32), (uint32)(p0&0xFFFFFFFF), (uint32)(p1>>32), (uint32)(p1&0xFFFFFFFF), (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
612                                 OPProcessScaledBitmap(p0, p1, p2, render);
613
614                                 // OP write-backs
615
616                                 uint16 remainder = (p2 >> 16) & 0xFF;//, vscale = p2 >> 8;
617                                 uint8 /*remainder = p2 >> 16,*/ vscale = p2 >> 8;
618 //Actually, we should skip this object if it has a vscale of zero.
619 //Or do we? Not sure... Atari Karts has a few lines that look like:
620 // (SCALED BITMAP)
621 //000E8268 --> phrase 00010000 7000B00D
622 //    [7 (0) x 1 @ (13, 0) (8 bpp), l: 000E82A0, p: 000E0FC0 fp: 00, fl:RELEASE, idx:00, pt:01]
623 //    [hsc: 9A, vsc: 00, rem: 00]
624 // Could it be the vscale is overridden if the DWIDTH is zero? Hmm...
625 //WriteLog("OP: Scaled bitmap processing (rem=%02X, vscale=%02X)...\n", remainder, vscale);//*/
626
627                                 if (vscale == 0)
628                                         vscale = 0x20;                                  // OP bug??? Nope, it isn't...! Or is it?
629
630 //extern int start_logging;
631 //if (start_logging)
632 //      WriteLog("--> Returned from scaled bitmap processing (rem=%02X, vscale=%02X)...\n", remainder, vscale);//*/
633 //Locks up here:
634 //--> Returned from scaled bitmap processing (rem=20, vscale=80)...
635 //There are other problems here, it looks like...
636 //Another lock up:
637 //About to execute OP (508)...
638 /*
639 OP: Scaled bitmap 4x? 4bpp at 38,? hscale=7C fpix=0 data=00075E28 pitch 1 hflipped=no dwidth=? (linked to 00071118) Transluency=no
640 --> Returned from scaled bitmap processing (rem=50, vscale=7C)...
641 OP: Scaled bitmap 4x? 4bpp at 38,? hscale=7C fpix=0 data=00075E28 pitch 1 hflipped=no dwidth=? (linked to 00071118) Transluency=no
642 --> Returned from scaled bitmap processing (rem=30, vscale=7C)...
643 OP: Scaled bitmap 4x? 4bpp at 38,? hscale=7C fpix=0 data=00075E28 pitch 1 hflipped=no dwidth=? (linked to 00071118) Transluency=no
644 --> Returned from scaled bitmap processing (rem=10, vscale=7C)...
645 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756A8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
646 --> Returned from scaled bitmap processing (rem=00, vscale=7E)...
647 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
648 --> Returned from scaled bitmap processing (rem=00, vscale=80)...
649 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
650 --> Returned from scaled bitmap processing (rem=5E, vscale=7E)...
651 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756E8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
652 --> Returned from scaled bitmap processing (rem=60, vscale=80)...
653 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
654 --> Returned from scaled bitmap processing (rem=3E, vscale=7E)...
655 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756E8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
656 --> Returned from scaled bitmap processing (rem=40, vscale=80)...
657 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
658 --> Returned from scaled bitmap processing (rem=1E, vscale=7E)...
659 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756E8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
660 --> Returned from scaled bitmap processing (rem=20, vscale=80)...
661 */
662 //Here's another problem:
663 //    [hsc: 20, vsc: 20, rem: 00]
664 // Since we're not checking for $E0 (but that's what we get from the above), we end
665 // up repeating this halfline unnecessarily... !!! FIX !!! [DONE, but... still not quite
666 // right. Either that, or the Accolade team that wrote Bubsy screwed up royal.]
667 //Also note: $E0 = 7.0 which IS a legal vscale value...
668
669 //                              if (remainder & 0x80)                           // I.e., it's negative
670 //                              if ((remainder & 0x80) || remainder == 0)       // I.e., it's <= 0
671 //                              if ((remainder - 1) >= 0xE0)            // I.e., it's <= 0
672 //                              if ((remainder >= 0xE1) || remainder == 0)// I.e., it's <= 0
673 //                              if ((remainder >= 0xE1 && remainder <= 0xFF) || remainder == 0)// I.e., it's <= 0
674 //                              if (remainder <= 0x20)                          // I.e., it's <= 1.0
675                                 // I.e., it's < 1.0f -> means it'll go negative when we subtract 1.0f.
676                                 if (remainder < 0x20)
677                                 {
678                                         uint64 data = (p0 & 0xFFFFF80000000000LL) >> 40;
679                                         uint64 dwidth = (p1 & 0xFFC0000) >> 15;
680
681 //                                      while (remainder & 0x80)
682 //                                      while ((remainder & 0x80) || remainder == 0)
683 //                                      while ((remainder - 1) >= 0xE0)
684 //                                      while ((remainder >= 0xE1) || remainder == 0)
685 //                                      while ((remainder >= 0xE1 && remainder <= 0xFF) || remainder == 0)
686 //                                      while (remainder <= 0x20)
687                                         while (remainder < 0x20)
688                                         {
689                                                 remainder += vscale;
690
691                                                 if (height)
692                                                         height--;
693
694                                                 data += dwidth;
695                                         }
696
697                                         p0 &= ~0xFFFFF80000FFC000LL;    // Mask out old data...
698                                         p0 |= (uint64)height << 14;
699                                         p0 |= data << 40;
700                                         OPStorePhrase(oldOPP, p0);
701                                 }
702
703                                 remainder -= 0x20;                                      // 1.0f in [3.5] fixed point format
704
705 //if (start_logging)
706 //      WriteLog("--> Finished writebacks...\n");//*/
707
708 //WriteLog(" [%08X%08X -> ", (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
709                                 p2 &= ~0x0000000000FF0000LL;
710                                 p2 |= (uint64)remainder << 16;
711 //WriteLog("%08X%08X]\n", (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
712                                 OPStorePhrase(oldOPP + 16, p2);
713 //remainder = (uint8)(p2 >> 16), vscale = (uint8)(p2 >> 8);
714 //WriteLog(" [after]: rem=%02X, vscale=%02X\n", remainder, vscale);
715                         }
716
717                         op_pointer = (p0 & 0x000007FFFF000000LL) >> 21;
718                         break;
719                 }
720                 case OBJECT_TYPE_GPU:
721                 {
722 //WriteLog("OP: Asserting GPU IRQ #3...\n");
723 #warning "Need to fix OP GPU IRQ handling! !!! FIX !!!"
724                         OPSetCurrentObject(p0);
725                         GPUSetIRQLine(3, ASSERT_LINE);
726 //Also, OP processing is suspended from this point until OBF (F00026) is written to...
727 // !!! FIX !!!
728 //Do something like:
729 //OPSuspendedByGPU = true;
730 //Dunno if the OP keeps processing from where it was interrupted, or if it just continues
731 //on the next halfline...
732 // --> It continues from where it was interrupted! !!! FIX !!!
733                         break;
734                 }
735                 case OBJECT_TYPE_BRANCH:
736                 {
737                         uint16 ypos = (p0 >> 3) & 0x7FF;
738 // NOTE: The JTRM sez there are only 2 bits used for the CC, but lists *five*
739 //       conditions! Need at least one more bit for that! :-P
740 #warning "!!! Possibly bad CC handling in OP (missing 1 bit) !!!"
741                         uint8  cc   = (p0 >> 14) & 0x03;
742                         uint32 link = (p0 >> 21) & 0x3FFFF8;
743
744 //                      if ((ypos!=507)&&(ypos!=25))
745 //                              WriteLog("\t%i%s%i link=0x%.8x\n",halfline,condition_to_str[cc],ypos>>1,link);
746                         switch (cc)
747                         {
748                         case CONDITION_EQUAL:
749                                 if (TOMReadWord(0xF00006, OP) == ypos || ypos == 0x7FF)
750                                         op_pointer = link;
751                                 break;
752                         case CONDITION_LESS_THAN:
753                                 if (TOMReadWord(0xF00006, OP) < ypos)
754                                         op_pointer = link;
755                                 break;
756                         case CONDITION_GREATER_THAN:
757                                 if (TOMReadWord(0xF00006, OP) > ypos)
758                                         op_pointer = link;
759                                 break;
760                         case CONDITION_OP_FLAG_SET:
761                                 if (OPGetStatusRegister() & 0x01)
762                                         op_pointer = link;
763                                 break;
764                         case CONDITION_SECOND_HALF_LINE:
765 //Here's the ASIC code:
766 //  ND4(cctrue5, newheight[2], heightl[1], heightl[0], hcb[10]);
767 //which means, do the link if bit 10 of HC is set...
768
769                                 // This basically means branch if bit 10 of HC is set
770 #warning "Unhandled condition code causes emulator to crash... !!! FIX !!!"
771                                 WriteLog("OP: Unexpected CONDITION_SECOND_HALF_LINE in BRANCH object\nOP: shutting down!\n");
772                                 LogDone();
773                                 exit(0);
774                                 break;
775                         default:
776                                 // Basically, if you do this, the OP does nothing. :-)
777                                 WriteLog("OP: Unimplemented branch condition %i\n", cc);
778                         }
779                         break;
780                 }
781                 case OBJECT_TYPE_STOP:
782                 {
783 //op_start_log = 0;
784                         // unsure
785 //WriteLog("OP: --> STOP\n");
786 //                      op_set_status_register(((p0>>3) & 0xFFFFFFFF));
787 //This seems more likely...
788                         OPSetCurrentObject(p0);
789
790                         if (p0 & 0x08)
791                         {
792                                 // We need to check whether these interrupts are enabled or not, THEN
793                                 // set an IRQ + pending flag if necessary...
794                                 if (TOMIRQEnabled(IRQ_OPFLAG))
795                                 {
796                                         TOMSetPendingObjectInt();
797                                         m68k_set_irq(2);                                // Cause a 68K IPL 2 to occur...
798                                 }
799                         }
800
801                         return;
802 //                      break;
803                 }
804                 default:
805                         WriteLog("op: unknown object type %i\n", ((uint8)p0 & 0x07));
806                         return;
807                 }
808
809                 // Here is a little sanity check to keep the OP from locking up the machine
810                 // when fed bad data. Better would be to count how many actual cycles it used
811                 // and bail out/reenter to properly simulate an overloaded OP... !!! FIX !!!
812 #warning "Better would be to count how many actual cycles it used and bail out/reenter to properly simulate an overloaded OP... !!! FIX !!!"
813                 opCyclesToRun--;
814
815                 if (!opCyclesToRun)
816                         return;
817         }
818 }
819
820 //
821 // Store fixed size bitmap in line buffer
822 //
823 void OPProcessFixedBitmap(uint64 p0, uint64 p1, bool render)
824 {
825 // Need to make sure that when writing that it stays within the line buffer...
826 // LBUF ($F01800 - $F01D9E) 360 x 32-bit RAM
827         uint8 depth = (p1 >> 12) & 0x07;                                // Color depth of image
828         int32 xpos = ((int16)((p1 << 4) & 0xFFFF)) >> 4;// Image xpos in LBUF
829         uint32 iwidth = (p1 >> 28) & 0x3FF;                             // Image width in *phrases*
830         uint32 data = (p0 >> 40) & 0xFFFFF8;                    // Pixel data address
831 //#ifdef OP_DEBUG_BMP
832         uint32  firstPix = (p1 >> 49) & 0x3F;
833         // "The LSB is significant only for scaled objects..." -JTRM
834         // "In 1 BPP mode, all five bits are significant. In 2 BPP mode, the top four are significant..."
835         firstPix &= 0x3E;
836 //#endif
837 // We can ignore the RELEASE (high order) bit for now--probably forever...!
838 //      uint8 flags = (p1 >> 45) & 0x0F;        // REFLECT, RMW, TRANS, RELEASE
839 //Optimize: break these out to their own BOOL values
840         uint8 flags = (p1 >> 45) & 0x07;                                // REFLECT (0), RMW (1), TRANS (2)
841         bool flagREFLECT = (flags & OPFLAG_REFLECT ? true : false),
842                 flagRMW = (flags & OPFLAG_RMW ? true : false),
843                 flagTRANS = (flags & OPFLAG_TRANS ? true : false);
844 // "For images with 1 to 4 bits/pixel the top 7 to 4 bits of the index
845 //  provide the most significant bits of the palette address."
846         uint8 index = (p1 >> 37) & 0xFE;                                // CLUT index offset (upper pix, 1-4 bpp)
847         uint32 pitch = (p1 >> 15) & 0x07;                               // Phrase pitch
848         pitch <<= 3;                                                                    // Optimization: Multiply pitch by 8
849
850 //      int16 scanlineWidth = tom_getVideoModeWidth();
851         uint8 * tomRam8 = TOMGetRamPointer();
852         uint8 * paletteRAM = &tomRam8[0x400];
853         // This is OK as long as it's used correctly: For 16-bit RAM to RAM direct copies--NOT
854         // for use when using endian-corrected data (i.e., any of the *_word_read functions!)
855         uint16 * paletteRAM16 = (uint16 *)paletteRAM;
856
857 //      WriteLog("bitmap %ix? %ibpp at %i,? firstpix=? data=0x%.8x pitch %i hflipped=%s dwidth=? (linked to ?) RMW=%s Tranparent=%s\n",
858 //              iwidth, op_bitmap_bit_depth[bitdepth], xpos, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), (flags&OPFLAG_RMW ? "yes" : "no"), (flags&OPFLAG_TRANS ? "yes" : "no"));
859
860 // Is it OK to have a 0 for the data width??? (i.e., undocumented?)
861 // Seems to be... Seems that dwidth *can* be zero (i.e., reuse same line) as well.
862 // Pitch == 0 is OK too...
863 //      if (!render || op_pointer == 0 || ptr == 0 || pitch == 0)
864 //I'm not convinced that we need to concern ourselves with data & op_pointer here either!
865         if (!render || iwidth == 0)
866                 return;
867
868 //OK, so we know the position in the line buffer is correct. It's the clipping in
869 //24bpp mode that's wrong!
870 #if 0
871 //This is a total kludge, based upon the fact that 24BPP mode puts *4* bytes
872 //into the line buffer for each pixel.
873 if (depth == 5) // i.e., 24bpp mode...
874         xpos >>= 1;     // Cut it in half...
875 #endif
876
877 //#define OP_DEBUG_BMP
878 //#ifdef OP_DEBUG_BMP
879 //      WriteLog("bitmap %ix%i %ibpp at %i,%i firstpix=%i data=0x%.8x pitch %i hflipped=%s dwidth=%i (linked to 0x%.8x) Transluency=%s\n",
880 //              iwidth, height, op_bitmap_bit_depth[bitdepth], xpos, ypos, firstPix, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), dwidth, op_pointer, (flags&OPFLAG_RMW ? "yes" : "no"));
881 //#endif
882
883 //      int32 leftMargin = xpos, rightMargin = (xpos + (phraseWidthToPixels[depth] * iwidth)) - 1;
884         int32 startPos = xpos, endPos = xpos +
885                 (!flagREFLECT ? (phraseWidthToPixels[depth] * iwidth) - 1
886                 : -((phraseWidthToPixels[depth] * iwidth) + 1));
887         uint32 clippedWidth = 0, phraseClippedWidth = 0, dataClippedWidth = 0;//, phrasePixel = 0;
888         bool in24BPPMode = (((GET16(tomRam8, 0x0028) >> 1) & 0x03) == 1 ? true : false);        // VMODE
889         // Not sure if this is Jaguar Two only location or what...
890         // From the docs, it is... If we want to limit here we should think of something else.
891 //      int32 limit = GET16(tom_ram_8, 0x0008);                 // LIMIT
892 //      int32 limit = 720;
893 //      int32 lbufWidth = (!in24BPPMode ? limit - 1 : (limit / 2) - 1); // Zero based limit...
894 //printf("[OP:xpos=%i,spos=%i,epos=%i>", xpos, startPos, endPos);
895         // This is correct, the OP line buffer is a constant size... 
896         int32 limit = 720;
897         int32 lbufWidth = 719;
898
899         // If the image is completely to the left or right of the line buffer, then bail.
900 //If in REFLECT mode, then these values are swapped! !!! FIX !!! [DONE]
901 //There are four possibilities:
902 //  1. image sits on left edge and no REFLECT; starts out of bounds but ends in bounds.
903 //  2. image sits on left edge and REFLECT; starts in bounds but ends out of bounds.
904 //  3. image sits on right edge and REFLECT; starts out of bounds but ends in bounds.
905 //  4. image sits on right edge and no REFLECT; starts in bounds but ends out of bounds.
906 //Numbers 2 & 4 can be caught by checking the LBUF clip while in the inner loop,
907 // numbers 1 & 3 are of concern.
908 // This *indirectly* handles only cases 2 & 4! And is WRONG if REFLECT is set...!
909 //      if (rightMargin < 0 || leftMargin > lbufWidth)
910
911 // It might be easier to swap these (if REFLECTed) and just use XPOS down below...
912 // That way, you could simply set XPOS to leftMargin if !REFLECT and to rightMargin otherwise.
913 // Still have to be careful with the DATA and IWIDTH values though...
914
915 //      if ((!flagREFLECT && (rightMargin < 0 || leftMargin > lbufWidth))
916 //              || (flagREFLECT && (leftMargin < 0 || rightMargin > lbufWidth)))
917 //              return;
918         if ((!flagREFLECT && (endPos < 0 || startPos > lbufWidth))
919                 || (flagREFLECT && (startPos < 0 || endPos > lbufWidth)))
920                 return;
921
922         // Otherwise, find the clip limits and clip the phrase as well...
923         // NOTE: I'm fudging here by letting the actual blit overstep the bounds of the
924         //       line buffer, but it shouldn't matter since there are two unused line
925         //       buffers below and nothing above and I'll at most write 8 bytes outside
926         //       the line buffer... I could use a fractional clip begin/end value, but
927         //       this makes the blit a *lot* more hairy. I might fix this in the future
928         //       if it becomes necessary. (JLH)
929         //       Probably wouldn't be *that* hairy. Just use a delta that tells the inner loop
930         //       which pixel in the phrase is being written, and quit when either end of phrases
931         //       is reached or line buffer extents are surpassed.
932
933 //This stuff is probably wrong as well... !!! FIX !!!
934 //The strange thing is that it seems to work, but that's no guarantee that it's bulletproof!
935 //Yup. Seems that JagMania doesn't work correctly with this...
936 //Dunno if this is the problem, but Atari Karts is showing *some* of the road now...
937 //      if (!flagREFLECT)
938
939 /*
940         if (leftMargin < 0)
941                 clippedWidth = 0 - leftMargin,
942                 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth],
943                 leftMargin = 0 - (clippedWidth % phraseWidthToPixels[depth]);
944 //              leftMargin = 0;
945
946         if (rightMargin > lbufWidth)
947                 clippedWidth = rightMargin - lbufWidth,
948                 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];//,
949 //              rightMargin = lbufWidth + (clippedWidth % phraseWidthToPixels[depth]);
950 //              rightMargin = lbufWidth;
951 */
952 if (depth > 5)
953         WriteLog("OP: We're about to encounter a divide by zero error!\n");
954         // NOTE: We're just using endPos to figure out how much, if any, to clip by.
955         // ALSO: There may be another case where we start out of bounds and end out of bounds...!
956         // !!! FIX !!!
957         if (startPos < 0)                       // Case #1: Begin out, end in, L to R
958                 clippedWidth = 0 - startPos,
959                 dataClippedWidth = phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth],
960                 startPos = 0 - (clippedWidth % phraseWidthToPixels[depth]);
961
962         if (endPos < 0)                         // Case #2: Begin in, end out, R to L
963                 clippedWidth = 0 - endPos,
964                 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];
965
966         if (endPos > lbufWidth)         // Case #3: Begin in, end out, L to R
967                 clippedWidth = endPos - lbufWidth,
968                 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];
969
970         if (startPos > lbufWidth)       // Case #4: Begin out, end in, R to L
971                 clippedWidth = startPos - lbufWidth,
972                 dataClippedWidth = phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth],
973                 startPos = lbufWidth + (clippedWidth % phraseWidthToPixels[depth]);
974 //printf("<OP:spos=%i,epos=%i]", startPos, endPos);
975
976         // If the image is sitting on the line buffer left or right edge, we need to compensate
977         // by decreasing the image phrase width accordingly.
978         iwidth -= phraseClippedWidth;
979
980         // Also, if we're clipping the phrase we need to make sure we're in the correct part of
981         // the pixel data.
982 //      data += phraseClippedWidth * (pitch << 3);
983         data += dataClippedWidth * pitch;
984
985         // NOTE: When the bitmap is in REFLECT mode, the XPOS marks the *right* side of the
986         //       bitmap! This makes clipping & etc. MUCH, much easier...!
987 //      uint32 lbufAddress = 0x1800 + (!in24BPPMode ? leftMargin * 2 : leftMargin * 4);
988 //Why does this work right when multiplying startPos by 2 (instead of 4) for 24 BPP mode?
989 //Is this a bug in the OP?
990 //It's because in 24bpp mode, each pixel takes *4* bytes, instead of the usual 2.
991 //Though it looks like we're doing it here no matter what...
992 //      uint32 lbufAddress = 0x1800 + (!in24BPPMode ? startPos * 2 : startPos * 2);
993 //Let's try this:
994         uint32 lbufAddress = 0x1800 + (startPos * 2);
995         uint8 * currentLineBuffer = &tomRam8[lbufAddress];
996
997         // Render.
998
999 // Hmm. We check above for 24 BPP mode, but don't do anything about it below...
1000 // If we *were* in 24 BPP mode, how would you convert CRY to RGB24? Seems to me
1001 // that if you're in CRY mode then you wouldn't be able to use 24 BPP bitmaps
1002 // anyway.
1003 // This seems to be the case (at least according to the Midsummer docs)...!
1004
1005 // This is to test using palette zeroes instead of bit zeroes...
1006 // And it seems that this is wrong, index == 0 is transparent apparently... :-/
1007 //#define OP_USES_PALETTE_ZERO
1008
1009         if (depth == 0)                                                                 // 1 BPP
1010         {
1011                 // The LSB of flags is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1012                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1013
1014                 // Fetch 1st phrase...
1015                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1016 //Note that firstPix should only be honored *if* we start with the 1st phrase of the bitmap
1017 //i.e., we didn't clip on the margin... !!! FIX !!!
1018                 pixels <<= firstPix;                                            // Skip first N pixels (N=firstPix)...
1019                 int i = firstPix;                                                       // Start counter at right spot...
1020
1021                 while (iwidth--)
1022                 {
1023                         while (i++ < 64)
1024                         {
1025                                 uint8 bit = pixels >> 63;
1026 #ifndef OP_USES_PALETTE_ZERO
1027                                 if (flagTRANS && bit == 0)
1028 #else
1029                                 if (flagTRANS && (paletteRAM16[index | bit] == 0))
1030 #endif
1031                                         ;       // Do nothing...
1032                                 else
1033                                 {
1034                                         if (!flagRMW)
1035 //Optimize: Set paletteRAM16 to beginning of palette RAM + index*2 and use only [bit] as index...
1036 //Won't optimize RMW case though...
1037                                                 // This is the *only* correct use of endian-dependent code
1038                                                 // (i.e., mem-to-mem direct copying)!
1039                                                 *(uint16 *)currentLineBuffer = paletteRAM16[index | bit];
1040                                         else
1041                                                 *currentLineBuffer =
1042                                                         BLEND_CR(*currentLineBuffer, paletteRAM[(index | bit) << 1]),
1043                                                 *(currentLineBuffer + 1) =
1044                                                         BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bit) << 1) + 1]);
1045                                 }
1046
1047                                 currentLineBuffer += lbufDelta;
1048                                 pixels <<= 1;
1049                         }
1050                         i = 0;
1051                         // Fetch next phrase...
1052                         data += pitch;
1053                         pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1054                 }
1055         }
1056         else if (depth == 1)                                                    // 2 BPP
1057         {
1058 if (firstPix)
1059         WriteLog("OP: Fixed bitmap @ 2 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1060                 index &= 0xFC;                                                          // Top six bits form CLUT index
1061                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1062                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1063
1064                 while (iwidth--)
1065                 {
1066                         // Fetch phrase...
1067                         uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1068                         data += pitch;
1069
1070                         for(int i=0; i<32; i++)
1071                         {
1072                                 uint8 bits = pixels >> 62;
1073 // Seems to me that both of these are in the same endian, so we could cast it as
1074 // uint16 * and do straight across copies (what about 24 bpp? Treat it differently...)
1075 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
1076 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
1077 // No, it isn't because we read the memory in an endian safe way--this *won't* work...
1078 #ifndef OP_USES_PALETTE_ZERO
1079                                 if (flagTRANS && bits == 0)
1080 #else
1081                                 if (flagTRANS && (paletteRAM16[index | bits] == 0))
1082 #endif
1083                                         ;       // Do nothing...
1084                                 else
1085                                 {
1086                                         if (!flagRMW)
1087                                                 *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1088                                         else
1089                                                 *currentLineBuffer =
1090                                                         BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1091                                                 *(currentLineBuffer + 1) =
1092                                                         BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1093                                 }
1094
1095                                 currentLineBuffer += lbufDelta;
1096                                 pixels <<= 2;
1097                         }
1098                 }
1099         }
1100         else if (depth == 2)                                                    // 4 BPP
1101         {
1102 if (firstPix)
1103         WriteLog("OP: Fixed bitmap @ 4 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1104                 index &= 0xF0;                                                          // Top four bits form CLUT index
1105                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1106                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1107
1108                 while (iwidth--)
1109                 {
1110                         // Fetch phrase...
1111                         uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1112                         data += pitch;
1113
1114                         for(int i=0; i<16; i++)
1115                         {
1116                                 uint8 bits = pixels >> 60;
1117 // Seems to me that both of these are in the same endian, so we could cast it as
1118 // uint16 * and do straight across copies (what about 24 bpp? Treat it differently...)
1119 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
1120 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
1121 // No, it isn't because we read the memory in an endian safe way--this *won't* work...
1122 #ifndef OP_USES_PALETTE_ZERO
1123                                 if (flagTRANS && bits == 0)
1124 #else
1125                                 if (flagTRANS && (paletteRAM16[index | bits] == 0))
1126 #endif
1127                                         ;       // Do nothing...
1128                                 else
1129                                 {
1130                                         if (!flagRMW)
1131                                                 *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1132                                         else
1133                                                 *currentLineBuffer =
1134                                                         BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1135                                                 *(currentLineBuffer + 1) =
1136                                                         BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1137                                 }
1138
1139                                 currentLineBuffer += lbufDelta;
1140                                 pixels <<= 4;
1141                         }
1142                 }
1143         }
1144         else if (depth == 3)                                                    // 8 BPP
1145         {
1146                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1147                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1148
1149                 // Fetch 1st phrase...
1150                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1151 //Note that firstPix should only be honored *if* we start with the 1st phrase of the bitmap
1152 //i.e., we didn't clip on the margin... !!! FIX !!!
1153                 firstPix &= 0x30;                                                       // Only top two bits are valid for 8 BPP
1154                 pixels <<= firstPix;                                            // Skip first N pixels (N=firstPix)...
1155                 int i = firstPix >> 3;                                          // Start counter at right spot...
1156
1157                 while (iwidth--)
1158                 {
1159                         while (i++ < 8)
1160                         {
1161                                 uint8 bits = pixels >> 56;
1162 // Seems to me that both of these are in the same endian, so we could cast it as
1163 // uint16 * and do straight across copies (what about 24 bpp? Treat it differently...)
1164 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
1165 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
1166 // No, it isn't because we read the memory in an endian safe way--this *won't* work...
1167 //This would seem to be problematic...
1168 //Because it's the palette entry being zero that makes the pixel transparent...
1169 //Let's try it and see.
1170 #ifndef OP_USES_PALETTE_ZERO
1171                                 if (flagTRANS && bits == 0)
1172 #else
1173                                 if (flagTRANS && (paletteRAM16[bits] == 0))
1174 #endif
1175                                         ;       // Do nothing...
1176                                 else
1177                                 {
1178                                         if (!flagRMW)
1179                                                 *(uint16 *)currentLineBuffer = paletteRAM16[bits];
1180                                         else
1181                                                 *currentLineBuffer =
1182                                                         BLEND_CR(*currentLineBuffer, paletteRAM[bits << 1]),
1183                                                 *(currentLineBuffer + 1) =
1184                                                         BLEND_Y(*(currentLineBuffer + 1), paletteRAM[(bits << 1) + 1]);
1185                                 }
1186
1187                                 currentLineBuffer += lbufDelta;
1188                                 pixels <<= 8;
1189                         }
1190                         i = 0;
1191                         // Fetch next phrase...
1192                         data += pitch;
1193                         pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1194                 }
1195         }
1196         else if (depth == 4)                                                    // 16 BPP
1197         {
1198 if (firstPix)
1199         WriteLog("OP: Fixed bitmap @ 16 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1200                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1201                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1202
1203                 while (iwidth--)
1204                 {
1205                         // Fetch phrase...
1206                         uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1207                         data += pitch;
1208
1209                         for(int i=0; i<4; i++)
1210                         {
1211                                 uint8 bitsHi = pixels >> 56, bitsLo = pixels >> 48;
1212 // Seems to me that both of these are in the same endian, so we could cast it as
1213 // uint16 * and do straight across copies (what about 24 bpp? Treat it differently...)
1214 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
1215 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
1216 // No, it isn't because we read the memory in an endian safe way--it *won't* work...
1217 //This doesn't seem right... Let's try the encoded black value ($8800):
1218 //Apparently, CRY 0 maps to $8800...
1219                                 if (flagTRANS && ((bitsLo | bitsHi) == 0))
1220 //                              if (flagTRANS && (bitsHi == 0x88) && (bitsLo == 0x00))
1221                                         ;       // Do nothing...
1222                                 else
1223                                 {
1224                                         if (!flagRMW)
1225                                                 *currentLineBuffer = bitsHi,
1226                                                 *(currentLineBuffer + 1) = bitsLo;
1227                                         else
1228                                                 *currentLineBuffer =
1229                                                         BLEND_CR(*currentLineBuffer, bitsHi),
1230                                                 *(currentLineBuffer + 1) =
1231                                                         BLEND_Y(*(currentLineBuffer + 1), bitsLo);
1232                                 }
1233
1234                                 currentLineBuffer += lbufDelta;
1235                                 pixels <<= 16;
1236                         }
1237                 }
1238         }
1239         else if (depth == 5)                                                    // 24 BPP
1240         {
1241 //Looks like Iron Soldier is the only game that uses 24BPP mode...
1242 //There *might* be others...
1243 //WriteLog("OP: Writing 24 BPP bitmap!\n");
1244 if (firstPix)
1245         WriteLog("OP: Fixed bitmap @ 24 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1246                 // Not sure, but I think RMW only works with 16 BPP and below, and only in CRY mode...
1247                 // The LSB of flags is OPFLAG_REFLECT, so sign extend it and OR 4 into it.
1248                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 4) | 0x04;
1249
1250                 while (iwidth--)
1251                 {
1252                         // Fetch phrase...
1253                         uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1254                         data += pitch;
1255
1256                         for(int i=0; i<2; i++)
1257                         {
1258                                 // We don't use a 32-bit var here because of endian issues...!
1259                                 uint8 bits3 = pixels >> 56, bits2 = pixels >> 48,
1260                                         bits1 = pixels >> 40, bits0 = pixels >> 32;
1261
1262                                 if (flagTRANS && (bits3 | bits2 | bits1 | bits0) == 0)
1263                                         ;       // Do nothing...
1264                                 else
1265                                         *currentLineBuffer = bits3,
1266                                         *(currentLineBuffer + 1) = bits2,
1267                                         *(currentLineBuffer + 2) = bits1,
1268                                         *(currentLineBuffer + 3) = bits0;
1269
1270                                 currentLineBuffer += lbufDelta;
1271                                 pixels <<= 32;
1272                         }
1273                 }
1274         }
1275 }
1276
1277 //
1278 // Store one line of a horizontally scaled bitmap object in the line buffer (no-op if !render, IWIDTH == 0 or HSCALE == 0)
1279 //
1280 void OPProcessScaledBitmap(uint64 p0, uint64 p1, uint64 p2, bool render)
1281 {
1282 // Need to make sure that when writing that it stays within the line buffer...
1283 // LBUF ($F01800 - $F01D9E) 360 x 32-bit RAM
1284         uint8 depth = (p1 >> 12) & 0x07;                                // Color depth of image
1285         int32 xpos = ((int16)((p1 << 4) & 0xFFFF)) >> 4;// Image xpos in LBUF
1286         uint32 iwidth = (p1 >> 28) & 0x3FF;                             // Image width in *phrases*
1287         uint32 data = (p0 >> 40) & 0xFFFFF8;                    // Pixel data address
1288 //#ifdef OP_DEBUG_BMP
1289 // Prolly should use this... Though not sure exactly how.
1290 //Use the upper bits as an offset into the phrase depending on the BPP. That's how!
1291         uint32 firstPix = (p1 >> 49) & 0x3F;
1292 //This is WEIRD! I'm sure I saw Atari Karts request 8 BPP FIRSTPIX! What happened???
1293 if (firstPix)
1294         WriteLog("OP: FIRSTPIX != 0! (Scaled BM)\n");
1295 //#endif
1296 // We can ignore the RELEASE (high order) bit for now--probably forever...!
1297 //      uint8 flags = (p1 >> 45) & 0x0F;        // REFLECT, RMW, TRANS, RELEASE
1298 //Optimize: break these out to their own BOOL values [DONE]
1299         uint8 flags = (p1 >> 45) & 0x07;                                // REFLECT (0), RMW (1), TRANS (2)
1300         bool flagREFLECT = (flags & OPFLAG_REFLECT ? true : false),
1301                 flagRMW = (flags & OPFLAG_RMW ? true : false),
1302                 flagTRANS = (flags & OPFLAG_TRANS ? true : false);
1303         uint8 index = (p1 >> 37) & 0xFE;                                // CLUT index offset (upper pix, 1-4 bpp)
1304         uint32 pitch = (p1 >> 15) & 0x07;                               // Phrase pitch
1305
1306         uint8 * tomRam8 = TOMGetRamPointer();
1307         uint8 * paletteRAM = &tomRam8[0x400];
1308         // This is OK as long as it's used correctly: For 16-bit RAM to RAM direct copies--NOT
1309         // for use when using endian-corrected data (i.e., any of the *ReadWord functions!)
1310         uint16 * paletteRAM16 = (uint16 *)paletteRAM;
1311
1312         uint16 hscale = p2 & 0xFF;
1313 // Hmm. It seems that fixing the horizontal scale necessitated re-fixing this. Not sure why,
1314 // but seems to be consistent with the vertical scaling now (and it may turn out to be wrong!)...
1315         uint16 horizontalRemainder = hscale;                            // Not sure if it starts full, but seems reasonable [It's not!]
1316 //      uint8 horizontalRemainder = 0;                                  // Let's try zero! Seems to work! Yay! [No, it doesn't!]
1317         int32 scaledWidthInPixels = (iwidth * phraseWidthToPixels[depth] * hscale) >> 5;
1318         uint32 scaledPhrasePixels = (phraseWidthToPixels[depth] * hscale) >> 5;
1319
1320 //      WriteLog("bitmap %ix? %ibpp at %i,? firstpix=? data=0x%.8x pitch %i hflipped=%s dwidth=? (linked to ?) RMW=%s Tranparent=%s\n",
1321 //              iwidth, op_bitmap_bit_depth[bitdepth], xpos, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), (flags&OPFLAG_RMW ? "yes" : "no"), (flags&OPFLAG_TRANS ? "yes" : "no"));
1322
1323 // Looks like an hscale of zero means don't draw!
1324         if (!render || iwidth == 0 || hscale == 0)
1325                 return;
1326
1327 /*extern int start_logging;
1328 if (start_logging)
1329         WriteLog("OP: Scaled bitmap %ix? %ibpp at %i,? hscale=%02X fpix=%i data=%08X pitch %i hflipped=%s dwidth=? (linked to %08X) Transluency=%s\n",
1330                 iwidth, op_bitmap_bit_depth[depth], xpos, hscale, firstPix, data, pitch, (flagREFLECT ? "yes" : "no"), op_pointer, (flagRMW ? "yes" : "no"));*/
1331 //#define OP_DEBUG_BMP
1332 //#ifdef OP_DEBUG_BMP
1333 //      WriteLog("OP: Scaled bitmap %ix%i %ibpp at %i,%i firstpix=%i data=0x%.8x pitch %i hflipped=%s dwidth=%i (linked to 0x%.8x) Transluency=%s\n",
1334 //              iwidth, height, op_bitmap_bit_depth[bitdepth], xpos, ypos, firstPix, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), dwidth, op_pointer, (flags&OPFLAG_RMW ? "yes" : "no"));
1335 //#endif
1336
1337         int32 startPos = xpos, endPos = xpos +
1338                 (!flagREFLECT ? scaledWidthInPixels - 1 : -(scaledWidthInPixels + 1));
1339         uint32 clippedWidth = 0, phraseClippedWidth = 0, dataClippedWidth = 0;
1340         bool in24BPPMode = (((GET16(tomRam8, 0x0028) >> 1) & 0x03) == 1 ? true : false);        // VMODE
1341         // Not sure if this is Jaguar Two only location or what...
1342         // From the docs, it is... If we want to limit here we should think of something else.
1343 //      int32 limit = GET16(tom_ram_8, 0x0008);                 // LIMIT
1344         int32 limit = 720;
1345 //      int32 lbufWidth = (!in24BPPMode ? limit - 1 : (limit / 2) - 1); // Zero based limit...
1346         int32 lbufWidth = 719;  // Zero based limit...
1347
1348         // If the image is completely to the left or right of the line buffer, then bail.
1349 //If in REFLECT mode, then these values are swapped! !!! FIX !!! [DONE]
1350 //There are four possibilities:
1351 //  1. image sits on left edge and no REFLECT; starts out of bounds but ends in bounds.
1352 //  2. image sits on left edge and REFLECT; starts in bounds but ends out of bounds.
1353 //  3. image sits on right edge and REFLECT; starts out of bounds but ends in bounds.
1354 //  4. image sits on right edge and no REFLECT; starts in bounds but ends out of bounds.
1355 //Numbers 2 & 4 can be caught by checking the LBUF clip while in the inner loop,
1356 // numbers 1 & 3 are of concern.
1357 // This *indirectly* handles only cases 2 & 4! And is WRONG if REFLECT is set...!
1358 //      if (rightMargin < 0 || leftMargin > lbufWidth)
1359
1360 // It might be easier to swap these (if REFLECTed) and just use XPOS down below...
1361 // That way, you could simply set XPOS to leftMargin if !REFLECT and to rightMargin otherwise.
1362 // Still have to be careful with the DATA and IWIDTH values though...
1363
1364         if ((!flagREFLECT && (endPos < 0 || startPos > lbufWidth))
1365                 || (flagREFLECT && (startPos < 0 || endPos > lbufWidth)))
1366                 return;
1367
1368         // Otherwise, find the clip limits and clip the phrase as well...
1369         // NOTE: I'm fudging here by letting the actual blit overstep the bounds of the
1370         //       line buffer, but it shouldn't matter since there are two unused line
1371         //       buffers below and nothing above and I'll at most write 40 bytes outside
1372         //       the line buffer... I could use a fractional clip begin/end value, but
1373         //       this makes the blit a *lot* more hairy. I might fix this in the future
1374         //       if it becomes necessary. (JLH)
1375         //       Probably wouldn't be *that* hairy. Just use a delta that tells the inner loop
1376         //       which pixel in the phrase is being written, and quit when either end of phrases
1377         //       is reached or line buffer extents are surpassed.
1378
1379 //This stuff is probably wrong as well... !!! FIX !!!
1380 //The strange thing is that it seems to work, but that's no guarantee that it's bulletproof!
1381 //Yup. Seems that JagMania doesn't work correctly with this...
1382 //Dunno if this is the problem, but Atari Karts is showing *some* of the road now...
1383 //Actually, it is! Or, it was. It doesn't seem to be clipping here, so the problem lies
1384 //elsewhere! Hmm. Putting the scaling code into the 1/2/8 BPP cases seems to draw the ground
1385 // a bit more accurately... Strange!
1386 //It's probably a case of the REFLECT flag being set and the background being written
1387 //from the right side of the screen...
1388 //But no, it isn't... At least if the diagnostics are telling the truth!
1389
1390         // NOTE: We're just using endPos to figure out how much, if any, to clip by.
1391         // ALSO: There may be another case where we start out of bounds and end out of bounds...!
1392         // !!! FIX !!!
1393
1394 //There's a problem here with scaledPhrasePixels in that it can be forced to zero when
1395 //the scaling factor is small. So fix it already! !!! FIX !!!
1396 /*if (scaledPhrasePixels == 0)
1397 {
1398         WriteLog("OP: [Scaled] We're about to encounter a divide by zero error!\n");
1399         DumpScaledObject(p0, p1, p2);
1400 }//*/
1401 //NOTE: I'm almost 100% sure that this is wrong... And it is! :-p
1402
1403 //Try a simple example...
1404 // Let's say we have a 8 BPP scanline with an hscale of $80 (4). Our xpos is -10,
1405 // non-flipped. Pixels in the bitmap are XYZXYZXYZXYZXYZ.
1406 // Scaled up, they would be XXXXYYYYZZZZXXXXYYYYZZZZXXXXYYYYZZZZ...
1407 //
1408 // Normally, we would expect this in the line buffer:
1409 // ZZXXXXYYYYZZZZXXXXYYYYZZZZ...
1410 //
1411 // But instead we're getting:
1412 // XXXXYYYYZZZZXXXXYYYYZZZZ...
1413 //
1414 // or are we??? It would seem so, simply by virtue of the fact that we're NOT starting
1415 // on negative boundary--or are we? Hmm...
1416 // cw = 10, dcw = pcw = 10 / ([8 * 4 = 32] 32) = 0, sp = -10
1417 //
1418 // Let's try a real world example:
1419 //
1420 //OP: Scaled bitmap (70, 8 BPP, spp=28) sp (-400) < 0... [new sp=-8, cw=400, dcw=pcw=14]
1421 //OP: Scaled bitmap (6F, 8 BPP, spp=27) sp (-395) < 0... [new sp=-17, cw=395, dcw=pcw=14]
1422 //
1423 // Really, spp is 27.75 in the second case...
1424 // So... If we do 395 / 27.75, we get 14. Ok so far... If we scale that against the
1425 // start position (14 * 27.75), we get -6.5... NOT -17!
1426
1427 //Now it seems we're working OK, at least for the first case...
1428 uint32 scaledPhrasePixelsUS = phraseWidthToPixels[depth] * hscale;
1429
1430         if (startPos < 0)                       // Case #1: Begin out, end in, L to R
1431 {
1432 extern int start_logging;
1433 if (start_logging)
1434         WriteLog("OP: Scaled bitmap (%02X, %u BPP, spp=%u) start pos (%i) < 0...", hscale, op_bitmap_bit_depth[depth], scaledPhrasePixels, startPos);
1435 //              clippedWidth = 0 - startPos,
1436                 clippedWidth = (0 - startPos) << 5,
1437 //              dataClippedWidth = phraseClippedWidth = clippedWidth / scaledPhrasePixels,
1438                 dataClippedWidth = phraseClippedWidth = (clippedWidth / scaledPhrasePixelsUS) >> 5,
1439 //              startPos = 0 - (clippedWidth % scaledPhrasePixels);
1440                 startPos += (dataClippedWidth * scaledPhrasePixelsUS) >> 5;
1441 if (start_logging)
1442         WriteLog(" [new sp=%i, cw=%i, dcw=pcw=%i]\n", startPos, clippedWidth, dataClippedWidth);
1443 }
1444
1445         if (endPos < 0)                         // Case #2: Begin in, end out, R to L
1446                 clippedWidth = 0 - endPos,
1447                 phraseClippedWidth = clippedWidth / scaledPhrasePixels;
1448
1449         if (endPos > lbufWidth)         // Case #3: Begin in, end out, L to R
1450                 clippedWidth = endPos - lbufWidth,
1451                 phraseClippedWidth = clippedWidth / scaledPhrasePixels;
1452
1453         if (startPos > lbufWidth)       // Case #4: Begin out, end in, R to L
1454                 clippedWidth = startPos - lbufWidth,
1455                 dataClippedWidth = phraseClippedWidth = clippedWidth / scaledPhrasePixels,
1456                 startPos = lbufWidth + (clippedWidth % scaledPhrasePixels);
1457
1458 extern int op_start_log;
1459 if (op_start_log && clippedWidth != 0)
1460         WriteLog("OP: Clipped line. SP=%i, EP=%i, clip=%u, iwidth=%u, hscale=%02X\n", startPos, endPos, clippedWidth, iwidth, hscale);
1461 if (op_start_log && startPos == 13)
1462 {
1463         WriteLog("OP: Scaled line. SP=%i, EP=%i, clip=%u, iwidth=%u, hscale=%02X, depth=%u, firstPix=%u\n", startPos, endPos, clippedWidth, iwidth, hscale, depth, firstPix);
1464         DumpScaledObject(p0, p1, p2);
1465         if (iwidth == 7)
1466         {
1467                 WriteLog("    %08X: ", data);
1468                 for(int i=0; i<7*8; i++)
1469                         WriteLog("%02X ", JaguarReadByte(data+i));
1470                 WriteLog("\n");
1471         }
1472 }
1473         // If the image is sitting on the line buffer left or right edge, we need to compensate
1474         // by decreasing the image phrase width accordingly.
1475         iwidth -= phraseClippedWidth;
1476
1477         // Also, if we're clipping the phrase we need to make sure we're in the correct part of
1478         // the pixel data.
1479 //      data += phraseClippedWidth * (pitch << 3);
1480         data += dataClippedWidth * (pitch << 3);
1481
1482         // NOTE: When the bitmap is in REFLECT mode, the XPOS marks the *right* side of the
1483         //       bitmap! This makes clipping & etc. MUCH, much easier...!
1484 //      uint32 lbufAddress = 0x1800 + (!in24BPPMode ? leftMargin * 2 : leftMargin * 4);
1485 //      uint32 lbufAddress = 0x1800 + (!in24BPPMode ? startPos * 2 : startPos * 4);
1486         uint32 lbufAddress = 0x1800 + startPos * 2;
1487         uint8 * currentLineBuffer = &tomRam8[lbufAddress];
1488 //uint8 * lineBufferLowerLimit = &tom_ram_8[0x1800],
1489 //      * lineBufferUpperLimit = &tom_ram_8[0x1800 + 719];
1490
1491         // Render.
1492
1493 // Hmm. We check above for 24 BPP mode, but don't do anything about it below...
1494 // If we *were* in 24 BPP mode, how would you convert CRY to RGB24? Seems to me
1495 // that if you're in CRY mode then you wouldn't be able to use 24 BPP bitmaps
1496 // anyway.
1497 // This seems to be the case (at least according to the Midsummer docs)...!
1498
1499         if (depth == 0)                                                                 // 1 BPP
1500         {
1501 if (firstPix != 0)
1502         WriteLog("OP: Scaled bitmap @ 1 BPP requesting FIRSTPIX!\n");
1503                 // The LSB of flags is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1504                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1505
1506                 int pixCount = 0;
1507                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1508
1509                 while ((int32)iwidth > 0)
1510                 {
1511                         uint8 bits = pixels >> 63;
1512
1513 #ifndef OP_USES_PALETTE_ZERO
1514                         if (flagTRANS && bits == 0)
1515 #else
1516                         if (flagTRANS && (paletteRAM16[index | bits] == 0))
1517 #endif
1518                                 ;       // Do nothing...
1519                         else
1520                         {
1521                                 if (!flagRMW)
1522                                         // This is the *only* correct use of endian-dependent code
1523                                         // (i.e., mem-to-mem direct copying)!
1524                                         *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1525                                 else
1526                                         *currentLineBuffer =
1527                                                 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1528                                         *(currentLineBuffer + 1) =
1529                                                 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1530                         }
1531
1532                         currentLineBuffer += lbufDelta;
1533
1534 /*
1535 The reason we subtract the horizontalRemainder *after* the test is because we had too few
1536 bytes for horizontalRemainder to properly recognize a negative number. But now it's 16 bits
1537 wide, so we could probably go back to that (as long as we make it an int16 and not a uint16!)
1538 */
1539 /*                      horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1540                         while (horizontalRemainder & 0x80)
1541                         {
1542                                 horizontalRemainder += hscale;
1543                                 pixCount++;
1544                                 pixels <<= 1;
1545                         }//*/
1546 //                      while (horizontalRemainder <= 0x20)             // I.e., it's <= 1.0 (*before* subtraction)
1547                         while (horizontalRemainder < 0x20)              // I.e., it's <= 1.0 (*before* subtraction)
1548                         {
1549                                 horizontalRemainder += hscale;
1550                                 pixCount++;
1551                                 pixels <<= 1;
1552                         }
1553                         horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1554
1555                         if (pixCount > 63)
1556                         {
1557                                 int phrasesToSkip = pixCount / 64, pixelShift = pixCount % 64;
1558
1559                                 data += (pitch << 3) * phrasesToSkip;
1560                                 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1561                                 pixels <<= 1 * pixelShift;
1562                                 iwidth -= phrasesToSkip;
1563                                 pixCount = pixelShift;
1564                         }
1565                 }
1566         }
1567         else if (depth == 1)                                                    // 2 BPP
1568         {
1569 if (firstPix != 0)
1570         WriteLog("OP: Scaled bitmap @ 2 BPP requesting FIRSTPIX!\n");
1571                 index &= 0xFC;                                                          // Top six bits form CLUT index
1572                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1573                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1574
1575                 int pixCount = 0;
1576                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1577
1578                 while ((int32)iwidth > 0)
1579                 {
1580                         uint8 bits = pixels >> 62;
1581
1582 #ifndef OP_USES_PALETTE_ZERO
1583                         if (flagTRANS && bits == 0)
1584 #else
1585                         if (flagTRANS && (paletteRAM16[index | bits] == 0))
1586 #endif
1587                                 ;       // Do nothing...
1588                         else
1589                         {
1590                                 if (!flagRMW)
1591                                         // This is the *only* correct use of endian-dependent code
1592                                         // (i.e., mem-to-mem direct copying)!
1593                                         *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1594                                 else
1595                                         *currentLineBuffer =
1596                                                 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1597                                         *(currentLineBuffer + 1) =
1598                                                 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1599                         }
1600
1601                         currentLineBuffer += lbufDelta;
1602
1603 /*                      horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1604                         while (horizontalRemainder & 0x80)
1605                         {
1606                                 horizontalRemainder += hscale;
1607                                 pixCount++;
1608                                 pixels <<= 2;
1609                         }//*/
1610 //                      while (horizontalRemainder <= 0x20)             // I.e., it's <= 0 (*before* subtraction)
1611                         while (horizontalRemainder < 0x20)              // I.e., it's <= 1.0 (*before* subtraction)
1612                         {
1613                                 horizontalRemainder += hscale;
1614                                 pixCount++;
1615                                 pixels <<= 2;
1616                         }
1617                         horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1618
1619                         if (pixCount > 31)
1620                         {
1621                                 int phrasesToSkip = pixCount / 32, pixelShift = pixCount % 32;
1622
1623                                 data += (pitch << 3) * phrasesToSkip;
1624                                 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1625                                 pixels <<= 2 * pixelShift;
1626                                 iwidth -= phrasesToSkip;
1627                                 pixCount = pixelShift;
1628                         }
1629                 }
1630         }
1631         else if (depth == 2)                                                    // 4 BPP
1632         {
1633 if (firstPix != 0)
1634         WriteLog("OP: Scaled bitmap @ 4 BPP requesting FIRSTPIX!\n");
1635                 index &= 0xF0;                                                          // Top four bits form CLUT index
1636                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1637                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1638
1639                 int pixCount = 0;
1640                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1641
1642                 while ((int32)iwidth > 0)
1643                 {
1644                         uint8 bits = pixels >> 60;
1645
1646 #ifndef OP_USES_PALETTE_ZERO
1647                         if (flagTRANS && bits == 0)
1648 #else
1649                         if (flagTRANS && (paletteRAM16[index | bits] == 0))
1650 #endif
1651                                 ;       // Do nothing...
1652                         else
1653                         {
1654                                 if (!flagRMW)
1655                                         // This is the *only* correct use of endian-dependent code
1656                                         // (i.e., mem-to-mem direct copying)!
1657                                         *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1658                                 else
1659                                         *currentLineBuffer =
1660                                                 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1661                                         *(currentLineBuffer + 1) =
1662                                                 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1663                         }
1664
1665                         currentLineBuffer += lbufDelta;
1666
1667 /*                      horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1668                         while (horizontalRemainder & 0x80)
1669                         {
1670                                 horizontalRemainder += hscale;
1671                                 pixCount++;
1672                                 pixels <<= 4;
1673                         }//*/
1674 //                      while (horizontalRemainder <= 0x20)             // I.e., it's <= 0 (*before* subtraction)
1675                         while (horizontalRemainder < 0x20)              // I.e., it's <= 0 (*before* subtraction)
1676                         {
1677                                 horizontalRemainder += hscale;
1678                                 pixCount++;
1679                                 pixels <<= 4;
1680                         }
1681                         horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1682
1683                         if (pixCount > 15)
1684                         {
1685                                 int phrasesToSkip = pixCount / 16, pixelShift = pixCount % 16;
1686
1687                                 data += (pitch << 3) * phrasesToSkip;
1688                                 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1689                                 pixels <<= 4 * pixelShift;
1690                                 iwidth -= phrasesToSkip;
1691                                 pixCount = pixelShift;
1692                         }
1693                 }
1694         }
1695         else if (depth == 3)                                                    // 8 BPP
1696         {
1697 if (firstPix)
1698         WriteLog("OP: Scaled bitmap @ 8 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1699                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1700                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1701
1702                 int pixCount = 0;
1703                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1704
1705                 while ((int32)iwidth > 0)
1706                 {
1707                         uint8 bits = pixels >> 56;
1708
1709 #ifndef OP_USES_PALETTE_ZERO
1710                         if (flagTRANS && bits == 0)
1711 #else
1712                         if (flagTRANS && (paletteRAM16[bits] == 0))
1713 #endif
1714                                 ;       // Do nothing...
1715                         else
1716                         {
1717                                 if (!flagRMW)
1718                                         // This is the *only* correct use of endian-dependent code
1719                                         // (i.e., mem-to-mem direct copying)!
1720                                         *(uint16 *)currentLineBuffer = paletteRAM16[bits];
1721 /*                              {
1722                                         if (currentLineBuffer >= lineBufferLowerLimit && currentLineBuffer <= lineBufferUpperLimit)
1723                                                 *(uint16 *)currentLineBuffer = paletteRAM16[bits];
1724                                 }*/
1725                                 else
1726                                         *currentLineBuffer =
1727                                                 BLEND_CR(*currentLineBuffer, paletteRAM[bits << 1]),
1728                                         *(currentLineBuffer + 1) =
1729                                                 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[(bits << 1) + 1]);
1730                         }
1731
1732                         currentLineBuffer += lbufDelta;
1733
1734 //                      while (horizontalRemainder <= 0x20)             // I.e., it's <= 0 (*before* subtraction)
1735                         while (horizontalRemainder < 0x20)              // I.e., it's <= 1.0 (*before* subtraction)
1736                         {
1737                                 horizontalRemainder += hscale;
1738                                 pixCount++;
1739                                 pixels <<= 8;
1740                         }
1741                         horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1742
1743                         if (pixCount > 7)
1744                         {
1745                                 int phrasesToSkip = pixCount / 8, pixelShift = pixCount % 8;
1746
1747                                 data += (pitch << 3) * phrasesToSkip;
1748                                 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1749                                 pixels <<= 8 * pixelShift;
1750                                 iwidth -= phrasesToSkip;
1751                                 pixCount = pixelShift;
1752                         }
1753                 }
1754         }
1755         else if (depth == 4)                                                    // 16 BPP
1756         {
1757 if (firstPix != 0)
1758         WriteLog("OP: Scaled bitmap @ 16 BPP requesting FIRSTPIX!\n");
1759                 // The LSB is OPFLAG_REFLECT, so sign extend it and OR 2 into it.
1760                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1761
1762                 int pixCount = 0;
1763                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1764
1765                 while ((int32)iwidth > 0)
1766                 {
1767                         uint8 bitsHi = pixels >> 56, bitsLo = pixels >> 48;
1768
1769 //This doesn't seem right... Let's try the encoded black value ($8800):
1770 //Apparently, CRY 0 maps to $8800...
1771                                 if (flagTRANS && ((bitsLo | bitsHi) == 0))
1772 //                              if (flagTRANS && (bitsHi == 0x88) && (bitsLo == 0x00))
1773                                 ;       // Do nothing...
1774                         else
1775                         {
1776                                 if (!flagRMW)
1777                                         *currentLineBuffer = bitsHi,
1778                                         *(currentLineBuffer + 1) = bitsLo;
1779                                 else
1780                                         *currentLineBuffer =
1781                                                 BLEND_CR(*currentLineBuffer, bitsHi),
1782                                         *(currentLineBuffer + 1) =
1783                                                 BLEND_Y(*(currentLineBuffer + 1), bitsLo);
1784                         }
1785
1786                         currentLineBuffer += lbufDelta;
1787
1788 /*                      horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1789                         while (horizontalRemainder & 0x80)
1790                         {
1791                                 horizontalRemainder += hscale;
1792                                 pixCount++;
1793                                 pixels <<= 16;
1794                         }//*/
1795 //                      while (horizontalRemainder <= 0x20)             // I.e., it's <= 0 (*before* subtraction)
1796                         while (horizontalRemainder < 0x20)              // I.e., it's <= 1.0 (*before* subtraction)
1797                         {
1798                                 horizontalRemainder += hscale;
1799                                 pixCount++;
1800                                 pixels <<= 16;
1801                         }
1802                         horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1803 //*/
1804                         if (pixCount > 3)
1805                         {
1806                                 int phrasesToSkip = pixCount / 4, pixelShift = pixCount % 4;
1807
1808                                 data += (pitch << 3) * phrasesToSkip;
1809                                 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1810                                 pixels <<= 16 * pixelShift;
1811
1812                                 iwidth -= phrasesToSkip;
1813
1814                                 pixCount = pixelShift;
1815                         }
1816                 }
1817         }
1818         else if (depth == 5)                                                    // 24 BPP
1819         {
1820 //I'm not sure that you can scale a 24 BPP bitmap properly--the JTRM seems to indicate as much.
1821 WriteLog("OP: Writing 24 BPP scaled bitmap!\n");
1822 if (firstPix != 0)
1823         WriteLog("OP: Scaled bitmap @ 24 BPP requesting FIRSTPIX!\n");
1824                 // Not sure, but I think RMW only works with 16 BPP and below, and only in CRY mode...
1825                 // The LSB is OPFLAG_REFLECT, so sign extend it and OR 4 into it.
1826                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 4) | 0x04;
1827
1828                 while (iwidth--)
1829                 {
1830                         // Fetch phrase...
1831                         uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1832                         data += pitch << 3;                                             // Multiply pitch * 8 (optimize: precompute this value)
1833
1834                         for(int i=0; i<2; i++)
1835                         {
1836                                 uint8 bits3 = pixels >> 56, bits2 = pixels >> 48,
1837                                         bits1 = pixels >> 40, bits0 = pixels >> 32;
1838
1839                                 if (flagTRANS && (bits3 | bits2 | bits1 | bits0) == 0)
1840                                         ;       // Do nothing...
1841                                 else
1842                                         *currentLineBuffer = bits3,
1843                                         *(currentLineBuffer + 1) = bits2,
1844                                         *(currentLineBuffer + 2) = bits1,
1845                                         *(currentLineBuffer + 3) = bits0;
1846
1847                                 currentLineBuffer += lbufDelta;
1848                                 pixels <<= 32;
1849                         }
1850                 }
1851         }
1852 }