]> Shamusworld >> Repos - virtualjaguar/blob - src/op.cpp
Tweaks for OP dumping. Hopefully removing all duplicate entries now.
[virtualjaguar] / src / op.cpp
1 //
2 // Object Processor
3 //
4 // Original source by David Raingeard (Cal2)
5 // GCC/SDL port by Niels Wagenaar (Linux/WIN32) and Caz (BeOS)
6 // Extensive cleanups/fixes/rewrites by James L. Hammons
7 // (C) 2010 Underground Software
8 //
9 // JLH = James L. Hammons <jlhamm@acm.org>
10 //
11 // Who  When        What
12 // ---  ----------  -------------------------------------------------------------
13 // JLH  01/16/2010  Created this log ;-)
14 //
15
16 #include "op.h"
17
18 #include <stdlib.h>
19 #include <string.h>
20 #include "gpu.h"
21 #include "jaguar.h"
22 #include "log.h"
23 #include "m68k.h"
24 #include "memory.h"
25 #include "tom.h"
26
27 //#define OP_DEBUG
28 //#define OP_DEBUG_BMP
29
// Blend table lookups: the destination byte selects the table row (high 8
// bits of the index) and the source byte selects the column (low 8 bits).
// Every macro argument is parenthesized before the cast so that an argument
// which is itself an expression (e.g. "a | b") is cast and shifted as a whole.
#define BLEND_Y(dst, src)       op_blend_y[(((uint16)(dst)) << 8) | ((uint16)(src))]
#define BLEND_CR(dst, src)      op_blend_cr[(((uint16)(dst)) << 8) | ((uint16)(src))]
32
33 #define OBJECT_TYPE_BITMAP      0                                       // 000
34 #define OBJECT_TYPE_SCALE       1                                       // 001
35 #define OBJECT_TYPE_GPU         2                                       // 010
36 #define OBJECT_TYPE_BRANCH      3                                       // 011
37 #define OBJECT_TYPE_STOP        4                                       // 100
38
39 #define CONDITION_EQUAL                         0
40 #define CONDITION_LESS_THAN                     1
41 #define CONDITION_GREATER_THAN          2
42 #define CONDITION_OP_FLAG_SET           3
43 #define CONDITION_SECOND_HALF_LINE      4
44
45 #define OPFLAG_RELEASE          8                                       // Bus release bit
46 #define OPFLAG_TRANS            4                                       // Transparency bit
47 #define OPFLAG_RMW                      2                                       // Read-Modify-Write bit
48 #define OPFLAG_REFLECT          1                                       // Horizontal mirror bit
49
50 // Private function prototypes
51
52 void OPProcessFixedBitmap(uint64 p0, uint64 p1, bool render);
53 void OPProcessScaledBitmap(uint64 p0, uint64 p1, uint64 p2, bool render);
54 void OPDiscoverObjects(uint32 address);
55 void OPDumpObjectList(void);
56 void DumpScaledObject(uint64 p0, uint64 p1, uint64 p2);
57 void DumpFixedObject(uint64 p0, uint64 p1);
58 void DumpBitmapCore(uint64 p0, uint64 p1);
59 uint64 OPLoadPhrase(uint32 offset);
60
61 // Local global variables
62
63 // Blend tables (64K each)
64 static uint8 op_blend_y[0x10000];
65 static uint8 op_blend_cr[0x10000];
66 // There may be a problem with this "RAM" overlapping (and thus being independent of)
67 // some of the regular TOM RAM...
68 //#warning objectp_ram is separated from TOM RAM--need to fix that!
69 //static uint8 objectp_ram[0x40];                       // This is based at $F00000
70 uint8 objectp_running = 0;
71 //bool objectp_stop_reading_list;
72
// Bits per pixel for each value of the 3-bit depth field of a bitmap object
// (used by the dump routines to print the "bpp" figure).
static uint8 op_bitmap_bit_depth[8] = { 1, 2, 4, 8, 16, 24, 32, 0 };
//static uint32 op_bitmap_bit_size[8] =
//      { (uint32)(0.125*65536), (uint32)(0.25*65536), (uint32)(0.5*65536), (uint32)(1*65536),
//        (uint32)(2*65536),     (uint32)(1*65536),    (uint32)(1*65536),   (uint32)(1*65536) };
// Address of the next phrase to fetch; OPProcessList() loads it from the OLP
// register and advances/relinks it while walking the object list.
static uint32 op_pointer;

// NOTE(review): presumably the number of pixels held in one 64-bit phrase for
// each depth index (64 / bpp) -- not referenced in this part of the file, so
// confirm against its users.
int32 phraseWidthToPixels[8] = { 64, 32, 16, 8, 4, 2, 0, 0 };
80
81
82 //
83 // Object Processor initialization
84 //
85 void OPInit(void)
86 {
87         // Here we calculate the saturating blend of a signed 4-bit value and an
88         // existing Cyan/Red value as well as a signed 8-bit value and an existing intensity...
89         // Note: CRY is 4 bits Cyan, 4 bits Red, 16 bits intensitY
90         for(int i=0; i<256*256; i++)
91         {
92                 int y = (i >> 8) & 0xFF;
93                 int dy = (int8)i;                                       // Sign extend the Y index
94                 int c1 = (i >> 8) & 0x0F;
95                 int dc1 = (int8)(i << 4) >> 4;          // Sign extend the R index
96                 int c2 = (i >> 12) & 0x0F;
97                 int dc2 = (int8)(i & 0xF0) >> 4;        // Sign extend the C index
98
99                 y += dy;
100
101                 if (y < 0)
102                         y = 0;
103                 else if (y > 0xFF)
104                         y = 0xFF;
105
106                 op_blend_y[i] = y;
107
108                 c1 += dc1;
109
110                 if (c1 < 0)
111                         c1 = 0;
112                 else if (c1 > 0x0F)
113                         c1 = 0x0F;
114
115                 c2 += dc2;
116
117                 if (c2 < 0)
118                         c2 = 0;
119                 else if (c2 > 0x0F)
120                         c2 = 0x0F;
121
122                 op_blend_cr[i] = (c2 << 4) | c1;
123         }
124
125         OPReset();
126 }
127
128 //
129 // Object Processor reset
130 //
// Resets the Object Processor state kept in this file: clears the
// objectp_running flag. (The private register RAM that used to be cleared
// here is commented out.)
void OPReset(void)
{
//      memset(objectp_ram, 0x00, 0x40);
        objectp_running = 0;
}
136
// Printable name for each value of the 3-bit object type field
static const char * opType[8] =
{ "(BITMAP)", "(SCALED BITMAP)", "(GPU INT)", "(BRANCH)", "(STOP)", "???", "???", "???" };
// Printable name for each value of the 3-bit condition code of a branch object
static const char * ccType[8] =
        { "\"==\"", "\"<\"", "\">\"", "(opflag set)", "(second half line)", "?", "?", "?" };
// Addresses of the objects found by OPDiscoverObjects(), in discovery order,
// and how many entries of the table are in use
static uint32 object[8192];
static uint32 numberOfObjects;
143 //static uint32 objectLink[8192];
144 //static uint32 numberOfLinks;
145
//
// Object Processor shutdown: writes the current object list pointer to the
// log, then discovers every object reachable from it and dumps them.
//
void OPDone(void)
{
//#warning "!!! Fix OL dump so that it follows links !!!"
//      const char * opType[8] =
//      { "(BITMAP)", "(SCALED BITMAP)", "(GPU INT)", "(BRANCH)", "(STOP)", "???", "???", "???" };
//      const char * ccType[8] =
//              { "\"==\"", "\"<\"", "\">\"", "(opflag set)", "(second half line)", "?", "?", "?" };

        uint32 olp = OPGetListPointer();
        WriteLog("\nOP: OLP = $%08X\n", olp);
        WriteLog("OP: Phrase dump\n    ----------\n");

#if 0
        // Disabled: older dump that simply walks the first $100 bytes after the
        // OLP phrase by phrase without following links. Note that it masks the
        // condition code with 0x03, unlike the 3-bit mask used by
        // OPDumpObjectList().
        for(uint32 i=0; i<0x100; i+=8)
        {
                uint32 hi = JaguarReadLong(olp + i, OP), lo = JaguarReadLong(olp + i + 4, OP);
                WriteLog("\t%08X: %08X %08X %s", olp + i, hi, lo, opType[lo & 0x07]);

                if ((lo & 0x07) == 3)
                {
                        uint16 ypos = (lo >> 3) & 0x7FF;
                        uint8  cc   = (lo >> 14) & 0x03;
                        uint32 link = ((hi << 11) | (lo >> 21)) & 0x3FFFF8;
                        WriteLog(" YPOS=%u, CC=%s, link=%08X", ypos, ccType[cc], link);
                }

                WriteLog("\n");

                if ((lo & 0x07) == 0)
                        DumpFixedObject(OPLoadPhrase(olp+i), OPLoadPhrase(olp+i+8));

                if ((lo & 0x07) == 1)
                        DumpScaledObject(OPLoadPhrase(olp+i), OPLoadPhrase(olp+i+8), OPLoadPhrase(olp+i+16));
        }

        WriteLog("\n");
#else
        // Start from an empty table, collect every object reachable from the
        // list pointer (following links and branches), then log them all.
        numberOfObjects = 0;
        OPDiscoverObjects(olp);
        OPDumpObjectList();
#endif
}
188
189 void OPDiscoverObjects(uint32 address)
190 {
191         // Check to see if we've already seen this object
192         for(uint32 i=0; i<numberOfObjects; i++)
193         {
194                 if (address == object[i])
195                         return;
196         }
197
198         // Store the object...
199         object[numberOfObjects++] = address;
200         uint8 objectType = 0;
201
202         do
203         {
204                 uint32 hi = JaguarReadLong(address + 0, OP);
205                 uint32 lo = JaguarReadLong(address + 4, OP);
206                 objectType = lo & 0x07;
207                 uint32 link = ((hi << 11) | (lo >> 21)) & 0x3FFFF8;
208
209                 if (objectType == 3)
210                 {
211                         uint16 ypos = (lo >> 3) & 0x7FF;
212                         uint8  cc   = (lo >> 14) & 0x07;        // Proper # of bits == 3
213
214                         // Recursion needed to follow all links!
215                         OPDiscoverObjects(address + 8);
216                 }
217
218                 if (address == link)    // Ruh roh...
219                 {
220                         // Runaway recursive link is bad!
221                         return;
222                 }
223
224                 address = link;
225
226                 // Check to see if we've already seen this object, and add it if not
227                 bool seenObject = false;
228
229                 for(uint32 i=0; i<numberOfObjects; i++)
230                 {
231                         if (address == object[i])
232                         {
233                                 seenObject = true;
234                                 break;
235                         }
236                 }
237
238                 if (!seenObject)
239                         object[numberOfObjects++] = address;
240         }
241         while (objectType != 4);
242 }
243
244 void OPDumpObjectList(void)
245 {
246         for(uint32 i=0; i<numberOfObjects; i++)
247         {
248                 uint32 address = object[i];
249
250                 uint32 hi = JaguarReadLong(address + 0, OP);
251                 uint32 lo = JaguarReadLong(address + 4, OP);
252                 uint8 objectType = lo & 0x07;
253                 uint32 link = ((hi << 11) | (lo >> 21)) & 0x3FFFF8;
254                 WriteLog("%08X: %08X %08X %s", address, hi, lo, opType[objectType]);
255
256                 if (objectType == 3)
257                 {
258                         uint16 ypos = (lo >> 3) & 0x7FF;
259                         uint8  cc   = (lo >> 14) & 0x07;        // Proper # of bits == 3
260                         WriteLog(" YPOS=%u, CC=%s, link=$%08X", ypos, ccType[cc], link);
261                 }
262
263                 WriteLog("\n");
264
265                 if (objectType == 0)
266                         DumpFixedObject(OPLoadPhrase(address + 0), OPLoadPhrase(address + 8));
267
268                 if (objectType == 1)
269                         DumpScaledObject(OPLoadPhrase(address + 0), OPLoadPhrase(address + 8),
270                                 OPLoadPhrase(address + 16));
271
272                 if (address == link)    // Ruh roh...
273                 {
274                         // Runaway recursive link is bad!
275                         WriteLog("***** SELF REFERENTIAL LINK *****\n\n");
276                 }
277         }
278
279         WriteLog("\n");
280 }
281
282 //
283 // Object Processor memory access
284 // Memory range: F00010 - F00027
285 //
286 //      F00010-F00017   R     xxxxxxxx xxxxxxxx   OB - current object code from the graphics processor
287 //      F00020-F00023     W   xxxxxxxx xxxxxxxx   OLP - start of the object list
288 //      F00026            W   -------- -------x   OBF - object processor flag
289 //
290
// Disabled: byte/word accessors for a private 64-byte OP register file
// (objectp_ram, whose declaration is commented out near the top of this file).
// The live accessors further down read and write tomRam8 instead.
#if 0
uint8 OPReadByte(uint32 offset, uint32 who/*=UNKNOWN*/)
{
        offset &= 0x3F;
        return objectp_ram[offset];
}

uint16 OPReadWord(uint32 offset, uint32 who/*=UNKNOWN*/)
{
        offset &= 0x3F;
        return GET16(objectp_ram, offset);
}

void OPWriteByte(uint32 offset, uint8 data, uint32 who/*=UNKNOWN*/)
{
        offset &= 0x3F;
        objectp_ram[offset] = data;
}

void OPWriteWord(uint32 offset, uint16 data, uint32 who/*=UNKNOWN*/)
{
        offset &= 0x3F;
        SET16(objectp_ram, offset, data);

/*if (offset == 0x20)
WriteLog("OP: Setting lo list pointer: %04X\n", data);
if (offset == 0x22)
WriteLog("OP: Setting hi list pointer: %04X\n", data);//*/
}
#endif
321
322 uint32 OPGetListPointer(void)
323 {
324         // Note: This register is LO / HI WORD, hence the funky look of this...
325         return GET16(tomRam8, 0x20) | (GET16(tomRam8, 0x22) << 16);
326 }
327
328 // This is WRONG, since the OBF is only 16 bits wide!!! [FIXED]
329
// Returns the 16-bit word stored at offset $26 of tomRam8 (the OBF register
// per the memory map above), zero-extended into the 32-bit return value.
uint32 OPGetStatusRegister(void)
{
        return GET16(tomRam8, 0x26);
}
334
335 // This is WRONG, since the OBF is only 16 bits wide!!! [FIXED]
336
// Updates the 16-bit word at offset $26 of tomRam8 from the low 16 bits of
// 'data'. The high byte ($26) is overwritten with bits 8-15. The low byte
// ($27) has bits 1-7 of 'data' ORed into it, so bit 0 is left untouched and
// bits that are already set are never cleared here.
// NOTE(review): confirm that OR-ing (rather than replacing bits 1-7) is the
// intended behaviour.
void OPSetStatusRegister(uint32 data)
{
        tomRam8[0x26] = (data & 0x0000FF00) >> 8;
        tomRam8[0x27] |= (data & 0xFE);
}
342
343 void OPSetCurrentObject(uint64 object)
344 {
345 //Not sure this is right... Wouldn't it just be stored 64 bit BE?
346         // Stored as least significant 32 bits first, ms32 last in big endian
347 /*      objectp_ram[0x13] = object & 0xFF; object >>= 8;
348         objectp_ram[0x12] = object & 0xFF; object >>= 8;
349         objectp_ram[0x11] = object & 0xFF; object >>= 8;
350         objectp_ram[0x10] = object & 0xFF; object >>= 8;
351
352         objectp_ram[0x17] = object & 0xFF; object >>= 8;
353         objectp_ram[0x16] = object & 0xFF; object >>= 8;
354         objectp_ram[0x15] = object & 0xFF; object >>= 8;
355         objectp_ram[0x14] = object & 0xFF;*/
356 // Let's try regular good old big endian...
357         tomRam8[0x17] = object & 0xFF; object >>= 8;
358         tomRam8[0x16] = object & 0xFF; object >>= 8;
359         tomRam8[0x15] = object & 0xFF; object >>= 8;
360         tomRam8[0x14] = object & 0xFF; object >>= 8;
361
362         tomRam8[0x13] = object & 0xFF; object >>= 8;
363         tomRam8[0x12] = object & 0xFF; object >>= 8;
364         tomRam8[0x11] = object & 0xFF; object >>= 8;
365         tomRam8[0x10] = object & 0xFF;
366 }
367
368 uint64 OPLoadPhrase(uint32 offset)
369 {
370         offset &= ~0x07;                                                // 8 byte alignment
371         return ((uint64)JaguarReadLong(offset, OP) << 32) | (uint64)JaguarReadLong(offset+4, OP);
372 }
373
374 void OPStorePhrase(uint32 offset, uint64 p)
375 {
376         offset &= ~0x07;                                                // 8 byte alignment
377         JaguarWriteLong(offset, p >> 32, OP);
378         JaguarWriteLong(offset + 4, p & 0xFFFFFFFF, OP);
379 }
380
381 //
382 // Debugging routines
383 //
384 void DumpScaledObject(uint64 p0, uint64 p1, uint64 p2)
385 {
386         WriteLog("          %08X %08X\n", (uint32)(p1>>32), (uint32)(p1&0xFFFFFFFF));
387         WriteLog("          %08X %08X\n", (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
388         DumpBitmapCore(p0, p1);
389         uint32 hscale = p2 & 0xFF;
390         uint32 vscale = (p2 >> 8) & 0xFF;
391         uint32 remainder = (p2 >> 16) & 0xFF;
392         WriteLog("    [hsc: %02X, vsc: %02X, rem: %02X]\n", hscale, vscale, remainder);
393 }
394
395 void DumpFixedObject(uint64 p0, uint64 p1)
396 {
397         WriteLog("          %08X %08X\n", (uint32)(p1>>32), (uint32)(p1&0xFFFFFFFF));
398         DumpBitmapCore(p0, p1);
399 }
400
401 void DumpBitmapCore(uint64 p0, uint64 p1)
402 {
403         uint8 bitdepth = (p1 >> 12) & 0x07;
404 //WAS:  int16 ypos = ((p0 >> 3) & 0x3FF);                       // ??? What if not interlaced (/2)?
405         int16 ypos = ((p0 >> 3) & 0x7FF);                       // ??? What if not interlaced (/2)?
406         int32 xpos = p1 & 0xFFF;
407         xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);       // Sign extend that mutha!
408         uint32 iwidth = ((p1 >> 28) & 0x3FF);
409         uint32 dwidth = ((p1 >> 18) & 0x3FF);           // Unsigned!
410         uint16 height = ((p0 >> 14) & 0x3FF);
411         uint32 link = ((p0 >> 24) & 0x7FFFF) << 3;
412         uint32 ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
413         uint32 firstPix = (p1 >> 49) & 0x3F;
414         uint8 flags = (p1 >> 45) & 0x0F;
415         uint8 idx = (p1 >> 38) & 0x7F;
416         uint32 pitch = (p1 >> 15) & 0x07;
417         WriteLog("    [%u (%u) x %u @ (%i, %u) (%u bpp), l: %08X, p: %08X fp: %02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
418                 iwidth, dwidth, height, xpos, ypos, op_bitmap_bit_depth[bitdepth], link,
419                 ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""),
420                 (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""),
421                 (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
422 }
423
424 //
425 // Object Processor main routine
426 //
427 #warning "Need to fix this so that when an GPU object IRQ happens, we can pick up OP processing where we left off. !!! FIX !!!"
428 void OPProcessList(int halfline, bool render)
429 {
430 extern int op_start_log;
431 //      char * condition_to_str[8] =
432 //              { "==", "<", ">", "(opflag set)", "(second half line)", "?", "?", "?" };
433
434         op_pointer = OPGetListPointer();
435
436 //      objectp_stop_reading_list = false;
437
438 //WriteLog("OP: Processing line #%u (OLP=%08X)...\n", halfline, op_pointer);
439 //op_done();
440
441 // *** BEGIN OP PROCESSOR TESTING ONLY ***
442 extern bool interactiveMode;
443 extern bool iToggle;
444 extern int objectPtr;
445 bool inhibit;
446 int bitmapCounter = 0;
447 // *** END OP PROCESSOR TESTING ONLY ***
448
449         uint32 opCyclesToRun = 30000;                                   // This is a pulled-out-of-the-air value (will need to be fixed, obviously!)
450
451 //      if (op_pointer) WriteLog(" new op list at 0x%.8x halfline %i\n",op_pointer,halfline);
452         while (op_pointer)
453         {
454 // *** BEGIN OP PROCESSOR TESTING ONLY ***
455 if (interactiveMode && bitmapCounter == objectPtr)
456         inhibit = iToggle;
457 else
458         inhibit = false;
459 // *** END OP PROCESSOR TESTING ONLY ***
460 //              if (objectp_stop_reading_list)
461 //                      return;
462
463                 uint64 p0 = OPLoadPhrase(op_pointer);
464                 op_pointer += 8;
465 //WriteLog("\t%08X type %i\n", op_pointer, (uint8)p0 & 0x07);
466
467 #if 1
468 if (halfline == TOMGetVDB() && op_start_log)
469 //if (halfline == 215 && op_start_log)
470 //if (halfline == 28 && op_start_log)
471 //if (halfline == 0)
472 {
473 WriteLog("%08X --> phrase %08X %08X", op_pointer - 8, (int)(p0>>32), (int)(p0&0xFFFFFFFF));
474 if ((p0 & 0x07) == OBJECT_TYPE_BITMAP)
475 {
476 WriteLog(" (BITMAP) ");
477 uint64 p1 = OPLoadPhrase(op_pointer);
478 WriteLog("\n%08X --> phrase %08X %08X ", op_pointer, (int)(p1>>32), (int)(p1&0xFFFFFFFF));
479         uint8 bitdepth = (p1 >> 12) & 0x07;
480 //WAS:  int16 ypos = ((p0 >> 3) & 0x3FF);                       // ??? What if not interlaced (/2)?
481         int16 ypos = ((p0 >> 3) & 0x7FF);                       // ??? What if not interlaced (/2)?
482 int32 xpos = p1 & 0xFFF;
483 xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);
484         uint32 iwidth = ((p1 >> 28) & 0x3FF);
485         uint32 dwidth = ((p1 >> 18) & 0x3FF);           // Unsigned!
486         uint16 height = ((p0 >> 14) & 0x3FF);
487         uint32 link = ((p0 >> 24) & 0x7FFFF) << 3;
488         uint32 ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
489         uint32 firstPix = (p1 >> 49) & 0x3F;
490         uint8 flags = (p1 >> 45) & 0x0F;
491         uint8 idx = (p1 >> 38) & 0x7F;
492         uint32 pitch = (p1 >> 15) & 0x07;
493 WriteLog("\n    [%u (%u) x %u @ (%i, %u) (%u bpp), l: %08X, p: %08X fp: %02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
494         iwidth, dwidth, height, xpos, ypos, op_bitmap_bit_depth[bitdepth], link, ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""), (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""), (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
495 }
496 if ((p0 & 0x07) == OBJECT_TYPE_SCALE)
497 {
498 WriteLog(" (SCALED BITMAP)");
499 uint64 p1 = OPLoadPhrase(op_pointer), p2 = OPLoadPhrase(op_pointer+8);
500 WriteLog("\n%08X --> phrase %08X %08X ", op_pointer, (int)(p1>>32), (int)(p1&0xFFFFFFFF));
501 WriteLog("\n%08X --> phrase %08X %08X ", op_pointer+8, (int)(p2>>32), (int)(p2&0xFFFFFFFF));
502         uint8 bitdepth = (p1 >> 12) & 0x07;
503 //WAS:  int16 ypos = ((p0 >> 3) & 0x3FF);                       // ??? What if not interlaced (/2)?
504         int16 ypos = ((p0 >> 3) & 0x7FF);                       // ??? What if not interlaced (/2)?
505 int32 xpos = p1 & 0xFFF;
506 xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);
507         uint32 iwidth = ((p1 >> 28) & 0x3FF);
508         uint32 dwidth = ((p1 >> 18) & 0x3FF);           // Unsigned!
509         uint16 height = ((p0 >> 14) & 0x3FF);
510         uint32 link = ((p0 >> 24) & 0x7FFFF) << 3;
511         uint32 ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
512         uint32 firstPix = (p1 >> 49) & 0x3F;
513         uint8 flags = (p1 >> 45) & 0x0F;
514         uint8 idx = (p1 >> 38) & 0x7F;
515         uint32 pitch = (p1 >> 15) & 0x07;
516 WriteLog("\n    [%u (%u) x %u @ (%i, %u) (%u bpp), l: %08X, p: %08X fp: %02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
517         iwidth, dwidth, height, xpos, ypos, op_bitmap_bit_depth[bitdepth], link, ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""), (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""), (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
518         uint32 hscale = p2 & 0xFF;
519         uint32 vscale = (p2 >> 8) & 0xFF;
520         uint32 remainder = (p2 >> 16) & 0xFF;
521 WriteLog("    [hsc: %02X, vsc: %02X, rem: %02X]\n", hscale, vscale, remainder);
522 }
523 if ((p0 & 0x07) == OBJECT_TYPE_GPU)
524 WriteLog(" (GPU)\n");
525 if ((p0 & 0x07) == OBJECT_TYPE_BRANCH)
526 {
527 WriteLog(" (BRANCH)\n");
528 uint8 * jaguarMainRam = GetRamPtr();
529 WriteLog("[RAM] --> ");
530 for(int k=0; k<8; k++)
531         WriteLog("%02X ", jaguarMainRam[op_pointer-8 + k]);
532 WriteLog("\n");
533 }
534 if ((p0 & 0x07) == OBJECT_TYPE_STOP)
535 WriteLog("    --> List end\n\n");
536 }
537 #endif
538
539                 switch ((uint8)p0 & 0x07)
540                 {
541                 case OBJECT_TYPE_BITMAP:
542                 {
543 //WAS:                  uint16 ypos = (p0 >> 3) & 0x3FF;
544                         uint16 ypos = (p0 >> 3) & 0x7FF;
545 // This is only theory implied by Rayman...!
546 // It seems that if the YPOS is zero, then bump the YPOS value so that it coincides with
547 // the VDB value. With interlacing, this would be slightly more tricky.
548 // There's probably another bit somewhere that enables this mode--but so far, doesn't seem
549 // to affect any other game in a negative way (that I've seen).
550 // Either that, or it's an undocumented bug...
551
552 //No, the reason this was needed is that the OP code before was wrong. Any value
553 //less than VDB will get written to the top line of the display!
554 #if 0
555 // Not so sure... Let's see what happens here...
556 // No change...
557                         if (ypos == 0)
558                                 ypos = TOMReadWord(0xF00046, OP) / 2;                   // Get the VDB value
559 #endif
560 // Actually, no. Any item less than VDB will get only the lines that hang over VDB displayed.
561 // Actually, this is incorrect. It seems that VDB value is wrong somewhere and that's
562 // what's causing things to fuck up. Still no idea why.
563
564                         uint32 height = (p0 & 0xFFC000) >> 14;
565                         uint32 oldOPP = op_pointer - 8;
566 // *** BEGIN OP PROCESSOR TESTING ONLY ***
567 if (inhibit && op_start_log)
568         WriteLog("!!! ^^^ This object is INHIBITED! ^^^ !!!\n");
569 bitmapCounter++;
570 if (!inhibit)   // For OP testing only!
571 // *** END OP PROCESSOR TESTING ONLY ***
572                         if (halfline >= ypos && height > 0)
573                         {
574                                 uint64 p1 = OPLoadPhrase(op_pointer);
575                                 op_pointer += 8;
576 //WriteLog("OP: Writing halfline %d with ypos == %d...\n", halfline, ypos);
577 //WriteLog("--> Writing %u BPP bitmap...\n", op_bitmap_bit_depth[(p1 >> 12) & 0x07]);
578 //                              OPProcessFixedBitmap(halfline, p0, p1, render);
579                                 OPProcessFixedBitmap(p0, p1, render);
580
581                                 // OP write-backs
582
583 //???Does this really happen??? Doesn't seem to work if you do this...!
584 //Probably not. Must be a bug in the documentation...!
585 //                              uint32 link = (p0 & 0x7FFFF000000) >> 21;
586 //                              SET16(tom_ram_8, 0x20, link & 0xFFFF);  // OLP
587 //                              SET16(tom_ram_8, 0x22, link >> 16);
588 /*                              uint32 height = (p0 & 0xFFC000) >> 14;
589                                 if (height - 1 > 0)
590                                         height--;*/
591                                 // NOTE: Would subtract 2 if in interlaced mode...!
592 //                              uint64 height = ((p0 & 0xFFC000) - 0x4000) & 0xFFC000;
593 //                              if (height)
594                                 height--;
595
596                                 uint64 data = (p0 & 0xFFFFF80000000000LL) >> 40;
597                                 uint64 dwidth = (p1 & 0xFFC0000) >> 15;
598                                 data += dwidth;
599
600                                 p0 &= ~0xFFFFF80000FFC000LL;            // Mask out old data...
601                                 p0 |= (uint64)height << 14;
602                                 p0 |= data << 40;
603                                 OPStorePhrase(oldOPP, p0);
604                         }
605 //WriteLog("\t\tOld OP: %08X -> ", op_pointer);
606 //Temp, for testing...
607 //No doubt, this type of check will break all kinds of stuff... !!! FIX !!!
608 //And it does! !!! FIX !!!
609 //Let's remove this "fix" since it screws up more than it fixes.
610 /*      if (op_pointer > ((p0 & 0x000007FFFF000000LL) >> 21))
611                 return;*/
612
613                         op_pointer = (p0 & 0x000007FFFF000000LL) >> 21;
614 //WriteLog("New OP: %08X\n", op_pointer);
615                         break;
616                 }
617                 case OBJECT_TYPE_SCALE:
618                 {
619 //WAS:                  uint16 ypos = (p0 >> 3) & 0x3FF;
620                         uint16 ypos = (p0 >> 3) & 0x7FF;
621                         uint32 height = (p0 & 0xFFC000) >> 14;
622                         uint32 oldOPP = op_pointer - 8;
623 //WriteLog("OP: Scaled Object (ypos=%04X, height=%04X", ypos, height);
624 // *** BEGIN OP PROCESSOR TESTING ONLY ***
625 if (inhibit && op_start_log)
626 {
627         WriteLog("!!! ^^^ This object is INHIBITED! ^^^ !!! (halfline=%u, ypos=%u, height=%u)\n", halfline, ypos, height);
628         DumpScaledObject(p0, OPLoadPhrase(op_pointer), OPLoadPhrase(op_pointer+8));
629 }
630 bitmapCounter++;
631 if (!inhibit)   // For OP testing only!
632 // *** END OP PROCESSOR TESTING ONLY ***
633                         if (halfline >= ypos && height > 0)
634                         {
635                                 uint64 p1 = OPLoadPhrase(op_pointer);
636                                 op_pointer += 8;
637                                 uint64 p2 = OPLoadPhrase(op_pointer);
638                                 op_pointer += 8;
639 //WriteLog("OP: %08X (%d) %08X%08X %08X%08X %08X%08X\n", oldOPP, halfline, (uint32)(p0>>32), (uint32)(p0&0xFFFFFFFF), (uint32)(p1>>32), (uint32)(p1&0xFFFFFFFF), (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
640                                 OPProcessScaledBitmap(p0, p1, p2, render);
641
642                                 // OP write-backs
643
644                                 uint16 remainder = (p2 >> 16) & 0xFF;//, vscale = p2 >> 8;
645                                 uint8 /*remainder = p2 >> 16,*/ vscale = p2 >> 8;
646 //Actually, we should skip this object if it has a vscale of zero.
647 //Or do we? Not sure... Atari Karts has a few lines that look like:
648 // (SCALED BITMAP)
649 //000E8268 --> phrase 00010000 7000B00D
650 //    [7 (0) x 1 @ (13, 0) (8 bpp), l: 000E82A0, p: 000E0FC0 fp: 00, fl:RELEASE, idx:00, pt:01]
651 //    [hsc: 9A, vsc: 00, rem: 00]
652 // Could it be the vscale is overridden if the DWIDTH is zero? Hmm...
653 //WriteLog("OP: Scaled bitmap processing (rem=%02X, vscale=%02X)...\n", remainder, vscale);//*/
654
655                                 if (vscale == 0)
656                                         vscale = 0x20;                                  // OP bug??? Nope, it isn't...! Or is it?
657
658 //extern int start_logging;
659 //if (start_logging)
660 //      WriteLog("--> Returned from scaled bitmap processing (rem=%02X, vscale=%02X)...\n", remainder, vscale);//*/
661 //Locks up here:
662 //--> Returned from scaled bitmap processing (rem=20, vscale=80)...
663 //There are other problems here, it looks like...
664 //Another lock up:
665 //About to execute OP (508)...
666 /*
667 OP: Scaled bitmap 4x? 4bpp at 38,? hscale=7C fpix=0 data=00075E28 pitch 1 hflipped=no dwidth=? (linked to 00071118) Transluency=no
668 --> Returned from scaled bitmap processing (rem=50, vscale=7C)...
669 OP: Scaled bitmap 4x? 4bpp at 38,? hscale=7C fpix=0 data=00075E28 pitch 1 hflipped=no dwidth=? (linked to 00071118) Transluency=no
670 --> Returned from scaled bitmap processing (rem=30, vscale=7C)...
671 OP: Scaled bitmap 4x? 4bpp at 38,? hscale=7C fpix=0 data=00075E28 pitch 1 hflipped=no dwidth=? (linked to 00071118) Transluency=no
672 --> Returned from scaled bitmap processing (rem=10, vscale=7C)...
673 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756A8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
674 --> Returned from scaled bitmap processing (rem=00, vscale=7E)...
675 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
676 --> Returned from scaled bitmap processing (rem=00, vscale=80)...
677 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
678 --> Returned from scaled bitmap processing (rem=5E, vscale=7E)...
679 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756E8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
680 --> Returned from scaled bitmap processing (rem=60, vscale=80)...
681 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
682 --> Returned from scaled bitmap processing (rem=3E, vscale=7E)...
683 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756E8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
684 --> Returned from scaled bitmap processing (rem=40, vscale=80)...
685 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
686 --> Returned from scaled bitmap processing (rem=1E, vscale=7E)...
687 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756E8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
688 --> Returned from scaled bitmap processing (rem=20, vscale=80)...
689 */
690 //Here's another problem:
691 //    [hsc: 20, vsc: 20, rem: 00]
692 // Since we're not checking for $E0 (but that's what we get from the above), we end
693 // up repeating this halfline unnecessarily... !!! FIX !!! [DONE, but... still not quite
694 // right. Either that, or the Accolade team that wrote Bubsy screwed up royal.]
695 //Also note: $E0 = 7.0 which IS a legal vscale value...
696
697 //                              if (remainder & 0x80)                           // I.e., it's negative
698 //                              if ((remainder & 0x80) || remainder == 0)       // I.e., it's <= 0
699 //                              if ((remainder - 1) >= 0xE0)            // I.e., it's <= 0
700 //                              if ((remainder >= 0xE1) || remainder == 0)// I.e., it's <= 0
701 //                              if ((remainder >= 0xE1 && remainder <= 0xFF) || remainder == 0)// I.e., it's <= 0
702 //                              if (remainder <= 0x20)                          // I.e., it's <= 1.0
703                                 // I.e., it's < 1.0f -> means it'll go negative when we subtract 1.0f.
704                                 if (remainder < 0x20)
705                                 {
706                                         uint64 data = (p0 & 0xFFFFF80000000000LL) >> 40;
707                                         uint64 dwidth = (p1 & 0xFFC0000) >> 15;
708
709 //                                      while (remainder & 0x80)
710 //                                      while ((remainder & 0x80) || remainder == 0)
711 //                                      while ((remainder - 1) >= 0xE0)
712 //                                      while ((remainder >= 0xE1) || remainder == 0)
713 //                                      while ((remainder >= 0xE1 && remainder <= 0xFF) || remainder == 0)
714 //                                      while (remainder <= 0x20)
715                                         while (remainder < 0x20)
716                                         {
717                                                 remainder += vscale;
718
719                                                 if (height)
720                                                         height--;
721
722                                                 data += dwidth;
723                                         }
724
725                                         p0 &= ~0xFFFFF80000FFC000LL;    // Mask out old data...
726                                         p0 |= (uint64)height << 14;
727                                         p0 |= data << 40;
728                                         OPStorePhrase(oldOPP, p0);
729                                 }
730
731                                 remainder -= 0x20;                                      // 1.0f in [3.5] fixed point format
732
733 //if (start_logging)
734 //      WriteLog("--> Finished writebacks...\n");//*/
735
736 //WriteLog(" [%08X%08X -> ", (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
737                                 p2 &= ~0x0000000000FF0000LL;
738                                 p2 |= (uint64)remainder << 16;
739 //WriteLog("%08X%08X]\n", (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
740                                 OPStorePhrase(oldOPP + 16, p2);
741 //remainder = (uint8)(p2 >> 16), vscale = (uint8)(p2 >> 8);
742 //WriteLog(" [after]: rem=%02X, vscale=%02X\n", remainder, vscale);
743                         }
744
745                         op_pointer = (p0 & 0x000007FFFF000000LL) >> 21;
746                         break;
747                 }
748                 case OBJECT_TYPE_GPU:
749                 {
750 //WriteLog("OP: Asserting GPU IRQ #3...\n");
751 #warning "Need to fix OP GPU IRQ handling! !!! FIX !!!"
752                         OPSetCurrentObject(p0);
753                         GPUSetIRQLine(3, ASSERT_LINE);
754 //Also, OP processing is suspended from this point until OBF (F00026) is written to...
755 // !!! FIX !!!
756 //Do something like:
757 //OPSuspendedByGPU = true;
758 //Dunno if the OP keeps processing from where it was interrupted, or if it just continues
759 //on the next halfline...
760 // --> It continues from where it was interrupted! !!! FIX !!!
761                         break;
762                 }
763                 case OBJECT_TYPE_BRANCH:
764                 {
765                         uint16 ypos = (p0 >> 3) & 0x7FF;
766 // NOTE: The JTRM sez there are only 2 bits used for the CC, but lists *five*
767 //       conditions! Need at least one more bit for that! :-P
768 #warning "!!! Possibly bad CC handling in OP (missing 1 bit) !!!"
769                         uint8  cc   = (p0 >> 14) & 0x03;
770                         uint32 link = (p0 >> 21) & 0x3FFFF8;
771
772 //                      if ((ypos!=507)&&(ypos!=25))
773 //                              WriteLog("\t%i%s%i link=0x%.8x\n",halfline,condition_to_str[cc],ypos>>1,link);
774                         switch (cc)
775                         {
776                         case CONDITION_EQUAL:
777                                 if (TOMReadWord(0xF00006, OP) == ypos || ypos == 0x7FF)
778                                         op_pointer = link;
779                                 break;
780                         case CONDITION_LESS_THAN:
781                                 if (TOMReadWord(0xF00006, OP) < ypos)
782                                         op_pointer = link;
783                                 break;
784                         case CONDITION_GREATER_THAN:
785                                 if (TOMReadWord(0xF00006, OP) > ypos)
786                                         op_pointer = link;
787                                 break;
788                         case CONDITION_OP_FLAG_SET:
789                                 if (OPGetStatusRegister() & 0x01)
790                                         op_pointer = link;
791                                 break;
792                         case CONDITION_SECOND_HALF_LINE:
793 //Here's the ASIC code:
794 //  ND4(cctrue5, newheight[2], heightl[1], heightl[0], hcb[10]);
795 //which means, do the link if bit 10 of HC is set...
796
797                                 // This basically means branch if bit 10 of HC is set
798 #warning "Unhandled condition code causes emulator to crash... !!! FIX !!!"
799                                 WriteLog("OP: Unexpected CONDITION_SECOND_HALF_LINE in BRANCH object\nOP: shutting down!\n");
800                                 LogDone();
801                                 exit(0);
802                                 break;
803                         default:
804                                 // Basically, if you do this, the OP does nothing. :-)
805                                 WriteLog("OP: Unimplemented branch condition %i\n", cc);
806                         }
807                         break;
808                 }
809                 case OBJECT_TYPE_STOP:
810                 {
811 //op_start_log = 0;
812                         // unsure
813 //WriteLog("OP: --> STOP\n");
814 //                      op_set_status_register(((p0>>3) & 0xFFFFFFFF));
815 //This seems more likely...
816                         OPSetCurrentObject(p0);
817
818                         if (p0 & 0x08)
819                         {
820                                 // We need to check whether these interrupts are enabled or not, THEN
821                                 // set an IRQ + pending flag if necessary...
822                                 if (TOMIRQEnabled(IRQ_OPFLAG))
823                                 {
824                                         TOMSetPendingObjectInt();
825                                         m68k_set_irq(2);                                // Cause a 68K IPL 2 to occur...
826                                 }
827                         }
828
829                         return;
830 //                      break;
831                 }
832                 default:
833                         WriteLog("op: unknown object type %i\n", ((uint8)p0 & 0x07));
834                         return;
835                 }
836
837                 // Here is a little sanity check to keep the OP from locking up the machine
838                 // when fed bad data. Better would be to count how many actual cycles it used
839                 // and bail out/reenter to properly simulate an overloaded OP... !!! FIX !!!
840 #warning "Better would be to count how many actual cycles it used and bail out/reenter to properly simulate an overloaded OP... !!! FIX !!!"
841                 opCyclesToRun--;
842
843                 if (!opCyclesToRun)
844                         return;
845         }
846 }
847
848 //
849 // Store fixed size bitmap in line buffer
850 //
851 void OPProcessFixedBitmap(uint64 p0, uint64 p1, bool render)
852 {
853 // Need to make sure that when writing that it stays within the line buffer...
854 // LBUF ($F01800 - $F01D9E) 360 x 32-bit RAM
855         uint8 depth = (p1 >> 12) & 0x07;                                // Color depth of image
856         int32 xpos = ((int16)((p1 << 4) & 0xFFFF)) >> 4;// Image xpos in LBUF
857         uint32 iwidth = (p1 >> 28) & 0x3FF;                             // Image width in *phrases*
858         uint32 data = (p0 >> 40) & 0xFFFFF8;                    // Pixel data address
859 //#ifdef OP_DEBUG_BMP
860         uint32  firstPix = (p1 >> 49) & 0x3F;
861         // "The LSB is significant only for scaled objects..." -JTRM
862         // "In 1 BPP mode, all five bits are significant. In 2 BPP mode, the top four are significant..."
863         firstPix &= 0x3E;
864 //#endif
865 // We can ignore the RELEASE (high order) bit for now--probably forever...!
866 //      uint8 flags = (p1 >> 45) & 0x0F;        // REFLECT, RMW, TRANS, RELEASE
867 //Optimize: break these out to their own BOOL values
868         uint8 flags = (p1 >> 45) & 0x07;                                // REFLECT (0), RMW (1), TRANS (2)
869         bool flagREFLECT = (flags & OPFLAG_REFLECT ? true : false),
870                 flagRMW = (flags & OPFLAG_RMW ? true : false),
871                 flagTRANS = (flags & OPFLAG_TRANS ? true : false);
872 // "For images with 1 to 4 bits/pixel the top 7 to 4 bits of the index
873 //  provide the most significant bits of the palette address."
874         uint8 index = (p1 >> 37) & 0xFE;                                // CLUT index offset (upper pix, 1-4 bpp)
875         uint32 pitch = (p1 >> 15) & 0x07;                               // Phrase pitch
876         pitch <<= 3;                                                                    // Optimization: Multiply pitch by 8
877
878 //      int16 scanlineWidth = tom_getVideoModeWidth();
879         uint8 * tomRam8 = TOMGetRamPointer();
880         uint8 * paletteRAM = &tomRam8[0x400];
881         // This is OK as long as it's used correctly: For 16-bit RAM to RAM direct copies--NOT
882         // for use when using endian-corrected data (i.e., any of the *_word_read functions!)
883         uint16 * paletteRAM16 = (uint16 *)paletteRAM;
884
885 //      WriteLog("bitmap %ix? %ibpp at %i,? firstpix=? data=0x%.8x pitch %i hflipped=%s dwidth=? (linked to ?) RMW=%s Tranparent=%s\n",
886 //              iwidth, op_bitmap_bit_depth[bitdepth], xpos, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), (flags&OPFLAG_RMW ? "yes" : "no"), (flags&OPFLAG_TRANS ? "yes" : "no"));
887
888 // Is it OK to have a 0 for the data width??? (i.e., undocumented?)
889 // Seems to be... Seems that dwidth *can* be zero (i.e., reuse same line) as well.
890 // Pitch == 0 is OK too...
891 //      if (!render || op_pointer == 0 || ptr == 0 || pitch == 0)
892 //I'm not convinced that we need to concern ourselves with data & op_pointer here either!
893         if (!render || iwidth == 0)
894                 return;
895
896 //OK, so we know the position in the line buffer is correct. It's the clipping in
897 //24bpp mode that's wrong!
898 #if 0
899 //This is a total kludge, based upon the fact that 24BPP mode puts *4* bytes
900 //into the line buffer for each pixel.
901 if (depth == 5) // i.e., 24bpp mode...
902         xpos >>= 1;     // Cut it in half...
903 #endif
904
905 //#define OP_DEBUG_BMP
906 //#ifdef OP_DEBUG_BMP
907 //      WriteLog("bitmap %ix%i %ibpp at %i,%i firstpix=%i data=0x%.8x pitch %i hflipped=%s dwidth=%i (linked to 0x%.8x) Transluency=%s\n",
908 //              iwidth, height, op_bitmap_bit_depth[bitdepth], xpos, ypos, firstPix, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), dwidth, op_pointer, (flags&OPFLAG_RMW ? "yes" : "no"));
909 //#endif
910
911 //      int32 leftMargin = xpos, rightMargin = (xpos + (phraseWidthToPixels[depth] * iwidth)) - 1;
912         int32 startPos = xpos, endPos = xpos +
913                 (!flagREFLECT ? (phraseWidthToPixels[depth] * iwidth) - 1
914                 : -((phraseWidthToPixels[depth] * iwidth) + 1));
915         uint32 clippedWidth = 0, phraseClippedWidth = 0, dataClippedWidth = 0;//, phrasePixel = 0;
916         bool in24BPPMode = (((GET16(tomRam8, 0x0028) >> 1) & 0x03) == 1 ? true : false);        // VMODE
917         // Not sure if this is Jaguar Two only location or what...
918         // From the docs, it is... If we want to limit here we should think of something else.
919 //      int32 limit = GET16(tom_ram_8, 0x0008);                 // LIMIT
920 //      int32 limit = 720;
921 //      int32 lbufWidth = (!in24BPPMode ? limit - 1 : (limit / 2) - 1); // Zero based limit...
922 //printf("[OP:xpos=%i,spos=%i,epos=%i>", xpos, startPos, endPos);
923         // This is correct, the OP line buffer is a constant size... 
924         int32 limit = 720;
925         int32 lbufWidth = 719;
926
927         // If the image is completely to the left or right of the line buffer, then bail.
928 //If in REFLECT mode, then these values are swapped! !!! FIX !!! [DONE]
929 //There are four possibilities:
930 //  1. image sits on left edge and no REFLECT; starts out of bounds but ends in bounds.
931 //  2. image sits on left edge and REFLECT; starts in bounds but ends out of bounds.
932 //  3. image sits on right edge and REFLECT; starts out of bounds but ends in bounds.
933 //  4. image sits on right edge and no REFLECT; starts in bounds but ends out of bounds.
934 //Numbers 2 & 4 can be caught by checking the LBUF clip while in the inner loop,
935 // numbers 1 & 3 are of concern.
936 // This *indirectly* handles only cases 2 & 4! And is WRONG if REFLECT is set...!
937 //      if (rightMargin < 0 || leftMargin > lbufWidth)
938
939 // It might be easier to swap these (if REFLECTed) and just use XPOS down below...
940 // That way, you could simply set XPOS to leftMargin if !REFLECT and to rightMargin otherwise.
941 // Still have to be careful with the DATA and IWIDTH values though...
942
943 //      if ((!flagREFLECT && (rightMargin < 0 || leftMargin > lbufWidth))
944 //              || (flagREFLECT && (leftMargin < 0 || rightMargin > lbufWidth)))
945 //              return;
946         if ((!flagREFLECT && (endPos < 0 || startPos > lbufWidth))
947                 || (flagREFLECT && (startPos < 0 || endPos > lbufWidth)))
948                 return;
949
950         // Otherwise, find the clip limits and clip the phrase as well...
951         // NOTE: I'm fudging here by letting the actual blit overstep the bounds of the
952         //       line buffer, but it shouldn't matter since there are two unused line
953         //       buffers below and nothing above and I'll at most write 8 bytes outside
954         //       the line buffer... I could use a fractional clip begin/end value, but
955         //       this makes the blit a *lot* more hairy. I might fix this in the future
956         //       if it becomes necessary. (JLH)
957         //       Probably wouldn't be *that* hairy. Just use a delta that tells the inner loop
958         //       which pixel in the phrase is being written, and quit when either end of phrases
959         //       is reached or line buffer extents are surpassed.
960
961 //This stuff is probably wrong as well... !!! FIX !!!
962 //The strange thing is that it seems to work, but that's no guarantee that it's bulletproof!
963 //Yup. Seems that JagMania doesn't work correctly with this...
964 //Dunno if this is the problem, but Atari Karts is showing *some* of the road now...
965 //      if (!flagREFLECT)
966
967 /*
968         if (leftMargin < 0)
969                 clippedWidth = 0 - leftMargin,
970                 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth],
971                 leftMargin = 0 - (clippedWidth % phraseWidthToPixels[depth]);
972 //              leftMargin = 0;
973
974         if (rightMargin > lbufWidth)
975                 clippedWidth = rightMargin - lbufWidth,
976                 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];//,
977 //              rightMargin = lbufWidth + (clippedWidth % phraseWidthToPixels[depth]);
978 //              rightMargin = lbufWidth;
979 */
980 if (depth > 5)
981         WriteLog("OP: We're about to encounter a divide by zero error!\n");
982         // NOTE: We're just using endPos to figure out how much, if any, to clip by.
983         // ALSO: There may be another case where we start out of bounds and end out of bounds...!
984         // !!! FIX !!!
985         if (startPos < 0)                       // Case #1: Begin out, end in, L to R
986                 clippedWidth = 0 - startPos,
987                 dataClippedWidth = phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth],
988                 startPos = 0 - (clippedWidth % phraseWidthToPixels[depth]);
989
990         if (endPos < 0)                         // Case #2: Begin in, end out, R to L
991                 clippedWidth = 0 - endPos,
992                 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];
993
994         if (endPos > lbufWidth)         // Case #3: Begin in, end out, L to R
995                 clippedWidth = endPos - lbufWidth,
996                 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];
997
998         if (startPos > lbufWidth)       // Case #4: Begin out, end in, R to L
999                 clippedWidth = startPos - lbufWidth,
1000                 dataClippedWidth = phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth],
1001                 startPos = lbufWidth + (clippedWidth % phraseWidthToPixels[depth]);
1002 //printf("<OP:spos=%i,epos=%i]", startPos, endPos);
1003
1004         // If the image is sitting on the line buffer left or right edge, we need to compensate
1005         // by decreasing the image phrase width accordingly.
1006         iwidth -= phraseClippedWidth;
1007
1008         // Also, if we're clipping the phrase we need to make sure we're in the correct part of
1009         // the pixel data.
1010 //      data += phraseClippedWidth * (pitch << 3);
1011         data += dataClippedWidth * pitch;
1012
1013         // NOTE: When the bitmap is in REFLECT mode, the XPOS marks the *right* side of the
1014         //       bitmap! This makes clipping & etc. MUCH, much easier...!
1015 //      uint32 lbufAddress = 0x1800 + (!in24BPPMode ? leftMargin * 2 : leftMargin * 4);
1016 //Why does this work right when multiplying startPos by 2 (instead of 4) for 24 BPP mode?
1017 //Is this a bug in the OP?
1018 //It's because in 24bpp mode, each pixel takes *4* bytes, instead of the usual 2.
1019 //Though it looks like we're doing it here no matter what...
1020 //      uint32 lbufAddress = 0x1800 + (!in24BPPMode ? startPos * 2 : startPos * 2);
1021 //Let's try this:
1022         uint32 lbufAddress = 0x1800 + (startPos * 2);
1023         uint8 * currentLineBuffer = &tomRam8[lbufAddress];
1024
1025         // Render.
1026
1027 // Hmm. We check above for 24 BPP mode, but don't do anything about it below...
1028 // If we *were* in 24 BPP mode, how would you convert CRY to RGB24? Seems to me
1029 // that if you're in CRY mode then you wouldn't be able to use 24 BPP bitmaps
1030 // anyway.
1031 // This seems to be the case (at least according to the Midsummer docs)...!
1032
1033 // This is to test using palette zeroes instead of bit zeroes...
1034 // And it seems that this is wrong, index == 0 is transparent apparently... :-/
1035 //#define OP_USES_PALETTE_ZERO
1036
1037         if (depth == 0)                                                                 // 1 BPP
1038         {
1039                 // The LSB of flags is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1040                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1041
1042                 // Fetch 1st phrase...
1043                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1044 //Note that firstPix should only be honored *if* we start with the 1st phrase of the bitmap
1045 //i.e., we didn't clip on the margin... !!! FIX !!!
1046                 pixels <<= firstPix;                                            // Skip first N pixels (N=firstPix)...
1047                 int i = firstPix;                                                       // Start counter at right spot...
1048
1049                 while (iwidth--)
1050                 {
1051                         while (i++ < 64)
1052                         {
1053                                 uint8 bit = pixels >> 63;
1054 #ifndef OP_USES_PALETTE_ZERO
1055                                 if (flagTRANS && bit == 0)
1056 #else
1057                                 if (flagTRANS && (paletteRAM16[index | bit] == 0))
1058 #endif
1059                                         ;       // Do nothing...
1060                                 else
1061                                 {
1062                                         if (!flagRMW)
1063 //Optimize: Set paletteRAM16 to beginning of palette RAM + index*2 and use only [bit] as index...
1064 //Won't optimize RMW case though...
1065                                                 // This is the *only* correct use of endian-dependent code
1066                                                 // (i.e., mem-to-mem direct copying)!
1067                                                 *(uint16 *)currentLineBuffer = paletteRAM16[index | bit];
1068                                         else
1069                                                 *currentLineBuffer =
1070                                                         BLEND_CR(*currentLineBuffer, paletteRAM[(index | bit) << 1]),
1071                                                 *(currentLineBuffer + 1) =
1072                                                         BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bit) << 1) + 1]);
1073                                 }
1074
1075                                 currentLineBuffer += lbufDelta;
1076                                 pixels <<= 1;
1077                         }
1078                         i = 0;
1079                         // Fetch next phrase...
1080                         data += pitch;
1081                         pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1082                 }
1083         }
1084         else if (depth == 1)                                                    // 2 BPP
1085         {
1086 if (firstPix)
1087         WriteLog("OP: Fixed bitmap @ 2 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1088                 index &= 0xFC;                                                          // Top six bits form CLUT index
1089                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1090                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1091
1092                 while (iwidth--)
1093                 {
1094                         // Fetch phrase...
1095                         uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1096                         data += pitch;
1097
1098                         for(int i=0; i<32; i++)
1099                         {
1100                                 uint8 bits = pixels >> 62;
1101 // Seems to me that both of these are in the same endian, so we could cast it as
1102 // uint16 * and do straight across copies (what about 24 bpp? Treat it differently...)
1103 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
1104 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
1105 // No, it isn't because we read the memory in an endian safe way--this *won't* work...
1106 #ifndef OP_USES_PALETTE_ZERO
1107                                 if (flagTRANS && bits == 0)
1108 #else
1109                                 if (flagTRANS && (paletteRAM16[index | bits] == 0))
1110 #endif
1111                                         ;       // Do nothing...
1112                                 else
1113                                 {
1114                                         if (!flagRMW)
1115                                                 *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1116                                         else
1117                                                 *currentLineBuffer =
1118                                                         BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1119                                                 *(currentLineBuffer + 1) =
1120                                                         BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1121                                 }
1122
1123                                 currentLineBuffer += lbufDelta;
1124                                 pixels <<= 2;
1125                         }
1126                 }
1127         }
1128         else if (depth == 2)                                                    // 4 BPP
1129         {
1130 if (firstPix)
1131         WriteLog("OP: Fixed bitmap @ 4 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1132                 index &= 0xF0;                                                          // Top four bits form CLUT index
1133                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1134                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1135
1136                 while (iwidth--)
1137                 {
1138                         // Fetch phrase...
1139                         uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1140                         data += pitch;
1141
1142                         for(int i=0; i<16; i++)
1143                         {
1144                                 uint8 bits = pixels >> 60;
1145 // Seems to me that both of these are in the same endian, so we could cast it as
1146 // uint16 * and do straight across copies (what about 24 bpp? Treat it differently...)
1147 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
1148 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
1149 // No, it isn't because we read the memory in an endian safe way--this *won't* work...
1150 #ifndef OP_USES_PALETTE_ZERO
1151                                 if (flagTRANS && bits == 0)
1152 #else
1153                                 if (flagTRANS && (paletteRAM16[index | bits] == 0))
1154 #endif
1155                                         ;       // Do nothing...
1156                                 else
1157                                 {
1158                                         if (!flagRMW)
1159                                                 *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1160                                         else
1161                                                 *currentLineBuffer =
1162                                                         BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1163                                                 *(currentLineBuffer + 1) =
1164                                                         BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1165                                 }
1166
1167                                 currentLineBuffer += lbufDelta;
1168                                 pixels <<= 4;
1169                         }
1170                 }
1171         }
1172         else if (depth == 3)                                                    // 8 BPP
1173         {
1174                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1175                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1176
1177                 // Fetch 1st phrase...
1178                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1179 //Note that firstPix should only be honored *if* we start with the 1st phrase of the bitmap
1180 //i.e., we didn't clip on the margin... !!! FIX !!!
1181                 firstPix &= 0x30;                                                       // Only top two bits are valid for 8 BPP
1182                 pixels <<= firstPix;                                            // Skip first N pixels (N=firstPix)...
1183                 int i = firstPix >> 3;                                          // Start counter at right spot...
1184
1185                 while (iwidth--)
1186                 {
1187                         while (i++ < 8)
1188                         {
1189                                 uint8 bits = pixels >> 56;
1190 // Seems to me that both of these are in the same endian, so we could cast it as
1191 // uint16 * and do straight across copies (what about 24 bpp? Treat it differently...)
1192 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
1193 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
1194 // No, it isn't because we read the memory in an endian safe way--this *won't* work...
1195 //This would seem to be problematic...
1196 //Because it's the palette entry being zero that makes the pixel transparent...
1197 //Let's try it and see.
1198 #ifndef OP_USES_PALETTE_ZERO
1199                                 if (flagTRANS && bits == 0)
1200 #else
1201                                 if (flagTRANS && (paletteRAM16[bits] == 0))
1202 #endif
1203                                         ;       // Do nothing...
1204                                 else
1205                                 {
1206                                         if (!flagRMW)
1207                                                 *(uint16 *)currentLineBuffer = paletteRAM16[bits];
1208                                         else
1209                                                 *currentLineBuffer =
1210                                                         BLEND_CR(*currentLineBuffer, paletteRAM[bits << 1]),
1211                                                 *(currentLineBuffer + 1) =
1212                                                         BLEND_Y(*(currentLineBuffer + 1), paletteRAM[(bits << 1) + 1]);
1213                                 }
1214
1215                                 currentLineBuffer += lbufDelta;
1216                                 pixels <<= 8;
1217                         }
1218                         i = 0;
1219                         // Fetch next phrase...
1220                         data += pitch;
1221                         pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1222                 }
1223         }
1224         else if (depth == 4)                                                    // 16 BPP
1225         {
1226 if (firstPix)
1227         WriteLog("OP: Fixed bitmap @ 16 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1228                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1229                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1230
1231                 while (iwidth--)
1232                 {
1233                         // Fetch phrase...
1234                         uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1235                         data += pitch;
1236
1237                         for(int i=0; i<4; i++)
1238                         {
1239                                 uint8 bitsHi = pixels >> 56, bitsLo = pixels >> 48;
1240 // Seems to me that both of these are in the same endian, so we could cast it as
1241 // uint16 * and do straight across copies (what about 24 bpp? Treat it differently...)
1242 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
1243 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
1244 // No, it isn't because we read the memory in an endian safe way--it *won't* work...
1245 //This doesn't seem right... Let's try the encoded black value ($8800):
1246 //Apparently, CRY 0 maps to $8800...
1247                                 if (flagTRANS && ((bitsLo | bitsHi) == 0))
1248 //                              if (flagTRANS && (bitsHi == 0x88) && (bitsLo == 0x00))
1249                                         ;       // Do nothing...
1250                                 else
1251                                 {
1252                                         if (!flagRMW)
1253                                                 *currentLineBuffer = bitsHi,
1254                                                 *(currentLineBuffer + 1) = bitsLo;
1255                                         else
1256                                                 *currentLineBuffer =
1257                                                         BLEND_CR(*currentLineBuffer, bitsHi),
1258                                                 *(currentLineBuffer + 1) =
1259                                                         BLEND_Y(*(currentLineBuffer + 1), bitsLo);
1260                                 }
1261
1262                                 currentLineBuffer += lbufDelta;
1263                                 pixels <<= 16;
1264                         }
1265                 }
1266         }
1267         else if (depth == 5)                                                    // 24 BPP
1268         {
1269 //Looks like Iron Soldier is the only game that uses 24BPP mode...
1270 //There *might* be others...
1271 //WriteLog("OP: Writing 24 BPP bitmap!\n");
1272 if (firstPix)
1273         WriteLog("OP: Fixed bitmap @ 24 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1274                 // Not sure, but I think RMW only works with 16 BPP and below, and only in CRY mode...
1275                 // The LSB of flags is OPFLAG_REFLECT, so sign extend it and OR 4 into it.
1276                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 4) | 0x04;
1277
1278                 while (iwidth--)
1279                 {
1280                         // Fetch phrase...
1281                         uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1282                         data += pitch;
1283
1284                         for(int i=0; i<2; i++)
1285                         {
1286                                 // We don't use a 32-bit var here because of endian issues...!
1287                                 uint8 bits3 = pixels >> 56, bits2 = pixels >> 48,
1288                                         bits1 = pixels >> 40, bits0 = pixels >> 32;
1289
1290                                 if (flagTRANS && (bits3 | bits2 | bits1 | bits0) == 0)
1291                                         ;       // Do nothing...
1292                                 else
1293                                         *currentLineBuffer = bits3,
1294                                         *(currentLineBuffer + 1) = bits2,
1295                                         *(currentLineBuffer + 2) = bits1,
1296                                         *(currentLineBuffer + 3) = bits0;
1297
1298                                 currentLineBuffer += lbufDelta;
1299                                 pixels <<= 32;
1300                         }
1301                 }
1302         }
1303 }
1304
1305 //
1306 // Store scaled bitmap in line buffer
1307 //
1308 void OPProcessScaledBitmap(uint64 p0, uint64 p1, uint64 p2, bool render)
1309 {
1310 // Need to make sure that when writing that it stays within the line buffer...
1311 // LBUF ($F01800 - $F01D9E) 360 x 32-bit RAM
1312         uint8 depth = (p1 >> 12) & 0x07;                                // Color depth of image
1313         int32 xpos = ((int16)((p1 << 4) & 0xFFFF)) >> 4;// Image xpos in LBUF
1314         uint32 iwidth = (p1 >> 28) & 0x3FF;                             // Image width in *phrases*
1315         uint32 data = (p0 >> 40) & 0xFFFFF8;                    // Pixel data address
1316 //#ifdef OP_DEBUG_BMP
1317 // Probably should use this... Though not sure exactly how.
1318 //Use the upper bits as an offset into the phrase depending on the BPP. That's how!
1319         uint32 firstPix = (p1 >> 49) & 0x3F;
1320 //This is WEIRD! I'm sure I saw Atari Karts request 8 BPP FIRSTPIX! What happened???
1321 if (firstPix)
1322         WriteLog("OP: FIRSTPIX != 0! (Scaled BM)\n");
1323 //#endif
1324 // We can ignore the RELEASE (high order) bit for now--probably forever...!
1325 //      uint8 flags = (p1 >> 45) & 0x0F;        // REFLECT, RMW, TRANS, RELEASE
1326 //Optimize: break these out to their own BOOL values [DONE]
1327         uint8 flags = (p1 >> 45) & 0x07;                                // REFLECT (0), RMW (1), TRANS (2)
1328         bool flagREFLECT = (flags & OPFLAG_REFLECT ? true : false),
1329                 flagRMW = (flags & OPFLAG_RMW ? true : false),
1330                 flagTRANS = (flags & OPFLAG_TRANS ? true : false);
1331         uint8 index = (p1 >> 37) & 0xFE;                                // CLUT index offset (upper pix, 1-4 bpp)
1332         uint32 pitch = (p1 >> 15) & 0x07;                               // Phrase pitch
1333
1334         uint8 * tomRam8 = TOMGetRamPointer();
1335         uint8 * paletteRAM = &tomRam8[0x400];
1336         // This is OK as long as it's used correctly: For 16-bit RAM to RAM direct copies--NOT
1337         // for use when using endian-corrected data (i.e., any of the *ReadWord functions!)
1338         uint16 * paletteRAM16 = (uint16 *)paletteRAM;
1339
1340         uint16 hscale = p2 & 0xFF;
1341 // Hmm. It seems that fixing the horizontal scale necessitated re-fixing this. Not sure why,
1342 // but seems to be consistent with the vertical scaling now (and it may turn out to be wrong!)...
1343         uint16 horizontalRemainder = hscale;                            // Not sure if it starts full, but seems reasonable [It's not!]
1344 //      uint8 horizontalRemainder = 0;                                  // Let's try zero! Seems to work! Yay! [No, it doesn't!]
1345         int32 scaledWidthInPixels = (iwidth * phraseWidthToPixels[depth] * hscale) >> 5;
1346         uint32 scaledPhrasePixels = (phraseWidthToPixels[depth] * hscale) >> 5;
1347
1348 //      WriteLog("bitmap %ix? %ibpp at %i,? firstpix=? data=0x%.8x pitch %i hflipped=%s dwidth=? (linked to ?) RMW=%s Tranparent=%s\n",
1349 //              iwidth, op_bitmap_bit_depth[bitdepth], xpos, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), (flags&OPFLAG_RMW ? "yes" : "no"), (flags&OPFLAG_TRANS ? "yes" : "no"));
1350
1351 // Looks like an hscale of zero means don't draw!
1352         if (!render || iwidth == 0 || hscale == 0)
1353                 return;
1354
1355 /*extern int start_logging;
1356 if (start_logging)
1357         WriteLog("OP: Scaled bitmap %ix? %ibpp at %i,? hscale=%02X fpix=%i data=%08X pitch %i hflipped=%s dwidth=? (linked to %08X) Transluency=%s\n",
1358                 iwidth, op_bitmap_bit_depth[depth], xpos, hscale, firstPix, data, pitch, (flagREFLECT ? "yes" : "no"), op_pointer, (flagRMW ? "yes" : "no"));*/
1359 //#define OP_DEBUG_BMP
1360 //#ifdef OP_DEBUG_BMP
1361 //      WriteLog("OP: Scaled bitmap %ix%i %ibpp at %i,%i firstpix=%i data=0x%.8x pitch %i hflipped=%s dwidth=%i (linked to 0x%.8x) Transluency=%s\n",
1362 //              iwidth, height, op_bitmap_bit_depth[bitdepth], xpos, ypos, firstPix, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), dwidth, op_pointer, (flags&OPFLAG_RMW ? "yes" : "no"));
1363 //#endif
1364
1365         int32 startPos = xpos, endPos = xpos +
1366                 (!flagREFLECT ? scaledWidthInPixels - 1 : -(scaledWidthInPixels + 1));
1367         uint32 clippedWidth = 0, phraseClippedWidth = 0, dataClippedWidth = 0;
1368         bool in24BPPMode = (((GET16(tomRam8, 0x0028) >> 1) & 0x03) == 1 ? true : false);        // VMODE
1369         // Not sure if this is Jaguar Two only location or what...
1370         // From the docs, it is... If we want to limit here we should think of something else.
1371 //      int32 limit = GET16(tom_ram_8, 0x0008);                 // LIMIT
1372         int32 limit = 720;
1373 //      int32 lbufWidth = (!in24BPPMode ? limit - 1 : (limit / 2) - 1); // Zero based limit...
1374         int32 lbufWidth = 719;  // Zero based limit...
1375
1376         // If the image is completely to the left or right of the line buffer, then bail.
1377 //If in REFLECT mode, then these values are swapped! !!! FIX !!! [DONE]
1378 //There are four possibilities:
1379 //  1. image sits on left edge and no REFLECT; starts out of bounds but ends in bounds.
1380 //  2. image sits on left edge and REFLECT; starts in bounds but ends out of bounds.
1381 //  3. image sits on right edge and REFLECT; starts out of bounds but ends in bounds.
1382 //  4. image sits on right edge and no REFLECT; starts in bounds but ends out of bounds.
1383 //Numbers 2 & 4 can be caught by checking the LBUF clip while in the inner loop,
1384 // numbers 1 & 3 are of concern.
1385 // This *indirectly* handles only cases 2 & 4! And is WRONG if REFLECT is set...!
1386 //      if (rightMargin < 0 || leftMargin > lbufWidth)
1387
1388 // It might be easier to swap these (if REFLECTed) and just use XPOS down below...
1389 // That way, you could simply set XPOS to leftMargin if !REFLECT and to rightMargin otherwise.
1390 // Still have to be careful with the DATA and IWIDTH values though...
1391
1392         if ((!flagREFLECT && (endPos < 0 || startPos > lbufWidth))
1393                 || (flagREFLECT && (startPos < 0 || endPos > lbufWidth)))
1394                 return;
1395
1396         // Otherwise, find the clip limits and clip the phrase as well...
1397         // NOTE: I'm fudging here by letting the actual blit overstep the bounds of the
1398         //       line buffer, but it shouldn't matter since there are two unused line
1399         //       buffers below and nothing above and I'll at most write 40 bytes outside
1400         //       the line buffer... I could use a fractional clip begin/end value, but
1401         //       this makes the blit a *lot* more hairy. I might fix this in the future
1402         //       if it becomes necessary. (JLH)
1403         //       Probably wouldn't be *that* hairy. Just use a delta that tells the inner loop
1404         //       which pixel in the phrase is being written, and quit when either end of phrases
1405         //       is reached or line buffer extents are surpassed.
1406
1407 //This stuff is probably wrong as well... !!! FIX !!!
1408 //The strange thing is that it seems to work, but that's no guarantee that it's bulletproof!
1409 //Yup. Seems that JagMania doesn't work correctly with this...
1410 //Dunno if this is the problem, but Atari Karts is showing *some* of the road now...
1411 //Actually, it is! Or, it was. It doesn't seem to be clipping here, so the problem lies
1412 //elsewhere! Hmm. Putting the scaling code into the 1/2/8 BPP cases seems to draw the ground
1413 // a bit more accurately... Strange!
1414 //It's probably a case of the REFLECT flag being set and the background being written
1415 //from the right side of the screen...
1416 //But no, it isn't... At least if the diagnostics are telling the truth!
1417
1418         // NOTE: We're just using endPos to figure out how much, if any, to clip by.
1419         // ALSO: There may be another case where we start out of bounds and end out of bounds...!
1420         // !!! FIX !!!
1421
1422 //There's a problem here with scaledPhrasePixels in that it can be forced to zero when
1423 //the scaling factor is small. So fix it already! !!! FIX !!!
1424 /*if (scaledPhrasePixels == 0)
1425 {
1426         WriteLog("OP: [Scaled] We're about to encounter a divide by zero error!\n");
1427         DumpScaledObject(p0, p1, p2);
1428 }//*/
1429 //NOTE: I'm almost 100% sure that this is wrong... And it is! :-p
1430
1431 //Try a simple example...
1432 // Let's say we have a 8 BPP scanline with an hscale of $80 (4). Our xpos is -10,
1433 // non-flipped. Pixels in the bitmap are XYZXYZXYZXYZXYZ.
1434 // Scaled up, they would be XXXXYYYYZZZZXXXXYYYYZZZZXXXXYYYYZZZZ...
1435 //
1436 // Normally, we would expect this in the line buffer:
1437 // ZZXXXXYYYYZZZZXXXXYYYYZZZZ...
1438 //
1439 // But instead we're getting:
1440 // XXXXYYYYZZZZXXXXYYYYZZZZ...
1441 //
1442 // or are we??? It would seem so, simply by virtue of the fact that we're NOT starting
1443 // on negative boundary--or are we? Hmm...
1444 // cw = 10, dcw = pcw = 10 / ([8 * 4 = 32] 32) = 0, sp = -10
1445 //
1446 // Let's try a real world example:
1447 //
1448 //OP: Scaled bitmap (70, 8 BPP, spp=28) sp (-400) < 0... [new sp=-8, cw=400, dcw=pcw=14]
1449 //OP: Scaled bitmap (6F, 8 BPP, spp=27) sp (-395) < 0... [new sp=-17, cw=395, dcw=pcw=14]
1450 //
1451 // Really, spp is 27.75 in the second case...
1452 // So... If we do 395 / 27.75, we get 14. Ok so far... If we scale that against the
1453 // start position (14 * 27.75), we get -6.5... NOT -17!
1454
1455 //Now it seems we're working OK, at least for the first case...
1456 uint32 scaledPhrasePixelsUS = phraseWidthToPixels[depth] * hscale;
1457
1458         if (startPos < 0)                       // Case #1: Begin out, end in, L to R
1459 {
1460 extern int start_logging;
1461 if (start_logging)
1462         WriteLog("OP: Scaled bitmap (%02X, %u BPP, spp=%u) start pos (%i) < 0...", hscale, op_bitmap_bit_depth[depth], scaledPhrasePixels, startPos);
1463 //              clippedWidth = 0 - startPos,
1464                 clippedWidth = (0 - startPos) << 5,
1465 //              dataClippedWidth = phraseClippedWidth = clippedWidth / scaledPhrasePixels,
1466                 dataClippedWidth = phraseClippedWidth = (clippedWidth / scaledPhrasePixelsUS) >> 5,
1467 //              startPos = 0 - (clippedWidth % scaledPhrasePixels);
1468                 startPos += (dataClippedWidth * scaledPhrasePixelsUS) >> 5;
1469 if (start_logging)
1470         WriteLog(" [new sp=%i, cw=%i, dcw=pcw=%i]\n", startPos, clippedWidth, dataClippedWidth);
1471 }
1472
1473         if (endPos < 0)                         // Case #2: Begin in, end out, R to L
1474                 clippedWidth = 0 - endPos,
1475                 phraseClippedWidth = clippedWidth / scaledPhrasePixels;
1476
1477         if (endPos > lbufWidth)         // Case #3: Begin in, end out, L to R
1478                 clippedWidth = endPos - lbufWidth,
1479                 phraseClippedWidth = clippedWidth / scaledPhrasePixels;
1480
1481         if (startPos > lbufWidth)       // Case #4: Begin out, end in, R to L
1482                 clippedWidth = startPos - lbufWidth,
1483                 dataClippedWidth = phraseClippedWidth = clippedWidth / scaledPhrasePixels,
1484                 startPos = lbufWidth + (clippedWidth % scaledPhrasePixels);
1485
1486 extern int op_start_log;
1487 if (op_start_log && clippedWidth != 0)
1488         WriteLog("OP: Clipped line. SP=%i, EP=%i, clip=%u, iwidth=%u, hscale=%02X\n", startPos, endPos, clippedWidth, iwidth, hscale);
1489 if (op_start_log && startPos == 13)
1490 {
1491         WriteLog("OP: Scaled line. SP=%i, EP=%i, clip=%u, iwidth=%u, hscale=%02X, depth=%u, firstPix=%u\n", startPos, endPos, clippedWidth, iwidth, hscale, depth, firstPix);
1492         DumpScaledObject(p0, p1, p2);
1493         if (iwidth == 7)
1494         {
1495                 WriteLog("    %08X: ", data);
1496                 for(int i=0; i<7*8; i++)
1497                         WriteLog("%02X ", JaguarReadByte(data+i));
1498                 WriteLog("\n");
1499         }
1500 }
1501         // If the image is sitting on the line buffer left or right edge, we need to compensate
1502         // by decreasing the image phrase width accordingly.
1503         iwidth -= phraseClippedWidth;
1504
1505         // Also, if we're clipping the phrase we need to make sure we're in the correct part of
1506         // the pixel data.
1507 //      data += phraseClippedWidth * (pitch << 3);
1508         data += dataClippedWidth * (pitch << 3);
1509
1510         // NOTE: When the bitmap is in REFLECT mode, the XPOS marks the *right* side of the
1511         //       bitmap! This makes clipping & etc. MUCH, much easier...!
1512 //      uint32 lbufAddress = 0x1800 + (!in24BPPMode ? leftMargin * 2 : leftMargin * 4);
1513 //      uint32 lbufAddress = 0x1800 + (!in24BPPMode ? startPos * 2 : startPos * 4);
1514         uint32 lbufAddress = 0x1800 + startPos * 2;
1515         uint8 * currentLineBuffer = &tomRam8[lbufAddress];
1516 //uint8 * lineBufferLowerLimit = &tom_ram_8[0x1800],
1517 //      * lineBufferUpperLimit = &tom_ram_8[0x1800 + 719];
1518
1519         // Render.
1520
1521 // Hmm. We check above for 24 BPP mode, but don't do anything about it below...
1522 // If we *were* in 24 BPP mode, how would you convert CRY to RGB24? Seems to me
1523 // that if you're in CRY mode then you wouldn't be able to use 24 BPP bitmaps
1524 // anyway.
1525 // This seems to be the case (at least according to the Midsummer docs)...!
1526
1527         if (depth == 0)                                                                 // 1 BPP
1528         {
1529 if (firstPix != 0)
1530         WriteLog("OP: Scaled bitmap @ 1 BPP requesting FIRSTPIX!\n");
1531                 // The LSB of flags is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1532                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1533
1534                 int pixCount = 0;
1535                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1536
1537                 while ((int32)iwidth > 0)
1538                 {
1539                         uint8 bits = pixels >> 63;
1540
1541 #ifndef OP_USES_PALETTE_ZERO
1542                         if (flagTRANS && bits == 0)
1543 #else
1544                         if (flagTRANS && (paletteRAM16[index | bits] == 0))
1545 #endif
1546                                 ;       // Do nothing...
1547                         else
1548                         {
1549                                 if (!flagRMW)
1550                                         // This is the *only* correct use of endian-dependent code
1551                                         // (i.e., mem-to-mem direct copying)!
1552                                         *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1553                                 else
1554                                         *currentLineBuffer =
1555                                                 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1556                                         *(currentLineBuffer + 1) =
1557                                                 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1558                         }
1559
1560                         currentLineBuffer += lbufDelta;
1561
1562 /*
1563 The reason we subtract the horizontalRemainder *after* the test is because we had too few
1564 bits for horizontalRemainder to properly recognize a negative number. But now it's 16 bits
1565 wide, so we could probably go back to that (as long as we make it an int16 and not a uint16!)
1566 */
1567 /*                      horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1568                         while (horizontalRemainder & 0x80)
1569                         {
1570                                 horizontalRemainder += hscale;
1571                                 pixCount++;
1572                                 pixels <<= 1;
1573                         }//*/
1574 //                      while (horizontalRemainder <= 0x20)             // I.e., it's <= 1.0 (*before* subtraction)
1575                         while (horizontalRemainder < 0x20)              // I.e., it's <= 1.0 (*before* subtraction)
1576                         {
1577                                 horizontalRemainder += hscale;
1578                                 pixCount++;
1579                                 pixels <<= 1;
1580                         }
1581                         horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1582
1583                         if (pixCount > 63)
1584                         {
1585                                 int phrasesToSkip = pixCount / 64, pixelShift = pixCount % 64;
1586
1587                                 data += (pitch << 3) * phrasesToSkip;
1588                                 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1589                                 pixels <<= 1 * pixelShift;
1590                                 iwidth -= phrasesToSkip;
1591                                 pixCount = pixelShift;
1592                         }
1593                 }
1594         }
1595         else if (depth == 1)                                                    // 2 BPP
1596         {
1597 if (firstPix != 0)
1598         WriteLog("OP: Scaled bitmap @ 2 BPP requesting FIRSTPIX!\n");
1599                 index &= 0xFC;                                                          // Top six bits form CLUT index
1600                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1601                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1602
1603                 int pixCount = 0;
1604                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1605
1606                 while ((int32)iwidth > 0)
1607                 {
1608                         uint8 bits = pixels >> 62;
1609
1610 #ifndef OP_USES_PALETTE_ZERO
1611                         if (flagTRANS && bits == 0)
1612 #else
1613                         if (flagTRANS && (paletteRAM16[index | bits] == 0))
1614 #endif
1615                                 ;       // Do nothing...
1616                         else
1617                         {
1618                                 if (!flagRMW)
1619                                         // This is the *only* correct use of endian-dependent code
1620                                         // (i.e., mem-to-mem direct copying)!
1621                                         *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1622                                 else
1623                                         *currentLineBuffer =
1624                                                 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1625                                         *(currentLineBuffer + 1) =
1626                                                 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1627                         }
1628
1629                         currentLineBuffer += lbufDelta;
1630
1631 /*                      horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1632                         while (horizontalRemainder & 0x80)
1633                         {
1634                                 horizontalRemainder += hscale;
1635                                 pixCount++;
1636                                 pixels <<= 2;
1637                         }//*/
1638 //                      while (horizontalRemainder <= 0x20)             // I.e., it's <= 0 (*before* subtraction)
1639                         while (horizontalRemainder < 0x20)              // I.e., it's <= 1.0 (*before* subtraction)
1640                         {
1641                                 horizontalRemainder += hscale;
1642                                 pixCount++;
1643                                 pixels <<= 2;
1644                         }
1645                         horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1646
1647                         if (pixCount > 31)
1648                         {
1649                                 int phrasesToSkip = pixCount / 32, pixelShift = pixCount % 32;
1650
1651                                 data += (pitch << 3) * phrasesToSkip;
1652                                 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1653                                 pixels <<= 2 * pixelShift;
1654                                 iwidth -= phrasesToSkip;
1655                                 pixCount = pixelShift;
1656                         }
1657                 }
1658         }
1659         else if (depth == 2)                                                    // 4 BPP
1660         {
1661 if (firstPix != 0)
1662         WriteLog("OP: Scaled bitmap @ 4 BPP requesting FIRSTPIX!\n");
1663                 index &= 0xF0;                                                          // Top four bits form CLUT index
1664                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1665                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1666
1667                 int pixCount = 0;
1668                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1669
1670                 while ((int32)iwidth > 0)
1671                 {
1672                         uint8 bits = pixels >> 60;
1673
1674 #ifndef OP_USES_PALETTE_ZERO
1675                         if (flagTRANS && bits == 0)
1676 #else
1677                         if (flagTRANS && (paletteRAM16[index | bits] == 0))
1678 #endif
1679                                 ;       // Do nothing...
1680                         else
1681                         {
1682                                 if (!flagRMW)
1683                                         // This is the *only* correct use of endian-dependent code
1684                                         // (i.e., mem-to-mem direct copying)!
1685                                         *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1686                                 else
1687                                         *currentLineBuffer =
1688                                                 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1689                                         *(currentLineBuffer + 1) =
1690                                                 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1691                         }
1692
1693                         currentLineBuffer += lbufDelta;
1694
1695 /*                      horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1696                         while (horizontalRemainder & 0x80)
1697                         {
1698                                 horizontalRemainder += hscale;
1699                                 pixCount++;
1700                                 pixels <<= 4;
1701                         }//*/
1702 //                      while (horizontalRemainder <= 0x20)             // I.e., it's <= 0 (*before* subtraction)
1703                         while (horizontalRemainder < 0x20)              // I.e., it's <= 0 (*before* subtraction)
1704                         {
1705                                 horizontalRemainder += hscale;
1706                                 pixCount++;
1707                                 pixels <<= 4;
1708                         }
1709                         horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1710
1711                         if (pixCount > 15)
1712                         {
1713                                 int phrasesToSkip = pixCount / 16, pixelShift = pixCount % 16;
1714
1715                                 data += (pitch << 3) * phrasesToSkip;
1716                                 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1717                                 pixels <<= 4 * pixelShift;
1718                                 iwidth -= phrasesToSkip;
1719                                 pixCount = pixelShift;
1720                         }
1721                 }
1722         }
1723         else if (depth == 3)                                                    // 8 BPP
1724         {
1725 if (firstPix)
1726         WriteLog("OP: Scaled bitmap @ 8 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1727                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1728                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1729
1730                 int pixCount = 0;
1731                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1732
1733                 while ((int32)iwidth > 0)
1734                 {
1735                         uint8 bits = pixels >> 56;
1736
1737 #ifndef OP_USES_PALETTE_ZERO
1738                         if (flagTRANS && bits == 0)
1739 #else
1740                         if (flagTRANS && (paletteRAM16[bits] == 0))
1741 #endif
1742                                 ;       // Do nothing...
1743                         else
1744                         {
1745                                 if (!flagRMW)
1746                                         // This is the *only* correct use of endian-dependent code
1747                                         // (i.e., mem-to-mem direct copying)!
1748                                         *(uint16 *)currentLineBuffer = paletteRAM16[bits];
1749 /*                              {
1750                                         if (currentLineBuffer >= lineBufferLowerLimit && currentLineBuffer <= lineBufferUpperLimit)
1751                                                 *(uint16 *)currentLineBuffer = paletteRAM16[bits];
1752                                 }*/
1753                                 else
1754                                         *currentLineBuffer =
1755                                                 BLEND_CR(*currentLineBuffer, paletteRAM[bits << 1]),
1756                                         *(currentLineBuffer + 1) =
1757                                                 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[(bits << 1) + 1]);
1758                         }
1759
1760                         currentLineBuffer += lbufDelta;
1761
1762 //                      while (horizontalRemainder <= 0x20)             // I.e., it's <= 0 (*before* subtraction)
1763                         while (horizontalRemainder < 0x20)              // I.e., it's <= 1.0 (*before* subtraction)
1764                         {
1765                                 horizontalRemainder += hscale;
1766                                 pixCount++;
1767                                 pixels <<= 8;
1768                         }
1769                         horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1770
1771                         if (pixCount > 7)
1772                         {
1773                                 int phrasesToSkip = pixCount / 8, pixelShift = pixCount % 8;
1774
1775                                 data += (pitch << 3) * phrasesToSkip;
1776                                 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1777                                 pixels <<= 8 * pixelShift;
1778                                 iwidth -= phrasesToSkip;
1779                                 pixCount = pixelShift;
1780                         }
1781                 }
1782         }
1783         else if (depth == 4)                                                    // 16 BPP
1784         {
1785 if (firstPix != 0)
1786         WriteLog("OP: Scaled bitmap @ 16 BPP requesting FIRSTPIX!\n");
1787                 // The LSB is OPFLAG_REFLECT, so sign extend it and OR 2 into it.
1788                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1789
1790                 int pixCount = 0;
1791                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1792
1793                 while ((int32)iwidth > 0)
1794                 {
1795                         uint8 bitsHi = pixels >> 56, bitsLo = pixels >> 48;
1796
1797 //This doesn't seem right... Let's try the encoded black value ($8800):
1798 //Apparently, CRY 0 maps to $8800...
1799                                 if (flagTRANS && ((bitsLo | bitsHi) == 0))
1800 //                              if (flagTRANS && (bitsHi == 0x88) && (bitsLo == 0x00))
1801                                 ;       // Do nothing...
1802                         else
1803                         {
1804                                 if (!flagRMW)
1805                                         *currentLineBuffer = bitsHi,
1806                                         *(currentLineBuffer + 1) = bitsLo;
1807                                 else
1808                                         *currentLineBuffer =
1809                                                 BLEND_CR(*currentLineBuffer, bitsHi),
1810                                         *(currentLineBuffer + 1) =
1811                                                 BLEND_Y(*(currentLineBuffer + 1), bitsLo);
1812                         }
1813
1814                         currentLineBuffer += lbufDelta;
1815
1816 /*                      horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1817                         while (horizontalRemainder & 0x80)
1818                         {
1819                                 horizontalRemainder += hscale;
1820                                 pixCount++;
1821                                 pixels <<= 16;
1822                         }//*/
1823 //                      while (horizontalRemainder <= 0x20)             // I.e., it's <= 0 (*before* subtraction)
1824                         while (horizontalRemainder < 0x20)              // I.e., it's <= 1.0 (*before* subtraction)
1825                         {
1826                                 horizontalRemainder += hscale;
1827                                 pixCount++;
1828                                 pixels <<= 16;
1829                         }
1830                         horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1831 //*/
1832                         if (pixCount > 3)
1833                         {
1834                                 int phrasesToSkip = pixCount / 4, pixelShift = pixCount % 4;
1835
1836                                 data += (pitch << 3) * phrasesToSkip;
1837                                 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1838                                 pixels <<= 16 * pixelShift;
1839
1840                                 iwidth -= phrasesToSkip;
1841
1842                                 pixCount = pixelShift;
1843                         }
1844                 }
1845         }
1846         else if (depth == 5)                                                    // 24 BPP
1847         {
1848 //I'm not sure that you can scale a 24 BPP bitmap properly--the JTRM seems to indicate as much.
1849 WriteLog("OP: Writing 24 BPP scaled bitmap!\n");
1850 if (firstPix != 0)
1851         WriteLog("OP: Scaled bitmap @ 24 BPP requesting FIRSTPIX!\n");
1852                 // Not sure, but I think RMW only works with 16 BPP and below, and only in CRY mode...
1853                 // The LSB is OPFLAG_REFLECT, so sign extend it and OR 4 into it.
1854                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 4) | 0x04;
1855
1856                 while (iwidth--)
1857                 {
1858                         // Fetch phrase...
1859                         uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1860                         data += pitch << 3;                                             // Multiply pitch * 8 (optimize: precompute this value)
1861
1862                         for(int i=0; i<2; i++)
1863                         {
1864                                 uint8 bits3 = pixels >> 56, bits2 = pixels >> 48,
1865                                         bits1 = pixels >> 40, bits0 = pixels >> 32;
1866
1867                                 if (flagTRANS && (bits3 | bits2 | bits1 | bits0) == 0)
1868                                         ;       // Do nothing...
1869                                 else
1870                                         *currentLineBuffer = bits3,
1871                                         *(currentLineBuffer + 1) = bits2,
1872                                         *(currentLineBuffer + 2) = bits1,
1873                                         *(currentLineBuffer + 3) = bits0;
1874
1875                                 currentLineBuffer += lbufDelta;
1876                                 pixels <<= 32;
1877                         }
1878                 }
1879         }
1880 }