]> Shamusworld >> Repos - virtualjaguar/blob - src/op.cpp
3cca7fa669aaa69969146d5511bd24cb5814cdc6
[virtualjaguar] / src / op.cpp
1 //
2 // Object Processor
3 //
4 // Original source by David Raingeard (Cal2)
5 // GCC/SDL port by Niels Wagenaar (Linux/WIN32) and Caz (BeOS)
6 // Extensive cleanups/fixes/rewrites by James Hammons
7 // (C) 2010 Underground Software
8 //
9 // JLH = James Hammons <jlhamm@acm.org>
10 //
11 // Who  When        What
12 // ---  ----------  -----------------------------------------------------------
13 // JLH  01/16/2010  Created this log ;-)
14 //
15
16 #include "op.h"
17
18 #include <stdlib.h>
19 #include <string.h>
20 #include "gpu.h"
21 #include "jaguar.h"
22 #include "log.h"
23 #include "m68000/m68kinterface.h"
24 #include "memory.h"
25 #include "tom.h"
26
27 //#define OP_DEBUG
28 //#define OP_DEBUG_BMP
29
// Blend table lookups. The destination byte selects the upper 8 bits of the
// table index and the source byte the lower 8 bits. Every macro argument is
// parenthesized so that compound expressions (e.g. "a | b") are cast and
// shifted as a whole instead of only their first operand.
#define BLEND_Y(dst, src)       op_blend_y[(((uint16_t)(dst)) << 8) | ((uint16_t)(src))]
#define BLEND_CR(dst, src)      op_blend_cr[(((uint16_t)(dst)) << 8) | ((uint16_t)(src))]
32
33 #define OBJECT_TYPE_BITMAP      0                                       // 000
34 #define OBJECT_TYPE_SCALE       1                                       // 001
35 #define OBJECT_TYPE_GPU         2                                       // 010
36 #define OBJECT_TYPE_BRANCH      3                                       // 011
37 #define OBJECT_TYPE_STOP        4                                       // 100
38
39 #define CONDITION_EQUAL                         0                       // VC == YPOS
40 #define CONDITION_LESS_THAN                     1                       // VC < YPOS
41 #define CONDITION_GREATER_THAN          2                       // VC > YPOS
42 #define CONDITION_OP_FLAG_SET           3
43 #define CONDITION_SECOND_HALF_LINE      4
44
45 #if 0
46 #define OPFLAG_RELEASE          8                                       // Bus release bit
47 #define OPFLAG_TRANS            4                                       // Transparency bit
48 #define OPFLAG_RMW                      2                                       // Read-Modify-Write bit
49 #define OPFLAG_REFLECT          1                                       // Horizontal mirror bit
50 #endif
51
52 // Private function prototypes
53
54 void OPProcessFixedBitmap(uint64_t p0, uint64_t p1, bool render);
55 void OPProcessScaledBitmap(uint64_t p0, uint64_t p1, uint64_t p2, bool render);
56 void OPDiscoverObjects(uint32_t address);
57 void OPDumpObjectList(void);
58 void DumpScaledObject(uint64_t p0, uint64_t p1, uint64_t p2);
59 void DumpFixedObject(uint64_t p0, uint64_t p1);
60 void DumpBitmapCore(uint64_t p0, uint64_t p1);
61 uint64_t OPLoadPhrase(uint32_t offset);
62
63 // Local global variables
64
65 // Blend tables (64K each)
66 static uint8_t op_blend_y[0x10000];
67 static uint8_t op_blend_cr[0x10000];
68 // There may be a problem with this "RAM" overlapping (and thus being independent of)
69 // some of the regular TOM RAM...
70 //#warning objectp_ram is separated from TOM RAM--need to fix that!
71 //static uint8_t objectp_ram[0x40];                     // This is based at $F00000
72 uint8_t objectp_running = 0;
73 //bool objectp_stop_reading_list;
74
75 static uint8_t op_bitmap_bit_depth[8] = { 1, 2, 4, 8, 16, 24, 32, 0 };
76 //static uint32_t op_bitmap_bit_size[8] =
77 //      { (uint32_t)(0.125*65536), (uint32_t)(0.25*65536), (uint32_t)(0.5*65536), (uint32_t)(1*65536),
78 //        (uint32_t)(2*65536),     (uint32_t)(1*65536),    (uint32_t)(1*65536),   (uint32_t)(1*65536) };
79 static uint32_t op_pointer;
80
81 int32_t phraseWidthToPixels[8] = { 64, 32, 16, 8, 4, 2, 0, 0 };
82
83
84 //
85 // Object Processor initialization
86 //
87 void OPInit(void)
88 {
89         // Here we calculate the saturating blend of a signed 4-bit value and an
90         // existing Cyan/Red value as well as a signed 8-bit value and an existing intensity...
91         // Note: CRY is 4 bits Cyan, 4 bits Red, 16 bits intensitY
92         for(int i=0; i<256*256; i++)
93         {
94                 int y = (i >> 8) & 0xFF;
95                 int dy = (int8_t)i;                                     // Sign extend the Y index
96                 int c1 = (i >> 8) & 0x0F;
97                 int dc1 = (int8_t)(i << 4) >> 4;                // Sign extend the R index
98                 int c2 = (i >> 12) & 0x0F;
99                 int dc2 = (int8_t)(i & 0xF0) >> 4;      // Sign extend the C index
100
101                 y += dy;
102
103                 if (y < 0)
104                         y = 0;
105                 else if (y > 0xFF)
106                         y = 0xFF;
107
108                 op_blend_y[i] = y;
109
110                 c1 += dc1;
111
112                 if (c1 < 0)
113                         c1 = 0;
114                 else if (c1 > 0x0F)
115                         c1 = 0x0F;
116
117                 c2 += dc2;
118
119                 if (c2 < 0)
120                         c2 = 0;
121                 else if (c2 > 0x0F)
122                         c2 = 0x0F;
123
124                 op_blend_cr[i] = (c2 << 4) | c1;
125         }
126
127         OPReset();
128 }
129
130
131 //
132 // Object Processor reset
133 //
134 void OPReset(void)
135 {
136 //      memset(objectp_ram, 0x00, 0x40);
137         objectp_running = 0;
138 }
139
140
141 static const char * opType[8] =
142 { "(BITMAP)", "(SCALED BITMAP)", "(GPU INT)", "(BRANCH)", "(STOP)", "???", "???", "???" };
143 static const char * ccType[8] =
144         { "==", "<", ">", "(opflag set)", "(second half line)", "?", "?", "?" };
145 static uint32_t object[8192];
146 static uint32_t numberOfObjects;
147 //static uint32_t objectLink[8192];
148 //static uint32_t numberOfLinks;
149
150
151 void OPDone(void)
152 {
153 //#warning "!!! Fix OL dump so that it follows links !!!"
154 //      const char * opType[8] =
155 //      { "(BITMAP)", "(SCALED BITMAP)", "(GPU INT)", "(BRANCH)", "(STOP)", "???", "???", "???" };
156 //      const char * ccType[8] =
157 //              { "\"==\"", "\"<\"", "\">\"", "(opflag set)", "(second half line)", "?", "?", "?" };
158
159         uint32_t olp = OPGetListPointer();
160         WriteLog("\nOP: OLP = $%08X\n", olp);
161         WriteLog("OP: Phrase dump\n    ----------\n");
162
163 #if 0
164         for(uint32_t i=0; i<0x100; i+=8)
165         {
166                 uint32_t hi = JaguarReadLong(olp + i, OP), lo = JaguarReadLong(olp + i + 4, OP);
167                 WriteLog("\t%08X: %08X %08X %s", olp + i, hi, lo, opType[lo & 0x07]);
168
169                 if ((lo & 0x07) == 3)
170                 {
171                         uint16_t ypos = (lo >> 3) & 0x7FF;
172                         uint8_t  cc   = (lo >> 14) & 0x03;
173                         uint32_t link = ((hi << 11) | (lo >> 21)) & 0x3FFFF8;
174                         WriteLog(" YPOS=%u, CC=%s, link=%08X", ypos, ccType[cc], link);
175                 }
176
177                 WriteLog("\n");
178
179                 if ((lo & 0x07) == 0)
180                         DumpFixedObject(OPLoadPhrase(olp+i), OPLoadPhrase(olp+i+8));
181
182                 if ((lo & 0x07) == 1)
183                         DumpScaledObject(OPLoadPhrase(olp+i), OPLoadPhrase(olp+i+8), OPLoadPhrase(olp+i+16));
184         }
185
186         WriteLog("\n");
187 #else
188 //#warning "!!! Fix lockup in OPDiscoverObjects() !!!"
189 //temp, to keep the following function from locking up on bad/weird OLs
190 //return;
191
192         numberOfObjects = 0;
193         OPDiscoverObjects(olp);
194         OPDumpObjectList();
195 #endif
196 }
197
198
199 bool OPObjectExists(uint32_t address)
200 {
201         // Yes, we really do a linear search, every time. :-/
202         for(uint32_t i=0; i<numberOfObjects; i++)
203         {
204                 if (address == object[i])
205                         return true;
206         }
207
208         return false;
209 }
210
211
212 void OPDiscoverObjects(uint32_t address)
213 {
214         uint8_t objectType = 0;
215
216         do
217         {
218                 // If we've seen this object already, bail out!
219                 // Otherwise, add it to the list
220                 if (OPObjectExists(address))
221                         return;
222
223                 object[numberOfObjects++] = address;
224
225                 // Get the object & decode its type, link address
226                 uint32_t hi = JaguarReadLong(address + 0, OP);
227                 uint32_t lo = JaguarReadLong(address + 4, OP);
228                 objectType = lo & 0x07;
229                 uint32_t link = ((hi << 11) | (lo >> 21)) & 0x3FFFF8;
230
231                 if (objectType == 3)
232                 {
233                         // Branch if YPOS < 2047 can be treated as a GOTO, so don't do any
234                         // discovery in that case. Otherwise, have at it:
235                         if ((lo & 0xFFFF) != 0x7FFB)
236                                 // Recursion needed to follow all links! This does depth-first
237                                 // recursion on the not-taken objects
238                                 OPDiscoverObjects(address + 8);
239                 }
240
241                 // Get the next object...
242                 address = link;
243         }
244         while (objectType != 4);
245 }
246
247
248 void OPDumpObjectList(void)
249 {
250         for(uint32_t i=0; i<numberOfObjects; i++)
251         {
252                 uint32_t address = object[i];
253
254                 uint32_t hi = JaguarReadLong(address + 0, OP);
255                 uint32_t lo = JaguarReadLong(address + 4, OP);
256                 uint8_t objectType = lo & 0x07;
257                 uint32_t link = ((hi << 11) | (lo >> 21)) & 0x3FFFF8;
258                 WriteLog("%08X: %08X %08X %s -> $%08X", address, hi, lo, opType[objectType], link);
259
260                 if (objectType == 3)
261                 {
262                         uint16_t ypos = (lo >> 3) & 0x7FF;
263                         uint8_t  cc   = (lo >> 14) & 0x07;      // Proper # of bits == 3
264                         WriteLog(" YPOS %s %u", ccType[cc], ypos);
265                 }
266
267                 WriteLog("\n");
268
269                 // Yes, this is how the OP finds follow-on phrases for bitmap/scaled
270                 // bitmap objects...!
271                 if (objectType == 0)
272                         DumpFixedObject(OPLoadPhrase(address + 0),
273                                 OPLoadPhrase(address | 0x08));
274
275                 if (objectType == 1)
276                         DumpScaledObject(OPLoadPhrase(address + 0),
277                                 OPLoadPhrase(address | 0x08), OPLoadPhrase(address | 0x10));
278
279                 if (address == link)    // Ruh roh...
280                 {
281                         // Runaway recursive link is bad!
282                         WriteLog("***** SELF REFERENTIAL LINK *****\n\n");
283                 }
284         }
285
286         WriteLog("\n");
287 }
288
289
290 //
291 // Object Processor memory access
292 // Memory range: F00010 - F00027
293 //
294 //      F00010-F00017   R     xxxxxxxx xxxxxxxx   OB - current object code from the graphics processor
295 //      F00020-F00023     W   xxxxxxxx xxxxxxxx   OLP - start of the object list
296 //      F00026            W   -------- -------x   OBF - object processor flag
297 //
298
299 #if 0
300 uint8_t OPReadByte(uint32_t offset, uint32_t who/*=UNKNOWN*/)
301 {
302         offset &= 0x3F;
303         return objectp_ram[offset];
304 }
305
306 uint16_t OPReadWord(uint32_t offset, uint32_t who/*=UNKNOWN*/)
307 {
308         offset &= 0x3F;
309         return GET16(objectp_ram, offset);
310 }
311
312 void OPWriteByte(uint32_t offset, uint8_t data, uint32_t who/*=UNKNOWN*/)
313 {
314         offset &= 0x3F;
315         objectp_ram[offset] = data;
316 }
317
318 void OPWriteWord(uint32_t offset, uint16_t data, uint32_t who/*=UNKNOWN*/)
319 {
320         offset &= 0x3F;
321         SET16(objectp_ram, offset, data);
322
323 /*if (offset == 0x20)
324 WriteLog("OP: Setting lo list pointer: %04X\n", data);
325 if (offset == 0x22)
326 WriteLog("OP: Setting hi list pointer: %04X\n", data);//*/
327 }
328 #endif
329
330
331 uint32_t OPGetListPointer(void)
332 {
333         // Note: This register is LO / HI WORD, hence the funky look of this...
334         return GET16(tomRam8, 0x20) | (GET16(tomRam8, 0x22) << 16);
335 }
336
337
338 // This is WRONG, since the OBF is only 16 bits wide!!! [FIXED]
339
340 uint32_t OPGetStatusRegister(void)
341 {
342         return GET16(tomRam8, 0x26);
343 }
344
345
346 // This is WRONG, since the OBF is only 16 bits wide!!! [FIXED]
347
348 void OPSetStatusRegister(uint32_t data)
349 {
350         tomRam8[0x26] = (data & 0x0000FF00) >> 8;
351         tomRam8[0x27] |= (data & 0xFE);
352 }
353
354
355 void OPSetCurrentObject(uint64_t object)
356 {
357 //Not sure this is right... Wouldn't it just be stored 64 bit BE?
358         // Stored as least significant 32 bits first, ms32 last in big endian
359 /*      objectp_ram[0x13] = object & 0xFF; object >>= 8;
360         objectp_ram[0x12] = object & 0xFF; object >>= 8;
361         objectp_ram[0x11] = object & 0xFF; object >>= 8;
362         objectp_ram[0x10] = object & 0xFF; object >>= 8;
363
364         objectp_ram[0x17] = object & 0xFF; object >>= 8;
365         objectp_ram[0x16] = object & 0xFF; object >>= 8;
366         objectp_ram[0x15] = object & 0xFF; object >>= 8;
367         objectp_ram[0x14] = object & 0xFF;*/
368 // Let's try regular good old big endian...
369         tomRam8[0x17] = object & 0xFF; object >>= 8;
370         tomRam8[0x16] = object & 0xFF; object >>= 8;
371         tomRam8[0x15] = object & 0xFF; object >>= 8;
372         tomRam8[0x14] = object & 0xFF; object >>= 8;
373
374         tomRam8[0x13] = object & 0xFF; object >>= 8;
375         tomRam8[0x12] = object & 0xFF; object >>= 8;
376         tomRam8[0x11] = object & 0xFF; object >>= 8;
377         tomRam8[0x10] = object & 0xFF;
378 }
379
380
381 uint64_t OPLoadPhrase(uint32_t offset)
382 {
383         offset &= ~0x07;                                                // 8 byte alignment
384         return ((uint64_t)JaguarReadLong(offset, OP) << 32) | (uint64_t)JaguarReadLong(offset+4, OP);
385 }
386
387
388 void OPStorePhrase(uint32_t offset, uint64_t p)
389 {
390         offset &= ~0x07;                                                // 8 byte alignment
391         JaguarWriteLong(offset, p >> 32, OP);
392         JaguarWriteLong(offset + 4, p & 0xFFFFFFFF, OP);
393 }
394
395
396 //
397 // Debugging routines
398 //
399 void DumpScaledObject(uint64_t p0, uint64_t p1, uint64_t p2)
400 {
401         WriteLog("          %08X %08X\n", (uint32_t)(p1>>32), (uint32_t)(p1&0xFFFFFFFF));
402         WriteLog("          %08X %08X\n", (uint32_t)(p2>>32), (uint32_t)(p2&0xFFFFFFFF));
403         DumpBitmapCore(p0, p1);
404         uint32_t hscale = p2 & 0xFF;
405         uint32_t vscale = (p2 >> 8) & 0xFF;
406         uint32_t remainder = (p2 >> 16) & 0xFF;
407         WriteLog("    [hsc: %02X, vsc: %02X, rem: %02X]\n", hscale, vscale, remainder);
408 }
409
410
411 void DumpFixedObject(uint64_t p0, uint64_t p1)
412 {
413         WriteLog("          %08X %08X\n", (uint32_t)(p1>>32), (uint32_t)(p1&0xFFFFFFFF));
414         DumpBitmapCore(p0, p1);
415 }
416
417
418 void DumpBitmapCore(uint64_t p0, uint64_t p1)
419 {
420         uint32_t bdMultiplier[8] = { 64, 32, 16, 8, 4, 2, 1, 1 };
421         uint8_t bitdepth = (p1 >> 12) & 0x07;
422 //WAS:  int16_t ypos = ((p0 >> 3) & 0x3FF);                     // ??? What if not interlaced (/2)?
423         int16_t ypos = ((p0 >> 3) & 0x7FF);                     // ??? What if not interlaced (/2)?
424         int32_t xpos = p1 & 0xFFF;
425         xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);       // Sign extend that mutha!
426         uint32_t iwidth = ((p1 >> 28) & 0x3FF);
427         uint32_t dwidth = ((p1 >> 18) & 0x3FF);         // Unsigned!
428         uint16_t height = ((p0 >> 14) & 0x3FF);
429         uint32_t link = ((p0 >> 24) & 0x7FFFF) << 3;
430         uint32_t ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
431         uint32_t firstPix = (p1 >> 49) & 0x3F;
432         uint8_t flags = (p1 >> 45) & 0x0F;
433         uint8_t idx = (p1 >> 38) & 0x7F;
434         uint32_t pitch = (p1 >> 15) & 0x07;
435         WriteLog("    [%u x %u @ (%i, %u) (iw:%u, dw:%u) (%u bpp), p:%08X fp:%02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
436                 iwidth * bdMultiplier[bitdepth],
437                 height, xpos, ypos, iwidth, dwidth, op_bitmap_bit_depth[bitdepth],
438                 ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""),
439                 (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""),
440                 (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
441 }
442
443
444 //
445 // Object Processor main routine
446 //
447 #warning "Need to fix this so that when an GPU object IRQ happens, we can pick up OP processing where we left off. !!! FIX !!!"
448 void OPProcessList(int halfline, bool render)
449 {
450 #warning "!!! NEED TO HANDLE MULTIPLE FIELDS PROPERLY !!!"
451 // We ignore them, for now; not good D-:
452 // N.B.: Half-lines are exactly that, half-lines. When in interlaced mode, it
453 //       draws the screen exactly the same way as it does in non, one line at a
454 //       time. The only way you know you're in field #2 is that the topmost bit
455 //       of VC is set. Half-line mode is so you can draw higher horizontal
456 //       resolutions than you normally could, as the line buffer is only 720
457 //       pixels wide...
458         halfline &= 0x7FF;
459
460 extern int op_start_log;
461
462         op_pointer = OPGetListPointer();
463
464 //      objectp_stop_reading_list = false;
465
466 //WriteLog("OP: Processing line #%u (OLP=%08X)...\n", halfline, op_pointer);
467 //op_done();
468
469 // *** BEGIN OP PROCESSOR TESTING ONLY ***
470 extern bool interactiveMode;
471 extern bool iToggle;
472 extern int objectPtr;
473 bool inhibit;
474 int bitmapCounter = 0;
475 // *** END OP PROCESSOR TESTING ONLY ***
476
477         uint32_t opCyclesToRun = 30000;                                 // This is a pulled-out-of-the-air value (will need to be fixed, obviously!)
478
479 //      if (op_pointer) WriteLog(" new op list at 0x%.8x halfline %i\n",op_pointer,halfline);
480         while (op_pointer)
481         {
482 // *** BEGIN OP PROCESSOR TESTING ONLY ***
483 if (interactiveMode && bitmapCounter == objectPtr)
484         inhibit = iToggle;
485 else
486         inhibit = false;
487 // *** END OP PROCESSOR TESTING ONLY ***
488 //              if (objectp_stop_reading_list)
489 //                      return;
490
491                 uint64_t p0 = OPLoadPhrase(op_pointer);
492                 op_pointer += 8;
493 //WriteLog("\t%08X type %i\n", op_pointer, (uint8_t)p0 & 0x07);
494
495 #if 1
496 if (halfline == TOMGetVDB() && op_start_log)
497 //if (halfline == 215 && op_start_log)
498 //if (halfline == 28 && op_start_log)
499 //if (halfline == 0)
500 {
501 WriteLog("%08X --> phrase %08X %08X", op_pointer - 8, (int)(p0>>32), (int)(p0&0xFFFFFFFF));
502 if ((p0 & 0x07) == OBJECT_TYPE_BITMAP)
503 {
504 WriteLog(" (BITMAP) ");
505 uint64_t p1 = OPLoadPhrase(op_pointer);
506 WriteLog("\n%08X --> phrase %08X %08X ", op_pointer, (int)(p1>>32), (int)(p1&0xFFFFFFFF));
507         uint8_t bitdepth = (p1 >> 12) & 0x07;
508 //WAS:  int16_t ypos = ((p0 >> 3) & 0x3FF);                     // ??? What if not interlaced (/2)?
509         int16_t ypos = ((p0 >> 3) & 0x7FF);                     // ??? What if not interlaced (/2)?
510 int32_t xpos = p1 & 0xFFF;
511 xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);
512         uint32_t iwidth = ((p1 >> 28) & 0x3FF);
513         uint32_t dwidth = ((p1 >> 18) & 0x3FF);         // Unsigned!
514         uint16_t height = ((p0 >> 14) & 0x3FF);
515         uint32_t link = ((p0 >> 24) & 0x7FFFF) << 3;
516         uint32_t ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
517         uint32_t firstPix = (p1 >> 49) & 0x3F;
518         uint8_t flags = (p1 >> 45) & 0x0F;
519         uint8_t idx = (p1 >> 38) & 0x7F;
520         uint32_t pitch = (p1 >> 15) & 0x07;
521 WriteLog("\n    [%u (%u) x %u @ (%i, %u) (%u bpp), l: %08X, p: %08X fp: %02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
522         iwidth, dwidth, height, xpos, ypos, op_bitmap_bit_depth[bitdepth], link, ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""), (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""), (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
523 }
524 if ((p0 & 0x07) == OBJECT_TYPE_SCALE)
525 {
526 WriteLog(" (SCALED BITMAP)");
527 uint64_t p1 = OPLoadPhrase(op_pointer), p2 = OPLoadPhrase(op_pointer+8);
528 WriteLog("\n%08X --> phrase %08X %08X ", op_pointer, (int)(p1>>32), (int)(p1&0xFFFFFFFF));
529 WriteLog("\n%08X --> phrase %08X %08X ", op_pointer+8, (int)(p2>>32), (int)(p2&0xFFFFFFFF));
530         uint8_t bitdepth = (p1 >> 12) & 0x07;
531 //WAS:  int16_t ypos = ((p0 >> 3) & 0x3FF);                     // ??? What if not interlaced (/2)?
532         int16_t ypos = ((p0 >> 3) & 0x7FF);                     // ??? What if not interlaced (/2)?
533 int32_t xpos = p1 & 0xFFF;
534 xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);
535         uint32_t iwidth = ((p1 >> 28) & 0x3FF);
536         uint32_t dwidth = ((p1 >> 18) & 0x3FF);         // Unsigned!
537         uint16_t height = ((p0 >> 14) & 0x3FF);
538         uint32_t link = ((p0 >> 24) & 0x7FFFF) << 3;
539         uint32_t ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
540         uint32_t firstPix = (p1 >> 49) & 0x3F;
541         uint8_t flags = (p1 >> 45) & 0x0F;
542         uint8_t idx = (p1 >> 38) & 0x7F;
543         uint32_t pitch = (p1 >> 15) & 0x07;
544 WriteLog("\n    [%u (%u) x %u @ (%i, %u) (%u bpp), l: %08X, p: %08X fp: %02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
545         iwidth, dwidth, height, xpos, ypos, op_bitmap_bit_depth[bitdepth], link, ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""), (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""), (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
546         uint32_t hscale = p2 & 0xFF;
547         uint32_t vscale = (p2 >> 8) & 0xFF;
548         uint32_t remainder = (p2 >> 16) & 0xFF;
549 WriteLog("    [hsc: %02X, vsc: %02X, rem: %02X]\n", hscale, vscale, remainder);
550 }
551 if ((p0 & 0x07) == OBJECT_TYPE_GPU)
552 WriteLog(" (GPU)\n");
553 if ((p0 & 0x07) == OBJECT_TYPE_BRANCH)
554 {
555 WriteLog(" (BRANCH)\n");
556 uint8_t * jaguarMainRam = GetRamPtr();
557 WriteLog("[RAM] --> ");
558 for(int k=0; k<8; k++)
559         WriteLog("%02X ", jaguarMainRam[op_pointer-8 + k]);
560 WriteLog("\n");
561 }
562 if ((p0 & 0x07) == OBJECT_TYPE_STOP)
563 WriteLog("    --> List end\n\n");
564 }
565 #endif
566
567                 switch ((uint8_t)p0 & 0x07)
568                 {
569                 case OBJECT_TYPE_BITMAP:
570                 {
571                         uint16_t ypos = (p0 >> 3) & 0x7FF;
572 // This is only theory implied by Rayman...!
573 // It seems that if the YPOS is zero, then bump the YPOS value so that it
574 // coincides with the VDB value. With interlacing, this would be slightly more
575 // tricky. There's probably another bit somewhere that enables this mode--but
576 // so far, doesn't seem to affect any other game in a negative way (that I've
577 // seen). Either that, or it's an undocumented bug...
578
579 //No, the reason this was needed is that the OP code before was wrong. Any value
580 //less than VDB will get written to the top line of the display!
581 #if 0
582 // Not so sure... Let's see what happens here...
583 // No change...
584                         if (ypos == 0)
585                                 ypos = TOMReadWord(0xF00046, OP) / 2;                   // Get the VDB value
586 #endif
587 // Actually, no. Any item less than VDB will get only the lines that hang over
588 // VDB displayed. Actually, this is incorrect. It seems that VDB value is wrong
589 // somewhere and that's what's causing things to fuck up. Still no idea why.
590
591                         uint32_t height = (p0 & 0xFFC000) >> 14;
592                         uint32_t oldOPP = op_pointer - 8;
593 // *** BEGIN OP PROCESSOR TESTING ONLY ***
594 if (inhibit && op_start_log)
595         WriteLog("!!! ^^^ This object is INHIBITED! ^^^ !!!\n");
596 bitmapCounter++;
597 if (!inhibit)   // For OP testing only!
598 // *** END OP PROCESSOR TESTING ONLY ***
599                         if (halfline >= ypos && height > 0)
600                         {
601                                 // Believe it or not, this is what the OP actually does...
602                                 // which is why they're required to be on a dphrase boundary!
603                                 uint64_t p1 = OPLoadPhrase(oldOPP | 0x08);
604 //unneeded                              op_pointer += 8;
605 //WriteLog("OP: Writing halfline %d with ypos == %d...\n", halfline, ypos);
606 //WriteLog("--> Writing %u BPP bitmap...\n", op_bitmap_bit_depth[(p1 >> 12) & 0x07]);
607 //                              OPProcessFixedBitmap(halfline, p0, p1, render);
608                                 OPProcessFixedBitmap(p0, p1, render);
609
610                                 // OP write-backs
611
612                                 height--;
613
614                                 uint64_t data = (p0 & 0xFFFFF80000000000LL) >> 40;
615                                 uint64_t dwidth = (p1 & 0xFFC0000) >> 15;
616                                 data += dwidth;
617
618                                 p0 &= ~0xFFFFF80000FFC000LL;            // Mask out old data...
619                                 p0 |= (uint64_t)height << 14;
620                                 p0 |= data << 40;
621                                 OPStorePhrase(oldOPP, p0);
622                         }
623
624                         // OP bottom 3 bits are hardwired to zero. The link address
625                         // reflects this, so we only need the top 19 bits of the address
626                         // (which is why we only shift 21, and not 24).
627                         op_pointer = (p0 & 0x000007FFFF000000LL) >> 21;
628
629                         // KLUDGE: Seems that memory access is mirrored in the first 8MB of
630                         // memory...
631                         if (op_pointer > 0x1FFFFF && op_pointer < 0x800000)
632                                 op_pointer &= 0xFF1FFFFF;       // Knock out bits 21-23
633
634                         break;
635                 }
636                 case OBJECT_TYPE_SCALE:
637                 {
638 //WAS:                  uint16_t ypos = (p0 >> 3) & 0x3FF;
639                         uint16_t ypos = (p0 >> 3) & 0x7FF;
640                         uint32_t height = (p0 & 0xFFC000) >> 14;
641                         uint32_t oldOPP = op_pointer - 8;
642 //WriteLog("OP: Scaled Object (ypos=%04X, height=%04X", ypos, height);
643 // *** BEGIN OP PROCESSOR TESTING ONLY ***
644 if (inhibit && op_start_log)
645 {
646         WriteLog("!!! ^^^ This object is INHIBITED! ^^^ !!! (halfline=%u, ypos=%u, height=%u)\n", halfline, ypos, height);
647         DumpScaledObject(p0, OPLoadPhrase(op_pointer), OPLoadPhrase(op_pointer+8));
648 }
649 bitmapCounter++;
650 if (!inhibit)   // For OP testing only!
651 // *** END OP PROCESSOR TESTING ONLY ***
652                         if (halfline >= ypos && height > 0)
653                         {
654                                 // Believe it or not, this is what the OP actually does...
655                                 // which is why they're required to be on a qphrase boundary!
656                                 uint64_t p1 = OPLoadPhrase(oldOPP | 0x08);
657                                 uint64_t p2 = OPLoadPhrase(oldOPP | 0x10);
658 //unneeded                              op_pointer += 16;
659                                 OPProcessScaledBitmap(p0, p1, p2, render);
660
661                                 // OP write-backs
662
663                                 uint16_t remainder = (p2 >> 16) & 0xFF;//, vscale = p2 >> 8;
664                                 uint8_t /*remainder = p2 >> 16,*/ vscale = p2 >> 8;
665 //Actually, we should skip this object if it has a vscale of zero.
666 //Or do we? Not sure... Atari Karts has a few lines that look like:
667 // (SCALED BITMAP)
668 //000E8268 --> phrase 00010000 7000B00D
669 //    [7 (0) x 1 @ (13, 0) (8 bpp), l: 000E82A0, p: 000E0FC0 fp: 00, fl:RELEASE, idx:00, pt:01]
670 //    [hsc: 9A, vsc: 00, rem: 00]
671 // Could it be the vscale is overridden if the DWIDTH is zero? Hmm...
672 //WriteLog("OP: Scaled bitmap processing (rem=%02X, vscale=%02X)...\n", remainder, vscale);//*/
673
674                                 if (vscale == 0)
675                                         vscale = 0x20;                                  // OP bug??? Nope, it isn't...! Or is it?
676
677 //extern int start_logging;
678 //if (start_logging)
679 //      WriteLog("--> Returned from scaled bitmap processing (rem=%02X, vscale=%02X)...\n", remainder, vscale);//*/
680 //Locks up here:
681 //--> Returned from scaled bitmap processing (rem=20, vscale=80)...
682 //There are other problems here, it looks like...
683 //Another lock up:
684 //About to execute OP (508)...
685 /*
686 OP: Scaled bitmap 4x? 4bpp at 38,? hscale=7C fpix=0 data=00075E28 pitch 1 hflipped=no dwidth=? (linked to 00071118) Transluency=no
687 --> Returned from scaled bitmap processing (rem=50, vscale=7C)...
688 OP: Scaled bitmap 4x? 4bpp at 38,? hscale=7C fpix=0 data=00075E28 pitch 1 hflipped=no dwidth=? (linked to 00071118) Transluency=no
689 --> Returned from scaled bitmap processing (rem=30, vscale=7C)...
690 OP: Scaled bitmap 4x? 4bpp at 38,? hscale=7C fpix=0 data=00075E28 pitch 1 hflipped=no dwidth=? (linked to 00071118) Transluency=no
691 --> Returned from scaled bitmap processing (rem=10, vscale=7C)...
692 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756A8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
693 --> Returned from scaled bitmap processing (rem=00, vscale=7E)...
694 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
695 --> Returned from scaled bitmap processing (rem=00, vscale=80)...
696 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
697 --> Returned from scaled bitmap processing (rem=5E, vscale=7E)...
698 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756E8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
699 --> Returned from scaled bitmap processing (rem=60, vscale=80)...
700 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
701 --> Returned from scaled bitmap processing (rem=3E, vscale=7E)...
702 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756E8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
703 --> Returned from scaled bitmap processing (rem=40, vscale=80)...
704 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
705 --> Returned from scaled bitmap processing (rem=1E, vscale=7E)...
706 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756E8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
707 --> Returned from scaled bitmap processing (rem=20, vscale=80)...
708 */
709 //Here's another problem:
710 //    [hsc: 20, vsc: 20, rem: 00]
711 // Since we're not checking for $E0 (but that's what we get from the above), we
712 // end up repeating this halfline unnecessarily... !!! FIX !!! [DONE, but...
713 // still not quite right. Either that, or the Accolade team that wrote Bubsy
714 // screwed up royal.]
715 //Also note: $E0 = 7.0 which IS a legal vscale value...
716
717 //                              if (remainder & 0x80)                           // I.e., it's negative
718 //                              if ((remainder & 0x80) || remainder == 0)       // I.e., it's <= 0
719 //                              if ((remainder - 1) >= 0xE0)            // I.e., it's <= 0
720 //                              if ((remainder >= 0xE1) || remainder == 0)// I.e., it's <= 0
721 //                              if ((remainder >= 0xE1 && remainder <= 0xFF) || remainder == 0)// I.e., it's <= 0
722 //                              if (remainder <= 0x20)                          // I.e., it's <= 1.0
723                                 // I.e., it's < 1.0f -> means it'll go negative when we subtract 1.0f.
724                                 if (remainder < 0x20)
725                                 {
726                                         uint64_t data = (p0 & 0xFFFFF80000000000LL) >> 40;
727                                         uint64_t dwidth = (p1 & 0xFFC0000) >> 15;
728
729 //                                      while (remainder & 0x80)
730 //                                      while ((remainder & 0x80) || remainder == 0)
731 //                                      while ((remainder - 1) >= 0xE0)
732 //                                      while ((remainder >= 0xE1) || remainder == 0)
733 //                                      while ((remainder >= 0xE1 && remainder <= 0xFF) || remainder == 0)
734 //                                      while (remainder <= 0x20)
735                                         while (remainder < 0x20)
736                                         {
737                                                 remainder += vscale;
738
739                                                 if (height)
740                                                         height--;
741
742                                                 data += dwidth;
743                                         }
744
745                                         p0 &= ~0xFFFFF80000FFC000LL;    // Mask out old data...
746                                         p0 |= (uint64_t)height << 14;
747                                         p0 |= data << 40;
748                                         OPStorePhrase(oldOPP, p0);
749                                 }
750
751                                 remainder -= 0x20;                                      // 1.0f in [3.5] fixed point format
752
753 //if (start_logging)
754 //      WriteLog("--> Finished writebacks...\n");//*/
755
756 //WriteLog(" [%08X%08X -> ", (uint32_t)(p2>>32), (uint32_t)(p2&0xFFFFFFFF));
757                                 p2 &= ~0x0000000000FF0000LL;
758                                 p2 |= (uint64_t)remainder << 16;
759 //WriteLog("%08X%08X]\n", (uint32_t)(p2>>32), (uint32_t)(p2&0xFFFFFFFF));
760                                 OPStorePhrase(oldOPP + 16, p2);
761 //remainder = (uint8_t)(p2 >> 16), vscale = (uint8_t)(p2 >> 8);
762 //WriteLog(" [after]: rem=%02X, vscale=%02X\n", remainder, vscale);
763                         }
764
765                         // OP bottom 3 bits are hardwired to zero. The link address
766                         // reflects this, so we only need the top 19 bits of the address
767                         // (which is why we only shift 21, and not 24).
768                         op_pointer = (p0 & 0x000007FFFF000000LL) >> 21;
769
770                         // KLUDGE: Seems that memory access is mirrored in the first 8MB of
771                         // memory...
772                         if (op_pointer > 0x1FFFFF && op_pointer < 0x800000)
773                                 op_pointer &= 0xFF1FFFFF;       // Knock out bits 21-23
774
775                         break;
776                 }
777                 case OBJECT_TYPE_GPU:
778                 {
779 //WriteLog("OP: Asserting GPU IRQ #3...\n");
780 #warning "Need to fix OP GPU IRQ handling! !!! FIX !!!"
781                         OPSetCurrentObject(p0);
782                         GPUSetIRQLine(3, ASSERT_LINE);
783 //Also, OP processing is suspended from this point until OBF (F00026) is written to...
784 // !!! FIX !!!
785 //Do something like:
786 //OPSuspendedByGPU = true;
787 //Dunno if the OP keeps processing from where it was interrupted, or if it just continues
788 //on the next halfline...
789 // --> It continues from where it was interrupted! !!! FIX !!!
790                         break;
791                 }
792                 case OBJECT_TYPE_BRANCH:
793                 {
794                         uint16_t ypos = (p0 >> 3) & 0x7FF;
795 // NOTE: The JTRM sez there are only 2 bits used for the CC, but lists *five*
796 //       conditions! Need at least one more bit for that! :-P
797 // Also, the ASIC nets imply that it uses bits 14-16 (height in BM & SBM objects)
798 #warning "!!! Possibly bad CC handling in OP (missing 1 bit) !!!"
799                         uint8_t  cc   = (p0 >> 14) & 0x03;
800                         uint32_t link = (p0 >> 21) & 0x3FFFF8;
801
802 //                      if ((ypos!=507)&&(ypos!=25))
803 //                              WriteLog("\t%i%s%i link=0x%.8x\n",halfline,condition_to_str[cc],ypos>>1,link);
804                         switch (cc)
805                         {
806                         case CONDITION_EQUAL:
807                                 if (TOMReadWord(0xF00006, OP) == ypos || ypos == 0x7FF)
808                                         op_pointer = link;
809                                 break;
810                         case CONDITION_LESS_THAN:
811                                 if (TOMReadWord(0xF00006, OP) < ypos)
812                                         op_pointer = link;
813                                 break;
814                         case CONDITION_GREATER_THAN:
815                                 if (TOMReadWord(0xF00006, OP) > ypos)
816                                         op_pointer = link;
817                                 break;
818                         case CONDITION_OP_FLAG_SET:
819                                 if (OPGetStatusRegister() & 0x01)
820                                         op_pointer = link;
821                                 break;
822                         case CONDITION_SECOND_HALF_LINE:
823 //Here's the ASIC code:
824 //  ND4(cctrue5, newheight[2], heightl[1], heightl[0], hcb[10]);
825 //which means, do the link if bit 10 of HC is set...
826
827                                 // This basically means branch if bit 10 of HC is set
828 #warning "Unhandled condition code causes emulator to crash... !!! FIX !!!"
829                                 WriteLog("OP: Unexpected CONDITION_SECOND_HALF_LINE in BRANCH object\nOP: shutting down!\n");
830                                 LogDone();
831                                 exit(0);
832                                 break;
833                         default:
834                                 // Basically, if you do this, the OP does nothing. :-)
835                                 WriteLog("OP: Unimplemented branch condition %i\n", cc);
836                         }
837                         break;
838                 }
839                 case OBJECT_TYPE_STOP:
840                 {
841 //op_start_log = 0;
842                         // unsure
843 //WriteLog("OP: --> STOP\n");
844 //                      op_set_status_register(((p0>>3) & 0xFFFFFFFF));
845 //This seems more likely...
846                         OPSetCurrentObject(p0);
847
848                         if (p0 & 0x08)
849                         {
850                                 // We need to check whether these interrupts are enabled or
851                                 // not, THEN set an IRQ + pending flag if necessary...
852                                 if (TOMIRQEnabled(IRQ_OPFLAG))
853                                 {
854                                         TOMSetPendingObjectInt();
855                                         m68k_set_irq(2);                // Cause a 68K IPL 2 to occur...
856                                 }
857                         }
858
859                         return;
860 //                      break;
861                 }
862                 default:
863 //                      WriteLog("op: unknown object type %i\n", ((uint8_t)p0 & 0x07));
864 //                      return;
865                         ;
866                 }
867
868                 // Here is a little sanity check to keep the OP from locking up the
869                 // machine when fed bad data. Better would be to count how many actual
870                 // cycles it used and bail out/reenter to properly simulate an
871                 // overloaded OP... !!! FIX !!!
872 #warning "Better would be to count how many actual cycles it used and bail out/reenter to properly simulate an overloaded OP... !!! FIX !!!"
873                 opCyclesToRun--;
874
875                 if (!opCyclesToRun)
876                         return;
877         }
878 }
879
880
881 //
882 // Store fixed size bitmap in line buffer
883 //
884 void OPProcessFixedBitmap(uint64_t p0, uint64_t p1, bool render)
885 {
886 // Need to make sure that when writing that it stays within the line buffer...
887 // LBUF ($F01800 - $F01D9E) 360 x 32-bit RAM
888         uint8_t depth = (p1 >> 12) & 0x07;                              // Color depth of image
889         int32_t xpos = ((int16_t)((p1 << 4) & 0xFFFF)) >> 4;// Image xpos in LBUF
890         uint32_t iwidth = (p1 >> 28) & 0x3FF;                           // Image width in *phrases*
891         uint32_t data = (p0 >> 40) & 0xFFFFF8;                  // Pixel data address
892 //#ifdef OP_DEBUG_BMP
893         uint32_t firstPix = (p1 >> 49) & 0x3F;
894         // "The LSB is significant only for scaled objects..." -JTRM
895         // "In 1 BPP mode, all five bits are significant. In 2 BPP mode, the top four are significant..."
896         firstPix &= 0x3E;
897 //#endif
898 // We can ignore the RELEASE (high order) bit for now--probably forever...!
899 //      uint8_t flags = (p1 >> 45) & 0x0F;      // REFLECT, RMW, TRANS, RELEASE
900 //Optimize: break these out to their own BOOL values
901         uint8_t flags = (p1 >> 45) & 0x07;                              // REFLECT (0), RMW (1), TRANS (2)
902         bool flagREFLECT = (flags & OPFLAG_REFLECT ? true : false),
903                 flagRMW = (flags & OPFLAG_RMW ? true : false),
904                 flagTRANS = (flags & OPFLAG_TRANS ? true : false);
905 // "For images with 1 to 4 bits/pixel the top 7 to 4 bits of the index
906 //  provide the most significant bits of the palette address."
907         uint8_t index = (p1 >> 37) & 0xFE;                              // CLUT index offset (upper pix, 1-4 bpp)
908         uint32_t pitch = (p1 >> 15) & 0x07;                             // Phrase pitch
909         pitch <<= 3;                                                                    // Optimization: Multiply pitch by 8
910
911 //      int16_t scanlineWidth = tom_getVideoModeWidth();
912         uint8_t * tomRam8 = TOMGetRamPointer();
913         uint8_t * paletteRAM = &tomRam8[0x400];
914         // This is OK as long as it's used correctly: For 16-bit RAM to RAM direct copies--NOT
915         // for use when using endian-corrected data (i.e., any of the *_word_read functions!)
916         uint16_t * paletteRAM16 = (uint16_t *)paletteRAM;
917
918 //      WriteLog("bitmap %ix? %ibpp at %i,? firstpix=? data=0x%.8x pitch %i hflipped=%s dwidth=? (linked to ?) RMW=%s Tranparent=%s\n",
919 //              iwidth, op_bitmap_bit_depth[bitdepth], xpos, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), (flags&OPFLAG_RMW ? "yes" : "no"), (flags&OPFLAG_TRANS ? "yes" : "no"));
920
921 // Is it OK to have a 0 for the data width??? (i.e., undocumented?)
922 // Seems to be... Seems that dwidth *can* be zero (i.e., reuse same line) as well.
923 // Pitch == 0 is OK too...
924
925 //kludge: Seems that the OP treats iwidth == 0 as iwidth == 1... Need to investigate
926 //        on real hardware...
927 #warning "!!! Need to investigate iwidth == 0 behavior on real hardware !!!"
928 if (iwidth == 0)
929         iwidth = 1;
930
931 //      if (!render || op_pointer == 0 || ptr == 0 || pitch == 0)
932 //I'm not convinced that we need to concern ourselves with data & op_pointer here either!
933         if (!render || iwidth == 0)
934                 return;
935
936 //OK, so we know the position in the line buffer is correct. It's the clipping in
937 //24bpp mode that's wrong!
938 #if 0
939 //This is a total kludge, based upon the fact that 24BPP mode puts *4* bytes
940 //into the line buffer for each pixel.
941 if (depth == 5) // i.e., 24bpp mode...
942         xpos >>= 1;     // Cut it in half...
943 #endif
944
945 //#define OP_DEBUG_BMP
946 //#ifdef OP_DEBUG_BMP
947 //      WriteLog("bitmap %ix%i %ibpp at %i,%i firstpix=%i data=0x%.8x pitch %i hflipped=%s dwidth=%i (linked to 0x%.8x) Transluency=%s\n",
948 //              iwidth, height, op_bitmap_bit_depth[bitdepth], xpos, ypos, firstPix, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), dwidth, op_pointer, (flags&OPFLAG_RMW ? "yes" : "no"));
949 //#endif
950
951 //      int32_t leftMargin = xpos, rightMargin = (xpos + (phraseWidthToPixels[depth] * iwidth)) - 1;
952         int32_t startPos = xpos, endPos = xpos +
953                 (!flagREFLECT ? (phraseWidthToPixels[depth] * iwidth) - 1
954                 : -((phraseWidthToPixels[depth] * iwidth) + 1));
955         uint32_t clippedWidth = 0, phraseClippedWidth = 0, dataClippedWidth = 0;//, phrasePixel = 0;
956         bool in24BPPMode = (((GET16(tomRam8, 0x0028) >> 1) & 0x03) == 1 ? true : false);        // VMODE
957         // Not sure if this is Jaguar Two only location or what...
958         // From the docs, it is... If we want to limit here we should think of something else.
959 //      int32_t limit = GET16(tom_ram_8, 0x0008);                       // LIMIT
960 //      int32_t limit = 720;
961 //      int32_t lbufWidth = (!in24BPPMode ? limit - 1 : (limit / 2) - 1);       // Zero based limit...
962 //printf("[OP:xpos=%i,spos=%i,epos=%i>", xpos, startPos, endPos);
963         // This is correct, the OP line buffer is a constant size... 
964         int32_t limit = 720;
965         int32_t lbufWidth = 719;
966
967         // If the image is completely to the left or right of the line buffer, then bail.
968 //If in REFLECT mode, then these values are swapped! !!! FIX !!! [DONE]
969 //There are four possibilities:
970 //  1. image sits on left edge and no REFLECT; starts out of bounds but ends in bounds.
971 //  2. image sits on left edge and REFLECT; starts in bounds but ends out of bounds.
972 //  3. image sits on right edge and REFLECT; starts out of bounds but ends in bounds.
973 //  4. image sits on right edge and no REFLECT; starts in bounds but ends out of bounds.
974 //Numbers 2 & 4 can be caught by checking the LBUF clip while in the inner loop,
975 // numbers 1 & 3 are of concern.
976 // This *indirectly* handles only cases 2 & 4! And is WRONG if REFLECT is set...!
977 //      if (rightMargin < 0 || leftMargin > lbufWidth)
978
979 // It might be easier to swap these (if REFLECTed) and just use XPOS down below...
980 // That way, you could simply set XPOS to leftMargin if !REFLECT and to rightMargin otherwise.
981 // Still have to be careful with the DATA and IWIDTH values though...
982
983 //      if ((!flagREFLECT && (rightMargin < 0 || leftMargin > lbufWidth))
984 //              || (flagREFLECT && (leftMargin < 0 || rightMargin > lbufWidth)))
985 //              return;
986         if ((!flagREFLECT && (endPos < 0 || startPos > lbufWidth))
987                 || (flagREFLECT && (startPos < 0 || endPos > lbufWidth)))
988                 return;
989
990         // Otherwise, find the clip limits and clip the phrase as well...
991         // NOTE: I'm fudging here by letting the actual blit overstep the bounds of the
992         //       line buffer, but it shouldn't matter since there are two unused line
993         //       buffers below and nothing above and I'll at most write 8 bytes outside
994         //       the line buffer... I could use a fractional clip begin/end value, but
995         //       this makes the blit a *lot* more hairy. I might fix this in the future
996         //       if it becomes necessary. (JLH)
997         //       Probably wouldn't be *that* hairy. Just use a delta that tells the inner loop
998         //       which pixel in the phrase is being written, and quit when either end of phrases
999         //       is reached or line buffer extents are surpassed.
1000
1001 //This stuff is probably wrong as well... !!! FIX !!!
1002 //The strange thing is that it seems to work, but that's no guarantee that it's bulletproof!
1003 //Yup. Seems that JagMania doesn't work correctly with this...
1004 //Dunno if this is the problem, but Atari Karts is showing *some* of the road now...
1005 //      if (!flagREFLECT)
1006
1007 /*
1008         if (leftMargin < 0)
1009                 clippedWidth = 0 - leftMargin,
1010                 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth],
1011                 leftMargin = 0 - (clippedWidth % phraseWidthToPixels[depth]);
1012 //              leftMargin = 0;
1013
1014         if (rightMargin > lbufWidth)
1015                 clippedWidth = rightMargin - lbufWidth,
1016                 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];//,
1017 //              rightMargin = lbufWidth + (clippedWidth % phraseWidthToPixels[depth]);
1018 //              rightMargin = lbufWidth;
1019 */
1020 if (depth > 5)
1021         WriteLog("OP: We're about to encounter a divide by zero error!\n");
1022         // NOTE: We're just using endPos to figure out how much, if any, to clip by.
1023         // ALSO: There may be another case where we start out of bounds and end out of bounds...!
1024         // !!! FIX !!!
1025         if (startPos < 0)                       // Case #1: Begin out, end in, L to R
1026                 clippedWidth = 0 - startPos,
1027                 dataClippedWidth = phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth],
1028                 startPos = 0 - (clippedWidth % phraseWidthToPixels[depth]);
1029
1030         if (endPos < 0)                         // Case #2: Begin in, end out, R to L
1031                 clippedWidth = 0 - endPos,
1032                 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];
1033
1034         if (endPos > lbufWidth)         // Case #3: Begin in, end out, L to R
1035                 clippedWidth = endPos - lbufWidth,
1036                 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];
1037
1038         if (startPos > lbufWidth)       // Case #4: Begin out, end in, R to L
1039                 clippedWidth = startPos - lbufWidth,
1040                 dataClippedWidth = phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth],
1041                 startPos = lbufWidth + (clippedWidth % phraseWidthToPixels[depth]);
1042 //printf("<OP:spos=%i,epos=%i]", startPos, endPos);
1043
1044         // If the image is sitting on the line buffer left or right edge, we need to compensate
1045         // by decreasing the image phrase width accordingly.
1046         iwidth -= phraseClippedWidth;
1047
1048         // Also, if we're clipping the phrase we need to make sure we're in the correct part of
1049         // the pixel data.
1050 //      data += phraseClippedWidth * (pitch << 3);
1051         data += dataClippedWidth * pitch;
1052
1053         // NOTE: When the bitmap is in REFLECT mode, the XPOS marks the *right* side of the
1054         //       bitmap! This makes clipping & etc. MUCH, much easier...!
1055 //      uint32_t lbufAddress = 0x1800 + (!in24BPPMode ? leftMargin * 2 : leftMargin * 4);
1056 //Why does this work right when multiplying startPos by 2 (instead of 4) for 24 BPP mode?
1057 //Is this a bug in the OP?
1058 //It's because in 24bpp mode, each pixel takes *4* bytes, instead of the usual 2.
1059 //Though it looks like we're doing it here no matter what...
1060 //      uint32_t lbufAddress = 0x1800 + (!in24BPPMode ? startPos * 2 : startPos * 2);
1061 //Let's try this:
1062         uint32_t lbufAddress = 0x1800 + (startPos * 2);
1063         uint8_t * currentLineBuffer = &tomRam8[lbufAddress];
1064
1065         // Render.
1066
1067 // Hmm. We check above for 24 BPP mode, but don't do anything about it below...
1068 // If we *were* in 24 BPP mode, how would you convert CRY to RGB24? Seems to me
1069 // that if you're in CRY mode then you wouldn't be able to use 24 BPP bitmaps
1070 // anyway.
1071 // This seems to be the case (at least according to the Midsummer docs)...!
1072
1073 // This is to test using palette zeroes instead of bit zeroes...
1074 // And it seems that this is wrong, index == 0 is transparent apparently... :-/
1075 //#define OP_USES_PALETTE_ZERO
1076
1077         if (depth == 0)                                                                 // 1 BPP
1078         {
1079                 // The LSB of flags is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1080                 int32_t lbufDelta = ((int8_t)((flags << 7) & 0xFF) >> 5) | 0x02;
1081
1082                 // Fetch 1st phrase...
1083                 uint64_t pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1084 //Note that firstPix should only be honored *if* we start with the 1st phrase of the bitmap
1085 //i.e., we didn't clip on the margin... !!! FIX !!!
1086                 pixels <<= firstPix;                                            // Skip first N pixels (N=firstPix)...
1087                 int i = firstPix;                                                       // Start counter at right spot...
1088
1089                 while (iwidth--)
1090                 {
1091                         while (i++ < 64)
1092                         {
1093                                 uint8_t bit = pixels >> 63;
1094 #ifndef OP_USES_PALETTE_ZERO
1095                                 if (flagTRANS && bit == 0)
1096 #else
1097                                 if (flagTRANS && (paletteRAM16[index | bit] == 0))
1098 #endif
1099                                         ;       // Do nothing...
1100                                 else
1101                                 {
1102                                         if (!flagRMW)
1103 //Optimize: Set paletteRAM16 to beginning of palette RAM + index*2 and use only [bit] as index...
1104 //Won't optimize RMW case though...
1105                                                 // This is the *only* correct use of endian-dependent code
1106                                                 // (i.e., mem-to-mem direct copying)!
1107                                                 *(uint16_t *)currentLineBuffer = paletteRAM16[index | bit];
1108                                         else
1109                                                 *currentLineBuffer =
1110                                                         BLEND_CR(*currentLineBuffer, paletteRAM[(index | bit) << 1]),
1111                                                 *(currentLineBuffer + 1) =
1112                                                         BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bit) << 1) + 1]);
1113                                 }
1114
1115                                 currentLineBuffer += lbufDelta;
1116                                 pixels <<= 1;
1117                         }
1118                         i = 0;
1119                         // Fetch next phrase...
1120                         data += pitch;
1121                         pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1122                 }
1123         }
1124         else if (depth == 1)                                                    // 2 BPP
1125         {
1126 if (firstPix)
1127         WriteLog("OP: Fixed bitmap @ 2 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1128                 index &= 0xFC;                                                          // Top six bits form CLUT index
1129                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1130                 int32_t lbufDelta = ((int8_t)((flags << 7) & 0xFF) >> 5) | 0x02;
1131
1132                 while (iwidth--)
1133                 {
1134                         // Fetch phrase...
1135                         uint64_t pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1136                         data += pitch;
1137
1138                         for(int i=0; i<32; i++)
1139                         {
1140                                 uint8_t bits = pixels >> 62;
1141 // Seems to me that both of these are in the same endian, so we could cast it as
1142 // uint16_t * and do straight across copies (what about 24 bpp? Treat it differently...)
1143 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
1144 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
1145 // No, it isn't because we read the memory in an endian safe way--this *won't* work...
1146 #ifndef OP_USES_PALETTE_ZERO
1147                                 if (flagTRANS && bits == 0)
1148 #else
1149                                 if (flagTRANS && (paletteRAM16[index | bits] == 0))
1150 #endif
1151                                         ;       // Do nothing...
1152                                 else
1153                                 {
1154                                         if (!flagRMW)
1155                                                 *(uint16_t *)currentLineBuffer = paletteRAM16[index | bits];
1156                                         else
1157                                                 *currentLineBuffer =
1158                                                         BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1159                                                 *(currentLineBuffer + 1) =
1160                                                         BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1161                                 }
1162
1163                                 currentLineBuffer += lbufDelta;
1164                                 pixels <<= 2;
1165                         }
1166                 }
1167         }
1168         else if (depth == 2)                                                    // 4 BPP
1169         {
1170 if (firstPix)
1171         WriteLog("OP: Fixed bitmap @ 4 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1172                 index &= 0xF0;                                                          // Top four bits form CLUT index
1173                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1174                 int32_t lbufDelta = ((int8_t)((flags << 7) & 0xFF) >> 5) | 0x02;
1175
1176                 while (iwidth--)
1177                 {
1178                         // Fetch phrase...
1179                         uint64_t pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1180                         data += pitch;
1181
1182                         for(int i=0; i<16; i++)
1183                         {
1184                                 uint8_t bits = pixels >> 60;
1185 // Seems to me that both of these are in the same endian, so we could cast it as
1186 // uint16_t * and do straight across copies (what about 24 bpp? Treat it differently...)
1187 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
1188 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
1189 // No, it isn't because we read the memory in an endian safe way--this *won't* work...
1190 #ifndef OP_USES_PALETTE_ZERO
1191                                 if (flagTRANS && bits == 0)
1192 #else
1193                                 if (flagTRANS && (paletteRAM16[index | bits] == 0))
1194 #endif
1195                                         ;       // Do nothing...
1196                                 else
1197                                 {
1198                                         if (!flagRMW)
1199                                                 *(uint16_t *)currentLineBuffer = paletteRAM16[index | bits];
1200                                         else
1201                                                 *currentLineBuffer =
1202                                                         BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1203                                                 *(currentLineBuffer + 1) =
1204                                                         BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1205                                 }
1206
1207                                 currentLineBuffer += lbufDelta;
1208                                 pixels <<= 4;
1209                         }
1210                 }
1211         }
1212         else if (depth == 3)                                                    // 8 BPP
1213         {
1214                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1215                 int32_t lbufDelta = ((int8_t)((flags << 7) & 0xFF) >> 5) | 0x02;
1216
1217                 // Fetch 1st phrase...
1218                 uint64_t pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1219 //Note that firstPix should only be honored *if* we start with the 1st phrase of the bitmap
1220 //i.e., we didn't clip on the margin... !!! FIX !!!
1221                 firstPix &= 0x30;                                                       // Only top two bits are valid for 8 BPP
1222                 pixels <<= firstPix;                                            // Skip first N pixels (N=firstPix)...
1223                 int i = firstPix >> 3;                                          // Start counter at right spot...
1224
1225                 while (iwidth--)
1226                 {
1227                         while (i++ < 8)
1228                         {
1229                                 uint8_t bits = pixels >> 56;
1230 // Seems to me that both of these are in the same endian, so we could cast it as
1231 // uint16_t * and do straight across copies (what about 24 bpp? Treat it differently...)
1232 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
1233 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
1234 // No, it isn't because we read the memory in an endian safe way--this *won't* work...
1235 //This would seem to be problematic...
1236 //Because it's the palette entry being zero that makes the pixel transparent...
1237 //Let's try it and see.
1238 #ifndef OP_USES_PALETTE_ZERO
1239                                 if (flagTRANS && bits == 0)
1240 #else
1241                                 if (flagTRANS && (paletteRAM16[bits] == 0))
1242 #endif
1243                                         ;       // Do nothing...
1244                                 else
1245                                 {
1246                                         if (!flagRMW)
1247                                                 *(uint16_t *)currentLineBuffer = paletteRAM16[bits];
1248                                         else
1249                                                 *currentLineBuffer =
1250                                                         BLEND_CR(*currentLineBuffer, paletteRAM[bits << 1]),
1251                                                 *(currentLineBuffer + 1) =
1252                                                         BLEND_Y(*(currentLineBuffer + 1), paletteRAM[(bits << 1) + 1]);
1253                                 }
1254
1255                                 currentLineBuffer += lbufDelta;
1256                                 pixels <<= 8;
1257                         }
1258                         i = 0;
1259                         // Fetch next phrase...
1260                         data += pitch;
1261                         pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1262                 }
1263         }
1264         else if (depth == 4)                                                    // 16 BPP
1265         {
1266 if (firstPix)
1267         WriteLog("OP: Fixed bitmap @ 16 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1268                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1269                 int32_t lbufDelta = ((int8_t)((flags << 7) & 0xFF) >> 5) | 0x02;
1270
1271                 while (iwidth--)
1272                 {
1273                         // Fetch phrase...
1274                         uint64_t pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1275                         data += pitch;
1276
1277                         for(int i=0; i<4; i++)
1278                         {
1279                                 uint8_t bitsHi = pixels >> 56, bitsLo = pixels >> 48;
1280 // Seems to me that both of these are in the same endian, so we could cast it as
1281 // uint16_t * and do straight across copies (what about 24 bpp? Treat it differently...)
1282 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
1283 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
1284 // No, it isn't because we read the memory in an endian safe way--it *won't* work...
1285 //This doesn't seem right... Let's try the encoded black value ($8800):
1286 //Apparently, CRY 0 maps to $8800...
1287                                 if (flagTRANS && ((bitsLo | bitsHi) == 0))
1288 //                              if (flagTRANS && (bitsHi == 0x88) && (bitsLo == 0x00))
1289                                         ;       // Do nothing...
1290                                 else
1291                                 {
1292                                         if (!flagRMW)
1293                                                 *currentLineBuffer = bitsHi,
1294                                                 *(currentLineBuffer + 1) = bitsLo;
1295                                         else
1296                                                 *currentLineBuffer =
1297                                                         BLEND_CR(*currentLineBuffer, bitsHi),
1298                                                 *(currentLineBuffer + 1) =
1299                                                         BLEND_Y(*(currentLineBuffer + 1), bitsLo);
1300                                 }
1301
1302                                 currentLineBuffer += lbufDelta;
1303                                 pixels <<= 16;
1304                         }
1305                 }
1306         }
1307         else if (depth == 5)                                                    // 24 BPP
1308         {
1309 //Looks like Iron Soldier is the only game that uses 24BPP mode...
1310 //There *might* be others...
1311 //WriteLog("OP: Writing 24 BPP bitmap!\n");
1312 if (firstPix)
1313         WriteLog("OP: Fixed bitmap @ 24 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1314                 // Not sure, but I think RMW only works with 16 BPP and below, and only in CRY mode...
1315                 // The LSB of flags is OPFLAG_REFLECT, so sign extend it and OR 4 into it.
1316                 int32_t lbufDelta = ((int8_t)((flags << 7) & 0xFF) >> 4) | 0x04;
1317
1318                 while (iwidth--)
1319                 {
1320                         // Fetch phrase...
1321                         uint64_t pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1322                         data += pitch;
1323
1324                         for(int i=0; i<2; i++)
1325                         {
1326                                 // We don't use a 32-bit var here because of endian issues...!
1327                                 uint8_t bits3 = pixels >> 56, bits2 = pixels >> 48,
1328                                         bits1 = pixels >> 40, bits0 = pixels >> 32;
1329
1330                                 if (flagTRANS && (bits3 | bits2 | bits1 | bits0) == 0)
1331                                         ;       // Do nothing...
1332                                 else
1333                                         *currentLineBuffer = bits3,
1334                                         *(currentLineBuffer + 1) = bits2,
1335                                         *(currentLineBuffer + 2) = bits1,
1336                                         *(currentLineBuffer + 3) = bits0;
1337
1338                                 currentLineBuffer += lbufDelta;
1339                                 pixels <<= 32;
1340                         }
1341                 }
1342         }
1343 }
1344
1345
1346 //
1347 // Store scaled bitmap in line buffer
1348 //
1349 void OPProcessScaledBitmap(uint64_t p0, uint64_t p1, uint64_t p2, bool render)
1350 {
1351 // Need to make sure that writes stay within the line buffer...
1352 // LBUF ($F01800 - $F01D9E) 360 x 32-bit RAM
1353         uint8_t depth = (p1 >> 12) & 0x07;                              // Color depth of image
1354         int32_t xpos = ((int16_t)((p1 << 4) & 0xFFFF)) >> 4;// Image xpos in LBUF
1355         uint32_t iwidth = (p1 >> 28) & 0x3FF;                           // Image width in *phrases*
1356         uint32_t data = (p0 >> 40) & 0xFFFFF8;                  // Pixel data address
1357 //#ifdef OP_DEBUG_BMP
1358 // Prolly should use this... Though not sure exactly how.
1359 //Use the upper bits as an offset into the phrase depending on the BPP. That's how!
1360         uint32_t firstPix = (p1 >> 49) & 0x3F;
1361 //This is WEIRD! I'm sure I saw Atari Karts request 8 BPP FIRSTPIX! What happened???
1362 if (firstPix)
1363         WriteLog("OP: FIRSTPIX != 0! (Scaled BM)\n");
1364 //#endif
1365 // We can ignore the RELEASE (high order) bit for now--probably forever...!
1366 //      uint8_t flags = (p1 >> 45) & 0x0F;      // REFLECT, RMW, TRANS, RELEASE
1367 //Optimize: break these out to their own BOOL values [DONE]
1368         uint8_t flags = (p1 >> 45) & 0x07;                              // REFLECT (0), RMW (1), TRANS (2)
1369         bool flagREFLECT = (flags & OPFLAG_REFLECT ? true : false),
1370                 flagRMW = (flags & OPFLAG_RMW ? true : false),
1371                 flagTRANS = (flags & OPFLAG_TRANS ? true : false);
1372         uint8_t index = (p1 >> 37) & 0xFE;                              // CLUT index offset (upper pix, 1-4 bpp)
1373         uint32_t pitch = (p1 >> 15) & 0x07;                             // Phrase pitch
1374
1375         uint8_t * tomRam8 = TOMGetRamPointer();
1376         uint8_t * paletteRAM = &tomRam8[0x400];
1377         // This is OK as long as it's used correctly: For 16-bit RAM to RAM direct
1378         // copies--NOT for use when using endian-corrected data (i.e., any of the
1379         // *ReadWord functions!)
1380         uint16_t * paletteRAM16 = (uint16_t *)paletteRAM;
1381
1382         uint16_t hscale = p2 & 0xFF;
1383 // Hmm. It seems that fixing the horizontal scale necessitated re-fixing this.
1384 // Not sure why, but seems to be consistent with the vertical scaling now (and
1385 // it may turn out to be wrong!)...
1386         uint16_t horizontalRemainder = hscale;                          // Not sure if it starts full, but seems reasonable [It's not!]
1387 //      uint8_t horizontalRemainder = 0;                                        // Let's try zero! Seems to work! Yay! [No, it doesn't!]
1388         int32_t scaledWidthInPixels = (iwidth * phraseWidthToPixels[depth] * hscale) >> 5;
1389         uint32_t scaledPhrasePixels = (phraseWidthToPixels[depth] * hscale) >> 5;
1390
1391 //      WriteLog("bitmap %ix? %ibpp at %i,? firstpix=? data=0x%.8x pitch %i hflipped=%s dwidth=? (linked to ?) RMW=%s Tranparent=%s\n",
1392 //              iwidth, op_bitmap_bit_depth[bitdepth], xpos, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), (flags&OPFLAG_RMW ? "yes" : "no"), (flags&OPFLAG_TRANS ? "yes" : "no"));
1393
1394 // Looks like an hscale of zero means don't draw!
1395         if (!render || iwidth == 0 || hscale == 0)
1396                 return;
1397
1398 /*extern int start_logging;
1399 if (start_logging)
1400         WriteLog("OP: Scaled bitmap %ix? %ibpp at %i,? hscale=%02X fpix=%i data=%08X pitch %i hflipped=%s dwidth=? (linked to %08X) Transluency=%s\n",
1401                 iwidth, op_bitmap_bit_depth[depth], xpos, hscale, firstPix, data, pitch, (flagREFLECT ? "yes" : "no"), op_pointer, (flagRMW ? "yes" : "no"));*/
1402 //#define OP_DEBUG_BMP
1403 //#ifdef OP_DEBUG_BMP
1404 //      WriteLog("OP: Scaled bitmap %ix%i %ibpp at %i,%i firstpix=%i data=0x%.8x pitch %i hflipped=%s dwidth=%i (linked to 0x%.8x) Transluency=%s\n",
1405 //              iwidth, height, op_bitmap_bit_depth[bitdepth], xpos, ypos, firstPix, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), dwidth, op_pointer, (flags&OPFLAG_RMW ? "yes" : "no"));
1406 //#endif
1407
1408         int32_t startPos = xpos, endPos = xpos +
1409                 (!flagREFLECT ? scaledWidthInPixels - 1 : -(scaledWidthInPixels + 1));
1410         uint32_t clippedWidth = 0, phraseClippedWidth = 0, dataClippedWidth = 0;
1411         bool in24BPPMode = (((GET16(tomRam8, 0x0028) >> 1) & 0x03) == 1 ? true : false);        // VMODE
1412         // Not sure if this is Jaguar Two only location or what...
1413         // From the docs, it is... If we want to limit here we should think of something else.
1414 //      int32_t limit = GET16(tom_ram_8, 0x0008);                       // LIMIT
1415         int32_t limit = 720;
1416 //      int32_t lbufWidth = (!in24BPPMode ? limit - 1 : (limit / 2) - 1);       // Zero based limit...
1417         int32_t lbufWidth = 719;        // Zero based limit...
1418
1419         // If the image is completely to the left or right of the line buffer, then bail.
1420 //If in REFLECT mode, then these values are swapped! !!! FIX !!! [DONE]
1421 //There are four possibilities:
1422 //  1. image sits on left edge and no REFLECT; starts out of bounds but ends in bounds.
1423 //  2. image sits on left edge and REFLECT; starts in bounds but ends out of bounds.
1424 //  3. image sits on right edge and REFLECT; starts out of bounds but ends in bounds.
1425 //  4. image sits on right edge and no REFLECT; starts in bounds but ends out of bounds.
1426 //Numbers 2 & 4 can be caught by checking the LBUF clip while in the inner loop,
1427 // numbers 1 & 3 are of concern.
1428 // This *indirectly* handles only cases 2 & 4! And is WRONG if REFLECT is set...!
1429 //      if (rightMargin < 0 || leftMargin > lbufWidth)
1430
1431 // It might be easier to swap these (if REFLECTed) and just use XPOS down below...
1432 // That way, you could simply set XPOS to leftMargin if !REFLECT and to rightMargin otherwise.
1433 // Still have to be careful with the DATA and IWIDTH values though...
1434
1435         if ((!flagREFLECT && (endPos < 0 || startPos > lbufWidth))
1436                 || (flagREFLECT && (startPos < 0 || endPos > lbufWidth)))
1437                 return;
1438
1439         // Otherwise, find the clip limits and clip the phrase as well...
1440         // NOTE: I'm fudging here by letting the actual blit overstep the bounds of
1441         //       the line buffer, but it shouldn't matter since there are two
1442         //       unused line buffers below and nothing above and I'll at most write
1443         //       40 bytes outside the line buffer... I could use a fractional clip
1444         //       begin/end value, but this makes the blit a *lot* more hairy. I
1445         //       might fix this in the future if it becomes necessary. (JLH)
1446         //       Probably wouldn't be *that* hairy. Just use a delta that tells the
1447         //       inner loop which pixel in the phrase is being written, and quit
1448         //       when either end of phrases is reached or line buffer extents are
1449         //       surpassed.
1450
1451 //This stuff is probably wrong as well... !!! FIX !!!
1452 //The strange thing is that it seems to work, but that's no guarantee that it's
1453 //bulletproof!
1454 //Yup. Seems that JagMania doesn't work correctly with this...
1455 //Dunno if this is the problem, but Atari Karts is showing *some* of the road
1456 //now...
1457 //Actually, it is! Or, it was. It doesn't seem to be clipping here, so the
1458 //problem lies elsewhere! Hmm. Putting the scaling code into the 1/2/8 BPP cases
1459 //seems to draw the ground a bit more accurately... Strange!
1460 //It's probably a case of the REFLECT flag being set and the background being
1461 //written from the right side of the screen...
1462 //But no, it isn't... At least if the diagnostics are telling the truth!
1463
1464         // NOTE: We're just using endPos to figure out how much, if any, to clip by.
1465         // ALSO: There may be another case where we start out of bounds and end out
1466         // of bounds...!
1467         // !!! FIX !!!
1468
1469 //There's a problem here with scaledPhrasePixels in that it can be forced to
1470 //zero when the scaling factor is small. So fix it already! !!! FIX !!!
1471 /*if (scaledPhrasePixels == 0)
1472 {
1473         WriteLog("OP: [Scaled] We're about to encounter a divide by zero error!\n");
1474         DumpScaledObject(p0, p1, p2);
1475 }//*/
1476 //NOTE: I'm almost 100% sure that this is wrong... And it is! :-p
1477
1478 //Try a simple example...
1479 // Let's say we have a 8 BPP scanline with an hscale of $80 (4). Our xpos is -10,
1480 // non-flipped. Pixels in the bitmap are XYZXYZXYZXYZXYZ.
1481 // Scaled up, they would be XXXXYYYYZZZZXXXXYYYYZZZZXXXXYYYYZZZZ...
1482 //
1483 // Normally, we would expect this in the line buffer:
1484 // ZZXXXXYYYYZZZZXXXXYYYYZZZZ...
1485 //
1486 // But instead we're getting:
1487 // XXXXYYYYZZZZXXXXYYYYZZZZ...
1488 //
1489 // or are we??? It would seem so, simply by virtue of the fact that we're NOT starting
1490 // on negative boundary--or are we? Hmm...
1491 // cw = 10, dcw = pcw = 10 / ([8 * 4 = 32] 32) = 0, sp = -10
1492 //
1493 // Let's try a real world example:
1494 //
1495 //OP: Scaled bitmap (70, 8 BPP, spp=28) sp (-400) < 0... [new sp=-8, cw=400, dcw=pcw=14]
1496 //OP: Scaled bitmap (6F, 8 BPP, spp=27) sp (-395) < 0... [new sp=-17, cw=395, dcw=pcw=14]
1497 //
1498 // Really, spp is 27.75 in the second case...
1499 // So... If we do 395 / 27.75, we get 14. Ok so far... If we scale that against the
1500 // start position (14 * 27.75), we get -6.5... NOT -17!
1501
1502 //Now it seems we're working OK, at least for the first case...
1503 uint32_t scaledPhrasePixelsUS = phraseWidthToPixels[depth] * hscale;
1504
1505         if (startPos < 0)                       // Case #1: Begin out, end in, L to R
1506 {
1507 extern int start_logging;
1508 if (start_logging)
1509         WriteLog("OP: Scaled bitmap (%02X, %u BPP, spp=%u) start pos (%i) < 0...", hscale, op_bitmap_bit_depth[depth], scaledPhrasePixels, startPos);
1510 //              clippedWidth = 0 - startPos,
1511                 clippedWidth = (0 - startPos) << 5,
1512 //              dataClippedWidth = phraseClippedWidth = clippedWidth / scaledPhrasePixels,
1513                 dataClippedWidth = phraseClippedWidth = (clippedWidth / scaledPhrasePixelsUS) >> 5,
1514 //              startPos = 0 - (clippedWidth % scaledPhrasePixels);
1515                 startPos += (dataClippedWidth * scaledPhrasePixelsUS) >> 5;
1516 if (start_logging)
1517         WriteLog(" [new sp=%i, cw=%i, dcw=pcw=%i]\n", startPos, clippedWidth, dataClippedWidth);
1518 }
1519
1520         if (endPos < 0)                         // Case #2: Begin in, end out, R to L
1521                 clippedWidth = 0 - endPos,
1522                 phraseClippedWidth = clippedWidth / scaledPhrasePixels;
1523
1524         if (endPos > lbufWidth)         // Case #3: Begin in, end out, L to R
1525                 clippedWidth = endPos - lbufWidth,
1526                 phraseClippedWidth = clippedWidth / scaledPhrasePixels;
1527
1528         if (startPos > lbufWidth)       // Case #4: Begin out, end in, R to L
1529                 clippedWidth = startPos - lbufWidth,
1530                 dataClippedWidth = phraseClippedWidth = clippedWidth / scaledPhrasePixels,
1531                 startPos = lbufWidth + (clippedWidth % scaledPhrasePixels);
1532
1533 extern int op_start_log;
1534 if (op_start_log && clippedWidth != 0)
1535         WriteLog("OP: Clipped line. SP=%i, EP=%i, clip=%u, iwidth=%u, hscale=%02X\n", startPos, endPos, clippedWidth, iwidth, hscale);
1536 if (op_start_log && startPos == 13)
1537 {
1538         WriteLog("OP: Scaled line. SP=%i, EP=%i, clip=%u, iwidth=%u, hscale=%02X, depth=%u, firstPix=%u\n", startPos, endPos, clippedWidth, iwidth, hscale, depth, firstPix);
1539         DumpScaledObject(p0, p1, p2);
1540         if (iwidth == 7)
1541         {
1542                 WriteLog("    %08X: ", data);
1543                 for(int i=0; i<7*8; i++)
1544                         WriteLog("%02X ", JaguarReadByte(data+i));
1545                 WriteLog("\n");
1546         }
1547 }
1548         // If the image is sitting on the line buffer left or right edge, we need to compensate
1549         // by decreasing the image phrase width accordingly.
1550         iwidth -= phraseClippedWidth;
1551
1552         // Also, if we're clipping the phrase we need to make sure we're in the correct part of
1553         // the pixel data.
1554 //      data += phraseClippedWidth * (pitch << 3);
1555         data += dataClippedWidth * (pitch << 3);
1556
1557         // NOTE: When the bitmap is in REFLECT mode, the XPOS marks the *right* side of the
1558         //       bitmap! This makes clipping & etc. MUCH, much easier...!
1559 //      uint32_t lbufAddress = 0x1800 + (!in24BPPMode ? leftMargin * 2 : leftMargin * 4);
1560 //      uint32_t lbufAddress = 0x1800 + (!in24BPPMode ? startPos * 2 : startPos * 4);
1561         uint32_t lbufAddress = 0x1800 + startPos * 2;
1562         uint8_t * currentLineBuffer = &tomRam8[lbufAddress];
1563 //uint8_t * lineBufferLowerLimit = &tom_ram_8[0x1800],
1564 //      * lineBufferUpperLimit = &tom_ram_8[0x1800 + 719];
1565
1566         // Render.
1567
1568 // Hmm. We check above for 24 BPP mode, but don't do anything about it below...
1569 // If we *were* in 24 BPP mode, how would you convert CRY to RGB24? Seems to me
1570 // that if you're in CRY mode then you wouldn't be able to use 24 BPP bitmaps
1571 // anyway.
1572 // This seems to be the case (at least according to the Midsummer docs)...!
1573
1574         if (depth == 0)                                                                 // 1 BPP
1575         {
1576 if (firstPix != 0)
1577         WriteLog("OP: Scaled bitmap @ 1 BPP requesting FIRSTPIX!\n");
1578                 // The LSB of flags is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1579                 int32_t lbufDelta = ((int8_t)((flags << 7) & 0xFF) >> 5) | 0x02;
1580
1581                 int pixCount = 0;
1582                 uint64_t pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1583
1584                 while ((int32_t)iwidth > 0)
1585                 {
1586                         uint8_t bits = pixels >> 63;
1587
1588 #ifndef OP_USES_PALETTE_ZERO
1589                         if (flagTRANS && bits == 0)
1590 #else
1591                         if (flagTRANS && (paletteRAM16[index | bits] == 0))
1592 #endif
1593                                 ;       // Do nothing...
1594                         else
1595                         {
1596                                 if (!flagRMW)
1597                                         // This is the *only* correct use of endian-dependent code
1598                                         // (i.e., mem-to-mem direct copying)!
1599                                         *(uint16_t *)currentLineBuffer = paletteRAM16[index | bits];
1600                                 else
1601                                         *currentLineBuffer =
1602                                                 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1603                                         *(currentLineBuffer + 1) =
1604                                                 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1605                         }
1606
1607                         currentLineBuffer += lbufDelta;
1608
1609 /*
1610 The reason we subtract the horizontalRemainder *after* the test is because we had too few
1611 bytes for horizontalRemainder to properly recognize a negative number. But now it's 16 bits
1612 wide, so we could probably go back to that (as long as we make it an int16_t and not a uint16!)
1613 */
1614 /*                      horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1615                         while (horizontalRemainder & 0x80)
1616                         {
1617                                 horizontalRemainder += hscale;
1618                                 pixCount++;
1619                                 pixels <<= 1;
1620                         }//*/
1621 //                      while (horizontalRemainder <= 0x20)             // I.e., it's <= 1.0 (*before* subtraction)
1622                         while (horizontalRemainder < 0x20)              // I.e., it's <= 1.0 (*before* subtraction)
1623                         {
1624                                 horizontalRemainder += hscale;
1625                                 pixCount++;
1626                                 pixels <<= 1;
1627                         }
1628                         horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1629
1630                         if (pixCount > 63)
1631                         {
1632                                 int phrasesToSkip = pixCount / 64, pixelShift = pixCount % 64;
1633
1634                                 data += (pitch << 3) * phrasesToSkip;
1635                                 pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1636                                 pixels <<= 1 * pixelShift;
1637                                 iwidth -= phrasesToSkip;
1638                                 pixCount = pixelShift;
1639                         }
1640                 }
1641         }
1642         else if (depth == 1)                                                    // 2 BPP
1643         {
1644 if (firstPix != 0)
1645         WriteLog("OP: Scaled bitmap @ 2 BPP requesting FIRSTPIX!\n");
1646                 index &= 0xFC;                                                          // Top six bits form CLUT index
1647                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1648                 int32_t lbufDelta = ((int8_t)((flags << 7) & 0xFF) >> 5) | 0x02;
1649
1650                 int pixCount = 0;
1651                 uint64_t pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1652
1653                 while ((int32_t)iwidth > 0)
1654                 {
1655                         uint8_t bits = pixels >> 62;
1656
1657 #ifndef OP_USES_PALETTE_ZERO
1658                         if (flagTRANS && bits == 0)
1659 #else
1660                         if (flagTRANS && (paletteRAM16[index | bits] == 0))
1661 #endif
1662                                 ;       // Do nothing...
1663                         else
1664                         {
1665                                 if (!flagRMW)
1666                                         // This is the *only* correct use of endian-dependent code
1667                                         // (i.e., mem-to-mem direct copying)!
1668                                         *(uint16_t *)currentLineBuffer = paletteRAM16[index | bits];
1669                                 else
1670                                         *currentLineBuffer =
1671                                                 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1672                                         *(currentLineBuffer + 1) =
1673                                                 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1674                         }
1675
1676                         currentLineBuffer += lbufDelta;
1677
1678 /*                      horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1679                         while (horizontalRemainder & 0x80)
1680                         {
1681                                 horizontalRemainder += hscale;
1682                                 pixCount++;
1683                                 pixels <<= 2;
1684                         }//*/
1685 //                      while (horizontalRemainder <= 0x20)             // I.e., it's <= 0 (*before* subtraction)
1686                         while (horizontalRemainder < 0x20)              // I.e., it's <= 1.0 (*before* subtraction)
1687                         {
1688                                 horizontalRemainder += hscale;
1689                                 pixCount++;
1690                                 pixels <<= 2;
1691                         }
1692                         horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1693
1694                         if (pixCount > 31)
1695                         {
1696                                 int phrasesToSkip = pixCount / 32, pixelShift = pixCount % 32;
1697
1698                                 data += (pitch << 3) * phrasesToSkip;
1699                                 pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1700                                 pixels <<= 2 * pixelShift;
1701                                 iwidth -= phrasesToSkip;
1702                                 pixCount = pixelShift;
1703                         }
1704                 }
1705         }
1706         else if (depth == 2)                                                    // 4 BPP
1707         {
1708 if (firstPix != 0)
1709         WriteLog("OP: Scaled bitmap @ 4 BPP requesting FIRSTPIX!\n");
1710                 index &= 0xF0;                                                          // Top four bits form CLUT index
1711                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1712                 int32_t lbufDelta = ((int8_t)((flags << 7) & 0xFF) >> 5) | 0x02;
1713
1714                 int pixCount = 0;
1715                 uint64_t pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1716
1717                 while ((int32_t)iwidth > 0)
1718                 {
1719                         uint8_t bits = pixels >> 60;
1720
1721 #ifndef OP_USES_PALETTE_ZERO
1722                         if (flagTRANS && bits == 0)
1723 #else
1724                         if (flagTRANS && (paletteRAM16[index | bits] == 0))
1725 #endif
1726                                 ;       // Do nothing...
1727                         else
1728                         {
1729                                 if (!flagRMW)
1730                                         // This is the *only* correct use of endian-dependent code
1731                                         // (i.e., mem-to-mem direct copying)!
1732                                         *(uint16_t *)currentLineBuffer = paletteRAM16[index | bits];
1733                                 else
1734                                         *currentLineBuffer =
1735                                                 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1736                                         *(currentLineBuffer + 1) =
1737                                                 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1738                         }
1739
1740                         currentLineBuffer += lbufDelta;
1741
1742 /*                      horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1743                         while (horizontalRemainder & 0x80)
1744                         {
1745                                 horizontalRemainder += hscale;
1746                                 pixCount++;
1747                                 pixels <<= 4;
1748                         }//*/
1749 //                      while (horizontalRemainder <= 0x20)             // I.e., it's <= 0 (*before* subtraction)
1750                         while (horizontalRemainder < 0x20)              // I.e., it's <= 0 (*before* subtraction)
1751                         {
1752                                 horizontalRemainder += hscale;
1753                                 pixCount++;
1754                                 pixels <<= 4;
1755                         }
1756                         horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1757
1758                         if (pixCount > 15)
1759                         {
1760                                 int phrasesToSkip = pixCount / 16, pixelShift = pixCount % 16;
1761
1762                                 data += (pitch << 3) * phrasesToSkip;
1763                                 pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1764                                 pixels <<= 4 * pixelShift;
1765                                 iwidth -= phrasesToSkip;
1766                                 pixCount = pixelShift;
1767                         }
1768                 }
1769         }
1770         else if (depth == 3)                                                    // 8 BPP
1771         {
1772 if (firstPix)
1773         WriteLog("OP: Scaled bitmap @ 8 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1774                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1775                 int32_t lbufDelta = ((int8_t)((flags << 7) & 0xFF) >> 5) | 0x02;
1776
1777                 int pixCount = 0;
1778                 uint64_t pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1779
1780                 while ((int32_t)iwidth > 0)
1781                 {
1782                         uint8_t bits = pixels >> 56;
1783
1784 #ifndef OP_USES_PALETTE_ZERO
1785                         if (flagTRANS && bits == 0)
1786 #else
1787                         if (flagTRANS && (paletteRAM16[bits] == 0))
1788 #endif
1789                                 ;       // Do nothing...
1790                         else
1791                         {
1792                                 if (!flagRMW)
1793                                         // This is the *only* correct use of endian-dependent code
1794                                         // (i.e., mem-to-mem direct copying)!
1795                                         *(uint16_t *)currentLineBuffer = paletteRAM16[bits];
1796 /*                              {
1797                                         if (currentLineBuffer >= lineBufferLowerLimit && currentLineBuffer <= lineBufferUpperLimit)
1798                                                 *(uint16_t *)currentLineBuffer = paletteRAM16[bits];
1799                                 }*/
1800                                 else
1801                                         *currentLineBuffer =
1802                                                 BLEND_CR(*currentLineBuffer, paletteRAM[bits << 1]),
1803                                         *(currentLineBuffer + 1) =
1804                                                 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[(bits << 1) + 1]);
1805                         }
1806
1807                         currentLineBuffer += lbufDelta;
1808
1809 //                      while (horizontalRemainder <= 0x20)             // I.e., it's <= 0 (*before* subtraction)
1810                         while (horizontalRemainder < 0x20)              // I.e., it's <= 1.0 (*before* subtraction)
1811                         {
1812                                 horizontalRemainder += hscale;
1813                                 pixCount++;
1814                                 pixels <<= 8;
1815                         }
1816                         horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1817
1818                         if (pixCount > 7)
1819                         {
1820                                 int phrasesToSkip = pixCount / 8, pixelShift = pixCount % 8;
1821
1822                                 data += (pitch << 3) * phrasesToSkip;
1823                                 pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1824                                 pixels <<= 8 * pixelShift;
1825                                 iwidth -= phrasesToSkip;
1826                                 pixCount = pixelShift;
1827                         }
1828                 }
1829         }
1830         else if (depth == 4)                                                    // 16 BPP
1831         {
1832 if (firstPix != 0)
1833         WriteLog("OP: Scaled bitmap @ 16 BPP requesting FIRSTPIX!\n");
1834                 // The LSB is OPFLAG_REFLECT, so sign extend it and OR 2 into it.
1835                 int32_t lbufDelta = ((int8_t)((flags << 7) & 0xFF) >> 5) | 0x02;
1836
1837                 int pixCount = 0;
1838                 uint64_t pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1839
1840                 while ((int32_t)iwidth > 0)
1841                 {
1842                         uint8_t bitsHi = pixels >> 56, bitsLo = pixels >> 48;
1843
1844 //This doesn't seem right... Let's try the encoded black value ($8800):
1845 //Apparently, CRY 0 maps to $8800...
1846                                 if (flagTRANS && ((bitsLo | bitsHi) == 0))
1847 //                              if (flagTRANS && (bitsHi == 0x88) && (bitsLo == 0x00))
1848                                 ;       // Do nothing...
1849                         else
1850                         {
1851                                 if (!flagRMW)
1852                                         *currentLineBuffer = bitsHi,
1853                                         *(currentLineBuffer + 1) = bitsLo;
1854                                 else
1855                                         *currentLineBuffer =
1856                                                 BLEND_CR(*currentLineBuffer, bitsHi),
1857                                         *(currentLineBuffer + 1) =
1858                                                 BLEND_Y(*(currentLineBuffer + 1), bitsLo);
1859                         }
1860
1861                         currentLineBuffer += lbufDelta;
1862
1863 /*                      horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1864                         while (horizontalRemainder & 0x80)
1865                         {
1866                                 horizontalRemainder += hscale;
1867                                 pixCount++;
1868                                 pixels <<= 16;
1869                         }//*/
1870 //                      while (horizontalRemainder <= 0x20)             // I.e., it's <= 0 (*before* subtraction)
1871                         while (horizontalRemainder < 0x20)              // I.e., it's <= 1.0 (*before* subtraction)
1872                         {
1873                                 horizontalRemainder += hscale;
1874                                 pixCount++;
1875                                 pixels <<= 16;
1876                         }
1877                         horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1878 //*/
1879                         if (pixCount > 3)
1880                         {
1881                                 int phrasesToSkip = pixCount / 4, pixelShift = pixCount % 4;
1882
1883                                 data += (pitch << 3) * phrasesToSkip;
1884                                 pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1885                                 pixels <<= 16 * pixelShift;
1886
1887                                 iwidth -= phrasesToSkip;
1888
1889                                 pixCount = pixelShift;
1890                         }
1891                 }
1892         }
1893         else if (depth == 5)                                                    // 24 BPP
1894         {
1895 //I'm not sure that you can scale a 24 BPP bitmap properly--the JTRM seems to indicate as much.
1896 WriteLog("OP: Writing 24 BPP scaled bitmap!\n");
1897 if (firstPix != 0)
1898         WriteLog("OP: Scaled bitmap @ 24 BPP requesting FIRSTPIX!\n");
1899                 // Not sure, but I think RMW only works with 16 BPP and below, and only in CRY mode...
1900                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 4 into it.
1901                 int32_t lbufDelta = ((int8_t)((flags << 7) & 0xFF) >> 4) | 0x04;
1902
1903                 while (iwidth--)
1904                 {
1905                         // Fetch phrase...
1906                         uint64_t pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1907                         data += pitch << 3;                                             // Multiply pitch * 8 (optimize: precompute this value)
1908
1909                         for(int i=0; i<2; i++)
1910                         {
1911                                 uint8_t bits3 = pixels >> 56, bits2 = pixels >> 48,
1912                                         bits1 = pixels >> 40, bits0 = pixels >> 32;
1913
1914                                 if (flagTRANS && (bits3 | bits2 | bits1 | bits0) == 0)
1915                                         ;       // Do nothing...
1916                                 else
1917                                         *currentLineBuffer = bits3,
1918                                         *(currentLineBuffer + 1) = bits2,
1919                                         *(currentLineBuffer + 2) = bits1,
1920                                         *(currentLineBuffer + 3) = bits0;
1921
1922                                 currentLineBuffer += lbufDelta;
1923                                 pixels <<= 32;
1924                         }
1925                 }
1926         }
1927 }