]> Shamusworld >> Repos - virtualjaguar/blob - objectp.cpp
38631648d250d048c38384bc175e46b39c184d8a
[virtualjaguar] / objectp.cpp
1 //
2 // Object Processor
3 //
4 // Original source by David Raingeard (Cal2)
5 // GCC/SDL port by Niels Wagenaar (Linux/WIN32) and Caz (BeOS)
6 // Extensive cleanups/fixes/rewrites by James L. Hammons
7 // (C) 2010 Underground Software
8 //
9 // JLH = James L. Hammons <jlhamm@acm.org>
10 //
11 // Who  When        What
12 // ---  ----------  -------------------------------------------------------------
13 // JLH  01/16/2010  Created this log ;-)
14 //
15
16 #include "objectp.h"
17
18 #include <stdlib.h>
19 #include <string.h>
20 #include "gpu.h"
21 #include "jaguar.h"
22 #include "log.h"
23 #include "m68k.h"
24 #include "memory.h"
25 #include "tom.h"
26
27 //#define OP_DEBUG
28 //#define OP_DEBUG_BMP
29
// Blend table lookups: the existing component (dst) selects the high byte of
// the table index and the delta (src) the low byte. Each argument is fully
// parenthesized so that expression arguments are cast and shifted as a whole.
#define BLEND_Y(dst, src)       op_blend_y[(((uint16)(dst)) << 8) | ((uint16)(src))]
#define BLEND_CR(dst, src)      op_blend_cr[(((uint16)(dst)) << 8) | ((uint16)(src))]
32
33 #define OBJECT_TYPE_BITMAP      0                                       // 000
34 #define OBJECT_TYPE_SCALE       1                                       // 001
35 #define OBJECT_TYPE_GPU         2                                       // 010
36 #define OBJECT_TYPE_BRANCH      3                                       // 011
37 #define OBJECT_TYPE_STOP        4                                       // 100
38
39 #define CONDITION_EQUAL                         0
40 #define CONDITION_LESS_THAN                     1
41 #define CONDITION_GREATER_THAN          2
42 #define CONDITION_OP_FLAG_SET           3
43 #define CONDITION_SECOND_HALF_LINE      4
44
45 #define OPFLAG_RELEASE          8                                       // Bus release bit
46 #define OPFLAG_TRANS            4                                       // Transparency bit
47 #define OPFLAG_RMW                      2                                       // Read-Modify-Write bit
48 #define OPFLAG_REFLECT          1                                       // Horizontal mirror bit
49
50 // Private function prototypes
51
52 void OPProcessFixedBitmap(uint64 p0, uint64 p1, bool render);
53 void OPProcessScaledBitmap(uint64 p0, uint64 p1, uint64 p2, bool render);
54 void DumpScaledObject(uint64 p0, uint64 p1, uint64 p2);
55 void DumpFixedObject(uint64 p0, uint64 p1);
56 uint64 OPLoadPhrase(uint32 offset);
57
58 // Local global variables
59
60 // Blend tables (64K each)
61 static uint8 op_blend_y[0x10000];
62 static uint8 op_blend_cr[0x10000];
63 // There may be a problem with this "RAM" overlapping (and thus being independent of)
64 // some of the regular TOM RAM...
65 //#warning objectp_ram is separated from TOM RAM--need to fix that!
66 //static uint8 objectp_ram[0x40];                       // This is based at $F00000
67 uint8 objectp_running = 0;
68 //bool objectp_stop_reading_list;
69
70 static uint8 op_bitmap_bit_depth[8] = { 1, 2, 4, 8, 16, 24, 32, 0 };
71 //static uint32 op_bitmap_bit_size[8] =
72 //      { (uint32)(0.125*65536), (uint32)(0.25*65536), (uint32)(0.5*65536), (uint32)(1*65536),
73 //        (uint32)(2*65536),     (uint32)(1*65536),    (uint32)(1*65536),   (uint32)(1*65536) };
74 static uint32 op_pointer;
75
76 int32 phraseWidthToPixels[8] = { 64, 32, 16, 8, 4, 2, 0, 0 };
77
78
79 //
80 // Object Processor initialization
81 //
82 void OPInit(void)
83 {
84         // Here we calculate the saturating blend of a signed 4-bit value and an
85         // existing Cyan/Red value as well as a signed 8-bit value and an existing intensity...
86         // Note: CRY is 4 bits Cyan, 4 bits Red, 16 bits intensitY
87         for(int i=0; i<256*256; i++)
88         {
89                 int y = (i >> 8) & 0xFF;
90                 int dy = (int8)i;                                       // Sign extend the Y index
91                 int c1 = (i >> 8) & 0x0F;
92                 int dc1 = (int8)(i << 4) >> 4;          // Sign extend the R index
93                 int c2 = (i >> 12) & 0x0F;
94                 int dc2 = (int8)(i & 0xF0) >> 4;        // Sign extend the C index
95
96                 y += dy;
97
98                 if (y < 0)
99                         y = 0;
100                 else if (y > 0xFF)
101                         y = 0xFF;
102
103                 op_blend_y[i] = y;
104
105                 c1 += dc1;
106
107                 if (c1 < 0)
108                         c1 = 0;
109                 else if (c1 > 0x0F)
110                         c1 = 0x0F;
111
112                 c2 += dc2;
113
114                 if (c2 < 0)
115                         c2 = 0;
116                 else if (c2 > 0x0F)
117                         c2 = 0x0F;
118
119                 op_blend_cr[i] = (c2 << 4) | c1;
120         }
121
122         OPReset();
123 }
124
125 //
126 // Object Processor reset
127 //
128 void OPReset(void)
129 {
130 //      memset(objectp_ram, 0x00, 0x40);
131         objectp_running = 0;
132 }
133
134 void OPDone(void)
135 {
136         const char * opType[8] =
137         { "(BITMAP)", "(SCALED BITMAP)", "(GPU INT)", "(BRANCH)", "(STOP)", "???", "???", "???" };
138         const char * ccType[8] =
139                 { "\"==\"", "\"<\"", "\">\"", "(opflag set)", "(second half line)", "?", "?", "?" };
140
141         uint32 olp = OPGetListPointer();
142         WriteLog("OP: OLP = %08X\n", olp);
143         WriteLog("OP: Phrase dump\n    ----------\n");
144         for(uint32 i=0; i<0x100; i+=8)
145         {
146                 uint32 hi = JaguarReadLong(olp + i, OP), lo = JaguarReadLong(olp + i + 4, OP);
147                 WriteLog("\t%08X: %08X %08X %s", olp + i, hi, lo, opType[lo & 0x07]);
148                 if ((lo & 0x07) == 3)
149                 {
150                         uint16 ypos = (lo >> 3) & 0x7FF;
151                         uint8  cc   = (lo >> 14) & 0x03;
152                         uint32 link = ((hi << 11) | (lo >> 21)) & 0x3FFFF8;
153                         WriteLog(" YPOS=%u, CC=%s, link=%08X", ypos, ccType[cc], link);
154                 }
155                 WriteLog("\n");
156                 if ((lo & 0x07) == 0)
157                         DumpFixedObject(OPLoadPhrase(olp+i), OPLoadPhrase(olp+i+8));
158                 if ((lo & 0x07) == 1)
159                         DumpScaledObject(OPLoadPhrase(olp+i), OPLoadPhrase(olp+i+8), OPLoadPhrase(olp+i+16));
160         }
161         WriteLog("\n");
162
163 //      memory_free(op_blend_y);
164 //      memory_free(op_blend_cr);
165 }
166
167 //
168 // Object Processor memory access
169 // Memory range: F00010 - F00027
170 //
171 //      F00010-F00017   R     xxxxxxxx xxxxxxxx   OB - current object code from the graphics processor
172 //      F00020-F00023     W   xxxxxxxx xxxxxxxx   OLP - start of the object list
173 //      F00026            W   -------- -------x   OBF - object processor flag
174 //
175
176 #if 0
177 uint8 OPReadByte(uint32 offset, uint32 who/*=UNKNOWN*/)
178 {
179         offset &= 0x3F;
180         return objectp_ram[offset];
181 }
182
183 uint16 OPReadWord(uint32 offset, uint32 who/*=UNKNOWN*/)
184 {
185         offset &= 0x3F;
186         return GET16(objectp_ram, offset);
187 }
188
189 void OPWriteByte(uint32 offset, uint8 data, uint32 who/*=UNKNOWN*/)
190 {
191         offset &= 0x3F;
192         objectp_ram[offset] = data;
193 }
194
195 void OPWriteWord(uint32 offset, uint16 data, uint32 who/*=UNKNOWN*/)
196 {
197         offset &= 0x3F;
198         SET16(objectp_ram, offset, data);
199
200 /*if (offset == 0x20)
201 WriteLog("OP: Setting lo list pointer: %04X\n", data);
202 if (offset == 0x22)
203 WriteLog("OP: Setting hi list pointer: %04X\n", data);//*/
204 }
205 #endif
206
207 uint32 OPGetListPointer(void)
208 {
209         // Note: This register is LO / HI WORD, hence the funky look of this...
210         return GET16(tomRam8, 0x20) | (GET16(tomRam8, 0x22) << 16);
211 }
212
213 // This is WRONG, since the OBF is only 16 bits wide!!! [FIXED]
214
215 uint32 OPGetStatusRegister(void)
216 {
217         return GET16(tomRam8, 0x26);
218 }
219
220 // This is WRONG, since the OBF is only 16 bits wide!!! [FIXED]
221
222 void OPSetStatusRegister(uint32 data)
223 {
224         tomRam8[0x26] = (data & 0x0000FF00) >> 8;
225         tomRam8[0x27] |= (data & 0xFE);
226 }
227
228 void OPSetCurrentObject(uint64 object)
229 {
230 //Not sure this is right... Wouldn't it just be stored 64 bit BE?
231         // Stored as least significant 32 bits first, ms32 last in big endian
232 /*      objectp_ram[0x13] = object & 0xFF; object >>= 8;
233         objectp_ram[0x12] = object & 0xFF; object >>= 8;
234         objectp_ram[0x11] = object & 0xFF; object >>= 8;
235         objectp_ram[0x10] = object & 0xFF; object >>= 8;
236
237         objectp_ram[0x17] = object & 0xFF; object >>= 8;
238         objectp_ram[0x16] = object & 0xFF; object >>= 8;
239         objectp_ram[0x15] = object & 0xFF; object >>= 8;
240         objectp_ram[0x14] = object & 0xFF;*/
241 // Let's try regular good old big endian...
242         tomRam8[0x17] = object & 0xFF; object >>= 8;
243         tomRam8[0x16] = object & 0xFF; object >>= 8;
244         tomRam8[0x15] = object & 0xFF; object >>= 8;
245         tomRam8[0x14] = object & 0xFF; object >>= 8;
246
247         tomRam8[0x13] = object & 0xFF; object >>= 8;
248         tomRam8[0x12] = object & 0xFF; object >>= 8;
249         tomRam8[0x11] = object & 0xFF; object >>= 8;
250         tomRam8[0x10] = object & 0xFF;
251 }
252
253 uint64 OPLoadPhrase(uint32 offset)
254 {
255         offset &= ~0x07;                                                // 8 byte alignment
256         return ((uint64)JaguarReadLong(offset, OP) << 32) | (uint64)JaguarReadLong(offset+4, OP);
257 }
258
259 void OPStorePhrase(uint32 offset, uint64 p)
260 {
261         offset &= ~0x07;                                                // 8 byte alignment
262         JaguarWriteLong(offset, p >> 32, OP);
263         JaguarWriteLong(offset + 4, p & 0xFFFFFFFF, OP);
264 }
265
266 //
267 // Debugging routines
268 //
269 void DumpScaledObject(uint64 p0, uint64 p1, uint64 p2)
270 {
271         WriteLog(" (SCALED BITMAP)");
272         WriteLog(" %08X --> phrase %08X %08X\n", op_pointer, (uint32)(p1>>32), (uint32)(p1&0xFFFFFFFF));
273         WriteLog("                 %08X --> phrase %08X %08X ", op_pointer+8, (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
274         uint8 bitdepth = (p1 >> 12) & 0x07;
275 //WAS:  int16 ypos = ((p0 >> 3) & 0x3FF);                       // ??? What if not interlaced (/2)?
276         int16 ypos = ((p0 >> 3) & 0x7FF);                       // ??? What if not interlaced (/2)?
277         int32 xpos = p1 & 0xFFF;
278         xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);
279         uint32 iwidth = ((p1 >> 28) & 0x3FF);
280         uint32 dwidth = ((p1 >> 18) & 0x3FF);           // Unsigned!
281         uint16 height = ((p0 >> 14) & 0x3FF);
282         uint32 link = ((p0 >> 24) & 0x7FFFF) << 3;
283         uint32 ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
284         uint32 firstPix = (p1 >> 49) & 0x3F;
285         uint8 flags = (p1 >> 45) & 0x0F;
286         uint8 idx = (p1 >> 38) & 0x7F;
287         uint32 pitch = (p1 >> 15) & 0x07;
288         WriteLog("\n    [%u (%u) x %u @ (%i, %u) (%u bpp), l: %08X, p: %08X fp: %02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
289                 iwidth, dwidth, height, xpos, ypos, op_bitmap_bit_depth[bitdepth], link, ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""), (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""), (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
290         uint32 hscale = p2 & 0xFF;
291         uint32 vscale = (p2 >> 8) & 0xFF;
292         uint32 remainder = (p2 >> 16) & 0xFF;
293         WriteLog("    [hsc: %02X, vsc: %02X, rem: %02X]\n", hscale, vscale, remainder);
294 }
295
296 void DumpFixedObject(uint64 p0, uint64 p1)
297 {
298         WriteLog(" (BITMAP)");
299         WriteLog(" %08X --> phrase %08X %08X\n", op_pointer, (uint32)(p1>>32), (uint32)(p1&0xFFFFFFFF));
300         uint8 bitdepth = (p1 >> 12) & 0x07;
301 //WAS:  int16 ypos = ((p0 >> 3) & 0x3FF);                       // ??? What if not interlaced (/2)?
302         int16 ypos = ((p0 >> 3) & 0x7FF);                       // ??? What if not interlaced (/2)?
303         int32 xpos = p1 & 0xFFF;
304         xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);
305         uint32 iwidth = ((p1 >> 28) & 0x3FF);
306         uint32 dwidth = ((p1 >> 18) & 0x3FF);           // Unsigned!
307         uint16 height = ((p0 >> 14) & 0x3FF);
308         uint32 link = ((p0 >> 24) & 0x7FFFF) << 3;
309         uint32 ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
310         uint32 firstPix = (p1 >> 49) & 0x3F;
311         uint8 flags = (p1 >> 45) & 0x0F;
312         uint8 idx = (p1 >> 38) & 0x7F;
313         uint32 pitch = (p1 >> 15) & 0x07;
314         WriteLog("    [%u (%u) x %u @ (%i, %u) (%u bpp), l: %08X, p: %08X fp: %02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
315                 iwidth, dwidth, height, xpos, ypos, op_bitmap_bit_depth[bitdepth], link, ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""), (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""), (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
316 }
317
318 //
319 // Object Processor main routine
320 //
321 //Need to fix this so that when an GPU object IRQ happens, we can pick up OP processing
322 //where we left off. !!! FIX !!!
323 #warning "Need to fix this so that when an GPU object IRQ happens, we can pick up OP processing where we left off. !!! FIX !!!"
324 void OPProcessList(int scanline, bool render)
325 {
326 extern int op_start_log;
327 //      char * condition_to_str[8] =
328 //              { "==", "<", ">", "(opflag set)", "(second half line)", "?", "?", "?" };
329
330         op_pointer = OPGetListPointer();
331
332 //      objectp_stop_reading_list = false;
333
334 //WriteLog("OP: Processing line #%u (OLP=%08X)...\n", scanline, op_pointer);
335 //op_done();
336
337 // *** BEGIN OP PROCESSOR TESTING ONLY ***
338 extern bool interactiveMode;
339 extern bool iToggle;
340 extern int objectPtr;
341 bool inhibit;
342 int bitmapCounter = 0;
343 // *** END OP PROCESSOR TESTING ONLY ***
344
345         uint32 opCyclesToRun = 10000;                                   // This is a pulled-out-of-the-air value (will need to be fixed, obviously!)
346
347 //      if (op_pointer) WriteLog(" new op list at 0x%.8x scanline %i\n",op_pointer,scanline);
348         while (op_pointer)
349         {
350 // *** BEGIN OP PROCESSOR TESTING ONLY ***
351 if (interactiveMode && bitmapCounter == objectPtr)
352         inhibit = iToggle;
353 else
354         inhibit = false;
355 // *** END OP PROCESSOR TESTING ONLY ***
356 //              if (objectp_stop_reading_list)
357 //                      return;
358
359                 uint64 p0 = OPLoadPhrase(op_pointer);
360 //WriteLog("\t%08X type %i\n", op_pointer, (uint8)p0 & 0x07);
361                 op_pointer += 8;
362 if (scanline == TOMGetVDB() && op_start_log)
363 //if (scanline == 215 && op_start_log)
364 //if (scanline == 28 && op_start_log)
365 {
366 WriteLog("%08X --> phrase %08X %08X", op_pointer - 8, (int)(p0>>32), (int)(p0&0xFFFFFFFF));
367 if ((p0 & 0x07) == OBJECT_TYPE_BITMAP)
368 {
369 WriteLog(" (BITMAP) ");
370 uint64 p1 = OPLoadPhrase(op_pointer);
371 WriteLog("\n%08X --> phrase %08X %08X ", op_pointer, (int)(p1>>32), (int)(p1&0xFFFFFFFF));
372         uint8 bitdepth = (p1 >> 12) & 0x07;
373 //WAS:  int16 ypos = ((p0 >> 3) & 0x3FF);                       // ??? What if not interlaced (/2)?
374         int16 ypos = ((p0 >> 3) & 0x7FF);                       // ??? What if not interlaced (/2)?
375 int32 xpos = p1 & 0xFFF;
376 xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);
377         uint32 iwidth = ((p1 >> 28) & 0x3FF);
378         uint32 dwidth = ((p1 >> 18) & 0x3FF);           // Unsigned!
379         uint16 height = ((p0 >> 14) & 0x3FF);
380         uint32 link = ((p0 >> 24) & 0x7FFFF) << 3;
381         uint32 ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
382         uint32 firstPix = (p1 >> 49) & 0x3F;
383         uint8 flags = (p1 >> 45) & 0x0F;
384         uint8 idx = (p1 >> 38) & 0x7F;
385         uint32 pitch = (p1 >> 15) & 0x07;
386 WriteLog("\n    [%u (%u) x %u @ (%i, %u) (%u bpp), l: %08X, p: %08X fp: %02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
387         iwidth, dwidth, height, xpos, ypos, op_bitmap_bit_depth[bitdepth], link, ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""), (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""), (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
388 }
389 if ((p0 & 0x07) == OBJECT_TYPE_SCALE)
390 {
391 WriteLog(" (SCALED BITMAP)");
392 uint64 p1 = OPLoadPhrase(op_pointer), p2 = OPLoadPhrase(op_pointer+8);
393 WriteLog("\n%08X --> phrase %08X %08X ", op_pointer, (int)(p1>>32), (int)(p1&0xFFFFFFFF));
394 WriteLog("\n%08X --> phrase %08X %08X ", op_pointer+8, (int)(p2>>32), (int)(p2&0xFFFFFFFF));
395         uint8 bitdepth = (p1 >> 12) & 0x07;
396 //WAS:  int16 ypos = ((p0 >> 3) & 0x3FF);                       // ??? What if not interlaced (/2)?
397         int16 ypos = ((p0 >> 3) & 0x7FF);                       // ??? What if not interlaced (/2)?
398 int32 xpos = p1 & 0xFFF;
399 xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);
400         uint32 iwidth = ((p1 >> 28) & 0x3FF);
401         uint32 dwidth = ((p1 >> 18) & 0x3FF);           // Unsigned!
402         uint16 height = ((p0 >> 14) & 0x3FF);
403         uint32 link = ((p0 >> 24) & 0x7FFFF) << 3;
404         uint32 ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
405         uint32 firstPix = (p1 >> 49) & 0x3F;
406         uint8 flags = (p1 >> 45) & 0x0F;
407         uint8 idx = (p1 >> 38) & 0x7F;
408         uint32 pitch = (p1 >> 15) & 0x07;
409 WriteLog("\n    [%u (%u) x %u @ (%i, %u) (%u bpp), l: %08X, p: %08X fp: %02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
410         iwidth, dwidth, height, xpos, ypos, op_bitmap_bit_depth[bitdepth], link, ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""), (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""), (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
411         uint32 hscale = p2 & 0xFF;
412         uint32 vscale = (p2 >> 8) & 0xFF;
413         uint32 remainder = (p2 >> 16) & 0xFF;
414 WriteLog("    [hsc: %02X, vsc: %02X, rem: %02X]\n", hscale, vscale, remainder);
415 }
416 if ((p0 & 0x07) == OBJECT_TYPE_GPU)
417 WriteLog(" (GPU)\n");
418 if ((p0 & 0x07) == OBJECT_TYPE_BRANCH)
419 {
420 WriteLog(" (BRANCH)\n");
421 uint8 * jaguarMainRam = GetRamPtr();
422 WriteLog("[RAM] --> ");
423 for(int k=0; k<8; k++)
424         WriteLog("%02X ", jaguarMainRam[op_pointer-8 + k]);
425 WriteLog("\n");
426 }
427 if ((p0 & 0x07) == OBJECT_TYPE_STOP)
428 WriteLog("    --> List end\n");
429 }//*/
430
431                 switch ((uint8)p0 & 0x07)
432                 {
433                 case OBJECT_TYPE_BITMAP:
434                 {
435 //WAS:                  uint16 ypos = (p0 >> 3) & 0x3FF;
436                         uint16 ypos = (p0 >> 3) & 0x7FF;
437 // This is only theory implied by Rayman...!
438 // It seems that if the YPOS is zero, then bump the YPOS value so that it coincides with
439 // the VDB value. With interlacing, this would be slightly more tricky.
440 // There's probably another bit somewhere that enables this mode--but so far, doesn't seem
441 // to affect any other game in a negative way (that I've seen).
442 // Either that, or it's an undocumented bug...
443
444 //No, the reason this was needed is that the OP code before was wrong. Any value
445 //less than VDB will get written to the top line of the display!
446 //                      if (ypos == 0)
447 //                              ypos = TOMReadWord(0xF00046, OP) / 2;                   // Get the VDB value
448                         uint32 height = (p0 & 0xFFC000) >> 14;
449                         uint32 oldOPP = op_pointer - 8;
450 // *** BEGIN OP PROCESSOR TESTING ONLY ***
451 if (inhibit && op_start_log)
452         WriteLog("!!! ^^^ This object is INHIBITED! ^^^ !!!\n");
453 bitmapCounter++;
454 if (!inhibit)   // For OP testing only!
455 // *** END OP PROCESSOR TESTING ONLY ***
456                         if (scanline >= ypos && height > 0)
457                         {
458                                 uint64 p1 = OPLoadPhrase(op_pointer);
459                                 op_pointer += 8;
460 //WriteLog("OP: Writing scanline %d with ypos == %d...\n", scanline, ypos);
461 //WriteLog("--> Writing %u BPP bitmap...\n", op_bitmap_bit_depth[(p1 >> 12) & 0x07]);
462 //                              OPProcessFixedBitmap(scanline, p0, p1, render);
463                                 OPProcessFixedBitmap(p0, p1, render);
464
465                                 // OP write-backs
466
467 //???Does this really happen??? Doesn't seem to work if you do this...!
468 //Probably not. Must be a bug in the documentation...!
469 //                              uint32 link = (p0 & 0x7FFFF000000) >> 21;
470 //                              SET16(tom_ram_8, 0x20, link & 0xFFFF);  // OLP
471 //                              SET16(tom_ram_8, 0x22, link >> 16);
472 /*                              uint32 height = (p0 & 0xFFC000) >> 14;
473                                 if (height - 1 > 0)
474                                         height--;*/
475                                 // NOTE: Would subtract 2 if in interlaced mode...!
476 //                              uint64 height = ((p0 & 0xFFC000) - 0x4000) & 0xFFC000;
477 //                              if (height)
478                                 height--;
479
480                                 uint64 data = (p0 & 0xFFFFF80000000000LL) >> 40;
481                                 uint64 dwidth = (p1 & 0xFFC0000) >> 15;
482                                 data += dwidth;
483
484                                 p0 &= ~0xFFFFF80000FFC000LL;            // Mask out old data...
485                                 p0 |= (uint64)height << 14;
486                                 p0 |= data << 40;
487                                 OPStorePhrase(oldOPP, p0);
488                         }
489 //WriteLog("\t\tOld OP: %08X -> ", op_pointer);
490 //Temp, for testing...
491 //No doubt, this type of check will break all kinds of stuff... !!! FIX !!!
492 //And it does! !!! FIX !!!
493 //Let's remove this "fix" since it screws up more than it fixes.
494 /*      if (op_pointer > ((p0 & 0x000007FFFF000000LL) >> 21))
495                 return;*/
496
497                         op_pointer = (p0 & 0x000007FFFF000000LL) >> 21;
498 //WriteLog("New OP: %08X\n", op_pointer);
499                         break;
500                 }
501                 case OBJECT_TYPE_SCALE:
502                 {
503 //WAS:                  uint16 ypos = (p0 >> 3) & 0x3FF;
504                         uint16 ypos = (p0 >> 3) & 0x7FF;
505                         uint32 height = (p0 & 0xFFC000) >> 14;
506                         uint32 oldOPP = op_pointer - 8;
507 // *** BEGIN OP PROCESSOR TESTING ONLY ***
508 if (inhibit && op_start_log)
509 {
510         WriteLog("!!! ^^^ This object is INHIBITED! ^^^ !!! (scanline=%u, ypos=%u, height=%u)\n", scanline, ypos, height);
511         DumpScaledObject(p0, OPLoadPhrase(op_pointer), OPLoadPhrase(op_pointer+8));
512 }
513 bitmapCounter++;
514 if (!inhibit)   // For OP testing only!
515 // *** END OP PROCESSOR TESTING ONLY ***
516                         if (scanline >= ypos && height > 0)
517                         {
518                                 uint64 p1 = OPLoadPhrase(op_pointer);
519                                 op_pointer += 8;
520                                 uint64 p2 = OPLoadPhrase(op_pointer);
521                                 op_pointer += 8;
522 //WriteLog("OP: %08X (%d) %08X%08X %08X%08X %08X%08X\n", oldOPP, scanline, (uint32)(p0>>32), (uint32)(p0&0xFFFFFFFF), (uint32)(p1>>32), (uint32)(p1&0xFFFFFFFF), (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
523                                 OPProcessScaledBitmap(p0, p1, p2, render);
524
525                                 // OP write-backs
526
527                                 uint8 remainder = p2 >> 16, vscale = p2 >> 8;
528 //Actually, we should skip this object if it has a vscale of zero.
529 //Or do we? Not sure... Atari Karts has a few lines that look like:
530 // (SCALED BITMAP)
531 //000E8268 --> phrase 00010000 7000B00D
532 //    [7 (0) x 1 @ (13, 0) (8 bpp), l: 000E82A0, p: 000E0FC0 fp: 00, fl:RELEASE, idx:00, pt:01]
533 //    [hsc: 9A, vsc: 00, rem: 00]
534 // Could it be the vscale is overridden if the DWIDTH is zero? Hmm...
535
536                                 if (vscale == 0)
537                                         vscale = 0x20;                                  // OP bug??? Nope, it isn't...! Or is it?
538
539 //extern int start_logging;
540 //if (start_logging)
541 //      WriteLog("--> Returned from scaled bitmap processing (rem=%02X, vscale=%02X)...\n", remainder, vscale);//*/
542 //Locks up here:
543 //--> Returned from scaled bitmap processing (rem=20, vscale=80)...
544 //There are other problems here, it looks like...
545 //Another lock up:
546 //About to execute OP (508)...
547 /*
548 OP: Scaled bitmap 4x? 4bpp at 38,? hscale=7C fpix=0 data=00075E28 pitch 1 hflipped=no dwidth=? (linked to 00071118) Transluency=no
549 --> Returned from scaled bitmap processing (rem=50, vscale=7C)...
550 OP: Scaled bitmap 4x? 4bpp at 38,? hscale=7C fpix=0 data=00075E28 pitch 1 hflipped=no dwidth=? (linked to 00071118) Transluency=no
551 --> Returned from scaled bitmap processing (rem=30, vscale=7C)...
552 OP: Scaled bitmap 4x? 4bpp at 38,? hscale=7C fpix=0 data=00075E28 pitch 1 hflipped=no dwidth=? (linked to 00071118) Transluency=no
553 --> Returned from scaled bitmap processing (rem=10, vscale=7C)...
554 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756A8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
555 --> Returned from scaled bitmap processing (rem=00, vscale=7E)...
556 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
557 --> Returned from scaled bitmap processing (rem=00, vscale=80)...
558 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
559 --> Returned from scaled bitmap processing (rem=5E, vscale=7E)...
560 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756E8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
561 --> Returned from scaled bitmap processing (rem=60, vscale=80)...
562 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
563 --> Returned from scaled bitmap processing (rem=3E, vscale=7E)...
564 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756E8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
565 --> Returned from scaled bitmap processing (rem=40, vscale=80)...
566 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
567 --> Returned from scaled bitmap processing (rem=1E, vscale=7E)...
568 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756E8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
569 --> Returned from scaled bitmap processing (rem=20, vscale=80)...
570 */
571 //Here's another problem:
572 //    [hsc: 20, vsc: 20, rem: 00]
573 // Since we're not checking for $E0 (but that's what we get from the above), we end
574 // up repeating this scanline unnecessarily... !!! FIX !!! [DONE, but... still not quite
575 // right. Either that, or the Accolade team that wrote Bubsy screwed up royal.]
576 //Also note: $E0 = 7.0 which IS a legal vscale value...
577
578 //                              if (remainder & 0x80)                           // I.e., it's negative
579 //                              if ((remainder & 0x80) || remainder == 0)       // I.e., it's <= 0
580 //                              if ((remainder - 1) >= 0xE0)            // I.e., it's <= 0
581 //                              if ((remainder >= 0xE1) || remainder == 0)// I.e., it's <= 0
582 //                              if ((remainder >= 0xE1 && remainder <= 0xFF) || remainder == 0)// I.e., it's <= 0
583                                 if (remainder <= 0x20)                          // I.e., it's <= 0
584                                 {
585                                         uint64 data = (p0 & 0xFFFFF80000000000LL) >> 40;
586                                         uint64 dwidth = (p1 & 0xFFC0000) >> 15;
587
588 //                                      while (remainder & 0x80)
589 //                                      while ((remainder & 0x80) || remainder == 0)
590 //                                      while ((remainder - 1) >= 0xE0)
591 //                                      while ((remainder >= 0xE1) || remainder == 0)
592 //                                      while ((remainder >= 0xE1 && remainder <= 0xFF) || remainder == 0)
593                                         while (remainder <= 0x20)
594                                         {
595                                                 remainder += vscale;
596
597                                                 if (height)
598                                                         height--;
599
600                                                 data += dwidth;
601                                         }
602
603                                         p0 &= ~0xFFFFF80000FFC000LL;    // Mask out old data...
604                                         p0 |= (uint64)height << 14;
605                                         p0 |= data << 40;
606                                         OPStorePhrase(oldOPP, p0);
607                                 }
608
609                                 remainder -= 0x20;                                      // 1.0f in [3.5] fixed point format
610
611 //if (start_logging)
612 //      WriteLog("--> Finished writebacks...\n");//*/
613
614 //WriteLog(" [%08X%08X -> ", (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
615                                 p2 &= ~0x0000000000FF0000LL;
616                                 p2 |= (uint64)remainder << 16;
617 //WriteLog("%08X%08X]\n", (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
618                                 OPStorePhrase(oldOPP+16, p2);
619 //remainder = (uint8)(p2 >> 16), vscale = (uint8)(p2 >> 8);
620 //WriteLog(" [after]: rem=%02X, vscale=%02X\n", remainder, vscale);
621                         }
622                         op_pointer = (p0 & 0x000007FFFF000000LL) >> 21;
623                         break;
624                 }
625                 case OBJECT_TYPE_GPU:
626                 {
627 //WriteLog("OP: Asserting GPU IRQ #3...\n");
628 #warning "Need to fix OP GPU IRQ handling! !!! FIX !!!"
629                         OPSetCurrentObject(p0);
630                         GPUSetIRQLine(3, ASSERT_LINE);
631 //Also, OP processing is suspended from this point until OBF (F00026) is written to...
632 // !!! FIX !!!
633 //Do something like:
634 //OPSuspendedByGPU = true;
635 //Dunno if the OP keeps processing from where it was interrupted, or if it just continues
636 //on the next scanline...
637 // --> It continues from where it was interrupted! !!! FIX !!!
638                         break;
639                 }
640                 case OBJECT_TYPE_BRANCH:
641                 {
642                         uint16 ypos = (p0 >> 3) & 0x7FF;
643                         uint8  cc   = (p0 >> 14) & 0x03;
644                         uint32 link = (p0 >> 21) & 0x3FFFF8;
645
646 //                      if ((ypos!=507)&&(ypos!=25))
647 //                              WriteLog("\t%i%s%i link=0x%.8x\n",scanline,condition_to_str[cc],ypos>>1,link);
648                         switch (cc)
649                         {
650                         case CONDITION_EQUAL:
651                                 if (TOMReadWord(0xF00006, OP) == ypos || ypos == 0x7FF)
652                                         op_pointer = link;
653                                 break;
654                         case CONDITION_LESS_THAN:
655                                 if (TOMReadWord(0xF00006, OP) < ypos)
656                                         op_pointer = link;
657                                 break;
658                         case CONDITION_GREATER_THAN:
659                                 if (TOMReadWord(0xF00006, OP) > ypos)
660                                         op_pointer = link;
661                                 break;
662                         case CONDITION_OP_FLAG_SET:
663                                 if (OPGetStatusRegister() & 0x01)
664                                         op_pointer = link;
665                                 break;
666                         case CONDITION_SECOND_HALF_LINE:
667                                 // This basically means branch if bit 10 of HC is set
668 #warning "Unhandled condition code causes emulator to crash... !!! FIX !!!"
669                                 WriteLog("OP: Unexpected CONDITION_SECOND_HALF_LINE in BRANCH object\nOP: shuting down\n");
670                                 LogDone();
671                                 exit(0);
672                                 break;
673                         default:
674                                 WriteLog("OP: Unimplemented branch condition %i\n", cc);
675                         }
676                         break;
677                 }
678                 case OBJECT_TYPE_STOP:
679                 {
680 //op_start_log = 0;
681                         // unsure
682 //WriteLog("OP: --> STOP\n");
683 //                      op_set_status_register(((p0>>3) & 0xFFFFFFFF));
684 //This seems more likely...
685                         OPSetCurrentObject(p0);
686
687                         if (p0 & 0x08)
688                         {
689                                 TOMSetPendingObjectInt();
690                                 if (TOMIRQEnabled(IRQ_OPFLAG))// && jaguar_interrupt_handler_is_valid(64))
691                                         m68k_set_irq(7);                                // Cause an NMI to occur...
692                         }
693
694                         return;
695 //                      break;
696                 }
697                 default:
698                         WriteLog("op: unknown object type %i\n", ((uint8)p0 & 0x07));
699                         return;
700                 }
701
702                 // Here is a little sanity check to keep the OP from locking up the machine
703                 // when fed bad data. Better would be to count how many actual cycles it used
704                 // and bail out/reenter to properly simulate an overloaded OP... !!! FIX !!!
705 #warning "Better would be to count how many actual cycles it used and bail out/reenter to properly simulate an overloaded OP... !!! FIX !!!"
706                 opCyclesToRun--;
707                 if (!opCyclesToRun)
708                         return;
709         }
710 }
711
712 //
713 // Store fixed size bitmap in line buffer
714 //
715 void OPProcessFixedBitmap(uint64 p0, uint64 p1, bool render)
716 {
717 // Need to make sure that when writing that it stays within the line buffer...
718 // LBUF ($F01800 - $F01D9E) 360 x 32-bit RAM
719         uint8 depth = (p1 >> 12) & 0x07;                                // Color depth of image
720         int32 xpos = ((int16)((p1 << 4) & 0xFFFF)) >> 4;// Image xpos in LBUF
721         uint32 iwidth = (p1 >> 28) & 0x3FF;                             // Image width in *phrases*
722         uint32 data = (p0 >> 40) & 0xFFFFF8;                    // Pixel data address
723 //#ifdef OP_DEBUG_BMP
724         uint32  firstPix = (p1 >> 49) & 0x3F;
725         // "The LSB is significant only for scaled objects..." -JTRM
726         // "In 1 BPP mode, all five bits are significant. In 2 BPP mode, the top four are significant..."
727         firstPix &= 0x3E;
728 //#endif
729 // We can ignore the RELEASE (high order) bit for now--probably forever...!
730 //      uint8 flags = (p1 >> 45) & 0x0F;        // REFLECT, RMW, TRANS, RELEASE
731 //Optimize: break these out to their own BOOL values
732         uint8 flags = (p1 >> 45) & 0x07;                                // REFLECT (0), RMW (1), TRANS (2)
733         bool flagREFLECT = (flags & OPFLAG_REFLECT ? true : false),
734                 flagRMW = (flags & OPFLAG_RMW ? true : false),
735                 flagTRANS = (flags & OPFLAG_TRANS ? true : false);
736 // "For images with 1 to 4 bits/pixel the top 7 to 4 bits of the index
737 //  provide the most significant bits of the palette address."
738         uint8 index = (p1 >> 37) & 0xFE;                                // CLUT index offset (upper pix, 1-4 bpp)
739         uint32 pitch = (p1 >> 15) & 0x07;                               // Phrase pitch
740         pitch <<= 3;                                                                    // Optimization: Multiply pitch by 8
741
742 //      int16 scanlineWidth = tom_getVideoModeWidth();
743         uint8 * tomRam8 = TOMGetRamPointer();
744         uint8 * paletteRAM = &tomRam8[0x400];
745         // This is OK as long as it's used correctly: For 16-bit RAM to RAM direct copies--NOT
746         // for use when using endian-corrected data (i.e., any of the *_word_read functions!)
747         uint16 * paletteRAM16 = (uint16 *)paletteRAM;
748
749 //      WriteLog("bitmap %ix? %ibpp at %i,? firstpix=? data=0x%.8x pitch %i hflipped=%s dwidth=? (linked to ?) RMW=%s Tranparent=%s\n",
750 //              iwidth, op_bitmap_bit_depth[bitdepth], xpos, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), (flags&OPFLAG_RMW ? "yes" : "no"), (flags&OPFLAG_TRANS ? "yes" : "no"));
751
752 // Is it OK to have a 0 for the data width??? (i.e., undocumented?)
753 // Seems to be... Seems that dwidth *can* be zero (i.e., reuse same line) as well.
754 // Pitch == 0 is OK too...
755 //      if (!render || op_pointer == 0 || ptr == 0 || pitch == 0)
756 //I'm not convinced that we need to concern ourselves with data & op_pointer here either!
757         if (!render || iwidth == 0)
758                 return;
759
760 //#define OP_DEBUG_BMP
761 //#ifdef OP_DEBUG_BMP
762 //      WriteLog("bitmap %ix%i %ibpp at %i,%i firstpix=%i data=0x%.8x pitch %i hflipped=%s dwidth=%i (linked to 0x%.8x) Transluency=%s\n",
763 //              iwidth, height, op_bitmap_bit_depth[bitdepth], xpos, ypos, firstPix, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), dwidth, op_pointer, (flags&OPFLAG_RMW ? "yes" : "no"));
764 //#endif
765
766 //      int32 leftMargin = xpos, rightMargin = (xpos + (phraseWidthToPixels[depth] * iwidth)) - 1;
767         int32 startPos = xpos, endPos = xpos +
768                 (!flagREFLECT ? (phraseWidthToPixels[depth] * iwidth) - 1
769                 : -((phraseWidthToPixels[depth] * iwidth) + 1));
770         uint32 clippedWidth = 0, phraseClippedWidth = 0, dataClippedWidth = 0;//, phrasePixel = 0;
771         bool in24BPPMode = (((GET16(tomRam8, 0x0028) >> 1) & 0x03) == 1 ? true : false);        // VMODE
772         // Not sure if this is Jaguar Two only location or what...
773         // From the docs, it is... If we want to limit here we should think of something else.
774 //      int32 limit = GET16(tom_ram_8, 0x0008);                 // LIMIT
775         int32 limit = 720;
776         int32 lbufWidth = (!in24BPPMode ? limit - 1 : (limit / 2) - 1); // Zero based limit...
777
778         // If the image is completely to the left or right of the line buffer, then bail.
779 //If in REFLECT mode, then these values are swapped! !!! FIX !!! [DONE]
780 //There are four possibilities:
781 //  1. image sits on left edge and no REFLECT; starts out of bounds but ends in bounds.
782 //  2. image sits on left edge and REFLECT; starts in bounds but ends out of bounds.
783 //  3. image sits on right edge and REFLECT; starts out of bounds but ends in bounds.
784 //  4. image sits on right edge and no REFLECT; starts in bounds but ends out of bounds.
785 //Numbers 2 & 4 can be caught by checking the LBUF clip while in the inner loop,
786 // numbers 1 & 3 are of concern.
787 // This *indirectly* handles only cases 2 & 4! And is WRONG is REFLECT is set...!
788 //      if (rightMargin < 0 || leftMargin > lbufWidth)
789
790 // It might be easier to swap these (if REFLECTed) and just use XPOS down below...
791 // That way, you could simply set XPOS to leftMargin if !REFLECT and to rightMargin otherwise.
792 // Still have to be careful with the DATA and IWIDTH values though...
793
794 //      if ((!flagREFLECT && (rightMargin < 0 || leftMargin > lbufWidth))
795 //              || (flagREFLECT && (leftMargin < 0 || rightMargin > lbufWidth)))
796 //              return;
797         if ((!flagREFLECT && (endPos < 0 || startPos > lbufWidth))
798                 || (flagREFLECT && (startPos < 0 || endPos > lbufWidth)))
799                 return;
800
801         // Otherwise, find the clip limits and clip the phrase as well...
802         // NOTE: I'm fudging here by letting the actual blit overstep the bounds of the
803         //       line buffer, but it shouldn't matter since there are two unused line
804         //       buffers below and nothing above and I'll at most write 8 bytes outside
805         //       the line buffer... I could use a fractional clip begin/end value, but
806         //       this makes the blit a *lot* more hairy. I might fix this in the future
807         //       if it becomes necessary. (JLH)
808         //       Probably wouldn't be *that* hairy. Just use a delta that tells the inner loop
809         //       which pixel in the phrase is being written, and quit when either end of phrases
810         //       is reached or line buffer extents are surpassed.
811
812 //This stuff is probably wrong as well... !!! FIX !!!
813 //The strange thing is that it seems to work, but that's no guarantee that it's bulletproof!
814 //Yup. Seems that JagMania doesn't work correctly with this...
815 //Dunno if this is the problem, but Atari Karts is showing *some* of the road now...
816 //      if (!flagREFLECT)
817
818 /*
819         if (leftMargin < 0)
820                 clippedWidth = 0 - leftMargin,
821                 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth],
822                 leftMargin = 0 - (clippedWidth % phraseWidthToPixels[depth]);
823 //              leftMargin = 0;
824
825         if (rightMargin > lbufWidth)
826                 clippedWidth = rightMargin - lbufWidth,
827                 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];//,
828 //              rightMargin = lbufWidth + (clippedWidth % phraseWidthToPixels[depth]);
829 //              rightMargin = lbufWidth;
830 */
831 if (depth > 5)
832         WriteLog("OP: We're about to encounter a divide by zero error!\n");
833         // NOTE: We're just using endPos to figure out how much, if any, to clip by.
834         // ALSO: There may be another case where we start out of bounds and end out of bounds...!
835         // !!! FIX !!!
836         if (startPos < 0)                       // Case #1: Begin out, end in, L to R
837                 clippedWidth = 0 - startPos,
838                 dataClippedWidth = phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth],
839                 startPos = 0 - (clippedWidth % phraseWidthToPixels[depth]);
840
841         if (endPos < 0)                         // Case #2: Begin in, end out, R to L
842                 clippedWidth = 0 - endPos,
843                 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];
844
845         if (endPos > lbufWidth)         // Case #3: Begin in, end out, L to R
846                 clippedWidth = endPos - lbufWidth,
847                 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];
848
849         if (startPos > lbufWidth)       // Case #4: Begin out, end in, R to L
850                 clippedWidth = startPos - lbufWidth,
851                 dataClippedWidth = phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth],
852                 startPos = lbufWidth + (clippedWidth % phraseWidthToPixels[depth]);
853
854         // If the image is sitting on the line buffer left or right edge, we need to compensate
855         // by decreasing the image phrase width accordingly.
856         iwidth -= phraseClippedWidth;
857
858         // Also, if we're clipping the phrase we need to make sure we're in the correct part of
859         // the pixel data.
860 //      data += phraseClippedWidth * (pitch << 3);
861         data += dataClippedWidth * pitch;
862
863         // NOTE: When the bitmap is in REFLECT mode, the XPOS marks the *right* side of the
864         //       bitmap! This makes clipping & etc. MUCH, much easier...!
865 //      uint32 lbufAddress = 0x1800 + (!in24BPPMode ? leftMargin * 2 : leftMargin * 4);
866 //Why does this work right when multiplying startPos by 2 (instead of 4) for 24 BPP mode?
867 //Is this a bug in the OP?
868         uint32 lbufAddress = 0x1800 + (!in24BPPMode ? startPos * 2 : startPos * 2);
869         uint8 * currentLineBuffer = &tomRam8[lbufAddress];
870
871         // Render.
872
873 // Hmm. We check above for 24 BPP mode, but don't do anything about it below...
874 // If we *were* in 24 BPP mode, how would you convert CRY to RGB24? Seems to me
875 // that if you're in CRY mode then you wouldn't be able to use 24 BPP bitmaps
876 // anyway.
877 // This seems to be the case (at least according to the Midsummer docs)...!
878
879         if (depth == 0)                                                                 // 1 BPP
880         {
881                 // The LSB of flags is OPFLAG_REFLECT, so sign extend it and or 2 into it.
882                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
883
884                 // Fetch 1st phrase...
885                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
886 //Note that firstPix should only be honored *if* we start with the 1st phrase of the bitmap
887 //i.e., we didn't clip on the margin... !!! FIX !!!
888                 pixels <<= firstPix;                                            // Skip first N pixels (N=firstPix)...
889                 int i = firstPix;                                                       // Start counter at right spot...
890
891                 while (iwidth--)
892                 {
893                         while (i++ < 64)
894                         {
895                                 uint8 bit = pixels >> 63;
896                                 if (flagTRANS && bit == 0)
897                                         ;       // Do nothing...
898                                 else
899                                 {
900                                         if (!flagRMW)
901 //Optimize: Set palleteRAM16 to beginning of palette RAM + index*2 and use only [bit] as index...
902 //Won't optimize RMW case though...
903                                                 // This is the *only* correct use of endian-dependent code
904                                                 // (i.e., mem-to-mem direct copying)!
905                                                 *(uint16 *)currentLineBuffer = paletteRAM16[index | bit];
906                                         else
907                                                 *currentLineBuffer =
908                                                         BLEND_CR(*currentLineBuffer, paletteRAM[(index | bit) << 1]),
909                                                 *(currentLineBuffer + 1) =
910                                                         BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bit) << 1) + 1]);
911                                 }
912
913                                 currentLineBuffer += lbufDelta;
914                                 pixels <<= 1;
915                         }
916                         i = 0;
917                         // Fetch next phrase...
918                         data += pitch;
919                         pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
920                 }
921         }
922         else if (depth == 1)                                                    // 2 BPP
923         {
924 if (firstPix)
925         WriteLog("OP: Fixed bitmap @ 2 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
926                 index &= 0xFC;                                                          // Top six bits form CLUT index
927                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
928                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
929
930                 while (iwidth--)
931                 {
932                         // Fetch phrase...
933                         uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
934                         data += pitch;
935
936                         for(int i=0; i<32; i++)
937                         {
938                                 uint8 bits = pixels >> 62;
939 // Seems to me that both of these are in the same endian, so we could cast it as
940 // uint16 * and do straight across copies (what about 24 bpp? Treat it differently...)
941 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
942 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
943 // No, it isn't because we read the memory in an endian safe way--this *won't* work...
944                                 if (flagTRANS && bits == 0)
945                                         ;       // Do nothing...
946                                 else
947                                 {
948                                         if (!flagRMW)
949                                                 *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
950                                         else
951                                                 *currentLineBuffer =
952                                                         BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
953                                                 *(currentLineBuffer + 1) =
954                                                         BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
955                                 }
956
957                                 currentLineBuffer += lbufDelta;
958                                 pixels <<= 2;
959                         }
960                 }
961         }
962         else if (depth == 2)                                                    // 4 BPP
963         {
964 if (firstPix)
965         WriteLog("OP: Fixed bitmap @ 4 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
966                 index &= 0xF0;                                                          // Top four bits form CLUT index
967                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
968                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
969
970                 while (iwidth--)
971                 {
972                         // Fetch phrase...
973                         uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
974                         data += pitch;
975
976                         for(int i=0; i<16; i++)
977                         {
978                                 uint8 bits = pixels >> 60;
979 // Seems to me that both of these are in the same endian, so we could cast it as
980 // uint16 * and do straight across copies (what about 24 bpp? Treat it differently...)
981 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
982 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
983 // No, it isn't because we read the memory in an endian safe way--this *won't* work...
984                                 if (flagTRANS && bits == 0)
985                                         ;       // Do nothing...
986                                 else
987                                 {
988                                         if (!flagRMW)
989                                                 *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
990                                         else
991                                                 *currentLineBuffer =
992                                                         BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
993                                                 *(currentLineBuffer + 1) =
994                                                         BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
995                                 }
996
997                                 currentLineBuffer += lbufDelta;
998                                 pixels <<= 4;
999                         }
1000                 }
1001         }
1002         else if (depth == 3)                                                    // 8 BPP
1003         {
1004                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1005                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1006
1007                 // Fetch 1st phrase...
1008                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1009 //Note that firstPix should only be honored *if* we start with the 1st phrase of the bitmap
1010 //i.e., we didn't clip on the margin... !!! FIX !!!
1011                 firstPix &= 0x30;                                                       // Only top two bits are valid for 8 BPP
1012                 pixels <<= firstPix;                                            // Skip first N pixels (N=firstPix)...
1013                 int i = firstPix >> 3;                                          // Start counter at right spot...
1014
1015                 while (iwidth--)
1016                 {
1017                         while (i++ < 8)
1018                         {
1019                                 uint8 bits = pixels >> 56;
1020 // Seems to me that both of these are in the same endian, so we could cast it as
1021 // uint16 * and do straight across copies (what about 24 bpp? Treat it differently...)
1022 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
1023 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
1024 // No, it isn't because we read the memory in an endian safe way--this *won't* work...
1025                                 if (flagTRANS && bits == 0)
1026                                         ;       // Do nothing...
1027                                 else
1028                                 {
1029                                         if (!flagRMW)
1030                                                 *(uint16 *)currentLineBuffer = paletteRAM16[bits];
1031                                         else
1032                                                 *currentLineBuffer =
1033                                                         BLEND_CR(*currentLineBuffer, paletteRAM[bits << 1]),
1034                                                 *(currentLineBuffer + 1) =
1035                                                         BLEND_Y(*(currentLineBuffer + 1), paletteRAM[(bits << 1) + 1]);
1036                                 }
1037
1038                                 currentLineBuffer += lbufDelta;
1039                                 pixels <<= 8;
1040                         }
1041                         i = 0;
1042                         // Fetch next phrase...
1043                         data += pitch;
1044                         pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1045                 }
1046         }
1047         else if (depth == 4)                                                    // 16 BPP
1048         {
1049 if (firstPix)
1050         WriteLog("OP: Fixed bitmap @ 16 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1051                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1052                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1053
1054                 while (iwidth--)
1055                 {
1056                         // Fetch phrase...
1057                         uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1058                         data += pitch;
1059
1060                         for(int i=0; i<4; i++)
1061                         {
1062                                 uint8 bitsHi = pixels >> 56, bitsLo = pixels >> 48;
1063 // Seems to me that both of these are in the same endian, so we could cast it as
1064 // uint16 * and do straight across copies (what about 24 bpp? Treat it differently...)
1065 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
1066 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
1067 // No, it isn't because we read the memory in an endian safe way--it *won't* work...
1068                                 if (flagTRANS && (bitsLo | bitsHi) == 0)
1069                                         ;       // Do nothing...
1070                                 else
1071                                 {
1072                                         if (!flagRMW)
1073                                                 *currentLineBuffer = bitsHi,
1074                                                 *(currentLineBuffer + 1) = bitsLo;
1075                                         else
1076                                                 *currentLineBuffer =
1077                                                         BLEND_CR(*currentLineBuffer, bitsHi),
1078                                                 *(currentLineBuffer + 1) =
1079                                                         BLEND_Y(*(currentLineBuffer + 1), bitsLo);
1080                                 }
1081
1082                                 currentLineBuffer += lbufDelta;
1083                                 pixels <<= 16;
1084                         }
1085                 }
1086         }
1087         else if (depth == 5)                                                    // 24 BPP
1088         {
1089 //Looks like Iron Soldier is the only game that uses 24BPP mode...
1090 //There *might* be others...
1091 //WriteLog("OP: Writing 24 BPP bitmap!\n");
1092 if (firstPix)
1093         WriteLog("OP: Fixed bitmap @ 24 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1094                 // Not sure, but I think RMW only works with 16 BPP and below, and only in CRY mode...
1095                 // The LSB of flags is OPFLAG_REFLECT, so sign extend it and OR 4 into it.
1096                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 4) | 0x04;
1097
1098                 while (iwidth--)
1099                 {
1100                         // Fetch phrase...
1101                         uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1102                         data += pitch;
1103
1104                         for(int i=0; i<2; i++)
1105                         {
1106                                 // We don't use a 32-bit var here because of endian issues...!
1107                                 uint8 bits3 = pixels >> 56, bits2 = pixels >> 48,
1108                                         bits1 = pixels >> 40, bits0 = pixels >> 32;
1109
1110                                 if (flagTRANS && (bits3 | bits2 | bits1 | bits0) == 0)
1111                                         ;       // Do nothing...
1112                                 else
1113                                         *currentLineBuffer = bits3,
1114                                         *(currentLineBuffer + 1) = bits2,
1115                                         *(currentLineBuffer + 2) = bits1,
1116                                         *(currentLineBuffer + 3) = bits0;
1117
1118                                 currentLineBuffer += lbufDelta;
1119                                 pixels <<= 32;
1120                         }
1121                 }
1122         }
1123 }
1124
1125 //
1126 // Store scaled bitmap in line buffer
1127 //
1128 void OPProcessScaledBitmap(uint64 p0, uint64 p1, uint64 p2, bool render)
1129 {
1130 // Need to make sure that when writing that it stays within the line buffer...
1131 // LBUF ($F01800 - $F01D9E) 360 x 32-bit RAM
1132         uint8 depth = (p1 >> 12) & 0x07;                                // Color depth of image
1133         int32 xpos = ((int16)((p1 << 4) & 0xFFFF)) >> 4;// Image xpos in LBUF
1134         uint32 iwidth = (p1 >> 28) & 0x3FF;                             // Image width in *phrases*
1135         uint32 data = (p0 >> 40) & 0xFFFFF8;                    // Pixel data address
1136 //#ifdef OP_DEBUG_BMP
1137 // Prolly should use this... Though not sure exactly how.
1138 //Use the upper bits as an offset into the phrase depending on the BPP. That's how!
1139         uint32 firstPix = (p1 >> 49) & 0x3F;
1140 //This is WEIRD! I'm sure I saw Atari Karts request 8 BPP FIRSTPIX! What happened???
1141 if (firstPix)
1142         WriteLog("OP: FIRSTPIX != 0! (Scaled BM)\n");
1143 //#endif
1144 // We can ignore the RELEASE (high order) bit for now--probably forever...!
1145 //      uint8 flags = (p1 >> 45) & 0x0F;        // REFLECT, RMW, TRANS, RELEASE
1146 //Optimize: break these out to their own BOOL values [DONE]
1147         uint8 flags = (p1 >> 45) & 0x07;                                // REFLECT (0), RMW (1), TRANS (2)
1148         bool flagREFLECT = (flags & OPFLAG_REFLECT ? true : false),
1149                 flagRMW = (flags & OPFLAG_RMW ? true : false),
1150                 flagTRANS = (flags & OPFLAG_TRANS ? true : false);
1151         uint8 index = (p1 >> 37) & 0xFE;                                // CLUT index offset (upper pix, 1-4 bpp)
1152         uint32 pitch = (p1 >> 15) & 0x07;                               // Phrase pitch
1153
1154         uint8 * tomRam8 = TOMGetRamPointer();
1155         uint8 * paletteRAM = &tomRam8[0x400];
1156         // This is OK as long as it's used correctly: For 16-bit RAM to RAM direct copies--NOT
1157         // for use when using endian-corrected data (i.e., any of the *ReadWord functions!)
1158         uint16 * paletteRAM16 = (uint16 *)paletteRAM;
1159
1160         uint8 hscale = p2 & 0xFF;
1161 // Hmm. It seems that fixing the horizontal scale necessitated re-fixing this. Not sure why,
1162 // but seems to be consistent with the vertical scaling now (and it may turn out to be wrong!)...
1163         uint8 horizontalRemainder = hscale;                             // Not sure if it starts full, but seems reasonable [It's not!]
1164 //      uint8 horizontalRemainder = 0;                                  // Let's try zero! Seems to work! Yay! [No, it doesn't!]
1165         int32 scaledWidthInPixels = (iwidth * phraseWidthToPixels[depth] * hscale) >> 5;
1166         uint32 scaledPhrasePixels = (phraseWidthToPixels[depth] * hscale) >> 5;
1167
1168 //      WriteLog("bitmap %ix? %ibpp at %i,? firstpix=? data=0x%.8x pitch %i hflipped=%s dwidth=? (linked to ?) RMW=%s Tranparent=%s\n",
1169 //              iwidth, op_bitmap_bit_depth[bitdepth], xpos, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), (flags&OPFLAG_RMW ? "yes" : "no"), (flags&OPFLAG_TRANS ? "yes" : "no"));
1170
1171 // Looks like an hscale of zero means don't draw!
1172         if (!render || iwidth == 0 || hscale == 0)
1173                 return;
1174
1175 /*extern int start_logging;
1176 if (start_logging)
1177         WriteLog("OP: Scaled bitmap %ix? %ibpp at %i,? hscale=%02X fpix=%i data=%08X pitch %i hflipped=%s dwidth=? (linked to %08X) Transluency=%s\n",
1178                 iwidth, op_bitmap_bit_depth[depth], xpos, hscale, firstPix, data, pitch, (flagREFLECT ? "yes" : "no"), op_pointer, (flagRMW ? "yes" : "no"));*/
1179 //#define OP_DEBUG_BMP
1180 //#ifdef OP_DEBUG_BMP
1181 //      WriteLog("OP: Scaled bitmap %ix%i %ibpp at %i,%i firstpix=%i data=0x%.8x pitch %i hflipped=%s dwidth=%i (linked to 0x%.8x) Transluency=%s\n",
1182 //              iwidth, height, op_bitmap_bit_depth[bitdepth], xpos, ypos, firstPix, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), dwidth, op_pointer, (flags&OPFLAG_RMW ? "yes" : "no"));
1183 //#endif
1184
1185         int32 startPos = xpos, endPos = xpos +
1186                 (!flagREFLECT ? scaledWidthInPixels - 1 : -(scaledWidthInPixels + 1));
1187         uint32 clippedWidth = 0, phraseClippedWidth = 0, dataClippedWidth = 0;
1188         bool in24BPPMode = (((GET16(tomRam8, 0x0028) >> 1) & 0x03) == 1 ? true : false);        // VMODE
1189         // Not sure if this is Jaguar Two only location or what...
1190         // From the docs, it is... If we want to limit here we should think of something else.
1191 //      int32 limit = GET16(tom_ram_8, 0x0008);                 // LIMIT
1192         int32 limit = 720;
1193         int32 lbufWidth = (!in24BPPMode ? limit - 1 : (limit / 2) - 1); // Zero based limit...
1194
1195         // If the image is completely to the left or right of the line buffer, then bail.
1196 //If in REFLECT mode, then these values are swapped! !!! FIX !!! [DONE]
1197 //There are four possibilities:
1198 //  1. image sits on left edge and no REFLECT; starts out of bounds but ends in bounds.
1199 //  2. image sits on left edge and REFLECT; starts in bounds but ends out of bounds.
1200 //  3. image sits on right edge and REFLECT; starts out of bounds but ends in bounds.
1201 //  4. image sits on right edge and no REFLECT; starts in bounds but ends out of bounds.
1202 //Numbers 2 & 4 can be caught by checking the LBUF clip while in the inner loop,
1203 // numbers 1 & 3 are of concern.
1204 // This *indirectly* handles only cases 2 & 4! And is WRONG if REFLECT is set...!
1205 //      if (rightMargin < 0 || leftMargin > lbufWidth)
1206
1207 // It might be easier to swap these (if REFLECTed) and just use XPOS down below...
1208 // That way, you could simply set XPOS to leftMargin if !REFLECT and to rightMargin otherwise.
1209 // Still have to be careful with the DATA and IWIDTH values though...
1210
1211         if ((!flagREFLECT && (endPos < 0 || startPos > lbufWidth))
1212                 || (flagREFLECT && (startPos < 0 || endPos > lbufWidth)))
1213                 return;
1214
1215         // Otherwise, find the clip limits and clip the phrase as well...
1216         // NOTE: I'm fudging here by letting the actual blit overstep the bounds of the
1217         //       line buffer, but it shouldn't matter since there are two unused line
1218         //       buffers below and nothing above and I'll at most write 40 bytes outside
1219         //       the line buffer... I could use a fractional clip begin/end value, but
1220         //       this makes the blit a *lot* more hairy. I might fix this in the future
1221         //       if it becomes necessary. (JLH)
1222         //       Probably wouldn't be *that* hairy. Just use a delta that tells the inner loop
1223         //       which pixel in the phrase is being written, and quit when either end of phrases
1224         //       is reached or line buffer extents are surpassed.
1225
1226 //This stuff is probably wrong as well... !!! FIX !!!
1227 //The strange thing is that it seems to work, but that's no guarantee that it's bulletproof!
1228 //Yup. Seems that JagMania doesn't work correctly with this...
1229 //Dunno if this is the problem, but Atari Karts is showing *some* of the road now...
1230 //Actually, it is! Or, it was. It doesn't seem to be clipping here, so the problem lies
1231 //elsewhere! Hmm. Putting the scaling code into the 1/2/8 BPP cases seems to draw the ground
1232 // a bit more accurately... Strange!
1233 //It's probably a case of the REFLECT flag being set and the background being written
1234 //from the right side of the screen...
1235 //But no, it isn't... At least if the diagnostics are telling the truth!
1236
1237         // NOTE: We're just using endPos to figure out how much, if any, to clip by.
1238         // ALSO: There may be another case where we start out of bounds and end out of bounds...!
1239         // !!! FIX !!!
1240
1241 //There's a problem here with scaledPhrasePixels in that it can be forced to zero when
1242 //the scaling factor is small. So fix it already! !!! FIX !!!
1243 /*if (scaledPhrasePixels == 0)
1244 {
1245         WriteLog("OP: [Scaled] We're about to encounter a divide by zero error!\n");
1246         DumpScaledObject(p0, p1, p2);
1247 }//*/
1248 //NOTE: I'm almost 100% sure that this is wrong... And it is! :-p
1249
1250 //Try a simple example...
1251 // Let's say we have a 8 BPP scanline with an hscale of $80 (4). Our xpos is -10,
1252 // non-flipped. Pixels in the bitmap are XYZXYZXYZXYZXYZ.
1253 // Scaled up, they would be XXXXYYYYZZZZXXXXYYYYZZZZXXXXYYYYZZZZ...
1254 //
1255 // Normally, we would expect this in the line buffer:
1256 // ZZXXXXYYYYZZZZXXXXYYYYZZZZ...
1257 //
1258 // But instead we're getting:
1259 // XXXXYYYYZZZZXXXXYYYYZZZZ...
1260 //
1261 // or are we??? It would seem so, simply by virtue of the fact that we're NOT starting
1262 // on a negative boundary--or are we? Hmm...
1263 // cw = 10, dcw = pcw = 10 / ([8 * 4 = 32] 32) = 0, sp = -10
1264 //
1265 // Let's try a real world example:
1266 //
1267 //OP: Scaled bitmap (70, 8 BPP, spp=28) sp (-400) < 0... [new sp=-8, cw=400, dcw=pcw=14]
1268 //OP: Scaled bitmap (6F, 8 BPP, spp=27) sp (-395) < 0... [new sp=-17, cw=395, dcw=pcw=14]
1269 //
1270 // Really, spp is 27.75 in the second case...
1271 // So... If we do 395 / 27.75, we get 14. Ok so far... If we scale that against the
1272 // start position (14 * 27.75), we get -6.5... NOT -17!
1273
1274 //Now it seems we're working OK, at least for the first case...
1275 uint32 scaledPhrasePixelsUS = phraseWidthToPixels[depth] * hscale;
1276
1277         if (startPos < 0)                       // Case #1: Begin out, end in, L to R
1278 {
1279 extern int start_logging;
1280 if (start_logging)
1281         WriteLog("OP: Scaled bitmap (%02X, %u BPP, spp=%u) start pos (%i) < 0...", hscale, op_bitmap_bit_depth[depth], scaledPhrasePixels, startPos);
1282 //              clippedWidth = 0 - startPos,
1283                 clippedWidth = (0 - startPos) << 5,
1284 //              dataClippedWidth = phraseClippedWidth = clippedWidth / scaledPhrasePixels,
1285                 dataClippedWidth = phraseClippedWidth = (clippedWidth / scaledPhrasePixelsUS) >> 5,
1286 //              startPos = 0 - (clippedWidth % scaledPhrasePixels);
1287                 startPos += (dataClippedWidth * scaledPhrasePixelsUS) >> 5;
1288 if (start_logging)
1289         WriteLog(" [new sp=%i, cw=%i, dcw=pcw=%i]\n", startPos, clippedWidth, dataClippedWidth);
1290 }
1291
1292         if (endPos < 0)                         // Case #2: Begin in, end out, R to L
1293                 clippedWidth = 0 - endPos,
1294                 phraseClippedWidth = clippedWidth / scaledPhrasePixels;
1295
1296         if (endPos > lbufWidth)         // Case #3: Begin in, end out, L to R
1297                 clippedWidth = endPos - lbufWidth,
1298                 phraseClippedWidth = clippedWidth / scaledPhrasePixels;
1299
1300         if (startPos > lbufWidth)       // Case #4: Begin out, end in, R to L
1301                 clippedWidth = startPos - lbufWidth,
1302                 dataClippedWidth = phraseClippedWidth = clippedWidth / scaledPhrasePixels,
1303                 startPos = lbufWidth + (clippedWidth % scaledPhrasePixels);
1304
1305 extern int op_start_log;
1306 if (op_start_log && clippedWidth != 0)
1307         WriteLog("OP: Clipped line. SP=%i, EP=%i, clip=%u, iwidth=%u, hscale=%02X\n", startPos, endPos, clippedWidth, iwidth, hscale);
1308 if (op_start_log && startPos == 13)
1309 {
1310         WriteLog("OP: Scaled line. SP=%i, EP=%i, clip=%u, iwidth=%u, hscale=%02X, depth=%u, firstPix=%u\n", startPos, endPos, clippedWidth, iwidth, hscale, depth, firstPix);
1311         DumpScaledObject(p0, p1, p2);
1312         if (iwidth == 7)
1313         {
1314                 WriteLog("    %08X: ", data);
1315                 for(int i=0; i<7*8; i++)
1316                         WriteLog("%02X ", JaguarReadByte(data+i));
1317                 WriteLog("\n");
1318         }
1319 }
1320         // If the image is sitting on the line buffer left or right edge, we need to compensate
1321         // by decreasing the image phrase width accordingly.
1322         iwidth -= phraseClippedWidth;
1323
1324         // Also, if we're clipping the phrase we need to make sure we're in the correct part of
1325         // the pixel data.
1326 //      data += phraseClippedWidth * (pitch << 3);
1327         data += dataClippedWidth * (pitch << 3);
1328
1329         // NOTE: When the bitmap is in REFLECT mode, the XPOS marks the *right* side of the
1330         //       bitmap! This makes clipping & etc. MUCH, much easier...!
1331 //      uint32 lbufAddress = 0x1800 + (!in24BPPMode ? leftMargin * 2 : leftMargin * 4);
1332 //      uint32 lbufAddress = 0x1800 + (!in24BPPMode ? startPos * 2 : startPos * 4);
1333         uint32 lbufAddress = 0x1800 + startPos * 2;
1334         uint8 * currentLineBuffer = &tomRam8[lbufAddress];
1335 //uint8 * lineBufferLowerLimit = &tom_ram_8[0x1800],
1336 //      * lineBufferUpperLimit = &tom_ram_8[0x1800 + 719];
1337
1338         // Render.
1339
1340 // Hmm. We check above for 24 BPP mode, but don't do anything about it below...
1341 // If we *were* in 24 BPP mode, how would you convert CRY to RGB24? Seems to me
1342 // that if you're in CRY mode then you wouldn't be able to use 24 BPP bitmaps
1343 // anyway.
1344 // This seems to be the case (at least according to the Midsummer docs)...!
1345
1346         if (depth == 0)                                                                 // 1 BPP
1347         {
1348 if (firstPix != 0)
1349         WriteLog("OP: Scaled bitmap @ 1 BPP requesting FIRSTPIX!\n");
1350                 // The LSB of flags is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1351                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1352
1353                 int pixCount = 0;
1354                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1355
1356                 while ((int32)iwidth > 0)
1357                 {
1358                         uint8 bits = pixels >> 63;
1359
1360                         if (flagTRANS && bits == 0)
1361                                 ;       // Do nothing...
1362                         else
1363                         {
1364                                 if (!flagRMW)
1365                                         // This is the *only* correct use of endian-dependent code
1366                                         // (i.e., mem-to-mem direct copying)!
1367                                         *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1368                                 else
1369                                         *currentLineBuffer =
1370                                                 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1371                                         *(currentLineBuffer + 1) =
1372                                                 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1373                         }
1374
1375                         currentLineBuffer += lbufDelta;
1376
1377 /*                      horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1378                         while (horizontalRemainder & 0x80)
1379                         {
1380                                 horizontalRemainder += hscale;
1381                                 pixCount++;
1382                                 pixels <<= 1;
1383                         }//*/
1384                         while (horizontalRemainder <= 0x20)             // I.e., it's <= 0 (*before* subtraction)
1385                         {
1386                                 horizontalRemainder += hscale;
1387                                 pixCount++;
1388                                 pixels <<= 1;
1389                         }
1390                         horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1391
1392                         if (pixCount > 63)
1393                         {
1394                                 int phrasesToSkip = pixCount / 64, pixelShift = pixCount % 64;
1395
1396                                 data += (pitch << 3) * phrasesToSkip;
1397                                 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1398                                 pixels <<= 1 * pixelShift;
1399                                 iwidth -= phrasesToSkip;
1400                                 pixCount = pixelShift;
1401                         }
1402                 }
1403         }
1404         else if (depth == 1)                                                    // 2 BPP
1405         {
1406 if (firstPix != 0)
1407         WriteLog("OP: Scaled bitmap @ 2 BPP requesting FIRSTPIX!\n");
1408                 index &= 0xFC;                                                          // Top six bits form CLUT index
1409                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1410                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1411
1412                 int pixCount = 0;
1413                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1414
1415                 while ((int32)iwidth > 0)
1416                 {
1417                         uint8 bits = pixels >> 62;
1418
1419                         if (flagTRANS && bits == 0)
1420                                 ;       // Do nothing...
1421                         else
1422                         {
1423                                 if (!flagRMW)
1424                                         // This is the *only* correct use of endian-dependent code
1425                                         // (i.e., mem-to-mem direct copying)!
1426                                         *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1427                                 else
1428                                         *currentLineBuffer =
1429                                                 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1430                                         *(currentLineBuffer + 1) =
1431                                                 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1432                         }
1433
1434                         currentLineBuffer += lbufDelta;
1435
1436 /*                      horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1437                         while (horizontalRemainder & 0x80)
1438                         {
1439                                 horizontalRemainder += hscale;
1440                                 pixCount++;
1441                                 pixels <<= 2;
1442                         }//*/
1443                         while (horizontalRemainder <= 0x20)             // I.e., it's <= 0 (*before* subtraction)
1444                         {
1445                                 horizontalRemainder += hscale;
1446                                 pixCount++;
1447                                 pixels <<= 2;
1448                         }
1449                         horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1450
1451                         if (pixCount > 31)
1452                         {
1453                                 int phrasesToSkip = pixCount / 32, pixelShift = pixCount % 32;
1454
1455                                 data += (pitch << 3) * phrasesToSkip;
1456                                 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1457                                 pixels <<= 2 * pixelShift;
1458                                 iwidth -= phrasesToSkip;
1459                                 pixCount = pixelShift;
1460                         }
1461                 }
1462         }
1463         else if (depth == 2)                                                    // 4 BPP
1464         {
1465 if (firstPix != 0)
1466         WriteLog("OP: Scaled bitmap @ 4 BPP requesting FIRSTPIX!\n");
1467                 index &= 0xF0;                                                          // Top four bits form CLUT index
1468                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1469                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1470
1471                 int pixCount = 0;
1472                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1473
1474                 while ((int32)iwidth > 0)
1475                 {
1476                         uint8 bits = pixels >> 60;
1477
1478                         if (flagTRANS && bits == 0)
1479                                 ;       // Do nothing...
1480                         else
1481                         {
1482                                 if (!flagRMW)
1483                                         // This is the *only* correct use of endian-dependent code
1484                                         // (i.e., mem-to-mem direct copying)!
1485                                         *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1486                                 else
1487                                         *currentLineBuffer =
1488                                                 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1489                                         *(currentLineBuffer + 1) =
1490                                                 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1491                         }
1492
1493                         currentLineBuffer += lbufDelta;
1494
1495 /*                      horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1496                         while (horizontalRemainder & 0x80)
1497                         {
1498                                 horizontalRemainder += hscale;
1499                                 pixCount++;
1500                                 pixels <<= 4;
1501                         }//*/
1502                         while (horizontalRemainder <= 0x20)             // I.e., it's <= 0 (*before* subtraction)
1503                         {
1504                                 horizontalRemainder += hscale;
1505                                 pixCount++;
1506                                 pixels <<= 4;
1507                         }
1508                         horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1509
1510                         if (pixCount > 15)
1511                         {
1512                                 int phrasesToSkip = pixCount / 16, pixelShift = pixCount % 16;
1513
1514                                 data += (pitch << 3) * phrasesToSkip;
1515                                 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1516                                 pixels <<= 4 * pixelShift;
1517                                 iwidth -= phrasesToSkip;
1518                                 pixCount = pixelShift;
1519                         }
1520                 }
1521         }
1522         else if (depth == 3)                                                    // 8 BPP
1523         {
1524 if (firstPix)
1525         WriteLog("OP: Scaled bitmap @ 8 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1526                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1527                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1528
1529                 int pixCount = 0;
1530                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1531
1532                 while ((int32)iwidth > 0)
1533                 {
1534                         uint8 bits = pixels >> 56;
1535
1536                         if (flagTRANS && bits == 0)
1537                                 ;       // Do nothing...
1538                         else
1539                         {
1540                                 if (!flagRMW)
1541                                         // This is the *only* correct use of endian-dependent code
1542                                         // (i.e., mem-to-mem direct copying)!
1543                                         *(uint16 *)currentLineBuffer = paletteRAM16[bits];
1544 /*                              {
1545                                         if (currentLineBuffer >= lineBufferLowerLimit && currentLineBuffer <= lineBufferUpperLimit)
1546                                                 *(uint16 *)currentLineBuffer = paletteRAM16[bits];
1547                                 }*/
1548                                 else
1549                                         *currentLineBuffer =
1550                                                 BLEND_CR(*currentLineBuffer, paletteRAM[bits << 1]),
1551                                         *(currentLineBuffer + 1) =
1552                                                 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[(bits << 1) + 1]);
1553                         }
1554
1555                         currentLineBuffer += lbufDelta;
1556
1557                         while (horizontalRemainder <= 0x20)             // I.e., it's <= 0 (*before* subtraction)
1558                         {
1559                                 horizontalRemainder += hscale;
1560                                 pixCount++;
1561                                 pixels <<= 8;
1562                         }
1563                         horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1564
1565                         if (pixCount > 7)
1566                         {
1567                                 int phrasesToSkip = pixCount / 8, pixelShift = pixCount % 8;
1568
1569                                 data += (pitch << 3) * phrasesToSkip;
1570                                 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1571                                 pixels <<= 8 * pixelShift;
1572                                 iwidth -= phrasesToSkip;
1573                                 pixCount = pixelShift;
1574                         }
1575                 }
1576         }
1577         else if (depth == 4)                                                    // 16 BPP
1578         {
1579 if (firstPix != 0)
1580         WriteLog("OP: Scaled bitmap @ 16 BPP requesting FIRSTPIX!\n");
1581                 // The LSB is OPFLAG_REFLECT, so sign extend it and OR 2 into it.
1582                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1583
1584                 int pixCount = 0;
1585                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1586
1587                 while ((int32)iwidth > 0)
1588                 {
1589                         uint8 bitsHi = pixels >> 56, bitsLo = pixels >> 48;
1590
1591                         if (flagTRANS && (bitsLo | bitsHi) == 0)
1592                                 ;       // Do nothing...
1593                         else
1594                         {
1595                                 if (!flagRMW)
1596                                         *currentLineBuffer = bitsHi,
1597                                         *(currentLineBuffer + 1) = bitsLo;
1598                                 else
1599                                         *currentLineBuffer =
1600                                                 BLEND_CR(*currentLineBuffer, bitsHi),
1601                                         *(currentLineBuffer + 1) =
1602                                                 BLEND_Y(*(currentLineBuffer + 1), bitsLo);
1603                         }
1604
1605                         currentLineBuffer += lbufDelta;
1606
1607 /*                      horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1608                         while (horizontalRemainder & 0x80)
1609                         {
1610                                 horizontalRemainder += hscale;
1611                                 pixCount++;
1612                                 pixels <<= 16;
1613                         }//*/
1614                         while (horizontalRemainder <= 0x20)             // I.e., it's <= 0 (*before* subtraction)
1615                         {
1616                                 horizontalRemainder += hscale;
1617                                 pixCount++;
1618                                 pixels <<= 16;
1619                         }
1620                         horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1621 //*/
1622                         if (pixCount > 3)
1623                         {
1624                                 int phrasesToSkip = pixCount / 4, pixelShift = pixCount % 4;
1625
1626                                 data += (pitch << 3) * phrasesToSkip;
1627                                 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1628                                 pixels <<= 16 * pixelShift;
1629
1630                                 iwidth -= phrasesToSkip;
1631
1632                                 pixCount = pixelShift;
1633                         }
1634                 }
1635         }
1636         else if (depth == 5)                                                    // 24 BPP
1637         {
1638 //I'm not sure that you can scale a 24 BPP bitmap properly--the JTRM seems to indicate as much.
1639 WriteLog("OP: Writing 24 BPP scaled bitmap!\n");
1640 if (firstPix != 0)
1641         WriteLog("OP: Scaled bitmap @ 24 BPP requesting FIRSTPIX!\n");
1642                 // Not sure, but I think RMW only works with 16 BPP and below, and only in CRY mode...
1643                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 4 into it.
1644                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 4) | 0x04;
1645
1646                 while (iwidth--)
1647                 {
1648                         // Fetch phrase...
1649                         uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1650                         data += pitch << 3;                                             // Multiply pitch * 8 (optimize: precompute this value)
1651
1652                         for(int i=0; i<2; i++)
1653                         {
1654                                 uint8 bits3 = pixels >> 56, bits2 = pixels >> 48,
1655                                         bits1 = pixels >> 40, bits0 = pixels >> 32;
1656
1657                                 if (flagTRANS && (bits3 | bits2 | bits1 | bits0) == 0)
1658                                         ;       // Do nothing...
1659                                 else
1660                                         *currentLineBuffer = bits3,
1661                                         *(currentLineBuffer + 1) = bits2,
1662                                         *(currentLineBuffer + 2) = bits1,
1663                                         *(currentLineBuffer + 3) = bits0;
1664
1665                                 currentLineBuffer += lbufDelta;
1666                                 pixels <<= 32;
1667                         }
1668                 }
1669         }
1670 }