]> Shamusworld >> Repos - virtualjaguar/blob - src/objectp.cpp
856b233ca3e6e6563246ce65c6fd160b2189b166
[virtualjaguar] / src / objectp.cpp
1 //
2 // Object Processor
3 //
4 // Original source by David Raingeard (Cal2)
5 // GCC/SDL port by Niels Wagenaar (Linux/WIN32) and Caz (BeOS)
6 // Extensive cleanups/fixes/rewrites by James L. Hammons
7 //
8
9 #include "objectp.h"
10
11 #include <stdlib.h>
12 #include <string.h>
13 #include "gpu.h"
14 #include "jaguar.h"
15 #include "log.h"
16 #include "m68k.h"
17 #include "tom.h"
18
19 //#define OP_DEBUG
20 //#define OP_DEBUG_BMP
21
// Blend table lookups. The table index is (dst << 8) | src, where dst is the
// existing component byte and src is the incoming delta byte (see OPInit for
// how the tables are filled). Both arguments are fully parenthesized before
// the cast so that compound expressions (e.g. "a + b") index the intended entry.
#define BLEND_Y(dst, src)	op_blend_y[(((uint16)(dst)) << 8) | ((uint16)(src))]
#define BLEND_CR(dst, src)	op_blend_cr[(((uint16)(dst)) << 8) | ((uint16)(src))]
24
// Object types, taken from the low three bits of an object's first phrase
#define OBJECT_TYPE_BITMAP			0		// 000
#define OBJECT_TYPE_SCALE			1		// 001
#define OBJECT_TYPE_GPU				2		// 010
#define OBJECT_TYPE_BRANCH			3		// 011
#define OBJECT_TYPE_STOP			4		// 100

// Condition codes tested by BRANCH objects
#define CONDITION_EQUAL				0
#define CONDITION_LESS_THAN			1
#define CONDITION_GREATER_THAN		2
#define CONDITION_OP_FLAG_SET		3
#define CONDITION_SECOND_HALF_LINE	4

// Per-bitmap flag bits (as extracted into a 4-bit "flags" value)
#define OPFLAG_RELEASE				(1 << 3)	// Bus release bit
#define OPFLAG_TRANS				(1 << 2)	// Transparency bit
#define OPFLAG_RMW					(1 << 1)	// Read-Modify-Write bit
#define OPFLAG_REFLECT				(1 << 0)	// Horizontal mirror bit
41
42 // Private function prototypes
43
44 void OPProcessFixedBitmap(uint64 p0, uint64 p1, bool render);
45 void OPProcessScaledBitmap(uint64 p0, uint64 p1, uint64 p2, bool render);
46 void DumpScaledObject(uint64 p0, uint64 p1, uint64 p2);
47 void DumpFixedObject(uint64 p0, uint64 p1);
48 uint64 OPLoadPhrase(uint32 offset);
49
50 // Local global variables
51
52 // Blend tables (64K each)
53 static uint8 op_blend_y[0x10000];
54 static uint8 op_blend_cr[0x10000];
55 // There may be a problem with this "RAM" overlapping (and thus being independent of)
56 // some of the regular TOM RAM...
57 //#warning objectp_ram is separated from TOM RAM--need to fix that!
58 //static uint8 objectp_ram[0x40];                       // This is based at $F00000
59 uint8 objectp_running = 0;
60 //bool objectp_stop_reading_list;
61
62 static uint8 op_bitmap_bit_depth[8] = { 1, 2, 4, 8, 16, 24, 32, 0 };
63 //static uint32 op_bitmap_bit_size[8] =
64 //      { (uint32)(0.125*65536), (uint32)(0.25*65536), (uint32)(0.5*65536), (uint32)(1*65536),
65 //        (uint32)(2*65536),     (uint32)(1*65536),    (uint32)(1*65536),   (uint32)(1*65536) };
66 static uint32 op_pointer;
67
68 int32 phraseWidthToPixels[8] = { 64, 32, 16, 8, 4, 2, 0, 0 };
69
70
71 //
72 // Object Processor initialization
73 //
74 void OPInit(void)
75 {
76         // Here we calculate the saturating blend of a signed 4-bit value and an
77         // existing Cyan/Red value as well as a signed 8-bit value and an existing intensity...
78         // Note: CRY is 4 bits Cyan, 4 bits Red, 16 bits intensitY
79         for(int i=0; i<256*256; i++)
80         {
81                 int y = (i >> 8) & 0xFF;
82                 int dy = (int8)i;                                       // Sign extend the Y index
83                 int c1 = (i >> 8) & 0x0F;
84                 int dc1 = (int8)(i << 4) >> 4;          // Sign extend the R index
85                 int c2 = (i >> 12) & 0x0F;
86                 int dc2 = (int8)(i & 0xF0) >> 4;        // Sign extend the C index
87
88                 y += dy;
89
90                 if (y < 0)
91                         y = 0;
92                 else if (y > 0xFF)
93                         y = 0xFF;
94
95                 op_blend_y[i] = y;
96
97                 c1 += dc1;
98
99                 if (c1 < 0)
100                         c1 = 0;
101                 else if (c1 > 0x0F)
102                         c1 = 0x0F;
103
104                 c2 += dc2;
105
106                 if (c2 < 0)
107                         c2 = 0;
108                 else if (c2 > 0x0F)
109                         c2 = 0x0F;
110
111                 op_blend_cr[i] = (c2 << 4) | c1;
112         }
113
114         OPReset();
115 }
116
117 //
118 // Object Processor reset
119 //
120 void OPReset(void)
121 {
122 //      memset(objectp_ram, 0x00, 0x40);
123         objectp_running = 0;
124 }
125
126 void OPDone(void)
127 {
128         const char * opType[8] =
129         { "(BITMAP)", "(SCALED BITMAP)", "(GPU INT)", "(BRANCH)", "(STOP)", "???", "???", "???" };
130         const char * ccType[8] =
131                 { "\"==\"", "\"<\"", "\">\"", "(opflag set)", "(second half line)", "?", "?", "?" };
132
133         uint32 olp = OPGetListPointer();
134         WriteLog("OP: OLP = %08X\n", olp);
135         WriteLog("OP: Phrase dump\n    ----------\n");
136         for(uint32 i=0; i<0x100; i+=8)
137         {
138                 uint32 hi = JaguarReadLong(olp + i, OP), lo = JaguarReadLong(olp + i + 4, OP);
139                 WriteLog("\t%08X: %08X %08X %s", olp + i, hi, lo, opType[lo & 0x07]);
140                 if ((lo & 0x07) == 3)
141                 {
142                         uint16 ypos = (lo >> 3) & 0x7FF;
143                         uint8  cc   = (lo >> 14) & 0x03;
144                         uint32 link = ((hi << 11) | (lo >> 21)) & 0x3FFFF8;
145                         WriteLog(" YPOS=%u, CC=%s, link=%08X", ypos, ccType[cc], link);
146                 }
147                 WriteLog("\n");
148                 if ((lo & 0x07) == 0)
149                         DumpFixedObject(OPLoadPhrase(olp+i), OPLoadPhrase(olp+i+8));
150                 if ((lo & 0x07) == 1)
151                         DumpScaledObject(OPLoadPhrase(olp+i), OPLoadPhrase(olp+i+8), OPLoadPhrase(olp+i+16));
152         }
153         WriteLog("\n");
154
155 //      memory_free(op_blend_y);
156 //      memory_free(op_blend_cr);
157 }
158
159 //
160 // Object Processor memory access
161 // Memory range: F00010 - F00027
162 //
163 //      F00010-F00017   R     xxxxxxxx xxxxxxxx   OB - current object code from the graphics processor
164 //      F00020-F00023     W   xxxxxxxx xxxxxxxx   OLP - start of the object list
165 //      F00026            W   -------- -------x   OBF - object processor flag
166 //
167
#if 0
// Disabled: byte/word accessors for the old private objectp_ram buffer.
// Offsets are wrapped to the buffer's 0x40 bytes before use.

uint8 OPReadByte(uint32 offset, uint32 who/*=UNKNOWN*/)
{
	return objectp_ram[offset & 0x3F];
}

uint16 OPReadWord(uint32 offset, uint32 who/*=UNKNOWN*/)
{
	uint32 wrapped = offset & 0x3F;
	return GET16(objectp_ram, wrapped);
}

void OPWriteByte(uint32 offset, uint8 data, uint32 who/*=UNKNOWN*/)
{
	objectp_ram[offset & 0x3F] = data;
}

void OPWriteWord(uint32 offset, uint16 data, uint32 who/*=UNKNOWN*/)
{
	uint32 wrapped = offset & 0x3F;
	SET16(objectp_ram, wrapped, data);
}
#endif
198
199 uint32 OPGetListPointer(void)
200 {
201         // Note: This register is LO / HI WORD, hence the funky look of this...
202         return GET16(tomRam8, 0x20) | (GET16(tomRam8, 0x22) << 16);
203 }
204
205 // This is WRONG, since the OBF is only 16 bits wide!!! [FIXED]
206
207 uint32 OPGetStatusRegister(void)
208 {
209         return GET16(tomRam8, 0x26);
210 }
211
212 // This is WRONG, since the OBF is only 16 bits wide!!! [FIXED]
213
214 void OPSetStatusRegister(uint32 data)
215 {
216         tomRam8[0x26] = (data & 0x0000FF00) >> 8;
217         tomRam8[0x27] |= (data & 0xFE);
218 }
219
220 void OPSetCurrentObject(uint64 object)
221 {
222 //Not sure this is right... Wouldn't it just be stored 64 bit BE?
223         // Stored as least significant 32 bits first, ms32 last in big endian
224 /*      objectp_ram[0x13] = object & 0xFF; object >>= 8;
225         objectp_ram[0x12] = object & 0xFF; object >>= 8;
226         objectp_ram[0x11] = object & 0xFF; object >>= 8;
227         objectp_ram[0x10] = object & 0xFF; object >>= 8;
228
229         objectp_ram[0x17] = object & 0xFF; object >>= 8;
230         objectp_ram[0x16] = object & 0xFF; object >>= 8;
231         objectp_ram[0x15] = object & 0xFF; object >>= 8;
232         objectp_ram[0x14] = object & 0xFF;*/
233 // Let's try regular good old big endian...
234         tomRam8[0x17] = object & 0xFF; object >>= 8;
235         tomRam8[0x16] = object & 0xFF; object >>= 8;
236         tomRam8[0x15] = object & 0xFF; object >>= 8;
237         tomRam8[0x14] = object & 0xFF; object >>= 8;
238
239         tomRam8[0x13] = object & 0xFF; object >>= 8;
240         tomRam8[0x12] = object & 0xFF; object >>= 8;
241         tomRam8[0x11] = object & 0xFF; object >>= 8;
242         tomRam8[0x10] = object & 0xFF;
243 }
244
245 uint64 OPLoadPhrase(uint32 offset)
246 {
247         offset &= ~0x07;                                                // 8 byte alignment
248         return ((uint64)JaguarReadLong(offset, OP) << 32) | (uint64)JaguarReadLong(offset+4, OP);
249 }
250
251 void OPStorePhrase(uint32 offset, uint64 p)
252 {
253         offset &= ~0x07;                                                // 8 byte alignment
254         JaguarWriteLong(offset, p >> 32, OP);
255         JaguarWriteLong(offset + 4, p & 0xFFFFFFFF, OP);
256 }
257
258 //
259 // Debugging routines
260 //
261 void DumpScaledObject(uint64 p0, uint64 p1, uint64 p2)
262 {
263         WriteLog(" (SCALED BITMAP)");
264         WriteLog(" %08X --> phrase %08X %08X\n", op_pointer, (uint32)(p1>>32), (uint32)(p1&0xFFFFFFFF));
265         WriteLog("                 %08X --> phrase %08X %08X ", op_pointer+8, (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
266         uint8 bitdepth = (p1 >> 12) & 0x07;
267 //WAS:  int16 ypos = ((p0 >> 3) & 0x3FF);                       // ??? What if not interlaced (/2)?
268         int16 ypos = ((p0 >> 3) & 0x7FF);                       // ??? What if not interlaced (/2)?
269         int32 xpos = p1 & 0xFFF;
270         xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);
271         uint32 iwidth = ((p1 >> 28) & 0x3FF);
272         uint32 dwidth = ((p1 >> 18) & 0x3FF);           // Unsigned!
273         uint16 height = ((p0 >> 14) & 0x3FF);
274         uint32 link = ((p0 >> 24) & 0x7FFFF) << 3;
275         uint32 ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
276         uint32 firstPix = (p1 >> 49) & 0x3F;
277         uint8 flags = (p1 >> 45) & 0x0F;
278         uint8 idx = (p1 >> 38) & 0x7F;
279         uint32 pitch = (p1 >> 15) & 0x07;
280         WriteLog("\n    [%u (%u) x %u @ (%i, %u) (%u bpp), l: %08X, p: %08X fp: %02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
281                 iwidth, dwidth, height, xpos, ypos, op_bitmap_bit_depth[bitdepth], link, ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""), (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""), (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
282         uint32 hscale = p2 & 0xFF;
283         uint32 vscale = (p2 >> 8) & 0xFF;
284         uint32 remainder = (p2 >> 16) & 0xFF;
285         WriteLog("    [hsc: %02X, vsc: %02X, rem: %02X]\n", hscale, vscale, remainder);
286 }
287
288 void DumpFixedObject(uint64 p0, uint64 p1)
289 {
290         WriteLog(" (BITMAP)");
291         WriteLog(" %08X --> phrase %08X %08X\n", op_pointer, (uint32)(p1>>32), (uint32)(p1&0xFFFFFFFF));
292         uint8 bitdepth = (p1 >> 12) & 0x07;
293 //WAS:  int16 ypos = ((p0 >> 3) & 0x3FF);                       // ??? What if not interlaced (/2)?
294         int16 ypos = ((p0 >> 3) & 0x7FF);                       // ??? What if not interlaced (/2)?
295         int32 xpos = p1 & 0xFFF;
296         xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);
297         uint32 iwidth = ((p1 >> 28) & 0x3FF);
298         uint32 dwidth = ((p1 >> 18) & 0x3FF);           // Unsigned!
299         uint16 height = ((p0 >> 14) & 0x3FF);
300         uint32 link = ((p0 >> 24) & 0x7FFFF) << 3;
301         uint32 ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
302         uint32 firstPix = (p1 >> 49) & 0x3F;
303         uint8 flags = (p1 >> 45) & 0x0F;
304         uint8 idx = (p1 >> 38) & 0x7F;
305         uint32 pitch = (p1 >> 15) & 0x07;
306         WriteLog("    [%u (%u) x %u @ (%i, %u) (%u bpp), l: %08X, p: %08X fp: %02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
307                 iwidth, dwidth, height, xpos, ypos, op_bitmap_bit_depth[bitdepth], link, ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""), (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""), (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
308 }
309
310 //
311 // Object Processor main routine
312 //
313 //Need to fix this so that when an GPU object IRQ happens, we can pick up OP processing
314 //where we left off. !!! FIX !!!
315 #warning Need to fix this so that when an GPU object IRQ happens, we can pick up OP processing where we left off. !!! FIX !!!
316 void OPProcessList(int scanline, bool render)
317 {
318 extern int op_start_log;
319 //      char * condition_to_str[8] =
320 //              { "==", "<", ">", "(opflag set)", "(second half line)", "?", "?", "?" };
321
322         op_pointer = OPGetListPointer();
323
324 //      objectp_stop_reading_list = false;
325
326 //WriteLog("OP: Processing line #%u (OLP=%08X)...\n", scanline, op_pointer);
327 //op_done();
328
329 // *** BEGIN OP PROCESSOR TESTING ONLY ***
330 extern bool interactiveMode;
331 extern bool iToggle;
332 extern int objectPtr;
333 bool inhibit;
334 int bitmapCounter = 0;
335 // *** END OP PROCESSOR TESTING ONLY ***
336
337         uint32 opCyclesToRun = 10000;                                   // This is a pulled-out-of-the-air value (will need to be fixed, obviously!)
338
339 //      if (op_pointer) WriteLog(" new op list at 0x%.8x scanline %i\n",op_pointer,scanline);
340         while (op_pointer)
341         {
342 // *** BEGIN OP PROCESSOR TESTING ONLY ***
343 if (interactiveMode && bitmapCounter == objectPtr)
344         inhibit = iToggle;
345 else
346         inhibit = false;
347 // *** END OP PROCESSOR TESTING ONLY ***
348 //              if (objectp_stop_reading_list)
349 //                      return;
350
351                 uint64 p0 = OPLoadPhrase(op_pointer);
352 //WriteLog("\t%08X type %i\n", op_pointer, (uint8)p0 & 0x07);
353                 op_pointer += 8;
354 if (scanline == TOMGetVDB() && op_start_log)
355 //if (scanline == 215 && op_start_log)
356 //if (scanline == 28 && op_start_log)
357 {
358 WriteLog("%08X --> phrase %08X %08X", op_pointer - 8, (int)(p0>>32), (int)(p0&0xFFFFFFFF));
359 if ((p0 & 0x07) == OBJECT_TYPE_BITMAP)
360 {
361 WriteLog(" (BITMAP) ");
362 uint64 p1 = OPLoadPhrase(op_pointer);
363 WriteLog("\n%08X --> phrase %08X %08X ", op_pointer, (int)(p1>>32), (int)(p1&0xFFFFFFFF));
364         uint8 bitdepth = (p1 >> 12) & 0x07;
365 //WAS:  int16 ypos = ((p0 >> 3) & 0x3FF);                       // ??? What if not interlaced (/2)?
366         int16 ypos = ((p0 >> 3) & 0x7FF);                       // ??? What if not interlaced (/2)?
367 int32 xpos = p1 & 0xFFF;
368 xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);
369         uint32 iwidth = ((p1 >> 28) & 0x3FF);
370         uint32 dwidth = ((p1 >> 18) & 0x3FF);           // Unsigned!
371         uint16 height = ((p0 >> 14) & 0x3FF);
372         uint32 link = ((p0 >> 24) & 0x7FFFF) << 3;
373         uint32 ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
374         uint32 firstPix = (p1 >> 49) & 0x3F;
375         uint8 flags = (p1 >> 45) & 0x0F;
376         uint8 idx = (p1 >> 38) & 0x7F;
377         uint32 pitch = (p1 >> 15) & 0x07;
378 WriteLog("\n    [%u (%u) x %u @ (%i, %u) (%u bpp), l: %08X, p: %08X fp: %02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
379         iwidth, dwidth, height, xpos, ypos, op_bitmap_bit_depth[bitdepth], link, ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""), (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""), (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
380 }
381 if ((p0 & 0x07) == OBJECT_TYPE_SCALE)
382 {
383 WriteLog(" (SCALED BITMAP)");
384 uint64 p1 = OPLoadPhrase(op_pointer), p2 = OPLoadPhrase(op_pointer+8);
385 WriteLog("\n%08X --> phrase %08X %08X ", op_pointer, (int)(p1>>32), (int)(p1&0xFFFFFFFF));
386 WriteLog("\n%08X --> phrase %08X %08X ", op_pointer+8, (int)(p2>>32), (int)(p2&0xFFFFFFFF));
387         uint8 bitdepth = (p1 >> 12) & 0x07;
388 //WAS:  int16 ypos = ((p0 >> 3) & 0x3FF);                       // ??? What if not interlaced (/2)?
389         int16 ypos = ((p0 >> 3) & 0x7FF);                       // ??? What if not interlaced (/2)?
390 int32 xpos = p1 & 0xFFF;
391 xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);
392         uint32 iwidth = ((p1 >> 28) & 0x3FF);
393         uint32 dwidth = ((p1 >> 18) & 0x3FF);           // Unsigned!
394         uint16 height = ((p0 >> 14) & 0x3FF);
395         uint32 link = ((p0 >> 24) & 0x7FFFF) << 3;
396         uint32 ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
397         uint32 firstPix = (p1 >> 49) & 0x3F;
398         uint8 flags = (p1 >> 45) & 0x0F;
399         uint8 idx = (p1 >> 38) & 0x7F;
400         uint32 pitch = (p1 >> 15) & 0x07;
401 WriteLog("\n    [%u (%u) x %u @ (%i, %u) (%u bpp), l: %08X, p: %08X fp: %02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
402         iwidth, dwidth, height, xpos, ypos, op_bitmap_bit_depth[bitdepth], link, ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""), (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""), (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
403         uint32 hscale = p2 & 0xFF;
404         uint32 vscale = (p2 >> 8) & 0xFF;
405         uint32 remainder = (p2 >> 16) & 0xFF;
406 WriteLog("    [hsc: %02X, vsc: %02X, rem: %02X]\n", hscale, vscale, remainder);
407 }
408 if ((p0 & 0x07) == OBJECT_TYPE_GPU)
409 WriteLog(" (GPU)\n");
410 if ((p0 & 0x07) == OBJECT_TYPE_BRANCH)
411 {
412 WriteLog(" (BRANCH)\n");
413 uint8 * jaguarMainRam = GetRamPtr();
414 WriteLog("[RAM] --> ");
415 for(int k=0; k<8; k++)
416         WriteLog("%02X ", jaguarMainRam[op_pointer-8 + k]);
417 WriteLog("\n");
418 }
419 if ((p0 & 0x07) == OBJECT_TYPE_STOP)
420 WriteLog("    --> List end\n");
421 }//*/
422
423                 switch ((uint8)p0 & 0x07)
424                 {
425                 case OBJECT_TYPE_BITMAP:
426                 {
427 //WAS:                  uint16 ypos = (p0 >> 3) & 0x3FF;
428                         uint16 ypos = (p0 >> 3) & 0x7FF;
429 // This is only theory implied by Rayman...!
430 // It seems that if the YPOS is zero, then bump the YPOS value so that it coincides with
431 // the VDB value. With interlacing, this would be slightly more tricky.
432 // There's probably another bit somewhere that enables this mode--but so far, doesn't seem
433 // to affect any other game in a negative way (that I've seen).
434 // Either that, or it's an undocumented bug...
435
436 //No, the reason this was needed is that the OP code before was wrong. Any value
437 //less than VDB will get written to the top line of the display!
438 //                      if (ypos == 0)
439 //                              ypos = TOMReadWord(0xF00046, OP) / 2;                   // Get the VDB value
440                         uint32 height = (p0 & 0xFFC000) >> 14;
441                         uint32 oldOPP = op_pointer - 8;
442 // *** BEGIN OP PROCESSOR TESTING ONLY ***
443 if (inhibit && op_start_log)
444         WriteLog("!!! ^^^ This object is INHIBITED! ^^^ !!!\n");
445 bitmapCounter++;
446 if (!inhibit)   // For OP testing only!
447 // *** END OP PROCESSOR TESTING ONLY ***
448                         if (scanline >= ypos && height > 0)
449                         {
450                                 uint64 p1 = OPLoadPhrase(op_pointer);
451                                 op_pointer += 8;
452 //WriteLog("OP: Writing scanline %d with ypos == %d...\n", scanline, ypos);
453 //WriteLog("--> Writing %u BPP bitmap...\n", op_bitmap_bit_depth[(p1 >> 12) & 0x07]);
454 //                              OPProcessFixedBitmap(scanline, p0, p1, render);
455                                 OPProcessFixedBitmap(p0, p1, render);
456
457                                 // OP write-backs
458
459 //???Does this really happen??? Doesn't seem to work if you do this...!
460 //Probably not. Must be a bug in the documentation...!
461 //                              uint32 link = (p0 & 0x7FFFF000000) >> 21;
462 //                              SET16(tom_ram_8, 0x20, link & 0xFFFF);  // OLP
463 //                              SET16(tom_ram_8, 0x22, link >> 16);
464 /*                              uint32 height = (p0 & 0xFFC000) >> 14;
465                                 if (height - 1 > 0)
466                                         height--;*/
467                                 // NOTE: Would subtract 2 if in interlaced mode...!
468 //                              uint64 height = ((p0 & 0xFFC000) - 0x4000) & 0xFFC000;
469 //                              if (height)
470                                 height--;
471
472                                 uint64 data = (p0 & 0xFFFFF80000000000LL) >> 40;
473                                 uint64 dwidth = (p1 & 0xFFC0000) >> 15;
474                                 data += dwidth;
475
476                                 p0 &= ~0xFFFFF80000FFC000LL;            // Mask out old data...
477                                 p0 |= (uint64)height << 14;
478                                 p0 |= data << 40;
479                                 OPStorePhrase(oldOPP, p0);
480                         }
481 //WriteLog("\t\tOld OP: %08X -> ", op_pointer);
482 //Temp, for testing...
483 //No doubt, this type of check will break all kinds of stuff... !!! FIX !!!
484 //And it does! !!! FIX !!!
485 //Let's remove this "fix" since it screws up more than it fixes.
486 /*      if (op_pointer > ((p0 & 0x000007FFFF000000LL) >> 21))
487                 return;*/
488
489                         op_pointer = (p0 & 0x000007FFFF000000LL) >> 21;
490 //WriteLog("New OP: %08X\n", op_pointer);
491                         break;
492                 }
493                 case OBJECT_TYPE_SCALE:
494                 {
495 //WAS:                  uint16 ypos = (p0 >> 3) & 0x3FF;
496                         uint16 ypos = (p0 >> 3) & 0x7FF;
497                         uint32 height = (p0 & 0xFFC000) >> 14;
498                         uint32 oldOPP = op_pointer - 8;
499 // *** BEGIN OP PROCESSOR TESTING ONLY ***
500 if (inhibit && op_start_log)
501 {
502         WriteLog("!!! ^^^ This object is INHIBITED! ^^^ !!! (scanline=%u, ypos=%u, height=%u)\n", scanline, ypos, height);
503         DumpScaledObject(p0, OPLoadPhrase(op_pointer), OPLoadPhrase(op_pointer+8));
504 }
505 bitmapCounter++;
506 if (!inhibit)   // For OP testing only!
507 // *** END OP PROCESSOR TESTING ONLY ***
508                         if (scanline >= ypos && height > 0)
509                         {
510                                 uint64 p1 = OPLoadPhrase(op_pointer);
511                                 op_pointer += 8;
512                                 uint64 p2 = OPLoadPhrase(op_pointer);
513                                 op_pointer += 8;
514 //WriteLog("OP: %08X (%d) %08X%08X %08X%08X %08X%08X\n", oldOPP, scanline, (uint32)(p0>>32), (uint32)(p0&0xFFFFFFFF), (uint32)(p1>>32), (uint32)(p1&0xFFFFFFFF), (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
515                                 OPProcessScaledBitmap(p0, p1, p2, render);
516
517                                 // OP write-backs
518
519                                 uint8 remainder = p2 >> 16, vscale = p2 >> 8;
520 //Actually, we should skip this object if it has a vscale of zero.
521 //Or do we? Not sure... Atari Karts has a few lines that look like:
522 // (SCALED BITMAP)
523 //000E8268 --> phrase 00010000 7000B00D
524 //    [7 (0) x 1 @ (13, 0) (8 bpp), l: 000E82A0, p: 000E0FC0 fp: 00, fl:RELEASE, idx:00, pt:01]
525 //    [hsc: 9A, vsc: 00, rem: 00]
526 // Could it be the vscale is overridden if the DWIDTH is zero? Hmm...
527
528                                 if (vscale == 0)
529                                         vscale = 0x20;                                  // OP bug??? Nope, it isn't...! Or is it?
530
531 //extern int start_logging;
532 //if (start_logging)
533 //      WriteLog("--> Returned from scaled bitmap processing (rem=%02X, vscale=%02X)...\n", remainder, vscale);//*/
534 //Locks up here:
535 //--> Returned from scaled bitmap processing (rem=20, vscale=80)...
536 //There are other problems here, it looks like...
537 //Another lock up:
538 //About to execute OP (508)...
539 /*
540 OP: Scaled bitmap 4x? 4bpp at 38,? hscale=7C fpix=0 data=00075E28 pitch 1 hflipped=no dwidth=? (linked to 00071118) Transluency=no
541 --> Returned from scaled bitmap processing (rem=50, vscale=7C)...
542 OP: Scaled bitmap 4x? 4bpp at 38,? hscale=7C fpix=0 data=00075E28 pitch 1 hflipped=no dwidth=? (linked to 00071118) Transluency=no
543 --> Returned from scaled bitmap processing (rem=30, vscale=7C)...
544 OP: Scaled bitmap 4x? 4bpp at 38,? hscale=7C fpix=0 data=00075E28 pitch 1 hflipped=no dwidth=? (linked to 00071118) Transluency=no
545 --> Returned from scaled bitmap processing (rem=10, vscale=7C)...
546 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756A8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
547 --> Returned from scaled bitmap processing (rem=00, vscale=7E)...
548 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
549 --> Returned from scaled bitmap processing (rem=00, vscale=80)...
550 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
551 --> Returned from scaled bitmap processing (rem=5E, vscale=7E)...
552 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756E8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
553 --> Returned from scaled bitmap processing (rem=60, vscale=80)...
554 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
555 --> Returned from scaled bitmap processing (rem=3E, vscale=7E)...
556 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756E8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
557 --> Returned from scaled bitmap processing (rem=40, vscale=80)...
558 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
559 --> Returned from scaled bitmap processing (rem=1E, vscale=7E)...
560 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756E8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
561 --> Returned from scaled bitmap processing (rem=20, vscale=80)...
562 */
563 //Here's another problem:
564 //    [hsc: 20, vsc: 20, rem: 00]
565 // Since we're not checking for $E0 (but that's what we get from the above), we end
566 // up repeating this scanline unnecessarily... !!! FIX !!! [DONE, but... still not quite
567 // right. Either that, or the Accolade team that wrote Bubsy screwed up royal.]
568 //Also note: $E0 = 7.0 which IS a legal vscale value...
569
570 //                              if (remainder & 0x80)                           // I.e., it's negative
571 //                              if ((remainder & 0x80) || remainder == 0)       // I.e., it's <= 0
572 //                              if ((remainder - 1) >= 0xE0)            // I.e., it's <= 0
573 //                              if ((remainder >= 0xE1) || remainder == 0)// I.e., it's <= 0
574 //                              if ((remainder >= 0xE1 && remainder <= 0xFF) || remainder == 0)// I.e., it's <= 0
575                                 if (remainder <= 0x20)                          // I.e., it's <= 0
576                                 {
577                                         uint64 data = (p0 & 0xFFFFF80000000000LL) >> 40;
578                                         uint64 dwidth = (p1 & 0xFFC0000) >> 15;
579
580 //                                      while (remainder & 0x80)
581 //                                      while ((remainder & 0x80) || remainder == 0)
582 //                                      while ((remainder - 1) >= 0xE0)
583 //                                      while ((remainder >= 0xE1) || remainder == 0)
584 //                                      while ((remainder >= 0xE1 && remainder <= 0xFF) || remainder == 0)
585                                         while (remainder <= 0x20)
586                                         {
587                                                 remainder += vscale;
588
589                                                 if (height)
590                                                         height--;
591
592                                                 data += dwidth;
593                                         }
594
595                                         p0 &= ~0xFFFFF80000FFC000LL;    // Mask out old data...
596                                         p0 |= (uint64)height << 14;
597                                         p0 |= data << 40;
598                                         OPStorePhrase(oldOPP, p0);
599                                 }
600
601                                 remainder -= 0x20;                                      // 1.0f in [3.5] fixed point format
602
603 //if (start_logging)
604 //      WriteLog("--> Finished writebacks...\n");//*/
605
606 //WriteLog(" [%08X%08X -> ", (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
607                                 p2 &= ~0x0000000000FF0000LL;
608                                 p2 |= (uint64)remainder << 16;
609 //WriteLog("%08X%08X]\n", (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
610                                 OPStorePhrase(oldOPP+16, p2);
611 //remainder = (uint8)(p2 >> 16), vscale = (uint8)(p2 >> 8);
612 //WriteLog(" [after]: rem=%02X, vscale=%02X\n", remainder, vscale);
613                         }
614                         op_pointer = (p0 & 0x000007FFFF000000LL) >> 21;
615                         break;
616                 }
617                 case OBJECT_TYPE_GPU:
618                 {
619 //WriteLog("OP: Asserting GPU IRQ #3...\n");
620                         OPSetCurrentObject(p0);
621                         GPUSetIRQLine(3, ASSERT_LINE);
622 //Also, OP processing is suspended from this point until OBF (F00026) is written to...
623 // !!! FIX !!!
624 //Do something like:
625 //OPSuspendedByGPU = true;
626 //Dunno if the OP keeps processing from where it was interrupted, or if it just continues
627 //on the next scanline...
628 // --> It continues from where it was interrupted! !!! FIX !!!
629                         break;
630                 }
631                 case OBJECT_TYPE_BRANCH:
632                 {
633                         uint16 ypos = (p0 >> 3) & 0x7FF;
634                         uint8  cc   = (p0 >> 14) & 0x03;
635                         uint32 link = (p0 >> 21) & 0x3FFFF8;
636
637 //                      if ((ypos!=507)&&(ypos!=25))
638 //                              WriteLog("\t%i%s%i link=0x%.8x\n",scanline,condition_to_str[cc],ypos>>1,link);
639                         switch (cc)
640                         {
641                         case CONDITION_EQUAL:
642                                 if (TOMReadWord(0xF00006, OP) == ypos || ypos == 0x7FF)
643                                         op_pointer = link;
644                                 break;
645                         case CONDITION_LESS_THAN:
646                                 if (TOMReadWord(0xF00006, OP) < ypos)
647                                         op_pointer = link;
648                                 break;
649                         case CONDITION_GREATER_THAN:
650                                 if (TOMReadWord(0xF00006, OP) > ypos)
651                                         op_pointer = link;
652                                 break;
653                         case CONDITION_OP_FLAG_SET:
654                                 if (OPGetStatusRegister() & 0x01)
655                                         op_pointer = link;
656                                 break;
657                         case CONDITION_SECOND_HALF_LINE:
658                                 // This basically means branch if bit 10 of HC is set
659                                 WriteLog("OP: Unexpected CONDITION_SECOND_HALF_LINE in BRANCH object\nOP: shuting down\n");
660 //                              fclose(log_get());
661                                 LogDone();
662                                 exit(0);
663                                 break;
664                         default:
665                                 WriteLog("OP: Unimplemented branch condition %i\n", cc);
666                         }
667                         break;
668                 }
669                 case OBJECT_TYPE_STOP:
670                 {
671 //op_start_log = 0;
672                         // unsure
673 //WriteLog("OP: --> STOP\n");
674 //                      op_set_status_register(((p0>>3) & 0xFFFFFFFF));
675 //This seems more likely...
676                         OPSetCurrentObject(p0);
677
678                         if (p0 & 0x08)
679                         {
680                                 TOMSetPendingObjectInt();
681                                 if (TOMIRQEnabled(IRQ_OPFLAG))// && jaguar_interrupt_handler_is_valid(64))
682                                         m68k_set_irq(7);                                // Cause an NMI to occur...
683                         }
684
685                         return;
686 //                      break;
687                 }
688                 default:
689                         WriteLog("op: unknown object type %i\n", ((uint8)p0 & 0x07));
690                         return;
691                 }
692
693                 // Here is a little sanity check to keep the OP from locking up the machine
694                 // when fed bad data. Better would be to count how many actual cycles it used
695                 // and bail out/reenter to properly simulate an overloaded OP... !!! FIX !!!
696                 opCyclesToRun--;
697                 if (!opCyclesToRun)
698                         return;
699         }
700 }
701
702 //
703 // Store fixed size bitmap in line buffer
704 //
705 void OPProcessFixedBitmap(uint64 p0, uint64 p1, bool render)
706 {
707 // Need to make sure that when writing that it stays within the line buffer...
708 // LBUF ($F01800 - $F01D9E) 360 x 32-bit RAM
709         uint8 depth = (p1 >> 12) & 0x07;                                // Color depth of image
710         int32 xpos = ((int16)((p1 << 4) & 0xFFFF)) >> 4;// Image xpos in LBUF
711         uint32 iwidth = (p1 >> 28) & 0x3FF;                             // Image width in *phrases*
712         uint32 data = (p0 >> 40) & 0xFFFFF8;                    // Pixel data address
713 //#ifdef OP_DEBUG_BMP
714         uint32  firstPix = (p1 >> 49) & 0x3F;
715         // "The LSB is significant only for scaled objects..." -JTRM
716         // "In 1 BPP mode, all five bits are significant. In 2 BPP mode, the top four are significant..."
717         firstPix &= 0x3E;
718 //#endif
719 // We can ignore the RELEASE (high order) bit for now--probably forever...!
720 //      uint8 flags = (p1 >> 45) & 0x0F;        // REFLECT, RMW, TRANS, RELEASE
721 //Optimize: break these out to their own BOOL values
722         uint8 flags = (p1 >> 45) & 0x07;                                // REFLECT (0), RMW (1), TRANS (2)
723         bool flagREFLECT = (flags & OPFLAG_REFLECT ? true : false),
724                 flagRMW = (flags & OPFLAG_RMW ? true : false),
725                 flagTRANS = (flags & OPFLAG_TRANS ? true : false);
726 // "For images with 1 to 4 bits/pixel the top 7 to 4 bits of the index
727 //  provide the most significant bits of the palette address."
728         uint8 index = (p1 >> 37) & 0xFE;                                // CLUT index offset (upper pix, 1-4 bpp)
729         uint32 pitch = (p1 >> 15) & 0x07;                               // Phrase pitch
730         pitch <<= 3;                                                                    // Optimization: Multiply pitch by 8
731
732 //      int16 scanlineWidth = tom_getVideoModeWidth();
733         uint8 * tomRam8 = TOMGetRamPointer();
734         uint8 * paletteRAM = &tomRam8[0x400];
735         // This is OK as long as it's used correctly: For 16-bit RAM to RAM direct copies--NOT
736         // for use when using endian-corrected data (i.e., any of the *_word_read functions!)
737         uint16 * paletteRAM16 = (uint16 *)paletteRAM;
738
739 //      WriteLog("bitmap %ix? %ibpp at %i,? firstpix=? data=0x%.8x pitch %i hflipped=%s dwidth=? (linked to ?) RMW=%s Tranparent=%s\n",
740 //              iwidth, op_bitmap_bit_depth[bitdepth], xpos, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), (flags&OPFLAG_RMW ? "yes" : "no"), (flags&OPFLAG_TRANS ? "yes" : "no"));
741
742 // Is it OK to have a 0 for the data width??? (i.e., undocumented?)
743 // Seems to be... Seems that dwidth *can* be zero (i.e., reuse same line) as well.
744 // Pitch == 0 is OK too...
745 //      if (!render || op_pointer == 0 || ptr == 0 || pitch == 0)
746 //I'm not convinced that we need to concern ourselves with data & op_pointer here either!
747         if (!render || iwidth == 0)
748                 return;
749
750 //#define OP_DEBUG_BMP
751 //#ifdef OP_DEBUG_BMP
752 //      WriteLog("bitmap %ix%i %ibpp at %i,%i firstpix=%i data=0x%.8x pitch %i hflipped=%s dwidth=%i (linked to 0x%.8x) Transluency=%s\n",
753 //              iwidth, height, op_bitmap_bit_depth[bitdepth], xpos, ypos, firstPix, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), dwidth, op_pointer, (flags&OPFLAG_RMW ? "yes" : "no"));
754 //#endif
755
756 //      int32 leftMargin = xpos, rightMargin = (xpos + (phraseWidthToPixels[depth] * iwidth)) - 1;
757         int32 startPos = xpos, endPos = xpos +
758                 (!flagREFLECT ? (phraseWidthToPixels[depth] * iwidth) - 1
759                 : -((phraseWidthToPixels[depth] * iwidth) + 1));
760         uint32 clippedWidth = 0, phraseClippedWidth = 0, dataClippedWidth = 0;//, phrasePixel = 0;
761         bool in24BPPMode = (((GET16(tomRam8, 0x0028) >> 1) & 0x03) == 1 ? true : false);        // VMODE
762         // Not sure if this is Jaguar Two only location or what...
763         // From the docs, it is... If we want to limit here we should think of something else.
764 //      int32 limit = GET16(tom_ram_8, 0x0008);                 // LIMIT
765         int32 limit = 720;
766         int32 lbufWidth = (!in24BPPMode ? limit - 1 : (limit / 2) - 1); // Zero based limit...
767
768         // If the image is completely to the left or right of the line buffer, then bail.
769 //If in REFLECT mode, then these values are swapped! !!! FIX !!! [DONE]
770 //There are four possibilities:
771 //  1. image sits on left edge and no REFLECT; starts out of bounds but ends in bounds.
772 //  2. image sits on left edge and REFLECT; starts in bounds but ends out of bounds.
773 //  3. image sits on right edge and REFLECT; starts out of bounds but ends in bounds.
774 //  4. image sits on right edge and no REFLECT; starts in bounds but ends out of bounds.
775 //Numbers 2 & 4 can be caught by checking the LBUF clip while in the inner loop,
776 // numbers 1 & 3 are of concern.
777 // This *indirectly* handles only cases 2 & 4! And is WRONG is REFLECT is set...!
778 //      if (rightMargin < 0 || leftMargin > lbufWidth)
779
780 // It might be easier to swap these (if REFLECTed) and just use XPOS down below...
781 // That way, you could simply set XPOS to leftMargin if !REFLECT and to rightMargin otherwise.
782 // Still have to be careful with the DATA and IWIDTH values though...
783
784 //      if ((!flagREFLECT && (rightMargin < 0 || leftMargin > lbufWidth))
785 //              || (flagREFLECT && (leftMargin < 0 || rightMargin > lbufWidth)))
786 //              return;
787         if ((!flagREFLECT && (endPos < 0 || startPos > lbufWidth))
788                 || (flagREFLECT && (startPos < 0 || endPos > lbufWidth)))
789                 return;
790
791         // Otherwise, find the clip limits and clip the phrase as well...
792         // NOTE: I'm fudging here by letting the actual blit overstep the bounds of the
793         //       line buffer, but it shouldn't matter since there are two unused line
794         //       buffers below and nothing above and I'll at most write 8 bytes outside
795         //       the line buffer... I could use a fractional clip begin/end value, but
796         //       this makes the blit a *lot* more hairy. I might fix this in the future
797         //       if it becomes necessary. (JLH)
798         //       Probably wouldn't be *that* hairy. Just use a delta that tells the inner loop
799         //       which pixel in the phrase is being written, and quit when either end of phrases
800         //       is reached or line buffer extents are surpassed.
801
802 //This stuff is probably wrong as well... !!! FIX !!!
803 //The strange thing is that it seems to work, but that's no guarantee that it's bulletproof!
804 //Yup. Seems that JagMania doesn't work correctly with this...
805 //Dunno if this is the problem, but Atari Karts is showing *some* of the road now...
806 //      if (!flagREFLECT)
807
808 /*
809         if (leftMargin < 0)
810                 clippedWidth = 0 - leftMargin,
811                 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth],
812                 leftMargin = 0 - (clippedWidth % phraseWidthToPixels[depth]);
813 //              leftMargin = 0;
814
815         if (rightMargin > lbufWidth)
816                 clippedWidth = rightMargin - lbufWidth,
817                 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];//,
818 //              rightMargin = lbufWidth + (clippedWidth % phraseWidthToPixels[depth]);
819 //              rightMargin = lbufWidth;
820 */
821 if (depth > 5)
822         WriteLog("OP: We're about to encounter a divide by zero error!\n");
823         // NOTE: We're just using endPos to figure out how much, if any, to clip by.
824         // ALSO: There may be another case where we start out of bounds and end out of bounds...!
825         // !!! FIX !!!
826         if (startPos < 0)                       // Case #1: Begin out, end in, L to R
827                 clippedWidth = 0 - startPos,
828                 dataClippedWidth = phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth],
829                 startPos = 0 - (clippedWidth % phraseWidthToPixels[depth]);
830
831         if (endPos < 0)                         // Case #2: Begin in, end out, R to L
832                 clippedWidth = 0 - endPos,
833                 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];
834
835         if (endPos > lbufWidth)         // Case #3: Begin in, end out, L to R
836                 clippedWidth = endPos - lbufWidth,
837                 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];
838
839         if (startPos > lbufWidth)       // Case #4: Begin out, end in, R to L
840                 clippedWidth = startPos - lbufWidth,
841                 dataClippedWidth = phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth],
842                 startPos = lbufWidth + (clippedWidth % phraseWidthToPixels[depth]);
843
844         // If the image is sitting on the line buffer left or right edge, we need to compensate
845         // by decreasing the image phrase width accordingly.
846         iwidth -= phraseClippedWidth;
847
848         // Also, if we're clipping the phrase we need to make sure we're in the correct part of
849         // the pixel data.
850 //      data += phraseClippedWidth * (pitch << 3);
851         data += dataClippedWidth * pitch;
852
853         // NOTE: When the bitmap is in REFLECT mode, the XPOS marks the *right* side of the
854         //       bitmap! This makes clipping & etc. MUCH, much easier...!
855 //      uint32 lbufAddress = 0x1800 + (!in24BPPMode ? leftMargin * 2 : leftMargin * 4);
856 //Why does this work right when multiplying startPos by 2 (instead of 4) for 24 BPP mode?
857 //Is this a bug in the OP?
858         uint32 lbufAddress = 0x1800 + (!in24BPPMode ? startPos * 2 : startPos * 2);
859         uint8 * currentLineBuffer = &tomRam8[lbufAddress];
860
861         // Render.
862
863 // Hmm. We check above for 24 BPP mode, but don't do anything about it below...
864 // If we *were* in 24 BPP mode, how would you convert CRY to RGB24? Seems to me
865 // that if you're in CRY mode then you wouldn't be able to use 24 BPP bitmaps
866 // anyway.
867 // This seems to be the case (at least according to the Midsummer docs)...!
868
869         if (depth == 0)                                                                 // 1 BPP
870         {
871                 // The LSB of flags is OPFLAG_REFLECT, so sign extend it and or 2 into it.
872                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
873
874                 // Fetch 1st phrase...
875                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
876 //Note that firstPix should only be honored *if* we start with the 1st phrase of the bitmap
877 //i.e., we didn't clip on the margin... !!! FIX !!!
878                 pixels <<= firstPix;                                            // Skip first N pixels (N=firstPix)...
879                 int i = firstPix;                                                       // Start counter at right spot...
880
881                 while (iwidth--)
882                 {
883                         while (i++ < 64)
884                         {
885                                 uint8 bit = pixels >> 63;
886                                 if (flagTRANS && bit == 0)
887                                         ;       // Do nothing...
888                                 else
889                                 {
890                                         if (!flagRMW)
891 //Optimize: Set palleteRAM16 to beginning of palette RAM + index*2 and use only [bit] as index...
892 //Won't optimize RMW case though...
893                                                 // This is the *only* correct use of endian-dependent code
894                                                 // (i.e., mem-to-mem direct copying)!
895                                                 *(uint16 *)currentLineBuffer = paletteRAM16[index | bit];
896                                         else
897                                                 *currentLineBuffer =
898                                                         BLEND_CR(*currentLineBuffer, paletteRAM[(index | bit) << 1]),
899                                                 *(currentLineBuffer + 1) =
900                                                         BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bit) << 1) + 1]);
901                                 }
902
903                                 currentLineBuffer += lbufDelta;
904                                 pixels <<= 1;
905                         }
906                         i = 0;
907                         // Fetch next phrase...
908                         data += pitch;
909                         pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
910                 }
911         }
912         else if (depth == 1)                                                    // 2 BPP
913         {
914 if (firstPix)
915         WriteLog("OP: Fixed bitmap @ 2 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
916                 index &= 0xFC;                                                          // Top six bits form CLUT index
917                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
918                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
919
920                 while (iwidth--)
921                 {
922                         // Fetch phrase...
923                         uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
924                         data += pitch;
925
926                         for(int i=0; i<32; i++)
927                         {
928                                 uint8 bits = pixels >> 62;
929 // Seems to me that both of these are in the same endian, so we could cast it as
930 // uint16 * and do straight across copies (what about 24 bpp? Treat it differently...)
931 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
932 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
933 // No, it isn't because we read the memory in an endian safe way--this *won't* work...
934                                 if (flagTRANS && bits == 0)
935                                         ;       // Do nothing...
936                                 else
937                                 {
938                                         if (!flagRMW)
939                                                 *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
940                                         else
941                                                 *currentLineBuffer =
942                                                         BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
943                                                 *(currentLineBuffer + 1) =
944                                                         BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
945                                 }
946
947                                 currentLineBuffer += lbufDelta;
948                                 pixels <<= 2;
949                         }
950                 }
951         }
952         else if (depth == 2)                                                    // 4 BPP
953         {
954 if (firstPix)
955         WriteLog("OP: Fixed bitmap @ 4 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
956                 index &= 0xF0;                                                          // Top four bits form CLUT index
957                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
958                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
959
960                 while (iwidth--)
961                 {
962                         // Fetch phrase...
963                         uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
964                         data += pitch;
965
966                         for(int i=0; i<16; i++)
967                         {
968                                 uint8 bits = pixels >> 60;
969 // Seems to me that both of these are in the same endian, so we could cast it as
970 // uint16 * and do straight across copies (what about 24 bpp? Treat it differently...)
971 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
972 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
973 // No, it isn't because we read the memory in an endian safe way--this *won't* work...
974                                 if (flagTRANS && bits == 0)
975                                         ;       // Do nothing...
976                                 else
977                                 {
978                                         if (!flagRMW)
979                                                 *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
980                                         else
981                                                 *currentLineBuffer =
982                                                         BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
983                                                 *(currentLineBuffer + 1) =
984                                                         BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
985                                 }
986
987                                 currentLineBuffer += lbufDelta;
988                                 pixels <<= 4;
989                         }
990                 }
991         }
992         else if (depth == 3)                                                    // 8 BPP
993         {
994                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
995                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
996
997                 // Fetch 1st phrase...
998                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
999 //Note that firstPix should only be honored *if* we start with the 1st phrase of the bitmap
1000 //i.e., we didn't clip on the margin... !!! FIX !!!
1001                 firstPix &= 0x30;                                                       // Only top two bits are valid for 8 BPP
1002                 pixels <<= firstPix;                                            // Skip first N pixels (N=firstPix)...
1003                 int i = firstPix >> 3;                                          // Start counter at right spot...
1004
1005                 while (iwidth--)
1006                 {
1007                         while (i++ < 8)
1008                         {
1009                                 uint8 bits = pixels >> 56;
1010 // Seems to me that both of these are in the same endian, so we could cast it as
1011 // uint16 * and do straight across copies (what about 24 bpp? Treat it differently...)
1012 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
1013 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
1014 // No, it isn't because we read the memory in an endian safe way--this *won't* work...
1015                                 if (flagTRANS && bits == 0)
1016                                         ;       // Do nothing...
1017                                 else
1018                                 {
1019                                         if (!flagRMW)
1020                                                 *(uint16 *)currentLineBuffer = paletteRAM16[bits];
1021                                         else
1022                                                 *currentLineBuffer =
1023                                                         BLEND_CR(*currentLineBuffer, paletteRAM[bits << 1]),
1024                                                 *(currentLineBuffer + 1) =
1025                                                         BLEND_Y(*(currentLineBuffer + 1), paletteRAM[(bits << 1) + 1]);
1026                                 }
1027
1028                                 currentLineBuffer += lbufDelta;
1029                                 pixels <<= 8;
1030                         }
1031                         i = 0;
1032                         // Fetch next phrase...
1033                         data += pitch;
1034                         pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1035                 }
1036         }
1037         else if (depth == 4)                                                    // 16 BPP
1038         {
1039 if (firstPix)
1040         WriteLog("OP: Fixed bitmap @ 16 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1041                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1042                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1043
1044                 while (iwidth--)
1045                 {
1046                         // Fetch phrase...
1047                         uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1048                         data += pitch;
1049
1050                         for(int i=0; i<4; i++)
1051                         {
1052                                 uint8 bitsHi = pixels >> 56, bitsLo = pixels >> 48;
1053 // Seems to me that both of these are in the same endian, so we could cast it as
1054 // uint16 * and do straight across copies (what about 24 bpp? Treat it differently...)
1055 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
1056 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
1057 // No, it isn't because we read the memory in an endian safe way--it *won't* work...
1058                                 if (flagTRANS && (bitsLo | bitsHi) == 0)
1059                                         ;       // Do nothing...
1060                                 else
1061                                 {
1062                                         if (!flagRMW)
1063                                                 *currentLineBuffer = bitsHi,
1064                                                 *(currentLineBuffer + 1) = bitsLo;
1065                                         else
1066                                                 *currentLineBuffer =
1067                                                         BLEND_CR(*currentLineBuffer, bitsHi),
1068                                                 *(currentLineBuffer + 1) =
1069                                                         BLEND_Y(*(currentLineBuffer + 1), bitsLo);
1070                                 }
1071
1072                                 currentLineBuffer += lbufDelta;
1073                                 pixels <<= 16;
1074                         }
1075                 }
1076         }
1077         else if (depth == 5)                                                    // 24 BPP
1078         {
1079 //Looks like Iron Soldier is the only game that uses 24BPP mode...
1080 //There *might* be others...
1081 //WriteLog("OP: Writing 24 BPP bitmap!\n");
1082 if (firstPix)
1083         WriteLog("OP: Fixed bitmap @ 24 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1084                 // Not sure, but I think RMW only works with 16 BPP and below, and only in CRY mode...
1085                 // The LSB of flags is OPFLAG_REFLECT, so sign extend it and OR 4 into it.
1086                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 4) | 0x04;
1087
1088                 while (iwidth--)
1089                 {
1090                         // Fetch phrase...
1091                         uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1092                         data += pitch;
1093
1094                         for(int i=0; i<2; i++)
1095                         {
1096                                 // We don't use a 32-bit var here because of endian issues...!
1097                                 uint8 bits3 = pixels >> 56, bits2 = pixels >> 48,
1098                                         bits1 = pixels >> 40, bits0 = pixels >> 32;
1099
1100                                 if (flagTRANS && (bits3 | bits2 | bits1 | bits0) == 0)
1101                                         ;       // Do nothing...
1102                                 else
1103                                         *currentLineBuffer = bits3,
1104                                         *(currentLineBuffer + 1) = bits2,
1105                                         *(currentLineBuffer + 2) = bits1,
1106                                         *(currentLineBuffer + 3) = bits0;
1107
1108                                 currentLineBuffer += lbufDelta;
1109                                 pixels <<= 32;
1110                         }
1111                 }
1112         }
1113 }
1114
1115 //
1116 // Store scaled bitmap in line buffer
1117 //
1118 void OPProcessScaledBitmap(uint64 p0, uint64 p1, uint64 p2, bool render)
1119 {
1120 // Need to make sure that when writing that it stays within the line buffer...
1121 // LBUF ($F01800 - $F01D9E) 360 x 32-bit RAM
1122         uint8 depth = (p1 >> 12) & 0x07;                                // Color depth of image
1123         int32 xpos = ((int16)((p1 << 4) & 0xFFFF)) >> 4;// Image xpos in LBUF
1124         uint32 iwidth = (p1 >> 28) & 0x3FF;                             // Image width in *phrases*
1125         uint32 data = (p0 >> 40) & 0xFFFFF8;                    // Pixel data address
1126 //#ifdef OP_DEBUG_BMP
1127 // Prolly should use this... Though not sure exactly how.
1128 //Use the upper bits as an offset into the phrase depending on the BPP. That's how!
1129         uint32 firstPix = (p1 >> 49) & 0x3F;
1130 //This is WEIRD! I'm sure I saw Atari Karts request 8 BPP FIRSTPIX! What happened???
1131 if (firstPix)
1132         WriteLog("OP: FIRSTPIX != 0! (Scaled BM)\n");
1133 //#endif
1134 // We can ignore the RELEASE (high order) bit for now--probably forever...!
1135 //      uint8 flags = (p1 >> 45) & 0x0F;        // REFLECT, RMW, TRANS, RELEASE
1136 //Optimize: break these out to their own BOOL values [DONE]
1137         uint8 flags = (p1 >> 45) & 0x07;                                // REFLECT (0), RMW (1), TRANS (2)
1138         bool flagREFLECT = (flags & OPFLAG_REFLECT ? true : false),
1139                 flagRMW = (flags & OPFLAG_RMW ? true : false),
1140                 flagTRANS = (flags & OPFLAG_TRANS ? true : false);
1141         uint8 index = (p1 >> 37) & 0xFE;                                // CLUT index offset (upper pix, 1-4 bpp)
1142         uint32 pitch = (p1 >> 15) & 0x07;                               // Phrase pitch
1143
1144         uint8 * tomRam8 = TOMGetRamPointer();
1145         uint8 * paletteRAM = &tomRam8[0x400];
1146         // This is OK as long as it's used correctly: For 16-bit RAM to RAM direct copies--NOT
1147         // for use when using endian-corrected data (i.e., any of the *ReadWord functions!)
1148         uint16 * paletteRAM16 = (uint16 *)paletteRAM;
1149
1150         uint8 hscale = p2 & 0xFF;
1151 // Hmm. It seems that fixing the horizontal scale necessitated re-fixing this. Not sure why,
1152 // but seems to be consistent with the vertical scaling now (and it may turn out to be wrong!)...
1153         uint8 horizontalRemainder = hscale;                             // Not sure if it starts full, but seems reasonable [It's not!]
1154 //      uint8 horizontalRemainder = 0;                                  // Let's try zero! Seems to work! Yay! [No, it doesn't!]
1155         int32 scaledWidthInPixels = (iwidth * phraseWidthToPixels[depth] * hscale) >> 5;
1156         uint32 scaledPhrasePixels = (phraseWidthToPixels[depth] * hscale) >> 5;
1157
1158 //      WriteLog("bitmap %ix? %ibpp at %i,? firstpix=? data=0x%.8x pitch %i hflipped=%s dwidth=? (linked to ?) RMW=%s Tranparent=%s\n",
1159 //              iwidth, op_bitmap_bit_depth[bitdepth], xpos, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), (flags&OPFLAG_RMW ? "yes" : "no"), (flags&OPFLAG_TRANS ? "yes" : "no"));
1160
1161 // Looks like an hscale of zero means don't draw!
1162         if (!render || iwidth == 0 || hscale == 0)
1163                 return;
1164
1165 /*extern int start_logging;
1166 if (start_logging)
1167         WriteLog("OP: Scaled bitmap %ix? %ibpp at %i,? hscale=%02X fpix=%i data=%08X pitch %i hflipped=%s dwidth=? (linked to %08X) Transluency=%s\n",
1168                 iwidth, op_bitmap_bit_depth[depth], xpos, hscale, firstPix, data, pitch, (flagREFLECT ? "yes" : "no"), op_pointer, (flagRMW ? "yes" : "no"));*/
1169 //#define OP_DEBUG_BMP
1170 //#ifdef OP_DEBUG_BMP
1171 //      WriteLog("OP: Scaled bitmap %ix%i %ibpp at %i,%i firstpix=%i data=0x%.8x pitch %i hflipped=%s dwidth=%i (linked to 0x%.8x) Transluency=%s\n",
1172 //              iwidth, height, op_bitmap_bit_depth[bitdepth], xpos, ypos, firstPix, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), dwidth, op_pointer, (flags&OPFLAG_RMW ? "yes" : "no"));
1173 //#endif
1174
1175         int32 startPos = xpos, endPos = xpos +
1176                 (!flagREFLECT ? scaledWidthInPixels - 1 : -(scaledWidthInPixels + 1));
1177         uint32 clippedWidth = 0, phraseClippedWidth = 0, dataClippedWidth = 0;
1178         bool in24BPPMode = (((GET16(tomRam8, 0x0028) >> 1) & 0x03) == 1 ? true : false);        // VMODE
1179         // Not sure if this is Jaguar Two only location or what...
1180         // From the docs, it is... If we want to limit here we should think of something else.
1181 //      int32 limit = GET16(tom_ram_8, 0x0008);                 // LIMIT
1182         int32 limit = 720;
1183         int32 lbufWidth = (!in24BPPMode ? limit - 1 : (limit / 2) - 1); // Zero based limit...
1184
1185         // If the image is completely to the left or right of the line buffer, then bail.
1186 //If in REFLECT mode, then these values are swapped! !!! FIX !!! [DONE]
1187 //There are four possibilities:
1188 //  1. image sits on left edge and no REFLECT; starts out of bounds but ends in bounds.
1189 //  2. image sits on left edge and REFLECT; starts in bounds but ends out of bounds.
1190 //  3. image sits on right edge and REFLECT; starts out of bounds but ends in bounds.
1191 //  4. image sits on right edge and no REFLECT; starts in bounds but ends out of bounds.
1192 //Numbers 2 & 4 can be caught by checking the LBUF clip while in the inner loop,
1193 // numbers 1 & 3 are of concern.
1194 // This *indirectly* handles only cases 2 & 4! And is WRONG if REFLECT is set...!
1195 //      if (rightMargin < 0 || leftMargin > lbufWidth)
1196
1197 // It might be easier to swap these (if REFLECTed) and just use XPOS down below...
1198 // That way, you could simply set XPOS to leftMargin if !REFLECT and to rightMargin otherwise.
1199 // Still have to be careful with the DATA and IWIDTH values though...
1200
1201         if ((!flagREFLECT && (endPos < 0 || startPos > lbufWidth))
1202                 || (flagREFLECT && (startPos < 0 || endPos > lbufWidth)))
1203                 return;
1204
1205         // Otherwise, find the clip limits and clip the phrase as well...
1206         // NOTE: I'm fudging here by letting the actual blit overstep the bounds of the
1207         //       line buffer, but it shouldn't matter since there are two unused line
1208         //       buffers below and nothing above and I'll at most write 40 bytes outside
1209         //       the line buffer... I could use a fractional clip begin/end value, but
1210         //       this makes the blit a *lot* more hairy. I might fix this in the future
1211         //       if it becomes necessary. (JLH)
1212         //       Probably wouldn't be *that* hairy. Just use a delta that tells the inner loop
1213         //       which pixel in the phrase is being written, and quit when either end of phrases
1214         //       is reached or line buffer extents are surpassed.
1215
1216 //This stuff is probably wrong as well... !!! FIX !!!
1217 //The strange thing is that it seems to work, but that's no guarantee that it's bulletproof!
1218 //Yup. Seems that JagMania doesn't work correctly with this...
1219 //Dunno if this is the problem, but Atari Karts is showing *some* of the road now...
1220 //Actually, it is! Or, it was. It doesn't seem to be clipping here, so the problem lies
1221 //elsewhere! Hmm. Putting the scaling code into the 1/2/8 BPP cases seems to draw the ground
1222 // a bit more accurately... Strange!
1223 //It's probably a case of the REFLECT flag being set and the background being written
1224 //from the right side of the screen...
1225 //But no, it isn't... At least if the diagnostics are telling the truth!
1226
1227         // NOTE: We're just using endPos to figure out how much, if any, to clip by.
1228         // ALSO: There may be another case where we start out of bounds and end out of bounds...!
1229         // !!! FIX !!!
1230
1231 //There's a problem here with scaledPhrasePixels in that it can be forced to zero when
1232 //the scaling factor is small. So fix it already! !!! FIX !!!
1233 /*if (scaledPhrasePixels == 0)
1234 {
1235         WriteLog("OP: [Scaled] We're about to encounter a divide by zero error!\n");
1236         DumpScaledObject(p0, p1, p2);
1237 }//*/
1238 //NOTE: I'm almost 100% sure that this is wrong... And it is! :-p
1239
1240 //Try a simple example...
1241 // Let's say we have a 8 BPP scanline with an hscale of $80 (4). Our xpos is -10,
1242 // non-flipped. Pixels in the bitmap are XYZXYZXYZXYZXYZ.
1243 // Scaled up, they would be XXXXYYYYZZZZXXXXYYYYZZZZXXXXYYYYZZZZ...
1244 //
1245 // Normally, we would expect this in the line buffer:
1246 // ZZXXXXYYYYZZZZXXXXYYYYZZZZ...
1247 //
1248 // But instead we're getting:
1249 // XXXXYYYYZZZZXXXXYYYYZZZZ...
1250 //
1251 // or are we??? It would seem so, simply by virtue of the fact that we're NOT starting
1252 // on a negative boundary--or are we? Hmm...
1253 // cw = 10, dcw = pcw = 10 / ([8 * 4 = 32] 32) = 0, sp = -10
1254 //
1255 // Let's try a real world example:
1256 //
1257 //OP: Scaled bitmap (70, 8 BPP, spp=28) sp (-400) < 0... [new sp=-8, cw=400, dcw=pcw=14]
1258 //OP: Scaled bitmap (6F, 8 BPP, spp=27) sp (-395) < 0... [new sp=-17, cw=395, dcw=pcw=14]
1259 //
1260 // Really, spp is 27.75 in the second case...
1261 // So... If we do 395 / 27.75, we get 14. Ok so far... If we scale that against the
1262 // start position (14 * 27.75), we get -6.5... NOT -17!
1263
1264 //Now it seems we're working OK, at least for the first case...
1265 uint32 scaledPhrasePixelsUS = phraseWidthToPixels[depth] * hscale;
1266
1267         if (startPos < 0)                       // Case #1: Begin out, end in, L to R
1268 {
1269 extern int start_logging;
1270 if (start_logging)
1271         WriteLog("OP: Scaled bitmap (%02X, %u BPP, spp=%u) start pos (%i) < 0...", hscale, op_bitmap_bit_depth[depth], scaledPhrasePixels, startPos);
1272 //              clippedWidth = 0 - startPos,
1273                 clippedWidth = (0 - startPos) << 5,
1274 //              dataClippedWidth = phraseClippedWidth = clippedWidth / scaledPhrasePixels,
1275                 dataClippedWidth = phraseClippedWidth = (clippedWidth / scaledPhrasePixelsUS) >> 5,
1276 //              startPos = 0 - (clippedWidth % scaledPhrasePixels);
1277                 startPos += (dataClippedWidth * scaledPhrasePixelsUS) >> 5;
1278 if (start_logging)
1279         WriteLog(" [new sp=%i, cw=%i, dcw=pcw=%i]\n", startPos, clippedWidth, dataClippedWidth);
1280 }
1281
1282         if (endPos < 0)                         // Case #2: Begin in, end out, R to L
1283                 clippedWidth = 0 - endPos,
1284                 phraseClippedWidth = clippedWidth / scaledPhrasePixels;
1285
1286         if (endPos > lbufWidth)         // Case #3: Begin in, end out, L to R
1287                 clippedWidth = endPos - lbufWidth,
1288                 phraseClippedWidth = clippedWidth / scaledPhrasePixels;
1289
1290         if (startPos > lbufWidth)       // Case #4: Begin out, end in, R to L
1291                 clippedWidth = startPos - lbufWidth,
1292                 dataClippedWidth = phraseClippedWidth = clippedWidth / scaledPhrasePixels,
1293                 startPos = lbufWidth + (clippedWidth % scaledPhrasePixels);
1294
1295 extern int op_start_log;
1296 if (op_start_log && clippedWidth != 0)
1297         WriteLog("OP: Clipped line. SP=%i, EP=%i, clip=%u, iwidth=%u, hscale=%02X\n", startPos, endPos, clippedWidth, iwidth, hscale);
1298 if (op_start_log && startPos == 13)
1299 {
1300         WriteLog("OP: Scaled line. SP=%i, EP=%i, clip=%u, iwidth=%u, hscale=%02X, depth=%u, firstPix=%u\n", startPos, endPos, clippedWidth, iwidth, hscale, depth, firstPix);
1301         DumpScaledObject(p0, p1, p2);
1302         if (iwidth == 7)
1303         {
1304                 WriteLog("    %08X: ", data);
1305                 for(int i=0; i<7*8; i++)
1306                         WriteLog("%02X ", JaguarReadByte(data+i));
1307                 WriteLog("\n");
1308         }
1309 }
1310         // If the image is sitting on the line buffer left or right edge, we need to compensate
1311         // by decreasing the image phrase width accordingly.
1312         iwidth -= phraseClippedWidth;
1313
1314         // Also, if we're clipping the phrase we need to make sure we're in the correct part of
1315         // the pixel data.
1316 //      data += phraseClippedWidth * (pitch << 3);
1317         data += dataClippedWidth * (pitch << 3);
1318
1319         // NOTE: When the bitmap is in REFLECT mode, the XPOS marks the *right* side of the
1320         //       bitmap! This makes clipping & etc. MUCH, much easier...!
1321 //      uint32 lbufAddress = 0x1800 + (!in24BPPMode ? leftMargin * 2 : leftMargin * 4);
1322 //      uint32 lbufAddress = 0x1800 + (!in24BPPMode ? startPos * 2 : startPos * 4);
1323         uint32 lbufAddress = 0x1800 + startPos * 2;
1324         uint8 * currentLineBuffer = &tomRam8[lbufAddress];
1325 //uint8 * lineBufferLowerLimit = &tom_ram_8[0x1800],
1326 //      * lineBufferUpperLimit = &tom_ram_8[0x1800 + 719];
1327
1328         // Render.
1329
1330 // Hmm. We check above for 24 BPP mode, but don't do anything about it below...
1331 // If we *were* in 24 BPP mode, how would you convert CRY to RGB24? Seems to me
1332 // that if you're in CRY mode then you wouldn't be able to use 24 BPP bitmaps
1333 // anyway.
1334 // This seems to be the case (at least according to the Midsummer docs)...!
1335
1336         if (depth == 0)                                                                 // 1 BPP
1337         {
1338 if (firstPix != 0)
1339         WriteLog("OP: Scaled bitmap @ 1 BPP requesting FIRSTPIX!\n");
1340                 // The LSB of flags is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1341                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1342
1343                 int pixCount = 0;
1344                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1345
1346                 while ((int32)iwidth > 0)
1347                 {
1348                         uint8 bits = pixels >> 63;
1349
1350                         if (flagTRANS && bits == 0)
1351                                 ;       // Do nothing...
1352                         else
1353                         {
1354                                 if (!flagRMW)
1355                                         // This is the *only* correct use of endian-dependent code
1356                                         // (i.e., mem-to-mem direct copying)!
1357                                         *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1358                                 else
1359                                         *currentLineBuffer =
1360                                                 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1361                                         *(currentLineBuffer + 1) =
1362                                                 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1363                         }
1364
1365                         currentLineBuffer += lbufDelta;
1366
1367 /*                      horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1368                         while (horizontalRemainder & 0x80)
1369                         {
1370                                 horizontalRemainder += hscale;
1371                                 pixCount++;
1372                                 pixels <<= 1;
1373                         }//*/
1374                         while (horizontalRemainder <= 0x20)             // I.e., it's <= 0 (*before* subtraction)
1375                         {
1376                                 horizontalRemainder += hscale;
1377                                 pixCount++;
1378                                 pixels <<= 1;
1379                         }
1380                         horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1381
1382                         if (pixCount > 63)
1383                         {
1384                                 int phrasesToSkip = pixCount / 64, pixelShift = pixCount % 64;
1385
1386                                 data += (pitch << 3) * phrasesToSkip;
1387                                 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1388                                 pixels <<= 1 * pixelShift;
1389                                 iwidth -= phrasesToSkip;
1390                                 pixCount = pixelShift;
1391                         }
1392                 }
1393         }
1394         else if (depth == 1)                                                    // 2 BPP
1395         {
1396 if (firstPix != 0)
1397         WriteLog("OP: Scaled bitmap @ 2 BPP requesting FIRSTPIX!\n");
1398                 index &= 0xFC;                                                          // Top six bits form CLUT index
1399                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1400                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1401
1402                 int pixCount = 0;
1403                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1404
1405                 while ((int32)iwidth > 0)
1406                 {
1407                         uint8 bits = pixels >> 62;
1408
1409                         if (flagTRANS && bits == 0)
1410                                 ;       // Do nothing...
1411                         else
1412                         {
1413                                 if (!flagRMW)
1414                                         // This is the *only* correct use of endian-dependent code
1415                                         // (i.e., mem-to-mem direct copying)!
1416                                         *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1417                                 else
1418                                         *currentLineBuffer =
1419                                                 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1420                                         *(currentLineBuffer + 1) =
1421                                                 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1422                         }
1423
1424                         currentLineBuffer += lbufDelta;
1425
1426 /*                      horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1427                         while (horizontalRemainder & 0x80)
1428                         {
1429                                 horizontalRemainder += hscale;
1430                                 pixCount++;
1431                                 pixels <<= 2;
1432                         }//*/
1433                         while (horizontalRemainder <= 0x20)             // I.e., it's <= 0 (*before* subtraction)
1434                         {
1435                                 horizontalRemainder += hscale;
1436                                 pixCount++;
1437                                 pixels <<= 2;
1438                         }
1439                         horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1440
1441                         if (pixCount > 31)
1442                         {
1443                                 int phrasesToSkip = pixCount / 32, pixelShift = pixCount % 32;
1444
1445                                 data += (pitch << 3) * phrasesToSkip;
1446                                 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1447                                 pixels <<= 2 * pixelShift;
1448                                 iwidth -= phrasesToSkip;
1449                                 pixCount = pixelShift;
1450                         }
1451                 }
1452         }
1453         else if (depth == 2)                                                    // 4 BPP
1454         {
1455 if (firstPix != 0)
1456         WriteLog("OP: Scaled bitmap @ 4 BPP requesting FIRSTPIX!\n");
1457                 index &= 0xF0;                                                          // Top four bits form CLUT index
1458                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1459                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1460
1461                 int pixCount = 0;
1462                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1463
1464                 while ((int32)iwidth > 0)
1465                 {
1466                         uint8 bits = pixels >> 60;
1467
1468                         if (flagTRANS && bits == 0)
1469                                 ;       // Do nothing...
1470                         else
1471                         {
1472                                 if (!flagRMW)
1473                                         // This is the *only* correct use of endian-dependent code
1474                                         // (i.e., mem-to-mem direct copying)!
1475                                         *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1476                                 else
1477                                         *currentLineBuffer =
1478                                                 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1479                                         *(currentLineBuffer + 1) =
1480                                                 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1481                         }
1482
1483                         currentLineBuffer += lbufDelta;
1484
1485 /*                      horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1486                         while (horizontalRemainder & 0x80)
1487                         {
1488                                 horizontalRemainder += hscale;
1489                                 pixCount++;
1490                                 pixels <<= 4;
1491                         }//*/
1492                         while (horizontalRemainder <= 0x20)             // I.e., it's <= 0 (*before* subtraction)
1493                         {
1494                                 horizontalRemainder += hscale;
1495                                 pixCount++;
1496                                 pixels <<= 4;
1497                         }
1498                         horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1499
1500                         if (pixCount > 15)
1501                         {
1502                                 int phrasesToSkip = pixCount / 16, pixelShift = pixCount % 16;
1503
1504                                 data += (pitch << 3) * phrasesToSkip;
1505                                 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1506                                 pixels <<= 4 * pixelShift;
1507                                 iwidth -= phrasesToSkip;
1508                                 pixCount = pixelShift;
1509                         }
1510                 }
1511         }
1512         else if (depth == 3)                                                    // 8 BPP
1513         {
1514 if (firstPix)
1515         WriteLog("OP: Scaled bitmap @ 8 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1516                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1517                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1518
1519                 int pixCount = 0;
1520                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1521
1522                 while ((int32)iwidth > 0)
1523                 {
1524                         uint8 bits = pixels >> 56;
1525
1526                         if (flagTRANS && bits == 0)
1527                                 ;       // Do nothing...
1528                         else
1529                         {
1530                                 if (!flagRMW)
1531                                         // This is the *only* correct use of endian-dependent code
1532                                         // (i.e., mem-to-mem direct copying)!
1533                                         *(uint16 *)currentLineBuffer = paletteRAM16[bits];
1534 /*                              {
1535                                         if (currentLineBuffer >= lineBufferLowerLimit && currentLineBuffer <= lineBufferUpperLimit)
1536                                                 *(uint16 *)currentLineBuffer = paletteRAM16[bits];
1537                                 }*/
1538                                 else
1539                                         *currentLineBuffer =
1540                                                 BLEND_CR(*currentLineBuffer, paletteRAM[bits << 1]),
1541                                         *(currentLineBuffer + 1) =
1542                                                 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[(bits << 1) + 1]);
1543                         }
1544
1545                         currentLineBuffer += lbufDelta;
1546
1547                         while (horizontalRemainder <= 0x20)             // I.e., it's <= 0 (*before* subtraction)
1548                         {
1549                                 horizontalRemainder += hscale;
1550                                 pixCount++;
1551                                 pixels <<= 8;
1552                         }
1553                         horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1554
1555                         if (pixCount > 7)
1556                         {
1557                                 int phrasesToSkip = pixCount / 8, pixelShift = pixCount % 8;
1558
1559                                 data += (pitch << 3) * phrasesToSkip;
1560                                 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1561                                 pixels <<= 8 * pixelShift;
1562                                 iwidth -= phrasesToSkip;
1563                                 pixCount = pixelShift;
1564                         }
1565                 }
1566         }
1567         else if (depth == 4)                                                    // 16 BPP
1568         {
1569 if (firstPix != 0)
1570         WriteLog("OP: Scaled bitmap @ 16 BPP requesting FIRSTPIX!\n");
1571                 // The LSB is OPFLAG_REFLECT, so sign extend it and OR 2 into it.
1572                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1573
1574                 int pixCount = 0;
1575                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1576
1577                 while ((int32)iwidth > 0)
1578                 {
1579                         uint8 bitsHi = pixels >> 56, bitsLo = pixels >> 48;
1580
1581                         if (flagTRANS && (bitsLo | bitsHi) == 0)
1582                                 ;       // Do nothing...
1583                         else
1584                         {
1585                                 if (!flagRMW)
1586                                         *currentLineBuffer = bitsHi,
1587                                         *(currentLineBuffer + 1) = bitsLo;
1588                                 else
1589                                         *currentLineBuffer =
1590                                                 BLEND_CR(*currentLineBuffer, bitsHi),
1591                                         *(currentLineBuffer + 1) =
1592                                                 BLEND_Y(*(currentLineBuffer + 1), bitsLo);
1593                         }
1594
1595                         currentLineBuffer += lbufDelta;
1596
1597 /*                      horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1598                         while (horizontalRemainder & 0x80)
1599                         {
1600                                 horizontalRemainder += hscale;
1601                                 pixCount++;
1602                                 pixels <<= 16;
1603                         }//*/
1604                         while (horizontalRemainder <= 0x20)             // I.e., it's <= 0 (*before* subtraction)
1605                         {
1606                                 horizontalRemainder += hscale;
1607                                 pixCount++;
1608                                 pixels <<= 16;
1609                         }
1610                         horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1611 //*/
1612                         if (pixCount > 3)
1613                         {
1614                                 int phrasesToSkip = pixCount / 4, pixelShift = pixCount % 4;
1615
1616                                 data += (pitch << 3) * phrasesToSkip;
1617                                 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1618                                 pixels <<= 16 * pixelShift;
1619
1620                                 iwidth -= phrasesToSkip;
1621
1622                                 pixCount = pixelShift;
1623                         }
1624                 }
1625         }
1626         else if (depth == 5)                                                    // 24 BPP
1627         {
1628 //I'm not sure that you can scale a 24 BPP bitmap properly--the JTRM seems to indicate as much.
1629 WriteLog("OP: Writing 24 BPP scaled bitmap!\n");
1630 if (firstPix != 0)
1631         WriteLog("OP: Scaled bitmap @ 24 BPP requesting FIRSTPIX!\n");
1632                 // Not sure, but I think RMW only works with 16 BPP and below, and only in CRY mode...
1633                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 4 into it.
1634                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 4) | 0x04;
1635
1636                 while (iwidth--)
1637                 {
1638                         // Fetch phrase...
1639                         uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1640                         data += pitch << 3;                                             // Multiply pitch * 8 (optimize: precompute this value)
1641
1642                         for(int i=0; i<2; i++)
1643                         {
1644                                 uint8 bits3 = pixels >> 56, bits2 = pixels >> 48,
1645                                         bits1 = pixels >> 40, bits0 = pixels >> 32;
1646
1647                                 if (flagTRANS && (bits3 | bits2 | bits1 | bits0) == 0)
1648                                         ;       // Do nothing...
1649                                 else
1650                                         *currentLineBuffer = bits3,
1651                                         *(currentLineBuffer + 1) = bits2,
1652                                         *(currentLineBuffer + 2) = bits1,
1653                                         *(currentLineBuffer + 3) = bits0;
1654
1655                                 currentLineBuffer += lbufDelta;
1656                                 pixels <<= 32;
1657                         }
1658                 }
1659         }
1660 }