]> Shamusworld >> Repos - virtualjaguar/blob - src/objectp.cpp
946714530a5c392a56d17882194ea63de423e971
[virtualjaguar] / src / objectp.cpp
1 //
2 // Object Processor
3 //
4 // Original source by David Raingeard (Cal2)
5 // GCC/SDL port by Niels Wagenaar (Linux/WIN32) and Caz (BeOS)
6 // Extensive cleanups/fixes/rewrites by James L. Hammons
7 //
8
9 #include "objectp.h"
10
11 #include <stdlib.h>
12 #include <string.h>
13 #include "tom.h"
14 #include "jaguar.h"
15 #include "log.h"
16 #include "gpu.h"
17 #include "m68k.h"
18
19 //#define OP_DEBUG
20 //#define OP_DEBUG_BMP
21
// Blend table lookups: the index packs the existing (destination) byte into bits 8-15
// and the delta (source) byte into bits 0-7. Every macro argument is parenthesized so
// that expression arguments (e.g. "a + b") are cast and shifted as a whole.
#define BLEND_Y(dst, src)       op_blend_y[(((uint16)(dst)) << 8) | ((uint16)(src))]
#define BLEND_CR(dst, src)      op_blend_cr[(((uint16)(dst)) << 8) | ((uint16)(src))]
24
// Object types, as found in the low three bits of an object's first phrase
#define OBJECT_TYPE_BITMAP		0		// 000
#define OBJECT_TYPE_SCALE		1		// 001
#define OBJECT_TYPE_GPU			2		// 010
#define OBJECT_TYPE_BRANCH		3		// 011
#define OBJECT_TYPE_STOP		4		// 100

// Condition codes carried by BRANCH objects
#define CONDITION_EQUAL				0
#define CONDITION_LESS_THAN			1
#define CONDITION_GREATER_THAN		2
#define CONDITION_OP_FLAG_SET		3
#define CONDITION_SECOND_HALF_LINE	4

// Bits of the 4-bit flags field of a bitmap object's second phrase
#define OPFLAG_RELEASE			8		// Bus release bit
#define OPFLAG_TRANS			4		// Transparency bit
#define OPFLAG_RMW				2		// Read-Modify-Write bit
#define OPFLAG_REFLECT			1		// Horizontal mirror bit
41
42 // Private function prototypes
43
44 void OPProcessFixedBitmap(uint64 p0, uint64 p1, bool render);
45 void OPProcessScaledBitmap(uint64 p0, uint64 p1, uint64 p2, bool render);
46 void DumpScaledObject(uint64 p0, uint64 p1, uint64 p2);
47 void DumpFixedObject(uint64 p0, uint64 p1);
48 uint64 op_load_phrase(uint32 offset);
49
50 // Local global variables
51
52 // Blend tables (64K each)
53 static uint8 op_blend_y[0x10000];
54 static uint8 op_blend_cr[0x10000];
55 // There may be a problem with this "RAM" overlapping (and thus being independent of)
56 // some of the regular TOM RAM...
57 //#warning objectp_ram is separated from TOM RAM--need to fix that!
58 //static uint8 objectp_ram[0x40];                       // This is based at $F00000
59 uint8 objectp_running = 0;
60 //bool objectp_stop_reading_list;
61
62 static uint8 op_bitmap_bit_depth[8] = { 1, 2, 4, 8, 16, 24, 32, 0 };
63 //static uint32 op_bitmap_bit_size[8] =
64 //      { (uint32)(0.125*65536), (uint32)(0.25*65536), (uint32)(0.5*65536), (uint32)(1*65536),
65 //        (uint32)(2*65536),     (uint32)(1*65536),    (uint32)(1*65536),   (uint32)(1*65536) };
66 static uint32 op_pointer;
67
68 int32 phraseWidthToPixels[8] = { 64, 32, 16, 8, 4, 2, 0, 0 };
69
70
71 //
72 // Object Processor initialization
73 //
74 void op_init(void)
75 {
76         // Here we calculate the saturating blend of a signed 4-bit value and an
77         // existing Cyan/Red value as well as a signed 8-bit value and an existing intensity...
78         // Note: CRY is 4 bits Cyan, 4 bits Red, 16 bits intensitY
79         for(int i=0; i<256*256; i++)
80         {
81                 int y = (i >> 8) & 0xFF;
82                 int dy = (int8)i;                                       // Sign extend the Y index
83                 int c1 = (i >> 8) & 0x0F;
84                 int dc1 = (int8)(i << 4) >> 4;          // Sign extend the R index
85                 int c2 = (i >> 12) & 0x0F;
86                 int dc2 = (int8)(i & 0xF0) >> 4;        // Sign extend the C index
87
88                 y += dy;
89
90                 if (y < 0)
91                         y = 0;
92                 else if (y > 0xFF)
93                         y = 0xFF;
94
95                 op_blend_y[i] = y;
96
97                 c1 += dc1;
98
99                 if (c1 < 0)
100                         c1 = 0;
101                 else if (c1 > 0x0F)
102                         c1 = 0x0F;
103
104                 c2 += dc2;
105
106                 if (c2 < 0)
107                         c2 = 0;
108                 else if (c2 > 0x0F)
109                         c2 = 0x0F;
110
111                 op_blend_cr[i] = (c2 << 4) | c1;
112         }
113
114         op_reset();
115 }
116
117 //
118 // Object Processor reset
119 //
120 void op_reset(void)
121 {
122 //      memset(objectp_ram, 0x00, 0x40);
123         objectp_running = 0;
124 }
125
126 void op_done(void)
127 {
128         const char * opType[8] =
129         { "(BITMAP)", "(SCALED BITMAP)", "(GPU INT)", "(BRANCH)", "(STOP)", "???", "???", "???" };
130         const char * ccType[8] =
131                 { "\"==\"", "\"<\"", "\">\"", "(opflag set)", "(second half line)", "?", "?", "?" };
132
133         uint32 olp = op_get_list_pointer();
134         WriteLog("OP: OLP = %08X\n", olp);
135         WriteLog("OP: Phrase dump\n    ----------\n");
136         for(uint32 i=0; i<0x100; i+=8)
137         {
138                 uint32 hi = JaguarReadLong(olp + i, OP), lo = JaguarReadLong(olp + i + 4, OP);
139                 WriteLog("\t%08X: %08X %08X %s", olp + i, hi, lo, opType[lo & 0x07]);
140                 if ((lo & 0x07) == 3)
141                 {
142                         uint16 ypos = (lo >> 3) & 0x7FF;
143                         uint8  cc   = (lo >> 14) & 0x03;
144                         uint32 link = ((hi << 11) | (lo >> 21)) & 0x3FFFF8;
145                         WriteLog(" YPOS=%u, CC=%s, link=%08X", ypos, ccType[cc], link);
146                 }
147                 WriteLog("\n");
148                 if ((lo & 0x07) == 0)
149                         DumpFixedObject(op_load_phrase(olp+i), op_load_phrase(olp+i+8));
150                 if ((lo & 0x07) == 1)
151                         DumpScaledObject(op_load_phrase(olp+i), op_load_phrase(olp+i+8), op_load_phrase(olp+i+16));
152         }
153         WriteLog("\n");
154
155 //      memory_free(op_blend_y);
156 //      memory_free(op_blend_cr);
157 }
158
159 //
160 // Object Processor memory access
161 // Memory range: F00010 - F00027
162 //
163 //      F00010-F00017   R     xxxxxxxx xxxxxxxx   OB - current object code from the graphics processor
164 //      F00020-F00023     W   xxxxxxxx xxxxxxxx   OLP - start of the object list
165 //      F00026            W   -------- -------x   OBF - object processor flag
166 //
167
168 #if 0
169 uint8 OPReadByte(uint32 offset, uint32 who/*=UNKNOWN*/)
170 {
171         offset &= 0x3F;
172         return objectp_ram[offset];
173 }
174
175 uint16 OPReadWord(uint32 offset, uint32 who/*=UNKNOWN*/)
176 {
177         offset &= 0x3F;
178         return GET16(objectp_ram, offset);
179 }
180
181 void OPWriteByte(uint32 offset, uint8 data, uint32 who/*=UNKNOWN*/)
182 {
183         offset &= 0x3F;
184         objectp_ram[offset] = data;
185 }
186
187 void OPWriteWord(uint32 offset, uint16 data, uint32 who/*=UNKNOWN*/)
188 {
189         offset &= 0x3F;
190         SET16(objectp_ram, offset, data);
191
192 /*if (offset == 0x20)
193 WriteLog("OP: Setting lo list pointer: %04X\n", data);
194 if (offset == 0x22)
195 WriteLog("OP: Setting hi list pointer: %04X\n", data);//*/
196 }
197 #endif
198
199 uint32 op_get_list_pointer(void)
200 {
201         // Note: This register is LO / HI WORD, hence the funky look of this...
202         return GET16(tom_ram_8, 0x20) | (GET16(tom_ram_8, 0x22) << 16);
203 }
204
205 // This is WRONG, since the OBF is only 16 bits wide!!! [FIXED]
206
207 uint32 op_get_status_register(void)
208 {
209         return GET16(tom_ram_8, 0x26);
210 }
211
212 // This is WRONG, since the OBF is only 16 bits wide!!! [FIXED]
213
214 void op_set_status_register(uint32 data)
215 {
216         tom_ram_8[0x26] = (data & 0x0000FF00) >> 8;
217         tom_ram_8[0x27] |= (data & 0xFE);
218 }
219
220 void op_set_current_object(uint64 object)
221 {
222 //Not sure this is right... Wouldn't it just be stored 64 bit BE?
223         // Stored as least significant 32 bits first, ms32 last in big endian
224 /*      objectp_ram[0x13] = object & 0xFF; object >>= 8;
225         objectp_ram[0x12] = object & 0xFF; object >>= 8;
226         objectp_ram[0x11] = object & 0xFF; object >>= 8;
227         objectp_ram[0x10] = object & 0xFF; object >>= 8;
228
229         objectp_ram[0x17] = object & 0xFF; object >>= 8;
230         objectp_ram[0x16] = object & 0xFF; object >>= 8;
231         objectp_ram[0x15] = object & 0xFF; object >>= 8;
232         objectp_ram[0x14] = object & 0xFF;*/
233 // Let's try regular good old big endian...
234         tom_ram_8[0x17] = object & 0xFF; object >>= 8;
235         tom_ram_8[0x16] = object & 0xFF; object >>= 8;
236         tom_ram_8[0x15] = object & 0xFF; object >>= 8;
237         tom_ram_8[0x14] = object & 0xFF; object >>= 8;
238
239         tom_ram_8[0x13] = object & 0xFF; object >>= 8;
240         tom_ram_8[0x12] = object & 0xFF; object >>= 8;
241         tom_ram_8[0x11] = object & 0xFF; object >>= 8;
242         tom_ram_8[0x10] = object & 0xFF;
243 }
244
245 uint64 op_load_phrase(uint32 offset)
246 {
247         offset &= ~0x07;                                                // 8 byte alignment
248         return ((uint64)JaguarReadLong(offset, OP) << 32) | (uint64)JaguarReadLong(offset+4, OP);
249 }
250
251 void OPStorePhrase(uint32 offset, uint64 p)
252 {
253         offset &= ~0x07;                                                // 8 byte alignment
254         JaguarWriteLong(offset, p >> 32, OP);
255         JaguarWriteLong(offset + 4, p & 0xFFFFFFFF, OP);
256 }
257
258 //
259 // Debugging routines
260 //
261 void DumpScaledObject(uint64 p0, uint64 p1, uint64 p2)
262 {
263         WriteLog(" (SCALED BITMAP)");
264         WriteLog(" %08X --> phrase %08X %08X\n", op_pointer, (uint32)(p1>>32), (uint32)(p1&0xFFFFFFFF));
265         WriteLog("                 %08X --> phrase %08X %08X ", op_pointer+8, (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
266         uint8 bitdepth = (p1 >> 12) & 0x07;
267 //WAS:  int16 ypos = ((p0 >> 3) & 0x3FF);                       // ??? What if not interlaced (/2)?
268         int16 ypos = ((p0 >> 3) & 0x7FF);                       // ??? What if not interlaced (/2)?
269         int32 xpos = p1 & 0xFFF;
270         xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);
271         uint32 iwidth = ((p1 >> 28) & 0x3FF);
272         uint32 dwidth = ((p1 >> 18) & 0x3FF);           // Unsigned!
273         uint16 height = ((p0 >> 14) & 0x3FF);
274         uint32 link = ((p0 >> 24) & 0x7FFFF) << 3;
275         uint32 ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
276         uint32 firstPix = (p1 >> 49) & 0x3F;
277         uint8 flags = (p1 >> 45) & 0x0F;
278         uint8 idx = (p1 >> 38) & 0x7F;
279         uint32 pitch = (p1 >> 15) & 0x07;
280         WriteLog("\n    [%u (%u) x %u @ (%i, %u) (%u bpp), l: %08X, p: %08X fp: %02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
281                 iwidth, dwidth, height, xpos, ypos, op_bitmap_bit_depth[bitdepth], link, ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""), (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""), (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
282         uint32 hscale = p2 & 0xFF;
283         uint32 vscale = (p2 >> 8) & 0xFF;
284         uint32 remainder = (p2 >> 16) & 0xFF;
285         WriteLog("    [hsc: %02X, vsc: %02X, rem: %02X]\n", hscale, vscale, remainder);
286 }
287
288 void DumpFixedObject(uint64 p0, uint64 p1)
289 {
290         WriteLog(" (BITMAP)");
291         WriteLog(" %08X --> phrase %08X %08X\n", op_pointer, (uint32)(p1>>32), (uint32)(p1&0xFFFFFFFF));
292         uint8 bitdepth = (p1 >> 12) & 0x07;
293 //WAS:  int16 ypos = ((p0 >> 3) & 0x3FF);                       // ??? What if not interlaced (/2)?
294         int16 ypos = ((p0 >> 3) & 0x7FF);                       // ??? What if not interlaced (/2)?
295         int32 xpos = p1 & 0xFFF;
296         xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);
297         uint32 iwidth = ((p1 >> 28) & 0x3FF);
298         uint32 dwidth = ((p1 >> 18) & 0x3FF);           // Unsigned!
299         uint16 height = ((p0 >> 14) & 0x3FF);
300         uint32 link = ((p0 >> 24) & 0x7FFFF) << 3;
301         uint32 ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
302         uint32 firstPix = (p1 >> 49) & 0x3F;
303         uint8 flags = (p1 >> 45) & 0x0F;
304         uint8 idx = (p1 >> 38) & 0x7F;
305         uint32 pitch = (p1 >> 15) & 0x07;
306         WriteLog("    [%u (%u) x %u @ (%i, %u) (%u bpp), l: %08X, p: %08X fp: %02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
307                 iwidth, dwidth, height, xpos, ypos, op_bitmap_bit_depth[bitdepth], link, ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""), (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""), (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
308 }
309
310 //
311 // Object Processor main routine
312 //
313 //Need to fix this so that when a GPU object IRQ happens, we can pick up OP processing
314 //where we left off. !!! FIX !!!
315 #warning Need to fix this so that when an GPU object IRQ happens, we can pick up OP processing where we left off. !!! FIX !!!
316 void OPProcessList(int scanline, bool render)
317 {
318 extern int op_start_log;
319 //      char * condition_to_str[8] =
320 //              { "==", "<", ">", "(opflag set)", "(second half line)", "?", "?", "?" };
321
322         op_pointer = op_get_list_pointer();
323
324 //      objectp_stop_reading_list = false;
325
326 //WriteLog("OP: Processing line #%u (OLP=%08X)...\n", scanline, op_pointer);
327 //op_done();
328
329 // *** BEGIN OP PROCESSOR TESTING ONLY ***
330 extern bool interactiveMode;
331 extern bool iToggle;
332 extern int objectPtr;
333 bool inhibit;
334 int bitmapCounter = 0;
335 // *** END OP PROCESSOR TESTING ONLY ***
336
337         uint32 opCyclesToRun = 10000;                                   // This is a pulled-out-of-the-air value (will need to be fixed, obviously!)
338
339 //      if (op_pointer) WriteLog(" new op list at 0x%.8x scanline %i\n",op_pointer,scanline);
340         while (op_pointer)
341         {
342 // *** BEGIN OP PROCESSOR TESTING ONLY ***
343 if (interactiveMode && bitmapCounter == objectPtr)
344         inhibit = iToggle;
345 else
346         inhibit = false;
347 // *** END OP PROCESSOR TESTING ONLY ***
348 //              if (objectp_stop_reading_list)
349 //                      return;
350                         
351                 uint64 p0 = op_load_phrase(op_pointer);
352 //WriteLog("\t%08X type %i\n", op_pointer, (uint8)p0 & 0x07);
353                 op_pointer += 8;
354 if (scanline == tom_get_vdb() && op_start_log)
355 //if (scanline == 215 && op_start_log)
356 //if (scanline == 28 && op_start_log)
357 {
358 WriteLog("%08X --> phrase %08X %08X", op_pointer - 8, (int)(p0>>32), (int)(p0&0xFFFFFFFF));
359 if ((p0 & 0x07) == OBJECT_TYPE_BITMAP)
360 {
361 WriteLog(" (BITMAP) ");
362 uint64 p1 = op_load_phrase(op_pointer);
363 WriteLog("\n%08X --> phrase %08X %08X ", op_pointer, (int)(p1>>32), (int)(p1&0xFFFFFFFF));
364         uint8 bitdepth = (p1 >> 12) & 0x07;
365 //WAS:  int16 ypos = ((p0 >> 3) & 0x3FF);                       // ??? What if not interlaced (/2)?
366         int16 ypos = ((p0 >> 3) & 0x7FF);                       // ??? What if not interlaced (/2)?
367 int32 xpos = p1 & 0xFFF;
368 xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);
369         uint32 iwidth = ((p1 >> 28) & 0x3FF);
370         uint32 dwidth = ((p1 >> 18) & 0x3FF);           // Unsigned!
371         uint16 height = ((p0 >> 14) & 0x3FF);
372         uint32 link = ((p0 >> 24) & 0x7FFFF) << 3;
373         uint32 ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
374         uint32 firstPix = (p1 >> 49) & 0x3F;
375         uint8 flags = (p1 >> 45) & 0x0F;
376         uint8 idx = (p1 >> 38) & 0x7F;
377         uint32 pitch = (p1 >> 15) & 0x07;
378 WriteLog("\n    [%u (%u) x %u @ (%i, %u) (%u bpp), l: %08X, p: %08X fp: %02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
379         iwidth, dwidth, height, xpos, ypos, op_bitmap_bit_depth[bitdepth], link, ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""), (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""), (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
380 }
381 if ((p0 & 0x07) == OBJECT_TYPE_SCALE)
382 {
383 WriteLog(" (SCALED BITMAP)");
384 uint64 p1 = op_load_phrase(op_pointer), p2 = op_load_phrase(op_pointer+8);
385 WriteLog("\n%08X --> phrase %08X %08X ", op_pointer, (int)(p1>>32), (int)(p1&0xFFFFFFFF));
386 WriteLog("\n%08X --> phrase %08X %08X ", op_pointer+8, (int)(p2>>32), (int)(p2&0xFFFFFFFF));
387         uint8 bitdepth = (p1 >> 12) & 0x07;
388 //WAS:  int16 ypos = ((p0 >> 3) & 0x3FF);                       // ??? What if not interlaced (/2)?
389         int16 ypos = ((p0 >> 3) & 0x7FF);                       // ??? What if not interlaced (/2)?
390 int32 xpos = p1 & 0xFFF;
391 xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);
392         uint32 iwidth = ((p1 >> 28) & 0x3FF);
393         uint32 dwidth = ((p1 >> 18) & 0x3FF);           // Unsigned!
394         uint16 height = ((p0 >> 14) & 0x3FF);
395         uint32 link = ((p0 >> 24) & 0x7FFFF) << 3;
396         uint32 ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
397         uint32 firstPix = (p1 >> 49) & 0x3F;
398         uint8 flags = (p1 >> 45) & 0x0F;
399         uint8 idx = (p1 >> 38) & 0x7F;
400         uint32 pitch = (p1 >> 15) & 0x07;
401 WriteLog("\n    [%u (%u) x %u @ (%i, %u) (%u bpp), l: %08X, p: %08X fp: %02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
402         iwidth, dwidth, height, xpos, ypos, op_bitmap_bit_depth[bitdepth], link, ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""), (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""), (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
403         uint32 hscale = p2 & 0xFF;
404         uint32 vscale = (p2 >> 8) & 0xFF;
405         uint32 remainder = (p2 >> 16) & 0xFF;
406 WriteLog("    [hsc: %02X, vsc: %02X, rem: %02X]\n", hscale, vscale, remainder);
407 }
408 if ((p0 & 0x07) == OBJECT_TYPE_GPU)
409 WriteLog(" (GPU)\n");
410 if ((p0 & 0x07) == OBJECT_TYPE_BRANCH)
411 {
412 WriteLog(" (BRANCH)\n");
413 uint8 * jaguar_mainRam = GetRamPtr();
414 WriteLog("[RAM] --> ");
415 for(int k=0; k<8; k++)
416         WriteLog("%02X ", jaguar_mainRam[op_pointer-8 + k]);
417 WriteLog("\n");
418 }
419 if ((p0 & 0x07) == OBJECT_TYPE_STOP)
420 WriteLog("    --> List end\n");
421 }//*/
422                 
423                 switch ((uint8)p0 & 0x07)
424                 {
425                 case OBJECT_TYPE_BITMAP:
426                 {
427 //WAS:                  uint16 ypos = (p0 >> 3) & 0x3FF;
428                         uint16 ypos = (p0 >> 3) & 0x7FF;
429 // This is only theory implied by Rayman...!
430 // It seems that if the YPOS is zero, then bump the YPOS value so that it coincides with
431 // the VDB value. With interlacing, this would be slightly more tricky.
432 // There's probably another bit somewhere that enables this mode--but so far, doesn't seem
433 // to affect any other game in a negative way (that I've seen).
434 // Either that, or it's an undocumented bug...
435
436 //No, the reason this was needed is that the OP code before was wrong. Any value
437 //less than VDB will get written to the top line of the display!
438 //                      if (ypos == 0)
439 //                              ypos = TOMReadWord(0xF00046, OP) / 2;                   // Get the VDB value
440                         uint32 height = (p0 & 0xFFC000) >> 14;
441                         uint32 oldOPP = op_pointer - 8;
442 // *** BEGIN OP PROCESSOR TESTING ONLY ***
443 if (inhibit && op_start_log)
444         WriteLog("!!! ^^^ This object is INHIBITED! ^^^ !!!\n");
445 bitmapCounter++;
446 if (!inhibit)   // For OP testing only!
447 // *** END OP PROCESSOR TESTING ONLY ***
448                         if (scanline >= ypos && height > 0)
449                         {
450                                 uint64 p1 = op_load_phrase(op_pointer);
451                                 op_pointer += 8;
452 //WriteLog("OP: Writing scanline %d with ypos == %d...\n", scanline, ypos);
453 //WriteLog("--> Writing %u BPP bitmap...\n", op_bitmap_bit_depth[(p1 >> 12) & 0x07]);
454 //                              OPProcessFixedBitmap(scanline, p0, p1, render);
455                                 OPProcessFixedBitmap(p0, p1, render);
456
457                                 // OP write-backs
458
459 //???Does this really happen??? Doesn't seem to work if you do this...!
460 //Probably not. Must be a bug in the documentation...!
461 //                              uint32 link = (p0 & 0x7FFFF000000) >> 21;
462 //                              SET16(tom_ram_8, 0x20, link & 0xFFFF);  // OLP
463 //                              SET16(tom_ram_8, 0x22, link >> 16);
464 /*                              uint32 height = (p0 & 0xFFC000) >> 14;
465                                 if (height - 1 > 0)
466                                         height--;*/
467                                 // NOTE: Would subtract 2 if in interlaced mode...!
468 //                              uint64 height = ((p0 & 0xFFC000) - 0x4000) & 0xFFC000;
469 //                              if (height)
470                                 height--;
471
472                                 uint64 data = (p0 & 0xFFFFF80000000000LL) >> 40;
473                                 uint64 dwidth = (p1 & 0xFFC0000) >> 15;
474                                 data += dwidth;
475
476                                 p0 &= ~0xFFFFF80000FFC000LL;            // Mask out old data...
477                                 p0 |= (uint64)height << 14;
478                                 p0 |= data << 40;
479                                 OPStorePhrase(oldOPP, p0);
480                         }
481 //WriteLog("\t\tOld OP: %08X -> ", op_pointer);
482 //Temp, for testing...
483 //No doubt, this type of check will break all kinds of stuff... !!! FIX !!!
484 //And it does! !!! FIX !!!
485 //Let's remove this "fix" since it screws up more than it fixes.
486 /*      if (op_pointer > ((p0 & 0x000007FFFF000000LL) >> 21))
487                 return;*/
488
489                         op_pointer = (p0 & 0x000007FFFF000000LL) >> 21;
490 //WriteLog("New OP: %08X\n", op_pointer);
491                         break;
492                 }
493                 case OBJECT_TYPE_SCALE:
494                 {
495 //WAS:                  uint16 ypos = (p0 >> 3) & 0x3FF;
496                         uint16 ypos = (p0 >> 3) & 0x7FF;
497                         uint32 height = (p0 & 0xFFC000) >> 14;
498                         uint32 oldOPP = op_pointer - 8;
499 // *** BEGIN OP PROCESSOR TESTING ONLY ***
500 if (inhibit && op_start_log)
501 {
502         WriteLog("!!! ^^^ This object is INHIBITED! ^^^ !!! (scanline=%u, ypos=%u, height=%u)\n", scanline, ypos, height);
503         DumpScaledObject(p0, op_load_phrase(op_pointer), op_load_phrase(op_pointer+8));
504 }
505 bitmapCounter++;
506 if (!inhibit)   // For OP testing only!
507 // *** END OP PROCESSOR TESTING ONLY ***
508                         if (scanline >= ypos && height > 0)
509                         {
510                                 uint64 p1 = op_load_phrase(op_pointer);
511                                 op_pointer += 8;
512                                 uint64 p2 = op_load_phrase(op_pointer);
513                                 op_pointer += 8;
514 //WriteLog("OP: %08X (%d) %08X%08X %08X%08X %08X%08X\n", oldOPP, scanline, (uint32)(p0>>32), (uint32)(p0&0xFFFFFFFF), (uint32)(p1>>32), (uint32)(p1&0xFFFFFFFF), (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
515                                 OPProcessScaledBitmap(p0, p1, p2, render);
516
517                                 // OP write-backs
518
519                                 uint8 remainder = p2 >> 16, vscale = p2 >> 8;
520 //Actually, we should skip this object if it has a vscale of zero.
521 //Or do we? Not sure... Atari Karts has a few lines that look like:
522 // (SCALED BITMAP)
523 //000E8268 --> phrase 00010000 7000B00D 
524 //    [7 (0) x 1 @ (13, 0) (8 bpp), l: 000E82A0, p: 000E0FC0 fp: 00, fl:RELEASE, idx:00, pt:01]
525 //    [hsc: 9A, vsc: 00, rem: 00]
526 // Could it be the vscale is overridden if the DWIDTH is zero? Hmm...
527
528                                 if (vscale == 0)
529                                         vscale = 0x20;                                  // OP bug??? Nope, it isn't...! Or is it?
530
531 //extern int start_logging;
532 //if (start_logging)
533 //      WriteLog("--> Returned from scaled bitmap processing (rem=%02X, vscale=%02X)...\n", remainder, vscale);//*/
534 //Locks up here:
535 //--> Returned from scaled bitmap processing (rem=20, vscale=80)...
536 //There are other problems here, it looks like...
537 //Another lock up:
538 //About to execute OP (508)...
539 /*
540 OP: Scaled bitmap 4x? 4bpp at 38,? hscale=7C fpix=0 data=00075E28 pitch 1 hflipped=no dwidth=? (linked to 00071118) Transluency=no
541 --> Returned from scaled bitmap processing (rem=50, vscale=7C)...
542 OP: Scaled bitmap 4x? 4bpp at 38,? hscale=7C fpix=0 data=00075E28 pitch 1 hflipped=no dwidth=? (linked to 00071118) Transluency=no
543 --> Returned from scaled bitmap processing (rem=30, vscale=7C)...
544 OP: Scaled bitmap 4x? 4bpp at 38,? hscale=7C fpix=0 data=00075E28 pitch 1 hflipped=no dwidth=? (linked to 00071118) Transluency=no
545 --> Returned from scaled bitmap processing (rem=10, vscale=7C)...
546 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756A8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
547 --> Returned from scaled bitmap processing (rem=00, vscale=7E)...
548 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
549 --> Returned from scaled bitmap processing (rem=00, vscale=80)...
550 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
551 --> Returned from scaled bitmap processing (rem=5E, vscale=7E)...
552 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756E8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
553 --> Returned from scaled bitmap processing (rem=60, vscale=80)...
554 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
555 --> Returned from scaled bitmap processing (rem=3E, vscale=7E)...
556 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756E8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
557 --> Returned from scaled bitmap processing (rem=40, vscale=80)...
558 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
559 --> Returned from scaled bitmap processing (rem=1E, vscale=7E)...
560 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756E8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
561 --> Returned from scaled bitmap processing (rem=20, vscale=80)...
562 */
563 //Here's another problem:
564 //    [hsc: 20, vsc: 20, rem: 00]
565 // Since we're not checking for $E0 (but that's what we get from the above), we end
566 // up repeating this scanline unnecessarily... !!! FIX !!! [DONE, but... still not quite
567 // right. Either that, or the Accolade team that wrote Bubsy screwed up royal.]
568 //Also note: $E0 = 7.0 which IS a legal vscale value...
569
570 //                              if (remainder & 0x80)                           // I.e., it's negative
571 //                              if ((remainder & 0x80) || remainder == 0)       // I.e., it's <= 0
572 //                              if ((remainder - 1) >= 0xE0)            // I.e., it's <= 0
573 //                              if ((remainder >= 0xE1) || remainder == 0)// I.e., it's <= 0
574 //                              if ((remainder >= 0xE1 && remainder <= 0xFF) || remainder == 0)// I.e., it's <= 0
575                                 if (remainder <= 0x20)                          // I.e., it's <= 0
576                                 {
577                                         uint64 data = (p0 & 0xFFFFF80000000000LL) >> 40;
578                                         uint64 dwidth = (p1 & 0xFFC0000) >> 15;
579
580 //                                      while (remainder & 0x80)
581 //                                      while ((remainder & 0x80) || remainder == 0)
582 //                                      while ((remainder - 1) >= 0xE0)
583 //                                      while ((remainder >= 0xE1) || remainder == 0)
584 //                                      while ((remainder >= 0xE1 && remainder <= 0xFF) || remainder == 0)
585                                         while (remainder <= 0x20)
586                                         {
587                                                 remainder += vscale;
588
589                                                 if (height)
590                                                         height--;
591
592                                                 data += dwidth;
593                                         }
594
595                                         p0 &= ~0xFFFFF80000FFC000LL;    // Mask out old data...
596                                         p0 |= (uint64)height << 14;
597                                         p0 |= data << 40;
598                                         OPStorePhrase(oldOPP, p0);
599                                 }
600
601                                 remainder -= 0x20;                                      // 1.0f in [3.5] fixed point format
602
603 //if (start_logging)
604 //      WriteLog("--> Finished writebacks...\n");//*/
605
606 //WriteLog(" [%08X%08X -> ", (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
607                                 p2 &= ~0x0000000000FF0000LL;
608                                 p2 |= (uint64)remainder << 16;
609 //WriteLog("%08X%08X]\n", (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
610                                 OPStorePhrase(oldOPP+16, p2);
611 //remainder = (uint8)(p2 >> 16), vscale = (uint8)(p2 >> 8);
612 //WriteLog(" [after]: rem=%02X, vscale=%02X\n", remainder, vscale);
613                         }
614                         op_pointer = (p0 & 0x000007FFFF000000LL) >> 21;
615                         break;
616                 }
617                 case OBJECT_TYPE_GPU:
618                 {
619 //WriteLog("OP: Asserting GPU IRQ #3...\n");
620                         op_set_current_object(p0);
621                         GPUSetIRQLine(3, ASSERT_LINE);
622 //Also, OP processing is suspended from this point until OBF (F00026) is written to...
623 // !!! FIX !!!
624 //Do something like:
625 //OPSuspendedByGPU = true;
626 //Dunno if the OP keeps processing from where it was interrupted, or if it just continues
627 //on the next scanline...
628 // --> It continues from where it was interrupted! !!! FIX !!!
629                         break;
630                 }
631                 case OBJECT_TYPE_BRANCH:
632                 {
633                         uint16 ypos = (p0 >> 3) & 0x7FF;
634                         uint8  cc   = (p0 >> 14) & 0x03;
635                         uint32 link = (p0 >> 21) & 0x3FFFF8;
636                         
637 //                      if ((ypos!=507)&&(ypos!=25))
638 //                              WriteLog("\t%i%s%i link=0x%.8x\n",scanline,condition_to_str[cc],ypos>>1,link);
639                         switch (cc)
640                         {
641                         case CONDITION_EQUAL:
642                                 if (TOMReadWord(0xF00006, OP) == ypos || ypos == 0x7FF)
643                                         op_pointer = link;
644                                 break;
645                         case CONDITION_LESS_THAN:
646                                 if (TOMReadWord(0xF00006, OP) < ypos)
647                                         op_pointer = link;
648                                 break;
649                         case CONDITION_GREATER_THAN:
650                                 if (TOMReadWord(0xF00006, OP) > ypos)
651                                         op_pointer = link;
652                                 break;
653                         case CONDITION_OP_FLAG_SET:
654                                 if (op_get_status_register() & 0x01)
655                                         op_pointer = link;
656                                 break;
657                         case CONDITION_SECOND_HALF_LINE:
658                                 // This basically means branch if bit 10 of HC is set
659                                 WriteLog("OP: Unexpected CONDITION_SECOND_HALF_LINE in BRANCH object\nOP: shuting down\n");
660                                 fclose(log_get());
661                                 exit(0);
662                                 break;
663                         default:
664                                 WriteLog("OP: Unimplemented branch condition %i\n", cc);
665                         }
666                         break;
667                 }
668                 case OBJECT_TYPE_STOP:
669                 {
670 //op_start_log = 0;
671                         // unsure
672 //WriteLog("OP: --> STOP\n");
673 //                      op_set_status_register(((p0>>3) & 0xFFFFFFFF));
674 //This seems more likely...
675                         op_set_current_object(p0);
676                         
677                         if (p0 & 0x08)
678                         {
679                                 tom_set_pending_object_int();
680                                 if (tom_irq_enabled(IRQ_OPFLAG))// && jaguar_interrupt_handler_is_valid(64))
681                                         m68k_set_irq(7);                                // Cause an NMI to occur...
682                         }
683
684                         return;
685 //                      break;
686                 }
687                 default:
688                         WriteLog("op: unknown object type %i\n", ((uint8)p0 & 0x07)); 
689                         return;
690                 }
691
692                 // Here is a little sanity check to keep the OP from locking up the machine
693                 // when fed bad data. Better would be to count how many actual cycles it used
694                 // and bail out/reenter to properly simulate an overloaded OP... !!! FIX !!!
695                 opCyclesToRun--;
696                 if (!opCyclesToRun)
697                         return;
698         }
699 }
700
701 //
702 // Store fixed size bitmap in line buffer
703 //
704 void OPProcessFixedBitmap(uint64 p0, uint64 p1, bool render)
705 {
706 // Need to make sure that when writing that it stays within the line buffer...
707 // LBUF ($F01800 - $F01D9E) 360 x 32-bit RAM
708         uint8 depth = (p1 >> 12) & 0x07;                                // Color depth of image
709         int32 xpos = ((int16)((p1 << 4) & 0xFFFF)) >> 4;// Image xpos in LBUF
710         uint32 iwidth = (p1 >> 28) & 0x3FF;                             // Image width in *phrases*
711         uint32 data = (p0 >> 40) & 0xFFFFF8;                    // Pixel data address
712 //#ifdef OP_DEBUG_BMP
713         uint32  firstPix = (p1 >> 49) & 0x3F;
714         // "The LSB is significant only for scaled objects..." -JTRM
715         // "In 1 BPP mode, all five bits are significant. In 2 BPP mode, the top four are significant..."
716         firstPix &= 0x3E;
717 //#endif
718 // We can ignore the RELEASE (high order) bit for now--probably forever...!
719 //      uint8 flags = (p1 >> 45) & 0x0F;        // REFLECT, RMW, TRANS, RELEASE
720 //Optimize: break these out to their own BOOL values
721         uint8 flags = (p1 >> 45) & 0x07;                                // REFLECT (0), RMW (1), TRANS (2)
722         bool flagREFLECT = (flags & OPFLAG_REFLECT ? true : false),
723                 flagRMW = (flags & OPFLAG_RMW ? true : false),
724                 flagTRANS = (flags & OPFLAG_TRANS ? true : false);
725 // "For images with 1 to 4 bits/pixel the top 7 to 4 bits of the index
726 //  provide the most significant bits of the palette address."
727         uint8 index = (p1 >> 37) & 0xFE;                                // CLUT index offset (upper pix, 1-4 bpp)
728         uint32 pitch = (p1 >> 15) & 0x07;                               // Phrase pitch
729         pitch <<= 3;                                                                    // Optimization: Multiply pitch by 8
730
731 //      int16 scanlineWidth = tom_getVideoModeWidth();
732         uint8 * tom_ram_8 = tom_get_ram_pointer();
733         uint8 * paletteRAM = &tom_ram_8[0x400];
734         // This is OK as long as it's used correctly: For 16-bit RAM to RAM direct copies--NOT
735         // for use when using endian-corrected data (i.e., any of the *_word_read functions!)
736         uint16 * paletteRAM16 = (uint16 *)paletteRAM;
737
738 //      WriteLog("bitmap %ix? %ibpp at %i,? firstpix=? data=0x%.8x pitch %i hflipped=%s dwidth=? (linked to ?) RMW=%s Tranparent=%s\n",
739 //              iwidth, op_bitmap_bit_depth[bitdepth], xpos, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), (flags&OPFLAG_RMW ? "yes" : "no"), (flags&OPFLAG_TRANS ? "yes" : "no"));
740
741 // Is it OK to have a 0 for the data width??? (i.e., undocumented?)
742 // Seems to be... Seems that dwidth *can* be zero (i.e., reuse same line) as well.
743 // Pitch == 0 is OK too...
744 //      if (!render || op_pointer == 0 || ptr == 0 || pitch == 0)
745 //I'm not convinced that we need to concern ourselves with data & op_pointer here either!
746         if (!render || iwidth == 0)
747                 return;
748
749 //#define OP_DEBUG_BMP
750 //#ifdef OP_DEBUG_BMP
751 //      WriteLog("bitmap %ix%i %ibpp at %i,%i firstpix=%i data=0x%.8x pitch %i hflipped=%s dwidth=%i (linked to 0x%.8x) Transluency=%s\n",
752 //              iwidth, height, op_bitmap_bit_depth[bitdepth], xpos, ypos, firstPix, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), dwidth, op_pointer, (flags&OPFLAG_RMW ? "yes" : "no"));
753 //#endif
754
755 //      int32 leftMargin = xpos, rightMargin = (xpos + (phraseWidthToPixels[depth] * iwidth)) - 1;
756         int32 startPos = xpos, endPos = xpos +
757                 (!flagREFLECT ? (phraseWidthToPixels[depth] * iwidth) - 1
758                 : -((phraseWidthToPixels[depth] * iwidth) + 1));
759         uint32 clippedWidth = 0, phraseClippedWidth = 0, dataClippedWidth = 0;//, phrasePixel = 0;
760         bool in24BPPMode = (((GET16(tom_ram_8, 0x0028) >> 1) & 0x03) == 1 ? true : false);      // VMODE
761         // Not sure if this is Jaguar Two only location or what...
762         // From the docs, it is... If we want to limit here we should think of something else.
763 //      int32 limit = GET16(tom_ram_8, 0x0008);                 // LIMIT
764         int32 limit = 720;
765         int32 lbufWidth = (!in24BPPMode ? limit - 1 : (limit / 2) - 1); // Zero based limit...
766
767         // If the image is completely to the left or right of the line buffer, then bail.
768 //If in REFLECT mode, then these values are swapped! !!! FIX !!! [DONE]
769 //There are four possibilities:
770 //  1. image sits on left edge and no REFLECT; starts out of bounds but ends in bounds.
771 //  2. image sits on left edge and REFLECT; starts in bounds but ends out of bounds.
772 //  3. image sits on right edge and REFLECT; starts out of bounds but ends in bounds.
773 //  4. image sits on right edge and no REFLECT; starts in bounds but ends out of bounds.
774 //Numbers 2 & 4 can be caught by checking the LBUF clip while in the inner loop,
775 // numbers 1 & 3 are of concern.
776 // This *indirectly* handles only cases 2 & 4! And is WRONG is REFLECT is set...!
777 //      if (rightMargin < 0 || leftMargin > lbufWidth)
778
779 // It might be easier to swap these (if REFLECTed) and just use XPOS down below...
780 // That way, you could simply set XPOS to leftMargin if !REFLECT and to rightMargin otherwise.
781 // Still have to be careful with the DATA and IWIDTH values though...
782
783 //      if ((!flagREFLECT && (rightMargin < 0 || leftMargin > lbufWidth))
784 //              || (flagREFLECT && (leftMargin < 0 || rightMargin > lbufWidth)))
785 //              return;
786         if ((!flagREFLECT && (endPos < 0 || startPos > lbufWidth))
787                 || (flagREFLECT && (startPos < 0 || endPos > lbufWidth)))
788                 return;
789
790         // Otherwise, find the clip limits and clip the phrase as well...
791         // NOTE: I'm fudging here by letting the actual blit overstep the bounds of the
792         //       line buffer, but it shouldn't matter since there are two unused line
793         //       buffers below and nothing above and I'll at most write 8 bytes outside
794         //       the line buffer... I could use a fractional clip begin/end value, but
795         //       this makes the blit a *lot* more hairy. I might fix this in the future
796         //       if it becomes necessary. (JLH)
797         //       Probably wouldn't be *that* hairy. Just use a delta that tells the inner loop
798         //       which pixel in the phrase is being written, and quit when either end of phrases
799         //       is reached or line buffer extents are surpassed.
800
801 //This stuff is probably wrong as well... !!! FIX !!!
802 //The strange thing is that it seems to work, but that's no guarantee that it's bulletproof!
803 //Yup. Seems that JagMania doesn't work correctly with this...
804 //Dunno if this is the problem, but Atari Karts is showing *some* of the road now...
805 //      if (!flagREFLECT)
806
807 /*
808         if (leftMargin < 0)
809                 clippedWidth = 0 - leftMargin,
810                 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth],
811                 leftMargin = 0 - (clippedWidth % phraseWidthToPixels[depth]);
812 //              leftMargin = 0;
813
814         if (rightMargin > lbufWidth)
815                 clippedWidth = rightMargin - lbufWidth,
816                 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];//,
817 //              rightMargin = lbufWidth + (clippedWidth % phraseWidthToPixels[depth]);
818 //              rightMargin = lbufWidth;
819 */
820 if (depth > 5)
821         WriteLog("OP: We're about to encounter a divide by zero error!\n");
822         // NOTE: We're just using endPos to figure out how much, if any, to clip by.
823         // ALSO: There may be another case where we start out of bounds and end out of bounds...!
824         // !!! FIX !!!
825         if (startPos < 0)                       // Case #1: Begin out, end in, L to R
826                 clippedWidth = 0 - startPos,
827                 dataClippedWidth = phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth],
828                 startPos = 0 - (clippedWidth % phraseWidthToPixels[depth]);
829
830         if (endPos < 0)                         // Case #2: Begin in, end out, R to L
831                 clippedWidth = 0 - endPos,
832                 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];
833
834         if (endPos > lbufWidth)         // Case #3: Begin in, end out, L to R
835                 clippedWidth = endPos - lbufWidth,
836                 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];
837
838         if (startPos > lbufWidth)       // Case #4: Begin out, end in, R to L
839                 clippedWidth = startPos - lbufWidth,
840                 dataClippedWidth = phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth],
841                 startPos = lbufWidth + (clippedWidth % phraseWidthToPixels[depth]);
842
843         // If the image is sitting on the line buffer left or right edge, we need to compensate
844         // by decreasing the image phrase width accordingly.
845         iwidth -= phraseClippedWidth;
846
847         // Also, if we're clipping the phrase we need to make sure we're in the correct part of
848         // the pixel data.
849 //      data += phraseClippedWidth * (pitch << 3);
850         data += dataClippedWidth * pitch;
851
852         // NOTE: When the bitmap is in REFLECT mode, the XPOS marks the *right* side of the
853         //       bitmap! This makes clipping & etc. MUCH, much easier...!
854 //      uint32 lbufAddress = 0x1800 + (!in24BPPMode ? leftMargin * 2 : leftMargin * 4);
855 //Why does this work right when multiplying startPos by 2 (instead of 4) for 24 BPP mode?
856 //Is this a bug in the OP?
857         uint32 lbufAddress = 0x1800 + (!in24BPPMode ? startPos * 2 : startPos * 2);
858         uint8 * currentLineBuffer = &tom_ram_8[lbufAddress];
859
860         // Render.
861
862 // Hmm. We check above for 24 BPP mode, but don't do anything about it below...
863 // If we *were* in 24 BPP mode, how would you convert CRY to RGB24? Seems to me
864 // that if you're in CRY mode then you wouldn't be able to use 24 BPP bitmaps
865 // anyway.
866 // This seems to be the case (at least according to the Midsummer docs)...!
867
868         if (depth == 0)                                                                 // 1 BPP
869         {
870                 // The LSB of flags is OPFLAG_REFLECT, so sign extend it and or 2 into it.
871                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
872
873                 // Fetch 1st phrase...
874                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
875 //Note that firstPix should only be honored *if* we start with the 1st phrase of the bitmap
876 //i.e., we didn't clip on the margin... !!! FIX !!!
877                 pixels <<= firstPix;                                            // Skip first N pixels (N=firstPix)...
878                 int i = firstPix;                                                       // Start counter at right spot...
879
880                 while (iwidth--)
881                 {
882                         while (i++ < 64)
883                         {
884                                 uint8 bit = pixels >> 63;
885                                 if (flagTRANS && bit == 0)
886                                         ;       // Do nothing...
887                                 else
888                                 {
889                                         if (!flagRMW)
890 //Optimize: Set palleteRAM16 to beginning of palette RAM + index*2 and use only [bit] as index...
891 //Won't optimize RMW case though...
892                                                 // This is the *only* correct use of endian-dependent code
893                                                 // (i.e., mem-to-mem direct copying)!
894                                                 *(uint16 *)currentLineBuffer = paletteRAM16[index | bit];
895                                         else
896                                                 *currentLineBuffer = 
897                                                         BLEND_CR(*currentLineBuffer, paletteRAM[(index | bit) << 1]),
898                                                 *(currentLineBuffer + 1) = 
899                                                         BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bit) << 1) + 1]);
900                                 }
901
902                                 currentLineBuffer += lbufDelta;
903                                 pixels <<= 1;
904                         }
905                         i = 0;
906                         // Fetch next phrase...
907                         data += pitch;
908                         pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
909                 }
910         }
911         else if (depth == 1)                                                    // 2 BPP
912         {
913 if (firstPix)
914         WriteLog("OP: Fixed bitmap @ 2 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
915                 index &= 0xFC;                                                          // Top six bits form CLUT index
916                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
917                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
918
919                 while (iwidth--)
920                 {
921                         // Fetch phrase...
922                         uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
923                         data += pitch;
924
925                         for(int i=0; i<32; i++)
926                         {
927                                 uint8 bits = pixels >> 62;
928 // Seems to me that both of these are in the same endian, so we could cast it as
929 // uint16 * and do straight across copies (what about 24 bpp? Treat it differently...)
930 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
931 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
932 // No, it isn't because we read the memory in an endian safe way--this *won't* work...
933                                 if (flagTRANS && bits == 0)
934                                         ;       // Do nothing...
935                                 else
936                                 {
937                                         if (!flagRMW)
938                                                 *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
939                                         else
940                                                 *currentLineBuffer = 
941                                                         BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
942                                                 *(currentLineBuffer + 1) = 
943                                                         BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
944                                 }
945
946                                 currentLineBuffer += lbufDelta;
947                                 pixels <<= 2;
948                         }
949                 }
950         }
951         else if (depth == 2)                                                    // 4 BPP
952         {
953 if (firstPix)
954         WriteLog("OP: Fixed bitmap @ 4 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
955                 index &= 0xF0;                                                          // Top four bits form CLUT index
956                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
957                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
958
959                 while (iwidth--)
960                 {
961                         // Fetch phrase...
962                         uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
963                         data += pitch;
964
965                         for(int i=0; i<16; i++)
966                         {
967                                 uint8 bits = pixels >> 60;
968 // Seems to me that both of these are in the same endian, so we could cast it as
969 // uint16 * and do straight across copies (what about 24 bpp? Treat it differently...)
970 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
971 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
972 // No, it isn't because we read the memory in an endian safe way--this *won't* work...
973                                 if (flagTRANS && bits == 0)
974                                         ;       // Do nothing...
975                                 else
976                                 {
977                                         if (!flagRMW)
978                                                 *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
979                                         else
980                                                 *currentLineBuffer = 
981                                                         BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
982                                                 *(currentLineBuffer + 1) = 
983                                                         BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
984                                 }
985
986                                 currentLineBuffer += lbufDelta;
987                                 pixels <<= 4;
988                         }
989                 }
990         }
991         else if (depth == 3)                                                    // 8 BPP
992         {
993                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
994                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
995
996                 // Fetch 1st phrase...
997                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
998 //Note that firstPix should only be honored *if* we start with the 1st phrase of the bitmap
999 //i.e., we didn't clip on the margin... !!! FIX !!!
1000                 firstPix &= 0x30;                                                       // Only top two bits are valid for 8 BPP
1001                 pixels <<= firstPix;                                            // Skip first N pixels (N=firstPix)...
1002                 int i = firstPix >> 3;                                          // Start counter at right spot...
1003
1004                 while (iwidth--)
1005                 {
1006                         while (i++ < 8)
1007                         {
1008                                 uint8 bits = pixels >> 56;
1009 // Seems to me that both of these are in the same endian, so we could cast it as
1010 // uint16 * and do straight across copies (what about 24 bpp? Treat it differently...)
1011 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
1012 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
1013 // No, it isn't because we read the memory in an endian safe way--this *won't* work...
1014                                 if (flagTRANS && bits == 0)
1015                                         ;       // Do nothing...
1016                                 else
1017                                 {
1018                                         if (!flagRMW)
1019                                                 *(uint16 *)currentLineBuffer = paletteRAM16[bits];
1020                                         else
1021                                                 *currentLineBuffer = 
1022                                                         BLEND_CR(*currentLineBuffer, paletteRAM[bits << 1]),
1023                                                 *(currentLineBuffer + 1) = 
1024                                                         BLEND_Y(*(currentLineBuffer + 1), paletteRAM[(bits << 1) + 1]);
1025                                 }
1026
1027                                 currentLineBuffer += lbufDelta;
1028                                 pixels <<= 8;
1029                         }
1030                         i = 0;
1031                         // Fetch next phrase...
1032                         data += pitch;
1033                         pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1034                 }
1035         }
1036         else if (depth == 4)                                                    // 16 BPP
1037         {
1038 if (firstPix)
1039         WriteLog("OP: Fixed bitmap @ 16 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1040                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1041                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1042
1043                 while (iwidth--)
1044                 {
1045                         // Fetch phrase...
1046                         uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1047                         data += pitch;
1048
1049                         for(int i=0; i<4; i++)
1050                         {
1051                                 uint8 bitsHi = pixels >> 56, bitsLo = pixels >> 48;
1052 // Seems to me that both of these are in the same endian, so we could cast it as
1053 // uint16 * and do straight across copies (what about 24 bpp? Treat it differently...)
1054 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
1055 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
1056 // No, it isn't because we read the memory in an endian safe way--it *won't* work...
1057                                 if (flagTRANS && (bitsLo | bitsHi) == 0)
1058                                         ;       // Do nothing...
1059                                 else
1060                                 {
1061                                         if (!flagRMW)
1062                                                 *currentLineBuffer = bitsHi,
1063                                                 *(currentLineBuffer + 1) = bitsLo;
1064                                         else
1065                                                 *currentLineBuffer = 
1066                                                         BLEND_CR(*currentLineBuffer, bitsHi),
1067                                                 *(currentLineBuffer + 1) = 
1068                                                         BLEND_Y(*(currentLineBuffer + 1), bitsLo);
1069                                 }
1070
1071                                 currentLineBuffer += lbufDelta;
1072                                 pixels <<= 16;
1073                         }
1074                 }
1075         }
1076         else if (depth == 5)                                                    // 24 BPP
1077         {
1078 //Looks like Iron Soldier is the only game that uses 24BPP mode...
1079 //There *might* be others...
1080 //WriteLog("OP: Writing 24 BPP bitmap!\n");
1081 if (firstPix)
1082         WriteLog("OP: Fixed bitmap @ 24 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1083                 // Not sure, but I think RMW only works with 16 BPP and below, and only in CRY mode...
1084                 // The LSB of flags is OPFLAG_REFLECT, so sign extend it and OR 4 into it.
1085                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 4) | 0x04;
1086
1087                 while (iwidth--)
1088                 {
1089                         // Fetch phrase...
1090                         uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1091                         data += pitch;
1092
1093                         for(int i=0; i<2; i++)
1094                         {
1095                                 // We don't use a 32-bit var here because of endian issues...!
1096                                 uint8 bits3 = pixels >> 56, bits2 = pixels >> 48,
1097                                         bits1 = pixels >> 40, bits0 = pixels >> 32;
1098
1099                                 if (flagTRANS && (bits3 | bits2 | bits1 | bits0) == 0)
1100                                         ;       // Do nothing...
1101                                 else
1102                                         *currentLineBuffer = bits3,
1103                                         *(currentLineBuffer + 1) = bits2,
1104                                         *(currentLineBuffer + 2) = bits1,
1105                                         *(currentLineBuffer + 3) = bits0;
1106
1107                                 currentLineBuffer += lbufDelta;
1108                                 pixels <<= 32;
1109                         }
1110                 }
1111         }
1112 }
1113
1114 //
1115 // Store scaled bitmap in line buffer
1116 //
1117 void OPProcessScaledBitmap(uint64 p0, uint64 p1, uint64 p2, bool render)
1118 {
1119 // Need to make sure that when writing that it stays within the line buffer...
1120 // LBUF ($F01800 - $F01D9E) 360 x 32-bit RAM
1121         uint8 depth = (p1 >> 12) & 0x07;                                // Color depth of image
1122         int32 xpos = ((int16)((p1 << 4) & 0xFFFF)) >> 4;// Image xpos in LBUF
1123         uint32 iwidth = (p1 >> 28) & 0x3FF;                             // Image width in *phrases*
1124         uint32 data = (p0 >> 40) & 0xFFFFF8;                    // Pixel data address
1125 //#ifdef OP_DEBUG_BMP
1126 // Prolly should use this... Though not sure exactly how.
1127 //Use the upper bits as an offset into the phrase depending on the BPP. That's how!
1128         uint32 firstPix = (p1 >> 49) & 0x3F;
1129 //This is WEIRD! I'm sure I saw Atari Karts request 8 BPP FIRSTPIX! What happened???
1130 if (firstPix)
1131         WriteLog("OP: FIRSTPIX != 0! (Scaled BM)\n");
1132 //#endif
1133 // We can ignore the RELEASE (high order) bit for now--probably forever...!
1134 //      uint8 flags = (p1 >> 45) & 0x0F;        // REFLECT, RMW, TRANS, RELEASE
1135 //Optimize: break these out to their own BOOL values [DONE]
1136         uint8 flags = (p1 >> 45) & 0x07;                                // REFLECT (0), RMW (1), TRANS (2)
1137         bool flagREFLECT = (flags & OPFLAG_REFLECT ? true : false),
1138                 flagRMW = (flags & OPFLAG_RMW ? true : false),
1139                 flagTRANS = (flags & OPFLAG_TRANS ? true : false);
1140         uint8 index = (p1 >> 37) & 0xFE;                                // CLUT index offset (upper pix, 1-4 bpp)
1141         uint32 pitch = (p1 >> 15) & 0x07;                               // Phrase pitch
1142
1143         uint8 * tom_ram_8 = tom_get_ram_pointer();
1144         uint8 * paletteRAM = &tom_ram_8[0x400];
1145         // This is OK as long as it's used correctly: For 16-bit RAM to RAM direct copies--NOT
1146         // for use when using endian-corrected data (i.e., any of the *ReadWord functions!)
1147         uint16 * paletteRAM16 = (uint16 *)paletteRAM;
1148
1149         uint8 hscale = p2 & 0xFF;
1150 // Hmm. It seems that fixing the horizontal scale necessitated re-fixing this. Not sure why,
1151 // but seems to be consistent with the vertical scaling now (and it may turn out to be wrong!)...
1152         uint8 horizontalRemainder = hscale;                             // Not sure if it starts full, but seems reasonable [It's not!]
1153 //      uint8 horizontalRemainder = 0;                                  // Let's try zero! Seems to work! Yay! [No, it doesn't!]
1154         int32 scaledWidthInPixels = (iwidth * phraseWidthToPixels[depth] * hscale) >> 5;
1155         uint32 scaledPhrasePixels = (phraseWidthToPixels[depth] * hscale) >> 5;
1156
1157 //      WriteLog("bitmap %ix? %ibpp at %i,? firstpix=? data=0x%.8x pitch %i hflipped=%s dwidth=? (linked to ?) RMW=%s Tranparent=%s\n",
1158 //              iwidth, op_bitmap_bit_depth[bitdepth], xpos, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), (flags&OPFLAG_RMW ? "yes" : "no"), (flags&OPFLAG_TRANS ? "yes" : "no"));
1159
1160 // Looks like an hscale of zero means don't draw!
1161         if (!render || iwidth == 0 || hscale == 0)
1162                 return;
1163
1164 /*extern int start_logging;
1165 if (start_logging)
1166         WriteLog("OP: Scaled bitmap %ix? %ibpp at %i,? hscale=%02X fpix=%i data=%08X pitch %i hflipped=%s dwidth=? (linked to %08X) Transluency=%s\n",
1167                 iwidth, op_bitmap_bit_depth[depth], xpos, hscale, firstPix, data, pitch, (flagREFLECT ? "yes" : "no"), op_pointer, (flagRMW ? "yes" : "no"));*/
1168 //#define OP_DEBUG_BMP
1169 //#ifdef OP_DEBUG_BMP
1170 //      WriteLog("OP: Scaled bitmap %ix%i %ibpp at %i,%i firstpix=%i data=0x%.8x pitch %i hflipped=%s dwidth=%i (linked to 0x%.8x) Transluency=%s\n",
1171 //              iwidth, height, op_bitmap_bit_depth[bitdepth], xpos, ypos, firstPix, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), dwidth, op_pointer, (flags&OPFLAG_RMW ? "yes" : "no"));
1172 //#endif
1173
1174         int32 startPos = xpos, endPos = xpos +
1175                 (!flagREFLECT ? scaledWidthInPixels - 1 : -(scaledWidthInPixels + 1));
1176         uint32 clippedWidth = 0, phraseClippedWidth = 0, dataClippedWidth = 0;
1177         bool in24BPPMode = (((GET16(tom_ram_8, 0x0028) >> 1) & 0x03) == 1 ? true : false);      // VMODE
1178         // Not sure if this is Jaguar Two only location or what...
1179         // From the docs, it is... If we want to limit here we should think of something else.
1180 //      int32 limit = GET16(tom_ram_8, 0x0008);                 // LIMIT
1181         int32 limit = 720;
1182         int32 lbufWidth = (!in24BPPMode ? limit - 1 : (limit / 2) - 1); // Zero based limit...
1183
1184         // If the image is completely to the left or right of the line buffer, then bail.
1185 //If in REFLECT mode, then these values are swapped! !!! FIX !!! [DONE]
1186 //There are four possibilities:
1187 //  1. image sits on left edge and no REFLECT; starts out of bounds but ends in bounds.
1188 //  2. image sits on left edge and REFLECT; starts in bounds but ends out of bounds.
1189 //  3. image sits on right edge and REFLECT; starts out of bounds but ends in bounds.
1190 //  4. image sits on right edge and no REFLECT; starts in bounds but ends out of bounds.
1191 //Numbers 2 & 4 can be caught by checking the LBUF clip while in the inner loop,
1192 // numbers 1 & 3 are of concern.
1193 // This *indirectly* handles only cases 2 & 4! And is WRONG if REFLECT is set...!
1194 //      if (rightMargin < 0 || leftMargin > lbufWidth)
1195
1196 // It might be easier to swap these (if REFLECTed) and just use XPOS down below...
1197 // That way, you could simply set XPOS to leftMargin if !REFLECT and to rightMargin otherwise.
1198 // Still have to be careful with the DATA and IWIDTH values though...
1199
1200         if ((!flagREFLECT && (endPos < 0 || startPos > lbufWidth))
1201                 || (flagREFLECT && (startPos < 0 || endPos > lbufWidth)))
1202                 return;
1203
1204         // Otherwise, find the clip limits and clip the phrase as well...
1205         // NOTE: I'm fudging here by letting the actual blit overstep the bounds of the
1206         //       line buffer, but it shouldn't matter since there are two unused line
1207         //       buffers below and nothing above and I'll at most write 40 bytes outside
1208         //       the line buffer... I could use a fractional clip begin/end value, but
1209         //       this makes the blit a *lot* more hairy. I might fix this in the future
1210         //       if it becomes necessary. (JLH)
1211         //       Probably wouldn't be *that* hairy. Just use a delta that tells the inner loop
1212         //       which pixel in the phrase is being written, and quit when either end of phrases
1213         //       is reached or line buffer extents are surpassed.
1214
1215 //This stuff is probably wrong as well... !!! FIX !!!
1216 //The strange thing is that it seems to work, but that's no guarantee that it's bulletproof!
1217 //Yup. Seems that JagMania doesn't work correctly with this...
1218 //Dunno if this is the problem, but Atari Karts is showing *some* of the road now...
1219 //Actually, it is! Or, it was. It doesn't seem to be clipping here, so the problem lies
1220 //elsewhere! Hmm. Putting the scaling code into the 1/2/8 BPP cases seems to draw the ground
1221 // a bit more accurately... Strange!
1222 //It's probably a case of the REFLECT flag being set and the background being written
1223 //from the right side of the screen...
1224 //But no, it isn't... At least if the diagnostics are telling the truth!
1225
1226         // NOTE: We're just using endPos to figure out how much, if any, to clip by.
1227         // ALSO: There may be another case where we start out of bounds and end out of bounds...!
1228         // !!! FIX !!!
1229
1230 //There's a problem here with scaledPhrasePixels in that it can be forced to zero when
1231 //the scaling factor is small. So fix it already! !!! FIX !!!
1232 /*if (scaledPhrasePixels == 0)
1233 {
1234         WriteLog("OP: [Scaled] We're about to encounter a divide by zero error!\n");
1235         DumpScaledObject(p0, p1, p2);
1236 }//*/
1237 //NOTE: I'm almost 100% sure that this is wrong... And it is! :-p
1238
1239 //Try a simple example...
1240 // Let's say we have a 8 BPP scanline with an hscale of $80 (4). Our xpos is -10,
1241 // non-flipped. Pixels in the bitmap are XYZXYZXYZXYZXYZ.
1242 // Scaled up, they would be XXXXYYYYZZZZXXXXYYYYZZZZXXXXYYYYZZZZ...
1243 //
1244 // Normally, we would expect this in the line buffer:
1245 // ZZXXXXYYYYZZZZXXXXYYYYZZZZ...
1246 //
1247 // But instead we're getting:
1248 // XXXXYYYYZZZZXXXXYYYYZZZZ...
1249 //
1250 // or are we??? It would seem so, simply by virtue of the fact that we're NOT starting
1251 // on a negative boundary--or are we? Hmm...
1252 // cw = 10, dcw = pcw = 10 / ([8 * 4 = 32] 32) = 0, sp = -10
1253 //
1254 // Let's try a real world example:
1255 //
1256 //OP: Scaled bitmap (70, 8 BPP, spp=28) sp (-400) < 0... [new sp=-8, cw=400, dcw=pcw=14]
1257 //OP: Scaled bitmap (6F, 8 BPP, spp=27) sp (-395) < 0... [new sp=-17, cw=395, dcw=pcw=14]
1258 //
1259 // Really, spp is 27.75 in the second case...
1260 // So... If we do 395 / 27.75, we get 14. Ok so far... If we scale that against the
1261 // start position (14 * 27.75), we get -6.5... NOT -17!
1262
1263 //Now it seems we're working OK, at least for the first case...
1264 uint32 scaledPhrasePixelsUS = phraseWidthToPixels[depth] * hscale;
1265
1266         if (startPos < 0)                       // Case #1: Begin out, end in, L to R
1267 {
1268 extern int start_logging;
1269 if (start_logging)
1270         WriteLog("OP: Scaled bitmap (%02X, %u BPP, spp=%u) start pos (%i) < 0...", hscale, op_bitmap_bit_depth[depth], scaledPhrasePixels, startPos);
1271 //              clippedWidth = 0 - startPos,
1272                 clippedWidth = (0 - startPos) << 5,
1273 //              dataClippedWidth = phraseClippedWidth = clippedWidth / scaledPhrasePixels,
1274                 dataClippedWidth = phraseClippedWidth = (clippedWidth / scaledPhrasePixelsUS) >> 5,
1275 //              startPos = 0 - (clippedWidth % scaledPhrasePixels);
1276                 startPos += (dataClippedWidth * scaledPhrasePixelsUS) >> 5;
1277 if (start_logging)
1278         WriteLog(" [new sp=%i, cw=%i, dcw=pcw=%i]\n", startPos, clippedWidth, dataClippedWidth);
1279 }
1280
1281         if (endPos < 0)                         // Case #2: Begin in, end out, R to L
1282                 clippedWidth = 0 - endPos,
1283                 phraseClippedWidth = clippedWidth / scaledPhrasePixels;
1284
1285         if (endPos > lbufWidth)         // Case #3: Begin in, end out, L to R
1286                 clippedWidth = endPos - lbufWidth,
1287                 phraseClippedWidth = clippedWidth / scaledPhrasePixels;
1288
1289         if (startPos > lbufWidth)       // Case #4: Begin out, end in, R to L
1290                 clippedWidth = startPos - lbufWidth,
1291                 dataClippedWidth = phraseClippedWidth = clippedWidth / scaledPhrasePixels,
1292                 startPos = lbufWidth + (clippedWidth % scaledPhrasePixels);
1293
1294 extern int op_start_log;
1295 if (op_start_log && clippedWidth != 0)
1296         WriteLog("OP: Clipped line. SP=%i, EP=%i, clip=%u, iwidth=%u, hscale=%02X\n", startPos, endPos, clippedWidth, iwidth, hscale);
1297 if (op_start_log && startPos == 13)
1298 {
1299         WriteLog("OP: Scaled line. SP=%i, EP=%i, clip=%u, iwidth=%u, hscale=%02X, depth=%u, firstPix=%u\n", startPos, endPos, clippedWidth, iwidth, hscale, depth, firstPix);
1300         DumpScaledObject(p0, p1, p2);
1301         if (iwidth == 7)
1302         {
1303                 WriteLog("    %08X: ", data);
1304                 for(int i=0; i<7*8; i++)
1305                         WriteLog("%02X ", JaguarReadByte(data+i));
1306                 WriteLog("\n");
1307         }
1308 }
1309         // If the image is sitting on the line buffer left or right edge, we need to compensate
1310         // by decreasing the image phrase width accordingly.
1311         iwidth -= phraseClippedWidth;
1312
1313         // Also, if we're clipping the phrase we need to make sure we're in the correct part of
1314         // the pixel data.
1315 //      data += phraseClippedWidth * (pitch << 3);
1316         data += dataClippedWidth * (pitch << 3);
1317
1318         // NOTE: When the bitmap is in REFLECT mode, the XPOS marks the *right* side of the
1319         //       bitmap! This makes clipping & etc. MUCH, much easier...!
1320 //      uint32 lbufAddress = 0x1800 + (!in24BPPMode ? leftMargin * 2 : leftMargin * 4);
1321 //      uint32 lbufAddress = 0x1800 + (!in24BPPMode ? startPos * 2 : startPos * 4);
1322         uint32 lbufAddress = 0x1800 + startPos * 2;
1323         uint8 * currentLineBuffer = &tom_ram_8[lbufAddress];
1324 //uint8 * lineBufferLowerLimit = &tom_ram_8[0x1800],
1325 //      * lineBufferUpperLimit = &tom_ram_8[0x1800 + 719];
1326
1327         // Render.
1328
1329 // Hmm. We check above for 24 BPP mode, but don't do anything about it below...
1330 // If we *were* in 24 BPP mode, how would you convert CRY to RGB24? Seems to me
1331 // that if you're in CRY mode then you wouldn't be able to use 24 BPP bitmaps
1332 // anyway.
1333 // This seems to be the case (at least according to the Midsummer docs)...!
1334
1335         if (depth == 0)                                                                 // 1 BPP
1336         {
1337 if (firstPix != 0)
1338         WriteLog("OP: Scaled bitmap @ 1 BPP requesting FIRSTPIX!\n");
1339                 // The LSB of flags is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1340                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1341
1342                 int pixCount = 0;
1343                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1344
1345                 while ((int32)iwidth > 0)
1346                 {
1347                         uint8 bits = pixels >> 63;
1348
1349                         if (flagTRANS && bits == 0)
1350                                 ;       // Do nothing...
1351                         else
1352                         {
1353                                 if (!flagRMW)
1354                                         // This is the *only* correct use of endian-dependent code
1355                                         // (i.e., mem-to-mem direct copying)!
1356                                         *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1357                                 else
1358                                         *currentLineBuffer = 
1359                                                 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1360                                         *(currentLineBuffer + 1) = 
1361                                                 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1362                         }
1363
1364                         currentLineBuffer += lbufDelta;
1365
1366 /*                      horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1367                         while (horizontalRemainder & 0x80)
1368                         {
1369                                 horizontalRemainder += hscale;
1370                                 pixCount++;
1371                                 pixels <<= 1;
1372                         }//*/
1373                         while (horizontalRemainder <= 0x20)             // I.e., it's <= 0 (*before* subtraction)
1374                         {
1375                                 horizontalRemainder += hscale;
1376                                 pixCount++;
1377                                 pixels <<= 1;
1378                         }
1379                         horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1380
1381                         if (pixCount > 63)
1382                         {
1383                                 int phrasesToSkip = pixCount / 64, pixelShift = pixCount % 64;
1384
1385                                 data += (pitch << 3) * phrasesToSkip;
1386                                 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1387                                 pixels <<= 1 * pixelShift;
1388                                 iwidth -= phrasesToSkip;
1389                                 pixCount = pixelShift;
1390                         }
1391                 }
1392         }
1393         else if (depth == 1)                                                    // 2 BPP
1394         {
1395 if (firstPix != 0)
1396         WriteLog("OP: Scaled bitmap @ 2 BPP requesting FIRSTPIX!\n");
1397                 index &= 0xFC;                                                          // Top six bits form CLUT index
1398                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1399                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1400
1401                 int pixCount = 0;
1402                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1403
1404                 while ((int32)iwidth > 0)
1405                 {
1406                         uint8 bits = pixels >> 62;
1407
1408                         if (flagTRANS && bits == 0)
1409                                 ;       // Do nothing...
1410                         else
1411                         {
1412                                 if (!flagRMW)
1413                                         // This is the *only* correct use of endian-dependent code
1414                                         // (i.e., mem-to-mem direct copying)!
1415                                         *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1416                                 else
1417                                         *currentLineBuffer = 
1418                                                 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1419                                         *(currentLineBuffer + 1) = 
1420                                                 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1421                         }
1422
1423                         currentLineBuffer += lbufDelta;
1424
1425 /*                      horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1426                         while (horizontalRemainder & 0x80)
1427                         {
1428                                 horizontalRemainder += hscale;
1429                                 pixCount++;
1430                                 pixels <<= 2;
1431                         }//*/
1432                         while (horizontalRemainder <= 0x20)             // I.e., it's <= 0 (*before* subtraction)
1433                         {
1434                                 horizontalRemainder += hscale;
1435                                 pixCount++;
1436                                 pixels <<= 2;
1437                         }
1438                         horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1439
1440                         if (pixCount > 31)
1441                         {
1442                                 int phrasesToSkip = pixCount / 32, pixelShift = pixCount % 32;
1443
1444                                 data += (pitch << 3) * phrasesToSkip;
1445                                 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1446                                 pixels <<= 2 * pixelShift;
1447                                 iwidth -= phrasesToSkip;
1448                                 pixCount = pixelShift;
1449                         }
1450                 }
1451         }
1452         else if (depth == 2)                                                    // 4 BPP
1453         {
1454 if (firstPix != 0)
1455         WriteLog("OP: Scaled bitmap @ 4 BPP requesting FIRSTPIX!\n");
1456                 index &= 0xF0;                                                          // Top four bits form CLUT index
1457                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1458                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1459
1460                 int pixCount = 0;
1461                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1462
1463                 while ((int32)iwidth > 0)
1464                 {
1465                         uint8 bits = pixels >> 60;
1466
1467                         if (flagTRANS && bits == 0)
1468                                 ;       // Do nothing...
1469                         else
1470                         {
1471                                 if (!flagRMW)
1472                                         // This is the *only* correct use of endian-dependent code
1473                                         // (i.e., mem-to-mem direct copying)!
1474                                         *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1475                                 else
1476                                         *currentLineBuffer = 
1477                                                 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1478                                         *(currentLineBuffer + 1) = 
1479                                                 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1480                         }
1481
1482                         currentLineBuffer += lbufDelta;
1483
1484 /*                      horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1485                         while (horizontalRemainder & 0x80)
1486                         {
1487                                 horizontalRemainder += hscale;
1488                                 pixCount++;
1489                                 pixels <<= 4;
1490                         }//*/
1491                         while (horizontalRemainder <= 0x20)             // I.e., it's <= 0 (*before* subtraction)
1492                         {
1493                                 horizontalRemainder += hscale;
1494                                 pixCount++;
1495                                 pixels <<= 4;
1496                         }
1497                         horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1498
1499                         if (pixCount > 15)
1500                         {
1501                                 int phrasesToSkip = pixCount / 16, pixelShift = pixCount % 16;
1502
1503                                 data += (pitch << 3) * phrasesToSkip;
1504                                 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1505                                 pixels <<= 4 * pixelShift;
1506                                 iwidth -= phrasesToSkip;
1507                                 pixCount = pixelShift;
1508                         }
1509                 }
1510         }
1511         else if (depth == 3)                                                    // 8 BPP
1512         {
1513 if (firstPix)
1514         WriteLog("OP: Scaled bitmap @ 8 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1515                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1516                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1517
1518                 int pixCount = 0;
1519                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1520
1521                 while ((int32)iwidth > 0)
1522                 {
1523                         uint8 bits = pixels >> 56;
1524
1525                         if (flagTRANS && bits == 0)
1526                                 ;       // Do nothing...
1527                         else
1528                         {
1529                                 if (!flagRMW)
1530                                         // This is the *only* correct use of endian-dependent code
1531                                         // (i.e., mem-to-mem direct copying)!
1532                                         *(uint16 *)currentLineBuffer = paletteRAM16[bits];
1533 /*                              {
1534                                         if (currentLineBuffer >= lineBufferLowerLimit && currentLineBuffer <= lineBufferUpperLimit)
1535                                                 *(uint16 *)currentLineBuffer = paletteRAM16[bits];
1536                                 }*/
1537                                 else
1538                                         *currentLineBuffer = 
1539                                                 BLEND_CR(*currentLineBuffer, paletteRAM[bits << 1]),
1540                                         *(currentLineBuffer + 1) = 
1541                                                 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[(bits << 1) + 1]);
1542                         }
1543
1544                         currentLineBuffer += lbufDelta;
1545
1546                         while (horizontalRemainder <= 0x20)             // I.e., it's <= 0 (*before* subtraction)
1547                         {
1548                                 horizontalRemainder += hscale;
1549                                 pixCount++;
1550                                 pixels <<= 8;
1551                         }
1552                         horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1553
1554                         if (pixCount > 7)
1555                         {
1556                                 int phrasesToSkip = pixCount / 8, pixelShift = pixCount % 8;
1557
1558                                 data += (pitch << 3) * phrasesToSkip;
1559                                 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1560                                 pixels <<= 8 * pixelShift;
1561                                 iwidth -= phrasesToSkip;
1562                                 pixCount = pixelShift;
1563                         }
1564                 }
1565         }
1566         else if (depth == 4)                                                    // 16 BPP
1567         {
1568 if (firstPix != 0)
1569         WriteLog("OP: Scaled bitmap @ 16 BPP requesting FIRSTPIX!\n");
1570                 // The LSB is OPFLAG_REFLECT, so sign extend it and OR 2 into it.
1571                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1572
1573                 int pixCount = 0;
1574                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1575
1576                 while ((int32)iwidth > 0)
1577                 {
1578                         uint8 bitsHi = pixels >> 56, bitsLo = pixels >> 48;
1579
1580                         if (flagTRANS && (bitsLo | bitsHi) == 0)
1581                                 ;       // Do nothing...
1582                         else
1583                         {
1584                                 if (!flagRMW)
1585                                         *currentLineBuffer = bitsHi,
1586                                         *(currentLineBuffer + 1) = bitsLo;
1587                                 else
1588                                         *currentLineBuffer = 
1589                                                 BLEND_CR(*currentLineBuffer, bitsHi),
1590                                         *(currentLineBuffer + 1) = 
1591                                                 BLEND_Y(*(currentLineBuffer + 1), bitsLo);
1592                         }
1593
1594                         currentLineBuffer += lbufDelta;
1595
1596 /*                      horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1597                         while (horizontalRemainder & 0x80)
1598                         {
1599                                 horizontalRemainder += hscale;
1600                                 pixCount++;
1601                                 pixels <<= 16;
1602                         }//*/
1603                         while (horizontalRemainder <= 0x20)             // I.e., it's <= 0 (*before* subtraction)
1604                         {
1605                                 horizontalRemainder += hscale;
1606                                 pixCount++;
1607                                 pixels <<= 16;
1608                         }
1609                         horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1610 //*/
1611                         if (pixCount > 3)
1612                         {
1613                                 int phrasesToSkip = pixCount / 4, pixelShift = pixCount % 4;
1614
1615                                 data += (pitch << 3) * phrasesToSkip;
1616                                 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1617                                 pixels <<= 16 * pixelShift;
1618
1619                                 iwidth -= phrasesToSkip;
1620
1621                                 pixCount = pixelShift;
1622                         }
1623                 }
1624         }
1625         else if (depth == 5)                                                    // 24 BPP
1626         {
1627 //I'm not sure that you can scale a 24 BPP bitmap properly--the JTRM seems to indicate as much.
1628 WriteLog("OP: Writing 24 BPP scaled bitmap!\n");
1629 if (firstPix != 0)
1630         WriteLog("OP: Scaled bitmap @ 24 BPP requesting FIRSTPIX!\n");
1631                 // Not sure, but I think RMW only works with 16 BPP and below, and only in CRY mode...
1632                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 4 into it.
1633                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 4) | 0x04;
1634
1635                 while (iwidth--)
1636                 {
1637                         // Fetch phrase...
1638                         uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1639                         data += pitch << 3;                                             // Multiply pitch * 8 (optimize: precompute this value)
1640
1641                         for(int i=0; i<2; i++)
1642                         {
1643                                 uint8 bits3 = pixels >> 56, bits2 = pixels >> 48,
1644                                         bits1 = pixels >> 40, bits0 = pixels >> 32;
1645
1646                                 if (flagTRANS && (bits3 | bits2 | bits1 | bits0) == 0)
1647                                         ;       // Do nothing...
1648                                 else
1649                                         *currentLineBuffer = bits3,
1650                                         *(currentLineBuffer + 1) = bits2,
1651                                         *(currentLineBuffer + 2) = bits1,
1652                                         *(currentLineBuffer + 3) = bits0;
1653
1654                                 currentLineBuffer += lbufDelta;
1655                                 pixels <<= 32;
1656                         }
1657                 }
1658         }
1659 }