]> Shamusworld >> Repos - virtualjaguar/blob - src/op.cpp
Mask out ALT for now, small optimization in screen rendering code.
[virtualjaguar] / src / op.cpp
1 //
2 // Object Processor
3 //
4 // Original source by David Raingeard (Cal2)
5 // GCC/SDL port by Niels Wagenaar (Linux/WIN32) and Caz (BeOS)
6 // Extensive cleanups/fixes/rewrites by James L. Hammons
7 // (C) 2010 Underground Software
8 //
9 // JLH = James L. Hammons <jlhamm@acm.org>
10 //
11 // Who  When        What
12 // ---  ----------  -------------------------------------------------------------
13 // JLH  01/16/2010  Created this log ;-)
14 //
15
16 #include "op.h"
17
18 #include <stdlib.h>
19 #include <string.h>
20 #include "gpu.h"
21 #include "jaguar.h"
22 #include "log.h"
23 #include "m68k.h"
24 #include "memory.h"
25 #include "tom.h"
26
27 //#define OP_DEBUG
28 //#define OP_DEBUG_BMP
29
// Saturating blend lookups. Both tables are indexed by (dst << 8) | src, where
// dst is the existing pixel byte and src is the signed delta byte (the tables
// themselves are built in OPInit()). Every macro argument is parenthesized so
// that expression arguments (e.g. "x & 0x0F") are cast and shifted as a whole.
#define BLEND_Y(dst, src)	op_blend_y[(((uint16)(dst)) << 8) | ((uint16)(src))]
#define BLEND_CR(dst, src)	op_blend_cr[(((uint16)(dst)) << 8) | ((uint16)(src))]
32
// Object types (low 3 bits of an object's first phrase)
enum
{
	OBJECT_TYPE_BITMAP = 0,				// 000
	OBJECT_TYPE_SCALE  = 1,				// 001
	OBJECT_TYPE_GPU    = 2,				// 010
	OBJECT_TYPE_BRANCH = 3,				// 011
	OBJECT_TYPE_STOP   = 4				// 100
};

// Branch object condition codes
enum
{
	CONDITION_EQUAL            = 0,
	CONDITION_LESS_THAN        = 1,
	CONDITION_GREATER_THAN     = 2,
	CONDITION_OP_FLAG_SET      = 3,
	CONDITION_SECOND_HALF_LINE = 4
};

// Bitmap object flag bits
enum
{
	OPFLAG_REFLECT = 1,					// Horizontal mirror bit
	OPFLAG_RMW     = 2,					// Read-Modify-Write bit
	OPFLAG_TRANS   = 4,					// Transparency bit
	OPFLAG_RELEASE = 8					// Bus release bit
};
49
50 // Private function prototypes
51
52 void OPProcessFixedBitmap(uint64 p0, uint64 p1, bool render);
53 void OPProcessScaledBitmap(uint64 p0, uint64 p1, uint64 p2, bool render);
54 void DumpScaledObject(uint64 p0, uint64 p1, uint64 p2);
55 void DumpFixedObject(uint64 p0, uint64 p1);
56 uint64 OPLoadPhrase(uint32 offset);
57
58 // Local global variables
59
60 // Blend tables (64K each)
61 static uint8 op_blend_y[0x10000];
62 static uint8 op_blend_cr[0x10000];
63 // There may be a problem with this "RAM" overlapping (and thus being independent of)
64 // some of the regular TOM RAM...
65 //#warning objectp_ram is separated from TOM RAM--need to fix that!
66 //static uint8 objectp_ram[0x40];                       // This is based at $F00000
67 uint8 objectp_running = 0;
68 //bool objectp_stop_reading_list;
69
70 static uint8 op_bitmap_bit_depth[8] = { 1, 2, 4, 8, 16, 24, 32, 0 };
71 //static uint32 op_bitmap_bit_size[8] =
72 //      { (uint32)(0.125*65536), (uint32)(0.25*65536), (uint32)(0.5*65536), (uint32)(1*65536),
73 //        (uint32)(2*65536),     (uint32)(1*65536),    (uint32)(1*65536),   (uint32)(1*65536) };
74 static uint32 op_pointer;
75
76 int32 phraseWidthToPixels[8] = { 64, 32, 16, 8, 4, 2, 0, 0 };
77
78
79 //
80 // Object Processor initialization
81 //
82 void OPInit(void)
83 {
84         // Here we calculate the saturating blend of a signed 4-bit value and an
85         // existing Cyan/Red value as well as a signed 8-bit value and an existing intensity...
86         // Note: CRY is 4 bits Cyan, 4 bits Red, 16 bits intensitY
87         for(int i=0; i<256*256; i++)
88         {
89                 int y = (i >> 8) & 0xFF;
90                 int dy = (int8)i;                                       // Sign extend the Y index
91                 int c1 = (i >> 8) & 0x0F;
92                 int dc1 = (int8)(i << 4) >> 4;          // Sign extend the R index
93                 int c2 = (i >> 12) & 0x0F;
94                 int dc2 = (int8)(i & 0xF0) >> 4;        // Sign extend the C index
95
96                 y += dy;
97
98                 if (y < 0)
99                         y = 0;
100                 else if (y > 0xFF)
101                         y = 0xFF;
102
103                 op_blend_y[i] = y;
104
105                 c1 += dc1;
106
107                 if (c1 < 0)
108                         c1 = 0;
109                 else if (c1 > 0x0F)
110                         c1 = 0x0F;
111
112                 c2 += dc2;
113
114                 if (c2 < 0)
115                         c2 = 0;
116                 else if (c2 > 0x0F)
117                         c2 = 0x0F;
118
119                 op_blend_cr[i] = (c2 << 4) | c1;
120         }
121
122         OPReset();
123 }
124
125 //
126 // Object Processor reset
127 //
128 void OPReset(void)
129 {
130 //      memset(objectp_ram, 0x00, 0x40);
131         objectp_running = 0;
132 }
133
134 void OPDone(void)
135 {
136         const char * opType[8] =
137         { "(BITMAP)", "(SCALED BITMAP)", "(GPU INT)", "(BRANCH)", "(STOP)", "???", "???", "???" };
138         const char * ccType[8] =
139                 { "\"==\"", "\"<\"", "\">\"", "(opflag set)", "(second half line)", "?", "?", "?" };
140
141         uint32 olp = OPGetListPointer();
142         WriteLog("OP: OLP = %08X\n", olp);
143         WriteLog("OP: Phrase dump\n    ----------\n");
144         for(uint32 i=0; i<0x100; i+=8)
145         {
146                 uint32 hi = JaguarReadLong(olp + i, OP), lo = JaguarReadLong(olp + i + 4, OP);
147                 WriteLog("\t%08X: %08X %08X %s", olp + i, hi, lo, opType[lo & 0x07]);
148                 if ((lo & 0x07) == 3)
149                 {
150                         uint16 ypos = (lo >> 3) & 0x7FF;
151                         uint8  cc   = (lo >> 14) & 0x03;
152                         uint32 link = ((hi << 11) | (lo >> 21)) & 0x3FFFF8;
153                         WriteLog(" YPOS=%u, CC=%s, link=%08X", ypos, ccType[cc], link);
154                 }
155                 WriteLog("\n");
156                 if ((lo & 0x07) == 0)
157                         DumpFixedObject(OPLoadPhrase(olp+i), OPLoadPhrase(olp+i+8));
158                 if ((lo & 0x07) == 1)
159                         DumpScaledObject(OPLoadPhrase(olp+i), OPLoadPhrase(olp+i+8), OPLoadPhrase(olp+i+16));
160         }
161         WriteLog("\n");
162
163 //      memory_free(op_blend_y);
164 //      memory_free(op_blend_cr);
165 }
166
167 //
168 // Object Processor memory access
169 // Memory range: F00010 - F00027
170 //
171 //      F00010-F00017   R     xxxxxxxx xxxxxxxx   OB - current object code from the graphics processor
172 //      F00020-F00023     W   xxxxxxxx xxxxxxxx   OLP - start of the object list
173 //      F00026            W   -------- -------x   OBF - object processor flag
174 //
175
176 #if 0
177 uint8 OPReadByte(uint32 offset, uint32 who/*=UNKNOWN*/)
178 {
179         offset &= 0x3F;
180         return objectp_ram[offset];
181 }
182
183 uint16 OPReadWord(uint32 offset, uint32 who/*=UNKNOWN*/)
184 {
185         offset &= 0x3F;
186         return GET16(objectp_ram, offset);
187 }
188
189 void OPWriteByte(uint32 offset, uint8 data, uint32 who/*=UNKNOWN*/)
190 {
191         offset &= 0x3F;
192         objectp_ram[offset] = data;
193 }
194
195 void OPWriteWord(uint32 offset, uint16 data, uint32 who/*=UNKNOWN*/)
196 {
197         offset &= 0x3F;
198         SET16(objectp_ram, offset, data);
199
200 /*if (offset == 0x20)
201 WriteLog("OP: Setting lo list pointer: %04X\n", data);
202 if (offset == 0x22)
203 WriteLog("OP: Setting hi list pointer: %04X\n", data);//*/
204 }
205 #endif
206
207 uint32 OPGetListPointer(void)
208 {
209         // Note: This register is LO / HI WORD, hence the funky look of this...
210         return GET16(tomRam8, 0x20) | (GET16(tomRam8, 0x22) << 16);
211 }
212
213 // This is WRONG, since the OBF is only 16 bits wide!!! [FIXED]
214
215 uint32 OPGetStatusRegister(void)
216 {
217         return GET16(tomRam8, 0x26);
218 }
219
220 // This is WRONG, since the OBF is only 16 bits wide!!! [FIXED]
221
222 void OPSetStatusRegister(uint32 data)
223 {
224         tomRam8[0x26] = (data & 0x0000FF00) >> 8;
225         tomRam8[0x27] |= (data & 0xFE);
226 }
227
228 void OPSetCurrentObject(uint64 object)
229 {
230 //Not sure this is right... Wouldn't it just be stored 64 bit BE?
231         // Stored as least significant 32 bits first, ms32 last in big endian
232 /*      objectp_ram[0x13] = object & 0xFF; object >>= 8;
233         objectp_ram[0x12] = object & 0xFF; object >>= 8;
234         objectp_ram[0x11] = object & 0xFF; object >>= 8;
235         objectp_ram[0x10] = object & 0xFF; object >>= 8;
236
237         objectp_ram[0x17] = object & 0xFF; object >>= 8;
238         objectp_ram[0x16] = object & 0xFF; object >>= 8;
239         objectp_ram[0x15] = object & 0xFF; object >>= 8;
240         objectp_ram[0x14] = object & 0xFF;*/
241 // Let's try regular good old big endian...
242         tomRam8[0x17] = object & 0xFF; object >>= 8;
243         tomRam8[0x16] = object & 0xFF; object >>= 8;
244         tomRam8[0x15] = object & 0xFF; object >>= 8;
245         tomRam8[0x14] = object & 0xFF; object >>= 8;
246
247         tomRam8[0x13] = object & 0xFF; object >>= 8;
248         tomRam8[0x12] = object & 0xFF; object >>= 8;
249         tomRam8[0x11] = object & 0xFF; object >>= 8;
250         tomRam8[0x10] = object & 0xFF;
251 }
252
253 uint64 OPLoadPhrase(uint32 offset)
254 {
255         offset &= ~0x07;                                                // 8 byte alignment
256         return ((uint64)JaguarReadLong(offset, OP) << 32) | (uint64)JaguarReadLong(offset+4, OP);
257 }
258
259 void OPStorePhrase(uint32 offset, uint64 p)
260 {
261         offset &= ~0x07;                                                // 8 byte alignment
262         JaguarWriteLong(offset, p >> 32, OP);
263         JaguarWriteLong(offset + 4, p & 0xFFFFFFFF, OP);
264 }
265
266 //
267 // Debugging routines
268 //
269 void DumpScaledObject(uint64 p0, uint64 p1, uint64 p2)
270 {
271         WriteLog(" (SCALED BITMAP)");
272         WriteLog(" %08X --> phrase %08X %08X\n", op_pointer, (uint32)(p1>>32), (uint32)(p1&0xFFFFFFFF));
273         WriteLog("                 %08X --> phrase %08X %08X ", op_pointer+8, (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
274         uint8 bitdepth = (p1 >> 12) & 0x07;
275 //WAS:  int16 ypos = ((p0 >> 3) & 0x3FF);                       // ??? What if not interlaced (/2)?
276         int16 ypos = ((p0 >> 3) & 0x7FF);                       // ??? What if not interlaced (/2)?
277         int32 xpos = p1 & 0xFFF;
278         xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);
279         uint32 iwidth = ((p1 >> 28) & 0x3FF);
280         uint32 dwidth = ((p1 >> 18) & 0x3FF);           // Unsigned!
281         uint16 height = ((p0 >> 14) & 0x3FF);
282         uint32 link = ((p0 >> 24) & 0x7FFFF) << 3;
283         uint32 ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
284         uint32 firstPix = (p1 >> 49) & 0x3F;
285         uint8 flags = (p1 >> 45) & 0x0F;
286         uint8 idx = (p1 >> 38) & 0x7F;
287         uint32 pitch = (p1 >> 15) & 0x07;
288         WriteLog("\n    [%u (%u) x %u @ (%i, %u) (%u bpp), l: %08X, p: %08X fp: %02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
289                 iwidth, dwidth, height, xpos, ypos, op_bitmap_bit_depth[bitdepth], link, ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""), (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""), (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
290         uint32 hscale = p2 & 0xFF;
291         uint32 vscale = (p2 >> 8) & 0xFF;
292         uint32 remainder = (p2 >> 16) & 0xFF;
293         WriteLog("    [hsc: %02X, vsc: %02X, rem: %02X]\n", hscale, vscale, remainder);
294 }
295
296 void DumpFixedObject(uint64 p0, uint64 p1)
297 {
298         WriteLog(" (BITMAP)");
299         WriteLog(" %08X --> phrase %08X %08X\n", op_pointer, (uint32)(p1>>32), (uint32)(p1&0xFFFFFFFF));
300         uint8 bitdepth = (p1 >> 12) & 0x07;
301 //WAS:  int16 ypos = ((p0 >> 3) & 0x3FF);                       // ??? What if not interlaced (/2)?
302         int16 ypos = ((p0 >> 3) & 0x7FF);                       // ??? What if not interlaced (/2)?
303         int32 xpos = p1 & 0xFFF;
304         xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);
305         uint32 iwidth = ((p1 >> 28) & 0x3FF);
306         uint32 dwidth = ((p1 >> 18) & 0x3FF);           // Unsigned!
307         uint16 height = ((p0 >> 14) & 0x3FF);
308         uint32 link = ((p0 >> 24) & 0x7FFFF) << 3;
309         uint32 ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
310         uint32 firstPix = (p1 >> 49) & 0x3F;
311         uint8 flags = (p1 >> 45) & 0x0F;
312         uint8 idx = (p1 >> 38) & 0x7F;
313         uint32 pitch = (p1 >> 15) & 0x07;
314         WriteLog("    [%u (%u) x %u @ (%i, %u) (%u bpp), l: %08X, p: %08X fp: %02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
315                 iwidth, dwidth, height, xpos, ypos, op_bitmap_bit_depth[bitdepth], link, ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""), (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""), (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
316 }
317
318 //
319 // Object Processor main routine
320 //
321 //Need to fix this so that when an GPU object IRQ happens, we can pick up OP processing
322 //where we left off. !!! FIX !!!
323 #warning "Need to fix this so that when an GPU object IRQ happens, we can pick up OP processing where we left off. !!! FIX !!!"
324 void OPProcessList(int scanline, bool render)
325 {
326 extern int op_start_log;
327 //      char * condition_to_str[8] =
328 //              { "==", "<", ">", "(opflag set)", "(second half line)", "?", "?", "?" };
329
330         op_pointer = OPGetListPointer();
331
332 //      objectp_stop_reading_list = false;
333
334 //WriteLog("OP: Processing line #%u (OLP=%08X)...\n", scanline, op_pointer);
335 //op_done();
336
337 // *** BEGIN OP PROCESSOR TESTING ONLY ***
338 extern bool interactiveMode;
339 extern bool iToggle;
340 extern int objectPtr;
341 bool inhibit;
342 int bitmapCounter = 0;
343 // *** END OP PROCESSOR TESTING ONLY ***
344
345         uint32 opCyclesToRun = 10000;                                   // This is a pulled-out-of-the-air value (will need to be fixed, obviously!)
346
347 //      if (op_pointer) WriteLog(" new op list at 0x%.8x scanline %i\n",op_pointer,scanline);
348         while (op_pointer)
349         {
350 // *** BEGIN OP PROCESSOR TESTING ONLY ***
351 if (interactiveMode && bitmapCounter == objectPtr)
352         inhibit = iToggle;
353 else
354         inhibit = false;
355 // *** END OP PROCESSOR TESTING ONLY ***
356 //              if (objectp_stop_reading_list)
357 //                      return;
358
359                 uint64 p0 = OPLoadPhrase(op_pointer);
360 //WriteLog("\t%08X type %i\n", op_pointer, (uint8)p0 & 0x07);
361                 op_pointer += 8;
362
363 #if 1
364 if (scanline == TOMGetVDB() && op_start_log)
365 //if (scanline == 215 && op_start_log)
366 //if (scanline == 28 && op_start_log)
367 //if (scanline == 0)
368 {
369 WriteLog("%08X --> phrase %08X %08X", op_pointer - 8, (int)(p0>>32), (int)(p0&0xFFFFFFFF));
370 if ((p0 & 0x07) == OBJECT_TYPE_BITMAP)
371 {
372 WriteLog(" (BITMAP) ");
373 uint64 p1 = OPLoadPhrase(op_pointer);
374 WriteLog("\n%08X --> phrase %08X %08X ", op_pointer, (int)(p1>>32), (int)(p1&0xFFFFFFFF));
375         uint8 bitdepth = (p1 >> 12) & 0x07;
376 //WAS:  int16 ypos = ((p0 >> 3) & 0x3FF);                       // ??? What if not interlaced (/2)?
377         int16 ypos = ((p0 >> 3) & 0x7FF);                       // ??? What if not interlaced (/2)?
378 int32 xpos = p1 & 0xFFF;
379 xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);
380         uint32 iwidth = ((p1 >> 28) & 0x3FF);
381         uint32 dwidth = ((p1 >> 18) & 0x3FF);           // Unsigned!
382         uint16 height = ((p0 >> 14) & 0x3FF);
383         uint32 link = ((p0 >> 24) & 0x7FFFF) << 3;
384         uint32 ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
385         uint32 firstPix = (p1 >> 49) & 0x3F;
386         uint8 flags = (p1 >> 45) & 0x0F;
387         uint8 idx = (p1 >> 38) & 0x7F;
388         uint32 pitch = (p1 >> 15) & 0x07;
389 WriteLog("\n    [%u (%u) x %u @ (%i, %u) (%u bpp), l: %08X, p: %08X fp: %02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
390         iwidth, dwidth, height, xpos, ypos, op_bitmap_bit_depth[bitdepth], link, ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""), (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""), (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
391 }
392 if ((p0 & 0x07) == OBJECT_TYPE_SCALE)
393 {
394 WriteLog(" (SCALED BITMAP)");
395 uint64 p1 = OPLoadPhrase(op_pointer), p2 = OPLoadPhrase(op_pointer+8);
396 WriteLog("\n%08X --> phrase %08X %08X ", op_pointer, (int)(p1>>32), (int)(p1&0xFFFFFFFF));
397 WriteLog("\n%08X --> phrase %08X %08X ", op_pointer+8, (int)(p2>>32), (int)(p2&0xFFFFFFFF));
398         uint8 bitdepth = (p1 >> 12) & 0x07;
399 //WAS:  int16 ypos = ((p0 >> 3) & 0x3FF);                       // ??? What if not interlaced (/2)?
400         int16 ypos = ((p0 >> 3) & 0x7FF);                       // ??? What if not interlaced (/2)?
401 int32 xpos = p1 & 0xFFF;
402 xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);
403         uint32 iwidth = ((p1 >> 28) & 0x3FF);
404         uint32 dwidth = ((p1 >> 18) & 0x3FF);           // Unsigned!
405         uint16 height = ((p0 >> 14) & 0x3FF);
406         uint32 link = ((p0 >> 24) & 0x7FFFF) << 3;
407         uint32 ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
408         uint32 firstPix = (p1 >> 49) & 0x3F;
409         uint8 flags = (p1 >> 45) & 0x0F;
410         uint8 idx = (p1 >> 38) & 0x7F;
411         uint32 pitch = (p1 >> 15) & 0x07;
412 WriteLog("\n    [%u (%u) x %u @ (%i, %u) (%u bpp), l: %08X, p: %08X fp: %02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
413         iwidth, dwidth, height, xpos, ypos, op_bitmap_bit_depth[bitdepth], link, ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""), (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""), (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
414         uint32 hscale = p2 & 0xFF;
415         uint32 vscale = (p2 >> 8) & 0xFF;
416         uint32 remainder = (p2 >> 16) & 0xFF;
417 WriteLog("    [hsc: %02X, vsc: %02X, rem: %02X]\n", hscale, vscale, remainder);
418 }
419 if ((p0 & 0x07) == OBJECT_TYPE_GPU)
420 WriteLog(" (GPU)\n");
421 if ((p0 & 0x07) == OBJECT_TYPE_BRANCH)
422 {
423 WriteLog(" (BRANCH)\n");
424 uint8 * jaguarMainRam = GetRamPtr();
425 WriteLog("[RAM] --> ");
426 for(int k=0; k<8; k++)
427         WriteLog("%02X ", jaguarMainRam[op_pointer-8 + k]);
428 WriteLog("\n");
429 }
430 if ((p0 & 0x07) == OBJECT_TYPE_STOP)
431 WriteLog("    --> List end\n\n");
432 }
433 #endif
434
435                 switch ((uint8)p0 & 0x07)
436                 {
437                 case OBJECT_TYPE_BITMAP:
438                 {
439 //WAS:                  uint16 ypos = (p0 >> 3) & 0x3FF;
440                         uint16 ypos = (p0 >> 3) & 0x7FF;
441 // This is only theory implied by Rayman...!
442 // It seems that if the YPOS is zero, then bump the YPOS value so that it coincides with
443 // the VDB value. With interlacing, this would be slightly more tricky.
444 // There's probably another bit somewhere that enables this mode--but so far, doesn't seem
445 // to affect any other game in a negative way (that I've seen).
446 // Either that, or it's an undocumented bug...
447
448 //No, the reason this was needed is that the OP code before was wrong. Any value
449 //less than VDB will get written to the top line of the display!
450 #if 0
451 // Not so sure... Let's see what happens here...
452 // No change...
453                         if (ypos == 0)
454                                 ypos = TOMReadWord(0xF00046, OP) / 2;                   // Get the VDB value
455 #endif
456 // Actually, no. Any item less than VDB will get only the lines that hang over VDB displayed.
457 // So we need to fix this somehow... (and it has... in tom.cpp :-P)
458
459                         uint32 height = (p0 & 0xFFC000) >> 14;
460                         uint32 oldOPP = op_pointer - 8;
461 // *** BEGIN OP PROCESSOR TESTING ONLY ***
462 if (inhibit && op_start_log)
463         WriteLog("!!! ^^^ This object is INHIBITED! ^^^ !!!\n");
464 bitmapCounter++;
465 if (!inhibit)   // For OP testing only!
466 // *** END OP PROCESSOR TESTING ONLY ***
467                         if (scanline >= ypos && height > 0)
468                         {
469                                 uint64 p1 = OPLoadPhrase(op_pointer);
470                                 op_pointer += 8;
471 //WriteLog("OP: Writing scanline %d with ypos == %d...\n", scanline, ypos);
472 //WriteLog("--> Writing %u BPP bitmap...\n", op_bitmap_bit_depth[(p1 >> 12) & 0x07]);
473 //                              OPProcessFixedBitmap(scanline, p0, p1, render);
474                                 OPProcessFixedBitmap(p0, p1, render);
475
476                                 // OP write-backs
477
478 //???Does this really happen??? Doesn't seem to work if you do this...!
479 //Probably not. Must be a bug in the documentation...!
480 //                              uint32 link = (p0 & 0x7FFFF000000) >> 21;
481 //                              SET16(tom_ram_8, 0x20, link & 0xFFFF);  // OLP
482 //                              SET16(tom_ram_8, 0x22, link >> 16);
483 /*                              uint32 height = (p0 & 0xFFC000) >> 14;
484                                 if (height - 1 > 0)
485                                         height--;*/
486                                 // NOTE: Would subtract 2 if in interlaced mode...!
487 //                              uint64 height = ((p0 & 0xFFC000) - 0x4000) & 0xFFC000;
488 //                              if (height)
489                                 height--;
490
491                                 uint64 data = (p0 & 0xFFFFF80000000000LL) >> 40;
492                                 uint64 dwidth = (p1 & 0xFFC0000) >> 15;
493                                 data += dwidth;
494
495                                 p0 &= ~0xFFFFF80000FFC000LL;            // Mask out old data...
496                                 p0 |= (uint64)height << 14;
497                                 p0 |= data << 40;
498                                 OPStorePhrase(oldOPP, p0);
499                         }
500 //WriteLog("\t\tOld OP: %08X -> ", op_pointer);
501 //Temp, for testing...
502 //No doubt, this type of check will break all kinds of stuff... !!! FIX !!!
503 //And it does! !!! FIX !!!
504 //Let's remove this "fix" since it screws up more than it fixes.
505 /*      if (op_pointer > ((p0 & 0x000007FFFF000000LL) >> 21))
506                 return;*/
507
508                         op_pointer = (p0 & 0x000007FFFF000000LL) >> 21;
509 //WriteLog("New OP: %08X\n", op_pointer);
510                         break;
511                 }
512                 case OBJECT_TYPE_SCALE:
513                 {
514 //WAS:                  uint16 ypos = (p0 >> 3) & 0x3FF;
515                         uint16 ypos = (p0 >> 3) & 0x7FF;
516                         uint32 height = (p0 & 0xFFC000) >> 14;
517                         uint32 oldOPP = op_pointer - 8;
518 //WriteLog("OP: Scaled Object (ypos=%04X, height=%04X", ypos, height);
519 // *** BEGIN OP PROCESSOR TESTING ONLY ***
520 if (inhibit && op_start_log)
521 {
522         WriteLog("!!! ^^^ This object is INHIBITED! ^^^ !!! (scanline=%u, ypos=%u, height=%u)\n", scanline, ypos, height);
523         DumpScaledObject(p0, OPLoadPhrase(op_pointer), OPLoadPhrase(op_pointer+8));
524 }
525 bitmapCounter++;
526 if (!inhibit)   // For OP testing only!
527 // *** END OP PROCESSOR TESTING ONLY ***
528                         if (scanline >= ypos && height > 0)
529                         {
530                                 uint64 p1 = OPLoadPhrase(op_pointer);
531                                 op_pointer += 8;
532                                 uint64 p2 = OPLoadPhrase(op_pointer);
533                                 op_pointer += 8;
534 //WriteLog("OP: %08X (%d) %08X%08X %08X%08X %08X%08X\n", oldOPP, scanline, (uint32)(p0>>32), (uint32)(p0&0xFFFFFFFF), (uint32)(p1>>32), (uint32)(p1&0xFFFFFFFF), (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
535                                 OPProcessScaledBitmap(p0, p1, p2, render);
536
537                                 // OP write-backs
538
539                                 uint16 remainder = (p2 >> 16) & 0xFF;//, vscale = p2 >> 8;
540                                 uint8 /*remainder = p2 >> 16,*/ vscale = p2 >> 8;
541 //Actually, we should skip this object if it has a vscale of zero.
542 //Or do we? Not sure... Atari Karts has a few lines that look like:
543 // (SCALED BITMAP)
544 //000E8268 --> phrase 00010000 7000B00D
545 //    [7 (0) x 1 @ (13, 0) (8 bpp), l: 000E82A0, p: 000E0FC0 fp: 00, fl:RELEASE, idx:00, pt:01]
546 //    [hsc: 9A, vsc: 00, rem: 00]
547 // Could it be the vscale is overridden if the DWIDTH is zero? Hmm...
548 //WriteLog("OP: Scaled bitmap processing (rem=%02X, vscale=%02X)...\n", remainder, vscale);//*/
549
550                                 if (vscale == 0)
551                                         vscale = 0x20;                                  // OP bug??? Nope, it isn't...! Or is it?
552
553 //extern int start_logging;
554 //if (start_logging)
555 //      WriteLog("--> Returned from scaled bitmap processing (rem=%02X, vscale=%02X)...\n", remainder, vscale);//*/
556 //Locks up here:
557 //--> Returned from scaled bitmap processing (rem=20, vscale=80)...
558 //There are other problems here, it looks like...
559 //Another lock up:
560 //About to execute OP (508)...
561 /*
562 OP: Scaled bitmap 4x? 4bpp at 38,? hscale=7C fpix=0 data=00075E28 pitch 1 hflipped=no dwidth=? (linked to 00071118) Transluency=no
563 --> Returned from scaled bitmap processing (rem=50, vscale=7C)...
564 OP: Scaled bitmap 4x? 4bpp at 38,? hscale=7C fpix=0 data=00075E28 pitch 1 hflipped=no dwidth=? (linked to 00071118) Transluency=no
565 --> Returned from scaled bitmap processing (rem=30, vscale=7C)...
566 OP: Scaled bitmap 4x? 4bpp at 38,? hscale=7C fpix=0 data=00075E28 pitch 1 hflipped=no dwidth=? (linked to 00071118) Transluency=no
567 --> Returned from scaled bitmap processing (rem=10, vscale=7C)...
568 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756A8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
569 --> Returned from scaled bitmap processing (rem=00, vscale=7E)...
570 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
571 --> Returned from scaled bitmap processing (rem=00, vscale=80)...
572 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
573 --> Returned from scaled bitmap processing (rem=5E, vscale=7E)...
574 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756E8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
575 --> Returned from scaled bitmap processing (rem=60, vscale=80)...
576 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
577 --> Returned from scaled bitmap processing (rem=3E, vscale=7E)...
578 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756E8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
579 --> Returned from scaled bitmap processing (rem=40, vscale=80)...
580 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
581 --> Returned from scaled bitmap processing (rem=1E, vscale=7E)...
582 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756E8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
583 --> Returned from scaled bitmap processing (rem=20, vscale=80)...
584 */
585 //Here's another problem:
586 //    [hsc: 20, vsc: 20, rem: 00]
587 // Since we're not checking for $E0 (but that's what we get from the above), we end
588 // up repeating this scanline unnecessarily... !!! FIX !!! [DONE, but... still not quite
589 // right. Either that, or the Accolade team that wrote Bubsy screwed up royal.]
590 //Also note: $E0 = 7.0 which IS a legal vscale value...
591
592 //                              if (remainder & 0x80)                           // I.e., it's negative
593 //                              if ((remainder & 0x80) || remainder == 0)       // I.e., it's <= 0
594 //                              if ((remainder - 1) >= 0xE0)            // I.e., it's <= 0
595 //                              if ((remainder >= 0xE1) || remainder == 0)// I.e., it's <= 0
596 //                              if ((remainder >= 0xE1 && remainder <= 0xFF) || remainder == 0)// I.e., it's <= 0
597 //                              if (remainder <= 0x20)                          // I.e., it's <= 1.0
598                                 // I.e., it's < 1.0f -> means it'll go negative when we subtract 1.0f.
599                                 if (remainder < 0x20)
600                                 {
601                                         uint64 data = (p0 & 0xFFFFF80000000000LL) >> 40;
602                                         uint64 dwidth = (p1 & 0xFFC0000) >> 15;
603
604 //                                      while (remainder & 0x80)
605 //                                      while ((remainder & 0x80) || remainder == 0)
606 //                                      while ((remainder - 1) >= 0xE0)
607 //                                      while ((remainder >= 0xE1) || remainder == 0)
608 //                                      while ((remainder >= 0xE1 && remainder <= 0xFF) || remainder == 0)
609 //                                      while (remainder <= 0x20)
610                                         while (remainder < 0x20)
611                                         {
612                                                 remainder += vscale;
613
614                                                 if (height)
615                                                         height--;
616
617                                                 data += dwidth;
618                                         }
619
620                                         p0 &= ~0xFFFFF80000FFC000LL;    // Mask out old data...
621                                         p0 |= (uint64)height << 14;
622                                         p0 |= data << 40;
623                                         OPStorePhrase(oldOPP, p0);
624                                 }
625
626                                 remainder -= 0x20;                                      // 1.0f in [3.5] fixed point format
627
628 //if (start_logging)
629 //      WriteLog("--> Finished writebacks...\n");//*/
630
631 //WriteLog(" [%08X%08X -> ", (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
632                                 p2 &= ~0x0000000000FF0000LL;
633                                 p2 |= (uint64)remainder << 16;
634 //WriteLog("%08X%08X]\n", (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
635                                 OPStorePhrase(oldOPP + 16, p2);
636 //remainder = (uint8)(p2 >> 16), vscale = (uint8)(p2 >> 8);
637 //WriteLog(" [after]: rem=%02X, vscale=%02X\n", remainder, vscale);
638                         }
639
640                         op_pointer = (p0 & 0x000007FFFF000000LL) >> 21;
641                         break;
642                 }
643                 case OBJECT_TYPE_GPU:
644                 {
645 //WriteLog("OP: Asserting GPU IRQ #3...\n");
646 #warning "Need to fix OP GPU IRQ handling! !!! FIX !!!"
647                         OPSetCurrentObject(p0);
648                         GPUSetIRQLine(3, ASSERT_LINE);
649 //Also, OP processing is suspended from this point until OBF (F00026) is written to...
650 // !!! FIX !!!
651 //Do something like:
652 //OPSuspendedByGPU = true;
653 //Dunno if the OP keeps processing from where it was interrupted, or if it just continues
654 //on the next scanline...
655 // --> It continues from where it was interrupted! !!! FIX !!!
656                         break;
657                 }
658                 case OBJECT_TYPE_BRANCH:
659                 {
660                         uint16 ypos = (p0 >> 3) & 0x7FF;
661                         uint8  cc   = (p0 >> 14) & 0x03;
662                         uint32 link = (p0 >> 21) & 0x3FFFF8;
663
664 //                      if ((ypos!=507)&&(ypos!=25))
665 //                              WriteLog("\t%i%s%i link=0x%.8x\n",scanline,condition_to_str[cc],ypos>>1,link);
666                         switch (cc)
667                         {
668                         case CONDITION_EQUAL:
669                                 if (TOMReadWord(0xF00006, OP) == ypos || ypos == 0x7FF)
670                                         op_pointer = link;
671                                 break;
672                         case CONDITION_LESS_THAN:
673                                 if (TOMReadWord(0xF00006, OP) < ypos)
674                                         op_pointer = link;
675                                 break;
676                         case CONDITION_GREATER_THAN:
677                                 if (TOMReadWord(0xF00006, OP) > ypos)
678                                         op_pointer = link;
679                                 break;
680                         case CONDITION_OP_FLAG_SET:
681                                 if (OPGetStatusRegister() & 0x01)
682                                         op_pointer = link;
683                                 break;
684                         case CONDITION_SECOND_HALF_LINE:
685 //Here's the ASIC code:
686 //  ND4(cctrue5, newheight[2], heightl[1], heightl[0], hcb[10]);
687 //which means, do the link if bit 10 of HC is set...
688
689                                 // This basically means branch if bit 10 of HC is set
690 #warning "Unhandled condition code causes emulator to crash... !!! FIX !!!"
691                                 WriteLog("OP: Unexpected CONDITION_SECOND_HALF_LINE in BRANCH object\nOP: shutting down!\n");
692                                 LogDone();
693                                 exit(0);
694                                 break;
695                         default:
696                                 WriteLog("OP: Unimplemented branch condition %i\n", cc);
697                         }
698                         break;
699                 }
700                 case OBJECT_TYPE_STOP:
701                 {
702 //op_start_log = 0;
703                         // unsure
704 //WriteLog("OP: --> STOP\n");
705 //                      op_set_status_register(((p0>>3) & 0xFFFFFFFF));
706 //This seems more likely...
707                         OPSetCurrentObject(p0);
708
709                         if (p0 & 0x08)
710                         {
711                                 // We need to check whether these interrupts are enabled or not, THEN
712                                 // set an IRQ + pending flag if necessary...
713                                 if (TOMIRQEnabled(IRQ_OPFLAG))
714                                 {
715                                         TOMSetPendingObjectInt();
716                                         m68k_set_irq(2);                                // Cause a 68K IPL 2 to occur...
717                                 }
718                         }
719
720                         return;
721 //                      break;
722                 }
723                 default:
724                         WriteLog("op: unknown object type %i\n", ((uint8)p0 & 0x07));
725                         return;
726                 }
727
728                 // Here is a little sanity check to keep the OP from locking up the machine
729                 // when fed bad data. Better would be to count how many actual cycles it used
730                 // and bail out/reenter to properly simulate an overloaded OP... !!! FIX !!!
731 #warning "Better would be to count how many actual cycles it used and bail out/reenter to properly simulate an overloaded OP... !!! FIX !!!"
732                 opCyclesToRun--;
733                 if (!opCyclesToRun)
734                         return;
735         }
736 }
737
738 //
739 // Store fixed size bitmap in line buffer
740 //
741 void OPProcessFixedBitmap(uint64 p0, uint64 p1, bool render)
742 {
743 // Need to make sure that when writing that it stays within the line buffer...
744 // LBUF ($F01800 - $F01D9E) 360 x 32-bit RAM
745         uint8 depth = (p1 >> 12) & 0x07;                                // Color depth of image
746         int32 xpos = ((int16)((p1 << 4) & 0xFFFF)) >> 4;// Image xpos in LBUF
747         uint32 iwidth = (p1 >> 28) & 0x3FF;                             // Image width in *phrases*
748         uint32 data = (p0 >> 40) & 0xFFFFF8;                    // Pixel data address
749 //#ifdef OP_DEBUG_BMP
750         uint32  firstPix = (p1 >> 49) & 0x3F;
751         // "The LSB is significant only for scaled objects..." -JTRM
752         // "In 1 BPP mode, all five bits are significant. In 2 BPP mode, the top four are significant..."
753         firstPix &= 0x3E;
754 //#endif
755 // We can ignore the RELEASE (high order) bit for now--probably forever...!
756 //      uint8 flags = (p1 >> 45) & 0x0F;        // REFLECT, RMW, TRANS, RELEASE
757 //Optimize: break these out to their own BOOL values
758         uint8 flags = (p1 >> 45) & 0x07;                                // REFLECT (0), RMW (1), TRANS (2)
759         bool flagREFLECT = (flags & OPFLAG_REFLECT ? true : false),
760                 flagRMW = (flags & OPFLAG_RMW ? true : false),
761                 flagTRANS = (flags & OPFLAG_TRANS ? true : false);
762 // "For images with 1 to 4 bits/pixel the top 7 to 4 bits of the index
763 //  provide the most significant bits of the palette address."
764         uint8 index = (p1 >> 37) & 0xFE;                                // CLUT index offset (upper pix, 1-4 bpp)
765         uint32 pitch = (p1 >> 15) & 0x07;                               // Phrase pitch
766         pitch <<= 3;                                                                    // Optimization: Multiply pitch by 8
767
768 //      int16 scanlineWidth = tom_getVideoModeWidth();
769         uint8 * tomRam8 = TOMGetRamPointer();
770         uint8 * paletteRAM = &tomRam8[0x400];
771         // This is OK as long as it's used correctly: For 16-bit RAM to RAM direct copies--NOT
772         // for use when using endian-corrected data (i.e., any of the *_word_read functions!)
773         uint16 * paletteRAM16 = (uint16 *)paletteRAM;
774
775 //      WriteLog("bitmap %ix? %ibpp at %i,? firstpix=? data=0x%.8x pitch %i hflipped=%s dwidth=? (linked to ?) RMW=%s Tranparent=%s\n",
776 //              iwidth, op_bitmap_bit_depth[bitdepth], xpos, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), (flags&OPFLAG_RMW ? "yes" : "no"), (flags&OPFLAG_TRANS ? "yes" : "no"));
777
778 // Is it OK to have a 0 for the data width??? (i.e., undocumented?)
779 // Seems to be... Seems that dwidth *can* be zero (i.e., reuse same line) as well.
780 // Pitch == 0 is OK too...
781 //      if (!render || op_pointer == 0 || ptr == 0 || pitch == 0)
782 //I'm not convinced that we need to concern ourselves with data & op_pointer here either!
783         if (!render || iwidth == 0)
784                 return;
785
786 //#define OP_DEBUG_BMP
787 //#ifdef OP_DEBUG_BMP
788 //      WriteLog("bitmap %ix%i %ibpp at %i,%i firstpix=%i data=0x%.8x pitch %i hflipped=%s dwidth=%i (linked to 0x%.8x) Transluency=%s\n",
789 //              iwidth, height, op_bitmap_bit_depth[bitdepth], xpos, ypos, firstPix, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), dwidth, op_pointer, (flags&OPFLAG_RMW ? "yes" : "no"));
790 //#endif
791
792 //      int32 leftMargin = xpos, rightMargin = (xpos + (phraseWidthToPixels[depth] * iwidth)) - 1;
793         int32 startPos = xpos, endPos = xpos +
794                 (!flagREFLECT ? (phraseWidthToPixels[depth] * iwidth) - 1
795                 : -((phraseWidthToPixels[depth] * iwidth) + 1));
796         uint32 clippedWidth = 0, phraseClippedWidth = 0, dataClippedWidth = 0;//, phrasePixel = 0;
797         bool in24BPPMode = (((GET16(tomRam8, 0x0028) >> 1) & 0x03) == 1 ? true : false);        // VMODE
798         // Not sure if this is Jaguar Two only location or what...
799         // From the docs, it is... If we want to limit here we should think of something else.
800 //      int32 limit = GET16(tom_ram_8, 0x0008);                 // LIMIT
801         int32 limit = 720;
802         int32 lbufWidth = (!in24BPPMode ? limit - 1 : (limit / 2) - 1); // Zero based limit...
803
804         // If the image is completely to the left or right of the line buffer, then bail.
805 //If in REFLECT mode, then these values are swapped! !!! FIX !!! [DONE]
806 //There are four possibilities:
807 //  1. image sits on left edge and no REFLECT; starts out of bounds but ends in bounds.
808 //  2. image sits on left edge and REFLECT; starts in bounds but ends out of bounds.
809 //  3. image sits on right edge and REFLECT; starts out of bounds but ends in bounds.
810 //  4. image sits on right edge and no REFLECT; starts in bounds but ends out of bounds.
811 //Numbers 2 & 4 can be caught by checking the LBUF clip while in the inner loop,
812 // numbers 1 & 3 are of concern.
813 // This *indirectly* handles only cases 2 & 4! And is WRONG is REFLECT is set...!
814 //      if (rightMargin < 0 || leftMargin > lbufWidth)
815
816 // It might be easier to swap these (if REFLECTed) and just use XPOS down below...
817 // That way, you could simply set XPOS to leftMargin if !REFLECT and to rightMargin otherwise.
818 // Still have to be careful with the DATA and IWIDTH values though...
819
820 //      if ((!flagREFLECT && (rightMargin < 0 || leftMargin > lbufWidth))
821 //              || (flagREFLECT && (leftMargin < 0 || rightMargin > lbufWidth)))
822 //              return;
823         if ((!flagREFLECT && (endPos < 0 || startPos > lbufWidth))
824                 || (flagREFLECT && (startPos < 0 || endPos > lbufWidth)))
825                 return;
826
827         // Otherwise, find the clip limits and clip the phrase as well...
828         // NOTE: I'm fudging here by letting the actual blit overstep the bounds of the
829         //       line buffer, but it shouldn't matter since there are two unused line
830         //       buffers below and nothing above and I'll at most write 8 bytes outside
831         //       the line buffer... I could use a fractional clip begin/end value, but
832         //       this makes the blit a *lot* more hairy. I might fix this in the future
833         //       if it becomes necessary. (JLH)
834         //       Probably wouldn't be *that* hairy. Just use a delta that tells the inner loop
835         //       which pixel in the phrase is being written, and quit when either end of phrases
836         //       is reached or line buffer extents are surpassed.
837
838 //This stuff is probably wrong as well... !!! FIX !!!
839 //The strange thing is that it seems to work, but that's no guarantee that it's bulletproof!
840 //Yup. Seems that JagMania doesn't work correctly with this...
841 //Dunno if this is the problem, but Atari Karts is showing *some* of the road now...
842 //      if (!flagREFLECT)
843
844 /*
845         if (leftMargin < 0)
846                 clippedWidth = 0 - leftMargin,
847                 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth],
848                 leftMargin = 0 - (clippedWidth % phraseWidthToPixels[depth]);
849 //              leftMargin = 0;
850
851         if (rightMargin > lbufWidth)
852                 clippedWidth = rightMargin - lbufWidth,
853                 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];//,
854 //              rightMargin = lbufWidth + (clippedWidth % phraseWidthToPixels[depth]);
855 //              rightMargin = lbufWidth;
856 */
857 if (depth > 5)
858         WriteLog("OP: We're about to encounter a divide by zero error!\n");
859         // NOTE: We're just using endPos to figure out how much, if any, to clip by.
860         // ALSO: There may be another case where we start out of bounds and end out of bounds...!
861         // !!! FIX !!!
862         if (startPos < 0)                       // Case #1: Begin out, end in, L to R
863                 clippedWidth = 0 - startPos,
864                 dataClippedWidth = phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth],
865                 startPos = 0 - (clippedWidth % phraseWidthToPixels[depth]);
866
867         if (endPos < 0)                         // Case #2: Begin in, end out, R to L
868                 clippedWidth = 0 - endPos,
869                 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];
870
871         if (endPos > lbufWidth)         // Case #3: Begin in, end out, L to R
872                 clippedWidth = endPos - lbufWidth,
873                 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];
874
875         if (startPos > lbufWidth)       // Case #4: Begin out, end in, R to L
876                 clippedWidth = startPos - lbufWidth,
877                 dataClippedWidth = phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth],
878                 startPos = lbufWidth + (clippedWidth % phraseWidthToPixels[depth]);
879
880         // If the image is sitting on the line buffer left or right edge, we need to compensate
881         // by decreasing the image phrase width accordingly.
882         iwidth -= phraseClippedWidth;
883
884         // Also, if we're clipping the phrase we need to make sure we're in the correct part of
885         // the pixel data.
886 //      data += phraseClippedWidth * (pitch << 3);
887         data += dataClippedWidth * pitch;
888
889         // NOTE: When the bitmap is in REFLECT mode, the XPOS marks the *right* side of the
890         //       bitmap! This makes clipping & etc. MUCH, much easier...!
891 //      uint32 lbufAddress = 0x1800 + (!in24BPPMode ? leftMargin * 2 : leftMargin * 4);
892 //Why does this work right when multiplying startPos by 2 (instead of 4) for 24 BPP mode?
893 //Is this a bug in the OP?
894         uint32 lbufAddress = 0x1800 + (!in24BPPMode ? startPos * 2 : startPos * 2);
895         uint8 * currentLineBuffer = &tomRam8[lbufAddress];
896
897         // Render.
898
899 // Hmm. We check above for 24 BPP mode, but don't do anything about it below...
900 // If we *were* in 24 BPP mode, how would you convert CRY to RGB24? Seems to me
901 // that if you're in CRY mode then you wouldn't be able to use 24 BPP bitmaps
902 // anyway.
903 // This seems to be the case (at least according to the Midsummer docs)...!
904
905 // This is to test using palette zeroes instead of bit zeroes...
906 // And it seems that this is wrong, index == 0 is transparent apparently... :-/
907 //#define OP_USES_PALETTE_ZERO
908
909         if (depth == 0)                                                                 // 1 BPP
910         {
911                 // The LSB of flags is OPFLAG_REFLECT, so sign extend it and or 2 into it.
912                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
913
914                 // Fetch 1st phrase...
915                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
916 //Note that firstPix should only be honored *if* we start with the 1st phrase of the bitmap
917 //i.e., we didn't clip on the margin... !!! FIX !!!
918                 pixels <<= firstPix;                                            // Skip first N pixels (N=firstPix)...
919                 int i = firstPix;                                                       // Start counter at right spot...
920
921                 while (iwidth--)
922                 {
923                         while (i++ < 64)
924                         {
925                                 uint8 bit = pixels >> 63;
926 #ifndef OP_USES_PALETTE_ZERO
927                                 if (flagTRANS && bit == 0)
928 #else
929                                 if (flagTRANS && (paletteRAM16[index | bit] == 0))
930 #endif
931                                         ;       // Do nothing...
932                                 else
933                                 {
934                                         if (!flagRMW)
935 //Optimize: Set palleteRAM16 to beginning of palette RAM + index*2 and use only [bit] as index...
936 //Won't optimize RMW case though...
937                                                 // This is the *only* correct use of endian-dependent code
938                                                 // (i.e., mem-to-mem direct copying)!
939                                                 *(uint16 *)currentLineBuffer = paletteRAM16[index | bit];
940                                         else
941                                                 *currentLineBuffer =
942                                                         BLEND_CR(*currentLineBuffer, paletteRAM[(index | bit) << 1]),
943                                                 *(currentLineBuffer + 1) =
944                                                         BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bit) << 1) + 1]);
945                                 }
946
947                                 currentLineBuffer += lbufDelta;
948                                 pixels <<= 1;
949                         }
950                         i = 0;
951                         // Fetch next phrase...
952                         data += pitch;
953                         pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
954                 }
955         }
956         else if (depth == 1)                                                    // 2 BPP
957         {
958 if (firstPix)
959         WriteLog("OP: Fixed bitmap @ 2 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
960                 index &= 0xFC;                                                          // Top six bits form CLUT index
961                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
962                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
963
964                 while (iwidth--)
965                 {
966                         // Fetch phrase...
967                         uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
968                         data += pitch;
969
970                         for(int i=0; i<32; i++)
971                         {
972                                 uint8 bits = pixels >> 62;
973 // Seems to me that both of these are in the same endian, so we could cast it as
974 // uint16 * and do straight across copies (what about 24 bpp? Treat it differently...)
975 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
976 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
977 // No, it isn't because we read the memory in an endian safe way--this *won't* work...
978 #ifndef OP_USES_PALETTE_ZERO
979                                 if (flagTRANS && bits == 0)
980 #else
981                                 if (flagTRANS && (paletteRAM16[index | bits] == 0))
982 #endif
983                                         ;       // Do nothing...
984                                 else
985                                 {
986                                         if (!flagRMW)
987                                                 *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
988                                         else
989                                                 *currentLineBuffer =
990                                                         BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
991                                                 *(currentLineBuffer + 1) =
992                                                         BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
993                                 }
994
995                                 currentLineBuffer += lbufDelta;
996                                 pixels <<= 2;
997                         }
998                 }
999         }
1000         else if (depth == 2)                                                    // 4 BPP
1001         {
1002 if (firstPix)
1003         WriteLog("OP: Fixed bitmap @ 4 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1004                 index &= 0xF0;                                                          // Top four bits form CLUT index
1005                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1006                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1007
1008                 while (iwidth--)
1009                 {
1010                         // Fetch phrase...
1011                         uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1012                         data += pitch;
1013
1014                         for(int i=0; i<16; i++)
1015                         {
1016                                 uint8 bits = pixels >> 60;
1017 // Seems to me that both of these are in the same endian, so we could cast it as
1018 // uint16 * and do straight across copies (what about 24 bpp? Treat it differently...)
1019 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
1020 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
1021 // No, it isn't because we read the memory in an endian safe way--this *won't* work...
1022 #ifndef OP_USES_PALETTE_ZERO
1023                                 if (flagTRANS && bits == 0)
1024 #else
1025                                 if (flagTRANS && (paletteRAM16[index | bits] == 0))
1026 #endif
1027                                         ;       // Do nothing...
1028                                 else
1029                                 {
1030                                         if (!flagRMW)
1031                                                 *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1032                                         else
1033                                                 *currentLineBuffer =
1034                                                         BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1035                                                 *(currentLineBuffer + 1) =
1036                                                         BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1037                                 }
1038
1039                                 currentLineBuffer += lbufDelta;
1040                                 pixels <<= 4;
1041                         }
1042                 }
1043         }
1044         else if (depth == 3)                                                    // 8 BPP
1045         {
1046                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1047                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1048
1049                 // Fetch 1st phrase...
1050                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1051 //Note that firstPix should only be honored *if* we start with the 1st phrase of the bitmap
1052 //i.e., we didn't clip on the margin... !!! FIX !!!
1053                 firstPix &= 0x30;                                                       // Only top two bits are valid for 8 BPP
1054                 pixels <<= firstPix;                                            // Skip first N pixels (N=firstPix)...
1055                 int i = firstPix >> 3;                                          // Start counter at right spot...
1056
1057                 while (iwidth--)
1058                 {
1059                         while (i++ < 8)
1060                         {
1061                                 uint8 bits = pixels >> 56;
1062 // Seems to me that both of these are in the same endian, so we could cast it as
1063 // uint16 * and do straight across copies (what about 24 bpp? Treat it differently...)
1064 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
1065 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
1066 // No, it isn't because we read the memory in an endian safe way--this *won't* work...
1067 //This would seem to be problematic...
1068 //Because it's the palette entry being zero that makes the pixel transparent...
1069 //Let's try it and see.
1070 #ifndef OP_USES_PALETTE_ZERO
1071                                 if (flagTRANS && bits == 0)
1072 #else
1073                                 if (flagTRANS && (paletteRAM16[bits] == 0))
1074 #endif
1075                                         ;       // Do nothing...
1076                                 else
1077                                 {
1078                                         if (!flagRMW)
1079                                                 *(uint16 *)currentLineBuffer = paletteRAM16[bits];
1080                                         else
1081                                                 *currentLineBuffer =
1082                                                         BLEND_CR(*currentLineBuffer, paletteRAM[bits << 1]),
1083                                                 *(currentLineBuffer + 1) =
1084                                                         BLEND_Y(*(currentLineBuffer + 1), paletteRAM[(bits << 1) + 1]);
1085                                 }
1086
1087                                 currentLineBuffer += lbufDelta;
1088                                 pixels <<= 8;
1089                         }
1090                         i = 0;
1091                         // Fetch next phrase...
1092                         data += pitch;
1093                         pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1094                 }
1095         }
1096         else if (depth == 4)                                                    // 16 BPP
1097         {
1098 if (firstPix)
1099         WriteLog("OP: Fixed bitmap @ 16 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1100                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1101                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1102
1103                 while (iwidth--)
1104                 {
1105                         // Fetch phrase...
1106                         uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1107                         data += pitch;
1108
1109                         for(int i=0; i<4; i++)
1110                         {
1111                                 uint8 bitsHi = pixels >> 56, bitsLo = pixels >> 48;
1112 // Seems to me that both of these are in the same endian, so we could cast it as
1113 // uint16 * and do straight across copies (what about 24 bpp? Treat it differently...)
1114 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
1115 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
1116 // No, it isn't because we read the memory in an endian safe way--it *won't* work...
1117 //This doesn't seem right... Let's try the encoded black value ($8800):
1118 //Apparently, CRY 0 maps to $8800...
1119                                 if (flagTRANS && ((bitsLo | bitsHi) == 0))
1120 //                              if (flagTRANS && (bitsHi == 0x88) && (bitsLo == 0x00))
1121                                         ;       // Do nothing...
1122                                 else
1123                                 {
1124                                         if (!flagRMW)
1125                                                 *currentLineBuffer = bitsHi,
1126                                                 *(currentLineBuffer + 1) = bitsLo;
1127                                         else
1128                                                 *currentLineBuffer =
1129                                                         BLEND_CR(*currentLineBuffer, bitsHi),
1130                                                 *(currentLineBuffer + 1) =
1131                                                         BLEND_Y(*(currentLineBuffer + 1), bitsLo);
1132                                 }
1133
1134                                 currentLineBuffer += lbufDelta;
1135                                 pixels <<= 16;
1136                         }
1137                 }
1138         }
1139         else if (depth == 5)                                                    // 24 BPP
1140         {
1141 //Looks like Iron Soldier is the only game that uses 24BPP mode...
1142 //There *might* be others...
1143 //WriteLog("OP: Writing 24 BPP bitmap!\n");
1144 if (firstPix)
1145         WriteLog("OP: Fixed bitmap @ 24 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1146                 // Not sure, but I think RMW only works with 16 BPP and below, and only in CRY mode...
1147                 // The LSB of flags is OPFLAG_REFLECT, so sign extend it and OR 4 into it.
1148                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 4) | 0x04;
1149
1150                 while (iwidth--)
1151                 {
1152                         // Fetch phrase...
1153                         uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1154                         data += pitch;
1155
1156                         for(int i=0; i<2; i++)
1157                         {
1158                                 // We don't use a 32-bit var here because of endian issues...!
1159                                 uint8 bits3 = pixels >> 56, bits2 = pixels >> 48,
1160                                         bits1 = pixels >> 40, bits0 = pixels >> 32;
1161
1162                                 if (flagTRANS && (bits3 | bits2 | bits1 | bits0) == 0)
1163                                         ;       // Do nothing...
1164                                 else
1165                                         *currentLineBuffer = bits3,
1166                                         *(currentLineBuffer + 1) = bits2,
1167                                         *(currentLineBuffer + 2) = bits1,
1168                                         *(currentLineBuffer + 3) = bits0;
1169
1170                                 currentLineBuffer += lbufDelta;
1171                                 pixels <<= 32;
1172                         }
1173                 }
1174         }
1175 }
1176
1177 //
1178 // Store scaled bitmap in line buffer
1179 //
1180 void OPProcessScaledBitmap(uint64 p0, uint64 p1, uint64 p2, bool render)
1181 {
1182 // Need to make sure that when writing that it stays within the line buffer...
1183 // LBUF ($F01800 - $F01D9E) 360 x 32-bit RAM
1184         uint8 depth = (p1 >> 12) & 0x07;                                // Color depth of image
1185         int32 xpos = ((int16)((p1 << 4) & 0xFFFF)) >> 4;// Image xpos in LBUF
1186         uint32 iwidth = (p1 >> 28) & 0x3FF;                             // Image width in *phrases*
1187         uint32 data = (p0 >> 40) & 0xFFFFF8;                    // Pixel data address
1188 //#ifdef OP_DEBUG_BMP
1189 // Prolly should use this... Though not sure exactly how.
1190 //Use the upper bits as an offset into the phrase depending on the BPP. That's how!
1191         uint32 firstPix = (p1 >> 49) & 0x3F;
1192 //This is WEIRD! I'm sure I saw Atari Karts request 8 BPP FIRSTPIX! What happened???
1193 if (firstPix)
1194         WriteLog("OP: FIRSTPIX != 0! (Scaled BM)\n");
1195 //#endif
1196 // We can ignore the RELEASE (high order) bit for now--probably forever...!
1197 //      uint8 flags = (p1 >> 45) & 0x0F;        // REFLECT, RMW, TRANS, RELEASE
1198 //Optimize: break these out to their own BOOL values [DONE]
1199         uint8 flags = (p1 >> 45) & 0x07;                                // REFLECT (0), RMW (1), TRANS (2)
1200         bool flagREFLECT = (flags & OPFLAG_REFLECT ? true : false),
1201                 flagRMW = (flags & OPFLAG_RMW ? true : false),
1202                 flagTRANS = (flags & OPFLAG_TRANS ? true : false);
1203         uint8 index = (p1 >> 37) & 0xFE;                                // CLUT index offset (upper pix, 1-4 bpp)
1204         uint32 pitch = (p1 >> 15) & 0x07;                               // Phrase pitch
1205
1206         uint8 * tomRam8 = TOMGetRamPointer();
1207         uint8 * paletteRAM = &tomRam8[0x400];
1208         // This is OK as long as it's used correctly: For 16-bit RAM to RAM direct copies--NOT
1209         // for use when using endian-corrected data (i.e., any of the *ReadWord functions!)
1210         uint16 * paletteRAM16 = (uint16 *)paletteRAM;
1211
1212         uint16 hscale = p2 & 0xFF;
1213 // Hmm. It seems that fixing the horizontal scale necessitated re-fixing this. Not sure why,
1214 // but seems to be consistent with the vertical scaling now (and it may turn out to be wrong!)...
1215         uint16 horizontalRemainder = hscale;                            // Not sure if it starts full, but seems reasonable [It's not!]
1216 //      uint8 horizontalRemainder = 0;                                  // Let's try zero! Seems to work! Yay! [No, it doesn't!]
1217         int32 scaledWidthInPixels = (iwidth * phraseWidthToPixels[depth] * hscale) >> 5;
1218         uint32 scaledPhrasePixels = (phraseWidthToPixels[depth] * hscale) >> 5;
1219
1220 //      WriteLog("bitmap %ix? %ibpp at %i,? firstpix=? data=0x%.8x pitch %i hflipped=%s dwidth=? (linked to ?) RMW=%s Tranparent=%s\n",
1221 //              iwidth, op_bitmap_bit_depth[bitdepth], xpos, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), (flags&OPFLAG_RMW ? "yes" : "no"), (flags&OPFLAG_TRANS ? "yes" : "no"));
1222
1223 // Looks like an hscale of zero means don't draw!
1224         if (!render || iwidth == 0 || hscale == 0)
1225                 return;
1226
1227 /*extern int start_logging;
1228 if (start_logging)
1229         WriteLog("OP: Scaled bitmap %ix? %ibpp at %i,? hscale=%02X fpix=%i data=%08X pitch %i hflipped=%s dwidth=? (linked to %08X) Transluency=%s\n",
1230                 iwidth, op_bitmap_bit_depth[depth], xpos, hscale, firstPix, data, pitch, (flagREFLECT ? "yes" : "no"), op_pointer, (flagRMW ? "yes" : "no"));*/
1231 //#define OP_DEBUG_BMP
1232 //#ifdef OP_DEBUG_BMP
1233 //      WriteLog("OP: Scaled bitmap %ix%i %ibpp at %i,%i firstpix=%i data=0x%.8x pitch %i hflipped=%s dwidth=%i (linked to 0x%.8x) Transluency=%s\n",
1234 //              iwidth, height, op_bitmap_bit_depth[bitdepth], xpos, ypos, firstPix, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), dwidth, op_pointer, (flags&OPFLAG_RMW ? "yes" : "no"));
1235 //#endif
1236
1237         int32 startPos = xpos, endPos = xpos +
1238                 (!flagREFLECT ? scaledWidthInPixels - 1 : -(scaledWidthInPixels + 1));
1239         uint32 clippedWidth = 0, phraseClippedWidth = 0, dataClippedWidth = 0;
1240         bool in24BPPMode = (((GET16(tomRam8, 0x0028) >> 1) & 0x03) == 1 ? true : false);        // VMODE
1241         // Not sure if this is Jaguar Two only location or what...
1242         // From the docs, it is... If we want to limit here we should think of something else.
1243 //      int32 limit = GET16(tom_ram_8, 0x0008);                 // LIMIT
1244         int32 limit = 720;
1245         int32 lbufWidth = (!in24BPPMode ? limit - 1 : (limit / 2) - 1); // Zero based limit...
1246
1247         // If the image is completely to the left or right of the line buffer, then bail.
1248 //If in REFLECT mode, then these values are swapped! !!! FIX !!! [DONE]
1249 //There are four possibilities:
1250 //  1. image sits on left edge and no REFLECT; starts out of bounds but ends in bounds.
1251 //  2. image sits on left edge and REFLECT; starts in bounds but ends out of bounds.
1252 //  3. image sits on right edge and REFLECT; starts out of bounds but ends in bounds.
1253 //  4. image sits on right edge and no REFLECT; starts in bounds but ends out of bounds.
1254 //Numbers 2 & 4 can be caught by checking the LBUF clip while in the inner loop,
1255 // numbers 1 & 3 are of concern.
1256 // This *indirectly* handles only cases 2 & 4! And is WRONG if REFLECT is set...!
1257 //      if (rightMargin < 0 || leftMargin > lbufWidth)
1258
1259 // It might be easier to swap these (if REFLECTed) and just use XPOS down below...
1260 // That way, you could simply set XPOS to leftMargin if !REFLECT and to rightMargin otherwise.
1261 // Still have to be careful with the DATA and IWIDTH values though...
1262
1263         if ((!flagREFLECT && (endPos < 0 || startPos > lbufWidth))
1264                 || (flagREFLECT && (startPos < 0 || endPos > lbufWidth)))
1265                 return;
1266
1267         // Otherwise, find the clip limits and clip the phrase as well...
1268         // NOTE: I'm fudging here by letting the actual blit overstep the bounds of the
1269         //       line buffer, but it shouldn't matter since there are two unused line
1270         //       buffers below and nothing above and I'll at most write 40 bytes outside
1271         //       the line buffer... I could use a fractional clip begin/end value, but
1272         //       this makes the blit a *lot* more hairy. I might fix this in the future
1273         //       if it becomes necessary. (JLH)
1274         //       Probably wouldn't be *that* hairy. Just use a delta that tells the inner loop
1275         //       which pixel in the phrase is being written, and quit when either end of phrases
1276         //       is reached or line buffer extents are surpassed.
1277
1278 //This stuff is probably wrong as well... !!! FIX !!!
1279 //The strange thing is that it seems to work, but that's no guarantee that it's bulletproof!
1280 //Yup. Seems that JagMania doesn't work correctly with this...
1281 //Dunno if this is the problem, but Atari Karts is showing *some* of the road now...
1282 //Actually, it is! Or, it was. It doesn't seem to be clipping here, so the problem lies
1283 //elsewhere! Hmm. Putting the scaling code into the 1/2/8 BPP cases seems to draw the ground
1284 // a bit more accurately... Strange!
1285 //It's probably a case of the REFLECT flag being set and the background being written
1286 //from the right side of the screen...
1287 //But no, it isn't... At least if the diagnostics are telling the truth!
1288
1289         // NOTE: We're just using endPos to figure out how much, if any, to clip by.
1290         // ALSO: There may be another case where we start out of bounds and end out of bounds...!
1291         // !!! FIX !!!
1292
1293 //There's a problem here with scaledPhrasePixels in that it can be forced to zero when
1294 //the scaling factor is small. So fix it already! !!! FIX !!!
1295 /*if (scaledPhrasePixels == 0)
1296 {
1297         WriteLog("OP: [Scaled] We're about to encounter a divide by zero error!\n");
1298         DumpScaledObject(p0, p1, p2);
1299 }//*/
1300 //NOTE: I'm almost 100% sure that this is wrong... And it is! :-p
1301
1302 //Try a simple example...
1303 // Let's say we have a 8 BPP scanline with an hscale of $80 (4). Our xpos is -10,
1304 // non-flipped. Pixels in the bitmap are XYZXYZXYZXYZXYZ.
1305 // Scaled up, they would be XXXXYYYYZZZZXXXXYYYYZZZZXXXXYYYYZZZZ...
1306 //
1307 // Normally, we would expect this in the line buffer:
1308 // ZZXXXXYYYYZZZZXXXXYYYYZZZZ...
1309 //
1310 // But instead we're getting:
1311 // XXXXYYYYZZZZXXXXYYYYZZZZ...
1312 //
1313 // or are we??? It would seem so, simply by virtue of the fact that we're NOT starting
1314 // on negative boundary--or are we? Hmm...
1315 // cw = 10, dcw = pcw = 10 / ([8 * 4 = 32] 32) = 0, sp = -10
1316 //
1317 // Let's try a real world example:
1318 //
1319 //OP: Scaled bitmap (70, 8 BPP, spp=28) sp (-400) < 0... [new sp=-8, cw=400, dcw=pcw=14]
1320 //OP: Scaled bitmap (6F, 8 BPP, spp=27) sp (-395) < 0... [new sp=-17, cw=395, dcw=pcw=14]
1321 //
1322 // Really, spp is 27.75 in the second case...
1323 // So... If we do 395 / 27.75, we get 14. Ok so far... If we scale that against the
1324 // start position (14 * 27.75), we get -6.5... NOT -17!
1325
1326 //Now it seems we're working OK, at least for the first case...
1327 uint32 scaledPhrasePixelsUS = phraseWidthToPixels[depth] * hscale;
1328
1329         if (startPos < 0)                       // Case #1: Begin out, end in, L to R
1330 {
1331 extern int start_logging;
1332 if (start_logging)
1333         WriteLog("OP: Scaled bitmap (%02X, %u BPP, spp=%u) start pos (%i) < 0...", hscale, op_bitmap_bit_depth[depth], scaledPhrasePixels, startPos);
1334 //              clippedWidth = 0 - startPos,
1335                 clippedWidth = (0 - startPos) << 5,
1336 //              dataClippedWidth = phraseClippedWidth = clippedWidth / scaledPhrasePixels,
1337                 dataClippedWidth = phraseClippedWidth = (clippedWidth / scaledPhrasePixelsUS) >> 5,
1338 //              startPos = 0 - (clippedWidth % scaledPhrasePixels);
1339                 startPos += (dataClippedWidth * scaledPhrasePixelsUS) >> 5;
1340 if (start_logging)
1341         WriteLog(" [new sp=%i, cw=%i, dcw=pcw=%i]\n", startPos, clippedWidth, dataClippedWidth);
1342 }
1343
1344         if (endPos < 0)                         // Case #2: Begin in, end out, R to L
1345                 clippedWidth = 0 - endPos,
1346                 phraseClippedWidth = clippedWidth / scaledPhrasePixels;
1347
1348         if (endPos > lbufWidth)         // Case #3: Begin in, end out, L to R
1349                 clippedWidth = endPos - lbufWidth,
1350                 phraseClippedWidth = clippedWidth / scaledPhrasePixels;
1351
1352         if (startPos > lbufWidth)       // Case #4: Begin out, end in, R to L
1353                 clippedWidth = startPos - lbufWidth,
1354                 dataClippedWidth = phraseClippedWidth = clippedWidth / scaledPhrasePixels,
1355                 startPos = lbufWidth + (clippedWidth % scaledPhrasePixels);
1356
1357 extern int op_start_log;
1358 if (op_start_log && clippedWidth != 0)
1359         WriteLog("OP: Clipped line. SP=%i, EP=%i, clip=%u, iwidth=%u, hscale=%02X\n", startPos, endPos, clippedWidth, iwidth, hscale);
1360 if (op_start_log && startPos == 13)
1361 {
1362         WriteLog("OP: Scaled line. SP=%i, EP=%i, clip=%u, iwidth=%u, hscale=%02X, depth=%u, firstPix=%u\n", startPos, endPos, clippedWidth, iwidth, hscale, depth, firstPix);
1363         DumpScaledObject(p0, p1, p2);
1364         if (iwidth == 7)
1365         {
1366                 WriteLog("    %08X: ", data);
1367                 for(int i=0; i<7*8; i++)
1368                         WriteLog("%02X ", JaguarReadByte(data+i));
1369                 WriteLog("\n");
1370         }
1371 }
1372         // If the image is sitting on the line buffer left or right edge, we need to compensate
1373         // by decreasing the image phrase width accordingly.
1374         iwidth -= phraseClippedWidth;
1375
1376         // Also, if we're clipping the phrase we need to make sure we're in the correct part of
1377         // the pixel data.
1378 //      data += phraseClippedWidth * (pitch << 3);
1379         data += dataClippedWidth * (pitch << 3);
1380
1381         // NOTE: When the bitmap is in REFLECT mode, the XPOS marks the *right* side of the
1382         //       bitmap! This makes clipping & etc. MUCH, much easier...!
1383 //      uint32 lbufAddress = 0x1800 + (!in24BPPMode ? leftMargin * 2 : leftMargin * 4);
1384 //      uint32 lbufAddress = 0x1800 + (!in24BPPMode ? startPos * 2 : startPos * 4);
1385         uint32 lbufAddress = 0x1800 + startPos * 2;
1386         uint8 * currentLineBuffer = &tomRam8[lbufAddress];
1387 //uint8 * lineBufferLowerLimit = &tom_ram_8[0x1800],
1388 //      * lineBufferUpperLimit = &tom_ram_8[0x1800 + 719];
1389
1390         // Render.
1391
1392 // Hmm. We check above for 24 BPP mode, but don't do anything about it below...
1393 // If we *were* in 24 BPP mode, how would you convert CRY to RGB24? Seems to me
1394 // that if you're in CRY mode then you wouldn't be able to use 24 BPP bitmaps
1395 // anyway.
1396 // This seems to be the case (at least according to the Midsummer docs)...!
1397
1398         if (depth == 0)                                                                 // 1 BPP
1399         {
1400 if (firstPix != 0)
1401         WriteLog("OP: Scaled bitmap @ 1 BPP requesting FIRSTPIX!\n");
1402                 // The LSB of flags is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1403                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1404
1405                 int pixCount = 0;
1406                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1407
1408                 while ((int32)iwidth > 0)
1409                 {
1410                         uint8 bits = pixels >> 63;
1411
1412 #ifndef OP_USES_PALETTE_ZERO
1413                         if (flagTRANS && bits == 0)
1414 #else
1415                         if (flagTRANS && (paletteRAM16[index | bits] == 0))
1416 #endif
1417                                 ;       // Do nothing...
1418                         else
1419                         {
1420                                 if (!flagRMW)
1421                                         // This is the *only* correct use of endian-dependent code
1422                                         // (i.e., mem-to-mem direct copying)!
1423                                         *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1424                                 else
1425                                         *currentLineBuffer =
1426                                                 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1427                                         *(currentLineBuffer + 1) =
1428                                                 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1429                         }
1430
1431                         currentLineBuffer += lbufDelta;
1432
1433 /*
1434 The reason we subtract the horizontalRemainder *after* the test is because we had too few
1435 bytes for horizontalRemainder to properly recognize a negative number. But now it's 16 bits
1436 wide, so we could probably go back to that (as long as we make it an int16 and not a uint16!)
1437 */
1438 /*                      horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1439                         while (horizontalRemainder & 0x80)
1440                         {
1441                                 horizontalRemainder += hscale;
1442                                 pixCount++;
1443                                 pixels <<= 1;
1444                         }//*/
1445 //                      while (horizontalRemainder <= 0x20)             // I.e., it's <= 1.0 (*before* subtraction)
1446                         while (horizontalRemainder < 0x20)              // I.e., it's <= 1.0 (*before* subtraction)
1447                         {
1448                                 horizontalRemainder += hscale;
1449                                 pixCount++;
1450                                 pixels <<= 1;
1451                         }
1452                         horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1453
1454                         if (pixCount > 63)
1455                         {
1456                                 int phrasesToSkip = pixCount / 64, pixelShift = pixCount % 64;
1457
1458                                 data += (pitch << 3) * phrasesToSkip;
1459                                 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1460                                 pixels <<= 1 * pixelShift;
1461                                 iwidth -= phrasesToSkip;
1462                                 pixCount = pixelShift;
1463                         }
1464                 }
1465         }
1466         else if (depth == 1)                                                    // 2 BPP
1467         {
1468 if (firstPix != 0)
1469         WriteLog("OP: Scaled bitmap @ 2 BPP requesting FIRSTPIX!\n");
1470                 index &= 0xFC;                                                          // Top six bits form CLUT index
1471                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1472                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1473
1474                 int pixCount = 0;
1475                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1476
1477                 while ((int32)iwidth > 0)
1478                 {
1479                         uint8 bits = pixels >> 62;
1480
1481 #ifndef OP_USES_PALETTE_ZERO
1482                         if (flagTRANS && bits == 0)
1483 #else
1484                         if (flagTRANS && (paletteRAM16[index | bits] == 0))
1485 #endif
1486                                 ;       // Do nothing...
1487                         else
1488                         {
1489                                 if (!flagRMW)
1490                                         // This is the *only* correct use of endian-dependent code
1491                                         // (i.e., mem-to-mem direct copying)!
1492                                         *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1493                                 else
1494                                         *currentLineBuffer =
1495                                                 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1496                                         *(currentLineBuffer + 1) =
1497                                                 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1498                         }
1499
1500                         currentLineBuffer += lbufDelta;
1501
1502 /*                      horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1503                         while (horizontalRemainder & 0x80)
1504                         {
1505                                 horizontalRemainder += hscale;
1506                                 pixCount++;
1507                                 pixels <<= 2;
1508                         }//*/
1509 //                      while (horizontalRemainder <= 0x20)             // I.e., it's <= 0 (*before* subtraction)
1510                         while (horizontalRemainder < 0x20)              // I.e., it's <= 1.0 (*before* subtraction)
1511                         {
1512                                 horizontalRemainder += hscale;
1513                                 pixCount++;
1514                                 pixels <<= 2;
1515                         }
1516                         horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1517
1518                         if (pixCount > 31)
1519                         {
1520                                 int phrasesToSkip = pixCount / 32, pixelShift = pixCount % 32;
1521
1522                                 data += (pitch << 3) * phrasesToSkip;
1523                                 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1524                                 pixels <<= 2 * pixelShift;
1525                                 iwidth -= phrasesToSkip;
1526                                 pixCount = pixelShift;
1527                         }
1528                 }
1529         }
1530         else if (depth == 2)                                                    // 4 BPP
1531         {
1532 if (firstPix != 0)
1533         WriteLog("OP: Scaled bitmap @ 4 BPP requesting FIRSTPIX!\n");
1534                 index &= 0xF0;                                                          // Top four bits form CLUT index
1535                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1536                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1537
1538                 int pixCount = 0;
1539                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1540
1541                 while ((int32)iwidth > 0)
1542                 {
1543                         uint8 bits = pixels >> 60;
1544
1545 #ifndef OP_USES_PALETTE_ZERO
1546                         if (flagTRANS && bits == 0)
1547 #else
1548                         if (flagTRANS && (paletteRAM16[index | bits] == 0))
1549 #endif
1550                                 ;       // Do nothing...
1551                         else
1552                         {
1553                                 if (!flagRMW)
1554                                         // This is the *only* correct use of endian-dependent code
1555                                         // (i.e., mem-to-mem direct copying)!
1556                                         *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1557                                 else
1558                                         *currentLineBuffer =
1559                                                 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1560                                         *(currentLineBuffer + 1) =
1561                                                 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1562                         }
1563
1564                         currentLineBuffer += lbufDelta;
1565
1566 /*                      horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1567                         while (horizontalRemainder & 0x80)
1568                         {
1569                                 horizontalRemainder += hscale;
1570                                 pixCount++;
1571                                 pixels <<= 4;
1572                         }//*/
1573 //                      while (horizontalRemainder <= 0x20)             // I.e., it's <= 0 (*before* subtraction)
1574                         while (horizontalRemainder < 0x20)              // I.e., it's <= 0 (*before* subtraction)
1575                         {
1576                                 horizontalRemainder += hscale;
1577                                 pixCount++;
1578                                 pixels <<= 4;
1579                         }
1580                         horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1581
1582                         if (pixCount > 15)
1583                         {
1584                                 int phrasesToSkip = pixCount / 16, pixelShift = pixCount % 16;
1585
1586                                 data += (pitch << 3) * phrasesToSkip;
1587                                 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1588                                 pixels <<= 4 * pixelShift;
1589                                 iwidth -= phrasesToSkip;
1590                                 pixCount = pixelShift;
1591                         }
1592                 }
1593         }
1594         else if (depth == 3)                                                    // 8 BPP
1595         {
1596 if (firstPix)
1597         WriteLog("OP: Scaled bitmap @ 8 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1598                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1599                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1600
1601                 int pixCount = 0;
1602                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1603
1604                 while ((int32)iwidth > 0)
1605                 {
1606                         uint8 bits = pixels >> 56;
1607
1608 #ifndef OP_USES_PALETTE_ZERO
1609                         if (flagTRANS && bits == 0)
1610 #else
1611                         if (flagTRANS && (paletteRAM16[bits] == 0))
1612 #endif
1613                                 ;       // Do nothing...
1614                         else
1615                         {
1616                                 if (!flagRMW)
1617                                         // This is the *only* correct use of endian-dependent code
1618                                         // (i.e., mem-to-mem direct copying)!
1619                                         *(uint16 *)currentLineBuffer = paletteRAM16[bits];
1620 /*                              {
1621                                         if (currentLineBuffer >= lineBufferLowerLimit && currentLineBuffer <= lineBufferUpperLimit)
1622                                                 *(uint16 *)currentLineBuffer = paletteRAM16[bits];
1623                                 }*/
1624                                 else
1625                                         *currentLineBuffer =
1626                                                 BLEND_CR(*currentLineBuffer, paletteRAM[bits << 1]),
1627                                         *(currentLineBuffer + 1) =
1628                                                 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[(bits << 1) + 1]);
1629                         }
1630
1631                         currentLineBuffer += lbufDelta;
1632
1633 //                      while (horizontalRemainder <= 0x20)             // I.e., it's <= 0 (*before* subtraction)
1634                         while (horizontalRemainder < 0x20)              // I.e., it's <= 1.0 (*before* subtraction)
1635                         {
1636                                 horizontalRemainder += hscale;
1637                                 pixCount++;
1638                                 pixels <<= 8;
1639                         }
1640                         horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1641
1642                         if (pixCount > 7)
1643                         {
1644                                 int phrasesToSkip = pixCount / 8, pixelShift = pixCount % 8;
1645
1646                                 data += (pitch << 3) * phrasesToSkip;
1647                                 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1648                                 pixels <<= 8 * pixelShift;
1649                                 iwidth -= phrasesToSkip;
1650                                 pixCount = pixelShift;
1651                         }
1652                 }
1653         }
1654         else if (depth == 4)                                                    // 16 BPP
1655         {
1656 if (firstPix != 0)
1657         WriteLog("OP: Scaled bitmap @ 16 BPP requesting FIRSTPIX!\n");
1658                 // The LSB is OPFLAG_REFLECT, so sign extend it and OR 2 into it.
1659                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1660
1661                 int pixCount = 0;
1662                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1663
1664                 while ((int32)iwidth > 0)
1665                 {
1666                         uint8 bitsHi = pixels >> 56, bitsLo = pixels >> 48;
1667
1668 //This doesn't seem right... Let's try the encoded black value ($8800):
1669 //Apparently, CRY 0 maps to $8800...
1670                                 if (flagTRANS && ((bitsLo | bitsHi) == 0))
1671 //                              if (flagTRANS && (bitsHi == 0x88) && (bitsLo == 0x00))
1672                                 ;       // Do nothing...
1673                         else
1674                         {
1675                                 if (!flagRMW)
1676                                         *currentLineBuffer = bitsHi,
1677                                         *(currentLineBuffer + 1) = bitsLo;
1678                                 else
1679                                         *currentLineBuffer =
1680                                                 BLEND_CR(*currentLineBuffer, bitsHi),
1681                                         *(currentLineBuffer + 1) =
1682                                                 BLEND_Y(*(currentLineBuffer + 1), bitsLo);
1683                         }
1684
1685                         currentLineBuffer += lbufDelta;
1686
1687 /*                      horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1688                         while (horizontalRemainder & 0x80)
1689                         {
1690                                 horizontalRemainder += hscale;
1691                                 pixCount++;
1692                                 pixels <<= 16;
1693                         }//*/
1694 //                      while (horizontalRemainder <= 0x20)             // I.e., it's <= 0 (*before* subtraction)
1695                         while (horizontalRemainder < 0x20)              // I.e., it's <= 1.0 (*before* subtraction)
1696                         {
1697                                 horizontalRemainder += hscale;
1698                                 pixCount++;
1699                                 pixels <<= 16;
1700                         }
1701                         horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1702 //*/
1703                         if (pixCount > 3)
1704                         {
1705                                 int phrasesToSkip = pixCount / 4, pixelShift = pixCount % 4;
1706
1707                                 data += (pitch << 3) * phrasesToSkip;
1708                                 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1709                                 pixels <<= 16 * pixelShift;
1710
1711                                 iwidth -= phrasesToSkip;
1712
1713                                 pixCount = pixelShift;
1714                         }
1715                 }
1716         }
1717         else if (depth == 5)                                                    // 24 BPP
1718         {
1719 //I'm not sure that you can scale a 24 BPP bitmap properly--the JTRM seem to indicate as much.
1720 WriteLog("OP: Writing 24 BPP scaled bitmap!\n");
1721 if (firstPix != 0)
1722         WriteLog("OP: Scaled bitmap @ 24 BPP requesting FIRSTPIX!\n");
1723                 // Not sure, but I think RMW only works with 16 BPP and below, and only in CRY mode...
1724                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 4 into it.
1725                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 4) | 0x04;
1726
1727                 while (iwidth--)
1728                 {
1729                         // Fetch phrase...
1730                         uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1731                         data += pitch << 3;                                             // Multiply pitch * 8 (optimize: precompute this value)
1732
1733                         for(int i=0; i<2; i++)
1734                         {
1735                                 uint8 bits3 = pixels >> 56, bits2 = pixels >> 48,
1736                                         bits1 = pixels >> 40, bits0 = pixels >> 32;
1737
1738                                 if (flagTRANS && (bits3 | bits2 | bits1 | bits0) == 0)
1739                                         ;       // Do nothing...
1740                                 else
1741                                         *currentLineBuffer = bits3,
1742                                         *(currentLineBuffer + 1) = bits2,
1743                                         *(currentLineBuffer + 2) = bits1,
1744                                         *(currentLineBuffer + 3) = bits0;
1745
1746                                 currentLineBuffer += lbufDelta;
1747                                 pixels <<= 32;
1748                         }
1749                 }
1750         }
1751 }