]> Shamusworld >> Repos - virtualjaguar/blob - src/op.cpp
73fc330010dced9a3a83a71f345d33dbbc54369c
[virtualjaguar] / src / op.cpp
1 //
2 // Object Processor
3 //
4 // Original source by David Raingeard (Cal2)
5 // GCC/SDL port by Niels Wagenaar (Linux/WIN32) and Caz (BeOS)
6 // Extensive cleanups/fixes/rewrites by James L. Hammons
7 // (C) 2010 Underground Software
8 //
9 // JLH = James L. Hammons <jlhamm@acm.org>
10 //
11 // Who  When        What
12 // ---  ----------  -------------------------------------------------------------
13 // JLH  01/16/2010  Created this log ;-)
14 //
15
16 #include "op.h"
17
18 #include <stdlib.h>
19 #include <string.h>
20 #include "gpu.h"
21 #include "jaguar.h"
22 #include "log.h"
23 #include "m68k.h"
24 #include "memory.h"
25 #include "tom.h"
26
27 //#define OP_DEBUG
28 //#define OP_DEBUG_BMP
29
// Blend-table lookups. The 16-bit table index is built from two bytes: the
// existing (destination) byte selects the upper half and the source/delta byte the
// lower half. Both arguments are fully parenthesized before being cast so that an
// expression argument (e.g. "a | b" or "c ? x : y") is evaluated as a unit instead of
// being split apart by the cast and the shift.
// NOTE(review): callers are assumed to pass values in the range 0-0xFF; nothing here
// masks a wider value before it is used as an index.
#define BLEND_Y(dst, src)	op_blend_y[(((uint16)(dst)) << 8) | ((uint16)(src))]
#define BLEND_CR(dst, src)	op_blend_cr[(((uint16)(dst)) << 8) | ((uint16)(src))]
32
// Object types, as encoded in the low three bits of an object's first phrase
#define OBJECT_TYPE_BITMAP			0		// 000
#define OBJECT_TYPE_SCALE			1		// 001
#define OBJECT_TYPE_GPU				2		// 010
#define OBJECT_TYPE_BRANCH			3		// 011
#define OBJECT_TYPE_STOP			4		// 100

// Condition codes tested by a BRANCH object
#define CONDITION_EQUAL				0
#define CONDITION_LESS_THAN			1
#define CONDITION_GREATER_THAN		2
#define CONDITION_OP_FLAG_SET		3
#define CONDITION_SECOND_HALF_LINE	4

// Bits of the four-bit bitmap flags field
#define OPFLAG_RELEASE				8		// Bus release bit
#define OPFLAG_TRANS				4		// Transparency bit
#define OPFLAG_RMW					2		// Read-Modify-Write bit
#define OPFLAG_REFLECT				1		// Horizontal mirror bit
49
50 // Private function prototypes
51
52 void OPProcessFixedBitmap(uint64 p0, uint64 p1, bool render);
53 void OPProcessScaledBitmap(uint64 p0, uint64 p1, uint64 p2, bool render);
54 void DumpScaledObject(uint64 p0, uint64 p1, uint64 p2);
55 void DumpFixedObject(uint64 p0, uint64 p1);
56 uint64 OPLoadPhrase(uint32 offset);
57
58 // Local global variables
59
60 // Blend tables (64K each)
61 static uint8 op_blend_y[0x10000];
62 static uint8 op_blend_cr[0x10000];
63 // There may be a problem with this "RAM" overlapping (and thus being independent of)
64 // some of the regular TOM RAM...
65 //#warning objectp_ram is separated from TOM RAM--need to fix that!
66 //static uint8 objectp_ram[0x40];                       // This is based at $F00000
67 uint8 objectp_running = 0;
68 //bool objectp_stop_reading_list;
69
70 static uint8 op_bitmap_bit_depth[8] = { 1, 2, 4, 8, 16, 24, 32, 0 };
71 //static uint32 op_bitmap_bit_size[8] =
72 //      { (uint32)(0.125*65536), (uint32)(0.25*65536), (uint32)(0.5*65536), (uint32)(1*65536),
73 //        (uint32)(2*65536),     (uint32)(1*65536),    (uint32)(1*65536),   (uint32)(1*65536) };
74 static uint32 op_pointer;
75
76 int32 phraseWidthToPixels[8] = { 64, 32, 16, 8, 4, 2, 0, 0 };
77
78
79 //
80 // Object Processor initialization
81 //
82 void OPInit(void)
83 {
84         // Here we calculate the saturating blend of a signed 4-bit value and an
85         // existing Cyan/Red value as well as a signed 8-bit value and an existing intensity...
86         // Note: CRY is 4 bits Cyan, 4 bits Red, 16 bits intensitY
87         for(int i=0; i<256*256; i++)
88         {
89                 int y = (i >> 8) & 0xFF;
90                 int dy = (int8)i;                                       // Sign extend the Y index
91                 int c1 = (i >> 8) & 0x0F;
92                 int dc1 = (int8)(i << 4) >> 4;          // Sign extend the R index
93                 int c2 = (i >> 12) & 0x0F;
94                 int dc2 = (int8)(i & 0xF0) >> 4;        // Sign extend the C index
95
96                 y += dy;
97
98                 if (y < 0)
99                         y = 0;
100                 else if (y > 0xFF)
101                         y = 0xFF;
102
103                 op_blend_y[i] = y;
104
105                 c1 += dc1;
106
107                 if (c1 < 0)
108                         c1 = 0;
109                 else if (c1 > 0x0F)
110                         c1 = 0x0F;
111
112                 c2 += dc2;
113
114                 if (c2 < 0)
115                         c2 = 0;
116                 else if (c2 > 0x0F)
117                         c2 = 0x0F;
118
119                 op_blend_cr[i] = (c2 << 4) | c1;
120         }
121
122         OPReset();
123 }
124
125 //
126 // Object Processor reset
127 //
128 void OPReset(void)
129 {
130 //      memset(objectp_ram, 0x00, 0x40);
131         objectp_running = 0;
132 }
133
134 void OPDone(void)
135 {
136         const char * opType[8] =
137         { "(BITMAP)", "(SCALED BITMAP)", "(GPU INT)", "(BRANCH)", "(STOP)", "???", "???", "???" };
138         const char * ccType[8] =
139                 { "\"==\"", "\"<\"", "\">\"", "(opflag set)", "(second half line)", "?", "?", "?" };
140
141         uint32 olp = OPGetListPointer();
142         WriteLog("OP: OLP = %08X\n", olp);
143         WriteLog("OP: Phrase dump\n    ----------\n");
144         for(uint32 i=0; i<0x100; i+=8)
145         {
146                 uint32 hi = JaguarReadLong(olp + i, OP), lo = JaguarReadLong(olp + i + 4, OP);
147                 WriteLog("\t%08X: %08X %08X %s", olp + i, hi, lo, opType[lo & 0x07]);
148                 if ((lo & 0x07) == 3)
149                 {
150                         uint16 ypos = (lo >> 3) & 0x7FF;
151                         uint8  cc   = (lo >> 14) & 0x03;
152                         uint32 link = ((hi << 11) | (lo >> 21)) & 0x3FFFF8;
153                         WriteLog(" YPOS=%u, CC=%s, link=%08X", ypos, ccType[cc], link);
154                 }
155                 WriteLog("\n");
156                 if ((lo & 0x07) == 0)
157                         DumpFixedObject(OPLoadPhrase(olp+i), OPLoadPhrase(olp+i+8));
158                 if ((lo & 0x07) == 1)
159                         DumpScaledObject(OPLoadPhrase(olp+i), OPLoadPhrase(olp+i+8), OPLoadPhrase(olp+i+16));
160         }
161         WriteLog("\n");
162
163 //      memory_free(op_blend_y);
164 //      memory_free(op_blend_cr);
165 }
166
167 //
168 // Object Processor memory access
169 // Memory range: F00010 - F00027
170 //
171 //      F00010-F00017   R     xxxxxxxx xxxxxxxx   OB - current object code from the graphics processor
172 //      F00020-F00023     W   xxxxxxxx xxxxxxxx   OLP - start of the object list
173 //      F00026            W   -------- -------x   OBF - object processor flag
174 //
175
#if 0
// Disabled register accessors. They address a private 0x40-byte register array
// (objectp_ram) which is itself no longer compiled in, so this section cannot be
// re-enabled as it stands. Every access wraps the offset to the array's 0x40 bytes.

uint8 OPReadByte(uint32 offset, uint32 who/*=UNKNOWN*/)
{
	const uint32 reg = offset & 0x3F;
	return objectp_ram[reg];
}

uint16 OPReadWord(uint32 offset, uint32 who/*=UNKNOWN*/)
{
	const uint32 reg = offset & 0x3F;
	return GET16(objectp_ram, reg);
}

void OPWriteByte(uint32 offset, uint8 data, uint32 who/*=UNKNOWN*/)
{
	const uint32 reg = offset & 0x3F;
	objectp_ram[reg] = data;
}

void OPWriteWord(uint32 offset, uint16 data, uint32 who/*=UNKNOWN*/)
{
	const uint32 reg = offset & 0x3F;
	SET16(objectp_ram, reg, data);
}
#endif
206
207 uint32 OPGetListPointer(void)
208 {
209         // Note: This register is LO / HI WORD, hence the funky look of this...
210         return GET16(tomRam8, 0x20) | (GET16(tomRam8, 0x22) << 16);
211 }
212
213 // This is WRONG, since the OBF is only 16 bits wide!!! [FIXED]
214
215 uint32 OPGetStatusRegister(void)
216 {
217         return GET16(tomRam8, 0x26);
218 }
219
220 // This is WRONG, since the OBF is only 16 bits wide!!! [FIXED]
221
222 void OPSetStatusRegister(uint32 data)
223 {
224         tomRam8[0x26] = (data & 0x0000FF00) >> 8;
225         tomRam8[0x27] |= (data & 0xFE);
226 }
227
228 void OPSetCurrentObject(uint64 object)
229 {
230 //Not sure this is right... Wouldn't it just be stored 64 bit BE?
231         // Stored as least significant 32 bits first, ms32 last in big endian
232 /*      objectp_ram[0x13] = object & 0xFF; object >>= 8;
233         objectp_ram[0x12] = object & 0xFF; object >>= 8;
234         objectp_ram[0x11] = object & 0xFF; object >>= 8;
235         objectp_ram[0x10] = object & 0xFF; object >>= 8;
236
237         objectp_ram[0x17] = object & 0xFF; object >>= 8;
238         objectp_ram[0x16] = object & 0xFF; object >>= 8;
239         objectp_ram[0x15] = object & 0xFF; object >>= 8;
240         objectp_ram[0x14] = object & 0xFF;*/
241 // Let's try regular good old big endian...
242         tomRam8[0x17] = object & 0xFF; object >>= 8;
243         tomRam8[0x16] = object & 0xFF; object >>= 8;
244         tomRam8[0x15] = object & 0xFF; object >>= 8;
245         tomRam8[0x14] = object & 0xFF; object >>= 8;
246
247         tomRam8[0x13] = object & 0xFF; object >>= 8;
248         tomRam8[0x12] = object & 0xFF; object >>= 8;
249         tomRam8[0x11] = object & 0xFF; object >>= 8;
250         tomRam8[0x10] = object & 0xFF;
251 }
252
253 uint64 OPLoadPhrase(uint32 offset)
254 {
255         offset &= ~0x07;                                                // 8 byte alignment
256         return ((uint64)JaguarReadLong(offset, OP) << 32) | (uint64)JaguarReadLong(offset+4, OP);
257 }
258
259 void OPStorePhrase(uint32 offset, uint64 p)
260 {
261         offset &= ~0x07;                                                // 8 byte alignment
262         JaguarWriteLong(offset, p >> 32, OP);
263         JaguarWriteLong(offset + 4, p & 0xFFFFFFFF, OP);
264 }
265
266 //
267 // Debugging routines
268 //
269 void DumpScaledObject(uint64 p0, uint64 p1, uint64 p2)
270 {
271         WriteLog(" (SCALED BITMAP)");
272         WriteLog(" %08X --> phrase %08X %08X\n", op_pointer, (uint32)(p1>>32), (uint32)(p1&0xFFFFFFFF));
273         WriteLog("                 %08X --> phrase %08X %08X ", op_pointer+8, (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
274         uint8 bitdepth = (p1 >> 12) & 0x07;
275 //WAS:  int16 ypos = ((p0 >> 3) & 0x3FF);                       // ??? What if not interlaced (/2)?
276         int16 ypos = ((p0 >> 3) & 0x7FF);                       // ??? What if not interlaced (/2)?
277         int32 xpos = p1 & 0xFFF;
278         xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);
279         uint32 iwidth = ((p1 >> 28) & 0x3FF);
280         uint32 dwidth = ((p1 >> 18) & 0x3FF);           // Unsigned!
281         uint16 height = ((p0 >> 14) & 0x3FF);
282         uint32 link = ((p0 >> 24) & 0x7FFFF) << 3;
283         uint32 ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
284         uint32 firstPix = (p1 >> 49) & 0x3F;
285         uint8 flags = (p1 >> 45) & 0x0F;
286         uint8 idx = (p1 >> 38) & 0x7F;
287         uint32 pitch = (p1 >> 15) & 0x07;
288         WriteLog("\n    [%u (%u) x %u @ (%i, %u) (%u bpp), l: %08X, p: %08X fp: %02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
289                 iwidth, dwidth, height, xpos, ypos, op_bitmap_bit_depth[bitdepth], link, ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""), (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""), (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
290         uint32 hscale = p2 & 0xFF;
291         uint32 vscale = (p2 >> 8) & 0xFF;
292         uint32 remainder = (p2 >> 16) & 0xFF;
293         WriteLog("    [hsc: %02X, vsc: %02X, rem: %02X]\n", hscale, vscale, remainder);
294 }
295
296 void DumpFixedObject(uint64 p0, uint64 p1)
297 {
298         WriteLog(" (BITMAP)");
299         WriteLog(" %08X --> phrase %08X %08X\n", op_pointer, (uint32)(p1>>32), (uint32)(p1&0xFFFFFFFF));
300         uint8 bitdepth = (p1 >> 12) & 0x07;
301 //WAS:  int16 ypos = ((p0 >> 3) & 0x3FF);                       // ??? What if not interlaced (/2)?
302         int16 ypos = ((p0 >> 3) & 0x7FF);                       // ??? What if not interlaced (/2)?
303         int32 xpos = p1 & 0xFFF;
304         xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);
305         uint32 iwidth = ((p1 >> 28) & 0x3FF);
306         uint32 dwidth = ((p1 >> 18) & 0x3FF);           // Unsigned!
307         uint16 height = ((p0 >> 14) & 0x3FF);
308         uint32 link = ((p0 >> 24) & 0x7FFFF) << 3;
309         uint32 ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
310         uint32 firstPix = (p1 >> 49) & 0x3F;
311         uint8 flags = (p1 >> 45) & 0x0F;
312         uint8 idx = (p1 >> 38) & 0x7F;
313         uint32 pitch = (p1 >> 15) & 0x07;
314         WriteLog("    [%u (%u) x %u @ (%i, %u) (%u bpp), l: %08X, p: %08X fp: %02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
315                 iwidth, dwidth, height, xpos, ypos, op_bitmap_bit_depth[bitdepth], link, ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""), (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""), (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
316 }
317
318 //
319 // Object Processor main routine
320 //
321 #warning "Need to fix this so that when an GPU object IRQ happens, we can pick up OP processing where we left off. !!! FIX !!!"
322 void OPProcessList(int scanline, bool render)
323 {
324 extern int op_start_log;
325 //      char * condition_to_str[8] =
326 //              { "==", "<", ">", "(opflag set)", "(second half line)", "?", "?", "?" };
327
328         op_pointer = OPGetListPointer();
329
330 //      objectp_stop_reading_list = false;
331
332 //WriteLog("OP: Processing line #%u (OLP=%08X)...\n", scanline, op_pointer);
333 //op_done();
334
335 // *** BEGIN OP PROCESSOR TESTING ONLY ***
336 extern bool interactiveMode;
337 extern bool iToggle;
338 extern int objectPtr;
339 bool inhibit;
340 int bitmapCounter = 0;
341 // *** END OP PROCESSOR TESTING ONLY ***
342
343         uint32 opCyclesToRun = 30000;                                   // This is a pulled-out-of-the-air value (will need to be fixed, obviously!)
344
345 //      if (op_pointer) WriteLog(" new op list at 0x%.8x scanline %i\n",op_pointer,scanline);
346         while (op_pointer)
347         {
348 // *** BEGIN OP PROCESSOR TESTING ONLY ***
349 if (interactiveMode && bitmapCounter == objectPtr)
350         inhibit = iToggle;
351 else
352         inhibit = false;
353 // *** END OP PROCESSOR TESTING ONLY ***
354 //              if (objectp_stop_reading_list)
355 //                      return;
356
357                 uint64 p0 = OPLoadPhrase(op_pointer);
358 //WriteLog("\t%08X type %i\n", op_pointer, (uint8)p0 & 0x07);
359                 op_pointer += 8;
360
361 #if 1
362 if (scanline == TOMGetVDB() && op_start_log)
363 //if (scanline == 215 && op_start_log)
364 //if (scanline == 28 && op_start_log)
365 //if (scanline == 0)
366 {
367 WriteLog("%08X --> phrase %08X %08X", op_pointer - 8, (int)(p0>>32), (int)(p0&0xFFFFFFFF));
368 if ((p0 & 0x07) == OBJECT_TYPE_BITMAP)
369 {
370 WriteLog(" (BITMAP) ");
371 uint64 p1 = OPLoadPhrase(op_pointer);
372 WriteLog("\n%08X --> phrase %08X %08X ", op_pointer, (int)(p1>>32), (int)(p1&0xFFFFFFFF));
373         uint8 bitdepth = (p1 >> 12) & 0x07;
374 //WAS:  int16 ypos = ((p0 >> 3) & 0x3FF);                       // ??? What if not interlaced (/2)?
375         int16 ypos = ((p0 >> 3) & 0x7FF);                       // ??? What if not interlaced (/2)?
376 int32 xpos = p1 & 0xFFF;
377 xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);
378         uint32 iwidth = ((p1 >> 28) & 0x3FF);
379         uint32 dwidth = ((p1 >> 18) & 0x3FF);           // Unsigned!
380         uint16 height = ((p0 >> 14) & 0x3FF);
381         uint32 link = ((p0 >> 24) & 0x7FFFF) << 3;
382         uint32 ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
383         uint32 firstPix = (p1 >> 49) & 0x3F;
384         uint8 flags = (p1 >> 45) & 0x0F;
385         uint8 idx = (p1 >> 38) & 0x7F;
386         uint32 pitch = (p1 >> 15) & 0x07;
387 WriteLog("\n    [%u (%u) x %u @ (%i, %u) (%u bpp), l: %08X, p: %08X fp: %02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
388         iwidth, dwidth, height, xpos, ypos, op_bitmap_bit_depth[bitdepth], link, ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""), (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""), (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
389 }
390 if ((p0 & 0x07) == OBJECT_TYPE_SCALE)
391 {
392 WriteLog(" (SCALED BITMAP)");
393 uint64 p1 = OPLoadPhrase(op_pointer), p2 = OPLoadPhrase(op_pointer+8);
394 WriteLog("\n%08X --> phrase %08X %08X ", op_pointer, (int)(p1>>32), (int)(p1&0xFFFFFFFF));
395 WriteLog("\n%08X --> phrase %08X %08X ", op_pointer+8, (int)(p2>>32), (int)(p2&0xFFFFFFFF));
396         uint8 bitdepth = (p1 >> 12) & 0x07;
397 //WAS:  int16 ypos = ((p0 >> 3) & 0x3FF);                       // ??? What if not interlaced (/2)?
398         int16 ypos = ((p0 >> 3) & 0x7FF);                       // ??? What if not interlaced (/2)?
399 int32 xpos = p1 & 0xFFF;
400 xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);
401         uint32 iwidth = ((p1 >> 28) & 0x3FF);
402         uint32 dwidth = ((p1 >> 18) & 0x3FF);           // Unsigned!
403         uint16 height = ((p0 >> 14) & 0x3FF);
404         uint32 link = ((p0 >> 24) & 0x7FFFF) << 3;
405         uint32 ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
406         uint32 firstPix = (p1 >> 49) & 0x3F;
407         uint8 flags = (p1 >> 45) & 0x0F;
408         uint8 idx = (p1 >> 38) & 0x7F;
409         uint32 pitch = (p1 >> 15) & 0x07;
410 WriteLog("\n    [%u (%u) x %u @ (%i, %u) (%u bpp), l: %08X, p: %08X fp: %02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
411         iwidth, dwidth, height, xpos, ypos, op_bitmap_bit_depth[bitdepth], link, ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""), (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""), (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
412         uint32 hscale = p2 & 0xFF;
413         uint32 vscale = (p2 >> 8) & 0xFF;
414         uint32 remainder = (p2 >> 16) & 0xFF;
415 WriteLog("    [hsc: %02X, vsc: %02X, rem: %02X]\n", hscale, vscale, remainder);
416 }
417 if ((p0 & 0x07) == OBJECT_TYPE_GPU)
418 WriteLog(" (GPU)\n");
419 if ((p0 & 0x07) == OBJECT_TYPE_BRANCH)
420 {
421 WriteLog(" (BRANCH)\n");
422 uint8 * jaguarMainRam = GetRamPtr();
423 WriteLog("[RAM] --> ");
424 for(int k=0; k<8; k++)
425         WriteLog("%02X ", jaguarMainRam[op_pointer-8 + k]);
426 WriteLog("\n");
427 }
428 if ((p0 & 0x07) == OBJECT_TYPE_STOP)
429 WriteLog("    --> List end\n\n");
430 }
431 #endif
432
433                 switch ((uint8)p0 & 0x07)
434                 {
435                 case OBJECT_TYPE_BITMAP:
436                 {
437 //WAS:                  uint16 ypos = (p0 >> 3) & 0x3FF;
438                         uint16 ypos = (p0 >> 3) & 0x7FF;
439 // This is only theory implied by Rayman...!
440 // It seems that if the YPOS is zero, then bump the YPOS value so that it coincides with
441 // the VDB value. With interlacing, this would be slightly more tricky.
442 // There's probably another bit somewhere that enables this mode--but so far, doesn't seem
443 // to affect any other game in a negative way (that I've seen).
444 // Either that, or it's an undocumented bug...
445
446 //No, the reason this was needed is that the OP code before was wrong. Any value
447 //less than VDB will get written to the top line of the display!
448 #if 0
449 // Not so sure... Let's see what happens here...
450 // No change...
451                         if (ypos == 0)
452                                 ypos = TOMReadWord(0xF00046, OP) / 2;                   // Get the VDB value
453 #endif
454 // Actually, no. Any item less than VDB will get only the lines that hang over VDB displayed.
455 // Actually, this is incorrect. It seems that VDB value is wrong somewhere and that's
456 // what's causing things to fuck up. Still no idea why.
457
458                         uint32 height = (p0 & 0xFFC000) >> 14;
459                         uint32 oldOPP = op_pointer - 8;
460 // *** BEGIN OP PROCESSOR TESTING ONLY ***
461 if (inhibit && op_start_log)
462         WriteLog("!!! ^^^ This object is INHIBITED! ^^^ !!!\n");
463 bitmapCounter++;
464 if (!inhibit)   // For OP testing only!
465 // *** END OP PROCESSOR TESTING ONLY ***
466                         if (scanline >= ypos && height > 0)
467                         {
468                                 uint64 p1 = OPLoadPhrase(op_pointer);
469                                 op_pointer += 8;
470 //WriteLog("OP: Writing scanline %d with ypos == %d...\n", scanline, ypos);
471 //WriteLog("--> Writing %u BPP bitmap...\n", op_bitmap_bit_depth[(p1 >> 12) & 0x07]);
472 //                              OPProcessFixedBitmap(scanline, p0, p1, render);
473                                 OPProcessFixedBitmap(p0, p1, render);
474
475                                 // OP write-backs
476
477 //???Does this really happen??? Doesn't seem to work if you do this...!
478 //Probably not. Must be a bug in the documentation...!
479 //                              uint32 link = (p0 & 0x7FFFF000000) >> 21;
480 //                              SET16(tom_ram_8, 0x20, link & 0xFFFF);  // OLP
481 //                              SET16(tom_ram_8, 0x22, link >> 16);
482 /*                              uint32 height = (p0 & 0xFFC000) >> 14;
483                                 if (height - 1 > 0)
484                                         height--;*/
485                                 // NOTE: Would subtract 2 if in interlaced mode...!
486 //                              uint64 height = ((p0 & 0xFFC000) - 0x4000) & 0xFFC000;
487 //                              if (height)
488                                 height--;
489
490                                 uint64 data = (p0 & 0xFFFFF80000000000LL) >> 40;
491                                 uint64 dwidth = (p1 & 0xFFC0000) >> 15;
492                                 data += dwidth;
493
494                                 p0 &= ~0xFFFFF80000FFC000LL;            // Mask out old data...
495                                 p0 |= (uint64)height << 14;
496                                 p0 |= data << 40;
497                                 OPStorePhrase(oldOPP, p0);
498                         }
499 //WriteLog("\t\tOld OP: %08X -> ", op_pointer);
500 //Temp, for testing...
501 //No doubt, this type of check will break all kinds of stuff... !!! FIX !!!
502 //And it does! !!! FIX !!!
503 //Let's remove this "fix" since it screws up more than it fixes.
504 /*      if (op_pointer > ((p0 & 0x000007FFFF000000LL) >> 21))
505                 return;*/
506
507                         op_pointer = (p0 & 0x000007FFFF000000LL) >> 21;
508 //WriteLog("New OP: %08X\n", op_pointer);
509                         break;
510                 }
511                 case OBJECT_TYPE_SCALE:
512                 {
513 //WAS:                  uint16 ypos = (p0 >> 3) & 0x3FF;
514                         uint16 ypos = (p0 >> 3) & 0x7FF;
515                         uint32 height = (p0 & 0xFFC000) >> 14;
516                         uint32 oldOPP = op_pointer - 8;
517 //WriteLog("OP: Scaled Object (ypos=%04X, height=%04X", ypos, height);
518 // *** BEGIN OP PROCESSOR TESTING ONLY ***
519 if (inhibit && op_start_log)
520 {
521         WriteLog("!!! ^^^ This object is INHIBITED! ^^^ !!! (scanline=%u, ypos=%u, height=%u)\n", scanline, ypos, height);
522         DumpScaledObject(p0, OPLoadPhrase(op_pointer), OPLoadPhrase(op_pointer+8));
523 }
524 bitmapCounter++;
525 if (!inhibit)   // For OP testing only!
526 // *** END OP PROCESSOR TESTING ONLY ***
527                         if (scanline >= ypos && height > 0)
528                         {
529                                 uint64 p1 = OPLoadPhrase(op_pointer);
530                                 op_pointer += 8;
531                                 uint64 p2 = OPLoadPhrase(op_pointer);
532                                 op_pointer += 8;
533 //WriteLog("OP: %08X (%d) %08X%08X %08X%08X %08X%08X\n", oldOPP, scanline, (uint32)(p0>>32), (uint32)(p0&0xFFFFFFFF), (uint32)(p1>>32), (uint32)(p1&0xFFFFFFFF), (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
534                                 OPProcessScaledBitmap(p0, p1, p2, render);
535
536                                 // OP write-backs
537
538                                 uint16 remainder = (p2 >> 16) & 0xFF;//, vscale = p2 >> 8;
539                                 uint8 /*remainder = p2 >> 16,*/ vscale = p2 >> 8;
540 //Actually, we should skip this object if it has a vscale of zero.
541 //Or do we? Not sure... Atari Karts has a few lines that look like:
542 // (SCALED BITMAP)
543 //000E8268 --> phrase 00010000 7000B00D
544 //    [7 (0) x 1 @ (13, 0) (8 bpp), l: 000E82A0, p: 000E0FC0 fp: 00, fl:RELEASE, idx:00, pt:01]
545 //    [hsc: 9A, vsc: 00, rem: 00]
546 // Could it be the vscale is overridden if the DWIDTH is zero? Hmm...
547 //WriteLog("OP: Scaled bitmap processing (rem=%02X, vscale=%02X)...\n", remainder, vscale);//*/
548
549                                 if (vscale == 0)
550                                         vscale = 0x20;                                  // OP bug??? Nope, it isn't...! Or is it?
551
552 //extern int start_logging;
553 //if (start_logging)
554 //      WriteLog("--> Returned from scaled bitmap processing (rem=%02X, vscale=%02X)...\n", remainder, vscale);//*/
555 //Locks up here:
556 //--> Returned from scaled bitmap processing (rem=20, vscale=80)...
557 //There are other problems here, it looks like...
558 //Another lock up:
559 //About to execute OP (508)...
560 /*
561 OP: Scaled bitmap 4x? 4bpp at 38,? hscale=7C fpix=0 data=00075E28 pitch 1 hflipped=no dwidth=? (linked to 00071118) Transluency=no
562 --> Returned from scaled bitmap processing (rem=50, vscale=7C)...
563 OP: Scaled bitmap 4x? 4bpp at 38,? hscale=7C fpix=0 data=00075E28 pitch 1 hflipped=no dwidth=? (linked to 00071118) Transluency=no
564 --> Returned from scaled bitmap processing (rem=30, vscale=7C)...
565 OP: Scaled bitmap 4x? 4bpp at 38,? hscale=7C fpix=0 data=00075E28 pitch 1 hflipped=no dwidth=? (linked to 00071118) Transluency=no
566 --> Returned from scaled bitmap processing (rem=10, vscale=7C)...
567 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756A8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
568 --> Returned from scaled bitmap processing (rem=00, vscale=7E)...
569 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
570 --> Returned from scaled bitmap processing (rem=00, vscale=80)...
571 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
572 --> Returned from scaled bitmap processing (rem=5E, vscale=7E)...
573 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756E8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
574 --> Returned from scaled bitmap processing (rem=60, vscale=80)...
575 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
576 --> Returned from scaled bitmap processing (rem=3E, vscale=7E)...
577 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756E8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
578 --> Returned from scaled bitmap processing (rem=40, vscale=80)...
579 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
580 --> Returned from scaled bitmap processing (rem=1E, vscale=7E)...
581 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756E8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
582 --> Returned from scaled bitmap processing (rem=20, vscale=80)...
583 */
584 //Here's another problem:
585 //    [hsc: 20, vsc: 20, rem: 00]
586 // Since we're not checking for $E0 (but that's what we get from the above), we end
587 // up repeating this scanline unnecessarily... !!! FIX !!! [DONE, but... still not quite
588 // right. Either that, or the Accolade team that wrote Bubsy screwed up royal.]
589 //Also note: $E0 = 7.0 which IS a legal vscale value...
590
591 //                              if (remainder & 0x80)                           // I.e., it's negative
592 //                              if ((remainder & 0x80) || remainder == 0)       // I.e., it's <= 0
593 //                              if ((remainder - 1) >= 0xE0)            // I.e., it's <= 0
594 //                              if ((remainder >= 0xE1) || remainder == 0)// I.e., it's <= 0
595 //                              if ((remainder >= 0xE1 && remainder <= 0xFF) || remainder == 0)// I.e., it's <= 0
596 //                              if (remainder <= 0x20)                          // I.e., it's <= 1.0
597                                 // I.e., it's < 1.0f -> means it'll go negative when we subtract 1.0f.
598                                 if (remainder < 0x20)
599                                 {
600                                         uint64 data = (p0 & 0xFFFFF80000000000LL) >> 40;
601                                         uint64 dwidth = (p1 & 0xFFC0000) >> 15;
602
603 //                                      while (remainder & 0x80)
604 //                                      while ((remainder & 0x80) || remainder == 0)
605 //                                      while ((remainder - 1) >= 0xE0)
606 //                                      while ((remainder >= 0xE1) || remainder == 0)
607 //                                      while ((remainder >= 0xE1 && remainder <= 0xFF) || remainder == 0)
608 //                                      while (remainder <= 0x20)
609                                         while (remainder < 0x20)
610                                         {
611                                                 remainder += vscale;
612
613                                                 if (height)
614                                                         height--;
615
616                                                 data += dwidth;
617                                         }
618
619                                         p0 &= ~0xFFFFF80000FFC000LL;    // Mask out old data...
620                                         p0 |= (uint64)height << 14;
621                                         p0 |= data << 40;
622                                         OPStorePhrase(oldOPP, p0);
623                                 }
624
625                                 remainder -= 0x20;                                      // 1.0f in [3.5] fixed point format
626
627 //if (start_logging)
628 //      WriteLog("--> Finished writebacks...\n");//*/
629
630 //WriteLog(" [%08X%08X -> ", (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
631                                 p2 &= ~0x0000000000FF0000LL;
632                                 p2 |= (uint64)remainder << 16;
633 //WriteLog("%08X%08X]\n", (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
634                                 OPStorePhrase(oldOPP + 16, p2);
635 //remainder = (uint8)(p2 >> 16), vscale = (uint8)(p2 >> 8);
636 //WriteLog(" [after]: rem=%02X, vscale=%02X\n", remainder, vscale);
637                         }
638
639                         op_pointer = (p0 & 0x000007FFFF000000LL) >> 21;
640                         break;
641                 }
642                 case OBJECT_TYPE_GPU:
643                 {
644 //WriteLog("OP: Asserting GPU IRQ #3...\n");
645 #warning "Need to fix OP GPU IRQ handling! !!! FIX !!!"
646                         OPSetCurrentObject(p0);
647                         GPUSetIRQLine(3, ASSERT_LINE);
648 //Also, OP processing is suspended from this point until OBF (F00026) is written to...
649 // !!! FIX !!!
650 //Do something like:
651 //OPSuspendedByGPU = true;
652 //Dunno if the OP keeps processing from where it was interrupted, or if it just continues
653 //on the next scanline...
654 // --> It continues from where it was interrupted! !!! FIX !!!
655                         break;
656                 }
657                 case OBJECT_TYPE_BRANCH:
658                 {
659                         uint16 ypos = (p0 >> 3) & 0x7FF;
660                         uint8  cc   = (p0 >> 14) & 0x03;
661                         uint32 link = (p0 >> 21) & 0x3FFFF8;
662
663 //                      if ((ypos!=507)&&(ypos!=25))
664 //                              WriteLog("\t%i%s%i link=0x%.8x\n",scanline,condition_to_str[cc],ypos>>1,link);
665                         switch (cc)
666                         {
667                         case CONDITION_EQUAL:
668                                 if (TOMReadWord(0xF00006, OP) == ypos || ypos == 0x7FF)
669                                         op_pointer = link;
670                                 break;
671                         case CONDITION_LESS_THAN:
672                                 if (TOMReadWord(0xF00006, OP) < ypos)
673                                         op_pointer = link;
674                                 break;
675                         case CONDITION_GREATER_THAN:
676                                 if (TOMReadWord(0xF00006, OP) > ypos)
677                                         op_pointer = link;
678                                 break;
679                         case CONDITION_OP_FLAG_SET:
680                                 if (OPGetStatusRegister() & 0x01)
681                                         op_pointer = link;
682                                 break;
683                         case CONDITION_SECOND_HALF_LINE:
684 //Here's the ASIC code:
685 //  ND4(cctrue5, newheight[2], heightl[1], heightl[0], hcb[10]);
686 //which means, do the link if bit 10 of HC is set...
687
688                                 // This basically means branch if bit 10 of HC is set
689 #warning "Unhandled condition code causes emulator to crash... !!! FIX !!!"
690                                 WriteLog("OP: Unexpected CONDITION_SECOND_HALF_LINE in BRANCH object\nOP: shutting down!\n");
691                                 LogDone();
692                                 exit(0);
693                                 break;
694                         default:
695                                 WriteLog("OP: Unimplemented branch condition %i\n", cc);
696                         }
697                         break;
698                 }
699                 case OBJECT_TYPE_STOP:
700                 {
701 //op_start_log = 0;
702                         // unsure
703 //WriteLog("OP: --> STOP\n");
704 //                      op_set_status_register(((p0>>3) & 0xFFFFFFFF));
705 //This seems more likely...
706                         OPSetCurrentObject(p0);
707
708                         if (p0 & 0x08)
709                         {
710                                 // We need to check whether these interrupts are enabled or not, THEN
711                                 // set an IRQ + pending flag if necessary...
712                                 if (TOMIRQEnabled(IRQ_OPFLAG))
713                                 {
714                                         TOMSetPendingObjectInt();
715                                         m68k_set_irq(2);                                // Cause a 68K IPL 2 to occur...
716                                 }
717                         }
718
719                         return;
720 //                      break;
721                 }
722                 default:
723                         WriteLog("op: unknown object type %i\n", ((uint8)p0 & 0x07));
724                         return;
725                 }
726
727                 // Here is a little sanity check to keep the OP from locking up the machine
728                 // when fed bad data. Better would be to count how many actual cycles it used
729                 // and bail out/reenter to properly simulate an overloaded OP... !!! FIX !!!
730 #warning "Better would be to count how many actual cycles it used and bail out/reenter to properly simulate an overloaded OP... !!! FIX !!!"
731                 opCyclesToRun--;
732                 if (!opCyclesToRun)
733                         return;
734         }
735 }
736
737 //
738 // Store fixed size bitmap in line buffer
739 //
740 void OPProcessFixedBitmap(uint64 p0, uint64 p1, bool render)
741 {
742 // Need to make sure that when writing that it stays within the line buffer...
743 // LBUF ($F01800 - $F01D9E) 360 x 32-bit RAM
744         uint8 depth = (p1 >> 12) & 0x07;                                // Color depth of image
745         int32 xpos = ((int16)((p1 << 4) & 0xFFFF)) >> 4;// Image xpos in LBUF
746         uint32 iwidth = (p1 >> 28) & 0x3FF;                             // Image width in *phrases*
747         uint32 data = (p0 >> 40) & 0xFFFFF8;                    // Pixel data address
748 //#ifdef OP_DEBUG_BMP
749         uint32  firstPix = (p1 >> 49) & 0x3F;
750         // "The LSB is significant only for scaled objects..." -JTRM
751         // "In 1 BPP mode, all five bits are significant. In 2 BPP mode, the top four are significant..."
752         firstPix &= 0x3E;
753 //#endif
754 // We can ignore the RELEASE (high order) bit for now--probably forever...!
755 //      uint8 flags = (p1 >> 45) & 0x0F;        // REFLECT, RMW, TRANS, RELEASE
756 //Optimize: break these out to their own BOOL values
757         uint8 flags = (p1 >> 45) & 0x07;                                // REFLECT (0), RMW (1), TRANS (2)
758         bool flagREFLECT = (flags & OPFLAG_REFLECT ? true : false),
759                 flagRMW = (flags & OPFLAG_RMW ? true : false),
760                 flagTRANS = (flags & OPFLAG_TRANS ? true : false);
761 // "For images with 1 to 4 bits/pixel the top 7 to 4 bits of the index
762 //  provide the most significant bits of the palette address."
763         uint8 index = (p1 >> 37) & 0xFE;                                // CLUT index offset (upper pix, 1-4 bpp)
764         uint32 pitch = (p1 >> 15) & 0x07;                               // Phrase pitch
765         pitch <<= 3;                                                                    // Optimization: Multiply pitch by 8
766
767 //      int16 scanlineWidth = tom_getVideoModeWidth();
768         uint8 * tomRam8 = TOMGetRamPointer();
769         uint8 * paletteRAM = &tomRam8[0x400];
770         // This is OK as long as it's used correctly: For 16-bit RAM to RAM direct copies--NOT
771         // for use when using endian-corrected data (i.e., any of the *_word_read functions!)
772         uint16 * paletteRAM16 = (uint16 *)paletteRAM;
773
774 //      WriteLog("bitmap %ix? %ibpp at %i,? firstpix=? data=0x%.8x pitch %i hflipped=%s dwidth=? (linked to ?) RMW=%s Tranparent=%s\n",
775 //              iwidth, op_bitmap_bit_depth[bitdepth], xpos, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), (flags&OPFLAG_RMW ? "yes" : "no"), (flags&OPFLAG_TRANS ? "yes" : "no"));
776
777 // Is it OK to have a 0 for the data width??? (i.e., undocumented?)
778 // Seems to be... Seems that dwidth *can* be zero (i.e., reuse same line) as well.
779 // Pitch == 0 is OK too...
780 //      if (!render || op_pointer == 0 || ptr == 0 || pitch == 0)
781 //I'm not convinced that we need to concern ourselves with data & op_pointer here either!
782         if (!render || iwidth == 0)
783                 return;
784
785 //OK, so we know the position in the line buffer is correct. It's the clipping in
786 //24bpp mode that's wrong!
787 #if 0
788 //This is a total kludge, based upon the fact that 24BPP mode puts *4* bytes
789 //into the line buffer for each pixel.
790 if (depth == 5) // i.e., 24bpp mode...
791         xpos >>= 1;     // Cut it in half...
792 #endif
793
794 //#define OP_DEBUG_BMP
795 //#ifdef OP_DEBUG_BMP
796 //      WriteLog("bitmap %ix%i %ibpp at %i,%i firstpix=%i data=0x%.8x pitch %i hflipped=%s dwidth=%i (linked to 0x%.8x) Transluency=%s\n",
797 //              iwidth, height, op_bitmap_bit_depth[bitdepth], xpos, ypos, firstPix, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), dwidth, op_pointer, (flags&OPFLAG_RMW ? "yes" : "no"));
798 //#endif
799
800 //      int32 leftMargin = xpos, rightMargin = (xpos + (phraseWidthToPixels[depth] * iwidth)) - 1;
801         int32 startPos = xpos, endPos = xpos +
802                 (!flagREFLECT ? (phraseWidthToPixels[depth] * iwidth) - 1
803                 : -((phraseWidthToPixels[depth] * iwidth) + 1));
804         uint32 clippedWidth = 0, phraseClippedWidth = 0, dataClippedWidth = 0;//, phrasePixel = 0;
805         bool in24BPPMode = (((GET16(tomRam8, 0x0028) >> 1) & 0x03) == 1 ? true : false);        // VMODE
806         // Not sure if this is Jaguar Two only location or what...
807         // From the docs, it is... If we want to limit here we should think of something else.
808 //      int32 limit = GET16(tom_ram_8, 0x0008);                 // LIMIT
809 //      int32 limit = 720;
810 //      int32 lbufWidth = (!in24BPPMode ? limit - 1 : (limit / 2) - 1); // Zero based limit...
811 //printf("[OP:xpos=%i,spos=%i,epos=%i>", xpos, startPos, endPos);
812         // This is correct, the OP line buffer is a constant size... 
813         int32 limit = 720;
814         int32 lbufWidth = 719;
815
816         // If the image is completely to the left or right of the line buffer, then bail.
817 //If in REFLECT mode, then these values are swapped! !!! FIX !!! [DONE]
818 //There are four possibilities:
819 //  1. image sits on left edge and no REFLECT; starts out of bounds but ends in bounds.
820 //  2. image sits on left edge and REFLECT; starts in bounds but ends out of bounds.
821 //  3. image sits on right edge and REFLECT; starts out of bounds but ends in bounds.
822 //  4. image sits on right edge and no REFLECT; starts in bounds but ends out of bounds.
823 //Numbers 2 & 4 can be caught by checking the LBUF clip while in the inner loop,
824 // numbers 1 & 3 are of concern.
825 // This *indirectly* handles only cases 2 & 4! And is WRONG is REFLECT is set...!
826 //      if (rightMargin < 0 || leftMargin > lbufWidth)
827
828 // It might be easier to swap these (if REFLECTed) and just use XPOS down below...
829 // That way, you could simply set XPOS to leftMargin if !REFLECT and to rightMargin otherwise.
830 // Still have to be careful with the DATA and IWIDTH values though...
831
832 //      if ((!flagREFLECT && (rightMargin < 0 || leftMargin > lbufWidth))
833 //              || (flagREFLECT && (leftMargin < 0 || rightMargin > lbufWidth)))
834 //              return;
835         if ((!flagREFLECT && (endPos < 0 || startPos > lbufWidth))
836                 || (flagREFLECT && (startPos < 0 || endPos > lbufWidth)))
837                 return;
838
839         // Otherwise, find the clip limits and clip the phrase as well...
840         // NOTE: I'm fudging here by letting the actual blit overstep the bounds of the
841         //       line buffer, but it shouldn't matter since there are two unused line
842         //       buffers below and nothing above and I'll at most write 8 bytes outside
843         //       the line buffer... I could use a fractional clip begin/end value, but
844         //       this makes the blit a *lot* more hairy. I might fix this in the future
845         //       if it becomes necessary. (JLH)
846         //       Probably wouldn't be *that* hairy. Just use a delta that tells the inner loop
847         //       which pixel in the phrase is being written, and quit when either end of phrases
848         //       is reached or line buffer extents are surpassed.
849
850 //This stuff is probably wrong as well... !!! FIX !!!
851 //The strange thing is that it seems to work, but that's no guarantee that it's bulletproof!
852 //Yup. Seems that JagMania doesn't work correctly with this...
853 //Dunno if this is the problem, but Atari Karts is showing *some* of the road now...
854 //      if (!flagREFLECT)
855
856 /*
857         if (leftMargin < 0)
858                 clippedWidth = 0 - leftMargin,
859                 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth],
860                 leftMargin = 0 - (clippedWidth % phraseWidthToPixels[depth]);
861 //              leftMargin = 0;
862
863         if (rightMargin > lbufWidth)
864                 clippedWidth = rightMargin - lbufWidth,
865                 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];//,
866 //              rightMargin = lbufWidth + (clippedWidth % phraseWidthToPixels[depth]);
867 //              rightMargin = lbufWidth;
868 */
869 if (depth > 5)
870         WriteLog("OP: We're about to encounter a divide by zero error!\n");
871         // NOTE: We're just using endPos to figure out how much, if any, to clip by.
872         // ALSO: There may be another case where we start out of bounds and end out of bounds...!
873         // !!! FIX !!!
874         if (startPos < 0)                       // Case #1: Begin out, end in, L to R
875                 clippedWidth = 0 - startPos,
876                 dataClippedWidth = phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth],
877                 startPos = 0 - (clippedWidth % phraseWidthToPixels[depth]);
878
879         if (endPos < 0)                         // Case #2: Begin in, end out, R to L
880                 clippedWidth = 0 - endPos,
881                 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];
882
883         if (endPos > lbufWidth)         // Case #3: Begin in, end out, L to R
884                 clippedWidth = endPos - lbufWidth,
885                 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];
886
887         if (startPos > lbufWidth)       // Case #4: Begin out, end in, R to L
888                 clippedWidth = startPos - lbufWidth,
889                 dataClippedWidth = phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth],
890                 startPos = lbufWidth + (clippedWidth % phraseWidthToPixels[depth]);
891 //printf("<OP:spos=%i,epos=%i]", startPos, endPos);
892
893         // If the image is sitting on the line buffer left or right edge, we need to compensate
894         // by decreasing the image phrase width accordingly.
895         iwidth -= phraseClippedWidth;
896
897         // Also, if we're clipping the phrase we need to make sure we're in the correct part of
898         // the pixel data.
899 //      data += phraseClippedWidth * (pitch << 3);
900         data += dataClippedWidth * pitch;
901
902         // NOTE: When the bitmap is in REFLECT mode, the XPOS marks the *right* side of the
903         //       bitmap! This makes clipping & etc. MUCH, much easier...!
904 //      uint32 lbufAddress = 0x1800 + (!in24BPPMode ? leftMargin * 2 : leftMargin * 4);
905 //Why does this work right when multiplying startPos by 2 (instead of 4) for 24 BPP mode?
906 //Is this a bug in the OP?
907 //It's because in 24bpp mode, each pixel takes *4* bytes, instead of the usual 2.
908 //Though it looks like we're doing it here no matter what...
909 //      uint32 lbufAddress = 0x1800 + (!in24BPPMode ? startPos * 2 : startPos * 2);
910 //Let's try this:
911         uint32 lbufAddress = 0x1800 + (startPos * 2);
912         uint8 * currentLineBuffer = &tomRam8[lbufAddress];
913
914         // Render.
915
916 // Hmm. We check above for 24 BPP mode, but don't do anything about it below...
917 // If we *were* in 24 BPP mode, how would you convert CRY to RGB24? Seems to me
918 // that if you're in CRY mode then you wouldn't be able to use 24 BPP bitmaps
919 // anyway.
920 // This seems to be the case (at least according to the Midsummer docs)...!
921
922 // This is to test using palette zeroes instead of bit zeroes...
923 // And it seems that this is wrong, index == 0 is transparent apparently... :-/
924 //#define OP_USES_PALETTE_ZERO
925
926         if (depth == 0)                                                                 // 1 BPP
927         {
928                 // The LSB of flags is OPFLAG_REFLECT, so sign extend it and or 2 into it.
929                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
930
931                 // Fetch 1st phrase...
932                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
933 //Note that firstPix should only be honored *if* we start with the 1st phrase of the bitmap
934 //i.e., we didn't clip on the margin... !!! FIX !!!
935                 pixels <<= firstPix;                                            // Skip first N pixels (N=firstPix)...
936                 int i = firstPix;                                                       // Start counter at right spot...
937
938                 while (iwidth--)
939                 {
940                         while (i++ < 64)
941                         {
942                                 uint8 bit = pixels >> 63;
943 #ifndef OP_USES_PALETTE_ZERO
944                                 if (flagTRANS && bit == 0)
945 #else
946                                 if (flagTRANS && (paletteRAM16[index | bit] == 0))
947 #endif
948                                         ;       // Do nothing...
949                                 else
950                                 {
951                                         if (!flagRMW)
952 //Optimize: Set palleteRAM16 to beginning of palette RAM + index*2 and use only [bit] as index...
953 //Won't optimize RMW case though...
954                                                 // This is the *only* correct use of endian-dependent code
955                                                 // (i.e., mem-to-mem direct copying)!
956                                                 *(uint16 *)currentLineBuffer = paletteRAM16[index | bit];
957                                         else
958                                                 *currentLineBuffer =
959                                                         BLEND_CR(*currentLineBuffer, paletteRAM[(index | bit) << 1]),
960                                                 *(currentLineBuffer + 1) =
961                                                         BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bit) << 1) + 1]);
962                                 }
963
964                                 currentLineBuffer += lbufDelta;
965                                 pixels <<= 1;
966                         }
967                         i = 0;
968                         // Fetch next phrase...
969                         data += pitch;
970                         pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
971                 }
972         }
973         else if (depth == 1)                                                    // 2 BPP
974         {
975 if (firstPix)
976         WriteLog("OP: Fixed bitmap @ 2 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
977                 index &= 0xFC;                                                          // Top six bits form CLUT index
978                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
979                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
980
981                 while (iwidth--)
982                 {
983                         // Fetch phrase...
984                         uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
985                         data += pitch;
986
987                         for(int i=0; i<32; i++)
988                         {
989                                 uint8 bits = pixels >> 62;
990 // Seems to me that both of these are in the same endian, so we could cast it as
991 // uint16 * and do straight across copies (what about 24 bpp? Treat it differently...)
992 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
993 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
994 // No, it isn't because we read the memory in an endian safe way--this *won't* work...
995 #ifndef OP_USES_PALETTE_ZERO
996                                 if (flagTRANS && bits == 0)
997 #else
998                                 if (flagTRANS && (paletteRAM16[index | bits] == 0))
999 #endif
1000                                         ;       // Do nothing...
1001                                 else
1002                                 {
1003                                         if (!flagRMW)
1004                                                 *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1005                                         else
1006                                                 *currentLineBuffer =
1007                                                         BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1008                                                 *(currentLineBuffer + 1) =
1009                                                         BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1010                                 }
1011
1012                                 currentLineBuffer += lbufDelta;
1013                                 pixels <<= 2;
1014                         }
1015                 }
1016         }
1017         else if (depth == 2)                                                    // 4 BPP
1018         {
1019 if (firstPix)
1020         WriteLog("OP: Fixed bitmap @ 4 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1021                 index &= 0xF0;                                                          // Top four bits form CLUT index
1022                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1023                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1024
1025                 while (iwidth--)
1026                 {
1027                         // Fetch phrase...
1028                         uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1029                         data += pitch;
1030
1031                         for(int i=0; i<16; i++)
1032                         {
1033                                 uint8 bits = pixels >> 60;
1034 // Seems to me that both of these are in the same endian, so we could cast it as
1035 // uint16 * and do straight across copies (what about 24 bpp? Treat it differently...)
1036 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
1037 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
1038 // No, it isn't because we read the memory in an endian safe way--this *won't* work...
1039 #ifndef OP_USES_PALETTE_ZERO
1040                                 if (flagTRANS && bits == 0)
1041 #else
1042                                 if (flagTRANS && (paletteRAM16[index | bits] == 0))
1043 #endif
1044                                         ;       // Do nothing...
1045                                 else
1046                                 {
1047                                         if (!flagRMW)
1048                                                 *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1049                                         else
1050                                                 *currentLineBuffer =
1051                                                         BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1052                                                 *(currentLineBuffer + 1) =
1053                                                         BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1054                                 }
1055
1056                                 currentLineBuffer += lbufDelta;
1057                                 pixels <<= 4;
1058                         }
1059                 }
1060         }
1061         else if (depth == 3)                                                    // 8 BPP
1062         {
1063                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1064                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1065
1066                 // Fetch 1st phrase...
1067                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1068 //Note that firstPix should only be honored *if* we start with the 1st phrase of the bitmap
1069 //i.e., we didn't clip on the margin... !!! FIX !!!
1070                 firstPix &= 0x30;                                                       // Only top two bits are valid for 8 BPP
1071                 pixels <<= firstPix;                                            // Skip first N pixels (N=firstPix)...
1072                 int i = firstPix >> 3;                                          // Start counter at right spot...
1073
1074                 while (iwidth--)
1075                 {
1076                         while (i++ < 8)
1077                         {
1078                                 uint8 bits = pixels >> 56;
1079 // Seems to me that both of these are in the same endian, so we could cast it as
1080 // uint16 * and do straight across copies (what about 24 bpp? Treat it differently...)
1081 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
1082 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
1083 // No, it isn't because we read the memory in an endian safe way--this *won't* work...
1084 //This would seem to be problematic...
1085 //Because it's the palette entry being zero that makes the pixel transparent...
1086 //Let's try it and see.
1087 #ifndef OP_USES_PALETTE_ZERO
1088                                 if (flagTRANS && bits == 0)
1089 #else
1090                                 if (flagTRANS && (paletteRAM16[bits] == 0))
1091 #endif
1092                                         ;       // Do nothing...
1093                                 else
1094                                 {
1095                                         if (!flagRMW)
1096                                                 *(uint16 *)currentLineBuffer = paletteRAM16[bits];
1097                                         else
1098                                                 *currentLineBuffer =
1099                                                         BLEND_CR(*currentLineBuffer, paletteRAM[bits << 1]),
1100                                                 *(currentLineBuffer + 1) =
1101                                                         BLEND_Y(*(currentLineBuffer + 1), paletteRAM[(bits << 1) + 1]);
1102                                 }
1103
1104                                 currentLineBuffer += lbufDelta;
1105                                 pixels <<= 8;
1106                         }
1107                         i = 0;
1108                         // Fetch next phrase...
1109                         data += pitch;
1110                         pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1111                 }
1112         }
1113         else if (depth == 4)                                                    // 16 BPP
1114         {
1115 if (firstPix)
1116         WriteLog("OP: Fixed bitmap @ 16 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1117                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1118                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1119
1120                 while (iwidth--)
1121                 {
1122                         // Fetch phrase...
1123                         uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1124                         data += pitch;
1125
1126                         for(int i=0; i<4; i++)
1127                         {
1128                                 uint8 bitsHi = pixels >> 56, bitsLo = pixels >> 48;
1129 // Seems to me that both of these are in the same endian, so we could cast it as
1130 // uint16 * and do straight across copies (what about 24 bpp? Treat it differently...)
1131 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
1132 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
1133 // No, it isn't because we read the memory in an endian safe way--it *won't* work...
1134 //This doesn't seem right... Let's try the encoded black value ($8800):
1135 //Apparently, CRY 0 maps to $8800...
1136                                 if (flagTRANS && ((bitsLo | bitsHi) == 0))
1137 //                              if (flagTRANS && (bitsHi == 0x88) && (bitsLo == 0x00))
1138                                         ;       // Do nothing...
1139                                 else
1140                                 {
1141                                         if (!flagRMW)
1142                                                 *currentLineBuffer = bitsHi,
1143                                                 *(currentLineBuffer + 1) = bitsLo;
1144                                         else
1145                                                 *currentLineBuffer =
1146                                                         BLEND_CR(*currentLineBuffer, bitsHi),
1147                                                 *(currentLineBuffer + 1) =
1148                                                         BLEND_Y(*(currentLineBuffer + 1), bitsLo);
1149                                 }
1150
1151                                 currentLineBuffer += lbufDelta;
1152                                 pixels <<= 16;
1153                         }
1154                 }
1155         }
1156         else if (depth == 5)                                                    // 24 BPP
1157         {
1158 //Looks like Iron Soldier is the only game that uses 24BPP mode...
1159 //There *might* be others...
1160 //WriteLog("OP: Writing 24 BPP bitmap!\n");
1161 if (firstPix)
1162         WriteLog("OP: Fixed bitmap @ 24 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1163                 // Not sure, but I think RMW only works with 16 BPP and below, and only in CRY mode...
1164                 // The LSB of flags is OPFLAG_REFLECT, so sign extend it and OR 4 into it.
1165                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 4) | 0x04;
1166
1167                 while (iwidth--)
1168                 {
1169                         // Fetch phrase...
1170                         uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1171                         data += pitch;
1172
1173                         for(int i=0; i<2; i++)
1174                         {
1175                                 // We don't use a 32-bit var here because of endian issues...!
1176                                 uint8 bits3 = pixels >> 56, bits2 = pixels >> 48,
1177                                         bits1 = pixels >> 40, bits0 = pixels >> 32;
1178
1179                                 if (flagTRANS && (bits3 | bits2 | bits1 | bits0) == 0)
1180                                         ;       // Do nothing...
1181                                 else
1182                                         *currentLineBuffer = bits3,
1183                                         *(currentLineBuffer + 1) = bits2,
1184                                         *(currentLineBuffer + 2) = bits1,
1185                                         *(currentLineBuffer + 3) = bits0;
1186
1187                                 currentLineBuffer += lbufDelta;
1188                                 pixels <<= 32;
1189                         }
1190                 }
1191         }
1192 }
1193
1194 //
1195 // Store scaled bitmap in line buffer
1196 //
1197 void OPProcessScaledBitmap(uint64 p0, uint64 p1, uint64 p2, bool render)
1198 {
1199 // Need to make sure that when writing that it stays within the line buffer...
1200 // LBUF ($F01800 - $F01D9E) 360 x 32-bit RAM
1201         uint8 depth = (p1 >> 12) & 0x07;                                // Color depth of image
1202         int32 xpos = ((int16)((p1 << 4) & 0xFFFF)) >> 4;// Image xpos in LBUF
1203         uint32 iwidth = (p1 >> 28) & 0x3FF;                             // Image width in *phrases*
1204         uint32 data = (p0 >> 40) & 0xFFFFF8;                    // Pixel data address
1205 //#ifdef OP_DEBUG_BMP
1206 // Prolly should use this... Though not sure exactly how.
1207 //Use the upper bits as an offset into the phrase depending on the BPP. That's how!
1208         uint32 firstPix = (p1 >> 49) & 0x3F;
1209 //This is WEIRD! I'm sure I saw Atari Karts request 8 BPP FIRSTPIX! What happened???
1210 if (firstPix)
1211         WriteLog("OP: FIRSTPIX != 0! (Scaled BM)\n");
1212 //#endif
1213 // We can ignore the RELEASE (high order) bit for now--probably forever...!
1214 //      uint8 flags = (p1 >> 45) & 0x0F;        // REFLECT, RMW, TRANS, RELEASE
1215 //Optimize: break these out to their own BOOL values [DONE]
1216         uint8 flags = (p1 >> 45) & 0x07;                                // REFLECT (0), RMW (1), TRANS (2)
1217         bool flagREFLECT = (flags & OPFLAG_REFLECT ? true : false),
1218                 flagRMW = (flags & OPFLAG_RMW ? true : false),
1219                 flagTRANS = (flags & OPFLAG_TRANS ? true : false);
1220         uint8 index = (p1 >> 37) & 0xFE;                                // CLUT index offset (upper pix, 1-4 bpp)
1221         uint32 pitch = (p1 >> 15) & 0x07;                               // Phrase pitch
1222
1223         uint8 * tomRam8 = TOMGetRamPointer();
1224         uint8 * paletteRAM = &tomRam8[0x400];
1225         // This is OK as long as it's used correctly: For 16-bit RAM to RAM direct copies--NOT
1226         // for use when using endian-corrected data (i.e., any of the *ReadWord functions!)
1227         uint16 * paletteRAM16 = (uint16 *)paletteRAM;
1228
1229         uint16 hscale = p2 & 0xFF;
1230 // Hmm. It seems that fixing the horizontal scale necessitated re-fixing this. Not sure why,
1231 // but seems to be consistent with the vertical scaling now (and it may turn out to be wrong!)...
1232         uint16 horizontalRemainder = hscale;                            // Not sure if it starts full, but seems reasonable [It's not!]
1233 //      uint8 horizontalRemainder = 0;                                  // Let's try zero! Seems to work! Yay! [No, it doesn't!]
1234         int32 scaledWidthInPixels = (iwidth * phraseWidthToPixels[depth] * hscale) >> 5;
1235         uint32 scaledPhrasePixels = (phraseWidthToPixels[depth] * hscale) >> 5;
1236
1237 //      WriteLog("bitmap %ix? %ibpp at %i,? firstpix=? data=0x%.8x pitch %i hflipped=%s dwidth=? (linked to ?) RMW=%s Tranparent=%s\n",
1238 //              iwidth, op_bitmap_bit_depth[bitdepth], xpos, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), (flags&OPFLAG_RMW ? "yes" : "no"), (flags&OPFLAG_TRANS ? "yes" : "no"));
1239
1240 // Looks like an hscale of zero means don't draw!
1241         if (!render || iwidth == 0 || hscale == 0)
1242                 return;
1243
1244 /*extern int start_logging;
1245 if (start_logging)
1246         WriteLog("OP: Scaled bitmap %ix? %ibpp at %i,? hscale=%02X fpix=%i data=%08X pitch %i hflipped=%s dwidth=? (linked to %08X) Transluency=%s\n",
1247                 iwidth, op_bitmap_bit_depth[depth], xpos, hscale, firstPix, data, pitch, (flagREFLECT ? "yes" : "no"), op_pointer, (flagRMW ? "yes" : "no"));*/
1248 //#define OP_DEBUG_BMP
1249 //#ifdef OP_DEBUG_BMP
1250 //      WriteLog("OP: Scaled bitmap %ix%i %ibpp at %i,%i firstpix=%i data=0x%.8x pitch %i hflipped=%s dwidth=%i (linked to 0x%.8x) Transluency=%s\n",
1251 //              iwidth, height, op_bitmap_bit_depth[bitdepth], xpos, ypos, firstPix, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), dwidth, op_pointer, (flags&OPFLAG_RMW ? "yes" : "no"));
1252 //#endif
1253
1254         int32 startPos = xpos, endPos = xpos +
1255                 (!flagREFLECT ? scaledWidthInPixels - 1 : -(scaledWidthInPixels + 1));
1256         uint32 clippedWidth = 0, phraseClippedWidth = 0, dataClippedWidth = 0;
1257         bool in24BPPMode = (((GET16(tomRam8, 0x0028) >> 1) & 0x03) == 1 ? true : false);        // VMODE
1258         // Not sure if this is Jaguar Two only location or what...
1259         // From the docs, it is... If we want to limit here we should think of something else.
1260 //      int32 limit = GET16(tom_ram_8, 0x0008);                 // LIMIT
1261         int32 limit = 720;
1262 //      int32 lbufWidth = (!in24BPPMode ? limit - 1 : (limit / 2) - 1); // Zero based limit...
1263         int32 lbufWidth = 719;  // Zero based limit...
1264
1265         // If the image is completely to the left or right of the line buffer, then bail.
1266 //If in REFLECT mode, then these values are swapped! !!! FIX !!! [DONE]
1267 //There are four possibilities:
1268 //  1. image sits on left edge and no REFLECT; starts out of bounds but ends in bounds.
1269 //  2. image sits on left edge and REFLECT; starts in bounds but ends out of bounds.
1270 //  3. image sits on right edge and REFLECT; starts out of bounds but ends in bounds.
1271 //  4. image sits on right edge and no REFLECT; starts in bounds but ends out of bounds.
1272 //Numbers 2 & 4 can be caught by checking the LBUF clip while in the inner loop,
1273 // numbers 1 & 3 are of concern.
1274 // This *indirectly* handles only cases 2 & 4! And is WRONG if REFLECT is set...!
1275 //      if (rightMargin < 0 || leftMargin > lbufWidth)
1276
1277 // It might be easier to swap these (if REFLECTed) and just use XPOS down below...
1278 // That way, you could simply set XPOS to leftMargin if !REFLECT and to rightMargin otherwise.
1279 // Still have to be careful with the DATA and IWIDTH values though...
1280
1281         if ((!flagREFLECT && (endPos < 0 || startPos > lbufWidth))
1282                 || (flagREFLECT && (startPos < 0 || endPos > lbufWidth)))
1283                 return;
1284
1285         // Otherwise, find the clip limits and clip the phrase as well...
1286         // NOTE: I'm fudging here by letting the actual blit overstep the bounds of the
1287         //       line buffer, but it shouldn't matter since there are two unused line
1288         //       buffers below and nothing above and I'll at most write 40 bytes outside
1289         //       the line buffer... I could use a fractional clip begin/end value, but
1290         //       this makes the blit a *lot* more hairy. I might fix this in the future
1291         //       if it becomes necessary. (JLH)
1292         //       Probably wouldn't be *that* hairy. Just use a delta that tells the inner loop
1293         //       which pixel in the phrase is being written, and quit when either end of phrases
1294         //       is reached or line buffer extents are surpassed.
1295
1296 //This stuff is probably wrong as well... !!! FIX !!!
1297 //The strange thing is that it seems to work, but that's no guarantee that it's bulletproof!
1298 //Yup. Seems that JagMania doesn't work correctly with this...
1299 //Dunno if this is the problem, but Atari Karts is showing *some* of the road now...
1300 //Actually, it is! Or, it was. It doesn't seem to be clipping here, so the problem lies
1301 //elsewhere! Hmm. Putting the scaling code into the 1/2/8 BPP cases seems to draw the ground
1302 // a bit more accurately... Strange!
1303 //It's probably a case of the REFLECT flag being set and the background being written
1304 //from the right side of the screen...
1305 //But no, it isn't... At least if the diagnostics are telling the truth!
1306
1307         // NOTE: We're just using endPos to figure out how much, if any, to clip by.
1308         // ALSO: There may be another case where we start out of bounds and end out of bounds...!
1309         // !!! FIX !!!
1310
1311 //There's a problem here with scaledPhrasePixels in that it can be forced to zero when
1312 //the scaling factor is small. So fix it already! !!! FIX !!!
1313 /*if (scaledPhrasePixels == 0)
1314 {
1315         WriteLog("OP: [Scaled] We're about to encounter a divide by zero error!\n");
1316         DumpScaledObject(p0, p1, p2);
1317 }//*/
1318 //NOTE: I'm almost 100% sure that this is wrong... And it is! :-p
1319
1320 //Try a simple example...
1321 // Let's say we have a 8 BPP scanline with an hscale of $80 (4). Our xpos is -10,
1322 // non-flipped. Pixels in the bitmap are XYZXYZXYZXYZXYZ.
1323 // Scaled up, they would be XXXXYYYYZZZZXXXXYYYYZZZZXXXXYYYYZZZZ...
1324 //
1325 // Normally, we would expect this in the line buffer:
1326 // ZZXXXXYYYYZZZZXXXXYYYYZZZZ...
1327 //
1328 // But instead we're getting:
1329 // XXXXYYYYZZZZXXXXYYYYZZZZ...
1330 //
1331 // or are we??? It would seem so, simply by virtue of the fact that we're NOT starting
1332 // on negative boundary--or are we? Hmm...
1333 // cw = 10, dcw = pcw = 10 / ([8 * 4 = 32] 32) = 0, sp = -10
1334 //
1335 // Let's try a real world example:
1336 //
1337 //OP: Scaled bitmap (70, 8 BPP, spp=28) sp (-400) < 0... [new sp=-8, cw=400, dcw=pcw=14]
1338 //OP: Scaled bitmap (6F, 8 BPP, spp=27) sp (-395) < 0... [new sp=-17, cw=395, dcw=pcw=14]
1339 //
1340 // Really, spp is 27.75 in the second case...
1341 // So... If we do 395 / 27.75, we get 14. Ok so far... If we scale that against the
1342 // start position (14 * 27.75), we get -6.5... NOT -17!
1343
1344 //Now it seems we're working OK, at least for the first case...
1345 uint32 scaledPhrasePixelsUS = phraseWidthToPixels[depth] * hscale;
1346
1347         if (startPos < 0)                       // Case #1: Begin out, end in, L to R
1348 {
1349 extern int start_logging;
1350 if (start_logging)
1351         WriteLog("OP: Scaled bitmap (%02X, %u BPP, spp=%u) start pos (%i) < 0...", hscale, op_bitmap_bit_depth[depth], scaledPhrasePixels, startPos);
1352 //              clippedWidth = 0 - startPos,
1353                 clippedWidth = (0 - startPos) << 5,
1354 //              dataClippedWidth = phraseClippedWidth = clippedWidth / scaledPhrasePixels,
1355                 dataClippedWidth = phraseClippedWidth = (clippedWidth / scaledPhrasePixelsUS) >> 5,
1356 //              startPos = 0 - (clippedWidth % scaledPhrasePixels);
1357                 startPos += (dataClippedWidth * scaledPhrasePixelsUS) >> 5;
1358 if (start_logging)
1359         WriteLog(" [new sp=%i, cw=%i, dcw=pcw=%i]\n", startPos, clippedWidth, dataClippedWidth);
1360 }
1361
1362         if (endPos < 0)                         // Case #2: Begin in, end out, R to L
1363                 clippedWidth = 0 - endPos,
1364                 phraseClippedWidth = clippedWidth / scaledPhrasePixels;
1365
1366         if (endPos > lbufWidth)         // Case #3: Begin in, end out, L to R
1367                 clippedWidth = endPos - lbufWidth,
1368                 phraseClippedWidth = clippedWidth / scaledPhrasePixels;
1369
1370         if (startPos > lbufWidth)       // Case #4: Begin out, end in, R to L
1371                 clippedWidth = startPos - lbufWidth,
1372                 dataClippedWidth = phraseClippedWidth = clippedWidth / scaledPhrasePixels,
1373                 startPos = lbufWidth + (clippedWidth % scaledPhrasePixels);
1374
1375 extern int op_start_log;
1376 if (op_start_log && clippedWidth != 0)
1377         WriteLog("OP: Clipped line. SP=%i, EP=%i, clip=%u, iwidth=%u, hscale=%02X\n", startPos, endPos, clippedWidth, iwidth, hscale);
1378 if (op_start_log && startPos == 13)
1379 {
1380         WriteLog("OP: Scaled line. SP=%i, EP=%i, clip=%u, iwidth=%u, hscale=%02X, depth=%u, firstPix=%u\n", startPos, endPos, clippedWidth, iwidth, hscale, depth, firstPix);
1381         DumpScaledObject(p0, p1, p2);
1382         if (iwidth == 7)
1383         {
1384                 WriteLog("    %08X: ", data);
1385                 for(int i=0; i<7*8; i++)
1386                         WriteLog("%02X ", JaguarReadByte(data+i));
1387                 WriteLog("\n");
1388         }
1389 }
1390         // If the image is sitting on the line buffer left or right edge, we need to compensate
1391         // by decreasing the image phrase width accordingly.
1392         iwidth -= phraseClippedWidth;
1393
1394         // Also, if we're clipping the phrase we need to make sure we're in the correct part of
1395         // the pixel data.
1396 //      data += phraseClippedWidth * (pitch << 3);
1397         data += dataClippedWidth * (pitch << 3);
1398
1399         // NOTE: When the bitmap is in REFLECT mode, the XPOS marks the *right* side of the
1400         //       bitmap! This makes clipping & etc. MUCH, much easier...!
1401 //      uint32 lbufAddress = 0x1800 + (!in24BPPMode ? leftMargin * 2 : leftMargin * 4);
1402 //      uint32 lbufAddress = 0x1800 + (!in24BPPMode ? startPos * 2 : startPos * 4);
1403         uint32 lbufAddress = 0x1800 + startPos * 2;
1404         uint8 * currentLineBuffer = &tomRam8[lbufAddress];
1405 //uint8 * lineBufferLowerLimit = &tom_ram_8[0x1800],
1406 //      * lineBufferUpperLimit = &tom_ram_8[0x1800 + 719];
1407
1408         // Render.
1409
1410 // Hmm. We check above for 24 BPP mode, but don't do anything about it below...
1411 // If we *were* in 24 BPP mode, how would you convert CRY to RGB24? Seems to me
1412 // that if you're in CRY mode then you wouldn't be able to use 24 BPP bitmaps
1413 // anyway.
1414 // This seems to be the case (at least according to the Midsummer docs)...!
1415
1416         if (depth == 0)                                                                 // 1 BPP
1417         {
1418 if (firstPix != 0)
1419         WriteLog("OP: Scaled bitmap @ 1 BPP requesting FIRSTPIX!\n");
1420                 // The LSB of flags is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1421                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1422
1423                 int pixCount = 0;
1424                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1425
1426                 while ((int32)iwidth > 0)
1427                 {
1428                         uint8 bits = pixels >> 63;
1429
1430 #ifndef OP_USES_PALETTE_ZERO
1431                         if (flagTRANS && bits == 0)
1432 #else
1433                         if (flagTRANS && (paletteRAM16[index | bits] == 0))
1434 #endif
1435                                 ;       // Do nothing...
1436                         else
1437                         {
1438                                 if (!flagRMW)
1439                                         // This is the *only* correct use of endian-dependent code
1440                                         // (i.e., mem-to-mem direct copying)!
1441                                         *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1442                                 else
1443                                         *currentLineBuffer =
1444                                                 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1445                                         *(currentLineBuffer + 1) =
1446                                                 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1447                         }
1448
1449                         currentLineBuffer += lbufDelta;
1450
1451 /*
1452 The reason we subtract the horizontalRemainder *after* the test is because we had too few
1453 bytes for horizontalRemainder to properly recognize a negative number. But now it's 16 bits
1454 wide, so we could probably go back to that (as long as we make it an int16 and not a uint16!)
1455 */
1456 /*                      horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1457                         while (horizontalRemainder & 0x80)
1458                         {
1459                                 horizontalRemainder += hscale;
1460                                 pixCount++;
1461                                 pixels <<= 1;
1462                         }//*/
1463 //                      while (horizontalRemainder <= 0x20)             // I.e., it's <= 1.0 (*before* subtraction)
1464                         while (horizontalRemainder < 0x20)              // I.e., it's <= 1.0 (*before* subtraction)
1465                         {
1466                                 horizontalRemainder += hscale;
1467                                 pixCount++;
1468                                 pixels <<= 1;
1469                         }
1470                         horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1471
1472                         if (pixCount > 63)
1473                         {
1474                                 int phrasesToSkip = pixCount / 64, pixelShift = pixCount % 64;
1475
1476                                 data += (pitch << 3) * phrasesToSkip;
1477                                 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1478                                 pixels <<= 1 * pixelShift;
1479                                 iwidth -= phrasesToSkip;
1480                                 pixCount = pixelShift;
1481                         }
1482                 }
1483         }
1484         else if (depth == 1)                                                    // 2 BPP
1485         {
1486 if (firstPix != 0)
1487         WriteLog("OP: Scaled bitmap @ 2 BPP requesting FIRSTPIX!\n");
1488                 index &= 0xFC;                                                          // Top six bits form CLUT index
1489                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1490                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1491
1492                 int pixCount = 0;
1493                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1494
1495                 while ((int32)iwidth > 0)
1496                 {
1497                         uint8 bits = pixels >> 62;
1498
1499 #ifndef OP_USES_PALETTE_ZERO
1500                         if (flagTRANS && bits == 0)
1501 #else
1502                         if (flagTRANS && (paletteRAM16[index | bits] == 0))
1503 #endif
1504                                 ;       // Do nothing...
1505                         else
1506                         {
1507                                 if (!flagRMW)
1508                                         // This is the *only* correct use of endian-dependent code
1509                                         // (i.e., mem-to-mem direct copying)!
1510                                         *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1511                                 else
1512                                         *currentLineBuffer =
1513                                                 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1514                                         *(currentLineBuffer + 1) =
1515                                                 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1516                         }
1517
1518                         currentLineBuffer += lbufDelta;
1519
1520 /*                      horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1521                         while (horizontalRemainder & 0x80)
1522                         {
1523                                 horizontalRemainder += hscale;
1524                                 pixCount++;
1525                                 pixels <<= 2;
1526                         }//*/
1527 //                      while (horizontalRemainder <= 0x20)             // I.e., it's <= 0 (*before* subtraction)
1528                         while (horizontalRemainder < 0x20)              // I.e., it's <= 1.0 (*before* subtraction)
1529                         {
1530                                 horizontalRemainder += hscale;
1531                                 pixCount++;
1532                                 pixels <<= 2;
1533                         }
1534                         horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1535
1536                         if (pixCount > 31)
1537                         {
1538                                 int phrasesToSkip = pixCount / 32, pixelShift = pixCount % 32;
1539
1540                                 data += (pitch << 3) * phrasesToSkip;
1541                                 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1542                                 pixels <<= 2 * pixelShift;
1543                                 iwidth -= phrasesToSkip;
1544                                 pixCount = pixelShift;
1545                         }
1546                 }
1547         }
1548         else if (depth == 2)                                                    // 4 BPP
1549         {
1550 if (firstPix != 0)
1551         WriteLog("OP: Scaled bitmap @ 4 BPP requesting FIRSTPIX!\n");
1552                 index &= 0xF0;                                                          // Top four bits form CLUT index
1553                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1554                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1555
1556                 int pixCount = 0;
1557                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1558
1559                 while ((int32)iwidth > 0)
1560                 {
1561                         uint8 bits = pixels >> 60;
1562
1563 #ifndef OP_USES_PALETTE_ZERO
1564                         if (flagTRANS && bits == 0)
1565 #else
1566                         if (flagTRANS && (paletteRAM16[index | bits] == 0))
1567 #endif
1568                                 ;       // Do nothing...
1569                         else
1570                         {
1571                                 if (!flagRMW)
1572                                         // This is the *only* correct use of endian-dependent code
1573                                         // (i.e., mem-to-mem direct copying)!
1574                                         *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1575                                 else
1576                                         *currentLineBuffer =
1577                                                 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1578                                         *(currentLineBuffer + 1) =
1579                                                 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1580                         }
1581
1582                         currentLineBuffer += lbufDelta;
1583
1584 /*                      horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1585                         while (horizontalRemainder & 0x80)
1586                         {
1587                                 horizontalRemainder += hscale;
1588                                 pixCount++;
1589                                 pixels <<= 4;
1590                         }//*/
1591 //                      while (horizontalRemainder <= 0x20)             // I.e., it's <= 0 (*before* subtraction)
1592                         while (horizontalRemainder < 0x20)              // I.e., it's <= 0 (*before* subtraction)
1593                         {
1594                                 horizontalRemainder += hscale;
1595                                 pixCount++;
1596                                 pixels <<= 4;
1597                         }
1598                         horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1599
1600                         if (pixCount > 15)
1601                         {
1602                                 int phrasesToSkip = pixCount / 16, pixelShift = pixCount % 16;
1603
1604                                 data += (pitch << 3) * phrasesToSkip;
1605                                 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1606                                 pixels <<= 4 * pixelShift;
1607                                 iwidth -= phrasesToSkip;
1608                                 pixCount = pixelShift;
1609                         }
1610                 }
1611         }
1612         else if (depth == 3)                                                    // 8 BPP
1613         {
1614 if (firstPix)
1615         WriteLog("OP: Scaled bitmap @ 8 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1616                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1617                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1618
1619                 int pixCount = 0;
1620                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1621
1622                 while ((int32)iwidth > 0)
1623                 {
1624                         uint8 bits = pixels >> 56;
1625
1626 #ifndef OP_USES_PALETTE_ZERO
1627                         if (flagTRANS && bits == 0)
1628 #else
1629                         if (flagTRANS && (paletteRAM16[bits] == 0))
1630 #endif
1631                                 ;       // Do nothing...
1632                         else
1633                         {
1634                                 if (!flagRMW)
1635                                         // This is the *only* correct use of endian-dependent code
1636                                         // (i.e., mem-to-mem direct copying)!
1637                                         *(uint16 *)currentLineBuffer = paletteRAM16[bits];
1638 /*                              {
1639                                         if (currentLineBuffer >= lineBufferLowerLimit && currentLineBuffer <= lineBufferUpperLimit)
1640                                                 *(uint16 *)currentLineBuffer = paletteRAM16[bits];
1641                                 }*/
1642                                 else
1643                                         *currentLineBuffer =
1644                                                 BLEND_CR(*currentLineBuffer, paletteRAM[bits << 1]),
1645                                         *(currentLineBuffer + 1) =
1646                                                 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[(bits << 1) + 1]);
1647                         }
1648
1649                         currentLineBuffer += lbufDelta;
1650
1651 //                      while (horizontalRemainder <= 0x20)             // I.e., it's <= 0 (*before* subtraction)
1652                         while (horizontalRemainder < 0x20)              // I.e., it's <= 1.0 (*before* subtraction)
1653                         {
1654                                 horizontalRemainder += hscale;
1655                                 pixCount++;
1656                                 pixels <<= 8;
1657                         }
1658                         horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1659
1660                         if (pixCount > 7)
1661                         {
1662                                 int phrasesToSkip = pixCount / 8, pixelShift = pixCount % 8;
1663
1664                                 data += (pitch << 3) * phrasesToSkip;
1665                                 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1666                                 pixels <<= 8 * pixelShift;
1667                                 iwidth -= phrasesToSkip;
1668                                 pixCount = pixelShift;
1669                         }
1670                 }
1671         }
1672         else if (depth == 4)                                                    // 16 BPP
1673         {
1674 if (firstPix != 0)
1675         WriteLog("OP: Scaled bitmap @ 16 BPP requesting FIRSTPIX!\n");
1676                 // The LSB is OPFLAG_REFLECT, so sign extend it and OR 2 into it.
1677                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1678
1679                 int pixCount = 0;
1680                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1681
1682                 while ((int32)iwidth > 0)
1683                 {
1684                         uint8 bitsHi = pixels >> 56, bitsLo = pixels >> 48;
1685
1686 //This doesn't seem right... Let's try the encoded black value ($8800):
1687 //Apparently, CRY 0 maps to $8800...
1688                                 if (flagTRANS && ((bitsLo | bitsHi) == 0))
1689 //                              if (flagTRANS && (bitsHi == 0x88) && (bitsLo == 0x00))
1690                                 ;       // Do nothing...
1691                         else
1692                         {
1693                                 if (!flagRMW)
1694                                         *currentLineBuffer = bitsHi,
1695                                         *(currentLineBuffer + 1) = bitsLo;
1696                                 else
1697                                         *currentLineBuffer =
1698                                                 BLEND_CR(*currentLineBuffer, bitsHi),
1699                                         *(currentLineBuffer + 1) =
1700                                                 BLEND_Y(*(currentLineBuffer + 1), bitsLo);
1701                         }
1702
1703                         currentLineBuffer += lbufDelta;
1704
1705 /*                      horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1706                         while (horizontalRemainder & 0x80)
1707                         {
1708                                 horizontalRemainder += hscale;
1709                                 pixCount++;
1710                                 pixels <<= 16;
1711                         }//*/
1712 //                      while (horizontalRemainder <= 0x20)             // I.e., it's <= 0 (*before* subtraction)
1713                         while (horizontalRemainder < 0x20)              // I.e., it's <= 1.0 (*before* subtraction)
1714                         {
1715                                 horizontalRemainder += hscale;
1716                                 pixCount++;
1717                                 pixels <<= 16;
1718                         }
1719                         horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1720 //*/
1721                         if (pixCount > 3)
1722                         {
1723                                 int phrasesToSkip = pixCount / 4, pixelShift = pixCount % 4;
1724
1725                                 data += (pitch << 3) * phrasesToSkip;
1726                                 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1727                                 pixels <<= 16 * pixelShift;
1728
1729                                 iwidth -= phrasesToSkip;
1730
1731                                 pixCount = pixelShift;
1732                         }
1733                 }
1734         }
1735         else if (depth == 5)                                                    // 24 BPP
1736         {
1737 //I'm not sure that you can scale a 24 BPP bitmap properly--the JTRM seem to indicate as much.
1738 WriteLog("OP: Writing 24 BPP scaled bitmap!\n");
1739 if (firstPix != 0)
1740         WriteLog("OP: Scaled bitmap @ 24 BPP requesting FIRSTPIX!\n");
1741                 // Not sure, but I think RMW only works with 16 BPP and below, and only in CRY mode...
1742                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 4 into it.
1743                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 4) | 0x04;
1744
1745                 while (iwidth--)
1746                 {
1747                         // Fetch phrase...
1748                         uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1749                         data += pitch << 3;                                             // Multiply pitch * 8 (optimize: precompute this value)
1750
1751                         for(int i=0; i<2; i++)
1752                         {
1753                                 uint8 bits3 = pixels >> 56, bits2 = pixels >> 48,
1754                                         bits1 = pixels >> 40, bits0 = pixels >> 32;
1755
1756                                 if (flagTRANS && (bits3 | bits2 | bits1 | bits0) == 0)
1757                                         ;       // Do nothing...
1758                                 else
1759                                         *currentLineBuffer = bits3,
1760                                         *(currentLineBuffer + 1) = bits2,
1761                                         *(currentLineBuffer + 2) = bits1,
1762                                         *(currentLineBuffer + 3) = bits0;
1763
1764                                 currentLineBuffer += lbufDelta;
1765                                 pixels <<= 32;
1766                         }
1767                 }
1768         }
1769 }