]> Shamusworld >> Repos - virtualjaguar/blob - src/op.cpp
RISC LOAD/STORE alignment fixes.
[virtualjaguar] / src / op.cpp
1 //
2 // Object Processor
3 //
4 // Original source by David Raingeard (Cal2)
5 // GCC/SDL port by Niels Wagenaar (Linux/WIN32) and Caz (BeOS)
6 // Extensive cleanups/fixes/rewrites by James L. Hammons
7 // (C) 2010 Underground Software
8 //
9 // JLH = James L. Hammons <jlhamm@acm.org>
10 //
11 // Who  When        What
12 // ---  ----------  -------------------------------------------------------------
13 // JLH  01/16/2010  Created this log ;-)
14 //
15
16 #include "op.h"
17
18 #include <stdlib.h>
19 #include <string.h>
20 #include "gpu.h"
21 #include "jaguar.h"
22 #include "log.h"
23 #include "m68k.h"
24 #include "memory.h"
25 #include "tom.h"
26
27 //#define OP_DEBUG
28 //#define OP_DEBUG_BMP
29
// Blend table lookups. The table index is (dst << 8) | src, with each argument
// cast to uint16 first. Both arguments are fully parenthesized so that an
// expression argument (e.g. "a | b") is cast and shifted as a whole.
#define BLEND_Y(dst, src)       op_blend_y[(((uint16)(dst)) << 8) | ((uint16)(src))]
#define BLEND_CR(dst, src)      op_blend_cr[(((uint16)(dst)) << 8) | ((uint16)(src))]
32
33 #define OBJECT_TYPE_BITMAP      0                                       // 000
34 #define OBJECT_TYPE_SCALE       1                                       // 001
35 #define OBJECT_TYPE_GPU         2                                       // 010
36 #define OBJECT_TYPE_BRANCH      3                                       // 011
37 #define OBJECT_TYPE_STOP        4                                       // 100
38
39 #define CONDITION_EQUAL                         0
40 #define CONDITION_LESS_THAN                     1
41 #define CONDITION_GREATER_THAN          2
42 #define CONDITION_OP_FLAG_SET           3
43 #define CONDITION_SECOND_HALF_LINE      4
44
45 #define OPFLAG_RELEASE          8                                       // Bus release bit
46 #define OPFLAG_TRANS            4                                       // Transparency bit
47 #define OPFLAG_RMW                      2                                       // Read-Modify-Write bit
48 #define OPFLAG_REFLECT          1                                       // Horizontal mirror bit
49
50 // Private function prototypes
51
52 void OPProcessFixedBitmap(uint64 p0, uint64 p1, bool render);
53 void OPProcessScaledBitmap(uint64 p0, uint64 p1, uint64 p2, bool render);
54 void DumpScaledObject(uint64 p0, uint64 p1, uint64 p2);
55 void DumpFixedObject(uint64 p0, uint64 p1);
56 uint64 OPLoadPhrase(uint32 offset);
57
58 // Local global variables
59
60 // Blend tables (64K each)
61 static uint8 op_blend_y[0x10000];
62 static uint8 op_blend_cr[0x10000];
63 // There may be a problem with this "RAM" overlapping (and thus being independent of)
64 // some of the regular TOM RAM...
65 //#warning objectp_ram is separated from TOM RAM--need to fix that!
66 //static uint8 objectp_ram[0x40];                       // This is based at $F00000
67 uint8 objectp_running = 0;
68 //bool objectp_stop_reading_list;
69
70 static uint8 op_bitmap_bit_depth[8] = { 1, 2, 4, 8, 16, 24, 32, 0 };
71 //static uint32 op_bitmap_bit_size[8] =
72 //      { (uint32)(0.125*65536), (uint32)(0.25*65536), (uint32)(0.5*65536), (uint32)(1*65536),
73 //        (uint32)(2*65536),     (uint32)(1*65536),    (uint32)(1*65536),   (uint32)(1*65536) };
74 static uint32 op_pointer;
75
76 int32 phraseWidthToPixels[8] = { 64, 32, 16, 8, 4, 2, 0, 0 };
77
78
79 //
80 // Object Processor initialization
81 //
82 void OPInit(void)
83 {
84         // Here we calculate the saturating blend of a signed 4-bit value and an
85         // existing Cyan/Red value as well as a signed 8-bit value and an existing intensity...
86         // Note: CRY is 4 bits Cyan, 4 bits Red, 16 bits intensitY
87         for(int i=0; i<256*256; i++)
88         {
89                 int y = (i >> 8) & 0xFF;
90                 int dy = (int8)i;                                       // Sign extend the Y index
91                 int c1 = (i >> 8) & 0x0F;
92                 int dc1 = (int8)(i << 4) >> 4;          // Sign extend the R index
93                 int c2 = (i >> 12) & 0x0F;
94                 int dc2 = (int8)(i & 0xF0) >> 4;        // Sign extend the C index
95
96                 y += dy;
97
98                 if (y < 0)
99                         y = 0;
100                 else if (y > 0xFF)
101                         y = 0xFF;
102
103                 op_blend_y[i] = y;
104
105                 c1 += dc1;
106
107                 if (c1 < 0)
108                         c1 = 0;
109                 else if (c1 > 0x0F)
110                         c1 = 0x0F;
111
112                 c2 += dc2;
113
114                 if (c2 < 0)
115                         c2 = 0;
116                 else if (c2 > 0x0F)
117                         c2 = 0x0F;
118
119                 op_blend_cr[i] = (c2 << 4) | c1;
120         }
121
122         OPReset();
123 }
124
125 //
126 // Object Processor reset
127 //
128 void OPReset(void)
129 {
130 //      memset(objectp_ram, 0x00, 0x40);
131         objectp_running = 0;
132 }
133
134 void OPDone(void)
135 {
136 #warning "!!! Fix OL dump so that it follows links !!!"
137         const char * opType[8] =
138         { "(BITMAP)", "(SCALED BITMAP)", "(GPU INT)", "(BRANCH)", "(STOP)", "???", "???", "???" };
139         const char * ccType[8] =
140                 { "\"==\"", "\"<\"", "\">\"", "(opflag set)", "(second half line)", "?", "?", "?" };
141
142         uint32 olp = OPGetListPointer();
143         WriteLog("OP: OLP = %08X\n", olp);
144         WriteLog("OP: Phrase dump\n    ----------\n");
145         for(uint32 i=0; i<0x100; i+=8)
146         {
147                 uint32 hi = JaguarReadLong(olp + i, OP), lo = JaguarReadLong(olp + i + 4, OP);
148                 WriteLog("\t%08X: %08X %08X %s", olp + i, hi, lo, opType[lo & 0x07]);
149                 if ((lo & 0x07) == 3)
150                 {
151                         uint16 ypos = (lo >> 3) & 0x7FF;
152                         uint8  cc   = (lo >> 14) & 0x03;
153                         uint32 link = ((hi << 11) | (lo >> 21)) & 0x3FFFF8;
154                         WriteLog(" YPOS=%u, CC=%s, link=%08X", ypos, ccType[cc], link);
155                 }
156                 WriteLog("\n");
157                 if ((lo & 0x07) == 0)
158                         DumpFixedObject(OPLoadPhrase(olp+i), OPLoadPhrase(olp+i+8));
159                 if ((lo & 0x07) == 1)
160                         DumpScaledObject(OPLoadPhrase(olp+i), OPLoadPhrase(olp+i+8), OPLoadPhrase(olp+i+16));
161         }
162         WriteLog("\n");
163
164 //      memory_free(op_blend_y);
165 //      memory_free(op_blend_cr);
166 }
167
168 //
169 // Object Processor memory access
170 // Memory range: F00010 - F00027
171 //
172 //      F00010-F00017   R     xxxxxxxx xxxxxxxx   OB - current object code from the graphics processor
173 //      F00020-F00023     W   xxxxxxxx xxxxxxxx   OLP - start of the object list
174 //      F00026            W   -------- -------x   OBF - object processor flag
175 //
176
177 #if 0
178 uint8 OPReadByte(uint32 offset, uint32 who/*=UNKNOWN*/)
179 {
180         offset &= 0x3F;
181         return objectp_ram[offset];
182 }
183
184 uint16 OPReadWord(uint32 offset, uint32 who/*=UNKNOWN*/)
185 {
186         offset &= 0x3F;
187         return GET16(objectp_ram, offset);
188 }
189
190 void OPWriteByte(uint32 offset, uint8 data, uint32 who/*=UNKNOWN*/)
191 {
192         offset &= 0x3F;
193         objectp_ram[offset] = data;
194 }
195
196 void OPWriteWord(uint32 offset, uint16 data, uint32 who/*=UNKNOWN*/)
197 {
198         offset &= 0x3F;
199         SET16(objectp_ram, offset, data);
200
201 /*if (offset == 0x20)
202 WriteLog("OP: Setting lo list pointer: %04X\n", data);
203 if (offset == 0x22)
204 WriteLog("OP: Setting hi list pointer: %04X\n", data);//*/
205 }
206 #endif
207
208 uint32 OPGetListPointer(void)
209 {
210         // Note: This register is LO / HI WORD, hence the funky look of this...
211         return GET16(tomRam8, 0x20) | (GET16(tomRam8, 0x22) << 16);
212 }
213
214 // This is WRONG, since the OBF is only 16 bits wide!!! [FIXED]
215
216 uint32 OPGetStatusRegister(void)
217 {
218         return GET16(tomRam8, 0x26);
219 }
220
221 // This is WRONG, since the OBF is only 16 bits wide!!! [FIXED]
222
223 void OPSetStatusRegister(uint32 data)
224 {
225         tomRam8[0x26] = (data & 0x0000FF00) >> 8;
226         tomRam8[0x27] |= (data & 0xFE);
227 }
228
229 void OPSetCurrentObject(uint64 object)
230 {
231 //Not sure this is right... Wouldn't it just be stored 64 bit BE?
232         // Stored as least significant 32 bits first, ms32 last in big endian
233 /*      objectp_ram[0x13] = object & 0xFF; object >>= 8;
234         objectp_ram[0x12] = object & 0xFF; object >>= 8;
235         objectp_ram[0x11] = object & 0xFF; object >>= 8;
236         objectp_ram[0x10] = object & 0xFF; object >>= 8;
237
238         objectp_ram[0x17] = object & 0xFF; object >>= 8;
239         objectp_ram[0x16] = object & 0xFF; object >>= 8;
240         objectp_ram[0x15] = object & 0xFF; object >>= 8;
241         objectp_ram[0x14] = object & 0xFF;*/
242 // Let's try regular good old big endian...
243         tomRam8[0x17] = object & 0xFF; object >>= 8;
244         tomRam8[0x16] = object & 0xFF; object >>= 8;
245         tomRam8[0x15] = object & 0xFF; object >>= 8;
246         tomRam8[0x14] = object & 0xFF; object >>= 8;
247
248         tomRam8[0x13] = object & 0xFF; object >>= 8;
249         tomRam8[0x12] = object & 0xFF; object >>= 8;
250         tomRam8[0x11] = object & 0xFF; object >>= 8;
251         tomRam8[0x10] = object & 0xFF;
252 }
253
254 uint64 OPLoadPhrase(uint32 offset)
255 {
256         offset &= ~0x07;                                                // 8 byte alignment
257         return ((uint64)JaguarReadLong(offset, OP) << 32) | (uint64)JaguarReadLong(offset+4, OP);
258 }
259
260 void OPStorePhrase(uint32 offset, uint64 p)
261 {
262         offset &= ~0x07;                                                // 8 byte alignment
263         JaguarWriteLong(offset, p >> 32, OP);
264         JaguarWriteLong(offset + 4, p & 0xFFFFFFFF, OP);
265 }
266
267 //
268 // Debugging routines
269 //
270 void DumpScaledObject(uint64 p0, uint64 p1, uint64 p2)
271 {
272         WriteLog(" (SCALED BITMAP)");
273         WriteLog(" %08X --> phrase %08X %08X\n", op_pointer, (uint32)(p1>>32), (uint32)(p1&0xFFFFFFFF));
274         WriteLog("                 %08X --> phrase %08X %08X ", op_pointer+8, (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
275         uint8 bitdepth = (p1 >> 12) & 0x07;
276 //WAS:  int16 ypos = ((p0 >> 3) & 0x3FF);                       // ??? What if not interlaced (/2)?
277         int16 ypos = ((p0 >> 3) & 0x7FF);                       // ??? What if not interlaced (/2)?
278         int32 xpos = p1 & 0xFFF;
279         xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);
280         uint32 iwidth = ((p1 >> 28) & 0x3FF);
281         uint32 dwidth = ((p1 >> 18) & 0x3FF);           // Unsigned!
282         uint16 height = ((p0 >> 14) & 0x3FF);
283         uint32 link = ((p0 >> 24) & 0x7FFFF) << 3;
284         uint32 ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
285         uint32 firstPix = (p1 >> 49) & 0x3F;
286         uint8 flags = (p1 >> 45) & 0x0F;
287         uint8 idx = (p1 >> 38) & 0x7F;
288         uint32 pitch = (p1 >> 15) & 0x07;
289         WriteLog("\n    [%u (%u) x %u @ (%i, %u) (%u bpp), l: %08X, p: %08X fp: %02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
290                 iwidth, dwidth, height, xpos, ypos, op_bitmap_bit_depth[bitdepth], link, ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""), (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""), (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
291         uint32 hscale = p2 & 0xFF;
292         uint32 vscale = (p2 >> 8) & 0xFF;
293         uint32 remainder = (p2 >> 16) & 0xFF;
294         WriteLog("    [hsc: %02X, vsc: %02X, rem: %02X]\n", hscale, vscale, remainder);
295 }
296
297 void DumpFixedObject(uint64 p0, uint64 p1)
298 {
299         WriteLog(" (BITMAP)");
300         WriteLog(" %08X --> phrase %08X %08X\n", op_pointer, (uint32)(p1>>32), (uint32)(p1&0xFFFFFFFF));
301         uint8 bitdepth = (p1 >> 12) & 0x07;
302 //WAS:  int16 ypos = ((p0 >> 3) & 0x3FF);                       // ??? What if not interlaced (/2)?
303         int16 ypos = ((p0 >> 3) & 0x7FF);                       // ??? What if not interlaced (/2)?
304         int32 xpos = p1 & 0xFFF;
305         xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);
306         uint32 iwidth = ((p1 >> 28) & 0x3FF);
307         uint32 dwidth = ((p1 >> 18) & 0x3FF);           // Unsigned!
308         uint16 height = ((p0 >> 14) & 0x3FF);
309         uint32 link = ((p0 >> 24) & 0x7FFFF) << 3;
310         uint32 ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
311         uint32 firstPix = (p1 >> 49) & 0x3F;
312         uint8 flags = (p1 >> 45) & 0x0F;
313         uint8 idx = (p1 >> 38) & 0x7F;
314         uint32 pitch = (p1 >> 15) & 0x07;
315         WriteLog("    [%u (%u) x %u @ (%i, %u) (%u bpp), l: %08X, p: %08X fp: %02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
316                 iwidth, dwidth, height, xpos, ypos, op_bitmap_bit_depth[bitdepth], link, ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""), (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""), (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
317 }
318
319 //
320 // Object Processor main routine
321 //
322 #warning "Need to fix this so that when an GPU object IRQ happens, we can pick up OP processing where we left off. !!! FIX !!!"
323 void OPProcessList(int halfline, bool render)
324 {
325 extern int op_start_log;
326 //      char * condition_to_str[8] =
327 //              { "==", "<", ">", "(opflag set)", "(second half line)", "?", "?", "?" };
328
329         op_pointer = OPGetListPointer();
330
331 //      objectp_stop_reading_list = false;
332
333 //WriteLog("OP: Processing line #%u (OLP=%08X)...\n", halfline, op_pointer);
334 //op_done();
335
336 // *** BEGIN OP PROCESSOR TESTING ONLY ***
337 extern bool interactiveMode;
338 extern bool iToggle;
339 extern int objectPtr;
340 bool inhibit;
341 int bitmapCounter = 0;
342 // *** END OP PROCESSOR TESTING ONLY ***
343
344         uint32 opCyclesToRun = 30000;                                   // This is a pulled-out-of-the-air value (will need to be fixed, obviously!)
345
346 //      if (op_pointer) WriteLog(" new op list at 0x%.8x halfline %i\n",op_pointer,halfline);
347         while (op_pointer)
348         {
349 // *** BEGIN OP PROCESSOR TESTING ONLY ***
350 if (interactiveMode && bitmapCounter == objectPtr)
351         inhibit = iToggle;
352 else
353         inhibit = false;
354 // *** END OP PROCESSOR TESTING ONLY ***
355 //              if (objectp_stop_reading_list)
356 //                      return;
357
358                 uint64 p0 = OPLoadPhrase(op_pointer);
359                 op_pointer += 8;
360 //WriteLog("\t%08X type %i\n", op_pointer, (uint8)p0 & 0x07);
361
362 #if 1
363 if (halfline == TOMGetVDB() && op_start_log)
364 //if (halfline == 215 && op_start_log)
365 //if (halfline == 28 && op_start_log)
366 //if (halfline == 0)
367 {
368 WriteLog("%08X --> phrase %08X %08X", op_pointer - 8, (int)(p0>>32), (int)(p0&0xFFFFFFFF));
369 if ((p0 & 0x07) == OBJECT_TYPE_BITMAP)
370 {
371 WriteLog(" (BITMAP) ");
372 uint64 p1 = OPLoadPhrase(op_pointer);
373 WriteLog("\n%08X --> phrase %08X %08X ", op_pointer, (int)(p1>>32), (int)(p1&0xFFFFFFFF));
374         uint8 bitdepth = (p1 >> 12) & 0x07;
375 //WAS:  int16 ypos = ((p0 >> 3) & 0x3FF);                       // ??? What if not interlaced (/2)?
376         int16 ypos = ((p0 >> 3) & 0x7FF);                       // ??? What if not interlaced (/2)?
377 int32 xpos = p1 & 0xFFF;
378 xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);
379         uint32 iwidth = ((p1 >> 28) & 0x3FF);
380         uint32 dwidth = ((p1 >> 18) & 0x3FF);           // Unsigned!
381         uint16 height = ((p0 >> 14) & 0x3FF);
382         uint32 link = ((p0 >> 24) & 0x7FFFF) << 3;
383         uint32 ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
384         uint32 firstPix = (p1 >> 49) & 0x3F;
385         uint8 flags = (p1 >> 45) & 0x0F;
386         uint8 idx = (p1 >> 38) & 0x7F;
387         uint32 pitch = (p1 >> 15) & 0x07;
388 WriteLog("\n    [%u (%u) x %u @ (%i, %u) (%u bpp), l: %08X, p: %08X fp: %02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
389         iwidth, dwidth, height, xpos, ypos, op_bitmap_bit_depth[bitdepth], link, ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""), (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""), (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
390 }
391 if ((p0 & 0x07) == OBJECT_TYPE_SCALE)
392 {
393 WriteLog(" (SCALED BITMAP)");
394 uint64 p1 = OPLoadPhrase(op_pointer), p2 = OPLoadPhrase(op_pointer+8);
395 WriteLog("\n%08X --> phrase %08X %08X ", op_pointer, (int)(p1>>32), (int)(p1&0xFFFFFFFF));
396 WriteLog("\n%08X --> phrase %08X %08X ", op_pointer+8, (int)(p2>>32), (int)(p2&0xFFFFFFFF));
397         uint8 bitdepth = (p1 >> 12) & 0x07;
398 //WAS:  int16 ypos = ((p0 >> 3) & 0x3FF);                       // ??? What if not interlaced (/2)?
399         int16 ypos = ((p0 >> 3) & 0x7FF);                       // ??? What if not interlaced (/2)?
400 int32 xpos = p1 & 0xFFF;
401 xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);
402         uint32 iwidth = ((p1 >> 28) & 0x3FF);
403         uint32 dwidth = ((p1 >> 18) & 0x3FF);           // Unsigned!
404         uint16 height = ((p0 >> 14) & 0x3FF);
405         uint32 link = ((p0 >> 24) & 0x7FFFF) << 3;
406         uint32 ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
407         uint32 firstPix = (p1 >> 49) & 0x3F;
408         uint8 flags = (p1 >> 45) & 0x0F;
409         uint8 idx = (p1 >> 38) & 0x7F;
410         uint32 pitch = (p1 >> 15) & 0x07;
411 WriteLog("\n    [%u (%u) x %u @ (%i, %u) (%u bpp), l: %08X, p: %08X fp: %02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
412         iwidth, dwidth, height, xpos, ypos, op_bitmap_bit_depth[bitdepth], link, ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""), (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""), (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
413         uint32 hscale = p2 & 0xFF;
414         uint32 vscale = (p2 >> 8) & 0xFF;
415         uint32 remainder = (p2 >> 16) & 0xFF;
416 WriteLog("    [hsc: %02X, vsc: %02X, rem: %02X]\n", hscale, vscale, remainder);
417 }
418 if ((p0 & 0x07) == OBJECT_TYPE_GPU)
419 WriteLog(" (GPU)\n");
420 if ((p0 & 0x07) == OBJECT_TYPE_BRANCH)
421 {
422 WriteLog(" (BRANCH)\n");
423 uint8 * jaguarMainRam = GetRamPtr();
424 WriteLog("[RAM] --> ");
425 for(int k=0; k<8; k++)
426         WriteLog("%02X ", jaguarMainRam[op_pointer-8 + k]);
427 WriteLog("\n");
428 }
429 if ((p0 & 0x07) == OBJECT_TYPE_STOP)
430 WriteLog("    --> List end\n\n");
431 }
432 #endif
433
434                 switch ((uint8)p0 & 0x07)
435                 {
436                 case OBJECT_TYPE_BITMAP:
437                 {
438 //WAS:                  uint16 ypos = (p0 >> 3) & 0x3FF;
439                         uint16 ypos = (p0 >> 3) & 0x7FF;
440 // This is only theory implied by Rayman...!
441 // It seems that if the YPOS is zero, then bump the YPOS value so that it coincides with
442 // the VDB value. With interlacing, this would be slightly more tricky.
443 // There's probably another bit somewhere that enables this mode--but so far, doesn't seem
444 // to affect any other game in a negative way (that I've seen).
445 // Either that, or it's an undocumented bug...
446
447 //No, the reason this was needed is that the OP code before was wrong. Any value
448 //less than VDB will get written to the top line of the display!
449 #if 0
450 // Not so sure... Let's see what happens here...
451 // No change...
452                         if (ypos == 0)
453                                 ypos = TOMReadWord(0xF00046, OP) / 2;                   // Get the VDB value
454 #endif
455 // Actually, no. Any item less than VDB will get only the lines that hang over VDB displayed.
456 // Actually, this is incorrect. It seems that VDB value is wrong somewhere and that's
457 // what's causing things to fuck up. Still no idea why.
458
459                         uint32 height = (p0 & 0xFFC000) >> 14;
460                         uint32 oldOPP = op_pointer - 8;
461 // *** BEGIN OP PROCESSOR TESTING ONLY ***
462 if (inhibit && op_start_log)
463         WriteLog("!!! ^^^ This object is INHIBITED! ^^^ !!!\n");
464 bitmapCounter++;
465 if (!inhibit)   // For OP testing only!
466 // *** END OP PROCESSOR TESTING ONLY ***
467                         if (halfline >= ypos && height > 0)
468                         {
469                                 uint64 p1 = OPLoadPhrase(op_pointer);
470                                 op_pointer += 8;
471 //WriteLog("OP: Writing halfline %d with ypos == %d...\n", halfline, ypos);
472 //WriteLog("--> Writing %u BPP bitmap...\n", op_bitmap_bit_depth[(p1 >> 12) & 0x07]);
473 //                              OPProcessFixedBitmap(halfline, p0, p1, render);
474                                 OPProcessFixedBitmap(p0, p1, render);
475
476                                 // OP write-backs
477
478 //???Does this really happen??? Doesn't seem to work if you do this...!
479 //Probably not. Must be a bug in the documentation...!
480 //                              uint32 link = (p0 & 0x7FFFF000000) >> 21;
481 //                              SET16(tom_ram_8, 0x20, link & 0xFFFF);  // OLP
482 //                              SET16(tom_ram_8, 0x22, link >> 16);
483 /*                              uint32 height = (p0 & 0xFFC000) >> 14;
484                                 if (height - 1 > 0)
485                                         height--;*/
486                                 // NOTE: Would subtract 2 if in interlaced mode...!
487 //                              uint64 height = ((p0 & 0xFFC000) - 0x4000) & 0xFFC000;
488 //                              if (height)
489                                 height--;
490
491                                 uint64 data = (p0 & 0xFFFFF80000000000LL) >> 40;
492                                 uint64 dwidth = (p1 & 0xFFC0000) >> 15;
493                                 data += dwidth;
494
495                                 p0 &= ~0xFFFFF80000FFC000LL;            // Mask out old data...
496                                 p0 |= (uint64)height << 14;
497                                 p0 |= data << 40;
498                                 OPStorePhrase(oldOPP, p0);
499                         }
500 //WriteLog("\t\tOld OP: %08X -> ", op_pointer);
501 //Temp, for testing...
502 //No doubt, this type of check will break all kinds of stuff... !!! FIX !!!
503 //And it does! !!! FIX !!!
504 //Let's remove this "fix" since it screws up more than it fixes.
505 /*      if (op_pointer > ((p0 & 0x000007FFFF000000LL) >> 21))
506                 return;*/
507
508                         op_pointer = (p0 & 0x000007FFFF000000LL) >> 21;
509 //WriteLog("New OP: %08X\n", op_pointer);
510                         break;
511                 }
512                 case OBJECT_TYPE_SCALE:
513                 {
514 //WAS:                  uint16 ypos = (p0 >> 3) & 0x3FF;
515                         uint16 ypos = (p0 >> 3) & 0x7FF;
516                         uint32 height = (p0 & 0xFFC000) >> 14;
517                         uint32 oldOPP = op_pointer - 8;
518 //WriteLog("OP: Scaled Object (ypos=%04X, height=%04X", ypos, height);
519 // *** BEGIN OP PROCESSOR TESTING ONLY ***
520 if (inhibit && op_start_log)
521 {
522         WriteLog("!!! ^^^ This object is INHIBITED! ^^^ !!! (halfline=%u, ypos=%u, height=%u)\n", halfline, ypos, height);
523         DumpScaledObject(p0, OPLoadPhrase(op_pointer), OPLoadPhrase(op_pointer+8));
524 }
525 bitmapCounter++;
526 if (!inhibit)   // For OP testing only!
527 // *** END OP PROCESSOR TESTING ONLY ***
528                         if (halfline >= ypos && height > 0)
529                         {
530                                 uint64 p1 = OPLoadPhrase(op_pointer);
531                                 op_pointer += 8;
532                                 uint64 p2 = OPLoadPhrase(op_pointer);
533                                 op_pointer += 8;
534 //WriteLog("OP: %08X (%d) %08X%08X %08X%08X %08X%08X\n", oldOPP, halfline, (uint32)(p0>>32), (uint32)(p0&0xFFFFFFFF), (uint32)(p1>>32), (uint32)(p1&0xFFFFFFFF), (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
535                                 OPProcessScaledBitmap(p0, p1, p2, render);
536
537                                 // OP write-backs
538
539                                 uint16 remainder = (p2 >> 16) & 0xFF;//, vscale = p2 >> 8;
540                                 uint8 /*remainder = p2 >> 16,*/ vscale = p2 >> 8;
541 //Actually, we should skip this object if it has a vscale of zero.
542 //Or do we? Not sure... Atari Karts has a few lines that look like:
543 // (SCALED BITMAP)
544 //000E8268 --> phrase 00010000 7000B00D
545 //    [7 (0) x 1 @ (13, 0) (8 bpp), l: 000E82A0, p: 000E0FC0 fp: 00, fl:RELEASE, idx:00, pt:01]
546 //    [hsc: 9A, vsc: 00, rem: 00]
547 // Could it be the vscale is overridden if the DWIDTH is zero? Hmm...
548 //WriteLog("OP: Scaled bitmap processing (rem=%02X, vscale=%02X)...\n", remainder, vscale);//*/
549
550                                 if (vscale == 0)
551                                         vscale = 0x20;                                  // OP bug??? Nope, it isn't...! Or is it?
552
553 //extern int start_logging;
554 //if (start_logging)
555 //      WriteLog("--> Returned from scaled bitmap processing (rem=%02X, vscale=%02X)...\n", remainder, vscale);//*/
556 //Locks up here:
557 //--> Returned from scaled bitmap processing (rem=20, vscale=80)...
558 //There are other problems here, it looks like...
559 //Another lock up:
560 //About to execute OP (508)...
561 /*
562 OP: Scaled bitmap 4x? 4bpp at 38,? hscale=7C fpix=0 data=00075E28 pitch 1 hflipped=no dwidth=? (linked to 00071118) Transluency=no
563 --> Returned from scaled bitmap processing (rem=50, vscale=7C)...
564 OP: Scaled bitmap 4x? 4bpp at 38,? hscale=7C fpix=0 data=00075E28 pitch 1 hflipped=no dwidth=? (linked to 00071118) Transluency=no
565 --> Returned from scaled bitmap processing (rem=30, vscale=7C)...
566 OP: Scaled bitmap 4x? 4bpp at 38,? hscale=7C fpix=0 data=00075E28 pitch 1 hflipped=no dwidth=? (linked to 00071118) Transluency=no
567 --> Returned from scaled bitmap processing (rem=10, vscale=7C)...
568 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756A8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
569 --> Returned from scaled bitmap processing (rem=00, vscale=7E)...
570 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
571 --> Returned from scaled bitmap processing (rem=00, vscale=80)...
572 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
573 --> Returned from scaled bitmap processing (rem=5E, vscale=7E)...
574 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756E8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
575 --> Returned from scaled bitmap processing (rem=60, vscale=80)...
576 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
577 --> Returned from scaled bitmap processing (rem=3E, vscale=7E)...
578 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756E8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
579 --> Returned from scaled bitmap processing (rem=40, vscale=80)...
580 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
581 --> Returned from scaled bitmap processing (rem=1E, vscale=7E)...
582 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756E8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
583 --> Returned from scaled bitmap processing (rem=20, vscale=80)...
584 */
585 //Here's another problem:
586 //    [hsc: 20, vsc: 20, rem: 00]
587 // Since we're not checking for $E0 (but that's what we get from the above), we end
588 // up repeating this halfline unnecessarily... !!! FIX !!! [DONE, but... still not quite
589 // right. Either that, or the Accolade team that wrote Bubsy screwed up royal.]
590 //Also note: $E0 = 7.0 which IS a legal vscale value...
591
592 //                              if (remainder & 0x80)                           // I.e., it's negative
593 //                              if ((remainder & 0x80) || remainder == 0)       // I.e., it's <= 0
594 //                              if ((remainder - 1) >= 0xE0)            // I.e., it's <= 0
595 //                              if ((remainder >= 0xE1) || remainder == 0)// I.e., it's <= 0
596 //                              if ((remainder >= 0xE1 && remainder <= 0xFF) || remainder == 0)// I.e., it's <= 0
597 //                              if (remainder <= 0x20)                          // I.e., it's <= 1.0
598                                 // I.e., it's < 1.0f -> means it'll go negative when we subtract 1.0f.
599                                 if (remainder < 0x20)
600                                 {
601                                         uint64 data = (p0 & 0xFFFFF80000000000LL) >> 40;
602                                         uint64 dwidth = (p1 & 0xFFC0000) >> 15;
603
604 //                                      while (remainder & 0x80)
605 //                                      while ((remainder & 0x80) || remainder == 0)
606 //                                      while ((remainder - 1) >= 0xE0)
607 //                                      while ((remainder >= 0xE1) || remainder == 0)
608 //                                      while ((remainder >= 0xE1 && remainder <= 0xFF) || remainder == 0)
609 //                                      while (remainder <= 0x20)
610                                         while (remainder < 0x20)
611                                         {
612                                                 remainder += vscale;
613
614                                                 if (height)
615                                                         height--;
616
617                                                 data += dwidth;
618                                         }
619
620                                         p0 &= ~0xFFFFF80000FFC000LL;    // Mask out old data...
621                                         p0 |= (uint64)height << 14;
622                                         p0 |= data << 40;
623                                         OPStorePhrase(oldOPP, p0);
624                                 }
625
626                                 remainder -= 0x20;                                      // 1.0f in [3.5] fixed point format
627
628 //if (start_logging)
629 //      WriteLog("--> Finished writebacks...\n");//*/
630
631 //WriteLog(" [%08X%08X -> ", (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
632                                 p2 &= ~0x0000000000FF0000LL;
633                                 p2 |= (uint64)remainder << 16;
634 //WriteLog("%08X%08X]\n", (uint32)(p2>>32), (uint32)(p2&0xFFFFFFFF));
635                                 OPStorePhrase(oldOPP + 16, p2);
636 //remainder = (uint8)(p2 >> 16), vscale = (uint8)(p2 >> 8);
637 //WriteLog(" [after]: rem=%02X, vscale=%02X\n", remainder, vscale);
638                         }
639
640                         op_pointer = (p0 & 0x000007FFFF000000LL) >> 21;
641                         break;
642                 }
643                 case OBJECT_TYPE_GPU:
644                 {
645 //WriteLog("OP: Asserting GPU IRQ #3...\n");
646 #warning "Need to fix OP GPU IRQ handling! !!! FIX !!!"
647                         OPSetCurrentObject(p0);
648                         GPUSetIRQLine(3, ASSERT_LINE);
649 //Also, OP processing is suspended from this point until OBF (F00026) is written to...
650 // !!! FIX !!!
651 //Do something like:
652 //OPSuspendedByGPU = true;
653 //Dunno if the OP keeps processing from where it was interrupted, or if it just continues
654 //on the next halfline...
655 // --> It continues from where it was interrupted! !!! FIX !!!
656                         break;
657                 }
658                 case OBJECT_TYPE_BRANCH:
659                 {
660                         uint16 ypos = (p0 >> 3) & 0x7FF;
661                         uint8  cc   = (p0 >> 14) & 0x03;
662                         uint32 link = (p0 >> 21) & 0x3FFFF8;
663
664 //                      if ((ypos!=507)&&(ypos!=25))
665 //                              WriteLog("\t%i%s%i link=0x%.8x\n",halfline,condition_to_str[cc],ypos>>1,link);
666                         switch (cc)
667                         {
668                         case CONDITION_EQUAL:
669                                 if (TOMReadWord(0xF00006, OP) == ypos || ypos == 0x7FF)
670                                         op_pointer = link;
671                                 break;
672                         case CONDITION_LESS_THAN:
673                                 if (TOMReadWord(0xF00006, OP) < ypos)
674                                         op_pointer = link;
675                                 break;
676                         case CONDITION_GREATER_THAN:
677                                 if (TOMReadWord(0xF00006, OP) > ypos)
678                                         op_pointer = link;
679                                 break;
680                         case CONDITION_OP_FLAG_SET:
681                                 if (OPGetStatusRegister() & 0x01)
682                                         op_pointer = link;
683                                 break;
684                         case CONDITION_SECOND_HALF_LINE:
685 //Here's the ASIC code:
686 //  ND4(cctrue5, newheight[2], heightl[1], heightl[0], hcb[10]);
687 //which means, do the link if bit 10 of HC is set...
688
689                                 // This basically means branch if bit 10 of HC is set
690 #warning "Unhandled condition code causes emulator to crash... !!! FIX !!!"
691                                 WriteLog("OP: Unexpected CONDITION_SECOND_HALF_LINE in BRANCH object\nOP: shutting down!\n");
692                                 LogDone();
693                                 exit(0);
694                                 break;
695                         default:
696                                 // Basically, if you do this, the OP does nothing. :-)
697                                 WriteLog("OP: Unimplemented branch condition %i\n", cc);
698                         }
699                         break;
700                 }
701                 case OBJECT_TYPE_STOP:
702                 {
703 //op_start_log = 0;
704                         // unsure
705 //WriteLog("OP: --> STOP\n");
706 //                      op_set_status_register(((p0>>3) & 0xFFFFFFFF));
707 //This seems more likely...
708                         OPSetCurrentObject(p0);
709
710                         if (p0 & 0x08)
711                         {
712                                 // We need to check whether these interrupts are enabled or not, THEN
713                                 // set an IRQ + pending flag if necessary...
714                                 if (TOMIRQEnabled(IRQ_OPFLAG))
715                                 {
716                                         TOMSetPendingObjectInt();
717                                         m68k_set_irq(2);                                // Cause a 68K IPL 2 to occur...
718                                 }
719                         }
720
721                         return;
722 //                      break;
723                 }
724                 default:
725                         WriteLog("op: unknown object type %i\n", ((uint8)p0 & 0x07));
726                         return;
727                 }
728
729                 // Here is a little sanity check to keep the OP from locking up the machine
730                 // when fed bad data. Better would be to count how many actual cycles it used
731                 // and bail out/reenter to properly simulate an overloaded OP... !!! FIX !!!
732 #warning "Better would be to count how many actual cycles it used and bail out/reenter to properly simulate an overloaded OP... !!! FIX !!!"
733                 opCyclesToRun--;
734
735                 if (!opCyclesToRun)
736                         return;
737         }
738 }
739
740 //
741 // Store fixed size bitmap in line buffer
742 //
743 void OPProcessFixedBitmap(uint64 p0, uint64 p1, bool render)
744 {
745 // Need to make sure that when writing that it stays within the line buffer...
746 // LBUF ($F01800 - $F01D9E) 360 x 32-bit RAM
747         uint8 depth = (p1 >> 12) & 0x07;                                // Color depth of image
748         int32 xpos = ((int16)((p1 << 4) & 0xFFFF)) >> 4;// Image xpos in LBUF
749         uint32 iwidth = (p1 >> 28) & 0x3FF;                             // Image width in *phrases*
750         uint32 data = (p0 >> 40) & 0xFFFFF8;                    // Pixel data address
751 //#ifdef OP_DEBUG_BMP
752         uint32  firstPix = (p1 >> 49) & 0x3F;
753         // "The LSB is significant only for scaled objects..." -JTRM
754         // "In 1 BPP mode, all five bits are significant. In 2 BPP mode, the top four are significant..."
755         firstPix &= 0x3E;
756 //#endif
757 // We can ignore the RELEASE (high order) bit for now--probably forever...!
758 //      uint8 flags = (p1 >> 45) & 0x0F;        // REFLECT, RMW, TRANS, RELEASE
759 //Optimize: break these out to their own BOOL values
760         uint8 flags = (p1 >> 45) & 0x07;                                // REFLECT (0), RMW (1), TRANS (2)
761         bool flagREFLECT = (flags & OPFLAG_REFLECT ? true : false),
762                 flagRMW = (flags & OPFLAG_RMW ? true : false),
763                 flagTRANS = (flags & OPFLAG_TRANS ? true : false);
764 // "For images with 1 to 4 bits/pixel the top 7 to 4 bits of the index
765 //  provide the most significant bits of the palette address."
766         uint8 index = (p1 >> 37) & 0xFE;                                // CLUT index offset (upper pix, 1-4 bpp)
767         uint32 pitch = (p1 >> 15) & 0x07;                               // Phrase pitch
768         pitch <<= 3;                                                                    // Optimization: Multiply pitch by 8
769
770 //      int16 scanlineWidth = tom_getVideoModeWidth();
771         uint8 * tomRam8 = TOMGetRamPointer();
772         uint8 * paletteRAM = &tomRam8[0x400];
773         // This is OK as long as it's used correctly: For 16-bit RAM to RAM direct copies--NOT
774         // for use when using endian-corrected data (i.e., any of the *_word_read functions!)
775         uint16 * paletteRAM16 = (uint16 *)paletteRAM;
776
777 //      WriteLog("bitmap %ix? %ibpp at %i,? firstpix=? data=0x%.8x pitch %i hflipped=%s dwidth=? (linked to ?) RMW=%s Tranparent=%s\n",
778 //              iwidth, op_bitmap_bit_depth[bitdepth], xpos, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), (flags&OPFLAG_RMW ? "yes" : "no"), (flags&OPFLAG_TRANS ? "yes" : "no"));
779
780 // Is it OK to have a 0 for the data width??? (i.e., undocumented?)
781 // Seems to be... Seems that dwidth *can* be zero (i.e., reuse same line) as well.
782 // Pitch == 0 is OK too...
783 //      if (!render || op_pointer == 0 || ptr == 0 || pitch == 0)
784 //I'm not convinced that we need to concern ourselves with data & op_pointer here either!
785         if (!render || iwidth == 0)
786                 return;
787
788 //OK, so we know the position in the line buffer is correct. It's the clipping in
789 //24bpp mode that's wrong!
790 #if 0
791 //This is a total kludge, based upon the fact that 24BPP mode puts *4* bytes
792 //into the line buffer for each pixel.
793 if (depth == 5) // i.e., 24bpp mode...
794         xpos >>= 1;     // Cut it in half...
795 #endif
796
797 //#define OP_DEBUG_BMP
798 //#ifdef OP_DEBUG_BMP
799 //      WriteLog("bitmap %ix%i %ibpp at %i,%i firstpix=%i data=0x%.8x pitch %i hflipped=%s dwidth=%i (linked to 0x%.8x) Transluency=%s\n",
800 //              iwidth, height, op_bitmap_bit_depth[bitdepth], xpos, ypos, firstPix, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), dwidth, op_pointer, (flags&OPFLAG_RMW ? "yes" : "no"));
801 //#endif
802
803 //      int32 leftMargin = xpos, rightMargin = (xpos + (phraseWidthToPixels[depth] * iwidth)) - 1;
804         int32 startPos = xpos, endPos = xpos +
805                 (!flagREFLECT ? (phraseWidthToPixels[depth] * iwidth) - 1
806                 : -((phraseWidthToPixels[depth] * iwidth) + 1));
807         uint32 clippedWidth = 0, phraseClippedWidth = 0, dataClippedWidth = 0;//, phrasePixel = 0;
808         bool in24BPPMode = (((GET16(tomRam8, 0x0028) >> 1) & 0x03) == 1 ? true : false);        // VMODE
809         // Not sure if this is Jaguar Two only location or what...
810         // From the docs, it is... If we want to limit here we should think of something else.
811 //      int32 limit = GET16(tom_ram_8, 0x0008);                 // LIMIT
812 //      int32 limit = 720;
813 //      int32 lbufWidth = (!in24BPPMode ? limit - 1 : (limit / 2) - 1); // Zero based limit...
814 //printf("[OP:xpos=%i,spos=%i,epos=%i>", xpos, startPos, endPos);
815         // This is correct, the OP line buffer is a constant size... 
816         int32 limit = 720;
817         int32 lbufWidth = 719;
818
819         // If the image is completely to the left or right of the line buffer, then bail.
820 //If in REFLECT mode, then these values are swapped! !!! FIX !!! [DONE]
821 //There are four possibilities:
822 //  1. image sits on left edge and no REFLECT; starts out of bounds but ends in bounds.
823 //  2. image sits on left edge and REFLECT; starts in bounds but ends out of bounds.
824 //  3. image sits on right edge and REFLECT; starts out of bounds but ends in bounds.
825 //  4. image sits on right edge and no REFLECT; starts in bounds but ends out of bounds.
826 //Numbers 2 & 4 can be caught by checking the LBUF clip while in the inner loop,
827 // numbers 1 & 3 are of concern.
828 // This *indirectly* handles only cases 2 & 4! And is WRONG is REFLECT is set...!
829 //      if (rightMargin < 0 || leftMargin > lbufWidth)
830
831 // It might be easier to swap these (if REFLECTed) and just use XPOS down below...
832 // That way, you could simply set XPOS to leftMargin if !REFLECT and to rightMargin otherwise.
833 // Still have to be careful with the DATA and IWIDTH values though...
834
835 //      if ((!flagREFLECT && (rightMargin < 0 || leftMargin > lbufWidth))
836 //              || (flagREFLECT && (leftMargin < 0 || rightMargin > lbufWidth)))
837 //              return;
838         if ((!flagREFLECT && (endPos < 0 || startPos > lbufWidth))
839                 || (flagREFLECT && (startPos < 0 || endPos > lbufWidth)))
840                 return;
841
842         // Otherwise, find the clip limits and clip the phrase as well...
843         // NOTE: I'm fudging here by letting the actual blit overstep the bounds of the
844         //       line buffer, but it shouldn't matter since there are two unused line
845         //       buffers below and nothing above and I'll at most write 8 bytes outside
846         //       the line buffer... I could use a fractional clip begin/end value, but
847         //       this makes the blit a *lot* more hairy. I might fix this in the future
848         //       if it becomes necessary. (JLH)
849         //       Probably wouldn't be *that* hairy. Just use a delta that tells the inner loop
850         //       which pixel in the phrase is being written, and quit when either end of phrases
851         //       is reached or line buffer extents are surpassed.
852
853 //This stuff is probably wrong as well... !!! FIX !!!
854 //The strange thing is that it seems to work, but that's no guarantee that it's bulletproof!
855 //Yup. Seems that JagMania doesn't work correctly with this...
856 //Dunno if this is the problem, but Atari Karts is showing *some* of the road now...
857 //      if (!flagREFLECT)
858
859 /*
860         if (leftMargin < 0)
861                 clippedWidth = 0 - leftMargin,
862                 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth],
863                 leftMargin = 0 - (clippedWidth % phraseWidthToPixels[depth]);
864 //              leftMargin = 0;
865
866         if (rightMargin > lbufWidth)
867                 clippedWidth = rightMargin - lbufWidth,
868                 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];//,
869 //              rightMargin = lbufWidth + (clippedWidth % phraseWidthToPixels[depth]);
870 //              rightMargin = lbufWidth;
871 */
872 if (depth > 5)
873         WriteLog("OP: We're about to encounter a divide by zero error!\n");
874         // NOTE: We're just using endPos to figure out how much, if any, to clip by.
875         // ALSO: There may be another case where we start out of bounds and end out of bounds...!
876         // !!! FIX !!!
877         if (startPos < 0)                       // Case #1: Begin out, end in, L to R
878                 clippedWidth = 0 - startPos,
879                 dataClippedWidth = phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth],
880                 startPos = 0 - (clippedWidth % phraseWidthToPixels[depth]);
881
882         if (endPos < 0)                         // Case #2: Begin in, end out, R to L
883                 clippedWidth = 0 - endPos,
884                 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];
885
886         if (endPos > lbufWidth)         // Case #3: Begin in, end out, L to R
887                 clippedWidth = endPos - lbufWidth,
888                 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];
889
890         if (startPos > lbufWidth)       // Case #4: Begin out, end in, R to L
891                 clippedWidth = startPos - lbufWidth,
892                 dataClippedWidth = phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth],
893                 startPos = lbufWidth + (clippedWidth % phraseWidthToPixels[depth]);
894 //printf("<OP:spos=%i,epos=%i]", startPos, endPos);
895
896         // If the image is sitting on the line buffer left or right edge, we need to compensate
897         // by decreasing the image phrase width accordingly.
898         iwidth -= phraseClippedWidth;
899
900         // Also, if we're clipping the phrase we need to make sure we're in the correct part of
901         // the pixel data.
902 //      data += phraseClippedWidth * (pitch << 3);
903         data += dataClippedWidth * pitch;
904
905         // NOTE: When the bitmap is in REFLECT mode, the XPOS marks the *right* side of the
906         //       bitmap! This makes clipping & etc. MUCH, much easier...!
907 //      uint32 lbufAddress = 0x1800 + (!in24BPPMode ? leftMargin * 2 : leftMargin * 4);
908 //Why does this work right when multiplying startPos by 2 (instead of 4) for 24 BPP mode?
909 //Is this a bug in the OP?
910 //It's because in 24bpp mode, each pixel takes *4* bytes, instead of the usual 2.
911 //Though it looks like we're doing it here no matter what...
912 //      uint32 lbufAddress = 0x1800 + (!in24BPPMode ? startPos * 2 : startPos * 2);
913 //Let's try this:
914         uint32 lbufAddress = 0x1800 + (startPos * 2);
915         uint8 * currentLineBuffer = &tomRam8[lbufAddress];
916
917         // Render.
918
919 // Hmm. We check above for 24 BPP mode, but don't do anything about it below...
920 // If we *were* in 24 BPP mode, how would you convert CRY to RGB24? Seems to me
921 // that if you're in CRY mode then you wouldn't be able to use 24 BPP bitmaps
922 // anyway.
923 // This seems to be the case (at least according to the Midsummer docs)...!
924
925 // This is to test using palette zeroes instead of bit zeroes...
926 // And it seems that this is wrong, index == 0 is transparent apparently... :-/
927 //#define OP_USES_PALETTE_ZERO
928
929         if (depth == 0)                                                                 // 1 BPP
930         {
931                 // The LSB of flags is OPFLAG_REFLECT, so sign extend it and or 2 into it.
932                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
933
934                 // Fetch 1st phrase...
935                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
936 //Note that firstPix should only be honored *if* we start with the 1st phrase of the bitmap
937 //i.e., we didn't clip on the margin... !!! FIX !!!
938                 pixels <<= firstPix;                                            // Skip first N pixels (N=firstPix)...
939                 int i = firstPix;                                                       // Start counter at right spot...
940
941                 while (iwidth--)
942                 {
943                         while (i++ < 64)
944                         {
945                                 uint8 bit = pixels >> 63;
946 #ifndef OP_USES_PALETTE_ZERO
947                                 if (flagTRANS && bit == 0)
948 #else
949                                 if (flagTRANS && (paletteRAM16[index | bit] == 0))
950 #endif
951                                         ;       // Do nothing...
952                                 else
953                                 {
954                                         if (!flagRMW)
955 //Optimize: Set palleteRAM16 to beginning of palette RAM + index*2 and use only [bit] as index...
956 //Won't optimize RMW case though...
957                                                 // This is the *only* correct use of endian-dependent code
958                                                 // (i.e., mem-to-mem direct copying)!
959                                                 *(uint16 *)currentLineBuffer = paletteRAM16[index | bit];
960                                         else
961                                                 *currentLineBuffer =
962                                                         BLEND_CR(*currentLineBuffer, paletteRAM[(index | bit) << 1]),
963                                                 *(currentLineBuffer + 1) =
964                                                         BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bit) << 1) + 1]);
965                                 }
966
967                                 currentLineBuffer += lbufDelta;
968                                 pixels <<= 1;
969                         }
970                         i = 0;
971                         // Fetch next phrase...
972                         data += pitch;
973                         pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
974                 }
975         }
976         else if (depth == 1)                                                    // 2 BPP
977         {
978 if (firstPix)
979         WriteLog("OP: Fixed bitmap @ 2 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
980                 index &= 0xFC;                                                          // Top six bits form CLUT index
981                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
982                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
983
984                 while (iwidth--)
985                 {
986                         // Fetch phrase...
987                         uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
988                         data += pitch;
989
990                         for(int i=0; i<32; i++)
991                         {
992                                 uint8 bits = pixels >> 62;
993 // Seems to me that both of these are in the same endian, so we could cast it as
994 // uint16 * and do straight across copies (what about 24 bpp? Treat it differently...)
995 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
996 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
997 // No, it isn't because we read the memory in an endian safe way--this *won't* work...
998 #ifndef OP_USES_PALETTE_ZERO
999                                 if (flagTRANS && bits == 0)
1000 #else
1001                                 if (flagTRANS && (paletteRAM16[index | bits] == 0))
1002 #endif
1003                                         ;       // Do nothing...
1004                                 else
1005                                 {
1006                                         if (!flagRMW)
1007                                                 *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1008                                         else
1009                                                 *currentLineBuffer =
1010                                                         BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1011                                                 *(currentLineBuffer + 1) =
1012                                                         BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1013                                 }
1014
1015                                 currentLineBuffer += lbufDelta;
1016                                 pixels <<= 2;
1017                         }
1018                 }
1019         }
1020         else if (depth == 2)                                                    // 4 BPP
1021         {
1022 if (firstPix)
1023         WriteLog("OP: Fixed bitmap @ 4 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1024                 index &= 0xF0;                                                          // Top four bits form CLUT index
1025                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1026                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1027
1028                 while (iwidth--)
1029                 {
1030                         // Fetch phrase...
1031                         uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1032                         data += pitch;
1033
1034                         for(int i=0; i<16; i++)
1035                         {
1036                                 uint8 bits = pixels >> 60;
1037 // Seems to me that both of these are in the same endian, so we could cast it as
1038 // uint16 * and do straight across copies (what about 24 bpp? Treat it differently...)
1039 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
1040 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
1041 // No, it isn't because we read the memory in an endian safe way--this *won't* work...
1042 #ifndef OP_USES_PALETTE_ZERO
1043                                 if (flagTRANS && bits == 0)
1044 #else
1045                                 if (flagTRANS && (paletteRAM16[index | bits] == 0))
1046 #endif
1047                                         ;       // Do nothing...
1048                                 else
1049                                 {
1050                                         if (!flagRMW)
1051                                                 *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1052                                         else
1053                                                 *currentLineBuffer =
1054                                                         BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1055                                                 *(currentLineBuffer + 1) =
1056                                                         BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1057                                 }
1058
1059                                 currentLineBuffer += lbufDelta;
1060                                 pixels <<= 4;
1061                         }
1062                 }
1063         }
1064         else if (depth == 3)                                                    // 8 BPP
1065         {
1066                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1067                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1068
1069                 // Fetch 1st phrase...
1070                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1071 //Note that firstPix should only be honored *if* we start with the 1st phrase of the bitmap
1072 //i.e., we didn't clip on the margin... !!! FIX !!!
1073                 firstPix &= 0x30;                                                       // Only top two bits are valid for 8 BPP
1074                 pixels <<= firstPix;                                            // Skip first N pixels (N=firstPix)...
1075                 int i = firstPix >> 3;                                          // Start counter at right spot...
1076
1077                 while (iwidth--)
1078                 {
1079                         while (i++ < 8)
1080                         {
1081                                 uint8 bits = pixels >> 56;
1082 // Seems to me that both of these are in the same endian, so we could cast it as
1083 // uint16 * and do straight across copies (what about 24 bpp? Treat it differently...)
1084 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
1085 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
1086 // No, it isn't because we read the memory in an endian safe way--this *won't* work...
1087 //This would seem to be problematic...
1088 //Because it's the palette entry being zero that makes the pixel transparent...
1089 //Let's try it and see.
1090 #ifndef OP_USES_PALETTE_ZERO
1091                                 if (flagTRANS && bits == 0)
1092 #else
1093                                 if (flagTRANS && (paletteRAM16[bits] == 0))
1094 #endif
1095                                         ;       // Do nothing...
1096                                 else
1097                                 {
1098                                         if (!flagRMW)
1099                                                 *(uint16 *)currentLineBuffer = paletteRAM16[bits];
1100                                         else
1101                                                 *currentLineBuffer =
1102                                                         BLEND_CR(*currentLineBuffer, paletteRAM[bits << 1]),
1103                                                 *(currentLineBuffer + 1) =
1104                                                         BLEND_Y(*(currentLineBuffer + 1), paletteRAM[(bits << 1) + 1]);
1105                                 }
1106
1107                                 currentLineBuffer += lbufDelta;
1108                                 pixels <<= 8;
1109                         }
1110                         i = 0;
1111                         // Fetch next phrase...
1112                         data += pitch;
1113                         pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1114                 }
1115         }
1116         else if (depth == 4)                                                    // 16 BPP
1117         {
1118 if (firstPix)
1119         WriteLog("OP: Fixed bitmap @ 16 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1120                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1121                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1122
1123                 while (iwidth--)
1124                 {
1125                         // Fetch phrase...
1126                         uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1127                         data += pitch;
1128
1129                         for(int i=0; i<4; i++)
1130                         {
1131                                 uint8 bitsHi = pixels >> 56, bitsLo = pixels >> 48;
1132 // Seems to me that both of these are in the same endian, so we could cast it as
1133 // uint16 * and do straight across copies (what about 24 bpp? Treat it differently...)
1134 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
1135 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
1136 // No, it isn't because we read the memory in an endian safe way--it *won't* work...
1137 //This doesn't seem right... Let's try the encoded black value ($8800):
1138 //Apparently, CRY 0 maps to $8800...
1139                                 if (flagTRANS && ((bitsLo | bitsHi) == 0))
1140 //                              if (flagTRANS && (bitsHi == 0x88) && (bitsLo == 0x00))
1141                                         ;       // Do nothing...
1142                                 else
1143                                 {
1144                                         if (!flagRMW)
1145                                                 *currentLineBuffer = bitsHi,
1146                                                 *(currentLineBuffer + 1) = bitsLo;
1147                                         else
1148                                                 *currentLineBuffer =
1149                                                         BLEND_CR(*currentLineBuffer, bitsHi),
1150                                                 *(currentLineBuffer + 1) =
1151                                                         BLEND_Y(*(currentLineBuffer + 1), bitsLo);
1152                                 }
1153
1154                                 currentLineBuffer += lbufDelta;
1155                                 pixels <<= 16;
1156                         }
1157                 }
1158         }
1159         else if (depth == 5)                                                    // 24 BPP
1160         {
1161 //Looks like Iron Soldier is the only game that uses 24BPP mode...
1162 //There *might* be others...
1163 //WriteLog("OP: Writing 24 BPP bitmap!\n");
1164 if (firstPix)
1165         WriteLog("OP: Fixed bitmap @ 24 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1166                 // Not sure, but I think RMW only works with 16 BPP and below, and only in CRY mode...
1167                 // The LSB of flags is OPFLAG_REFLECT, so sign extend it and OR 4 into it.
1168                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 4) | 0x04;
1169
1170                 while (iwidth--)
1171                 {
1172                         // Fetch phrase...
1173                         uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1174                         data += pitch;
1175
1176                         for(int i=0; i<2; i++)
1177                         {
1178                                 // We don't use a 32-bit var here because of endian issues...!
1179                                 uint8 bits3 = pixels >> 56, bits2 = pixels >> 48,
1180                                         bits1 = pixels >> 40, bits0 = pixels >> 32;
1181
1182                                 if (flagTRANS && (bits3 | bits2 | bits1 | bits0) == 0)
1183                                         ;       // Do nothing...
1184                                 else
1185                                         *currentLineBuffer = bits3,
1186                                         *(currentLineBuffer + 1) = bits2,
1187                                         *(currentLineBuffer + 2) = bits1,
1188                                         *(currentLineBuffer + 3) = bits0;
1189
1190                                 currentLineBuffer += lbufDelta;
1191                                 pixels <<= 32;
1192                         }
1193                 }
1194         }
1195 }
1196
1197 //
1198 // Store scaled bitmap in line buffer
1199 //
1200 void OPProcessScaledBitmap(uint64 p0, uint64 p1, uint64 p2, bool render)
1201 {
1202 // Need to make sure that when writing that it stays within the line buffer...
1203 // LBUF ($F01800 - $F01D9E) 360 x 32-bit RAM
1204         uint8 depth = (p1 >> 12) & 0x07;                                // Color depth of image
1205         int32 xpos = ((int16)((p1 << 4) & 0xFFFF)) >> 4;// Image xpos in LBUF
1206         uint32 iwidth = (p1 >> 28) & 0x3FF;                             // Image width in *phrases*
1207         uint32 data = (p0 >> 40) & 0xFFFFF8;                    // Pixel data address
1208 //#ifdef OP_DEBUG_BMP
1209 // Prolly should use this... Though not sure exactly how.
1210 //Use the upper bits as an offset into the phrase depending on the BPP. That's how!
1211         uint32 firstPix = (p1 >> 49) & 0x3F;
1212 //This is WEIRD! I'm sure I saw Atari Karts request 8 BPP FIRSTPIX! What happened???
1213 if (firstPix)
1214         WriteLog("OP: FIRSTPIX != 0! (Scaled BM)\n");
1215 //#endif
1216 // We can ignore the RELEASE (high order) bit for now--probably forever...!
1217 //      uint8 flags = (p1 >> 45) & 0x0F;        // REFLECT, RMW, TRANS, RELEASE
1218 //Optimize: break these out to their own BOOL values [DONE]
1219         uint8 flags = (p1 >> 45) & 0x07;                                // REFLECT (0), RMW (1), TRANS (2)
1220         bool flagREFLECT = (flags & OPFLAG_REFLECT ? true : false),
1221                 flagRMW = (flags & OPFLAG_RMW ? true : false),
1222                 flagTRANS = (flags & OPFLAG_TRANS ? true : false);
1223         uint8 index = (p1 >> 37) & 0xFE;                                // CLUT index offset (upper pix, 1-4 bpp)
1224         uint32 pitch = (p1 >> 15) & 0x07;                               // Phrase pitch
1225
1226         uint8 * tomRam8 = TOMGetRamPointer();
1227         uint8 * paletteRAM = &tomRam8[0x400];
1228         // This is OK as long as it's used correctly: For 16-bit RAM to RAM direct copies--NOT
1229         // for use when using endian-corrected data (i.e., any of the *ReadWord functions!)
1230         uint16 * paletteRAM16 = (uint16 *)paletteRAM;
1231
1232         uint16 hscale = p2 & 0xFF;
1233 // Hmm. It seems that fixing the horizontal scale necessitated re-fixing this. Not sure why,
1234 // but seems to be consistent with the vertical scaling now (and it may turn out to be wrong!)...
1235         uint16 horizontalRemainder = hscale;                            // Not sure if it starts full, but seems reasonable [It's not!]
1236 //      uint8 horizontalRemainder = 0;                                  // Let's try zero! Seems to work! Yay! [No, it doesn't!]
1237         int32 scaledWidthInPixels = (iwidth * phraseWidthToPixels[depth] * hscale) >> 5;
1238         uint32 scaledPhrasePixels = (phraseWidthToPixels[depth] * hscale) >> 5;
1239
1240 //      WriteLog("bitmap %ix? %ibpp at %i,? firstpix=? data=0x%.8x pitch %i hflipped=%s dwidth=? (linked to ?) RMW=%s Tranparent=%s\n",
1241 //              iwidth, op_bitmap_bit_depth[bitdepth], xpos, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), (flags&OPFLAG_RMW ? "yes" : "no"), (flags&OPFLAG_TRANS ? "yes" : "no"));
1242
1243 // Looks like an hscale of zero means don't draw!
1244         if (!render || iwidth == 0 || hscale == 0)
1245                 return;
1246
1247 /*extern int start_logging;
1248 if (start_logging)
1249         WriteLog("OP: Scaled bitmap %ix? %ibpp at %i,? hscale=%02X fpix=%i data=%08X pitch %i hflipped=%s dwidth=? (linked to %08X) Transluency=%s\n",
1250                 iwidth, op_bitmap_bit_depth[depth], xpos, hscale, firstPix, data, pitch, (flagREFLECT ? "yes" : "no"), op_pointer, (flagRMW ? "yes" : "no"));*/
1251 //#define OP_DEBUG_BMP
1252 //#ifdef OP_DEBUG_BMP
1253 //      WriteLog("OP: Scaled bitmap %ix%i %ibpp at %i,%i firstpix=%i data=0x%.8x pitch %i hflipped=%s dwidth=%i (linked to 0x%.8x) Transluency=%s\n",
1254 //              iwidth, height, op_bitmap_bit_depth[bitdepth], xpos, ypos, firstPix, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), dwidth, op_pointer, (flags&OPFLAG_RMW ? "yes" : "no"));
1255 //#endif
1256
1257         int32 startPos = xpos, endPos = xpos +
1258                 (!flagREFLECT ? scaledWidthInPixels - 1 : -(scaledWidthInPixels + 1));
1259         uint32 clippedWidth = 0, phraseClippedWidth = 0, dataClippedWidth = 0;
1260         bool in24BPPMode = (((GET16(tomRam8, 0x0028) >> 1) & 0x03) == 1 ? true : false);        // VMODE
1261         // Not sure if this is Jaguar Two only location or what...
1262         // From the docs, it is... If we want to limit here we should think of something else.
1263 //      int32 limit = GET16(tom_ram_8, 0x0008);                 // LIMIT
1264         int32 limit = 720;
1265 //      int32 lbufWidth = (!in24BPPMode ? limit - 1 : (limit / 2) - 1); // Zero based limit...
1266         int32 lbufWidth = 719;  // Zero based limit...
1267
1268         // If the image is completely to the left or right of the line buffer, then bail.
1269 //If in REFLECT mode, then these values are swapped! !!! FIX !!! [DONE]
1270 //There are four possibilities:
1271 //  1. image sits on left edge and no REFLECT; starts out of bounds but ends in bounds.
1272 //  2. image sits on left edge and REFLECT; starts in bounds but ends out of bounds.
1273 //  3. image sits on right edge and REFLECT; starts out of bounds but ends in bounds.
1274 //  4. image sits on right edge and no REFLECT; starts in bounds but ends out of bounds.
1275 //Numbers 2 & 4 can be caught by checking the LBUF clip while in the inner loop,
1276 // numbers 1 & 3 are of concern.
1277 // This *indirectly* handles only cases 2 & 4! And is WRONG if REFLECT is set...!
1278 //      if (rightMargin < 0 || leftMargin > lbufWidth)
1279
1280 // It might be easier to swap these (if REFLECTed) and just use XPOS down below...
1281 // That way, you could simply set XPOS to leftMargin if !REFLECT and to rightMargin otherwise.
1282 // Still have to be careful with the DATA and IWIDTH values though...
1283
1284         if ((!flagREFLECT && (endPos < 0 || startPos > lbufWidth))
1285                 || (flagREFLECT && (startPos < 0 || endPos > lbufWidth)))
1286                 return;
1287
1288         // Otherwise, find the clip limits and clip the phrase as well...
1289         // NOTE: I'm fudging here by letting the actual blit overstep the bounds of the
1290         //       line buffer, but it shouldn't matter since there are two unused line
1291         //       buffers below and nothing above and I'll at most write 40 bytes outside
1292         //       the line buffer... I could use a fractional clip begin/end value, but
1293         //       this makes the blit a *lot* more hairy. I might fix this in the future
1294         //       if it becomes necessary. (JLH)
1295         //       Probably wouldn't be *that* hairy. Just use a delta that tells the inner loop
1296         //       which pixel in the phrase is being written, and quit when either end of phrases
1297         //       is reached or line buffer extents are surpassed.
1298
1299 //This stuff is probably wrong as well... !!! FIX !!!
1300 //The strange thing is that it seems to work, but that's no guarantee that it's bulletproof!
1301 //Yup. Seems that JagMania doesn't work correctly with this...
1302 //Dunno if this is the problem, but Atari Karts is showing *some* of the road now...
1303 //Actually, it is! Or, it was. It doesn't seem to be clipping here, so the problem lies
1304 //elsewhere! Hmm. Putting the scaling code into the 1/2/8 BPP cases seems to draw the ground
1305 // a bit more accurately... Strange!
1306 //It's probably a case of the REFLECT flag being set and the background being written
1307 //from the right side of the screen...
1308 //But no, it isn't... At least if the diagnostics are telling the truth!
1309
1310         // NOTE: We're just using endPos to figure out how much, if any, to clip by.
1311         // ALSO: There may be another case where we start out of bounds and end out of bounds...!
1312         // !!! FIX !!!
1313
1314 //There's a problem here with scaledPhrasePixels in that it can be forced to zero when
1315 //the scaling factor is small. So fix it already! !!! FIX !!!
1316 /*if (scaledPhrasePixels == 0)
1317 {
1318         WriteLog("OP: [Scaled] We're about to encounter a divide by zero error!\n");
1319         DumpScaledObject(p0, p1, p2);
1320 }//*/
1321 //NOTE: I'm almost 100% sure that this is wrong... And it is! :-p
1322
1323 //Try a simple example...
1324 // Let's say we have a 8 BPP scanline with an hscale of $80 (4). Our xpos is -10,
1325 // non-flipped. Pixels in the bitmap are XYZXYZXYZXYZXYZ.
1326 // Scaled up, they would be XXXXYYYYZZZZXXXXYYYYZZZZXXXXYYYYZZZZ...
1327 //
1328 // Normally, we would expect this in the line buffer:
1329 // ZZXXXXYYYYZZZZXXXXYYYYZZZZ...
1330 //
1331 // But instead we're getting:
1332 // XXXXYYYYZZZZXXXXYYYYZZZZ...
1333 //
1334 // or are we??? It would seem so, simply by virtue of the fact that we're NOT starting
1335 // on negative boundary--or are we? Hmm...
1336 // cw = 10, dcw = pcw = 10 / ([8 * 4 = 32] 32) = 0, sp = -10
1337 //
1338 // Let's try a real world example:
1339 //
1340 //OP: Scaled bitmap (70, 8 BPP, spp=28) sp (-400) < 0... [new sp=-8, cw=400, dcw=pcw=14]
1341 //OP: Scaled bitmap (6F, 8 BPP, spp=27) sp (-395) < 0... [new sp=-17, cw=395, dcw=pcw=14]
1342 //
1343 // Really, spp is 27.75 in the second case...
1344 // So... If we do 395 / 27.75, we get 14. Ok so far... If we scale that against the
1345 // start position (14 * 27.75), we get -6.5... NOT -17!
1346
1347 //Now it seems we're working OK, at least for the first case...
1348 uint32 scaledPhrasePixelsUS = phraseWidthToPixels[depth] * hscale;
1349
1350         if (startPos < 0)                       // Case #1: Begin out, end in, L to R
1351 {
1352 extern int start_logging;
1353 if (start_logging)
1354         WriteLog("OP: Scaled bitmap (%02X, %u BPP, spp=%u) start pos (%i) < 0...", hscale, op_bitmap_bit_depth[depth], scaledPhrasePixels, startPos);
1355 //              clippedWidth = 0 - startPos,
1356                 clippedWidth = (0 - startPos) << 5,
1357 //              dataClippedWidth = phraseClippedWidth = clippedWidth / scaledPhrasePixels,
1358                 dataClippedWidth = phraseClippedWidth = (clippedWidth / scaledPhrasePixelsUS) >> 5,
1359 //              startPos = 0 - (clippedWidth % scaledPhrasePixels);
1360                 startPos += (dataClippedWidth * scaledPhrasePixelsUS) >> 5;
1361 if (start_logging)
1362         WriteLog(" [new sp=%i, cw=%i, dcw=pcw=%i]\n", startPos, clippedWidth, dataClippedWidth);
1363 }
1364
1365         if (endPos < 0)                         // Case #2: Begin in, end out, R to L
1366                 clippedWidth = 0 - endPos,
1367                 phraseClippedWidth = clippedWidth / scaledPhrasePixels;
1368
1369         if (endPos > lbufWidth)         // Case #3: Begin in, end out, L to R
1370                 clippedWidth = endPos - lbufWidth,
1371                 phraseClippedWidth = clippedWidth / scaledPhrasePixels;
1372
1373         if (startPos > lbufWidth)       // Case #4: Begin out, end in, R to L
1374                 clippedWidth = startPos - lbufWidth,
1375                 dataClippedWidth = phraseClippedWidth = clippedWidth / scaledPhrasePixels,
1376                 startPos = lbufWidth + (clippedWidth % scaledPhrasePixels);
1377
1378 extern int op_start_log;
1379 if (op_start_log && clippedWidth != 0)
1380         WriteLog("OP: Clipped line. SP=%i, EP=%i, clip=%u, iwidth=%u, hscale=%02X\n", startPos, endPos, clippedWidth, iwidth, hscale);
1381 if (op_start_log && startPos == 13)
1382 {
1383         WriteLog("OP: Scaled line. SP=%i, EP=%i, clip=%u, iwidth=%u, hscale=%02X, depth=%u, firstPix=%u\n", startPos, endPos, clippedWidth, iwidth, hscale, depth, firstPix);
1384         DumpScaledObject(p0, p1, p2);
1385         if (iwidth == 7)
1386         {
1387                 WriteLog("    %08X: ", data);
1388                 for(int i=0; i<7*8; i++)
1389                         WriteLog("%02X ", JaguarReadByte(data+i));
1390                 WriteLog("\n");
1391         }
1392 }
1393         // If the image is sitting on the line buffer left or right edge, we need to compensate
1394         // by decreasing the image phrase width accordingly.
1395         iwidth -= phraseClippedWidth;
1396
1397         // Also, if we're clipping the phrase we need to make sure we're in the correct part of
1398         // the pixel data.
1399 //      data += phraseClippedWidth * (pitch << 3);
1400         data += dataClippedWidth * (pitch << 3);
1401
1402         // NOTE: When the bitmap is in REFLECT mode, the XPOS marks the *right* side of the
1403         //       bitmap! This makes clipping & etc. MUCH, much easier...!
1404 //      uint32 lbufAddress = 0x1800 + (!in24BPPMode ? leftMargin * 2 : leftMargin * 4);
1405 //      uint32 lbufAddress = 0x1800 + (!in24BPPMode ? startPos * 2 : startPos * 4);
1406         uint32 lbufAddress = 0x1800 + startPos * 2;
1407         uint8 * currentLineBuffer = &tomRam8[lbufAddress];
1408 //uint8 * lineBufferLowerLimit = &tom_ram_8[0x1800],
1409 //      * lineBufferUpperLimit = &tom_ram_8[0x1800 + 719];
1410
1411         // Render.
1412
1413 // Hmm. We check above for 24 BPP mode, but don't do anything about it below...
1414 // If we *were* in 24 BPP mode, how would you convert CRY to RGB24? Seems to me
1415 // that if you're in CRY mode then you wouldn't be able to use 24 BPP bitmaps
1416 // anyway.
1417 // This seems to be the case (at least according to the Midsummer docs)...!
1418
1419         if (depth == 0)                                                                 // 1 BPP
1420         {
1421 if (firstPix != 0)
1422         WriteLog("OP: Scaled bitmap @ 1 BPP requesting FIRSTPIX!\n");
1423                 // The LSB of flags is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1424                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1425
1426                 int pixCount = 0;
1427                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1428
1429                 while ((int32)iwidth > 0)
1430                 {
1431                         uint8 bits = pixels >> 63;
1432
1433 #ifndef OP_USES_PALETTE_ZERO
1434                         if (flagTRANS && bits == 0)
1435 #else
1436                         if (flagTRANS && (paletteRAM16[index | bits] == 0))
1437 #endif
1438                                 ;       // Do nothing...
1439                         else
1440                         {
1441                                 if (!flagRMW)
1442                                         // This is the *only* correct use of endian-dependent code
1443                                         // (i.e., mem-to-mem direct copying)!
1444                                         *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1445                                 else
1446                                         *currentLineBuffer =
1447                                                 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1448                                         *(currentLineBuffer + 1) =
1449                                                 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1450                         }
1451
1452                         currentLineBuffer += lbufDelta;
1453
1454 /*
1455 The reason we subtract the horizontalRemainder *after* the test is because we had too few
1456 bytes for horizontalRemainder to properly recognize a negative number. But now it's 16 bits
1457 wide, so we could probably go back to that (as long as we make it an int16 and not a uint16!)
1458 */
1459 /*                      horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1460                         while (horizontalRemainder & 0x80)
1461                         {
1462                                 horizontalRemainder += hscale;
1463                                 pixCount++;
1464                                 pixels <<= 1;
1465                         }//*/
1466 //                      while (horizontalRemainder <= 0x20)             // I.e., it's <= 1.0 (*before* subtraction)
1467                         while (horizontalRemainder < 0x20)              // I.e., it's <= 1.0 (*before* subtraction)
1468                         {
1469                                 horizontalRemainder += hscale;
1470                                 pixCount++;
1471                                 pixels <<= 1;
1472                         }
1473                         horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1474
1475                         if (pixCount > 63)
1476                         {
1477                                 int phrasesToSkip = pixCount / 64, pixelShift = pixCount % 64;
1478
1479                                 data += (pitch << 3) * phrasesToSkip;
1480                                 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1481                                 pixels <<= 1 * pixelShift;
1482                                 iwidth -= phrasesToSkip;
1483                                 pixCount = pixelShift;
1484                         }
1485                 }
1486         }
1487         else if (depth == 1)                                                    // 2 BPP
1488         {
1489 if (firstPix != 0)
1490         WriteLog("OP: Scaled bitmap @ 2 BPP requesting FIRSTPIX!\n");
1491                 index &= 0xFC;                                                          // Top six bits form CLUT index
1492                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1493                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1494
1495                 int pixCount = 0;
1496                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1497
1498                 while ((int32)iwidth > 0)
1499                 {
1500                         uint8 bits = pixels >> 62;
1501
1502 #ifndef OP_USES_PALETTE_ZERO
1503                         if (flagTRANS && bits == 0)
1504 #else
1505                         if (flagTRANS && (paletteRAM16[index | bits] == 0))
1506 #endif
1507                                 ;       // Do nothing...
1508                         else
1509                         {
1510                                 if (!flagRMW)
1511                                         // This is the *only* correct use of endian-dependent code
1512                                         // (i.e., mem-to-mem direct copying)!
1513                                         *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1514                                 else
1515                                         *currentLineBuffer =
1516                                                 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1517                                         *(currentLineBuffer + 1) =
1518                                                 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1519                         }
1520
1521                         currentLineBuffer += lbufDelta;
1522
1523 /*                      horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1524                         while (horizontalRemainder & 0x80)
1525                         {
1526                                 horizontalRemainder += hscale;
1527                                 pixCount++;
1528                                 pixels <<= 2;
1529                         }//*/
1530 //                      while (horizontalRemainder <= 0x20)             // I.e., it's <= 0 (*before* subtraction)
1531                         while (horizontalRemainder < 0x20)              // I.e., it's <= 1.0 (*before* subtraction)
1532                         {
1533                                 horizontalRemainder += hscale;
1534                                 pixCount++;
1535                                 pixels <<= 2;
1536                         }
1537                         horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1538
1539                         if (pixCount > 31)
1540                         {
1541                                 int phrasesToSkip = pixCount / 32, pixelShift = pixCount % 32;
1542
1543                                 data += (pitch << 3) * phrasesToSkip;
1544                                 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1545                                 pixels <<= 2 * pixelShift;
1546                                 iwidth -= phrasesToSkip;
1547                                 pixCount = pixelShift;
1548                         }
1549                 }
1550         }
1551         else if (depth == 2)                                                    // 4 BPP
1552         {
1553 if (firstPix != 0)
1554         WriteLog("OP: Scaled bitmap @ 4 BPP requesting FIRSTPIX!\n");
1555                 index &= 0xF0;                                                          // Top four bits form CLUT index
1556                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1557                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1558
1559                 int pixCount = 0;
1560                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1561
1562                 while ((int32)iwidth > 0)
1563                 {
1564                         uint8 bits = pixels >> 60;
1565
1566 #ifndef OP_USES_PALETTE_ZERO
1567                         if (flagTRANS && bits == 0)
1568 #else
1569                         if (flagTRANS && (paletteRAM16[index | bits] == 0))
1570 #endif
1571                                 ;       // Do nothing...
1572                         else
1573                         {
1574                                 if (!flagRMW)
1575                                         // This is the *only* correct use of endian-dependent code
1576                                         // (i.e., mem-to-mem direct copying)!
1577                                         *(uint16 *)currentLineBuffer = paletteRAM16[index | bits];
1578                                 else
1579                                         *currentLineBuffer =
1580                                                 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1581                                         *(currentLineBuffer + 1) =
1582                                                 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1583                         }
1584
1585                         currentLineBuffer += lbufDelta;
1586
1587 /*                      horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1588                         while (horizontalRemainder & 0x80)
1589                         {
1590                                 horizontalRemainder += hscale;
1591                                 pixCount++;
1592                                 pixels <<= 4;
1593                         }//*/
1594 //                      while (horizontalRemainder <= 0x20)             // I.e., it's <= 0 (*before* subtraction)
1595                         while (horizontalRemainder < 0x20)              // I.e., it's <= 0 (*before* subtraction)
1596                         {
1597                                 horizontalRemainder += hscale;
1598                                 pixCount++;
1599                                 pixels <<= 4;
1600                         }
1601                         horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1602
1603                         if (pixCount > 15)
1604                         {
1605                                 int phrasesToSkip = pixCount / 16, pixelShift = pixCount % 16;
1606
1607                                 data += (pitch << 3) * phrasesToSkip;
1608                                 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1609                                 pixels <<= 4 * pixelShift;
1610                                 iwidth -= phrasesToSkip;
1611                                 pixCount = pixelShift;
1612                         }
1613                 }
1614         }
1615         else if (depth == 3)                                                    // 8 BPP
1616         {
1617 if (firstPix)
1618         WriteLog("OP: Scaled bitmap @ 8 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1619                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1620                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1621
1622                 int pixCount = 0;
1623                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1624
1625                 while ((int32)iwidth > 0)
1626                 {
1627                         uint8 bits = pixels >> 56;
1628
1629 #ifndef OP_USES_PALETTE_ZERO
1630                         if (flagTRANS && bits == 0)
1631 #else
1632                         if (flagTRANS && (paletteRAM16[bits] == 0))
1633 #endif
1634                                 ;       // Do nothing...
1635                         else
1636                         {
1637                                 if (!flagRMW)
1638                                         // This is the *only* correct use of endian-dependent code
1639                                         // (i.e., mem-to-mem direct copying)!
1640                                         *(uint16 *)currentLineBuffer = paletteRAM16[bits];
1641 /*                              {
1642                                         if (currentLineBuffer >= lineBufferLowerLimit && currentLineBuffer <= lineBufferUpperLimit)
1643                                                 *(uint16 *)currentLineBuffer = paletteRAM16[bits];
1644                                 }*/
1645                                 else
1646                                         *currentLineBuffer =
1647                                                 BLEND_CR(*currentLineBuffer, paletteRAM[bits << 1]),
1648                                         *(currentLineBuffer + 1) =
1649                                                 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[(bits << 1) + 1]);
1650                         }
1651
1652                         currentLineBuffer += lbufDelta;
1653
1654 //                      while (horizontalRemainder <= 0x20)             // I.e., it's <= 0 (*before* subtraction)
1655                         while (horizontalRemainder < 0x20)              // I.e., it's <= 1.0 (*before* subtraction)
1656                         {
1657                                 horizontalRemainder += hscale;
1658                                 pixCount++;
1659                                 pixels <<= 8;
1660                         }
1661                         horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1662
1663                         if (pixCount > 7)
1664                         {
1665                                 int phrasesToSkip = pixCount / 8, pixelShift = pixCount % 8;
1666
1667                                 data += (pitch << 3) * phrasesToSkip;
1668                                 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1669                                 pixels <<= 8 * pixelShift;
1670                                 iwidth -= phrasesToSkip;
1671                                 pixCount = pixelShift;
1672                         }
1673                 }
1674         }
1675         else if (depth == 4)                                                    // 16 BPP
1676         {
1677 if (firstPix != 0)
1678         WriteLog("OP: Scaled bitmap @ 16 BPP requesting FIRSTPIX!\n");
1679                 // The LSB is OPFLAG_REFLECT, so sign extend it and OR 2 into it.
1680                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 5) | 0x02;
1681
1682                 int pixCount = 0;
1683                 uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1684
1685                 while ((int32)iwidth > 0)
1686                 {
1687                         uint8 bitsHi = pixels >> 56, bitsLo = pixels >> 48;
1688
1689 //This doesn't seem right... Let's try the encoded black value ($8800):
1690 //Apparently, CRY 0 maps to $8800...
1691                                 if (flagTRANS && ((bitsLo | bitsHi) == 0))
1692 //                              if (flagTRANS && (bitsHi == 0x88) && (bitsLo == 0x00))
1693                                 ;       // Do nothing...
1694                         else
1695                         {
1696                                 if (!flagRMW)
1697                                         *currentLineBuffer = bitsHi,
1698                                         *(currentLineBuffer + 1) = bitsLo;
1699                                 else
1700                                         *currentLineBuffer =
1701                                                 BLEND_CR(*currentLineBuffer, bitsHi),
1702                                         *(currentLineBuffer + 1) =
1703                                                 BLEND_Y(*(currentLineBuffer + 1), bitsLo);
1704                         }
1705
1706                         currentLineBuffer += lbufDelta;
1707
1708 /*                      horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1709                         while (horizontalRemainder & 0x80)
1710                         {
1711                                 horizontalRemainder += hscale;
1712                                 pixCount++;
1713                                 pixels <<= 16;
1714                         }//*/
1715 //                      while (horizontalRemainder <= 0x20)             // I.e., it's <= 0 (*before* subtraction)
1716                         while (horizontalRemainder < 0x20)              // I.e., it's <= 1.0 (*before* subtraction)
1717                         {
1718                                 horizontalRemainder += hscale;
1719                                 pixCount++;
1720                                 pixels <<= 16;
1721                         }
1722                         horizontalRemainder -= 0x20;            // Subtract 1.0f in [3.5] fixed point format
1723 //*/
1724                         if (pixCount > 3)
1725                         {
1726                                 int phrasesToSkip = pixCount / 4, pixelShift = pixCount % 4;
1727
1728                                 data += (pitch << 3) * phrasesToSkip;
1729                                 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1730                                 pixels <<= 16 * pixelShift;
1731
1732                                 iwidth -= phrasesToSkip;
1733
1734                                 pixCount = pixelShift;
1735                         }
1736                 }
1737         }
1738         else if (depth == 5)                                                    // 24 BPP
1739         {
1740 //I'm not sure that you can scale a 24 BPP bitmap properly--the JTRM seem to indicate as much.
1741 WriteLog("OP: Writing 24 BPP scaled bitmap!\n");
1742 if (firstPix != 0)
1743         WriteLog("OP: Scaled bitmap @ 24 BPP requesting FIRSTPIX!\n");
1744                 // Not sure, but I think RMW only works with 16 BPP and below, and only in CRY mode...
1745                 // The LSB is OPFLAG_REFLECT, so sign extend it and or 4 into it.
1746                 int32 lbufDelta = ((int8)((flags << 7) & 0xFF) >> 4) | 0x04;
1747
1748                 while (iwidth--)
1749                 {
1750                         // Fetch phrase...
1751                         uint64 pixels = ((uint64)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1752                         data += pitch << 3;                                             // Multiply pitch * 8 (optimize: precompute this value)
1753
1754                         for(int i=0; i<2; i++)
1755                         {
1756                                 uint8 bits3 = pixels >> 56, bits2 = pixels >> 48,
1757                                         bits1 = pixels >> 40, bits0 = pixels >> 32;
1758
1759                                 if (flagTRANS && (bits3 | bits2 | bits1 | bits0) == 0)
1760                                         ;       // Do nothing...
1761                                 else
1762                                         *currentLineBuffer = bits3,
1763                                         *(currentLineBuffer + 1) = bits2,
1764                                         *(currentLineBuffer + 2) = bits1,
1765                                         *(currentLineBuffer + 3) = bits0;
1766
1767                                 currentLineBuffer += lbufDelta;
1768                                 pixels <<= 32;
1769                         }
1770                 }
1771         }
1772 }