// Floating point to IEEE-754 conversion routines
//
// by James Hammons
-// (C) 2018 Underground Software
+// (C) 2019 Underground Software
//
// Since there are no guarantees vis-a-vis floating point numbers in C, we have
// to utilize routines like the following in order to guarantee that the thing
#include "fltpoint.h"
#include <float.h>
#include <math.h>
+#include <stdio.h>
+#include "error.h"
//
// Check for IEEE-754 conformance (C99 compilers should be OK here)
uint32_t FloatToIEEE754(float f)
{
- uint32_t sign = (f < 0 ? 0x80000000 : 0);
+ uint32_t sign = (signbit(f) ? 0x80000000 : 0);
// Split the float into normalized mantissa (range: (-1, -0.5], 0,
// [+0.5, +1)) and base-2 exponent
// f = mantissa * (2 ^ exponent) *exactly* for FLT_RADIX=2
// Also, since we want the mantissa to be non-inverted (2's complemented),
- // we make sure to pass in a positive number (floats/doubles are not 2's
+ // we make sure to pass in a positive number (floats/doubles are *not* 2's
// complemented) as we already captured the sign bit above.
int32_t exponent;
float mantissa = frexpf((f < 0 ? -f : f), &exponent);
uint64_t DoubleToIEEE754(double d)
{
- uint64_t sign = (d < 0 ? 0x8000000000000000LL : 0);
+ uint64_t sign = (signbit(d) ? 0x8000000000000000LL : 0);
int32_t exponent;
// Split double into normalized mantissa (range: (-1, -0.5], 0, [+0.5, +1))
// and base-2 exponent
// d = mantissa * (2 ^ exponent) *exactly* for FLT_RADIX=2
// Also, since we want the mantissa to be non-inverted (2's complemented),
- // we make sure to pass in a positive number (floats/doubles are not 2's
+ // we make sure to pass in a positive number (floats/doubles are *not* 2's
// complemented) as we already captured the sign bit above.
double mantissa = frexp((d < 0 ? -d : d), &exponent);
void DoubleToExtended(double d, uint8_t out[])
{
+ int8_t sign = (signbit(d) ? 0x80 : 0);
int32_t exponent;
double mantissa = frexp((d < 0 ? -d : d), &exponent);
- exponent += 0x3FFF;
+ exponent += 0x3FFE;
if (d == 0)
exponent = 0;
// Motorola extended floating point is 96 bits, so we pack it into the
// 12-byte array that's passed in. The format is as follows: 1 bit (sign),
// 15 bits (exponent w/$3FFF bias), 16 bits of zero, 64 bits of mantissa.
- out[0] = (d < 0 ? 0x80 : 0x00) | ((exponent >> 8) & 0x7F);
+ out[0] = sign | ((exponent >> 8) & 0x7F);
out[1] = exponent & 0xFF;
out[2] = 0;
out[3] = 0;
out[11] = intMant & 0xFF;
}
+
//
// Convert a double to a DSP56001 style fixed point fraction.
//
// The value is scaled by 2^23 and rounded to the nearest integer, giving 23
// bits of magnitude with the sign in bit 23 (two's complement).
//
// d: value to convert; the representable range is [-1, +1).
//
// Returns the encoded value. Out-of-range inputs are clamped (with a warning)
// to $7FFFFF for d >= +1 and $800000 for d <= -1.
//
// NOTE: negative in-range results come back sign-extended through all 32 bits
// (e.g. -0.5 yields $FFC00000), while the -1 clamp returns the bare 24-bit
// pattern $800000. Callers that want a 24-bit word should mask with $FFFFFF.
//
uint32_t DoubleToDSPFloat(double d)
{
	if (d >= 1)
	{
		warn("DSP value clamped to +1.");
		return 0x7FFFFF;
	}
	else if (d <= -1)
	{
		warn("DSP value clamped to -1.");
		return 0x800000;
	}

	// Scale into 23 fractional bits, rounding to nearest.
	double scaled = round(ldexp(d, 23));

	// Inputs just below +1 (d >= 1 - 2^-24) round *up* to 2^23 = $800000,
	// which is the bit pattern for -1. Saturate to the largest positive value
	// instead of silently flipping the sign.
	if (scaled >= (double)0x800000)
		return 0x7FFFFF;

	// Go through int32_t first: converting a negative double straight to an
	// unsigned type is not well defined and some compilers get it wrong. See
	// bug #149.
	return (uint32_t)((int32_t)trunc(scaled));
}
+
+
//
// Convert a host native floating point number to a fixed point number.
//
// d:        value to convert
// intBits:  number of integer bits in the target format (currently unused;
//           the result is NOT masked or range checked against it)
// fracBits: number of fractional bits; must be in the range 0..63
//
// Returns the magnitude of d scaled by 2^fracBits and truncated toward zero,
// in 64-bit two's complement form if d is negative.
//
uint64_t DoubleToFixedPoint(double d, int intBits, int fracBits)
{
	(void)intBits;	// Kept for interface compatibility

	int negative = (signbit(d) ? 1 : 0);

	// Ensure what we're working on is positive...
	if (d < 0)
		d = -d;

	// Use a 64-bit shift: (1 << fracBits) on a plain int is undefined once
	// fracBits reaches 31.
	double scaleFactor = (double)(1ULL << fracBits);
	uint64_t result = (uint64_t)(d * scaleFactor);

	// Two's complement negate the result, if necessary (invert all bits, then
	// add one)
	if (negative)
		result = ~result + 1;

	return result;
}
+