Shamusworld >> Repos - rmac/blobdiff - token.c
Lots of fixes for floating point handling; version now at 1.11.0.
[rmac] / token.c
diff --git a/token.c b/token.c
index 4ceaa579abb3e232931c1aae68533408f6ff746b..f1f88caec4b81fa921993566eb4a8a3084fa49b7 100644 (file)
--- a/token.c
+++ b/token.c
@@ -7,6 +7,8 @@
 //
 
 #include "token.h"
+
+#include <errno.h>
 #include "direct.h"
 #include "error.h"
 #include "macro.h"
@@ -35,7 +37,7 @@ WORD cfileno;                         // Current file number
 TOKEN * tok;                           // Ptr to current token
 TOKEN * etok;                          // Ptr past last token in tokbuf[]
 TOKEN tokeol[1] = {EOL};       // Bailout end-of-line token
-char * string[TOKBUFSIZE*2];   // Token buffer string pointer storage
+char * string[TOKBUFSIZE*2];// Token buffer string pointer storage
 int optimizeOff;                       // Optimization override flag
 
 // File record, used to maintain a list of every include file ever visited
@@ -206,13 +208,13 @@ void InitTokenizer(void)
        dotxtab['W'] = DOTW;
        dotxtab['l'] = DOTL;                                    // .l .L
        dotxtab['L'] = DOTL;
-       dotxtab['i'] = DOTI;                                    // .i .I (???)
+       dotxtab['i'] = DOTI;                                    // .i .I (WTF is this???)
        dotxtab['I'] = DOTI;
-       dotxtab['D'] = DOTD;                                    // .d .D (quad word)
+       dotxtab['D'] = DOTD;                                    // .d .D (double)
        dotxtab['d'] = DOTD;
        dotxtab['S'] = DOTS;                                    // .s .S
        dotxtab['s'] = DOTS;
-       dotxtab['Q'] = DOTQ;                                    // .q .Q
+       dotxtab['Q'] = DOTQ;                                    // .q .Q (quad word)
        dotxtab['q'] = DOTQ;
        dotxtab['X'] = DOTX;                                    // .x .x
        dotxtab['x'] = DOTX;
@@ -390,7 +392,7 @@ int ExpandMacro(char * src, char * dest, int destsiz)
                                *dst++ = *s++;
                                continue;
                        case '?':                                               // \? <macro>  set `questmark' flag
-                               ++s;
+                               s++;
                                questmark = 1;
                                break;
                        case '#':                                               // \#, number of arguments
@@ -568,8 +570,8 @@ DEBUG { printf("ExM: SYMBOL=\"%s\"", d); }
 //         to choke on legitimate code... Need to investigate this further
 //         before changing anything else here!
                                                        case CONST:
-                                                               tk++;   // Skip the hi LONG...
                                                                sprintf(numbuf, "$%lx", (uint64_t)*tk++);
+                                                               tk++;
                                                                d = numbuf;
                                                                break;
                                                        case DEQUALS:
@@ -714,6 +716,11 @@ char * GetNextRepeatLine(void)
 
 //             strp = irept->ir_nextln;
        }
+       // Mark the current macro line in the irept object
+       // This is probably overkill - a global variable
+       // would suffice here (it only gets used during
+       // error reporting anyway)
+       irept->lineno = irept->ir_nextln->lineno;
 
 //     strcpy(irbuf, (char *)(irept->ir_nextln + 1));
        strcpy(irbuf, irept->ir_nextln->line);
@@ -946,11 +953,13 @@ int TokenizeLine(void)
 {
        uint8_t * ln = NULL;            // Ptr to current position in line
        uint8_t * p;                            // Random character ptr
-       TOKEN * tk;                                     // Token-deposit ptr
+       PTR tk;                                         // Token-deposit ptr
        int state = 0;                          // State for keyword detector
        int j = 0;                                      // Var for keyword detector
        uint8_t c;                                      // Random char
        uint64_t v;                                     // Random value
+       uint32_t cursize = 0;           // Current line's size (.b, .w, .l, .s, .q, .d)
+       double f;                                       // Random float
        uint8_t * nullspot = NULL;      // Spot to clobber for SYMBOL termination
        int stuffnull;                          // 1:terminate SYMBOL '\0' at *nullspot
        uint8_t c1;
@@ -958,7 +967,7 @@ int TokenizeLine(void)
 
 retry:
 
-       if (cur_inobj == NULL)                                  // Return EOF if input stack is empty
+       if (cur_inobj == NULL)                  // Return EOF if input stack is empty
                return TKEOF;
 
        // Get another line of input from the current input source: a file, a
@@ -1045,8 +1054,8 @@ DEBUG { printf("TokenizeLine: Calling fpop() from SRC_IFILE...\n"); }
                strcpy(lnbuf, ln);
 
        // General housekeeping
-       tok = tokeol;                   // Set "tok" to EOL in case of error
-       tk = etok;                              // Reset token ptr
+       tok = tokeol;           // Set "tok" to EOL in case of error
+       tk.u32 = etok;                  // Reset token ptr
        stuffnull = 0;                  // Don't stuff nulls
        totlines++;                             // Bump total #lines assembled
 
@@ -1109,7 +1118,7 @@ DEBUG { printf("TokenizeLine: Calling fpop() from SRC_IFILE...\n"); }
                                        // token stream:
                                        ln++;
                                        stuffnull = 0;
-                                       *tk++ = (TOKEN)dotxtab[*ln++];
+                                       *tk.u32++ = (TOKEN)dotxtab[*ln++];
                                        continue;
                                }
                        }
@@ -1127,7 +1136,7 @@ DEBUG { printf("TokenizeLine: Calling fpop() from SRC_IFILE...\n"); }
                                *ln++ = EOS;            // Terminate symbol
                                stuffnull = 0;          // And never try it again
 
-                               // Character following the `.' must have a DOT attribute, and
+                               // Character following the '.' must have a DOT attribute, and
                                // the chararacter after THAT one must not have a start-symbol
                                // attribute (to prevent symbols that look like, for example,
                                // "zingo.barf", which might be a good idea anyway....)
@@ -1135,6 +1144,7 @@ DEBUG { printf("TokenizeLine: Calling fpop() from SRC_IFILE...\n"); }
                                        return error("[bwsl] must follow '.' in symbol");
 
                                v = (uint32_t)dotxtab[*ln++];
+                               cursize = (uint32_t)v;
 
                                if (chrtab[*ln] & CTSYM)
                                        return error("misuse of '.'; not allowed in symbols");
@@ -1191,7 +1201,7 @@ DEBUG { printf("TokenizeLine: Calling fpop() from SRC_IFILE...\n"); }
                        // If not tokenized keyword OR token was not found
                        if ((j < 0) || (state < 0))
                        {
-                               *tk++ = SYMBOL;
+                               *tk.u32++ = SYMBOL;
 //#warning
 //problem here: nullspot is a char * but TOKEN is a uint32_t. On a 64-bit
 //system, this will cause all kinds of mischief.
@@ -1199,18 +1209,18 @@ DEBUG { printf("TokenizeLine: Calling fpop() from SRC_IFILE...\n"); }
                                *tk++ = (TOKEN)nullspot;
 #else
                                string[stringNum] = nullspot;
-                               *tk++ = stringNum;
+                               *tk.u32++ = stringNum;
                                stringNum++;
 #endif
                        }
                        else
                        {
-                               *tk++ = (TOKEN)j;
+                               *tk.u32++ = (TOKEN)j;
                                stuffnull = 0;
                        }
 
                        if (v)                                                  // Record attribute token (if any)
-                               *tk++ = (TOKEN)v;
+                               *tk.u32++ = (TOKEN)v;
 
                        if (stuffnull)                                  // Arrange for string termination on next pass
                                nullspot = ln;
@@ -1221,7 +1231,7 @@ DEBUG { printf("TokenizeLine: Calling fpop() from SRC_IFILE...\n"); }
                // Handle identity tokens
                if (c & SELF)
                {
-                       *tk++ = *ln++;
+                       *tk.u32++ = *ln++;
                        continue;
                }
 
@@ -1233,27 +1243,27 @@ DEBUG { printf("TokenizeLine: Calling fpop() from SRC_IFILE...\n"); }
                        case '!':               // ! or !=
                                if (*ln == '=')
                                {
-                                       *tk++ = NE;
-                                       ++ln;
+                                       *tk.u32++ = NE;
+                                       ln++;
                                }
                                else
-                                       *tk++ = '!';
+                                       *tk.u32++ = '!';
 
                                continue;
                        case '\'':              // 'string'
                                if (m6502)
                                {
                                        // Hardcoded for now, maybe this will change in the future
-                                       *tk++ = STRINGA8;
+                                       *tk.u32++ = STRINGA8;
                                        goto dostring;
                                }
                                // Fall through
                        case '\"':              // "string"
-                               *tk++ = STRING;
+                               *tk.u32++ = STRING;
 dostring:
                                c1 = ln[-1];
                                string[stringNum] = ln;
-                               *tk++ = stringNum;
+                               *tk.u32++ = stringNum;
                                stringNum++;
 
                                for(p=ln; *ln!=EOS && *ln!=c1;)
@@ -1344,9 +1354,8 @@ dostring:
                                                }
                                        }
 
-                                       *tk++ = CONST;
-                                       *tk++ = v >> 32;                // High LONG of 64-bit value
-                                       *tk++ = v & 0xFFFFFFFF; // Low LONG of 64-bit value
+                                       *tk.u32++ = CONST;
+                                       *tk.u64++ = v;
 
                                        if (obj_format == ALCYON)
                                        {
@@ -1354,79 +1363,79 @@ dostring:
                                                {
                                                        if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
                                                        {
-                                                               *tk++ = DOTW;
+                                                               *tk.u32++ = DOTW;
                                                                ln += 2;
                                                        }
                                                        else if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
                                                        {
-                                                               *tk++ = DOTL;
+                                                               *tk.u32++ = DOTL;
                                                                ln += 2;
                                                        }
                                                }
                                        }
                                }
                                else
-                                       *tk++ = '$';
+                                       *tk.u32++ = '$';
 
                                continue;
                        case '<':               // < or << or <> or <=
                                switch (*ln)
                                {
                                case '<':
-                                       *tk++ = SHL;
-                                       ++ln;
+                                       *tk.u32++ = SHL;
+                                       ln++;
                                        continue;
                                case '>':
-                                       *tk++ = NE;
-                                       ++ln;
+                                       *tk.u32++ = NE;
+                                       ln++;
                                        continue;
                                case '=':
-                                       *tk++ = LE;
-                                       ++ln;
+                                       *tk.u32++ = LE;
+                                       ln++;
                                        continue;
                                default:
-                                       *tk++ = '<';
+                                       *tk.u32++ = '<';
                                        continue;
                                }
                        case ':':               // : or ::
                                if (*ln == ':')
                                {
-                                       *tk++ = DCOLON;
-                                       ++ln;
+                                       *tk.u32++ = DCOLON;
+                                       ln++;
                                }
                                else
-                                       *tk++ = ':';
+                                       *tk.u32++ = ':';
 
                                continue;
                        case '=':               // = or ==
                                if (*ln == '=')
                                {
-                                       *tk++ = DEQUALS;
-                                       ++ln;
+                                       *tk.u32++ = DEQUALS;
+                                       ln++;
                                }
                                else
-                                       *tk++ = '=';
+                                       *tk.u32++ = '=';
 
                                continue;
                        case '>':               // > or >> or >=
                                switch (*ln)
                                {
                                case '>':
-                                       *tk++ = SHR;
+                                       *tk.u32++ = SHR;
                                        ln++;
                                        continue;
                                case '=':
-                                       *tk++ = GE;
+                                       *tk.u32++ = GE;
                                        ln++;
                                        continue;
                                default:
-                                       *tk++ = '>';
+                                       *tk.u32++ = '>';
                                        continue;
                                }
                        case '%':               // % or binary constant
                                if (*ln < '0' || *ln > '1')
                                {
-                                       *tk++ = '%';
+                                       *tk.u32++ = '%';
                                        continue;
                                }
 
@@ -1456,14 +1465,13 @@ dostring:
                                        }
                                }
 
-                               *tk++ = CONST;
-                               *tk++ = v >> 32;                // High LONG of 64-bit value
-                               *tk++ = v & 0xFFFFFFFF; // Low LONG of 64-bit value
+                               *tk.u32++ = CONST;
+                               *tk.u64++ = v;
                                continue;
                        case '@':               // @ or octal constant
                                if (*ln < '0' || *ln > '7')
                                {
-                                       *tk++ = '@';
+                                       *tk.u32++ = '@';
                                        continue;
                                }
 
@@ -1474,33 +1482,32 @@ dostring:
 
                                if (*ln == '.')
                                {
-                                       if ((*(ln+1) == 'b') || (*(ln+1) == 'B'))
+                                       if ((*(ln + 1) == 'b') || (*(ln + 1) == 'B'))
                                        {
                                                v &= 0x000000FF;
                                                ln += 2;
                                        }
 
-                                       if ((*(ln+1) == 'w') || (*(ln+1) == 'W'))
+                                       if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
                                        {
                                                v &= 0x0000FFFF;
                                                ln += 2;
                                        }
 
-                                       if ((*(ln+1) == 'l') || (*(ln+1) == 'L'))
+                                       if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
                                        {
                                                v &= 0xFFFFFFFF;
                                                ln += 2;
                                        }
                                }
 
-                               *tk++ = CONST;
-                               *tk++ = v >> 32;                // High LONG of 64-bit value
-                               *tk++ = v & 0xFFFFFFFF; // Low LONG of 64-bit value
+                               *tk.u32++ = CONST;
+                               *tk.u64++ = v;
                                continue;
                        case '^':               // ^ or ^^ <operator-name>
                                if (*ln != '^')
                                {
-                                       *tk++ = '^';
+                                       *tk.u32++ = '^';
                                        continue;
                                }
 
@@ -1546,7 +1553,7 @@ dostring:
                                        continue;
                                }
 
-                               *tk++ = (TOKEN)j;
+                               *tk.u32++ = (TOKEN)j;
                                continue;
                        default:
                                interror(2);    // Bad MULTX entry in chrtab
@@ -1557,6 +1564,7 @@ dostring:
                // Handle decimal constant
                if (c & DIGIT)
                {
+                       uint8_t * numStart = ln;
                        v = 0;
 
                        while ((int)chrtab[*ln] & DIGIT)
@@ -1569,35 +1577,53 @@ dostring:
                                {
                                        v &= 0x000000FF;
                                        ln += 2;
-                                       *tk++ = CONST;
-                                       *tk++ = 0;                      // Hi LONG of 64-bits
-                                       *tk++ = v;
-                                       *tk++ = DOTB;
+                                       *tk.u32++ = CONST;
+                                       *tk.u64++ = v;
+                                       *tk.u32++ = DOTB;
                                }
                                else if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
                                {
                                        v &= 0x0000FFFF;
                                        ln += 2;
-                                       *tk++ = CONST;
-                                       *tk++ = 0;                      // Hi LONG of 64-bits
-                                       *tk++ = v;
-                                       *tk++ = DOTW;
+                                       *tk.u32++ = CONST;
+                                       *tk.u64++ = v;
+                                       *tk.u32++ = DOTW;
                                }
                                else if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
                                {
                                        v &= 0xFFFFFFFF;
                                        ln += 2;
-                                       *tk++ = CONST;
-                                       *tk++ = 0;                      // Hi LONG of 64-bits
-                                       *tk++ = v;
-                                       *tk++ = DOTL;
+                                       *tk.u32++ = CONST;
+                                       *tk.u64++ = v;
+                                       *tk.u32++ = DOTL;
+                               }
+                               else if ((int)chrtab[*(ln + 1)] & DIGIT)
+                               {
+                                       // Hey, more digits after the dot, so we assume it's a
+                                       // floating point number of some kind... numEnd will point
+                                       // to the first non-float character after it's done
+                                       char * numEnd;
+                                       errno = 0;
+                                       double f = strtod(numStart, &numEnd);
+                                       ln = (uint8_t *)numEnd;
+
+                                       if (errno != 0)
+                                               return error("floating point parse error");
+
+                                       // N.B.: We use the C compiler's internal double
+                                       //       representation for all internal float calcs and
+                                       //       are reasonably sure that the size of said double
+                                       //       is 8 bytes long (which we check for in fltpoint.c)
+                                       *tk.u32++ = FCONST;
+                                       *tk.dp = f;
+                                       tk.u64++;
+                                       continue;
                                }
                        }
                        else
                        {
-                               *tk++ = CONST;
-                               *tk++ = v >> 32;                // High LONG of 64-bit value
-                               *tk++ = v & 0xFFFFFFFF; // Low LONG of 64-bit value
+                               *tk.u32++ = CONST;
+                               *tk.u64++ = v;
                        }
 
 //printf("CONST: %i\n", v);
@@ -1611,12 +1637,12 @@ dostring:
        // Terminate line of tokens and return "success."
 
 goteol:
-       tok = etok;                                                             // Set tok to beginning of line
+       tok = etok;                                                     // Set tok to beginning of line
 
        if (stuffnull)                                                  // Terminate last SYMBOL
                *nullspot = EOS;
 
-       *tk++ = EOL;
+       *tk.u32++ = EOL;
 
        return OK;
 }
@@ -1687,6 +1713,87 @@ int d_goto(WORD unused)
 }
 
 
+void DumpToken(TOKEN t)        // Debug aid: print one token to stdout as a bracketed tag (no newline)
+{      // Tests run top to bottom and the first match wins, so named tokens take priority over the ranges below
+       if (t == COLON)
+               printf("[COLON]");
+       else if (t == CONST)
+               printf("[CONST]");      // Only the tag is printed; any payload that follows the token is not available here
+       else if (t == FCONST)
+               printf("[FCONST]");
+       else if (t == ACONST)
+               printf("[ACONST]");
+       else if (t == STRING)
+               printf("[STRING]");
+       else if (t == SYMBOL)
+               printf("[SYMBOL]");
+       else if (t == EOS)
+               printf("[EOS]");
+       else if (t == TKEOF)
+               printf("[TKEOF]");
+       else if (t == DEQUALS)
+               printf("[DEQUALS]");
+       else if (t == SET)
+               printf("[SET]");
+       else if (t == REG)
+               printf("[REG]");
+       else if (t == DCOLON)
+               printf("[DCOLON]");
+       else if (t == GE)
+               printf("[GE]");
+       else if (t == LE)
+               printf("[LE]");
+       else if (t == NE)
+               printf("[NE]");
+       else if (t == SHR)
+               printf("[SHR]");
+       else if (t == SHL)
+               printf("[SHL]");
+       else if (t == UNMINUS)
+               printf("[UNMINUS]");
+       else if (t == DOTB)
+               printf("[DOTB]");
+       else if (t == DOTW)
+               printf("[DOTW]");
+       else if (t == DOTL)
+               printf("[DOTL]");
+       else if (t == DOTQ)
+               printf("[DOTQ]");
+       else if (t == DOTS)
+               printf("[DOTS]");
+       else if (t == DOTD)
+               printf("[DOTD]");
+       else if (t == DOTI)
+               printf("[DOTI]");
+       else if (t == ENDEXPR)
+               printf("[ENDEXPR]");
+       else if (t == CR_ABSCOUNT)
+               printf("[CR_ABSCOUNT]");
+       else if (t == CR_DEFINED)
+               printf("[CR_DEFINED]");
+       else if (t == CR_REFERENCED)
+               printf("[CR_REFERENCED]");
+       else if (t == CR_STREQ)
+               printf("[CR_STREQ]");
+       else if (t == CR_MACDEF)
+               printf("[CR_MACDEF]");
+       else if (t == CR_TIME)
+               printf("[CR_TIME]");
+       else if (t == CR_DATE)
+               printf("[CR_DATE]");
+       else if (t >= 0x20 && t <= 0x2F)        // ASCII space through '/': print the character itself
+               printf("[%c]", (char)t);
+       else if (t >= 0x3A && t <= 0x3F)        // ASCII ':' through '?': print the character itself
+               printf("[%c]", (char)t);
+       else if (t >= 0x80 && t <= 0x87)        // Shown as D0..D7 (presumably data register tokens -- TODO confirm)
+               printf("[D%u]", ((uint32_t)t) - 0x80);
+       else if (t >= 0x88 && t <= 0x8F)        // Shown as A0..A7 (presumably address register tokens -- TODO confirm)
+               printf("[A%u]", ((uint32_t)t) - 0x88);
+       else                                    // Anything else: raw value in hex, plus the value cast to char
+               printf("[%X:%c]", (uint32_t)t, (char)t);
+}
+
+
 void DumpTokenBuffer(void)
 {
        printf("Tokens [%X]: ", sloc);
@@ -1697,7 +1804,16 @@ void DumpTokenBuffer(void)
                        printf("[COLON]");
                else if (*t == CONST)
                {
-                       printf("[CONST: $%lX]", ((uint64_t)t[1] << 32) | (uint64_t)t[2]);
+                       PTR tp;
+                       tp.u32 = t + 1;
+                       printf("[CONST: $%lX]", *tp.u64);
+                       t += 2;
+               }
+               else if (*t == FCONST)
+               {
+                       PTR tp;
+                       tp.u32 = t + 1;
+                       printf("[FCONST: $%lX]", *tp.u64);
                        t += 2;
                }
                else if (*t == ACONST)
@@ -1745,6 +1861,12 @@ void DumpTokenBuffer(void)
                        printf("[DOTW]");
                else if (*t == DOTL)
                        printf("[DOTL]");
+               else if (*t == DOTQ)
+                       printf("[DOTQ]");
+               else if (*t == DOTS)
+                       printf("[DOTS]");
+               else if (*t == DOTD)
+                       printf("[DOTD]");
                else if (*t == DOTI)
                        printf("[DOTI]");
                else if (*t == ENDEXPR)