Version bump for last commit.

[rmac] / token.c
diff --git a/token.c b/token.c

index e1be9b5b8871ea3cde208b29b8e9a072f43ec64d..9cbf8b4e0ad8fd5dd613ac6feab5dbb8b0d948b5 100644 (file)
--- a/token.c
+++ b/token.c
@@ -1,7 +1,7 @@
  //
-// RMAC - Reboot's Macro Assembler for all Atari computers
+// RMAC - Renamed Macro Assembler for all Atari computers
  // TOKEN.C - Token Handling
-// Copyright (C) 199x Landon Dyer, 2011-2017 Reboot and Friends
+// Copyright (C) 199x Landon Dyer, 2011-2021 Reboot and Friends
  // RMAC derived from MADMAC v1.07 Written by Landon Dyer, 1986
  // Source utilised with the kind permission of Landon Dyer
  //
@@ -19,10 +19,17 @@
  #define DECL_KW                                // Declare keyword arrays
  #define DEF_KW                         // Declare keyword values
  #include "kwtab.h"                     // Incl generated keyword tables & defs
+#define DEF_REG68                      // Incl 68k register definitions
+#include "68kregs.h"
+#define DEF_REGRISC                    // Include GPU/DSP register definitions
+#include "riscregs.h"
+#define DEF_UNARY                      // Declare unary values
+#define DECL_UNARY                     // Incl unary keyword state machine tables
+#include "unarytab.h"          // Incl generated unary tables & defs
  
  
  int lnsave;                                    // 1; strcpy() text of current line
-uint16_t curlineno;                    // Current line number (64K max currently)
+uint32_t curlineno;                    // Current line number (32-bit counter)
  int totlines;                          // Total # of lines
  int mjump_align = 0;           // mjump alignment flag
  char lntag;                                    // Line tag
@@ -40,13 +47,6 @@ TOKEN tokeol[1] = {EOL};     // Bailout end-of-line token
  char * string[TOKBUFSIZE*2];// Token buffer string pointer storage
  int optimizeOff;                       // Optimization override flag
  
-// File record, used to maintain a list of every include file ever visited
-#define FILEREC struct _filerec
-FILEREC
-{
-   FILEREC * frec_next;
-   char * frec_name;
-};
  
  FILEREC * filerec;
  FILEREC * last_fr;
@@ -154,13 +154,6 @@ static char * regname[] = {
         "a10","b10","x","y","","","ab","ba"  // 312,319
  };
  
-static char * riscregname[] = {
-        "r0",  "r1",  "r2",  "r3",  "r4", "r5",   "r6",  "r7",
-        "r8",  "r9", "r10", "r11", "r12", "r13", "r14", "r15",
-       "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
-       "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31"
-};
-
  
  //
  // Initialize tokenizer
@@ -516,14 +509,9 @@ arg_num:
                                                 // This is a hack. It might be better table-driven.
                                                 d = NULL;
  
-                                               if ((*tk >= KW_D0) && !rdsp && !rgpu)
-                                               {
-                                                       d = regname[(int)*tk++ - KW_D0];
-                                                       goto strcopy;
-                                               }
-                                               else if ((*tk >= KW_R0) && (*tk <= KW_R31))
+                                               if (*tk >= REG68_D0)
                                                 {
-                                                       d = riscregname[(int)*tk++ - KW_R0];
+                                                       d = regname[(int)*tk++ - REG68_D0];
                                                         goto strcopy;
                                                 }
                                                 else
@@ -531,22 +519,12 @@ arg_num:
                                                         switch ((int)*tk++)
                                                         {
                                                         case SYMBOL:
-#if 0
-//                                                             d = (char *)*tk++;
-                                                               d = string[*tk++];
-#else
-                                                               // This fix should be done for strings too
                                                                 d = symbolString[*tk++];
  DEBUG { printf("ExM: SYMBOL=\"%s\"", d); }
-#endif
                                                                 break;
                                                         case STRING:
-#if 0
-//                                                             d = (char *)*tk++;
-                                                               d = string[*tk++];
-#else
                                                                 d = symbolString[*tk++];
-#endif
+
                                                                 if (dst >= edst)
                                                                         goto overflow;
  
@@ -570,7 +548,8 @@ DEBUG { printf("ExM: SYMBOL=\"%s\"", d); }
  //         to choke on legitimate code... Need to investigate this further
  //         before changing anything else here!
                                                         case CONST:
-                                                               sprintf(numbuf, "$%lx", (uint64_t)*tk++);
+//                                                             sprintf(numbuf, "$%lx", (uint64_t)*tk++);
+                                                               sprintf(numbuf, "$%" PRIX64, (uint64_t)*tk++);
                                                                 tk++;
                                                                 d = numbuf;
                                                                 break;
@@ -613,6 +592,9 @@ DEBUG { printf("ExM: SYMBOL=\"%s\"", d); }
                                                         case CR_ABSCOUNT:
                                                                 d = "^^abscount";
                                                                 break;
+                                                       case CR_FILESIZE:
+                                                               d = "^^filesize";
+                                                               break;
                                                         case CR_DATE:
                                                                 d = "^^date";
                                                                 break;
@@ -679,7 +661,6 @@ overflow:
  char * GetNextMacroLine(void)
  {
         IMACRO * imacro = cur_inobj->inobj.imacro;
-//     LONG * strp = imacro->im_nextln;
         LLIST * strp = imacro->im_nextln;
  
         if (strp == NULL)                                               // End-of-macro
@@ -713,12 +694,42 @@ char * GetNextRepeatLine(void)
                         DEBUG { printf("end-repeat-block\n"); }
                         return NULL;
                 }
-
+               reptuniq++;
  //             strp = irept->ir_nextln;
         }
+       // Mark the current macro line in the irept object
+       // This is probably overkill - a global variable
+       // would suffice here (it only gets used during
+       // error reporting anyway)
+       irept->lineno = irept->ir_nextln->lineno;
+
+       // Copy the rept lines verbatim, unless we're at nest level 0;
+       // in that case, expand any \~ labels to unique numbers (Rn)
+       if (rptlevel)
+       {
+               strcpy(irbuf, irept->ir_nextln->line);
+       }
+       else
+       {
+               uint32_t linelen = strlen(irept->ir_nextln->line);
+               uint8_t *p_line = irept->ir_nextln->line;
+               char *irbufwrite = irbuf;
+               for (int i = 0; i <= linelen; i++)
+               {
+                       uint8_t c;
+                       c = *p_line++;
+                       if (c == '\\' && *p_line == '~')
+                       {
+                               p_line++;
+                               irbufwrite += sprintf(irbufwrite, "R%u", reptuniq);
+                       }
+                       else
+                       {
+                               *irbufwrite++ = c;
+                       }
+               }
+       }
  
-//     strcpy(irbuf, (char *)(irept->ir_nextln + 1));
-       strcpy(irbuf, irept->ir_nextln->line);
         DEBUG { printf("repeat line='%s'\n", irbuf); }
  //     irept->ir_nextln = (LONG *)*strp;
         irept->ir_nextln = irept->ir_nextln->next;
@@ -793,7 +804,7 @@ int fpop(void)
         if (numUnmatched > 0)
                 warn("missing %d .endif(s)", numUnmatched);
  
-       tok = inobj->in_otok;   // Restore tok and otok
+       tok = inobj->in_otok;   // Restore tok and etok
         etok = inobj->in_etok;
  
         switch (inobj->in_type)
@@ -954,15 +965,16 @@ int TokenizeLine(void)
         uint8_t c;                                      // Random char
         uint64_t v;                                     // Random value
         uint32_t cursize = 0;           // Current line's size (.b, .w, .l, .s, .q, .d)
-       double f;                                       // Random float
         uint8_t * nullspot = NULL;      // Spot to clobber for SYMBOL termination
         int stuffnull;                          // 1:terminate SYMBOL '\0' at *nullspot
         uint8_t c1;
         int stringNum = 0;                      // Pointer to string locations in tokenized line
+       SYM* sy;                                        // For looking up symbols (.equr)
+       int equrundef = 0;                      // Flag for equrundef scanning
  
  retry:
  
-       if (cur_inobj == NULL)                  // Return EOF if input stack is empty
+       if (cur_inobj == NULL)          // Return EOF if input stack is empty
                 return TKEOF;
  
         // Get another line of input from the current input source: a file, a
@@ -978,8 +990,8 @@ retry:
                 if ((ln = GetNextLine()) == NULL)
                 {
  DEBUG { printf("TokenizeLine: Calling fpop() from SRC_IFILE...\n"); }
-                       if (fpop() == 0)                                // Pop input level
-                               goto retry;                                     // Try for more lines
+                       if (fpop() == 0)        // Pop input level
+                               goto retry;             // Try for more lines
                         else
                         {
                                 ifent->if_prev = (IFENT *)-1;   //Signal Assemble() that we have reached EOF with unbalanced if/endifs
@@ -987,29 +999,9 @@ DEBUG { printf("TokenizeLine: Calling fpop() from SRC_IFILE...\n"); }
                         }
                 }
  
-               curlineno++;                                            // Bump line number
+               curlineno++;                    // Bump line number
                 lntag = SPACE;
  
-               if (as68_flag)
-               {
-                       // AS68 compatibility, throw away all lines starting with
-                       // back-quotes, tildes, or '*'
-                       // On other lines, turn the first '*' into a semi-colon.
-                       if (*ln == '`' || *ln == '~' || *ln == '*')
-                               *ln = ';';
-                       else
-                       {
-                               for(p=ln; *p!=EOS; p++)
-                               {
-                                       if (*p == '*')
-                                       {
-                                               *p = ';';
-                                               break;
-                                       }
-                               }
-                       }
-               }
-
                 break;
  
         // Macro-block:
@@ -1046,10 +1038,16 @@ DEBUG { printf("TokenizeLine: Calling fpop() from SRC_IFILE...\n"); }
         // macro-type blocks, since it is expensive to unconditionally copy every
         // line.
         if (lnsave)
+       {
+               // Sanity check
+               if (strlen(ln) > LNSIZ)
+                       return error("line too long (%d, max %d)", strlen(ln), LNSIZ);
+
                 strcpy(lnbuf, ln);
+       }
  
         // General housekeeping
-       tok = tokeol;           // Set "tok" to EOL in case of error
+       tok = tokeol;                   // Set "tok" to EOL in case of error
         tk.u32 = etok;                  // Reset token ptr
         stuffnull = 0;                  // Don't stuff nulls
         totlines++;                             // Bump total #lines assembled
@@ -1077,6 +1075,12 @@ DEBUG { printf("TokenizeLine: Calling fpop() from SRC_IFILE...\n"); }
         //  o  handle multiple-character tokens (constants, strings, etc.).
         for(; *ln!=EOS;)
         {
+               // Check to see if there's enough space in the token buffer
+               if (tk.cp >= ((uint8_t *)(&tokbuf[TOKBUFSIZE])) - 20)
+               {
+                       return error("token buffer overrun");
+               }
+
                 // Skip whitespace, handle EOL
                 while (chrtab[*ln] & WHITE)
                         ln++;
@@ -1147,14 +1151,15 @@ DEBUG { printf("TokenizeLine: Calling fpop() from SRC_IFILE...\n"); }
  
                         // If the symbol is small, check to see if it's really the name of
                         // a register.
-                       if (j <= KWSIZE)
+                       uint8_t *p2 = p;
+                       if (j <= 5)
                         {
-                               for(state=0; state>=0;)
+                               for (state = 0; state >= 0;)
                                 {
                                         j = (int)tolowertab[*p++];
-                                       j += kwbase[state];
+                                       j += regbase[state];
  
-                                       if (kwcheck[j] != state)
+                                       if (regcheck[j] != state)
                                         {
                                                 j = -1;
                                                 break;
@@ -1162,51 +1167,83 @@ DEBUG { printf("TokenizeLine: Calling fpop() from SRC_IFILE...\n"); }
  
                                         if (*p == EOS || p == ln)
                                         {
-                                               j = kwaccept[j];
+                                               j = regaccept[j];
+                                               goto skip_keyword;
                                                 break;
                                         }
  
-                                       state = kwtab[j];
+                                       state = regtab[j];
                                 }
                         }
-                       else
-                       {
-                               j = -1;
-                       }
  
-                       // Make j = -1 if user tries to use a RISC register while in 68K mode
-                       if (!(rgpu || rdsp) && ((TOKEN)j >= KW_R0 && (TOKEN)j <= KW_R31))
+                       // Scan for keywords
+                       if ((j <= 0 || state <= 0) || p==p2)
                         {
-                               j = -1;
+                               if (j <= KWSIZE)
+                               {
+                                       for (state = 0; state >= 0;)
+                                       {
+                                               j = (int)tolowertab[*p2++];
+                                               j += kwbase[state];
+                       
+                                               if (kwcheck[j] != state)
+                                               {
+                                                       j = -1;
+                                                       break;
+                                               }
+                       
+                                               if (*p == EOS || p2 == ln)
+                                               {
+                                                       j = kwaccept[j];
+                                                       break;
+                                               }
+                       
+                                               state = kwtab[j];
+                                       }
+                               }
+                               else
+                               {
+                                       j = -1;
+                               }
                         }
  
-                       // Make j = -1 if time, date etc with no preceeding ^^
-                       // defined, referenced, streq, macdef, date and time
-                       switch ((TOKEN)j)
+                       skip_keyword:
+
+                       // If we detected equrundef/regundef set relevant flag
+                       if (j == KW_EQURUNDEF)
                         {
-                       case 112:   // defined
-                       case 113:   // referenced
-                       case 118:   // streq
-                       case 119:   // macdef
-                       case 120:   // time
-                       case 121:   // date
+                               equrundef = 1;
                                 j = -1;
                         }
  
                         // If not tokenized keyword OR token was not found
                         if ((j < 0) || (state < 0))
                         {
+                               // Only proceed if no equrundef has been detected. In that case we need to store the symbol
+                               // because the directive handler (d_equrundef) will run outside this loop, further into procln.c
+                               if (!equrundef && !disabled)
+                               {
+                                       // Last attempt: let's see if this is an equated register.
+                                       // If yes, then just store the register's keyword value instead of the symbol
+                                       char temp = *ln;
+                                       *ln = 0;
+                                       sy = lookup(nullspot, LABEL, 0);
+                                       *ln = temp;
+                                       if (sy)
+                                       {
+                                               if (sy->sattre & EQUATEDREG)
+                                               {
+                                                       *tk.u32++ = sy->svalue;
+                                                       stuffnull = 0;
+                                                       continue;
+                                               }
+                                       }
+                               }
+                               // Ok, that failed, let's store the symbol instead
                                 *tk.u32++ = SYMBOL;
-//#warning
-//problem here: nullspot is a char * but TOKEN is a uint32_t. On a 64-bit
-//system, this will cause all kinds of mischief.
-#if 0
-                               *tk++ = (TOKEN)nullspot;
-#else
                                 string[stringNum] = nullspot;
                                 *tk.u32++ = stringNum;
                                 stringNum++;
-#endif
                         }
                         else
                         {
@@ -1214,12 +1251,20 @@ DEBUG { printf("TokenizeLine: Calling fpop() from SRC_IFILE...\n"); }
                                 stuffnull = 0;
                         }
  
-                       if (v)                                                  // Record attribute token (if any)
+                       if (v)                  // Record attribute token (if any)
                                 *tk.u32++ = (TOKEN)v;
  
-                       if (stuffnull)                                  // Arrange for string termination on next pass
+                       if (stuffnull)  // Arrange for string termination on next pass
                                 nullspot = ln;
  
+                       if (disabled)
+                       {
+                               // When we are in a disabled code block, the only thing that can break out
+                               // of this is an ".endif" keyword, so this is the minimum we have to parse
+                               // in order to discover such a keyword.
+                               goto goteol;
+                       }
+
                         continue;
                 }
  
@@ -1298,6 +1343,10 @@ dostring:
                                                 case '\\':
                                                         c = '\\';
                                                         break;
+                                               case '{':
+                                                       // If we're evaluating a macro
+                                                       // this is valid because it's
+                                                       // a parameter expansion
                                                 case '!':
                                                         // If we're evaluating a macro
                                                         // this is valid and expands to
@@ -1327,45 +1376,20 @@ dostring:
                                         while (hextab[*ln] >= 0)
                                                 v = (v << 4) + (int)hextab[*ln++];
  
-                                       if (*ln == '.')
-                                       {
-                                               if (obj_format == BSD)
-                                               {
-                                                       if ((*(ln + 1) & 0xDF) == 'B')
-                                                       {
-                                                               v &= 0x000000FF;
-                                                               ln += 2;
-                                                       }
-                                                       else if ((*(ln + 1) & 0xDF) == 'W')
-                                                       {
-                                                               v &= 0x0000FFFF;
-                                                               ln += 2;
-                                                       }
-                                                       else if ((*(ln + 1) & 0xDF) == 'L')
-                                                       {
-                                                               v &= 0xFFFFFFFF;
-                                                               ln += 2;
-                                                       }
-                                               }
-                                       }
-
                                         *tk.u32++ = CONST;
                                         *tk.u64++ = v;
  
-                                       if (obj_format == ALCYON)
+                                       if (*ln == '.')
                                         {
-                                               if (*ln == '.')
+                                               if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
                                                 {
-                                                       if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
-                                                       {
-                                                               *tk.u32++ = DOTW;
-                                                               ln += 2;
-                                                       }
-                                                       else if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
-                                                       {
-                                                               *tk.u32++ = DOTL;
-                                                               ln += 2;
-                                                       }
+                                                       *tk.u32++ = DOTW;
+                                                       ln += 2;
+                                               }
+                                               else if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
+                                               {
+                                                       *tk.u32++ = DOTL;
+                                                       ln += 2;
                                                 }
                                         }
                                 }
@@ -1520,14 +1544,14 @@ dostring:
                                 for(state=0; state>=0;)
                                 {
                                         // Get char, convert to lowercase
-                                       j = *p++;
+                                       j = (int)tolowertab[*p++];
  
-                                       if (j >= 'A' && j <= 'Z')
-                                               j += 0x20;
+                                       //if (j >= 'A' && j <= 'Z')
+                                       //      j += 0x20;
  
-                                       j += kwbase[state];
+                                       j += unarybase[state];
  
-                                       if (kwcheck[j] != state)
+                                       if (unarycheck[j] != state)
                                         {
                                                 j = -1;
                                                 break;
@@ -1535,11 +1559,11 @@ dostring:
  
                                         if (*p == EOS || p == ln)
                                         {
-                                               j = kwaccept[j];
+                                               j = unaryaccept[j];
                                                 break;
                                         }
  
-                                       state = kwtab[j];
+                                       state = unarytab[j];
                                 }
  
                                 if (j < 0 || state < 0)
@@ -1595,19 +1619,8 @@ dostring:
                                 else if ((int)chrtab[*(ln + 1)] & DIGIT)
                                 {
                                         // Hey, more digits after the dot, so we assume it's a
-                                       // floating point number of some kind
-#if 0
-                                       double fract = 10;
-                                       ln++;
-                                       f = (double)v;
-
-                                       while ((int)chrtab[*ln] & DIGIT)
-                                       {
-                                               f = f + (double)(*ln++ - '0') / fract;
-                                               fract *= 10;
-                                       }
-#else
-                                       // Here we parse the whole floating point number
+                                       // floating point number of some kind... numEnd will point
+                                       // to the first non-float character after it's done
                                         char * numEnd;
                                         errno = 0;
                                         double f = strtod(numStart, &numEnd);
@@ -1615,11 +1628,14 @@ dostring:
  
                                         if (errno != 0)
                                                 return error("floating point parse error");
-#endif
  
+                                       // N.B.: We use the C compiler's internal double
+                                       //       representation for all internal float calcs and
+                                       //       are reasonably sure that the size of said double
+                                       //       is 8 bytes long (which we check for in fltpoint.c)
                                         *tk.u32++ = FCONST;
-// Shamus: Well, this is all kinds of icky--not the least of which is that unlike uintNN_t types, we have no guarantees of any kind when it comes to the size of floating point numbers in C (as far as I know of). If there is, we need to use those kinds here, or else figure out at runtime what sizes we're dealing with and act accordingly. To be fair, this is OK as long as the double type is less than 64 bits wide, but again, there's no guarantee that it isn't. :-/
-                                       *tk.u64++ = f;
+                                       *tk.dp = f;
+                                       tk.u64++;
                                         continue;
                                 }
                         }
@@ -1640,9 +1656,9 @@ dostring:
         // Terminate line of tokens and return "success."
  
  goteol:
-       tok = etok;                                                     // Set tok to beginning of line
+       tok = etok;                             // Set tok to beginning of line
  
-       if (stuffnull)                                                  // Terminate last SYMBOL
+       if (stuffnull)                  // Terminate last SYMBOL
                 *nullspot = EOS;
  
         *tk.u32++ = EOL;
@@ -1689,7 +1705,7 @@ int d_goto(WORD unused)
                 {
                         // Compare names (sleazo string compare)
                         char * s1 = sym;
-                       char * s2 = defln->line;
+                       char * s2 = defln->line + 1;
  
                         // Either we will match the strings to EOS on both, or we will
                         // match EOS on string 1 to whitespace on string 2. Otherwise, we
@@ -1722,6 +1738,8 @@ void DumpToken(TOKEN t)
                 printf("[COLON]");
         else if (t == CONST)
                 printf("[CONST]");
+       else if (t == FCONST)
+               printf("[FCONST]");
         else if (t == ACONST)
                 printf("[ACONST]");
         else if (t == STRING)
@@ -1770,6 +1788,8 @@ void DumpToken(TOKEN t)
                 printf("[ENDEXPR]");
         else if (t == CR_ABSCOUNT)
                 printf("[CR_ABSCOUNT]");
+       else if (t == CR_FILESIZE)
+               printf("[CR_FILESIZE]");
         else if (t == CR_DEFINED)
                 printf("[CR_DEFINED]");
         else if (t == CR_REFERENCED)
@@ -1810,6 +1830,13 @@ void DumpTokenBuffer(void)
                         printf("[CONST: $%lX]", *tp.u64);
                         t += 2;
                 }
+               else if (*t == FCONST)
+               {
+                       PTR tp;
+                       tp.u32 = t + 1;
+                       printf("[FCONST: $%lX]", *tp.u64);
+                       t += 2;
+               }
                 else if (*t == ACONST)
                 {
                         printf("[ACONST: $%X, $%X]", (uint32_t)t[1], (uint32_t)t[2]);
@@ -1867,6 +1894,8 @@ void DumpTokenBuffer(void)
                         printf("[ENDEXPR]");
                 else if (*t == CR_ABSCOUNT)
                         printf("[CR_ABSCOUNT]");
+               else if (*t == CR_FILESIZE)
+                       printf("[CR_FILESIZE]");
                 else if (*t == CR_DEFINED)
                         printf("[CR_DEFINED]");
                 else if (*t == CR_REFERENCED)