Version bump. Thanks to ggn for the report & patch (bug #73)!

[rmac] / token.c
diff --git a/token.c b/token.c

index 4039c8ebdd4a620d55f953b97e23985715816810..7bba1993fd815aa13befd0c80228063fcd07e0eb 100644 (file)
--- a/token.c
+++ b/token.c
@@ -3,7 +3,7 @@
  // TOKEN.C - Token Handling
  // Copyright (C) 199x Landon Dyer, 2011-2012 Reboot and Friends
  // RMAC derived from MADMAC v1.07 Written by Landon Dyer, 1986
-// Source Utilised with the Kind Permission of Landon Dyer
+// Source utilised with the kind permission of Landon Dyer
  //
  
  #include "token.h"
@@ -24,7 +24,7 @@ int mjump_align = 0;          // mjump alignment flag
  char lntag;                                    // Line tag
  char * curfname;                       // Current filename
  char tolowertab[128];          // Uppercase ==> lowercase 
-char hextab[128];                      // Table of hex values
+int8_t hextab[128];                    // Table of hex values
  char dotxtab[128];                     // Table for ".b", ".s", etc.
  char irbuf[LNSIZ];                     // Text for .rept block line
  char lnbuf[LNSIZ];                     // Text of current line
@@ -118,7 +118,7 @@ static char * riscregname[] = {
  
  
  //
-// Initialize Tokenizer
+// Initialize tokenizer
  //
  void InitTokenizer(void)
  {
@@ -554,6 +554,9 @@ DEBUG printf("ExM: SYMBOL=\"%s\"", d);
                                                         case DOTL:
                                                                 d = ".l";
                                                                 break;
+                                                       case CR_ABSCOUNT:
+                                                               d = "^^abscount";
+                                                               break;
                                                         case CR_DATE:
                                                                 d = "^^date";
                                                                 break;
@@ -613,11 +616,11 @@ overflow:
  
  
  //
-// Get Next Line of Text from a Macro
+// Get next line of text from a macro
  //
  char * GetNextMacroLine(void)
  {
-       unsigned source_addr;
+//     unsigned source_addr;
  
         IMACRO * imacro = cur_inobj->inobj.imacro;
  //     LONG * strp = imacro->im_nextln;
@@ -636,7 +639,7 @@ char * GetNextMacroLine(void)
  
  
  //
-// Get Next Line of Text from a Repeat Block
+// Get next line of text from a repeat block
  //
  char * GetNextRepeatLine(void)
  {
@@ -668,7 +671,7 @@ char * GetNextRepeatLine(void)
  
  
  //
-// Include a Source File used at the Root, and for ".include" Files
+// Include a source file used at the root, and for ".include" files
  //
  int include(int handle, char * fname)
  {
@@ -676,8 +679,8 @@ int include(int handle, char * fname)
         INOBJ * inobj;
         FILEREC * fr;
  
-       // Verbose mode
-       if (verb_flag)
+       // Debug mode
+       if (debug)
                 printf("[include: %s, cfileno=%u]\n", fname, cfileno);
  
         // Alloc and initialize include-descriptors
@@ -714,7 +717,7 @@ int include(int handle, char * fname)
  
  
  //
-// Pop the Current Input Level
+// Pop the current input level
  //
  int fpop(void)
  {
@@ -728,7 +731,10 @@ int fpop(void)
                 // Pop IFENT levels until we reach the conditional assembly context we
                 // were at when the input object was entered.
                 while (ifent != inobj->in_ifent)
-                       d_endif();
+               {
+                       if (d_endif() != 0)                             // Something bad happened during endif parsing?
+                               return -1;                                      // If yes, bail instead of getting stuck in a loop
+               }
  
                 tok = inobj->in_otok;                           // Restore tok and otok
                 etok = inobj->in_etok;
@@ -736,21 +742,21 @@ int fpop(void)
                 switch (inobj->in_type)
                 {
                 case SRC_IFILE:                                         // Pop and release an IFILE
-                       if (verb_flag)
+                       if (debug)
                                 printf("[Leaving: %s]\n", curfname);
  
                         ifile = inobj->inobj.ifile;
                         ifile->if_link = f_ifile;
                         f_ifile = ifile;
                         close(ifile->ifhandle);                 // Close source file
-if (verb_flag) printf("[fpop (pre):  curfname=%s]\n", curfname);
+if (debug)     printf("[fpop (pre):  curfname=%s]\n", curfname);
                         curfname = ifile->ifoldfname;   // Set current filename
-if (verb_flag) printf("[fpop (post): curfname=%s]\n", curfname);
-if (verb_flag) printf("[fpop: (pre)  cfileno=%d ifile->ifno=%d]\n", (int)cfileno, (int)ifile->ifno);
+if (debug)     printf("[fpop (post): curfname=%s]\n", curfname);
+if (debug)     printf("[fpop: (pre)  cfileno=%d ifile->ifno=%d]\n", (int)cfileno, (int)ifile->ifno);
                         curlineno = ifile->ifoldlineno; // Set current line# 
                         DEBUG printf("cfileno=%d ifile->ifno=%d\n", (int)cfileno, (int)ifile->ifno);
                         cfileno = ifile->ifno;                  // Restore current file number
-if (verb_flag) printf("[fpop: (post) cfileno=%d ifile->ifno=%d]\n", (int)cfileno, (int)ifile->ifno);
+if (debug)     printf("[fpop: (post) cfileno=%d ifile->ifno=%d]\n", (int)cfileno, (int)ifile->ifno);
                         break;
                 case SRC_IMACRO:                                        // Pop and release an IMACRO
                         imacro = inobj->inobj.imacro;
@@ -795,17 +801,9 @@ char * GetNextLine(void)
                 // Scan for next end-of-line; handle stupid text formats by treating
                 // \r\n the same as \n. (lone '\r' at end of buffer means we have to
                 // check for '\n').
-#if 0
-               i = 0;
-               j = fl->ifcnt;
-               d = &fl->ifbuf[fl->ifind];
-
-               for(p=d; i<j; i++, p++)
-#else
                 d = &fl->ifbuf[fl->ifind];
  
                 for(p=d, i=0, j=fl->ifcnt; i<j; i++, p++)
-#endif
                 {
                         if (*p == '\r' || *p == '\n')
                         {
@@ -814,7 +812,7 @@ char * GetNextLine(void)
                                 if (*p == '\r')
                                 {
                                         if (i >= j)
-                                               break;                  // Need to read more, then look for '\n' to eat 
+                                               break;  // Need to read more, then look for '\n' to eat 
                                         else if (p[1] == '\n')
                                                 i++;
                                 }
@@ -838,7 +836,8 @@ char * GetNextLine(void)
                         *p = '\0';
                         return NULL;
  #else
-                       // Really should check to see if we're at the end of the buffer! :-P
+                       // Really should check to see if we're at the end of the buffer!
+                       // :-P
                         fl->ifbuf[fl->ifind + fl->ifcnt] = '\0';
                         fl->ifcnt = 0;
                         return &fl->ifbuf[fl->ifind];
@@ -879,29 +878,29 @@ char * GetNextLine(void)
  
  
  //
-// Tokenize a Line
+// Tokenize a line
  //
  int TokenizeLine(void)
  {
-       char * ln = NULL;                               // Ptr to current position in line
-       char * p;                                               // Random character ptr
-       TOKEN * tk;                                             // Token-deposit ptr
-       int state = 0;                                  // State for keyword detector
-       int j = 0;                                              // Var for keyword detector
-       char c;                                                 // Random char
-       VALUE v;                                                // Random value
-       char * nullspot = NULL;                 // Spot to clobber for SYMBOL terminatn
-       int stuffnull;                                  // 1:terminate SYMBOL '\0' at *nullspot
+       char * ln = NULL;                       // Ptr to current position in line
+       char * p;                                       // Random character ptr
+       TOKEN * tk;                                     // Token-deposit ptr
+       int state = 0;                          // State for keyword detector
+       int j = 0;                                      // Var for keyword detector
+       char c;                                         // Random char
+       VALUE v;                                        // Random value
+       char * nullspot = NULL;         // Spot to clobber for SYMBOL termination
+       int stuffnull;                          // 1:terminate SYMBOL '\0' at *nullspot
         char c1;
-       int stringNum = 0;                              // Pointer to string locations in tokenized line
+       int stringNum = 0;                      // Pointer to string locations in tokenized line
  
  retry:
  
         if (cur_inobj == NULL)                                  // Return EOF if input stack is empty
                 return TKEOF;
  
-       // Get another line of input from the current input source: a file,
-       // a macro, or a repeat-block
+       // Get another line of input from the current input source: a file, a
+       // macro, or a repeat-block
         switch (cur_inobj->in_type)
         {
         // Include-file:
@@ -912,9 +911,14 @@ retry:
         case SRC_IFILE:
                 if ((ln = GetNextLine()) == NULL)
                 {
-if (verb_flag) printf("TokenizeLine: Calling fpop() from SRC_IFILE...\n");
-                       fpop();                                                 // Pop input level
-                       goto retry;                                             // Try for more lines 
+if (debug) printf("TokenizeLine: Calling fpop() from SRC_IFILE...\n");
+                       if (fpop()==0)                                  // Pop input level
+                               goto retry;                                     // Try for more lines 
+                       else
+                       {
+                               ifent->if_prev = (IFENT *) - 1; //Signal Assemble() that we have reached EOF with unbalanced if/endifs
+                               return TKEOF;
+                       }
                 }
  
                 curlineno++;                                            // Bump line number
@@ -947,8 +951,10 @@ if (verb_flag) printf("TokenizeLine: Calling fpop() from SRC_IFILE...\n");
         case SRC_IMACRO:
                 if ((ln = GetNextMacroLine()) == NULL)
                 {
-                       ExitMacro();                                    // Exit macro (pop args, do fpop(), etc)
-                       goto retry;                                             // Try for more lines...
+                       if (ExitMacro() == 0)                   // Exit macro (pop args, do fpop(), etc)
+                               goto retry;                                     // Try for more lines...
+                       else
+                               return TKEOF;                           // Oops, we got a non zero return code, signal EOF
                 }
  
                 lntag = '@';
@@ -959,7 +965,7 @@ if (verb_flag) printf("TokenizeLine: Calling fpop() from SRC_IFILE...\n");
         case SRC_IREPT:
                 if ((ln = GetNextRepeatLine()) == NULL)
                 {
-if (verb_flag) printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n");
+if (debug) printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n");
                         fpop();
                         goto retry;
                 }
@@ -975,10 +981,10 @@ if (verb_flag) printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n");
                 strcpy(lnbuf, ln);
  
         // General house-keeping
-       tok = tokeol;                                                   // Set "tok" to EOL in case of error
-       tk = etok;                                                              // Reset token ptr
-       stuffnull = 0;                                                  // Don't stuff nulls
-       totlines++;                                                             // Bump total #lines assembled
+       tok = tokeol;                   // Set "tok" to EOL in case of error
+       tk = etok;                              // Reset token ptr
+       stuffnull = 0;                  // Don't stuff nulls
+       totlines++;                             // Bump total #lines assembled
  
         // See if the entire line is a comment. This is a win if the programmer
         // puts in lots of comments
@@ -1008,12 +1014,12 @@ if (verb_flag) printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n");
  
                 if (c & STSYM)
                 {
-                       if (stuffnull)                                  // Terminate old symbol from previous pass
+                       if (stuffnull)                  // Terminate old symbol from previous pass
                                 *nullspot = EOS;
  
-                       v = 0;                                                  // Assume no DOT attrib follows symbol
+                       v = 0;                                  // Assume no DOT attrib follows symbol
                         stuffnull = 1;
-                       p = nullspot = ln++;                    // Nullspot -> start of this symbol
+                       p = nullspot = ln++;    // Nullspot -> start of this symbol
  
                         // Find end of symbol (and compute its length)
                         for(j=1; (int)chrtab[*ln]&CTSYM; j++)
@@ -1023,8 +1029,8 @@ if (verb_flag) printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n");
                         // symbol or keyword:
                         if (*ln == '.')
                         {
-                               *ln++ = EOS;                            // Terminate symbol
-                               stuffnull = 0;                          // And never try it again 
+                               *ln++ = EOS;            // Terminate symbol
+                               stuffnull = 0;          // And never try it again 
  
                                 // Character following the `.' must have a DOT attribute, and
                                 // the chararacter after THAT one must not have a start-symbol
@@ -1074,8 +1080,8 @@ if (verb_flag) printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n");
                                 j = -1;
                         }
  
-                       //make j = -1 if time, date etc with no preceeding ^^
-                       //defined, referenced, streq, macdef, date and time
+                       // Make j = -1 if time, date etc with no preceding ^^
+                       // defined, referenced, streq, macdef, date and time
                         switch ((TOKEN)j)
                         {
                         case 112:   // defined
@@ -1085,7 +1091,6 @@ if (verb_flag) printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n");
                         case 120:   // time
                         case 121:   // date
                                 j = -1;
-//                             break;
                         }
  
                         // If not tokenized keyword OR token was not found
@@ -1093,8 +1098,8 @@ if (verb_flag) printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n");
                         {
                                 *tk++ = SYMBOL;
  //#warning
-//problem here: nullspot is a char * but TOKEN is a uint32_t. On a 64-bit system,
-//this will cause all kinds of mischief.
+//problem here: nullspot is a char * but TOKEN is a uint32_t. On a 64-bit
+//system, this will cause all kinds of mischief.
  #if 0
                                 *tk++ = (TOKEN)nullspot;
  #else
@@ -1130,7 +1135,7 @@ if (verb_flag) printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n");
                 {
                         switch (*ln++)
                         {
-                       case '!':                                       // ! or != 
+                       case '!':               // ! or != 
                                 if (*ln == '=')
                                 {
                                         *tk++ = NE;
@@ -1140,8 +1145,8 @@ if (verb_flag) printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n");
                                         *tk++ = '!';
  
                                 continue;
-                       case '\'':                                      // 'string' 
-                       case '\"':                                      // "string" 
+                       case '\'':              // 'string' 
+                       case '\"':              // "string" 
                                 c1 = ln[-1];
                                 *tk++ = STRING;
  //#warning
@@ -1207,31 +1212,66 @@ if (verb_flag) printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n");
  
                                 *p++ = EOS;
                                 continue;
-                       case '$':                                       // $, hex constant
-                               if ((int)chrtab[*ln] & HDIGIT)
+                       case '$':               // $, hex constant
+                               if (chrtab[*ln] & HDIGIT)
                                 {
                                         v = 0;
  
-                                       while ((int)hextab[*ln] >= 0)
+                                       // Parse the hex value
+                                       while (hextab[*ln] >= 0)
                                                 v = (v << 4) + (int)hextab[*ln++];
  
+                                       // ggn: Okay, some comments here are in order I think....
+                                       // The original madmac sources didn't parse the size at
+                                       // this point (i.e. .b/.w/.l). It was probably done at
+                                       // another point, although it's unclear to me exactly
+                                       // where. So why change this? My understanding (at least
+                                       // from what SCPCD said on IRC) is that .w addressing
+                                       // formats produce wrong code on jaguar (or don't execute
+                                       // properly? something like that). So the code was changed
+                                       // to mask off the upper bits depending on length (note: I
+                                       // don't think .b is valid at all! I only know of .w/.l, so
+                                       // this should probably be wiped). Then the code that
+                                       // parses the constant and checks to see if it's between
+                                       // $ffff0000 and $8000 never got triggered, so yay job
+                                       // done! ...now say we want to assemble a st .prg. One of
+                                       // the most widely spread optimisations is move.X expr.w,Y
+                                       // (or vice versa, or both, anyway...) to access hardware
+                                       // registers (which are mapped to $fxxxxx). This botchy
+                                       // thing would create "hilarious" code while trying to
+                                       // access hardware registers. So I made a condition to see
+                                       // if st mode or jaguar is active and apply the masking or
+                                       // not. One last note: this is hardcoded to get optimised
+                                       // for now on ST mode, i.e. it can't generate code like
+                                       // move.w $00001234,d0 - it'll always get optimised to
+                                       // move.w $1234.w,d0. It's probably ok, but maybe a warning
+                                       // should be emitted? Or maybe finding a way to make it not
+                                       // auto-optimise? I think it's ok for now...
                                         if (*ln == '.')
                                         {
-                                               if ((*(ln + 1) == 'b') || (*(ln + 1) == 'B'))
+                                               if (obj_format == ALCYON)
                                                 {
-                                                       v &= 0x000000FF;
-                                                       ln += 2;
-                                               }
-
-                                               if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
-                                               {
-                                                       v &= 0x0000FFFF;
-                                                       ln += 2;
+                                                       if ((*(ln + 1) == 'b') || (*(ln + 1) == 'B') || (*(ln + 1) == 'w') || (*(ln + 1) == 'W') || (*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
+                                                       {
+                                                               ln += 2;
+                                                       }
                                                 }
-
-                                               if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
+                                               else
                                                 {
-                                                       ln += 2;
+                                                       if ((*(ln + 1) & 0xDF) == 'B')
+                                                       {
+                                                               v &= 0x000000FF;
+                                                               ln += 2;
+                                                       }
+                                                       else if ((*(ln + 1) & 0xDF) == 'W')
+                                                       {
+                                                               v &= 0x0000FFFF;
+                                                               ln += 2;
+                                                       }
+                                                       else if ((*(ln + 1) & 0xDF) == 'L')
+                                                       {
+                                                               ln += 2;
+                                                       }
                                                 }
                                         }
  
@@ -1242,7 +1282,7 @@ if (verb_flag) printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n");
                                         *tk++ = '$';
  
                                 continue;
-                       case '<':                                       // < or << or <> or <= 
+                       case '<':               // < or << or <> or <= 
                                 switch (*ln)
                                 {
                                 case '<':
@@ -1261,7 +1301,7 @@ if (verb_flag) printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n");
                                         *tk++ = '<';
                                         continue;
                                 }
-                       case ':':                                       // : or ::
+                       case ':':               // : or ::
                                 if (*ln == ':')
                                 {
                                         *tk++ = DCOLON;
@@ -1271,7 +1311,7 @@ if (verb_flag) printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n");
                                         *tk++ = ':';
  
                                 continue;
-                       case '=':                                       // = or == 
+                       case '=':               // = or == 
                                 if (*ln == '=')
                                 {
                                         *tk++ = DEQUALS;
@@ -1281,7 +1321,7 @@ if (verb_flag) printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n");
                                         *tk++ = '=';
  
                                 continue;
-                       case '>':                                       // > or >> or >= 
+                       case '>':               // > or >> or >= 
                                 switch (*ln)
                                 {
                                 case '>':
@@ -1296,7 +1336,7 @@ if (verb_flag) printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n");
                                         *tk++ = '>';
                                         continue;
                                 }
-                       case '%':                                       // % or binary constant 
+                       case '%':               // % or binary constant 
                                 if (*ln < '0' || *ln > '1')
                                 {
                                         *tk++ = '%';
@@ -1331,7 +1371,7 @@ if (verb_flag) printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n");
                                 *tk++ = CONST;
                                 *tk++ = v;
                                 continue;
-                       case '@':                                       // @ or octal constant 
+                       case '@':               // @ or octal constant 
                                 if (*ln < '0' || *ln > '7')
                                 {
                                         *tk++ = '@';
@@ -1366,7 +1406,7 @@ if (verb_flag) printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n");
                                 *tk++ = CONST;
                                 *tk++ = v;
                                 continue;
-                       case '^':                                       // ^ or ^^ <operator-name>
+                       case '^':               // ^ or ^^ <operator-name>
                                 if (*ln != '^')
                                 {
                                         *tk++ = '^';
@@ -1418,7 +1458,7 @@ if (verb_flag) printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n");
                                 *tk++ = (TOKEN)j;
                                 continue;
                         default:
-                               interror(2);                                 // Bad MULTX entry in chrtab
+                               interror(2);    // Bad MULTX entry in chrtab
                                 continue;
                         }
                 }
@@ -1493,11 +1533,7 @@ goteol:
  //int d_goto(void)
  int d_goto(WORD unused)
  {
-//     char * sym;                                               // Label to search for 
-//     LONG * defln;                                             // Macro definition strings 
-       char * s1;                                                // Temps for string comparison 
-       char * s2;
-//     IMACRO * imacro;                                          // Macro invocation block
+       char * s1, * s2;
  
         // Setup for the search
         if (*tok != SYMBOL)
@@ -1524,7 +1560,7 @@ int d_goto(WORD unused)
                         // Compare names (sleazo string compare)
                         // This string compare is not right. Doesn't check for lengths.
                         // (actually it does, but in a crappy, unclear way.)
-#warning "!!! Bad string comparison !!!"
+WARNING(!!!! Bad string comparison !!!)
                         s1 = sym;
  //                     s2 = (char *)(defln + 1) + 1;
                         s2 = defln->line;
@@ -1570,7 +1606,6 @@ void DumpTokenBuffer(void)
                 else if (*t == ACONST)
                         printf("[ACONST]");
                 else if (*t == STRING)
-//                     printf("[STRING]");
                 {
                         t++;
                         printf("[STRING:\"%s\"]", string[*t]);
@@ -1614,6 +1649,8 @@ void DumpTokenBuffer(void)
                         printf("[DOTI]");
                 else if (*t == ENDEXPR)
                         printf("[ENDEXPR]");
+               else if (*t == CR_ABSCOUNT)
+                       printf("[CR_ABSCOUNT]");
                 else if (*t == CR_DEFINED)
                         printf("[CR_DEFINED]");
                 else if (*t == CR_REFERENCED)
@@ -1636,7 +1673,6 @@ void DumpTokenBuffer(void)
                         printf("[A%u]", ((uint32_t)*t) - 0x88);
                 else
                         printf("[%X:%c]", (uint32_t)*t, (char)*t);
-//                     printf("[%X]", (uint32_t)*t);
         }
  
         printf("[EOL]\n");