Shamusworld >> Repos - rmac/blobdiff - token.c
Fixed bad char reporting, revamp of the error/warning system.
[rmac] / token.c
diff --git a/token.c b/token.c
index 24b7c955d6cbe04591fc6d634453b629ec5be9e2..6f42cca5d8f5306904c387e23f4f615bd928bfae 100644 (file)
--- a/token.c
+++ b/token.c
@@ -1,5 +1,5 @@
 //
-// RMAC - Reboot's Macro Assembler for the Atari Jaguar Console System
+// RMAC - Reboot's Macro Assembler for all Atari computers
 // TOKEN.C - Token Handling
 // Copyright (C) 199x Landon Dyer, 2011-2017 Reboot and Friends
 // RMAC derived from MADMAC v1.07 Written by Landon Dyer, 1986
@@ -55,7 +55,7 @@ static IMACRO * f_imacro;                             // Ptr list of free IMACROs
 
 static TOKEN tokbuf[TOKBUFSIZE];               // Token buffer (stack-like, all files)
 
-char chrtab[] = {
+uint8_t chrtab[0x100] = {
        ILLEG, ILLEG, ILLEG, ILLEG,                     // NUL SOH STX ETX
        ILLEG, ILLEG, ILLEG, ILLEG,                     // EOT ENQ ACK BEL
        ILLEG, WHITE, ILLEG, ILLEG,                     // BS HT LF VT
@@ -79,36 +79,76 @@ char chrtab[] = {
        MULTX, MULTX,                                                           // : ;
        MULTX, MULTX, MULTX, STSYM+CTSYM,                       // < = > ?
 
-       MULTX, STSYM+CTSYM+HDIGIT,                                                                      // @ A
-       (char)((BYTE)DOT)+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT,       // B C
-       STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT,                                         // D E
-       STSYM+CTSYM+HDIGIT, STSYM+CTSYM,                                                        // F G
-       STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM,                     // H I J K
-       (char)((BYTE)DOT)+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM,   // L M N O
-
-       STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, (char)((BYTE)DOT)+STSYM+CTSYM,   // P Q R S
-       STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, (char)((BYTE)DOT)+STSYM+CTSYM,   // T U V W
-       STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, SELF,                            // X Y Z [
-       SELF, SELF, MULTX, STSYM+CTSYM,                                                         // \ ] ^ _
-
-       ILLEG, STSYM+CTSYM+HDIGIT,                                                                      // ` a
-       (char)((BYTE)DOT)+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT,       // b c
-       STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT,                                         // d e
-       STSYM+CTSYM+HDIGIT, STSYM+CTSYM,                                                        // f g
-       STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM,                     // h i j k
-       (char)((BYTE)DOT)+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM,   // l m n o
-
-       STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, (char)((BYTE)DOT)+STSYM+CTSYM,   // p q r s
-       STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, (char)((BYTE)DOT)+STSYM+CTSYM,   // t u v w
-       STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, SELF,                            // x y z {
-       SELF, SELF, SELF, ILLEG                                                                         // | } ~ DEL
+       MULTX, STSYM+CTSYM+HDIGIT,                                      // @ A
+       DOT+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT,     // B C
+       DOT+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT,     // D E
+       STSYM+CTSYM+HDIGIT, STSYM+CTSYM,                        // F G
+       STSYM+CTSYM, DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // H I J K
+       DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // L M N O
+
+       DOT+STSYM+CTSYM, DOT+STSYM+CTSYM, STSYM+CTSYM, DOT+STSYM+CTSYM, // P Q R S
+       STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, DOT+STSYM+CTSYM, // T U V W
+       STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, SELF,// X Y Z [
+       SELF, SELF, MULTX, STSYM+CTSYM,                         // \ ] ^ _
+
+       ILLEG, STSYM+CTSYM+HDIGIT,                                      // ` a
+       DOT+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT,     // b c
+       DOT+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT,     // d e
+       STSYM+CTSYM+HDIGIT, STSYM+CTSYM,                        // f g
+       STSYM+CTSYM, DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // h i j k
+       DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // l m n o
+
+       DOT+STSYM+CTSYM, DOT+STSYM+CTSYM, STSYM+CTSYM, DOT+STSYM+CTSYM, // p q r s
+       STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, DOT+STSYM+CTSYM, // t u v w
+       DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, SELF,                // x y z {
+       SELF, SELF, SELF, ILLEG,                                        // | } ~ DEL
+
+       // Anything above $7F is illegal (and yes, we need to check for this,
+       // otherwise you get strange and spurious errors that will lead you astray)
+       ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
+       ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
+       ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
+       ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
+       ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
+       ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
+       ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
+       ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
+       ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
+       ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
+       ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
+       ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
+       ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
+       ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
+       ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
+       ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG
 };
 
 // Names of registers
 static char * regname[] = {
-       "d0", "d1",  "d2",  "d3", "d4", "d5", "d6", "d7",
-       "a0", "a1",  "a2",  "a3", "a4", "a5", "a6", "a7",
-       "pc", "ssp", "usp", "sr", "ccr"
+       "d0","d1","d2","d3","d4","d5","d6","d7", // 128,135
+       "a0","a1","a2","a3","a4","a5","a6","sp", // 136,143
+       "ssp","pc","sr","ccr","regequ","set","reg","r0", // 144,151
+       "r1","r2","r3","r4","r5","r6","r7","r8", // 152,159
+       "r9","r10","r11","r12","r13","r14","r15","r16", // 160,167
+       "r17","r18","r19","r20","r21","r22","r23","r24", // 168,175
+       "r25","r26","r27","r28","r29","r30","r31","ccdef", // 176,183
+       "usp","ic40","dc40","bc40","sfc","dfc","","vbr", // 184,191
+       "cacr","caar","msp","isp","tc","itt0","itt1","dtt0", // 192,199
+       "dtt1","mmusr","urp","srp","iacr0","iacr1","dacr0","dacr1", // 200,207
+       "tt0","tt1","crp","","","","","", // 208,215
+       "","","","","fpiar","fpsr","fpcr","", // 216,223
+       "fp0","fp1","fp2","fp3","fp4","fp5","fp6","fp7", // 224,231
+       "","","","","","","","", // 232,239
+       "","","","","","","","", // 240,247
+       "","","","","","","","", // 248,255
+       "","","","","x0","x1","y0","y1", // 256,263
+       "","b0","","b2","","b1","a","b", // 264,271
+       "mr","omr","la","lc","ssh","ssl","ss","", // 272,279
+       "n0","n1","n2","n3","n4","n5","n6","n7", // 280,287
+       "m0","m1","m2","m3","m4","m5","m6","m7", // 288,295
+       "","","","","","","l","p", // 296,303
+       "mr","omr","la","lc","ssh","ssl","ss","", // 304,311
+       "a10","b10","x","y","","","ab","ba"  // 312,319
 };
 
 static char * riscregname[] = {
@@ -159,14 +199,24 @@ void InitTokenizer(void)
        // These characters are legal immediately after a period
        dotxtab['b'] = DOTB;                                    // .b .B .s .S
        dotxtab['B'] = DOTB;
-       dotxtab['s'] = DOTB;
-       dotxtab['S'] = DOTB;
+       //dotxtab['s'] = DOTB;
+       //dotxtab['S'] = DOTB;
        dotxtab['w'] = DOTW;                                    // .w .W
        dotxtab['W'] = DOTW;
        dotxtab['l'] = DOTL;                                    // .l .L
        dotxtab['L'] = DOTL;
        dotxtab['i'] = DOTI;                                    // .i .I (???)
        dotxtab['I'] = DOTI;
+       dotxtab['D'] = DOTD;                                    // .d .D (quad word)
+       dotxtab['d'] = DOTD;
+       dotxtab['S'] = DOTS;                                    // .s .S
+       dotxtab['s'] = DOTS;
+       dotxtab['Q'] = DOTQ;                                    // .q .Q
+       dotxtab['q'] = DOTQ;
+       dotxtab['X'] = DOTX;                                    // .x .X
+       dotxtab['x'] = DOTX;
+       dotxtab['P'] = DOTP;                                    // .p .P
+       dotxtab['p'] = DOTP;
 }
 
 
@@ -245,7 +295,7 @@ INOBJ * a_inobj(int typ)
                break;
        case SRC_IREPT:                                                 // Alloc and init an IREPT
                inobj->inobj.irept = malloc(sizeof(IREPT));
-               DEBUG printf("alloc IREPT\n");
+               DEBUG { printf("alloc IREPT\n"); }
                break;
        }
 
@@ -403,7 +453,7 @@ copy_d:
                                        *d++ = *s++;
 
                                if (*s != '}')
-                                       return error("missing '}'");
+                                       return error("missing closing brace ('}')");
                                else
                                        s++;
                        }
@@ -412,10 +462,10 @@ copy_d:
 
                        // Lookup the argument and copy its (string) value into the
                        // destination string
-                       DEBUG printf("argument='%s'\n", mname);
+                       DEBUG { printf("argument='%s'\n", mname); }
 
                        if ((arg = lookup(mname, MACARG, macnum)) == NULL)
-                               return errors("undefined argument: '%s'", mname);
+                               return error("undefined argument: '%s'", mname);
                        else
                        {
                                // Convert a string of tokens (terminated with EOL) back into
@@ -423,7 +473,7 @@ copy_d:
                                // macro invocation) then it is ignored.
                                i = (int)arg->svalue;
 arg_num:
-                               DEBUG printf("~argnumber=%d (argBase=%u)\n", i, imacro->argBase);
+                               DEBUG { printf("~argnumber=%d (argBase=%u)\n", i, imacro->argBase); }
                                tk = NULL;
 
                                if (i < imacro->im_nargs)
@@ -488,7 +538,7 @@ arg_num:
 #else
                                                                // This fix should be done for strings too
                                                                d = symbolString[*tk++];
-DEBUG printf("ExM: SYMBOL=\"%s\"", d);
+DEBUG { printf("ExM: SYMBOL=\"%s\"", d); }
 #endif
                                                                break;
                                                        case STRING:
@@ -618,7 +668,7 @@ skipcomments:
 
 overflow:
        *dst = EOS;
-       DEBUG printf("*** OVERFLOW LINE ***\n%s\n", dest);
+       DEBUG { printf("*** OVERFLOW LINE ***\n%s\n", dest); }
        return fatal("line too long as a result of macro expansion");
 }
 
@@ -654,12 +704,12 @@ char * GetNextRepeatLine(void)
        // Do repeat at end of .rept block's string list
        if (strp == NULL)
        {
-               DEBUG printf("back-to-top-of-repeat-block count=%d\n", (int)irept->ir_count);
+               DEBUG { printf("back-to-top-of-repeat-block count=%d\n", (int)irept->ir_count); }
                irept->ir_nextln = irept->ir_firstln;   // copy first line
 
                if (irept->ir_count-- == 0)
                {
-                       DEBUG printf("end-repeat-block\n");
+                       DEBUG { printf("end-repeat-block\n"); }
                        return NULL;
                }
 
@@ -680,8 +730,7 @@ char * GetNextRepeatLine(void)
 int include(int handle, char * fname)
 {
        // Debug mode
-       if (debug)
-               printf("[include: %s, cfileno=%u]\n", fname, cfileno);
+       DEBUG { printf("[include: %s, cfileno=%u]\n", fname, cfileno); }
 
        // Alloc and initialize include-descriptors
        INOBJ * inobj = a_inobj(SRC_IFILE);
@@ -709,7 +758,7 @@ int include(int handle, char * fname)
                last_fr->frec_next = fr;                        // Append to list of filerecs
 
        last_fr = fr;
-       DEBUG printf("[include: curfname: %s, cfileno=%u]\n", curfname, cfileno);
+       DEBUG { printf("[include: curfname: %s, cfileno=%u]\n", curfname, cfileno); }
 
        return OK;
 }
@@ -741,7 +790,7 @@ int fpop(void)
 
                // Give a warning to the user that we had to wipe their bum for them
                if (numUnmatched > 0)
-                       warni("missing %d .endif(s)", numUnmatched);
+                       warn("missing %d .endif(s)", numUnmatched);
 
                tok = inobj->in_otok;           // Restore tok and otok
                etok = inobj->in_etok;
@@ -749,21 +798,20 @@ int fpop(void)
                switch (inobj->in_type)
                {
                case SRC_IFILE:                         // Pop and release an IFILE
-                       if (debug)
-                               printf("[Leaving: %s]\n", curfname);
+                       DEBUG { printf("[Leaving: %s]\n", curfname); }
 
                        ifile = inobj->inobj.ifile;
                        ifile->if_link = f_ifile;
                        f_ifile = ifile;
                        close(ifile->ifhandle);                 // Close source file
-if (debug)     printf("[fpop (pre):  curfname=%s]\n", curfname);
+DEBUG { printf("[fpop (pre):  curfname=%s]\n", curfname); }
                        curfname = ifile->ifoldfname;   // Set current filename
-if (debug)     printf("[fpop (post): curfname=%s]\n", curfname);
-if (debug)     printf("[fpop: (pre)  cfileno=%d ifile->ifno=%d]\n", (int)cfileno, (int)ifile->ifno);
+DEBUG { printf("[fpop (post): curfname=%s]\n", curfname); }
+DEBUG { printf("[fpop: (pre)  cfileno=%d ifile->ifno=%d]\n", (int)cfileno, (int)ifile->ifno); }
                        curlineno = ifile->ifoldlineno; // Set current line#
                        DEBUG printf("cfileno=%d ifile->ifno=%d\n", (int)cfileno, (int)ifile->ifno);
                        cfileno = ifile->ifno;                  // Restore current file number
-if (debug)     printf("[fpop: (post) cfileno=%d ifile->ifno=%d]\n", (int)cfileno, (int)ifile->ifno);
+DEBUG { printf("[fpop: (post) cfileno=%d ifile->ifno=%d]\n", (int)cfileno, (int)ifile->ifno); }
                        break;
                case SRC_IMACRO:                                        // Pop and release an IMACRO
                        imacro = inobj->inobj.imacro;
@@ -889,16 +937,16 @@ char * GetNextLine(void)
 //
 int TokenizeLine(void)
 {
-       char * ln = NULL;                       // Ptr to current position in line
-       char * p;                                       // Random character ptr
+       uint8_t * ln = NULL;            // Ptr to current position in line
+       uint8_t * p;                            // Random character ptr
        TOKEN * tk;                                     // Token-deposit ptr
        int state = 0;                          // State for keyword detector
        int j = 0;                                      // Var for keyword detector
-       char c;                                         // Random char
+       uint8_t c;                                      // Random char
        VALUE v;                                        // Random value
-       char * nullspot = NULL;         // Spot to clobber for SYMBOL termination
+       uint8_t * nullspot = NULL;      // Spot to clobber for SYMBOL termination
        int stuffnull;                          // 1:terminate SYMBOL '\0' at *nullspot
-       char c1;
+       uint8_t c1;
        int stringNum = 0;                      // Pointer to string locations in tokenized line
 
 retry:
@@ -918,7 +966,7 @@ retry:
        case SRC_IFILE:
                if ((ln = GetNextLine()) == NULL)
                {
-if (debug) printf("TokenizeLine: Calling fpop() from SRC_IFILE...\n");
+DEBUG { printf("TokenizeLine: Calling fpop() from SRC_IFILE...\n"); }
                        if (fpop() == 0)                                // Pop input level
                                goto retry;                                     // Try for more lines
                        else
@@ -972,7 +1020,7 @@ if (debug) printf("TokenizeLine: Calling fpop() from SRC_IFILE...\n");
        case SRC_IREPT:
                if ((ln = GetNextRepeatLine()) == NULL)
                {
-if (debug) printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n");
+DEBUG { printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n"); }
                        fpop();
                        goto retry;
                }
@@ -999,15 +1047,15 @@ if (debug) printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n");
                goto goteol;
 
        // Main tokenization loop;
-       // o  skip whitespace;
-       // o  handle end-of-line;
-       // o  handle symbols;
-       // o  handle single-character tokens (operators, etc.);
-       // o  handle multiple-character tokens (constants, strings, etc.).
+       //  o  skip whitespace;
+       //  o  handle end-of-line;
+       //  o  handle symbols;
+       //  o  handle single-character tokens (operators, etc.);
+       //  o  handle multiple-character tokens (constants, strings, etc.).
        for(; *ln!=EOS;)
        {
                // Skip whitespace, handle EOL
-               while ((int)chrtab[*ln] & WHITE)
+               while (chrtab[*ln] & WHITE)
                        ln++;
 
                // Handle EOL, comment with ';'
@@ -1070,7 +1118,7 @@ if (debug) printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n");
                                v = (VALUE)dotxtab[*ln++];
 
                                if (chrtab[*ln] & CTSYM)
-                                       return error("misuse of '.', not allowed in symbols");
+                                       return error("misuse of '.'; not allowed in symbols");
                        }
 
                        // If the symbol is small, check to see if it's really the name of
@@ -1161,7 +1209,6 @@ if (debug) printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n");
                // Handle multiple-character tokens
                if (c & MULTX)
                {
-
                        switch (*ln++)
                        {
                        case '!':               // ! or !=
@@ -1251,32 +1298,6 @@ dostring:
                                        while (hextab[*ln] >= 0)
                                                v = (v << 4) + (int)hextab[*ln++];
 
-                                       // ggn: Okay, some comments here are in order I think....
-                                       // The original madmac sources didn't parse the size at
-                                       // this point (i.e. .b/.w/.l). It was probably done at
-                                       // another point, although it's unclear to me exactly
-                                       // where. So why change this? My understanding (at least
-                                       // from what SCPCD said on IRC) is that .w addressing
-                                       // formats produce wrong code on jaguar (or doesn't execute
-                                       // properly? something like that). So the code was changed
-                                       // to mask off the upper bits depending on length (note: I
-                                       // don't think .b is valid at all! I only know of .w/.l, so
-                                       // this should probably be wiped). Then the code that
-                                       // parses the constant and checks to see if it's between
-                                       // $ffff0000 and $8000 never got triggered, so yay job
-                                       // done! ...now say we want to assemble a st .prg. One of
-                                       // the most widely spread optimisations is move.X expr.w,Y
-                                       // (or vice versa, or both, anyway...) to access hardware
-                                       // registers (which are mapped to $fxxxxx). This botchy
-                                       // thing would create "hilarious" code while trying to
-                                       // access hardware registers. So I made a condition to see
-                                       // if st mode or jaguar is active and apply the both or
-                                       // not. One last note: this is hardcoded to get optimised
-                                       // for now on ST mode, i.e. it can't generate code like
-                                       // move.w $00001234,d0 - it'll always get optimised to
-                                       // move.w $1234.w,d0. It's probably ok, but maybe a warning
-                                       // should be emitted? Or maybe finding a way to make it not
-                                       // auto-optimise? I think it's ok for now...
                                        if (*ln == '.')
                                        {
                                                if (obj_format == BSD)
@@ -1303,19 +1324,19 @@ dostring:
 
                                        if (obj_format == ALCYON)
                                        {
-                        if (*ln == '.')
-                        {
-                            if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
-                            {
-                                *tk++ = DOTW;
-                                ln += 2;
-                            }
-                            else if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
-                            {
-                                *tk++ = DOTL;
-                                ln += 2;
-                            }
-                        }
+                                               if (*ln == '.')
+                                               {
+                                                       if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
+                                                       {
+                                                               *tk++ = DOTW;
+                                                               ln += 2;
+                                                       }
+                                                       else if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
+                                                       {
+                                                               *tk++ = DOTL;
+                                                               ln += 2;
+                                                       }
+                                               }
                                        }
                                }
                                else
@@ -1537,7 +1558,7 @@ dostring:
                }
 
                // Handle illegal character
-               return error("illegal character");
+               return error("illegal character $%02X found", *ln);
        }
 
        // Terminate line of tokens and return "success."