X-Git-Url: http://shamusworld.gotdns.org/cgi-bin/gitweb.cgi?p=rmac;a=blobdiff_plain;f=token.c;h=8726eb4b9ee256ed536f9eff6ae061bb48d27dc0;hp=250aa46bded07a66d02aa6cfd4705270a3d687b3;hb=60f204cb9e3905100da0d89f14bb40db764acd9e;hpb=3f2bccb78ab4cd59654d521c8aedfe5512ee6608 diff --git a/token.c b/token.c index 250aa46..8726eb4 100644 --- a/token.c +++ b/token.c @@ -1,9 +1,9 @@ // // RMAC - Reboot's Macro Assembler for the Atari Jaguar Console System // TOKEN.C - Token Handling -// Copyright (C) 199x Landon Dyer, 2011-2012 Reboot and Friends +// Copyright (C) 199x Landon Dyer, 2011-2017 Reboot and Friends // RMAC derived from MADMAC v1.07 Written by Landon Dyer, 1986 -// Source Utilised with the Kind Permission of Landon Dyer +// Source utilised with the kind permission of Landon Dyer // #include "token.h" @@ -13,7 +13,7 @@ #include "symbol.h" #define DECL_KW // Declare keyword arrays -#define DEF_KW // Declare keyword values +#define DEF_KW // Declare keyword values #include "kwtab.h" // Incl generated keyword tables & defs @@ -23,8 +23,8 @@ int totlines; // Total # of lines int mjump_align = 0; // mjump alignment flag char lntag; // Line tag char * curfname; // Current filename -char tolowertab[128]; // Uppercase ==> lowercase -char hextab[128]; // Table of hex values +char tolowertab[128]; // Uppercase ==> lowercase +int8_t hextab[128]; // Table of hex values char dotxtab[128]; // Table for ".b", ".s", etc. char irbuf[LNSIZ]; // Text for .rept block line char lnbuf[LNSIZ]; // Text of current line @@ -54,28 +54,28 @@ static IMACRO * f_imacro; // Ptr list of free IMACROs static TOKEN tokbuf[TOKBUFSIZE]; // Token buffer (stack-like, all files) char chrtab[] = { - ILLEG, ILLEG, ILLEG, ILLEG, // NUL SOH STX ETX - ILLEG, ILLEG, ILLEG, ILLEG, // EOT ENQ ACK BEL - ILLEG, WHITE, ILLEG, ILLEG, // BS HT LF VT - WHITE, ILLEG, ILLEG, ILLEG, // FF CR SO SI + ILLEG, ILLEG, ILLEG, ILLEG, // NUL SOH STX ETX + ILLEG, ILLEG, ILLEG, ILLEG, // EOT ENQ ACK BEL + ILLEG, WHITE, ILLEG, ILLEG, // BS HT LF VT + WHITE, ILLEG, ILLEG, ILLEG, // FF CR SO SI - ILLEG, ILLEG, ILLEG, ILLEG, // DLE DC1 DC2 DC3 - ILLEG, ILLEG, ILLEG, ILLEG, // DC4 NAK SYN ETB - ILLEG, ILLEG, ILLEG, ILLEG, // CAN EM SUB ESC - ILLEG, ILLEG, ILLEG, ILLEG, // FS GS RS US + ILLEG, ILLEG, ILLEG, ILLEG, // DLE DC1 DC2 DC3 + ILLEG, ILLEG, ILLEG, ILLEG, // DC4 NAK SYN ETB + ILLEG, ILLEG, ILLEG, ILLEG, // CAN EM SUB ESC + ILLEG, ILLEG, ILLEG, ILLEG, // FS GS RS US WHITE, MULTX, MULTX, SELF, // SP ! " # MULTX+CTSYM, MULTX, SELF, MULTX, // $ % & ' SELF, SELF, SELF, SELF, // ( ) * + SELF, SELF, STSYM, SELF, // , - . / - DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 0 1 - DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 2 3 - DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 4 5 - DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 6 7 - DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 8 9 - MULTX, MULTX, // : ; - MULTX, MULTX, MULTX, STSYM+CTSYM, // < = > ? + DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 0 1 + DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 2 3 + DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 4 5 + DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 6 7 + DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 8 9 + MULTX, MULTX, // : ; + MULTX, MULTX, MULTX, STSYM+CTSYM, // < = > ? MULTX, STSYM+CTSYM+HDIGIT, // @ A (char)((BYTE)DOT)+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT, // B C @@ -96,10 +96,10 @@ char chrtab[] = { STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // h i j k (char)((BYTE)DOT)+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // l m n o - STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, (char)((BYTE)DOT)+STSYM+CTSYM, // p q r s - STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, (char)((BYTE)DOT)+STSYM+CTSYM, // t u v w - STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, SELF, // x y z { - SELF, SELF, SELF, ILLEG // | } ~ DEL + STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, (char)((BYTE)DOT)+STSYM+CTSYM, // p q r s + STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, (char)((BYTE)DOT)+STSYM+CTSYM, // t u v w + STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, SELF, // x y z { + SELF, SELF, SELF, ILLEG // | } ~ DEL }; // Names of registers @@ -110,7 +110,7 @@ static char * regname[] = { }; static char * riscregname[] = { - "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", + "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23", "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31" @@ -155,15 +155,15 @@ void InitTokenizer(void) tolowertab[i] |= 0x20; // These characters are legal immediately after a period - dotxtab['b'] = DOTB; // .b .B .s .S + dotxtab['b'] = DOTB; // .b .B .s .S dotxtab['B'] = DOTB; dotxtab['s'] = DOTB; dotxtab['S'] = DOTB; - dotxtab['w'] = DOTW; // .w .W + dotxtab['w'] = DOTW; // .w .W dotxtab['W'] = DOTW; - dotxtab['l'] = DOTL; // .l .L + dotxtab['l'] = DOTL; // .l .L dotxtab['L'] = DOTL; - dotxtab['i'] = DOTI; // .i .I (???) + dotxtab['i'] = DOTI; // .i .I (???) dotxtab['I'] = DOTI; } @@ -230,7 +230,7 @@ INOBJ * a_inobj(int typ) inobj->inobj.ifile = ifile; break; - case SRC_IMACRO: // Alloc and init an IMACRO + case SRC_IMACRO: // Alloc and init an IMACRO if (f_imacro == NULL) imacro = malloc(sizeof(IMACRO)); else @@ -288,7 +288,6 @@ int ExpandMacro(char * src, char * dest, int destsiz) IMACRO * imacro = cur_inobj->inobj.imacro; int macnum = (int)(imacro->im_macro->sattr); -// destsiz--; char * dst = dest; // Next dest slot char * edst = dest + destsiz - 1; // End + 1(?) of dest buffer @@ -314,6 +313,11 @@ int ExpandMacro(char * src, char * dest, int destsiz) if (dst >= edst) goto overflow; + // Skip comments in case a loose @ or \ is in there + // In that case the tokeniser was trying to expand it. + if ((*s == ';') || ((*s == '/') && (*(s + 1) == '/'))) + goto skipcomments; + *dst++ = *s++; } // Do macro expansion @@ -330,11 +334,11 @@ int ExpandMacro(char * src, char * dest, int destsiz) *dst++ = *s++; continue; - case '?': // \? set `questmark' flag + case '?': // \? set `questmark' flag ++s; questmark = 1; break; - case '#': // \#, number of arguments + case '#': // \#, number of arguments sprintf(numbuf, "%d", (int)imacro->im_nargs); goto copystr; case '!': // \! size suffix supplied on invocation @@ -347,7 +351,7 @@ int ExpandMacro(char * src, char * dest, int destsiz) } goto copy_d; - case '~': // ==> unique label string Mnnnn... + case '~': // ==> unique label string Mnnnn... sprintf(numbuf, "M%u", curuniq); copystr: d = numbuf; @@ -554,6 +558,9 @@ DEBUG printf("ExM: SYMBOL=\"%s\"", d); case DOTL: d = ".l"; break; + case CR_ABSCOUNT: + d = "^^abscount"; + break; case CR_DATE: d = "^^date"; break; @@ -601,6 +608,8 @@ strcopy: } } +skipcomments: + *dst = EOS; DEBUG { printf("ExM: dst=\"%s\"\n", dest); } return OK; @@ -617,8 +626,6 @@ overflow: // char * GetNextMacroLine(void) { - unsigned source_addr; - IMACRO * imacro = cur_inobj->inobj.imacro; // LONG * strp = imacro->im_nextln; struct LineList * strp = imacro->im_nextln; @@ -626,7 +633,6 @@ char * GetNextMacroLine(void) if (strp == NULL) // End-of-macro return NULL; -// imacro->im_nextln = (LONG *)*strp; imacro->im_nextln = strp->next; // ExpandMacro((char *)(strp + 1), imacro->im_lnbuf, LNSIZ); ExpandMacro(strp->line, imacro->im_lnbuf, LNSIZ); @@ -672,17 +678,13 @@ char * GetNextRepeatLine(void) // int include(int handle, char * fname) { - IFILE * ifile; - INOBJ * inobj; - FILEREC * fr; - - // Verbose mode - if (verb_flag) + // Debug mode + if (debug) printf("[include: %s, cfileno=%u]\n", fname, cfileno); // Alloc and initialize include-descriptors - inobj = a_inobj(SRC_IFILE); - ifile = inobj->inobj.ifile; + INOBJ * inobj = a_inobj(SRC_IFILE); + IFILE * ifile = inobj->inobj.ifile; ifile->ifhandle = handle; // Setup file handle ifile->ifind = ifile->ifcnt = 0; // Setup buffer indices @@ -690,21 +692,20 @@ int include(int handle, char * fname) ifile->ifoldfname = curfname; // Save old filename ifile->ifno = cfileno; // Save old file number -// cfileno = filecount++; // Compute new file number // NB: This *must* be preincrement, we're adding one to the filecount here! cfileno = ++filecount; // Compute NEW file number curfname = strdup(fname); // Set current filename (alloc storage) curlineno = 0; // Start on line zero // Add another file to the file-record - fr = (FILEREC *)malloc(sizeof(FILEREC)); + FILEREC * fr = (FILEREC *)malloc(sizeof(FILEREC)); fr->frec_next = NULL; fr->frec_name = curfname; if (last_fr == NULL) - filerec = fr; // Add first filerec + filerec = fr; // Add first filerec else - last_fr->frec_next = fr; // Append to list of filerecs + last_fr->frec_next = fr; // Append to list of filerecs last_fr = fr; DEBUG printf("[include: curfname: %s, cfileno=%u]\n", curfname, cfileno); @@ -727,30 +728,41 @@ int fpop(void) { // Pop IFENT levels until we reach the conditional assembly context we // were at when the input object was entered. + int numUnmatched = 0; + while (ifent != inobj->in_ifent) - d_endif(); + { + if (d_endif() != 0) // Something bad happened during endif parsing? + return -1; // If yes, bail instead of getting stuck in a loop + + numUnmatched++; + } + + // Give a warning to the user that we had to wipe their bum for them + if (numUnmatched > 0) + warni("missing %d .endif(s)", numUnmatched); - tok = inobj->in_otok; // Restore tok and otok + tok = inobj->in_otok; // Restore tok and otok etok = inobj->in_etok; switch (inobj->in_type) { - case SRC_IFILE: // Pop and release an IFILE - if (verb_flag) + case SRC_IFILE: // Pop and release an IFILE + if (debug) printf("[Leaving: %s]\n", curfname); ifile = inobj->inobj.ifile; ifile->if_link = f_ifile; f_ifile = ifile; close(ifile->ifhandle); // Close source file -if (verb_flag) printf("[fpop (pre): curfname=%s]\n", curfname); +if (debug) printf("[fpop (pre): curfname=%s]\n", curfname); curfname = ifile->ifoldfname; // Set current filename -if (verb_flag) printf("[fpop (post): curfname=%s]\n", curfname); -if (verb_flag) printf("[fpop: (pre) cfileno=%d ifile->ifno=%d]\n", (int)cfileno, (int)ifile->ifno); - curlineno = ifile->ifoldlineno; // Set current line# +if (debug) printf("[fpop (post): curfname=%s]\n", curfname); +if (debug) printf("[fpop: (pre) cfileno=%d ifile->ifno=%d]\n", (int)cfileno, (int)ifile->ifno); + curlineno = ifile->ifoldlineno; // Set current line# DEBUG printf("cfileno=%d ifile->ifno=%d\n", (int)cfileno, (int)ifile->ifno); cfileno = ifile->ifno; // Restore current file number -if (verb_flag) printf("[fpop: (post) cfileno=%d ifile->ifno=%d]\n", (int)cfileno, (int)ifile->ifno); +if (debug) printf("[fpop: (post) cfileno=%d ifile->ifno=%d]\n", (int)cfileno, (int)ifile->ifno); break; case SRC_IMACRO: // Pop and release an IMACRO imacro = inobj->inobj.imacro; @@ -806,7 +818,7 @@ char * GetNextLine(void) if (*p == '\r') { if (i >= j) - break; // Need to read more, then look for '\n' to eat + break; // Need to read more, then look for '\n' to eat else if (p[1] == '\n') i++; } @@ -905,9 +917,14 @@ retry: case SRC_IFILE: if ((ln = GetNextLine()) == NULL) { -if (verb_flag) printf("TokenizeLine: Calling fpop() from SRC_IFILE...\n"); - fpop(); // Pop input level - goto retry; // Try for more lines +if (debug) printf("TokenizeLine: Calling fpop() from SRC_IFILE...\n"); + if (fpop() == 0) // Pop input level + goto retry; // Try for more lines + else + { + ifent->if_prev = (IFENT *) - 1; //Signal Assemble() that we have reached EOF with unbalanced if/endifs + return TKEOF; + } } curlineno++; // Bump line number @@ -940,8 +957,10 @@ if (verb_flag) printf("TokenizeLine: Calling fpop() from SRC_IFILE...\n"); case SRC_IMACRO: if ((ln = GetNextMacroLine()) == NULL) { - ExitMacro(); // Exit macro (pop args, do fpop(), etc) - goto retry; // Try for more lines... + if (ExitMacro() == 0) // Exit macro (pop args, do fpop(), etc) + goto retry; // Try for more lines... + else + return TKEOF; // Oops, we got a non zero return code, signal EOF } lntag = '@'; @@ -952,7 +971,7 @@ if (verb_flag) printf("TokenizeLine: Calling fpop() from SRC_IFILE...\n"); case SRC_IREPT: if ((ln = GetNextRepeatLine()) == NULL) { -if (verb_flag) printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n"); +if (debug) printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n"); fpop(); goto retry; } @@ -991,7 +1010,7 @@ if (verb_flag) printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n"); ln++; // Handle EOL, comment with ';' - if (*ln == EOS || *ln == ';'|| ((*ln == '/') && (*(ln + 1) == '/'))) + if (*ln == EOS || *ln == ';'|| ((*ln == '/') && (*(ln + 1) == '/'))) break; // Handle start of symbol. Symbols are null-terminated in place. The @@ -1006,6 +1025,27 @@ if (verb_flag) printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n"); v = 0; // Assume no DOT attrib follows symbol stuffnull = 1; + + // In some cases, we need to check for a DOTx at the *beginning* + // of a symbol, as the "start" of the line we're currently looking + // at could be somewhere in the middle of that line! + if (*ln == '.') + { + // Make sure that it's *only* a .[bwsl] following, and not the + // start of a local symbol: + if ((chrtab[*(ln + 1)] & DOT) + && (dotxtab[*(ln + 1)] != 0) + && !(chrtab[*(ln + 2)] & CTSYM)) + { + // We found a legitimate DOTx construct, so add it to the + // token stream: + ln++; + stuffnull = 0; + *tk++ = (TOKEN)dotxtab[*ln++]; + continue; + } + } + p = nullspot = ln++; // Nullspot -> start of this symbol // Find end of symbol (and compute its length) @@ -1017,19 +1057,19 @@ if (verb_flag) printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n"); if (*ln == '.') { *ln++ = EOS; // Terminate symbol - stuffnull = 0; // And never try it again + stuffnull = 0; // And never try it again // Character following the `.' must have a DOT attribute, and // the chararacter after THAT one must not have a start-symbol // attribute (to prevent symbols that look like, for example, // "zingo.barf", which might be a good idea anyway....) - if ((((int)chrtab[*ln] & DOT) == 0) || ((int)dotxtab[*ln] <= 0)) - return error("[bwsl] must follow `.' in symbol"); + if (((chrtab[*ln] & DOT) == 0) || (dotxtab[*ln] == 0)) + return error("[bwsl] must follow '.' in symbol"); v = (VALUE)dotxtab[*ln++]; - if ((int)chrtab[*ln] & CTSYM) - return error("misuse of `.', not allowed in symbols"); + if (chrtab[*ln] & CTSYM) + return error("misuse of '.', not allowed in symbols"); } // If the symbol is small, check to see if it's really the name of @@ -1081,7 +1121,7 @@ if (verb_flag) printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n"); } // If not tokenized keyword OR token was not found - if (j < 0 || state < 0) + if ((j < 0) || (state < 0)) { *tk++ = SYMBOL; //#warning @@ -1122,7 +1162,7 @@ if (verb_flag) printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n"); { switch (*ln++) { - case '!': // ! or != + case '!': // ! or != if (*ln == '=') { *tk++ = NE; @@ -1132,8 +1172,8 @@ if (verb_flag) printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n"); *tk++ = '!'; continue; - case '\'': // 'string' - case '\"': // "string" + case '\'': // 'string' + case '\"': // "string" c1 = ln[-1]; *tk++ = STRING; //#warning @@ -1200,41 +1240,83 @@ if (verb_flag) printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n"); *p++ = EOS; continue; case '$': // $, hex constant - if ((int)chrtab[*ln] & HDIGIT) + if (chrtab[*ln] & HDIGIT) { v = 0; - while ((int)hextab[*ln] >= 0) + // Parse the hex value + while (hextab[*ln] >= 0) v = (v << 4) + (int)hextab[*ln++]; + // ggn: Okay, some comments here are in order I think.... + // The original madmac sources didn't parse the size at + // this point (i.e. .b/.w/.l). It was probably done at + // another point, although it's unclear to me exactly + // where. So why change this? My understanding (at least + // from what SCPCD said on IRC) is that .w addressing + // formats produce wrong code on jaguar (or doesn't execute + // properly? something like that). So the code was changed + // to mask off the upper bits depending on length (note: I + // don't think .b is valid at all! I only know of .w/.l, so + // this should probably be wiped). Then the code that + // parses the constant and checks to see if it's between + // $ffff0000 and $8000 never got triggered, so yay job + // done! ...now say we want to assemble a st .prg. One of + // the most widely spread optimisations is move.X expr.w,Y + // (or vice versa, or both, anyway...) to access hardware + // registers (which are mapped to $fxxxxx). This botchy + // thing would create "hilarious" code while trying to + // access hardware registers. So I made a condition to see + // if st mode or jaguar is active and apply the both or + // not. One last note: this is hardcoded to get optimised + // for now on ST mode, i.e. it can't generate code like + // move.w $00001234,d0 - it'll always get optimised to + // move.w $1234.w,d0. It's probably ok, but maybe a warning + // should be emitted? Or maybe finding a way to make it not + // auto-optimise? I think it's ok for now... if (*ln == '.') { - if ((*(ln + 1) == 'b') || (*(ln + 1) == 'B')) + if (obj_format == BSD) { - v &= 0x000000FF; - ln += 2; + if ((*(ln + 1) & 0xDF) == 'B') + { + v &= 0x000000FF; + ln += 2; + } + else if ((*(ln + 1) & 0xDF) == 'W') + { + v &= 0x0000FFFF; + ln += 2; + } + else if ((*(ln + 1) & 0xDF) == 'L') + { + ln += 2; + } } + } + + *tk++ = CONST; + *tk++ = v; + if (obj_format == ALCYON) + { if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W')) { - v &= 0x0000FFFF; + *tk++ = DOTW; ln += 2; } - - if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L')) + else if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L')) { + *tk++ = DOTL; ln += 2; } } - - *tk++ = CONST; - *tk++ = v; } else *tk++ = '$'; continue; - case '<': // < or << or <> or <= + case '<': // < or << or <> or <= switch (*ln) { case '<': @@ -1263,7 +1345,7 @@ if (verb_flag) printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n"); *tk++ = ':'; continue; - case '=': // = or == + case '=': // = or == if (*ln == '=') { *tk++ = DEQUALS; @@ -1273,7 +1355,7 @@ if (verb_flag) printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n"); *tk++ = '='; continue; - case '>': // > or >> or >= + case '>': // > or >> or >= switch (*ln) { case '>': @@ -1288,7 +1370,7 @@ if (verb_flag) printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n"); *tk++ = '>'; continue; } - case '%': // % or binary constant + case '%': // % or binary constant if (*ln < '0' || *ln > '1') { *tk++ = '%'; @@ -1323,7 +1405,7 @@ if (verb_flag) printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n"); *tk++ = CONST; *tk++ = v; continue; - case '@': // @ or octal constant + case '@': // @ or octal constant if (*ln < '0' || *ln > '7') { *tk++ = '@'; @@ -1378,7 +1460,7 @@ if (verb_flag) printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n"); for(state=0; state>=0;) { - // Get char, convert to lowercase + // Get char, convert to lowercase j = *p++; if (j >= 'A' && j <= 'Z') @@ -1468,15 +1550,15 @@ goteol: // // .GOTO