X-Git-Url: http://shamusworld.gotdns.org/cgi-bin/gitweb.cgi?p=rmac;a=blobdiff_plain;f=token.c;h=e1be9b5b8871ea3cde208b29b8e9a072f43ec64d;hp=ac76526e89d6c039635ed9f8358a30f0366abc20;hb=eace4e1b294ccec54a5c476619f616f5da0bf8a9;hpb=03dd34951a331e0b8971195ccef1600fffaea2e6 diff --git a/token.c b/token.c index ac76526..e1be9b5 100644 --- a/token.c +++ b/token.c @@ -7,6 +7,8 @@ // #include "token.h" + +#include #include "direct.h" #include "error.h" #include "macro.h" @@ -35,7 +37,8 @@ WORD cfileno; // Current file number TOKEN * tok; // Ptr to current token TOKEN * etok; // Ptr past last token in tokbuf[] TOKEN tokeol[1] = {EOL}; // Bailout end-of-line token -char * string[TOKBUFSIZE*2]; // Token buffer string pointer storage +char * string[TOKBUFSIZE*2];// Token buffer string pointer storage +int optimizeOff; // Optimization override flag // File record, used to maintain a list of every include file ever visited #define FILEREC struct _filerec @@ -48,14 +51,14 @@ FILEREC FILEREC * filerec; FILEREC * last_fr; -INOBJ * cur_inobj; // Ptr current input obj (IFILE/IMACRO) -static INOBJ * f_inobj; // Ptr list of free INOBJs -static IFILE * f_ifile; // Ptr list of free IFILEs -static IMACRO * f_imacro; // Ptr list of free IMACROs +INOBJ * cur_inobj; // Ptr current input obj (IFILE/IMACRO) +static INOBJ * f_inobj; // Ptr list of free INOBJs +static IFILE * f_ifile; // Ptr list of free IFILEs +static IMACRO * f_imacro; // Ptr list of free IMACROs -static TOKEN tokbuf[TOKBUFSIZE]; // Token buffer (stack-like, all files) +static TOKEN tokbuf[TOKBUFSIZE]; // Token buffer (stack-like, all files) -char chrtab[] = { +uint8_t chrtab[0x100] = { ILLEG, ILLEG, ILLEG, ILLEG, // NUL SOH STX ETX ILLEG, ILLEG, ILLEG, ILLEG, // EOT ENQ ACK BEL ILLEG, WHITE, ILLEG, ILLEG, // BS HT LF VT @@ -79,29 +82,48 @@ char chrtab[] = { MULTX, MULTX, // : ; MULTX, MULTX, MULTX, STSYM+CTSYM, // < = > ? - MULTX, STSYM+CTSYM+HDIGIT, // @ A - (char)((BYTE)DOT)+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT, // B C - (char)((BYTE)DOT)+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT, // D E - STSYM+CTSYM+HDIGIT, STSYM+CTSYM, // F G - STSYM+CTSYM, (char)((BYTE)DOT)+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // H I J K - (char)((BYTE)DOT)+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // L M N O - - (char)((BYTE)DOT)+STSYM+CTSYM, (char)((BYTE)DOT)+STSYM+CTSYM, STSYM+CTSYM, (char)((BYTE)DOT)+STSYM+CTSYM, // P Q R S - STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, (char)((BYTE)DOT)+STSYM+CTSYM, // T U V W - STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, SELF, // X Y Z [ - SELF, SELF, MULTX, STSYM+CTSYM, // \ ] ^ _ - - ILLEG, STSYM+CTSYM+HDIGIT, // ` a - (char)((BYTE)DOT)+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT, // b c - (char)((BYTE)DOT)+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT, // d e - STSYM+CTSYM+HDIGIT, STSYM+CTSYM, // f g - STSYM+CTSYM, (char)((BYTE)DOT)+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // h i j k - (char)((BYTE)DOT)+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // l m n o - - (char)((BYTE)DOT)+STSYM+CTSYM, (char)((BYTE)DOT)+STSYM+CTSYM, STSYM+CTSYM, (char)((BYTE)DOT)+STSYM+CTSYM, // p q r s - STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, (char)((BYTE)DOT)+STSYM+CTSYM, // t u v w - (char)((BYTE)DOT)+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, SELF, // x y z { - SELF, SELF, SELF, ILLEG // | } ~ DEL + MULTX, STSYM+CTSYM+HDIGIT, // @ A + DOT+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT, // B C + DOT+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT, // D E + STSYM+CTSYM+HDIGIT, STSYM+CTSYM, // F G + STSYM+CTSYM, DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // H I J K + DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // L M N O + + DOT+STSYM+CTSYM, DOT+STSYM+CTSYM, STSYM+CTSYM, DOT+STSYM+CTSYM, // P Q R S + STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, DOT+STSYM+CTSYM, // T U V W + STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, SELF,// X Y Z [ + SELF, SELF, MULTX, STSYM+CTSYM, // \ ] ^ _ + + ILLEG, STSYM+CTSYM+HDIGIT, // ` a + DOT+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT, // b c + DOT+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT, // d e + STSYM+CTSYM+HDIGIT, STSYM+CTSYM, // f g + STSYM+CTSYM, DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // h i j k + DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // l m n o + + DOT+STSYM+CTSYM, DOT+STSYM+CTSYM, STSYM+CTSYM, DOT+STSYM+CTSYM, // p q r s + STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, DOT+STSYM+CTSYM, // t u v w + DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, SELF, // x y z { + SELF, SELF, SELF, ILLEG, // | } ~ DEL + + // Anything above $7F is illegal (and yes, we need to check for this, + // otherwise you get strange and spurious errors that will lead you astray) + ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, + ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, + ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, + ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, + ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, + ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, + ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, + ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, + ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, + ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, + ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, + ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, + ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, + ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, + ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, + ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG }; // Names of registers @@ -186,13 +208,13 @@ void InitTokenizer(void) dotxtab['W'] = DOTW; dotxtab['l'] = DOTL; // .l .L dotxtab['L'] = DOTL; - dotxtab['i'] = DOTI; // .i .I (???) + dotxtab['i'] = DOTI; // .i .I (WTF is this???) dotxtab['I'] = DOTI; - dotxtab['D'] = DOTD; // .d .D (quad word) + dotxtab['D'] = DOTD; // .d .D (double) dotxtab['d'] = DOTD; dotxtab['S'] = DOTS; // .s .S dotxtab['s'] = DOTS; - dotxtab['Q'] = DOTQ; // .q .Q + dotxtab['Q'] = DOTQ; // .q .Q (quad word) dotxtab['q'] = DOTQ; dotxtab['X'] = DOTX; // .x .x dotxtab['x'] = DOTX; @@ -263,6 +285,7 @@ INOBJ * a_inobj(int typ) inobj->inobj.ifile = ifile; break; + case SRC_IMACRO: // Alloc and init an IMACRO if (f_imacro == NULL) imacro = malloc(sizeof(IMACRO)); @@ -274,9 +297,10 @@ INOBJ * a_inobj(int typ) inobj->inobj.imacro = imacro; break; + case SRC_IREPT: // Alloc and init an IREPT inobj->inobj.irept = malloc(sizeof(IREPT)); - DEBUG printf("alloc IREPT\n"); + DEBUG { printf("alloc IREPT\n"); } break; } @@ -368,7 +392,7 @@ int ExpandMacro(char * src, char * dest, int destsiz) *dst++ = *s++; continue; case '?': // \? set `questmark' flag - ++s; + s++; questmark = 1; break; case '#': // \#, number of arguments @@ -434,7 +458,7 @@ copy_d: *d++ = *s++; if (*s != '}') - return error("missing '}'"); + return error("missing closing brace ('}')"); else s++; } @@ -443,10 +467,10 @@ copy_d: // Lookup the argument and copy its (string) value into the // destination string - DEBUG printf("argument='%s'\n", mname); + DEBUG { printf("argument='%s'\n", mname); } if ((arg = lookup(mname, MACARG, macnum)) == NULL) - return errors("undefined argument: '%s'", mname); + return error("undefined argument: '%s'", mname); else { // Convert a string of tokens (terminated with EOL) back into @@ -454,24 +478,18 @@ copy_d: // macro invocation) then it is ignored. i = (int)arg->svalue; arg_num: - DEBUG printf("~argnumber=%d (argBase=%u)\n", i, imacro->argBase); + DEBUG { printf("~argnumber=%d\n", i); } tk = NULL; if (i < imacro->im_nargs) { -#if 0 -// tk = argp[i]; -// tk = argPtrs[i]; - tk = argPtrs[imacro->argBase + i]; -#else tk = imacro->argument[i].token; symbolString = imacro->argument[i].string; //DEBUG //{ // printf("ExM: Preparing to parse argument #%u...\n", i); -// dumptok(tk); +// DumpTokens(tk); //} -#endif } // \?arg yields: @@ -519,7 +537,7 @@ arg_num: #else // This fix should be done for strings too d = symbolString[*tk++]; -DEBUG printf("ExM: SYMBOL=\"%s\"", d); +DEBUG { printf("ExM: SYMBOL=\"%s\"", d); } #endif break; case STRING: @@ -552,7 +570,8 @@ DEBUG printf("ExM: SYMBOL=\"%s\"", d); // to choke on legitimate code... Need to investigate this further // before changing anything else here! case CONST: - sprintf(numbuf, "$%lx", (long unsigned int)*tk++); + sprintf(numbuf, "$%lx", (uint64_t)*tk++); + tk++; d = numbuf; break; case DEQUALS: @@ -649,7 +668,7 @@ skipcomments: overflow: *dst = EOS; - DEBUG printf("*** OVERFLOW LINE ***\n%s\n", dest); + DEBUG { printf("*** OVERFLOW LINE ***\n%s\n", dest); } return fatal("line too long as a result of macro expansion"); } @@ -661,7 +680,7 @@ char * GetNextMacroLine(void) { IMACRO * imacro = cur_inobj->inobj.imacro; // LONG * strp = imacro->im_nextln; - struct LineList * strp = imacro->im_nextln; + LLIST * strp = imacro->im_nextln; if (strp == NULL) // End-of-macro return NULL; @@ -680,26 +699,29 @@ char * GetNextMacroLine(void) char * GetNextRepeatLine(void) { IREPT * irept = cur_inobj->inobj.irept; - LONG * strp = irept->ir_nextln; // initial null +// LONG * strp = irept->ir_nextln; // initial null // Do repeat at end of .rept block's string list - if (strp == NULL) +// if (strp == NULL) + if (irept->ir_nextln == NULL) { - DEBUG printf("back-to-top-of-repeat-block count=%d\n", (int)irept->ir_count); + DEBUG { printf("back-to-top-of-repeat-block count=%d\n", (int)irept->ir_count); } irept->ir_nextln = irept->ir_firstln; // copy first line if (irept->ir_count-- == 0) { - DEBUG printf("end-repeat-block\n"); + DEBUG { printf("end-repeat-block\n"); } return NULL; } - strp = irept->ir_nextln; +// strp = irept->ir_nextln; } - strcpy(irbuf, (char *)(irept->ir_nextln + 1)); - DEBUG printf("repeat line='%s'\n", irbuf); - irept->ir_nextln = (LONG *)*strp; +// strcpy(irbuf, (char *)(irept->ir_nextln + 1)); + strcpy(irbuf, irept->ir_nextln->line); + DEBUG { printf("repeat line='%s'\n", irbuf); } +// irept->ir_nextln = (LONG *)*strp; + irept->ir_nextln = irept->ir_nextln->next; return irbuf; } @@ -711,23 +733,22 @@ char * GetNextRepeatLine(void) int include(int handle, char * fname) { // Debug mode - if (debug) - printf("[include: %s, cfileno=%u]\n", fname, cfileno); + DEBUG { printf("[include: %s, cfileno=%u]\n", fname, cfileno); } // Alloc and initialize include-descriptors INOBJ * inobj = a_inobj(SRC_IFILE); IFILE * ifile = inobj->inobj.ifile; - ifile->ifhandle = handle; // Setup file handle - ifile->ifind = ifile->ifcnt = 0; // Setup buffer indices - ifile->ifoldlineno = curlineno; // Save old line number - ifile->ifoldfname = curfname; // Save old filename - ifile->ifno = cfileno; // Save old file number + ifile->ifhandle = handle; // Setup file handle + ifile->ifind = ifile->ifcnt = 0; // Setup buffer indices + ifile->ifoldlineno = curlineno; // Save old line number + ifile->ifoldfname = curfname; // Save old filename + ifile->ifno = cfileno; // Save old file number // NB: This *must* be preincrement, we're adding one to the filecount here! - cfileno = ++filecount; // Compute NEW file number - curfname = strdup(fname); // Set current filename (alloc storage) - curlineno = 0; // Start on line zero + cfileno = ++filecount; // Compute NEW file number + curfname = strdup(fname); // Set current filename (alloc storage) + curlineno = 0; // Start on line zero // Add another file to the file-record FILEREC * fr = (FILEREC *)malloc(sizeof(FILEREC)); @@ -735,12 +756,12 @@ int include(int handle, char * fname) fr->frec_name = curfname; if (last_fr == NULL) - filerec = fr; // Add first filerec + filerec = fr; // Add first filerec else - last_fr->frec_next = fr; // Append to list of filerecs + last_fr->frec_next = fr; // Append to list of filerecs last_fr = fr; - DEBUG printf("[include: curfname: %s, cfileno=%u]\n", curfname, cfileno); + DEBUG { printf("[include: curfname: %s, cfileno=%u]\n", curfname, cfileno); } return OK; } @@ -751,73 +772,78 @@ int include(int handle, char * fname) // int fpop(void) { - IFILE * ifile; - IMACRO * imacro; - LONG * p, * p1; INOBJ * inobj = cur_inobj; - if (inobj != NULL) + if (inobj == NULL) + return 0; + + // Pop IFENT levels until we reach the conditional assembly context we + // were at when the input object was entered. + int numUnmatched = 0; + + while (ifent != inobj->in_ifent) { - // Pop IFENT levels until we reach the conditional assembly context we - // were at when the input object was entered. - int numUnmatched = 0; + if (d_endif() != 0) // Something bad happened during endif parsing? + return -1; // If yes, bail instead of getting stuck in a loop - while (ifent != inobj->in_ifent) - { - if (d_endif() != 0) // Something bad happened during endif parsing? - return -1; // If yes, bail instead of getting stuck in a loop + numUnmatched++; + } - numUnmatched++; - } + // Give a warning to the user that we had to wipe their bum for them + if (numUnmatched > 0) + warn("missing %d .endif(s)", numUnmatched); - // Give a warning to the user that we had to wipe their bum for them - if (numUnmatched > 0) - warni("missing %d .endif(s)", numUnmatched); + tok = inobj->in_otok; // Restore tok and otok + etok = inobj->in_etok; - tok = inobj->in_otok; // Restore tok and otok - etok = inobj->in_etok; + switch (inobj->in_type) + { + case SRC_IFILE: // Pop and release an IFILE + { + DEBUG { printf("[Leaving: %s]\n", curfname); } + + IFILE * ifile = inobj->inobj.ifile; + ifile->if_link = f_ifile; + f_ifile = ifile; + close(ifile->ifhandle); // Close source file +DEBUG { printf("[fpop (pre): curfname=%s]\n", curfname); } + curfname = ifile->ifoldfname; // Set current filename +DEBUG { printf("[fpop (post): curfname=%s]\n", curfname); } +DEBUG { printf("[fpop: (pre) cfileno=%d ifile->ifno=%d]\n", (int)cfileno, (int)ifile->ifno); } + curlineno = ifile->ifoldlineno; // Set current line# + DEBUG { printf("cfileno=%d ifile->ifno=%d\n", (int)cfileno, (int)ifile->ifno); } + cfileno = ifile->ifno; // Restore current file number +DEBUG { printf("[fpop: (post) cfileno=%d ifile->ifno=%d]\n", (int)cfileno, (int)ifile->ifno); } + break; + } - switch (inobj->in_type) - { - case SRC_IFILE: // Pop and release an IFILE - if (debug) - printf("[Leaving: %s]\n", curfname); - - ifile = inobj->inobj.ifile; - ifile->if_link = f_ifile; - f_ifile = ifile; - close(ifile->ifhandle); // Close source file -if (debug) printf("[fpop (pre): curfname=%s]\n", curfname); - curfname = ifile->ifoldfname; // Set current filename -if (debug) printf("[fpop (post): curfname=%s]\n", curfname); -if (debug) printf("[fpop: (pre) cfileno=%d ifile->ifno=%d]\n", (int)cfileno, (int)ifile->ifno); - curlineno = ifile->ifoldlineno; // Set current line# - DEBUG printf("cfileno=%d ifile->ifno=%d\n", (int)cfileno, (int)ifile->ifno); - cfileno = ifile->ifno; // Restore current file number -if (debug) printf("[fpop: (post) cfileno=%d ifile->ifno=%d]\n", (int)cfileno, (int)ifile->ifno); - break; - case SRC_IMACRO: // Pop and release an IMACRO - imacro = inobj->inobj.imacro; - imacro->im_link = f_imacro; - f_imacro = imacro; - break; - case SRC_IREPT: // Pop and release an IREPT - DEBUG printf("dealloc IREPT\n"); - p = inobj->inobj.irept->ir_firstln; + case SRC_IMACRO: // Pop and release an IMACRO + { + IMACRO * imacro = inobj->inobj.imacro; + imacro->im_link = f_imacro; + f_imacro = imacro; + break; + } - while (p != NULL) - { - p1 = (LONG *)*p; - p = p1; - } + case SRC_IREPT: // Pop and release an IREPT + { + DEBUG { printf("dealloc IREPT\n"); } + LLIST * p = inobj->inobj.irept->ir_firstln; - break; + // Deallocate repeat lines + while (p != NULL) + { + free(p->line); + p = p->next; } - cur_inobj = inobj->in_link; - inobj->in_link = f_inobj; - f_inobj = inobj; + break; } + } + + cur_inobj = inobj->in_link; + inobj->in_link = f_inobj; + f_inobj = inobj; return 0; } @@ -920,21 +946,23 @@ char * GetNextLine(void) // int TokenizeLine(void) { - char * ln = NULL; // Ptr to current position in line - char * p; // Random character ptr - TOKEN * tk; // Token-deposit ptr + uint8_t * ln = NULL; // Ptr to current position in line + uint8_t * p; // Random character ptr + PTR tk; // Token-deposit ptr int state = 0; // State for keyword detector int j = 0; // Var for keyword detector - char c; // Random char - VALUE v; // Random value - char * nullspot = NULL; // Spot to clobber for SYMBOL termination + uint8_t c; // Random char + uint64_t v; // Random value + uint32_t cursize = 0; // Current line's size (.b, .w, .l, .s, .q, .d) + double f; // Random float + uint8_t * nullspot = NULL; // Spot to clobber for SYMBOL termination int stuffnull; // 1:terminate SYMBOL '\0' at *nullspot - char c1; + uint8_t c1; int stringNum = 0; // Pointer to string locations in tokenized line retry: - if (cur_inobj == NULL) // Return EOF if input stack is empty + if (cur_inobj == NULL) // Return EOF if input stack is empty return TKEOF; // Get another line of input from the current input source: a file, a @@ -949,12 +977,12 @@ retry: case SRC_IFILE: if ((ln = GetNextLine()) == NULL) { -if (debug) printf("TokenizeLine: Calling fpop() from SRC_IFILE...\n"); +DEBUG { printf("TokenizeLine: Calling fpop() from SRC_IFILE...\n"); } if (fpop() == 0) // Pop input level goto retry; // Try for more lines else { - ifent->if_prev = (IFENT *) - 1; //Signal Assemble() that we have reached EOF with unbalanced if/endifs + ifent->if_prev = (IFENT *)-1; //Signal Assemble() that we have reached EOF with unbalanced if/endifs return TKEOF; } } @@ -983,27 +1011,29 @@ if (debug) printf("TokenizeLine: Calling fpop() from SRC_IFILE...\n"); } break; + // Macro-block: // o Handle end-of-macro; // o tag the listing-line with an at (@) sign. case SRC_IMACRO: if ((ln = GetNextMacroLine()) == NULL) { - if (ExitMacro() == 0) // Exit macro (pop args, do fpop(), etc) - goto retry; // Try for more lines... + if (ExitMacro() == 0) // Exit macro (pop args, do fpop(), etc) + goto retry; // Try for more lines... else - return TKEOF; // Oops, we got a non zero return code, signal EOF + return TKEOF; // Oops, we got a non zero return code, signal EOF } lntag = '@'; break; + // Repeat-block: // o Handle end-of-repeat-block; // o tag the listing-line with a pound (#) sign. case SRC_IREPT: if ((ln = GetNextRepeatLine()) == NULL) { -if (debug) printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n"); + DEBUG { printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n"); } fpop(); goto retry; } @@ -1012,15 +1042,15 @@ if (debug) printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n"); break; } - // Save text of the line. We only do this during listings and within + // Save text of the line. We only do this during listings and within // macro-type blocks, since it is expensive to unconditionally copy every // line. if (lnsave) strcpy(lnbuf, ln); - // General house-keeping - tok = tokeol; // Set "tok" to EOL in case of error - tk = etok; // Reset token ptr + // General housekeeping + tok = tokeol; // Set "tok" to EOL in case of error + tk.u32 = etok; // Reset token ptr stuffnull = 0; // Don't stuff nulls totlines++; // Bump total #lines assembled @@ -1029,16 +1059,26 @@ if (debug) printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n"); if (*ln == '*' || *ln == ';' || ((*ln == '/') && (*(ln + 1) == '/'))) goto goteol; + // And here we have a very ugly hack for signalling a single line 'turn off + // optimization'. There's really no nice way to do this, so hack it is! + optimizeOff = 0; // Default is to take optimizations as they come + + if (*ln == '!') + { + optimizeOff = 1; // Signal that we don't want to optimize this line + ln++; // & skip over the darned thing + } + // Main tokenization loop; - // o skip whitespace; - // o handle end-of-line; - // o handle symbols; - // o handle single-character tokens (operators, etc.); - // o handle multiple-character tokens (constants, strings, etc.). + // o skip whitespace; + // o handle end-of-line; + // o handle symbols; + // o handle single-character tokens (operators, etc.); + // o handle multiple-character tokens (constants, strings, etc.). for(; *ln!=EOS;) { // Skip whitespace, handle EOL - while ((int)chrtab[*ln] & WHITE) + while (chrtab[*ln] & WHITE) ln++; // Handle EOL, comment with ';' @@ -1073,7 +1113,7 @@ if (debug) printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n"); // token stream: ln++; stuffnull = 0; - *tk++ = (TOKEN)dotxtab[*ln++]; + *tk.u32++ = (TOKEN)dotxtab[*ln++]; continue; } } @@ -1091,17 +1131,18 @@ if (debug) printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n"); *ln++ = EOS; // Terminate symbol stuffnull = 0; // And never try it again - // Character following the `.' must have a DOT attribute, and + // Character following the '.' must have a DOT attribute, and // the chararacter after THAT one must not have a start-symbol // attribute (to prevent symbols that look like, for example, // "zingo.barf", which might be a good idea anyway....) if (((chrtab[*ln] & DOT) == 0) || (dotxtab[*ln] == 0)) return error("[bwsl] must follow '.' in symbol"); - v = (VALUE)dotxtab[*ln++]; + v = (uint32_t)dotxtab[*ln++]; + cursize = (uint32_t)v; if (chrtab[*ln] & CTSYM) - return error("misuse of '.', not allowed in symbols"); + return error("misuse of '.'; not allowed in symbols"); } // If the symbol is small, check to see if it's really the name of @@ -1155,7 +1196,7 @@ if (debug) printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n"); // If not tokenized keyword OR token was not found if ((j < 0) || (state < 0)) { - *tk++ = SYMBOL; + *tk.u32++ = SYMBOL; //#warning //problem here: nullspot is a char * but TOKEN is a uint32_t. On a 64-bit //system, this will cause all kinds of mischief. @@ -1163,18 +1204,18 @@ if (debug) printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n"); *tk++ = (TOKEN)nullspot; #else string[stringNum] = nullspot; - *tk++ = stringNum; + *tk.u32++ = stringNum; stringNum++; #endif } else { - *tk++ = (TOKEN)j; + *tk.u32++ = (TOKEN)j; stuffnull = 0; } if (v) // Record attribute token (if any) - *tk++ = (TOKEN)v; + *tk.u32++ = (TOKEN)v; if (stuffnull) // Arrange for string termination on next pass nullspot = ln; @@ -1185,40 +1226,39 @@ if (debug) printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n"); // Handle identity tokens if (c & SELF) { - *tk++ = *ln++; + *tk.u32++ = *ln++; continue; } // Handle multiple-character tokens if (c & MULTX) { - switch (*ln++) { case '!': // ! or != if (*ln == '=') { - *tk++ = NE; - ++ln; + *tk.u32++ = NE; + ln++; } else - *tk++ = '!'; + *tk.u32++ = '!'; continue; case '\'': // 'string' if (m6502) { // Hardcoded for now, maybe this will change in the future - *tk++ = STRINGA8; + *tk.u32++ = STRINGA8; goto dostring; } // Fall through case '\"': // "string" - *tk++ = STRING; + *tk.u32++ = STRING; dostring: c1 = ln[-1]; string[stringNum] = ln; - *tk++ = stringNum; + *tk.u32++ = stringNum; stringNum++; for(p=ln; *ln!=EOS && *ln!=c1;) @@ -1258,6 +1298,11 @@ dostring: case '\\': c = '\\'; break; + case '!': + // If we're evaluating a macro + // this is valid and expands to + // "dot-size" + break; default: warn("bad backslash code in string"); ln--; @@ -1298,13 +1343,14 @@ dostring: } else if ((*(ln + 1) & 0xDF) == 'L') { + v &= 0xFFFFFFFF; ln += 2; } } } - *tk++ = CONST; - *tk++ = v; + *tk.u32++ = CONST; + *tk.u64++ = v; if (obj_format == ALCYON) { @@ -1312,79 +1358,79 @@ dostring: { if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W')) { - *tk++ = DOTW; + *tk.u32++ = DOTW; ln += 2; } else if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L')) { - *tk++ = DOTL; + *tk.u32++ = DOTL; ln += 2; } } } } else - *tk++ = '$'; + *tk.u32++ = '$'; continue; case '<': // < or << or <> or <= switch (*ln) { case '<': - *tk++ = SHL; - ++ln; + *tk.u32++ = SHL; + ln++; continue; case '>': - *tk++ = NE; - ++ln; + *tk.u32++ = NE; + ln++; continue; case '=': - *tk++ = LE; - ++ln; + *tk.u32++ = LE; + ln++; continue; default: - *tk++ = '<'; + *tk.u32++ = '<'; continue; } case ':': // : or :: if (*ln == ':') { - *tk++ = DCOLON; - ++ln; + *tk.u32++ = DCOLON; + ln++; } else - *tk++ = ':'; + *tk.u32++ = ':'; continue; case '=': // = or == if (*ln == '=') { - *tk++ = DEQUALS; - ++ln; + *tk.u32++ = DEQUALS; + ln++; } else - *tk++ = '='; + *tk.u32++ = '='; continue; case '>': // > or >> or >= switch (*ln) { case '>': - *tk++ = SHR; + *tk.u32++ = SHR; ln++; continue; case '=': - *tk++ = GE; + *tk.u32++ = GE; ln++; continue; default: - *tk++ = '>'; + *tk.u32++ = '>'; continue; } case '%': // % or binary constant if (*ln < '0' || *ln > '1') { - *tk++ = '%'; + *tk.u32++ = '%'; continue; } @@ -1409,17 +1455,18 @@ dostring: if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L')) { + v &= 0xFFFFFFFF; ln += 2; } } - *tk++ = CONST; - *tk++ = v; + *tk.u32++ = CONST; + *tk.u64++ = v; continue; case '@': // @ or octal constant if (*ln < '0' || *ln > '7') { - *tk++ = '@'; + *tk.u32++ = '@'; continue; } @@ -1430,31 +1477,32 @@ dostring: if (*ln == '.') { - if ((*(ln+1) == 'b') || (*(ln+1) == 'B')) + if ((*(ln + 1) == 'b') || (*(ln + 1) == 'B')) { v &= 0x000000FF; ln += 2; } - if ((*(ln+1) == 'w') || (*(ln+1) == 'W')) + if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W')) { v &= 0x0000FFFF; ln += 2; } - if ((*(ln+1) == 'l') || (*(ln+1) == 'L')) + if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L')) { + v &= 0xFFFFFFFF; ln += 2; } } - *tk++ = CONST; - *tk++ = v; + *tk.u32++ = CONST; + *tk.u64++ = v; continue; case '^': // ^ or ^^ if (*ln != '^') { - *tk++ = '^'; + *tk.u32++ = '^'; continue; } @@ -1500,7 +1548,7 @@ dostring: continue; } - *tk++ = (TOKEN)j; + *tk.u32++ = (TOKEN)j; continue; default: interror(2); // Bad MULTX entry in chrtab @@ -1511,6 +1559,7 @@ dostring: // Handle decimal constant if (c & DIGIT) { + uint8_t * numStart = ln; v = 0; while ((int)chrtab[*ln] & DIGIT) @@ -1523,37 +1572,80 @@ dostring: { v &= 0x000000FF; ln += 2; + *tk.u32++ = CONST; + *tk.u64++ = v; + *tk.u32++ = DOTB; } else if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W')) { v &= 0x0000FFFF; ln += 2; + *tk.u32++ = CONST; + *tk.u64++ = v; + *tk.u32++ = DOTW; } else if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L')) { + v &= 0xFFFFFFFF; ln += 2; + *tk.u32++ = CONST; + *tk.u64++ = v; + *tk.u32++ = DOTL; + } + else if ((int)chrtab[*(ln + 1)] & DIGIT) + { + // Hey, more digits after the dot, so we assume it's a + // floating point number of some kind +#if 0 + double fract = 10; + ln++; + f = (double)v; + + while ((int)chrtab[*ln] & DIGIT) + { + f = f + (double)(*ln++ - '0') / fract; + fract *= 10; + } +#else + // Here we parse the whole floating point number + char * numEnd; + errno = 0; + double f = strtod(numStart, &numEnd); + ln = (uint8_t *)numEnd; + + if (errno != 0) + return error("floating point parse error"); +#endif + + *tk.u32++ = FCONST; +// Shamus: Well, this is all kinds of icky--not the least of which is that unlike uintNN_t types, we have no guarantees of any kind when it comes to the size of floating point numbers in C (as far as I know of). If there is, we need to use those kinds here, or else figure out at runtime what sizes we're dealing with and act accordingly. To be fair, this is OK as long as the double type is less than 64 bits wide, but again, there's no guarantee that it isn't. :-/ + *tk.u64++ = f; + continue; } } + else + { + *tk.u32++ = CONST; + *tk.u64++ = v; + } - *tk++ = CONST; - *tk++ = v; //printf("CONST: %i\n", v); continue; } // Handle illegal character - return error("illegal character"); + return error("illegal character $%02X found", *ln); } // Terminate line of tokens and return "success." goteol: - tok = etok; // Set tok to beginning of line + tok = etok; // Set tok to beginning of line if (stuffnull) // Terminate last SYMBOL *nullspot = EOS; - *tk++ = EOL; + *tk.u32++ = EOL; return OK; } @@ -1587,7 +1679,7 @@ int d_goto(WORD unused) return error("goto not in macro"); IMACRO * imacro = cur_inobj->inobj.imacro; - struct LineList * defln = imacro->im_macro->lineList; + LLIST * defln = imacro->im_macro->lineList; // Attempt to find the label, starting with the first line. for(; defln!=NULL; defln=defln->next) @@ -1624,22 +1716,105 @@ int d_goto(WORD unused) } +void DumpToken(TOKEN t) +{ + if (t == COLON) + printf("[COLON]"); + else if (t == CONST) + printf("[CONST]"); + else if (t == ACONST) + printf("[ACONST]"); + else if (t == STRING) + printf("[STRING]"); + else if (t == SYMBOL) + printf("[SYMBOL]"); + else if (t == EOS) + printf("[EOS]"); + else if (t == TKEOF) + printf("[TKEOF]"); + else if (t == DEQUALS) + printf("[DEQUALS]"); + else if (t == SET) + printf("[SET]"); + else if (t == REG) + printf("[REG]"); + else if (t == DCOLON) + printf("[DCOLON]"); + else if (t == GE) + printf("[GE]"); + else if (t == LE) + printf("[LE]"); + else if (t == NE) + printf("[NE]"); + else if (t == SHR) + printf("[SHR]"); + else if (t == SHL) + printf("[SHL]"); + else if (t == UNMINUS) + printf("[UNMINUS]"); + else if (t == DOTB) + printf("[DOTB]"); + else if (t == DOTW) + printf("[DOTW]"); + else if (t == DOTL) + printf("[DOTL]"); + else if (t == DOTQ) + printf("[DOTQ]"); + else if (t == DOTS) + printf("[DOTS]"); + else if (t == DOTD) + printf("[DOTD]"); + else if (t == DOTI) + printf("[DOTI]"); + else if (t == ENDEXPR) + printf("[ENDEXPR]"); + else if (t == CR_ABSCOUNT) + printf("[CR_ABSCOUNT]"); + else if (t == CR_DEFINED) + printf("[CR_DEFINED]"); + else if (t == CR_REFERENCED) + printf("[CR_REFERENCED]"); + else if (t == CR_STREQ) + printf("[CR_STREQ]"); + else if (t == CR_MACDEF) + printf("[CR_MACDEF]"); + else if (t == CR_TIME) + printf("[CR_TIME]"); + else if (t == CR_DATE) + printf("[CR_DATE]"); + else if (t >= 0x20 && t <= 0x2F) + printf("[%c]", (char)t); + else if (t >= 0x3A && t <= 0x3F) + printf("[%c]", (char)t); + else if (t >= 0x80 && t <= 0x87) + printf("[D%u]", ((uint32_t)t) - 0x80); + else if (t >= 0x88 && t <= 0x8F) + printf("[A%u]", ((uint32_t)t) - 0x88); + else + printf("[%X:%c]", (uint32_t)t, (char)t); +} + + void DumpTokenBuffer(void) { - TOKEN * t; printf("Tokens [%X]: ", sloc); - for(t=tokbuf; *t!=EOL; t++) + for(TOKEN * t=tokbuf; *t!=EOL; t++) { if (*t == COLON) printf("[COLON]"); else if (*t == CONST) { - t++; - printf("[CONST: $%X]", (uint32_t)*t); + PTR tp; + tp.u32 = t + 1; + printf("[CONST: $%lX]", *tp.u64); + t += 2; } else if (*t == ACONST) - printf("[ACONST]"); + { + printf("[ACONST: $%X, $%X]", (uint32_t)t[1], (uint32_t)t[2]); + t += 2; + } else if (*t == STRING) { t++; @@ -1680,6 +1855,12 @@ void DumpTokenBuffer(void) printf("[DOTW]"); else if (*t == DOTL) printf("[DOTL]"); + else if (*t == DOTQ) + printf("[DOTQ]"); + else if (*t == DOTS) + printf("[DOTS]"); + else if (*t == DOTD) + printf("[DOTD]"); else if (*t == DOTI) printf("[DOTI]"); else if (*t == ENDEXPR)