X-Git-Url: http://shamusworld.gotdns.org/cgi-bin/gitweb.cgi?p=rmac;a=blobdiff_plain;f=token.c;h=8726eb4b9ee256ed536f9eff6ae061bb48d27dc0;hp=cd515a80a5ea8cde304470096bc0663194b03020;hb=60f204cb9e3905100da0d89f14bb40db764acd9e;hpb=49cce96fba11282e4244187f15be418d5ae5bb8d diff --git a/token.c b/token.c index cd515a8..8726eb4 100644 --- a/token.c +++ b/token.c @@ -1,29 +1,30 @@ // // RMAC - Reboot's Macro Assembler for the Atari Jaguar Console System // TOKEN.C - Token Handling -// Copyright (C) 199x Landon Dyer, 2011-2012 Reboot and Friends +// Copyright (C) 199x Landon Dyer, 2011-2017 Reboot and Friends // RMAC derived from MADMAC v1.07 Written by Landon Dyer, 1986 -// Source Utilised with the Kind Permission of Landon Dyer +// Source utilised with the kind permission of Landon Dyer // #include "token.h" -#include "symbol.h" -#include "procln.h" -#include "macro.h" #include "error.h" +#include "macro.h" +#include "procln.h" +#include "symbol.h" #define DECL_KW // Declare keyword arrays -#define DEF_KW // Declare keyword values +#define DEF_KW // Declare keyword values #include "kwtab.h" // Incl generated keyword tables & defs + int lnsave; // 1; strcpy() text of current line int curlineno; // Current line number int totlines; // Total # of lines int mjump_align = 0; // mjump alignment flag char lntag; // Line tag char * curfname; // Current filename -char tolowertab[128]; // Uppercase ==> lowercase -char hextab[128]; // Table of hex values +char tolowertab[128]; // Uppercase ==> lowercase +int8_t hextab[128]; // Table of hex values char dotxtab[128]; // Table for ".b", ".s", etc. char irbuf[LNSIZ]; // Text for .rept block line char lnbuf[LNSIZ]; // Text of current line @@ -32,7 +33,7 @@ WORD cfileno; // Current file number TOKEN * tok; // Ptr to current token TOKEN * etok; // Ptr past last token in tokbuf[] TOKEN tokeol[1] = {EOL}; // Bailout end-of-line token -char * string[TOKBUFSIZE]; // Token buffer string pointer storage +char * string[TOKBUFSIZE*2]; // Token buffer string pointer storage // File record, used to maintain a list of every include file ever visited #define FILEREC struct _filerec @@ -53,28 +54,28 @@ static IMACRO * f_imacro; // Ptr list of free IMACROs static TOKEN tokbuf[TOKBUFSIZE]; // Token buffer (stack-like, all files) char chrtab[] = { - ILLEG, ILLEG, ILLEG, ILLEG, // NUL SOH STX ETX - ILLEG, ILLEG, ILLEG, ILLEG, // EOT ENQ ACK BEL - ILLEG, WHITE, ILLEG, ILLEG, // BS HT LF VT - WHITE, ILLEG, ILLEG, ILLEG, // FF CR SO SI + ILLEG, ILLEG, ILLEG, ILLEG, // NUL SOH STX ETX + ILLEG, ILLEG, ILLEG, ILLEG, // EOT ENQ ACK BEL + ILLEG, WHITE, ILLEG, ILLEG, // BS HT LF VT + WHITE, ILLEG, ILLEG, ILLEG, // FF CR SO SI - ILLEG, ILLEG, ILLEG, ILLEG, // DLE DC1 DC2 DC3 - ILLEG, ILLEG, ILLEG, ILLEG, // DC4 NAK SYN ETB - ILLEG, ILLEG, ILLEG, ILLEG, // CAN EM SUB ESC - ILLEG, ILLEG, ILLEG, ILLEG, // FS GS RS US + ILLEG, ILLEG, ILLEG, ILLEG, // DLE DC1 DC2 DC3 + ILLEG, ILLEG, ILLEG, ILLEG, // DC4 NAK SYN ETB + ILLEG, ILLEG, ILLEG, ILLEG, // CAN EM SUB ESC + ILLEG, ILLEG, ILLEG, ILLEG, // FS GS RS US WHITE, MULTX, MULTX, SELF, // SP ! " # MULTX+CTSYM, MULTX, SELF, MULTX, // $ % & ' SELF, SELF, SELF, SELF, // ( ) * + SELF, SELF, STSYM, SELF, // , - . / - DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 0 1 - DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 2 3 - DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 4 5 - DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 6 7 - DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 8 9 - MULTX, MULTX, // : ; - MULTX, MULTX, MULTX, STSYM+CTSYM, // < = > ? + DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 0 1 + DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 2 3 + DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 4 5 + DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 6 7 + DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 8 9 + MULTX, MULTX, // : ; + MULTX, MULTX, MULTX, STSYM+CTSYM, // < = > ? MULTX, STSYM+CTSYM+HDIGIT, // @ A (char)((BYTE)DOT)+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT, // B C @@ -95,10 +96,10 @@ char chrtab[] = { STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // h i j k (char)((BYTE)DOT)+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // l m n o - STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, (char)((BYTE)DOT)+STSYM+CTSYM, // p q r s - STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, (char)((BYTE)DOT)+STSYM+CTSYM, // t u v w - STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, SELF, // x y z { - SELF, SELF, SELF, ILLEG // | } ~ DEL + STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, (char)((BYTE)DOT)+STSYM+CTSYM, // p q r s + STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, (char)((BYTE)DOT)+STSYM+CTSYM, // t u v w + STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, SELF, // x y z { + SELF, SELF, SELF, ILLEG // | } ~ DEL }; // Names of registers @@ -109,7 +110,7 @@ static char * regname[] = { }; static char * riscregname[] = { - "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", + "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23", "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31" @@ -117,18 +118,84 @@ static char * riscregname[] = { // -// Make `fnum' the Current `curfname' +// Initialize tokenizer // -void setfnum(WORD fnum) +void InitTokenizer(void) { - FILEREC * fr; + int i; // Iterator + char * htab = "0123456789abcdefABCDEF"; // Hex character table - for(fr=filerec; fr!=NULL && fnum--; fr=fr->frec_next); + lnsave = 0; // Don't save lines + curfname = ""; // No file, empty filename + filecount = (WORD)-1; + cfileno = (WORD)-1; // cfileno gets bumped to 0 + curlineno = 0; + totlines = 0; + etok = tokbuf; + f_inobj = NULL; + f_ifile = NULL; + f_imacro = NULL; + cur_inobj = NULL; + filerec = NULL; + last_fr = NULL; + lntag = SPACE; - if (fr == NULL) + // Initialize hex, "dot" and tolower tables + for(i=0; i<128; i++) + { + hextab[i] = -1; + dotxtab[i] = 0; + tolowertab[i] = (char)i; + } + + for(i=0; htab[i]!=EOS; i++) + hextab[htab[i]] = (char)((i < 16) ? i : i - 6); + + for(i='A'; i<='Z'; i++) + tolowertab[i] |= 0x20; + + // These characters are legal immediately after a period + dotxtab['b'] = DOTB; // .b .B .s .S + dotxtab['B'] = DOTB; + dotxtab['s'] = DOTB; + dotxtab['S'] = DOTB; + dotxtab['w'] = DOTW; // .w .W + dotxtab['W'] = DOTW; + dotxtab['l'] = DOTL; // .l .L + dotxtab['L'] = DOTL; + dotxtab['i'] = DOTI; // .i .I (???) + dotxtab['I'] = DOTI; +} + + +void SetFilenameForErrorReporting(void) +{ + WORD fnum = cfileno; + + // Check for absolute top filename (this should never happen) + if (fnum == -1) + { curfname = "(*top*)"; - else - curfname = fr->frec_name; + return; + } + + FILEREC * fr = filerec; + + // Advance to the correct record... + while (fr != NULL && fnum != 0) + { + fr = fr->frec_next; + fnum--; + } + + // Check for file # record not found (this should never happen either) + if (fr == NULL) + { + curfname = "(*NOT FOUND*)"; + return; + } + + curfname = fr->frec_name; } @@ -143,8 +210,7 @@ INOBJ * a_inobj(int typ) // Allocate and initialize INOBJ first if (f_inobj == NULL) -// inobj = (INOBJ *)amem((LONG)sizeof(INOBJ)); - inobj = (INOBJ *)malloc(sizeof(INOBJ)); + inobj = malloc(sizeof(INOBJ)); else { inobj = f_inobj; @@ -155,8 +221,7 @@ INOBJ * a_inobj(int typ) { case SRC_IFILE: // Alloc and init an IFILE if (f_ifile == NULL) -// ifile = (IFILE *)amem((LONG)sizeof(IFILE)); - ifile = (IFILE *)malloc(sizeof(IFILE)); + ifile = malloc(sizeof(IFILE)); else { ifile = f_ifile; @@ -165,10 +230,9 @@ INOBJ * a_inobj(int typ) inobj->inobj.ifile = ifile; break; - case SRC_IMACRO: // Alloc and init an IMACRO + case SRC_IMACRO: // Alloc and init an IMACRO if (f_imacro == NULL) -// imacro = (IMACRO *)amem((LONG)sizeof(IMACRO)); - imacro = (IMACRO *)malloc(sizeof(IMACRO)); + imacro = malloc(sizeof(IMACRO)); else { imacro = f_imacro; @@ -178,8 +242,7 @@ INOBJ * a_inobj(int typ) inobj->inobj.imacro = imacro; break; case SRC_IREPT: // Alloc and init an IREPT -// inobj->inobj.irept = (IREPT *)amem((LONG)sizeof(IREPT)); - inobj->inobj.irept = (IREPT *)malloc(sizeof(IREPT)); + inobj->inobj.irept = malloc(sizeof(IREPT)); DEBUG printf("alloc IREPT\n"); break; } @@ -210,30 +273,27 @@ INOBJ * a_inobj(int typ) // (the colon must be in the first column). These labels are stripped before // macro expansion takes place. // -int mexpand(char * src, char * dest, int destsiz) +int ExpandMacro(char * src, char * dest, int destsiz) { - char * s; - char * d = NULL; - char * dst; // Next dest slot - char * edst; // End+1 of dest buffer int i; int questmark; // \? for testing argument existence - TOKEN * tk; char mname[128]; // Assume max size of a formal arg name - int macnum; - SYM * arg; - IMACRO * imacro; char numbuf[20]; // Buffer for text of CONSTs + TOKEN * tk; + SYM * arg; + char ** symbolString; + + DEBUG { printf("ExM: src=\"%s\"\n", src); } - imacro = cur_inobj->inobj.imacro; - macnum = (int)(imacro->im_macro->sattr); + IMACRO * imacro = cur_inobj->inobj.imacro; + int macnum = (int)(imacro->im_macro->sattr); - destsiz--; - dst = dest; - edst = dest + destsiz; + char * dst = dest; // Next dest slot + char * edst = dest + destsiz - 1; // End + 1(?) of dest buffer // Check for (and skip over) any "label" on the line - s = src; + char * s = src; + char * d = NULL; if (*s == ':') { @@ -253,6 +313,11 @@ int mexpand(char * src, char * dest, int destsiz) if (dst >= edst) goto overflow; + // Skip comments in case a loose @ or \ is in there + // In that case the tokeniser was trying to expand it. + if ((*s == ';') || ((*s == '/') && (*(s + 1) == '/'))) + goto skipcomments; + *dst++ = *s++; } // Do macro expansion @@ -269,11 +334,11 @@ int mexpand(char * src, char * dest, int destsiz) *dst++ = *s++; continue; - case '?': // \? set `questmark' flag + case '?': // \? set `questmark' flag ++s; questmark = 1; break; - case '#': // \#, number of arguments + case '#': // \#, number of arguments sprintf(numbuf, "%d", (int)imacro->im_nargs); goto copystr; case '!': // \! size suffix supplied on invocation @@ -286,12 +351,12 @@ int mexpand(char * src, char * dest, int destsiz) } goto copy_d; - case '~': // ==> unique label string Mnnnn... - sprintf(numbuf, "M%ud", curuniq); + case '~': // ==> unique label string Mnnnn... + sprintf(numbuf, "M%u", curuniq); copystr: d = numbuf; copy_d: - ++s; + s++; while (*d != EOS) { @@ -320,7 +385,7 @@ copy_d: // Get argument name: \name, \{name} d = mname; - // \foo + // \label if (*s != '{') { do @@ -329,7 +394,7 @@ copy_d: } while (chrtab[*s] & CTSYM); } - // \\{foo} + // \\{label} else { for(++s; *s != EOS && *s != '}';) @@ -338,14 +403,14 @@ copy_d: if (*s != '}') return error("missing '}'"); else - ++s; + s++; } *d = EOS; // Lookup the argument and copy its (string) value into the // destination string - DEBUG printf("mname='%s'\n", mname); + DEBUG printf("argument='%s'\n", mname); if ((arg = lookup(mname, MACARG, macnum)) == NULL) return errors("undefined argument: '%s'", mname); @@ -356,12 +421,25 @@ copy_d: // macro invocation) then it is ignored. i = (int)arg->svalue; arg_num: - DEBUG printf("~argnumber=%d\n", i); - + DEBUG printf("~argnumber=%d (argBase=%u)\n", i, imacro->argBase); tk = NULL; if (i < imacro->im_nargs) - tk = argp[i]; + { +#if 0 +// tk = argp[i]; +// tk = argPtrs[i]; + tk = argPtrs[imacro->argBase + i]; +#else + tk = imacro->argument[i].token; + symbolString = imacro->argument[i].string; +//DEBUG +//{ +// printf("ExM: Preparing to parse argument #%u...\n", i); +// dumptok(tk); +//} +#endif + } // \?arg yields: // 0 if the argument is empty or non-existant, @@ -378,7 +456,8 @@ arg_num: continue; } - if (tk != NULL) // arg# is in range, so expand it + // Argument # is in range, so expand it + if (tk != NULL) { while (*tk != EOL) { @@ -401,11 +480,22 @@ arg_num: switch ((int)*tk++) { case SYMBOL: - d = (char *)*tk++; +#if 0 +// d = (char *)*tk++; + d = string[*tk++]; +#else + // This fix should be done for strings too + d = symbolString[*tk++]; +DEBUG printf("ExM: SYMBOL=\"%s\"", d); +#endif break; case STRING: - d = (char *)*tk++; - +#if 0 +// d = (char *)*tk++; + d = string[*tk++]; +#else + d = symbolString[*tk++]; +#endif if (dst >= edst) goto overflow; @@ -468,6 +558,9 @@ arg_num: case DOTL: d = ".l"; break; + case CR_ABSCOUNT: + d = "^^abscount"; + break; case CR_DATE: d = "^^date"; break; @@ -515,61 +608,43 @@ strcopy: } } +skipcomments: + *dst = EOS; + DEBUG { printf("ExM: dst=\"%s\"\n", dest); } return OK; overflow: *dst = EOS; + DEBUG printf("*** OVERFLOW LINE ***\n%s\n", dest); return fatal("line too long as a result of macro expansion"); } // -// Get Next Line of Text from a Macro +// Get next line of text from a macro // -char * getmln(void) +char * GetNextMacroLine(void) { - unsigned source_addr; - IMACRO * imacro = cur_inobj->inobj.imacro; - LONG * strp = imacro->im_nextln; +// LONG * strp = imacro->im_nextln; + struct LineList * strp = imacro->im_nextln; if (strp == NULL) // End-of-macro return NULL; - imacro->im_nextln = (LONG *)*strp; - mexpand((char *)(strp + 1), imacro->im_lnbuf, LNSIZ); - - if (!strcmp(imacro->im_macro->sname, "mjump") && !mjump_align) - { - // if we need to adjust the alignment of the jump source address to - // meet the rules of gpu main execution we need to skip the first nop - // of the macro. This is simpler than trying to insert nop's mid macro. - source_addr = (orgactive) ? orgaddr : sloc; - source_addr += 8; - - if (source_addr % 4) - { - strp = imacro->im_nextln; - - if (strp == NULL) - return NULL; - - imacro->im_nextln = (LONG *)*strp; - mexpand((char *)(strp + 1), imacro->im_lnbuf, LNSIZ); - } - - mjump_align = 1; - } + imacro->im_nextln = strp->next; +// ExpandMacro((char *)(strp + 1), imacro->im_lnbuf, LNSIZ); + ExpandMacro(strp->line, imacro->im_lnbuf, LNSIZ); return imacro->im_lnbuf; } // -// Get Next Line of Text from a Repeat Block +// Get next line of text from a repeat block // -char * getrln(void) +char * GetNextRepeatLine(void) { IREPT * irept = cur_inobj->inobj.irept; @@ -587,7 +662,7 @@ char * getrln(void) return NULL; } - strp = irept->ir_nextln; //strp + strp = irept->ir_nextln; } strcpy(irbuf, (char *)(irept->ir_nextln + 1)); @@ -599,135 +674,95 @@ char * getrln(void) // -// Include a Source File used at the Root, and for ".include" Files +// Include a source file used at the root, and for ".include" files // int include(int handle, char * fname) { - IFILE * ifile; - INOBJ * inobj; - FILEREC * fr; - - // Verbose mode - if (verb_flag) - printf("[Including: %s]\n", fname); + // Debug mode + if (debug) + printf("[include: %s, cfileno=%u]\n", fname, cfileno); // Alloc and initialize include-descriptors - inobj = a_inobj(SRC_IFILE); - ifile = inobj->inobj.ifile; + INOBJ * inobj = a_inobj(SRC_IFILE); + IFILE * ifile = inobj->inobj.ifile; ifile->ifhandle = handle; // Setup file handle ifile->ifind = ifile->ifcnt = 0; // Setup buffer indices ifile->ifoldlineno = curlineno; // Save old line number ifile->ifoldfname = curfname; // Save old filename ifile->ifno = cfileno; // Save old file number - cfileno = filecount++; // Compute new file number + + // NB: This *must* be preincrement, we're adding one to the filecount here! + cfileno = ++filecount; // Compute NEW file number curfname = strdup(fname); // Set current filename (alloc storage) curlineno = 0; // Start on line zero // Add another file to the file-record -// fr = (FILEREC *)amem((LONG)sizeof(FILEREC)); - fr = (FILEREC *)malloc(sizeof(FILEREC)); + FILEREC * fr = (FILEREC *)malloc(sizeof(FILEREC)); fr->frec_next = NULL; fr->frec_name = curfname; if (last_fr == NULL) - filerec = fr; // Add first filerec + filerec = fr; // Add first filerec else - last_fr->frec_next = fr; // Append to list of filerecs + last_fr->frec_next = fr; // Append to list of filerecs last_fr = fr; + DEBUG printf("[include: curfname: %s, cfileno=%u]\n", curfname, cfileno); return OK; } // -// Initialize Tokenizer -// -void init_token(void) -{ - int i; // Iterator - char * htab = "0123456789abcdefABCDEF"; // Hex character table - - lnsave = 0; // Don't save lines - curfname = ""; // No file, empty filename - filecount = (WORD)-1; - cfileno = (WORD)-1; // cfileno gets bumped to 0 - curlineno = 0; - totlines = 0; - etok = tokbuf; - f_inobj = NULL; - f_ifile = NULL; - f_imacro = NULL; - cur_inobj = NULL; - filerec = NULL; - last_fr = NULL; - lntag = SPACE; - - // Initialize hex, "dot" and tolower tables - for(i=0; i<128; i++) - { - hextab[i] = -1; - dotxtab[i] = 0; - tolowertab[i] = (char)i; - } - - for(i=0; htab[i]!=EOS; i++) - hextab[htab[i]] = (char)((i < 16) ? i : i - 6); - - for(i='A'; i<='Z'; i++) - tolowertab[i] |= 0x20; - - // These characters are legal immediately after a period - dotxtab['b'] = DOTB; // .b .B .s .S - dotxtab['B'] = DOTB; - dotxtab['s'] = DOTB; - dotxtab['S'] = DOTB; - dotxtab['w'] = DOTW; // .w .W - dotxtab['W'] = DOTW; - dotxtab['l'] = DOTL; // .l .L - dotxtab['L'] = DOTL; - dotxtab['I'] = DOTI; // .l .L - dotxtab['I'] = DOTI; -} - - -// -// Pop the Current Input Level +// Pop the current input level // int fpop(void) { - INOBJ * inobj; IFILE * ifile; IMACRO * imacro; LONG * p, * p1; - - inobj = cur_inobj; + INOBJ * inobj = cur_inobj; if (inobj != NULL) { // Pop IFENT levels until we reach the conditional assembly context we // were at when the input object was entered. + int numUnmatched = 0; + while (ifent != inobj->in_ifent) - d_endif(); + { + if (d_endif() != 0) // Something bad happened during endif parsing? + return -1; // If yes, bail instead of getting stuck in a loop + + numUnmatched++; + } - tok = inobj->in_otok; // Restore tok and otok + // Give a warning to the user that we had to wipe their bum for them + if (numUnmatched > 0) + warni("missing %d .endif(s)", numUnmatched); + + tok = inobj->in_otok; // Restore tok and otok etok = inobj->in_etok; switch (inobj->in_type) { - case SRC_IFILE: // Pop and release an IFILE - if (verb_flag) + case SRC_IFILE: // Pop and release an IFILE + if (debug) printf("[Leaving: %s]\n", curfname); ifile = inobj->inobj.ifile; ifile->if_link = f_ifile; f_ifile = ifile; close(ifile->ifhandle); // Close source file +if (debug) printf("[fpop (pre): curfname=%s]\n", curfname); curfname = ifile->ifoldfname; // Set current filename - curlineno = ifile->ifoldlineno; // Set current line# +if (debug) printf("[fpop (post): curfname=%s]\n", curfname); +if (debug) printf("[fpop: (pre) cfileno=%d ifile->ifno=%d]\n", (int)cfileno, (int)ifile->ifno); + curlineno = ifile->ifoldlineno; // Set current line# DEBUG printf("cfileno=%d ifile->ifno=%d\n", (int)cfileno, (int)ifile->ifno); cfileno = ifile->ifno; // Restore current file number +if (debug) printf("[fpop: (post) cfileno=%d ifile->ifno=%d]\n", (int)cfileno, (int)ifile->ifno); break; case SRC_IMACRO: // Pop and release an IMACRO imacro = inobj->inobj.imacro; @@ -760,7 +795,7 @@ int fpop(void) // Get line from file into buf, return NULL on EOF or ptr to the start of a // null-term line // -char * getln(void) +char * GetNextLine(void) { int i, j; char * p, * d; @@ -772,11 +807,9 @@ char * getln(void) // Scan for next end-of-line; handle stupid text formats by treating // \r\n the same as \n. (lone '\r' at end of buffer means we have to // check for '\n'). - i = 0; - j = fl->ifcnt; d = &fl->ifbuf[fl->ifind]; - for(p=d; iifcnt; i= j) - { - break; // Look for '\n' to eat - } + break; // Need to read more, then look for '\n' to eat else if (p[1] == '\n') - { i++; - } } + // Cover up the newline with end-of-string sentinel *p = '\0'; fl->ifind += i; @@ -804,11 +834,20 @@ char * getln(void) // Handle hanging lines by ignoring them (Input file is exhausted, no // \r or \n on last line) + // Shamus: This is retarded. Never ignore any input! if (!readamt && fl->ifcnt) { +#if 0 fl->ifcnt = 0; *p = '\0'; return NULL; +#else + // Really should check to see if we're at the end of the buffer! + // :-P + fl->ifbuf[fl->ifind + fl->ifcnt] = '\0'; + fl->ifcnt = 0; + return &fl->ifbuf[fl->ifind]; +#endif } // Truncate and return absurdly long lines. @@ -833,7 +872,9 @@ char * getln(void) fl->ifind = fl->ifcnt & 1; } - if ((readamt = read(fl->ifhandle, &fl->ifbuf[fl->ifind + fl->ifcnt], QUANTUM)) < 0) + readamt = read(fl->ifhandle, &fl->ifbuf[fl->ifind + fl->ifcnt], QUANTUM); + + if (readamt < 0) return NULL; if ((fl->ifcnt += readamt) == 0) @@ -843,29 +884,29 @@ char * getln(void) // -// Tokenize a Line +// Tokenize a line // -int tokln(void) +int TokenizeLine(void) { - char * ln = NULL; // Ptr to current position in line - char * p; // Random character ptr - TOKEN * tk; // Token-deposit ptr - int state = 0; // State for keyword detector - int j = 0; // Var for keyword detector - char c; // Random char - VALUE v; // Random value - char * nullspot = NULL; // Spot to clobber for SYMBOL terminatn - int stuffnull; // 1:terminate SYMBOL '\0' at *nullspot + char * ln = NULL; // Ptr to current position in line + char * p; // Random character ptr + TOKEN * tk; // Token-deposit ptr + int state = 0; // State for keyword detector + int j = 0; // Var for keyword detector + char c; // Random char + VALUE v; // Random value + char * nullspot = NULL; // Spot to clobber for SYMBOL termination + int stuffnull; // 1:terminate SYMBOL '\0' at *nullspot char c1; - int stringNum = 0; // Pointer to string locations in tokenized line + int stringNum = 0; // Pointer to string locations in tokenized line - retry: +retry: if (cur_inobj == NULL) // Return EOF if input stack is empty return TKEOF; - // Get another line of input from the current input source: a file, - // a macro, or a repeat-block + // Get another line of input from the current input source: a file, a + // macro, or a repeat-block switch (cur_inobj->in_type) { // Include-file: @@ -874,10 +915,16 @@ int tokln(void) // o tag the listing-line with a space; // o kludge lines generated by Alcyon C. case SRC_IFILE: - if ((ln = getln()) == NULL) + if ((ln = GetNextLine()) == NULL) { - fpop(); // Pop input level - goto retry; // Try for more lines +if (debug) printf("TokenizeLine: Calling fpop() from SRC_IFILE...\n"); + if (fpop() == 0) // Pop input level + goto retry; // Try for more lines + else + { + ifent->if_prev = (IFENT *) - 1; //Signal Assemble() that we have reached EOF with unbalanced if/endifs + return TKEOF; + } } curlineno++; // Bump line number @@ -892,7 +939,7 @@ int tokln(void) *ln = ';'; else { - for(p=ln; *p!=EOS; ++p) + for(p=ln; *p!=EOS; p++) { if (*p == '*') { @@ -908,10 +955,12 @@ int tokln(void) // o Handle end-of-macro; // o tag the listing-line with an at (@) sign. case SRC_IMACRO: - if ((ln = getmln()) == NULL) + if ((ln = GetNextMacroLine()) == NULL) { - exitmac(); // Exit macro (pop args, do fpop(), etc) - goto retry; // Try for more lines... + if (ExitMacro() == 0) // Exit macro (pop args, do fpop(), etc) + goto retry; // Try for more lines... + else + return TKEOF; // Oops, we got a non zero return code, signal EOF } lntag = '@'; @@ -920,8 +969,9 @@ int tokln(void) // o Handle end-of-repeat-block; // o tag the listing-line with a pound (#) sign. case SRC_IREPT: - if ((ln = getrln()) == NULL) + if ((ln = GetNextRepeatLine()) == NULL) { +if (debug) printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n"); fpop(); goto retry; } @@ -937,10 +987,10 @@ int tokln(void) strcpy(lnbuf, ln); // General house-keeping - tok = tokeol; // Set "tok" to EOL in case of error - tk = etok; // Reset token ptr - stuffnull = 0; // Don't stuff nulls - totlines++; // Bump total #lines assembled + tok = tokeol; // Set "tok" to EOL in case of error + tk = etok; // Reset token ptr + stuffnull = 0; // Don't stuff nulls + totlines++; // Bump total #lines assembled // See if the entire line is a comment. This is a win if the programmer // puts in lots of comments @@ -960,7 +1010,7 @@ int tokln(void) ln++; // Handle EOL, comment with ';' - if (*ln == EOS || *ln == ';'|| ((*ln == '/') && (*(ln + 1) == '/'))) + if (*ln == EOS || *ln == ';'|| ((*ln == '/') && (*(ln + 1) == '/'))) break; // Handle start of symbol. Symbols are null-terminated in place. The @@ -970,12 +1020,33 @@ int tokln(void) if (c & STSYM) { - if (stuffnull) // Terminate old symbol + if (stuffnull) // Terminate old symbol from previous pass *nullspot = EOS; - v = 0; // Assume no DOT attrib follows symbol + v = 0; // Assume no DOT attrib follows symbol stuffnull = 1; - p = nullspot = ln++; // Nullspot -> start of this symbol + + // In some cases, we need to check for a DOTx at the *beginning* + // of a symbol, as the "start" of the line we're currently looking + // at could be somewhere in the middle of that line! + if (*ln == '.') + { + // Make sure that it's *only* a .[bwsl] following, and not the + // start of a local symbol: + if ((chrtab[*(ln + 1)] & DOT) + && (dotxtab[*(ln + 1)] != 0) + && !(chrtab[*(ln + 2)] & CTSYM)) + { + // We found a legitimate DOTx construct, so add it to the + // token stream: + ln++; + stuffnull = 0; + *tk++ = (TOKEN)dotxtab[*ln++]; + continue; + } + } + + p = nullspot = ln++; // Nullspot -> start of this symbol // Find end of symbol (and compute its length) for(j=1; (int)chrtab[*ln]&CTSYM; j++) @@ -985,20 +1056,20 @@ int tokln(void) // symbol or keyword: if (*ln == '.') { - *ln++ = EOS; // Terminate symbol - stuffnull = 0; // And never try it again + *ln++ = EOS; // Terminate symbol + stuffnull = 0; // And never try it again // Character following the `.' must have a DOT attribute, and // the chararacter after THAT one must not have a start-symbol // attribute (to prevent symbols that look like, for example, // "zingo.barf", which might be a good idea anyway....) - if ((((int)chrtab[*ln] & DOT) == 0) || ((int)dotxtab[*ln] <= 0)) - return error("[bwsl] must follow `.' in symbol"); + if (((chrtab[*ln] & DOT) == 0) || (dotxtab[*ln] == 0)) + return error("[bwsl] must follow '.' in symbol"); v = (VALUE)dotxtab[*ln++]; - if ((int)chrtab[*ln] & CTSYM) - return error("misuse of `.', not allowed in symbols"); + if (chrtab[*ln] & CTSYM) + return error("misuse of '.', not allowed in symbols"); } // If the symbol is small, check to see if it's really the name of @@ -1030,8 +1101,14 @@ int tokln(void) j = -1; } - //make j = -1 if time, date etc with no preceeding ^^ - //defined, referenced, streq, macdef, date and time + // Make j = -1 if user tries to use a RISC register while in 68K mode + if (!(rgpu || rdsp) && ((TOKEN)j >= KW_R0 && (TOKEN)j <= KW_R31)) + { + j = -1; + } + + // Make j = -1 if time, date etc with no preceeding ^^ + // defined, referenced, streq, macdef, date and time switch ((TOKEN)j) { case 112: // defined @@ -1041,16 +1118,22 @@ int tokln(void) case 120: // time case 121: // date j = -1; - break; } - if (j < 0 || state < 0) + // If not tokenized keyword OR token was not found + if ((j < 0) || (state < 0)) { *tk++ = SYMBOL; -#warning -//problem here: nullspot is a char * but TOKEN is a uint32_t. On a 64-bit system, -//this will cause all kinds of mischief. +//#warning +//problem here: nullspot is a char * but TOKEN is a uint32_t. On a 64-bit +//system, this will cause all kinds of mischief. +#if 0 *tk++ = (TOKEN)nullspot; +#else + string[stringNum] = nullspot; + *tk++ = stringNum; + stringNum++; +#endif } else { @@ -1061,7 +1144,7 @@ int tokln(void) if (v) // Record attribute token (if any) *tk++ = (TOKEN)v; - if (stuffnull) // Arrange for string termination + if (stuffnull) // Arrange for string termination on next pass nullspot = ln; continue; @@ -1079,7 +1162,7 @@ int tokln(void) { switch (*ln++) { - case '!': // ! or != + case '!': // ! or != if (*ln == '=') { *tk++ = NE; @@ -1089,14 +1172,20 @@ int tokln(void) *tk++ = '!'; continue; - case '\'': // 'string' - case '\"': // "string" + case '\'': // 'string' + case '\"': // "string" c1 = ln[-1]; *tk++ = STRING; -#warning +//#warning // More char * stuffing (8 bytes) into the space of 4 (TOKEN). // Need to figure out how to fix this crap. +#if 0 *tk++ = (TOKEN)ln; +#else + string[stringNum] = ln; + *tk++ = stringNum; + stringNum++; +#endif for(p=ln; *ln!=EOS && *ln!=c1;) { @@ -1137,7 +1226,7 @@ int tokln(void) break; default: warn("bad backslash code in string"); - --ln; + ln--; break; } } @@ -1150,42 +1239,84 @@ int tokln(void) *p++ = EOS; continue; - case '$': // $, hex constant - if ((int)chrtab[*ln] & HDIGIT) + case '$': // $, hex constant + if (chrtab[*ln] & HDIGIT) { v = 0; - while ((int)hextab[*ln] >= 0) + // Parse the hex value + while (hextab[*ln] >= 0) v = (v << 4) + (int)hextab[*ln++]; + // ggn: Okay, some comments here are in order I think.... + // The original madmac sources didn't parse the size at + // this point (i.e. .b/.w/.l). It was probably done at + // another point, although it's unclear to me exactly + // where. So why change this? My understanding (at least + // from what SCPCD said on IRC) is that .w addressing + // formats produce wrong code on jaguar (or doesn't execute + // properly? something like that). So the code was changed + // to mask off the upper bits depending on length (note: I + // don't think .b is valid at all! I only know of .w/.l, so + // this should probably be wiped). Then the code that + // parses the constant and checks to see if it's between + // $ffff0000 and $8000 never got triggered, so yay job + // done! ...now say we want to assemble a st .prg. One of + // the most widely spread optimisations is move.X expr.w,Y + // (or vice versa, or both, anyway...) to access hardware + // registers (which are mapped to $fxxxxx). This botchy + // thing would create "hilarious" code while trying to + // access hardware registers. So I made a condition to see + // if st mode or jaguar is active and apply the both or + // not. One last note: this is hardcoded to get optimised + // for now on ST mode, i.e. it can't generate code like + // move.w $00001234,d0 - it'll always get optimised to + // move.w $1234.w,d0. It's probably ok, but maybe a warning + // should be emitted? Or maybe finding a way to make it not + // auto-optimise? I think it's ok for now... if (*ln == '.') { - if ((*(ln+1) == 'b') || (*(ln+1) == 'B')) + if (obj_format == BSD) { - v &= 0x000000FF; - ln += 2; + if ((*(ln + 1) & 0xDF) == 'B') + { + v &= 0x000000FF; + ln += 2; + } + else if ((*(ln + 1) & 0xDF) == 'W') + { + v &= 0x0000FFFF; + ln += 2; + } + else if ((*(ln + 1) & 0xDF) == 'L') + { + ln += 2; + } } + } - if ((*(ln+1) == 'w') || (*(ln+1) == 'W')) + *tk++ = CONST; + *tk++ = v; + + if (obj_format == ALCYON) + { + if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W')) { - v &= 0x0000FFFF; + *tk++ = DOTW; ln += 2; } - - if ((*(ln+1) == 'l') || (*(ln+1) == 'L')) + else if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L')) { + *tk++ = DOTL; ln += 2; } } - - *tk++ = CONST; - *tk++ = v; } else *tk++ = '$'; continue; - case '<': // < or << or <> or <= + case '<': // < or << or <> or <= switch (*ln) { case '<': @@ -1204,7 +1335,7 @@ int tokln(void) *tk++ = '<'; continue; } - case ':': // : or :: + case ':': // : or :: if (*ln == ':') { *tk++ = DCOLON; @@ -1214,7 +1345,7 @@ int tokln(void) *tk++ = ':'; continue; - case '=': // = or == + case '=': // = or == if (*ln == '=') { *tk++ = DEQUALS; @@ -1224,22 +1355,22 @@ int tokln(void) *tk++ = '='; continue; - case '>': // > or >> or >= + case '>': // > or >> or >= switch (*ln) { case '>': *tk++ = SHR; - ++ln; + ln++; continue; case '=': *tk++ = GE; - ++ln; + ln++; continue; default: *tk++ = '>'; continue; } - case '%': // % or binary constant + case '%': // % or binary constant if (*ln < '0' || *ln > '1') { *tk++ = '%'; @@ -1274,7 +1405,7 @@ int tokln(void) *tk++ = CONST; *tk++ = v; continue; - case '@': // @ or octal constant + case '@': // @ or octal constant if (*ln < '0' || *ln > '7') { *tk++ = '@'; @@ -1309,7 +1440,7 @@ int tokln(void) *tk++ = CONST; *tk++ = v; continue; - case '^': // ^ or ^^ + case '^': // ^ or ^^ if (*ln != '^') { *tk++ = '^'; @@ -1329,7 +1460,7 @@ int tokln(void) for(state=0; state>=0;) { - // Get char, convert to lowercase + // Get char, convert to lowercase j = *p++; if (j >= 'A' && j <= 'Z') @@ -1361,7 +1492,7 @@ int tokln(void) *tk++ = (TOKEN)j; continue; default: - interror(2); // Bad MULTX entry in chrtab + interror(2); // Bad MULTX entry in chrtab continue; } } @@ -1374,22 +1505,20 @@ int tokln(void) while ((int)chrtab[*ln] & DIGIT) v = (v * 10) + *ln++ - '0'; - // See if there's a .[bwl] after the constant, & deal with it + // See if there's a .[bwl] after the constant & deal with it if so if (*ln == '.') { - if ((*(ln+1) == 'b') || (*(ln+1) == 'B')) + if ((*(ln + 1) == 'b') || (*(ln + 1) == 'B')) { v &= 0x000000FF; ln += 2; } - - if ((*(ln+1) == 'w') || (*(ln+1) == 'W')) + else if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W')) { v &= 0x0000FFFF; ln += 2; } - - if ((*(ln+1) == 'l') || (*(ln+1) == 'L')) + else if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L')) { ln += 2; } @@ -1397,6 +1526,7 @@ int tokln(void) *tk++ = CONST; *tk++ = v; +//printf("CONST: %i\n", v); continue; } @@ -1420,49 +1550,53 @@ goteol: // // .GOTO