X-Git-Url: http://shamusworld.gotdns.org/cgi-bin/gitweb.cgi?p=rmac;a=blobdiff_plain;f=token.c;h=fa5af09b1dd6d60d18bae85d8f59417f643c0d7d;hp=2877b481172bb209c3deae622e100629fa126e6a;hb=HEAD;hpb=44301fed8a6d94673afa3aae3a8a0a76aebac6f7 diff --git a/token.c b/token.c index 2877b48..9cbf8b4 100644 --- a/token.c +++ b/token.c @@ -1,29 +1,41 @@ // -// RMAC - Reboot's Macro Assembler for the Atari Jaguar Console System +// RMAC - Renamed Macro Assembler for all Atari computers // TOKEN.C - Token Handling -// Copyright (C) 199x Landon Dyer, 2011-2012 Reboot and Friends +// Copyright (C) 199x Landon Dyer, 2011-2021 Reboot and Friends // RMAC derived from MADMAC v1.07 Written by Landon Dyer, 1986 -// Source Utilised with the Kind Permission of Landon Dyer +// Source utilised with the kind permission of Landon Dyer // #include "token.h" + +#include +#include "direct.h" #include "error.h" #include "macro.h" #include "procln.h" +#include "sect.h" #include "symbol.h" #define DECL_KW // Declare keyword arrays -#define DEF_KW // Declare keyword values +#define DEF_KW // Declare keyword values #include "kwtab.h" // Incl generated keyword tables & defs +#define DEF_REG68 // Incl 68k register definitions +#include "68kregs.h" +#define DEF_REGRISC // Include GPU/DSP register definitions +#include "riscregs.h" +#define DEF_UNARY // Declare unary values +#define DECL_UNARY // Incl uanry keyword state machine tables +#include "unarytab.h" // Incl generated unary tables & defs + int lnsave; // 1; strcpy() text of current line -int curlineno; // Current line number +uint32_t curlineno; // Current line number (64K max currently) int totlines; // Total # of lines int mjump_align = 0; // mjump alignment flag char lntag; // Line tag char * curfname; // Current filename -char tolowertab[128]; // Uppercase ==> lowercase -char hextab[128]; // Table of hex values +char tolowertab[128]; // Uppercase ==> lowercase +int8_t hextab[128]; // Table of hex values char dotxtab[128]; // Table for ".b", ".s", etc. char irbuf[LNSIZ]; // Text for .rept block line char lnbuf[LNSIZ]; // Text of current line @@ -32,118 +44,182 @@ WORD cfileno; // Current file number TOKEN * tok; // Ptr to current token TOKEN * etok; // Ptr past last token in tokbuf[] TOKEN tokeol[1] = {EOL}; // Bailout end-of-line token -char * string[TOKBUFSIZE*2]; // Token buffer string pointer storage +char * string[TOKBUFSIZE*2];// Token buffer string pointer storage +int optimizeOff; // Optimization override flag -// File record, used to maintain a list of every include file ever visited -#define FILEREC struct _filerec -FILEREC -{ - FILEREC * frec_next; - char * frec_name; -}; FILEREC * filerec; FILEREC * last_fr; -INOBJ * cur_inobj; // Ptr current input obj (IFILE/IMACRO) -static INOBJ * f_inobj; // Ptr list of free INOBJs -static IFILE * f_ifile; // Ptr list of free IFILEs -static IMACRO * f_imacro; // Ptr list of free IMACROs +INOBJ * cur_inobj; // Ptr current input obj (IFILE/IMACRO) +static INOBJ * f_inobj; // Ptr list of free INOBJs +static IFILE * f_ifile; // Ptr list of free IFILEs +static IMACRO * f_imacro; // Ptr list of free IMACROs -static TOKEN tokbuf[TOKBUFSIZE]; // Token buffer (stack-like, all files) +static TOKEN tokbuf[TOKBUFSIZE]; // Token buffer (stack-like, all files) -char chrtab[] = { - ILLEG, ILLEG, ILLEG, ILLEG, // NUL SOH STX ETX - ILLEG, ILLEG, ILLEG, ILLEG, // EOT ENQ ACK BEL - ILLEG, WHITE, ILLEG, ILLEG, // BS HT LF VT - WHITE, ILLEG, ILLEG, ILLEG, // FF CR SO SI +uint8_t chrtab[0x100] = { + ILLEG, ILLEG, ILLEG, ILLEG, // NUL SOH STX ETX + ILLEG, ILLEG, ILLEG, ILLEG, // EOT ENQ ACK BEL + ILLEG, WHITE, ILLEG, ILLEG, // BS HT LF VT + WHITE, ILLEG, ILLEG, ILLEG, // FF CR SO SI - ILLEG, ILLEG, ILLEG, ILLEG, // DLE DC1 DC2 DC3 - ILLEG, ILLEG, ILLEG, ILLEG, // DC4 NAK SYN ETB - ILLEG, ILLEG, ILLEG, ILLEG, // CAN EM SUB ESC - ILLEG, ILLEG, ILLEG, ILLEG, // FS GS RS US + ILLEG, ILLEG, ILLEG, ILLEG, // DLE DC1 DC2 DC3 + ILLEG, ILLEG, ILLEG, ILLEG, // DC4 NAK SYN ETB + ILLEG, ILLEG, ILLEG, ILLEG, // CAN EM SUB ESC + ILLEG, ILLEG, ILLEG, ILLEG, // FS GS RS US WHITE, MULTX, MULTX, SELF, // SP ! " # MULTX+CTSYM, MULTX, SELF, MULTX, // $ % & ' SELF, SELF, SELF, SELF, // ( ) * + SELF, SELF, STSYM, SELF, // , - . / - DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 0 1 - DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 2 3 - DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 4 5 - DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 6 7 - DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 8 9 - MULTX, MULTX, // : ; - MULTX, MULTX, MULTX, STSYM+CTSYM, // < = > ? - - MULTX, STSYM+CTSYM+HDIGIT, // @ A - (char)((BYTE)DOT)+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT, // B C - STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT, // D E - STSYM+CTSYM+HDIGIT, STSYM+CTSYM, // F G - STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // H I J K - (char)((BYTE)DOT)+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // L M N O - - STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, (char)((BYTE)DOT)+STSYM+CTSYM, // P Q R S - STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, (char)((BYTE)DOT)+STSYM+CTSYM, // T U V W - STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, SELF, // X Y Z [ - SELF, SELF, MULTX, STSYM+CTSYM, // \ ] ^ _ - - ILLEG, STSYM+CTSYM+HDIGIT, // ` a - (char)((BYTE)DOT)+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT, // b c - STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT, // d e - STSYM+CTSYM+HDIGIT, STSYM+CTSYM, // f g - STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // h i j k - (char)((BYTE)DOT)+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // l m n o - - STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, (char)((BYTE)DOT)+STSYM+CTSYM, // p q r s - STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, (char)((BYTE)DOT)+STSYM+CTSYM, // t u v w - STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, SELF, // x y z { - SELF, SELF, SELF, ILLEG // | } ~ DEL + DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 0 1 + DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 2 3 + DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 4 5 + DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 6 7 + DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 8 9 + MULTX, MULTX, // : ; + MULTX, MULTX, MULTX, STSYM+CTSYM, // < = > ? + + MULTX, STSYM+CTSYM+HDIGIT, // @ A + DOT+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT, // B C + DOT+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT, // D E + STSYM+CTSYM+HDIGIT, STSYM+CTSYM, // F G + STSYM+CTSYM, DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // H I J K + DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // L M N O + + DOT+STSYM+CTSYM, DOT+STSYM+CTSYM, STSYM+CTSYM, DOT+STSYM+CTSYM, // P Q R S + STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, DOT+STSYM+CTSYM, // T U V W + STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, SELF,// X Y Z [ + SELF, SELF, MULTX, STSYM+CTSYM, // \ ] ^ _ + + ILLEG, STSYM+CTSYM+HDIGIT, // ` a + DOT+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT, // b c + DOT+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT, // d e + STSYM+CTSYM+HDIGIT, STSYM+CTSYM, // f g + STSYM+CTSYM, DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // h i j k + DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // l m n o + + DOT+STSYM+CTSYM, DOT+STSYM+CTSYM, STSYM+CTSYM, DOT+STSYM+CTSYM, // p q r s + STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, DOT+STSYM+CTSYM, // t u v w + DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, SELF, // x y z { + SELF, SELF, SELF, ILLEG, // | } ~ DEL + + // Anything above $7F is illegal (and yes, we need to check for this, + // otherwise you get strange and spurious errors that will lead you astray) + ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, + ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, + ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, + ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, + ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, + ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, + ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, + ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, + ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, + ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, + ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, + ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, + ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, + ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, + ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, + ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG }; // Names of registers static char * regname[] = { - "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", - "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", - "pc", "ssp", "usp", "sr", "ccr" -}; - -static char * riscregname[] = { - "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", - "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", - "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23", - "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31" + "d0","d1","d2","d3","d4","d5","d6","d7", // 128,135 + "a0","a1","a2","a3","a4","a5","a6","sp", // 136,143 + "ssp","pc","sr","ccr","regequ","set","reg","r0", // 144,151 + "r1","r2","r3","r4","r5","r6","r7","r8", // 152,159 + "r9","r10","r11","r12","r13","r14","r15","r16", // 160,167 + "r17","r18","r19","r20","r21","r22","r23","r24", // 168,175 + "r25","r26","r27","r28","r29","r30","r31","ccdef", // 176,183 + "usp","ic40","dc40","bc40","sfc","dfc","","vbr", // 184,191 + "cacr","caar","msp","isp","tc","itt0","itt1","dtt0", // 192,199 + "dtt1","mmusr","urp","srp","iacr0","iacr1","dacr0","dacr1", // 200,207 + "tt0","tt1","crp","","","","","", // 208,215 + "","","","","fpiar","fpsr","fpcr","", // 216,223 + "fp0","fp1","fp2","fp3","fp4","fp5","fp6","fp7", // 224,231 + "","","","","","","","", // 232,239 + "","","","","","","","", // 240,247 + "","","","","","","","", // 248,255 + "","","","","x0","x1","y0","y1", // 256,263 + "","b0","","b2","","b1","a","b", // 264,271 + "mr","omr","la","lc","ssh","ssl","ss","", // 272,279 + "n0","n1","n2","n3","n4","n5","n6","n7", // 280,287 + "m0","m1","m2","m3","m4","m5","m6","m7", // 288,295 + "","","","","","","l","p", // 296,303 + "mr","omr","la","lc","ssh","ssl","ss","", // 304,311 + "a10","b10","x","y","","","ab","ba" // 312,319 }; -// Removing this, provided it doesn't cause unwanted side-effects :-P -#if 0 // -// Make `fnum' the Current `curfname' -// NOTE: This is currently only called from error() in error.c +// Initialize tokenizer // -void setfnum(WORD fnum) +void InitTokenizer(void) { -#if 0 - // NOTE: fnum is ZERO based, this can cause problems if you're not careful! - FILEREC * fr = filerec; + int i; // Iterator + char * htab = "0123456789abcdefABCDEF"; // Hex character table - DEBUG printf("[setfnum: fnum=%u]\n", fnum); + lnsave = 0; // Don't save lines + curfname = ""; // No file, empty filename + filecount = (WORD)-1; + cfileno = (WORD)-1; // cfileno gets bumped to 0 + curlineno = 0; + totlines = 0; + etok = tokbuf; + f_inobj = NULL; + f_ifile = NULL; + f_imacro = NULL; + cur_inobj = NULL; + filerec = NULL; + last_fr = NULL; + lntag = SPACE; - // Advance to the correct record... - while (fr != NULL && fnum != 0) + // Initialize hex, "dot" and tolower tables + for(i=0; i<128; i++) { - fr = fr->frec_next; - fnum--; + hextab[i] = -1; + dotxtab[i] = 0; + tolowertab[i] = (char)i; } - if (fr == NULL) - curfname = "(*top*)"; - else - curfname = fr->frec_name; + for(i=0; htab[i]!=EOS; i++) + hextab[htab[i]] = (char)((i < 16) ? i : i - 6); + + for(i='A'; i<='Z'; i++) + tolowertab[i] |= 0x20; + + // These characters are legal immediately after a period + dotxtab['b'] = DOTB; // .b .B .s .S + dotxtab['B'] = DOTB; + //dotxtab['s'] = DOTB; + //dotxtab['S'] = DOTB; + dotxtab['w'] = DOTW; // .w .W + dotxtab['W'] = DOTW; + dotxtab['l'] = DOTL; // .l .L + dotxtab['L'] = DOTL; + dotxtab['i'] = DOTI; // .i .I (WTF is this???) + dotxtab['I'] = DOTI; + dotxtab['D'] = DOTD; // .d .D (double) + dotxtab['d'] = DOTD; + dotxtab['S'] = DOTS; // .s .S + dotxtab['s'] = DOTS; + dotxtab['Q'] = DOTQ; // .q .Q (quad word) + dotxtab['q'] = DOTQ; + dotxtab['X'] = DOTX; // .x .x + dotxtab['x'] = DOTX; + dotxtab['P'] = DOTP; // .p .P + dotxtab['p'] = DOTP; +} + + +void SetFilenameForErrorReporting(void) +{ + WORD fnum = cfileno; - DEBUG printf("[setfnum: curfname=%s]\n", curfname); -#else // Check for absolute top filename (this should never happen) if (fnum == -1) { @@ -168,9 +244,7 @@ void setfnum(WORD fnum) } curfname = fr->frec_name; -#endif } -#endif // @@ -204,7 +278,8 @@ INOBJ * a_inobj(int typ) inobj->inobj.ifile = ifile; break; - case SRC_IMACRO: // Alloc and init an IMACRO + + case SRC_IMACRO: // Alloc and init an IMACRO if (f_imacro == NULL) imacro = malloc(sizeof(IMACRO)); else @@ -215,9 +290,10 @@ INOBJ * a_inobj(int typ) inobj->inobj.imacro = imacro; break; + case SRC_IREPT: // Alloc and init an IREPT inobj->inobj.irept = malloc(sizeof(IREPT)); - DEBUG printf("alloc IREPT\n"); + DEBUG { printf("alloc IREPT\n"); } break; } @@ -262,7 +338,6 @@ int ExpandMacro(char * src, char * dest, int destsiz) IMACRO * imacro = cur_inobj->inobj.imacro; int macnum = (int)(imacro->im_macro->sattr); -// destsiz--; char * dst = dest; // Next dest slot char * edst = dest + destsiz - 1; // End + 1(?) of dest buffer @@ -288,6 +363,11 @@ int ExpandMacro(char * src, char * dest, int destsiz) if (dst >= edst) goto overflow; + // Skip comments in case a loose @ or \ is in there + // In that case the tokeniser was trying to expand it. + if ((*s == ';') || ((*s == '/') && (*(s + 1) == '/'))) + goto skipcomments; + *dst++ = *s++; } // Do macro expansion @@ -304,11 +384,11 @@ int ExpandMacro(char * src, char * dest, int destsiz) *dst++ = *s++; continue; - case '?': // \? set `questmark' flag - ++s; + case '?': // \? set `questmark' flag + s++; questmark = 1; break; - case '#': // \#, number of arguments + case '#': // \#, number of arguments sprintf(numbuf, "%d", (int)imacro->im_nargs); goto copystr; case '!': // \! size suffix supplied on invocation @@ -321,7 +401,7 @@ int ExpandMacro(char * src, char * dest, int destsiz) } goto copy_d; - case '~': // ==> unique label string Mnnnn... + case '~': // ==> unique label string Mnnnn... sprintf(numbuf, "M%u", curuniq); copystr: d = numbuf; @@ -371,7 +451,7 @@ copy_d: *d++ = *s++; if (*s != '}') - return error("missing '}'"); + return error("missing closing brace ('}')"); else s++; } @@ -380,10 +460,10 @@ copy_d: // Lookup the argument and copy its (string) value into the // destination string - DEBUG printf("argument='%s'\n", mname); + DEBUG { printf("argument='%s'\n", mname); } if ((arg = lookup(mname, MACARG, macnum)) == NULL) - return errors("undefined argument: '%s'", mname); + return error("undefined argument: '%s'", mname); else { // Convert a string of tokens (terminated with EOL) back into @@ -391,24 +471,18 @@ copy_d: // macro invocation) then it is ignored. i = (int)arg->svalue; arg_num: - DEBUG printf("~argnumber=%d (argBase=%u)\n", i, imacro->argBase); + DEBUG { printf("~argnumber=%d\n", i); } tk = NULL; if (i < imacro->im_nargs) { -#if 0 -// tk = argp[i]; -// tk = argPtrs[i]; - tk = argPtrs[imacro->argBase + i]; -#else tk = imacro->argument[i].token; symbolString = imacro->argument[i].string; //DEBUG //{ // printf("ExM: Preparing to parse argument #%u...\n", i); -// dumptok(tk); +// DumpTokens(tk); //} -#endif } // \?arg yields: @@ -426,7 +500,8 @@ arg_num: continue; } - if (tk != NULL) // arg # is in range, so expand it + // Argument # is in range, so expand it + if (tk != NULL) { while (*tk != EOL) { @@ -434,14 +509,9 @@ arg_num: // This is a hack. It might be better table-driven. d = NULL; - if ((*tk >= KW_D0) && !rdsp && !rgpu) - { - d = regname[(int)*tk++ - KW_D0]; - goto strcopy; - } - else if ((*tk >= KW_R0) && (*tk <= KW_R31)) + if (*tk >= REG68_D0) { - d = riscregname[(int)*tk++ - KW_R0]; + d = regname[(int)*tk++ - REG68_D0]; goto strcopy; } else @@ -449,22 +519,12 @@ arg_num: switch ((int)*tk++) { case SYMBOL: -#if 0 -// d = (char *)*tk++; - d = string[*tk++]; -#else - // This fix should be done for strings too d = symbolString[*tk++]; -DEBUG printf("ExM: SYMBOL=\"%s\"", d); -#endif +DEBUG { printf("ExM: SYMBOL=\"%s\"", d); } break; case STRING: -#if 0 -// d = (char *)*tk++; - d = string[*tk++]; -#else d = symbolString[*tk++]; -#endif + if (dst >= edst) goto overflow; @@ -484,11 +544,13 @@ DEBUG printf("ExM: SYMBOL=\"%s\"", d); *dst++ = '"'; continue; break; -// Shamus: Changing the format specifier from %lx to %ux caused -// the assembler to choke on legitimate code... Need to investigate -// this further before changing anything else here! +// Shamus: Changing the format specifier from %lx to %ux caused the assembler +// to choke on legitimate code... Need to investigate this further +// before changing anything else here! case CONST: - sprintf(numbuf, "$%lx", (LONG)*tk++); +// sprintf(numbuf, "$%lx", (uint64_t)*tk++); + sprintf(numbuf, "$%" PRIX64, (uint64_t)*tk++); + tk++; d = numbuf; break; case DEQUALS: @@ -527,6 +589,12 @@ DEBUG printf("ExM: SYMBOL=\"%s\"", d); case DOTL: d = ".l"; break; + case CR_ABSCOUNT: + d = "^^abscount"; + break; + case CR_FILESIZE: + d = "^^filesize"; + break; case CR_DATE: d = "^^date"; break; @@ -574,254 +642,219 @@ strcopy: } } +skipcomments: + *dst = EOS; DEBUG { printf("ExM: dst=\"%s\"\n", dest); } return OK; overflow: *dst = EOS; - DEBUG printf("*** OVERFLOW LINE ***\n%s\n", dest); + DEBUG { printf("*** OVERFLOW LINE ***\n%s\n", dest); } return fatal("line too long as a result of macro expansion"); } // -// Get Next Line of Text from a Macro +// Get next line of text from a macro // -char * getmln(void) +char * GetNextMacroLine(void) { - unsigned source_addr; - IMACRO * imacro = cur_inobj->inobj.imacro; -// LONG * strp = imacro->im_nextln; - struct LineList * strp = imacro->im_nextln; + LLIST * strp = imacro->im_nextln; if (strp == NULL) // End-of-macro return NULL; -// imacro->im_nextln = (LONG *)*strp; imacro->im_nextln = strp->next; // ExpandMacro((char *)(strp + 1), imacro->im_lnbuf, LNSIZ); ExpandMacro(strp->line, imacro->im_lnbuf, LNSIZ); - if (!strcmp(imacro->im_macro->sname, "mjump") && !mjump_align) - { - // if we need to adjust the alignment of the jump source address to - // meet the rules of gpu main execution we need to skip the first nop - // of the macro. This is simpler than trying to insert nop's mid macro. - source_addr = (orgactive ? orgaddr : sloc); - source_addr += 8; - - if (source_addr % 4) - { - strp = imacro->im_nextln; - - if (strp == NULL) - return NULL; - -// imacro->im_nextln = (LONG *)*strp; -// ExpandMacro((char *)(strp + 1), imacro->im_lnbuf, LNSIZ); - imacro->im_nextln = strp->next; - ExpandMacro(strp->line, imacro->im_lnbuf, LNSIZ); - } - - mjump_align = 1; - } - return imacro->im_lnbuf; } // -// Get Next Line of Text from a Repeat Block +// Get next line of text from a repeat block // -char * getrln(void) +char * GetNextRepeatLine(void) { - IREPT * irept = cur_inobj->inobj.irept; - LONG * strp = irept->ir_nextln; // initial null +// LONG * strp = irept->ir_nextln; // initial null // Do repeat at end of .rept block's string list - if (strp == NULL) +// if (strp == NULL) + if (irept->ir_nextln == NULL) { - DEBUG printf("back-to-top-of-repeat-block count=%d\n", (int)irept->ir_count); + DEBUG { printf("back-to-top-of-repeat-block count=%d\n", (int)irept->ir_count); } irept->ir_nextln = irept->ir_firstln; // copy first line if (irept->ir_count-- == 0) { - DEBUG printf("end-repeat-block\n"); + DEBUG { printf("end-repeat-block\n"); } return NULL; } - - strp = irept->ir_nextln; //strp + reptuniq++; +// strp = irept->ir_nextln; + } + // Mark the current macro line in the irept object + // This is probably overkill - a global variable + // would suffice here (it only gets used during + // error reporting anyway) + irept->lineno = irept->ir_nextln->lineno; + + // Copy the rept lines verbatim, unless we're in nest level 0. + // Then, expand any \~ labels to unique numbers (Rn) + if (rptlevel) + { + strcpy(irbuf, irept->ir_nextln->line); + } + else + { + uint32_t linelen = strlen(irept->ir_nextln->line); + uint8_t *p_line = irept->ir_nextln->line; + char *irbufwrite = irbuf; + for (int i = 0; i <= linelen; i++) + { + uint8_t c; + c = *p_line++; + if (c == '\\' && *p_line == '~') + { + p_line++; + irbufwrite += sprintf(irbufwrite, "R%u", reptuniq); + } + else + { + *irbufwrite++ = c; + } + } } - strcpy(irbuf, (char *)(irept->ir_nextln + 1)); - DEBUG printf("repeat line='%s'\n", irbuf); - irept->ir_nextln = (LONG *)*strp; + DEBUG { printf("repeat line='%s'\n", irbuf); } +// irept->ir_nextln = (LONG *)*strp; + irept->ir_nextln = irept->ir_nextln->next; return irbuf; } // -// Include a Source File used at the Root, and for ".include" Files +// Include a source file used at the root, and for ".include" files // int include(int handle, char * fname) { - IFILE * ifile; - INOBJ * inobj; - FILEREC * fr; - - // Verbose mode - if (verb_flag) - printf("[include: %s, cfileno=%u]\n", fname, cfileno); + // Debug mode + DEBUG { printf("[include: %s, cfileno=%u]\n", fname, cfileno); } // Alloc and initialize include-descriptors - inobj = a_inobj(SRC_IFILE); - ifile = inobj->inobj.ifile; - - ifile->ifhandle = handle; // Setup file handle - ifile->ifind = ifile->ifcnt = 0; // Setup buffer indices - ifile->ifoldlineno = curlineno; // Save old line number - ifile->ifoldfname = curfname; // Save old filename - ifile->ifno = cfileno; // Save old file number -// cfileno = filecount++; // Compute new file number - cfileno = ++filecount; // Compute new file number - curfname = strdup(fname); // Set current filename (alloc storage) - curlineno = 0; // Start on line zero + INOBJ * inobj = a_inobj(SRC_IFILE); + IFILE * ifile = inobj->inobj.ifile; + + ifile->ifhandle = handle; // Setup file handle + ifile->ifind = ifile->ifcnt = 0; // Setup buffer indices + ifile->ifoldlineno = curlineno; // Save old line number + ifile->ifoldfname = curfname; // Save old filename + ifile->ifno = cfileno; // Save old file number + + // NB: This *must* be preincrement, we're adding one to the filecount here! + cfileno = ++filecount; // Compute NEW file number + curfname = strdup(fname); // Set current filename (alloc storage) + curlineno = 0; // Start on line zero // Add another file to the file-record -// fr = (FILEREC *)amem((LONG)sizeof(FILEREC)); - fr = (FILEREC *)malloc(sizeof(FILEREC)); + FILEREC * fr = (FILEREC *)malloc(sizeof(FILEREC)); fr->frec_next = NULL; fr->frec_name = curfname; if (last_fr == NULL) - filerec = fr; // Add first filerec + filerec = fr; // Add first filerec else - last_fr->frec_next = fr; // Append to list of filerecs + last_fr->frec_next = fr; // Append to list of filerecs last_fr = fr; - - if (verb_flag) - printf("[include: curfname: %s, cfileno=%u]\n", curfname, cfileno); + DEBUG { printf("[include: curfname: %s, cfileno=%u]\n", curfname, cfileno); } return OK; } // -// Initialize Tokenizer +// Pop the current input level // -void init_token(void) +int fpop(void) { - int i; // Iterator - char * htab = "0123456789abcdefABCDEF"; // Hex character table + INOBJ * inobj = cur_inobj; - lnsave = 0; // Don't save lines - curfname = ""; // No file, empty filename - filecount = (WORD)-1; - cfileno = (WORD)-1; // cfileno gets bumped to 0 - curlineno = 0; - totlines = 0; - etok = tokbuf; - f_inobj = NULL; - f_ifile = NULL; - f_imacro = NULL; - cur_inobj = NULL; - filerec = NULL; - last_fr = NULL; - lntag = SPACE; + if (inobj == NULL) + return 0; - // Initialize hex, "dot" and tolower tables - for(i=0; i<128; i++) - { - hextab[i] = -1; - dotxtab[i] = 0; - tolowertab[i] = (char)i; - } + // Pop IFENT levels until we reach the conditional assembly context we + // were at when the input object was entered. + int numUnmatched = 0; - for(i=0; htab[i]!=EOS; i++) - hextab[htab[i]] = (char)((i < 16) ? i : i - 6); + while (ifent != inobj->in_ifent) + { + if (d_endif() != 0) // Something bad happened during endif parsing? + return -1; // If yes, bail instead of getting stuck in a loop - for(i='A'; i<='Z'; i++) - tolowertab[i] |= 0x20; + numUnmatched++; + } - // These characters are legal immediately after a period - dotxtab['b'] = DOTB; // .b .B .s .S - dotxtab['B'] = DOTB; - dotxtab['s'] = DOTB; - dotxtab['S'] = DOTB; - dotxtab['w'] = DOTW; // .w .W - dotxtab['W'] = DOTW; - dotxtab['l'] = DOTL; // .l .L - dotxtab['L'] = DOTL; - dotxtab['I'] = DOTI; // .l .L - dotxtab['I'] = DOTI; -} + // Give a warning to the user that we had to wipe their bum for them + if (numUnmatched > 0) + warn("missing %d .endif(s)", numUnmatched); + tok = inobj->in_otok; // Restore tok and etok + etok = inobj->in_etok; -// -// Pop the Current Input Level -// -int fpop(void) -{ - IFILE * ifile; - IMACRO * imacro; - LONG * p, * p1; - INOBJ * inobj = cur_inobj; + switch (inobj->in_type) + { + case SRC_IFILE: // Pop and release an IFILE + { + DEBUG { printf("[Leaving: %s]\n", curfname); } + + IFILE * ifile = inobj->inobj.ifile; + ifile->if_link = f_ifile; + f_ifile = ifile; + close(ifile->ifhandle); // Close source file +DEBUG { printf("[fpop (pre): curfname=%s]\n", curfname); } + curfname = ifile->ifoldfname; // Set current filename +DEBUG { printf("[fpop (post): curfname=%s]\n", curfname); } +DEBUG { printf("[fpop: (pre) cfileno=%d ifile->ifno=%d]\n", (int)cfileno, (int)ifile->ifno); } + curlineno = ifile->ifoldlineno; // Set current line# + DEBUG { printf("cfileno=%d ifile->ifno=%d\n", (int)cfileno, (int)ifile->ifno); } + cfileno = ifile->ifno; // Restore current file number +DEBUG { printf("[fpop: (post) cfileno=%d ifile->ifno=%d]\n", (int)cfileno, (int)ifile->ifno); } + break; + } - if (inobj != NULL) + case SRC_IMACRO: // Pop and release an IMACRO { - // Pop IFENT levels until we reach the conditional assembly context we - // were at when the input object was entered. - while (ifent != inobj->in_ifent) - d_endif(); + IMACRO * imacro = inobj->inobj.imacro; + imacro->im_link = f_imacro; + f_imacro = imacro; + break; + } - tok = inobj->in_otok; // Restore tok and otok - etok = inobj->in_etok; + case SRC_IREPT: // Pop and release an IREPT + { + DEBUG { printf("dealloc IREPT\n"); } + LLIST * p = inobj->inobj.irept->ir_firstln; - switch (inobj->in_type) + // Deallocate repeat lines + while (p != NULL) { - case SRC_IFILE: // Pop and release an IFILE - if (verb_flag) - printf("[Leaving: %s]\n", curfname); - - ifile = inobj->inobj.ifile; - ifile->if_link = f_ifile; - f_ifile = ifile; - close(ifile->ifhandle); // Close source file -if (verb_flag) printf("[fpop (pre): curfname=%s]\n", curfname); - curfname = ifile->ifoldfname; // Set current filename -if (verb_flag) printf("[fpop (post): curfname=%s]\n", curfname); - curlineno = ifile->ifoldlineno; // Set current line# - DEBUG printf("cfileno=%d ifile->ifno=%d\n", (int)cfileno, (int)ifile->ifno); -if (verb_flag) printf("[fpop: cfileno=%d ifile->ifno=%d]\n", (int)cfileno, (int)ifile->ifno); - cfileno = ifile->ifno; // Restore current file number - break; - case SRC_IMACRO: // Pop and release an IMACRO - imacro = inobj->inobj.imacro; - imacro->im_link = f_imacro; - f_imacro = imacro; - break; - case SRC_IREPT: // Pop and release an IREPT - DEBUG printf("dealloc IREPT\n"); - p = inobj->inobj.irept->ir_firstln; - - while (p != NULL) - { - p1 = (LONG *)*p; - p = p1; - } - - break; + free(p->line); + p = p->next; } - cur_inobj = inobj->in_link; - inobj->in_link = f_inobj; - f_inobj = inobj; + break; } + } + + cur_inobj = inobj->in_link; + inobj->in_link = f_inobj; + f_inobj = inobj; return 0; } @@ -831,7 +864,7 @@ if (verb_flag) printf("[fpop: cfileno=%d ifile->ifno=%d]\n", (int)cfileno // Get line from file into buf, return NULL on EOF or ptr to the start of a // null-term line // -char * getln(void) +char * GetNextLine(void) { int i, j; char * p, * d; @@ -843,11 +876,9 @@ char * getln(void) // Scan for next end-of-line; handle stupid text formats by treating // \r\n the same as \n. (lone '\r' at end of buffer means we have to // check for '\n'). - i = 0; - j = fl->ifcnt; d = &fl->ifbuf[fl->ifind]; - for(p=d; iifcnt; i= j) - { - break; // Look for '\n' to eat - } + break; // Need to read more, then look for '\n' to eat else if (p[1] == '\n') - { i++; - } } + // Cover up the newline with end-of-string sentinel *p = '\0'; fl->ifind += i; @@ -875,11 +903,20 @@ char * getln(void) // Handle hanging lines by ignoring them (Input file is exhausted, no // \r or \n on last line) + // Shamus: This is retarded. Never ignore any input! if (!readamt && fl->ifcnt) { +#if 0 fl->ifcnt = 0; *p = '\0'; return NULL; +#else + // Really should check to see if we're at the end of the buffer! + // :-P + fl->ifbuf[fl->ifind + fl->ifcnt] = '\0'; + fl->ifcnt = 0; + return &fl->ifbuf[fl->ifind]; +#endif } // Truncate and return absurdly long lines. @@ -904,7 +941,9 @@ char * getln(void) fl->ifind = fl->ifcnt & 1; } - if ((readamt = read(fl->ifhandle, &fl->ifbuf[fl->ifind + fl->ifcnt], QUANTUM)) < 0) + readamt = read(fl->ifhandle, &fl->ifbuf[fl->ifind + fl->ifcnt], QUANTUM); + + if (readamt < 0) return NULL; if ((fl->ifcnt += readamt) == 0) @@ -914,29 +953,32 @@ char * getln(void) // -// Tokenize a Line +// Tokenize a line // -int tokln(void) +int TokenizeLine(void) { - char * ln = NULL; // Ptr to current position in line - char * p; // Random character ptr - TOKEN * tk; // Token-deposit ptr - int state = 0; // State for keyword detector - int j = 0; // Var for keyword detector - char c; // Random char - VALUE v; // Random value - char * nullspot = NULL; // Spot to clobber for SYMBOL terminatn - int stuffnull; // 1:terminate SYMBOL '\0' at *nullspot - char c1; - int stringNum = 0; // Pointer to string locations in tokenized line - - retry: - - if (cur_inobj == NULL) // Return EOF if input stack is empty + uint8_t * ln = NULL; // Ptr to current position in line + uint8_t * p; // Random character ptr + PTR tk; // Token-deposit ptr + int state = 0; // State for keyword detector + int j = 0; // Var for keyword detector + uint8_t c; // Random char + uint64_t v; // Random value + uint32_t cursize = 0; // Current line's size (.b, .w, .l, .s, .q, .d) + uint8_t * nullspot = NULL; // Spot to clobber for SYMBOL termination + int stuffnull; // 1:terminate SYMBOL '\0' at *nullspot + uint8_t c1; + int stringNum = 0; // Pointer to string locations in tokenized line + SYM* sy; // For looking up symbols (.equr) + int equrundef = 0; // Flag for equrundef scanning + +retry: + + if (cur_inobj == NULL) // Return EOF if input stack is empty return TKEOF; - // Get another line of input from the current input source: a file, - // a macro, or a repeat-block + // Get another line of input from the current input source: a file, a + // macro, or a repeat-block switch (cur_inobj->in_type) { // Include-file: @@ -945,54 +987,45 @@ int tokln(void) // o tag the listing-line with a space; // o kludge lines generated by Alcyon C. case SRC_IFILE: - if ((ln = getln()) == NULL) + if ((ln = GetNextLine()) == NULL) { - fpop(); // Pop input level - goto retry; // Try for more lines - } - - curlineno++; // Bump line number - lntag = SPACE; - - if (as68_flag) - { - // AS68 compatibility, throw away all lines starting with - // back-quotes, tildes, or '*' - // On other lines, turn the first '*' into a semi-colon. - if (*ln == '`' || *ln == '~' || *ln == '*') - *ln = ';'; +DEBUG { printf("TokenizeLine: Calling fpop() from SRC_IFILE...\n"); } + if (fpop() == 0) // Pop input level + goto retry; // Try for more lines else { - for(p=ln; *p!=EOS; p++) - { - if (*p == '*') - { - *p = ';'; - break; - } - } + ifent->if_prev = (IFENT *)-1; //Signal Assemble() that we have reached EOF with unbalanced if/endifs + return TKEOF; } } + curlineno++; // Bump line number + lntag = SPACE; + break; + // Macro-block: // o Handle end-of-macro; // o tag the listing-line with an at (@) sign. case SRC_IMACRO: - if ((ln = getmln()) == NULL) + if ((ln = GetNextMacroLine()) == NULL) { - ExitMacro(); // Exit macro (pop args, do fpop(), etc) - goto retry; // Try for more lines... + if (ExitMacro() == 0) // Exit macro (pop args, do fpop(), etc) + goto retry; // Try for more lines... + else + return TKEOF; // Oops, we got a non zero return code, signal EOF } lntag = '@'; break; + // Repeat-block: // o Handle end-of-repeat-block; // o tag the listing-line with a pound (#) sign. case SRC_IREPT: - if ((ln = getrln()) == NULL) + if ((ln = GetNextRepeatLine()) == NULL) { + DEBUG { printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n"); } fpop(); goto retry; } @@ -1001,37 +1034,59 @@ int tokln(void) break; } - // Save text of the line. We only do this during listings and within + // Save text of the line. We only do this during listings and within // macro-type blocks, since it is expensive to unconditionally copy every // line. if (lnsave) + { + // Sanity check + if (strlen(ln) > LNSIZ) + return error("line too long (%d, max %d)", strlen(ln), LNSIZ); + strcpy(lnbuf, ln); + } - // General house-keeping - tok = tokeol; // Set "tok" to EOL in case of error - tk = etok; // Reset token ptr - stuffnull = 0; // Don't stuff nulls - totlines++; // Bump total #lines assembled + // General housekeeping + tok = tokeol; // Set "tok" to EOL in case of error + tk.u32 = etok; // Reset token ptr + stuffnull = 0; // Don't stuff nulls + totlines++; // Bump total #lines assembled // See if the entire line is a comment. This is a win if the programmer // puts in lots of comments if (*ln == '*' || *ln == ';' || ((*ln == '/') && (*(ln + 1) == '/'))) goto goteol; + // And here we have a very ugly hack for signalling a single line 'turn off + // optimization'. There's really no nice way to do this, so hack it is! + optimizeOff = 0; // Default is to take optimizations as they come + + if (*ln == '!') + { + optimizeOff = 1; // Signal that we don't want to optimize this line + ln++; // & skip over the darned thing + } + // Main tokenization loop; - // o skip whitespace; - // o handle end-of-line; - // o handle symbols; - // o handle single-character tokens (operators, etc.); - // o handle multiple-character tokens (constants, strings, etc.). + // o skip whitespace; + // o handle end-of-line; + // o handle symbols; + // o handle single-character tokens (operators, etc.); + // o handle multiple-character tokens (constants, strings, etc.). for(; *ln!=EOS;) { + // Check to see if there's enough space in the token buffer + if (tk.cp >= ((uint8_t *)(&tokbuf[TOKBUFSIZE])) - 20) + { + return error("token buffer overrun"); + } + // Skip whitespace, handle EOL - while ((int)chrtab[*ln] & WHITE) + while (chrtab[*ln] & WHITE) ln++; // Handle EOL, comment with ';' - if (*ln == EOS || *ln == ';'|| ((*ln == '/') && (*(ln + 1) == '/'))) + if (*ln == EOS || *ln == ';'|| ((*ln == '/') && (*(ln + 1) == '/'))) break; // Handle start of symbol. Symbols are null-terminated in place. The @@ -1041,12 +1096,33 @@ int tokln(void) if (c & STSYM) { - if (stuffnull) // Terminate old symbol from previous pass + if (stuffnull) // Terminate old symbol from previous pass *nullspot = EOS; - v = 0; // Assume no DOT attrib follows symbol + v = 0; // Assume no DOT attrib follows symbol stuffnull = 1; - p = nullspot = ln++; // Nullspot -> start of this symbol + + // In some cases, we need to check for a DOTx at the *beginning* + // of a symbol, as the "start" of the line we're currently looking + // at could be somewhere in the middle of that line! + if (*ln == '.') + { + // Make sure that it's *only* a .[bwsl] following, and not the + // start of a local symbol: + if ((chrtab[*(ln + 1)] & DOT) + && (dotxtab[*(ln + 1)] != 0) + && !(chrtab[*(ln + 2)] & CTSYM)) + { + // We found a legitimate DOTx construct, so add it to the + // token stream: + ln++; + stuffnull = 0; + *tk.u32++ = (TOKEN)dotxtab[*ln++]; + continue; + } + } + + p = nullspot = ln++; // Nullspot -> start of this symbol // Find end of symbol (and compute its length) for(j=1; (int)chrtab[*ln]&CTSYM; j++) @@ -1056,32 +1132,34 @@ int tokln(void) // symbol or keyword: if (*ln == '.') { - *ln++ = EOS; // Terminate symbol - stuffnull = 0; // And never try it again + *ln++ = EOS; // Terminate symbol + stuffnull = 0; // And never try it again - // Character following the `.' must have a DOT attribute, and + // Character following the '.' must have a DOT attribute, and // the chararacter after THAT one must not have a start-symbol // attribute (to prevent symbols that look like, for example, // "zingo.barf", which might be a good idea anyway....) - if ((((int)chrtab[*ln] & DOT) == 0) || ((int)dotxtab[*ln] <= 0)) - return error("[bwsl] must follow `.' in symbol"); + if (((chrtab[*ln] & DOT) == 0) || (dotxtab[*ln] == 0)) + return error("[bwsl] must follow '.' in symbol"); - v = (VALUE)dotxtab[*ln++]; + v = (uint32_t)dotxtab[*ln++]; + cursize = (uint32_t)v; - if ((int)chrtab[*ln] & CTSYM) - return error("misuse of `.', not allowed in symbols"); + if (chrtab[*ln] & CTSYM) + return error("misuse of '.'; not allowed in symbols"); } // If the symbol is small, check to see if it's really the name of // a register. - if (j <= KWSIZE) + uint8_t *p2 = p; + if (j <= 5) { - for(state=0; state>=0;) + for (state = 0; state >= 0;) { j = (int)tolowertab[*p++]; - j += kwbase[state]; + j += regbase[state]; - if (kwcheck[j] != state) + if (regcheck[j] != state) { j = -1; break; @@ -1089,65 +1167,111 @@ int tokln(void) if (*p == EOS || p == ln) { - j = kwaccept[j]; + j = regaccept[j]; + goto skip_keyword; break; } - state = kwtab[j]; + state = regtab[j]; } } - else + + // Scan for keywords + if ((j <= 0 || state <= 0) || p==p2) { - j = -1; + if (j <= KWSIZE) + { + for (state = 0; state >= 0;) + { + j = (int)tolowertab[*p2++]; + j += kwbase[state]; + + if (kwcheck[j] != state) + { + j = -1; + break; + } + + if (*p == EOS || p2 == ln) + { + j = kwaccept[j]; + break; + } + + state = kwtab[j]; + } + } + else + { + j = -1; + } } - //make j = -1 if time, date etc with no preceeding ^^ - //defined, referenced, streq, macdef, date and time - switch ((TOKEN)j) + skip_keyword: + + // If we detected equrundef/regundef set relevant flag + if (j == KW_EQURUNDEF) { - case 112: // defined - case 113: // referenced - case 118: // streq - case 119: // macdef - case 120: // time - case 121: // date + equrundef = 1; j = -1; - break; } - if (j < 0 || state < 0) + // If not tokenized keyword OR token was not found + if ((j < 0) || (state < 0)) { - *tk++ = SYMBOL; -//#warning -//problem here: nullspot is a char * but TOKEN is a uint32_t. On a 64-bit system, -//this will cause all kinds of mischief. -#if 0 - *tk++ = (TOKEN)nullspot; -#else + // Only proceed if no equrundef has been detected. In that case we need to store the symbol + // because the directive handler (d_equrundef) will run outside this loop, further into procln.c + if (!equrundef && !disabled) + { + // Last attempt: let's see if this is an equated register. + // If yes, then just store the register's keyword value instead of the symbol + char temp = *ln; + *ln = 0; + sy = lookup(nullspot, LABEL, 0); + *ln = temp; + if (sy) + { + if (sy->sattre & EQUATEDREG) + { + *tk.u32++ = sy->svalue; + stuffnull = 0; + continue; + } + } + } + // Ok, that failed, let's store the symbol instead + *tk.u32++ = SYMBOL; string[stringNum] = nullspot; - *tk++ = stringNum; + *tk.u32++ = stringNum; stringNum++; -#endif } else { - *tk++ = (TOKEN)j; + *tk.u32++ = (TOKEN)j; stuffnull = 0; } - if (v) // Record attribute token (if any) - *tk++ = (TOKEN)v; + if (v) // Record attribute token (if any) + *tk.u32++ = (TOKEN)v; - if (stuffnull) // Arrange for string termination on next pass + if (stuffnull) // Arrange for string termination on next pass nullspot = ln; + if (disabled) + { + // When we are in a disabled code block, the only thing that can break out + // of this is an ".endif" keyword, so this is the minimum we have to parse + // in order to discover such a keyword. + goto goteol; + } + continue; } // Handle identity tokens if (c & SELF) { - *tk++ = *ln++; + *tk.u32++ = *ln++; continue; } @@ -1156,30 +1280,31 @@ int tokln(void) { switch (*ln++) { - case '!': // ! or != + case '!': // ! or != if (*ln == '=') { - *tk++ = NE; - ++ln; + *tk.u32++ = NE; + ln++; } else - *tk++ = '!'; + *tk.u32++ = '!'; continue; - case '\'': // 'string' - case '\"': // "string" + case '\'': // 'string' + if (m6502) + { + // Hardcoded for now, maybe this will change in the future + *tk.u32++ = STRINGA8; + goto dostring; + } + // Fall through + case '\"': // "string" + *tk.u32++ = STRING; +dostring: c1 = ln[-1]; - *tk++ = STRING; -//#warning -// More char * stuffing (8 bytes) into the space of 4 (TOKEN). -// Need to figure out how to fix this crap. -#if 0 - *tk++ = (TOKEN)ln; -#else string[stringNum] = ln; - *tk++ = stringNum; + *tk.u32++ = stringNum; stringNum++; -#endif for(p=ln; *ln!=EOS && *ln!=c1;) { @@ -1218,9 +1343,18 @@ int tokln(void) case '\\': c = '\\'; break; + case '{': + // If we're evaluating a macro + // this is valid because it's + // a parameter expansion + case '!': + // If we're evaluating a macro + // this is valid and expands to + // "dot-size" + break; default: warn("bad backslash code in string"); - --ln; + ln--; break; } } @@ -1233,99 +1367,94 @@ int tokln(void) *p++ = EOS; continue; - case '$': // $, hex constant - if ((int)chrtab[*ln] & HDIGIT) + case '$': // $, hex constant + if (chrtab[*ln] & HDIGIT) { v = 0; - while ((int)hextab[*ln] >= 0) + // Parse the hex value + while (hextab[*ln] >= 0) v = (v << 4) + (int)hextab[*ln++]; + *tk.u32++ = CONST; + *tk.u64++ = v; + if (*ln == '.') { - if ((*(ln+1) == 'b') || (*(ln+1) == 'B')) + if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W')) { - v &= 0x000000FF; + *tk.u32++ = DOTW; ln += 2; } - - if ((*(ln+1) == 'w') || (*(ln+1) == 'W')) - { - v &= 0x0000FFFF; - ln += 2; - } - - if ((*(ln+1) == 'l') || (*(ln+1) == 'L')) + else if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L')) { + *tk.u32++ = DOTL; ln += 2; } } - - *tk++ = CONST; - *tk++ = v; } else - *tk++ = '$'; + *tk.u32++ = '$'; continue; - case '<': // < or << or <> or <= + case '<': // < or << or <> or <= switch (*ln) { case '<': - *tk++ = SHL; - ++ln; + *tk.u32++ = SHL; + ln++; continue; case '>': - *tk++ = NE; - ++ln; + *tk.u32++ = NE; + ln++; continue; case '=': - *tk++ = LE; - ++ln; + *tk.u32++ = LE; + ln++; continue; default: - *tk++ = '<'; + *tk.u32++ = '<'; continue; } - case ':': // : or :: + case ':': // : or :: if (*ln == ':') { - *tk++ = DCOLON; - ++ln; + *tk.u32++ = DCOLON; + ln++; } else - *tk++ = ':'; + *tk.u32++ = ':'; continue; - case '=': // = or == + case '=': // = or == if (*ln == '=') { - *tk++ = DEQUALS; - ++ln; + *tk.u32++ = DEQUALS; + ln++; } else - *tk++ = '='; + *tk.u32++ = '='; continue; - case '>': // > or >> or >= + case '>': // > or >> or >= switch (*ln) { case '>': - *tk++ = SHR; - ++ln; + *tk.u32++ = SHR; + ln++; continue; case '=': - *tk++ = GE; - ++ln; + *tk.u32++ = GE; + ln++; continue; default: - *tk++ = '>'; + *tk.u32++ = '>'; continue; } - case '%': // % or binary constant + case '%': // % or binary constant if (*ln < '0' || *ln > '1') { - *tk++ = '%'; + *tk.u32++ = '%'; continue; } @@ -1350,17 +1479,18 @@ int tokln(void) if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L')) { + v &= 0xFFFFFFFF; ln += 2; } } - *tk++ = CONST; - *tk++ = v; + *tk.u32++ = CONST; + *tk.u64++ = v; continue; - case '@': // @ or octal constant + case '@': // @ or octal constant if (*ln < '0' || *ln > '7') { - *tk++ = '@'; + *tk.u32++ = '@'; continue; } @@ -1371,31 +1501,32 @@ int tokln(void) if (*ln == '.') { - if ((*(ln+1) == 'b') || (*(ln+1) == 'B')) + if ((*(ln + 1) == 'b') || (*(ln + 1) == 'B')) { v &= 0x000000FF; ln += 2; } - if ((*(ln+1) == 'w') || (*(ln+1) == 'W')) + if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W')) { v &= 0x0000FFFF; ln += 2; } - if ((*(ln+1) == 'l') || (*(ln+1) == 'L')) + if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L')) { + v &= 0xFFFFFFFF; ln += 2; } } - *tk++ = CONST; - *tk++ = v; + *tk.u32++ = CONST; + *tk.u64++ = v; continue; - case '^': // ^ or ^^ + case '^': // ^ or ^^ if (*ln != '^') { - *tk++ = '^'; + *tk.u32++ = '^'; continue; } @@ -1412,15 +1543,15 @@ int tokln(void) for(state=0; state>=0;) { - // Get char, convert to lowercase - j = *p++; + // Get char, convert to lowercase + j = (int)tolowertab[*p++]; - if (j >= 'A' && j <= 'Z') - j += 0x20; + //if (j >= 'A' && j <= 'Z') + // j += 0x20; - j += kwbase[state]; + j += unarybase[state]; - if (kwcheck[j] != state) + if (unarycheck[j] != state) { j = -1; break; @@ -1428,11 +1559,11 @@ int tokln(void) if (*p == EOS || p == ln) { - j = kwaccept[j]; + j = unaryaccept[j]; break; } - state = kwtab[j]; + state = unarytab[j]; } if (j < 0 || state < 0) @@ -1441,10 +1572,10 @@ int tokln(void) continue; } - *tk++ = (TOKEN)j; + *tk.u32++ = (TOKEN)j; continue; default: - interror(2); // Bad MULTX entry in chrtab + interror(2); // Bad MULTX entry in chrtab continue; } } @@ -1452,50 +1583,85 @@ int tokln(void) // Handle decimal constant if (c & DIGIT) { + uint8_t * numStart = ln; v = 0; while ((int)chrtab[*ln] & DIGIT) v = (v * 10) + *ln++ - '0'; - // See if there's a .[bwl] after the constant, & deal with it + // See if there's a .[bwl] after the constant & deal with it if so if (*ln == '.') { - if ((*(ln+1) == 'b') || (*(ln+1) == 'B')) + if ((*(ln + 1) == 'b') || (*(ln + 1) == 'B')) { v &= 0x000000FF; ln += 2; + *tk.u32++ = CONST; + *tk.u64++ = v; + *tk.u32++ = DOTB; } - - if ((*(ln+1) == 'w') || (*(ln+1) == 'W')) + else if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W')) { v &= 0x0000FFFF; ln += 2; + *tk.u32++ = CONST; + *tk.u64++ = v; + *tk.u32++ = DOTW; } - - if ((*(ln+1) == 'l') || (*(ln+1) == 'L')) + else if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L')) { + v &= 0xFFFFFFFF; ln += 2; + *tk.u32++ = CONST; + *tk.u64++ = v; + *tk.u32++ = DOTL; + } + else if ((int)chrtab[*(ln + 1)] & DIGIT) + { + // Hey, more digits after the dot, so we assume it's a + // floating point number of some kind... numEnd will point + // to the first non-float character after it's done + char * numEnd; + errno = 0; + double f = strtod(numStart, &numEnd); + ln = (uint8_t *)numEnd; + + if (errno != 0) + return error("floating point parse error"); + + // N.B.: We use the C compiler's internal double + // representation for all internal float calcs and + // are reasonably sure that the size of said double + // is 8 bytes long (which we check for in fltpoint.c) + *tk.u32++ = FCONST; + *tk.dp = f; + tk.u64++; + continue; } } + else + { + *tk.u32++ = CONST; + *tk.u64++ = v; + } - *tk++ = CONST; - *tk++ = v; +//printf("CONST: %i\n", v); continue; } // Handle illegal character - return error("illegal character"); + return error("illegal character $%02X found", *ln); } // Terminate line of tokens and return "success." goteol: - tok = etok; // Set tok to beginning of line + tok = etok; // Set tok to beginning of line - if (stuffnull) // Terminate last SYMBOL + if (stuffnull) // Terminate last SYMBOL *nullspot = EOS; - *tk++ = EOL; + *tk.u32++ = EOL; return OK; } @@ -1503,34 +1669,25 @@ goteol: // // .GOTO