X-Git-Url: http://shamusworld.gotdns.org/cgi-bin/gitweb.cgi?p=rmac;a=blobdiff_plain;f=token.c;h=55dbc290980722d06ed144755e43bb85587965a5;hp=f1f88caec4b81fa921993566eb4a8a3084fa49b7;hb=HEAD;hpb=29b32d134bc12831a8ddd098bf9aeeda26dcfe7c diff --git a/token.c b/token.c index f1f88ca..9cbf8b4 100644 --- a/token.c +++ b/token.c @@ -1,7 +1,7 @@ // -// RMAC - Reboot's Macro Assembler for all Atari computers +// RMAC - Renamed Macro Assembler for all Atari computers // TOKEN.C - Token Handling -// Copyright (C) 199x Landon Dyer, 2011-2017 Reboot and Friends +// Copyright (C) 199x Landon Dyer, 2011-2021 Reboot and Friends // RMAC derived from MADMAC v1.07 Written by Landon Dyer, 1986 // Source utilised with the kind permission of Landon Dyer // @@ -19,10 +19,17 @@ #define DECL_KW // Declare keyword arrays #define DEF_KW // Declare keyword values #include "kwtab.h" // Incl generated keyword tables & defs +#define DEF_REG68 // Incl 68k register definitions +#include "68kregs.h" +#define DEF_REGRISC // Include GPU/DSP register definitions +#include "riscregs.h" +#define DEF_UNARY // Declare unary values +#define DECL_UNARY // Incl unary keyword state machine tables +#include "unarytab.h" // Incl generated unary tables & defs int lnsave; // 1; strcpy() text of current line -uint16_t curlineno; // Current line number (64K max currently) +uint32_t curlineno; // Current line number (64K max currently) int totlines; // Total # of lines int mjump_align = 0; // mjump alignment flag char lntag; // Line tag @@ -40,13 +47,6 @@ TOKEN tokeol[1] = {EOL}; // Bailout end-of-line token char * string[TOKBUFSIZE*2];// Token buffer string pointer storage int optimizeOff; // Optimization override flag -// File record, used to maintain a list of every include file ever visited -#define FILEREC struct _filerec -FILEREC -{ - FILEREC * frec_next; - char * frec_name; -}; FILEREC * filerec; FILEREC * last_fr; @@ -154,13 +154,6 @@ static char * regname[] = { "a10","b10","x","y","","","ab","ba" // 312,319 
}; -static char * riscregname[] = { - "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", - "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", - "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23", - "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31" -}; - // // Initialize tokenizer @@ -516,14 +509,9 @@ arg_num: // This is a hack. It might be better table-driven. d = NULL; - if ((*tk >= KW_D0) && !rdsp && !rgpu) - { - d = regname[(int)*tk++ - KW_D0]; - goto strcopy; - } - else if ((*tk >= KW_R0) && (*tk <= KW_R31)) + if (*tk >= REG68_D0) { - d = riscregname[(int)*tk++ - KW_R0]; + d = regname[(int)*tk++ - REG68_D0]; goto strcopy; } else @@ -531,22 +519,12 @@ arg_num: switch ((int)*tk++) { case SYMBOL: -#if 0 -// d = (char *)*tk++; - d = string[*tk++]; -#else - // This fix should be done for strings too d = symbolString[*tk++]; DEBUG { printf("ExM: SYMBOL=\"%s\"", d); } -#endif break; case STRING: -#if 0 -// d = (char *)*tk++; - d = string[*tk++]; -#else d = symbolString[*tk++]; -#endif + if (dst >= edst) goto overflow; @@ -570,7 +548,8 @@ DEBUG { printf("ExM: SYMBOL=\"%s\"", d); } // to choke on legitimate code... Need to investigate this further // before changing anything else here! 
case CONST: - sprintf(numbuf, "$%lx", (uint64_t)*tk++); +// sprintf(numbuf, "$%lx", (uint64_t)*tk++); + sprintf(numbuf, "$%" PRIX64, (uint64_t)*tk++); tk++; d = numbuf; break; @@ -613,6 +592,9 @@ DEBUG { printf("ExM: SYMBOL=\"%s\"", d); } case CR_ABSCOUNT: d = "^^abscount"; break; + case CR_FILESIZE: + d = "^^filesize"; + break; case CR_DATE: d = "^^date"; break; @@ -679,7 +661,6 @@ overflow: char * GetNextMacroLine(void) { IMACRO * imacro = cur_inobj->inobj.imacro; -// LONG * strp = imacro->im_nextln; LLIST * strp = imacro->im_nextln; if (strp == NULL) // End-of-macro @@ -713,7 +694,7 @@ char * GetNextRepeatLine(void) DEBUG { printf("end-repeat-block\n"); } return NULL; } - + reptuniq++; // strp = irept->ir_nextln; } // Mark the current macro line in the irept object @@ -722,8 +703,33 @@ char * GetNextRepeatLine(void) // error reporting anyway) irept->lineno = irept->ir_nextln->lineno; -// strcpy(irbuf, (char *)(irept->ir_nextln + 1)); - strcpy(irbuf, irept->ir_nextln->line); + // Copy the rept lines verbatim, unless we're in nest level 0. 
+ // Then, expand any \~ labels to unique numbers (Rn) + if (rptlevel) + { + strcpy(irbuf, irept->ir_nextln->line); + } + else + { + uint32_t linelen = strlen(irept->ir_nextln->line); + uint8_t *p_line = irept->ir_nextln->line; + char *irbufwrite = irbuf; + for (int i = 0; i <= linelen; i++) + { + uint8_t c; + c = *p_line++; + if (c == '\\' && *p_line == '~') + { + p_line++; + irbufwrite += sprintf(irbufwrite, "R%u", reptuniq); + } + else + { + *irbufwrite++ = c; + } + } + } + DEBUG { printf("repeat line='%s'\n", irbuf); } // irept->ir_nextln = (LONG *)*strp; irept->ir_nextln = irept->ir_nextln->next; @@ -798,7 +804,7 @@ int fpop(void) if (numUnmatched > 0) warn("missing %d .endif(s)", numUnmatched); - tok = inobj->in_otok; // Restore tok and otok + tok = inobj->in_otok; // Restore tok and etok etok = inobj->in_etok; switch (inobj->in_type) @@ -959,15 +965,16 @@ int TokenizeLine(void) uint8_t c; // Random char uint64_t v; // Random value uint32_t cursize = 0; // Current line's size (.b, .w, .l, .s, .q, .d) - double f; // Random float uint8_t * nullspot = NULL; // Spot to clobber for SYMBOL termination int stuffnull; // 1:terminate SYMBOL '\0' at *nullspot uint8_t c1; int stringNum = 0; // Pointer to string locations in tokenized line + SYM* sy; // For looking up symbols (.equr) + int equrundef = 0; // Flag for equrundef scanning retry: - if (cur_inobj == NULL) // Return EOF if input stack is empty + if (cur_inobj == NULL) // Return EOF if input stack is empty return TKEOF; // Get another line of input from the current input source: a file, a @@ -983,8 +990,8 @@ retry: if ((ln = GetNextLine()) == NULL) { DEBUG { printf("TokenizeLine: Calling fpop() from SRC_IFILE...\n"); } - if (fpop() == 0) // Pop input level - goto retry; // Try for more lines + if (fpop() == 0) // Pop input level + goto retry; // Try for more lines else { ifent->if_prev = (IFENT *)-1; //Signal Assemble() that we have reached EOF with unbalanced if/endifs @@ -992,29 +999,9 @@ DEBUG { 
printf("TokenizeLine: Calling fpop() from SRC_IFILE...\n"); } } } - curlineno++; // Bump line number + curlineno++; // Bump line number lntag = SPACE; - if (as68_flag) - { - // AS68 compatibility, throw away all lines starting with - // back-quotes, tildes, or '*' - // On other lines, turn the first '*' into a semi-colon. - if (*ln == '`' || *ln == '~' || *ln == '*') - *ln = ';'; - else - { - for(p=ln; *p!=EOS; p++) - { - if (*p == '*') - { - *p = ';'; - break; - } - } - } - } - break; // Macro-block: @@ -1051,10 +1038,16 @@ DEBUG { printf("TokenizeLine: Calling fpop() from SRC_IFILE...\n"); } // macro-type blocks, since it is expensive to unconditionally copy every // line. if (lnsave) + { + // Sanity check + if (strlen(ln) > LNSIZ) + return error("line too long (%d, max %d)", strlen(ln), LNSIZ); + strcpy(lnbuf, ln); + } // General housekeeping - tok = tokeol; // Set "tok" to EOL in case of error + tok = tokeol; // Set "tok" to EOL in case of error tk.u32 = etok; // Reset token ptr stuffnull = 0; // Don't stuff nulls totlines++; // Bump total #lines assembled @@ -1082,6 +1075,12 @@ DEBUG { printf("TokenizeLine: Calling fpop() from SRC_IFILE...\n"); } // o handle multiple-character tokens (constants, strings, etc.). for(; *ln!=EOS;) { + // Check to see if there's enough space in the token buffer + if (tk.cp >= ((uint8_t *)(&tokbuf[TOKBUFSIZE])) - 20) + { + return error("token buffer overrun"); + } + // Skip whitespace, handle EOL while (chrtab[*ln] & WHITE) ln++; @@ -1152,14 +1151,15 @@ DEBUG { printf("TokenizeLine: Calling fpop() from SRC_IFILE...\n"); } // If the symbol is small, check to see if it's really the name of // a register. 
- if (j <= KWSIZE) + uint8_t *p2 = p; + if (j <= 5) { - for(state=0; state>=0;) + for (state = 0; state >= 0;) { j = (int)tolowertab[*p++]; - j += kwbase[state]; + j += regbase[state]; - if (kwcheck[j] != state) + if (regcheck[j] != state) { j = -1; break; @@ -1167,51 +1167,83 @@ DEBUG { printf("TokenizeLine: Calling fpop() from SRC_IFILE...\n"); } if (*p == EOS || p == ln) { - j = kwaccept[j]; + j = regaccept[j]; + goto skip_keyword; break; } - state = kwtab[j]; + state = regtab[j]; } } - else - { - j = -1; - } - // Make j = -1 if user tries to use a RISC register while in 68K mode - if (!(rgpu || rdsp) && ((TOKEN)j >= KW_R0 && (TOKEN)j <= KW_R31)) + // Scan for keywords + if ((j <= 0 || state <= 0) || p==p2) { - j = -1; + if (j <= KWSIZE) + { + for (state = 0; state >= 0;) + { + j = (int)tolowertab[*p2++]; + j += kwbase[state]; + + if (kwcheck[j] != state) + { + j = -1; + break; + } + + if (*p == EOS || p2 == ln) + { + j = kwaccept[j]; + break; + } + + state = kwtab[j]; + } + } + else + { + j = -1; + } } - // Make j = -1 if time, date etc with no preceeding ^^ - // defined, referenced, streq, macdef, date and time - switch ((TOKEN)j) + skip_keyword: + + // If we detected equrundef/regundef set relevant flag + if (j == KW_EQURUNDEF) { - case 112: // defined - case 113: // referenced - case 118: // streq - case 119: // macdef - case 120: // time - case 121: // date + equrundef = 1; j = -1; } // If not tokenized keyword OR token was not found if ((j < 0) || (state < 0)) { + // Only proceed if no equrundef has been detected. In that case we need to store the symbol + // because the directive handler (d_equrundef) will run outside this loop, further into procln.c + if (!equrundef && !disabled) + { + // Last attempt: let's see if this is an equated register. 
+ // If yes, then just store the register's keyword value instead of the symbol + char temp = *ln; + *ln = 0; + sy = lookup(nullspot, LABEL, 0); + *ln = temp; + if (sy) + { + if (sy->sattre & EQUATEDREG) + { + *tk.u32++ = sy->svalue; + stuffnull = 0; + continue; + } + } + } + // Ok, that failed, let's store the symbol instead *tk.u32++ = SYMBOL; -//#warning -//problem here: nullspot is a char * but TOKEN is a uint32_t. On a 64-bit -//system, this will cause all kinds of mischief. -#if 0 - *tk++ = (TOKEN)nullspot; -#else string[stringNum] = nullspot; *tk.u32++ = stringNum; stringNum++; -#endif } else { @@ -1219,12 +1251,20 @@ DEBUG { printf("TokenizeLine: Calling fpop() from SRC_IFILE...\n"); } stuffnull = 0; } - if (v) // Record attribute token (if any) + if (v) // Record attribute token (if any) *tk.u32++ = (TOKEN)v; - if (stuffnull) // Arrange for string termination on next pass + if (stuffnull) // Arrange for string termination on next pass nullspot = ln; + if (disabled) + { + // When we are in a disabled code block, the only thing that can break out + // of this is an ".endif" keyword, so this is the minimum we have to parse + // in order to discover such a keyword. 
+ goto goteol; + } + continue; } @@ -1303,6 +1343,10 @@ dostring: case '\\': c = '\\'; break; + case '{': + // If we're evaluating a macro + // this is valid because it's + // a parameter expansion case '!': // If we're evaluating a macro // this is valid and expands to @@ -1332,45 +1376,20 @@ dostring: while (hextab[*ln] >= 0) v = (v << 4) + (int)hextab[*ln++]; - if (*ln == '.') - { - if (obj_format == BSD) - { - if ((*(ln + 1) & 0xDF) == 'B') - { - v &= 0x000000FF; - ln += 2; - } - else if ((*(ln + 1) & 0xDF) == 'W') - { - v &= 0x0000FFFF; - ln += 2; - } - else if ((*(ln + 1) & 0xDF) == 'L') - { - v &= 0xFFFFFFFF; - ln += 2; - } - } - } - *tk.u32++ = CONST; *tk.u64++ = v; - if (obj_format == ALCYON) + if (*ln == '.') { - if (*ln == '.') + if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W')) { - if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W')) - { - *tk.u32++ = DOTW; - ln += 2; - } - else if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L')) - { - *tk.u32++ = DOTL; - ln += 2; - } + *tk.u32++ = DOTW; + ln += 2; + } + else if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L')) + { + *tk.u32++ = DOTL; + ln += 2; } } } @@ -1525,14 +1544,14 @@ dostring: for(state=0; state>=0;) { // Get char, convert to lowercase - j = *p++; + j = (int)tolowertab[*p++]; - if (j >= 'A' && j <= 'Z') - j += 0x20; + //if (j >= 'A' && j <= 'Z') + // j += 0x20; - j += kwbase[state]; + j += unarybase[state]; - if (kwcheck[j] != state) + if (unarycheck[j] != state) { j = -1; break; @@ -1540,11 +1559,11 @@ dostring: if (*p == EOS || p == ln) { - j = kwaccept[j]; + j = unaryaccept[j]; break; } - state = kwtab[j]; + state = unarytab[j]; } if (j < 0 || state < 0) @@ -1637,9 +1656,9 @@ dostring: // Terminate line of tokens and return "success." 
goteol: - tok = etok; // Set tok to beginning of line + tok = etok; // Set tok to beginning of line - if (stuffnull) // Terminate last SYMBOL + if (stuffnull) // Terminate last SYMBOL *nullspot = EOS; *tk.u32++ = EOL; @@ -1686,7 +1705,7 @@ int d_goto(WORD unused) { // Compare names (sleazo string compare) char * s1 = sym; - char * s2 = defln->line; + char * s2 = defln->line + 1; // Either we will match the strings to EOS on both, or we will // match EOS on string 1 to whitespace on string 2. Otherwise, we @@ -1769,6 +1788,8 @@ void DumpToken(TOKEN t) printf("[ENDEXPR]"); else if (t == CR_ABSCOUNT) printf("[CR_ABSCOUNT]"); + else if (t == CR_FILESIZE) + printf("[CR_FILESIZE]"); else if (t == CR_DEFINED) printf("[CR_DEFINED]"); else if (t == CR_REFERENCED) @@ -1873,6 +1894,8 @@ void DumpTokenBuffer(void) printf("[ENDEXPR]"); else if (*t == CR_ABSCOUNT) printf("[CR_ABSCOUNT]"); + else if (*t == CR_FILESIZE) + printf("[CR_FILESIZE]"); else if (*t == CR_DEFINED) printf("[CR_DEFINED]"); else if (*t == CR_REFERENCED)