//
-// RMAC - Reboot's Macro Assembler for all Atari computers
+// RMAC - Renamed Macro Assembler for all Atari computers
// TOKEN.C - Token Handling
-// Copyright (C) 199x Landon Dyer, 2011-2017 Reboot and Friends
+// Copyright (C) 199x Landon Dyer, 2011-2021 Reboot and Friends
// RMAC derived from MADMAC v1.07 Written by Landon Dyer, 1986
// Source utilised with the kind permission of Landon Dyer
//
#define DECL_KW // Declare keyword arrays
#define DEF_KW // Declare keyword values
#include "kwtab.h" // Incl generated keyword tables & defs
+#define DEF_REG68 // Incl 68k register definitions
+#include "68kregs.h"
+#define DEF_REGRISC // Include GPU/DSP register definitions
+#include "riscregs.h"
+#define DEF_UNARY // Declare unary values
+#define DECL_UNARY // Incl unary keyword state machine tables
+#include "unarytab.h" // Incl generated unary tables & defs
int lnsave; // 1; strcpy() text of current line
-uint16_t curlineno; // Current line number (64K max currently)
+uint32_t curlineno; // Current line number (32-bit counter)
int totlines; // Total # of lines
int mjump_align = 0; // mjump alignment flag
char lntag; // Line tag
char * string[TOKBUFSIZE*2];// Token buffer string pointer storage
int optimizeOff; // Optimization override flag
-// File record, used to maintain a list of every include file ever visited
-#define FILEREC struct _filerec
-FILEREC
-{
- FILEREC * frec_next;
- char * frec_name;
-};
FILEREC * filerec;
FILEREC * last_fr;
"a10","b10","x","y","","","ab","ba" // 312,319
};
-static char * riscregname[] = {
- "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
- "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
- "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
- "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31"
-};
-
//
// Initialize tokenizer
// This is a hack. It might be better table-driven.
d = NULL;
- if ((*tk >= KW_D0) && !rdsp && !rgpu)
- {
- d = regname[(int)*tk++ - KW_D0];
- goto strcopy;
- }
- else if ((*tk >= KW_R0) && (*tk <= KW_R31))
+ if (*tk >= REG68_D0)
{
- d = riscregname[(int)*tk++ - KW_R0];
+ d = regname[(int)*tk++ - REG68_D0];
goto strcopy;
}
else
switch ((int)*tk++)
{
case SYMBOL:
-#if 0
-// d = (char *)*tk++;
- d = string[*tk++];
-#else
- // This fix should be done for strings too
d = symbolString[*tk++];
DEBUG { printf("ExM: SYMBOL=\"%s\"", d); }
-#endif
break;
case STRING:
-#if 0
-// d = (char *)*tk++;
- d = string[*tk++];
-#else
d = symbolString[*tk++];
-#endif
+
if (dst >= edst)
goto overflow;
// to choke on legitimate code... Need to investigate this further
// before changing anything else here!
case CONST:
- sprintf(numbuf, "$%lx", (uint64_t)*tk++);
+// sprintf(numbuf, "$%lx", (uint64_t)*tk++);
+ sprintf(numbuf, "$%" PRIX64, (uint64_t)*tk++);
tk++;
d = numbuf;
break;
case CR_ABSCOUNT:
d = "^^abscount";
break;
+ case CR_FILESIZE:
+ d = "^^filesize";
+ break;
case CR_DATE:
d = "^^date";
break;
char * GetNextMacroLine(void)
{
IMACRO * imacro = cur_inobj->inobj.imacro;
-// LONG * strp = imacro->im_nextln;
LLIST * strp = imacro->im_nextln;
if (strp == NULL) // End-of-macro
DEBUG { printf("end-repeat-block\n"); }
return NULL;
}
-
+ reptuniq++;
// strp = irept->ir_nextln;
}
+ // Mark the current macro line in the irept object
+ // This is probably overkill - a global variable
+ // would suffice here (it only gets used during
+ // error reporting anyway)
+ irept->lineno = irept->ir_nextln->lineno;
+
+	// Copy the rept lines verbatim, unless we're in nest level 0.
+	// Then, expand any \~ labels to unique numbers (Rn)
+	if (rptlevel)
+	{
+		strcpy(irbuf, irept->ir_nextln->line);
+	}
+	else
+	{
+		uint8_t *p_line = irept->ir_nextln->line;
+		char *irbufwrite = irbuf;
+		uint8_t c;
+
+		// Copy up to and including the terminating NUL. The loop is driven
+		// by the NUL itself rather than by a precomputed length: every \~
+		// consumes two source bytes in one iteration, so a length-based
+		// count would keep reading (and copying) bytes past the end of the
+		// source line once a substitution has happened.
+		//
+		// NOTE(review): "R%u" can be longer than the two characters it
+		// replaces, so the expanded line may outgrow the source line. The
+		// capacity of irbuf is not visible here - confirm it leaves room.
+		do
+		{
+			c = *p_line++;
+
+			if (c == '\\' && *p_line == '~')
+			{
+				// Skip the '~' and emit the unique label for this pass
+				p_line++;
+				irbufwrite += sprintf(irbufwrite, "R%u", reptuniq);
+			}
+			else
+			{
+				*irbufwrite++ = c;
+			}
+		}
+		while (c != '\0');
+	}
-// strcpy(irbuf, (char *)(irept->ir_nextln + 1));
- strcpy(irbuf, irept->ir_nextln->line);
DEBUG { printf("repeat line='%s'\n", irbuf); }
// irept->ir_nextln = (LONG *)*strp;
irept->ir_nextln = irept->ir_nextln->next;
if (numUnmatched > 0)
warn("missing %d .endif(s)", numUnmatched);
- tok = inobj->in_otok; // Restore tok and otok
+ tok = inobj->in_otok; // Restore tok and etok
etok = inobj->in_etok;
switch (inobj->in_type)
uint8_t c; // Random char
uint64_t v; // Random value
uint32_t cursize = 0; // Current line's size (.b, .w, .l, .s, .q, .d)
- double f; // Random float
uint8_t * nullspot = NULL; // Spot to clobber for SYMBOL termination
int stuffnull; // 1:terminate SYMBOL '\0' at *nullspot
uint8_t c1;
int stringNum = 0; // Pointer to string locations in tokenized line
+ SYM* sy; // For looking up symbols (.equr)
+ int equrundef = 0; // Flag for equrundef scanning
retry:
- if (cur_inobj == NULL) // Return EOF if input stack is empty
+ if (cur_inobj == NULL) // Return EOF if input stack is empty
return TKEOF;
// Get another line of input from the current input source: a file, a
if ((ln = GetNextLine()) == NULL)
{
DEBUG { printf("TokenizeLine: Calling fpop() from SRC_IFILE...\n"); }
- if (fpop() == 0) // Pop input level
- goto retry; // Try for more lines
+ if (fpop() == 0) // Pop input level
+ goto retry; // Try for more lines
else
{
ifent->if_prev = (IFENT *)-1; //Signal Assemble() that we have reached EOF with unbalanced if/endifs
}
}
- curlineno++; // Bump line number
+ curlineno++; // Bump line number
lntag = SPACE;
- if (as68_flag)
- {
- // AS68 compatibility, throw away all lines starting with
- // back-quotes, tildes, or '*'
- // On other lines, turn the first '*' into a semi-colon.
- if (*ln == '`' || *ln == '~' || *ln == '*')
- *ln = ';';
- else
- {
- for(p=ln; *p!=EOS; p++)
- {
- if (*p == '*')
- {
- *p = ';';
- break;
- }
- }
- }
- }
-
break;
// Macro-block:
// macro-type blocks, since it is expensive to unconditionally copy every
// line.
if (lnsave)
+	{
+		// Sanity check: refuse lines that do not fit before copying.
+		// The length is measured once and narrowed to int explicitly so
+		// that the arguments match the %d conversions in the message.
+		// NOTE(review): if lnbuf holds exactly LNSIZ bytes, a line of
+		// length LNSIZ still overflows by the terminator - confirm the
+		// size of lnbuf and whether this should be ">=".
+		size_t lnlen = strlen(ln);
+
+		if (lnlen > LNSIZ)
+			return error("line too long (%d, max %d)", (int)lnlen, (int)LNSIZ);
+
strcpy(lnbuf, ln);
+	}
// General housekeeping
- tok = tokeol; // Set "tok" to EOL in case of error
+ tok = tokeol; // Set "tok" to EOL in case of error
tk.u32 = etok; // Reset token ptr
stuffnull = 0; // Don't stuff nulls
totlines++; // Bump total #lines assembled
// o handle multiple-character tokens (constants, strings, etc.).
for(; *ln!=EOS;)
{
+ // Check to see if there's enough space in the token buffer
+ if (tk.cp >= ((uint8_t *)(&tokbuf[TOKBUFSIZE])) - 20)
+ {
+ return error("token buffer overrun");
+ }
+
// Skip whitespace, handle EOL
while (chrtab[*ln] & WHITE)
ln++;
// If the symbol is small, check to see if it's really the name of
// a register.
- if (j <= KWSIZE)
+ uint8_t *p2 = p;
+ if (j <= 5)
{
- for(state=0; state>=0;)
+ for (state = 0; state >= 0;)
{
j = (int)tolowertab[*p++];
- j += kwbase[state];
+ j += regbase[state];
- if (kwcheck[j] != state)
+ if (regcheck[j] != state)
{
j = -1;
break;
if (*p == EOS || p == ln)
{
- j = kwaccept[j];
+ j = regaccept[j];
+ goto skip_keyword;
break;
}
- state = kwtab[j];
+ state = regtab[j];
}
}
- else
- {
- j = -1;
- }
- // Make j = -1 if user tries to use a RISC register while in 68K mode
- if (!(rgpu || rdsp) && ((TOKEN)j >= KW_R0 && (TOKEN)j <= KW_R31))
+ // Scan for keywords
+ if ((j <= 0 || state <= 0) || p==p2)
{
- j = -1;
+ if (j <= KWSIZE)
+ {
+ for (state = 0; state >= 0;)
+ {
+ j = (int)tolowertab[*p2++];
+ j += kwbase[state];
+
+ if (kwcheck[j] != state)
+ {
+ j = -1;
+ break;
+ }
+
+ if (*p == EOS || p2 == ln)
+ {
+ j = kwaccept[j];
+ break;
+ }
+
+ state = kwtab[j];
+ }
+ }
+ else
+ {
+ j = -1;
+ }
}
- // Make j = -1 if time, date etc with no preceeding ^^
- // defined, referenced, streq, macdef, date and time
- switch ((TOKEN)j)
+ skip_keyword:
+
+ // If we detected equrundef/regundef set relevant flag
+ if (j == KW_EQURUNDEF)
{
- case 112: // defined
- case 113: // referenced
- case 118: // streq
- case 119: // macdef
- case 120: // time
- case 121: // date
+ equrundef = 1;
j = -1;
}
// If not tokenized keyword OR token was not found
if ((j < 0) || (state < 0))
{
+ // Only proceed if no equrundef has been detected. In that case we need to store the symbol
+ // because the directive handler (d_equrundef) will run outside this loop, further into procln.c
+ if (!equrundef && !disabled)
+ {
+ // Last attempt: let's see if this is an equated register.
+ // If yes, then just store the register's keyword value instead of the symbol
+ char temp = *ln;
+ *ln = 0;
+ sy = lookup(nullspot, LABEL, 0);
+ *ln = temp;
+ if (sy)
+ {
+ if (sy->sattre & EQUATEDREG)
+ {
+ *tk.u32++ = sy->svalue;
+ stuffnull = 0;
+ continue;
+ }
+ }
+ }
+ // Ok, that failed, let's store the symbol instead
*tk.u32++ = SYMBOL;
-//#warning
-//problem here: nullspot is a char * but TOKEN is a uint32_t. On a 64-bit
-//system, this will cause all kinds of mischief.
-#if 0
- *tk++ = (TOKEN)nullspot;
-#else
string[stringNum] = nullspot;
*tk.u32++ = stringNum;
stringNum++;
-#endif
}
else
{
stuffnull = 0;
}
- if (v) // Record attribute token (if any)
+ if (v) // Record attribute token (if any)
*tk.u32++ = (TOKEN)v;
- if (stuffnull) // Arrange for string termination on next pass
+ if (stuffnull) // Arrange for string termination on next pass
nullspot = ln;
+ if (disabled)
+ {
+ // When we are in a disabled code block, the only thing that can break out
+ // of this is an ".endif" keyword, so this is the minimum we have to parse
+ // in order to discover such a keyword.
+ goto goteol;
+ }
+
continue;
}
case '\\':
c = '\\';
break;
+ case '{':
+ // If we're evaluating a macro
+ // this is valid because it's
+ // a parameter expansion
case '!':
// If we're evaluating a macro
// this is valid and expands to
while (hextab[*ln] >= 0)
v = (v << 4) + (int)hextab[*ln++];
- if (*ln == '.')
- {
- if (obj_format == BSD)
- {
- if ((*(ln + 1) & 0xDF) == 'B')
- {
- v &= 0x000000FF;
- ln += 2;
- }
- else if ((*(ln + 1) & 0xDF) == 'W')
- {
- v &= 0x0000FFFF;
- ln += 2;
- }
- else if ((*(ln + 1) & 0xDF) == 'L')
- {
- v &= 0xFFFFFFFF;
- ln += 2;
- }
- }
- }
-
*tk.u32++ = CONST;
*tk.u64++ = v;
- if (obj_format == ALCYON)
+ if (*ln == '.')
{
- if (*ln == '.')
+ if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
{
- if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
- {
- *tk.u32++ = DOTW;
- ln += 2;
- }
- else if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
- {
- *tk.u32++ = DOTL;
- ln += 2;
- }
+ *tk.u32++ = DOTW;
+ ln += 2;
+ }
+ else if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
+ {
+ *tk.u32++ = DOTL;
+ ln += 2;
}
}
}
for(state=0; state>=0;)
{
// Get char, convert to lowercase
- j = *p++;
+ j = (int)tolowertab[*p++];
- if (j >= 'A' && j <= 'Z')
- j += 0x20;
+ //if (j >= 'A' && j <= 'Z')
+ // j += 0x20;
- j += kwbase[state];
+ j += unarybase[state];
- if (kwcheck[j] != state)
+ if (unarycheck[j] != state)
{
j = -1;
break;
if (*p == EOS || p == ln)
{
- j = kwaccept[j];
+ j = unaryaccept[j];
break;
}
- state = kwtab[j];
+ state = unarytab[j];
}
if (j < 0 || state < 0)
else if ((int)chrtab[*(ln + 1)] & DIGIT)
{
// Hey, more digits after the dot, so we assume it's a
- // floating point number of some kind
-#if 0
- double fract = 10;
- ln++;
- f = (double)v;
-
- while ((int)chrtab[*ln] & DIGIT)
- {
- f = f + (double)(*ln++ - '0') / fract;
- fract *= 10;
- }
-#else
- // Here we parse the whole floating point number
+ // floating point number of some kind... numEnd will point
+ // to the first non-float character after it's done
char * numEnd;
errno = 0;
double f = strtod(numStart, &numEnd);
if (errno != 0)
return error("floating point parse error");
-#endif
+ // N.B.: We use the C compiler's internal double
+ // representation for all internal float calcs and
+ // are reasonably sure that the size of said double
+ // is 8 bytes long (which we check for in fltpoint.c)
*tk.u32++ = FCONST;
-// Shamus: Well, this is all kinds of icky--not the least of which is that unlike uintNN_t types, we have no guarantees of any kind when it comes to the size of floating point numbers in C (as far as I know of). If there is, we need to use those kinds here, or else figure out at runtime what sizes we're dealing with and act accordingly. To be fair, this is OK as long as the double type is less than 64 bits wide, but again, there's no guarantee that it isn't. :-/
- *tk.u64++ = f;
+ *tk.dp = f;
+ tk.u64++;
continue;
}
}
// Terminate line of tokens and return "success."
goteol:
- tok = etok; // Set tok to beginning of line
+ tok = etok; // Set tok to beginning of line
- if (stuffnull) // Terminate last SYMBOL
+ if (stuffnull) // Terminate last SYMBOL
*nullspot = EOS;
*tk.u32++ = EOL;
{
// Compare names (sleazo string compare)
char * s1 = sym;
- char * s2 = defln->line;
+ char * s2 = defln->line + 1;
// Either we will match the strings to EOS on both, or we will
// match EOS on string 1 to whitespace on string 2. Otherwise, we
printf("[COLON]");
else if (t == CONST)
printf("[CONST]");
+ else if (t == FCONST)
+ printf("[FCONST]");
else if (t == ACONST)
printf("[ACONST]");
else if (t == STRING)
printf("[ENDEXPR]");
else if (t == CR_ABSCOUNT)
printf("[CR_ABSCOUNT]");
+ else if (t == CR_FILESIZE)
+ printf("[CR_FILESIZE]");
else if (t == CR_DEFINED)
printf("[CR_DEFINED]");
else if (t == CR_REFERENCED)
printf("[CONST: $%lX]", *tp.u64);
t += 2;
}
+	else if (*t == FCONST)
+	{
+		// Dump the 64-bit payload stored after the FCONST tag as raw hex.
+		// PRIX64 is used (as in the CONST expansion elsewhere in this
+		// file) because "%lX" only matches a 64-bit value on platforms
+		// where long is 64 bits wide.
+		PTR tp;
+		tp.u32 = t + 1;
+		printf("[FCONST: $%" PRIX64 "]", (uint64_t)*tp.u64);
+		t += 2;
+	}
else if (*t == ACONST)
{
printf("[ACONST: $%X, $%X]", (uint32_t)t[1], (uint32_t)t[2]);
printf("[ENDEXPR]");
else if (*t == CR_ABSCOUNT)
printf("[CR_ABSCOUNT]");
+ else if (*t == CR_FILESIZE)
+ printf("[CR_FILESIZE]");
else if (*t == CR_DEFINED)
printf("[CR_DEFINED]");
else if (*t == CR_REFERENCED)