//
-// RMAC - Reboot's Macro Assembler for the Atari Jaguar Console System
+// RMAC - Reboot's Macro Assembler for all Atari computers
// TOKEN.C - Token Handling
// Copyright (C) 199x Landon Dyer, 2011-2017 Reboot and Friends
// RMAC derived from MADMAC v1.07 Written by Landon Dyer, 1986
//
#include "token.h"
+#include "direct.h"
#include "error.h"
#include "macro.h"
#include "procln.h"
int lnsave; // 1; strcpy() text of current line
-int curlineno; // Current line number
+uint16_t curlineno; // Current line number (64K max currently)
int totlines; // Total # of lines
int mjump_align = 0; // mjump alignment flag
char lntag; // Line tag
static TOKEN tokbuf[TOKBUFSIZE]; // Token buffer (stack-like, all files)
-char chrtab[] = {
+uint8_t chrtab[0x100] = {
ILLEG, ILLEG, ILLEG, ILLEG, // NUL SOH STX ETX
ILLEG, ILLEG, ILLEG, ILLEG, // EOT ENQ ACK BEL
ILLEG, WHITE, ILLEG, ILLEG, // BS HT LF VT
MULTX, MULTX, // : ;
MULTX, MULTX, MULTX, STSYM+CTSYM, // < = > ?
- MULTX, STSYM+CTSYM+HDIGIT, // @ A
- (char)((BYTE)DOT)+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT, // B C
- STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT, // D E
- STSYM+CTSYM+HDIGIT, STSYM+CTSYM, // F G
- STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // H I J K
- (char)((BYTE)DOT)+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // L M N O
-
- STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, (char)((BYTE)DOT)+STSYM+CTSYM, // P Q R S
- STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, (char)((BYTE)DOT)+STSYM+CTSYM, // T U V W
- STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, SELF, // X Y Z [
- SELF, SELF, MULTX, STSYM+CTSYM, // \ ] ^ _
-
- ILLEG, STSYM+CTSYM+HDIGIT, // ` a
- (char)((BYTE)DOT)+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT, // b c
- STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT, // d e
- STSYM+CTSYM+HDIGIT, STSYM+CTSYM, // f g
- STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // h i j k
- (char)((BYTE)DOT)+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // l m n o
-
- STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, (char)((BYTE)DOT)+STSYM+CTSYM, // p q r s
- STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, (char)((BYTE)DOT)+STSYM+CTSYM, // t u v w
- STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, SELF, // x y z {
- SELF, SELF, SELF, ILLEG // | } ~ DEL
+ MULTX, STSYM+CTSYM+HDIGIT, // @ A
+ DOT+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT, // B C
+ DOT+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT, // D E
+ STSYM+CTSYM+HDIGIT, STSYM+CTSYM, // F G
+ STSYM+CTSYM, DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // H I J K
+ DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // L M N O
+
+ DOT+STSYM+CTSYM, DOT+STSYM+CTSYM, STSYM+CTSYM, DOT+STSYM+CTSYM, // P Q R S
+ STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, DOT+STSYM+CTSYM, // T U V W
+ DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, SELF,// X Y Z [ ('X' may follow '.', same as 'x')
+ SELF, SELF, MULTX, STSYM+CTSYM, // \ ] ^ _
+
+ ILLEG, STSYM+CTSYM+HDIGIT, // ` a
+ DOT+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT, // b c
+ DOT+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT, // d e
+ STSYM+CTSYM+HDIGIT, STSYM+CTSYM, // f g
+ STSYM+CTSYM, DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // h i j k
+ DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // l m n o
+
+ DOT+STSYM+CTSYM, DOT+STSYM+CTSYM, STSYM+CTSYM, DOT+STSYM+CTSYM, // p q r s
+ STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, DOT+STSYM+CTSYM, // t u v w
+ DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, SELF, // x y z {
+ SELF, SELF, SELF, ILLEG, // | } ~ DEL
+
+ // Anything above $7F is illegal (and yes, we need to check for this,
+ // otherwise you get strange and spurious errors that will lead you astray)
+ ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
+ ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
+ ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
+ ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
+ ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
+ ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
+ ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
+ ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
+ ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
+ ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
+ ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
+ ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
+ ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
+ ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
+ ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
+ ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG
};
// Names of registers
static char * regname[] = {
- "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7",
- "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7",
- "pc", "ssp", "usp", "sr", "ccr"
+ "d0","d1","d2","d3","d4","d5","d6","d7", // 128,135
+ "a0","a1","a2","a3","a4","a5","a6","sp", // 136,143
+ "ssp","pc","sr","ccr","regequ","set","reg","r0", // 144,151
+ "r1","r2","r3","r4","r5","r6","r7","r8", // 152,159
+ "r9","r10","r11","r12","r13","r14","r15","r16", // 160,167
+ "r17","r18","r19","r20","r21","r22","r23","r24", // 168,175
+ "r25","r26","r27","r28","r29","r30","r31","ccdef", // 176,183
+ "usp","ic40","dc40","bc40","sfc","dfc","","vbr", // 184,191
+ "cacr","caar","msp","isp","tc","itt0","itt1","dtt0", // 192,199
+ "dtt1","mmusr","urp","srp","iacr0","iacr1","dacr0","dacr1", // 200,207
+ "tt0","tt1","crp","","","","","", // 208,215
+ "","","","","fpiar","fpsr","fpcr","", // 216,223
+ "fp0","fp1","fp2","fp3","fp4","fp5","fp6","fp7", // 224,231
+ "","","","","","","","", // 232,239
+ "","","","","","","","", // 240,247
+ "","","","","","","","", // 248,255
+ "","","","","x0","x1","y0","y1", // 256,263
+ "","b0","","b2","","b1","a","b", // 264,271
+ "mr","omr","la","lc","ssh","ssl","ss","", // 272,279
+ "n0","n1","n2","n3","n4","n5","n6","n7", // 280,287
+ "m0","m1","m2","m3","m4","m5","m6","m7", // 288,295
+ "","","","","","","l","p", // 296,303
+ "mr","omr","la","lc","ssh","ssl","ss","", // 304,311
+ "a10","b10","x","y","","","ab","ba" // 312,319
};
static char * riscregname[] = {
// These characters are legal immediately after a period
dotxtab['b'] = DOTB; // .b .B .s .S
dotxtab['B'] = DOTB;
- dotxtab['s'] = DOTB;
- dotxtab['S'] = DOTB;
+ //dotxtab['s'] = DOTB;
+ //dotxtab['S'] = DOTB;
dotxtab['w'] = DOTW; // .w .W
dotxtab['W'] = DOTW;
dotxtab['l'] = DOTL; // .l .L
dotxtab['L'] = DOTL;
dotxtab['i'] = DOTI; // .i .I (???)
dotxtab['I'] = DOTI;
+ dotxtab['D'] = DOTD; // .d .D (quad word)
+ dotxtab['d'] = DOTD;
+ dotxtab['S'] = DOTS; // .s .S
+ dotxtab['s'] = DOTS;
+ dotxtab['Q'] = DOTQ; // .q .Q
+ dotxtab['q'] = DOTQ;
+ dotxtab['X'] = DOTX; // .x .X
+ dotxtab['x'] = DOTX;
+ dotxtab['P'] = DOTP; // .p .P
+ dotxtab['p'] = DOTP;
}
break;
case SRC_IREPT: // Alloc and init an IREPT
inobj->inobj.irept = malloc(sizeof(IREPT));
- DEBUG printf("alloc IREPT\n");
+ DEBUG { printf("alloc IREPT\n"); }
break;
}
*d++ = *s++;
if (*s != '}')
- return error("missing '}'");
+ return error("missing closing brace ('}')");
else
s++;
}
// Lookup the argument and copy its (string) value into the
// destination string
- DEBUG printf("argument='%s'\n", mname);
+ DEBUG { printf("argument='%s'\n", mname); }
if ((arg = lookup(mname, MACARG, macnum)) == NULL)
- return errors("undefined argument: '%s'", mname);
+ return error("undefined argument: '%s'", mname);
else
{
// Convert a string of tokens (terminated with EOL) back into
// macro invocation) then it is ignored.
i = (int)arg->svalue;
arg_num:
- DEBUG printf("~argnumber=%d (argBase=%u)\n", i, imacro->argBase);
+ DEBUG { printf("~argnumber=%d (argBase=%u)\n", i, imacro->argBase); }
tk = NULL;
if (i < imacro->im_nargs)
#else
// This fix should be done for strings too
d = symbolString[*tk++];
-DEBUG printf("ExM: SYMBOL=\"%s\"", d);
+DEBUG { printf("ExM: SYMBOL=\"%s\"", d); }
#endif
break;
case STRING:
*dst++ = '"';
continue;
break;
-// Shamus: Changing the format specifier from %lx to %ux caused
-// the assembler to choke on legitimate code... Need to investigate
-// this further before changing anything else here!
+// Shamus: Changing the format specifier from %lx to %ux caused the assembler
+// to choke on legitimate code... Need to investigate this further
+// before changing anything else here!
case CONST:
- sprintf(numbuf, "$%lx", (LONG)*tk++);
+ sprintf(numbuf, "$%lx", (long unsigned int)*tk++);
d = numbuf;
break;
case DEQUALS:
overflow:
*dst = EOS;
- DEBUG printf("*** OVERFLOW LINE ***\n%s\n", dest);
+ DEBUG { printf("*** OVERFLOW LINE ***\n%s\n", dest); }
return fatal("line too long as a result of macro expansion");
}
//
char * GetNextRepeatLine(void)
{
-
IREPT * irept = cur_inobj->inobj.irept;
LONG * strp = irept->ir_nextln; // initial null
// Do repeat at end of .rept block's string list
if (strp == NULL)
{
- DEBUG printf("back-to-top-of-repeat-block count=%d\n", (int)irept->ir_count);
+ DEBUG { printf("back-to-top-of-repeat-block count=%d\n", (int)irept->ir_count); }
irept->ir_nextln = irept->ir_firstln; // copy first line
if (irept->ir_count-- == 0)
{
- DEBUG printf("end-repeat-block\n");
+ DEBUG { printf("end-repeat-block\n"); }
return NULL;
}
int include(int handle, char * fname)
{
// Debug mode
- if (debug)
- printf("[include: %s, cfileno=%u]\n", fname, cfileno);
+ DEBUG { printf("[include: %s, cfileno=%u]\n", fname, cfileno); }
// Alloc and initialize include-descriptors
INOBJ * inobj = a_inobj(SRC_IFILE);
last_fr->frec_next = fr; // Append to list of filerecs
last_fr = fr;
- DEBUG printf("[include: curfname: %s, cfileno=%u]\n", curfname, cfileno);
+ DEBUG { printf("[include: curfname: %s, cfileno=%u]\n", curfname, cfileno); }
return OK;
}
// Give a warning to the user that we had to wipe their bum for them
if (numUnmatched > 0)
- warni("missing %d .endif(s)", numUnmatched);
+ warn("missing %d .endif(s)", numUnmatched);
tok = inobj->in_otok; // Restore tok and otok
etok = inobj->in_etok;
switch (inobj->in_type)
{
case SRC_IFILE: // Pop and release an IFILE
- if (debug)
- printf("[Leaving: %s]\n", curfname);
+ DEBUG { printf("[Leaving: %s]\n", curfname); }
ifile = inobj->inobj.ifile;
ifile->if_link = f_ifile;
f_ifile = ifile;
close(ifile->ifhandle); // Close source file
-if (debug) printf("[fpop (pre): curfname=%s]\n", curfname);
+DEBUG { printf("[fpop (pre): curfname=%s]\n", curfname); }
curfname = ifile->ifoldfname; // Set current filename
-if (debug) printf("[fpop (post): curfname=%s]\n", curfname);
-if (debug) printf("[fpop: (pre) cfileno=%d ifile->ifno=%d]\n", (int)cfileno, (int)ifile->ifno);
+DEBUG { printf("[fpop (post): curfname=%s]\n", curfname); }
+DEBUG { printf("[fpop: (pre) cfileno=%d ifile->ifno=%d]\n", (int)cfileno, (int)ifile->ifno); }
curlineno = ifile->ifoldlineno; // Set current line#
DEBUG printf("cfileno=%d ifile->ifno=%d\n", (int)cfileno, (int)ifile->ifno);
cfileno = ifile->ifno; // Restore current file number
-if (debug) printf("[fpop: (post) cfileno=%d ifile->ifno=%d]\n", (int)cfileno, (int)ifile->ifno);
+DEBUG { printf("[fpop: (post) cfileno=%d ifile->ifno=%d]\n", (int)cfileno, (int)ifile->ifno); }
break;
case SRC_IMACRO: // Pop and release an IMACRO
imacro = inobj->inobj.imacro;
//
int TokenizeLine(void)
{
- char * ln = NULL; // Ptr to current position in line
- char * p; // Random character ptr
+ uint8_t * ln = NULL; // Ptr to current position in line
+ uint8_t * p; // Random character ptr
TOKEN * tk; // Token-deposit ptr
int state = 0; // State for keyword detector
int j = 0; // Var for keyword detector
- char c; // Random char
+ uint8_t c; // Random char
VALUE v; // Random value
- char * nullspot = NULL; // Spot to clobber for SYMBOL termination
+ uint8_t * nullspot = NULL; // Spot to clobber for SYMBOL termination
int stuffnull; // 1:terminate SYMBOL '\0' at *nullspot
- char c1;
+ uint8_t c1;
int stringNum = 0; // Pointer to string locations in tokenized line
retry:
case SRC_IFILE:
if ((ln = GetNextLine()) == NULL)
{
-if (debug) printf("TokenizeLine: Calling fpop() from SRC_IFILE...\n");
+DEBUG { printf("TokenizeLine: Calling fpop() from SRC_IFILE...\n"); }
if (fpop() == 0) // Pop input level
goto retry; // Try for more lines
else
case SRC_IREPT:
if ((ln = GetNextRepeatLine()) == NULL)
{
-if (debug) printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n");
+DEBUG { printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n"); }
fpop();
goto retry;
}
goto goteol;
// Main tokenization loop;
- // o skip whitespace;
- // o handle end-of-line;
- // o handle symbols;
- // o handle single-character tokens (operators, etc.);
- // o handle multiple-character tokens (constants, strings, etc.).
+ // o skip whitespace;
+ // o handle end-of-line;
+ // o handle symbols;
+ // o handle single-character tokens (operators, etc.);
+ // o handle multiple-character tokens (constants, strings, etc.).
for(; *ln!=EOS;)
{
// Skip whitespace, handle EOL
- while ((int)chrtab[*ln] & WHITE)
+ while (chrtab[*ln] & WHITE)
ln++;
// Handle EOL, comment with ';'
v = (VALUE)dotxtab[*ln++];
if (chrtab[*ln] & CTSYM)
- return error("misuse of '.', not allowed in symbols");
+ return error("misuse of '.'; not allowed in symbols");
}
// If the symbol is small, check to see if it's really the name of
// Handle multiple-character tokens
if (c & MULTX)
{
- stringtype = 0;
switch (*ln++)
{
case '!': // ! or !=
continue;
case '\'': // 'string'
- if (m6502)
- {
- stringtype = A8INT; // hardcoded for now, maybe this will change in the future
- }
- // Fall through
+ if (m6502)
+ {
+ // Hardcoded for now, maybe this will change in the future
+ *tk++ = STRINGA8;
+ goto dostring;
+ }
+ // Fall through
case '\"': // "string"
- c1 = ln[-1];
*tk++ = STRING;
-//#warning
-// More char * stuffing (8 bytes) into the space of 4 (TOKEN).
-// Need to figure out how to fix this crap.
-#if 0
- *tk++ = (TOKEN)ln;
-#else
+dostring:
+ c1 = ln[-1];
string[stringNum] = ln;
*tk++ = stringNum;
stringNum++;
-#endif
for(p=ln; *ln!=EOS && *ln!=c1;)
{
while (hextab[*ln] >= 0)
v = (v << 4) + (int)hextab[*ln++];
- // ggn: Okay, some comments here are in order I think....
- // The original madmac sources didn't parse the size at
- // this point (i.e. .b/.w/.l). It was probably done at
- // another point, although it's unclear to me exactly
- // where. So why change this? My understanding (at least
- // from what SCPCD said on IRC) is that .w addressing
- // formats produce wrong code on jaguar (or doesn't execute
- // properly? something like that). So the code was changed
- // to mask off the upper bits depending on length (note: I
- // don't think .b is valid at all! I only know of .w/.l, so
- // this should probably be wiped). Then the code that
- // parses the constant and checks to see if it's between
- // $ffff0000 and $8000 never got triggered, so yay job
- // done! ...now say we want to assemble a st .prg. One of
- // the most widely spread optimisations is move.X expr.w,Y
- // (or vice versa, or both, anyway...) to access hardware
- // registers (which are mapped to $fxxxxx). This botchy
- // thing would create "hilarious" code while trying to
- // access hardware registers. So I made a condition to see
- // if st mode or jaguar is active and apply the both or
- // not. One last note: this is hardcoded to get optimised
- // for now on ST mode, i.e. it can't generate code like
- // move.w $00001234,d0 - it'll always get optimised to
- // move.w $1234.w,d0. It's probably ok, but maybe a warning
- // should be emitted? Or maybe finding a way to make it not
- // auto-optimise? I think it's ok for now...
if (*ln == '.')
{
if (obj_format == BSD)
if (obj_format == ALCYON)
{
- if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
- {
- *tk++ = DOTW;
- ln += 2;
- }
- else if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
+ if (*ln == '.')
{
- *tk++ = DOTL;
- ln += 2;
+ if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
+ {
+ *tk++ = DOTW;
+ ln += 2;
+ }
+ else if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
+ {
+ *tk++ = DOTL;
+ ln += 2;
+ }
}
}
}
}
// Handle illegal character
- return error("illegal character");
+ return error("illegal character $%02X found", *ln);
}
// Terminate line of tokens and return "success."
// expansion, and is NOT subject to macro expansion. The whitespace may also
// be EOL.
//
-//int d_goto(WORD siz) {
-//int d_goto(void)
int d_goto(WORD unused)
{
- char * s1, * s2;
-
// Setup for the search
if (*tok != SYMBOL)
return error("missing label");
-// sym = (char *)tok[1];
char * sym = string[tok[1]];
tok += 2;
return error("goto not in macro");
IMACRO * imacro = cur_inobj->inobj.imacro;
-// defln = (LONG *)imacro->im_macro->svalue;
struct LineList * defln = imacro->im_macro->lineList;
- // Find the label, starting with the first line.
+ // Attempt to find the label, starting with the first line.
for(; defln!=NULL; defln=defln->next)
{
-// if (*(char *)(defln + 1) == ':')
+ // Must start with a colon
if (defln->line[0] == ':')
{
// Compare names (sleazo string compare)
- // This string compare is not right. Doesn't check for lengths.
- // (actually it does, but in a crappy, unclear way.)
-WARNING(!!!! Bad string comparison !!!)
- s1 = sym;
-// s2 = (char *)(defln + 1) + 1;
- s2 = defln->line;
-
- while (*s1 == *s2)
+ char * s1 = sym;
+ // The label line begins with ':' (checked above), but sym is the
+ // text of a SYMBOL token and ':' is not a symbol character in
+ // chrtab, so start comparing just past the colon.
+ char * s2 = defln->line + 1;
+
+ // Either we will match the strings to EOS on both, or we will
+ // match EOS on string 1 to whitespace on string 2. Otherwise, we
+ // have no match. (The cast keeps the chrtab index in 0..255 even
+ // where plain char is signed.)
+ while ((*s1 == *s2) || ((*s1 == EOS) && (chrtab[(uint8_t)*s2] & WHITE)))
{
+ // If we reached the end of string 1 (sym), we're done.
+ // Note that we're also checking for the end of string 2 as
+ // well, since we've established they're equal above.
if (*s1 == EOS)
- break;
- else
{
- s1++;
- s2++;
+ // Found the label, set new macro next-line and return.
+ imacro->im_nextln = defln;
+ return 0;
}
- }
- // Found the label, set new macro next-line and return.
- if ((*s2 == EOS) || ((int)chrtab[*s2] & WHITE))
- {
- imacro->im_nextln = defln;
- return 0;
+ s1++;
+ s2++;
}
}
}