// TOKEN.C - Token Handling
// Copyright (C) 199x Landon Dyer, 2011-2012 Reboot and Friends
// RMAC derived from MADMAC v1.07 Written by Landon Dyer, 1986
-// Source Utilised with the Kind Permission of Landon Dyer
+// Source utilised with the kind permission of Landon Dyer
//
#include "token.h"
char lntag; // Line tag
char * curfname; // Current filename
char tolowertab[128]; // Uppercase ==> lowercase
-char hextab[128]; // Table of hex values
+int8_t hextab[128]; // Table of hex values
char dotxtab[128]; // Table for ".b", ".s", etc.
char irbuf[LNSIZ]; // Text for .rept block line
char lnbuf[LNSIZ]; // Text of current line
//
-// Initialize Tokenizer
+// Initialize tokenizer
//
void InitTokenizer(void)
{
case DOTL:
d = ".l";
break;
+ case CR_ABSCOUNT:
+ d = "^^abscount";
+ break;
case CR_DATE:
d = "^^date";
break;
//
-// Get Next Line of Text from a Macro
+// Get next line of text from a macro
//
char * GetNextMacroLine(void)
{
- unsigned source_addr;
+// unsigned source_addr;
IMACRO * imacro = cur_inobj->inobj.imacro;
// LONG * strp = imacro->im_nextln;
//
-// Get Next Line of Text from a Repeat Block
+// Get next line of text from a repeat block
//
char * GetNextRepeatLine(void)
{
//
-// Include a Source File used at the Root, and for ".include" Files
+// Include a source file used at the root, and for ".include" files
//
int include(int handle, char * fname)
{
INOBJ * inobj;
FILEREC * fr;
- // Verbose mode
- if (verb_flag)
+ // Debug mode
+ if (debug)
printf("[include: %s, cfileno=%u]\n", fname, cfileno);
// Alloc and initialize include-descriptors
//
-// Pop the Current Input Level
+// Pop the current input level
//
int fpop(void)
{
// Pop IFENT levels until we reach the conditional assembly context we
// were at when the input object was entered.
while (ifent != inobj->in_ifent)
- d_endif();
+ {
+ if (d_endif() != 0) // Something bad happened during endif parsing?
+ return -1; // If yes, bail instead of getting stuck in a loop
+ }
tok = inobj->in_otok; // Restore tok and otok
etok = inobj->in_etok;
switch (inobj->in_type)
{
case SRC_IFILE: // Pop and release an IFILE
- if (verb_flag)
+ if (debug)
printf("[Leaving: %s]\n", curfname);
ifile = inobj->inobj.ifile;
ifile->if_link = f_ifile;
f_ifile = ifile;
close(ifile->ifhandle); // Close source file
-if (verb_flag) printf("[fpop (pre): curfname=%s]\n", curfname);
+if (debug) printf("[fpop (pre): curfname=%s]\n", curfname);
curfname = ifile->ifoldfname; // Set current filename
-if (verb_flag) printf("[fpop (post): curfname=%s]\n", curfname);
-if (verb_flag) printf("[fpop: (pre) cfileno=%d ifile->ifno=%d]\n", (int)cfileno, (int)ifile->ifno);
+if (debug) printf("[fpop (post): curfname=%s]\n", curfname);
+if (debug) printf("[fpop: (pre) cfileno=%d ifile->ifno=%d]\n", (int)cfileno, (int)ifile->ifno);
curlineno = ifile->ifoldlineno; // Set current line#
DEBUG printf("cfileno=%d ifile->ifno=%d\n", (int)cfileno, (int)ifile->ifno);
cfileno = ifile->ifno; // Restore current file number
-if (verb_flag) printf("[fpop: (post) cfileno=%d ifile->ifno=%d]\n", (int)cfileno, (int)ifile->ifno);
+if (debug) printf("[fpop: (post) cfileno=%d ifile->ifno=%d]\n", (int)cfileno, (int)ifile->ifno);
break;
case SRC_IMACRO: // Pop and release an IMACRO
imacro = inobj->inobj.imacro;
// Scan for next end-of-line; handle stupid text formats by treating
// \r\n the same as \n. (lone '\r' at end of buffer means we have to
// check for '\n').
-#if 0
- i = 0;
- j = fl->ifcnt;
- d = &fl->ifbuf[fl->ifind];
-
- for(p=d; i<j; i++, p++)
-#else
d = &fl->ifbuf[fl->ifind];
for(p=d, i=0, j=fl->ifcnt; i<j; i++, p++)
-#endif
{
if (*p == '\r' || *p == '\n')
{
if (*p == '\r')
{
if (i >= j)
- break; // Need to read more, then look for '\n' to eat
+ break; // Need to read more, then look for '\n' to eat
else if (p[1] == '\n')
i++;
}
*p = '\0';
return NULL;
#else
- // Really should check to see if we're at the end of the buffer! :-P
+ // Really should check to see if we're at the end of the buffer!
+ // :-P
fl->ifbuf[fl->ifind + fl->ifcnt] = '\0';
fl->ifcnt = 0;
return &fl->ifbuf[fl->ifind];
//
-// Tokenize a Line
+// Tokenize a line
//
int TokenizeLine(void)
{
- char * ln = NULL; // Ptr to current position in line
- char * p; // Random character ptr
- TOKEN * tk; // Token-deposit ptr
- int state = 0; // State for keyword detector
- int j = 0; // Var for keyword detector
- char c; // Random char
- VALUE v; // Random value
- char * nullspot = NULL; // Spot to clobber for SYMBOL terminatn
- int stuffnull; // 1:terminate SYMBOL '\0' at *nullspot
+ char * ln = NULL; // Ptr to current position in line
+ char * p; // Random character ptr
+ TOKEN * tk; // Token-deposit ptr
+ int state = 0; // State for keyword detector
+ int j = 0; // Var for keyword detector
+ char c; // Random char
+ VALUE v; // Random value
+ char * nullspot = NULL; // Spot to clobber for SYMBOL termination
+ int stuffnull; // 1:terminate SYMBOL '\0' at *nullspot
char c1;
- int stringNum = 0; // Pointer to string locations in tokenized line
+ int stringNum = 0; // Pointer to string locations in tokenized line
retry:
if (cur_inobj == NULL) // Return EOF if input stack is empty
return TKEOF;
- // Get another line of input from the current input source: a file,
- // a macro, or a repeat-block
+ // Get another line of input from the current input source: a file, a
+ // macro, or a repeat-block
switch (cur_inobj->in_type)
{
// Include-file:
case SRC_IFILE:
if ((ln = GetNextLine()) == NULL)
{
-if (verb_flag) printf("TokenizeLine: Calling fpop() from SRC_IFILE...\n");
- fpop(); // Pop input level
- goto retry; // Try for more lines
+if (debug) printf("TokenizeLine: Calling fpop() from SRC_IFILE...\n");
+ if (fpop()==0) // Pop input level
+ goto retry; // Try for more lines
+ else
+ {
+ ifent->if_prev = (IFENT *) - 1; //Signal Assemble() that we have reached EOF with unbalanced if/endifs
+ return TKEOF;
+ }
}
curlineno++; // Bump line number
case SRC_IMACRO:
if ((ln = GetNextMacroLine()) == NULL)
{
- ExitMacro(); // Exit macro (pop args, do fpop(), etc)
- goto retry; // Try for more lines...
+ if (ExitMacro() == 0) // Exit macro (pop args, do fpop(), etc)
+ goto retry; // Try for more lines...
+ else
+ return TKEOF; // Oops, we got a non zero return code, signal EOF
}
lntag = '@';
case SRC_IREPT:
if ((ln = GetNextRepeatLine()) == NULL)
{
-if (verb_flag) printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n");
+if (debug) printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n");
fpop();
goto retry;
}
strcpy(lnbuf, ln);
// General house-keeping
- tok = tokeol; // Set "tok" to EOL in case of error
- tk = etok; // Reset token ptr
- stuffnull = 0; // Don't stuff nulls
- totlines++; // Bump total #lines assembled
+ tok = tokeol; // Set "tok" to EOL in case of error
+ tk = etok; // Reset token ptr
+ stuffnull = 0; // Don't stuff nulls
+ totlines++; // Bump total #lines assembled
// See if the entire line is a comment. This is a win if the programmer
// puts in lots of comments
if (c & STSYM)
{
- if (stuffnull) // Terminate old symbol from previous pass
+ if (stuffnull) // Terminate old symbol from previous pass
*nullspot = EOS;
- v = 0; // Assume no DOT attrib follows symbol
+ v = 0; // Assume no DOT attrib follows symbol
stuffnull = 1;
- p = nullspot = ln++; // Nullspot -> start of this symbol
+ p = nullspot = ln++; // Nullspot -> start of this symbol
// Find end of symbol (and compute its length)
for(j=1; (int)chrtab[*ln]&CTSYM; j++)
// symbol or keyword:
if (*ln == '.')
{
- *ln++ = EOS; // Terminate symbol
- stuffnull = 0; // And never try it again
+ *ln++ = EOS; // Terminate symbol
+ stuffnull = 0; // And never try it again
// Character following the `.' must have a DOT attribute, and
// the chararacter after THAT one must not have a start-symbol
j = -1;
}
- //make j = -1 if time, date etc with no preceeding ^^
- //defined, referenced, streq, macdef, date and time
+ // Make j = -1 if time, date etc with no preceding ^^
+ // defined, referenced, streq, macdef, date and time
switch ((TOKEN)j)
{
case 112: // defined
case 120: // time
case 121: // date
j = -1;
-// break;
}
// If not tokenized keyword OR token was not found
{
*tk++ = SYMBOL;
//#warning
-//problem here: nullspot is a char * but TOKEN is a uint32_t. On a 64-bit system,
-//this will cause all kinds of mischief.
+//problem here: nullspot is a char * but TOKEN is a uint32_t. On a 64-bit
+//system, this will cause all kinds of mischief.
#if 0
*tk++ = (TOKEN)nullspot;
#else
{
switch (*ln++)
{
- case '!': // ! or !=
+ case '!': // ! or !=
if (*ln == '=')
{
*tk++ = NE;
*tk++ = '!';
continue;
- case '\'': // 'string'
- case '\"': // "string"
+ case '\'': // 'string'
+ case '\"': // "string"
c1 = ln[-1];
*tk++ = STRING;
//#warning
*p++ = EOS;
continue;
- case '$': // $, hex constant
- if ((int)chrtab[*ln] & HDIGIT)
+ case '$': // $, hex constant
+ if (chrtab[*ln] & HDIGIT)
{
v = 0;
- while ((int)hextab[*ln] >= 0)
+ // Parse the hex value
+ while (hextab[*ln] >= 0)
v = (v << 4) + (int)hextab[*ln++];
+ // ggn: Okay, some comments here are in order I think....
+ // The original madmac sources didn't parse the size at
+ // this point (i.e. .b/.w/.l). It was probably done at
+ // another point, although it's unclear to me exactly
+ // where. So why change this? My understanding (at least
+ // from what SCPCD said on IRC) is that .w addressing
+ // formats produce wrong code on jaguar (or doesn't execute
+ // properly? something like that). So the code was changed
+ // to mask off the upper bits depending on length (note: I
+ // don't think .b is valid at all! I only know of .w/.l, so
+ // this should probably be wiped). Then the code that
+ // parses the constant and checks to see if it's between
+ // $ffff0000 and $8000 never got triggered, so yay job
+ // done! ...now say we want to assemble a st .prg. One of
+ // the most widely spread optimisations is move.X expr.w,Y
+ // (or vice versa, or both, anyway...) to access hardware
+ // registers (which are mapped to $fxxxxx). This botchy
+ // thing would create "hilarious" code while trying to
+ // access hardware registers. So I made a condition to see
+ // if st mode or jaguar is active and apply the botch or
+ // not. One last note: this is hardcoded to get optimised
+ // for now on ST mode, i.e. it can't generate code like
+ // move.w $00001234,d0 - it'll always get optimised to
+ // move.w $1234.w,d0. It's probably ok, but maybe a warning
+ // should be emitted? Or maybe finding a way to make it not
+ // auto-optimise? I think it's ok for now...
if (*ln == '.')
{
- if ((*(ln + 1) == 'b') || (*(ln + 1) == 'B'))
+ if (obj_format == ALCYON)
{
- v &= 0x000000FF;
- ln += 2;
- }
-
- if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
- {
- v &= 0x0000FFFF;
- ln += 2;
+ if ((*(ln + 1) == 'b') || (*(ln + 1) == 'B') || (*(ln + 1) == 'w') || (*(ln + 1) == 'W') || (*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
+ {
+ ln += 2;
+ }
}
-
- if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
+ else
{
- ln += 2;
+ if ((*(ln + 1) & 0xDF) == 'B')
+ {
+ v &= 0x000000FF;
+ ln += 2;
+ }
+ else if ((*(ln + 1) & 0xDF) == 'W')
+ {
+ v &= 0x0000FFFF;
+ ln += 2;
+ }
+ else if ((*(ln + 1) & 0xDF) == 'L')
+ {
+ ln += 2;
+ }
}
}
*tk++ = '$';
continue;
- case '<': // < or << or <> or <=
+ case '<': // < or << or <> or <=
switch (*ln)
{
case '<':
*tk++ = '<';
continue;
}
- case ':': // : or ::
+ case ':': // : or ::
if (*ln == ':')
{
*tk++ = DCOLON;
*tk++ = ':';
continue;
- case '=': // = or ==
+ case '=': // = or ==
if (*ln == '=')
{
*tk++ = DEQUALS;
*tk++ = '=';
continue;
- case '>': // > or >> or >=
+ case '>': // > or >> or >=
switch (*ln)
{
case '>':
*tk++ = '>';
continue;
}
- case '%': // % or binary constant
+ case '%': // % or binary constant
if (*ln < '0' || *ln > '1')
{
*tk++ = '%';
*tk++ = CONST;
*tk++ = v;
continue;
- case '@': // @ or octal constant
+ case '@': // @ or octal constant
if (*ln < '0' || *ln > '7')
{
*tk++ = '@';
*tk++ = CONST;
*tk++ = v;
continue;
- case '^': // ^ or ^^ <operator-name>
+ case '^': // ^ or ^^ <operator-name>
if (*ln != '^')
{
*tk++ = '^';
*tk++ = (TOKEN)j;
continue;
default:
- interror(2); // Bad MULTX entry in chrtab
+ interror(2); // Bad MULTX entry in chrtab
continue;
}
}
//int d_goto(void)
int d_goto(WORD unused)
{
-// char * sym; // Label to search for
-// LONG * defln; // Macro definition strings
- char * s1; // Temps for string comparison
- char * s2;
-// IMACRO * imacro; // Macro invocation block
+ char * s1, * s2;
// Setup for the search
if (*tok != SYMBOL)
// Compare names (sleazo string compare)
// This string compare is not right. Doesn't check for lengths.
// (actually it does, but in a crappy, unclear way.)
-#warning "!!! Bad string comparison !!!"
+WARNING(!!!! Bad string comparison !!!)
s1 = sym;
// s2 = (char *)(defln + 1) + 1;
s2 = defln->line;
else if (*t == ACONST)
printf("[ACONST]");
else if (*t == STRING)
-// printf("[STRING]");
{
t++;
printf("[STRING:\"%s\"]", string[*t]);
printf("[DOTI]");
else if (*t == ENDEXPR)
printf("[ENDEXPR]");
+ else if (*t == CR_ABSCOUNT)
+ printf("[CR_ABSCOUNT]");
else if (*t == CR_DEFINED)
printf("[CR_DEFINED]");
else if (*t == CR_REFERENCED)
printf("[A%u]", ((uint32_t)*t) - 0x88);
else
printf("[%X:%c]", (uint32_t)*t, (char)*t);
-// printf("[%X]", (uint32_t)*t);
}
printf("[EOL]\n");