X-Git-Url: http://shamusworld.gotdns.org/cgi-bin/gitweb.cgi?p=rmac;a=blobdiff_plain;f=token.c;h=4067711be2ff7ddd1dcd5e4f51fb04cc3cad6795;hp=6ba200c769aa86f0c9d6e54b3ce979cf90b61f70;hb=66be644c3e5fbd7446d86c79e9e51b75c0442b49;hpb=d95ee7f628ceac9af515079fb6797476557a23d2 diff --git a/token.c b/token.c index 6ba200c..4067711 100644 --- a/token.c +++ b/token.c @@ -3,7 +3,7 @@ // TOKEN.C - Token Handling // Copyright (C) 199x Landon Dyer, 2011-2012 Reboot and Friends // RMAC derived from MADMAC v1.07 Written by Landon Dyer, 1986 -// Source Utilised with the Kind Permission of Landon Dyer +// Source utilised with the kind permission of Landon Dyer // #include "token.h" @@ -24,7 +24,7 @@ int mjump_align = 0; // mjump alignment flag char lntag; // Line tag char * curfname; // Current filename char tolowertab[128]; // Uppercase ==> lowercase -char hextab[128]; // Table of hex values +int8_t hextab[128]; // Table of hex values char dotxtab[128]; // Table for ".b", ".s", etc. char irbuf[LNSIZ]; // Text for .rept block line char lnbuf[LNSIZ]; // Text of current line @@ -118,7 +118,7 @@ static char * riscregname[] = { // -// Initialize Tokenizer +// Initialize tokenizer // void InitTokenizer(void) { @@ -554,6 +554,9 @@ DEBUG printf("ExM: SYMBOL=\"%s\"", d); case DOTL: d = ".l"; break; + case CR_ABSCOUNT: + d = "^^abscount"; + break; case CR_DATE: d = "^^date"; break; @@ -613,11 +616,11 @@ overflow: // -// Get Next Line of Text from a Macro +// Get next line of text from a macro // char * GetNextMacroLine(void) { - unsigned source_addr; +// unsigned source_addr; IMACRO * imacro = cur_inobj->inobj.imacro; // LONG * strp = imacro->im_nextln; @@ -636,7 +639,7 @@ char * GetNextMacroLine(void) // -// Get Next Line of Text from a Repeat Block +// Get next line of text from a repeat block // char * GetNextRepeatLine(void) { @@ -668,7 +671,7 @@ char * GetNextRepeatLine(void) // -// Include a Source File used at the Root, and for ".include" Files +// Include a source file used at the root, and for ".include" files // int include(int handle, char * fname) { @@ -676,8 +679,8 @@ int include(int handle, char * fname) INOBJ * inobj; FILEREC * fr; - // Verbose mode - if (verb_flag) + // Debug mode + if (debug) printf("[include: %s, cfileno=%u]\n", fname, cfileno); // Alloc and initialize include-descriptors @@ -714,7 +717,7 @@ int include(int handle, char * fname) // -// Pop the Current Input Level +// Pop the current input level // int fpop(void) { @@ -727,30 +730,41 @@ int fpop(void) { // Pop IFENT levels until we reach the conditional assembly context we // were at when the input object was entered. + int numUnmatched = 0; + while (ifent != inobj->in_ifent) - d_endif(); + { + if (d_endif() != 0) // Something bad happened during endif parsing? + return -1; // If yes, bail instead of getting stuck in a loop + + numUnmatched++; + } - tok = inobj->in_otok; // Restore tok and otok + // Give a warning to the user that we had to wipe their bum for them + if (numUnmatched > 0) + warni("missing %d .endif(s)", numUnmatched); + + tok = inobj->in_otok; // Restore tok and otok etok = inobj->in_etok; switch (inobj->in_type) { - case SRC_IFILE: // Pop and release an IFILE - if (verb_flag) + case SRC_IFILE: // Pop and release an IFILE + if (debug) printf("[Leaving: %s]\n", curfname); ifile = inobj->inobj.ifile; ifile->if_link = f_ifile; f_ifile = ifile; close(ifile->ifhandle); // Close source file -if (verb_flag) printf("[fpop (pre): curfname=%s]\n", curfname); +if (debug) printf("[fpop (pre): curfname=%s]\n", curfname); curfname = ifile->ifoldfname; // Set current filename -if (verb_flag) printf("[fpop (post): curfname=%s]\n", curfname); -if (verb_flag) printf("[fpop: (pre) cfileno=%d ifile->ifno=%d]\n", (int)cfileno, (int)ifile->ifno); +if (debug) printf("[fpop (post): curfname=%s]\n", curfname); +if (debug) printf("[fpop: (pre) cfileno=%d ifile->ifno=%d]\n", (int)cfileno, (int)ifile->ifno); curlineno = ifile->ifoldlineno; // Set current line# DEBUG printf("cfileno=%d ifile->ifno=%d\n", (int)cfileno, (int)ifile->ifno); cfileno = ifile->ifno; // Restore current file number -if (verb_flag) printf("[fpop: (post) cfileno=%d ifile->ifno=%d]\n", (int)cfileno, (int)ifile->ifno); +if (debug) printf("[fpop: (post) cfileno=%d ifile->ifno=%d]\n", (int)cfileno, (int)ifile->ifno); break; case SRC_IMACRO: // Pop and release an IMACRO imacro = inobj->inobj.imacro; @@ -795,17 +809,9 @@ char * GetNextLine(void) // Scan for next end-of-line; handle stupid text formats by treating // \r\n the same as \n. (lone '\r' at end of buffer means we have to // check for '\n'). -#if 0 - i = 0; - j = fl->ifcnt; - d = &fl->ifbuf[fl->ifind]; - - for(p=d; iifbuf[fl->ifind]; for(p=d, i=0, j=fl->ifcnt; i= j) - break; // Need to read more, then look for '\n' to eat + break; // Need to read more, then look for '\n' to eat else if (p[1] == '\n') i++; } @@ -838,7 +844,8 @@ char * GetNextLine(void) *p = '\0'; return NULL; #else - // Really should check to see if we're at the end of the buffer! :-P + // Really should check to see if we're at the end of the buffer! + // :-P fl->ifbuf[fl->ifind + fl->ifcnt] = '\0'; fl->ifcnt = 0; return &fl->ifbuf[fl->ifind]; @@ -879,29 +886,29 @@ char * GetNextLine(void) // -// Tokenize a Line +// Tokenize a line // int TokenizeLine(void) { - char * ln = NULL; // Ptr to current position in line - char * p; // Random character ptr - TOKEN * tk; // Token-deposit ptr - int state = 0; // State for keyword detector - int j = 0; // Var for keyword detector - char c; // Random char - VALUE v; // Random value - char * nullspot = NULL; // Spot to clobber for SYMBOL terminatn - int stuffnull; // 1:terminate SYMBOL '\0' at *nullspot + char * ln = NULL; // Ptr to current position in line + char * p; // Random character ptr + TOKEN * tk; // Token-deposit ptr + int state = 0; // State for keyword detector + int j = 0; // Var for keyword detector + char c; // Random char + VALUE v; // Random value + char * nullspot = NULL; // Spot to clobber for SYMBOL termination + int stuffnull; // 1:terminate SYMBOL '\0' at *nullspot char c1; - int stringNum = 0; // Pointer to string locations in tokenized line + int stringNum = 0; // Pointer to string locations in tokenized line retry: if (cur_inobj == NULL) // Return EOF if input stack is empty return TKEOF; - // Get another line of input from the current input source: a file, - // a macro, or a repeat-block + // Get another line of input from the current input source: a file, a + // macro, or a repeat-block switch (cur_inobj->in_type) { // Include-file: @@ -912,9 +919,14 @@ retry: case SRC_IFILE: if ((ln = GetNextLine()) == NULL) { -if (verb_flag) printf("TokenizeLine: Calling fpop() from SRC_IFILE...\n"); - fpop(); // Pop input level - goto retry; // Try for more lines +if (debug) printf("TokenizeLine: Calling fpop() from SRC_IFILE...\n"); + if (fpop() == 0) // Pop input level + goto retry; // Try for more lines + else + { + ifent->if_prev = (IFENT *) - 1; //Signal Assemble() that we have reached EOF with unbalanced if/endifs + return TKEOF; + } } curlineno++; // Bump line number @@ -947,8 +959,10 @@ if (verb_flag) printf("TokenizeLine: Calling fpop() from SRC_IFILE...\n"); case SRC_IMACRO: if ((ln = GetNextMacroLine()) == NULL) { - ExitMacro(); // Exit macro (pop args, do fpop(), etc) - goto retry; // Try for more lines... + if (ExitMacro() == 0) // Exit macro (pop args, do fpop(), etc) + goto retry; // Try for more lines... + else + return TKEOF; // Oops, we got a non zero return code, signal EOF } lntag = '@'; @@ -959,7 +973,7 @@ if (verb_flag) printf("TokenizeLine: Calling fpop() from SRC_IFILE...\n"); case SRC_IREPT: if ((ln = GetNextRepeatLine()) == NULL) { -if (verb_flag) printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n"); +if (debug) printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n"); fpop(); goto retry; } @@ -975,10 +989,10 @@ if (verb_flag) printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n"); strcpy(lnbuf, ln); // General house-keeping - tok = tokeol; // Set "tok" to EOL in case of error - tk = etok; // Reset token ptr - stuffnull = 0; // Don't stuff nulls - totlines++; // Bump total #lines assembled + tok = tokeol; // Set "tok" to EOL in case of error + tk = etok; // Reset token ptr + stuffnull = 0; // Don't stuff nulls + totlines++; // Bump total #lines assembled // See if the entire line is a comment. This is a win if the programmer // puts in lots of comments @@ -1013,6 +1027,27 @@ if (verb_flag) printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n"); v = 0; // Assume no DOT attrib follows symbol stuffnull = 1; + + // In some cases, we need to check for a DOTx at the *beginning* + // of a symbol, as the "start" of the line we're currently looking + // at could be somewhere in the middle of that line! + if (*ln == '.') + { + // Make sure that it's *only* a .[bwsl] following, and not the + // start of a local symbol: + if ((chrtab[*(ln + 1)] & DOT) + && (dotxtab[*(ln + 1)] != 0) + && !(chrtab[*(ln + 2)] & CTSYM)) + { + // We found a legitimate DOTx construct, so add it to the + // token stream: + ln++; + stuffnull = 0; + *tk++ = (TOKEN)dotxtab[*ln++]; + continue; + } + } + p = nullspot = ln++; // Nullspot -> start of this symbol // Find end of symbol (and compute its length) @@ -1030,13 +1065,13 @@ if (verb_flag) printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n"); // the chararacter after THAT one must not have a start-symbol // attribute (to prevent symbols that look like, for example, // "zingo.barf", which might be a good idea anyway....) - if ((((int)chrtab[*ln] & DOT) == 0) || ((int)dotxtab[*ln] <= 0)) - return error("[bwsl] must follow `.' in symbol"); + if (((chrtab[*ln] & DOT) == 0) || (dotxtab[*ln] == 0)) + return error("[bwsl] must follow '.' in symbol"); v = (VALUE)dotxtab[*ln++]; - if ((int)chrtab[*ln] & CTSYM) - return error("misuse of `.', not allowed in symbols"); + if (chrtab[*ln] & CTSYM) + return error("misuse of '.', not allowed in symbols"); } // If the symbol is small, check to see if it's really the name of @@ -1074,8 +1109,8 @@ if (verb_flag) printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n"); j = -1; } - //make j = -1 if time, date etc with no preceeding ^^ - //defined, referenced, streq, macdef, date and time + // Make j = -1 if time, date etc with no preceeding ^^ + // defined, referenced, streq, macdef, date and time switch ((TOKEN)j) { case 112: // defined @@ -1088,7 +1123,7 @@ if (verb_flag) printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n"); } // If not tokenized keyword OR token was not found - if (j < 0 || state < 0) + if ((j < 0) || (state < 0)) { *tk++ = SYMBOL; //#warning @@ -1129,7 +1164,7 @@ if (verb_flag) printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n"); { switch (*ln++) { - case '!': // ! or != + case '!': // ! or != if (*ln == '=') { *tk++ = NE; @@ -1139,8 +1174,8 @@ if (verb_flag) printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n"); *tk++ = '!'; continue; - case '\'': // 'string' - case '\"': // "string" + case '\'': // 'string' + case '\"': // "string" c1 = ln[-1]; *tk++ = STRING; //#warning @@ -1206,42 +1241,84 @@ if (verb_flag) printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n"); *p++ = EOS; continue; - case '$': // $, hex constant - if ((int)chrtab[*ln] & HDIGIT) + case '$': // $, hex constant + if (chrtab[*ln] & HDIGIT) { v = 0; - while ((int)hextab[*ln] >= 0) + // Parse the hex value + while (hextab[*ln] >= 0) v = (v << 4) + (int)hextab[*ln++]; + // ggn: Okay, some comments here are in order I think.... + // The original madmac sources didn't parse the size at + // this point (i.e. .b/.w/.l). It was probably done at + // another point, although it's unclear to me exactly + // where. So why change this? My understanding (at least + // from what SCPCD said on IRC) is that .w addressing + // formats produce wrong code on jaguar (or doesn't execute + // properly? something like that). So the code was changed + // to mask off the upper bits depending on length (note: I + // don't think .b is valid at all! I only know of .w/.l, so + // this should probably be wiped). Then the code that + // parses the constant and checks to see if it's between + // $ffff0000 and $8000 never got triggered, so yay job + // done! ...now say we want to assemble a st .prg. One of + // the most widely spread optimisations is move.X expr.w,Y + // (or vice versa, or both, anyway...) to access hardware + // registers (which are mapped to $fxxxxx). This botchy + // thing would create "hilarious" code while trying to + // access hardware registers. So I made a condition to see + // if st mode or jaguar is active and apply the both or + // not. One last note: this is hardcoded to get optimised + // for now on ST mode, i.e. it can't generate code like + // move.w $00001234,d0 - it'll always get optimised to + // move.w $1234.w,d0. It's probably ok, but maybe a warning + // should be emitted? Or maybe finding a way to make it not + // auto-optimise? I think it's ok for now... if (*ln == '.') { - if ((*(ln + 1) == 'b') || (*(ln + 1) == 'B')) + if (obj_format == BSD) { - v &= 0x000000FF; - ln += 2; + if ((*(ln + 1) & 0xDF) == 'B') + { + v &= 0x000000FF; + ln += 2; + } + else if ((*(ln + 1) & 0xDF) == 'W') + { + v &= 0x0000FFFF; + ln += 2; + } + else if ((*(ln + 1) & 0xDF) == 'L') + { + ln += 2; + } } + } + + *tk++ = CONST; + *tk++ = v; + if (obj_format == ALCYON) + { if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W')) { - v &= 0x0000FFFF; + *tk++ = DOTW; ln += 2; } - - if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L')) + else if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L')) { + *tk++ = DOTL; ln += 2; } } - - *tk++ = CONST; - *tk++ = v; } else *tk++ = '$'; continue; - case '<': // < or << or <> or <= + case '<': // < or << or <> or <= switch (*ln) { case '<': @@ -1260,7 +1337,7 @@ if (verb_flag) printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n"); *tk++ = '<'; continue; } - case ':': // : or :: + case ':': // : or :: if (*ln == ':') { *tk++ = DCOLON; @@ -1270,7 +1347,7 @@ if (verb_flag) printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n"); *tk++ = ':'; continue; - case '=': // = or == + case '=': // = or == if (*ln == '=') { *tk++ = DEQUALS; @@ -1280,7 +1357,7 @@ if (verb_flag) printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n"); *tk++ = '='; continue; - case '>': // > or >> or >= + case '>': // > or >> or >= switch (*ln) { case '>': @@ -1295,7 +1372,7 @@ if (verb_flag) printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n"); *tk++ = '>'; continue; } - case '%': // % or binary constant + case '%': // % or binary constant if (*ln < '0' || *ln > '1') { *tk++ = '%'; @@ -1330,7 +1407,7 @@ if (verb_flag) printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n"); *tk++ = CONST; *tk++ = v; continue; - case '@': // @ or octal constant + case '@': // @ or octal constant if (*ln < '0' || *ln > '7') { *tk++ = '@'; @@ -1365,7 +1442,7 @@ if (verb_flag) printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n"); *tk++ = CONST; *tk++ = v; continue; - case '^': // ^ or ^^ + case '^': // ^ or ^^ if (*ln != '^') { *tk++ = '^'; @@ -1417,7 +1494,7 @@ if (verb_flag) printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n"); *tk++ = (TOKEN)j; continue; default: - interror(2); // Bad MULTX entry in chrtab + interror(2); // Bad MULTX entry in chrtab continue; } } @@ -1492,8 +1569,7 @@ goteol: //int d_goto(void) int d_goto(WORD unused) { - char * s1; // Temps for string comparison - char * s2; + char * s1, * s2; // Setup for the search if (*tok != SYMBOL) @@ -1520,7 +1596,7 @@ int d_goto(WORD unused) // Compare names (sleazo string compare) // This string compare is not right. Doesn't check for lengths. // (actually it does, but in a crappy, unclear way.) -#warning "!!! Bad string comparison !!!" +WARNING(!!!! Bad string comparison !!!) s1 = sym; // s2 = (char *)(defln + 1) + 1; s2 = defln->line; @@ -1566,7 +1642,6 @@ void DumpTokenBuffer(void) else if (*t == ACONST) printf("[ACONST]"); else if (*t == STRING) -// printf("[STRING]"); { t++; printf("[STRING:\"%s\"]", string[*t]); @@ -1610,6 +1685,8 @@ void DumpTokenBuffer(void) printf("[DOTI]"); else if (*t == ENDEXPR) printf("[ENDEXPR]"); + else if (*t == CR_ABSCOUNT) + printf("[CR_ABSCOUNT]"); else if (*t == CR_DEFINED) printf("[CR_DEFINED]"); else if (*t == CR_REFERENCED) @@ -1632,7 +1709,6 @@ void DumpTokenBuffer(void) printf("[A%u]", ((uint32_t)*t) - 0x88); else printf("[%X:%c]", (uint32_t)*t, (char)*t); -// printf("[%X]", (uint32_t)*t); } printf("[EOL]\n");