X-Git-Url: http://shamusworld.gotdns.org/cgi-bin/gitweb.cgi?p=rmac;a=blobdiff_plain;f=token.c;h=c78d97ac6f0054d8ca02bf68b0a3a1c0c10406e4;hp=78de051d7e7df3ae3281023a99f7eb65f96fe415;hb=d9c1d993917d9f0101747e137709fe2ba9854b7f;hpb=b10167d55798ea184f97fafda075255c0852f3b6 diff --git a/token.c b/token.c index 78de051..c78d97a 100644 --- a/token.c +++ b/token.c @@ -3,7 +3,7 @@ // TOKEN.C - Token Handling // Copyright (C) 199x Landon Dyer, 2011-2012 Reboot and Friends // RMAC derived from MADMAC v1.07 Written by Landon Dyer, 1986 -// Source Utilised with the Kind Permission of Landon Dyer +// Source utilised with the kind permission of Landon Dyer // #include "token.h" @@ -118,7 +118,7 @@ static char * riscregname[] = { // -// Initialize Tokenizer +// Initialize tokenizer // void InitTokenizer(void) { @@ -452,7 +452,8 @@ arg_num: continue; } - if (tk != NULL) // arg # is in range, so expand it + // Argument # is in range, so expand it + if (tk != NULL) { while (*tk != EOL) { @@ -612,11 +613,11 @@ overflow: // -// Get Next Line of Text from a Macro +// Get next line of text from a macro // char * GetNextMacroLine(void) { - unsigned source_addr; +// unsigned source_addr; IMACRO * imacro = cur_inobj->inobj.imacro; // LONG * strp = imacro->im_nextln; @@ -635,7 +636,7 @@ char * GetNextMacroLine(void) // -// Get Next Line of Text from a Repeat Block +// Get next line of text from a repeat block // char * GetNextRepeatLine(void) { @@ -667,7 +668,7 @@ char * GetNextRepeatLine(void) // -// Include a Source File used at the Root, and for ".include" Files +// Include a source file used at the root, and for ".include" files // int include(int handle, char * fname) { @@ -713,7 +714,7 @@ int include(int handle, char * fname) // -// Pop the Current Input Level +// Pop the current input level // int fpop(void) { @@ -727,7 +728,10 @@ int fpop(void) // Pop IFENT levels until we reach the conditional assembly context we // were at when the input object was entered. while (ifent != inobj->in_ifent) - d_endif(); + { + if (d_endif() != 0) // Something bad happened during endif parsing? + return -1; // If yes, bail instead of getting stuck in a loop + } tok = inobj->in_otok; // Restore tok and otok etok = inobj->in_etok; @@ -794,11 +798,9 @@ char * GetNextLine(void) // Scan for next end-of-line; handle stupid text formats by treating // \r\n the same as \n. (lone '\r' at end of buffer means we have to // check for '\n'). - i = 0; - j = fl->ifcnt; d = &fl->ifbuf[fl->ifind]; - for(p=d; iifcnt; i= j) - { - break; // Look for '\n' to eat - } + break; // Need to read more, then look for '\n' to eat else if (p[1] == '\n') - { i++; - } } + // Cover up the newline with end-of-string sentinel *p = '\0'; fl->ifind += i; @@ -826,11 +825,20 @@ char * GetNextLine(void) // Handle hanging lines by ignoring them (Input file is exhausted, no // \r or \n on last line) + // Shamus: This is retarded. Never ignore any input! if (!readamt && fl->ifcnt) { +#if 0 fl->ifcnt = 0; *p = '\0'; return NULL; +#else + // Really should check to see if we're at the end of the buffer! + // :-P + fl->ifbuf[fl->ifind + fl->ifcnt] = '\0'; + fl->ifcnt = 0; + return &fl->ifbuf[fl->ifind]; +#endif } // Truncate and return absurdly long lines. @@ -855,7 +863,9 @@ char * GetNextLine(void) fl->ifind = fl->ifcnt & 1; } - if ((readamt = read(fl->ifhandle, &fl->ifbuf[fl->ifind + fl->ifcnt], QUANTUM)) < 0) + readamt = read(fl->ifhandle, &fl->ifbuf[fl->ifind + fl->ifcnt], QUANTUM); + + if (readamt < 0) return NULL; if ((fl->ifcnt += readamt) == 0) @@ -865,29 +875,29 @@ char * GetNextLine(void) // -// Tokenize a Line +// Tokenize a line // int TokenizeLine(void) { - char * ln = NULL; // Ptr to current position in line - char * p; // Random character ptr - TOKEN * tk; // Token-deposit ptr - int state = 0; // State for keyword detector - int j = 0; // Var for keyword detector - char c; // Random char - VALUE v; // Random value - char * nullspot = NULL; // Spot to clobber for SYMBOL terminatn - int stuffnull; // 1:terminate SYMBOL '\0' at *nullspot + char * ln = NULL; // Ptr to current position in line + char * p; // Random character ptr + TOKEN * tk; // Token-deposit ptr + int state = 0; // State for keyword detector + int j = 0; // Var for keyword detector + char c; // Random char + VALUE v; // Random value + char * nullspot = NULL; // Spot to clobber for SYMBOL termination + int stuffnull; // 1:terminate SYMBOL '\0' at *nullspot char c1; - int stringNum = 0; // Pointer to string locations in tokenized line + int stringNum = 0; // Pointer to string locations in tokenized line retry: if (cur_inobj == NULL) // Return EOF if input stack is empty return TKEOF; - // Get another line of input from the current input source: a file, - // a macro, or a repeat-block + // Get another line of input from the current input source: a file, a + // macro, or a repeat-block switch (cur_inobj->in_type) { // Include-file: @@ -933,8 +943,10 @@ if (verb_flag) printf("TokenizeLine: Calling fpop() from SRC_IFILE...\n"); case SRC_IMACRO: if ((ln = GetNextMacroLine()) == NULL) { - ExitMacro(); // Exit macro (pop args, do fpop(), etc) - goto retry; // Try for more lines... + if (ExitMacro() == 0) // Exit macro (pop args, do fpop(), etc) + goto retry; // Try for more lines... + else + return TKEOF; // Oops, we got a non zero return code, signal EOF } lntag = '@'; @@ -961,10 +973,10 @@ if (verb_flag) printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n"); strcpy(lnbuf, ln); // General house-keeping - tok = tokeol; // Set "tok" to EOL in case of error - tk = etok; // Reset token ptr - stuffnull = 0; // Don't stuff nulls - totlines++; // Bump total #lines assembled + tok = tokeol; // Set "tok" to EOL in case of error + tk = etok; // Reset token ptr + stuffnull = 0; // Don't stuff nulls + totlines++; // Bump total #lines assembled // See if the entire line is a comment. This is a win if the programmer // puts in lots of comments @@ -994,12 +1006,12 @@ if (verb_flag) printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n"); if (c & STSYM) { - if (stuffnull) // Terminate old symbol from previous pass + if (stuffnull) // Terminate old symbol from previous pass *nullspot = EOS; - v = 0; // Assume no DOT attrib follows symbol + v = 0; // Assume no DOT attrib follows symbol stuffnull = 1; - p = nullspot = ln++; // Nullspot -> start of this symbol + p = nullspot = ln++; // Nullspot -> start of this symbol // Find end of symbol (and compute its length) for(j=1; (int)chrtab[*ln]&CTSYM; j++) @@ -1009,8 +1021,8 @@ if (verb_flag) printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n"); // symbol or keyword: if (*ln == '.') { - *ln++ = EOS; // Terminate symbol - stuffnull = 0; // And never try it again + *ln++ = EOS; // Terminate symbol + stuffnull = 0; // And never try it again // Character following the `.' must have a DOT attribute, and // the chararacter after THAT one must not have a start-symbol @@ -1054,8 +1066,14 @@ if (verb_flag) printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n"); j = -1; } - //make j = -1 if time, date etc with no preceeding ^^ - //defined, referenced, streq, macdef, date and time + // Make j = -1 if user tries to use a RISC register while in 68K mode + if (!(rgpu || rdsp) && ((TOKEN)j >= KW_R0 && (TOKEN)j <= KW_R31)) + { + j = -1; + } + + // Make j = -1 if time, date etc with no preceeding ^^ + // defined, referenced, streq, macdef, date and time switch ((TOKEN)j) { case 112: // defined @@ -1065,15 +1083,15 @@ if (verb_flag) printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n"); case 120: // time case 121: // date j = -1; - break; } + // If not tokenized keyword OR token was not found if (j < 0 || state < 0) { *tk++ = SYMBOL; //#warning -//problem here: nullspot is a char * but TOKEN is a uint32_t. On a 64-bit system, -//this will cause all kinds of mischief. +//problem here: nullspot is a char * but TOKEN is a uint32_t. On a 64-bit +//system, this will cause all kinds of mischief. #if 0 *tk++ = (TOKEN)nullspot; #else @@ -1109,7 +1127,7 @@ if (verb_flag) printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n"); { switch (*ln++) { - case '!': // ! or != + case '!': // ! or != if (*ln == '=') { *tk++ = NE; @@ -1119,8 +1137,8 @@ if (verb_flag) printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n"); *tk++ = '!'; continue; - case '\'': // 'string' - case '\"': // "string" + case '\'': // 'string' + case '\"': // "string" c1 = ln[-1]; *tk++ = STRING; //#warning @@ -1186,31 +1204,66 @@ if (verb_flag) printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n"); *p++ = EOS; continue; - case '$': // $, hex constant + case '$': // $, hex constant if ((int)chrtab[*ln] & HDIGIT) { v = 0; + // Parse the hex value while ((int)hextab[*ln] >= 0) v = (v << 4) + (int)hextab[*ln++]; + // ggn: Okay, some comments here are in order I think.... + // The original madmac sources didn't parse the size at + // this point (i.e. .b/.w/.l). It was probably done at + // another point, although it's unclear to me exactly + // where. So why change this? My understanding (at least + // from what SCPCD said on IRC) is that .w addressing + // formats produce wrong code on jaguar (or doesn't execute + // properly? something like that). So the code was changed + // to mask off the upper bits depending on length (note: I + // don't think .b is valid at all! I only know of .w/.l, so + // this should probably be wiped). Then the code that + // parses the constant and checks to see if it's between + // $ffff0000 and $8000 never got triggered, so yay job + // done! ...now say we want to assemble a st .prg. One of + // the most widely spread optimisations is move.X expr.w,Y + // (or vice versa, or both, anyway...) to access hardware + // registers (which are mapped to $fxxxxx). This botchy + // thing would create "hilarious" code while trying to + // access hardware registers. So I made a condition to see + // if st mode or jaguar is active and apply the both or + // not. One last note: this is hardcoded to get optimised + // for now on ST mode, i.e. it can't generate code like + // move.w $00001234,d0 - it'll always get optimised to + // move.w $1234.w,d0. It's probably ok, but maybe a warning + // should be emitted? Or maybe finding a way to make it not + // auto-optimise? I think it's ok for now... if (*ln == '.') { - if ((*(ln + 1) == 'b') || (*(ln + 1) == 'B')) - { - v &= 0x000000FF; - ln += 2; - } - - if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W')) + if (obj_format == ALCYON) { - v &= 0x0000FFFF; - ln += 2; + if ((*(ln + 1) == 'b') || (*(ln + 1) == 'B') || (*(ln + 1) == 'w') || (*(ln + 1) == 'W') || (*(ln + 1) == 'l') || (*(ln + 1) == 'L')) + { + ln += 2; + } } - - if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L')) + else { - ln += 2; + if ((*(ln + 1) & 0xDF) == 'B') + { + v &= 0x000000FF; + ln += 2; + } + else if ((*(ln + 1) & 0xDF) == 'W') + { + v &= 0x0000FFFF; + ln += 2; + } + else if ((*(ln + 1) & 0xDF) == 'L') + { + ln += 2; + } } } @@ -1221,7 +1274,7 @@ if (verb_flag) printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n"); *tk++ = '$'; continue; - case '<': // < or << or <> or <= + case '<': // < or << or <> or <= switch (*ln) { case '<': @@ -1240,7 +1293,7 @@ if (verb_flag) printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n"); *tk++ = '<'; continue; } - case ':': // : or :: + case ':': // : or :: if (*ln == ':') { *tk++ = DCOLON; @@ -1250,7 +1303,7 @@ if (verb_flag) printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n"); *tk++ = ':'; continue; - case '=': // = or == + case '=': // = or == if (*ln == '=') { *tk++ = DEQUALS; @@ -1260,7 +1313,7 @@ if (verb_flag) printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n"); *tk++ = '='; continue; - case '>': // > or >> or >= + case '>': // > or >> or >= switch (*ln) { case '>': @@ -1275,7 +1328,7 @@ if (verb_flag) printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n"); *tk++ = '>'; continue; } - case '%': // % or binary constant + case '%': // % or binary constant if (*ln < '0' || *ln > '1') { *tk++ = '%'; @@ -1310,7 +1363,7 @@ if (verb_flag) printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n"); *tk++ = CONST; *tk++ = v; continue; - case '@': // @ or octal constant + case '@': // @ or octal constant if (*ln < '0' || *ln > '7') { *tk++ = '@'; @@ -1345,7 +1398,7 @@ if (verb_flag) printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n"); *tk++ = CONST; *tk++ = v; continue; - case '^': // ^ or ^^ + case '^': // ^ or ^^ if (*ln != '^') { *tk++ = '^'; @@ -1397,7 +1450,7 @@ if (verb_flag) printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n"); *tk++ = (TOKEN)j; continue; default: - interror(2); // Bad MULTX entry in chrtab + interror(2); // Bad MULTX entry in chrtab continue; } } @@ -1410,22 +1463,20 @@ if (verb_flag) printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n"); while ((int)chrtab[*ln] & DIGIT) v = (v * 10) + *ln++ - '0'; - // See if there's a .[bwl] after the constant, & deal with it + // See if there's a .[bwl] after the constant & deal with it if so if (*ln == '.') { - if ((*(ln+1) == 'b') || (*(ln+1) == 'B')) + if ((*(ln + 1) == 'b') || (*(ln + 1) == 'B')) { v &= 0x000000FF; ln += 2; } - - if ((*(ln+1) == 'w') || (*(ln+1) == 'W')) + else if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W')) { v &= 0x0000FFFF; ln += 2; } - - if ((*(ln+1) == 'l') || (*(ln+1) == 'L')) + else if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L')) { ln += 2; } @@ -1433,6 +1484,7 @@ if (verb_flag) printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n"); *tk++ = CONST; *tk++ = v; +//printf("CONST: %i\n", v); continue; } @@ -1473,11 +1525,7 @@ goteol: //int d_goto(void) int d_goto(WORD unused) { -// char * sym; // Label to search for -// LONG * defln; // Macro definition strings - char * s1; // Temps for string comparison - char * s2; -// IMACRO * imacro; // Macro invocation block + char * s1, * s2; // Setup for the search if (*tok != SYMBOL) @@ -1504,7 +1552,7 @@ int d_goto(WORD unused) // Compare names (sleazo string compare) // This string compare is not right. Doesn't check for lengths. // (actually it does, but in a crappy, unclear way.) -#warning "!!! Bad string comparison !!!" +WARNING(!!!! Bad string comparison !!!) s1 = sym; // s2 = (char *)(defln + 1) + 1; s2 = defln->line; @@ -1550,7 +1598,6 @@ void DumpTokenBuffer(void) else if (*t == ACONST) printf("[ACONST]"); else if (*t == STRING) -// printf("[STRING]"); { t++; printf("[STRING:\"%s\"]", string[*t]); @@ -1616,7 +1663,6 @@ void DumpTokenBuffer(void) printf("[A%u]", ((uint32_t)*t) - 0x88); else printf("[%X:%c]", (uint32_t)*t, (char)*t); -// printf("[%X]", (uint32_t)*t); } printf("[EOL]\n");