+ uint8_t * ln = NULL; // Ptr to current position in line
+ uint8_t * p; // Random character ptr
+ TOKEN * tk; // Token-deposit ptr
+ int state = 0; // State for keyword detector
+ int j = 0; // Var for keyword detector
+ uint8_t c; // Random char
+ VALUE v; // Random value
+ uint8_t * nullspot = NULL; // Spot to clobber for SYMBOL termination
+ int stuffnull; // 1:terminate SYMBOL '\0' at *nullspot
+ uint8_t c1;
+ int stringNum = 0; // Pointer to string locations in tokenized line
+
+retry:
+
+ if (cur_inobj == NULL) // Return EOF if input stack is empty
+ return TKEOF;
+
+ // Get another line of input from the current input source: a file, a
+ // macro, or a repeat-block
+ switch (cur_inobj->in_type)
+ {
+ // Include-file:
+ // o handle EOF;
+ // o bump source line number;
+ // o tag the listing-line with a space;
+ // o kludge lines generated by Alcyon C.
+ case SRC_IFILE:
+ if ((ln = GetNextLine()) == NULL)
+ {
+DEBUG { printf("TokenizeLine: Calling fpop() from SRC_IFILE...\n"); }
+ if (fpop() == 0) // Pop input level
+ goto retry; // Try for more lines
+ else
+ {
+ ifent->if_prev = (IFENT *)-1; //Signal Assemble() that we have reached EOF with unbalanced if/endifs
+ return TKEOF;
+ }
+ }
+
+ curlineno++; // Bump line number
+ lntag = SPACE;
+
+ if (as68_flag)
+ {
+ // AS68 compatibility, throw away all lines starting with
+ // back-quotes, tildes, or '*'
+ // On other lines, turn the first '*' into a semi-colon.
+ if (*ln == '`' || *ln == '~' || *ln == '*')
+ *ln = ';';
+ else
+ {
+ for(p=ln; *p!=EOS; p++)
+ {
+ if (*p == '*')
+ {
+ *p = ';';
+ break;
+ }
+ }
+ }
+ }
+
+ break;
+
+ // Macro-block:
+ // o Handle end-of-macro;
+ // o tag the listing-line with an at (@) sign.
+ case SRC_IMACRO:
+ if ((ln = GetNextMacroLine()) == NULL)
+ {
+ if (ExitMacro() == 0) // Exit macro (pop args, do fpop(), etc)
+ goto retry; // Try for more lines...
+ else
+ return TKEOF; // Oops, we got a non zero return code, signal EOF
+ }
+
+ lntag = '@';
+ break;
+
+ // Repeat-block:
+ // o Handle end-of-repeat-block;
+ // o tag the listing-line with a pound (#) sign.
+ case SRC_IREPT:
+ if ((ln = GetNextRepeatLine()) == NULL)
+ {
+ DEBUG { printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n"); }
+ fpop();
+ goto retry;
+ }
+
+ lntag = '#';
+ break;
+ }
+
+ // Save text of the line. We only do this during listings and within
+ // macro-type blocks, since it is expensive to unconditionally copy every
+ // line.
+ if (lnsave)
+ strcpy(lnbuf, ln);
+
+ // General housekeeping
+ tok = tokeol; // Set "tok" to EOL in case of error
+ tk = etok; // Reset token ptr
+ stuffnull = 0; // Don't stuff nulls
+ totlines++; // Bump total #lines assembled
+
+ // See if the entire line is a comment. This is a win if the programmer
+ // puts in lots of comments
+ if (*ln == '*' || *ln == ';' || ((*ln == '/') && (*(ln + 1) == '/')))
+ goto goteol;
+
+ // And here we have a very ugly hack for signalling a single line 'turn off
+ // optimization'. There's really no nice way to do this, so hack it is!
+ optimizeOff = 0; // Default is to take optimizations as they come
+
+ if (*ln == '!')
+ {
+ optimizeOff = 1; // Signal that we don't want to optimize this line
+ ln++; // & skip over the darned thing
+ }
+
+ // Main tokenization loop;
+ // o skip whitespace;
+ // o handle end-of-line;
+ // o handle symbols;
+ // o handle single-character tokens (operators, etc.);
+ // o handle multiple-character tokens (constants, strings, etc.).
+ for(; *ln!=EOS;)
+ {
+ // Skip whitespace, handle EOL
+ while (chrtab[*ln] & WHITE)
+ ln++;
+
+ // Handle EOL, comment with ';'
+ if (*ln == EOS || *ln == ';'|| ((*ln == '/') && (*(ln + 1) == '/')))
+ break;
+
+ // Handle start of symbol. Symbols are null-terminated in place. The
+ // termination is always one symbol behind, since there may be no place
+ // for a null in the case that an operator immediately follows the name.
+ c = chrtab[*ln];
+
+ if (c & STSYM)
+ {
+ if (stuffnull) // Terminate old symbol from previous pass
+ *nullspot = EOS;
+
+ v = 0; // Assume no DOT attrib follows symbol
+ stuffnull = 1;
+
+ // In some cases, we need to check for a DOTx at the *beginning*
+ // of a symbol, as the "start" of the line we're currently looking
+ // at could be somewhere in the middle of that line!
+ if (*ln == '.')
+ {
+ // Make sure that it's *only* a .[bwsl] following, and not the
+ // start of a local symbol:
+ if ((chrtab[*(ln + 1)] & DOT)
+ && (dotxtab[*(ln + 1)] != 0)
+ && !(chrtab[*(ln + 2)] & CTSYM))
+ {
+ // We found a legitimate DOTx construct, so add it to the
+ // token stream:
+ ln++;
+ stuffnull = 0;
+ *tk++ = (TOKEN)dotxtab[*ln++];
+ continue;
+ }
+ }
+
+ p = nullspot = ln++; // Nullspot -> start of this symbol
+
+ // Find end of symbol (and compute its length)
+ for(j=1; (int)chrtab[*ln]&CTSYM; j++)
+ ln++;
+
+ // Handle "DOT" special forms (like ".b") that follow a normal
+ // symbol or keyword:
+ if (*ln == '.')
+ {
+ *ln++ = EOS; // Terminate symbol
+ stuffnull = 0; // And never try it again
+
+ // Character following the `.' must have a DOT attribute, and
+ // the character after THAT one must not have a start-symbol
+ // attribute (to prevent symbols that look like, for example,
+ // "zingo.barf", which might be a good idea anyway....)
+ if (((chrtab[*ln] & DOT) == 0) || (dotxtab[*ln] == 0))
+ return error("[bwsl] must follow '.' in symbol");
+
+ v = (VALUE)dotxtab[*ln++];
+
+ if (chrtab[*ln] & CTSYM)
+ return error("misuse of '.'; not allowed in symbols");
+ }
+
+ // If the symbol is small, check to see if it's really the name of
+ // a register.
+ if (j <= KWSIZE)
+ {
+ for(state=0; state>=0;)
+ {
+ j = (int)tolowertab[*p++];
+ j += kwbase[state];
+
+ if (kwcheck[j] != state)
+ {
+ j = -1;
+ break;
+ }
+
+ if (*p == EOS || p == ln)
+ {
+ j = kwaccept[j];
+ break;
+ }
+
+ state = kwtab[j];
+ }
+ }
+ else
+ {
+ j = -1;
+ }
+
+ // Make j = -1 if user tries to use a RISC register while in 68K mode
+ if (!(rgpu || rdsp) && ((TOKEN)j >= KW_R0 && (TOKEN)j <= KW_R31))
+ {
+ j = -1;
+ }
+
+ // Make j = -1 if time, date etc. appear with no preceding ^^
+ // defined, referenced, streq, macdef, date and time
+ switch ((TOKEN)j)
+ {
+ case 112: // defined
+ case 113: // referenced
+ case 118: // streq
+ case 119: // macdef
+ case 120: // time
+ case 121: // date
+ j = -1;
+ }
+
+ // If not tokenized keyword OR token was not found
+ if ((j < 0) || (state < 0))
+ {
+ *tk++ = SYMBOL;
+//#warning
+//problem here: nullspot is a char * but TOKEN is a uint32_t. On a 64-bit
+//system, this will cause all kinds of mischief.
+#if 0
+ *tk++ = (TOKEN)nullspot;
+#else
+ string[stringNum] = nullspot;
+ *tk++ = stringNum;
+ stringNum++;
+#endif
+ }
+ else
+ {
+ *tk++ = (TOKEN)j;
+ stuffnull = 0;
+ }
+
+ if (v) // Record attribute token (if any)
+ *tk++ = (TOKEN)v;
+
+ if (stuffnull) // Arrange for string termination on next pass
+ nullspot = ln;
+
+ continue;
+ }
+
+ // Handle identity tokens
+ if (c & SELF)
+ {
+ *tk++ = *ln++;
+ continue;
+ }
+
+ // Handle multiple-character tokens
+ if (c & MULTX)
+ {
+ switch (*ln++)
+ {
+ case '!': // ! or !=
+ if (*ln == '=')
+ {
+ *tk++ = NE;
+ ++ln;
+ }
+ else
+ *tk++ = '!';
+
+ continue;
+ case '\'': // 'string'
+ if (m6502)
+ {
+ // Hardcoded for now, maybe this will change in the future
+ *tk++ = STRINGA8;
+ goto dostring;
+ }
+ // Fall through
+ case '\"': // "string"
+ *tk++ = STRING;
+dostring:
+ c1 = ln[-1];
+ string[stringNum] = ln;
+ *tk++ = stringNum;
+ stringNum++;
+
+ for(p=ln; *ln!=EOS && *ln!=c1;)
+ {
+ c = *ln++;
+
+ if (c == '\\')
+ {
+ switch (*ln++)
+ {
+ case EOS:
+ return(error("unterminated string"));
+ case 'e':
+ c = '\033';
+ break;
+ case 'n':
+ c = '\n';
+ break;
+ case 'b':
+ c = '\b';
+ break;
+ case 't':
+ c = '\t';
+ break;
+ case 'r':
+ c = '\r';
+ break;
+ case 'f':
+ c = '\f';
+ break;
+ case '\"':
+ c = '\"';
+ break;
+ case '\'':
+ c = '\'';
+ break;
+ case '\\':
+ c = '\\';
+ break;
+ case '!':
+ // If we're evaluating a macro
+ // this is valid and expands to
+ // "dot-size"
+ break;
+ default:
+ warn("bad backslash code in string");
+ ln--;
+ break;
+ }
+ }
+
+ *p++ = c;
+ }
+
+ if (*ln++ != c1)
+ return error("unterminated string");
+
+ *p++ = EOS;
+ continue;
+ case '$': // $, hex constant
+ if (chrtab[*ln] & HDIGIT)
+ {
+ v = 0;
+
+ // Parse the hex value
+ while (hextab[*ln] >= 0)
+ v = (v << 4) + (int)hextab[*ln++];
+
+ if (*ln == '.')
+ {
+ if (obj_format == BSD)
+ {
+ if ((*(ln + 1) & 0xDF) == 'B')
+ {
+ v &= 0x000000FF;
+ ln += 2;
+ }
+ else if ((*(ln + 1) & 0xDF) == 'W')
+ {
+ v &= 0x0000FFFF;
+ ln += 2;
+ }
+ else if ((*(ln + 1) & 0xDF) == 'L')
+ {
+ ln += 2;
+ }
+ }
+ }
+
+ *tk++ = CONST;
+ *tk++ = v;
+
+ if (obj_format == ALCYON)
+ {
+ if (*ln == '.')
+ {
+ if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
+ {
+ *tk++ = DOTW;
+ ln += 2;
+ }
+ else if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
+ {
+ *tk++ = DOTL;
+ ln += 2;
+ }
+ }
+ }
+ }
+ else
+ *tk++ = '$';
+
+ continue;
+ case '<': // < or << or <> or <=
+ switch (*ln)
+ {
+ case '<':
+ *tk++ = SHL;
+ ++ln;
+ continue;
+ case '>':
+ *tk++ = NE;
+ ++ln;
+ continue;
+ case '=':
+ *tk++ = LE;
+ ++ln;
+ continue;
+ default:
+ *tk++ = '<';
+ continue;
+ }
+ case ':': // : or ::
+ if (*ln == ':')
+ {
+ *tk++ = DCOLON;
+ ++ln;
+ }
+ else
+ *tk++ = ':';
+
+ continue;
+ case '=': // = or ==
+ if (*ln == '=')
+ {
+ *tk++ = DEQUALS;
+ ++ln;
+ }
+ else
+ *tk++ = '=';
+
+ continue;
+ case '>': // > or >> or >=
+ switch (*ln)
+ {
+ case '>':
+ *tk++ = SHR;
+ ln++;
+ continue;
+ case '=':
+ *tk++ = GE;
+ ln++;
+ continue;
+ default:
+ *tk++ = '>';
+ continue;
+ }
+ case '%': // % or binary constant
+ if (*ln < '0' || *ln > '1')
+ {
+ *tk++ = '%';
+ continue;
+ }
+
+ v = 0;
+
+ while (*ln >= '0' && *ln <= '1')
+ v = (v << 1) + *ln++ - '0';
+
+ if (*ln == '.')
+ {
+ if ((*(ln + 1) == 'b') || (*(ln + 1) == 'B'))
+ {
+ v &= 0x000000FF;
+ ln += 2;
+ }
+
+ if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
+ {
+ v &= 0x0000FFFF;
+ ln += 2;
+ }
+
+ if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
+ {
+ ln += 2;
+ }
+ }
+
+ *tk++ = CONST;
+ *tk++ = v;
+ continue;
+ case '@': // @ or octal constant
+ if (*ln < '0' || *ln > '7')
+ {
+ *tk++ = '@';
+ continue;
+ }
+
+ v = 0;
+
+ while (*ln >= '0' && *ln <= '7')
+ v = (v << 3) + *ln++ - '0';
+
+ if (*ln == '.')
+ {
+ if ((*(ln+1) == 'b') || (*(ln+1) == 'B'))
+ {
+ v &= 0x000000FF;
+ ln += 2;
+ }
+
+ if ((*(ln+1) == 'w') || (*(ln+1) == 'W'))
+ {
+ v &= 0x0000FFFF;
+ ln += 2;
+ }
+
+ if ((*(ln+1) == 'l') || (*(ln+1) == 'L'))
+ {
+ ln += 2;
+ }
+ }
+
+ *tk++ = CONST;
+ *tk++ = v;
+ continue;
+ case '^': // ^ or ^^ <operator-name>
+ if (*ln != '^')
+ {
+ *tk++ = '^';
+ continue;
+ }
+
+ if (((int)chrtab[*++ln] & STSYM) == 0)
+ {
+ error("invalid symbol following ^^");
+ continue;
+ }
+
+ p = ln++;
+
+ while ((int)chrtab[*ln] & CTSYM)
+ ++ln;
+
+ for(state=0; state>=0;)
+ {
+ // Get char, convert to lowercase
+ j = *p++;
+
+ if (j >= 'A' && j <= 'Z')
+ j += 0x20;
+
+ j += kwbase[state];
+
+ if (kwcheck[j] != state)
+ {
+ j = -1;
+ break;
+ }
+
+ if (*p == EOS || p == ln)
+ {
+ j = kwaccept[j];
+ break;
+ }
+
+ state = kwtab[j];
+ }
+
+ if (j < 0 || state < 0)
+ {
+ error("unknown symbol following ^^");
+ continue;
+ }
+
+ *tk++ = (TOKEN)j;
+ continue;
+ default:
+ interror(2); // Bad MULTX entry in chrtab
+ continue;
+ }
+ }
+
+ // Handle decimal constant
+ if (c & DIGIT)
+ {
+ v = 0;
+
+ while ((int)chrtab[*ln] & DIGIT)
+ v = (v * 10) + *ln++ - '0';
+
+ // See if there's a .[bwl] after the constant & deal with it if so
+ if (*ln == '.')
+ {
+ if ((*(ln + 1) == 'b') || (*(ln + 1) == 'B'))
+ {
+ v &= 0x000000FF;
+ ln += 2;
+ *tk++ = CONST;
+ *tk++ = v;
+ *tk++ = DOTB;
+ }
+ else if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
+ {
+ v &= 0x0000FFFF;
+ ln += 2;
+ *tk++ = CONST;
+ *tk++ = v;
+ *tk++ = DOTW;
+ }
+ else if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
+ {
+ ln += 2;
+ *tk++ = CONST;
+ *tk++ = v;
+ *tk++ = DOTL;
+ }