- char *ln = NULL; // Ptr to current position in line
- char *p; // Random character ptr
- TOKEN *tk; // Token-deposit ptr
- int state = 0; // State for keyword detector
- int j = 0; // Var for keyword detector
- char c; // Random char
- VALUE v; // Random value
- char *nullspot = NULL; // Spot to clobber for SYMBOL terminatn
- int stuffnull; // 1:terminate SYMBOL '\0' at *nullspot
- char c1;
-
- retry:
-
- if(cur_inobj == NULL) // Return EOF if input stack is empty
- return(TKEOF);
-
- // Get another line of input from the current input source: a file, a macro, or a repeat-block
- switch(cur_inobj->in_type) {
- // Include-file:
- // o handle EOF;
- // o bump source line number;
- // o tag the listing-line with a space;
- // o kludge lines generated by Alcyon C.
- case SRC_IFILE:
- if((ln = getln()) == NULL) {
- fpop(); // Pop input level
- goto retry; // Try for more lines
- }
- ++curlineno; // Bump line number
- lntag = SPACE;
- if(as68_flag) {
- // AS68 compatibility, throw away all lines starting with back-quotes, tildes, or '*'
- // On other lines, turn the first '*' into a semi-colon.
- if(*ln == '`' || *ln == '~' || *ln == '*') *ln = ';';
- else for(p = ln; *p != EOS; ++p) {
- if(*p == '*') {
- *p = ';';
- break;
- }
- }
- }
- break;
-
- // Macro-block:
- // o Handle end-of-macro;
- // o tag the listing-line with an at (@) sign.
- case SRC_IMACRO:
- if((ln = getmln()) == NULL) {
- exitmac(); // Exit macro (pop args, do fpop(), etc)
- goto retry; // Try for more lines...
- }
- lntag = '@';
- break;
-
- // Repeat-block:
- // o Handle end-of-repeat-block;
- // o tag the listing-line with a pound (#) sign.
- case SRC_IREPT:
- if((ln = getrln()) == NULL) {
- fpop();
- goto retry;
- }
- lntag = '#';
- break;
- }
-
- // Save text of the line. We only do this during listings and within macro-type blocks,
- // since it is expensive to unconditionally copy every line.
- if(lnsave) strcpy(lnbuf, ln);
-
- // General house-keeping
- tok = tokeol; // Set "tok" to EOL in case of error
- tk = etok; // Reset token ptr
- stuffnull = 0; // Don't stuff nulls
- ++totlines; // Bump total #lines assembled
-
- // See if the entire line is a comment. This is a win if the programmer puts in lots of comments
- if(*ln == '*' || *ln == ';' || ((*ln == '/') && (*(ln+1) == '/'))) goto goteol;
-
- // Main tokenization loop;
- // o skip whitespace;
- // o handle end-of-line;
- // o handle symbols;
- // o handle single-character tokens (operators, etc.);
- // o handle multiple-character tokens (constants, strings, etc.).
- for(; *ln != EOS;) {
- // Skip whitespace, handle EOL
- while((int)chrtab[*ln] & WHITE)
- ++ln;
-
- // Handle EOL, comment with ';'
- if(*ln == EOS || *ln == ';'|| ((*ln == '/') && (*(ln+1) == '/')))
- break;
-
- // Handle start of symbol. Symbols are null-terminated in place. The termination is
- // always one symbol behind, since there may be no place for a null in the case that
- // an operator immediately follows the name.
- c = chrtab[*ln];
- if(c & STSYM) {
- if(stuffnull) // Terminate old symbol
- *nullspot = EOS;
- v = 0; // Assume no DOT attrib follows symbol
- stuffnull = 1;
- p = nullspot = ln++; // Nullspot -> start of this symbol
-
- // Find end of symbol (and compute its length)
- for(j = 1; (int)chrtab[*ln] & CTSYM; ++j)
- ++ln;
-
- // Handle "DOT" special forms (like ".b") that follow a normal symbol or keyword:
- if(*ln == '.') {
- *ln++ = EOS; // Terminate symbol
- stuffnull = 0; // And never try it again
-
- // Character following the `.' must have a DOT attribute, and the chararacter after
- // THAT one must not have a start-symbol attribute (to prevent symbols that look
- // like, for example, "zingo.barf", which might be a good idea anyway....)
- if((((int)chrtab[*ln] & DOT) == 0) || ((int)dotxtab[*ln] <= 0))
- return(error("[bwsl] must follow `.' in symbol"));
- v = (VALUE)dotxtab[*ln++];
- if((int)chrtab[*ln] & CTSYM)
- return(error("misuse of `.', not allowed in symbols"));
- }
-
- // If the symbol is small, check to see if it's really the name of a register.
- if(j <= KWSIZE) {
- for(state = 0; state >= 0;) {
- j = (int)tolowertab[*p++];
- j += kwbase[state];
- if(kwcheck[j] != state) {
- j = -1;
- break;
- }
-
- if(*p == EOS || p == ln) {
- j = kwaccept[j];
- break;
- }
-
- state = kwtab[j];
- }
- } else {
- j = -1;
- }
-
- //make j = -1 if time, date etc with no preceeding ^^
- //defined, referenced, streq, macdef, date and time
- switch((TOKEN)j) {
- case 112: // defined
- case 113: // referenced
- case 118: // streq
- case 119: // macdef
- case 120: // time
- case 121: // date
- j = -1;
- break;
- }
-
- if(j < 0 || state < 0) {
- *tk++ = SYMBOL;
- *tk++ = (TOKEN)nullspot;
- } else {
- *tk++ = (TOKEN)j;
- stuffnull = 0;
- }
-
- if(v) // Record attribute token (if any)
- *tk++ = (TOKEN)v;
-
- if(stuffnull) // Arrange for string termination
- nullspot = ln;
- continue;
- }
-
- // Handle identity tokens
- if(c & SELF) {
- *tk++ = *ln++;
- continue;
- }
-
- // Handle multiple-character tokens
- if(c & MULTX) {
- switch(*ln++) {
- case '!': // ! or !=
- if(*ln == '=') {
- *tk++ = NE;
- ++ln;
- } else *tk++ = '!';
- continue;
- case '\'': // 'string'
- case '\"': // "string"
- c1 = ln[-1];
- *tk++ = STRING;
- *tk++ = (TOKEN)ln;
-
- for(p = ln; *ln != EOS && *ln != c1;) {
- c = *ln++;
- if(c == '\\')
- switch(*ln++) {
- case EOS:
- return(error("unterminated string"));
- case 'e':
- c = '\033';
- break;
- case 'n':
- c = '\n';
- break;
- case 'b':
- c = '\b';
- break;
- case 't':
- c = '\t';
- break;
- case 'r':
- c = '\r';
- break;
- case 'f':
- c = '\f';
- break;
- case '\"':
- c = '\"';
- break;
- case '\'':
- c = '\'';
- break;
- case '\\':
- c = '\\';
- break;
- default:
- warn("bad backslash code in string");
- --ln;
- break;
- }
- *p++ = c;
- }
-
- if(*ln++ != c1)
- return(error("unterminated string"));
- *p++ = EOS;
- continue;
- case '$': // $, hex constant
- if((int)chrtab[*ln] & HDIGIT) {
- v = 0;
- while((int)hextab[*ln] >= 0)
- v = (v << 4) + (int)hextab[*ln++];
- if(*ln == '.') {
- if((*(ln+1) == 'b') || (*(ln+1) == 'B')) { v &= 0x000000FF; ln += 2; }
- if((*(ln+1) == 'w') || (*(ln+1) == 'W')) { v &= 0x0000FFFF; ln += 2; }
- if((*(ln+1) == 'l') || (*(ln+1) == 'L')) { ln += 2; }
- }
- *tk++ = CONST;
- *tk++ = v;
- } else *tk++ = '$';
- continue;
- case '<': // < or << or <> or <=
- switch(*ln) {
- case '<':
- *tk++ = SHL;
- ++ln;
- continue;
- case '>':
- *tk++ = NE;
- ++ln;
- continue;
- case '=':
- *tk++ = LE;
- ++ln;
- continue;
- default:
- *tk++ = '<';
- continue;
- }
- case ':': // : or ::
- if(*ln == ':') {
- *tk++ = DCOLON;
- ++ln;
- } else *tk++ = ':';
- continue;
- case '=': // = or ==
- if(*ln == '=') {
- *tk++ = DEQUALS;
- ++ln;
- } else *tk++ = '=';
- continue;
- case '>': // > or >> or >=
- switch(*ln) {
- case '>':
- *tk++ = SHR;
- ++ln;
- continue;
- case '=':
- *tk++ = GE;
- ++ln;
- continue;
- default:
- *tk++ = '>';
- continue;
- }
- case '%': // % or binary constant
- if(*ln < '0' || *ln > '1') {
- *tk++ = '%';
- continue;
- }
- v = 0;
- while(*ln >= '0' && *ln <= '1')
- v = (v << 1) + *ln++ - '0';
- if(*ln == '.') {
- if((*(ln+1) == 'b') || (*(ln+1) == 'B')) { v &= 0x000000FF; ln += 2; }
- if((*(ln+1) == 'w') || (*(ln+1) == 'W')) { v &= 0x0000FFFF; ln += 2; }
- if((*(ln+1) == 'l') || (*(ln+1) == 'L')) { ln += 2; }
- }
- *tk++ = CONST;
- *tk++ = v;
- continue;
- case '@': // @ or octal constant
- if(*ln < '0' || *ln > '7') {
- *tk++ = '@';
- continue;
- }
- v = 0;
- while(*ln >= '0' && *ln <= '7')
- v = (v << 3) + *ln++ - '0';
- if(*ln == '.') {
- if((*(ln+1) == 'b') || (*(ln+1) == 'B')) { v &= 0x000000FF; ln += 2; }
- if((*(ln+1) == 'w') || (*(ln+1) == 'W')) { v &= 0x0000FFFF; ln += 2; }
- if((*(ln+1) == 'l') || (*(ln+1) == 'L')) { ln += 2; }
- }
- *tk++ = CONST;
- *tk++ = v;
- continue;
- case '^': // ^ or ^^ <operator-name>
- if(*ln != '^') {
- *tk++ = '^';
- continue;
- }
-
- if(((int)chrtab[*++ln] & STSYM) == 0) {
- error("invalid symbol following ^^");
- continue;
- }
-
- p = ln++;
- while((int)chrtab[*ln] & CTSYM)
- ++ln;
-
- for(state = 0; state >= 0;) {
- // Get char, convert to lowercase
- j = *p++;
- if(j >= 'A' && j <= 'Z')
- j += 0x20;
-
- j += kwbase[state];
- if(kwcheck[j] != state) {
- j = -1;
- break;
- }
-
- if(*p == EOS || p == ln) {
- j = kwaccept[j];
- break;
- }
- state = kwtab[j];
- }
-
- if(j < 0 || state < 0) {
- error("unknown symbol following ^^");
- continue;
- }
-
- *tk++ = (TOKEN)j;
- continue;
- default:
- interror(2); // Bad MULTX entry in chrtab
- continue;
- }
- }
-
-
- // Handle decimal constant
- if(c & DIGIT) {
- v = 0;
- while((int)chrtab[*ln] & DIGIT)
- v = (v * 10) + *ln++ - '0';
- if(*ln == '.') {
- if((*(ln+1) == 'b') || (*(ln+1) == 'B')) { v &= 0x000000FF; ln += 2; }
- if((*(ln+1) == 'w') || (*(ln+1) == 'W')) { v &= 0x0000FFFF; ln += 2; }
- if((*(ln+1) == 'l') || (*(ln+1) == 'L')) { ln += 2; }
- }
- *tk++ = CONST;
- *tk++ = v;
- continue;
- }
-
- // Handle illegal character
- return(error("illegal character"));
- }
-
- // Terminate line of tokens and return "success."
-
- goteol:
-
- tok = etok; // Set tok to beginning of line
- if(stuffnull) // Terminate last SYMBOL
- *nullspot = EOS;
- *tk++ = EOL;
-
- return(OK);
+ uint8_t * ln = NULL; // Ptr to current position in line
+ uint8_t * p; // Random character ptr
+ PTR tk; // Token-deposit ptr
+ int state = 0; // State for keyword detector
+ int j = 0; // Var for keyword detector
+ uint8_t c; // Random char
+ uint64_t v; // Random value
+ uint32_t cursize = 0; // Current line's size (.b, .w, .l, .s, .q, .d)
+ double f; // Random float
+ uint8_t * nullspot = NULL; // Spot to clobber for SYMBOL termination
+ int stuffnull; // 1:terminate SYMBOL '\0' at *nullspot
+ uint8_t c1;
+ int stringNum = 0; // Pointer to string locations in tokenized line
+
+retry:
+
+ if (cur_inobj == NULL) // Return EOF if input stack is empty
+ return TKEOF;
+
+ // Get another line of input from the current input source: a file, a
+ // macro, or a repeat-block
+ switch (cur_inobj->in_type)
+ {
+ // Include-file:
+ // o handle EOF;
+ // o bump source line number;
+ // o tag the listing-line with a space;
+ // o kludge lines generated by Alcyon C.
+ case SRC_IFILE:
+ if ((ln = GetNextLine()) == NULL)
+ {
+DEBUG { printf("TokenizeLine: Calling fpop() from SRC_IFILE...\n"); }
+ if (fpop() == 0) // Pop input level
+ goto retry; // Try for more lines
+ else
+ {
+ ifent->if_prev = (IFENT *)-1; //Signal Assemble() that we have reached EOF with unbalanced if/endifs
+ return TKEOF;
+ }
+ }
+
+ curlineno++; // Bump line number
+ lntag = SPACE;
+
+ if (as68_flag)
+ {
+ // AS68 compatibility, throw away all lines starting with
+ // back-quotes, tildes, or '*'
+ // On other lines, turn the first '*' into a semi-colon.
+ if (*ln == '`' || *ln == '~' || *ln == '*')
+ *ln = ';';
+ else
+ {
+ for(p=ln; *p!=EOS; p++)
+ {
+ if (*p == '*')
+ {
+ *p = ';';
+ break;
+ }
+ }
+ }
+ }
+
+ break;
+
+ // Macro-block:
+ // o Handle end-of-macro;
+ // o tag the listing-line with an at (@) sign.
+ case SRC_IMACRO:
+ if ((ln = GetNextMacroLine()) == NULL)
+ {
+ if (ExitMacro() == 0) // Exit macro (pop args, do fpop(), etc)
+ goto retry; // Try for more lines...
+ else
+ return TKEOF; // Oops, we got a non zero return code, signal EOF
+ }
+
+ lntag = '@';
+ break;
+
+ // Repeat-block:
+ // o Handle end-of-repeat-block;
+ // o tag the listing-line with a pound (#) sign.
+ case SRC_IREPT:
+ if ((ln = GetNextRepeatLine()) == NULL)
+ {
+ DEBUG { printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n"); }
+ fpop();
+ goto retry;
+ }
+
+ lntag = '#';
+ break;
+ }
+
+ // Save text of the line. We only do this during listings and within
+ // macro-type blocks, since it is expensive to unconditionally copy every
+ // line.
+ if (lnsave)
+ strcpy(lnbuf, ln);
+
+ // General housekeeping
+ tok = tokeol; // Set "tok" to EOL in case of error
+ tk.u32 = etok; // Reset token ptr
+ stuffnull = 0; // Don't stuff nulls
+ totlines++; // Bump total #lines assembled
+
+ // See if the entire line is a comment. This is a win if the programmer
+ // puts in lots of comments
+ if (*ln == '*' || *ln == ';' || ((*ln == '/') && (*(ln + 1) == '/')))
+ goto goteol;
+
+ // And here we have a very ugly hack for signalling a single line 'turn off
+ // optimization'. There's really no nice way to do this, so hack it is!
+ optimizeOff = 0; // Default is to take optimizations as they come
+
+ if (*ln == '!')
+ {
+ optimizeOff = 1; // Signal that we don't want to optimize this line
+ ln++; // & skip over the darned thing
+ }
+
+ // Main tokenization loop;
+ // o skip whitespace;
+ // o handle end-of-line;
+ // o handle symbols;
+ // o handle single-character tokens (operators, etc.);
+ // o handle multiple-character tokens (constants, strings, etc.).
+ for(; *ln!=EOS;)
+ {
+ // Skip whitespace, handle EOL
+ while (chrtab[*ln] & WHITE)
+ ln++;
+
+ // Handle EOL, comment with ';'
+ if (*ln == EOS || *ln == ';'|| ((*ln == '/') && (*(ln + 1) == '/')))
+ break;
+
+ // Handle start of symbol. Symbols are null-terminated in place. The
+ // termination is always one symbol behind, since there may be no place
+ // for a null in the case that an operator immediately follows the name.
+ c = chrtab[*ln];
+
+ if (c & STSYM)
+ {
+ if (stuffnull) // Terminate old symbol from previous pass
+ *nullspot = EOS;
+
+ v = 0; // Assume no DOT attrib follows symbol
+ stuffnull = 1;
+
+ // In some cases, we need to check for a DOTx at the *beginning*
+ // of a symbol, as the "start" of the line we're currently looking
+ // at could be somewhere in the middle of that line!
+ if (*ln == '.')
+ {
+ // Make sure that it's *only* a .[bwsl] following, and not the
+ // start of a local symbol:
+ if ((chrtab[*(ln + 1)] & DOT)
+ && (dotxtab[*(ln + 1)] != 0)
+ && !(chrtab[*(ln + 2)] & CTSYM))
+ {
+ // We found a legitimate DOTx construct, so add it to the
+ // token stream:
+ ln++;
+ stuffnull = 0;
+ *tk.u32++ = (TOKEN)dotxtab[*ln++];
+ continue;
+ }
+ }
+
+ p = nullspot = ln++; // Nullspot -> start of this symbol
+
+ // Find end of symbol (and compute its length)
+ for(j=1; (int)chrtab[*ln]&CTSYM; j++)
+ ln++;
+
+ // Handle "DOT" special forms (like ".b") that follow a normal
+ // symbol or keyword:
+ if (*ln == '.')
+ {
+ *ln++ = EOS; // Terminate symbol
+ stuffnull = 0; // And never try it again
+
+ // Character following the '.' must have a DOT attribute, and
+ // the character after THAT one must not have a symbol-continuation (CTSYM)
+ // attribute (to prevent symbols that look like, for example,
+ // "zingo.barf", which might be a good idea anyway....)
+ if (((chrtab[*ln] & DOT) == 0) || (dotxtab[*ln] == 0))
+ return error("[bwsl] must follow '.' in symbol");
+
+ v = (uint32_t)dotxtab[*ln++];
+ cursize = (uint32_t)v;
+
+ if (chrtab[*ln] & CTSYM)
+ return error("misuse of '.'; not allowed in symbols");
+ }
+
+ // If the symbol is small, check to see if it's really the name of
+ // a register.
+ if (j <= KWSIZE)
+ {
+ for(state=0; state>=0;)
+ {
+ j = (int)tolowertab[*p++];
+ j += kwbase[state];
+
+ if (kwcheck[j] != state)
+ {
+ j = -1;
+ break;
+ }
+
+ if (*p == EOS || p == ln)
+ {
+ j = kwaccept[j];
+ break;
+ }
+
+ state = kwtab[j];
+ }
+ }
+ else
+ {
+ j = -1;
+ }
+
+ // Make j = -1 if user tries to use a RISC register while in 68K mode
+ if (!(rgpu || rdsp) && ((TOKEN)j >= KW_R0 && (TOKEN)j <= KW_R31))
+ {
+ j = -1;
+ }
+
+ // Make j = -1 if time, date etc. appear with no preceding ^^
+ // defined, referenced, streq, macdef, date and time
+ switch ((TOKEN)j)
+ {
+ case 112: // defined
+ case 113: // referenced
+ case 118: // streq
+ case 119: // macdef
+ case 120: // time
+ case 121: // date
+ j = -1;
+ }
+
+ // If not tokenized keyword OR token was not found
+ if ((j < 0) || (state < 0))
+ {
+ *tk.u32++ = SYMBOL;
+//#warning
+//problem here: nullspot is a char * but TOKEN is a uint32_t. On a 64-bit
+//system, this will cause all kinds of mischief.
+#if 0
+ *tk++ = (TOKEN)nullspot;
+#else
+ string[stringNum] = nullspot;
+ *tk.u32++ = stringNum;
+ stringNum++;
+#endif
+ }
+ else
+ {
+ *tk.u32++ = (TOKEN)j;
+ stuffnull = 0;
+ }
+
+ if (v) // Record attribute token (if any)
+ *tk.u32++ = (TOKEN)v;
+
+ if (stuffnull) // Arrange for string termination on next pass
+ nullspot = ln;
+
+ continue;
+ }
+
+ // Handle identity tokens
+ if (c & SELF)
+ {
+ *tk.u32++ = *ln++;
+ continue;
+ }
+
+ // Handle multiple-character tokens
+ if (c & MULTX)
+ {
+ switch (*ln++)
+ {
+ case '!': // ! or !=
+ if (*ln == '=')
+ {
+ *tk.u32++ = NE;
+ ln++;
+ }
+ else
+ *tk.u32++ = '!';
+
+ continue;
+ case '\'': // 'string'
+ if (m6502)
+ {
+ // Hardcoded for now, maybe this will change in the future
+ *tk.u32++ = STRINGA8;
+ goto dostring;
+ }
+ // Fall through
+ case '\"': // "string"
+ *tk.u32++ = STRING;
+dostring:
+ c1 = ln[-1];
+ string[stringNum] = ln;
+ *tk.u32++ = stringNum;
+ stringNum++;
+
+ for(p=ln; *ln!=EOS && *ln!=c1;)
+ {
+ c = *ln++;
+
+ if (c == '\\')
+ {
+ switch (*ln++)
+ {
+ case EOS:
+ return(error("unterminated string"));
+ case 'e':
+ c = '\033';
+ break;
+ case 'n':
+ c = '\n';
+ break;
+ case 'b':
+ c = '\b';
+ break;
+ case 't':
+ c = '\t';
+ break;
+ case 'r':
+ c = '\r';
+ break;
+ case 'f':
+ c = '\f';
+ break;
+ case '\"':
+ c = '\"';
+ break;
+ case '\'':
+ c = '\'';
+ break;
+ case '\\':
+ c = '\\';
+ break;
+ case '!':
+ // If we're evaluating a macro
+ // this is valid and expands to
+ // "dot-size"
+ break;
+ default:
+ warn("bad backslash code in string");
+ ln--;
+ break;
+ }
+ }
+
+ *p++ = c;
+ }
+
+ if (*ln++ != c1)
+ return error("unterminated string");
+
+ *p++ = EOS;
+ continue;
+ case '$': // $, hex constant
+ if (chrtab[*ln] & HDIGIT)
+ {
+ v = 0;
+
+ // Parse the hex value
+ while (hextab[*ln] >= 0)
+ v = (v << 4) + (int)hextab[*ln++];
+
+ if (*ln == '.')
+ {
+ if (obj_format == BSD)
+ {
+ if ((*(ln + 1) & 0xDF) == 'B')
+ {
+ v &= 0x000000FF;
+ ln += 2;
+ }
+ else if ((*(ln + 1) & 0xDF) == 'W')
+ {
+ v &= 0x0000FFFF;
+ ln += 2;
+ }
+ else if ((*(ln + 1) & 0xDF) == 'L')
+ {
+ v &= 0xFFFFFFFF;
+ ln += 2;
+ }
+ }
+ }
+
+ *tk.u32++ = CONST;
+ *tk.u64++ = v;
+
+ if (obj_format == ALCYON)
+ {
+ if (*ln == '.')
+ {
+ if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
+ {
+ *tk.u32++ = DOTW;
+ ln += 2;
+ }
+ else if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
+ {
+ *tk.u32++ = DOTL;
+ ln += 2;
+ }
+ }
+ }
+ }
+ else
+ *tk.u32++ = '$';
+
+ continue;
+ case '<': // < or << or <> or <=
+ switch (*ln)
+ {
+ case '<':
+ *tk.u32++ = SHL;
+ ln++;
+ continue;
+ case '>':
+ *tk.u32++ = NE;
+ ln++;
+ continue;
+ case '=':
+ *tk.u32++ = LE;
+ ln++;
+ continue;
+ default:
+ *tk.u32++ = '<';
+ continue;
+ }
+ case ':': // : or ::
+ if (*ln == ':')
+ {
+ *tk.u32++ = DCOLON;
+ ln++;
+ }
+ else
+ *tk.u32++ = ':';
+
+ continue;
+ case '=': // = or ==
+ if (*ln == '=')
+ {
+ *tk.u32++ = DEQUALS;
+ ln++;
+ }
+ else
+ *tk.u32++ = '=';
+
+ continue;
+ case '>': // > or >> or >=
+ switch (*ln)
+ {
+ case '>':
+ *tk.u32++ = SHR;
+ ln++;
+ continue;
+ case '=':
+ *tk.u32++ = GE;
+ ln++;
+ continue;
+ default:
+ *tk.u32++ = '>';
+ continue;
+ }
+ case '%': // % or binary constant
+ if (*ln < '0' || *ln > '1')
+ {
+ *tk.u32++ = '%';
+ continue;
+ }
+
+ v = 0;
+
+ while (*ln >= '0' && *ln <= '1')
+ v = (v << 1) + *ln++ - '0';
+
+ if (*ln == '.')
+ {
+ if ((*(ln + 1) == 'b') || (*(ln + 1) == 'B'))
+ {
+ v &= 0x000000FF;
+ ln += 2;
+ }
+
+ if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
+ {
+ v &= 0x0000FFFF;
+ ln += 2;
+ }
+
+ if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
+ {
+ v &= 0xFFFFFFFF;
+ ln += 2;
+ }
+ }
+
+ *tk.u32++ = CONST;
+ *tk.u64++ = v;
+ continue;
+ case '@': // @ or octal constant
+ if (*ln < '0' || *ln > '7')
+ {
+ *tk.u32++ = '@';
+ continue;
+ }
+
+ v = 0;
+
+ while (*ln >= '0' && *ln <= '7')
+ v = (v << 3) + *ln++ - '0';
+
+ if (*ln == '.')
+ {
+ if ((*(ln + 1) == 'b') || (*(ln + 1) == 'B'))
+ {
+ v &= 0x000000FF;
+ ln += 2;
+ }
+
+ if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
+ {
+ v &= 0x0000FFFF;
+ ln += 2;
+ }
+
+ if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
+ {
+ v &= 0xFFFFFFFF;
+ ln += 2;
+ }
+ }
+
+ *tk.u32++ = CONST;
+ *tk.u64++ = v;
+ continue;
+ case '^': // ^ or ^^ <operator-name>
+ if (*ln != '^')
+ {
+ *tk.u32++ = '^';
+ continue;
+ }
+
+ if (((int)chrtab[*++ln] & STSYM) == 0)
+ {
+ error("invalid symbol following ^^");
+ continue;
+ }
+
+ p = ln++;
+
+ while ((int)chrtab[*ln] & CTSYM)
+ ++ln;
+
+ for(state=0; state>=0;)
+ {
+ // Get char, convert to lowercase
+ j = *p++;
+
+ if (j >= 'A' && j <= 'Z')
+ j += 0x20;
+
+ j += kwbase[state];
+
+ if (kwcheck[j] != state)
+ {
+ j = -1;
+ break;
+ }
+
+ if (*p == EOS || p == ln)
+ {
+ j = kwaccept[j];
+ break;
+ }
+
+ state = kwtab[j];
+ }
+
+ if (j < 0 || state < 0)
+ {
+ error("unknown symbol following ^^");
+ continue;
+ }
+
+ *tk.u32++ = (TOKEN)j;
+ continue;
+ default:
+ interror(2); // Bad MULTX entry in chrtab
+ continue;
+ }
+ }
+
+ // Handle decimal constant
+ if (c & DIGIT)
+ {
+ uint8_t * numStart = ln;
+ v = 0;
+
+ while ((int)chrtab[*ln] & DIGIT)
+ v = (v * 10) + *ln++ - '0';
+
+ // See if there's a .[bwl] after the constant & deal with it if so
+ if (*ln == '.')
+ {
+ if ((*(ln + 1) == 'b') || (*(ln + 1) == 'B'))
+ {
+ v &= 0x000000FF;
+ ln += 2;
+ *tk.u32++ = CONST;
+ *tk.u64++ = v;
+ *tk.u32++ = DOTB;
+ }
+ else if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
+ {
+ v &= 0x0000FFFF;
+ ln += 2;
+ *tk.u32++ = CONST;
+ *tk.u64++ = v;
+ *tk.u32++ = DOTW;
+ }
+ else if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
+ {
+ v &= 0xFFFFFFFF;
+ ln += 2;
+ *tk.u32++ = CONST;
+ *tk.u64++ = v;
+ *tk.u32++ = DOTL;
+ }
+ else if ((int)chrtab[*(ln + 1)] & DIGIT)
+ {
+ // Hey, more digits after the dot, so we assume it's a
+ // floating point number of some kind
+#if 0
+ double fract = 10;
+ ln++;
+ f = (double)v;
+
+ while ((int)chrtab[*ln] & DIGIT)
+ {
+ f = f + (double)(*ln++ - '0') / fract;
+ fract *= 10;
+ }
+#else
+ // Here we parse the whole floating point number
+ char * numEnd;
+ errno = 0;
+ double f = strtod(numStart, &numEnd);
+ ln = (uint8_t *)numEnd;
+
+ if (errno != 0)
+ return error("floating point parse error");
+#endif
+
+ *tk.u32++ = FCONST;
+// Shamus: Well, this is all kinds of icky--not the least of which is that,
+// unlike the uintNN_t types, we have no guarantees of any kind when it comes
+// to the size of floating point numbers in C (as far as I know). If there are
+// such guarantees, we need to use those kinds of types here, or else figure
+// out at runtime what sizes we're dealing with and act accordingly. To be
+// fair, this is OK as long as the double type is no more than 64 bits wide,
+// but again, there's no guarantee of that. :-/
+ *tk.u64++ = f;
+ continue;
+ }
+ }
+ else
+ {
+ *tk.u32++ = CONST;
+ *tk.u64++ = v;
+ }
+
+//printf("CONST: %i\n", v);
+ continue;
+ }
+
+ // Handle illegal character
+ return error("illegal character $%02X found", *ln);
+ }
+
+ // Terminate line of tokens and return "success."
+
+goteol:
+ tok = etok; // Set tok to beginning of line
+
+ if (stuffnull) // Terminate last SYMBOL
+ *nullspot = EOS;
+
+ *tk.u32++ = EOL;
+
+ return OK;