- // See if the entire line is a comment. This is a win if the programmer puts in lots of comments
- if(*ln == '*' || *ln == ';' || ((*ln == '/') && (*(ln+1) == '/'))) goto goteol;
-
- // Main tokenization loop;
- // o skip whitespace;
- // o handle end-of-line;
- // o handle symbols;
- // o handle single-character tokens (operators, etc.);
- // o handle multiple-character tokens (constants, strings, etc.).
- for(; *ln != EOS;) {
- // Skip whitespace, handle EOL
- while((int)chrtab[*ln] & WHITE)
- ++ln;
-
- // Handle EOL, comment with ';'
- if(*ln == EOS || *ln == ';'|| ((*ln == '/') && (*(ln+1) == '/')))
- break;
-
- // Handle start of symbol. Symbols are null-terminated in place. The termination is
- // always one symbol behind, since there may be no place for a null in the case that
- // an operator immediately follows the name.
- c = chrtab[*ln];
- if(c & STSYM) {
- if(stuffnull) // Terminate old symbol
- *nullspot = EOS;
- v = 0; // Assume no DOT attrib follows symbol
- stuffnull = 1;
- p = nullspot = ln++; // Nullspot -> start of this symbol
-
- // Find end of symbol (and compute its length)
- for(j = 1; (int)chrtab[*ln] & CTSYM; ++j)
- ++ln;
-
- // Handle "DOT" special forms (like ".b") that follow a normal symbol or keyword:
- if(*ln == '.') {
- *ln++ = EOS; // Terminate symbol
- stuffnull = 0; // And never try it again
-
- // Character following the `.' must have a DOT attribute, and the chararacter after
- // THAT one must not have a start-symbol attribute (to prevent symbols that look
- // like, for example, "zingo.barf", which might be a good idea anyway....)
- if((((int)chrtab[*ln] & DOT) == 0) || ((int)dotxtab[*ln] <= 0))
- return(error("[bwsl] must follow `.' in symbol"));
- v = (VALUE)dotxtab[*ln++];
- if((int)chrtab[*ln] & CTSYM)
- return(error("misuse of `.', not allowed in symbols"));
- }
-
- // If the symbol is small, check to see if it's really the name of a register.
- if(j <= KWSIZE) {
- for(state = 0; state >= 0;) {
- j = (int)tolowertab[*p++];
- j += kwbase[state];
- if(kwcheck[j] != state) {
- j = -1;
- break;
- }
-
- if(*p == EOS || p == ln) {
- j = kwaccept[j];
- break;
- }
-
- state = kwtab[j];
- }
- } else {
- j = -1;
- }
-
- //make j = -1 if time, date etc with no preceeding ^^
- //defined, referenced, streq, macdef, date and time
- switch((TOKEN)j) {
- case 112: // defined
- case 113: // referenced
- case 118: // streq
- case 119: // macdef
- case 120: // time
- case 121: // date
- j = -1;
- break;
- }
-
- if(j < 0 || state < 0) {
- *tk++ = SYMBOL;
- *tk++ = (TOKEN)nullspot;
- } else {
- *tk++ = (TOKEN)j;
- stuffnull = 0;
- }
-
- if(v) // Record attribute token (if any)
- *tk++ = (TOKEN)v;
-
- if(stuffnull) // Arrange for string termination
- nullspot = ln;
- continue;
- }
-
- // Handle identity tokens
- if(c & SELF) {
- *tk++ = *ln++;
- continue;
- }
-
- // Handle multiple-character tokens
- if(c & MULTX) {
- switch(*ln++) {
- case '!': // ! or !=
- if(*ln == '=') {
- *tk++ = NE;
- ++ln;
- } else *tk++ = '!';
- continue;
- case '\'': // 'string'
- case '\"': // "string"
- c1 = ln[-1];
- *tk++ = STRING;
- *tk++ = (TOKEN)ln;
-
- for(p = ln; *ln != EOS && *ln != c1;) {
- c = *ln++;
- if(c == '\\')
- switch(*ln++) {
- case EOS:
- return(error("unterminated string"));
- case 'e':
- c = '\033';
- break;
- case 'n':
- c = '\n';
- break;
- case 'b':
- c = '\b';
- break;
- case 't':
- c = '\t';
- break;
- case 'r':
- c = '\r';
- break;
- case 'f':
- c = '\f';
- break;
- case '\"':
- c = '\"';
- break;
- case '\'':
- c = '\'';
- break;
- case '\\':
- c = '\\';
- break;
- default:
- warn("bad backslash code in string");
- --ln;
- break;
- }
- *p++ = c;
- }
-
- if(*ln++ != c1)
- return(error("unterminated string"));
- *p++ = EOS;
- continue;
- case '$': // $, hex constant
- if((int)chrtab[*ln] & HDIGIT) {
- v = 0;
- while((int)hextab[*ln] >= 0)
- v = (v << 4) + (int)hextab[*ln++];
- if(*ln == '.') {
- if((*(ln+1) == 'b') || (*(ln+1) == 'B')) { v &= 0x000000FF; ln += 2; }
- if((*(ln+1) == 'w') || (*(ln+1) == 'W')) { v &= 0x0000FFFF; ln += 2; }
- if((*(ln+1) == 'l') || (*(ln+1) == 'L')) { ln += 2; }
- }
- *tk++ = CONST;
- *tk++ = v;
- } else *tk++ = '$';
- continue;
- case '<': // < or << or <> or <=
- switch(*ln) {
- case '<':
- *tk++ = SHL;
- ++ln;
- continue;
- case '>':
- *tk++ = NE;
- ++ln;
- continue;
- case '=':
- *tk++ = LE;
- ++ln;
- continue;
- default:
- *tk++ = '<';
- continue;
- }
- case ':': // : or ::
- if(*ln == ':') {
- *tk++ = DCOLON;
- ++ln;
- } else *tk++ = ':';
- continue;
- case '=': // = or ==
- if(*ln == '=') {
- *tk++ = DEQUALS;
- ++ln;
- } else *tk++ = '=';
- continue;
- case '>': // > or >> or >=
- switch(*ln) {
- case '>':
- *tk++ = SHR;
- ++ln;
- continue;
- case '=':
- *tk++ = GE;
- ++ln;
- continue;
- default:
- *tk++ = '>';
- continue;
- }
- case '%': // % or binary constant
- if(*ln < '0' || *ln > '1') {
- *tk++ = '%';
- continue;
- }
- v = 0;
- while(*ln >= '0' && *ln <= '1')
- v = (v << 1) + *ln++ - '0';
- if(*ln == '.') {
- if((*(ln+1) == 'b') || (*(ln+1) == 'B')) { v &= 0x000000FF; ln += 2; }
- if((*(ln+1) == 'w') || (*(ln+1) == 'W')) { v &= 0x0000FFFF; ln += 2; }
- if((*(ln+1) == 'l') || (*(ln+1) == 'L')) { ln += 2; }
- }
- *tk++ = CONST;
- *tk++ = v;
- continue;
- case '@': // @ or octal constant
- if(*ln < '0' || *ln > '7') {
- *tk++ = '@';
- continue;
- }
- v = 0;
- while(*ln >= '0' && *ln <= '7')
- v = (v << 3) + *ln++ - '0';
- if(*ln == '.') {
- if((*(ln+1) == 'b') || (*(ln+1) == 'B')) { v &= 0x000000FF; ln += 2; }
- if((*(ln+1) == 'w') || (*(ln+1) == 'W')) { v &= 0x0000FFFF; ln += 2; }
- if((*(ln+1) == 'l') || (*(ln+1) == 'L')) { ln += 2; }
- }
- *tk++ = CONST;
- *tk++ = v;
- continue;
- case '^': // ^ or ^^ <operator-name>
- if(*ln != '^') {
- *tk++ = '^';
- continue;
- }
-
- if(((int)chrtab[*++ln] & STSYM) == 0) {
- error("invalid symbol following ^^");
- continue;
- }
-
- p = ln++;
- while((int)chrtab[*ln] & CTSYM)
- ++ln;
-
- for(state = 0; state >= 0;) {
- // Get char, convert to lowercase
- j = *p++;
- if(j >= 'A' && j <= 'Z')
- j += 0x20;
-
- j += kwbase[state];
- if(kwcheck[j] != state) {
- j = -1;
- break;
- }
-
- if(*p == EOS || p == ln) {
- j = kwaccept[j];
- break;
- }
- state = kwtab[j];
- }
-
- if(j < 0 || state < 0) {
- error("unknown symbol following ^^");
- continue;
- }
-
- *tk++ = (TOKEN)j;
- continue;
- default:
- interror(2); // Bad MULTX entry in chrtab
- continue;
- }
- }
-
-
- // Handle decimal constant
- if(c & DIGIT) {
- v = 0;
- while((int)chrtab[*ln] & DIGIT)
- v = (v * 10) + *ln++ - '0';
- if(*ln == '.') {
- if((*(ln+1) == 'b') || (*(ln+1) == 'B')) { v &= 0x000000FF; ln += 2; }
- if((*(ln+1) == 'w') || (*(ln+1) == 'W')) { v &= 0x0000FFFF; ln += 2; }
- if((*(ln+1) == 'l') || (*(ln+1) == 'L')) { ln += 2; }
- }
- *tk++ = CONST;
- *tk++ = v;
- continue;
- }
-
- // Handle illegal character
- return(error("illegal character"));
- }
-
- // Terminate line of tokens and return "success."
-
- goteol:
-
- tok = etok; // Set tok to beginning of line
- if(stuffnull) // Terminate last SYMBOL
- *nullspot = EOS;
- *tk++ = EOL;
-
- return(OK);
+ // See if the entire line is a comment. This is a win if the programmer puts in lots of comments
+ if (*ln == '*' || *ln == ';' || ((*ln == '/') && (*(ln+1) == '/')))
+ goto goteol;
+
+ // Main tokenization loop;
+ // o skip whitespace;
+ // o handle end-of-line;
+ // o handle symbols;
+ // o handle single-character tokens (operators, etc.);
+ // o handle multiple-character tokens (constants, strings, etc.).
+ for(; *ln!=EOS;)
+ {
+ // Skip whitespace, handle EOL
+ while ((int)chrtab[*ln] & WHITE)
+ ++ln;
+
+ // Handle EOL, comment with ';' or '//'
+ if (*ln == EOS || *ln == ';'|| ((*ln == '/') && (*(ln+1) == '/')))
+ break;
+
+ // Handle start of symbol. Symbols are null-terminated in place. The termination is
+ // always one symbol behind, since there may be no place for a null in the case that
+ // an operator immediately follows the name.
+ c = chrtab[*ln];
+
+ if (c & STSYM)
+ {
+ if (stuffnull) // Terminate old symbol
+ *nullspot = EOS;
+
+ v = 0; // Assume no DOT attrib follows symbol
+ stuffnull = 1;
+ p = nullspot = ln++; // Nullspot -> start of this symbol
+
+ // Find end of symbol (and compute its length)
+ for(j=1; (int)chrtab[*ln]&CTSYM; ++j)
+ ++ln;
+
+ // Handle "DOT" special forms (like ".b") that follow a normal symbol or keyword:
+ if (*ln == '.')
+ {
+ *ln++ = EOS; // Terminate symbol
+ stuffnull = 0; // And never try it again
+
+ // Character following the `.' must have a DOT attribute, and the character after
+ // THAT one must not have a start-symbol attribute (to prevent symbols that look
+ // like, for example, "zingo.barf", which might be a good idea anyway....)
+ if ((((int)chrtab[*ln] & DOT) == 0) || ((int)dotxtab[*ln] <= 0))
+ return error("[bwsl] must follow `.' in symbol");
+
+ v = (VALUE)dotxtab[*ln++];
+
+ if ((int)chrtab[*ln] & CTSYM)
+ return error("misuse of `.', not allowed in symbols");
+ }
+
+ // If the symbol is small, check to see if it's really the name of a register.
+ if (j <= KWSIZE)
+ {
+ for(state=0; state>=0;)
+ {
+ j = (int)tolowertab[*p++];
+ j += kwbase[state];
+
+ if (kwcheck[j] != state)
+ {
+ j = -1;
+ break;
+ }
+
+ if (*p == EOS || p == ln)
+ {
+ j = kwaccept[j];
+ break;
+ }
+
+ state = kwtab[j];
+ }
+ }
+ else
+ {
+ j = -1;
+ }
+
+ //make j = -1 if time, date etc with no preceding ^^
+ //defined, referenced, streq, macdef, date and time
+ switch ((TOKEN)j)
+ {
+ case 112: // defined
+ case 113: // referenced
+ case 118: // streq
+ case 119: // macdef
+ case 120: // time
+ case 121: // date
+ j = -1;
+ break;
+ }
+
+ if (j < 0 || state < 0)
+ {
+ *tk++ = SYMBOL;
+ *tk++ = (TOKEN)nullspot;
+ }
+ else
+ {
+ *tk++ = (TOKEN)j;
+ stuffnull = 0;
+ }
+
+ if (v) // Record attribute token (if any)
+ *tk++ = (TOKEN)v;
+
+ if (stuffnull) // Arrange for string termination
+ nullspot = ln;
+
+ continue;
+ }
+
+ // Handle identity tokens
+ if (c & SELF)
+ {
+ *tk++ = *ln++;
+ continue;
+ }
+
+ // Handle multiple-character tokens
+ if (c & MULTX)
+ {
+ switch (*ln++)
+ {
+ case '!': // ! or !=
+ if (*ln == '=')
+ {
+ *tk++ = NE;
+ ++ln;
+ }
+ else
+ *tk++ = '!';
+
+ continue;
+ case '\'': // 'string'
+ case '\"': // "string"
+ c1 = ln[-1];
+ *tk++ = STRING;
+ *tk++ = (TOKEN)ln;
+
+ for(p=ln; *ln!=EOS && *ln!=c1;)
+ {
+ c = *ln++;
+
+ if (c == '\\')
+ {
+ switch (*ln++)
+ {
+ case EOS:
+ return(error("unterminated string"));
+ case 'e':
+ c = '\033';
+ break;
+ case 'n':
+ c = '\n';
+ break;
+ case 'b':
+ c = '\b';
+ break;
+ case 't':
+ c = '\t';
+ break;
+ case 'r':
+ c = '\r';
+ break;
+ case 'f':
+ c = '\f';
+ break;
+ case '\"':
+ c = '\"';
+ break;
+ case '\'':
+ c = '\'';
+ break;
+ case '\\':
+ c = '\\';
+ break;
+ default:
+ warn("bad backslash code in string");
+ --ln;
+ break;
+ }
+ }
+
+ *p++ = c;
+ }
+
+ if (*ln++ != c1)
+ return error("unterminated string");
+
+ *p++ = EOS;
+ continue;
+ case '$': // $, hex constant
+ if ((int)chrtab[*ln] & HDIGIT)
+ {
+ v = 0;
+
+ while ((int)hextab[*ln] >= 0)
+ v = (v << 4) + (int)hextab[*ln++];
+
+ if (*ln == '.')
+ {
+ if ((*(ln+1) == 'b') || (*(ln+1) == 'B'))
+ {
+ v &= 0x000000FF;
+ ln += 2;
+ }
+
+ if ((*(ln+1) == 'w') || (*(ln+1) == 'W'))
+ {
+ v &= 0x0000FFFF;
+ ln += 2;
+ }
+
+ if ((*(ln+1) == 'l') || (*(ln+1) == 'L'))
+ {
+ ln += 2;
+ }
+ }
+
+ *tk++ = CONST;
+ *tk++ = v;
+ }
+ else
+ *tk++ = '$';
+
+ continue;
+ case '<': // < or << or <> or <=
+ switch (*ln)
+ {
+ case '<':
+ *tk++ = SHL;
+ ++ln;
+ continue;
+ case '>':
+ *tk++ = NE;
+ ++ln;
+ continue;
+ case '=':
+ *tk++ = LE;
+ ++ln;
+ continue;
+ default:
+ *tk++ = '<';
+ continue;
+ }
+ case ':': // : or ::
+ if (*ln == ':')
+ {
+ *tk++ = DCOLON;
+ ++ln;
+ }
+ else
+ *tk++ = ':';
+
+ continue;
+ case '=': // = or ==
+ if (*ln == '=')
+ {
+ *tk++ = DEQUALS;
+ ++ln;
+ }
+ else
+ *tk++ = '=';
+
+ continue;
+ case '>': // > or >> or >=
+ switch (*ln)
+ {
+ case '>':
+ *tk++ = SHR;
+ ++ln;
+ continue;
+ case '=':
+ *tk++ = GE;
+ ++ln;
+ continue;
+ default:
+ *tk++ = '>';
+ continue;
+ }
+ case '%': // % or binary constant
+ if (*ln < '0' || *ln > '1')
+ {
+ *tk++ = '%';
+ continue;
+ }
+
+ v = 0;
+
+ while (*ln >= '0' && *ln <= '1')
+ v = (v << 1) + *ln++ - '0';
+
+ if (*ln == '.')
+ {
+ if ((*(ln+1) == 'b') || (*(ln+1) == 'B'))
+ {
+ v &= 0x000000FF;
+ ln += 2;
+ }
+
+ if ((*(ln+1) == 'w') || (*(ln+1) == 'W'))
+ {
+ v &= 0x0000FFFF;
+ ln += 2;
+ }
+
+ if ((*(ln+1) == 'l') || (*(ln+1) == 'L'))
+ {
+ ln += 2;
+ }
+ }
+
+ *tk++ = CONST;
+ *tk++ = v;
+ continue;
+ case '@': // @ or octal constant
+ if (*ln < '0' || *ln > '7')
+ {
+ *tk++ = '@';
+ continue;
+ }
+
+ v = 0;
+
+ while (*ln >= '0' && *ln <= '7')
+ v = (v << 3) + *ln++ - '0';
+
+ if (*ln == '.')
+ {
+ if ((*(ln+1) == 'b') || (*(ln+1) == 'B'))
+ {
+ v &= 0x000000FF;
+ ln += 2;
+ }
+
+ if ((*(ln+1) == 'w') || (*(ln+1) == 'W'))
+ {
+ v &= 0x0000FFFF;
+ ln += 2;
+ }
+
+ if ((*(ln+1) == 'l') || (*(ln+1) == 'L'))
+ {
+ ln += 2;
+ }
+ }
+
+ *tk++ = CONST;
+ *tk++ = v;
+ continue;
+ case '^': // ^ or ^^ <operator-name>
+ if (*ln != '^')
+ {
+ *tk++ = '^';
+ continue;
+ }
+
+ if (((int)chrtab[*++ln] & STSYM) == 0)
+ {
+ error("invalid symbol following ^^");
+ continue;
+ }
+
+ p = ln++;
+
+ while ((int)chrtab[*ln] & CTSYM)
+ ++ln;
+
+ for(state=0; state>=0;)
+ {
+ // Get char, convert to lowercase
+ j = *p++;
+
+ if (j >= 'A' && j <= 'Z')
+ j += 0x20;
+
+ j += kwbase[state];
+
+ if (kwcheck[j] != state)
+ {
+ j = -1;
+ break;
+ }
+
+ if (*p == EOS || p == ln)
+ {
+ j = kwaccept[j];
+ break;
+ }
+
+ state = kwtab[j];
+ }
+
+ if (j < 0 || state < 0)
+ {
+ error("unknown symbol following ^^");
+ continue;
+ }
+
+ *tk++ = (TOKEN)j;
+ continue;
+ default:
+ interror(2); // Bad MULTX entry in chrtab
+ continue;
+ }
+ }
+
+ // Handle decimal constant
+ if (c & DIGIT)
+ {
+ v = 0;
+
+ while ((int)chrtab[*ln] & DIGIT)
+ v = (v * 10) + *ln++ - '0';
+
+ if (*ln == '.')
+ {
+ if ((*(ln+1) == 'b') || (*(ln+1) == 'B'))
+ {
+ v &= 0x000000FF;
+ ln += 2;
+ }
+
+ if ((*(ln+1) == 'w') || (*(ln+1) == 'W'))
+ {
+ v &= 0x0000FFFF;
+ ln += 2;
+ }
+
+ if ((*(ln+1) == 'l') || (*(ln+1) == 'L'))
+ {
+ ln += 2;
+ }
+ }
+
+ *tk++ = CONST;
+ *tk++ = v;
+ continue;
+ }
+
+ // Handle illegal character
+ return error("illegal character");
+ }
+
+ // Terminate line of tokens and return "success."
+
+goteol:
+ tok = etok; // Set tok to beginning of line
+
+ if (stuffnull) // Terminate last SYMBOL
+ *nullspot = EOS;
+
+ *tk++ = EOL;
+
+ return OK;