2 // RMAC - Renamed Macro Assembler for all Atari computers
3 // TOKEN.C - Token Handling
4 // Copyright (C) 199x Landon Dyer, 2011-2021 Reboot and Friends
5 // RMAC derived from MADMAC v1.07 Written by Landon Dyer, 1986
6 // Source utilised with the kind permission of Landon Dyer
19 #define DECL_KW // Declare keyword arrays
20 #define DEF_KW // Declare keyword values
21 #include "kwtab.h" // Incl generated keyword tables & defs
22 #define DEF_REG68 // Incl 68k register definitions
24 #define DEF_REGRISC // Include GPU/DSP register definitions
26 #define DEF_UNARY // Declare unary values
27 #define DECL_UNARY // Incl unary keyword state machine tables
28 #include "unarytab.h" // Incl generated unary tables & defs
31 int lnsave; // Nonzero => keep a copy of the current line's text
// NOTE(review): the old comment said "64K max", but the counter is 32 bits
// wide -- confirm whether a 64K limit still applies anywhere.
32 uint32_t curlineno; // Current line number
33 int totlines; // Total # of lines
34 int mjump_align = 0; // mjump alignment flag
35 char lntag; // Line tag
36 char * curfname; // Current filename
37 char tolowertab[128]; // Uppercase ==> lowercase
38 int8_t hextab[128]; // Table of hex values
39 char dotxtab[128]; // Table for ".b", ".s", etc.
40 char irbuf[LNSIZ]; // Text for .rept block line
41 char lnbuf[LNSIZ]; // Text of current line
42 WORD filecount; // Unique file number counter
43 WORD cfileno; // Current file number
44 TOKEN * tok; // Ptr to current token
45 TOKEN * etok; // Ptr past last token in tokbuf[]
46 TOKEN tokeol[1] = {EOL}; // Bailout end-of-line token
47 char * string[TOKBUFSIZE*2];// Token buffer string pointer storage
48 int optimizeOff; // Optimization override flag
54 INOBJ * cur_inobj; // Ptr current input obj (IFILE/IMACRO)
55 static INOBJ * f_inobj; // Ptr list of free INOBJs
56 static IFILE * f_ifile; // Ptr list of free IFILEs
57 static IMACRO * f_imacro; // Ptr list of free IMACROs
59 static TOKEN tokbuf[TOKBUFSIZE]; // Token buffer (stack-like, all files)
// Character classification table, indexed by the byte value of a source
// character. Entries are built from the ILLEG, WHITE, SELF, MULTX, STSYM,
// CTSYM, DIGIT, HDIGIT and DOT flags; the tokenizer tests them with a bitwise
// AND (e.g. "chrtab[*ln] & WHITE"). The trailing comment on each row names the
// characters that row describes.
// NOTE(review): lowercase 'x' carries DOT but uppercase 'X' does not, although
// InitTokenizer() fills in dotxtab['X'] -- confirm whether ".X" is meant to be
// accepted as a size suffix.
61 uint8_t chrtab[0x100] = {
62 ILLEG, ILLEG, ILLEG, ILLEG, // NUL SOH STX ETX
63 ILLEG, ILLEG, ILLEG, ILLEG, // EOT ENQ ACK BEL
64 ILLEG, WHITE, ILLEG, ILLEG, // BS HT LF VT
65 WHITE, ILLEG, ILLEG, ILLEG, // FF CR SO SI
67 ILLEG, ILLEG, ILLEG, ILLEG, // DLE DC1 DC2 DC3
68 ILLEG, ILLEG, ILLEG, ILLEG, // DC4 NAK SYN ETB
69 ILLEG, ILLEG, ILLEG, ILLEG, // CAN EM SUB ESC
70 ILLEG, ILLEG, ILLEG, ILLEG, // FS GS RS US
72 WHITE, MULTX, MULTX, SELF, // SP ! " #
73 MULTX+CTSYM, MULTX, SELF, MULTX, // $ % & '
74 SELF, SELF, SELF, SELF, // ( ) * +
75 SELF, SELF, STSYM, SELF, // , - . /
77 DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 0 1
78 DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 2 3
79 DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 4 5
80 DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 6 7
81 DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 8 9
83 MULTX, MULTX, MULTX, STSYM+CTSYM, // < = > ?
85 MULTX, STSYM+CTSYM+HDIGIT, // @ A
86 DOT+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT, // B C
87 DOT+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT, // D E
88 STSYM+CTSYM+HDIGIT, STSYM+CTSYM, // F G
89 STSYM+CTSYM, DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // H I J K
90 DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // L M N O
92 DOT+STSYM+CTSYM, DOT+STSYM+CTSYM, STSYM+CTSYM, DOT+STSYM+CTSYM, // P Q R S
93 STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, DOT+STSYM+CTSYM, // T U V W
94 STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, SELF,// X Y Z [
95 SELF, SELF, MULTX, STSYM+CTSYM, // \ ] ^ _
97 ILLEG, STSYM+CTSYM+HDIGIT, // ` a
98 DOT+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT, // b c
99 DOT+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT, // d e
100 STSYM+CTSYM+HDIGIT, STSYM+CTSYM, // f g
101 STSYM+CTSYM, DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // h i j k
102 DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // l m n o
104 DOT+STSYM+CTSYM, DOT+STSYM+CTSYM, STSYM+CTSYM, DOT+STSYM+CTSYM, // p q r s
105 STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, DOT+STSYM+CTSYM, // t u v w
106 DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, SELF, // x y z {
107 SELF, SELF, SELF, ILLEG, // | } ~ DEL
109 // Anything above $7F is illegal (and yes, we need to check for this,
110 // otherwise you get strange and spurious errors that will lead you astray)
111 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
112 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
113 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
114 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
115 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
116 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
117 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
118 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
119 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
120 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
121 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
122 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
123 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
124 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
125 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
126 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG
129 // Names of registers
// ExpandMacro() uses this table to turn a register token back into text; it
// indexes it with (token value - REG68_D0). The trailing comment on each row
// gives the first and last token value that row covers. Empty strings appear
// to mark token values with no register name -- TODO confirm.
// NOTE(review): the row tagged 304,311 repeats the row tagged 272,279 --
// confirm that the duplication is intended.
130 static char * regname[] = {
131 "d0","d1","d2","d3","d4","d5","d6","d7", // 128,135
132 "a0","a1","a2","a3","a4","a5","a6","sp", // 136,143
133 "ssp","pc","sr","ccr","regequ","set","reg","r0", // 144,151
134 "r1","r2","r3","r4","r5","r6","r7","r8", // 152,159
135 "r9","r10","r11","r12","r13","r14","r15","r16", // 160,167
136 "r17","r18","r19","r20","r21","r22","r23","r24", // 168,175
137 "r25","r26","r27","r28","r29","r30","r31","ccdef", // 176,183
138 "usp","ic40","dc40","bc40","sfc","dfc","","vbr", // 184,191
139 "cacr","caar","msp","isp","tc","itt0","itt1","dtt0", // 192,199
140 "dtt1","mmusr","urp","srp","iacr0","iacr1","dacr0","dacr1", // 200,207
141 "tt0","tt1","crp","","","","","", // 208,215
142 "","","","","fpiar","fpsr","fpcr","", // 216,223
143 "fp0","fp1","fp2","fp3","fp4","fp5","fp6","fp7", // 224,231
144 "","","","","","","","", // 232,239
145 "","","","","","","","", // 240,247
146 "","","","","","","","", // 248,255
147 "","","","","x0","x1","y0","y1", // 256,263
148 "","b0","","b2","","b1","a","b", // 264,271
149 "mr","omr","la","lc","ssh","ssl","ss","", // 272,279
150 "n0","n1","n2","n3","n4","n5","n6","n7", // 280,287
151 "m0","m1","m2","m3","m4","m5","m6","m7", // 288,295
152 "","","","","","","l","p", // 296,303
153 "mr","omr","la","lc","ssh","ssl","ss","", // 304,311
154 "a10","b10","x","y","","","ab","ba" // 312,319
159 // Initialize tokenizer
// Resets the line-saving flag, the current filename and the file counters,
// then fills in the hextab, tolowertab and dotxtab lookup tables used by the
// tokenizer. Takes no arguments and returns nothing.
161 void InitTokenizer(void)
164 char * htab = "0123456789abcdefABCDEF"; // Hex character table
166 lnsave = 0; // Don't save lines
167 curfname = ""; // No file, empty filename
168 filecount = (WORD)-1; // include() pre-increments this, so the first file is 0
169 cfileno = (WORD)-1; // cfileno gets bumped to 0
181 // Initialize hex, "dot" and tolower tables
186 tolowertab[i] = (char)i;
// htab positions 0..15 are '0'..'9','a'..'f' and map to themselves; positions
// 16..21 are 'A'..'F', so subtracting 6 maps them onto 10..15 as well.
189 for(i=0; htab[i]!=EOS; i++)
190 hextab[htab[i]] = (char)((i < 16) ? i : i - 6);
// Setting bit 0x20 turns each ASCII uppercase letter into its lowercase form
192 for(i='A'; i<='Z'; i++)
193 tolowertab[i] |= 0x20;
195 // These characters are legal immediately after a period
196 dotxtab['b'] = DOTB; // .b .B (.s/.S get DOTS below, not DOTB)
198 //dotxtab['s'] = DOTB;
199 //dotxtab['S'] = DOTB;
200 dotxtab['w'] = DOTW; // .w .W
202 dotxtab['l'] = DOTL; // .l .L
204 dotxtab['i'] = DOTI; // .i .I (purpose not documented here -- TODO explain)
206 dotxtab['D'] = DOTD; // .d .D (double)
208 dotxtab['S'] = DOTS; // .s .S
210 dotxtab['Q'] = DOTQ; // .q .Q (quad word)
212 dotxtab['X'] = DOTX; // .x .X
214 dotxtab['P'] = DOTP; // .p .P
// Point curfname at the name to use in error messages.
// Walks the filerec list to find the record for the wanted file number and
// uses its frec_name. Falls back to the placeholder strings "(*top*)" and
// "(*NOT FOUND*)" for the two cases the comments below call impossible.
// NOTE(review): where fnum comes from is not visible in this view -- presumably
// derived from cfileno; confirm.
219 void SetFilenameForErrorReporting(void)
223 // Check for absolute top filename (this should never happen)
226 curfname = "(*top*)";
230 FILEREC * fr = filerec;
232 // Advance to the correct record...
233 while (fr != NULL && fnum != 0)
239 // Check for file # record not found (this should never happen either)
242 curfname = "(*NOT FOUND*)";
246 curfname = fr->frec_name;
251 // Allocate an IFILE or IMACRO
// (SRC_IREPT is handled too.) Obtains an INOBJ plus the type-specific payload
// selected by 'typ' (SRC_IFILE, SRC_IMACRO or SRC_IREPT), taking them from the
// f_inobj / f_ifile / f_imacro free lists or from malloc(). It then records
// the current ifent, tok and etok in the new object and links it in front of
// cur_inobj.
// NOTE(review): no NULL check on the malloc() results is visible here --
// confirm how allocation failure is handled.
253 INOBJ * a_inobj(int typ)
259 // Allocate and initialize INOBJ first
261 inobj = malloc(sizeof(INOBJ));
265 f_inobj = f_inobj->in_link;
270 case SRC_IFILE: // Alloc and init an IFILE
272 ifile = malloc(sizeof(IFILE));
276 f_ifile = f_ifile->if_link;
279 inobj->inobj.ifile = ifile;
282 case SRC_IMACRO: // Alloc and init an IMACRO
283 if (f_imacro == NULL)
284 imacro = malloc(sizeof(IMACRO));
288 f_imacro = f_imacro->im_link;
291 inobj->inobj.imacro = imacro;
294 case SRC_IREPT: // Alloc and init an IREPT
295 inobj->inobj.irept = malloc(sizeof(IREPT));
296 DEBUG { printf("alloc IREPT\n"); }
300 // Install INOBJ on top of input stack
301 inobj->in_ifent = ifent; // Record .if context on entry
302 inobj->in_type = (WORD)typ;
303 inobj->in_otok = tok; // Saved so fpop() can restore tok
304 inobj->in_etok = etok; // Saved so fpop() can restore etok
305 inobj->in_link = cur_inobj;
313 // Perform macro substitution from 'orig' to 'dest'. Return OK or some error.
314 // A macro reference is in one of two forms:
315 // \name <non-name-character>
317 // A doubled backslash (\\) is compressed to a single backslash (\).
318 // Argument definitions have been pre-tokenized, so we have to turn them back
319 // into text. This means that numbers, in particular, become hex, regardless of
320 // their representation when the macro was invoked. This is a hack.
321 // A label may appear at the beginning of the line:
322 // :<name><whitespace>
323 // (the colon must be in the first column). These labels are stripped before
324 // macro expansion takes place.
//
// Parameters:
//   src     - macro body line to expand (the 'orig' of the text above)
//   dest    - buffer that receives the expanded text
//   destsiz - size of 'dest' in bytes
// The macro being expanded is taken from cur_inobj, not from the arguments.
// Errors are reported through error(); overflow of 'dest' goes through fatal().
326 int ExpandMacro(char * src, char * dest, int destsiz)
329 int questmark; // \? for testing argument existence
330 char mname[128]; // Assume max size of a formal arg name
331 char numbuf[20]; // Buffer for text of CONSTs
334 char ** symbolString;
336 DEBUG { printf("ExM: src=\"%s\"\n", src); }
338 IMACRO * imacro = cur_inobj->inobj.imacro;
339 int macnum = (int)(imacro->im_macro->sattr);
341 char * dst = dest; // Next dest slot
342 char * edst = dest + destsiz - 1; // Address of the last byte of dest buffer
344 // Check for (and skip over) any "label" on the line
350 while (*s != EOS && !(chrtab[*s] & WHITE))
354 s++; // Skip first whitespace
357 // Expand the rest of the line
360 // Copy single character
366 // Skip comments in case a loose @ or \ is in there
367 // In that case the tokeniser was trying to expand it.
368 if ((*s == ';') || ((*s == '/') && (*(s + 1) == '/')))
373 // Do macro expansion
381 case '\\': // \\, \ (collapse to single backslash)
387 case '?': // \? <macro> set `questmark' flag
391 case '#': // \#, number of arguments
392 sprintf(numbuf, "%d", (int)imacro->im_nargs);
394 case '!': // \! size suffix supplied on invocation
395 switch ((int)imacro->im_siz)
397 case SIZN: d = ""; break;
398 case SIZB: d = ".b"; break;
399 case SIZW: d = ".w"; break;
400 case SIZL: d = ".l"; break;
404 case '~': // ==> unique label string Mnnnn...
405 sprintf(numbuf, "M%u", curuniq);
421 return error("missing argument name");
424 // \n ==> argument number 'n', 0..9
425 if (chrtab[*s] & DIGIT)
435 // Get argument name: \name, \{name}
445 while (chrtab[*s] & CTSYM);
450 for(++s; *s != EOS && *s != '}';)
454 return error("missing closing brace ('}')");
461 // Lookup the argument and copy its (string) value into the
462 // destination string
463 DEBUG { printf("argument='%s'\n", mname); }
465 if ((arg = lookup(mname, MACARG, macnum)) == NULL)
466 return error("undefined argument: '%s'", mname);
469 // Convert a string of tokens (terminated with EOL) back into
470 // text. If an argument is out of range (not specified in the
471 // macro invocation) then it is ignored.
472 i = (int)arg->svalue;
474 DEBUG { printf("~argnumber=%d\n", i); }
477 if (i < imacro->im_nargs)
479 tk = imacro->argument[i].token;
480 symbolString = imacro->argument[i].string;
483 // printf("ExM: Preparing to parse argument #%u...\n", i);
489 // 0 if the argument is empty or non-existent,
490 // 1 if the argument is not empty
493 if (tk == NULL || *tk == EOL)
499 *dst++ = (char)(questmark + '0');
503 // Argument # is in range, so expand it
508 // Reverse-translation from a token number to a string.
509 // This is a hack. It might be better table-driven.
514 d = regname[(int)*tk++ - REG68_D0];
522 d = symbolString[*tk++];
523 DEBUG { printf("ExM: SYMBOL=\"%s\"", d); }
526 d = symbolString[*tk++];
547 // Shamus: Changing the format specifier from %lx to %ux caused the assembler
548 // to choke on legitimate code... Need to investigate this further
549 // before changing anything else here!
551 // sprintf(numbuf, "$%lx", (uint64_t)*tk++);
552 sprintf(numbuf, "$%" PRIX64, (uint64_t)*tk++);
620 *dst++ = (char)*(tk - 1);
625 // If 'd' != NULL, copy string to destination
629 DEBUG printf("d='%s'\n", d);
648 DEBUG { printf("ExM: dst=\"%s\"\n", dest); }
653 DEBUG { printf("*** OVERFLOW LINE ***\n%s\n", dest); }
654 return fatal("line too long as a result of macro expansion");
659 // Get next line of text from a macro
// Takes the next LLIST entry of the macro on top of the input stack, advances
// im_nextln past it, expands it into the macro's own im_lnbuf and returns that
// buffer.
// NOTE(review): the value returned by ExpandMacro() is not examined here --
// confirm that expansion errors are reported elsewhere.
661 char * GetNextMacroLine(void)
663 IMACRO * imacro = cur_inobj->inobj.imacro;
664 LLIST * strp = imacro->im_nextln;
666 if (strp == NULL) // End-of-macro
669 imacro->im_nextln = strp->next;
670 // ExpandMacro((char *)(strp + 1), imacro->im_lnbuf, LNSIZ);
671 ExpandMacro(strp->line, imacro->im_lnbuf, LNSIZ);
673 return imacro->im_lnbuf;
678 // Get next line of text from a repeat block
// Works on the IREPT on top of the input stack. When the end of the line list
// is reached it rewinds to ir_firstln and consumes one unit of ir_count. The
// current line is placed in the global irbuf, either copied verbatim or with
// "\~" replaced by "R<reptuniq>".
// NOTE(review): neither the strcpy() nor the sprintf() into irbuf shows a
// length check in this view -- confirm that stored lines always fit in LNSIZ.
680 char * GetNextRepeatLine(void)
682 IREPT * irept = cur_inobj->inobj.irept;
683 // LONG * strp = irept->ir_nextln; // initial null
685 // Do repeat at end of .rept block's string list
687 if (irept->ir_nextln == NULL)
689 DEBUG { printf("back-to-top-of-repeat-block count=%d\n", (int)irept->ir_count); }
690 irept->ir_nextln = irept->ir_firstln; // copy first line
692 if (irept->ir_count-- == 0)
694 DEBUG { printf("end-repeat-block\n"); }
698 // strp = irept->ir_nextln;
700 // Mark the current macro line in the irept object
701 // This is probably overkill - a global variable
702 // would suffice here (it only gets used during
703 // error reporting anyway)
704 irept->lineno = irept->ir_nextln->lineno;
706 // Copy the rept lines verbatim, unless we're in nest level 0.
707 // Then, expand any \~ labels to unique numbers (Rn)
710 strcpy(irbuf, irept->ir_nextln->line);
714 uint32_t linelen = strlen(irept->ir_nextln->line);
715 uint8_t *p_line = irept->ir_nextln->line;
716 char *irbufwrite = irbuf;
// "<=" makes the loop run one step past the last character, i.e. over the
// terminating NUL as well
717 for (int i = 0; i <= linelen; i++)
721 if (c == '\\' && *p_line == '~')
724 irbufwrite += sprintf(irbufwrite, "R%u", reptuniq);
733 DEBUG { printf("repeat line='%s'\n", irbuf); }
734 // irept->ir_nextln = (LONG *)*strp;
735 irept->ir_nextln = irept->ir_nextln->next;
742 // Include a source file used at the root, and for ".include" files
//
// Parameters:
//   handle - file handle of the already-opened source file
//   fname  - name of that file; a strdup() copy becomes curfname
// Pushes a new SRC_IFILE input object, saves the previous line number,
// filename and file number in it, assigns the next file number, and adds a
// FILEREC for the file to the filerec list.
// NOTE(review): the results of strdup() and malloc() are used without a NULL
// check.
744 int include(int handle, char * fname)
747 DEBUG { printf("[include: %s, cfileno=%u]\n", fname, cfileno); }
749 // Alloc and initialize include-descriptors
750 INOBJ * inobj = a_inobj(SRC_IFILE);
751 IFILE * ifile = inobj->inobj.ifile;
753 ifile->ifhandle = handle; // Setup file handle
754 ifile->ifind = ifile->ifcnt = 0; // Setup buffer indices
755 ifile->ifoldlineno = curlineno; // Save old line number
756 ifile->ifoldfname = curfname; // Save old filename
757 ifile->ifno = cfileno; // Save old file number
759 // NB: This *must* be preincrement, we're adding one to the filecount here!
760 cfileno = ++filecount; // Compute NEW file number
761 curfname = strdup(fname); // Set current filename (alloc storage)
762 curlineno = 0; // Start on line zero
764 // Add another file to the file-record
765 FILEREC * fr = (FILEREC *)malloc(sizeof(FILEREC));
766 fr->frec_next = NULL;
767 fr->frec_name = curfname; // The record shares the strdup()'d name with curfname
770 filerec = fr; // Add first filerec
772 last_fr->frec_next = fr; // Append to list of filerecs
775 DEBUG { printf("[include: curfname: %s, cfileno=%u]\n", curfname, cfileno); }
782 // Pop the current input level
// Closes any conditional-assembly levels opened inside the input object
// (warning if some were left open), restores tok and etok to their values on
// entry, releases the type-specific state (IFILE, IMACRO or IREPT) and unlinks
// the object from the input stack.
// Returns -1 if d_endif() fails; TokenizeLine() treats a return of 0 as
// success.
786 INOBJ * inobj = cur_inobj;
791 // Pop IFENT levels until we reach the conditional assembly context we
792 // were at when the input object was entered.
793 int numUnmatched = 0;
795 while (ifent != inobj->in_ifent)
797 if (d_endif() != 0) // Something bad happened during endif parsing?
798 return -1; // If yes, bail instead of getting stuck in a loop
803 // Warn the user that we had to close their unterminated conditionals for them
804 if (numUnmatched > 0)
805 warn("missing %d .endif(s)", numUnmatched);
807 tok = inobj->in_otok; // Restore tok and etok
808 etok = inobj->in_etok;
810 switch (inobj->in_type)
812 case SRC_IFILE: // Pop and release an IFILE
814 DEBUG { printf("[Leaving: %s]\n", curfname); }
816 IFILE * ifile = inobj->inobj.ifile;
817 ifile->if_link = f_ifile;
819 close(ifile->ifhandle); // Close source file
820 DEBUG { printf("[fpop (pre): curfname=%s]\n", curfname); }
821 curfname = ifile->ifoldfname; // Set current filename
822 DEBUG { printf("[fpop (post): curfname=%s]\n", curfname); }
823 DEBUG { printf("[fpop: (pre) cfileno=%d ifile->ifno=%d]\n", (int)cfileno, (int)ifile->ifno); }
824 curlineno = ifile->ifoldlineno; // Set current line#
825 DEBUG { printf("cfileno=%d ifile->ifno=%d\n", (int)cfileno, (int)ifile->ifno); }
826 cfileno = ifile->ifno; // Restore current file number
827 DEBUG { printf("[fpop: (post) cfileno=%d ifile->ifno=%d]\n", (int)cfileno, (int)ifile->ifno); }
831 case SRC_IMACRO: // Pop and release an IMACRO
833 IMACRO * imacro = inobj->inobj.imacro;
834 imacro->im_link = f_imacro;
839 case SRC_IREPT: // Pop and release an IREPT
841 DEBUG { printf("dealloc IREPT\n"); }
842 LLIST * p = inobj->inobj.irept->ir_firstln;
844 // Deallocate repeat lines
855 cur_inobj = inobj->in_link;
856 inobj->in_link = f_inobj;
// Reads from the IFILE on top of the input stack. The file's ifbuf holds
// buffered data, with ifind as the start offset and ifcnt as the number of
// bytes available; read() appends up to QUANTUM more bytes when no complete
// line is buffered.
864 // Get line from file into buf, return NULL on EOF or ptr to the start of a
867 char * GetNextLine(void)
871 int readamt = -1; // 0 if last read() yielded 0 bytes
872 IFILE * fl = cur_inobj->inobj.ifile;
876 // Scan for next end-of-line; handle stupid text formats by treating
877 // \r\n the same as \n. (lone '\r' at end of buffer means we have to
879 d = &fl->ifbuf[fl->ifind];
881 for(p=d, i=0, j=fl->ifcnt; i<j; i++, p++)
883 if (*p == '\r' || *p == '\n')
890 break; // Need to read more, then look for '\n' to eat
891 else if (p[1] == '\n')
895 // Cover up the newline with end-of-string sentinel
904 // Handle hanging lines by ignoring them (Input file is exhausted, no
905 // \r or \n on last line)
906 // Shamus: Ignoring input is wrong; never discard any of it!
907 if (!readamt && fl->ifcnt)
914 // Really should check to see if we're at the end of the buffer!
916 fl->ifbuf[fl->ifind + fl->ifcnt] = '\0';
918 return &fl->ifbuf[fl->ifind];
922 // Truncate and return absurdly long lines.
923 if (fl->ifcnt >= QUANTUM)
925 fl->ifbuf[fl->ifind + fl->ifcnt - 1] = '\0';
927 return &fl->ifbuf[fl->ifind];
930 // Relocate what's left of a line to the beginning of the buffer, and
931 // read some more of the file in; return NULL if the buffer's empty and
935 p = &fl->ifbuf[fl->ifind];
936 d = &fl->ifbuf[fl->ifcnt & 1];
938 for(i=0; i<fl->ifcnt; i++)
941 fl->ifind = fl->ifcnt & 1;
// NOTE(review): what happens when read() returns -1 is not visible in this
// view -- confirm before readamt is added to ifcnt.
944 readamt = read(fl->ifhandle, &fl->ifbuf[fl->ifind + fl->ifcnt], QUANTUM);
949 if ((fl->ifcnt += readamt) == 0)
// Tokenize the next line of input.
// Fetches a line from the input object on top of the input stack (a file, a
// macro or a repeat block), converts it to tokens deposited starting at etok,
// and finally points tok at the first of them. Symbol and string text is not
// copied: the line is terminated in place and pointers into it are stored in
// string[]. Returns TKEOF once the input stack is exhausted; failures are
// reported through error().
958 int TokenizeLine(void)
960 uint8_t * ln = NULL; // Ptr to current position in line
961 uint8_t * p; // Random character ptr
962 PTR tk; // Token-deposit ptr
963 int state = 0; // State for keyword detector
964 int j = 0; // Var for keyword detector
965 uint8_t c; // Random char
966 uint64_t v; // Random value
967 uint32_t cursize = 0; // Current line's size (.b, .w, .l, .s, .q, .d)
968 uint8_t * nullspot = NULL; // Spot to clobber for SYMBOL termination
969 int stuffnull; // 1:terminate SYMBOL '\0' at *nullspot
971 int stringNum = 0; // Pointer to string locations in tokenized line
972 SYM* sy; // For looking up symbols (.equr)
973 int equrundef = 0; // Flag for equrundef scanning
977 if (cur_inobj == NULL) // Return EOF if input stack is empty
980 // Get another line of input from the current input source: a file, a
981 // macro, or a repeat-block
982 switch (cur_inobj->in_type)
986 // o bump source line number;
987 // o tag the listing-line with a space;
988 // o kludge lines generated by Alcyon C.
990 if ((ln = GetNextLine()) == NULL)
992 DEBUG { printf("TokenizeLine: Calling fpop() from SRC_IFILE...\n"); }
993 if (fpop() == 0) // Pop input level
994 goto retry; // Try for more lines
997 ifent->if_prev = (IFENT *)-1; //Signal Assemble() that we have reached EOF with unbalanced if/endifs
1002 curlineno++; // Bump line number
1008 // o Handle end-of-macro;
1009 // o tag the listing-line with an at (@) sign.
1011 if ((ln = GetNextMacroLine()) == NULL)
1013 if (ExitMacro() == 0) // Exit macro (pop args, do fpop(), etc)
1014 goto retry; // Try for more lines...
1016 return TKEOF; // Oops, we got a non zero return code, signal EOF
1023 // o Handle end-of-repeat-block;
1024 // o tag the listing-line with a pound (#) sign.
1026 if ((ln = GetNextRepeatLine()) == NULL)
1028 DEBUG { printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n"); }
1037 // Save text of the line. We only do this during listings and within
1038 // macro-type blocks, since it is expensive to unconditionally copy every
// NOTE(review): strlen() returns size_t but is printed with %d below, and the
// test is "> LNSIZ" rather than ">= LNSIZ" -- confirm both against the size of
// the buffer the line is copied into.
1043 if (strlen(ln) > LNSIZ)
1044 return error("line too long (%d, max %d)", strlen(ln), LNSIZ);
1049 // General housekeeping
1050 tok = tokeol; // Set "tok" to EOL in case of error
1051 tk.u32 = etok; // Reset token ptr
1052 stuffnull = 0; // Don't stuff nulls
1053 totlines++; // Bump total #lines assembled
1055 // See if the entire line is a comment. This is a win if the programmer
1056 // puts in lots of comments
1057 if (*ln == '*' || *ln == ';' || ((*ln == '/') && (*(ln + 1) == '/')))
1060 // And here we have a very ugly hack for signalling a single line 'turn off
1061 // optimization'. There's really no nice way to do this, so hack it is!
1062 optimizeOff = 0; // Default is to take optimizations as they come
1066 optimizeOff = 1; // Signal that we don't want to optimize this line
1067 ln++; // & skip over the darned thing
1070 // Main tokenization loop;
1071 // o skip whitespace;
1072 // o handle end-of-line;
1073 // o handle symbols;
1074 // o handle single-character tokens (operators, etc.);
1075 // o handle multiple-character tokens (constants, strings, etc.).
1078 // Check to see if there's enough space in the token buffer
1079 if (tk.cp >= ((uint8_t *)(&tokbuf[TOKBUFSIZE])) - 20)
1081 return error("token buffer overrun");
1084 // Skip whitespace, handle EOL
1085 while (chrtab[*ln] & WHITE)
1088 // Handle EOL, comment with ';'
1089 if (*ln == EOS || *ln == ';'|| ((*ln == '/') && (*(ln + 1) == '/')))
1092 // Handle start of symbol. Symbols are null-terminated in place. The
1093 // termination is always one symbol behind, since there may be no place
1094 // for a null in the case that an operator immediately follows the name.
1099 if (stuffnull) // Terminate old symbol from previous pass
1102 v = 0; // Assume no DOT attrib follows symbol
1105 // In some cases, we need to check for a DOTx at the *beginning*
1106 // of a symbol, as the "start" of the line we're currently looking
1107 // at could be somewhere in the middle of that line!
1110 // Make sure that it's *only* a .[bwsl] following, and not the
1111 // start of a local symbol:
1112 if ((chrtab[*(ln + 1)] & DOT)
1113 && (dotxtab[*(ln + 1)] != 0)
1114 && !(chrtab[*(ln + 2)] & CTSYM))
1116 // We found a legitimate DOTx construct, so add it to the
1120 *tk.u32++ = (TOKEN)dotxtab[*ln++];
1125 p = nullspot = ln++; // Nullspot -> start of this symbol
1127 // Find end of symbol (and compute its length)
1128 for(j=1; (int)chrtab[*ln]&CTSYM; j++)
1131 // Handle "DOT" special forms (like ".b") that follow a normal
1132 // symbol or keyword:
1135 *ln++ = EOS; // Terminate symbol
1136 stuffnull = 0; // And never try it again
1138 // Character following the '.' must have a DOT attribute, and
1139 // the character after THAT one must not have a start-symbol
1140 // attribute (to prevent symbols that look like, for example,
1141 // "zingo.barf", which might be a good idea anyway....)
1142 if (((chrtab[*ln] & DOT) == 0) || (dotxtab[*ln] == 0))
1143 return error("[bwsl] must follow '.' in symbol");
1145 v = (uint32_t)dotxtab[*ln++];
1146 cursize = (uint32_t)v;
1148 if (chrtab[*ln] & CTSYM)
1149 return error("misuse of '.'; not allowed in symbols");
1152 // If the symbol is small, check to see if it's really the name of
1157 for (state = 0; state >= 0;)
1159 j = (int)tolowertab[*p++];
1160 j += regbase[state];
1162 if (regcheck[j] != state)
1168 if (*p == EOS || p == ln)
1179 // Scan for keywords
1180 if ((j <= 0 || state <= 0) || p==p2)
1184 for (state = 0; state >= 0;)
1186 j = (int)tolowertab[*p2++];
1189 if (kwcheck[j] != state)
1195 if (*p == EOS || p2 == ln)
1212 // If we detected equrundef/regundef set relevant flag
1213 if (j == KW_EQURUNDEF)
1219 // If not tokenized keyword OR token was not found
1220 if ((j < 0) || (state < 0))
1222 // Only proceed if no equrundef has been detected. In that case we need to store the symbol
1223 // because the directive handler (d_equrundef) will run outside this loop, further into procln.c
1224 if (!equrundef && !disabled)
1226 // Last attempt: let's see if this is an equated register.
1227 // If yes, then just store the register's keyword value instead of the symbol
1230 sy = lookup(nullspot, LABEL, 0);
1234 if (sy->sattre & EQUATEDREG)
1236 *tk.u32++ = sy->svalue;
1242 // Ok, that failed, let's store the symbol instead
1244 string[stringNum] = nullspot;
1245 *tk.u32++ = stringNum;
1250 *tk.u32++ = (TOKEN)j;
1254 if (v) // Record attribute token (if any)
1255 *tk.u32++ = (TOKEN)v;
1257 if (stuffnull) // Arrange for string termination on next pass
1262 // When we are in a disabled code block, the only thing that can break out
1263 // of this is an ".endif" keyword, so this is the minimum we have to parse
1264 // in order to discover such a keyword.
1271 // Handle identity tokens
1278 // Handle multiple-character tokens
1283 case '!': // ! or !=
1293 case '\'': // 'string'
1296 // Hardcoded for now, maybe this will change in the future
1297 *tk.u32++ = STRINGA8;
1301 case '\"': // "string"
1305 string[stringNum] = ln;
1306 *tk.u32++ = stringNum;
1309 for(p=ln; *ln!=EOS && *ln!=c1;)
1318 return(error("unterminated string"));
1347 // If we're evaluating a macro
1348 // this is valid because it's
1349 // a parameter expansion
1351 // If we're evaluating a macro
1352 // this is valid and expands to
1356 warn("bad backslash code in string");
1366 return error("unterminated string");
1370 case '$': // $, hex constant
1371 if (chrtab[*ln] & HDIGIT)
1375 // Parse the hex value
1376 while (hextab[*ln] >= 0)
1377 v = (v << 4) + (int)hextab[*ln++];
1384 if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
1389 else if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
1400 case '<': // < or << or <> or <=
1419 case ':': // : or ::
1429 case '=': // = or ==
1432 *tk.u32++ = DEQUALS;
1439 case '>': // > or >> or >=
1454 case '%': // % or binary constant
1455 if (*ln < '0' || *ln > '1')
1463 while (*ln >= '0' && *ln <= '1')
1464 v = (v << 1) + *ln++ - '0';
1468 if ((*(ln + 1) == 'b') || (*(ln + 1) == 'B'))
1474 if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
1480 if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
1490 case '@': // @ or octal constant
1491 if (*ln < '0' || *ln > '7')
1499 while (*ln >= '0' && *ln <= '7')
1500 v = (v << 3) + *ln++ - '0';
1504 if ((*(ln + 1) == 'b') || (*(ln + 1) == 'B'))
1510 if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
1516 if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
1526 case '^': // ^ or ^^ <operator-name>
1533 if (((int)chrtab[*++ln] & STSYM) == 0)
1535 error("invalid symbol following ^^");
1541 while ((int)chrtab[*ln] & CTSYM)
1544 for(state=0; state>=0;)
1546 // Get char, convert to lowercase
1547 j = (int)tolowertab[*p++];
1549 //if (j >= 'A' && j <= 'Z')
1552 j += unarybase[state];
1554 if (unarycheck[j] != state)
1560 if (*p == EOS || p == ln)
1566 state = unarytab[j];
1569 if (j < 0 || state < 0)
1571 error("unknown symbol following ^^");
1575 *tk.u32++ = (TOKEN)j;
1578 interror(2); // Bad MULTX entry in chrtab
1583 // Handle decimal constant
1586 uint8_t * numStart = ln;
1589 while ((int)chrtab[*ln] & DIGIT)
1590 v = (v * 10) + *ln++ - '0';
1592 // See if there's a .[bwl] after the constant & deal with it if so
1595 if ((*(ln + 1) == 'b') || (*(ln + 1) == 'B'))
1603 else if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
1611 else if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
1619 else if ((int)chrtab[*(ln + 1)] & DIGIT)
1621 // Hey, more digits after the dot, so we assume it's a
1622 // floating point number of some kind... numEnd will point
1623 // to the first non-float character after it's done
1626 double f = strtod(numStart, &numEnd);
1627 ln = (uint8_t *)numEnd;
1630 return error("floating point parse error");
1632 // N.B.: We use the C compiler's internal double
1633 // representation for all internal float calcs and
1634 // are reasonably sure that the size of said double
1635 // is 8 bytes long (which we check for in fltpoint.c)
1648 //printf("CONST: %i\n", v);
1652 // Handle illegal character
1653 return error("illegal character $%02X found", *ln);
1656 // Terminate line of tokens and return "success."
1659 tok = etok; // Set tok to beginning of line
1661 if (stuffnull) // Terminate last SYMBOL
1671 // .GOTO <label> goto directive
1673 // The label is searched for starting from the first line of the current,
1674 // enclosing macro definition. If no enclosing macro exists, an error is
1677 // A label is of the form:
1679 // :<name><whitespace>
1681 // The colon must appear in column 1. The label is stripped prior to macro
1682 // expansion, and is NOT subject to macro expansion. The whitespace may also
//
// The parameter is not used. The label name is taken from the current token
// stream (string[tok[1]]). On a match, the macro's im_nextln is set to the
// labelled line so that expansion continues from there. Failures are reported
// through error(): "missing label", "goto not in macro" or
// "goto label not found".
1685 int d_goto(WORD unused)
1687 // Setup for the search
1689 return error("missing label");
1691 char * sym = string[tok[1]];
1694 if (cur_inobj->in_type != SRC_IMACRO)
1695 return error("goto not in macro");
1697 IMACRO * imacro = cur_inobj->inobj.imacro;
1698 LLIST * defln = imacro->im_macro->lineList;
1700 // Attempt to find the label, starting with the first line.
1701 for(; defln!=NULL; defln=defln->next)
1703 // Must start with a colon
1704 if (defln->line[0] == ':')
1706 // Compare names (sleazo string compare)
1708 char * s2 = defln->line + 1;
1710 // Either we will match the strings to EOS on both, or we will
1711 // match EOS on string 1 to whitespace on string 2. Otherwise, we
1713 while ((*s1 == *s2) || ((*s1 == EOS) && (chrtab[*s2] & WHITE)))
1715 // If we reached the end of string 1 (sym), we're done.
1716 // Note that we're also checking for the end of string 2 as
1717 // well, since we've established they're equal above.
1720 // Found the label, set new macro next-line and return.
1721 imacro->im_nextln = defln;
1731 return error("goto label not found");
// Debugging aid: print a bracketed, human-readable form of the single token
// 't' to stdout. Named tokens print their name, and token values 0x80..0x8F
// print as D0..D7 / A0..A7. Anything unrecognised falls through to a
// hex-plus-character dump.
1735 void DumpToken(TOKEN t)
1739 else if (t == CONST)
1741 else if (t == FCONST)
1743 else if (t == ACONST)
1745 else if (t == STRING)
1747 else if (t == SYMBOL)
1751 else if (t == TKEOF)
1753 else if (t == DEQUALS)
1754 printf("[DEQUALS]");
1759 else if (t == DCOLON)
1771 else if (t == UNMINUS)
1772 printf("[UNMINUS]");
1787 else if (t == ENDEXPR)
1788 printf("[ENDEXPR]");
1789 else if (t == CR_ABSCOUNT)
1790 printf("[CR_ABSCOUNT]");
1791 else if (t == CR_FILESIZE)
1792 printf("[CR_FILESIZE]");
1793 else if (t == CR_DEFINED)
1794 printf("[CR_DEFINED]");
1795 else if (t == CR_REFERENCED)
1796 printf("[CR_REFERENCED]");
1797 else if (t == CR_STREQ)
1798 printf("[CR_STREQ]");
1799 else if (t == CR_MACDEF)
1800 printf("[CR_MACDEF]");
1801 else if (t == CR_TIME)
1802 printf("[CR_TIME]");
1803 else if (t == CR_DATE)
1804 printf("[CR_DATE]");
// Token values in these two ranges are printed as the character itself
1805 else if (t >= 0x20 && t <= 0x2F)
1806 printf("[%c]", (char)t);
1807 else if (t >= 0x3A && t <= 0x3F)
1808 printf("[%c]", (char)t);
// 0x80..0x87 print as D0..D7, 0x88..0x8F as A0..A7
1809 else if (t >= 0x80 && t <= 0x87)
1810 printf("[D%u]", ((uint32_t)t) - 0x80);
1811 else if (t >= 0x88 && t <= 0x8F)
1812 printf("[A%u]", ((uint32_t)t) - 0x88);
1814 printf("[%X:%c]", (uint32_t)t, (char)t);
// Debugging aid: print sloc, then every token in tokbuf up to (not including)
// the first EOL, in the same bracketed form DumpToken() uses. Tokens that carry
// operands (CONST, FCONST, ACONST, STRING, SYMBOL) have them printed too.
// NOTE(review): the CONST/FCONST cases print a value read through tp.u64 with
// "%lX". If that is a uint64_t, this is only correct where long is 64 bits
// wide; ExpandMacro() uses PRIX64 for the same kind of value -- confirm.
1818 void DumpTokenBuffer(void)
1820 printf("Tokens [%X]: ", sloc);
1822 for(TOKEN * t=tokbuf; *t!=EOL; t++)
1826 else if (*t == CONST)
1830 printf("[CONST: $%lX]", *tp.u64);
1833 else if (*t == FCONST)
1837 printf("[FCONST: $%lX]", *tp.u64);
1840 else if (*t == ACONST)
1842 printf("[ACONST: $%X, $%X]", (uint32_t)t[1], (uint32_t)t[2]);
1845 else if (*t == STRING)
1848 printf("[STRING:\"%s\"]", string[*t]);
1850 else if (*t == SYMBOL)
1853 printf("[SYMBOL:\"%s\"]", string[*t]);
1857 else if (*t == TKEOF)
1859 else if (*t == DEQUALS)
1860 printf("[DEQUALS]");
1865 else if (*t == DCOLON)
1877 else if (*t == UNMINUS)
1878 printf("[UNMINUS]");
1879 else if (*t == DOTB)
1881 else if (*t == DOTW)
1883 else if (*t == DOTL)
1885 else if (*t == DOTQ)
1887 else if (*t == DOTS)
1889 else if (*t == DOTD)
1891 else if (*t == DOTI)
1893 else if (*t == ENDEXPR)
1894 printf("[ENDEXPR]");
1895 else if (*t == CR_ABSCOUNT)
1896 printf("[CR_ABSCOUNT]");
1897 else if (*t == CR_FILESIZE)
1898 printf("[CR_FILESIZE]");
1899 else if (*t == CR_DEFINED)
1900 printf("[CR_DEFINED]");
1901 else if (*t == CR_REFERENCED)
1902 printf("[CR_REFERENCED]");
1903 else if (*t == CR_STREQ)
1904 printf("[CR_STREQ]");
1905 else if (*t == CR_MACDEF)
1906 printf("[CR_MACDEF]");
1907 else if (*t == CR_TIME)
1908 printf("[CR_TIME]");
1909 else if (*t == CR_DATE)
1910 printf("[CR_DATE]");
1911 else if (*t >= 0x20 && *t <= 0x2F)
1912 printf("[%c]", (char)*t);
1913 else if (*t >= 0x3A && *t <= 0x3F)
1914 printf("[%c]", (char)*t);
1915 else if (*t >= 0x80 && *t <= 0x87)
1916 printf("[D%u]", ((uint32_t)*t) - 0x80);
1917 else if (*t >= 0x88 && *t <= 0x8F)
1918 printf("[A%u]", ((uint32_t)*t) - 0x88);
1920 printf("[%X:%c]", (uint32_t)*t, (char)*t);