2 // RMAC - Renamed Macro Assembler for all Atari computers
3 // TOKEN.C - Token Handling
4 // Copyright (C) 199x Landon Dyer, 2011-2021 Reboot and Friends
5 // RMAC derived from MADMAC v1.07 Written by Landon Dyer, 1986
6 // Source utilised with the kind permission of Landon Dyer
19 #define DECL_KW // Declare keyword arrays
20 #define DEF_KW // Declare keyword values
21 #include "kwtab.h" // Incl generated keyword tables & defs
// File-scope tokenizer state shared with the rest of the assembler.
24 int lnsave; // 1 ==> strcpy() text of current line (0 = don't save)
25 uint32_t curlineno; // Current line number (reset by include(), bumped per file line)
26 int totlines; // Total # of lines assembled
27 int mjump_align = 0; // mjump alignment flag
28 char lntag; // Line tag
29 char * curfname; // Current filename
30 char tolowertab[128]; // Uppercase ==> lowercase
31 int8_t hextab[128]; // Hex digit values, indexed by character
32 char dotxtab[128]; // Size token (DOTB, DOTW, ...) for the character after '.'
33 char irbuf[LNSIZ]; // Text for .rept block line
34 char lnbuf[LNSIZ]; // Text of current line
35 WORD filecount; // Unique file number counter
36 WORD cfileno; // Current file number
37 TOKEN * tok; // Ptr to current token
38 TOKEN * etok; // Ptr past last token in tokbuf[]
39 TOKEN tokeol[1] = {EOL}; // Bailout end-of-line token ('tok' points here while tokenizing)
40 char * string[TOKBUFSIZE*2];// String pointers referenced by index from the token stream
41 int optimizeOff; // 1 ==> don't optimize the current line (reset for every line)
47 INOBJ * cur_inobj; // Ptr current input obj (IFILE/IMACRO/IREPT)
48 static INOBJ * f_inobj; // Ptr list of free INOBJs
49 static IFILE * f_ifile; // Ptr list of free IFILEs
50 static IMACRO * f_imacro; // Ptr list of free IMACROs
52 static TOKEN tokbuf[TOKBUFSIZE]; // Token buffer (stack-like, all files)
// Character classification table, indexed by the byte value of a source
// character. Each entry is a sum of class flags; the tokenizer tests flags
// such as WHITE, DIGIT, HDIGIT, STSYM, CTSYM and DOT with '&'.
// NOTE(review): lowercase 'x' carries DOT but uppercase 'X' does not, while
// InitTokenizer() sets dotxtab['X']; confirm whether ".X" is meant to pass
// the DOT checks in TokenizeLine().
54 uint8_t chrtab[0x100] = {
55 ILLEG, ILLEG, ILLEG, ILLEG, // NUL SOH STX ETX
56 ILLEG, ILLEG, ILLEG, ILLEG, // EOT ENQ ACK BEL
57 ILLEG, WHITE, ILLEG, ILLEG, // BS HT LF VT
58 WHITE, ILLEG, ILLEG, ILLEG, // FF CR SO SI
60 ILLEG, ILLEG, ILLEG, ILLEG, // DLE DC1 DC2 DC3
61 ILLEG, ILLEG, ILLEG, ILLEG, // DC4 NAK SYN ETB
62 ILLEG, ILLEG, ILLEG, ILLEG, // CAN EM SUB ESC
63 ILLEG, ILLEG, ILLEG, ILLEG, // FS GS RS US
65 WHITE, MULTX, MULTX, SELF, // SP ! " #
66 MULTX+CTSYM, MULTX, SELF, MULTX, // $ % & '
67 SELF, SELF, SELF, SELF, // ( ) * +
68 SELF, SELF, STSYM, SELF, // , - . /
70 DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 0 1
71 DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 2 3
72 DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 4 5
73 DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 6 7
74 DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 8 9
76 MULTX, MULTX, MULTX, STSYM+CTSYM, // < = > ?
78 MULTX, STSYM+CTSYM+HDIGIT, // @ A
79 DOT+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT, // B C
80 DOT+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT, // D E
81 STSYM+CTSYM+HDIGIT, STSYM+CTSYM, // F G
82 STSYM+CTSYM, DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // H I J K
83 DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // L M N O
85 DOT+STSYM+CTSYM, DOT+STSYM+CTSYM, STSYM+CTSYM, DOT+STSYM+CTSYM, // P Q R S
86 STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, DOT+STSYM+CTSYM, // T U V W
87 STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, SELF,// X Y Z [
88 SELF, SELF, MULTX, STSYM+CTSYM, // \ ] ^ _
90 ILLEG, STSYM+CTSYM+HDIGIT, // ` a
91 DOT+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT, // b c
92 DOT+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT, // d e
93 STSYM+CTSYM+HDIGIT, STSYM+CTSYM, // f g
94 STSYM+CTSYM, DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // h i j k
95 DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // l m n o
97 DOT+STSYM+CTSYM, DOT+STSYM+CTSYM, STSYM+CTSYM, DOT+STSYM+CTSYM, // p q r s
98 STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, DOT+STSYM+CTSYM, // t u v w
99 DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, SELF, // x y z {
100 SELF, SELF, SELF, ILLEG, // | } ~ DEL
102 // Anything above $7F is illegal (and yes, we need to check for this,
103 // otherwise you get strange and spurious errors that will lead you astray)
104 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
105 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
106 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
107 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
108 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
109 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
110 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
111 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
112 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
113 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
114 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
115 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
116 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
117 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
118 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
119 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG
122 // Names of registers, indexed by (keyword token - KW_D0). ExpandMacro() uses
// this table to turn register tokens back into source text. The trailing
// comment on each row gives the token value range that row covers; empty
// strings are presumably token values without a register name (TODO confirm).
123 static char * regname[] = {
124 "d0","d1","d2","d3","d4","d5","d6","d7", // 128,135
125 "a0","a1","a2","a3","a4","a5","a6","sp", // 136,143
126 "ssp","pc","sr","ccr","regequ","set","reg","r0", // 144,151
127 "r1","r2","r3","r4","r5","r6","r7","r8", // 152,159
128 "r9","r10","r11","r12","r13","r14","r15","r16", // 160,167
129 "r17","r18","r19","r20","r21","r22","r23","r24", // 168,175
130 "r25","r26","r27","r28","r29","r30","r31","ccdef", // 176,183
131 "usp","ic40","dc40","bc40","sfc","dfc","","vbr", // 184,191
132 "cacr","caar","msp","isp","tc","itt0","itt1","dtt0", // 192,199
133 "dtt1","mmusr","urp","srp","iacr0","iacr1","dacr0","dacr1", // 200,207
134 "tt0","tt1","crp","","","","","", // 208,215
135 "","","","","fpiar","fpsr","fpcr","", // 216,223
136 "fp0","fp1","fp2","fp3","fp4","fp5","fp6","fp7", // 224,231
137 "","","","","","","","", // 232,239
138 "","","","","","","","", // 240,247
139 "","","","","","","","", // 248,255
140 "","","","","x0","x1","y0","y1", // 256,263
141 "","b0","","b2","","b1","a","b", // 264,271
142 "mr","omr","la","lc","ssh","ssl","ss","", // 272,279
143 "n0","n1","n2","n3","n4","n5","n6","n7", // 280,287
144 "m0","m1","m2","m3","m4","m5","m6","m7", // 288,295
145 "","","","","","","l","p", // 296,303
146 "mr","omr","la","lc","ssh","ssl","ss","", // 304,311
147 "a10","b10","x","y","","","ab","ba" // 312,319
// Names of the RISC registers r0..r31, indexed by (token - KW_R0).
// ExpandMacro() uses this for tokens in the range KW_R0..KW_R31.
150 static char * riscregname[] = {
151 "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
152 "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
153 "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
154 "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31"
159 // Initialize tokenizer: resets line-saving, filename and file counters, and
// fills in the hex-digit, lowercase and "dot suffix" lookup tables.
161 void InitTokenizer(void)
164 char * htab = "0123456789abcdefABCDEF"; // Hex character table
166 lnsave = 0; // Don't save lines
167 curfname = ""; // No file, empty filename
168 filecount = (WORD)-1; // include() pre-increments this, so the first file is #0
169 cfileno = (WORD)-1; // cfileno gets bumped to 0
181 // Initialize hex, "dot" and tolower tables
186 tolowertab[i] = (char)i;
// Indices 0..15 of htab ('0'..'9', 'a'..'f') map to their own index; indices
// 16..21 ('A'..'F') map to i - 6, i.e. 10..15.
189 for(i=0; htab[i]!=EOS; i++)
190 hextab[htab[i]] = (char)((i < 16) ? i : i - 6);
// Setting bit 0x20 turns each ASCII uppercase letter into its lowercase form
192 for(i='A'; i<='Z'; i++)
193 tolowertab[i] |= 0x20;
195 // These characters are legal immediately after a period
196 dotxtab['b'] = DOTB; // .b .B
198 //dotxtab['s'] = DOTB;
199 //dotxtab['S'] = DOTB;
200 dotxtab['w'] = DOTW; // .w .W
202 dotxtab['l'] = DOTL; // .l .L
204 dotxtab['i'] = DOTI; // .i .I (purpose unclear)
206 dotxtab['D'] = DOTD; // .d .D (double)
208 dotxtab['S'] = DOTS; // .s .S
210 dotxtab['Q'] = DOTQ; // .q .Q (quad word)
212 dotxtab['X'] = DOTX; // .x .X
214 dotxtab['P'] = DOTP; // .p .P
// Point curfname at the name of the file being processed, for use in error
// messages. Walks the filerec list until the record selected by fnum is
// reached; fnum is set on a line not shown here (presumably from cfileno --
// TODO confirm). Falls back to placeholder names if no record applies.
219 void SetFilenameForErrorReporting(void)
223 // Check for absolute top filename (this should never happen)
226 curfname = "(*top*)";
230 FILEREC * fr = filerec;
232 // Advance to the correct record...
233 while (fr != NULL && fnum != 0)
239 // Check for file # record not found (this should never happen either)
242 curfname = "(*NOT FOUND*)";
// Normal case: use the name stored in the matching file record
246 curfname = fr->frec_name;
251 // Allocate an IFILE, IMACRO or IREPT input object.
// 'typ' selects the kind (SRC_IFILE, SRC_IMACRO or SRC_IREPT). The INOBJ and
// its type-specific record come from the free lists (f_inobj, f_ifile,
// f_imacro) when those are non-empty, otherwise from malloc(). The new object
// records the current .if context and token pointers and is linked to the
// current top of the input stack.
// NOTE(review): no malloc() result is checked in the lines visible here.
253 INOBJ * a_inobj(int typ)
259 // Allocate and initialize INOBJ first
261 inobj = malloc(sizeof(INOBJ));
265 f_inobj = f_inobj->in_link;
270 case SRC_IFILE: // Alloc and init an IFILE
272 ifile = malloc(sizeof(IFILE));
276 f_ifile = f_ifile->if_link;
279 inobj->inobj.ifile = ifile;
282 case SRC_IMACRO: // Alloc and init an IMACRO
283 if (f_imacro == NULL)
284 imacro = malloc(sizeof(IMACRO));
288 f_imacro = f_imacro->im_link;
291 inobj->inobj.imacro = imacro;
294 case SRC_IREPT: // Alloc and init an IREPT
295 inobj->inobj.irept = malloc(sizeof(IREPT));
296 DEBUG { printf("alloc IREPT\n"); }
300 // Install INOBJ on top of input stack
301 inobj->in_ifent = ifent; // Record .if context on entry
302 inobj->in_type = (WORD)typ;
303 inobj->in_otok = tok; // Saved so fpop() can restore tok
304 inobj->in_etok = etok; // Saved so fpop() can restore etok
305 inobj->in_link = cur_inobj;
313 // Perform macro substitution from 'orig' to 'dest'. Return OK or some error.
314 // A macro reference is in one of two forms:
315 // \name <non-name-character>
317 // A doubled backslash (\\) is compressed to a single backslash (\).
318 // Argument definitions have been pre-tokenized, so we have to turn them back
319 // into text. This means that numbers, in particular, become hex, regardless of
320 // their representation when the macro was invoked. This is a hack.
321 // A label may appear at the beginning of the line:
322 // :<name><whitespace>
323 // (the colon must be in the first column). These labels are stripped before
324 // macro expansion takes place.
// Parameters: 'src' is the macro body line to expand, 'dest' receives the
// expanded text and 'destsiz' is the size of 'dest' in bytes. The macro being
// expanded is taken from cur_inobj. Output that does not fit in 'dest' is
// reported through fatal("line too long as a result of macro expansion").
326 int ExpandMacro(char * src, char * dest, int destsiz)
329 int questmark; // \? for testing argument existence
330 char mname[128]; // Assume max size of a formal arg name
331 char numbuf[20]; // Buffer for text of CONSTs
334 char ** symbolString;
336 DEBUG { printf("ExM: src=\"%s\"\n", src); }
338 IMACRO * imacro = cur_inobj->inobj.imacro;
339 int macnum = (int)(imacro->im_macro->sattr);
341 char * dst = dest; // Next dest slot
342 char * edst = dest + destsiz - 1; // Address of the last byte of dest (dest[destsiz - 1])
344 // Check for (and skip over) any "label" on the line
350 while (*s != EOS && !(chrtab[*s] & WHITE))
354 s++; // Skip first whitespace
357 // Expand the rest of the line
360 // Copy single character
366 // Skip comments in case a loose @ or \ is in there
367 // In that case the tokeniser was trying to expand it.
368 if ((*s == ';') || ((*s == '/') && (*(s + 1) == '/')))
373 // Do macro expansion
381 case '\\': // \\, \ (collapse to single backslash)
387 case '?': // \? <macro> set `questmark' flag
391 case '#': // \#, number of arguments
392 sprintf(numbuf, "%d", (int)imacro->im_nargs);
394 case '!': // \! size suffix supplied on invocation
395 switch ((int)imacro->im_siz)
397 case SIZN: d = ""; break;
398 case SIZB: d = ".b"; break;
399 case SIZW: d = ".w"; break;
400 case SIZL: d = ".l"; break;
404 case '~': // ==> unique label string Mnnnn...
405 sprintf(numbuf, "M%u", curuniq);
421 return error("missing argument name");
424 // \n ==> argument number 'n', 0..9
425 if (chrtab[*s] & DIGIT)
435 // Get argument name: \name, \{name}
445 while (chrtab[*s] & CTSYM);
450 for(++s; *s != EOS && *s != '}';)
454 return error("missing closing brace ('}')");
461 // Lookup the argument and copy its (string) value into the
462 // destination string
463 DEBUG { printf("argument='%s'\n", mname); }
465 if ((arg = lookup(mname, MACARG, macnum)) == NULL)
466 return error("undefined argument: '%s'", mname);
469 // Convert a string of tokens (terminated with EOL) back into
470 // text. If an argument is out of range (not specified in the
471 // macro invocation) then it is ignored.
472 i = (int)arg->svalue;
474 DEBUG { printf("~argnumber=%d\n", i); }
477 if (i < imacro->im_nargs)
479 tk = imacro->argument[i].token;
480 symbolString = imacro->argument[i].string;
483 // printf("ExM: Preparing to parse argument #%u...\n", i);
489 // 0 if the argument is empty or non-existent,
490 // 1 if the argument is not empty
493 if (tk == NULL || *tk == EOL)
499 *dst++ = (char)(questmark + '0');
503 // Argument # is in range, so expand it
508 // Reverse-translation from a token number to a string.
509 // This is a hack. It might be better table-driven.
512 if ((*tk >= KW_D0) && !rdsp && !rgpu)
514 d = regname[(int)*tk++ - KW_D0];
517 else if ((*tk >= KW_R0) && (*tk <= KW_R31))
519 d = riscregname[(int)*tk++ - KW_R0];
527 d = symbolString[*tk++];
528 DEBUG { printf("ExM: SYMBOL=\"%s\"", d); }
531 d = symbolString[*tk++];
552 // Shamus: Changing the format specifier from %lx to %ux caused the assembler
553 // to choke on legitimate code... Need to investigate this further
554 // before changing anything else here!
556 // sprintf(numbuf, "$%lx", (uint64_t)*tk++);
557 sprintf(numbuf, "$%" PRIX64, (uint64_t)*tk++);
625 *dst++ = (char)*(tk - 1);
630 // If 'd' != NULL, copy string to destination
634 DEBUG printf("d='%s'\n", d);
653 DEBUG { printf("ExM: dst=\"%s\"\n", dest); }
658 DEBUG { printf("*** OVERFLOW LINE ***\n%s\n", dest); }
659 return fatal("line too long as a result of macro expansion");
664 // Get next line of text from a macro.
// Takes the line at im_nextln of the current macro input object, advances
// im_nextln to the following line, and expands the text into im_lnbuf, which
// is returned. A NULL im_nextln marks the end of the macro.
// NOTE(review): the int result of ExpandMacro() is not examined here.
666 char * GetNextMacroLine(void)
668 IMACRO * imacro = cur_inobj->inobj.imacro;
669 LLIST * strp = imacro->im_nextln;
671 if (strp == NULL) // End-of-macro
674 imacro->im_nextln = strp->next;
675 // ExpandMacro((char *)(strp + 1), imacro->im_lnbuf, LNSIZ);
676 ExpandMacro(strp->line, imacro->im_lnbuf, LNSIZ);
678 return imacro->im_lnbuf;
683 // Get next line of text from a repeat block.
// The line text is placed in the global irbuf. When the end of the block's
// line list is reached, the list restarts from ir_firstln and ir_count is
// consumed; a count of 0 at that point ends the repeat block.
685 char * GetNextRepeatLine(void)
687 IREPT * irept = cur_inobj->inobj.irept;
688 // LONG * strp = irept->ir_nextln; // initial null
690 // Do repeat at end of .rept block's string list
692 if (irept->ir_nextln == NULL)
694 DEBUG { printf("back-to-top-of-repeat-block count=%d\n", (int)irept->ir_count); }
695 irept->ir_nextln = irept->ir_firstln; // copy first line
697 if (irept->ir_count-- == 0)
699 DEBUG { printf("end-repeat-block\n"); }
703 // strp = irept->ir_nextln;
705 // Mark the current macro line in the irept object
706 // This is probably overkill - a global variable
707 // would suffice here (it only gets used during
708 // error reporting anyway)
709 irept->lineno = irept->ir_nextln->lineno;
711 // Copy the rept lines verbatim, unless we're in nest level 0.
712 // Then, expand any \~ labels to unique numbers (Rn)
// NOTE(review): neither the strcpy() nor the sprintf() below is bounded by
// the size of irbuf (LNSIZ) in the lines visible here.
715 strcpy(irbuf, irept->ir_nextln->line);
719 uint32_t linelen = strlen(irept->ir_nextln->line);
720 uint8_t *p_line = irept->ir_nextln->line;
721 char *irbufwrite = irbuf;
// '<=' makes the loop visit the terminating NUL as well as the text
722 for (int i = 0; i <= linelen; i++)
726 if (c == '\\' && *p_line == '~')
729 irbufwrite += sprintf(irbufwrite, "R%u", reptuniq);
738 DEBUG { printf("repeat line='%s'\n", irbuf); }
739 // irept->ir_nextln = (LONG *)*strp;
740 irept->ir_nextln = irept->ir_nextln->next;
747 // Include a source file used at the root, and for ".include" files.
// 'handle' is the descriptor of the already-opened file (later passed to
// read() and close()); 'fname' is its name, which is duplicated with strdup().
// The previous line number, filename and file number are saved in the new
// IFILE so fpop() can restore them, and a FILEREC for the file is appended to
// the file-record list.
// NOTE(review): the strdup() and malloc() results are not checked in the
// lines visible here.
749 int include(int handle, char * fname)
752 DEBUG { printf("[include: %s, cfileno=%u]\n", fname, cfileno); }
754 // Alloc and initialize include-descriptors
755 INOBJ * inobj = a_inobj(SRC_IFILE);
756 IFILE * ifile = inobj->inobj.ifile;
758 ifile->ifhandle = handle; // Setup file handle
759 ifile->ifind = ifile->ifcnt = 0; // Setup buffer indices
760 ifile->ifoldlineno = curlineno; // Save old line number
761 ifile->ifoldfname = curfname; // Save old filename
762 ifile->ifno = cfileno; // Save old file number
764 // NB: This *must* be preincrement, we're adding one to the filecount here!
765 cfileno = ++filecount; // Compute NEW file number
766 curfname = strdup(fname); // Set current filename (alloc storage)
767 curlineno = 0; // Start on line zero
769 // Add another file to the file-record
770 FILEREC * fr = (FILEREC *)malloc(sizeof(FILEREC));
771 fr->frec_next = NULL;
772 fr->frec_name = curfname; // Shares the strdup()'d string with curfname
775 filerec = fr; // Add first filerec
777 last_fr->frec_next = fr; // Append to list of filerecs
780 DEBUG { printf("[include: curfname: %s, cfileno=%u]\n", curfname, cfileno); }
787 // Pop the current input level.
// Closes any .if levels that were opened inside the input object (warning
// about the missing .endif directives), restores tok/etok to their values on
// entry, releases the type-specific record (IFILE, IMACRO or IREPT) and
// unlinks the INOBJ from the input stack. Returns -1 if d_endif() fails;
// TokenizeLine() treats a return of 0 as success.
791 INOBJ * inobj = cur_inobj;
796 // Pop IFENT levels until we reach the conditional assembly context we
797 // were at when the input object was entered.
798 int numUnmatched = 0;
800 while (ifent != inobj->in_ifent)
802 if (d_endif() != 0) // Something bad happened during endif parsing?
803 return -1; // If yes, bail instead of getting stuck in a loop
808 // Warn the user that unmatched .if blocks had to be closed on their behalf
809 if (numUnmatched > 0)
810 warn("missing %d .endif(s)", numUnmatched);
812 tok = inobj->in_otok; // Restore tok and etok
813 etok = inobj->in_etok;
815 switch (inobj->in_type)
817 case SRC_IFILE: // Pop and release an IFILE
819 DEBUG { printf("[Leaving: %s]\n", curfname); }
821 IFILE * ifile = inobj->inobj.ifile;
822 ifile->if_link = f_ifile;
824 close(ifile->ifhandle); // Close source file
825 DEBUG { printf("[fpop (pre): curfname=%s]\n", curfname); }
826 curfname = ifile->ifoldfname; // Set current filename
827 DEBUG { printf("[fpop (post): curfname=%s]\n", curfname); }
828 DEBUG { printf("[fpop: (pre) cfileno=%d ifile->ifno=%d]\n", (int)cfileno, (int)ifile->ifno); }
829 curlineno = ifile->ifoldlineno; // Set current line#
830 DEBUG { printf("cfileno=%d ifile->ifno=%d\n", (int)cfileno, (int)ifile->ifno); }
831 cfileno = ifile->ifno; // Restore current file number
832 DEBUG { printf("[fpop: (post) cfileno=%d ifile->ifno=%d]\n", (int)cfileno, (int)ifile->ifno); }
836 case SRC_IMACRO: // Pop and release an IMACRO
838 IMACRO * imacro = inobj->inobj.imacro;
839 imacro->im_link = f_imacro;
844 case SRC_IREPT: // Pop and release an IREPT
846 DEBUG { printf("dealloc IREPT\n"); }
847 LLIST * p = inobj->inobj.irept->ir_firstln;
849 // Deallocate repeat lines
// Unlink from the input stack and chain the INOBJ in front of the free list
860 cur_inobj = inobj->in_link;
861 inobj->in_link = f_inobj;
869 // Get line from file into buf, return NULL on EOF or ptr to the start of a
872 char * GetNextLine(void)
876 int readamt = -1; // 0 if last read() yielded 0 bytes
877 IFILE * fl = cur_inobj->inobj.ifile;
881 // Scan for next end-of-line; handle stupid text formats by treating
882 // \r\n the same as \n. (lone '\r' at end of buffer means we have to
// The unread data is ifcnt bytes starting at ifbuf[ifind]
884 d = &fl->ifbuf[fl->ifind];
886 for(p=d, i=0, j=fl->ifcnt; i<j; i++, p++)
888 if (*p == '\r' || *p == '\n')
895 break; // Need to read more, then look for '\n' to eat
896 else if (p[1] == '\n')
900 // Cover up the newline with end-of-string sentinel
909 // Handle hanging lines by ignoring them (Input file is exhausted, no
910 // \r or \n on last line)
911 // Shamus: Ignoring input is wrong. Never ignore any input!
912 if (!readamt && fl->ifcnt)
919 // Really should check to see if we're at the end of the buffer!
921 fl->ifbuf[fl->ifind + fl->ifcnt] = '\0';
923 return &fl->ifbuf[fl->ifind];
927 // Truncate and return absurdly long lines.
928 if (fl->ifcnt >= QUANTUM)
930 fl->ifbuf[fl->ifind + fl->ifcnt - 1] = '\0';
932 return &fl->ifbuf[fl->ifind];
935 // Relocate what's left of a line to the beginning of the buffer, and
936 // read some more of the file in; return NULL if the buffer's empty and
// The leftover bytes are moved to offset (ifcnt & 1), i.e. 0 or 1
940 p = &fl->ifbuf[fl->ifind];
941 d = &fl->ifbuf[fl->ifcnt & 1];
943 for(i=0; i<fl->ifcnt; i++)
946 fl->ifind = fl->ifcnt & 1;
// Append up to QUANTUM more bytes directly after the leftover data
949 readamt = read(fl->ifhandle, &fl->ifbuf[fl->ifind + fl->ifcnt], QUANTUM);
954 if ((fl->ifcnt += readamt) == 0)
// Fetch the next line from the current input object (file, macro or repeat
// block) and convert it into tokens stored in tokbuf[] starting at etok.
// While the line is being processed 'tok' points at tokeol, so an error
// return leaves an empty line behind; on success 'tok' is set to etok, the
// first token of the line. Symbol and string text is not copied: the string[]
// array holds pointers into the line buffer and the token stream stores the
// index. Error paths return the result of error(); TKEOF signals end of input.
963 int TokenizeLine(void)
965 uint8_t * ln = NULL; // Ptr to current position in line
966 uint8_t * p; // Random character ptr
967 PTR tk; // Token-deposit ptr
968 int state = 0; // State for keyword detector
969 int j = 0; // Var for keyword detector
970 uint8_t c; // Random char
971 uint64_t v; // Random value
972 uint32_t cursize = 0; // Current line's size (.b, .w, .l, .s, .q, .d)
973 uint8_t * nullspot = NULL; // Spot to clobber for SYMBOL termination
974 int stuffnull; // 1:terminate SYMBOL '\0' at *nullspot
976 int stringNum = 0; // Pointer to string locations in tokenized line
977 SYM* sy; // For looking up symbols (.equr)
978 int equrundef = 0; // Flag for equrundef scanning
982 if (cur_inobj == NULL) // Return EOF if input stack is empty
985 // Get another line of input from the current input source: a file, a
986 // macro, or a repeat-block
987 switch (cur_inobj->in_type)
991 // o bump source line number;
992 // o tag the listing-line with a space;
993 // o kludge lines generated by Alcyon C.
995 if ((ln = GetNextLine()) == NULL)
997 DEBUG { printf("TokenizeLine: Calling fpop() from SRC_IFILE...\n"); }
998 if (fpop() == 0) // Pop input level
999 goto retry; // Try for more lines
1002 ifent->if_prev = (IFENT *)-1; //Signal Assemble() that we have reached EOF with unbalanced if/endifs
1007 curlineno++; // Bump line number
1013 // o Handle end-of-macro;
1014 // o tag the listing-line with an at (@) sign.
1016 if ((ln = GetNextMacroLine()) == NULL)
1018 if (ExitMacro() == 0) // Exit macro (pop args, do fpop(), etc)
1019 goto retry; // Try for more lines...
1021 return TKEOF; // Oops, we got a non zero return code, signal EOF
1028 // o Handle end-of-repeat-block;
1029 // o tag the listing-line with a pound (#) sign.
1031 if ((ln = GetNextRepeatLine()) == NULL)
1033 DEBUG { printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n"); }
1042 // Save text of the line. We only do this during listings and within
1043 // macro-type blocks, since it is expensive to unconditionally copy every
// NOTE(review): strlen() yields a size_t but is printed with %d below. Also,
// if the copy target is lnbuf[LNSIZ], a line of exactly LNSIZ characters
// passes this test yet needs LNSIZ + 1 bytes -- confirm against the copy.
1048 if (strlen(ln) > LNSIZ)
1049 return error("line too long (%d, max %d)", strlen(ln), LNSIZ);
1054 // General housekeeping
1055 tok = tokeol; // Set "tok" to EOL in case of error
1056 tk.u32 = etok; // Reset token ptr
1057 stuffnull = 0; // Don't stuff nulls
1058 totlines++; // Bump total #lines assembled
1060 // See if the entire line is a comment. This is a win if the programmer
1061 // puts in lots of comments
1062 if (*ln == '*' || *ln == ';' || ((*ln == '/') && (*(ln + 1) == '/')))
1065 // And here we have a very ugly hack for signalling a single line 'turn off
1066 // optimization'. There's really no nice way to do this, so hack it is!
1067 optimizeOff = 0; // Default is to take optimizations as they come
1071 optimizeOff = 1; // Signal that we don't want to optimize this line
1072 ln++; // & skip over the darned thing
1075 // Main tokenization loop;
1076 // o skip whitespace;
1077 // o handle end-of-line;
1078 // o handle symbols;
1079 // o handle single-character tokens (operators, etc.);
1080 // o handle multiple-character tokens (constants, strings, etc.).
1083 // Check to see if there's enough space in the token buffer
// (stops 20 bytes short of the end of tokbuf[] to leave headroom)
1084 if (tk.cp >= ((uint8_t *)(&tokbuf[TOKBUFSIZE])) - 20)
1086 return error("token buffer overrun");
1089 // Skip whitespace, handle EOL
1090 while (chrtab[*ln] & WHITE)
1093 // Handle EOL, comment with ';'
1094 if (*ln == EOS || *ln == ';'|| ((*ln == '/') && (*(ln + 1) == '/')))
1097 // Handle start of symbol. Symbols are null-terminated in place. The
1098 // termination is always one symbol behind, since there may be no place
1099 // for a null in the case that an operator immediately follows the name.
1104 if (stuffnull) // Terminate old symbol from previous pass
1107 v = 0; // Assume no DOT attrib follows symbol
1110 // In some cases, we need to check for a DOTx at the *beginning*
1111 // of a symbol, as the "start" of the line we're currently looking
1112 // at could be somewhere in the middle of that line!
1115 // Make sure that it's *only* a .[bwsl] following, and not the
1116 // start of a local symbol:
1117 if ((chrtab[*(ln + 1)] & DOT)
1118 && (dotxtab[*(ln + 1)] != 0)
1119 && !(chrtab[*(ln + 2)] & CTSYM))
1121 // We found a legitimate DOTx construct, so add it to the
1125 *tk.u32++ = (TOKEN)dotxtab[*ln++];
1130 p = nullspot = ln++; // Nullspot -> start of this symbol
1132 // Find end of symbol (and compute its length)
1133 for(j=1; (int)chrtab[*ln]&CTSYM; j++)
1136 // Handle "DOT" special forms (like ".b") that follow a normal
1137 // symbol or keyword:
1140 *ln++ = EOS; // Terminate symbol
1141 stuffnull = 0; // And never try it again
1143 // Character following the '.' must have a DOT attribute, and
1144 // the character after THAT one must not have a start-symbol
1145 // attribute (to prevent symbols that look like, for example,
1146 // "zingo.barf", which might be a good idea anyway....)
1147 if (((chrtab[*ln] & DOT) == 0) || (dotxtab[*ln] == 0))
1148 return error("[bwsl] must follow '.' in symbol");
1150 v = (uint32_t)dotxtab[*ln++];
1151 cursize = (uint32_t)v;
1153 if (chrtab[*ln] & CTSYM)
1154 return error("misuse of '.'; not allowed in symbols");
1157 // If the symbol is small, check to see if it's really the name of
1161 for(state=0; state>=0;)
1163 j = (int)tolowertab[*p++];
1166 if (kwcheck[j] != state)
1172 if (*p == EOS || p == ln)
1186 // Make j = -1 if user tries to use a RISC register while in 68K mode
1187 if (!(rgpu || rdsp || dsp56001) && ((TOKEN)j >= KW_R0 && (TOKEN)j <= KW_R31))
1192 // Make j = -1 if time, date etc with no preceding ^^
1193 // defined, referenced, streq, macdef, date and time
1196 case 112: // defined
1197 case 113: // referenced
1205 // If we detected equrundef/regundef set relevant flag
1206 if (j == KW_EQURUNDEF)
1210 //printf("line %d, equrundef found\n", curlineno);
1213 // If not tokenized keyword OR token was not found
1214 if ((j < 0) || (state < 0))
1216 // Only proceed if no equrundef has been detected. In that case we need to store the symbol
1217 // because the directive handler (d_equrundef) will run outside this loop, further into procln.c
1220 // Last attempt: let's see if this is an equated register
1223 sy = lookup(nullspot, LABEL, 0);
1227 if (sy->sattre & EQUATEDREG)
1229 uint32_t register_token = sy->svalue;
1232 // If we are in GPU or DSP mode then mark the register bank.
1233 // We will use it during EvaluateRegisterFromTokenStream()
1234 // when we check if we can use the equated register with the currently
1236 // Note (ggn): I find all this superfluous. Do we really want to be so
1237 // protective? Plus, the current implementation happily skips
1238 // these checks on .equr that are set during fixups - oops!
1239 register_token |= 0x80000000; // Mark that this is an .equr
1240 if (sy->sattre & BANK_1)
1242 register_token |= 0x40000000; // Mark bank 1
1245 *tk.u32++ = register_token;
1251 // Ok, that failed, let's store the symbol instead
1253 string[stringNum] = nullspot;
1254 *tk.u32++ = stringNum;
1259 *tk.u32++ = (TOKEN)j;
1263 if (v) // Record attribute token (if any)
1264 *tk.u32++ = (TOKEN)v;
1266 if (stuffnull) // Arrange for string termination on next pass
1272 // Handle identity tokens
1279 // Handle multiple-character tokens
1284 case '!': // ! or !=
1294 case '\'': // 'string'
1297 // Hardcoded for now, maybe this will change in the future
1298 *tk.u32++ = STRINGA8;
1302 case '\"': // "string"
1306 string[stringNum] = ln;
1307 *tk.u32++ = stringNum;
1310 for(p=ln; *ln!=EOS && *ln!=c1;)
1319 return(error("unterminated string"));
1348 // If we're evaluating a macro
1349 // this is valid because it's
1350 // a parameter expansion
1352 // If we're evaluating a macro
1353 // this is valid and expands to
1357 warn("bad backslash code in string");
1367 return error("unterminated string");
1371 case '$': // $, hex constant
1372 if (chrtab[*ln] & HDIGIT)
1376 // Parse the hex value
1377 while (hextab[*ln] >= 0)
1378 v = (v << 4) + (int)hextab[*ln++];
1385 if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
1390 else if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
1401 case '<': // < or << or <> or <=
1420 case ':': // : or ::
1430 case '=': // = or ==
1433 *tk.u32++ = DEQUALS;
1440 case '>': // > or >> or >=
1455 case '%': // % or binary constant
1456 if (*ln < '0' || *ln > '1')
1464 while (*ln >= '0' && *ln <= '1')
1465 v = (v << 1) + *ln++ - '0';
1469 if ((*(ln + 1) == 'b') || (*(ln + 1) == 'B'))
1475 if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
1481 if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
1491 case '@': // @ or octal constant
1492 if (*ln < '0' || *ln > '7')
1500 while (*ln >= '0' && *ln <= '7')
1501 v = (v << 3) + *ln++ - '0';
1505 if ((*(ln + 1) == 'b') || (*(ln + 1) == 'B'))
1511 if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
1517 if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
1527 case '^': // ^ or ^^ <operator-name>
1534 if (((int)chrtab[*++ln] & STSYM) == 0)
1536 error("invalid symbol following ^^");
1542 while ((int)chrtab[*ln] & CTSYM)
1545 for(state=0; state>=0;)
1547 // Get char, convert to lowercase
1550 if (j >= 'A' && j <= 'Z')
1555 if (kwcheck[j] != state)
1561 if (*p == EOS || p == ln)
1570 if (j < 0 || state < 0)
1572 error("unknown symbol following ^^");
1576 *tk.u32++ = (TOKEN)j;
1579 interror(2); // Bad MULTX entry in chrtab
1584 // Handle decimal constant
1587 uint8_t * numStart = ln;
1590 while ((int)chrtab[*ln] & DIGIT)
1591 v = (v * 10) + *ln++ - '0';
1593 // See if there's a .[bwl] after the constant & deal with it if so
1596 if ((*(ln + 1) == 'b') || (*(ln + 1) == 'B'))
1604 else if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
1612 else if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
1620 else if ((int)chrtab[*(ln + 1)] & DIGIT)
1622 // Hey, more digits after the dot, so we assume it's a
1623 // floating point number of some kind... numEnd will point
1624 // to the first non-float character after it's done
// The number is re-parsed from numStart so the integer part is included
1627 double f = strtod(numStart, &numEnd);
1628 ln = (uint8_t *)numEnd;
1631 return error("floating point parse error");
1633 // N.B.: We use the C compiler's internal double
1634 // representation for all internal float calcs and
1635 // are reasonably sure that the size of said double
1636 // is 8 bytes long (which we check for in fltpoint.c)
1649 //printf("CONST: %i\n", v);
1653 // Handle illegal character
1654 return error("illegal character $%02X found", *ln);
1657 // Terminate line of tokens and return "success."
1660 tok = etok; // Set tok to beginning of line
1662 if (stuffnull) // Terminate last SYMBOL
1672 // .GOTO <label> goto directive
1674 // The label is searched for starting from the first line of the current,
1675 // enclosing macro definition. If no enclosing macro exists, an error is
1678 // A label is of the form:
1680 // :<name><whitespace>
1682 // The colon must appear in column 1. The label is stripped prior to macro
1683 // expansion, and is NOT subject to macro expansion. The whitespace may also
// The parameter 'unused' is not referenced in the lines shown here. On a
// match the macro's next-line pointer is set to the labelled line; otherwise
// the result of error() is returned.
1686 int d_goto(WORD unused)
1688 // Setup for the search
1690 return error("missing label");
// The label name is the string whose index is stored in tok[1]
1692 char * sym = string[tok[1]];
1695 if (cur_inobj->in_type != SRC_IMACRO)
1696 return error("goto not in macro");
1698 IMACRO * imacro = cur_inobj->inobj.imacro;
1699 LLIST * defln = imacro->im_macro->lineList;
1701 // Attempt to find the label, starting with the first line.
1702 for(; defln!=NULL; defln=defln->next)
1704 // Must start with a colon
1705 if (defln->line[0] == ':')
1707 // Compare names (sleazo string compare)
1709 char * s2 = defln->line + 1;
1711 // Either we will match the strings to EOS on both, or we will
1712 // match EOS on string 1 to whitespace on string 2. Otherwise, we
// NOTE(review): s2 is a plain 'char *', so chrtab[*s2] could be indexed with
// a negative value for bytes >= 0x80 where char is signed -- confirm.
1714 while ((*s1 == *s2) || ((*s1 == EOS) && (chrtab[*s2] & WHITE)))
1716 // If we reached the end of string 1 (sym), we're done.
1717 // Note that we're also checking for the end of string 2 as
1718 // well, since we've established they're equal above.
1721 // Found the label, set new macro next-line and return.
1722 imacro->im_nextln = defln;
1732 return error("goto label not found");
// Debug helper: print a bracketed, human-readable form of the single token
// 't' to stdout. Named tokens print their name; anything unrecognized falls
// through to the final "[hex:char]" form.
1736 void DumpToken(TOKEN t)
1740 else if (t == CONST)
1742 else if (t == FCONST)
1744 else if (t == ACONST)
1746 else if (t == STRING)
1748 else if (t == SYMBOL)
1752 else if (t == TKEOF)
1754 else if (t == DEQUALS)
1755 printf("[DEQUALS]");
1760 else if (t == DCOLON)
1772 else if (t == UNMINUS)
1773 printf("[UNMINUS]");
1788 else if (t == ENDEXPR)
1789 printf("[ENDEXPR]");
1790 else if (t == CR_ABSCOUNT)
1791 printf("[CR_ABSCOUNT]");
1792 else if (t == CR_FILESIZE)
1793 printf("[CR_FILESIZE]");
1794 else if (t == CR_DEFINED)
1795 printf("[CR_DEFINED]");
1796 else if (t == CR_REFERENCED)
1797 printf("[CR_REFERENCED]");
1798 else if (t == CR_STREQ)
1799 printf("[CR_STREQ]");
1800 else if (t == CR_MACDEF)
1801 printf("[CR_MACDEF]");
1802 else if (t == CR_TIME)
1803 printf("[CR_TIME]");
1804 else if (t == CR_DATE)
1805 printf("[CR_DATE]");
// Token values in the ASCII punctuation ranges print as the character itself
1806 else if (t >= 0x20 && t <= 0x2F)
1807 printf("[%c]", (char)t);
1808 else if (t >= 0x3A && t <= 0x3F)
1809 printf("[%c]", (char)t);
// 0x80..0x87 print as D0..D7 and 0x88..0x8F as A0..A7
1810 else if (t >= 0x80 && t <= 0x87)
1811 printf("[D%u]", ((uint32_t)t) - 0x80);
1812 else if (t >= 0x88 && t <= 0x8F)
1813 printf("[A%u]", ((uint32_t)t) - 0x88);
1815 printf("[%X:%c]", (uint32_t)t, (char)t);
// Debug helper: print the current value of sloc followed by every token in
// tokbuf[], from its start up to (not including) the first EOL token.
// NOTE(review): the CONST/FCONST cases print a u64 value with %lX, which
// assumes a 64-bit long; ExpandMacro() uses PRIX64 for the same purpose.
1819 void DumpTokenBuffer(void)
1821 printf("Tokens [%X]: ", sloc);
1823 for(TOKEN * t=tokbuf; *t!=EOL; t++)
1827 else if (*t == CONST)
1831 printf("[CONST: $%lX]", *tp.u64);
1834 else if (*t == FCONST)
1838 printf("[FCONST: $%lX]", *tp.u64);
1841 else if (*t == ACONST)
1843 printf("[ACONST: $%X, $%X]", (uint32_t)t[1], (uint32_t)t[2]);
1846 else if (*t == STRING)
1849 printf("[STRING:\"%s\"]", string[*t]);
1851 else if (*t == SYMBOL)
1854 printf("[SYMBOL:\"%s\"]", string[*t]);
1858 else if (*t == TKEOF)
1860 else if (*t == DEQUALS)
1861 printf("[DEQUALS]");
1866 else if (*t == DCOLON)
1878 else if (*t == UNMINUS)
1879 printf("[UNMINUS]");
1880 else if (*t == DOTB)
1882 else if (*t == DOTW)
1884 else if (*t == DOTL)
1886 else if (*t == DOTQ)
1888 else if (*t == DOTS)
1890 else if (*t == DOTD)
1892 else if (*t == DOTI)
1894 else if (*t == ENDEXPR)
1895 printf("[ENDEXPR]");
1896 else if (*t == CR_ABSCOUNT)
1897 printf("[CR_ABSCOUNT]");
1898 else if (*t == CR_FILESIZE)
1899 printf("[CR_FILESIZE]");
1900 else if (*t == CR_DEFINED)
1901 printf("[CR_DEFINED]");
1902 else if (*t == CR_REFERENCED)
1903 printf("[CR_REFERENCED]");
1904 else if (*t == CR_STREQ)
1905 printf("[CR_STREQ]");
1906 else if (*t == CR_MACDEF)
1907 printf("[CR_MACDEF]");
1908 else if (*t == CR_TIME)
1909 printf("[CR_TIME]");
1910 else if (*t == CR_DATE)
1911 printf("[CR_DATE]");
// Token values in the ASCII punctuation ranges print as the character itself
1912 else if (*t >= 0x20 && *t <= 0x2F)
1913 printf("[%c]", (char)*t);
1914 else if (*t >= 0x3A && *t <= 0x3F)
1915 printf("[%c]", (char)*t);
// 0x80..0x87 print as D0..D7 and 0x88..0x8F as A0..A7
1916 else if (*t >= 0x80 && *t <= 0x87)
1917 printf("[D%u]", ((uint32_t)*t) - 0x80);
1918 else if (*t >= 0x88 && *t <= 0x8F)
1919 printf("[A%u]", ((uint32_t)*t) - 0x88);
1921 printf("[%X:%c]", (uint32_t)*t, (char)*t);