2 // RMAC - Renamed Macro Assembler for all Atari computers
3 // TOKEN.C - Token Handling
4 // Copyright (C) 199x Landon Dyer, 2011-2021 Reboot and Friends
5 // RMAC derived from MADMAC v1.07 Written by Landon Dyer, 1986
6 // Source utilised with the kind permission of Landon Dyer
// Selector macros are defined ahead of the generated table headers
// (kwtab.h, unarytab.h) that are included below.
19 #define DECL_KW // Declare keyword arrays
20 #define DEF_KW // Declare keyword values
21 #include "kwtab.h" // Incl generated keyword tables & defs
22 #define DEF_REG68 // Incl 68k register definitions
24 #define DEF_REGRISC // Include GPU/DSP register definitions
26 #define DEF_UNARY // Declare unary values
27 #define DECL_UNARY // Incl unary keyword state machine tables
28 #include "unarytab.h" // Incl generated unary tables & defs
// Tokenizer state. Objects without 'static' have external linkage; the
// free lists (f_inobj, f_ifile, f_imacro) and tokbuf[] are private to this
// file.
31 int lnsave; // 1; strcpy() text of current line
32 uint32_t curlineno; // Current line number (64K max currently)
33 int totlines; // Total # of lines
34 int mjump_align = 0; // mjump alignment flag
35 char lntag; // Line tag
36 char * curfname; // Current filename
// The three lookup tables below have 128 entries each, so they may only be
// indexed with 7-bit character values.
37 char tolowertab[128]; // Uppercase ==> lowercase
38 int8_t hextab[128]; // Table of hex values
39 char dotxtab[128]; // Table for ".b", ".s", etc.
40 char irbuf[LNSIZ]; // Text for .rept block line
41 char lnbuf[LNSIZ]; // Text of current line
42 WORD filecount; // Unique file number counter
43 WORD cfileno; // Current file number
44 TOKEN * tok; // Ptr to current token
45 TOKEN * etok; // Ptr past last token in tokbuf[]
46 TOKEN tokeol[1] = {EOL}; // Bailout end-of-line token
47 char * string[TOKBUFSIZE*2];// Token buffer string pointer storage
48 int optimizeOff; // Optimization override flag
54 INOBJ * cur_inobj; // Ptr current input obj (IFILE/IMACRO)
55 static INOBJ * f_inobj; // Ptr list of free INOBJs
56 static IFILE * f_ifile; // Ptr list of free IFILEs
57 static IMACRO * f_imacro; // Ptr list of free IMACROs
59 static TOKEN tokbuf[TOKBUFSIZE]; // Token buffer (stack-like, all files)
// Character classification table, indexed by byte value (0x00..0xFF).
// Each entry is a sum of class flags (ILLEG, WHITE, MULTX, SELF, STSYM,
// CTSYM, DIGIT, HDIGIT, DOT); the tokenizer tests them with '&', e.g.
// (chrtab[c] & WHITE). The trailing comment on each row names the
// characters that row covers.
61 uint8_t chrtab[0x100] = {
62 ILLEG, ILLEG, ILLEG, ILLEG, // NUL SOH STX ETX
63 ILLEG, ILLEG, ILLEG, ILLEG, // EOT ENQ ACK BEL
64 ILLEG, WHITE, ILLEG, ILLEG, // BS HT LF VT
65 WHITE, ILLEG, ILLEG, ILLEG, // FF CR SO SI
67 ILLEG, ILLEG, ILLEG, ILLEG, // DLE DC1 DC2 DC3
68 ILLEG, ILLEG, ILLEG, ILLEG, // DC4 NAK SYN ETB
69 ILLEG, ILLEG, ILLEG, ILLEG, // CAN EM SUB ESC
70 ILLEG, ILLEG, ILLEG, ILLEG, // FS GS RS US
72 WHITE, MULTX, MULTX, SELF, // SP ! " #
73 MULTX+CTSYM, MULTX, SELF, MULTX, // $ % & '
74 SELF, SELF, SELF, SELF, // ( ) * +
75 SELF, SELF, STSYM, SELF, // , - . /
77 DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 0 1
78 DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 2 3
79 DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 4 5
80 DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 6 7
81 DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 8 9
83 MULTX, MULTX, MULTX, STSYM+CTSYM, // < = > ?
// Letters: DOT marks characters accepted after a '.' size suffix;
// HDIGIT marks the hex digits A-F / a-f.
85 MULTX, STSYM+CTSYM+HDIGIT, // @ A
86 DOT+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT, // B C
87 DOT+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT, // D E
88 STSYM+CTSYM+HDIGIT, STSYM+CTSYM, // F G
89 STSYM+CTSYM, DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // H I J K
90 DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // L M N O
92 DOT+STSYM+CTSYM, DOT+STSYM+CTSYM, STSYM+CTSYM, DOT+STSYM+CTSYM, // P Q R S
93 STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, DOT+STSYM+CTSYM, // T U V W
// NOTE(review): 'X' has no DOT flag here although lowercase 'x' does (see
// the "x y z {" row) and dotxtab['X'] is initialised to DOTX. The size
// suffix check in TokenizeLine() requires the DOT flag, so ".X" would be
// rejected while ".x" is accepted -- confirm whether this is intentional.
94 STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, SELF,// X Y Z [
95 SELF, SELF, MULTX, STSYM+CTSYM, // \ ] ^ _
97 ILLEG, STSYM+CTSYM+HDIGIT, // ` a
98 DOT+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT, // b c
99 DOT+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT, // d e
100 STSYM+CTSYM+HDIGIT, STSYM+CTSYM, // f g
101 STSYM+CTSYM, DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // h i j k
102 DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // l m n o
104 DOT+STSYM+CTSYM, DOT+STSYM+CTSYM, STSYM+CTSYM, DOT+STSYM+CTSYM, // p q r s
105 STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, DOT+STSYM+CTSYM, // t u v w
106 DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, SELF, // x y z {
107 SELF, SELF, SELF, ILLEG, // | } ~ DEL
109 // Anything above $7F is illegal (and yes, we need to check for this,
110 // otherwise you get strange and spurious errors that will lead you astray)
111 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
112 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
113 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
114 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
115 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
116 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
117 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
118 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
119 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
120 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
121 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
122 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
123 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
124 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
125 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
126 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG
129 // Names of registers
// Used by ExpandMacro() to turn a register token back into text; the
// table is indexed with (token - REG68_D0). The trailing comment on each
// row gives the first and last token value covered by that row. Empty
// strings are token values with no register name.
130 static char * regname[] = {
131 "d0","d1","d2","d3","d4","d5","d6","d7", // 128,135
132 "a0","a1","a2","a3","a4","a5","a6","sp", // 136,143
133 "ssp","pc","sr","ccr","regequ","set","reg","r0", // 144,151
134 "r1","r2","r3","r4","r5","r6","r7","r8", // 152,159
135 "r9","r10","r11","r12","r13","r14","r15","r16", // 160,167
136 "r17","r18","r19","r20","r21","r22","r23","r24", // 168,175
137 "r25","r26","r27","r28","r29","r30","r31","ccdef", // 176,183
138 "usp","ic40","dc40","bc40","sfc","dfc","","vbr", // 184,191
139 "cacr","caar","msp","isp","tc","itt0","itt1","dtt0", // 192,199
140 "dtt1","mmusr","urp","srp","iacr0","iacr1","dacr0","dacr1", // 200,207
141 "tt0","tt1","crp","","","","","", // 208,215
142 "","","","","fpiar","fpsr","fpcr","", // 216,223
143 "fp0","fp1","fp2","fp3","fp4","fp5","fp6","fp7", // 224,231
144 "","","","","","","","", // 232,239
145 "","","","","","","","", // 240,247
146 "","","","","","","","", // 248,255
147 "","","","","x0","x1","y0","y1", // 256,263
148 "","b0","","b2","","b1","a","b", // 264,271
149 "mr","omr","la","lc","ssh","ssl","ss","", // 272,279
150 "n0","n1","n2","n3","n4","n5","n6","n7", // 280,287
151 "m0","m1","m2","m3","m4","m5","m6","m7", // 288,295
152 "","","","","","","l","p", // 296,303
153 "mr","omr","la","lc","ssh","ssl","ss","", // 304,311
154 "a10","b10","x","y","","","ab","ba" // 312,319
// Names of the RISC registers r0..r31. ExpandMacro() indexes this table
// with (token - REGRISC_R0) to turn a register token back into text.
157 WARNING("We should get rid of this table, it's a subset of the table above")
158 static char * riscregname[] = {
159 "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
160 "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
161 "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
162 "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31"
167 // Initialize tokenizer
// Resets the line-save flag, current filename and file counters, then
// fills the tolowertab[], hextab[] and dotxtab[] lookup tables.
169 void InitTokenizer(void)
172 char * htab = "0123456789abcdefABCDEF"; // Hex character table
174 lnsave = 0; // Don't save lines
175 curfname = ""; // No file, empty filename
176 filecount = (WORD)-1;
177 cfileno = (WORD)-1; // cfileno gets bumped to 0
189 // Initialize hex, "dot" and tolower tables
194 tolowertab[i] = (char)i;
// htab positions 0..15 are '0'-'9','a'-'f' and map to their own index;
// positions 16..21 are 'A'-'F', so subtracting 6 maps them to 10..15.
197 for(i=0; htab[i]!=EOS; i++)
198 hextab[htab[i]] = (char)((i < 16) ? i : i - 6);
// Setting bit 0x20 turns an ASCII uppercase letter into its lowercase form.
200 for(i='A'; i<='Z'; i++)
201 tolowertab[i] |= 0x20;
203 // These characters are legal immediately after a period
204 dotxtab['b'] = DOTB; // .b .B
// The 's'/'S' => DOTB mapping is disabled; 'S' maps to DOTS further down.
206 //dotxtab['s'] = DOTB;
207 //dotxtab['S'] = DOTB;
208 dotxtab['w'] = DOTW; // .w .W
210 dotxtab['l'] = DOTL; // .l .L
212 dotxtab['i'] = DOTI; // .i .I (meaning of .i is unclear -- TODO confirm)
214 dotxtab['D'] = DOTD; // .d .D (double)
216 dotxtab['S'] = DOTS; // .s .S
218 dotxtab['Q'] = DOTQ; // .q .Q (quad word)
220 dotxtab['X'] = DOTX; // .x .X
222 dotxtab['P'] = DOTP; // .p .P
// Point curfname at the name to show in error messages. The visible code
// walks the filerec list while 'fnum' is non-zero and then uses that
// record's frec_name; the placeholders "(*top*)" and "(*NOT FOUND*)" are
// used for the two cases the comments below describe as "should never
// happen".
227 void SetFilenameForErrorReporting(void)
231 // Check for absolute top filename (this should never happen)
234 curfname = "(*top*)";
238 FILEREC * fr = filerec;
240 // Advance to the correct record...
241 while (fr != NULL && fnum != 0)
247 // Check for file # record not found (this should never happen either)
250 curfname = "(*NOT FOUND*)";
254 curfname = fr->frec_name;
259 // Allocate an IFILE or IMACRO
// (or an IREPT). 'typ' is one of SRC_IFILE, SRC_IMACRO or SRC_IREPT.
// The INOBJ, IFILE and IMACRO objects come either from malloc() or from
// the f_inobj / f_ifile / f_imacro free lists; an IREPT is always
// malloc()ed. The new INOBJ records the current ifent, tok and etok and
// is linked to cur_inobj.
// NOTE(review): none of the visible malloc() results is checked for NULL.
261 INOBJ * a_inobj(int typ)
267 // Allocate and initialize INOBJ first
269 inobj = malloc(sizeof(INOBJ));
// Otherwise unlink the head of the free list
273 f_inobj = f_inobj->in_link;
278 case SRC_IFILE: // Alloc and init an IFILE
280 ifile = malloc(sizeof(IFILE));
284 f_ifile = f_ifile->if_link;
287 inobj->inobj.ifile = ifile;
290 case SRC_IMACRO: // Alloc and init an IMACRO
291 if (f_imacro == NULL)
292 imacro = malloc(sizeof(IMACRO));
296 f_imacro = f_imacro->im_link;
299 inobj->inobj.imacro = imacro;
302 case SRC_IREPT: // Alloc and init an IREPT
303 inobj->inobj.irept = malloc(sizeof(IREPT));
304 DEBUG { printf("alloc IREPT\n"); }
308 // Install INOBJ on top of input stack
309 inobj->in_ifent = ifent; // Record .if context on entry
310 inobj->in_type = (WORD)typ;
311 inobj->in_otok = tok;
312 inobj->in_etok = etok;
313 inobj->in_link = cur_inobj;
321 // Perform macro substitution from 'orig' to 'dest'. Return OK or some error.
322 // A macro reference is in one of two forms:
323 // \name <non-name-character>
325 // A doubled backslash (\\) is compressed to a single backslash (\).
326 // Argument definitions have been pre-tokenized, so we have to turn them back
327 // into text. This means that numbers, in particular, become hex, regardless of
328 // their representation when the macro was invoked. This is a hack.
329 // A label may appear at the beginning of the line:
330 // :<name><whitespace>
331 // (the colon must be in the first column). These labels are stripped before
332 // macro expansion takes place.
//
// Parameters:
//   src     - NUL-terminated macro source line to expand (called 'orig'
//             in the description above)
//   dest    - buffer receiving the expanded text
//   destsiz - size of 'dest' in bytes
// The macro being expanded is taken from cur_inobj->inobj.imacro.
// Failures are reported through error() ("missing argument name",
// "missing closing brace ('}')", "undefined argument: '%s'") or fatal()
// ("line too long as a result of macro expansion").
334 int ExpandMacro(char * src, char * dest, int destsiz)
337 int questmark; // \? for testing argument existence
338 char mname[128]; // Assume max size of a formal arg name
339 char numbuf[20]; // Buffer for text of CONSTs
342 char ** symbolString;
344 DEBUG { printf("ExM: src=\"%s\"\n", src); }
346 IMACRO * imacro = cur_inobj->inobj.imacro;
347 int macnum = (int)(imacro->im_macro->sattr);
349 char * dst = dest; // Next dest slot
350 char * edst = dest + destsiz - 1; // Last slot of dest buffer (dest[destsiz - 1])
352 // Check for (and skip over) any "label" on the line
358 while (*s != EOS && !(chrtab[*s] & WHITE))
362 s++; // Skip first whitespace
365 // Expand the rest of the line
368 // Copy single character
374 // Skip comments in case a loose @ or \ is in there
375 // In that case the tokeniser was trying to expand it.
376 if ((*s == ';') || ((*s == '/') && (*(s + 1) == '/')))
381 // Do macro expansion
389 case '\\': // \\, \ (collapse to single backslash)
395 case '?': // \? <macro> set `questmark' flag
399 case '#': // \#, number of arguments
400 sprintf(numbuf, "%d", (int)imacro->im_nargs);
402 case '!': // \! size suffix supplied on invocation
403 switch ((int)imacro->im_siz)
405 case SIZN: d = ""; break;
406 case SIZB: d = ".b"; break;
407 case SIZW: d = ".w"; break;
408 case SIZL: d = ".l"; break;
412 case '~': // ==> unique label string Mnnnn...
413 sprintf(numbuf, "M%u", curuniq);
429 return error("missing argument name");
432 // \n ==> argument number 'n', 0..9
433 if (chrtab[*s] & DIGIT)
443 // Get argument name: \name, \{name}
453 while (chrtab[*s] & CTSYM);
458 for(++s; *s != EOS && *s != '}';)
462 return error("missing closing brace ('}')");
469 // Lookup the argument and copy its (string) value into the
470 // destination string
471 DEBUG { printf("argument='%s'\n", mname); }
473 if ((arg = lookup(mname, MACARG, macnum)) == NULL)
474 return error("undefined argument: '%s'", mname);
477 // Convert a string of tokens (terminated with EOL) back into
478 // text. If an argument is out of range (not specified in the
479 // macro invocation) then it is ignored.
480 i = (int)arg->svalue;
482 DEBUG { printf("~argnumber=%d\n", i); }
485 if (i < imacro->im_nargs)
487 tk = imacro->argument[i].token;
488 symbolString = imacro->argument[i].string;
491 // printf("ExM: Preparing to parse argument #%u...\n", i);
497 // 0 if the argument is empty or non-existent,
498 // 1 if the argument is not empty
501 if (tk == NULL || *tk == EOL)
// Emit the flag as the single character '0' or '1'
507 *dst++ = (char)(questmark + '0');
511 // Argument # is in range, so expand it
516 // Reverse-translation from a token number to a string.
517 // This is a hack. It might be better table-driven.
520 if ((*tk >= REG68_D0) && !rdsp && !rgpu)
522 d = regname[(int)*tk++ - REG68_D0];
525 else if ((*tk >= REGRISC_R0) && (*tk <= REGRISC_R31))
527 d = riscregname[(int)*tk++ - REGRISC_R0];
535 d = symbolString[*tk++];
536 DEBUG { printf("ExM: SYMBOL=\"%s\"", d); }
539 d = symbolString[*tk++];
560 // Shamus: Changing the format specifier from %lx to %ux caused the assembler
561 // to choke on legitimate code... Need to investigate this further
562 // before changing anything else here!
564 // sprintf(numbuf, "$%lx", (uint64_t)*tk++);
// '$' plus at most 16 hex digits plus NUL fits in numbuf[20]
565 sprintf(numbuf, "$%" PRIX64, (uint64_t)*tk++);
633 *dst++ = (char)*(tk - 1);
638 // If 'd' != NULL, copy string to destination
642 DEBUG printf("d='%s'\n", d);
661 DEBUG { printf("ExM: dst=\"%s\"\n", dest); }
666 DEBUG { printf("*** OVERFLOW LINE ***\n%s\n", dest); }
667 return fatal("line too long as a result of macro expansion");
672 // Get next line of text from a macro
// Advances the current macro's im_nextln pointer, expands the line into
// imacro->im_lnbuf via ExpandMacro() and returns that buffer.
674 char * GetNextMacroLine(void)
676 IMACRO * imacro = cur_inobj->inobj.imacro;
677 LLIST * strp = imacro->im_nextln;
679 if (strp == NULL) // End-of-macro
682 imacro->im_nextln = strp->next;
683 // ExpandMacro((char *)(strp + 1), imacro->im_lnbuf, LNSIZ);
// NOTE(review): the return value of ExpandMacro() is discarded here, so
// this function returns im_lnbuf regardless of the expansion result.
684 ExpandMacro(strp->line, imacro->im_lnbuf, LNSIZ);
686 return imacro->im_lnbuf;
691 // Get next line of text from a repeat block
// Copies the current line of the repeat block into the global irbuf[]
// (either verbatim or with "\~" replaced by "R<reptuniq>") and advances
// ir_nextln to the next line.
693 char * GetNextRepeatLine(void)
695 IREPT * irept = cur_inobj->inobj.irept;
696 // LONG * strp = irept->ir_nextln; // initial null
698 // Do repeat at end of .rept block's string list
700 if (irept->ir_nextln == NULL)
702 DEBUG { printf("back-to-top-of-repeat-block count=%d\n", (int)irept->ir_count); }
703 irept->ir_nextln = irept->ir_firstln; // copy first line
// Post-decrement: the test sees the count before it is decremented
705 if (irept->ir_count-- == 0)
707 DEBUG { printf("end-repeat-block\n"); }
711 // strp = irept->ir_nextln;
713 // Mark the current macro line in the irept object
714 // This is probably overkill - a global variable
715 // would suffice here (it only gets used during
716 // error reporting anyway)
717 irept->lineno = irept->ir_nextln->lineno;
719 // Copy the rept lines verbatim, unless we're in nest level 0.
720 // Then, expand any \~ labels to unique numbers (Rn)
// NOTE(review): strcpy() and the sprintf() below write into irbuf[LNSIZ]
// without a length check in the visible code -- confirm the stored lines
// are bounded elsewhere.
723 strcpy(irbuf, irept->ir_nextln->line);
727 uint32_t linelen = strlen(irept->ir_nextln->line);
728 uint8_t *p_line = irept->ir_nextln->line;
729 char *irbufwrite = irbuf;
// '<=' makes the loop run linelen + 1 times, i.e. the terminating NUL
// position is visited too
730 for (int i = 0; i <= linelen; i++)
734 if (c == '\\' && *p_line == '~')
// sprintf() returns the number of characters written, which advances
// the write pointer past the generated label
737 irbufwrite += sprintf(irbufwrite, "R%u", reptuniq);
746 DEBUG { printf("repeat line='%s'\n", irbuf); }
747 // irept->ir_nextln = (LONG *)*strp;
748 irept->ir_nextln = irept->ir_nextln->next;
755 // Include a source file used at the root, and for ".include" files
// Parameters:
//   handle - file handle stored in the new IFILE (ifhandle)
//   fname  - file name; a strdup() copy becomes curfname
// Saves the current line number, filename and file number in the new
// IFILE, assigns the next file number, resets curlineno to 0 and adds a
// FILEREC for the file to the filerec list.
757 int include(int handle, char * fname)
760 DEBUG { printf("[include: %s, cfileno=%u]\n", fname, cfileno); }
762 // Alloc and initialize include-descriptors
763 INOBJ * inobj = a_inobj(SRC_IFILE);
764 IFILE * ifile = inobj->inobj.ifile;
766 ifile->ifhandle = handle; // Setup file handle
767 ifile->ifind = ifile->ifcnt = 0; // Setup buffer indices
768 ifile->ifoldlineno = curlineno; // Save old line number
769 ifile->ifoldfname = curfname; // Save old filename
770 ifile->ifno = cfileno; // Save old file number
772 // NB: This *must* be preincrement, we're adding one to the filecount here!
773 cfileno = ++filecount; // Compute NEW file number
// NOTE(review): neither the strdup() nor the malloc() result below is
// checked for NULL before use.
774 curfname = strdup(fname); // Set current filename (alloc storage)
775 curlineno = 0; // Start on line zero
777 // Add another file to the file-record
778 FILEREC * fr = (FILEREC *)malloc(sizeof(FILEREC));
779 fr->frec_next = NULL;
// The FILEREC shares the strdup()'d string with curfname
780 fr->frec_name = curfname;
783 filerec = fr; // Add first filerec
785 last_fr->frec_next = fr; // Append to list of filerecs
788 DEBUG { printf("[include: curfname: %s, cfileno=%u]\n", curfname, cfileno); }
795 // Pop the current input level
// Unwinds .if nesting back to the level recorded when the input object was
// entered, restores tok/etok, releases the IFILE / IMACRO / IREPT specific
// state and pushes the INOBJ onto the f_inobj free list.
// Returns -1 if d_endif() reports a failure while unwinding.
799 INOBJ * inobj = cur_inobj;
804 // Pop IFENT levels until we reach the conditional assembly context we
805 // were at when the input object was entered.
806 int numUnmatched = 0;
808 while (ifent != inobj->in_ifent)
810 if (d_endif() != 0) // Something bad happened during endif parsing?
811 return -1; // If yes, bail instead of getting stuck in a loop
816 // Warn the user that we had to close their unmatched conditionals for them
817 if (numUnmatched > 0)
818 warn("missing %d .endif(s)", numUnmatched);
820 tok = inobj->in_otok; // Restore tok and etok
821 etok = inobj->in_etok;
823 switch (inobj->in_type)
825 case SRC_IFILE: // Pop and release an IFILE
827 DEBUG { printf("[Leaving: %s]\n", curfname); }
829 IFILE * ifile = inobj->inobj.ifile;
// Link the IFILE to the head of the free list
830 ifile->if_link = f_ifile;
832 close(ifile->ifhandle); // Close source file
833 DEBUG { printf("[fpop (pre): curfname=%s]\n", curfname); }
834 curfname = ifile->ifoldfname; // Set current filename
835 DEBUG { printf("[fpop (post): curfname=%s]\n", curfname); }
836 DEBUG { printf("[fpop: (pre) cfileno=%d ifile->ifno=%d]\n", (int)cfileno, (int)ifile->ifno); }
837 curlineno = ifile->ifoldlineno; // Set current line#
838 DEBUG { printf("cfileno=%d ifile->ifno=%d\n", (int)cfileno, (int)ifile->ifno); }
839 cfileno = ifile->ifno; // Restore current file number
840 DEBUG { printf("[fpop: (post) cfileno=%d ifile->ifno=%d]\n", (int)cfileno, (int)ifile->ifno); }
844 case SRC_IMACRO: // Pop and release an IMACRO
846 IMACRO * imacro = inobj->inobj.imacro;
// Link the IMACRO to the head of the free list
847 imacro->im_link = f_imacro;
852 case SRC_IREPT: // Pop and release an IREPT
854 DEBUG { printf("dealloc IREPT\n"); }
855 LLIST * p = inobj->inobj.irept->ir_firstln;
857 // Deallocate repeat lines
// Unlink the INOBJ from the input stack and link it to the free list
868 cur_inobj = inobj->in_link;
869 inobj->in_link = f_inobj;
877 // Get line from file into buf, return NULL on EOF or ptr to the start of a
// The line is read from the current IFILE's buffer (fl->ifbuf), where
// ifind is the offset of the unread data and ifcnt is the number of
// unread bytes; more data is pulled in with read() in QUANTUM-sized
// requests when no line terminator is found.
880 char * GetNextLine(void)
884 int readamt = -1; // 0 if last read() yielded 0 bytes
885 IFILE * fl = cur_inobj->inobj.ifile;
889 // Scan for next end-of-line; handle stupid text formats by treating
890 // \r\n the same as \n. (lone '\r' at end of buffer means we have to
892 d = &fl->ifbuf[fl->ifind];
894 for(p=d, i=0, j=fl->ifcnt; i<j; i++, p++)
896 if (*p == '\r' || *p == '\n')
903 break; // Need to read more, then look for '\n' to eat
904 else if (p[1] == '\n')
908 // Cover up the newline with end-of-string sentinel
917 // Handle hanging lines by ignoring them (Input file is exhausted, no
918 // \r or \n on last line)
919 // Shamus: This is wrong. Never ignore any input!
// readamt == 0 means the last read() returned no data; ifcnt != 0 means
// unterminated text is still sitting in the buffer
920 if (!readamt && fl->ifcnt)
927 // Really should check to see if we're at the end of the buffer!
929 fl->ifbuf[fl->ifind + fl->ifcnt] = '\0';
931 return &fl->ifbuf[fl->ifind];
935 // Truncate and return absurdly long lines.
936 if (fl->ifcnt >= QUANTUM)
// Overwrites the last buffered byte with the terminator
938 fl->ifbuf[fl->ifind + fl->ifcnt - 1] = '\0';
940 return &fl->ifbuf[fl->ifind];
943 // Relocate what's left of a line to the beginning of the buffer, and
944 // read some more of the file in; return NULL if the buffer's empty and
948 p = &fl->ifbuf[fl->ifind];
// Destination offset is 0 or 1, matching the parity of ifcnt
949 d = &fl->ifbuf[fl->ifcnt & 1];
951 for(i=0; i<fl->ifcnt; i++)
954 fl->ifind = fl->ifcnt & 1;
// Append new data directly after the relocated remainder
957 readamt = read(fl->ifhandle, &fl->ifbuf[fl->ifind + fl->ifcnt], QUANTUM);
962 if ((fl->ifcnt += readamt) == 0)
// Fetch the next line from the current input object (file, macro or
// repeat block) and convert it into tokens deposited at 'etok'. On
// success 'tok' is set to the start of the line's tokens; while the line
// is being processed 'tok' points at tokeol so that an error leaves an
// empty line behind. Returns TKEOF when the input is exhausted, and the
// result of error() for malformed input.
971 int TokenizeLine(void)
973 uint8_t * ln = NULL; // Ptr to current position in line
974 uint8_t * p; // Random character ptr
975 PTR tk; // Token-deposit ptr
976 int state = 0; // State for keyword detector
977 int j = 0; // Var for keyword detector
978 uint8_t c; // Random char
979 uint64_t v; // Random value
980 uint32_t cursize = 0; // Current line's size (.b, .w, .l, .s, .q, .d)
981 uint8_t * nullspot = NULL; // Spot to clobber for SYMBOL termination
982 int stuffnull; // 1:terminate SYMBOL '\0' at *nullspot
984 int stringNum = 0; // Pointer to string locations in tokenized line
985 SYM* sy; // For looking up symbols (.equr)
986 int equrundef = 0; // Flag for equrundef scanning
990 if (cur_inobj == NULL) // Return EOF if input stack is empty
993 // Get another line of input from the current input source: a file, a
994 // macro, or a repeat-block
995 switch (cur_inobj->in_type)
999 // o bump source line number;
1000 // o tag the listing-line with a space;
1001 // o kludge lines generated by Alcyon C.
1003 if ((ln = GetNextLine()) == NULL)
1005 DEBUG { printf("TokenizeLine: Calling fpop() from SRC_IFILE...\n"); }
1006 if (fpop() == 0) // Pop input level
1007 goto retry; // Try for more lines
1010 ifent->if_prev = (IFENT *)-1; //Signal Assemble() that we have reached EOF with unbalanced if/endifs
1015 curlineno++; // Bump line number
1021 // o Handle end-of-macro;
1022 // o tag the listing-line with an at (@) sign.
1024 if ((ln = GetNextMacroLine()) == NULL)
1026 if (ExitMacro() == 0) // Exit macro (pop args, do fpop(), etc)
1027 goto retry; // Try for more lines...
1029 return TKEOF; // Oops, we got a non zero return code, signal EOF
1036 // o Handle end-of-repeat-block;
1037 // o tag the listing-line with a pound (#) sign.
1039 if ((ln = GetNextRepeatLine()) == NULL)
1041 DEBUG { printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n"); }
1050 // Save text of the line. We only do this during listings and within
1051 // macro-type blocks, since it is expensive to unconditionally copy every
// NOTE(review): lnbuf[] holds LNSIZ bytes, so a line of exactly LNSIZ
// characters passes this '>' test but would not fit together with its
// terminator if copied there -- confirm whether '>=' is intended.
// NOTE(review): strlen() returns size_t but is printed with "%d" below.
1056 if (strlen(ln) > LNSIZ)
1057 return error("line too long (%d, max %d)", strlen(ln), LNSIZ);
1062 // General housekeeping
1063 tok = tokeol; // Set "tok" to EOL in case of error
1064 tk.u32 = etok; // Reset token ptr
1065 stuffnull = 0; // Don't stuff nulls
1066 totlines++; // Bump total #lines assembled
1068 // See if the entire line is a comment. This is a win if the programmer
1069 // puts in lots of comments
1070 if (*ln == '*' || *ln == ';' || ((*ln == '/') && (*(ln + 1) == '/')))
1073 // And here we have a very ugly hack for signalling a single line 'turn off
1074 // optimization'. There's really no nice way to do this, so hack it is!
1075 optimizeOff = 0; // Default is to take optimizations as they come
1079 optimizeOff = 1; // Signal that we don't want to optimize this line
1080 ln++; // & skip over the darned thing
1083 // Main tokenization loop;
1084 // o skip whitespace;
1085 // o handle end-of-line;
1086 // o handle symbols;
1087 // o handle single-character tokens (operators, etc.);
1088 // o handle multiple-character tokens (constants, strings, etc.).
1091 // Check to see if there's enough space in the token buffer
// (keeps 20 bytes of headroom before the end of tokbuf[])
1092 if (tk.cp >= ((uint8_t *)(&tokbuf[TOKBUFSIZE])) - 20)
1094 return error("token buffer overrun");
1097 // Skip whitespace, handle EOL
1098 while (chrtab[*ln] & WHITE)
1101 // Handle EOL, comment with ';'
1102 if (*ln == EOS || *ln == ';'|| ((*ln == '/') && (*(ln + 1) == '/')))
1105 // Handle start of symbol. Symbols are null-terminated in place. The
1106 // termination is always one symbol behind, since there may be no place
1107 // for a null in the case that an operator immediately follows the name.
1112 if (stuffnull) // Terminate old symbol from previous pass
1115 v = 0; // Assume no DOT attrib follows symbol
1118 // In some cases, we need to check for a DOTx at the *beginning*
1119 // of a symbol, as the "start" of the line we're currently looking
1120 // at could be somewhere in the middle of that line!
1123 // Make sure that it's *only* a .[bwsl] following, and not the
1124 // start of a local symbol:
// The chrtab DOT test comes first; only 7-bit letters carry DOT in
// chrtab[], which keeps the dotxtab[] index inside its 128 entries
1125 if ((chrtab[*(ln + 1)] & DOT)
1126 && (dotxtab[*(ln + 1)] != 0)
1127 && !(chrtab[*(ln + 2)] & CTSYM))
1129 // We found a legitimate DOTx construct, so add it to the
1133 *tk.u32++ = (TOKEN)dotxtab[*ln++];
1138 p = nullspot = ln++; // Nullspot -> start of this symbol
1140 // Find end of symbol (and compute its length)
1141 for(j=1; (int)chrtab[*ln]&CTSYM; j++)
1144 // Handle "DOT" special forms (like ".b") that follow a normal
1145 // symbol or keyword:
1148 *ln++ = EOS; // Terminate symbol
1149 stuffnull = 0; // And never try it again
1151 // Character following the '.' must have a DOT attribute, and
1152 // the character after THAT one must not have a start-symbol
1153 // attribute (to prevent symbols that look like, for example,
1154 // "zingo.barf", which might be a good idea anyway....)
1155 if (((chrtab[*ln] & DOT) == 0) || (dotxtab[*ln] == 0))
1156 return error("[bwsl] must follow '.' in symbol");
1158 v = (uint32_t)dotxtab[*ln++];
1159 cursize = (uint32_t)v;
1161 if (chrtab[*ln] & CTSYM)
1162 return error("misuse of '.'; not allowed in symbols");
1165 // If the symbol is small, check to see if it's really the name of
// Table-driven match: each lower-cased character is offset by
// regbase[state], and regcheck[] must agree with the current state for
// the walk to continue
1170 for (state = 0; state >= 0;)
1172 j = (int)tolowertab[*p++];
1173 j += regbase[state];
1175 if (regcheck[j] != state)
1181 if (*p == EOS || p == ln)
1192 // Scan for keywords
1193 if ((j <= 0 || state <= 0) || p==p2)
1197 for (state = 0; state >= 0;)
1199 j = (int)tolowertab[*p2++];
1202 if (kwcheck[j] != state)
1208 if (*p == EOS || p2 == ln)
1225 // If we detected equrundef/regundef set relevant flag
1226 if (j == KW_EQURUNDEF)
1232 // If not tokenized keyword OR token was not found
1233 if ((j < 0) || (state < 0))
1235 // Only proceed if no equrundef has been detected. In that case we need to store the symbol
1236 // because the directive handler (d_equrundef) will run outside this loop, further into procln.c
1237 if (!equrundef && !disabled)
1239 // Last attempt: let's see if this is an equated register.
1240 // If yes, then just store the register's keyword value instead of the symbol
1243 sy = lookup(nullspot, LABEL, 0);
1247 if (sy->sattre & EQUATEDREG)
1249 *tk.u32++ = sy->svalue;
1255 // Ok, that failed, let's store the symbol instead
// The token stream stores an index into string[], not the pointer
1257 string[stringNum] = nullspot;
1258 *tk.u32++ = stringNum;
1263 *tk.u32++ = (TOKEN)j;
1267 if (v) // Record attribute token (if any)
1268 *tk.u32++ = (TOKEN)v;
1270 if (stuffnull) // Arrange for string termination on next pass
1276 // Handle identity tokens
1283 // Handle multiple-character tokens
1288 case '!': // ! or !=
1298 case '\'': // 'string'
1301 // Hardcoded for now, maybe this will change in the future
1302 *tk.u32++ = STRINGA8;
1306 case '\"': // "string"
1310 string[stringNum] = ln;
1311 *tk.u32++ = stringNum;
1314 for(p=ln; *ln!=EOS && *ln!=c1;)
1323 return(error("unterminated string"));
1352 // If we're evaluating a macro
1353 // this is valid because it's
1354 // a parameter expansion
1356 // If we're evaluating a macro
1357 // this is valid and expands to
1361 warn("bad backslash code in string");
1371 return error("unterminated string");
1375 case '$': // $, hex constant
1376 if (chrtab[*ln] & HDIGIT)
1380 // Parse the hex value
// NOTE(review): hextab[] has 128 entries but *ln is a uint8_t, so a
// byte >= 0x80 directly after the hex digits would index past the end
// of the table -- confirm and guard if so.
1381 while (hextab[*ln] >= 0)
1382 v = (v << 4) + (int)hextab[*ln++];
1389 if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
1394 else if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
1405 case '<': // < or << or <> or <=
1424 case ':': // : or ::
1434 case '=': // = or ==
1437 *tk.u32++ = DEQUALS;
1444 case '>': // > or >> or >=
1459 case '%': // % or binary constant
1460 if (*ln < '0' || *ln > '1')
1468 while (*ln >= '0' && *ln <= '1')
1469 v = (v << 1) + *ln++ - '0';
1473 if ((*(ln + 1) == 'b') || (*(ln + 1) == 'B'))
1479 if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
1485 if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
1495 case '@': // @ or octal constant
1496 if (*ln < '0' || *ln > '7')
1504 while (*ln >= '0' && *ln <= '7')
1505 v = (v << 3) + *ln++ - '0';
1509 if ((*(ln + 1) == 'b') || (*(ln + 1) == 'B'))
1515 if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
1521 if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
1531 case '^': // ^ or ^^ <operator-name>
1538 if (((int)chrtab[*++ln] & STSYM) == 0)
1540 error("invalid symbol following ^^");
1546 while ((int)chrtab[*ln] & CTSYM)
1549 for(state=0; state>=0;)
1551 // Get char, convert to lowercase
1552 j = (int)tolowertab[*p++];
1554 //if (j >= 'A' && j <= 'Z')
1557 j += unarybase[state];
1559 if (unarycheck[j] != state)
1565 if (*p == EOS || p == ln)
1571 state = unarytab[j];
1574 if (j < 0 || state < 0)
1576 error("unknown symbol following ^^");
1580 *tk.u32++ = (TOKEN)j;
1583 interror(2); // Bad MULTX entry in chrtab
1588 // Handle decimal constant
1591 uint8_t * numStart = ln;
1594 while ((int)chrtab[*ln] & DIGIT)
1595 v = (v * 10) + *ln++ - '0';
1597 // See if there's a .[bwl] after the constant & deal with it if so
1600 if ((*(ln + 1) == 'b') || (*(ln + 1) == 'B'))
1608 else if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
1616 else if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
1624 else if ((int)chrtab[*(ln + 1)] & DIGIT)
1626 // Hey, more digits after the dot, so we assume it's a
1627 // floating point number of some kind... numEnd will point
1628 // to the first non-float character after it's done
// The number is re-parsed from its first digit (numStart), not from
// the current position
1631 double f = strtod(numStart, &numEnd);
1632 ln = (uint8_t *)numEnd;
1635 return error("floating point parse error");
1637 // N.B.: We use the C compiler's internal double
1638 // representation for all internal float calcs and
1639 // are reasonably sure that the size of said double
1640 // is 8 bytes long (which we check for in fltpoint.c)
1653 //printf("CONST: %i\n", v);
1657 // Handle illegal character
1658 return error("illegal character $%02X found", *ln);
1661 // Terminate line of tokens and return "success."
1664 tok = etok; // Set tok to beginning of line
1666 if (stuffnull) // Terminate last SYMBOL
1676 // .GOTO <label> goto directive
1678 // The label is searched for starting from the first line of the current,
1679 // enclosing macro definition. If no enclosing macro exists, an error is
1682 // A label is of the form:
1684 // :<name><whitespace>
1686 // The colon must appear in column 1. The label is stripped prior to macro
1687 // expansion, and is NOT subject to macro expansion. The whitespace may also
//
// The 'unused' parameter is not referenced in the visible code.
// On a match the macro's im_nextln is pointed at the label's line;
// otherwise error() reports "missing label", "goto not in macro" or
// "goto label not found".
1690 int d_goto(WORD unused)
1692 // Setup for the search
1694 return error("missing label");
// tok[1] is used as an index into string[] to get the label text
1696 char * sym = string[tok[1]];
1699 if (cur_inobj->in_type != SRC_IMACRO)
1700 return error("goto not in macro");
1702 IMACRO * imacro = cur_inobj->inobj.imacro;
1703 LLIST * defln = imacro->im_macro->lineList;
1705 // Attempt to find the label, starting with the first line.
1706 for(; defln!=NULL; defln=defln->next)
1708 // Must start with a colon
1709 if (defln->line[0] == ':')
1711 // Compare names (sleazo string compare)
1713 char * s2 = defln->line + 1;
1715 // Either we will match the strings to EOS on both, or we will
1716 // match EOS on string 1 to whitespace on string 2. Otherwise, we
1718 while ((*s1 == *s2) || ((*s1 == EOS) && (chrtab[*s2] & WHITE)))
1720 // If we reached the end of string 1 (sym), we're done.
1721 // Note that we're also checking for the end of string 2 as
1722 // well, since we've established they're equal above.
1725 // Found the label, set new macro next-line and return.
1726 imacro->im_nextln = defln;
1736 return error("goto label not found");
// Debug helper: print a bracketed, human-readable form of the single
// token value 't' to stdout. Named tokens print their name; the fallback
// at the end prints the raw value in hex followed by the value as a
// character.
1740 void DumpToken(TOKEN t)
1744 else if (t == CONST)
1746 else if (t == FCONST)
1748 else if (t == ACONST)
1750 else if (t == STRING)
1752 else if (t == SYMBOL)
1756 else if (t == TKEOF)
1758 else if (t == DEQUALS)
1759 printf("[DEQUALS]");
1764 else if (t == DCOLON)
1776 else if (t == UNMINUS)
1777 printf("[UNMINUS]");
1792 else if (t == ENDEXPR)
1793 printf("[ENDEXPR]");
1794 else if (t == CR_ABSCOUNT)
1795 printf("[CR_ABSCOUNT]");
1796 else if (t == CR_FILESIZE)
1797 printf("[CR_FILESIZE]");
1798 else if (t == CR_DEFINED)
1799 printf("[CR_DEFINED]");
1800 else if (t == CR_REFERENCED)
1801 printf("[CR_REFERENCED]");
1802 else if (t == CR_STREQ)
1803 printf("[CR_STREQ]");
1804 else if (t == CR_MACDEF)
1805 printf("[CR_MACDEF]");
1806 else if (t == CR_TIME)
1807 printf("[CR_TIME]");
1808 else if (t == CR_DATE)
1809 printf("[CR_DATE]");
// 0x20..0x2F and 0x3A..0x3F are printed as the character itself
1810 else if (t >= 0x20 && t <= 0x2F)
1811 printf("[%c]", (char)t);
1812 else if (t >= 0x3A && t <= 0x3F)
1813 printf("[%c]", (char)t);
// 0x80..0x87 print as D0..D7 and 0x88..0x8F as A0..A7
1814 else if (t >= 0x80 && t <= 0x87)
1815 printf("[D%u]", ((uint32_t)t) - 0x80);
1816 else if (t >= 0x88 && t <= 0x8F)
1817 printf("[A%u]", ((uint32_t)t) - 0x88);
1819 printf("[%X:%c]", (uint32_t)t, (char)t);
// Debug helper: print every token in tokbuf[] up to the first EOL to
// stdout, prefixed with the current value of 'sloc' in hex.
1823 void DumpTokenBuffer(void)
1825 printf("Tokens [%X]: ", sloc);
1827 for(TOKEN * t=tokbuf; *t!=EOL; t++)
1831 else if (*t == CONST)
// NOTE(review): "%lX" expects an unsigned long; if *tp.u64 is a uint64_t
// this only matches where long is 64 bits wide. ExpandMacro() uses PRIX64
// for the same kind of value -- confirm and align.
1835 printf("[CONST: $%lX]", *tp.u64);
1838 else if (*t == FCONST)
1842 printf("[FCONST: $%lX]", *tp.u64);
1845 else if (*t == ACONST)
// An ACONST prints the two tokens that follow it
1847 printf("[ACONST: $%X, $%X]", (uint32_t)t[1], (uint32_t)t[2]);
1850 else if (*t == STRING)
// The token value is used as an index into string[]
1853 printf("[STRING:\"%s\"]", string[*t]);
1855 else if (*t == SYMBOL)
1858 printf("[SYMBOL:\"%s\"]", string[*t]);
1862 else if (*t == TKEOF)
1864 else if (*t == DEQUALS)
1865 printf("[DEQUALS]");
1870 else if (*t == DCOLON)
1882 else if (*t == UNMINUS)
1883 printf("[UNMINUS]");
1884 else if (*t == DOTB)
1886 else if (*t == DOTW)
1888 else if (*t == DOTL)
1890 else if (*t == DOTQ)
1892 else if (*t == DOTS)
1894 else if (*t == DOTD)
1896 else if (*t == DOTI)
1898 else if (*t == ENDEXPR)
1899 printf("[ENDEXPR]");
1900 else if (*t == CR_ABSCOUNT)
1901 printf("[CR_ABSCOUNT]");
1902 else if (*t == CR_FILESIZE)
1903 printf("[CR_FILESIZE]");
1904 else if (*t == CR_DEFINED)
1905 printf("[CR_DEFINED]");
1906 else if (*t == CR_REFERENCED)
1907 printf("[CR_REFERENCED]");
1908 else if (*t == CR_STREQ)
1909 printf("[CR_STREQ]");
1910 else if (*t == CR_MACDEF)
1911 printf("[CR_MACDEF]");
1912 else if (*t == CR_TIME)
1913 printf("[CR_TIME]");
1914 else if (*t == CR_DATE)
1915 printf("[CR_DATE]");
// 0x20..0x2F and 0x3A..0x3F are printed as the character itself
1916 else if (*t >= 0x20 && *t <= 0x2F)
1917 printf("[%c]", (char)*t);
1918 else if (*t >= 0x3A && *t <= 0x3F)
1919 printf("[%c]", (char)*t);
// 0x80..0x87 print as D0..D7 and 0x88..0x8F as A0..A7
1920 else if (*t >= 0x80 && *t <= 0x87)
1921 printf("[D%u]", ((uint32_t)*t) - 0x80);
1922 else if (*t >= 0x88 && *t <= 0x8F)
1923 printf("[A%u]", ((uint32_t)*t) - 0x88);
1925 printf("[%X:%c]", (uint32_t)*t, (char)*t);