2 // RMAC - Reboot's Macro Assembler for all Atari computers
3 // TOKEN.C - Token Handling
4 // Copyright (C) 199x Landon Dyer, 2011-2020 Reboot and Friends
5 // RMAC derived from MADMAC v1.07 Written by Landon Dyer, 1986
6 // Source utilised with the kind permission of Landon Dyer
19 #define DECL_KW // Declare keyword arrays
20 #define DEF_KW // Declare keyword values
21 #include "kwtab.h" // Incl generated keyword tables & defs
// File-scope tokenizer state. Declarations without 'static' are visible to
// other translation units.
24 int lnsave; // Nonzero: save (strcpy) text of current line
25 uint32_t curlineno; // Current line number (64K max currently)
26 int totlines; // Total # of lines
27 int mjump_align = 0; // mjump alignment flag
28 char lntag; // Line tag
29 char * curfname; // Current filename
30 char tolowertab[128]; // Uppercase ==> lowercase
31 int8_t hextab[128]; // Table of hex values
32 char dotxtab[128]; // Table for ".b", ".s", etc.
33 char irbuf[LNSIZ]; // Text for .rept block line
34 char lnbuf[LNSIZ]; // Text of current line
35 WORD filecount; // Unique file number counter
36 WORD cfileno; // Current file number
37 TOKEN * tok; // Ptr to current token
38 TOKEN * etok; // Ptr past last token in tokbuf[]
39 TOKEN tokeol[1] = {EOL}; // Bailout end-of-line token
40 char * string[TOKBUFSIZE*2];// Token buffer string pointer storage
41 int optimizeOff; // Optimization override flag
43 // File record, used to maintain a list of every include file ever visited
44 #define FILEREC struct _filerec
54 INOBJ * cur_inobj; // Ptr current input obj (IFILE/IMACRO/IREPT)
55 static INOBJ * f_inobj; // Ptr list of free INOBJs
56 static IFILE * f_ifile; // Ptr list of free IFILEs
57 static IMACRO * f_imacro; // Ptr list of free IMACROs
59 static TOKEN tokbuf[TOKBUFSIZE]; // Token buffer (stack-like, all files)
// Character classification table, indexed by byte value ($00-$FF). Each entry
// is a sum of class flags (ILLEG, WHITE, SELF, MULTX, DIGIT, HDIGIT, STSYM,
// CTSYM, DOT); the trailing comment on each row names the characters covered.
61 uint8_t chrtab[0x100] = {
62 ILLEG, ILLEG, ILLEG, ILLEG, // NUL SOH STX ETX
63 ILLEG, ILLEG, ILLEG, ILLEG, // EOT ENQ ACK BEL
64 ILLEG, WHITE, ILLEG, ILLEG, // BS HT LF VT
65 WHITE, ILLEG, ILLEG, ILLEG, // FF CR SO SI
67 ILLEG, ILLEG, ILLEG, ILLEG, // DLE DC1 DC2 DC3
68 ILLEG, ILLEG, ILLEG, ILLEG, // DC4 NAK SYN ETB
69 ILLEG, ILLEG, ILLEG, ILLEG, // CAN EM SUB ESC
70 ILLEG, ILLEG, ILLEG, ILLEG, // FS GS RS US
72 WHITE, MULTX, MULTX, SELF, // SP ! " #
73 MULTX+CTSYM, MULTX, SELF, MULTX, // $ % & '
74 SELF, SELF, SELF, SELF, // ( ) * +
75 SELF, SELF, STSYM, SELF, // , - . /
77 DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 0 1
78 DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 2 3
79 DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 4 5
80 DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 6 7
81 DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 8 9
83 MULTX, MULTX, MULTX, STSYM+CTSYM, // < = > ?
85 MULTX, STSYM+CTSYM+HDIGIT, // @ A
86 DOT+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT, // B C
87 DOT+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT, // D E
88 STSYM+CTSYM+HDIGIT, STSYM+CTSYM, // F G
89 STSYM+CTSYM, DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // H I J K
90 DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // L M N O
92 DOT+STSYM+CTSYM, DOT+STSYM+CTSYM, STSYM+CTSYM, DOT+STSYM+CTSYM, // P Q R S
93 STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, DOT+STSYM+CTSYM, // T U V W
// NOTE(review): 'X' below has no DOT flag although lowercase 'x' does, and
// InitTokenizer assigns dotxtab['X'] - confirm whether this is intentional.
94 STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, SELF,// X Y Z [
95 SELF, SELF, MULTX, STSYM+CTSYM, // \ ] ^ _
97 ILLEG, STSYM+CTSYM+HDIGIT, // ` a
98 DOT+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT, // b c
99 DOT+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT, // d e
100 STSYM+CTSYM+HDIGIT, STSYM+CTSYM, // f g
101 STSYM+CTSYM, DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // h i j k
102 DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // l m n o
104 DOT+STSYM+CTSYM, DOT+STSYM+CTSYM, STSYM+CTSYM, DOT+STSYM+CTSYM, // p q r s
105 STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, DOT+STSYM+CTSYM, // t u v w
106 DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, SELF, // x y z {
107 SELF, SELF, SELF, ILLEG, // | } ~ DEL
109 // Anything above $7F is illegal (and yes, we need to check for this,
110 // otherwise you get strange and spurious errors that will lead you astray)
111 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
112 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
113 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
114 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
115 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
116 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
117 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
118 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
119 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
120 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
121 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
122 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
123 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
124 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
125 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
126 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG
129 // Names of registers
// ExpandMacro indexes this table with (token - KW_D0) to turn a register
// token back into text. The numbers in the trailing comments presumably give
// the token value range covered by each row - confirm against kwtab.h.
// Empty strings are slots with no register name.
130 static char * regname[] = {
131 "d0","d1","d2","d3","d4","d5","d6","d7", // 128,135
132 "a0","a1","a2","a3","a4","a5","a6","sp", // 136,143
133 "ssp","pc","sr","ccr","regequ","set","reg","r0", // 144,151
134 "r1","r2","r3","r4","r5","r6","r7","r8", // 152,159
135 "r9","r10","r11","r12","r13","r14","r15","r16", // 160,167
136 "r17","r18","r19","r20","r21","r22","r23","r24", // 168,175
137 "r25","r26","r27","r28","r29","r30","r31","ccdef", // 176,183
138 "usp","ic40","dc40","bc40","sfc","dfc","","vbr", // 184,191
139 "cacr","caar","msp","isp","tc","itt0","itt1","dtt0", // 192,199
140 "dtt1","mmusr","urp","srp","iacr0","iacr1","dacr0","dacr1", // 200,207
141 "tt0","tt1","crp","","","","","", // 208,215
142 "","","","","fpiar","fpsr","fpcr","", // 216,223
143 "fp0","fp1","fp2","fp3","fp4","fp5","fp6","fp7", // 224,231
144 "","","","","","","","", // 232,239
145 "","","","","","","","", // 240,247
146 "","","","","","","","", // 248,255
147 "","","","","x0","x1","y0","y1", // 256,263
148 "","b0","","b2","","b1","a","b", // 264,271
149 "mr","omr","la","lc","ssh","ssl","ss","", // 272,279
150 "n0","n1","n2","n3","n4","n5","n6","n7", // 280,287
151 "m0","m1","m2","m3","m4","m5","m6","m7", // 288,295
152 "","","","","","","l","p", // 296,303
153 "mr","omr","la","lc","ssh","ssl","ss","", // 304,311
154 "a10","b10","x","y","","","ab","ba" // 312,319
// Names of the RISC registers r0..r31; ExpandMacro indexes this table with
// (token - KW_R0).
157 static char * riscregname[] = {
158 "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
159 "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
160 "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
161 "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31"
166 // Initialize tokenizer
// Resets the line-saving flag, the current filename and the file counters,
// then fills the hextab[], tolowertab[] and dotxtab[] lookup tables.
168 void InitTokenizer(void)
171 char * htab = "0123456789abcdefABCDEF"; // Hex character table
173 lnsave = 0; // Don't save lines
174 curfname = ""; // No file, empty filename
175 filecount = (WORD)-1; // include() pre-increments, so the first file is #0
176 cfileno = (WORD)-1; // cfileno gets bumped to 0
188 // Initialize hex, "dot" and tolower tables
193 tolowertab[i] = (char)i;
196 for(i=0; htab[i]!=EOS; i++)
// htab[0..15] is "0-9a-f" (value == index); htab[16..21] is "A-F", whose
// values are index - 6, i.e. 10..15
197 hextab[htab[i]] = (char)((i < 16) ? i : i - 6);
199 for(i='A'; i<='Z'; i++)
// Setting bit $20 turns an ASCII uppercase letter into its lowercase form
200 tolowertab[i] |= 0x20;
202 // These characters are legal immediately after a period
203 dotxtab['b'] = DOTB; // .b .B
205 //dotxtab['s'] = DOTB;
206 //dotxtab['S'] = DOTB;
207 dotxtab['w'] = DOTW; // .w .W
209 dotxtab['l'] = DOTL; // .l .L
211 dotxtab['i'] = DOTI; // .i .I (purpose unclear)
213 dotxtab['D'] = DOTD; // .d .D (double)
215 dotxtab['S'] = DOTS; // .s .S
217 dotxtab['Q'] = DOTQ; // .q .Q (quad word)
219 dotxtab['X'] = DOTX; // .x .X
221 dotxtab['P'] = DOTP; // .p .P
// Point curfname at the name to show in error messages: the frec_name of the
// selected entry in the filerec list, or one of the placeholder strings
// "(*top*)" / "(*NOT FOUND*)" when no entry applies.
226 void SetFilenameForErrorReporting(void)
230 // Check for absolute top filename (this should never happen)
233 curfname = "(*top*)";
237 FILEREC * fr = filerec;
239 // Advance to the correct record...
240 while (fr != NULL && fnum != 0)
246 // Check for file # record not found (this should never happen either)
249 curfname = "(*NOT FOUND*)";
253 curfname = fr->frec_name;
258 // Allocate an IFILE, IMACRO or IREPT
// 'typ' selects the kind of input object (SRC_IFILE, SRC_IMACRO or SRC_IREPT).
// INOBJ, IFILE and IMACRO records are taken from the free lists (f_inobj,
// f_ifile, f_imacro) when available and malloc'ed otherwise; an IREPT is always
// malloc'ed. The new INOBJ records the current ifent, tok and etok so that they
// can be restored later, and is linked in front of cur_inobj.
// NOTE(review): no NULL checks on the malloc() results are visible here.
260 INOBJ * a_inobj(int typ)
266 // Allocate and initialize INOBJ first
268 inobj = malloc(sizeof(INOBJ));
272 f_inobj = f_inobj->in_link;
277 case SRC_IFILE: // Alloc and init an IFILE
279 ifile = malloc(sizeof(IFILE));
283 f_ifile = f_ifile->if_link;
286 inobj->inobj.ifile = ifile;
289 case SRC_IMACRO: // Alloc and init an IMACRO
290 if (f_imacro == NULL)
291 imacro = malloc(sizeof(IMACRO));
295 f_imacro = f_imacro->im_link;
298 inobj->inobj.imacro = imacro;
301 case SRC_IREPT: // Alloc and init an IREPT
302 inobj->inobj.irept = malloc(sizeof(IREPT));
303 DEBUG { printf("alloc IREPT\n"); }
307 // Install INOBJ on top of input stack
308 inobj->in_ifent = ifent; // Record .if context on entry
309 inobj->in_type = (WORD)typ;
310 inobj->in_otok = tok;
311 inobj->in_etok = etok;
312 inobj->in_link = cur_inobj;
320 // Perform macro substitution from 'src' to 'dest'. Return OK or some error.
321 // A macro reference is in one of two forms:
322 // \name <non-name-character>
// \{name}
324 // A doubled backslash (\\) is compressed to a single backslash (\).
325 // Argument definitions have been pre-tokenized, so we have to turn them back
326 // into text. This means that numbers, in particular, become hex, regardless of
327 // their representation when the macro was invoked. This is a hack.
328 // A label may appear at the beginning of the line:
329 // :<name><whitespace>
330 // (the colon must be in the first column). These labels are stripped before
331 // macro expansion takes place.
//
// src     - text of the macro line to expand
// dest    - buffer receiving the expanded text
// destsiz - size of 'dest' in bytes
// The macro being expanded is taken from cur_inobj. Overflowing 'dest' is
// reported through fatal(); other problems are reported through error().
333 int ExpandMacro(char * src, char * dest, int destsiz)
336 int questmark; // \? for testing argument existence
337 char mname[128]; // Assume max size of a formal arg name
338 char numbuf[20]; // Buffer for text of CONSTs
341 char ** symbolString;
343 DEBUG { printf("ExM: src=\"%s\"\n", src); }
345 IMACRO * imacro = cur_inobj->inobj.imacro;
346 int macnum = (int)(imacro->im_macro->sattr);
348 char * dst = dest; // Next dest slot
349 char * edst = dest + destsiz - 1; // Last byte of dest buffer
351 // Check for (and skip over) any "label" on the line
357 while (*s != EOS && !(chrtab[*s] & WHITE))
361 s++; // Skip first whitespace
364 // Expand the rest of the line
367 // Copy single character
373 // Skip comments in case a loose @ or \ is in there
374 // In that case the tokeniser was trying to expand it.
375 if ((*s == ';') || ((*s == '/') && (*(s + 1) == '/')))
380 // Do macro expansion
388 case '\\': // \\, \ (collapse to single backslash)
394 case '?': // \? <macro> set `questmark' flag
398 case '#': // \#, number of arguments
399 sprintf(numbuf, "%d", (int)imacro->im_nargs);
401 case '!': // \! size suffix supplied on invocation
402 switch ((int)imacro->im_siz)
404 case SIZN: d = ""; break;
405 case SIZB: d = ".b"; break;
406 case SIZW: d = ".w"; break;
407 case SIZL: d = ".l"; break;
411 case '~': // ==> unique label string Mnnnn...
412 sprintf(numbuf, "M%u", curuniq);
428 return error("missing argument name");
431 // \n ==> argument number 'n', 0..9
432 if (chrtab[*s] & DIGIT)
442 // Get argument name: \name, \{name}
452 while (chrtab[*s] & CTSYM);
457 for(++s; *s != EOS && *s != '}';)
461 return error("missing closing brace ('}')");
468 // Lookup the argument and copy its (string) value into the
469 // destination string
470 DEBUG { printf("argument='%s'\n", mname); }
472 if ((arg = lookup(mname, MACARG, macnum)) == NULL)
473 return error("undefined argument: '%s'", mname);
476 // Convert a string of tokens (terminated with EOL) back into
477 // text. If an argument is out of range (not specified in the
478 // macro invocation) then it is ignored.
479 i = (int)arg->svalue;
481 DEBUG { printf("~argnumber=%d\n", i); }
484 if (i < imacro->im_nargs)
486 tk = imacro->argument[i].token;
487 symbolString = imacro->argument[i].string;
490 // printf("ExM: Preparing to parse argument #%u...\n", i);
496 // 0 if the argument is empty or non-existent,
497 // 1 if the argument is not empty
500 if (tk == NULL || *tk == EOL)
506 *dst++ = (char)(questmark + '0');
510 // Argument # is in range, so expand it
515 // Reverse-translation from a token number to a string.
516 // This is a hack. It might be better table-driven.
519 if ((*tk >= KW_D0) && !rdsp && !rgpu)
521 d = regname[(int)*tk++ - KW_D0];
524 else if ((*tk >= KW_R0) && (*tk <= KW_R31))
526 d = riscregname[(int)*tk++ - KW_R0];
534 d = symbolString[*tk++];
535 DEBUG { printf("ExM: SYMBOL=\"%s\"", d); }
538 d = symbolString[*tk++];
559 // Shamus: Changing the format specifier from %lx to %ux caused the assembler
560 // to choke on legitimate code... Need to investigate this further
561 // before changing anything else here!
563 // sprintf(numbuf, "$%lx", (uint64_t)*tk++);
564 sprintf(numbuf, "$%" PRIX64, (uint64_t)*tk++);
632 *dst++ = (char)*(tk - 1);
637 // If 'd' != NULL, copy string to destination
641 DEBUG printf("d='%s'\n", d);
660 DEBUG { printf("ExM: dst=\"%s\"\n", dest); }
665 DEBUG { printf("*** OVERFLOW LINE ***\n%s\n", dest); }
666 return fatal("line too long as a result of macro expansion");
671 // Get next line of text from a macro
// Advances the current macro's im_nextln to the following line and expands the
// line just consumed into im_lnbuf, which is what gets returned.
// NOTE(review): the return value of ExpandMacro() is not checked here.
673 char * GetNextMacroLine(void)
675 IMACRO * imacro = cur_inobj->inobj.imacro;
676 // LONG * strp = imacro->im_nextln;
677 LLIST * strp = imacro->im_nextln;
679 if (strp == NULL) // End-of-macro
682 imacro->im_nextln = strp->next;
683 // ExpandMacro((char *)(strp + 1), imacro->im_lnbuf, LNSIZ);
684 ExpandMacro(strp->line, imacro->im_lnbuf, LNSIZ);
686 return imacro->im_lnbuf;
691 // Get next line of text from a repeat block
// When the end of the block's line list is reached, restarts from ir_firstln
// and consumes one unit of ir_count; the block ends once ir_count was already
// zero. Each line is copied into the global irbuf.
693 char * GetNextRepeatLine(void)
695 IREPT * irept = cur_inobj->inobj.irept;
696 // LONG * strp = irept->ir_nextln; // initial null
698 // Do repeat at end of .rept block's string list
700 if (irept->ir_nextln == NULL)
702 DEBUG { printf("back-to-top-of-repeat-block count=%d\n", (int)irept->ir_count); }
703 irept->ir_nextln = irept->ir_firstln; // copy first line
705 if (irept->ir_count-- == 0)
707 DEBUG { printf("end-repeat-block\n"); }
711 // strp = irept->ir_nextln;
713 // Mark the current macro line in the irept object
714 // This is probably overkill - a global variable
715 // would suffice here (it only gets used during
716 // error reporting anyway)
717 irept->lineno = irept->ir_nextln->lineno;
719 // strcpy(irbuf, (char *)(irept->ir_nextln + 1));
// NOTE(review): unbounded copy into irbuf[LNSIZ]; presumably stored lines are
// always shorter than LNSIZ - confirm where the list is built.
720 strcpy(irbuf, irept->ir_nextln->line);
721 DEBUG { printf("repeat line='%s'\n", irbuf); }
722 // irept->ir_nextln = (LONG *)*strp;
723 irept->ir_nextln = irept->ir_nextln->next;
730 // Include a source file used at the root, and for ".include" files
// handle - already-open file handle to read the source from
// fname  - name of the file; a strdup'ed copy becomes curfname and is also
//          stored in the new file record
// Saves the current line number, filename and file number in the new IFILE,
// then assigns a fresh file number and appends a FILEREC to the filerec list.
732 int include(int handle, char * fname)
735 DEBUG { printf("[include: %s, cfileno=%u]\n", fname, cfileno); }
737 // Alloc and initialize include-descriptors
738 INOBJ * inobj = a_inobj(SRC_IFILE);
739 IFILE * ifile = inobj->inobj.ifile;
741 ifile->ifhandle = handle; // Setup file handle
742 ifile->ifind = ifile->ifcnt = 0; // Setup buffer indices
743 ifile->ifoldlineno = curlineno; // Save old line number
744 ifile->ifoldfname = curfname; // Save old filename
745 ifile->ifno = cfileno; // Save old file number
747 // NB: This *must* be preincrement, we're adding one to the filecount here!
748 cfileno = ++filecount; // Compute NEW file number
// NOTE(review): the strdup() result is not checked for NULL
749 curfname = strdup(fname); // Set current filename (alloc storage)
750 curlineno = 0; // Start on line zero
752 // Add another file to the file-record
// NOTE(review): the malloc() result is dereferenced without a NULL check
753 FILEREC * fr = (FILEREC *)malloc(sizeof(FILEREC));
754 fr->frec_next = NULL;
755 fr->frec_name = curfname;
758 filerec = fr; // Add first filerec
760 last_fr->frec_next = fr; // Append to list of filerecs
763 DEBUG { printf("[include: curfname: %s, cfileno=%u]\n", curfname, cfileno); }
770 // Pop the current input level
// Closes any .if levels opened inside the input object, restores tok/etok,
// releases the type-specific record (IFILE, IMACRO or IREPT) and returns the
// INOBJ to the free list. Returns -1 if d_endif() reports a failure.
774 INOBJ * inobj = cur_inobj;
779 // Pop IFENT levels until we reach the conditional assembly context we
780 // were at when the input object was entered.
781 int numUnmatched = 0;
783 while (ifent != inobj->in_ifent)
785 if (d_endif() != 0) // Something bad happened during endif parsing?
786 return -1; // If yes, bail instead of getting stuck in a loop
791 // Warn the user that unmatched .if levels were closed on their behalf
792 if (numUnmatched > 0)
793 warn("missing %d .endif(s)", numUnmatched);
795 tok = inobj->in_otok; // Restore tok and etok
796 etok = inobj->in_etok;
798 switch (inobj->in_type)
800 case SRC_IFILE: // Pop and release an IFILE
802 DEBUG { printf("[Leaving: %s]\n", curfname); }
804 IFILE * ifile = inobj->inobj.ifile;
805 ifile->if_link = f_ifile;
807 close(ifile->ifhandle); // Close source file
808 DEBUG { printf("[fpop (pre): curfname=%s]\n", curfname); }
809 curfname = ifile->ifoldfname; // Set current filename
810 DEBUG { printf("[fpop (post): curfname=%s]\n", curfname); }
811 DEBUG { printf("[fpop: (pre) cfileno=%d ifile->ifno=%d]\n", (int)cfileno, (int)ifile->ifno); }
812 curlineno = ifile->ifoldlineno; // Set current line#
813 DEBUG { printf("cfileno=%d ifile->ifno=%d\n", (int)cfileno, (int)ifile->ifno); }
814 cfileno = ifile->ifno; // Restore current file number
815 DEBUG { printf("[fpop: (post) cfileno=%d ifile->ifno=%d]\n", (int)cfileno, (int)ifile->ifno); }
819 case SRC_IMACRO: // Pop and release an IMACRO
821 IMACRO * imacro = inobj->inobj.imacro;
822 imacro->im_link = f_imacro;
827 case SRC_IREPT: // Pop and release an IREPT
829 DEBUG { printf("dealloc IREPT\n"); }
830 LLIST * p = inobj->inobj.irept->ir_firstln;
832 // Deallocate repeat lines
843 cur_inobj = inobj->in_link;
844 inobj->in_link = f_inobj;
852 // Get line from file into buf, return NULL on EOF or ptr to the start of a
// line inside the current IFILE's buffer (ifbuf). The line terminator is
// overwritten with an end-of-string sentinel. When no complete line is
// buffered, the leftover bytes are moved to the front of ifbuf and up to
// QUANTUM more bytes are read from the file handle.
855 char * GetNextLine(void)
859 int readamt = -1; // 0 if last read() yielded 0 bytes
860 IFILE * fl = cur_inobj->inobj.ifile;
864 // Scan for next end-of-line; handle stupid text formats by treating
865 // \r\n the same as \n. (lone '\r' at end of buffer means we have to
867 d = &fl->ifbuf[fl->ifind];
869 for(p=d, i=0, j=fl->ifcnt; i<j; i++, p++)
871 if (*p == '\r' || *p == '\n')
878 break; // Need to read more, then look for '\n' to eat
879 else if (p[1] == '\n')
883 // Cover up the newline with end-of-string sentinel
892 // Handle hanging lines by ignoring them (Input file is exhausted, no
893 // \r or \n on last line)
894 // Shamus: Ignoring input is wrong. Never ignore any input!
895 if (!readamt && fl->ifcnt)
902 // Really should check to see if we're at the end of the buffer!
904 fl->ifbuf[fl->ifind + fl->ifcnt] = '\0';
906 return &fl->ifbuf[fl->ifind];
910 // Truncate and return absurdly long lines.
911 if (fl->ifcnt >= QUANTUM)
913 fl->ifbuf[fl->ifind + fl->ifcnt - 1] = '\0';
915 return &fl->ifbuf[fl->ifind];
918 // Relocate what's left of a line to the beginning of the buffer, and
919 // read some more of the file in; return NULL if the buffer's empty and
923 p = &fl->ifbuf[fl->ifind];
924 d = &fl->ifbuf[fl->ifcnt & 1];
926 for(i=0; i<fl->ifcnt; i++)
929 fl->ifind = fl->ifcnt & 1;
932 readamt = read(fl->ifhandle, &fl->ifbuf[fl->ifind + fl->ifcnt], QUANTUM);
937 if ((fl->ifcnt += readamt) == 0)
// Fetch the next line from the current input object (file, macro or repeat
// block) and convert it into tokens deposited starting at etok. On success
// 'tok' points at the first token of the line; while scanning it points at
// tokeol so that an error leaves an empty line behind. Returns TKEOF when the
// input is exhausted and the result of error() when tokenization fails.
946 int TokenizeLine(void)
948 uint8_t * ln = NULL; // Ptr to current position in line
949 uint8_t * p; // Random character ptr
950 PTR tk; // Token-deposit ptr
951 int state = 0; // State for keyword detector
952 int j = 0; // Var for keyword detector
953 uint8_t c; // Random char
954 uint64_t v; // Random value
955 uint32_t cursize = 0; // Current line's size (.b, .w, .l, .s, .q, .d)
956 uint8_t * nullspot = NULL; // Spot to clobber for SYMBOL termination
957 int stuffnull; // 1:terminate SYMBOL '\0' at *nullspot
959 int stringNum = 0; // Pointer to string locations in tokenized line
963 if (cur_inobj == NULL) // Return EOF if input stack is empty
966 // Get another line of input from the current input source: a file, a
967 // macro, or a repeat-block
968 switch (cur_inobj->in_type)
972 // o bump source line number;
973 // o tag the listing-line with a space;
974 // o kludge lines generated by Alcyon C.
976 if ((ln = GetNextLine()) == NULL)
978 DEBUG { printf("TokenizeLine: Calling fpop() from SRC_IFILE...\n"); }
979 if (fpop() == 0) // Pop input level
980 goto retry; // Try for more lines
983 ifent->if_prev = (IFENT *)-1; //Signal Assemble() that we have reached EOF with unbalanced if/endifs
988 curlineno++; // Bump line number
993 // AS68 compatibility, throw away all lines starting with
994 // back-quotes, tildes, or '*'
995 // On other lines, turn the first '*' into a semi-colon.
996 if (*ln == '`' || *ln == '~' || *ln == '*')
1000 for(p=ln; *p!=EOS; p++)
1014 // o Handle end-of-macro;
1015 // o tag the listing-line with an at (@) sign.
1017 if ((ln = GetNextMacroLine()) == NULL)
1019 if (ExitMacro() == 0) // Exit macro (pop args, do fpop(), etc)
1020 goto retry; // Try for more lines...
1022 return TKEOF; // Oops, we got a non zero return code, signal EOF
1029 // o Handle end-of-repeat-block;
1030 // o tag the listing-line with a pound (#) sign.
1032 if ((ln = GetNextRepeatLine()) == NULL)
1034 DEBUG { printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n"); }
1043 // Save text of the line. We only do this during listings and within
1044 // macro-type blocks, since it is expensive to unconditionally copy every
// NOTE(review): strlen() returns size_t but is printed with %d below, and a
// line of exactly LNSIZ characters passes this check - confirm both.
1049 if (strlen(ln) > LNSIZ)
1050 return error("line too long (%d, max %d)", strlen(ln), LNSIZ);
1055 // General housekeeping
1056 tok = tokeol; // Set "tok" to EOL in case of error
1057 tk.u32 = etok; // Reset token ptr
1058 stuffnull = 0; // Don't stuff nulls
1059 totlines++; // Bump total #lines assembled
1061 // See if the entire line is a comment. This is a win if the programmer
1062 // puts in lots of comments
1063 if (*ln == '*' || *ln == ';' || ((*ln == '/') && (*(ln + 1) == '/')))
1066 // And here we have a very ugly hack for signalling a single line 'turn off
1067 // optimization'. There's really no nice way to do this, so hack it is!
1068 optimizeOff = 0; // Default is to take optimizations as they come
1072 optimizeOff = 1; // Signal that we don't want to optimize this line
1073 ln++; // & skip over the darned thing
1076 // Main tokenization loop;
1077 // o skip whitespace;
1078 // o handle end-of-line;
1079 // o handle symbols;
1080 // o handle single-character tokens (operators, etc.);
1081 // o handle multiple-character tokens (constants, strings, etc.).
1084 // Check to see if there's enough space in the token buffer
1085 if (tk.cp >= ((uint8_t *)(&tokbuf[TOKBUFSIZE])) - 20)
1087 return error("token buffer overrun");
1090 // Skip whitespace, handle EOL
1091 while (chrtab[*ln] & WHITE)
1094 // Handle EOL, comment with ';'
1095 if (*ln == EOS || *ln == ';'|| ((*ln == '/') && (*(ln + 1) == '/')))
1098 // Handle start of symbol. Symbols are null-terminated in place. The
1099 // termination is always one symbol behind, since there may be no place
1100 // for a null in the case that an operator immediately follows the name.
1105 if (stuffnull) // Terminate old symbol from previous pass
1108 v = 0; // Assume no DOT attrib follows symbol
1111 // In some cases, we need to check for a DOTx at the *beginning*
1112 // of a symbol, as the "start" of the line we're currently looking
1113 // at could be somewhere in the middle of that line!
1116 // Make sure that it's *only* a .[bwsl] following, and not the
1117 // start of a local symbol:
1118 if ((chrtab[*(ln + 1)] & DOT)
1119 && (dotxtab[*(ln + 1)] != 0)
1120 && !(chrtab[*(ln + 2)] & CTSYM))
1122 // We found a legitimate DOTx construct, so add it to the
1126 *tk.u32++ = (TOKEN)dotxtab[*ln++];
1131 p = nullspot = ln++; // Nullspot -> start of this symbol
1133 // Find end of symbol (and compute its length)
1134 for(j=1; (int)chrtab[*ln]&CTSYM; j++)
1137 // Handle "DOT" special forms (like ".b") that follow a normal
1138 // symbol or keyword:
1141 *ln++ = EOS; // Terminate symbol
1142 stuffnull = 0; // And never try it again
1144 // Character following the '.' must have a DOT attribute, and
1145 // the character after THAT one must not have a start-symbol
1146 // attribute (to prevent symbols that look like, for example,
1147 // "zingo.barf", which might be a good idea anyway....)
1148 if (((chrtab[*ln] & DOT) == 0) || (dotxtab[*ln] == 0))
1149 return error("[bwsl] must follow '.' in symbol");
1151 v = (uint32_t)dotxtab[*ln++];
1152 cursize = (uint32_t)v;
1154 if (chrtab[*ln] & CTSYM)
1155 return error("misuse of '.'; not allowed in symbols");
1158 // If the symbol is small, check to see if it's really the name of
1162 for(state=0; state>=0;)
1164 j = (int)tolowertab[*p++];
1167 if (kwcheck[j] != state)
1173 if (*p == EOS || p == ln)
1187 // Make j = -1 if user tries to use a RISC register while in 68K mode
1188 if (!(rgpu || rdsp || dsp56001) && ((TOKEN)j >= KW_R0 && (TOKEN)j <= KW_R31))
1193 // Make j = -1 if time, date etc with no preceding ^^
1194 // defined, referenced, streq, macdef, date and time
1197 case 112: // defined
1198 case 113: // referenced
1206 // If not tokenized keyword OR token was not found
1207 if ((j < 0) || (state < 0))
1210 string[stringNum] = nullspot;
1211 *tk.u32++ = stringNum;
1216 *tk.u32++ = (TOKEN)j;
1220 if (v) // Record attribute token (if any)
1221 *tk.u32++ = (TOKEN)v;
1223 if (stuffnull) // Arrange for string termination on next pass
1229 // Handle identity tokens
1236 // Handle multiple-character tokens
1241 case '!': // ! or !=
1251 case '\'': // 'string'
1254 // Hardcoded for now, maybe this will change in the future
1255 *tk.u32++ = STRINGA8;
1259 case '\"': // "string"
1263 string[stringNum] = ln;
1264 *tk.u32++ = stringNum;
1267 for(p=ln; *ln!=EOS && *ln!=c1;)
1276 return(error("unterminated string"));
1305 // If we're evaluating a macro
1306 // this is valid because it's
1307 // a parameter expansion
1309 // If we're evaluating a macro
1310 // this is valid and expands to
1314 warn("bad backslash code in string");
1324 return error("unterminated string");
1328 case '$': // $, hex constant
1329 if (chrtab[*ln] & HDIGIT)
1333 // Parse the hex value
1334 while (hextab[*ln] >= 0)
1335 v = (v << 4) + (int)hextab[*ln++];
1342 if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
1347 else if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
1358 case '<': // < or << or <> or <=
1377 case ':': // : or ::
1387 case '=': // = or ==
1390 *tk.u32++ = DEQUALS;
1397 case '>': // > or >> or >=
1412 case '%': // % or binary constant
1413 if (*ln < '0' || *ln > '1')
1421 while (*ln >= '0' && *ln <= '1')
1422 v = (v << 1) + *ln++ - '0';
1426 if ((*(ln + 1) == 'b') || (*(ln + 1) == 'B'))
1432 if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
1438 if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
1448 case '@': // @ or octal constant
1449 if (*ln < '0' || *ln > '7')
1457 while (*ln >= '0' && *ln <= '7')
1458 v = (v << 3) + *ln++ - '0';
1462 if ((*(ln + 1) == 'b') || (*(ln + 1) == 'B'))
1468 if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
1474 if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
1484 case '^': // ^ or ^^ <operator-name>
1491 if (((int)chrtab[*++ln] & STSYM) == 0)
1493 error("invalid symbol following ^^");
1499 while ((int)chrtab[*ln] & CTSYM)
1502 for(state=0; state>=0;)
1504 // Get char, convert to lowercase
1507 if (j >= 'A' && j <= 'Z')
1512 if (kwcheck[j] != state)
1518 if (*p == EOS || p == ln)
1527 if (j < 0 || state < 0)
1529 error("unknown symbol following ^^");
1533 *tk.u32++ = (TOKEN)j;
1536 interror(2); // Bad MULTX entry in chrtab
1541 // Handle decimal constant
1544 uint8_t * numStart = ln;
1547 while ((int)chrtab[*ln] & DIGIT)
1548 v = (v * 10) + *ln++ - '0';
1550 // See if there's a .[bwl] after the constant & deal with it if so
1553 if ((*(ln + 1) == 'b') || (*(ln + 1) == 'B'))
1561 else if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
1569 else if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
1577 else if ((int)chrtab[*(ln + 1)] & DIGIT)
1579 // Hey, more digits after the dot, so we assume it's a
1580 // floating point number of some kind... numEnd will point
1581 // to the first non-float character after it's done
1584 double f = strtod(numStart, &numEnd);
1585 ln = (uint8_t *)numEnd;
1588 return error("floating point parse error");
1590 // N.B.: We use the C compiler's internal double
1591 // representation for all internal float calcs and
1592 // are reasonably sure that the size of said double
1593 // is 8 bytes long (which we check for in fltpoint.c)
1606 //printf("CONST: %i\n", v);
1610 // Handle illegal character
1611 return error("illegal character $%02X found", *ln);
1614 // Terminate line of tokens and return "success."
1617 tok = etok; // Set tok to beginning of line
1619 if (stuffnull) // Terminate last SYMBOL
1629 // .GOTO <label> goto directive
1631 // The label is searched for starting from the first line of the current,
1632 // enclosing macro definition. If no enclosing macro exists, an error is
1635 // A label is of the form:
1637 // :<name><whitespace>
1639 // The colon must appear in column 1. The label is stripped prior to macro
1640 // expansion, and is NOT subject to macro expansion. The whitespace may also
//
// The label name is read from string[tok[1]]. On a match the macro's
// im_nextln is set to the labelled line, so expansion resumes there.
// The 'unused' parameter is not referenced in the lines visible here.
1643 int d_goto(WORD unused)
1645 // Setup for the search
1647 return error("missing label");
1649 char * sym = string[tok[1]];
1652 if (cur_inobj->in_type != SRC_IMACRO)
1653 return error("goto not in macro");
1655 IMACRO * imacro = cur_inobj->inobj.imacro;
1656 LLIST * defln = imacro->im_macro->lineList;
1658 // Attempt to find the label, starting with the first line.
1659 for(; defln!=NULL; defln=defln->next)
1661 // Must start with a colon
1662 if (defln->line[0] == ':')
1664 // Compare names (sleazo string compare)
1666 char * s2 = defln->line;
1668 // Either we will match the strings to EOS on both, or we will
1669 // match EOS on string 1 to whitespace on string 2. Otherwise, we
1671 while ((*s1 == *s2) || ((*s1 == EOS) && (chrtab[*s2] & WHITE)))
1673 // If we reached the end of string 1 (sym), we're done.
1674 // Note that we're also checking for the end of string 2 as
1675 // well, since we've established they're equal above.
1678 // Found the label, set new macro next-line and return.
1679 imacro->im_nextln = defln;
1689 return error("goto label not found");
// Debug helper: print a bracketed, human-readable form of the single token
// 't' to stdout. Tokens with no dedicated case are printed as hex plus the
// corresponding character.
1693 void DumpToken(TOKEN t)
1697 else if (t == CONST)
1699 else if (t == FCONST)
1701 else if (t == ACONST)
1703 else if (t == STRING)
1705 else if (t == SYMBOL)
1709 else if (t == TKEOF)
1711 else if (t == DEQUALS)
1712 printf("[DEQUALS]");
1717 else if (t == DCOLON)
1729 else if (t == UNMINUS)
1730 printf("[UNMINUS]");
1745 else if (t == ENDEXPR)
1746 printf("[ENDEXPR]");
1747 else if (t == CR_ABSCOUNT)
1748 printf("[CR_ABSCOUNT]");
1749 else if (t == CR_FILESIZE)
1750 printf("[CR_FILESIZE]");
1751 else if (t == CR_DEFINED)
1752 printf("[CR_DEFINED]");
1753 else if (t == CR_REFERENCED)
1754 printf("[CR_REFERENCED]");
1755 else if (t == CR_STREQ)
1756 printf("[CR_STREQ]");
1757 else if (t == CR_MACDEF)
1758 printf("[CR_MACDEF]");
1759 else if (t == CR_TIME)
1760 printf("[CR_TIME]");
1761 else if (t == CR_DATE)
1762 printf("[CR_DATE]");
// Token values $20-$2F and $3A-$3F are printed as the character itself
1763 else if (t >= 0x20 && t <= 0x2F)
1764 printf("[%c]", (char)t);
1765 else if (t >= 0x3A && t <= 0x3F)
1766 printf("[%c]", (char)t);
// Token values $80-$87 print as D0-D7 and $88-$8F as A0-A7
1767 else if (t >= 0x80 && t <= 0x87)
1768 printf("[D%u]", ((uint32_t)t) - 0x80);
1769 else if (t >= 0x88 && t <= 0x8F)
1770 printf("[A%u]", ((uint32_t)t) - 0x88);
1772 printf("[%X:%c]", (uint32_t)t, (char)t);
// Debug helper: print the current location (sloc) followed by every token in
// tokbuf[] up to the first EOL, in the same bracketed form DumpToken uses.
// Tokens that carry a payload (constants, strings, symbols) are printed
// together with it.
1776 void DumpTokenBuffer(void)
1778 printf("Tokens [%X]: ", sloc);
1780 for(TOKEN * t=tokbuf; *t!=EOL; t++)
1784 else if (*t == CONST)
// NOTE(review): %lX assumes 'long' is 64 bits wide; ExpandMacro uses PRIX64
// for the same kind of value - confirm portability.
1788 printf("[CONST: $%lX]", *tp.u64);
1791 else if (*t == FCONST)
1795 printf("[FCONST: $%lX]", *tp.u64);
1798 else if (*t == ACONST)
1800 printf("[ACONST: $%X, $%X]", (uint32_t)t[1], (uint32_t)t[2]);
1803 else if (*t == STRING)
1806 printf("[STRING:\"%s\"]", string[*t]);
1808 else if (*t == SYMBOL)
1811 printf("[SYMBOL:\"%s\"]", string[*t]);
1815 else if (*t == TKEOF)
1817 else if (*t == DEQUALS)
1818 printf("[DEQUALS]");
1823 else if (*t == DCOLON)
1835 else if (*t == UNMINUS)
1836 printf("[UNMINUS]");
1837 else if (*t == DOTB)
1839 else if (*t == DOTW)
1841 else if (*t == DOTL)
1843 else if (*t == DOTQ)
1845 else if (*t == DOTS)
1847 else if (*t == DOTD)
1849 else if (*t == DOTI)
1851 else if (*t == ENDEXPR)
1852 printf("[ENDEXPR]");
1853 else if (*t == CR_ABSCOUNT)
1854 printf("[CR_ABSCOUNT]");
1855 else if (*t == CR_FILESIZE)
1856 printf("[CR_FILESIZE]");
1857 else if (*t == CR_DEFINED)
1858 printf("[CR_DEFINED]");
1859 else if (*t == CR_REFERENCED)
1860 printf("[CR_REFERENCED]");
1861 else if (*t == CR_STREQ)
1862 printf("[CR_STREQ]");
1863 else if (*t == CR_MACDEF)
1864 printf("[CR_MACDEF]");
1865 else if (*t == CR_TIME)
1866 printf("[CR_TIME]");
1867 else if (*t == CR_DATE)
1868 printf("[CR_DATE]");
// Token values $20-$2F and $3A-$3F are printed as the character itself
1869 else if (*t >= 0x20 && *t <= 0x2F)
1870 printf("[%c]", (char)*t);
1871 else if (*t >= 0x3A && *t <= 0x3F)
1872 printf("[%c]", (char)*t);
// Token values $80-$87 print as D0-D7 and $88-$8F as A0-A7
1873 else if (*t >= 0x80 && *t <= 0x87)
1874 printf("[D%u]", ((uint32_t)*t) - 0x80);
1875 else if (*t >= 0x88 && *t <= 0x8F)
1876 printf("[A%u]", ((uint32_t)*t) - 0x88);
1878 printf("[%X:%c]", (uint32_t)*t, (char)*t);