2 // RMAC - Renamed Macro Assembler for all Atari computers
3 // TOKEN.C - Token Handling
4 // Copyright (C) 199x Landon Dyer, 2011-2021 Reboot and Friends
5 // RMAC derived from MADMAC v1.07 Written by Landon Dyer, 1986
6 // Source utilised with the kind permission of Landon Dyer
19 #define DECL_KW // Declare keyword arrays
20 #define DEF_KW // Declare keyword values
21 #include "kwtab.h" // Incl generated keyword tables & defs
// Tokenizer state shared with the rest of the assembler (non-static globals).
24 int lnsave; // Line-save flag; InitTokenizer() resets it to 0 ("don't save lines")
25 uint32_t curlineno; // Current line number. NOTE(review): an older comment said "64K max", which looks stale for a 32-bit counter -- confirm
26 int totlines; // Total # of lines (incremented once per line by TokenizeLine())
27 int mjump_align = 0; // mjump alignment flag
28 char lntag; // Line tag
29 char * curfname; // Current filename (see SetFilenameForErrorReporting())
30 char tolowertab[128]; // Uppercase ==> lowercase, indexed by character code
31 int8_t hextab[128]; // Hex digit value per character; TokenizeLine() ends a hex constant at the first entry < 0
32 char dotxtab[128]; // Size token (DOTB, DOTW, ...) for the character after a '.'; 0 means "not a valid suffix"
33 char irbuf[LNSIZ]; // Text for .rept block line
34 char lnbuf[LNSIZ]; // Text of current line
35 WORD filecount; // Unique file number counter (pre-incremented by include())
36 WORD cfileno; // Current file number
37 TOKEN * tok; // Ptr to current token
38 TOKEN * etok; // Ptr past last token in tokbuf[]
39 TOKEN tokeol[1] = {EOL}; // Bailout end-of-line token ('tok' points here while a line is being tokenized)
40 char * string[TOKBUFSIZE*2];// Token buffer string pointer storage (STRING/SYMBOL tokens carry an index into this)
41 int optimizeOff; // Optimization override flag (reset to 0 for every line by TokenizeLine())
43 // File record, used to maintain a list of every include file ever visited
44 #define FILEREC struct _filerec
// Input-object stack and the free lists that a_inobj()/fpop() recycle.
54 INOBJ * cur_inobj; // Ptr current input obj (IFILE/IMACRO/IREPT); NULL when the input stack is empty
55 static INOBJ * f_inobj; // Ptr list of free INOBJs
56 static IFILE * f_ifile; // Ptr list of free IFILEs
57 static IMACRO * f_imacro; // Ptr list of free IMACROs
59 static TOKEN tokbuf[TOKBUFSIZE]; // Token buffer (stack-like, all files)
// Character attribute table, indexed by byte value. Each entry is a sum of
// flag bits that the tokenizer tests with '&':
//   WHITE  - skipped as whitespace
//   STSYM  - may start a symbol;  CTSYM - may continue a symbol
//   DIGIT  - decimal digit;       HDIGIT - hexadecimal digit
//   DOT    - may follow a '.' as a size suffix (checked together with dotxtab[])
//   SELF   - handled as an "identity" token
//   MULTX  - begins a multiple-character token (dispatched by a switch in TokenizeLine())
//   ILLEG  - reported as an illegal character
// The trailing comment on each row names the characters that row describes.
61 uint8_t chrtab[0x100] = {
62 ILLEG, ILLEG, ILLEG, ILLEG, // NUL SOH STX ETX
63 ILLEG, ILLEG, ILLEG, ILLEG, // EOT ENQ ACK BEL
64 ILLEG, WHITE, ILLEG, ILLEG, // BS HT LF VT
65 WHITE, ILLEG, ILLEG, ILLEG, // FF CR SO SI
67 ILLEG, ILLEG, ILLEG, ILLEG, // DLE DC1 DC2 DC3
68 ILLEG, ILLEG, ILLEG, ILLEG, // DC4 NAK SYN ETB
69 ILLEG, ILLEG, ILLEG, ILLEG, // CAN EM SUB ESC
70 ILLEG, ILLEG, ILLEG, ILLEG, // FS GS RS US
72 WHITE, MULTX, MULTX, SELF, // SP ! " #
73 MULTX+CTSYM, MULTX, SELF, MULTX, // $ % & '
74 SELF, SELF, SELF, SELF, // ( ) * +
75 SELF, SELF, STSYM, SELF, // , - . /
77 DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 0 1
78 DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 2 3
79 DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 4 5
80 DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 6 7
81 DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 8 9
83 MULTX, MULTX, MULTX, STSYM+CTSYM, // < = > ?
85 MULTX, STSYM+CTSYM+HDIGIT, // @ A
86 DOT+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT, // B C
87 DOT+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT, // D E
88 STSYM+CTSYM+HDIGIT, STSYM+CTSYM, // F G
89 STSYM+CTSYM, DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // H I J K
90 DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // L M N O
92 DOT+STSYM+CTSYM, DOT+STSYM+CTSYM, STSYM+CTSYM, DOT+STSYM+CTSYM, // P Q R S
93 STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, DOT+STSYM+CTSYM, // T U V W
// NOTE(review): 'X' below has no DOT flag although lowercase 'x' does and
// InitTokenizer() sets dotxtab['X'] = DOTX; an uppercase ".X" suffix would
// fail the (chrtab[c] & DOT) tests in TokenizeLine() -- confirm whether
// this asymmetry is intended.
94 STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, SELF,// X Y Z [
95 SELF, SELF, MULTX, STSYM+CTSYM, // \ ] ^ _
97 ILLEG, STSYM+CTSYM+HDIGIT, // ` a
98 DOT+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT, // b c
99 DOT+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT, // d e
100 STSYM+CTSYM+HDIGIT, STSYM+CTSYM, // f g
101 STSYM+CTSYM, DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // h i j k
102 DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // l m n o
104 DOT+STSYM+CTSYM, DOT+STSYM+CTSYM, STSYM+CTSYM, DOT+STSYM+CTSYM, // p q r s
105 STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, DOT+STSYM+CTSYM, // t u v w
106 DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, SELF, // x y z {
107 SELF, SELF, SELF, ILLEG, // | } ~ DEL
109 // Anything above $7F is illegal (and yes, we need to check for this,
110 // otherwise you get strange and spurious errors that will lead you astray)
111 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
112 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
113 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
114 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
115 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
116 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
117 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
118 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
119 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
120 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
121 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
122 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
123 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
124 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
125 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
126 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG
129 // Names of registers
// Used by ExpandMacro() to turn a keyword token back into text: the table
// is indexed with (token - KW_D0). The trailing comment on each row gives
// the first and last token value the row covers (so KW_D0 corresponds to
// 128 here). Empty strings mark token values that have no register name.
// NOTE(review): rows 272-279 and 304-311 hold identical names -- confirm
// that the duplication is intentional.
130 static char * regname[] = {
131 "d0","d1","d2","d3","d4","d5","d6","d7", // 128,135
132 "a0","a1","a2","a3","a4","a5","a6","sp", // 136,143
133 "ssp","pc","sr","ccr","regequ","set","reg","r0", // 144,151
134 "r1","r2","r3","r4","r5","r6","r7","r8", // 152,159
135 "r9","r10","r11","r12","r13","r14","r15","r16", // 160,167
136 "r17","r18","r19","r20","r21","r22","r23","r24", // 168,175
137 "r25","r26","r27","r28","r29","r30","r31","ccdef", // 176,183
138 "usp","ic40","dc40","bc40","sfc","dfc","","vbr", // 184,191
139 "cacr","caar","msp","isp","tc","itt0","itt1","dtt0", // 192,199
140 "dtt1","mmusr","urp","srp","iacr0","iacr1","dacr0","dacr1", // 200,207
141 "tt0","tt1","crp","","","","","", // 208,215
142 "","","","","fpiar","fpsr","fpcr","", // 216,223
143 "fp0","fp1","fp2","fp3","fp4","fp5","fp6","fp7", // 224,231
144 "","","","","","","","", // 232,239
145 "","","","","","","","", // 240,247
146 "","","","","","","","", // 248,255
147 "","","","","x0","x1","y0","y1", // 256,263
148 "","b0","","b2","","b1","a","b", // 264,271
149 "mr","omr","la","lc","ssh","ssl","ss","", // 272,279
150 "n0","n1","n2","n3","n4","n5","n6","n7", // 280,287
151 "m0","m1","m2","m3","m4","m5","m6","m7", // 288,295
152 "","","","","","","l","p", // 296,303
153 "mr","omr","la","lc","ssh","ssl","ss","", // 304,311
154 "a10","b10","x","y","","","ab","ba" // 312,319
// Names of the 32 RISC registers r0..r31. ExpandMacro() indexes this table
// with (token - KW_R0) for tokens in the range KW_R0..KW_R31.
157 static char * riscregname[] = {
158 "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
159 "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
160 "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
161 "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31"
166 // Initialize tokenizer
// Resets the file bookkeeping globals and fills the lookup tables
// tolowertab[], hextab[] and dotxtab[] used while tokenizing.
168 void InitTokenizer(void)
171 char * htab = "0123456789abcdefABCDEF"; // Hex character table
173 lnsave = 0; // Don't save lines
174 curfname = ""; // No file, empty filename
// Both counters start at (WORD)-1 so that the first include(), which
// pre-increments filecount, yields file number 0.
175 filecount = (WORD)-1;
176 cfileno = (WORD)-1; // cfileno gets bumped to 0
188 // Initialize hex, "dot" and tolower tables
193 tolowertab[i] = (char)i;
// htab positions 0..15 are '0'..'f' and map to their own index; positions
// 16..21 are 'A'..'F' and map to 10..15 (index - 6).
196 for(i=0; htab[i]!=EOS; i++)
197 hextab[htab[i]] = (char)((i < 16) ? i : i - 6);
// Setting bit 0x20 turns an ASCII uppercase letter into its lowercase form
199 for(i='A'; i<='Z'; i++)
200 tolowertab[i] |= 0x20;
202 // These characters are legal immediately after a period
203 dotxtab['b'] = DOTB; // .b .B
205 //dotxtab['s'] = DOTB;
206 //dotxtab['S'] = DOTB;
207 dotxtab['w'] = DOTW; // .w .W
209 dotxtab['l'] = DOTL; // .l .L
211 dotxtab['i'] = DOTI; // .i .I (purpose not documented here)
213 dotxtab['D'] = DOTD; // .d .D (double)
215 dotxtab['S'] = DOTS; // .s .S
217 dotxtab['Q'] = DOTQ; // .q .Q (quad word)
219 dotxtab['X'] = DOTX; // .x .X
221 dotxtab['P'] = DOTP; // .p .P
// Point curfname at the name to show in error messages. The name is looked
// up by walking the filerec list while counting fnum down to zero; the
// placeholder strings below are used when no matching record exists.
// NOTE(review): fnum's initial value is not visible here -- presumably the
// current file number (cfileno); confirm.
226 void SetFilenameForErrorReporting(void)
230 // Check for absolute top filename (this should never happen)
233 curfname = "(*top*)";
237 FILEREC * fr = filerec;
239 // Advance to the correct record...
240 while (fr != NULL && fnum != 0)
246 // Check for file # record not found (this should never happen either)
249 curfname = "(*NOT FOUND*)";
// Record found: use the filename stored when the file was included
253 curfname = fr->frec_name;
258 // Allocate an input object of kind 'typ' (SRC_IFILE, SRC_IMACRO or SRC_IREPT)
// together with its type-specific record, and link it above the current
// input object. INOBJ, IFILE and IMACRO records are taken from their free
// lists (f_inobj, f_ifile, f_imacro) or obtained from malloc(); an IREPT is
// always obtained from malloc().
// NOTE(review): no NULL check of the malloc() results is visible here.
260 INOBJ * a_inobj(int typ)
266 // Allocate and initialize INOBJ first
268 inobj = malloc(sizeof(INOBJ));
272 f_inobj = f_inobj->in_link;
277 case SRC_IFILE: // Alloc and init an IFILE
279 ifile = malloc(sizeof(IFILE));
283 f_ifile = f_ifile->if_link;
286 inobj->inobj.ifile = ifile;
289 case SRC_IMACRO: // Alloc and init an IMACRO
290 if (f_imacro == NULL)
291 imacro = malloc(sizeof(IMACRO));
295 f_imacro = f_imacro->im_link;
298 inobj->inobj.imacro = imacro;
301 case SRC_IREPT: // Alloc and init an IREPT
302 inobj->inobj.irept = malloc(sizeof(IREPT));
303 DEBUG { printf("alloc IREPT\n"); }
307 // Install INOBJ on top of input stack
308 inobj->in_ifent = ifent; // Record .if context on entry
309 inobj->in_type = (WORD)typ;
// Remember the token pointers so fpop() can restore them
310 inobj->in_otok = tok;
311 inobj->in_etok = etok;
312 inobj->in_link = cur_inobj;
320 // Perform macro substitution from 'orig' to 'dest'. Return OK or some error.
321 // A macro reference is in one of two forms:
322 // \name <non-name-character>
// \{name}
324 // A doubled backslash (\\) is compressed to a single backslash (\).
325 // Argument definitions have been pre-tokenized, so we have to turn them back
326 // into text. This means that numbers, in particular, become hex, regardless of
327 // their representation when the macro was invoked. This is a hack.
328 // A label may appear at the beginning of the line:
329 // :<name><whitespace>
330 // (the colon must be in the first column). These labels are stripped before
331 // macro expansion takes place.
//
// Parameters:
//   src     - NUL-terminated macro body line to expand (called 'orig' above)
//   dest    - buffer that receives the expanded text
//   destsiz - size of 'dest' in bytes
// The macro being expanded is taken from cur_inobj. Failures are reported
// through error() (bad or undefined argument references) or fatal() (the
// expansion does not fit in 'dest').
333 int ExpandMacro(char * src, char * dest, int destsiz)
336 int questmark; // \? for testing argument existence
337 char mname[128]; // Assume max size of a formal arg name
338 char numbuf[20]; // Buffer for text of CONSTs ('$' + 16 hex digits + NUL fits)
341 char ** symbolString;
343 DEBUG { printf("ExM: src=\"%s\"\n", src); }
345 IMACRO * imacro = cur_inobj->inobj.imacro;
346 int macnum = (int)(imacro->im_macro->sattr);
348 char * dst = dest; // Next dest slot
349 char * edst = dest + destsiz - 1; // Address of the last byte of the dest buffer
351 // Check for (and skip over) any "label" on the line
357 while (*s != EOS && !(chrtab[*s] & WHITE))
361 s++; // Skip first whitespace
364 // Expand the rest of the line
367 // Copy single character
373 // Skip comments in case a loose @ or \ is in there
374 // In that case the tokeniser was trying to expand it.
375 if ((*s == ';') || ((*s == '/') && (*(s + 1) == '/')))
380 // Do macro expansion
388 case '\\': // \\, \ (collapse to single backslash)
394 case '?': // \? <macro> set `questmark' flag
398 case '#': // \#, number of arguments
399 sprintf(numbuf, "%d", (int)imacro->im_nargs);
401 case '!': // \! size suffix supplied on invocation
402 switch ((int)imacro->im_siz)
404 case SIZN: d = ""; break;
405 case SIZB: d = ".b"; break;
406 case SIZW: d = ".w"; break;
407 case SIZL: d = ".l"; break;
411 case '~': // ==> unique label string Mnnnn...
412 sprintf(numbuf, "M%u", curuniq);
428 return error("missing argument name");
431 // \n ==> argument number 'n', 0..9
432 if (chrtab[*s] & DIGIT)
442 // Get argument name: \name, \{name}
452 while (chrtab[*s] & CTSYM);
457 for(++s; *s != EOS && *s != '}';)
461 return error("missing closing brace ('}')");
468 // Lookup the argument and copy its (string) value into the
469 // destination string
470 DEBUG { printf("argument='%s'\n", mname); }
472 if ((arg = lookup(mname, MACARG, macnum)) == NULL)
473 return error("undefined argument: '%s'", mname);
476 // Convert a string of tokens (terminated with EOL) back into
477 // text. If an argument is out of range (not specified in the
478 // macro invocation) then it is ignored.
479 i = (int)arg->svalue;
481 DEBUG { printf("~argnumber=%d\n", i); }
484 if (i < imacro->im_nargs)
486 tk = imacro->argument[i].token;
487 symbolString = imacro->argument[i].string;
490 // printf("ExM: Preparing to parse argument #%u...\n", i);
496 // 0 if the argument is empty or nonexistent,
497 // 1 if the argument is not empty
500 if (tk == NULL || *tk == EOL)
// Emit the \? result as the character '0' or '1'
506 *dst++ = (char)(questmark + '0');
510 // Argument # is in range, so expand it
515 // Reverse-translation from a token number to a string.
516 // This is a hack. It might be better table-driven.
// NOTE(review): only a lower bound (KW_D0) is tested before indexing
// regname[]; confirm that no token value beyond the end of that table
// can reach this branch.
519 if ((*tk >= KW_D0) && !rdsp && !rgpu)
521 d = regname[(int)*tk++ - KW_D0];
524 else if ((*tk >= KW_R0) && (*tk <= KW_R31))
526 d = riscregname[(int)*tk++ - KW_R0];
534 d = symbolString[*tk++];
535 DEBUG { printf("ExM: SYMBOL=\"%s\"", d); }
538 d = symbolString[*tk++];
559 // Shamus: Changing the format specifier from %lx to %ux caused the assembler
560 // to choke on legitimate code... Need to investigate this further
561 // before changing anything else here!
563 // sprintf(numbuf, "$%lx", (uint64_t)*tk++);
564 sprintf(numbuf, "$%" PRIX64, (uint64_t)*tk++);
// Token that is its own character code: copy it through unchanged
632 *dst++ = (char)*(tk - 1);
637 // If 'd' != NULL, copy string to destination
641 DEBUG printf("d='%s'\n", d);
660 DEBUG { printf("ExM: dst=\"%s\"\n", dest); }
665 DEBUG { printf("*** OVERFLOW LINE ***\n%s\n", dest); }
666 return fatal("line too long as a result of macro expansion");
671 // Get next line of text from a macro
// Expands the next stored line of the macro at the top of the input stack
// into that macro's im_lnbuf and returns the buffer. TokenizeLine() treats
// a NULL result as end-of-macro.
// NOTE(review): the value returned by ExpandMacro() is discarded, so an
// expansion error does not stop the buffer from being returned -- confirm
// this is intended.
673 char * GetNextMacroLine(void)
675 IMACRO * imacro = cur_inobj->inobj.imacro;
676 LLIST * strp = imacro->im_nextln;
678 if (strp == NULL) // End-of-macro
// Advance first, so the next call continues with the following line
681 imacro->im_nextln = strp->next;
682 // ExpandMacro((char *)(strp + 1), imacro->im_lnbuf, LNSIZ);
683 ExpandMacro(strp->line, imacro->im_lnbuf, LNSIZ);
685 return imacro->im_lnbuf;
690 // Get next line of text from a repeat block
// Copies the next line of the current .rept block into irbuf. When the end
// of the block's line list is reached the list is restarted from its first
// line and ir_count is decremented; the block ends when ir_count was
// already 0. TokenizeLine() treats a NULL result as end-of-repeat-block.
692 char * GetNextRepeatLine(void)
694 IREPT * irept = cur_inobj->inobj.irept;
695 // LONG * strp = irept->ir_nextln; // initial null
697 // Do repeat at end of .rept block's string list
699 if (irept->ir_nextln == NULL)
701 DEBUG { printf("back-to-top-of-repeat-block count=%d\n", (int)irept->ir_count); }
702 irept->ir_nextln = irept->ir_firstln; // copy first line
704 if (irept->ir_count-- == 0)
706 DEBUG { printf("end-repeat-block\n"); }
710 // strp = irept->ir_nextln;
712 // Mark the current macro line in the irept object
713 // This is probably overkill - a global variable
714 // would suffice here (it only gets used during
715 // error reporting anyway)
716 irept->lineno = irept->ir_nextln->lineno;
718 // strcpy(irbuf, (char *)(irept->ir_nextln + 1));
// NOTE(review): unbounded copy into irbuf[LNSIZ]; assumes stored repeat
// lines are always shorter than LNSIZ -- confirm against the code that
// records them.
719 strcpy(irbuf, irept->ir_nextln->line);
720 DEBUG { printf("repeat line='%s'\n", irbuf); }
721 // irept->ir_nextln = (LONG *)*strp;
722 irept->ir_nextln = irept->ir_nextln->next;
729 // Include a source file used at the root, and for ".include" files
// Parameters:
//   handle - already-open file handle to read the source from
//   fname  - name of the file; a private copy is made with strdup()
// Pushes a new SRC_IFILE input object, saves the current line number,
// filename and file number in it (restored later by fpop()), assigns the
// next file number, and adds a FILEREC for the file to the filerec list.
// NOTE(review): the strdup() and malloc() results are used without a
// visible NULL check.
731 int include(int handle, char * fname)
734 DEBUG { printf("[include: %s, cfileno=%u]\n", fname, cfileno); }
736 // Alloc and initialize include-descriptors
737 INOBJ * inobj = a_inobj(SRC_IFILE);
738 IFILE * ifile = inobj->inobj.ifile;
740 ifile->ifhandle = handle; // Setup file handle
741 ifile->ifind = ifile->ifcnt = 0; // Setup buffer indices
742 ifile->ifoldlineno = curlineno; // Save old line number
743 ifile->ifoldfname = curfname; // Save old filename
744 ifile->ifno = cfileno; // Save old file number
746 // NB: This *must* be preincrement, we're adding one to the filecount here!
747 cfileno = ++filecount; // Compute NEW file number
748 curfname = strdup(fname); // Set current filename (alloc storage)
749 curlineno = 0; // Start on line zero
751 // Add another file to the file-record
752 FILEREC * fr = (FILEREC *)malloc(sizeof(FILEREC));
753 fr->frec_next = NULL;
754 fr->frec_name = curfname;
757 filerec = fr; // Add first filerec
759 last_fr->frec_next = fr; // Append to list of filerecs
762 DEBUG { printf("[include: curfname: %s, cfileno=%u]\n", curfname, cfileno); }
769 // Pop the current input level
// Unwinds any .if levels opened since the input object was entered,
// restores tok/etok, releases the type-specific record (IFILE and IMACRO
// records go back on their free lists, repeat lines are deallocated) and
// makes the previous input object current. Returns -1 if d_endif() fails
// while unwinding; TokenizeLine() treats a return of 0 as success.
773 INOBJ * inobj = cur_inobj;
778 // Pop IFENT levels until we reach the conditional assembly context we
779 // were at when the input object was entered.
780 int numUnmatched = 0;
782 while (ifent != inobj->in_ifent)
784 if (d_endif() != 0) // Something bad happened during endif parsing?
785 return -1; // If yes, bail instead of getting stuck in a loop
790 // Warn the user that missing .endif(s) were closed automatically
791 if (numUnmatched > 0)
792 warn("missing %d .endif(s)", numUnmatched);
794 tok = inobj->in_otok; // Restore tok and etok
795 etok = inobj->in_etok;
797 switch (inobj->in_type)
799 case SRC_IFILE: // Pop and release an IFILE
801 DEBUG { printf("[Leaving: %s]\n", curfname); }
803 IFILE * ifile = inobj->inobj.ifile;
804 ifile->if_link = f_ifile;
806 close(ifile->ifhandle); // Close source file
807 DEBUG { printf("[fpop (pre): curfname=%s]\n", curfname); }
808 curfname = ifile->ifoldfname; // Set current filename
809 DEBUG { printf("[fpop (post): curfname=%s]\n", curfname); }
810 DEBUG { printf("[fpop: (pre) cfileno=%d ifile->ifno=%d]\n", (int)cfileno, (int)ifile->ifno); }
811 curlineno = ifile->ifoldlineno; // Set current line#
812 DEBUG { printf("cfileno=%d ifile->ifno=%d\n", (int)cfileno, (int)ifile->ifno); }
813 cfileno = ifile->ifno; // Restore current file number
814 DEBUG { printf("[fpop: (post) cfileno=%d ifile->ifno=%d]\n", (int)cfileno, (int)ifile->ifno); }
818 case SRC_IMACRO: // Pop and release an IMACRO
820 IMACRO * imacro = inobj->inobj.imacro;
821 imacro->im_link = f_imacro;
826 case SRC_IREPT: // Pop and release an IREPT
828 DEBUG { printf("dealloc IREPT\n"); }
829 LLIST * p = inobj->inobj.irept->ir_firstln;
831 // Deallocate repeat lines
// Unlink the INOBJ from the input stack and chain it to the free list
842 cur_inobj = inobj->in_link;
843 inobj->in_link = f_inobj;
851 // Get line from file into buf, return NULL on EOF or ptr to the start of a
// NUL-terminated line inside the current file's buffer (ifbuf).
// The buffer window is described by ifind (index of the first unread byte)
// and ifcnt (number of unread bytes). When no complete line is buffered the
// remaining bytes are moved to the front of the buffer and up to QUANTUM
// more bytes are read from the file handle.
854 char * GetNextLine(void)
858 int readamt = -1; // 0 if last read() yielded 0 bytes
859 IFILE * fl = cur_inobj->inobj.ifile;
863 // Scan for next end-of-line; handle stupid text formats by treating
864 // \r\n the same as \n. (lone '\r' at end of buffer means we have to
866 d = &fl->ifbuf[fl->ifind];
868 for(p=d, i=0, j=fl->ifcnt; i<j; i++, p++)
870 if (*p == '\r' || *p == '\n')
877 break; // Need to read more, then look for '\n' to eat
878 else if (p[1] == '\n')
882 // Cover up the newline with end-of-string sentinel
891 // Handle hanging lines by ignoring them (Input file is exhausted, no
892 // \r or \n on last line)
893 // Shamus: Dropping input like this is wrong. Never ignore any input!
894 if (!readamt && fl->ifcnt)
901 // Really should check to see if we're at the end of the buffer!
903 fl->ifbuf[fl->ifind + fl->ifcnt] = '\0';
905 return &fl->ifbuf[fl->ifind];
909 // Truncate and return absurdly long lines.
910 if (fl->ifcnt >= QUANTUM)
// The last buffered byte is overwritten by the terminator
912 fl->ifbuf[fl->ifind + fl->ifcnt - 1] = '\0';
914 return &fl->ifbuf[fl->ifind];
917 // Relocate what's left of a line to the beginning of the buffer, and
918 // read some more of the file in; return NULL if the buffer's empty and
922 p = &fl->ifbuf[fl->ifind];
// The leftover bytes are moved to offset (ifcnt & 1), i.e. 0 or 1
923 d = &fl->ifbuf[fl->ifcnt & 1];
925 for(i=0; i<fl->ifcnt; i++)
928 fl->ifind = fl->ifcnt & 1;
// NOTE(review): handling of a negative read() result (error) is not visible
// before readamt is added to ifcnt below -- confirm it is checked.
931 readamt = read(fl->ifhandle, &fl->ifbuf[fl->ifind + fl->ifcnt], QUANTUM);
936 if ((fl->ifcnt += readamt) == 0)
// Fetch the next line from the current input object (source file, macro or
// repeat block) and convert it into tokens stored in tokbuf[] starting at
// 'etok'. While the line is being processed 'tok' points at the EOL-only
// 'tokeol' array, so an early error return leaves an empty token line; on
// success 'tok' is set to the start of the new token line.
// Returns TKEOF when no more input is available; tokenizing errors are
// returned through error().
945 int TokenizeLine(void)
947 uint8_t * ln = NULL; // Ptr to current position in line
948 uint8_t * p; // Random character ptr
949 PTR tk; // Token-deposit ptr
950 int state = 0; // State for keyword detector
951 int j = 0; // Var for keyword detector
952 uint8_t c; // Random char
953 uint64_t v; // Random value
954 uint32_t cursize = 0; // Current line's size (.b, .w, .l, .s, .q, .d)
955 uint8_t * nullspot = NULL; // Spot to clobber for SYMBOL termination
956 int stuffnull; // 1:terminate SYMBOL '\0' at *nullspot
958 int stringNum = 0; // Next free index into string[] for this line
962 if (cur_inobj == NULL) // Return EOF if input stack is empty
965 // Get another line of input from the current input source: a file, a
966 // macro, or a repeat-block
967 switch (cur_inobj->in_type)
971 // o bump source line number;
972 // o tag the listing-line with a space;
973 // o kludge lines generated by Alcyon C.
975 if ((ln = GetNextLine()) == NULL)
977 DEBUG { printf("TokenizeLine: Calling fpop() from SRC_IFILE...\n"); }
978 if (fpop() == 0) // Pop input level
979 goto retry; // Try for more lines
982 ifent->if_prev = (IFENT *)-1; //Signal Assemble() that we have reached EOF with unbalanced if/endifs
987 curlineno++; // Bump line number
992 // AS68 compatibility, throw away all lines starting with
993 // back-quotes, tildes, or '*'
994 // On other lines, turn the first '*' into a semi-colon.
995 if (*ln == '`' || *ln == '~' || *ln == '*')
999 for(p=ln; *p!=EOS; p++)
1013 // o Handle end-of-macro;
1014 // o tag the listing-line with an at (@) sign.
1016 if ((ln = GetNextMacroLine()) == NULL)
1018 if (ExitMacro() == 0) // Exit macro (pop args, do fpop(), etc)
1019 goto retry; // Try for more lines...
1021 return TKEOF; // Oops, we got a non zero return code, signal EOF
1028 // o Handle end-of-repeat-block;
1029 // o tag the listing-line with a pound (#) sign.
1031 if ((ln = GetNextRepeatLine()) == NULL)
1033 DEBUG { printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n"); }
1042 // Save text of the line. We only do this during listings and within
1043 // macro-type blocks, since it is expensive to unconditionally copy every
// NOTE(review): strlen() returns size_t but the message prints it with %d;
// also, a line of exactly LNSIZ characters passes this check -- confirm the
// destination buffer has room for the terminating NUL in that case.
1048 if (strlen(ln) > LNSIZ)
1049 return error("line too long (%d, max %d)", strlen(ln), LNSIZ);
1054 // General housekeeping
1055 tok = tokeol; // Set "tok" to EOL in case of error
1056 tk.u32 = etok; // Reset token ptr
1057 stuffnull = 0; // Don't stuff nulls
1058 totlines++; // Bump total #lines assembled
1060 // See if the entire line is a comment. This is a win if the programmer
1061 // puts in lots of comments
1062 if (*ln == '*' || *ln == ';' || ((*ln == '/') && (*(ln + 1) == '/')))
1065 // And here we have a very ugly hack for signalling a single line 'turn off
1066 // optimization'. There's really no nice way to do this, so hack it is!
1067 optimizeOff = 0; // Default is to take optimizations as they come
1071 optimizeOff = 1; // Signal that we don't want to optimize this line
1072 ln++; // & skip over the darned thing
1075 // Main tokenization loop;
1076 // o skip whitespace;
1077 // o handle end-of-line;
1078 // o handle symbols;
1079 // o handle single-character tokens (operators, etc.);
1080 // o handle multiple-character tokens (constants, strings, etc.).
1083 // Check to see if there's enough space in the token buffer
// (a 20-byte safety margin is kept before the end of tokbuf[])
1084 if (tk.cp >= ((uint8_t *)(&tokbuf[TOKBUFSIZE])) - 20)
1086 return error("token buffer overrun");
1089 // Skip whitespace, handle EOL
1090 while (chrtab[*ln] & WHITE)
1093 // Handle EOL, comment with ';'
1094 if (*ln == EOS || *ln == ';'|| ((*ln == '/') && (*(ln + 1) == '/')))
1097 // Handle start of symbol. Symbols are null-terminated in place. The
1098 // termination is always one symbol behind, since there may be no place
1099 // for a null in the case that an operator immediately follows the name.
1104 if (stuffnull) // Terminate old symbol from previous pass
1107 v = 0; // Assume no DOT attrib follows symbol
1110 // In some cases, we need to check for a DOTx at the *beginning*
1111 // of a symbol, as the "start" of the line we're currently looking
1112 // at could be somewhere in the middle of that line!
1115 // Make sure that it's *only* a .[bwsl] following, and not the
1116 // start of a local symbol:
1117 if ((chrtab[*(ln + 1)] & DOT)
1118 && (dotxtab[*(ln + 1)] != 0)
1119 && !(chrtab[*(ln + 2)] & CTSYM))
1121 // We found a legitimate DOTx construct, so add it to the
1125 *tk.u32++ = (TOKEN)dotxtab[*ln++];
1130 p = nullspot = ln++; // Nullspot -> start of this symbol
1132 // Find end of symbol (and compute its length)
1133 for(j=1; (int)chrtab[*ln]&CTSYM; j++)
1136 // Handle "DOT" special forms (like ".b") that follow a normal
1137 // symbol or keyword:
1140 *ln++ = EOS; // Terminate symbol
1141 stuffnull = 0; // And never try it again
1143 // Character following the '.' must have a DOT attribute, and
1144 // the character after THAT one must not have a start-symbol
1145 // attribute (to prevent symbols that look like, for example,
1146 // "zingo.barf", which might be a good idea anyway....)
1147 if (((chrtab[*ln] & DOT) == 0) || (dotxtab[*ln] == 0))
1148 return error("[bwsl] must follow '.' in symbol");
1150 v = (uint32_t)dotxtab[*ln++];
1151 cursize = (uint32_t)v;
1153 if (chrtab[*ln] & CTSYM)
1154 return error("misuse of '.'; not allowed in symbols");
1157 // If the symbol is small, check to see if it's really the name of
// Walk the generated keyword state tables one lowercased character at a time
1161 for(state=0; state>=0;)
1163 j = (int)tolowertab[*p++];
1166 if (kwcheck[j] != state)
1172 if (*p == EOS || p == ln)
1186 // Make j = -1 if user tries to use a RISC register while in 68K mode
1187 if (!(rgpu || rdsp || dsp56001) && ((TOKEN)j >= KW_R0 && (TOKEN)j <= KW_R31))
1192 // Make j = -1 if time, date etc with no preceding ^^
1193 // defined, referenced, streq, macdef, date and time
1196 case 112: // defined
1197 case 113: // referenced
1205 // If not tokenized keyword OR token was not found
1206 if ((j < 0) || (state < 0))
// Store the symbol's text pointer in string[] and emit its index
1209 string[stringNum] = nullspot;
1210 *tk.u32++ = stringNum;
1215 *tk.u32++ = (TOKEN)j;
1219 if (v) // Record attribute token (if any)
1220 *tk.u32++ = (TOKEN)v;
1222 if (stuffnull) // Arrange for string termination on next pass
1228 // Handle identity tokens
1235 // Handle multiple-character tokens
1240 case '!': // ! or !=
1250 case '\'': // 'string'
1253 // Hardcoded for now, maybe this will change in the future
1254 *tk.u32++ = STRINGA8;
1258 case '\"': // "string"
// The string text stays in the line buffer; only its index is emitted
1262 string[stringNum] = ln;
1263 *tk.u32++ = stringNum;
1266 for(p=ln; *ln!=EOS && *ln!=c1;)
1275 return(error("unterminated string"));
1304 // If we're evaluating a macro
1305 // this is valid because it's
1306 // a parameter expansion
1308 // If we're evaluating a macro
1309 // this is valid and expands to
1313 warn("bad backslash code in string");
1323 return error("unterminated string");
1327 case '$': // $, hex constant
1328 if (chrtab[*ln] & HDIGIT)
1332 // Parse the hex value
1333 while (hextab[*ln] >= 0)
1334 v = (v << 4) + (int)hextab[*ln++];
1341 if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
1346 else if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
1357 case '<': // < or << or <> or <=
1376 case ':': // : or ::
1386 case '=': // = or ==
1389 *tk.u32++ = DEQUALS;
1396 case '>': // > or >> or >=
1411 case '%': // % or binary constant
1412 if (*ln < '0' || *ln > '1')
1420 while (*ln >= '0' && *ln <= '1')
1421 v = (v << 1) + *ln++ - '0';
1425 if ((*(ln + 1) == 'b') || (*(ln + 1) == 'B'))
1431 if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
1437 if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
1447 case '@': // @ or octal constant
1448 if (*ln < '0' || *ln > '7')
1456 while (*ln >= '0' && *ln <= '7')
1457 v = (v << 3) + *ln++ - '0';
1461 if ((*(ln + 1) == 'b') || (*(ln + 1) == 'B'))
1467 if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
1473 if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
1483 case '^': // ^ or ^^ <operator-name>
1490 if (((int)chrtab[*++ln] & STSYM) == 0)
1492 error("invalid symbol following ^^");
1498 while ((int)chrtab[*ln] & CTSYM)
1501 for(state=0; state>=0;)
1503 // Get char, convert to lowercase
1506 if (j >= 'A' && j <= 'Z')
1511 if (kwcheck[j] != state)
1517 if (*p == EOS || p == ln)
1526 if (j < 0 || state < 0)
1528 error("unknown symbol following ^^");
1532 *tk.u32++ = (TOKEN)j;
1535 interror(2); // Bad MULTX entry in chrtab
1540 // Handle decimal constant
1543 uint8_t * numStart = ln;
1546 while ((int)chrtab[*ln] & DIGIT)
1547 v = (v * 10) + *ln++ - '0';
1549 // See if there's a .[bwl] after the constant & deal with it if so
1552 if ((*(ln + 1) == 'b') || (*(ln + 1) == 'B'))
1560 else if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
1568 else if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
1576 else if ((int)chrtab[*(ln + 1)] & DIGIT)
1578 // Hey, more digits after the dot, so we assume it's a
1579 // floating point number of some kind... numEnd will point
1580 // to the first non-float character after it's done
// Re-parse from the first digit so the integer part is included
1583 double f = strtod(numStart, &numEnd);
1584 ln = (uint8_t *)numEnd;
1587 return error("floating point parse error");
1589 // N.B.: We use the C compiler's internal double
1590 // representation for all internal float calcs and
1591 // are reasonably sure that the size of said double
1592 // is 8 bytes long (which we check for in fltpoint.c)
1605 //printf("CONST: %i\n", v);
1609 // Handle illegal character
1610 return error("illegal character $%02X found", *ln);
1613 // Terminate line of tokens and return "success."
1616 tok = etok; // Set tok to beginning of line
1618 if (stuffnull) // Terminate last SYMBOL
1628 // .GOTO <label> goto directive
1630 // The label is searched for starting from the first line of the current,
1631 // enclosing macro definition. If no enclosing macro exists, an error is
// reported ("goto not in macro").
1634 // A label is of the form:
1636 // :<name><whitespace>
1638 // The colon must appear in column 1. The label is stripped prior to macro
1639 // expansion, and is NOT subject to macro expansion. The whitespace may also
//
// The parameter is unused. On success the macro's next-line pointer is set
// to the line carrying the label, so expansion continues from there.
// Failures are reported through error(): "missing label", "goto not in
// macro" or "goto label not found".
1642 int d_goto(WORD unused)
1644 // Setup for the search
1646 return error("missing label");
// tok[1] is used as the index of the label's text in string[]
1648 char * sym = string[tok[1]];
1651 if (cur_inobj->in_type != SRC_IMACRO)
1652 return error("goto not in macro");
1654 IMACRO * imacro = cur_inobj->inobj.imacro;
1655 LLIST * defln = imacro->im_macro->lineList;
1657 // Attempt to find the label, starting with the first line.
1658 for(; defln!=NULL; defln=defln->next)
1660 // Must start with a colon
1661 if (defln->line[0] == ':')
1663 // Compare names (sleazo string compare)
1665 char * s2 = defln->line;
1667 // Either we will match the strings to EOS on both, or we will
1668 // match EOS on string 1 to whitespace on string 2. Otherwise, we
1670 while ((*s1 == *s2) || ((*s1 == EOS) && (chrtab[*s2] & WHITE)))
1672 // If we reached the end of string 1 (sym), we're done.
1673 // Note that we're also checking for the end of string 2 as
1674 // well, since we've established they're equal above.
1677 // Found the label, set new macro next-line and return.
1678 imacro->im_nextln = defln;
1688 return error("goto label not found");
// Debug helper: print a bracketed, human-readable form of the single token
// 't' to stdout. Named tokens print their name, punctuation tokens print
// the character itself, and anything unrecognised falls through to a
// hex-plus-character form.
1692 void DumpToken(TOKEN t)
1696 else if (t == CONST)
1698 else if (t == FCONST)
1700 else if (t == ACONST)
1702 else if (t == STRING)
1704 else if (t == SYMBOL)
1708 else if (t == TKEOF)
1710 else if (t == DEQUALS)
1711 printf("[DEQUALS]");
1716 else if (t == DCOLON)
1728 else if (t == UNMINUS)
1729 printf("[UNMINUS]");
1744 else if (t == ENDEXPR)
1745 printf("[ENDEXPR]");
1746 else if (t == CR_ABSCOUNT)
1747 printf("[CR_ABSCOUNT]");
1748 else if (t == CR_FILESIZE)
1749 printf("[CR_FILESIZE]");
1750 else if (t == CR_DEFINED)
1751 printf("[CR_DEFINED]");
1752 else if (t == CR_REFERENCED)
1753 printf("[CR_REFERENCED]");
1754 else if (t == CR_STREQ)
1755 printf("[CR_STREQ]");
1756 else if (t == CR_MACDEF)
1757 printf("[CR_MACDEF]");
1758 else if (t == CR_TIME)
1759 printf("[CR_TIME]");
1760 else if (t == CR_DATE)
1761 printf("[CR_DATE]");
// Tokens in the ASCII punctuation ranges are printed as the character itself
1762 else if (t >= 0x20 && t <= 0x2F)
1763 printf("[%c]", (char)t);
1764 else if (t >= 0x3A && t <= 0x3F)
1765 printf("[%c]", (char)t);
// 0x80..0x87 print as D0..D7 and 0x88..0x8F as A0..A7
1766 else if (t >= 0x80 && t <= 0x87)
1767 printf("[D%u]", ((uint32_t)t) - 0x80);
1768 else if (t >= 0x88 && t <= 0x8F)
1769 printf("[A%u]", ((uint32_t)t) - 0x88);
// Fallback: raw hex value plus its character interpretation
1771 printf("[%X:%c]", (uint32_t)t, (char)t);
// Debug helper: print every token from the start of tokbuf[] up to the
// first EOL token, preceded by the current value of sloc. Tokens that carry
// operands (constants, strings, symbols) are printed with their operand.
1775 void DumpTokenBuffer(void)
1777 printf("Tokens [%X]: ", sloc);
1779 for(TOKEN * t=tokbuf; *t!=EOL; t++)
1783 else if (*t == CONST)
// NOTE(review): %lX assumes 'unsigned long' is 64 bits wide; if tp.u64
// points at a uint64_t, PRIX64 (as used in ExpandMacro()) would be the
// portable specifier -- confirm.
1787 printf("[CONST: $%lX]", *tp.u64);
1790 else if (*t == FCONST)
1794 printf("[FCONST: $%lX]", *tp.u64);
1797 else if (*t == ACONST)
1799 printf("[ACONST: $%X, $%X]", (uint32_t)t[1], (uint32_t)t[2]);
1802 else if (*t == STRING)
// The operand is an index into string[]
1805 printf("[STRING:\"%s\"]", string[*t]);
1807 else if (*t == SYMBOL)
1810 printf("[SYMBOL:\"%s\"]", string[*t]);
1814 else if (*t == TKEOF)
1816 else if (*t == DEQUALS)
1817 printf("[DEQUALS]");
1822 else if (*t == DCOLON)
1834 else if (*t == UNMINUS)
1835 printf("[UNMINUS]");
1836 else if (*t == DOTB)
1838 else if (*t == DOTW)
1840 else if (*t == DOTL)
1842 else if (*t == DOTQ)
1844 else if (*t == DOTS)
1846 else if (*t == DOTD)
1848 else if (*t == DOTI)
1850 else if (*t == ENDEXPR)
1851 printf("[ENDEXPR]");
1852 else if (*t == CR_ABSCOUNT)
1853 printf("[CR_ABSCOUNT]");
1854 else if (*t == CR_FILESIZE)
1855 printf("[CR_FILESIZE]");
1856 else if (*t == CR_DEFINED)
1857 printf("[CR_DEFINED]");
1858 else if (*t == CR_REFERENCED)
1859 printf("[CR_REFERENCED]");
1860 else if (*t == CR_STREQ)
1861 printf("[CR_STREQ]");
1862 else if (*t == CR_MACDEF)
1863 printf("[CR_MACDEF]");
1864 else if (*t == CR_TIME)
1865 printf("[CR_TIME]");
1866 else if (*t == CR_DATE)
1867 printf("[CR_DATE]");
// Tokens in the ASCII punctuation ranges are printed as the character itself
1868 else if (*t >= 0x20 && *t <= 0x2F)
1869 printf("[%c]", (char)*t);
1870 else if (*t >= 0x3A && *t <= 0x3F)
1871 printf("[%c]", (char)*t);
// 0x80..0x87 print as D0..D7 and 0x88..0x8F as A0..A7
1872 else if (*t >= 0x80 && *t <= 0x87)
1873 printf("[D%u]", ((uint32_t)*t) - 0x80);
1874 else if (*t >= 0x88 && *t <= 0x8F)
1875 printf("[A%u]", ((uint32_t)*t) - 0x88);
// Fallback: raw hex value plus its character interpretation
1877 printf("[%X:%c]", (uint32_t)*t, (char)*t);