2 // RMAC - Reboot's Macro Assembler for all Atari computers
3 // TOKEN.C - Token Handling
4 // Copyright (C) 199x Landon Dyer, 2011-2017 Reboot and Friends
5 // RMAC derived from MADMAC v1.07 Written by Landon Dyer, 1986
6 // Source utilised with the kind permission of Landon Dyer
17 #define DECL_KW // Declare keyword arrays
18 #define DEF_KW // Declare keyword values
19 #include "kwtab.h" // Incl generated keyword tables & defs
22 int lnsave; // Nonzero: keep a copy (strcpy) of the current line's text
23 uint16_t curlineno; // Current line number (16-bit, so 64K max currently)
24 int totlines; // Total # of lines
25 int mjump_align = 0; // mjump alignment flag
26 char lntag; // Line tag
27 char * curfname; // Current filename
28 char tolowertab[128]; // Uppercase ==> lowercase (filled in by InitTokenizer)
29 int8_t hextab[128]; // Table of hex digit values, indexed by character
30 char dotxtab[128]; // Table for ".b", ".s", etc. (token for the char after '.')
31 char irbuf[LNSIZ]; // Text for .rept block line
32 char lnbuf[LNSIZ]; // Text of current line
33 WORD filecount; // Unique file number counter
34 WORD cfileno; // Current file number
35 TOKENPTR tok; // Ptr to current token
36 TOKEN * etok; // Ptr past last token in tokbuf[]
37 TOKEN tokeol[1] = {EOL}; // Bailout end-of-line token (tok points here on error)
38 char * string[TOKBUFSIZE*2];// Strings that SYMBOL/STRING tokens refer to by index
39 int optimizeOff; // Optimization override flag (1 = don't optimize this line)
41 // File record, used to maintain a list of every include file ever visited
42 #define FILEREC struct _filerec
52 INOBJ * cur_inobj; // Ptr current input obj (IFILE/IMACRO/IREPT)
53 static INOBJ * f_inobj; // Ptr list of free INOBJs
54 static IFILE * f_ifile; // Ptr list of free IFILEs
55 static IMACRO * f_imacro; // Ptr list of free IMACROs
57 static TOKEN tokbuf[TOKBUFSIZE]; // Token buffer (stack-like, all files)
// Character classification table, indexed by character code. Each entry is
// ILLEG or a sum of the attribute flags the tokenizer tests (WHITE, SELF,
// MULTX, DIGIT, HDIGIT, STSYM, CTSYM, DOT).
// DOT marks the letters that may follow a '.' as a size suffix (see dotxtab[]);
// upper- and lower-case forms of a letter must carry the same attributes, so
// 'X' has DOT just like 'x' (otherwise ".X" is rejected while ".x" is accepted).
59 uint8_t chrtab[0x100] = {
60 ILLEG, ILLEG, ILLEG, ILLEG, // NUL SOH STX ETX
61 ILLEG, ILLEG, ILLEG, ILLEG, // EOT ENQ ACK BEL
62 ILLEG, WHITE, ILLEG, ILLEG, // BS HT LF VT
63 WHITE, ILLEG, ILLEG, ILLEG, // FF CR SO SI
65 ILLEG, ILLEG, ILLEG, ILLEG, // DLE DC1 DC2 DC3
66 ILLEG, ILLEG, ILLEG, ILLEG, // DC4 NAK SYN ETB
67 ILLEG, ILLEG, ILLEG, ILLEG, // CAN EM SUB ESC
68 ILLEG, ILLEG, ILLEG, ILLEG, // FS GS RS US
70 WHITE, MULTX, MULTX, SELF, // SP ! " #
71 MULTX+CTSYM, MULTX, SELF, MULTX, // $ % & '
72 SELF, SELF, SELF, SELF, // ( ) * +
73 SELF, SELF, STSYM, SELF, // , - . /
75 DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 0 1
76 DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 2 3
77 DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 4 5
78 DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 6 7
79 DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 8 9
81 MULTX, MULTX, MULTX, STSYM+CTSYM, // < = > ?
83 MULTX, STSYM+CTSYM+HDIGIT, // @ A
84 DOT+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT, // B C
85 DOT+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT, // D E
86 STSYM+CTSYM+HDIGIT, STSYM+CTSYM, // F G
87 STSYM+CTSYM, DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // H I J K
88 DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // L M N O
90 DOT+STSYM+CTSYM, DOT+STSYM+CTSYM, STSYM+CTSYM, DOT+STSYM+CTSYM, // P Q R S
91 STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, DOT+STSYM+CTSYM, // T U V W
92 DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, SELF,// X Y Z [
93 SELF, SELF, MULTX, STSYM+CTSYM, // \ ] ^ _
95 ILLEG, STSYM+CTSYM+HDIGIT, // ` a
96 DOT+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT, // b c
97 DOT+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT, // d e
98 STSYM+CTSYM+HDIGIT, STSYM+CTSYM, // f g
99 STSYM+CTSYM, DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // h i j k
100 DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // l m n o
102 DOT+STSYM+CTSYM, DOT+STSYM+CTSYM, STSYM+CTSYM, DOT+STSYM+CTSYM, // p q r s
103 STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, DOT+STSYM+CTSYM, // t u v w
104 DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, SELF, // x y z {
105 SELF, SELF, SELF, ILLEG, // | } ~ DEL
107 // Anything above $7F is illegal (and yes, we need to check for this,
108 // otherwise you get strange and spurious errors that will lead you astray)
109 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
110 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
111 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
112 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
113 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
114 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
115 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
116 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
117 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
118 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
119 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
120 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
121 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
122 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
123 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
124 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG
127 // Names of registers, indexed by (keyword token - KW_D0); ExpandMacro uses
// this to turn a register token back into text. The trailing numbers on each
// row are presumably the token values covered by that row -- TODO confirm
// against kwtab.h. Empty strings are slots with no register name.
128 static char * regname[] = {
129 "d0","d1","d2","d3","d4","d5","d6","d7", // 128,135
130 "a0","a1","a2","a3","a4","a5","a6","sp", // 136,143
131 "ssp","pc","sr","ccr","regequ","set","reg","r0", // 144,151
132 "r1","r2","r3","r4","r5","r6","r7","r8", // 152,159
133 "r9","r10","r11","r12","r13","r14","r15","r16", // 160,167
134 "r17","r18","r19","r20","r21","r22","r23","r24", // 168,175
135 "r25","r26","r27","r28","r29","r30","r31","ccdef", // 176,183
136 "usp","ic40","dc40","bc40","sfc","dfc","","vbr", // 184,191
137 "cacr","caar","msp","isp","tc","itt0","itt1","dtt0", // 192,199
138 "dtt1","mmusr","urp","srp","iacr0","iacr1","dacr0","dacr1", // 200,207
139 "tt0","tt1","crp","","","","","", // 208,215
140 "","","","","fpiar","fpsr","fpcr","", // 216,223
141 "fp0","fp1","fp2","fp3","fp4","fp5","fp6","fp7", // 224,231
142 "","","","","","","","", // 232,239
143 "","","","","","","","", // 240,247
144 "","","","","","","","", // 248,255
145 "","","","","x0","x1","y0","y1", // 256,263
146 "","b0","","b2","","b1","a","b", // 264,271
147 "mr","omr","la","lc","ssh","ssl","ss","", // 272,279
148 "n0","n1","n2","n3","n4","n5","n6","n7", // 280,287
149 "m0","m1","m2","m3","m4","m5","m6","m7", // 288,295
150 "","","","","","","l","p", // 296,303
151 "mr","omr","la","lc","ssh","ssl","ss","", // 304,311
152 "a10","b10","x","y","","","ab","ba" // 312,319
// Names of the RISC registers r0..r31, indexed by (keyword token - KW_R0);
// ExpandMacro uses this to turn a RISC register token back into text.
155 static char * riscregname[] = {
156 "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
157 "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
158 "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
159 "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31"
164 // Initialize tokenizer: reset the file bookkeeping and fill in the hex,
// tolower and "dot" (size suffix) lookup tables
166 void InitTokenizer(void)
169 char * htab = "0123456789abcdefABCDEF"; // Hex character table
171 lnsave = 0; // Don't save lines
172 curfname = ""; // No file, empty filename
173 filecount = (WORD)-1; // include() pre-increments this before use
174 cfileno = (WORD)-1; // cfileno gets bumped to 0
186 // Initialize hex, "dot" and tolower tables
191 tolowertab[i] = (char)i;
194 for(i=0; htab[i]!=EOS; i++)
195 hextab[htab[i]] = (char)((i < 16) ? i : i - 6); // 'A'..'F' sit at i=16..21 ==> 10..15
197 for(i='A'; i<='Z'; i++)
198 tolowertab[i] |= 0x20; // Set bit 5: ASCII uppercase ==> lowercase
200 // These characters are legal immediately after a period
201 dotxtab['b'] = DOTB; // .b .B
203 //dotxtab['s'] = DOTB;
204 //dotxtab['S'] = DOTB;
205 dotxtab['w'] = DOTW; // .w .W
207 dotxtab['l'] = DOTL; // .l .L
209 dotxtab['i'] = DOTI; // .i .I (purpose undocumented)
211 dotxtab['D'] = DOTD; // .d .D (double)
213 dotxtab['S'] = DOTS; // .s .S
215 dotxtab['Q'] = DOTQ; // .q .Q (quad word)
217 dotxtab['X'] = DOTX; // .x .X
219 dotxtab['P'] = DOTP; // .p .P
// Point curfname at the name stored in the file-record list (filerec), for
// use in error messages. Placeholder strings are used when no record applies.
224 void SetFilenameForErrorReporting(void)
228 // Check for absolute top filename (this should never happen)
231 curfname = "(*top*)";
235 FILEREC * fr = filerec;
237 // Advance to the correct record...
238 while (fr != NULL && fnum != 0)
244 // Check for file # record not found (this should never happen either)
247 curfname = "(*NOT FOUND*)";
251 curfname = fr->frec_name;
256 // Allocate an input object (INOBJ) of type 'typ' -- IFILE, IMACRO or IREPT --
// and record the current .if context and token pointers in it.
// NOTE(review): none of the malloc() results visible here are checked for NULL.
258 INOBJ * a_inobj(int typ)
264 // Allocate and initialize INOBJ first
266 inobj = malloc(sizeof(INOBJ));
270 f_inobj = f_inobj->in_link; // Unlink the head of the free INOBJ list
275 case SRC_IFILE: // Alloc and init an IFILE
277 ifile = malloc(sizeof(IFILE));
281 f_ifile = f_ifile->if_link; // Unlink the head of the free IFILE list
284 inobj->inobj.ifile = ifile;
287 case SRC_IMACRO: // Alloc and init an IMACRO
288 if (f_imacro == NULL)
289 imacro = malloc(sizeof(IMACRO));
293 f_imacro = f_imacro->im_link; // Unlink the head of the free IMACRO list
296 inobj->inobj.imacro = imacro;
299 case SRC_IREPT: // Alloc and init an IREPT
300 inobj->inobj.irept = malloc(sizeof(IREPT));
301 DEBUG { printf("alloc IREPT\n"); }
305 // Install INOBJ on top of input stack
306 inobj->in_ifent = ifent; // Record .if context on entry
307 inobj->in_type = (WORD)typ;
308 inobj->in_otok = tok.u32; // Saved so fpop() can restore tok
309 inobj->in_etok = etok; // Saved so fpop() can restore etok
310 inobj->in_link = cur_inobj; // Previous top of the input stack
318 // Perform macro substitution from 'src' to 'dest'. Return OK or some error.
319 // A macro reference is in one of two forms:
320 // \name <non-name-character>
322 // A doubled backslash (\\) is compressed to a single backslash (\).
323 // Argument definitions have been pre-tokenized, so we have to turn them back
324 // into text. This means that numbers, in particular, become hex, regardless of
325 // their representation when the macro was invoked. This is a hack.
326 // A label may appear at the beginning of the line:
327 // :<name><whitespace>
328 // (the colon must be in the first column). These labels are stripped before
329 // macro expansion takes place.
// Parameters:
//   src     - text of the macro line to expand
//   dest    - buffer that receives the expanded text
//   destsiz - size of 'dest' in bytes
331 int ExpandMacro(char * src, char * dest, int destsiz)
334 int questmark; // \? for testing argument existence
335 char mname[128]; // Assume max size of a formal arg name
336 char numbuf[20]; // Buffer for text of CONSTs
339 char ** symbolString;
341 DEBUG { printf("ExM: src=\"%s\"\n", src); }
343 IMACRO * imacro = cur_inobj->inobj.imacro;
344 int macnum = (int)(imacro->im_macro->sattr);
346 char * dst = dest; // Next dest slot
347 char * edst = dest + destsiz - 1; // Last byte of dest buffer
349 // Check for (and skip over) any "label" on the line
355 while (*s != EOS && !(chrtab[*s] & WHITE))
359 s++; // Skip first whitespace
362 // Expand the rest of the line
365 // Copy single character
371 // Skip comments in case a loose @ or \ is in there
372 // In that case the tokeniser was trying to expand it.
373 if ((*s == ';') || ((*s == '/') && (*(s + 1) == '/')))
378 // Do macro expansion
386 case '\\': // \\, \ (collapse to single backslash)
392 case '?': // \? <macro> set `questmark' flag
396 case '#': // \#, number of arguments
397 sprintf(numbuf, "%d", (int)imacro->im_nargs);
399 case '!': // \! size suffix supplied on invocation
400 switch ((int)imacro->im_siz)
402 case SIZN: d = ""; break;
403 case SIZB: d = ".b"; break;
404 case SIZW: d = ".w"; break;
405 case SIZL: d = ".l"; break;
409 case '~': // ==> unique label string Mnnnn...
410 sprintf(numbuf, "M%u", curuniq);
426 return error("missing argument name");
429 // \n ==> argument number 'n', 0..9
430 if (chrtab[*s] & DIGIT)
440 // Get argument name: \name, \{name}
450 while (chrtab[*s] & CTSYM);
455 for(++s; *s != EOS && *s != '}';)
459 return error("missing closing brace ('}')");
466 // Lookup the argument and copy its (string) value into the
467 // destination string
468 DEBUG { printf("argument='%s'\n", mname); }
470 if ((arg = lookup(mname, MACARG, macnum)) == NULL)
471 return error("undefined argument: '%s'", mname);
474 // Convert a string of tokens (terminated with EOL) back into
475 // text. If an argument is out of range (not specified in the
476 // macro invocation) then it is ignored.
477 i = (int)arg->svalue;
479 DEBUG { printf("~argnumber=%d\n", i); }
482 if (i < imacro->im_nargs)
484 tk = imacro->argument[i].token;
485 symbolString = imacro->argument[i].string;
488 // printf("ExM: Preparing to parse argument #%u...\n", i);
494 // 0 if the argument is empty or nonexistent,
495 // 1 if the argument is not empty
498 if (tk == NULL || *tk == EOL)
504 *dst++ = (char)(questmark + '0');
508 // Argument # is in range, so expand it
513 // Reverse-translation from a token number to a string.
514 // This is a hack. It might be better table-driven.
// NOTE(review): there is no upper bound on *tk in the test below; confirm
// that every token value >= KW_D0 has an entry in regname[].
517 if ((*tk >= KW_D0) && !rdsp && !rgpu)
519 d = regname[(int)*tk++ - KW_D0];
522 else if ((*tk >= KW_R0) && (*tk <= KW_R31))
524 d = riscregname[(int)*tk++ - KW_R0];
533 // d = (char *)*tk++;
536 // This fix should be done for strings too
537 d = symbolString[*tk++];
538 DEBUG { printf("ExM: SYMBOL=\"%s\"", d); }
543 // d = (char *)*tk++;
546 d = symbolString[*tk++];
567 // Shamus: Changing the format specifier from %lx to %ux caused the assembler
568 // to choke on legitimate code... Need to investigate this further
569 // before changing anything else here!
// NOTE(review): %lx expects an unsigned long, but the argument is uint64_t;
// the two differ in size where long is 32 bits. PRIx64 from <inttypes.h>
// would match the argument type.
571 sprintf(numbuf, "$%lx", (uint64_t)*tk++);
636 *dst++ = (char)*(tk - 1);
641 // If 'd' != NULL, copy string to destination
645 DEBUG printf("d='%s'\n", d);
664 DEBUG { printf("ExM: dst=\"%s\"\n", dest); }
669 DEBUG { printf("*** OVERFLOW LINE ***\n%s\n", dest); }
670 return fatal("line too long as a result of macro expansion");
675 // Get next line of text from a macro: expand the next stored line of the
// current macro into its line buffer (im_lnbuf) and return that buffer
677 char * GetNextMacroLine(void)
679 IMACRO * imacro = cur_inobj->inobj.imacro;
680 // LONG * strp = imacro->im_nextln;
681 LLIST * strp = imacro->im_nextln;
683 if (strp == NULL) // End-of-macro
686 imacro->im_nextln = strp->next; // Advance before expanding the current line
687 // ExpandMacro((char *)(strp + 1), imacro->im_lnbuf, LNSIZ);
// NOTE(review): ExpandMacro()'s return value is ignored here.
688 ExpandMacro(strp->line, imacro->im_lnbuf, LNSIZ);
690 return imacro->im_lnbuf;
695 // Get next line of text from a repeat block: copy it into irbuf and step to
// the following line, wrapping back to the first line while the repeat
// count lasts
697 char * GetNextRepeatLine(void)
699 IREPT * irept = cur_inobj->inobj.irept;
700 // LONG * strp = irept->ir_nextln; // initial null
702 // Do repeat at end of .rept block's string list
704 if (irept->ir_nextln == NULL)
706 DEBUG { printf("back-to-top-of-repeat-block count=%d\n", (int)irept->ir_count); }
707 irept->ir_nextln = irept->ir_firstln; // copy first line
709 if (irept->ir_count-- == 0) // Tests the count, then decrements it
711 DEBUG { printf("end-repeat-block\n"); }
715 // strp = irept->ir_nextln;
718 // strcpy(irbuf, (char *)(irept->ir_nextln + 1));
// NOTE(review): unbounded strcpy into irbuf[LNSIZ]; assumes every stored
// repeat line fits -- confirm where the lines are stored.
719 strcpy(irbuf, irept->ir_nextln->line);
720 DEBUG { printf("repeat line='%s'\n", irbuf); }
721 // irept->ir_nextln = (LONG *)*strp;
722 irept->ir_nextln = irept->ir_nextln->next;
729 // Include a source file used at the root, and for ".include" files.
// Pushes a new IFILE input object for 'handle', saves the current line
// number, filename and file number in it, and appends 'fname' to the
// file-record list.
// NOTE(review): the strdup() and malloc() results below are not checked.
731 int include(int handle, char * fname)
734 DEBUG { printf("[include: %s, cfileno=%u]\n", fname, cfileno); }
736 // Alloc and initialize include-descriptors
737 INOBJ * inobj = a_inobj(SRC_IFILE);
738 IFILE * ifile = inobj->inobj.ifile;
740 ifile->ifhandle = handle; // Setup file handle
741 ifile->ifind = ifile->ifcnt = 0; // Setup buffer indices
742 ifile->ifoldlineno = curlineno; // Save old line number
743 ifile->ifoldfname = curfname; // Save old filename
744 ifile->ifno = cfileno; // Save old file number
746 // NB: This *must* be preincrement, we're adding one to the filecount here!
747 cfileno = ++filecount; // Compute NEW file number
748 curfname = strdup(fname); // Set current filename (alloc storage)
749 curlineno = 0; // Start on line zero
751 // Add another file to the file-record
752 FILEREC * fr = (FILEREC *)malloc(sizeof(FILEREC));
753 fr->frec_next = NULL;
754 fr->frec_name = curfname; // Shares the strdup'd name with curfname
757 filerec = fr; // Add first filerec
759 last_fr->frec_next = fr; // Append to list of filerecs
762 DEBUG { printf("[include: curfname: %s, cfileno=%u]\n", curfname, cfileno); }
769 // Pop the current input level: close any .if blocks left open inside it,
// restore the saved token pointers, release the type-specific object onto
// its free list and make the previous input object current
773 INOBJ * inobj = cur_inobj;
778 // Pop IFENT levels until we reach the conditional assembly context we
779 // were at when the input object was entered.
780 int numUnmatched = 0;
782 while (ifent != inobj->in_ifent)
784 if (d_endif() != 0) // Something bad happened during endif parsing?
785 return -1; // If yes, bail instead of getting stuck in a loop
790 // Warn the user that we had to close their unterminated .if block(s)
791 if (numUnmatched > 0)
792 warn("missing %d .endif(s)", numUnmatched);
794 tok.u32 = inobj->in_otok; // Restore tok and otok
795 etok = inobj->in_etok;
797 switch (inobj->in_type)
799 case SRC_IFILE: // Pop and release an IFILE
801 DEBUG { printf("[Leaving: %s]\n", curfname); }
803 IFILE * ifile = inobj->inobj.ifile;
804 ifile->if_link = f_ifile;
806 close(ifile->ifhandle); // Close source file
807 DEBUG { printf("[fpop (pre): curfname=%s]\n", curfname); }
808 curfname = ifile->ifoldfname; // Set current filename
809 DEBUG { printf("[fpop (post): curfname=%s]\n", curfname); }
810 DEBUG { printf("[fpop: (pre) cfileno=%d ifile->ifno=%d]\n", (int)cfileno, (int)ifile->ifno); }
811 curlineno = ifile->ifoldlineno; // Set current line#
812 DEBUG { printf("cfileno=%d ifile->ifno=%d\n", (int)cfileno, (int)ifile->ifno); }
813 cfileno = ifile->ifno; // Restore current file number
814 DEBUG { printf("[fpop: (post) cfileno=%d ifile->ifno=%d]\n", (int)cfileno, (int)ifile->ifno); }
818 case SRC_IMACRO: // Pop and release an IMACRO
820 IMACRO * imacro = inobj->inobj.imacro;
821 imacro->im_link = f_imacro;
826 case SRC_IREPT: // Pop and release an IREPT
828 DEBUG { printf("dealloc IREPT\n"); }
829 LLIST * p = inobj->inobj.irept->ir_firstln;
831 // Deallocate repeat lines
842 cur_inobj = inobj->in_link; // Previous input object becomes current
843 inobj->in_link = f_inobj; // Chain the popped INOBJ onto the free list
851 // Get line from file into buf, return NULL on EOF or ptr to the start of a
854 char * GetNextLine(void)
858 int readamt = -1; // 0 if last read() yielded 0 bytes
859 IFILE * fl = cur_inobj->inobj.ifile;
863 // Scan for next end-of-line; handle stupid text formats by treating
864 // \r\n the same as \n. (lone '\r' at end of buffer means we have to
866 d = &fl->ifbuf[fl->ifind];
868 for(p=d, i=0, j=fl->ifcnt; i<j; i++, p++)
870 if (*p == '\r' || *p == '\n')
877 break; // Need to read more, then look for '\n' to eat
878 else if (p[1] == '\n')
882 // Cover up the newline with end-of-string sentinel
891 // Handle hanging lines by ignoring them (Input file is exhausted, no
892 // \r or \n on last line)
893 // Shamus: This is wrong. Never ignore any input!
894 if (!readamt && fl->ifcnt)
901 // Really should check to see if we're at the end of the buffer!
903 fl->ifbuf[fl->ifind + fl->ifcnt] = '\0';
905 return &fl->ifbuf[fl->ifind];
909 // Truncate and return absurdly long lines.
910 if (fl->ifcnt >= QUANTUM)
912 fl->ifbuf[fl->ifind + fl->ifcnt - 1] = '\0'; // Overwrites the last buffered byte
914 return &fl->ifbuf[fl->ifind];
917 // Relocate what's left of a line to the beginning of the buffer, and
918 // read some more of the file in; return NULL if the buffer's empty and
922 p = &fl->ifbuf[fl->ifind];
923 d = &fl->ifbuf[fl->ifcnt & 1]; // Destination is offset 0 or 1 (parity of ifcnt)
925 for(i=0; i<fl->ifcnt; i++)
928 fl->ifind = fl->ifcnt & 1;
931 readamt = read(fl->ifhandle, &fl->ifbuf[fl->ifind + fl->ifcnt], QUANTUM);
936 if ((fl->ifcnt += readamt) == 0)
// Fetch the next line from the current input object (file, macro or repeat
// block) and tokenize it; on success 'tok' is left pointing at the start of
// the line's tokens.
945 int TokenizeLine(void)
947 uint8_t * ln = NULL; // Ptr to current position in line
948 uint8_t * p; // Random character ptr
949 TOKENPTR tk; // Token-deposit ptr
950 int state = 0; // State for keyword detector
951 int j = 0; // Var for keyword detector
952 uint8_t c; // Random char
953 uint64_t v; // Random value
954 uint32_t cursize = 0; // Current line's size (.b, .w, .l, .s, .q, .d)
955 double f; // Random float
956 uint8_t * nullspot = NULL; // Spot to clobber for SYMBOL termination
957 int stuffnull; // 1:terminate SYMBOL '\0' at *nullspot
959 int stringNum = 0; // Pointer to string locations in tokenized line
963 if (cur_inobj == NULL) // Return EOF if input stack is empty
966 // Get another line of input from the current input source: a file, a
967 // macro, or a repeat-block
968 switch (cur_inobj->in_type)
972 // o bump source line number;
973 // o tag the listing-line with a space;
974 // o kludge lines generated by Alcyon C.
976 if ((ln = GetNextLine()) == NULL)
978 DEBUG { printf("TokenizeLine: Calling fpop() from SRC_IFILE...\n"); }
979 if (fpop() == 0) // Pop input level
980 goto retry; // Try for more lines
983 ifent->if_prev = (IFENT *)-1; //Signal Assemble() that we have reached EOF with unbalanced if/endifs
988 curlineno++; // Bump line number
993 // AS68 compatibility, throw away all lines starting with
994 // back-quotes, tildes, or '*'
995 // On other lines, turn the first '*' into a semi-colon.
996 if (*ln == '`' || *ln == '~' || *ln == '*')
1000 for(p=ln; *p!=EOS; p++)
1014 // o Handle end-of-macro;
1015 // o tag the listing-line with an at (@) sign.
1017 if ((ln = GetNextMacroLine()) == NULL)
1019 if (ExitMacro() == 0) // Exit macro (pop args, do fpop(), etc)
1020 goto retry; // Try for more lines...
1022 return TKEOF; // Oops, we got a non zero return code, signal EOF
1029 // o Handle end-of-repeat-block;
1030 // o tag the listing-line with a pound (#) sign.
1032 if ((ln = GetNextRepeatLine()) == NULL)
1034 DEBUG { printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n"); }
1043 // Save text of the line. We only do this during listings and within
1044 // macro-type blocks, since it is expensive to unconditionally copy every
1049 // General housekeeping
1050 tok.u32 = tokeol; // Set "tok" to EOL in case of error
1051 tk.u32 = etok; // Reset token ptr
1052 stuffnull = 0; // Don't stuff nulls
1053 totlines++; // Bump total #lines assembled
1055 // See if the entire line is a comment. This is a win if the programmer
1056 // puts in lots of comments
1057 if (*ln == '*' || *ln == ';' || ((*ln == '/') && (*(ln + 1) == '/')))
1060 // And here we have a very ugly hack for signalling a single line 'turn off
1061 // optimization'. There's really no nice way to do this, so hack it is!
1062 optimizeOff = 0; // Default is to take optimizations as they come
1066 optimizeOff = 1; // Signal that we don't want to optimize this line
1067 ln++; // & skip over the darned thing
1070 // Main tokenization loop;
1071 // o skip whitespace;
1072 // o handle end-of-line;
1073 // o handle symbols;
1074 // o handle single-character tokens (operators, etc.);
1075 // o handle multiple-character tokens (constants, strings, etc.).
1078 // Skip whitespace, handle EOL
1079 while (chrtab[*ln] & WHITE)
1082 // Handle EOL, comment with ';'
1083 if (*ln == EOS || *ln == ';'|| ((*ln == '/') && (*(ln + 1) == '/')))
1086 // Handle start of symbol. Symbols are null-terminated in place. The
1087 // termination is always one symbol behind, since there may be no place
1088 // for a null in the case that an operator immediately follows the name.
1093 if (stuffnull) // Terminate old symbol from previous pass
1096 v = 0; // Assume no DOT attrib follows symbol
1099 // In some cases, we need to check for a DOTx at the *beginning*
1100 // of a symbol, as the "start" of the line we're currently looking
1101 // at could be somewhere in the middle of that line!
1104 // Make sure that it's *only* a .[bwsl] following, and not the
1105 // start of a local symbol:
1106 if ((chrtab[*(ln + 1)] & DOT)
1107 && (dotxtab[*(ln + 1)] != 0)
1108 && !(chrtab[*(ln + 2)] & CTSYM))
1110 // We found a legitimate DOTx construct, so add it to the
1114 *tk.u32++ = (TOKEN)dotxtab[*ln++];
1119 p = nullspot = ln++; // Nullspot -> start of this symbol
1121 // Find end of symbol (and compute its length)
1122 for(j=1; (int)chrtab[*ln]&CTSYM; j++)
1125 // Handle "DOT" special forms (like ".b") that follow a normal
1126 // symbol or keyword:
1129 *ln++ = EOS; // Terminate symbol
1130 stuffnull = 0; // And never try it again
1132 // Character following the '.' must have a DOT attribute, and
1133 // the character after THAT one must not have a start-symbol
1134 // attribute (to prevent symbols that look like, for example,
1135 // "zingo.barf", which might be a good idea anyway....)
1136 if (((chrtab[*ln] & DOT) == 0) || (dotxtab[*ln] == 0))
1137 return error("[bwsl] must follow '.' in symbol");
1139 v = (uint32_t)dotxtab[*ln++];
1140 cursize = (uint32_t)v;
1142 if (chrtab[*ln] & CTSYM)
1143 return error("misuse of '.'; not allowed in symbols");
1146 // If the symbol is small, check to see if it's really the name of
1150 for(state=0; state>=0;)
1152 j = (int)tolowertab[*p++];
1155 if (kwcheck[j] != state)
1161 if (*p == EOS || p == ln)
1175 // Make j = -1 if user tries to use a RISC register while in 68K mode
1176 if (!(rgpu || rdsp) && ((TOKEN)j >= KW_R0 && (TOKEN)j <= KW_R31))
1181 // Make j = -1 if time, date etc with no preceding ^^
1182 // defined, referenced, streq, macdef, date and time
1185 case 112: // defined
1186 case 113: // referenced
1194 // If not tokenized keyword OR token was not found
1195 if ((j < 0) || (state < 0))
1199 //problem here: nullspot is a char * but TOKEN is a uint32_t. On a 64-bit
1200 //system, this will cause all kinds of mischief.
1202 *tk++ = (TOKEN)nullspot;
1204 string[stringNum] = nullspot;
1205 *tk.u32++ = stringNum;
1211 *tk.u32++ = (TOKEN)j;
1215 if (v) // Record attribute token (if any)
1216 *tk.u32++ = (TOKEN)v;
1218 if (stuffnull) // Arrange for string termination on next pass
1224 // Handle identity tokens
1231 // Handle multiple-character tokens
1236 case '!': // ! or !=
1246 case '\'': // 'string'
1249 // Hardcoded for now, maybe this will change in the future
1250 *tk.u32++ = STRINGA8;
1254 case '\"': // "string"
1258 string[stringNum] = ln;
1259 *tk.u32++ = stringNum;
1262 for(p=ln; *ln!=EOS && *ln!=c1;)
1271 return(error("unterminated string"));
1300 // If we're evaluating a macro
1301 // this is valid and expands to
1305 warn("bad backslash code in string");
1315 return error("unterminated string");
1319 case '$': // $, hex constant
1320 if (chrtab[*ln] & HDIGIT)
1324 // Parse the hex value
1325 while (hextab[*ln] >= 0)
1326 v = (v << 4) + (int)hextab[*ln++];
1330 if (obj_format == BSD)
1332 if ((*(ln + 1) & 0xDF) == 'B')
1337 else if ((*(ln + 1) & 0xDF) == 'W')
1342 else if ((*(ln + 1) & 0xDF) == 'L')
1353 if (obj_format == ALCYON)
1357 if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
1362 else if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
1374 case '<': // < or << or <> or <=
1393 case ':': // : or ::
1403 case '=': // = or ==
1406 *tk.u32++ = DEQUALS;
1413 case '>': // > or >> or >=
1428 case '%': // % or binary constant
1429 if (*ln < '0' || *ln > '1')
1437 while (*ln >= '0' && *ln <= '1')
1438 v = (v << 1) + *ln++ - '0';
1442 if ((*(ln + 1) == 'b') || (*(ln + 1) == 'B'))
1448 if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
1454 if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
1464 case '@': // @ or octal constant
1465 if (*ln < '0' || *ln > '7')
1473 while (*ln >= '0' && *ln <= '7')
1474 v = (v << 3) + *ln++ - '0';
1478 if ((*(ln + 1) == 'b') || (*(ln + 1) == 'B'))
1484 if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
1490 if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
1500 case '^': // ^ or ^^ <operator-name>
1507 if (((int)chrtab[*++ln] & STSYM) == 0)
1509 error("invalid symbol following ^^");
1515 while ((int)chrtab[*ln] & CTSYM)
1518 for(state=0; state>=0;)
1520 // Get char, convert to lowercase
1523 if (j >= 'A' && j <= 'Z')
1528 if (kwcheck[j] != state)
1534 if (*p == EOS || p == ln)
1543 if (j < 0 || state < 0)
1545 error("unknown symbol following ^^");
1549 *tk.u32++ = (TOKEN)j;
1552 interror(2); // Bad MULTX entry in chrtab
1557 // Handle decimal constant
1560 uint8_t * numStart = ln;
1563 while ((int)chrtab[*ln] & DIGIT)
1564 v = (v * 10) + *ln++ - '0';
1566 // See if there's a .[bwl] after the constant & deal with it if so
1569 if ((*(ln + 1) == 'b') || (*(ln + 1) == 'B'))
1577 else if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
1585 else if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
1593 else if ((int)chrtab[*(ln + 1)] & DIGIT)
1595 // Hey, more digits after the dot, so we assume it's a
1596 // floating point number of some kind
1602 while ((int)chrtab[*ln] & DIGIT)
1604 f = f + (double)(*ln++ - '0') / fract;
1608 // Here we parse the whole floating point number
// NOTE(review): this 'f' has the same name as the 'double f' declared at
// the top of the function and presumably shadows it in an inner scope.
1612 double f = strtod(numStart, &numEnd);
1613 ln = (uint8_t *)numEnd;
1616 return error("floating point parse error");
1620 // Shamus: Well, this is all kinds of icky--not the least of which is that unlike uintNN_t types, we have no guarantees of any kind when it comes to the size of floating point numbers in C (as far as I know of). If there is, we need to use those kinds here, or else figure out at runtime what sizes we're dealing with and act accordingly. To be fair, this is OK as long as the double type is less than 64 bits wide, but again, there's no guarantee that it isn't. :-/
1631 //printf("CONST: %i\n", v);
1635 // Handle illegal character
1636 return error("illegal character $%02X found", *ln);
1639 // Terminate line of tokens and return "success."
1642 tok.u32 = etok; // Set tok to beginning of line
1644 if (stuffnull) // Terminate last SYMBOL
1654 // .GOTO <label> goto directive
1656 // The label is searched for starting from the first line of the current,
1657 // enclosing macro definition. If no enclosing macro exists, an error is
1660 // A label is of the form:
1662 // :<name><whitespace>
1664 // The colon must appear in column 1. The label is stripped prior to macro
1665 // expansion, and is NOT subject to macro expansion. The whitespace may also
1668 int d_goto(WORD unused)
1670 // Setup for the search
1671 if (*tok.u32 != SYMBOL)
1672 return error("missing label");
1674 char * sym = string[tok.u32[1]]; // The token after SYMBOL is an index into string[]
1677 if (cur_inobj->in_type != SRC_IMACRO)
1678 return error("goto not in macro");
1680 IMACRO * imacro = cur_inobj->inobj.imacro;
1681 LLIST * defln = imacro->im_macro->lineList;
1683 // Attempt to find the label, starting with the first line.
1684 for(; defln!=NULL; defln=defln->next)
1686 // Must start with a colon
1687 if (defln->line[0] == ':')
1689 // Compare names (sleazo string compare)
1691 char * s2 = defln->line;
1693 // Either we will match the strings to EOS on both, or we will
1694 // match EOS on string 1 to whitespace on string 2. Otherwise, we
1696 while ((*s1 == *s2) || ((*s1 == EOS) && (chrtab[*s2] & WHITE)))
1698 // If we reached the end of string 1 (sym), we're done.
1699 // Note that we're also checking for the end of string 2 as
1700 // well, since we've established they're equal above.
1703 // Found the label, set new macro next-line and return.
1704 imacro->im_nextln = defln;
1714 return error("goto label not found");
// Print a bracketed, human-readable form of the single token 't' to stdout
// (debugging aid)
1718 void DumpToken(TOKEN t)
1722 else if (t == CONST)
1724 else if (t == ACONST)
1726 else if (t == STRING)
1728 else if (t == SYMBOL)
1732 else if (t == TKEOF)
1734 else if (t == DEQUALS)
1735 printf("[DEQUALS]");
1740 else if (t == DCOLON)
1752 else if (t == UNMINUS)
1753 printf("[UNMINUS]");
1768 else if (t == ENDEXPR)
1769 printf("[ENDEXPR]");
1770 else if (t == CR_ABSCOUNT)
1771 printf("[CR_ABSCOUNT]");
1772 else if (t == CR_DEFINED)
1773 printf("[CR_DEFINED]");
1774 else if (t == CR_REFERENCED)
1775 printf("[CR_REFERENCED]");
1776 else if (t == CR_STREQ)
1777 printf("[CR_STREQ]");
1778 else if (t == CR_MACDEF)
1779 printf("[CR_MACDEF]");
1780 else if (t == CR_TIME)
1781 printf("[CR_TIME]");
1782 else if (t == CR_DATE)
1783 printf("[CR_DATE]");
1784 else if (t >= 0x20 && t <= 0x2F) // Token values 0x20-0x2F: print as a character
1785 printf("[%c]", (char)t);
1786 else if (t >= 0x3A && t <= 0x3F) // Token values 0x3A-0x3F: print as a character
1787 printf("[%c]", (char)t);
1788 else if (t >= 0x80 && t <= 0x87) // Printed as D0..D7
1789 printf("[D%u]", ((uint32_t)t) - 0x80);
1790 else if (t >= 0x88 && t <= 0x8F) // Printed as A0..A7
1791 printf("[A%u]", ((uint32_t)t) - 0x88);
1793 printf("[%X:%c]", (uint32_t)t, (char)t); // Raw hex value plus its character form
// Print every token in tokbuf[], up to the first EOL, in bracketed
// human-readable form to stdout (debugging aid)
1797 void DumpTokenBuffer(void)
1799 printf("Tokens [%X]: ", sloc);
1801 for(TOKEN * t=tokbuf; *t!=EOL; t++)
1805 else if (*t == CONST)
1807 TOKENPTR tp = (TOKENPTR)(t + 1);
// NOTE(review): %lX expects an unsigned long, but the argument is uint64_t;
// the two differ in size where long is 32 bits. PRIX64 from <inttypes.h>
// would match the argument type.
1808 printf("[CONST: $%lX]", (uint64_t)(*tp.u64));
1811 else if (*t == ACONST)
1813 printf("[ACONST: $%X, $%X]", (uint32_t)t[1], (uint32_t)t[2]);
1816 else if (*t == STRING)
1819 printf("[STRING:\"%s\"]", string[*t]);
1821 else if (*t == SYMBOL)
1824 printf("[SYMBOL:\"%s\"]", string[*t]);
1828 else if (*t == TKEOF)
1830 else if (*t == DEQUALS)
1831 printf("[DEQUALS]");
1836 else if (*t == DCOLON)
1848 else if (*t == UNMINUS)
1849 printf("[UNMINUS]");
1850 else if (*t == DOTB)
1852 else if (*t == DOTW)
1854 else if (*t == DOTL)
1856 else if (*t == DOTQ)
1858 else if (*t == DOTS)
1860 else if (*t == DOTD)
1862 else if (*t == DOTI)
1864 else if (*t == ENDEXPR)
1865 printf("[ENDEXPR]");
1866 else if (*t == CR_ABSCOUNT)
1867 printf("[CR_ABSCOUNT]");
1868 else if (*t == CR_DEFINED)
1869 printf("[CR_DEFINED]");
1870 else if (*t == CR_REFERENCED)
1871 printf("[CR_REFERENCED]");
1872 else if (*t == CR_STREQ)
1873 printf("[CR_STREQ]");
1874 else if (*t == CR_MACDEF)
1875 printf("[CR_MACDEF]");
1876 else if (*t == CR_TIME)
1877 printf("[CR_TIME]");
1878 else if (*t == CR_DATE)
1879 printf("[CR_DATE]");
1880 else if (*t >= 0x20 && *t <= 0x2F) // Token values 0x20-0x2F: print as a character
1881 printf("[%c]", (char)*t);
1882 else if (*t >= 0x3A && *t <= 0x3F) // Token values 0x3A-0x3F: print as a character
1883 printf("[%c]", (char)*t);
1884 else if (*t >= 0x80 && *t <= 0x87) // Printed as D0..D7
1885 printf("[D%u]", ((uint32_t)*t) - 0x80);
1886 else if (*t >= 0x88 && *t <= 0x8F) // Printed as A0..A7
1887 printf("[A%u]", ((uint32_t)*t) - 0x88);
1889 printf("[%X:%c]", (uint32_t)*t, (char)*t); // Raw hex value plus its character form