2 // RMAC - Reboot's Macro Assembler for all Atari computers
3 // TOKEN.C - Token Handling
4 // Copyright (C) 199x Landon Dyer, 2011-2017 Reboot and Friends
5 // RMAC derived from MADMAC v1.07 Written by Landon Dyer, 1986
6 // Source utilised with the kind permission of Landon Dyer
17 #define DECL_KW // Declare keyword arrays
18 #define DEF_KW // Declare keyword values
19 #include "kwtab.h" // Incl generated keyword tables & defs
// Tokenizer state. Objects not marked static have external linkage.
22 int lnsave; // 1 = save (strcpy()) text of current line, 0 = don't save
23 uint16_t curlineno; // Current line number (64K max currently)
24 int totlines; // Total # of lines
25 int mjump_align = 0; // mjump alignment flag
26 char lntag; // Line tag
27 char * curfname; // Current filename
28 char tolowertab[128]; // Uppercase ==> lowercase
29 int8_t hextab[128]; // Table of hex values (NOTE(review): indexed with uint8_t bytes in TokenizeLine(); confirm bytes >= 0x80 can never reach it)
30 char dotxtab[128]; // Table for ".b", ".s", etc.
31 char irbuf[LNSIZ]; // Text for .rept block line
32 char lnbuf[LNSIZ]; // Text of current line
33 WORD filecount; // Unique file number counter
34 WORD cfileno; // Current file number
35 TOKEN * tok; // Ptr to current token
36 TOKEN * etok; // Ptr past last token in tokbuf[]
37 TOKEN tokeol[1] = {EOL}; // Bailout end-of-line token (TokenizeLine() points tok here in case of error)
38 char * string[TOKBUFSIZE*2]; // Token buffer string pointer storage
39 int optimizeOff; // Optimization override flag
41 // File record, used to maintain a list of every include file ever visited
42 #define FILEREC struct _filerec
52 INOBJ * cur_inobj; // Ptr current input obj (IFILE/IMACRO/IREPT)
53 static INOBJ * f_inobj; // Ptr list of free INOBJs
54 static IFILE * f_ifile; // Ptr list of free IFILEs
55 static IMACRO * f_imacro; // Ptr list of free IMACROs
57 static TOKEN tokbuf[TOKBUFSIZE]; // Token buffer (stack-like, all files)
// Character classification table, indexed by character code. Each entry is a
// combination (written with +) of the class flags ILLEG, WHITE, MULTX, SELF,
// STSYM, CTSYM, DIGIT, HDIGIT and DOT; the tokenizer tests entries with
// expressions such as (chrtab[c] & WHITE).
// NOTE(review): lowercase 'x' carries DOT but uppercase 'X' does not, although
// InitTokenizer() sets dotxtab['X'] -- confirm whether ".X" should be accepted.
59 uint8_t chrtab[0x100] = {
60 ILLEG, ILLEG, ILLEG, ILLEG, // NUL SOH STX ETX
61 ILLEG, ILLEG, ILLEG, ILLEG, // EOT ENQ ACK BEL
62 ILLEG, WHITE, ILLEG, ILLEG, // BS HT LF VT
63 WHITE, ILLEG, ILLEG, ILLEG, // FF CR SO SI
65 ILLEG, ILLEG, ILLEG, ILLEG, // DLE DC1 DC2 DC3
66 ILLEG, ILLEG, ILLEG, ILLEG, // DC4 NAK SYN ETB
67 ILLEG, ILLEG, ILLEG, ILLEG, // CAN EM SUB ESC
68 ILLEG, ILLEG, ILLEG, ILLEG, // FS GS RS US
70 WHITE, MULTX, MULTX, SELF, // SP ! " #
71 MULTX+CTSYM, MULTX, SELF, MULTX, // $ % & '
72 SELF, SELF, SELF, SELF, // ( ) * +
73 SELF, SELF, STSYM, SELF, // , - . /
75 DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 0 1
76 DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 2 3
77 DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 4 5
78 DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 6 7
79 DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 8 9
81 MULTX, MULTX, MULTX, STSYM+CTSYM, // < = > ?
83 MULTX, STSYM+CTSYM+HDIGIT, // @ A
84 DOT+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT, // B C
85 DOT+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT, // D E
86 STSYM+CTSYM+HDIGIT, STSYM+CTSYM, // F G
87 STSYM+CTSYM, DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // H I J K
88 DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // L M N O
90 DOT+STSYM+CTSYM, DOT+STSYM+CTSYM, STSYM+CTSYM, DOT+STSYM+CTSYM, // P Q R S
91 STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, DOT+STSYM+CTSYM, // T U V W
92 STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, SELF,// X Y Z [
93 SELF, SELF, MULTX, STSYM+CTSYM, // \ ] ^ _
95 ILLEG, STSYM+CTSYM+HDIGIT, // ` a
96 DOT+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT, // b c
97 DOT+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT, // d e
98 STSYM+CTSYM+HDIGIT, STSYM+CTSYM, // f g
99 STSYM+CTSYM, DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // h i j k
100 DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // l m n o
102 DOT+STSYM+CTSYM, DOT+STSYM+CTSYM, STSYM+CTSYM, DOT+STSYM+CTSYM, // p q r s
103 STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, DOT+STSYM+CTSYM, // t u v w
104 DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, SELF, // x y z {
105 SELF, SELF, SELF, ILLEG, // | } ~ DEL
107 // Anything above $7F is illegal (and yes, we need to check for this,
108 // otherwise you get strange and spurious errors that will lead you astray)
109 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
110 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
111 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
112 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
113 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
114 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
115 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
116 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
117 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
118 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
119 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
120 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
121 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
122 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
123 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
124 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG
127 // Names of registers. ExpandMacro() indexes this table with
// (token - KW_D0) to turn a register keyword token back into text. The number
// pairs at the end of each row are presumably the first and last token values
// covered by that row (TODO confirm against the generated keyword table).
128 static char * regname[] = {
129 "d0","d1","d2","d3","d4","d5","d6","d7", // 128,135
130 "a0","a1","a2","a3","a4","a5","a6","sp", // 136,143
131 "ssp","pc","sr","ccr","regequ","set","reg","r0", // 144,151
132 "r1","r2","r3","r4","r5","r6","r7","r8", // 152,159
133 "r9","r10","r11","r12","r13","r14","r15","r16", // 160,167
134 "r17","r18","r19","r20","r21","r22","r23","r24", // 168,175
135 "r25","r26","r27","r28","r29","r30","r31","ccdef", // 176,183
136 "usp","ic40","dc40","bc40","sfc","dfc","","vbr", // 184,191
137 "cacr","caar","msp","isp","tc","itt0","itt1","dtt0", // 192,199
138 "dtt1","mmusr","urp","srp","iacr0","iacr1","dacr0","dacr1", // 200,207
139 "tt0","tt1","crp","","","","","", // 208,215
140 "","","","","fpiar","fpsr","fpcr","", // 216,223
141 "fp0","fp1","fp2","fp3","fp4","fp5","fp6","fp7", // 224,231
142 "","","","","","","","", // 232,239
143 "","","","","","","","", // 240,247
144 "","","","","","","","", // 248,255
145 "","","","","x0","x1","y0","y1", // 256,263
146 "","b0","","b2","","b1","a","b", // 264,271
147 "mr","omr","la","lc","ssh","ssl","ss","", // 272,279
148 "n0","n1","n2","n3","n4","n5","n6","n7", // 280,287
149 "m0","m1","m2","m3","m4","m5","m6","m7", // 288,295
150 "","","","","","","l","p", // 296,303
151 "mr","omr","la","lc","ssh","ssl","ss","", // 304,311 (same names as the 272,279 row)
152 "a10","b10","x","y","","","ab","ba" // 312,319
// Names of the RISC registers r0..r31. ExpandMacro() indexes this table with
// (token - KW_R0).
155 static char * riscregname[] = {
156 "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
157 "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
158 "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
159 "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31"
164 // Initialize tokenizer: reset the file/line bookkeeping and fill in the
// hex, "dot" and tolower lookup tables
166 void InitTokenizer(void)
169 char * htab = "0123456789abcdefABCDEF"; // Hex character table
171 lnsave = 0; // Don't save lines
172 curfname = ""; // No file, empty filename
173 filecount = (WORD)-1; // include() pre-increments this, so the first file gets number 0
174 cfileno = (WORD)-1; // cfileno gets bumped to 0
186 // Initialize hex, "dot" and tolower tables
191 tolowertab[i] = (char)i; // Start from the identity mapping
194 for(i=0; htab[i]!=EOS; i++)
195 hextab[htab[i]] = (char)((i < 16) ? i : i - 6); // "ABCDEF" sit at htab indices 16..21; subtracting 6 yields 10..15
197 for(i='A'; i<='Z'; i++)
198 tolowertab[i] |= 0x20; // Setting bit 0x20 turns ASCII 'A'..'Z' into 'a'..'z'
200 // These characters are legal immediately after a period
201 dotxtab['b'] = DOTB; // .b .B (.s/.S map to DOTS below, no longer to DOTB)
203 //dotxtab['s'] = DOTB;
204 //dotxtab['S'] = DOTB;
205 dotxtab['w'] = DOTW; // .w .W
207 dotxtab['l'] = DOTL; // .l .L
209 dotxtab['i'] = DOTI; // .i .I (???)
211 dotxtab['D'] = DOTD; // .d .D (quad word)
213 dotxtab['S'] = DOTS; // .s .S
215 dotxtab['Q'] = DOTQ; // .q .Q
217 dotxtab['X'] = DOTX; // .x .X
219 dotxtab['P'] = DOTP; // .p .P
// Set curfname (per the function name, the filename used when reporting
// errors). It becomes a placeholder string for the "top" and "not found"
// cases, otherwise the frec_name of the record reached by walking the
// filerec list.
224 void SetFilenameForErrorReporting(void)
228 // Check for absolute top filename (this should never happen)
231 curfname = "(*top*)";
235 FILEREC * fr = filerec; // Start at the head of the file-record list
237 // Advance to the correct record...
238 while (fr != NULL && fnum != 0)
244 // Check for file # record not found (this should never happen either)
247 curfname = "(*NOT FOUND*)";
251 curfname = fr->frec_name;
256 // Allocate an input object (INOBJ) of type 'typ' (SRC_IFILE, SRC_IMACRO or
// SRC_IREPT) together with its type-specific record. INOBJ, IFILE and IMACRO
// records are taken from the free lists (f_inobj, f_ifile, f_imacro) or
// obtained with malloc(); an IREPT record is always malloc()ed.
// NOTE(review): no malloc() result is checked in the lines visible here.
258 INOBJ * a_inobj(int typ)
264 // Allocate and initialize INOBJ first
266 inobj = malloc(sizeof(INOBJ));
270 f_inobj = f_inobj->in_link; // Unlink the reused INOBJ from the free list
275 case SRC_IFILE: // Alloc and init an IFILE
277 ifile = malloc(sizeof(IFILE));
281 f_ifile = f_ifile->if_link; // Unlink the reused IFILE from the free list
284 inobj->inobj.ifile = ifile;
287 case SRC_IMACRO: // Alloc and init an IMACRO
288 if (f_imacro == NULL)
289 imacro = malloc(sizeof(IMACRO));
293 f_imacro = f_imacro->im_link; // Unlink the reused IMACRO from the free list
296 inobj->inobj.imacro = imacro;
299 case SRC_IREPT: // Alloc and init an IREPT
300 inobj->inobj.irept = malloc(sizeof(IREPT));
301 DEBUG { printf("alloc IREPT\n"); }
305 // Install INOBJ on top of input stack
306 inobj->in_ifent = ifent; // Record .if context on entry
307 inobj->in_type = (WORD)typ;
308 inobj->in_otok = tok; // Saved so fpop() can restore tok
309 inobj->in_etok = etok; // Saved so fpop() can restore etok
310 inobj->in_link = cur_inobj; // Link to the previous top of the input stack
318 // Perform macro substitution from 'src' to 'dest'. Return OK or some error.
319 // A macro reference is in one of two forms:
320 // \name <non-name-character>
322 // A doubled backslash (\\) is compressed to a single backslash (\).
323 // Argument definitions have been pre-tokenized, so we have to turn them back
324 // into text. This means that numbers, in particular, become hex, regardless of
325 // their representation when the macro was invoked. This is a hack.
326 // A label may appear at the beginning of the line:
327 // :<name><whitespace>
328 // (the colon must be in the first column). These labels are stripped before
329 // macro expansion takes place.
// 'destsiz' is the size in bytes of the 'dest' buffer.
331 int ExpandMacro(char * src, char * dest, int destsiz)
334 int questmark; // \? for testing argument existence
335 char mname[128]; // Assume max size of a formal arg name
336 char numbuf[20]; // Buffer for text of CONSTs
339 char ** symbolString;
341 DEBUG { printf("ExM: src=\"%s\"\n", src); }
343 IMACRO * imacro = cur_inobj->inobj.imacro;
344 int macnum = (int)(imacro->im_macro->sattr);
346 char * dst = dest; // Next dest slot
347 char * edst = dest + destsiz - 1; // Last byte of dest buffer (dest + destsiz - 1)
349 // Check for (and skip over) any "label" on the line
355 while (*s != EOS && !(chrtab[*s] & WHITE))
359 s++; // Skip first whitespace
362 // Expand the rest of the line
365 // Copy single character
371 // Skip comments in case a loose @ or \ is in there
372 // In that case the tokeniser was trying to expand it.
373 if ((*s == ';') || ((*s == '/') && (*(s + 1) == '/')))
378 // Do macro expansion
386 case '\\': // \\, \ (collapse to single backslash)
392 case '?': // \? <macro> set `questmark' flag
396 case '#': // \#, number of arguments
397 sprintf(numbuf, "%d", (int)imacro->im_nargs);
399 case '!': // \! size suffix supplied on invocation
400 switch ((int)imacro->im_siz)
402 case SIZN: d = ""; break;
403 case SIZB: d = ".b"; break;
404 case SIZW: d = ".w"; break;
405 case SIZL: d = ".l"; break;
409 case '~': // ==> unique label string Mnnnn...
410 sprintf(numbuf, "M%u", curuniq);
426 return error("missing argument name");
429 // \n ==> argument number 'n', 0..9
430 if (chrtab[*s] & DIGIT)
440 // Get argument name: \name, \{name}
450 while (chrtab[*s] & CTSYM);
455 for(++s; *s != EOS && *s != '}';)
459 return error("missing closing brace ('}')");
466 // Lookup the argument and copy its (string) value into the
467 // destination string
468 DEBUG { printf("argument='%s'\n", mname); }
470 if ((arg = lookup(mname, MACARG, macnum)) == NULL)
471 return error("undefined argument: '%s'", mname);
474 // Convert a string of tokens (terminated with EOL) back into
475 // text. If an argument is out of range (not specified in the
476 // macro invocation) then it is ignored.
477 i = (int)arg->svalue;
479 DEBUG { printf("~argnumber=%d\n", i); }
482 if (i < imacro->im_nargs)
484 tk = imacro->argument[i].token;
485 symbolString = imacro->argument[i].string;
488 // printf("ExM: Preparing to parse argument #%u...\n", i);
494 // 0 if the argument is empty or nonexistent,
495 // 1 if the argument is not empty
498 if (tk == NULL || *tk == EOL)
504 *dst++ = (char)(questmark + '0');
508 // Argument # is in range, so expand it
513 // Reverse-translation from a token number to a string.
514 // This is a hack. It might be better table-driven.
517 if ((*tk >= KW_D0) && !rdsp && !rgpu)
519 d = regname[(int)*tk++ - KW_D0];
522 else if ((*tk >= KW_R0) && (*tk <= KW_R31))
524 d = riscregname[(int)*tk++ - KW_R0];
533 // d = (char *)*tk++;
536 // This fix should be done for strings too
537 d = symbolString[*tk++];
538 DEBUG { printf("ExM: SYMBOL=\"%s\"", d); }
543 // d = (char *)*tk++;
546 d = symbolString[*tk++];
567 // Shamus: Changing the format specifier from %lx to %ux caused the assembler
568 // to choke on legitimate code... Need to investigate this further
569 // before changing anything else here!
571 tk++; // Skip the hi LONG...
572 sprintf(numbuf, "$%lx", (uint64_t)*tk++); // NOTE(review): %lx expects unsigned long; it only matches uint64_t where long is 64 bits wide
636 *dst++ = (char)*(tk - 1);
641 // If 'd' != NULL, copy string to destination
645 DEBUG printf("d='%s'\n", d);
664 DEBUG { printf("ExM: dst=\"%s\"\n", dest); }
669 DEBUG { printf("*** OVERFLOW LINE ***\n%s\n", dest); }
670 return fatal("line too long as a result of macro expansion");
675 // Get next line of text from a macro. The stored line is expanded into
// imacro->im_lnbuf by ExpandMacro() and that buffer is returned; the return
// value of ExpandMacro() is not checked here. The caller (TokenizeLine())
// treats a NULL result as end-of-macro.
677 char * GetNextMacroLine(void)
679 IMACRO * imacro = cur_inobj->inobj.imacro;
680 // LONG * strp = imacro->im_nextln;
681 LLIST * strp = imacro->im_nextln;
683 if (strp == NULL) // End-of-macro
686 imacro->im_nextln = strp->next; // Advance to the following line for the next call
687 // ExpandMacro((char *)(strp + 1), imacro->im_lnbuf, LNSIZ);
688 ExpandMacro(strp->line, imacro->im_lnbuf, LNSIZ);
690 return imacro->im_lnbuf;
695 // Get next line of text from a repeat block. The line is copied into the
// global irbuf and ir_nextln is advanced; when the line list is exhausted the
// block restarts from ir_firstln and ir_count is decremented.
// NOTE(review): the strcpy() into irbuf is unbounded; this assumes every stored
// line fits in LNSIZ bytes -- confirm where the lines are recorded.
697 char * GetNextRepeatLine(void)
699 IREPT * irept = cur_inobj->inobj.irept;
700 // LONG * strp = irept->ir_nextln; // initial null
702 // Do repeat at end of .rept block's string list
704 if (irept->ir_nextln == NULL)
706 DEBUG { printf("back-to-top-of-repeat-block count=%d\n", (int)irept->ir_count); }
707 irept->ir_nextln = irept->ir_firstln; // copy first line
709 if (irept->ir_count-- == 0) // Tests the count before decrementing it
711 DEBUG { printf("end-repeat-block\n"); }
715 // strp = irept->ir_nextln;
718 // strcpy(irbuf, (char *)(irept->ir_nextln + 1));
719 strcpy(irbuf, irept->ir_nextln->line);
720 DEBUG { printf("repeat line='%s'\n", irbuf); }
721 // irept->ir_nextln = (LONG *)*strp;
722 irept->ir_nextln = irept->ir_nextln->next;
729 // Include a source file used at the root, and for ".include" files.
// 'handle' is the already-open file handle to read from and 'fname' is its
// name, which is duplicated with strdup(). The previous line number, filename
// and file number are saved in the new IFILE so they can be restored later.
// NOTE(review): the strdup() and malloc() results are not checked in the lines
// visible here.
731 int include(int handle, char * fname)
734 DEBUG { printf("[include: %s, cfileno=%u]\n", fname, cfileno); }
736 // Alloc and initialize include-descriptors
737 INOBJ * inobj = a_inobj(SRC_IFILE);
738 IFILE * ifile = inobj->inobj.ifile;
740 ifile->ifhandle = handle; // Setup file handle
741 ifile->ifind = ifile->ifcnt = 0; // Setup buffer indices
742 ifile->ifoldlineno = curlineno; // Save old line number
743 ifile->ifoldfname = curfname; // Save old filename
744 ifile->ifno = cfileno; // Save old file number
746 // NB: This *must* be preincrement, we're adding one to the filecount here!
747 cfileno = ++filecount; // Compute NEW file number
748 curfname = strdup(fname); // Set current filename (alloc storage)
749 curlineno = 0; // Start on line zero
751 // Add another file to the file-record
752 FILEREC * fr = (FILEREC *)malloc(sizeof(FILEREC));
753 fr->frec_next = NULL;
754 fr->frec_name = curfname; // The record shares the strdup()ed name with curfname
757 filerec = fr; // Add first filerec
759 last_fr->frec_next = fr; // Append to list of filerecs
762 DEBUG { printf("[include: curfname: %s, cfileno=%u]\n", curfname, cfileno); }
769 // Pop the current input level: unwind any .if levels opened inside it,
// restore the saved token pointers, release the type-specific record and put
// the INOBJ back on the free list
773 INOBJ * inobj = cur_inobj;
778 // Pop IFENT levels until we reach the conditional assembly context we
779 // were at when the input object was entered.
780 int numUnmatched = 0;
782 while (ifent != inobj->in_ifent)
784 if (d_endif() != 0) // Something bad happened during endif parsing?
785 return -1; // If yes, bail instead of getting stuck in a loop
790 // Warn the user that we had to close their unmatched .if blocks for them
791 if (numUnmatched > 0)
792 warn("missing %d .endif(s)", numUnmatched);
794 tok = inobj->in_otok; // Restore tok and etok
795 etok = inobj->in_etok;
797 switch (inobj->in_type)
799 case SRC_IFILE: // Pop and release an IFILE
801 DEBUG { printf("[Leaving: %s]\n", curfname); }
803 IFILE * ifile = inobj->inobj.ifile;
804 ifile->if_link = f_ifile; // Chain the IFILE onto the free list
806 close(ifile->ifhandle); // Close source file
807 DEBUG { printf("[fpop (pre): curfname=%s]\n", curfname); }
808 curfname = ifile->ifoldfname; // Set current filename
809 DEBUG { printf("[fpop (post): curfname=%s]\n", curfname); }
810 DEBUG { printf("[fpop: (pre) cfileno=%d ifile->ifno=%d]\n", (int)cfileno, (int)ifile->ifno); }
811 curlineno = ifile->ifoldlineno; // Set current line#
812 DEBUG { printf("cfileno=%d ifile->ifno=%d\n", (int)cfileno, (int)ifile->ifno); }
813 cfileno = ifile->ifno; // Restore current file number
814 DEBUG { printf("[fpop: (post) cfileno=%d ifile->ifno=%d]\n", (int)cfileno, (int)ifile->ifno); }
818 case SRC_IMACRO: // Pop and release an IMACRO
820 IMACRO * imacro = inobj->inobj.imacro;
821 imacro->im_link = f_imacro; // Chain the IMACRO onto the free list
826 case SRC_IREPT: // Pop and release an IREPT
828 DEBUG { printf("dealloc IREPT\n"); }
829 LLIST * p = inobj->inobj.irept->ir_firstln;
831 // Deallocate repeat lines
842 cur_inobj = inobj->in_link; // Previous input object becomes current again
843 inobj->in_link = f_inobj; // Chain the INOBJ onto the free list
851 // Get line from file into buf, return NULL on EOF or ptr to the start of a
854 char * GetNextLine(void)
858 int readamt = -1; // 0 if last read() yielded 0 bytes
859 IFILE * fl = cur_inobj->inobj.ifile;
863 // Scan for next end-of-line; handle stupid text formats by treating
864 // \r\n the same as \n. (lone '\r' at end of buffer means we have to
866 d = &fl->ifbuf[fl->ifind]; // Start of the unconsumed data in the buffer
868 for(p=d, i=0, j=fl->ifcnt; i<j; i++, p++)
870 if (*p == '\r' || *p == '\n')
877 break; // Need to read more, then look for '\n' to eat
878 else if (p[1] == '\n')
882 // Cover up the newline with end-of-string sentinel
891 // Handle hanging lines by ignoring them (Input file is exhausted, no
892 // \r or \n on last line)
893 // Shamus: This is wrong. Never ignore any input!
894 if (!readamt && fl->ifcnt)
901 // Really should check to see if we're at the end of the buffer!
903 fl->ifbuf[fl->ifind + fl->ifcnt] = '\0';
905 return &fl->ifbuf[fl->ifind];
909 // Truncate and return absurdly long lines.
910 if (fl->ifcnt >= QUANTUM)
912 fl->ifbuf[fl->ifind + fl->ifcnt - 1] = '\0'; // Overwrites the last buffered byte with the terminator
914 return &fl->ifbuf[fl->ifind];
917 // Relocate what's left of a line to the beginning of the buffer, and
918 // read some more of the file in; return NULL if the buffer's empty and
922 p = &fl->ifbuf[fl->ifind];
923 d = &fl->ifbuf[fl->ifcnt & 1];
925 for(i=0; i<fl->ifcnt; i++)
928 fl->ifind = fl->ifcnt & 1;
931 readamt = read(fl->ifhandle, &fl->ifbuf[fl->ifind + fl->ifcnt], QUANTUM); // Append up to QUANTUM bytes after the relocated data
936 if ((fl->ifcnt += readamt) == 0)
// Tokenize one line of input. The next line is fetched from the current input
// object (a file, a macro or a repeat block) and its tokens are deposited
// starting at etok; at the end tok is set to etok, the beginning of the line's
// tokens. While the line is being processed tok points at tokeol, so an early
// error return leaves the caller looking at an EOL token. TKEOF is returned
// when a macro exit fails; the empty-input-stack check is commented as
// returning EOF as well.
945 int TokenizeLine(void)
947 uint8_t * ln = NULL; // Ptr to current position in line
948 uint8_t * p; // Random character ptr
949 TOKEN * tk; // Token-deposit ptr
950 int state = 0; // State for keyword detector
951 int j = 0; // Var for keyword detector
952 uint8_t c; // Random char
953 uint64_t v; // Random value
954 uint8_t * nullspot = NULL; // Spot to clobber for SYMBOL termination
955 int stuffnull; // 1:terminate SYMBOL '\0' at *nullspot
957 int stringNum = 0; // Pointer to string locations in tokenized line
961 if (cur_inobj == NULL) // Return EOF if input stack is empty
964 // Get another line of input from the current input source: a file, a
965 // macro, or a repeat-block
966 switch (cur_inobj->in_type)
970 // o bump source line number;
971 // o tag the listing-line with a space;
972 // o kludge lines generated by Alcyon C.
974 if ((ln = GetNextLine()) == NULL)
976 DEBUG { printf("TokenizeLine: Calling fpop() from SRC_IFILE...\n"); }
977 if (fpop() == 0) // Pop input level
978 goto retry; // Try for more lines
981 ifent->if_prev = (IFENT *)-1; //Signal Assemble() that we have reached EOF with unbalanced if/endifs
986 curlineno++; // Bump line number
991 // AS68 compatibility, throw away all lines starting with
992 // back-quotes, tildes, or '*'
993 // On other lines, turn the first '*' into a semi-colon.
994 if (*ln == '`' || *ln == '~' || *ln == '*')
998 for(p=ln; *p!=EOS; p++)
1012 // o Handle end-of-macro;
1013 // o tag the listing-line with an at (@) sign.
1015 if ((ln = GetNextMacroLine()) == NULL)
1017 if (ExitMacro() == 0) // Exit macro (pop args, do fpop(), etc)
1018 goto retry; // Try for more lines...
1020 return TKEOF; // Oops, we got a non zero return code, signal EOF
1027 // o Handle end-of-repeat-block;
1028 // o tag the listing-line with a pound (#) sign.
1030 if ((ln = GetNextRepeatLine()) == NULL)
1032 DEBUG { printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n"); }
1041 // Save text of the line. We only do this during listings and within
1042 // macro-type blocks, since it is expensive to unconditionally copy every
1047 // General housekeeping
1048 tok = tokeol; // Set "tok" to EOL in case of error
1049 tk = etok; // Reset token ptr
1050 stuffnull = 0; // Don't stuff nulls
1051 totlines++; // Bump total #lines assembled
1053 // See if the entire line is a comment. This is a win if the programmer
1054 // puts in lots of comments
1055 if (*ln == '*' || *ln == ';' || ((*ln == '/') && (*(ln + 1) == '/')))
1058 // And here we have a very ugly hack for signalling a single line 'turn off
1059 // optimization'. There's really no nice way to do this, so hack it is!
1060 optimizeOff = 0; // Default is to take optimizations as they come
1064 optimizeOff = 1; // Signal that we don't want to optimize this line
1065 ln++; // & skip over the darned thing
1068 // Main tokenization loop;
1069 // o skip whitespace;
1070 // o handle end-of-line;
1071 // o handle symbols;
1072 // o handle single-character tokens (operators, etc.);
1073 // o handle multiple-character tokens (constants, strings, etc.).
1076 // Skip whitespace, handle EOL
1077 while (chrtab[*ln] & WHITE)
1080 // Handle EOL, comment with ';'
1081 if (*ln == EOS || *ln == ';'|| ((*ln == '/') && (*(ln + 1) == '/')))
1084 // Handle start of symbol. Symbols are null-terminated in place. The
1085 // termination is always one symbol behind, since there may be no place
1086 // for a null in the case that an operator immediately follows the name.
1091 if (stuffnull) // Terminate old symbol from previous pass
1094 v = 0; // Assume no DOT attrib follows symbol
1097 // In some cases, we need to check for a DOTx at the *beginning*
1098 // of a symbol, as the "start" of the line we're currently looking
1099 // at could be somewhere in the middle of that line!
1102 // Make sure that it's *only* a .[bwsl] following, and not the
1103 // start of a local symbol:
1104 if ((chrtab[*(ln + 1)] & DOT)
1105 && (dotxtab[*(ln + 1)] != 0)
1106 && !(chrtab[*(ln + 2)] & CTSYM))
1108 // We found a legitimate DOTx construct, so add it to the
1112 *tk++ = (TOKEN)dotxtab[*ln++];
1117 p = nullspot = ln++; // Nullspot -> start of this symbol
1119 // Find end of symbol (and compute its length)
1120 for(j=1; (int)chrtab[*ln]&CTSYM; j++)
1123 // Handle "DOT" special forms (like ".b") that follow a normal
1124 // symbol or keyword:
1127 *ln++ = EOS; // Terminate symbol
1128 stuffnull = 0; // And never try it again
1130 // Character following the `.' must have a DOT attribute, and
1131 // the character after THAT one must not have a start-symbol
1132 // attribute (to prevent symbols that look like, for example,
1133 // "zingo.barf", which might be a good idea anyway....)
1134 if (((chrtab[*ln] & DOT) == 0) || (dotxtab[*ln] == 0))
1135 return error("[bwsl] must follow '.' in symbol");
1137 v = (uint32_t)dotxtab[*ln++];
1139 if (chrtab[*ln] & CTSYM)
1140 return error("misuse of '.'; not allowed in symbols");
1143 // If the symbol is small, check to see if it's really the name of
1147 for(state=0; state>=0;)
1149 j = (int)tolowertab[*p++];
1152 if (kwcheck[j] != state)
1158 if (*p == EOS || p == ln)
1172 // Make j = -1 if user tries to use a RISC register while in 68K mode
1173 if (!(rgpu || rdsp) && ((TOKEN)j >= KW_R0 && (TOKEN)j <= KW_R31))
1178 // Make j = -1 if time, date etc with no preceding ^^
1179 // defined, referenced, streq, macdef, date and time
1182 case 112: // defined
1183 case 113: // referenced
1191 // If not tokenized keyword OR token was not found
1192 if ((j < 0) || (state < 0))
1196 //problem here: nullspot is a char * but TOKEN is a uint32_t. On a 64-bit
1197 //system, this will cause all kinds of mischief.
1199 *tk++ = (TOKEN)nullspot;
1201 string[stringNum] = nullspot;
1212 if (v) // Record attribute token (if any)
1215 if (stuffnull) // Arrange for string termination on next pass
1221 // Handle identity tokens
1228 // Handle multiple-character tokens
1233 case '!': // ! or !=
1243 case '\'': // 'string'
1246 // Hardcoded for now, maybe this will change in the future
1251 case '\"': // "string"
1255 string[stringNum] = ln;
1259 for(p=ln; *ln!=EOS && *ln!=c1;)
1268 return(error("unterminated string"));
1297 // If we're evaluating a macro
1298 // this is valid and expands to
1302 warn("bad backslash code in string");
1312 return error("unterminated string");
1316 case '$': // $, hex constant
1317 if (chrtab[*ln] & HDIGIT)
1321 // Parse the hex value
1322 while (hextab[*ln] >= 0) // NOTE(review): *ln is a uint8_t and hextab has 128 entries; confirm a byte >= 0x80 directly after a hex constant cannot index past the table
1323 v = (v << 4) + (int)hextab[*ln++];
1327 if (obj_format == BSD)
1329 if ((*(ln + 1) & 0xDF) == 'B')
1334 else if ((*(ln + 1) & 0xDF) == 'W')
1339 else if ((*(ln + 1) & 0xDF) == 'L')
1348 *tk++ = v >> 32; // High LONG of 64-bit value
1349 *tk++ = v & 0xFFFFFFFF; // Low LONG of 64-bit value
1351 if (obj_format == ALCYON)
1355 if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
1360 else if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
1372 case '<': // < or << or <> or <=
1391 case ':': // : or ::
1401 case '=': // = or ==
1411 case '>': // > or >> or >=
1426 case '%': // % or binary constant
1427 if (*ln < '0' || *ln > '1')
1435 while (*ln >= '0' && *ln <= '1')
1436 v = (v << 1) + *ln++ - '0';
1440 if ((*(ln + 1) == 'b') || (*(ln + 1) == 'B'))
1446 if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
1452 if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
1460 *tk++ = v >> 32; // High LONG of 64-bit value
1461 *tk++ = v & 0xFFFFFFFF; // Low LONG of 64-bit value
1463 case '@': // @ or octal constant
1464 if (*ln < '0' || *ln > '7')
1472 while (*ln >= '0' && *ln <= '7')
1473 v = (v << 3) + *ln++ - '0';
1477 if ((*(ln+1) == 'b') || (*(ln+1) == 'B'))
1483 if ((*(ln+1) == 'w') || (*(ln+1) == 'W'))
1489 if ((*(ln+1) == 'l') || (*(ln+1) == 'L'))
1497 *tk++ = v >> 32; // High LONG of 64-bit value
1498 *tk++ = v & 0xFFFFFFFF; // Low LONG of 64-bit value
1500 case '^': // ^ or ^^ <operator-name>
1507 if (((int)chrtab[*++ln] & STSYM) == 0)
1509 error("invalid symbol following ^^");
1515 while ((int)chrtab[*ln] & CTSYM)
1518 for(state=0; state>=0;)
1520 // Get char, convert to lowercase
1523 if (j >= 'A' && j <= 'Z')
1528 if (kwcheck[j] != state)
1534 if (*p == EOS || p == ln)
1543 if (j < 0 || state < 0)
1545 error("unknown symbol following ^^");
1552 interror(2); // Bad MULTX entry in chrtab
1557 // Handle decimal constant
1562 while ((int)chrtab[*ln] & DIGIT)
1563 v = (v * 10) + *ln++ - '0';
1565 // See if there's a .[bwl] after the constant & deal with it if so
1568 if ((*(ln + 1) == 'b') || (*(ln + 1) == 'B'))
1573 *tk++ = 0; // Hi LONG of 64-bits
1577 else if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
1582 *tk++ = 0; // Hi LONG of 64-bits
1586 else if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
1591 *tk++ = 0; // Hi LONG of 64-bits
1599 *tk++ = v >> 32; // High LONG of 64-bit value
1600 *tk++ = v & 0xFFFFFFFF; // Low LONG of 64-bit value
1603 //printf("CONST: %i\n", v);
1607 // Handle illegal character
1608 return error("illegal character $%02X found", *ln);
1611 // Terminate line of tokens and return "success."
1614 tok = etok; // Set tok to beginning of line
1616 if (stuffnull) // Terminate last SYMBOL
1626 // .GOTO <label> goto directive
1628 // The label is searched for starting from the first line of the current,
1629 // enclosing macro definition. If no enclosing macro exists, an error is
1632 // A label is of the form:
1634 // :<name><whitespace>
1636 // The colon must appear in column 1. The label is stripped prior to macro
1637 // expansion, and is NOT subject to macro expansion. The whitespace may also
// The WORD parameter is not used.
1640 int d_goto(WORD unused)
1642 // Setup for the search
1644 return error("missing label");
1646 char * sym = string[tok[1]]; // tok[1] is used as an index into string[] to get the label text
1649 if (cur_inobj->in_type != SRC_IMACRO)
1650 return error("goto not in macro");
1652 IMACRO * imacro = cur_inobj->inobj.imacro;
1653 LLIST * defln = imacro->im_macro->lineList;
1655 // Attempt to find the label, starting with the first line.
1656 for(; defln!=NULL; defln=defln->next)
1658 // Must start with a colon
1659 if (defln->line[0] == ':')
1661 // Compare names (sleazo string compare)
1663 char * s2 = defln->line;
1665 // Either we will match the strings to EOS on both, or we will
1666 // match EOS on string 1 to whitespace on string 2. Otherwise, we
1668 while ((*s1 == *s2) || ((*s1 == EOS) && (chrtab[*s2] & WHITE)))
1670 // If we reached the end of string 1 (sym), we're done.
1671 // Note that we're also checking for the end of string 2 as
1672 // well, since we've established they're equal above.
1675 // Found the label, set new macro next-line and return.
1676 imacro->im_nextln = defln;
1686 return error("goto label not found");
// Print the tokens held in tokbuf[], up to the first EOL token, in readable
// form on stdout. The output is prefixed with the current value of sloc.
1690 void DumpTokenBuffer(void)
1692 printf("Tokens [%X]: ", sloc);
1694 for(TOKEN * t=tokbuf; *t!=EOL; t++)
1698 else if (*t == CONST)
1700 printf("[CONST: $%lX]", ((uint64_t)t[1] << 32) | (uint64_t)t[2]); // NOTE(review): %lX expects unsigned long; it only matches uint64_t where long is 64 bits wide
1703 else if (*t == ACONST)
1705 printf("[ACONST: $%X, $%X]", (uint32_t)t[1], (uint32_t)t[2]);
1708 else if (*t == STRING)
1711 printf("[STRING:\"%s\"]", string[*t]);
1713 else if (*t == SYMBOL)
1716 printf("[SYMBOL:\"%s\"]", string[*t]);
1720 else if (*t == TKEOF)
1722 else if (*t == DEQUALS)
1723 printf("[DEQUALS]");
1728 else if (*t == DCOLON)
1740 else if (*t == UNMINUS)
1741 printf("[UNMINUS]");
1742 else if (*t == DOTB)
1744 else if (*t == DOTW)
1746 else if (*t == DOTL)
1748 else if (*t == DOTI)
1750 else if (*t == ENDEXPR)
1751 printf("[ENDEXPR]");
1752 else if (*t == CR_ABSCOUNT)
1753 printf("[CR_ABSCOUNT]");
1754 else if (*t == CR_DEFINED)
1755 printf("[CR_DEFINED]");
1756 else if (*t == CR_REFERENCED)
1757 printf("[CR_REFERENCED]");
1758 else if (*t == CR_STREQ)
1759 printf("[CR_STREQ]");
1760 else if (*t == CR_MACDEF)
1761 printf("[CR_MACDEF]");
1762 else if (*t == CR_TIME)
1763 printf("[CR_TIME]");
1764 else if (*t == CR_DATE)
1765 printf("[CR_DATE]");
1766 else if (*t >= 0x20 && *t <= 0x2F) // Token values in these two ranges are printed as the character itself
1767 printf("[%c]", (char)*t);
1768 else if (*t >= 0x3A && *t <= 0x3F)
1769 printf("[%c]", (char)*t);
1770 else if (*t >= 0x80 && *t <= 0x87) // Printed as D0..D7
1771 printf("[D%u]", ((uint32_t)*t) - 0x80);
1772 else if (*t >= 0x88 && *t <= 0x8F) // Printed as A0..A7
1773 printf("[A%u]", ((uint32_t)*t) - 0x88);
1775 printf("[%X:%c]", (uint32_t)*t, (char)*t); // Fallback: hex value and the value as a character