2 // RMAC - Reboot's Macro Assembler for all Atari computers
3 // TOKEN.C - Token Handling
4 // Copyright (C) 199x Landon Dyer, 2011-2017 Reboot and Friends
5 // RMAC derived from MADMAC v1.07 Written by Landon Dyer, 1986
6 // Source utilised with the kind permission of Landon Dyer
19 #define DECL_KW // Declare keyword arrays
20 #define DEF_KW // Declare keyword values
21 #include "kwtab.h" // Incl generated keyword tables & defs
// File-scope tokenizer state. None of these declarations is static, so they
// all have external linkage.
// NOTE(review): tolowertab, hextab and dotxtab have 128 entries but are indexed
// with uint8_t characters in TokenizeLine; any index above 127 is out of bounds
// unless a chrtab test has rejected the character first.
24 int lnsave; // 1: keep a strcpy() copy of the text of the current line
25 uint16_t curlineno; // Current line number (64K max currently)
26 int totlines; // Total # of lines
27 int mjump_align = 0; // mjump alignment flag
28 char lntag; // Line tag
29 char * curfname; // Current filename
30 char tolowertab[128]; // Uppercase ==> lowercase
31 int8_t hextab[128]; // Table of hex values (tested with >= 0 before use)
32 char dotxtab[128]; // Table for ".b", ".s", etc.
33 char irbuf[LNSIZ]; // Text for .rept block line
34 char lnbuf[LNSIZ]; // Text of current line
35 WORD filecount; // Unique file number counter
36 WORD cfileno; // Current file number
37 TOKEN * tok; // Ptr to current token
38 TOKEN * etok; // Ptr past last token in tokbuf[]
39 TOKEN tokeol[1] = {EOL}; // Bailout end-of-line token
40 char * string[TOKBUFSIZE*2];// Token buffer string pointer storage
41 int optimizeOff; // Optimization override flag
43 // File record, used to maintain a list of every include file ever visited
// FILEREC is shorthand for struct _filerec; the members used in this file are
// frec_next and frec_name.
44 #define FILEREC struct _filerec
// Input-source stack and the free lists that a_inobj() and fpop() recycle.
54 INOBJ * cur_inobj; // Ptr current input obj (IFILE/IMACRO/IREPT)
55 static INOBJ * f_inobj; // Ptr list of free INOBJs
56 static IFILE * f_ifile; // Ptr list of free IFILEs
57 static IMACRO * f_imacro; // Ptr list of free IMACROs
59 static TOKEN tokbuf[TOKBUFSIZE]; // Token buffer (stack-like, all files)
// Character classification table, indexed by the unsigned byte value of a
// source character. Each entry is a sum of the flags ILLEG, WHITE, MULTX, SELF,
// STSYM (may start a symbol), CTSYM (may continue a symbol), DIGIT, HDIGIT
// (hex digit) and DOT (letter accepted as a size suffix after a period).
// The tokenizer tests entries with expressions such as chrtab[c] & WHITE.
61 uint8_t chrtab[0x100] = {
62 ILLEG, ILLEG, ILLEG, ILLEG, // NUL SOH STX ETX
63 ILLEG, ILLEG, ILLEG, ILLEG, // EOT ENQ ACK BEL
64 ILLEG, WHITE, ILLEG, ILLEG, // BS HT LF VT
65 WHITE, ILLEG, ILLEG, ILLEG, // FF CR SO SI
67 ILLEG, ILLEG, ILLEG, ILLEG, // DLE DC1 DC2 DC3
68 ILLEG, ILLEG, ILLEG, ILLEG, // DC4 NAK SYN ETB
69 ILLEG, ILLEG, ILLEG, ILLEG, // CAN EM SUB ESC
70 ILLEG, ILLEG, ILLEG, ILLEG, // FS GS RS US
72 WHITE, MULTX, MULTX, SELF, // SP ! " #
73 MULTX+CTSYM, MULTX, SELF, MULTX, // $ % & '
74 SELF, SELF, SELF, SELF, // ( ) * +
75 SELF, SELF, STSYM, SELF, // , - . /
77 DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 0 1
78 DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 2 3
79 DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 4 5
80 DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 6 7
81 DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 8 9
83 MULTX, MULTX, MULTX, STSYM+CTSYM, // < = > ?
85 MULTX, STSYM+CTSYM+HDIGIT, // @ A
86 DOT+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT, // B C
87 DOT+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT, // D E
88 STSYM+CTSYM+HDIGIT, STSYM+CTSYM, // F G
89 STSYM+CTSYM, DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // H I J K
90 DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // L M N O
92 DOT+STSYM+CTSYM, DOT+STSYM+CTSYM, STSYM+CTSYM, DOT+STSYM+CTSYM, // P Q R S
93 STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, DOT+STSYM+CTSYM, // T U V W
// Uppercase X carries DOT just like lowercase x: dotxtab has an entry for it,
// and the size-suffix test requires both the DOT flag and a dotxtab entry, so
// without the flag an uppercase .X suffix was rejected while .x was accepted.
94 DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, SELF, // X Y Z [
95 SELF, SELF, MULTX, STSYM+CTSYM, // \ ] ^ _
97 ILLEG, STSYM+CTSYM+HDIGIT, // ` a
98 DOT+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT, // b c
99 DOT+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT, // d e
100 STSYM+CTSYM+HDIGIT, STSYM+CTSYM, // f g
101 STSYM+CTSYM, DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // h i j k
102 DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // l m n o
104 DOT+STSYM+CTSYM, DOT+STSYM+CTSYM, STSYM+CTSYM, DOT+STSYM+CTSYM, // p q r s
105 STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, DOT+STSYM+CTSYM, // t u v w
106 DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, SELF, // x y z {
107 SELF, SELF, SELF, ILLEG, // | } ~ DEL
109 // Anything above $7F is illegal (and yes, we need to check for this,
110 // otherwise you get strange and spurious errors that will lead you astray)
111 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
112 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
113 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
114 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
115 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
116 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
117 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
118 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
119 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
120 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
121 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
122 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
123 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
124 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
125 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
126 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG
129 // Names of registers
// ExpandMacro turns a register token back into text with
// regname[token - KW_D0]. The trailing numbers on each row appear to be the
// first and last token value of that row, which presumes KW_D0 is 128 - TODO
// confirm against kwtab.h. Empty strings are placeholders for unnamed values.
// NOTE(review): rows 272,279 and 304,311 hold the same eight names; confirm
// that this duplication is intended.
130 static char * regname[] = {
131 "d0","d1","d2","d3","d4","d5","d6","d7", // 128,135
132 "a0","a1","a2","a3","a4","a5","a6","sp", // 136,143
133 "ssp","pc","sr","ccr","regequ","set","reg","r0", // 144,151
134 "r1","r2","r3","r4","r5","r6","r7","r8", // 152,159
135 "r9","r10","r11","r12","r13","r14","r15","r16", // 160,167
136 "r17","r18","r19","r20","r21","r22","r23","r24", // 168,175
137 "r25","r26","r27","r28","r29","r30","r31","ccdef", // 176,183
138 "usp","ic40","dc40","bc40","sfc","dfc","","vbr", // 184,191
139 "cacr","caar","msp","isp","tc","itt0","itt1","dtt0", // 192,199
140 "dtt1","mmusr","urp","srp","iacr0","iacr1","dacr0","dacr1", // 200,207
141 "tt0","tt1","crp","","","","","", // 208,215
142 "","","","","fpiar","fpsr","fpcr","", // 216,223
143 "fp0","fp1","fp2","fp3","fp4","fp5","fp6","fp7", // 224,231
144 "","","","","","","","", // 232,239
145 "","","","","","","","", // 240,247
146 "","","","","","","","", // 248,255
147 "","","","","x0","x1","y0","y1", // 256,263
148 "","b0","","b2","","b1","a","b", // 264,271
149 "mr","omr","la","lc","ssh","ssl","ss","", // 272,279
150 "n0","n1","n2","n3","n4","n5","n6","n7", // 280,287
151 "m0","m1","m2","m3","m4","m5","m6","m7", // 288,295
152 "","","","","","","l","p", // 296,303
153 "mr","omr","la","lc","ssh","ssl","ss","", // 304,311
154 "a10","b10","x","y","","","ab","ba" // 312,319
// Names of the 32 RISC registers. ExpandMacro indexes this table with
// token - KW_R0 for tokens in the range KW_R0..KW_R31.
157 static char * riscregname[] = {
158 "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
159 "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
160 "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
161 "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31"
166 // Initialize tokenizer
// Resets the file bookkeeping globals and fills the tolowertab, hextab and
// dotxtab lookup tables. Takes no arguments and returns nothing.
168 void InitTokenizer(void)
171 char * htab = "0123456789abcdefABCDEF"; // Hex character table
173 lnsave = 0; // Don't save lines
174 curfname = ""; // No file, empty filename
// Both counters start at (WORD)-1 because include() pre-increments filecount
// and copies it into cfileno, which makes the first file number 0.
175 filecount = (WORD)-1;
176 cfileno = (WORD)-1; // cfileno gets bumped to 0
188 // Initialize hex, "dot" and tolower tables
193 tolowertab[i] = (char)i;
// htab positions 0..15 are 0-9 and a-f and map to their own index; positions
// 16..21 are A-F, so subtracting 6 maps them onto 10..15 as well.
196 for(i=0; htab[i]!=EOS; i++)
197 hextab[htab[i]] = (char)((i < 16) ? i : i - 6);
// Setting bit 0x20 turns an ASCII uppercase letter into its lowercase form.
199 for(i='A'; i<='Z'; i++)
200 tolowertab[i] |= 0x20;
202 // These characters are legal immediately after a period
203 dotxtab['b'] = DOTB; // .b .B
// The two disabled lines below used to map .s onto DOTB; .s now has its own
// DOTS entry further down.
205 //dotxtab['s'] = DOTB;
206 //dotxtab['S'] = DOTB;
207 dotxtab['w'] = DOTW; // .w .W
209 dotxtab['l'] = DOTL; // .l .L
211 dotxtab['i'] = DOTI; // .i .I (NOTE(review): purpose of this suffix is undocumented)
213 dotxtab['D'] = DOTD; // .d .D (double)
215 dotxtab['S'] = DOTS; // .s .S
217 dotxtab['Q'] = DOTQ; // .q .Q (quad word)
219 dotxtab['X'] = DOTX; // .x .X
221 dotxtab['P'] = DOTP; // .p .P
// Points curfname at the name to show in error messages. The visible code
// walks the filerec list and ends up with either a record name (frec_name) or
// one of two placeholder strings.
// NOTE(review): the conditions guarding each assignment and the origin of fnum
// are not visible in this listing - confirm against the full source.
226 void SetFilenameForErrorReporting(void)
230 // Check for absolute top filename (this should never happen)
233 curfname = "(*top*)";
237 FILEREC * fr = filerec;
239 // Advance to the correct record...
240 while (fr != NULL && fnum != 0)
246 // Check for file # record not found (this should never happen either)
249 curfname = "(*NOT FOUND*)";
253 curfname = fr->frec_name;
258 // Allocate an IFILE, IMACRO or IREPT
// typ selects the payload: SRC_IFILE, SRC_IMACRO or SRC_IREPT. The INOBJ and
// its payload are taken from malloc or from the f_inobj, f_ifile and f_imacro
// free lists. The new object then records the current ifent, tok and etok and
// links to the previous cur_inobj through in_link.
// NOTE(review): no NULL check on any malloc result is visible here - confirm.
260 INOBJ * a_inobj(int typ)
266 // Allocate and initialize INOBJ first
268 inobj = malloc(sizeof(INOBJ));
// Otherwise reuse the head of the INOBJ free list.
272 f_inobj = f_inobj->in_link;
277 case SRC_IFILE: // Alloc and init an IFILE
279 ifile = malloc(sizeof(IFILE));
283 f_ifile = f_ifile->if_link;
286 inobj->inobj.ifile = ifile;
289 case SRC_IMACRO: // Alloc and init an IMACRO
290 if (f_imacro == NULL)
291 imacro = malloc(sizeof(IMACRO));
295 f_imacro = f_imacro->im_link;
298 inobj->inobj.imacro = imacro;
301 case SRC_IREPT: // Alloc and init an IREPT
// IREPT objects have no free list in the visible code; always malloc'ed.
302 inobj->inobj.irept = malloc(sizeof(IREPT));
303 DEBUG { printf("alloc IREPT\n"); }
307 // Install INOBJ on top of input stack
308 inobj->in_ifent = ifent; // Record .if context on entry
309 inobj->in_type = (WORD)typ;
310 inobj->in_otok = tok;
311 inobj->in_etok = etok;
312 inobj->in_link = cur_inobj;
320 // Perform macro substitution from 'src' to 'dest'. Return OK or some error.
321 // A macro reference is in one of two forms:
322 // \name <non-name-character>
324 // A doubled backslash (\\) is compressed to a single backslash (\).
325 // Argument definitions have been pre-tokenized, so we have to turn them back
326 // into text. This means that numbers, in particular, become hex, regardless of
327 // their representation when the macro was invoked. This is a hack.
328 // A label may appear at the beginning of the line:
329 // :<name><whitespace>
330 // (the colon must be in the first column). These labels are stripped before
331 // macro expansion takes place.
// Parameters:
//   src     - text of the macro line to expand
//   dest    - buffer that receives the expanded text
//   destsiz - size of dest in bytes
// Errors are reported through error() and, when the expansion does not fit,
// through fatal(); the result of that call is returned.
333 int ExpandMacro(char * src, char * dest, int destsiz)
336 int questmark; // \? for testing argument existence
337 char mname[128]; // Assume max size of a formal arg name
338 char numbuf[20]; // Buffer for text of CONSTs
341 char ** symbolString;
343 DEBUG { printf("ExM: src=\"%s\"\n", src); }
345 IMACRO * imacro = cur_inobj->inobj.imacro;
346 int macnum = (int)(imacro->im_macro->sattr);
348 char * dst = dest; // Next dest slot
349 char * edst = dest + destsiz - 1; // Address of the last byte of dest
351 // Check for (and skip over) any "label" on the line
357 while (*s != EOS && !(chrtab[*s] & WHITE))
361 s++; // Skip first whitespace
364 // Expand the rest of the line
367 // Copy single character
373 // Skip comments in case a loose @ or \ is in there
374 // In that case the tokeniser was trying to expand it.
375 if ((*s == ';') || ((*s == '/') && (*(s + 1) == '/')))
380 // Do macro expansion
388 case '\\': // \\, \ (collapse to single backslash)
394 case '?': // \? <macro> set `questmark' flag
398 case '#': // \#, number of arguments
399 sprintf(numbuf, "%d", (int)imacro->im_nargs);
401 case '!': // \! size suffix supplied on invocation
402 switch ((int)imacro->im_siz)
404 case SIZN: d = ""; break;
405 case SIZB: d = ".b"; break;
406 case SIZW: d = ".w"; break;
407 case SIZL: d = ".l"; break;
411 case '~': // ==> unique label string Mnnnn...
412 sprintf(numbuf, "M%u", curuniq);
428 return error("missing argument name");
431 // \n ==> argument number 'n', 0..9
432 if (chrtab[*s] & DIGIT)
442 // Get argument name: \name, \{name}
452 while (chrtab[*s] & CTSYM);
457 for(++s; *s != EOS && *s != '}';)
461 return error("missing closing brace ('}')");
468 // Lookup the argument and copy its (string) value into the
469 // destination string
470 DEBUG { printf("argument='%s'\n", mname); }
472 if ((arg = lookup(mname, MACARG, macnum)) == NULL)
473 return error("undefined argument: '%s'", mname);
476 // Convert a string of tokens (terminated with EOL) back into
477 // text. If an argument is out of range (not specified in the
478 // macro invocation) then it is ignored.
479 i = (int)arg->svalue;
481 DEBUG { printf("~argnumber=%d\n", i); }
484 if (i < imacro->im_nargs)
486 tk = imacro->argument[i].token;
487 symbolString = imacro->argument[i].string;
490 // printf("ExM: Preparing to parse argument #%u...\n", i);
496 // 0 if the argument is empty or nonexistent,
497 // 1 if the argument is not empty
500 if (tk == NULL || *tk == EOL)
506 *dst++ = (char)(questmark + '0');
510 // Argument # is in range, so expand it
515 // Reverse-translation from a token number to a string.
516 // This is a hack. It might be better table-driven.
// NOTE(review): only a lower bound on the token is visible before regname is
// indexed - confirm that tokens beyond the end of the table cannot reach here.
519 if ((*tk >= KW_D0) && !rdsp && !rgpu)
521 d = regname[(int)*tk++ - KW_D0];
524 else if ((*tk >= KW_R0) && (*tk <= KW_R31))
526 d = riscregname[(int)*tk++ - KW_R0];
535 // d = (char *)*tk++;
538 // This fix should be done for strings too
// The token that follows is an index into the per-argument string table.
539 d = symbolString[*tk++];
540 DEBUG { printf("ExM: SYMBOL=\"%s\"", d); }
545 // d = (char *)*tk++;
548 d = symbolString[*tk++];
569 // Shamus: Changing the format specifier from %lx to %ux caused the assembler
570 // to choke on legitimate code... Need to investigate this further
571 // before changing anything else here!
// NOTE(review): %lx expects an unsigned long but the argument is cast to
// uint64_t; the two differ in size where long is 32 bits. PRIx64 from
// inttypes.h would match the argument type.
573 sprintf(numbuf, "$%lx", (uint64_t)*tk++);
638 *dst++ = (char)*(tk - 1);
643 // If 'd' != NULL, copy string to destination
647 DEBUG printf("d='%s'\n", d);
666 DEBUG { printf("ExM: dst=\"%s\"\n", dest); }
671 DEBUG { printf("*** OVERFLOW LINE ***\n%s\n", dest); }
672 return fatal("line too long as a result of macro expansion");
677 // Get next line of text from a macro
// Advances im_nextln of the current macro input object and expands the line
// into the im_lnbuf buffer of that object, whose address is returned.
// NOTE(review): the value returned by ExpandMacro is discarded, so an
// expansion error does not stop the line from being returned - confirm intent.
679 char * GetNextMacroLine(void)
681 IMACRO * imacro = cur_inobj->inobj.imacro;
682 // LONG * strp = imacro->im_nextln;
683 LLIST * strp = imacro->im_nextln;
685 if (strp == NULL) // End-of-macro
688 imacro->im_nextln = strp->next;
689 // ExpandMacro((char *)(strp + 1), imacro->im_lnbuf, LNSIZ);
690 ExpandMacro(strp->line, imacro->im_lnbuf, LNSIZ);
692 return imacro->im_lnbuf;
697 // Get next line of text from a repeat block
// Copies the next line of the current repeat block into the global irbuf.
// When the end of the line list is reached, the list restarts from ir_firstln
// and ir_count is tested and then decremented.
699 char * GetNextRepeatLine(void)
701 IREPT * irept = cur_inobj->inobj.irept;
702 // LONG * strp = irept->ir_nextln; // initial null
704 // Do repeat at end of .rept block's string list
706 if (irept->ir_nextln == NULL)
708 DEBUG { printf("back-to-top-of-repeat-block count=%d\n", (int)irept->ir_count); }
709 irept->ir_nextln = irept->ir_firstln; // copy first line
// Post-decrement: the comparison sees the count before it is reduced.
711 if (irept->ir_count-- == 0)
713 DEBUG { printf("end-repeat-block\n"); }
717 // strp = irept->ir_nextln;
719 // Mark the current macro line in the irept object
720 // This is probably overkill - a global variable
721 // would suffice here (it only gets used during
722 // error reporting anyway)
723 irept->lineno = irept->ir_nextln->lineno;
725 // strcpy(irbuf, (char *)(irept->ir_nextln + 1));
// NOTE(review): strcpy is unbounded and irbuf holds LNSIZ bytes; this presumes
// that stored lines never exceed LNSIZ - confirm where the lines are recorded.
726 strcpy(irbuf, irept->ir_nextln->line);
727 DEBUG { printf("repeat line='%s'\n", irbuf); }
728 // irept->ir_nextln = (LONG *)*strp;
729 irept->ir_nextln = irept->ir_nextln->next;
736 // Include a source file used at the root, and for ".include" files
// Parameters:
//   handle - file handle stored in the new IFILE (fpop passes it to close)
//   fname  - file name; a strdup copy becomes curfname and the record name
// Saves the current line number, file name and file number in the new IFILE so
// that fpop can restore them, then appends a FILEREC to the file list.
738 int include(int handle, char * fname)
741 DEBUG { printf("[include: %s, cfileno=%u]\n", fname, cfileno); }
743 // Alloc and initialize include-descriptors
744 INOBJ * inobj = a_inobj(SRC_IFILE);
745 IFILE * ifile = inobj->inobj.ifile;
747 ifile->ifhandle = handle; // Setup file handle
748 ifile->ifind = ifile->ifcnt = 0; // Setup buffer indices
749 ifile->ifoldlineno = curlineno; // Save old line number
750 ifile->ifoldfname = curfname; // Save old filename
751 ifile->ifno = cfileno; // Save old file number
753 // NB: This *must* be preincrement, we're adding one to the filecount here!
754 cfileno = ++filecount; // Compute NEW file number
// NOTE(review): neither the strdup result nor the malloc result below is
// checked for NULL in the visible code - confirm.
755 curfname = strdup(fname); // Set current filename (alloc storage)
756 curlineno = 0; // Start on line zero
758 // Add another file to the file-record
759 FILEREC * fr = (FILEREC *)malloc(sizeof(FILEREC));
760 fr->frec_next = NULL;
761 fr->frec_name = curfname;
764 filerec = fr; // Add first filerec
766 last_fr->frec_next = fr; // Append to list of filerecs
769 DEBUG { printf("[include: curfname: %s, cfileno=%u]\n", curfname, cfileno); }
776 // Pop the current input level
// The function header is not part of this listing; TokenizeLine calls it as
// fpop() and treats a zero result as success. The visible code unwinds any
// .if levels opened inside the input object, restores tok and etok, releases
// the payload according to in_type and unlinks the object from cur_inobj.
780 INOBJ * inobj = cur_inobj;
785 // Pop IFENT levels until we reach the conditional assembly context we
786 // were at when the input object was entered.
787 int numUnmatched = 0;
789 while (ifent != inobj->in_ifent)
791 if (d_endif() != 0) // Something bad happened during endif parsing?
792 return -1; // If yes, bail instead of getting stuck in a loop
797 // Warn the user that missing .endif directives were closed automatically
798 if (numUnmatched > 0)
799 warn("missing %d .endif(s)", numUnmatched);
801 tok = inobj->in_otok; // Restore tok and etok
802 etok = inobj->in_etok;
804 switch (inobj->in_type)
806 case SRC_IFILE: // Pop and release an IFILE
808 DEBUG { printf("[Leaving: %s]\n", curfname); }
810 IFILE * ifile = inobj->inobj.ifile;
811 ifile->if_link = f_ifile;
813 close(ifile->ifhandle); // Close source file
814 DEBUG { printf("[fpop (pre): curfname=%s]\n", curfname); }
815 curfname = ifile->ifoldfname; // Set current filename
816 DEBUG { printf("[fpop (post): curfname=%s]\n", curfname); }
817 DEBUG { printf("[fpop: (pre) cfileno=%d ifile->ifno=%d]\n", (int)cfileno, (int)ifile->ifno); }
818 curlineno = ifile->ifoldlineno; // Set current line#
819 DEBUG { printf("cfileno=%d ifile->ifno=%d\n", (int)cfileno, (int)ifile->ifno); }
820 cfileno = ifile->ifno; // Restore current file number
821 DEBUG { printf("[fpop: (post) cfileno=%d ifile->ifno=%d]\n", (int)cfileno, (int)ifile->ifno); }
825 case SRC_IMACRO: // Pop and release an IMACRO
827 IMACRO * imacro = inobj->inobj.imacro;
828 imacro->im_link = f_imacro;
833 case SRC_IREPT: // Pop and release an IREPT
835 DEBUG { printf("dealloc IREPT\n"); }
836 LLIST * p = inobj->inobj.irept->ir_firstln;
838 // Deallocate repeat lines
// Unlink the object from the input stack and chain it to the free list.
849 cur_inobj = inobj->in_link;
850 inobj->in_link = f_inobj;
858 // Get line from file into buf, return NULL on EOF or ptr to the start of a
// Works on the ifbuf buffer of the current IFILE: ifind is the offset of the
// unread data and ifcnt the number of unread bytes. More data is fetched with
// read() in chunks of QUANTUM bytes.
861 char * GetNextLine(void)
865 int readamt = -1; // 0 if last read() yielded 0 bytes
866 IFILE * fl = cur_inobj->inobj.ifile;
870 // Scan for next end-of-line; handle stupid text formats by treating
871 // \r\n the same as \n. (lone '\r' at end of buffer means we have to
873 d = &fl->ifbuf[fl->ifind];
875 for(p=d, i=0, j=fl->ifcnt; i<j; i++, p++)
877 if (*p == '\r' || *p == '\n')
884 break; // Need to read more, then look for '\n' to eat
885 else if (p[1] == '\n')
889 // Cover up the newline with end-of-string sentinel
898 // Handle hanging lines by ignoring them (Input file is exhausted, no
899 // \r or \n on last line)
900 // Shamus: This is wrong. Never ignore any input!
901 if (!readamt && fl->ifcnt)
908 // Really should check to see if we're at the end of the buffer!
// NOTE(review): this write lands one byte past the unread data; no bounds
// check against the size of ifbuf is visible - confirm the buffer has slack.
910 fl->ifbuf[fl->ifind + fl->ifcnt] = '\0';
912 return &fl->ifbuf[fl->ifind];
916 // Truncate and return absurdly long lines.
917 if (fl->ifcnt >= QUANTUM)
919 fl->ifbuf[fl->ifind + fl->ifcnt - 1] = '\0';
921 return &fl->ifbuf[fl->ifind];
924 // Relocate what's left of a line to the beginning of the buffer, and
925 // read some more of the file in; return NULL if the buffer's empty and
929 p = &fl->ifbuf[fl->ifind];
// The destination offset is 0 or 1 depending on the parity of ifcnt.
930 d = &fl->ifbuf[fl->ifcnt & 1];
932 for(i=0; i<fl->ifcnt; i++)
935 fl->ifind = fl->ifcnt & 1;
938 readamt = read(fl->ifhandle, &fl->ifbuf[fl->ifind + fl->ifcnt], QUANTUM);
// NOTE(review): read() returns -1 on error; no handling of a negative readamt
// is visible before it is added to ifcnt - confirm.
943 if ((fl->ifcnt += readamt) == 0)
// Fetches the next line from the current input object (file, macro or repeat
// block) and converts it into tokens, written through tk starting at etok.
// Symbol and string text is recorded in the global string[] array and the
// token stream stores the index. On success tok is set to etok; error paths
// return the result of error(), and TKEOF is returned when input runs out.
952 int TokenizeLine(void)
954 uint8_t * ln = NULL; // Ptr to current position in line
955 uint8_t * p; // Random character ptr
956 PTR tk; // Token-deposit ptr
957 int state = 0; // State for keyword detector
958 int j = 0; // Var for keyword detector
959 uint8_t c; // Random char
960 uint64_t v; // Random value
961 uint32_t cursize = 0; // Current line's size (.b, .w, .l, .s, .q, .d)
962 double f; // Random float
963 uint8_t * nullspot = NULL; // Spot to clobber for SYMBOL termination
964 int stuffnull; // 1:terminate SYMBOL '\0' at *nullspot
966 int stringNum = 0; // Pointer to string locations in tokenized line
970 if (cur_inobj == NULL) // Return EOF if input stack is empty
973 // Get another line of input from the current input source: a file, a
974 // macro, or a repeat-block
975 switch (cur_inobj->in_type)
979 // o bump source line number;
980 // o tag the listing-line with a space;
981 // o kludge lines generated by Alcyon C.
983 if ((ln = GetNextLine()) == NULL)
985 DEBUG { printf("TokenizeLine: Calling fpop() from SRC_IFILE...\n"); }
986 if (fpop() == 0) // Pop input level
987 goto retry; // Try for more lines
990 ifent->if_prev = (IFENT *)-1; //Signal Assemble() that we have reached EOF with unbalanced if/endifs
995 curlineno++; // Bump line number
1000 // AS68 compatibility, throw away all lines starting with
1001 // back-quotes, tildes, or '*'
1002 // On other lines, turn the first '*' into a semi-colon.
1003 if (*ln == '`' || *ln == '~' || *ln == '*')
1007 for(p=ln; *p!=EOS; p++)
1021 // o Handle end-of-macro;
1022 // o tag the listing-line with an at (@) sign.
1024 if ((ln = GetNextMacroLine()) == NULL)
1026 if (ExitMacro() == 0) // Exit macro (pop args, do fpop(), etc)
1027 goto retry; // Try for more lines...
1029 return TKEOF; // Oops, we got a non zero return code, signal EOF
1036 // o Handle end-of-repeat-block;
1037 // o tag the listing-line with a pound (#) sign.
1039 if ((ln = GetNextRepeatLine()) == NULL)
1041 DEBUG { printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n"); }
1050 // Save text of the line. We only do this during listings and within
1051 // macro-type blocks, since it is expensive to unconditionally copy every
1056 // General housekeeping
1057 tok = tokeol; // Set "tok" to EOL in case of error
1058 tk.u32 = etok; // Reset token ptr
1059 stuffnull = 0; // Don't stuff nulls
1060 totlines++; // Bump total #lines assembled
1062 // See if the entire line is a comment. This is a win if the programmer
1063 // puts in lots of comments
1064 if (*ln == '*' || *ln == ';' || ((*ln == '/') && (*(ln + 1) == '/')))
1067 // And here we have a very ugly hack for signalling a single line 'turn off
1068 // optimization'. There's really no nice way to do this, so hack it is!
1069 optimizeOff = 0; // Default is to take optimizations as they come
1073 optimizeOff = 1; // Signal that we don't want to optimize this line
1074 ln++; // & skip over the darned thing
1077 // Main tokenization loop;
1078 // o skip whitespace;
1079 // o handle end-of-line;
1080 // o handle symbols;
1081 // o handle single-character tokens (operators, etc.);
1082 // o handle multiple-character tokens (constants, strings, etc.).
1085 // Skip whitespace, handle EOL
1086 while (chrtab[*ln] & WHITE)
1089 // Handle EOL, comment with ';'
1090 if (*ln == EOS || *ln == ';'|| ((*ln == '/') && (*(ln + 1) == '/')))
1093 // Handle start of symbol. Symbols are null-terminated in place. The
1094 // termination is always one symbol behind, since there may be no place
1095 // for a null in the case that an operator immediately follows the name.
1100 if (stuffnull) // Terminate old symbol from previous pass
1103 v = 0; // Assume no DOT attrib follows symbol
1106 // In some cases, we need to check for a DOTx at the *beginning*
1107 // of a symbol, as the "start" of the line we're currently looking
1108 // at could be somewhere in the middle of that line!
1111 // Make sure that it's *only* a .[bwsl] following, and not the
1112 // start of a local symbol:
// The chrtab DOT test comes first, so dotxtab (128 entries) is only indexed
// with characters that carry the DOT flag.
1113 if ((chrtab[*(ln + 1)] & DOT)
1114 && (dotxtab[*(ln + 1)] != 0)
1115 && !(chrtab[*(ln + 2)] & CTSYM))
1117 // We found a legitimate DOTx construct, so add it to the
1121 *tk.u32++ = (TOKEN)dotxtab[*ln++];
1126 p = nullspot = ln++; // Nullspot -> start of this symbol
1128 // Find end of symbol (and compute its length)
1129 for(j=1; (int)chrtab[*ln]&CTSYM; j++)
1132 // Handle "DOT" special forms (like ".b") that follow a normal
1133 // symbol or keyword:
1136 *ln++ = EOS; // Terminate symbol
1137 stuffnull = 0; // And never try it again
1139 // Character following the '.' must have a DOT attribute, and
1140 // the character after THAT one must not have a start-symbol
1141 // attribute (to prevent symbols that look like, for example,
1142 // "zingo.barf", which might be a good idea anyway....)
1143 if (((chrtab[*ln] & DOT) == 0) || (dotxtab[*ln] == 0))
1144 return error("[bwsl] must follow '.' in symbol");
1146 v = (uint32_t)dotxtab[*ln++];
1147 cursize = (uint32_t)v;
1149 if (chrtab[*ln] & CTSYM)
1150 return error("misuse of '.'; not allowed in symbols");
1153 // If the symbol is small, check to see if it's really the name of
1157 for(state=0; state>=0;)
1159 j = (int)tolowertab[*p++];
1162 if (kwcheck[j] != state)
1168 if (*p == EOS || p == ln)
1182 // Make j = -1 if user tries to use a RISC register while in 68K mode
1183 if (!(rgpu || rdsp) && ((TOKEN)j >= KW_R0 && (TOKEN)j <= KW_R31))
1188 // Make j = -1 if time, date etc with no preceding ^^
1189 // defined, referenced, streq, macdef, date and time
1192 case 112: // defined
1193 case 113: // referenced
1201 // If not tokenized keyword OR token was not found
1202 if ((j < 0) || (state < 0))
1206 //problem here: nullspot is a char * but TOKEN is a uint32_t. On a 64-bit
1207 //system, this will cause all kinds of mischief.
// NOTE(review): tk is declared as PTR and is used through tk.u32 everywhere
// else, so the next statement presumably sits in a disabled preprocessor
// branch that this listing does not show - confirm.
1209 *tk++ = (TOKEN)nullspot;
1211 string[stringNum] = nullspot;
1212 *tk.u32++ = stringNum;
1218 *tk.u32++ = (TOKEN)j;
1222 if (v) // Record attribute token (if any)
1223 *tk.u32++ = (TOKEN)v;
1225 if (stuffnull) // Arrange for string termination on next pass
1231 // Handle identity tokens
1238 // Handle multiple-character tokens
1243 case '!': // ! or !=
1253 case '\'': // 'string'
1256 // Hardcoded for now, maybe this will change in the future
1257 *tk.u32++ = STRINGA8;
1261 case '\"': // "string"
1265 string[stringNum] = ln;
1266 *tk.u32++ = stringNum;
1269 for(p=ln; *ln!=EOS && *ln!=c1;)
1278 return(error("unterminated string"));
1307 // If we're evaluating a macro
1308 // this is valid and expands to
1312 warn("bad backslash code in string");
1322 return error("unterminated string");
1326 case '$': // $, hex constant
1327 if (chrtab[*ln] & HDIGIT)
1331 // Parse the hex value
// NOTE(review): hextab has 128 entries but *ln is a uint8_t; a byte above $7F
// right after the hex digits indexes past the end of the table. Testing
// chrtab[*ln] & HDIGIT instead would stay inside the 256-entry chrtab.
1332 while (hextab[*ln] >= 0)
1333 v = (v << 4) + (int)hextab[*ln++];
1337 if (obj_format == BSD)
// Masking with 0xDF clears bit 0x20, so either letter case matches.
1339 if ((*(ln + 1) & 0xDF) == 'B')
1344 else if ((*(ln + 1) & 0xDF) == 'W')
1349 else if ((*(ln + 1) & 0xDF) == 'L')
1360 if (obj_format == ALCYON)
1364 if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
1369 else if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
1381 case '<': // < or << or <> or <=
1400 case ':': // : or ::
1410 case '=': // = or ==
1413 *tk.u32++ = DEQUALS;
1420 case '>': // > or >> or >=
1435 case '%': // % or binary constant
1436 if (*ln < '0' || *ln > '1')
1444 while (*ln >= '0' && *ln <= '1')
1445 v = (v << 1) + *ln++ - '0';
1449 if ((*(ln + 1) == 'b') || (*(ln + 1) == 'B'))
1455 if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
1461 if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
1471 case '@': // @ or octal constant
1472 if (*ln < '0' || *ln > '7')
1480 while (*ln >= '0' && *ln <= '7')
1481 v = (v << 3) + *ln++ - '0';
1485 if ((*(ln + 1) == 'b') || (*(ln + 1) == 'B'))
1491 if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
1497 if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
1507 case '^': // ^ or ^^ <operator-name>
1514 if (((int)chrtab[*++ln] & STSYM) == 0)
1516 error("invalid symbol following ^^");
1522 while ((int)chrtab[*ln] & CTSYM)
1525 for(state=0; state>=0;)
1527 // Get char, convert to lowercase
1530 if (j >= 'A' && j <= 'Z')
1535 if (kwcheck[j] != state)
1541 if (*p == EOS || p == ln)
1550 if (j < 0 || state < 0)
1552 error("unknown symbol following ^^");
1556 *tk.u32++ = (TOKEN)j;
1559 interror(2); // Bad MULTX entry in chrtab
1564 // Handle decimal constant
1567 uint8_t * numStart = ln;
1570 while ((int)chrtab[*ln] & DIGIT)
1571 v = (v * 10) + *ln++ - '0';
1573 // See if there's a .[bwl] after the constant & deal with it if so
1576 if ((*(ln + 1) == 'b') || (*(ln + 1) == 'B'))
1584 else if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
1592 else if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
1600 else if ((int)chrtab[*(ln + 1)] & DIGIT)
1602 // Hey, more digits after the dot, so we assume it's a
1603 // floating point number of some kind... numEnd will point
1604 // to the first non-float character after it's done
// NOTE(review): this f shadows the double f declared at the top of the
// function, and numStart is a uint8_t pointer while strtod takes a
// const char pointer, so an explicit cast would be needed for a clean build.
1607 double f = strtod(numStart, &numEnd);
1608 ln = (uint8_t *)numEnd;
1611 return error("floating point parse error");
1613 // N.B.: We use the C compiler's internal double
1614 // representation for all internal float calcs and
1615 // are reasonably sure that the size of said double
1616 // is 8 bytes long (which we check for in fltpoint.c)
1629 //printf("CONST: %i\n", v);
1633 // Handle illegal character
1634 return error("illegal character $%02X found", *ln);
1637 // Terminate line of tokens and return "success."
1640 tok = etok; // Set tok to beginning of line
1642 if (stuffnull) // Terminate last SYMBOL
1652 // .GOTO <label> goto directive
1654 // The label is searched for starting from the first line of the current,
1655 // enclosing macro definition. If no enclosing macro exists, an error is
1658 // A label is of the form:
1660 // :<name><whitespace>
1662 // The colon must appear in column 1. The label is stripped prior to macro
1663 // expansion, and is NOT subject to macro expansion. The whitespace may also
// The parameter is not used. On a match im_nextln of the current macro is set
// to the label line; every failure path returns the result of error().
1666 int d_goto(WORD unused)
1668 // Setup for the search
1670 return error("missing label");
// tok[1] is the index of the label name in the global string[] array.
1672 char * sym = string[tok[1]];
1675 if (cur_inobj->in_type != SRC_IMACRO)
1676 return error("goto not in macro");
1678 IMACRO * imacro = cur_inobj->inobj.imacro;
1679 LLIST * defln = imacro->im_macro->lineList;
1681 // Attempt to find the label, starting with the first line.
1682 for(; defln!=NULL; defln=defln->next)
1684 // Must start with a colon
1685 if (defln->line[0] == ':')
1687 // Compare names (sleazo string compare)
1689 char * s2 = defln->line;
1691 // Either we will match the strings to EOS on both, or we will
1692 // match EOS on string 1 to whitespace on string 2. Otherwise, we
1694 while ((*s1 == *s2) || ((*s1 == EOS) && (chrtab[*s2] & WHITE)))
1696 // If we reached the end of string 1 (sym), we're done.
1697 // Note that we're also checking for the end of string 2 as
1698 // well, since we've established they're equal above.
1701 // Found the label, set new macro next-line and return.
1702 imacro->im_nextln = defln;
1712 return error("goto label not found");
// Debug helper: prints a bracketed, human-readable form of the single token t
// with printf. Named tokens are matched first, then ranges of raw values.
1716 void DumpToken(TOKEN t)
1720 else if (t == CONST)
1722 else if (t == FCONST)
1724 else if (t == ACONST)
1726 else if (t == STRING)
1728 else if (t == SYMBOL)
1732 else if (t == TKEOF)
1734 else if (t == DEQUALS)
1735 printf("[DEQUALS]");
1740 else if (t == DCOLON)
1752 else if (t == UNMINUS)
1753 printf("[UNMINUS]");
1768 else if (t == ENDEXPR)
1769 printf("[ENDEXPR]");
1770 else if (t == CR_ABSCOUNT)
1771 printf("[CR_ABSCOUNT]");
1772 else if (t == CR_DEFINED)
1773 printf("[CR_DEFINED]");
1774 else if (t == CR_REFERENCED)
1775 printf("[CR_REFERENCED]");
1776 else if (t == CR_STREQ)
1777 printf("[CR_STREQ]");
1778 else if (t == CR_MACDEF)
1779 printf("[CR_MACDEF]");
1780 else if (t == CR_TIME)
1781 printf("[CR_TIME]");
1782 else if (t == CR_DATE)
1783 printf("[CR_DATE]");
// Values 0x20..0x2F and 0x3A..0x3F are printed as the character they encode.
1784 else if (t >= 0x20 && t <= 0x2F)
1785 printf("[%c]", (char)t);
1786 else if (t >= 0x3A && t <= 0x3F)
1787 printf("[%c]", (char)t);
// Values 0x80..0x87 and 0x88..0x8F are printed as D and A register numbers.
1788 else if (t >= 0x80 && t <= 0x87)
1789 printf("[D%u]", ((uint32_t)t) - 0x80);
1790 else if (t >= 0x88 && t <= 0x8F)
1791 printf("[A%u]", ((uint32_t)t) - 0x88);
// Fallback: raw hex value followed by the value shown as a character.
1793 printf("[%X:%c]", (uint32_t)t, (char)t);
// Debug helper: prints sloc and then every token in tokbuf up to the first EOL
// token, using the same bracketed notation as DumpToken.
1797 void DumpTokenBuffer(void)
1799 printf("Tokens [%X]: ", sloc);
1801 for(TOKEN * t=tokbuf; *t!=EOL; t++)
1805 else if (*t == CONST)
// NOTE(review): %lX expects an unsigned long while the argument is read
// through the u64 member; where long is 32 bits the sizes differ. PRIX64 from
// inttypes.h would match - confirm the type of u64. The same applies to the
// FCONST case below.
1809 printf("[CONST: $%lX]", *tp.u64);
1812 else if (*t == FCONST)
1816 printf("[FCONST: $%lX]", *tp.u64);
1819 else if (*t == ACONST)
1821 printf("[ACONST: $%X, $%X]", (uint32_t)t[1], (uint32_t)t[2]);
1824 else if (*t == STRING)
// The value at *t is used as an index into the global string[] array.
1827 printf("[STRING:\"%s\"]", string[*t]);
1829 else if (*t == SYMBOL)
1832 printf("[SYMBOL:\"%s\"]", string[*t]);
1836 else if (*t == TKEOF)
1838 else if (*t == DEQUALS)
1839 printf("[DEQUALS]");
1844 else if (*t == DCOLON)
1856 else if (*t == UNMINUS)
1857 printf("[UNMINUS]");
1858 else if (*t == DOTB)
1860 else if (*t == DOTW)
1862 else if (*t == DOTL)
1864 else if (*t == DOTQ)
1866 else if (*t == DOTS)
1868 else if (*t == DOTD)
1870 else if (*t == DOTI)
1872 else if (*t == ENDEXPR)
1873 printf("[ENDEXPR]");
1874 else if (*t == CR_ABSCOUNT)
1875 printf("[CR_ABSCOUNT]");
1876 else if (*t == CR_DEFINED)
1877 printf("[CR_DEFINED]");
1878 else if (*t == CR_REFERENCED)
1879 printf("[CR_REFERENCED]");
1880 else if (*t == CR_STREQ)
1881 printf("[CR_STREQ]");
1882 else if (*t == CR_MACDEF)
1883 printf("[CR_MACDEF]");
1884 else if (*t == CR_TIME)
1885 printf("[CR_TIME]");
1886 else if (*t == CR_DATE)
1887 printf("[CR_DATE]");
// Values 0x20..0x2F and 0x3A..0x3F are printed as the character they encode.
1888 else if (*t >= 0x20 && *t <= 0x2F)
1889 printf("[%c]", (char)*t);
1890 else if (*t >= 0x3A && *t <= 0x3F)
1891 printf("[%c]", (char)*t);
// Values 0x80..0x87 and 0x88..0x8F are printed as D and A register numbers.
1892 else if (*t >= 0x80 && *t <= 0x87)
1893 printf("[D%u]", ((uint32_t)*t) - 0x80);
1894 else if (*t >= 0x88 && *t <= 0x8F)
1895 printf("[A%u]", ((uint32_t)*t) - 0x88);
// Fallback: raw hex value followed by the value shown as a character.
1897 printf("[%X:%c]", (uint32_t)*t, (char)*t);