2 // RMAC - Reboot's Macro Assembler for all Atari computers
3 // TOKEN.C - Token Handling
4 // Copyright (C) 199x Landon Dyer, 2011-2017 Reboot and Friends
5 // RMAC derived from MADMAC v1.07 Written by Landon Dyer, 1986
6 // Source utilised with the kind permission of Landon Dyer
19 #define DECL_KW // Declare keyword arrays
20 #define DEF_KW // Declare keyword values
21 #include "kwtab.h" // Incl generated keyword tables & defs
24 int lnsave; // 1 = save (strcpy) the text of the current line, 0 = don't
25 uint16_t curlineno; // Current line number (uint16_t, so 65535 max)
26 int totlines; // Total # of lines assembled
27 int mjump_align = 0; // mjump alignment flag
28 char lntag; // Line tag
29 char * curfname; // Current filename
30 char tolowertab[128]; // Uppercase ==> lowercase
31 int8_t hextab[128]; // Table of hex values
32 char dotxtab[128]; // Size token for the char after '.' (".b", ".s", etc.); 0 = not a size char
33 char irbuf[LNSIZ]; // Text for .rept block line
34 char lnbuf[LNSIZ]; // Text of current line
35 WORD filecount; // Unique file number counter
36 WORD cfileno; // Current file number
37 TOKEN * tok; // Ptr to current token
38 TOKEN * etok; // Ptr past last token in tokbuf[]
39 TOKEN tokeol[1] = {EOL}; // Bailout end-of-line token ("tok" is pointed here while a line is tokenized)
40 char * string[TOKBUFSIZE*2];// Strings that SYMBOL/string tokens refer to by index
41 int optimizeOff; // Optimization override flag: 1 = don't optimize the current line
43 // File record, used to maintain a list of every include file ever visited
44 #define FILEREC struct _filerec
54 INOBJ * cur_inobj; // Top of the input stack: current input obj (IFILE/IMACRO/IREPT)
55 static INOBJ * f_inobj; // Ptr list of free INOBJs (linked through in_link)
56 static IFILE * f_ifile; // Ptr list of free IFILEs (linked through if_link)
57 static IMACRO * f_imacro; // Ptr list of free IMACROs (linked through im_link)
59 static TOKEN tokbuf[TOKBUFSIZE]; // Token buffer (stack-like, all files)
// Character classification table, indexed by (unsigned) character value.
// Each entry is a sum of class flags: ILLEG, WHITE, SELF, MULTX, DIGIT,
// HDIGIT (hex digit), STSYM (may start a symbol), CTSYM (may continue a
// symbol) and DOT (may follow a '.' as a size suffix; see dotxtab[]).
// Upper- and lowercase letters carry the same flags: 'X' has DOT just like
// 'x', because dotxtab['X'] is DOTX and the tokenizer requires the DOT flag
// before it will consult dotxtab[].
61 uint8_t chrtab[0x100] = {
62 ILLEG, ILLEG, ILLEG, ILLEG, // NUL SOH STX ETX
63 ILLEG, ILLEG, ILLEG, ILLEG, // EOT ENQ ACK BEL
64 ILLEG, WHITE, ILLEG, ILLEG, // BS HT LF VT
65 WHITE, ILLEG, ILLEG, ILLEG, // FF CR SO SI
67 ILLEG, ILLEG, ILLEG, ILLEG, // DLE DC1 DC2 DC3
68 ILLEG, ILLEG, ILLEG, ILLEG, // DC4 NAK SYN ETB
69 ILLEG, ILLEG, ILLEG, ILLEG, // CAN EM SUB ESC
70 ILLEG, ILLEG, ILLEG, ILLEG, // FS GS RS US
72 WHITE, MULTX, MULTX, SELF, // SP ! " #
73 MULTX+CTSYM, MULTX, SELF, MULTX, // $ % & '
74 SELF, SELF, SELF, SELF, // ( ) * +
75 SELF, SELF, STSYM, SELF, // , - . /
77 DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 0 1
78 DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 2 3
79 DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 4 5
80 DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 6 7
81 DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 8 9
83 MULTX, MULTX, MULTX, STSYM+CTSYM, // < = > ?
85 MULTX, STSYM+CTSYM+HDIGIT, // @ A
86 DOT+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT, // B C
87 DOT+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT, // D E
88 STSYM+CTSYM+HDIGIT, STSYM+CTSYM, // F G
89 STSYM+CTSYM, DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // H I J K
90 DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // L M N O
92 DOT+STSYM+CTSYM, DOT+STSYM+CTSYM, STSYM+CTSYM, DOT+STSYM+CTSYM, // P Q R S
93 STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, DOT+STSYM+CTSYM, // T U V W
94 DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, SELF, // X Y Z [
95 SELF, SELF, MULTX, STSYM+CTSYM, // \ ] ^ _
97 ILLEG, STSYM+CTSYM+HDIGIT, // ` a
98 DOT+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT, // b c
99 DOT+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT, // d e
100 STSYM+CTSYM+HDIGIT, STSYM+CTSYM, // f g
101 STSYM+CTSYM, DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // h i j k
102 DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // l m n o
104 DOT+STSYM+CTSYM, DOT+STSYM+CTSYM, STSYM+CTSYM, DOT+STSYM+CTSYM, // p q r s
105 STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, DOT+STSYM+CTSYM, // t u v w
106 DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, SELF, // x y z {
107 SELF, SELF, SELF, ILLEG, // | } ~ DEL
109 // Anything above $7F is illegal (and yes, we need to check for this,
110 // otherwise you get strange and spurious errors that will lead you astray)
111 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
112 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
113 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
114 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
115 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
116 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
117 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
118 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
119 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
120 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
121 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
122 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
123 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
124 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
125 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
126 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG
129 // Names of registers
// Used by ExpandMacro() to turn a register keyword token back into text; the
// table is indexed by (token - KW_D0). The trailing comment on each row gives
// the first and last token number covered by that row. Empty strings are
// token numbers with no name in this table.
130 static char * regname[] = {
131 "d0","d1","d2","d3","d4","d5","d6","d7", // 128,135
132 "a0","a1","a2","a3","a4","a5","a6","sp", // 136,143
133 "ssp","pc","sr","ccr","regequ","set","reg","r0", // 144,151
134 "r1","r2","r3","r4","r5","r6","r7","r8", // 152,159
135 "r9","r10","r11","r12","r13","r14","r15","r16", // 160,167
136 "r17","r18","r19","r20","r21","r22","r23","r24", // 168,175
137 "r25","r26","r27","r28","r29","r30","r31","ccdef", // 176,183
138 "usp","ic40","dc40","bc40","sfc","dfc","","vbr", // 184,191
139 "cacr","caar","msp","isp","tc","itt0","itt1","dtt0", // 192,199
140 "dtt1","mmusr","urp","srp","iacr0","iacr1","dacr0","dacr1", // 200,207
141 "tt0","tt1","crp","","","","","", // 208,215
142 "","","","","fpiar","fpsr","fpcr","", // 216,223
143 "fp0","fp1","fp2","fp3","fp4","fp5","fp6","fp7", // 224,231
144 "","","","","","","","", // 232,239
145 "","","","","","","","", // 240,247
146 "","","","","","","","", // 248,255
147 "","","","","x0","x1","y0","y1", // 256,263
148 "","b0","","b2","","b1","a","b", // 264,271
149 "mr","omr","la","lc","ssh","ssl","ss","", // 272,279
150 "n0","n1","n2","n3","n4","n5","n6","n7", // 280,287
151 "m0","m1","m2","m3","m4","m5","m6","m7", // 288,295
152 "","","","","","","l","p", // 296,303
// NOTE(review): the next row repeats the names of row 272,279 -- confirm this is intentional
153 "mr","omr","la","lc","ssh","ssl","ss","", // 304,311
154 "a10","b10","x","y","","","ab","ba" // 312,319
// Names of the 32 RISC registers; ExpandMacro() indexes this by (token - KW_R0)
157 static char * riscregname[] = {
158 "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
159 "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
160 "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
161 "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31"
166 // Initialize tokenizer
// Resets the line-saving flag, the current filename and the file counters,
// then fills the hextab[], tolowertab[] and dotxtab[] lookup tables.
168 void InitTokenizer(void)
171 char * htab = "0123456789abcdefABCDEF"; // Hex character table
173 lnsave = 0; // Don't save lines
174 curfname = ""; // No file, empty filename
175 filecount = (WORD)-1; // include() pre-increments this, so the first file is #0
176 cfileno = (WORD)-1; // cfileno gets bumped to 0
188 // Initialize hex, "dot" and tolower tables
193 tolowertab[i] = (char)i;
196 for(i=0; htab[i]!=EOS; i++)
197 hextab[htab[i]] = (char)((i < 16) ? i : i - 6); // 'A'..'F' sit at 16..21; i - 6 maps them to 10..15
199 for(i='A'; i<='Z'; i++)
200 tolowertab[i] |= 0x20; // Setting bit 5 turns ASCII 'A'..'Z' into 'a'..'z'
202 // These characters are legal immediately after a period
203 dotxtab['b'] = DOTB; // .b .B (.s/.S no longer alias DOTB; they map to DOTS below)
205 //dotxtab['s'] = DOTB;
206 //dotxtab['S'] = DOTB;
207 dotxtab['w'] = DOTW; // .w .W
209 dotxtab['l'] = DOTL; // .l .L
211 dotxtab['i'] = DOTI; // .i .I (purpose not evident from this file -- TODO confirm)
213 dotxtab['D'] = DOTD; // .d .D (double)
215 dotxtab['S'] = DOTS; // .s .S
217 dotxtab['Q'] = DOTQ; // .q .Q (quad word)
219 dotxtab['X'] = DOTX; // .x .X
221 dotxtab['P'] = DOTP; // .p .P
// Point curfname at the name stored in the file-record list (filerec) for the
// file selected by fnum (declared outside the lines shown here). Falls back
// to a placeholder string when no record applies.
226 void SetFilenameForErrorReporting(void)
230 // Check for absolute top filename (this should never happen)
233 curfname = "(*top*)";
237 FILEREC * fr = filerec;
239 // Advance to the correct record...
240 while (fr != NULL && fnum != 0)
246 // Check for file # record not found (this should never happen either)
249 curfname = "(*NOT FOUND*)";
253 curfname = fr->frec_name;
258 // Allocate an IFILE, IMACRO or IREPT
// 'typ' is one of SRC_IFILE, SRC_IMACRO or SRC_IREPT. An INOBJ wrapper is
// obtained first, then the type-specific object is attached to it, and
// finally the INOBJ records the current .if context and token pointers and
// is linked in front of cur_inobj.
// NOTE(review): none of the malloc() results are checked in the lines shown.
260 INOBJ * a_inobj(int typ)
266 // Allocate and initialize INOBJ first
268 inobj = malloc(sizeof(INOBJ));
272 f_inobj = f_inobj->in_link; // Unlink the reused INOBJ from the free list
277 case SRC_IFILE: // Alloc and init an IFILE
279 ifile = malloc(sizeof(IFILE));
283 f_ifile = f_ifile->if_link; // Unlink the reused IFILE from the free list
286 inobj->inobj.ifile = ifile;
289 case SRC_IMACRO: // Alloc and init an IMACRO
290 if (f_imacro == NULL)
291 imacro = malloc(sizeof(IMACRO));
295 f_imacro = f_imacro->im_link; // Unlink the reused IMACRO from the free list
298 inobj->inobj.imacro = imacro;
301 case SRC_IREPT: // Alloc and init an IREPT
302 inobj->inobj.irept = malloc(sizeof(IREPT)); // IREPTs are always malloc'd here (no free list is consulted)
303 DEBUG { printf("alloc IREPT\n"); }
307 // Install INOBJ on top of input stack
308 inobj->in_ifent = ifent; // Record .if context on entry
309 inobj->in_type = (WORD)typ;
310 inobj->in_otok = tok; // Saved so fpop() can restore tok
311 inobj->in_etok = etok; // Saved so fpop() can restore etok
312 inobj->in_link = cur_inobj;
320 // Perform macro substitution from 'orig' to 'dest'. Return OK or some error.
321 // A macro reference is in one of two forms:
322 // \name <non-name-character>
324 // A doubled backslash (\\) is compressed to a single backslash (\).
325 // Argument definitions have been pre-tokenized, so we have to turn them back
326 // into text. This means that numbers, in particular, become hex, regardless of
327 // their representation when the macro was invoked. This is a hack.
328 // A label may appear at the beginning of the line:
329 // :<name><whitespace>
330 // (the colon must be in the first column). These labels are stripped before
331 // macro expansion takes place.
// Parameters: 'src' is the macro body line to expand, 'dest' the output
// buffer and 'destsiz' its size in bytes. Failures are reported through
// error() / fatal() and their result is returned.
333 int ExpandMacro(char * src, char * dest, int destsiz)
336 int questmark; // \? for testing argument existence
337 char mname[128]; // Assume max size of a formal arg name
338 char numbuf[20]; // Buffer for text of CONSTs
341 char ** symbolString;
343 DEBUG { printf("ExM: src=\"%s\"\n", src); }
345 IMACRO * imacro = cur_inobj->inobj.imacro;
346 int macnum = (int)(imacro->im_macro->sattr);
348 char * dst = dest; // Next dest slot
349 char * edst = dest + destsiz - 1; // Last byte of dest buffer (dest[destsiz - 1])
351 // Check for (and skip over) any "label" on the line
357 while (*s != EOS && !(chrtab[*s] & WHITE))
361 s++; // Skip first whitespace
364 // Expand the rest of the line
367 // Copy single character
373 // Skip comments in case a loose @ or \ is in there
374 // In that case the tokeniser was trying to expand it.
375 if ((*s == ';') || ((*s == '/') && (*(s + 1) == '/')))
380 // Do macro expansion
388 case '\\': // \\, \ (collapse to single backslash)
394 case '?': // \? <macro> set `questmark' flag
398 case '#': // \#, number of arguments
399 sprintf(numbuf, "%d", (int)imacro->im_nargs);
401 case '!': // \! size suffix supplied on invocation
402 switch ((int)imacro->im_siz)
404 case SIZN: d = ""; break;
405 case SIZB: d = ".b"; break;
406 case SIZW: d = ".w"; break;
407 case SIZL: d = ".l"; break;
411 case '~': // ==> unique label string Mnnnn...
412 sprintf(numbuf, "M%u", curuniq);
428 return error("missing argument name");
431 // \n ==> argument number 'n', 0..9
432 if (chrtab[*s] & DIGIT)
442 // Get argument name: \name, \{name}
452 while (chrtab[*s] & CTSYM);
457 for(++s; *s != EOS && *s != '}';)
461 return error("missing closing brace ('}')");
468 // Lookup the argument and copy its (string) value into the
469 // destination string
470 DEBUG { printf("argument='%s'\n", mname); }
472 if ((arg = lookup(mname, MACARG, macnum)) == NULL)
473 return error("undefined argument: '%s'", mname);
476 // Convert a string of tokens (terminated with EOL) back into
477 // text. If an argument is out of range (not specified in the
478 // macro invocation) then it is ignored.
479 i = (int)arg->svalue;
481 DEBUG { printf("~argnumber=%d\n", i); }
484 if (i < imacro->im_nargs)
486 tk = imacro->argument[i].token;
487 symbolString = imacro->argument[i].string;
490 // printf("ExM: Preparing to parse argument #%u...\n", i);
496 // 0 if the argument is empty or nonexistent,
497 // 1 if the argument is not empty
500 if (tk == NULL || *tk == EOL)
506 *dst++ = (char)(questmark + '0');
510 // Argument # is in range, so expand it
515 // Reverse-translation from a token number to a string.
516 // This is a hack. It might be better table-driven.
519 if ((*tk >= KW_D0) && !rdsp && !rgpu)
521 d = regname[(int)*tk++ - KW_D0];
524 else if ((*tk >= KW_R0) && (*tk <= KW_R31))
526 d = riscregname[(int)*tk++ - KW_R0];
535 // d = (char *)*tk++;
538 // This fix should be done for strings too
539 d = symbolString[*tk++];
540 DEBUG { printf("ExM: SYMBOL=\"%s\"", d); }
545 // d = (char *)*tk++;
548 d = symbolString[*tk++];
569 // Shamus: Changing the format specifier from %lx to %ux caused the assembler
570 // to choke on legitimate code... Need to investigate this further
571 // before changing anything else here!
// NOTE(review): "%lx" expects an unsigned long but the argument is cast to
// uint64_t; the two only agree where long is 64 bits wide. Confirm on the
// target platforms before touching this (see the warning above).
573 sprintf(numbuf, "$%lx", (uint64_t)*tk++);
638 *dst++ = (char)*(tk - 1);
643 // If 'd' != NULL, copy string to destination
647 DEBUG printf("d='%s'\n", d);
666 DEBUG { printf("ExM: dst=\"%s\"\n", dest); }
671 DEBUG { printf("*** OVERFLOW LINE ***\n%s\n", dest); }
672 return fatal("line too long as a result of macro expansion");
677 // Get next line of text from a macro
// Expands the macro's next stored line into imacro->im_lnbuf and returns that
// buffer. TokenizeLine() treats a NULL result as end-of-macro.
679 char * GetNextMacroLine(void)
681 IMACRO * imacro = cur_inobj->inobj.imacro;
682 // LONG * strp = imacro->im_nextln;
683 LLIST * strp = imacro->im_nextln;
685 if (strp == NULL) // End-of-macro
688 imacro->im_nextln = strp->next; // Advance before expanding the current line
689 // ExpandMacro((char *)(strp + 1), imacro->im_lnbuf, LNSIZ);
// NOTE(review): ExpandMacro() returns an error code that is ignored here
690 ExpandMacro(strp->line, imacro->im_lnbuf, LNSIZ);
692 return imacro->im_lnbuf;
697 // Get next line of text from a repeat block
// Copies the block's next stored line into irbuf. When the end of the line
// list is reached the list is rewound to its first line and the repeat count
// is consumed. TokenizeLine() treats a NULL result as end-of-repeat-block.
699 char * GetNextRepeatLine(void)
701 IREPT * irept = cur_inobj->inobj.irept;
702 // LONG * strp = irept->ir_nextln; // initial null
704 // Do repeat at end of .rept block's string list
706 if (irept->ir_nextln == NULL)
708 DEBUG { printf("back-to-top-of-repeat-block count=%d\n", (int)irept->ir_count); }
709 irept->ir_nextln = irept->ir_firstln; // Rewind to the first line of the block
711 if (irept->ir_count-- == 0) // Tests the count, then decrements it
713 DEBUG { printf("end-repeat-block\n"); }
717 // strp = irept->ir_nextln;
720 // strcpy(irbuf, (char *)(irept->ir_nextln + 1));
// NOTE(review): unbounded copy into irbuf[LNSIZ]; no length check is visible here
721 strcpy(irbuf, irept->ir_nextln->line);
722 DEBUG { printf("repeat line='%s'\n", irbuf); }
723 // irept->ir_nextln = (LONG *)*strp;
724 irept->ir_nextln = irept->ir_nextln->next;
731 // Include a source file used at the root, and for ".include" files
// 'handle' is the already-open file descriptor and 'fname' its name. Pushes a
// new IFILE input object, saves the previous line number / filename / file
// number in it (fpop() restores them), assigns the next file number and
// appends a FILEREC for the file to the file-record list.
// NOTE(review): the strdup() and malloc() results are not checked in the
// lines shown.
733 int include(int handle, char * fname)
736 DEBUG { printf("[include: %s, cfileno=%u]\n", fname, cfileno); }
738 // Alloc and initialize include-descriptors
739 INOBJ * inobj = a_inobj(SRC_IFILE);
740 IFILE * ifile = inobj->inobj.ifile;
742 ifile->ifhandle = handle; // Setup file handle
743 ifile->ifind = ifile->ifcnt = 0; // Setup buffer indices
744 ifile->ifoldlineno = curlineno; // Save old line number
745 ifile->ifoldfname = curfname; // Save old filename
746 ifile->ifno = cfileno; // Save old file number
748 // NB: This *must* be preincrement, we're adding one to the filecount here!
749 cfileno = ++filecount; // Compute NEW file number
750 curfname = strdup(fname); // Set current filename (alloc storage)
751 curlineno = 0; // Start on line zero
753 // Add another file to the file-record
754 FILEREC * fr = (FILEREC *)malloc(sizeof(FILEREC));
755 fr->frec_next = NULL;
756 fr->frec_name = curfname; // Shares the strdup()'d storage with curfname
759 filerec = fr; // Add first filerec
761 last_fr->frec_next = fr; // Append to list of filerecs
764 DEBUG { printf("[include: curfname: %s, cfileno=%u]\n", curfname, cfileno); }
771 // Pop the current input level
// Unwinds any .if levels opened inside the input object, restores the token
// pointers saved by a_inobj(), releases the type-specific object (IFILE,
// IMACRO or IREPT) and makes the previous input object current again.
// Returns -1 if d_endif() fails while unwinding; TokenizeLine() treats a
// result of 0 as success.
775 INOBJ * inobj = cur_inobj;
780 // Pop IFENT levels until we reach the conditional assembly context we
781 // were at when the input object was entered.
782 int numUnmatched = 0;
784 while (ifent != inobj->in_ifent)
786 if (d_endif() != 0) // Something bad happened during endif parsing?
787 return -1; // If yes, bail instead of getting stuck in a loop
792 // Warn the user that we had to close their unterminated .if blocks for them
793 if (numUnmatched > 0)
794 warn("missing %d .endif(s)", numUnmatched);
796 tok = inobj->in_otok; // Restore tok and etok
797 etok = inobj->in_etok;
799 switch (inobj->in_type)
801 case SRC_IFILE: // Pop and release an IFILE
803 DEBUG { printf("[Leaving: %s]\n", curfname); }
805 IFILE * ifile = inobj->inobj.ifile;
806 ifile->if_link = f_ifile; // Link the IFILE in front of the free list
808 close(ifile->ifhandle); // Close source file
809 DEBUG { printf("[fpop (pre): curfname=%s]\n", curfname); }
810 curfname = ifile->ifoldfname; // Set current filename
811 DEBUG { printf("[fpop (post): curfname=%s]\n", curfname); }
812 DEBUG { printf("[fpop: (pre) cfileno=%d ifile->ifno=%d]\n", (int)cfileno, (int)ifile->ifno); }
813 curlineno = ifile->ifoldlineno; // Set current line#
814 DEBUG { printf("cfileno=%d ifile->ifno=%d\n", (int)cfileno, (int)ifile->ifno); }
815 cfileno = ifile->ifno; // Restore current file number
816 DEBUG { printf("[fpop: (post) cfileno=%d ifile->ifno=%d]\n", (int)cfileno, (int)ifile->ifno); }
820 case SRC_IMACRO: // Pop and release an IMACRO
822 IMACRO * imacro = inobj->inobj.imacro;
823 imacro->im_link = f_imacro; // Link the IMACRO in front of the free list
828 case SRC_IREPT: // Pop and release an IREPT
830 DEBUG { printf("dealloc IREPT\n"); }
831 LLIST * p = inobj->inobj.irept->ir_firstln;
833 // Deallocate repeat lines
844 cur_inobj = inobj->in_link; // Previous input object becomes current
845 inobj->in_link = f_inobj; // Link the INOBJ in front of the free list
853 // Get line from file into buf, return NULL on EOF or ptr to the start of a
// Works on the current IFILE's buffer: ifind is the offset of the unread data
// in ifbuf and ifcnt the number of unread bytes. More data is fetched with
// read() in QUANTUM-sized requests when no line terminator is found.
856 char * GetNextLine(void)
860 int readamt = -1; // 0 if last read() yielded 0 bytes
861 IFILE * fl = cur_inobj->inobj.ifile;
865 // Scan for next end-of-line; handle stupid text formats by treating
866 // \r\n the same as \n. (lone '\r' at end of buffer means we have to
868 d = &fl->ifbuf[fl->ifind];
870 for(p=d, i=0, j=fl->ifcnt; i<j; i++, p++)
872 if (*p == '\r' || *p == '\n')
879 break; // Need to read more, then look for '\n' to eat
880 else if (p[1] == '\n')
884 // Cover up the newline with end-of-string sentinel
893 // Handle hanging lines by ignoring them (Input file is exhausted, no
894 // \r or \n on last line)
895 // Shamus: Ignoring them is wrong; never ignore any input! The code below
// NUL-terminates the leftover text and returns it.
896 if (!readamt && fl->ifcnt)
903 // Really should check to see if we're at the end of the buffer!
905 fl->ifbuf[fl->ifind + fl->ifcnt] = '\0';
907 return &fl->ifbuf[fl->ifind];
911 // Truncate and return absurdly long lines.
912 if (fl->ifcnt >= QUANTUM)
914 fl->ifbuf[fl->ifind + fl->ifcnt - 1] = '\0'; // Overwrites the last buffered byte with the terminator
916 return &fl->ifbuf[fl->ifind];
919 // Relocate what's left of a line to the beginning of the buffer, and
920 // read some more of the file in; return NULL if the buffer's empty and
924 p = &fl->ifbuf[fl->ifind];
925 d = &fl->ifbuf[fl->ifcnt & 1]; // Destination offset is 0 or 1, depending on the parity of ifcnt
927 for(i=0; i<fl->ifcnt; i++)
930 fl->ifind = fl->ifcnt & 1;
933 readamt = read(fl->ifhandle, &fl->ifbuf[fl->ifind + fl->ifcnt], QUANTUM);
938 if ((fl->ifcnt += readamt) == 0)
// Fetch the next line from the current input source (file, macro or repeat
// block) and convert it to tokens, deposited starting at etok. Returns TKEOF
// when the input is exhausted; a malformed line returns the result of
// error().
947 int TokenizeLine(void)
949 uint8_t * ln = NULL; // Ptr to current position in line
950 uint8_t * p; // Random character ptr
951 PTR tk; // Token-deposit ptr
952 int state = 0; // State for keyword detector
953 int j = 0; // Var for keyword detector
954 uint8_t c; // Random char
955 uint64_t v; // Random value
956 uint32_t cursize = 0; // Current line's size (.b, .w, .l, .s, .q, .d)
957 double f; // Random float
958 uint8_t * nullspot = NULL; // Spot to clobber for SYMBOL termination
959 int stuffnull; // 1:terminate SYMBOL '\0' at *nullspot
961 int stringNum = 0; // Next free index in string[] for this line
965 if (cur_inobj == NULL) // Return EOF if input stack is empty
968 // Get another line of input from the current input source: a file, a
969 // macro, or a repeat-block
970 switch (cur_inobj->in_type)
974 // o bump source line number;
975 // o tag the listing-line with a space;
976 // o kludge lines generated by Alcyon C.
978 if ((ln = GetNextLine()) == NULL)
980 DEBUG { printf("TokenizeLine: Calling fpop() from SRC_IFILE...\n"); }
981 if (fpop() == 0) // Pop input level
982 goto retry; // Try for more lines
985 ifent->if_prev = (IFENT *)-1; //Signal Assemble() that we have reached EOF with unbalanced if/endifs
990 curlineno++; // Bump line number
995 // AS68 compatibility, throw away all lines starting with
996 // back-quotes, tildes, or '*'
997 // On other lines, turn the first '*' into a semi-colon.
998 if (*ln == '`' || *ln == '~' || *ln == '*')
1002 for(p=ln; *p!=EOS; p++)
1016 // o Handle end-of-macro;
1017 // o tag the listing-line with an at (@) sign.
1019 if ((ln = GetNextMacroLine()) == NULL)
1021 if (ExitMacro() == 0) // Exit macro (pop args, do fpop(), etc)
1022 goto retry; // Try for more lines...
1024 return TKEOF; // Oops, we got a non zero return code, signal EOF
1031 // o Handle end-of-repeat-block;
1032 // o tag the listing-line with a pound (#) sign.
1034 if ((ln = GetNextRepeatLine()) == NULL)
1036 DEBUG { printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n"); }
1045 // Save text of the line. We only do this during listings and within
1046 // macro-type blocks, since it is expensive to unconditionally copy every
1051 // General housekeeping
1052 tok = tokeol; // Set "tok" to EOL in case of error
1053 tk.u32 = etok; // Reset token ptr
1054 stuffnull = 0; // Don't stuff nulls
1055 totlines++; // Bump total #lines assembled
1057 // See if the entire line is a comment. This is a win if the programmer
1058 // puts in lots of comments
1059 if (*ln == '*' || *ln == ';' || ((*ln == '/') && (*(ln + 1) == '/')))
1062 // And here we have a very ugly hack for signalling a single line 'turn off
1063 // optimization'. There's really no nice way to do this, so hack it is!
1064 optimizeOff = 0; // Default is to take optimizations as they come
1068 optimizeOff = 1; // Signal that we don't want to optimize this line
1069 ln++; // & skip over the darned thing
1072 // Main tokenization loop;
1073 // o skip whitespace;
1074 // o handle end-of-line;
1075 // o handle symbols;
1076 // o handle single-character tokens (operators, etc.);
1077 // o handle multiple-character tokens (constants, strings, etc.).
1080 // Skip whitespace, handle EOL
1081 while (chrtab[*ln] & WHITE)
1084 // Handle EOL, comment with ';'
1085 if (*ln == EOS || *ln == ';'|| ((*ln == '/') && (*(ln + 1) == '/')))
1088 // Handle start of symbol. Symbols are null-terminated in place. The
1089 // termination is always one symbol behind, since there may be no place
1090 // for a null in the case that an operator immediately follows the name.
1095 if (stuffnull) // Terminate old symbol from previous pass
1098 v = 0; // Assume no DOT attrib follows symbol
1101 // In some cases, we need to check for a DOTx at the *beginning*
1102 // of a symbol, as the "start" of the line we're currently looking
1103 // at could be somewhere in the middle of that line!
1106 // Make sure that it's *only* a .[bwsl] following, and not the
1107 // start of a local symbol:
1108 if ((chrtab[*(ln + 1)] & DOT)
1109 && (dotxtab[*(ln + 1)] != 0)
1110 && !(chrtab[*(ln + 2)] & CTSYM))
1112 // We found a legitimate DOTx construct, so add it to the
1116 *tk.u32++ = (TOKEN)dotxtab[*ln++];
1121 p = nullspot = ln++; // Nullspot -> start of this symbol
1123 // Find end of symbol (and compute its length)
1124 for(j=1; (int)chrtab[*ln]&CTSYM; j++)
1127 // Handle "DOT" special forms (like ".b") that follow a normal
1128 // symbol or keyword:
1131 *ln++ = EOS; // Terminate symbol
1132 stuffnull = 0; // And never try it again
1134 // Character following the '.' must have a DOT attribute, and
1135 // the character after THAT one must not have a start-symbol
1136 // attribute (to prevent symbols that look like, for example,
1137 // "zingo.barf", which might be a good idea anyway....)
1138 if (((chrtab[*ln] & DOT) == 0) || (dotxtab[*ln] == 0))
1139 return error("[bwsl] must follow '.' in symbol");
1141 v = (uint32_t)dotxtab[*ln++];
1142 cursize = (uint32_t)v;
1144 if (chrtab[*ln] & CTSYM)
1145 return error("misuse of '.'; not allowed in symbols");
1148 // If the symbol is small, check to see if it's really the name of
1152 for(state=0; state>=0;)
1154 j = (int)tolowertab[*p++];
1157 if (kwcheck[j] != state)
1163 if (*p == EOS || p == ln)
1177 // Make j = -1 if user tries to use a RISC register while in 68K mode
1178 if (!(rgpu || rdsp) && ((TOKEN)j >= KW_R0 && (TOKEN)j <= KW_R31))
1183 // Make j = -1 if time, date etc with no preceding ^^
1184 // defined, referenced, streq, macdef, date and time
1187 case 112: // defined
1188 case 113: // referenced
1196 // If not tokenized keyword OR token was not found
1197 if ((j < 0) || (state < 0))
1201 //problem here: nullspot is a char * but TOKEN is a uint32_t. On a 64-bit
1202 //system, this will cause all kinds of mischief.
// NOTE(review): the pointer-storing statement below looks like the old form,
// presumably disabled by preprocessor lines not shown here -- confirm. The
// string[] index stored after it is the form that avoids the problem.
1204 *tk++ = (TOKEN)nullspot;
1206 string[stringNum] = nullspot;
1207 *tk.u32++ = stringNum;
1213 *tk.u32++ = (TOKEN)j;
1217 if (v) // Record attribute token (if any)
1218 *tk.u32++ = (TOKEN)v;
1220 if (stuffnull) // Arrange for string termination on next pass
1226 // Handle identity tokens
1233 // Handle multiple-character tokens
1238 case '!': // ! or !=
1248 case '\'': // 'string'
1251 // Hardcoded for now, maybe this will change in the future
1252 *tk.u32++ = STRINGA8;
1256 case '\"': // "string"
1260 string[stringNum] = ln;
1261 *tk.u32++ = stringNum;
1264 for(p=ln; *ln!=EOS && *ln!=c1;)
1273 return(error("unterminated string"));
1302 // If we're evaluating a macro
1303 // this is valid and expands to
1307 warn("bad backslash code in string");
1317 return error("unterminated string");
1321 case '$': // $, hex constant
1322 if (chrtab[*ln] & HDIGIT)
1326 // Parse the hex value
1327 while (hextab[*ln] >= 0)
1328 v = (v << 4) + (int)hextab[*ln++];
1332 if (obj_format == BSD)
1334 if ((*(ln + 1) & 0xDF) == 'B') // & 0xDF clears bit 5: case-insensitive letter compare
1339 else if ((*(ln + 1) & 0xDF) == 'W')
1344 else if ((*(ln + 1) & 0xDF) == 'L')
1355 if (obj_format == ALCYON)
1359 if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
1364 else if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
1376 case '<': // < or << or <> or <=
1395 case ':': // : or ::
1405 case '=': // = or ==
1408 *tk.u32++ = DEQUALS;
1415 case '>': // > or >> or >=
1430 case '%': // % or binary constant
1431 if (*ln < '0' || *ln > '1')
1439 while (*ln >= '0' && *ln <= '1')
1440 v = (v << 1) + *ln++ - '0';
1444 if ((*(ln + 1) == 'b') || (*(ln + 1) == 'B'))
1450 if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
1456 if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
1466 case '@': // @ or octal constant
1467 if (*ln < '0' || *ln > '7')
1475 while (*ln >= '0' && *ln <= '7')
1476 v = (v << 3) + *ln++ - '0';
1480 if ((*(ln + 1) == 'b') || (*(ln + 1) == 'B'))
1486 if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
1492 if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
1502 case '^': // ^ or ^^ <operator-name>
1509 if (((int)chrtab[*++ln] & STSYM) == 0)
1511 error("invalid symbol following ^^");
1517 while ((int)chrtab[*ln] & CTSYM)
1520 for(state=0; state>=0;)
1522 // Get char, convert to lowercase
1525 if (j >= 'A' && j <= 'Z')
1530 if (kwcheck[j] != state)
1536 if (*p == EOS || p == ln)
1545 if (j < 0 || state < 0)
1547 error("unknown symbol following ^^");
1551 *tk.u32++ = (TOKEN)j;
1554 interror(2); // Bad MULTX entry in chrtab
1559 // Handle decimal constant
1562 uint8_t * numStart = ln;
1565 while ((int)chrtab[*ln] & DIGIT)
1566 v = (v * 10) + *ln++ - '0';
1568 // See if there's a .[bwl] after the constant & deal with it if so
1571 if ((*(ln + 1) == 'b') || (*(ln + 1) == 'B'))
1579 else if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
1587 else if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
1595 else if ((int)chrtab[*(ln + 1)] & DIGIT)
1597 // Hey, more digits after the dot, so we assume it's a
1598 // floating point number of some kind
1604 while ((int)chrtab[*ln] & DIGIT)
1606 f = f + (double)(*ln++ - '0') / fract;
1610 // Here we parse the whole floating point number
// NOTE(review): numStart is a uint8_t * but strtod() takes a const char *,
// and this 'f' has the same name as the 'f' declared at the top of the
// function -- confirm both are intended.
1613 double f = strtod(numStart, &numEnd);
1614 ln = (uint8_t *)numEnd;
1617 return error("floating point parse error");
1621 // Shamus: Well, this is all kinds of icky--not the least of which is that
// unlike uintNN_t types, we have no guarantees of any kind when it comes to
// the size of floating point numbers in C (as far as I know of). If there
// is, we need to use those kinds here, or else figure out at runtime what
// sizes we're dealing with and act accordingly. To be fair, this is OK as
// long as the double type is less than 64 bits wide, but again, there's no
// guarantee that it isn't. :-/
1632 //printf("CONST: %i\n", v);
1636 // Handle illegal character
1637 return error("illegal character $%02X found", *ln);
1640 // Terminate line of tokens and return "success."
1643 tok = etok; // Set tok to beginning of line
1645 if (stuffnull) // Terminate last SYMBOL
1655 // .GOTO <label> goto directive
1657 // The label is searched for starting from the first line of the current,
1658 // enclosing macro definition. If no enclosing macro exists, an error is
1661 // A label is of the form:
1663 // :<name><whitespace>
1665 // The colon must appear in column 1. The label is stripped prior to macro
1666 // expansion, and is NOT subject to macro expansion. The whitespace may also
// On a match the macro's next-line pointer is set to the label's line, so
// expansion resumes there. Failures are reported through error().
1669 int d_goto(WORD unused)
1671 // Setup for the search
1673 return error("missing label");
1675 char * sym = string[tok[1]]; // tok[1] is the index of the label name in string[]
1678 if (cur_inobj->in_type != SRC_IMACRO)
1679 return error("goto not in macro");
1681 IMACRO * imacro = cur_inobj->inobj.imacro;
1682 LLIST * defln = imacro->im_macro->lineList;
1684 // Attempt to find the label, starting with the first line.
1685 for(; defln!=NULL; defln=defln->next)
1687 // Must start with a colon
1688 if (defln->line[0] == ':')
1690 // Compare names (simple hand-rolled string compare)
1692 char * s2 = defln->line;
1694 // Either we will match the strings to EOS on both, or we will
1695 // match EOS on string 1 to whitespace on string 2. Otherwise, we
// NOTE(review): *s2 is a plain char used to index chrtab[]; where char is
// signed, a byte >= 0x80 gives a negative index -- confirm this cannot occur.
1697 while ((*s1 == *s2) || ((*s1 == EOS) && (chrtab[*s2] & WHITE)))
1699 // If we reached the end of string 1 (sym), we're done.
1700 // Note that we're also checking for the end of string 2 as
1701 // well, since we've established they're equal above.
1704 // Found the label, set new macro next-line and return.
1705 imacro->im_nextln = defln;
1715 return error("goto label not found");
// Debug helper: print a bracketed, human-readable form of the single token
// 't' to stdout. Named tokens print their name; everything else falls through
// to the value-range checks at the end.
1719 void DumpToken(TOKEN t)
1723 else if (t == CONST)
1725 else if (t == ACONST)
1727 else if (t == STRING)
1729 else if (t == SYMBOL)
1733 else if (t == TKEOF)
1735 else if (t == DEQUALS)
1736 printf("[DEQUALS]");
1741 else if (t == DCOLON)
1753 else if (t == UNMINUS)
1754 printf("[UNMINUS]");
1769 else if (t == ENDEXPR)
1770 printf("[ENDEXPR]");
1771 else if (t == CR_ABSCOUNT)
1772 printf("[CR_ABSCOUNT]");
1773 else if (t == CR_DEFINED)
1774 printf("[CR_DEFINED]");
1775 else if (t == CR_REFERENCED)
1776 printf("[CR_REFERENCED]");
1777 else if (t == CR_STREQ)
1778 printf("[CR_STREQ]");
1779 else if (t == CR_MACDEF)
1780 printf("[CR_MACDEF]");
1781 else if (t == CR_TIME)
1782 printf("[CR_TIME]");
1783 else if (t == CR_DATE)
1784 printf("[CR_DATE]");
1785 else if (t >= 0x20 && t <= 0x2F) // Values in this range are printed as the ASCII character itself
1786 printf("[%c]", (char)t);
1787 else if (t >= 0x3A && t <= 0x3F) // Likewise printed as the ASCII character
1788 printf("[%c]", (char)t);
1789 else if (t >= 0x80 && t <= 0x87) // Printed as D0..D7
1790 printf("[D%u]", ((uint32_t)t) - 0x80);
1791 else if (t >= 0x88 && t <= 0x8F) // Printed as A0..A7
1792 printf("[A%u]", ((uint32_t)t) - 0x88);
1794 printf("[%X:%c]", (uint32_t)t, (char)t); // Fallback: hex value plus its low byte as a character
// Debug helper: print every token in tokbuf[], from its start up to the first
// EOL, to stdout, preceded by the current value of sloc.
1798 void DumpTokenBuffer(void)
1800 printf("Tokens [%X]: ", sloc);
1802 for(TOKEN * t=tokbuf; *t!=EOL; t++)
1806 else if (*t == CONST)
// NOTE(review): "%lX" expects an unsigned long; the value read through
// tp.u64 is presumably a uint64_t, which only matches where long is 64 bits
// wide -- confirm.
1810 printf("[CONST: $%lX]", *tp.u64);
1813 else if (*t == ACONST)
1815 printf("[ACONST: $%X, $%X]", (uint32_t)t[1], (uint32_t)t[2]);
1818 else if (*t == STRING)
1821 printf("[STRING:\"%s\"]", string[*t]);
1823 else if (*t == SYMBOL)
1826 printf("[SYMBOL:\"%s\"]", string[*t]);
1830 else if (*t == TKEOF)
1832 else if (*t == DEQUALS)
1833 printf("[DEQUALS]");
1838 else if (*t == DCOLON)
1850 else if (*t == UNMINUS)
1851 printf("[UNMINUS]");
1852 else if (*t == DOTB)
1854 else if (*t == DOTW)
1856 else if (*t == DOTL)
1858 else if (*t == DOTQ)
1860 else if (*t == DOTS)
1862 else if (*t == DOTD)
1864 else if (*t == DOTI)
1866 else if (*t == ENDEXPR)
1867 printf("[ENDEXPR]");
1868 else if (*t == CR_ABSCOUNT)
1869 printf("[CR_ABSCOUNT]");
1870 else if (*t == CR_DEFINED)
1871 printf("[CR_DEFINED]");
1872 else if (*t == CR_REFERENCED)
1873 printf("[CR_REFERENCED]");
1874 else if (*t == CR_STREQ)
1875 printf("[CR_STREQ]");
1876 else if (*t == CR_MACDEF)
1877 printf("[CR_MACDEF]");
1878 else if (*t == CR_TIME)
1879 printf("[CR_TIME]");
1880 else if (*t == CR_DATE)
1881 printf("[CR_DATE]");
1882 else if (*t >= 0x20 && *t <= 0x2F) // Values in this range are printed as the ASCII character itself
1883 printf("[%c]", (char)*t);
1884 else if (*t >= 0x3A && *t <= 0x3F) // Likewise printed as the ASCII character
1885 printf("[%c]", (char)*t);
1886 else if (*t >= 0x80 && *t <= 0x87) // Printed as D0..D7
1887 printf("[D%u]", ((uint32_t)*t) - 0x80);
1888 else if (*t >= 0x88 && *t <= 0x8F) // Printed as A0..A7
1889 printf("[A%u]", ((uint32_t)*t) - 0x88);
1891 printf("[%X:%c]", (uint32_t)*t, (char)*t); // Fallback: hex value plus its low byte as a character