2 // RMAC - Reboot's Macro Assembler for all Atari computers
3 // TOKEN.C - Token Handling
4 // Copyright (C) 199x Landon Dyer, 2011-2017 Reboot and Friends
5 // RMAC derived from MADMAC v1.07 Written by Landon Dyer, 1986
6 // Source utilised with the kind permission of Landon Dyer
17 #define DECL_KW // Declare keyword arrays
18 #define DEF_KW // Declare keyword values
19 #include "kwtab.h" // Incl generated keyword tables & defs
// Tokenizer state. The non-static objects below are visible to other
// translation units; the static ones are private to this file.
22 int lnsave; // 1: strcpy() text of current line (0 = don't save)
23 uint16_t curlineno; // Current line number (64K max currently)
24 int totlines; // Total # of lines
25 int mjump_align = 0; // mjump alignment flag
26 char lntag; // Line tag
27 char * curfname; // Current filename
28 char tolowertab[128]; // Uppercase ==> lowercase
29 int8_t hextab[128]; // Hex digit values, indexed by character (TokenizeLine treats entries < 0 as "not a hex digit")
30 char dotxtab[128]; // Table for ".b", ".s", etc.
31 char irbuf[LNSIZ]; // Text for .rept block line
32 char lnbuf[LNSIZ]; // Text of current line
33 WORD filecount; // Unique file number counter
34 WORD cfileno; // Current file number
35 TOKEN * tok; // Ptr to current token
36 TOKEN * etok; // Ptr past last token in tokbuf[]
37 TOKEN tokeol[1] = {EOL}; // Bailout end-of-line token
38 char * string[TOKBUFSIZE*2]; // Token buffer string pointer storage
39 int optimizeOff; // Optimization override flag (TokenizeLine resets it to 0 for every line and sets it to 1 to suppress optimization of that line)
41 // File record, used to maintain a list of every include file ever visited
42 #define FILEREC struct _filerec
52 INOBJ * cur_inobj; // Ptr current input obj (IFILE/IMACRO/IREPT)
53 static INOBJ * f_inobj; // Ptr list of free INOBJs
54 static IFILE * f_ifile; // Ptr list of free IFILEs
55 static IMACRO * f_imacro; // Ptr list of free IMACROs
57 static TOKEN tokbuf[TOKBUFSIZE]; // Token buffer (stack-like, all files)
// Character classification table, indexed by (unsigned) character code. Each
// entry is a combination of the class flags tested by the tokenizer: ILLEG,
// WHITE, SELF, MULTX, DIGIT, HDIGIT, STSYM (may start a symbol), CTSYM (may
// continue a symbol) and DOT (may follow a '.' as a size suffix).
59 uint8_t chrtab[0x100] = {
60 ILLEG, ILLEG, ILLEG, ILLEG, // NUL SOH STX ETX
61 ILLEG, ILLEG, ILLEG, ILLEG, // EOT ENQ ACK BEL
62 ILLEG, WHITE, ILLEG, ILLEG, // BS HT LF VT
63 WHITE, ILLEG, ILLEG, ILLEG, // FF CR SO SI
65 ILLEG, ILLEG, ILLEG, ILLEG, // DLE DC1 DC2 DC3
66 ILLEG, ILLEG, ILLEG, ILLEG, // DC4 NAK SYN ETB
67 ILLEG, ILLEG, ILLEG, ILLEG, // CAN EM SUB ESC
68 ILLEG, ILLEG, ILLEG, ILLEG, // FS GS RS US
70 WHITE, MULTX, MULTX, SELF, // SP ! " #
71 MULTX+CTSYM, MULTX, SELF, MULTX, // $ % & '
72 SELF, SELF, SELF, SELF, // ( ) * +
73 SELF, SELF, STSYM, SELF, // , - . /
75 DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 0 1
76 DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 2 3
77 DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 4 5
78 DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 6 7
79 DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 8 9
81 MULTX, MULTX, MULTX, STSYM+CTSYM, // < = > ?
83 MULTX, STSYM+CTSYM+HDIGIT, // @ A
84 DOT+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT, // B C
85 DOT+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT, // D E
86 STSYM+CTSYM+HDIGIT, STSYM+CTSYM, // F G
87 STSYM+CTSYM, DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // H I J K
88 DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // L M N O
90 DOT+STSYM+CTSYM, DOT+STSYM+CTSYM, STSYM+CTSYM, DOT+STSYM+CTSYM, // P Q R S
91 STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, DOT+STSYM+CTSYM, // T U V W
// NOTE(review): 'X' below has no DOT flag, although lowercase 'x' does and
// InitTokenizer() fills in dotxtab['X']. The tokenizer requires both the DOT
// flag and a non-zero dotxtab[] entry, so ".X" looks unreachable -- confirm
// whether this asymmetry is intended.
92 STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, SELF,// X Y Z [
93 SELF, SELF, MULTX, STSYM+CTSYM, // \ ] ^ _
95 ILLEG, STSYM+CTSYM+HDIGIT, // ` a
96 DOT+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT, // b c
97 DOT+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT, // d e
98 STSYM+CTSYM+HDIGIT, STSYM+CTSYM, // f g
99 STSYM+CTSYM, DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // h i j k
100 DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // l m n o
102 DOT+STSYM+CTSYM, DOT+STSYM+CTSYM, STSYM+CTSYM, DOT+STSYM+CTSYM, // p q r s
103 STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, DOT+STSYM+CTSYM, // t u v w
104 DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, SELF, // x y z {
105 SELF, SELF, SELF, ILLEG, // | } ~ DEL
107 // Anything above $7F is illegal (and yes, we need to check for this,
108 // otherwise you get strange and spurious errors that will lead you astray)
109 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
110 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
111 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
112 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
113 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
114 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
115 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
116 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
117 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
118 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
119 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
120 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
121 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
122 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
123 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
124 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG
127 // Names of registers
// ExpandMacro() indexes this table with (token - KW_D0) to turn a register
// keyword token back into text. The trailing comment on each row presumably
// gives the first,last token value covered by that row -- confirm against
// kwtab.h. Empty strings are placeholder entries.
// NOTE(review): the 304,311 row repeats the 272,279 row verbatim; verify that
// this duplication is intentional.
128 static char * regname[] = {
129 "d0","d1","d2","d3","d4","d5","d6","d7", // 128,135
130 "a0","a1","a2","a3","a4","a5","a6","sp", // 136,143
131 "ssp","pc","sr","ccr","regequ","set","reg","r0", // 144,151
132 "r1","r2","r3","r4","r5","r6","r7","r8", // 152,159
133 "r9","r10","r11","r12","r13","r14","r15","r16", // 160,167
134 "r17","r18","r19","r20","r21","r22","r23","r24", // 168,175
135 "r25","r26","r27","r28","r29","r30","r31","ccdef", // 176,183
136 "usp","ic40","dc40","bc40","sfc","dfc","","vbr", // 184,191
137 "cacr","caar","msp","isp","tc","itt0","itt1","dtt0", // 192,199
138 "dtt1","mmusr","urp","srp","iacr0","iacr1","dacr0","dacr1", // 200,207
139 "tt0","tt1","crp","","","","","", // 208,215
140 "","","","","fpiar","fpsr","fpcr","", // 216,223
141 "fp0","fp1","fp2","fp3","fp4","fp5","fp6","fp7", // 224,231
142 "","","","","","","","", // 232,239
143 "","","","","","","","", // 240,247
144 "","","","","","","","", // 248,255
145 "","","","","x0","x1","y0","y1", // 256,263
146 "","b0","","b2","","b1","a","b", // 264,271
147 "mr","omr","la","lc","ssh","ssl","ss","", // 272,279
148 "n0","n1","n2","n3","n4","n5","n6","n7", // 280,287
149 "m0","m1","m2","m3","m4","m5","m6","m7", // 288,295
150 "","","","","","","l","p", // 296,303
151 "mr","omr","la","lc","ssh","ssl","ss","", // 304,311
152 "a10","b10","x","y","","","ab","ba" // 312,319
// Names of the RISC registers r0..r31. ExpandMacro() indexes this table with
// (token - KW_R0) for tokens in the range KW_R0..KW_R31.
155 static char * riscregname[] = {
156 "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
157 "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
158 "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
159 "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31"
164 // Initialize tokenizer
// Resets the line-saving flag, current filename and file counters, then fills
// the hextab[], tolowertab[] and dotxtab[] lookup tables.
166 void InitTokenizer(void)
169 char * htab = "0123456789abcdefABCDEF"; // Hex character table
171 lnsave = 0; // Don't save lines
172 curfname = ""; // No file, empty filename
173 filecount = (WORD)-1; // include() pre-increments this, so the first file gets number 0
174 cfileno = (WORD)-1; // cfileno gets bumped to 0
186 // Initialize hex, "dot" and tolower tables
191 tolowertab[i] = (char)i;
194 for(i=0; htab[i]!=EOS; i++)
195 hextab[htab[i]] = (char)((i < 16) ? i : i - 6); // htab[16..21] are 'A'..'F', which must map to 10..15
197 for(i='A'; i<='Z'; i++)
198 tolowertab[i] |= 0x20; // Setting bit 5 maps ASCII 'A'..'Z' to 'a'..'z'
200 // These characters are legal immediately after a period
201 dotxtab['b'] = DOTB; // .b .B
203 //dotxtab['s'] = DOTB;
204 //dotxtab['S'] = DOTB;
205 dotxtab['w'] = DOTW; // .w .W
207 dotxtab['l'] = DOTL; // .l .L
209 dotxtab['i'] = DOTI; // .i .I (???)
211 dotxtab['D'] = DOTD; // .d .D (quad word)
213 dotxtab['S'] = DOTS; // .s .S
215 dotxtab['Q'] = DOTQ; // .q .Q
217 dotxtab['X'] = DOTX; // .x .X
219 dotxtab['P'] = DOTP; // .p .P
// Point curfname at the name to show in error reports. It becomes a
// placeholder string for the "top" case or when no matching record exists,
// and otherwise the frec_name of the FILEREC reached by walking the filerec
// list.
224 void SetFilenameForErrorReporting(void)
228 // Check for absolute top filename (this should never happen)
231 curfname = "(*top*)";
235 FILEREC * fr = filerec;
237 // Advance to the correct record...
238 while (fr != NULL && fnum != 0)
244 // Check for file # record not found (this should never happen either)
247 curfname = "(*NOT FOUND*)";
251 curfname = fr->frec_name;
256 // Allocate an IFILE, IMACRO or IREPT
// Creates an INOBJ plus the type-specific record selected by 'typ'
// (SRC_IFILE, SRC_IMACRO or SRC_IREPT). INOBJ, IFILE and IMACRO records are
// taken from the f_inobj / f_ifile / f_imacro free lists or obtained with
// malloc(); an IREPT is always malloc()ed. The new INOBJ records the current
// .if context and the tok/etok pointers, and links to the previous cur_inobj.
258 INOBJ * a_inobj(int typ)
264 // Allocate and initialize INOBJ first
266 inobj = malloc(sizeof(INOBJ));
270 f_inobj = f_inobj->in_link; // Unlink the head of the free list
275 case SRC_IFILE: // Alloc and init an IFILE
277 ifile = malloc(sizeof(IFILE));
281 f_ifile = f_ifile->if_link; // Unlink the head of the free list
284 inobj->inobj.ifile = ifile;
287 case SRC_IMACRO: // Alloc and init an IMACRO
288 if (f_imacro == NULL)
289 imacro = malloc(sizeof(IMACRO));
293 f_imacro = f_imacro->im_link; // Unlink the head of the free list
296 inobj->inobj.imacro = imacro;
299 case SRC_IREPT: // Alloc and init an IREPT
300 inobj->inobj.irept = malloc(sizeof(IREPT));
301 DEBUG { printf("alloc IREPT\n"); }
305 // Install INOBJ on top of input stack
306 inobj->in_ifent = ifent; // Record .if context on entry
307 inobj->in_type = (WORD)typ;
308 inobj->in_otok = tok; // Saved so the token pointers can be restored when this level is popped
309 inobj->in_etok = etok;
310 inobj->in_link = cur_inobj;
318 // Perform macro substitution from 'orig' to 'dest'. Return OK or some error.
319 // A macro reference is in one of two forms:
320 // \name <non-name-character>
322 // A doubled backslash (\\) is compressed to a single backslash (\).
323 // Argument definitions have been pre-tokenized, so we have to turn them back
324 // into text. This means that numbers, in particular, become hex, regardless of
325 // their representation when the macro was invoked. This is a hack.
326 // A label may appear at the beginning of the line:
327 // :<name><whitespace>
328 // (the colon must be in the first column). These labels are stripped before
329 // macro expansion takes place.
//
// Parameters:
//   src     - text of the macro line to expand
//   dest    - buffer that receives the expanded text
//   destsiz - size of 'dest' in bytes
// Malformed references return the result of error(); running out of room in
// 'dest' returns the result of fatal().
331 int ExpandMacro(char * src, char * dest, int destsiz)
334 int questmark; // \? for testing argument existence
335 char mname[128]; // Assume max size of a formal arg name
336 char numbuf[20]; // Buffer for text of CONSTs
339 char ** symbolString;
341 DEBUG { printf("ExM: src=\"%s\"\n", src); }
343 IMACRO * imacro = cur_inobj->inobj.imacro;
344 int macnum = (int)(imacro->im_macro->sattr);
346 char * dst = dest; // Next dest slot
347 char * edst = dest + destsiz - 1; // Last byte of dest buffer (dest + destsiz - 1)
349 // Check for (and skip over) any "label" on the line
355 while (*s != EOS && !(chrtab[*s] & WHITE))
359 s++; // Skip first whitespace
362 // Expand the rest of the line
365 // Copy single character
371 // Skip comments in case a loose @ or \ is in there
372 // In that case the tokeniser was trying to expand it.
373 if ((*s == ';') || ((*s == '/') && (*(s + 1) == '/')))
378 // Do macro expansion
386 case '\\': // \\, \ (collapse to single backslash)
392 case '?': // \? <macro> set `questmark' flag
396 case '#': // \#, number of arguments
397 sprintf(numbuf, "%d", (int)imacro->im_nargs);
399 case '!': // \! size suffix supplied on invocation
400 switch ((int)imacro->im_siz)
402 case SIZN: d = ""; break;
403 case SIZB: d = ".b"; break;
404 case SIZW: d = ".w"; break;
405 case SIZL: d = ".l"; break;
409 case '~': // ==> unique label string Mnnnn...
410 sprintf(numbuf, "M%u", curuniq);
426 return error("missing argument name");
429 // \n ==> argument number 'n', 0..9
430 if (chrtab[*s] & DIGIT)
440 // Get argument name: \name, \{name}
450 while (chrtab[*s] & CTSYM);
455 for(++s; *s != EOS && *s != '}';)
459 return error("missing closing brace ('}')");
466 // Lookup the argument and copy its (string) value into the
467 // destination string
468 DEBUG { printf("argument='%s'\n", mname); }
470 if ((arg = lookup(mname, MACARG, macnum)) == NULL)
471 return error("undefined argument: '%s'", mname);
474 // Convert a string of tokens (terminated with EOL) back into
475 // text. If an argument is out of range (not specified in the
476 // macro invocation) then it is ignored.
477 i = (int)arg->svalue;
479 DEBUG { printf("~argnumber=%d\n", i); }
482 if (i < imacro->im_nargs)
484 tk = imacro->argument[i].token;
485 symbolString = imacro->argument[i].string;
488 // printf("ExM: Preparing to parse argument #%u...\n", i);
494 // 0 if the argument is empty or non-existent,
495 // 1 if the argument is not empty
498 if (tk == NULL || *tk == EOL)
504 *dst++ = (char)(questmark + '0');
508 // Argument # is in range, so expand it
513 // Reverse-translation from a token number to a string.
514 // This is a hack. It might be better table-driven.
517 if ((*tk >= KW_D0) && !rdsp && !rgpu)
519 d = regname[(int)*tk++ - KW_D0];
522 else if ((*tk >= KW_R0) && (*tk <= KW_R31))
524 d = riscregname[(int)*tk++ - KW_R0];
533 // d = (char *)*tk++;
536 // This fix should be done for strings too
537 d = symbolString[*tk++];
538 DEBUG { printf("ExM: SYMBOL=\"%s\"", d); }
543 // d = (char *)*tk++;
546 d = symbolString[*tk++];
567 // Shamus: Changing the format specifier from %lx to %ux caused the assembler
568 // to choke on legitimate code... Need to investigate this further
569 // before changing anything else here!
// NOTE(review): "%lx" expects an unsigned long, but the argument is cast to
// uint64_t; the two only match where long is 64 bits wide. PRIx64 from
// <inttypes.h> would be the portable specifier -- verify before changing.
571 sprintf(numbuf, "$%lx", (uint64_t)*tk++);
636 *dst++ = (char)*(tk - 1);
641 // If 'd' != NULL, copy string to destination
645 DEBUG printf("d='%s'\n", d);
664 DEBUG { printf("ExM: dst=\"%s\"\n", dest); }
669 DEBUG { printf("*** OVERFLOW LINE ***\n%s\n", dest); }
670 return fatal("line too long as a result of macro expansion");
675 // Get next line of text from a macro
// Advances the current macro's im_nextln pointer, expands the stored line into
// imacro->im_lnbuf and returns that buffer. TokenizeLine() treats a NULL
// result as end-of-macro.
// NOTE(review): the return value of ExpandMacro() is discarded here, so an
// expansion error is not propagated to the caller.
677 char * GetNextMacroLine(void)
679 IMACRO * imacro = cur_inobj->inobj.imacro;
680 // LONG * strp = imacro->im_nextln;
681 LLIST * strp = imacro->im_nextln;
683 if (strp == NULL) // End-of-macro
686 imacro->im_nextln = strp->next;
687 // ExpandMacro((char *)(strp + 1), imacro->im_lnbuf, LNSIZ);
688 ExpandMacro(strp->line, imacro->im_lnbuf, LNSIZ);
690 return imacro->im_lnbuf;
695 // Get next line of text from a repeat block
// Copies the next stored line of the current .rept block into irbuf and
// advances ir_nextln. When the end of the line list is reached, it rewinds to
// ir_firstln and tests/decrements ir_count. TokenizeLine() treats a NULL
// result as end-of-repeat-block.
697 char * GetNextRepeatLine(void)
699 IREPT * irept = cur_inobj->inobj.irept;
700 // LONG * strp = irept->ir_nextln; // initial null
702 // Do repeat at end of .rept block's string list
704 if (irept->ir_nextln == NULL)
706 DEBUG { printf("back-to-top-of-repeat-block count=%d\n", (int)irept->ir_count); }
707 irept->ir_nextln = irept->ir_firstln; // copy first line
709 if (irept->ir_count-- == 0) // Post-decrement: the test sees the count before it is reduced
711 DEBUG { printf("end-repeat-block\n"); }
715 // strp = irept->ir_nextln;
718 // strcpy(irbuf, (char *)(irept->ir_nextln + 1));
// NOTE(review): unbounded copy into irbuf[LNSIZ]; assumes every stored line
// fits in LNSIZ bytes including the terminator -- confirm where the lines are
// recorded.
719 strcpy(irbuf, irept->ir_nextln->line);
720 DEBUG { printf("repeat line='%s'\n", irbuf); }
721 // irept->ir_nextln = (LONG *)*strp;
722 irept->ir_nextln = irept->ir_nextln->next;
729 // Include a source file used at the root, and for ".include" files
// Pushes a new IFILE input object for the already-opened file 'handle', saves
// the current line number, filename and file number in it, assigns a fresh
// file number, and appends a FILEREC for 'fname' to the list of visited files.
//   handle - file handle of the source file (stored in ifile->ifhandle)
//   fname  - name of the file; a strdup() copy becomes curfname
731 int include(int handle, char * fname)
734 DEBUG { printf("[include: %s, cfileno=%u]\n", fname, cfileno); }
736 // Alloc and initialize include-descriptors
737 INOBJ * inobj = a_inobj(SRC_IFILE);
738 IFILE * ifile = inobj->inobj.ifile;
740 ifile->ifhandle = handle; // Setup file handle
741 ifile->ifind = ifile->ifcnt = 0; // Setup buffer indices
742 ifile->ifoldlineno = curlineno; // Save old line number
743 ifile->ifoldfname = curfname; // Save old filename
744 ifile->ifno = cfileno; // Save old file number
746 // NB: This *must* be preincrement, we're adding one to the filecount here!
747 cfileno = ++filecount; // Compute NEW file number
// NOTE(review): the strdup() result is not checked for NULL before use.
748 curfname = strdup(fname); // Set current filename (alloc storage)
749 curlineno = 0; // Start on line zero
751 // Add another file to the file-record
// NOTE(review): the malloc() result is dereferenced on the next line without
// a NULL check.
752 FILEREC * fr = (FILEREC *)malloc(sizeof(FILEREC));
753 fr->frec_next = NULL;
754 fr->frec_name = curfname; // The record shares the strdup()ed name with curfname
757 filerec = fr; // Add first filerec
759 last_fr->frec_next = fr; // Append to list of filerecs
762 DEBUG { printf("[include: curfname: %s, cfileno=%u]\n", curfname, cfileno); }
769 // Pop the current input level
// Unwinds any .if levels opened inside the input object (warning about the
// missing .endif directives), restores tok/etok, releases the type-specific
// record (IFILE, IMACRO or IREPT) and makes the previous input object current.
// Returns -1 if d_endif() reports a failure while unwinding.
773 INOBJ * inobj = cur_inobj;
778 // Pop IFENT levels until we reach the conditional assembly context we
779 // were at when the input object was entered.
780 int numUnmatched = 0;
782 while (ifent != inobj->in_ifent)
784 if (d_endif() != 0) // Something bad happened during endif parsing?
785 return -1; // If yes, bail instead of getting stuck in a loop
790 // Give a warning to the user that we had to wipe their bum for them
791 if (numUnmatched > 0)
792 warn("missing %d .endif(s)", numUnmatched);
794 tok = inobj->in_otok; // Restore tok and otok
795 etok = inobj->in_etok;
797 switch (inobj->in_type)
799 case SRC_IFILE: // Pop and release an IFILE
801 DEBUG { printf("[Leaving: %s]\n", curfname); }
803 IFILE * ifile = inobj->inobj.ifile;
804 ifile->if_link = f_ifile; // Chain the record onto the IFILE free list
806 close(ifile->ifhandle); // Close source file
807 DEBUG { printf("[fpop (pre): curfname=%s]\n", curfname); }
808 curfname = ifile->ifoldfname; // Set current filename
809 DEBUG { printf("[fpop (post): curfname=%s]\n", curfname); }
810 DEBUG { printf("[fpop: (pre) cfileno=%d ifile->ifno=%d]\n", (int)cfileno, (int)ifile->ifno); }
811 curlineno = ifile->ifoldlineno; // Set current line#
812 DEBUG { printf("cfileno=%d ifile->ifno=%d\n", (int)cfileno, (int)ifile->ifno); }
813 cfileno = ifile->ifno; // Restore current file number
814 DEBUG { printf("[fpop: (post) cfileno=%d ifile->ifno=%d]\n", (int)cfileno, (int)ifile->ifno); }
818 case SRC_IMACRO: // Pop and release an IMACRO
820 IMACRO * imacro = inobj->inobj.imacro;
821 imacro->im_link = f_imacro; // Chain the record onto the IMACRO free list
826 case SRC_IREPT: // Pop and release an IREPT
828 DEBUG { printf("dealloc IREPT\n"); }
829 LLIST * p = inobj->inobj.irept->ir_firstln;
831 // Deallocate repeat lines
842 cur_inobj = inobj->in_link; // Previous input object becomes current
843 inobj->in_link = f_inobj; // Chain the INOBJ onto the free list
851 // Get line from file into buf, return NULL on EOF or ptr to the start of a
// The line is located inside the current IFILE's buffer (fl->ifbuf), where
// fl->ifind is the index of the first unread byte and fl->ifcnt the number of
// unread bytes. When no complete line is buffered, the leftover bytes are
// moved to the front of the buffer and more data is read from fl->ifhandle.
854 char * GetNextLine(void)
858 int readamt = -1; // 0 if last read() yielded 0 bytes
859 IFILE * fl = cur_inobj->inobj.ifile;
863 // Scan for next end-of-line; handle stupid text formats by treating
864 // \r\n the same as \n. (lone '\r' at end of buffer means we have to
866 d = &fl->ifbuf[fl->ifind];
868 for(p=d, i=0, j=fl->ifcnt; i<j; i++, p++)
870 if (*p == '\r' || *p == '\n')
877 break; // Need to read more, then look for '\n' to eat
878 else if (p[1] == '\n')
882 // Cover up the newline with end-of-string sentinel
891 // Handle hanging lines by ignoring them (Input file is exhausted, no
892 // \r or \n on last line)
893 // Shamus: This is wrong. Never ignore any input!
894 if (!readamt && fl->ifcnt)
901 // Really should check to see if we're at the end of the buffer!
903 fl->ifbuf[fl->ifind + fl->ifcnt] = '\0';
905 return &fl->ifbuf[fl->ifind];
909 // Truncate and return absurdly long lines.
910 if (fl->ifcnt >= QUANTUM)
912 fl->ifbuf[fl->ifind + fl->ifcnt - 1] = '\0'; // Overwrites the last buffered byte with the terminator
914 return &fl->ifbuf[fl->ifind];
917 // Relocate what's left of a line to the beginning of the buffer, and
918 // read some more of the file in; return NULL if the buffer's empty and
922 p = &fl->ifbuf[fl->ifind];
923 d = &fl->ifbuf[fl->ifcnt & 1]; // Destination is offset 0 or 1, depending on the parity of ifcnt
925 for(i=0; i<fl->ifcnt; i++)
928 fl->ifind = fl->ifcnt & 1;
931 readamt = read(fl->ifhandle, &fl->ifbuf[fl->ifind + fl->ifcnt], QUANTUM);
936 if ((fl->ifcnt += readamt) == 0)
// Tokenize the next line of input from the current input object (source file,
// macro or repeat block). Tokens are deposited starting at etok; on success
// tok is set to the first token of the line, and while the line is being
// processed tok points at tokeol so that an early error return leaves an empty
// line behind. Returns TKEOF when no more input is available and the result of
// error() for malformed input.
945 int TokenizeLine(void)
947 uint8_t * ln = NULL; // Ptr to current position in line
948 uint8_t * p; // Random character ptr
949 TOKEN * tk; // Token-deposit ptr
950 int state = 0; // State for keyword detector
951 int j = 0; // Var for keyword detector
952 uint8_t c; // Random char
953 uint64_t v; // Random value
954 uint32_t cursize = 0; // Current line's size (.b, .w, .l, .s, .q, .d)
955 double f; // Random float
956 uint8_t * nullspot = NULL; // Spot to clobber for SYMBOL termination
957 int stuffnull; // 1:terminate SYMBOL '\0' at *nullspot
959 int stringNum = 0; // Pointer to string locations in tokenized line
964 if (cur_inobj == NULL) // Return EOF if input stack is empty
967 // Get another line of input from the current input source: a file, a
968 // macro, or a repeat-block
969 switch (cur_inobj->in_type)
973 // o bump source line number;
974 // o tag the listing-line with a space;
975 // o kludge lines generated by Alcyon C.
977 if ((ln = GetNextLine()) == NULL)
979 DEBUG { printf("TokenizeLine: Calling fpop() from SRC_IFILE...\n"); }
980 if (fpop() == 0) // Pop input level
981 goto retry; // Try for more lines
984 ifent->if_prev = (IFENT *)-1; //Signal Assemble() that we have reached EOF with unbalanced if/endifs
989 curlineno++; // Bump line number
994 // AS68 compatibility, throw away all lines starting with
995 // back-quotes, tildes, or '*'
996 // On other lines, turn the first '*' into a semi-colon.
997 if (*ln == '`' || *ln == '~' || *ln == '*')
1001 for(p=ln; *p!=EOS; p++)
1015 // o Handle end-of-macro;
1016 // o tag the listing-line with an at (@) sign.
1018 if ((ln = GetNextMacroLine()) == NULL)
1020 if (ExitMacro() == 0) // Exit macro (pop args, do fpop(), etc)
1021 goto retry; // Try for more lines...
1023 return TKEOF; // Oops, we got a non zero return code, signal EOF
1030 // o Handle end-of-repeat-block;
1031 // o tag the listing-line with a pound (#) sign.
1033 if ((ln = GetNextRepeatLine()) == NULL)
1035 DEBUG { printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n"); }
1044 // Save text of the line. We only do this during listings and within
1045 // macro-type blocks, since it is expensive to unconditionally copy every
1050 // General housekeeping
1051 tok = tokeol; // Set "tok" to EOL in case of error
1052 tk = etok; // Reset token ptr
1053 stuffnull = 0; // Don't stuff nulls
1054 totlines++; // Bump total #lines assembled
1056 // See if the entire line is a comment. This is a win if the programmer
1057 // puts in lots of comments
1058 if (*ln == '*' || *ln == ';' || ((*ln == '/') && (*(ln + 1) == '/')))
1061 // And here we have a very ugly hack for signalling a single line 'turn off
1062 // optimization'. There's really no nice way to do this, so hack it is!
1063 optimizeOff = 0; // Default is to take optimizations as they come
1067 optimizeOff = 1; // Signal that we don't want to optimize this line
1068 ln++; // & skip over the darned thing
1071 // Main tokenization loop;
1072 // o skip whitespace;
1073 // o handle end-of-line;
1074 // o handle symbols;
1075 // o handle single-character tokens (operators, etc.);
1076 // o handle multiple-character tokens (constants, strings, etc.).
1079 // Skip whitespace, handle EOL
1080 while (chrtab[*ln] & WHITE)
1083 // Handle EOL, comment with ';'
1084 if (*ln == EOS || *ln == ';'|| ((*ln == '/') && (*(ln + 1) == '/')))
1087 // Handle start of symbol. Symbols are null-terminated in place. The
1088 // termination is always one symbol behind, since there may be no place
1089 // for a null in the case that an operator immediately follows the name.
1094 if (stuffnull) // Terminate old symbol from previous pass
1097 v = 0; // Assume no DOT attrib follows symbol
1100 // In some cases, we need to check for a DOTx at the *beginning*
1101 // of a symbol, as the "start" of the line we're currently looking
1102 // at could be somewhere in the middle of that line!
1105 // Make sure that it's *only* a .[bwsl] following, and not the
1106 // start of a local symbol:
1107 if ((chrtab[*(ln + 1)] & DOT)
1108 && (dotxtab[*(ln + 1)] != 0)
1109 && !(chrtab[*(ln + 2)] & CTSYM))
1111 // We found a legitimate DOTx construct, so add it to the
1115 *tk++ = (TOKEN)dotxtab[*ln++];
1120 p = nullspot = ln++; // Nullspot -> start of this symbol
1122 // Find end of symbol (and compute its length)
1123 for(j=1; (int)chrtab[*ln]&CTSYM; j++)
1126 // Handle "DOT" special forms (like ".b") that follow a normal
1127 // symbol or keyword:
1130 *ln++ = EOS; // Terminate symbol
1131 stuffnull = 0; // And never try it again
1133 // Character following the `.' must have a DOT attribute, and
1134 // the character after THAT one must not have a start-symbol
1135 // attribute (to prevent symbols that look like, for example,
1136 // "zingo.barf", which might be a good idea anyway....)
1137 if (((chrtab[*ln] & DOT) == 0) || (dotxtab[*ln] == 0))
1138 return error("[bwsl] must follow '.' in symbol");
1140 v = (uint32_t)dotxtab[*ln++];
1141 cursize = (uint32_t)v; // Remembered so later constants on this line can be parsed at that size
1143 if (chrtab[*ln] & CTSYM)
1144 return error("misuse of '.'; not allowed in symbols");
1147 // If the symbol is small, check to see if it's really the name of
1151 for(state=0; state>=0;)
1153 j = (int)tolowertab[*p++];
1156 if (kwcheck[j] != state)
1162 if (*p == EOS || p == ln)
1176 // Make j = -1 if user tries to use a RISC register while in 68K mode
1177 if (!(rgpu || rdsp) && ((TOKEN)j >= KW_R0 && (TOKEN)j <= KW_R31))
1182 // Make j = -1 if time, date etc with no preceding ^^
1183 // defined, referenced, streq, macdef, date and time
1186 case 112: // defined
1187 case 113: // referenced
1195 // If not tokenized keyword OR token was not found
1196 if ((j < 0) || (state < 0))
1200 //problem here: nullspot is a char * but TOKEN is a uint32_t. On a 64-bit
1201 //system, this will cause all kinds of mischief.
1203 *tk++ = (TOKEN)nullspot;
1205 string[stringNum] = nullspot;
1216 if (v) // Record attribute token (if any)
1219 if (stuffnull) // Arrange for string termination on next pass
1225 // Handle identity tokens
1232 // Handle multiple-character tokens
1237 case '!': // ! or !=
1247 case '\'': // 'string'
1250 // Hardcoded for now, maybe this will change in the future
1255 case '\"': // "string"
1259 string[stringNum] = ln;
1263 for(p=ln; *ln!=EOS && *ln!=c1;)
1272 return(error("unterminated string"));
1301 // If we're evaluating a macro
1302 // this is valid and expands to
1306 warn("bad backslash code in string");
1316 return error("unterminated string");
1320 case '$': // $, hex constant
1321 if (chrtab[*ln] & HDIGIT)
1323 if (cursize == 'q' || cursize == 'Q')
1325 // Parse 64-bit integer
1328 while (hextab[*ln] >= 0)
1329 v64 = (v64 << 4) + (int)hextab[*ln++];
// NOTE(review): stores 64 bits through a TOKEN pointer (TOKEN is 32 bits
// wide according to the comment near the SYMBOL handling above); relies on tk
// being suitably aligned for uint64_t -- confirm.
1331 *(uint64_t *)tk = v64;
1338 // Parse the hex value
1339 while (hextab[*ln] >= 0)
1340 v = (v << 4) + (int)hextab[*ln++];
1344 if (obj_format == BSD)
1346 if ((*(ln + 1) & 0xDF) == 'B') // Masking with 0xDF folds ASCII lowercase to uppercase
1351 else if ((*(ln + 1) & 0xDF) == 'W')
1356 else if ((*(ln + 1) & 0xDF) == 'L')
1365 tk64 = (uint64_t *)tk;
1369 if (obj_format == ALCYON)
1373 if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
1378 else if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
1390 case '<': // < or << or <> or <=
1409 case ':': // : or ::
1419 case '=': // = or ==
1429 case '>': // > or >> or >=
1444 case '%': // % or binary constant
1445 if (*ln < '0' || *ln > '1')
1453 while (*ln >= '0' && *ln <= '1')
1454 v = (v << 1) + *ln++ - '0';
1458 if ((*(ln + 1) == 'b') || (*(ln + 1) == 'B'))
1464 if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
1470 if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
1478 tk64 = (uint64_t *)tk;
1482 case '@': // @ or octal constant
1483 if (*ln < '0' || *ln > '7')
1491 while (*ln >= '0' && *ln <= '7')
1492 v = (v << 3) + *ln++ - '0';
1496 if ((*(ln+1) == 'b') || (*(ln+1) == 'B'))
1502 if ((*(ln+1) == 'w') || (*(ln+1) == 'W'))
1508 if ((*(ln+1) == 'l') || (*(ln+1) == 'L'))
1516 tk64 = (uint64_t *)tk;
1520 case '^': // ^ or ^^ <operator-name>
1527 if (((int)chrtab[*++ln] & STSYM) == 0)
1529 error("invalid symbol following ^^");
1535 while ((int)chrtab[*ln] & CTSYM)
1538 for(state=0; state>=0;)
1540 // Get char, convert to lowercase
1543 if (j >= 'A' && j <= 'Z')
1548 if (kwcheck[j] != state)
1554 if (*p == EOS || p == ln)
1563 if (j < 0 || state < 0)
1565 error("unknown symbol following ^^");
1572 interror(2); // Bad MULTX entry in chrtab
1577 // Handle decimal constant
1582 while ((int)chrtab[*ln] & DIGIT)
1583 v = (v * 10) + *ln++ - '0';
1585 // See if there's a .[bwl] after the constant & deal with it if so
1588 if ((*(ln + 1) == 'b') || (*(ln + 1) == 'B'))
1593 tk64 = (uint64_t *)tk;
1595 tk = (uint32_t *)tk64;
1598 else if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
1603 tk64 = (uint64_t *)tk;
1605 tk = (uint32_t *)tk64;
1609 else if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
1614 tk64 = (uint64_t *)tk;
1616 tk = (uint32_t *)tk64;
1620 else if ((int)chrtab[*(ln + 1)] & DIGIT)
1622 // Hey, more digits after the dot, so assume it's a
1623 // fractional number
1628 while ((int)chrtab[*ln] & DIGIT)
1630 f = f + (double)(*ln++ - '0') / fract;
1635 *((double *)tk) = f;
1643 tk64 = (uint64_t *)tk;
1648 //printf("CONST: %i\n", v);
1652 // Handle illegal character
1653 return error("illegal character $%02X found", *ln);
1656 // Terminate line of tokens and return "success."
1659 tok = etok; // Set tok to beginning of line
1661 if (stuffnull) // Terminate last SYMBOL
1671 // .GOTO <label> goto directive
1673 // The label is searched for starting from the first line of the current,
1674 // enclosing macro definition. If no enclosing macro exists, an error is
1677 // A label is of the form:
1679 // :<name><whitespace>
1681 // The colon must appear in column 1. The label is stripped prior to macro
1682 // expansion, and is NOT subject to macro expansion. The whitespace may also
//
// On a match the macro's im_nextln is pointed at the label's line, so
// expansion resumes there. Returns the result of error() when the label
// operand is missing, when the current input object is not a macro, or when no
// matching label line exists. The parameter is not used.
1685 int d_goto(WORD unused)
1687 // Setup for the search
1689 return error("missing label");
1691 char * sym = string[tok[1]]; // tok[1] is the index of the label's text in string[]
1694 if (cur_inobj->in_type != SRC_IMACRO)
1695 return error("goto not in macro");
1697 IMACRO * imacro = cur_inobj->inobj.imacro;
1698 LLIST * defln = imacro->im_macro->lineList;
1700 // Attempt to find the label, starting with the first line.
1701 for(; defln!=NULL; defln=defln->next)
1703 // Must start with a colon
1704 if (defln->line[0] == ':')
1706 // Compare names (sleazo string compare)
1708 char * s2 = defln->line;
1710 // Either we will match the strings to EOS on both, or we will
1711 // match EOS on string 1 to whitespace on string 2. Otherwise, we
1713 while ((*s1 == *s2) || ((*s1 == EOS) && (chrtab[*s2] & WHITE)))
1715 // If we reached the end of string 1 (sym), we're done.
1716 // Note that we're also checking for the end of string 2 as
1717 // well, since we've established they're equal above.
1720 // Found the label, set new macro next-line and return.
1721 imacro->im_nextln = defln;
1731 return error("goto label not found");
// Debugging aid: print a readable rendering of the tokens in tokbuf[] to
// stdout, from the start of the buffer up to the first EOL token.
1735 void DumpTokenBuffer(void)
1737 printf("Tokens [%X]: ", sloc);
1739 for(TOKEN * t=tokbuf; *t!=EOL; t++)
1743 else if (*t == CONST)
// NOTE(review): "%lX" expects an unsigned long, but the argument is a
// uint64_t; these only match where long is 64 bits wide (PRIX64 from
// <inttypes.h> is the portable specifier).
1745 printf("[CONST: $%lX]", ((uint64_t)t[1] << 32) | (uint64_t)t[2]);
1748 else if (*t == ACONST)
1750 printf("[ACONST: $%X, $%X]", (uint32_t)t[1], (uint32_t)t[2]);
1753 else if (*t == STRING)
1756 printf("[STRING:\"%s\"]", string[*t]);
1758 else if (*t == SYMBOL)
1761 printf("[SYMBOL:\"%s\"]", string[*t]);
1765 else if (*t == TKEOF)
1767 else if (*t == DEQUALS)
1768 printf("[DEQUALS]");
1773 else if (*t == DCOLON)
1785 else if (*t == UNMINUS)
1786 printf("[UNMINUS]");
1787 else if (*t == DOTB)
1789 else if (*t == DOTW)
1791 else if (*t == DOTL)
1793 else if (*t == DOTI)
1795 else if (*t == ENDEXPR)
1796 printf("[ENDEXPR]");
1797 else if (*t == CR_ABSCOUNT)
1798 printf("[CR_ABSCOUNT]");
1799 else if (*t == CR_DEFINED)
1800 printf("[CR_DEFINED]");
1801 else if (*t == CR_REFERENCED)
1802 printf("[CR_REFERENCED]");
1803 else if (*t == CR_STREQ)
1804 printf("[CR_STREQ]");
1805 else if (*t == CR_MACDEF)
1806 printf("[CR_MACDEF]");
1807 else if (*t == CR_TIME)
1808 printf("[CR_TIME]");
1809 else if (*t == CR_DATE)
1810 printf("[CR_DATE]");
1811 else if (*t >= 0x20 && *t <= 0x2F) // Token values in the ASCII range ' ' .. '/' print as the character itself
1812 printf("[%c]", (char)*t);
1813 else if (*t >= 0x3A && *t <= 0x3F) // Token values in the ASCII range ':' .. '?' print as the character itself
1814 printf("[%c]", (char)*t);
1815 else if (*t >= 0x80 && *t <= 0x87) // Printed as data registers D0..D7
1816 printf("[D%u]", ((uint32_t)*t) - 0x80);
1817 else if (*t >= 0x88 && *t <= 0x8F) // Printed as address registers A0..A7
1818 printf("[A%u]", ((uint32_t)*t) - 0x88);
1820 printf("[%X:%c]", (uint32_t)*t, (char)*t);