2 // RMAC - Reboot's Macro Assembler for all Atari computers
3 // TOKEN.C - Token Handling
4 // Copyright (C) 199x Landon Dyer, 2011-2017 Reboot and Friends
5 // RMAC derived from MADMAC v1.07 Written by Landon Dyer, 1986
6 // Source utilised with the kind permission of Landon Dyer
17 #define DECL_KW // Declare keyword arrays
18 #define DEF_KW // Declare keyword values
19 #include "kwtab.h" // Incl generated keyword tables & defs
22 int lnsave; // 1: save (strcpy) text of current line; 0: don't save
23 uint16_t curlineno; // Current line number (64K max currently)
24 int totlines; // Total # of lines
25 int mjump_align = 0; // mjump alignment flag
26 char lntag; // Line tag
27 char * curfname; // Current filename
28 char tolowertab[128]; // Uppercase ==> lowercase (indexed by 7-bit char code)
29 int8_t hextab[128]; // Table of hex values (TokenizeLine treats entries < 0 as "not a hex digit")
30 char dotxtab[128]; // Table for ".b", ".s", etc.
31 char irbuf[LNSIZ]; // Text for .rept block line
32 char lnbuf[LNSIZ]; // Text of current line
33 WORD filecount; // Unique file number counter
34 WORD cfileno; // Current file number
35 TOKEN * tok; // Ptr to current token
36 TOKEN * etok; // Ptr past last token in tokbuf[]
37 TOKEN tokeol[1] = {EOL}; // Bailout end-of-line token (tok points here while a line is being tokenized)
38 char * string[TOKBUFSIZE*2]; // Token buffer string pointer storage (indexed by the value stored in the token stream)
39 int optimizeOff; // Optimization override flag (1 = don't optimize the current line)
41 // File record, used to maintain a list of every include file ever visited
42 #define FILEREC struct _filerec
52 INOBJ * cur_inobj; // Ptr current input obj (IFILE/IMACRO/IREPT)
53 static INOBJ * f_inobj; // Ptr list of free INOBJs
54 static IFILE * f_ifile; // Ptr list of free IFILEs
55 static IMACRO * f_imacro; // Ptr list of free IMACROs
57 static TOKEN tokbuf[TOKBUFSIZE]; // Token buffer (stack-like, all files)
// Character classification table, indexed by (unsigned) character code.
// Each entry is a sum of class flags (ILLEG, WHITE, SELF, MULTX, STSYM,
// CTSYM, DIGIT, HDIGIT, DOT); the tokenizer tests them with expressions such
// as (chrtab[c] & WHITE). DOT marks letters that may follow a '.' as a size
// suffix. The tokenizer checks DOT *before* it consults dotxtab[], so DOT has
// to be set for both cases of every letter that has a dotxtab[] entry.
// NOTE(review): the row holding the ':' and ';' entries is not visible in
// this listing.
59 uint8_t chrtab[0x100] = {
60 ILLEG, ILLEG, ILLEG, ILLEG, // NUL SOH STX ETX
61 ILLEG, ILLEG, ILLEG, ILLEG, // EOT ENQ ACK BEL
62 ILLEG, WHITE, ILLEG, ILLEG, // BS HT LF VT
63 WHITE, ILLEG, ILLEG, ILLEG, // FF CR SO SI
65 ILLEG, ILLEG, ILLEG, ILLEG, // DLE DC1 DC2 DC3
66 ILLEG, ILLEG, ILLEG, ILLEG, // DC4 NAK SYN ETB
67 ILLEG, ILLEG, ILLEG, ILLEG, // CAN EM SUB ESC
68 ILLEG, ILLEG, ILLEG, ILLEG, // FS GS RS US
70 WHITE, MULTX, MULTX, SELF, // SP ! " #
71 MULTX+CTSYM, MULTX, SELF, MULTX, // $ % & '
72 SELF, SELF, SELF, SELF, // ( ) * +
73 SELF, SELF, STSYM, SELF, // , - . /
75 DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 0 1
76 DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 2 3
77 DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 4 5
78 DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 6 7
79 DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 8 9
81 MULTX, MULTX, MULTX, STSYM+CTSYM, // < = > ?
83 MULTX, STSYM+CTSYM+HDIGIT, // @ A
84 DOT+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT, // B C
85 DOT+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT, // D E
86 STSYM+CTSYM+HDIGIT, STSYM+CTSYM, // F G
87 STSYM+CTSYM, DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // H I J K
88 DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // L M N O
90 DOT+STSYM+CTSYM, DOT+STSYM+CTSYM, STSYM+CTSYM, DOT+STSYM+CTSYM, // P Q R S
91 STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, DOT+STSYM+CTSYM, // T U V W
// 'X' carries DOT (like 'x') so that the dotxtab['X'] = DOTX entry is
// reachable and an uppercase ".X" size suffix is accepted.
92 DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, SELF,// X Y Z [
93 SELF, SELF, MULTX, STSYM+CTSYM, // \ ] ^ _
95 ILLEG, STSYM+CTSYM+HDIGIT, // ` a
96 DOT+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT, // b c
97 DOT+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT, // d e
98 STSYM+CTSYM+HDIGIT, STSYM+CTSYM, // f g
99 STSYM+CTSYM, DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // h i j k
100 DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // l m n o
102 DOT+STSYM+CTSYM, DOT+STSYM+CTSYM, STSYM+CTSYM, DOT+STSYM+CTSYM, // p q r s
103 STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, DOT+STSYM+CTSYM, // t u v w
104 DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, SELF, // x y z {
105 SELF, SELF, SELF, ILLEG, // | } ~ DEL
107 // Anything above $7F is illegal (and yes, we need to check for this,
108 // otherwise you get strange and spurious errors that will lead you astray)
109 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
110 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
111 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
112 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
113 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
114 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
115 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
116 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
117 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
118 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
119 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
120 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
121 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
122 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
123 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
124 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG
127 // Names of registers
// ExpandMacro indexes this table with (token - KW_D0) to turn a register
// keyword token back into text. The trailing comment on each row gives the
// range of token values it covers (so KW_D0 is presumably 128 -- TODO
// confirm against kwtab.h). Empty strings appear to be placeholders for
// token values that have no register name.
128 static char * regname[] = {
129 "d0","d1","d2","d3","d4","d5","d6","d7", // 128,135
130 "a0","a1","a2","a3","a4","a5","a6","sp", // 136,143
131 "ssp","pc","sr","ccr","regequ","set","reg","r0", // 144,151
132 "r1","r2","r3","r4","r5","r6","r7","r8", // 152,159
133 "r9","r10","r11","r12","r13","r14","r15","r16", // 160,167
134 "r17","r18","r19","r20","r21","r22","r23","r24", // 168,175
135 "r25","r26","r27","r28","r29","r30","r31","ccdef", // 176,183
136 "usp","ic40","dc40","bc40","sfc","dfc","","vbr", // 184,191
137 "cacr","caar","msp","isp","tc","itt0","itt1","dtt0", // 192,199
138 "dtt1","mmusr","urp","srp","iacr0","iacr1","dacr0","dacr1", // 200,207
139 "tt0","tt1","crp","","","","","", // 208,215
140 "","","","","fpiar","fpsr","fpcr","", // 216,223
141 "fp0","fp1","fp2","fp3","fp4","fp5","fp6","fp7", // 224,231
142 "","","","","","","","", // 232,239
143 "","","","","","","","", // 240,247
144 "","","","","","","","", // 248,255
145 "","","","","x0","x1","y0","y1", // 256,263
146 "","b0","","b2","","b1","a","b", // 264,271
147 "mr","omr","la","lc","ssh","ssl","ss","", // 272,279
148 "n0","n1","n2","n3","n4","n5","n6","n7", // 280,287
149 "m0","m1","m2","m3","m4","m5","m6","m7", // 288,295
150 "","","","","","","l","p", // 296,303
// NOTE(review): this row repeats the 272,279 row verbatim -- confirm that
// is intended.
151 "mr","omr","la","lc","ssh","ssl","ss","", // 304,311
152 "a10","b10","x","y","","","ab","ba" // 312,319
// Names of the 32 RISC registers r0..r31. ExpandMacro indexes this table
// with (token - KW_R0) for tokens in the range KW_R0..KW_R31.
155 static char * riscregname[] = {
156 "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
157 "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
158 "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
159 "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31"
164 // Initialize tokenizer
// Resets the file/line bookkeeping globals and fills in the tolowertab[],
// hextab[] and dotxtab[] lookup tables. Takes no arguments, returns nothing.
// NOTE(review): some statements of this function are not visible in this
// listing; the comments below only describe the lines that are.
166 void InitTokenizer(void)
169 char * htab = "0123456789abcdefABCDEF"; // Hex character table
171 lnsave = 0; // Don't save lines
172 curfname = ""; // No file, empty filename
// include() pre-increments filecount, so the first file becomes number 0
173 filecount = (WORD)-1;
174 cfileno = (WORD)-1; // cfileno gets bumped to 0
186 // Initialize hex, "dot" and tolower tables
191 tolowertab[i] = (char)i;
// htab[0..15] are '0'-'9','a'-'f' (value = index); htab[16..21] are
// 'A'-'F', so index - 6 gives 10..15 for them
194 for(i=0; htab[i]!=EOS; i++)
195 hextab[htab[i]] = (char)((i < 16) ? i : i - 6);
// Setting bit 0x20 turns an ASCII uppercase letter into its lowercase form
197 for(i='A'; i<='Z'; i++)
198 tolowertab[i] |= 0x20;
200 // These characters are legal immediately after a period
201 dotxtab['b'] = DOTB; // .b .B (.s/.S no longer map to DOTB, see below)
203 //dotxtab['s'] = DOTB;
204 //dotxtab['S'] = DOTB;
205 dotxtab['w'] = DOTW; // .w .W
207 dotxtab['l'] = DOTL; // .l .L
209 dotxtab['i'] = DOTI; // .i .I (???)
211 dotxtab['D'] = DOTD; // .d .D (quad word)
213 dotxtab['S'] = DOTS; // .s .S
215 dotxtab['Q'] = DOTQ; // .q .Q
217 dotxtab['X'] = DOTX; // .x .X
219 dotxtab['P'] = DOTP; // .p .P
// Point curfname at a file name suitable for error messages. The visible
// code walks the filerec list until a counter (fnum) reaches zero and uses
// that record's frec_name; placeholder strings are used when there is no
// file or no matching record.
// NOTE(review): the initialisation of fnum is not visible in this listing;
// presumably it starts from cfileno -- TODO confirm.
224 void SetFilenameForErrorReporting(void)
228 // Check for absolute top filename (this should never happen)
231 curfname = "(*top*)";
235 FILEREC * fr = filerec;
237 // Advance to the correct record...
238 while (fr != NULL && fnum != 0)
244 // Check for file # record not found (this should never happen either)
247 curfname = "(*NOT FOUND*)";
251 curfname = fr->frec_name;
256 // Allocate an IFILE, IMACRO or IREPT
// 'typ' selects the kind of input object (SRC_IFILE, SRC_IMACRO or
// SRC_IREPT). An INOBJ wrapper is obtained first, then the type-specific
// part is attached to it. The f_inobj/f_ifile/f_imacro free lists are
// unlinked from on some paths and malloc() is used on others; an IREPT is
// always malloc'd. The new INOBJ records the current .if context and the
// current tok/etok and is linked in front of cur_inobj.
// NOTE(review): none of the malloc() results visible here is checked for
// NULL.
258 INOBJ * a_inobj(int typ)
264 // Allocate and initialize INOBJ first
266 inobj = malloc(sizeof(INOBJ));
270 f_inobj = f_inobj->in_link;
275 case SRC_IFILE: // Alloc and init an IFILE
277 ifile = malloc(sizeof(IFILE));
281 f_ifile = f_ifile->if_link;
284 inobj->inobj.ifile = ifile;
287 case SRC_IMACRO: // Alloc and init an IMACRO
288 if (f_imacro == NULL)
289 imacro = malloc(sizeof(IMACRO));
293 f_imacro = f_imacro->im_link;
296 inobj->inobj.imacro = imacro;
299 case SRC_IREPT: // Alloc and init an IREPT
300 inobj->inobj.irept = malloc(sizeof(IREPT));
301 DEBUG { printf("alloc IREPT\n"); }
305 // Install INOBJ on top of input stack
306 inobj->in_ifent = ifent; // Record .if context on entry
307 inobj->in_type = (WORD)typ;
308 inobj->in_otok = tok; // Saved so fpop() can restore tok
309 inobj->in_etok = etok; // Saved so fpop() can restore etok
310 inobj->in_link = cur_inobj;
318 // Perform macro substitution from 'orig' to 'dest'. Return OK or some error.
319 // A macro reference is in one of two forms:
320 // \name <non-name-character>
322 // A doubled backslash (\\) is compressed to a single backslash (\).
323 // Argument definitions have been pre-tokenized, so we have to turn them back
324 // into text. This means that numbers, in particular, become hex, regardless of
325 // their representation when the macro was invoked. This is a hack.
326 // A label may appear at the beginning of the line:
327 // :<name><whitespace>
328 // (the colon must be in the first column). These labels are stripped before
329 // macro expansion takes place.
//
// Parameters (the source text is named 'src' in the signature, 'orig' above):
//   src     - NUL-terminated macro body line to expand
//   dest    - buffer receiving the expanded text
//   destsiz - size of 'dest' in bytes (GetNextMacroLine passes LNSIZ)
// The macro being expanded is taken from cur_inobj. Error paths return the
// result of error(); overflow of 'dest' returns the result of fatal().
331 int ExpandMacro(char * src, char * dest, int destsiz)
334 int questmark; // \? for testing argument existence
335 char mname[128]; // Assume max size of a formal arg name
336 char numbuf[20]; // Buffer for text of CONSTs
339 char ** symbolString;
341 DEBUG { printf("ExM: src=\"%s\"\n", src); }
343 IMACRO * imacro = cur_inobj->inobj.imacro;
344 int macnum = (int)(imacro->im_macro->sattr);
346 char * dst = dest; // Next dest slot
347 char * edst = dest + destsiz - 1; // Last byte of dest (given destsiz is its size), not end + 1
349 // Check for (and skip over) any "label" on the line
// NOTE(review): if 's' is a plain char pointer, chrtab[*s] is indexed with
// a possibly negative value for bytes above $7F -- confirm the type of 's'.
355 while (*s != EOS && !(chrtab[*s] & WHITE))
359 s++; // Skip first whitespace
362 // Expand the rest of the line
365 // Copy single character
371 // Skip comments in case a loose @ or \ is in there
372 // In that case the tokeniser was trying to expand it.
373 if ((*s == ';') || ((*s == '/') && (*(s + 1) == '/')))
378 // Do macro expansion
386 case '\\': // \\, \ (collapse to single backslash)
392 case '?': // \? <macro> set `questmark' flag
396 case '#': // \#, number of arguments
397 sprintf(numbuf, "%d", (int)imacro->im_nargs);
399 case '!': // \! size suffix supplied on invocation
400 switch ((int)imacro->im_siz)
402 case SIZN: d = ""; break;
403 case SIZB: d = ".b"; break;
404 case SIZW: d = ".w"; break;
405 case SIZL: d = ".l"; break;
409 case '~': // ==> unique label string Mnnnn...
410 sprintf(numbuf, "M%u", curuniq);
426 return error("missing argument name");
429 // \n ==> argument number 'n', 0..9
430 if (chrtab[*s] & DIGIT)
440 // Get argument name: \name, \{name}
450 while (chrtab[*s] & CTSYM);
455 for(++s; *s != EOS && *s != '}';)
459 return error("missing closing brace ('}')");
466 // Lookup the argument and copy its (string) value into the
467 // destination string
468 DEBUG { printf("argument='%s'\n", mname); }
470 if ((arg = lookup(mname, MACARG, macnum)) == NULL)
471 return error("undefined argument: '%s'", mname);
474 // Convert a string of tokens (terminated with EOL) back into
475 // text. If an argument is out of range (not specified in the
476 // macro invocation) then it is ignored.
477 i = (int)arg->svalue;
479 DEBUG { printf("~argnumber=%d\n", i); }
482 if (i < imacro->im_nargs)
484 tk = imacro->argument[i].token;
485 symbolString = imacro->argument[i].string;
488 // printf("ExM: Preparing to parse argument #%u...\n", i);
494 // 0 if the argument is empty or nonexistent,
495 // 1 if the argument is not empty
498 if (tk == NULL || *tk == EOL)
504 *dst++ = (char)(questmark + '0');
508 // Argument # is in range, so expand it
513 // Reverse-translation from a token number to a string.
514 // This is a hack. It might be better table-driven.
517 if ((*tk >= KW_D0) && !rdsp && !rgpu)
519 d = regname[(int)*tk++ - KW_D0];
522 else if ((*tk >= KW_R0) && (*tk <= KW_R31))
524 d = riscregname[(int)*tk++ - KW_R0];
533 // d = (char *)*tk++;
536 // This fix should be done for strings too
537 d = symbolString[*tk++];
538 DEBUG { printf("ExM: SYMBOL=\"%s\"", d); }
543 // d = (char *)*tk++;
546 d = symbolString[*tk++];
567 // Shamus: Changing the format specifier from %lx to %ux caused the assembler
568 // to choke on legitimate code... Need to investigate this further
569 // before changing anything else here!
571 sprintf(numbuf, "$%lx", (long unsigned int)*tk++);
635 *dst++ = (char)*(tk - 1);
640 // If 'd' != NULL, copy string to destination
644 DEBUG printf("d='%s'\n", d);
663 DEBUG { printf("ExM: dst=\"%s\"\n", dest); }
668 DEBUG { printf("*** OVERFLOW LINE ***\n%s\n", dest); }
669 return fatal("line too long as a result of macro expansion");
674 // Get next line of text from a macro
// Takes the line at imacro->im_nextln of the current macro input object,
// advances im_nextln to the following line, expands the text into
// imacro->im_lnbuf via ExpandMacro() and returns that buffer.
// TokenizeLine() treats a NULL return as end-of-macro; the statement that
// produces it is not visible in this listing.
// NOTE(review): the return value of ExpandMacro() is ignored here.
676 char * GetNextMacroLine(void)
678 IMACRO * imacro = cur_inobj->inobj.imacro;
679 // LONG * strp = imacro->im_nextln;
680 LLIST * strp = imacro->im_nextln;
682 if (strp == NULL) // End-of-macro
685 imacro->im_nextln = strp->next;
686 // ExpandMacro((char *)(strp + 1), imacro->im_lnbuf, LNSIZ);
687 ExpandMacro(strp->line, imacro->im_lnbuf, LNSIZ);
689 return imacro->im_lnbuf;
694 // Get next line of text from a repeat block
// Copies the line at irept->ir_nextln into the global irbuf[] and advances
// ir_nextln. When the end of the block's line list is reached the list is
// rewound to ir_firstln and ir_count is tested and decremented; a count of
// zero ends the block. TokenizeLine() treats a NULL return as
// end-of-repeat-block; the return statements are not visible in this
// listing.
// NOTE(review): strcpy() into irbuf is unbounded; it relies on stored lines
// being shorter than LNSIZ -- confirm where the lines are recorded.
696 char * GetNextRepeatLine(void)
698 IREPT * irept = cur_inobj->inobj.irept;
699 // LONG * strp = irept->ir_nextln; // initial null
701 // Do repeat at end of .rept block's string list
703 if (irept->ir_nextln == NULL)
705 DEBUG { printf("back-to-top-of-repeat-block count=%d\n", (int)irept->ir_count); }
706 irept->ir_nextln = irept->ir_firstln; // copy first line
// Post-decrement: the test sees the count before it is reduced
708 if (irept->ir_count-- == 0)
710 DEBUG { printf("end-repeat-block\n"); }
714 // strp = irept->ir_nextln;
717 // strcpy(irbuf, (char *)(irept->ir_nextln + 1));
718 strcpy(irbuf, irept->ir_nextln->line);
719 DEBUG { printf("repeat line='%s'\n", irbuf); }
720 // irept->ir_nextln = (LONG *)*strp;
721 irept->ir_nextln = irept->ir_nextln->next;
728 // Include a source file used at the root, and for ".include" files
// Parameters:
//   handle - descriptor of the already-opened file; GetNextLine() reads
//            from it and fpop() closes it
//   fname  - file name; a strdup() copy becomes curfname and is stored in
//            the new file record
// Pushes a new SRC_IFILE input object, saves the previous line number, file
// name and file number in it so fpop() can restore them, assigns the next
// file number and appends a FILEREC to the file-record list.
// NOTE(review): the strdup() and malloc() results are not checked for NULL
// in the visible lines, and the return statement is not visible.
730 int include(int handle, char * fname)
733 DEBUG { printf("[include: %s, cfileno=%u]\n", fname, cfileno); }
735 // Alloc and initialize include-descriptors
736 INOBJ * inobj = a_inobj(SRC_IFILE);
737 IFILE * ifile = inobj->inobj.ifile;
739 ifile->ifhandle = handle; // Setup file handle
740 ifile->ifind = ifile->ifcnt = 0; // Setup buffer indices
741 ifile->ifoldlineno = curlineno; // Save old line number
742 ifile->ifoldfname = curfname; // Save old filename
743 ifile->ifno = cfileno; // Save old file number
745 // NB: This *must* be preincrement, we're adding one to the filecount here!
746 cfileno = ++filecount; // Compute NEW file number
747 curfname = strdup(fname); // Set current filename (alloc storage)
748 curlineno = 0; // Start on line zero
750 // Add another file to the file-record
751 FILEREC * fr = (FILEREC *)malloc(sizeof(FILEREC));
752 fr->frec_next = NULL;
753 fr->frec_name = curfname; // The record shares the strdup'd name with curfname
756 filerec = fr; // Add first filerec
758 last_fr->frec_next = fr; // Append to list of filerecs
761 DEBUG { printf("[include: curfname: %s, cfileno=%u]\n", curfname, cfileno); }
768 // Pop the current input level
// Unwinds any conditional-assembly levels opened inside the input object,
// restores tok/etok to the values saved by a_inobj(), releases the
// type-specific part (IFILE, IMACRO or IREPT) and unlinks the INOBJ from the
// input stack. Returns -1 if d_endif() fails while unwinding; TokenizeLine()
// treats a return of 0 as success.
// NOTE(review): the function signature and several statements are not
// visible in this listing.
772 INOBJ * inobj = cur_inobj;
777 // Pop IFENT levels until we reach the conditional assembly context we
778 // were at when the input object was entered.
779 int numUnmatched = 0;
781 while (ifent != inobj->in_ifent)
783 if (d_endif() != 0) // Something bad happened during endif parsing?
784 return -1; // If yes, bail instead of getting stuck in a loop
789 // Warn the user that we had to close their unterminated .if blocks for them
790 if (numUnmatched > 0)
791 warn("missing %d .endif(s)", numUnmatched);
793 tok = inobj->in_otok; // Restore tok and etok
794 etok = inobj->in_etok;
796 switch (inobj->in_type)
798 case SRC_IFILE: // Pop and release an IFILE
800 DEBUG { printf("[Leaving: %s]\n", curfname); }
802 IFILE * ifile = inobj->inobj.ifile;
803 ifile->if_link = f_ifile; // Chain the IFILE in front of the free list
805 close(ifile->ifhandle); // Close source file
806 DEBUG { printf("[fpop (pre): curfname=%s]\n", curfname); }
807 curfname = ifile->ifoldfname; // Set current filename
808 DEBUG { printf("[fpop (post): curfname=%s]\n", curfname); }
809 DEBUG { printf("[fpop: (pre) cfileno=%d ifile->ifno=%d]\n", (int)cfileno, (int)ifile->ifno); }
810 curlineno = ifile->ifoldlineno; // Set current line#
811 DEBUG { printf("cfileno=%d ifile->ifno=%d\n", (int)cfileno, (int)ifile->ifno); }
812 cfileno = ifile->ifno; // Restore current file number
813 DEBUG { printf("[fpop: (post) cfileno=%d ifile->ifno=%d]\n", (int)cfileno, (int)ifile->ifno); }
817 case SRC_IMACRO: // Pop and release an IMACRO
819 IMACRO * imacro = inobj->inobj.imacro;
820 imacro->im_link = f_imacro; // Chain the IMACRO in front of the free list
825 case SRC_IREPT: // Pop and release an IREPT
827 DEBUG { printf("dealloc IREPT\n"); }
828 LLIST * p = inobj->inobj.irept->ir_firstln;
830 // Deallocate repeat lines
841 cur_inobj = inobj->in_link;
842 inobj->in_link = f_inobj; // Chain the INOBJ in front of the free list
850 // Get line from file into buf, return NULL on EOF or ptr to the start of a
// (NOTE(review): the rest of the sentence above, and several statements of
// this function, are not visible in this listing.)
// The line is read from the current IFILE's buffer (ifbuf), where ifind is
// the offset of the unread data and ifcnt the number of unread bytes. The
// returned pointers point into ifbuf at a NUL-terminated line.
853 char * GetNextLine(void)
857 int readamt = -1; // 0 if last read() yielded 0 bytes
858 IFILE * fl = cur_inobj->inobj.ifile;
862 // Scan for next end-of-line; handle stupid text formats by treating
863 // \r\n the same as \n. (lone '\r' at end of buffer means we have to
865 d = &fl->ifbuf[fl->ifind];
867 for(p=d, i=0, j=fl->ifcnt; i<j; i++, p++)
869 if (*p == '\r' || *p == '\n')
876 break; // Need to read more, then look for '\n' to eat
877 else if (p[1] == '\n')
881 // Cover up the newline with end-of-string sentinel
890 // Hanging line: the input file is exhausted and the last line has no
891 // \r or \n. The visible code NUL-terminates it and returns it rather than
892 // dropping it (Shamus: never ignore any input!)
893 if (!readamt && fl->ifcnt)
900 // Really should check to see if we're at the end of the buffer!
902 fl->ifbuf[fl->ifind + fl->ifcnt] = '\0';
904 return &fl->ifbuf[fl->ifind];
908 // Truncate and return absurdly long lines.
909 if (fl->ifcnt >= QUANTUM)
// The terminator overwrites the last buffered byte of the over-long line
911 fl->ifbuf[fl->ifind + fl->ifcnt - 1] = '\0';
913 return &fl->ifbuf[fl->ifind];
916 // Relocate what's left of a line to the beginning of the buffer, and
917 // read some more of the file in; return NULL if the buffer's empty and
921 p = &fl->ifbuf[fl->ifind];
// The leftover bytes are moved to offset (ifcnt & 1), i.e. 0 or 1
922 d = &fl->ifbuf[fl->ifcnt & 1];
924 for(i=0; i<fl->ifcnt; i++)
927 fl->ifind = fl->ifcnt & 1;
// Append up to QUANTUM more bytes right after the leftover data
930 readamt = read(fl->ifhandle, &fl->ifbuf[fl->ifind + fl->ifcnt], QUANTUM);
935 if ((fl->ifcnt += readamt) == 0)
// Fetch the next line from the current input object (file, macro or repeat
// block) and convert it to tokens, deposited starting at etok. While the
// line is being processed 'tok' points at tokeol; at the end of the visible
// code it is set to etok, the first token of the line. Returns TKEOF when
// ExitMacro() fails at end-of-macro, and the result of error() for malformed
// input; the other return statements are not visible in this listing.
// NOTE(review): many statements of this function are missing from this
// listing; the comments only describe the lines that are visible.
944 int TokenizeLine(void)
946 uint8_t * ln = NULL; // Ptr to current position in line
947 uint8_t * p; // Random character ptr
948 TOKEN * tk; // Token-deposit ptr
949 int state = 0; // State for keyword detector
950 int j = 0; // Var for keyword detector
951 uint8_t c; // Random char
952 uint32_t v; // Random value
953 uint8_t * nullspot = NULL; // Spot to clobber for SYMBOL termination
954 int stuffnull; // 1:terminate SYMBOL '\0' at *nullspot
956 int stringNum = 0; // Index into string[] for this line's SYMBOL/STRING text
960 if (cur_inobj == NULL) // Return EOF if input stack is empty
963 // Get another line of input from the current input source: a file, a
964 // macro, or a repeat-block
965 switch (cur_inobj->in_type)
969 // o bump source line number;
970 // o tag the listing-line with a space;
971 // o kludge lines generated by Alcyon C.
973 if ((ln = GetNextLine()) == NULL)
975 DEBUG { printf("TokenizeLine: Calling fpop() from SRC_IFILE...\n"); }
976 if (fpop() == 0) // Pop input level
977 goto retry; // Try for more lines
980 ifent->if_prev = (IFENT *)-1; //Signal Assemble() that we have reached EOF with unbalanced if/endifs
985 curlineno++; // Bump line number
990 // AS68 compatibility, throw away all lines starting with
991 // back-quotes, tildes, or '*'
992 // On other lines, turn the first '*' into a semi-colon.
993 if (*ln == '`' || *ln == '~' || *ln == '*')
997 for(p=ln; *p!=EOS; p++)
1011 // o Handle end-of-macro;
1012 // o tag the listing-line with an at (@) sign.
1014 if ((ln = GetNextMacroLine()) == NULL)
1016 if (ExitMacro() == 0) // Exit macro (pop args, do fpop(), etc)
1017 goto retry; // Try for more lines...
1019 return TKEOF; // Oops, we got a non zero return code, signal EOF
1026 // o Handle end-of-repeat-block;
1027 // o tag the listing-line with a pound (#) sign.
1029 if ((ln = GetNextRepeatLine()) == NULL)
1031 DEBUG { printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n"); }
1040 // Save text of the line. We only do this during listings and within
1041 // macro-type blocks, since it is expensive to unconditionally copy every
1046 // General housekeeping
1047 tok = tokeol; // Set "tok" to EOL in case of error
1048 tk = etok; // Reset token ptr
1049 stuffnull = 0; // Don't stuff nulls
1050 totlines++; // Bump total #lines assembled
1052 // See if the entire line is a comment. This is a win if the programmer
1053 // puts in lots of comments
1054 if (*ln == '*' || *ln == ';' || ((*ln == '/') && (*(ln + 1) == '/')))
1057 // And here we have a very ugly hack for signalling a single line 'turn off
1058 // optimization'. There's really no nice way to do this, so hack it is!
1059 optimizeOff = 0; // Default is to take optimizations as they come
1063 optimizeOff = 1; // Signal that we don't want to optimize this line
1064 ln++; // & skip over the darned thing
1067 // Main tokenization loop;
1068 // o skip whitespace;
1069 // o handle end-of-line;
1070 // o handle symbols;
1071 // o handle single-character tokens (operators, etc.);
1072 // o handle multiple-character tokens (constants, strings, etc.).
1075 // Skip whitespace, handle EOL
1076 while (chrtab[*ln] & WHITE)
1079 // Handle EOL, comment with ';'
1080 if (*ln == EOS || *ln == ';'|| ((*ln == '/') && (*(ln + 1) == '/')))
1083 // Handle start of symbol. Symbols are null-terminated in place. The
1084 // termination is always one symbol behind, since there may be no place
1085 // for a null in the case that an operator immediately follows the name.
1090 if (stuffnull) // Terminate old symbol from previous pass
1093 v = 0; // Assume no DOT attrib follows symbol
1096 // In some cases, we need to check for a DOTx at the *beginning*
1097 // of a symbol, as the "start" of the line we're currently looking
1098 // at could be somewhere in the middle of that line!
1101 // Make sure that it's *only* a .[bwsl] following, and not the
1102 // start of a local symbol:
1103 if ((chrtab[*(ln + 1)] & DOT)
1104 && (dotxtab[*(ln + 1)] != 0)
1105 && !(chrtab[*(ln + 2)] & CTSYM))
1107 // We found a legitimate DOTx construct, so add it to the
1111 *tk++ = (TOKEN)dotxtab[*ln++];
1116 p = nullspot = ln++; // Nullspot -> start of this symbol
1118 // Find end of symbol (and compute its length)
1119 for(j=1; (int)chrtab[*ln]&CTSYM; j++)
1122 // Handle "DOT" special forms (like ".b") that follow a normal
1123 // symbol or keyword:
1126 *ln++ = EOS; // Terminate symbol
1127 stuffnull = 0; // And never try it again
1129 // Character following the `.' must have a DOT attribute, and
1130 // the character after THAT one must not have a start-symbol
1131 // attribute (to prevent symbols that look like, for example,
1132 // "zingo.barf", which might be a good idea anyway....)
// NOTE(review): the test below uses CTSYM (continue-symbol), not STSYM
1133 if (((chrtab[*ln] & DOT) == 0) || (dotxtab[*ln] == 0))
1134 return error("[bwsl] must follow '.' in symbol");
1136 v = (uint32_t)dotxtab[*ln++];
1138 if (chrtab[*ln] & CTSYM)
1139 return error("misuse of '.'; not allowed in symbols");
1142 // If the symbol is small, check to see if it's really the name of
1146 for(state=0; state>=0;)
1148 j = (int)tolowertab[*p++];
1151 if (kwcheck[j] != state)
1157 if (*p == EOS || p == ln)
1171 // Make j = -1 if user tries to use a RISC register while in 68K mode
1172 if (!(rgpu || rdsp) && ((TOKEN)j >= KW_R0 && (TOKEN)j <= KW_R31))
1177 // Make j = -1 if time, date etc with no preceding ^^
1178 // defined, referenced, streq, macdef, date and time
1181 case 112: // defined
1182 case 113: // referenced
1190 // If not tokenized keyword OR token was not found
1191 if ((j < 0) || (state < 0))
1195 //problem here: nullspot is a char * but TOKEN is a uint32_t. On a 64-bit
1196 //system, this will cause all kinds of mischief.
1198 *tk++ = (TOKEN)nullspot;
1200 string[stringNum] = nullspot;
1211 if (v) // Record attribute token (if any)
1214 if (stuffnull) // Arrange for string termination on next pass
1220 // Handle identity tokens
1227 // Handle multiple-character tokens
1232 case '!': // ! or !=
1242 case '\'': // 'string'
1245 // Hardcoded for now, maybe this will change in the future
1250 case '\"': // "string"
1254 string[stringNum] = ln;
1258 for(p=ln; *ln!=EOS && *ln!=c1;)
1267 return(error("unterminated string"));
1296 // If we're evaluating a macro
1297 // this is valid and expands to
1301 warn("bad backslash code in string");
1311 return error("unterminated string");
1315 case '$': // $, hex constant
1316 if (chrtab[*ln] & HDIGIT)
1320 // Parse the hex value
// hextab[] entries are negative for characters that are not hex digits
1321 while (hextab[*ln] >= 0)
1322 v = (v << 4) + (int)hextab[*ln++];
1326 if (obj_format == BSD)
// "& 0xDF" clears bit 0x20, folding a lowercase ASCII letter to uppercase
1328 if ((*(ln + 1) & 0xDF) == 'B')
1333 else if ((*(ln + 1) & 0xDF) == 'W')
1338 else if ((*(ln + 1) & 0xDF) == 'L')
1348 if (obj_format == ALCYON)
1352 if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
1357 else if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
1369 case '<': // < or << or <> or <=
1388 case ':': // : or ::
1398 case '=': // = or ==
1408 case '>': // > or >> or >=
1423 case '%': // % or binary constant
1424 if (*ln < '0' || *ln > '1')
1432 while (*ln >= '0' && *ln <= '1')
1433 v = (v << 1) + *ln++ - '0';
1437 if ((*(ln + 1) == 'b') || (*(ln + 1) == 'B'))
1443 if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
1449 if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
1458 case '@': // @ or octal constant
1459 if (*ln < '0' || *ln > '7')
1467 while (*ln >= '0' && *ln <= '7')
1468 v = (v << 3) + *ln++ - '0';
1472 if ((*(ln+1) == 'b') || (*(ln+1) == 'B'))
1478 if ((*(ln+1) == 'w') || (*(ln+1) == 'W'))
1484 if ((*(ln+1) == 'l') || (*(ln+1) == 'L'))
1493 case '^': // ^ or ^^ <operator-name>
1500 if (((int)chrtab[*++ln] & STSYM) == 0)
1502 error("invalid symbol following ^^");
1508 while ((int)chrtab[*ln] & CTSYM)
1511 for(state=0; state>=0;)
1513 // Get char, convert to lowercase
1516 if (j >= 'A' && j <= 'Z')
1521 if (kwcheck[j] != state)
1527 if (*p == EOS || p == ln)
1536 if (j < 0 || state < 0)
1538 error("unknown symbol following ^^");
1545 interror(2); // Bad MULTX entry in chrtab
1550 // Handle decimal constant
1555 while ((int)chrtab[*ln] & DIGIT)
1556 v = (v * 10) + *ln++ - '0';
1558 // See if there's a .[bwl] after the constant & deal with it if so
1561 if ((*(ln + 1) == 'b') || (*(ln + 1) == 'B'))
1569 else if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
1577 else if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
1591 //printf("CONST: %i\n", v);
1595 // Handle illegal character
1596 return error("illegal character $%02X found", *ln);
1599 // Terminate line of tokens and return "success."
1602 tok = etok; // Set tok to beginning of line
1604 if (stuffnull) // Terminate last SYMBOL
1614 // .GOTO <label> goto directive
1616 // The label is searched for starting from the first line of the current,
1617 // enclosing macro definition. If no enclosing macro exists, an error is
1620 // A label is of the form:
1622 // :<name><whitespace>
1624 // The colon must appear in column 1. The label is stripped prior to macro
1625 // expansion, and is NOT subject to macro expansion. The whitespace may also
// (NOTE(review): some lines of the comment above and of the function body
// are not visible in this listing.)
// The parameter is unused. On a match the macro's next-line pointer is set
// to the label's line, so expansion continues from there. Failures return
// the result of error(); the success return is not visible here.
1628 int d_goto(WORD unused)
1630 // Setup for the search
1632 return error("missing label");
// The token after the current one indexes string[] to get the label's text
1634 char * sym = string[tok[1]];
1637 if (cur_inobj->in_type != SRC_IMACRO)
1638 return error("goto not in macro");
1640 IMACRO * imacro = cur_inobj->inobj.imacro;
1641 LLIST * defln = imacro->im_macro->lineList;
1643 // Attempt to find the label, starting with the first line.
1644 for(; defln!=NULL; defln=defln->next)
1646 // Must start with a colon
1647 if (defln->line[0] == ':')
1649 // Compare names (sleazo string compare)
1651 char * s2 = defln->line;
1653 // Either we will match the strings to EOS on both, or we will
1654 // match EOS on string 1 to whitespace on string 2. Otherwise, we
// NOTE(review): s2 is a plain char pointer, so chrtab[*s2] may be indexed
// with a negative value for bytes above $7F if char is signed.
1656 while ((*s1 == *s2) || ((*s1 == EOS) && (chrtab[*s2] & WHITE)))
1658 // If we reached the end of string 1 (sym), we're done.
1659 // Note that we're also checking for the end of string 2 as
1660 // well, since we've established they're equal above.
1663 // Found the label, set new macro next-line and return.
1664 imacro->im_nextln = defln;
1674 return error("goto label not found");
// Debug helper: print a bracketed, human-readable rendering of the tokens
// in tokbuf[] to stdout, stopping at the first EOL token.
// NOTE(review): the walk starts at the beginning of tokbuf[] rather than at
// tok/etok -- confirm that is intended. Some statements (e.g. the ones that
// step 't' past a token's operand) are not visible in this listing.
1678 void DumpTokenBuffer(void)
1680 printf("Tokens [%X]: ", sloc);
1682 for(TOKEN * t=tokbuf; *t!=EOL; t++)
1686 else if (*t == CONST)
1689 printf("[CONST: $%X]", (uint32_t)*t);
1691 else if (*t == ACONST)
1693 printf("[ACONST: $%X, $%X]", (uint32_t)t[1], (uint32_t)t[2]);
1696 else if (*t == STRING)
// The token value is used as an index into string[]
1699 printf("[STRING:\"%s\"]", string[*t]);
1701 else if (*t == SYMBOL)
1704 printf("[SYMBOL:\"%s\"]", string[*t]);
1708 else if (*t == TKEOF)
1710 else if (*t == DEQUALS)
1711 printf("[DEQUALS]");
1716 else if (*t == DCOLON)
1728 else if (*t == UNMINUS)
1729 printf("[UNMINUS]");
1730 else if (*t == DOTB)
1732 else if (*t == DOTW)
1734 else if (*t == DOTL)
1736 else if (*t == DOTI)
1738 else if (*t == ENDEXPR)
1739 printf("[ENDEXPR]");
1740 else if (*t == CR_ABSCOUNT)
1741 printf("[CR_ABSCOUNT]");
1742 else if (*t == CR_DEFINED)
1743 printf("[CR_DEFINED]");
1744 else if (*t == CR_REFERENCED)
1745 printf("[CR_REFERENCED]");
1746 else if (*t == CR_STREQ)
1747 printf("[CR_STREQ]");
1748 else if (*t == CR_MACDEF)
1749 printf("[CR_MACDEF]");
1750 else if (*t == CR_TIME)
1751 printf("[CR_TIME]");
1752 else if (*t == CR_DATE)
1753 printf("[CR_DATE]");
// Token values in these two ranges are printed as the character itself
1754 else if (*t >= 0x20 && *t <= 0x2F)
1755 printf("[%c]", (char)*t);
1756 else if (*t >= 0x3A && *t <= 0x3F)
1757 printf("[%c]", (char)*t);
// 0x80..0x87 print as D0..D7 and 0x88..0x8F as A0..A7
1758 else if (*t >= 0x80 && *t <= 0x87)
1759 printf("[D%u]", ((uint32_t)*t) - 0x80);
1760 else if (*t >= 0x88 && *t <= 0x8F)
1761 printf("[A%u]", ((uint32_t)*t) - 0x88);
// Anything else: raw hex value plus its character rendering
1763 printf("[%X:%c]", (uint32_t)*t, (char)*t);