2 // RMAC - Reboot's Macro Assembler for all Atari computers
3 // TOKEN.C - Token Handling
4 // Copyright (C) 199x Landon Dyer, 2011-2017 Reboot and Friends
5 // RMAC derived from MADMAC v1.07 Written by Landon Dyer, 1986
6 // Source utilised with the kind permission of Landon Dyer
17 #define DECL_KW // Declare keyword arrays
18 #define DEF_KW // Declare keyword values
19 #include "kwtab.h" // Incl generated keyword tables & defs
// Tokenizer-wide state: line bookkeeping, the lookup tables filled in by
// InitTokenizer(), and the pointers that delimit the current line's tokens.
22 int lnsave; // 1: strcpy() text of current line
23 uint16_t curlineno; // Current line number (64K max currently)
24 int totlines; // Total # of lines
25 int mjump_align = 0; // mjump alignment flag
26 char lntag; // Line tag
27 char * curfname; // Current filename
28 char tolowertab[128]; // Uppercase ==> lowercase
29 int8_t hextab[128]; // Table of hex values
30 char dotxtab[128]; // Table for ".b", ".s", etc.
31 char irbuf[LNSIZ]; // Text for .rept block line
32 char lnbuf[LNSIZ]; // Text of current line
33 WORD filecount; // Unique file number counter
34 WORD cfileno; // Current file number
35 TOKEN * tok; // Ptr to current token
36 TOKEN * etok; // Ptr past last token in tokbuf[]
37 TOKEN tokeol[1] = {EOL}; // Bailout end-of-line token (tok points here while a line is being tokenized)
38 char * string[TOKBUFSIZE*2]; // Token buffer string pointer storage (tokens carry an index into this table)
40 // File record, used to maintain a list of every include file ever visited.
// include() allocates one per file and fills in frec_next and frec_name.
41 #define FILEREC struct _filerec
// Input stack head, the free lists a_inobj() draws from, and the token buffer.
51 INOBJ * cur_inobj; // Ptr current input obj (IFILE/IMACRO)
52 static INOBJ * f_inobj; // Ptr list of free INOBJs
53 static IFILE * f_ifile; // Ptr list of free IFILEs
54 static IMACRO * f_imacro; // Ptr list of free IMACROs
56 static TOKEN tokbuf[TOKBUFSIZE]; // Token buffer (stack-like, all files)
// Character classification table, indexed by byte value (0x00..0xFF).
// Each entry is a sum of class flags:
//   ILLEG  - not legal in source text
//   WHITE  - whitespace
//   SELF   - single-character token
//   MULTX  - may start a multiple-character token
//   DIGIT / HDIGIT - decimal / hexadecimal digit
//   STSYM / CTSYM  - may start / continue a symbol
//   DOT    - may follow a '.' as a size suffix (see dotxtab[])
// Upper- and lowercase letters must carry the same DOT flag, because the
// tokenizer tests (chrtab[c] & DOT) before it consults dotxtab[c].
58 uint8_t chrtab[0x100] = {
59 ILLEG, ILLEG, ILLEG, ILLEG, // NUL SOH STX ETX
60 ILLEG, ILLEG, ILLEG, ILLEG, // EOT ENQ ACK BEL
61 ILLEG, WHITE, ILLEG, ILLEG, // BS HT LF VT
62 WHITE, ILLEG, ILLEG, ILLEG, // FF CR SO SI
64 ILLEG, ILLEG, ILLEG, ILLEG, // DLE DC1 DC2 DC3
65 ILLEG, ILLEG, ILLEG, ILLEG, // DC4 NAK SYN ETB
66 ILLEG, ILLEG, ILLEG, ILLEG, // CAN EM SUB ESC
67 ILLEG, ILLEG, ILLEG, ILLEG, // FS GS RS US
69 WHITE, MULTX, MULTX, SELF, // SP ! " #
70 MULTX+CTSYM, MULTX, SELF, MULTX, // $ % & '
71 SELF, SELF, SELF, SELF, // ( ) * +
72 SELF, SELF, STSYM, SELF, // , - . /
74 DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 0 1
75 DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 2 3
76 DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 4 5
77 DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 6 7
78 DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 8 9
80 MULTX, MULTX, MULTX, STSYM+CTSYM, // < = > ?
82 MULTX, STSYM+CTSYM+HDIGIT, // @ A
83 DOT+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT, // B C
84 DOT+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT, // D E
85 STSYM+CTSYM+HDIGIT, STSYM+CTSYM, // F G
86 STSYM+CTSYM, DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // H I J K
87 DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // L M N O
89 DOT+STSYM+CTSYM, DOT+STSYM+CTSYM, STSYM+CTSYM, DOT+STSYM+CTSYM, // P Q R S
90 STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, DOT+STSYM+CTSYM, // T U V W
// 'X' carries DOT just like 'x' below, so that ".X" is accepted as a size
// suffix exactly as ".x" is (dotxtab['X'] is set to DOTX by InitTokenizer).
91 DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, SELF,// X Y Z [
92 SELF, SELF, MULTX, STSYM+CTSYM, // \ ] ^ _
94 ILLEG, STSYM+CTSYM+HDIGIT, // ` a
95 DOT+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT, // b c
96 DOT+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT, // d e
97 STSYM+CTSYM+HDIGIT, STSYM+CTSYM, // f g
98 STSYM+CTSYM, DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // h i j k
99 DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // l m n o
101 DOT+STSYM+CTSYM, DOT+STSYM+CTSYM, STSYM+CTSYM, DOT+STSYM+CTSYM, // p q r s
102 STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, DOT+STSYM+CTSYM, // t u v w
103 DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, SELF, // x y z {
104 SELF, SELF, SELF, ILLEG, // | } ~ DEL
106 // Anything above $7F is illegal (and yes, we need to check for this,
107 // otherwise you get strange and spurious errors that will lead you astray)
108 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
109 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
110 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
111 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
112 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
113 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
114 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
115 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
116 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
117 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
118 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
119 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
120 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
121 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
122 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
123 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG
126 // Names of registers
// Used by ExpandMacro() to turn a register keyword token back into text; the
// table is indexed by (token - KW_D0). The trailing comment on each row gives
// the token values covered by that row; "" marks a value with no name.
// NOTE(review): row 304,311 repeats the names of row 272,279 - confirm intended.
127 static char * regname[] = {
128 "d0","d1","d2","d3","d4","d5","d6","d7", // 128,135
129 "a0","a1","a2","a3","a4","a5","a6","sp", // 136,143
130 "ssp","pc","sr","ccr","regequ","set","reg","r0", // 144,151
131 "r1","r2","r3","r4","r5","r6","r7","r8", // 152,159
132 "r9","r10","r11","r12","r13","r14","r15","r16", // 160,167
133 "r17","r18","r19","r20","r21","r22","r23","r24", // 168,175
134 "r25","r26","r27","r28","r29","r30","r31","ccdef", // 176,183
135 "usp","ic40","dc40","bc40","sfc","dfc","","vbr", // 184,191
136 "cacr","caar","msp","isp","tc","itt0","itt1","dtt0", // 192,199
137 "dtt1","mmusr","urp","srp","iacr0","iacr1","dacr0","dacr1", // 200,207
138 "tt0","tt1","crp","","","","","", // 208,215
139 "","","","","fpiar","fpsr","fpcr","", // 216,223
140 "fp0","fp1","fp2","fp3","fp4","fp5","fp6","fp7", // 224,231
141 "","","","","","","","", // 232,239
142 "","","","","","","","", // 240,247
143 "","","","","","","","", // 248,255
144 "","","","","x0","x1","y0","y1", // 256,263
145 "","b0","","b2","","b1","a","b", // 264,271
146 "mr","omr","la","lc","ssh","ssl","ss","", // 272,279
147 "n0","n1","n2","n3","n4","n5","n6","n7", // 280,287
148 "m0","m1","m2","m3","m4","m5","m6","m7", // 288,295
149 "","","","","","","l","p", // 296,303
150 "mr","omr","la","lc","ssh","ssl","ss","", // 304,311
151 "a10","b10","x","y","","","ab","ba" // 312,319
// Names of the RISC registers r0..r31. ExpandMacro() indexes this table with
// (token - KW_R0) for tokens in the range KW_R0..KW_R31.
154 static char * riscregname[] = {
155 "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
156 "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
157 "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
158 "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31"
163 // Initialize tokenizer
// Resets the file bookkeeping globals and fills in the tolowertab[], hextab[]
// and dotxtab[] lookup tables. Takes no arguments and returns nothing.
165 void InitTokenizer(void)
168 char * htab = "0123456789abcdefABCDEF"; // Hex character table
170 lnsave = 0; // Don't save lines
171 curfname = ""; // No file, empty filename
172 filecount = (WORD)-1; // include() pre-increments, so the first file gets number 0
173 cfileno = (WORD)-1; // cfileno gets bumped to 0
185 // Initialize hex, "dot" and tolower tables
190 tolowertab[i] = (char)i;
// htab positions 0..15 are '0'..'9','a'..'f' and map to themselves;
// positions 16..21 are 'A'..'F', so (i - 6) yields 10..15 for them.
193 for(i=0; htab[i]!=EOS; i++)
194 hextab[htab[i]] = (char)((i < 16) ? i : i - 6);
// Setting bit 0x20 turns an uppercase ASCII letter into its lowercase form
196 for(i='A'; i<='Z'; i++)
197 tolowertab[i] |= 0x20;
199 // These characters are legal immediately after a period
200 dotxtab['b'] = DOTB; // .b .B (.s/.S map to DOTS below)
202 //dotxtab['s'] = DOTB;
203 //dotxtab['S'] = DOTB;
204 dotxtab['w'] = DOTW; // .w .W
206 dotxtab['l'] = DOTL; // .l .L
208 dotxtab['i'] = DOTI; // .i .I (???)
210 dotxtab['D'] = DOTD; // .d .D (quad word)
212 dotxtab['S'] = DOTS; // .s .S
214 dotxtab['Q'] = DOTQ; // .q .Q
216 dotxtab['X'] = DOTX; // .x .X
218 dotxtab['P'] = DOTP; // .p .P
// Point curfname at the name to show in error messages, by walking the
// filerec list. Falls back to the placeholders "(*top*)" and
// "(*NOT FOUND*)" for the two cases the comments below describe.
223 void SetFilenameForErrorReporting(void)
227 // Check for absolute top filename (this should never happen)
230 curfname = "(*top*)";
234 FILEREC * fr = filerec;
236 // Advance to the correct record...
// (the walk stops early if the list runs out before fnum reaches zero)
237 while (fr != NULL && fnum != 0)
243 // Check for file # record not found (this should never happen either)
246 curfname = "(*NOT FOUND*)";
250 curfname = fr->frec_name;
255 // Allocate an IFILE or IMACRO
// (an IREPT is handled as well). 'typ' is one of SRC_IFILE, SRC_IMACRO or
// SRC_IREPT. Both the INOBJ and its payload are taken from the matching free
// list when one is available, and from malloc() otherwise; the new INOBJ
// records the current .if context and token pointers and is linked in front
// of cur_inobj.
// NOTE(review): no malloc() result is checked in the visible code - confirm.
257 INOBJ * a_inobj(int typ)
263 // Allocate and initialize INOBJ first
265 inobj = malloc(sizeof(INOBJ));
269 f_inobj = f_inobj->in_link;
274 case SRC_IFILE: // Alloc and init an IFILE
276 ifile = malloc(sizeof(IFILE));
280 f_ifile = f_ifile->if_link;
283 inobj->inobj.ifile = ifile;
286 case SRC_IMACRO: // Alloc and init an IMACRO
287 if (f_imacro == NULL)
288 imacro = malloc(sizeof(IMACRO));
292 f_imacro = f_imacro->im_link;
295 inobj->inobj.imacro = imacro;
298 case SRC_IREPT: // Alloc and init an IREPT
// IREPTs have no free list; they are always malloc()ed
299 inobj->inobj.irept = malloc(sizeof(IREPT));
300 DEBUG { printf("alloc IREPT\n"); }
304 // Install INOBJ on top of input stack
305 inobj->in_ifent = ifent; // Record .if context on entry
306 inobj->in_type = (WORD)typ;
307 inobj->in_otok = tok; // Saved so the token pointers can be restored when this level is popped
308 inobj->in_etok = etok;
309 inobj->in_link = cur_inobj;
317 // Perform macro substitution from 'src' to 'dest'. Return OK or some error.
318 // A macro reference is in one of two forms:
319 // \name <non-name-character>
321 // A doubled backslash (\\) is compressed to a single backslash (\).
322 // Argument definitions have been pre-tokenized, so we have to turn them back
323 // into text. This means that numbers, in particular, become hex, regardless of
324 // their representation when the macro was invoked. This is a hack.
325 // A label may appear at the beginning of the line:
326 // :<name><whitespace>
327 // (the colon must be in the first column). These labels are stripped before
328 // macro expansion takes place.
// 'destsiz' is the size of the 'dest' buffer in bytes. The macro being
// expanded is taken from cur_inobj, so the current input object must be an
// IMACRO when this is called.
330 int ExpandMacro(char * src, char * dest, int destsiz)
333 int questmark; // \? for testing argument existence
334 char mname[128]; // Assume max size of a formal arg name
335 char numbuf[20]; // Buffer for text of CONSTs
338 char ** symbolString;
340 DEBUG { printf("ExM: src=\"%s\"\n", src); }
342 IMACRO * imacro = cur_inobj->inobj.imacro;
343 int macnum = (int)(imacro->im_macro->sattr);
345 char * dst = dest; // Next dest slot
346 char * edst = dest + destsiz - 1; // Last byte of dest buffer
348 // Check for (and skip over) any "label" on the line
354 while (*s != EOS && !(chrtab[*s] & WHITE))
358 s++; // Skip first whitespace
361 // Expand the rest of the line
364 // Copy single character
370 // Skip comments in case a loose @ or \ is in there
371 // In that case the tokeniser was trying to expand it.
372 if ((*s == ';') || ((*s == '/') && (*(s + 1) == '/')))
377 // Do macro expansion
385 case '\\': // \\, \ (collapse to single backslash)
391 case '?': // \? <macro> set `questmark' flag
395 case '#': // \#, number of arguments
396 sprintf(numbuf, "%d", (int)imacro->im_nargs);
398 case '!': // \! size suffix supplied on invocation
399 switch ((int)imacro->im_siz)
401 case SIZN: d = ""; break;
402 case SIZB: d = ".b"; break;
403 case SIZW: d = ".w"; break;
404 case SIZL: d = ".l"; break;
408 case '~': // ==> unique label string Mnnnn...
409 sprintf(numbuf, "M%u", curuniq);
425 return error("missing argument name");
428 // \n ==> argument number 'n', 0..9
429 if (chrtab[*s] & DIGIT)
439 // Get argument name: \name, \{name}
449 while (chrtab[*s] & CTSYM);
454 for(++s; *s != EOS && *s != '}';)
458 return error("missing closing brace ('}')");
465 // Lookup the argument and copy its (string) value into the
466 // destination string
467 DEBUG { printf("argument='%s'\n", mname); }
469 if ((arg = lookup(mname, MACARG, macnum)) == NULL)
470 return error("undefined argument: '%s'", mname);
473 // Convert a string of tokens (terminated with EOL) back into
474 // text. If an argument is out of range (not specified in the
475 // macro invocation) then it is ignored.
476 i = (int)arg->svalue;
478 DEBUG { printf("~argnumber=%d\n", i); }
481 if (i < imacro->im_nargs)
483 tk = imacro->argument[i].token;
484 symbolString = imacro->argument[i].string;
487 // printf("ExM: Preparing to parse argument #%u...\n", i);
493 // 0 if the argument is empty or nonexistent,
494 // 1 if the argument is not empty
497 if (tk == NULL || *tk == EOL)
503 *dst++ = (char)(questmark + '0');
507 // Argument # is in range, so expand it
512 // Reverse-translation from a token number to a string.
513 // This is a hack. It might be better table-driven.
// In 68K mode (neither rdsp nor rgpu set) every token >= KW_D0 is looked
// up in regname[]; otherwise only KW_R0..KW_R31 map through riscregname[].
516 if ((*tk >= KW_D0) && !rdsp && !rgpu)
518 d = regname[(int)*tk++ - KW_D0];
521 else if ((*tk >= KW_R0) && (*tk <= KW_R31))
523 d = riscregname[(int)*tk++ - KW_R0];
532 // d = (char *)*tk++;
535 // This fix should be done for strings too
// The token holds an index into the argument's string table, not a pointer
536 d = symbolString[*tk++];
537 DEBUG { printf("ExM: SYMBOL=\"%s\"", d); }
542 // d = (char *)*tk++;
545 d = symbolString[*tk++];
566 // Shamus: Changing the format specifier from %lx to %ux caused the assembler
567 // to choke on legitimate code... Need to investigate this further
568 // before changing anything else here!
570 sprintf(numbuf, "$%lx", (long unsigned int)*tk++);
634 *dst++ = (char)*(tk - 1);
639 // If 'd' != NULL, copy string to destination
643 DEBUG printf("d='%s'\n", d);
662 DEBUG { printf("ExM: dst=\"%s\"\n", dest); }
667 DEBUG { printf("*** OVERFLOW LINE ***\n%s\n", dest); }
668 return fatal("line too long as a result of macro expansion");
673 // Get next line of text from a macro
// Advances the current macro's im_nextln to the following list entry, expands
// the line into the macro's im_lnbuf and returns that buffer.
// NOTE(review): the value returned by ExpandMacro() is not examined here.
675 char * GetNextMacroLine(void)
677 IMACRO * imacro = cur_inobj->inobj.imacro;
678 // LONG * strp = imacro->im_nextln;
679 LLIST * strp = imacro->im_nextln;
681 if (strp == NULL) // End-of-macro
684 imacro->im_nextln = strp->next;
685 // ExpandMacro((char *)(strp + 1), imacro->im_lnbuf, LNSIZ);
686 ExpandMacro(strp->line, imacro->im_lnbuf, LNSIZ);
688 return imacro->im_lnbuf;
693 // Get next line of text from a repeat block
// Copies the current line of the block into the global irbuf and steps
// ir_nextln to the next list entry. When the end of the list is reached the
// pointer is rewound to ir_firstln and ir_count is tested and decremented.
695 char * GetNextRepeatLine(void)
697 IREPT * irept = cur_inobj->inobj.irept;
698 // LONG * strp = irept->ir_nextln; // initial null
700 // Do repeat at end of .rept block's string list
702 if (irept->ir_nextln == NULL)
704 DEBUG { printf("back-to-top-of-repeat-block count=%d\n", (int)irept->ir_count); }
705 irept->ir_nextln = irept->ir_firstln; // copy first line
// Post-decrement: the test sees the count as it was before this pass
707 if (irept->ir_count-- == 0)
709 DEBUG { printf("end-repeat-block\n"); }
713 // strp = irept->ir_nextln;
716 // strcpy(irbuf, (char *)(irept->ir_nextln + 1));
// NOTE(review): unbounded copy into irbuf[LNSIZ] - confirm lines fit
717 strcpy(irbuf, irept->ir_nextln->line);
718 DEBUG { printf("repeat line='%s'\n", irbuf); }
719 // irept->ir_nextln = (LONG *)*strp;
720 irept->ir_nextln = irept->ir_nextln->next;
727 // Include a source file used at the root, and for ".include" files
// 'handle' is stored as the new IFILE's file handle and 'fname' is
// duplicated with strdup() to become curfname. The previous line number,
// filename and file number are saved in the IFILE, and a new FILEREC is
// appended to the list of files.
// NOTE(review): the strdup() and malloc() results are not checked in the
// visible code - confirm.
729 int include(int handle, char * fname)
732 DEBUG { printf("[include: %s, cfileno=%u]\n", fname, cfileno); }
734 // Alloc and initialize include-descriptors
735 INOBJ * inobj = a_inobj(SRC_IFILE);
736 IFILE * ifile = inobj->inobj.ifile;
738 ifile->ifhandle = handle; // Setup file handle
739 ifile->ifind = ifile->ifcnt = 0; // Setup buffer indices
740 ifile->ifoldlineno = curlineno; // Save old line number
741 ifile->ifoldfname = curfname; // Save old filename
742 ifile->ifno = cfileno; // Save old file number
744 // NB: This *must* be preincrement, we're adding one to the filecount here!
745 cfileno = ++filecount; // Compute NEW file number
746 curfname = strdup(fname); // Set current filename (alloc storage)
747 curlineno = 0; // Start on line zero
749 // Add another file to the file-record
750 FILEREC * fr = (FILEREC *)malloc(sizeof(FILEREC));
751 fr->frec_next = NULL;
752 fr->frec_name = curfname; // Shares the strdup()ed string with curfname
755 filerec = fr; // Add first filerec
757 last_fr->frec_next = fr; // Append to list of filerecs
760 DEBUG { printf("[include: curfname: %s, cfileno=%u]\n", curfname, cfileno); }
767 // Pop the current input level
// Unwinds any .if levels opened since the input object was entered, restores
// tok/etok from the values saved in the INOBJ, releases the type-specific
// payload, and moves the INOBJ from the input stack to the free list.
// Returns -1 if d_endif() fails while unwinding.
771 INOBJ * inobj = cur_inobj;
776 // Pop IFENT levels until we reach the conditional assembly context we
777 // were at when the input object was entered.
778 int numUnmatched = 0;
780 while (ifent != inobj->in_ifent)
782 if (d_endif() != 0) // Something bad happened during endif parsing?
783 return -1; // If yes, bail instead of getting stuck in a loop
788 // Give a warning to the user that we had to wipe their bum for them
789 if (numUnmatched > 0)
790 warn("missing %d .endif(s)", numUnmatched);
792 tok = inobj->in_otok; // Restore tok and otok
793 etok = inobj->in_etok;
795 switch (inobj->in_type)
797 case SRC_IFILE: // Pop and release an IFILE
799 DEBUG { printf("[Leaving: %s]\n", curfname); }
801 IFILE * ifile = inobj->inobj.ifile;
802 ifile->if_link = f_ifile; // Chain the IFILE in front of the free list
804 close(ifile->ifhandle); // Close source file
805 DEBUG { printf("[fpop (pre): curfname=%s]\n", curfname); }
806 curfname = ifile->ifoldfname; // Set current filename
807 DEBUG { printf("[fpop (post): curfname=%s]\n", curfname); }
808 DEBUG { printf("[fpop: (pre) cfileno=%d ifile->ifno=%d]\n", (int)cfileno, (int)ifile->ifno); }
809 curlineno = ifile->ifoldlineno; // Set current line#
810 DEBUG { printf("cfileno=%d ifile->ifno=%d\n", (int)cfileno, (int)ifile->ifno); }
811 cfileno = ifile->ifno; // Restore current file number
812 DEBUG { printf("[fpop: (post) cfileno=%d ifile->ifno=%d]\n", (int)cfileno, (int)ifile->ifno); }
816 case SRC_IMACRO: // Pop and release an IMACRO
818 IMACRO * imacro = inobj->inobj.imacro;
819 imacro->im_link = f_imacro; // Chain the IMACRO in front of the free list
824 case SRC_IREPT: // Pop and release an IREPT
826 DEBUG { printf("dealloc IREPT\n"); }
827 // LONG * p = inobj->inobj.irept->ir_firstln;
828 LLIST * p = inobj->inobj.irept->ir_firstln;
830 // Deallocate repeat lines
833 // Shamus: ggn confirmed that this will cause a segfault on 64-bit versions of
834 // RMAC. This is just stupid and wrong anyway, so we need to fix crapola
836 // LONG * p1 = (LONG *)*p;
846 cur_inobj = inobj->in_link; // Unlink from the input stack...
847 inobj->in_link = f_inobj; // ...and chain in front of the INOBJ free list
855 // Get line from file into buf, return NULL on EOF or ptr to the start of a
// Works on the current IFILE's ifbuf[]: ifind is the offset of the unread
// data and ifcnt the number of unread bytes. More data is read from ifhandle
// in chunks of up to QUANTUM bytes when no complete line is buffered.
858 char * GetNextLine(void)
862 int readamt = -1; // 0 if last read() yielded 0 bytes
863 IFILE * fl = cur_inobj->inobj.ifile;
867 // Scan for next end-of-line; handle stupid text formats by treating
868 // \r\n the same as \n. (lone '\r' at end of buffer means we have to
870 d = &fl->ifbuf[fl->ifind];
872 for(p=d, i=0, j=fl->ifcnt; i<j; i++, p++)
874 if (*p == '\r' || *p == '\n')
881 break; // Need to read more, then look for '\n' to eat
882 else if (p[1] == '\n')
886 // Cover up the newline with end-of-string sentinel
895 // Handle hanging lines by ignoring them (Input file is exhausted, no
896 // \r or \n on last line)
897 // Shamus: Ignoring them is wrong. Never ignore any input!
898 if (!readamt && fl->ifcnt)
905 // Really should check to see if we're at the end of the buffer!
907 fl->ifbuf[fl->ifind + fl->ifcnt] = '\0';
909 return &fl->ifbuf[fl->ifind];
913 // Truncate and return absurdly long lines.
914 if (fl->ifcnt >= QUANTUM)
// The terminator overwrites the last buffered byte
916 fl->ifbuf[fl->ifind + fl->ifcnt - 1] = '\0';
918 return &fl->ifbuf[fl->ifind];
921 // Relocate what's left of a line to the beginning of the buffer, and
922 // read some more of the file in; return NULL if the buffer's empty and
926 p = &fl->ifbuf[fl->ifind];
// Destination offset is 0 or 1, matching the parity of ifcnt
927 d = &fl->ifbuf[fl->ifcnt & 1];
929 for(i=0; i<fl->ifcnt; i++)
932 fl->ifind = fl->ifcnt & 1;
// NOTE(review): handling of a negative read() result is not visible here
935 readamt = read(fl->ifhandle, &fl->ifbuf[fl->ifind + fl->ifcnt], QUANTUM);
940 if ((fl->ifcnt += readamt) == 0)
// Fetch the next line from the current input object (file, macro or repeat
// block) and convert it to tokens, starting at etok. While the line is being
// processed 'tok' points at tokeol, so an early error return leaves an empty
// line; on success 'tok' is set to etok. Returns TKEOF when there is no more
// input, or the result of error() on a lexical error.
949 int TokenizeLine(void)
951 uint8_t * ln = NULL; // Ptr to current position in line
952 uint8_t * p; // Random character ptr
953 TOKEN * tk; // Token-deposit ptr
954 int state = 0; // State for keyword detector
955 int j = 0; // Var for keyword detector
956 uint8_t c; // Random char
957 VALUE v; // Random value
958 uint8_t * nullspot = NULL; // Spot to clobber for SYMBOL termination
959 int stuffnull; // 1:terminate SYMBOL '\0' at *nullspot
961 int stringNum = 0; // Pointer to string locations in tokenized line
965 if (cur_inobj == NULL) // Return EOF if input stack is empty
968 // Get another line of input from the current input source: a file, a
969 // macro, or a repeat-block
970 switch (cur_inobj->in_type)
974 // o bump source line number;
975 // o tag the listing-line with a space;
976 // o kludge lines generated by Alcyon C.
978 if ((ln = GetNextLine()) == NULL)
980 DEBUG { printf("TokenizeLine: Calling fpop() from SRC_IFILE...\n"); }
981 if (fpop() == 0) // Pop input level
982 goto retry; // Try for more lines
985 ifent->if_prev = (IFENT *)-1; //Signal Assemble() that we have reached EOF with unbalanced if/endifs
990 curlineno++; // Bump line number
995 // AS68 compatibility, throw away all lines starting with
996 // back-quotes, tildes, or '*'
997 // On other lines, turn the first '*' into a semi-colon.
998 if (*ln == '`' || *ln == '~' || *ln == '*')
1002 for(p=ln; *p!=EOS; p++)
1016 // o Handle end-of-macro;
1017 // o tag the listing-line with an at (@) sign.
1019 if ((ln = GetNextMacroLine()) == NULL)
1021 if (ExitMacro() == 0) // Exit macro (pop args, do fpop(), etc)
1022 goto retry; // Try for more lines...
1024 return TKEOF; // Oops, we got a non zero return code, signal EOF
1031 // o Handle end-of-repeat-block;
1032 // o tag the listing-line with a pound (#) sign.
1034 if ((ln = GetNextRepeatLine()) == NULL)
1036 DEBUG { printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n"); }
1045 // Save text of the line. We only do this during listings and within
1046 // macro-type blocks, since it is expensive to unconditionally copy every
1051 // General housekeeping
1052 tok = tokeol; // Set "tok" to EOL in case of error
1053 tk = etok; // Reset token ptr
1054 stuffnull = 0; // Don't stuff nulls
1055 totlines++; // Bump total #lines assembled
1057 // See if the entire line is a comment. This is a win if the programmer
1058 // puts in lots of comments
1059 if (*ln == '*' || *ln == ';' || ((*ln == '/') && (*(ln + 1) == '/')))
1062 // Main tokenization loop;
1063 // o skip whitespace;
1064 // o handle end-of-line;
1065 // o handle symbols;
1066 // o handle single-character tokens (operators, etc.);
1067 // o handle multiple-character tokens (constants, strings, etc.).
1070 // Skip whitespace, handle EOL
1071 while (chrtab[*ln] & WHITE)
1074 // Handle EOL, comment with ';'
1075 if (*ln == EOS || *ln == ';'|| ((*ln == '/') && (*(ln + 1) == '/')))
1078 // Handle start of symbol. Symbols are null-terminated in place. The
1079 // termination is always one symbol behind, since there may be no place
1080 // for a null in the case that an operator immediately follows the name.
1085 if (stuffnull) // Terminate old symbol from previous pass
1088 v = 0; // Assume no DOT attrib follows symbol
1091 // In some cases, we need to check for a DOTx at the *beginning*
1092 // of a symbol, as the "start" of the line we're currently looking
1093 // at could be somewhere in the middle of that line!
1096 // Make sure that it's *only* a .[bwsl] following, and not the
1097 // start of a local symbol:
1098 if ((chrtab[*(ln + 1)] & DOT)
1099 && (dotxtab[*(ln + 1)] != 0)
1100 && !(chrtab[*(ln + 2)] & CTSYM))
1102 // We found a legitimate DOTx construct, so add it to the
1106 *tk++ = (TOKEN)dotxtab[*ln++];
1111 p = nullspot = ln++; // Nullspot -> start of this symbol
1113 // Find end of symbol (and compute its length)
1114 for(j=1; (int)chrtab[*ln]&CTSYM; j++)
1117 // Handle "DOT" special forms (like ".b") that follow a normal
1118 // symbol or keyword:
1121 *ln++ = EOS; // Terminate symbol
1122 stuffnull = 0; // And never try it again
1124 // Character following the `.' must have a DOT attribute, and
1125 // the character after THAT one must not have a start-symbol
1126 // attribute (to prevent symbols that look like, for example,
1127 // "zingo.barf", which might be a good idea anyway....)
1128 if (((chrtab[*ln] & DOT) == 0) || (dotxtab[*ln] == 0))
1129 return error("[bwsl] must follow '.' in symbol");
1131 v = (VALUE)dotxtab[*ln++];
1133 if (chrtab[*ln] & CTSYM)
1134 return error("misuse of '.'; not allowed in symbols");
1137 // If the symbol is small, check to see if it's really the name of
1141 for(state=0; state>=0;)
1143 j = (int)tolowertab[*p++];
1146 if (kwcheck[j] != state)
1152 if (*p == EOS || p == ln)
1166 // Make j = -1 if user tries to use a RISC register while in 68K mode
1167 if (!(rgpu || rdsp) && ((TOKEN)j >= KW_R0 && (TOKEN)j <= KW_R31))
1172 // Make j = -1 if time, date etc with no preceding ^^
1173 // defined, referenced, streq, macdef, date and time
1176 case 112: // defined
1177 case 113: // referenced
1185 // If not tokenized keyword OR token was not found
1186 if ((j < 0) || (state < 0))
1190 //problem here: nullspot is a char * but TOKEN is a uint32_t. On a 64-bit
1191 //system, this will cause all kinds of mischief.
1193 *tk++ = (TOKEN)nullspot;
1195 string[stringNum] = nullspot;
1206 if (v) // Record attribute token (if any)
1209 if (stuffnull) // Arrange for string termination on next pass
1215 // Handle identity tokens
1222 // Handle multiple-character tokens
1227 case '!': // ! or !=
1237 case '\'': // 'string'
1240 // Hardcoded for now, maybe this will change in the future
1245 case '\"': // "string"
1249 string[stringNum] = ln;
1253 for(p=ln; *ln!=EOS && *ln!=c1;)
1262 return(error("unterminated string"));
1291 warn("bad backslash code in string");
1301 return error("unterminated string");
1305 case '$': // $, hex constant
1306 if (chrtab[*ln] & HDIGIT)
1310 // Parse the hex value
// hextab[] is indexed with the raw character
// NOTE(review): this loop relies on hextab[] being negative for non-hex
// characters; that initialization is not visible here - confirm
1311 while (hextab[*ln] >= 0)
1312 v = (v << 4) + (int)hextab[*ln++];
1316 if (obj_format == BSD)
// Masking with 0xDF clears the 0x20 bit, so either letter case matches
1318 if ((*(ln + 1) & 0xDF) == 'B')
1323 else if ((*(ln + 1) & 0xDF) == 'W')
1328 else if ((*(ln + 1) & 0xDF) == 'L')
1338 if (obj_format == ALCYON)
1342 if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
1347 else if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
1359 case '<': // < or << or <> or <=
1378 case ':': // : or ::
1388 case '=': // = or ==
1398 case '>': // > or >> or >=
1413 case '%': // % or binary constant
1414 if (*ln < '0' || *ln > '1')
1422 while (*ln >= '0' && *ln <= '1')
1423 v = (v << 1) + *ln++ - '0';
1427 if ((*(ln + 1) == 'b') || (*(ln + 1) == 'B'))
1433 if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
1439 if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
1448 case '@': // @ or octal constant
1449 if (*ln < '0' || *ln > '7')
1457 while (*ln >= '0' && *ln <= '7')
1458 v = (v << 3) + *ln++ - '0';
1462 if ((*(ln+1) == 'b') || (*(ln+1) == 'B'))
1468 if ((*(ln+1) == 'w') || (*(ln+1) == 'W'))
1474 if ((*(ln+1) == 'l') || (*(ln+1) == 'L'))
1483 case '^': // ^ or ^^ <operator-name>
1490 if (((int)chrtab[*++ln] & STSYM) == 0)
1492 error("invalid symbol following ^^");
1498 while ((int)chrtab[*ln] & CTSYM)
1501 for(state=0; state>=0;)
1503 // Get char, convert to lowercase
1506 if (j >= 'A' && j <= 'Z')
1511 if (kwcheck[j] != state)
1517 if (*p == EOS || p == ln)
1526 if (j < 0 || state < 0)
1528 error("unknown symbol following ^^");
1535 interror(2); // Bad MULTX entry in chrtab
1540 // Handle decimal constant
1545 while ((int)chrtab[*ln] & DIGIT)
1546 v = (v * 10) + *ln++ - '0';
1548 // See if there's a .[bwl] after the constant & deal with it if so
1551 if ((*(ln + 1) == 'b') || (*(ln + 1) == 'B'))
1556 else if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
1561 else if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
1569 //printf("CONST: %i\n", v);
1573 // Handle illegal character
1574 return error("illegal character $%02X found", *ln);
1577 // Terminate line of tokens and return "success."
1580 tok = etok; // Set tok to beginning of line
1582 if (stuffnull) // Terminate last SYMBOL
1592 // .GOTO <label> goto directive
1594 // The label is searched for starting from the first line of the current,
1595 // enclosing macro definition. If no enclosing macro exists, an error is
1598 // A label is of the form:
1600 // :<name><whitespace>
1602 // The colon must appear in column 1. The label is stripped prior to macro
1603 // expansion, and is NOT subject to macro expansion. The whitespace may also
// The 'unused' parameter is not referenced in the visible code. On a match
// the macro's im_nextln is set to the labelled line; otherwise the result of
// error() is returned.
1606 int d_goto(WORD unused)
1608 // Setup for the search
1610 return error("missing label");
// tok[1] is an index into the global string[] table
1612 char * sym = string[tok[1]];
1615 if (cur_inobj->in_type != SRC_IMACRO)
1616 return error("goto not in macro");
1618 IMACRO * imacro = cur_inobj->inobj.imacro;
1619 LLIST * defln = imacro->im_macro->lineList;
1621 // Attempt to find the label, starting with the first line.
1622 for(; defln!=NULL; defln=defln->next)
1624 // Must start with a colon
1625 if (defln->line[0] == ':')
1627 // Compare names (sleazo string compare)
1629 char * s2 = defln->line;
1631 // Either we will match the strings to EOS on both, or we will
1632 // match EOS on string 1 to whitespace on string 2. Otherwise, we
1634 while ((*s1 == *s2) || ((*s1 == EOS) && (chrtab[*s2] & WHITE)))
1636 // If we reached the end of string 1 (sym), we're done.
1637 // Note that we're also checking for the end of string 2 as
1638 // well, since we've established they're equal above.
1641 // Found the label, set new macro next-line and return.
1642 imacro->im_nextln = defln;
1652 return error("goto label not found");
// Debugging aid: print the tokens in tokbuf[] to stdout, from the start of
// the buffer up to the first EOL token, in a bracketed human-readable form.
1656 void DumpTokenBuffer(void)
1658 printf("Tokens [%X]: ", sloc);
1660 for(TOKEN * t=tokbuf; *t!=EOL; t++)
1664 else if (*t == CONST)
// NOTE(review): presumably 't' is advanced to the operand on a line not
// shown here before *t is printed - confirm
1667 printf("[CONST: $%X]", (uint32_t)*t);
1669 else if (*t == ACONST)
1671 printf("[ACONST: $%X, $%X]", (uint32_t)t[1], (uint32_t)t[2]);
1674 else if (*t == STRING)
// The token value is used as an index into the global string[] table
1677 printf("[STRING:\"%s\"]", string[*t]);
1679 else if (*t == SYMBOL)
1682 printf("[SYMBOL:\"%s\"]", string[*t]);
1686 else if (*t == TKEOF)
1688 else if (*t == DEQUALS)
1689 printf("[DEQUALS]");
1694 else if (*t == DCOLON)
1706 else if (*t == UNMINUS)
1707 printf("[UNMINUS]");
1708 else if (*t == DOTB)
1710 else if (*t == DOTW)
1712 else if (*t == DOTL)
1714 else if (*t == DOTI)
1716 else if (*t == ENDEXPR)
1717 printf("[ENDEXPR]");
1718 else if (*t == CR_ABSCOUNT)
1719 printf("[CR_ABSCOUNT]");
1720 else if (*t == CR_DEFINED)
1721 printf("[CR_DEFINED]");
1722 else if (*t == CR_REFERENCED)
1723 printf("[CR_REFERENCED]");
1724 else if (*t == CR_STREQ)
1725 printf("[CR_STREQ]");
1726 else if (*t == CR_MACDEF)
1727 printf("[CR_MACDEF]");
1728 else if (*t == CR_TIME)
1729 printf("[CR_TIME]");
1730 else if (*t == CR_DATE)
1731 printf("[CR_DATE]");
// Token values in these ranges are printed as the ASCII character itself
1732 else if (*t >= 0x20 && *t <= 0x2F)
1733 printf("[%c]", (char)*t);
1734 else if (*t >= 0x3A && *t <= 0x3F)
1735 printf("[%c]", (char)*t);
// 0x80..0x87 and 0x88..0x8F are printed as D0..D7 and A0..A7
1736 else if (*t >= 0x80 && *t <= 0x87)
1737 printf("[D%u]", ((uint32_t)*t) - 0x80);
1738 else if (*t >= 0x88 && *t <= 0x8F)
1739 printf("[A%u]", ((uint32_t)*t) - 0x88);
// Anything else: print the raw value in hex and as a character
1741 printf("[%X:%c]", (uint32_t)*t, (char)*t);