2 // RMAC - Reboot's Macro Assembler for all Atari computers
3 // TOKEN.C - Token Handling
4 // Copyright (C) 199x Landon Dyer, 2011-2017 Reboot and Friends
5 // RMAC derived from MADMAC v1.07 Written by Landon Dyer, 1986
6 // Source utilised with the kind permission of Landon Dyer
17 #define DECL_KW // Declare keyword arrays
18 #define DEF_KW // Declare keyword values
19 #include "kwtab.h" // Incl generated keyword tables & defs
// Tokenizer state. None of these are declared static, so they have external
// linkage.
22 int lnsave; // 1: strcpy() text of current line; 0: don't save lines
23 uint16_t curlineno; // Current line number (64K max currently)
24 int totlines; // Total # of lines assembled
25 int mjump_align = 0; // mjump alignment flag
26 char lntag; // Line tag
27 char * curfname; // Current filename
28 char tolowertab[128]; // Uppercase ==> lowercase (indexed by character)
29 int8_t hextab[128]; // Table of hex values (indexed by character)
30 char dotxtab[128]; // Table for ".b", ".s", etc. (indexed by character)
31 char irbuf[LNSIZ]; // Text for .rept block line
32 char lnbuf[LNSIZ]; // Text of current line
33 WORD filecount; // Unique file number counter
34 WORD cfileno; // Current file number
35 TOKEN * tok; // Ptr to current token
36 TOKEN * etok; // Ptr past last token in tokbuf[]
37 TOKEN tokeol[1] = {EOL}; // Bailout end-of-line token
// String pointers for the tokenized line; the tokenizer stores symbol/string
// text pointers here (string[stringNum] = ...) and consumers index it with a
// value taken from the token stream (e.g. string[tok[1]]).
38 char * string[TOKBUFSIZE*2]; // Token buffer string pointer storage
40 // File record, used to maintain a list of every include file ever visited
41 #define FILEREC struct _filerec
51 INOBJ * cur_inobj; // Ptr current input obj (IFILE/IMACRO/IREPT)
// Free lists: released objects are pushed here and handed out again before
// falling back to malloc().
52 static INOBJ * f_inobj; // Ptr list of free INOBJs
53 static IFILE * f_ifile; // Ptr list of free IFILEs
54 static IMACRO * f_imacro; // Ptr list of free IMACROs
56 static TOKEN tokbuf[TOKBUFSIZE]; // Token buffer (stack-like, all files)
// Character classification table, indexed by character code ($00..$FF).
// Entries are ILLEG, WHITE, SELF, MULTX, or a sum of attribute flags that the
// tokenizer tests with '&':
//   STSYM  - tested before a symbol is started
//   CTSYM  - tested while scanning to the end of a symbol
//   DIGIT  - tested while scanning a decimal constant
//   HDIGIT - tested after '$' to detect a hex constant
//   DOT    - letter may follow a '.' as a size suffix (also needs a non-zero
//            dotxtab[] entry)
// NOTE(review): SELF and MULTX presumably mark single-character ("identity")
// tokens and multi-character tokens respectively -- confirm against the
// definitions in the header.
58 uint8_t chrtab[0x100] = {
59 ILLEG, ILLEG, ILLEG, ILLEG, // NUL SOH STX ETX
60 ILLEG, ILLEG, ILLEG, ILLEG, // EOT ENQ ACK BEL
61 ILLEG, WHITE, ILLEG, ILLEG, // BS HT LF VT
62 WHITE, ILLEG, ILLEG, ILLEG, // FF CR SO SI
64 ILLEG, ILLEG, ILLEG, ILLEG, // DLE DC1 DC2 DC3
65 ILLEG, ILLEG, ILLEG, ILLEG, // DC4 NAK SYN ETB
66 ILLEG, ILLEG, ILLEG, ILLEG, // CAN EM SUB ESC
67 ILLEG, ILLEG, ILLEG, ILLEG, // FS GS RS US
69 WHITE, MULTX, MULTX, SELF, // SP ! " #
70 MULTX+CTSYM, MULTX, SELF, MULTX, // $ % & '
71 SELF, SELF, SELF, SELF, // ( ) * +
72 SELF, SELF, STSYM, SELF, // , - . /
74 DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 0 1
75 DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 2 3
76 DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 4 5
77 DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 6 7
78 DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 8 9
80 MULTX, MULTX, MULTX, STSYM+CTSYM, // < = > ?
82 MULTX, STSYM+CTSYM+HDIGIT, // @ A
83 DOT+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT, // B C
84 DOT+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT, // D E
85 STSYM+CTSYM+HDIGIT, STSYM+CTSYM, // F G
86 STSYM+CTSYM, DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // H I J K
87 DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // L M N O
89 DOT+STSYM+CTSYM, DOT+STSYM+CTSYM, STSYM+CTSYM, DOT+STSYM+CTSYM, // P Q R S
90 STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, DOT+STSYM+CTSYM, // T U V W
// NOTE(review): 'X' below has no DOT attribute, while lowercase 'x' and every
// other size-suffix letter (B D I L P Q S W) carry DOT in both cases, and
// InitTokenizer() sets dotxtab['X']. As written, ".X" fails the
// (chrtab[c] & DOT) test in TokenizeLine(). Looks like an oversight -- confirm.
91 STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, SELF,// X Y Z [
92 SELF, SELF, MULTX, STSYM+CTSYM, // \ ] ^ _
94 ILLEG, STSYM+CTSYM+HDIGIT, // ` a
95 DOT+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT, // b c
96 DOT+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT, // d e
97 STSYM+CTSYM+HDIGIT, STSYM+CTSYM, // f g
98 STSYM+CTSYM, DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // h i j k
99 DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // l m n o
101 DOT+STSYM+CTSYM, DOT+STSYM+CTSYM, STSYM+CTSYM, DOT+STSYM+CTSYM, // p q r s
102 STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, DOT+STSYM+CTSYM, // t u v w
103 DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, SELF, // x y z {
104 SELF, SELF, SELF, ILLEG, // | } ~ DEL
106 // Anything above $7F is illegal (and yes, we need to check for this,
107 // otherwise you get strange and spurious errors that will lead you astray)
108 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
109 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
110 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
111 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
112 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
113 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
114 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
115 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
116 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
117 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
118 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
119 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
120 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
121 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
122 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
123 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG
126 // Names of registers
// ExpandMacro() indexes this table with (token - KW_D0) to turn a register
// keyword token back into text. The trailing comment on each row gives the
// first and last token value covered by that row (e.g. "128,135" = 128..135).
// Empty strings are placeholders for token values with no register name.
// NOTE(review): rows 272..279 and 304..311 hold the same eight names --
// confirm this duplication is intentional.
127 static char * regname[] = {
128 "d0","d1","d2","d3","d4","d5","d6","d7", // 128,135
129 "a0","a1","a2","a3","a4","a5","a6","sp", // 136,143
130 "ssp","pc","sr","ccr","regequ","set","reg","r0", // 144,151
131 "r1","r2","r3","r4","r5","r6","r7","r8", // 152,159
132 "r9","r10","r11","r12","r13","r14","r15","r16", // 160,167
133 "r17","r18","r19","r20","r21","r22","r23","r24", // 168,175
134 "r25","r26","r27","r28","r29","r30","r31","ccdef", // 176,183
135 "usp","ic40","dc40","bc40","sfc","dfc","","vbr", // 184,191
136 "cacr","caar","msp","isp","tc","itt0","itt1","dtt0", // 192,199
137 "dtt1","mmusr","urp","srp","iacr0","iacr1","dacr0","dacr1", // 200,207
138 "tt0","tt1","crp","","","","","", // 208,215
139 "","","","","fpiar","fpsr","fpcr","", // 216,223
140 "fp0","fp1","fp2","fp3","fp4","fp5","fp6","fp7", // 224,231
141 "","","","","","","","", // 232,239
142 "","","","","","","","", // 240,247
143 "","","","","","","","", // 248,255
144 "","","","","x0","x1","y0","y1", // 256,263
145 "","b0","","b2","","b1","a","b", // 264,271
146 "mr","omr","la","lc","ssh","ssl","ss","", // 272,279
147 "n0","n1","n2","n3","n4","n5","n6","n7", // 280,287
148 "m0","m1","m2","m3","m4","m5","m6","m7", // 288,295
149 "","","","","","","l","p", // 296,303
150 "mr","omr","la","lc","ssh","ssl","ss","", // 304,311
151 "a10","b10","x","y","","","ab","ba" // 312,319
// Names of the RISC registers r0..r31. ExpandMacro() indexes this table with
// (token - KW_R0) for tokens in the range KW_R0..KW_R31.
154 static char * riscregname[] = {
155 "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
156 "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
157 "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
158 "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31"
163 // Initialize tokenizer
// Resets the line-saving flag, the current filename and the file counters,
// then fills in the tolowertab[], hextab[] and dotxtab[] lookup tables.
165 void InitTokenizer(void)
168 char * htab = "0123456789abcdefABCDEF"; // Hex character table
170 lnsave = 0; // Don't save lines
171 curfname = ""; // No file, empty filename
// include() pre-increments filecount, so the first file becomes number 0
172 filecount = (WORD)-1;
173 cfileno = (WORD)-1; // cfileno gets bumped to 0
185 // Initialize hex, "dot" and tolower tables
190 tolowertab[i] = (char)i;
// htab indices 0..15 are "0".."9","a".."f" and map to themselves; indices
// 16..21 are "A".."F" and map to 10..15 via (i - 6)
193 for(i=0; htab[i]!=EOS; i++)
194 hextab[htab[i]] = (char)((i < 16) ? i : i - 6);
// Setting bit 5 ($20) turns 'A'..'Z' into 'a'..'z'
196 for(i='A'; i<='Z'; i++)
197 tolowertab[i] |= 0x20;
199 // These characters are legal immediately after a period
200 dotxtab['b'] = DOTB; // .b .B
// (mapping .s/.S onto DOTB is disabled; 'S' gets its own DOTS entry below)
202 //dotxtab['s'] = DOTB;
203 //dotxtab['S'] = DOTB;
204 dotxtab['w'] = DOTW; // .w .W
206 dotxtab['l'] = DOTL; // .l .L
208 dotxtab['i'] = DOTI; // .i .I (???)
210 dotxtab['D'] = DOTD; // .d .D (quad word)
212 dotxtab['S'] = DOTS; // .s .S
214 dotxtab['Q'] = DOTQ; // .q .Q
216 dotxtab['X'] = DOTX; // .x .X
218 dotxtab['P'] = DOTP; // .p .P
// Point curfname at the name to show in error messages: walks the filerec
// list until the counter fnum reaches zero and uses that record's frec_name.
// Falls back to the placeholder strings "(*top*)" and "(*NOT FOUND*)" in the
// two "should never happen" cases below.
// NOTE(review): fnum is presumably derived from cfileno -- confirm.
223 void SetFilenameForErrorReporting(void)
227 // Check for absolute top filename (this should never happen)
230 curfname = "(*top*)";
234 FILEREC * fr = filerec;
236 // Advance to the correct record...
237 while (fr != NULL && fnum != 0)
243 // Check for file # record not found (this should never happen either)
246 curfname = "(*NOT FOUND*)";
250 curfname = fr->frec_name;
255 // Allocate an IFILE, IMACRO or IREPT
// 'typ' selects the payload: SRC_IFILE, SRC_IMACRO or SRC_IREPT. INOBJ, IFILE
// and IMACRO objects are taken from their free lists when one is available
// and malloc()ed otherwise; an IREPT is always malloc()ed. The new INOBJ
// records the current .if context and token pointers and is linked in front
// of cur_inobj.
// NOTE(review): none of the malloc() results visible here are checked for
// NULL before use -- confirm how out-of-memory is meant to be handled.
257 INOBJ * a_inobj(int typ)
263 // Allocate and initialize INOBJ first
265 inobj = malloc(sizeof(INOBJ));
269 f_inobj = f_inobj->in_link;
274 case SRC_IFILE: // Alloc and init an IFILE
276 ifile = malloc(sizeof(IFILE));
280 f_ifile = f_ifile->if_link;
283 inobj->inobj.ifile = ifile;
286 case SRC_IMACRO: // Alloc and init an IMACRO
287 if (f_imacro == NULL)
288 imacro = malloc(sizeof(IMACRO));
292 f_imacro = f_imacro->im_link;
295 inobj->inobj.imacro = imacro;
298 case SRC_IREPT: // Alloc and init an IREPT
299 inobj->inobj.irept = malloc(sizeof(IREPT));
300 DEBUG { printf("alloc IREPT\n"); }
304 // Install INOBJ on top of input stack
305 inobj->in_ifent = ifent; // Record .if context on entry
306 inobj->in_type = (WORD)typ;
307 inobj->in_otok = tok; // Saved so fpop() can restore tok
308 inobj->in_etok = etok; // Saved so fpop() can restore etok
309 inobj->in_link = cur_inobj;
317 // Perform macro substitution from 'src' to 'dest'. Return OK or some error.
318 // A macro reference is in one of two forms:
319 // \name <non-name-character>
// \{name}
321 // A doubled backslash (\\) is compressed to a single backslash (\).
322 // Argument definitions have been pre-tokenized, so we have to turn them back
323 // into text. This means that numbers, in particular, become hex, regardless of
324 // their representation when the macro was invoked. This is a hack.
325 // A label may appear at the beginning of the line:
326 // :<name><whitespace>
327 // (the colon must be in the first column). These labels are stripped before
328 // macro expansion takes place.
//
// Parameters:
//   src     - NUL-terminated macro body line to expand
//   dest    - output buffer for the expanded text
//   destsiz - size of dest in bytes
// Besides argument references, the escapes \\ \? \# \! and \~ are handled
// (see the switch cases below). Overflowing dest ends in
// fatal("line too long as a result of macro expansion").
330 int ExpandMacro(char * src, char * dest, int destsiz)
333 int questmark; // \? for testing argument existence
// NOTE(review): mname is a fixed 128-byte buffer; the copy loops that fill it
// should be checked to make sure they bound the name length.
334 char mname[128]; // Assume max size of a formal arg name
335 char numbuf[20]; // Buffer for text of CONSTs
338 char ** symbolString;
340 DEBUG { printf("ExM: src=\"%s\"\n", src); }
342 IMACRO * imacro = cur_inobj->inobj.imacro;
// macnum is passed to lookup() below when resolving MACARG names
343 int macnum = (int)(imacro->im_macro->sattr);
345 char * dst = dest; // Next dest slot
346 char * edst = dest + destsiz - 1; // Last slot of dest buffer (dest[destsiz - 1])
348 // Check for (and skip over) any "label" on the line
354 while (*s != EOS && !(chrtab[*s] & WHITE))
358 s++; // Skip first whitespace
361 // Expand the rest of the line
364 // Copy single character
370 // Skip comments in case a loose @ or \ is in there
371 // In that case the tokeniser was trying to expand it.
372 if ((*s == ';') || ((*s == '/') && (*(s + 1) == '/')))
377 // Do macro expansion
385 case '\\': // \\, \ (collapse to single backslash)
391 case '?': // \? <macro> set `questmark' flag
395 case '#': // \#, number of arguments
396 sprintf(numbuf, "%d", (int)imacro->im_nargs);
398 case '!': // \! size suffix supplied on invocation
399 switch ((int)imacro->im_siz)
401 case SIZN: d = ""; break;
402 case SIZB: d = ".b"; break;
403 case SIZW: d = ".w"; break;
404 case SIZL: d = ".l"; break;
408 case '~': // ==> unique label string Mnnnn...
409 sprintf(numbuf, "M%u", curuniq);
425 return error("missing argument name");
428 // \n ==> argument number 'n', 0..9
429 if (chrtab[*s] & DIGIT)
439 // Get argument name: \name, \{name}
449 while (chrtab[*s] & CTSYM);
454 for(++s; *s != EOS && *s != '}';)
458 return error("missing closing brace ('}')");
465 // Lookup the argument and copy its (string) value into the
466 // destination string
467 DEBUG { printf("argument='%s'\n", mname); }
469 if ((arg = lookup(mname, MACARG, macnum)) == NULL)
470 return error("undefined argument: '%s'", mname);
473 // Convert a string of tokens (terminated with EOL) back into
474 // text. If an argument is out of range (not specified in the
475 // macro invocation) then it is ignored.
476 i = (int)arg->svalue;
478 DEBUG { printf("~argnumber=%d (argBase=%u)\n", i, imacro->argBase); }
481 if (i < imacro->im_nargs)
486 tk = argPtrs[imacro->argBase + i];
488 tk = imacro->argument[i].token;
489 symbolString = imacro->argument[i].string;
492 // printf("ExM: Preparing to parse argument #%u...\n", i);
499 // 0 if the argument is empty or non-existent,
500 // 1 if the argument is not empty
503 if (tk == NULL || *tk == EOL)
509 *dst++ = (char)(questmark + '0');
513 // Argument # is in range, so expand it
518 // Reverse-translation from a token number to a string.
519 // This is a hack. It might be better table-driven.
// NOTE(review): there is no upper bound on *tk in the test below, and
// regname[] only has entries for KW_D0 .. KW_D0+191 -- confirm that larger
// token values cannot reach this branch.
522 if ((*tk >= KW_D0) && !rdsp && !rgpu)
524 d = regname[(int)*tk++ - KW_D0];
527 else if ((*tk >= KW_R0) && (*tk <= KW_R31))
529 d = riscregname[(int)*tk++ - KW_R0];
538 // d = (char *)*tk++;
541 // This fix should be done for strings too
// The token stream holds an index into this argument's string table, not a
// pointer
542 d = symbolString[*tk++];
543 DEBUG { printf("ExM: SYMBOL=\"%s\"", d); }
548 // d = (char *)*tk++;
551 d = symbolString[*tk++];
572 // Shamus: Changing the format specifier from %lx to %ux caused the assembler
573 // to choke on legitimate code... Need to investigate this further
574 // before changing anything else here!
// Constants are written back as '$' + hex digits (see header comment)
576 sprintf(numbuf, "$%lx", (long unsigned int)*tk++);
640 *dst++ = (char)*(tk - 1);
645 // If 'd' != NULL, copy string to destination
649 DEBUG printf("d='%s'\n", d);
668 DEBUG { printf("ExM: dst=\"%s\"\n", dest); }
673 DEBUG { printf("*** OVERFLOW LINE ***\n%s\n", dest); }
674 return fatal("line too long as a result of macro expansion");
679 // Get next line of text from a macro
// Takes the line at imacro->im_nextln, advances im_nextln to the following
// list entry, expands the line into imacro->im_lnbuf (LNSIZ bytes) and returns
// that buffer. TokenizeLine() treats a NULL result as end-of-macro.
// NOTE(review): the return value of ExpandMacro() is ignored here, so an
// expansion error does not stop the line from being returned -- confirm this
// is intended.
681 char * GetNextMacroLine(void)
683 IMACRO * imacro = cur_inobj->inobj.imacro;
684 // LONG * strp = imacro->im_nextln;
685 LLIST * strp = imacro->im_nextln;
687 if (strp == NULL) // End-of-macro
690 imacro->im_nextln = strp->next;
691 // ExpandMacro((char *)(strp + 1), imacro->im_lnbuf, LNSIZ);
692 ExpandMacro(strp->line, imacro->im_lnbuf, LNSIZ);
694 return imacro->im_lnbuf;
699 // Get next line of text from a repeat block
// Copies the current line of the .rept block into the global irbuf and
// advances ir_nextln. When the end of the line list is reached, the list is
// rewound to ir_firstln and ir_count is consumed; a count that was already
// zero ends the repeat block. TokenizeLine() treats a NULL result as
// end-of-repeat-block.
// NOTE(review): strcpy() into irbuf is unbounded; this assumes every stored
// line fits in LNSIZ bytes -- confirm where the lines are recorded.
701 char * GetNextRepeatLine(void)
703 IREPT * irept = cur_inobj->inobj.irept;
704 // LONG * strp = irept->ir_nextln; // initial null
706 // Do repeat at end of .rept block's string list
708 if (irept->ir_nextln == NULL)
710 DEBUG { printf("back-to-top-of-repeat-block count=%d\n", (int)irept->ir_count); }
711 irept->ir_nextln = irept->ir_firstln; // copy first line
// Post-decrement: the test sees the count *before* it is reduced
713 if (irept->ir_count-- == 0)
715 DEBUG { printf("end-repeat-block\n"); }
719 // strp = irept->ir_nextln;
722 // strcpy(irbuf, (char *)(irept->ir_nextln + 1));
723 strcpy(irbuf, irept->ir_nextln->line);
724 DEBUG { printf("repeat line='%s'\n", irbuf); }
725 // irept->ir_nextln = (LONG *)*strp;
726 irept->ir_nextln = irept->ir_nextln->next;
733 // Include a source file used at the root, and for ".include" files
// 'handle' is the file handle to read from (fpop() later close()s it) and
// 'fname' is its name. Pushes a new IFILE input object, saves the current
// line number, filename and file number in it so fpop() can restore them,
// assigns the next file number, and appends a FILEREC holding a private copy
// of the name to the list of visited files.
// NOTE(review): the results of strdup() and malloc() below are not checked
// for NULL -- confirm how out-of-memory is meant to be handled.
735 int include(int handle, char * fname)
738 DEBUG { printf("[include: %s, cfileno=%u]\n", fname, cfileno); }
740 // Alloc and initialize include-descriptors
741 INOBJ * inobj = a_inobj(SRC_IFILE);
742 IFILE * ifile = inobj->inobj.ifile;
744 ifile->ifhandle = handle; // Setup file handle
745 ifile->ifind = ifile->ifcnt = 0; // Setup buffer indices
746 ifile->ifoldlineno = curlineno; // Save old line number
747 ifile->ifoldfname = curfname; // Save old filename
748 ifile->ifno = cfileno; // Save old file number
750 // NB: This *must* be preincrement, we're adding one to the filecount here!
751 cfileno = ++filecount; // Compute NEW file number
752 curfname = strdup(fname); // Set current filename (alloc storage)
753 curlineno = 0; // Start on line zero
755 // Add another file to the file-record
756 FILEREC * fr = (FILEREC *)malloc(sizeof(FILEREC));
757 fr->frec_next = NULL;
758 fr->frec_name = curfname; // Shares the strdup()ed copy with curfname
761 filerec = fr; // Add first filerec
763 last_fr->frec_next = fr; // Append to list of filerecs
766 DEBUG { printf("[include: curfname: %s, cfileno=%u]\n", curfname, cfileno); }
773 // Pop the current input level
// Unwinds any .if levels opened inside the input object, restores tok/etok to
// the values saved when it was pushed, releases the type-specific payload
// (IFILE, IMACRO or IREPT) and finally unlinks the INOBJ from the input stack
// and puts it on the free list. Returns -1 if d_endif() reports a failure
// while unwinding; TokenizeLine() treats a return of 0 as success.
777 INOBJ * inobj = cur_inobj;
782 // Pop IFENT levels until we reach the conditional assembly context we
783 // were at when the input object was entered.
784 int numUnmatched = 0;
786 while (ifent != inobj->in_ifent)
788 if (d_endif() != 0) // Something bad happened during endif parsing?
789 return -1; // If yes, bail instead of getting stuck in a loop
794 // Warn the user that we had to close their unterminated .if block(s)
795 if (numUnmatched > 0)
796 warn("missing %d .endif(s)", numUnmatched);
798 tok = inobj->in_otok; // Restore tok and etok
799 etok = inobj->in_etok;
801 switch (inobj->in_type)
803 case SRC_IFILE: // Pop and release an IFILE
805 DEBUG { printf("[Leaving: %s]\n", curfname); }
807 IFILE * ifile = inobj->inobj.ifile;
808 ifile->if_link = f_ifile; // Chain onto the IFILE free list
810 close(ifile->ifhandle); // Close source file
811 DEBUG { printf("[fpop (pre): curfname=%s]\n", curfname); }
812 curfname = ifile->ifoldfname; // Set current filename
813 DEBUG { printf("[fpop (post): curfname=%s]\n", curfname); }
814 DEBUG { printf("[fpop: (pre) cfileno=%d ifile->ifno=%d]\n", (int)cfileno, (int)ifile->ifno); }
815 curlineno = ifile->ifoldlineno; // Set current line#
816 DEBUG { printf("cfileno=%d ifile->ifno=%d\n", (int)cfileno, (int)ifile->ifno); }
817 cfileno = ifile->ifno; // Restore current file number
818 DEBUG { printf("[fpop: (post) cfileno=%d ifile->ifno=%d]\n", (int)cfileno, (int)ifile->ifno); }
822 case SRC_IMACRO: // Pop and release an IMACRO
824 IMACRO * imacro = inobj->inobj.imacro;
825 imacro->im_link = f_imacro; // Chain onto the IMACRO free list
830 case SRC_IREPT: // Pop and release an IREPT
832 DEBUG { printf("dealloc IREPT\n"); }
833 // LONG * p = inobj->inobj.irept->ir_firstln;
834 LLIST * p = inobj->inobj.irept->ir_firstln;
836 // Deallocate repeat lines
839 // Shamus: ggn confirmed that this will cause a segfault on 64-bit versions of
840 // RMAC. The old LONG-pointer walk (kept below for reference) is wrong and
// must not be reinstated.
842 // LONG * p1 = (LONG *)*p;
852 cur_inobj = inobj->in_link; // Unlink from the input stack
853 inobj->in_link = f_inobj; // Chain onto the INOBJ free list
861 // Get line from file into buf, return NULL on EOF or ptr to the start of a
// NUL-terminated line inside the IFILE's buffer (fl->ifbuf).
// The buffer is scanned for an end-of-line; if none is found the remaining
// bytes are moved to the front of the buffer and up to QUANTUM more bytes are
// read() from the file, then the scan is repeated.
864 char * GetNextLine(void)
868 int readamt = -1; // 0 if last read() yielded 0 bytes
869 IFILE * fl = cur_inobj->inobj.ifile;
873 // Scan for next end-of-line; handle stupid text formats by treating
874 // \r\n the same as \n. (lone '\r' at end of buffer means we have to
// read more, then look for a '\n' to eat.)
876 d = &fl->ifbuf[fl->ifind];
878 for(p=d, i=0, j=fl->ifcnt; i<j; i++, p++)
880 if (*p == '\r' || *p == '\n')
887 break; // Need to read more, then look for '\n' to eat
888 else if (p[1] == '\n')
892 // Cover up the newline with end-of-string sentinel
901 // Handle hanging lines by ignoring them (Input file is exhausted, no
902 // \r or \n on last line)
903 // Shamus: Dropping the last line would be wrong. Never ignore any input!
904 if (!readamt && fl->ifcnt)
911 // Really should check to see if we're at the end of the buffer!
913 fl->ifbuf[fl->ifind + fl->ifcnt] = '\0';
915 return &fl->ifbuf[fl->ifind];
919 // Truncate and return absurdly long lines.
920 if (fl->ifcnt >= QUANTUM)
// The last buffered byte is overwritten by the terminator
922 fl->ifbuf[fl->ifind + fl->ifcnt - 1] = '\0';
924 return &fl->ifbuf[fl->ifind];
927 // Relocate what's left of a line to the beginning of the buffer, and
928 // read some more of the file in; return NULL if the buffer's empty and
// the read added nothing.
// The leftover is moved to offset (ifcnt & 1), i.e. 0 or 1 depending on
// whether the leftover byte count is even or odd.
932 p = &fl->ifbuf[fl->ifind];
933 d = &fl->ifbuf[fl->ifcnt & 1];
935 for(i=0; i<fl->ifcnt; i++)
938 fl->ifind = fl->ifcnt & 1;
// NOTE(review): read() returns -1 on error; confirm that case is handled
// before readamt is added to ifcnt below.
941 readamt = read(fl->ifhandle, &fl->ifbuf[fl->ifind + fl->ifcnt], QUANTUM);
946 if ((fl->ifcnt += readamt) == 0)
// Fetch the next line from the current input object (file, macro or repeat
// block) and convert it into tokens deposited at etok. On entry to the main
// loop 'tok' points at tokeol so that an error return leaves an empty line;
// on success 'tok' is set to etok, the start of the new token run.
// Visible return paths: TKEOF at end of input, and the result of error() for
// lexical errors.
955 int TokenizeLine(void)
957 uint8_t * ln = NULL; // Ptr to current position in line
958 uint8_t * p; // Random character ptr
959 TOKEN * tk; // Token-deposit ptr
960 int state = 0; // State for keyword detector
961 int j = 0; // Var for keyword detector
962 uint8_t c; // Random char
963 VALUE v; // Random value
964 uint8_t * nullspot = NULL; // Spot to clobber for SYMBOL termination
965 int stuffnull; // 1:terminate SYMBOL '\0' at *nullspot
967 int stringNum = 0; // Next free index in string[] for this tokenized line
971 if (cur_inobj == NULL) // Return EOF if input stack is empty
974 // Get another line of input from the current input source: a file, a
975 // macro, or a repeat-block
976 switch (cur_inobj->in_type)
980 // o bump source line number;
981 // o tag the listing-line with a space;
982 // o kludge lines generated by Alcyon C.
984 if ((ln = GetNextLine()) == NULL)
986 DEBUG { printf("TokenizeLine: Calling fpop() from SRC_IFILE...\n"); }
987 if (fpop() == 0) // Pop input level
988 goto retry; // Try for more lines
991 ifent->if_prev = (IFENT *)-1; // Signal Assemble() that we have reached EOF with unbalanced if/endifs
996 curlineno++; // Bump line number
1001 // AS68 compatibility, throw away all lines starting with
1002 // back-quotes, tildes, or '*'
1003 // On other lines, turn the first '*' into a semi-colon.
1004 if (*ln == '`' || *ln == '~' || *ln == '*')
1008 for(p=ln; *p!=EOS; p++)
1022 // o Handle end-of-macro;
1023 // o tag the listing-line with an at (@) sign.
1025 if ((ln = GetNextMacroLine()) == NULL)
1027 if (ExitMacro() == 0) // Exit macro (pop args, do fpop(), etc)
1028 goto retry; // Try for more lines...
1030 return TKEOF; // Oops, we got a non zero return code, signal EOF
1037 // o Handle end-of-repeat-block;
1038 // o tag the listing-line with a pound (#) sign.
1040 if ((ln = GetNextRepeatLine()) == NULL)
1042 DEBUG { printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n"); }
1051 // Save text of the line. We only do this during listings and within
1052 // macro-type blocks, since it is expensive to unconditionally copy every
// line.
1057 // General housekeeping
1058 tok = tokeol; // Set "tok" to EOL in case of error
1059 tk = etok; // Reset token ptr
1060 stuffnull = 0; // Don't stuff nulls
1061 totlines++; // Bump total #lines assembled
1063 // See if the entire line is a comment. This is a win if the programmer
1064 // puts in lots of comments
1065 if (*ln == '*' || *ln == ';' || ((*ln == '/') && (*(ln + 1) == '/')))
1068 // Main tokenization loop;
1069 // o skip whitespace;
1070 // o handle end-of-line;
1071 // o handle symbols;
1072 // o handle single-character tokens (operators, etc.);
1073 // o handle multiple-character tokens (constants, strings, etc.).
1076 // Skip whitespace, handle EOL
1077 while (chrtab[*ln] & WHITE)
1080 // Handle EOL, comment with ';' or '//'
1081 if (*ln == EOS || *ln == ';'|| ((*ln == '/') && (*(ln + 1) == '/')))
1084 // Handle start of symbol. Symbols are null-terminated in place. The
1085 // termination is always one symbol behind, since there may be no place
1086 // for a null in the case that an operator immediately follows the name.
1091 if (stuffnull) // Terminate old symbol from previous pass
1094 v = 0; // Assume no DOT attrib follows symbol
1097 // In some cases, we need to check for a DOTx at the *beginning*
1098 // of a symbol, as the "start" of the line we're currently looking
1099 // at could be somewhere in the middle of that line!
1102 // Make sure that it's *only* a .[bwsl] following, and not the
1103 // start of a local symbol:
1104 if ((chrtab[*(ln + 1)] & DOT)
1105 && (dotxtab[*(ln + 1)] != 0)
1106 && !(chrtab[*(ln + 2)] & CTSYM))
1108 // We found a legitimate DOTx construct, so add it to the
// token stream
1112 *tk++ = (TOKEN)dotxtab[*ln++];
1117 p = nullspot = ln++; // Nullspot -> start of this symbol
1119 // Find end of symbol (and compute its length)
1120 for(j=1; (int)chrtab[*ln]&CTSYM; j++)
1123 // Handle "DOT" special forms (like ".b") that follow a normal
1124 // symbol or keyword:
1127 *ln++ = EOS; // Terminate symbol
1128 stuffnull = 0; // And never try it again
1130 // Character following the `.' must have a DOT attribute, and
1131 // the character after THAT one must not have a start-symbol
1132 // attribute (to prevent symbols that look like, for example,
1133 // "zingo.barf", which might be a good idea anyway....)
1134 if (((chrtab[*ln] & DOT) == 0) || (dotxtab[*ln] == 0))
1135 return error("[bwsl] must follow '.' in symbol");
1137 v = (VALUE)dotxtab[*ln++];
1139 if (chrtab[*ln] & CTSYM)
1140 return error("misuse of '.'; not allowed in symbols");
1143 // If the symbol is small, check to see if it's really the name of
// a keyword (e.g. a register), using the kwcheck[] state machine.
1147 for(state=0; state>=0;)
1149 j = (int)tolowertab[*p++];
1152 if (kwcheck[j] != state)
1158 if (*p == EOS || p == ln)
1172 // Make j = -1 if user tries to use a RISC register while in 68K mode
1173 if (!(rgpu || rdsp) && ((TOKEN)j >= KW_R0 && (TOKEN)j <= KW_R31))
1178 // Make j = -1 if time, date etc with no preceding ^^
1179 // defined, referenced, streq, macdef, date and time
// NOTE(review): the case labels are raw keyword numbers; they presumably
// have to track the generated keyword table (kwtab.h) -- confirm.
1182 case 112: // defined
1183 case 113: // referenced
1191 // If not tokenized keyword OR token was not found
1192 if ((j < 0) || (state < 0))
1196 // Problem here: nullspot is a char * but TOKEN is a uint32_t. On a 64-bit
1197 // system, this will cause all kinds of mischief.
1199 *tk++ = (TOKEN)nullspot;
// The symbol text pointer is kept in string[] instead
1201 string[stringNum] = nullspot;
1212 if (v) // Record attribute token (if any)
1215 if (stuffnull) // Arrange for string termination on next pass
1221 // Handle identity tokens
1228 // Handle multiple-character tokens
1233 case '!': // ! or !=
1243 case '\'': // 'string'
1246 // Hardcoded for now, maybe this will change in the future
1251 case '\"': // "string"
1255 string[stringNum] = ln;
// Scan up to the closing quote character (c1) or end of line
1259 for(p=ln; *ln!=EOS && *ln!=c1;)
1268 return(error("unterminated string"));
1297 warn("bad backslash code in string");
1307 return error("unterminated string");
1311 case '$': // $, hex constant
1312 if (chrtab[*ln] & HDIGIT)
1316 // Parse the hex value
// hextab[] is relied on to be negative for non-hex characters
1317 while (hextab[*ln] >= 0)
1318 v = (v << 4) + (int)hextab[*ln++];
1322 if (obj_format == BSD)
// (c & 0xDF) clears bit 5, so this matches both upper- and lowercase
1324 if ((*(ln + 1) & 0xDF) == 'B')
1329 else if ((*(ln + 1) & 0xDF) == 'W')
1334 else if ((*(ln + 1) & 0xDF) == 'L')
1344 if (obj_format == ALCYON)
1348 if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
1353 else if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
1365 case '<': // < or << or <> or <=
1384 case ':': // : or ::
1394 case '=': // = or ==
1404 case '>': // > or >> or >=
1419 case '%': // % or binary constant
1420 if (*ln < '0' || *ln > '1')
1428 while (*ln >= '0' && *ln <= '1')
1429 v = (v << 1) + *ln++ - '0';
1433 if ((*(ln + 1) == 'b') || (*(ln + 1) == 'B'))
1439 if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
1445 if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
1454 case '@': // @ or octal constant
1455 if (*ln < '0' || *ln > '7')
1463 while (*ln >= '0' && *ln <= '7')
1464 v = (v << 3) + *ln++ - '0';
1468 if ((*(ln+1) == 'b') || (*(ln+1) == 'B'))
1474 if ((*(ln+1) == 'w') || (*(ln+1) == 'W'))
1480 if ((*(ln+1) == 'l') || (*(ln+1) == 'L'))
1489 case '^': // ^ or ^^ <operator-name>
1496 if (((int)chrtab[*++ln] & STSYM) == 0)
1498 error("invalid symbol following ^^");
1504 while ((int)chrtab[*ln] & CTSYM)
1507 for(state=0; state>=0;)
1509 // Get char, convert to lowercase
1512 if (j >= 'A' && j <= 'Z')
1517 if (kwcheck[j] != state)
1523 if (*p == EOS || p == ln)
1532 if (j < 0 || state < 0)
1534 error("unknown symbol following ^^");
1541 interror(2); // Bad MULTX entry in chrtab
1546 // Handle decimal constant
1551 while ((int)chrtab[*ln] & DIGIT)
1552 v = (v * 10) + *ln++ - '0';
1554 // See if there's a .[bwl] after the constant & deal with it if so
1557 if ((*(ln + 1) == 'b') || (*(ln + 1) == 'B'))
1562 else if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
1567 else if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
1575 //printf("CONST: %i\n", v);
1579 // Handle illegal character
1580 return error("illegal character $%02X found", *ln);
1583 // Terminate line of tokens and return "success."
1586 tok = etok; // Set tok to beginning of line
1588 if (stuffnull) // Terminate last SYMBOL
1598 // .GOTO <label> goto directive
1600 // The label is searched for starting from the first line of the current,
1601 // enclosing macro definition. If no enclosing macro exists, an error is
// reported ("goto not in macro").
1604 // A label is of the form:
1606 // :<name><whitespace>
1608 // The colon must appear in column 1. The label is stripped prior to macro
1609 // expansion, and is NOT subject to macro expansion. The whitespace may also
// be the end of the line (EOS).
//
// The parameter is unused. Visible error returns: "missing label",
// "goto not in macro" and "goto label not found". On a match, the macro's
// next-line pointer is set to the line carrying the label.
1612 int d_goto(WORD unused)
1614 // Setup for the search
1616 return error("missing label");
// tok[1] is used as an index into string[] to get the label's text
1618 char * sym = string[tok[1]];
1621 if (cur_inobj->in_type != SRC_IMACRO)
1622 return error("goto not in macro");
1624 IMACRO * imacro = cur_inobj->inobj.imacro;
1625 LLIST * defln = imacro->im_macro->lineList;
1627 // Attempt to find the label, starting with the first line.
1628 for(; defln!=NULL; defln=defln->next)
1630 // Must start with a colon
1631 if (defln->line[0] == ':')
1633 // Compare names (sleazo string compare)
1635 char * s2 = defln->line;
1637 // Either we will match the strings to EOS on both, or we will
1638 // match EOS on string 1 to whitespace on string 2. Otherwise, we
// have no match and move on to the next line.
1640 while ((*s1 == *s2) || ((*s1 == EOS) && (chrtab[*s2] & WHITE)))
1642 // If we reached the end of string 1 (sym), we're done.
1643 // Note that we're also checking for the end of string 2 as
1644 // well, since we've established they're equal above.
1647 // Found the label, set new macro next-line and return.
1648 imacro->im_nextln = defln;
1658 return error("goto label not found");
// Debug aid: print the tokens in tokbuf[] to stdout in a bracketed,
// human-readable form, starting at the beginning of tokbuf and stopping at
// the first EOL token. The header shows sloc in hex.
1662 void DumpTokenBuffer(void)
1665 printf("Tokens [%X]: ", sloc);
1667 for(t=tokbuf; *t!=EOL; t++)
1671 else if (*t == CONST)
1674 printf("[CONST: $%X]", (uint32_t)*t);
1676 else if (*t == ACONST)
1678 else if (*t == STRING)
// *t is used as an index into string[] here
1681 printf("[STRING:\"%s\"]", string[*t]);
1683 else if (*t == SYMBOL)
1686 printf("[SYMBOL:\"%s\"]", string[*t]);
1690 else if (*t == TKEOF)
1692 else if (*t == DEQUALS)
1693 printf("[DEQUALS]");
1698 else if (*t == DCOLON)
1710 else if (*t == UNMINUS)
1711 printf("[UNMINUS]");
1712 else if (*t == DOTB)
1714 else if (*t == DOTW)
1716 else if (*t == DOTL)
1718 else if (*t == DOTI)
1720 else if (*t == ENDEXPR)
1721 printf("[ENDEXPR]");
1722 else if (*t == CR_ABSCOUNT)
1723 printf("[CR_ABSCOUNT]");
1724 else if (*t == CR_DEFINED)
1725 printf("[CR_DEFINED]");
1726 else if (*t == CR_REFERENCED)
1727 printf("[CR_REFERENCED]");
1728 else if (*t == CR_STREQ)
1729 printf("[CR_STREQ]");
1730 else if (*t == CR_MACDEF)
1731 printf("[CR_MACDEF]");
1732 else if (*t == CR_TIME)
1733 printf("[CR_TIME]");
1734 else if (*t == CR_DATE)
1735 printf("[CR_DATE]");
// Token values $20..$2F and $3A..$3F are printed as the ASCII character
// with the same code
1736 else if (*t >= 0x20 && *t <= 0x2F)
1737 printf("[%c]", (char)*t);
1738 else if (*t >= 0x3A && *t <= 0x3F)
1739 printf("[%c]", (char)*t);
// $80..$87 print as D0..D7 and $88..$8F as A0..A7
1740 else if (*t >= 0x80 && *t <= 0x87)
1741 printf("[D%u]", ((uint32_t)*t) - 0x80);
1742 else if (*t >= 0x88 && *t <= 0x8F)
1743 printf("[A%u]", ((uint32_t)*t) - 0x88);
// Anything else: raw value in hex plus its low byte as a character
1745 printf("[%X:%c]", (uint32_t)*t, (char)*t);