2 // RMAC - Reboot's Macro Assembler for all Atari computers
3 // TOKEN.C - Token Handling
4 // Copyright (C) 199x Landon Dyer, 2011-2017 Reboot and Friends
5 // RMAC derived from MADMAC v1.07 Written by Landon Dyer, 1986
6 // Source utilised with the kind permission of Landon Dyer
17 #define DECL_KW // Declare keyword arrays
18 #define DEF_KW // Declare keyword values
19 #include "kwtab.h" // Incl generated keyword tables & defs
22 int lnsave; // 1; strcpy() text of current line
23 uint16_t curlineno; // Current line number (64K max currently)
24 int totlines; // Total # of lines
25 int mjump_align = 0; // mjump alignment flag
26 char lntag; // Line tag
27 char * curfname; // Current filename
28 char tolowertab[128]; // Uppercase ==> lowercase
29 int8_t hextab[128]; // Table of hex values
30 char dotxtab[128]; // Table for ".b", ".s", etc.
31 char irbuf[LNSIZ]; // Text for .rept block line
32 char lnbuf[LNSIZ]; // Text of current line
33 WORD filecount; // Unique file number counter
34 WORD cfileno; // Current file number
35 TOKEN * tok; // Ptr to current token
36 TOKEN * etok; // Ptr past last token in tokbuf[]
37 TOKEN tokeol[1] = {EOL}; // Bailout end-of-line token
38 char * string[TOKBUFSIZE*2]; // Token buffer string pointer storage
40 // File record, used to maintain a list of every include file ever visited
41 #define FILEREC struct _filerec
51 INOBJ * cur_inobj; // Ptr current input obj (IFILE/IMACRO)
52 static INOBJ * f_inobj; // Ptr list of free INOBJs
53 static IFILE * f_ifile; // Ptr list of free IFILEs
54 static IMACRO * f_imacro; // Ptr list of free IMACROs
56 static TOKEN tokbuf[TOKBUFSIZE]; // Token buffer (stack-like, all files)
58 uint8_t chrtab[0x100] = {
59 ILLEG, ILLEG, ILLEG, ILLEG, // NUL SOH STX ETX
60 ILLEG, ILLEG, ILLEG, ILLEG, // EOT ENQ ACK BEL
61 ILLEG, WHITE, ILLEG, ILLEG, // BS HT LF VT
62 WHITE, ILLEG, ILLEG, ILLEG, // FF CR SO SI
64 ILLEG, ILLEG, ILLEG, ILLEG, // DLE DC1 DC2 DC3
65 ILLEG, ILLEG, ILLEG, ILLEG, // DC4 NAK SYN ETB
66 ILLEG, ILLEG, ILLEG, ILLEG, // CAN EM SUB ESC
67 ILLEG, ILLEG, ILLEG, ILLEG, // FS GS RS US
69 WHITE, MULTX, MULTX, SELF, // SP ! " #
70 MULTX+CTSYM, MULTX, SELF, MULTX, // $ % & '
71 SELF, SELF, SELF, SELF, // ( ) * +
72 SELF, SELF, STSYM, SELF, // , - . /
74 DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 0 1
75 DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 2 3
76 DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 4 5
77 DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 6 7
78 DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 8 9
80 MULTX, MULTX, MULTX, STSYM+CTSYM, // < = > ?
82 MULTX, STSYM+CTSYM+HDIGIT, // @ A
83 DOT+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT, // B C
84 DOT+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT, // D E
85 STSYM+CTSYM+HDIGIT, STSYM+CTSYM, // F G
86 STSYM+CTSYM, DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // H I J K
87 DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // L M N O
89 DOT+STSYM+CTSYM, DOT+STSYM+CTSYM, STSYM+CTSYM, DOT+STSYM+CTSYM, // P Q R S
90 STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, DOT+STSYM+CTSYM, // T U V W
91 STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, SELF,// X Y Z [
92 SELF, SELF, MULTX, STSYM+CTSYM, // \ ] ^ _
94 ILLEG, STSYM+CTSYM+HDIGIT, // ` a
95 DOT+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT, // b c
96 DOT+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT, // d e
97 STSYM+CTSYM+HDIGIT, STSYM+CTSYM, // f g
98 STSYM+CTSYM, DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // h i j k
99 DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // l m n o
101 DOT+STSYM+CTSYM, DOT+STSYM+CTSYM, STSYM+CTSYM, DOT+STSYM+CTSYM, // p q r s
102 STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, DOT+STSYM+CTSYM, // t u v w
103 DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, SELF, // x y z {
104 SELF, SELF, SELF, ILLEG, // | } ~ DEL
106 // Anything above $7F is illegal (and yes, we need to check for this,
107 // otherwise you get strange and spurious errors that will lead you astray)
108 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
109 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
110 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
111 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
112 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
113 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
114 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
115 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
116 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
117 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
118 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
119 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
120 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
121 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
122 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
123 ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG
126 // Names of registers
127 static char * regname[] = {
128 "d0","d1","d2","d3","d4","d5","d6","d7", // 128,135
129 "a0","a1","a2","a3","a4","a5","a6","sp", // 136,143
130 "ssp","pc","sr","ccr","regequ","set","reg","r0", // 144,151
131 "r1","r2","r3","r4","r5","r6","r7","r8", // 152,159
132 "r9","r10","r11","r12","r13","r14","r15","r16", // 160,167
133 "r17","r18","r19","r20","r21","r22","r23","r24", // 168,175
134 "r25","r26","r27","r28","r29","r30","r31","ccdef", // 176,183
135 "usp","ic40","dc40","bc40","sfc","dfc","","vbr", // 184,191
136 "cacr","caar","msp","isp","tc","itt0","itt1","dtt0", // 192,199
137 "dtt1","mmusr","urp","srp","iacr0","iacr1","dacr0","dacr1", // 200,207
138 "tt0","tt1","crp","","","","","", // 208,215
139 "","","","","fpiar","fpsr","fpcr","", // 216,223
140 "fp0","fp1","fp2","fp3","fp4","fp5","fp6","fp7", // 224,231
141 "","","","","","","","", // 232,239
142 "","","","","","","","", // 240,247
143 "","","","","","","","", // 248,255
144 "","","","","x0","x1","y0","y1", // 256,263
145 "","b0","","b2","","b1","a","b", // 264,271
146 "mr","omr","la","lc","ssh","ssl","ss","", // 272,279
147 "n0","n1","n2","n3","n4","n5","n6","n7", // 280,287
148 "m0","m1","m2","m3","m4","m5","m6","m7", // 288,295
149 "","","","","","","l","p", // 296,303
150 "mr","omr","la","lc","ssh","ssl","ss","", // 304,311
151 "a10","b10","x","y","","","ab","ba" // 312,319
154 static char * riscregname[] = {
155 "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
156 "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
157 "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
158 "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31"
163 // Initialize tokenizer: reset the line-save flag, the current filename and
// the file counters, then fill the tolower, hex and "dot" lookup tables.
165 void InitTokenizer(void)
168 char * htab = "0123456789abcdefABCDEF"; // Hex character table
170 lnsave = 0; // Don't save lines
171 curfname = ""; // No file, empty filename
172 filecount = (WORD)-1; // No files yet; include() pre-increments this
173 cfileno = (WORD)-1; // cfileno gets bumped to 0
185 // Initialize hex, "dot" and tolower tables
190 tolowertab[i] = (char)i;
193 for(i=0; htab[i]!=EOS; i++)
// htab indices 0..15 are '0'-'9','a'-'f' (value == index); indices 16..21
// are 'A'-'F', which must map to 10..15, hence the "i - 6"
194 hextab[htab[i]] = (char)((i < 16) ? i : i - 6);
196 for(i='A'; i<='Z'; i++)
197 tolowertab[i] |= 0x20; // Setting bit 5 turns ASCII 'A'..'Z' into 'a'..'z'
199 // These characters are legal immediately after a period
200 dotxtab['b'] = DOTB; // .b .B (.s .S are no longer DOTB; see DOTS below)
202 //dotxtab['s'] = DOTB;
203 //dotxtab['S'] = DOTB;
204 dotxtab['w'] = DOTW; // .w .W
206 dotxtab['l'] = DOTL; // .l .L
208 dotxtab['i'] = DOTI; // .i .I (???)
210 dotxtab['D'] = DOTD; // .d .D (quad word)
212 dotxtab['S'] = DOTS; // .s .S
214 dotxtab['Q'] = DOTQ; // .q .Q
216 dotxtab['X'] = DOTX; // .x .X
218 dotxtab['P'] = DOTP; // .p .P
223 void SetFilenameForErrorReporting(void)
227 // Check for absolute top filename (this should never happen)
230 curfname = "(*top*)";
234 FILEREC * fr = filerec;
236 // Advance to the correct record...
237 while (fr != NULL && fnum != 0)
243 // Check for file # record not found (this should never happen either)
246 curfname = "(*NOT FOUND*)";
250 curfname = fr->frec_name;
255 // Allocate an IFILE or IMACRO
257 INOBJ * a_inobj(int typ)
263 // Allocate and initialize INOBJ first
265 inobj = malloc(sizeof(INOBJ));
269 f_inobj = f_inobj->in_link;
274 case SRC_IFILE: // Alloc and init an IFILE
276 ifile = malloc(sizeof(IFILE));
280 f_ifile = f_ifile->if_link;
283 inobj->inobj.ifile = ifile;
285 case SRC_IMACRO: // Alloc and init an IMACRO
286 if (f_imacro == NULL)
287 imacro = malloc(sizeof(IMACRO));
291 f_imacro = f_imacro->im_link;
294 inobj->inobj.imacro = imacro;
296 case SRC_IREPT: // Alloc and init an IREPT
297 inobj->inobj.irept = malloc(sizeof(IREPT));
298 DEBUG { printf("alloc IREPT\n"); }
302 // Install INOBJ on top of input stack
303 inobj->in_ifent = ifent; // Record .if context on entry
304 inobj->in_type = (WORD)typ;
305 inobj->in_otok = tok;
306 inobj->in_etok = etok;
307 inobj->in_link = cur_inobj;
315 // Perform macro substitution from 'orig' to 'dest'. Return OK or some error.
316 // A macro reference is in one of two forms:
317 // \name <non-name-character>
319 // A doubled backslash (\\) is compressed to a single backslash (\).
320 // Argument definitions have been pre-tokenized, so we have to turn them back
321 // into text. This means that numbers, in particular, become hex, regardless of
322 // their representation when the macro was invoked. This is a hack.
323 // A label may appear at the beginning of the line:
324 // :<name><whitespace>
325 // (the colon must be in the first column). These labels are stripped before
326 // macro expansion takes place.
328 int ExpandMacro(char * src, char * dest, int destsiz)
331 int questmark; // \? for testing argument existence
332 char mname[128]; // Assume max size of a formal arg name
333 char numbuf[20]; // Buffer for text of CONSTs
336 char ** symbolString;
338 DEBUG { printf("ExM: src=\"%s\"\n", src); }
340 IMACRO * imacro = cur_inobj->inobj.imacro;
341 int macnum = (int)(imacro->im_macro->sattr);
343 char * dst = dest; // Next dest slot
344 char * edst = dest + destsiz - 1; // Last byte of dest buffer (dest[destsiz - 1])
346 // Check for (and skip over) any "label" on the line
352 while (*s != EOS && !(chrtab[*s] & WHITE))
356 s++; // Skip first whitespace
359 // Expand the rest of the line
362 // Copy single character
368 // Skip comments in case a loose @ or \ is in there
369 // In that case the tokeniser was trying to expand it.
370 if ((*s == ';') || ((*s == '/') && (*(s + 1) == '/')))
375 // Do macro expansion
383 case '\\': // \\, \ (collapse to single backslash)
389 case '?': // \? <macro> set `questmark' flag
393 case '#': // \#, number of arguments
394 sprintf(numbuf, "%d", (int)imacro->im_nargs);
396 case '!': // \! size suffix supplied on invocation
397 switch ((int)imacro->im_siz)
399 case SIZN: d = ""; break;
400 case SIZB: d = ".b"; break;
401 case SIZW: d = ".w"; break;
402 case SIZL: d = ".l"; break;
406 case '~': // ==> unique label string Mnnnn...
407 sprintf(numbuf, "M%u", curuniq);
423 return error("missing argument name");
426 // \n ==> argument number 'n', 0..9
427 if (chrtab[*s] & DIGIT)
437 // Get argument name: \name, \{name}
447 while (chrtab[*s] & CTSYM);
452 for(++s; *s != EOS && *s != '}';)
456 return error("missing closing brace ('}')");
463 // Lookup the argument and copy its (string) value into the
464 // destination string
465 DEBUG { printf("argument='%s'\n", mname); }
467 if ((arg = lookup(mname, MACARG, macnum)) == NULL)
468 return error("undefined argument: '%s'", mname);
471 // Convert a string of tokens (terminated with EOL) back into
472 // text. If an argument is out of range (not specified in the
473 // macro invocation) then it is ignored.
474 i = (int)arg->svalue;
476 DEBUG { printf("~argnumber=%d (argBase=%u)\n", i, imacro->argBase); }
479 if (i < imacro->im_nargs)
484 tk = argPtrs[imacro->argBase + i];
486 tk = imacro->argument[i].token;
487 symbolString = imacro->argument[i].string;
490 // printf("ExM: Preparing to parse argument #%u...\n", i);
497 // 0 if the argument is empty or nonexistent,
498 // 1 if the argument is not empty
501 if (tk == NULL || *tk == EOL)
507 *dst++ = (char)(questmark + '0');
511 // Argument # is in range, so expand it
516 // Reverse-translation from a token number to a string.
517 // This is a hack. It might be better table-driven.
520 if ((*tk >= KW_D0) && !rdsp && !rgpu)
522 d = regname[(int)*tk++ - KW_D0];
525 else if ((*tk >= KW_R0) && (*tk <= KW_R31))
527 d = riscregname[(int)*tk++ - KW_R0];
536 // d = (char *)*tk++;
539 // This fix should be done for strings too
540 d = symbolString[*tk++];
541 DEBUG { printf("ExM: SYMBOL=\"%s\"", d); }
546 // d = (char *)*tk++;
549 d = symbolString[*tk++];
570 // Shamus: Changing the format specifier from %lx to %ux caused the assembler
571 // to choke on legitimate code... Need to investigate this further
572 // before changing anything else here!
574 sprintf(numbuf, "$%lx", (long unsigned int)*tk++);
638 *dst++ = (char)*(tk - 1);
643 // If 'd' != NULL, copy string to destination
647 DEBUG printf("d='%s'\n", d);
666 DEBUG { printf("ExM: dst=\"%s\"\n", dest); }
671 DEBUG { printf("*** OVERFLOW LINE ***\n%s\n", dest); }
672 return fatal("line too long as a result of macro expansion");
677 // Get next line of text from a macro: expand the line at im_nextln into
// the macro's im_lnbuf, advance im_nextln to the following line, and return
// im_lnbuf. TokenizeLine() treats a NULL return as end-of-macro.
679 char * GetNextMacroLine(void)
681 IMACRO * imacro = cur_inobj->inobj.imacro;
682 // Old form: LONG * strp = imacro->im_nextln;
683 struct LineList * strp = imacro->im_nextln;
685 if (strp == NULL) // End-of-macro
688 imacro->im_nextln = strp->next;
689 // Old form: ExpandMacro((char *)(strp + 1), imacro->im_lnbuf, LNSIZ);
// NOTE(review): ExpandMacro() returns an int status (OK or an error) that is
// not checked here; the buffer is returned regardless.
690 ExpandMacro(strp->line, imacro->im_lnbuf, LNSIZ);
692 return imacro->im_lnbuf;
697 // Get next line of text from a repeat block
699 char * GetNextRepeatLine(void)
701 IREPT * irept = cur_inobj->inobj.irept;
702 LONG * strp = irept->ir_nextln; // initial null
704 // Do repeat at end of .rept block's string list
707 DEBUG { printf("back-to-top-of-repeat-block count=%d\n", (int)irept->ir_count); }
708 irept->ir_nextln = irept->ir_firstln; // copy first line
710 if (irept->ir_count-- == 0)
712 DEBUG { printf("end-repeat-block\n"); }
716 strp = irept->ir_nextln;
719 strcpy(irbuf, (char *)(irept->ir_nextln + 1));
720 DEBUG printf("repeat line='%s'\n", irbuf);
721 irept->ir_nextln = (LONG *)*strp;
728 // Include a source file used at the root, and for ".include" files.
// 'handle' is the already-open file handle (stored in the IFILE and later
// close()d by fpop()); 'fname' is copied with strdup(). The current line
// number, filename and file number are saved in the IFILE so that fpop() can
// restore them, and a new FILEREC naming the file is added to the filerec list.
730 int include(int handle, char * fname)
733 DEBUG { printf("[include: %s, cfileno=%u]\n", fname, cfileno); }
735 // Alloc and initialize include-descriptors
736 INOBJ * inobj = a_inobj(SRC_IFILE);
737 IFILE * ifile = inobj->inobj.ifile;
739 ifile->ifhandle = handle; // Setup file handle
740 ifile->ifind = ifile->ifcnt = 0; // Setup buffer indices
741 ifile->ifoldlineno = curlineno; // Save old line number
742 ifile->ifoldfname = curfname; // Save old filename
743 ifile->ifno = cfileno; // Save old file number
745 // NB: This *must* be preincrement, we're adding one to the filecount here!
746 cfileno = ++filecount; // Compute NEW file number
// NOTE(review): no NULL check on the strdup() result is visible here
747 curfname = strdup(fname); // Set current filename (alloc storage)
748 curlineno = 0; // Start on line zero
750 // Add another file to the file-record
// NOTE(review): the malloc() result is dereferenced on the next line without
// a NULL check
751 FILEREC * fr = (FILEREC *)malloc(sizeof(FILEREC));
752 fr->frec_next = NULL;
753 fr->frec_name = curfname; // Shares the strdup()'ed storage with curfname
756 filerec = fr; // Add first filerec
758 last_fr->frec_next = fr; // Append to list of filerecs
761 DEBUG { printf("[include: curfname: %s, cfileno=%u]\n", curfname, cfileno); }
768 // Pop the current input level
775 INOBJ * inobj = cur_inobj;
779 // Pop IFENT levels until we reach the conditional assembly context we
780 // were at when the input object was entered.
781 int numUnmatched = 0;
783 while (ifent != inobj->in_ifent)
785 if (d_endif() != 0) // Something bad happened during endif parsing?
786 return -1; // If yes, bail instead of getting stuck in a loop
791 // Warn the user that we had to close their unterminated .if block(s) for them
792 if (numUnmatched > 0)
793 warn("missing %d .endif(s)", numUnmatched);
795 tok = inobj->in_otok; // Restore tok and etok (saved by a_inobj())
796 etok = inobj->in_etok;
798 switch (inobj->in_type)
800 case SRC_IFILE: // Pop and release an IFILE
801 DEBUG { printf("[Leaving: %s]\n", curfname); }
803 ifile = inobj->inobj.ifile;
804 ifile->if_link = f_ifile;
806 close(ifile->ifhandle); // Close source file
807 DEBUG { printf("[fpop (pre): curfname=%s]\n", curfname); }
808 curfname = ifile->ifoldfname; // Set current filename
809 DEBUG { printf("[fpop (post): curfname=%s]\n", curfname); }
810 DEBUG { printf("[fpop: (pre) cfileno=%d ifile->ifno=%d]\n", (int)cfileno, (int)ifile->ifno); }
811 curlineno = ifile->ifoldlineno; // Set current line#
812 DEBUG printf("cfileno=%d ifile->ifno=%d\n", (int)cfileno, (int)ifile->ifno);
813 cfileno = ifile->ifno; // Restore current file number
814 DEBUG { printf("[fpop: (post) cfileno=%d ifile->ifno=%d]\n", (int)cfileno, (int)ifile->ifno); }
816 case SRC_IMACRO: // Pop and release an IMACRO
817 imacro = inobj->inobj.imacro;
818 imacro->im_link = f_imacro;
821 case SRC_IREPT: // Pop and release an IREPT
822 DEBUG printf("dealloc IREPT\n");
823 p = inobj->inobj.irept->ir_firstln;
834 cur_inobj = inobj->in_link;
835 inobj->in_link = f_inobj;
844 // Get line from file into buf, return NULL on EOF or ptr to the start of a
847 char * GetNextLine(void)
851 int readamt = -1; // 0 if last read() yielded 0 bytes
852 IFILE * fl = cur_inobj->inobj.ifile;
856 // Scan for next end-of-line; handle stupid text formats by treating
857 // \r\n the same as \n. (lone '\r' at end of buffer means we have to
859 d = &fl->ifbuf[fl->ifind];
861 for(p=d, i=0, j=fl->ifcnt; i<j; i++, p++)
863 if (*p == '\r' || *p == '\n')
870 break; // Need to read more, then look for '\n' to eat
871 else if (p[1] == '\n')
875 // Cover up the newline with end-of-string sentinel
884 // Handle a hanging last line (input file is exhausted, no \r or \n on
885 // the last line). Rather than ignoring it, terminate it and return it.
886 // Shamus: Never ignore any input!
887 if (!readamt && fl->ifcnt)
894 // Really should check to see if we're at the end of the buffer!
896 fl->ifbuf[fl->ifind + fl->ifcnt] = '\0';
898 return &fl->ifbuf[fl->ifind];
902 // Truncate and return absurdly long lines.
903 if (fl->ifcnt >= QUANTUM)
905 fl->ifbuf[fl->ifind + fl->ifcnt - 1] = '\0';
907 return &fl->ifbuf[fl->ifind];
910 // Relocate what's left of a line to the beginning of the buffer, and
911 // read some more of the file in; return NULL if the buffer's empty and
915 p = &fl->ifbuf[fl->ifind];
916 d = &fl->ifbuf[fl->ifcnt & 1];
918 for(i=0; i<fl->ifcnt; i++)
921 fl->ifind = fl->ifcnt & 1;
924 readamt = read(fl->ifhandle, &fl->ifbuf[fl->ifind + fl->ifcnt], QUANTUM);
929 if ((fl->ifcnt += readamt) == 0)
938 int TokenizeLine(void)
940 uint8_t * ln = NULL; // Ptr to current position in line
941 uint8_t * p; // Random character ptr
942 TOKEN * tk; // Token-deposit ptr
943 int state = 0; // State for keyword detector
944 int j = 0; // Var for keyword detector
945 uint8_t c; // Random char
946 VALUE v; // Random value
947 uint8_t * nullspot = NULL; // Spot to clobber for SYMBOL termination
948 int stuffnull; // 1:terminate SYMBOL '\0' at *nullspot
950 int stringNum = 0; // Pointer to string locations in tokenized line
954 if (cur_inobj == NULL) // Return EOF if input stack is empty
957 // Get another line of input from the current input source: a file, a
958 // macro, or a repeat-block
959 switch (cur_inobj->in_type)
963 // o bump source line number;
964 // o tag the listing-line with a space;
965 // o kludge lines generated by Alcyon C.
967 if ((ln = GetNextLine()) == NULL)
969 DEBUG { printf("TokenizeLine: Calling fpop() from SRC_IFILE...\n"); }
970 if (fpop() == 0) // Pop input level
971 goto retry; // Try for more lines
974 ifent->if_prev = (IFENT *) - 1; //Signal Assemble() that we have reached EOF with unbalanced if/endifs
979 curlineno++; // Bump line number
984 // AS68 compatibility, throw away all lines starting with
985 // back-quotes, tildes, or '*'
986 // On other lines, turn the first '*' into a semi-colon.
987 if (*ln == '`' || *ln == '~' || *ln == '*')
991 for(p=ln; *p!=EOS; p++)
1004 // o Handle end-of-macro;
1005 // o tag the listing-line with an at (@) sign.
1007 if ((ln = GetNextMacroLine()) == NULL)
1009 if (ExitMacro() == 0) // Exit macro (pop args, do fpop(), etc)
1010 goto retry; // Try for more lines...
1012 return TKEOF; // Oops, we got a non zero return code, signal EOF
1018 // o Handle end-of-repeat-block;
1019 // o tag the listing-line with a pound (#) sign.
1021 if ((ln = GetNextRepeatLine()) == NULL)
1023 DEBUG { printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n"); }
1032 // Save text of the line. We only do this during listings and within
1033 // macro-type blocks, since it is expensive to unconditionally copy every
1038 // General house-keeping
1039 tok = tokeol; // Set "tok" to EOL in case of error
1040 tk = etok; // Reset token ptr
1041 stuffnull = 0; // Don't stuff nulls
1042 totlines++; // Bump total #lines assembled
1044 // See if the entire line is a comment. This is a win if the programmer
1045 // puts in lots of comments
1046 if (*ln == '*' || *ln == ';' || ((*ln == '/') && (*(ln + 1) == '/')))
1049 // Main tokenization loop;
1050 // o skip whitespace;
1051 // o handle end-of-line;
1052 // o handle symbols;
1053 // o handle single-character tokens (operators, etc.);
1054 // o handle multiple-character tokens (constants, strings, etc.).
1057 // Skip whitespace, handle EOL
1058 while (chrtab[*ln] & WHITE)
1061 // Handle EOL, comment with ';'
1062 if (*ln == EOS || *ln == ';'|| ((*ln == '/') && (*(ln + 1) == '/')))
1065 // Handle start of symbol. Symbols are null-terminated in place. The
1066 // termination is always one symbol behind, since there may be no place
1067 // for a null in the case that an operator immediately follows the name.
1072 if (stuffnull) // Terminate old symbol from previous pass
1075 v = 0; // Assume no DOT attrib follows symbol
1078 // In some cases, we need to check for a DOTx at the *beginning*
1079 // of a symbol, as the "start" of the line we're currently looking
1080 // at could be somewhere in the middle of that line!
1083 // Make sure that it's *only* a .[bwsl] following, and not the
1084 // start of a local symbol:
1085 if ((chrtab[*(ln + 1)] & DOT)
1086 && (dotxtab[*(ln + 1)] != 0)
1087 && !(chrtab[*(ln + 2)] & CTSYM))
1089 // We found a legitimate DOTx construct, so add it to the
1093 *tk++ = (TOKEN)dotxtab[*ln++];
1098 p = nullspot = ln++; // Nullspot -> start of this symbol
1100 // Find end of symbol (and compute its length)
1101 for(j=1; (int)chrtab[*ln]&CTSYM; j++)
1104 // Handle "DOT" special forms (like ".b") that follow a normal
1105 // symbol or keyword:
1108 *ln++ = EOS; // Terminate symbol
1109 stuffnull = 0; // And never try it again
1111 // Character following the `.' must have a DOT attribute, and
1112 // the character after THAT one must not have a CTSYM (symbol
1113 // continuation) attribute (to prevent symbols that look like, for
1114 // example, "zingo.barf", which might be a good idea anyway....)
1115 if (((chrtab[*ln] & DOT) == 0) || (dotxtab[*ln] == 0))
1116 return error("[bwsl] must follow '.' in symbol");
1118 v = (VALUE)dotxtab[*ln++];
1120 if (chrtab[*ln] & CTSYM)
1121 return error("misuse of '.'; not allowed in symbols");
1124 // If the symbol is small, check to see if it's really the name of
1128 for(state=0; state>=0;)
1130 j = (int)tolowertab[*p++];
1133 if (kwcheck[j] != state)
1139 if (*p == EOS || p == ln)
1153 // Make j = -1 if user tries to use a RISC register while in 68K mode
1154 if (!(rgpu || rdsp) && ((TOKEN)j >= KW_R0 && (TOKEN)j <= KW_R31))
1159 // Make j = -1 if time, date etc with no preceding ^^
1160 // defined, referenced, streq, macdef, date and time
1163 case 112: // defined
1164 case 113: // referenced
1172 // If not tokenized keyword OR token was not found
1173 if ((j < 0) || (state < 0))
1177 //problem here: nullspot is a char * but TOKEN is a uint32_t. On a 64-bit
1178 //system, this will cause all kinds of mischief.
1180 *tk++ = (TOKEN)nullspot;
1182 string[stringNum] = nullspot;
1193 if (v) // Record attribute token (if any)
1196 if (stuffnull) // Arrange for string termination on next pass
1202 // Handle identity tokens
1209 // Handle multiple-character tokens
1214 case '!': // ! or !=
1224 case '\'': // 'string'
1227 // Hardcoded for now, maybe this will change in the future
1232 case '\"': // "string"
1236 string[stringNum] = ln;
1240 for(p=ln; *ln!=EOS && *ln!=c1;)
1249 return(error("unterminated string"));
1278 warn("bad backslash code in string");
1288 return error("unterminated string");
1292 case '$': // $, hex constant
1293 if (chrtab[*ln] & HDIGIT)
1297 // Parse the hex value
1298 while (hextab[*ln] >= 0)
1299 v = (v << 4) + (int)hextab[*ln++];
1303 if (obj_format == BSD)
1305 if ((*(ln + 1) & 0xDF) == 'B')
1310 else if ((*(ln + 1) & 0xDF) == 'W')
1315 else if ((*(ln + 1) & 0xDF) == 'L')
1325 if (obj_format == ALCYON)
1329 if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
1334 else if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
1346 case '<': // < or << or <> or <=
1365 case ':': // : or ::
1375 case '=': // = or ==
1385 case '>': // > or >> or >=
1400 case '%': // % or binary constant
1401 if (*ln < '0' || *ln > '1')
1409 while (*ln >= '0' && *ln <= '1')
1410 v = (v << 1) + *ln++ - '0';
1414 if ((*(ln + 1) == 'b') || (*(ln + 1) == 'B'))
1420 if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
1426 if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
1435 case '@': // @ or octal constant
1436 if (*ln < '0' || *ln > '7')
1444 while (*ln >= '0' && *ln <= '7')
1445 v = (v << 3) + *ln++ - '0';
1449 if ((*(ln+1) == 'b') || (*(ln+1) == 'B'))
1455 if ((*(ln+1) == 'w') || (*(ln+1) == 'W'))
1461 if ((*(ln+1) == 'l') || (*(ln+1) == 'L'))
1470 case '^': // ^ or ^^ <operator-name>
1477 if (((int)chrtab[*++ln] & STSYM) == 0)
1479 error("invalid symbol following ^^");
1485 while ((int)chrtab[*ln] & CTSYM)
1488 for(state=0; state>=0;)
1490 // Get char, convert to lowercase
1493 if (j >= 'A' && j <= 'Z')
1498 if (kwcheck[j] != state)
1504 if (*p == EOS || p == ln)
1513 if (j < 0 || state < 0)
1515 error("unknown symbol following ^^");
1522 interror(2); // Bad MULTX entry in chrtab
1527 // Handle decimal constant
1532 while ((int)chrtab[*ln] & DIGIT)
1533 v = (v * 10) + *ln++ - '0';
1535 // See if there's a .[bwl] after the constant & deal with it if so
1538 if ((*(ln + 1) == 'b') || (*(ln + 1) == 'B'))
1543 else if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
1548 else if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
1556 //printf("CONST: %i\n", v);
1560 // Handle illegal character
1561 return error("illegal character $%02X found", *ln);
1564 // Terminate line of tokens and return "success."
1567 tok = etok; // Set tok to beginning of line
1569 if (stuffnull) // Terminate last SYMBOL
1579 // .GOTO <label> goto directive
1581 // The label is searched for starting from the first line of the current,
1582 // enclosing macro definition. If no enclosing macro exists, an error is
1585 // A label is of the form:
1587 // :<name><whitespace>
1589 // The colon must appear in column 1. The label is stripped prior to macro
1590 // expansion, and is NOT subject to macro expansion. The whitespace may also
1593 int d_goto(WORD unused)
1595 // Setup for the search
1597 return error("missing label");
1599 char * sym = string[tok[1]];
1602 if (cur_inobj->in_type != SRC_IMACRO)
1603 return error("goto not in macro");
1605 IMACRO * imacro = cur_inobj->inobj.imacro;
1606 struct LineList * defln = imacro->im_macro->lineList;
1608 // Attempt to find the label, starting with the first line.
1609 for(; defln!=NULL; defln=defln->next)
1611 // Must start with a colon
1612 if (defln->line[0] == ':')
1614 // Compare names (sleazo string compare)
1616 char * s2 = defln->line;
1618 // Either we will match the strings to EOS on both, or we will
1619 // match EOS on string 1 to whitespace on string 2. Otherwise, we
1621 while ((*s1 == *s2) || ((*s1 == EOS) && (chrtab[*s2] & WHITE)))
1623 // If we reached the end of string 1 (sym), we're done.
1624 // Note that we're also checking for the end of string 2 as
1625 // well, since we've established they're equal above.
1628 // Found the label, set new macro next-line and return.
1629 imacro->im_nextln = defln;
1639 return error("goto label not found");
1643 void DumpTokenBuffer(void)
1646 printf("Tokens [%X]: ", sloc);
1648 for(t=tokbuf; *t!=EOL; t++)
1652 else if (*t == CONST)
1655 printf("[CONST: $%X]", (uint32_t)*t);
1657 else if (*t == ACONST)
1659 else if (*t == STRING)
1662 printf("[STRING:\"%s\"]", string[*t]);
1664 else if (*t == SYMBOL)
1667 printf("[SYMBOL:\"%s\"]", string[*t]);
1671 else if (*t == TKEOF)
1673 else if (*t == DEQUALS)
1674 printf("[DEQUALS]");
1679 else if (*t == DCOLON)
1691 else if (*t == UNMINUS)
1692 printf("[UNMINUS]");
1693 else if (*t == DOTB)
1695 else if (*t == DOTW)
1697 else if (*t == DOTL)
1699 else if (*t == DOTI)
1701 else if (*t == ENDEXPR)
1702 printf("[ENDEXPR]");
1703 else if (*t == CR_ABSCOUNT)
1704 printf("[CR_ABSCOUNT]");
1705 else if (*t == CR_DEFINED)
1706 printf("[CR_DEFINED]");
1707 else if (*t == CR_REFERENCED)
1708 printf("[CR_REFERENCED]");
1709 else if (*t == CR_STREQ)
1710 printf("[CR_STREQ]");
1711 else if (*t == CR_MACDEF)
1712 printf("[CR_MACDEF]");
1713 else if (*t == CR_TIME)
1714 printf("[CR_TIME]");
1715 else if (*t == CR_DATE)
1716 printf("[CR_DATE]");
1717 else if (*t >= 0x20 && *t <= 0x2F)
1718 printf("[%c]", (char)*t);
1719 else if (*t >= 0x3A && *t <= 0x3F)
1720 printf("[%c]", (char)*t);
1721 else if (*t >= 0x80 && *t <= 0x87)
1722 printf("[D%u]", ((uint32_t)*t) - 0x80);
1723 else if (*t >= 0x88 && *t <= 0x8F)
1724 printf("[A%u]", ((uint32_t)*t) - 0x88);
1726 printf("[%X:%c]", (uint32_t)*t, (char)*t);