2 // RMAC - Reboot's Macro Assembler for the Atari Jaguar Console System
3 // TOKEN.C - Token Handling
4 // Copyright (C) 199x Landon Dyer, 2011-2017 Reboot and Friends
5 // RMAC derived from MADMAC v1.07 Written by Landon Dyer, 1986
6 // Source utilised with the kind permission of Landon Dyer
17 #define DECL_KW // Declare keyword arrays
18 #define DEF_KW // Declare keyword values
19 #include "kwtab.h" // Incl generated keyword tables & defs
// Tokenizer state. The non-static objects are visible to other translation
// units; the static ones (free lists, tokbuf) are private to this file.
22 int lnsave; // Nonzero: save a copy of the current line's text (InitTokenizer sets it to 0)
23 uint16_t curlineno; // Current line number (64K max currently)
24 int totlines; // Total # of lines
25 int mjump_align = 0; // mjump alignment flag
26 char lntag; // Line tag
27 char * curfname; // Current filename
28 char tolowertab[128]; // Uppercase ==> lowercase
29 int8_t hextab[128]; // Table of hex values
30 char dotxtab[128]; // Table for ".b", ".s", etc.
31 char irbuf[LNSIZ]; // Text for .rept block line
32 char lnbuf[LNSIZ]; // Text of current line
33 WORD filecount; // Unique file number counter
34 WORD cfileno; // Current file number
35 TOKEN * tok; // Ptr to current token
36 TOKEN * etok; // Ptr past last token in tokbuf[]
37 TOKEN tokeol[1] = {EOL}; // Bailout end-of-line token (TokenizeLine points tok here in case of error)
38 char * string[TOKBUFSIZE*2]; // Token buffer string pointer storage
40 // File record, used to maintain a list of every include file ever visited
41 #define FILEREC struct _filerec
51 INOBJ * cur_inobj; // Ptr current input obj (IFILE/IMACRO/IREPT)
52 static INOBJ * f_inobj; // Ptr list of free INOBJs
53 static IFILE * f_ifile; // Ptr list of free IFILEs
54 static IMACRO * f_imacro; // Ptr list of free IMACROs
56 static TOKEN tokbuf[TOKBUFSIZE]; // Token buffer (stack-like, all files)
// Character-class table entries, one per ASCII code; the trailing comment on
// each row names the characters that row covers. The tokenizer indexes this
// data as chrtab[] and tests the attribute bits with '&':
//   WHITE          whitespace that is skipped
//   STSYM / CTSYM  character may start / continue a symbol
//   DIGIT / HDIGIT decimal digit / hexadecimal digit
//   DOT            letter may follow a '.' (checked together with dotxtab[])
//   MULTX          character begins a multiple-character token
//   SELF, ILLEG    presumably "token is the character itself" and "illegal
//                  character" -- confirm against the flag definitions
// NOTE(review): lowercase 'x' carries DOT but uppercase 'X' does not; every
// other DOT letter is marked in both cases. Confirm whether ".X" is meant to
// be rejected.
59 ILLEG, ILLEG, ILLEG, ILLEG, // NUL SOH STX ETX
60 ILLEG, ILLEG, ILLEG, ILLEG, // EOT ENQ ACK BEL
61 ILLEG, WHITE, ILLEG, ILLEG, // BS HT LF VT
62 WHITE, ILLEG, ILLEG, ILLEG, // FF CR SO SI
64 ILLEG, ILLEG, ILLEG, ILLEG, // DLE DC1 DC2 DC3
65 ILLEG, ILLEG, ILLEG, ILLEG, // DC4 NAK SYN ETB
66 ILLEG, ILLEG, ILLEG, ILLEG, // CAN EM SUB ESC
67 ILLEG, ILLEG, ILLEG, ILLEG, // FS GS RS US
69 WHITE, MULTX, MULTX, SELF, // SP ! " #
70 MULTX+CTSYM, MULTX, SELF, MULTX, // $ % & '
71 SELF, SELF, SELF, SELF, // ( ) * +
72 SELF, SELF, STSYM, SELF, // , - . /
74 DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 0 1
75 DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 2 3
76 DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 4 5
77 DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 6 7
78 DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 8 9
80 MULTX, MULTX, MULTX, STSYM+CTSYM, // < = > ?
82 MULTX, STSYM+CTSYM+HDIGIT, // @ A
83 (char)((BYTE)DOT)+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT, // B C
84 (char)((BYTE)DOT)+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT, // D E
85 STSYM+CTSYM+HDIGIT, STSYM+CTSYM, // F G
86 STSYM+CTSYM, (char)((BYTE)DOT)+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // H I J K
87 (char)((BYTE)DOT)+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // L M N O
89 (char)((BYTE)DOT)+STSYM+CTSYM, (char)((BYTE)DOT)+STSYM+CTSYM, STSYM+CTSYM, (char)((BYTE)DOT)+STSYM+CTSYM, // P Q R S
90 STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, (char)((BYTE)DOT)+STSYM+CTSYM, // T U V W
91 STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, SELF, // X Y Z [
92 SELF, SELF, MULTX, STSYM+CTSYM, // \ ] ^ _
94 ILLEG, STSYM+CTSYM+HDIGIT, // ` a
95 (char)((BYTE)DOT)+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT, // b c
96 (char)((BYTE)DOT)+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT, // d e
97 STSYM+CTSYM+HDIGIT, STSYM+CTSYM, // f g
98 STSYM+CTSYM, (char)((BYTE)DOT)+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // h i j k
99 (char)((BYTE)DOT)+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // l m n o
101 (char)((BYTE)DOT)+STSYM+CTSYM, (char)((BYTE)DOT)+STSYM+CTSYM, STSYM+CTSYM, (char)((BYTE)DOT)+STSYM+CTSYM, // p q r s
102 STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, (char)((BYTE)DOT)+STSYM+CTSYM, // t u v w
103 (char)((BYTE)DOT)+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, SELF, // x y z {
104 SELF, SELF, SELF, ILLEG // | } ~ DEL
107 // Names of registers
// ExpandMacro() uses this table to turn a keyword token back into text: the
// entry for token value T is regname[T - KW_D0]. The trailing comment on each
// row gives the range of token values the row covers; empty strings mark
// token values that have no name in this table.
108 static char * regname[] = {
109 // "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7",
110 // "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7",
111 // "pc", "ssp", "usp", "sr", "ccr"
112 "d0","d1","d2","d3","d4","d5","d6","d7", // 128,135
113 "a0","a1","a2","a3","a4","a5","a6","sp", // 136,143
114 "ssp","pc","sr","ccr","regequ","set","reg","r0", // 144,151
115 "r1","r2","r3","r4","r5","r6","r7","r8", // 152,159
116 "r9","r10","r11","r12","r13","r14","r15","r16", // 160,167
117 "r17","r18","r19","r20","r21","r22","r23","r24", // 168,175
118 "r25","r26","r27","r28","r29","r30","r31","ccdef", // 176,183
119 "usp","ic40","dc40","bc40","sfc","dfc","","vbr", // 184,191
120 "cacr","caar","msp","isp","tc","itt0","itt1","dtt0", // 192,199
121 "dtt1","mmusr","urp","srp","iacr0","iacr1","dacr0","dacr1", // 200,207
122 "tt0","tt1","crp","","","","","", // 208,215
123 "","","","","fpiar","fpsr","fpcr","", // 216,223
124 "fp0","fp1","fp2","fp3","fp4","fp5","fp6","fp7", // 224,231
125 "","","","","","","","", // 232,239
126 "","","","","","","","", // 240,247
127 "","","","","","","","", // 248,255
128 "","","","","x0","x1","y0","y1", // 256,263
129 "","b0","","b2","","b1","a","b", // 264,271
130 "mr","omr","la","lc","ssh","ssl","ss","", // 272,279
131 "n0","n1","n2","n3","n4","n5","n6","n7", // 280,287
132 "m0","m1","m2","m3","m4","m5","m6","m7", // 288,295
133 "","","","","","","l","p", // 296,303
// NOTE(review): this row repeats the names already listed for 272,279 --
// confirm that is intentional.
134 "mr","omr","la","lc","ssh","ssl","ss","", // 304,311
135 "a10","b10","x","y","","","ab","ba" // 312,319
// Names of the RISC registers r0..r31. ExpandMacro() indexes this table with
// (token - KW_R0) for tokens in the range KW_R0..KW_R31.
138 static char * riscregname[] = {
139 "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
140 "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
141 "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
142 "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31"
147 // Initialize tokenizer
// Clears the line-saving flag, resets the current filename and the file
// counters, and fills the tolowertab[], hextab[] and dotxtab[] lookup tables.
149 void InitTokenizer(void)
152 char * htab = "0123456789abcdefABCDEF"; // Hex character table
154 lnsave = 0; // Don't save lines
155 curfname = ""; // No file, empty filename
// Both counters start at (WORD)-1 so that the first pre-increment in
// include() yields file number 0.
156 filecount = (WORD)-1;
157 cfileno = (WORD)-1; // cfileno gets bumped to 0
169 // Initialize hex, "dot" and tolower tables
174 tolowertab[i] = (char)i;
// htab positions 0..15 are the digit values themselves; positions 16..21
// ('A'..'F') are folded back onto 10..15 by subtracting 6.
177 for(i=0; htab[i]!=EOS; i++)
178 hextab[htab[i]] = (char)((i < 16) ? i : i - 6);
// Setting bit 0x20 maps each ASCII uppercase letter to its lowercase form.
180 for(i='A'; i<='Z'; i++)
181 tolowertab[i] |= 0x20;
183 // These characters are legal immediately after a period
184 dotxtab['b'] = DOTB; // .b .B
186 //dotxtab['s'] = DOTB;
187 //dotxtab['S'] = DOTB;
188 dotxtab['w'] = DOTW; // .w .W
190 dotxtab['l'] = DOTL; // .l .L
192 dotxtab['i'] = DOTI; // .i .I (???)
194 dotxtab['D'] = DOTD; // .d .D (quad word)
196 dotxtab['S'] = DOTS; // .s .S
198 dotxtab['Q'] = DOTQ; // .q .Q
200 dotxtab['X'] = DOTX; // .x .X
202 dotxtab['P'] = DOTP; // .p .P
// Set curfname to the name that error messages should report. The name is
// taken from the filerec list entry selected by a file number (fnum); the
// placeholder strings below are used when no such entry can be found.
207 void SetFilenameForErrorReporting(void)
211 // Check for absolute top filename (this should never happen)
214 curfname = "(*top*)";
218 FILEREC * fr = filerec;
220 // Advance to the correct record...
// The walk stops either when the list runs out (fr == NULL) or when fnum
// reaches zero.
221 while (fr != NULL && fnum != 0)
227 // Check for file # record not found (this should never happen either)
230 curfname = "(*NOT FOUND*)";
234 curfname = fr->frec_name;
239 // Allocate an IFILE or IMACRO
// (or an IREPT), wrapped in an INOBJ descriptor.
//   typ - SRC_IFILE, SRC_IMACRO or SRC_IREPT
// INOBJ, IFILE and IMACRO records are taken from their free lists (f_inobj,
// f_ifile, f_imacro) when available and malloc()ed otherwise; an IREPT is
// always malloc()ed. The new INOBJ records the current .if context and token
// pointers and is linked in front of cur_inobj.
// NOTE(review): none of the visible lines check the malloc() results --
// confirm how allocation failure is handled.
241 INOBJ * a_inobj(int typ)
247 // Allocate and initialize INOBJ first
249 inobj = malloc(sizeof(INOBJ));
253 f_inobj = f_inobj->in_link;
258 case SRC_IFILE: // Alloc and init an IFILE
260 ifile = malloc(sizeof(IFILE));
264 f_ifile = f_ifile->if_link;
267 inobj->inobj.ifile = ifile;
269 case SRC_IMACRO: // Alloc and init an IMACRO
270 if (f_imacro == NULL)
271 imacro = malloc(sizeof(IMACRO));
275 f_imacro = f_imacro->im_link;
278 inobj->inobj.imacro = imacro;
280 case SRC_IREPT: // Alloc and init an IREPT
281 inobj->inobj.irept = malloc(sizeof(IREPT));
282 DEBUG printf("alloc IREPT\n");
286 // Install INOBJ on top of input stack
287 inobj->in_ifent = ifent; // Record .if context on entry
288 inobj->in_type = (WORD)typ;
// Save the token pointers so they can be restored when this level is popped.
289 inobj->in_otok = tok;
290 inobj->in_etok = etok;
291 inobj->in_link = cur_inobj;
299 // Perform macro substitution from 'src' to 'dest'. Return OK or some error.
300 // A macro reference is in one of two forms:
301 // \name <non-name-character>
303 // A doubled backslash (\\) is compressed to a single backslash (\).
304 // Argument definitions have been pre-tokenized, so we have to turn them back
305 // into text. This means that numbers, in particular, become hex, regardless of
306 // their representation when the macro was invoked. This is a hack.
307 // A label may appear at the beginning of the line:
308 // :<name><whitespace>
309 // (the colon must be in the first column). These labels are stripped before
310 // macro expansion takes place.
//
//   src     - text of one line of the macro body
//   dest    - buffer that receives the expanded text
//   destsiz - size of dest in bytes
// The macro being expanded is the one on top of the input stack
// (cur_inobj->inobj.imacro).
312 int ExpandMacro(char * src, char * dest, int destsiz)
315 int questmark; // \? for testing argument existence
316 char mname[128]; // Assume max size of a formal arg name
317 char numbuf[20]; // Buffer for text of CONSTs
320 char ** symbolString;
322 DEBUG { printf("ExM: src=\"%s\"\n", src); }
324 IMACRO * imacro = cur_inobj->inobj.imacro;
// macnum is passed to lookup() below together with MACARG when resolving
// argument names.
325 int macnum = (int)(imacro->im_macro->sattr);
327 char * dst = dest; // Next dest slot
328 char * edst = dest + destsiz - 1; // Address of the last byte of the dest buffer
330 // Check for (and skip over) any "label" on the line
336 while (*s != EOS && !(chrtab[*s] & WHITE))
340 s++; // Skip first whitespace
343 // Expand the rest of the line
346 // Copy single character
352 // Skip comments in case a loose @ or \ is in there
353 // In that case the tokeniser was trying to expand it.
354 if ((*s == ';') || ((*s == '/') && (*(s + 1) == '/')))
359 // Do macro expansion
367 case '\\': // \\, \ (collapse to single backslash)
373 case '?': // \? <macro> set `questmark' flag
377 case '#': // \#, number of arguments
378 sprintf(numbuf, "%d", (int)imacro->im_nargs);
380 case '!': // \! size suffix supplied on invocation
381 switch ((int)imacro->im_siz)
383 case SIZN: d = ""; break;
384 case SIZB: d = ".b"; break;
385 case SIZW: d = ".w"; break;
386 case SIZL: d = ".l"; break;
390 case '~': // ==> unique label string Mnnnn...
391 sprintf(numbuf, "M%u", curuniq);
407 return error("missing argument name");
410 // \n ==> argument number 'n', 0..9
411 if (chrtab[*s] & DIGIT)
421 // Get argument name: \name, \{name}
431 while (chrtab[*s] & CTSYM);
436 for(++s; *s != EOS && *s != '}';)
440 return error("missing '}'");
447 // Lookup the argument and copy its (string) value into the
448 // destination string
449 DEBUG printf("argument='%s'\n", mname);
451 if ((arg = lookup(mname, MACARG, macnum)) == NULL)
452 return errors("undefined argument: '%s'", mname);
455 // Convert a string of tokens (terminated with EOL) back into
456 // text. If an argument is out of range (not specified in the
457 // macro invocation) then it is ignored.
458 i = (int)arg->svalue;
460 DEBUG printf("~argnumber=%d (argBase=%u)\n", i, imacro->argBase);
463 if (i < imacro->im_nargs)
468 tk = argPtrs[imacro->argBase + i];
470 tk = imacro->argument[i].token;
471 symbolString = imacro->argument[i].string;
474 // printf("ExM: Preparing to parse argument #%u...\n", i);
481 // 0 if the argument is empty or non-existent,
482 // 1 if the argument is not empty
485 if (tk == NULL || *tk == EOL)
// The flag is emitted as a single character, '0' or '1'.
491 *dst++ = (char)(questmark + '0');
495 // Argument # is in range, so expand it
500 // Reverse-translation from a token number to a string.
501 // This is a hack. It might be better table-driven.
// Keyword tokens from KW_D0 upward are mapped through regname[] unless the
// rdsp or rgpu flag is set; in that case tokens KW_R0..KW_R31 are mapped
// through riscregname[] instead.
504 if ((*tk >= KW_D0) && !rdsp && !rgpu)
506 d = regname[(int)*tk++ - KW_D0];
509 else if ((*tk >= KW_R0) && (*tk <= KW_R31))
511 d = riscregname[(int)*tk++ - KW_R0];
520 // d = (char *)*tk++;
523 // This fix should be done for strings too
// The token following the type is an index into the argument's own string
// table, not a pointer.
524 d = symbolString[*tk++];
525 DEBUG printf("ExM: SYMBOL=\"%s\"", d);
530 // d = (char *)*tk++;
533 d = symbolString[*tk++];
554 // Shamus: Changing the format specifier from %lx to %ux caused the assembler
555 // to choke on legitimate code... Need to investigate this further
556 // before changing anything else here!
// Constants are written back as "$" followed by hexadecimal digits.
558 sprintf(numbuf, "$%lx", (long unsigned int)*tk++);
622 *dst++ = (char)*(tk - 1);
627 // If 'd' != NULL, copy string to destination
631 DEBUG printf("d='%s'\n", d);
650 DEBUG { printf("ExM: dst=\"%s\"\n", dest); }
655 DEBUG printf("*** OVERFLOW LINE ***\n%s\n", dest);
656 return fatal("line too long as a result of macro expansion");
661 // Get next line of text from a macro
// Expands the macro's next stored line into imacro->im_lnbuf (LNSIZ bytes)
// and returns that buffer, advancing im_nextln to the following line.
// TokenizeLine() treats a NULL result as end-of-macro.
663 char * GetNextMacroLine(void)
665 IMACRO * imacro = cur_inobj->inobj.imacro;
666 // LONG * strp = imacro->im_nextln;
667 struct LineList * strp = imacro->im_nextln;
669 if (strp == NULL) // End-of-macro
672 imacro->im_nextln = strp->next;
673 // ExpandMacro((char *)(strp + 1), imacro->im_lnbuf, LNSIZ);
// NOTE(review): the status returned by ExpandMacro() is discarded here, so
// the buffer is returned even when expansion reported an error -- confirm
// that is intended.
674 ExpandMacro(strp->line, imacro->im_lnbuf, LNSIZ);
676 return imacro->im_lnbuf;
681 // Get next line of text from a repeat block
// Copies the next stored line of the current .rept block into irbuf. When the
// end of the block's line list is reached the list is restarted from
// ir_firstln and ir_count is decremented; TokenizeLine() treats a NULL result
// as end-of-repeat-block.
683 char * GetNextRepeatLine(void)
685 IREPT * irept = cur_inobj->inobj.irept;
686 LONG * strp = irept->ir_nextln; // initial null
688 // Do repeat at end of .rept block's string list
691 DEBUG printf("back-to-top-of-repeat-block count=%d\n", (int)irept->ir_count);
692 irept->ir_nextln = irept->ir_firstln; // copy first line
// The count is tested before it is decremented.
694 if (irept->ir_count-- == 0)
696 DEBUG printf("end-repeat-block\n");
700 strp = irept->ir_nextln;
// Each list node is a LONG holding the link to the next node, immediately
// followed by the line's text.
// NOTE(review): strcpy() is unbounded; confirm stored lines can never exceed
// the LNSIZ bytes of irbuf.
703 strcpy(irbuf, (char *)(irept->ir_nextln + 1));
704 DEBUG printf("repeat line='%s'\n", irbuf);
// NOTE(review): the link is read from a LONG and cast back to a pointer; if
// LONG is narrower than a pointer on the host this loses address bits --
// confirm.
705 irept->ir_nextln = (LONG *)*strp;
712 // Include a source file used at the root, and for ".include" files
//   handle - already-open file handle for the source file
//   fname  - name of the file; a private copy is made with strdup()
// Pushes a new IFILE input object, saves the current line number, filename
// and file number in it, assigns the next file number, and appends a FILEREC
// carrying the new name to the list of visited files.
714 int include(int handle, char * fname)
718 printf("[include: %s, cfileno=%u]\n", fname, cfileno);
720 // Alloc and initialize include-descriptors
721 INOBJ * inobj = a_inobj(SRC_IFILE);
722 IFILE * ifile = inobj->inobj.ifile;
724 ifile->ifhandle = handle; // Setup file handle
725 ifile->ifind = ifile->ifcnt = 0; // Setup buffer indices
726 ifile->ifoldlineno = curlineno; // Save old line number
727 ifile->ifoldfname = curfname; // Save old filename
728 ifile->ifno = cfileno; // Save old file number
730 // NB: This *must* be preincrement, we're adding one to the filecount here!
731 cfileno = ++filecount; // Compute NEW file number
// NOTE(review): neither the strdup() nor the malloc() result below is checked
// in the visible lines -- confirm how allocation failure is handled.
732 curfname = strdup(fname); // Set current filename (alloc storage)
733 curlineno = 0; // Start on line zero
735 // Add another file to the file-record
736 FILEREC * fr = (FILEREC *)malloc(sizeof(FILEREC));
737 fr->frec_next = NULL;
// The record shares the strdup()ed string with curfname.
738 fr->frec_name = curfname;
741 filerec = fr; // Add first filerec
743 last_fr->frec_next = fr; // Append to list of filerecs
746 DEBUG printf("[include: curfname: %s, cfileno=%u]\n", curfname, cfileno);
753 // Pop the current input level
// Closes any .if levels opened inside the input object being left, restores
// the token pointers saved when it was pushed, releases the type-specific
// record (IFILE, IMACRO or IREPT), and unlinks the INOBJ from the input
// stack onto the free list. Returns -1 if closing an .if level fails.
760 INOBJ * inobj = cur_inobj;
764 // Pop IFENT levels until we reach the conditional assembly context we
765 // were at when the input object was entered.
766 int numUnmatched = 0;
768 while (ifent != inobj->in_ifent)
770 if (d_endif() != 0) // Something bad happened during endif parsing?
771 return -1; // If yes, bail instead of getting stuck in a loop
776 // Warn the user that unterminated .if blocks were closed on their behalf
777 if (numUnmatched > 0)
778 warni("missing %d .endif(s)", numUnmatched);
780 tok = inobj->in_otok; // Restore tok and etok
781 etok = inobj->in_etok;
783 switch (inobj->in_type)
785 case SRC_IFILE: // Pop and release an IFILE
787 printf("[Leaving: %s]\n", curfname);
789 ifile = inobj->inobj.ifile;
// Return the IFILE to the free list used by a_inobj().
790 ifile->if_link = f_ifile;
792 close(ifile->ifhandle); // Close source file
793 if (debug) printf("[fpop (pre): curfname=%s]\n", curfname);
794 curfname = ifile->ifoldfname; // Set current filename
795 if (debug) printf("[fpop (post): curfname=%s]\n", curfname);
796 if (debug) printf("[fpop: (pre) cfileno=%d ifile->ifno=%d]\n", (int)cfileno, (int)ifile->ifno);
797 curlineno = ifile->ifoldlineno; // Set current line#
798 DEBUG printf("cfileno=%d ifile->ifno=%d\n", (int)cfileno, (int)ifile->ifno);
799 cfileno = ifile->ifno; // Restore current file number
800 if (debug) printf("[fpop: (post) cfileno=%d ifile->ifno=%d]\n", (int)cfileno, (int)ifile->ifno);
802 case SRC_IMACRO: // Pop and release an IMACRO
803 imacro = inobj->inobj.imacro;
804 imacro->im_link = f_imacro;
807 case SRC_IREPT: // Pop and release an IREPT
808 DEBUG printf("dealloc IREPT\n");
809 p = inobj->inobj.irept->ir_firstln;
// Unlink this level from the input stack and put the INOBJ on the free list.
820 cur_inobj = inobj->in_link;
821 inobj->in_link = f_inobj;
830 // Get line from file into buf, return NULL on EOF or ptr to the start of a
// The line is returned as a pointer into the current IFILE's ifbuf, with its
// line ending overwritten by a terminator. ifind is the offset of the unread
// data in ifbuf and ifcnt the number of unread bytes.
833 char * GetNextLine(void)
837 int readamt = -1; // 0 if last read() yielded 0 bytes
838 IFILE * fl = cur_inobj->inobj.ifile;
842 // Scan for next end-of-line; handle stupid text formats by treating
843 // \r\n the same as \n. (lone '\r' at end of buffer means we have to
845 d = &fl->ifbuf[fl->ifind];
847 for(p=d, i=0, j=fl->ifcnt; i<j; i++, p++)
849 if (*p == '\r' || *p == '\n')
856 break; // Need to read more, then look for '\n' to eat
857 else if (p[1] == '\n')
861 // Cover up the newline with end-of-string sentinel
870 // Handle hanging lines by ignoring them (Input file is exhausted, no
871 // \r or \n on last line)
872 // Shamus: Dropping such lines is a mistake. Never ignore any input!
873 if (!readamt && fl->ifcnt)
880 // Really should check to see if we're at the end of the buffer!
882 fl->ifbuf[fl->ifind + fl->ifcnt] = '\0';
884 return &fl->ifbuf[fl->ifind];
888 // Truncate and return absurdly long lines.
889 if (fl->ifcnt >= QUANTUM)
// The last buffered byte is overwritten by the terminator.
891 fl->ifbuf[fl->ifind + fl->ifcnt - 1] = '\0';
893 return &fl->ifbuf[fl->ifind];
896 // Relocate what's left of a line to the beginning of the buffer, and
897 // read some more of the file in; return NULL if the buffer's empty and
901 p = &fl->ifbuf[fl->ifind];
// The leftover bytes are moved to offset 0 or 1, chosen by the parity of
// ifcnt.
902 d = &fl->ifbuf[fl->ifcnt & 1];
904 for(i=0; i<fl->ifcnt; i++)
907 fl->ifind = fl->ifcnt & 1;
// Up to QUANTUM more bytes are appended directly after the leftover data.
910 readamt = read(fl->ifhandle, &fl->ifbuf[fl->ifind + fl->ifcnt], QUANTUM);
// NOTE(review): confirm a negative read() result is handled before it is
// added to ifcnt here.
915 if ((fl->ifcnt += readamt) == 0)
// Fetch the next line from the current input object (source file, macro or
// repeat block) and convert it into tokens. Tokens are deposited starting at
// etok, and on success tok is left pointing at the first of them. Text for
// SYMBOL and string tokens is recorded in string[], indexed by stringNum.
// When an input object runs out of lines it is popped and the next one is
// tried; TKEOF is returned when leaving a macro fails. Errors found while
// scanning are reported through error(), whose result is returned.
924 int TokenizeLine(void)
926 char * ln = NULL; // Ptr to current position in line
927 char * p; // Random character ptr
928 TOKEN * tk; // Token-deposit ptr
929 int state = 0; // State for keyword detector
930 int j = 0; // Var for keyword detector
931 char c; // Random char
932 VALUE v; // Random value
933 char * nullspot = NULL; // Spot to clobber for SYMBOL termination
934 int stuffnull; // 1:terminate SYMBOL '\0' at *nullspot
936 int stringNum = 0; // Pointer to string locations in tokenized line
940 if (cur_inobj == NULL) // Return EOF if input stack is empty
943 // Get another line of input from the current input source: a file, a
944 // macro, or a repeat-block
945 switch (cur_inobj->in_type)
949 // o bump source line number;
950 // o tag the listing-line with a space;
951 // o kludge lines generated by Alcyon C.
953 if ((ln = GetNextLine()) == NULL)
955 if (debug) printf("TokenizeLine: Calling fpop() from SRC_IFILE...\n");
956 if (fpop() == 0) // Pop input level
957 goto retry; // Try for more lines
960 ifent->if_prev = (IFENT *) - 1; //Signal Assemble() that we have reached EOF with unbalanced if/endifs
965 curlineno++; // Bump line number
970 // AS68 compatibility, throw away all lines starting with
971 // back-quotes, tildes, or '*'
972 // On other lines, turn the first '*' into a semi-colon.
973 if (*ln == '`' || *ln == '~' || *ln == '*')
977 for(p=ln; *p!=EOS; p++)
990 // o Handle end-of-macro;
991 // o tag the listing-line with an at (@) sign.
993 if ((ln = GetNextMacroLine()) == NULL)
995 if (ExitMacro() == 0) // Exit macro (pop args, do fpop(), etc)
996 goto retry; // Try for more lines...
998 return TKEOF; // Oops, we got a non zero return code, signal EOF
1004 // o Handle end-of-repeat-block;
1005 // o tag the listing-line with a pound (#) sign.
1007 if ((ln = GetNextRepeatLine()) == NULL)
1009 if (debug) printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n");
1018 // Save text of the line. We only do this during listings and within
1019 // macro-type blocks, since it is expensive to unconditionally copy every
1024 // General house-keeping
1025 tok = tokeol; // Set "tok" to EOL in case of error
1026 tk = etok; // Reset token ptr
1027 stuffnull = 0; // Don't stuff nulls
1028 totlines++; // Bump total #lines assembled
1030 // See if the entire line is a comment. This is a win if the programmer
1031 // puts in lots of comments
1032 if (*ln == '*' || *ln == ';' || ((*ln == '/') && (*(ln + 1) == '/')))
1035 // Main tokenization loop;
1036 // o skip whitespace;
1037 // o handle end-of-line;
1038 // o handle symbols;
1039 // o handle single-character tokens (operators, etc.);
1040 // o handle multiple-character tokens (constants, strings, etc.).
1043 // Skip whitespace, handle EOL
1044 while ((int)chrtab[*ln] & WHITE)
1047 // Handle EOL, comment with ';'
1048 if (*ln == EOS || *ln == ';'|| ((*ln == '/') && (*(ln + 1) == '/')))
1051 // Handle start of symbol. Symbols are null-terminated in place. The
1052 // termination is always one symbol behind, since there may be no place
1053 // for a null in the case that an operator immediately follows the name.
1058 if (stuffnull) // Terminate old symbol from previous pass
1061 v = 0; // Assume no DOT attrib follows symbol
1064 // In some cases, we need to check for a DOTx at the *beginning*
1065 // of a symbol, as the "start" of the line we're currently looking
1066 // at could be somewhere in the middle of that line!
1069 // Make sure that it's *only* a .[bwsl] following, and not the
1070 // start of a local symbol:
1071 if ((chrtab[*(ln + 1)] & DOT)
1072 && (dotxtab[*(ln + 1)] != 0)
1073 && !(chrtab[*(ln + 2)] & CTSYM))
1075 // We found a legitimate DOTx construct, so add it to the
1079 *tk++ = (TOKEN)dotxtab[*ln++];
1084 p = nullspot = ln++; // Nullspot -> start of this symbol
1086 // Find end of symbol (and compute its length)
1087 for(j=1; (int)chrtab[*ln]&CTSYM; j++)
1090 // Handle "DOT" special forms (like ".b") that follow a normal
1091 // symbol or keyword:
1094 *ln++ = EOS; // Terminate symbol
1095 stuffnull = 0; // And never try it again
1097 // Character following the `.' must have a DOT attribute, and
1098 // the character after THAT one must not have a start-symbol
1099 // attribute (to prevent symbols that look like, for example,
1100 // "zingo.barf", which might be a good idea anyway....)
1101 if (((chrtab[*ln] & DOT) == 0) || (dotxtab[*ln] == 0))
1102 return error("[bwsl] must follow '.' in symbol");
1104 v = (VALUE)dotxtab[*ln++];
1106 if (chrtab[*ln] & CTSYM)
1107 return error("misuse of '.', not allowed in symbols");
1110 // If the symbol is small, check to see if it's really the name of
// The symbol's characters are lowercased through tolowertab[] and fed one at
// a time to the keyword tables (kwcheck[] is visible here); the walk ends
// when state goes negative.
1114 for(state=0; state>=0;)
1116 j = (int)tolowertab[*p++];
1119 if (kwcheck[j] != state)
1125 if (*p == EOS || p == ln)
1139 // Make j = -1 if user tries to use a RISC register while in 68K mode
1140 if (!(rgpu || rdsp) && ((TOKEN)j >= KW_R0 && (TOKEN)j <= KW_R31))
1145 // Make j = -1 if time, date etc with no preceding ^^
1146 // defined, referenced, streq, macdef, date and time
// NOTE(review): the case labels below are raw keyword numbers -- confirm
// they stay in step with the generated keyword table.
1149 case 112: // defined
1150 case 113: // referenced
1158 // If not tokenized keyword OR token was not found
1159 if ((j < 0) || (state < 0))
1163 //problem here: nullspot is a char * but TOKEN is a uint32_t. On a 64-bit
1164 //system, this will cause all kinds of mischief.
1166 *tk++ = (TOKEN)nullspot;
1168 string[stringNum] = nullspot;
1179 if (v) // Record attribute token (if any)
1182 if (stuffnull) // Arrange for string termination on next pass
1188 // Handle identity tokens
1195 // Handle multiple-character tokens
1201 case '!': // ! or !=
1211 case '\'': // 'string'
1214 // Hardcoded for now, maybe this will change in the future
1219 case '\"': // "string"
1223 string[stringNum] = ln;
1227 for(p=ln; *ln!=EOS && *ln!=c1;)
1236 return(error("unterminated string"));
1265 warn("bad backslash code in string");
1275 return error("unterminated string");
1279 case '$': // $, hex constant
1280 if (chrtab[*ln] & HDIGIT)
1284 // Parse the hex value
// hextab[] holds each hex digit's value; the loop relies on non-digit
// characters having a negative entry.
1285 while (hextab[*ln] >= 0)
1286 v = (v << 4) + (int)hextab[*ln++];
1290 if (obj_format == BSD)
// Masking with 0xDF clears bit 0x20, so these tests match either letter
// case.
1292 if ((*(ln + 1) & 0xDF) == 'B')
1297 else if ((*(ln + 1) & 0xDF) == 'W')
1302 else if ((*(ln + 1) & 0xDF) == 'L')
1312 if (obj_format == ALCYON)
1316 if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
1321 else if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
1333 case '<': // < or << or <> or <=
1352 case ':': // : or ::
1362 case '=': // = or ==
1372 case '>': // > or >> or >=
1387 case '%': // % or binary constant
1388 if (*ln < '0' || *ln > '1')
1396 while (*ln >= '0' && *ln <= '1')
1397 v = (v << 1) + *ln++ - '0';
1401 if ((*(ln + 1) == 'b') || (*(ln + 1) == 'B'))
1407 if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
1413 if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
1422 case '@': // @ or octal constant
1423 if (*ln < '0' || *ln > '7')
1431 while (*ln >= '0' && *ln <= '7')
1432 v = (v << 3) + *ln++ - '0';
1436 if ((*(ln+1) == 'b') || (*(ln+1) == 'B'))
1442 if ((*(ln+1) == 'w') || (*(ln+1) == 'W'))
1448 if ((*(ln+1) == 'l') || (*(ln+1) == 'L'))
1457 case '^': // ^ or ^^ <operator-name>
1464 if (((int)chrtab[*++ln] & STSYM) == 0)
1466 error("invalid symbol following ^^");
1472 while ((int)chrtab[*ln] & CTSYM)
1475 for(state=0; state>=0;)
1477 // Get char, convert to lowercase
1480 if (j >= 'A' && j <= 'Z')
1485 if (kwcheck[j] != state)
1491 if (*p == EOS || p == ln)
1500 if (j < 0 || state < 0)
1502 error("unknown symbol following ^^");
1509 interror(2); // Bad MULTX entry in chrtab
1514 // Handle decimal constant
1519 while ((int)chrtab[*ln] & DIGIT)
1520 v = (v * 10) + *ln++ - '0';
1522 // See if there's a .[bwl] after the constant & deal with it if so
1525 if ((*(ln + 1) == 'b') || (*(ln + 1) == 'B'))
1530 else if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
1535 else if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
1543 //printf("CONST: %i\n", v);
1547 // Handle illegal character
1548 return error("illegal character");
1551 // Terminate line of tokens and return "success."
1554 tok = etok; // Set tok to beginning of line
1556 if (stuffnull) // Terminate last SYMBOL
1566 // .GOTO <label> goto directive
1568 // The label is searched for starting from the first line of the current,
1569 // enclosing macro definition. If no enclosing macro exists, an error is
1572 // A label is of the form:
1574 // :<name><whitespace>
1576 // The colon must appear in column 1. The label is stripped prior to macro
1577 // expansion, and is NOT subject to macro expansion. The whitespace may also
//
// On a match the macro's next-line pointer (im_nextln) is set to the line
// that carries the label. The parameter is not used.
1580 int d_goto(WORD unused)
1582 // Setup for the search
1584 return error("missing label");
// tok[1] is an index into string[], which holds the label's text.
1586 char * sym = string[tok[1]];
1589 if (cur_inobj->in_type != SRC_IMACRO)
1590 return error("goto not in macro");
1592 IMACRO * imacro = cur_inobj->inobj.imacro;
1593 struct LineList * defln = imacro->im_macro->lineList;
1595 // Attempt to find the label, starting with the first line.
1596 for(; defln!=NULL; defln=defln->next)
1598 // Must start with a colon
1599 if (defln->line[0] == ':')
1601 // Compare names (sleazo string compare)
1603 char * s2 = defln->line;
1605 // Either we will match the strings to EOS on both, or we will
1606 // match EOS on string 1 to whitespace on string 2. Otherwise, we
1608 while ((*s1 == *s2) || ((*s1 == EOS) && (chrtab[*s2] & WHITE)))
1610 // If we reached the end of string 1 (sym), we're done.
1611 // Note that we're also checking for the end of string 2 as
1612 // well, since we've established they're equal above.
1615 // Found the label, set new macro next-line and return.
1616 imacro->im_nextln = defln;
1626 return error("goto label not found");
// Debugging aid: print the tokens held in tokbuf[], from its start up to the
// first EOL token, in a bracketed human-readable form on stdout. Tokens with
// no dedicated case are printed as their hex value and character.
1630 void DumpTokenBuffer(void)
1633 printf("Tokens [%X]: ", sloc);
1635 for(t=tokbuf; *t!=EOL; t++)
1639 else if (*t == CONST)
1642 printf("[CONST: $%X]", (uint32_t)*t);
1644 else if (*t == ACONST)
1646 else if (*t == STRING)
// STRING and SYMBOL operands are indices into string[].
1649 printf("[STRING:\"%s\"]", string[*t]);
1651 else if (*t == SYMBOL)
1654 printf("[SYMBOL:\"%s\"]", string[*t]);
1658 else if (*t == TKEOF)
1660 else if (*t == DEQUALS)
1661 printf("[DEQUALS]");
1666 else if (*t == DCOLON)
1678 else if (*t == UNMINUS)
1679 printf("[UNMINUS]");
1680 else if (*t == DOTB)
1682 else if (*t == DOTW)
1684 else if (*t == DOTL)
1686 else if (*t == DOTI)
1688 else if (*t == ENDEXPR)
1689 printf("[ENDEXPR]");
1690 else if (*t == CR_ABSCOUNT)
1691 printf("[CR_ABSCOUNT]");
1692 else if (*t == CR_DEFINED)
1693 printf("[CR_DEFINED]");
1694 else if (*t == CR_REFERENCED)
1695 printf("[CR_REFERENCED]");
1696 else if (*t == CR_STREQ)
1697 printf("[CR_STREQ]");
1698 else if (*t == CR_MACDEF)
1699 printf("[CR_MACDEF]");
1700 else if (*t == CR_TIME)
1701 printf("[CR_TIME]");
1702 else if (*t == CR_DATE)
1703 printf("[CR_DATE]");
// Token values in the ranges 0x20..0x2F and 0x3A..0x3F are printed as the
// character with that code.
1704 else if (*t >= 0x20 && *t <= 0x2F)
1705 printf("[%c]", (char)*t);
1706 else if (*t >= 0x3A && *t <= 0x3F)
1707 printf("[%c]", (char)*t);
// 0x80..0x87 are printed as D0..D7 and 0x88..0x8F as A0..A7.
1708 else if (*t >= 0x80 && *t <= 0x87)
1709 printf("[D%u]", ((uint32_t)*t) - 0x80);
1710 else if (*t >= 0x88 && *t <= 0x8F)
1711 printf("[A%u]", ((uint32_t)*t) - 0x88);
1713 printf("[%X:%c]", (uint32_t)*t, (char)*t);