2 // RMAC - Reboot's Macro Assembler for the Atari Jaguar Console System
3 // TOKEN.C - Token Handling
4 // Copyright (C) 199x Landon Dyer, 2011-2017 Reboot and Friends
5 // RMAC derived from MADMAC v1.07 Written by Landon Dyer, 1986
6 // Source utilised with the kind permission of Landon Dyer
17 #define DECL_KW // Declare keyword arrays
18 #define DEF_KW // Declare keyword values
19 #include "kwtab.h" // Incl generated keyword tables & defs
22 int lnsave; // 1; strcpy() text of current line
23 int curlineno; // Current line number
24 int totlines; // Total # of lines
25 int mjump_align = 0; // mjump alignment flag
26 char lntag; // Line tag
27 char * curfname; // Current filename
28 char tolowertab[128]; // Uppercase ==> lowercase
29 int8_t hextab[128]; // Table of hex values
30 char dotxtab[128]; // Table for ".b", ".s", etc.
31 char irbuf[LNSIZ]; // Text for .rept block line
32 char lnbuf[LNSIZ]; // Text of current line
33 WORD filecount; // Unique file number counter
34 WORD cfileno; // Current file number
35 TOKEN * tok; // Ptr to current token
36 TOKEN * etok; // Ptr past last token in tokbuf[]
37 TOKEN tokeol[1] = {EOL}; // Bailout end-of-line token
38 char * string[TOKBUFSIZE*2]; // Token buffer string pointer storage
40 // File record, used to maintain a list of every include file ever visited
41 #define FILEREC struct _filerec
51 INOBJ * cur_inobj; // Ptr current input obj (IFILE/IMACRO)
52 static INOBJ * f_inobj; // Ptr list of free INOBJs
53 static IFILE * f_ifile; // Ptr list of free IFILEs
54 static IMACRO * f_imacro; // Ptr list of free IMACROs
56 static TOKEN tokbuf[TOKBUFSIZE]; // Token buffer (stack-like, all files)
59 ILLEG, ILLEG, ILLEG, ILLEG, // NUL SOH STX ETX
60 ILLEG, ILLEG, ILLEG, ILLEG, // EOT ENQ ACK BEL
61 ILLEG, WHITE, ILLEG, ILLEG, // BS HT LF VT
62 WHITE, ILLEG, ILLEG, ILLEG, // FF CR SO SI
64 ILLEG, ILLEG, ILLEG, ILLEG, // DLE DC1 DC2 DC3
65 ILLEG, ILLEG, ILLEG, ILLEG, // DC4 NAK SYN ETB
66 ILLEG, ILLEG, ILLEG, ILLEG, // CAN EM SUB ESC
67 ILLEG, ILLEG, ILLEG, ILLEG, // FS GS RS US
69 WHITE, MULTX, MULTX, SELF, // SP ! " #
70 MULTX+CTSYM, MULTX, SELF, MULTX, // $ % & '
71 SELF, SELF, SELF, SELF, // ( ) * +
72 SELF, SELF, STSYM, SELF, // , - . /
74 DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 0 1
75 DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 2 3
76 DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 4 5
77 DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 6 7
78 DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 8 9
80 MULTX, MULTX, MULTX, STSYM+CTSYM, // < = > ?
82 MULTX, STSYM+CTSYM+HDIGIT, // @ A
83 (char)((BYTE)DOT)+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT, // B C
84 STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT, // D E
85 STSYM+CTSYM+HDIGIT, STSYM+CTSYM, // F G
86 STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // H I J K
87 (char)((BYTE)DOT)+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // L M N O
89 STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, (char)((BYTE)DOT)+STSYM+CTSYM, // P Q R S
90 STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, (char)((BYTE)DOT)+STSYM+CTSYM, // T U V W
91 STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, SELF, // X Y Z [
92 SELF, SELF, MULTX, STSYM+CTSYM, // \ ] ^ _
94 ILLEG, STSYM+CTSYM+HDIGIT, // ` a
95 (char)((BYTE)DOT)+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT, // b c
96 STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT, // d e
97 STSYM+CTSYM+HDIGIT, STSYM+CTSYM, // f g
98 STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // h i j k
99 (char)((BYTE)DOT)+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // l m n o
101 STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, (char)((BYTE)DOT)+STSYM+CTSYM, // p q r s
102 STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, (char)((BYTE)DOT)+STSYM+CTSYM, // t u v w
103 STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, SELF, // x y z {
104 SELF, SELF, SELF, ILLEG // | } ~ DEL
107 // Names of registers
108 static char * regname[] = {
109 "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7",
110 "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7",
111 "pc", "ssp", "usp", "sr", "ccr"
114 static char * riscregname[] = {
115 "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
116 "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
117 "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
118 "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31"
123 // Initialize tokenizer
125 void InitTokenizer(void)
128 char * htab = "0123456789abcdefABCDEF"; // Hex character table
130 lnsave = 0; // Don't save lines
131 curfname = ""; // No file, empty filename
132 filecount = (WORD)-1;
133 cfileno = (WORD)-1; // cfileno gets bumped to 0
145 // Initialize hex, "dot" and tolower tables
150 tolowertab[i] = (char)i;
153 for(i=0; htab[i]!=EOS; i++)
154 hextab[htab[i]] = (char)((i < 16) ? i : i - 6);
156 for(i='A'; i<='Z'; i++)
157 tolowertab[i] |= 0x20;
159 // These characters are legal immediately after a period
160 dotxtab['b'] = DOTB; // .b .B .s .S
164 dotxtab['w'] = DOTW; // .w .W
166 dotxtab['l'] = DOTL; // .l .L
168 dotxtab['i'] = DOTI; // .i .I (???)
173 void SetFilenameForErrorReporting(void)
177 // Check for absolute top filename (this should never happen)
180 curfname = "(*top*)";
184 FILEREC * fr = filerec;
186 // Advance to the correct record...
187 while (fr != NULL && fnum != 0)
193 // Check for file # record not found (this should never happen either)
196 curfname = "(*NOT FOUND*)";
200 curfname = fr->frec_name;
205 // Allocate an IFILE or IMACRO
207 INOBJ * a_inobj(int typ)
213 // Allocate and initialize INOBJ first
215 inobj = malloc(sizeof(INOBJ));
219 f_inobj = f_inobj->in_link;
224 case SRC_IFILE: // Alloc and init an IFILE
226 ifile = malloc(sizeof(IFILE));
230 f_ifile = f_ifile->if_link;
233 inobj->inobj.ifile = ifile;
235 case SRC_IMACRO: // Alloc and init an IMACRO
236 if (f_imacro == NULL)
237 imacro = malloc(sizeof(IMACRO));
241 f_imacro = f_imacro->im_link;
244 inobj->inobj.imacro = imacro;
246 case SRC_IREPT: // Alloc and init an IREPT
247 inobj->inobj.irept = malloc(sizeof(IREPT));
248 DEBUG printf("alloc IREPT\n");
252 // Install INOBJ on top of input stack
253 inobj->in_ifent = ifent; // Record .if context on entry
254 inobj->in_type = (WORD)typ;
255 inobj->in_otok = tok;
256 inobj->in_etok = etok;
257 inobj->in_link = cur_inobj;
265 // Perform macro substitution from 'orig' to 'dest'. Return OK or some error.
266 // A macro reference is in one of two forms:
267 // \name <non-name-character>
269 // A doubled backslash (\\) is compressed to a single backslash (\).
270 // Argument definitions have been pre-tokenized, so we have to turn them back
271 // into text. This means that numbers, in particular, become hex, regardless of
272 // their representation when the macro was invoked. This is a hack.
273 // A label may appear at the beginning of the line:
274 // :<name><whitespace>
275 // (the colon must be in the first column). These labels are stripped before
276 // macro expansion takes place.
278 int ExpandMacro(char * src, char * dest, int destsiz)
281 int questmark; // \? for testing argument existence
282 char mname[128]; // Assume max size of a formal arg name
283 char numbuf[20]; // Buffer for text of CONSTs
286 char ** symbolString;
288 DEBUG { printf("ExM: src=\"%s\"\n", src); }
290 IMACRO * imacro = cur_inobj->inobj.imacro;
291 int macnum = (int)(imacro->im_macro->sattr);
293 char * dst = dest; // Next dest slot
294 char * edst = dest + destsiz - 1; // End + 1(?) of dest buffer
296 // Check for (and skip over) any "label" on the line
302 while (*s != EOS && !(chrtab[*s] & WHITE))
306 s++; // Skip first whitespace
309 // Expand the rest of the line
312 // Copy single character
318 // Skip comments in case a loose @ or \ is in there
319 // In that case the tokeniser was trying to expand it.
320 if ((*s == ';') || ((*s == '/') && (*(s + 1) == '/')))
325 // Do macro expansion
333 case '\\': // \\, \ (collapse to single backslash)
339 case '?': // \? <macro> set `questmark' flag
343 case '#': // \#, number of arguments
344 sprintf(numbuf, "%d", (int)imacro->im_nargs);
346 case '!': // \! size suffix supplied on invocation
347 switch ((int)imacro->im_siz)
349 case SIZN: d = ""; break;
350 case SIZB: d = ".b"; break;
351 case SIZW: d = ".w"; break;
352 case SIZL: d = ".l"; break;
356 case '~': // ==> unique label string Mnnnn...
357 sprintf(numbuf, "M%u", curuniq);
373 return error("missing argument name");
376 // \n ==> argument number 'n', 0..9
377 if (chrtab[*s] & DIGIT)
387 // Get argument name: \name, \{name}
397 while (chrtab[*s] & CTSYM);
402 for(++s; *s != EOS && *s != '}';)
406 return error("missing '}'");
413 // Lookup the argument and copy its (string) value into the
414 // destination string
415 DEBUG printf("argument='%s'\n", mname);
417 if ((arg = lookup(mname, MACARG, macnum)) == NULL)
418 return errors("undefined argument: '%s'", mname);
421 // Convert a string of tokens (terminated with EOL) back into
422 // text. If an argument is out of range (not specified in the
423 // macro invocation) then it is ignored.
424 i = (int)arg->svalue;
426 DEBUG printf("~argnumber=%d (argBase=%u)\n", i, imacro->argBase);
429 if (i < imacro->im_nargs)
434 tk = argPtrs[imacro->argBase + i];
436 tk = imacro->argument[i].token;
437 symbolString = imacro->argument[i].string;
440 // printf("ExM: Preparing to parse argument #%u...\n", i);
447 // 0 if the argument is empty or non-existent,
448 // 1 if the argument is not empty
451 if (tk == NULL || *tk == EOL)
457 *dst++ = (char)(questmark + '0');
461 // Argument # is in range, so expand it
466 // Reverse-translation from a token number to a string.
467 // This is a hack. It might be better table-driven.
470 if ((*tk >= KW_D0) && !rdsp && !rgpu)
472 d = regname[(int)*tk++ - KW_D0];
475 else if ((*tk >= KW_R0) && (*tk <= KW_R31))
477 d = riscregname[(int)*tk++ - KW_R0];
486 // d = (char *)*tk++;
489 // This fix should be done for strings too
490 d = symbolString[*tk++];
491 DEBUG printf("ExM: SYMBOL=\"%s\"", d);
496 // d = (char *)*tk++;
499 d = symbolString[*tk++];
520 // Shamus: Changing the format specifier from %lx to %ux caused
521 // the assembler to choke on legitimate code... Need to investigate
522 // this further before changing anything else here!
524 sprintf(numbuf, "$%lx", (LONG)*tk++);
588 *dst++ = (char)*(tk - 1);
593 // If 'd' != NULL, copy string to destination
597 DEBUG printf("d='%s'\n", d);
616 DEBUG { printf("ExM: dst=\"%s\"\n", dest); }
621 DEBUG printf("*** OVERFLOW LINE ***\n%s\n", dest);
622 return fatal("line too long as a result of macro expansion");
627 // Get next line of text from a macro
629 char * GetNextMacroLine(void)
631 IMACRO * imacro = cur_inobj->inobj.imacro;
632 // LONG * strp = imacro->im_nextln;
633 struct LineList * strp = imacro->im_nextln;
635 if (strp == NULL) // End-of-macro
638 imacro->im_nextln = strp->next;
639 // ExpandMacro((char *)(strp + 1), imacro->im_lnbuf, LNSIZ);
640 ExpandMacro(strp->line, imacro->im_lnbuf, LNSIZ);
642 return imacro->im_lnbuf;
647 // Get next line of text from a repeat block
649 char * GetNextRepeatLine(void)
652 IREPT * irept = cur_inobj->inobj.irept;
653 LONG * strp = irept->ir_nextln; // initial null
655 // Do repeat at end of .rept block's string list
658 DEBUG printf("back-to-top-of-repeat-block count=%d\n", (int)irept->ir_count);
659 irept->ir_nextln = irept->ir_firstln; // copy first line
661 if (irept->ir_count-- == 0)
663 DEBUG printf("end-repeat-block\n");
667 strp = irept->ir_nextln;
670 strcpy(irbuf, (char *)(irept->ir_nextln + 1));
671 DEBUG printf("repeat line='%s'\n", irbuf);
672 irept->ir_nextln = (LONG *)*strp;
679 // Include a source file used at the root, and for ".include" files
681 int include(int handle, char * fname)
685 printf("[include: %s, cfileno=%u]\n", fname, cfileno);
687 // Alloc and initialize include-descriptors
688 INOBJ * inobj = a_inobj(SRC_IFILE);
689 IFILE * ifile = inobj->inobj.ifile;
691 ifile->ifhandle = handle; // Setup file handle
692 ifile->ifind = ifile->ifcnt = 0; // Setup buffer indices
693 ifile->ifoldlineno = curlineno; // Save old line number
694 ifile->ifoldfname = curfname; // Save old filename
695 ifile->ifno = cfileno; // Save old file number
697 // NB: This *must* be preincrement, we're adding one to the filecount here!
698 cfileno = ++filecount; // Compute NEW file number
699 curfname = strdup(fname); // Set current filename (alloc storage)
700 curlineno = 0; // Start on line zero
702 // Add another file to the file-record
703 FILEREC * fr = (FILEREC *)malloc(sizeof(FILEREC));
704 fr->frec_next = NULL;
705 fr->frec_name = curfname;
708 filerec = fr; // Add first filerec
710 last_fr->frec_next = fr; // Append to list of filerecs
713 DEBUG printf("[include: curfname: %s, cfileno=%u]\n", curfname, cfileno);
720 // Pop the current input level
727 INOBJ * inobj = cur_inobj;
731 // Pop IFENT levels until we reach the conditional assembly context we
732 // were at when the input object was entered.
733 int numUnmatched = 0;
735 while (ifent != inobj->in_ifent)
737 if (d_endif() != 0) // Something bad happened during endif parsing?
738 return -1; // If yes, bail instead of getting stuck in a loop
743 // Give a warning to the user that we had to wipe their bum for them
744 if (numUnmatched > 0)
745 warni("missing %d .endif(s)", numUnmatched);
747 tok = inobj->in_otok; // Restore tok and otok
748 etok = inobj->in_etok;
750 switch (inobj->in_type)
752 case SRC_IFILE: // Pop and release an IFILE
754 printf("[Leaving: %s]\n", curfname);
756 ifile = inobj->inobj.ifile;
757 ifile->if_link = f_ifile;
759 close(ifile->ifhandle); // Close source file
760 if (debug) printf("[fpop (pre): curfname=%s]\n", curfname);
761 curfname = ifile->ifoldfname; // Set current filename
762 if (debug) printf("[fpop (post): curfname=%s]\n", curfname);
763 if (debug) printf("[fpop: (pre) cfileno=%d ifile->ifno=%d]\n", (int)cfileno, (int)ifile->ifno);
764 curlineno = ifile->ifoldlineno; // Set current line#
765 DEBUG printf("cfileno=%d ifile->ifno=%d\n", (int)cfileno, (int)ifile->ifno);
766 cfileno = ifile->ifno; // Restore current file number
767 if (debug) printf("[fpop: (post) cfileno=%d ifile->ifno=%d]\n", (int)cfileno, (int)ifile->ifno);
769 case SRC_IMACRO: // Pop and release an IMACRO
770 imacro = inobj->inobj.imacro;
771 imacro->im_link = f_imacro;
774 case SRC_IREPT: // Pop and release an IREPT
775 DEBUG printf("dealloc IREPT\n");
776 p = inobj->inobj.irept->ir_firstln;
787 cur_inobj = inobj->in_link;
788 inobj->in_link = f_inobj;
797 // Get line from file into buf, return NULL on EOF or ptr to the start of a
800 char * GetNextLine(void)
804 int readamt = -1; // 0 if last read() yeilded 0 bytes
805 IFILE * fl = cur_inobj->inobj.ifile;
809 // Scan for next end-of-line; handle stupid text formats by treating
810 // \r\n the same as \n. (lone '\r' at end of buffer means we have to
812 d = &fl->ifbuf[fl->ifind];
814 for(p=d, i=0, j=fl->ifcnt; i<j; i++, p++)
816 if (*p == '\r' || *p == '\n')
823 break; // Need to read more, then look for '\n' to eat
824 else if (p[1] == '\n')
828 // Cover up the newline with end-of-string sentinel
837 // Handle hanging lines by ignoring them (Input file is exhausted, no
838 // \r or \n on last line)
839 // Shamus: This is retarded. Never ignore any input!
840 if (!readamt && fl->ifcnt)
847 // Really should check to see if we're at the end of the buffer!
849 fl->ifbuf[fl->ifind + fl->ifcnt] = '\0';
851 return &fl->ifbuf[fl->ifind];
855 // Truncate and return absurdly long lines.
856 if (fl->ifcnt >= QUANTUM)
858 fl->ifbuf[fl->ifind + fl->ifcnt - 1] = '\0';
860 return &fl->ifbuf[fl->ifind];
863 // Relocate what's left of a line to the beginning of the buffer, and
864 // read some more of the file in; return NULL if the buffer's empty and
868 p = &fl->ifbuf[fl->ifind];
869 d = &fl->ifbuf[fl->ifcnt & 1];
871 for(i=0; i<fl->ifcnt; i++)
874 fl->ifind = fl->ifcnt & 1;
877 readamt = read(fl->ifhandle, &fl->ifbuf[fl->ifind + fl->ifcnt], QUANTUM);
882 if ((fl->ifcnt += readamt) == 0)
891 int TokenizeLine(void)
893 char * ln = NULL; // Ptr to current position in line
894 char * p; // Random character ptr
895 TOKEN * tk; // Token-deposit ptr
896 int state = 0; // State for keyword detector
897 int j = 0; // Var for keyword detector
898 char c; // Random char
899 VALUE v; // Random value
900 char * nullspot = NULL; // Spot to clobber for SYMBOL termination
901 int stuffnull; // 1:terminate SYMBOL '\0' at *nullspot
903 int stringNum = 0; // Pointer to string locations in tokenized line
907 if (cur_inobj == NULL) // Return EOF if input stack is empty
910 // Get another line of input from the current input source: a file, a
911 // macro, or a repeat-block
912 switch (cur_inobj->in_type)
916 // o bump source line number;
917 // o tag the listing-line with a space;
918 // o kludge lines generated by Alcyon C.
920 if ((ln = GetNextLine()) == NULL)
922 if (debug) printf("TokenizeLine: Calling fpop() from SRC_IFILE...\n");
923 if (fpop() == 0) // Pop input level
924 goto retry; // Try for more lines
927 ifent->if_prev = (IFENT *) - 1; //Signal Assemble() that we have reached EOF with unbalanced if/endifs
932 curlineno++; // Bump line number
937 // AS68 compatibility, throw away all lines starting with
938 // back-quotes, tildes, or '*'
939 // On other lines, turn the first '*' into a semi-colon.
940 if (*ln == '`' || *ln == '~' || *ln == '*')
944 for(p=ln; *p!=EOS; p++)
957 // o Handle end-of-macro;
958 // o tag the listing-line with an at (@) sign.
960 if ((ln = GetNextMacroLine()) == NULL)
962 if (ExitMacro() == 0) // Exit macro (pop args, do fpop(), etc)
963 goto retry; // Try for more lines...
965 return TKEOF; // Oops, we got a non zero return code, signal EOF
971 // o Handle end-of-repeat-block;
972 // o tag the listing-line with a pound (#) sign.
974 if ((ln = GetNextRepeatLine()) == NULL)
976 if (debug) printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n");
985 // Save text of the line. We only do this during listings and within
986 // macro-type blocks, since it is expensive to unconditionally copy every
991 // General house-keeping
992 tok = tokeol; // Set "tok" to EOL in case of error
993 tk = etok; // Reset token ptr
994 stuffnull = 0; // Don't stuff nulls
995 totlines++; // Bump total #lines assembled
997 // See if the entire line is a comment. This is a win if the programmer
998 // puts in lots of comments
999 if (*ln == '*' || *ln == ';' || ((*ln == '/') && (*(ln + 1) == '/')))
1002 // Main tokenization loop;
1003 // o skip whitespace;
1004 // o handle end-of-line;
1005 // o handle symbols;
1006 // o handle single-character tokens (operators, etc.);
1007 // o handle multiple-character tokens (constants, strings, etc.).
1010 // Skip whitespace, handle EOL
1011 while ((int)chrtab[*ln] & WHITE)
1014 // Handle EOL, comment with ';'
1015 if (*ln == EOS || *ln == ';'|| ((*ln == '/') && (*(ln + 1) == '/')))
1018 // Handle start of symbol. Symbols are null-terminated in place. The
1019 // termination is always one symbol behind, since there may be no place
1020 // for a null in the case that an operator immediately follows the name.
1025 if (stuffnull) // Terminate old symbol from previous pass
1028 v = 0; // Assume no DOT attrib follows symbol
1031 // In some cases, we need to check for a DOTx at the *beginning*
1032 // of a symbol, as the "start" of the line we're currently looking
1033 // at could be somewhere in the middle of that line!
1036 // Make sure that it's *only* a .[bwsl] following, and not the
1037 // start of a local symbol:
1038 if ((chrtab[*(ln + 1)] & DOT)
1039 && (dotxtab[*(ln + 1)] != 0)
1040 && !(chrtab[*(ln + 2)] & CTSYM))
1042 // We found a legitimate DOTx construct, so add it to the
1046 *tk++ = (TOKEN)dotxtab[*ln++];
1051 p = nullspot = ln++; // Nullspot -> start of this symbol
1053 // Find end of symbol (and compute its length)
1054 for(j=1; (int)chrtab[*ln]&CTSYM; j++)
1057 // Handle "DOT" special forms (like ".b") that follow a normal
1058 // symbol or keyword:
1061 *ln++ = EOS; // Terminate symbol
1062 stuffnull = 0; // And never try it again
1064 // Character following the `.' must have a DOT attribute, and
1065 // the character after THAT one must not have a start-symbol
1066 // attribute (to prevent symbols that look like, for example,
1067 // "zingo.barf", which might be a good idea anyway....)
1068 if (((chrtab[*ln] & DOT) == 0) || (dotxtab[*ln] == 0))
1069 return error("[bwsl] must follow '.' in symbol");
1071 v = (VALUE)dotxtab[*ln++];
1073 if (chrtab[*ln] & CTSYM)
1074 return error("misuse of '.', not allowed in symbols");
1077 // If the symbol is small, check to see if it's really the name of
1081 for(state=0; state>=0;)
1083 j = (int)tolowertab[*p++];
1086 if (kwcheck[j] != state)
1092 if (*p == EOS || p == ln)
1106 // Make j = -1 if user tries to use a RISC register while in 68K mode
1107 if (!(rgpu || rdsp) && ((TOKEN)j >= KW_R0 && (TOKEN)j <= KW_R31))
1112 // Make j = -1 if time, date etc with no preceding ^^
1113 // defined, referenced, streq, macdef, date and time
1116 case 112: // defined
1117 case 113: // referenced
1125 // If not tokenized keyword OR token was not found
1126 if ((j < 0) || (state < 0))
1130 //problem here: nullspot is a char * but TOKEN is a uint32_t. On a 64-bit
1131 //system, this will cause all kinds of mischief.
1133 *tk++ = (TOKEN)nullspot;
1135 string[stringNum] = nullspot;
1146 if (v) // Record attribute token (if any)
1149 if (stuffnull) // Arrange for string termination on next pass
1155 // Handle identity tokens
1162 // Handle multiple-character tokens
1169 case '!': // ! or !=
1179 case '\'': // 'string'
1181 stringtype = A8INT; // hardcoded for now, maybe this will change in the future
1184 case '\"': // "string"
1188 // More char * stuffing (8 bytes) into the space of 4 (TOKEN).
1189 // Need to figure out how to fix this crap.
1193 string[stringNum] = ln;
1198 for(p=ln; *ln!=EOS && *ln!=c1;)
1207 return(error("unterminated string"));
1236 warn("bad backslash code in string");
1246 return error("unterminated string");
1250 case '$': // $, hex constant
1251 if (chrtab[*ln] & HDIGIT)
1255 // Parse the hex value
1256 while (hextab[*ln] >= 0)
1257 v = (v << 4) + (int)hextab[*ln++];
1259 // ggn: Okay, some comments here are in order I think....
1260 // The original madmac sources didn't parse the size at
1261 // this point (i.e. .b/.w/.l). It was probably done at
1262 // another point, although it's unclear to me exactly
1263 // where. So why change this? My understanding (at least
1264 // from what SCPCD said on IRC) is that .w addressing
1265 // formats produce wrong code on jaguar (or doesn't execute
1266 // properly? something like that). So the code was changed
1267 // to mask off the upper bits depending on length (note: I
1268 // don't think .b is valid at all! I only know of .w/.l, so
1269 // this should probably be wiped). Then the code that
1270 // parses the constant and checks to see if it's between
1271 // $ffff0000 and $8000 never got triggered, so yay job
1272 // done! ...now say we want to assemble a st .prg. One of
1273 // the most widely spread optimisations is move.X expr.w,Y
1274 // (or vice versa, or both, anyway...) to access hardware
1275 // registers (which are mapped to $fxxxxx). This botchy
1276 // thing would create "hilarious" code while trying to
1277 // access hardware registers. So I made a condition to see
1278 // if st mode or jaguar is active and apply the both or
1279 // not. One last note: this is hardcoded to get optimised
1280 // for now on ST mode, i.e. it can't generate code like
1281 // move.w $00001234,d0 - it'll always get optimised to
1282 // move.w $1234.w,d0. It's probably ok, but maybe a warning
1283 // should be emitted? Or maybe finding a way to make it not
1284 // auto-optimise? I think it's ok for now...
1287 if (obj_format == BSD)
1289 if ((*(ln + 1) & 0xDF) == 'B')
1294 else if ((*(ln + 1) & 0xDF) == 'W')
1299 else if ((*(ln + 1) & 0xDF) == 'L')
1309 if (obj_format == ALCYON)
1311 if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
1316 else if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
1327 case '<': // < or << or <> or <=
1346 case ':': // : or ::
1356 case '=': // = or ==
1366 case '>': // > or >> or >=
1381 case '%': // % or binary constant
1382 if (*ln < '0' || *ln > '1')
1390 while (*ln >= '0' && *ln <= '1')
1391 v = (v << 1) + *ln++ - '0';
1395 if ((*(ln + 1) == 'b') || (*(ln + 1) == 'B'))
1401 if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
1407 if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
1416 case '@': // @ or octal constant
1417 if (*ln < '0' || *ln > '7')
1425 while (*ln >= '0' && *ln <= '7')
1426 v = (v << 3) + *ln++ - '0';
1430 if ((*(ln+1) == 'b') || (*(ln+1) == 'B'))
1436 if ((*(ln+1) == 'w') || (*(ln+1) == 'W'))
1442 if ((*(ln+1) == 'l') || (*(ln+1) == 'L'))
1451 case '^': // ^ or ^^ <operator-name>
1458 if (((int)chrtab[*++ln] & STSYM) == 0)
1460 error("invalid symbol following ^^");
1466 while ((int)chrtab[*ln] & CTSYM)
1469 for(state=0; state>=0;)
1471 // Get char, convert to lowercase
1474 if (j >= 'A' && j <= 'Z')
1479 if (kwcheck[j] != state)
1485 if (*p == EOS || p == ln)
1494 if (j < 0 || state < 0)
1496 error("unknown symbol following ^^");
1503 interror(2); // Bad MULTX entry in chrtab
1508 // Handle decimal constant
1513 while ((int)chrtab[*ln] & DIGIT)
1514 v = (v * 10) + *ln++ - '0';
1516 // See if there's a .[bwl] after the constant & deal with it if so
1519 if ((*(ln + 1) == 'b') || (*(ln + 1) == 'B'))
1524 else if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
1529 else if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
1537 //printf("CONST: %i\n", v);
1541 // Handle illegal character
1542 return error("illegal character");
1545 // Terminate line of tokens and return "success."
1548 tok = etok; // Set tok to beginning of line
1550 if (stuffnull) // Terminate last SYMBOL
1560 // .GOTO <label> goto directive
1562 // The label is searched for starting from the first line of the current,
1563 // enclosing macro definition. If no enclosing macro exists, an error is
1566 // A label is of the form:
1568 // :<name><whitespace>
1570 // The colon must appear in column 1. The label is stripped prior to macro
1571 // expansion, and is NOT subject to macro expansion. The whitespace may also
1574 //int d_goto(WORD siz) {
1576 int d_goto(WORD unused)
1580 // Setup for the search
1582 return error("missing label");
1584 // sym = (char *)tok[1];
1585 char * sym = string[tok[1]];
1588 if (cur_inobj->in_type != SRC_IMACRO)
1589 return error("goto not in macro");
1591 IMACRO * imacro = cur_inobj->inobj.imacro;
1592 // defln = (LONG *)imacro->im_macro->svalue;
1593 struct LineList * defln = imacro->im_macro->lineList;
1595 // Find the label, starting with the first line.
1596 for(; defln!=NULL; defln=defln->next)
1598 // if (*(char *)(defln + 1) == ':')
1599 if (defln->line[0] == ':')
1601 // Compare names (sleazo string compare)
1602 // This string compare is not right. Doesn't check for lengths.
1603 // (actually it does, but in a crappy, unclear way.)
1604 WARNING(!!!! Bad string comparison !!!)
1606 // s2 = (char *)(defln + 1) + 1;
1620 // Found the label, set new macro next-line and return.
1621 if ((*s2 == EOS) || ((int)chrtab[*s2] & WHITE))
1623 imacro->im_nextln = defln;
1629 return error("goto label not found");
1633 void DumpTokenBuffer(void)
1636 printf("Tokens [%X]: ", sloc);
1638 for(t=tokbuf; *t!=EOL; t++)
1642 else if (*t == CONST)
1645 printf("[CONST: $%X]", (uint32_t)*t);
1647 else if (*t == ACONST)
1649 else if (*t == STRING)
1652 printf("[STRING:\"%s\"]", string[*t]);
1654 else if (*t == SYMBOL)
1657 printf("[SYMBOL:\"%s\"]", string[*t]);
1661 else if (*t == TKEOF)
1663 else if (*t == DEQUALS)
1664 printf("[DEQUALS]");
1669 else if (*t == DCOLON)
1681 else if (*t == UNMINUS)
1682 printf("[UNMINUS]");
1683 else if (*t == DOTB)
1685 else if (*t == DOTW)
1687 else if (*t == DOTL)
1689 else if (*t == DOTI)
1691 else if (*t == ENDEXPR)
1692 printf("[ENDEXPR]");
1693 else if (*t == CR_ABSCOUNT)
1694 printf("[CR_ABSCOUNT]");
1695 else if (*t == CR_DEFINED)
1696 printf("[CR_DEFINED]");
1697 else if (*t == CR_REFERENCED)
1698 printf("[CR_REFERENCED]");
1699 else if (*t == CR_STREQ)
1700 printf("[CR_STREQ]");
1701 else if (*t == CR_MACDEF)
1702 printf("[CR_MACDEF]");
1703 else if (*t == CR_TIME)
1704 printf("[CR_TIME]");
1705 else if (*t == CR_DATE)
1706 printf("[CR_DATE]");
1707 else if (*t >= 0x20 && *t <= 0x2F)
1708 printf("[%c]", (char)*t);
1709 else if (*t >= 0x3A && *t <= 0x3F)
1710 printf("[%c]", (char)*t);
1711 else if (*t >= 0x80 && *t <= 0x87)
1712 printf("[D%u]", ((uint32_t)*t) - 0x80);
1713 else if (*t >= 0x88 && *t <= 0x8F)
1714 printf("[A%u]", ((uint32_t)*t) - 0x88);
1716 printf("[%X:%c]", (uint32_t)*t, (char)*t);