2 // RMAC - Reboot's Macro Assembler for the Atari Jaguar Console System
3 // TOKEN.C - Token Handling
4 // Copyright (C) 199x Landon Dyer, 2011-2012 Reboot and Friends
5 // RMAC derived from MADMAC v1.07 Written by Landon Dyer, 1986
6 // Source utilised with the kind permission of Landon Dyer
15 #define DECL_KW // Declare keyword arrays
16 #define DEF_KW // Declare keyword values
17 #include "kwtab.h" // Incl generated keyword tables & defs
// File-scope tokenizer state: current line/file bookkeeping, character lookup
// tables filled in by InitTokenizer(), and the token buffer with its
// companion string table.
20 int lnsave; // 1: save a copy (strcpy) of the current line's text; 0: don't
21 int curlineno; // Current line number
22 int totlines; // Total # of lines
23 int mjump_align = 0; // mjump alignment flag
24 char lntag; // Line tag
25 char * curfname; // Current filename
26 char tolowertab[128]; // Uppercase ==> lowercase (identity for all other characters)
27 int8_t hextab[128]; // Table of hex values; hex parsing stops at entries < 0
28 char dotxtab[128]; // Table for ".b", ".s", etc.: letter ==> DOTx token, 0 if not a valid suffix
29 char irbuf[LNSIZ]; // Text for .rept block line
30 char lnbuf[LNSIZ]; // Text of current line
31 WORD filecount; // Unique file number counter
32 WORD cfileno; // Current file number
33 TOKEN * tok; // Ptr to current token
34 TOKEN * etok; // Ptr past last token in tokbuf[]
35 TOKEN tokeol[1] = {EOL}; // Bailout end-of-line token (tok points here while a line is being tokenized)
36 char * string[TOKBUFSIZE*2]; // Text of SYMBOL/STRING tokens; referenced by index (e.g. string[tok[1]])
38 // File record, used to maintain a list of every include file ever visited
39 #define FILEREC struct _filerec
49 INOBJ * cur_inobj; // Ptr current input obj (IFILE/IMACRO)
50 static INOBJ * f_inobj; // Ptr list of free INOBJs
51 static IFILE * f_ifile; // Ptr list of free IFILEs
52 static IMACRO * f_imacro; // Ptr list of free IMACROs
54 static TOKEN tokbuf[TOKBUFSIZE]; // Token buffer (stack-like, all files)
// Character-class flags, one entry per ASCII code in ascending order; the
// trailing comment on each row names the characters that row covers.
// The letters B, L, S and W (either case) additionally carry the DOT flag,
// which the tokenizer tests (together with dotxtab[]) to recognize size
// suffixes such as ".b" after a period.
57 ILLEG, ILLEG, ILLEG, ILLEG, // NUL SOH STX ETX
58 ILLEG, ILLEG, ILLEG, ILLEG, // EOT ENQ ACK BEL
59 ILLEG, WHITE, ILLEG, ILLEG, // BS HT LF VT
60 WHITE, ILLEG, ILLEG, ILLEG, // FF CR SO SI
62 ILLEG, ILLEG, ILLEG, ILLEG, // DLE DC1 DC2 DC3
63 ILLEG, ILLEG, ILLEG, ILLEG, // DC4 NAK SYN ETB
64 ILLEG, ILLEG, ILLEG, ILLEG, // CAN EM SUB ESC
65 ILLEG, ILLEG, ILLEG, ILLEG, // FS GS RS US
67 WHITE, MULTX, MULTX, SELF, // SP ! " #
68 MULTX+CTSYM, MULTX, SELF, MULTX, // $ % & '
69 SELF, SELF, SELF, SELF, // ( ) * +
70 SELF, SELF, STSYM, SELF, // , - . /
72 DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 0 1
73 DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 2 3
74 DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 4 5
75 DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 6 7
76 DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 8 9
78 MULTX, MULTX, MULTX, STSYM+CTSYM, // < = > ?
80 MULTX, STSYM+CTSYM+HDIGIT, // @ A
81 (char)((BYTE)DOT)+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT, // B C
82 STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT, // D E
83 STSYM+CTSYM+HDIGIT, STSYM+CTSYM, // F G
84 STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // H I J K
85 (char)((BYTE)DOT)+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // L M N O
87 STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, (char)((BYTE)DOT)+STSYM+CTSYM, // P Q R S
88 STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, (char)((BYTE)DOT)+STSYM+CTSYM, // T U V W
89 STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, SELF, // X Y Z [
90 SELF, SELF, MULTX, STSYM+CTSYM, // \ ] ^ _
92 ILLEG, STSYM+CTSYM+HDIGIT, // ` a
93 (char)((BYTE)DOT)+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT, // b c
94 STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT, // d e
95 STSYM+CTSYM+HDIGIT, STSYM+CTSYM, // f g
96 STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // h i j k
97 (char)((BYTE)DOT)+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // l m n o
99 STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, (char)((BYTE)DOT)+STSYM+CTSYM, // p q r s
100 STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, (char)((BYTE)DOT)+STSYM+CTSYM, // t u v w
101 STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, SELF, // x y z {
102 SELF, SELF, SELF, ILLEG // | } ~ DEL
105 // Names of registers
// ExpandMacro() uses these tables to turn register keyword tokens back into
// text: regname[] is indexed by (token - KW_D0).
106 static char * regname[] = {
107 "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7",
108 "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7",
109 "pc", "ssp", "usp", "sr", "ccr"
// RISC register names, indexed by (token - KW_R0) for KW_R0..KW_R31
112 static char * riscregname[] = {
113 "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
114 "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
115 "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
116 "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31"
121 // Initialize tokenizer
// Resets the file-tracking globals and fills in the hex-digit, lowercase and
// dot-suffix lookup tables used while tokenizing.
123 void InitTokenizer(void)
126 char * htab = "0123456789abcdefABCDEF"; // Hex character table
128 lnsave = 0; // Don't save lines
129 curfname = ""; // No file, empty filename
130 filecount = (WORD)-1; // So that the first ++filecount in include() yields 0
131 cfileno = (WORD)-1; // cfileno gets bumped to 0
143 // Initialize hex, "dot" and tolower tables
148 tolowertab[i] = (char)i; // Start from the identity mapping
151 for(i=0; htab[i]!=EOS; i++)
152 hextab[htab[i]] = (char)((i < 16) ? i : i - 6); // htab[16..21] are 'A'..'F', worth 10..15
154 for(i='A'; i<='Z'; i++)
155 tolowertab[i] |= 0x20; // Setting bit 0x20 maps ASCII 'A'..'Z' to 'a'..'z'
157 // These characters are legal immediately after a period
158 dotxtab['b'] = DOTB; // .b .B .s .S
162 dotxtab['w'] = DOTW; // .w .W
164 dotxtab['l'] = DOTL; // .l .L
166 dotxtab['i'] = DOTI; // .i .I (???)
// Point curfname at the name to show in error messages. The filerec list is
// walked until the wanted file number (fnum) is reached and that record's
// name is used; the two "should never happen" cases get placeholder names.
171 void SetFilenameForErrorReporting(void)
175 // Check for absolute top filename (this should never happen)
178 curfname = "(*top*)";
182 FILEREC * fr = filerec;
184 // Advance to the correct record...
185 while (fr != NULL && fnum != 0)
191 // Check for file # record not found (this should never happen either)
194 curfname = "(*NOT FOUND*)";
198 curfname = fr->frec_name; // Shares the record's string; no copy is made
203 // Allocate an IFILE or IMACRO
// More precisely: obtain an INOBJ plus the payload selected by 'typ'
// (SRC_IFILE, SRC_IMACRO or SRC_IREPT). INOBJ, IFILE and IMACRO entries are
// taken from their free lists (f_inobj, f_ifile, f_imacro) or malloc'ed; an
// IREPT is always malloc'ed. The new INOBJ records the current .if context
// and token pointers and is linked in front of cur_inobj.
// NOTE(review): none of the malloc() results visible here are checked.
205 INOBJ * a_inobj(int typ)
211 // Allocate and initialize INOBJ first
213 inobj = malloc(sizeof(INOBJ));
217 f_inobj = f_inobj->in_link; // Advance the free list past its head entry
222 case SRC_IFILE: // Alloc and init an IFILE
224 ifile = malloc(sizeof(IFILE));
228 f_ifile = f_ifile->if_link; // Advance the free list past its head entry
231 inobj->inobj.ifile = ifile;
233 case SRC_IMACRO: // Alloc and init an IMACRO
234 if (f_imacro == NULL) // Free list empty: need a fresh allocation
235 imacro = malloc(sizeof(IMACRO));
239 f_imacro = f_imacro->im_link; // Advance the free list past its head entry
242 inobj->inobj.imacro = imacro;
244 case SRC_IREPT: // Alloc and init an IREPT
245 inobj->inobj.irept = malloc(sizeof(IREPT));
246 DEBUG printf("alloc IREPT\n");
250 // Install INOBJ on top of input stack
251 inobj->in_ifent = ifent; // Record .if context on entry
252 inobj->in_type = (WORD)typ;
253 inobj->in_otok = tok; // Saved so fpop() can restore tok
254 inobj->in_etok = etok; // Saved so fpop() can restore etok
255 inobj->in_link = cur_inobj; // Previous top of the input stack
263 // Perform macro substitution from 'orig' to 'dest'. Return OK or some error.
// ('orig' is the parameter named 'src' below; 'destsiz' is the size of 'dest'
// in chars. Overflowing 'dest' is reported through fatal().)
264 // A macro reference is in one of two forms:
265 // \name <non-name-character>
267 // A doubled backslash (\\) is compressed to a single backslash (\).
268 // Argument definitions have been pre-tokenized, so we have to turn them back
269 // into text. This means that numbers, in particular, become hex, regardless of
270 // their representation when the macro was invoked. This is a hack.
271 // A label may appear at the beginning of the line:
272 // :<name><whitespace>
273 // (the colon must be in the first column). These labels are stripped before
274 // macro expansion takes place.
276 int ExpandMacro(char * src, char * dest, int destsiz)
279 int questmark; // \? for testing argument existence
280 char mname[128]; // Assume max size of a formal arg name
281 char numbuf[20]; // Buffer for text of CONSTs
284 char ** symbolString;
286 DEBUG { printf("ExM: src=\"%s\"\n", src); }
288 IMACRO * imacro = cur_inobj->inobj.imacro;
289 int macnum = (int)(imacro->im_macro->sattr); // Passed to lookup() when resolving argument names
292 char * dst = dest; // Next dest slot
293 char * edst = dest + destsiz - 1; // Points at the last char of the dest buffer (dest[destsiz - 1])
295 // Check for (and skip over) any "label" on the line
301 while (*s != EOS && !(chrtab[*s] & WHITE))
305 s++; // Skip first whitespace
308 // Expand the rest of the line
311 // Copy single character
317 // Skip comments in case a loose @ or \ is in there
318 // In that case the tokeniser was trying to expand it.
319 if (*s == '*' || *s == ';' || ((*s == '/') && (*(s + 1) == '/')))
324 // Do macro expansion
332 case '\\': // \\, \ (collapse to single backslash)
338 case '?': // \? <macro> set `questmark' flag
342 case '#': // \#, number of arguments
343 sprintf(numbuf, "%d", (int)imacro->im_nargs);
345 case '!': // \! size suffix supplied on invocation
346 switch ((int)imacro->im_siz)
348 case SIZN: d = ""; break;
349 case SIZB: d = ".b"; break;
350 case SIZW: d = ".w"; break;
351 case SIZL: d = ".l"; break;
355 case '~': // ==> unique label string Mnnnn...
356 sprintf(numbuf, "M%u", curuniq);
372 return error("missing argument name");
375 // \n ==> argument number 'n', 0..9
376 if (chrtab[*s] & DIGIT)
386 // Get argument name: \name, \{name}
396 while (chrtab[*s] & CTSYM);
401 for(++s; *s != EOS && *s != '}';)
405 return error("missing '}'");
412 // Lookup the argument and copy its (string) value into the
413 // destination string
414 DEBUG printf("argument='%s'\n", mname);
416 if ((arg = lookup(mname, MACARG, macnum)) == NULL)
417 return errors("undefined argument: '%s'", mname);
420 // Convert a string of tokens (terminated with EOL) back into
421 // text. If an argument is out of range (not specified in the
422 // macro invocation) then it is ignored.
423 i = (int)arg->svalue;
425 DEBUG printf("~argnumber=%d (argBase=%u)\n", i, imacro->argBase);
428 if (i < imacro->im_nargs)
433 tk = argPtrs[imacro->argBase + i];
435 tk = imacro->argument[i].token;
436 symbolString = imacro->argument[i].string;
439 // printf("ExM: Preparing to parse argument #%u...\n", i);
446 // 0 if the argument is empty or non-existent,
447 // 1 if the argument is not empty
450 if (tk == NULL || *tk == EOL)
456 *dst++ = (char)(questmark + '0'); // Emit the flag as the character '0' or '1'
460 // Argument # is in range, so expand it
465 // Reverse-translation from a token number to a string.
466 // This is a hack. It might be better table-driven.
469 if ((*tk >= KW_D0) && !rdsp && !rgpu) // 68000 register keyword (only when neither RISC mode is active)
471 d = regname[(int)*tk++ - KW_D0];
474 else if ((*tk >= KW_R0) && (*tk <= KW_R31)) // RISC register keyword
476 d = riscregname[(int)*tk++ - KW_R0];
485 // d = (char *)*tk++;
488 // This fix should be done for strings too
489 d = symbolString[*tk++]; // Token value is an index into the argument's string table
490 DEBUG printf("ExM: SYMBOL=\"%s\"", d);
495 // d = (char *)*tk++;
498 d = symbolString[*tk++]; // Token value is an index into the argument's string table
519 // Shamus: Changing the format specifier from %lx to %ux caused
520 // the assembler to choke on legitimate code... Need to investigate
521 // this further before changing anything else here!
523 sprintf(numbuf, "$%lx", (LONG)*tk++);
587 *dst++ = (char)*(tk - 1);
592 // If 'd' != NULL, copy string to destination
596 DEBUG printf("d='%s'\n", d);
615 DEBUG { printf("ExM: dst=\"%s\"\n", dest); }
620 DEBUG printf("*** OVERFLOW LINE ***\n%s\n", dest);
621 return fatal("line too long as a result of macro expansion");
626 // Get next line of text from a macro
// Advances the macro's next-line pointer (im_nextln), expands the line into
// imacro->im_lnbuf and returns that buffer; a NULL im_nextln marks the end
// of the macro.
628 char * GetNextMacroLine(void)
630 // unsigned source_addr;
632 IMACRO * imacro = cur_inobj->inobj.imacro;
633 // LONG * strp = imacro->im_nextln;
634 struct LineList * strp = imacro->im_nextln;
636 if (strp == NULL) // End-of-macro
639 // imacro->im_nextln = (LONG *)*strp;
640 imacro->im_nextln = strp->next;
641 // ExpandMacro((char *)(strp + 1), imacro->im_lnbuf, LNSIZ);
// NOTE(review): the status returned by ExpandMacro() is discarded here, so
// the buffer is returned even when expansion reported an error.
642 ExpandMacro(strp->line, imacro->im_lnbuf, LNSIZ);
644 return imacro->im_lnbuf;
649 // Get next line of text from a repeat block
// Each stored line is a LONG link to the next line followed immediately by
// the line's text. The current line is copied into the global irbuf.
651 char * GetNextRepeatLine(void)
654 IREPT * irept = cur_inobj->inobj.irept;
655 LONG * strp = irept->ir_nextln; // initial null
657 // Do repeat at end of .rept block's string list
660 DEBUG printf("back-to-top-of-repeat-block count=%d\n", (int)irept->ir_count);
661 irept->ir_nextln = irept->ir_firstln; // copy first line
663 if (irept->ir_count-- == 0) // Count exhausted: the repeat block is finished
665 DEBUG printf("end-repeat-block\n");
669 strp = irept->ir_nextln;
// NOTE(review): strcpy() does not bound the copy to sizeof(irbuf) (LNSIZ).
672 strcpy(irbuf, (char *)(irept->ir_nextln + 1)); // Text starts right after the link word
673 DEBUG printf("repeat line='%s'\n", irbuf);
// NOTE(review): the link is kept in a LONG and cast back to a pointer; if LONG
// is 32 bits this cannot hold a 64-bit address -- confirm.
674 irept->ir_nextln = (LONG *)*strp;
681 // Include a source file used at the root, and for ".include" files
// 'handle' is the already-open file handle to read from (fpop() later passes
// it to close()); 'fname' is the file's name, which is duplicated here. The
// previous line number, filename and file number are saved in the IFILE so
// fpop() can restore them, and a FILEREC is appended for the new file.
683 int include(int handle, char * fname)
691 printf("[include: %s, cfileno=%u]\n", fname, cfileno);
693 // Alloc and initialize include-descriptors
694 inobj = a_inobj(SRC_IFILE);
695 ifile = inobj->inobj.ifile;
697 ifile->ifhandle = handle; // Setup file handle
698 ifile->ifind = ifile->ifcnt = 0; // Setup buffer indices
699 ifile->ifoldlineno = curlineno; // Save old line number
700 ifile->ifoldfname = curfname; // Save old filename
701 ifile->ifno = cfileno; // Save old file number
703 // cfileno = filecount++; // Compute new file number
704 // NB: This *must* be preincrement, we're adding one to the filecount here!
705 cfileno = ++filecount; // Compute NEW file number
// NOTE(review): the strdup() and malloc() results below are used unchecked.
706 curfname = strdup(fname); // Set current filename (alloc storage)
707 curlineno = 0; // Start on line zero
709 // Add another file to the file-record
710 fr = (FILEREC *)malloc(sizeof(FILEREC));
711 fr->frec_next = NULL;
712 fr->frec_name = curfname; // The record shares the strdup'ed name with curfname
715 filerec = fr; // Add first filerec
717 last_fr->frec_next = fr; // Append to list of filerecs
720 DEBUG printf("[include: curfname: %s, cfileno=%u]\n", curfname, cfileno);
727 // Pop the current input level
// Closes any .if levels opened inside the input object, restores the token
// pointers saved by a_inobj(), releases the type-specific payload and unlinks
// the INOBJ from the input stack. Returns -1 if d_endif() fails while
// unwinding the .if levels.
734 INOBJ * inobj = cur_inobj;
738 // Pop IFENT levels until we reach the conditional assembly context we
739 // were at when the input object was entered.
740 int numUnmatched = 0;
742 while (ifent != inobj->in_ifent)
744 if (d_endif() != 0) // Something bad happened during endif parsing?
745 return -1; // If yes, bail instead of getting stuck in a loop
750 // Warn the user that unterminated .if blocks had to be closed for them
751 if (numUnmatched > 0)
752 warni("missing %d .endif(s)", numUnmatched);
754 tok = inobj->in_otok; // Restore tok and etok
755 etok = inobj->in_etok;
757 switch (inobj->in_type)
759 case SRC_IFILE: // Pop and release an IFILE
761 printf("[Leaving: %s]\n", curfname);
763 ifile = inobj->inobj.ifile;
764 ifile->if_link = f_ifile; // Chain the IFILE in front of the free list
766 close(ifile->ifhandle); // Close source file
767 if (debug) printf("[fpop (pre): curfname=%s]\n", curfname);
768 curfname = ifile->ifoldfname; // Set current filename
769 if (debug) printf("[fpop (post): curfname=%s]\n", curfname);
770 if (debug) printf("[fpop: (pre) cfileno=%d ifile->ifno=%d]\n", (int)cfileno, (int)ifile->ifno);
771 curlineno = ifile->ifoldlineno; // Set current line#
772 DEBUG printf("cfileno=%d ifile->ifno=%d\n", (int)cfileno, (int)ifile->ifno);
773 cfileno = ifile->ifno; // Restore current file number
774 if (debug) printf("[fpop: (post) cfileno=%d ifile->ifno=%d]\n", (int)cfileno, (int)ifile->ifno);
776 case SRC_IMACRO: // Pop and release an IMACRO
777 imacro = inobj->inobj.imacro;
778 imacro->im_link = f_imacro; // Chain the IMACRO in front of the free list
781 case SRC_IREPT: // Pop and release an IREPT
782 DEBUG printf("dealloc IREPT\n");
783 p = inobj->inobj.irept->ir_firstln; // First line of the repeat block's line list
794 cur_inobj = inobj->in_link; // Previous input object becomes current again
795 inobj->in_link = f_inobj; // Chain the INOBJ in front of the free list
// Reads from the current IFILE's buffer (ifbuf); ifind is the offset of the
// unread data and ifcnt the number of unread bytes. The buffer is refilled
// with read() in QUANTUM-sized requests when no complete line is available.
804 // Get line from file into buf, return NULL on EOF or ptr to the start of a
807 char * GetNextLine(void)
811 int readamt = -1; // 0 if last read() yielded 0 bytes
812 IFILE * fl = cur_inobj->inobj.ifile;
816 // Scan for next end-of-line; handle stupid text formats by treating
817 // \r\n the same as \n. (lone '\r' at end of buffer means we have to
819 d = &fl->ifbuf[fl->ifind];
821 for(p=d, i=0, j=fl->ifcnt; i<j; i++, p++)
823 if (*p == '\r' || *p == '\n')
830 break; // Need to read more, then look for '\n' to eat
831 else if (p[1] == '\n')
835 // Cover up the newline with end-of-string sentinel
844 // Handle hanging lines by ignoring them (Input file is exhausted, no
845 // \r or \n on last line)
846 // Shamus: Dropping the last line is wrong. Never ignore any input!
847 if (!readamt && fl->ifcnt)
854 // Really should check to see if we're at the end of the buffer!
856 fl->ifbuf[fl->ifind + fl->ifcnt] = '\0';
858 return &fl->ifbuf[fl->ifind];
862 // Truncate and return absurdly long lines.
863 if (fl->ifcnt >= QUANTUM)
865 fl->ifbuf[fl->ifind + fl->ifcnt - 1] = '\0'; // Overwrites the last buffered byte with the terminator
867 return &fl->ifbuf[fl->ifind];
870 // Relocate what's left of a line to the beginning of the buffer, and
871 // read some more of the file in; return NULL if the buffer's empty and
875 p = &fl->ifbuf[fl->ifind];
876 d = &fl->ifbuf[fl->ifcnt & 1];
878 for(i=0; i<fl->ifcnt; i++)
881 fl->ifind = fl->ifcnt & 1;
884 readamt = read(fl->ifhandle, &fl->ifbuf[fl->ifind + fl->ifcnt], QUANTUM); // Append after the relocated data
889 if ((fl->ifcnt += readamt) == 0)
// Tokenize the next line of input from the current input object (cur_inobj):
// a file, a macro or a repeat block, selected by in_type. Tokens are written
// starting at etok and, on success, tok is set to etok. While the line is
// being scanned tok points at tokeol, so an early error return leaves an EOL
// token current. Text for SYMBOL and STRING tokens is recorded in string[].
// Returns TKEOF when ExitMacro() fails at the end of a macro; the error paths
// return whatever error() returns.
898 int TokenizeLine(void)
900 char * ln = NULL; // Ptr to current position in line
901 char * p; // Random character ptr
902 TOKEN * tk; // Token-deposit ptr
903 int state = 0; // State for keyword detector
904 int j = 0; // Var for keyword detector
905 char c; // Random char
906 VALUE v; // Random value
907 char * nullspot = NULL; // Spot to clobber for SYMBOL termination
908 int stuffnull; // 1:terminate SYMBOL '\0' at *nullspot
910 int stringNum = 0; // Pointer to string locations in tokenized line
914 if (cur_inobj == NULL) // Return EOF if input stack is empty
917 // Get another line of input from the current input source: a file, a
918 // macro, or a repeat-block
919 switch (cur_inobj->in_type)
923 // o bump source line number;
924 // o tag the listing-line with a space;
925 // o kludge lines generated by Alcyon C.
927 if ((ln = GetNextLine()) == NULL)
929 if (debug) printf("TokenizeLine: Calling fpop() from SRC_IFILE...\n");
930 if (fpop() == 0) // Pop input level
931 goto retry; // Try for more lines
934 ifent->if_prev = (IFENT *) - 1; //Signal Assemble() that we have reached EOF with unbalanced if/endifs
939 curlineno++; // Bump line number
944 // AS68 compatibility, throw away all lines starting with
945 // back-quotes, tildes, or '*'
946 // On other lines, turn the first '*' into a semi-colon.
947 if (*ln == '`' || *ln == '~' || *ln == '*')
951 for(p=ln; *p!=EOS; p++)
964 // o Handle end-of-macro;
965 // o tag the listing-line with an at (@) sign.
967 if ((ln = GetNextMacroLine()) == NULL)
969 if (ExitMacro() == 0) // Exit macro (pop args, do fpop(), etc)
970 goto retry; // Try for more lines...
972 return TKEOF; // Oops, we got a non zero return code, signal EOF
978 // o Handle end-of-repeat-block;
979 // o tag the listing-line with a pound (#) sign.
981 if ((ln = GetNextRepeatLine()) == NULL)
983 if (debug) printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n");
992 // Save text of the line. We only do this during listings and within
993 // macro-type blocks, since it is expensive to unconditionally copy every
998 // General house-keeping
999 tok = tokeol; // Set "tok" to EOL in case of error
1000 tk = etok; // Reset token ptr
1001 stuffnull = 0; // Don't stuff nulls
1002 totlines++; // Bump total #lines assembled
1004 // See if the entire line is a comment. This is a win if the programmer
1005 // puts in lots of comments
1006 if (*ln == '*' || *ln == ';' || ((*ln == '/') && (*(ln + 1) == '/')))
1009 // Main tokenization loop;
1010 // o skip whitespace;
1011 // o handle end-of-line;
1012 // o handle symbols;
1013 // o handle single-character tokens (operators, etc.);
1014 // o handle multiple-character tokens (constants, strings, etc.).
1017 // Skip whitespace, handle EOL
1018 while ((int)chrtab[*ln] & WHITE)
1021 // Handle EOL, comment with ';'
1022 if (*ln == EOS || *ln == ';'|| ((*ln == '/') && (*(ln + 1) == '/')))
1025 // Handle start of symbol. Symbols are null-terminated in place. The
1026 // termination is always one symbol behind, since there may be no place
1027 // for a null in the case that an operator immediately follows the name.
1032 if (stuffnull) // Terminate old symbol from previous pass
1035 v = 0; // Assume no DOT attrib follows symbol
1038 // In some cases, we need to check for a DOTx at the *beginning*
1039 // of a symbol, as the "start" of the line we're currently looking
1040 // at could be somewhere in the middle of that line!
1043 // Make sure that it's *only* a .[bwsl] following, and not the
1044 // start of a local symbol:
1045 if ((chrtab[*(ln + 1)] & DOT)
1046 && (dotxtab[*(ln + 1)] != 0)
1047 && !(chrtab[*(ln + 2)] & CTSYM))
1049 // We found a legitimate DOTx construct, so add it to the
1053 *tk++ = (TOKEN)dotxtab[*ln++];
1058 p = nullspot = ln++; // Nullspot -> start of this symbol
1060 // Find end of symbol (and compute its length)
1061 for(j=1; (int)chrtab[*ln]&CTSYM; j++)
1064 // Handle "DOT" special forms (like ".b") that follow a normal
1065 // symbol or keyword:
1068 *ln++ = EOS; // Terminate symbol
1069 stuffnull = 0; // And never try it again
1071 // Character following the `.' must have a DOT attribute, and
1072 // the character after THAT one must not have a start-symbol
1073 // attribute (to prevent symbols that look like, for example,
1074 // "zingo.barf", which might be a good idea anyway....)
1075 if (((chrtab[*ln] & DOT) == 0) || (dotxtab[*ln] == 0))
1076 return error("[bwsl] must follow '.' in symbol");
1078 v = (VALUE)dotxtab[*ln++];
1080 if (chrtab[*ln] & CTSYM)
1081 return error("misuse of '.', not allowed in symbols");
1084 // If the symbol is small, check to see if it's really the name of
1088 for(state=0; state>=0;)
1090 j = (int)tolowertab[*p++]; // Keyword matching is case-insensitive
1093 if (kwcheck[j] != state)
1099 if (*p == EOS || p == ln)
1113 // Make j = -1 if user tries to use a RISC register while in 68K mode
1114 if (!(rgpu || rdsp) && ((TOKEN)j >= KW_R0 && (TOKEN)j <= KW_R31))
1119 // Make j = -1 if time, date etc with no preceding ^^
1120 // defined, referenced, streq, macdef, date and time
1123 case 112: // defined
1124 case 113: // referenced
1132 // If not tokenized keyword OR token was not found
1133 if ((j < 0) || (state < 0))
1137 //problem here: nullspot is a char * but TOKEN is a uint32_t. On a 64-bit
1138 //system, this will cause all kinds of mischief.
1140 *tk++ = (TOKEN)nullspot;
1142 string[stringNum] = nullspot;
1153 if (v) // Record attribute token (if any)
1156 if (stuffnull) // Arrange for string termination on next pass
1162 // Handle identity tokens
1169 // Handle multiple-character tokens
1174 case '!': // ! or !=
1184 case '\'': // 'string'
1185 case '\"': // "string"
1189 // More char * stuffing (8 bytes) into the space of 4 (TOKEN).
1190 // Need to figure out how to fix this crap.
1194 string[stringNum] = ln;
1199 for(p=ln; *ln!=EOS && *ln!=c1;)
1208 return(error("unterminated string"));
1237 warn("bad backslash code in string");
1247 return error("unterminated string");
1251 case '$': // $, hex constant
1252 if (chrtab[*ln] & HDIGIT)
1256 // Parse the hex value
1257 while (hextab[*ln] >= 0)
1258 v = (v << 4) + (int)hextab[*ln++];
1260 // ggn: Okay, some comments here are in order I think....
1261 // The original madmac sources didn't parse the size at
1262 // this point (i.e. .b/.w/.l). It was probably done at
1263 // another point, although it's unclear to me exactly
1264 // where. So why change this? My understanding (at least
1265 // from what SCPCD said on IRC) is that .w addressing
1266 // formats produce wrong code on jaguar (or doesn't execute
1267 // properly? something like that). So the code was changed
1268 // to mask off the upper bits depending on length (note: I
1269 // don't think .b is valid at all! I only know of .w/.l, so
1270 // this should probably be wiped). Then the code that
1271 // parses the constant and checks to see if it's between
1272 // $ffff0000 and $8000 never got triggered, so yay job
1273 // done! ...now say we want to assemble a st .prg. One of
1274 // the most widely spread optimisations is move.X expr.w,Y
1275 // (or vice versa, or both, anyway...) to access hardware
1276 // registers (which are mapped to $fxxxxx). This botchy
1277 // thing would create "hilarious" code while trying to
1278 // access hardware registers. So I made a condition to see
1279 // if st mode or jaguar is active and apply the both or
1280 // not. One last note: this is hardcoded to get optimised
1281 // for now on ST mode, i.e. it can't generate code like
1282 // move.w $00001234,d0 - it'll always get optimised to
1283 // move.w $1234.w,d0. It's probably ok, but maybe a warning
1284 // should be emitted? Or maybe finding a way to make it not
1285 // auto-optimise? I think it's ok for now...
1288 if (obj_format == BSD)
1290 if ((*(ln + 1) & 0xDF) == 'B') // Masking with 0xDF clears bit 0x20, so 'b' also matches
1295 else if ((*(ln + 1) & 0xDF) == 'W')
1300 else if ((*(ln + 1) & 0xDF) == 'L')
1310 if (obj_format == ALCYON)
1312 if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
1317 else if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
1328 case '<': // < or << or <> or <=
1347 case ':': // : or ::
1357 case '=': // = or ==
1367 case '>': // > or >> or >=
1382 case '%': // % or binary constant
1383 if (*ln < '0' || *ln > '1')
1391 while (*ln >= '0' && *ln <= '1')
1392 v = (v << 1) + *ln++ - '0';
1396 if ((*(ln + 1) == 'b') || (*(ln + 1) == 'B'))
1402 if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
1408 if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
1417 case '@': // @ or octal constant
1418 if (*ln < '0' || *ln > '7')
1426 while (*ln >= '0' && *ln <= '7')
1427 v = (v << 3) + *ln++ - '0';
1431 if ((*(ln+1) == 'b') || (*(ln+1) == 'B'))
1437 if ((*(ln+1) == 'w') || (*(ln+1) == 'W'))
1443 if ((*(ln+1) == 'l') || (*(ln+1) == 'L'))
1452 case '^': // ^ or ^^ <operator-name>
1459 if (((int)chrtab[*++ln] & STSYM) == 0)
1461 error("invalid symbol following ^^");
1467 while ((int)chrtab[*ln] & CTSYM)
1470 for(state=0; state>=0;)
1472 // Get char, convert to lowercase
1475 if (j >= 'A' && j <= 'Z')
1480 if (kwcheck[j] != state)
1486 if (*p == EOS || p == ln)
1495 if (j < 0 || state < 0)
1497 error("unknown symbol following ^^");
1504 interror(2); // Bad MULTX entry in chrtab
1509 // Handle decimal constant
1514 while ((int)chrtab[*ln] & DIGIT)
1515 v = (v * 10) + *ln++ - '0';
1517 // See if there's a .[bwl] after the constant & deal with it if so
1520 if ((*(ln + 1) == 'b') || (*(ln + 1) == 'B'))
1525 else if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
1530 else if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
1538 //printf("CONST: %i\n", v);
1542 // Handle illegal character
1543 return error("illegal character");
1546 // Terminate line of tokens and return "success."
1549 tok = etok; // Set tok to beginning of line
1551 if (stuffnull) // Terminate last SYMBOL
1561 // .GOTO <label> goto directive
1563 // The label is searched for starting from the first line of the current,
1564 // enclosing macro definition. If no enclosing macro exists, an error is
1567 // A label is of the form:
1569 // :<name><whitespace>
1571 // The colon must appear in column 1. The label is stripped prior to macro
1572 // expansion, and is NOT subject to macro expansion. The whitespace may also
1575 //int d_goto(WORD siz) {
// The WORD argument is not used. On a match the macro's next-line pointer
// (im_nextln) is set to the label's line; otherwise an error is reported.
1577 int d_goto(WORD unused)
1581 // Setup for the search
1583 return error("missing label");
1585 // sym = (char *)tok[1];
1586 char * sym = string[tok[1]]; // tok[1] is an index into the string table
1589 if (cur_inobj->in_type != SRC_IMACRO)
1590 return error("goto not in macro");
1592 IMACRO * imacro = cur_inobj->inobj.imacro;
1593 // defln = (LONG *)imacro->im_macro->svalue;
1594 struct LineList * defln = imacro->im_macro->lineList;
1596 // Find the label, starting with the first line.
1597 // for(; defln!=NULL; defln=(LONG *)*defln)
1598 for(; defln!=NULL; defln=defln->next)
1600 // if (*(char *)(defln + 1) == ':')
1601 if (defln->line[0] == ':') // Only lines with ':' in column 1 can be labels
1603 // Compare names (sleazo string compare)
1604 // This string compare is not right. Doesn't check for lengths.
1605 // (actually it does, but in a crappy, unclear way.)
1606 WARNING(!!!! Bad string comparison !!!)
1608 // s2 = (char *)(defln + 1) + 1;
1622 // Found the label, set new macro next-line and return.
1623 if ((*s2 == EOS) || ((int)chrtab[*s2] & WHITE)) // The label name must end at end-of-line or whitespace
1625 imacro->im_nextln = defln;
1631 return error("goto label not found");
// Debug helper: print the tokens in tokbuf, from its start up to the first
// EOL token, to stdout in a bracketed, human-readable form.
1635 void DumpTokenBuffer(void)
1638 printf("Tokens [%X]: ", sloc);
1640 for(t=tokbuf; *t!=EOL; t++)
1644 else if (*t == CONST)
1647 printf("[CONST: $%X]", (uint32_t)*t);
1649 else if (*t == ACONST)
1651 else if (*t == STRING)
1654 printf("[STRING:\"%s\"]", string[*t]); // Token value indexes the string table
1656 else if (*t == SYMBOL)
1659 printf("[SYMBOL:\"%s\"]", string[*t]); // Token value indexes the string table
1663 else if (*t == TKEOF)
1665 else if (*t == DEQUALS)
1666 printf("[DEQUALS]");
1671 else if (*t == DCOLON)
1683 else if (*t == UNMINUS)
1684 printf("[UNMINUS]");
1685 else if (*t == DOTB)
1687 else if (*t == DOTW)
1689 else if (*t == DOTL)
1691 else if (*t == DOTI)
1693 else if (*t == ENDEXPR)
1694 printf("[ENDEXPR]");
1695 else if (*t == CR_ABSCOUNT)
1696 printf("[CR_ABSCOUNT]");
1697 else if (*t == CR_DEFINED)
1698 printf("[CR_DEFINED]");
1699 else if (*t == CR_REFERENCED)
1700 printf("[CR_REFERENCED]");
1701 else if (*t == CR_STREQ)
1702 printf("[CR_STREQ]");
1703 else if (*t == CR_MACDEF)
1704 printf("[CR_MACDEF]");
1705 else if (*t == CR_TIME)
1706 printf("[CR_TIME]");
1707 else if (*t == CR_DATE)
1708 printf("[CR_DATE]");
1709 else if (*t >= 0x20 && *t <= 0x2F) // Printed as the character with that code
1710 printf("[%c]", (char)*t);
1711 else if (*t >= 0x3A && *t <= 0x3F) // Printed as the character with that code
1712 printf("[%c]", (char)*t);
1713 else if (*t >= 0x80 && *t <= 0x87) // Printed as D0..D7
1714 printf("[D%u]", ((uint32_t)*t) - 0x80);
1715 else if (*t >= 0x88 && *t <= 0x8F) // Printed as A0..A7
1716 printf("[A%u]", ((uint32_t)*t) - 0x88);
1718 printf("[%X:%c]", (uint32_t)*t, (char)*t); // Fallback: hex value and raw character