2 // RMAC - Reboot's Macro Assembler for the Atari Jaguar Console System
3 // TOKEN.C - Token Handling
4 // Copyright (C) 199x Landon Dyer, 2011-2017 Reboot and Friends
5 // RMAC derived from MADMAC v1.07 Written by Landon Dyer, 1986
6 // Source utilised with the kind permission of Landon Dyer
15 #define DECL_KW // Declare keyword arrays
16 #define DEF_KW // Declare keyword values
17 #include "kwtab.h" // Incl generated keyword tables & defs
20 int lnsave; // 1: strcpy() text of current line
21 int curlineno; // Current line number
22 int totlines; // Total # of lines
23 int mjump_align = 0; // mjump alignment flag
24 char lntag; // Line tag
25 char * curfname; // Current filename
26 char tolowertab[128]; // Uppercase ==> lowercase
27 int8_t hextab[128]; // Table of hex values
28 char dotxtab[128]; // Table for ".b", ".s", etc.
29 char irbuf[LNSIZ]; // Text for .rept block line
30 char lnbuf[LNSIZ]; // Text of current line
31 WORD filecount; // Unique file number counter
32 WORD cfileno; // Current file number
33 TOKEN * tok; // Ptr to current token
34 TOKEN * etok; // Ptr past last token in tokbuf[]
35 TOKEN tokeol[1] = {EOL}; // Bailout end-of-line token
36 char * string[TOKBUFSIZE*2]; // Token buffer string pointer storage
38 // File record, used to maintain a list of every include file ever visited
39 #define FILEREC struct _filerec
49 INOBJ * cur_inobj; // Ptr current input obj (IFILE/IMACRO)
50 static INOBJ * f_inobj; // Ptr list of free INOBJs
51 static IFILE * f_ifile; // Ptr list of free IFILEs
52 static IMACRO * f_imacro; // Ptr list of free IMACROs
54 static TOKEN tokbuf[TOKBUFSIZE]; // Token buffer (stack-like, all files)
57 ILLEG, ILLEG, ILLEG, ILLEG, // NUL SOH STX ETX
58 ILLEG, ILLEG, ILLEG, ILLEG, // EOT ENQ ACK BEL
59 ILLEG, WHITE, ILLEG, ILLEG, // BS HT LF VT
60 WHITE, ILLEG, ILLEG, ILLEG, // FF CR SO SI
62 ILLEG, ILLEG, ILLEG, ILLEG, // DLE DC1 DC2 DC3
63 ILLEG, ILLEG, ILLEG, ILLEG, // DC4 NAK SYN ETB
64 ILLEG, ILLEG, ILLEG, ILLEG, // CAN EM SUB ESC
65 ILLEG, ILLEG, ILLEG, ILLEG, // FS GS RS US
67 WHITE, MULTX, MULTX, SELF, // SP ! " #
68 MULTX+CTSYM, MULTX, SELF, MULTX, // $ % & '
69 SELF, SELF, SELF, SELF, // ( ) * +
70 SELF, SELF, STSYM, SELF, // , - . /
72 DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 0 1
73 DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 2 3
74 DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 4 5
75 DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 6 7
76 DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 8 9
78 MULTX, MULTX, MULTX, STSYM+CTSYM, // < = > ?
80 MULTX, STSYM+CTSYM+HDIGIT, // @ A
81 (char)((BYTE)DOT)+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT, // B C
82 STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT, // D E
83 STSYM+CTSYM+HDIGIT, STSYM+CTSYM, // F G
84 STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // H I J K
85 (char)((BYTE)DOT)+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // L M N O
87 STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, (char)((BYTE)DOT)+STSYM+CTSYM, // P Q R S
88 STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, (char)((BYTE)DOT)+STSYM+CTSYM, // T U V W
89 STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, SELF, // X Y Z [
90 SELF, SELF, MULTX, STSYM+CTSYM, // \ ] ^ _
92 ILLEG, STSYM+CTSYM+HDIGIT, // ` a
93 (char)((BYTE)DOT)+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT, // b c
94 STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT, // d e
95 STSYM+CTSYM+HDIGIT, STSYM+CTSYM, // f g
96 STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // h i j k
97 (char)((BYTE)DOT)+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // l m n o
99 STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, (char)((BYTE)DOT)+STSYM+CTSYM, // p q r s
100 STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, (char)((BYTE)DOT)+STSYM+CTSYM, // t u v w
101 STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, SELF, // x y z {
102 SELF, SELF, SELF, ILLEG // | } ~ DEL
105 // Names of registers
106 static char * regname[] = {
107 "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7",
108 "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7",
109 "pc", "ssp", "usp", "sr", "ccr"
112 static char * riscregname[] = {
113 "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
114 "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
115 "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
116 "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31"
121 // Initialize tokenizer
123 void InitTokenizer(void)
126 char * htab = "0123456789abcdefABCDEF"; // Hex character table
128 lnsave = 0; // Don't save lines
129 curfname = ""; // No file, empty filename
130 filecount = (WORD)-1;
131 cfileno = (WORD)-1; // cfileno gets bumped to 0
143 // Initialize hex, "dot" and tolower tables
148 tolowertab[i] = (char)i;
151 for(i=0; htab[i]!=EOS; i++)
152 hextab[htab[i]] = (char)((i < 16) ? i : i - 6);
154 for(i='A'; i<='Z'; i++)
155 tolowertab[i] |= 0x20;
157 // These characters are legal immediately after a period
158 dotxtab['b'] = DOTB; // .b .B .s .S
162 dotxtab['w'] = DOTW; // .w .W
164 dotxtab['l'] = DOTL; // .l .L
166 dotxtab['i'] = DOTI; // .i .I (???)
171 void SetFilenameForErrorReporting(void)
175 // Check for absolute top filename (this should never happen)
178 curfname = "(*top*)";
182 FILEREC * fr = filerec;
184 // Advance to the correct record...
185 while (fr != NULL && fnum != 0)
191 // Check for file # record not found (this should never happen either)
194 curfname = "(*NOT FOUND*)";
198 curfname = fr->frec_name;
203 // Allocate an IFILE or IMACRO
205 INOBJ * a_inobj(int typ)
211 // Allocate and initialize INOBJ first
213 inobj = malloc(sizeof(INOBJ));
217 f_inobj = f_inobj->in_link;
222 case SRC_IFILE: // Alloc and init an IFILE
224 ifile = malloc(sizeof(IFILE));
228 f_ifile = f_ifile->if_link;
231 inobj->inobj.ifile = ifile;
233 case SRC_IMACRO: // Alloc and init an IMACRO
234 if (f_imacro == NULL)
235 imacro = malloc(sizeof(IMACRO));
239 f_imacro = f_imacro->im_link;
242 inobj->inobj.imacro = imacro;
244 case SRC_IREPT: // Alloc and init an IREPT
245 inobj->inobj.irept = malloc(sizeof(IREPT));
246 DEBUG printf("alloc IREPT\n");
250 // Install INOBJ on top of input stack
251 inobj->in_ifent = ifent; // Record .if context on entry
252 inobj->in_type = (WORD)typ;
253 inobj->in_otok = tok;
254 inobj->in_etok = etok;
255 inobj->in_link = cur_inobj;
263 // Perform macro substitution from 'orig' to 'dest'. Return OK or some error.
264 // A macro reference is in one of two forms:
265 // \name <non-name-character>
267 // A doubled backslash (\\) is compressed to a single backslash (\).
268 // Argument definitions have been pre-tokenized, so we have to turn them back
269 // into text. This means that numbers, in particular, become hex, regardless of
270 // their representation when the macro was invoked. This is a hack.
271 // A label may appear at the beginning of the line:
272 // :<name><whitespace>
273 // (the colon must be in the first column). These labels are stripped before
274 // macro expansion takes place.
276 int ExpandMacro(char * src, char * dest, int destsiz)
279 int questmark; // \? for testing argument existence
280 char mname[128]; // Assume max size of a formal arg name
281 char numbuf[20]; // Buffer for text of CONSTs
284 char ** symbolString;
286 DEBUG { printf("ExM: src=\"%s\"\n", src); }
288 IMACRO * imacro = cur_inobj->inobj.imacro;
289 int macnum = (int)(imacro->im_macro->sattr);
291 char * dst = dest; // Next dest slot
292 char * edst = dest + destsiz - 1; // End + 1(?) of dest buffer
294 // Check for (and skip over) any "label" on the line
300 while (*s != EOS && !(chrtab[*s] & WHITE))
304 s++; // Skip first whitespace
307 // Expand the rest of the line
310 // Copy single character
316 // Skip comments in case a loose @ or \ is in there
317 // In that case the tokeniser was trying to expand it.
318 if ((*s == ';') || ((*s == '/') && (*(s + 1) == '/')))
323 // Do macro expansion
331 case '\\': // \\, \ (collapse to single backslash)
337 case '?': // \? <macro> set `questmark' flag
341 case '#': // \#, number of arguments
342 sprintf(numbuf, "%d", (int)imacro->im_nargs);
344 case '!': // \! size suffix supplied on invocation
345 switch ((int)imacro->im_siz)
347 case SIZN: d = ""; break;
348 case SIZB: d = ".b"; break;
349 case SIZW: d = ".w"; break;
350 case SIZL: d = ".l"; break;
354 case '~': // ==> unique label string Mnnnn...
355 sprintf(numbuf, "M%u", curuniq);
371 return error("missing argument name");
374 // \n ==> argument number 'n', 0..9
375 if (chrtab[*s] & DIGIT)
385 // Get argument name: \name, \{name}
395 while (chrtab[*s] & CTSYM);
400 for(++s; *s != EOS && *s != '}';)
404 return error("missing '}'");
411 // Lookup the argument and copy its (string) value into the
412 // destination string
413 DEBUG printf("argument='%s'\n", mname);
415 if ((arg = lookup(mname, MACARG, macnum)) == NULL)
416 return errors("undefined argument: '%s'", mname);
419 // Convert a string of tokens (terminated with EOL) back into
420 // text. If an argument is out of range (not specified in the
421 // macro invocation) then it is ignored.
422 i = (int)arg->svalue;
424 DEBUG printf("~argnumber=%d (argBase=%u)\n", i, imacro->argBase);
427 if (i < imacro->im_nargs)
432 tk = argPtrs[imacro->argBase + i];
434 tk = imacro->argument[i].token;
435 symbolString = imacro->argument[i].string;
438 // printf("ExM: Preparing to parse argument #%u...\n", i);
445 // 0 if the argument is empty or nonexistent,
446 // 1 if the argument is not empty
449 if (tk == NULL || *tk == EOL)
455 *dst++ = (char)(questmark + '0');
459 // Argument # is in range, so expand it
464 // Reverse-translation from a token number to a string.
465 // This is a hack. It might be better table-driven.
468 if ((*tk >= KW_D0) && !rdsp && !rgpu)
470 d = regname[(int)*tk++ - KW_D0];
473 else if ((*tk >= KW_R0) && (*tk <= KW_R31))
475 d = riscregname[(int)*tk++ - KW_R0];
484 // d = (char *)*tk++;
487 // This fix should be done for strings too
488 d = symbolString[*tk++];
489 DEBUG printf("ExM: SYMBOL=\"%s\"", d);
494 // d = (char *)*tk++;
497 d = symbolString[*tk++];
518 // Shamus: Changing the format specifier from %lx to %ux caused
519 // the assembler to choke on legitimate code... Need to investigate
520 // this further before changing anything else here!
522 sprintf(numbuf, "$%lx", (LONG)*tk++);
586 *dst++ = (char)*(tk - 1);
591 // If 'd' != NULL, copy string to destination
595 DEBUG printf("d='%s'\n", d);
614 DEBUG { printf("ExM: dst=\"%s\"\n", dest); }
619 DEBUG printf("*** OVERFLOW LINE ***\n%s\n", dest);
620 return fatal("line too long as a result of macro expansion");
625 // Get next line of text from a macro
627 char * GetNextMacroLine(void)
629 IMACRO * imacro = cur_inobj->inobj.imacro;
630 // LONG * strp = imacro->im_nextln;
631 struct LineList * strp = imacro->im_nextln;
633 if (strp == NULL) // End-of-macro
636 imacro->im_nextln = strp->next;
637 // ExpandMacro((char *)(strp + 1), imacro->im_lnbuf, LNSIZ);
638 ExpandMacro(strp->line, imacro->im_lnbuf, LNSIZ);
640 return imacro->im_lnbuf;
645 // Get next line of text from a repeat block
647 char * GetNextRepeatLine(void)
650 IREPT * irept = cur_inobj->inobj.irept;
651 LONG * strp = irept->ir_nextln; // initial null
653 // Do repeat at end of .rept block's string list
656 DEBUG printf("back-to-top-of-repeat-block count=%d\n", (int)irept->ir_count);
657 irept->ir_nextln = irept->ir_firstln; // copy first line
659 if (irept->ir_count-- == 0)
661 DEBUG printf("end-repeat-block\n");
665 strp = irept->ir_nextln;
668 strcpy(irbuf, (char *)(irept->ir_nextln + 1));
669 DEBUG printf("repeat line='%s'\n", irbuf);
670 irept->ir_nextln = (LONG *)*strp;
677 // Include a source file used at the root, and for ".include" files
679 int include(int handle, char * fname)
683 printf("[include: %s, cfileno=%u]\n", fname, cfileno);
685 // Alloc and initialize include-descriptors
686 INOBJ * inobj = a_inobj(SRC_IFILE);
687 IFILE * ifile = inobj->inobj.ifile;
689 ifile->ifhandle = handle; // Setup file handle
690 ifile->ifind = ifile->ifcnt = 0; // Setup buffer indices
691 ifile->ifoldlineno = curlineno; // Save old line number
692 ifile->ifoldfname = curfname; // Save old filename
693 ifile->ifno = cfileno; // Save old file number
695 // NB: This *must* be preincrement, we're adding one to the filecount here!
696 cfileno = ++filecount; // Compute NEW file number
697 curfname = strdup(fname); // Set current filename (alloc storage)
698 curlineno = 0; // Start on line zero
700 // Add another file to the file-record
701 FILEREC * fr = (FILEREC *)malloc(sizeof(FILEREC));
702 fr->frec_next = NULL;
703 fr->frec_name = curfname;
706 filerec = fr; // Add first filerec
708 last_fr->frec_next = fr; // Append to list of filerecs
711 DEBUG printf("[include: curfname: %s, cfileno=%u]\n", curfname, cfileno);
718 // Pop the current input level
725 INOBJ * inobj = cur_inobj;
729 // Pop IFENT levels until we reach the conditional assembly context we
730 // were at when the input object was entered.
731 int numUnmatched = 0;
733 while (ifent != inobj->in_ifent)
735 if (d_endif() != 0) // Something bad happened during endif parsing?
736 return -1; // If yes, bail instead of getting stuck in a loop
741 // Give a warning to the user that we had to wipe their bum for them
742 if (numUnmatched > 0)
743 warni("missing %d .endif(s)", numUnmatched);
745 tok = inobj->in_otok; // Restore tok and etok
746 etok = inobj->in_etok;
748 switch (inobj->in_type)
750 case SRC_IFILE: // Pop and release an IFILE
752 printf("[Leaving: %s]\n", curfname);
754 ifile = inobj->inobj.ifile;
755 ifile->if_link = f_ifile;
757 close(ifile->ifhandle); // Close source file
758 if (debug) printf("[fpop (pre): curfname=%s]\n", curfname);
759 curfname = ifile->ifoldfname; // Set current filename
760 if (debug) printf("[fpop (post): curfname=%s]\n", curfname);
761 if (debug) printf("[fpop: (pre) cfileno=%d ifile->ifno=%d]\n", (int)cfileno, (int)ifile->ifno);
762 curlineno = ifile->ifoldlineno; // Set current line#
763 DEBUG printf("cfileno=%d ifile->ifno=%d\n", (int)cfileno, (int)ifile->ifno);
764 cfileno = ifile->ifno; // Restore current file number
765 if (debug) printf("[fpop: (post) cfileno=%d ifile->ifno=%d]\n", (int)cfileno, (int)ifile->ifno);
767 case SRC_IMACRO: // Pop and release an IMACRO
768 imacro = inobj->inobj.imacro;
769 imacro->im_link = f_imacro;
772 case SRC_IREPT: // Pop and release an IREPT
773 DEBUG printf("dealloc IREPT\n");
774 p = inobj->inobj.irept->ir_firstln;
785 cur_inobj = inobj->in_link;
786 inobj->in_link = f_inobj;
795 // Get line from file into buf, return NULL on EOF or ptr to the start of a
798 char * GetNextLine(void)
802 int readamt = -1; // 0 if last read() yielded 0 bytes
803 IFILE * fl = cur_inobj->inobj.ifile;
807 // Scan for next end-of-line; handle stupid text formats by treating
808 // \r\n the same as \n. (lone '\r' at end of buffer means we have to
810 d = &fl->ifbuf[fl->ifind];
812 for(p=d, i=0, j=fl->ifcnt; i<j; i++, p++)
814 if (*p == '\r' || *p == '\n')
821 break; // Need to read more, then look for '\n' to eat
822 else if (p[1] == '\n')
826 // Cover up the newline with end-of-string sentinel
835 // Handle hanging lines by ignoring them (Input file is exhausted, no
836 // \r or \n on last line)
837 // Shamus: This is wrong. Never ignore any input!
838 if (!readamt && fl->ifcnt)
845 // Really should check to see if we're at the end of the buffer!
847 fl->ifbuf[fl->ifind + fl->ifcnt] = '\0';
849 return &fl->ifbuf[fl->ifind];
853 // Truncate and return absurdly long lines.
854 if (fl->ifcnt >= QUANTUM)
856 fl->ifbuf[fl->ifind + fl->ifcnt - 1] = '\0';
858 return &fl->ifbuf[fl->ifind];
861 // Relocate what's left of a line to the beginning of the buffer, and
862 // read some more of the file in; return NULL if the buffer's empty and
866 p = &fl->ifbuf[fl->ifind];
867 d = &fl->ifbuf[fl->ifcnt & 1];
869 for(i=0; i<fl->ifcnt; i++)
872 fl->ifind = fl->ifcnt & 1;
875 readamt = read(fl->ifhandle, &fl->ifbuf[fl->ifind + fl->ifcnt], QUANTUM);
880 if ((fl->ifcnt += readamt) == 0)
889 int TokenizeLine(void)
891 char * ln = NULL; // Ptr to current position in line
892 char * p; // Random character ptr
893 TOKEN * tk; // Token-deposit ptr
894 int state = 0; // State for keyword detector
895 int j = 0; // Var for keyword detector
896 char c; // Random char
897 VALUE v; // Random value
898 char * nullspot = NULL; // Spot to clobber for SYMBOL termination
899 int stuffnull; // 1:terminate SYMBOL '\0' at *nullspot
901 int stringNum = 0; // Pointer to string locations in tokenized line
905 if (cur_inobj == NULL) // Return EOF if input stack is empty
908 // Get another line of input from the current input source: a file, a
909 // macro, or a repeat-block
910 switch (cur_inobj->in_type)
914 // o bump source line number;
915 // o tag the listing-line with a space;
916 // o kludge lines generated by Alcyon C.
918 if ((ln = GetNextLine()) == NULL)
920 if (debug) printf("TokenizeLine: Calling fpop() from SRC_IFILE...\n");
921 if (fpop() == 0) // Pop input level
922 goto retry; // Try for more lines
925 ifent->if_prev = (IFENT *) - 1; //Signal Assemble() that we have reached EOF with unbalanced if/endifs
930 curlineno++; // Bump line number
935 // AS68 compatibility, throw away all lines starting with
936 // back-quotes, tildes, or '*'
937 // On other lines, turn the first '*' into a semi-colon.
938 if (*ln == '`' || *ln == '~' || *ln == '*')
942 for(p=ln; *p!=EOS; p++)
955 // o Handle end-of-macro;
956 // o tag the listing-line with an at (@) sign.
958 if ((ln = GetNextMacroLine()) == NULL)
960 if (ExitMacro() == 0) // Exit macro (pop args, do fpop(), etc)
961 goto retry; // Try for more lines...
963 return TKEOF; // Oops, we got a non zero return code, signal EOF
969 // o Handle end-of-repeat-block;
970 // o tag the listing-line with a pound (#) sign.
972 if ((ln = GetNextRepeatLine()) == NULL)
974 if (debug) printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n");
983 // Save text of the line. We only do this during listings and within
984 // macro-type blocks, since it is expensive to unconditionally copy every
989 // General house-keeping
990 tok = tokeol; // Set "tok" to EOL in case of error
991 tk = etok; // Reset token ptr
992 stuffnull = 0; // Don't stuff nulls
993 totlines++; // Bump total #lines assembled
995 // See if the entire line is a comment. This is a win if the programmer
996 // puts in lots of comments
997 if (*ln == '*' || *ln == ';' || ((*ln == '/') && (*(ln + 1) == '/')))
1000 // Main tokenization loop;
1001 // o skip whitespace;
1002 // o handle end-of-line;
1003 // o handle symbols;
1004 // o handle single-character tokens (operators, etc.);
1005 // o handle multiple-character tokens (constants, strings, etc.).
1008 // Skip whitespace, handle EOL
1009 while ((int)chrtab[*ln] & WHITE)
1012 // Handle EOL, comment with ';'
1013 if (*ln == EOS || *ln == ';'|| ((*ln == '/') && (*(ln + 1) == '/')))
1016 // Handle start of symbol. Symbols are null-terminated in place. The
1017 // termination is always one symbol behind, since there may be no place
1018 // for a null in the case that an operator immediately follows the name.
1023 if (stuffnull) // Terminate old symbol from previous pass
1026 v = 0; // Assume no DOT attrib follows symbol
1029 // In some cases, we need to check for a DOTx at the *beginning*
1030 // of a symbol, as the "start" of the line we're currently looking
1031 // at could be somewhere in the middle of that line!
1034 // Make sure that it's *only* a .[bwsl] following, and not the
1035 // start of a local symbol:
1036 if ((chrtab[*(ln + 1)] & DOT)
1037 && (dotxtab[*(ln + 1)] != 0)
1038 && !(chrtab[*(ln + 2)] & CTSYM))
1040 // We found a legitimate DOTx construct, so add it to the
1044 *tk++ = (TOKEN)dotxtab[*ln++];
1049 p = nullspot = ln++; // Nullspot -> start of this symbol
1051 // Find end of symbol (and compute its length)
1052 for(j=1; (int)chrtab[*ln]&CTSYM; j++)
1055 // Handle "DOT" special forms (like ".b") that follow a normal
1056 // symbol or keyword:
1059 *ln++ = EOS; // Terminate symbol
1060 stuffnull = 0; // And never try it again
1062 // Character following the `.' must have a DOT attribute, and
1063 // the character after THAT one must not have a start-symbol
1064 // attribute (to prevent symbols that look like, for example,
1065 // "zingo.barf", which might be a good idea anyway....)
1066 if (((chrtab[*ln] & DOT) == 0) || (dotxtab[*ln] == 0))
1067 return error("[bwsl] must follow '.' in symbol");
1069 v = (VALUE)dotxtab[*ln++];
1071 if (chrtab[*ln] & CTSYM)
1072 return error("misuse of '.', not allowed in symbols");
1075 // If the symbol is small, check to see if it's really the name of
1079 for(state=0; state>=0;)
1081 j = (int)tolowertab[*p++];
1084 if (kwcheck[j] != state)
1090 if (*p == EOS || p == ln)
1104 // Make j = -1 if user tries to use a RISC register while in 68K mode
1105 if (!(rgpu || rdsp) && ((TOKEN)j >= KW_R0 && (TOKEN)j <= KW_R31))
1110 // Make j = -1 if time, date etc with no preceding ^^
1111 // defined, referenced, streq, macdef, date and time
1114 case 112: // defined
1115 case 113: // referenced
1123 // If not tokenized keyword OR token was not found
1124 if ((j < 0) || (state < 0))
1128 //problem here: nullspot is a char * but TOKEN is a uint32_t. On a 64-bit
1129 //system, this will cause all kinds of mischief.
1131 *tk++ = (TOKEN)nullspot;
1133 string[stringNum] = nullspot;
1144 if (v) // Record attribute token (if any)
1147 if (stuffnull) // Arrange for string termination on next pass
1153 // Handle identity tokens
1160 // Handle multiple-character tokens
1165 case '!': // ! or !=
1175 case '\'': // 'string'
1176 case '\"': // "string"
1180 // More char * stuffing (8 bytes) into the space of 4 (TOKEN).
1181 // Need to figure out how to fix this crap.
1185 string[stringNum] = ln;
1190 for(p=ln; *ln!=EOS && *ln!=c1;)
1199 return(error("unterminated string"));
1228 warn("bad backslash code in string");
1238 return error("unterminated string");
1242 case '$': // $, hex constant
1243 if (chrtab[*ln] & HDIGIT)
1247 // Parse the hex value
1248 while (hextab[*ln] >= 0)
1249 v = (v << 4) + (int)hextab[*ln++];
1251 // ggn: Okay, some comments here are in order I think....
1252 // The original madmac sources didn't parse the size at
1253 // this point (i.e. .b/.w/.l). It was probably done at
1254 // another point, although it's unclear to me exactly
1255 // where. So why change this? My understanding (at least
1256 // from what SCPCD said on IRC) is that .w addressing
1257 // formats produce wrong code on jaguar (or doesn't execute
1258 // properly? something like that). So the code was changed
1259 // to mask off the upper bits depending on length (note: I
1260 // don't think .b is valid at all! I only know of .w/.l, so
1261 // this should probably be wiped). Then the code that
1262 // parses the constant and checks to see if it's between
1263 // $ffff0000 and $8000 never got triggered, so yay job
1264 // done! ...now say we want to assemble a st .prg. One of
1265 // the most widely spread optimisations is move.X expr.w,Y
1266 // (or vice versa, or both, anyway...) to access hardware
1267 // registers (which are mapped to $fxxxxx). This botchy
1268 // thing would create "hilarious" code while trying to
1269 // access hardware registers. So I made a condition to see
1270 // if st mode or jaguar is active and apply the both or
1271 // not. One last note: this is hardcoded to get optimised
1272 // for now on ST mode, i.e. it can't generate code like
1273 // move.w $00001234,d0 - it'll always get optimised to
1274 // move.w $1234.w,d0. It's probably ok, but maybe a warning
1275 // should be emitted? Or maybe finding a way to make it not
1276 // auto-optimise? I think it's ok for now...
1279 if (obj_format == BSD)
1281 if ((*(ln + 1) & 0xDF) == 'B')
1286 else if ((*(ln + 1) & 0xDF) == 'W')
1291 else if ((*(ln + 1) & 0xDF) == 'L')
1301 if (obj_format == ALCYON)
1303 if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
1308 else if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
1319 case '<': // < or << or <> or <=
1338 case ':': // : or ::
1348 case '=': // = or ==
1358 case '>': // > or >> or >=
1373 case '%': // % or binary constant
1374 if (*ln < '0' || *ln > '1')
1382 while (*ln >= '0' && *ln <= '1')
1383 v = (v << 1) + *ln++ - '0';
1387 if ((*(ln + 1) == 'b') || (*(ln + 1) == 'B'))
1393 if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
1399 if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
1408 case '@': // @ or octal constant
1409 if (*ln < '0' || *ln > '7')
1417 while (*ln >= '0' && *ln <= '7')
1418 v = (v << 3) + *ln++ - '0';
1422 if ((*(ln+1) == 'b') || (*(ln+1) == 'B'))
1428 if ((*(ln+1) == 'w') || (*(ln+1) == 'W'))
1434 if ((*(ln+1) == 'l') || (*(ln+1) == 'L'))
1443 case '^': // ^ or ^^ <operator-name>
1450 if (((int)chrtab[*++ln] & STSYM) == 0)
1452 error("invalid symbol following ^^");
1458 while ((int)chrtab[*ln] & CTSYM)
1461 for(state=0; state>=0;)
1463 // Get char, convert to lowercase
1466 if (j >= 'A' && j <= 'Z')
1471 if (kwcheck[j] != state)
1477 if (*p == EOS || p == ln)
1486 if (j < 0 || state < 0)
1488 error("unknown symbol following ^^");
1495 interror(2); // Bad MULTX entry in chrtab
1500 // Handle decimal constant
1505 while ((int)chrtab[*ln] & DIGIT)
1506 v = (v * 10) + *ln++ - '0';
1508 // See if there's a .[bwl] after the constant & deal with it if so
1511 if ((*(ln + 1) == 'b') || (*(ln + 1) == 'B'))
1516 else if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
1521 else if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
1529 //printf("CONST: %i\n", v);
1533 // Handle illegal character
1534 return error("illegal character");
1537 // Terminate line of tokens and return "success."
1540 tok = etok; // Set tok to beginning of line
1542 if (stuffnull) // Terminate last SYMBOL
1552 // .GOTO <label> goto directive
1554 // The label is searched for starting from the first line of the current,
1555 // enclosing macro definition. If no enclosing macro exists, an error is
1558 // A label is of the form:
1560 // :<name><whitespace>
1562 // The colon must appear in column 1. The label is stripped prior to macro
1563 // expansion, and is NOT subject to macro expansion. The whitespace may also
1566 //int d_goto(WORD siz) {
1568 int d_goto(WORD unused)
1572 // Setup for the search
1574 return error("missing label");
1576 // sym = (char *)tok[1];
1577 char * sym = string[tok[1]];
1580 if (cur_inobj->in_type != SRC_IMACRO)
1581 return error("goto not in macro");
1583 IMACRO * imacro = cur_inobj->inobj.imacro;
1584 // defln = (LONG *)imacro->im_macro->svalue;
1585 struct LineList * defln = imacro->im_macro->lineList;
1587 // Find the label, starting with the first line.
1588 for(; defln!=NULL; defln=defln->next)
1590 // if (*(char *)(defln + 1) == ':')
1591 if (defln->line[0] == ':')
1593 // Compare names (sleazo string compare)
1594 // This string compare is not right. Doesn't check for lengths.
1595 // (actually it does, but in a crappy, unclear way.)
1596 WARNING(!!!! Bad string comparison !!!)
1598 // s2 = (char *)(defln + 1) + 1;
1612 // Found the label, set new macro next-line and return.
1613 if ((*s2 == EOS) || ((int)chrtab[*s2] & WHITE))
1615 imacro->im_nextln = defln;
1621 return error("goto label not found");
1625 void DumpTokenBuffer(void)
1628 printf("Tokens [%X]: ", sloc);
1630 for(t=tokbuf; *t!=EOL; t++)
1634 else if (*t == CONST)
1637 printf("[CONST: $%X]", (uint32_t)*t);
1639 else if (*t == ACONST)
1641 else if (*t == STRING)
1644 printf("[STRING:\"%s\"]", string[*t]);
1646 else if (*t == SYMBOL)
1649 printf("[SYMBOL:\"%s\"]", string[*t]);
1653 else if (*t == TKEOF)
1655 else if (*t == DEQUALS)
1656 printf("[DEQUALS]");
1661 else if (*t == DCOLON)
1673 else if (*t == UNMINUS)
1674 printf("[UNMINUS]");
1675 else if (*t == DOTB)
1677 else if (*t == DOTW)
1679 else if (*t == DOTL)
1681 else if (*t == DOTI)
1683 else if (*t == ENDEXPR)
1684 printf("[ENDEXPR]");
1685 else if (*t == CR_ABSCOUNT)
1686 printf("[CR_ABSCOUNT]");
1687 else if (*t == CR_DEFINED)
1688 printf("[CR_DEFINED]");
1689 else if (*t == CR_REFERENCED)
1690 printf("[CR_REFERENCED]");
1691 else if (*t == CR_STREQ)
1692 printf("[CR_STREQ]");
1693 else if (*t == CR_MACDEF)
1694 printf("[CR_MACDEF]");
1695 else if (*t == CR_TIME)
1696 printf("[CR_TIME]");
1697 else if (*t == CR_DATE)
1698 printf("[CR_DATE]");
1699 else if (*t >= 0x20 && *t <= 0x2F)
1700 printf("[%c]", (char)*t);
1701 else if (*t >= 0x3A && *t <= 0x3F)
1702 printf("[%c]", (char)*t);
1703 else if (*t >= 0x80 && *t <= 0x87)
1704 printf("[D%u]", ((uint32_t)*t) - 0x80);
1705 else if (*t >= 0x88 && *t <= 0x8F)
1706 printf("[A%u]", ((uint32_t)*t) - 0x88);
1708 printf("[%X:%c]", (uint32_t)*t, (char)*t);