2 // RMAC - Reboot's Macro Assembler for the Atari Jaguar Console System
3 // TOKEN.C - Token Handling
4 // Copyright (C) 199x Landon Dyer, 2011-2012 Reboot and Friends
5 // RMAC derived from MADMAC v1.07 Written by Landon Dyer, 1986
6 // Source utilised with the kind permission of Landon Dyer
15 #define DECL_KW // Declare keyword arrays
16 #define DEF_KW // Declare keyword values
17 #include "kwtab.h" // Incl generated keyword tables & defs
// Tokenizer state. Most of these are non-static and therefore visible to
// other translation units; the free lists and tokbuf[] are private to this file.
20 int lnsave; // Nonzero => save (strcpy) text of current line; InitTokenizer() clears it
21 int curlineno; // Current line number
22 int totlines; // Total # of lines (bumped once per line in TokenizeLine())
23 int mjump_align = 0; // mjump alignment flag
24 char lntag; // Line tag
25 char * curfname; // Current filename
26 char tolowertab[128]; // Uppercase ==> lowercase (filled in by InitTokenizer())
27 char hextab[128]; // Table of hex values (filled in by InitTokenizer())
28 char dotxtab[128]; // Table for ".b", ".s", etc. (size token per suffix letter)
29 char irbuf[LNSIZ]; // Text for .rept block line
30 char lnbuf[LNSIZ]; // Text of current line
31 WORD filecount; // Unique file number counter
32 WORD cfileno; // Current file number
33 TOKEN * tok; // Ptr to current token
34 TOKEN * etok; // Ptr past last token in tokbuf[]
35 TOKEN tokeol[1] = {EOL}; // Bailout end-of-line token
36 char * string[TOKBUFSIZE*2]; // String pointers for SYMBOL/STRING tokens; looked up as string[<token value>]
38 // File record, used to maintain a list of every include file ever visited
39 #define FILEREC struct _filerec
49 INOBJ * cur_inobj; // Ptr current input obj (IFILE/IMACRO/IREPT)
50 static INOBJ * f_inobj; // Ptr list of free INOBJs
51 static IFILE * f_ifile; // Ptr list of free IFILEs
52 static IMACRO * f_imacro; // Ptr list of free IMACROs
54 static TOKEN tokbuf[TOKBUFSIZE]; // Token buffer (stack-like, all files)
// Character attribute entries, one per 7-bit ASCII code; the trailing comment
// on each row names the characters that row covers. The line that opens this
// initializer is not visible here -- presumably this is chrtab[], which the
// tokenizer indexes by character to test WHITE/DIGIT/STSYM/CTSYM/etc.
// NOTE(review): 'I'/'i' carry no DOT attribute below, yet InitTokenizer()
// registers dotxtab['i'] and TokenizeLine() demands DOT on the character that
// follows a '.', so a ".i" suffix looks unreachable on that path -- confirm.
57 ILLEG, ILLEG, ILLEG, ILLEG, // NUL SOH STX ETX
58 ILLEG, ILLEG, ILLEG, ILLEG, // EOT ENQ ACK BEL
59 ILLEG, WHITE, ILLEG, ILLEG, // BS HT LF VT
60 WHITE, ILLEG, ILLEG, ILLEG, // FF CR SO SI
62 ILLEG, ILLEG, ILLEG, ILLEG, // DLE DC1 DC2 DC3
63 ILLEG, ILLEG, ILLEG, ILLEG, // DC4 NAK SYN ETB
64 ILLEG, ILLEG, ILLEG, ILLEG, // CAN EM SUB ESC
65 ILLEG, ILLEG, ILLEG, ILLEG, // FS GS RS US
67 WHITE, MULTX, MULTX, SELF, // SP ! " #
68 MULTX+CTSYM, MULTX, SELF, MULTX, // $ % & '
69 SELF, SELF, SELF, SELF, // ( ) * +
70 SELF, SELF, STSYM, SELF, // , - . /
// Digits: usable inside a symbol (CTSYM) but not as its first character
72 DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 0 1
73 DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 2 3
74 DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 4 5
75 DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 6 7
76 DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM, // 8 9
78 MULTX, MULTX, MULTX, STSYM+CTSYM, // < = > ?
// Uppercase letters: A-F are also hex digits; B, L, S and W also carry DOT
80 MULTX, STSYM+CTSYM+HDIGIT, // @ A
81 (char)((BYTE)DOT)+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT, // B C
82 STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT, // D E
83 STSYM+CTSYM+HDIGIT, STSYM+CTSYM, // F G
84 STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // H I J K
85 (char)((BYTE)DOT)+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // L M N O
87 STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, (char)((BYTE)DOT)+STSYM+CTSYM, // P Q R S
88 STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, (char)((BYTE)DOT)+STSYM+CTSYM, // T U V W
89 STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, SELF, // X Y Z [
90 SELF, SELF, MULTX, STSYM+CTSYM, // \ ] ^ _
// Lowercase letters mirror the uppercase rows above
92 ILLEG, STSYM+CTSYM+HDIGIT, // ` a
93 (char)((BYTE)DOT)+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT, // b c
94 STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT, // d e
95 STSYM+CTSYM+HDIGIT, STSYM+CTSYM, // f g
96 STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // h i j k
97 (char)((BYTE)DOT)+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // l m n o
99 STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, (char)((BYTE)DOT)+STSYM+CTSYM, // p q r s
100 STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, (char)((BYTE)DOT)+STSYM+CTSYM, // t u v w
101 STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, SELF, // x y z {
102 SELF, SELF, SELF, ILLEG // | } ~ DEL
105 // Names of registers
// 68K register names. ExpandMacro() indexes this table with (token - KW_D0)
// when it turns a register keyword token back into text.
106 static char * regname[] = {
107 "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7",
108 "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7",
109 "pc", "ssp", "usp", "sr", "ccr"
// RISC register names r0..r31. ExpandMacro() indexes this table with
// (token - KW_R0).
112 static char * riscregname[] = {
113 "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
114 "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
115 "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
116 "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31"
121 // Initialize tokenizer
//
// Clears the line-saving flag, resets the current filename and the file
// counters, then fills in the hex-digit, lowercase and "dot suffix" tables.
123 void InitTokenizer(void)
126 char * htab = "0123456789abcdefABCDEF"; // Hex character table
128 lnsave = 0; // Don't save lines
129 curfname = ""; // No file, empty filename
130 filecount = (WORD)-1; // include() pre-increments this, so the first file is #0
131 cfileno = (WORD)-1; // cfileno gets bumped to 0
143 // Initialize hex, "dot" and tolower tables
148 tolowertab[i] = (char)i; // Start out as the identity mapping
// htab positions 0..15 are '0'-'9','a'-'f' and map to themselves; positions
// 16..21 are 'A'-'F', which "i - 6" folds back onto the values 10..15.
151 for(i=0; htab[i]!=EOS; i++)
152 hextab[htab[i]] = (char)((i < 16) ? i : i - 6);
// Setting bit 0x20 turns an ASCII uppercase letter into its lowercase form
154 for(i='A'; i<='Z'; i++)
155 tolowertab[i] |= 0x20;
157 // These characters are legal immediately after a period
158 dotxtab['b'] = DOTB; // .b .B .s .S
162 dotxtab['w'] = DOTW; // .w .W
164 dotxtab['l'] = DOTL; // .l .L
166 dotxtab['i'] = DOTI; // .i .I (???)
// Point curfname at the filename to show in error messages.
// Walks the filerec list until the record selected by fnum is reached (fnum
// is set on a line not shown here -- presumably from cfileno, TODO confirm).
// The "(*top*)" and "(*NOT FOUND*)" placeholders cover the two cases that the
// comments below describe as "should never happen".
171 void SetFilenameForErrorReporting(void)
175 // Check for absolute top filename (this should never happen)
178 curfname = "(*top*)";
182 FILEREC * fr = filerec;
184 // Advance to the correct record...
185 while (fr != NULL && fnum != 0)
191 // Check for file # record not found (this should never happen either)
194 curfname = "(*NOT FOUND*)";
// Normal case: use the name stored in the matching file record
198 curfname = fr->frec_name;
203 // Allocate an IFILE, IMACRO or IREPT
//
// typ selects the kind of input object (SRC_IFILE, SRC_IMACRO or SRC_IREPT).
// INOBJ, IFILE and IMACRO records are taken from the f_inobj / f_ifile /
// f_imacro free lists when those are non-empty and malloc()ed otherwise; an
// IREPT is always malloc()ed. The new INOBJ records the current .if context
// and token pointers and is linked in front of cur_inobj.
// NOTE(review): no NULL check on the malloc() results is visible in this
// function -- confirm whether allocation failure is handled elsewhere.
205 INOBJ * a_inobj(int typ)
211 // Allocate and initialize INOBJ first
213 inobj = malloc(sizeof(INOBJ));
217 f_inobj = f_inobj->in_link; // Unlink the reused INOBJ from the free list
222 case SRC_IFILE: // Alloc and init an IFILE
224 ifile = malloc(sizeof(IFILE));
228 f_ifile = f_ifile->if_link; // Unlink the reused IFILE from the free list
231 inobj->inobj.ifile = ifile;
233 case SRC_IMACRO: // Alloc and init an IMACRO
234 if (f_imacro == NULL)
235 imacro = malloc(sizeof(IMACRO));
239 f_imacro = f_imacro->im_link; // Unlink the reused IMACRO from the free list
242 inobj->inobj.imacro = imacro;
244 case SRC_IREPT: // Alloc and init an IREPT
245 inobj->inobj.irept = malloc(sizeof(IREPT));
246 DEBUG printf("alloc IREPT\n");
250 // Install INOBJ on top of input stack
251 inobj->in_ifent = ifent; // Record .if context on entry
252 inobj->in_type = (WORD)typ;
253 inobj->in_otok = tok; // Saved so fpop() can restore tok
254 inobj->in_etok = etok; // Saved so fpop() can restore etok
255 inobj->in_link = cur_inobj; // Previous top of the input stack
263 // Perform macro substitution from 'src' to 'dest'. Return OK or some error.
264 // A macro reference is in one of two forms:
265 // \name <non-name-character>
267 // A doubled backslash (\\) is compressed to a single backslash (\).
268 // Argument definitions have been pre-tokenized, so we have to turn them back
269 // into text. This means that numbers, in particular, become hex, regardless of
270 // their representation when the macro was invoked. This is a hack.
271 // A label may appear at the beginning of the line:
272 // :<name><whitespace>
273 // (the colon must be in the first column). These labels are stripped before
274 // macro expansion takes place.
//
// src     - text of the macro line to expand
// dest    - buffer that receives the expanded text
// destsiz - size of dest, in bytes
// Besides named arguments, the escapes handled below are: \? (argument
// present test), \# (argument count), \! (size suffix), \~ (unique label)
// and \0..\9 (argument by number).
276 int ExpandMacro(char * src, char * dest, int destsiz)
279 int questmark; // \? for testing argument existence
280 char mname[128]; // Assume max size of a formal arg name
281 char numbuf[20]; // Buffer for text of CONSTs
284 char ** symbolString;
286 DEBUG { printf("ExM: src=\"%s\"\n", src); }
288 IMACRO * imacro = cur_inobj->inobj.imacro;
289 int macnum = (int)(imacro->im_macro->sattr);
292 char * dst = dest; // Next dest slot
293 char * edst = dest + destsiz - 1; // Address of the last byte of the dest buffer
295 // Check for (and skip over) any "label" on the line
301 while (*s != EOS && !(chrtab[*s] & WHITE))
305 s++; // Skip first whitespace
308 // Expand the rest of the line
311 // Copy single character
319 // Do macro expansion
327 case '\\': // \\, \ (collapse to single backslash)
333 case '?': // \? <macro> set `questmark' flag
337 case '#': // \#, number of arguments
338 sprintf(numbuf, "%d", (int)imacro->im_nargs);
340 case '!': // \! size suffix supplied on invocation
341 switch ((int)imacro->im_siz)
343 case SIZN: d = ""; break;
344 case SIZB: d = ".b"; break;
345 case SIZW: d = ".w"; break;
346 case SIZL: d = ".l"; break;
350 case '~': // ==> unique label string Mnnnn...
351 sprintf(numbuf, "M%u", curuniq);
367 return error("missing argument name");
370 // \n ==> argument number 'n', 0..9
371 if (chrtab[*s] & DIGIT)
381 // Get argument name: \name, \{name}
// NOTE(review): no bound on the number of characters copied into
// mname[128] is visible in these lines -- confirm one exists.
391 while (chrtab[*s] & CTSYM);
396 for(++s; *s != EOS && *s != '}';)
400 return error("missing '}'");
407 // Lookup the argument and copy its (string) value into the
408 // destination string
409 DEBUG printf("argument='%s'\n", mname);
411 if ((arg = lookup(mname, MACARG, macnum)) == NULL)
412 return errors("undefined argument: '%s'", mname);
415 // Convert a string of tokens (terminated with EOL) back into
416 // text. If an argument is out of range (not specified in the
417 // macro invocation) then it is ignored.
418 i = (int)arg->svalue;
420 DEBUG printf("~argnumber=%d (argBase=%u)\n", i, imacro->argBase);
423 if (i < imacro->im_nargs)
428 tk = argPtrs[imacro->argBase + i];
430 tk = imacro->argument[i].token;
431 symbolString = imacro->argument[i].string;
434 // printf("ExM: Preparing to parse argument #%u...\n", i);
441 // 0 if the argument is empty or non-existent,
442 // 1 if the argument is not empty
445 if (tk == NULL || *tk == EOL)
451 *dst++ = (char)(questmark + '0'); // Emit the \? result as the digit '0' or '1'
455 // Argument # is in range, so expand it
460 // Reverse-translation from a token number to a string.
461 // This is a hack. It might be better table-driven.
464 if ((*tk >= KW_D0) && !rdsp && !rgpu)
466 d = regname[(int)*tk++ - KW_D0];
469 else if ((*tk >= KW_R0) && (*tk <= KW_R31))
471 d = riscregname[(int)*tk++ - KW_R0];
480 // d = (char *)*tk++;
483 // This fix should be done for strings too
484 d = symbolString[*tk++];
485 DEBUG printf("ExM: SYMBOL=\"%s\"", d);
490 // d = (char *)*tk++;
493 d = symbolString[*tk++];
514 // Shamus: Changing the format specifier from %lx to %ux caused
515 // the assembler to choke on legitimate code... Need to investigate
516 // this further before changing anything else here!
// NOTE(review): "%lx" expects an unsigned long argument; whether (LONG)
// matches that depends on LONG's definition, which is not shown here.
518 sprintf(numbuf, "$%lx", (LONG)*tk++);
582 *dst++ = (char)*(tk - 1);
587 // If 'd' != NULL, copy string to destination
591 DEBUG printf("d='%s'\n", d);
608 DEBUG { printf("ExM: dst=\"%s\"\n", dest); }
// Overflow exit: the expansion did not fit in dest
613 DEBUG printf("*** OVERFLOW LINE ***\n%s\n", dest);
614 return fatal("line too long as a result of macro expansion");
619 // Get next line of text from a macro
//
// Takes the line at imacro->im_nextln of the current macro input object,
// advances im_nextln to the following list node, expands the line into
// imacro->im_lnbuf (LNSIZ bytes) and returns that buffer.
// NOTE(review): the value returned by ExpandMacro() is discarded here, so an
// expansion error does not change what this function returns.
621 char * GetNextMacroLine(void)
623 // unsigned source_addr;
625 IMACRO * imacro = cur_inobj->inobj.imacro;
626 // LONG * strp = imacro->im_nextln;
627 struct LineList * strp = imacro->im_nextln;
629 if (strp == NULL) // End-of-macro
632 // imacro->im_nextln = (LONG *)*strp;
633 imacro->im_nextln = strp->next;
634 // ExpandMacro((char *)(strp + 1), imacro->im_lnbuf, LNSIZ);
635 ExpandMacro(strp->line, imacro->im_lnbuf, LNSIZ);
637 return imacro->im_lnbuf;
642 // Get next line of text from a repeat block
//
// Lines of the block are chained through LONG cells: each node starts with a
// link to the next node and the line's text follows directly after it (see
// the "+ 1" and "*strp" accesses below). The current line is copied into the
// global irbuf.
// NOTE(review): strcpy() into irbuf[LNSIZ] is unbounded; it relies on the
// stored lines never exceeding LNSIZ -- confirm against the code that
// records .rept lines.
644 char * GetNextRepeatLine(void)
647 IREPT * irept = cur_inobj->inobj.irept;
648 LONG * strp = irept->ir_nextln; // initial null
650 // Do repeat at end of .rept block's string list
653 DEBUG printf("back-to-top-of-repeat-block count=%d\n", (int)irept->ir_count);
654 irept->ir_nextln = irept->ir_firstln; // copy first line
// The count is tested before it is decremented
656 if (irept->ir_count-- == 0)
658 DEBUG printf("end-repeat-block\n");
662 strp = irept->ir_nextln;
665 strcpy(irbuf, (char *)(irept->ir_nextln + 1)); // Text sits right after the link cell
666 DEBUG printf("repeat line='%s'\n", irbuf);
667 irept->ir_nextln = (LONG *)*strp; // Follow the link to the next line
674 // Include a source file used at the root, and for ".include" files
//
// handle - already-open file handle for the source file
// fname  - name of the file; a strdup()ed copy becomes curfname
// Pushes a new SRC_IFILE input object, saves the previous line number,
// filename and file number in it, assigns the next file number and appends a
// FILEREC for the file to the filerec list.
// NOTE(review): the malloc() result is dereferenced on the very next line and
// the strdup() result is stored without a check; neither is tested for NULL.
676 int include(int handle, char * fname)
684 printf("[include: %s, cfileno=%u]\n", fname, cfileno);
686 // Alloc and initialize include-descriptors
687 inobj = a_inobj(SRC_IFILE);
688 ifile = inobj->inobj.ifile;
690 ifile->ifhandle = handle; // Setup file handle
691 ifile->ifind = ifile->ifcnt = 0; // Setup buffer indices
692 ifile->ifoldlineno = curlineno; // Save old line number
693 ifile->ifoldfname = curfname; // Save old filename
694 ifile->ifno = cfileno; // Save old file number
696 // cfileno = filecount++; // Compute new file number
697 // NB: This *must* be preincrement, we're adding one to the filecount here!
698 cfileno = ++filecount; // Compute NEW file number
699 curfname = strdup(fname); // Set current filename (alloc storage)
700 curlineno = 0; // Start on line zero
702 // Add another file to the file-record
703 fr = (FILEREC *)malloc(sizeof(FILEREC));
704 fr->frec_next = NULL;
705 fr->frec_name = curfname; // Shares the strdup()ed string with curfname
708 filerec = fr; // Add first filerec
710 last_fr->frec_next = fr; // Append to list of filerecs
713 DEBUG printf("[include: curfname: %s, cfileno=%u]\n", curfname, cfileno);
720 // Pop the current input level
//
// (The function's signature line is not visible here; callers in this file
// invoke it as fpop().) Unwinds any .if levels opened inside the input
// object, restores tok/etok, releases the type-specific record and finally
// moves the INOBJ itself onto the f_inobj free list.
727 INOBJ * inobj = cur_inobj;
731 // Pop IFENT levels until we reach the conditional assembly context we
732 // were at when the input object was entered.
733 while (ifent != inobj->in_ifent)
735 if (d_endif() != 0) // Something bad happened during endif parsing?
736 return -1; // If yes, bail instead of getting stuck in a loop
739 tok = inobj->in_otok; // Restore tok and etok
740 etok = inobj->in_etok;
742 switch (inobj->in_type)
744 case SRC_IFILE: // Pop and release an IFILE
746 printf("[Leaving: %s]\n", curfname);
748 ifile = inobj->inobj.ifile;
749 ifile->if_link = f_ifile; // Chain the IFILE in front of the free list
751 close(ifile->ifhandle); // Close source file
752 if (verb_flag) printf("[fpop (pre): curfname=%s]\n", curfname);
753 curfname = ifile->ifoldfname; // Set current filename
754 if (verb_flag) printf("[fpop (post): curfname=%s]\n", curfname);
755 if (verb_flag) printf("[fpop: (pre) cfileno=%d ifile->ifno=%d]\n", (int)cfileno, (int)ifile->ifno);
756 curlineno = ifile->ifoldlineno; // Set current line#
757 DEBUG printf("cfileno=%d ifile->ifno=%d\n", (int)cfileno, (int)ifile->ifno);
758 cfileno = ifile->ifno; // Restore current file number
759 if (verb_flag) printf("[fpop: (post) cfileno=%d ifile->ifno=%d]\n", (int)cfileno, (int)ifile->ifno);
761 case SRC_IMACRO: // Pop and release an IMACRO
762 imacro = inobj->inobj.imacro;
763 imacro->im_link = f_imacro; // Chain the IMACRO in front of the free list
766 case SRC_IREPT: // Pop and release an IREPT
767 DEBUG printf("dealloc IREPT\n");
768 p = inobj->inobj.irept->ir_firstln; // Start of the repeat block's line list
// Unlink from the input stack and recycle the INOBJ
779 cur_inobj = inobj->in_link;
780 inobj->in_link = f_inobj;
789 // Get line from file into buf, return NULL on EOF or ptr to the start of a
//
// Works on the current IFILE's buffer: ifind is the index of the first
// unread byte in ifbuf and ifcnt the number of unread bytes. When no line
// terminator is found in the buffered data, the remainder is moved to the
// front of the buffer and up to QUANTUM more bytes are read from ifhandle.
792 char * GetNextLine(void)
796 int readamt = -1; // 0 if last read() yielded 0 bytes
797 IFILE * fl = cur_inobj->inobj.ifile;
801 // Scan for next end-of-line; handle stupid text formats by treating
802 // \r\n the same as \n. (lone '\r' at end of buffer means we have to
804 d = &fl->ifbuf[fl->ifind];
806 for(p=d, i=0, j=fl->ifcnt; i<j; i++, p++)
808 if (*p == '\r' || *p == '\n')
815 break; // Need to read more, then look for '\n' to eat
816 else if (p[1] == '\n')
820 // Cover up the newline with end-of-string sentinel
829 // Handle hanging lines by ignoring them (Input file is exhausted, no
830 // \r or \n on last line)
831 // Shamus: Ignoring them is wrong. Never ignore any input!
832 if (!readamt && fl->ifcnt)
839 // Really should check to see if we're at the end of the buffer!
841 fl->ifbuf[fl->ifind + fl->ifcnt] = '\0';
843 return &fl->ifbuf[fl->ifind];
847 // Truncate and return absurdly long lines.
848 if (fl->ifcnt >= QUANTUM)
850 fl->ifbuf[fl->ifind + fl->ifcnt - 1] = '\0'; // Overwrites the last buffered byte
852 return &fl->ifbuf[fl->ifind];
855 // Relocate what's left of a line to the beginning of the buffer, and
856 // read some more of the file in; return NULL if the buffer's empty and
860 p = &fl->ifbuf[fl->ifind];
// Destination is index 0 or 1, following the parity of ifcnt (the reason
// is not evident from this code -- presumably alignment; TODO confirm)
861 d = &fl->ifbuf[fl->ifcnt & 1];
863 for(i=0; i<fl->ifcnt; i++)
866 fl->ifind = fl->ifcnt & 1;
// Append up to QUANTUM new bytes right after the data already buffered
869 readamt = read(fl->ifhandle, &fl->ifbuf[fl->ifind + fl->ifcnt], QUANTUM);
874 if ((fl->ifcnt += readamt) == 0)
// Tokenize the next line of input.
// Fetches a line from the current input object (file, macro or repeat block),
// popping finished input levels and retrying as needed, then converts the
// text into TOKENs deposited starting at etok. On success tok is left
// pointing at the start of the new token line. Lexical problems are reported
// through error(); TKEOF is returned when ExitMacro() fails.
// NOTE(review): the lookup tables are indexed with plain `char' values taken
// from the line (e.g. chrtab[*ln], hextab[*ln]); hextab/dotxtab/tolowertab
// have 128 entries, so bytes >= 0x80 would index out of range -- confirm
// that such input is filtered before it gets here.
883 int TokenizeLine(void)
885 char * ln = NULL; // Ptr to current position in line
886 char * p; // Random character ptr
887 TOKEN * tk; // Token-deposit ptr
888 int state = 0; // State for keyword detector
889 int j = 0; // Var for keyword detector
890 char c; // Random char
891 VALUE v; // Random value
892 char * nullspot = NULL; // Spot to clobber for SYMBOL termination
893 int stuffnull; // 1:terminate SYMBOL '\0' at *nullspot
895 int stringNum = 0; // Next free slot in string[] for this tokenized line
899 if (cur_inobj == NULL) // Return EOF if input stack is empty
902 // Get another line of input from the current input source: a file, a
903 // macro, or a repeat-block
904 switch (cur_inobj->in_type)
908 // o bump source line number;
909 // o tag the listing-line with a space;
910 // o kludge lines generated by Alcyon C.
912 if ((ln = GetNextLine()) == NULL)
914 if (verb_flag) printf("TokenizeLine: Calling fpop() from SRC_IFILE...\n");
915 fpop(); // Pop input level
916 goto retry; // Try for more lines
919 curlineno++; // Bump line number
924 // AS68 compatibility, throw away all lines starting with
925 // back-quotes, tildes, or '*'
926 // On other lines, turn the first '*' into a semi-colon.
927 if (*ln == '`' || *ln == '~' || *ln == '*')
931 for(p=ln; *p!=EOS; p++)
944 // o Handle end-of-macro;
945 // o tag the listing-line with an at (@) sign.
947 if ((ln = GetNextMacroLine()) == NULL)
949 if (ExitMacro() == 0) // Exit macro (pop args, do fpop(), etc)
950 goto retry; // Try for more lines...
952 return TKEOF; // Oops, we got a non zero return code, signal EOF
958 // o Handle end-of-repeat-block;
959 // o tag the listing-line with a pound (#) sign.
961 if ((ln = GetNextRepeatLine()) == NULL)
963 if (verb_flag) printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n");
972 // Save text of the line. We only do this during listings and within
973 // macro-type blocks, since it is expensive to unconditionally copy every
978 // General house-keeping
979 tok = tokeol; // Set "tok" to EOL in case of error
980 tk = etok; // Reset token ptr
981 stuffnull = 0; // Don't stuff nulls
982 totlines++; // Bump total #lines assembled
984 // See if the entire line is a comment. This is a win if the programmer
985 // puts in lots of comments
986 if (*ln == '*' || *ln == ';' || ((*ln == '/') && (*(ln + 1) == '/')))
989 // Main tokenization loop;
990 // o skip whitespace;
991 // o handle end-of-line;
993 // o handle single-character tokens (operators, etc.);
994 // o handle multiple-character tokens (constants, strings, etc.).
997 // Skip whitespace, handle EOL
998 while ((int)chrtab[*ln] & WHITE)
1001 // Handle EOL, comment with ';'
1002 if (*ln == EOS || *ln == ';'|| ((*ln == '/') && (*(ln + 1) == '/')))
1005 // Handle start of symbol. Symbols are null-terminated in place. The
1006 // termination is always one symbol behind, since there may be no place
1007 // for a null in the case that an operator immediately follows the name.
1012 if (stuffnull) // Terminate old symbol from previous pass
1015 v = 0; // Assume no DOT attrib follows symbol
1017 p = nullspot = ln++; // Nullspot -> start of this symbol
1019 // Find end of symbol (and compute its length)
1020 for(j=1; (int)chrtab[*ln]&CTSYM; j++)
1023 // Handle "DOT" special forms (like ".b") that follow a normal
1024 // symbol or keyword:
1027 *ln++ = EOS; // Terminate symbol
1028 stuffnull = 0; // And never try it again
1030 // Character following the `.' must have a DOT attribute, and
1031 // the character after THAT one must not have a start-symbol
1032 // attribute (to prevent symbols that look like, for example,
1033 // "zingo.barf", which might be a good idea anyway....)
1034 if ((((int)chrtab[*ln] & DOT) == 0) || ((int)dotxtab[*ln] <= 0))
1035 return error("[bwsl] must follow `.' in symbol");
1037 v = (VALUE)dotxtab[*ln++];
1039 if ((int)chrtab[*ln] & CTSYM)
1040 return error("misuse of `.', not allowed in symbols");
1043 // If the symbol is small, check to see if it's really the name of
1047 for(state=0; state>=0;)
1049 j = (int)tolowertab[*p++];
1052 if (kwcheck[j] != state)
1058 if (*p == EOS || p == ln)
1072 // Make j = -1 if user tries to use a RISC register while in 68K mode
1073 if (!(rgpu || rdsp) && ((TOKEN)j >= KW_R0 && (TOKEN)j <= KW_R31))
1078 // Make j = -1 if time, date etc with no preceding ^^
1079 // defined, referenced, streq, macdef, date and time
1082 case 112: // defined
1083 case 113: // referenced
1091 // If not tokenized keyword OR token was not found
1092 if (j < 0 || state < 0)
1096 //problem here: nullspot is a char * but TOKEN is a uint32_t. On a 64-bit
1097 //system, this will cause all kinds of mischief.
1099 *tk++ = (TOKEN)nullspot;
1101 string[stringNum] = nullspot;
1112 if (v) // Record attribute token (if any)
1115 if (stuffnull) // Arrange for string termination on next pass
1121 // Handle identity tokens
1128 // Handle multiple-character tokens
1133 case '!': // ! or !=
1143 case '\'': // 'string'
1144 case '\"': // "string"
1148 // More char * stuffing (8 bytes) into the space of 4 (TOKEN).
1149 // Need to figure out how to fix this crap.
1153 string[stringNum] = ln;
1158 for(p=ln; *ln!=EOS && *ln!=c1;)
1167 return(error("unterminated string"));
1196 warn("bad backslash code in string");
1206 return error("unterminated string");
1210 case '$': // $, hex constant
1211 if ((int)chrtab[*ln] & HDIGIT)
1215 // Parse the hex value
// (the loop stops at the first character whose hextab entry is negative)
1216 while ((int)hextab[*ln] >= 0)
1217 v = (v << 4) + (int)hextab[*ln++];
1219 // ggn: Okay, some comments here are in order I think....
1220 // The original madmac sources didn't parse the size at
1221 // this point (i.e. .b/.w/.l). It was probably done at
1222 // another point, although it's unclear to me exactly
1223 // where. So why change this? My understanding (at least
1224 // from what SCPCD said on IRC) is that .w addressing
1225 // formats produce wrong code on jaguar (or doesn't execute
1226 // properly? something like that). So the code was changed
1227 // to mask off the upper bits depending on length (note: I
1228 // don't think .b is valid at all! I only know of .w/.l, so
1229 // this should probably be wiped). Then the code that
1230 // parses the constant and checks to see if it's between
1231 // $ffff0000 and $8000 never got triggered, so yay job
1232 // done! ...now say we want to assemble a st .prg. One of
1233 // the most widely spread optimisations is move.X expr.w,Y
1234 // (or vice versa, or both, anyway...) to access hardware
1235 // registers (which are mapped to $fxxxxx). This botchy
1236 // thing would create "hilarious" code while trying to
1237 // access hardware registers. So I made a condition to see
1238 // if st mode or jaguar is active and apply the both or
1239 // not. One last note: this is hardcoded to get optimised
1240 // for now on ST mode, i.e. it can't generate code like
1241 // move.w $00001234,d0 - it'll always get optimised to
1242 // move.w $1234.w,d0. It's probably ok, but maybe a warning
1243 // should be emitted? Or maybe finding a way to make it not
1244 // auto-optimise? I think it's ok for now...
1247 if (obj_format == ALCYON)
1249 if ((*(ln + 1) == 'b') || (*(ln + 1) == 'B') || (*(ln + 1) == 'w') || (*(ln + 1) == 'W') || (*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
// "& 0xDF" clears bit 0x20, so the comparisons below are case-insensitive
1256 if ((*(ln + 1) & 0xDF) == 'B')
1261 else if ((*(ln + 1) & 0xDF) == 'W')
1266 else if ((*(ln + 1) & 0xDF) == 'L')
1280 case '<': // < or << or <> or <=
1299 case ':': // : or ::
1309 case '=': // = or ==
1319 case '>': // > or >> or >=
1334 case '%': // % or binary constant
1335 if (*ln < '0' || *ln > '1')
1343 while (*ln >= '0' && *ln <= '1')
1344 v = (v << 1) + *ln++ - '0';
1348 if ((*(ln + 1) == 'b') || (*(ln + 1) == 'B'))
1354 if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
1360 if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
1369 case '@': // @ or octal constant
1370 if (*ln < '0' || *ln > '7')
1378 while (*ln >= '0' && *ln <= '7')
1379 v = (v << 3) + *ln++ - '0';
1383 if ((*(ln+1) == 'b') || (*(ln+1) == 'B'))
1389 if ((*(ln+1) == 'w') || (*(ln+1) == 'W'))
1395 if ((*(ln+1) == 'l') || (*(ln+1) == 'L'))
1404 case '^': // ^ or ^^ <operator-name>
1411 if (((int)chrtab[*++ln] & STSYM) == 0)
1413 error("invalid symbol following ^^");
1419 while ((int)chrtab[*ln] & CTSYM)
1422 for(state=0; state>=0;)
1424 // Get char, convert to lowercase
1427 if (j >= 'A' && j <= 'Z')
1432 if (kwcheck[j] != state)
1438 if (*p == EOS || p == ln)
1447 if (j < 0 || state < 0)
1449 error("unknown symbol following ^^");
1456 interror(2); // Bad MULTX entry in chrtab
1461 // Handle decimal constant
1466 while ((int)chrtab[*ln] & DIGIT)
1467 v = (v * 10) + *ln++ - '0';
1469 // See if there's a .[bwl] after the constant & deal with it if so
1472 if ((*(ln + 1) == 'b') || (*(ln + 1) == 'B'))
1477 else if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
1482 else if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
1490 //printf("CONST: %i\n", v);
1494 // Handle illegal character
1495 return error("illegal character");
1498 // Terminate line of tokens and return "success."
1501 tok = etok; // Set tok to beginning of line
1503 if (stuffnull) // Terminate last SYMBOL
1513 // .GOTO <label> goto directive
1515 // The label is searched for starting from the first line of the current,
1516 // enclosing macro definition. If no enclosing macro exists, an error is
1519 // A label is of the form:
1521 // :<name><whitespace>
1523 // The colon must appear in column 1. The label is stripped prior to macro
1524 // expansion, and is NOT subject to macro expansion. The whitespace may also
//
// The parameter is ignored (it is named `unused'). On a match the macro's
// next-line pointer is set to the label's line; otherwise an error is
// reported via error().
1527 //int d_goto(WORD siz) {
1529 int d_goto(WORD unused)
1533 // Setup for the search
1535 return error("missing label");
1537 // sym = (char *)tok[1];
1538 char * sym = string[tok[1]]; // tok[1] is an index into string[], not a pointer
1541 if (cur_inobj->in_type != SRC_IMACRO)
1542 return error("goto not in macro");
1544 IMACRO * imacro = cur_inobj->inobj.imacro;
1545 // defln = (LONG *)imacro->im_macro->svalue;
1546 struct LineList * defln = imacro->im_macro->lineList;
1548 // Find the label, starting with the first line.
1549 // for(; defln!=NULL; defln=(LONG *)*defln)
1550 for(; defln!=NULL; defln=defln->next)
1552 // if (*(char *)(defln + 1) == ':')
1553 if (defln->line[0] == ':') // Only lines with ':' in column 1 can be labels
1555 // Compare names (sleazo string compare)
1556 // This string compare is not right. Doesn't check for lengths.
1557 // (actually it does, but in a crappy, unclear way.)
1558 WARNING(!!!! Bad string comparison !!!)
1560 // s2 = (char *)(defln + 1) + 1;
1574 // Found the label, set new macro next-line and return.
// (the label on the line must end exactly here: at EOS or at whitespace)
1575 if ((*s2 == EOS) || ((int)chrtab[*s2] & WHITE))
1577 imacro->im_nextln = defln;
1583 return error("goto label not found");
// Debugging aid: print the tokens in tokbuf[], from its first entry up to the
// first EOL, to stdout in bracketed form, prefixed with the value of sloc.
1587 void DumpTokenBuffer(void)
1590 printf("Tokens [%X]: ", sloc);
1592 for(t=tokbuf; *t!=EOL; t++)
1596 else if (*t == CONST)
1599 printf("[CONST: $%X]", (uint32_t)*t);
1601 else if (*t == ACONST)
1603 else if (*t == STRING)
1606 printf("[STRING:\"%s\"]", string[*t]); // Token value indexes string[]
1608 else if (*t == SYMBOL)
1611 printf("[SYMBOL:\"%s\"]", string[*t]); // Token value indexes string[]
1615 else if (*t == TKEOF)
1617 else if (*t == DEQUALS)
1618 printf("[DEQUALS]");
1623 else if (*t == DCOLON)
1635 else if (*t == UNMINUS)
1636 printf("[UNMINUS]");
1637 else if (*t == DOTB)
1639 else if (*t == DOTW)
1641 else if (*t == DOTL)
1643 else if (*t == DOTI)
1645 else if (*t == ENDEXPR)
1646 printf("[ENDEXPR]");
1647 else if (*t == CR_ABSCOUNT)
1648 printf("[CR_ABSCOUNT]");
1649 else if (*t == CR_DEFINED)
1650 printf("[CR_DEFINED]");
1651 else if (*t == CR_REFERENCED)
1652 printf("[CR_REFERENCED]");
1653 else if (*t == CR_STREQ)
1654 printf("[CR_STREQ]");
1655 else if (*t == CR_MACDEF)
1656 printf("[CR_MACDEF]");
1657 else if (*t == CR_TIME)
1658 printf("[CR_TIME]");
1659 else if (*t == CR_DATE)
1660 printf("[CR_DATE]");
// Token values in the ASCII punctuation ranges are printed as characters
1661 else if (*t >= 0x20 && *t <= 0x2F)
1662 printf("[%c]", (char)*t);
1663 else if (*t >= 0x3A && *t <= 0x3F)
1664 printf("[%c]", (char)*t);
// 0x80..0x87 and 0x88..0x8F are printed as D and A register numbers
// (hard-coded ranges; presumably the KW_D0.. / KW_A0.. keyword values)
1665 else if (*t >= 0x80 && *t <= 0x87)
1666 printf("[D%u]", ((uint32_t)*t) - 0x80);
1667 else if (*t >= 0x88 && *t <= 0x8F)
1668 printf("[A%u]", ((uint32_t)*t) - 0x88);
// Anything else: raw hex value plus its character interpretation
1670 printf("[%X:%c]", (uint32_t)*t, (char)*t);