Version bump for last commit; now at v2.0.23.
[rmac] / token.c
1 //
2 // RMAC - Reboot's Macro Assembler for all Atari computers
3 // TOKEN.C - Token Handling
4 // Copyright (C) 199x Landon Dyer, 2011-2020 Reboot and Friends
5 // RMAC derived from MADMAC v1.07 Written by Landon Dyer, 1986
6 // Source utilised with the kind permission of Landon Dyer
7 //
8
9 #include "token.h"
10
11 #include <errno.h>
12 #include "direct.h"
13 #include "error.h"
14 #include "macro.h"
15 #include "procln.h"
16 #include "sect.h"
17 #include "symbol.h"
18
19 #define DECL_KW                         // Declare keyword arrays
20 #define DEF_KW                          // Declare keyword values
21 #include "kwtab.h"                      // Incl generated keyword tables & defs
22
23
24 int lnsave;                                     // 1; strcpy() text of current line
25 uint16_t curlineno;                     // Current line number (64K max currently)
26 int totlines;                           // Total # of lines
27 int mjump_align = 0;            // mjump alignment flag
28 char lntag;                                     // Line tag
29 char * curfname;                        // Current filename
30 char tolowertab[128];           // Uppercase ==> lowercase
31 int8_t hextab[128];                     // Table of hex values
32 char dotxtab[128];                      // Table for ".b", ".s", etc.
33 char irbuf[LNSIZ];                      // Text for .rept block line
34 char lnbuf[LNSIZ];                      // Text of current line
35 WORD filecount;                         // Unique file number counter
36 WORD cfileno;                           // Current file number
37 TOKEN * tok;                            // Ptr to current token
38 TOKEN * etok;                           // Ptr past last token in tokbuf[]
39 TOKEN tokeol[1] = {EOL};        // Bailout end-of-line token
40 char * string[TOKBUFSIZE*2];// Token buffer string pointer storage
41 int optimizeOff;                        // Optimization override flag
42
// Node of the singly linked list that remembers every include file ever
// visited. FILEREC is a macro alias for the struct type.
struct _filerec
{
	struct _filerec * frec_next;	// Link to the following record
	char * frec_name;				// Filename kept in this record
};

#define FILEREC struct _filerec

FILEREC * filerec;					// First record; lookups by file number start here
FILEREC * last_fr;					// NOTE(review): presumably the last record appended; confirm against the code that builds the list
53
54 INOBJ * cur_inobj;                      // Ptr current input obj (IFILE/IMACRO)
55 static INOBJ * f_inobj;         // Ptr list of free INOBJs
56 static IFILE * f_ifile;         // Ptr list of free IFILEs
57 static IMACRO * f_imacro;       // Ptr list of free IMACROs
58
59 static TOKEN tokbuf[TOKBUFSIZE];        // Token buffer (stack-like, all files)
60
61 uint8_t chrtab[0x100] = {
62         ILLEG, ILLEG, ILLEG, ILLEG,                     // NUL SOH STX ETX
63         ILLEG, ILLEG, ILLEG, ILLEG,                     // EOT ENQ ACK BEL
64         ILLEG, WHITE, ILLEG, ILLEG,                     // BS HT LF VT
65         WHITE, ILLEG, ILLEG, ILLEG,                     // FF CR SO SI
66
67         ILLEG, ILLEG, ILLEG, ILLEG,                     // DLE DC1 DC2 DC3
68         ILLEG, ILLEG, ILLEG, ILLEG,                     // DC4 NAK SYN ETB
69         ILLEG, ILLEG, ILLEG, ILLEG,                     // CAN EM SUB ESC
70         ILLEG, ILLEG, ILLEG, ILLEG,                     // FS GS RS US
71
72         WHITE, MULTX, MULTX, SELF,                      // SP ! " #
73         MULTX+CTSYM, MULTX, SELF, MULTX,        // $ % & '
74         SELF, SELF, SELF, SELF,                         // ( ) * +
75         SELF, SELF, STSYM, SELF,                        // , - . /
76
77         DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM,         // 0 1
78         DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM,         // 2 3
79         DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM,         // 4 5
80         DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM,         // 6 7
81         DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM,         // 8 9
82         MULTX, MULTX,                                                           // : ;
83         MULTX, MULTX, MULTX, STSYM+CTSYM,                       // < = > ?
84
85         MULTX, STSYM+CTSYM+HDIGIT,                                      // @ A
86         DOT+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT,     // B C
87         DOT+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT,     // D E
88         STSYM+CTSYM+HDIGIT, STSYM+CTSYM,                        // F G
89         STSYM+CTSYM, DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // H I J K
90         DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // L M N O
91
92         DOT+STSYM+CTSYM, DOT+STSYM+CTSYM, STSYM+CTSYM, DOT+STSYM+CTSYM, // P Q R S
93         STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, DOT+STSYM+CTSYM, // T U V W
94         STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, SELF,// X Y Z [
95         SELF, SELF, MULTX, STSYM+CTSYM,                         // \ ] ^ _
96
97         ILLEG, STSYM+CTSYM+HDIGIT,                                      // ` a
98         DOT+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT,     // b c
99         DOT+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT,     // d e
100         STSYM+CTSYM+HDIGIT, STSYM+CTSYM,                        // f g
101         STSYM+CTSYM, DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // h i j k
102         DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // l m n o
103
104         DOT+STSYM+CTSYM, DOT+STSYM+CTSYM, STSYM+CTSYM, DOT+STSYM+CTSYM, // p q r s
105         STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, DOT+STSYM+CTSYM, // t u v w
106         DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, SELF,                // x y z {
107         SELF, SELF, SELF, ILLEG,                                        // | } ~ DEL
108
109         // Anything above $7F is illegal (and yes, we need to check for this,
110         // otherwise you get strange and spurious errors that will lead you astray)
111         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
112         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
113         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
114         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
115         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
116         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
117         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
118         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
119         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
120         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
121         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
122         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
123         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
124         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
125         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
126         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG
127 };
128
// Register (and register-like keyword) names. ExpandMacro() indexes this table
// with (token - KW_D0); the range noted beside each row is the token value it
// covers. Empty strings fill the unused slots.
static char * regname[] = {
	"d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7",					// 128..135
	"a0", "a1", "a2", "a3", "a4", "a5", "a6", "sp",					// 136..143
	"ssp", "pc", "sr", "ccr", "regequ", "set", "reg", "r0",			// 144..151
	"r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8",					// 152..159
	"r9", "r10", "r11", "r12", "r13", "r14", "r15", "r16",			// 160..167
	"r17", "r18", "r19", "r20", "r21", "r22", "r23", "r24",			// 168..175
	"r25", "r26", "r27", "r28", "r29", "r30", "r31", "ccdef",		// 176..183
	"usp", "ic40", "dc40", "bc40", "sfc", "dfc", "", "vbr",			// 184..191
	"cacr", "caar", "msp", "isp", "tc", "itt0", "itt1", "dtt0",		// 192..199
	"dtt1", "mmusr", "urp", "srp", "iacr0", "iacr1", "dacr0", "dacr1",	// 200..207
	"tt0", "tt1", "crp", "", "", "", "", "",						// 208..215
	"", "", "", "", "fpiar", "fpsr", "fpcr", "",					// 216..223
	"fp0", "fp1", "fp2", "fp3", "fp4", "fp5", "fp6", "fp7",			// 224..231
	"", "", "", "", "", "", "", "",									// 232..239
	"", "", "", "", "", "", "", "",									// 240..247
	"", "", "", "", "", "", "", "",									// 248..255
	"", "", "", "", "x0", "x1", "y0", "y1",							// 256..263
	"", "b0", "", "b2", "", "b1", "a", "b",							// 264..271
	"mr", "omr", "la", "lc", "ssh", "ssl", "ss", "",				// 272..279
	"n0", "n1", "n2", "n3", "n4", "n5", "n6", "n7",					// 280..287
	"m0", "m1", "m2", "m3", "m4", "m5", "m6", "m7",					// 288..295
	"", "", "", "", "", "", "l", "p",								// 296..303
	"mr", "omr", "la", "lc", "ssh", "ssl", "ss", "",				// 304..311
	"a10", "b10", "x", "y", "", "", "ab", "ba"						// 312..319
};
156
// Names of the RISC registers r0..r31. ExpandMacro() indexes this table with
// (token - KW_R0).
static char * riscregname[] = {
	"r0", "r1", "r2", "r3",
	"r4", "r5", "r6", "r7",
	"r8", "r9", "r10", "r11",
	"r12", "r13", "r14", "r15",
	"r16", "r17", "r18", "r19",
	"r20", "r21", "r22", "r23",
	"r24", "r25", "r26", "r27",
	"r28", "r29", "r30", "r31"
};
163
164
165 //
166 // Initialize tokenizer
167 //
168 void InitTokenizer(void)
169 {
170         int i;                                                                  // Iterator
171         char * htab = "0123456789abcdefABCDEF"; // Hex character table
172
173         lnsave = 0;                                                             // Don't save lines
174         curfname = "";                                                  // No file, empty filename
175         filecount = (WORD)-1;
176         cfileno = (WORD)-1;                                             // cfileno gets bumped to 0
177         curlineno = 0;
178         totlines = 0;
179         etok = tokbuf;
180         f_inobj = NULL;
181         f_ifile = NULL;
182         f_imacro = NULL;
183         cur_inobj = NULL;
184         filerec = NULL;
185         last_fr = NULL;
186         lntag = SPACE;
187
188         // Initialize hex, "dot" and tolower tables
189         for(i=0; i<128; i++)
190         {
191                 hextab[i] = -1;
192                 dotxtab[i] = 0;
193                 tolowertab[i] = (char)i;
194         }
195
196         for(i=0; htab[i]!=EOS; i++)
197                 hextab[htab[i]] = (char)((i < 16) ? i : i - 6);
198
199         for(i='A'; i<='Z'; i++)
200                 tolowertab[i] |= 0x20;
201
202         // These characters are legal immediately after a period
203         dotxtab['b'] = DOTB;                                    // .b .B .s .S
204         dotxtab['B'] = DOTB;
205         //dotxtab['s'] = DOTB;
206         //dotxtab['S'] = DOTB;
207         dotxtab['w'] = DOTW;                                    // .w .W
208         dotxtab['W'] = DOTW;
209         dotxtab['l'] = DOTL;                                    // .l .L
210         dotxtab['L'] = DOTL;
211         dotxtab['i'] = DOTI;                                    // .i .I (WTF is this???)
212         dotxtab['I'] = DOTI;
213         dotxtab['D'] = DOTD;                                    // .d .D (double)
214         dotxtab['d'] = DOTD;
215         dotxtab['S'] = DOTS;                                    // .s .S
216         dotxtab['s'] = DOTS;
217         dotxtab['Q'] = DOTQ;                                    // .q .Q (quad word)
218         dotxtab['q'] = DOTQ;
219         dotxtab['X'] = DOTX;                                    // .x .x
220         dotxtab['x'] = DOTX;
221         dotxtab['P'] = DOTP;                                    // .p .P
222         dotxtab['p'] = DOTP;
223 }
224
225
226 void SetFilenameForErrorReporting(void)
227 {
228         WORD fnum = cfileno;
229
230         // Check for absolute top filename (this should never happen)
231         if (fnum == -1)
232         {
233                 curfname = "(*top*)";
234                 return;
235         }
236
237         FILEREC * fr = filerec;
238
239         // Advance to the correct record...
240         while (fr != NULL && fnum != 0)
241         {
242                 fr = fr->frec_next;
243                 fnum--;
244         }
245
246         // Check for file # record not found (this should never happen either)
247         if (fr == NULL)
248         {
249                 curfname = "(*NOT FOUND*)";
250                 return;
251         }
252
253         curfname = fr->frec_name;
254 }
255
256
257 //
258 // Allocate an IFILE or IMACRO
259 //
260 INOBJ * a_inobj(int typ)
261 {
262         INOBJ * inobj;
263         IFILE * ifile;
264         IMACRO * imacro;
265
266         // Allocate and initialize INOBJ first
267         if (f_inobj == NULL)
268                 inobj = malloc(sizeof(INOBJ));
269         else
270         {
271                 inobj = f_inobj;
272                 f_inobj = f_inobj->in_link;
273         }
274
275         switch (typ)
276         {
277         case SRC_IFILE:                                                 // Alloc and init an IFILE
278                 if (f_ifile == NULL)
279                         ifile = malloc(sizeof(IFILE));
280                 else
281                 {
282                         ifile = f_ifile;
283                         f_ifile = f_ifile->if_link;
284                 }
285
286                 inobj->inobj.ifile = ifile;
287                 break;
288
289         case SRC_IMACRO:                                                // Alloc and init an IMACRO
290                 if (f_imacro == NULL)
291                         imacro = malloc(sizeof(IMACRO));
292                 else
293                 {
294                         imacro = f_imacro;
295                         f_imacro = f_imacro->im_link;
296                 }
297
298                 inobj->inobj.imacro = imacro;
299                 break;
300
301         case SRC_IREPT:                                                 // Alloc and init an IREPT
302                 inobj->inobj.irept = malloc(sizeof(IREPT));
303                 DEBUG { printf("alloc IREPT\n"); }
304                 break;
305         }
306
307         // Install INOBJ on top of input stack
308         inobj->in_ifent = ifent;                                // Record .if context on entry
309         inobj->in_type = (WORD)typ;
310         inobj->in_otok = tok;
311         inobj->in_etok = etok;
312         inobj->in_link = cur_inobj;
313         cur_inobj = inobj;
314
315         return inobj;
316 }
317
318
319 //
320 // Perform macro substitution from 'orig' to 'dest'. Return OK or some error.
321 // A macro reference is in one of two forms:
322 // \name <non-name-character>
323 // \{name}
324 // A doubled backslash (\\) is compressed to a single backslash (\).
325 // Argument definitions have been pre-tokenized, so we have to turn them back
326 // into text. This means that numbers, in particular, become hex, regardless of
327 // their representation when the macro was invoked. This is a hack.
328 // A label may appear at the beginning of the line:
329 // :<name><whitespace>
330 // (the colon must be in the first column). These labels are stripped before
331 // macro expansion takes place.
332 //
333 int ExpandMacro(char * src, char * dest, int destsiz)
334 {
335         int i;
336         int questmark;                  // \? for testing argument existence
337         char mname[128];                // Assume max size of a formal arg name
338         char numbuf[20];                // Buffer for text of CONSTs
339         TOKEN * tk;
340         SYM * arg;
341         char ** symbolString;
342
343         DEBUG { printf("ExM: src=\"%s\"\n", src); }
344
345         IMACRO * imacro = cur_inobj->inobj.imacro;
346         int macnum = (int)(imacro->im_macro->sattr);
347
348         char * dst = dest;                                              // Next dest slot
349         char * edst = dest + destsiz - 1;               // End + 1(?) of dest buffer
350
351         // Check for (and skip over) any "label" on the line
352         char * s = src;
353         char * d = NULL;
354
355         if (*s == ':')
356         {
357                 while (*s != EOS && !(chrtab[*s] & WHITE))
358                         s++;
359
360                 if (*s != EOS)
361                         s++;                                                    // Skip first whitespace
362         }
363
364         // Expand the rest of the line
365         while (*s != EOS)
366         {
367                 // Copy single character
368                 if (*s != '\\')
369                 {
370                         if (dst >= edst)
371                                 goto overflow;
372
373                         // Skip comments in case a loose @ or \ is in there
374                         // In that case the tokeniser was trying to expand it.
375                         if ((*s == ';') || ((*s == '/') && (*(s + 1) == '/')))
376                                 goto skipcomments;
377
378                         *dst++ = *s++;
379                 }
380                 // Do macro expansion
381                 else
382                 {
383                         questmark = 0;
384
385                         // Do special cases
386                         switch (*++s)
387                         {
388                         case '\\':                                              // \\, \ (collapse to single backslash)
389                                 if (dst >= edst)
390                                         goto overflow;
391
392                                 *dst++ = *s++;
393                                 continue;
394                         case '?':                                               // \? <macro>  set `questmark' flag
395                                 s++;
396                                 questmark = 1;
397                                 break;
398                         case '#':                                               // \#, number of arguments
399                                 sprintf(numbuf, "%d", (int)imacro->im_nargs);
400                                 goto copystr;
401                         case '!':                                               // \! size suffix supplied on invocation
402                                 switch ((int)imacro->im_siz)
403                                 {
404                                 case SIZN: d = "";   break;
405                                 case SIZB: d = ".b"; break;
406                                 case SIZW: d = ".w"; break;
407                                 case SIZL: d = ".l"; break;
408                                 }
409
410                                 goto copy_d;
411                         case '~':                                               // ==> unique label string Mnnnn...
412                                 sprintf(numbuf, "M%u", curuniq);
413 copystr:
414                                 d = numbuf;
415 copy_d:
416                                 s++;
417
418                                 while (*d != EOS)
419                                 {
420                                         if (dst >= edst)
421                                                 goto overflow;
422                                         else
423                                                 *dst++ = *d++;
424                                 }
425
426                                 continue;
427                         case EOS:
428                                 return error("missing argument name");
429                         }
430
431                         // \n ==> argument number 'n', 0..9
432                         if (chrtab[*s] & DIGIT)
433                         {
434                                 i = *s++ - '1';
435
436                                 if (i < 0)
437                                         i = 9;
438
439                                 goto arg_num;
440                         }
441
442                         // Get argument name: \name, \{name}
443                         d = mname;
444
445                         // \label
446                         if (*s != '{')
447                         {
448                                 do
449                                 {
450                                         *d++ = *s++;
451                                 }
452                                 while (chrtab[*s] & CTSYM);
453                         }
454                         // \\{label}
455                         else
456                         {
457                                 for(++s; *s != EOS && *s != '}';)
458                                         *d++ = *s++;
459
460                                 if (*s != '}')
461                                         return error("missing closing brace ('}')");
462                                 else
463                                         s++;
464                         }
465
466                         *d = EOS;
467
468                         // Lookup the argument and copy its (string) value into the
469                         // destination string
470                         DEBUG { printf("argument='%s'\n", mname); }
471
472                         if ((arg = lookup(mname, MACARG, macnum)) == NULL)
473                                 return error("undefined argument: '%s'", mname);
474                         else
475                         {
476                                 // Convert a string of tokens (terminated with EOL) back into
477                                 // text. If an argument is out of range (not specified in the
478                                 // macro invocation) then it is ignored.
479                                 i = (int)arg->svalue;
480 arg_num:
481                                 DEBUG { printf("~argnumber=%d\n", i); }
482                                 tk = NULL;
483
484                                 if (i < imacro->im_nargs)
485                                 {
486                                         tk = imacro->argument[i].token;
487                                         symbolString = imacro->argument[i].string;
488 //DEBUG
489 //{
490 //      printf("ExM: Preparing to parse argument #%u...\n", i);
491 //      DumpTokens(tk);
492 //}
493                                 }
494
495                                 // \?arg yields:
496                                 //    0  if the argument is empty or non-existant,
497                                 //    1  if the argument is not empty
498                                 if (questmark)
499                                 {
500                                         if (tk == NULL || *tk == EOL)
501                                                 questmark = 0;
502
503                                         if (dst >= edst)
504                                                 goto overflow;
505
506                                         *dst++ = (char)(questmark + '0');
507                                         continue;
508                                 }
509
510                                 // Argument # is in range, so expand it
511                                 if (tk != NULL)
512                                 {
513                                         while (*tk != EOL)
514                                         {
515                                                 // Reverse-translation from a token number to a string.
516                                                 // This is a hack. It might be better table-driven.
517                                                 d = NULL;
518
519                                                 if ((*tk >= KW_D0) && !rdsp && !rgpu)
520                                                 {
521                                                         d = regname[(int)*tk++ - KW_D0];
522                                                         goto strcopy;
523                                                 }
524                                                 else if ((*tk >= KW_R0) && (*tk <= KW_R31))
525                                                 {
526                                                         d = riscregname[(int)*tk++ - KW_R0];
527                                                         goto strcopy;
528                                                 }
529                                                 else
530                                                 {
531                                                         switch ((int)*tk++)
532                                                         {
533                                                         case SYMBOL:
534                                                                 d = symbolString[*tk++];
535 DEBUG { printf("ExM: SYMBOL=\"%s\"", d); }
536                                                                 break;
537                                                         case STRING:
538                                                                 d = symbolString[*tk++];
539
540                                                                 if (dst >= edst)
541                                                                         goto overflow;
542
543                                                                 *dst++ = '"';
544
545                                                                 while (*d != EOS)
546                                                                 {
547                                                                         if (dst >= edst)
548                                                                                 goto overflow;
549                                                                         else
550                                                                                 *dst++ = *d++;
551                                                                 }
552
553                                                                 if (dst >= edst)
554                                                                         goto overflow;
555
556                                                                 *dst++ = '"';
557                                                                 continue;
558                                                                 break;
559 // Shamus: Changing the format specifier from %lx to %ux caused the assembler
560 //         to choke on legitimate code... Need to investigate this further
561 //         before changing anything else here!
562                                                         case CONST:
563 //                                                              sprintf(numbuf, "$%lx", (uint64_t)*tk++);
564                                                                 sprintf(numbuf, "$%" PRIX64, (uint64_t)*tk++);
565                                                                 tk++;
566                                                                 d = numbuf;
567                                                                 break;
568                                                         case DEQUALS:
569                                                                 d = "==";
570                                                                 break;
571                                                         case SET:
572                                                                 d = "set";
573                                                                 break;
574                                                         case COLON:
575                                                                 d = ":";
576                                                                 break;
577                                                         case DCOLON:
578                                                                 d = "::";
579                                                                 break;
580                                                         case GE:
581                                                                 d = ">=";
582                                                                 break;
583                                                         case LE:
584                                                                 d = "<=";
585                                                                 break;
586                                                         case NE:
587                                                                 d = "<>";
588                                                                 break;
589                                                         case SHR:
590                                                                 d = ">>";
591                                                                 break;
592                                                         case SHL:
593                                                                 d = "<<";
594                                                                 break;
595                                                         case DOTB:
596                                                                 d = ".b";
597                                                                 break;
598                                                         case DOTW:
599                                                                 d = ".w";
600                                                                 break;
601                                                         case DOTL:
602                                                                 d = ".l";
603                                                                 break;
604                                                         case CR_ABSCOUNT:
605                                                                 d = "^^abscount";
606                                                                 break;
607                                                         case CR_FILESIZE:
608                                                                 d = "^^filesize";
609                                                                 break;
610                                                         case CR_DATE:
611                                                                 d = "^^date";
612                                                                 break;
613                                                         case CR_TIME:
614                                                                 d = "^^time";
615                                                                 break;
616                                                         case CR_DEFINED:
617                                                                 d = "^^defined ";
618                                                                 break;
619                                                         case CR_REFERENCED:
620                                                                 d = "^^referenced ";
621                                                                 break;
622                                                         case CR_STREQ:
623                                                                 d = "^^streq ";
624                                                                 break;
625                                                         case CR_MACDEF:
626                                                                 d = "^^macdef ";
627                                                                 break;
628                                                         default:
629                                                                 if (dst >= edst)
630                                                                         goto overflow;
631
632                                                                 *dst++ = (char)*(tk - 1);
633                                                                 break;
634                                                         }
635                                                 }
636
637                                                 // If 'd' != NULL, copy string to destination
638                                                 if (d != NULL)
639                                                 {
640 strcopy:
641                                                         DEBUG printf("d='%s'\n", d);
642
643                                                         while (*d != EOS)
644                                                         {
645                                                                 if (dst >= edst)
646                                                                         goto overflow;
647                                                                 else
648                                                                         *dst++ = *d++;
649                                                         }
650                                                 }
651                                         }
652                                 }
653                         }
654                 }
655         }
656
657 skipcomments:
658
659         *dst = EOS;
660         DEBUG { printf("ExM: dst=\"%s\"\n", dest); }
661         return OK;
662
663 overflow:
664         *dst = EOS;
665         DEBUG { printf("*** OVERFLOW LINE ***\n%s\n", dest); }
666         return fatal("line too long as a result of macro expansion");
667 }
668
669
670 //
671 // Get next line of text from a macro
672 //
673 char * GetNextMacroLine(void)
674 {
675         IMACRO * imacro = cur_inobj->inobj.imacro;
676 //      LONG * strp = imacro->im_nextln;
677         LLIST * strp = imacro->im_nextln;
678
679         if (strp == NULL)                                               // End-of-macro
680                 return NULL;
681
682         imacro->im_nextln = strp->next;
683 //      ExpandMacro((char *)(strp + 1), imacro->im_lnbuf, LNSIZ);
684         ExpandMacro(strp->line, imacro->im_lnbuf, LNSIZ);
685
686         return imacro->im_lnbuf;
687 }
688
689
690 //
691 // Get next line of text from a repeat block
692 //
693 char * GetNextRepeatLine(void)
694 {
695         IREPT * irept = cur_inobj->inobj.irept;
696 //      LONG * strp = irept->ir_nextln;                 // initial null
697
698         // Do repeat at end of .rept block's string list
699 //      if (strp == NULL)
700         if (irept->ir_nextln == NULL)
701         {
702                 DEBUG { printf("back-to-top-of-repeat-block count=%d\n", (int)irept->ir_count); }
703                 irept->ir_nextln = irept->ir_firstln;   // copy first line
704
705                 if (irept->ir_count-- == 0)
706                 {
707                         DEBUG { printf("end-repeat-block\n"); }
708                         return NULL;
709                 }
710
711 //              strp = irept->ir_nextln;
712         }
713         // Mark the current macro line in the irept object
714         // This is probably overkill - a global variable
715         // would suffice here (it only gets used during
716         // error reporting anyway)
717         irept->lineno = irept->ir_nextln->lineno;
718
719 //      strcpy(irbuf, (char *)(irept->ir_nextln + 1));
720         strcpy(irbuf, irept->ir_nextln->line);
721         DEBUG { printf("repeat line='%s'\n", irbuf); }
722 //      irept->ir_nextln = (LONG *)*strp;
723         irept->ir_nextln = irept->ir_nextln->next;
724
725         return irbuf;
726 }
727
728
729 //
730 // Include a source file used at the root, and for ".include" files
731 //
732 int include(int handle, char * fname)
733 {
734         // Debug mode
735         DEBUG { printf("[include: %s, cfileno=%u]\n", fname, cfileno); }
736
737         // Alloc and initialize include-descriptors
738         INOBJ * inobj = a_inobj(SRC_IFILE);
739         IFILE * ifile = inobj->inobj.ifile;
740
741         ifile->ifhandle = handle;                       // Setup file handle
742         ifile->ifind = ifile->ifcnt = 0;        // Setup buffer indices
743         ifile->ifoldlineno = curlineno;         // Save old line number
744         ifile->ifoldfname = curfname;           // Save old filename
745         ifile->ifno = cfileno;                          // Save old file number
746
747         // NB: This *must* be preincrement, we're adding one to the filecount here!
748         cfileno = ++filecount;                          // Compute NEW file number
749         curfname = strdup(fname);                       // Set current filename (alloc storage)
750         curlineno = 0;                                          // Start on line zero
751
752         // Add another file to the file-record
753         FILEREC * fr = (FILEREC *)malloc(sizeof(FILEREC));
754         fr->frec_next = NULL;
755         fr->frec_name = curfname;
756
757         if (last_fr == NULL)
758                 filerec = fr;                                   // Add first filerec
759         else
760                 last_fr->frec_next = fr;                // Append to list of filerecs
761
762         last_fr = fr;
763         DEBUG { printf("[include: curfname: %s, cfileno=%u]\n", curfname, cfileno); }
764
765         return OK;
766 }
767
768
769 //
770 // Pop the current input level
771 //
772 int fpop(void)
773 {
774         INOBJ * inobj = cur_inobj;
775
776         if (inobj == NULL)
777                 return 0;
778
779         // Pop IFENT levels until we reach the conditional assembly context we
780         // were at when the input object was entered.
781         int numUnmatched = 0;
782
783         while (ifent != inobj->in_ifent)
784         {
785                 if (d_endif() != 0)     // Something bad happened during endif parsing?
786                         return -1;              // If yes, bail instead of getting stuck in a loop
787
788                 numUnmatched++;
789         }
790
791         // Give a warning to the user that we had to wipe their bum for them
792         if (numUnmatched > 0)
793                 warn("missing %d .endif(s)", numUnmatched);
794
795         tok = inobj->in_otok;   // Restore tok and otok
796         etok = inobj->in_etok;
797
798         switch (inobj->in_type)
799         {
800         case SRC_IFILE:                 // Pop and release an IFILE
801         {
802                 DEBUG { printf("[Leaving: %s]\n", curfname); }
803
804                 IFILE * ifile = inobj->inobj.ifile;
805                 ifile->if_link = f_ifile;
806                 f_ifile = ifile;
807                 close(ifile->ifhandle);                 // Close source file
808 DEBUG { printf("[fpop (pre):  curfname=%s]\n", curfname); }
809                 curfname = ifile->ifoldfname;   // Set current filename
810 DEBUG { printf("[fpop (post): curfname=%s]\n", curfname); }
811 DEBUG { printf("[fpop: (pre)  cfileno=%d ifile->ifno=%d]\n", (int)cfileno, (int)ifile->ifno); }
812                 curlineno = ifile->ifoldlineno; // Set current line#
813                 DEBUG { printf("cfileno=%d ifile->ifno=%d\n", (int)cfileno, (int)ifile->ifno); }
814                 cfileno = ifile->ifno;                  // Restore current file number
815 DEBUG { printf("[fpop: (post) cfileno=%d ifile->ifno=%d]\n", (int)cfileno, (int)ifile->ifno); }
816                 break;
817         }
818
819         case SRC_IMACRO:                                        // Pop and release an IMACRO
820         {
821                 IMACRO * imacro = inobj->inobj.imacro;
822                 imacro->im_link = f_imacro;
823                 f_imacro = imacro;
824                 break;
825         }
826
827         case SRC_IREPT:                                         // Pop and release an IREPT
828         {
829                 DEBUG { printf("dealloc IREPT\n"); }
830                 LLIST * p = inobj->inobj.irept->ir_firstln;
831
832                 // Deallocate repeat lines
833                 while (p != NULL)
834                 {
835                         free(p->line);
836                         p = p->next;
837                 }
838
839                 break;
840         }
841         }
842
843         cur_inobj = inobj->in_link;
844         inobj->in_link = f_inobj;
845         f_inobj = inobj;
846
847         return 0;
848 }
849
850
851 //
852 // Get line from file into buf, return NULL on EOF or ptr to the start of a
853 // null-term line
854 //
855 char * GetNextLine(void)
856 {
857         int i, j;
858         char * p, * d;
859         int readamt = -1;                                               // 0 if last read() yeilded 0 bytes
860         IFILE * fl = cur_inobj->inobj.ifile;
861
862         for(;;)
863         {
864                 // Scan for next end-of-line; handle stupid text formats by treating
865                 // \r\n the same as \n. (lone '\r' at end of buffer means we have to
866                 // check for '\n').
867                 d = &fl->ifbuf[fl->ifind];
868
869                 for(p=d, i=0, j=fl->ifcnt; i<j; i++, p++)
870                 {
871                         if (*p == '\r' || *p == '\n')
872                         {
873                                 i++;
874
875                                 if (*p == '\r')
876                                 {
877                                         if (i >= j)
878                                                 break;  // Need to read more, then look for '\n' to eat
879                                         else if (p[1] == '\n')
880                                                 i++;
881                                 }
882
883                                 // Cover up the newline with end-of-string sentinel
884                                 *p = '\0';
885
886                                 fl->ifind += i;
887                                 fl->ifcnt -= i;
888                                 return d;
889                         }
890                 }
891
892                 // Handle hanging lines by ignoring them (Input file is exhausted, no
893                 // \r or \n on last line)
894                 // Shamus: This is retarded. Never ignore any input!
895                 if (!readamt && fl->ifcnt)
896                 {
897 #if 0
898                         fl->ifcnt = 0;
899                         *p = '\0';
900                         return NULL;
901 #else
902                         // Really should check to see if we're at the end of the buffer!
903                         // :-P
904                         fl->ifbuf[fl->ifind + fl->ifcnt] = '\0';
905                         fl->ifcnt = 0;
906                         return &fl->ifbuf[fl->ifind];
907 #endif
908                 }
909
910                 // Truncate and return absurdly long lines.
911                 if (fl->ifcnt >= QUANTUM)
912                 {
913                         fl->ifbuf[fl->ifind + fl->ifcnt - 1] = '\0';
914                         fl->ifcnt = 0;
915                         return &fl->ifbuf[fl->ifind];
916                 }
917
918                 // Relocate what's left of a line to the beginning of the buffer, and
919                 // read some more of the file in; return NULL if the buffer's empty and
920                 // on EOF.
921                 if (fl->ifind != 0)
922                 {
923                         p = &fl->ifbuf[fl->ifind];
924                         d = &fl->ifbuf[fl->ifcnt & 1];
925
926                         for(i=0; i<fl->ifcnt; i++)
927                                 *d++ = *p++;
928
929                         fl->ifind = fl->ifcnt & 1;
930                 }
931
932                 readamt = read(fl->ifhandle, &fl->ifbuf[fl->ifind + fl->ifcnt], QUANTUM);
933
934                 if (readamt < 0)
935                         return NULL;
936
937                 if ((fl->ifcnt += readamt) == 0)
938                         return NULL;
939         }
940 }
941
942
943 //
944 // Tokenize a line
945 //
946 int TokenizeLine(void)
947 {
948         uint8_t * ln = NULL;            // Ptr to current position in line
949         uint8_t * p;                            // Random character ptr
950         PTR tk;                                         // Token-deposit ptr
951         int state = 0;                          // State for keyword detector
952         int j = 0;                                      // Var for keyword detector
953         uint8_t c;                                      // Random char
954         uint64_t v;                                     // Random value
955         uint32_t cursize = 0;           // Current line's size (.b, .w, .l, .s, .q, .d)
956         uint8_t * nullspot = NULL;      // Spot to clobber for SYMBOL termination
957         int stuffnull;                          // 1:terminate SYMBOL '\0' at *nullspot
958         uint8_t c1;
959         int stringNum = 0;                      // Pointer to string locations in tokenized line
960
961 retry:
962
963         if (cur_inobj == NULL)          // Return EOF if input stack is empty
964                 return TKEOF;
965
966         // Get another line of input from the current input source: a file, a
967         // macro, or a repeat-block
968         switch (cur_inobj->in_type)
969         {
970         // Include-file:
971         // o  handle EOF;
972         // o  bump source line number;
973         // o  tag the listing-line with a space;
974         // o  kludge lines generated by Alcyon C.
975         case SRC_IFILE:
976                 if ((ln = GetNextLine()) == NULL)
977                 {
978 DEBUG { printf("TokenizeLine: Calling fpop() from SRC_IFILE...\n"); }
979                         if (fpop() == 0)        // Pop input level
980                                 goto retry;             // Try for more lines
981                         else
982                         {
983                                 ifent->if_prev = (IFENT *)-1;   //Signal Assemble() that we have reached EOF with unbalanced if/endifs
984                                 return TKEOF;
985                         }
986                 }
987
988                 curlineno++;                    // Bump line number
989                 lntag = SPACE;
990
991                 if (as68_flag)
992                 {
993                         // AS68 compatibility, throw away all lines starting with
994                         // back-quotes, tildes, or '*'
995                         // On other lines, turn the first '*' into a semi-colon.
996                         if (*ln == '`' || *ln == '~' || *ln == '*')
997                                 *ln = ';';
998                         else
999                         {
1000                                 for(p=ln; *p!=EOS; p++)
1001                                 {
1002                                         if (*p == '*')
1003                                         {
1004                                                 *p = ';';
1005                                                 break;
1006                                         }
1007                                 }
1008                         }
1009                 }
1010
1011                 break;
1012
1013         // Macro-block:
1014         // o  Handle end-of-macro;
1015         // o  tag the listing-line with an at (@) sign.
1016         case SRC_IMACRO:
1017                 if ((ln = GetNextMacroLine()) == NULL)
1018                 {
1019                         if (ExitMacro() == 0)   // Exit macro (pop args, do fpop(), etc)
1020                                 goto retry;                     // Try for more lines...
1021                         else
1022                                 return TKEOF;           // Oops, we got a non zero return code, signal EOF
1023                 }
1024
1025                 lntag = '@';
1026                 break;
1027
1028         // Repeat-block:
1029         // o  Handle end-of-repeat-block;
1030         // o  tag the listing-line with a pound (#) sign.
1031         case SRC_IREPT:
1032                 if ((ln = GetNextRepeatLine()) == NULL)
1033                 {
1034                         DEBUG { printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n"); }
1035                         fpop();
1036                         goto retry;
1037                 }
1038
1039                 lntag = '#';
1040                 break;
1041         }
1042
1043         // Save text of the line. We only do this during listings and within
1044         // macro-type blocks, since it is expensive to unconditionally copy every
1045         // line.
1046         if (lnsave)
1047                 strcpy(lnbuf, ln);
1048
1049         // General housekeeping
1050         tok = tokeol;                   // Set "tok" to EOL in case of error
1051         tk.u32 = etok;                  // Reset token ptr
1052         stuffnull = 0;                  // Don't stuff nulls
1053         totlines++;                             // Bump total #lines assembled
1054
1055         // See if the entire line is a comment. This is a win if the programmer
1056         // puts in lots of comments
1057         if (*ln == '*' || *ln == ';' || ((*ln == '/') && (*(ln + 1) == '/')))
1058                 goto goteol;
1059
1060         // And here we have a very ugly hack for signalling a single line 'turn off
1061         // optimization'. There's really no nice way to do this, so hack it is!
1062         optimizeOff = 0;                // Default is to take optimizations as they come
1063
1064         if (*ln == '!')
1065         {
1066                 optimizeOff = 1;        // Signal that we don't want to optimize this line
1067                 ln++;                           // & skip over the darned thing
1068         }
1069
1070         // Main tokenization loop;
1071         //  o  skip whitespace;
1072         //  o  handle end-of-line;
1073         //  o  handle symbols;
1074         //  o  handle single-character tokens (operators, etc.);
1075         //  o  handle multiple-character tokens (constants, strings, etc.).
1076         for(; *ln!=EOS;)
1077         {
1078                 // Skip whitespace, handle EOL
1079                 while (chrtab[*ln] & WHITE)
1080                         ln++;
1081
1082                 // Handle EOL, comment with ';'
1083                 if (*ln == EOS || *ln == ';'|| ((*ln == '/') && (*(ln + 1) == '/')))
1084                         break;
1085
1086                 // Handle start of symbol. Symbols are null-terminated in place. The
1087                 // termination is always one symbol behind, since there may be no place
1088                 // for a null in the case that an operator immediately follows the name.
1089                 c = chrtab[*ln];
1090
1091                 if (c & STSYM)
1092                 {
1093                         if (stuffnull)                  // Terminate old symbol from previous pass
1094                                 *nullspot = EOS;
1095
1096                         v = 0;                                  // Assume no DOT attrib follows symbol
1097                         stuffnull = 1;
1098
1099                         // In some cases, we need to check for a DOTx at the *beginning*
1100                         // of a symbol, as the "start" of the line we're currently looking
1101                         // at could be somewhere in the middle of that line!
1102                         if (*ln == '.')
1103                         {
1104                                 // Make sure that it's *only* a .[bwsl] following, and not the
1105                                 // start of a local symbol:
1106                                 if ((chrtab[*(ln + 1)] & DOT)
1107                                         && (dotxtab[*(ln + 1)] != 0)
1108                                         && !(chrtab[*(ln + 2)] & CTSYM))
1109                                 {
1110                                         // We found a legitimate DOTx construct, so add it to the
1111                                         // token stream:
1112                                         ln++;
1113                                         stuffnull = 0;
1114                                         *tk.u32++ = (TOKEN)dotxtab[*ln++];
1115                                         continue;
1116                                 }
1117                         }
1118
1119                         p = nullspot = ln++;    // Nullspot -> start of this symbol
1120
1121                         // Find end of symbol (and compute its length)
1122                         for(j=1; (int)chrtab[*ln]&CTSYM; j++)
1123                                 ln++;
1124
1125                         // Handle "DOT" special forms (like ".b") that follow a normal
1126                         // symbol or keyword:
1127                         if (*ln == '.')
1128                         {
1129                                 *ln++ = EOS;            // Terminate symbol
1130                                 stuffnull = 0;          // And never try it again
1131
1132                                 // Character following the '.' must have a DOT attribute, and
1133                                 // the chararacter after THAT one must not have a start-symbol
1134                                 // attribute (to prevent symbols that look like, for example,
1135                                 // "zingo.barf", which might be a good idea anyway....)
1136                                 if (((chrtab[*ln] & DOT) == 0) || (dotxtab[*ln] == 0))
1137                                         return error("[bwsl] must follow '.' in symbol");
1138
1139                                 v = (uint32_t)dotxtab[*ln++];
1140                                 cursize = (uint32_t)v;
1141
1142                                 if (chrtab[*ln] & CTSYM)
1143                                         return error("misuse of '.'; not allowed in symbols");
1144                         }
1145
1146                         // If the symbol is small, check to see if it's really the name of
1147                         // a register.
1148                         if (j <= KWSIZE)
1149                         {
1150                                 for(state=0; state>=0;)
1151                                 {
1152                                         j = (int)tolowertab[*p++];
1153                                         j += kwbase[state];
1154
1155                                         if (kwcheck[j] != state)
1156                                         {
1157                                                 j = -1;
1158                                                 break;
1159                                         }
1160
1161                                         if (*p == EOS || p == ln)
1162                                         {
1163                                                 j = kwaccept[j];
1164                                                 break;
1165                                         }
1166
1167                                         state = kwtab[j];
1168                                 }
1169                         }
1170                         else
1171                         {
1172                                 j = -1;
1173                         }
1174
1175                         // Make j = -1 if user tries to use a RISC register while in 68K mode
1176                         if (!(rgpu || rdsp || dsp56001) && ((TOKEN)j >= KW_R0 && (TOKEN)j <= KW_R31))
1177                         {
1178                                 j = -1;
1179                         }
1180
1181                         // Make j = -1 if time, date etc with no preceeding ^^
1182                         // defined, referenced, streq, macdef, date and time
1183                         switch ((TOKEN)j)
1184                         {
1185                         case 112:   // defined
1186                         case 113:   // referenced
1187                         case 118:   // streq
1188                         case 119:   // macdef
1189                         case 120:   // time
1190                         case 121:   // date
1191                                 j = -1;
1192                         }
1193
1194                         // If not tokenized keyword OR token was not found
1195                         if ((j < 0) || (state < 0))
1196                         {
1197                                 *tk.u32++ = SYMBOL;
1198                                 string[stringNum] = nullspot;
1199                                 *tk.u32++ = stringNum;
1200                                 stringNum++;
1201                         }
1202                         else
1203                         {
1204                                 *tk.u32++ = (TOKEN)j;
1205                                 stuffnull = 0;
1206                         }
1207
1208                         if (v)                  // Record attribute token (if any)
1209                                 *tk.u32++ = (TOKEN)v;
1210
1211                         if (stuffnull)  // Arrange for string termination on next pass
1212                                 nullspot = ln;
1213
1214                         continue;
1215                 }
1216
1217                 // Handle identity tokens
1218                 if (c & SELF)
1219                 {
1220                         *tk.u32++ = *ln++;
1221                         continue;
1222                 }
1223
1224                 // Handle multiple-character tokens
1225                 if (c & MULTX)
1226                 {
1227                         switch (*ln++)
1228                         {
1229                         case '!':               // ! or !=
1230                                 if (*ln == '=')
1231                                 {
1232                                         *tk.u32++ = NE;
1233                                         ln++;
1234                                 }
1235                                 else
1236                                         *tk.u32++ = '!';
1237
1238                                 continue;
1239                         case '\'':              // 'string'
1240                                 if (m6502)
1241                                 {
1242                                         // Hardcoded for now, maybe this will change in the future
1243                                         *tk.u32++ = STRINGA8;
1244                                         goto dostring;
1245                                 }
1246                                 // Fall through
1247                         case '\"':              // "string"
1248                                 *tk.u32++ = STRING;
1249 dostring:
1250                                 c1 = ln[-1];
1251                                 string[stringNum] = ln;
1252                                 *tk.u32++ = stringNum;
1253                                 stringNum++;
1254
1255                                 for(p=ln; *ln!=EOS && *ln!=c1;)
1256                                 {
1257                                         c = *ln++;
1258
1259                                         if (c == '\\')
1260                                         {
1261                                                 switch (*ln++)
1262                                                 {
1263                                                 case EOS:
1264                                                         return(error("unterminated string"));
1265                                                 case 'e':
1266                                                         c = '\033';
1267                                                         break;
1268                                                 case 'n':
1269                                                         c = '\n';
1270                                                         break;
1271                                                 case 'b':
1272                                                         c = '\b';
1273                                                         break;
1274                                                 case 't':
1275                                                         c = '\t';
1276                                                         break;
1277                                                 case 'r':
1278                                                         c = '\r';
1279                                                         break;
1280                                                 case 'f':
1281                                                         c = '\f';
1282                                                         break;
1283                                                 case '\"':
1284                                                         c = '\"';
1285                                                         break;
1286                                                 case '\'':
1287                                                         c = '\'';
1288                                                         break;
1289                                                 case '\\':
1290                                                         c = '\\';
1291                                                         break;
1292                                                 case '{':
1293                                                         // If we're evaluating a macro
1294                                                         // this is valid because it's
1295                                                         // a parameter expansion
1296                                                 case '!':
1297                                                         // If we're evaluating a macro
1298                                                         // this is valid and expands to
1299                                                         // "dot-size"
1300                                                         break;
1301                                                 default:
1302                                                         warn("bad backslash code in string");
1303                                                         ln--;
1304                                                         break;
1305                                                 }
1306                                         }
1307
1308                                         *p++ = c;
1309                                 }
1310
1311                                 if (*ln++ != c1)
1312                                         return error("unterminated string");
1313
1314                                 *p++ = EOS;
1315                                 continue;
1316                         case '$':               // $, hex constant
1317                                 if (chrtab[*ln] & HDIGIT)
1318                                 {
1319                                         v = 0;
1320
1321                                         // Parse the hex value
1322                                         while (hextab[*ln] >= 0)
1323                                                 v = (v << 4) + (int)hextab[*ln++];
1324
1325                                         *tk.u32++ = CONST;
1326                                         *tk.u64++ = v;
1327
1328                                         if (*ln == '.')
1329                                         {
1330                                                 if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
1331                                                 {
1332                                                         *tk.u32++ = DOTW;
1333                                                         ln += 2;
1334                                                 }
1335                                                 else if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
1336                                                 {
1337                                                         *tk.u32++ = DOTL;
1338                                                         ln += 2;
1339                                                 }
1340                                         }
1341                                 }
1342                                 else
1343                                         *tk.u32++ = '$';
1344
1345                                 continue;
1346                         case '<':               // < or << or <> or <=
1347                                 switch (*ln)
1348                                 {
1349                                 case '<':
1350                                         *tk.u32++ = SHL;
1351                                         ln++;
1352                                         continue;
1353                                 case '>':
1354                                         *tk.u32++ = NE;
1355                                         ln++;
1356                                         continue;
1357                                 case '=':
1358                                         *tk.u32++ = LE;
1359                                         ln++;
1360                                         continue;
1361                                 default:
1362                                         *tk.u32++ = '<';
1363                                         continue;
1364                                 }
1365                         case ':':               // : or ::
1366                                 if (*ln == ':')
1367                                 {
1368                                         *tk.u32++ = DCOLON;
1369                                         ln++;
1370                                 }
1371                                 else
1372                                         *tk.u32++ = ':';
1373
1374                                 continue;
1375                         case '=':               // = or ==
1376                                 if (*ln == '=')
1377                                 {
1378                                         *tk.u32++ = DEQUALS;
1379                                         ln++;
1380                                 }
1381                                 else
1382                                         *tk.u32++ = '=';
1383
1384                                 continue;
1385                         case '>':               // > or >> or >=
1386                                 switch (*ln)
1387                                 {
1388                                 case '>':
1389                                         *tk.u32++ = SHR;
1390                                         ln++;
1391                                         continue;
1392                                 case '=':
1393                                         *tk.u32++ = GE;
1394                                         ln++;
1395                                         continue;
1396                                 default:
1397                                         *tk.u32++ = '>';
1398                                         continue;
1399                                 }
1400                         case '%':               // % or binary constant
1401                                 if (*ln < '0' || *ln > '1')
1402                                 {
1403                                         *tk.u32++ = '%';
1404                                         continue;
1405                                 }
1406
1407                                 v = 0;
1408
1409                                 while (*ln >= '0' && *ln <= '1')
1410                                         v = (v << 1) + *ln++ - '0';
1411
1412                                 if (*ln == '.')
1413                                 {
1414                                         if ((*(ln + 1) == 'b') || (*(ln + 1) == 'B'))
1415                                         {
1416                                                 v &= 0x000000FF;
1417                                                 ln += 2;
1418                                         }
1419
1420                                         if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
1421                                         {
1422                                                 v &= 0x0000FFFF;
1423                                                 ln += 2;
1424                                         }
1425
1426                                         if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
1427                                         {
1428                                                 v &= 0xFFFFFFFF;
1429                                                 ln += 2;
1430                                         }
1431                                 }
1432
1433                                 *tk.u32++ = CONST;
1434                                 *tk.u64++ = v;
1435                                 continue;
1436                         case '@':               // @ or octal constant
1437                                 if (*ln < '0' || *ln > '7')
1438                                 {
1439                                         *tk.u32++ = '@';
1440                                         continue;
1441                                 }
1442
1443                                 v = 0;
1444
1445                                 while (*ln >= '0' && *ln <= '7')
1446                                         v = (v << 3) + *ln++ - '0';
1447
1448                                 if (*ln == '.')
1449                                 {
1450                                         if ((*(ln + 1) == 'b') || (*(ln + 1) == 'B'))
1451                                         {
1452                                                 v &= 0x000000FF;
1453                                                 ln += 2;
1454                                         }
1455
1456                                         if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
1457                                         {
1458                                                 v &= 0x0000FFFF;
1459                                                 ln += 2;
1460                                         }
1461
1462                                         if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
1463                                         {
1464                                                 v &= 0xFFFFFFFF;
1465                                                 ln += 2;
1466                                         }
1467                                 }
1468
1469                                 *tk.u32++ = CONST;
1470                                 *tk.u64++ = v;
1471                                 continue;
1472                         case '^':               // ^ or ^^ <operator-name>
1473                                 if (*ln != '^')
1474                                 {
1475                                         *tk.u32++ = '^';
1476                                         continue;
1477                                 }
1478
1479                                 if (((int)chrtab[*++ln] & STSYM) == 0)
1480                                 {
1481                                         error("invalid symbol following ^^");
1482                                         continue;
1483                                 }
1484
1485                                 p = ln++;
1486
1487                                 while ((int)chrtab[*ln] & CTSYM)
1488                                         ++ln;
1489
1490                                 for(state=0; state>=0;)
1491                                 {
1492                                         // Get char, convert to lowercase
1493                                         j = *p++;
1494
1495                                         if (j >= 'A' && j <= 'Z')
1496                                                 j += 0x20;
1497
1498                                         j += kwbase[state];
1499
1500                                         if (kwcheck[j] != state)
1501                                         {
1502                                                 j = -1;
1503                                                 break;
1504                                         }
1505
1506                                         if (*p == EOS || p == ln)
1507                                         {
1508                                                 j = kwaccept[j];
1509                                                 break;
1510                                         }
1511
1512                                         state = kwtab[j];
1513                                 }
1514
1515                                 if (j < 0 || state < 0)
1516                                 {
1517                                         error("unknown symbol following ^^");
1518                                         continue;
1519                                 }
1520
1521                                 *tk.u32++ = (TOKEN)j;
1522                                 continue;
1523                         default:
1524                                 interror(2);    // Bad MULTX entry in chrtab
1525                                 continue;
1526                         }
1527                 }
1528
1529                 // Handle decimal constant
1530                 if (c & DIGIT)
1531                 {
1532                         uint8_t * numStart = ln;
1533                         v = 0;
1534
1535                         while ((int)chrtab[*ln] & DIGIT)
1536                                 v = (v * 10) + *ln++ - '0';
1537
1538                         // See if there's a .[bwl] after the constant & deal with it if so
1539                         if (*ln == '.')
1540                         {
1541                                 if ((*(ln + 1) == 'b') || (*(ln + 1) == 'B'))
1542                                 {
1543                                         v &= 0x000000FF;
1544                                         ln += 2;
1545                                         *tk.u32++ = CONST;
1546                                         *tk.u64++ = v;
1547                                         *tk.u32++ = DOTB;
1548                                 }
1549                                 else if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
1550                                 {
1551                                         v &= 0x0000FFFF;
1552                                         ln += 2;
1553                                         *tk.u32++ = CONST;
1554                                         *tk.u64++ = v;
1555                                         *tk.u32++ = DOTW;
1556                                 }
1557                                 else if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
1558                                 {
1559                                         v &= 0xFFFFFFFF;
1560                                         ln += 2;
1561                                         *tk.u32++ = CONST;
1562                                         *tk.u64++ = v;
1563                                         *tk.u32++ = DOTL;
1564                                 }
1565                                 else if ((int)chrtab[*(ln + 1)] & DIGIT)
1566                                 {
1567                                         // Hey, more digits after the dot, so we assume it's a
1568                                         // floating point number of some kind... numEnd will point
1569                                         // to the first non-float character after it's done
1570                                         char * numEnd;
1571                                         errno = 0;
1572                                         double f = strtod(numStart, &numEnd);
1573                                         ln = (uint8_t *)numEnd;
1574
1575                                         if (errno != 0)
1576                                                 return error("floating point parse error");
1577
1578                                         // N.B.: We use the C compiler's internal double
1579                                         //       representation for all internal float calcs and
1580                                         //       are reasonably sure that the size of said double
1581                                         //       is 8 bytes long (which we check for in fltpoint.c)
1582                                         *tk.u32++ = FCONST;
1583                                         *tk.dp = f;
1584                                         tk.u64++;
1585                                         continue;
1586                                 }
1587                         }
1588                         else
1589                         {
1590                                 *tk.u32++ = CONST;
1591                                 *tk.u64++ = v;
1592                         }
1593
1594 //printf("CONST: %i\n", v);
1595                         continue;
1596                 }
1597
1598                 // Handle illegal character
1599                 return error("illegal character $%02X found", *ln);
1600         }
1601
1602         // Terminate line of tokens and return "success."
1603
1604 goteol:
1605         tok = etok;                             // Set tok to beginning of line
1606
1607         if (stuffnull)                  // Terminate last SYMBOL
1608                 *nullspot = EOS;
1609
1610         *tk.u32++ = EOL;
1611
1612         return OK;
1613 }
1614
1615
1616 //
1617 // .GOTO <label>        goto directive
1618 //
1619 // The label is searched for starting from the first line of the current,
1620 // enclosing macro definition. If no enclosing macro exists, an error is
1621 // generated.
1622 //
1623 // A label is of the form:
1624 //
1625 // :<name><whitespace>
1626 //
1627 // The colon must appear in column 1.  The label is stripped prior to macro
1628 // expansion, and is NOT subject to macro expansion.  The whitespace may also
1629 // be EOL.
1630 //
1631 int d_goto(WORD unused)
1632 {
1633         // Setup for the search
1634         if (*tok != SYMBOL)
1635                 return error("missing label");
1636
1637         char * sym = string[tok[1]];
1638         tok += 2;
1639
1640         if (cur_inobj->in_type != SRC_IMACRO)
1641                 return error("goto not in macro");
1642
1643         IMACRO * imacro = cur_inobj->inobj.imacro;
1644         LLIST * defln = imacro->im_macro->lineList;
1645
1646         // Attempt to find the label, starting with the first line.
1647         for(; defln!=NULL; defln=defln->next)
1648         {
1649                 // Must start with a colon
1650                 if (defln->line[0] == ':')
1651                 {
1652                         // Compare names (sleazo string compare)
1653                         char * s1 = sym;
1654                         char * s2 = defln->line;
1655
1656                         // Either we will match the strings to EOS on both, or we will
1657                         // match EOS on string 1 to whitespace on string 2. Otherwise, we
1658                         // have no match.
1659                         while ((*s1 == *s2) || ((*s1 == EOS) && (chrtab[*s2] & WHITE)))
1660                         {
1661                                 // If we reached the end of string 1 (sym), we're done.
1662                                 // Note that we're also checking for the end of string 2 as
1663                                 // well, since we've established they're equal above.
1664                                 if (*s1 == EOS)
1665                                 {
1666                                         // Found the label, set new macro next-line and return.
1667                                         imacro->im_nextln = defln;
1668                                         return 0;
1669                                 }
1670
1671                                 s1++;
1672                                 s2++;
1673                         }
1674                 }
1675         }
1676
1677         return error("goto label not found");
1678 }
1679
1680
1681 void DumpToken(TOKEN t)
1682 {
1683         if (t == COLON)
1684                 printf("[COLON]");
1685         else if (t == CONST)
1686                 printf("[CONST]");
1687         else if (t == FCONST)
1688                 printf("[FCONST]");
1689         else if (t == ACONST)
1690                 printf("[ACONST]");
1691         else if (t == STRING)
1692                 printf("[STRING]");
1693         else if (t == SYMBOL)
1694                 printf("[SYMBOL]");
1695         else if (t == EOS)
1696                 printf("[EOS]");
1697         else if (t == TKEOF)
1698                 printf("[TKEOF]");
1699         else if (t == DEQUALS)
1700                 printf("[DEQUALS]");
1701         else if (t == SET)
1702                 printf("[SET]");
1703         else if (t == REG)
1704                 printf("[REG]");
1705         else if (t == DCOLON)
1706                 printf("[DCOLON]");
1707         else if (t == GE)
1708                 printf("[GE]");
1709         else if (t == LE)
1710                 printf("[LE]");
1711         else if (t == NE)
1712                 printf("[NE]");
1713         else if (t == SHR)
1714                 printf("[SHR]");
1715         else if (t == SHL)
1716                 printf("[SHL]");
1717         else if (t == UNMINUS)
1718                 printf("[UNMINUS]");
1719         else if (t == DOTB)
1720                 printf("[DOTB]");
1721         else if (t == DOTW)
1722                 printf("[DOTW]");
1723         else if (t == DOTL)
1724                 printf("[DOTL]");
1725         else if (t == DOTQ)
1726                 printf("[DOTQ]");
1727         else if (t == DOTS)
1728                 printf("[DOTS]");
1729         else if (t == DOTD)
1730                 printf("[DOTD]");
1731         else if (t == DOTI)
1732                 printf("[DOTI]");
1733         else if (t == ENDEXPR)
1734                 printf("[ENDEXPR]");
1735         else if (t == CR_ABSCOUNT)
1736                 printf("[CR_ABSCOUNT]");
1737         else if (t == CR_FILESIZE)
1738                 printf("[CR_FILESIZE]");
1739         else if (t == CR_DEFINED)
1740                 printf("[CR_DEFINED]");
1741         else if (t == CR_REFERENCED)
1742                 printf("[CR_REFERENCED]");
1743         else if (t == CR_STREQ)
1744                 printf("[CR_STREQ]");
1745         else if (t == CR_MACDEF)
1746                 printf("[CR_MACDEF]");
1747         else if (t == CR_TIME)
1748                 printf("[CR_TIME]");
1749         else if (t == CR_DATE)
1750                 printf("[CR_DATE]");
1751         else if (t >= 0x20 && t <= 0x2F)
1752                 printf("[%c]", (char)t);
1753         else if (t >= 0x3A && t <= 0x3F)
1754                 printf("[%c]", (char)t);
1755         else if (t >= 0x80 && t <= 0x87)
1756                 printf("[D%u]", ((uint32_t)t) - 0x80);
1757         else if (t >= 0x88 && t <= 0x8F)
1758                 printf("[A%u]", ((uint32_t)t) - 0x88);
1759         else
1760                 printf("[%X:%c]", (uint32_t)t, (char)t);
1761 }
1762
1763
1764 void DumpTokenBuffer(void)
1765 {
1766         printf("Tokens [%X]: ", sloc);
1767
1768         for(TOKEN * t=tokbuf; *t!=EOL; t++)
1769         {
1770                 if (*t == COLON)
1771                         printf("[COLON]");
1772                 else if (*t == CONST)
1773                 {
1774                         PTR tp;
1775                         tp.u32 = t + 1;
1776                         printf("[CONST: $%lX]", *tp.u64);
1777                         t += 2;
1778                 }
1779                 else if (*t == FCONST)
1780                 {
1781                         PTR tp;
1782                         tp.u32 = t + 1;
1783                         printf("[FCONST: $%lX]", *tp.u64);
1784                         t += 2;
1785                 }
1786                 else if (*t == ACONST)
1787                 {
1788                         printf("[ACONST: $%X, $%X]", (uint32_t)t[1], (uint32_t)t[2]);
1789                         t += 2;
1790                 }
1791                 else if (*t == STRING)
1792                 {
1793                         t++;
1794                         printf("[STRING:\"%s\"]", string[*t]);
1795                 }
1796                 else if (*t == SYMBOL)
1797                 {
1798                         t++;
1799                         printf("[SYMBOL:\"%s\"]", string[*t]);
1800                 }
1801                 else if (*t == EOS)
1802                         printf("[EOS]");
1803                 else if (*t == TKEOF)
1804                         printf("[TKEOF]");
1805                 else if (*t == DEQUALS)
1806                         printf("[DEQUALS]");
1807                 else if (*t == SET)
1808                         printf("[SET]");
1809                 else if (*t == REG)
1810                         printf("[REG]");
1811                 else if (*t == DCOLON)
1812                         printf("[DCOLON]");
1813                 else if (*t == GE)
1814                         printf("[GE]");
1815                 else if (*t == LE)
1816                         printf("[LE]");
1817                 else if (*t == NE)
1818                         printf("[NE]");
1819                 else if (*t == SHR)
1820                         printf("[SHR]");
1821                 else if (*t == SHL)
1822                         printf("[SHL]");
1823                 else if (*t == UNMINUS)
1824                         printf("[UNMINUS]");
1825                 else if (*t == DOTB)
1826                         printf("[DOTB]");
1827                 else if (*t == DOTW)
1828                         printf("[DOTW]");
1829                 else if (*t == DOTL)
1830                         printf("[DOTL]");
1831                 else if (*t == DOTQ)
1832                         printf("[DOTQ]");
1833                 else if (*t == DOTS)
1834                         printf("[DOTS]");
1835                 else if (*t == DOTD)
1836                         printf("[DOTD]");
1837                 else if (*t == DOTI)
1838                         printf("[DOTI]");
1839                 else if (*t == ENDEXPR)
1840                         printf("[ENDEXPR]");
1841                 else if (*t == CR_ABSCOUNT)
1842                         printf("[CR_ABSCOUNT]");
1843                 else if (*t == CR_FILESIZE)
1844                         printf("[CR_FILESIZE]");
1845                 else if (*t == CR_DEFINED)
1846                         printf("[CR_DEFINED]");
1847                 else if (*t == CR_REFERENCED)
1848                         printf("[CR_REFERENCED]");
1849                 else if (*t == CR_STREQ)
1850                         printf("[CR_STREQ]");
1851                 else if (*t == CR_MACDEF)
1852                         printf("[CR_MACDEF]");
1853                 else if (*t == CR_TIME)
1854                         printf("[CR_TIME]");
1855                 else if (*t == CR_DATE)
1856                         printf("[CR_DATE]");
1857                 else if (*t >= 0x20 && *t <= 0x2F)
1858                         printf("[%c]", (char)*t);
1859                 else if (*t >= 0x3A && *t <= 0x3F)
1860                         printf("[%c]", (char)*t);
1861                 else if (*t >= 0x80 && *t <= 0x87)
1862                         printf("[D%u]", ((uint32_t)*t) - 0x80);
1863                 else if (*t >= 0x88 && *t <= 0x8F)
1864                         printf("[A%u]", ((uint32_t)*t) - 0x88);
1865                 else
1866                         printf("[%X:%c]", (uint32_t)*t, (char)*t);
1867         }
1868
1869         printf("[EOL]\n");
1870 }
1871