// Shamusworld >> Repos - rmac/blob - token.c
// Fix for #159: Split register sets according to architecture into different tables...
// [rmac] / token.c
1 //
2 // RMAC - Renamed Macro Assembler for all Atari computers
3 // TOKEN.C - Token Handling
4 // Copyright (C) 199x Landon Dyer, 2011-2021 Reboot and Friends
5 // RMAC derived from MADMAC v1.07 Written by Landon Dyer, 1986
6 // Source utilised with the kind permission of Landon Dyer
7 //
8
9 #include "token.h"
10
11 #include <errno.h>
12 #include "direct.h"
13 #include "error.h"
14 #include "macro.h"
15 #include "procln.h"
16 #include "sect.h"
17 #include "symbol.h"
18
19 #define DECL_KW                         // Declare keyword arrays
20 #define DEF_KW                          // Declare keyword values
21 #include "kwtab.h"                      // Incl generated keyword tables & defs
22 #define DEF_REG68                       // Incl 68k register definitions
23 #include "68kregs.h"
24 #define DEF_REGRISC                     // Include GPU/DSP register definitions
25 #include "riscregs.h"
26 #define DEF_UNARY                       // Declare unary values
#define DECL_UNARY                      // Incl unary keyword state machine tables
28 #include "unarytab.h"           // Incl generated unary tables & defs
29
30
31 int lnsave;                                     // 1; strcpy() text of current line
32 uint32_t curlineno;                     // Current line number (64K max currently)
33 int totlines;                           // Total # of lines
34 int mjump_align = 0;            // mjump alignment flag
35 char lntag;                                     // Line tag
36 char * curfname;                        // Current filename
37 char tolowertab[128];           // Uppercase ==> lowercase
38 int8_t hextab[128];                     // Table of hex values
39 char dotxtab[128];                      // Table for ".b", ".s", etc.
40 char irbuf[LNSIZ];                      // Text for .rept block line
41 char lnbuf[LNSIZ];                      // Text of current line
42 WORD filecount;                         // Unique file number counter
43 WORD cfileno;                           // Current file number
44 TOKEN * tok;                            // Ptr to current token
45 TOKEN * etok;                           // Ptr past last token in tokbuf[]
46 TOKEN tokeol[1] = {EOL};        // Bailout end-of-line token
47 char * string[TOKBUFSIZE*2];// Token buffer string pointer storage
48 int optimizeOff;                        // Optimization override flag
49
50
51 FILEREC * filerec;
52 FILEREC * last_fr;
53
54 INOBJ * cur_inobj;                      // Ptr current input obj (IFILE/IMACRO)
55 static INOBJ * f_inobj;         // Ptr list of free INOBJs
56 static IFILE * f_ifile;         // Ptr list of free IFILEs
57 static IMACRO * f_imacro;       // Ptr list of free IMACROs
58
59 static TOKEN tokbuf[TOKBUFSIZE];        // Token buffer (stack-like, all files)
60
61 uint8_t chrtab[0x100] = {
62         ILLEG, ILLEG, ILLEG, ILLEG,                     // NUL SOH STX ETX
63         ILLEG, ILLEG, ILLEG, ILLEG,                     // EOT ENQ ACK BEL
64         ILLEG, WHITE, ILLEG, ILLEG,                     // BS HT LF VT
65         WHITE, ILLEG, ILLEG, ILLEG,                     // FF CR SO SI
66
67         ILLEG, ILLEG, ILLEG, ILLEG,                     // DLE DC1 DC2 DC3
68         ILLEG, ILLEG, ILLEG, ILLEG,                     // DC4 NAK SYN ETB
69         ILLEG, ILLEG, ILLEG, ILLEG,                     // CAN EM SUB ESC
70         ILLEG, ILLEG, ILLEG, ILLEG,                     // FS GS RS US
71
72         WHITE, MULTX, MULTX, SELF,                      // SP ! " #
73         MULTX+CTSYM, MULTX, SELF, MULTX,        // $ % & '
74         SELF, SELF, SELF, SELF,                         // ( ) * +
75         SELF, SELF, STSYM, SELF,                        // , - . /
76
77         DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM,         // 0 1
78         DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM,         // 2 3
79         DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM,         // 4 5
80         DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM,         // 6 7
81         DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM,         // 8 9
82         MULTX, MULTX,                                                           // : ;
83         MULTX, MULTX, MULTX, STSYM+CTSYM,                       // < = > ?
84
85         MULTX, STSYM+CTSYM+HDIGIT,                                      // @ A
86         DOT+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT,     // B C
87         DOT+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT,     // D E
88         STSYM+CTSYM+HDIGIT, STSYM+CTSYM,                        // F G
89         STSYM+CTSYM, DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // H I J K
90         DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // L M N O
91
92         DOT+STSYM+CTSYM, DOT+STSYM+CTSYM, STSYM+CTSYM, DOT+STSYM+CTSYM, // P Q R S
93         STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, DOT+STSYM+CTSYM, // T U V W
94         STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, SELF,// X Y Z [
95         SELF, SELF, MULTX, STSYM+CTSYM,                         // \ ] ^ _
96
97         ILLEG, STSYM+CTSYM+HDIGIT,                                      // ` a
98         DOT+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT,     // b c
99         DOT+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT,     // d e
100         STSYM+CTSYM+HDIGIT, STSYM+CTSYM,                        // f g
101         STSYM+CTSYM, DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // h i j k
102         DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // l m n o
103
104         DOT+STSYM+CTSYM, DOT+STSYM+CTSYM, STSYM+CTSYM, DOT+STSYM+CTSYM, // p q r s
105         STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, DOT+STSYM+CTSYM, // t u v w
106         DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, SELF,                // x y z {
107         SELF, SELF, SELF, ILLEG,                                        // | } ~ DEL
108
109         // Anything above $7F is illegal (and yes, we need to check for this,
110         // otherwise you get strange and spurious errors that will lead you astray)
111         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
112         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
113         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
114         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
115         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
116         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
117         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
118         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
119         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
120         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
121         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
122         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
123         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
124         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
125         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
126         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG
127 };
128
//
// Names of registers, indexed by (token value - REG68_D0). Empty strings fill
// the token values that have no name in this table. The comment at the start
// of each row gives the range of token values that row covers.
//
static char * regname[] = {
	/* 128-135 */ "d0",    "d1",    "d2",    "d3",    "d4",     "d5",    "d6",    "d7",
	/* 136-143 */ "a0",    "a1",    "a2",    "a3",    "a4",     "a5",    "a6",    "sp",
	/* 144-151 */ "ssp",   "pc",    "sr",    "ccr",   "regequ", "set",   "reg",   "r0",
	/* 152-159 */ "r1",    "r2",    "r3",    "r4",    "r5",     "r6",    "r7",    "r8",
	/* 160-167 */ "r9",    "r10",   "r11",   "r12",   "r13",    "r14",   "r15",   "r16",
	/* 168-175 */ "r17",   "r18",   "r19",   "r20",   "r21",    "r22",   "r23",   "r24",
	/* 176-183 */ "r25",   "r26",   "r27",   "r28",   "r29",    "r30",   "r31",   "ccdef",
	/* 184-191 */ "usp",   "ic40",  "dc40",  "bc40",  "sfc",    "dfc",   "",      "vbr",
	/* 192-199 */ "cacr",  "caar",  "msp",   "isp",   "tc",     "itt0",  "itt1",  "dtt0",
	/* 200-207 */ "dtt1",  "mmusr", "urp",   "srp",   "iacr0",  "iacr1", "dacr0", "dacr1",
	/* 208-215 */ "tt0",   "tt1",   "crp",   "",      "",       "",      "",      "",
	/* 216-223 */ "",      "",      "",      "",      "fpiar",  "fpsr",  "fpcr",  "",
	/* 224-231 */ "fp0",   "fp1",   "fp2",   "fp3",   "fp4",    "fp5",   "fp6",   "fp7",
	/* 232-239 */ "",      "",      "",      "",      "",       "",      "",      "",
	/* 240-247 */ "",      "",      "",      "",      "",       "",      "",      "",
	/* 248-255 */ "",      "",      "",      "",      "",       "",      "",      "",
	/* 256-263 */ "",      "",      "",      "",      "x0",     "x1",    "y0",    "y1",
	/* 264-271 */ "",      "b0",    "",      "b2",    "",       "b1",    "a",     "b",
	/* 272-279 */ "mr",    "omr",   "la",    "lc",    "ssh",    "ssl",   "ss",    "",
	/* 280-287 */ "n0",    "n1",    "n2",    "n3",    "n4",     "n5",    "n6",    "n7",
	/* 288-295 */ "m0",    "m1",    "m2",    "m3",    "m4",     "m5",    "m6",    "m7",
	/* 296-303 */ "",      "",      "",      "",      "",       "",      "l",     "p",
	/* 304-311 */ "mr",    "omr",   "la",    "lc",    "ssh",    "ssl",   "ss",    "",
	/* 312-319 */ "a10",   "b10",   "x",     "y",     "",       "",      "ab",    "ba"
};
156
157 WARNING("We should get rid of this table, it's a subset of the table above")
158 static char * riscregname[] = {
159          "r0",  "r1",  "r2",  "r3",  "r4", "r5",   "r6",  "r7",
160          "r8",  "r9", "r10", "r11", "r12", "r13", "r14", "r15",
161         "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
162         "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31"
163 };
164
165
166 //
167 // Initialize tokenizer
168 //
169 void InitTokenizer(void)
170 {
171         int i;                                                                  // Iterator
172         char * htab = "0123456789abcdefABCDEF"; // Hex character table
173
174         lnsave = 0;                                                             // Don't save lines
175         curfname = "";                                                  // No file, empty filename
176         filecount = (WORD)-1;
177         cfileno = (WORD)-1;                                             // cfileno gets bumped to 0
178         curlineno = 0;
179         totlines = 0;
180         etok = tokbuf;
181         f_inobj = NULL;
182         f_ifile = NULL;
183         f_imacro = NULL;
184         cur_inobj = NULL;
185         filerec = NULL;
186         last_fr = NULL;
187         lntag = SPACE;
188
189         // Initialize hex, "dot" and tolower tables
190         for(i=0; i<128; i++)
191         {
192                 hextab[i] = -1;
193                 dotxtab[i] = 0;
194                 tolowertab[i] = (char)i;
195         }
196
197         for(i=0; htab[i]!=EOS; i++)
198                 hextab[htab[i]] = (char)((i < 16) ? i : i - 6);
199
200         for(i='A'; i<='Z'; i++)
201                 tolowertab[i] |= 0x20;
202
203         // These characters are legal immediately after a period
204         dotxtab['b'] = DOTB;                                    // .b .B .s .S
205         dotxtab['B'] = DOTB;
206         //dotxtab['s'] = DOTB;
207         //dotxtab['S'] = DOTB;
208         dotxtab['w'] = DOTW;                                    // .w .W
209         dotxtab['W'] = DOTW;
210         dotxtab['l'] = DOTL;                                    // .l .L
211         dotxtab['L'] = DOTL;
212         dotxtab['i'] = DOTI;                                    // .i .I (WTF is this???)
213         dotxtab['I'] = DOTI;
214         dotxtab['D'] = DOTD;                                    // .d .D (double)
215         dotxtab['d'] = DOTD;
216         dotxtab['S'] = DOTS;                                    // .s .S
217         dotxtab['s'] = DOTS;
218         dotxtab['Q'] = DOTQ;                                    // .q .Q (quad word)
219         dotxtab['q'] = DOTQ;
220         dotxtab['X'] = DOTX;                                    // .x .x
221         dotxtab['x'] = DOTX;
222         dotxtab['P'] = DOTP;                                    // .p .P
223         dotxtab['p'] = DOTP;
224 }
225
226
227 void SetFilenameForErrorReporting(void)
228 {
229         WORD fnum = cfileno;
230
231         // Check for absolute top filename (this should never happen)
232         if (fnum == -1)
233         {
234                 curfname = "(*top*)";
235                 return;
236         }
237
238         FILEREC * fr = filerec;
239
240         // Advance to the correct record...
241         while (fr != NULL && fnum != 0)
242         {
243                 fr = fr->frec_next;
244                 fnum--;
245         }
246
247         // Check for file # record not found (this should never happen either)
248         if (fr == NULL)
249         {
250                 curfname = "(*NOT FOUND*)";
251                 return;
252         }
253
254         curfname = fr->frec_name;
255 }
256
257
258 //
259 // Allocate an IFILE or IMACRO
260 //
261 INOBJ * a_inobj(int typ)
262 {
263         INOBJ * inobj;
264         IFILE * ifile;
265         IMACRO * imacro;
266
267         // Allocate and initialize INOBJ first
268         if (f_inobj == NULL)
269                 inobj = malloc(sizeof(INOBJ));
270         else
271         {
272                 inobj = f_inobj;
273                 f_inobj = f_inobj->in_link;
274         }
275
276         switch (typ)
277         {
278         case SRC_IFILE:                                                 // Alloc and init an IFILE
279                 if (f_ifile == NULL)
280                         ifile = malloc(sizeof(IFILE));
281                 else
282                 {
283                         ifile = f_ifile;
284                         f_ifile = f_ifile->if_link;
285                 }
286
287                 inobj->inobj.ifile = ifile;
288                 break;
289
290         case SRC_IMACRO:                                                // Alloc and init an IMACRO
291                 if (f_imacro == NULL)
292                         imacro = malloc(sizeof(IMACRO));
293                 else
294                 {
295                         imacro = f_imacro;
296                         f_imacro = f_imacro->im_link;
297                 }
298
299                 inobj->inobj.imacro = imacro;
300                 break;
301
302         case SRC_IREPT:                                                 // Alloc and init an IREPT
303                 inobj->inobj.irept = malloc(sizeof(IREPT));
304                 DEBUG { printf("alloc IREPT\n"); }
305                 break;
306         }
307
308         // Install INOBJ on top of input stack
309         inobj->in_ifent = ifent;                                // Record .if context on entry
310         inobj->in_type = (WORD)typ;
311         inobj->in_otok = tok;
312         inobj->in_etok = etok;
313         inobj->in_link = cur_inobj;
314         cur_inobj = inobj;
315
316         return inobj;
317 }
318
319
320 //
321 // Perform macro substitution from 'orig' to 'dest'. Return OK or some error.
322 // A macro reference is in one of two forms:
323 // \name <non-name-character>
324 // \{name}
325 // A doubled backslash (\\) is compressed to a single backslash (\).
326 // Argument definitions have been pre-tokenized, so we have to turn them back
327 // into text. This means that numbers, in particular, become hex, regardless of
328 // their representation when the macro was invoked. This is a hack.
329 // A label may appear at the beginning of the line:
330 // :<name><whitespace>
331 // (the colon must be in the first column). These labels are stripped before
332 // macro expansion takes place.
333 //
334 int ExpandMacro(char * src, char * dest, int destsiz)
335 {
336         int i;
337         int questmark;                  // \? for testing argument existence
338         char mname[128];                // Assume max size of a formal arg name
339         char numbuf[20];                // Buffer for text of CONSTs
340         TOKEN * tk;
341         SYM * arg;
342         char ** symbolString;
343
344         DEBUG { printf("ExM: src=\"%s\"\n", src); }
345
346         IMACRO * imacro = cur_inobj->inobj.imacro;
347         int macnum = (int)(imacro->im_macro->sattr);
348
349         char * dst = dest;                                              // Next dest slot
350         char * edst = dest + destsiz - 1;               // End + 1(?) of dest buffer
351
352         // Check for (and skip over) any "label" on the line
353         char * s = src;
354         char * d = NULL;
355
356         if (*s == ':')
357         {
358                 while (*s != EOS && !(chrtab[*s] & WHITE))
359                         s++;
360
361                 if (*s != EOS)
362                         s++;                                                    // Skip first whitespace
363         }
364
365         // Expand the rest of the line
366         while (*s != EOS)
367         {
368                 // Copy single character
369                 if (*s != '\\')
370                 {
371                         if (dst >= edst)
372                                 goto overflow;
373
374                         // Skip comments in case a loose @ or \ is in there
375                         // In that case the tokeniser was trying to expand it.
376                         if ((*s == ';') || ((*s == '/') && (*(s + 1) == '/')))
377                                 goto skipcomments;
378
379                         *dst++ = *s++;
380                 }
381                 // Do macro expansion
382                 else
383                 {
384                         questmark = 0;
385
386                         // Do special cases
387                         switch (*++s)
388                         {
389                         case '\\':                                              // \\, \ (collapse to single backslash)
390                                 if (dst >= edst)
391                                         goto overflow;
392
393                                 *dst++ = *s++;
394                                 continue;
395                         case '?':                                               // \? <macro>  set `questmark' flag
396                                 s++;
397                                 questmark = 1;
398                                 break;
399                         case '#':                                               // \#, number of arguments
400                                 sprintf(numbuf, "%d", (int)imacro->im_nargs);
401                                 goto copystr;
402                         case '!':                                               // \! size suffix supplied on invocation
403                                 switch ((int)imacro->im_siz)
404                                 {
405                                 case SIZN: d = "";   break;
406                                 case SIZB: d = ".b"; break;
407                                 case SIZW: d = ".w"; break;
408                                 case SIZL: d = ".l"; break;
409                                 }
410
411                                 goto copy_d;
412                         case '~':                                               // ==> unique label string Mnnnn...
413                                 sprintf(numbuf, "M%u", curuniq);
414 copystr:
415                                 d = numbuf;
416 copy_d:
417                                 s++;
418
419                                 while (*d != EOS)
420                                 {
421                                         if (dst >= edst)
422                                                 goto overflow;
423                                         else
424                                                 *dst++ = *d++;
425                                 }
426
427                                 continue;
428                         case EOS:
429                                 return error("missing argument name");
430                         }
431
432                         // \n ==> argument number 'n', 0..9
433                         if (chrtab[*s] & DIGIT)
434                         {
435                                 i = *s++ - '1';
436
437                                 if (i < 0)
438                                         i = 9;
439
440                                 goto arg_num;
441                         }
442
443                         // Get argument name: \name, \{name}
444                         d = mname;
445
446                         // \label
447                         if (*s != '{')
448                         {
449                                 do
450                                 {
451                                         *d++ = *s++;
452                                 }
453                                 while (chrtab[*s] & CTSYM);
454                         }
455                         // \\{label}
456                         else
457                         {
458                                 for(++s; *s != EOS && *s != '}';)
459                                         *d++ = *s++;
460
461                                 if (*s != '}')
462                                         return error("missing closing brace ('}')");
463                                 else
464                                         s++;
465                         }
466
467                         *d = EOS;
468
469                         // Lookup the argument and copy its (string) value into the
470                         // destination string
471                         DEBUG { printf("argument='%s'\n", mname); }
472
473                         if ((arg = lookup(mname, MACARG, macnum)) == NULL)
474                                 return error("undefined argument: '%s'", mname);
475                         else
476                         {
477                                 // Convert a string of tokens (terminated with EOL) back into
478                                 // text. If an argument is out of range (not specified in the
479                                 // macro invocation) then it is ignored.
480                                 i = (int)arg->svalue;
481 arg_num:
482                                 DEBUG { printf("~argnumber=%d\n", i); }
483                                 tk = NULL;
484
485                                 if (i < imacro->im_nargs)
486                                 {
487                                         tk = imacro->argument[i].token;
488                                         symbolString = imacro->argument[i].string;
489 //DEBUG
490 //{
491 //      printf("ExM: Preparing to parse argument #%u...\n", i);
492 //      DumpTokens(tk);
493 //}
494                                 }
495
496                                 // \?arg yields:
497                                 //    0  if the argument is empty or non-existant,
498                                 //    1  if the argument is not empty
499                                 if (questmark)
500                                 {
501                                         if (tk == NULL || *tk == EOL)
502                                                 questmark = 0;
503
504                                         if (dst >= edst)
505                                                 goto overflow;
506
507                                         *dst++ = (char)(questmark + '0');
508                                         continue;
509                                 }
510
511                                 // Argument # is in range, so expand it
512                                 if (tk != NULL)
513                                 {
514                                         while (*tk != EOL)
515                                         {
516                                                 // Reverse-translation from a token number to a string.
517                                                 // This is a hack. It might be better table-driven.
518                                                 d = NULL;
519
520                                                 if ((*tk >= REG68_D0) && !rdsp && !rgpu)
521                                                 {
522                                                         d = regname[(int)*tk++ - REG68_D0];
523                                                         goto strcopy;
524                                                 }
525                                                 else if ((*tk >= REGRISC_R0) && (*tk <= REGRISC_R31))
526                                                 {
527                                                         d = riscregname[(int)*tk++ - REGRISC_R0];
528                                                         goto strcopy;
529                                                 }
530                                                 else
531                                                 {
532                                                         switch ((int)*tk++)
533                                                         {
534                                                         case SYMBOL:
535                                                                 d = symbolString[*tk++];
536 DEBUG { printf("ExM: SYMBOL=\"%s\"", d); }
537                                                                 break;
538                                                         case STRING:
539                                                                 d = symbolString[*tk++];
540
541                                                                 if (dst >= edst)
542                                                                         goto overflow;
543
544                                                                 *dst++ = '"';
545
546                                                                 while (*d != EOS)
547                                                                 {
548                                                                         if (dst >= edst)
549                                                                                 goto overflow;
550                                                                         else
551                                                                                 *dst++ = *d++;
552                                                                 }
553
554                                                                 if (dst >= edst)
555                                                                         goto overflow;
556
557                                                                 *dst++ = '"';
558                                                                 continue;
559                                                                 break;
560 // Shamus: Changing the format specifier from %lx to %ux caused the assembler
561 //         to choke on legitimate code... Need to investigate this further
562 //         before changing anything else here!
563                                                         case CONST:
564 //                                                              sprintf(numbuf, "$%lx", (uint64_t)*tk++);
565                                                                 sprintf(numbuf, "$%" PRIX64, (uint64_t)*tk++);
566                                                                 tk++;
567                                                                 d = numbuf;
568                                                                 break;
569                                                         case DEQUALS:
570                                                                 d = "==";
571                                                                 break;
572                                                         case SET:
573                                                                 d = "set";
574                                                                 break;
575                                                         case COLON:
576                                                                 d = ":";
577                                                                 break;
578                                                         case DCOLON:
579                                                                 d = "::";
580                                                                 break;
581                                                         case GE:
582                                                                 d = ">=";
583                                                                 break;
584                                                         case LE:
585                                                                 d = "<=";
586                                                                 break;
587                                                         case NE:
588                                                                 d = "<>";
589                                                                 break;
590                                                         case SHR:
591                                                                 d = ">>";
592                                                                 break;
593                                                         case SHL:
594                                                                 d = "<<";
595                                                                 break;
596                                                         case DOTB:
597                                                                 d = ".b";
598                                                                 break;
599                                                         case DOTW:
600                                                                 d = ".w";
601                                                                 break;
602                                                         case DOTL:
603                                                                 d = ".l";
604                                                                 break;
605                                                         case CR_ABSCOUNT:
606                                                                 d = "^^abscount";
607                                                                 break;
608                                                         case CR_FILESIZE:
609                                                                 d = "^^filesize";
610                                                                 break;
611                                                         case CR_DATE:
612                                                                 d = "^^date";
613                                                                 break;
614                                                         case CR_TIME:
615                                                                 d = "^^time";
616                                                                 break;
617                                                         case CR_DEFINED:
618                                                                 d = "^^defined ";
619                                                                 break;
620                                                         case CR_REFERENCED:
621                                                                 d = "^^referenced ";
622                                                                 break;
623                                                         case CR_STREQ:
624                                                                 d = "^^streq ";
625                                                                 break;
626                                                         case CR_MACDEF:
627                                                                 d = "^^macdef ";
628                                                                 break;
629                                                         default:
630                                                                 if (dst >= edst)
631                                                                         goto overflow;
632
633                                                                 *dst++ = (char)*(tk - 1);
634                                                                 break;
635                                                         }
636                                                 }
637
638                                                 // If 'd' != NULL, copy string to destination
639                                                 if (d != NULL)
640                                                 {
641 strcopy:
642                                                         DEBUG printf("d='%s'\n", d);
643
644                                                         while (*d != EOS)
645                                                         {
646                                                                 if (dst >= edst)
647                                                                         goto overflow;
648                                                                 else
649                                                                         *dst++ = *d++;
650                                                         }
651                                                 }
652                                         }
653                                 }
654                         }
655                 }
656         }
657
658 skipcomments:
659
660         *dst = EOS;
661         DEBUG { printf("ExM: dst=\"%s\"\n", dest); }
662         return OK;
663
664 overflow:
665         *dst = EOS;
666         DEBUG { printf("*** OVERFLOW LINE ***\n%s\n", dest); }
667         return fatal("line too long as a result of macro expansion");
668 }
669
670
671 //
672 // Get next line of text from a macro
673 //
674 char * GetNextMacroLine(void)
675 {
676         IMACRO * imacro = cur_inobj->inobj.imacro;
677         LLIST * strp = imacro->im_nextln;
678
679         if (strp == NULL)                                               // End-of-macro
680                 return NULL;
681
682         imacro->im_nextln = strp->next;
683 //      ExpandMacro((char *)(strp + 1), imacro->im_lnbuf, LNSIZ);
684         ExpandMacro(strp->line, imacro->im_lnbuf, LNSIZ);
685
686         return imacro->im_lnbuf;
687 }
688
689
690 //
691 // Get next line of text from a repeat block
692 //
693 char * GetNextRepeatLine(void)
694 {
695         IREPT * irept = cur_inobj->inobj.irept;
696 //      LONG * strp = irept->ir_nextln;                 // initial null
697
698         // Do repeat at end of .rept block's string list
699 //      if (strp == NULL)
700         if (irept->ir_nextln == NULL)
701         {
702                 DEBUG { printf("back-to-top-of-repeat-block count=%d\n", (int)irept->ir_count); }
703                 irept->ir_nextln = irept->ir_firstln;   // copy first line
704
705                 if (irept->ir_count-- == 0)
706                 {
707                         DEBUG { printf("end-repeat-block\n"); }
708                         return NULL;
709                 }
710                 reptuniq++;
711 //              strp = irept->ir_nextln;
712         }
713         // Mark the current macro line in the irept object
714         // This is probably overkill - a global variable
715         // would suffice here (it only gets used during
716         // error reporting anyway)
717         irept->lineno = irept->ir_nextln->lineno;
718
719         // Copy the rept lines verbatim, unless we're in nest level 0.
720         // Then, expand any \~ labels to unique numbers (Rn)
721         if (rptlevel)
722         {
723                 strcpy(irbuf, irept->ir_nextln->line);
724         }
725         else
726         {
727                 uint32_t linelen = strlen(irept->ir_nextln->line);
728                 uint8_t *p_line = irept->ir_nextln->line;
729                 char *irbufwrite = irbuf;
730                 for (int i = 0; i <= linelen; i++)
731                 {
732                         uint8_t c;
733                         c = *p_line++;
734                         if (c == '\\' && *p_line == '~')
735                         {
736                                 p_line++;
737                                 irbufwrite += sprintf(irbufwrite, "R%u", reptuniq);
738                         }
739                         else
740                         {
741                                 *irbufwrite++ = c;
742                         }
743                 }
744         }
745
746         DEBUG { printf("repeat line='%s'\n", irbuf); }
747 //      irept->ir_nextln = (LONG *)*strp;
748         irept->ir_nextln = irept->ir_nextln->next;
749
750         return irbuf;
751 }
752
753
754 //
755 // Include a source file used at the root, and for ".include" files
756 //
757 int include(int handle, char * fname)
758 {
759         // Debug mode
760         DEBUG { printf("[include: %s, cfileno=%u]\n", fname, cfileno); }
761
762         // Alloc and initialize include-descriptors
763         INOBJ * inobj = a_inobj(SRC_IFILE);
764         IFILE * ifile = inobj->inobj.ifile;
765
766         ifile->ifhandle = handle;                       // Setup file handle
767         ifile->ifind = ifile->ifcnt = 0;        // Setup buffer indices
768         ifile->ifoldlineno = curlineno;         // Save old line number
769         ifile->ifoldfname = curfname;           // Save old filename
770         ifile->ifno = cfileno;                          // Save old file number
771
772         // NB: This *must* be preincrement, we're adding one to the filecount here!
773         cfileno = ++filecount;                          // Compute NEW file number
774         curfname = strdup(fname);                       // Set current filename (alloc storage)
775         curlineno = 0;                                          // Start on line zero
776
777         // Add another file to the file-record
778         FILEREC * fr = (FILEREC *)malloc(sizeof(FILEREC));
779         fr->frec_next = NULL;
780         fr->frec_name = curfname;
781
782         if (last_fr == NULL)
783                 filerec = fr;                                   // Add first filerec
784         else
785                 last_fr->frec_next = fr;                // Append to list of filerecs
786
787         last_fr = fr;
788         DEBUG { printf("[include: curfname: %s, cfileno=%u]\n", curfname, cfileno); }
789
790         return OK;
791 }
792
793
794 //
795 // Pop the current input level
796 //
797 int fpop(void)
798 {
799         INOBJ * inobj = cur_inobj;
800
801         if (inobj == NULL)
802                 return 0;
803
804         // Pop IFENT levels until we reach the conditional assembly context we
805         // were at when the input object was entered.
806         int numUnmatched = 0;
807
808         while (ifent != inobj->in_ifent)
809         {
810                 if (d_endif() != 0)     // Something bad happened during endif parsing?
811                         return -1;              // If yes, bail instead of getting stuck in a loop
812
813                 numUnmatched++;
814         }
815
816         // Give a warning to the user that we had to wipe their bum for them
817         if (numUnmatched > 0)
818                 warn("missing %d .endif(s)", numUnmatched);
819
820         tok = inobj->in_otok;   // Restore tok and etok
821         etok = inobj->in_etok;
822
823         switch (inobj->in_type)
824         {
825         case SRC_IFILE:                 // Pop and release an IFILE
826         {
827                 DEBUG { printf("[Leaving: %s]\n", curfname); }
828
829                 IFILE * ifile = inobj->inobj.ifile;
830                 ifile->if_link = f_ifile;
831                 f_ifile = ifile;
832                 close(ifile->ifhandle);                 // Close source file
833 DEBUG { printf("[fpop (pre):  curfname=%s]\n", curfname); }
834                 curfname = ifile->ifoldfname;   // Set current filename
835 DEBUG { printf("[fpop (post): curfname=%s]\n", curfname); }
836 DEBUG { printf("[fpop: (pre)  cfileno=%d ifile->ifno=%d]\n", (int)cfileno, (int)ifile->ifno); }
837                 curlineno = ifile->ifoldlineno; // Set current line#
838                 DEBUG { printf("cfileno=%d ifile->ifno=%d\n", (int)cfileno, (int)ifile->ifno); }
839                 cfileno = ifile->ifno;                  // Restore current file number
840 DEBUG { printf("[fpop: (post) cfileno=%d ifile->ifno=%d]\n", (int)cfileno, (int)ifile->ifno); }
841                 break;
842         }
843
844         case SRC_IMACRO:                                        // Pop and release an IMACRO
845         {
846                 IMACRO * imacro = inobj->inobj.imacro;
847                 imacro->im_link = f_imacro;
848                 f_imacro = imacro;
849                 break;
850         }
851
852         case SRC_IREPT:                                         // Pop and release an IREPT
853         {
854                 DEBUG { printf("dealloc IREPT\n"); }
855                 LLIST * p = inobj->inobj.irept->ir_firstln;
856
857                 // Deallocate repeat lines
858                 while (p != NULL)
859                 {
860                         free(p->line);
861                         p = p->next;
862                 }
863
864                 break;
865         }
866         }
867
868         cur_inobj = inobj->in_link;
869         inobj->in_link = f_inobj;
870         f_inobj = inobj;
871
872         return 0;
873 }
874
875
876 //
877 // Get line from file into buf, return NULL on EOF or ptr to the start of a
878 // null-term line
879 //
880 char * GetNextLine(void)
881 {
882         int i, j;
883         char * p, * d;
884         int readamt = -1;                                               // 0 if last read() yeilded 0 bytes
885         IFILE * fl = cur_inobj->inobj.ifile;
886
887         for(;;)
888         {
889                 // Scan for next end-of-line; handle stupid text formats by treating
890                 // \r\n the same as \n. (lone '\r' at end of buffer means we have to
891                 // check for '\n').
892                 d = &fl->ifbuf[fl->ifind];
893
894                 for(p=d, i=0, j=fl->ifcnt; i<j; i++, p++)
895                 {
896                         if (*p == '\r' || *p == '\n')
897                         {
898                                 i++;
899
900                                 if (*p == '\r')
901                                 {
902                                         if (i >= j)
903                                                 break;  // Need to read more, then look for '\n' to eat
904                                         else if (p[1] == '\n')
905                                                 i++;
906                                 }
907
908                                 // Cover up the newline with end-of-string sentinel
909                                 *p = '\0';
910
911                                 fl->ifind += i;
912                                 fl->ifcnt -= i;
913                                 return d;
914                         }
915                 }
916
917                 // Handle hanging lines by ignoring them (Input file is exhausted, no
918                 // \r or \n on last line)
919                 // Shamus: This is retarded. Never ignore any input!
920                 if (!readamt && fl->ifcnt)
921                 {
922 #if 0
923                         fl->ifcnt = 0;
924                         *p = '\0';
925                         return NULL;
926 #else
927                         // Really should check to see if we're at the end of the buffer!
928                         // :-P
929                         fl->ifbuf[fl->ifind + fl->ifcnt] = '\0';
930                         fl->ifcnt = 0;
931                         return &fl->ifbuf[fl->ifind];
932 #endif
933                 }
934
935                 // Truncate and return absurdly long lines.
936                 if (fl->ifcnt >= QUANTUM)
937                 {
938                         fl->ifbuf[fl->ifind + fl->ifcnt - 1] = '\0';
939                         fl->ifcnt = 0;
940                         return &fl->ifbuf[fl->ifind];
941                 }
942
943                 // Relocate what's left of a line to the beginning of the buffer, and
944                 // read some more of the file in; return NULL if the buffer's empty and
945                 // on EOF.
946                 if (fl->ifind != 0)
947                 {
948                         p = &fl->ifbuf[fl->ifind];
949                         d = &fl->ifbuf[fl->ifcnt & 1];
950
951                         for(i=0; i<fl->ifcnt; i++)
952                                 *d++ = *p++;
953
954                         fl->ifind = fl->ifcnt & 1;
955                 }
956
957                 readamt = read(fl->ifhandle, &fl->ifbuf[fl->ifind + fl->ifcnt], QUANTUM);
958
959                 if (readamt < 0)
960                         return NULL;
961
962                 if ((fl->ifcnt += readamt) == 0)
963                         return NULL;
964         }
965 }
966
967
968 //
969 // Tokenize a line
970 //
971 int TokenizeLine(void)
972 {
973         uint8_t * ln = NULL;            // Ptr to current position in line
974         uint8_t * p;                            // Random character ptr
975         PTR tk;                                         // Token-deposit ptr
976         int state = 0;                          // State for keyword detector
977         int j = 0;                                      // Var for keyword detector
978         uint8_t c;                                      // Random char
979         uint64_t v;                                     // Random value
980         uint32_t cursize = 0;           // Current line's size (.b, .w, .l, .s, .q, .d)
981         uint8_t * nullspot = NULL;      // Spot to clobber for SYMBOL termination
982         int stuffnull;                          // 1:terminate SYMBOL '\0' at *nullspot
983         uint8_t c1;
984         int stringNum = 0;                      // Pointer to string locations in tokenized line
985         SYM* sy;                                        // For looking up symbols (.equr)
986         int equrundef = 0;                      // Flag for equrundef scanning
987
988 retry:
989
990         if (cur_inobj == NULL)          // Return EOF if input stack is empty
991                 return TKEOF;
992
993         // Get another line of input from the current input source: a file, a
994         // macro, or a repeat-block
995         switch (cur_inobj->in_type)
996         {
997         // Include-file:
998         // o  handle EOF;
999         // o  bump source line number;
1000         // o  tag the listing-line with a space;
1001         // o  kludge lines generated by Alcyon C.
1002         case SRC_IFILE:
1003                 if ((ln = GetNextLine()) == NULL)
1004                 {
1005 DEBUG { printf("TokenizeLine: Calling fpop() from SRC_IFILE...\n"); }
1006                         if (fpop() == 0)        // Pop input level
1007                                 goto retry;             // Try for more lines
1008                         else
1009                         {
1010                                 ifent->if_prev = (IFENT *)-1;   //Signal Assemble() that we have reached EOF with unbalanced if/endifs
1011                                 return TKEOF;
1012                         }
1013                 }
1014
1015                 curlineno++;                    // Bump line number
1016                 lntag = SPACE;
1017
1018                 break;
1019
1020         // Macro-block:
1021         // o  Handle end-of-macro;
1022         // o  tag the listing-line with an at (@) sign.
1023         case SRC_IMACRO:
1024                 if ((ln = GetNextMacroLine()) == NULL)
1025                 {
1026                         if (ExitMacro() == 0)   // Exit macro (pop args, do fpop(), etc)
1027                                 goto retry;                     // Try for more lines...
1028                         else
1029                                 return TKEOF;           // Oops, we got a non zero return code, signal EOF
1030                 }
1031
1032                 lntag = '@';
1033                 break;
1034
1035         // Repeat-block:
1036         // o  Handle end-of-repeat-block;
1037         // o  tag the listing-line with a pound (#) sign.
1038         case SRC_IREPT:
1039                 if ((ln = GetNextRepeatLine()) == NULL)
1040                 {
1041                         DEBUG { printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n"); }
1042                         fpop();
1043                         goto retry;
1044                 }
1045
1046                 lntag = '#';
1047                 break;
1048         }
1049
1050         // Save text of the line. We only do this during listings and within
1051         // macro-type blocks, since it is expensive to unconditionally copy every
1052         // line.
1053         if (lnsave)
1054         {
1055                 // Sanity check
1056                 if (strlen(ln) > LNSIZ)
1057                         return error("line too long (%d, max %d)", strlen(ln), LNSIZ);
1058
1059                 strcpy(lnbuf, ln);
1060         }
1061
1062         // General housekeeping
1063         tok = tokeol;                   // Set "tok" to EOL in case of error
1064         tk.u32 = etok;                  // Reset token ptr
1065         stuffnull = 0;                  // Don't stuff nulls
1066         totlines++;                             // Bump total #lines assembled
1067
1068         // See if the entire line is a comment. This is a win if the programmer
1069         // puts in lots of comments
1070         if (*ln == '*' || *ln == ';' || ((*ln == '/') && (*(ln + 1) == '/')))
1071                 goto goteol;
1072
1073         // And here we have a very ugly hack for signalling a single line 'turn off
1074         // optimization'. There's really no nice way to do this, so hack it is!
1075         optimizeOff = 0;                // Default is to take optimizations as they come
1076
1077         if (*ln == '!')
1078         {
1079                 optimizeOff = 1;        // Signal that we don't want to optimize this line
1080                 ln++;                           // & skip over the darned thing
1081         }
1082
1083         // Main tokenization loop;
1084         //  o  skip whitespace;
1085         //  o  handle end-of-line;
1086         //  o  handle symbols;
1087         //  o  handle single-character tokens (operators, etc.);
1088         //  o  handle multiple-character tokens (constants, strings, etc.).
1089         for(; *ln!=EOS;)
1090         {
1091                 // Check to see if there's enough space in the token buffer
1092                 if (tk.cp >= ((uint8_t *)(&tokbuf[TOKBUFSIZE])) - 20)
1093                 {
1094                         return error("token buffer overrun");
1095                 }
1096
1097                 // Skip whitespace, handle EOL
1098                 while (chrtab[*ln] & WHITE)
1099                         ln++;
1100
1101                 // Handle EOL, comment with ';'
1102                 if (*ln == EOS || *ln == ';'|| ((*ln == '/') && (*(ln + 1) == '/')))
1103                         break;
1104
1105                 // Handle start of symbol. Symbols are null-terminated in place. The
1106                 // termination is always one symbol behind, since there may be no place
1107                 // for a null in the case that an operator immediately follows the name.
1108                 c = chrtab[*ln];
1109
1110                 if (c & STSYM)
1111                 {
1112                         if (stuffnull)                  // Terminate old symbol from previous pass
1113                                 *nullspot = EOS;
1114
1115                         v = 0;                                  // Assume no DOT attrib follows symbol
1116                         stuffnull = 1;
1117
1118                         // In some cases, we need to check for a DOTx at the *beginning*
1119                         // of a symbol, as the "start" of the line we're currently looking
1120                         // at could be somewhere in the middle of that line!
1121                         if (*ln == '.')
1122                         {
1123                                 // Make sure that it's *only* a .[bwsl] following, and not the
1124                                 // start of a local symbol:
1125                                 if ((chrtab[*(ln + 1)] & DOT)
1126                                         && (dotxtab[*(ln + 1)] != 0)
1127                                         && !(chrtab[*(ln + 2)] & CTSYM))
1128                                 {
1129                                         // We found a legitimate DOTx construct, so add it to the
1130                                         // token stream:
1131                                         ln++;
1132                                         stuffnull = 0;
1133                                         *tk.u32++ = (TOKEN)dotxtab[*ln++];
1134                                         continue;
1135                                 }
1136                         }
1137
1138                         p = nullspot = ln++;    // Nullspot -> start of this symbol
1139
1140                         // Find end of symbol (and compute its length)
1141                         for(j=1; (int)chrtab[*ln]&CTSYM; j++)
1142                                 ln++;
1143
1144                         // Handle "DOT" special forms (like ".b") that follow a normal
1145                         // symbol or keyword:
1146                         if (*ln == '.')
1147                         {
1148                                 *ln++ = EOS;            // Terminate symbol
1149                                 stuffnull = 0;          // And never try it again
1150
1151                                 // Character following the '.' must have a DOT attribute, and
1152                                 // the chararacter after THAT one must not have a start-symbol
1153                                 // attribute (to prevent symbols that look like, for example,
1154                                 // "zingo.barf", which might be a good idea anyway....)
1155                                 if (((chrtab[*ln] & DOT) == 0) || (dotxtab[*ln] == 0))
1156                                         return error("[bwsl] must follow '.' in symbol");
1157
1158                                 v = (uint32_t)dotxtab[*ln++];
1159                                 cursize = (uint32_t)v;
1160
1161                                 if (chrtab[*ln] & CTSYM)
1162                                         return error("misuse of '.'; not allowed in symbols");
1163                         }
1164
1165                         // If the symbol is small, check to see if it's really the name of
1166                         // a register.
1167                         uint8_t *p2 = p;
1168                         if (j <= 5)
1169                         {
1170                                 for (state = 0; state >= 0;)
1171                                 {
1172                                         j = (int)tolowertab[*p++];
1173                                         j += regbase[state];
1174
1175                                         if (regcheck[j] != state)
1176                                         {
1177                                                 j = -1;
1178                                                 break;
1179                                         }
1180
1181                                         if (*p == EOS || p == ln)
1182                                         {
1183                                                 j = regaccept[j];
1184                                                 goto skip_keyword;
1185                                                 break;
1186                                         }
1187
1188                                         state = regtab[j];
1189                                 }
1190                         }
1191
1192                         // Scan for keywords
1193                         if ((j <= 0 || state <= 0) || p==p2)
1194                         {
1195                                 if (j <= KWSIZE)
1196                                 {
1197                                         for (state = 0; state >= 0;)
1198                                         {
1199                                                 j = (int)tolowertab[*p2++];
1200                                                 j += kwbase[state];
1201                         
1202                                                 if (kwcheck[j] != state)
1203                                                 {
1204                                                         j = -1;
1205                                                         break;
1206                                                 }
1207                         
1208                                                 if (*p == EOS || p2 == ln)
1209                                                 {
1210                                                         j = kwaccept[j];
1211                                                         break;
1212                                                 }
1213                         
1214                                                 state = kwtab[j];
1215                                         }
1216                                 }
1217                                 else
1218                                 {
1219                                         j = -1;
1220                                 }
1221                         }
1222
1223                         skip_keyword:
1224
1225                         // If we detected equrundef/regundef set relevant flag
1226                         if (j == KW_EQURUNDEF)
1227                         {
1228                                 equrundef = 1;
1229                                 j = -1;
1230                         }
1231
1232                         // If not tokenized keyword OR token was not found
1233                         if ((j < 0) || (state < 0))
1234                         {
1235                                 // Only proceed if no equrundef has been detected. In that case we need to store the symbol
1236                                 // because the directive handler (d_equrundef) will run outside this loop, further into procln.c
1237                                 if (!equrundef && !disabled)
1238                                 {
1239                                         // Last attempt: let's see if this is an equated register.
1240                                         // If yes, then just store the register's keyword value instead of the symbol
1241                                         char temp = *ln;
1242                                         *ln = 0;
1243                                         sy = lookup(nullspot, LABEL, 0);
1244                                         *ln = temp;
1245                                         if (sy)
1246                                         {
1247                                                 if (sy->sattre & EQUATEDREG)
1248                                                 {
1249                                                         *tk.u32++ = sy->svalue;
1250                                                         stuffnull = 0;
1251                                                         continue;
1252                                                 }
1253                                         }
1254                                 }
1255                                 // Ok, that failed, let's store the symbol instead
1256                                 *tk.u32++ = SYMBOL;
1257                                 string[stringNum] = nullspot;
1258                                 *tk.u32++ = stringNum;
1259                                 stringNum++;
1260                         }
1261                         else
1262                         {
1263                                 *tk.u32++ = (TOKEN)j;
1264                                 stuffnull = 0;
1265                         }
1266
1267                         if (v)                  // Record attribute token (if any)
1268                                 *tk.u32++ = (TOKEN)v;
1269
1270                         if (stuffnull)  // Arrange for string termination on next pass
1271                                 nullspot = ln;
1272
1273                         continue;
1274                 }
1275
1276                 // Handle identity tokens
1277                 if (c & SELF)
1278                 {
1279                         *tk.u32++ = *ln++;
1280                         continue;
1281                 }
1282
1283                 // Handle multiple-character tokens
1284                 if (c & MULTX)
1285                 {
1286                         switch (*ln++)
1287                         {
1288                         case '!':               // ! or !=
1289                                 if (*ln == '=')
1290                                 {
1291                                         *tk.u32++ = NE;
1292                                         ln++;
1293                                 }
1294                                 else
1295                                         *tk.u32++ = '!';
1296
1297                                 continue;
1298                         case '\'':              // 'string'
1299                                 if (m6502)
1300                                 {
1301                                         // Hardcoded for now, maybe this will change in the future
1302                                         *tk.u32++ = STRINGA8;
1303                                         goto dostring;
1304                                 }
1305                                 // Fall through
1306                         case '\"':              // "string"
1307                                 *tk.u32++ = STRING;
1308 dostring:
1309                                 c1 = ln[-1];
1310                                 string[stringNum] = ln;
1311                                 *tk.u32++ = stringNum;
1312                                 stringNum++;
1313
1314                                 for(p=ln; *ln!=EOS && *ln!=c1;)
1315                                 {
1316                                         c = *ln++;
1317
1318                                         if (c == '\\')
1319                                         {
1320                                                 switch (*ln++)
1321                                                 {
1322                                                 case EOS:
1323                                                         return(error("unterminated string"));
1324                                                 case 'e':
1325                                                         c = '\033';
1326                                                         break;
1327                                                 case 'n':
1328                                                         c = '\n';
1329                                                         break;
1330                                                 case 'b':
1331                                                         c = '\b';
1332                                                         break;
1333                                                 case 't':
1334                                                         c = '\t';
1335                                                         break;
1336                                                 case 'r':
1337                                                         c = '\r';
1338                                                         break;
1339                                                 case 'f':
1340                                                         c = '\f';
1341                                                         break;
1342                                                 case '\"':
1343                                                         c = '\"';
1344                                                         break;
1345                                                 case '\'':
1346                                                         c = '\'';
1347                                                         break;
1348                                                 case '\\':
1349                                                         c = '\\';
1350                                                         break;
1351                                                 case '{':
1352                                                         // If we're evaluating a macro
1353                                                         // this is valid because it's
1354                                                         // a parameter expansion
1355                                                 case '!':
1356                                                         // If we're evaluating a macro
1357                                                         // this is valid and expands to
1358                                                         // "dot-size"
1359                                                         break;
1360                                                 default:
1361                                                         warn("bad backslash code in string");
1362                                                         ln--;
1363                                                         break;
1364                                                 }
1365                                         }
1366
1367                                         *p++ = c;
1368                                 }
1369
1370                                 if (*ln++ != c1)
1371                                         return error("unterminated string");
1372
1373                                 *p++ = EOS;
1374                                 continue;
1375                         case '$':               // $, hex constant
1376                                 if (chrtab[*ln] & HDIGIT)
1377                                 {
1378                                         v = 0;
1379
1380                                         // Parse the hex value
1381                                         while (hextab[*ln] >= 0)
1382                                                 v = (v << 4) + (int)hextab[*ln++];
1383
1384                                         *tk.u32++ = CONST;
1385                                         *tk.u64++ = v;
1386
1387                                         if (*ln == '.')
1388                                         {
1389                                                 if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
1390                                                 {
1391                                                         *tk.u32++ = DOTW;
1392                                                         ln += 2;
1393                                                 }
1394                                                 else if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
1395                                                 {
1396                                                         *tk.u32++ = DOTL;
1397                                                         ln += 2;
1398                                                 }
1399                                         }
1400                                 }
1401                                 else
1402                                         *tk.u32++ = '$';
1403
1404                                 continue;
1405                         case '<':               // < or << or <> or <=
1406                                 switch (*ln)
1407                                 {
1408                                 case '<':
1409                                         *tk.u32++ = SHL;
1410                                         ln++;
1411                                         continue;
1412                                 case '>':
1413                                         *tk.u32++ = NE;
1414                                         ln++;
1415                                         continue;
1416                                 case '=':
1417                                         *tk.u32++ = LE;
1418                                         ln++;
1419                                         continue;
1420                                 default:
1421                                         *tk.u32++ = '<';
1422                                         continue;
1423                                 }
1424                         case ':':               // : or ::
1425                                 if (*ln == ':')
1426                                 {
1427                                         *tk.u32++ = DCOLON;
1428                                         ln++;
1429                                 }
1430                                 else
1431                                         *tk.u32++ = ':';
1432
1433                                 continue;
1434                         case '=':               // = or ==
1435                                 if (*ln == '=')
1436                                 {
1437                                         *tk.u32++ = DEQUALS;
1438                                         ln++;
1439                                 }
1440                                 else
1441                                         *tk.u32++ = '=';
1442
1443                                 continue;
1444                         case '>':               // > or >> or >=
1445                                 switch (*ln)
1446                                 {
1447                                 case '>':
1448                                         *tk.u32++ = SHR;
1449                                         ln++;
1450                                         continue;
1451                                 case '=':
1452                                         *tk.u32++ = GE;
1453                                         ln++;
1454                                         continue;
1455                                 default:
1456                                         *tk.u32++ = '>';
1457                                         continue;
1458                                 }
1459                         case '%':               // % or binary constant
1460                                 if (*ln < '0' || *ln > '1')
1461                                 {
1462                                         *tk.u32++ = '%';
1463                                         continue;
1464                                 }
1465
1466                                 v = 0;
1467
1468                                 while (*ln >= '0' && *ln <= '1')
1469                                         v = (v << 1) + *ln++ - '0';
1470
1471                                 if (*ln == '.')
1472                                 {
1473                                         if ((*(ln + 1) == 'b') || (*(ln + 1) == 'B'))
1474                                         {
1475                                                 v &= 0x000000FF;
1476                                                 ln += 2;
1477                                         }
1478
1479                                         if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
1480                                         {
1481                                                 v &= 0x0000FFFF;
1482                                                 ln += 2;
1483                                         }
1484
1485                                         if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
1486                                         {
1487                                                 v &= 0xFFFFFFFF;
1488                                                 ln += 2;
1489                                         }
1490                                 }
1491
1492                                 *tk.u32++ = CONST;
1493                                 *tk.u64++ = v;
1494                                 continue;
1495                         case '@':               // @ or octal constant
1496                                 if (*ln < '0' || *ln > '7')
1497                                 {
1498                                         *tk.u32++ = '@';
1499                                         continue;
1500                                 }
1501
1502                                 v = 0;
1503
1504                                 while (*ln >= '0' && *ln <= '7')
1505                                         v = (v << 3) + *ln++ - '0';
1506
1507                                 if (*ln == '.')
1508                                 {
1509                                         if ((*(ln + 1) == 'b') || (*(ln + 1) == 'B'))
1510                                         {
1511                                                 v &= 0x000000FF;
1512                                                 ln += 2;
1513                                         }
1514
1515                                         if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
1516                                         {
1517                                                 v &= 0x0000FFFF;
1518                                                 ln += 2;
1519                                         }
1520
1521                                         if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
1522                                         {
1523                                                 v &= 0xFFFFFFFF;
1524                                                 ln += 2;
1525                                         }
1526                                 }
1527
1528                                 *tk.u32++ = CONST;
1529                                 *tk.u64++ = v;
1530                                 continue;
1531                         case '^':               // ^ or ^^ <operator-name>
1532                                 if (*ln != '^')
1533                                 {
1534                                         *tk.u32++ = '^';
1535                                         continue;
1536                                 }
1537
1538                                 if (((int)chrtab[*++ln] & STSYM) == 0)
1539                                 {
1540                                         error("invalid symbol following ^^");
1541                                         continue;
1542                                 }
1543
1544                                 p = ln++;
1545
1546                                 while ((int)chrtab[*ln] & CTSYM)
1547                                         ++ln;
1548
1549                                 for(state=0; state>=0;)
1550                                 {
1551                                         // Get char, convert to lowercase
1552                                         j = (int)tolowertab[*p++];
1553
1554                                         //if (j >= 'A' && j <= 'Z')
1555                                         //      j += 0x20;
1556
1557                                         j += unarybase[state];
1558
1559                                         if (unarycheck[j] != state)
1560                                         {
1561                                                 j = -1;
1562                                                 break;
1563                                         }
1564
1565                                         if (*p == EOS || p == ln)
1566                                         {
1567                                                 j = unaryaccept[j];
1568                                                 break;
1569                                         }
1570
1571                                         state = unarytab[j];
1572                                 }
1573
1574                                 if (j < 0 || state < 0)
1575                                 {
1576                                         error("unknown symbol following ^^");
1577                                         continue;
1578                                 }
1579
1580                                 *tk.u32++ = (TOKEN)j;
1581                                 continue;
1582                         default:
1583                                 interror(2);    // Bad MULTX entry in chrtab
1584                                 continue;
1585                         }
1586                 }
1587
1588                 // Handle decimal constant
1589                 if (c & DIGIT)
1590                 {
1591                         uint8_t * numStart = ln;
1592                         v = 0;
1593
1594                         while ((int)chrtab[*ln] & DIGIT)
1595                                 v = (v * 10) + *ln++ - '0';
1596
1597                         // See if there's a .[bwl] after the constant & deal with it if so
1598                         if (*ln == '.')
1599                         {
1600                                 if ((*(ln + 1) == 'b') || (*(ln + 1) == 'B'))
1601                                 {
1602                                         v &= 0x000000FF;
1603                                         ln += 2;
1604                                         *tk.u32++ = CONST;
1605                                         *tk.u64++ = v;
1606                                         *tk.u32++ = DOTB;
1607                                 }
1608                                 else if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
1609                                 {
1610                                         v &= 0x0000FFFF;
1611                                         ln += 2;
1612                                         *tk.u32++ = CONST;
1613                                         *tk.u64++ = v;
1614                                         *tk.u32++ = DOTW;
1615                                 }
1616                                 else if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
1617                                 {
1618                                         v &= 0xFFFFFFFF;
1619                                         ln += 2;
1620                                         *tk.u32++ = CONST;
1621                                         *tk.u64++ = v;
1622                                         *tk.u32++ = DOTL;
1623                                 }
1624                                 else if ((int)chrtab[*(ln + 1)] & DIGIT)
1625                                 {
1626                                         // Hey, more digits after the dot, so we assume it's a
1627                                         // floating point number of some kind... numEnd will point
1628                                         // to the first non-float character after it's done
1629                                         char * numEnd;
1630                                         errno = 0;
1631                                         double f = strtod(numStart, &numEnd);
1632                                         ln = (uint8_t *)numEnd;
1633
1634                                         if (errno != 0)
1635                                                 return error("floating point parse error");
1636
1637                                         // N.B.: We use the C compiler's internal double
1638                                         //       representation for all internal float calcs and
1639                                         //       are reasonably sure that the size of said double
1640                                         //       is 8 bytes long (which we check for in fltpoint.c)
1641                                         *tk.u32++ = FCONST;
1642                                         *tk.dp = f;
1643                                         tk.u64++;
1644                                         continue;
1645                                 }
1646                         }
1647                         else
1648                         {
1649                                 *tk.u32++ = CONST;
1650                                 *tk.u64++ = v;
1651                         }
1652
1653 //printf("CONST: %i\n", v);
1654                         continue;
1655                 }
1656
1657                 // Handle illegal character
1658                 return error("illegal character $%02X found", *ln);
1659         }
1660
1661         // Terminate line of tokens and return "success."
1662
1663 goteol:
1664         tok = etok;                             // Set tok to beginning of line
1665
1666         if (stuffnull)                  // Terminate last SYMBOL
1667                 *nullspot = EOS;
1668
1669         *tk.u32++ = EOL;
1670
1671         return OK;
1672 }
1673
1674
1675 //
1676 // .GOTO <label>        goto directive
1677 //
1678 // The label is searched for starting from the first line of the current,
1679 // enclosing macro definition. If no enclosing macro exists, an error is
1680 // generated.
1681 //
1682 // A label is of the form:
1683 //
1684 // :<name><whitespace>
1685 //
1686 // The colon must appear in column 1.  The label is stripped prior to macro
1687 // expansion, and is NOT subject to macro expansion.  The whitespace may also
1688 // be EOL.
1689 //
1690 int d_goto(WORD unused)
1691 {
1692         // Setup for the search
1693         if (*tok != SYMBOL)
1694                 return error("missing label");
1695
1696         char * sym = string[tok[1]];
1697         tok += 2;
1698
1699         if (cur_inobj->in_type != SRC_IMACRO)
1700                 return error("goto not in macro");
1701
1702         IMACRO * imacro = cur_inobj->inobj.imacro;
1703         LLIST * defln = imacro->im_macro->lineList;
1704
1705         // Attempt to find the label, starting with the first line.
1706         for(; defln!=NULL; defln=defln->next)
1707         {
1708                 // Must start with a colon
1709                 if (defln->line[0] == ':')
1710                 {
1711                         // Compare names (sleazo string compare)
1712                         char * s1 = sym;
1713                         char * s2 = defln->line + 1;
1714
1715                         // Either we will match the strings to EOS on both, or we will
1716                         // match EOS on string 1 to whitespace on string 2. Otherwise, we
1717                         // have no match.
1718                         while ((*s1 == *s2) || ((*s1 == EOS) && (chrtab[*s2] & WHITE)))
1719                         {
1720                                 // If we reached the end of string 1 (sym), we're done.
1721                                 // Note that we're also checking for the end of string 2 as
1722                                 // well, since we've established they're equal above.
1723                                 if (*s1 == EOS)
1724                                 {
1725                                         // Found the label, set new macro next-line and return.
1726                                         imacro->im_nextln = defln;
1727                                         return 0;
1728                                 }
1729
1730                                 s1++;
1731                                 s2++;
1732                         }
1733                 }
1734         }
1735
1736         return error("goto label not found");
1737 }
1738
1739
1740 void DumpToken(TOKEN t)
1741 {
1742         if (t == COLON)
1743                 printf("[COLON]");
1744         else if (t == CONST)
1745                 printf("[CONST]");
1746         else if (t == FCONST)
1747                 printf("[FCONST]");
1748         else if (t == ACONST)
1749                 printf("[ACONST]");
1750         else if (t == STRING)
1751                 printf("[STRING]");
1752         else if (t == SYMBOL)
1753                 printf("[SYMBOL]");
1754         else if (t == EOS)
1755                 printf("[EOS]");
1756         else if (t == TKEOF)
1757                 printf("[TKEOF]");
1758         else if (t == DEQUALS)
1759                 printf("[DEQUALS]");
1760         else if (t == SET)
1761                 printf("[SET]");
1762         else if (t == REG)
1763                 printf("[REG]");
1764         else if (t == DCOLON)
1765                 printf("[DCOLON]");
1766         else if (t == GE)
1767                 printf("[GE]");
1768         else if (t == LE)
1769                 printf("[LE]");
1770         else if (t == NE)
1771                 printf("[NE]");
1772         else if (t == SHR)
1773                 printf("[SHR]");
1774         else if (t == SHL)
1775                 printf("[SHL]");
1776         else if (t == UNMINUS)
1777                 printf("[UNMINUS]");
1778         else if (t == DOTB)
1779                 printf("[DOTB]");
1780         else if (t == DOTW)
1781                 printf("[DOTW]");
1782         else if (t == DOTL)
1783                 printf("[DOTL]");
1784         else if (t == DOTQ)
1785                 printf("[DOTQ]");
1786         else if (t == DOTS)
1787                 printf("[DOTS]");
1788         else if (t == DOTD)
1789                 printf("[DOTD]");
1790         else if (t == DOTI)
1791                 printf("[DOTI]");
1792         else if (t == ENDEXPR)
1793                 printf("[ENDEXPR]");
1794         else if (t == CR_ABSCOUNT)
1795                 printf("[CR_ABSCOUNT]");
1796         else if (t == CR_FILESIZE)
1797                 printf("[CR_FILESIZE]");
1798         else if (t == CR_DEFINED)
1799                 printf("[CR_DEFINED]");
1800         else if (t == CR_REFERENCED)
1801                 printf("[CR_REFERENCED]");
1802         else if (t == CR_STREQ)
1803                 printf("[CR_STREQ]");
1804         else if (t == CR_MACDEF)
1805                 printf("[CR_MACDEF]");
1806         else if (t == CR_TIME)
1807                 printf("[CR_TIME]");
1808         else if (t == CR_DATE)
1809                 printf("[CR_DATE]");
1810         else if (t >= 0x20 && t <= 0x2F)
1811                 printf("[%c]", (char)t);
1812         else if (t >= 0x3A && t <= 0x3F)
1813                 printf("[%c]", (char)t);
1814         else if (t >= 0x80 && t <= 0x87)
1815                 printf("[D%u]", ((uint32_t)t) - 0x80);
1816         else if (t >= 0x88 && t <= 0x8F)
1817                 printf("[A%u]", ((uint32_t)t) - 0x88);
1818         else
1819                 printf("[%X:%c]", (uint32_t)t, (char)t);
1820 }
1821
1822
1823 void DumpTokenBuffer(void)
1824 {
1825         printf("Tokens [%X]: ", sloc);
1826
1827         for(TOKEN * t=tokbuf; *t!=EOL; t++)
1828         {
1829                 if (*t == COLON)
1830                         printf("[COLON]");
1831                 else if (*t == CONST)
1832                 {
1833                         PTR tp;
1834                         tp.u32 = t + 1;
1835                         printf("[CONST: $%lX]", *tp.u64);
1836                         t += 2;
1837                 }
1838                 else if (*t == FCONST)
1839                 {
1840                         PTR tp;
1841                         tp.u32 = t + 1;
1842                         printf("[FCONST: $%lX]", *tp.u64);
1843                         t += 2;
1844                 }
1845                 else if (*t == ACONST)
1846                 {
1847                         printf("[ACONST: $%X, $%X]", (uint32_t)t[1], (uint32_t)t[2]);
1848                         t += 2;
1849                 }
1850                 else if (*t == STRING)
1851                 {
1852                         t++;
1853                         printf("[STRING:\"%s\"]", string[*t]);
1854                 }
1855                 else if (*t == SYMBOL)
1856                 {
1857                         t++;
1858                         printf("[SYMBOL:\"%s\"]", string[*t]);
1859                 }
1860                 else if (*t == EOS)
1861                         printf("[EOS]");
1862                 else if (*t == TKEOF)
1863                         printf("[TKEOF]");
1864                 else if (*t == DEQUALS)
1865                         printf("[DEQUALS]");
1866                 else if (*t == SET)
1867                         printf("[SET]");
1868                 else if (*t == REG)
1869                         printf("[REG]");
1870                 else if (*t == DCOLON)
1871                         printf("[DCOLON]");
1872                 else if (*t == GE)
1873                         printf("[GE]");
1874                 else if (*t == LE)
1875                         printf("[LE]");
1876                 else if (*t == NE)
1877                         printf("[NE]");
1878                 else if (*t == SHR)
1879                         printf("[SHR]");
1880                 else if (*t == SHL)
1881                         printf("[SHL]");
1882                 else if (*t == UNMINUS)
1883                         printf("[UNMINUS]");
1884                 else if (*t == DOTB)
1885                         printf("[DOTB]");
1886                 else if (*t == DOTW)
1887                         printf("[DOTW]");
1888                 else if (*t == DOTL)
1889                         printf("[DOTL]");
1890                 else if (*t == DOTQ)
1891                         printf("[DOTQ]");
1892                 else if (*t == DOTS)
1893                         printf("[DOTS]");
1894                 else if (*t == DOTD)
1895                         printf("[DOTD]");
1896                 else if (*t == DOTI)
1897                         printf("[DOTI]");
1898                 else if (*t == ENDEXPR)
1899                         printf("[ENDEXPR]");
1900                 else if (*t == CR_ABSCOUNT)
1901                         printf("[CR_ABSCOUNT]");
1902                 else if (*t == CR_FILESIZE)
1903                         printf("[CR_FILESIZE]");
1904                 else if (*t == CR_DEFINED)
1905                         printf("[CR_DEFINED]");
1906                 else if (*t == CR_REFERENCED)
1907                         printf("[CR_REFERENCED]");
1908                 else if (*t == CR_STREQ)
1909                         printf("[CR_STREQ]");
1910                 else if (*t == CR_MACDEF)
1911                         printf("[CR_MACDEF]");
1912                 else if (*t == CR_TIME)
1913                         printf("[CR_TIME]");
1914                 else if (*t == CR_DATE)
1915                         printf("[CR_DATE]");
1916                 else if (*t >= 0x20 && *t <= 0x2F)
1917                         printf("[%c]", (char)*t);
1918                 else if (*t >= 0x3A && *t <= 0x3F)
1919                         printf("[%c]", (char)*t);
1920                 else if (*t >= 0x80 && *t <= 0x87)
1921                         printf("[D%u]", ((uint32_t)*t) - 0x80);
1922                 else if (*t >= 0x88 && *t <= 0x8F)
1923                         printf("[A%u]", ((uint32_t)*t) - 0x88);
1924                 else
1925                         printf("[%X:%c]", (uint32_t)*t, (char)*t);
1926         }
1927
1928         printf("[EOL]\n");
1929 }
1930