]> Shamusworld >> Repos - rmac/blob - token.c
Version bump for last commit. :-)
[rmac] / token.c
1 //
2 // RMAC - Renamed Macro Assembler for all Atari computers
3 // TOKEN.C - Token Handling
4 // Copyright (C) 199x Landon Dyer, 2011-2021 Reboot and Friends
5 // RMAC derived from MADMAC v1.07 Written by Landon Dyer, 1986
6 // Source utilised with the kind permission of Landon Dyer
7 //
8
9 #include "token.h"
10
11 #include <errno.h>
12 #include "direct.h"
13 #include "error.h"
14 #include "macro.h"
15 #include "procln.h"
16 #include "sect.h"
17 #include "symbol.h"
18
19 #define DECL_KW                         // Declare keyword arrays
20 #define DEF_KW                          // Declare keyword values
21 #include "kwtab.h"                      // Incl generated keyword tables & defs
22 #define DEF_REG68                       // Incl 68k register definitions
23 #include "68kregs.h"
24 #define DEF_REGRISC                     // Include GPU/DSP register definitions
25 #include "riscregs.h"
26 #define DEF_UNARY                       // Declare unary values
27 #define DECL_UNARY                      // Incl uanry keyword state machine tables
28 #include "unarytab.h"           // Incl generated unary tables & defs
29
30
// Tokenizer state shared with the rest of the assembler. InitTokenizer()
// resets most of these and builds the three 128-entry lookup tables.
int lnsave;					// 1; strcpy() text of current line (InitTokenizer sets 0: don't save)
uint32_t curlineno;			// Current line number (NOTE(review): old comment said "64K max", but the type is 32-bit -- confirm)
int totlines;				// Total # of lines
int mjump_align = 0;		// mjump alignment flag
char lntag;					// Line tag (starts out as SPACE)
char * curfname;			// Current filename (set by SetFilenameForErrorReporting)
char tolowertab[128];		// Uppercase ==> lowercase; every other code maps to itself
int8_t hextab[128];			// Hex digit value (0..15) per character, -1 if not a hex digit
char dotxtab[128];			// Table for ".b", ".s", etc.: DOTx value per letter, 0 if none
char irbuf[LNSIZ];			// Text for .rept block line
char lnbuf[LNSIZ];			// Text of current line
WORD filecount;				// Unique file number counter (starts at (WORD)-1)
WORD cfileno;				// Current file number (starts at (WORD)-1, meaning "no file yet")
TOKEN * tok;				// Ptr to current token
TOKEN * etok;				// Ptr past last token in tokbuf[]
TOKEN tokeol[1] = {EOL};	// Bailout end-of-line token
char * string[TOKBUFSIZE*2];// Token buffer string pointer storage
int optimizeOff;			// Optimization override flag


FILEREC * filerec;			// Head of the file record list (walked via frec_next)
FILEREC * last_fr;			// Presumably the tail of that list -- TODO confirm against its users

INOBJ * cur_inobj;			// Ptr current input obj (IFILE/IMACRO); top of the input stack
static INOBJ * f_inobj;		// Ptr list of free INOBJs (linked through in_link)
static IFILE * f_ifile;		// Ptr list of free IFILEs (linked through if_link)
static IMACRO * f_imacro;	// Ptr list of free IMACROs (linked through im_link)

static TOKEN tokbuf[TOKBUFSIZE];	// Token buffer (stack-like, all files)
60
// Character classification table, indexed by character code 0..255. Each
// entry is a sum of class flags; the trailing comment on every row names the
// characters that row covers. In this file WHITE is tested to find whitespace,
// DIGIT to recognise decimal digits and CTSYM to continue a symbol name.
// The index must be an unsigned character value: the table has 256 entries so
// that bytes above $7F can be looked up (and rejected as ILLEG).
//
// NOTE(review): lowercase 'x' carries DOT but uppercase 'X' does not, although
// InitTokenizer() fills dotxtab[] for both cases -- confirm whether ".X" is
// meant to be accepted.
uint8_t chrtab[0x100] = {
	ILLEG, ILLEG, ILLEG, ILLEG,			// NUL SOH STX ETX
	ILLEG, ILLEG, ILLEG, ILLEG,			// EOT ENQ ACK BEL
	ILLEG, WHITE, ILLEG, ILLEG,			// BS HT LF VT
	WHITE, ILLEG, ILLEG, ILLEG,			// FF CR SO SI

	ILLEG, ILLEG, ILLEG, ILLEG,			// DLE DC1 DC2 DC3
	ILLEG, ILLEG, ILLEG, ILLEG,			// DC4 NAK SYN ETB
	ILLEG, ILLEG, ILLEG, ILLEG,			// CAN EM SUB ESC
	ILLEG, ILLEG, ILLEG, ILLEG,			// FS GS RS US

	WHITE, MULTX, MULTX, SELF,			// SP ! " #
	MULTX+CTSYM, MULTX, SELF, MULTX,	// $ % & '
	SELF, SELF, SELF, SELF,				// ( ) * +
	SELF, SELF, STSYM, SELF,			// , - . /

	DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM,		// 0 1
	DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM,		// 2 3
	DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM,		// 4 5
	DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM,		// 6 7
	DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM,		// 8 9
	MULTX, MULTX,								// : ;
	MULTX, MULTX, MULTX, STSYM+CTSYM,			// < = > ?

	MULTX, STSYM+CTSYM+HDIGIT,					// @ A
	DOT+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT,	// B C
	DOT+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT,	// D E
	STSYM+CTSYM+HDIGIT, STSYM+CTSYM,			// F G
	STSYM+CTSYM, DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM,	// H I J K
	DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM,	// L M N O

	DOT+STSYM+CTSYM, DOT+STSYM+CTSYM, STSYM+CTSYM, DOT+STSYM+CTSYM,	// P Q R S
	STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, DOT+STSYM+CTSYM,	// T U V W
	STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, SELF,// X Y Z [
	SELF, SELF, MULTX, STSYM+CTSYM,				// \ ] ^ _

	ILLEG, STSYM+CTSYM+HDIGIT,					// ` a
	DOT+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT,	// b c
	DOT+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT,	// d e
	STSYM+CTSYM+HDIGIT, STSYM+CTSYM,			// f g
	STSYM+CTSYM, DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM,	// h i j k
	DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM,	// l m n o

	DOT+STSYM+CTSYM, DOT+STSYM+CTSYM, STSYM+CTSYM, DOT+STSYM+CTSYM,	// p q r s
	STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, DOT+STSYM+CTSYM,	// t u v w
	DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, SELF,		// x y z {
	SELF, SELF, SELF, ILLEG,					// | } ~ DEL

	// Anything above $7F is illegal (and yes, we need to check for this,
	// otherwise you get strange and spurious errors that will lead you astray)
	ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
	ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
	ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
	ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
	ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
	ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
	ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
	ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
	ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
	ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
	ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
	ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
	ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
	ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
	ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
	ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG
};
128
// Names of registers (and a few keywords), used by ExpandMacro() to turn a
// token back into source text. The table is indexed by (token - REG68_D0).
// The numbers in each row's trailing comment appear to be the token values
// that row covers, i.e. they assume REG68_D0 == 128 -- TODO confirm against
// the generated keyword tables. Empty strings mark slots with no name.
// The table holds 192 entries; nothing in this file checks a token against
// that upper bound before indexing.
static char * regname[] = {
	"d0","d1","d2","d3","d4","d5","d6","d7", // 128,135
	"a0","a1","a2","a3","a4","a5","a6","sp", // 136,143
	"ssp","pc","sr","ccr","regequ","set","reg","r0", // 144,151
	"r1","r2","r3","r4","r5","r6","r7","r8", // 152,159
	"r9","r10","r11","r12","r13","r14","r15","r16", // 160,167
	"r17","r18","r19","r20","r21","r22","r23","r24", // 168,175
	"r25","r26","r27","r28","r29","r30","r31","ccdef", // 176,183
	"usp","ic40","dc40","bc40","sfc","dfc","","vbr", // 184,191
	"cacr","caar","msp","isp","tc","itt0","itt1","dtt0", // 192,199
	"dtt1","mmusr","urp","srp","iacr0","iacr1","dacr0","dacr1", // 200,207
	"tt0","tt1","crp","","","","","", // 208,215
	"","","","","fpiar","fpsr","fpcr","", // 216,223
	"fp0","fp1","fp2","fp3","fp4","fp5","fp6","fp7", // 224,231
	"","","","","","","","", // 232,239
	"","","","","","","","", // 240,247
	"","","","","","","","", // 248,255
	"","","","","x0","x1","y0","y1", // 256,263
	"","b0","","b2","","b1","a","b", // 264,271
	"mr","omr","la","lc","ssh","ssl","ss","", // 272,279
	"n0","n1","n2","n3","n4","n5","n6","n7", // 280,287
	"m0","m1","m2","m3","m4","m5","m6","m7", // 288,295
	"","","","","","","l","p", // 296,303
	"mr","omr","la","lc","ssh","ssl","ss","", // 304,311 (same names as 272,279)
	"a10","b10","x","y","","","ab","ba"  // 312,319
};
156
157
158 //
159 // Initialize tokenizer
160 //
161 void InitTokenizer(void)
162 {
163         int i;                                                                  // Iterator
164         char * htab = "0123456789abcdefABCDEF"; // Hex character table
165
166         lnsave = 0;                                                             // Don't save lines
167         curfname = "";                                                  // No file, empty filename
168         filecount = (WORD)-1;
169         cfileno = (WORD)-1;                                             // cfileno gets bumped to 0
170         curlineno = 0;
171         totlines = 0;
172         etok = tokbuf;
173         f_inobj = NULL;
174         f_ifile = NULL;
175         f_imacro = NULL;
176         cur_inobj = NULL;
177         filerec = NULL;
178         last_fr = NULL;
179         lntag = SPACE;
180
181         // Initialize hex, "dot" and tolower tables
182         for(i=0; i<128; i++)
183         {
184                 hextab[i] = -1;
185                 dotxtab[i] = 0;
186                 tolowertab[i] = (char)i;
187         }
188
189         for(i=0; htab[i]!=EOS; i++)
190                 hextab[htab[i]] = (char)((i < 16) ? i : i - 6);
191
192         for(i='A'; i<='Z'; i++)
193                 tolowertab[i] |= 0x20;
194
195         // These characters are legal immediately after a period
196         dotxtab['b'] = DOTB;                                    // .b .B .s .S
197         dotxtab['B'] = DOTB;
198         //dotxtab['s'] = DOTB;
199         //dotxtab['S'] = DOTB;
200         dotxtab['w'] = DOTW;                                    // .w .W
201         dotxtab['W'] = DOTW;
202         dotxtab['l'] = DOTL;                                    // .l .L
203         dotxtab['L'] = DOTL;
204         dotxtab['i'] = DOTI;                                    // .i .I (WTF is this???)
205         dotxtab['I'] = DOTI;
206         dotxtab['D'] = DOTD;                                    // .d .D (double)
207         dotxtab['d'] = DOTD;
208         dotxtab['S'] = DOTS;                                    // .s .S
209         dotxtab['s'] = DOTS;
210         dotxtab['Q'] = DOTQ;                                    // .q .Q (quad word)
211         dotxtab['q'] = DOTQ;
212         dotxtab['X'] = DOTX;                                    // .x .x
213         dotxtab['x'] = DOTX;
214         dotxtab['P'] = DOTP;                                    // .p .P
215         dotxtab['p'] = DOTP;
216 }
217
218
219 void SetFilenameForErrorReporting(void)
220 {
221         WORD fnum = cfileno;
222
223         // Check for absolute top filename (this should never happen)
224         if (fnum == -1)
225         {
226                 curfname = "(*top*)";
227                 return;
228         }
229
230         FILEREC * fr = filerec;
231
232         // Advance to the correct record...
233         while (fr != NULL && fnum != 0)
234         {
235                 fr = fr->frec_next;
236                 fnum--;
237         }
238
239         // Check for file # record not found (this should never happen either)
240         if (fr == NULL)
241         {
242                 curfname = "(*NOT FOUND*)";
243                 return;
244         }
245
246         curfname = fr->frec_name;
247 }
248
249
250 //
251 // Allocate an IFILE or IMACRO
252 //
253 INOBJ * a_inobj(int typ)
254 {
255         INOBJ * inobj;
256         IFILE * ifile;
257         IMACRO * imacro;
258
259         // Allocate and initialize INOBJ first
260         if (f_inobj == NULL)
261                 inobj = malloc(sizeof(INOBJ));
262         else
263         {
264                 inobj = f_inobj;
265                 f_inobj = f_inobj->in_link;
266         }
267
268         switch (typ)
269         {
270         case SRC_IFILE:                                                 // Alloc and init an IFILE
271                 if (f_ifile == NULL)
272                         ifile = malloc(sizeof(IFILE));
273                 else
274                 {
275                         ifile = f_ifile;
276                         f_ifile = f_ifile->if_link;
277                 }
278
279                 inobj->inobj.ifile = ifile;
280                 break;
281
282         case SRC_IMACRO:                                                // Alloc and init an IMACRO
283                 if (f_imacro == NULL)
284                         imacro = malloc(sizeof(IMACRO));
285                 else
286                 {
287                         imacro = f_imacro;
288                         f_imacro = f_imacro->im_link;
289                 }
290
291                 inobj->inobj.imacro = imacro;
292                 break;
293
294         case SRC_IREPT:                                                 // Alloc and init an IREPT
295                 inobj->inobj.irept = malloc(sizeof(IREPT));
296                 DEBUG { printf("alloc IREPT\n"); }
297                 break;
298         }
299
300         // Install INOBJ on top of input stack
301         inobj->in_ifent = ifent;                                // Record .if context on entry
302         inobj->in_type = (WORD)typ;
303         inobj->in_otok = tok;
304         inobj->in_etok = etok;
305         inobj->in_link = cur_inobj;
306         cur_inobj = inobj;
307
308         return inobj;
309 }
310
311
312 //
313 // Perform macro substitution from 'orig' to 'dest'. Return OK or some error.
314 // A macro reference is in one of two forms:
315 // \name <non-name-character>
316 // \{name}
317 // A doubled backslash (\\) is compressed to a single backslash (\).
318 // Argument definitions have been pre-tokenized, so we have to turn them back
319 // into text. This means that numbers, in particular, become hex, regardless of
320 // their representation when the macro was invoked. This is a hack.
321 // A label may appear at the beginning of the line:
322 // :<name><whitespace>
323 // (the colon must be in the first column). These labels are stripped before
324 // macro expansion takes place.
325 //
326 int ExpandMacro(char * src, char * dest, int destsiz)
327 {
328         int i;
329         int questmark;                  // \? for testing argument existence
330         char mname[128];                // Assume max size of a formal arg name
331         char numbuf[20];                // Buffer for text of CONSTs
332         TOKEN * tk;
333         SYM * arg;
334         char ** symbolString;
335
336         DEBUG { printf("ExM: src=\"%s\"\n", src); }
337
338         IMACRO * imacro = cur_inobj->inobj.imacro;
339         int macnum = (int)(imacro->im_macro->sattr);
340
341         char * dst = dest;                                              // Next dest slot
342         char * edst = dest + destsiz - 1;               // End + 1(?) of dest buffer
343
344         // Check for (and skip over) any "label" on the line
345         char * s = src;
346         char * d = NULL;
347
348         if (*s == ':')
349         {
350                 while (*s != EOS && !(chrtab[*s] & WHITE))
351                         s++;
352
353                 if (*s != EOS)
354                         s++;                                                    // Skip first whitespace
355         }
356
357         // Expand the rest of the line
358         while (*s != EOS)
359         {
360                 // Copy single character
361                 if (*s != '\\')
362                 {
363                         if (dst >= edst)
364                                 goto overflow;
365
366                         // Skip comments in case a loose @ or \ is in there
367                         // In that case the tokeniser was trying to expand it.
368                         if ((*s == ';') || ((*s == '/') && (*(s + 1) == '/')))
369                                 goto skipcomments;
370
371                         *dst++ = *s++;
372                 }
373                 // Do macro expansion
374                 else
375                 {
376                         questmark = 0;
377
378                         // Do special cases
379                         switch (*++s)
380                         {
381                         case '\\':                                              // \\, \ (collapse to single backslash)
382                                 if (dst >= edst)
383                                         goto overflow;
384
385                                 *dst++ = *s++;
386                                 continue;
387                         case '?':                                               // \? <macro>  set `questmark' flag
388                                 s++;
389                                 questmark = 1;
390                                 break;
391                         case '#':                                               // \#, number of arguments
392                                 sprintf(numbuf, "%d", (int)imacro->im_nargs);
393                                 goto copystr;
394                         case '!':                                               // \! size suffix supplied on invocation
395                                 switch ((int)imacro->im_siz)
396                                 {
397                                 case SIZN: d = "";   break;
398                                 case SIZB: d = ".b"; break;
399                                 case SIZW: d = ".w"; break;
400                                 case SIZL: d = ".l"; break;
401                                 }
402
403                                 goto copy_d;
404                         case '~':                                               // ==> unique label string Mnnnn...
405                                 sprintf(numbuf, "M%u", curuniq);
406 copystr:
407                                 d = numbuf;
408 copy_d:
409                                 s++;
410
411                                 while (*d != EOS)
412                                 {
413                                         if (dst >= edst)
414                                                 goto overflow;
415                                         else
416                                                 *dst++ = *d++;
417                                 }
418
419                                 continue;
420                         case EOS:
421                                 return error("missing argument name");
422                         }
423
424                         // \n ==> argument number 'n', 0..9
425                         if (chrtab[*s] & DIGIT)
426                         {
427                                 i = *s++ - '1';
428
429                                 if (i < 0)
430                                         i = 9;
431
432                                 goto arg_num;
433                         }
434
435                         // Get argument name: \name, \{name}
436                         d = mname;
437
438                         // \label
439                         if (*s != '{')
440                         {
441                                 do
442                                 {
443                                         *d++ = *s++;
444                                 }
445                                 while (chrtab[*s] & CTSYM);
446                         }
447                         // \\{label}
448                         else
449                         {
450                                 for(++s; *s != EOS && *s != '}';)
451                                         *d++ = *s++;
452
453                                 if (*s != '}')
454                                         return error("missing closing brace ('}')");
455                                 else
456                                         s++;
457                         }
458
459                         *d = EOS;
460
461                         // Lookup the argument and copy its (string) value into the
462                         // destination string
463                         DEBUG { printf("argument='%s'\n", mname); }
464
465                         if ((arg = lookup(mname, MACARG, macnum)) == NULL)
466                                 return error("undefined argument: '%s'", mname);
467                         else
468                         {
469                                 // Convert a string of tokens (terminated with EOL) back into
470                                 // text. If an argument is out of range (not specified in the
471                                 // macro invocation) then it is ignored.
472                                 i = (int)arg->svalue;
473 arg_num:
474                                 DEBUG { printf("~argnumber=%d\n", i); }
475                                 tk = NULL;
476
477                                 if (i < imacro->im_nargs)
478                                 {
479                                         tk = imacro->argument[i].token;
480                                         symbolString = imacro->argument[i].string;
481 //DEBUG
482 //{
483 //      printf("ExM: Preparing to parse argument #%u...\n", i);
484 //      DumpTokens(tk);
485 //}
486                                 }
487
488                                 // \?arg yields:
489                                 //    0  if the argument is empty or non-existant,
490                                 //    1  if the argument is not empty
491                                 if (questmark)
492                                 {
493                                         if (tk == NULL || *tk == EOL)
494                                                 questmark = 0;
495
496                                         if (dst >= edst)
497                                                 goto overflow;
498
499                                         *dst++ = (char)(questmark + '0');
500                                         continue;
501                                 }
502
503                                 // Argument # is in range, so expand it
504                                 if (tk != NULL)
505                                 {
506                                         while (*tk != EOL)
507                                         {
508                                                 // Reverse-translation from a token number to a string.
509                                                 // This is a hack. It might be better table-driven.
510                                                 d = NULL;
511
512                                                 if (*tk >= REG68_D0)
513                                                 {
514                                                         d = regname[(int)*tk++ - REG68_D0];
515                                                         goto strcopy;
516                                                 }
517                                                 else
518                                                 {
519                                                         switch ((int)*tk++)
520                                                         {
521                                                         case SYMBOL:
522                                                                 d = symbolString[*tk++];
523 DEBUG { printf("ExM: SYMBOL=\"%s\"", d); }
524                                                                 break;
525                                                         case STRING:
526                                                                 d = symbolString[*tk++];
527
528                                                                 if (dst >= edst)
529                                                                         goto overflow;
530
531                                                                 *dst++ = '"';
532
533                                                                 while (*d != EOS)
534                                                                 {
535                                                                         if (dst >= edst)
536                                                                                 goto overflow;
537                                                                         else
538                                                                                 *dst++ = *d++;
539                                                                 }
540
541                                                                 if (dst >= edst)
542                                                                         goto overflow;
543
544                                                                 *dst++ = '"';
545                                                                 continue;
546                                                                 break;
547 // Shamus: Changing the format specifier from %lx to %ux caused the assembler
548 //         to choke on legitimate code... Need to investigate this further
549 //         before changing anything else here!
550                                                         case CONST:
551 //                                                              sprintf(numbuf, "$%lx", (uint64_t)*tk++);
552                                                                 sprintf(numbuf, "$%" PRIX64, (uint64_t)*tk++);
553                                                                 tk++;
554                                                                 d = numbuf;
555                                                                 break;
556                                                         case DEQUALS:
557                                                                 d = "==";
558                                                                 break;
559                                                         case SET:
560                                                                 d = "set";
561                                                                 break;
562                                                         case COLON:
563                                                                 d = ":";
564                                                                 break;
565                                                         case DCOLON:
566                                                                 d = "::";
567                                                                 break;
568                                                         case GE:
569                                                                 d = ">=";
570                                                                 break;
571                                                         case LE:
572                                                                 d = "<=";
573                                                                 break;
574                                                         case NE:
575                                                                 d = "<>";
576                                                                 break;
577                                                         case SHR:
578                                                                 d = ">>";
579                                                                 break;
580                                                         case SHL:
581                                                                 d = "<<";
582                                                                 break;
583                                                         case DOTB:
584                                                                 d = ".b";
585                                                                 break;
586                                                         case DOTW:
587                                                                 d = ".w";
588                                                                 break;
589                                                         case DOTL:
590                                                                 d = ".l";
591                                                                 break;
592                                                         case CR_ABSCOUNT:
593                                                                 d = "^^abscount";
594                                                                 break;
595                                                         case CR_FILESIZE:
596                                                                 d = "^^filesize";
597                                                                 break;
598                                                         case CR_DATE:
599                                                                 d = "^^date";
600                                                                 break;
601                                                         case CR_TIME:
602                                                                 d = "^^time";
603                                                                 break;
604                                                         case CR_DEFINED:
605                                                                 d = "^^defined ";
606                                                                 break;
607                                                         case CR_REFERENCED:
608                                                                 d = "^^referenced ";
609                                                                 break;
610                                                         case CR_STREQ:
611                                                                 d = "^^streq ";
612                                                                 break;
613                                                         case CR_MACDEF:
614                                                                 d = "^^macdef ";
615                                                                 break;
616                                                         default:
617                                                                 if (dst >= edst)
618                                                                         goto overflow;
619
620                                                                 *dst++ = (char)*(tk - 1);
621                                                                 break;
622                                                         }
623                                                 }
624
625                                                 // If 'd' != NULL, copy string to destination
626                                                 if (d != NULL)
627                                                 {
628 strcopy:
629                                                         DEBUG printf("d='%s'\n", d);
630
631                                                         while (*d != EOS)
632                                                         {
633                                                                 if (dst >= edst)
634                                                                         goto overflow;
635                                                                 else
636                                                                         *dst++ = *d++;
637                                                         }
638                                                 }
639                                         }
640                                 }
641                         }
642                 }
643         }
644
645 skipcomments:
646
647         *dst = EOS;
648         DEBUG { printf("ExM: dst=\"%s\"\n", dest); }
649         return OK;
650
651 overflow:
652         *dst = EOS;
653         DEBUG { printf("*** OVERFLOW LINE ***\n%s\n", dest); }
654         return fatal("line too long as a result of macro expansion");
655 }
656
657
658 //
659 // Get next line of text from a macro
660 //
661 char * GetNextMacroLine(void)
662 {
663         IMACRO * imacro = cur_inobj->inobj.imacro;
664         LLIST * strp = imacro->im_nextln;
665
666         if (strp == NULL)                                               // End-of-macro
667                 return NULL;
668
669         imacro->im_nextln = strp->next;
670 //      ExpandMacro((char *)(strp + 1), imacro->im_lnbuf, LNSIZ);
671         ExpandMacro(strp->line, imacro->im_lnbuf, LNSIZ);
672
673         return imacro->im_lnbuf;
674 }
675
676
677 //
678 // Get next line of text from a repeat block
679 //
680 char * GetNextRepeatLine(void)
681 {
682         IREPT * irept = cur_inobj->inobj.irept;
683 //      LONG * strp = irept->ir_nextln;                 // initial null
684
685         // Do repeat at end of .rept block's string list
686 //      if (strp == NULL)
687         if (irept->ir_nextln == NULL)
688         {
689                 DEBUG { printf("back-to-top-of-repeat-block count=%d\n", (int)irept->ir_count); }
690                 irept->ir_nextln = irept->ir_firstln;   // copy first line
691
692                 if (irept->ir_count-- == 0)
693                 {
694                         DEBUG { printf("end-repeat-block\n"); }
695                         return NULL;
696                 }
697                 reptuniq++;
698 //              strp = irept->ir_nextln;
699         }
700         // Mark the current macro line in the irept object
701         // This is probably overkill - a global variable
702         // would suffice here (it only gets used during
703         // error reporting anyway)
704         irept->lineno = irept->ir_nextln->lineno;
705
706         // Copy the rept lines verbatim, unless we're in nest level 0.
707         // Then, expand any \~ labels to unique numbers (Rn)
708         if (rptlevel)
709         {
710                 strcpy(irbuf, irept->ir_nextln->line);
711         }
712         else
713         {
714                 uint32_t linelen = strlen(irept->ir_nextln->line);
715                 uint8_t *p_line = irept->ir_nextln->line;
716                 char *irbufwrite = irbuf;
717                 for (int i = 0; i <= linelen; i++)
718                 {
719                         uint8_t c;
720                         c = *p_line++;
721                         if (c == '\\' && *p_line == '~')
722                         {
723                                 p_line++;
724                                 irbufwrite += sprintf(irbufwrite, "R%u", reptuniq);
725                         }
726                         else
727                         {
728                                 *irbufwrite++ = c;
729                         }
730                 }
731         }
732
733         DEBUG { printf("repeat line='%s'\n", irbuf); }
734 //      irept->ir_nextln = (LONG *)*strp;
735         irept->ir_nextln = irept->ir_nextln->next;
736
737         return irbuf;
738 }
739
740
741 //
742 // Include a source file used at the root, and for ".include" files
743 //
744 int include(int handle, char * fname)
745 {
746         // Debug mode
747         DEBUG { printf("[include: %s, cfileno=%u]\n", fname, cfileno); }
748
749         // Alloc and initialize include-descriptors
750         INOBJ * inobj = a_inobj(SRC_IFILE);
751         IFILE * ifile = inobj->inobj.ifile;
752
753         ifile->ifhandle = handle;                       // Setup file handle
754         ifile->ifind = ifile->ifcnt = 0;        // Setup buffer indices
755         ifile->ifoldlineno = curlineno;         // Save old line number
756         ifile->ifoldfname = curfname;           // Save old filename
757         ifile->ifno = cfileno;                          // Save old file number
758
759         // NB: This *must* be preincrement, we're adding one to the filecount here!
760         cfileno = ++filecount;                          // Compute NEW file number
761         curfname = strdup(fname);                       // Set current filename (alloc storage)
762         curlineno = 0;                                          // Start on line zero
763
764         // Add another file to the file-record
765         FILEREC * fr = (FILEREC *)malloc(sizeof(FILEREC));
766         fr->frec_next = NULL;
767         fr->frec_name = curfname;
768
769         if (last_fr == NULL)
770                 filerec = fr;                                   // Add first filerec
771         else
772                 last_fr->frec_next = fr;                // Append to list of filerecs
773
774         last_fr = fr;
775         DEBUG { printf("[include: curfname: %s, cfileno=%u]\n", curfname, cfileno); }
776
777         return OK;
778 }
779
780
781 //
782 // Pop the current input level
783 //
784 int fpop(void)
785 {
786         INOBJ * inobj = cur_inobj;
787
788         if (inobj == NULL)
789                 return 0;
790
791         // Pop IFENT levels until we reach the conditional assembly context we
792         // were at when the input object was entered.
793         int numUnmatched = 0;
794
795         while (ifent != inobj->in_ifent)
796         {
797                 if (d_endif() != 0)     // Something bad happened during endif parsing?
798                         return -1;              // If yes, bail instead of getting stuck in a loop
799
800                 numUnmatched++;
801         }
802
803         // Give a warning to the user that we had to wipe their bum for them
804         if (numUnmatched > 0)
805                 warn("missing %d .endif(s)", numUnmatched);
806
807         tok = inobj->in_otok;   // Restore tok and etok
808         etok = inobj->in_etok;
809
810         switch (inobj->in_type)
811         {
812         case SRC_IFILE:                 // Pop and release an IFILE
813         {
814                 DEBUG { printf("[Leaving: %s]\n", curfname); }
815
816                 IFILE * ifile = inobj->inobj.ifile;
817                 ifile->if_link = f_ifile;
818                 f_ifile = ifile;
819                 close(ifile->ifhandle);                 // Close source file
820 DEBUG { printf("[fpop (pre):  curfname=%s]\n", curfname); }
821                 curfname = ifile->ifoldfname;   // Set current filename
822 DEBUG { printf("[fpop (post): curfname=%s]\n", curfname); }
823 DEBUG { printf("[fpop: (pre)  cfileno=%d ifile->ifno=%d]\n", (int)cfileno, (int)ifile->ifno); }
824                 curlineno = ifile->ifoldlineno; // Set current line#
825                 DEBUG { printf("cfileno=%d ifile->ifno=%d\n", (int)cfileno, (int)ifile->ifno); }
826                 cfileno = ifile->ifno;                  // Restore current file number
827 DEBUG { printf("[fpop: (post) cfileno=%d ifile->ifno=%d]\n", (int)cfileno, (int)ifile->ifno); }
828                 break;
829         }
830
831         case SRC_IMACRO:                                        // Pop and release an IMACRO
832         {
833                 IMACRO * imacro = inobj->inobj.imacro;
834                 imacro->im_link = f_imacro;
835                 f_imacro = imacro;
836                 break;
837         }
838
839         case SRC_IREPT:                                         // Pop and release an IREPT
840         {
841                 DEBUG { printf("dealloc IREPT\n"); }
842                 LLIST * p = inobj->inobj.irept->ir_firstln;
843
844                 // Deallocate repeat lines
845                 while (p != NULL)
846                 {
847                         free(p->line);
848                         p = p->next;
849                 }
850
851                 break;
852         }
853         }
854
855         cur_inobj = inobj->in_link;
856         inobj->in_link = f_inobj;
857         f_inobj = inobj;
858
859         return 0;
860 }
861
862
863 //
864 // Get line from file into buf, return NULL on EOF or ptr to the start of a
865 // null-term line
866 //
867 char * GetNextLine(void)
868 {
869         int i, j;
870         char * p, * d;
871         int readamt = -1;                                               // 0 if last read() yeilded 0 bytes
872         IFILE * fl = cur_inobj->inobj.ifile;
873
874         for(;;)
875         {
876                 // Scan for next end-of-line; handle stupid text formats by treating
877                 // \r\n the same as \n. (lone '\r' at end of buffer means we have to
878                 // check for '\n').
879                 d = &fl->ifbuf[fl->ifind];
880
881                 for(p=d, i=0, j=fl->ifcnt; i<j; i++, p++)
882                 {
883                         if (*p == '\r' || *p == '\n')
884                         {
885                                 i++;
886
887                                 if (*p == '\r')
888                                 {
889                                         if (i >= j)
890                                                 break;  // Need to read more, then look for '\n' to eat
891                                         else if (p[1] == '\n')
892                                                 i++;
893                                 }
894
895                                 // Cover up the newline with end-of-string sentinel
896                                 *p = '\0';
897
898                                 fl->ifind += i;
899                                 fl->ifcnt -= i;
900                                 return d;
901                         }
902                 }
903
904                 // Handle hanging lines by ignoring them (Input file is exhausted, no
905                 // \r or \n on last line)
906                 // Shamus: This is retarded. Never ignore any input!
907                 if (!readamt && fl->ifcnt)
908                 {
909 #if 0
910                         fl->ifcnt = 0;
911                         *p = '\0';
912                         return NULL;
913 #else
914                         // Really should check to see if we're at the end of the buffer!
915                         // :-P
916                         fl->ifbuf[fl->ifind + fl->ifcnt] = '\0';
917                         fl->ifcnt = 0;
918                         return &fl->ifbuf[fl->ifind];
919 #endif
920                 }
921
922                 // Truncate and return absurdly long lines.
923                 if (fl->ifcnt >= QUANTUM)
924                 {
925                         fl->ifbuf[fl->ifind + fl->ifcnt - 1] = '\0';
926                         fl->ifcnt = 0;
927                         return &fl->ifbuf[fl->ifind];
928                 }
929
930                 // Relocate what's left of a line to the beginning of the buffer, and
931                 // read some more of the file in; return NULL if the buffer's empty and
932                 // on EOF.
933                 if (fl->ifind != 0)
934                 {
935                         p = &fl->ifbuf[fl->ifind];
936                         d = &fl->ifbuf[fl->ifcnt & 1];
937
938                         for(i=0; i<fl->ifcnt; i++)
939                                 *d++ = *p++;
940
941                         fl->ifind = fl->ifcnt & 1;
942                 }
943
944                 readamt = read(fl->ifhandle, &fl->ifbuf[fl->ifind + fl->ifcnt], QUANTUM);
945
946                 if (readamt < 0)
947                         return NULL;
948
949                 if ((fl->ifcnt += readamt) == 0)
950                         return NULL;
951         }
952 }
953
954
955 //
956 // Tokenize a line
957 //
958 int TokenizeLine(void)
959 {
960         uint8_t * ln = NULL;            // Ptr to current position in line
961         uint8_t * p;                            // Random character ptr
962         PTR tk;                                         // Token-deposit ptr
963         int state = 0;                          // State for keyword detector
964         int j = 0;                                      // Var for keyword detector
965         uint8_t c;                                      // Random char
966         uint64_t v;                                     // Random value
967         uint32_t cursize = 0;           // Current line's size (.b, .w, .l, .s, .q, .d)
968         uint8_t * nullspot = NULL;      // Spot to clobber for SYMBOL termination
969         int stuffnull;                          // 1:terminate SYMBOL '\0' at *nullspot
970         uint8_t c1;
971         int stringNum = 0;                      // Pointer to string locations in tokenized line
972         SYM* sy;                                        // For looking up symbols (.equr)
973         int equrundef = 0;                      // Flag for equrundef scanning
974
975 retry:
976
977         if (cur_inobj == NULL)          // Return EOF if input stack is empty
978                 return TKEOF;
979
980         // Get another line of input from the current input source: a file, a
981         // macro, or a repeat-block
982         switch (cur_inobj->in_type)
983         {
984         // Include-file:
985         // o  handle EOF;
986         // o  bump source line number;
987         // o  tag the listing-line with a space;
988         // o  kludge lines generated by Alcyon C.
989         case SRC_IFILE:
990                 if ((ln = GetNextLine()) == NULL)
991                 {
992 DEBUG { printf("TokenizeLine: Calling fpop() from SRC_IFILE...\n"); }
993                         if (fpop() == 0)        // Pop input level
994                                 goto retry;             // Try for more lines
995                         else
996                         {
997                                 ifent->if_prev = (IFENT *)-1;   //Signal Assemble() that we have reached EOF with unbalanced if/endifs
998                                 return TKEOF;
999                         }
1000                 }
1001
1002                 curlineno++;                    // Bump line number
1003                 lntag = SPACE;
1004
1005                 break;
1006
1007         // Macro-block:
1008         // o  Handle end-of-macro;
1009         // o  tag the listing-line with an at (@) sign.
1010         case SRC_IMACRO:
1011                 if ((ln = GetNextMacroLine()) == NULL)
1012                 {
1013                         if (ExitMacro() == 0)   // Exit macro (pop args, do fpop(), etc)
1014                                 goto retry;                     // Try for more lines...
1015                         else
1016                                 return TKEOF;           // Oops, we got a non zero return code, signal EOF
1017                 }
1018
1019                 lntag = '@';
1020                 break;
1021
1022         // Repeat-block:
1023         // o  Handle end-of-repeat-block;
1024         // o  tag the listing-line with a pound (#) sign.
1025         case SRC_IREPT:
1026                 if ((ln = GetNextRepeatLine()) == NULL)
1027                 {
1028                         DEBUG { printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n"); }
1029                         fpop();
1030                         goto retry;
1031                 }
1032
1033                 lntag = '#';
1034                 break;
1035         }
1036
1037         // Save text of the line. We only do this during listings and within
1038         // macro-type blocks, since it is expensive to unconditionally copy every
1039         // line.
1040         if (lnsave)
1041         {
1042                 // Sanity check
1043                 if (strlen(ln) > LNSIZ)
1044                         return error("line too long (%d, max %d)", strlen(ln), LNSIZ);
1045
1046                 strcpy(lnbuf, ln);
1047         }
1048
1049         // General housekeeping
1050         tok = tokeol;                   // Set "tok" to EOL in case of error
1051         tk.u32 = etok;                  // Reset token ptr
1052         stuffnull = 0;                  // Don't stuff nulls
1053         totlines++;                             // Bump total #lines assembled
1054
1055         // See if the entire line is a comment. This is a win if the programmer
1056         // puts in lots of comments
1057         if (*ln == '*' || *ln == ';' || ((*ln == '/') && (*(ln + 1) == '/')))
1058                 goto goteol;
1059
1060         // And here we have a very ugly hack for signalling a single line 'turn off
1061         // optimization'. There's really no nice way to do this, so hack it is!
1062         optimizeOff = 0;                // Default is to take optimizations as they come
1063
1064         if (*ln == '!')
1065         {
1066                 optimizeOff = 1;        // Signal that we don't want to optimize this line
1067                 ln++;                           // & skip over the darned thing
1068         }
1069
1070         // Main tokenization loop;
1071         //  o  skip whitespace;
1072         //  o  handle end-of-line;
1073         //  o  handle symbols;
1074         //  o  handle single-character tokens (operators, etc.);
1075         //  o  handle multiple-character tokens (constants, strings, etc.).
1076         for(; *ln!=EOS;)
1077         {
1078                 // Check to see if there's enough space in the token buffer
1079                 if (tk.cp >= ((uint8_t *)(&tokbuf[TOKBUFSIZE])) - 20)
1080                 {
1081                         return error("token buffer overrun");
1082                 }
1083
1084                 // Skip whitespace, handle EOL
1085                 while (chrtab[*ln] & WHITE)
1086                         ln++;
1087
1088                 // Handle EOL, comment with ';'
1089                 if (*ln == EOS || *ln == ';'|| ((*ln == '/') && (*(ln + 1) == '/')))
1090                         break;
1091
1092                 // Handle start of symbol. Symbols are null-terminated in place. The
1093                 // termination is always one symbol behind, since there may be no place
1094                 // for a null in the case that an operator immediately follows the name.
1095                 c = chrtab[*ln];
1096
1097                 if (c & STSYM)
1098                 {
1099                         if (stuffnull)                  // Terminate old symbol from previous pass
1100                                 *nullspot = EOS;
1101
1102                         v = 0;                                  // Assume no DOT attrib follows symbol
1103                         stuffnull = 1;
1104
1105                         // In some cases, we need to check for a DOTx at the *beginning*
1106                         // of a symbol, as the "start" of the line we're currently looking
1107                         // at could be somewhere in the middle of that line!
1108                         if (*ln == '.')
1109                         {
1110                                 // Make sure that it's *only* a .[bwsl] following, and not the
1111                                 // start of a local symbol:
1112                                 if ((chrtab[*(ln + 1)] & DOT)
1113                                         && (dotxtab[*(ln + 1)] != 0)
1114                                         && !(chrtab[*(ln + 2)] & CTSYM))
1115                                 {
1116                                         // We found a legitimate DOTx construct, so add it to the
1117                                         // token stream:
1118                                         ln++;
1119                                         stuffnull = 0;
1120                                         *tk.u32++ = (TOKEN)dotxtab[*ln++];
1121                                         continue;
1122                                 }
1123                         }
1124
1125                         p = nullspot = ln++;    // Nullspot -> start of this symbol
1126
1127                         // Find end of symbol (and compute its length)
1128                         for(j=1; (int)chrtab[*ln]&CTSYM; j++)
1129                                 ln++;
1130
1131                         // Handle "DOT" special forms (like ".b") that follow a normal
1132                         // symbol or keyword:
1133                         if (*ln == '.')
1134                         {
1135                                 *ln++ = EOS;            // Terminate symbol
1136                                 stuffnull = 0;          // And never try it again
1137
1138                                 // Character following the '.' must have a DOT attribute, and
1139                                 // the character after THAT one must not have a start-symbol
1140                                 // attribute (to prevent symbols that look like, for example,
1141                                 // "zingo.barf", which might be a good idea anyway....)
1142                                 if (((chrtab[*ln] & DOT) == 0) || (dotxtab[*ln] == 0))
1143                                         return error("[bwsl] must follow '.' in symbol");
1144
1145                                 v = (uint32_t)dotxtab[*ln++];
1146                                 cursize = (uint32_t)v;
1147
1148                                 if (chrtab[*ln] & CTSYM)
1149                                         return error("misuse of '.'; not allowed in symbols");
1150                         }
1151
1152                         // If the symbol is small, check to see if it's really the name of
1153                         // a register.
1154                         uint8_t *p2 = p;
1155                         if (j <= 5)
1156                         {
1157                                 for (state = 0; state >= 0;)
1158                                 {
1159                                         j = (int)tolowertab[*p++];
1160                                         j += regbase[state];
1161
1162                                         if (regcheck[j] != state)
1163                                         {
1164                                                 j = -1;
1165                                                 break;
1166                                         }
1167
1168                                         if (*p == EOS || p == ln)
1169                                         {
1170                                                 j = regaccept[j];
1171                                                 goto skip_keyword;
1172                                                 break;
1173                                         }
1174
1175                                         state = regtab[j];
1176                                 }
1177                         }
1178
1179                         // Scan for keywords
1180                         if ((j <= 0 || state <= 0) || p==p2)
1181                         {
1182                                 if (j <= KWSIZE)
1183                                 {
1184                                         for (state = 0; state >= 0;)
1185                                         {
1186                                                 j = (int)tolowertab[*p2++];
1187                                                 j += kwbase[state];
1188                         
1189                                                 if (kwcheck[j] != state)
1190                                                 {
1191                                                         j = -1;
1192                                                         break;
1193                                                 }
1194                         
1195                                                 if (*p == EOS || p2 == ln)
1196                                                 {
1197                                                         j = kwaccept[j];
1198                                                         break;
1199                                                 }
1200                         
1201                                                 state = kwtab[j];
1202                                         }
1203                                 }
1204                                 else
1205                                 {
1206                                         j = -1;
1207                                 }
1208                         }
1209
1210                         skip_keyword:
1211
1212                         // If we detected equrundef/regundef set relevant flag
1213                         if (j == KW_EQURUNDEF)
1214                         {
1215                                 equrundef = 1;
1216                                 j = -1;
1217                         }
1218
1219                         // If not tokenized keyword OR token was not found
1220                         if ((j < 0) || (state < 0))
1221                         {
1222                                 // Only proceed if no equrundef has been detected. In that case we need to store the symbol
1223                                 // because the directive handler (d_equrundef) will run outside this loop, further into procln.c
1224                                 if (!equrundef && !disabled)
1225                                 {
1226                                         // Last attempt: let's see if this is an equated register.
1227                                         // If yes, then just store the register's keyword value instead of the symbol
1228                                         char temp = *ln;
1229                                         *ln = 0;
1230                                         sy = lookup(nullspot, LABEL, 0);
1231                                         *ln = temp;
1232                                         if (sy)
1233                                         {
1234                                                 if (sy->sattre & EQUATEDREG)
1235                                                 {
1236                                                         *tk.u32++ = sy->svalue;
1237                                                         stuffnull = 0;
1238                                                         continue;
1239                                                 }
1240                                         }
1241                                 }
1242                                 // Ok, that failed, let's store the symbol instead
1243                                 *tk.u32++ = SYMBOL;
1244                                 string[stringNum] = nullspot;
1245                                 *tk.u32++ = stringNum;
1246                                 stringNum++;
1247                         }
1248                         else
1249                         {
1250                                 *tk.u32++ = (TOKEN)j;
1251                                 stuffnull = 0;
1252                         }
1253
1254                         if (v)                  // Record attribute token (if any)
1255                                 *tk.u32++ = (TOKEN)v;
1256
1257                         if (stuffnull)  // Arrange for string termination on next pass
1258                                 nullspot = ln;
1259
1260                         if (disabled)
1261                         {
1262                                 // When we are in a disabled code block, the only thing that can break out
1263                                 // of this is an ".endif" keyword, so this is the minimum we have to parse
1264                                 // in order to discover such a keyword.
1265                                 goto goteol;
1266                         }
1267
1268                         continue;
1269                 }
1270
1271                 // Handle identity tokens
1272                 if (c & SELF)
1273                 {
1274                         *tk.u32++ = *ln++;
1275                         continue;
1276                 }
1277
1278                 // Handle multiple-character tokens
1279                 if (c & MULTX)
1280                 {
1281                         switch (*ln++)
1282                         {
1283                         case '!':               // ! or !=
1284                                 if (*ln == '=')
1285                                 {
1286                                         *tk.u32++ = NE;
1287                                         ln++;
1288                                 }
1289                                 else
1290                                         *tk.u32++ = '!';
1291
1292                                 continue;
1293                         case '\'':              // 'string'
1294                                 if (m6502)
1295                                 {
1296                                         // Hardcoded for now, maybe this will change in the future
1297                                         *tk.u32++ = STRINGA8;
1298                                         goto dostring;
1299                                 }
1300                                 // Fall through
1301                         case '\"':              // "string"
1302                                 *tk.u32++ = STRING;
1303 dostring:
1304                                 c1 = ln[-1];
1305                                 string[stringNum] = ln;
1306                                 *tk.u32++ = stringNum;
1307                                 stringNum++;
1308
1309                                 for(p=ln; *ln!=EOS && *ln!=c1;)
1310                                 {
1311                                         c = *ln++;
1312
1313                                         if (c == '\\')
1314                                         {
1315                                                 switch (*ln++)
1316                                                 {
1317                                                 case EOS:
1318                                                         return(error("unterminated string"));
1319                                                 case 'e':
1320                                                         c = '\033';
1321                                                         break;
1322                                                 case 'n':
1323                                                         c = '\n';
1324                                                         break;
1325                                                 case 'b':
1326                                                         c = '\b';
1327                                                         break;
1328                                                 case 't':
1329                                                         c = '\t';
1330                                                         break;
1331                                                 case 'r':
1332                                                         c = '\r';
1333                                                         break;
1334                                                 case 'f':
1335                                                         c = '\f';
1336                                                         break;
1337                                                 case '\"':
1338                                                         c = '\"';
1339                                                         break;
1340                                                 case '\'':
1341                                                         c = '\'';
1342                                                         break;
1343                                                 case '\\':
1344                                                         c = '\\';
1345                                                         break;
1346                                                 case '{':
1347                                                         // If we're evaluating a macro
1348                                                         // this is valid because it's
1349                                                         // a parameter expansion
1350                                                 case '!':
1351                                                         // If we're evaluating a macro
1352                                                         // this is valid and expands to
1353                                                         // "dot-size"
1354                                                         break;
1355                                                 default:
1356                                                         warn("bad backslash code in string");
1357                                                         ln--;
1358                                                         break;
1359                                                 }
1360                                         }
1361
1362                                         *p++ = c;
1363                                 }
1364
1365                                 if (*ln++ != c1)
1366                                         return error("unterminated string");
1367
1368                                 *p++ = EOS;
1369                                 continue;
1370                         case '$':               // $, hex constant
1371                                 if (chrtab[*ln] & HDIGIT)
1372                                 {
1373                                         v = 0;
1374
1375                                         // Parse the hex value
1376                                         while (hextab[*ln] >= 0)
1377                                                 v = (v << 4) + (int)hextab[*ln++];
1378
1379                                         *tk.u32++ = CONST;
1380                                         *tk.u64++ = v;
1381
1382                                         if (*ln == '.')
1383                                         {
1384                                                 if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
1385                                                 {
1386                                                         *tk.u32++ = DOTW;
1387                                                         ln += 2;
1388                                                 }
1389                                                 else if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
1390                                                 {
1391                                                         *tk.u32++ = DOTL;
1392                                                         ln += 2;
1393                                                 }
1394                                         }
1395                                 }
1396                                 else
1397                                         *tk.u32++ = '$';
1398
1399                                 continue;
1400                         case '<':               // < or << or <> or <=
1401                                 switch (*ln)
1402                                 {
1403                                 case '<':
1404                                         *tk.u32++ = SHL;
1405                                         ln++;
1406                                         continue;
1407                                 case '>':
1408                                         *tk.u32++ = NE;
1409                                         ln++;
1410                                         continue;
1411                                 case '=':
1412                                         *tk.u32++ = LE;
1413                                         ln++;
1414                                         continue;
1415                                 default:
1416                                         *tk.u32++ = '<';
1417                                         continue;
1418                                 }
1419                         case ':':               // : or ::
1420                                 if (*ln == ':')
1421                                 {
1422                                         *tk.u32++ = DCOLON;
1423                                         ln++;
1424                                 }
1425                                 else
1426                                         *tk.u32++ = ':';
1427
1428                                 continue;
1429                         case '=':               // = or ==
1430                                 if (*ln == '=')
1431                                 {
1432                                         *tk.u32++ = DEQUALS;
1433                                         ln++;
1434                                 }
1435                                 else
1436                                         *tk.u32++ = '=';
1437
1438                                 continue;
1439                         case '>':               // > or >> or >=
1440                                 switch (*ln)
1441                                 {
1442                                 case '>':
1443                                         *tk.u32++ = SHR;
1444                                         ln++;
1445                                         continue;
1446                                 case '=':
1447                                         *tk.u32++ = GE;
1448                                         ln++;
1449                                         continue;
1450                                 default:
1451                                         *tk.u32++ = '>';
1452                                         continue;
1453                                 }
1454                         case '%':               // % or binary constant
1455                                 if (*ln < '0' || *ln > '1')
1456                                 {
1457                                         *tk.u32++ = '%';
1458                                         continue;
1459                                 }
1460
1461                                 v = 0;
1462
1463                                 while (*ln >= '0' && *ln <= '1')
1464                                         v = (v << 1) + *ln++ - '0';
1465
1466                                 if (*ln == '.')
1467                                 {
1468                                         if ((*(ln + 1) == 'b') || (*(ln + 1) == 'B'))
1469                                         {
1470                                                 v &= 0x000000FF;
1471                                                 ln += 2;
1472                                         }
1473
1474                                         if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
1475                                         {
1476                                                 v &= 0x0000FFFF;
1477                                                 ln += 2;
1478                                         }
1479
1480                                         if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
1481                                         {
1482                                                 v &= 0xFFFFFFFF;
1483                                                 ln += 2;
1484                                         }
1485                                 }
1486
1487                                 *tk.u32++ = CONST;
1488                                 *tk.u64++ = v;
1489                                 continue;
1490                         case '@':               // @ or octal constant
1491                                 if (*ln < '0' || *ln > '7')
1492                                 {
1493                                         *tk.u32++ = '@';
1494                                         continue;
1495                                 }
1496
1497                                 v = 0;
1498
1499                                 while (*ln >= '0' && *ln <= '7')
1500                                         v = (v << 3) + *ln++ - '0';
1501
1502                                 if (*ln == '.')
1503                                 {
1504                                         if ((*(ln + 1) == 'b') || (*(ln + 1) == 'B'))
1505                                         {
1506                                                 v &= 0x000000FF;
1507                                                 ln += 2;
1508                                         }
1509
1510                                         if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
1511                                         {
1512                                                 v &= 0x0000FFFF;
1513                                                 ln += 2;
1514                                         }
1515
1516                                         if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
1517                                         {
1518                                                 v &= 0xFFFFFFFF;
1519                                                 ln += 2;
1520                                         }
1521                                 }
1522
1523                                 *tk.u32++ = CONST;
1524                                 *tk.u64++ = v;
1525                                 continue;
1526                         case '^':               // ^ or ^^ <operator-name>
1527                                 if (*ln != '^')
1528                                 {
1529                                         *tk.u32++ = '^';
1530                                         continue;
1531                                 }
1532
1533                                 if (((int)chrtab[*++ln] & STSYM) == 0)
1534                                 {
1535                                         error("invalid symbol following ^^");
1536                                         continue;
1537                                 }
1538
1539                                 p = ln++;
1540
1541                                 while ((int)chrtab[*ln] & CTSYM)
1542                                         ++ln;
1543
1544                                 for(state=0; state>=0;)
1545                                 {
1546                                         // Get char, convert to lowercase
1547                                         j = (int)tolowertab[*p++];
1548
1549                                         //if (j >= 'A' && j <= 'Z')
1550                                         //      j += 0x20;
1551
1552                                         j += unarybase[state];
1553
1554                                         if (unarycheck[j] != state)
1555                                         {
1556                                                 j = -1;
1557                                                 break;
1558                                         }
1559
1560                                         if (*p == EOS || p == ln)
1561                                         {
1562                                                 j = unaryaccept[j];
1563                                                 break;
1564                                         }
1565
1566                                         state = unarytab[j];
1567                                 }
1568
1569                                 if (j < 0 || state < 0)
1570                                 {
1571                                         error("unknown symbol following ^^");
1572                                         continue;
1573                                 }
1574
1575                                 *tk.u32++ = (TOKEN)j;
1576                                 continue;
1577                         default:
1578                                 interror(2);    // Bad MULTX entry in chrtab
1579                                 continue;
1580                         }
1581                 }
1582
1583                 // Handle decimal constant
1584                 if (c & DIGIT)
1585                 {
1586                         uint8_t * numStart = ln;
1587                         v = 0;
1588
1589                         while ((int)chrtab[*ln] & DIGIT)
1590                                 v = (v * 10) + *ln++ - '0';
1591
1592                         // See if there's a .[bwl] after the constant & deal with it if so
1593                         if (*ln == '.')
1594                         {
1595                                 if ((*(ln + 1) == 'b') || (*(ln + 1) == 'B'))
1596                                 {
1597                                         v &= 0x000000FF;
1598                                         ln += 2;
1599                                         *tk.u32++ = CONST;
1600                                         *tk.u64++ = v;
1601                                         *tk.u32++ = DOTB;
1602                                 }
1603                                 else if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
1604                                 {
1605                                         v &= 0x0000FFFF;
1606                                         ln += 2;
1607                                         *tk.u32++ = CONST;
1608                                         *tk.u64++ = v;
1609                                         *tk.u32++ = DOTW;
1610                                 }
1611                                 else if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
1612                                 {
1613                                         v &= 0xFFFFFFFF;
1614                                         ln += 2;
1615                                         *tk.u32++ = CONST;
1616                                         *tk.u64++ = v;
1617                                         *tk.u32++ = DOTL;
1618                                 }
1619                                 else if ((int)chrtab[*(ln + 1)] & DIGIT)
1620                                 {
1621                                         // Hey, more digits after the dot, so we assume it's a
1622                                         // floating point number of some kind... numEnd will point
1623                                         // to the first non-float character after it's done
1624                                         char * numEnd;
1625                                         errno = 0;
1626                                         double f = strtod(numStart, &numEnd);
1627                                         ln = (uint8_t *)numEnd;
1628
1629                                         if (errno != 0)
1630                                                 return error("floating point parse error");
1631
1632                                         // N.B.: We use the C compiler's internal double
1633                                         //       representation for all internal float calcs and
1634                                         //       are reasonably sure that the size of said double
1635                                         //       is 8 bytes long (which we check for in fltpoint.c)
1636                                         *tk.u32++ = FCONST;
1637                                         *tk.dp = f;
1638                                         tk.u64++;
1639                                         continue;
1640                                 }
1641                         }
1642                         else
1643                         {
1644                                 *tk.u32++ = CONST;
1645                                 *tk.u64++ = v;
1646                         }
1647
1648 //printf("CONST: %i\n", v);
1649                         continue;
1650                 }
1651
1652                 // Handle illegal character
1653                 return error("illegal character $%02X found", *ln);
1654         }
1655
1656         // Terminate line of tokens and return "success."
1657
1658 goteol:
1659         tok = etok;                             // Set tok to beginning of line
1660
1661         if (stuffnull)                  // Terminate last SYMBOL
1662                 *nullspot = EOS;
1663
1664         *tk.u32++ = EOL;
1665
1666         return OK;
1667 }
1668
1669
1670 //
1671 // .GOTO <label>        goto directive
1672 //
1673 // The label is searched for starting from the first line of the current,
1674 // enclosing macro definition. If no enclosing macro exists, an error is
1675 // generated.
1676 //
1677 // A label is of the form:
1678 //
1679 // :<name><whitespace>
1680 //
1681 // The colon must appear in column 1.  The label is stripped prior to macro
1682 // expansion, and is NOT subject to macro expansion.  The whitespace may also
1683 // be EOL.
1684 //
1685 int d_goto(WORD unused)
1686 {
1687         // Setup for the search
1688         if (*tok != SYMBOL)
1689                 return error("missing label");
1690
1691         char * sym = string[tok[1]];
1692         tok += 2;
1693
1694         if (cur_inobj->in_type != SRC_IMACRO)
1695                 return error("goto not in macro");
1696
1697         IMACRO * imacro = cur_inobj->inobj.imacro;
1698         LLIST * defln = imacro->im_macro->lineList;
1699
1700         // Attempt to find the label, starting with the first line.
1701         for(; defln!=NULL; defln=defln->next)
1702         {
1703                 // Must start with a colon
1704                 if (defln->line[0] == ':')
1705                 {
1706                         // Compare names (sleazo string compare)
1707                         char * s1 = sym;
1708                         char * s2 = defln->line + 1;
1709
1710                         // Either we will match the strings to EOS on both, or we will
1711                         // match EOS on string 1 to whitespace on string 2. Otherwise, we
1712                         // have no match.
1713                         while ((*s1 == *s2) || ((*s1 == EOS) && (chrtab[*s2] & WHITE)))
1714                         {
1715                                 // If we reached the end of string 1 (sym), we're done.
1716                                 // Note that we're also checking for the end of string 2 as
1717                                 // well, since we've established they're equal above.
1718                                 if (*s1 == EOS)
1719                                 {
1720                                         // Found the label, set new macro next-line and return.
1721                                         imacro->im_nextln = defln;
1722                                         return 0;
1723                                 }
1724
1725                                 s1++;
1726                                 s2++;
1727                         }
1728                 }
1729         }
1730
1731         return error("goto label not found");
1732 }
1733
1734
1735 void DumpToken(TOKEN t)
1736 {
1737         if (t == COLON)
1738                 printf("[COLON]");
1739         else if (t == CONST)
1740                 printf("[CONST]");
1741         else if (t == FCONST)
1742                 printf("[FCONST]");
1743         else if (t == ACONST)
1744                 printf("[ACONST]");
1745         else if (t == STRING)
1746                 printf("[STRING]");
1747         else if (t == SYMBOL)
1748                 printf("[SYMBOL]");
1749         else if (t == EOS)
1750                 printf("[EOS]");
1751         else if (t == TKEOF)
1752                 printf("[TKEOF]");
1753         else if (t == DEQUALS)
1754                 printf("[DEQUALS]");
1755         else if (t == SET)
1756                 printf("[SET]");
1757         else if (t == REG)
1758                 printf("[REG]");
1759         else if (t == DCOLON)
1760                 printf("[DCOLON]");
1761         else if (t == GE)
1762                 printf("[GE]");
1763         else if (t == LE)
1764                 printf("[LE]");
1765         else if (t == NE)
1766                 printf("[NE]");
1767         else if (t == SHR)
1768                 printf("[SHR]");
1769         else if (t == SHL)
1770                 printf("[SHL]");
1771         else if (t == UNMINUS)
1772                 printf("[UNMINUS]");
1773         else if (t == DOTB)
1774                 printf("[DOTB]");
1775         else if (t == DOTW)
1776                 printf("[DOTW]");
1777         else if (t == DOTL)
1778                 printf("[DOTL]");
1779         else if (t == DOTQ)
1780                 printf("[DOTQ]");
1781         else if (t == DOTS)
1782                 printf("[DOTS]");
1783         else if (t == DOTD)
1784                 printf("[DOTD]");
1785         else if (t == DOTI)
1786                 printf("[DOTI]");
1787         else if (t == ENDEXPR)
1788                 printf("[ENDEXPR]");
1789         else if (t == CR_ABSCOUNT)
1790                 printf("[CR_ABSCOUNT]");
1791         else if (t == CR_FILESIZE)
1792                 printf("[CR_FILESIZE]");
1793         else if (t == CR_DEFINED)
1794                 printf("[CR_DEFINED]");
1795         else if (t == CR_REFERENCED)
1796                 printf("[CR_REFERENCED]");
1797         else if (t == CR_STREQ)
1798                 printf("[CR_STREQ]");
1799         else if (t == CR_MACDEF)
1800                 printf("[CR_MACDEF]");
1801         else if (t == CR_TIME)
1802                 printf("[CR_TIME]");
1803         else if (t == CR_DATE)
1804                 printf("[CR_DATE]");
1805         else if (t >= 0x20 && t <= 0x2F)
1806                 printf("[%c]", (char)t);
1807         else if (t >= 0x3A && t <= 0x3F)
1808                 printf("[%c]", (char)t);
1809         else if (t >= 0x80 && t <= 0x87)
1810                 printf("[D%u]", ((uint32_t)t) - 0x80);
1811         else if (t >= 0x88 && t <= 0x8F)
1812                 printf("[A%u]", ((uint32_t)t) - 0x88);
1813         else
1814                 printf("[%X:%c]", (uint32_t)t, (char)t);
1815 }
1816
1817
1818 void DumpTokenBuffer(void)
1819 {
1820         printf("Tokens [%X]: ", sloc);
1821
1822         for(TOKEN * t=tokbuf; *t!=EOL; t++)
1823         {
1824                 if (*t == COLON)
1825                         printf("[COLON]");
1826                 else if (*t == CONST)
1827                 {
1828                         PTR tp;
1829                         tp.u32 = t + 1;
1830                         printf("[CONST: $%lX]", *tp.u64);
1831                         t += 2;
1832                 }
1833                 else if (*t == FCONST)
1834                 {
1835                         PTR tp;
1836                         tp.u32 = t + 1;
1837                         printf("[FCONST: $%lX]", *tp.u64);
1838                         t += 2;
1839                 }
1840                 else if (*t == ACONST)
1841                 {
1842                         printf("[ACONST: $%X, $%X]", (uint32_t)t[1], (uint32_t)t[2]);
1843                         t += 2;
1844                 }
1845                 else if (*t == STRING)
1846                 {
1847                         t++;
1848                         printf("[STRING:\"%s\"]", string[*t]);
1849                 }
1850                 else if (*t == SYMBOL)
1851                 {
1852                         t++;
1853                         printf("[SYMBOL:\"%s\"]", string[*t]);
1854                 }
1855                 else if (*t == EOS)
1856                         printf("[EOS]");
1857                 else if (*t == TKEOF)
1858                         printf("[TKEOF]");
1859                 else if (*t == DEQUALS)
1860                         printf("[DEQUALS]");
1861                 else if (*t == SET)
1862                         printf("[SET]");
1863                 else if (*t == REG)
1864                         printf("[REG]");
1865                 else if (*t == DCOLON)
1866                         printf("[DCOLON]");
1867                 else if (*t == GE)
1868                         printf("[GE]");
1869                 else if (*t == LE)
1870                         printf("[LE]");
1871                 else if (*t == NE)
1872                         printf("[NE]");
1873                 else if (*t == SHR)
1874                         printf("[SHR]");
1875                 else if (*t == SHL)
1876                         printf("[SHL]");
1877                 else if (*t == UNMINUS)
1878                         printf("[UNMINUS]");
1879                 else if (*t == DOTB)
1880                         printf("[DOTB]");
1881                 else if (*t == DOTW)
1882                         printf("[DOTW]");
1883                 else if (*t == DOTL)
1884                         printf("[DOTL]");
1885                 else if (*t == DOTQ)
1886                         printf("[DOTQ]");
1887                 else if (*t == DOTS)
1888                         printf("[DOTS]");
1889                 else if (*t == DOTD)
1890                         printf("[DOTD]");
1891                 else if (*t == DOTI)
1892                         printf("[DOTI]");
1893                 else if (*t == ENDEXPR)
1894                         printf("[ENDEXPR]");
1895                 else if (*t == CR_ABSCOUNT)
1896                         printf("[CR_ABSCOUNT]");
1897                 else if (*t == CR_FILESIZE)
1898                         printf("[CR_FILESIZE]");
1899                 else if (*t == CR_DEFINED)
1900                         printf("[CR_DEFINED]");
1901                 else if (*t == CR_REFERENCED)
1902                         printf("[CR_REFERENCED]");
1903                 else if (*t == CR_STREQ)
1904                         printf("[CR_STREQ]");
1905                 else if (*t == CR_MACDEF)
1906                         printf("[CR_MACDEF]");
1907                 else if (*t == CR_TIME)
1908                         printf("[CR_TIME]");
1909                 else if (*t == CR_DATE)
1910                         printf("[CR_DATE]");
1911                 else if (*t >= 0x20 && *t <= 0x2F)
1912                         printf("[%c]", (char)*t);
1913                 else if (*t >= 0x3A && *t <= 0x3F)
1914                         printf("[%c]", (char)*t);
1915                 else if (*t >= 0x80 && *t <= 0x87)
1916                         printf("[D%u]", ((uint32_t)*t) - 0x80);
1917                 else if (*t >= 0x88 && *t <= 0x8F)
1918                         printf("[A%u]", ((uint32_t)*t) - 0x88);
1919                 else
1920                         printf("[%X:%c]", (uint32_t)*t, (char)*t);
1921         }
1922
1923         printf("[EOL]\n");
1924 }
1925