]> Shamusworld >> Repos - rmac/blob - token.c
Fix for ^^filesize (these kinds of hacks need to go in the long game)
[rmac] / token.c
1 //
2 // RMAC - Renamed Macro Assembler for all Atari computers
3 // TOKEN.C - Token Handling
4 // Copyright (C) 199x Landon Dyer, 2011-2021 Reboot and Friends
5 // RMAC derived from MADMAC v1.07 Written by Landon Dyer, 1986
6 // Source utilised with the kind permission of Landon Dyer
7 //
8
9 #include "token.h"
10
11 #include <errno.h>
12 #include "direct.h"
13 #include "error.h"
14 #include "macro.h"
15 #include "procln.h"
16 #include "sect.h"
17 #include "symbol.h"
18
19 #define DECL_KW                         // Declare keyword arrays
20 #define DEF_KW                          // Declare keyword values
21 #include "kwtab.h"                      // Incl generated keyword tables & defs
22
23
24 int lnsave;                                     // 1; strcpy() text of current line
25 uint32_t curlineno;                     // Current line number (64K max currently)
26 int totlines;                           // Total # of lines
27 int mjump_align = 0;            // mjump alignment flag
28 char lntag;                                     // Line tag
29 char * curfname;                        // Current filename
30 char tolowertab[128];           // Uppercase ==> lowercase
31 int8_t hextab[128];                     // Table of hex values
32 char dotxtab[128];                      // Table for ".b", ".s", etc.
33 char irbuf[LNSIZ];                      // Text for .rept block line
34 char lnbuf[LNSIZ];                      // Text of current line
35 WORD filecount;                         // Unique file number counter
36 WORD cfileno;                           // Current file number
37 TOKEN * tok;                            // Ptr to current token
38 TOKEN * etok;                           // Ptr past last token in tokbuf[]
39 TOKEN tokeol[1] = {EOL};        // Bailout end-of-line token
40 char * string[TOKBUFSIZE*2];// Token buffer string pointer storage
41 int optimizeOff;                        // Optimization override flag
42
43
44 FILEREC * filerec;
45 FILEREC * last_fr;
46
47 INOBJ * cur_inobj;                      // Ptr current input obj (IFILE/IMACRO)
48 static INOBJ * f_inobj;         // Ptr list of free INOBJs
49 static IFILE * f_ifile;         // Ptr list of free IFILEs
50 static IMACRO * f_imacro;       // Ptr list of free IMACROs
51
52 static TOKEN tokbuf[TOKBUFSIZE];        // Token buffer (stack-like, all files)
53
54 uint8_t chrtab[0x100] = {
55         ILLEG, ILLEG, ILLEG, ILLEG,                     // NUL SOH STX ETX
56         ILLEG, ILLEG, ILLEG, ILLEG,                     // EOT ENQ ACK BEL
57         ILLEG, WHITE, ILLEG, ILLEG,                     // BS HT LF VT
58         WHITE, ILLEG, ILLEG, ILLEG,                     // FF CR SO SI
59
60         ILLEG, ILLEG, ILLEG, ILLEG,                     // DLE DC1 DC2 DC3
61         ILLEG, ILLEG, ILLEG, ILLEG,                     // DC4 NAK SYN ETB
62         ILLEG, ILLEG, ILLEG, ILLEG,                     // CAN EM SUB ESC
63         ILLEG, ILLEG, ILLEG, ILLEG,                     // FS GS RS US
64
65         WHITE, MULTX, MULTX, SELF,                      // SP ! " #
66         MULTX+CTSYM, MULTX, SELF, MULTX,        // $ % & '
67         SELF, SELF, SELF, SELF,                         // ( ) * +
68         SELF, SELF, STSYM, SELF,                        // , - . /
69
70         DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM,         // 0 1
71         DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM,         // 2 3
72         DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM,         // 4 5
73         DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM,         // 6 7
74         DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM,         // 8 9
75         MULTX, MULTX,                                                           // : ;
76         MULTX, MULTX, MULTX, STSYM+CTSYM,                       // < = > ?
77
78         MULTX, STSYM+CTSYM+HDIGIT,                                      // @ A
79         DOT+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT,     // B C
80         DOT+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT,     // D E
81         STSYM+CTSYM+HDIGIT, STSYM+CTSYM,                        // F G
82         STSYM+CTSYM, DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // H I J K
83         DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // L M N O
84
85         DOT+STSYM+CTSYM, DOT+STSYM+CTSYM, STSYM+CTSYM, DOT+STSYM+CTSYM, // P Q R S
86         STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, DOT+STSYM+CTSYM, // T U V W
87         STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, SELF,// X Y Z [
88         SELF, SELF, MULTX, STSYM+CTSYM,                         // \ ] ^ _
89
90         ILLEG, STSYM+CTSYM+HDIGIT,                                      // ` a
91         DOT+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT,     // b c
92         DOT+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT,     // d e
93         STSYM+CTSYM+HDIGIT, STSYM+CTSYM,                        // f g
94         STSYM+CTSYM, DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // h i j k
95         DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // l m n o
96
97         DOT+STSYM+CTSYM, DOT+STSYM+CTSYM, STSYM+CTSYM, DOT+STSYM+CTSYM, // p q r s
98         STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, DOT+STSYM+CTSYM, // t u v w
99         DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, SELF,                // x y z {
100         SELF, SELF, SELF, ILLEG,                                        // | } ~ DEL
101
102         // Anything above $7F is illegal (and yes, we need to check for this,
103         // otherwise you get strange and spurious errors that will lead you astray)
104         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
105         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
106         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
107         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
108         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
109         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
110         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
111         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
112         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
113         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
114         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
115         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
116         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
117         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
118         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
119         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG
120 };
121
//
// Register (and register-like keyword) names, used to turn a keyword token
// back into text. ExpandMacro indexes this table with (token - KW_D0). The
// number at the end of each row is the token value of the row's first entry
// as annotated by the original author (presumably KW_D0 == 128 -- confirm
// against kwtab.h). Empty strings are unused slots.
//
static char * regname[] = {
	"d0",   "d1",    "d2",   "d3",   "d4",     "d5",    "d6",    "d7",		// 128
	"a0",   "a1",    "a2",   "a3",   "a4",     "a5",    "a6",    "sp",		// 136
	"ssp",  "pc",    "sr",   "ccr",  "regequ", "set",   "reg",   "r0",		// 144
	"r1",   "r2",    "r3",   "r4",   "r5",     "r6",    "r7",    "r8",		// 152
	"r9",   "r10",   "r11",  "r12",  "r13",    "r14",   "r15",   "r16",		// 160
	"r17",  "r18",   "r19",  "r20",  "r21",    "r22",   "r23",   "r24",		// 168
	"r25",  "r26",   "r27",  "r28",  "r29",    "r30",   "r31",   "ccdef",	// 176
	"usp",  "ic40",  "dc40", "bc40", "sfc",    "dfc",   "",      "vbr",		// 184
	"cacr", "caar",  "msp",  "isp",  "tc",     "itt0",  "itt1",  "dtt0",	// 192
	"dtt1", "mmusr", "urp",  "srp",  "iacr0",  "iacr1", "dacr0", "dacr1",	// 200
	"tt0",  "tt1",   "crp",  "",     "",       "",      "",      "",		// 208
	"",     "",      "",     "",     "fpiar",  "fpsr",  "fpcr",  "",		// 216
	"fp0",  "fp1",   "fp2",  "fp3",  "fp4",    "fp5",   "fp6",   "fp7",		// 224
	"",     "",      "",     "",     "",       "",      "",      "",		// 232
	"",     "",      "",     "",     "",       "",      "",      "",		// 240
	"",     "",      "",     "",     "",       "",      "",      "",		// 248
	"",     "",      "",     "",     "x0",     "x1",    "y0",    "y1",		// 256
	"",     "b0",    "",     "b2",   "",       "b1",    "a",     "b",		// 264
	"mr",   "omr",   "la",   "lc",   "ssh",    "ssl",   "ss",    "",		// 272
	"n0",   "n1",    "n2",   "n3",   "n4",     "n5",    "n6",    "n7",		// 280
	"m0",   "m1",    "m2",   "m3",   "m4",     "m5",    "m6",    "m7",		// 288
	"",     "",      "",     "",     "",       "",      "l",     "p",		// 296
	"mr",   "omr",   "la",   "lc",   "ssh",    "ssl",   "ss",    "",		// 304
	"a10",  "b10",   "x",    "y",    "",       "",      "ab",    "ba"		// 312
};
149
// Names of the 32 RISC registers; ExpandMacro indexes this with
// (token - KW_R0) for tokens in the range KW_R0..KW_R31.
static char * riscregname[] = {
	"r0",  "r1",  "r2",  "r3",
	"r4",  "r5",  "r6",  "r7",
	"r8",  "r9",  "r10", "r11",
	"r12", "r13", "r14", "r15",
	"r16", "r17", "r18", "r19",
	"r20", "r21", "r22", "r23",
	"r24", "r25", "r26", "r27",
	"r28", "r29", "r30", "r31"
};
156
157
158 //
159 // Initialize tokenizer
160 //
161 void InitTokenizer(void)
162 {
163         int i;                                                                  // Iterator
164         char * htab = "0123456789abcdefABCDEF"; // Hex character table
165
166         lnsave = 0;                                                             // Don't save lines
167         curfname = "";                                                  // No file, empty filename
168         filecount = (WORD)-1;
169         cfileno = (WORD)-1;                                             // cfileno gets bumped to 0
170         curlineno = 0;
171         totlines = 0;
172         etok = tokbuf;
173         f_inobj = NULL;
174         f_ifile = NULL;
175         f_imacro = NULL;
176         cur_inobj = NULL;
177         filerec = NULL;
178         last_fr = NULL;
179         lntag = SPACE;
180
181         // Initialize hex, "dot" and tolower tables
182         for(i=0; i<128; i++)
183         {
184                 hextab[i] = -1;
185                 dotxtab[i] = 0;
186                 tolowertab[i] = (char)i;
187         }
188
189         for(i=0; htab[i]!=EOS; i++)
190                 hextab[htab[i]] = (char)((i < 16) ? i : i - 6);
191
192         for(i='A'; i<='Z'; i++)
193                 tolowertab[i] |= 0x20;
194
195         // These characters are legal immediately after a period
196         dotxtab['b'] = DOTB;                                    // .b .B .s .S
197         dotxtab['B'] = DOTB;
198         //dotxtab['s'] = DOTB;
199         //dotxtab['S'] = DOTB;
200         dotxtab['w'] = DOTW;                                    // .w .W
201         dotxtab['W'] = DOTW;
202         dotxtab['l'] = DOTL;                                    // .l .L
203         dotxtab['L'] = DOTL;
204         dotxtab['i'] = DOTI;                                    // .i .I (WTF is this???)
205         dotxtab['I'] = DOTI;
206         dotxtab['D'] = DOTD;                                    // .d .D (double)
207         dotxtab['d'] = DOTD;
208         dotxtab['S'] = DOTS;                                    // .s .S
209         dotxtab['s'] = DOTS;
210         dotxtab['Q'] = DOTQ;                                    // .q .Q (quad word)
211         dotxtab['q'] = DOTQ;
212         dotxtab['X'] = DOTX;                                    // .x .x
213         dotxtab['x'] = DOTX;
214         dotxtab['P'] = DOTP;                                    // .p .P
215         dotxtab['p'] = DOTP;
216 }
217
218
219 void SetFilenameForErrorReporting(void)
220 {
221         WORD fnum = cfileno;
222
223         // Check for absolute top filename (this should never happen)
224         if (fnum == -1)
225         {
226                 curfname = "(*top*)";
227                 return;
228         }
229
230         FILEREC * fr = filerec;
231
232         // Advance to the correct record...
233         while (fr != NULL && fnum != 0)
234         {
235                 fr = fr->frec_next;
236                 fnum--;
237         }
238
239         // Check for file # record not found (this should never happen either)
240         if (fr == NULL)
241         {
242                 curfname = "(*NOT FOUND*)";
243                 return;
244         }
245
246         curfname = fr->frec_name;
247 }
248
249
250 //
251 // Allocate an IFILE or IMACRO
252 //
253 INOBJ * a_inobj(int typ)
254 {
255         INOBJ * inobj;
256         IFILE * ifile;
257         IMACRO * imacro;
258
259         // Allocate and initialize INOBJ first
260         if (f_inobj == NULL)
261                 inobj = malloc(sizeof(INOBJ));
262         else
263         {
264                 inobj = f_inobj;
265                 f_inobj = f_inobj->in_link;
266         }
267
268         switch (typ)
269         {
270         case SRC_IFILE:                                                 // Alloc and init an IFILE
271                 if (f_ifile == NULL)
272                         ifile = malloc(sizeof(IFILE));
273                 else
274                 {
275                         ifile = f_ifile;
276                         f_ifile = f_ifile->if_link;
277                 }
278
279                 inobj->inobj.ifile = ifile;
280                 break;
281
282         case SRC_IMACRO:                                                // Alloc and init an IMACRO
283                 if (f_imacro == NULL)
284                         imacro = malloc(sizeof(IMACRO));
285                 else
286                 {
287                         imacro = f_imacro;
288                         f_imacro = f_imacro->im_link;
289                 }
290
291                 inobj->inobj.imacro = imacro;
292                 break;
293
294         case SRC_IREPT:                                                 // Alloc and init an IREPT
295                 inobj->inobj.irept = malloc(sizeof(IREPT));
296                 DEBUG { printf("alloc IREPT\n"); }
297                 break;
298         }
299
300         // Install INOBJ on top of input stack
301         inobj->in_ifent = ifent;                                // Record .if context on entry
302         inobj->in_type = (WORD)typ;
303         inobj->in_otok = tok;
304         inobj->in_etok = etok;
305         inobj->in_link = cur_inobj;
306         cur_inobj = inobj;
307
308         return inobj;
309 }
310
311
312 //
313 // Perform macro substitution from 'orig' to 'dest'. Return OK or some error.
314 // A macro reference is in one of two forms:
315 // \name <non-name-character>
316 // \{name}
317 // A doubled backslash (\\) is compressed to a single backslash (\).
318 // Argument definitions have been pre-tokenized, so we have to turn them back
319 // into text. This means that numbers, in particular, become hex, regardless of
320 // their representation when the macro was invoked. This is a hack.
321 // A label may appear at the beginning of the line:
322 // :<name><whitespace>
323 // (the colon must be in the first column). These labels are stripped before
324 // macro expansion takes place.
325 //
326 int ExpandMacro(char * src, char * dest, int destsiz)
327 {
328         int i;
329         int questmark;                  // \? for testing argument existence
330         char mname[128];                // Assume max size of a formal arg name
331         char numbuf[20];                // Buffer for text of CONSTs
332         TOKEN * tk;
333         SYM * arg;
334         char ** symbolString;
335
336         DEBUG { printf("ExM: src=\"%s\"\n", src); }
337
338         IMACRO * imacro = cur_inobj->inobj.imacro;
339         int macnum = (int)(imacro->im_macro->sattr);
340
341         char * dst = dest;                                              // Next dest slot
342         char * edst = dest + destsiz - 1;               // End + 1(?) of dest buffer
343
344         // Check for (and skip over) any "label" on the line
345         char * s = src;
346         char * d = NULL;
347
348         if (*s == ':')
349         {
350                 while (*s != EOS && !(chrtab[*s] & WHITE))
351                         s++;
352
353                 if (*s != EOS)
354                         s++;                                                    // Skip first whitespace
355         }
356
357         // Expand the rest of the line
358         while (*s != EOS)
359         {
360                 // Copy single character
361                 if (*s != '\\')
362                 {
363                         if (dst >= edst)
364                                 goto overflow;
365
366                         // Skip comments in case a loose @ or \ is in there
367                         // In that case the tokeniser was trying to expand it.
368                         if ((*s == ';') || ((*s == '/') && (*(s + 1) == '/')))
369                                 goto skipcomments;
370
371                         *dst++ = *s++;
372                 }
373                 // Do macro expansion
374                 else
375                 {
376                         questmark = 0;
377
378                         // Do special cases
379                         switch (*++s)
380                         {
381                         case '\\':                                              // \\, \ (collapse to single backslash)
382                                 if (dst >= edst)
383                                         goto overflow;
384
385                                 *dst++ = *s++;
386                                 continue;
387                         case '?':                                               // \? <macro>  set `questmark' flag
388                                 s++;
389                                 questmark = 1;
390                                 break;
391                         case '#':                                               // \#, number of arguments
392                                 sprintf(numbuf, "%d", (int)imacro->im_nargs);
393                                 goto copystr;
394                         case '!':                                               // \! size suffix supplied on invocation
395                                 switch ((int)imacro->im_siz)
396                                 {
397                                 case SIZN: d = "";   break;
398                                 case SIZB: d = ".b"; break;
399                                 case SIZW: d = ".w"; break;
400                                 case SIZL: d = ".l"; break;
401                                 }
402
403                                 goto copy_d;
404                         case '~':                                               // ==> unique label string Mnnnn...
405                                 sprintf(numbuf, "M%u", curuniq);
406 copystr:
407                                 d = numbuf;
408 copy_d:
409                                 s++;
410
411                                 while (*d != EOS)
412                                 {
413                                         if (dst >= edst)
414                                                 goto overflow;
415                                         else
416                                                 *dst++ = *d++;
417                                 }
418
419                                 continue;
420                         case EOS:
421                                 return error("missing argument name");
422                         }
423
424                         // \n ==> argument number 'n', 0..9
425                         if (chrtab[*s] & DIGIT)
426                         {
427                                 i = *s++ - '1';
428
429                                 if (i < 0)
430                                         i = 9;
431
432                                 goto arg_num;
433                         }
434
435                         // Get argument name: \name, \{name}
436                         d = mname;
437
438                         // \label
439                         if (*s != '{')
440                         {
441                                 do
442                                 {
443                                         *d++ = *s++;
444                                 }
445                                 while (chrtab[*s] & CTSYM);
446                         }
447                         // \\{label}
448                         else
449                         {
450                                 for(++s; *s != EOS && *s != '}';)
451                                         *d++ = *s++;
452
453                                 if (*s != '}')
454                                         return error("missing closing brace ('}')");
455                                 else
456                                         s++;
457                         }
458
459                         *d = EOS;
460
461                         // Lookup the argument and copy its (string) value into the
462                         // destination string
463                         DEBUG { printf("argument='%s'\n", mname); }
464
465                         if ((arg = lookup(mname, MACARG, macnum)) == NULL)
466                                 return error("undefined argument: '%s'", mname);
467                         else
468                         {
469                                 // Convert a string of tokens (terminated with EOL) back into
470                                 // text. If an argument is out of range (not specified in the
471                                 // macro invocation) then it is ignored.
472                                 i = (int)arg->svalue;
473 arg_num:
474                                 DEBUG { printf("~argnumber=%d\n", i); }
475                                 tk = NULL;
476
477                                 if (i < imacro->im_nargs)
478                                 {
479                                         tk = imacro->argument[i].token;
480                                         symbolString = imacro->argument[i].string;
481 //DEBUG
482 //{
483 //      printf("ExM: Preparing to parse argument #%u...\n", i);
484 //      DumpTokens(tk);
485 //}
486                                 }
487
488                                 // \?arg yields:
489                                 //    0  if the argument is empty or non-existant,
490                                 //    1  if the argument is not empty
491                                 if (questmark)
492                                 {
493                                         if (tk == NULL || *tk == EOL)
494                                                 questmark = 0;
495
496                                         if (dst >= edst)
497                                                 goto overflow;
498
499                                         *dst++ = (char)(questmark + '0');
500                                         continue;
501                                 }
502
503                                 // Argument # is in range, so expand it
504                                 if (tk != NULL)
505                                 {
506                                         while (*tk != EOL)
507                                         {
508                                                 // Reverse-translation from a token number to a string.
509                                                 // This is a hack. It might be better table-driven.
510                                                 d = NULL;
511
512                                                 if ((*tk >= KW_D0) && !rdsp && !rgpu)
513                                                 {
514                                                         d = regname[(int)*tk++ - KW_D0];
515                                                         goto strcopy;
516                                                 }
517                                                 else if ((*tk >= KW_R0) && (*tk <= KW_R31))
518                                                 {
519                                                         d = riscregname[(int)*tk++ - KW_R0];
520                                                         goto strcopy;
521                                                 }
522                                                 else
523                                                 {
524                                                         switch ((int)*tk++)
525                                                         {
526                                                         case SYMBOL:
527                                                                 d = symbolString[*tk++];
528 DEBUG { printf("ExM: SYMBOL=\"%s\"", d); }
529                                                                 break;
530                                                         case STRING:
531                                                                 d = symbolString[*tk++];
532
533                                                                 if (dst >= edst)
534                                                                         goto overflow;
535
536                                                                 *dst++ = '"';
537
538                                                                 while (*d != EOS)
539                                                                 {
540                                                                         if (dst >= edst)
541                                                                                 goto overflow;
542                                                                         else
543                                                                                 *dst++ = *d++;
544                                                                 }
545
546                                                                 if (dst >= edst)
547                                                                         goto overflow;
548
549                                                                 *dst++ = '"';
550                                                                 continue;
551                                                                 break;
552 // Shamus: Changing the format specifier from %lx to %ux caused the assembler
553 //         to choke on legitimate code... Need to investigate this further
554 //         before changing anything else here!
555                                                         case CONST:
556 //                                                              sprintf(numbuf, "$%lx", (uint64_t)*tk++);
557                                                                 sprintf(numbuf, "$%" PRIX64, (uint64_t)*tk++);
558                                                                 tk++;
559                                                                 d = numbuf;
560                                                                 break;
561                                                         case DEQUALS:
562                                                                 d = "==";
563                                                                 break;
564                                                         case SET:
565                                                                 d = "set";
566                                                                 break;
567                                                         case COLON:
568                                                                 d = ":";
569                                                                 break;
570                                                         case DCOLON:
571                                                                 d = "::";
572                                                                 break;
573                                                         case GE:
574                                                                 d = ">=";
575                                                                 break;
576                                                         case LE:
577                                                                 d = "<=";
578                                                                 break;
579                                                         case NE:
580                                                                 d = "<>";
581                                                                 break;
582                                                         case SHR:
583                                                                 d = ">>";
584                                                                 break;
585                                                         case SHL:
586                                                                 d = "<<";
587                                                                 break;
588                                                         case DOTB:
589                                                                 d = ".b";
590                                                                 break;
591                                                         case DOTW:
592                                                                 d = ".w";
593                                                                 break;
594                                                         case DOTL:
595                                                                 d = ".l";
596                                                                 break;
597                                                         case CR_ABSCOUNT:
598                                                                 d = "^^abscount";
599                                                                 break;
600                                                         case CR_FILESIZE:
601                                                                 d = "^^filesize";
602                                                                 break;
603                                                         case CR_DATE:
604                                                                 d = "^^date";
605                                                                 break;
606                                                         case CR_TIME:
607                                                                 d = "^^time";
608                                                                 break;
609                                                         case CR_DEFINED:
610                                                                 d = "^^defined ";
611                                                                 break;
612                                                         case CR_REFERENCED:
613                                                                 d = "^^referenced ";
614                                                                 break;
615                                                         case CR_STREQ:
616                                                                 d = "^^streq ";
617                                                                 break;
618                                                         case CR_MACDEF:
619                                                                 d = "^^macdef ";
620                                                                 break;
621                                                         default:
622                                                                 if (dst >= edst)
623                                                                         goto overflow;
624
625                                                                 *dst++ = (char)*(tk - 1);
626                                                                 break;
627                                                         }
628                                                 }
629
630                                                 // If 'd' != NULL, copy string to destination
631                                                 if (d != NULL)
632                                                 {
633 strcopy:
634                                                         DEBUG printf("d='%s'\n", d);
635
636                                                         while (*d != EOS)
637                                                         {
638                                                                 if (dst >= edst)
639                                                                         goto overflow;
640                                                                 else
641                                                                         *dst++ = *d++;
642                                                         }
643                                                 }
644                                         }
645                                 }
646                         }
647                 }
648         }
649
650 skipcomments:
651
652         *dst = EOS;
653         DEBUG { printf("ExM: dst=\"%s\"\n", dest); }
654         return OK;
655
656 overflow:
657         *dst = EOS;
658         DEBUG { printf("*** OVERFLOW LINE ***\n%s\n", dest); }
659         return fatal("line too long as a result of macro expansion");
660 }
661
662
663 //
664 // Get next line of text from a macro
665 //
666 char * GetNextMacroLine(void)
667 {
668         IMACRO * imacro = cur_inobj->inobj.imacro;
669         LLIST * strp = imacro->im_nextln;
670
671         if (strp == NULL)                                               // End-of-macro
672                 return NULL;
673
674         imacro->im_nextln = strp->next;
675 //      ExpandMacro((char *)(strp + 1), imacro->im_lnbuf, LNSIZ);
676         ExpandMacro(strp->line, imacro->im_lnbuf, LNSIZ);
677
678         return imacro->im_lnbuf;
679 }
680
681
682 //
683 // Get next line of text from a repeat block
684 //
685 char * GetNextRepeatLine(void)
686 {
687         IREPT * irept = cur_inobj->inobj.irept;
688 //      LONG * strp = irept->ir_nextln;                 // initial null
689
690         // Do repeat at end of .rept block's string list
691 //      if (strp == NULL)
692         if (irept->ir_nextln == NULL)
693         {
694                 DEBUG { printf("back-to-top-of-repeat-block count=%d\n", (int)irept->ir_count); }
695                 irept->ir_nextln = irept->ir_firstln;   // copy first line
696
697                 if (irept->ir_count-- == 0)
698                 {
699                         DEBUG { printf("end-repeat-block\n"); }
700                         return NULL;
701                 }
702                 reptuniq++;
703 //              strp = irept->ir_nextln;
704         }
705         // Mark the current macro line in the irept object
706         // This is probably overkill - a global variable
707         // would suffice here (it only gets used during
708         // error reporting anyway)
709         irept->lineno = irept->ir_nextln->lineno;
710
711         // Copy the rept lines verbatim, unless we're in nest level 0.
712         // Then, expand any \~ labels to unique numbers (Rn)
713         if (rptlevel)
714         {
715                 strcpy(irbuf, irept->ir_nextln->line);
716         }
717         else
718         {
719                 uint32_t linelen = strlen(irept->ir_nextln->line);
720                 uint8_t *p_line = irept->ir_nextln->line;
721                 char *irbufwrite = irbuf;
722                 for (int i = 0; i <= linelen; i++)
723                 {
724                         uint8_t c;
725                         c = *p_line++;
726                         if (c == '\\' && *p_line == '~')
727                         {
728                                 p_line++;
729                                 irbufwrite += sprintf(irbufwrite, "R%u", reptuniq);
730                         }
731                         else
732                         {
733                                 *irbufwrite++ = c;
734                         }
735                 }
736         }
737
738         DEBUG { printf("repeat line='%s'\n", irbuf); }
739 //      irept->ir_nextln = (LONG *)*strp;
740         irept->ir_nextln = irept->ir_nextln->next;
741
742         return irbuf;
743 }
744
745
746 //
747 // Include a source file used at the root, and for ".include" files
748 //
749 int include(int handle, char * fname)
750 {
751         // Debug mode
752         DEBUG { printf("[include: %s, cfileno=%u]\n", fname, cfileno); }
753
754         // Alloc and initialize include-descriptors
755         INOBJ * inobj = a_inobj(SRC_IFILE);
756         IFILE * ifile = inobj->inobj.ifile;
757
758         ifile->ifhandle = handle;                       // Setup file handle
759         ifile->ifind = ifile->ifcnt = 0;        // Setup buffer indices
760         ifile->ifoldlineno = curlineno;         // Save old line number
761         ifile->ifoldfname = curfname;           // Save old filename
762         ifile->ifno = cfileno;                          // Save old file number
763
764         // NB: This *must* be preincrement, we're adding one to the filecount here!
765         cfileno = ++filecount;                          // Compute NEW file number
766         curfname = strdup(fname);                       // Set current filename (alloc storage)
767         curlineno = 0;                                          // Start on line zero
768
769         // Add another file to the file-record
770         FILEREC * fr = (FILEREC *)malloc(sizeof(FILEREC));
771         fr->frec_next = NULL;
772         fr->frec_name = curfname;
773
774         if (last_fr == NULL)
775                 filerec = fr;                                   // Add first filerec
776         else
777                 last_fr->frec_next = fr;                // Append to list of filerecs
778
779         last_fr = fr;
780         DEBUG { printf("[include: curfname: %s, cfileno=%u]\n", curfname, cfileno); }
781
782         return OK;
783 }
784
785
786 //
787 // Pop the current input level
788 //
789 int fpop(void)
790 {
791         INOBJ * inobj = cur_inobj;
792
793         if (inobj == NULL)
794                 return 0;
795
796         // Pop IFENT levels until we reach the conditional assembly context we
797         // were at when the input object was entered.
798         int numUnmatched = 0;
799
800         while (ifent != inobj->in_ifent)
801         {
802                 if (d_endif() != 0)     // Something bad happened during endif parsing?
803                         return -1;              // If yes, bail instead of getting stuck in a loop
804
805                 numUnmatched++;
806         }
807
808         // Give a warning to the user that we had to wipe their bum for them
809         if (numUnmatched > 0)
810                 warn("missing %d .endif(s)", numUnmatched);
811
812         tok = inobj->in_otok;   // Restore tok and etok
813         etok = inobj->in_etok;
814
815         switch (inobj->in_type)
816         {
817         case SRC_IFILE:                 // Pop and release an IFILE
818         {
819                 DEBUG { printf("[Leaving: %s]\n", curfname); }
820
821                 IFILE * ifile = inobj->inobj.ifile;
822                 ifile->if_link = f_ifile;
823                 f_ifile = ifile;
824                 close(ifile->ifhandle);                 // Close source file
825 DEBUG { printf("[fpop (pre):  curfname=%s]\n", curfname); }
826                 curfname = ifile->ifoldfname;   // Set current filename
827 DEBUG { printf("[fpop (post): curfname=%s]\n", curfname); }
828 DEBUG { printf("[fpop: (pre)  cfileno=%d ifile->ifno=%d]\n", (int)cfileno, (int)ifile->ifno); }
829                 curlineno = ifile->ifoldlineno; // Set current line#
830                 DEBUG { printf("cfileno=%d ifile->ifno=%d\n", (int)cfileno, (int)ifile->ifno); }
831                 cfileno = ifile->ifno;                  // Restore current file number
832 DEBUG { printf("[fpop: (post) cfileno=%d ifile->ifno=%d]\n", (int)cfileno, (int)ifile->ifno); }
833                 break;
834         }
835
836         case SRC_IMACRO:                                        // Pop and release an IMACRO
837         {
838                 IMACRO * imacro = inobj->inobj.imacro;
839                 imacro->im_link = f_imacro;
840                 f_imacro = imacro;
841                 break;
842         }
843
844         case SRC_IREPT:                                         // Pop and release an IREPT
845         {
846                 DEBUG { printf("dealloc IREPT\n"); }
847                 LLIST * p = inobj->inobj.irept->ir_firstln;
848
849                 // Deallocate repeat lines
850                 while (p != NULL)
851                 {
852                         free(p->line);
853                         p = p->next;
854                 }
855
856                 break;
857         }
858         }
859
860         cur_inobj = inobj->in_link;
861         inobj->in_link = f_inobj;
862         f_inobj = inobj;
863
864         return 0;
865 }
866
867
868 //
869 // Get line from file into buf, return NULL on EOF or ptr to the start of a
870 // null-term line
871 //
872 char * GetNextLine(void)
873 {
874         int i, j;
875         char * p, * d;
876         int readamt = -1;                                               // 0 if last read() yeilded 0 bytes
877         IFILE * fl = cur_inobj->inobj.ifile;
878
879         for(;;)
880         {
881                 // Scan for next end-of-line; handle stupid text formats by treating
882                 // \r\n the same as \n. (lone '\r' at end of buffer means we have to
883                 // check for '\n').
884                 d = &fl->ifbuf[fl->ifind];
885
886                 for(p=d, i=0, j=fl->ifcnt; i<j; i++, p++)
887                 {
888                         if (*p == '\r' || *p == '\n')
889                         {
890                                 i++;
891
892                                 if (*p == '\r')
893                                 {
894                                         if (i >= j)
895                                                 break;  // Need to read more, then look for '\n' to eat
896                                         else if (p[1] == '\n')
897                                                 i++;
898                                 }
899
900                                 // Cover up the newline with end-of-string sentinel
901                                 *p = '\0';
902
903                                 fl->ifind += i;
904                                 fl->ifcnt -= i;
905                                 return d;
906                         }
907                 }
908
909                 // Handle hanging lines by ignoring them (Input file is exhausted, no
910                 // \r or \n on last line)
911                 // Shamus: This is retarded. Never ignore any input!
912                 if (!readamt && fl->ifcnt)
913                 {
914 #if 0
915                         fl->ifcnt = 0;
916                         *p = '\0';
917                         return NULL;
918 #else
919                         // Really should check to see if we're at the end of the buffer!
920                         // :-P
921                         fl->ifbuf[fl->ifind + fl->ifcnt] = '\0';
922                         fl->ifcnt = 0;
923                         return &fl->ifbuf[fl->ifind];
924 #endif
925                 }
926
927                 // Truncate and return absurdly long lines.
928                 if (fl->ifcnt >= QUANTUM)
929                 {
930                         fl->ifbuf[fl->ifind + fl->ifcnt - 1] = '\0';
931                         fl->ifcnt = 0;
932                         return &fl->ifbuf[fl->ifind];
933                 }
934
935                 // Relocate what's left of a line to the beginning of the buffer, and
936                 // read some more of the file in; return NULL if the buffer's empty and
937                 // on EOF.
938                 if (fl->ifind != 0)
939                 {
940                         p = &fl->ifbuf[fl->ifind];
941                         d = &fl->ifbuf[fl->ifcnt & 1];
942
943                         for(i=0; i<fl->ifcnt; i++)
944                                 *d++ = *p++;
945
946                         fl->ifind = fl->ifcnt & 1;
947                 }
948
949                 readamt = read(fl->ifhandle, &fl->ifbuf[fl->ifind + fl->ifcnt], QUANTUM);
950
951                 if (readamt < 0)
952                         return NULL;
953
954                 if ((fl->ifcnt += readamt) == 0)
955                         return NULL;
956         }
957 }
958
959
960 //
961 // Tokenize a line
962 //
963 int TokenizeLine(void)
964 {
965         uint8_t * ln = NULL;            // Ptr to current position in line
966         uint8_t * p;                            // Random character ptr
967         PTR tk;                                         // Token-deposit ptr
968         int state = 0;                          // State for keyword detector
969         int j = 0;                                      // Var for keyword detector
970         uint8_t c;                                      // Random char
971         uint64_t v;                                     // Random value
972         uint32_t cursize = 0;           // Current line's size (.b, .w, .l, .s, .q, .d)
973         uint8_t * nullspot = NULL;      // Spot to clobber for SYMBOL termination
974         int stuffnull;                          // 1:terminate SYMBOL '\0' at *nullspot
975         uint8_t c1;
976         int stringNum = 0;                      // Pointer to string locations in tokenized line
977         SYM* sy;                                        // For looking up symbols (.equr)
978         int equrundef = 0;                      // Flag for equrundef scanning
979
980 retry:
981
982         if (cur_inobj == NULL)          // Return EOF if input stack is empty
983                 return TKEOF;
984
985         // Get another line of input from the current input source: a file, a
986         // macro, or a repeat-block
987         switch (cur_inobj->in_type)
988         {
989         // Include-file:
990         // o  handle EOF;
991         // o  bump source line number;
992         // o  tag the listing-line with a space;
993         // o  kludge lines generated by Alcyon C.
994         case SRC_IFILE:
995                 if ((ln = GetNextLine()) == NULL)
996                 {
997 DEBUG { printf("TokenizeLine: Calling fpop() from SRC_IFILE...\n"); }
998                         if (fpop() == 0)        // Pop input level
999                                 goto retry;             // Try for more lines
1000                         else
1001                         {
1002                                 ifent->if_prev = (IFENT *)-1;   //Signal Assemble() that we have reached EOF with unbalanced if/endifs
1003                                 return TKEOF;
1004                         }
1005                 }
1006
1007                 curlineno++;                    // Bump line number
1008                 lntag = SPACE;
1009
1010                 break;
1011
1012         // Macro-block:
1013         // o  Handle end-of-macro;
1014         // o  tag the listing-line with an at (@) sign.
1015         case SRC_IMACRO:
1016                 if ((ln = GetNextMacroLine()) == NULL)
1017                 {
1018                         if (ExitMacro() == 0)   // Exit macro (pop args, do fpop(), etc)
1019                                 goto retry;                     // Try for more lines...
1020                         else
1021                                 return TKEOF;           // Oops, we got a non zero return code, signal EOF
1022                 }
1023
1024                 lntag = '@';
1025                 break;
1026
1027         // Repeat-block:
1028         // o  Handle end-of-repeat-block;
1029         // o  tag the listing-line with a pound (#) sign.
1030         case SRC_IREPT:
1031                 if ((ln = GetNextRepeatLine()) == NULL)
1032                 {
1033                         DEBUG { printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n"); }
1034                         fpop();
1035                         goto retry;
1036                 }
1037
1038                 lntag = '#';
1039                 break;
1040         }
1041
1042         // Save text of the line. We only do this during listings and within
1043         // macro-type blocks, since it is expensive to unconditionally copy every
1044         // line.
1045         if (lnsave)
1046         {
1047                 // Sanity check
1048                 if (strlen(ln) > LNSIZ)
1049                         return error("line too long (%d, max %d)", strlen(ln), LNSIZ);
1050
1051                 strcpy(lnbuf, ln);
1052         }
1053
1054         // General housekeeping
1055         tok = tokeol;                   // Set "tok" to EOL in case of error
1056         tk.u32 = etok;                  // Reset token ptr
1057         stuffnull = 0;                  // Don't stuff nulls
1058         totlines++;                             // Bump total #lines assembled
1059
1060         // See if the entire line is a comment. This is a win if the programmer
1061         // puts in lots of comments
1062         if (*ln == '*' || *ln == ';' || ((*ln == '/') && (*(ln + 1) == '/')))
1063                 goto goteol;
1064
1065         // And here we have a very ugly hack for signalling a single line 'turn off
1066         // optimization'. There's really no nice way to do this, so hack it is!
1067         optimizeOff = 0;                // Default is to take optimizations as they come
1068
1069         if (*ln == '!')
1070         {
1071                 optimizeOff = 1;        // Signal that we don't want to optimize this line
1072                 ln++;                           // & skip over the darned thing
1073         }
1074
1075         // Main tokenization loop;
1076         //  o  skip whitespace;
1077         //  o  handle end-of-line;
1078         //  o  handle symbols;
1079         //  o  handle single-character tokens (operators, etc.);
1080         //  o  handle multiple-character tokens (constants, strings, etc.).
1081         for(; *ln!=EOS;)
1082         {
1083                 // Check to see if there's enough space in the token buffer
1084                 if (tk.cp >= ((uint8_t *)(&tokbuf[TOKBUFSIZE])) - 20)
1085                 {
1086                         return error("token buffer overrun");
1087                 }
1088
1089                 // Skip whitespace, handle EOL
1090                 while (chrtab[*ln] & WHITE)
1091                         ln++;
1092
1093                 // Handle EOL, comment with ';'
1094                 if (*ln == EOS || *ln == ';'|| ((*ln == '/') && (*(ln + 1) == '/')))
1095                         break;
1096
1097                 // Handle start of symbol. Symbols are null-terminated in place. The
1098                 // termination is always one symbol behind, since there may be no place
1099                 // for a null in the case that an operator immediately follows the name.
1100                 c = chrtab[*ln];
1101
1102                 if (c & STSYM)
1103                 {
1104                         if (stuffnull)                  // Terminate old symbol from previous pass
1105                                 *nullspot = EOS;
1106
1107                         v = 0;                                  // Assume no DOT attrib follows symbol
1108                         stuffnull = 1;
1109
1110                         // In some cases, we need to check for a DOTx at the *beginning*
1111                         // of a symbol, as the "start" of the line we're currently looking
1112                         // at could be somewhere in the middle of that line!
1113                         if (*ln == '.')
1114                         {
1115                                 // Make sure that it's *only* a .[bwsl] following, and not the
1116                                 // start of a local symbol:
1117                                 if ((chrtab[*(ln + 1)] & DOT)
1118                                         && (dotxtab[*(ln + 1)] != 0)
1119                                         && !(chrtab[*(ln + 2)] & CTSYM))
1120                                 {
1121                                         // We found a legitimate DOTx construct, so add it to the
1122                                         // token stream:
1123                                         ln++;
1124                                         stuffnull = 0;
1125                                         *tk.u32++ = (TOKEN)dotxtab[*ln++];
1126                                         continue;
1127                                 }
1128                         }
1129
1130                         p = nullspot = ln++;    // Nullspot -> start of this symbol
1131
1132                         // Find end of symbol (and compute its length)
1133                         for(j=1; (int)chrtab[*ln]&CTSYM; j++)
1134                                 ln++;
1135
1136                         // Handle "DOT" special forms (like ".b") that follow a normal
1137                         // symbol or keyword:
1138                         if (*ln == '.')
1139                         {
1140                                 *ln++ = EOS;            // Terminate symbol
1141                                 stuffnull = 0;          // And never try it again
1142
1143                                 // Character following the '.' must have a DOT attribute, and
1144                                 // the chararacter after THAT one must not have a start-symbol
1145                                 // attribute (to prevent symbols that look like, for example,
1146                                 // "zingo.barf", which might be a good idea anyway....)
1147                                 if (((chrtab[*ln] & DOT) == 0) || (dotxtab[*ln] == 0))
1148                                         return error("[bwsl] must follow '.' in symbol");
1149
1150                                 v = (uint32_t)dotxtab[*ln++];
1151                                 cursize = (uint32_t)v;
1152
1153                                 if (chrtab[*ln] & CTSYM)
1154                                         return error("misuse of '.'; not allowed in symbols");
1155                         }
1156
1157                         // If the symbol is small, check to see if it's really the name of
1158                         // a register.
1159                         if (j <= KWSIZE)
1160                         {
1161                                 for(state=0; state>=0;)
1162                                 {
1163                                         j = (int)tolowertab[*p++];
1164                                         j += kwbase[state];
1165
1166                                         if (kwcheck[j] != state)
1167                                         {
1168                                                 j = -1;
1169                                                 break;
1170                                         }
1171
1172                                         if (*p == EOS || p == ln)
1173                                         {
1174                                                 j = kwaccept[j];
1175                                                 break;
1176                                         }
1177
1178                                         state = kwtab[j];
1179                                 }
1180                         }
1181                         else
1182                         {
1183                                 j = -1;
1184                         }
1185
1186                         // Make j = -1 if user tries to use a RISC register while in 68K mode
1187                         if (!(rgpu || rdsp || dsp56001) && ((TOKEN)j >= KW_R0 && (TOKEN)j <= KW_R31))
1188                         {
1189                                 j = -1;
1190                         }
1191
1192                         // Make j = -1 if time, date etc with no preceeding ^^
1193                         // defined, referenced, streq, macdef, date and time
1194                         switch ((TOKEN)j)
1195                         {
1196                         case 112:   // defined
1197                         case 113:   // referenced
1198                         case 118:   // streq
1199                         case 119:   // macdef
1200                         case 120:   // time
1201                         case 121:   // date
1202                         case KW_FILESIZE: // filesize
1203                                 j = -1;
1204                         }
1205                         
1206                         // If we detected equrundef/regundef set relevant flag
1207                         if (j == KW_EQURUNDEF)
1208                         {
1209                                 equrundef = 1;
1210                                 j = -1;
1211                                 //printf("line %d, equrundef found\n", curlineno);
1212                         }
1213
1214                         // If not tokenized keyword OR token was not found
1215                         if ((j < 0) || (state < 0))
1216                         {
1217                                 // Only proceed if no equrundef has been detected. In that case we need to store the symbol
1218                                 // because the directive handler (d_equrundef) will run outside this loop, further into procln.c
1219                                 if (!equrundef)
1220                                 {
1221                                         // Last attempt: let's see if this is an equated register
1222                                         char temp = *ln;
1223                                         *ln = 0;
1224                                         sy = lookup(nullspot, LABEL, 0);
1225                                         *ln = temp;
1226                                         if (sy)
1227                                         {
1228                                                 if (sy->sattre & EQUATEDREG)
1229                                                 {
1230                                                         uint32_t register_token = sy->svalue;
1231                                                         if (rgpu || rdsp)
1232                                                         {
1233                                                                 // If we are in GPU or DSP mode then mark the register bank.
1234                                                                 // We will use it during EvaluateRegisterFromTokenStream()
1235                                                                 // when we check if we can use the equated register with the currently
1236                                                                 // selected bank.
1237                                                                 // Note (ggn): I find all this superfluous. Do we really want to be so
1238                                                                 //             protective? Plus, the current implementation happily skips
1239                                                                 //                         these checks on .equr that are set during fixups - oops!
1240                                                                 register_token |= 0x80000000;           // Mark that this is an .equr
1241                                                                 if (sy->sattre & BANK_1)
1242                                                                 {
1243                                                                         register_token |= 0x40000000;   // Mark bank 1
1244                                                                 }
1245                                                         }
1246                                                         *tk.u32++ = register_token;
1247                                                         stuffnull = 0;
1248                                                         continue;
1249                                                 }
1250                                         }
1251                                 }
1252                                 // Ok, that failed, let's store the symbol instead
1253                                 *tk.u32++ = SYMBOL;
1254                                 string[stringNum] = nullspot;
1255                                 *tk.u32++ = stringNum;
1256                                 stringNum++;
1257                         }
1258                         else
1259                         {
1260                                 *tk.u32++ = (TOKEN)j;
1261                                 stuffnull = 0;
1262                         }
1263
1264                         if (v)                  // Record attribute token (if any)
1265                                 *tk.u32++ = (TOKEN)v;
1266
1267                         if (stuffnull)  // Arrange for string termination on next pass
1268                                 nullspot = ln;
1269
1270                         continue;
1271                 }
1272
1273                 // Handle identity tokens
1274                 if (c & SELF)
1275                 {
1276                         *tk.u32++ = *ln++;
1277                         continue;
1278                 }
1279
1280                 // Handle multiple-character tokens
1281                 if (c & MULTX)
1282                 {
1283                         switch (*ln++)
1284                         {
1285                         case '!':               // ! or !=
1286                                 if (*ln == '=')
1287                                 {
1288                                         *tk.u32++ = NE;
1289                                         ln++;
1290                                 }
1291                                 else
1292                                         *tk.u32++ = '!';
1293
1294                                 continue;
1295                         case '\'':              // 'string'
1296                                 if (m6502)
1297                                 {
1298                                         // Hardcoded for now, maybe this will change in the future
1299                                         *tk.u32++ = STRINGA8;
1300                                         goto dostring;
1301                                 }
1302                                 // Fall through
1303                         case '\"':              // "string"
1304                                 *tk.u32++ = STRING;
1305 dostring:
1306                                 c1 = ln[-1];
1307                                 string[stringNum] = ln;
1308                                 *tk.u32++ = stringNum;
1309                                 stringNum++;
1310
1311                                 for(p=ln; *ln!=EOS && *ln!=c1;)
1312                                 {
1313                                         c = *ln++;
1314
1315                                         if (c == '\\')
1316                                         {
1317                                                 switch (*ln++)
1318                                                 {
1319                                                 case EOS:
1320                                                         return(error("unterminated string"));
1321                                                 case 'e':
1322                                                         c = '\033';
1323                                                         break;
1324                                                 case 'n':
1325                                                         c = '\n';
1326                                                         break;
1327                                                 case 'b':
1328                                                         c = '\b';
1329                                                         break;
1330                                                 case 't':
1331                                                         c = '\t';
1332                                                         break;
1333                                                 case 'r':
1334                                                         c = '\r';
1335                                                         break;
1336                                                 case 'f':
1337                                                         c = '\f';
1338                                                         break;
1339                                                 case '\"':
1340                                                         c = '\"';
1341                                                         break;
1342                                                 case '\'':
1343                                                         c = '\'';
1344                                                         break;
1345                                                 case '\\':
1346                                                         c = '\\';
1347                                                         break;
1348                                                 case '{':
1349                                                         // If we're evaluating a macro
1350                                                         // this is valid because it's
1351                                                         // a parameter expansion
1352                                                 case '!':
1353                                                         // If we're evaluating a macro
1354                                                         // this is valid and expands to
1355                                                         // "dot-size"
1356                                                         break;
1357                                                 default:
1358                                                         warn("bad backslash code in string");
1359                                                         ln--;
1360                                                         break;
1361                                                 }
1362                                         }
1363
1364                                         *p++ = c;
1365                                 }
1366
1367                                 if (*ln++ != c1)
1368                                         return error("unterminated string");
1369
1370                                 *p++ = EOS;
1371                                 continue;
1372                         case '$':               // $, hex constant
1373                                 if (chrtab[*ln] & HDIGIT)
1374                                 {
1375                                         v = 0;
1376
1377                                         // Parse the hex value
1378                                         while (hextab[*ln] >= 0)
1379                                                 v = (v << 4) + (int)hextab[*ln++];
1380
1381                                         *tk.u32++ = CONST;
1382                                         *tk.u64++ = v;
1383
1384                                         if (*ln == '.')
1385                                         {
1386                                                 if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
1387                                                 {
1388                                                         *tk.u32++ = DOTW;
1389                                                         ln += 2;
1390                                                 }
1391                                                 else if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
1392                                                 {
1393                                                         *tk.u32++ = DOTL;
1394                                                         ln += 2;
1395                                                 }
1396                                         }
1397                                 }
1398                                 else
1399                                         *tk.u32++ = '$';
1400
1401                                 continue;
1402                         case '<':               // < or << or <> or <=
1403                                 switch (*ln)
1404                                 {
1405                                 case '<':
1406                                         *tk.u32++ = SHL;
1407                                         ln++;
1408                                         continue;
1409                                 case '>':
1410                                         *tk.u32++ = NE;
1411                                         ln++;
1412                                         continue;
1413                                 case '=':
1414                                         *tk.u32++ = LE;
1415                                         ln++;
1416                                         continue;
1417                                 default:
1418                                         *tk.u32++ = '<';
1419                                         continue;
1420                                 }
1421                         case ':':               // : or ::
1422                                 if (*ln == ':')
1423                                 {
1424                                         *tk.u32++ = DCOLON;
1425                                         ln++;
1426                                 }
1427                                 else
1428                                         *tk.u32++ = ':';
1429
1430                                 continue;
1431                         case '=':               // = or ==
1432                                 if (*ln == '=')
1433                                 {
1434                                         *tk.u32++ = DEQUALS;
1435                                         ln++;
1436                                 }
1437                                 else
1438                                         *tk.u32++ = '=';
1439
1440                                 continue;
1441                         case '>':               // > or >> or >=
1442                                 switch (*ln)
1443                                 {
1444                                 case '>':
1445                                         *tk.u32++ = SHR;
1446                                         ln++;
1447                                         continue;
1448                                 case '=':
1449                                         *tk.u32++ = GE;
1450                                         ln++;
1451                                         continue;
1452                                 default:
1453                                         *tk.u32++ = '>';
1454                                         continue;
1455                                 }
1456                         case '%':               // % or binary constant
1457                                 if (*ln < '0' || *ln > '1')
1458                                 {
1459                                         *tk.u32++ = '%';
1460                                         continue;
1461                                 }
1462
1463                                 v = 0;
1464
1465                                 while (*ln >= '0' && *ln <= '1')
1466                                         v = (v << 1) + *ln++ - '0';
1467
1468                                 if (*ln == '.')
1469                                 {
1470                                         if ((*(ln + 1) == 'b') || (*(ln + 1) == 'B'))
1471                                         {
1472                                                 v &= 0x000000FF;
1473                                                 ln += 2;
1474                                         }
1475
1476                                         if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
1477                                         {
1478                                                 v &= 0x0000FFFF;
1479                                                 ln += 2;
1480                                         }
1481
1482                                         if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
1483                                         {
1484                                                 v &= 0xFFFFFFFF;
1485                                                 ln += 2;
1486                                         }
1487                                 }
1488
1489                                 *tk.u32++ = CONST;
1490                                 *tk.u64++ = v;
1491                                 continue;
1492                         case '@':               // @ or octal constant
1493                                 if (*ln < '0' || *ln > '7')
1494                                 {
1495                                         *tk.u32++ = '@';
1496                                         continue;
1497                                 }
1498
1499                                 v = 0;
1500
1501                                 while (*ln >= '0' && *ln <= '7')
1502                                         v = (v << 3) + *ln++ - '0';
1503
1504                                 if (*ln == '.')
1505                                 {
1506                                         if ((*(ln + 1) == 'b') || (*(ln + 1) == 'B'))
1507                                         {
1508                                                 v &= 0x000000FF;
1509                                                 ln += 2;
1510                                         }
1511
1512                                         if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
1513                                         {
1514                                                 v &= 0x0000FFFF;
1515                                                 ln += 2;
1516                                         }
1517
1518                                         if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
1519                                         {
1520                                                 v &= 0xFFFFFFFF;
1521                                                 ln += 2;
1522                                         }
1523                                 }
1524
1525                                 *tk.u32++ = CONST;
1526                                 *tk.u64++ = v;
1527                                 continue;
1528                         case '^':               // ^ or ^^ <operator-name>
1529                                 if (*ln != '^')
1530                                 {
1531                                         *tk.u32++ = '^';
1532                                         continue;
1533                                 }
1534
1535                                 if (((int)chrtab[*++ln] & STSYM) == 0)
1536                                 {
1537                                         error("invalid symbol following ^^");
1538                                         continue;
1539                                 }
1540
1541                                 p = ln++;
1542
1543                                 while ((int)chrtab[*ln] & CTSYM)
1544                                         ++ln;
1545
1546                                 for(state=0; state>=0;)
1547                                 {
1548                                         // Get char, convert to lowercase
1549                                         j = *p++;
1550
1551                                         if (j >= 'A' && j <= 'Z')
1552                                                 j += 0x20;
1553
1554                                         j += kwbase[state];
1555
1556                                         if (kwcheck[j] != state)
1557                                         {
1558                                                 j = -1;
1559                                                 break;
1560                                         }
1561
1562                                         if (*p == EOS || p == ln)
1563                                         {
1564                                                 j = kwaccept[j];
1565                                                 break;
1566                                         }
1567
1568                                         state = kwtab[j];
1569                                 }
1570
1571                                 if (j < 0 || state < 0)
1572                                 {
1573                                         error("unknown symbol following ^^");
1574                                         continue;
1575                                 }
1576
1577                                 *tk.u32++ = (TOKEN)j;
1578                                 continue;
1579                         default:
1580                                 interror(2);    // Bad MULTX entry in chrtab
1581                                 continue;
1582                         }
1583                 }
1584
1585                 // Handle decimal constant
1586                 if (c & DIGIT)
1587                 {
1588                         uint8_t * numStart = ln;
1589                         v = 0;
1590
1591                         while ((int)chrtab[*ln] & DIGIT)
1592                                 v = (v * 10) + *ln++ - '0';
1593
1594                         // See if there's a .[bwl] after the constant & deal with it if so
1595                         if (*ln == '.')
1596                         {
1597                                 if ((*(ln + 1) == 'b') || (*(ln + 1) == 'B'))
1598                                 {
1599                                         v &= 0x000000FF;
1600                                         ln += 2;
1601                                         *tk.u32++ = CONST;
1602                                         *tk.u64++ = v;
1603                                         *tk.u32++ = DOTB;
1604                                 }
1605                                 else if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
1606                                 {
1607                                         v &= 0x0000FFFF;
1608                                         ln += 2;
1609                                         *tk.u32++ = CONST;
1610                                         *tk.u64++ = v;
1611                                         *tk.u32++ = DOTW;
1612                                 }
1613                                 else if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
1614                                 {
1615                                         v &= 0xFFFFFFFF;
1616                                         ln += 2;
1617                                         *tk.u32++ = CONST;
1618                                         *tk.u64++ = v;
1619                                         *tk.u32++ = DOTL;
1620                                 }
1621                                 else if ((int)chrtab[*(ln + 1)] & DIGIT)
1622                                 {
1623                                         // Hey, more digits after the dot, so we assume it's a
1624                                         // floating point number of some kind... numEnd will point
1625                                         // to the first non-float character after it's done
1626                                         char * numEnd;
1627                                         errno = 0;
1628                                         double f = strtod(numStart, &numEnd);
1629                                         ln = (uint8_t *)numEnd;
1630
1631                                         if (errno != 0)
1632                                                 return error("floating point parse error");
1633
1634                                         // N.B.: We use the C compiler's internal double
1635                                         //       representation for all internal float calcs and
1636                                         //       are reasonably sure that the size of said double
1637                                         //       is 8 bytes long (which we check for in fltpoint.c)
1638                                         *tk.u32++ = FCONST;
1639                                         *tk.dp = f;
1640                                         tk.u64++;
1641                                         continue;
1642                                 }
1643                         }
1644                         else
1645                         {
1646                                 *tk.u32++ = CONST;
1647                                 *tk.u64++ = v;
1648                         }
1649
1650 //printf("CONST: %i\n", v);
1651                         continue;
1652                 }
1653
1654                 // Handle illegal character
1655                 return error("illegal character $%02X found", *ln);
1656         }
1657
1658         // Terminate line of tokens and return "success."
1659
1660 goteol:
1661         tok = etok;                             // Set tok to beginning of line
1662
1663         if (stuffnull)                  // Terminate last SYMBOL
1664                 *nullspot = EOS;
1665
1666         *tk.u32++ = EOL;
1667
1668         return OK;
1669 }
1670
1671
1672 //
1673 // .GOTO <label>        goto directive
1674 //
1675 // The label is searched for starting from the first line of the current,
1676 // enclosing macro definition. If no enclosing macro exists, an error is
1677 // generated.
1678 //
1679 // A label is of the form:
1680 //
1681 // :<name><whitespace>
1682 //
1683 // The colon must appear in column 1.  The label is stripped prior to macro
1684 // expansion, and is NOT subject to macro expansion.  The whitespace may also
1685 // be EOL.
1686 //
1687 int d_goto(WORD unused)
1688 {
1689         // Setup for the search
1690         if (*tok != SYMBOL)
1691                 return error("missing label");
1692
1693         char * sym = string[tok[1]];
1694         tok += 2;
1695
1696         if (cur_inobj->in_type != SRC_IMACRO)
1697                 return error("goto not in macro");
1698
1699         IMACRO * imacro = cur_inobj->inobj.imacro;
1700         LLIST * defln = imacro->im_macro->lineList;
1701
1702         // Attempt to find the label, starting with the first line.
1703         for(; defln!=NULL; defln=defln->next)
1704         {
1705                 // Must start with a colon
1706                 if (defln->line[0] == ':')
1707                 {
1708                         // Compare names (sleazo string compare)
1709                         char * s1 = sym;
1710                         char * s2 = defln->line + 1;
1711
1712                         // Either we will match the strings to EOS on both, or we will
1713                         // match EOS on string 1 to whitespace on string 2. Otherwise, we
1714                         // have no match.
1715                         while ((*s1 == *s2) || ((*s1 == EOS) && (chrtab[*s2] & WHITE)))
1716                         {
1717                                 // If we reached the end of string 1 (sym), we're done.
1718                                 // Note that we're also checking for the end of string 2 as
1719                                 // well, since we've established they're equal above.
1720                                 if (*s1 == EOS)
1721                                 {
1722                                         // Found the label, set new macro next-line and return.
1723                                         imacro->im_nextln = defln;
1724                                         return 0;
1725                                 }
1726
1727                                 s1++;
1728                                 s2++;
1729                         }
1730                 }
1731         }
1732
1733         return error("goto label not found");
1734 }
1735
1736
1737 void DumpToken(TOKEN t)
1738 {
1739         if (t == COLON)
1740                 printf("[COLON]");
1741         else if (t == CONST)
1742                 printf("[CONST]");
1743         else if (t == FCONST)
1744                 printf("[FCONST]");
1745         else if (t == ACONST)
1746                 printf("[ACONST]");
1747         else if (t == STRING)
1748                 printf("[STRING]");
1749         else if (t == SYMBOL)
1750                 printf("[SYMBOL]");
1751         else if (t == EOS)
1752                 printf("[EOS]");
1753         else if (t == TKEOF)
1754                 printf("[TKEOF]");
1755         else if (t == DEQUALS)
1756                 printf("[DEQUALS]");
1757         else if (t == SET)
1758                 printf("[SET]");
1759         else if (t == REG)
1760                 printf("[REG]");
1761         else if (t == DCOLON)
1762                 printf("[DCOLON]");
1763         else if (t == GE)
1764                 printf("[GE]");
1765         else if (t == LE)
1766                 printf("[LE]");
1767         else if (t == NE)
1768                 printf("[NE]");
1769         else if (t == SHR)
1770                 printf("[SHR]");
1771         else if (t == SHL)
1772                 printf("[SHL]");
1773         else if (t == UNMINUS)
1774                 printf("[UNMINUS]");
1775         else if (t == DOTB)
1776                 printf("[DOTB]");
1777         else if (t == DOTW)
1778                 printf("[DOTW]");
1779         else if (t == DOTL)
1780                 printf("[DOTL]");
1781         else if (t == DOTQ)
1782                 printf("[DOTQ]");
1783         else if (t == DOTS)
1784                 printf("[DOTS]");
1785         else if (t == DOTD)
1786                 printf("[DOTD]");
1787         else if (t == DOTI)
1788                 printf("[DOTI]");
1789         else if (t == ENDEXPR)
1790                 printf("[ENDEXPR]");
1791         else if (t == CR_ABSCOUNT)
1792                 printf("[CR_ABSCOUNT]");
1793         else if (t == CR_FILESIZE)
1794                 printf("[CR_FILESIZE]");
1795         else if (t == CR_DEFINED)
1796                 printf("[CR_DEFINED]");
1797         else if (t == CR_REFERENCED)
1798                 printf("[CR_REFERENCED]");
1799         else if (t == CR_STREQ)
1800                 printf("[CR_STREQ]");
1801         else if (t == CR_MACDEF)
1802                 printf("[CR_MACDEF]");
1803         else if (t == CR_TIME)
1804                 printf("[CR_TIME]");
1805         else if (t == CR_DATE)
1806                 printf("[CR_DATE]");
1807         else if (t >= 0x20 && t <= 0x2F)
1808                 printf("[%c]", (char)t);
1809         else if (t >= 0x3A && t <= 0x3F)
1810                 printf("[%c]", (char)t);
1811         else if (t >= 0x80 && t <= 0x87)
1812                 printf("[D%u]", ((uint32_t)t) - 0x80);
1813         else if (t >= 0x88 && t <= 0x8F)
1814                 printf("[A%u]", ((uint32_t)t) - 0x88);
1815         else
1816                 printf("[%X:%c]", (uint32_t)t, (char)t);
1817 }
1818
1819
1820 void DumpTokenBuffer(void)
1821 {
1822         printf("Tokens [%X]: ", sloc);
1823
1824         for(TOKEN * t=tokbuf; *t!=EOL; t++)
1825         {
1826                 if (*t == COLON)
1827                         printf("[COLON]");
1828                 else if (*t == CONST)
1829                 {
1830                         PTR tp;
1831                         tp.u32 = t + 1;
1832                         printf("[CONST: $%lX]", *tp.u64);
1833                         t += 2;
1834                 }
1835                 else if (*t == FCONST)
1836                 {
1837                         PTR tp;
1838                         tp.u32 = t + 1;
1839                         printf("[FCONST: $%lX]", *tp.u64);
1840                         t += 2;
1841                 }
1842                 else if (*t == ACONST)
1843                 {
1844                         printf("[ACONST: $%X, $%X]", (uint32_t)t[1], (uint32_t)t[2]);
1845                         t += 2;
1846                 }
1847                 else if (*t == STRING)
1848                 {
1849                         t++;
1850                         printf("[STRING:\"%s\"]", string[*t]);
1851                 }
1852                 else if (*t == SYMBOL)
1853                 {
1854                         t++;
1855                         printf("[SYMBOL:\"%s\"]", string[*t]);
1856                 }
1857                 else if (*t == EOS)
1858                         printf("[EOS]");
1859                 else if (*t == TKEOF)
1860                         printf("[TKEOF]");
1861                 else if (*t == DEQUALS)
1862                         printf("[DEQUALS]");
1863                 else if (*t == SET)
1864                         printf("[SET]");
1865                 else if (*t == REG)
1866                         printf("[REG]");
1867                 else if (*t == DCOLON)
1868                         printf("[DCOLON]");
1869                 else if (*t == GE)
1870                         printf("[GE]");
1871                 else if (*t == LE)
1872                         printf("[LE]");
1873                 else if (*t == NE)
1874                         printf("[NE]");
1875                 else if (*t == SHR)
1876                         printf("[SHR]");
1877                 else if (*t == SHL)
1878                         printf("[SHL]");
1879                 else if (*t == UNMINUS)
1880                         printf("[UNMINUS]");
1881                 else if (*t == DOTB)
1882                         printf("[DOTB]");
1883                 else if (*t == DOTW)
1884                         printf("[DOTW]");
1885                 else if (*t == DOTL)
1886                         printf("[DOTL]");
1887                 else if (*t == DOTQ)
1888                         printf("[DOTQ]");
1889                 else if (*t == DOTS)
1890                         printf("[DOTS]");
1891                 else if (*t == DOTD)
1892                         printf("[DOTD]");
1893                 else if (*t == DOTI)
1894                         printf("[DOTI]");
1895                 else if (*t == ENDEXPR)
1896                         printf("[ENDEXPR]");
1897                 else if (*t == CR_ABSCOUNT)
1898                         printf("[CR_ABSCOUNT]");
1899                 else if (*t == CR_FILESIZE)
1900                         printf("[CR_FILESIZE]");
1901                 else if (*t == CR_DEFINED)
1902                         printf("[CR_DEFINED]");
1903                 else if (*t == CR_REFERENCED)
1904                         printf("[CR_REFERENCED]");
1905                 else if (*t == CR_STREQ)
1906                         printf("[CR_STREQ]");
1907                 else if (*t == CR_MACDEF)
1908                         printf("[CR_MACDEF]");
1909                 else if (*t == CR_TIME)
1910                         printf("[CR_TIME]");
1911                 else if (*t == CR_DATE)
1912                         printf("[CR_DATE]");
1913                 else if (*t >= 0x20 && *t <= 0x2F)
1914                         printf("[%c]", (char)*t);
1915                 else if (*t >= 0x3A && *t <= 0x3F)
1916                         printf("[%c]", (char)*t);
1917                 else if (*t >= 0x80 && *t <= 0x87)
1918                         printf("[D%u]", ((uint32_t)*t) - 0x80);
1919                 else if (*t >= 0x88 && *t <= 0x8F)
1920                         printf("[A%u]", ((uint32_t)*t) - 0x88);
1921                 else
1922                         printf("[%X:%c]", (uint32_t)*t, (char)*t);
1923         }
1924
1925         printf("[EOL]\n");
1926 }
1927