]> Shamusworld >> Repos - rmac/blob - token.c
Expand \~ in .REPTs to unique label names as in macros. (issue #75)
[rmac] / token.c
1 //
2 // RMAC - Renamed Macro Assembler for all Atari computers
3 // TOKEN.C - Token Handling
4 // Copyright (C) 199x Landon Dyer, 2011-2021 Reboot and Friends
5 // RMAC derived from MADMAC v1.07 Written by Landon Dyer, 1986
6 // Source utilised with the kind permission of Landon Dyer
7 //
8
9 #include "token.h"
10
11 #include <errno.h>
12 #include "direct.h"
13 #include "error.h"
14 #include "macro.h"
15 #include "procln.h"
16 #include "sect.h"
17 #include "symbol.h"
18
19 #define DECL_KW                         // Declare keyword arrays
20 #define DEF_KW                          // Declare keyword values
21 #include "kwtab.h"                      // Incl generated keyword tables & defs
22
23
24 int lnsave;                                     // 1; strcpy() text of current line
25 uint32_t curlineno;                     // Current line number (64K max currently)
26 int totlines;                           // Total # of lines
27 int mjump_align = 0;            // mjump alignment flag
28 char lntag;                                     // Line tag
29 char * curfname;                        // Current filename
30 char tolowertab[128];           // Uppercase ==> lowercase
31 int8_t hextab[128];                     // Table of hex values
32 char dotxtab[128];                      // Table for ".b", ".s", etc.
33 char irbuf[LNSIZ];                      // Text for .rept block line
34 char lnbuf[LNSIZ];                      // Text of current line
35 WORD filecount;                         // Unique file number counter
36 WORD cfileno;                           // Current file number
37 TOKEN * tok;                            // Ptr to current token
38 TOKEN * etok;                           // Ptr past last token in tokbuf[]
39 TOKEN tokeol[1] = {EOL};        // Bailout end-of-line token
40 char * string[TOKBUFSIZE*2];// Token buffer string pointer storage
41 int optimizeOff;                        // Optimization override flag
42
43
44 FILEREC * filerec;
45 FILEREC * last_fr;
46
47 INOBJ * cur_inobj;                      // Ptr current input obj (IFILE/IMACRO)
48 static INOBJ * f_inobj;         // Ptr list of free INOBJs
49 static IFILE * f_ifile;         // Ptr list of free IFILEs
50 static IMACRO * f_imacro;       // Ptr list of free IMACROs
51
52 static TOKEN tokbuf[TOKBUFSIZE];        // Token buffer (stack-like, all files)
53
54 uint8_t chrtab[0x100] = {
55         ILLEG, ILLEG, ILLEG, ILLEG,                     // NUL SOH STX ETX
56         ILLEG, ILLEG, ILLEG, ILLEG,                     // EOT ENQ ACK BEL
57         ILLEG, WHITE, ILLEG, ILLEG,                     // BS HT LF VT
58         WHITE, ILLEG, ILLEG, ILLEG,                     // FF CR SO SI
59
60         ILLEG, ILLEG, ILLEG, ILLEG,                     // DLE DC1 DC2 DC3
61         ILLEG, ILLEG, ILLEG, ILLEG,                     // DC4 NAK SYN ETB
62         ILLEG, ILLEG, ILLEG, ILLEG,                     // CAN EM SUB ESC
63         ILLEG, ILLEG, ILLEG, ILLEG,                     // FS GS RS US
64
65         WHITE, MULTX, MULTX, SELF,                      // SP ! " #
66         MULTX+CTSYM, MULTX, SELF, MULTX,        // $ % & '
67         SELF, SELF, SELF, SELF,                         // ( ) * +
68         SELF, SELF, STSYM, SELF,                        // , - . /
69
70         DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM,         // 0 1
71         DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM,         // 2 3
72         DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM,         // 4 5
73         DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM,         // 6 7
74         DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM,         // 8 9
75         MULTX, MULTX,                                                           // : ;
76         MULTX, MULTX, MULTX, STSYM+CTSYM,                       // < = > ?
77
78         MULTX, STSYM+CTSYM+HDIGIT,                                      // @ A
79         DOT+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT,     // B C
80         DOT+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT,     // D E
81         STSYM+CTSYM+HDIGIT, STSYM+CTSYM,                        // F G
82         STSYM+CTSYM, DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // H I J K
83         DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // L M N O
84
85         DOT+STSYM+CTSYM, DOT+STSYM+CTSYM, STSYM+CTSYM, DOT+STSYM+CTSYM, // P Q R S
86         STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, DOT+STSYM+CTSYM, // T U V W
87         STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, SELF,// X Y Z [
88         SELF, SELF, MULTX, STSYM+CTSYM,                         // \ ] ^ _
89
90         ILLEG, STSYM+CTSYM+HDIGIT,                                      // ` a
91         DOT+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT,     // b c
92         DOT+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT,     // d e
93         STSYM+CTSYM+HDIGIT, STSYM+CTSYM,                        // f g
94         STSYM+CTSYM, DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // h i j k
95         DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // l m n o
96
97         DOT+STSYM+CTSYM, DOT+STSYM+CTSYM, STSYM+CTSYM, DOT+STSYM+CTSYM, // p q r s
98         STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, DOT+STSYM+CTSYM, // t u v w
99         DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, SELF,                // x y z {
100         SELF, SELF, SELF, ILLEG,                                        // | } ~ DEL
101
102         // Anything above $7F is illegal (and yes, we need to check for this,
103         // otherwise you get strange and spurious errors that will lead you astray)
104         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
105         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
106         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
107         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
108         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
109         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
110         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
111         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
112         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
113         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
114         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
115         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
116         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
117         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
118         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
119         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG
120 };
121
//
// Register / keyword names, indexed by (token value - KW_D0). The number range
// beside each row is the span of token values that row covers; empty strings
// are slots with no name.
//
static char * regname[] = {
	"d0",   "d1",    "d2",   "d3",   "d4",     "d5",    "d6",    "d7",		// 128..135
	"a0",   "a1",    "a2",   "a3",   "a4",     "a5",    "a6",    "sp",		// 136..143
	"ssp",  "pc",    "sr",   "ccr",  "regequ", "set",   "reg",   "r0",		// 144..151
	"r1",   "r2",    "r3",   "r4",   "r5",     "r6",    "r7",    "r8",		// 152..159
	"r9",   "r10",   "r11",  "r12",  "r13",    "r14",   "r15",   "r16",		// 160..167
	"r17",  "r18",   "r19",  "r20",  "r21",    "r22",   "r23",   "r24",		// 168..175
	"r25",  "r26",   "r27",  "r28",  "r29",    "r30",   "r31",   "ccdef",	// 176..183
	"usp",  "ic40",  "dc40", "bc40", "sfc",    "dfc",   "",      "vbr",		// 184..191
	"cacr", "caar",  "msp",  "isp",  "tc",     "itt0",  "itt1",  "dtt0",	// 192..199
	"dtt1", "mmusr", "urp",  "srp",  "iacr0",  "iacr1", "dacr0", "dacr1",	// 200..207
	"tt0",  "tt1",   "crp",  "",     "",       "",      "",      "",		// 208..215
	"",     "",      "",     "",     "fpiar",  "fpsr",  "fpcr",  "",		// 216..223
	"fp0",  "fp1",   "fp2",  "fp3",  "fp4",    "fp5",   "fp6",   "fp7",		// 224..231
	"",     "",      "",     "",     "",       "",      "",      "",		// 232..239
	"",     "",      "",     "",     "",       "",      "",      "",		// 240..247
	"",     "",      "",     "",     "",       "",      "",      "",		// 248..255
	"",     "",      "",     "",     "x0",     "x1",    "y0",    "y1",		// 256..263
	"",     "b0",    "",     "b2",   "",       "b1",    "a",     "b",		// 264..271
	"mr",   "omr",   "la",   "lc",   "ssh",    "ssl",   "ss",    "",		// 272..279
	"n0",   "n1",    "n2",   "n3",   "n4",     "n5",    "n6",    "n7",		// 280..287
	"m0",   "m1",    "m2",   "m3",   "m4",     "m5",    "m6",    "m7",		// 288..295
	"",     "",      "",     "",     "",       "",      "l",     "p",		// 296..303
	"mr",   "omr",   "la",   "lc",   "ssh",    "ssl",   "ss",    "",		// 304..311
	"a10",  "b10",   "x",    "y",    "",       "",      "ab",    "ba"		// 312..319
};
149
// Names of the 32 RISC registers, indexed by (token value - KW_R0)
static char * riscregname[] = {
	"r0",  "r1",  "r2",  "r3",
	"r4",  "r5",  "r6",  "r7",
	"r8",  "r9",  "r10", "r11",
	"r12", "r13", "r14", "r15",
	"r16", "r17", "r18", "r19",
	"r20", "r21", "r22", "r23",
	"r24", "r25", "r26", "r27",
	"r28", "r29", "r30", "r31"
};
156
157
158 //
159 // Initialize tokenizer
160 //
161 void InitTokenizer(void)
162 {
163         int i;                                                                  // Iterator
164         char * htab = "0123456789abcdefABCDEF"; // Hex character table
165
166         lnsave = 0;                                                             // Don't save lines
167         curfname = "";                                                  // No file, empty filename
168         filecount = (WORD)-1;
169         cfileno = (WORD)-1;                                             // cfileno gets bumped to 0
170         curlineno = 0;
171         totlines = 0;
172         etok = tokbuf;
173         f_inobj = NULL;
174         f_ifile = NULL;
175         f_imacro = NULL;
176         cur_inobj = NULL;
177         filerec = NULL;
178         last_fr = NULL;
179         lntag = SPACE;
180
181         // Initialize hex, "dot" and tolower tables
182         for(i=0; i<128; i++)
183         {
184                 hextab[i] = -1;
185                 dotxtab[i] = 0;
186                 tolowertab[i] = (char)i;
187         }
188
189         for(i=0; htab[i]!=EOS; i++)
190                 hextab[htab[i]] = (char)((i < 16) ? i : i - 6);
191
192         for(i='A'; i<='Z'; i++)
193                 tolowertab[i] |= 0x20;
194
195         // These characters are legal immediately after a period
196         dotxtab['b'] = DOTB;                                    // .b .B .s .S
197         dotxtab['B'] = DOTB;
198         //dotxtab['s'] = DOTB;
199         //dotxtab['S'] = DOTB;
200         dotxtab['w'] = DOTW;                                    // .w .W
201         dotxtab['W'] = DOTW;
202         dotxtab['l'] = DOTL;                                    // .l .L
203         dotxtab['L'] = DOTL;
204         dotxtab['i'] = DOTI;                                    // .i .I (WTF is this???)
205         dotxtab['I'] = DOTI;
206         dotxtab['D'] = DOTD;                                    // .d .D (double)
207         dotxtab['d'] = DOTD;
208         dotxtab['S'] = DOTS;                                    // .s .S
209         dotxtab['s'] = DOTS;
210         dotxtab['Q'] = DOTQ;                                    // .q .Q (quad word)
211         dotxtab['q'] = DOTQ;
212         dotxtab['X'] = DOTX;                                    // .x .x
213         dotxtab['x'] = DOTX;
214         dotxtab['P'] = DOTP;                                    // .p .P
215         dotxtab['p'] = DOTP;
216 }
217
218
219 void SetFilenameForErrorReporting(void)
220 {
221         WORD fnum = cfileno;
222
223         // Check for absolute top filename (this should never happen)
224         if (fnum == -1)
225         {
226                 curfname = "(*top*)";
227                 return;
228         }
229
230         FILEREC * fr = filerec;
231
232         // Advance to the correct record...
233         while (fr != NULL && fnum != 0)
234         {
235                 fr = fr->frec_next;
236                 fnum--;
237         }
238
239         // Check for file # record not found (this should never happen either)
240         if (fr == NULL)
241         {
242                 curfname = "(*NOT FOUND*)";
243                 return;
244         }
245
246         curfname = fr->frec_name;
247 }
248
249
250 //
251 // Allocate an IFILE or IMACRO
252 //
253 INOBJ * a_inobj(int typ)
254 {
255         INOBJ * inobj;
256         IFILE * ifile;
257         IMACRO * imacro;
258
259         // Allocate and initialize INOBJ first
260         if (f_inobj == NULL)
261                 inobj = malloc(sizeof(INOBJ));
262         else
263         {
264                 inobj = f_inobj;
265                 f_inobj = f_inobj->in_link;
266         }
267
268         switch (typ)
269         {
270         case SRC_IFILE:                                                 // Alloc and init an IFILE
271                 if (f_ifile == NULL)
272                         ifile = malloc(sizeof(IFILE));
273                 else
274                 {
275                         ifile = f_ifile;
276                         f_ifile = f_ifile->if_link;
277                 }
278
279                 inobj->inobj.ifile = ifile;
280                 break;
281
282         case SRC_IMACRO:                                                // Alloc and init an IMACRO
283                 if (f_imacro == NULL)
284                         imacro = malloc(sizeof(IMACRO));
285                 else
286                 {
287                         imacro = f_imacro;
288                         f_imacro = f_imacro->im_link;
289                 }
290
291                 inobj->inobj.imacro = imacro;
292                 break;
293
294         case SRC_IREPT:                                                 // Alloc and init an IREPT
295                 inobj->inobj.irept = malloc(sizeof(IREPT));
296                 DEBUG { printf("alloc IREPT\n"); }
297                 break;
298         }
299
300         // Install INOBJ on top of input stack
301         inobj->in_ifent = ifent;                                // Record .if context on entry
302         inobj->in_type = (WORD)typ;
303         inobj->in_otok = tok;
304         inobj->in_etok = etok;
305         inobj->in_link = cur_inobj;
306         cur_inobj = inobj;
307
308         return inobj;
309 }
310
311
312 //
313 // Perform macro substitution from 'orig' to 'dest'. Return OK or some error.
314 // A macro reference is in one of two forms:
315 // \name <non-name-character>
316 // \{name}
317 // A doubled backslash (\\) is compressed to a single backslash (\).
318 // Argument definitions have been pre-tokenized, so we have to turn them back
319 // into text. This means that numbers, in particular, become hex, regardless of
320 // their representation when the macro was invoked. This is a hack.
321 // A label may appear at the beginning of the line:
322 // :<name><whitespace>
323 // (the colon must be in the first column). These labels are stripped before
324 // macro expansion takes place.
325 //
326 int ExpandMacro(char * src, char * dest, int destsiz)
327 {
328         int i;
329         int questmark;                  // \? for testing argument existence
330         char mname[128];                // Assume max size of a formal arg name
331         char numbuf[20];                // Buffer for text of CONSTs
332         TOKEN * tk;
333         SYM * arg;
334         char ** symbolString;
335
336         DEBUG { printf("ExM: src=\"%s\"\n", src); }
337
338         IMACRO * imacro = cur_inobj->inobj.imacro;
339         int macnum = (int)(imacro->im_macro->sattr);
340
341         char * dst = dest;                                              // Next dest slot
342         char * edst = dest + destsiz - 1;               // End + 1(?) of dest buffer
343
344         // Check for (and skip over) any "label" on the line
345         char * s = src;
346         char * d = NULL;
347
348         if (*s == ':')
349         {
350                 while (*s != EOS && !(chrtab[*s] & WHITE))
351                         s++;
352
353                 if (*s != EOS)
354                         s++;                                                    // Skip first whitespace
355         }
356
357         // Expand the rest of the line
358         while (*s != EOS)
359         {
360                 // Copy single character
361                 if (*s != '\\')
362                 {
363                         if (dst >= edst)
364                                 goto overflow;
365
366                         // Skip comments in case a loose @ or \ is in there
367                         // In that case the tokeniser was trying to expand it.
368                         if ((*s == ';') || ((*s == '/') && (*(s + 1) == '/')))
369                                 goto skipcomments;
370
371                         *dst++ = *s++;
372                 }
373                 // Do macro expansion
374                 else
375                 {
376                         questmark = 0;
377
378                         // Do special cases
379                         switch (*++s)
380                         {
381                         case '\\':                                              // \\, \ (collapse to single backslash)
382                                 if (dst >= edst)
383                                         goto overflow;
384
385                                 *dst++ = *s++;
386                                 continue;
387                         case '?':                                               // \? <macro>  set `questmark' flag
388                                 s++;
389                                 questmark = 1;
390                                 break;
391                         case '#':                                               // \#, number of arguments
392                                 sprintf(numbuf, "%d", (int)imacro->im_nargs);
393                                 goto copystr;
394                         case '!':                                               // \! size suffix supplied on invocation
395                                 switch ((int)imacro->im_siz)
396                                 {
397                                 case SIZN: d = "";   break;
398                                 case SIZB: d = ".b"; break;
399                                 case SIZW: d = ".w"; break;
400                                 case SIZL: d = ".l"; break;
401                                 }
402
403                                 goto copy_d;
404                         case '~':                                               // ==> unique label string Mnnnn...
405                                 sprintf(numbuf, "M%u", curuniq);
406 copystr:
407                                 d = numbuf;
408 copy_d:
409                                 s++;
410
411                                 while (*d != EOS)
412                                 {
413                                         if (dst >= edst)
414                                                 goto overflow;
415                                         else
416                                                 *dst++ = *d++;
417                                 }
418
419                                 continue;
420                         case EOS:
421                                 return error("missing argument name");
422                         }
423
424                         // \n ==> argument number 'n', 0..9
425                         if (chrtab[*s] & DIGIT)
426                         {
427                                 i = *s++ - '1';
428
429                                 if (i < 0)
430                                         i = 9;
431
432                                 goto arg_num;
433                         }
434
435                         // Get argument name: \name, \{name}
436                         d = mname;
437
438                         // \label
439                         if (*s != '{')
440                         {
441                                 do
442                                 {
443                                         *d++ = *s++;
444                                 }
445                                 while (chrtab[*s] & CTSYM);
446                         }
447                         // \\{label}
448                         else
449                         {
450                                 for(++s; *s != EOS && *s != '}';)
451                                         *d++ = *s++;
452
453                                 if (*s != '}')
454                                         return error("missing closing brace ('}')");
455                                 else
456                                         s++;
457                         }
458
459                         *d = EOS;
460
461                         // Lookup the argument and copy its (string) value into the
462                         // destination string
463                         DEBUG { printf("argument='%s'\n", mname); }
464
465                         if ((arg = lookup(mname, MACARG, macnum)) == NULL)
466                                 return error("undefined argument: '%s'", mname);
467                         else
468                         {
469                                 // Convert a string of tokens (terminated with EOL) back into
470                                 // text. If an argument is out of range (not specified in the
471                                 // macro invocation) then it is ignored.
472                                 i = (int)arg->svalue;
473 arg_num:
474                                 DEBUG { printf("~argnumber=%d\n", i); }
475                                 tk = NULL;
476
477                                 if (i < imacro->im_nargs)
478                                 {
479                                         tk = imacro->argument[i].token;
480                                         symbolString = imacro->argument[i].string;
481 //DEBUG
482 //{
483 //      printf("ExM: Preparing to parse argument #%u...\n", i);
484 //      DumpTokens(tk);
485 //}
486                                 }
487
488                                 // \?arg yields:
489                                 //    0  if the argument is empty or non-existant,
490                                 //    1  if the argument is not empty
491                                 if (questmark)
492                                 {
493                                         if (tk == NULL || *tk == EOL)
494                                                 questmark = 0;
495
496                                         if (dst >= edst)
497                                                 goto overflow;
498
499                                         *dst++ = (char)(questmark + '0');
500                                         continue;
501                                 }
502
503                                 // Argument # is in range, so expand it
504                                 if (tk != NULL)
505                                 {
506                                         while (*tk != EOL)
507                                         {
508                                                 // Reverse-translation from a token number to a string.
509                                                 // This is a hack. It might be better table-driven.
510                                                 d = NULL;
511
512                                                 if ((*tk >= KW_D0) && !rdsp && !rgpu)
513                                                 {
514                                                         d = regname[(int)*tk++ - KW_D0];
515                                                         goto strcopy;
516                                                 }
517                                                 else if ((*tk >= KW_R0) && (*tk <= KW_R31))
518                                                 {
519                                                         d = riscregname[(int)*tk++ - KW_R0];
520                                                         goto strcopy;
521                                                 }
522                                                 else
523                                                 {
524                                                         switch ((int)*tk++)
525                                                         {
526                                                         case SYMBOL:
527                                                                 d = symbolString[*tk++];
528 DEBUG { printf("ExM: SYMBOL=\"%s\"", d); }
529                                                                 break;
530                                                         case STRING:
531                                                                 d = symbolString[*tk++];
532
533                                                                 if (dst >= edst)
534                                                                         goto overflow;
535
536                                                                 *dst++ = '"';
537
538                                                                 while (*d != EOS)
539                                                                 {
540                                                                         if (dst >= edst)
541                                                                                 goto overflow;
542                                                                         else
543                                                                                 *dst++ = *d++;
544                                                                 }
545
546                                                                 if (dst >= edst)
547                                                                         goto overflow;
548
549                                                                 *dst++ = '"';
550                                                                 continue;
551                                                                 break;
552 // Shamus: Changing the format specifier from %lx to %ux caused the assembler
553 //         to choke on legitimate code... Need to investigate this further
554 //         before changing anything else here!
555                                                         case CONST:
556 //                                                              sprintf(numbuf, "$%lx", (uint64_t)*tk++);
557                                                                 sprintf(numbuf, "$%" PRIX64, (uint64_t)*tk++);
558                                                                 tk++;
559                                                                 d = numbuf;
560                                                                 break;
561                                                         case DEQUALS:
562                                                                 d = "==";
563                                                                 break;
564                                                         case SET:
565                                                                 d = "set";
566                                                                 break;
567                                                         case COLON:
568                                                                 d = ":";
569                                                                 break;
570                                                         case DCOLON:
571                                                                 d = "::";
572                                                                 break;
573                                                         case GE:
574                                                                 d = ">=";
575                                                                 break;
576                                                         case LE:
577                                                                 d = "<=";
578                                                                 break;
579                                                         case NE:
580                                                                 d = "<>";
581                                                                 break;
582                                                         case SHR:
583                                                                 d = ">>";
584                                                                 break;
585                                                         case SHL:
586                                                                 d = "<<";
587                                                                 break;
588                                                         case DOTB:
589                                                                 d = ".b";
590                                                                 break;
591                                                         case DOTW:
592                                                                 d = ".w";
593                                                                 break;
594                                                         case DOTL:
595                                                                 d = ".l";
596                                                                 break;
597                                                         case CR_ABSCOUNT:
598                                                                 d = "^^abscount";
599                                                                 break;
600                                                         case CR_FILESIZE:
601                                                                 d = "^^filesize";
602                                                                 break;
603                                                         case CR_DATE:
604                                                                 d = "^^date";
605                                                                 break;
606                                                         case CR_TIME:
607                                                                 d = "^^time";
608                                                                 break;
609                                                         case CR_DEFINED:
610                                                                 d = "^^defined ";
611                                                                 break;
612                                                         case CR_REFERENCED:
613                                                                 d = "^^referenced ";
614                                                                 break;
615                                                         case CR_STREQ:
616                                                                 d = "^^streq ";
617                                                                 break;
618                                                         case CR_MACDEF:
619                                                                 d = "^^macdef ";
620                                                                 break;
621                                                         default:
622                                                                 if (dst >= edst)
623                                                                         goto overflow;
624
625                                                                 *dst++ = (char)*(tk - 1);
626                                                                 break;
627                                                         }
628                                                 }
629
630                                                 // If 'd' != NULL, copy string to destination
631                                                 if (d != NULL)
632                                                 {
633 strcopy:
634                                                         DEBUG printf("d='%s'\n", d);
635
636                                                         while (*d != EOS)
637                                                         {
638                                                                 if (dst >= edst)
639                                                                         goto overflow;
640                                                                 else
641                                                                         *dst++ = *d++;
642                                                         }
643                                                 }
644                                         }
645                                 }
646                         }
647                 }
648         }
649
650 skipcomments:
651
652         *dst = EOS;
653         DEBUG { printf("ExM: dst=\"%s\"\n", dest); }
654         return OK;
655
656 overflow:
657         *dst = EOS;
658         DEBUG { printf("*** OVERFLOW LINE ***\n%s\n", dest); }
659         return fatal("line too long as a result of macro expansion");
660 }
661
662
663 //
664 // Get next line of text from a macro
665 //
666 char * GetNextMacroLine(void)
667 {
668         IMACRO * imacro = cur_inobj->inobj.imacro;
669         LLIST * strp = imacro->im_nextln;
670
671         if (strp == NULL)                                               // End-of-macro
672                 return NULL;
673
674         imacro->im_nextln = strp->next;
675 //      ExpandMacro((char *)(strp + 1), imacro->im_lnbuf, LNSIZ);
676         ExpandMacro(strp->line, imacro->im_lnbuf, LNSIZ);
677
678         return imacro->im_lnbuf;
679 }
680
681
682 //
683 // Get next line of text from a repeat block
684 //
685 char * GetNextRepeatLine(void)
686 {
687         IREPT * irept = cur_inobj->inobj.irept;
688 //      LONG * strp = irept->ir_nextln;                 // initial null
689
690         // Do repeat at end of .rept block's string list
691 //      if (strp == NULL)
692         if (irept->ir_nextln == NULL)
693         {
694                 DEBUG { printf("back-to-top-of-repeat-block count=%d\n", (int)irept->ir_count); }
695                 irept->ir_nextln = irept->ir_firstln;   // copy first line
696
697                 if (irept->ir_count-- == 0)
698                 {
699                         DEBUG { printf("end-repeat-block\n"); }
700                         return NULL;
701                 }
702                 reptuniq++;
703 //              strp = irept->ir_nextln;
704         }
705         // Mark the current macro line in the irept object
706         // This is probably overkill - a global variable
707         // would suffice here (it only gets used during
708         // error reporting anyway)
709         irept->lineno = irept->ir_nextln->lineno;
710
711         // Copy the rept lines verbatim, unless we're in nest level 0.
712         // Then, expand any \~ labels to unique numbers (Rn)
713         if (rptlevel)
714         {
715                 strcpy(irbuf, irept->ir_nextln->line);
716         }
717         else
718         {
719                 uint32_t linelen = strlen(irept->ir_nextln->line);
720                 uint8_t *p_line = irept->ir_nextln->line;
721                 char *irbufwrite = irbuf;
722                 for (int i = 0; i <= linelen; i++)
723                 {
724                         uint8_t c;
725                         c = *p_line++;
726                         if (c == '\\' && *p_line == '~')
727                         {
728                                 p_line++;
729                                 irbufwrite += sprintf(irbufwrite, "R%u", reptuniq);
730                         }
731                         else
732                         {
733                                 *irbufwrite++ = c;
734                         }
735                 }
736         }
737
738         DEBUG { printf("repeat line='%s'\n", irbuf); }
739 //      irept->ir_nextln = (LONG *)*strp;
740         irept->ir_nextln = irept->ir_nextln->next;
741
742         return irbuf;
743 }
744
745
746 //
747 // Include a source file used at the root, and for ".include" files
748 //
749 int include(int handle, char * fname)
750 {
751         // Debug mode
752         DEBUG { printf("[include: %s, cfileno=%u]\n", fname, cfileno); }
753
754         // Alloc and initialize include-descriptors
755         INOBJ * inobj = a_inobj(SRC_IFILE);
756         IFILE * ifile = inobj->inobj.ifile;
757
758         ifile->ifhandle = handle;                       // Setup file handle
759         ifile->ifind = ifile->ifcnt = 0;        // Setup buffer indices
760         ifile->ifoldlineno = curlineno;         // Save old line number
761         ifile->ifoldfname = curfname;           // Save old filename
762         ifile->ifno = cfileno;                          // Save old file number
763
764         // NB: This *must* be preincrement, we're adding one to the filecount here!
765         cfileno = ++filecount;                          // Compute NEW file number
766         curfname = strdup(fname);                       // Set current filename (alloc storage)
767         curlineno = 0;                                          // Start on line zero
768
769         // Add another file to the file-record
770         FILEREC * fr = (FILEREC *)malloc(sizeof(FILEREC));
771         fr->frec_next = NULL;
772         fr->frec_name = curfname;
773
774         if (last_fr == NULL)
775                 filerec = fr;                                   // Add first filerec
776         else
777                 last_fr->frec_next = fr;                // Append to list of filerecs
778
779         last_fr = fr;
780         DEBUG { printf("[include: curfname: %s, cfileno=%u]\n", curfname, cfileno); }
781
782         return OK;
783 }
784
785
786 //
787 // Pop the current input level
788 //
789 int fpop(void)
790 {
791         INOBJ * inobj = cur_inobj;
792
793         if (inobj == NULL)
794                 return 0;
795
796         // Pop IFENT levels until we reach the conditional assembly context we
797         // were at when the input object was entered.
798         int numUnmatched = 0;
799
800         while (ifent != inobj->in_ifent)
801         {
802                 if (d_endif() != 0)     // Something bad happened during endif parsing?
803                         return -1;              // If yes, bail instead of getting stuck in a loop
804
805                 numUnmatched++;
806         }
807
808         // Give a warning to the user that we had to wipe their bum for them
809         if (numUnmatched > 0)
810                 warn("missing %d .endif(s)", numUnmatched);
811
812         tok = inobj->in_otok;   // Restore tok and etok
813         etok = inobj->in_etok;
814
815         switch (inobj->in_type)
816         {
817         case SRC_IFILE:                 // Pop and release an IFILE
818         {
819                 DEBUG { printf("[Leaving: %s]\n", curfname); }
820
821                 IFILE * ifile = inobj->inobj.ifile;
822                 ifile->if_link = f_ifile;
823                 f_ifile = ifile;
824                 close(ifile->ifhandle);                 // Close source file
825 DEBUG { printf("[fpop (pre):  curfname=%s]\n", curfname); }
826                 curfname = ifile->ifoldfname;   // Set current filename
827 DEBUG { printf("[fpop (post): curfname=%s]\n", curfname); }
828 DEBUG { printf("[fpop: (pre)  cfileno=%d ifile->ifno=%d]\n", (int)cfileno, (int)ifile->ifno); }
829                 curlineno = ifile->ifoldlineno; // Set current line#
830                 DEBUG { printf("cfileno=%d ifile->ifno=%d\n", (int)cfileno, (int)ifile->ifno); }
831                 cfileno = ifile->ifno;                  // Restore current file number
832 DEBUG { printf("[fpop: (post) cfileno=%d ifile->ifno=%d]\n", (int)cfileno, (int)ifile->ifno); }
833                 break;
834         }
835
836         case SRC_IMACRO:                                        // Pop and release an IMACRO
837         {
838                 IMACRO * imacro = inobj->inobj.imacro;
839                 imacro->im_link = f_imacro;
840                 f_imacro = imacro;
841                 break;
842         }
843
844         case SRC_IREPT:                                         // Pop and release an IREPT
845         {
846                 DEBUG { printf("dealloc IREPT\n"); }
847                 LLIST * p = inobj->inobj.irept->ir_firstln;
848
849                 // Deallocate repeat lines
850                 while (p != NULL)
851                 {
852                         free(p->line);
853                         p = p->next;
854                 }
855
856                 break;
857         }
858         }
859
860         cur_inobj = inobj->in_link;
861         inobj->in_link = f_inobj;
862         f_inobj = inobj;
863
864         return 0;
865 }
866
867
868 //
869 // Get line from file into buf, return NULL on EOF or ptr to the start of a
870 // null-term line
871 //
872 char * GetNextLine(void)
873 {
874         int i, j;
875         char * p, * d;
876         int readamt = -1;                                               // 0 if last read() yeilded 0 bytes
877         IFILE * fl = cur_inobj->inobj.ifile;
878
879         for(;;)
880         {
881                 // Scan for next end-of-line; handle stupid text formats by treating
882                 // \r\n the same as \n. (lone '\r' at end of buffer means we have to
883                 // check for '\n').
884                 d = &fl->ifbuf[fl->ifind];
885
886                 for(p=d, i=0, j=fl->ifcnt; i<j; i++, p++)
887                 {
888                         if (*p == '\r' || *p == '\n')
889                         {
890                                 i++;
891
892                                 if (*p == '\r')
893                                 {
894                                         if (i >= j)
895                                                 break;  // Need to read more, then look for '\n' to eat
896                                         else if (p[1] == '\n')
897                                                 i++;
898                                 }
899
900                                 // Cover up the newline with end-of-string sentinel
901                                 *p = '\0';
902
903                                 fl->ifind += i;
904                                 fl->ifcnt -= i;
905                                 return d;
906                         }
907                 }
908
909                 // Handle hanging lines by ignoring them (Input file is exhausted, no
910                 // \r or \n on last line)
911                 // Shamus: This is retarded. Never ignore any input!
912                 if (!readamt && fl->ifcnt)
913                 {
914 #if 0
915                         fl->ifcnt = 0;
916                         *p = '\0';
917                         return NULL;
918 #else
919                         // Really should check to see if we're at the end of the buffer!
920                         // :-P
921                         fl->ifbuf[fl->ifind + fl->ifcnt] = '\0';
922                         fl->ifcnt = 0;
923                         return &fl->ifbuf[fl->ifind];
924 #endif
925                 }
926
927                 // Truncate and return absurdly long lines.
928                 if (fl->ifcnt >= QUANTUM)
929                 {
930                         fl->ifbuf[fl->ifind + fl->ifcnt - 1] = '\0';
931                         fl->ifcnt = 0;
932                         return &fl->ifbuf[fl->ifind];
933                 }
934
935                 // Relocate what's left of a line to the beginning of the buffer, and
936                 // read some more of the file in; return NULL if the buffer's empty and
937                 // on EOF.
938                 if (fl->ifind != 0)
939                 {
940                         p = &fl->ifbuf[fl->ifind];
941                         d = &fl->ifbuf[fl->ifcnt & 1];
942
943                         for(i=0; i<fl->ifcnt; i++)
944                                 *d++ = *p++;
945
946                         fl->ifind = fl->ifcnt & 1;
947                 }
948
949                 readamt = read(fl->ifhandle, &fl->ifbuf[fl->ifind + fl->ifcnt], QUANTUM);
950
951                 if (readamt < 0)
952                         return NULL;
953
954                 if ((fl->ifcnt += readamt) == 0)
955                         return NULL;
956         }
957 }
958
959
960 //
961 // Tokenize a line
962 //
963 int TokenizeLine(void)
964 {
965         uint8_t * ln = NULL;            // Ptr to current position in line
966         uint8_t * p;                            // Random character ptr
967         PTR tk;                                         // Token-deposit ptr
968         int state = 0;                          // State for keyword detector
969         int j = 0;                                      // Var for keyword detector
970         uint8_t c;                                      // Random char
971         uint64_t v;                                     // Random value
972         uint32_t cursize = 0;           // Current line's size (.b, .w, .l, .s, .q, .d)
973         uint8_t * nullspot = NULL;      // Spot to clobber for SYMBOL termination
974         int stuffnull;                          // 1:terminate SYMBOL '\0' at *nullspot
975         uint8_t c1;
976         int stringNum = 0;                      // Pointer to string locations in tokenized line
977
978 retry:
979
980         if (cur_inobj == NULL)          // Return EOF if input stack is empty
981                 return TKEOF;
982
983         // Get another line of input from the current input source: a file, a
984         // macro, or a repeat-block
985         switch (cur_inobj->in_type)
986         {
987         // Include-file:
988         // o  handle EOF;
989         // o  bump source line number;
990         // o  tag the listing-line with a space;
991         // o  kludge lines generated by Alcyon C.
992         case SRC_IFILE:
993                 if ((ln = GetNextLine()) == NULL)
994                 {
995 DEBUG { printf("TokenizeLine: Calling fpop() from SRC_IFILE...\n"); }
996                         if (fpop() == 0)        // Pop input level
997                                 goto retry;             // Try for more lines
998                         else
999                         {
1000                                 ifent->if_prev = (IFENT *)-1;   //Signal Assemble() that we have reached EOF with unbalanced if/endifs
1001                                 return TKEOF;
1002                         }
1003                 }
1004
1005                 curlineno++;                    // Bump line number
1006                 lntag = SPACE;
1007
1008                 if (as68_flag)
1009                 {
1010                         // AS68 compatibility, throw away all lines starting with
1011                         // back-quotes, tildes, or '*'
1012                         // On other lines, turn the first '*' into a semi-colon.
1013                         if (*ln == '`' || *ln == '~' || *ln == '*')
1014                                 *ln = ';';
1015                         else
1016                         {
1017                                 for(p=ln; *p!=EOS; p++)
1018                                 {
1019                                         if (*p == '*')
1020                                         {
1021                                                 *p = ';';
1022                                                 break;
1023                                         }
1024                                 }
1025                         }
1026                 }
1027
1028                 break;
1029
1030         // Macro-block:
1031         // o  Handle end-of-macro;
1032         // o  tag the listing-line with an at (@) sign.
1033         case SRC_IMACRO:
1034                 if ((ln = GetNextMacroLine()) == NULL)
1035                 {
1036                         if (ExitMacro() == 0)   // Exit macro (pop args, do fpop(), etc)
1037                                 goto retry;                     // Try for more lines...
1038                         else
1039                                 return TKEOF;           // Oops, we got a non zero return code, signal EOF
1040                 }
1041
1042                 lntag = '@';
1043                 break;
1044
1045         // Repeat-block:
1046         // o  Handle end-of-repeat-block;
1047         // o  tag the listing-line with a pound (#) sign.
1048         case SRC_IREPT:
1049                 if ((ln = GetNextRepeatLine()) == NULL)
1050                 {
1051                         DEBUG { printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n"); }
1052                         fpop();
1053                         goto retry;
1054                 }
1055
1056                 lntag = '#';
1057                 break;
1058         }
1059
1060         // Save text of the line. We only do this during listings and within
1061         // macro-type blocks, since it is expensive to unconditionally copy every
1062         // line.
1063         if (lnsave)
1064         {
1065                 // Sanity check
1066                 if (strlen(ln) > LNSIZ)
1067                         return error("line too long (%d, max %d)", strlen(ln), LNSIZ);
1068
1069                 strcpy(lnbuf, ln);
1070         }
1071
1072         // General housekeeping
1073         tok = tokeol;                   // Set "tok" to EOL in case of error
1074         tk.u32 = etok;                  // Reset token ptr
1075         stuffnull = 0;                  // Don't stuff nulls
1076         totlines++;                             // Bump total #lines assembled
1077
1078         // See if the entire line is a comment. This is a win if the programmer
1079         // puts in lots of comments
1080         if (*ln == '*' || *ln == ';' || ((*ln == '/') && (*(ln + 1) == '/')))
1081                 goto goteol;
1082
1083         // And here we have a very ugly hack for signalling a single line 'turn off
1084         // optimization'. There's really no nice way to do this, so hack it is!
1085         optimizeOff = 0;                // Default is to take optimizations as they come
1086
1087         if (*ln == '!')
1088         {
1089                 optimizeOff = 1;        // Signal that we don't want to optimize this line
1090                 ln++;                           // & skip over the darned thing
1091         }
1092
1093         // Main tokenization loop;
1094         //  o  skip whitespace;
1095         //  o  handle end-of-line;
1096         //  o  handle symbols;
1097         //  o  handle single-character tokens (operators, etc.);
1098         //  o  handle multiple-character tokens (constants, strings, etc.).
1099         for(; *ln!=EOS;)
1100         {
1101                 // Check to see if there's enough space in the token buffer
1102                 if (tk.cp >= ((uint8_t *)(&tokbuf[TOKBUFSIZE])) - 20)
1103                 {
1104                         return error("token buffer overrun");
1105                 }
1106
1107                 // Skip whitespace, handle EOL
1108                 while (chrtab[*ln] & WHITE)
1109                         ln++;
1110
1111                 // Handle EOL, comment with ';'
1112                 if (*ln == EOS || *ln == ';'|| ((*ln == '/') && (*(ln + 1) == '/')))
1113                         break;
1114
1115                 // Handle start of symbol. Symbols are null-terminated in place. The
1116                 // termination is always one symbol behind, since there may be no place
1117                 // for a null in the case that an operator immediately follows the name.
1118                 c = chrtab[*ln];
1119
1120                 if (c & STSYM)
1121                 {
1122                         if (stuffnull)                  // Terminate old symbol from previous pass
1123                                 *nullspot = EOS;
1124
1125                         v = 0;                                  // Assume no DOT attrib follows symbol
1126                         stuffnull = 1;
1127
1128                         // In some cases, we need to check for a DOTx at the *beginning*
1129                         // of a symbol, as the "start" of the line we're currently looking
1130                         // at could be somewhere in the middle of that line!
1131                         if (*ln == '.')
1132                         {
1133                                 // Make sure that it's *only* a .[bwsl] following, and not the
1134                                 // start of a local symbol:
1135                                 if ((chrtab[*(ln + 1)] & DOT)
1136                                         && (dotxtab[*(ln + 1)] != 0)
1137                                         && !(chrtab[*(ln + 2)] & CTSYM))
1138                                 {
1139                                         // We found a legitimate DOTx construct, so add it to the
1140                                         // token stream:
1141                                         ln++;
1142                                         stuffnull = 0;
1143                                         *tk.u32++ = (TOKEN)dotxtab[*ln++];
1144                                         continue;
1145                                 }
1146                         }
1147
1148                         p = nullspot = ln++;    // Nullspot -> start of this symbol
1149
1150                         // Find end of symbol (and compute its length)
1151                         for(j=1; (int)chrtab[*ln]&CTSYM; j++)
1152                                 ln++;
1153
1154                         // Handle "DOT" special forms (like ".b") that follow a normal
1155                         // symbol or keyword:
1156                         if (*ln == '.')
1157                         {
1158                                 *ln++ = EOS;            // Terminate symbol
1159                                 stuffnull = 0;          // And never try it again
1160
1161                                 // Character following the '.' must have a DOT attribute, and
1162                                 // the character after THAT one must not have a start-symbol
1163                                 // attribute (to prevent symbols that look like, for example,
1164                                 // "zingo.barf", which might be a good idea anyway....)
1165                                 if (((chrtab[*ln] & DOT) == 0) || (dotxtab[*ln] == 0))
1166                                         return error("[bwsl] must follow '.' in symbol");
1167
1168                                 v = (uint32_t)dotxtab[*ln++];
1169                                 cursize = (uint32_t)v;
1170
1171                                 if (chrtab[*ln] & CTSYM)
1172                                         return error("misuse of '.'; not allowed in symbols");
1173                         }
1174
1175                         // If the symbol is small, check to see if it's really the name of
1176                         // a register.
1177                         if (j <= KWSIZE)
1178                         {
1179                                 for(state=0; state>=0;)
1180                                 {
1181                                         j = (int)tolowertab[*p++];
1182                                         j += kwbase[state];
1183
1184                                         if (kwcheck[j] != state)
1185                                         {
1186                                                 j = -1;
1187                                                 break;
1188                                         }
1189
1190                                         if (*p == EOS || p == ln)
1191                                         {
1192                                                 j = kwaccept[j];
1193                                                 break;
1194                                         }
1195
1196                                         state = kwtab[j];
1197                                 }
1198                         }
1199                         else
1200                         {
1201                                 j = -1;
1202                         }
1203
1204                         // Make j = -1 if user tries to use a RISC register while in 68K mode
1205                         if (!(rgpu || rdsp || dsp56001) && ((TOKEN)j >= KW_R0 && (TOKEN)j <= KW_R31))
1206                         {
1207                                 j = -1;
1208                         }
1209
1210                         // Make j = -1 if time, date etc with no preceding ^^
1211                         // defined, referenced, streq, macdef, date and time
1212                         switch ((TOKEN)j)
1213                         {
1214                         case 112:   // defined
1215                         case 113:   // referenced
1216                         case 118:   // streq
1217                         case 119:   // macdef
1218                         case 120:   // time
1219                         case 121:   // date
1220                                 j = -1;
1221                         }
1222
1223                         // If not tokenized keyword OR token was not found
1224                         if ((j < 0) || (state < 0))
1225                         {
1226                                 *tk.u32++ = SYMBOL;
1227                                 string[stringNum] = nullspot;
1228                                 *tk.u32++ = stringNum;
1229                                 stringNum++;
1230                         }
1231                         else
1232                         {
1233                                 *tk.u32++ = (TOKEN)j;
1234                                 stuffnull = 0;
1235                         }
1236
1237                         if (v)                  // Record attribute token (if any)
1238                                 *tk.u32++ = (TOKEN)v;
1239
1240                         if (stuffnull)  // Arrange for string termination on next pass
1241                                 nullspot = ln;
1242
1243                         continue;
1244                 }
1245
1246                 // Handle identity tokens
1247                 if (c & SELF)
1248                 {
1249                         *tk.u32++ = *ln++;
1250                         continue;
1251                 }
1252
1253                 // Handle multiple-character tokens
1254                 if (c & MULTX)
1255                 {
1256                         switch (*ln++)
1257                         {
1258                         case '!':               // ! or !=
1259                                 if (*ln == '=')
1260                                 {
1261                                         *tk.u32++ = NE;
1262                                         ln++;
1263                                 }
1264                                 else
1265                                         *tk.u32++ = '!';
1266
1267                                 continue;
1268                         case '\'':              // 'string'
1269                                 if (m6502)
1270                                 {
1271                                         // Hardcoded for now, maybe this will change in the future
1272                                         *tk.u32++ = STRINGA8;
1273                                         goto dostring;
1274                                 }
1275                                 // Fall through
1276                         case '\"':              // "string"
1277                                 *tk.u32++ = STRING;
1278 dostring:
1279                                 c1 = ln[-1];
1280                                 string[stringNum] = ln;
1281                                 *tk.u32++ = stringNum;
1282                                 stringNum++;
1283
1284                                 for(p=ln; *ln!=EOS && *ln!=c1;)
1285                                 {
1286                                         c = *ln++;
1287
1288                                         if (c == '\\')
1289                                         {
1290                                                 switch (*ln++)
1291                                                 {
1292                                                 case EOS:
1293                                                         return(error("unterminated string"));
1294                                                 case 'e':
1295                                                         c = '\033';
1296                                                         break;
1297                                                 case 'n':
1298                                                         c = '\n';
1299                                                         break;
1300                                                 case 'b':
1301                                                         c = '\b';
1302                                                         break;
1303                                                 case 't':
1304                                                         c = '\t';
1305                                                         break;
1306                                                 case 'r':
1307                                                         c = '\r';
1308                                                         break;
1309                                                 case 'f':
1310                                                         c = '\f';
1311                                                         break;
1312                                                 case '\"':
1313                                                         c = '\"';
1314                                                         break;
1315                                                 case '\'':
1316                                                         c = '\'';
1317                                                         break;
1318                                                 case '\\':
1319                                                         c = '\\';
1320                                                         break;
1321                                                 case '{':
1322                                                         // If we're evaluating a macro
1323                                                         // this is valid because it's
1324                                                         // a parameter expansion
1325                                                 case '!':
1326                                                         // If we're evaluating a macro
1327                                                         // this is valid and expands to
1328                                                         // "dot-size"
1329                                                         break;
1330                                                 default:
1331                                                         warn("bad backslash code in string");
1332                                                         ln--;
1333                                                         break;
1334                                                 }
1335                                         }
1336
1337                                         *p++ = c;
1338                                 }
1339
1340                                 if (*ln++ != c1)
1341                                         return error("unterminated string");
1342
1343                                 *p++ = EOS;
1344                                 continue;
1345                         case '$':               // $, hex constant
1346                                 if (chrtab[*ln] & HDIGIT)
1347                                 {
1348                                         v = 0;
1349
1350                                         // Parse the hex value
1351                                         while (hextab[*ln] >= 0)
1352                                                 v = (v << 4) + (int)hextab[*ln++];
1353
1354                                         *tk.u32++ = CONST;
1355                                         *tk.u64++ = v;
1356
1357                                         if (*ln == '.')
1358                                         {
1359                                                 if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
1360                                                 {
1361                                                         *tk.u32++ = DOTW;
1362                                                         ln += 2;
1363                                                 }
1364                                                 else if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
1365                                                 {
1366                                                         *tk.u32++ = DOTL;
1367                                                         ln += 2;
1368                                                 }
1369                                         }
1370                                 }
1371                                 else
1372                                         *tk.u32++ = '$';
1373
1374                                 continue;
1375                         case '<':               // < or << or <> or <=
1376                                 switch (*ln)
1377                                 {
1378                                 case '<':
1379                                         *tk.u32++ = SHL;
1380                                         ln++;
1381                                         continue;
1382                                 case '>':
1383                                         *tk.u32++ = NE;
1384                                         ln++;
1385                                         continue;
1386                                 case '=':
1387                                         *tk.u32++ = LE;
1388                                         ln++;
1389                                         continue;
1390                                 default:
1391                                         *tk.u32++ = '<';
1392                                         continue;
1393                                 }
1394                         case ':':               // : or ::
1395                                 if (*ln == ':')
1396                                 {
1397                                         *tk.u32++ = DCOLON;
1398                                         ln++;
1399                                 }
1400                                 else
1401                                         *tk.u32++ = ':';
1402
1403                                 continue;
1404                         case '=':               // = or ==
1405                                 if (*ln == '=')
1406                                 {
1407                                         *tk.u32++ = DEQUALS;
1408                                         ln++;
1409                                 }
1410                                 else
1411                                         *tk.u32++ = '=';
1412
1413                                 continue;
1414                         case '>':               // > or >> or >=
1415                                 switch (*ln)
1416                                 {
1417                                 case '>':
1418                                         *tk.u32++ = SHR;
1419                                         ln++;
1420                                         continue;
1421                                 case '=':
1422                                         *tk.u32++ = GE;
1423                                         ln++;
1424                                         continue;
1425                                 default:
1426                                         *tk.u32++ = '>';
1427                                         continue;
1428                                 }
1429                         case '%':               // % or binary constant
1430                                 if (*ln < '0' || *ln > '1')
1431                                 {
1432                                         *tk.u32++ = '%';
1433                                         continue;
1434                                 }
1435
1436                                 v = 0;
1437
1438                                 while (*ln >= '0' && *ln <= '1')
1439                                         v = (v << 1) + *ln++ - '0';
1440
1441                                 if (*ln == '.')
1442                                 {
1443                                         if ((*(ln + 1) == 'b') || (*(ln + 1) == 'B'))
1444                                         {
1445                                                 v &= 0x000000FF;
1446                                                 ln += 2;
1447                                         }
1448
1449                                         if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
1450                                         {
1451                                                 v &= 0x0000FFFF;
1452                                                 ln += 2;
1453                                         }
1454
1455                                         if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
1456                                         {
1457                                                 v &= 0xFFFFFFFF;
1458                                                 ln += 2;
1459                                         }
1460                                 }
1461
1462                                 *tk.u32++ = CONST;
1463                                 *tk.u64++ = v;
1464                                 continue;
1465                         case '@':               // @ or octal constant
1466                                 if (*ln < '0' || *ln > '7')
1467                                 {
1468                                         *tk.u32++ = '@';
1469                                         continue;
1470                                 }
1471
1472                                 v = 0;
1473
1474                                 while (*ln >= '0' && *ln <= '7')
1475                                         v = (v << 3) + *ln++ - '0';
1476
1477                                 if (*ln == '.')
1478                                 {
1479                                         if ((*(ln + 1) == 'b') || (*(ln + 1) == 'B'))
1480                                         {
1481                                                 v &= 0x000000FF;
1482                                                 ln += 2;
1483                                         }
1484
1485                                         if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
1486                                         {
1487                                                 v &= 0x0000FFFF;
1488                                                 ln += 2;
1489                                         }
1490
1491                                         if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
1492                                         {
1493                                                 v &= 0xFFFFFFFF;
1494                                                 ln += 2;
1495                                         }
1496                                 }
1497
1498                                 *tk.u32++ = CONST;
1499                                 *tk.u64++ = v;
1500                                 continue;
1501                         case '^':               // ^ or ^^ <operator-name>
1502                                 if (*ln != '^')
1503                                 {
1504                                         *tk.u32++ = '^';
1505                                         continue;
1506                                 }
1507
1508                                 if (((int)chrtab[*++ln] & STSYM) == 0)
1509                                 {
1510                                         error("invalid symbol following ^^");
1511                                         continue;
1512                                 }
1513
1514                                 p = ln++;
1515
1516                                 while ((int)chrtab[*ln] & CTSYM)
1517                                         ++ln;
1518
1519                                 for(state=0; state>=0;)
1520                                 {
1521                                         // Get char, convert to lowercase
1522                                         j = *p++;
1523
1524                                         if (j >= 'A' && j <= 'Z')
1525                                                 j += 0x20;
1526
1527                                         j += kwbase[state];
1528
1529                                         if (kwcheck[j] != state)
1530                                         {
1531                                                 j = -1;
1532                                                 break;
1533                                         }
1534
1535                                         if (*p == EOS || p == ln)
1536                                         {
1537                                                 j = kwaccept[j];
1538                                                 break;
1539                                         }
1540
1541                                         state = kwtab[j];
1542                                 }
1543
1544                                 if (j < 0 || state < 0)
1545                                 {
1546                                         error("unknown symbol following ^^");
1547                                         continue;
1548                                 }
1549
1550                                 *tk.u32++ = (TOKEN)j;
1551                                 continue;
1552                         default:
1553                                 interror(2);    // Bad MULTX entry in chrtab
1554                                 continue;
1555                         }
1556                 }
1557
1558                 // Handle decimal constant
1559                 if (c & DIGIT)
1560                 {
1561                         uint8_t * numStart = ln;
1562                         v = 0;
1563
1564                         while ((int)chrtab[*ln] & DIGIT)
1565                                 v = (v * 10) + *ln++ - '0';
1566
1567                         // See if there's a .[bwl] after the constant & deal with it if so
1568                         if (*ln == '.')
1569                         {
1570                                 if ((*(ln + 1) == 'b') || (*(ln + 1) == 'B'))
1571                                 {
1572                                         v &= 0x000000FF;
1573                                         ln += 2;
1574                                         *tk.u32++ = CONST;
1575                                         *tk.u64++ = v;
1576                                         *tk.u32++ = DOTB;
1577                                 }
1578                                 else if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
1579                                 {
1580                                         v &= 0x0000FFFF;
1581                                         ln += 2;
1582                                         *tk.u32++ = CONST;
1583                                         *tk.u64++ = v;
1584                                         *tk.u32++ = DOTW;
1585                                 }
1586                                 else if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
1587                                 {
1588                                         v &= 0xFFFFFFFF;
1589                                         ln += 2;
1590                                         *tk.u32++ = CONST;
1591                                         *tk.u64++ = v;
1592                                         *tk.u32++ = DOTL;
1593                                 }
1594                                 else if ((int)chrtab[*(ln + 1)] & DIGIT)
1595                                 {
1596                                         // Hey, more digits after the dot, so we assume it's a
1597                                         // floating point number of some kind... numEnd will point
1598                                         // to the first non-float character after it's done
1599                                         char * numEnd;
1600                                         errno = 0;
1601                                         double f = strtod(numStart, &numEnd);
1602                                         ln = (uint8_t *)numEnd;
1603
1604                                         if (errno != 0)
1605                                                 return error("floating point parse error");
1606
1607                                         // N.B.: We use the C compiler's internal double
1608                                         //       representation for all internal float calcs and
1609                                         //       are reasonably sure that the size of said double
1610                                         //       is 8 bytes long (which we check for in fltpoint.c)
1611                                         *tk.u32++ = FCONST;
1612                                         *tk.dp = f;
1613                                         tk.u64++;
1614                                         continue;
1615                                 }
1616                         }
1617                         else
1618                         {
1619                                 *tk.u32++ = CONST;
1620                                 *tk.u64++ = v;
1621                         }
1622
1623 //printf("CONST: %i\n", v);
1624                         continue;
1625                 }
1626
1627                 // Handle illegal character
1628                 return error("illegal character $%02X found", *ln);
1629         }
1630
1631         // Terminate line of tokens and return "success."
1632
1633 goteol:
1634         tok = etok;                             // Set tok to beginning of line
1635
1636         if (stuffnull)                  // Terminate last SYMBOL
1637                 *nullspot = EOS;
1638
1639         *tk.u32++ = EOL;
1640
1641         return OK;
1642 }
1643
1644
1645 //
1646 // .GOTO <label>        goto directive
1647 //
1648 // The label is searched for starting from the first line of the current,
1649 // enclosing macro definition. If no enclosing macro exists, an error is
1650 // generated.
1651 //
1652 // A label is of the form:
1653 //
1654 // :<name><whitespace>
1655 //
1656 // The colon must appear in column 1.  The label is stripped prior to macro
1657 // expansion, and is NOT subject to macro expansion.  The whitespace may also
1658 // be EOL.
1659 //
1660 int d_goto(WORD unused)
1661 {
1662         // Setup for the search
1663         if (*tok != SYMBOL)
1664                 return error("missing label");
1665
1666         char * sym = string[tok[1]];
1667         tok += 2;
1668
1669         if (cur_inobj->in_type != SRC_IMACRO)
1670                 return error("goto not in macro");
1671
1672         IMACRO * imacro = cur_inobj->inobj.imacro;
1673         LLIST * defln = imacro->im_macro->lineList;
1674
1675         // Attempt to find the label, starting with the first line.
1676         for(; defln!=NULL; defln=defln->next)
1677         {
1678                 // Must start with a colon
1679                 if (defln->line[0] == ':')
1680                 {
1681                         // Compare names (sleazo string compare)
1682                         char * s1 = sym;
1683                         char * s2 = defln->line + 1;
1684
1685                         // Either we will match the strings to EOS on both, or we will
1686                         // match EOS on string 1 to whitespace on string 2. Otherwise, we
1687                         // have no match.
1688                         while ((*s1 == *s2) || ((*s1 == EOS) && (chrtab[*s2] & WHITE)))
1689                         {
1690                                 // If we reached the end of string 1 (sym), we're done.
1691                                 // Note that we're also checking for the end of string 2 as
1692                                 // well, since we've established they're equal above.
1693                                 if (*s1 == EOS)
1694                                 {
1695                                         // Found the label, set new macro next-line and return.
1696                                         imacro->im_nextln = defln;
1697                                         return 0;
1698                                 }
1699
1700                                 s1++;
1701                                 s2++;
1702                         }
1703                 }
1704         }
1705
1706         return error("goto label not found");
1707 }
1708
1709
1710 void DumpToken(TOKEN t)
1711 {
1712         if (t == COLON)
1713                 printf("[COLON]");
1714         else if (t == CONST)
1715                 printf("[CONST]");
1716         else if (t == FCONST)
1717                 printf("[FCONST]");
1718         else if (t == ACONST)
1719                 printf("[ACONST]");
1720         else if (t == STRING)
1721                 printf("[STRING]");
1722         else if (t == SYMBOL)
1723                 printf("[SYMBOL]");
1724         else if (t == EOS)
1725                 printf("[EOS]");
1726         else if (t == TKEOF)
1727                 printf("[TKEOF]");
1728         else if (t == DEQUALS)
1729                 printf("[DEQUALS]");
1730         else if (t == SET)
1731                 printf("[SET]");
1732         else if (t == REG)
1733                 printf("[REG]");
1734         else if (t == DCOLON)
1735                 printf("[DCOLON]");
1736         else if (t == GE)
1737                 printf("[GE]");
1738         else if (t == LE)
1739                 printf("[LE]");
1740         else if (t == NE)
1741                 printf("[NE]");
1742         else if (t == SHR)
1743                 printf("[SHR]");
1744         else if (t == SHL)
1745                 printf("[SHL]");
1746         else if (t == UNMINUS)
1747                 printf("[UNMINUS]");
1748         else if (t == DOTB)
1749                 printf("[DOTB]");
1750         else if (t == DOTW)
1751                 printf("[DOTW]");
1752         else if (t == DOTL)
1753                 printf("[DOTL]");
1754         else if (t == DOTQ)
1755                 printf("[DOTQ]");
1756         else if (t == DOTS)
1757                 printf("[DOTS]");
1758         else if (t == DOTD)
1759                 printf("[DOTD]");
1760         else if (t == DOTI)
1761                 printf("[DOTI]");
1762         else if (t == ENDEXPR)
1763                 printf("[ENDEXPR]");
1764         else if (t == CR_ABSCOUNT)
1765                 printf("[CR_ABSCOUNT]");
1766         else if (t == CR_FILESIZE)
1767                 printf("[CR_FILESIZE]");
1768         else if (t == CR_DEFINED)
1769                 printf("[CR_DEFINED]");
1770         else if (t == CR_REFERENCED)
1771                 printf("[CR_REFERENCED]");
1772         else if (t == CR_STREQ)
1773                 printf("[CR_STREQ]");
1774         else if (t == CR_MACDEF)
1775                 printf("[CR_MACDEF]");
1776         else if (t == CR_TIME)
1777                 printf("[CR_TIME]");
1778         else if (t == CR_DATE)
1779                 printf("[CR_DATE]");
1780         else if (t >= 0x20 && t <= 0x2F)
1781                 printf("[%c]", (char)t);
1782         else if (t >= 0x3A && t <= 0x3F)
1783                 printf("[%c]", (char)t);
1784         else if (t >= 0x80 && t <= 0x87)
1785                 printf("[D%u]", ((uint32_t)t) - 0x80);
1786         else if (t >= 0x88 && t <= 0x8F)
1787                 printf("[A%u]", ((uint32_t)t) - 0x88);
1788         else
1789                 printf("[%X:%c]", (uint32_t)t, (char)t);
1790 }
1791
1792
1793 void DumpTokenBuffer(void)
1794 {
1795         printf("Tokens [%X]: ", sloc);
1796
1797         for(TOKEN * t=tokbuf; *t!=EOL; t++)
1798         {
1799                 if (*t == COLON)
1800                         printf("[COLON]");
1801                 else if (*t == CONST)
1802                 {
1803                         PTR tp;
1804                         tp.u32 = t + 1;
1805                         printf("[CONST: $%lX]", *tp.u64);
1806                         t += 2;
1807                 }
1808                 else if (*t == FCONST)
1809                 {
1810                         PTR tp;
1811                         tp.u32 = t + 1;
1812                         printf("[FCONST: $%lX]", *tp.u64);
1813                         t += 2;
1814                 }
1815                 else if (*t == ACONST)
1816                 {
1817                         printf("[ACONST: $%X, $%X]", (uint32_t)t[1], (uint32_t)t[2]);
1818                         t += 2;
1819                 }
1820                 else if (*t == STRING)
1821                 {
1822                         t++;
1823                         printf("[STRING:\"%s\"]", string[*t]);
1824                 }
1825                 else if (*t == SYMBOL)
1826                 {
1827                         t++;
1828                         printf("[SYMBOL:\"%s\"]", string[*t]);
1829                 }
1830                 else if (*t == EOS)
1831                         printf("[EOS]");
1832                 else if (*t == TKEOF)
1833                         printf("[TKEOF]");
1834                 else if (*t == DEQUALS)
1835                         printf("[DEQUALS]");
1836                 else if (*t == SET)
1837                         printf("[SET]");
1838                 else if (*t == REG)
1839                         printf("[REG]");
1840                 else if (*t == DCOLON)
1841                         printf("[DCOLON]");
1842                 else if (*t == GE)
1843                         printf("[GE]");
1844                 else if (*t == LE)
1845                         printf("[LE]");
1846                 else if (*t == NE)
1847                         printf("[NE]");
1848                 else if (*t == SHR)
1849                         printf("[SHR]");
1850                 else if (*t == SHL)
1851                         printf("[SHL]");
1852                 else if (*t == UNMINUS)
1853                         printf("[UNMINUS]");
1854                 else if (*t == DOTB)
1855                         printf("[DOTB]");
1856                 else if (*t == DOTW)
1857                         printf("[DOTW]");
1858                 else if (*t == DOTL)
1859                         printf("[DOTL]");
1860                 else if (*t == DOTQ)
1861                         printf("[DOTQ]");
1862                 else if (*t == DOTS)
1863                         printf("[DOTS]");
1864                 else if (*t == DOTD)
1865                         printf("[DOTD]");
1866                 else if (*t == DOTI)
1867                         printf("[DOTI]");
1868                 else if (*t == ENDEXPR)
1869                         printf("[ENDEXPR]");
1870                 else if (*t == CR_ABSCOUNT)
1871                         printf("[CR_ABSCOUNT]");
1872                 else if (*t == CR_FILESIZE)
1873                         printf("[CR_FILESIZE]");
1874                 else if (*t == CR_DEFINED)
1875                         printf("[CR_DEFINED]");
1876                 else if (*t == CR_REFERENCED)
1877                         printf("[CR_REFERENCED]");
1878                 else if (*t == CR_STREQ)
1879                         printf("[CR_STREQ]");
1880                 else if (*t == CR_MACDEF)
1881                         printf("[CR_MACDEF]");
1882                 else if (*t == CR_TIME)
1883                         printf("[CR_TIME]");
1884                 else if (*t == CR_DATE)
1885                         printf("[CR_DATE]");
1886                 else if (*t >= 0x20 && *t <= 0x2F)
1887                         printf("[%c]", (char)*t);
1888                 else if (*t >= 0x3A && *t <= 0x3F)
1889                         printf("[%c]", (char)*t);
1890                 else if (*t >= 0x80 && *t <= 0x87)
1891                         printf("[D%u]", ((uint32_t)*t) - 0x80);
1892                 else if (*t >= 0x88 && *t <= 0x8F)
1893                         printf("[A%u]", ((uint32_t)*t) - 0x88);
1894                 else
1895                         printf("[%X:%c]", (uint32_t)*t, (char)*t);
1896         }
1897
1898         printf("[EOL]\n");
1899 }
1900