]> Shamusworld >> Repos - rmac/blob - token.c
Removed redundant table
[rmac] / token.c
1 //
2 // RMAC - Renamed Macro Assembler for all Atari computers
3 // TOKEN.C - Token Handling
4 // Copyright (C) 199x Landon Dyer, 2011-2021 Reboot and Friends
5 // RMAC derived from MADMAC v1.07 Written by Landon Dyer, 1986
6 // Source utilised with the kind permission of Landon Dyer
7 //
8
9 #include "token.h"
10
11 #include <errno.h>
12 #include "direct.h"
13 #include "error.h"
14 #include "macro.h"
15 #include "procln.h"
16 #include "sect.h"
17 #include "symbol.h"
18
19 #define DECL_KW                         // Declare keyword arrays
20 #define DEF_KW                          // Declare keyword values
21 #include "kwtab.h"                      // Incl generated keyword tables & defs
22 #define DEF_REG68                       // Incl 68k register definitions
23 #include "68kregs.h"
24 #define DEF_REGRISC                     // Include GPU/DSP register definitions
25 #include "riscregs.h"
26 #define DEF_UNARY                       // Declare unary values
27 #define DECL_UNARY                      // Incl unary keyword state machine tables
28 #include "unarytab.h"           // Incl generated unary tables & defs
29
30
31 int lnsave;                                     // Nonzero ==> save text of current line (0 = don't)
32 uint32_t curlineno;                     // Current line number
33 int totlines;                           // Total # of lines
34 int mjump_align = 0;            // mjump alignment flag
35 char lntag;                                     // Line tag
36 char * curfname;                        // Current filename
37 char tolowertab[128];           // ASCII uppercase ==> lowercase map
38 int8_t hextab[128];                     // Hex digit values (-1 = not a hex digit)
39 char dotxtab[128];                      // Suffix tokens for chars legal after '.' (0 = none)
40 char irbuf[LNSIZ];                      // Text for .rept block line
41 char lnbuf[LNSIZ];                      // Text of current line
42 WORD filecount;                         // Unique file number counter
43 WORD cfileno;                           // Current file number
44 TOKEN * tok;                            // Ptr to current token
45 TOKEN * etok;                           // Ptr past last token in tokbuf[]
46 TOKEN tokeol[1] = {EOL};        // Bailout end-of-line token
47 char * string[TOKBUFSIZE*2]; // Token buffer string pointer storage
48 int optimizeOff;                        // Optimization override flag
49
50
51 FILEREC * filerec;                      // Head of file record list (linked via frec_next)
52 FILEREC * last_fr;                      // Presumably the list's last record -- TODO confirm
53
54 INOBJ * cur_inobj;                      // Ptr current input obj (IFILE/IMACRO)
55 static INOBJ * f_inobj;         // Ptr list of free INOBJs
56 static IFILE * f_ifile;         // Ptr list of free IFILEs
57 static IMACRO * f_imacro;       // Ptr list of free IMACROs
58
59 static TOKEN tokbuf[TOKBUFSIZE];        // Token buffer (stack-like, all files)
60
61 uint8_t chrtab[0x100] = {
62         ILLEG, ILLEG, ILLEG, ILLEG,                     // NUL SOH STX ETX
63         ILLEG, ILLEG, ILLEG, ILLEG,                     // EOT ENQ ACK BEL
64         ILLEG, WHITE, ILLEG, ILLEG,                     // BS HT LF VT
65         WHITE, ILLEG, ILLEG, ILLEG,                     // FF CR SO SI
66
67         ILLEG, ILLEG, ILLEG, ILLEG,                     // DLE DC1 DC2 DC3
68         ILLEG, ILLEG, ILLEG, ILLEG,                     // DC4 NAK SYN ETB
69         ILLEG, ILLEG, ILLEG, ILLEG,                     // CAN EM SUB ESC
70         ILLEG, ILLEG, ILLEG, ILLEG,                     // FS GS RS US
71
72         WHITE, MULTX, MULTX, SELF,                      // SP ! " #
73         MULTX+CTSYM, MULTX, SELF, MULTX,        // $ % & '
74         SELF, SELF, SELF, SELF,                         // ( ) * +
75         SELF, SELF, STSYM, SELF,                        // , - . /
76
77         DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM,         // 0 1
78         DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM,         // 2 3
79         DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM,         // 4 5
80         DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM,         // 6 7
81         DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM,         // 8 9
82         MULTX, MULTX,                                                           // : ;
83         MULTX, MULTX, MULTX, STSYM+CTSYM,                       // < = > ?
84
85         MULTX, STSYM+CTSYM+HDIGIT,                                      // @ A
86         DOT+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT,     // B C
87         DOT+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT,     // D E
88         STSYM+CTSYM+HDIGIT, STSYM+CTSYM,                        // F G
89         STSYM+CTSYM, DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // H I J K
90         DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // L M N O
91
92         DOT+STSYM+CTSYM, DOT+STSYM+CTSYM, STSYM+CTSYM, DOT+STSYM+CTSYM, // P Q R S
93         STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, DOT+STSYM+CTSYM, // T U V W
94         STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, SELF,// X Y Z [
95         SELF, SELF, MULTX, STSYM+CTSYM,                         // \ ] ^ _
96
97         ILLEG, STSYM+CTSYM+HDIGIT,                                      // ` a
98         DOT+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT,     // b c
99         DOT+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT,     // d e
100         STSYM+CTSYM+HDIGIT, STSYM+CTSYM,                        // f g
101         STSYM+CTSYM, DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // h i j k
102         DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // l m n o
103
104         DOT+STSYM+CTSYM, DOT+STSYM+CTSYM, STSYM+CTSYM, DOT+STSYM+CTSYM, // p q r s
105         STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, DOT+STSYM+CTSYM, // t u v w
106         DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, SELF,                // x y z {
107         SELF, SELF, SELF, ILLEG,                                        // | } ~ DEL
108
109         // Anything above $7F is illegal (and yes, we need to check for this,
110         // otherwise you get strange and spurious errors that will lead you astray)
111         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
112         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
113         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
114         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
115         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
116         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
117         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
118         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
119         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
120         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
121         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
122         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
123         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
124         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
125         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
126         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG
127 };
128
129 // Register names, indexed by (token value - REG68_D0); "" marks a slot with no name
130 static char * regname[] = {
131         "d0","d1","d2","d3","d4","d5","d6","d7", // 128..135
132         "a0","a1","a2","a3","a4","a5","a6","sp", // 136..143
133         "ssp","pc","sr","ccr","regequ","set","reg","r0", // 144..151
134         "r1","r2","r3","r4","r5","r6","r7","r8", // 152..159
135         "r9","r10","r11","r12","r13","r14","r15","r16", // 160..167
136         "r17","r18","r19","r20","r21","r22","r23","r24", // 168..175
137         "r25","r26","r27","r28","r29","r30","r31","ccdef", // 176..183
138         "usp","ic40","dc40","bc40","sfc","dfc","","vbr", // 184..191
139         "cacr","caar","msp","isp","tc","itt0","itt1","dtt0", // 192..199
140         "dtt1","mmusr","urp","srp","iacr0","iacr1","dacr0","dacr1", // 200..207
141         "tt0","tt1","crp","","","","","", // 208..215
142         "","","","","fpiar","fpsr","fpcr","", // 216..223
143         "fp0","fp1","fp2","fp3","fp4","fp5","fp6","fp7", // 224..231
144         "","","","","","","","", // 232..239
145         "","","","","","","","", // 240..247
146         "","","","","","","","", // 248..255
147         "","","","","x0","x1","y0","y1", // 256..263
148         "","b0","","b2","","b1","a","b", // 264..271
149         "mr","omr","la","lc","ssh","ssl","ss","", // 272..279
150         "n0","n1","n2","n3","n4","n5","n6","n7", // 280..287
151         "m0","m1","m2","m3","m4","m5","m6","m7", // 288..295
152         "","","","","","","l","p", // 296..303
153         "mr","omr","la","lc","ssh","ssl","ss","", // 304..311 (same names as 272..279)
154         "a10","b10","x","y","","","ab","ba"  // 312..319
155 };
156
157
158 //
159 // Initialize tokenizer
160 //
161 void InitTokenizer(void)
162 {
163         int i;                                                                  // Iterator
164         char * htab = "0123456789abcdefABCDEF"; // Hex character table
165
166         lnsave = 0;                                                             // Don't save lines
167         curfname = "";                                                  // No file, empty filename
168         filecount = (WORD)-1;
169         cfileno = (WORD)-1;                                             // cfileno gets bumped to 0
170         curlineno = 0;
171         totlines = 0;
172         etok = tokbuf;
173         f_inobj = NULL;
174         f_ifile = NULL;
175         f_imacro = NULL;
176         cur_inobj = NULL;
177         filerec = NULL;
178         last_fr = NULL;
179         lntag = SPACE;
180
181         // Initialize hex, "dot" and tolower tables
182         for(i=0; i<128; i++)
183         {
184                 hextab[i] = -1;
185                 dotxtab[i] = 0;
186                 tolowertab[i] = (char)i;
187         }
188
189         for(i=0; htab[i]!=EOS; i++)
190                 hextab[htab[i]] = (char)((i < 16) ? i : i - 6);
191
192         for(i='A'; i<='Z'; i++)
193                 tolowertab[i] |= 0x20;
194
195         // These characters are legal immediately after a period
196         dotxtab['b'] = DOTB;                                    // .b .B .s .S
197         dotxtab['B'] = DOTB;
198         //dotxtab['s'] = DOTB;
199         //dotxtab['S'] = DOTB;
200         dotxtab['w'] = DOTW;                                    // .w .W
201         dotxtab['W'] = DOTW;
202         dotxtab['l'] = DOTL;                                    // .l .L
203         dotxtab['L'] = DOTL;
204         dotxtab['i'] = DOTI;                                    // .i .I (WTF is this???)
205         dotxtab['I'] = DOTI;
206         dotxtab['D'] = DOTD;                                    // .d .D (double)
207         dotxtab['d'] = DOTD;
208         dotxtab['S'] = DOTS;                                    // .s .S
209         dotxtab['s'] = DOTS;
210         dotxtab['Q'] = DOTQ;                                    // .q .Q (quad word)
211         dotxtab['q'] = DOTQ;
212         dotxtab['X'] = DOTX;                                    // .x .x
213         dotxtab['x'] = DOTX;
214         dotxtab['P'] = DOTP;                                    // .p .P
215         dotxtab['p'] = DOTP;
216 }
217
218
219 void SetFilenameForErrorReporting(void)
220 {
221         WORD fnum = cfileno;
222
223         // Check for absolute top filename (this should never happen)
224         if (fnum == -1)
225         {
226                 curfname = "(*top*)";
227                 return;
228         }
229
230         FILEREC * fr = filerec;
231
232         // Advance to the correct record...
233         while (fr != NULL && fnum != 0)
234         {
235                 fr = fr->frec_next;
236                 fnum--;
237         }
238
239         // Check for file # record not found (this should never happen either)
240         if (fr == NULL)
241         {
242                 curfname = "(*NOT FOUND*)";
243                 return;
244         }
245
246         curfname = fr->frec_name;
247 }
248
249
250 //
251 // Allocate an IFILE or IMACRO
252 //
253 INOBJ * a_inobj(int typ)
254 {
255         INOBJ * inobj;
256         IFILE * ifile;
257         IMACRO * imacro;
258
259         // Allocate and initialize INOBJ first
260         if (f_inobj == NULL)
261                 inobj = malloc(sizeof(INOBJ));
262         else
263         {
264                 inobj = f_inobj;
265                 f_inobj = f_inobj->in_link;
266         }
267
268         switch (typ)
269         {
270         case SRC_IFILE:                                                 // Alloc and init an IFILE
271                 if (f_ifile == NULL)
272                         ifile = malloc(sizeof(IFILE));
273                 else
274                 {
275                         ifile = f_ifile;
276                         f_ifile = f_ifile->if_link;
277                 }
278
279                 inobj->inobj.ifile = ifile;
280                 break;
281
282         case SRC_IMACRO:                                                // Alloc and init an IMACRO
283                 if (f_imacro == NULL)
284                         imacro = malloc(sizeof(IMACRO));
285                 else
286                 {
287                         imacro = f_imacro;
288                         f_imacro = f_imacro->im_link;
289                 }
290
291                 inobj->inobj.imacro = imacro;
292                 break;
293
294         case SRC_IREPT:                                                 // Alloc and init an IREPT
295                 inobj->inobj.irept = malloc(sizeof(IREPT));
296                 DEBUG { printf("alloc IREPT\n"); }
297                 break;
298         }
299
300         // Install INOBJ on top of input stack
301         inobj->in_ifent = ifent;                                // Record .if context on entry
302         inobj->in_type = (WORD)typ;
303         inobj->in_otok = tok;
304         inobj->in_etok = etok;
305         inobj->in_link = cur_inobj;
306         cur_inobj = inobj;
307
308         return inobj;
309 }
310
311
312 //
313 // Perform macro substitution from 'orig' to 'dest'. Return OK or some error.
314 // A macro reference is in one of two forms:
315 // \name <non-name-character>
316 // \{name}
317 // A doubled backslash (\\) is compressed to a single backslash (\).
318 // Argument definitions have been pre-tokenized, so we have to turn them back
319 // into text. This means that numbers, in particular, become hex, regardless of
320 // their representation when the macro was invoked. This is a hack.
321 // A label may appear at the beginning of the line:
322 // :<name><whitespace>
323 // (the colon must be in the first column). These labels are stripped before
324 // macro expansion takes place.
325 //
326 int ExpandMacro(char * src, char * dest, int destsiz)
327 {
328         int i;
329         int questmark;                  // \? for testing argument existence
330         char mname[128];                // Assume max size of a formal arg name
331         char numbuf[20];                // Buffer for text of CONSTs
332         TOKEN * tk;
333         SYM * arg;
334         char ** symbolString;
335
336         DEBUG { printf("ExM: src=\"%s\"\n", src); }
337
338         IMACRO * imacro = cur_inobj->inobj.imacro;
339         int macnum = (int)(imacro->im_macro->sattr);
340
341         char * dst = dest;                                              // Next dest slot
342         char * edst = dest + destsiz - 1;               // End + 1(?) of dest buffer
343
344         // Check for (and skip over) any "label" on the line
345         char * s = src;
346         char * d = NULL;
347
348         if (*s == ':')
349         {
350                 while (*s != EOS && !(chrtab[*s] & WHITE))
351                         s++;
352
353                 if (*s != EOS)
354                         s++;                                                    // Skip first whitespace
355         }
356
357         // Expand the rest of the line
358         while (*s != EOS)
359         {
360                 // Copy single character
361                 if (*s != '\\')
362                 {
363                         if (dst >= edst)
364                                 goto overflow;
365
366                         // Skip comments in case a loose @ or \ is in there
367                         // In that case the tokeniser was trying to expand it.
368                         if ((*s == ';') || ((*s == '/') && (*(s + 1) == '/')))
369                                 goto skipcomments;
370
371                         *dst++ = *s++;
372                 }
373                 // Do macro expansion
374                 else
375                 {
376                         questmark = 0;
377
378                         // Do special cases
379                         switch (*++s)
380                         {
381                         case '\\':                                              // \\, \ (collapse to single backslash)
382                                 if (dst >= edst)
383                                         goto overflow;
384
385                                 *dst++ = *s++;
386                                 continue;
387                         case '?':                                               // \? <macro>  set `questmark' flag
388                                 s++;
389                                 questmark = 1;
390                                 break;
391                         case '#':                                               // \#, number of arguments
392                                 sprintf(numbuf, "%d", (int)imacro->im_nargs);
393                                 goto copystr;
394                         case '!':                                               // \! size suffix supplied on invocation
395                                 switch ((int)imacro->im_siz)
396                                 {
397                                 case SIZN: d = "";   break;
398                                 case SIZB: d = ".b"; break;
399                                 case SIZW: d = ".w"; break;
400                                 case SIZL: d = ".l"; break;
401                                 }
402
403                                 goto copy_d;
404                         case '~':                                               // ==> unique label string Mnnnn...
405                                 sprintf(numbuf, "M%u", curuniq);
406 copystr:
407                                 d = numbuf;
408 copy_d:
409                                 s++;
410
411                                 while (*d != EOS)
412                                 {
413                                         if (dst >= edst)
414                                                 goto overflow;
415                                         else
416                                                 *dst++ = *d++;
417                                 }
418
419                                 continue;
420                         case EOS:
421                                 return error("missing argument name");
422                         }
423
424                         // \n ==> argument number 'n', 0..9
425                         if (chrtab[*s] & DIGIT)
426                         {
427                                 i = *s++ - '1';
428
429                                 if (i < 0)
430                                         i = 9;
431
432                                 goto arg_num;
433                         }
434
435                         // Get argument name: \name, \{name}
436                         d = mname;
437
438                         // \label
439                         if (*s != '{')
440                         {
441                                 do
442                                 {
443                                         *d++ = *s++;
444                                 }
445                                 while (chrtab[*s] & CTSYM);
446                         }
447                         // \\{label}
448                         else
449                         {
450                                 for(++s; *s != EOS && *s != '}';)
451                                         *d++ = *s++;
452
453                                 if (*s != '}')
454                                         return error("missing closing brace ('}')");
455                                 else
456                                         s++;
457                         }
458
459                         *d = EOS;
460
461                         // Lookup the argument and copy its (string) value into the
462                         // destination string
463                         DEBUG { printf("argument='%s'\n", mname); }
464
465                         if ((arg = lookup(mname, MACARG, macnum)) == NULL)
466                                 return error("undefined argument: '%s'", mname);
467                         else
468                         {
469                                 // Convert a string of tokens (terminated with EOL) back into
470                                 // text. If an argument is out of range (not specified in the
471                                 // macro invocation) then it is ignored.
472                                 i = (int)arg->svalue;
473 arg_num:
474                                 DEBUG { printf("~argnumber=%d\n", i); }
475                                 tk = NULL;
476
477                                 if (i < imacro->im_nargs)
478                                 {
479                                         tk = imacro->argument[i].token;
480                                         symbolString = imacro->argument[i].string;
481 //DEBUG
482 //{
483 //      printf("ExM: Preparing to parse argument #%u...\n", i);
484 //      DumpTokens(tk);
485 //}
486                                 }
487
488                                 // \?arg yields:
489                                 //    0  if the argument is empty or non-existant,
490                                 //    1  if the argument is not empty
491                                 if (questmark)
492                                 {
493                                         if (tk == NULL || *tk == EOL)
494                                                 questmark = 0;
495
496                                         if (dst >= edst)
497                                                 goto overflow;
498
499                                         *dst++ = (char)(questmark + '0');
500                                         continue;
501                                 }
502
503                                 // Argument # is in range, so expand it
504                                 if (tk != NULL)
505                                 {
506                                         while (*tk != EOL)
507                                         {
508                                                 // Reverse-translation from a token number to a string.
509                                                 // This is a hack. It might be better table-driven.
510                                                 d = NULL;
511
512                                                 if (*tk >= REG68_D0)
513                                                 {
514                                                         d = regname[(int)*tk++ - REG68_D0];
515                                                         goto strcopy;
516                                                 }
517                                                 else
518                                                 {
519                                                         switch ((int)*tk++)
520                                                         {
521                                                         case SYMBOL:
522                                                                 d = symbolString[*tk++];
523 DEBUG { printf("ExM: SYMBOL=\"%s\"", d); }
524                                                                 break;
525                                                         case STRING:
526                                                                 d = symbolString[*tk++];
527
528                                                                 if (dst >= edst)
529                                                                         goto overflow;
530
531                                                                 *dst++ = '"';
532
533                                                                 while (*d != EOS)
534                                                                 {
535                                                                         if (dst >= edst)
536                                                                                 goto overflow;
537                                                                         else
538                                                                                 *dst++ = *d++;
539                                                                 }
540
541                                                                 if (dst >= edst)
542                                                                         goto overflow;
543
544                                                                 *dst++ = '"';
545                                                                 continue;
546                                                                 break;
547 // Shamus: Changing the format specifier from %lx to %ux caused the assembler
548 //         to choke on legitimate code... Need to investigate this further
549 //         before changing anything else here!
550                                                         case CONST:
551 //                                                              sprintf(numbuf, "$%lx", (uint64_t)*tk++);
552                                                                 sprintf(numbuf, "$%" PRIX64, (uint64_t)*tk++);
553                                                                 tk++;
554                                                                 d = numbuf;
555                                                                 break;
556                                                         case DEQUALS:
557                                                                 d = "==";
558                                                                 break;
559                                                         case SET:
560                                                                 d = "set";
561                                                                 break;
562                                                         case COLON:
563                                                                 d = ":";
564                                                                 break;
565                                                         case DCOLON:
566                                                                 d = "::";
567                                                                 break;
568                                                         case GE:
569                                                                 d = ">=";
570                                                                 break;
571                                                         case LE:
572                                                                 d = "<=";
573                                                                 break;
574                                                         case NE:
575                                                                 d = "<>";
576                                                                 break;
577                                                         case SHR:
578                                                                 d = ">>";
579                                                                 break;
580                                                         case SHL:
581                                                                 d = "<<";
582                                                                 break;
583                                                         case DOTB:
584                                                                 d = ".b";
585                                                                 break;
586                                                         case DOTW:
587                                                                 d = ".w";
588                                                                 break;
589                                                         case DOTL:
590                                                                 d = ".l";
591                                                                 break;
592                                                         case CR_ABSCOUNT:
593                                                                 d = "^^abscount";
594                                                                 break;
595                                                         case CR_FILESIZE:
596                                                                 d = "^^filesize";
597                                                                 break;
598                                                         case CR_DATE:
599                                                                 d = "^^date";
600                                                                 break;
601                                                         case CR_TIME:
602                                                                 d = "^^time";
603                                                                 break;
604                                                         case CR_DEFINED:
605                                                                 d = "^^defined ";
606                                                                 break;
607                                                         case CR_REFERENCED:
608                                                                 d = "^^referenced ";
609                                                                 break;
610                                                         case CR_STREQ:
611                                                                 d = "^^streq ";
612                                                                 break;
613                                                         case CR_MACDEF:
614                                                                 d = "^^macdef ";
615                                                                 break;
616                                                         default:
617                                                                 if (dst >= edst)
618                                                                         goto overflow;
619
620                                                                 *dst++ = (char)*(tk - 1);
621                                                                 break;
622                                                         }
623                                                 }
624
625                                                 // If 'd' != NULL, copy string to destination
626                                                 if (d != NULL)
627                                                 {
628 strcopy:
629                                                         DEBUG printf("d='%s'\n", d);
630
631                                                         while (*d != EOS)
632                                                         {
633                                                                 if (dst >= edst)
634                                                                         goto overflow;
635                                                                 else
636                                                                         *dst++ = *d++;
637                                                         }
638                                                 }
639                                         }
640                                 }
641                         }
642                 }
643         }
644
645 skipcomments:
646
647         *dst = EOS;
648         DEBUG { printf("ExM: dst=\"%s\"\n", dest); }
649         return OK;
650
651 overflow:
652         *dst = EOS;
653         DEBUG { printf("*** OVERFLOW LINE ***\n%s\n", dest); }
654         return fatal("line too long as a result of macro expansion");
655 }
656
657
658 //
659 // Get next line of text from a macro
660 //
661 char * GetNextMacroLine(void)
662 {
663         IMACRO * imacro = cur_inobj->inobj.imacro;
664         LLIST * strp = imacro->im_nextln;
665
666         if (strp == NULL)                                               // End-of-macro
667                 return NULL;
668
669         imacro->im_nextln = strp->next;
670 //      ExpandMacro((char *)(strp + 1), imacro->im_lnbuf, LNSIZ);
671         ExpandMacro(strp->line, imacro->im_lnbuf, LNSIZ);
672
673         return imacro->im_lnbuf;
674 }
675
676
677 //
678 // Get next line of text from a repeat block
679 //
680 char * GetNextRepeatLine(void)
681 {
682         IREPT * irept = cur_inobj->inobj.irept;
683 //      LONG * strp = irept->ir_nextln;                 // initial null
684
685         // Do repeat at end of .rept block's string list
686 //      if (strp == NULL)
687         if (irept->ir_nextln == NULL)
688         {
689                 DEBUG { printf("back-to-top-of-repeat-block count=%d\n", (int)irept->ir_count); }
690                 irept->ir_nextln = irept->ir_firstln;   // copy first line
691
692                 if (irept->ir_count-- == 0)
693                 {
694                         DEBUG { printf("end-repeat-block\n"); }
695                         return NULL;
696                 }
697                 reptuniq++;
698 //              strp = irept->ir_nextln;
699         }
700         // Mark the current macro line in the irept object
701         // This is probably overkill - a global variable
702         // would suffice here (it only gets used during
703         // error reporting anyway)
704         irept->lineno = irept->ir_nextln->lineno;
705
706         // Copy the rept lines verbatim, unless we're in nest level 0.
707         // Then, expand any \~ labels to unique numbers (Rn)
708         if (rptlevel)
709         {
710                 strcpy(irbuf, irept->ir_nextln->line);
711         }
712         else
713         {
714                 uint32_t linelen = strlen(irept->ir_nextln->line);
715                 uint8_t *p_line = irept->ir_nextln->line;
716                 char *irbufwrite = irbuf;
717                 for (int i = 0; i <= linelen; i++)
718                 {
719                         uint8_t c;
720                         c = *p_line++;
721                         if (c == '\\' && *p_line == '~')
722                         {
723                                 p_line++;
724                                 irbufwrite += sprintf(irbufwrite, "R%u", reptuniq);
725                         }
726                         else
727                         {
728                                 *irbufwrite++ = c;
729                         }
730                 }
731         }
732
733         DEBUG { printf("repeat line='%s'\n", irbuf); }
734 //      irept->ir_nextln = (LONG *)*strp;
735         irept->ir_nextln = irept->ir_nextln->next;
736
737         return irbuf;
738 }
739
740
741 //
742 // Include a source file used at the root, and for ".include" files
743 //
744 int include(int handle, char * fname)
745 {
746         // Debug mode
747         DEBUG { printf("[include: %s, cfileno=%u]\n", fname, cfileno); }
748
749         // Alloc and initialize include-descriptors
750         INOBJ * inobj = a_inobj(SRC_IFILE);
751         IFILE * ifile = inobj->inobj.ifile;
752
753         ifile->ifhandle = handle;                       // Setup file handle
754         ifile->ifind = ifile->ifcnt = 0;        // Setup buffer indices
755         ifile->ifoldlineno = curlineno;         // Save old line number
756         ifile->ifoldfname = curfname;           // Save old filename
757         ifile->ifno = cfileno;                          // Save old file number
758
759         // NB: This *must* be preincrement, we're adding one to the filecount here!
760         cfileno = ++filecount;                          // Compute NEW file number
761         curfname = strdup(fname);                       // Set current filename (alloc storage)
762         curlineno = 0;                                          // Start on line zero
763
764         // Add another file to the file-record
765         FILEREC * fr = (FILEREC *)malloc(sizeof(FILEREC));
766         fr->frec_next = NULL;
767         fr->frec_name = curfname;
768
769         if (last_fr == NULL)
770                 filerec = fr;                                   // Add first filerec
771         else
772                 last_fr->frec_next = fr;                // Append to list of filerecs
773
774         last_fr = fr;
775         DEBUG { printf("[include: curfname: %s, cfileno=%u]\n", curfname, cfileno); }
776
777         return OK;
778 }
779
780
781 //
782 // Pop the current input level
783 //
784 int fpop(void)
785 {
786         INOBJ * inobj = cur_inobj;
787
788         if (inobj == NULL)
789                 return 0;
790
791         // Pop IFENT levels until we reach the conditional assembly context we
792         // were at when the input object was entered.
793         int numUnmatched = 0;
794
795         while (ifent != inobj->in_ifent)
796         {
797                 if (d_endif() != 0)     // Something bad happened during endif parsing?
798                         return -1;              // If yes, bail instead of getting stuck in a loop
799
800                 numUnmatched++;
801         }
802
803         // Give a warning to the user that we had to wipe their bum for them
804         if (numUnmatched > 0)
805                 warn("missing %d .endif(s)", numUnmatched);
806
807         tok = inobj->in_otok;   // Restore tok and etok
808         etok = inobj->in_etok;
809
810         switch (inobj->in_type)
811         {
812         case SRC_IFILE:                 // Pop and release an IFILE
813         {
814                 DEBUG { printf("[Leaving: %s]\n", curfname); }
815
816                 IFILE * ifile = inobj->inobj.ifile;
817                 ifile->if_link = f_ifile;
818                 f_ifile = ifile;
819                 close(ifile->ifhandle);                 // Close source file
820 DEBUG { printf("[fpop (pre):  curfname=%s]\n", curfname); }
821                 curfname = ifile->ifoldfname;   // Set current filename
822 DEBUG { printf("[fpop (post): curfname=%s]\n", curfname); }
823 DEBUG { printf("[fpop: (pre)  cfileno=%d ifile->ifno=%d]\n", (int)cfileno, (int)ifile->ifno); }
824                 curlineno = ifile->ifoldlineno; // Set current line#
825                 DEBUG { printf("cfileno=%d ifile->ifno=%d\n", (int)cfileno, (int)ifile->ifno); }
826                 cfileno = ifile->ifno;                  // Restore current file number
827 DEBUG { printf("[fpop: (post) cfileno=%d ifile->ifno=%d]\n", (int)cfileno, (int)ifile->ifno); }
828                 break;
829         }
830
831         case SRC_IMACRO:                                        // Pop and release an IMACRO
832         {
833                 IMACRO * imacro = inobj->inobj.imacro;
834                 imacro->im_link = f_imacro;
835                 f_imacro = imacro;
836                 break;
837         }
838
839         case SRC_IREPT:                                         // Pop and release an IREPT
840         {
841                 DEBUG { printf("dealloc IREPT\n"); }
842                 LLIST * p = inobj->inobj.irept->ir_firstln;
843
844                 // Deallocate repeat lines
845                 while (p != NULL)
846                 {
847                         free(p->line);
848                         p = p->next;
849                 }
850
851                 break;
852         }
853         }
854
855         cur_inobj = inobj->in_link;
856         inobj->in_link = f_inobj;
857         f_inobj = inobj;
858
859         return 0;
860 }
861
862
863 //
864 // Get line from file into buf, return NULL on EOF or ptr to the start of a
865 // null-term line
866 //
867 char * GetNextLine(void)
868 {
869         int i, j;
870         char * p, * d;
871         int readamt = -1;                                               // 0 if last read() yeilded 0 bytes
872         IFILE * fl = cur_inobj->inobj.ifile;
873
874         for(;;)
875         {
876                 // Scan for next end-of-line; handle stupid text formats by treating
877                 // \r\n the same as \n. (lone '\r' at end of buffer means we have to
878                 // check for '\n').
879                 d = &fl->ifbuf[fl->ifind];
880
881                 for(p=d, i=0, j=fl->ifcnt; i<j; i++, p++)
882                 {
883                         if (*p == '\r' || *p == '\n')
884                         {
885                                 i++;
886
887                                 if (*p == '\r')
888                                 {
889                                         if (i >= j)
890                                                 break;  // Need to read more, then look for '\n' to eat
891                                         else if (p[1] == '\n')
892                                                 i++;
893                                 }
894
895                                 // Cover up the newline with end-of-string sentinel
896                                 *p = '\0';
897
898                                 fl->ifind += i;
899                                 fl->ifcnt -= i;
900                                 return d;
901                         }
902                 }
903
904                 // Handle hanging lines by ignoring them (Input file is exhausted, no
905                 // \r or \n on last line)
906                 // Shamus: This is retarded. Never ignore any input!
907                 if (!readamt && fl->ifcnt)
908                 {
909 #if 0
910                         fl->ifcnt = 0;
911                         *p = '\0';
912                         return NULL;
913 #else
914                         // Really should check to see if we're at the end of the buffer!
915                         // :-P
916                         fl->ifbuf[fl->ifind + fl->ifcnt] = '\0';
917                         fl->ifcnt = 0;
918                         return &fl->ifbuf[fl->ifind];
919 #endif
920                 }
921
922                 // Truncate and return absurdly long lines.
923                 if (fl->ifcnt >= QUANTUM)
924                 {
925                         fl->ifbuf[fl->ifind + fl->ifcnt - 1] = '\0';
926                         fl->ifcnt = 0;
927                         return &fl->ifbuf[fl->ifind];
928                 }
929
930                 // Relocate what's left of a line to the beginning of the buffer, and
931                 // read some more of the file in; return NULL if the buffer's empty and
932                 // on EOF.
933                 if (fl->ifind != 0)
934                 {
935                         p = &fl->ifbuf[fl->ifind];
936                         d = &fl->ifbuf[fl->ifcnt & 1];
937
938                         for(i=0; i<fl->ifcnt; i++)
939                                 *d++ = *p++;
940
941                         fl->ifind = fl->ifcnt & 1;
942                 }
943
944                 readamt = read(fl->ifhandle, &fl->ifbuf[fl->ifind + fl->ifcnt], QUANTUM);
945
946                 if (readamt < 0)
947                         return NULL;
948
949                 if ((fl->ifcnt += readamt) == 0)
950                         return NULL;
951         }
952 }
953
954
955 //
956 // Tokenize a line
957 //
958 int TokenizeLine(void)
959 {
960         uint8_t * ln = NULL;            // Ptr to current position in line
961         uint8_t * p;                            // Random character ptr
962         PTR tk;                                         // Token-deposit ptr
963         int state = 0;                          // State for keyword detector
964         int j = 0;                                      // Var for keyword detector
965         uint8_t c;                                      // Random char
966         uint64_t v;                                     // Random value
967         uint32_t cursize = 0;           // Current line's size (.b, .w, .l, .s, .q, .d)
968         uint8_t * nullspot = NULL;      // Spot to clobber for SYMBOL termination
969         int stuffnull;                          // 1:terminate SYMBOL '\0' at *nullspot
970         uint8_t c1;
971         int stringNum = 0;                      // Pointer to string locations in tokenized line
972         SYM* sy;                                        // For looking up symbols (.equr)
973         int equrundef = 0;                      // Flag for equrundef scanning
974
975 retry:
976
977         if (cur_inobj == NULL)          // Return EOF if input stack is empty
978                 return TKEOF;
979
980         // Get another line of input from the current input source: a file, a
981         // macro, or a repeat-block
982         switch (cur_inobj->in_type)
983         {
984         // Include-file:
985         // o  handle EOF;
986         // o  bump source line number;
987         // o  tag the listing-line with a space;
988         // o  kludge lines generated by Alcyon C.
989         case SRC_IFILE:
990                 if ((ln = GetNextLine()) == NULL)
991                 {
992 DEBUG { printf("TokenizeLine: Calling fpop() from SRC_IFILE...\n"); }
993                         if (fpop() == 0)        // Pop input level
994                                 goto retry;             // Try for more lines
995                         else
996                         {
997                                 ifent->if_prev = (IFENT *)-1;   //Signal Assemble() that we have reached EOF with unbalanced if/endifs
998                                 return TKEOF;
999                         }
1000                 }
1001
1002                 curlineno++;                    // Bump line number
1003                 lntag = SPACE;
1004
1005                 break;
1006
1007         // Macro-block:
1008         // o  Handle end-of-macro;
1009         // o  tag the listing-line with an at (@) sign.
1010         case SRC_IMACRO:
1011                 if ((ln = GetNextMacroLine()) == NULL)
1012                 {
1013                         if (ExitMacro() == 0)   // Exit macro (pop args, do fpop(), etc)
1014                                 goto retry;                     // Try for more lines...
1015                         else
1016                                 return TKEOF;           // Oops, we got a non zero return code, signal EOF
1017                 }
1018
1019                 lntag = '@';
1020                 break;
1021
1022         // Repeat-block:
1023         // o  Handle end-of-repeat-block;
1024         // o  tag the listing-line with a pound (#) sign.
1025         case SRC_IREPT:
1026                 if ((ln = GetNextRepeatLine()) == NULL)
1027                 {
1028                         DEBUG { printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n"); }
1029                         fpop();
1030                         goto retry;
1031                 }
1032
1033                 lntag = '#';
1034                 break;
1035         }
1036
1037         // Save text of the line. We only do this during listings and within
1038         // macro-type blocks, since it is expensive to unconditionally copy every
1039         // line.
1040         if (lnsave)
1041         {
1042                 // Sanity check
1043                 if (strlen(ln) > LNSIZ)
1044                         return error("line too long (%d, max %d)", strlen(ln), LNSIZ);
1045
1046                 strcpy(lnbuf, ln);
1047         }
1048
1049         // General housekeeping
1050         tok = tokeol;                   // Set "tok" to EOL in case of error
1051         tk.u32 = etok;                  // Reset token ptr
1052         stuffnull = 0;                  // Don't stuff nulls
1053         totlines++;                             // Bump total #lines assembled
1054
1055         // See if the entire line is a comment. This is a win if the programmer
1056         // puts in lots of comments
1057         if (*ln == '*' || *ln == ';' || ((*ln == '/') && (*(ln + 1) == '/')))
1058                 goto goteol;
1059
1060         // And here we have a very ugly hack for signalling a single line 'turn off
1061         // optimization'. There's really no nice way to do this, so hack it is!
1062         optimizeOff = 0;                // Default is to take optimizations as they come
1063
1064         if (*ln == '!')
1065         {
1066                 optimizeOff = 1;        // Signal that we don't want to optimize this line
1067                 ln++;                           // & skip over the darned thing
1068         }
1069
1070         // Main tokenization loop;
1071         //  o  skip whitespace;
1072         //  o  handle end-of-line;
1073         //  o  handle symbols;
1074         //  o  handle single-character tokens (operators, etc.);
1075         //  o  handle multiple-character tokens (constants, strings, etc.).
1076         for(; *ln!=EOS;)
1077         {
1078                 // Check to see if there's enough space in the token buffer
1079                 if (tk.cp >= ((uint8_t *)(&tokbuf[TOKBUFSIZE])) - 20)
1080                 {
1081                         return error("token buffer overrun");
1082                 }
1083
1084                 // Skip whitespace, handle EOL
1085                 while (chrtab[*ln] & WHITE)
1086                         ln++;
1087
1088                 // Handle EOL, comment with ';'
1089                 if (*ln == EOS || *ln == ';'|| ((*ln == '/') && (*(ln + 1) == '/')))
1090                         break;
1091
1092                 // Handle start of symbol. Symbols are null-terminated in place. The
1093                 // termination is always one symbol behind, since there may be no place
1094                 // for a null in the case that an operator immediately follows the name.
1095                 c = chrtab[*ln];
1096
1097                 if (c & STSYM)
1098                 {
1099                         if (stuffnull)                  // Terminate old symbol from previous pass
1100                                 *nullspot = EOS;
1101
1102                         v = 0;                                  // Assume no DOT attrib follows symbol
1103                         stuffnull = 1;
1104
1105                         // In some cases, we need to check for a DOTx at the *beginning*
1106                         // of a symbol, as the "start" of the line we're currently looking
1107                         // at could be somewhere in the middle of that line!
1108                         if (*ln == '.')
1109                         {
1110                                 // Make sure that it's *only* a .[bwsl] following, and not the
1111                                 // start of a local symbol:
1112                                 if ((chrtab[*(ln + 1)] & DOT)
1113                                         && (dotxtab[*(ln + 1)] != 0)
1114                                         && !(chrtab[*(ln + 2)] & CTSYM))
1115                                 {
1116                                         // We found a legitimate DOTx construct, so add it to the
1117                                         // token stream:
1118                                         ln++;
1119                                         stuffnull = 0;
1120                                         *tk.u32++ = (TOKEN)dotxtab[*ln++];
1121                                         continue;
1122                                 }
1123                         }
1124
1125                         p = nullspot = ln++;    // Nullspot -> start of this symbol
1126
1127                         // Find end of symbol (and compute its length)
1128                         for(j=1; (int)chrtab[*ln]&CTSYM; j++)
1129                                 ln++;
1130
1131                         // Handle "DOT" special forms (like ".b") that follow a normal
1132                         // symbol or keyword:
1133                         if (*ln == '.')
1134                         {
1135                                 *ln++ = EOS;            // Terminate symbol
1136                                 stuffnull = 0;          // And never try it again
1137
1138                                 // Character following the '.' must have a DOT attribute, and
1139                                 // the chararacter after THAT one must not have a start-symbol
1140                                 // attribute (to prevent symbols that look like, for example,
1141                                 // "zingo.barf", which might be a good idea anyway....)
1142                                 if (((chrtab[*ln] & DOT) == 0) || (dotxtab[*ln] == 0))
1143                                         return error("[bwsl] must follow '.' in symbol");
1144
1145                                 v = (uint32_t)dotxtab[*ln++];
1146                                 cursize = (uint32_t)v;
1147
1148                                 if (chrtab[*ln] & CTSYM)
1149                                         return error("misuse of '.'; not allowed in symbols");
1150                         }
1151
1152                         // If the symbol is small, check to see if it's really the name of
1153                         // a register.
1154                         uint8_t *p2 = p;
1155                         if (j <= 5)
1156                         {
1157                                 for (state = 0; state >= 0;)
1158                                 {
1159                                         j = (int)tolowertab[*p++];
1160                                         j += regbase[state];
1161
1162                                         if (regcheck[j] != state)
1163                                         {
1164                                                 j = -1;
1165                                                 break;
1166                                         }
1167
1168                                         if (*p == EOS || p == ln)
1169                                         {
1170                                                 j = regaccept[j];
1171                                                 goto skip_keyword;
1172                                                 break;
1173                                         }
1174
1175                                         state = regtab[j];
1176                                 }
1177                         }
1178
1179                         // Scan for keywords
1180                         if ((j <= 0 || state <= 0) || p==p2)
1181                         {
1182                                 if (j <= KWSIZE)
1183                                 {
1184                                         for (state = 0; state >= 0;)
1185                                         {
1186                                                 j = (int)tolowertab[*p2++];
1187                                                 j += kwbase[state];
1188                         
1189                                                 if (kwcheck[j] != state)
1190                                                 {
1191                                                         j = -1;
1192                                                         break;
1193                                                 }
1194                         
1195                                                 if (*p == EOS || p2 == ln)
1196                                                 {
1197                                                         j = kwaccept[j];
1198                                                         break;
1199                                                 }
1200                         
1201                                                 state = kwtab[j];
1202                                         }
1203                                 }
1204                                 else
1205                                 {
1206                                         j = -1;
1207                                 }
1208                         }
1209
1210                         skip_keyword:
1211
1212                         // If we detected equrundef/regundef set relevant flag
1213                         if (j == KW_EQURUNDEF)
1214                         {
1215                                 equrundef = 1;
1216                                 j = -1;
1217                         }
1218
1219                         // If not tokenized keyword OR token was not found
1220                         if ((j < 0) || (state < 0))
1221                         {
1222                                 // Only proceed if no equrundef has been detected. In that case we need to store the symbol
1223                                 // because the directive handler (d_equrundef) will run outside this loop, further into procln.c
1224                                 if (!equrundef && !disabled)
1225                                 {
1226                                         // Last attempt: let's see if this is an equated register.
1227                                         // If yes, then just store the register's keyword value instead of the symbol
1228                                         char temp = *ln;
1229                                         *ln = 0;
1230                                         sy = lookup(nullspot, LABEL, 0);
1231                                         *ln = temp;
1232                                         if (sy)
1233                                         {
1234                                                 if (sy->sattre & EQUATEDREG)
1235                                                 {
1236                                                         *tk.u32++ = sy->svalue;
1237                                                         stuffnull = 0;
1238                                                         continue;
1239                                                 }
1240                                         }
1241                                 }
1242                                 // Ok, that failed, let's store the symbol instead
1243                                 *tk.u32++ = SYMBOL;
1244                                 string[stringNum] = nullspot;
1245                                 *tk.u32++ = stringNum;
1246                                 stringNum++;
1247                         }
1248                         else
1249                         {
1250                                 *tk.u32++ = (TOKEN)j;
1251                                 stuffnull = 0;
1252                         }
1253
1254                         if (v)                  // Record attribute token (if any)
1255                                 *tk.u32++ = (TOKEN)v;
1256
1257                         if (stuffnull)  // Arrange for string termination on next pass
1258                                 nullspot = ln;
1259
1260                         continue;
1261                 }
1262
1263                 // Handle identity tokens
1264                 if (c & SELF)
1265                 {
1266                         *tk.u32++ = *ln++;
1267                         continue;
1268                 }
1269
1270                 // Handle multiple-character tokens
1271                 if (c & MULTX)
1272                 {
1273                         switch (*ln++)
1274                         {
1275                         case '!':               // ! or !=
1276                                 if (*ln == '=')
1277                                 {
1278                                         *tk.u32++ = NE;
1279                                         ln++;
1280                                 }
1281                                 else
1282                                         *tk.u32++ = '!';
1283
1284                                 continue;
1285                         case '\'':              // 'string'
1286                                 if (m6502)
1287                                 {
1288                                         // Hardcoded for now, maybe this will change in the future
1289                                         *tk.u32++ = STRINGA8;
1290                                         goto dostring;
1291                                 }
1292                                 // Fall through
1293                         case '\"':              // "string"
1294                                 *tk.u32++ = STRING;
1295 dostring:
1296                                 c1 = ln[-1];
1297                                 string[stringNum] = ln;
1298                                 *tk.u32++ = stringNum;
1299                                 stringNum++;
1300
1301                                 for(p=ln; *ln!=EOS && *ln!=c1;)
1302                                 {
1303                                         c = *ln++;
1304
1305                                         if (c == '\\')
1306                                         {
1307                                                 switch (*ln++)
1308                                                 {
1309                                                 case EOS:
1310                                                         return(error("unterminated string"));
1311                                                 case 'e':
1312                                                         c = '\033';
1313                                                         break;
1314                                                 case 'n':
1315                                                         c = '\n';
1316                                                         break;
1317                                                 case 'b':
1318                                                         c = '\b';
1319                                                         break;
1320                                                 case 't':
1321                                                         c = '\t';
1322                                                         break;
1323                                                 case 'r':
1324                                                         c = '\r';
1325                                                         break;
1326                                                 case 'f':
1327                                                         c = '\f';
1328                                                         break;
1329                                                 case '\"':
1330                                                         c = '\"';
1331                                                         break;
1332                                                 case '\'':
1333                                                         c = '\'';
1334                                                         break;
1335                                                 case '\\':
1336                                                         c = '\\';
1337                                                         break;
1338                                                 case '{':
1339                                                         // If we're evaluating a macro
1340                                                         // this is valid because it's
1341                                                         // a parameter expansion
1342                                                 case '!':
1343                                                         // If we're evaluating a macro
1344                                                         // this is valid and expands to
1345                                                         // "dot-size"
1346                                                         break;
1347                                                 default:
1348                                                         warn("bad backslash code in string");
1349                                                         ln--;
1350                                                         break;
1351                                                 }
1352                                         }
1353
1354                                         *p++ = c;
1355                                 }
1356
1357                                 if (*ln++ != c1)
1358                                         return error("unterminated string");
1359
1360                                 *p++ = EOS;
1361                                 continue;
1362                         case '$':               // $, hex constant
1363                                 if (chrtab[*ln] & HDIGIT)
1364                                 {
1365                                         v = 0;
1366
1367                                         // Parse the hex value
1368                                         while (hextab[*ln] >= 0)
1369                                                 v = (v << 4) + (int)hextab[*ln++];
1370
1371                                         *tk.u32++ = CONST;
1372                                         *tk.u64++ = v;
1373
1374                                         if (*ln == '.')
1375                                         {
1376                                                 if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
1377                                                 {
1378                                                         *tk.u32++ = DOTW;
1379                                                         ln += 2;
1380                                                 }
1381                                                 else if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
1382                                                 {
1383                                                         *tk.u32++ = DOTL;
1384                                                         ln += 2;
1385                                                 }
1386                                         }
1387                                 }
1388                                 else
1389                                         *tk.u32++ = '$';
1390
1391                                 continue;
1392                         case '<':               // < or << or <> or <=
1393                                 switch (*ln)
1394                                 {
1395                                 case '<':
1396                                         *tk.u32++ = SHL;
1397                                         ln++;
1398                                         continue;
1399                                 case '>':
1400                                         *tk.u32++ = NE;
1401                                         ln++;
1402                                         continue;
1403                                 case '=':
1404                                         *tk.u32++ = LE;
1405                                         ln++;
1406                                         continue;
1407                                 default:
1408                                         *tk.u32++ = '<';
1409                                         continue;
1410                                 }
1411                         case ':':               // : or ::
1412                                 if (*ln == ':')
1413                                 {
1414                                         *tk.u32++ = DCOLON;
1415                                         ln++;
1416                                 }
1417                                 else
1418                                         *tk.u32++ = ':';
1419
1420                                 continue;
1421                         case '=':               // = or ==
1422                                 if (*ln == '=')
1423                                 {
1424                                         *tk.u32++ = DEQUALS;
1425                                         ln++;
1426                                 }
1427                                 else
1428                                         *tk.u32++ = '=';
1429
1430                                 continue;
1431                         case '>':               // > or >> or >=
1432                                 switch (*ln)
1433                                 {
1434                                 case '>':
1435                                         *tk.u32++ = SHR;
1436                                         ln++;
1437                                         continue;
1438                                 case '=':
1439                                         *tk.u32++ = GE;
1440                                         ln++;
1441                                         continue;
1442                                 default:
1443                                         *tk.u32++ = '>';
1444                                         continue;
1445                                 }
1446                         case '%':               // % or binary constant
1447                                 if (*ln < '0' || *ln > '1')
1448                                 {
1449                                         *tk.u32++ = '%';
1450                                         continue;
1451                                 }
1452
1453                                 v = 0;
1454
1455                                 while (*ln >= '0' && *ln <= '1')
1456                                         v = (v << 1) + *ln++ - '0';
1457
1458                                 if (*ln == '.')
1459                                 {
1460                                         if ((*(ln + 1) == 'b') || (*(ln + 1) == 'B'))
1461                                         {
1462                                                 v &= 0x000000FF;
1463                                                 ln += 2;
1464                                         }
1465
1466                                         if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
1467                                         {
1468                                                 v &= 0x0000FFFF;
1469                                                 ln += 2;
1470                                         }
1471
1472                                         if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
1473                                         {
1474                                                 v &= 0xFFFFFFFF;
1475                                                 ln += 2;
1476                                         }
1477                                 }
1478
1479                                 *tk.u32++ = CONST;
1480                                 *tk.u64++ = v;
1481                                 continue;
1482                         case '@':               // @ or octal constant
1483                                 if (*ln < '0' || *ln > '7')
1484                                 {
1485                                         *tk.u32++ = '@';
1486                                         continue;
1487                                 }
1488
1489                                 v = 0;
1490
1491                                 while (*ln >= '0' && *ln <= '7')
1492                                         v = (v << 3) + *ln++ - '0';
1493
1494                                 if (*ln == '.')
1495                                 {
1496                                         if ((*(ln + 1) == 'b') || (*(ln + 1) == 'B'))
1497                                         {
1498                                                 v &= 0x000000FF;
1499                                                 ln += 2;
1500                                         }
1501
1502                                         if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
1503                                         {
1504                                                 v &= 0x0000FFFF;
1505                                                 ln += 2;
1506                                         }
1507
1508                                         if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
1509                                         {
1510                                                 v &= 0xFFFFFFFF;
1511                                                 ln += 2;
1512                                         }
1513                                 }
1514
1515                                 *tk.u32++ = CONST;
1516                                 *tk.u64++ = v;
1517                                 continue;
1518                         case '^':               // ^ or ^^ <operator-name>
1519                                 if (*ln != '^')
1520                                 {
1521                                         *tk.u32++ = '^';
1522                                         continue;
1523                                 }
1524
1525                                 if (((int)chrtab[*++ln] & STSYM) == 0)
1526                                 {
1527                                         error("invalid symbol following ^^");
1528                                         continue;
1529                                 }
1530
1531                                 p = ln++;
1532
1533                                 while ((int)chrtab[*ln] & CTSYM)
1534                                         ++ln;
1535
1536                                 for(state=0; state>=0;)
1537                                 {
1538                                         // Get char, convert to lowercase
1539                                         j = (int)tolowertab[*p++];
1540
1541                                         //if (j >= 'A' && j <= 'Z')
1542                                         //      j += 0x20;
1543
1544                                         j += unarybase[state];
1545
1546                                         if (unarycheck[j] != state)
1547                                         {
1548                                                 j = -1;
1549                                                 break;
1550                                         }
1551
1552                                         if (*p == EOS || p == ln)
1553                                         {
1554                                                 j = unaryaccept[j];
1555                                                 break;
1556                                         }
1557
1558                                         state = unarytab[j];
1559                                 }
1560
1561                                 if (j < 0 || state < 0)
1562                                 {
1563                                         error("unknown symbol following ^^");
1564                                         continue;
1565                                 }
1566
1567                                 *tk.u32++ = (TOKEN)j;
1568                                 continue;
1569                         default:
1570                                 interror(2);    // Bad MULTX entry in chrtab
1571                                 continue;
1572                         }
1573                 }
1574
1575                 // Handle decimal constant
1576                 if (c & DIGIT)
1577                 {
1578                         uint8_t * numStart = ln;
1579                         v = 0;
1580
1581                         while ((int)chrtab[*ln] & DIGIT)
1582                                 v = (v * 10) + *ln++ - '0';
1583
1584                         // See if there's a .[bwl] after the constant & deal with it if so
1585                         if (*ln == '.')
1586                         {
1587                                 if ((*(ln + 1) == 'b') || (*(ln + 1) == 'B'))
1588                                 {
1589                                         v &= 0x000000FF;
1590                                         ln += 2;
1591                                         *tk.u32++ = CONST;
1592                                         *tk.u64++ = v;
1593                                         *tk.u32++ = DOTB;
1594                                 }
1595                                 else if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
1596                                 {
1597                                         v &= 0x0000FFFF;
1598                                         ln += 2;
1599                                         *tk.u32++ = CONST;
1600                                         *tk.u64++ = v;
1601                                         *tk.u32++ = DOTW;
1602                                 }
1603                                 else if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
1604                                 {
1605                                         v &= 0xFFFFFFFF;
1606                                         ln += 2;
1607                                         *tk.u32++ = CONST;
1608                                         *tk.u64++ = v;
1609                                         *tk.u32++ = DOTL;
1610                                 }
1611                                 else if ((int)chrtab[*(ln + 1)] & DIGIT)
1612                                 {
1613                                         // Hey, more digits after the dot, so we assume it's a
1614                                         // floating point number of some kind... numEnd will point
1615                                         // to the first non-float character after it's done
1616                                         char * numEnd;
1617                                         errno = 0;
1618                                         double f = strtod(numStart, &numEnd);
1619                                         ln = (uint8_t *)numEnd;
1620
1621                                         if (errno != 0)
1622                                                 return error("floating point parse error");
1623
1624                                         // N.B.: We use the C compiler's internal double
1625                                         //       representation for all internal float calcs and
1626                                         //       are reasonably sure that the size of said double
1627                                         //       is 8 bytes long (which we check for in fltpoint.c)
1628                                         *tk.u32++ = FCONST;
1629                                         *tk.dp = f;
1630                                         tk.u64++;
1631                                         continue;
1632                                 }
1633                         }
1634                         else
1635                         {
1636                                 *tk.u32++ = CONST;
1637                                 *tk.u64++ = v;
1638                         }
1639
1640 //printf("CONST: %i\n", v);
1641                         continue;
1642                 }
1643
1644                 // Handle illegal character
1645                 return error("illegal character $%02X found", *ln);
1646         }
1647
1648         // Terminate line of tokens and return "success."
1649
1650 goteol:
1651         tok = etok;                             // Set tok to beginning of line
1652
1653         if (stuffnull)                  // Terminate last SYMBOL
1654                 *nullspot = EOS;
1655
1656         *tk.u32++ = EOL;
1657
1658         return OK;
1659 }
1660
1661
1662 //
1663 // .GOTO <label>        goto directive
1664 //
1665 // The label is searched for starting from the first line of the current,
1666 // enclosing macro definition. If no enclosing macro exists, an error is
1667 // generated.
1668 //
1669 // A label is of the form:
1670 //
1671 // :<name><whitespace>
1672 //
1673 // The colon must appear in column 1.  The label is stripped prior to macro
1674 // expansion, and is NOT subject to macro expansion.  The whitespace may also
1675 // be EOL.
1676 //
1677 int d_goto(WORD unused)
1678 {
1679         // Setup for the search
1680         if (*tok != SYMBOL)
1681                 return error("missing label");
1682
1683         char * sym = string[tok[1]];
1684         tok += 2;
1685
1686         if (cur_inobj->in_type != SRC_IMACRO)
1687                 return error("goto not in macro");
1688
1689         IMACRO * imacro = cur_inobj->inobj.imacro;
1690         LLIST * defln = imacro->im_macro->lineList;
1691
1692         // Attempt to find the label, starting with the first line.
1693         for(; defln!=NULL; defln=defln->next)
1694         {
1695                 // Must start with a colon
1696                 if (defln->line[0] == ':')
1697                 {
1698                         // Compare names (sleazo string compare)
1699                         char * s1 = sym;
1700                         char * s2 = defln->line + 1;
1701
1702                         // Either we will match the strings to EOS on both, or we will
1703                         // match EOS on string 1 to whitespace on string 2. Otherwise, we
1704                         // have no match.
1705                         while ((*s1 == *s2) || ((*s1 == EOS) && (chrtab[*s2] & WHITE)))
1706                         {
1707                                 // If we reached the end of string 1 (sym), we're done.
1708                                 // Note that we're also checking for the end of string 2 as
1709                                 // well, since we've established they're equal above.
1710                                 if (*s1 == EOS)
1711                                 {
1712                                         // Found the label, set new macro next-line and return.
1713                                         imacro->im_nextln = defln;
1714                                         return 0;
1715                                 }
1716
1717                                 s1++;
1718                                 s2++;
1719                         }
1720                 }
1721         }
1722
1723         return error("goto label not found");
1724 }
1725
1726
1727 void DumpToken(TOKEN t)
1728 {
1729         if (t == COLON)
1730                 printf("[COLON]");
1731         else if (t == CONST)
1732                 printf("[CONST]");
1733         else if (t == FCONST)
1734                 printf("[FCONST]");
1735         else if (t == ACONST)
1736                 printf("[ACONST]");
1737         else if (t == STRING)
1738                 printf("[STRING]");
1739         else if (t == SYMBOL)
1740                 printf("[SYMBOL]");
1741         else if (t == EOS)
1742                 printf("[EOS]");
1743         else if (t == TKEOF)
1744                 printf("[TKEOF]");
1745         else if (t == DEQUALS)
1746                 printf("[DEQUALS]");
1747         else if (t == SET)
1748                 printf("[SET]");
1749         else if (t == REG)
1750                 printf("[REG]");
1751         else if (t == DCOLON)
1752                 printf("[DCOLON]");
1753         else if (t == GE)
1754                 printf("[GE]");
1755         else if (t == LE)
1756                 printf("[LE]");
1757         else if (t == NE)
1758                 printf("[NE]");
1759         else if (t == SHR)
1760                 printf("[SHR]");
1761         else if (t == SHL)
1762                 printf("[SHL]");
1763         else if (t == UNMINUS)
1764                 printf("[UNMINUS]");
1765         else if (t == DOTB)
1766                 printf("[DOTB]");
1767         else if (t == DOTW)
1768                 printf("[DOTW]");
1769         else if (t == DOTL)
1770                 printf("[DOTL]");
1771         else if (t == DOTQ)
1772                 printf("[DOTQ]");
1773         else if (t == DOTS)
1774                 printf("[DOTS]");
1775         else if (t == DOTD)
1776                 printf("[DOTD]");
1777         else if (t == DOTI)
1778                 printf("[DOTI]");
1779         else if (t == ENDEXPR)
1780                 printf("[ENDEXPR]");
1781         else if (t == CR_ABSCOUNT)
1782                 printf("[CR_ABSCOUNT]");
1783         else if (t == CR_FILESIZE)
1784                 printf("[CR_FILESIZE]");
1785         else if (t == CR_DEFINED)
1786                 printf("[CR_DEFINED]");
1787         else if (t == CR_REFERENCED)
1788                 printf("[CR_REFERENCED]");
1789         else if (t == CR_STREQ)
1790                 printf("[CR_STREQ]");
1791         else if (t == CR_MACDEF)
1792                 printf("[CR_MACDEF]");
1793         else if (t == CR_TIME)
1794                 printf("[CR_TIME]");
1795         else if (t == CR_DATE)
1796                 printf("[CR_DATE]");
1797         else if (t >= 0x20 && t <= 0x2F)
1798                 printf("[%c]", (char)t);
1799         else if (t >= 0x3A && t <= 0x3F)
1800                 printf("[%c]", (char)t);
1801         else if (t >= 0x80 && t <= 0x87)
1802                 printf("[D%u]", ((uint32_t)t) - 0x80);
1803         else if (t >= 0x88 && t <= 0x8F)
1804                 printf("[A%u]", ((uint32_t)t) - 0x88);
1805         else
1806                 printf("[%X:%c]", (uint32_t)t, (char)t);
1807 }
1808
1809
1810 void DumpTokenBuffer(void)
1811 {
1812         printf("Tokens [%X]: ", sloc);
1813
1814         for(TOKEN * t=tokbuf; *t!=EOL; t++)
1815         {
1816                 if (*t == COLON)
1817                         printf("[COLON]");
1818                 else if (*t == CONST)
1819                 {
1820                         PTR tp;
1821                         tp.u32 = t + 1;
1822                         printf("[CONST: $%lX]", *tp.u64);
1823                         t += 2;
1824                 }
1825                 else if (*t == FCONST)
1826                 {
1827                         PTR tp;
1828                         tp.u32 = t + 1;
1829                         printf("[FCONST: $%lX]", *tp.u64);
1830                         t += 2;
1831                 }
1832                 else if (*t == ACONST)
1833                 {
1834                         printf("[ACONST: $%X, $%X]", (uint32_t)t[1], (uint32_t)t[2]);
1835                         t += 2;
1836                 }
1837                 else if (*t == STRING)
1838                 {
1839                         t++;
1840                         printf("[STRING:\"%s\"]", string[*t]);
1841                 }
1842                 else if (*t == SYMBOL)
1843                 {
1844                         t++;
1845                         printf("[SYMBOL:\"%s\"]", string[*t]);
1846                 }
1847                 else if (*t == EOS)
1848                         printf("[EOS]");
1849                 else if (*t == TKEOF)
1850                         printf("[TKEOF]");
1851                 else if (*t == DEQUALS)
1852                         printf("[DEQUALS]");
1853                 else if (*t == SET)
1854                         printf("[SET]");
1855                 else if (*t == REG)
1856                         printf("[REG]");
1857                 else if (*t == DCOLON)
1858                         printf("[DCOLON]");
1859                 else if (*t == GE)
1860                         printf("[GE]");
1861                 else if (*t == LE)
1862                         printf("[LE]");
1863                 else if (*t == NE)
1864                         printf("[NE]");
1865                 else if (*t == SHR)
1866                         printf("[SHR]");
1867                 else if (*t == SHL)
1868                         printf("[SHL]");
1869                 else if (*t == UNMINUS)
1870                         printf("[UNMINUS]");
1871                 else if (*t == DOTB)
1872                         printf("[DOTB]");
1873                 else if (*t == DOTW)
1874                         printf("[DOTW]");
1875                 else if (*t == DOTL)
1876                         printf("[DOTL]");
1877                 else if (*t == DOTQ)
1878                         printf("[DOTQ]");
1879                 else if (*t == DOTS)
1880                         printf("[DOTS]");
1881                 else if (*t == DOTD)
1882                         printf("[DOTD]");
1883                 else if (*t == DOTI)
1884                         printf("[DOTI]");
1885                 else if (*t == ENDEXPR)
1886                         printf("[ENDEXPR]");
1887                 else if (*t == CR_ABSCOUNT)
1888                         printf("[CR_ABSCOUNT]");
1889                 else if (*t == CR_FILESIZE)
1890                         printf("[CR_FILESIZE]");
1891                 else if (*t == CR_DEFINED)
1892                         printf("[CR_DEFINED]");
1893                 else if (*t == CR_REFERENCED)
1894                         printf("[CR_REFERENCED]");
1895                 else if (*t == CR_STREQ)
1896                         printf("[CR_STREQ]");
1897                 else if (*t == CR_MACDEF)
1898                         printf("[CR_MACDEF]");
1899                 else if (*t == CR_TIME)
1900                         printf("[CR_TIME]");
1901                 else if (*t == CR_DATE)
1902                         printf("[CR_DATE]");
1903                 else if (*t >= 0x20 && *t <= 0x2F)
1904                         printf("[%c]", (char)*t);
1905                 else if (*t >= 0x3A && *t <= 0x3F)
1906                         printf("[%c]", (char)*t);
1907                 else if (*t >= 0x80 && *t <= 0x87)
1908                         printf("[D%u]", ((uint32_t)*t) - 0x80);
1909                 else if (*t >= 0x88 && *t <= 0x8F)
1910                         printf("[A%u]", ((uint32_t)*t) - 0x88);
1911                 else
1912                         printf("[%X:%c]", (uint32_t)*t, (char)*t);
1913         }
1914
1915         printf("[EOL]\n");
1916 }
1917