Partial fix for bug #108 (Fixup cleanups).
[rmac] / token.c
1 //
2 // RMAC - Reboot's Macro Assembler for all Atari computers
3 // TOKEN.C - Token Handling
4 // Copyright (C) 199x Landon Dyer, 2011-2018 Reboot and Friends
5 // RMAC derived from MADMAC v1.07 Written by Landon Dyer, 1986
6 // Source utilised with the kind permission of Landon Dyer
7 //
8
9 #include "token.h"
10
11 #include <errno.h>
12 #include "direct.h"
13 #include "error.h"
14 #include "macro.h"
15 #include "procln.h"
16 #include "sect.h"
17 #include "symbol.h"
18
19 #define DECL_KW                         // Declare keyword arrays
20 #define DEF_KW                          // Declare keyword values
21 #include "kwtab.h"                      // Incl generated keyword tables & defs
22
23
24 int lnsave;                                     // 1; strcpy() text of current line
25 uint16_t curlineno;                     // Current line number (64K max currently)
26 int totlines;                           // Total # of lines
27 int mjump_align = 0;            // mjump alignment flag
28 char lntag;                                     // Line tag
29 char * curfname;                        // Current filename
30 char tolowertab[128];           // Uppercase ==> lowercase
31 int8_t hextab[128];                     // Table of hex values
32 char dotxtab[128];                      // Table for ".b", ".s", etc.
33 char irbuf[LNSIZ];                      // Text for .rept block line
34 char lnbuf[LNSIZ];                      // Text of current line
35 WORD filecount;                         // Unique file number counter
36 WORD cfileno;                           // Current file number
37 TOKEN * tok;                            // Ptr to current token
38 TOKEN * etok;                           // Ptr past last token in tokbuf[]
39 TOKEN tokeol[1] = {EOL};        // Bailout end-of-line token
40 char * string[TOKBUFSIZE*2];// Token buffer string pointer storage
41 int optimizeOff;                        // Optimization override flag
42
// File record: one node per include file ever visited. The nodes form a
// singly linked list; SetFilenameForErrorReporting() finds a file's name by
// walking 'cfileno' links from the head.
#define FILEREC struct _filerec
struct _filerec
{
	struct _filerec * frec_next;	// Next record in the list (NULL at end)
	char * frec_name;				// Name of the file
};

FILEREC * filerec;					// Head of the file record list
FILEREC * last_fr;					// Presumably the last record in the list
53
54 INOBJ * cur_inobj;                      // Ptr current input obj (IFILE/IMACRO)
55 static INOBJ * f_inobj;         // Ptr list of free INOBJs
56 static IFILE * f_ifile;         // Ptr list of free IFILEs
57 static IMACRO * f_imacro;       // Ptr list of free IMACROs
58
59 static TOKEN tokbuf[TOKBUFSIZE];        // Token buffer (stack-like, all files)
60
61 uint8_t chrtab[0x100] = {
62         ILLEG, ILLEG, ILLEG, ILLEG,                     // NUL SOH STX ETX
63         ILLEG, ILLEG, ILLEG, ILLEG,                     // EOT ENQ ACK BEL
64         ILLEG, WHITE, ILLEG, ILLEG,                     // BS HT LF VT
65         WHITE, ILLEG, ILLEG, ILLEG,                     // FF CR SO SI
66
67         ILLEG, ILLEG, ILLEG, ILLEG,                     // DLE DC1 DC2 DC3
68         ILLEG, ILLEG, ILLEG, ILLEG,                     // DC4 NAK SYN ETB
69         ILLEG, ILLEG, ILLEG, ILLEG,                     // CAN EM SUB ESC
70         ILLEG, ILLEG, ILLEG, ILLEG,                     // FS GS RS US
71
72         WHITE, MULTX, MULTX, SELF,                      // SP ! " #
73         MULTX+CTSYM, MULTX, SELF, MULTX,        // $ % & '
74         SELF, SELF, SELF, SELF,                         // ( ) * +
75         SELF, SELF, STSYM, SELF,                        // , - . /
76
77         DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM,         // 0 1
78         DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM,         // 2 3
79         DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM,         // 4 5
80         DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM,         // 6 7
81         DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM,         // 8 9
82         MULTX, MULTX,                                                           // : ;
83         MULTX, MULTX, MULTX, STSYM+CTSYM,                       // < = > ?
84
85         MULTX, STSYM+CTSYM+HDIGIT,                                      // @ A
86         DOT+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT,     // B C
87         DOT+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT,     // D E
88         STSYM+CTSYM+HDIGIT, STSYM+CTSYM,                        // F G
89         STSYM+CTSYM, DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // H I J K
90         DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // L M N O
91
92         DOT+STSYM+CTSYM, DOT+STSYM+CTSYM, STSYM+CTSYM, DOT+STSYM+CTSYM, // P Q R S
93         STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, DOT+STSYM+CTSYM, // T U V W
94         STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, SELF,// X Y Z [
95         SELF, SELF, MULTX, STSYM+CTSYM,                         // \ ] ^ _
96
97         ILLEG, STSYM+CTSYM+HDIGIT,                                      // ` a
98         DOT+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT,     // b c
99         DOT+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT,     // d e
100         STSYM+CTSYM+HDIGIT, STSYM+CTSYM,                        // f g
101         STSYM+CTSYM, DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // h i j k
102         DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // l m n o
103
104         DOT+STSYM+CTSYM, DOT+STSYM+CTSYM, STSYM+CTSYM, DOT+STSYM+CTSYM, // p q r s
105         STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, DOT+STSYM+CTSYM, // t u v w
106         DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, SELF,                // x y z {
107         SELF, SELF, SELF, ILLEG,                                        // | } ~ DEL
108
109         // Anything above $7F is illegal (and yes, we need to check for this,
110         // otherwise you get strange and spurious errors that will lead you astray)
111         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
112         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
113         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
114         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
115         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
116         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
117         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
118         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
119         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
120         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
121         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
122         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
123         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
124         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
125         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
126         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG
127 };
128
//
// Names of registers, used by ExpandMacro() to turn a keyword token back into
// text: the entry for token T is regname[T - KW_D0]. Empty strings mark slots
// with no name. The number on each row is the value the original table
// annotated for the row's first entry (presumably its token value -- confirm
// against kwtab.h).
//
static char * regname[] = {
	"d0",   "d1",    "d2",   "d3",   "d4",     "d5",    "d6",    "d7",		// 128
	"a0",   "a1",    "a2",   "a3",   "a4",     "a5",    "a6",    "sp",		// 136
	"ssp",  "pc",    "sr",   "ccr",  "regequ", "set",   "reg",   "r0",		// 144
	"r1",   "r2",    "r3",   "r4",   "r5",     "r6",    "r7",    "r8",		// 152
	"r9",   "r10",   "r11",  "r12",  "r13",    "r14",   "r15",   "r16",		// 160
	"r17",  "r18",   "r19",  "r20",  "r21",    "r22",   "r23",   "r24",		// 168
	"r25",  "r26",   "r27",  "r28",  "r29",    "r30",   "r31",   "ccdef",	// 176
	"usp",  "ic40",  "dc40", "bc40", "sfc",    "dfc",   "",      "vbr",		// 184
	"cacr", "caar",  "msp",  "isp",  "tc",     "itt0",  "itt1",  "dtt0",	// 192
	"dtt1", "mmusr", "urp",  "srp",  "iacr0",  "iacr1", "dacr0", "dacr1",	// 200
	"tt0",  "tt1",   "crp",  "",     "",       "",      "",      "",		// 208
	"",     "",      "",     "",     "fpiar",  "fpsr",  "fpcr",  "",		// 216
	"fp0",  "fp1",   "fp2",  "fp3",  "fp4",    "fp5",   "fp6",   "fp7",		// 224
	"",     "",      "",     "",     "",       "",      "",      "",		// 232
	"",     "",      "",     "",     "",       "",      "",      "",		// 240
	"",     "",      "",     "",     "",       "",      "",      "",		// 248
	"",     "",      "",     "",     "x0",     "x1",    "y0",    "y1",		// 256
	"",     "b0",    "",     "b2",   "",       "b1",    "a",     "b",		// 264
	"mr",   "omr",   "la",   "lc",   "ssh",    "ssl",   "ss",    "",		// 272
	"n0",   "n1",    "n2",   "n3",   "n4",     "n5",    "n6",    "n7",		// 280
	"m0",   "m1",    "m2",   "m3",   "m4",     "m5",    "m6",    "m7",		// 288
	"",     "",      "",     "",     "",       "",      "l",     "p",		// 296
	"mr",   "omr",   "la",   "lc",   "ssh",    "ssl",   "ss",    "",		// 304
	"a10",  "b10",   "x",    "y",    "",       "",      "ab",    "ba"		// 312
};
156
// Register names "r0".."r31"; ExpandMacro() indexes this table with
// (token - KW_R0) for tokens in the range KW_R0..KW_R31
static char * riscregname[] = {
	"r0",  "r1",  "r2",  "r3",
	"r4",  "r5",  "r6",  "r7",
	"r8",  "r9",  "r10", "r11",
	"r12", "r13", "r14", "r15",
	"r16", "r17", "r18", "r19",
	"r20", "r21", "r22", "r23",
	"r24", "r25", "r26", "r27",
	"r28", "r29", "r30", "r31"
};
163
164
165 //
166 // Initialize tokenizer
167 //
168 void InitTokenizer(void)
169 {
170         int i;                                                                  // Iterator
171         char * htab = "0123456789abcdefABCDEF"; // Hex character table
172
173         lnsave = 0;                                                             // Don't save lines
174         curfname = "";                                                  // No file, empty filename
175         filecount = (WORD)-1;
176         cfileno = (WORD)-1;                                             // cfileno gets bumped to 0
177         curlineno = 0;
178         totlines = 0;
179         etok = tokbuf;
180         f_inobj = NULL;
181         f_ifile = NULL;
182         f_imacro = NULL;
183         cur_inobj = NULL;
184         filerec = NULL;
185         last_fr = NULL;
186         lntag = SPACE;
187
188         // Initialize hex, "dot" and tolower tables
189         for(i=0; i<128; i++)
190         {
191                 hextab[i] = -1;
192                 dotxtab[i] = 0;
193                 tolowertab[i] = (char)i;
194         }
195
196         for(i=0; htab[i]!=EOS; i++)
197                 hextab[htab[i]] = (char)((i < 16) ? i : i - 6);
198
199         for(i='A'; i<='Z'; i++)
200                 tolowertab[i] |= 0x20;
201
202         // These characters are legal immediately after a period
203         dotxtab['b'] = DOTB;                                    // .b .B .s .S
204         dotxtab['B'] = DOTB;
205         //dotxtab['s'] = DOTB;
206         //dotxtab['S'] = DOTB;
207         dotxtab['w'] = DOTW;                                    // .w .W
208         dotxtab['W'] = DOTW;
209         dotxtab['l'] = DOTL;                                    // .l .L
210         dotxtab['L'] = DOTL;
211         dotxtab['i'] = DOTI;                                    // .i .I (WTF is this???)
212         dotxtab['I'] = DOTI;
213         dotxtab['D'] = DOTD;                                    // .d .D (double)
214         dotxtab['d'] = DOTD;
215         dotxtab['S'] = DOTS;                                    // .s .S
216         dotxtab['s'] = DOTS;
217         dotxtab['Q'] = DOTQ;                                    // .q .Q (quad word)
218         dotxtab['q'] = DOTQ;
219         dotxtab['X'] = DOTX;                                    // .x .x
220         dotxtab['x'] = DOTX;
221         dotxtab['P'] = DOTP;                                    // .p .P
222         dotxtab['p'] = DOTP;
223 }
224
225
226 void SetFilenameForErrorReporting(void)
227 {
228         WORD fnum = cfileno;
229
230         // Check for absolute top filename (this should never happen)
231         if (fnum == -1)
232         {
233                 curfname = "(*top*)";
234                 return;
235         }
236
237         FILEREC * fr = filerec;
238
239         // Advance to the correct record...
240         while (fr != NULL && fnum != 0)
241         {
242                 fr = fr->frec_next;
243                 fnum--;
244         }
245
246         // Check for file # record not found (this should never happen either)
247         if (fr == NULL)
248         {
249                 curfname = "(*NOT FOUND*)";
250                 return;
251         }
252
253         curfname = fr->frec_name;
254 }
255
256
257 //
258 // Allocate an IFILE or IMACRO
259 //
260 INOBJ * a_inobj(int typ)
261 {
262         INOBJ * inobj;
263         IFILE * ifile;
264         IMACRO * imacro;
265
266         // Allocate and initialize INOBJ first
267         if (f_inobj == NULL)
268                 inobj = malloc(sizeof(INOBJ));
269         else
270         {
271                 inobj = f_inobj;
272                 f_inobj = f_inobj->in_link;
273         }
274
275         switch (typ)
276         {
277         case SRC_IFILE:                                                 // Alloc and init an IFILE
278                 if (f_ifile == NULL)
279                         ifile = malloc(sizeof(IFILE));
280                 else
281                 {
282                         ifile = f_ifile;
283                         f_ifile = f_ifile->if_link;
284                 }
285
286                 inobj->inobj.ifile = ifile;
287                 break;
288
289         case SRC_IMACRO:                                                // Alloc and init an IMACRO
290                 if (f_imacro == NULL)
291                         imacro = malloc(sizeof(IMACRO));
292                 else
293                 {
294                         imacro = f_imacro;
295                         f_imacro = f_imacro->im_link;
296                 }
297
298                 inobj->inobj.imacro = imacro;
299                 break;
300
301         case SRC_IREPT:                                                 // Alloc and init an IREPT
302                 inobj->inobj.irept = malloc(sizeof(IREPT));
303                 DEBUG { printf("alloc IREPT\n"); }
304                 break;
305         }
306
307         // Install INOBJ on top of input stack
308         inobj->in_ifent = ifent;                                // Record .if context on entry
309         inobj->in_type = (WORD)typ;
310         inobj->in_otok = tok;
311         inobj->in_etok = etok;
312         inobj->in_link = cur_inobj;
313         cur_inobj = inobj;
314
315         return inobj;
316 }
317
318
319 //
320 // Perform macro substitution from 'orig' to 'dest'. Return OK or some error.
321 // A macro reference is in one of two forms:
322 // \name <non-name-character>
323 // \{name}
324 // A doubled backslash (\\) is compressed to a single backslash (\).
325 // Argument definitions have been pre-tokenized, so we have to turn them back
326 // into text. This means that numbers, in particular, become hex, regardless of
327 // their representation when the macro was invoked. This is a hack.
328 // A label may appear at the beginning of the line:
329 // :<name><whitespace>
330 // (the colon must be in the first column). These labels are stripped before
331 // macro expansion takes place.
332 //
333 int ExpandMacro(char * src, char * dest, int destsiz)
334 {
335         int i;
336         int questmark;                  // \? for testing argument existence
337         char mname[128];                // Assume max size of a formal arg name
338         char numbuf[20];                // Buffer for text of CONSTs
339         TOKEN * tk;
340         SYM * arg;
341         char ** symbolString;
342
343         DEBUG { printf("ExM: src=\"%s\"\n", src); }
344
345         IMACRO * imacro = cur_inobj->inobj.imacro;
346         int macnum = (int)(imacro->im_macro->sattr);
347
348         char * dst = dest;                                              // Next dest slot
349         char * edst = dest + destsiz - 1;               // End + 1(?) of dest buffer
350
351         // Check for (and skip over) any "label" on the line
352         char * s = src;
353         char * d = NULL;
354
355         if (*s == ':')
356         {
357                 while (*s != EOS && !(chrtab[*s] & WHITE))
358                         s++;
359
360                 if (*s != EOS)
361                         s++;                                                    // Skip first whitespace
362         }
363
364         // Expand the rest of the line
365         while (*s != EOS)
366         {
367                 // Copy single character
368                 if (*s != '\\')
369                 {
370                         if (dst >= edst)
371                                 goto overflow;
372
373                         // Skip comments in case a loose @ or \ is in there
374                         // In that case the tokeniser was trying to expand it.
375                         if ((*s == ';') || ((*s == '/') && (*(s + 1) == '/')))
376                                 goto skipcomments;
377
378                         *dst++ = *s++;
379                 }
380                 // Do macro expansion
381                 else
382                 {
383                         questmark = 0;
384
385                         // Do special cases
386                         switch (*++s)
387                         {
388                         case '\\':                                              // \\, \ (collapse to single backslash)
389                                 if (dst >= edst)
390                                         goto overflow;
391
392                                 *dst++ = *s++;
393                                 continue;
394                         case '?':                                               // \? <macro>  set `questmark' flag
395                                 s++;
396                                 questmark = 1;
397                                 break;
398                         case '#':                                               // \#, number of arguments
399                                 sprintf(numbuf, "%d", (int)imacro->im_nargs);
400                                 goto copystr;
401                         case '!':                                               // \! size suffix supplied on invocation
402                                 switch ((int)imacro->im_siz)
403                                 {
404                                 case SIZN: d = "";   break;
405                                 case SIZB: d = ".b"; break;
406                                 case SIZW: d = ".w"; break;
407                                 case SIZL: d = ".l"; break;
408                                 }
409
410                                 goto copy_d;
411                         case '~':                                               // ==> unique label string Mnnnn...
412                                 sprintf(numbuf, "M%u", curuniq);
413 copystr:
414                                 d = numbuf;
415 copy_d:
416                                 s++;
417
418                                 while (*d != EOS)
419                                 {
420                                         if (dst >= edst)
421                                                 goto overflow;
422                                         else
423                                                 *dst++ = *d++;
424                                 }
425
426                                 continue;
427                         case EOS:
428                                 return error("missing argument name");
429                         }
430
431                         // \n ==> argument number 'n', 0..9
432                         if (chrtab[*s] & DIGIT)
433                         {
434                                 i = *s++ - '1';
435
436                                 if (i < 0)
437                                         i = 9;
438
439                                 goto arg_num;
440                         }
441
442                         // Get argument name: \name, \{name}
443                         d = mname;
444
445                         // \label
446                         if (*s != '{')
447                         {
448                                 do
449                                 {
450                                         *d++ = *s++;
451                                 }
452                                 while (chrtab[*s] & CTSYM);
453                         }
454                         // \\{label}
455                         else
456                         {
457                                 for(++s; *s != EOS && *s != '}';)
458                                         *d++ = *s++;
459
460                                 if (*s != '}')
461                                         return error("missing closing brace ('}')");
462                                 else
463                                         s++;
464                         }
465
466                         *d = EOS;
467
468                         // Lookup the argument and copy its (string) value into the
469                         // destination string
470                         DEBUG { printf("argument='%s'\n", mname); }
471
472                         if ((arg = lookup(mname, MACARG, macnum)) == NULL)
473                                 return error("undefined argument: '%s'", mname);
474                         else
475                         {
476                                 // Convert a string of tokens (terminated with EOL) back into
477                                 // text. If an argument is out of range (not specified in the
478                                 // macro invocation) then it is ignored.
479                                 i = (int)arg->svalue;
480 arg_num:
481                                 DEBUG { printf("~argnumber=%d\n", i); }
482                                 tk = NULL;
483
484                                 if (i < imacro->im_nargs)
485                                 {
486                                         tk = imacro->argument[i].token;
487                                         symbolString = imacro->argument[i].string;
488 //DEBUG
489 //{
490 //      printf("ExM: Preparing to parse argument #%u...\n", i);
491 //      DumpTokens(tk);
492 //}
493                                 }
494
495                                 // \?arg yields:
496                                 //    0  if the argument is empty or non-existant,
497                                 //    1  if the argument is not empty
498                                 if (questmark)
499                                 {
500                                         if (tk == NULL || *tk == EOL)
501                                                 questmark = 0;
502
503                                         if (dst >= edst)
504                                                 goto overflow;
505
506                                         *dst++ = (char)(questmark + '0');
507                                         continue;
508                                 }
509
510                                 // Argument # is in range, so expand it
511                                 if (tk != NULL)
512                                 {
513                                         while (*tk != EOL)
514                                         {
515                                                 // Reverse-translation from a token number to a string.
516                                                 // This is a hack. It might be better table-driven.
517                                                 d = NULL;
518
519                                                 if ((*tk >= KW_D0) && !rdsp && !rgpu)
520                                                 {
521                                                         d = regname[(int)*tk++ - KW_D0];
522                                                         goto strcopy;
523                                                 }
524                                                 else if ((*tk >= KW_R0) && (*tk <= KW_R31))
525                                                 {
526                                                         d = riscregname[(int)*tk++ - KW_R0];
527                                                         goto strcopy;
528                                                 }
529                                                 else
530                                                 {
531                                                         switch ((int)*tk++)
532                                                         {
533                                                         case SYMBOL:
534                                                                 d = symbolString[*tk++];
535 DEBUG { printf("ExM: SYMBOL=\"%s\"", d); }
536                                                                 break;
537                                                         case STRING:
538                                                                 d = symbolString[*tk++];
539
540                                                                 if (dst >= edst)
541                                                                         goto overflow;
542
543                                                                 *dst++ = '"';
544
545                                                                 while (*d != EOS)
546                                                                 {
547                                                                         if (dst >= edst)
548                                                                                 goto overflow;
549                                                                         else
550                                                                                 *dst++ = *d++;
551                                                                 }
552
553                                                                 if (dst >= edst)
554                                                                         goto overflow;
555
556                                                                 *dst++ = '"';
557                                                                 continue;
558                                                                 break;
559 // Shamus: Changing the format specifier from %lx to %ux caused the assembler
560 //         to choke on legitimate code... Need to investigate this further
561 //         before changing anything else here!
562                                                         case CONST:
563                                                                 sprintf(numbuf, "$%lx", (uint64_t)*tk++);
564                                                                 tk++;
565                                                                 d = numbuf;
566                                                                 break;
567                                                         case DEQUALS:
568                                                                 d = "==";
569                                                                 break;
570                                                         case SET:
571                                                                 d = "set";
572                                                                 break;
573                                                         case COLON:
574                                                                 d = ":";
575                                                                 break;
576                                                         case DCOLON:
577                                                                 d = "::";
578                                                                 break;
579                                                         case GE:
580                                                                 d = ">=";
581                                                                 break;
582                                                         case LE:
583                                                                 d = "<=";
584                                                                 break;
585                                                         case NE:
586                                                                 d = "<>";
587                                                                 break;
588                                                         case SHR:
589                                                                 d = ">>";
590                                                                 break;
591                                                         case SHL:
592                                                                 d = "<<";
593                                                                 break;
594                                                         case DOTB:
595                                                                 d = ".b";
596                                                                 break;
597                                                         case DOTW:
598                                                                 d = ".w";
599                                                                 break;
600                                                         case DOTL:
601                                                                 d = ".l";
602                                                                 break;
603                                                         case CR_ABSCOUNT:
604                                                                 d = "^^abscount";
605                                                                 break;
606                                                         case CR_DATE:
607                                                                 d = "^^date";
608                                                                 break;
609                                                         case CR_TIME:
610                                                                 d = "^^time";
611                                                                 break;
612                                                         case CR_DEFINED:
613                                                                 d = "^^defined ";
614                                                                 break;
615                                                         case CR_REFERENCED:
616                                                                 d = "^^referenced ";
617                                                                 break;
618                                                         case CR_STREQ:
619                                                                 d = "^^streq ";
620                                                                 break;
621                                                         case CR_MACDEF:
622                                                                 d = "^^macdef ";
623                                                                 break;
624                                                         default:
625                                                                 if (dst >= edst)
626                                                                         goto overflow;
627
628                                                                 *dst++ = (char)*(tk - 1);
629                                                                 break;
630                                                         }
631                                                 }
632
633                                                 // If 'd' != NULL, copy string to destination
634                                                 if (d != NULL)
635                                                 {
636 strcopy:
637                                                         DEBUG printf("d='%s'\n", d);
638
639                                                         while (*d != EOS)
640                                                         {
641                                                                 if (dst >= edst)
642                                                                         goto overflow;
643                                                                 else
644                                                                         *dst++ = *d++;
645                                                         }
646                                                 }
647                                         }
648                                 }
649                         }
650                 }
651         }
652
653 skipcomments:
654
655         *dst = EOS;
656         DEBUG { printf("ExM: dst=\"%s\"\n", dest); }
657         return OK;
658
659 overflow:
660         *dst = EOS;
661         DEBUG { printf("*** OVERFLOW LINE ***\n%s\n", dest); }
662         return fatal("line too long as a result of macro expansion");
663 }
664
665
666 //
667 // Get next line of text from a macro
668 //
669 char * GetNextMacroLine(void)
670 {
671         IMACRO * imacro = cur_inobj->inobj.imacro;
672 //      LONG * strp = imacro->im_nextln;
673         LLIST * strp = imacro->im_nextln;
674
675         if (strp == NULL)                                               // End-of-macro
676                 return NULL;
677
678         imacro->im_nextln = strp->next;
679 //      ExpandMacro((char *)(strp + 1), imacro->im_lnbuf, LNSIZ);
680         ExpandMacro(strp->line, imacro->im_lnbuf, LNSIZ);
681
682         return imacro->im_lnbuf;
683 }
684
685
686 //
687 // Get next line of text from a repeat block
688 //
689 char * GetNextRepeatLine(void)
690 {
691         IREPT * irept = cur_inobj->inobj.irept;
692 //      LONG * strp = irept->ir_nextln;                 // initial null
693
694         // Do repeat at end of .rept block's string list
695 //      if (strp == NULL)
696         if (irept->ir_nextln == NULL)
697         {
698                 DEBUG { printf("back-to-top-of-repeat-block count=%d\n", (int)irept->ir_count); }
699                 irept->ir_nextln = irept->ir_firstln;   // copy first line
700
701                 if (irept->ir_count-- == 0)
702                 {
703                         DEBUG { printf("end-repeat-block\n"); }
704                         return NULL;
705                 }
706
707 //              strp = irept->ir_nextln;
708         }
709         // Mark the current macro line in the irept object
710         // This is probably overkill - a global variable
711         // would suffice here (it only gets used during
712         // error reporting anyway)
713         irept->lineno = irept->ir_nextln->lineno;
714
715 //      strcpy(irbuf, (char *)(irept->ir_nextln + 1));
716         strcpy(irbuf, irept->ir_nextln->line);
717         DEBUG { printf("repeat line='%s'\n", irbuf); }
718 //      irept->ir_nextln = (LONG *)*strp;
719         irept->ir_nextln = irept->ir_nextln->next;
720
721         return irbuf;
722 }
723
724
725 //
726 // Include a source file used at the root, and for ".include" files
727 //
728 int include(int handle, char * fname)
729 {
730         // Debug mode
731         DEBUG { printf("[include: %s, cfileno=%u]\n", fname, cfileno); }
732
733         // Alloc and initialize include-descriptors
734         INOBJ * inobj = a_inobj(SRC_IFILE);
735         IFILE * ifile = inobj->inobj.ifile;
736
737         ifile->ifhandle = handle;                       // Setup file handle
738         ifile->ifind = ifile->ifcnt = 0;        // Setup buffer indices
739         ifile->ifoldlineno = curlineno;         // Save old line number
740         ifile->ifoldfname = curfname;           // Save old filename
741         ifile->ifno = cfileno;                          // Save old file number
742
743         // NB: This *must* be preincrement, we're adding one to the filecount here!
744         cfileno = ++filecount;                          // Compute NEW file number
745         curfname = strdup(fname);                       // Set current filename (alloc storage)
746         curlineno = 0;                                          // Start on line zero
747
748         // Add another file to the file-record
749         FILEREC * fr = (FILEREC *)malloc(sizeof(FILEREC));
750         fr->frec_next = NULL;
751         fr->frec_name = curfname;
752
753         if (last_fr == NULL)
754                 filerec = fr;                                   // Add first filerec
755         else
756                 last_fr->frec_next = fr;                // Append to list of filerecs
757
758         last_fr = fr;
759         DEBUG { printf("[include: curfname: %s, cfileno=%u]\n", curfname, cfileno); }
760
761         return OK;
762 }
763
764
765 //
766 // Pop the current input level
767 //
768 int fpop(void)
769 {
770         INOBJ * inobj = cur_inobj;
771
772         if (inobj == NULL)
773                 return 0;
774
775         // Pop IFENT levels until we reach the conditional assembly context we
776         // were at when the input object was entered.
777         int numUnmatched = 0;
778
779         while (ifent != inobj->in_ifent)
780         {
781                 if (d_endif() != 0)     // Something bad happened during endif parsing?
782                         return -1;              // If yes, bail instead of getting stuck in a loop
783
784                 numUnmatched++;
785         }
786
787         // Give a warning to the user that we had to wipe their bum for them
788         if (numUnmatched > 0)
789                 warn("missing %d .endif(s)", numUnmatched);
790
791         tok = inobj->in_otok;   // Restore tok and otok
792         etok = inobj->in_etok;
793
794         switch (inobj->in_type)
795         {
796         case SRC_IFILE:                 // Pop and release an IFILE
797         {
798                 DEBUG { printf("[Leaving: %s]\n", curfname); }
799
800                 IFILE * ifile = inobj->inobj.ifile;
801                 ifile->if_link = f_ifile;
802                 f_ifile = ifile;
803                 close(ifile->ifhandle);                 // Close source file
804 DEBUG { printf("[fpop (pre):  curfname=%s]\n", curfname); }
805                 curfname = ifile->ifoldfname;   // Set current filename
806 DEBUG { printf("[fpop (post): curfname=%s]\n", curfname); }
807 DEBUG { printf("[fpop: (pre)  cfileno=%d ifile->ifno=%d]\n", (int)cfileno, (int)ifile->ifno); }
808                 curlineno = ifile->ifoldlineno; // Set current line#
809                 DEBUG { printf("cfileno=%d ifile->ifno=%d\n", (int)cfileno, (int)ifile->ifno); }
810                 cfileno = ifile->ifno;                  // Restore current file number
811 DEBUG { printf("[fpop: (post) cfileno=%d ifile->ifno=%d]\n", (int)cfileno, (int)ifile->ifno); }
812                 break;
813         }
814
815         case SRC_IMACRO:                                        // Pop and release an IMACRO
816         {
817                 IMACRO * imacro = inobj->inobj.imacro;
818                 imacro->im_link = f_imacro;
819                 f_imacro = imacro;
820                 break;
821         }
822
823         case SRC_IREPT:                                         // Pop and release an IREPT
824         {
825                 DEBUG { printf("dealloc IREPT\n"); }
826                 LLIST * p = inobj->inobj.irept->ir_firstln;
827
828                 // Deallocate repeat lines
829                 while (p != NULL)
830                 {
831                         free(p->line);
832                         p = p->next;
833                 }
834
835                 break;
836         }
837         }
838
839         cur_inobj = inobj->in_link;
840         inobj->in_link = f_inobj;
841         f_inobj = inobj;
842
843         return 0;
844 }
845
846
847 //
848 // Get line from file into buf, return NULL on EOF or ptr to the start of a
849 // null-term line
850 //
851 char * GetNextLine(void)
852 {
853         int i, j;
854         char * p, * d;
855         int readamt = -1;                                               // 0 if last read() yeilded 0 bytes
856         IFILE * fl = cur_inobj->inobj.ifile;
857
858         for(;;)
859         {
860                 // Scan for next end-of-line; handle stupid text formats by treating
861                 // \r\n the same as \n. (lone '\r' at end of buffer means we have to
862                 // check for '\n').
863                 d = &fl->ifbuf[fl->ifind];
864
865                 for(p=d, i=0, j=fl->ifcnt; i<j; i++, p++)
866                 {
867                         if (*p == '\r' || *p == '\n')
868                         {
869                                 i++;
870
871                                 if (*p == '\r')
872                                 {
873                                         if (i >= j)
874                                                 break;  // Need to read more, then look for '\n' to eat
875                                         else if (p[1] == '\n')
876                                                 i++;
877                                 }
878
879                                 // Cover up the newline with end-of-string sentinel
880                                 *p = '\0';
881
882                                 fl->ifind += i;
883                                 fl->ifcnt -= i;
884                                 return d;
885                         }
886                 }
887
888                 // Handle hanging lines by ignoring them (Input file is exhausted, no
889                 // \r or \n on last line)
890                 // Shamus: This is retarded. Never ignore any input!
891                 if (!readamt && fl->ifcnt)
892                 {
893 #if 0
894                         fl->ifcnt = 0;
895                         *p = '\0';
896                         return NULL;
897 #else
898                         // Really should check to see if we're at the end of the buffer!
899                         // :-P
900                         fl->ifbuf[fl->ifind + fl->ifcnt] = '\0';
901                         fl->ifcnt = 0;
902                         return &fl->ifbuf[fl->ifind];
903 #endif
904                 }
905
906                 // Truncate and return absurdly long lines.
907                 if (fl->ifcnt >= QUANTUM)
908                 {
909                         fl->ifbuf[fl->ifind + fl->ifcnt - 1] = '\0';
910                         fl->ifcnt = 0;
911                         return &fl->ifbuf[fl->ifind];
912                 }
913
914                 // Relocate what's left of a line to the beginning of the buffer, and
915                 // read some more of the file in; return NULL if the buffer's empty and
916                 // on EOF.
917                 if (fl->ifind != 0)
918                 {
919                         p = &fl->ifbuf[fl->ifind];
920                         d = &fl->ifbuf[fl->ifcnt & 1];
921
922                         for(i=0; i<fl->ifcnt; i++)
923                                 *d++ = *p++;
924
925                         fl->ifind = fl->ifcnt & 1;
926                 }
927
928                 readamt = read(fl->ifhandle, &fl->ifbuf[fl->ifind + fl->ifcnt], QUANTUM);
929
930                 if (readamt < 0)
931                         return NULL;
932
933                 if ((fl->ifcnt += readamt) == 0)
934                         return NULL;
935         }
936 }
937
938
939 //
940 // Tokenize a line
941 //
942 int TokenizeLine(void)
943 {
944         uint8_t * ln = NULL;            // Ptr to current position in line
945         uint8_t * p;                            // Random character ptr
946         PTR tk;                                         // Token-deposit ptr
947         int state = 0;                          // State for keyword detector
948         int j = 0;                                      // Var for keyword detector
949         uint8_t c;                                      // Random char
950         uint64_t v;                                     // Random value
951         uint32_t cursize = 0;           // Current line's size (.b, .w, .l, .s, .q, .d)
952         double f;                                       // Random float
953         uint8_t * nullspot = NULL;      // Spot to clobber for SYMBOL termination
954         int stuffnull;                          // 1:terminate SYMBOL '\0' at *nullspot
955         uint8_t c1;
956         int stringNum = 0;                      // Pointer to string locations in tokenized line
957
958 retry:
959
960         if (cur_inobj == NULL)          // Return EOF if input stack is empty
961                 return TKEOF;
962
963         // Get another line of input from the current input source: a file, a
964         // macro, or a repeat-block
965         switch (cur_inobj->in_type)
966         {
967         // Include-file:
968         // o  handle EOF;
969         // o  bump source line number;
970         // o  tag the listing-line with a space;
971         // o  kludge lines generated by Alcyon C.
972         case SRC_IFILE:
973                 if ((ln = GetNextLine()) == NULL)
974                 {
975 DEBUG { printf("TokenizeLine: Calling fpop() from SRC_IFILE...\n"); }
976                         if (fpop() == 0)        // Pop input level
977                                 goto retry;             // Try for more lines
978                         else
979                         {
980                                 ifent->if_prev = (IFENT *)-1;   //Signal Assemble() that we have reached EOF with unbalanced if/endifs
981                                 return TKEOF;
982                         }
983                 }
984
985                 curlineno++;                    // Bump line number
986                 lntag = SPACE;
987
988                 if (as68_flag)
989                 {
990                         // AS68 compatibility, throw away all lines starting with
991                         // back-quotes, tildes, or '*'
992                         // On other lines, turn the first '*' into a semi-colon.
993                         if (*ln == '`' || *ln == '~' || *ln == '*')
994                                 *ln = ';';
995                         else
996                         {
997                                 for(p=ln; *p!=EOS; p++)
998                                 {
999                                         if (*p == '*')
1000                                         {
1001                                                 *p = ';';
1002                                                 break;
1003                                         }
1004                                 }
1005                         }
1006                 }
1007
1008                 break;
1009
1010         // Macro-block:
1011         // o  Handle end-of-macro;
1012         // o  tag the listing-line with an at (@) sign.
1013         case SRC_IMACRO:
1014                 if ((ln = GetNextMacroLine()) == NULL)
1015                 {
1016                         if (ExitMacro() == 0)   // Exit macro (pop args, do fpop(), etc)
1017                                 goto retry;                     // Try for more lines...
1018                         else
1019                                 return TKEOF;           // Oops, we got a non zero return code, signal EOF
1020                 }
1021
1022                 lntag = '@';
1023                 break;
1024
1025         // Repeat-block:
1026         // o  Handle end-of-repeat-block;
1027         // o  tag the listing-line with a pound (#) sign.
1028         case SRC_IREPT:
1029                 if ((ln = GetNextRepeatLine()) == NULL)
1030                 {
1031                         DEBUG { printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n"); }
1032                         fpop();
1033                         goto retry;
1034                 }
1035
1036                 lntag = '#';
1037                 break;
1038         }
1039
1040         // Save text of the line. We only do this during listings and within
1041         // macro-type blocks, since it is expensive to unconditionally copy every
1042         // line.
1043         if (lnsave)
1044                 strcpy(lnbuf, ln);
1045
1046         // General housekeeping
1047         tok = tokeol;                   // Set "tok" to EOL in case of error
1048         tk.u32 = etok;                  // Reset token ptr
1049         stuffnull = 0;                  // Don't stuff nulls
1050         totlines++;                             // Bump total #lines assembled
1051
1052         // See if the entire line is a comment. This is a win if the programmer
1053         // puts in lots of comments
1054         if (*ln == '*' || *ln == ';' || ((*ln == '/') && (*(ln + 1) == '/')))
1055                 goto goteol;
1056
1057         // And here we have a very ugly hack for signalling a single line 'turn off
1058         // optimization'. There's really no nice way to do this, so hack it is!
1059         optimizeOff = 0;                // Default is to take optimizations as they come
1060
1061         if (*ln == '!')
1062         {
1063                 optimizeOff = 1;        // Signal that we don't want to optimize this line
1064                 ln++;                           // & skip over the darned thing
1065         }
1066
1067         // Main tokenization loop;
1068         //  o  skip whitespace;
1069         //  o  handle end-of-line;
1070         //  o  handle symbols;
1071         //  o  handle single-character tokens (operators, etc.);
1072         //  o  handle multiple-character tokens (constants, strings, etc.).
1073         for(; *ln!=EOS;)
1074         {
1075                 // Skip whitespace, handle EOL
1076                 while (chrtab[*ln] & WHITE)
1077                         ln++;
1078
1079                 // Handle EOL, comment with ';'
1080                 if (*ln == EOS || *ln == ';'|| ((*ln == '/') && (*(ln + 1) == '/')))
1081                         break;
1082
1083                 // Handle start of symbol. Symbols are null-terminated in place. The
1084                 // termination is always one symbol behind, since there may be no place
1085                 // for a null in the case that an operator immediately follows the name.
1086                 c = chrtab[*ln];
1087
1088                 if (c & STSYM)
1089                 {
1090                         if (stuffnull)                  // Terminate old symbol from previous pass
1091                                 *nullspot = EOS;
1092
1093                         v = 0;                                  // Assume no DOT attrib follows symbol
1094                         stuffnull = 1;
1095
1096                         // In some cases, we need to check for a DOTx at the *beginning*
1097                         // of a symbol, as the "start" of the line we're currently looking
1098                         // at could be somewhere in the middle of that line!
1099                         if (*ln == '.')
1100                         {
1101                                 // Make sure that it's *only* a .[bwsl] following, and not the
1102                                 // start of a local symbol:
1103                                 if ((chrtab[*(ln + 1)] & DOT)
1104                                         && (dotxtab[*(ln + 1)] != 0)
1105                                         && !(chrtab[*(ln + 2)] & CTSYM))
1106                                 {
1107                                         // We found a legitimate DOTx construct, so add it to the
1108                                         // token stream:
1109                                         ln++;
1110                                         stuffnull = 0;
1111                                         *tk.u32++ = (TOKEN)dotxtab[*ln++];
1112                                         continue;
1113                                 }
1114                         }
1115
1116                         p = nullspot = ln++;    // Nullspot -> start of this symbol
1117
1118                         // Find end of symbol (and compute its length)
1119                         for(j=1; (int)chrtab[*ln]&CTSYM; j++)
1120                                 ln++;
1121
1122                         // Handle "DOT" special forms (like ".b") that follow a normal
1123                         // symbol or keyword:
1124                         if (*ln == '.')
1125                         {
1126                                 *ln++ = EOS;            // Terminate symbol
1127                                 stuffnull = 0;          // And never try it again
1128
1129                                 // Character following the '.' must have a DOT attribute, and
1130                                 // the chararacter after THAT one must not have a start-symbol
1131                                 // attribute (to prevent symbols that look like, for example,
1132                                 // "zingo.barf", which might be a good idea anyway....)
1133                                 if (((chrtab[*ln] & DOT) == 0) || (dotxtab[*ln] == 0))
1134                                         return error("[bwsl] must follow '.' in symbol");
1135
1136                                 v = (uint32_t)dotxtab[*ln++];
1137                                 cursize = (uint32_t)v;
1138
1139                                 if (chrtab[*ln] & CTSYM)
1140                                         return error("misuse of '.'; not allowed in symbols");
1141                         }
1142
1143                         // If the symbol is small, check to see if it's really the name of
1144                         // a register.
1145                         if (j <= KWSIZE)
1146                         {
1147                                 for(state=0; state>=0;)
1148                                 {
1149                                         j = (int)tolowertab[*p++];
1150                                         j += kwbase[state];
1151
1152                                         if (kwcheck[j] != state)
1153                                         {
1154                                                 j = -1;
1155                                                 break;
1156                                         }
1157
1158                                         if (*p == EOS || p == ln)
1159                                         {
1160                                                 j = kwaccept[j];
1161                                                 break;
1162                                         }
1163
1164                                         state = kwtab[j];
1165                                 }
1166                         }
1167                         else
1168                         {
1169                                 j = -1;
1170                         }
1171
1172                         // Make j = -1 if user tries to use a RISC register while in 68K mode
1173                         if (!(rgpu || rdsp) && ((TOKEN)j >= KW_R0 && (TOKEN)j <= KW_R31))
1174                         {
1175                                 j = -1;
1176                         }
1177
1178                         // Make j = -1 if time, date etc with no preceeding ^^
1179                         // defined, referenced, streq, macdef, date and time
1180                         switch ((TOKEN)j)
1181                         {
1182                         case 112:   // defined
1183                         case 113:   // referenced
1184                         case 118:   // streq
1185                         case 119:   // macdef
1186                         case 120:   // time
1187                         case 121:   // date
1188                                 j = -1;
1189                         }
1190
1191                         // If not tokenized keyword OR token was not found
1192                         if ((j < 0) || (state < 0))
1193                         {
1194                                 *tk.u32++ = SYMBOL;
1195                                 string[stringNum] = nullspot;
1196                                 *tk.u32++ = stringNum;
1197                                 stringNum++;
1198                         }
1199                         else
1200                         {
1201                                 *tk.u32++ = (TOKEN)j;
1202                                 stuffnull = 0;
1203                         }
1204
1205                         if (v)                  // Record attribute token (if any)
1206                                 *tk.u32++ = (TOKEN)v;
1207
1208                         if (stuffnull)  // Arrange for string termination on next pass
1209                                 nullspot = ln;
1210
1211                         continue;
1212                 }
1213
1214                 // Handle identity tokens
1215                 if (c & SELF)
1216                 {
1217                         *tk.u32++ = *ln++;
1218                         continue;
1219                 }
1220
1221                 // Handle multiple-character tokens
1222                 if (c & MULTX)
1223                 {
1224                         switch (*ln++)
1225                         {
1226                         case '!':               // ! or !=
1227                                 if (*ln == '=')
1228                                 {
1229                                         *tk.u32++ = NE;
1230                                         ln++;
1231                                 }
1232                                 else
1233                                         *tk.u32++ = '!';
1234
1235                                 continue;
1236                         case '\'':              // 'string'
1237                                 if (m6502)
1238                                 {
1239                                         // Hardcoded for now, maybe this will change in the future
1240                                         *tk.u32++ = STRINGA8;
1241                                         goto dostring;
1242                                 }
1243                                 // Fall through
1244                         case '\"':              // "string"
1245                                 *tk.u32++ = STRING;
1246 dostring:
1247                                 c1 = ln[-1];
1248                                 string[stringNum] = ln;
1249                                 *tk.u32++ = stringNum;
1250                                 stringNum++;
1251
1252                                 for(p=ln; *ln!=EOS && *ln!=c1;)
1253                                 {
1254                                         c = *ln++;
1255
1256                                         if (c == '\\')
1257                                         {
1258                                                 switch (*ln++)
1259                                                 {
1260                                                 case EOS:
1261                                                         return(error("unterminated string"));
1262                                                 case 'e':
1263                                                         c = '\033';
1264                                                         break;
1265                                                 case 'n':
1266                                                         c = '\n';
1267                                                         break;
1268                                                 case 'b':
1269                                                         c = '\b';
1270                                                         break;
1271                                                 case 't':
1272                                                         c = '\t';
1273                                                         break;
1274                                                 case 'r':
1275                                                         c = '\r';
1276                                                         break;
1277                                                 case 'f':
1278                                                         c = '\f';
1279                                                         break;
1280                                                 case '\"':
1281                                                         c = '\"';
1282                                                         break;
1283                                                 case '\'':
1284                                                         c = '\'';
1285                                                         break;
1286                                                 case '\\':
1287                                                         c = '\\';
1288                                                         break;
1289                                                 case '!':
1290                                                         // If we're evaluating a macro
1291                                                         // this is valid and expands to
1292                                                         // "dot-size"
1293                                                         break;
1294                                                 default:
1295                                                         warn("bad backslash code in string");
1296                                                         ln--;
1297                                                         break;
1298                                                 }
1299                                         }
1300
1301                                         *p++ = c;
1302                                 }
1303
1304                                 if (*ln++ != c1)
1305                                         return error("unterminated string");
1306
1307                                 *p++ = EOS;
1308                                 continue;
1309                         case '$':               // $, hex constant
1310                                 if (chrtab[*ln] & HDIGIT)
1311                                 {
1312                                         v = 0;
1313
1314                                         // Parse the hex value
1315                                         while (hextab[*ln] >= 0)
1316                                                 v = (v << 4) + (int)hextab[*ln++];
1317
1318                                         if (*ln == '.')
1319                                         {
1320                                                 if (obj_format == BSD)
1321                                                 {
1322                                                         if ((*(ln + 1) & 0xDF) == 'B')
1323                                                         {
1324                                                                 v &= 0x000000FF;
1325                                                                 ln += 2;
1326                                                         }
1327                                                         else if ((*(ln + 1) & 0xDF) == 'W')
1328                                                         {
1329                                                                 v &= 0x0000FFFF;
1330                                                                 ln += 2;
1331                                                         }
1332                                                         else if ((*(ln + 1) & 0xDF) == 'L')
1333                                                         {
1334                                                                 v &= 0xFFFFFFFF;
1335                                                                 ln += 2;
1336                                                         }
1337                                                 }
1338                                         }
1339
1340                                         *tk.u32++ = CONST;
1341                                         *tk.u64++ = v;
1342
1343                                         if (obj_format == ALCYON)
1344                                         {
1345                                                 if (*ln == '.')
1346                                                 {
1347                                                         if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
1348                                                         {
1349                                                                 *tk.u32++ = DOTW;
1350                                                                 ln += 2;
1351                                                         }
1352                                                         else if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
1353                                                         {
1354                                                                 *tk.u32++ = DOTL;
1355                                                                 ln += 2;
1356                                                         }
1357                                                 }
1358                                         }
1359                                 }
1360                                 else
1361                                         *tk.u32++ = '$';
1362
1363                                 continue;
1364                         case '<':               // < or << or <> or <=
1365                                 switch (*ln)
1366                                 {
1367                                 case '<':
1368                                         *tk.u32++ = SHL;
1369                                         ln++;
1370                                         continue;
1371                                 case '>':
1372                                         *tk.u32++ = NE;
1373                                         ln++;
1374                                         continue;
1375                                 case '=':
1376                                         *tk.u32++ = LE;
1377                                         ln++;
1378                                         continue;
1379                                 default:
1380                                         *tk.u32++ = '<';
1381                                         continue;
1382                                 }
1383                         case ':':               // : or ::
1384                                 if (*ln == ':')
1385                                 {
1386                                         *tk.u32++ = DCOLON;
1387                                         ln++;
1388                                 }
1389                                 else
1390                                         *tk.u32++ = ':';
1391
1392                                 continue;
1393                         case '=':               // = or ==
1394                                 if (*ln == '=')
1395                                 {
1396                                         *tk.u32++ = DEQUALS;
1397                                         ln++;
1398                                 }
1399                                 else
1400                                         *tk.u32++ = '=';
1401
1402                                 continue;
1403                         case '>':               // > or >> or >=
1404                                 switch (*ln)
1405                                 {
1406                                 case '>':
1407                                         *tk.u32++ = SHR;
1408                                         ln++;
1409                                         continue;
1410                                 case '=':
1411                                         *tk.u32++ = GE;
1412                                         ln++;
1413                                         continue;
1414                                 default:
1415                                         *tk.u32++ = '>';
1416                                         continue;
1417                                 }
1418                         case '%':               // % or binary constant
1419                                 if (*ln < '0' || *ln > '1')
1420                                 {
1421                                         *tk.u32++ = '%';
1422                                         continue;
1423                                 }
1424
1425                                 v = 0;
1426
1427                                 while (*ln >= '0' && *ln <= '1')
1428                                         v = (v << 1) + *ln++ - '0';
1429
1430                                 if (*ln == '.')
1431                                 {
1432                                         if ((*(ln + 1) == 'b') || (*(ln + 1) == 'B'))
1433                                         {
1434                                                 v &= 0x000000FF;
1435                                                 ln += 2;
1436                                         }
1437
1438                                         if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
1439                                         {
1440                                                 v &= 0x0000FFFF;
1441                                                 ln += 2;
1442                                         }
1443
1444                                         if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
1445                                         {
1446                                                 v &= 0xFFFFFFFF;
1447                                                 ln += 2;
1448                                         }
1449                                 }
1450
1451                                 *tk.u32++ = CONST;
1452                                 *tk.u64++ = v;
1453                                 continue;
1454                         case '@':               // @ or octal constant
1455                                 if (*ln < '0' || *ln > '7')
1456                                 {
1457                                         *tk.u32++ = '@';
1458                                         continue;
1459                                 }
1460
1461                                 v = 0;
1462
1463                                 while (*ln >= '0' && *ln <= '7')
1464                                         v = (v << 3) + *ln++ - '0';
1465
1466                                 if (*ln == '.')
1467                                 {
1468                                         if ((*(ln + 1) == 'b') || (*(ln + 1) == 'B'))
1469                                         {
1470                                                 v &= 0x000000FF;
1471                                                 ln += 2;
1472                                         }
1473
1474                                         if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
1475                                         {
1476                                                 v &= 0x0000FFFF;
1477                                                 ln += 2;
1478                                         }
1479
1480                                         if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
1481                                         {
1482                                                 v &= 0xFFFFFFFF;
1483                                                 ln += 2;
1484                                         }
1485                                 }
1486
1487                                 *tk.u32++ = CONST;
1488                                 *tk.u64++ = v;
1489                                 continue;
1490                         case '^':               // ^ or ^^ <operator-name>
1491                                 if (*ln != '^')
1492                                 {
1493                                         *tk.u32++ = '^';
1494                                         continue;
1495                                 }
1496
1497                                 if (((int)chrtab[*++ln] & STSYM) == 0)
1498                                 {
1499                                         error("invalid symbol following ^^");
1500                                         continue;
1501                                 }
1502
1503                                 p = ln++;
1504
1505                                 while ((int)chrtab[*ln] & CTSYM)
1506                                         ++ln;
1507
1508                                 for(state=0; state>=0;)
1509                                 {
1510                                         // Get char, convert to lowercase
1511                                         j = *p++;
1512
1513                                         if (j >= 'A' && j <= 'Z')
1514                                                 j += 0x20;
1515
1516                                         j += kwbase[state];
1517
1518                                         if (kwcheck[j] != state)
1519                                         {
1520                                                 j = -1;
1521                                                 break;
1522                                         }
1523
1524                                         if (*p == EOS || p == ln)
1525                                         {
1526                                                 j = kwaccept[j];
1527                                                 break;
1528                                         }
1529
1530                                         state = kwtab[j];
1531                                 }
1532
1533                                 if (j < 0 || state < 0)
1534                                 {
1535                                         error("unknown symbol following ^^");
1536                                         continue;
1537                                 }
1538
1539                                 *tk.u32++ = (TOKEN)j;
1540                                 continue;
1541                         default:
1542                                 interror(2);    // Bad MULTX entry in chrtab
1543                                 continue;
1544                         }
1545                 }
1546
1547                 // Handle decimal constant
1548                 if (c & DIGIT)
1549                 {
1550                         uint8_t * numStart = ln;
1551                         v = 0;
1552
1553                         while ((int)chrtab[*ln] & DIGIT)
1554                                 v = (v * 10) + *ln++ - '0';
1555
1556                         // See if there's a .[bwl] after the constant & deal with it if so
1557                         if (*ln == '.')
1558                         {
1559                                 if ((*(ln + 1) == 'b') || (*(ln + 1) == 'B'))
1560                                 {
1561                                         v &= 0x000000FF;
1562                                         ln += 2;
1563                                         *tk.u32++ = CONST;
1564                                         *tk.u64++ = v;
1565                                         *tk.u32++ = DOTB;
1566                                 }
1567                                 else if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
1568                                 {
1569                                         v &= 0x0000FFFF;
1570                                         ln += 2;
1571                                         *tk.u32++ = CONST;
1572                                         *tk.u64++ = v;
1573                                         *tk.u32++ = DOTW;
1574                                 }
1575                                 else if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
1576                                 {
1577                                         v &= 0xFFFFFFFF;
1578                                         ln += 2;
1579                                         *tk.u32++ = CONST;
1580                                         *tk.u64++ = v;
1581                                         *tk.u32++ = DOTL;
1582                                 }
1583                                 else if ((int)chrtab[*(ln + 1)] & DIGIT)
1584                                 {
1585                                         // Hey, more digits after the dot, so we assume it's a
1586                                         // floating point number of some kind... numEnd will point
1587                                         // to the first non-float character after it's done
1588                                         char * numEnd;
1589                                         errno = 0;
1590                                         double f = strtod(numStart, &numEnd);
1591                                         ln = (uint8_t *)numEnd;
1592
1593                                         if (errno != 0)
1594                                                 return error("floating point parse error");
1595
1596                                         // N.B.: We use the C compiler's internal double
1597                                         //       representation for all internal float calcs and
1598                                         //       are reasonably sure that the size of said double
1599                                         //       is 8 bytes long (which we check for in fltpoint.c)
1600                                         *tk.u32++ = FCONST;
1601                                         *tk.dp = f;
1602                                         tk.u64++;
1603                                         continue;
1604                                 }
1605                         }
1606                         else
1607                         {
1608                                 *tk.u32++ = CONST;
1609                                 *tk.u64++ = v;
1610                         }
1611
1612 //printf("CONST: %i\n", v);
1613                         continue;
1614                 }
1615
1616                 // Handle illegal character
1617                 return error("illegal character $%02X found", *ln);
1618         }
1619
1620         // Terminate line of tokens and return "success."
1621
1622 goteol:
1623         tok = etok;                             // Set tok to beginning of line
1624
1625         if (stuffnull)                  // Terminate last SYMBOL
1626                 *nullspot = EOS;
1627
1628         *tk.u32++ = EOL;
1629
1630         return OK;
1631 }
1632
1633
1634 //
1635 // .GOTO <label>        goto directive
1636 //
1637 // The label is searched for starting from the first line of the current,
1638 // enclosing macro definition. If no enclosing macro exists, an error is
1639 // generated.
1640 //
1641 // A label is of the form:
1642 //
1643 // :<name><whitespace>
1644 //
1645 // The colon must appear in column 1.  The label is stripped prior to macro
1646 // expansion, and is NOT subject to macro expansion.  The whitespace may also
1647 // be EOL.
1648 //
1649 int d_goto(WORD unused)
1650 {
1651         // Setup for the search
1652         if (*tok != SYMBOL)
1653                 return error("missing label");
1654
1655         char * sym = string[tok[1]];
1656         tok += 2;
1657
1658         if (cur_inobj->in_type != SRC_IMACRO)
1659                 return error("goto not in macro");
1660
1661         IMACRO * imacro = cur_inobj->inobj.imacro;
1662         LLIST * defln = imacro->im_macro->lineList;
1663
1664         // Attempt to find the label, starting with the first line.
1665         for(; defln!=NULL; defln=defln->next)
1666         {
1667                 // Must start with a colon
1668                 if (defln->line[0] == ':')
1669                 {
1670                         // Compare names (sleazo string compare)
1671                         char * s1 = sym;
1672                         char * s2 = defln->line;
1673
1674                         // Either we will match the strings to EOS on both, or we will
1675                         // match EOS on string 1 to whitespace on string 2. Otherwise, we
1676                         // have no match.
1677                         while ((*s1 == *s2) || ((*s1 == EOS) && (chrtab[*s2] & WHITE)))
1678                         {
1679                                 // If we reached the end of string 1 (sym), we're done.
1680                                 // Note that we're also checking for the end of string 2 as
1681                                 // well, since we've established they're equal above.
1682                                 if (*s1 == EOS)
1683                                 {
1684                                         // Found the label, set new macro next-line and return.
1685                                         imacro->im_nextln = defln;
1686                                         return 0;
1687                                 }
1688
1689                                 s1++;
1690                                 s2++;
1691                         }
1692                 }
1693         }
1694
1695         return error("goto label not found");
1696 }
1697
1698
1699 void DumpToken(TOKEN t)
1700 {
1701         if (t == COLON)
1702                 printf("[COLON]");
1703         else if (t == CONST)
1704                 printf("[CONST]");
1705         else if (t == FCONST)
1706                 printf("[FCONST]");
1707         else if (t == ACONST)
1708                 printf("[ACONST]");
1709         else if (t == STRING)
1710                 printf("[STRING]");
1711         else if (t == SYMBOL)
1712                 printf("[SYMBOL]");
1713         else if (t == EOS)
1714                 printf("[EOS]");
1715         else if (t == TKEOF)
1716                 printf("[TKEOF]");
1717         else if (t == DEQUALS)
1718                 printf("[DEQUALS]");
1719         else if (t == SET)
1720                 printf("[SET]");
1721         else if (t == REG)
1722                 printf("[REG]");
1723         else if (t == DCOLON)
1724                 printf("[DCOLON]");
1725         else if (t == GE)
1726                 printf("[GE]");
1727         else if (t == LE)
1728                 printf("[LE]");
1729         else if (t == NE)
1730                 printf("[NE]");
1731         else if (t == SHR)
1732                 printf("[SHR]");
1733         else if (t == SHL)
1734                 printf("[SHL]");
1735         else if (t == UNMINUS)
1736                 printf("[UNMINUS]");
1737         else if (t == DOTB)
1738                 printf("[DOTB]");
1739         else if (t == DOTW)
1740                 printf("[DOTW]");
1741         else if (t == DOTL)
1742                 printf("[DOTL]");
1743         else if (t == DOTQ)
1744                 printf("[DOTQ]");
1745         else if (t == DOTS)
1746                 printf("[DOTS]");
1747         else if (t == DOTD)
1748                 printf("[DOTD]");
1749         else if (t == DOTI)
1750                 printf("[DOTI]");
1751         else if (t == ENDEXPR)
1752                 printf("[ENDEXPR]");
1753         else if (t == CR_ABSCOUNT)
1754                 printf("[CR_ABSCOUNT]");
1755         else if (t == CR_DEFINED)
1756                 printf("[CR_DEFINED]");
1757         else if (t == CR_REFERENCED)
1758                 printf("[CR_REFERENCED]");
1759         else if (t == CR_STREQ)
1760                 printf("[CR_STREQ]");
1761         else if (t == CR_MACDEF)
1762                 printf("[CR_MACDEF]");
1763         else if (t == CR_TIME)
1764                 printf("[CR_TIME]");
1765         else if (t == CR_DATE)
1766                 printf("[CR_DATE]");
1767         else if (t >= 0x20 && t <= 0x2F)
1768                 printf("[%c]", (char)t);
1769         else if (t >= 0x3A && t <= 0x3F)
1770                 printf("[%c]", (char)t);
1771         else if (t >= 0x80 && t <= 0x87)
1772                 printf("[D%u]", ((uint32_t)t) - 0x80);
1773         else if (t >= 0x88 && t <= 0x8F)
1774                 printf("[A%u]", ((uint32_t)t) - 0x88);
1775         else
1776                 printf("[%X:%c]", (uint32_t)t, (char)t);
1777 }
1778
1779
1780 void DumpTokenBuffer(void)
1781 {
1782         printf("Tokens [%X]: ", sloc);
1783
1784         for(TOKEN * t=tokbuf; *t!=EOL; t++)
1785         {
1786                 if (*t == COLON)
1787                         printf("[COLON]");
1788                 else if (*t == CONST)
1789                 {
1790                         PTR tp;
1791                         tp.u32 = t + 1;
1792                         printf("[CONST: $%lX]", *tp.u64);
1793                         t += 2;
1794                 }
1795                 else if (*t == FCONST)
1796                 {
1797                         PTR tp;
1798                         tp.u32 = t + 1;
1799                         printf("[FCONST: $%lX]", *tp.u64);
1800                         t += 2;
1801                 }
1802                 else if (*t == ACONST)
1803                 {
1804                         printf("[ACONST: $%X, $%X]", (uint32_t)t[1], (uint32_t)t[2]);
1805                         t += 2;
1806                 }
1807                 else if (*t == STRING)
1808                 {
1809                         t++;
1810                         printf("[STRING:\"%s\"]", string[*t]);
1811                 }
1812                 else if (*t == SYMBOL)
1813                 {
1814                         t++;
1815                         printf("[SYMBOL:\"%s\"]", string[*t]);
1816                 }
1817                 else if (*t == EOS)
1818                         printf("[EOS]");
1819                 else if (*t == TKEOF)
1820                         printf("[TKEOF]");
1821                 else if (*t == DEQUALS)
1822                         printf("[DEQUALS]");
1823                 else if (*t == SET)
1824                         printf("[SET]");
1825                 else if (*t == REG)
1826                         printf("[REG]");
1827                 else if (*t == DCOLON)
1828                         printf("[DCOLON]");
1829                 else if (*t == GE)
1830                         printf("[GE]");
1831                 else if (*t == LE)
1832                         printf("[LE]");
1833                 else if (*t == NE)
1834                         printf("[NE]");
1835                 else if (*t == SHR)
1836                         printf("[SHR]");
1837                 else if (*t == SHL)
1838                         printf("[SHL]");
1839                 else if (*t == UNMINUS)
1840                         printf("[UNMINUS]");
1841                 else if (*t == DOTB)
1842                         printf("[DOTB]");
1843                 else if (*t == DOTW)
1844                         printf("[DOTW]");
1845                 else if (*t == DOTL)
1846                         printf("[DOTL]");
1847                 else if (*t == DOTQ)
1848                         printf("[DOTQ]");
1849                 else if (*t == DOTS)
1850                         printf("[DOTS]");
1851                 else if (*t == DOTD)
1852                         printf("[DOTD]");
1853                 else if (*t == DOTI)
1854                         printf("[DOTI]");
1855                 else if (*t == ENDEXPR)
1856                         printf("[ENDEXPR]");
1857                 else if (*t == CR_ABSCOUNT)
1858                         printf("[CR_ABSCOUNT]");
1859                 else if (*t == CR_DEFINED)
1860                         printf("[CR_DEFINED]");
1861                 else if (*t == CR_REFERENCED)
1862                         printf("[CR_REFERENCED]");
1863                 else if (*t == CR_STREQ)
1864                         printf("[CR_STREQ]");
1865                 else if (*t == CR_MACDEF)
1866                         printf("[CR_MACDEF]");
1867                 else if (*t == CR_TIME)
1868                         printf("[CR_TIME]");
1869                 else if (*t == CR_DATE)
1870                         printf("[CR_DATE]");
1871                 else if (*t >= 0x20 && *t <= 0x2F)
1872                         printf("[%c]", (char)*t);
1873                 else if (*t >= 0x3A && *t <= 0x3F)
1874                         printf("[%c]", (char)*t);
1875                 else if (*t >= 0x80 && *t <= 0x87)
1876                         printf("[D%u]", ((uint32_t)*t) - 0x80);
1877                 else if (*t >= 0x88 && *t <= 0x8F)
1878                         printf("[A%u]", ((uint32_t)*t) - 0x88);
1879                 else
1880                         printf("[%X:%c]", (uint32_t)*t, (char)*t);
1881         }
1882
1883         printf("[EOL]\n");
1884 }
1885