]> Shamusworld >> Repos - rmac/blob - token.c
2e6c4563df05517a7e8cbb3e4bfcdb565d9fda4e
[rmac] / token.c
1 //
2 // RMAC - Reboot's Macro Assembler for all Atari computers
3 // TOKEN.C - Token Handling
4 // Copyright (C) 199x Landon Dyer, 2011-2017 Reboot and Friends
5 // RMAC derived from MADMAC v1.07 Written by Landon Dyer, 1986
6 // Source utilised with the kind permission of Landon Dyer
7 //
8
9 #include "token.h"
10 #include "direct.h"
11 #include "error.h"
12 #include "macro.h"
13 #include "procln.h"
14 #include "sect.h"
15 #include "symbol.h"
16
17 #define DECL_KW                         // Declare keyword arrays
18 #define DEF_KW                          // Declare keyword values
19 #include "kwtab.h"                      // Incl generated keyword tables & defs
20
21
// Tokenizer state. None of these are declared 'static', so they are visible
// to other translation units. InitTokenizer() resets several of them.
22 int lnsave;                                     // 1 = strcpy() the text of the current line; 0 = don't save lines
23 uint16_t curlineno;                     // Current line number (16-bit, so 64K lines max currently)
24 int totlines;                           // Total # of lines
25 int mjump_align = 0;            // mjump alignment flag
26 char lntag;                                     // Line tag (InitTokenizer() sets it to SPACE)
27 char * curfname;                        // Current filename (see SetFilenameForErrorReporting())
28 char tolowertab[128];           // Uppercase ==> lowercase, indexed by 7-bit character code
29 int8_t hextab[128];                     // Hex digit value of each character, -1 if not a hex digit
30 char dotxtab[128];                      // Size-suffix token for ".b", ".s", etc., 0 if none
31 char irbuf[LNSIZ];                      // Text for .rept block line
32 char lnbuf[LNSIZ];                      // Text of current line
33 WORD filecount;                         // Unique file number counter (starts at (WORD)-1)
34 WORD cfileno;                           // Current file number (starts at (WORD)-1)
35 TOKENPTR tok;                           // Ptr to current token
36 TOKEN * etok;                           // Ptr past last token in tokbuf[]
37 TOKEN tokeol[1] = {EOL};        // Bailout end-of-line token (a one-element list holding just EOL)
38 char * string[TOKBUFSIZE*2];// Token buffer string pointer storage
39 int optimizeOff;                        // Optimization override flag
40
41 // File record, used to maintain a list of every include file ever visited.
// Records form a singly linked list; SetFilenameForErrorReporting() walks it
// from 'filerec', treating a file number as a position in the list.
42 #define FILEREC struct _filerec
43 FILEREC
44 {
45    FILEREC * frec_next;                      // Next record in the list, NULL at the end
46    char * frec_name;                         // Filename stored for this record
47 };
48
49 FILEREC * filerec;                           // Head of the file record list (NULL when empty)
50 FILEREC * last_fr;                           // Presumably the tail of the list; maintained outside this chunk -- TODO confirm
51
// Input object stack and free lists. a_inobj() pops from the free lists when
// they are non-empty and pushes the new object onto 'cur_inobj'.
52 INOBJ * cur_inobj;                      // Ptr current input obj (IFILE/IMACRO), i.e. top of the input stack
53 static INOBJ * f_inobj;         // Ptr list of free INOBJs (linked through in_link)
54 static IFILE * f_ifile;         // Ptr list of free IFILEs (linked through if_link)
55 static IMACRO * f_imacro;       // Ptr list of free IMACROs (linked through im_link)
56
57 static TOKEN tokbuf[TOKBUFSIZE];        // Token buffer (stack-like, all files)
58
// Character classification table, indexed by byte value (0x00..0xFF). Each
// entry is a sum of class flags: ILLEG, WHITE, SELF, MULTX, DIGIT, HDIGIT,
// STSYM, CTSYM and DOT. The flag values are defined outside this file section;
// STSYM/CTSYM presumably mean "may start a symbol"/"may continue a symbol"
// and DOT marks letters usable as a size suffix -- TODO confirm against the
// header. Because the table has 256 entries, it must be indexed with an
// unsigned byte value, never with a plain (possibly negative) char.
59 uint8_t chrtab[0x100] = {
60         ILLEG, ILLEG, ILLEG, ILLEG,                     // NUL SOH STX ETX
61         ILLEG, ILLEG, ILLEG, ILLEG,                     // EOT ENQ ACK BEL
62         ILLEG, WHITE, ILLEG, ILLEG,                     // BS HT LF VT
63         WHITE, ILLEG, ILLEG, ILLEG,                     // FF CR SO SI
64
65         ILLEG, ILLEG, ILLEG, ILLEG,                     // DLE DC1 DC2 DC3
66         ILLEG, ILLEG, ILLEG, ILLEG,                     // DC4 NAK SYN ETB
67         ILLEG, ILLEG, ILLEG, ILLEG,                     // CAN EM SUB ESC
68         ILLEG, ILLEG, ILLEG, ILLEG,                     // FS GS RS US
69
70         WHITE, MULTX, MULTX, SELF,                      // SP ! " #
71         MULTX+CTSYM, MULTX, SELF, MULTX,        // $ % & '
72         SELF, SELF, SELF, SELF,                         // ( ) * +
73         SELF, SELF, STSYM, SELF,                        // , - . /
74
75         DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM,         // 0 1
76         DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM,         // 2 3
77         DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM,         // 4 5
78         DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM,         // 6 7
79         DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM,         // 8 9
80         MULTX, MULTX,                                                           // : ;
81         MULTX, MULTX, MULTX, STSYM+CTSYM,                       // < = > ?
82
83         MULTX, STSYM+CTSYM+HDIGIT,                                      // @ A
84         DOT+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT,     // B C
85         DOT+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT,     // D E
86         STSYM+CTSYM+HDIGIT, STSYM+CTSYM,                        // F G
87         STSYM+CTSYM, DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // H I J K
88         DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // L M N O
89
90         DOT+STSYM+CTSYM, DOT+STSYM+CTSYM, STSYM+CTSYM, DOT+STSYM+CTSYM, // P Q R S
91         STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, DOT+STSYM+CTSYM, // T U V W
        // NOTE(review): uppercase 'X' below has no DOT flag, while lowercase
        // 'x' further down does, and InitTokenizer() maps both letters to
        // DOTX -- confirm whether the asymmetry is intended.
92         STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, SELF,// X Y Z [
93         SELF, SELF, MULTX, STSYM+CTSYM,                         // \ ] ^ _
94
95         ILLEG, STSYM+CTSYM+HDIGIT,                                      // ` a
96         DOT+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT,     // b c
97         DOT+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT,     // d e
98         STSYM+CTSYM+HDIGIT, STSYM+CTSYM,                        // f g
99         STSYM+CTSYM, DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // h i j k
100         DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, // l m n o
101
102         DOT+STSYM+CTSYM, DOT+STSYM+CTSYM, STSYM+CTSYM, DOT+STSYM+CTSYM, // p q r s
103         STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, DOT+STSYM+CTSYM, // t u v w
104         DOT+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, SELF,                // x y z {
105         SELF, SELF, SELF, ILLEG,                                        // | } ~ DEL
106
107         // Anything above $7F is illegal (and yes, we need to check for this,
108         // otherwise you get strange and spurious errors that will lead you astray)
109         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
110         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
111         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
112         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
113         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
114         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
115         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
116         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
117         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
118         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
119         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
120         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
121         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
122         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
123         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG,
124         ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG, ILLEG
125 };
126
127 // Names of registers
// ExpandMacro() turns a keyword token back into text with
// regname[token - KW_D0], so entry 0 corresponds to KW_D0. The table holds
// 24 rows of 8 names (192 entries); "" marks a slot with no name. The
// trailing comment on each row gives the first and last token value the row
// is meant to cover (this assumes KW_D0 is 128 -- TODO confirm against
// kwtab.h).
// NOTE(review): the lookup in ExpandMacro() has no upper-bound check, so the
// table must cover every token value >= KW_D0 that can reach it.
128 static char * regname[] = {
129         "d0","d1","d2","d3","d4","d5","d6","d7", // 128,135
130         "a0","a1","a2","a3","a4","a5","a6","sp", // 136,143
131         "ssp","pc","sr","ccr","regequ","set","reg","r0", // 144,151
132         "r1","r2","r3","r4","r5","r6","r7","r8", // 152,159
133         "r9","r10","r11","r12","r13","r14","r15","r16", // 160,167
134         "r17","r18","r19","r20","r21","r22","r23","r24", // 168,175
135         "r25","r26","r27","r28","r29","r30","r31","ccdef", // 176,183
136         "usp","ic40","dc40","bc40","sfc","dfc","","vbr", // 184,191
137         "cacr","caar","msp","isp","tc","itt0","itt1","dtt0", // 192,199
138         "dtt1","mmusr","urp","srp","iacr0","iacr1","dacr0","dacr1", // 200,207
139         "tt0","tt1","crp","","","","","", // 208,215
140         "","","","","fpiar","fpsr","fpcr","", // 216,223
141         "fp0","fp1","fp2","fp3","fp4","fp5","fp6","fp7", // 224,231
142         "","","","","","","","", // 232,239
143         "","","","","","","","", // 240,247
144         "","","","","","","","", // 248,255
145         "","","","","x0","x1","y0","y1", // 256,263
146         "","b0","","b2","","b1","a","b", // 264,271
147         "mr","omr","la","lc","ssh","ssl","ss","", // 272,279
148         "n0","n1","n2","n3","n4","n5","n6","n7", // 280,287
149         "m0","m1","m2","m3","m4","m5","m6","m7", // 288,295
150         "","","","","","","l","p", // 296,303
151         "mr","omr","la","lc","ssh","ssl","ss","", // 304,311
152         "a10","b10","x","y","","","ab","ba"  // 312,319
153 };
154
// Names of the 32 RISC registers. ExpandMacro() indexes this table with
// (token - KW_R0) for tokens in the range KW_R0..KW_R31.
155 static char * riscregname[] = {
156          "r0",  "r1",  "r2",  "r3",  "r4", "r5",   "r6",  "r7",
157          "r8",  "r9", "r10", "r11", "r12", "r13", "r14", "r15",
158         "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
159         "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31"
160 };
161
162
163 //
164 // Initialize tokenizer
165 //
166 void InitTokenizer(void)
167 {
168         int i;                                                                  // Iterator
169         char * htab = "0123456789abcdefABCDEF"; // Hex character table
170
171         lnsave = 0;                                                             // Don't save lines
172         curfname = "";                                                  // No file, empty filename
173         filecount = (WORD)-1;
174         cfileno = (WORD)-1;                                             // cfileno gets bumped to 0
175         curlineno = 0;
176         totlines = 0;
177         etok = tokbuf;
178         f_inobj = NULL;
179         f_ifile = NULL;
180         f_imacro = NULL;
181         cur_inobj = NULL;
182         filerec = NULL;
183         last_fr = NULL;
184         lntag = SPACE;
185
186         // Initialize hex, "dot" and tolower tables
187         for(i=0; i<128; i++)
188         {
189                 hextab[i] = -1;
190                 dotxtab[i] = 0;
191                 tolowertab[i] = (char)i;
192         }
193
194         for(i=0; htab[i]!=EOS; i++)
195                 hextab[htab[i]] = (char)((i < 16) ? i : i - 6);
196
197         for(i='A'; i<='Z'; i++)
198                 tolowertab[i] |= 0x20;
199
200         // These characters are legal immediately after a period
201         dotxtab['b'] = DOTB;                                    // .b .B .s .S
202         dotxtab['B'] = DOTB;
203         //dotxtab['s'] = DOTB;
204         //dotxtab['S'] = DOTB;
205         dotxtab['w'] = DOTW;                                    // .w .W
206         dotxtab['W'] = DOTW;
207         dotxtab['l'] = DOTL;                                    // .l .L
208         dotxtab['L'] = DOTL;
209         dotxtab['i'] = DOTI;                                    // .i .I (WTF is this???)
210         dotxtab['I'] = DOTI;
211         dotxtab['D'] = DOTD;                                    // .d .D (double)
212         dotxtab['d'] = DOTD;
213         dotxtab['S'] = DOTS;                                    // .s .S
214         dotxtab['s'] = DOTS;
215         dotxtab['Q'] = DOTQ;                                    // .q .Q (quad word)
216         dotxtab['q'] = DOTQ;
217         dotxtab['X'] = DOTX;                                    // .x .x
218         dotxtab['x'] = DOTX;
219         dotxtab['P'] = DOTP;                                    // .p .P
220         dotxtab['p'] = DOTP;
221 }
222
223
224 void SetFilenameForErrorReporting(void)
225 {
226         WORD fnum = cfileno;
227
228         // Check for absolute top filename (this should never happen)
229         if (fnum == -1)
230         {
231                 curfname = "(*top*)";
232                 return;
233         }
234
235         FILEREC * fr = filerec;
236
237         // Advance to the correct record...
238         while (fr != NULL && fnum != 0)
239         {
240                 fr = fr->frec_next;
241                 fnum--;
242         }
243
244         // Check for file # record not found (this should never happen either)
245         if (fr == NULL)
246         {
247                 curfname = "(*NOT FOUND*)";
248                 return;
249         }
250
251         curfname = fr->frec_name;
252 }
253
254
255 //
256 // Allocate an IFILE or IMACRO
257 //
258 INOBJ * a_inobj(int typ)
259 {
260         INOBJ * inobj;
261         IFILE * ifile;
262         IMACRO * imacro;
263
264         // Allocate and initialize INOBJ first
265         if (f_inobj == NULL)
266                 inobj = malloc(sizeof(INOBJ));
267         else
268         {
269                 inobj = f_inobj;
270                 f_inobj = f_inobj->in_link;
271         }
272
273         switch (typ)
274         {
275         case SRC_IFILE:                                                 // Alloc and init an IFILE
276                 if (f_ifile == NULL)
277                         ifile = malloc(sizeof(IFILE));
278                 else
279                 {
280                         ifile = f_ifile;
281                         f_ifile = f_ifile->if_link;
282                 }
283
284                 inobj->inobj.ifile = ifile;
285                 break;
286
287         case SRC_IMACRO:                                                // Alloc and init an IMACRO
288                 if (f_imacro == NULL)
289                         imacro = malloc(sizeof(IMACRO));
290                 else
291                 {
292                         imacro = f_imacro;
293                         f_imacro = f_imacro->im_link;
294                 }
295
296                 inobj->inobj.imacro = imacro;
297                 break;
298
299         case SRC_IREPT:                                                 // Alloc and init an IREPT
300                 inobj->inobj.irept = malloc(sizeof(IREPT));
301                 DEBUG { printf("alloc IREPT\n"); }
302                 break;
303         }
304
305         // Install INOBJ on top of input stack
306         inobj->in_ifent = ifent;                                // Record .if context on entry
307         inobj->in_type = (WORD)typ;
308         inobj->in_otok = tok.u32;
309         inobj->in_etok = etok;
310         inobj->in_link = cur_inobj;
311         cur_inobj = inobj;
312
313         return inobj;
314 }
315
316
317 //
318 // Perform macro substitution from 'orig' to 'dest'. Return OK or some error.
319 // A macro reference is in one of two forms:
320 // \name <non-name-character>
321 // \{name}
322 // A doubled backslash (\\) is compressed to a single backslash (\).
323 // Argument definitions have been pre-tokenized, so we have to turn them back
324 // into text. This means that numbers, in particular, become hex, regardless of
325 // their representation when the macro was invoked. This is a hack.
326 // A label may appear at the beginning of the line:
327 // :<name><whitespace>
328 // (the colon must be in the first column). These labels are stripped before
329 // macro expansion takes place.
330 //
331 int ExpandMacro(char * src, char * dest, int destsiz)
332 {
333         int i;
334         int questmark;                  // \? for testing argument existence
335         char mname[128];                // Assume max size of a formal arg name
336         char numbuf[20];                // Buffer for text of CONSTs
337         TOKEN * tk;
338         SYM * arg;
339         char ** symbolString;
340
341         DEBUG { printf("ExM: src=\"%s\"\n", src); }
342
343         IMACRO * imacro = cur_inobj->inobj.imacro;
344         int macnum = (int)(imacro->im_macro->sattr);
345
346         char * dst = dest;                                              // Next dest slot
347         char * edst = dest + destsiz - 1;               // End + 1(?) of dest buffer
348
349         // Check for (and skip over) any "label" on the line
350         char * s = src;
351         char * d = NULL;
352
353         if (*s == ':')
354         {
355                 while (*s != EOS && !(chrtab[*s] & WHITE))
356                         s++;
357
358                 if (*s != EOS)
359                         s++;                                                    // Skip first whitespace
360         }
361
362         // Expand the rest of the line
363         while (*s != EOS)
364         {
365                 // Copy single character
366                 if (*s != '\\')
367                 {
368                         if (dst >= edst)
369                                 goto overflow;
370
371                         // Skip comments in case a loose @ or \ is in there
372                         // In that case the tokeniser was trying to expand it.
373                         if ((*s == ';') || ((*s == '/') && (*(s + 1) == '/')))
374                                 goto skipcomments;
375
376                         *dst++ = *s++;
377                 }
378                 // Do macro expansion
379                 else
380                 {
381                         questmark = 0;
382
383                         // Do special cases
384                         switch (*++s)
385                         {
386                         case '\\':                                              // \\, \ (collapse to single backslash)
387                                 if (dst >= edst)
388                                         goto overflow;
389
390                                 *dst++ = *s++;
391                                 continue;
392                         case '?':                                               // \? <macro>  set `questmark' flag
393                                 s++;
394                                 questmark = 1;
395                                 break;
396                         case '#':                                               // \#, number of arguments
397                                 sprintf(numbuf, "%d", (int)imacro->im_nargs);
398                                 goto copystr;
399                         case '!':                                               // \! size suffix supplied on invocation
400                                 switch ((int)imacro->im_siz)
401                                 {
402                                 case SIZN: d = "";   break;
403                                 case SIZB: d = ".b"; break;
404                                 case SIZW: d = ".w"; break;
405                                 case SIZL: d = ".l"; break;
406                                 }
407
408                                 goto copy_d;
409                         case '~':                                               // ==> unique label string Mnnnn...
410                                 sprintf(numbuf, "M%u", curuniq);
411 copystr:
412                                 d = numbuf;
413 copy_d:
414                                 s++;
415
416                                 while (*d != EOS)
417                                 {
418                                         if (dst >= edst)
419                                                 goto overflow;
420                                         else
421                                                 *dst++ = *d++;
422                                 }
423
424                                 continue;
425                         case EOS:
426                                 return error("missing argument name");
427                         }
428
429                         // \n ==> argument number 'n', 0..9
430                         if (chrtab[*s] & DIGIT)
431                         {
432                                 i = *s++ - '1';
433
434                                 if (i < 0)
435                                         i = 9;
436
437                                 goto arg_num;
438                         }
439
440                         // Get argument name: \name, \{name}
441                         d = mname;
442
443                         // \label
444                         if (*s != '{')
445                         {
446                                 do
447                                 {
448                                         *d++ = *s++;
449                                 }
450                                 while (chrtab[*s] & CTSYM);
451                         }
452                         // \\{label}
453                         else
454                         {
455                                 for(++s; *s != EOS && *s != '}';)
456                                         *d++ = *s++;
457
458                                 if (*s != '}')
459                                         return error("missing closing brace ('}')");
460                                 else
461                                         s++;
462                         }
463
464                         *d = EOS;
465
466                         // Lookup the argument and copy its (string) value into the
467                         // destination string
468                         DEBUG { printf("argument='%s'\n", mname); }
469
470                         if ((arg = lookup(mname, MACARG, macnum)) == NULL)
471                                 return error("undefined argument: '%s'", mname);
472                         else
473                         {
474                                 // Convert a string of tokens (terminated with EOL) back into
475                                 // text. If an argument is out of range (not specified in the
476                                 // macro invocation) then it is ignored.
477                                 i = (int)arg->svalue;
478 arg_num:
479                                 DEBUG { printf("~argnumber=%d\n", i); }
480                                 tk = NULL;
481
482                                 if (i < imacro->im_nargs)
483                                 {
484                                         tk = imacro->argument[i].token;
485                                         symbolString = imacro->argument[i].string;
486 //DEBUG
487 //{
488 //      printf("ExM: Preparing to parse argument #%u...\n", i);
489 //      DumpTokens(tk);
490 //}
491                                 }
492
493                                 // \?arg yields:
494                                 //    0  if the argument is empty or non-existant,
495                                 //    1  if the argument is not empty
496                                 if (questmark)
497                                 {
498                                         if (tk == NULL || *tk == EOL)
499                                                 questmark = 0;
500
501                                         if (dst >= edst)
502                                                 goto overflow;
503
504                                         *dst++ = (char)(questmark + '0');
505                                         continue;
506                                 }
507
508                                 // Argument # is in range, so expand it
509                                 if (tk != NULL)
510                                 {
511                                         while (*tk != EOL)
512                                         {
513                                                 // Reverse-translation from a token number to a string.
514                                                 // This is a hack. It might be better table-driven.
515                                                 d = NULL;
516
517                                                 if ((*tk >= KW_D0) && !rdsp && !rgpu)
518                                                 {
519                                                         d = regname[(int)*tk++ - KW_D0];
520                                                         goto strcopy;
521                                                 }
522                                                 else if ((*tk >= KW_R0) && (*tk <= KW_R31))
523                                                 {
524                                                         d = riscregname[(int)*tk++ - KW_R0];
525                                                         goto strcopy;
526                                                 }
527                                                 else
528                                                 {
529                                                         switch ((int)*tk++)
530                                                         {
531                                                         case SYMBOL:
532 #if 0
533 //                                                              d = (char *)*tk++;
534                                                                 d = string[*tk++];
535 #else
536                                                                 // This fix should be done for strings too
537                                                                 d = symbolString[*tk++];
538 DEBUG { printf("ExM: SYMBOL=\"%s\"", d); }
539 #endif
540                                                                 break;
541                                                         case STRING:
542 #if 0
543 //                                                              d = (char *)*tk++;
544                                                                 d = string[*tk++];
545 #else
546                                                                 d = symbolString[*tk++];
547 #endif
548                                                                 if (dst >= edst)
549                                                                         goto overflow;
550
551                                                                 *dst++ = '"';
552
553                                                                 while (*d != EOS)
554                                                                 {
555                                                                         if (dst >= edst)
556                                                                                 goto overflow;
557                                                                         else
558                                                                                 *dst++ = *d++;
559                                                                 }
560
561                                                                 if (dst >= edst)
562                                                                         goto overflow;
563
564                                                                 *dst++ = '"';
565                                                                 continue;
566                                                                 break;
567 // Shamus: Changing the format specifier from %lx to %ux caused the assembler
568 //         to choke on legitimate code... Need to investigate this further
569 //         before changing anything else here!
570                                                         case CONST:
571                                                                 sprintf(numbuf, "$%lx", (uint64_t)*tk++);
572                                                                 tk++;
573                                                                 d = numbuf;
574                                                                 break;
575                                                         case DEQUALS:
576                                                                 d = "==";
577                                                                 break;
578                                                         case SET:
579                                                                 d = "set";
580                                                                 break;
581                                                         case COLON:
582                                                                 d = ":";
583                                                                 break;
584                                                         case DCOLON:
585                                                                 d = "::";
586                                                                 break;
587                                                         case GE:
588                                                                 d = ">=";
589                                                                 break;
590                                                         case LE:
591                                                                 d = "<=";
592                                                                 break;
593                                                         case NE:
594                                                                 d = "<>";
595                                                                 break;
596                                                         case SHR:
597                                                                 d = ">>";
598                                                                 break;
599                                                         case SHL:
600                                                                 d = "<<";
601                                                                 break;
602                                                         case DOTB:
603                                                                 d = ".b";
604                                                                 break;
605                                                         case DOTW:
606                                                                 d = ".w";
607                                                                 break;
608                                                         case DOTL:
609                                                                 d = ".l";
610                                                                 break;
611                                                         case CR_ABSCOUNT:
612                                                                 d = "^^abscount";
613                                                                 break;
614                                                         case CR_DATE:
615                                                                 d = "^^date";
616                                                                 break;
617                                                         case CR_TIME:
618                                                                 d = "^^time";
619                                                                 break;
620                                                         case CR_DEFINED:
621                                                                 d = "^^defined ";
622                                                                 break;
623                                                         case CR_REFERENCED:
624                                                                 d = "^^referenced ";
625                                                                 break;
626                                                         case CR_STREQ:
627                                                                 d = "^^streq ";
628                                                                 break;
629                                                         case CR_MACDEF:
630                                                                 d = "^^macdef ";
631                                                                 break;
632                                                         default:
633                                                                 if (dst >= edst)
634                                                                         goto overflow;
635
636                                                                 *dst++ = (char)*(tk - 1);
637                                                                 break;
638                                                         }
639                                                 }
640
641                                                 // If 'd' != NULL, copy string to destination
642                                                 if (d != NULL)
643                                                 {
644 strcopy:
645                                                         DEBUG printf("d='%s'\n", d);
646
647                                                         while (*d != EOS)
648                                                         {
649                                                                 if (dst >= edst)
650                                                                         goto overflow;
651                                                                 else
652                                                                         *dst++ = *d++;
653                                                         }
654                                                 }
655                                         }
656                                 }
657                         }
658                 }
659         }
660
661 skipcomments:
662
663         *dst = EOS;
664         DEBUG { printf("ExM: dst=\"%s\"\n", dest); }
665         return OK;
666
667 overflow:
668         *dst = EOS;
669         DEBUG { printf("*** OVERFLOW LINE ***\n%s\n", dest); }
670         return fatal("line too long as a result of macro expansion");
671 }
672
673
674 //
675 // Get next line of text from a macro
676 //
677 char * GetNextMacroLine(void)
678 {
679         IMACRO * imacro = cur_inobj->inobj.imacro;
680 //      LONG * strp = imacro->im_nextln;
681         LLIST * strp = imacro->im_nextln;
682
683         if (strp == NULL)                                               // End-of-macro
684                 return NULL;
685
686         imacro->im_nextln = strp->next;
687 //      ExpandMacro((char *)(strp + 1), imacro->im_lnbuf, LNSIZ);
688         ExpandMacro(strp->line, imacro->im_lnbuf, LNSIZ);
689
690         return imacro->im_lnbuf;
691 }
692
693
694 //
695 // Get next line of text from a repeat block
696 //
697 char * GetNextRepeatLine(void)
698 {
699         IREPT * irept = cur_inobj->inobj.irept;
700 //      LONG * strp = irept->ir_nextln;                 // initial null
701
702         // Do repeat at end of .rept block's string list
703 //      if (strp == NULL)
704         if (irept->ir_nextln == NULL)
705         {
706                 DEBUG { printf("back-to-top-of-repeat-block count=%d\n", (int)irept->ir_count); }
707                 irept->ir_nextln = irept->ir_firstln;   // copy first line
708
709                 if (irept->ir_count-- == 0)
710                 {
711                         DEBUG { printf("end-repeat-block\n"); }
712                         return NULL;
713                 }
714
715 //              strp = irept->ir_nextln;
716         }
717
718 //      strcpy(irbuf, (char *)(irept->ir_nextln + 1));
719         strcpy(irbuf, irept->ir_nextln->line);
720         DEBUG { printf("repeat line='%s'\n", irbuf); }
721 //      irept->ir_nextln = (LONG *)*strp;
722         irept->ir_nextln = irept->ir_nextln->next;
723
724         return irbuf;
725 }
726
727
728 //
729 // Include a source file used at the root, and for ".include" files
730 //
731 int include(int handle, char * fname)
732 {
733         // Debug mode
734         DEBUG { printf("[include: %s, cfileno=%u]\n", fname, cfileno); }
735
736         // Alloc and initialize include-descriptors
737         INOBJ * inobj = a_inobj(SRC_IFILE);
738         IFILE * ifile = inobj->inobj.ifile;
739
740         ifile->ifhandle = handle;                       // Setup file handle
741         ifile->ifind = ifile->ifcnt = 0;        // Setup buffer indices
742         ifile->ifoldlineno = curlineno;         // Save old line number
743         ifile->ifoldfname = curfname;           // Save old filename
744         ifile->ifno = cfileno;                          // Save old file number
745
746         // NB: This *must* be preincrement, we're adding one to the filecount here!
747         cfileno = ++filecount;                          // Compute NEW file number
748         curfname = strdup(fname);                       // Set current filename (alloc storage)
749         curlineno = 0;                                          // Start on line zero
750
751         // Add another file to the file-record
752         FILEREC * fr = (FILEREC *)malloc(sizeof(FILEREC));
753         fr->frec_next = NULL;
754         fr->frec_name = curfname;
755
756         if (last_fr == NULL)
757                 filerec = fr;                                   // Add first filerec
758         else
759                 last_fr->frec_next = fr;                // Append to list of filerecs
760
761         last_fr = fr;
762         DEBUG { printf("[include: curfname: %s, cfileno=%u]\n", curfname, cfileno); }
763
764         return OK;
765 }
766
767
768 //
769 // Pop the current input level
770 //
771 int fpop(void)
772 {
773         INOBJ * inobj = cur_inobj;
774
775         if (inobj == NULL)
776                 return 0;
777
778         // Pop IFENT levels until we reach the conditional assembly context we
779         // were at when the input object was entered.
780         int numUnmatched = 0;
781
782         while (ifent != inobj->in_ifent)
783         {
784                 if (d_endif() != 0)     // Something bad happened during endif parsing?
785                         return -1;              // If yes, bail instead of getting stuck in a loop
786
787                 numUnmatched++;
788         }
789
790         // Give a warning to the user that we had to wipe their bum for them
791         if (numUnmatched > 0)
792                 warn("missing %d .endif(s)", numUnmatched);
793
794         tok.u32 = inobj->in_otok;       // Restore tok and otok
795         etok = inobj->in_etok;
796
797         switch (inobj->in_type)
798         {
799         case SRC_IFILE:                 // Pop and release an IFILE
800         {
801                 DEBUG { printf("[Leaving: %s]\n", curfname); }
802
803                 IFILE * ifile = inobj->inobj.ifile;
804                 ifile->if_link = f_ifile;
805                 f_ifile = ifile;
806                 close(ifile->ifhandle);                 // Close source file
807 DEBUG { printf("[fpop (pre):  curfname=%s]\n", curfname); }
808                 curfname = ifile->ifoldfname;   // Set current filename
809 DEBUG { printf("[fpop (post): curfname=%s]\n", curfname); }
810 DEBUG { printf("[fpop: (pre)  cfileno=%d ifile->ifno=%d]\n", (int)cfileno, (int)ifile->ifno); }
811                 curlineno = ifile->ifoldlineno; // Set current line#
812                 DEBUG { printf("cfileno=%d ifile->ifno=%d\n", (int)cfileno, (int)ifile->ifno); }
813                 cfileno = ifile->ifno;                  // Restore current file number
814 DEBUG { printf("[fpop: (post) cfileno=%d ifile->ifno=%d]\n", (int)cfileno, (int)ifile->ifno); }
815                 break;
816         }
817
818         case SRC_IMACRO:                                        // Pop and release an IMACRO
819         {
820                 IMACRO * imacro = inobj->inobj.imacro;
821                 imacro->im_link = f_imacro;
822                 f_imacro = imacro;
823                 break;
824         }
825
826         case SRC_IREPT:                                         // Pop and release an IREPT
827         {
828                 DEBUG { printf("dealloc IREPT\n"); }
829                 LLIST * p = inobj->inobj.irept->ir_firstln;
830
831                 // Deallocate repeat lines
832                 while (p != NULL)
833                 {
834                         free(p->line);
835                         p = p->next;
836                 }
837
838                 break;
839         }
840         }
841
842         cur_inobj = inobj->in_link;
843         inobj->in_link = f_inobj;
844         f_inobj = inobj;
845
846         return 0;
847 }
848
849
850 //
851 // Get line from file into buf, return NULL on EOF or ptr to the start of a
852 // null-term line
853 //
854 char * GetNextLine(void)
855 {
856         int i, j;
857         char * p, * d;
858         int readamt = -1;                                               // 0 if last read() yeilded 0 bytes
859         IFILE * fl = cur_inobj->inobj.ifile;
860
861         for(;;)
862         {
863                 // Scan for next end-of-line; handle stupid text formats by treating
864                 // \r\n the same as \n. (lone '\r' at end of buffer means we have to
865                 // check for '\n').
866                 d = &fl->ifbuf[fl->ifind];
867
868                 for(p=d, i=0, j=fl->ifcnt; i<j; i++, p++)
869                 {
870                         if (*p == '\r' || *p == '\n')
871                         {
872                                 i++;
873
874                                 if (*p == '\r')
875                                 {
876                                         if (i >= j)
877                                                 break;  // Need to read more, then look for '\n' to eat
878                                         else if (p[1] == '\n')
879                                                 i++;
880                                 }
881
882                                 // Cover up the newline with end-of-string sentinel
883                                 *p = '\0';
884
885                                 fl->ifind += i;
886                                 fl->ifcnt -= i;
887                                 return d;
888                         }
889                 }
890
891                 // Handle hanging lines by ignoring them (Input file is exhausted, no
892                 // \r or \n on last line)
893                 // Shamus: This is retarded. Never ignore any input!
894                 if (!readamt && fl->ifcnt)
895                 {
896 #if 0
897                         fl->ifcnt = 0;
898                         *p = '\0';
899                         return NULL;
900 #else
901                         // Really should check to see if we're at the end of the buffer!
902                         // :-P
903                         fl->ifbuf[fl->ifind + fl->ifcnt] = '\0';
904                         fl->ifcnt = 0;
905                         return &fl->ifbuf[fl->ifind];
906 #endif
907                 }
908
909                 // Truncate and return absurdly long lines.
910                 if (fl->ifcnt >= QUANTUM)
911                 {
912                         fl->ifbuf[fl->ifind + fl->ifcnt - 1] = '\0';
913                         fl->ifcnt = 0;
914                         return &fl->ifbuf[fl->ifind];
915                 }
916
917                 // Relocate what's left of a line to the beginning of the buffer, and
918                 // read some more of the file in; return NULL if the buffer's empty and
919                 // on EOF.
920                 if (fl->ifind != 0)
921                 {
922                         p = &fl->ifbuf[fl->ifind];
923                         d = &fl->ifbuf[fl->ifcnt & 1];
924
925                         for(i=0; i<fl->ifcnt; i++)
926                                 *d++ = *p++;
927
928                         fl->ifind = fl->ifcnt & 1;
929                 }
930
931                 readamt = read(fl->ifhandle, &fl->ifbuf[fl->ifind + fl->ifcnt], QUANTUM);
932
933                 if (readamt < 0)
934                         return NULL;
935
936                 if ((fl->ifcnt += readamt) == 0)
937                         return NULL;
938         }
939 }
940
941
942 //
943 // Tokenize a line
944 //
945 int TokenizeLine(void)
946 {
947         uint8_t * ln = NULL;            // Ptr to current position in line
948         uint8_t * p;                            // Random character ptr
949         TOKENPTR tk;                            // Token-deposit ptr
950         int state = 0;                          // State for keyword detector
951         int j = 0;                                      // Var for keyword detector
952         uint8_t c;                                      // Random char
953         uint64_t v;                                     // Random value
954         uint32_t cursize = 0;           // Current line's size (.b, .w, .l, .s, .q, .d)
955         double f;                                       // Random float
956         uint8_t * nullspot = NULL;      // Spot to clobber for SYMBOL termination
957         int stuffnull;                          // 1:terminate SYMBOL '\0' at *nullspot
958         uint8_t c1;
959         int stringNum = 0;                      // Pointer to string locations in tokenized line
960
961 retry:
962
963         if (cur_inobj == NULL)                  // Return EOF if input stack is empty
964                 return TKEOF;
965
966         // Get another line of input from the current input source: a file, a
967         // macro, or a repeat-block
968         switch (cur_inobj->in_type)
969         {
970         // Include-file:
971         // o  handle EOF;
972         // o  bump source line number;
973         // o  tag the listing-line with a space;
974         // o  kludge lines generated by Alcyon C.
975         case SRC_IFILE:
976                 if ((ln = GetNextLine()) == NULL)
977                 {
978 DEBUG { printf("TokenizeLine: Calling fpop() from SRC_IFILE...\n"); }
979                         if (fpop() == 0)                                // Pop input level
980                                 goto retry;                                     // Try for more lines
981                         else
982                         {
983                                 ifent->if_prev = (IFENT *)-1;   //Signal Assemble() that we have reached EOF with unbalanced if/endifs
984                                 return TKEOF;
985                         }
986                 }
987
988                 curlineno++;                                            // Bump line number
989                 lntag = SPACE;
990
991                 if (as68_flag)
992                 {
993                         // AS68 compatibility, throw away all lines starting with
994                         // back-quotes, tildes, or '*'
995                         // On other lines, turn the first '*' into a semi-colon.
996                         if (*ln == '`' || *ln == '~' || *ln == '*')
997                                 *ln = ';';
998                         else
999                         {
1000                                 for(p=ln; *p!=EOS; p++)
1001                                 {
1002                                         if (*p == '*')
1003                                         {
1004                                                 *p = ';';
1005                                                 break;
1006                                         }
1007                                 }
1008                         }
1009                 }
1010
1011                 break;
1012
1013         // Macro-block:
1014         // o  Handle end-of-macro;
1015         // o  tag the listing-line with an at (@) sign.
1016         case SRC_IMACRO:
1017                 if ((ln = GetNextMacroLine()) == NULL)
1018                 {
1019                         if (ExitMacro() == 0)   // Exit macro (pop args, do fpop(), etc)
1020                                 goto retry;                     // Try for more lines...
1021                         else
1022                                 return TKEOF;           // Oops, we got a non zero return code, signal EOF
1023                 }
1024
1025                 lntag = '@';
1026                 break;
1027
1028         // Repeat-block:
1029         // o  Handle end-of-repeat-block;
1030         // o  tag the listing-line with a pound (#) sign.
1031         case SRC_IREPT:
1032                 if ((ln = GetNextRepeatLine()) == NULL)
1033                 {
1034                         DEBUG { printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n"); }
1035                         fpop();
1036                         goto retry;
1037                 }
1038
1039                 lntag = '#';
1040                 break;
1041         }
1042
1043         // Save text of the line. We only do this during listings and within
1044         // macro-type blocks, since it is expensive to unconditionally copy every
1045         // line.
1046         if (lnsave)
1047                 strcpy(lnbuf, ln);
1048
1049         // General housekeeping
1050         tok.u32 = tokeol;               // Set "tok" to EOL in case of error
1051         tk.u32 = etok;                  // Reset token ptr
1052         stuffnull = 0;                  // Don't stuff nulls
1053         totlines++;                             // Bump total #lines assembled
1054
1055         // See if the entire line is a comment. This is a win if the programmer
1056         // puts in lots of comments
1057         if (*ln == '*' || *ln == ';' || ((*ln == '/') && (*(ln + 1) == '/')))
1058                 goto goteol;
1059
1060         // And here we have a very ugly hack for signalling a single line 'turn off
1061         // optimization'. There's really no nice way to do this, so hack it is!
1062         optimizeOff = 0;                // Default is to take optimizations as they come
1063
1064         if (*ln == '!')
1065         {
1066                 optimizeOff = 1;        // Signal that we don't want to optimize this line
1067                 ln++;                           // & skip over the darned thing
1068         }
1069
1070         // Main tokenization loop;
1071         //  o  skip whitespace;
1072         //  o  handle end-of-line;
1073         //  o  handle symbols;
1074         //  o  handle single-character tokens (operators, etc.);
1075         //  o  handle multiple-character tokens (constants, strings, etc.).
1076         for(; *ln!=EOS;)
1077         {
1078                 // Skip whitespace, handle EOL
1079                 while (chrtab[*ln] & WHITE)
1080                         ln++;
1081
1082                 // Handle EOL, comment with ';'
1083                 if (*ln == EOS || *ln == ';'|| ((*ln == '/') && (*(ln + 1) == '/')))
1084                         break;
1085
1086                 // Handle start of symbol. Symbols are null-terminated in place. The
1087                 // termination is always one symbol behind, since there may be no place
1088                 // for a null in the case that an operator immediately follows the name.
1089                 c = chrtab[*ln];
1090
1091                 if (c & STSYM)
1092                 {
1093                         if (stuffnull)                  // Terminate old symbol from previous pass
1094                                 *nullspot = EOS;
1095
1096                         v = 0;                                  // Assume no DOT attrib follows symbol
1097                         stuffnull = 1;
1098
1099                         // In some cases, we need to check for a DOTx at the *beginning*
1100                         // of a symbol, as the "start" of the line we're currently looking
1101                         // at could be somewhere in the middle of that line!
1102                         if (*ln == '.')
1103                         {
1104                                 // Make sure that it's *only* a .[bwsl] following, and not the
1105                                 // start of a local symbol:
1106                                 if ((chrtab[*(ln + 1)] & DOT)
1107                                         && (dotxtab[*(ln + 1)] != 0)
1108                                         && !(chrtab[*(ln + 2)] & CTSYM))
1109                                 {
1110                                         // We found a legitimate DOTx construct, so add it to the
1111                                         // token stream:
1112                                         ln++;
1113                                         stuffnull = 0;
1114                                         *tk.u32++ = (TOKEN)dotxtab[*ln++];
1115                                         continue;
1116                                 }
1117                         }
1118
1119                         p = nullspot = ln++;    // Nullspot -> start of this symbol
1120
1121                         // Find end of symbol (and compute its length)
1122                         for(j=1; (int)chrtab[*ln]&CTSYM; j++)
1123                                 ln++;
1124
1125                         // Handle "DOT" special forms (like ".b") that follow a normal
1126                         // symbol or keyword:
1127                         if (*ln == '.')
1128                         {
1129                                 *ln++ = EOS;            // Terminate symbol
1130                                 stuffnull = 0;          // And never try it again
1131
1132                                 // Character following the '.' must have a DOT attribute, and
1133                                 // the chararacter after THAT one must not have a start-symbol
1134                                 // attribute (to prevent symbols that look like, for example,
1135                                 // "zingo.barf", which might be a good idea anyway....)
1136                                 if (((chrtab[*ln] & DOT) == 0) || (dotxtab[*ln] == 0))
1137                                         return error("[bwsl] must follow '.' in symbol");
1138
1139                                 v = (uint32_t)dotxtab[*ln++];
1140                                 cursize = (uint32_t)v;
1141
1142                                 if (chrtab[*ln] & CTSYM)
1143                                         return error("misuse of '.'; not allowed in symbols");
1144                         }
1145
1146                         // If the symbol is small, check to see if it's really the name of
1147                         // a register.
1148                         if (j <= KWSIZE)
1149                         {
1150                                 for(state=0; state>=0;)
1151                                 {
1152                                         j = (int)tolowertab[*p++];
1153                                         j += kwbase[state];
1154
1155                                         if (kwcheck[j] != state)
1156                                         {
1157                                                 j = -1;
1158                                                 break;
1159                                         }
1160
1161                                         if (*p == EOS || p == ln)
1162                                         {
1163                                                 j = kwaccept[j];
1164                                                 break;
1165                                         }
1166
1167                                         state = kwtab[j];
1168                                 }
1169                         }
1170                         else
1171                         {
1172                                 j = -1;
1173                         }
1174
1175                         // Make j = -1 if user tries to use a RISC register while in 68K mode
1176                         if (!(rgpu || rdsp) && ((TOKEN)j >= KW_R0 && (TOKEN)j <= KW_R31))
1177                         {
1178                                 j = -1;
1179                         }
1180
1181                         // Make j = -1 if time, date etc with no preceeding ^^
1182                         // defined, referenced, streq, macdef, date and time
1183                         switch ((TOKEN)j)
1184                         {
1185                         case 112:   // defined
1186                         case 113:   // referenced
1187                         case 118:   // streq
1188                         case 119:   // macdef
1189                         case 120:   // time
1190                         case 121:   // date
1191                                 j = -1;
1192                         }
1193
1194                         // If not tokenized keyword OR token was not found
1195                         if ((j < 0) || (state < 0))
1196                         {
1197                                 *tk.u32++ = SYMBOL;
1198 //#warning
1199 //problem here: nullspot is a char * but TOKEN is a uint32_t. On a 64-bit
1200 //system, this will cause all kinds of mischief.
1201 #if 0
1202                                 *tk++ = (TOKEN)nullspot;
1203 #else
1204                                 string[stringNum] = nullspot;
1205                                 *tk.u32++ = stringNum;
1206                                 stringNum++;
1207 #endif
1208                         }
1209                         else
1210                         {
1211                                 *tk.u32++ = (TOKEN)j;
1212                                 stuffnull = 0;
1213                         }
1214
1215                         if (v)                                                  // Record attribute token (if any)
1216                                 *tk.u32++ = (TOKEN)v;
1217
1218                         if (stuffnull)                                  // Arrange for string termination on next pass
1219                                 nullspot = ln;
1220
1221                         continue;
1222                 }
1223
1224                 // Handle identity tokens
1225                 if (c & SELF)
1226                 {
1227                         *tk.u32++ = *ln++;
1228                         continue;
1229                 }
1230
1231                 // Handle multiple-character tokens
1232                 if (c & MULTX)
1233                 {
1234                         switch (*ln++)
1235                         {
1236                         case '!':               // ! or !=
1237                                 if (*ln == '=')
1238                                 {
1239                                         *tk.u32++ = NE;
1240                                         ln++;
1241                                 }
1242                                 else
1243                                         *tk.u32++ = '!';
1244
1245                                 continue;
1246                         case '\'':              // 'string'
1247                                 if (m6502)
1248                                 {
1249                                         // Hardcoded for now, maybe this will change in the future
1250                                         *tk.u32++ = STRINGA8;
1251                                         goto dostring;
1252                                 }
1253                                 // Fall through
1254                         case '\"':              // "string"
1255                                 *tk.u32++ = STRING;
1256 dostring:
1257                                 c1 = ln[-1];
1258                                 string[stringNum] = ln;
1259                                 *tk.u32++ = stringNum;
1260                                 stringNum++;
1261
1262                                 for(p=ln; *ln!=EOS && *ln!=c1;)
1263                                 {
1264                                         c = *ln++;
1265
1266                                         if (c == '\\')
1267                                         {
1268                                                 switch (*ln++)
1269                                                 {
1270                                                 case EOS:
1271                                                         return(error("unterminated string"));
1272                                                 case 'e':
1273                                                         c = '\033';
1274                                                         break;
1275                                                 case 'n':
1276                                                         c = '\n';
1277                                                         break;
1278                                                 case 'b':
1279                                                         c = '\b';
1280                                                         break;
1281                                                 case 't':
1282                                                         c = '\t';
1283                                                         break;
1284                                                 case 'r':
1285                                                         c = '\r';
1286                                                         break;
1287                                                 case 'f':
1288                                                         c = '\f';
1289                                                         break;
1290                                                 case '\"':
1291                                                         c = '\"';
1292                                                         break;
1293                                                 case '\'':
1294                                                         c = '\'';
1295                                                         break;
1296                                                 case '\\':
1297                                                         c = '\\';
1298                                                         break;
1299                                                 case '!':
1300                                                         // If we're evaluating a macro
1301                                                         // this is valid and expands to
1302                                                         // "dot-size"
1303                                                         break;
1304                                                 default:
1305                                                         warn("bad backslash code in string");
1306                                                         ln--;
1307                                                         break;
1308                                                 }
1309                                         }
1310
1311                                         *p++ = c;
1312                                 }
1313
1314                                 if (*ln++ != c1)
1315                                         return error("unterminated string");
1316
1317                                 *p++ = EOS;
1318                                 continue;
1319                         case '$':               // $, hex constant
1320                                 if (chrtab[*ln] & HDIGIT)
1321                                 {
1322                                         v = 0;
1323
1324                                         // Parse the hex value
1325                                         while (hextab[*ln] >= 0)
1326                                                 v = (v << 4) + (int)hextab[*ln++];
1327
1328                                         if (*ln == '.')
1329                                         {
1330                                                 if (obj_format == BSD)
1331                                                 {
1332                                                         if ((*(ln + 1) & 0xDF) == 'B')
1333                                                         {
1334                                                                 v &= 0x000000FF;
1335                                                                 ln += 2;
1336                                                         }
1337                                                         else if ((*(ln + 1) & 0xDF) == 'W')
1338                                                         {
1339                                                                 v &= 0x0000FFFF;
1340                                                                 ln += 2;
1341                                                         }
1342                                                         else if ((*(ln + 1) & 0xDF) == 'L')
1343                                                         {
1344                                                                 v &= 0xFFFFFFFF;
1345                                                                 ln += 2;
1346                                                         }
1347                                                 }
1348                                         }
1349
1350                                         *tk.u32++ = CONST;
1351                                         *tk.u64++ = v;
1352
1353                                         if (obj_format == ALCYON)
1354                                         {
1355                                                 if (*ln == '.')
1356                                                 {
1357                                                         if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
1358                                                         {
1359                                                                 *tk.u32++ = DOTW;
1360                                                                 ln += 2;
1361                                                         }
1362                                                         else if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
1363                                                         {
1364                                                                 *tk.u32++ = DOTL;
1365                                                                 ln += 2;
1366                                                         }
1367                                                 }
1368                                         }
1369                                 }
1370                                 else
1371                                         *tk.u32++ = '$';
1372
1373                                 continue;
1374                         case '<':               // < or << or <> or <=
1375                                 switch (*ln)
1376                                 {
1377                                 case '<':
1378                                         *tk.u32++ = SHL;
1379                                         ln++;
1380                                         continue;
1381                                 case '>':
1382                                         *tk.u32++ = NE;
1383                                         ln++;
1384                                         continue;
1385                                 case '=':
1386                                         *tk.u32++ = LE;
1387                                         ln++;
1388                                         continue;
1389                                 default:
1390                                         *tk.u32++ = '<';
1391                                         continue;
1392                                 }
1393                         case ':':               // : or ::
1394                                 if (*ln == ':')
1395                                 {
1396                                         *tk.u32++ = DCOLON;
1397                                         ln++;
1398                                 }
1399                                 else
1400                                         *tk.u32++ = ':';
1401
1402                                 continue;
1403                         case '=':               // = or ==
1404                                 if (*ln == '=')
1405                                 {
1406                                         *tk.u32++ = DEQUALS;
1407                                         ln++;
1408                                 }
1409                                 else
1410                                         *tk.u32++ = '=';
1411
1412                                 continue;
1413                         case '>':               // > or >> or >=
1414                                 switch (*ln)
1415                                 {
1416                                 case '>':
1417                                         *tk.u32++ = SHR;
1418                                         ln++;
1419                                         continue;
1420                                 case '=':
1421                                         *tk.u32++ = GE;
1422                                         ln++;
1423                                         continue;
1424                                 default:
1425                                         *tk.u32++ = '>';
1426                                         continue;
1427                                 }
1428                         case '%':               // % or binary constant
1429                                 if (*ln < '0' || *ln > '1')
1430                                 {
1431                                         *tk.u32++ = '%';
1432                                         continue;
1433                                 }
1434
1435                                 v = 0;
1436
1437                                 while (*ln >= '0' && *ln <= '1')
1438                                         v = (v << 1) + *ln++ - '0';
1439
1440                                 if (*ln == '.')
1441                                 {
1442                                         if ((*(ln + 1) == 'b') || (*(ln + 1) == 'B'))
1443                                         {
1444                                                 v &= 0x000000FF;
1445                                                 ln += 2;
1446                                         }
1447
1448                                         if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
1449                                         {
1450                                                 v &= 0x0000FFFF;
1451                                                 ln += 2;
1452                                         }
1453
1454                                         if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
1455                                         {
1456                                                 v &= 0xFFFFFFFF;
1457                                                 ln += 2;
1458                                         }
1459                                 }
1460
1461                                 *tk.u32++ = CONST;
1462                                 *tk.u64++ = v;
1463                                 continue;
1464                         case '@':               // @ or octal constant
1465                                 if (*ln < '0' || *ln > '7')
1466                                 {
1467                                         *tk.u32++ = '@';
1468                                         continue;
1469                                 }
1470
1471                                 v = 0;
1472
1473                                 while (*ln >= '0' && *ln <= '7')
1474                                         v = (v << 3) + *ln++ - '0';
1475
1476                                 if (*ln == '.')
1477                                 {
1478                                         if ((*(ln + 1) == 'b') || (*(ln + 1) == 'B'))
1479                                         {
1480                                                 v &= 0x000000FF;
1481                                                 ln += 2;
1482                                         }
1483
1484                                         if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
1485                                         {
1486                                                 v &= 0x0000FFFF;
1487                                                 ln += 2;
1488                                         }
1489
1490                                         if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
1491                                         {
1492                                                 v &= 0xFFFFFFFF;
1493                                                 ln += 2;
1494                                         }
1495                                 }
1496
1497                                 *tk.u32++ = CONST;
1498                                 *tk.u64++ = v;
1499                                 continue;
1500                         case '^':               // ^ or ^^ <operator-name>
1501                                 if (*ln != '^')
1502                                 {
1503                                         *tk.u32++ = '^';
1504                                         continue;
1505                                 }
1506
1507                                 if (((int)chrtab[*++ln] & STSYM) == 0)
1508                                 {
1509                                         error("invalid symbol following ^^");
1510                                         continue;
1511                                 }
1512
1513                                 p = ln++;
1514
1515                                 while ((int)chrtab[*ln] & CTSYM)
1516                                         ++ln;
1517
1518                                 for(state=0; state>=0;)
1519                                 {
1520                                         // Get char, convert to lowercase
1521                                         j = *p++;
1522
1523                                         if (j >= 'A' && j <= 'Z')
1524                                                 j += 0x20;
1525
1526                                         j += kwbase[state];
1527
1528                                         if (kwcheck[j] != state)
1529                                         {
1530                                                 j = -1;
1531                                                 break;
1532                                         }
1533
1534                                         if (*p == EOS || p == ln)
1535                                         {
1536                                                 j = kwaccept[j];
1537                                                 break;
1538                                         }
1539
1540                                         state = kwtab[j];
1541                                 }
1542
1543                                 if (j < 0 || state < 0)
1544                                 {
1545                                         error("unknown symbol following ^^");
1546                                         continue;
1547                                 }
1548
1549                                 *tk.u32++ = (TOKEN)j;
1550                                 continue;
1551                         default:
1552                                 interror(2);    // Bad MULTX entry in chrtab
1553                                 continue;
1554                         }
1555                 }
1556
1557                 // Handle decimal constant
1558                 if (c & DIGIT)
1559                 {
1560                         uint8_t * numStart = ln;
1561                         v = 0;
1562
1563                         while ((int)chrtab[*ln] & DIGIT)
1564                                 v = (v * 10) + *ln++ - '0';
1565
1566                         // See if there's a .[bwl] after the constant & deal with it if so
1567                         if (*ln == '.')
1568                         {
1569                                 if ((*(ln + 1) == 'b') || (*(ln + 1) == 'B'))
1570                                 {
1571                                         v &= 0x000000FF;
1572                                         ln += 2;
1573                                         *tk.u32++ = CONST;
1574                                         *tk.u64++ = v;
1575                                         *tk.u32++ = DOTB;
1576                                 }
1577                                 else if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
1578                                 {
1579                                         v &= 0x0000FFFF;
1580                                         ln += 2;
1581                                         *tk.u32++ = CONST;
1582                                         *tk.u64++ = v;
1583                                         *tk.u32++ = DOTW;
1584                                 }
1585                                 else if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
1586                                 {
1587                                         v &= 0xFFFFFFFF;
1588                                         ln += 2;
1589                                         *tk.u32++ = CONST;
1590                                         *tk.u64++ = v;
1591                                         *tk.u32++ = DOTL;
1592                                 }
1593                                 else if ((int)chrtab[*(ln + 1)] & DIGIT)
1594                                 {
1595                                         // Hey, more digits after the dot, so we assume it's a
1596                                         // floating point number of some kind
1597 #if 0
1598                                         double fract = 10;
1599                                         ln++;
1600                                         f = (double)v;
1601
1602                                         while ((int)chrtab[*ln] & DIGIT)
1603                                         {
1604                                                 f = f + (double)(*ln++ - '0') / fract;
1605                                                 fract *= 10;
1606                                         }
1607 #else
1608                                         // Here we parse the whole floating point number
1609 #include <errno.h>
1610                                         char * numEnd;
1611                                         errno = 0;
1612                                         double f = strtod(numStart, &numEnd);
1613                                         ln = (uint8_t *)numEnd;
1614
1615                                         if (errno != 0)
1616                                                 return error("floating point parse error");
1617 #endif
1618
1619                                         *tk.u32++ = FCONST;
1620 // Shamus: Well, this is all kinds of icky--not the least of which is that unlike uintNN_t types, we have no guarantees of any kind when it comes to the size of floating point numbers in C (as far as I know of). If there is, we need to use those kinds here, or else figure out at runtime what sizes we're dealing with and act accordingly. To be fair, this is OK as long as the double type is less than 64 bits wide, but again, there's no guarantee that it isn't. :-/
1621                                         *tk.u64++ = f;
1622                                         continue;
1623                                 }
1624                         }
1625                         else
1626                         {
1627                                 *tk.u32++ = CONST;
1628                                 *tk.u64++ = v;
1629                         }
1630
1631 //printf("CONST: %i\n", v);
1632                         continue;
1633                 }
1634
1635                 // Handle illegal character
1636                 return error("illegal character $%02X found", *ln);
1637         }
1638
1639         // Terminate line of tokens and return "success."
1640
1641 goteol:
1642         tok.u32 = etok;                                                 // Set tok to beginning of line
1643
1644         if (stuffnull)                                                  // Terminate last SYMBOL
1645                 *nullspot = EOS;
1646
1647         *tk.u32++ = EOL;
1648
1649         return OK;
1650 }
1651
1652
1653 //
1654 // .GOTO <label>        goto directive
1655 //
1656 // The label is searched for starting from the first line of the current,
1657 // enclosing macro definition. If no enclosing macro exists, an error is
1658 // generated.
1659 //
1660 // A label is of the form:
1661 //
1662 // :<name><whitespace>
1663 //
1664 // The colon must appear in column 1.  The label is stripped prior to macro
1665 // expansion, and is NOT subject to macro expansion.  The whitespace may also
1666 // be EOL.
1667 //
1668 int d_goto(WORD unused)
1669 {
1670         // Setup for the search
1671         if (*tok.u32 != SYMBOL)
1672                 return error("missing label");
1673
1674         char * sym = string[tok.u32[1]];
1675         tok.u32 += 2;
1676
1677         if (cur_inobj->in_type != SRC_IMACRO)
1678                 return error("goto not in macro");
1679
1680         IMACRO * imacro = cur_inobj->inobj.imacro;
1681         LLIST * defln = imacro->im_macro->lineList;
1682
1683         // Attempt to find the label, starting with the first line.
1684         for(; defln!=NULL; defln=defln->next)
1685         {
1686                 // Must start with a colon
1687                 if (defln->line[0] == ':')
1688                 {
1689                         // Compare names (sleazo string compare)
1690                         char * s1 = sym;
1691                         char * s2 = defln->line;
1692
1693                         // Either we will match the strings to EOS on both, or we will
1694                         // match EOS on string 1 to whitespace on string 2. Otherwise, we
1695                         // have no match.
1696                         while ((*s1 == *s2) || ((*s1 == EOS) && (chrtab[*s2] & WHITE)))
1697                         {
1698                                 // If we reached the end of string 1 (sym), we're done.
1699                                 // Note that we're also checking for the end of string 2 as
1700                                 // well, since we've established they're equal above.
1701                                 if (*s1 == EOS)
1702                                 {
1703                                         // Found the label, set new macro next-line and return.
1704                                         imacro->im_nextln = defln;
1705                                         return 0;
1706                                 }
1707
1708                                 s1++;
1709                                 s2++;
1710                         }
1711                 }
1712         }
1713
1714         return error("goto label not found");
1715 }
1716
1717
1718 void DumpToken(TOKEN t)
1719 {
1720         if (t == COLON)
1721                 printf("[COLON]");
1722         else if (t == CONST)
1723                 printf("[CONST]");
1724         else if (t == ACONST)
1725                 printf("[ACONST]");
1726         else if (t == STRING)
1727                 printf("[STRING]");
1728         else if (t == SYMBOL)
1729                 printf("[SYMBOL]");
1730         else if (t == EOS)
1731                 printf("[EOS]");
1732         else if (t == TKEOF)
1733                 printf("[TKEOF]");
1734         else if (t == DEQUALS)
1735                 printf("[DEQUALS]");
1736         else if (t == SET)
1737                 printf("[SET]");
1738         else if (t == REG)
1739                 printf("[REG]");
1740         else if (t == DCOLON)
1741                 printf("[DCOLON]");
1742         else if (t == GE)
1743                 printf("[GE]");
1744         else if (t == LE)
1745                 printf("[LE]");
1746         else if (t == NE)
1747                 printf("[NE]");
1748         else if (t == SHR)
1749                 printf("[SHR]");
1750         else if (t == SHL)
1751                 printf("[SHL]");
1752         else if (t == UNMINUS)
1753                 printf("[UNMINUS]");
1754         else if (t == DOTB)
1755                 printf("[DOTB]");
1756         else if (t == DOTW)
1757                 printf("[DOTW]");
1758         else if (t == DOTL)
1759                 printf("[DOTL]");
1760         else if (t == DOTQ)
1761                 printf("[DOTQ]");
1762         else if (t == DOTS)
1763                 printf("[DOTS]");
1764         else if (t == DOTD)
1765                 printf("[DOTD]");
1766         else if (t == DOTI)
1767                 printf("[DOTI]");
1768         else if (t == ENDEXPR)
1769                 printf("[ENDEXPR]");
1770         else if (t == CR_ABSCOUNT)
1771                 printf("[CR_ABSCOUNT]");
1772         else if (t == CR_DEFINED)
1773                 printf("[CR_DEFINED]");
1774         else if (t == CR_REFERENCED)
1775                 printf("[CR_REFERENCED]");
1776         else if (t == CR_STREQ)
1777                 printf("[CR_STREQ]");
1778         else if (t == CR_MACDEF)
1779                 printf("[CR_MACDEF]");
1780         else if (t == CR_TIME)
1781                 printf("[CR_TIME]");
1782         else if (t == CR_DATE)
1783                 printf("[CR_DATE]");
1784         else if (t >= 0x20 && t <= 0x2F)
1785                 printf("[%c]", (char)t);
1786         else if (t >= 0x3A && t <= 0x3F)
1787                 printf("[%c]", (char)t);
1788         else if (t >= 0x80 && t <= 0x87)
1789                 printf("[D%u]", ((uint32_t)t) - 0x80);
1790         else if (t >= 0x88 && t <= 0x8F)
1791                 printf("[A%u]", ((uint32_t)t) - 0x88);
1792         else
1793                 printf("[%X:%c]", (uint32_t)t, (char)t);
1794 }
1795
1796
1797 void DumpTokenBuffer(void)
1798 {
1799         printf("Tokens [%X]: ", sloc);
1800
1801         for(TOKEN * t=tokbuf; *t!=EOL; t++)
1802         {
1803                 if (*t == COLON)
1804                         printf("[COLON]");
1805                 else if (*t == CONST)
1806                 {
1807                         TOKENPTR tp = (TOKENPTR)(t + 1);
1808                         printf("[CONST: $%lX]", (uint64_t)(*tp.u64));
1809                         t += 2;
1810                 }
1811                 else if (*t == ACONST)
1812                 {
1813                         printf("[ACONST: $%X, $%X]", (uint32_t)t[1], (uint32_t)t[2]);
1814                         t += 2;
1815                 }
1816                 else if (*t == STRING)
1817                 {
1818                         t++;
1819                         printf("[STRING:\"%s\"]", string[*t]);
1820                 }
1821                 else if (*t == SYMBOL)
1822                 {
1823                         t++;
1824                         printf("[SYMBOL:\"%s\"]", string[*t]);
1825                 }
1826                 else if (*t == EOS)
1827                         printf("[EOS]");
1828                 else if (*t == TKEOF)
1829                         printf("[TKEOF]");
1830                 else if (*t == DEQUALS)
1831                         printf("[DEQUALS]");
1832                 else if (*t == SET)
1833                         printf("[SET]");
1834                 else if (*t == REG)
1835                         printf("[REG]");
1836                 else if (*t == DCOLON)
1837                         printf("[DCOLON]");
1838                 else if (*t == GE)
1839                         printf("[GE]");
1840                 else if (*t == LE)
1841                         printf("[LE]");
1842                 else if (*t == NE)
1843                         printf("[NE]");
1844                 else if (*t == SHR)
1845                         printf("[SHR]");
1846                 else if (*t == SHL)
1847                         printf("[SHL]");
1848                 else if (*t == UNMINUS)
1849                         printf("[UNMINUS]");
1850                 else if (*t == DOTB)
1851                         printf("[DOTB]");
1852                 else if (*t == DOTW)
1853                         printf("[DOTW]");
1854                 else if (*t == DOTL)
1855                         printf("[DOTL]");
1856                 else if (*t == DOTQ)
1857                         printf("[DOTQ]");
1858                 else if (*t == DOTS)
1859                         printf("[DOTS]");
1860                 else if (*t == DOTD)
1861                         printf("[DOTD]");
1862                 else if (*t == DOTI)
1863                         printf("[DOTI]");
1864                 else if (*t == ENDEXPR)
1865                         printf("[ENDEXPR]");
1866                 else if (*t == CR_ABSCOUNT)
1867                         printf("[CR_ABSCOUNT]");
1868                 else if (*t == CR_DEFINED)
1869                         printf("[CR_DEFINED]");
1870                 else if (*t == CR_REFERENCED)
1871                         printf("[CR_REFERENCED]");
1872                 else if (*t == CR_STREQ)
1873                         printf("[CR_STREQ]");
1874                 else if (*t == CR_MACDEF)
1875                         printf("[CR_MACDEF]");
1876                 else if (*t == CR_TIME)
1877                         printf("[CR_TIME]");
1878                 else if (*t == CR_DATE)
1879                         printf("[CR_DATE]");
1880                 else if (*t >= 0x20 && *t <= 0x2F)
1881                         printf("[%c]", (char)*t);
1882                 else if (*t >= 0x3A && *t <= 0x3F)
1883                         printf("[%c]", (char)*t);
1884                 else if (*t >= 0x80 && *t <= 0x87)
1885                         printf("[D%u]", ((uint32_t)*t) - 0x80);
1886                 else if (*t >= 0x88 && *t <= 0x8F)
1887                         printf("[A%u]", ((uint32_t)*t) - 0x88);
1888                 else
1889                         printf("[%X:%c]", (uint32_t)*t, (char)*t);
1890         }
1891
1892         printf("[EOL]\n");
1893 }
1894