Shamusworld >> Repos - rmac/blob - token.c
Fix problem in tokenizer that caused legit code to make assembler barf.
[rmac] / token.c
1 //
2 // RMAC - Reboot's Macro Assembler for the Atari Jaguar Console System
3 // TOKEN.C - Token Handling
4 // Copyright (C) 199x Landon Dyer, 2011-2012 Reboot and Friends
5 // RMAC derived from MADMAC v1.07 Written by Landon Dyer, 1986
6 // Source Utilised with the Kind Permission of Landon Dyer
7 //
8
9 #include "token.h"
10 #include "symbol.h"
11 #include "procln.h"
12 #include "macro.h"
13 #include "error.h"
14
15 #define DECL_KW                                             // Declare keyword arrays
16 #define DEF_KW                                              // Declare keyword values 
17 #include "kwtab.h"                                          // Incl generated keyword tables & defs
18
19 int lnsave;                                                 // 1; strcpy() text of current line
20 int curlineno;                                              // Current line number
21 int totlines;                                               // Total # of lines
22 int mjump_align = 0;                                        // mjump alignment flag
23 char lntag;                                                 // Line tag
24 char * curfname;                                            // Current filename
25 char tolowertab[128];                                       // Uppercase ==> lowercase 
26 char hextab[128];                                           // Table of hex values
27 char dotxtab[128];                                          // Table for ".b", ".s", etc.
28 char irbuf[LNSIZ];                                          // Text for .rept block line
29 char lnbuf[LNSIZ];                                          // Text of current line
30 WORD filecount;                                             // Unique file number counter
31 WORD cfileno;                                               // Current file number
32 TOKEN * tok;                                                // Ptr to current token
33 TOKEN * etok;                                               // Ptr past last token in tokbuf[]
34 TOKEN tokeol[1] = {EOL};                                    // Bailout end-of-line token
35
// One FILEREC is appended for every file passed to include(), forming a
// singly-linked list in the order the files were opened.
#define FILEREC struct _filerec
FILEREC
{
   FILEREC *frec_next;          // Next record, or NULL at the end of the list
   char *frec_name;             // Name of the file
};

FILEREC *filerec;               // Head of the file-record list
FILEREC *last_fr;               // Tail of the file-record list (append point)
46
47 INOBJ * cur_inobj;                                          // Ptr current input obj (IFILE/IMACRO)
48 static INOBJ * f_inobj;                                     // Ptr list of free INOBJs
49 static IFILE * f_ifile;                                     // Ptr list of free IFILEs
50 static IMACRO * f_imacro;                                   // Ptr list of free IMACROs
51
52 static TOKEN tokbuf[TOKBUFSIZE];                            // Token buffer (stack-like, all files)
53
54 char chrtab[] = {
55    ILLEG, ILLEG, ILLEG, ILLEG,                                    // NUL SOH STX ETX 
56    ILLEG, ILLEG, ILLEG, ILLEG,                                    // EOT ENQ ACK BEL 
57    ILLEG, WHITE, ILLEG, ILLEG,                                    // BS HT LF VT 
58    WHITE, ILLEG, ILLEG, ILLEG,                                    // FF CR SO SI 
59
60    ILLEG, ILLEG, ILLEG, ILLEG,                                    // DLE DC1 DC2 DC3 
61    ILLEG, ILLEG, ILLEG, ILLEG,                                    // DC4 NAK SYN ETB 
62    ILLEG, ILLEG, ILLEG, ILLEG,                                    // CAN EM SUB ESC 
63    ILLEG, ILLEG, ILLEG, ILLEG,                                    // FS GS RS US 
64
65    WHITE, MULTX, MULTX, SELF,                                     // SP ! " #
66    MULTX+CTSYM, MULTX, SELF, MULTX,                               // $ % & '
67    SELF, SELF, SELF, SELF,                                        // ( ) * +
68    SELF, SELF, STSYM, SELF,                                       // , - . /
69
70    DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM,                        // 0 1 
71    DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM,                        // 2 3 
72    DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM,                        // 4 5 
73    DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM,                        // 6 7 
74    DIGIT+HDIGIT+CTSYM, DIGIT+HDIGIT+CTSYM,                        // 8 9 
75    MULTX, MULTX,                                                  // : ; 
76    MULTX, MULTX, MULTX, STSYM+CTSYM,                              // < = > ? 
77
78    MULTX, STSYM+CTSYM+HDIGIT,                                     // @ A
79    (char)((BYTE)DOT)+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT,              // B C
80    STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT,                        // D E
81    STSYM+CTSYM+HDIGIT, STSYM+CTSYM,                               // F G
82    STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM,            // H I J K
83    (char)((BYTE)DOT)+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM,  // L M N O
84
85    STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, (char)((BYTE)DOT)+STSYM+CTSYM,  // P Q R S
86    STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, (char)((BYTE)DOT)+STSYM+CTSYM,  // T U V W
87    STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, SELF,                   // X Y Z [
88    SELF, SELF, MULTX, STSYM+CTSYM,                                // \ ] ^ _
89
90    ILLEG, STSYM+CTSYM+HDIGIT,                                     // ` a
91    (char)((BYTE)DOT)+STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT,              // b c
92    STSYM+CTSYM+HDIGIT, STSYM+CTSYM+HDIGIT,                        // d e
93    STSYM+CTSYM+HDIGIT, STSYM+CTSYM,                               // f g
94    STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM,            // h i j k
95    (char)((BYTE)DOT)+STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM,  // l m n o
96
97    STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, (char)((BYTE)DOT)+STSYM+CTSYM,  // p q r s 
98    STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, (char)((BYTE)DOT)+STSYM+CTSYM,  // t u v w 
99    STSYM+CTSYM, STSYM+CTSYM, STSYM+CTSYM, SELF,                   // x y z { 
100    SELF, SELF, SELF, ILLEG                                        // | } ~ DEL 
101 };
102
// Text of the 68K register keywords, indexed by (token - KW_D0) in mexpand()
static char * regname[] = {
   "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7",      // data registers
   "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7",      // address registers
   "pc", "ssp", "usp", "sr", "ccr"                      // special registers
};
109
// Text of the RISC register keywords, indexed by (token - KW_R0) in mexpand()
static char * riscregname[] = {
   "r0",  "r1",  "r2",  "r3",
   "r4",  "r5",  "r6",  "r7",
   "r8",  "r9",  "r10", "r11",
   "r12", "r13", "r14", "r15",
   "r16", "r17", "r18", "r19",
   "r20", "r21", "r22", "r23",
   "r24", "r25", "r26", "r27",
   "r28", "r29", "r30", "r31"
};
116
117
118 //
119 // Make `fnum' the Current `curfname'
120 //
121 void setfnum(WORD fnum)
122 {
123         FILEREC * fr;
124
125         for(fr=filerec; fr!=NULL && fnum--; fr=fr->frec_next);
126
127         if (fr == NULL)
128                 curfname = "(*top*)";
129         else
130                 curfname = fr->frec_name;
131 }
132
133
134 //
135 // Allocate an IFILE or IMACRO
136 //
137 INOBJ * a_inobj(int typ)
138 {
139         INOBJ * inobj;
140         IFILE * ifile;
141         IMACRO * imacro;
142
143         // Allocate and initialize INOBJ first
144         if (f_inobj == NULL)
145                 inobj = (INOBJ *)amem((LONG)sizeof(INOBJ));
146         else
147         {
148                 inobj = f_inobj;
149                 f_inobj = f_inobj->in_link;
150         }
151
152         switch (typ)
153         {
154         case SRC_IFILE:                                       // Alloc and init an IFILE
155                 if (f_ifile == NULL)
156                         ifile = (IFILE *)amem((LONG)sizeof(IFILE));
157                 else
158                 {
159                         ifile = f_ifile;
160                         f_ifile = f_ifile->if_link;
161                 }
162
163                 inobj->inobj.ifile = ifile;
164                 break;
165         case SRC_IMACRO:                                      // Alloc and init an IMACRO 
166                 if (f_imacro == NULL)
167                         imacro = (IMACRO *)amem((LONG)sizeof(IMACRO));
168                 else
169                 {
170                         imacro = f_imacro;
171                         f_imacro = f_imacro->im_link;
172                 }
173
174                 inobj->inobj.imacro = imacro;
175                 break;
176         case SRC_IREPT:                                       // Alloc and init an IREPT
177                 inobj->inobj.irept = (IREPT *)amem((LONG)sizeof(IREPT));
178                 DEBUG printf("alloc IREPT\n");
179                 break;
180         }
181
182         // Install INOBJ on top of input stack
183         inobj->in_ifent = ifent;                                 // Record .if context on entry
184         inobj->in_type = (WORD)typ;
185         inobj->in_otok = tok;
186         inobj->in_etok = etok;
187         inobj->in_link = cur_inobj;
188         cur_inobj = inobj;
189
190         return inobj;
191 }
192
193
194 //
195 // Perform macro substitution from 'orig' to 'dest'. Return OK or some error.
196 // A macro reference is in one of two forms:
197 // \name <non-name-character>
198 // \{name}
199 // A doubled backslash (\\) is compressed to a single backslash (\).
200 // Argument definitions have been pre-tokenized, so we have to turn them back
201 // into text. This means that numbers, in particular, become hex, regardless of
202 // their representation when the macro was invoked. This is a hack.
203 // A label may appear at the beginning of the line:
204 // :<name><whitespace>
205 // (the colon must be in the first column). These labels are stripped before
206 // macro expansion takes place.
207 //
208 int mexpand(char * src, char * dest, int destsiz)
209 {
210         char * s;
211         char * d = NULL;
212         char * dst;                                               // Next dest slot
213         char * edst;                                              // End+1 of dest buffer
214         int i;
215         int questmark;                                           // \? for testing argument existence
216         TOKEN * tk;
217         char mname[128];                                         // Assume max size of a formal arg name
218         int macnum;
219         SYM * arg;
220         IMACRO * imacro;
221         char numbuf[20];                                         // Buffer for text of CONSTs
222
223         imacro = cur_inobj->inobj.imacro;
224         macnum = (int)(imacro->im_macro->sattr);
225
226         --destsiz;
227         dst = dest;
228         edst = dest + destsiz;
229
230         // Check for (and skip over) any "label" on the line
231         s = src;
232         if (*s == ':')
233         {
234                 while (*s != EOS && !(chrtab[*s] & WHITE))
235                         ++s;
236
237                 if (*s != EOS)
238                         ++s;                                    // Skip first whitespace
239         }
240
241         // Expand the rest of the line
242         while (*s != EOS)
243         {
244                 if (*s != '\\')
245                 {                                      // Copy single character
246                         if (dst >= edst)
247                                 goto overflow;
248
249                         *dst++ = *s++;
250                 }
251                 else
252                 {                                              // Do macro expansion
253                         questmark = 0;
254
255                         // Do special cases
256                         switch (*++s)
257                         {
258                         case '\\':                                      // \\, \ (collapse to single backslash)
259                                 if (dst >= edst)
260                                         goto overflow;
261
262                                 *dst++ = *s++;
263                                 continue;
264                         case '?':                                       // \? <macro>  set `questmark' flag 
265                                 ++s;
266                                 questmark = 1;
267                                 break;
268                         case '#':                                       // \#, number of arguments 
269                                 sprintf(numbuf, "%d", (int)imacro->im_nargs);
270                                 goto copystr;
271                         case '!':                                       // \! size suffix supplied on invocation
272                                 switch ((int)imacro->im_siz)
273                                 {
274                                 case SIZN: d = "";   break;
275                                 case SIZB: d = ".b"; break;
276                                 case SIZW: d = ".w"; break;
277                                 case SIZL: d = ".l"; break;
278                                 }
279
280                                 goto copy_d;
281                         case '~':                                       // ==> unique label string Mnnnn... 
282                                 sprintf(numbuf, "M%ud", curuniq);
283 copystr:
284                                 d = numbuf;
285 copy_d:
286                                 ++s;
287
288                                 while (*d != EOS)
289                                 {
290                                         if (dst >= edst)
291                                                 goto overflow;
292                                         else
293                                                 *dst++ = *d++;
294                                 }
295
296                                 continue;
297                         case EOS:
298                                 return error("missing argument name");
299                         }
300
301                         // \n ==> argument number 'n', 0..9
302                         if (chrtab[*s] & DIGIT)
303                         {
304                                 i = *s++ - '1';
305
306                                 if (i < 0)
307                                         i = 9;
308
309                                 goto arg_num;
310                         }
311
312                         // Get argument name: \name, \{name}
313                         d = mname;
314                         if (*s != '{')
315                         {                                    // \foo
316                                 do
317                                 {
318                                         *d++ = *s++;
319                                 }
320                                 while (chrtab[*s] & CTSYM);
321                         }
322                         else
323                         {                                          // \\{foo} 
324                                 for(++s; *s != EOS && *s != '}';)
325                                         *d++ = *s++;
326
327                                 if (*s != '}')
328                                         return error("missing '}'");
329                                 else
330                                         ++s;
331                         }
332
333                         *d = EOS;
334
335                         // Lookup the argument and copy its (string) value into the destination string
336                         DEBUG printf("mname='%s'\n", mname);
337
338                         if ((arg = lookup(mname, MACARG, macnum)) == NULL)
339                                 return errors("undefined argument: '%s'", mname);
340                         else
341                         {
342                                 // Convert a string of tokens (terminated with EOL) back into text. If an argument 
343                                 // is out of range (not specified in the macro invocation) then it is ignored.
344                                 i = (int)arg->svalue;
345 arg_num:
346                                 DEBUG printf("~argnumber=%d\n", i);
347
348                                 tk = NULL;
349
350                                 if (i < imacro->im_nargs)
351                                         tk = argp[i];
352
353                                 // \?arg yields:
354                                 //    0  if the argument is empty or non-existant,
355                                 //    1  if the argument is not empty
356                                 if (questmark)
357                                 {
358                                         if (tk == NULL || *tk == EOL)
359                                                 questmark = 0;
360
361                                         if (dst >= edst)
362                                                 goto overflow;
363
364                                         *dst++ = (char)(questmark + '0');
365                                         continue;
366                                 }
367
368                                 if (tk != NULL)                                  // arg# is in range, so expand it
369                                 {
370                                         while (*tk != EOL)
371                                         {
372                                                 // Reverse-translation from a token number to a string.  This is a hack.
373                                                 // It might be better table-driven.
374                                                 d = NULL;
375
376                                                 if ((*tk >= KW_D0) && !rdsp && !rgpu)
377                                                 {
378                                                         d = regname[(int)*tk++ - KW_D0];
379                                                         goto strcopy;
380                                                 }
381                                                 else if ((*tk >= KW_R0) && (*tk <= KW_R31))
382                                                 {
383                                                         d = riscregname[(int)*tk++ - KW_R0];
384                                                         goto strcopy;
385                                                 }
386                                                 else
387                                                 {
388                                                         switch ((int)*tk++)
389                                                         {
390                                                         case SYMBOL:
391                                                                 d = (char *)*tk++;
392                                                                 break;
393                                                         case STRING:
394                                                                 d = (char *)*tk++;
395
396                                                                 if (dst >= edst)
397                                                                         goto overflow;
398
399                                                                 *dst++ = '"';
400
401                                                                 while (*d != EOS)
402                                                                 {
403                                                                         if (dst >= edst)
404                                                                                 goto overflow;
405                                                                         else
406                                                                                 *dst++ = *d++;
407                                                                 }
408
409                                                                 if (dst >= edst)
410                                                                         goto overflow;
411
412                                                                 *dst++ = '"';
413                                                                 continue;
414                                                                 break;
415 // Shamus: Changing the format specifier from %lx to %ux caused
416 //         the assembler to choke on legitimate code... Need to investigate
417 //         this further before changing anything else here!
418                                                         case CONST:
419                                                                 sprintf(numbuf, "$%lx", (LONG)*tk++);
420                                                                 d = numbuf;
421                                                                 break;
422                                                         case DEQUALS:
423                                                                 d = "==";
424                                                                 break;
425                                                         case SET:
426                                                                 d = "set";
427                                                                 break;
428                                                         case COLON:
429                                                                 d = ":";
430                                                                 break;
431                                                         case DCOLON:
432                                                                 d = "::";
433                                                                 break;
434                                                         case GE:
435                                                                 d = ">=";
436                                                                 break;
437                                                         case LE:
438                                                                 d = "<=";
439                                                                 break;
440                                                         case NE:
441                                                                 d = "<>";
442                                                                 break;
443                                                         case SHR:
444                                                                 d = ">>";
445                                                                 break;
446                                                         case SHL:
447                                                                 d = "<<";
448                                                                 break;
449                                                         case DOTB:
450                                                                 d = ".b";
451                                                                 break;
452                                                         case DOTW:
453                                                                 d = ".w";
454                                                                 break;
455                                                         case DOTL:
456                                                                 d = ".l";
457                                                                 break;
458                                                         case CR_DATE:
459                                                                 d = "^^date";
460                                                                 break;
461                                                         case CR_TIME:
462                                                                 d = "^^time";
463                                                                 break;
464                                                         case CR_DEFINED:
465                                                                 d = "^^defined ";
466                                                                 break;
467                                                         case CR_REFERENCED:
468                                                                 d = "^^referenced ";
469                                                                 break;
470                                                         case CR_STREQ:
471                                                                 d = "^^streq ";
472                                                                 break;
473                                                         case CR_MACDEF:
474                                                                 d = "^^macdef ";
475                                                                 break;
476                                                         default:
477                                                                 if (dst >= edst)
478                                                                         goto overflow;
479
480                                                                 *dst++ = (char)*(tk-1);
481                                                                 break;
482                                                         }
483                                                 }
484
485                                                 // If 'd' != NULL, copy string to destination
486                                                 if (d != NULL)
487                                                 {
488 strcopy:
489                                                         DEBUG printf("d='%s'\n", d);
490
491                                                         while (*d != EOS)
492                                                         {
493                                                                 if (dst >= edst)
494                                                                         goto overflow;
495                                                                 else
496                                                                         *dst++ = *d++;
497                                                         }
498                                                 }
499                                         }
500                                 }
501                         }
502                 }
503         }
504
505         *dst = EOS;
506         return OK;
507
508 overflow:
509         *dst = EOS;
510         return fatal("line too long as a result of macro expansion");
511 }
512
513
514 //
515 // Get Next Line of Text from a Macro
516 //
517 char * getmln(void)
518 {
519         IMACRO * imacro;
520         LONG * strp;
521         unsigned source_addr;
522
523         imacro = cur_inobj->inobj.imacro;
524         strp = imacro->im_nextln;
525
526         if (strp == NULL)                                         // End-of-macro
527                 return NULL;
528
529         imacro->im_nextln = (LONG *)*strp;
530         mexpand((char *)(strp + 1), imacro->im_lnbuf, LNSIZ);
531
532         if (!strcmp(imacro->im_macro->sname, "mjump") && !mjump_align)
533         {
534                 // if we need to adjust the alignment of the jump source address to meet the rules of
535                 // gpu main execution we need to skip the first nop of the macro. This is simpler than
536                 // trying to insert nop's mid macro.
537                 source_addr = (orgactive) ? orgaddr : sloc;
538                 source_addr += 8;
539
540                 if (source_addr % 4)
541                 {
542                         strp = imacro->im_nextln;
543
544                         if (strp == NULL)
545                                 return NULL;
546
547                         imacro->im_nextln = (LONG *)*strp;
548                         mexpand((char *)(strp + 1), imacro->im_lnbuf, LNSIZ);
549                 }
550
551                 mjump_align = 1;
552         }
553
554         return imacro->im_lnbuf;
555 }
556
557
558 //
559 // Get Next Line of Text from a Repeat Block
560 //
561 char * getrln(void)
562 {
563         IREPT * irept;
564         LONG * strp;
565
566         irept = cur_inobj->inobj.irept;
567         strp = irept->ir_nextln;         // initial null
568
569         // Do repeat at end of .rept block's string list
570         if (strp == NULL)
571         {
572                 DEBUG printf("back-to-top-of-repeat-block count=%d\n", (int)irept->ir_count);
573                 irept->ir_nextln = irept->ir_firstln;  // copy first line
574
575                 if (irept->ir_count-- == 0)
576                 {
577                         DEBUG printf("end-repeat-block\n");
578                         return NULL;
579                 }
580
581                 strp = irept->ir_nextln;               //strp
582         }
583
584         strcpy(irbuf, (char*)(irept->ir_nextln + 1));
585
586         DEBUG printf("repeat line='%s'\n", irbuf);
587         irept->ir_nextln = (LONG *)*strp;
588
589         return irbuf;
590 }
591
592
593 //
594 // Include a Source File used at the Root, and for ".include" Files
595 //
596 int include(int handle, char * fname)
597 {
598         IFILE * ifile;
599         INOBJ * inobj;
600         FILEREC * fr;
601
602         if (verb_flag)
603                 printf("[Including: %s]\n", fname);        // Verbose mode
604
605         // Alloc and initialize include-descriptors
606         inobj = a_inobj(SRC_IFILE);
607         ifile = inobj->inobj.ifile;
608
609         ifile->ifhandle = handle;                                // Setup file handle
610         ifile->ifind = ifile->ifcnt = 0;                         // Setup buffer indices
611         ifile->ifoldlineno = curlineno;                          // Save old line number
612         ifile->ifoldfname = curfname;                            // Save old filename
613         ifile->ifno = cfileno;                                   // Save old file number
614         cfileno = ++filecount;                                   // Compute new file number
615         curfname = nstring(fname);                                    // Set current filename (alloc storage)
616         curlineno = 0;                                           // Start on line zero
617
618         // Add another file to the file-record
619         fr = (FILEREC *)amem((LONG)sizeof(FILEREC));
620         fr->frec_next = NULL;
621         fr->frec_name = curfname;
622
623         if (last_fr == NULL)
624                 filerec = fr;                                         // Add first filerec 
625         else
626                 last_fr->frec_next = fr;                              // Append to list of filerecs 
627
628         last_fr = fr;
629
630         return OK;
631 }
632
633
634 //
635 // Initialize Tokenizer
636 //
637 void init_token(void)
638 {
639         int i;                                                   // Iterator
640         char * htab = "0123456789abcdefABCDEF";                   // Hex character table
641
642         lnsave = 0;                                              // Don't save lines
643         curfname = "";                                           // No file, empty filename
644         filecount = (WORD)-1;
645         cfileno = (WORD)-1;                                      // cfileno gets bumped to 0
646         curlineno = 0;
647         totlines = 0;
648         etok = tokbuf;
649         f_inobj = NULL;
650         f_ifile = NULL;
651         f_imacro = NULL;
652         cur_inobj = NULL;
653         filerec = NULL;
654         last_fr = NULL;
655         lntag = SPACE;
656
657         // Initialize hex, "dot" and tolower tables
658         for(i=0; i<128; ++i)
659         {
660                 hextab[i] = -1;
661                 dotxtab[i] = 0;
662                 tolowertab[i] = (char)i;
663         }
664
665         for(i=0; htab[i]!=EOS; ++i)
666                 hextab[htab[i]] = (char)((i < 16) ? i : i - 6);
667
668         for(i='A'; i<='Z'; ++i)
669                 tolowertab[i] |= 0x20;
670
671         // These characters are legal immediately after a period
672         dotxtab['b'] = DOTB;                                     // .b .B .s .S 
673         dotxtab['B'] = DOTB;
674         dotxtab['s'] = DOTB;
675         dotxtab['S'] = DOTB;
676         dotxtab['w'] = DOTW;                                     // .w .W 
677         dotxtab['W'] = DOTW;
678         dotxtab['l'] = DOTL;                                     // .l .L 
679         dotxtab['L'] = DOTL;
680         dotxtab['I'] = DOTI;                                     // .l .L 
681         dotxtab['I'] = DOTI;
682 }
683
684
685 //
686 // Pop the Current Input Level
687 //
688 int fpop(void)
689 {
690         INOBJ * inobj;
691         IFILE * ifile;
692         IMACRO * imacro;
693         LONG * p, * p1;
694
695         inobj = cur_inobj;
696
697         if (inobj != NULL)
698         {
699                 // Pop IFENT levels until we reach the conditional assembly context we were at when the 
700                 // input object was entered.
701                 while (ifent != inobj->in_ifent)
702                         d_endif ();
703
704                 tok = inobj->in_otok;                                 // Restore tok and otok
705                 etok = inobj->in_etok;
706
707                 switch (inobj->in_type)
708                 {
709                 case SRC_IFILE:                                    // Pop and release an IFILE
710                         if (verb_flag)
711                                 printf("[Leaving: %s]\n", curfname);
712
713                         ifile = inobj->inobj.ifile;
714                         ifile->if_link = f_ifile;
715                         f_ifile = ifile;
716                         close(ifile->ifhandle);                         // Close source file
717                         curfname = ifile->ifoldfname;                   // Set current filename
718                         curlineno = ifile->ifoldlineno;                 // Set current line# 
719                         DEBUG printf("cfileno=%d ifile->ifno=%d\n", (int)cfileno, (int)ifile->ifno);
720                         cfileno = ifile->ifno;                          // Restore current file number
721                         break;
722                 case SRC_IMACRO:                                   // Pop and release an IMACRO
723                         imacro = inobj->inobj.imacro;
724                         imacro->im_link = f_imacro;
725                         f_imacro = imacro;
726                         break;
727                 case SRC_IREPT:                                    // Pop and release an IREPT
728                         DEBUG printf("dealloc IREPT\n");
729                         p = inobj->inobj.irept->ir_firstln;
730
731                         while (p != NULL)
732                         {
733                                 p1 = (LONG *)*p;
734                                 p = p1;
735                         }
736
737                         break;
738                 }
739
740                 cur_inobj = inobj->in_link;
741                 inobj->in_link = f_inobj;
742                 f_inobj = inobj;
743         }
744
745         return 0;
746 }
747
748
749 //
750 // Get line from file into buf, return NULL on EOF or ptr to the start of a
751 // null-term line
752 //
753 char * getln(void)
754 {
755         IFILE * fl;
756         int i, j;
757         char * p, * d;
758         int readamt;
759
760         readamt = -1;                                            // 0 if last read() yeilded 0 bytes
761         fl = cur_inobj->inobj.ifile;
762
763         for(;;)
764         {
765                 // Scan for next end-of-line; handle stupid text formats by treating \r\n the same as \n.
766                 // (lone '\r' at end of buffer means we have to check for '\n').
767                 i = 0;
768                 j = fl->ifcnt;
769                 d = &fl->ifbuf[fl->ifind];
770
771                 for(p=d; i<j; ++i, ++p)
772                 {
773                         if (*p == '\r' || *p == '\n')
774                         {
775                                 ++i;
776
777                                 if (*p == '\r')
778                                 {
779                                         if (i >= j)
780                                         {
781                                                 break;                                    // Look for '\n' to eat 
782                                         }
783                                         else if (p[1] == '\n')
784                                         {
785                                                 ++i;
786                                         }
787                                 }
788
789                                 *p = '\0';
790
791                                 fl->ifind += i;
792                                 fl->ifcnt -= i;
793                                 return d;
794                         }
795                 }
796
797                 // Handle hanging lines by ignoring them (Input file is exhausted, no \r or \n on last line)
798                 if (!readamt && fl->ifcnt)
799                 {
800                         fl->ifcnt = 0;
801                         *p = '\0';
802                         return NULL;
803                 }
804
805                 // Truncate and return absurdly long lines.
806                 if (fl->ifcnt >= QUANTUM)
807                 {
808                         fl->ifbuf[fl->ifind + fl->ifcnt - 1] = '\0';
809                         fl->ifcnt = 0;
810                         return &fl->ifbuf[fl->ifind];
811                 }
812
813                 // Relocate what's left of a line to the beginning of the buffer, and read some more of the 
814                 // file in; return NULL if the buffer's empty and on EOF.
815                 if (fl->ifind != 0)
816                 {
817                         p = &fl->ifbuf[fl->ifind];
818                         d = &fl->ifbuf[fl->ifcnt & 1];
819
820                         for(i = 0; i < fl->ifcnt; ++i)
821                                 *d++ = *p++;
822
823                         fl->ifind = fl->ifcnt & 1;
824                 }
825
826                 if ((readamt = read(fl->ifhandle, &fl->ifbuf[fl->ifind + fl->ifcnt], QUANTUM)) < 0)
827                         return NULL;
828
829                 if ((fl->ifcnt += readamt) == 0)
830                         return NULL;
831         }
832 }
833
834
835 //
836 // Tokenize a Line
837 //
838 int tokln(void)
839 {
840         char * ln = NULL;                                         // Ptr to current position in line
841         char * p;                                                 // Random character ptr
842         TOKEN *tk;                                               // Token-deposit ptr
843         int state = 0;                                           // State for keyword detector
844         int j = 0;                                               // Var for keyword detector
845         char c;                                                  // Random char
846         VALUE v;                                                 // Random value
847         char * nullspot = NULL;                                   // Spot to clobber for SYMBOL terminatn
848         int stuffnull;                                           // 1:terminate SYMBOL '\0' at *nullspot
849         char c1;
850
851         retry:
852
853         if (cur_inobj == NULL)                                    // Return EOF if input stack is empty
854                 return TKEOF;
855
856         // Get another line of input from the current input source: a file, a macro, or a repeat-block
857         switch (cur_inobj->in_type)
858         {
859         // Include-file:
860         // o  handle EOF;
861         // o  bump source line number;
862         // o  tag the listing-line with a space;
863         // o  kludge lines generated by Alcyon C.
864         case SRC_IFILE:
865                 if ((ln = getln()) == NULL)
866                 {
867                         fpop();                                         // Pop input level
868                         goto retry;                                     // Try for more lines 
869                 }
870
871                 ++curlineno;                                       // Bump line number
872                 lntag = SPACE;
873
874                 if (as68_flag)
875                 {
876                         // AS68 compatibility, throw away all lines starting with back-quotes, tildes, or '*'
877                         // On other lines, turn the first '*' into a semi-colon.
878                         if (*ln == '`' || *ln == '~' || *ln == '*')
879                                 *ln = ';';
880                         else
881                         {
882                                 for(p=ln; *p!=EOS; ++p)
883                                 {
884                                         if (*p == '*')
885                                         {
886                                                 *p = ';';
887                                                 break;
888                                         }
889                                 }
890                         }
891                 }
892
893                 break;
894         // Macro-block:
895         // o  Handle end-of-macro;
896         // o  tag the listing-line with an at (@) sign.
897         case SRC_IMACRO:
898                 if ((ln = getmln()) == NULL)
899                 {
900                         exitmac();                                      // Exit macro (pop args, do fpop(), etc)
901                         goto retry;                                     // Try for more lines...
902                 }
903
904                 lntag = '@';
905                 break;
906         // Repeat-block:
907         // o  Handle end-of-repeat-block;
908         // o  tag the listing-line with a pound (#) sign.
909         case SRC_IREPT:
910                 if ((ln = getrln()) == NULL)
911                 {
912                         fpop();
913                         goto retry;
914                 }
915
916                 lntag = '#';
917                 break;
918         }
919
920         // Save text of the line.  We only do this during listings and within macro-type blocks, 
921         // since it is expensive to unconditionally copy every line.
922         if (lnsave)
923                 strcpy(lnbuf, ln);
924
925         // General house-keeping
926         tok = tokeol;                                            // Set "tok" to EOL in case of error
927         tk = etok;                                               // Reset token ptr
928         stuffnull = 0;                                           // Don't stuff nulls
929         ++totlines;                                              // Bump total #lines assembled
930
931         // See if the entire line is a comment.  This is a win if the programmer puts in lots of comments
932         if (*ln == '*' || *ln == ';' || ((*ln == '/') && (*(ln+1) == '/')))
933                 goto goteol;
934
935         // Main tokenization loop;
936         // o  skip whitespace;
937         // o  handle end-of-line;
938         // o  handle symbols;
939         // o  handle single-character tokens (operators, etc.);
940         // o  handle multiple-character tokens (constants, strings, etc.).
941         for(; *ln!=EOS;)
942         {
943                 // Skip whitespace, handle EOL
944                 while ((int)chrtab[*ln] & WHITE)
945                         ++ln;
946
947                 // Handle EOL, comment with ';'
948                 if (*ln == EOS || *ln == ';'|| ((*ln == '/') && (*(ln+1) == '/'))) 
949                         break;
950
951                 // Handle start of symbol. Symbols are null-terminated in place. The termination is
952                 // always one symbol behind, since there may be no place for a null in the case that 
953                 // an operator immediately follows the name.
954                 c = chrtab[*ln];
955
956                 if (c & STSYM)
957                 {
958                         if (stuffnull)                                      // Terminate old symbol 
959                                 *nullspot = EOS;
960
961                         v = 0;                                             // Assume no DOT attrib follows symbol
962                         stuffnull = 1;
963                         p = nullspot = ln++;                               // Nullspot -> start of this symbol
964
965                         // Find end of symbol (and compute its length)
966                         for(j=1; (int)chrtab[*ln]&CTSYM; ++j)
967                                 ++ln;
968
969                         // Handle "DOT" special forms (like ".b") that follow a normal symbol or keyword:
970                         if (*ln == '.')
971                         {
972                                 *ln++ = EOS;                                    // Terminate symbol
973                                 stuffnull = 0;                                  // And never try it again 
974
975                                 // Character following the `.' must have a DOT attribute, and the chararacter after 
976                                 // THAT one must not have a start-symbol attribute (to prevent symbols that look
977                                 // like, for example, "zingo.barf", which might be a good idea anyway....)
978                                 if ((((int)chrtab[*ln] & DOT) == 0) || ((int)dotxtab[*ln] <= 0))
979                                         return error("[bwsl] must follow `.' in symbol");
980
981                                 v = (VALUE)dotxtab[*ln++];
982
983                                 if ((int)chrtab[*ln] & CTSYM)
984                                         return error("misuse of `.', not allowed in symbols");
985                         }
986
987                         // If the symbol is small, check to see if it's really the name of a register.
988                         if (j <= KWSIZE)
989                         {
990                                 for(state=0; state>=0;)
991                                 {
992                                         j = (int)tolowertab[*p++];
993                                         j += kwbase[state];
994
995                                         if (kwcheck[j] != state)
996                                         {
997                                                 j = -1;
998                                                 break;
999                                         }
1000
1001                                         if (*p == EOS || p == ln)
1002                                         {
1003                                                 j = kwaccept[j];
1004                                                 break;
1005                                         }
1006
1007                                         state = kwtab[j];
1008                                 }
1009                         }
1010                         else
1011                         {
1012                                 j = -1;
1013                         }
1014
1015                         //make j = -1 if time, date etc with no preceeding ^^
1016                         //defined, referenced, streq, macdef, date and time
1017                         switch ((TOKEN)j)
1018                         {
1019                         case 112:   // defined
1020                         case 113:   // referenced
1021                         case 118:   // streq
1022                         case 119:   // macdef
1023                         case 120:   // time
1024                         case 121:   // date
1025                                 j = -1;
1026                                 break;
1027                         }
1028
1029                         if (j < 0 || state < 0)
1030                         {
1031                                 *tk++ = SYMBOL;
1032                                 *tk++ = (TOKEN)nullspot;
1033                         }
1034                         else
1035                         {
1036                                 *tk++ = (TOKEN)j;
1037                                 stuffnull = 0;
1038                         }
1039
1040                         if (v)                                              // Record attribute token (if any)
1041                                 *tk++ = (TOKEN)v;
1042
1043                         if (stuffnull)                                      // Arrange for string termination 
1044                                 nullspot = ln;
1045
1046                         continue;
1047                 }
1048
1049                 // Handle identity tokens
1050                 if (c & SELF)
1051                 {
1052                         *tk++ = *ln++;
1053                         continue;
1054                 }
1055
1056                 // Handle multiple-character tokens
1057                 if (c & MULTX)
1058                 {
1059                         switch (*ln++)
1060                         {
1061                         case '!':                                       // ! or != 
1062                                 if (*ln == '=')
1063                                 {
1064                                         *tk++ = NE;
1065                                         ++ln;
1066                                 }
1067                                 else
1068                                         *tk++ = '!';
1069
1070                                 continue;
1071                         case '\'':                                      // 'string' 
1072                         case '\"':                                      // "string" 
1073                                 c1 = ln[-1];
1074                                 *tk++ = STRING;
1075                                 *tk++ = (TOKEN)ln;
1076
1077                                 for(p=ln; *ln!=EOS && *ln!=c1;)
1078                                 {
1079                                         c = *ln++;
1080
1081                                         if (c == '\\')
1082                                         {
1083                                                 switch (*ln++)
1084                                                 {
1085                                                 case EOS:
1086                                                         return(error("unterminated string"));
1087                                                 case 'e':
1088                                                         c = '\033';
1089                                                         break;
1090                                                 case 'n':
1091                                                         c = '\n';
1092                                                         break;
1093                                                 case 'b':
1094                                                         c = '\b';
1095                                                         break;
1096                                                 case 't':
1097                                                         c = '\t';
1098                                                         break;
1099                                                 case 'r':
1100                                                         c = '\r';
1101                                                         break;
1102                                                 case 'f':
1103                                                         c = '\f';
1104                                                         break;
1105                                                 case '\"':
1106                                                         c = '\"';
1107                                                         break;
1108                                                 case '\'':
1109                                                         c = '\'';
1110                                                         break;
1111                                                 case '\\':
1112                                                         c = '\\';
1113                                                         break;
1114                                                 default:
1115                                                         warn("bad backslash code in string");
1116                                                         --ln;
1117                                                         break;
1118                                                 }
1119                                         }
1120
1121                                         *p++ = c;
1122                                 }
1123
1124                                 if (*ln++ != c1)
1125                                         return error("unterminated string");
1126
1127                                 *p++ = EOS;
1128                                 continue;
1129                         case '$':                                       // $, hex constant
1130                                 if ((int)chrtab[*ln] & HDIGIT)
1131                                 {
1132                                         v = 0;
1133
1134                                         while ((int)hextab[*ln] >= 0)
1135                                                 v = (v << 4) + (int)hextab[*ln++];
1136
1137                                         if (*ln == '.')
1138                                         {
1139                                                 if ((*(ln+1) == 'b') || (*(ln+1) == 'B'))
1140                                                 {
1141                                                         v &= 0x000000FF;
1142                                                         ln += 2;
1143                                                 }
1144
1145                                                 if ((*(ln+1) == 'w') || (*(ln+1) == 'W'))
1146                                                 {
1147                                                         v &= 0x0000FFFF;
1148                                                         ln += 2;
1149                                                 }
1150
1151                                                 if ((*(ln+1) == 'l') || (*(ln+1) == 'L'))
1152                                                 {
1153                                                         ln += 2;
1154                                                 }
1155                                         }
1156
1157                                         *tk++ = CONST;
1158                                         *tk++ = v;
1159                                 }
1160                                 else
1161                                         *tk++ = '$';
1162
1163                                 continue;
1164                         case '<':                                       // < or << or <> or <= 
1165                                 switch (*ln)
1166                                 {
1167                                 case '<':
1168                                         *tk++ = SHL;
1169                                         ++ln;
1170                                         continue;
1171                                 case '>':
1172                                         *tk++ = NE;
1173                                         ++ln;
1174                                         continue;
1175                                 case '=':
1176                                         *tk++ = LE;
1177                                         ++ln;
1178                                         continue;
1179                                 default:
1180                                         *tk++ = '<';
1181                                         continue;
1182                                 }
1183                         case ':':                                       // : or ::
1184                                 if (*ln == ':')
1185                                 {
1186                                         *tk++ = DCOLON;
1187                                         ++ln;
1188                                 }
1189                                 else
1190                                         *tk++ = ':';
1191
1192                                 continue;
1193                         case '=':                                       // = or == 
1194                                 if (*ln == '=')
1195                                 {
1196                                         *tk++ = DEQUALS;
1197                                         ++ln;
1198                                 }
1199                                 else
1200                                         *tk++ = '=';
1201
1202                                 continue;
1203                         case '>':                                       // > or >> or >= 
1204                                 switch (*ln)
1205                                 {
1206                                 case '>':
1207                                         *tk++ = SHR;
1208                                         ++ln;
1209                                         continue;
1210                                 case '=':
1211                                         *tk++ = GE;
1212                                         ++ln;
1213                                         continue;
1214                                 default:
1215                                         *tk++ = '>';
1216                                         continue;
1217                                 }
1218                         case '%':                                       // % or binary constant 
1219                                 if (*ln < '0' || *ln > '1')
1220                                 {
1221                                         *tk++ = '%';
1222                                         continue;
1223                                 }
1224
1225                                 v = 0;
1226
1227                                 while (*ln >= '0' && *ln <= '1')
1228                                         v = (v << 1) + *ln++ - '0';
1229
1230                                 if (*ln == '.')
1231                                 {
1232                                         if ((*(ln+1) == 'b') || (*(ln+1) == 'B'))
1233                                         {
1234                                                 v &= 0x000000FF;
1235                                                 ln += 2;
1236                                         }
1237
1238                                         if ((*(ln+1) == 'w') || (*(ln+1) == 'W'))
1239                                         {
1240                                                 v &= 0x0000FFFF;
1241                                                 ln += 2;
1242                                         }
1243
1244                                         if ((*(ln+1) == 'l') || (*(ln+1) == 'L'))
1245                                         {
1246                                                 ln += 2;
1247                                         }
1248                                 }
1249
1250                                 *tk++ = CONST;
1251                                 *tk++ = v;
1252                                 continue;
1253                         case '@':                                       // @ or octal constant 
1254                                 if (*ln < '0' || *ln > '7')
1255                                 {
1256                                         *tk++ = '@';
1257                                         continue;
1258                                 }
1259
1260                                 v = 0;
1261
1262                                 while (*ln >= '0' && *ln <= '7')
1263                                         v = (v << 3) + *ln++ - '0';
1264
1265                                 if (*ln == '.')
1266                                 {
1267                                         if ((*(ln+1) == 'b') || (*(ln+1) == 'B'))
1268                                         {
1269                                                 v &= 0x000000FF;
1270                                                 ln += 2;
1271                                         }
1272
1273                                         if ((*(ln+1) == 'w') || (*(ln+1) == 'W'))
1274                                         {
1275                                                 v &= 0x0000FFFF;
1276                                                 ln += 2;
1277                                         }
1278
1279                                         if ((*(ln+1) == 'l') || (*(ln+1) == 'L'))
1280                                         {
1281                                                 ln += 2;
1282                                         }
1283                                 }
1284
1285                                 *tk++ = CONST;
1286                                 *tk++ = v;
1287                                 continue;
1288                         case '^':                                       // ^ or ^^ <operator-name>
1289                                 if (*ln != '^')
1290                                 {
1291                                         *tk++ = '^';
1292                                         continue;
1293                                 }
1294
1295                                 if (((int)chrtab[*++ln] & STSYM) == 0)
1296                                 {
1297                                         error("invalid symbol following ^^");
1298                                         continue;
1299                                 }
1300
1301                                 p = ln++;
1302
1303                                 while ((int)chrtab[*ln] & CTSYM)
1304                                         ++ln;
1305
1306                                 for(state=0; state>=0;)
1307                                 {
1308                                         // Get char, convert to lowercase 
1309                                         j = *p++;
1310
1311                                         if (j >= 'A' && j <= 'Z')
1312                                                 j += 0x20;
1313
1314                                         j += kwbase[state];
1315
1316                                         if (kwcheck[j] != state)
1317                                         {
1318                                                 j = -1;
1319                                                 break;
1320                                         }
1321
1322                                         if (*p == EOS || p == ln)
1323                                         {
1324                                                 j = kwaccept[j];
1325                                                 break;
1326                                         }
1327
1328                                         state = kwtab[j];
1329                                 }
1330
1331                                 if (j < 0 || state < 0)
1332                                 {
1333                                         error("unknown symbol following ^^");
1334                                         continue;
1335                                 }
1336
1337                                 *tk++ = (TOKEN)j;
1338                                 continue;
1339                         default:
1340                                 interror(2);                                 // Bad MULTX entry in chrtab
1341                                 continue;
1342                         }
1343                 }
1344
1345                 // Handle decimal constant
1346                 if (c & DIGIT)
1347                 {
1348                         v = 0;
1349
1350                         while ((int)chrtab[*ln] & DIGIT)
1351                                 v = (v * 10) + *ln++ - '0';
1352
1353                         if (*ln == '.')
1354                         {
1355                                 if ((*(ln+1) == 'b') || (*(ln+1) == 'B'))
1356                                 {
1357                                         v &= 0x000000FF;
1358                                         ln += 2;
1359                                 }
1360
1361                                 if ((*(ln+1) == 'w') || (*(ln+1) == 'W'))
1362                                 {
1363                                         v &= 0x0000FFFF;
1364                                         ln += 2;
1365                                 }
1366
1367                                 if ((*(ln+1) == 'l') || (*(ln+1) == 'L'))
1368                                 {
1369                                         ln += 2;
1370                                 }
1371                         }
1372
1373                         *tk++ = CONST;
1374                         *tk++ = v;
1375                         continue;
1376                 }
1377
1378                 // Handle illegal character
1379                 return error("illegal character");
1380         }
1381
1382         // Terminate line of tokens and return "success."
1383
1384 goteol:
1385         tok = etok;                                              // Set tok to beginning of line
1386
1387         if (stuffnull)                                            // Terminate last SYMBOL
1388                 *nullspot = EOS;
1389
1390         *tk++ = EOL;
1391
1392         return OK;
1393 }
1394
1395
1396 //
1397 // .GOTO <label>        goto directive
1398 // 
1399 // The label is searched for starting from the first line of the current,
1400 // enclosing macro definition. If no enclosing macro exists, an error is
1401 // generated.
1402 // 
1403 // A label is of the form:
1404 // 
1405 // :<name><whitespace>
1406 // 
1407 // The colon must appear in column 1.  The label is stripped prior to macro
1408 // expansion, and is NOT subject to macro expansion.  The whitespace may also
1409 // be EOL.
1410 //
1411 //int d_goto(WORD siz) {
1412 int d_goto(void)
1413 {
1414         char * sym;                                               // Label to search for 
1415         LONG * defln;                                             // Macro definition strings 
1416         char * s1;                                                // Temps for string comparison 
1417         char * s2;
1418         IMACRO * imacro;                                          // Macro invocation block
1419
1420         // Setup for the search
1421         if (*tok != SYMBOL)
1422                 return error("missing label");
1423
1424         sym = (char *)tok[1];
1425         tok += 2;
1426
1427         if (cur_inobj->in_type != SRC_IMACRO)
1428                 return error("goto not in macro");
1429
1430         imacro = cur_inobj->inobj.imacro;
1431         defln = (LONG *)imacro->im_macro->svalue;
1432
1433         // Find the label, starting with the first line.
1434         for(; defln!=NULL; defln=(LONG *)*defln)
1435         {
1436                 if (*(char *)(defln + 1) == ':')
1437                 {
1438                         // Compare names (sleazo string compare)
1439                         s1 = sym;
1440                         s2 = (char *)(defln + 1) + 1;
1441
1442                         while (*s1 == *s2)
1443                         {
1444                                 if (*s1 == EOS)
1445                                         break;
1446                                 else
1447                                 {
1448                                         ++s1;
1449                                         ++s2;
1450                                 }
1451                         }
1452
1453                         // Found the label, set new macro next-line and return.
1454                         if ((*s2 == EOS) || ((int)chrtab[*s2] & WHITE))
1455                         {
1456                                 imacro->im_nextln = defln;
1457                                 return 0;
1458                         }
1459                 }
1460         }
1461
1462         return error("goto label not found");
1463 }