2 // RMAC - Renamed Macro Assembler for all Atari computers
3 // EXPR.C - Expression Analyzer
4 // Copyright (C) 199x Landon Dyer, 2011-2021 Reboot and Friends
5 // RMAC derived from MADMAC v1.07 Written by Landon Dyer, 1986
6 // Source utilised with the kind permission of Landon Dyer
20 #define DEF_KW // Declare keyword values
21 #include "kwtab.h" // Incl generated keyword tables & defs
23 // N.B.: The size of tokenClass should be identical to the largest value of
24 // a token; we're assuming 256 but not 100% sure!
25 static char tokenClass[256]; // Generated table of token classes
26 static uint64_t evstk[EVSTACKSIZE]; // Evaluator value stack
27 static WORD evattr[EVSTACKSIZE]; // Evaluator attribute stack
29 // Token-class initialization list
32 CONST, FCONST, SYMBOL, 0, // ID
33 '(', '[', '{', 0, // OPAR
34 ')', ']', '}', 0, // CPAR
35 CR_DEFINED, CR_REFERENCED, // SUNARY (special unary)
38 CR_ABSCOUNT, CR_FILESIZE, 0,
39 '!', '~', UNMINUS, UNLT, UNGT, 0, // UNARY
40 '*', '/', '%', 0, // MULT
43 LE, GE, '<', '>', NE, '=', 0, // REL
50 const char missym_error[] = "missing symbol";
51 const char str_error[] = "missing symbol or string";
52 const char noflt_error[] = "operator not usable with float";
54 // Convert expression to postfix
55 static PTR evalTokenBuffer; // Deposit tokens here (this is really a
56 // pointer to exprbuf from direct.c)
57 // (Can also be from others, like
59 static int symbolNum; // Pointer to the entry in symbolPtr[]
62 // Obtain a string value
64 static uint32_t str_value(char * p)
69 v = (v << 8) | (*p & 0xFF);
75 // Initialize expression analyzer
77 void InitExpression(void)
79 // Initialize token-class table (all set to END)
80 for(int i=0; i<256; i++)
85 for(char * p=itokcl; *p!=1; p++)
90 tokenClass[(int)(*p)] = (char)i;
97 // Binary operators (all the same precedence)
104 while (tokenClass[*tok] >= MULT)
111 *evalTokenBuffer.u32++ = t;
118 // Unary operators (detect unary '-')
119 // ggn: If expression starts with a plus then also eat it up. For some reason
120 // the parser gets confused when this happens and emits a "bad
128 int class = tokenClass[*tok];
130 if (*tok == '-' || *tok == '+' || *tok == '<' || *tok == '>' || class == UNARY)
144 // With leading + we don't have to deposit anything to the buffer
145 // because there's no unary '+', nor do we have to do anything about it
147 *evalTokenBuffer.u32++ = t;
149 else if (class == SUNARY)
156 *evalTokenBuffer.u32++ = CONST;
157 *evalTokenBuffer.u64++ = sect[ABS].sloc;
161 *evalTokenBuffer.u32++ = CONST;
162 *evalTokenBuffer.u64++ = sloc;
166 if (*tok++ != STRING)
167 return error("^^FILESIZE expects filename inside string");
168 *evalTokenBuffer.u32++ = CONST;
169 // @@copypasted from d_incbin, maybe factor this out somehow?
170 // Attempt to open the include file in the current directory, then (if that
171 // failed) try list of include files passed in the environment string or by
176 if ((fd = open(string[*tok], _OPEN_INC)) < 0)
178 for(i=0; nthpath("RMACPATH", i, buf1)!=0; i++)
182 // Append path char if necessary
183 if ((fd > 0) && (buf1[fd - 1] != SLASHCHAR))
184 strcat(buf1, SLASHSTRING);
186 strcat(buf1, string[*tok]);
188 if ((fd = open(buf1, _OPEN_INC)) >= 0)
192 return error("cannot open: \"%s\"", string[tok[1]]);
196 *evalTokenBuffer.u64++ = (uint64_t)lseek(fd, 0L, SEEK_END);
199 // Advance tok because of consumed string token
203 *evalTokenBuffer.u32++ = CONST;
204 *evalTokenBuffer.u64++ = dos_time();
207 *evalTokenBuffer.u32++ = CONST;
208 *evalTokenBuffer.u64++ = dos_date();
210 case CR_MACDEF: // ^^macdef <macro-name>
211 if (*tok++ != SYMBOL)
212 return error(missym_error);
215 w = (lookup(p, MACRO, 0) == NULL ? 0 : 1);
216 *evalTokenBuffer.u32++ = CONST;
217 *evalTokenBuffer.u64++ = (uint64_t)w;
225 if (*tok++ != SYMBOL)
226 return error(missym_error);
229 int j = (*p == '.' ? curenv : 0);
230 SYM * sy = lookup(p, LABEL, j);
231 w = ((sy != NULL) && (sy->sattr & w ? 1 : 0));
232 *evalTokenBuffer.u32++ = CONST;
233 *evalTokenBuffer.u64++ = (uint64_t)w;
236 if (*tok != SYMBOL && *tok != STRING)
237 return error(str_error);
243 return error(comma_error);
245 if (*tok != SYMBOL && *tok != STRING)
246 return error(str_error);
248 char * p2 = string[tok[1]];
251 w = (WORD)(!strcmp(p, p2));
252 *evalTokenBuffer.u32++ = CONST;
253 *evalTokenBuffer.u64++ = (uint64_t)w;
264 // Terminals (CONSTs) and parenthesis grouping
274 *evalTokenBuffer.u32++ = CONST;
275 *evalTokenBuffer.u64++ = *ptk.u64++;
280 *evalTokenBuffer.u32++ = FCONST;
281 *evalTokenBuffer.u64++ = *ptk.u64++;
286 char * p = string[*tok++];
287 int j = (*p == '.' ? curenv : 0);
288 SYM * sy = lookup(p, LABEL, j);
291 sy = NewSymbol(p, LABEL, j);
293 // Check register bank usage
294 if (sy->sattre & EQUATEDREG)
296 if ((regbank == BANK_0) && (sy->sattre & BANK_1) && !altbankok)
297 warn("equated symbol \'%s\' cannot be used in register bank 0", sy->sname);
299 if ((regbank == BANK_1) && (sy->sattre & BANK_0) && !altbankok)
300 warn("equated symbol \'%s\' cannot be used in register bank 1", sy->sname);
303 *evalTokenBuffer.u32++ = SYMBOL;
304 *evalTokenBuffer.u32++ = symbolNum;
305 symbolPtr[symbolNum] = sy;
310 *evalTokenBuffer.u32++ = CONST;
311 *evalTokenBuffer.u64++ = str_value(string[*tok++]);
318 return error("missing closing parenthesis ')'");
326 return error("missing closing bracket ']'");
330 if (expr0() != OK) // Eat up first parameter (register or immediate)
333 if (*tok++ != ':') // Demand a ':' there
334 return error("missing colon ':'");
336 if (expr0() != OK) // Eat up second parameter (register or immediate)
340 return error("missing closing brace '}'");
344 *evalTokenBuffer.u32++ = ACONST; // Attributed const
345 *evalTokenBuffer.u32++ = sloc; // Current location
346 *evalTokenBuffer.u32++ = DEFINED | ((orgactive | org68k_active) ? 0 : cursect); // Store attribs
349 *evalTokenBuffer.u32++ = ACONST; // Attributed const
351 // pcloc == location at start of line
352 *evalTokenBuffer.u32++ = (orgactive ? orgaddr : pcloc);
353 // '*' takes attributes of current section, not ABS!
354 // Also, if we're ORG'd, the symbol is absolute
355 *evalTokenBuffer.u32++ = DEFINED | ((orgactive | org68k_active) ? 0 : cursect);
358 return error("bad expression");
365 // Recursive-descent expression analyzer (with some simple speed hacks)
367 int expr(TOKEN * otk, uint64_t * a_value, WORD * a_attr, SYM ** a_esym)
369 // Passed in values (once dereferenced, that is) can all be zero. They are
370 // there so that the expression analyzer can fill them in as needed. The
371 // expression analyzer gets its input from the global token pointer "tok",
372 // and not from anything passed in by the user.
378 evalTokenBuffer.u32 = otk; // Set token pointer to 'exprbuf' (direct.c)
379 // Also set in various other places too (riscasm.c,
382 // Optimize for single constant or single symbol.
383 // Shamus: Seems to me that this could be greatly simplified by 1st
384 // checking if the first token is a multibyte token, *then*
385 // checking if there's an EOL after it depending on the actual
386 // length of the token (multiple vs. single). Otherwise, we have
387 // the horror show that is the following:
389 && (tok[0] != CONST && tokenClass[tok[0]] != SUNARY))
390 || ((tok[0] == SYMBOL)
391 && (tokenClass[tok[2]] < UNARY))
392 || ((tok[0] == CONST) && (tokenClass[tok[3]] < UNARY))
394 // Shamus: Yes, you can parse that out and make some kind of sense of it, but damn, it takes a while to get it and understand the subtle bugs that result from not being careful about what you're checking; especially vis-a-vis naively checking tok[1] for an EOL. O_o
399 *evalTokenBuffer.u32++ = *ptk.u32++;
400 *evalTokenBuffer.u64++ = *a_value = *ptk.u64++;
401 *a_attr = ABS | DEFINED;
407 //printf("Quick eval in expr(): CONST = %i, tokenClass[tok[2]] = %i\n", *a_value, tokenClass[*tok]);
409 // Not sure that removing float constant here is going to break anything and/or
410 // make things significantly slower, but having this here seems to cause the
411 // complexity of the check to get to this part of the parse to go through the
412 // roof, and dammit, I just don't feel like fighting that fight ATM. :-P
414 else if (*tok == FCONST)
416 *evalTokenBuffer.u32++ = *tok++;
417 *evalTokenBuffer.u64++ = *a_value = *tok.u64++;
418 *a_attr = ABS | DEFINED | FLOAT;
423 //printf("Quick eval in expr(): CONST = %i, tokenClass[tok[2]] = %i\n", *a_value, tokenClass[*tok]);
426 else if (*tok == '*')
428 *evalTokenBuffer.u32++ = CONST;
430 if (orgactive | org68k_active)
432 *evalTokenBuffer.u64++ = *a_value = orgaddr;
433 *a_attr = DEFINED; // We have ORG active, it doesn't belong in a section!
437 *evalTokenBuffer.u64++ = *a_value = pcloc;
438 // '*' takes attributes of current section, not ABS!
439 *a_attr = cursect | DEFINED;
448 else if (*tok == STRING || *tok == SYMBOL)
451 j = (*p == '.' ? curenv : 0);
452 symbol = lookup(p, LABEL, j);
455 symbol = NewSymbol(p, LABEL, j);
457 symbol->sattr |= REFERENCED;
459 // Check for undefined register equates, but only if it's not part
460 // of a #<SYMBOL> construct, as it could be that the label that's
461 // been undefined may later be used as an address label--which
462 // means it will be fixed up later, and thus, not an error.
463 if ((symbol->sattre & UNDEF_EQUR) && !riscImmTokenSeen)
465 error("undefined register equate '%s'", symbol->sname);
466 //if we return right away, it returns some spurious errors...
470 // Check register bank usage
471 if (symbol->sattre & EQUATEDREG)
473 if ((regbank == BANK_0) && (symbol->sattre & BANK_1) && !altbankok)
474 warn("equated symbol '%s' cannot be used in register bank 0", symbol->sname);
476 if ((regbank == BANK_1) && (symbol->sattre & BANK_0) && !altbankok)
477 warn("equated symbol '%s' cannot be used in register bank 1", symbol->sname);
480 *evalTokenBuffer.u32++ = SYMBOL;
482 *evalTokenBuffer++ = (TOKEN)symbol;
485 While this approach works, it's wasteful. It would be better to use something
486 that's already available, like the symbol "order defined" table (which needs to
487 be converted from a linked list into an array).
489 *evalTokenBuffer.u32++ = symbolNum;
490 symbolPtr[symbolNum] = symbol;
494 *a_value = (symbol->sattr & DEFINED ? symbol->svalue : 0);
495 *a_attr = (WORD)(symbol->sattr & ~GLOBAL);
498 All that extra crap that was put into the svalue when doing the equr stuff is
499 thrown away right here. What the hell is it for?
501 if (symbol->sattre & EQUATEDREG)
504 *a_attr |= RISCREG; // Mark it as a register, 'cause it is
508 if ((symbol->sattr & (GLOBAL | DEFINED)) == GLOBAL
514 // Holy hell... This is likely due to the fact that LSR is mistakenly set as a SUNARY type... Need to fix this... !!! FIX !!!
517 *evalTokenBuffer.u32++ = *tok++;
521 // Unknown type here... Alert the user!
522 error("undefined RISC register in expression [token=$%X]", *tok);
523 // Prevent spurious error reporting...
528 *evalTokenBuffer.u32++ = ENDEXPR;
535 *evalTokenBuffer.u32++ = ENDEXPR;
536 return evexpr(otk, a_value, a_attr, a_esym);
540 // Evaluate expression.
541 // If the expression involves only ONE external symbol, the expression is
542 // UNDEFINED, but its value includes everything but the symbol value, and
543 // 'a_esym' is set to the external symbol.
545 int evexpr(TOKEN * _tk, uint64_t * a_value, WORD * a_attr, SYM ** a_esym)
549 uint64_t * sval = evstk; // (Empty) initial stack
550 WORD * sattr = evattr;
551 SYM * esym = NULL; // No external symbol involved
556 while (*tk.u32 != ENDEXPR)
558 switch ((int)*tk.u32++)
561 sy = symbolPtr[*tk.u32++];
562 sy->sattr |= REFERENCED; // Set "referenced" bit
564 if (!(sy->sattr & DEFINED))
566 // Reference to undefined symbol
567 if (!(sy->sattr & GLOBAL))
574 if (esym != NULL) // Check for multiple externals
575 return error(seg_error);
580 if (sy->sattr & DEFINED)
581 *++sval = sy->svalue; // Push symbol's value
583 *++sval = 0; // 0 for undefined symbols
585 *++sattr = (WORD)(sy->sattr & ~GLOBAL); // Push attribs
586 sym_seg = (WORD)(sy->sattr & TDB);
591 *++sattr = ABS | DEFINED; // Push simple attribs
595 // Even though it's a double, we can treat it like a uint64_t since
596 // we're just moving the bits around.
598 *++sattr = ABS | DEFINED | FLOAT; // Push simple attribs
602 *++sval = *tk.u32++; // Push value
603 *++sattr = (WORD)*tk.u32++; // Push attribs
606 // Binary "+" and "-" matrix:
609 // ----------------------------
610 // ABS | ABS | Sect | Other |
611 // Sect | Sect | [1] | Error |
612 // Other | Other | Error | [1] |
613 // ----------------------------
620 --sattr; // Pop attrib
621 // Get FLOAT attribute, if any
622 attr = (sattr[0] | sattr[1]) & FLOAT;
624 // Since adding an int to a fp value promotes it to a fp value, we
625 // don't care whether it's first or second; we cast to a double
631 double fpval1 = (sattr[0] & FLOAT ? *p.dp : (double)*p.i64);
633 double fpval2 = (sattr[1] & FLOAT ? *p.dp : (double)*p.i64);
634 *(double *)sval = fpval1 + fpval2;
638 *sval += sval[1]; // Compute value
642 *sattr = sattr[1] | attr;
643 else if (sattr[1] & TDB)
644 return error(seg_error);
650 --sattr; // Pop attrib
651 // Get FLOAT attribute, if any
652 attr = (sattr[0] | sattr[1]) & FLOAT;
654 // Since subtracting an int from a fp value promotes it to a fp
655 // value, we don't care whether it's first or second; we cast to
656 // a double regardless.
661 double fpval1 = (sattr[0] & FLOAT ? *p.dp : (double)*p.i64);
663 double fpval2 = (sattr[1] & FLOAT ? *p.dp : (double)*p.i64);
664 *(double *)sval = fpval1 - fpval2;
671 *sattr |= attr; // Inherit FLOAT attribute
672 attr = (WORD)(*sattr & TDB);
673 // If symbol1 is ABS, take attributes from symbol2
676 // Otherwise, they're both TDB and so attributes cancel out
677 else if (sattr[1] & TDB)
682 // Unary operators only work on ABS items
685 return error(seg_error);
689 double * dst = (double *)sval;
691 *sattr = ABS | DEFINED | FLOAT; // Expr becomes absolute
695 *sval = -(int64_t)*sval;
696 *sattr = ABS | DEFINED; // Expr becomes absolute
701 case UNLT: // Unary < (get the low byte of a word)
703 return error(seg_error);
706 return error(noflt_error);
708 *sval = (int64_t)((*sval) & 0x00FF);
709 *sattr = ABS | DEFINED; // Expr becomes absolute
712 case UNGT: // Unary > (get the high byte of a word)
714 return error(seg_error);
717 return error(noflt_error);
719 *sval = (int64_t)(((*sval) >> 8) & 0x00FF);
720 *sattr = ABS | DEFINED; // Expr becomes absolute
725 return error(seg_error);
728 return error("floating point numbers not allowed with operator '!'.");
731 *sattr = ABS | DEFINED; // Expr becomes absolute
736 return error(seg_error);
739 return error("floating point numbers not allowed with operator '~'.");
742 *sattr = ABS | DEFINED; // Expr becomes absolute
745 // Comparison operators must have two values that
746 // are in the same segment, but that's the only requirement.
751 if ((*sattr & TDB) != (sattr[1] & TDB))
752 return error(seg_error);
754 // Get FLOAT attribute, if any
755 attr = (sattr[0] | sattr[1]) & FLOAT;
757 // Cast any ints in the comparison to double, if there's at least
758 // one double in the comparison.
763 double fpval1 = (sattr[0] & FLOAT ? *p.dp : (double)*p.i64);
765 double fpval2 = (sattr[1] & FLOAT ? *p.dp : (double)*p.i64);
766 *sval = (fpval1 <= fpval2);
770 *sval = (*sval <= sval[1]);
773 *sattr = ABS | DEFINED;
780 if ((*sattr & TDB) != (sattr[1] & TDB))
781 return error(seg_error);
783 // Get FLOAT attribute, if any
784 attr = (sattr[0] | sattr[1]) & FLOAT;
786 // Cast any ints in the comparison to double, if there's at least
787 // one double in the comparison.
792 double fpval1 = (sattr[0] & FLOAT ? *p.dp : (double)*p.i64);
794 double fpval2 = (sattr[1] & FLOAT ? *p.dp : (double)*p.i64);
795 *sval = (fpval1 >= fpval2);
799 *sval = (*sval >= sval[1]);
802 *sattr = ABS | DEFINED;
809 if ((*sattr & TDB) != (sattr[1] & TDB))
810 return error(seg_error);
812 // Get FLOAT attribute, if any
813 attr = (sattr[0] | sattr[1]) & FLOAT;
815 // Cast any ints in the comparison to double, if there's at least
816 // one double in the comparison.
821 double fpval1 = (sattr[0] & FLOAT ? *p.dp : (double)*p.i64);
823 double fpval2 = (sattr[1] & FLOAT ? *p.dp : (double)*p.i64);
824 *sval = (fpval1 > fpval2);
828 *sval = (*sval > sval[1]);
831 *sattr = ABS | DEFINED;
838 if ((*sattr & TDB) != (sattr[1] & TDB))
839 return error(seg_error);
841 // Get FLOAT attribute, if any
842 attr = (sattr[0] | sattr[1]) & FLOAT;
844 // Cast any ints in the comparison to double, if there's at least
845 // one double in the comparison.
850 double fpval1 = (sattr[0] & FLOAT ? *p.dp : (double)*p.i64);
852 double fpval2 = (sattr[1] & FLOAT ? *p.dp : (double)*p.i64);
853 *sval = (fpval1 < fpval2);
857 *sval = (*sval < sval[1]);
860 *sattr = ABS | DEFINED; // Expr forced to ABS
867 if ((*sattr & TDB) != (sattr[1] & TDB))
868 return error(seg_error);
870 // Get FLOAT attribute, if any
871 attr = (sattr[0] | sattr[1]) & FLOAT;
873 // Cast any ints in the comparison to double, if there's at least
874 // one double in the comparison.
879 double fpval1 = (sattr[0] & FLOAT ? *p.dp : (double)*p.i64);
881 double fpval2 = (sattr[1] & FLOAT ? *p.dp : (double)*p.i64);
882 *sval = (fpval1 != fpval2);
886 *sval = (*sval != sval[1]);
889 *sattr = ABS | DEFINED; // Expr forced to ABS
896 if ((*sattr & TDB) != (sattr[1] & TDB))
897 return error(seg_error);
899 // Get FLOAT attribute, if any
900 attr = (sattr[0] | sattr[1]) & FLOAT;
902 // Cast any ints in the comparison to double, if there's at least
903 // one double in the comparison.
908 double fpval1 = (sattr[0] & FLOAT ? *p.dp : (double)*p.i64);
910 double fpval2 = (sattr[1] & FLOAT ? *p.dp : (double)*p.i64);
911 *sval = (fpval1 == fpval2);
915 *sval = (*sval == sval[1]);
918 *sattr = ABS | DEFINED; // Expr forced to ABS
922 // All other binary operators must have two ABS items to work with.
923 // They all produce an ABS value.
924 // Shamus: Is this true? There's at least one counterexample of legit
925 // code where this assumption fails to produce correct code.
928 switch ((int)tk.u32[-1])
933 // Get FLOAT attribute, if any
934 attr = (sattr[0] | sattr[1]) & FLOAT;
936 // Since multiplying an int by a fp value promotes it to a fp
937 // value, we don't care whether it's first or second; it will
938 // be cast to a double regardless.
940 An open question here is do we promote ints to floats as signed or unsigned? It makes a difference if, say, the int is put in as -1 but is promoted to a double as $FFFFFFFFFFFFFFFF--you get very different results that way! For now, we promote as signed until proven detrimental otherwise.
946 double fpval1 = (sattr[0] & FLOAT ? *p.dp : (double)*p.i64);
948 double fpval2 = (sattr[1] & FLOAT ? *p.dp : (double)*p.i64);
949 *(double *)sval = fpval1 * fpval2;
961 // Get FLOAT attribute, if any
962 attr = (sattr[0] | sattr[1]) & FLOAT;
968 double fpval1 = (sattr[0] & FLOAT ? *p.dp : (double)*p.i64);
970 double fpval2 = (sattr[1] & FLOAT ? *p.dp : (double)*p.i64);
973 return error("divide by zero");
975 *(double *)sval = fpval1 / fpval2;
980 return error("divide by zero");
982 // Compiler is picky here: Without casting these, it
983 // discards the sign if dividing a negative # by a
984 // positive one, creating a bad result. :-/
985 // Definitely a side effect of using uint32_ts instead of
987 *sval = (int32_t)sval[0] / (int32_t)sval[1];
996 if ((*sattr | sattr[1]) & FLOAT)
997 return error("floating point numbers not allowed with operator '%'.");
1000 return error("mod (%) by zero");
1007 sattr--; // Pop attrib
1009 if ((*sattr | sattr[1]) & FLOAT)
1010 return error("floating point numbers not allowed with operator '<<'.");
1017 sattr--; // Pop attrib
1019 if ((*sattr | sattr[1]) & FLOAT)
1020 return error("floating point numbers not allowed with operator '>>'.");
1027 sattr--; // Pop attrib
1029 if ((*sattr | sattr[1]) & FLOAT)
1030 return error("floating point numbers not allowed with operator '&'.");
1037 sattr--; // Pop attrib
1039 if ((*sattr | sattr[1]) & FLOAT)
1040 return error("floating point numbers not allowed with operator '^'.");
1049 if ((*sattr | sattr[1]) & FLOAT)
1050 return error("floating point numbers not allowed with operator '|'.");
1056 // Bad operator in expression stream (this should never happen!)
1068 // Copy value + attrib into return variables
1076 // Count the # of tokens in the passed in expression
1077 // N.B.: 64-bit constants count as two tokens each
1079 uint16_t ExpressionLength(TOKEN * tk)
1083 for(length=0; tk[length]!=ENDEXPR; length++)
1085 // Add one to length for 2X tokens, two for 3X tokens
1086 if (tk[length] == SYMBOL)
1088 else if ((tk[length] == CONST) || (tk[length] == FCONST))
1092 // Add 1 for ENDEXPR