2 // RMAC - Renamed Macro Assembler for all Atari computers
3 // EXPR.C - Expression Analyzer
4 // Copyright (C) 199x Landon Dyer, 2011-2021 Reboot and Friends
5 // RMAC derived from MADMAC v1.07 Written by Landon Dyer, 1986
6 // Source utilised with the kind permission of Landon Dyer
20 #define DEF_KW // Declare keyword values
21 #include "kwtab.h" // Incl generated keyword tables & defs
23 // N.B.: The size of tokenClass should be identical to the largest value of
24 // a token; we're assuming 256 but not 100% sure!
25 static char tokenClass[256]; // Generated table of token classes
26 static uint64_t evstk[EVSTACKSIZE]; // Evaluator value stack
27 static WORD evattr[EVSTACKSIZE]; // Evaluator attribute stack
29 // Token-class initialization list
32 CONST, FCONST, SYMBOL, 0, // ID
33 '(', '[', '{', 0, // OPAR
34 ')', ']', '}', 0, // CPAR
35 CR_DEFINED, CR_REFERENCED, // SUNARY (special unary)
38 CR_ABSCOUNT, CR_FILESIZE, 0,
39 '!', '~', UNMINUS, UNLT, UNGT, 0, // UNARY
40 '*', '/', '%', 0, // MULT
43 LE, GE, '<', '>', NE, '=', 0, // REL
50 const char missym_error[] = "missing symbol";
51 const char str_error[] = "missing symbol or string";
52 const char noflt_error[] = "operator not usable with float";
54 // Convert expression to postfix
55 static PTR evalTokenBuffer; // Deposit tokens here (this is really a
56 // pointer to exprbuf from direct.c)
57 // (Can also be from others, like
59 static int symbolNum; // Pointer to the entry in symbolPtr[]
62 // Obtain a string value
64 static uint32_t str_value(char * p)
69 v = (v << 8) | (*p & 0xFF);
75 // Initialize expression analyzer
77 void InitExpression(void)
79 // Initialize token-class table (all set to END)
80 for(int i=0; i<256; i++)
85 for(char * p=itokcl; *p!=1; p++)
90 tokenClass[(int)(*p)] = (char)i;
96 extern int correctMathRules;
105 // Binary operators (all the same precedence,
106 // except if -4 is passed to the command line)
108 #define precedence(HIERARCHY_HIGHER, HIERARCHY_CURRENT) \
111 if (HIERARCHY_HIGHER() != OK) \
113 while (tokenClass[*tok] == HIERARCHY_CURRENT) \
116 if (HIERARCHY_HIGHER() != OK) \
118 *evalTokenBuffer.u32++ = t; \
124 if ( correctMathRules == 0 )
129 while (tokenClass[*tok] >= MULT)
136 *evalTokenBuffer.u32++ = t;
141 // The order of C precedence (lower to higher):
145 // relational = < <= >= > !=
156 precedence(and, XOR);
162 precedence(rel, AND);
168 precedence(shift, REL);
174 precedence(sum, SHIFT);
180 precedence(product, ADD);
186 precedence(expr1, MULT);
191 // Unary operators (detect unary '-')
192 // ggn: If expression starts with a plus then also eat it up. For some reason
193 // the parser gets confused when this happens and emits a "bad
201 int class = tokenClass[*tok];
203 if (*tok == '-' || *tok == '+' || *tok == '<' || *tok == '>' || class == UNARY)
217 // With leading + we don't have to deposit anything to the buffer
218 // because there's no unary '+', nor do we have to do anything about it
220 *evalTokenBuffer.u32++ = t;
222 else if (class == SUNARY)
229 *evalTokenBuffer.u32++ = CONST;
230 *evalTokenBuffer.u64++ = sect[ABS].sloc;
234 *evalTokenBuffer.u32++ = CONST;
235 *evalTokenBuffer.u64++ = sloc;
239 if (*tok++ != STRING)
240 return error("^^FILESIZE expects filename inside string");
241 *evalTokenBuffer.u32++ = CONST;
242 // @@copypasted from d_incbin, maybe factor this out somehow?
243 // Attempt to open the include file in the current directory, then (if that
244 // failed) try list of include files passed in the environment string or by
249 if ((fd = open(string[*tok], _OPEN_INC)) < 0)
251 for(i=0; nthpath("RMACPATH", i, buf1)!=0; i++)
255 // Append path char if necessary
256 if ((fd > 0) && (buf1[fd - 1] != SLASHCHAR))
257 strcat(buf1, SLASHSTRING);
259 strcat(buf1, string[*tok]);
261 if ((fd = open(buf1, _OPEN_INC)) >= 0)
265 return error("cannot open: \"%s\"", string[*tok]);
269 *evalTokenBuffer.u64++ = (uint64_t)lseek(fd, 0L, SEEK_END);
272 // Advance tok because of consumed string token
276 *evalTokenBuffer.u32++ = CONST;
277 *evalTokenBuffer.u64++ = dos_time();
280 *evalTokenBuffer.u32++ = CONST;
281 *evalTokenBuffer.u64++ = dos_date();
283 case CR_MACDEF: // ^^macdef <macro-name>
284 if (*tok++ != SYMBOL)
285 return error(missym_error);
288 w = (lookup(p, MACRO, 0) == NULL ? 0 : 1);
289 *evalTokenBuffer.u32++ = CONST;
290 *evalTokenBuffer.u64++ = (uint64_t)w;
298 if (*tok++ != SYMBOL)
299 return error(missym_error);
302 int j = (*p == '.' ? curenv : 0);
303 SYM * sy = lookup(p, LABEL, j);
304 w = ((sy != NULL) && (sy->sattr & w ? 1 : 0));
305 *evalTokenBuffer.u32++ = CONST;
306 *evalTokenBuffer.u64++ = (uint64_t)w;
309 if (*tok != SYMBOL && *tok != STRING)
310 return error(str_error);
316 return error(comma_error);
318 if (*tok != SYMBOL && *tok != STRING)
319 return error(str_error);
321 char * p2 = string[tok[1]];
324 w = (WORD)(!strcmp(p, p2));
325 *evalTokenBuffer.u32++ = CONST;
326 *evalTokenBuffer.u64++ = (uint64_t)w;
337 // Terminals (CONSTs) and parenthesis grouping
347 *evalTokenBuffer.u32++ = CONST;
348 *evalTokenBuffer.u64++ = *ptk.u64++;
353 *evalTokenBuffer.u32++ = FCONST;
354 *evalTokenBuffer.u64++ = *ptk.u64++;
359 char * p = string[*tok++];
360 int j = (*p == '.' ? curenv : 0);
361 SYM * sy = lookup(p, LABEL, j);
364 sy = NewSymbol(p, LABEL, j);
366 *evalTokenBuffer.u32++ = SYMBOL;
367 *evalTokenBuffer.u32++ = symbolNum;
368 symbolPtr[symbolNum] = sy;
373 *evalTokenBuffer.u32++ = CONST;
374 *evalTokenBuffer.u64++ = str_value(string[*tok++]);
381 return error("missing closing parenthesis ')'");
389 return error("missing closing bracket ']'");
393 if (expr0() != OK) // Eat up first parameter (register or immediate)
396 if (*tok++ != ':') // Demand a ':' there
397 return error("missing colon ':'");
399 if (expr0() != OK) // Eat up second parameter (register or immediate)
403 return error("missing closing brace '}'");
407 *evalTokenBuffer.u32++ = ACONST; // Attributed const
408 *evalTokenBuffer.u32++ = sloc; // Current location
409 *evalTokenBuffer.u32++ = DEFINED | ((orgactive | org68k_active) ? 0 : cursect); // Store attribs
412 *evalTokenBuffer.u32++ = ACONST; // Attributed const
414 // pcloc == location at start of line
415 *evalTokenBuffer.u32++ = (orgactive ? orgaddr : pcloc);
416 // '*' takes attributes of current section, not ABS!
417 // Also, if we're ORG'd, the symbol is absolute
418 *evalTokenBuffer.u32++ = DEFINED | ((orgactive | org68k_active) ? 0 : cursect);
421 return error("bad expression");
428 // Recursive-descent expression analyzer (with some simple speed hacks)
430 int expr(TOKEN * otk, uint64_t * a_value, WORD * a_attr, SYM ** a_esym)
432 // Passed in values (once dereferenced, that is) can all be zero. They are
433 // there so that the expression analyzer can fill them in as needed. The
434 // expression analyzer gets its input from the global token pointer "tok",
435 // and not from anything passed in by the user.
441 evalTokenBuffer.u32 = otk; // Set token pointer to 'exprbuf' (direct.c)
442 // Also set in various other places too (riscasm.c,
445 // Optimize for single constant or single symbol.
446 // Shamus: Seems to me that this could be greatly simplified by 1st
447 // checking if the first token is a multibyte token, *then*
448 // checking if there's an EOL after it depending on the actual
449 // length of the token (multiple vs. single). Otherwise, we have
450 // the horror show that is the following:
452 && (tok[0] != CONST && tokenClass[tok[0]] != SUNARY))
453 || ((tok[0] == SYMBOL)
454 && (tokenClass[tok[2]] < UNARY))
455 || ((tok[0] == CONST) && (tokenClass[tok[3]] < UNARY))
457 // Shamus: Yes, you can parse that out and make some kind of sense of it, but damn, it takes a while to get it and understand the subtle bugs that result from not being careful about what you're checking; especially vis-a-vis naively checking tok[1] for an EOL. O_o
462 *evalTokenBuffer.u32++ = *ptk.u32++;
463 *evalTokenBuffer.u64++ = *a_value = *ptk.u64++;
464 *a_attr = ABS | DEFINED;
470 //printf("Quick eval in expr(): CONST = %i, tokenClass[tok[2]] = %i\n", *a_value, tokenClass[*tok]);
472 // Not sure that removing float constant here is going to break anything and/or
473 // make things significantly slower, but having this here seems to cause the
474 // complexity of the check to get to this part of the parse to go through the
475 // roof, and dammit, I just don't feel like fighting that fight ATM. :-P
477 else if (*tok == FCONST)
479 *evalTokenBuffer.u32++ = *tok++;
480 *evalTokenBuffer.u64++ = *a_value = *tok.u64++;
481 *a_attr = ABS | DEFINED | FLOAT;
486 //printf("Quick eval in expr(): CONST = %i, tokenClass[tok[2]] = %i\n", *a_value, tokenClass[*tok]);
489 else if (*tok == '*')
491 *evalTokenBuffer.u32++ = CONST;
493 if (orgactive | org68k_active)
495 *evalTokenBuffer.u64++ = *a_value = orgaddr;
496 *a_attr = DEFINED; // We have ORG active, it doesn't belong in a section!
500 *evalTokenBuffer.u64++ = *a_value = pcloc;
501 // '*' takes attributes of current section, not ABS!
502 *a_attr = cursect | DEFINED;
511 else if (*tok == STRING || *tok == SYMBOL)
514 j = (*p == '.' ? curenv : 0);
515 symbol = lookup(p, LABEL, j);
518 symbol = NewSymbol(p, LABEL, j);
520 symbol->sattr |= REFERENCED;
522 // Check for undefined register equates, but only if it's not part
523 // of a #<SYMBOL> construct, as it could be that the label that's
524 // been undefined may later be used as an address label--which
525 // means it will be fixed up later, and thus, not an error.
526 if ((symbol->sattre & UNDEF_EQUR) && !riscImmTokenSeen)
528 error("undefined register equate '%s'", symbol->sname);
531 *evalTokenBuffer.u32++ = SYMBOL;
533 *evalTokenBuffer++ = (TOKEN)symbol;
536 While this approach works, it's wasteful. It would be better to use something
537 that's already available, like the symbol "order defined" table (which needs to
538 be converted from a linked list into an array).
540 *evalTokenBuffer.u32++ = symbolNum;
541 symbolPtr[symbolNum] = symbol;
545 *a_value = (symbol->sattr & DEFINED ? symbol->svalue : 0);
546 *a_attr = (WORD)(symbol->sattr & ~GLOBAL);
548 if (symbol->sattre & EQUATEDREG)
550 *a_attr |= RISCREG; // Mark it as a register, 'cause it is
554 if ((symbol->sattr & (GLOBAL | DEFINED)) == GLOBAL
560 // Holy hell... This is likely due to the fact that LSR is mistakenly set as a SUNARY type... Need to fix this... !!! FIX !!!
563 *evalTokenBuffer.u32++ = *tok++;
567 // Unknown type here... Alert the user!
568 error("undefined RISC register in expression [token=$%X]", *tok);
569 // Prevent spurious error reporting...
574 *evalTokenBuffer.u32++ = ENDEXPR;
581 *evalTokenBuffer.u32++ = ENDEXPR;
582 return evexpr(otk, a_value, a_attr, a_esym);
586 // Evaluate expression.
587 // If the expression involves only ONE external symbol, the expression is
588 // UNDEFINED, but its value includes everything but the symbol value, and
589 // 'a_esym' is set to the external symbol.
591 int evexpr(TOKEN * _tk, uint64_t * a_value, WORD * a_attr, SYM ** a_esym)
595 uint64_t * sval = evstk; // (Empty) initial stack
596 WORD * sattr = evattr;
597 SYM * esym = NULL; // No external symbol involved
602 while (*tk.u32 != ENDEXPR)
604 switch ((int)*tk.u32++)
607 sy = symbolPtr[*tk.u32++];
608 sy->sattr |= REFERENCED; // Set "referenced" bit
610 if (!(sy->sattr & DEFINED))
612 // Reference to undefined symbol
613 if (!(sy->sattr & GLOBAL))
620 if (esym != NULL) // Check for multiple externals
621 return error(seg_error);
626 if (sy->sattr & DEFINED)
627 *++sval = sy->svalue; // Push symbol's value
629 *++sval = 0; // 0 for undefined symbols
631 *++sattr = (WORD)(sy->sattr & ~GLOBAL); // Push attribs
632 sym_seg = (WORD)(sy->sattr & TDB);
637 *++sattr = ABS | DEFINED; // Push simple attribs
641 // Even though it's a double, we can treat it like a uint64_t since
642 // we're just moving the bits around.
644 *++sattr = ABS | DEFINED | FLOAT; // Push simple attribs
648 *++sval = *tk.u32++; // Push value
649 *++sattr = (WORD)*tk.u32++; // Push attribs
652 // Binary "+" and "-" matrix:
655 // ----------------------------
656 // ABS | ABS | Sect | Other |
657 // Sect | Sect | [1] | Error |
658 // Other | Other | Error | [1] |
659 // ----------------------------
666 --sattr; // Pop attrib
667 // Get FLOAT attribute, if any
668 attr = (sattr[0] | sattr[1]) & FLOAT;
670 // Since adding an int to a fp value promotes it to a fp value, we
671 // don't care whether it's first or second; we cast to a double
677 double fpval1 = (sattr[0] & FLOAT ? *p.dp : (double)*p.i64);
679 double fpval2 = (sattr[1] & FLOAT ? *p.dp : (double)*p.i64);
680 *(double *)sval = fpval1 + fpval2;
684 *sval += sval[1]; // Compute value
688 *sattr = sattr[1] | attr;
689 else if (sattr[1] & TDB)
690 return error(seg_error);
696 --sattr; // Pop attrib
697 // Get FLOAT attribute, if any
698 attr = (sattr[0] | sattr[1]) & FLOAT;
700 // Since subtracting an int from a fp value promotes it to a fp
701 // value, we don't care whether it's first or second; we cast to
702 // a double regardless.
707 double fpval1 = (sattr[0] & FLOAT ? *p.dp : (double)*p.i64);
709 double fpval2 = (sattr[1] & FLOAT ? *p.dp : (double)*p.i64);
710 *(double *)sval = fpval1 - fpval2;
717 *sattr |= attr; // Inherit FLOAT attribute
718 attr = (WORD)(*sattr & TDB);
719 // If symbol1 is ABS, take attributes from symbol2
722 // Otherwise, they're both TDB and so attributes cancel out
723 else if (sattr[1] & TDB)
728 // Unary operators only work on ABS items
731 return error(seg_error);
735 double * dst = (double *)sval;
737 *sattr = ABS | DEFINED | FLOAT; // Expr becomes absolute
741 *sval = -(int64_t)*sval;
742 *sattr = ABS | DEFINED; // Expr becomes absolute
747 case UNLT: // Unary < (get the low byte of a word)
749 return error(seg_error);
752 return error(noflt_error);
754 *sval = (int64_t)((*sval) & 0x00FF);
755 *sattr = ABS | DEFINED; // Expr becomes absolute
758 case UNGT: // Unary > (get the high byte of a word)
760 return error(seg_error);
763 return error(noflt_error);
765 *sval = (int64_t)(((*sval) >> 8) & 0x00FF);
766 *sattr = ABS | DEFINED; // Expr becomes absolute
771 return error(seg_error);
774 return error("floating point numbers not allowed with operator '!'.");
777 *sattr = ABS | DEFINED; // Expr becomes absolute
782 return error(seg_error);
785 return error("floating point numbers not allowed with operator '~'.");
788 *sattr = ABS | DEFINED; // Expr becomes absolute
791 // Comparison operators must have two values that
792 // are in the same segment, but that's the only requirement.
797 if ((*sattr & TDB) != (sattr[1] & TDB))
798 return error(seg_error);
800 // Get FLOAT attribute, if any
801 attr = (sattr[0] | sattr[1]) & FLOAT;
803 // Cast any ints in the comparison to double, if there's at least
804 // one double in the comparison.
809 double fpval1 = (sattr[0] & FLOAT ? *p.dp : (double)*p.i64);
811 double fpval2 = (sattr[1] & FLOAT ? *p.dp : (double)*p.i64);
812 *sval = (fpval1 <= fpval2);
816 *sval = (*sval <= sval[1]);
819 *sattr = ABS | DEFINED;
826 if ((*sattr & TDB) != (sattr[1] & TDB))
827 return error(seg_error);
829 // Get FLOAT attribute, if any
830 attr = (sattr[0] | sattr[1]) & FLOAT;
832 // Cast any ints in the comparison to double, if there's at least
833 // one double in the comparison.
838 double fpval1 = (sattr[0] & FLOAT ? *p.dp : (double)*p.i64);
840 double fpval2 = (sattr[1] & FLOAT ? *p.dp : (double)*p.i64);
841 *sval = (fpval1 >= fpval2);
845 *sval = (*sval >= sval[1]);
848 *sattr = ABS | DEFINED;
855 if ((*sattr & TDB) != (sattr[1] & TDB))
856 return error(seg_error);
858 // Get FLOAT attribute, if any
859 attr = (sattr[0] | sattr[1]) & FLOAT;
861 // Cast any ints in the comparison to double, if there's at least
862 // one double in the comparison.
867 double fpval1 = (sattr[0] & FLOAT ? *p.dp : (double)*p.i64);
869 double fpval2 = (sattr[1] & FLOAT ? *p.dp : (double)*p.i64);
870 *sval = (fpval1 > fpval2);
874 *sval = (*sval > sval[1]);
877 *sattr = ABS | DEFINED;
884 if ((*sattr & TDB) != (sattr[1] & TDB))
885 return error(seg_error);
887 // Get FLOAT attribute, if any
888 attr = (sattr[0] | sattr[1]) & FLOAT;
890 // Cast any ints in the comparison to double, if there's at least
891 // one double in the comparison.
896 double fpval1 = (sattr[0] & FLOAT ? *p.dp : (double)*p.i64);
898 double fpval2 = (sattr[1] & FLOAT ? *p.dp : (double)*p.i64);
899 *sval = (fpval1 < fpval2);
903 *sval = (*sval < sval[1]);
906 *sattr = ABS | DEFINED; // Expr forced to ABS
913 if ((*sattr & TDB) != (sattr[1] & TDB))
914 return error(seg_error);
916 // Get FLOAT attribute, if any
917 attr = (sattr[0] | sattr[1]) & FLOAT;
919 // Cast any ints in the comparison to double, if there's at least
920 // one double in the comparison.
925 double fpval1 = (sattr[0] & FLOAT ? *p.dp : (double)*p.i64);
927 double fpval2 = (sattr[1] & FLOAT ? *p.dp : (double)*p.i64);
928 *sval = (fpval1 != fpval2);
932 *sval = (*sval != sval[1]);
935 *sattr = ABS | DEFINED; // Expr forced to ABS
942 if ((*sattr & TDB) != (sattr[1] & TDB))
943 return error(seg_error);
945 // Get FLOAT attribute, if any
946 attr = (sattr[0] | sattr[1]) & FLOAT;
948 // Cast any ints in the comparison to double, if there's at least
949 // one double in the comparison.
954 double fpval1 = (sattr[0] & FLOAT ? *p.dp : (double)*p.i64);
956 double fpval2 = (sattr[1] & FLOAT ? *p.dp : (double)*p.i64);
957 *sval = (fpval1 == fpval2);
961 *sval = (*sval == sval[1]);
964 *sattr = ABS | DEFINED; // Expr forced to ABS
968 // All other binary operators must have two ABS items to work with.
969 // They all produce an ABS value.
970 // Shamus: Is this true? There's at least one counterexample of legit
971 // code where this assumption fails to produce correct code.
974 switch ((int)tk.u32[-1])
979 // Get FLOAT attribute, if any
980 attr = (sattr[0] | sattr[1]) & FLOAT;
982 // Since multiplying an int by a fp value promotes it to a fp
983 // value, we don't care whether it's first or second; it will
984 // be cast to a double regardless.
986 An open question here is do we promote ints to floats as signed or unsigned? It makes a difference if, say, the int is put in as -1 but is promoted to a double as $FFFFFFFFFFFFFFFF--you get very different results that way! For now, we promote as signed until proven detrimental otherwise.
992 double fpval1 = (sattr[0] & FLOAT ? *p.dp : (double)*p.i64);
994 double fpval2 = (sattr[1] & FLOAT ? *p.dp : (double)*p.i64);
995 *(double *)sval = fpval1 * fpval2;
1007 // Get FLOAT attribute, if any
1008 attr = (sattr[0] | sattr[1]) & FLOAT;
1014 double fpval1 = (sattr[0] & FLOAT ? *p.dp : (double)*p.i64);
1016 double fpval2 = (sattr[1] & FLOAT ? *p.dp : (double)*p.i64);
1019 return error("divide by zero");
1021 *(double *)sval = fpval1 / fpval2;
1026 return error("divide by zero");
1028 // Compiler is picky here: Without casting these, it
1029 // discards the sign if dividing a negative # by a
1030 // positive one, creating a bad result. :-/
1031 // Definitely a side effect of using uint32_ts instead of
1033 *sval = (int32_t)sval[0] / (int32_t)sval[1];
1042 if ((*sattr | sattr[1]) & FLOAT)
1043 return error("floating point numbers not allowed with operator '%'.");
1046 return error("mod (%) by zero");
1053 sattr--; // Pop attrib
1055 if ((*sattr | sattr[1]) & FLOAT)
1056 return error("floating point numbers not allowed with operator '<<'.");
1063 sattr--; // Pop attrib
1065 if ((*sattr | sattr[1]) & FLOAT)
1066 return error("floating point numbers not allowed with operator '>>'.");
1073 sattr--; // Pop attrib
1075 if ((*sattr | sattr[1]) & FLOAT)
1076 return error("floating point numbers not allowed with operator '&'.");
1083 sattr--; // Pop attrib
1085 if ((*sattr | sattr[1]) & FLOAT)
1086 return error("floating point numbers not allowed with operator '^'.");
1095 if ((*sattr | sattr[1]) & FLOAT)
1096 return error("floating point numbers not allowed with operator '|'.");
1102 // Bad operator in expression stream (this should never happen!)
1114 // Copy value + attrib into return variables
1122 // Count the # of tokens in the passed in expression
1123 // N.B.: 64-bit constants count as two tokens each
1125 uint16_t ExpressionLength(TOKEN * tk)
1129 for(length=0; tk[length]!=ENDEXPR; length++)
1131 // Add one to length for 2X tokens, two for 3X tokens
1132 if (tk[length] == SYMBOL)
1134 else if ((tk[length] == CONST) || (tk[length] == FCONST))
1138 // Add 1 for ENDEXPR