2 // RMAC - Renamed Macro Assembler for all Atari computers
3 // EXPR.C - Expression Analyzer
4 // Copyright (C) 199x Landon Dyer, 2011-2021 Reboot and Friends
5 // RMAC derived from MADMAC v1.07 Written by Landon Dyer, 1986
6 // Source utilised with the kind permission of Landon Dyer
20 #define DEF_KW // Declare keyword values
21 #include "kwtab.h" // Incl generated keyword tables & defs
23 // N.B.: The size of tokenClass should be identical to the largest value of
24 // a token; we're assuming 256 but not 100% sure!
25 static char tokenClass[256]; // Generated table of token classes
26 static uint64_t evstk[EVSTACKSIZE]; // Evaluator value stack
27 static WORD evattr[EVSTACKSIZE]; // Evaluator attribute stack
29 // Token-class initialization list
32 CONST, FCONST, SYMBOL, 0, // ID
33 '(', '[', '{', 0, // OPAR
34 ')', ']', '}', 0, // CPAR
35 CR_DEFINED, CR_REFERENCED, // SUNARY (special unary)
38 CR_ABSCOUNT, CR_FILESIZE, 0,
39 '!', '~', UNMINUS, UNLT, UNGT, 0, // UNARY
40 '*', '/', '%', 0, // MULT
43 LE, GE, '<', '>', NE, '=', 0, // REL
50 const char missym_error[] = "missing symbol";
51 const char str_error[] = "missing symbol or string";
52 const char noflt_error[] = "operator not usable with float";
54 // Convert expression to postfix
55 static PTR evalTokenBuffer; // Deposit tokens here (this is really a
56 // pointer to exprbuf from direct.c)
57 // (Can also be from others, like
59 static int symbolNum; // Pointer to the entry in symbolPtr[]
62 // Obtain a string value
64 static uint32_t str_value(char * p)
69 v = (v << 8) | (*p & 0xFF);
75 // Initialize expression analyzer
77 void InitExpression(void)
79 // Initialize token-class table (all set to END)
80 for(int i=0; i<256; i++)
85 for(char * p=itokcl; *p!=1; p++)
90 tokenClass[(int)(*p)] = (char)i;
97 // Binary operators (all the same precedence)
104 while (tokenClass[*tok] >= MULT)
111 *evalTokenBuffer.u32++ = t;
118 // Unary operators (detect unary '-')
119 // ggn: If expression starts with a plus then also eat it up. For some reason
120 // the parser gets confused when this happens and emits a "bad
128 int class = tokenClass[*tok];
130 if (*tok == '-' || *tok == '+' || *tok == '<' || *tok == '>' || class == UNARY)
144 // With leading + we don't have to deposit anything to the buffer
145 // because there's no unary '+', nor do we have to do anything about it
147 *evalTokenBuffer.u32++ = t;
149 else if (class == SUNARY)
156 *evalTokenBuffer.u32++ = CONST;
157 *evalTokenBuffer.u64++ = sect[ABS].sloc;
161 *evalTokenBuffer.u32++ = CONST;
162 *evalTokenBuffer.u64++ = sloc;
166 if (*tok++ != STRING)
167 return error("^^FILESIZE expects filename inside string");
168 *evalTokenBuffer.u32++ = CONST;
169 // @@copypasted from d_incbin, maybe factor this out somehow?
170 // Attempt to open the include file in the current directory, then (if that
171 // failed) try list of include files passed in the environment string or by
176 if ((fd = open(string[*tok], _OPEN_INC)) < 0)
178 for(i=0; nthpath("RMACPATH", i, buf1)!=0; i++)
182 // Append path char if necessary
183 if ((fd > 0) && (buf1[fd - 1] != SLASHCHAR))
184 strcat(buf1, SLASHSTRING);
186 strcat(buf1, string[*tok]);
188 if ((fd = open(buf1, _OPEN_INC)) >= 0)
192 return error("cannot open: \"%s\"", string[tok[1]]);
196 *evalTokenBuffer.u64++ = (uint64_t)lseek(fd, 0L, SEEK_END);
199 // Advance tok because of consumed string token
203 *evalTokenBuffer.u32++ = CONST;
204 *evalTokenBuffer.u64++ = dos_time();
207 *evalTokenBuffer.u32++ = CONST;
208 *evalTokenBuffer.u64++ = dos_date();
210 case CR_MACDEF: // ^^macdef <macro-name>
211 if (*tok++ != SYMBOL)
212 return error(missym_error);
215 w = (lookup(p, MACRO, 0) == NULL ? 0 : 1);
216 *evalTokenBuffer.u32++ = CONST;
217 *evalTokenBuffer.u64++ = (uint64_t)w;
225 if (*tok++ != SYMBOL)
226 return error(missym_error);
229 int j = (*p == '.' ? curenv : 0);
230 SYM * sy = lookup(p, LABEL, j);
231 w = ((sy != NULL) && (sy->sattr & w ? 1 : 0));
232 *evalTokenBuffer.u32++ = CONST;
233 *evalTokenBuffer.u64++ = (uint64_t)w;
236 if (*tok != SYMBOL && *tok != STRING)
237 return error(str_error);
243 return error(comma_error);
245 if (*tok != SYMBOL && *tok != STRING)
246 return error(str_error);
248 char * p2 = string[tok[1]];
251 w = (WORD)(!strcmp(p, p2));
252 *evalTokenBuffer.u32++ = CONST;
253 *evalTokenBuffer.u64++ = (uint64_t)w;
264 // Terminals (CONSTs) and parenthesis grouping
274 *evalTokenBuffer.u32++ = CONST;
275 *evalTokenBuffer.u64++ = *ptk.u64++;
280 *evalTokenBuffer.u32++ = FCONST;
281 *evalTokenBuffer.u64++ = *ptk.u64++;
286 char * p = string[*tok++];
287 int j = (*p == '.' ? curenv : 0);
288 SYM * sy = lookup(p, LABEL, j);
291 sy = NewSymbol(p, LABEL, j);
293 *evalTokenBuffer.u32++ = SYMBOL;
294 *evalTokenBuffer.u32++ = symbolNum;
295 symbolPtr[symbolNum] = sy;
300 *evalTokenBuffer.u32++ = CONST;
301 *evalTokenBuffer.u64++ = str_value(string[*tok++]);
308 return error("missing closing parenthesis ')'");
316 return error("missing closing bracket ']'");
320 if (expr0() != OK) // Eat up first parameter (register or immediate)
323 if (*tok++ != ':') // Demand a ':' there
324 return error("missing colon ':'");
326 if (expr0() != OK) // Eat up second parameter (register or immediate)
330 return error("missing closing brace '}'");
334 *evalTokenBuffer.u32++ = ACONST; // Attributed const
335 *evalTokenBuffer.u32++ = sloc; // Current location
336 *evalTokenBuffer.u32++ = DEFINED | ((orgactive | org68k_active) ? 0 : cursect); // Store attribs
339 *evalTokenBuffer.u32++ = ACONST; // Attributed const
341 // pcloc == location at start of line
342 *evalTokenBuffer.u32++ = (orgactive ? orgaddr : pcloc);
343 // '*' takes attributes of current section, not ABS!
344 // Also, if we're ORG'd, the symbol is absolute
345 *evalTokenBuffer.u32++ = DEFINED | ((orgactive | org68k_active) ? 0 : cursect);
348 return error("bad expression");
355 // Recursive-descent expression analyzer (with some simple speed hacks)
357 int expr(TOKEN * otk, uint64_t * a_value, WORD * a_attr, SYM ** a_esym)
359 // Passed in values (once dereferenced, that is) can all be zero. They are
360 // there so that the expression analyzer can fill them in as needed. The
361 // expression analyzer gets its input from the global token pointer "tok",
362 // and not from anything passed in by the user.
368 evalTokenBuffer.u32 = otk; // Set token pointer to 'exprbuf' (direct.c)
369 // Also set in various other places too (riscasm.c,
372 // Optimize for single constant or single symbol.
373 // Shamus: Seems to me that this could be greatly simplified by 1st
374 // checking if the first token is a multibyte token, *then*
375 // checking if there's an EOL after it depending on the actual
376 // length of the token (multiple vs. single). Otherwise, we have
377 // the horror show that is the following:
379 && (tok[0] != CONST && tokenClass[tok[0]] != SUNARY))
380 || ((tok[0] == SYMBOL)
381 && (tokenClass[tok[2]] < UNARY))
382 || ((tok[0] == CONST) && (tokenClass[tok[3]] < UNARY))
384 // Shamus: Yes, you can parse that out and make some kind of sense of it, but damn, it takes a while to get it and understand the subtle bugs that result from not being careful about what you're checking; especially vis-a-vis naively checking tok[1] for an EOL. O_o
389 *evalTokenBuffer.u32++ = *ptk.u32++;
390 *evalTokenBuffer.u64++ = *a_value = *ptk.u64++;
391 *a_attr = ABS | DEFINED;
397 //printf("Quick eval in expr(): CONST = %i, tokenClass[tok[2]] = %i\n", *a_value, tokenClass[*tok]);
399 // Not sure that removing float constant here is going to break anything and/or
400 // make things significantly slower, but having this here seems to cause the
401 // complexity of the check to get to this part of the parse to go through the
402 // roof, and dammit, I just don't feel like fighting that fight ATM. :-P
404 else if (*tok == FCONST)
406 *evalTokenBuffer.u32++ = *tok++;
407 *evalTokenBuffer.u64++ = *a_value = *tok.u64++;
408 *a_attr = ABS | DEFINED | FLOAT;
413 //printf("Quick eval in expr(): CONST = %i, tokenClass[tok[2]] = %i\n", *a_value, tokenClass[*tok]);
416 else if (*tok == '*')
418 *evalTokenBuffer.u32++ = CONST;
420 if (orgactive | org68k_active)
422 *evalTokenBuffer.u64++ = *a_value = orgaddr;
423 *a_attr = DEFINED; // We have ORG active, it doesn't belong in a section!
427 *evalTokenBuffer.u64++ = *a_value = pcloc;
428 // '*' takes attributes of current section, not ABS!
429 *a_attr = cursect | DEFINED;
438 else if (*tok == STRING || *tok == SYMBOL)
441 j = (*p == '.' ? curenv : 0);
442 symbol = lookup(p, LABEL, j);
445 symbol = NewSymbol(p, LABEL, j);
447 symbol->sattr |= REFERENCED;
449 // Check for undefined register equates, but only if it's not part
450 // of a #<SYMBOL> construct, as it could be that the label that's
451 // been undefined may later be used as an address label--which
452 // means it will be fixed up later, and thus, not an error.
453 if ((symbol->sattre & UNDEF_EQUR) && !riscImmTokenSeen)
455 error("undefined register equate '%s'", symbol->sname);
458 *evalTokenBuffer.u32++ = SYMBOL;
460 *evalTokenBuffer++ = (TOKEN)symbol;
463 While this approach works, it's wasteful. It would be better to use something
464 that's already available, like the symbol "order defined" table (which needs to
465 be converted from a linked list into an array).
467 *evalTokenBuffer.u32++ = symbolNum;
468 symbolPtr[symbolNum] = symbol;
472 *a_value = (symbol->sattr & DEFINED ? symbol->svalue : 0);
473 *a_attr = (WORD)(symbol->sattr & ~GLOBAL);
475 if (symbol->sattre & EQUATEDREG)
477 *a_attr |= RISCREG; // Mark it as a register, 'cause it is
481 if ((symbol->sattr & (GLOBAL | DEFINED)) == GLOBAL
487 // Holy hell... This is likely due to the fact that LSR is mistakenly set as a SUNARY type... Need to fix this... !!! FIX !!!
490 *evalTokenBuffer.u32++ = *tok++;
494 // Unknown type here... Alert the user!
495 error("undefined RISC register in expression [token=$%X]", *tok);
496 // Prevent spurious error reporting...
501 *evalTokenBuffer.u32++ = ENDEXPR;
508 *evalTokenBuffer.u32++ = ENDEXPR;
509 return evexpr(otk, a_value, a_attr, a_esym);
513 // Evaluate expression.
514 // If the expression involves only ONE external symbol, the expression is
515 // UNDEFINED, but its value includes everything but the symbol value, and
516 // 'a_esym' is set to the external symbol.
518 int evexpr(TOKEN * _tk, uint64_t * a_value, WORD * a_attr, SYM ** a_esym)
522 uint64_t * sval = evstk; // (Empty) initial stack
523 WORD * sattr = evattr;
524 SYM * esym = NULL; // No external symbol involved
529 while (*tk.u32 != ENDEXPR)
531 switch ((int)*tk.u32++)
534 sy = symbolPtr[*tk.u32++];
535 sy->sattr |= REFERENCED; // Set "referenced" bit
537 if (!(sy->sattr & DEFINED))
539 // Reference to undefined symbol
540 if (!(sy->sattr & GLOBAL))
547 if (esym != NULL) // Check for multiple externals
548 return error(seg_error);
553 if (sy->sattr & DEFINED)
554 *++sval = sy->svalue; // Push symbol's value
556 *++sval = 0; // 0 for undefined symbols
558 *++sattr = (WORD)(sy->sattr & ~GLOBAL); // Push attribs
559 sym_seg = (WORD)(sy->sattr & TDB);
564 *++sattr = ABS | DEFINED; // Push simple attribs
568 // Even though it's a double, we can treat it like a uint64_t since
569 // we're just moving the bits around.
571 *++sattr = ABS | DEFINED | FLOAT; // Push simple attribs
575 *++sval = *tk.u32++; // Push value
576 *++sattr = (WORD)*tk.u32++; // Push attribs
579 // Binary "+" and "-" matrix:
582 // ----------------------------
583 // ABS | ABS | Sect | Other |
584 // Sect | Sect | [1] | Error |
585 // Other | Other | Error | [1] |
586 // ----------------------------
593 --sattr; // Pop attrib
594 // Get FLOAT attribute, if any
595 attr = (sattr[0] | sattr[1]) & FLOAT;
597 // Since adding an int to a fp value promotes it to a fp value, we
598 // don't care whether it's first or second; we cast to a double
604 double fpval1 = (sattr[0] & FLOAT ? *p.dp : (double)*p.i64);
606 double fpval2 = (sattr[1] & FLOAT ? *p.dp : (double)*p.i64);
607 *(double *)sval = fpval1 + fpval2;
611 *sval += sval[1]; // Compute value
615 *sattr = sattr[1] | attr;
616 else if (sattr[1] & TDB)
617 return error(seg_error);
623 --sattr; // Pop attrib
624 // Get FLOAT attribute, if any
625 attr = (sattr[0] | sattr[1]) & FLOAT;
627 // Since subtracting an int from a fp value promotes it to a fp
628 // value, we don't care whether it's first or second; we cast to
629 // a double regardless.
634 double fpval1 = (sattr[0] & FLOAT ? *p.dp : (double)*p.i64);
636 double fpval2 = (sattr[1] & FLOAT ? *p.dp : (double)*p.i64);
637 *(double *)sval = fpval1 - fpval2;
644 *sattr |= attr; // Inherit FLOAT attribute
645 attr = (WORD)(*sattr & TDB);
646 // If symbol1 is ABS, take attributes from symbol2
649 // Otherwise, they're both TDB and so attributes cancel out
650 else if (sattr[1] & TDB)
655 // Unary operators only work on ABS items
658 return error(seg_error);
662 double * dst = (double *)sval;
664 *sattr = ABS | DEFINED | FLOAT; // Expr becomes absolute
668 *sval = -(int64_t)*sval;
669 *sattr = ABS | DEFINED; // Expr becomes absolute
674 case UNLT: // Unary < (get the low byte of a word)
676 return error(seg_error);
679 return error(noflt_error);
681 *sval = (int64_t)((*sval) & 0x00FF);
682 *sattr = ABS | DEFINED; // Expr becomes absolute
685 case UNGT: // Unary > (get the high byte of a word)
687 return error(seg_error);
690 return error(noflt_error);
692 *sval = (int64_t)(((*sval) >> 8) & 0x00FF);
693 *sattr = ABS | DEFINED; // Expr becomes absolute
698 return error(seg_error);
701 return error("floating point numbers not allowed with operator '!'.");
704 *sattr = ABS | DEFINED; // Expr becomes absolute
709 return error(seg_error);
712 return error("floating point numbers not allowed with operator '~'.");
715 *sattr = ABS | DEFINED; // Expr becomes absolute
718 // Comparison operators must have two values that
719 // are in the same segment, but that's the only requirement.
724 if ((*sattr & TDB) != (sattr[1] & TDB))
725 return error(seg_error);
727 // Get FLOAT attribute, if any
728 attr = (sattr[0] | sattr[1]) & FLOAT;
730 // Cast any ints in the comparison to double, if there's at least
731 // one double in the comparison.
736 double fpval1 = (sattr[0] & FLOAT ? *p.dp : (double)*p.i64);
738 double fpval2 = (sattr[1] & FLOAT ? *p.dp : (double)*p.i64);
739 *sval = (fpval1 <= fpval2);
743 *sval = (*sval <= sval[1]);
746 *sattr = ABS | DEFINED;
753 if ((*sattr & TDB) != (sattr[1] & TDB))
754 return error(seg_error);
756 // Get FLOAT attribute, if any
757 attr = (sattr[0] | sattr[1]) & FLOAT;
759 // Cast any ints in the comparison to double, if there's at least
760 // one double in the comparison.
765 double fpval1 = (sattr[0] & FLOAT ? *p.dp : (double)*p.i64);
767 double fpval2 = (sattr[1] & FLOAT ? *p.dp : (double)*p.i64);
768 *sval = (fpval1 >= fpval2);
772 *sval = (*sval >= sval[1]);
775 *sattr = ABS | DEFINED;
782 if ((*sattr & TDB) != (sattr[1] & TDB))
783 return error(seg_error);
785 // Get FLOAT attribute, if any
786 attr = (sattr[0] | sattr[1]) & FLOAT;
788 // Cast any ints in the comparison to double, if there's at least
789 // one double in the comparison.
794 double fpval1 = (sattr[0] & FLOAT ? *p.dp : (double)*p.i64);
796 double fpval2 = (sattr[1] & FLOAT ? *p.dp : (double)*p.i64);
797 *sval = (fpval1 > fpval2);
801 *sval = (*sval > sval[1]);
804 *sattr = ABS | DEFINED;
811 if ((*sattr & TDB) != (sattr[1] & TDB))
812 return error(seg_error);
814 // Get FLOAT attribute, if any
815 attr = (sattr[0] | sattr[1]) & FLOAT;
817 // Cast any ints in the comparison to double, if there's at least
818 // one double in the comparison.
823 double fpval1 = (sattr[0] & FLOAT ? *p.dp : (double)*p.i64);
825 double fpval2 = (sattr[1] & FLOAT ? *p.dp : (double)*p.i64);
826 *sval = (fpval1 < fpval2);
830 *sval = (*sval < sval[1]);
833 *sattr = ABS | DEFINED; // Expr forced to ABS
840 if ((*sattr & TDB) != (sattr[1] & TDB))
841 return error(seg_error);
843 // Get FLOAT attribute, if any
844 attr = (sattr[0] | sattr[1]) & FLOAT;
846 // Cast any ints in the comparison to double, if there's at least
847 // one double in the comparison.
852 double fpval1 = (sattr[0] & FLOAT ? *p.dp : (double)*p.i64);
854 double fpval2 = (sattr[1] & FLOAT ? *p.dp : (double)*p.i64);
855 *sval = (fpval1 != fpval2);
859 *sval = (*sval != sval[1]);
862 *sattr = ABS | DEFINED; // Expr forced to ABS
869 if ((*sattr & TDB) != (sattr[1] & TDB))
870 return error(seg_error);
872 // Get FLOAT attribute, if any
873 attr = (sattr[0] | sattr[1]) & FLOAT;
875 // Cast any ints in the comparison to double, if there's at least
876 // one double in the comparison.
881 double fpval1 = (sattr[0] & FLOAT ? *p.dp : (double)*p.i64);
883 double fpval2 = (sattr[1] & FLOAT ? *p.dp : (double)*p.i64);
884 *sval = (fpval1 == fpval2);
888 *sval = (*sval == sval[1]);
891 *sattr = ABS | DEFINED; // Expr forced to ABS
895 // All other binary operators must have two ABS items to work with.
896 // They all produce an ABS value.
897 // Shamus: Is this true? There's at least one counterexample of legit
898 // code where this assumption fails to produce correct code.
901 switch ((int)tk.u32[-1])
906 // Get FLOAT attribute, if any
907 attr = (sattr[0] | sattr[1]) & FLOAT;
909 // Since multiplying an int by a fp value promotes it to a fp
910 // value, we don't care whether it's first or second; it will
911 // be cast to a double regardless.
913 An open question here is do we promote ints to floats as signed or unsigned? It makes a difference if, say, the int is put in as -1 but is promoted to a double as $FFFFFFFFFFFFFFFF--you get very different results that way! For now, we promote as signed until proven detrimental otherwise.
919 double fpval1 = (sattr[0] & FLOAT ? *p.dp : (double)*p.i64);
921 double fpval2 = (sattr[1] & FLOAT ? *p.dp : (double)*p.i64);
922 *(double *)sval = fpval1 * fpval2;
934 // Get FLOAT attribute, if any
935 attr = (sattr[0] | sattr[1]) & FLOAT;
941 double fpval1 = (sattr[0] & FLOAT ? *p.dp : (double)*p.i64);
943 double fpval2 = (sattr[1] & FLOAT ? *p.dp : (double)*p.i64);
946 return error("divide by zero");
948 *(double *)sval = fpval1 / fpval2;
953 return error("divide by zero");
955 // Compiler is picky here: Without casting these, it
956 // discards the sign if dividing a negative # by a
957 // positive one, creating a bad result. :-/
958 // Definitely a side effect of using uint32_ts instead of
960 *sval = (int32_t)sval[0] / (int32_t)sval[1];
969 if ((*sattr | sattr[1]) & FLOAT)
970 return error("floating point numbers not allowed with operator '%'.");
973 return error("mod (%) by zero");
980 sattr--; // Pop attrib
982 if ((*sattr | sattr[1]) & FLOAT)
983 return error("floating point numbers not allowed with operator '<<'.");
990 sattr--; // Pop attrib
992 if ((*sattr | sattr[1]) & FLOAT)
993 return error("floating point numbers not allowed with operator '>>'.");
1000 sattr--; // Pop attrib
1002 if ((*sattr | sattr[1]) & FLOAT)
1003 return error("floating point numbers not allowed with operator '&'.");
1010 sattr--; // Pop attrib
1012 if ((*sattr | sattr[1]) & FLOAT)
1013 return error("floating point numbers not allowed with operator '^'.");
1022 if ((*sattr | sattr[1]) & FLOAT)
1023 return error("floating point numbers not allowed with operator '|'.");
1029 // Bad operator in expression stream (this should never happen!)
1041 // Copy value + attrib into return variables
1049 // Count the # of tokens in the passed in expression
1050 // N.B.: 64-bit constants count as two tokens each
1052 uint16_t ExpressionLength(TOKEN * tk)
1056 for(length=0; tk[length]!=ENDEXPR; length++)
1058 // Add one to length for 2X tokens, two for 3X tokens
1059 if (tk[length] == SYMBOL)
1061 else if ((tk[length] == CONST) || (tk[length] == FCONST))
1065 // Add 1 for ENDEXPR