2 // RMAC - Renamed Macro Assembler for all Atari computers
3 // EXPR.C - Expression Analyzer
4 // Copyright (C) 199x Landon Dyer, 2011-2021 Reboot and Friends
5 // RMAC derived from MADMAC v1.07 Written by Landon Dyer, 1986
6 // Source utilised with the kind permission of Landon Dyer
20 #define DEF_KW // Declare keyword values
21 #include "kwtab.h" // Incl generated keyword tables & defs
23 // N.B.: The size of tokenClass should be identical to the largest value of
24 // a token; we're assuming 256 but not 100% sure!
25 static char tokenClass[256]; // Generated table of token classes
26 static uint64_t evstk[EVSTACKSIZE]; // Evaluator value stack
27 static WORD evattr[EVSTACKSIZE]; // Evaluator attribute stack
29 // Token-class initialization list
32 CONST, FCONST, SYMBOL, 0, // ID
33 '(', '[', '{', 0, // OPAR
34 ')', ']', '}', 0, // CPAR
35 CR_DEFINED, CR_REFERENCED, // SUNARY (special unary)
38 CR_ABSCOUNT, CR_FILESIZE, 0,
39 '!', '~', UNMINUS, UNLT, UNGT, 0, // UNARY
40 '*', '/', '%', 0, // MULT
43 LE, GE, '<', '>', NE, '=', 0, // REL
50 const char missym_error[] = "missing symbol";
51 const char str_error[] = "missing symbol or string";
52 const char noflt_error[] = "operator not usable with float";
54 // Convert expression to postfix
55 static PTR evalTokenBuffer; // Deposit tokens here (this is really a
56 // pointer to exprbuf from direct.c)
57 // (Can also be from others, like
59 static int symbolNum; // Pointer to the entry in symbolPtr[]
62 // Obtain a string value
64 static uint32_t str_value(char * p)
69 v = (v << 8) | (*p & 0xFF);
75 // Initialize expression analyzer
77 void InitExpression(void)
79 // Initialize token-class table (all set to END)
80 for(int i=0; i<256; i++)
85 for(char * p=itokcl; *p!=1; p++)
90 tokenClass[(int)(*p)] = (char)i;
97 // Binary operators (all the same precedence)
104 while (tokenClass[*tok] >= MULT)
111 *evalTokenBuffer.u32++ = t;
118 // Unary operators (detect unary '-')
119 // ggn: If expression starts with a plus then also eat it up. For some reason
120 // the parser gets confused when this happens and emits a "bad
128 int class = tokenClass[*tok];
130 if (*tok == '-' || *tok == '+' || *tok == '<' || *tok == '>' || class == UNARY)
144 // With leading + we don't have to deposit anything to the buffer
145 // because there's no unary '+', nor do we have to do anything about it
147 *evalTokenBuffer.u32++ = t;
149 else if (class == SUNARY)
156 *evalTokenBuffer.u32++ = CONST;
157 *evalTokenBuffer.u64++ = sect[ABS].sloc;
161 *evalTokenBuffer.u32++ = CONST;
162 *evalTokenBuffer.u64++ = sloc;
166 if (*tok++ != STRING)
167 return error("^^FILESIZE expects filename inside string");
168 *evalTokenBuffer.u32++ = CONST;
169 // @@copypasted from d_incbin, maybe factor this out somehow?
170 // Attempt to open the include file in the current directory, then (if that
171 // failed) try list of include files passed in the environment string or by
176 if ((fd = open(string[*tok], _OPEN_INC)) < 0)
178 for(i=0; nthpath("RMACPATH", i, buf1)!=0; i++)
182 // Append path char if necessary
183 if ((fd > 0) && (buf1[fd - 1] != SLASHCHAR))
184 strcat(buf1, SLASHSTRING);
186 strcat(buf1, string[*tok]);
188 if ((fd = open(buf1, _OPEN_INC)) >= 0)
192 return error("cannot open: \"%s\"", string[tok[1]]);
196 *evalTokenBuffer.u64++ = (uint64_t)lseek(fd, 0L, SEEK_END);
199 // Advance tok because of consumed string token
203 *evalTokenBuffer.u32++ = CONST;
204 *evalTokenBuffer.u64++ = dos_time();
207 *evalTokenBuffer.u32++ = CONST;
208 *evalTokenBuffer.u64++ = dos_date();
210 case CR_MACDEF: // ^^macdef <macro-name>
211 if (*tok++ != SYMBOL)
212 return error(missym_error);
215 w = (lookup(p, MACRO, 0) == NULL ? 0 : 1);
216 *evalTokenBuffer.u32++ = CONST;
217 *evalTokenBuffer.u64++ = (uint64_t)w;
225 if (*tok++ != SYMBOL)
226 return error(missym_error);
229 int j = (*p == '.' ? curenv : 0);
230 SYM * sy = lookup(p, LABEL, j);
231 w = ((sy != NULL) && (sy->sattr & w ? 1 : 0));
232 *evalTokenBuffer.u32++ = CONST;
233 *evalTokenBuffer.u64++ = (uint64_t)w;
236 if (*tok != SYMBOL && *tok != STRING)
237 return error(str_error);
243 return error(comma_error);
245 if (*tok != SYMBOL && *tok != STRING)
246 return error(str_error);
248 char * p2 = string[tok[1]];
251 w = (WORD)(!strcmp(p, p2));
252 *evalTokenBuffer.u32++ = CONST;
253 *evalTokenBuffer.u64++ = (uint64_t)w;
264 // Terminals (CONSTs) and parenthesis grouping
274 *evalTokenBuffer.u32++ = CONST;
275 *evalTokenBuffer.u64++ = *ptk.u64++;
280 *evalTokenBuffer.u32++ = FCONST;
281 *evalTokenBuffer.u64++ = *ptk.u64++;
286 char * p = string[*tok++];
287 int j = (*p == '.' ? curenv : 0);
288 SYM * sy = lookup(p, LABEL, j);
291 sy = NewSymbol(p, LABEL, j);
293 // Check register bank usage
294 if (sy->sattre & EQUATEDREG)
296 if ((regbank == BANK_0) && (sy->sattre & BANK_1) && !altbankok)
297 warn("equated symbol \'%s\' cannot be used in register bank 0", sy->sname);
299 if ((regbank == BANK_1) && (sy->sattre & BANK_0) && !altbankok)
300 warn("equated symbol \'%s\' cannot be used in register bank 1", sy->sname);
303 *evalTokenBuffer.u32++ = SYMBOL;
304 *evalTokenBuffer.u32++ = symbolNum;
305 symbolPtr[symbolNum] = sy;
310 *evalTokenBuffer.u32++ = CONST;
311 *evalTokenBuffer.u64++ = str_value(string[*tok++]);
318 return error("missing closing parenthesis ')'");
326 return error("missing closing bracket ']'");
330 if (expr0() != OK) // Eat up first parameter (register or immediate)
333 if (*tok++ != ':') // Demand a ':' there
334 return error("missing colon ':'");
336 if (expr0() != OK) // Eat up second parameter (register or immediate)
340 return error("missing closing brace '}'");
344 *evalTokenBuffer.u32++ = ACONST; // Attributed const
345 *evalTokenBuffer.u32++ = sloc; // Current location
346 *evalTokenBuffer.u32++ = DEFINED | ((orgactive | org68k_active) ? 0 : cursect); // Store attribs
349 *evalTokenBuffer.u32++ = ACONST; // Attributed const
351 // pcloc == location at start of line
352 *evalTokenBuffer.u32++ = (orgactive ? orgaddr : pcloc);
353 // '*' takes attributes of current section, not ABS!
354 // Also, if we're ORG'd, the symbol is absolute
355 *evalTokenBuffer.u32++ = DEFINED | ((orgactive | org68k_active) ? 0 : cursect);
358 return error("bad expression");
365 // Recursive-descent expression analyzer (with some simple speed hacks)
367 int expr(TOKEN * otk, uint64_t * a_value, WORD * a_attr, SYM ** a_esym)
369 // Passed in values (once dereferenced, that is) can all be zero. They are
370 // there so that the expression analyzer can fill them in as needed. The
371 // expression analyzer gets its input from the global token pointer "tok",
372 // and not from anything passed in by the user.
378 evalTokenBuffer.u32 = otk; // Set token pointer to 'exprbuf' (direct.c)
379 // Also set in various other places too (riscasm.c,
382 // Optimize for single constant or single symbol.
383 // Shamus: Seems to me that this could be greatly simplified by 1st
384 // checking if the first token is a multibyte token, *then*
385 // checking if there's an EOL after it depending on the actual
386 // length of the token (multiple vs. single). Otherwise, we have
387 // the horror show that is the following:
389 && (tok[0] != CONST && tokenClass[tok[0]] != SUNARY))
390 || ((tok[0] == SYMBOL)
391 && (tokenClass[tok[2]] < UNARY))
392 || ((tok[0] == CONST) && (tokenClass[tok[3]] < UNARY))
394 // Shamus: Yes, you can parse that out and make some kind of sense of it, but damn, it takes a while to get it and understand the subtle bugs that result from not being careful about what you're checking; especially vis-a-vis naively checking tok[1] for an EOL. O_o
399 *evalTokenBuffer.u32++ = *ptk.u32++;
400 *evalTokenBuffer.u64++ = *a_value = *ptk.u64++;
401 *a_attr = ABS | DEFINED;
407 //printf("Quick eval in expr(): CONST = %i, tokenClass[tok[2]] = %i\n", *a_value, tokenClass[*tok]);
409 // Not sure that removing float constant here is going to break anything and/or
410 // make things significantly slower, but having this here seems to cause the
411 // complexity of the check to get to this part of the parse to go through the
412 // roof, and dammit, I just don't feel like fighting that fight ATM. :-P
414 else if (*tok == FCONST)
416 *evalTokenBuffer.u32++ = *tok++;
417 *evalTokenBuffer.u64++ = *a_value = *tok.u64++;
418 *a_attr = ABS | DEFINED | FLOAT;
423 //printf("Quick eval in expr(): CONST = %i, tokenClass[tok[2]] = %i\n", *a_value, tokenClass[*tok]);
426 else if (*tok == '*')
428 *evalTokenBuffer.u32++ = CONST;
430 if (orgactive | org68k_active)
432 *evalTokenBuffer.u64++ = *a_value = orgaddr;
433 *a_attr = DEFINED; // We have ORG active, it doesn't belong in a section!
437 *evalTokenBuffer.u64++ = *a_value = pcloc;
438 // '*' takes attributes of current section, not ABS!
439 *a_attr = cursect | DEFINED;
448 else if (*tok == STRING || *tok == SYMBOL)
451 j = (*p == '.' ? curenv : 0);
452 symbol = lookup(p, LABEL, j);
455 symbol = NewSymbol(p, LABEL, j);
457 symbol->sattr |= REFERENCED;
459 // Check for undefined register equates, but only if it's not part
460 // of a #<SYMBOL> construct, as it could be that the label that's
461 // been undefined may later be used as an address label--which
462 // means it will be fixed up later, and thus, not an error.
463 if ((symbol->sattre & UNDEF_EQUR) && !riscImmTokenSeen)
465 error("undefined register equate '%s'", symbol->sname);
466 //if we return right away, it returns some spurious errors...
470 // Check register bank usage (moved to EvaluateRegisterFromTokenStream())
471 //if (symbol->sattre & EQUATEDREG)
473 // if ((regbank == BANK_0) && (symbol->sattre & BANK_1) && !altbankok)
474 // warn("equated symbol '%s' cannot be used in register bank 0", symbol->sname);
476 // if ((regbank == BANK_1) && (symbol->sattre & BANK_0) && !altbankok)
477 // warn("equated symbol '%s' cannot be used in register bank 1", symbol->sname);
480 *evalTokenBuffer.u32++ = SYMBOL;
482 *evalTokenBuffer++ = (TOKEN)symbol;
485 While this approach works, it's wasteful. It would be better to use something
486 that's already available, like the symbol "order defined" table (which needs to
487 be converted from a linked list into an array).
489 *evalTokenBuffer.u32++ = symbolNum;
490 symbolPtr[symbolNum] = symbol;
494 *a_value = (symbol->sattr & DEFINED ? symbol->svalue : 0);
495 *a_attr = (WORD)(symbol->sattr & ~GLOBAL);
497 if (symbol->sattre & EQUATEDREG)
499 *a_attr |= RISCREG; // Mark it as a register, 'cause it is
503 if ((symbol->sattr & (GLOBAL | DEFINED)) == GLOBAL
509 // Holy hell... This is likely due to the fact that LSR is mistakenly set as a SUNARY type... Need to fix this... !!! FIX !!!
512 *evalTokenBuffer.u32++ = *tok++;
516 // Unknown type here... Alert the user!
517 error("undefined RISC register in expression [token=$%X]", *tok);
518 // Prevent spurious error reporting...
523 *evalTokenBuffer.u32++ = ENDEXPR;
530 *evalTokenBuffer.u32++ = ENDEXPR;
531 return evexpr(otk, a_value, a_attr, a_esym);
535 // Evaluate expression.
536 // If the expression involves only ONE external symbol, the expression is
537 // UNDEFINED, but its value includes everything but the symbol value, and
538 // 'a_esym' is set to the external symbol.
540 int evexpr(TOKEN * _tk, uint64_t * a_value, WORD * a_attr, SYM ** a_esym)
544 uint64_t * sval = evstk; // (Empty) initial stack
545 WORD * sattr = evattr;
546 SYM * esym = NULL; // No external symbol involved
551 while (*tk.u32 != ENDEXPR)
553 switch ((int)*tk.u32++)
556 sy = symbolPtr[*tk.u32++];
557 sy->sattr |= REFERENCED; // Set "referenced" bit
559 if (!(sy->sattr & DEFINED))
561 // Reference to undefined symbol
562 if (!(sy->sattr & GLOBAL))
569 if (esym != NULL) // Check for multiple externals
570 return error(seg_error);
575 if (sy->sattr & DEFINED)
576 *++sval = sy->svalue; // Push symbol's value
578 *++sval = 0; // 0 for undefined symbols
580 *++sattr = (WORD)(sy->sattr & ~GLOBAL); // Push attribs
581 sym_seg = (WORD)(sy->sattr & TDB);
586 *++sattr = ABS | DEFINED; // Push simple attribs
590 // Even though it's a double, we can treat it like a uint64_t since
591 // we're just moving the bits around.
593 *++sattr = ABS | DEFINED | FLOAT; // Push simple attribs
597 *++sval = *tk.u32++; // Push value
598 *++sattr = (WORD)*tk.u32++; // Push attribs
601 // Binary "+" and "-" matrix:
604 // ----------------------------
605 // ABS | ABS | Sect | Other |
606 // Sect | Sect | [1] | Error |
607 // Other | Other | Error | [1] |
608 // ----------------------------
615 --sattr; // Pop attrib
616 // Get FLOAT attribute, if any
617 attr = (sattr[0] | sattr[1]) & FLOAT;
619 // Since adding an int to a fp value promotes it to a fp value, we
620 // don't care whether it's first or second; we cast to a double
626 double fpval1 = (sattr[0] & FLOAT ? *p.dp : (double)*p.i64);
628 double fpval2 = (sattr[1] & FLOAT ? *p.dp : (double)*p.i64);
629 *(double *)sval = fpval1 + fpval2;
633 *sval += sval[1]; // Compute value
637 *sattr = sattr[1] | attr;
638 else if (sattr[1] & TDB)
639 return error(seg_error);
645 --sattr; // Pop attrib
646 // Get FLOAT attribute, if any
647 attr = (sattr[0] | sattr[1]) & FLOAT;
649 // Since subtracting an int from a fp value promotes it to a fp
650 // value, we don't care whether it's first or second; we cast to
651 // a double regardless.
656 double fpval1 = (sattr[0] & FLOAT ? *p.dp : (double)*p.i64);
658 double fpval2 = (sattr[1] & FLOAT ? *p.dp : (double)*p.i64);
659 *(double *)sval = fpval1 - fpval2;
666 *sattr |= attr; // Inherit FLOAT attribute
667 attr = (WORD)(*sattr & TDB);
668 // If symbol1 is ABS, take attributes from symbol2
671 // Otherwise, they're both TDB and so attributes cancel out
672 else if (sattr[1] & TDB)
677 // Unary operators only work on ABS items
680 return error(seg_error);
684 double * dst = (double *)sval;
686 *sattr = ABS | DEFINED | FLOAT; // Expr becomes absolute
690 *sval = -(int64_t)*sval;
691 *sattr = ABS | DEFINED; // Expr becomes absolute
696 case UNLT: // Unary < (get the low byte of a word)
698 return error(seg_error);
701 return error(noflt_error);
703 *sval = (int64_t)((*sval) & 0x00FF);
704 *sattr = ABS | DEFINED; // Expr becomes absolute
707 case UNGT: // Unary > (get the high byte of a word)
709 return error(seg_error);
712 return error(noflt_error);
714 *sval = (int64_t)(((*sval) >> 8) & 0x00FF);
715 *sattr = ABS | DEFINED; // Expr becomes absolute
720 return error(seg_error);
723 return error("floating point numbers not allowed with operator '!'.");
726 *sattr = ABS | DEFINED; // Expr becomes absolute
731 return error(seg_error);
734 return error("floating point numbers not allowed with operator '~'.");
737 *sattr = ABS | DEFINED; // Expr becomes absolute
740 // Comparison operators must have two values that
741 // are in the same segment, but that's the only requirement.
746 if ((*sattr & TDB) != (sattr[1] & TDB))
747 return error(seg_error);
749 // Get FLOAT attribute, if any
750 attr = (sattr[0] | sattr[1]) & FLOAT;
752 // Cast any ints in the comparison to double, if there's at least
753 // one double in the comparison.
758 double fpval1 = (sattr[0] & FLOAT ? *p.dp : (double)*p.i64);
760 double fpval2 = (sattr[1] & FLOAT ? *p.dp : (double)*p.i64);
761 *sval = (fpval1 <= fpval2);
765 *sval = (*sval <= sval[1]);
768 *sattr = ABS | DEFINED;
775 if ((*sattr & TDB) != (sattr[1] & TDB))
776 return error(seg_error);
778 // Get FLOAT attribute, if any
779 attr = (sattr[0] | sattr[1]) & FLOAT;
781 // Cast any ints in the comparison to double, if there's at least
782 // one double in the comparison.
787 double fpval1 = (sattr[0] & FLOAT ? *p.dp : (double)*p.i64);
789 double fpval2 = (sattr[1] & FLOAT ? *p.dp : (double)*p.i64);
790 *sval = (fpval1 >= fpval2);
794 *sval = (*sval >= sval[1]);
797 *sattr = ABS | DEFINED;
804 if ((*sattr & TDB) != (sattr[1] & TDB))
805 return error(seg_error);
807 // Get FLOAT attribute, if any
808 attr = (sattr[0] | sattr[1]) & FLOAT;
810 // Cast any ints in the comparison to double, if there's at least
811 // one double in the comparison.
816 double fpval1 = (sattr[0] & FLOAT ? *p.dp : (double)*p.i64);
818 double fpval2 = (sattr[1] & FLOAT ? *p.dp : (double)*p.i64);
819 *sval = (fpval1 > fpval2);
823 *sval = (*sval > sval[1]);
826 *sattr = ABS | DEFINED;
833 if ((*sattr & TDB) != (sattr[1] & TDB))
834 return error(seg_error);
836 // Get FLOAT attribute, if any
837 attr = (sattr[0] | sattr[1]) & FLOAT;
839 // Cast any ints in the comparison to double, if there's at least
840 // one double in the comparison.
845 double fpval1 = (sattr[0] & FLOAT ? *p.dp : (double)*p.i64);
847 double fpval2 = (sattr[1] & FLOAT ? *p.dp : (double)*p.i64);
848 *sval = (fpval1 < fpval2);
852 *sval = (*sval < sval[1]);
855 *sattr = ABS | DEFINED; // Expr forced to ABS
862 if ((*sattr & TDB) != (sattr[1] & TDB))
863 return error(seg_error);
865 // Get FLOAT attribute, if any
866 attr = (sattr[0] | sattr[1]) & FLOAT;
868 // Cast any ints in the comparison to double, if there's at least
869 // one double in the comparison.
874 double fpval1 = (sattr[0] & FLOAT ? *p.dp : (double)*p.i64);
876 double fpval2 = (sattr[1] & FLOAT ? *p.dp : (double)*p.i64);
877 *sval = (fpval1 != fpval2);
881 *sval = (*sval != sval[1]);
884 *sattr = ABS | DEFINED; // Expr forced to ABS
891 if ((*sattr & TDB) != (sattr[1] & TDB))
892 return error(seg_error);
894 // Get FLOAT attribute, if any
895 attr = (sattr[0] | sattr[1]) & FLOAT;
897 // Cast any ints in the comparison to double, if there's at least
898 // one double in the comparison.
903 double fpval1 = (sattr[0] & FLOAT ? *p.dp : (double)*p.i64);
905 double fpval2 = (sattr[1] & FLOAT ? *p.dp : (double)*p.i64);
906 *sval = (fpval1 == fpval2);
910 *sval = (*sval == sval[1]);
913 *sattr = ABS | DEFINED; // Expr forced to ABS
917 // All other binary operators must have two ABS items to work with.
918 // They all produce an ABS value.
919 // Shamus: Is this true? There's at least one counterexample of legit
920 // code where this assumption fails to produce correct code.
923 switch ((int)tk.u32[-1])
928 // Get FLOAT attribute, if any
929 attr = (sattr[0] | sattr[1]) & FLOAT;
931 // Since multiplying an int by a fp value promotes it to a fp
932 // value, we don't care whether it's first or second; it will
933 // be cast to a double regardless.
935 An open question here is do we promote ints to floats as signed or unsigned? It makes a difference if, say, the int is put in as -1 but is promoted to a double as $FFFFFFFFFFFFFFFF--you get very different results that way! For now, we promote as signed until proven detrimental otherwise.
941 double fpval1 = (sattr[0] & FLOAT ? *p.dp : (double)*p.i64);
943 double fpval2 = (sattr[1] & FLOAT ? *p.dp : (double)*p.i64);
944 *(double *)sval = fpval1 * fpval2;
956 // Get FLOAT attribute, if any
957 attr = (sattr[0] | sattr[1]) & FLOAT;
963 double fpval1 = (sattr[0] & FLOAT ? *p.dp : (double)*p.i64);
965 double fpval2 = (sattr[1] & FLOAT ? *p.dp : (double)*p.i64);
968 return error("divide by zero");
970 *(double *)sval = fpval1 / fpval2;
975 return error("divide by zero");
977 // Compiler is picky here: Without casting these, it
978 // discards the sign if dividing a negative # by a
979 // positive one, creating a bad result. :-/
980 // Definitely a side effect of using uint32_ts instead of
982 *sval = (int32_t)sval[0] / (int32_t)sval[1];
991 if ((*sattr | sattr[1]) & FLOAT)
992 return error("floating point numbers not allowed with operator '%'.");
995 return error("mod (%) by zero");
1002 sattr--; // Pop attrib
1004 if ((*sattr | sattr[1]) & FLOAT)
1005 return error("floating point numbers not allowed with operator '<<'.");
1012 sattr--; // Pop attrib
1014 if ((*sattr | sattr[1]) & FLOAT)
1015 return error("floating point numbers not allowed with operator '>>'.");
1022 sattr--; // Pop attrib
1024 if ((*sattr | sattr[1]) & FLOAT)
1025 return error("floating point numbers not allowed with operator '&'.");
1032 sattr--; // Pop attrib
1034 if ((*sattr | sattr[1]) & FLOAT)
1035 return error("floating point numbers not allowed with operator '^'.");
1044 if ((*sattr | sattr[1]) & FLOAT)
1045 return error("floating point numbers not allowed with operator '|'.");
1051 // Bad operator in expression stream (this should never happen!)
1063 // Copy value + attrib into return variables
1071 // Count the # of tokens in the passed in expression
1072 // N.B.: 64-bit constants count as two tokens each
1074 uint16_t ExpressionLength(TOKEN * tk)
1078 for(length=0; tk[length]!=ENDEXPR; length++)
1080 // Add one to length for 2X tokens, two for 3X tokens
1081 if (tk[length] == SYMBOL)
1083 else if ((tk[length] == CONST) || (tk[length] == FCONST))
1087 // Add 1 for ENDEXPR