From 1b714ae49d9c9a71bfcbacadd2bf8dab49d67962 Mon Sep 17 00:00:00 2001 From: Shamus Hammons Date: Mon, 7 Jun 2021 22:42:33 -0500 Subject: [PATCH] Harden RISC register parser and simplify expression evaluator. The bulk of this patch is ggn's; I rolled a few more macros after I realized that the EOL check in the RISC assembler required checking its return value as well as EvaluateRegisterFromTokenStream()'s. :-/ --- expr.c | 39 +++------------------- riscasm.c | 96 +++++++++++++++++++++++++++++++++---------------------- version.h | 2 +- 3 files changed, 63 insertions(+), 74 deletions(-) diff --git a/expr.c b/expr.c index 8faa634..65b4a11 100644 --- a/expr.c +++ b/expr.c @@ -381,49 +381,20 @@ int expr(TOKEN * otk, uint64_t * a_value, WORD * a_attr, SYM ** a_esym) PTR ptk; evalTokenBuffer.u32 = otk; // Set token pointer to 'exprbuf' (direct.c) - // Also set in various other places too (riscasm.c, - // e.g.) + // Also set in various other places too (riscasm.c, + // e.g.) -//printf("expr(): tokens 0-2: %i %i %i (%c %c %c); tc[2] = %i\n", tok[0], tok[1], tok[2], tok[0], tok[1], tok[2], tokenClass[tok[2]]); // Optimize for single constant or single symbol. - // Shamus: Subtle bug here. EOL token is 101; if you have a constant token - // followed by the value 101, it will trigger a bad evaluation here. - // This is probably a really bad assumption to be making here...! - // (assuming tok[1] == EOL is a single token that is) - // Seems that even other tokens (SUNARY type) can fuck this up too. -#if 0 -// if ((tok[1] == EOL) - if ((tok[1] == EOL && ((tok[0] != CONST || tok[0] != FCONST) && tokenClass[tok[0]] != SUNARY)) -// || (((*tok == CONST || *tok == FCONST || *tok == SYMBOL) || (*tok >= KW_R0 && *tok <= KW_R31)) -// && (tokenClass[tok[2]] < UNARY))) - || (((tok[0] == SYMBOL) || (tok[0] >= KW_R0 && tok[0] <= KW_R31)) - && (tokenClass[tok[2]] < UNARY)) - || ((tok[0] == CONST || tok[0] == FCONST) && (tokenClass[tok[3]] < UNARY)) - ) -#else // Shamus: Seems to me that this could be greatly simplified by 1st checking if the first token is a multibyte token, *then* checking if there's an EOL after it depending on the actual length of the token (multiple vs. single). Otherwise, we have the horror show that is the following: if ((tok[1] == EOL && (tok[0] != CONST && tokenClass[tok[0]] != SUNARY)) - || (((tok[0] == SYMBOL) - || (tok[0] >= KW_R0 && tok[0] <= KW_R31)) + || ((tok[0] == SYMBOL) && (tokenClass[tok[2]] < UNARY)) || ((tok[0] == CONST) && (tokenClass[tok[3]] < UNARY)) ) -// Shamus: Yes, you can parse that out and make some kind of sense of it, but damn, it takes a while to get it and understand the subtle bugs that result from not being careful about what you're checking; especially vis-a-vis niavely checking tok[1] for an EOL. O_o -#endif +// Shamus: Yes, you can parse that out and make some kind of sense of it, but damn, it takes a while to get it and understand the subtle bugs that result from not being careful about what you're checking; especially vis-a-vis naively checking tok[1] for an EOL. O_o { - if (*tok >= KW_R0 && *tok <= KW_R31) - { - *evalTokenBuffer.u32++ = CONST; - *evalTokenBuffer.u64++ = *a_value = (*tok - KW_R0); - *a_attr = ABS | DEFINED | RISCREG; - - if (a_esym != NULL) - *a_esym = NULL; - - tok++; - } - else if (*tok == CONST) + if (*tok == CONST) { ptk.u32 = tok; *evalTokenBuffer.u32++ = *ptk.u32++; diff --git a/riscasm.c b/riscasm.c index 4d62e97..5681f35 100644 --- a/riscasm.c +++ b/riscasm.c @@ -25,6 +25,22 @@ #define MAXINTERNCC 26 // Maximum internal condition codes +// Useful macros +#define EVAL_REG_RETURN_IF_ERROR(x, y) \ +x = EvaluateRegisterFromTokenStream(y); \ +\ +if (x == ERROR) \ + return ERROR; + +#define EVAL_REG_RETURN_IF_ERROR_OR_NO_EOL(x, y) \ +x = EvaluateRegisterFromTokenStream(y); \ +\ +if ((x == ERROR) || (ErrorIfNotAtEOL() == ERROR)) \ + return ERROR; + +#define CHECK_EOL \ +if (ErrorIfNotAtEOL() == ERROR) \ + return ERROR; unsigned altbankok = 0; // Ok to use alternate register bank unsigned orgactive = 0; // RISC/6502 org directive active @@ -134,7 +150,6 @@ static const char * malformErr[] = { malform1, malform2, malform3, malform4 }; - // // Function to return "malformed expression" error // This is done mainly to remove a bunch of GOTO statements in the parser @@ -144,7 +159,6 @@ static inline int MalformedOpcode(int signal) return error("Malformed opcode, %s", malformErr[signal]); } - // // Function to return "Illegal Indexed Register" error // Anyone trying to index something other than R14 or R15 @@ -154,7 +168,6 @@ static inline int IllegalIndexedRegister(int reg) return error("Attempted index reference with non-indexable register (r%d)", reg - KW_R0); } - // // Function to return "Illegal Indexed Register" error for EQUR scenarios // Trying to use register value within EQUR that isn't 14 or 15 @@ -164,7 +177,6 @@ static inline int IllegalIndexedRegisterEqur(SYM * sy) return error("Attempted index reference with non-indexable register within EQUR (%s = r%d)", sy->sname, sy->svalue); } - // // Build up & deposit RISC instruction word // @@ -181,25 +193,40 @@ static void DepositRISCInstructionWord(uint16_t opcode, int reg1, int reg2) D_word(value); } - // // Evaluate the RISC register from the token stream. Passed in value is the // FIXUP attribute to use if the expression comes back as undefined. // -static int EvaluateRegisterFromTokenStream(uint32_t attr) +static int EvaluateRegisterFromTokenStream(uint32_t fixup) { + // Firstly, check to see if it's a register token and return that. No + // need to invoke expr() for easy cases like this. + if (*tok >= KW_R0 && *tok <= KW_R31) + { + int reg = *tok - KW_R0; + tok++; + return reg; + } + + if (*tok != SYMBOL) + { + // If at this point we don't have a symbol then it's garbage. Punt. + return error("Expected register number or EQUREG"); + } + uint64_t eval; // Expression value WORD eattr; // Expression attributes SYM * esym; // External symbol involved in expr. TOKEN r_expr[EXPRSIZE]; // Expression token list // Evaluate what's in the global "tok" buffer + // N.B.: We should either get a fixup or a register name from EQUR if (expr(r_expr, &eval, &eattr, &esym) != OK) return ERROR; if (!(eattr & DEFINED)) { - AddFixup(FU_WORD | attr, sloc, r_expr); + AddFixup(FU_WORD | fixup, sloc, r_expr); return 0; } @@ -211,7 +238,6 @@ static int EvaluateRegisterFromTokenStream(uint32_t attr) return error(reg_err); } - // // Do RISC code generation // @@ -257,8 +283,7 @@ int GenerateRISCCode(int state) // ABS, MIRROR, NEG, NOT, PACK, RESMAC, SAT8, SAT16, SAT16S, SAT24, SAT32S, // UNPACK case RI_ONE: - reg2 = EvaluateRegisterFromTokenStream(FU_REGTWO); - ErrorIfNotAtEOL(); + EVAL_REG_RETURN_IF_ERROR_OR_NO_EOL(reg2, FU_REGTWO); DepositRISCInstructionWord(parm, parm >> 6, reg2); break; @@ -269,14 +294,13 @@ int GenerateRISCCode(int state) if (parm == 37) altbankok = 1; // MOVEFA - reg1 = EvaluateRegisterFromTokenStream(FU_REGONE); + EVAL_REG_RETURN_IF_ERROR(reg1, FU_REGONE); CHECK_COMMA; if (parm == 36) altbankok = 1; // MOVETA - reg2 = EvaluateRegisterFromTokenStream(FU_REGTWO); - ErrorIfNotAtEOL(); + EVAL_REG_RETURN_IF_ERROR_OR_NO_EOL(reg2, FU_REGTWO); DepositRISCInstructionWord(parm, reg1, reg2); break; @@ -343,8 +367,7 @@ int GenerateRISCCode(int state) } CHECK_COMMA; - reg2 = EvaluateRegisterFromTokenStream(FU_REGTWO); - ErrorIfNotAtEOL(); + EVAL_REG_RETURN_IF_ERROR_OR_NO_EOL(reg2, FU_REGTWO); DepositRISCInstructionWord(parm, reg1, reg2); break; @@ -392,8 +415,7 @@ int GenerateRISCCode(int state) } CHECK_COMMA; - reg2 = EvaluateRegisterFromTokenStream(FU_REGTWO); - ErrorIfNotAtEOL(); + EVAL_REG_RETURN_IF_ERROR_OR_NO_EOL(reg2, FU_REGTWO); DepositRISCInstructionWord(parm, 0, reg2); val = WORDSWAP32(eval); @@ -411,12 +433,11 @@ int GenerateRISCCode(int state) else { parm = 34; - reg1 = EvaluateRegisterFromTokenStream(FU_REGONE); + EVAL_REG_RETURN_IF_ERROR(reg1, FU_REGONE); } CHECK_COMMA; - reg2 = EvaluateRegisterFromTokenStream(FU_REGTWO); - ErrorIfNotAtEOL(); + EVAL_REG_RETURN_IF_ERROR_OR_NO_EOL(reg2, FU_REGTWO); DepositRISCInstructionWord(parm, reg1, reg2); break; @@ -465,7 +486,7 @@ int GenerateRISCCode(int state) if (!indexed) { - reg1 = EvaluateRegisterFromTokenStream(FU_REGONE); + EVAL_REG_RETURN_IF_ERROR(reg1, FU_REGONE); } else { @@ -497,7 +518,7 @@ int GenerateRISCCode(int state) if (indexed) { - reg1 = EvaluateRegisterFromTokenStream(FU_REGONE); + EVAL_REG_RETURN_IF_ERROR(reg1, FU_REGONE); } else { @@ -529,7 +550,7 @@ int GenerateRISCCode(int state) } else { - reg1 = EvaluateRegisterFromTokenStream(FU_REGONE); + EVAL_REG_RETURN_IF_ERROR(reg1, FU_REGONE); } } @@ -538,15 +559,14 @@ int GenerateRISCCode(int state) tok++; CHECK_COMMA; - reg2 = EvaluateRegisterFromTokenStream(FU_REGTWO); - ErrorIfNotAtEOL(); + EVAL_REG_RETURN_IF_ERROR_OR_NO_EOL(reg2, FU_REGTWO); DepositRISCInstructionWord(parm, reg1, reg2); break; // Rn,(Rn) = 47 / Rn,(R14/R15+n) = 49/50 / Rn,(R14/R15+Rn) = 60/61 case RI_STORE: parm = 47; - reg1 = EvaluateRegisterFromTokenStream(FU_REGONE); + EVAL_REG_RETURN_IF_ERROR(reg1, FU_REGONE); CHECK_COMMA; if (*tok != '(') @@ -581,7 +601,7 @@ int GenerateRISCCode(int state) if (!indexed) { - reg2 = EvaluateRegisterFromTokenStream(FU_REGTWO); + EVAL_REG_RETURN_IF_ERROR(reg2, FU_REGTWO); } else { @@ -613,7 +633,7 @@ int GenerateRISCCode(int state) if (indexed) { - reg2 = EvaluateRegisterFromTokenStream(FU_REGTWO); + EVAL_REG_RETURN_IF_ERROR(reg2, FU_REGTWO); } else { @@ -650,7 +670,7 @@ int GenerateRISCCode(int state) } else { - reg2 = EvaluateRegisterFromTokenStream(FU_REGTWO); + EVAL_REG_RETURN_IF_ERROR(reg2, FU_REGTWO); } } @@ -658,7 +678,7 @@ int GenerateRISCCode(int state) return MalformedOpcode(MALF_RPAREN); tok++; - ErrorIfNotAtEOL(); + CHECK_EOL; DepositRISCInstructionWord(parm, reg2, reg1); break; @@ -668,34 +688,33 @@ int GenerateRISCCode(int state) return MalformedOpcode(MALF_LPAREN); tok++; - reg1 = EvaluateRegisterFromTokenStream(FU_REGONE); + EVAL_REG_RETURN_IF_ERROR(reg1, FU_REGONE); if (*tok != ')') return MalformedOpcode(MALF_RPAREN); tok++; CHECK_COMMA; - reg2 = EvaluateRegisterFromTokenStream(FU_REGTWO); - ErrorIfNotAtEOL(); + EVAL_REG_RETURN_IF_ERROR_OR_NO_EOL(reg2, FU_REGTWO); DepositRISCInstructionWord(parm, reg1, reg2); break; // STOREB/STOREP/STOREW Rn,(Rn) case RI_STOREN: - reg1 = EvaluateRegisterFromTokenStream(FU_REGONE); + EVAL_REG_RETURN_IF_ERROR(reg1, FU_REGONE); CHECK_COMMA; if (*tok != '(') return MalformedOpcode(MALF_LPAREN); tok++; - reg2 = EvaluateRegisterFromTokenStream(FU_REGTWO); + EVAL_REG_RETURN_IF_ERROR(reg2, FU_REGTWO); if (*tok != ')') return MalformedOpcode(MALF_RPAREN); tok++; - ErrorIfNotAtEOL(); + CHECK_EOL; DepositRISCInstructionWord(parm, reg2, reg1); break; @@ -802,13 +821,13 @@ int GenerateRISCCode(int state) return MalformedOpcode(MALF_LPAREN); tok++; - reg2 = EvaluateRegisterFromTokenStream(FU_REGTWO); + EVAL_REG_RETURN_IF_ERROR(reg2, FU_REGTWO); if (*tok != ')') return MalformedOpcode(MALF_RPAREN); tok++; - ErrorIfNotAtEOL(); + CHECK_EOL; } DepositRISCInstructionWord(parm, reg2, reg1); @@ -822,4 +841,3 @@ int GenerateRISCCode(int state) lastOpcode = type; return 0; } - diff --git a/version.h b/version.h index 5df6d9c..94754a3 100644 --- a/version.h +++ b/version.h @@ -15,6 +15,6 @@ #define MAJOR 2 // Major version number #define MINOR 1 // Minor version number -#define PATCH 0 // Patch release number +#define PATCH 1 // Patch release number #endif // __VERSION_H__ -- 2.37.2