From: Shamus Hammons Date: Thu, 20 Jul 2017 04:22:01 +0000 (-0500) Subject: Fix for bug #101 (bad macro handling). X-Git-Tag: v2.1.0~116 X-Git-Url: http://shamusworld.gotdns.org/cgi-bin/gitweb.cgi?p=rmac;a=commitdiff_plain;h=f7f625cf6c2f4b96854ac0e911ca2b1c249f4e05 Fix for bug #101 (bad macro handling). Just when you thought it was safe to write macros with constants, up pops a condition you thought was dead and buried yet lurches inexorably towards your code and causes it to segfault. As it turns out, it was bad token stream parsing that caused this, with a constant masquerading as a STRING token being the particular mischief maker. The code that was in place was an awful mess of horrible garbage code that wasn't even being used anymore--so that crap is gone, and replaced with something more (I hope) sane and maintainable. I think that the code that was there survived purging for so long because nobody really understood it; now that I understand it, I can't believe that it was written in the first place (to be fair, some of it was patching done by me, though the bulk of awfulness didn't come from that code). Onward and upward... --- diff --git a/debug.c b/debug.c index ce5cdaa..0488aff 100644 --- a/debug.c +++ b/debug.c @@ -327,6 +327,98 @@ int dumptok(TOKEN * tk) } +void DumpTokens(TOKEN * tokenBuffer) +{ +// printf("Tokens [%X]: ", sloc); + + for(TOKEN * t=tokenBuffer; *t!=EOL; t++) + { + if (*t == COLON) + printf("[COLON]"); + else if (*t == CONST) + { + t++; + printf("[CONST: $%X]", (uint32_t)*t); + } + else if (*t == ACONST) + { + printf("[ACONST: $%X, $%X]", (uint32_t)t[1], (uint32_t)t[2]); + t += 2; + } + else if (*t == STRING) + { + t++; + printf("[STRING:\"%s\"]", string[*t]); + } + else if (*t == SYMBOL) + { + t++; + printf("[SYMBOL:\"%s\"]", string[*t]); + } + else if (*t == EOS) + printf("[EOS]"); + else if (*t == TKEOF) + printf("[TKEOF]"); + else if (*t == DEQUALS) + printf("[DEQUALS]"); + else if (*t == SET) + printf("[SET]"); + else if (*t == REG) + printf("[REG]"); + else if (*t == DCOLON) + printf("[DCOLON]"); + else if (*t == GE) + printf("[GE]"); + else if (*t == LE) + printf("[LE]"); + else if (*t == NE) + printf("[NE]"); + else if (*t == SHR) + printf("[SHR]"); + else if (*t == SHL) + printf("[SHL]"); + else if (*t == UNMINUS) + printf("[UNMINUS]"); + else if (*t == DOTB) + printf("[DOTB]"); + else if (*t == DOTW) + printf("[DOTW]"); + else if (*t == DOTL) + printf("[DOTL]"); + else if (*t == DOTI) + printf("[DOTI]"); + else if (*t == ENDEXPR) + printf("[ENDEXPR]"); + else if (*t == CR_ABSCOUNT) + printf("[CR_ABSCOUNT]"); + else if (*t == CR_DEFINED) + printf("[CR_DEFINED]"); + else if (*t == CR_REFERENCED) + printf("[CR_REFERENCED]"); + else if (*t == CR_STREQ) + printf("[CR_STREQ]"); + else if (*t == CR_MACDEF) + printf("[CR_MACDEF]"); + else if (*t == CR_TIME) + printf("[CR_TIME]"); + else if (*t == CR_DATE) + printf("[CR_DATE]"); + else if (*t >= 0x20 && *t <= 0x2F) + printf("[%c]", (char)*t); + else if (*t >= 0x3A && *t <= 0x3F) + printf("[%c]", (char)*t); + else if (*t >= 0x80 && *t <= 0x87) + printf("[D%u]", ((uint32_t)*t) - 0x80); + else if (*t >= 0x88 && *t <= 0x8F) + printf("[A%u]", ((uint32_t)*t) - 0x88); + else + printf("[%X:%c]", (uint32_t)*t, (char)*t); + } + + printf("[EOL]\n"); +} + + // // Dump everything // diff --git a/debug.h b/debug.h index 73be705..ad1ccac 100644 --- a/debug.h +++ b/debug.h @@ -15,6 +15,8 @@ int mudump(void); int mdump(char *, LONG, int, LONG); int dumptok(TOKEN *); +void DumpTokens(TOKEN *); int dump_everything(void); #endif // __DEBUG_H__ + diff --git a/macro.c b/macro.c index 6b592d1..38dfecd 100644 --- a/macro.c +++ b/macro.c @@ -19,8 +19,6 @@ LONG curuniq; // Current macro's unique number int macnum; // Unique number for macro definition -TOKEN * argPtrs[128]; // 128 arguments ought to be enough for anyone -static int argp; static LONG macuniq; // Unique-per-macro number static SYM * curmac; // Macro currently being defined @@ -42,7 +40,6 @@ void InitMacro(void) { macuniq = 0; macnum = 1; - argp = 0; } @@ -59,6 +56,9 @@ WARNING(!!! Bad macro exiting !!!) This is a problem. Currently, the argument logic just keeps the current arguments and doesn't save anything if a new macro is called in the middle of another (nested macros). Need to fix that somehow. + +Is this still true, now that we have IMACROs with TOKENSTREAMs in them? Need to +check it out for sure...! */ // Pop intervening include files and .rept blocks while (cur_inobj != NULL && cur_inobj->in_type != SRC_IMACRO) @@ -75,11 +75,7 @@ of another (nested macros). Need to fix that somehow. IMACRO * imacro = cur_inobj->inobj.imacro; curuniq = imacro->im_olduniq; -// /*TOKEN ** p = */argp--; -// argp = (TOKEN **)*argp; - DEBUG { printf("ExitMacro: argp: %d -> ", argp); } - argp -= imacro->im_nargs; - DEBUG { printf("%d (nargs = %d)\n", argp, imacro->im_nargs); } + DEBUG { printf("ExitMacro: nargs = %d\n", imacro->im_nargs); } return fpop(); } @@ -110,45 +106,12 @@ int defmac1(char * ln, int notEndFlag) { if (list_flag) { - listeol(); // Flush previous source line - lstout('.'); // Mark macro definition with period + listeol(); // Flush previous source line + lstout('.'); // Mark macro definition with period } - // This is just wrong, wrong, wrong. It makes up a weird kind of string with - // a pointer on front, and then uses a ** to manage them: This is a recipe - // for disaster. - // How to manage it then? - // Could use a linked list, like Landon uses everywhere else. -/* -How it works: -Allocate a space big enough for the string + NULL + pointer. -Set the pointer to NULL. -Copy the string to the space after the pointer. -If this is the 1st time through, set the SYM * "svalue" to the pointer. -If this is the 2nd time through, derefence the ** to point to the memory you -just allocated. Then, set the ** to the location of the memory you allocated -for the next pass through. - -This is a really low level way to do a linked list, and by bypassing all the -safety features of the language. Seems like we can do better here. -*/ if (notEndFlag) { -#if 0 - len = strlen(ln) + 1 + sizeof(LONG); - p.cp = malloc(len); - *p.lp = 0; - strcpy(p.cp + sizeof(LONG), ln); - - // Link line of text onto end of list - if (curmln == NULL) - curmac->svalue = p.cp; - else - *curmln = p.cp; - - curmln = (char **)p.cp; - return 1; // Keep looking -#else if (curmac->lineList == NULL) { curmac->lineList = malloc(sizeof(LLIST)); @@ -164,11 +127,10 @@ safety features of the language. Seems like we can do better here. curmac->last = curmac->last->next; } - return 1; // Keep looking -#endif + return 1; // Keep looking } - return 0; // Stop looking at the end + return 0; // Stop looking; at the end } @@ -337,6 +299,8 @@ static int LNCatch(int (* lnfunc)(), char * dirlist) fatal("cannot continue"); } + DEBUG { DumpTokenBuffer(); } + // Test for end condition. Two cases to handle: // // symbol: @@ -421,157 +385,63 @@ static int KWMatch(char * kw, char * kwlist) // -// Invoke a macro -// o parse, count and copy arguments -// o push macro's string-stream +// Invoke a macro by creating a new IMACRO object & chopping up the arguments // int InvokeMacro(SYM * mac, WORD siz) { - TOKEN * p = NULL; - int dry_run; - WORD arg_siz = 0; -// TOKEN ** argptr = NULL; -//Doesn't need to be global! (or does it???--it does) -// argp = 0; - DEBUG printf("InvokeMacro: argp: %d -> ", argp); - - INOBJ * inobj = a_inobj(SRC_IMACRO); // Alloc and init IMACRO + DEBUG { printf("InvokeMacro: arguments="); DumpTokens(tok); } + + INOBJ * inobj = a_inobj(SRC_IMACRO); // Alloc and init IMACRO IMACRO * imacro = inobj->inobj.imacro; - imacro->im_siz = siz; - WORD nargs = 0; - TOKEN * beg_tok = tok; // 'tok' comes from token.c - TOKEN * startOfArg; - TOKEN * dest; - int stringNum = 0; - int argumentNum = 0; - - for(dry_run=1; ; dry_run--) + uint16_t nargs = 0; + + // Chop up the arguments, if any (tok comes from token.c, which at this + // point points at the macro argument token stream) + if (*tok != EOL) { - for(tok=beg_tok; *tok!=EOL;) + // Parse out the arguments and set them up correctly + TOKEN * p = imacro->argument[nargs].token; + int stringNum = 0; + + while (*tok != EOL) { - if (dry_run) - nargs++; - else + if (*tok == ACONST) { -#if 0 - *argptr++ = p; -#else - argPtrs[argp++] = p; - startOfArg = p; -#endif + for(int i=0; i<3; i++) + *p++ = *tok++; } - - // Keep going while tok isn't pointing at a comma or EOL - while (*tok != ',' && *tok != EOL) + else if (*tok == CONST) { - // Skip over backslash character, unless it's followed by an EOL - if (*tok == '\\' && tok[1] != EOL) - tok++; - - switch (*tok) - { - case CONST: - case SYMBOL: -//Shamus: Possible bug. ACONST has 2 tokens after it, not just 1 - case ACONST: - if (dry_run) - { - arg_siz += sizeof(TOKEN); - tok++; - } - else - { - *p++ = *tok++; - } - // FALLTHROUGH (picks up the arg after a CONST, SYMBOL or ACONST) - default: - if (dry_run) - { - arg_siz += sizeof(TOKEN); - tok++; - } - else - { - *p++ = *tok++; - } - - break; - } + *p++ = *tok++; + *p++ = *tok++; } - - // We hit the comma or EOL, so count/stuff it - if (dry_run) - arg_siz += sizeof(TOKEN); - else + else if ((*tok == STRING) || (*tok == SYMBOL)) + { + *p++ = *tok++; + imacro->argument[nargs].string[stringNum] = strdup(string[*tok++]); + *p++ = stringNum++; + } + else if (*tok == ',') + { + // Comma delimiter was found, so set up for next argument *p++ = EOL; - - // If we hit the comma instead of an EOL, skip over it - if (*tok == ',') tok++; - - // Do our QnD token grabbing (this will be redone once we get all - // the data structures fixed as this is a really dirty hack) - if (!dry_run) - { - dest = imacro->argument[argumentNum].token; stringNum = 0; - - do - { - // Remap strings to point the IMACRO internal token storage - if (*startOfArg == SYMBOL || *startOfArg == STRING) - { - *dest++ = *startOfArg++; - imacro->argument[argumentNum].string[stringNum] = strdup(string[*startOfArg++]); - *dest++ = stringNum++; - } - else - *dest++ = *startOfArg++; - } - while (*startOfArg != EOL); - - *dest = *startOfArg; // Copy EOL... - argumentNum++; + nargs++; + p = imacro->argument[nargs].token; + } + else + { + *p++ = *tok++; } } - // Allocate space for argument ptrs and so on and then go back and - // construct the arg frame - if (dry_run) - { - if (nargs != 0) - p = (TOKEN *)malloc(arg_siz); -// p = (TOKEN *)malloc(arg_siz + sizeof(TOKEN)); - -/* -Shamus: -This construct is meant to deal with nested macros, so the simple minded way -we deal with them now won't work. :-/ Have to think about how to fix. -What we could do is simply move the argp with each call, and move it back by -the number of arguments in the macro that's ending. That would solve the -problem nicely. -[Which we do now. But that uncovered another problem: the token strings are all -stale by the time a nested macro gets to the end. But they're supposed to be -symbols, which means if we put symbol references into the argument token -streams, we can alleviate this problem.] -*/ -#if 0 - argptr = (TOKEN **)malloc((nargs + 1) * sizeof(LONG)); - *argptr++ = (TOKEN *)argp; - argp = argptr; -#else - // We don't need to do anything here since we already advance argp - // when parsing the arguments. -// argp += nargs; -#endif - } - else - break; + // Make sure to stuff the final EOL (otherwise, it will be skipped) + *p++ = EOL; + nargs++; } - DEBUG { printf("%d\n", argp); } - - // Setup imacro: + // Setup IMACRO: // o # arguments; // o -> macro symbol; // o -> macro definition string list; @@ -579,19 +449,19 @@ streams, we can alleviate this problem.] // o bump `macuniq' counter and set 'curuniq' to it; imacro->im_nargs = nargs; imacro->im_macro = mac; + imacro->im_siz = siz; imacro->im_nextln = mac->lineList; imacro->im_olduniq = curuniq; curuniq = macuniq++; - imacro->argBase = argp - nargs; // Shamus: keep track of argument base DEBUG { - printf("nargs=%d\n", nargs); + printf("# args = %d\n", nargs); - for(nargs=0; nargsim_nargs; nargs++) + for(uint16_t i=0; iargBase); } + DEBUG { printf("~argnumber=%d\n", i); } tk = NULL; if (i < imacro->im_nargs) { -#if 0 -// tk = argp[i]; -// tk = argPtrs[i]; - tk = argPtrs[imacro->argBase + i]; -#else tk = imacro->argument[i].token; symbolString = imacro->argument[i].string; //DEBUG //{ // printf("ExM: Preparing to parse argument #%u...\n", i); -// dumptok(tk); +// DumpTokens(tk); //} -#endif } // \?arg yields: @@ -1661,10 +1655,9 @@ int d_goto(WORD unused) void DumpTokenBuffer(void) { - TOKEN * t; printf("Tokens [%X]: ", sloc); - for(t=tokbuf; *t!=EOL; t++) + for(TOKEN * t=tokbuf; *t!=EOL; t++) { if (*t == COLON) printf("[COLON]"); @@ -1674,7 +1667,10 @@ void DumpTokenBuffer(void) printf("[CONST: $%X]", (uint32_t)*t); } else if (*t == ACONST) - printf("[ACONST]"); + { + printf("[ACONST: $%X, $%X]", (uint32_t)t[1], (uint32_t)t[2]); + t += 2; + } else if (*t == STRING) { t++; diff --git a/token.h b/token.h index 52ff8cb..1d824a9 100644 --- a/token.h +++ b/token.h @@ -128,13 +128,12 @@ TOKENSTREAM { // Information about a macro invocation IMACRO { IMACRO * im_link; // Pointer to ancient IMACROs - LLIST * im_nextln; // Next line to include + LLIST * im_nextln; // Next line to include WORD im_nargs; // # of arguments supplied on invocation WORD im_siz; // Size suffix supplied on invocation LONG im_olduniq; // Old value of 'macuniq' SYM * im_macro; // Pointer to macro we're in char im_lnbuf[LNSIZ]; // Line buffer - uint32_t argBase; // Base in argPtrs[] for current macro TOKENSTREAM argument[20]; // Assume no more than 20 arguments in an invocation }; diff --git a/version.h b/version.h index 7228b2d..ffe1fe0 100644 --- a/version.h +++ b/version.h @@ -15,7 +15,7 @@ #define MAJOR 1 // Major version number #define MINOR 8 // Minor version number -#define PATCH 0 // Patch release number +#define PATCH 1 // Patch release number #endif // __VERSION_H__