Fix for bug #101 (bad macro handling).
authorShamus Hammons <jlhamm@acm.org>
Thu, 20 Jul 2017 04:22:01 +0000 (23:22 -0500)
committerShamus Hammons <jlhamm@acm.org>
Thu, 20 Jul 2017 04:22:01 +0000 (23:22 -0500)
Just when you thought it was safe to write macros with constants, up
pops a condition you thought was dead and buried yet lurches inexorably
towards your code and causes it to segfault. As it turns out, it was
bad token stream parsing that caused this, with a constant masquerading
as a STRING token being the particular mischief maker. The code that
was in place was an awful mess of horrible garbage code that wasn't
even being used anymore--so that crap is gone, and replaced with
something more (I hope) sane and maintainable. I think that the code
that was there survived purging for so long because nobody really
understood it; now that I understand it, I can't believe that it was
written in the first place (to be fair, some of it was patching done by
me, though the bulk of awfulness didn't come from that code). Onward
and upward...

debug.c
debug.h
macro.c
token.c
token.h
version.h

diff --git a/debug.c b/debug.c
index ce5cdaaff8ba5b877b8ceb1f1684c47e95943e79..0488aff4d6c2eee0658f0190bebfe408fe0ca6b2 100644 (file)
--- a/debug.c
+++ b/debug.c
@@ -327,6 +327,98 @@ int dumptok(TOKEN * tk)
 }
 
 
+void DumpTokens(TOKEN * tokenBuffer)
+{
+//     printf("Tokens [%X]: ", sloc);
+
+       for(TOKEN * t=tokenBuffer; *t!=EOL; t++)
+       {
+               if (*t == COLON)
+                       printf("[COLON]");
+               else if (*t == CONST)
+               {
+                       t++;
+                       printf("[CONST: $%X]", (uint32_t)*t);
+               }
+               else if (*t == ACONST)
+               {
+                       printf("[ACONST: $%X, $%X]", (uint32_t)t[1], (uint32_t)t[2]);
+                       t += 2;
+               }
+               else if (*t == STRING)
+               {
+                       t++;
+                       printf("[STRING:\"%s\"]", string[*t]);
+               }
+               else if (*t == SYMBOL)
+               {
+                       t++;
+                       printf("[SYMBOL:\"%s\"]", string[*t]);
+               }
+               else if (*t == EOS)
+                       printf("[EOS]");
+               else if (*t == TKEOF)
+                       printf("[TKEOF]");
+               else if (*t == DEQUALS)
+                       printf("[DEQUALS]");
+               else if (*t == SET)
+                       printf("[SET]");
+               else if (*t == REG)
+                       printf("[REG]");
+               else if (*t == DCOLON)
+                       printf("[DCOLON]");
+               else if (*t == GE)
+                       printf("[GE]");
+               else if (*t == LE)
+                       printf("[LE]");
+               else if (*t == NE)
+                       printf("[NE]");
+               else if (*t == SHR)
+                       printf("[SHR]");
+               else if (*t == SHL)
+                       printf("[SHL]");
+               else if (*t == UNMINUS)
+                       printf("[UNMINUS]");
+               else if (*t == DOTB)
+                       printf("[DOTB]");
+               else if (*t == DOTW)
+                       printf("[DOTW]");
+               else if (*t == DOTL)
+                       printf("[DOTL]");
+               else if (*t == DOTI)
+                       printf("[DOTI]");
+               else if (*t == ENDEXPR)
+                       printf("[ENDEXPR]");
+               else if (*t == CR_ABSCOUNT)
+                       printf("[CR_ABSCOUNT]");
+               else if (*t == CR_DEFINED)
+                       printf("[CR_DEFINED]");
+               else if (*t == CR_REFERENCED)
+                       printf("[CR_REFERENCED]");
+               else if (*t == CR_STREQ)
+                       printf("[CR_STREQ]");
+               else if (*t == CR_MACDEF)
+                       printf("[CR_MACDEF]");
+               else if (*t == CR_TIME)
+                       printf("[CR_TIME]");
+               else if (*t == CR_DATE)
+                       printf("[CR_DATE]");
+               else if (*t >= 0x20 && *t <= 0x2F)
+                       printf("[%c]", (char)*t);
+               else if (*t >= 0x3A && *t <= 0x3F)
+                       printf("[%c]", (char)*t);
+               else if (*t >= 0x80 && *t <= 0x87)
+                       printf("[D%u]", ((uint32_t)*t) - 0x80);
+               else if (*t >= 0x88 && *t <= 0x8F)
+                       printf("[A%u]", ((uint32_t)*t) - 0x88);
+               else
+                       printf("[%X:%c]", (uint32_t)*t, (char)*t);
+       }
+
+       printf("[EOL]\n");
+}
+
+
 //
 // Dump everything
 //
diff --git a/debug.h b/debug.h
index 73be70540137fa6a4619b90c9d30ae15acdb0874..ad1ccacb490935b515282a67db25b9cdcf0008f7 100644 (file)
--- a/debug.h
+++ b/debug.h
@@ -15,6 +15,8 @@
 int mudump(void);
 int mdump(char *, LONG, int, LONG);
 int dumptok(TOKEN *);
+void DumpTokens(TOKEN *);
 int dump_everything(void);
 
 #endif // __DEBUG_H__
+
diff --git a/macro.c b/macro.c
index 6b592d114aedacc3c6a48321e04c351bfaa241bf..38dfecd80ae7fb573144d759c2cfdbebb8c2300a 100644 (file)
--- a/macro.c
+++ b/macro.c
@@ -19,8 +19,6 @@
 
 LONG curuniq;                          // Current macro's unique number
 int macnum;                                    // Unique number for macro definition
-TOKEN * argPtrs[128];          // 128 arguments ought to be enough for anyone
-static int argp;
 
 static LONG macuniq;           // Unique-per-macro number
 static SYM * curmac;           // Macro currently being defined
@@ -42,7 +40,6 @@ void InitMacro(void)
 {
        macuniq = 0;
        macnum = 1;
-       argp = 0;
 }
 
 
@@ -59,6 +56,9 @@ WARNING(!!! Bad macro exiting !!!)
 This is a problem. Currently, the argument logic just keeps the current
 arguments and doesn't save anything if a new macro is called in the middle
 of another (nested macros). Need to fix that somehow.
+
+Is this still true, now that we have IMACROs with TOKENSTREAMs in them? Need to
+check it out for sure...!
 */
        // Pop intervening include files and .rept blocks
        while (cur_inobj != NULL && cur_inobj->in_type != SRC_IMACRO)
@@ -75,11 +75,7 @@ of another (nested macros). Need to fix that somehow.
        IMACRO * imacro = cur_inobj->inobj.imacro;
        curuniq = imacro->im_olduniq;
 
-//     /*TOKEN ** p = */argp--;
-//     argp = (TOKEN **)*argp;
-       DEBUG { printf("ExitMacro: argp: %d -> ", argp); }
-       argp -= imacro->im_nargs;
-       DEBUG { printf("%d (nargs = %d)\n", argp, imacro->im_nargs); }
+       DEBUG { printf("ExitMacro: nargs = %d\n", imacro->im_nargs); }
 
        return fpop();
 }
@@ -110,45 +106,12 @@ int defmac1(char * ln, int notEndFlag)
 {
        if (list_flag)
        {
-               listeol();                                                      // Flush previous source line
-               lstout('.');                                            // Mark macro definition with period
+               listeol();              // Flush previous source line
+               lstout('.');    // Mark macro definition with period
        }
 
-       // This is just wrong, wrong, wrong. It makes up a weird kind of string with
-       // a pointer on front, and then uses a ** to manage them: This is a recipe
-       // for disaster.
-       // How to manage it then?
-       // Could use a linked list, like Landon uses everywhere else.
-/*
-How it works:
-Allocate a space big enough for the string + NULL + pointer.
-Set the pointer to NULL.
-Copy the string to the space after the pointer.
-If this is the 1st time through, set the SYM * "svalue" to the pointer.
-If this is the 2nd time through, derefence the ** to point to the memory you
-just allocated. Then, set the ** to the location of the memory you allocated
-for the next pass through.
-
-This is a really low level way to do a linked list, and by bypassing all the
-safety features of the language. Seems like we can do better here.
-*/
        if (notEndFlag)
        {
-#if 0
-               len = strlen(ln) + 1 + sizeof(LONG);
-               p.cp = malloc(len);
-               *p.lp = 0;
-               strcpy(p.cp + sizeof(LONG), ln);
-
-               // Link line of text onto end of list
-               if (curmln == NULL)
-                       curmac->svalue = p.cp;
-               else
-                       *curmln = p.cp;
-
-               curmln = (char **)p.cp;
-               return 1;                                                       // Keep looking
-#else
                if (curmac->lineList == NULL)
                {
                        curmac->lineList = malloc(sizeof(LLIST));
@@ -164,11 +127,10 @@ safety features of the language. Seems like we can do better here.
                        curmac->last = curmac->last->next;
                }
 
-               return 1;                                                       // Keep looking
-#endif
+               return 1;               // Keep looking
        }
 
-       return 0;                                                               // Stop looking at the end
+       return 0;                       // Stop looking; at the end
 }
 
 
@@ -337,6 +299,8 @@ static int LNCatch(int (* lnfunc)(), char * dirlist)
                        fatal("cannot continue");
                }
 
+               DEBUG { DumpTokenBuffer(); }
+
                // Test for end condition.  Two cases to handle:
                //            <directive>
                //    symbol: <directive>
@@ -421,157 +385,63 @@ static int KWMatch(char * kw, char * kwlist)
 
 
 //
-// Invoke a macro
-//  o  parse, count and copy arguments
-//  o  push macro's string-stream
+// Invoke a macro by creating a new IMACRO object & chopping up the arguments
 //
 int InvokeMacro(SYM * mac, WORD siz)
 {
-       TOKEN * p = NULL;
-       int dry_run;
-       WORD arg_siz = 0;
-//     TOKEN ** argptr = NULL;
-//Doesn't need to be global! (or does it???--it does)
-//     argp = 0;
-       DEBUG printf("InvokeMacro: argp: %d -> ", argp);
-
-       INOBJ * inobj = a_inobj(SRC_IMACRO);            // Alloc and init IMACRO
+       DEBUG { printf("InvokeMacro: arguments="); DumpTokens(tok); }
+
+       INOBJ * inobj = a_inobj(SRC_IMACRO);    // Alloc and init IMACRO
        IMACRO * imacro = inobj->inobj.imacro;
-       imacro->im_siz = siz;
-       WORD nargs = 0;
-       TOKEN * beg_tok = tok;                                          // 'tok' comes from token.c
-       TOKEN * startOfArg;
-       TOKEN * dest;
-       int stringNum = 0;
-       int argumentNum = 0;
-
-       for(dry_run=1; ; dry_run--)
+       uint16_t nargs = 0;
+
+       // Chop up the arguments, if any (tok comes from token.c, which at this
+       // point points at the macro argument token stream)
+       if (*tok != EOL)
        {
-               for(tok=beg_tok; *tok!=EOL;)
+               // Parse out the arguments and set them up correctly
+               TOKEN * p = imacro->argument[nargs].token;
+               int stringNum = 0;
+
+               while (*tok != EOL)
                {
-                       if (dry_run)
-                               nargs++;
-                       else
+                       if (*tok == ACONST)
                        {
-#if 0
-                               *argptr++ = p;
-#else
-                               argPtrs[argp++] = p;
-                               startOfArg = p;
-#endif
+                               for(int i=0; i<3; i++)
+                                       *p++ = *tok++;
                        }
-
-                       // Keep going while tok isn't pointing at a comma or EOL
-                       while (*tok != ',' && *tok != EOL)
+                       else if (*tok == CONST)
                        {
-                               // Skip over backslash character, unless it's followed by an EOL
-                               if (*tok == '\\' && tok[1] != EOL)
-                                       tok++;
-
-                               switch (*tok)
-                               {
-                               case CONST:
-                               case SYMBOL:
-//Shamus: Possible bug. ACONST has 2 tokens after it, not just 1
-                               case ACONST:
-                                       if (dry_run)
-                                       {
-                                               arg_siz += sizeof(TOKEN);
-                                               tok++;
-                                       }
-                                       else
-                                       {
-                                               *p++ = *tok++;
-                                       }
-                               // FALLTHROUGH (picks up the arg after a CONST, SYMBOL or ACONST)
-                               default:
-                                       if (dry_run)
-                                       {
-                                               arg_siz += sizeof(TOKEN);
-                                               tok++;
-                                       }
-                                       else
-                                       {
-                                               *p++ = *tok++;
-                                       }
-
-                                       break;
-                               }
+                               *p++ = *tok++;
+                               *p++ = *tok++;
                        }
-
-                       // We hit the comma or EOL, so count/stuff it
-                       if (dry_run)
-                               arg_siz += sizeof(TOKEN);
-                       else
+                       else if ((*tok == STRING) || (*tok == SYMBOL))
+                       {
+                               *p++ = *tok++;
+                               imacro->argument[nargs].string[stringNum] = strdup(string[*tok++]);
+                               *p++ = stringNum++;
+                       }
+                       else if (*tok == ',')
+                       {
+                               // Comma delimiter was found, so set up for next argument
                                *p++ = EOL;
-
-                       // If we hit the comma instead of an EOL, skip over it
-                       if (*tok == ',')
                                tok++;
-
-                       // Do our QnD token grabbing (this will be redone once we get all
-                       // the data structures fixed as this is a really dirty hack)
-                       if (!dry_run)
-                       {
-                               dest = imacro->argument[argumentNum].token;
                                stringNum = 0;
-
-                               do
-                               {
-                                       // Remap strings to point the IMACRO internal token storage
-                                       if (*startOfArg == SYMBOL || *startOfArg == STRING)
-                                       {
-                                               *dest++ = *startOfArg++;
-                                               imacro->argument[argumentNum].string[stringNum] = strdup(string[*startOfArg++]);
-                                               *dest++ = stringNum++;
-                                       }
-                                       else
-                                               *dest++ = *startOfArg++;
-                               }
-                               while (*startOfArg != EOL);
-
-                               *dest = *startOfArg;            // Copy EOL...
-                               argumentNum++;
+                               nargs++;
+                               p = imacro->argument[nargs].token;
+                       }
+                       else
+                       {
+                               *p++ = *tok++;
                        }
                }
 
-               // Allocate space for argument ptrs and so on and then go back and
-               // construct the arg frame
-               if (dry_run)
-               {
-                       if (nargs != 0)
-                               p = (TOKEN *)malloc(arg_siz);
-//                             p = (TOKEN *)malloc(arg_siz + sizeof(TOKEN));
-
-/*
-Shamus:
-This construct is meant to deal with nested macros, so the simple minded way
-we deal with them now won't work. :-/ Have to think about how to fix.
-What we could do is simply move the argp with each call, and move it back by
-the number of arguments in the macro that's ending. That would solve the
-problem nicely.
-[Which we do now. But that uncovered another problem: the token strings are all
-stale by the time a nested macro gets to the end. But they're supposed to be
-symbols, which means if we put symbol references into the argument token
-streams, we can alleviate this problem.]
-*/
-#if 0
-                       argptr = (TOKEN **)malloc((nargs + 1) * sizeof(LONG));
-                       *argptr++ = (TOKEN *)argp;
-                       argp = argptr;
-#else
-                       // We don't need to do anything here since we already advance argp
-                       // when parsing the arguments.
-//                     argp += nargs;
-#endif
-               }
-               else
-                       break;
+               // Make sure to stuff the final EOL (otherwise, it will be skipped)
+               *p++ = EOL;
+               nargs++;
        }
 
-       DEBUG { printf("%d\n", argp); }
-
-       // Setup imacro:
+       // Setup IMACRO:
        //  o  # arguments;
        //  o  -> macro symbol;
        //  o  -> macro definition string list;
@@ -579,19 +449,19 @@ streams, we can alleviate this problem.]
        //  o  bump `macuniq' counter and set 'curuniq' to it;
        imacro->im_nargs = nargs;
        imacro->im_macro = mac;
+       imacro->im_siz = siz;
        imacro->im_nextln = mac->lineList;
        imacro->im_olduniq = curuniq;
        curuniq = macuniq++;
-       imacro->argBase = argp - nargs; // Shamus: keep track of argument base
 
        DEBUG
        {
-               printf("nargs=%d\n", nargs);
+               printf("# args = %d\n", nargs);
 
-               for(nargs=0; nargs<imacro->im_nargs; nargs++)
+               for(uint16_t i=0; i<nargs; i++)
                {
-                       printf("arg%d=", nargs);
-                       dumptok(argPtrs[(argp - imacro->im_nargs) + nargs]);
+                       printf("arg%d=", i);
+                       DumpTokens(imacro->argument[i].token);
                }
        }
 
diff --git a/token.c b/token.c
index 27eedc3d849b500d57305740b9d8d910cba6f476..2a8513d899a45f3f3883aaeacf6cd6aafc31afe0 100644 (file)
--- a/token.c
+++ b/token.c
@@ -475,24 +475,18 @@ copy_d:
                                // macro invocation) then it is ignored.
                                i = (int)arg->svalue;
 arg_num:
-                               DEBUG { printf("~argnumber=%d (argBase=%u)\n", i, imacro->argBase); }
+                               DEBUG { printf("~argnumber=%d\n", i); }
                                tk = NULL;
 
                                if (i < imacro->im_nargs)
                                {
-#if 0
-//                                     tk = argp[i];
-//                                     tk = argPtrs[i];
-                                       tk = argPtrs[imacro->argBase + i];
-#else
                                        tk = imacro->argument[i].token;
                                        symbolString = imacro->argument[i].string;
 //DEBUG
 //{
 //     printf("ExM: Preparing to parse argument #%u...\n", i);
-//     dumptok(tk);
+//     DumpTokens(tk);
 //}
-#endif
                                }
 
                                // \?arg yields:
@@ -1661,10 +1655,9 @@ int d_goto(WORD unused)
 
 void DumpTokenBuffer(void)
 {
-       TOKEN * t;
        printf("Tokens [%X]: ", sloc);
 
-       for(t=tokbuf; *t!=EOL; t++)
+       for(TOKEN * t=tokbuf; *t!=EOL; t++)
        {
                if (*t == COLON)
                        printf("[COLON]");
@@ -1674,7 +1667,10 @@ void DumpTokenBuffer(void)
                        printf("[CONST: $%X]", (uint32_t)*t);
                }
                else if (*t == ACONST)
-                       printf("[ACONST]");
+               {
+                       printf("[ACONST: $%X, $%X]", (uint32_t)t[1], (uint32_t)t[2]);
+                       t += 2;
+               }
                else if (*t == STRING)
                {
                        t++;
diff --git a/token.h b/token.h
index 52ff8cb2d39154791fbc7b6351dfb65e87427613..1d824a905966bde12b0b8b7036c6e77e4ac37215 100644 (file)
--- a/token.h
+++ b/token.h
@@ -128,13 +128,12 @@ TOKENSTREAM {
 // Information about a macro invocation
 IMACRO {
        IMACRO * im_link;               // Pointer to ancient IMACROs
-       LLIST * im_nextln;      // Next line to include
+       LLIST * im_nextln;              // Next line to include
        WORD im_nargs;                  // # of arguments supplied on invocation
        WORD im_siz;                    // Size suffix supplied on invocation
        LONG im_olduniq;                // Old value of 'macuniq'
        SYM * im_macro;                 // Pointer to macro we're in
        char im_lnbuf[LNSIZ];   // Line buffer
-       uint32_t argBase;               // Base in argPtrs[] for current macro
        TOKENSTREAM argument[20];       // Assume no more than 20 arguments in an invocation
 };
 
index 7228b2dcc5585679038b3d9f0032102cc3dafe0b..ffe1fe04bb0bac20512581be277c89d754aad042 100644 (file)
--- a/version.h
+++ b/version.h
@@ -15,7 +15,7 @@
 
 #define MAJOR   1              // Major version number
 #define MINOR   8              // Minor version number
-#define PATCH   0              // Patch release number
+#define PATCH   1              // Patch release number
 
 #endif // __VERSION_H__