Shamusworld >> Repos - rln/commitdiff
Convert output symbol table back to a table
author: James Jones <atari@theinnocuous.com>
Tue, 26 Jul 2022 10:36:50 +0000 (03:36 -0700)
committer: Shamus Hammons <jlhamm@acm.org>
Tue, 16 Aug 2022 02:31:44 +0000 (21:31 -0500)
It looks like at some point the output symbol table
was converted from an actual table of SYMREC
structs to an in-memory copy of a COFF/a.out
symbol table. This entailed relying on the
assumption that all symbols had an entry in the
string table when using OSTLookup()'s return
value as anything other than a boolean value,
as is done in the relocation processing logic.

In preparation for adding support for debug
symbols, which often have no string table entry,
revert to using an intermediate output symbol
table representation and serializing it one symbol
at a time when writing the output file. This
simplifies various code paths, but potentially
slows down writing COFF symbol tables to disk.
Fortunately, this table is not written with
default options, and is rather small unless
using debug symbols, so this shouldn't
significantly affect the runtime of most
existing use cases.

rln.c
rln.h

diff --git a/rln.c b/rln.c
index 4a83410238337b3669ce8178ec012e56c7913d21..4ae8d4435f75b6799ff9b9d59a2d609818735f22 100644 (file)
--- a/rln.c
+++ b/rln.c
@@ -55,13 +55,12 @@ char * arPtr[512];
 uint32_t arIndex = 0;
 struct HREC * htable[NBUCKETS];                // Hash table
 struct HREC * unresolved = NULL;       // Pointer to unresolved hash list
-char * ost;                                                    // Output symbol table
-char * ost_ptr;                                                // Output symbol table; current pointer
-char * ost_end;                                                // Output symbol table; end pointer
-char * oststr;                                         // Output string table
-char * oststr_ptr;                                     // Output string table; current pointer
-char * oststr_end;                                     // Output string table; end pointer
-int ost_index = 0;                                     // Index of next ost addition
+struct SYMREC * ost;                           // Output symbol table
+char * oststr = NULL;                          // Output string table
+char * oststr_ptr = NULL;                      // Output string table; current pointer
+char * oststr_end = NULL;                      // Output string table; end pointer
+int ost_index = 0;                                     // Index of next free ost entry
+int ost_size = 0;                                      // Size of ost
 uint8_t nullStr[1] = "\x00";           // Empty string
 struct HREC * arSymbol = NULL;         // Pointer to AR symbol table
 
@@ -139,6 +138,7 @@ int DoSymbols(struct OFILE * ofile)
        int type;
        long value;
        int index;
+       char *string;
        int j;
        struct HREC * hptr;
        uint32_t tsoSave, dsoSave, bsoSave;
@@ -166,6 +166,7 @@ int DoSymbols(struct OFILE * ofile)
                index = GetLong(symptr + 0);    // Obtain symbol string index
                type  = GetLong(symptr + 4);    // Obtain symbol type
                value = GetLong(symptr + 8);    // Obtain symbol value
+               string = index ? symend + index : "";
 
                // Global/External symbols have a pre-processing stage
                // N.B.: This destroys the t/d/bsegoffset discovered above. So if a
@@ -176,21 +177,21 @@ int DoSymbols(struct OFILE * ofile)
                        // Obtain the string table index for the relocation symbol, look
                        // for it in the globals hash table to obtain information on that
                        // symbol.
-                       hptr = LookupHREC(symend + index);
+                       hptr = LookupHREC(string);
 
                        if (hptr == NULL)
                        {
                                // Try to find it in the OST
-                               int ostIndex = OSTLookup(symend + index);
+                               int ostIndex = OSTLookup(string);
 
                                if (ostIndex == -1)
                                {
-                                       printf("DoSymbols(): Symbol not found in hash table: '%s' (%s)\n", symend + index, ofile->o_name);
+                                       printf("DoSymbols(): Symbol not found in hash table: '%s' (%s)\n", string, ofile->o_name);
                                        return 1;
                                }
 
                                if (vflag > 1)
-                                       printf("DoSymbols(): Skipping symbol '%s' (%s) found in OST...\n", symend + index, ofile->o_name);
+                                       printf("DoSymbols(): Skipping symbol '%s' (%s) found in OST...\n", string, ofile->o_name);
 
                                // If the symbol is not in any .a or .o units, it must be one
                                // of the injected ones (_TEXT_E, _DATA_E, or _BSS_E), so skip
@@ -230,7 +231,7 @@ int DoSymbols(struct OFILE * ofile)
                                        break;
                                default:
                                        if (vflag > 1)
-                                               printf("DoSymbols: No adjustment made for symbol: %s (%s) = %X\n", symend + index, ofile->o_name, hptr->h_value);
+                                               printf("DoSymbols: No adjustment made for symbol: %s (%s) = %X\n", string, ofile->o_name, hptr->h_value);
                                }
                        }
                }
@@ -311,13 +312,13 @@ int DoSymbols(struct OFILE * ofile)
                if (isglobal(type) || lflag)
                {
                        if (vflag > 1)
-                               printf("DoSymbols: Adding symbol: %s (%s) to OST...\n", symend + index, ofile->o_name);
+                               printf("DoSymbols: Adding symbol: %s (%s) to OST...\n", string, ofile->o_name);
 
-                       index = OSTAdd(symend + index, type, value);
+                       index = OSTAdd(index ? string : NULL, type, value);
 
                        if (index == -1)
                        {
-                               printf("DoSymbols(): Failed to add symbol '%s' to OST!\n", symend + index);
+                               printf("DoSymbols(): Failed to add symbol '%s' to OST!\n", string);
                                return 1;
                        }
                }
@@ -394,127 +395,102 @@ long DoCommon(void)
 //
 int OSTAdd(char * name, int type, long value)
 {
-       int ost_offset_p, ost_offset_e = 0;     // OST table offsets for position calcs
+       int ost_offset_p = 0, ost_offset_e;     // OST table offsets for position calcs
        int ostresult;                                          // OST index result
-       int slen = strlen(name);
+       int slen;                                                       // String length, including terminator
 
-       // If the OST or OST string table has not been initialised then do so
-       if (ost_index == 0)
+       // If this is a debug symbol and the include debug symbol flag (-g) is not
+       // set then do nothing
+       if ((type & 0xF0000000) && !gflag)
        {
-               ost = malloc(OST_BLOCK);
-               oststr = malloc(OST_BLOCK);
-
-               if (ost == NULL)
-               {
-                       printf("OST memory allocation error.\n");
-                       return -1;
-               }
+               // Do nothing
+               return 0;
+       }
 
-               if (oststr == NULL)
-               {
-                       printf("OSTSTR memory allocation error.\n");
-                       return -1;
-               }
+       if (!name || !name[0])
+               slen = 0;
+       else
+               slen = strlen(name) + 1;
 
-               ost_ptr = ost;                                          // Set OST start pointer
-               ost_end = ost + OST_BLOCK;                      // Set OST end pointer
+       // Get symbol index in OST, if any (-1 if not found)
+       ostresult = slen ? OSTLookup(name) : -1;
 
-               PutLong(oststr, 0x00000004);            // Just null long for now
-               oststr_ptr = oststr + 4;                        // Skip size of str table long (incl null long)
-               PutLong(oststr_ptr, 0x00000000);        // Null terminating long
-               oststr_end = oststr + OST_BLOCK;
+       // If the symbol is in the output symbol table and the bflag is set
+       // (don't remove multiply defined locals) and this is not an
+       // external/global symbol, or the gflag (output debug  symbols) is
+       // set and this a debug symbol, *** OR *** the symbol is not in the
+       // output symbol table then add it.
+       if ((ostresult != -1) && !(bflag && !(type & 0x01000000))
+               && !(gflag && (type & 0xF0000000)))
+       {
+               return ostresult;
        }
-       else
+
+       // If the OST has not been initialised, or more space is needed, then
+       // allocate it.
+       if ((ost_index + 1) > ost_size)
        {
-               // If next symbol record exceeds current allocation then expand symbol
-               // table and/or symbol string table.
-               ost_offset_p = (ost_ptr - ost);
-               ost_offset_e = (ost_end - ost);
+               if (ost_size == 0)
+                       ost_size = OST_SIZE_INIT;
 
-               // 3 x uint32_t (12 bytes)
-               if ((ost_ptr + 12) > ost_end)
-               {
-                       // We want to allocate the current size of the OST + another block.
-                       ost = realloc(ost, ost_offset_e + OST_BLOCK);
+               ost_size *= 2;
 
-                       if (ost == NULL)
-                       {
-                               printf("OST memory reallocation error.\n");
-                               return -1;
-                       }
+               ost = realloc(ost, ost_size * sizeof(ost[0]));
 
-                       ost_ptr = ost + ost_offset_p;
-                       ost_end = (ost + ost_offset_e) + OST_BLOCK;
+               if (ost == NULL)
+               {
+                       printf("OST memory allocation error.\n");
+                       return -1;
                }
+       }
 
+       if (slen)
+       {
                ost_offset_p = (oststr_ptr - oststr);
                ost_offset_e = (oststr_end - oststr);
 
-               // string length + terminating NULL + uint32_t (terminal long)
-               if ((oststr_ptr + (slen + 1 + 4)) > oststr_end)
+               // If the OST data has been exhausted, allocate another chunk.
+               if (((oststr_ptr + slen + 4) > oststr_end))
                {
-                       oststr = realloc(oststr, ost_offset_e + OST_BLOCK);
-
-                       if (oststr == NULL)
+                       // string length + terminating NULL + uint32_t (terminal long)
+                       if ((oststr_ptr + (slen + 1 + 4)) > oststr_end)
                        {
-                               printf("OSTSTR memory reallocation error.\n");
-                               return -1;
-                       }
+                               oststr = realloc(oststr, ost_offset_e + OST_BLOCK);
 
-                       oststr_ptr = oststr + ost_offset_p;
-                       oststr_end = (oststr + ost_offset_e) + OST_BLOCK;
-               }
-       }
-
-       // If this is a debug symbol and the include debug symbol flag (-g) is not
-       // set then do nothing
-       if ((type & 0xF0000000) && !gflag)
-       {
-               // Do nothing
-               return 0;
-       }
+                               if (oststr == NULL)
+                               {
+                                       printf("OSTSTR memory reallocation error.\n");
+                                       return -1;
+                               }
 
-       // Get symbol index in OST, if any (-1 if not found)
-       ostresult = OSTLookup(name);
+                               oststr_ptr = oststr + ost_offset_p;
+                               oststr_end = (oststr + ost_offset_e) + OST_BLOCK;
 
-       // If the symbol is in the output symbol table and the bflag is set
-       // (don't remove multiply defined locals) and this is not an
-       // external/global symbol *** OR *** the symbol is not in the output
-       // symbol table then add it.
-       if (((ostresult != -1) && bflag && !(type & 0x01000000))
-               || ((ostresult != -1) && gflag && (type & 0xF0000000))
-               || (ostresult == -1))
-       {
-               if ((type & 0xF0000000) == 0x40000000)
-                       PutLong(ost_ptr, 0x00000000);   // Zero string table offset for dbg line
-               else
-                       PutLong(ost_ptr, (oststr_ptr - oststr));        // String table offset of symbol string
+                               // On the first alloc, reserve space for the string table
+                               // size field.
+                               if (ost_offset_e == 0)
+                                       oststr_ptr += 4;
+                       }
+               }
 
-               PutLong(ost_ptr + 4, type);
-               PutLong(ost_ptr + 8, value);
-               ost_ptr += 12;
+               strcpy(oststr_ptr, name);                       // Put symbol name in string table
+               oststr_ptr += slen;
+               oststr_ptr[-1] = '\0';                          // Add null terminating character
+               PutLong(oststr_ptr, 0x00000000);        // Null terminating long
+               PutLong(oststr, (oststr_ptr - oststr)); // Update size of string table
+       }
 
-               // If the symbol type is anything but a debug line information
-               // symbol then write the symbol string to the string table
-               if ((type & 0xF0000000) != 0x40000000)
-               {
-                       strcpy(oststr_ptr, name);               // Put symbol name in string table
-                       *(oststr_ptr + slen) = '\0';    // Add null terminating character
-                       oststr_ptr += (slen + 1);
-                       PutLong(oststr_ptr, 0x00000000);        // Null terminating long
-                       PutLong(oststr, (oststr_ptr - oststr)); // Update size of string table
-               }
+       ostresult = ost_index++;
 
-               if (vflag > 1)
-                       printf("OSTAdd: (%s), type=$%08X, val=$%08lX\n", name, type, value);
+       ost[ostresult].s_idx = ost_offset_p;
+       ost[ostresult].s_type = type;
+       ost[ostresult].s_value = value;
 
-// is ost_index pointing one past?
-// does this return the same regardless of if its ++n or n++?
-// no. it returns the value of ost_index *before* it's incremented.
-               return ++ost_index;
-       }
+       if (vflag > 1)
+               printf("OSTAdd: (%s), type=$%08X, val=$%08lX\n",
+                          slen ? name : "", type, value);
 
-       return ostresult;
+       return ost_index;
 }
 
 
@@ -525,14 +501,11 @@ int OSTAdd(char * name, int type, long value)
 int OSTLookup(char * sym)
 {
        int i;
-       int stro = 4;           // Offset in string table
 
        for(i=0; i<ost_index; i++)
        {
-               if (strcmp(oststr + stro, sym) == 0)
+               if (ost[i].s_idx && (strcmp(oststr + ost[i].s_idx, sym) == 0))
                        return i + 1;
-
-               stro += strlen(oststr + stro) + 1;
        }
 
        return -1;
@@ -689,7 +662,7 @@ int RelocateSegment(struct OFILE * ofile, int flag)
                        strcpy(sym, symbols + symidx);
                        olddata = newdata = 0;   // Initialise old and new segment data
                        ssidx = OSTLookup(sym);
-                       newdata = GetLong(ost + ((ssidx - 1) * 12) + 8);
+                       newdata = ost[ssidx - 1].s_value;
                }
 
                // Obtain the existing long word (or word) segment data and flip words
@@ -1076,10 +1049,8 @@ int WriteOutputFile(struct OHEADER * header)
        int i, j;                                                       // Iterators
        char himage[0x168];                                     // Header image (COF = 0xA8)
        uint32_t tsoff, dsoff, bsoff;           // Segment offset values
-       unsigned index, type, value;            // Symbol table index, type and value
        short abstype;                                          // ABS symbol type
-       char symbol[14];                                        // Symbol record for ABS files
-       int slen;                                                       // Symbol string length
+       char symbol[14];                                        // raw symbol record
 
        symoffset = 0;                                          // Initialise symbol offset
 
@@ -1267,8 +1238,15 @@ int WriteOutputFile(struct OHEADER * header)
                {
                        if (header->ssize)
                        {
-                               if (fwrite(ost, (ost_ptr - ost), 1, fd) != 1)
-                                       goto werror;
+                               for (i = 0; i < ost_index; i++)
+                               {
+                                       PutLong(symbol,     ost[i].s_idx);
+                                       PutLong(symbol + 4, ost[i].s_type);
+                                       PutLong(symbol + 8, ost[i].s_value);
+
+                                       if (fwrite(symbol, 12, 1, fd) != 1)
+                                               goto werror;
+                               }
 
                                if (fwrite(oststr, (oststr_ptr - oststr), 1, fd) != 1)
                                        goto werror;
@@ -1288,32 +1266,16 @@ int WriteOutputFile(struct OHEADER * header)
                        {
                                memset(symbol, 0, 14);          // Initialise symbol record
                                abstype = 0;                            // Initialise ABS symbol type
-                               slen = 0;                                       // Initialise symbol string length
-                               index = GetLong(ost + (i * 12));        // Get symbol index
-                               type  = GetLong((ost + (i * 12)) + 4);  // Get symbol type
 
                                // Skip debug symbols
-                               if (type & 0xF0000000)
+                               if (ost[i].s_type & 0xF0000000)
                                        continue;
 
-                               // Get symbol value
-                               value = GetLong((ost + (i * 12)) + 8);
-                               slen = strlen(oststr + index);
-
                                // Get symbol string (maximum 8 chars)
-                               if (slen > 8)
-                               {
-                                       for(j=0; j<8; j++)
-                                               *(symbol + j) = *(oststr + index + j);
-                               }
-                               else
-                               {
-                                       for(j=0; j<slen; j++)
-                                               *(symbol + j) = *(oststr + index + j);
-                               }
+                               strncpy(symbol, oststr + ost[i].s_idx, 8);
 
                                // Modify to ABS symbol type
-                               switch (type)
+                               switch (ost[i].s_type)
                                {
                                case 0x02000000: abstype = (short)ABST_DEFINED;                           break;
                                case 0x04000000: abstype = (short)ABST_DEFINED | ABST_TEXT;               break;
@@ -1323,13 +1285,13 @@ int WriteOutputFile(struct OHEADER * header)
                                case 0x08000000: abstype = (short)ABST_DEFINED | ABST_BSS;                break;
                                case 0x09000000: abstype = (short)ABST_DEFINED | ABST_GLOBAL | ABST_BSS;  break;
                                default:
-                                       printf("warning (WriteOutputFile): ABS, cannot determine symbol type ($%08X) [%s]\n", type, symbol);
+                                       printf("warning (WriteOutputFile): ABS, cannot determine symbol type ($%08X) [%s]\n", ost[i].s_type, symbol);
 //                                     type = 0;
                                        break;
                                }
 
-                               PutWord(symbol + 8, abstype);   // Write back new ABS type
-                               PutLong(symbol + 10, value);    // Write back value
+                               PutWord(symbol + 8, abstype);           // Write back new ABS type
+                               PutLong(symbol + 10, ost[i].s_value);   // Write back value
 
                                // Write symbol record
                                if (fwrite(symbol, 14, 1, fd) != 1)
@@ -1391,10 +1353,10 @@ int ShowSymbolLoadMap(struct OHEADER * header)
                // Inner loop to process each record in the symbol table
                for(i=0; i<(unsigned)ost_index; i++)
                {
-                       index  = GetLong(ost + (i * 12));               // Get symbol string index
-                       type   = GetLong(ost + (i * 12) + 4);   // Get symbol type
-                       value  = GetLong(ost + (i * 12) + 8);   // Get symbol value
-                       symbol = oststr + index;                                // Get symbol string
+                       index  = ost[i].s_idx;                                  // Get symbol string index
+                       type   = ost[i].s_type;                                 // Get symbol type
+                       value  = ost[i].s_value;                                // Get symbol value
+                       symbol = index ? oststr + index : "";   // Get symbol string
 
                        // Display only three columns
                        if (c == 3)
@@ -1645,8 +1607,8 @@ struct OHEADER * MakeOutputObject()
        header->tsize = textsize;                       // TEXT segment size
        header->dsize = datasize;                       // DATA segment size
        header->bsize = bsssize;                        // BSS segment size
-       header->ssize = (ost_ptr - ost);        // Symbol table size
-       header->ostbase = ost;                          // Output symbol table base address
+       header->ssize = ost_index * 12;         // Symbol table size
+       header->ostbase = NULL;                         // Output symbol table base address
 
        // For each object file, relocate its TEXT and DATA segments. OR the result
        // into ret so all files get moved (and errors reported) before returning
diff --git a/rln.h b/rln.h
index 7737f4d945e1e6340e73af7668e83cfcadf3c122..9451dfb395084ad719cf0a7210d9119a4ad9fc2e 100644 (file)
--- a/rln.h
+++ b/rln.h
@@ -250,18 +250,15 @@ struct OFILE
 
 // Symbol Record
 
-// SYMREC: Used by builddir for the lists of exports and imports, and by the
-// linker for the output symbol table (that's why there are type and value
-// fields, unused in builddir)
+// SYMREC: Used by the linker for the output symbol table
 
-#define SYMLEN       100                       // Symbol name size (incl. null)
+#define OST_SIZE_INIT 8                                // Half the initial output symbol table size
 
 struct SYMREC
 {
-       uint8_t s_name[SYMLEN];                 // Including null terminator
-       uint16_t s_type;
+       uint32_t s_idx;
+       uint32_t s_type;
        uint32_t s_value;
-       struct SYMREC * s_next;
 };
 
 #define new_symrec() (struct SYMREC *)malloc(sizeof(struct SYMREC))
@@ -272,6 +269,8 @@ struct SYMREC
 // and Globals share a hash table, but their value fields are interpreted
 // differently.
 
+#define SYMLEN       100                       // Symbol name size (incl. null)
+
 struct HREC
 {
        uint8_t h_sym[SYMLEN];