X-Git-Url: http://shamusworld.gotdns.org/cgi-bin/gitweb.cgi?p=rmac;a=blobdiff_plain;f=token.c;h=e1be9b5b8871ea3cde208b29b8e9a072f43ec64d;hp=2a8513d899a45f3f3883aaeacf6cd6aafc31afe0;hb=eace4e1b294ccec54a5c476619f616f5da0bf8a9;hpb=f7f625cf6c2f4b96854ac0e911ca2b1c249f4e05

diff --git a/token.c b/token.c
index 2a8513d..e1be9b5 100644
--- a/token.c
+++ b/token.c
@@ -7,6 +7,8 @@
 //
 
 #include "token.h"
+
+#include <errno.h>
 #include "direct.h"
 #include "error.h"
 #include "macro.h"
@@ -35,7 +37,8 @@ WORD cfileno;				// Current file number
 TOKEN * tok;				// Ptr to current token
 TOKEN * etok;				// Ptr past last token in tokbuf[]
 TOKEN tokeol[1] = {EOL};	// Bailout end-of-line token
-char * string[TOKBUFSIZE*2];	// Token buffer string pointer storage
+char * string[TOKBUFSIZE*2];// Token buffer string pointer storage
+int optimizeOff;			// Optimization override flag
 
 // File record, used to maintain a list of every include file ever visited
 #define FILEREC struct _filerec
@@ -48,12 +51,12 @@ FILEREC
 FILEREC * filerec;
 FILEREC * last_fr;
 
-INOBJ * cur_inobj;						// Ptr current input obj (IFILE/IMACRO)
-static INOBJ * f_inobj;					// Ptr list of free INOBJs
-static IFILE * f_ifile;					// Ptr list of free IFILEs
-static IMACRO * f_imacro;				// Ptr list of free IMACROs
+INOBJ * cur_inobj;			// Ptr current input obj (IFILE/IMACRO)
+static INOBJ * f_inobj;		// Ptr list of free INOBJs
+static IFILE * f_ifile;		// Ptr list of free IFILEs
+static IMACRO * f_imacro;	// Ptr list of free IMACROs
 
-static TOKEN tokbuf[TOKBUFSIZE];		// Token buffer (stack-like, all files)
+static TOKEN tokbuf[TOKBUFSIZE];	// Token buffer (stack-like, all files)
 
 uint8_t chrtab[0x100] = {
 	ILLEG, ILLEG, ILLEG, ILLEG,			// NUL SOH STX ETX
@@ -205,13 +208,13 @@ void InitTokenizer(void)
 	dotxtab['W'] = DOTW;
 	dotxtab['l'] = DOTL;					// .l .L
 	dotxtab['L'] = DOTL;
-	dotxtab['i'] = DOTI;					// .i .I (???)
+	dotxtab['i'] = DOTI;					// .i .I (WTF is this???)
 	dotxtab['I'] = DOTI;
-	dotxtab['D'] = DOTD;					// .d .D (quad word)
+	dotxtab['D'] = DOTD;					// .d .D (double)
 	dotxtab['d'] = DOTD;
 	dotxtab['S'] = DOTS;					// .s .S
 	dotxtab['s'] = DOTS;
-	dotxtab['Q'] = DOTQ;					// .q .Q
+	dotxtab['Q'] = DOTQ;					// .q .Q (quad word)
 	dotxtab['q'] = DOTQ;
 	dotxtab['X'] = DOTX;					// .x .x
 	dotxtab['x'] = DOTX;
@@ -389,7 +392,7 @@ int ExpandMacro(char * src, char * dest, int destsiz)
 				*dst++ = *s++;
 				continue;
 			case '?':						// \? <macro>  set `questmark' flag
-				++s;
+				s++;
 				questmark = 1;
 				break;
 			case '#':						// \#, number of arguments
@@ -567,7 +570,8 @@ DEBUG { printf("ExM: SYMBOL=\"%s\"", d); }
 //         to choke on legitimate code... Need to investigate this further
 //         before changing anything else here!
 							case CONST:
-								sprintf(numbuf, "$%lx", (long unsigned int)*tk++);
+								sprintf(numbuf, "$%lx", (uint64_t)*tk++);
+								tk++;
 								d = numbuf;
 								break;
 							case DEQUALS:
@@ -824,17 +828,11 @@ DEBUG { printf("[fpop: (post) cfileno=%d ifile->ifno=%d]\n", (int)cfileno, (int)
 	case SRC_IREPT:						// Pop and release an IREPT
 	{
 		DEBUG { printf("dealloc IREPT\n"); }
-//		LONG * p = inobj->inobj.irept->ir_firstln;
 		LLIST * p = inobj->inobj.irept->ir_firstln;
 
 		// Deallocate repeat lines
 		while (p != NULL)
 		{
-// Shamus: ggn confirmed that this will cause a segfault on 64-bit versions of
-//         RMAC. This is just stupid and wrong anyway, so we need to fix crapola
-//         like this...
-//			LONG * p1 = (LONG *)*p;
-//			p = p1;
 			free(p->line);
 			p = p->next;
 		}
@@ -950,11 +948,13 @@ int TokenizeLine(void)
 {
 	uint8_t * ln = NULL;		// Ptr to current position in line
 	uint8_t * p;				// Random character ptr
-	TOKEN * tk;					// Token-deposit ptr
+	PTR tk;						// Token-deposit ptr
 	int state = 0;				// State for keyword detector
 	int j = 0;					// Var for keyword detector
 	uint8_t c;					// Random char
-	VALUE v;					// Random value
+	uint64_t v;					// Random value
+	uint32_t cursize = 0;		// Current line's size (.b, .w, .l, .s, .q, .d)
+	double f;					// Random float
 	uint8_t * nullspot = NULL;	// Spot to clobber for SYMBOL termination
 	int stuffnull;				// 1:terminate SYMBOL '\0' at *nullspot
 	uint8_t c1;
@@ -962,7 +962,7 @@ int TokenizeLine(void)
 
 retry:
 
-	if (cur_inobj == NULL)					// Return EOF if input stack is empty
+	if (cur_inobj == NULL)			// Return EOF if input stack is empty
 		return TKEOF;
 
 	// Get another line of input from the current input source: a file, a
@@ -1033,7 +1033,7 @@ DEBUG { printf("TokenizeLine: Calling fpop() from SRC_IFILE...\n"); }
 	case SRC_IREPT:
 		if ((ln = GetNextRepeatLine()) == NULL)
 		{
-DEBUG { printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n"); }
+			DEBUG { printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n"); }
 			fpop();
 			goto retry;
 		}
@@ -1049,8 +1049,8 @@ DEBUG { printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n"); }
 		strcpy(lnbuf, ln);
 
 	// General housekeeping
-	tok = tokeol;			// Set "tok" to EOL in case of error
-	tk = etok;				// Reset token ptr
+	tok = tokeol;		// Set "tok" to EOL in case of error
+	tk.u32 = etok;			// Reset token ptr
 	stuffnull = 0;			// Don't stuff nulls
 	totlines++;				// Bump total #lines assembled
 
@@ -1059,6 +1059,16 @@ DEBUG { printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n"); }
 	if (*ln == '*' || *ln == ';' || ((*ln == '/') && (*(ln + 1) == '/')))
 		goto goteol;
 
+	// And here we have a very ugly hack for signalling a single line 'turn off
+	// optimization'. There's really no nice way to do this, so hack it is!
+	optimizeOff = 0;		// Default is to take optimizations as they come
+
+	if (*ln == '!')
+	{
+		optimizeOff = 1;	// Signal that we don't want to optimize this line
+		ln++;				// & skip over the darned thing
+	}
+
 	// Main tokenization loop;
 	//  o  skip whitespace;
 	//  o  handle end-of-line;
@@ -1103,7 +1113,7 @@ DEBUG { printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n"); }
 					// token stream:
 					ln++;
 					stuffnull = 0;
-					*tk++ = (TOKEN)dotxtab[*ln++];
+					*tk.u32++ = (TOKEN)dotxtab[*ln++];
 					continue;
 				}
 			}
@@ -1121,14 +1131,15 @@ DEBUG { printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n"); }
 				*ln++ = EOS;		// Terminate symbol
 				stuffnull = 0;		// And never try it again
 
-				// Character following the `.' must have a DOT attribute, and
+				// Character following the '.' must have a DOT attribute, and
 				// the chararacter after THAT one must not have a start-symbol
 				// attribute (to prevent symbols that look like, for example,
 				// "zingo.barf", which might be a good idea anyway....)
 				if (((chrtab[*ln] & DOT) == 0) || (dotxtab[*ln] == 0))
 					return error("[bwsl] must follow '.' in symbol");
 
-				v = (VALUE)dotxtab[*ln++];
+				v = (uint32_t)dotxtab[*ln++];
+				cursize = (uint32_t)v;
 
 				if (chrtab[*ln] & CTSYM)
 					return error("misuse of '.'; not allowed in symbols");
@@ -1185,7 +1196,7 @@ DEBUG { printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n"); }
 			// If not tokenized keyword OR token was not found
 			if ((j < 0) || (state < 0))
 			{
-				*tk++ = SYMBOL;
+				*tk.u32++ = SYMBOL;
 //#warning
 //problem here: nullspot is a char * but TOKEN is a uint32_t. On a 64-bit
 //system, this will cause all kinds of mischief.
@@ -1193,18 +1204,18 @@ DEBUG { printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n"); }
 				*tk++ = (TOKEN)nullspot;
 #else
 				string[stringNum] = nullspot;
-				*tk++ = stringNum;
+				*tk.u32++ = stringNum;
 				stringNum++;
 #endif
 			}
 			else
 			{
-				*tk++ = (TOKEN)j;
+				*tk.u32++ = (TOKEN)j;
 				stuffnull = 0;
 			}
 
 			if (v)							// Record attribute token (if any)
-				*tk++ = (TOKEN)v;
+				*tk.u32++ = (TOKEN)v;
 
 			if (stuffnull)					// Arrange for string termination on next pass
 				nullspot = ln;
@@ -1215,7 +1226,7 @@ DEBUG { printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n"); }
 		// Handle identity tokens
 		if (c & SELF)
 		{
-			*tk++ = *ln++;
+			*tk.u32++ = *ln++;
 			continue;
 		}
 
@@ -1227,27 +1238,27 @@ DEBUG { printf("TokenizeLine: Calling fpop() from SRC_IREPT...\n"); }
 			case '!':		// ! or !=
 				if (*ln == '=')
 				{
-					*tk++ = NE;
-					++ln;
+					*tk.u32++ = NE;
+					ln++;
 				}
 				else
-					*tk++ = '!';
+					*tk.u32++ = '!';
 
 				continue;
 			case '\'':		// 'string'
 				if (m6502)
 				{
 					// Hardcoded for now, maybe this will change in the future
-					*tk++ = STRINGA8;
+					*tk.u32++ = STRINGA8;
 					goto dostring;
 				}
 				// Fall through
 			case '\"':		// "string"
-				*tk++ = STRING;
+				*tk.u32++ = STRING;
 dostring:
 				c1 = ln[-1];
 				string[stringNum] = ln;
-				*tk++ = stringNum;
+				*tk.u32++ = stringNum;
 				stringNum++;
 
 				for(p=ln; *ln!=EOS && *ln!=c1;)
@@ -1287,6 +1298,11 @@ dostring:
 						case '\\':
 							c = '\\';
 							break;
+						case '!':
+							// If we're evaluating a macro
+							// this is valid and expands to
+							// "dot-size"
+							break;
 						default:
 							warn("bad backslash code in string");
 							ln--;
@@ -1327,13 +1343,14 @@ dostring:
 							}
 							else if ((*(ln + 1) & 0xDF) == 'L')
 							{
+								v &= 0xFFFFFFFF;
 								ln += 2;
 							}
 						}
 					}
 
-					*tk++ = CONST;
-					*tk++ = v;
+					*tk.u32++ = CONST;
+					*tk.u64++ = v;
 
 					if (obj_format == ALCYON)
 					{
@@ -1341,79 +1358,79 @@ dostring:
 						{
 							if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
 							{
-								*tk++ = DOTW;
+								*tk.u32++ = DOTW;
 								ln += 2;
 							}
 							else if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
 							{
-								*tk++ = DOTL;
+								*tk.u32++ = DOTL;
 								ln += 2;
 							}
 						}
 					}
 				}
 				else
-					*tk++ = '$';
+					*tk.u32++ = '$';
 
 				continue;
 			case '<':		// < or << or <> or <=
 				switch (*ln)
 				{
 				case '<':
-					*tk++ = SHL;
-					++ln;
+					*tk.u32++ = SHL;
+					ln++;
 					continue;
 				case '>':
-					*tk++ = NE;
-					++ln;
+					*tk.u32++ = NE;
+					ln++;
 					continue;
 				case '=':
-					*tk++ = LE;
-					++ln;
+					*tk.u32++ = LE;
+					ln++;
 					continue;
 				default:
-					*tk++ = '<';
+					*tk.u32++ = '<';
 					continue;
 				}
 			case ':':		// : or ::
 				if (*ln == ':')
 				{
-					*tk++ = DCOLON;
-					++ln;
+					*tk.u32++ = DCOLON;
+					ln++;
 				}
 				else
-					*tk++ = ':';
+					*tk.u32++ = ':';
 
 				continue;
 			case '=':		// = or ==
 				if (*ln == '=')
 				{
-					*tk++ = DEQUALS;
-					++ln;
+					*tk.u32++ = DEQUALS;
+					ln++;
 				}
 				else
-					*tk++ = '=';
+					*tk.u32++ = '=';
 
 				continue;
 			case '>':		// > or >> or >=
 				switch (*ln)
 				{
 				case '>':
-					*tk++ = SHR;
+					*tk.u32++ = SHR;
 					ln++;
 					continue;
 				case '=':
-					*tk++ = GE;
+					*tk.u32++ = GE;
 					ln++;
 					continue;
 				default:
-					*tk++ = '>';
+					*tk.u32++ = '>';
 					continue;
 				}
 			case '%':		// % or binary constant
 				if (*ln < '0' || *ln > '1')
 				{
-					*tk++ = '%';
+					*tk.u32++ = '%';
 					continue;
 				}
 
@@ -1438,17 +1455,18 @@ dostring:
 
 					if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
 					{
+						v &= 0xFFFFFFFF;
 						ln += 2;
 					}
 				}
 
-				*tk++ = CONST;
-				*tk++ = v;
+				*tk.u32++ = CONST;
+				*tk.u64++ = v;
 				continue;
 			case '@':		// @ or octal constant
 				if (*ln < '0' || *ln > '7')
 				{
-					*tk++ = '@';
+					*tk.u32++ = '@';
 					continue;
 				}
 
@@ -1459,31 +1477,32 @@ dostring:
 
 				if (*ln == '.')
 				{
-					if ((*(ln+1) == 'b') || (*(ln+1) == 'B'))
+					if ((*(ln + 1) == 'b') || (*(ln + 1) == 'B'))
 					{
 						v &= 0x000000FF;
 						ln += 2;
 					}
 
-					if ((*(ln+1) == 'w') || (*(ln+1) == 'W'))
+					if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
 					{
 						v &= 0x0000FFFF;
 						ln += 2;
 					}
 
-					if ((*(ln+1) == 'l') || (*(ln+1) == 'L'))
+					if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
 					{
+						v &= 0xFFFFFFFF;
 						ln += 2;
 					}
 				}
 
-				*tk++ = CONST;
-				*tk++ = v;
+				*tk.u32++ = CONST;
+				*tk.u64++ = v;
 				continue;
 			case '^':		// ^ or ^^ <operator-name>
 				if (*ln != '^')
 				{
-					*tk++ = '^';
+					*tk.u32++ = '^';
 					continue;
 				}
 
@@ -1529,7 +1548,7 @@ dostring:
 					continue;
 				}
 
-				*tk++ = (TOKEN)j;
+				*tk.u32++ = (TOKEN)j;
 				continue;
 			default:
 				interror(2);	// Bad MULTX entry in chrtab
@@ -1540,6 +1559,7 @@ dostring:
 		// Handle decimal constant
 		if (c & DIGIT)
 		{
+			uint8_t * numStart = ln;
 			v = 0;
 
 			while ((int)chrtab[*ln] & DIGIT)
@@ -1552,20 +1572,63 @@ dostring:
 				{
 					v &= 0x000000FF;
 					ln += 2;
+					*tk.u32++ = CONST;
+					*tk.u64++ = v;
+					*tk.u32++ = DOTB;
 				}
 				else if ((*(ln + 1) == 'w') || (*(ln + 1) == 'W'))
 				{
 					v &= 0x0000FFFF;
 					ln += 2;
+					*tk.u32++ = CONST;
+					*tk.u64++ = v;
+					*tk.u32++ = DOTW;
 				}
 				else if ((*(ln + 1) == 'l') || (*(ln + 1) == 'L'))
 				{
+					v &= 0xFFFFFFFF;
 					ln += 2;
+					*tk.u32++ = CONST;
+					*tk.u64++ = v;
+					*tk.u32++ = DOTL;
+				}
+				else if ((int)chrtab[*(ln + 1)] & DIGIT)
+				{
+					// Hey, more digits after the dot, so we assume it's a
+					// floating point number of some kind
+#if 0
+					double fract = 10;
+					ln++;
+					f = (double)v;
+
+					while ((int)chrtab[*ln] & DIGIT)
+					{
+						f = f + (double)(*ln++ - '0') / fract;
+						fract *= 10;
+					}
+#else
+					// Here we parse the whole floating point number
+					char * numEnd;
+					errno = 0;
+					double f = strtod(numStart, &numEnd);
+					ln = (uint8_t *)numEnd;
+
+					if (errno != 0)
+						return error("floating point parse error");
+#endif
+
+					*tk.u32++ = FCONST;
+// Shamus: Well, this is all kinds of icky--not the least of which is that, unlike the uintNN_t types, C gives us no guarantee of any kind about the size of floating point types (as far as I know). If fixed-size floating point types exist, we need to use them here, or else figure out at runtime what sizes we're dealing with and act accordingly. To be fair, this is OK as long as the double type is no wider than 64 bits, but again, there's no guarantee of that. :-/ NOTE(review): if tk.u64 is a uint64_t *, the assignment below converts f's value to an integer (dropping the fraction) rather than storing its bit pattern--confirm.
+					*tk.u64++ = f;
+					continue;
 				}
 			}
+			else
+			{
+				*tk.u32++ = CONST;
+				*tk.u64++ = v;
+			}
 
-			*tk++ = CONST;
-			*tk++ = v;
 //printf("CONST: %i\n", v);
 			continue;
 		}
@@ -1577,12 +1640,12 @@ dostring:
 	// Terminate line of tokens and return "success."
 
 goteol:
-	tok = etok;								// Set tok to beginning of line
+	tok = etok;							// Set tok to beginning of line
 
 	if (stuffnull)							// Terminate last SYMBOL
 		*nullspot = EOS;
 
-	*tk++ = EOL;
+	*tk.u32++ = EOL;
 
 	return OK;
 }
@@ -1653,6 +1716,85 @@ int d_goto(WORD unused)
 }
 
 
+void DumpToken(TOKEN t)	// Debug helper: printf() a bracketed, human-readable name for the single token code t
+{	// Comparisons are tried top to bottom and the first match wins; no newline is printed
+	if (t == COLON)
+		printf("[COLON]");
+	else if (t == CONST)	// Only the token code is shown; t is passed by value, so any operand words following it in a token stream are not visible here
+		printf("[CONST]");
+	else if (t == ACONST)
+		printf("[ACONST]");
+	else if (t == STRING)
+		printf("[STRING]");
+	else if (t == SYMBOL)
+		printf("[SYMBOL]");
+	else if (t == EOS)
+		printf("[EOS]");
+	else if (t == TKEOF)
+		printf("[TKEOF]");
+	else if (t == DEQUALS)
+		printf("[DEQUALS]");
+	else if (t == SET)
+		printf("[SET]");
+	else if (t == REG)
+		printf("[REG]");
+	else if (t == DCOLON)
+		printf("[DCOLON]");
+	else if (t == GE)
+		printf("[GE]");
+	else if (t == LE)
+		printf("[LE]");
+	else if (t == NE)
+		printf("[NE]");
+	else if (t == SHR)
+		printf("[SHR]");
+	else if (t == SHL)
+		printf("[SHL]");
+	else if (t == UNMINUS)
+		printf("[UNMINUS]");
+	else if (t == DOTB)
+		printf("[DOTB]");
+	else if (t == DOTW)
+		printf("[DOTW]");
+	else if (t == DOTL)
+		printf("[DOTL]");
+	else if (t == DOTQ)
+		printf("[DOTQ]");
+	else if (t == DOTS)
+		printf("[DOTS]");
+	else if (t == DOTD)
+		printf("[DOTD]");
+	else if (t == DOTI)
+		printf("[DOTI]");
+	else if (t == ENDEXPR)
+		printf("[ENDEXPR]");
+	else if (t == CR_ABSCOUNT)
+		printf("[CR_ABSCOUNT]");
+	else if (t == CR_DEFINED)
+		printf("[CR_DEFINED]");
+	else if (t == CR_REFERENCED)
+		printf("[CR_REFERENCED]");
+	else if (t == CR_STREQ)
+		printf("[CR_STREQ]");
+	else if (t == CR_MACDEF)
+		printf("[CR_MACDEF]");
+	else if (t == CR_TIME)
+		printf("[CR_TIME]");
+	else if (t == CR_DATE)
+		printf("[CR_DATE]");
+	else if (t >= 0x20 && t <= 0x2F)	// ASCII space through '/': shown as the literal character
+		printf("[%c]", (char)t);
+	else if (t >= 0x3A && t <= 0x3F)	// ASCII ':' through '?': likewise shown as the literal character
+		printf("[%c]", (char)t);
+	else if (t >= 0x80 && t <= 0x87)	// 0x80..0x87 print as D0..D7 (presumably data registers -- TODO confirm)
+		printf("[D%u]", ((uint32_t)t) - 0x80);
+	else if (t >= 0x88 && t <= 0x8F)	// 0x88..0x8F print as A0..A7 (presumably address registers -- TODO confirm)
+		printf("[A%u]", ((uint32_t)t) - 0x88);
+	else	// Anything unrecognized: raw value in hex, plus the value cast to char
+		printf("[%X:%c]", (uint32_t)t, (char)t);
+}
+
+
 void DumpTokenBuffer(void)
 {
 	printf("Tokens [%X]: ", sloc);
@@ -1663,8 +1805,10 @@ void DumpTokenBuffer(void)
 			printf("[COLON]");
 		else if (*t == CONST)
 		{
-			t++;
-			printf("[CONST: $%X]", (uint32_t)*t);
+			PTR tp;
+			tp.u32 = t + 1;
+			printf("[CONST: $%lX]", *tp.u64);
+			t += 2;
 		}
 		else if (*t == ACONST)
 		{
@@ -1711,6 +1855,12 @@ void DumpTokenBuffer(void)
 			printf("[DOTW]");
 		else if (*t == DOTL)
 			printf("[DOTL]");
+		else if (*t == DOTQ)
+			printf("[DOTQ]");
+		else if (*t == DOTS)
+			printf("[DOTS]");
+		else if (*t == DOTD)
+			printf("[DOTD]");
 		else if (*t == DOTI)
 			printf("[DOTI]");
 		else if (*t == ENDEXPR)