2 Do not include this file in your project. The fparser.cc file #includes
3 this file internally and thus you don't need to do anything (other than keep
4 this file in the same directory as fparser.cc).
6 Part of this file is code generated by the make_function_name_parser
7 utility (found in the development version of this library). It is not
8 intended to be modified by hand.
// Count of identifier bytes accepted so far; the scan loop below increments
// it for every byte it accepts.
11 unsigned nameLength = 0;
// Upper bound on nameLength used as the scan loop's condition
// (0x80000000 minus 8 bytes of headroom).
12 const unsigned maximumNameLength = 0x80000000U-8;
14 Because of the way identifier lengths are returned from
15 the readOpcode() function, the maximum supported length for
16 identifiers is 0x7FFFFFFF bytes. We subtract 8 here to leave some
17 headroom, because a single UTF-8 character can span several bytes.
18 Function names are limited to 0xFFFF bytes instead, but because
19 no function names that long are defined, the point is moot.
// Identifier scanner: walks the bytes of `input` and stops (break) at the
// first byte or byte sequence that cannot be part of an identifier.
// NOTE(review): braces, else-branches and some guards of this loop are not
// visible in this view; notes that depend on them are marked as assumptions.
// View the input as unsigned bytes so comparisons against 0x80..0xFF work.
21 const unsigned char* const uptr = (const unsigned char*) input;
// Signed-byte alias used for one-comparison range tests: on the usual
// two's-complement targets 0x80..0xBF map to -128..-65, so
// `schar(x) > schar(0xBF)` is true exactly when x is NOT a UTF-8
// continuation byte (0x80..0xBF).
22 typedef signed char schar;
// `likely` is defined outside this view (presumably a branch-prediction hint).
23 while(likely(nameLength < maximumNameLength))
// Lead byte of the character currently being examined.
25 unsigned char byte = uptr[nameLength+0];
26 /* Handle the common case of A-Za-z first */
// NOTE(review): the trailing comment says 0x40..0x7F, so a `byte >= 0x40`
// guard presumably encloses this test on a line not visible here - confirm.
29 if(byte < 0x80) // 0x40..0x7F - most common case
31 // Valid characters in 40..7F: A-Za-z_
32 // Valid bitmask for 40..5F: 01111111111111111111111111100001
33 // Valid bitmask for 60..7F: 01111111111111111111111111100000
// 64-bit path: one 64-bit mask covers both 0x40..0x5F and 0x60..0x7F.
34 if(sizeof(unsigned long) == 8)
// n is 32 when unsigned long has 64 bits; it shifts the second half of the mask.
36 const unsigned n = sizeof(unsigned long)*8-32;
37 // ^ avoids compiler warning when not 64-bit
// Select the bit indexed by the low six bits of the byte.
38 unsigned long masklow6bits = 1UL << (byte & 0x3F);
// Rejected bits: 0 ('@'), 0x1B..0x1E ('[' '\' ']' '^'), n ('`') and
// 0x1B+n..0x1F+n ('{' '|' '}' '~' DEL). Any other bit accepts the byte.
39 if(masklow6bits & ~((1UL << 0) | (0x0FUL << (0x1B ))
40 | (1UL << n) | (0x1FUL << (0x1B+n))))
41 { ++nameLength; continue; }
// Narrower path (presumably the else-branch of the sizeof test): a 5-bit
// index shared by both halves rejects bit 0 and bits 0x1B..0x1F; '_' (which
// falls on bit 0x1F) is re-admitted explicitly.
45 unsigned masklow5bits = 1 << (byte & 0x1F);
46 if((masklow5bits & ~(1 | (0x1F << 0x1B))) || byte == '_')
47 { ++nameLength; continue; }
// From here on: multi-byte UTF-8 sequences. Lead bytes below 0xC2 end the
// identifier.
55 if(byte < 0xC2) break; // 0x80..0xC1
// C2 A0 is the UTF-8 encoding of U+00A0 (no-break space): the identifier
// ends there rather than including it.
56 if(byte == 0xC2 && uptr[nameLength+1]==0xA0) break; // skip nbsp
57 // C2-DF - next common case when >= 0x40
58 // Valid sequence: C2-DF 80-BF
// Second byte must be a continuation byte (0x80..0xBF).
59 if(schar(uptr[nameLength+1]) > schar(0xBF)) break;
63 if(byte == 0xE0) // E0
65 // Valid sequence: E0 A0-BF 80-BF
// Unsigned wrap-around range test: true when the second byte is outside
// 0xA0..0xBF.
66 if((unsigned char)(uptr[nameLength+1] - 0xA0) > (0xBF-0xA0)) break;
// NOTE(review): this rejects every ED-led sequence. In well-formed UTF-8
// only ED A0-BF xx (surrogates) is invalid; ED 80-9F xx encodes
// U+D000..U+D7FF. Confirm whether rejecting that range is intended.
70 if(byte == 0xED) break; // ED is invalid
71 // Valid sequence: E1-EC 80-BF 80-BF
72 // And: EE-EF 80-BF 80-BF
75 // break on various space characters
// NOTE(review): presumably guarded by a `byte == 0xE2` test on a line not
// visible here. Under that assumption the rejected sequences are
// E2 80 80..8B (U+2000..U+200B), E2 80 AF (U+202F) and E2 81 9F (U+205F).
76 if(uptr[nameLength+1] == 0x80
77 && (schar(uptr[nameLength+2]) <= schar(0x8B)
78 || (uptr[nameLength+2] == 0xAF))) break;
79 if(uptr[nameLength+1] == 0x81
80 && uptr[nameLength+2] == 0x9F) break;
// E3 80 80 is U+3000 (ideographic space).
82 if(byte == 0xE3 && uptr[nameLength+1] == 0x80
83 && uptr[nameLength+2] == 0x80) break; // this too
// Second and third bytes must both be continuation bytes.
85 if(schar(uptr[nameLength+1]) > schar(0xBF)) break;
87 if(schar(uptr[nameLength+2]) > schar(0xBF)) break;
91 if(byte == 0xF0) // F0
93 // Valid sequence: F0 90-BF 80-BF 80-BF
// Unsigned wrap-around range test: true when the second byte is outside
// 0x90..0xBF.
94 if((unsigned char)(uptr[nameLength+1] - 0x90) > (0xBF-0x90)) break;
98 if(byte > 0xF4) break; // F5-FF are invalid
99 if(byte == 0xF4) // F4
101 // Valid sequence: F4 80-8F
// Signed comparison accepts only 0x80..0x8F as the second byte.
102 if(schar(uptr[nameLength+1]) > schar(0x8F)) break;
107 // Valid sequence: F1-F3 80-BF 80-BF 80-BF
108 if(schar(uptr[nameLength+1]) > schar(0xBF)) break;
// Third and fourth bytes must be continuation bytes.
111 if(schar(uptr[nameLength+2]) > schar(0xBF)) break;
112 if(schar(uptr[nameLength+3]) > schar(0xBF)) break;
// Digit test. NOTE(review): `1UL << byte` is only meaningful for byte < 64,
// so this is presumably the branch taken for bytes below 0x40 - confirm.
118 if(sizeof(unsigned long) == 8)
120 // Valid bitmask for 00..1F: 00000000000000000000000000000000
121 // Valid bitmask for 20..3F: 00000000000000001111111111000000
122 const unsigned n = sizeof(unsigned long)*8-32;
123 // ^ avoids compiler warning when not 64-bit
124 unsigned long masklow6bits = 1UL << byte;
// With n == 32 the mask covers bits 48..57, i.e. '0'..'9'.
125 if(masklow6bits & (((1UL << 10)-1UL) << (16+n)))
126 { ++nameLength; continue; }
// Plain comparison form of the same '0'..'9' test.
130 if(byte >= '0' && byte <= '9')
131 { ++nameLength; continue; }
137 /* This function generated with make_function_name_parser.cc */
// Shorthand macros used by the generated code. Each expands to a partial
// token sequence, so they only make sense concatenated at their use sites.
// Several of them reference other shorthand macros (lF, lJ) that are
// defined outside this view.
// lE / lD: the text `0x8000000NU:N;` - presumably the tail of a conditional
// expression opened by l1 (`.enabled()?(`), choosing between a value with
// bit 31 set and the plain length N. TODO confirm against the full file.
147 #define lE 0x80000003U:3;
148 #define lD 0x80000005U:5;
// lC: opening of a std::memcmp call whose first argument starts with `lJ+`.
149 #define lC std::memcmp(lJ+
// l6: a `default:` label followed by lF.
155 #define l6 default:lF
// l5: opening of a `static const char tmp[...]` declaration.
156 #define l5 static const char tmp[
// l2: the length-4 counterpart of lE/lD, followed by `lF 4;`.
159 #define l2 0x80000004U:4;lF 4;
// l1: `.enabled()` call followed by the `?(` of a conditional expression.
160 #define l1 .enabled()?(
// l0: lF followed by the start of an index into `Functions`.
161 #define l0 lF Functions[
173 lJ[0]){lB'a':if('b'l4
216 lJ[1]){lB'a':if('x'l4
234 lJ[1]){lB'e':if('c'l4
255 lJ[1]){lB'c':if('o'l4
277 lJ[1]){lB'b':if('r'l4
299 lJ[1]){lB'v':if('a'l4
314 lB'l':{lI'o','g','2'}
337 lB't':{lI'a','n','h'}
349 lJ[1]){lB'c':{lI'o','s','h'}
355 lN's':{lI'i','n','h'}