Internal cleanup and refactoring, improving error line reporting

This commit is contained in:
Pavel Shevaev 2023-11-23 17:03:30 +03:00
parent 40a4a85248
commit 160a3f2c24
1 changed file with 160 additions and 166 deletions

View File

@ -10,7 +10,7 @@ class mtgMetaInfoParser
const T_Enum = 1005;
const T_RPC = 1006;
const T_End = 1007;
const T_Identifier = 1008;
const T_UserSymbol = 1008;
const T_Struct = 1009;
const T_Prop = 1010;
const T_Extends = 1011;
@ -40,14 +40,17 @@ class mtgMetaInfoParser
private ?mtgMetaParsedModule $module = null;
private string $file = "";
private string $source = "";
private int $cursor = 0;
private int $cursor_pos = 0;
private string $cursor_char = '';
private int $line = 0;
//TODO: declaring it with an 'int' type makes PHPStan produce many
// false positives
//token numeric identifier
private $T = 0;
//token extra string value which depends on concrete T
private string $T_value = "";
/** @var array<string,int>*/
private $type2T = array();
private $symbol2T = array();
/** @var array<int,string>*/
private $T2descr = array();
private array $shared_tokens = array();
@ -65,8 +68,7 @@ class mtgMetaInfoParser
private function _initTables()
{
$this->type2T = array(
$this->symbol2T = [
"string" => self::T_string,
"uint32" => self::T_uint32,
"int32" => self::T_int32,
@ -80,9 +82,21 @@ class mtgMetaInfoParser
"int64" => self::T_int64,
"bool" => self::T_bool,
"blob" => self::T_blob,
);
$this->T2descr = array_flip($this->type2T);
"true" => self::T_IntegerConstant,
"false" => self::T_IntegerConstant,
"struct" => self::T_Struct,
"interface" => self::T_Interface,
"enum" => self::T_Enum,
"RPC" => self::T_RPC,
"end" => self::T_End,
"extends" => self::T_Extends,
"implements" => self::T_Implements,
"func" => self::T_Func,
];
$this->T2descr = array_flip($this->symbol2T);
$this->T2descr[self::T_EOF] = '<EOF>';
$this->T2descr[self::T_StringConstant] = '<StringConstant>';
$this->T2descr[self::T_RawStringConstant] = '<RawStringConstant>';
@ -91,7 +105,7 @@ class mtgMetaInfoParser
$this->T2descr[self::T_Enum] = '<enum>';
$this->T2descr[self::T_RPC] = '<RPC>';
$this->T2descr[self::T_End] = '<end>';
$this->T2descr[self::T_Identifier] = '<Identifier>';
$this->T2descr[self::T_UserSymbol] = '<Identifier>';
$this->T2descr[self::T_Struct] = '<struct>';
$this->T2descr[self::T_Interface] = '<interface>';
$this->T2descr[self::T_Prop] = '<@prop>';
@ -148,17 +162,16 @@ class mtgMetaInfoParser
$this->module = $module;
$this->file = $file;
$this->source = $source;
$this->cursor = 0;
$this->line = 1;
$this->cursor_pos = -1;
$this->_cursorNext();
$this->shared_tokens = array();
try
{
$this->_next();
$this->_nextT();
while($this->T != self::T_EOF)
{
//echo "TOKEN : " . $this->T . " " . $this->T_value . " " . $this->line . "\n";
if($this->T == self::T_Prop)
$this->_parseSharedTokens($this->_parsePropTokens());
else if($this->T == self::T_Enum)
@ -172,7 +185,7 @@ class mtgMetaInfoParser
else if($this->T == self::T_RPC)
$this->_parseRPC();
else
$this->_error("Unexpected T ('" . $this->_toStr($this->T) . "' " . $this->T_value . ")");
$this->_error("Unexpected symbol ('" . $this->_toStr($this->T) . "' " . $this->T_value . ")");
}
}
catch(Exception $e)
@ -212,7 +225,7 @@ class mtgMetaInfoParser
$func_type = $this->_parseFuncType();
$type = new mtgTypeRef($func_type, $this->module, $origin);
}
else if($this->T == self::T_Identifier)
else if($this->T == self::T_UserSymbol)
{
$origin = new mtgOrigin($this->file, $this->line);
$type_name = $this->_parseDotName();
@ -223,13 +236,13 @@ class mtgMetaInfoParser
$origin = new mtgOrigin($this->file, $this->line);
$type_name = $this->T_value;
$type = new mtgTypeRef(new mtgBuiltinType($type_name), $this->module, $origin);
$this->_next();
$this->_nextT();
}
if($this->T == ord('['))
{
$origin = new mtgOrigin($this->file, $this->line);
$this->_next();
$this->_nextT();
$this->_checkThenNext(ord(']'));
$type = new mtgTypeRef(new mtgArrType($type), $this->module, $origin);
}
@ -240,7 +253,7 @@ class mtgMetaInfoParser
if($this->T != ord(','))
break;
$this->_next();
$this->_nextT();
}
if(sizeof($types) > 1)
@ -253,7 +266,7 @@ class mtgMetaInfoParser
{
$ftype = new mtgMetaFunc('');
$this->_next();
$this->_nextT();
$this->_checkThenNext(ord('('));
@ -262,7 +275,7 @@ class mtgMetaInfoParser
{
if($this->T == ord(')'))
{
$this->_next();
$this->_nextT();
break;
}
else if($c > 0)
@ -278,7 +291,7 @@ class mtgMetaInfoParser
if($this->T == ord(':'))
{
$this->_next();
$this->_nextT();
$ret_type = $this->_parseType(true/*can be multi-type*/);
$ftype->setReturnType($ret_type);
}
@ -325,10 +338,10 @@ class mtgMetaInfoParser
$values = array();
while(true)
{
if($this->T == self::T_Identifier)
if($this->T == self::T_UserSymbol)
{
$values[] = $this->T_value;
$this->_next();
$this->_nextT();
if(!$this->_nextIf(ord('|')))
break;
}
@ -341,8 +354,7 @@ class mtgMetaInfoParser
private function _parseEnum()
{
$this->_next();
$this->_nextT();
$name = $this->_parseDotName();
$enum = new mtgMetaEnum($name);
@ -356,15 +368,17 @@ class mtgMetaInfoParser
{
if($this->_nextIf(self::T_End))
break;
$key = $this->_checkThenNext(self::T_Identifier);
$key = $this->T_value;
$this->_checkThenNext(self::T_UserSymbol);
$this->_checkThenNext(ord('='));
if($this->T == self::T_Identifier)
if($this->T == self::T_UserSymbol)
{
$or_values[$key] = $this->_parseEnumOrValues();
}
else
{
$value = $this->_checkThenNext(self::T_IntegerConstant);
$value = $this->T_value;
$this->_checkThenNext(self::T_IntegerConstant);
$enum->addValue($key, $value);
}
}
@ -412,13 +426,13 @@ class mtgMetaInfoParser
if($next_doer())
break;
if($this->T == self::T_Identifier)
if($this->T == self::T_UserSymbol)
{
$name = $this->T_value;
$this->_next();
$this->_nextT();
$this->_checkThenNext(ord(':'));
if($this->T == self::T_Identifier ||
if($this->T == self::T_UserSymbol ||
$this->T == self::T_Func ||
self::_isBuiltinType($this->T))
{
@ -435,7 +449,7 @@ class mtgMetaInfoParser
$this->_error("Type expected");
}
else
$this->_error("Unexpected fields T");
$this->_error("Unexpected fields symbol");
}
return $flds;
@ -454,11 +468,11 @@ class mtgMetaInfoParser
if($this->T == $end_token)
{
$this->_next();
$this->_nextT();
break;
}
$this->_next();
$this->_nextT();
}
return $funcs;
@ -470,15 +484,15 @@ class mtgMetaInfoParser
while(true)
{
if($this->T != self::T_Identifier)
$this->_error("Unexpected name T");
if($this->T != self::T_UserSymbol)
$this->_error("Unexpected name symbol");
$dot_name .= $this->T_value;
$this->_next();
$this->_nextT();
if($this->T != ord('.'))
break;
$dot_name .= '.';
$this->_next();
$this->_nextT();
}
return $dot_name;
@ -500,8 +514,8 @@ class mtgMetaInfoParser
$ret_type = null;
if($this->T == ord(':'))
{
$this->_next();
if($this->T == self::T_Identifier ||
$this->_nextT();
if($this->T == self::T_UserSymbol ||
$this->T == self::T_Func ||
self::_isBuiltinType($this->T))
{
@ -523,7 +537,7 @@ class mtgMetaInfoParser
private function _parseFreeFunc()
{
$this->_next();
$this->_nextT();
$fn = $this->_parseFunc();
$fn->setTokens(array_merge($this->shared_tokens, $fn->getTokens()));
$this->_addUnit(new mtgMetaInfoUnit($this->file, $fn));
@ -531,14 +545,14 @@ class mtgMetaInfoParser
private function _parseStruct()
{
$this->_next();
$this->_nextT();
$struct_origin = new mtgOrigin($this->file, $this->line);
$name = $this->_parseDotName();
$parent = null;
if($this->T == self::T_Extends)
{
$this->_next();
$this->_nextT();
$origin = new mtgOrigin($this->file, $this->line);
$parent_name = $this->_parseDotName();
$parent = new mtgTypeRef($parent_name, $this->module, $origin);
@ -549,7 +563,7 @@ class mtgMetaInfoParser
{
do
{
$this->_next();
$this->_nextT();
$origin = new mtgOrigin($this->file, $this->line);
$if_name = $this->_parseDotName();
$implements[] = new mtgTypeRef($if_name, $this->module, $origin);
@ -591,7 +605,7 @@ class mtgMetaInfoParser
private function _parseInterface()
{
$this->_next();
$this->_nextT();
$name = $this->_parseDotName();
$s = new mtgMetaInterface($name);
@ -604,19 +618,20 @@ class mtgMetaInfoParser
if($this->T !== self::T_End)
{
$this->_next();
$this->_nextT();
$funcs = $this->_parseFuncs();
foreach($funcs as $fn)
$s->addFunc($fn);
}
else
$this->_next();
$this->_nextT();
}
private function _parseRPC()
{
$this->_next();
$code = $this->_checkThenNext(self::T_IntegerConstant);
$this->_nextT();
$code = $this->T_value;
$this->_checkThenNext(self::T_IntegerConstant);
$name = $this->_parseDotName();
$this->_checkThenNext(ord('('));
@ -642,8 +657,6 @@ class mtgMetaInfoParser
private function _parsePropTokens()
{
$new_line = ord("\n");
$prop_tokens = array();
while(true)
@ -653,21 +666,27 @@ class mtgMetaInfoParser
$name = ltrim($this->T_value, '@');
$this->_validatePropToken($name);
$this->_next();
$this->_nextT();
$value = null;
$value_start_line = $this->line;
if($this->T == ord(':'))
{
//let's read the value
while(true)
{
$this->_next(false/*don't skip new line*/);
if($this->T == $new_line ||
$this->T == self::T_Prop)
//TODO: The code below is ugly and must be heavily refactored,
// it just tries to be convenient and keep BC: any token property
// value can have almost any kind of symbols excluding new line.
// In the future we should restrict property values to certain types only
$this->_nextT(true/*stop on new line*/);
if($this->T == ord("\n"))
{
$this->_nextT();
break;
}
else if($this->T == self::T_Prop)
{
//let's skip it
if($this->T == $new_line)
$this->_next();
break;
}
else
@ -682,17 +701,6 @@ class mtgMetaInfoParser
}
}
if($value && substr($value, 0, 1) === '{')
{
$json = json_decode($value);
if($json === null)
{
//for better line reporting
$this->line = $value_start_line;
$this->_error("Bad json");
}
}
$prop_tokens[$name] = $value;
}
return $prop_tokens;
@ -705,165 +713,145 @@ class mtgMetaInfoParser
return;
if(!in_array($name, $this->config['valid_tokens']))
{
throw new Exception("Unknown T '$name'");
}
throw new Exception("Unknown property token '@$name'");
}
/**
 * Returns the single byte of the source located at the current cursor offset.
 *
 * @return string one byte, or '' when there is nothing left to read at the cursor
 */
private function _char() : string
{
  $ch = substr($this->source, $this->cursor, 1);
  //substr() may report an out-of-range offset as false, normalize it to ''
  return $ch === false ? '' : $ch;
}
private function _next($skip_newlines = true)
private function _nextT($stop_on_new_line = false)
{
while(true)
{
$c = $this->_char();
if($c == '')
{
$this->cursor--;
$this->T = self::T_EOF;
$this->T_value = $c;
return;
}
$c = $this->cursor_char;
//setting default values
$this->T = ord($c);
$this->T_value = $c;
++$this->cursor;
//NOTE: current 'cursor_pos' is ahead of 'c' by one character
$this->_cursorNext();
switch($c)
{
case '': $this->T = self::T_EOF; return;
case "\n": if($stop_on_new_line) return; else break;
case ' ': case "\r": case "\t": break;
case "\n": $this->line++; if($skip_newlines) break; else return;
case '{': case '}': case '(': case ')': case '[': case ']': case '|': return;
case ',': case ':': case ';': case '=': return;
case '.':
if(!ctype_digit($this->_char())) return;
if(!ctype_digit($this->cursor_char))
return;
$this->_error("Floating point constant can't start with .");
break;
case '"':
$this->T_value = "";
while($this->_char() != '"')
$this->T_value = '';
while($this->cursor_char != '"')
{
if(ord($this->_char()) < ord(' '))
if(ord($this->cursor_char) < ord(' '))
$this->_error("Illegal character in string constant");
if($this->_char() == '\\')
if($this->cursor_char == '\\')
{
$this->cursor++;
switch($this->_char())
$this->_cursorNext();
switch($this->cursor_char)
{
case 'n': $this->T_value .= "\n"; $this->cursor++; break;
case 't': $this->T_value .= "\t"; $this->cursor++; break;
case 'r': $this->T_value .= "\r"; $this->cursor++; break;
case '"': $this->T_value .= '"'; $this->cursor++; break;
case '\\': $this->T_value .= '\\'; $this->cursor++; break;
case 'n': $this->T_value .= "\n"; $this->_cursorNext(); break;
case 't': $this->T_value .= "\t"; $this->_cursorNext(); break;
case 'r': $this->T_value .= "\r"; $this->_cursorNext(); break;
case '"': $this->T_value .= '"'; $this->_cursorNext(); break;
case '\\': $this->T_value .= '\\'; $this->_cursorNext(); break;
default: $this->_error("Unknown escape code in string constant"); break;
}
}
else // printable chars + UTF-8 bytes
{
$this->T_value .= $this->_char();
$this->cursor++;
$this->T_value .= $this->cursor_char;
$this->_cursorNext();
}
}
$this->T = self::T_StringConstant;
$this->cursor++;
$this->_cursorNext();
return;
case '`':
$this->T_value = "";
while($this->_char() != '`')
$this->T_value = '';
//TODO: code below is not robust enough
while($this->cursor_char != '`')
{
$this->T_value .= $this->_char();
$this->cursor++;
$this->T_value .= $this->cursor_char;
$this->_cursorNext();
}
$this->T = self::T_RawStringConstant;
$this->cursor++;
$this->_cursorNext();
return;
case '/':
if($this->_char() == '/')
if($this->cursor_char == '/')
{
$this->cursor++;
while($this->_char() != '' && $this->_char() != "\n") $this->cursor++;
$this->_cursorNext();
//@phpstan-ignore-next-line
while($this->cursor_char != '' && $this->cursor_char != "\n")
$this->_cursorNext();
//@phpstan-ignore-next-line
break;
}
case '#':
while($this->_char() != '' && $this->_char() != "\n") $this->cursor++;
while($this->cursor_char != '' && $this->cursor_char != "\n")
$this->_cursorNext();
break;
case '@':
$start = $this->cursor - 1;
while(ctype_alnum($this->_char()) || $this->_char() == '_')
$this->cursor++;
$start = $this->cursor_pos - 1;
while(ctype_alnum($this->cursor_char) || $this->cursor_char == '_')
$this->_cursorNext();
$this->T = self::T_Prop;
$this->T_value = substr($this->source, $start, $this->cursor - $start);
$this->T_value = substr($this->source, $start, $this->cursor_pos - $start);
return;
//NOTE: no fall through here, the '@' case above always returns
default:
//symbols
if(ctype_alpha($c))
{
//collect all chars of an identifier
$start = $this->cursor - 1;
while(ctype_alnum($this->_char()) || $this->_char() == '_')
$this->cursor++;
$this->T_value = substr($this->source, $start, $this->cursor - $start);
$start = $this->cursor_pos - 1;
while(ctype_alnum($this->cursor_char) || $this->cursor_char == '_')
$this->_cursorNext();
$this->T_value = substr($this->source, $start, $this->cursor_pos - $start);
if(isset($this->type2T[$this->T_value]))
{
$this->T = $this->type2T[$this->T_value];
return;
}
if(isset($this->symbol2T[$this->T_value]))
$this->T = $this->symbol2T[$this->T_value];
else //otherwise it's assumed to be a user defined symbol
$this->T = self::T_UserSymbol;
if($this->T_value == "true" || $this->T_value == "false")
{
$this->T = self::T_IntegerConstant;
return;
}
//check for declaration keywords:
if($this->T_value == "struct") { $this->T = self::T_Struct; return; }
if($this->T_value == "interface") { $this->T = self::T_Interface; return; }
if($this->T_value == "enum") { $this->T = self::T_Enum; return; }
if($this->T_value == "RPC") { $this->T = self::T_RPC; return; }
if($this->T_value == "end") { $this->T = self::T_End; return; }
if($this->T_value == "extends") { $this->T = self::T_Extends; return; }
if($this->T_value == "implements") { $this->T = self::T_Implements; return; }
if($this->T_value == "func") { $this->T = self::T_Func; return; }
//if not it's a user defined identifier
$this->T = self::T_Identifier;
return;
}
//digits
else if(ctype_digit($c) || $c == '-')
{
$start = $this->cursor - 1;
while(ctype_digit($this->_char())) $this->cursor++;
if($this->_char() == '.')
$start = $this->cursor_pos - 1;
while(ctype_digit($this->cursor_char))
$this->_cursorNext();
if($this->cursor_char == '.')
{
$this->cursor++;
while(ctype_digit($this->_char())) $this->cursor++;
$this->_cursorNext();
while(ctype_digit($this->cursor_char))
$this->_cursorNext();
// see if this float has a scientific notation suffix. Both JSON
// and C++ (through strtod() we use) have the same format:
if($this->_char() == 'e' || $this->_char() == 'E')
//@phpstan-ignore-next-line
if($this->cursor_char == 'e' || $this->cursor_char == 'E')
{
$this->cursor++;
if($this->_char() == '+' || $this->_char() == '-') $this->cursor++;
while(ctype_digit($this->_char())) $this->cursor++;
$this->_cursorNext();
if($this->cursor_char == '+' || $this->cursor_char == '-')
$this->_cursorNext();
while(ctype_digit($this->cursor_char))
$this->_cursorNext();
}
$this->T = self::T_FloatConstant;
}
else
$this->T = self::T_IntegerConstant;
$this->T_value = substr($this->source, $start, $this->cursor - $start);
$this->T_value = substr($this->source, $start, $this->cursor_pos - $start);
return;
}
@ -872,24 +860,30 @@ class mtgMetaInfoParser
}
}
/**
 * Advances the cursor by one byte and caches the byte found at the new position.
 *
 * The line counter is incremented as soon as the cursor lands on a "\n" byte.
 * When there is no byte at the new position (end of source) the cached
 * character becomes ''.
 */
private function _cursorNext()
{
  ++$this->cursor_pos;

  //NOTE: substr() may report an out-of-range offset as false (PHP < 8.0).
  //      The result is normalized in a local variable first: 'cursor_char'
  //      is a typed string property, so a check for false performed on the
  //      property itself after the assignment could never succeed.
  $char = substr($this->source, $this->cursor_pos, 1);
  if($char === false)
    $char = '';
  $this->cursor_char = $char;

  if($char === "\n")
    $this->line++;
}
private function _nextIf(int $t) : bool
{
$yes = $t === $this->T;
if($yes)
$this->_next();
$this->_nextT();
return $yes;
}
private function _checkThenNext(int $t) : string
private function _checkThenNext(int $t)
{
if($t !== $this->T)
{
$this->_error("Expecting '" . $this->_toStr($t) . "' instead got '" . $this->_toStr($this->T) . "'");
}
$attr = $this->T_value;
$this->_next();
return $attr;
$this->_nextT();
}
private function _toStr(int $t) : string
@ -901,7 +895,7 @@ class mtgMetaInfoParser
private function _error(string $msg)
{
throw new Exception($msg . " (T: {$this->T}, attr: {$this->T_value})");
throw new Exception($msg . " ('{$this->T_value}', {$this->T})");
}
}