From 160a3f2c2458471f562b984a870a0db0f1f72e6c Mon Sep 17 00:00:00 2001 From: Pavel Shevaev Date: Thu, 23 Nov 2023 17:03:30 +0300 Subject: [PATCH] Internal cleanup and refactoring, improving error line reporting --- parser.inc.php | 326 ++++++++++++++++++++++++------------------------- 1 file changed, 160 insertions(+), 166 deletions(-) diff --git a/parser.inc.php b/parser.inc.php index 3f17be2..ceb58c5 100644 --- a/parser.inc.php +++ b/parser.inc.php @@ -10,7 +10,7 @@ class mtgMetaInfoParser const T_Enum = 1005; const T_RPC = 1006; const T_End = 1007; - const T_Identifier = 1008; + const T_UserSymbol = 1008; const T_Struct = 1009; const T_Prop = 1010; const T_Extends = 1011; @@ -40,14 +40,17 @@ class mtgMetaInfoParser private ?mtgMetaParsedModule $module = null; private string $file = ""; private string $source = ""; - private int $cursor = 0; + private int $cursor_pos = 0; + private string $cursor_char = ''; private int $line = 0; //TODO: setting it an 'int' type makes PHPStan produce many // false positives + //token numeric identifier private $T = 0; + //token extra string value which depends on concrete T private string $T_value = ""; /** @var array*/ - private $type2T = array(); + private $symbol2T = array(); /** @var array*/ private $T2descr = array(); private array $shared_tokens = array(); @@ -65,8 +68,7 @@ class mtgMetaInfoParser private function _initTables() { - - $this->type2T = array( + $this->symbol2T = [ "string" => self::T_string, "uint32" => self::T_uint32, "int32" => self::T_int32, @@ -80,9 +82,21 @@ class mtgMetaInfoParser "int64" => self::T_int64, "bool" => self::T_bool, "blob" => self::T_blob, - ); - $this->T2descr = array_flip($this->type2T); + "true" => self::T_IntegerConstant, + "false" => self::T_IntegerConstant, + + "struct" => self::T_Struct, + "interface" => self::T_Interface, + "enum" => self::T_Enum, + "RPC" => self::T_RPC, + "end" => self::T_End, + "extends" => self::T_Extends, + "implements" => self::T_Implements, + "func" => self::T_Func, + ]; + + $this->T2descr = array_flip($this->symbol2T); $this->T2descr[self::T_EOF] = ''; $this->T2descr[self::T_StringConstant] = ''; $this->T2descr[self::T_RawStringConstant] = ''; @@ -91,7 +105,7 @@ class mtgMetaInfoParser $this->T2descr[self::T_Enum] = ''; $this->T2descr[self::T_RPC] = ''; $this->T2descr[self::T_End] = ''; - $this->T2descr[self::T_Identifier] = ''; + $this->T2descr[self::T_UserSymbol] = ''; $this->T2descr[self::T_Struct] = ''; $this->T2descr[self::T_Interface] = ''; $this->T2descr[self::T_Prop] = '<@prop>'; @@ -148,17 +162,16 @@ class mtgMetaInfoParser $this->module = $module; $this->file = $file; $this->source = $source; - $this->cursor = 0; $this->line = 1; + $this->cursor_pos = -1; + $this->_cursorNext(); $this->shared_tokens = array(); try { - $this->_next(); + $this->_nextT(); while($this->T != self::T_EOF) { - //echo "TOKEN : " . $this->T . " " . $this->T_value . " " . $this->line . "\n"; - if($this->T == self::T_Prop) $this->_parseSharedTokens($this->_parsePropTokens()); else if($this->T == self::T_Enum) @@ -172,7 +185,7 @@ class mtgMetaInfoParser else if($this->T == self::T_RPC) $this->_parseRPC(); else - $this->_error("Unexpected T ('" . $this->_toStr($this->T) . "' " . $this->T_value . ")"); + $this->_error("Unexpected symbol ('" . $this->_toStr($this->T) . "' " . $this->T_value . ")"); } } catch(Exception $e) @@ -212,7 +225,7 @@ class mtgMetaInfoParser $func_type = $this->_parseFuncType(); $type = new mtgTypeRef($func_type, $this->module, $origin); } - else if($this->T == self::T_Identifier) + else if($this->T == self::T_UserSymbol) { $origin = new mtgOrigin($this->file, $this->line); $type_name = $this->_parseDotName(); @@ -223,13 +236,13 @@ class mtgMetaInfoParser $origin = new mtgOrigin($this->file, $this->line); $type_name = $this->T_value; $type = new mtgTypeRef(new mtgBuiltinType($type_name), $this->module, $origin); - $this->_next(); + $this->_nextT(); } if($this->T == ord('[')) { $origin = new mtgOrigin($this->file, $this->line); - $this->_next(); + $this->_nextT(); $this->_checkThenNext(ord(']')); $type = new mtgTypeRef(new mtgArrType($type), $this->module, $origin); } @@ -240,7 +253,7 @@ class mtgMetaInfoParser if($this->T != ord(',')) break; - $this->_next(); + $this->_nextT(); } if(sizeof($types) > 1) @@ -253,7 +266,7 @@ class mtgMetaInfoParser { $ftype = new mtgMetaFunc(''); - $this->_next(); + $this->_nextT(); $this->_checkThenNext(ord('(')); @@ -262,7 +275,7 @@ class mtgMetaInfoParser { if($this->T == ord(')')) { - $this->_next(); + $this->_nextT(); break; } else if($c > 0) @@ -278,7 +291,7 @@ class mtgMetaInfoParser if($this->T == ord(':')) { - $this->_next(); + $this->_nextT(); $ret_type = $this->_parseType(true/*can be multi-type*/); $ftype->setReturnType($ret_type); } @@ -325,10 +338,10 @@ class mtgMetaInfoParser $values = array(); while(true) { - if($this->T == self::T_Identifier) + if($this->T == self::T_UserSymbol) { $values[] = $this->T_value; - $this->_next(); + $this->_nextT(); if(!$this->_nextIf(ord('|'))) break; } @@ -341,8 +354,7 @@ class mtgMetaInfoParser private function _parseEnum() { - $this->_next(); - + $this->_nextT(); $name = $this->_parseDotName(); $enum = new mtgMetaEnum($name); @@ -356,15 +368,17 @@ class mtgMetaInfoParser { if($this->_nextIf(self::T_End)) break; - $key = $this->_checkThenNext(self::T_Identifier); + $key = $this->T_value; + $this->_checkThenNext(self::T_UserSymbol); $this->_checkThenNext(ord('=')); - if($this->T == self::T_Identifier) + if($this->T == self::T_UserSymbol) { $or_values[$key] = $this->_parseEnumOrValues(); } else { - $value = $this->_checkThenNext(self::T_IntegerConstant); + $value = $this->T_value; + $this->_checkThenNext(self::T_IntegerConstant); $enum->addValue($key, $value); } } @@ -412,13 +426,13 @@ class mtgMetaInfoParser if($next_doer()) break; - if($this->T == self::T_Identifier) + if($this->T == self::T_UserSymbol) { $name = $this->T_value; - $this->_next(); + $this->_nextT(); $this->_checkThenNext(ord(':')); - if($this->T == self::T_Identifier || + if($this->T == self::T_UserSymbol || $this->T == self::T_Func || self::_isBuiltinType($this->T)) { @@ -435,7 +449,7 @@ class mtgMetaInfoParser $this->_error("Type expected"); } else - $this->_error("Unexpected fields T"); + $this->_error("Unexpected fields symbol"); } return $flds; @@ -454,11 +468,11 @@ class mtgMetaInfoParser if($this->T == $end_token) { - $this->_next(); + $this->_nextT(); break; } - $this->_next(); + $this->_nextT(); } return $funcs; @@ -470,15 +484,15 @@ class mtgMetaInfoParser while(true) { - if($this->T != self::T_Identifier) - $this->_error("Unexpected name T"); + if($this->T != self::T_UserSymbol) + $this->_error("Unexpected name symbol"); $dot_name .= $this->T_value; - $this->_next(); + $this->_nextT(); if($this->T != ord('.')) break; $dot_name .= '.'; - $this->_next(); + $this->_nextT(); } return $dot_name; @@ -500,8 +514,8 @@ class mtgMetaInfoParser $ret_type = null; if($this->T == ord(':')) { - $this->_next(); - if($this->T == self::T_Identifier || + $this->_nextT(); + if($this->T == self::T_UserSymbol || $this->T == self::T_Func || self::_isBuiltinType($this->T)) { @@ -523,7 +537,7 @@ class mtgMetaInfoParser private function _parseFreeFunc() { - $this->_next(); + $this->_nextT(); $fn = $this->_parseFunc(); $fn->setTokens(array_merge($this->shared_tokens, $fn->getTokens())); $this->_addUnit(new mtgMetaInfoUnit($this->file, $fn)); @@ -531,14 +545,14 @@ class mtgMetaInfoParser private function _parseStruct() { - $this->_next(); + $this->_nextT(); $struct_origin = new mtgOrigin($this->file, $this->line); $name = $this->_parseDotName(); $parent = null; if($this->T == self::T_Extends) { - $this->_next(); + $this->_nextT(); $origin = new mtgOrigin($this->file, $this->line); $parent_name = $this->_parseDotName(); $parent = new mtgTypeRef($parent_name, $this->module, $origin); @@ -549,7 +563,7 @@ class mtgMetaInfoParser { do { - $this->_next(); + $this->_nextT(); $origin = new mtgOrigin($this->file, $this->line); $if_name = $this->_parseDotName(); $implements[] = new mtgTypeRef($if_name, $this->module, $origin); @@ -591,7 +605,7 @@ class mtgMetaInfoParser private function _parseInterface() { - $this->_next(); + $this->_nextT(); $name = $this->_parseDotName(); $s = new mtgMetaInterface($name); @@ -604,19 +618,20 @@ class mtgMetaInfoParser if($this->T !== self::T_End) { - $this->_next(); + $this->_nextT(); $funcs = $this->_parseFuncs(); foreach($funcs as $fn) $s->addFunc($fn); } else - $this->_next(); + $this->_nextT(); } private function _parseRPC() { - $this->_next(); - $code = $this->_checkThenNext(self::T_IntegerConstant); + $this->_nextT(); + $code = $this->T_value; + $this->_checkThenNext(self::T_IntegerConstant); $name = $this->_parseDotName(); $this->_checkThenNext(ord('(')); @@ -642,8 +657,6 @@ class mtgMetaInfoParser private function _parsePropTokens() { - $new_line = ord("\n"); - $prop_tokens = array(); while(true) @@ -653,21 +666,27 @@ class mtgMetaInfoParser $name = ltrim($this->T_value, '@'); $this->_validatePropToken($name); - $this->_next(); + $this->_nextT(); $value = null; - $value_start_line = $this->line; if($this->T == ord(':')) { + //let's read the value while(true) { - $this->_next(false/*don't skip new line*/); - if($this->T == $new_line || - $this->T == self::T_Prop) + //TODO: The code below is ugly and must be heavily refactored, + // it just tries to be convenient and keep BC: any token property + // value can have almost any kind of symbols excluding new line. + // In the future we should restrict property values to certain types only + $this->_nextT(true/*stop on new line*/); + + if($this->T == ord("\n")) + { + $this->_nextT(); + break; + } + else if($this->T == self::T_Prop) { - //let's skip it - if($this->T == $new_line) - $this->_next(); break; } else @@ -682,17 +701,6 @@ class mtgMetaInfoParser } } - if($value && substr($value, 0, 1) === '{') - { - $json = json_decode($value); - if($json === null) - { - //for better line reporting - $this->line = $value_start_line; - $this->_error("Bad json"); - } - } - $prop_tokens[$name] = $value; } return $prop_tokens; @@ -705,165 +713,145 @@ class mtgMetaInfoParser return; if(!in_array($name, $this->config['valid_tokens'])) - { - throw new Exception("Unknown T '$name'"); - } + throw new Exception("Unknown property token '@$name'"); } - private function _char() : string - { - $str = substr($this->source, $this->cursor, 1); - if($str === false) - $str = ''; - return $str; - } - - private function _next($skip_newlines = true) + private function _nextT($stop_on_new_line = false) { while(true) { - $c = $this->_char(); - if($c == '') - { - $this->cursor--; - $this->T = self::T_EOF; - $this->T_value = $c; - return; - } + $c = $this->cursor_char; + //setting default values $this->T = ord($c); $this->T_value = $c; - ++$this->cursor; + //NOTE: current 'cursor_pos' is ahead of 'c' by one character + $this->_cursorNext(); switch($c) { + case '': $this->T = self::T_EOF; return; + case "\n": if($stop_on_new_line) return; else break; case ' ': case "\r": case "\t": break; - case "\n": $this->line++; if($skip_newlines) break; else return; case '{': case '}': case '(': case ')': case '[': case ']': case '|': return; case ',': case ':': case ';': case '=': return; case '.': - if(!ctype_digit($this->_char())) return; + if(!ctype_digit($this->cursor_char)) + return; $this->_error("Floating point constant can't start with ."); break; case '"': - $this->T_value = ""; - while($this->_char() != '"') + $this->T_value = ''; + while($this->cursor_char != '"') { - if(ord($this->_char()) < ord(' ')) + if(ord($this->cursor_char) < ord(' ')) $this->_error("Illegal character in string constant"); - if($this->_char() == '\\') + if($this->cursor_char == '\\') { - $this->cursor++; - switch($this->_char()) + $this->_cursorNext(); + switch($this->cursor_char) { - case 'n': $this->T_value .= "\n"; $this->cursor++; break; - case 't': $this->T_value .= "\t"; $this->cursor++; break; - case 'r': $this->T_value .= "\r"; $this->cursor++; break; - case '"': $this->T_value .= '"'; $this->cursor++; break; - case '\\': $this->T_value .= '\\'; $this->cursor++; break; + case 'n': $this->T_value .= "\n"; $this->_cursorNext(); break; + case 't': $this->T_value .= "\t"; $this->_cursorNext(); break; + case 'r': $this->T_value .= "\r"; $this->_cursorNext(); break; + case '"': $this->T_value .= '"'; $this->_cursorNext(); break; + case '\\': $this->T_value .= '\\'; $this->_cursorNext(); break; default: $this->_error("Unknown escape code in string constant"); break; } } else // printable chars + UTF-8 bytes { - $this->T_value .= $this->_char(); - $this->cursor++; + $this->T_value .= $this->cursor_char; + $this->_cursorNext(); } } $this->T = self::T_StringConstant; - $this->cursor++; + $this->_cursorNext(); return; case '`': - $this->T_value = ""; - while($this->_char() != '`') + $this->T_value = ''; + //TODO: code below is not robust enough + while($this->cursor_char != '`') { - $this->T_value .= $this->_char(); - $this->cursor++; + $this->T_value .= $this->cursor_char; + $this->_cursorNext(); } $this->T = self::T_RawStringConstant; - $this->cursor++; + $this->_cursorNext(); return; case '/': - if($this->_char() == '/') + if($this->cursor_char == '/') { - $this->cursor++; - while($this->_char() != '' && $this->_char() != "\n") $this->cursor++; + $this->_cursorNext(); + //@phpstan-ignore-next-line + while($this->cursor_char != '' && $this->cursor_char != "\n") + $this->_cursorNext(); + //@phpstan-ignore-next-line break; } case '#': - while($this->_char() != '' && $this->_char() != "\n") $this->cursor++; + while($this->cursor_char != '' && $this->cursor_char != "\n") + $this->_cursorNext(); break; case '@': - $start = $this->cursor - 1; - while(ctype_alnum($this->_char()) || $this->_char() == '_') - $this->cursor++; + $start = $this->cursor_pos - 1; + while(ctype_alnum($this->cursor_char) || $this->cursor_char == '_') + $this->_cursorNext(); + $this->T = self::T_Prop; - $this->T_value = substr($this->source, $start, $this->cursor - $start); + $this->T_value = substr($this->source, $start, $this->cursor_pos - $start); return; - //fall thru default: + //symbols if(ctype_alpha($c)) { //collect all chars of an identifier - $start = $this->cursor - 1; - while(ctype_alnum($this->_char()) || $this->_char() == '_') - $this->cursor++; - $this->T_value = substr($this->source, $start, $this->cursor - $start); + $start = $this->cursor_pos - 1; + while(ctype_alnum($this->cursor_char) || $this->cursor_char == '_') + $this->_cursorNext(); + $this->T_value = substr($this->source, $start, $this->cursor_pos - $start); - if(isset($this->type2T[$this->T_value])) - { - $this->T = $this->type2T[$this->T_value]; - return; - } + if(isset($this->symbol2T[$this->T_value])) + $this->T = $this->symbol2T[$this->T_value]; + else //otherwise it's assumed to be a user defined symbol + $this->T = self::T_UserSymbol; - if($this->T_value == "true" || $this->T_value == "false") - { - $this->T = self::T_IntegerConstant; - return; - } - - //check for declaration keywords: - if($this->T_value == "struct") { $this->T = self::T_Struct; return; } - if($this->T_value == "interface") { $this->T = self::T_Interface; return; } - if($this->T_value == "enum") { $this->T = self::T_Enum; return; } - if($this->T_value == "RPC") { $this->T = self::T_RPC; return; } - if($this->T_value == "end") { $this->T = self::T_End; return; } - if($this->T_value == "extends") { $this->T = self::T_Extends; return; } - if($this->T_value == "implements") { $this->T = self::T_Implements; return; } - if($this->T_value == "func") { $this->T = self::T_Func; return; } - - //if not it's a user defined identifier - $this->T = self::T_Identifier; return; } + //digits else if(ctype_digit($c) || $c == '-') { - $start = $this->cursor - 1; - while(ctype_digit($this->_char())) $this->cursor++; - if($this->_char() == '.') + $start = $this->cursor_pos - 1; + while(ctype_digit($this->cursor_char)) + $this->_cursorNext(); + if($this->cursor_char == '.') { - $this->cursor++; - while(ctype_digit($this->_char())) $this->cursor++; + $this->_cursorNext(); + while(ctype_digit($this->cursor_char)) + $this->_cursorNext(); // see if this float has a scientific notation suffix. Both JSON // and C++ (through strtod() we use) have the same format: - if($this->_char() == 'e' || $this->_char() == 'E') + //@phpstan-ignore-next-line + if($this->cursor_char == 'e' || $this->cursor_char == 'E') { - $this->cursor++; - if($this->_char() == '+' || $this->_char() == '-') $this->cursor++; - while(ctype_digit($this->_char())) $this->cursor++; + $this->_cursorNext(); + if($this->cursor_char == '+' || $this->cursor_char == '-') + $this->_cursorNext(); + while(ctype_digit($this->cursor_char)) + $this->_cursorNext(); } $this->T = self::T_FloatConstant; } else $this->T = self::T_IntegerConstant; - $this->T_value = substr($this->source, $start, $this->cursor - $start); + $this->T_value = substr($this->source, $start, $this->cursor_pos - $start); return; } @@ -872,24 +860,30 @@ class mtgMetaInfoParser } } + private function _cursorNext() + { + ++$this->cursor_pos; + $this->cursor_char = substr($this->source, $this->cursor_pos, 1); + if($this->cursor_char === "\n") + $this->line++; + //EOF + if($this->cursor_char === false) + $this->cursor_char = ''; + } + private function _nextIf(int $t) : bool { $yes = $t === $this->T; if($yes) - $this->_next(); + $this->_nextT(); return $yes; } - private function _checkThenNext(int $t) : string + private function _checkThenNext(int $t) { if($t !== $this->T) - { $this->_error("Expecting '" . $this->_toStr($t) . "' instead got '" . $this->_toStr($this->T) . "'"); - } - - $attr = $this->T_value; - $this->_next(); - return $attr; + $this->_nextT(); } private function _toStr(int $t) : string @@ -901,7 +895,7 @@ class mtgMetaInfoParser private function _error(string $msg) { - throw new Exception($msg . " (T: {$this->T}, attr: {$this->T_value})"); + throw new Exception($msg . " ('{$this->T_value}', {$this->T})"); } }