diff --git a/parser.inc.php b/parser.inc.php index 6b0056a..3f17be2 100644 --- a/parser.inc.php +++ b/parser.inc.php @@ -2,21 +2,7 @@ class mtgMetaInfoParser { - private $config = array(); - private $current_meta; - private $parsed_files = array(); - private $file_stack = array(); - private $module = null; - private $file = ""; - private $source = ""; - private $cursor = 0; - private $line = 0; - private $token = ""; - private $attribute = ""; - private $idltypes = array(); - private $token_strs = array(); - private $shared_tokens = array(); - + const T_NonOrdMark = 1000; //marks start of non 'ord(..)' values const T_EOF = 1001; const T_StringConstant = 1002; const T_IntegerConstant = 1003; @@ -32,6 +18,7 @@ class mtgMetaInfoParser const T_RawStringConstant = 1013; const T_Interface = 1014; const T_Implements = 1015; + const T_MinType = 1019; //built-in types begin mark const T_string = 1020; const T_uint32 = 1021; const T_int32 = 1022; @@ -44,16 +31,42 @@ class mtgMetaInfoParser const T_int64 = 1029; const T_bool = 1030; const T_blob = 1031; + const T_MaxType = 1032; //built-in types end mark + + private array $config = array(); + private mtgMetaInfo $current_meta; + private array $parsed_files = array(); + private array $file_stack = array(); + private ?mtgMetaParsedModule $module = null; + private string $file = ""; + private string $source = ""; + private int $cursor = 0; + private int $line = 0; + //TODO: setting it an 'int' type makes PHPStan produce many + // false positives + private $T = 0; + private string $T_value = ""; + /** @var array*/ + private $type2T = array(); + /** @var array*/ + private $T2descr = array(); + private array $shared_tokens = array(); function __construct($config = array()) { - self::addDefaultTokens($config); + $this->_initTables(); + + self::_addDefaultTokens($config); $this->config = $config; if(!isset($this->config['include_path'])) $this->config['include_path'] = array('.'); + } - $this->idltypes = array( + private function _initTables() + { + + $this->type2T = array( "string" => self::T_string, "uint32" => self::T_uint32, "int32" => self::T_int32, @@ -68,25 +81,26 @@ class mtgMetaInfoParser "bool" => self::T_bool, "blob" => self::T_blob, ); - $this->token_strs = array_flip($this->idltypes); - $this->token_strs[self::T_EOF] = ''; - $this->token_strs[self::T_StringConstant] = ''; - $this->token_strs[self::T_RawStringConstant] = ''; - $this->token_strs[self::T_IntegerConstant] = ''; - $this->token_strs[self::T_FloatConstant] = ''; - $this->token_strs[self::T_Enum] = ''; - $this->token_strs[self::T_RPC] = ''; - $this->token_strs[self::T_End] = ''; - $this->token_strs[self::T_Identifier] = ''; - $this->token_strs[self::T_Struct] = ''; - $this->token_strs[self::T_Interface] = ''; - $this->token_strs[self::T_Prop] = '<@prop>'; - $this->token_strs[self::T_Extends] = ''; - $this->token_strs[self::T_Implements] = ''; - $this->token_strs[self::T_Func] = ''; + + $this->T2descr = array_flip($this->type2T); + $this->T2descr[self::T_EOF] = ''; + $this->T2descr[self::T_StringConstant] = ''; + $this->T2descr[self::T_RawStringConstant] = ''; + $this->T2descr[self::T_IntegerConstant] = ''; + $this->T2descr[self::T_FloatConstant] = ''; + $this->T2descr[self::T_Enum] = ''; + $this->T2descr[self::T_RPC] = ''; + $this->T2descr[self::T_End] = ''; + $this->T2descr[self::T_Identifier] = ''; + $this->T2descr[self::T_Struct] = ''; + $this->T2descr[self::T_Interface] = ''; + $this->T2descr[self::T_Prop] = '<@prop>'; + $this->T2descr[self::T_Extends] = ''; + $this->T2descr[self::T_Implements] = ''; + $this->T2descr[self::T_Func] = ''; } - function addDefaultTokens(array &$config) + private static function _addDefaultTokens(array &$config) { if(!isset($config['valid_tokens'])) $config['valid_tokens'] = array(); @@ -116,21 +130,13 @@ class mtgMetaInfoParser $this->parsed_files[$file] = $module; $this->file_stack[] = $file; $source = file_get_contents($file); - $is_php = false; try { if($source === false) throw new Exception("Could not read file '$file'"); - //PHP include - if(strpos($source, '_resolveIncludes($module, $source); + $this->_resolveIncludes($module, $source); } catch(Exception $e) { @@ -139,9 +145,6 @@ class mtgMetaInfoParser array_pop($this->file_stack); - if($is_php) - return; - $this->module = $module; $this->file = $file; $this->source = $source; @@ -152,24 +155,24 @@ class mtgMetaInfoParser try { $this->_next(); - while($this->token != self::T_EOF) + while($this->T != self::T_EOF) { - //echo "TOKEN : " . $this->token . " " . $this->attribute . " " . $this->line . "\n"; + //echo "TOKEN : " . $this->T . " " . $this->T_value . " " . $this->line . "\n"; - if($this->token == self::T_Prop) + if($this->T == self::T_Prop) $this->_parseSharedTokens($this->_parsePropTokens()); - else if($this->token == self::T_Enum) + else if($this->T == self::T_Enum) $this->_parseEnum(); - else if($this->token == self::T_Struct) + else if($this->T == self::T_Struct) $this->_parseStruct(); - else if($this->token == self::T_Interface) + else if($this->T == self::T_Interface) $this->_parseInterface(); - else if($this->token == self::T_Func) + else if($this->T == self::T_Func) $this->_parseFreeFunc(); - else if($this->token == self::T_RPC) + else if($this->T == self::T_RPC) $this->_parseRPC(); else - $this->_error("unexpected token ('" . $this->_toStr($this->token) . "' " . $this->attribute . ")"); + $this->_error("Unexpected T ('" . $this->_toStr($this->T) . "' " . $this->T_value . ")"); } } catch(Exception $e) @@ -203,13 +206,13 @@ class mtgMetaInfoParser { $type = null; - if($this->token == self::T_Func) + if($this->T == self::T_Func) { $origin = new mtgOrigin($this->file, $this->line); $func_type = $this->_parseFuncType(); $type = new mtgTypeRef($func_type, $this->module, $origin); } - else if($this->token == self::T_Identifier) + else if($this->T == self::T_Identifier) { $origin = new mtgOrigin($this->file, $this->line); $type_name = $this->_parseDotName(); @@ -218,16 +221,16 @@ class mtgMetaInfoParser else { $origin = new mtgOrigin($this->file, $this->line); - $type_name = $this->attribute; + $type_name = $this->T_value; $type = new mtgTypeRef(new mtgBuiltinType($type_name), $this->module, $origin); $this->_next(); } - if($this->token == ord('[')) + if($this->T == ord('[')) { $origin = new mtgOrigin($this->file, $this->line); $this->_next(); - $this->_checkThenNext(']'); + $this->_checkThenNext(ord(']')); $type = new mtgTypeRef(new mtgArrType($type), $this->module, $origin); } $types[] = $type; @@ -235,7 +238,7 @@ class mtgMetaInfoParser if(!$can_be_multi) break; - if($this->token != ord(',')) + if($this->T != ord(',')) break; $this->_next(); } @@ -252,19 +255,19 @@ class mtgMetaInfoParser $this->_next(); - $this->_checkThenNext('('); + $this->_checkThenNext(ord('(')); $c = 0; while(true) { - if($this->token == ord(')')) + if($this->T == ord(')')) { $this->_next(); break; } else if($c > 0) { - $this->_checkThenNext(','); + $this->_checkThenNext(ord(',')); } $arg_type = $this->_parseType(); @@ -273,7 +276,7 @@ class mtgMetaInfoParser $ftype->addArg($arg); } - if($this->token == ord(':')) + if($this->T == ord(':')) { $this->_next(); $ret_type = $this->_parseType(true/*can be multi-type*/); @@ -322,14 +325,12 @@ class mtgMetaInfoParser $values = array(); while(true) { - if($this->token == self::T_Identifier) + if($this->T == self::T_Identifier) { - $values[] = $this->attribute; + $values[] = $this->T_value; $this->_next(); - if($this->token != ord('|')) + if(!$this->_nextIf(ord('|'))) break; - else - $this->_next(); } else break; @@ -346,7 +347,7 @@ class mtgMetaInfoParser $enum = new mtgMetaEnum($name); $tokens = $this->shared_tokens; - if($this->token == self::T_Prop) + if($this->T == self::T_Prop) $tokens = array_merge($tokens, $this->_parsePropTokens()); $enum->setTokens($tokens); @@ -356,8 +357,8 @@ class mtgMetaInfoParser if($this->_nextIf(self::T_End)) break; $key = $this->_checkThenNext(self::T_Identifier); - $this->_checkThenNext('='); - if($this->token == self::T_Identifier) + $this->_checkThenNext(ord('=')); + if($this->T == self::T_Identifier) { $or_values[$key] = $this->_parseEnumOrValues(); } @@ -397,7 +398,12 @@ class mtgMetaInfoParser $this->_addUnit(new mtgMetaInfoUnit($this->file, $enum)); } - private function _parseFields($next_doer) + static private function _isBuiltinType(int $t) : bool + { + return $t > self::T_MinType && $t < self::T_MaxType; + } + + private function _parseFields(callable $next_doer) { $flds = array(); @@ -406,30 +412,30 @@ class mtgMetaInfoParser if($next_doer()) break; - if($this->token == self::T_Identifier) + if($this->T == self::T_Identifier) { - $name = $this->attribute; + $name = $this->T_value; $this->_next(); - $this->_checkThenNext(':'); + $this->_checkThenNext(ord(':')); - if($this->token == self::T_Identifier || - $this->token == self::T_Func || - ($this->token >= self::T_string && $this->token <= self::T_blob)) + if($this->T == self::T_Identifier || + $this->T == self::T_Func || + self::_isBuiltinType($this->T)) { $type = $this->_parseType(); $fld = new mtgMetaField($name, $type); - if($this->token == self::T_Prop) + if($this->T == self::T_Prop) $fld->setTokens($this->_parsePropTokens()); $flds[] = $fld; } else - $this->_error("type expected"); + $this->_error("Type expected"); } else - $this->_error("unexpected fields token"); + $this->_error("Unexpected fields T"); } return $flds; @@ -446,7 +452,7 @@ class mtgMetaInfoParser $fn = $this->_parseFunc(); $funcs[] = $fn; - if($this->token == $end_token) + if($this->T == $end_token) { $this->_next(); break; @@ -464,12 +470,12 @@ class mtgMetaInfoParser while(true) { - if($this->token != self::T_Identifier) - $this->_error("unexpected name token"); + if($this->T != self::T_Identifier) + $this->_error("Unexpected name T"); - $dot_name .= $this->attribute; + $dot_name .= $this->T_value; $this->_next(); - if($this->token != ord('.')) + if($this->T != ord('.')) break; $dot_name .= '.'; $this->_next(); @@ -483,27 +489,27 @@ class mtgMetaInfoParser $name = $this->_parseDotName(); $fn = new mtgMetaFunc($name); - $this->_checkThenNext('('); - if($this->token == self::T_Prop) + $this->_checkThenNext(ord('(')); + if($this->T == self::T_Prop) $fn->setTokens($this->_parsePropTokens()); $args = $this->_parseFields(function() - { return $this->_nextIf(')'); } + { return $this->_nextIf(ord(')')); } ); $fn->setArgs($args); $ret_type = null; - if($this->token == ord(':')) + if($this->T == ord(':')) { $this->_next(); - if($this->token == self::T_Identifier || - $this->token == self::T_Func || - ($this->token >= self::T_string && $this->token <= self::T_bool)) + if($this->T == self::T_Identifier || + $this->T == self::T_Func || + self::_isBuiltinType($this->T)) { $ret_type = $this->_parseType(true/*can be multi-type*/); $fn->setReturnType($ret_type); } else - $this->_error("unexpected func type token"); + $this->_error("Unexpected func type"); } return $fn; @@ -530,7 +536,7 @@ class mtgMetaInfoParser $name = $this->_parseDotName(); $parent = null; - if($this->token == self::T_Extends) + if($this->T == self::T_Extends) { $this->_next(); $origin = new mtgOrigin($this->file, $this->line); @@ -539,7 +545,7 @@ class mtgMetaInfoParser } $implements = array(); - if($this->token == self::T_Implements) + if($this->T == self::T_Implements) { do { @@ -547,7 +553,7 @@ class mtgMetaInfoParser $origin = new mtgOrigin($this->file, $this->line); $if_name = $this->_parseDotName(); $implements[] = new mtgTypeRef($if_name, $this->module, $origin); - } while($this->token == ord(',')); + } while($this->T == ord(',')); } $s = new mtgMetaStruct($name, array(), $parent, array(), $implements); @@ -555,7 +561,7 @@ class mtgMetaInfoParser $this->_addUnit(new mtgMetaInfoUnit($this->file, $s)); $tokens = $this->shared_tokens; - if($this->token == self::T_Prop) + if($this->T == self::T_Prop) $tokens = array_merge($tokens, $this->_parsePropTokens()); $s->setTokens($tokens); @@ -592,11 +598,11 @@ class mtgMetaInfoParser $this->_addUnit(new mtgMetaInfoUnit($this->file, $s)); $tokens = $this->shared_tokens; - if($this->token == self::T_Prop) + if($this->T == self::T_Prop) $tokens = array_merge($tokens, $this->_parsePropTokens()); $s->setTokens($tokens); - if($this->token !== self::T_End) + if($this->T !== self::T_End) { $this->_next(); $funcs = $this->_parseFuncs(); @@ -612,14 +618,14 @@ class mtgMetaInfoParser $this->_next(); $code = $this->_checkThenNext(self::T_IntegerConstant); $name = $this->_parseDotName(); - $this->_checkThenNext('('); + $this->_checkThenNext(ord('(')); $tokens = $this->shared_tokens; - if($this->token == self::T_Prop) + if($this->T == self::T_Prop) $tokens = array_merge($tokens, $this->_parsePropTokens()); $req_fields = $this->_parseFields(function() - { return $this->_nextIf(')'); } + { return $this->_nextIf(ord(')')); } ); $rsp_fields = $this->_parseFields(function() { return $this->_nextIf(self::T_End); } @@ -642,30 +648,32 @@ class mtgMetaInfoParser while(true) { - if($this->token != self::T_Prop) + if($this->T != self::T_Prop) break; - $name = ltrim($this->attribute, '@'); + $name = ltrim($this->T_value, '@'); + $this->_validatePropToken($name); $this->_next(); $value = null; - if($this->token == ord(':')) + $value_start_line = $this->line; + if($this->T == ord(':')) { while(true) { $this->_next(false/*don't skip new line*/); - if($this->token == $new_line || - $this->token == self::T_Prop) + if($this->T == $new_line || + $this->T == self::T_Prop) { //let's skip it - if($this->token == $new_line) + if($this->T == $new_line) $this->_next(); break; } else { - $tmp = $this->attribute; - if($this->token == self::T_StringConstant) + $tmp = $this->T_value; + if($this->T == self::T_StringConstant) $tmp = "\"$tmp\""; if($value === null) $value = ''; @@ -673,23 +681,24 @@ class mtgMetaInfoParser } } } - if($value && substr($value, 0, 1) == '{') + + if($value && substr($value, 0, 1) === '{') { $json = json_decode($value); if($json === null) { - --$this->line; //hack for more precise reporting - $this->_error("bad json"); + //for better line reporting + $this->line = $value_start_line; + $this->_error("Bad json"); } } - $this->_validatePropToken($name, $value); $prop_tokens[$name] = $value; } return $prop_tokens; } - private function _validatePropToken($name, $value) + private function _validatePropToken(string $name) { if(!isset($this->config['valid_tokens']) || !is_array($this->config['valid_tokens'])) @@ -697,106 +706,104 @@ class mtgMetaInfoParser if(!in_array($name, $this->config['valid_tokens'])) { - --$this->line; //hack for more precise reporting - throw new Exception("Unknown token '$name'"); + throw new Exception("Unknown T '$name'"); } } - private function _symbol() + private function _char() : string { - return substr($this->source, $this->cursor, 1); + $str = substr($this->source, $this->cursor, 1); + if($str === false) + $str = ''; + return $str; } private function _next($skip_newlines = true) - { - $this->__next($skip_newlines); - //for debug - //var_dump("NEXT " . $this->token . " " . $this->attribute); - //debug_print_backtrace(0, 1); - } - - private function __next($skip_newlines = true) { while(true) { - $c = $this->_symbol(); - //NOTE: dealing with PHP's types juggling - if($c === false || $c === '') - $c = -1; + $c = $this->_char(); + if($c == '') + { + $this->cursor--; + $this->T = self::T_EOF; + $this->T_value = $c; + return; + } + + $this->T = ord($c); + $this->T_value = $c; - $this->token = ord($c); ++$this->cursor; - $this->attribute = $c; switch($c) { - case -1: $this->cursor--; $this->token = self::T_EOF; return; case ' ': case "\r": case "\t": break; case "\n": $this->line++; if($skip_newlines) break; else return; case '{': case '}': case '(': case ')': case '[': case ']': case '|': return; case ',': case ':': case ';': case '=': return; case '.': - if(!ctype_digit($this->_symbol())) return; - $this->_error("floating point constant can't start with ."); + if(!ctype_digit($this->_char())) return; + $this->_error("Floating point constant can't start with ."); break; case '"': - $this->attribute = ""; - while($this->_symbol() != '"') + $this->T_value = ""; + while($this->_char() != '"') { - if(ord($this->_symbol()) < ord(' ')) - $this->_error("illegal character in string constant"); - if($this->_symbol() == '\\') + if(ord($this->_char()) < ord(' ')) + $this->_error("Illegal character in string constant"); + if($this->_char() == '\\') { $this->cursor++; - switch($this->_symbol()) + switch($this->_char()) { - case 'n': $this->attribute .= "\n"; $this->cursor++; break; - case 't': $this->attribute .= "\t"; $this->cursor++; break; - case 'r': $this->attribute .= "\r"; $this->cursor++; break; - case '"': $this->attribute .= '"'; $this->cursor++; break; - case '\\': $this->attribute .= '\\'; $this->cursor++; break; - default: $this->_error("unknown escape code in string constant"); break; + case 'n': $this->T_value .= "\n"; $this->cursor++; break; + case 't': $this->T_value .= "\t"; $this->cursor++; break; + case 'r': $this->T_value .= "\r"; $this->cursor++; break; + case '"': $this->T_value .= '"'; $this->cursor++; break; + case '\\': $this->T_value .= '\\'; $this->cursor++; break; + default: $this->_error("Unknown escape code in string constant"); break; } } else // printable chars + UTF-8 bytes { - $this->attribute .= $this->_symbol(); + $this->T_value .= $this->_char(); $this->cursor++; } } - $this->token = self::T_StringConstant; + $this->T = self::T_StringConstant; $this->cursor++; return; case '`': - $this->attribute = ""; - while($this->_symbol() != '`') + $this->T_value = ""; + while($this->_char() != '`') { - $this->attribute .= $this->_symbol(); + $this->T_value .= $this->_char(); $this->cursor++; } - $this->token = self::T_RawStringConstant; + $this->T = self::T_RawStringConstant; $this->cursor++; return; case '/': - if($this->_symbol() == '/') + if($this->_char() == '/') { $this->cursor++; - while($this->_symbol() !== false && $this->_symbol() != "\n") $this->cursor++; + while($this->_char() != '' && $this->_char() != "\n") $this->cursor++; break; } case '#': - while($this->_symbol() !== false && $this->_symbol() != "\n") $this->cursor++; + while($this->_char() != '' && $this->_char() != "\n") $this->cursor++; break; case '@': $start = $this->cursor - 1; - while(ctype_alnum($this->_symbol()) || $this->_symbol() == '_') + while(ctype_alnum($this->_char()) || $this->_char() == '_') $this->cursor++; - $this->token = self::T_Prop; - $this->attribute = substr($this->source, $start, $this->cursor - $start); + $this->T = self::T_Prop; + $this->T_value = substr($this->source, $start, $this->cursor - $start); return; //fall thru @@ -806,99 +813,95 @@ class mtgMetaInfoParser { //collect all chars of an identifier $start = $this->cursor - 1; - while(ctype_alnum($this->_symbol()) || $this->_symbol() == '_') + while(ctype_alnum($this->_char()) || $this->_char() == '_') $this->cursor++; - $this->attribute = substr($this->source, $start, $this->cursor - $start); + $this->T_value = substr($this->source, $start, $this->cursor - $start); - if(isset($this->idltypes[$this->attribute])) + if(isset($this->type2T[$this->T_value])) { - $this->token = $this->idltypes[$this->attribute]; + $this->T = $this->type2T[$this->T_value]; return; } - if($this->attribute == "true" || $this->attribute == "false") + if($this->T_value == "true" || $this->T_value == "false") { - $this->token = self::T_IntegerConstant; + $this->T = self::T_IntegerConstant; return; } //check for declaration keywords: - if($this->attribute == "struct") { $this->token = self::T_Struct; return; } - if($this->attribute == "interface") { $this->token = self::T_Interface; return; } - if($this->attribute == "enum") { $this->token = self::T_Enum; return; } - if($this->attribute == "RPC") { $this->token = self::T_RPC; return; } - if($this->attribute == "end") { $this->token = self::T_End; return; } - if($this->attribute == "extends") { $this->token = self::T_Extends; return; } - if($this->attribute == "implements") { $this->token = self::T_Implements; return; } - if($this->attribute == "func") { $this->token = self::T_Func; return; } + if($this->T_value == "struct") { $this->T = self::T_Struct; return; } + if($this->T_value == "interface") { $this->T = self::T_Interface; return; } + if($this->T_value == "enum") { $this->T = self::T_Enum; return; } + if($this->T_value == "RPC") { $this->T = self::T_RPC; return; } + if($this->T_value == "end") { $this->T = self::T_End; return; } + if($this->T_value == "extends") { $this->T = self::T_Extends; return; } + if($this->T_value == "implements") { $this->T = self::T_Implements; return; } + if($this->T_value == "func") { $this->T = self::T_Func; return; } //if not it's a user defined identifier - $this->token = self::T_Identifier; + $this->T = self::T_Identifier; return; } else if(ctype_digit($c) || $c == '-') { $start = $this->cursor - 1; - while(ctype_digit($this->_symbol())) $this->cursor++; - if($this->_symbol() == '.') + while(ctype_digit($this->_char())) $this->cursor++; + if($this->_char() == '.') { $this->cursor++; - while(ctype_digit($this->_symbol())) $this->cursor++; + while(ctype_digit($this->_char())) $this->cursor++; // see if this float has a scientific notation suffix. Both JSON // and C++ (through strtod() we use) have the same format: - if($this->_symbol() == 'e' || $this->_symbol() == 'E') + if($this->_char() == 'e' || $this->_char() == 'E') { $this->cursor++; - if($this->_symbol() == '+' || $this->_symbol() == '-') $this->cursor++; - while(ctype_digit($this->_symbol())) $this->cursor++; + if($this->_char() == '+' || $this->_char() == '-') $this->cursor++; + while(ctype_digit($this->_char())) $this->cursor++; } - $this->token = self::T_FloatConstant; + $this->T = self::T_FloatConstant; } else - $this->token = self::T_IntegerConstant; - $this->attribute = substr($this->source, $start, $this->cursor - $start); + $this->T = self::T_IntegerConstant; + $this->T_value = substr($this->source, $start, $this->cursor - $start); return; } - $this->_error("illegal character '$c'"); + $this->_error("Illegal character '$c'"); } } } - private function _nextIf($t) + private function _nextIf(int $t) : bool { - if(is_string($t)) - $t = ord($t); - $yes = $t === $this->token; + $yes = $t === $this->T; if($yes) $this->_next(); return $yes; } - private function _checkThenNext($t) + private function _checkThenNext(int $t) : string { - if(is_string($t)) - $t = ord($t); - if($t !== $this->token) + if($t !== $this->T) { - $this->_error("expecting '" . $this->_toStr($t) . "' instead got '" . $this->_toStr($this->token) . "'"); + $this->_error("Expecting '" . $this->_toStr($t) . "' instead got '" . $this->_toStr($this->T) . "'"); } - $attr = $this->attribute; + $attr = $this->T_value; $this->_next(); return $attr; } - private function _toStr($t) + private function _toStr(int $t) : string { - if($t < 1000) + if($t < self::T_NonOrdMark) return chr($t); - return $this->token_strs[$t]; + return $this->T2descr[$t]; } - private function _error($msg) + private function _error(string $msg) { - throw new Exception($msg . "(token: {$this->token}, attr: {$this->attribute})"); + throw new Exception($msg . " (T: {$this->T}, attr: {$this->T_value})"); } }