metagen/parser1.inc.php

974 lines
26 KiB
PHP

<?php
// Hand-written lexer and recursive-descent parser for meta description files.
// parse() reads a file (following '#include' lines) and registers the parsed
// enums, structs, interfaces, free functions, RPCs and services in the
// mtgMetaInfo instance passed to it.
class mtgMetaInfoParser implements mtgIMetaInfoParser
{
// Token identifiers. Single character tokens are represented by ord() of the
// character itself (see _nextT()/_toStr()); everything at or above
// T_NonOrdMark is a named token.
const T_NonOrdMark = 1000; //marks start of non 'ord(..)' values
const T_EOF = 1001;
const T_StringConstant = 1002;
const T_IntegerConstant = 1003;
const T_FloatConstant = 1004;
const T_Enum = 1005;
const T_RPC = 1006;
const T_End = 1007;
const T_UserSymbol = 1008;
const T_Struct = 1009;
const T_Prop = 1010;
const T_Extends = 1011;
const T_Func = 1012;
const T_RawStringConstant = 1013;
const T_Interface = 1014;
const T_Implements = 1015;
// NOTE(review): T_MinType and T_MaxType are used as exclusive bounds of the
// built-in type range. T_Service is a keyword, not a type, yet its value
// falls numerically between the two marks - range checks must account for it.
const T_MinType = 1019; //built-in types begin mark
const T_string = 1020;
const T_uint32 = 1021;
const T_int32 = 1022;
const T_uint16 = 1023;
const T_int16 = 1024;
const T_uint8 = 1025;
const T_int8 = 1026;
const T_float = 1027;
const T_uint64 = 1028;
const T_int64 = 1029;
const T_bool = 1030;
const T_blob = 1031;
const T_Service = 1032;
const T_MaxType = 1033; //built-in types end mark
//parser options; the keys read by this class are 'include_path' and 'valid_tokens'
private array $config = array();
//meta info being populated, set by parse()
private mtgMetaInfo $current_meta;
//resolved file path => parsed module, also guards against parsing a file twice
private array $parsed_files = array();
//files whose includes are currently being resolved, used for error messages
private array $file_stack = array();
//module of the file which is currently being parsed
private ?mtgMetaParsedModule $module = null;
//lexer state: current file path, its text (with '#include' lines blanked),
//position and character under the cursor, current line number
private string $file = "";
private string $source = "";
private int $cursor_pos = 0;
private string $cursor_char = '';
private int $line = 0;
//TODO: setting it an 'int' type makes PHPStan produce many
// false positives
//token numeric identifier
private $T = 0;
//token extra string value which depends on concrete T
private string $T_value = "";
//keyword text => token identifier, filled by _initTables()
/** @var array<string,int>*/
private $symbol2T = array();
//token identifier => human readable description, used by _toStr()
/** @var array<int,string>*/
private $T2descr = array();
//tokens declared via top level '@shared_tokens', merged into the tokens of
//the units parsed afterwards; reset for every file
private array $shared_tokens = array();
//stack of nested scopes, the last element is the innermost one (see _scope())
private array $scopes = array();
/**
 * Builds the token lookup tables and stores the parser configuration.
 *
 * The built-in property tokens are always appended to 'valid_tokens' and
 * 'include_path' defaults to the current directory when it is not set.
 *
 * @param array $config parser options ('include_path', 'valid_tokens')
 */
function __construct($config = array())
{
  $this->_initTables();
  self::_addDefaultTokens($config);
  //fall back to the current directory when no include path was configured
  $config['include_path'] ??= array('.');
  $this->config = $config;
}
/**
 * Fills the keyword => token table and the token => description table.
 *
 * Descriptions start as the flipped keyword table (so built-in types are
 * described by their keyword) and are then overridden for the named tokens.
 */
private function _initTables()
{
  $this->symbol2T = [
    //built-in types
    "string" => self::T_string,
    "uint32" => self::T_uint32,
    "int32" => self::T_int32,
    "uint16" => self::T_uint16,
    "int16" => self::T_int16,
    "uint8" => self::T_uint8,
    "int8" => self::T_int8,
    "float" => self::T_float,
    //'double' is lexed as the very same token as 'float'
    "double" => self::T_float,
    "uint64" => self::T_uint64,
    "int64" => self::T_int64,
    "bool" => self::T_bool,
    "blob" => self::T_blob,
    //boolean literals are lexed as integer constants
    "true" => self::T_IntegerConstant,
    "false" => self::T_IntegerConstant,
    //keywords
    "struct" => self::T_Struct,
    "interface" => self::T_Interface,
    "enum" => self::T_Enum,
    "RPC" => self::T_RPC,
    "service" => self::T_Service,
    "end" => self::T_End,
    "extends" => self::T_Extends,
    "implements" => self::T_Implements,
    "func" => self::T_Func,
  ];

  $named_tokens = [
    self::T_EOF => '<EOF>',
    self::T_StringConstant => '<StringConstant>',
    self::T_RawStringConstant => '<RawStringConstant>',
    self::T_IntegerConstant => '<IntegerConstant>',
    self::T_FloatConstant => '<FloatConstant>',
    self::T_Enum => '<enum>',
    self::T_RPC => '<RPC>',
    self::T_Service => '<Service>',
    self::T_End => '<end>',
    self::T_UserSymbol => '<Identifier>',
    self::T_Struct => '<struct>',
    self::T_Interface => '<interface>',
    self::T_Prop => '<@prop>',
    self::T_Extends => '<extends>',
    self::T_Implements => '<implements>',
    self::T_Func => '<func>',
  ];
  //array_replace() keeps the integer keys intact: existing entries are
  //overwritten in place, missing ones are appended
  $this->T2descr = array_replace(array_flip($this->symbol2T), $named_tokens);
}
/**
 * Appends the property tokens the parser itself understands to
 * $config['valid_tokens'], creating the list when it is missing.
 *
 * @param array $config parser options, modified in place
 */
private static function _addDefaultTokens(array &$config)
{
  if(!isset($config['valid_tokens']))
    $config['valid_tokens'] = array();

  $builtin_tokens = array('class_id', 'shared_tokens', 'enum_override', 'enum_replace');
  foreach($builtin_tokens as $token)
    $config['valid_tokens'][] = $token;
}
/**
 * Parses the given file (and everything it includes) into $meta.
 *
 * @param mtgMetaInfo $meta     container receiving the parsed units
 * @param string      $raw_file path to the file, resolved with realpath()
 * @throws Exception when the file does not exist or fails to parse
 */
function parse(mtgMetaInfo $meta, string $raw_file)
{
  $this->current_meta = $meta;

  $resolved = realpath($raw_file);
  if($resolved === false)
    throw new Exception("No such file '$raw_file'");

  $this->_parse($resolved);
}
/**
 * Parses a single file unless it was parsed already.
 *
 * Includes are resolved (and recursively parsed) first, only then the lexer
 * state is pointed at this file and its top level declarations are parsed.
 *
 * @param string $file resolved path of the file
 * @throws Exception prefixed with the file name (include errors) or with
 *                   "file@line" (parse errors); the original exception is
 *                   attached as 'previous'
 */
private function _parse(string $file)
{
  if(isset($this->parsed_files[$file]))
    return;
  $module = new mtgMetaParsedModule($file);
  //registered before parsing so that recursive includes of this file are no-ops
  $this->parsed_files[$file] = $module;
  $this->file_stack[] = $file;
  try
  {
    $source = file_get_contents($file);
    if($source === false)
      throw new Exception("Could not read file '$file'");
    $this->_resolveIncludes($module, $source);
  }
  catch(Exception $e)
  {
    throw new Exception(end($this->file_stack) . " : " . $e->getMessage(), 0, $e);
  }
  finally
  {
    //always unwind the stack: otherwise an outer file would report the
    //failure using the inner file name and the stack would stay corrupted
    array_pop($this->file_stack);
  }
  //NOTE: lexer state is set only now since parsing includes above reused it
  $this->module = $module;
  $this->file = $file;
  $this->source = $source;
  $this->line = 1;
  $this->cursor_pos = -1;
  $this->_cursorNext();
  $this->shared_tokens = array();
  try
  {
    $this->_nextT();
    while($this->T != self::T_EOF)
    {
      if($this->T == self::T_Prop)
        $this->_parseSharedTokens($this->_parsePropTokens());
      else if($this->T == self::T_Enum)
        $this->_parseEnum();
      else if($this->T == self::T_Struct)
        $this->_parseStruct();
      else if($this->T == self::T_Interface)
        $this->_parseInterface();
      else if($this->T == self::T_Func)
        $this->_parseFreeFunc();
      else if($this->T == self::T_RPC)
        $this->_parseRPC();
      else if($this->T == self::T_Service)
        $this->_parseService();
      else
        $this->_error("Unexpected symbol ('" . $this->_toStr($this->T) . "' " . $this->T_value . ")");
    }
  }
  catch(Exception $e)
  {
    throw new Exception("$file@{$this->line} : " . $e->getMessage() . " " . $e->getTraceAsString(), 0, $e);
  }
}
/**
 * Parses an included file and records it as an include of $module.
 *
 * @param mtgMetaParsedModule $module module containing the '#include'
 * @param string              $file   resolved path of the included file
 */
private function _parseInclude(mtgMetaParsedModule $module, string $file)
{
  //no-op when the file was parsed (or is being parsed) already
  $this->_parse($file);
  $included = $this->parsed_files[$file];
  $module->addInclude($included);
}
/**
 * Applies a top level '@shared_tokens' property, if present.
 *
 * The property value must be a JSON object/array; it replaces the current
 * set of shared tokens.
 *
 * @param array $tokens property tokens as returned by _parsePropTokens()
 * @throws Exception when the value is not valid JSON or not a JSON array/object
 */
private function _parseSharedTokens(array $tokens)
{
  if(!isset($tokens['shared_tokens']))
    return;
  //NOTE: decoding into a local first: the property is typed as 'array',
  //      assigning a null/scalar decode result to it directly would raise
  //      a TypeError before the check below could report a proper error
  $decoded = json_decode($tokens['shared_tokens'], true);
  if(!is_array($decoded))
    $this->_error("Invalid 'shared_tokens' formant, invalid JSON");
  $this->shared_tokens = $decoded;
}
/**
 * Parses a type reference starting at the current token.
 *
 * Handles function types, (dotted) user type names and, for any other
 * token, treats the token text as a built-in type name. A trailing '[]'
 * wraps the type into an array type.
 *
 * @param bool $can_be_multi when true a comma separated list of types is
 *                           accepted and returned as one multi-type
 * @return mtgTypeRef
 */
private function _parseType(bool $can_be_multi = false)
{
  $parsed = array();
  do
  {
    //origin points at the line where the type starts
    $origin = new mtgOrigin($this->file, $this->line);
    if($this->T == self::T_Func)
    {
      $func_type = $this->_parseFuncType();
      $ref = new mtgTypeRef($func_type, $this->_scope(), $origin);
    }
    else if($this->T == self::T_UserSymbol)
    {
      $user_type_name = $this->_parseDotName();
      $ref = new mtgTypeRef($user_type_name, $this->_scope(), $origin);
    }
    else
    {
      //callers are expected to have checked that this is a built-in type token
      $builtin_name = $this->T_value;
      $ref = new mtgTypeRef(new mtgBuiltinType($builtin_name), $this->_scope(), $origin);
      $this->_nextT();
    }

    if($this->T == ord('['))
    {
      $arr_origin = new mtgOrigin($this->file, $this->line);
      $this->_nextT();
      $this->_checkThenNext(ord(']'));
      $ref = new mtgTypeRef(new mtgArrType($ref), $this->_scope(), $arr_origin);
    }

    $parsed[] = $ref;
  }
  while($can_be_multi && $this->_nextIf(ord(',')));

  if(count($parsed) <= 1)
    return $parsed[0];

  return new mtgTypeRef(new mtgMultiType($parsed), $this->_scope(), new mtgOrigin($this->file, $this->line));
}
/**
 * Parses an anonymous function type: func(<type>, ...) [: <return types>].
 *
 * Arguments have no names in the source, they are named "_1", "_2", ...
 *
 * @return mtgMetaFunc function descriptor with an empty name
 */
private function _parseFuncType()
{
  $signature = new mtgMetaFunc('');
  $this->_nextT(); //skip 'func'
  $this->_checkThenNext(ord('('));

  $arg_index = 0;
  while(!$this->_nextIf(ord(')')))
  {
    //every argument but the first one must be preceded by a comma
    if($arg_index > 0)
      $this->_checkThenNext(ord(','));
    $arg_type = $this->_parseType();
    $arg_index++;
    $signature->addArg(new mtgMetaField("_$arg_index", $arg_type));
  }

  if($this->_nextIf(ord(':')))
    $signature->setReturnType($this->_parseType(true/*can be multi-type*/));

  return $signature;
}
/**
 * Processes every '#include <file>' line of $text.
 *
 * Each include is parsed right away and its line is replaced with an empty
 * one, so line numbers of the remaining text stay the same.
 *
 * @param mtgMetaParsedModule $module module the includes belong to
 * @param string              $text   file contents, modified in place
 */
private function _resolveIncludes(mtgMetaParsedModule $module, string &$text)
{
  $search_paths = $this->config['include_path'];
  $rows = explode("\n", $text);
  foreach($rows as $idx => $row)
  {
    if(!preg_match('~^#include\s+(\S+)~', $row, $found))
      continue;
    $this->_processInclude($module, $found[1], $search_paths);
    $rows[$idx] = "";
  }
  $text = implode("\n", $rows);
}
/**
 * Locates an included file in the include paths and parses it.
 *
 * The first include path where the file exists wins.
 *
 * @param mtgMetaParsedModule $module        module containing the '#include'
 * @param string              $include       file name as written in the source
 * @param array               $include_paths directories to search, in order
 * @throws Exception when the file is not found in any include path
 */
private function _processInclude(mtgMetaParsedModule $module, string $include, array $include_paths)
{
  foreach($include_paths as $dir)
  {
    $candidate = realpath($dir . "/" . $include);
    if($candidate === false)
      continue;
    $this->_parseInclude($module, $candidate);
    return;
  }
  throw new Exception("#include {$include} can't be resolved(include path is '". implode(':', $include_paths) . "')");
}
/**
 * Parses a '|' separated list of identifiers: A | B | C.
 *
 * Parsing stops at the first token which is not an identifier or when an
 * identifier is not followed by '|'.
 *
 * @return string[] identifiers in order of appearance
 */
private function _parseEnumOrValues()
{
  $names = array();
  while($this->T == self::T_UserSymbol)
  {
    $names[] = $this->T_value;
    $this->_nextT();
    if(!$this->_nextIf(ord('|')))
      break;
  }
  return $names;
}
/**
 * Parses an enum declaration: enum <name> [@props] (<key> = <value>)* end.
 *
 * A value is either an integer constant or a '|' separated list of other
 * keys. Enums carrying the 'enum_override' or 'enum_replace' token are not
 * registered, instead they are applied to an already registered enum.
 *
 * @param bool $is_global whether the enum is declared at the top level,
 *                        only such enums are registered as units
 * @return mtgMetaEnum the parsed enum
 */
private function _parseEnum($is_global = true)
{
  $this->_nextT(); //skip 'enum'
  $name = $this->_parseDotName();
  $enum = new mtgMetaEnum($name);

  $tokens = $this->shared_tokens;
  if($this->T == self::T_Prop)
    $tokens = array_merge($tokens, $this->_parsePropTokens());
  $enum->setTokens($tokens);

  $or_values = array();
  while(!$this->_nextIf(self::T_End))
  {
    $key = $this->T_value;
    $this->_checkThenNext(self::T_UserSymbol);
    $this->_checkThenNext(ord('='));
    if($this->T == self::T_UserSymbol)
    {
      //value is a combination of other keys
      $or_values[$key] = $this->_parseEnumOrValues();
      continue;
    }
    $value = $this->T_value;
    $this->_checkThenNext(self::T_IntegerConstant);
    $enum->addValue($key, $value);
  }
  $enum->addOrValues($or_values);

  if($enum->hasToken('enum_override'))
  {
    //NOTE: special case for enums when we allow to 'override' the original one,
    //      with additional values
    if(!$is_global)
      $this->_error("Override supported for global enums only");
    $target = $this->current_meta->findUnit($enum->getMetaId());
    if(!$target)
      throw new Exception("Not found '{$name}' enum to override values");
    if(!($target->object instanceof mtgMetaEnum))
      throw new Exception("Not an enum struct '{$name}'");
    $target->object->override($enum);
  }
  else if($enum->hasToken('enum_replace'))
  {
    //NOTE: special case for enums when we allow to 'replace' the original one,
    //      with additional values
    if(!$is_global)
      $this->_error("Replace supported for global enums only");
    $target = $this->current_meta->findUnit($enum->getMetaId());
    if(!$target)
      throw new Exception("Not found '{$name}' enum to replace values");
    if(!($target->object instanceof mtgMetaEnum))
      throw new Exception("Not an enum struct '{$name}'");
    $target->object->replace($enum);
  }
  else if($is_global)
  {
    $this->_addUnit(new mtgMetaInfoUnit($this->module, $enum));
  }

  return $enum;
}
/**
 * Tells whether the token identifier denotes a built-in type.
 *
 * @param int $t token identifier
 * @return bool true for tokens strictly between T_MinType and T_MaxType,
 *              except T_Service
 */
static private function _isBuiltinType(int $t) : bool
{
  //NOTE: T_Service is numbered inside the built-in types range but it is
  //      the 'service' keyword, not a type, hence the explicit exclusion
  return $t > self::T_MinType && $t < self::T_MaxType && $t !== self::T_Service;
}
/**
 * Makes $scope the innermost scope.
 *
 * @param mtgScope $scope scope to put on top of the scope stack
 */
private function _pushScope(mtgScope $scope)
{
  array_push($this->scopes, $scope);
}
/**
 * Leaves the innermost scope, i.e. the one pushed last by _pushScope().
 */
private function _popScope()
{
  //NOTE: must remove the last element: _pushScope() appends and _scope()
  //      reads the last element, removing the first one would drop the
  //      outermost scope instead once scopes are nested
  array_pop($this->scopes);
}
/**
 * Returns the scope new type references are resolved in.
 *
 * @return mtgScope the innermost pushed scope or, when there is none,
 *                  the module being parsed
 */
private function _scope() : mtgScope
{
  $depth = count($this->scopes);
  return $depth === 0 ? $this->module : $this->scopes[$depth - 1];
}
/**
 * Parses a sequence of '<name> : <type> [@props]' fields.
 *
 * @param callable $next_doer invoked before every field, a truthy result
 *                            ends the list; it is responsible for consuming
 *                            the terminating token
 * @return mtgMetaField[] fields in order of appearance
 */
private function _parseFields(callable $next_doer)
{
  $fields = array();
  while(!$next_doer())
  {
    if($this->T != self::T_UserSymbol)
      $this->_error("Unexpected fields symbol");

    $field_name = $this->T_value;
    $this->_nextT();
    $this->_checkThenNext(ord(':'));

    $is_type_start =
      $this->T == self::T_UserSymbol ||
      $this->T == self::T_Func ||
      self::_isBuiltinType($this->T);
    if(!$is_type_start)
      $this->_error("Type expected");

    $field = new mtgMetaField($field_name, $this->_parseType());
    if($this->T == self::T_Prop)
      $field->setTokens($this->_parsePropTokens());
    $fields[] = $field;
  }
  return $fields;
}
/**
 * Parses one or more function declarations up to (and including) 'end'.
 *
 * Expects the 'func' keyword of the first function to be consumed already;
 * every following function must be introduced with 'func'.
 *
 * @return mtgMetaFunc[] functions in order of appearance
 * @throws Exception when a function is followed by anything but 'func' or 'end'
 */
private function _parseFuncs()
{
  $funcs = array();
  while(true)
  {
    $funcs[] = $this->_parseFunc();
    if($this->_nextIf(self::T_End))
      break;
    //NOTE: the separator used to be skipped blindly, which silently
    //      swallowed any stray token found between two functions
    $this->_checkThenNext(self::T_Func);
  }
  return $funcs;
}
/**
 * Parses a possibly dotted name: <identifier>(.<identifier>)*.
 *
 * @return string the name with its parts joined by '.'
 * @throws Exception when an identifier is expected but something else is found
 */
private function _parseDotName() : string
{
  $parts = array();
  do
  {
    if($this->T != self::T_UserSymbol)
      $this->_error("Unexpected name symbol");
    $parts[] = $this->T_value;
    $this->_nextT();
  }
  //a dot means one more identifier must follow
  while($this->_nextIf(ord('.')));

  return implode('.', $parts);
}
/**
 * Parses a function signature: <name>([@props] <fields>) [: <return types>].
 *
 * Expects the 'func' keyword to be consumed already.
 *
 * @return mtgMetaFunc
 * @throws Exception when ':' is not followed by a type
 */
private function _parseFunc() : mtgMetaFunc
{
  $func_name = $this->_parseDotName();
  $func = new mtgMetaFunc($func_name);

  $this->_checkThenNext(ord('('));
  //function tokens are placed right after the opening parenthesis
  if($this->T == self::T_Prop)
    $func->setTokens($this->_parsePropTokens());

  $func->setArgs($this->_parseFields(fn() => $this->_nextIf(ord(')'))));

  //return type is optional
  if(!$this->_nextIf(ord(':')))
    return $func;

  $is_type_start =
    $this->T == self::T_UserSymbol ||
    $this->T == self::T_Func ||
    self::_isBuiltinType($this->T);
  if(!$is_type_start)
    $this->_error("Unexpected func type");

  $func->setReturnType($this->_parseType(true/*can be multi-type*/));
  return $func;
}
/**
 * Registers a parsed unit both in the meta info being populated and in
 * the module of the file being parsed.
 *
 * @param mtgMetaInfoUnit $unit unit to register
 */
private function _addUnit(mtgMetaInfoUnit $unit)
{
  //global registry first, then the per file module
  $meta = $this->current_meta;
  $meta->addUnit($unit);
  $this->module->addUnit($unit);
}
/**
 * Parses a top level function declaration and registers it as a unit.
 *
 * Shared tokens are merged in first so that the function own tokens
 * take precedence over them.
 */
private function _parseFreeFunc()
{
  $this->_nextT(); //skip 'func'
  $func = $this->_parseFunc();

  $merged_tokens = array_merge($this->shared_tokens, $func->getTokens());
  $func->setTokens($merged_tokens);

  $unit = new mtgMetaInfoUnit($this->module, $func);
  $this->_addUnit($unit);
}
/**
 * Parses a struct declaration:
 *   struct <name> [extends <parent>] [implements <iface>, ...] [@props]
 *     <fields>
 *     [func <function> ...]
 *   end
 *
 * The struct is registered as a unit right after its header is parsed,
 * before its tokens, fields and functions are filled in.
 */
private function _parseStruct()
{
  $this->_nextT(); //skip 'struct'
  $struct_origin = new mtgOrigin($this->file, $this->line);
  $struct_name = $this->_parseDotName();

  $parent_ref = null;
  if($this->T == self::T_Extends)
  {
    $this->_nextT();
    $parent_origin = new mtgOrigin($this->file, $this->line);
    $parent_ref = new mtgTypeRef($this->_parseDotName(), $this->_scope(), $parent_origin);
  }

  $iface_refs = array();
  if($this->T == self::T_Implements)
  {
    do
    {
      //skips 'implements' on the first pass and ',' on the following ones
      $this->_nextT();
      $iface_origin = new mtgOrigin($this->file, $this->line);
      $iface_refs[] = new mtgTypeRef($this->_parseDotName(), $this->_scope(), $iface_origin);
    }
    while($this->T == ord(','));
  }

  $struct = new mtgMetaStruct($struct_name, array(), $parent_ref, array(), $iface_refs);
  $struct->setOrigin($struct_origin);
  $this->_addUnit(new mtgMetaInfoUnit($this->module, $struct));

  $tokens = $this->shared_tokens;
  if($this->T == self::T_Prop)
    $tokens = array_merge($tokens, $this->_parsePropTokens());
  $struct->setTokens($tokens);

  //fields end either with 'end' or with the first 'func'
  $has_funcs = false;
  $fields = $this->_parseFields(
    function() use(&$has_funcs)
    {
      if($this->_nextIf(self::T_End))
        return true;
      $has_funcs = $this->_nextIf(self::T_Func);
      return $has_funcs;
    }
  );
  foreach($fields as $field)
    $struct->addField($field);

  if(!$has_funcs)
    return;

  foreach($this->_parseFuncs() as $func)
    $struct->addFunc($func);
}
/**
 * Parses an interface declaration:
 *   interface <name> [@props] [func <function> ...] end
 *
 * The interface is registered as a unit before its body is parsed.
 *
 * @throws Exception when the body starts with anything but 'func' or 'end'
 */
private function _parseInterface()
{
  $this->_nextT(); //skip 'interface'
  $name = $this->_parseDotName();
  $s = new mtgMetaInterface($name);
  $this->_addUnit(new mtgMetaInfoUnit($this->module, $s));
  $tokens = $this->shared_tokens;
  if($this->T == self::T_Prop)
    $tokens = array_merge($tokens, $this->_parsePropTokens());
  $s->setTokens($tokens);
  //empty interface
  if($this->_nextIf(self::T_End))
    return;
  //NOTE: the first body token used to be skipped without any check,
  //      an interface body may contain functions only
  $this->_checkThenNext(self::T_Func);
  $funcs = $this->_parseFuncs();
  foreach($funcs as $fn)
    $s->addFunc($fn);
}
/**
 * Parses an RPC declaration:
 *   RPC <code> <name>([@props] <request fields>) <response fields> end
 *
 * @param bool $is_global whether the RPC is declared at the top level;
 *                        global RPCs get 'RPC_' prefixed names and are
 *                        registered as units
 * @return mtgMetaRPC the parsed RPC
 */
private function _parseRPC($is_global = true)
{
  $this->_nextT(); //skip 'RPC'
  $code = $this->T_value;
  $this->_checkThenNext(self::T_IntegerConstant);
  $name = $this->_parseDotName();
  $this->_checkThenNext(ord('('));

  $tokens = $this->shared_tokens;
  if($this->T == self::T_Prop)
    $tokens = array_merge($tokens, $this->_parsePropTokens());

  $req_fields = $this->_parseFields(fn() => $this->_nextIf(ord(')')));
  $rsp_fields = $this->_parseFields(fn() => $this->_nextIf(self::T_End));

  if($is_global)
  {
    $req_name = "RPC_REQ_$name";
    $rsp_name = "RPC_RSP_$name";
    $rpc_name = "RPC_$name";
  }
  else
  {
    $req_name = "Request";
    $rsp_name = "Response";
    $rpc_name = $name;
  }

  $req = new mtgMetaPacket($code, $req_name);
  $req->setFields($req_fields);
  $rsp = new mtgMetaPacket($code, $rsp_name);
  $rsp->setFields($rsp_fields);

  $rpc = new mtgMetaRPC($rpc_name, $code, $req, $rsp, $tokens);
  if($is_global)
    $this->_addUnit(new mtgMetaInfoUnit($this->module, $rpc));
  return $rpc;
}
/**
 * Parses a service declaration:
 *   service <name> [@props] (<RPC declaration> | <enum declaration>)* end
 *
 * The service acts as the current scope while its body is parsed and is
 * registered as a unit once the body is complete.
 *
 * @throws Exception when the body contains anything but RPCs and enums
 */
private function _parseService()
{
  $this->_nextT(); //skip 'service'
  $name = $this->_parseDotName();
  $service = new mtgMetaService($name, $this->_scope());
  $this->_pushScope($service);
  try
  {
    $tokens = $this->shared_tokens;
    if($this->T == self::T_Prop)
      $tokens = array_merge($tokens, $this->_parsePropTokens());
    $service->setTokens($tokens);
    while(true)
    {
      if($this->_nextIf(self::T_End))
        break;
      if($this->T == self::T_RPC)
        $service->addRPC($this->_parseRPC(false));
      else if($this->T == self::T_Enum)
        $service->addUserType($this->_parseEnum(false));
      else
        $this->_error("Unsupported type");
    }
  }
  finally
  {
    //leave the service scope even on errors so that a reused parser
    //does not resolve subsequent types in a stale scope
    $this->_popScope();
  }
  $this->_addUnit(new mtgMetaInfoUnit($this->module, $service));
}
/**
 * Parses consecutive property tokens: @<name>[:<value>] ...
 *
 * A value spans everything up to the end of the line, the next property
 * token or the end of the file. It is built by concatenating the text of
 * the lexed tokens, so whitespace between them is not preserved; string
 * constants are wrapped into double quotes again.
 *
 * @return array<string,?string> property name (without '@') => value,
 *                               null when the property has no value
 * @throws Exception when a property name is not allowed by the config
 */
private function _parsePropTokens() : array
{
  $prop_tokens = array();
  while(true)
  {
    if($this->T != self::T_Prop)
      break;
    $name = ltrim($this->T_value, '@');
    $this->_validatePropToken($name);
    $this->_nextT();
    $value = null;
    if($this->T == ord(':'))
    {
      //let's read the value
      while(true)
      {
        //TODO: The code below is ugly and must be heavily refactored,
        // it just tries to be convenient and keep BC: any token property
        // value can have almost any kind of symbols excluding new line.
        // In the future we should restrict property values to certain types only
        $this->_nextT(true/*stop on new line*/);
        if($this->T == ord("\n"))
        {
          $this->_nextT();
          break;
        }
        //NOTE: EOF must end the value as well: the lexer keeps returning
        //      T_EOF once the source is exhausted, without this check a
        //      value on the last line with no trailing new line loops forever
        else if($this->T == self::T_Prop || $this->T == self::T_EOF)
        {
          break;
        }
        else
        {
          $tmp = $this->T_value;
          if($this->T == self::T_StringConstant)
            $tmp = "\"$tmp\"";
          if($value === null)
            $value = '';
          $value .= $tmp;
        }
      }
    }
    $prop_tokens[$name] = $value;
  }
  return $prop_tokens;
}
/**
 * Checks the property name against the configured list of valid tokens.
 *
 * No validation happens when 'valid_tokens' is not set or is not an array.
 *
 * @param string $name property name without the leading '@'
 * @throws Exception when the name is not in the list
 */
private function _validatePropToken(string $name)
{
  $allowed = $this->config['valid_tokens'] ?? null;
  if(!is_array($allowed))
    return;

  if(in_array($name, $allowed))
    return;

  throw new Exception("Unknown property token '@$name'");
}
/**
 * Lexer: reads the next token into $this->T / $this->T_value.
 *
 * Whitespace and comments are skipped. Single character tokens get
 * T == ord(char), everything else gets one of the T_* constants.
 *
 * @param bool $stop_on_new_line when true a new line is returned as a
 *                               token (T == ord("\n")) instead of being skipped
 * @throws Exception on illegal characters and malformed/unterminated constants
 */
private function _nextT(bool $stop_on_new_line = false)
{
  while(true)
  {
    $c = $this->cursor_char;
    //setting default values
    $this->T = ord($c);
    $this->T_value = $c;
    //NOTE: current 'cursor_pos' is ahead of 'c' by one character
    $this->_cursorNext();
    switch($c)
    {
      case '': $this->T = self::T_EOF; return;
      case "\n": if($stop_on_new_line) return; else break;
      case ' ': case "\r": case "\t": break;
      case '{': case '}': case '(': case ')': case '[': case ']': case '|': return;
      case ',': case ':': case ';': case '=': return;
      case '.':
        if(!ctype_digit($this->cursor_char))
          return;
        $this->_error("Floating point constant can't start with .");
        break;
      case '"':
        $this->T_value = '';
        while($this->cursor_char != '"')
        {
          //NOTE: this check also catches EOF since ord('') is 0
          if(ord($this->cursor_char) < ord(' '))
            $this->_error("Illegal character in string constant");
          if($this->cursor_char == '\\')
          {
            $this->_cursorNext();
            switch($this->cursor_char)
            {
              case 'n': $this->T_value .= "\n"; $this->_cursorNext(); break;
              case 't': $this->T_value .= "\t"; $this->_cursorNext(); break;
              case 'r': $this->T_value .= "\r"; $this->_cursorNext(); break;
              case '"': $this->T_value .= '"'; $this->_cursorNext(); break;
              case '\\': $this->T_value .= '\\'; $this->_cursorNext(); break;
              default: $this->_error("Unknown escape code in string constant"); break;
            }
          }
          else // printable chars + UTF-8 bytes
          {
            $this->T_value .= $this->cursor_char;
            $this->_cursorNext();
          }
        }
        $this->T = self::T_StringConstant;
        //skip the closing quote
        $this->_cursorNext();
        return;
      case '`':
        //raw string: everything up to the next backtick is taken verbatim
        $this->T_value = '';
        while($this->cursor_char != '`')
        {
          //NOTE: the cursor stays at '' once the source is exhausted,
          //      without this check an unterminated raw string loops forever
          if($this->cursor_char === '')
            $this->_error("Unterminated raw string constant");
          $this->T_value .= $this->cursor_char;
          $this->_cursorNext();
        }
        $this->T = self::T_RawStringConstant;
        //skip the closing backtick
        $this->_cursorNext();
        return;
      case '/':
        if($this->cursor_char == '/')
        {
          $this->_cursorNext();
          //@phpstan-ignore-next-line
          while($this->cursor_char != '' && $this->cursor_char != "\n")
            $this->_cursorNext();
          //@phpstan-ignore-next-line
          break;
        }
        //NOTE(review): a single '/' falls through to the '#' case below and
        //              is treated as a comment up to the end of the line as
        //              well - kept as is for BC, confirm it is intended
      case '#':
        while($this->cursor_char != '' && $this->cursor_char != "\n")
          $this->_cursorNext();
        break;
      case '@':
        //property token: '@' followed by alphanumerics/underscores,
        //T_value includes the leading '@'
        $start = $this->cursor_pos - 1;
        while(ctype_alnum($this->cursor_char) || $this->cursor_char == '_')
          $this->_cursorNext();
        $this->T = self::T_Prop;
        $this->T_value = substr($this->source, $start, $this->cursor_pos - $start);
        return;
      default:
        //symbols
        if(ctype_alpha($c))
        {
          //collect all chars of an identifier
          $start = $this->cursor_pos - 1;
          while(ctype_alnum($this->cursor_char) || $this->cursor_char == '_')
            $this->_cursorNext();
          $this->T_value = substr($this->source, $start, $this->cursor_pos - $start);
          if(isset($this->symbol2T[$this->T_value]))
            $this->T = $this->symbol2T[$this->T_value];
          else //otherwise it's assumed to be a user defined symbol
            $this->T = self::T_UserSymbol;
          return;
        }
        //digits
        else if(ctype_digit($c) || $c == '-')
        {
          $start = $this->cursor_pos - 1;
          while(ctype_digit($this->cursor_char))
            $this->_cursorNext();
          if($this->cursor_char == '.')
          {
            $this->_cursorNext();
            while(ctype_digit($this->cursor_char))
              $this->_cursorNext();
            // see if this float has a scientific notation suffix. Both JSON
            // and C++ (through strtod() we use) have the same format:
            //@phpstan-ignore-next-line
            if($this->cursor_char == 'e' || $this->cursor_char == 'E')
            {
              $this->_cursorNext();
              if($this->cursor_char == '+' || $this->cursor_char == '-')
                $this->_cursorNext();
              while(ctype_digit($this->cursor_char))
                $this->_cursorNext();
            }
            $this->T = self::T_FloatConstant;
          }
          else
            $this->T = self::T_IntegerConstant;
          $this->T_value = substr($this->source, $start, $this->cursor_pos - $start);
          return;
        }
        $this->_error("Illegal character '$c'");
    }
  }
}
/**
 * Advances the cursor by one character.
 *
 * Updates cursor_char ('' once the end of the source is reached) and
 * increments the line counter when the cursor lands on a new line character.
 */
private function _cursorNext()
{
  ++$this->cursor_pos;
  $next = substr($this->source, $this->cursor_pos, 1);
  //EOF is always represented as '',
  //keeping BC with substr(..) before php 8.0
  //@phpstan-ignore-next-line
  $this->cursor_char = is_bool($next) ? '' : $next;

  if($this->cursor_char === "\n")
    $this->line++;
}
/**
 * Consumes the current token if it is the given one.
 *
 * @param int $t expected token identifier
 * @return bool true when the token matched (and the lexer advanced)
 */
private function _nextIf(int $t) : bool
{
  if($t !== $this->T)
    return false;

  $this->_nextT();
  return true;
}
/**
 * Requires the current token to be the given one and consumes it.
 *
 * @param int $t expected token identifier
 * @throws Exception when the current token is a different one
 */
private function _checkThenNext(int $t)
{
  if($t === $this->T)
  {
    $this->_nextT();
    return;
  }
  //_error() always throws
  $this->_error("Expecting '" . $this->_toStr($t) . "' instead got '" . $this->_toStr($this->T) . "'");
}
/**
 * Returns a printable representation of a token identifier.
 *
 * @param int $t token identifier
 * @return string the character itself for single character tokens,
 *                the entry of the description table otherwise
 */
private function _toStr(int $t) : string
{
  //identifiers below the mark are ord() values of single characters
  return $t < self::T_NonOrdMark ? chr($t) : $this->T2descr[$t];
}
/**
 * Aborts parsing with an error, always throws.
 *
 * @param string $msg error description; the current token value and
 *                    identifier are appended to it
 * @throws Exception
 */
private function _error(string $msg)
{
  $token_info = " ('" . $this->T_value . "', " . $this->T . ")";
  throw new Exception($msg . $token_info);
}
}