metagen/parser.inc.php

861 lines
22 KiB
PHP

<?php
class mtgMetaInfoParser
{
// Parser options. Keys read by this class: 'include_path' (directories
// searched for #include) and 'valid_tokens' (optional list of allowed @prop names).
private $config = array();
// Meta info container the parsed units are added to; set by parse().
private $current_meta;
// Set of files already visited by _parse() (file path => true).
private $parsed_files = array();
// Files whose #include directives are currently being resolved.
private $file_stack = array();
// Tokenizer state: file being tokenized, its contents, byte offset into
// the contents and the current line number.
private $file = "";
private $source = "";
private $cursor = 0;
private $line = 0;
// Current token code and its textual attribute.
private $token = "";
private $attribute = "";
// Builtin type name => token code, and token code => printable name.
private $idltypes = array();
private $token_strs = array();
// Token codes. Single character tokens use their ord() value, named tokens
// are numbered from 1001 up (_toStr() treats codes below 1000 as characters).
const T_EOF = 1001;
const T_StringConstant = 1002;
const T_IntegerConstant = 1003;
const T_FloatConstant = 1004;
const T_Enum = 1005;
const T_RPC = 1006;
const T_End = 1007;
const T_Identifier = 1008;
const T_Struct = 1009;
const T_Prop = 1010;
const T_Extends = 1011;
const T_Func = 1012;
const T_RawStringConstant = 1013;
const T_Interface = 1014;
// Builtin type tokens. NOTE: they must stay contiguous, the parser checks
// for "any builtin type" with a T_string..T_blob range comparison.
const T_string = 1020;
const T_uint32 = 1021;
const T_int32 = 1022;
const T_uint16 = 1023;
const T_int16 = 1024;
const T_uint8 = 1025;
const T_int8 = 1026;
const T_float = 1027;
const T_uint64 = 1028;
const T_int64 = 1029;
const T_bool = 1030;
const T_blob = 1031;
/**
 * Creates a parser.
 *
 * @param array $config Options. Keys read by this class:
 *   - 'include_path': list of directories searched for #include targets
 *     (defaults to array('.'))
 *   - 'valid_tokens': optional list of allowed @prop names
 */
function __construct($config = array())
{
  $this->config = $config;
  if(!isset($this->config['include_path']))
    $this->config['include_path'] = array('.');

  $this->idltypes = array(
    "string" => self::T_string,
    "uint32" => self::T_uint32,
    "int32" => self::T_int32,
    "uint16" => self::T_uint16,
    "int16" => self::T_int16,
    "uint8" => self::T_uint8,
    "int8" => self::T_int8,
    "float" => self::T_float,
    "double" => self::T_float,
    "uint64" => self::T_uint64,
    "int64" => self::T_int64,
    "bool" => self::T_bool,
    "blob" => self::T_blob,
  );

  //NOTE: not array_flip(): "float" and "double" share T_float and array_flip()
  //      keeps the last key, which made diagnostics print 'double' for T_float.
  //      The first name listed for a token is the one used for printing.
  $this->token_strs = array();
  foreach($this->idltypes as $type_name => $type_token)
  {
    if(!isset($this->token_strs[$type_token]))
      $this->token_strs[$type_token] = $type_name;
  }

  $this->token_strs[self::T_EOF] = '<EOF>';
  $this->token_strs[self::T_StringConstant] = '<StringConstant>';
  $this->token_strs[self::T_RawStringConstant] = '<RawStringConstant>';
  $this->token_strs[self::T_IntegerConstant] = '<IntegerConstant>';
  $this->token_strs[self::T_FloatConstant] = '<FloatConstant>';
  $this->token_strs[self::T_Enum] = '<enum>';
  $this->token_strs[self::T_RPC] = '<RPC>';
  $this->token_strs[self::T_End] = '<end>';
  $this->token_strs[self::T_Identifier] = '<Identifier>';
  $this->token_strs[self::T_Struct] = '<struct>';
  $this->token_strs[self::T_Interface] = '<interface>';
  $this->token_strs[self::T_Prop] = '<@prop>';
  $this->token_strs[self::T_Extends] = '<extends>';
  $this->token_strs[self::T_Func] = '<func>';
}
/**
 * Parses a file (and everything it includes) into the given meta info and
 * then asks mtgTypeRef to verify that all type references got resolved.
 *
 * @param mtgMetaInfo $meta     container receiving the parsed units
 * @param string      $raw_file path to the file to parse
 * @throws Exception if the path can't be resolved or parsing fails
 */
function parse(mtgMetaInfo $meta, $raw_file)
{
  $this->current_meta = $meta;

  $resolved_path = realpath($raw_file);
  if($resolved_path === false)
  {
    throw new Exception("No such file '$raw_file'");
  }

  $this->_parse($resolved_path);
  mtgTypeRef::checkAllResolved();
}
/**
 * Parses one file unless it has been visited already.
 *
 * Files starting with '<?php' are simply included. For any other file the
 * #include directives are resolved first (recursively, through this very
 * method) and only then the tokenizer state is pointed at the file: nested
 * parsing overwrites that state, so it must be set up after the includes.
 *
 * @param string $file resolved path of the file
 * @throws Exception prefixed with the file (and line, for syntax errors)
 */
private function _parse($file)
{
  if(isset($this->parsed_files[$file]))
    return;
  $this->parsed_files[$file] = true;

  $this->file_stack[] = $file;
  $source = file_get_contents($file);
  $is_php = false;
  try
  {
    if($source === false)
      throw new Exception("Could not read file '$file'");
    //PHP include
    if(strpos($source, '<?php') === 0)
    {
      include_once($file);
      $is_php = true;
    }
    else
    {
      self::resolveIncludes($source, $this->config['include_path'], array($this, '_parse'));
    }
  }
  catch(Exception $e)
  {
    //NOTE: keep the stack balanced on the error path as well, otherwise the
    //      including files would report the failure under the name of the
    //      innermost file instead of their own
    array_pop($this->file_stack);
    throw new Exception($file . " : " . $e->getMessage(), 0, $e);
  }
  array_pop($this->file_stack);

  if($is_php)
    return;

  $this->file = $file;
  $this->source = $source;
  $this->cursor = 0;
  $this->line = 1;
  try
  {
    $this->_next();
    while($this->token != self::T_EOF)
    {
      if($this->token == self::T_Enum)
        $this->_parseEnum();
      else if($this->token == self::T_Struct)
        $this->_parseStruct();
      else if($this->token == self::T_Interface)
        $this->_parseInterface();
      else if($this->token == self::T_Func)
        $this->_parseFreeFunc();
      else if($this->token == self::T_RPC)
        $this->_parseRPC();
      else
        $this->_error("unexpected token ('" . $this->_toStr($this->token) . "' " . $this->attribute . ")");
    }
  }
  catch(Exception $e)
  {
    throw new Exception("$file@{$this->line} : " . $e->getMessage() . " " . $e->getTraceAsString(), 0, $e);
  }
}
/**
 * Parses a type reference starting at the current token: a func type, a
 * (dotted) user type name or, for any other token, a builtin type named by
 * the token's attribute. A trailing '[]' wraps the type into an array type.
 *
 * @param bool $can_be_multi when true a comma separated list of types is
 *                           accepted and returned as a multi type
 * @return mtgTypeRef
 */
private function _parseType($can_be_multi = false)
{
  $parsed = array();
  for(;;)
  {
    if($this->token == self::T_Func)
    {
      $ref = new mtgTypeRef($this->_parseFuncType());
    }
    else if($this->token == self::T_Identifier)
    {
      $user_type = $this->_parseDotName();
      $ref = new mtgTypeRef($user_type, $this->current_meta, $this->file, $this->line);
    }
    else
    {
      $builtin_name = $this->attribute;
      $ref = new mtgTypeRef(new mtgBuiltinType($builtin_name));
      $this->_next();
    }

    //array suffix
    if($this->token == ord('['))
    {
      $this->_next();
      $this->_checkThenNext(']');
      $ref = new mtgTypeRef(new mtgArrType($ref));
    }
    $parsed[] = $ref;

    if(!$can_be_multi || $this->token != ord(','))
      break;
    $this->_next();
  }

  if(sizeof($parsed) > 1)
    return new mtgTypeRef(new mtgMultiType($parsed));
  return $parsed[0];
}
/**
 * Parses an anonymous function type: 'func' '(' [type {',' type}] ')'
 * optionally followed by ':' and a (possibly multi) return type.
 * Arguments have no names in the source, they are named "_1", "_2", ...
 *
 * @return mtgMetaFunc function object with an empty name
 */
private function _parseFuncType()
{
  $signature = new mtgMetaFunc('');
  $this->_next();
  $this->_checkThenNext('(');

  $arg_count = 0;
  while($this->token != ord(')'))
  {
    if($arg_count > 0)
      $this->_checkThenNext(',');
    $arg_type = $this->_parseType();
    ++$arg_count;
    $signature->addArg(new mtgMetaField("_$arg_count", $arg_type));
  }
  //skip the closing ')'
  $this->_next();

  if($this->token == ord(':'))
  {
    $this->_next();
    $signature->setReturnType($this->_parseType(true/*can be multi-type*/));
  }
  return $signature;
}
/**
 * Processes every line starting with '#include <target>' and blanks it out
 * in the text, so that the line numbering of the text is preserved.
 *
 * @param string   $text          text to process, modified in place
 * @param array    $include_paths directories searched for the targets
 * @param callable $callback      invoked with the resolved path of each target
 * @throws Exception if a target can't be found in any include path
 */
static function resolveIncludes(&$text, array $include_paths, $callback)
{
  $rows = explode("\n", $text);
  foreach($rows as $idx => $row)
  {
    if(!preg_match('~^#include\s+(\S+)~', $row, $found))
      continue;
    self::processInclude($found[1], $include_paths, $callback);
    $rows[$idx] = "";
  }
  $text = implode("\n", $rows);
}
/**
 * Looks the include target up in the include paths (first hit wins) and
 * passes its resolved path to the callback.
 *
 * @param string   $include       include target as written in the source
 * @param array    $include_paths directories to search, in order
 * @param callable $callback      invoked with the resolved path
 * @throws Exception if no include path contains the target
 */
static function processInclude($include, array $include_paths, $callback)
{
  foreach($include_paths as $dir)
  {
    $resolved = realpath($dir . "/" . $include);
    if($resolved === false)
      continue;
    call_user_func($callback, $resolved);
    return;
  }
  throw new Exception("#include {$include} can't be resolved(include path is '". implode(':', $include_paths) . "')");
}
/**
 * Collects identifiers separated by '|' (e.g. A | B | C), stopping at the
 * first token which doesn't continue the sequence.
 *
 * @return array list of identifier names
 */
private function _parseEnumOrValues()
{
  $names = array();
  while($this->token == self::T_Identifier)
  {
    $names[] = $this->attribute;
    $this->_next();
    if($this->token != ord('|'))
      break;
    $this->_next();
  }
  return $names;
}
/**
 * Parses an enum declaration up to and including its 'end' keyword and
 * registers it in the current meta info. Each entry is 'Key = <integer>'
 * or 'Key = A | B | ...' where the right side names other identifiers.
 */
private function _parseEnum()
{
  //skip the 'enum' keyword
  $this->_next();

  $enum = new mtgMetaEnum($this->_parseDotName());
  $this->current_meta->addUnit(new mtgMetaInfoUnit($this->file, $enum));

  if($this->token == self::T_Prop)
    $enum->setTokens($this->_parsePropTokens());

  $combined = array();
  while(!$this->_nextIf(self::T_End))
  {
    $key = $this->_checkThenNext(self::T_Identifier);
    $this->_checkThenNext('=');

    if($this->token != self::T_Identifier)
    {
      $enum->addValue($key, $this->_checkThenNext(self::T_IntegerConstant));
      continue;
    }
    $combined[$key] = $this->_parseEnumOrValues();
  }
  $enum->addOrValues($combined);
}
/**
 * Parses 'name : type [@props]' entries until the terminator callback
 * reports the end of the list.
 *
 * @param callable $next_doer called before every entry; a truthy result stops
 *                            the parsing (the callback is expected to consume
 *                            the terminating token itself)
 * @return array list of mtgMetaField
 * @throws Exception on anything which is not a field declaration
 */
private function _parseFields($next_doer)
{
  $fields = array();
  while(!$next_doer())
  {
    if($this->token != self::T_Identifier)
      $this->_error("unexpected fields token");

    $field_name = $this->attribute;
    $this->_next();
    $this->_checkThenNext(':');

    $is_builtin = $this->token >= self::T_string && $this->token <= self::T_blob;
    $starts_type = $this->token == self::T_Identifier ||
                   $this->token == self::T_Func ||
                   $is_builtin;
    if(!$starts_type)
      $this->_error("type expected");

    $field = new mtgMetaField($field_name, $this->_parseType());
    if($this->token == self::T_Prop)
      $field->setTokens($this->_parsePropTokens());
    $fields[] = $field;
  }
  return $fields;
}
/**
 * Parses one or more function declarations up to and including 'end'.
 * The current token must be the name of the first function.
 *
 * @return array list of mtgMetaFunc
 */
private function _parseFuncs()
{
  $collected = array();
  do
  {
    $collected[] = $this->_parseFunc();
    $reached_end = ($this->token == self::T_End);
    //consumes either 'end' or the token separating two functions
    //NOTE(review): the separator is presumably the 'func' keyword, but it is
    //              skipped without being checked - confirm this is intended
    $this->_next();
  }
  while(!$reached_end);
  return $collected;
}
/**
 * Parses identifiers joined by '.' (e.g. foo.bar.Baz).
 *
 * @return string the name including the dots
 * @throws Exception if an identifier is missing at the start or after a '.'
 */
private function _parseDotName()
{
  $parts = array();
  do
  {
    if($this->token != self::T_Identifier)
      $this->_error("unexpected name token");
    $parts[] = $this->attribute;
    $this->_next();

    $has_more = ($this->token == ord('.'));
    if($has_more)
      $this->_next();
  }
  while($has_more);
  return implode('.', $parts);
}
/**
 * Parses a function declaration starting at its name:
 * name '(' [@props] [arg : type ...] ')' [':' return type(s)]
 *
 * @return mtgMetaFunc
 * @throws Exception if ':' is not followed by a type
 */
private function _parseFunc()
{
  $name = $this->_parseDotName();
  $fn = new mtgMetaFunc($name);

  $this->_checkThenNext('(');
  if($this->token == self::T_Prop)
    $fn->setTokens($this->_parsePropTokens());

  $args = $this->_parseFields(function()
    { return $this->_nextIf(')'); }
  );
  $fn->setArgs($args);

  if($this->token == ord(':'))
  {
    $this->_next();
    //NOTE: the builtin range ends at T_blob (the last builtin type token),
    //      same as for fields; it used to end at T_bool which made
    //      functions returning a blob a syntax error
    $starts_type = $this->token == self::T_Identifier ||
                   $this->token == self::T_Func ||
                   ($this->token >= self::T_string && $this->token <= self::T_blob);
    if(!$starts_type)
      $this->_error("unexpected func type token");

    $ret_type = $this->_parseType(true/*can be multi-type*/);
    $fn->setReturnType($ret_type);
  }
  return $fn;
}
/**
 * Parses a top level 'func' declaration and registers it in the current
 * meta info.
 */
private function _parseFreeFunc()
{
  //skip the 'func' keyword
  $this->_next();

  $free_func = $this->_parseFunc();
  $unit = new mtgMetaInfoUnit($this->file, $free_func);
  $this->current_meta->addUnit($unit);
}
/**
 * Parses a struct declaration and registers it in the current meta info:
 * 'struct' name ['extends' parent] [@props] fields ['func' funcs] 'end'
 *
 * The struct is registered before its body is parsed.
 */
private function _parseStruct()
{
  //skip the 'struct' keyword
  $this->_next();
  $name = $this->_parseDotName();

  $parent = null;
  if($this->token == self::T_Extends)
  {
    $this->_next();
    //NOTE: struct names and type references may be dotted, so the parent
    //      name is parsed as a dotted name too (a plain identifier is still
    //      accepted exactly as before)
    $parent_name = $this->_parseDotName();
    $parent = new mtgTypeRef($parent_name, $this->current_meta, $this->file, $this->line);
  }

  $s = new mtgMetaStruct($name, array(), $parent);
  $this->current_meta->addUnit(new mtgMetaInfoUnit($this->file, $s));

  if($this->token == self::T_Prop)
    $s->setTokens($this->_parsePropTokens());

  //fields go on until 'end' or until the first 'func', both get consumed
  $seen_funcs = false;
  $flds = $this->_parseFields(
    function() use(&$seen_funcs)
    {
      if($this->_nextIf(self::T_End))
        return true;
      if($this->_nextIf(self::T_Func))
      {
        $seen_funcs = true;
        return true;
      }
      return false;
    }
  );
  foreach($flds as $fld)
    $s->addField($fld);

  if($seen_funcs)
  {
    $funcs = $this->_parseFuncs();
    foreach($funcs as $fn)
      $s->addFunc($fn);
  }
}
/**
 * Parses an interface declaration (name, optional @props, functions up to
 * 'end') and registers it in the current meta info.
 */
private function _parseInterface()
{
  //skip the 'interface' keyword
  $this->_next();

  $iface = new mtgMetaInterface($this->_parseDotName());
  $this->current_meta->addUnit(new mtgMetaInfoUnit($this->file, $iface));

  if($this->token == self::T_Prop)
  {
    $iface->setTokens($this->_parsePropTokens());
  }

  //NOTE(review): skips one token without checking it - presumably the 'func'
  //              keyword of the first function, which also means an
  //              interface without functions can't be parsed; confirm
  $this->_next();

  foreach($this->_parseFuncs() as $func)
  {
    $iface->addFunc($func);
  }
}
/**
 * Parses an RPC declaration and registers it in the current meta info:
 * 'RPC' code name '(' [@props] request fields ')' response fields 'end'
 *
 * Request and response become packets named RPC_REQ_<name> and
 * RPC_RSP_<name> sharing the RPC code.
 */
private function _parseRPC()
{
  //skip the 'RPC' keyword
  $this->_next();
  $code = $this->_checkThenNext(self::T_IntegerConstant);
  $name = $this->_checkThenNext(self::T_Identifier);
  $this->_checkThenNext('(');

  $tokens = ($this->token == self::T_Prop) ? $this->_parsePropTokens() : array();

  $until_paren = function() { return $this->_nextIf(')'); };
  $until_end = function() { return $this->_nextIf(self::T_End); };
  $request_fields = $this->_parseFields($until_paren);
  $response_fields = $this->_parseFields($until_end);

  $request = new mtgMetaPacket($code, "RPC_REQ_$name");
  $request->setFields($request_fields);

  $response = new mtgMetaPacket($code, "RPC_RSP_$name");
  $response->setFields($response_fields);

  $unit = new mtgMetaInfoUnit(
    $this->file,
    new mtgMetaRPC("RPC_$name", $code, $request, $response, $tokens)
  );
  $this->current_meta->addUnit($unit);
}
/**
 * Parses a run of '@name' / '@name:value' props starting at the current token.
 *
 * A value consists of all tokens up to the end of the line or the next
 * @prop, concatenated without separators (string constants get their double
 * quotes back). Values starting with '{' must be valid JSON.
 *
 * @return array prop name (without '@') => value string, null if there was no value
 * @throws Exception on malformed JSON or a prop name which is not allowed
 */
private function _parsePropTokens()
{
  $new_line = ord("\n");
  $prop_tokens = array();

  while($this->token == self::T_Prop)
  {
    $name = ltrim($this->attribute, '@');
    $this->_next();

    $value = null;
    if($this->token == ord(':'))
    {
      while(true)
      {
        $this->_next(false/*don't skip new line*/);

        if($this->token == $new_line)
        {
          //let's skip it
          $this->_next();
          break;
        }
        //NOTE: EOF must end the value too: the tokenizer keeps reporting
        //      EOF forever, so without this check a value on the last line
        //      of a file lacking a trailing newline never terminated
        if($this->token == self::T_Prop || $this->token == self::T_EOF)
          break;

        $tmp = $this->attribute;
        if($this->token == self::T_StringConstant)
          $tmp = "\"$tmp\"";
        if($value === null)
          $value = '';
        $value .= $tmp;
      }
    }

    if($value && substr($value, 0, 1) == '{')
    {
      $json = json_decode($value);
      if($json === null)
      {
        --$this->line; //hack for more precise reporting
        $this->_error("bad json");
      }
    }

    $this->_validatePropToken($name, $value);
    $prop_tokens[$name] = $value;
  }
  return $prop_tokens;
}
/**
 * Checks a prop name against the 'valid_tokens' config option. Any name is
 * accepted when the option is missing or empty.
 *
 * @param string      $name  prop name without the leading '@'
 * @param string|null $value prop value, not inspected
 * @throws Exception if the name is not listed
 */
private function _validatePropToken($name, $value)
{
  if(empty($this->config['valid_tokens']))
    return;

  $is_known = in_array($name, $this->config['valid_tokens']);
  if(!$is_known)
    throw new Exception("Unknown token '$name'");
}
/**
 * Returns the character under the cursor without advancing the cursor.
 * Past the end of the source this is whatever substr() yields there.
 */
private function _symbol()
{
  $char_at_cursor = substr($this->source, $this->cursor, 1);
  return $char_at_cursor;
}
/**
 * Advances to the next token. A thin wrapper around __next() giving a
 * single place to hook token tracing into.
 *
 * @param bool $skip_newlines passed through to __next()
 */
private function _next($skip_newlines = true)
{
  $this->__next($skip_newlines);
  //for tracing dump $this->token and $this->attribute here, e.g:
  //var_dump("NEXT " . $this->token . " " . $this->attribute);
  //debug_print_backtrace(0, 1);
}
/**
 * Scans the next token at $this->cursor, storing its code in $this->token
 * and its text in $this->attribute.
 *
 * Blanks and line comments are skipped. Single character tokens use their
 * ord() value as the code, all the other ones use T_* constants. At the end
 * of the source T_EOF is reported and the cursor is not advanced, so every
 * further call reports T_EOF again.
 *
 * @param bool $skip_newlines when false a "\n" is returned as a token
 *                            instead of being skipped
 * @throws Exception on malformed input
 */
private function __next($skip_newlines = true)
{
  while(true)
  {
    $c = $this->_symbol();
    //NOTE: substr() reports "past the end" as false before PHP 8 and
    //      as '' since PHP 8, both must be treated as the end of input
    if($c === false || $c === '')
    {
      $this->token = self::T_EOF;
      //-1 is what the attribute of EOF has always been in diagnostics
      $this->attribute = -1;
      return;
    }

    $this->token = ord($c);
    ++$this->cursor;
    $this->attribute = $c;

    switch($c)
    {
      case ' ': case "\r": case "\t":
        break;

      case "\n":
        $this->line++;
        if($skip_newlines)
          break;
        return;

      case '{': case '}': case '(': case ')': case '[': case ']': case '|':
      case ',': case ':': case ';': case '=':
        return;

      case '.':
        if(!ctype_digit($this->_symbol()))
          return;
        $this->_error("floating point constant can't start with .");
        break;

      case '"':
        $this->attribute = "";
        while(($s = $this->_symbol()) != '"')
        {
          //NOTE: this covers the end of input as well (its ord() is 0), so
          //      unterminated strings are reported rather than looped over
          if(ord($s) < ord(' '))
            $this->_error("illegal character in string constant");
          if($s == '\\')
          {
            $this->cursor++;
            switch($this->_symbol())
            {
              case 'n': $this->attribute .= "\n"; $this->cursor++; break;
              case 't': $this->attribute .= "\t"; $this->cursor++; break;
              case 'r': $this->attribute .= "\r"; $this->cursor++; break;
              case '"': $this->attribute .= '"'; $this->cursor++; break;
              case '\\': $this->attribute .= '\\'; $this->cursor++; break;
              default: $this->_error("unknown escape code in string constant"); break;
            }
          }
          else // printable chars + UTF-8 bytes
          {
            $this->attribute .= $s;
            $this->cursor++;
          }
        }
        $this->token = self::T_StringConstant;
        //skip the closing quote
        $this->cursor++;
        return;

      case '`':
        $this->attribute = "";
        while(true)
        {
          $s = $this->_symbol();
          //NOTE: without this check an unterminated raw string made the
          //      loop spin forever once the cursor ran past the end
          if($s === false || $s === '')
            $this->_error("unterminated raw string constant");
          if($s === '`')
            break;
          //raw strings may span lines, keep the line counter in sync
          if($s === "\n")
            $this->line++;
          $this->attribute .= $s;
          $this->cursor++;
        }
        $this->token = self::T_RawStringConstant;
        //skip the closing backtick
        $this->cursor++;
        return;

      //NOTE: a '/' starts a comment even when it is not followed by another
      //      '/' - that's how it has always worked, kept for compatibility
      case '/':
      case '#':
        //skip the rest of the line, "\n" itself is handled by the next
        //iteration so that lines are counted properly
        while(true)
        {
          $s = $this->_symbol();
          //NOTE: the end of input must stop the comment too, otherwise a
          //      comment on the last line without "\n" never ends on PHP 8
          if($s === false || $s === '' || $s === "\n")
            break;
          $this->cursor++;
        }
        break;

      case '@':
        $start = $this->cursor - 1;
        while(ctype_alnum($this->_symbol()) || $this->_symbol() == '_')
          $this->cursor++;
        $this->token = self::T_Prop;
        //the attribute includes the leading '@'
        $this->attribute = substr($this->source, $start, $this->cursor - $start);
        return;

      default:
        if(ctype_alpha($c))
        {
          //collect all chars of an identifier
          $start = $this->cursor - 1;
          while(ctype_alnum($this->_symbol()) || $this->_symbol() == '_')
            $this->cursor++;
          $this->attribute = substr($this->source, $start, $this->cursor - $start);

          if(isset($this->idltypes[$this->attribute]))
          {
            $this->token = $this->idltypes[$this->attribute];
            return;
          }

          //booleans are integer constants spelled as words
          if($this->attribute == "true" || $this->attribute == "false")
          {
            $this->token = self::T_IntegerConstant;
            return;
          }

          //check for declaration keywords:
          $keywords = array(
            "struct" => self::T_Struct,
            "interface" => self::T_Interface,
            "enum" => self::T_Enum,
            "RPC" => self::T_RPC,
            "end" => self::T_End,
            "extends" => self::T_Extends,
            "func" => self::T_Func,
          );
          if(isset($keywords[$this->attribute]))
          {
            $this->token = $keywords[$this->attribute];
            return;
          }

          //if not it's a user defined identifier
          $this->token = self::T_Identifier;
          return;
        }
        else if(ctype_digit($c) || $c == '-')
        {
          $start = $this->cursor - 1;
          while(ctype_digit($this->_symbol())) $this->cursor++;
          if($this->_symbol() == '.')
          {
            $this->cursor++;
            while(ctype_digit($this->_symbol())) $this->cursor++;
            // see if this float has a scientific notation suffix. Both JSON
            // and C++ (through strtod() we use) have the same format:
            if($this->_symbol() == 'e' || $this->_symbol() == 'E')
            {
              $this->cursor++;
              if($this->_symbol() == '+' || $this->_symbol() == '-') $this->cursor++;
              while(ctype_digit($this->_symbol())) $this->cursor++;
            }
            $this->token = self::T_FloatConstant;
          }
          else
            $this->token = self::T_IntegerConstant;
          $this->attribute = substr($this->source, $start, $this->cursor - $start);
          return;
        }
        $this->_error("illegal character '$c'");
    }
  }
}
/**
 * Consumes the current token if it is the given one.
 *
 * @param int|string $t token code or a single character token
 * @return bool true if the token matched (and got consumed)
 */
private function _nextIf($t)
{
  $wanted = is_string($t) ? ord($t) : $t;
  if($wanted !== $this->token)
    return false;

  $this->_next();
  return true;
}
/**
 * Makes sure the current token is the given one and consumes it.
 *
 * @param int|string $t token code or a single character token
 * @return mixed attribute of the consumed token
 * @throws Exception if the current token is a different one
 */
private function _checkThenNext($t)
{
  $wanted = is_string($t) ? ord($t) : $t;
  if($wanted !== $this->token)
  {
    $this->_error("Expecting '" . $this->_toStr($wanted) . "' instead got '" . $this->_toStr($this->token) . "'");
  }

  $consumed_attr = $this->attribute;
  $this->_next();
  return $consumed_attr;
}
/**
 * Returns a printable form of a token code: the character itself for codes
 * below 1000, the registered name for all the other ones.
 *
 * @param int $t token code
 * @return string
 */
private function _toStr($t)
{
  return ($t < 1000) ? chr($t) : $this->token_strs[$t];
}
/**
 * Aborts parsing with a message which has the current token code and
 * attribute appended to it.
 *
 * @param string $msg description of the problem
 * @throws Exception always
 */
private function _error($msg)
{
  //NOTE: the message used to end with a stray '}' ("attr: foo})")
  throw new Exception($msg . "(token: {$this->token}, attr: {$this->attribute})");
}
}
/**
 * Parses all the given meta sources into a fresh mtgMetaInfo.
 *
 * The directories among the sources, plus the directories of the files
 * among them, make up the #include search path.
 *
 * @param array      $meta_srcs    paths of meta files and/or directories
 * @param array|null $valid_tokens optional list of allowed @prop names
 * @return mtgMetaInfo
 */
function mtg_parse_meta(array $meta_srcs, $valid_tokens = null)
{
  $include_dirs = array();
  foreach($meta_srcs as $src)
  {
    if(is_dir($src))
    {
      $include_dirs[] = $src;
      continue;
    }
    if(is_file($src))
      $include_dirs[] = dirname($src);
  }

  $parser_config = array(
    'include_path' => $include_dirs,
    'valid_tokens' => $valid_tokens
  );
  $parser = new mtgMetaInfoParser($parser_config);

  $meta = new mtgMetaInfo();
  foreach($meta_srcs as $src)
  {
    mtg_load_meta($meta, $parser, $src);
  }
  return $meta;
}
/**
 * Parses a single meta file or all the meta files of a directory.
 *
 * @param mtgMetaInfo       $meta        container receiving the parsed units
 * @param mtgMetaInfoParser $meta_parser parser to use
 * @param string            $dir_or_file path of a file or a directory
 * @throws Exception if the path is neither a file nor a directory
 */
function mtg_load_meta(mtgMetaInfo $meta, mtgMetaInfoParser $meta_parser, $dir_or_file)
{
  if(is_dir($dir_or_file))
  {
    $targets = mtg_find_meta_files($dir_or_file);
  }
  else if(is_file($dir_or_file))
  {
    $targets = array($dir_or_file);
  }
  else
  {
    throw new Exception("Bad meta source '$dir_or_file'");
  }

  foreach($targets as $target)
  {
    $meta_parser->parse($meta, $target);
  }
}
/**
 * Lists the '*.meta' files located directly in a directory (no recursion).
 * Entries starting with a dot and anything which is not a regular file are
 * skipped.
 *
 * @param string $dir directory to scan
 * @return array paths ('<dir>/<name>') in the order reported by scandir()
 * @throws Exception if the directory can't be scanned
 */
function mtg_find_meta_files($dir)
{
  $items = scandir($dir);
  if($items === false)
    throw new Exception("Directory '$dir' is invalid");

  $files = array();
  foreach($items as $item)
  {
    if($item[0] == '.')
      continue;
    //NOTE: the suffix itself is compared: strpos() finds the FIRST '.meta',
    //      so names like 'foo.meta.meta' used to be wrongly skipped
    if(substr($item, -5) !== '.meta')
      continue;
    $file = $dir . '/' . $item;
    if(is_file($file))
      $files[] = $file;
  }
  return $files;
}