Compare commits

...

8 Commits

Author SHA1 Message Date
Alexey Chubar 6fe8a0dff5 Added fmt3 bundle format
Publish PHP Package / docker (push) Successful in 6s Details
2025-03-14 00:14:14 +07:00
Pavel Shevaev fd49e48a57 Support for configs bundle patching WIP; Migrating to iterable from array where it makes sense; Adding config_hash_changed(..)
Publish PHP Package / docker (push) Successful in 6s Details
2025-03-06 13:23:44 +03:00
Pavel Shevaev 77a63241ff Enabling junk check by default
Publish PHP Package / docker (push) Successful in 6s Details
2025-02-27 20:26:43 +03:00
Pavel Shevaev 458b72e367 Adding config2json
Publish PHP Package / docker (push) Successful in 7s Details
2025-02-27 20:00:39 +03:00
Pavel Shevaev a15f4f935e Fixing config workers initialization
Publish PHP Package / docker (push) Successful in 6s Details
2025-02-27 18:45:49 +03:00
Pavel Shevaev 07d69c4a02 Minor improvements
Publish PHP Package / docker (push) Successful in 6s Details
2025-02-27 15:53:17 +03:00
Pavel Shevaev 4bd731aa25 Adding debug flag to pack routine
Publish PHP Package / docker (push) Successful in 7s Details
2025-02-27 13:16:46 +03:00
Pavel Shevaev 63c2c46cc7 Improving initialization of workers; Adding sorting when splitting config jobs; Minor improvements here and there
Publish PHP Package / docker (push) Successful in 7s Details
2025-02-27 01:35:17 +03:00
7 changed files with 466 additions and 70 deletions

View File

@ -1,3 +1,6 @@
## v5.10.0
- Added fmt3 bundle format - basically, fmt1 with chunk-based lz4 compression
## v4.0.2
- Improving exception message
@ -7,4 +10,4 @@
## v4.0.0
- Adding initial binary format 2 support: no more lookups by CRC28 strids
- Removed misc obsolete stuff
- Adding PHP type hints
- Adding PHP type hints

View File

@ -154,7 +154,7 @@ class ConfigFetchParams
public bool $verbose = false;
public ?int $max_workers = null;
public bool $touch_files_with_includes = true;
public bool $check_junk = false;
public bool $check_junk = true;
function __construct(ConfigGlobals $globals, ConfigScanResult $scanned,
bool $force_stale = false, bool $verbose = false, ?int $max_workers = null)
@ -170,22 +170,27 @@ class ConfigFetchParams
{
$max_workers = $this->max_workers;
if($max_workers === null)
$max_workers = $this->scanned->count() < 20 ? 1 : 5;
$max_workers = $this->scanned->count() < 100 ? 1 : 5;
return $max_workers;
}
function splitFilesByChunks(int $max_workers) : array
function splitFilesByChunks(int $max_workers, bool $sort = true) : array
{
$chunk_size = (int)ceil($this->scanned->count()/$max_workers);
return array_chunk($this->scanned->getFlatArray(), $chunk_size);
$flat = $this->scanned->getFlatArray();
if($sort)
usort($flat, fn($a, $b) => $a[1] <=> $b[1]);
$chunk_size = (int)ceil(count($flat)/$max_workers);
return array_chunk($flat, $chunk_size);
}
//returns [[idx, time, [[[base_dir, file1], [base_dir, file2], ..]]], ]
function splitJobs() : array
function splitJobs(bool $sort = true) : array
{
$max_workers = $this->calcMaxWorkers();
$jobs = array();
foreach($this->splitFilesByChunks($max_workers) as $idx => $chunk)
$chunks = $this->splitFilesByChunks($max_workers, $sort);
foreach($chunks as $idx => $chunk)
$jobs[] = array($idx, microtime(true), $chunk);
return $jobs;
}
@ -259,12 +264,7 @@ function _config_cache_fetch(ConfigFetchParams $params) : ConfigFetchResult
if($params->scanned->isEmpty())
return new ConfigFetchResult();
//TODO: not sure if it's the best place for this one
$GLOBALS['CONFIG_GLOBALS'] = $params->globals;
$params->scanned->apply(function($base_dir, $files) { sort($files); return $files;});
$jobs = $params->splitJobs();
$jobs = $params->splitJobs(sort: true);
$serial = sizeof($jobs) == 1;
@ -272,11 +272,11 @@ function _config_cache_fetch(ConfigFetchParams $params) : ConfigFetchResult
if(!$serial)
{
//NOTE: in case of any result error try serial processing
//NOTE: in case result unserialize error try serial processing
if(array_search(false, $results_by_job, true/*strict*/) !== false)
{
if($params->verbose)
config_log("Result error detected, trying serial processing...");
config_log("Corrupted result, trying serial processing...");
$results_by_job = _config_worker_run_procs($params, $jobs, true);
}
}

View File

@ -23,21 +23,16 @@ class ConfigGlobals
$this->worker_init_fn = $worker_init_fn;
}
function initWorker()
function initWorker(bool $is_master_proc)
{
$GLOBALS['CONFIG_GLOBALS'] = $this;
if($this->worker_init_fn !== null)
{
$fn = $this->worker_init_fn;
$fn();
$fn($is_master_proc);
}
}
//Stores the given dirs as base dirs, each one passed through normalize_path(..),
//keys of the passed array are preserved
function setNormalizeBaseDirs(array $dirs)
{
  $normalized = array();
  foreach($dirs as $key => $dir)
    $normalized[$key] = normalize_path($dir);
  $this->base_dirs = $normalized;
}
}
function config_log($msg)

View File

@ -12,8 +12,19 @@ class ConfigPackParams
public ?int $version = null;
public bool $debug = false;
function __construct(array $cache_entries, int $version, bool $use_lz4 = false,
bool $use_config_refs = false, int $binary_format = 1, bool $debug = false)
public const string EXTRA_FMT3_CHUNK_SIZE = "EXTRA_FMT3_CHUNK_SIZE";
public const string EXTRA_FMT3_COMPRESSION_LEVEL = "EXTRA_FMT3_COMPRESSION_LEVEL";
public array $extras = array();
function __construct(
array $cache_entries,
int $version,
bool $use_lz4 = false,
bool $use_config_refs = false,
int $binary_format = 1,
bool $debug = false,
array $extras = array()
)
{
$this->cache_entries = $cache_entries;
$this->use_lz4 = $use_lz4;
@ -21,6 +32,7 @@ class ConfigPackParams
$this->binary_format = $binary_format;
$this->version = $version;
$this->debug = $debug;
$this->extras = $extras;
}
}
@ -45,7 +57,18 @@ function config_pack_bundle(ConfigPackParams $params) : string
$params->cache_entries,
$params->use_lz4,
$params->use_config_refs,
$params->version
$params->version,
);
}
else if($params->binary_format == 3)
{
$packed_data = _config_pack_bundle_fmt3(
$params->cache_entries,
$params->use_lz4,
$params->use_config_refs,
$params->version,
$params->extras[ConfigPackParams::EXTRA_FMT3_CHUNK_SIZE],
$params->extras[ConfigPackParams::EXTRA_FMT3_COMPRESSION_LEVEL],
);
}
else
@ -61,6 +84,36 @@ function config_pack_bundle(ConfigPackParams $params) : string
return $packed_data;
}
//Patches already packed bundle with entries from the passed params and returns the patched bundle.
//Only binary format 2 is handled here, any other format results in an Exception.
//When debug is enabled, a summary line is written via config_log(..)
function config_patch_bundle(ConfigPackParams $params, string $packed_data) : string
{
  $started_at = microtime(true);

  if($params->binary_format != 2)
    throw new Exception("Unknown binary format: {$params->binary_format}");

  $result = _config_patch_bundle_fmt2(
    $packed_data,
    $params->cache_entries,
    $params->use_lz4,
    $params->use_config_refs,
    $params->version,
  );

  if(!$params->debug)
    return $result;

  //NOTE: parts are evaluated left to right, elapsed time is measured last
  $message_parts = array(
    "Patched entries: ", sizeof($params->cache_entries),
    ", total: ", kb($result),
    ", format: {$params->binary_format}, lz4: ", var_export($params->use_lz4, true),
    ", refs: ", var_export($params->use_config_refs, true),
    ", CRC: ", crc32($result),
    ", ", round(microtime(true) - $started_at,2), " sec.",
  );
  config_log(implode('', $message_parts));

  return $result;
}
//NOTE: strids are stored as CRCs, potential collision may happen (error will be raised during build)
function _config_pack_bundle_fmt1(
array $cache_entries,
bool $use_lz4,
@ -117,11 +170,14 @@ function _config_pack_bundle_fmt1(
return $packed_data;
}
//NOTE: strids are stored as lookup strings, and actually an array of lookup string indices
// (each path item separated by '/' is stored as an array item)
function _config_pack_bundle_fmt2(
array $cache_entries,
bool $use_lz4,
bool $use_config_refs,
int $version) : string
int $version,
) : string
{
$MAP = array();
$STRIDMAP = array();
@ -137,20 +193,8 @@ function _config_pack_bundle_fmt2(
$payloads[] = array($payloads_offset, $payload, $format, $payload_size);
$payloads_offset += $payload_size;
$strids_indices = array();
$strid_parts = explode('/', ltrim($entry->strid, '@'));
foreach($strid_parts as $strid_part)
{
if(!isset($STRIDMAP[$strid_part]))
{
$strid_index = count($STRIDLIST);
$STRIDLIST[] = $strid_part;
$STRIDMAP[$strid_part] = $strid_index;
$strids_indices[] = $strid_index;
}
else
$strids_indices[] = $STRIDMAP[$strid_part];
}
$strids_indices = _config_encode_strid_as_indices($entry->strid, $STRIDMAP, $STRIDLIST);
$strids[] = $strids_indices;
}
@ -190,6 +234,184 @@ function _config_pack_bundle_fmt2(
return $packed_data;
}
//NOTE: Much like fmt1, but config entries are grouped into sizeable chunks, with each chunk lz4-compressed.
//      This reduces overall bundle size when there are many small config entries.
//
//Bundle layout:
//  format byte (3), version (uint32 LE), header length (uint32 LE),
//  max uncompressed chunk size (uint32 LE), msgpack'ed header, chunks
//Chunk layout:
//  compressed length (uint32 LE), lz4 data
//Header item:
//  [format, id, strid crc32, class_id, chunk offset, payload offset within chunk, payload size]
//
//$chunk_size is a threshold for uncompressed data: a chunk is closed as soon as
//accumulated payloads reach it (so a chunk may exceed it by the size of its last payload).
//
//Throws Exception if lz4 is disabled, compression level is out of [0, 12] range
//or there are entries with conflicting ids / strid CRCs.
function _config_pack_bundle_fmt3(
  array $cache_entries,
  bool $use_lz4,
  bool $use_config_refs,
  int $version,
  int $chunk_size,
  int $compression_level) : string
{
  if(!$use_lz4)
    throw new Exception("Config bundle FMT3 is only available with LZ4 enabled");

  if($compression_level < 0 || $compression_level > 12)
    throw new Exception("LZ4 compression level must be in range [0, 12]");

  $MAP = array();
  $STRIDMAP = array();
  $header = array();

  //offset of the next payload within the current (uncompressed) chunk
  $payloads_offset = 0;
  $max_chunk_size = 0;
  //offset of the current chunk within $payloads_bundle
  $chunk_offset = 0;
  $payloads_bundle = '';
  $payloads_buffer = '';

  //NOTE: counting processed entries instead of relying on array keys: keys are not
  //      guaranteed to be sequential, and the last chunk must be flushed anyway
  $entries_left = count($cache_entries);

  foreach($cache_entries as $entry)
  {
    --$entries_left;

    list($format, $payload) = _config_get_payload($entry, $use_lz4, $use_config_refs);
    $payload_size = strlen($payload);
    $payloads_buffer .= $payload;

    if(isset($MAP[$entry->id]))
      throw new Exception("Duplicating config id for '{$entry->strid}' conflicts with '{$MAP[$entry->id]}'");
    $MAP[$entry->id] = $entry->strid;

    $strid_crc = crc32($entry->strid);
    if(isset($STRIDMAP[$strid_crc]))
      throw new Exception("Duplicating config str id crc for '{$entry->strid}' conflicts with '{$STRIDMAP[$strid_crc]}'");
    $STRIDMAP[$strid_crc] = $entry->strid;

    $header[] = array(
      $format,
      $entry->id,
      $strid_crc,
      $entry->class_id,
      $chunk_offset,
      $payloads_offset,
      $payload_size
    );

    $payloads_offset += $payload_size;

    //closing the chunk once it's big enough or there are no more entries
    if($payloads_offset >= $chunk_size || $entries_left == 0)
    {
      if($payloads_offset > $max_chunk_size)
        $max_chunk_size = $payloads_offset;
      $payloads_offset = 0;

      $lz4_data = lz4_compress($payloads_buffer, $compression_level);
      $payloads_bundle .= pack("V", strlen($lz4_data));
      $payloads_bundle .= $lz4_data;

      //the next chunk starts right after the one just written
      $chunk_offset = strlen($payloads_bundle);
      $payloads_buffer = '';
    }
  }

  $header_msgpack = config_msgpack_pack($header);

  $packed_data =
    pack("C", 3) .
    pack("V", $version) .
    pack("V", strlen($header_msgpack)) .
    pack("V", $max_chunk_size) .
    $header_msgpack .
    $payloads_bundle;

  return $packed_data;
}
//Converts strid into an array of lookup indices, one index per '/' separated path item
//(leading '@' characters are stripped beforehand).
//Path items not seen before are appended to $STRIDLIST and registered in $STRIDMAP (item => index),
//both are passed by reference and are updated in place.
function _config_encode_strid_as_indices(string $strid, array &$STRIDMAP, array &$STRIDLIST) : array
{
  $indices = array();

  foreach(explode('/', ltrim($strid, '@')) as $part)
  {
    //already known item: reusing its index
    if(isset($STRIDMAP[$part]))
    {
      $indices[] = $STRIDMAP[$part];
      continue;
    }

    //new item: its index is its position in the list
    $new_index = count($STRIDLIST);
    $STRIDLIST[] = $part;
    $STRIDMAP[$part] = $new_index;
    $indices[] = $new_index;
  }

  return $indices;
}
//Patches fmt2 bundle with the given entries and returns the new bundle:
// - entry with a known id, whose new payload fits into the old slot, is overwritten in place
// - entry with a known id and a bigger payload is appended to the end of payloads
//   (the old slot is left as is and is simply not referenced anymore)
// - entry with an unknown id gets a new header item and is appended to the end of payloads
//Header item: [format, id, strid indices, class_id, offset, size]
function _config_patch_bundle_fmt2(
  string $packed_data,
  array $patch_entries,
  bool $use_lz4,
  bool $use_config_refs,
  int $version,
) : string
{
  list($strids, $header, $_, $payloads_bundle) =
    _config_unpack_bundle_fmt2(packed_data: $packed_data, unpack_entries: false);

  $stridmap = array_flip($strids);

  //id => header index, built once instead of scanning the whole header for each patched entry
  $id2header_idx = array();
  foreach($header as $header_idx => $item)
  {
    //in case of duplicates the first occurrence wins
    if(!isset($id2header_idx[$item[1]]))
      $id2header_idx[$item[1]] = $header_idx;
  }

  foreach($patch_entries as $patch_entry)
  {
    list($patch_format, $patch_payload) = _config_get_payload($patch_entry, $use_lz4, $use_config_refs);
    $patch_size = strlen($patch_payload);

    if(isset($id2header_idx[$patch_entry->id]))
    {
      $header_idx = $id2header_idx[$patch_entry->id];
      $header_entry = $header[$header_idx];

      $header_entry[0] = $patch_format;
      //payload now comes from the patch entry, keeping class id in sync with it
      $header_entry[3] = $patch_entry->class_id;

      if($header_entry[5] >= $patch_size)
      {
        //let's do the inline patching
        $payloads_bundle = substr_replace($payloads_bundle, $patch_payload, $header_entry[4], $patch_size);
      }
      else
      {
        //let's add it to the end
        $header_entry[4] = strlen($payloads_bundle);
        $payloads_bundle .= $patch_payload;
      }

      $header_entry[5] = $patch_size;
      $header[$header_idx] = $header_entry;
    }
    else
    {
      //let's add the new entry to the end
      $strid_indices = _config_encode_strid_as_indices($patch_entry->strid, $stridmap, $strids);
      $header[] = array(
        $patch_format,
        $patch_entry->id,
        $strid_indices,
        $patch_entry->class_id,
        strlen($payloads_bundle),
        $patch_size,
      );
      //making the new entry visible for subsequent patch entries with the same id
      $id2header_idx[$patch_entry->id] = array_key_last($header);
      $payloads_bundle .= $patch_payload;
    }
  }

  $strids_msgpack = config_msgpack_pack($strids);
  $header_msgpack = config_msgpack_pack($header);

  $patched_data =
    pack("C", 2) .
    pack("V", $version) .
    pack("V", strlen($strids_msgpack)) .
    pack("V", strlen($header_msgpack)) .
    $strids_msgpack .
    $header_msgpack .
    $payloads_bundle;

  return $patched_data;
}
//format: [[class_id, [data]], ...[class_id, [data]]]
function config_unpack_bundle(string $packed_data) : array
{
@ -202,7 +424,12 @@ function config_unpack_bundle(string $packed_data) : array
}
else if($info['format'] === 2)
{
return _config_unpack_bundle_fmt2($packed_data);
list($_, $_, $entries) = _config_unpack_bundle_fmt2($packed_data);
return $entries;
}
else if($info['format'] === 3)
{
return _config_unpack_bundle_fmt3($packed_data);
}
else
throw new Exception("Unknown format: {$info['format']}");
@ -235,7 +462,7 @@ function _config_unpack_bundle_fmt1(string $packed_data) : array
return $entries;
}
function _config_unpack_bundle_fmt2(string $packed_data) : array
function _config_unpack_bundle_fmt2(string $packed_data, bool $unpack_entries = true) : array
{
$packed_info = substr($packed_data, 0, 1+4+4+4);
@ -253,16 +480,80 @@ function _config_unpack_bundle_fmt2(string $packed_data) : array
$payloads_bundle = substr($packed_data, 1+4+4+4+$info['strids_len']+$info['header_len']);
$entries = array();
foreach($header as $item)
if($unpack_entries)
{
list($format, $id, $strid_crc, $class_id, $offset, $size) = $item;
foreach($header as $item)
{
list($format, $id, $strid_crc, $class_id, $offset, $size) = $item;
$payload = substr($payloads_bundle, $offset, $size);
$payload = substr($payloads_bundle, $offset, $size);
$entries[$id] = array($class_id, _config_unpack_payload($format, $payload));
$entries[$id] = array($class_id, _config_unpack_payload($format, $payload));
}
}
return $entries;
return array($strids, $header, $entries, $unpack_entries ? null : $payloads_bundle);
}
//Unpacks fmt3 bundle: format byte (3), version, header length, max chunk size (uint32 LE each),
//msgpack'ed header, followed by length-prefixed lz4 chunks.
//Header item: [format, id, strid crc, class_id, chunk offset, payload offset within chunk, payload size],
//where chunk offset is relative to the beginning of the chunks area.
//returns [id => [class_id, data], ...]
//Throws Exception if the bundle is not fmt3, is truncated or a chunk can't be uncompressed.
function _config_unpack_bundle_fmt3(string $packed_data): array
{
  $info_len = 1+4+4+4;

  if(strlen($packed_data) < $info_len || ord($packed_data[0]) !== 3)
  {
    throw new Exception("Invalid config bundle format");
  }

  //NOTE: version and max chunk size are stored in the bundle but are not needed for unpacking
  $header_len = unpack("V", substr($packed_data, 1+4, 4))[1];

  $header = config_msgpack_unpack(substr($packed_data, $info_len, $header_len));

  //chunks go right after the header
  $chunks_start = $info_len + $header_len;
  $packed_len = strlen($packed_data);

  $cache_entries = [];
  //offset of the chunk currently held in $chunk_buffer
  $current_chunk_offset = null;
  $chunk_buffer = '';

  foreach($header as $entry_data)
  {
    list($format, $id, $_, $class_id, $entry_chunk_offset, $payload_offset_within_chunk, $payload_size) = $entry_data;

    if($entry_chunk_offset !== $current_chunk_offset)
    {
      //seeking to the chunk using its recorded offset, this way entries
      //don't have to be listed strictly in the order of chunks
      $pos = $chunks_start + $entry_chunk_offset;
      if($pos + 4 > $packed_len)
        throw new Exception("Invalid config bundle format");

      $lz4_chunk_size = unpack("V", substr($packed_data, $pos, 4))[1];
      $chunk_buffer = lz4_uncompress(substr($packed_data, $pos + 4, $lz4_chunk_size));
      if($chunk_buffer === false)
        throw new Exception("Invalid config bundle format");

      $current_chunk_offset = $entry_chunk_offset;
    }

    $payload = substr($chunk_buffer, $payload_offset_within_chunk, $payload_size);
    $cache_entries[$id] = array($class_id, _config_unpack_payload($format, $payload));
  }

  return $cache_entries;
}
//format: [format_id, payload_data]
@ -310,6 +601,7 @@ function config_pack_and_write_bundle(
int $version,
bool $use_lz4 = true,
int $binary_format = 1,
bool $debug = false
)
{
$cache_entries = array();
@ -334,7 +626,8 @@ function config_pack_and_write_bundle(
cache_entries: $cache_entries,
use_lz4: $use_lz4,
binary_format: $binary_format,
version: $version
version: $version,
debug: $debug
)
);

View File

@ -2,11 +2,24 @@
namespace taskman;
use Exception;
class ConfigScanResult
class ConfigScanResult implements \ArrayAccess, \Countable, \Iterator
{
/*var array<string, string[]>*/
public array $base_dir2files = array();
private $iter_pos = 0;
//Fills the result with the passed base dir => files mapping (keys are preserved)
function __construct(array $base_dir2files = array())
{
  $this->base_dir2files = array_replace($this->base_dir2files, $base_dir2files);
}
//Drops all base dirs along with their files
function clear()
{
  $this->base_dir2files = [];
}
function isEmpty() : bool
{
return empty($this->base_dir2files);
@ -69,10 +82,94 @@ class ConfigScanResult
return $all_files;
}
//ArrayAccess interface
//NOTE: integer offsets address files as one flat sequence: base dirs are traversed
//      in their storage order, files of each base dir are addressed by position.
//      Non-integer offsets result in an Exception, negative and out of range
//      offsets address nothing.

function offsetExists(mixed $offset) : bool
{
  if(!is_int($offset))
    throw new Exception("Invalid offset");

  return $offset >= 0 && $this->count() > $offset;
}

//returns the file at the offset or null if there's none
function offsetGet(mixed $offset) : mixed
{
  if(!is_int($offset))
    throw new Exception("Invalid offset");

  if($offset < 0)
    return null;

  foreach($this->base_dir2files as $files)
  {
    $n = count($files);
    if($offset < $n)
      return $files[$offset];
    //skipping this base dir, offset becomes relative to the next one
    $offset -= $n;
  }

  return null;
}

//replaces the file at the offset, does nothing if there's no such offset
function offsetSet(mixed $offset, mixed $value) : void
{
  if(!is_int($offset))
    throw new Exception("Invalid offset");

  if($offset < 0)
    return;

  foreach($this->base_dir2files as $base_dir => $files)
  {
    $n = count($files);
    if($offset < $n)
    {
      //writing into the storage itself, not into the loop copy
      $this->base_dir2files[$base_dir][$offset] = $value;
      return;
    }
    $offset -= $n;
  }
}

//removes the file at the offset, does nothing if there's no such offset
function offsetUnset(mixed $offset) : void
{
  if(!is_int($offset))
    throw new Exception("Invalid offset");

  if($offset < 0)
    return;

  foreach($this->base_dir2files as $base_dir => $files)
  {
    $n = count($files);
    if($offset < $n)
    {
      //removing from the storage itself (not from the loop copy) and reindexing,
      //so that the remaining files are still addressed by contiguous positions
      array_splice($this->base_dir2files[$base_dir], $offset, 1);
      return;
    }
    $offset -= $n;
  }
}

//Iterator interface
//NOTE: iteration walks over the same flat offsets as the ArrayAccess methods above

function rewind() : void
{
  $this->iter_pos = 0;
}

function current() : mixed
{
  return $this->offsetGet($this->iter_pos);
}

function key() : mixed
{
  return $this->iter_pos;
}

function next() : void
{
  ++$this->iter_pos;
}

function valid() : bool
{
  return $this->offsetExists($this->iter_pos);
}
}
function config_scan_files(
array $base_dirs,
iterable $base_dirs,
string $ext_filter = '.conf.js',
bool $verbose = false
) : ConfigScanResult
@ -92,3 +189,10 @@ function config_scan_files(
return $result;
}
//Passes the given files to names_hash_changed(..) along with
//the path to "configs.crc" located in the build dir, and returns its result
function config_hash_changed(ConfigGlobals $globals, iterable $all_files)
{
  return names_hash_changed($globals->build_dir . "/configs.crc", $all_files);
}

View File

@ -33,25 +33,28 @@ function _config_worker_run_procs(ConfigFetchParams $params, array $jobs, bool $
{
$results_by_job = array();
foreach($jobs as $job)
$results_by_job[] = _config_worker_func($params, $job);
$results_by_job[] = _config_worker_func($params, $job, $serial);
return $results_by_job;
}
else
{
$worker_args = array();
foreach($jobs as $idx => $job)
$worker_args[] = array($params, $job);
//initializing worker for master process anyway
$params->globals->initWorker(true);
return run_background_gamectl_workers('config_worker', $worker_args);
$workers_args = array();
foreach($jobs as $job)
$workers_args[] = array($params, $job);
return run_background_gamectl_workers('config_worker', $workers_args);
}
}
//returns [[base_dir, file, cache_file, was_stale, parser_type, error], ...]
function _config_worker_func(ConfigFetchParams $params, array $job) : array
function _config_worker_func(ConfigFetchParams $params, array $job, bool $is_master_proc = false) : array
{
$start_time = microtime(true);
$params->globals->initWorker();
$params->globals->initWorker($is_master_proc);
list($idx, $start_time, $chunk) = $job;
if($params->verbose)

View File

@ -122,15 +122,13 @@ function config_includes_map_find_text_origin(array $map, string $file, string $
return $res;
}
//TODO:
//function conf2json(object $conf, int $json_flags = 0) : string
//{
// $arr = $conf->export(true);
// $arr['class'] = get_class($conf);
// unset($arr['id']);
// unset($arr['strid']);
// $json = json_encode($arr, $json_flags);
// $json = str_replace(array('\\\\', '\\n', '\/'), array('\\', "\n", '/'), $json);
// return $json;
//}
//Exports config object into a JSON-like string:
// - exported data gets 'class' key with the class name of the object
// - 'id' and 'strid' keys are dropped
// - escaped backslashes, new lines and slashes are unescaped after encoding,
//   so the result is meant for reading rather than for strict JSON parsing
function config2json(object $conf, int $json_flags = 0) : string
{
  $data = $conf->export(true);
  $data['class'] = get_class($conf);
  unset($data['id'], $data['strid']); // @phpstan-ignore-line

  $encoded = json_encode($data, $json_flags);

  return str_replace(array('\\\\', '\\n', '\/'), array('\\', "\n", '/'), $encoded);
}