From 6fe8a0dff5571e503dbee73809ce7c315dde7928 Mon Sep 17 00:00:00 2001
From: "a.chubar"
Date: Fri, 14 Mar 2025 00:14:14 +0700
Subject: [PATCH] Added fmt3 bundle format

---
 CHANGELOG.md |   5 +-
 pack.inc.php | 163 ++++++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 166 insertions(+), 2 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index f40e5e3..3847f28 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,6 @@
+## v5.10.0
+- Added fmt3 bundle format - basically, fmt1 with chunk-based lz4 compression
+
 ## v4.0.2
 - Improving exception message
 
@@ -7,4 +10,4 @@
 ## v4.0.0
 - Adding initial binary format 2 support: no more lookups by CRC28 strids
 - Removed misc obsolete stuff
-- Adding PHP type hints
\ No newline at end of file
+- Adding PHP type hints
diff --git a/pack.inc.php b/pack.inc.php
index 9c6d394..373cb4f 100644
--- a/pack.inc.php
+++ b/pack.inc.php
@@ -12,13 +12,18 @@ class ConfigPackParams
   public ?int $version = null;
   public bool $debug = false;
 
+  public const string EXTRA_FMT3_CHUNK_SIZE = "EXTRA_FMT3_CHUNK_SIZE";
+  public const string EXTRA_FMT3_COMPRESSION_LEVEL = "EXTRA_FMT3_COMPRESSION_LEVEL";
+  public array $extras = array();
+
   function __construct(
     array $cache_entries,
     int $version,
     bool $use_lz4 = false,
     bool $use_config_refs = false,
     int $binary_format = 1,
-    bool $debug = false
+    bool $debug = false,
+    array $extras = array()
   )
   {
     $this->cache_entries = $cache_entries;
@@ -27,6 +32,7 @@ class ConfigPackParams
     $this->binary_format = $binary_format;
     $this->version = $version;
     $this->debug = $debug;
+    $this->extras = $extras;
   }
 }
 
@@ -54,6 +60,24 @@ function config_pack_bundle(ConfigPackParams $params) : string
       $params->version,
     );
   }
+  else if($params->binary_format == 3)
+  {
+    //NOTE: fmt3 specific settings have no defaults and must be passed via extras
+    foreach(array(ConfigPackParams::EXTRA_FMT3_CHUNK_SIZE, ConfigPackParams::EXTRA_FMT3_COMPRESSION_LEVEL) as $extra)
+    {
+      if(!isset($params->extras[$extra]))
+        throw new Exception("Missing '$extra' extra param required by binary format 3");
+    }
+
+    $packed_data = _config_pack_bundle_fmt3(
+      $params->cache_entries,
+      $params->use_lz4,
+      $params->use_config_refs,
+      $params->version,
+      $params->extras[ConfigPackParams::EXTRA_FMT3_CHUNK_SIZE],
+      $params->extras[ConfigPackParams::EXTRA_FMT3_COMPRESSION_LEVEL],
+    );
+  }
   else
     throw new Exception("Unknown binary format: {$params->binary_format}");
 
@@ -217,6 +241,96 @@ function _config_pack_bundle_fmt2(
   return $packed_data;
 }
 
+//NOTE: Much like fmt1, but config entries are grouped into sizeable chunks, with each chunk lz4-compressed.
+//      This reduces overall bundle size when there are many small config entries.
+//
+//      Bundle layout (all u32 values are little-endian):
+//        [u8 format = 3][u32 version][u32 header size][u32 max uncompressed chunk size][msgpack header][chunks]
+//      Each chunk is [u32 lz4 data size][lz4 data], each header item is:
+//        [format, id, crc32(strid), class_id, chunk offset, payload offset within chunk, payload size]
+//      where chunk offset is relative to the beginning of the chunks section.
+function _config_pack_bundle_fmt3(
+  array $cache_entries,
+  bool $use_lz4,
+  bool $use_config_refs,
+  int $version,
+  int $chunk_size,
+  int $compression_level) : string
+{
+  if(!$use_lz4)
+    throw new Exception("Config bundle FMT3 is only available with LZ4 enabled");
+
+  if($compression_level < 0 || $compression_level > 12)
+    throw new Exception("LZ4 compression level must be in range [0, 12]");
+
+  $MAP = array();
+  $STRIDMAP = array();
+
+  $header = array();
+  $payloads_offset = 0;
+  $max_chunk_size = 0;
+  $chunk_offset = 0;
+  $payloads_bundle = '';
+  $payloads_buffer = '';
+  //NOTE: array keys are not guaranteed to be sequential, so the last entry is
+  //      detected by counting the remaining entries rather than by its key
+  $entries_left = count($cache_entries);
+  foreach($cache_entries as $entry)
+  {
+    --$entries_left;
+
+    list($format, $payload) = _config_get_payload($entry, $use_lz4, $use_config_refs);
+    $payload_size = strlen($payload);
+    $payloads_buffer .= $payload;
+
+    if(isset($MAP[$entry->id]))
+      throw new Exception("Duplicating config id for '{$entry->strid}' conflicts with '{$MAP[$entry->id]}'");
+    $MAP[$entry->id] = $entry->strid;
+
+    $strid_crc = crc32($entry->strid);
+    if(isset($STRIDMAP[$strid_crc]))
+      throw new Exception("Duplicating config str id crc for '{$entry->strid}' conflicts with '{$STRIDMAP[$strid_crc]}'");
+    $STRIDMAP[$strid_crc] = $entry->strid;
+
+    $header[] = array(
+      $format,
+      $entry->id,
+      $strid_crc,
+      $entry->class_id,
+      $chunk_offset,
+      $payloads_offset,
+      $payload_size
+    );
+
+    $payloads_offset += $payload_size;
+
+    //NOTE: the last chunk is flushed regardless of its size
+    if($payloads_offset >= $chunk_size || $entries_left == 0)
+    {
+      if($payloads_offset > $max_chunk_size)
+        $max_chunk_size = $payloads_offset;
+      $payloads_offset = 0;
+      $lz4_data = lz4_compress($payloads_buffer, $compression_level);
+      $payloads_bundle .= pack("V", strlen($lz4_data));
+      $payloads_bundle .= $lz4_data;
+      $chunk_offset = strlen($payloads_bundle);
+      $payloads_buffer = '';
+    }
+  }
+
+  $header_msgpack = config_msgpack_pack($header);
+
+  $packed_data =
+    pack("C", 3) .
+    pack("V", $version) .
+    pack("V", strlen($header_msgpack)) .
+    pack("V", $max_chunk_size) .
+    $header_msgpack .
+    $payloads_bundle;
+
+  return $packed_data;
+}
+
 function _config_encode_strid_as_indices(string $strid, array &$STRIDMAP, array &$STRIDLIST) : array
 {
   $strids_indices = array();
@@ -331,6 +445,10 @@ function config_unpack_bundle(string $packed_data) : array
     list($_, $_, $entries) = _config_unpack_bundle_fmt2($packed_data);
     return $entries;
   }
+  else if($info['format'] === 3)
+  {
+    return _config_unpack_bundle_fmt3($packed_data);
+  }
   else
     throw new Exception("Unknown format: {$info['format']}");
 }
@@ -396,6 +514,49 @@ function _config_unpack_bundle_fmt2(string $packed_data, bool $unpack_entries =
   return array($strids, $header, $entries, $unpack_entries ? null : $payloads_bundle);
 }
 
+//NOTE: Unpacks a bundle produced by _config_pack_bundle_fmt3() into array(id => array(class_id, unpacked payload))
+function _config_unpack_bundle_fmt3(string $packed_data): array
+{
+  //NOTE: fixed size part of the bundle: u8 format + 3 x u32(version, header size, max chunk size)
+  if(strlen($packed_data) < 13 || ord($packed_data[0]) !== 3)
+  {
+    throw new Exception("Invalid config bundle format");
+  }
+
+  //NOTE: version(at offset 1) and max chunk size(at offset 9) are not needed for unpacking
+  $header_len = unpack("V", substr($packed_data, 5, 4))[1];
+  $header = config_msgpack_unpack(substr($packed_data, 13, $header_len));
+  //NOTE: chunk offsets stored in the header are relative to this position
+  $chunks_offset = 13 + $header_len;
+
+  $cache_entries = [];
+  $chunk_buffer = '';
+  $chunk_id = -1;
+
+  foreach($header as $entry_data)
+  {
+    list($format, $id, $strid_crc, $class_id, $entry_chunk_offset, $payload_offset_within_chunk, $payload_size) = $entry_data;
+
+    //NOTE: entries sharing a chunk are adjacent in the header, so only the
+    //      most recently uncompressed chunk is kept around
+    if($entry_chunk_offset !== $chunk_id)
+    {
+      $lz4_chunk_offset = $chunks_offset + $entry_chunk_offset;
+      $lz4_chunk_size = unpack("V", substr($packed_data, $lz4_chunk_offset, 4))[1];
+      $chunk_buffer = lz4_uncompress(substr($packed_data, $lz4_chunk_offset + 4, $lz4_chunk_size));
+      if($chunk_buffer === false)
+        throw new Exception("Could not uncompress config bundle chunk at offset {$entry_chunk_offset}");
+      $chunk_id = $entry_chunk_offset;
+    }
+
+    $payload = substr($chunk_buffer, $payload_offset_within_chunk, $payload_size);
+
+    $cache_entries[$id] = array($class_id, _config_unpack_payload($format, $payload));
+  }
+
+  return $cache_entries;
+}
+
 //format: [format_id, payload_data]
 function _config_get_payload(ConfigCacheEntry $ce, bool $use_lz4, bool $use_config_refs) : array
 {