Added fmt3 bundle format
Publish PHP Package / docker (push) Successful in 6s Details

This commit is contained in:
Alexey Chubar 2025-03-14 00:14:14 +07:00
parent fd49e48a57
commit 6fe8a0dff5
2 changed files with 165 additions and 2 deletions

View File

@ -1,3 +1,6 @@
## v5.10.0
- Added fmt3 bundle format - basically, fmt1 with chunk-based lz4 compression
## v4.0.2
- Improving exception message
@ -7,4 +10,4 @@
## v4.0.0
- Adding initial binary format 2 support: no more lookups by CRC28 strids
- Removed misc obsolete stuff
- Adding PHP type hints
- Adding PHP type hints

View File

@ -12,13 +12,18 @@ class ConfigPackParams
public ?int $version = null;
public bool $debug = false;
public const string EXTRA_FMT3_CHUNK_SIZE = "EXTRA_FMT3_CHUNK_SIZE";
public const string EXTRA_FMT3_COMPRESSION_LEVEL = "EXTRA_FMT3_COMPRESSION_LEVEL";
public array $extras = array();
function __construct(
array $cache_entries,
int $version,
bool $use_lz4 = false,
bool $use_config_refs = false,
int $binary_format = 1,
bool $debug = false
bool $debug = false,
array $extras = array()
)
{
$this->cache_entries = $cache_entries;
@ -27,6 +32,7 @@ class ConfigPackParams
$this->binary_format = $binary_format;
$this->version = $version;
$this->debug = $debug;
$this->extras = $extras;
}
}
@ -54,6 +60,17 @@ function config_pack_bundle(ConfigPackParams $params) : string
$params->version,
);
}
else if($params->binary_format == 3)
{
$packed_data = _config_pack_bundle_fmt3(
$params->cache_entries,
$params->use_lz4,
$params->use_config_refs,
$params->version,
$params->extras[ConfigPackParams::EXTRA_FMT3_CHUNK_SIZE],
$params->extras[ConfigPackParams::EXTRA_FMT3_COMPRESSION_LEVEL],
);
}
else
throw new Exception("Unknown binary format: {$params->binary_format}");
@ -217,6 +234,85 @@ function _config_pack_bundle_fmt2(
return $packed_data;
}
//NOTE: Much like fmt1, but config entries are grouped into sizeable chunks, with each chunk lz4-compressed.
//      This reduces overall bundle size when there are many small config entries.
//
// Resulting layout:
//   [1 byte: format id (3)]
//   [uint32 LE: version]
//   [uint32 LE: msgpack header length]
//   [uint32 LE: size of the largest uncompressed chunk]
//   [msgpack header]
//   [chunks], each stored as [uint32 LE: compressed length][lz4 data]
//
// Every header row is:
//   [format, id, crc32(strid), class_id, chunk offset, payload offset within chunk, payload size]
// where 'chunk offset' is counted from the beginning of the chunks area.
//
// A chunk is closed once its accumulated uncompressed size reaches $chunk_size
// or once the last entry has been appended.
//
// Throws Exception when lz4 is disabled, the compression level is out of range,
// two entries share an id or a strid crc, or lz4 compression of a chunk fails.
function _config_pack_bundle_fmt3(
  array $cache_entries,
  bool $use_lz4,
  bool $use_config_refs,
  int $version,
  int $chunk_size,
  int $compression_level) : string
{
  if(!$use_lz4)
    throw new Exception("Config bundle FMT3 is only available with LZ4 enabled");

  if($compression_level < 0 || $compression_level > 12)
    throw new Exception("LZ4 compression level must be in range [0, 12]");

  $MAP = array();
  $STRIDMAP = array();

  $header = array();
  $payloads_offset = 0;
  $max_chunk_size = 0;
  $chunk_offset = 0;
  $payloads_bundle = '';
  $payloads_buffer = '';

  $count_entries = count($cache_entries);
  //NOTE: counting iterations instead of relying on array keys, since keys
  //      are not guaranteed to be a 0-based sequence and the last chunk
  //      must always be flushed
  $entry_num = 0;

  foreach($cache_entries as $entry)
  {
    ++$entry_num;

    list($format, $payload) = _config_get_payload($entry, $use_lz4, $use_config_refs);
    $payload_size = strlen($payload);
    $payloads_buffer .= $payload;

    if(isset($MAP[$entry->id]))
      throw new Exception("Duplicating config id for '{$entry->strid}' conflicts with '{$MAP[$entry->id]}'");
    $MAP[$entry->id] = $entry->strid;

    $strid_crc = crc32($entry->strid);
    if(isset($STRIDMAP[$strid_crc]))
      throw new Exception("Duplicating config str id crc for '{$entry->strid}' conflicts with '{$STRIDMAP[$strid_crc]}'");
    $STRIDMAP[$strid_crc] = $entry->strid;

    $header[] = array(
      $format,
      $entry->id,
      $strid_crc,
      $entry->class_id,
      $chunk_offset,
      $payloads_offset,
      $payload_size
    );

    $payloads_offset += $payload_size;

    if($payloads_offset >= $chunk_size || $entry_num === $count_entries)
    {
      if($payloads_offset > $max_chunk_size)
        $max_chunk_size = $payloads_offset;

      $lz4_data = lz4_compress($payloads_buffer, $compression_level);
      if($lz4_data === false)
        throw new Exception("Failed to lz4-compress config bundle chunk");

      $payloads_bundle .= pack("V", strlen($lz4_data));
      $payloads_bundle .= $lz4_data;

      //the next chunk begins right after the one just written
      $chunk_offset = strlen($payloads_bundle);
      $payloads_offset = 0;
      $payloads_buffer = '';
    }
  }

  $header_msgpack = config_msgpack_pack($header);

  $packed_data =
    pack("C", 3) .
    pack("V", $version) .
    pack("V", strlen($header_msgpack)) .
    pack("V", $max_chunk_size) .
    $header_msgpack .
    $payloads_bundle;

  return $packed_data;
}
function _config_encode_strid_as_indices(string $strid, array &$STRIDMAP, array &$STRIDLIST) : array
{
$strids_indices = array();
@ -331,6 +427,10 @@ function config_unpack_bundle(string $packed_data) : array
list($_, $_, $entries) = _config_unpack_bundle_fmt2($packed_data);
return $entries;
}
else if($info['format'] === 3)
{
return _config_unpack_bundle_fmt3($packed_data);
}
else
throw new Exception("Unknown format: {$info['format']}");
}
@ -396,6 +496,66 @@ function _config_unpack_bundle_fmt2(string $packed_data, bool $unpack_entries =
return array($strids, $header, $entries, $unpack_entries ? null : $payloads_bundle);
}
// Unpacks a fmt3 bundle into a map: entry id => [class_id, unpacked payload].
//
// Expected layout:
//   [1 byte: format id (3)]
//   [uint32 LE: version]
//   [uint32 LE: msgpack header length]
//   [uint32 LE: size of the largest uncompressed chunk]
//   [msgpack header]
//   [chunks], each stored as [uint32 LE: compressed length][lz4 data]
//
// Every header row is:
//   [format, id, crc32(strid), class_id, chunk offset, payload offset within chunk, payload size]
// The chunk offset is counted from the beginning of the chunks area and is used
// to locate the chunk directly, so decoding does not depend on the order of header rows.
// The version and the largest chunk size fields are skipped, they are not needed here.
//
// Throws Exception if the data is too short, has a wrong format id,
// a chunk is truncated or a chunk can't be lz4-uncompressed.
function _config_unpack_bundle_fmt3(string $packed_data): array
{
  //format id byte + 3 uint32 fields
  $preamble_size = 1 + 4 + 4 + 4;

  if(strlen($packed_data) < $preamble_size || ord($packed_data[0]) !== 3)
  {
    throw new Exception("Invalid config bundle format");
  }

  //header length follows the format id byte and the version field
  $header_len = unpack("V", $packed_data, 1 + 4)[1];

  $header_msgpack = substr($packed_data, $preamble_size, $header_len);
  $header = config_msgpack_unpack($header_msgpack);

  $chunks_start = $preamble_size + $header_len;

  $cache_entries = [];
  $chunk_buffer = '';
  //offset of the chunk currently held in $chunk_buffer, -1 means 'none yet'
  $loaded_chunk_offset = -1;

  foreach($header as $entry_data)
  {
    list($format, $id, $strid_crc, $class_id, $entry_chunk_offset, $payload_offset_within_chunk, $payload_size) = $entry_data;

    //consecutive entries normally share a chunk, uncompress it only once
    if($entry_chunk_offset !== $loaded_chunk_offset)
    {
      $chunk_pos = $chunks_start + $entry_chunk_offset;

      $size_bytes = substr($packed_data, $chunk_pos, 4);
      if(strlen($size_bytes) !== 4)
        throw new Exception("Truncated config bundle: no chunk at offset {$entry_chunk_offset}");
      $lz4_chunk_size = unpack("V", $size_bytes)[1];

      $lz4_chunk_data = substr($packed_data, $chunk_pos + 4, $lz4_chunk_size);
      if(strlen($lz4_chunk_data) !== $lz4_chunk_size)
        throw new Exception("Truncated config bundle: incomplete chunk at offset {$entry_chunk_offset}");

      $chunk_buffer = lz4_uncompress($lz4_chunk_data);
      if($chunk_buffer === false)
        throw new Exception("Failed to lz4-uncompress config bundle chunk at offset {$entry_chunk_offset}");

      $loaded_chunk_offset = $entry_chunk_offset;
    }

    $payload = substr($chunk_buffer, $payload_offset_within_chunk, $payload_size);
    $cache_entries[$id] = array($class_id, _config_unpack_payload($format, $payload));
  }

  return $cache_entries;
}
//format: [format_id, payload_data]
function _config_get_payload(ConfigCacheEntry $ce, bool $use_lz4, bool $use_config_refs) : array
{