DotRecastNetSim/src/DotRecast.Core/Compression/FastLZ.cs

/*
  FastLZ - Byte-aligned LZ77 compression library
  Copyright (C) 2005-2020 Ariya Hidayat <ariya.hidayat@gmail.com>
  Copyright (C) 2023 Choi Ikpil <ikpil@naver.com> https://github.com/ikpil/DotFastLZ

  Permission is hereby granted, free of charge, to any person obtaining a copy
  of this software and associated documentation files (the "Software"), to deal
  in the Software without restriction, including without limitation the rights
  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  copies of the Software, and to permit persons to whom the Software is
  furnished to do so, subject to the following conditions:

  The above copyright notice and this permission notice shall be included in
  all copies or substantial portions of the Software.

  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  THE SOFTWARE.
*/

using System;

namespace DotRecast.Core.Compression
{
    public static class FastLZ
    {
        public const int FASTLZ_VERSION = 0x000500;
        public const int FASTLZ_VERSION_MAJOR = 0;
        public const int FASTLZ_VERSION_MINOR = 5;
        public const int FASTLZ_VERSION_REVISION = 0;
        public const string FASTLZ_VERSION_STRING = "0.5.0";

        public const int MAX_COPY = 32;
        public const int MAX_LEN = 264; /* 256 + 8 */
        public const int MAX_L1_DISTANCE = 8192;
        public const int MAX_L2_DISTANCE = 8191;
        public const int MAX_FARDISTANCE = (65535 + MAX_L2_DISTANCE - 1);
        public const int HASH_LOG = 13;
        public const int HASH_SIZE = (1 << HASH_LOG);
        public const int HASH_MASK = (HASH_SIZE - 1);

        /**
          Compress a block of data in the input buffer and returns the size of
          compressed block. The size of input buffer is specified by length. The
          minimum input buffer size is 16.

          The output buffer must be at least 5% larger than the input buffer
          and can not be smaller than 66 bytes.

          If the input is not compressible, the return value might be larger than
          length (input buffer size).

          The input buffer and the output buffer can not overlap.

          Compression level can be specified in parameter level. At the moment,
          only level 1 and level 2 are supported.
          Level 1 is the fastest compression and generally useful for short data.
          Level 2 is slightly slower but it gives better compression ratio.

          Note that the compressed data, regardless of the level, can always be
          decompressed using the function fastlz_decompress below.
        */
        // fastlz_compress
        public static long CompressLevel(int level, byte[] input, long length, byte[] output)
        {
            return CompressLevel(level, input, 0, length, output);
        }

        public static long CompressLevel(int level, byte[] input, long inputOffset, long length, byte[] output)
        {
            if (level == 1)
            {
                return CompressLevel1(input, inputOffset, length, output);
            }

            if (level == 2)
            {
                return CompressLevel2(input, inputOffset, length, output);
            }

            throw new Exception($"invalid level: {level} (expected: 1 or 2)");
        }

        // fastlz1_compress
        public static long CompressLevel1(byte[] input, long inputOffset, long length, byte[] output)
        {
            long ip = inputOffset;
            long ip_start = ip;
            long ip_bound = ip + length - 4;
            long ip_limit = ip + length - 12 - 1;

            long op = 0;

            long[] htab = new long[HASH_SIZE];
            long seq, hash;

            // Initializes hash table
            for (hash = 0; hash < HASH_SIZE; ++hash)
            {
                htab[hash] = 0;
            }

            // We start with literal copy
            long anchor = ip;
            ip += 2;

            // Main loop
            while (ip < ip_limit)
            {
                long refIdx;
                long distance, cmp;

                // Find potential match
                do
                {
                    seq = ReadUInt32(input, ip) & 0xffffff;
                    hash = Hash(seq);
                    refIdx = ip_start + htab[hash];
                    htab[hash] = ip - ip_start;
                    distance = ip - refIdx;
                    cmp = distance < MAX_L1_DISTANCE
                        ? ReadUInt32(input, refIdx) & 0xffffff
                        : 0x1000000;

                    if (ip >= ip_limit)
                    {
                        break;
                    }

                    ++ip;
                } while (seq != cmp);

                if (ip >= ip_limit)
                {
                    break;
                }

                --ip;

                if (ip > anchor)
                {
                    op = Literals(ip - anchor, input, anchor, output, op);
                }

                long len = MemCompare(input, refIdx + 3, input, ip + 3, ip_bound);
                op = MatchLevel1(len, distance, output, op);

                // Update the hash at the match boundary
                ip += len;
                seq = ReadUInt32(input, ip);
                hash = Hash(seq & 0xffffff);
                htab[hash] = ip++ - ip_start;
                seq >>= 8;
                hash = Hash(seq);
                htab[hash] = ip++ - ip_start;

                anchor = ip;
            }

            long copy = length - anchor;
            op = Literals(copy, input, anchor, output, op);
            return op;
        }

        // fastlz2_compress
        public static long CompressLevel2(byte[] input, long inputOffset, long length, byte[] output)
        {
            long ip = inputOffset;
            long ip_start = ip;
            long ip_bound = ip + length - 4; /* because readU32 */
            long ip_limit = ip + length - 12 - 1;

            long op = 0;

            long[] htab = new long[HASH_SIZE];
            long seq, hash;

            /* initializes hash table */
            for (hash = 0; hash < HASH_SIZE; ++hash)
            {
                htab[hash] = 0;
            }

            /* we start with literal copy */
            long anchor = ip;
            ip += 2;

            /* main loop */
            while (ip < ip_limit)
            {
                long refs;
                long distance, cmp;

                /* find potential match */
                do
                {
                    seq = ReadUInt32(input, ip) & 0xffffff;
                    hash = Hash(seq);
                    refs = ip_start + htab[hash];
                    htab[hash] = ip - ip_start;
                    distance = ip - refs;
                    cmp = distance < MAX_FARDISTANCE
                        ? ReadUInt32(input, refs) & 0xffffff
                        : 0x1000000;

                    if (ip >= ip_limit)
                    {
                        break;
                    }

                    ++ip;
                } while (seq != cmp);

                if (ip >= ip_limit)
                {
                    break;
                }

                --ip;

                /* far, needs at least 5-byte match */
                if (distance >= MAX_L2_DISTANCE)
                {
                    if (input[refs + 3] != input[ip + 3] || input[refs + 4] != input[ip + 4])
                    {
                        ++ip;
                        continue;
                    }
                }

                if (ip > anchor)
                {
                    op = Literals(ip - anchor, input, anchor, output, op);
                }

                long len = MemCompare(input, refs + 3, input, ip + 3, ip_bound);
                op = MatchLevel2(len, distance, output, op);

                /* update the hash at match boundary */
                ip += len;
                seq = ReadUInt32(input, ip);
                hash = Hash(seq & 0xffffff);
                htab[hash] = ip++ - ip_start;
                seq >>= 8;
                hash = Hash(seq);
                htab[hash] = ip++ - ip_start;

                anchor = ip;
            }

            long copy = length - anchor;
            op = Literals(copy, input, anchor, output, op);

            /* marker for fastlz2 */
            output[inputOffset] |= (1 << 5);

            return op;
        }

        /**
          Decompress a block of compressed data and returns the size of the
          decompressed block. If error occurs, e.g. the compressed data is
          corrupted or the output buffer is not large enough, then 0 (zero)
          will be returned instead.

          The input buffer and the output buffer can not overlap.

          Decompression is memory safe and guaranteed not to write the output buffer
          more than what is specified in maxout.

          Note that the decompression will always work, regardless of the
          compression level specified in fastlz_compress_level above (when
          producing the compressed block).
         */
        // fastlz_decompress
        public static long Decompress(byte[] input, long length, byte[] output, long maxout)
        {
            return Decompress(input, 0, length, output, 0, maxout);
        }

        public static long Decompress(byte[] input, long inputOffset, long length, byte[] output, long outputOffset, long maxout)
        {
            /* magic identifier for compression level */
            int level = (input[inputOffset] >> 5) + 1;

            if (level == 1)
            {
                return DecompressLevel1(input, inputOffset, length, output, outputOffset, maxout);
            }

            if (level == 2)
            {
                return DecompressLevel2(input, inputOffset, length, output, outputOffset, maxout);
            }

            throw new Exception($"invalid level: {level} (expected: 1 or 2)");
        }

        // fastlz1_decompress
        public static long DecompressLevel1(byte[] input, long inputOffset, long length, byte[] output, long outputOffset, long maxout)
        {
            long ip = inputOffset;
            long ip_limit = ip + length;
            long ip_bound = ip_limit - 2;

            long op = outputOffset;
            long op_limit = op + maxout;
            long ctrl = input[ip++] & 31;

            while (true)
            {
                if (ctrl >= 32)
                {
                    long len = (ctrl >> 5) - 1;
                    long ofs = (ctrl & 31) << 8;
                    long refs = op - ofs - 1;
                    if (len == 7 - 1)
                    {
                        if (!(ip <= ip_bound))
                        {
                            return 0;
                        }

                        len += input[ip++];
                    }

                    refs -= input[ip++];
                    len += 3;
                    if (!(op + len <= op_limit))
                    {
                        return 0;
                    }

                    if (!(refs >= outputOffset))
                    {
                        return 0;
                    }

                    MemMove(output, op, output, refs, len);
                    op += len;
                }
                else
                {
                    ctrl++;
                    if (!(op + ctrl <= op_limit))
                    {
                        return 0;
                    }

                    if (!(ip + ctrl <= ip_limit))
                    {
                        return 0;
                    }

                    RcArrays.Copy(input, ip, output, op, ctrl);
                    ip += ctrl;
                    op += ctrl;
                }

                if (ip > ip_bound)
                {
                    break;
                }

                ctrl = input[ip++];
            }

            return op;
        }

        // fastlz2_decompress
        public static long DecompressLevel2(byte[] input, long inputOffset, long length, byte[] output, long outputOffset, long maxout)
        {
            long ip = inputOffset;
            long ip_limit = ip + length;
            long ip_bound = ip_limit - 2;

            long op = outputOffset;
            long op_limit = op + maxout;
            long ctrl = input[ip++] & 31;

            while (true)
            {
                if (ctrl >= 32)
                {
                    long len = (ctrl >> 5) - 1;
                    long ofs = (ctrl & 31) << 8;
                    long refIdx = op - ofs - 1;

                    long code;
                    if (len == 7 - 1)
                    {
                        do
                        {
                            if (!(ip <= ip_bound))
                            {
                                return 0;
                            }

                            code = input[ip++];
                            len += code;
                        } while (code == 255);
                    }

                    code = input[ip++];
                    refIdx -= code;
                    len += 3;

                    /* match from 16-bit distance */
                    if (code == 255)
                    {
                        if (ofs == (31 << 8))
                        {
                            if (!(ip < ip_bound))
                            {
                                return 0;
                            }

                            ofs = input[ip++] << 8;
                            ofs += input[ip++];
                            refIdx = op - ofs - MAX_L2_DISTANCE - 1;
                        }
                    }

                    if (!(op + len <= op_limit))
                    {
                        return 0;
                    }

                    if (!(refIdx >= outputOffset))
                    {
                        return 0;
                    }

                    MemMove(output, op, output, refIdx, len);
                    op += len;
                }
                else
                {
                    ctrl++;
                    if (!(op + ctrl <= op_limit))
                    {
                        return 0;
                    }

                    if (!(ip + ctrl <= ip_limit))
                    {
                        return 0;
                    }

                    RcArrays.Copy(input, ip, output, op, ctrl);
                    ip += ctrl;
                    op += ctrl;
                }

                if (ip >= ip_limit)
                {
                    break;
                }

                ctrl = input[ip++];
            }

            return op;
        }

        // flz_readu32
        public static uint ReadUInt32(byte[] data, long offset)
        {
            return ((uint)data[offset + 3] & 0xff) << 24 |
                   ((uint)data[offset + 2] & 0xff) << 16 |
                   ((uint)data[offset + 1] & 0xff) << 8 |
                   ((uint)data[offset + 0] & 0xff);
        }

        public static ushort ReadUInt16(byte[] data, long offset)
        {
            var u16 = ((uint)data[offset + 1] << 8) |
                      ((uint)data[offset + 0]);
            return (ushort)u16;
        }

        // flz_hash
        public static ushort Hash(long v)
        {
            ulong h = ((ulong)v * 2654435769UL) >> (32 - HASH_LOG);
            return (ushort)(h & HASH_MASK);
        }

        // special case of memcpy: at most MAX_COPY bytes
        // flz_smallcopy
        public static void SmallCopy(byte[] dest, long destOffset, byte[] src, long srcOffset, long count)
        {
            // if (count >= 4)
            // {
            //     count -= count % 4;
            //     RcArrays.Copy(src, srcOffset, dest, destOffset, count);
            // }
            RcArrays.Copy(src, srcOffset, dest, destOffset, count);
        }

        // special case of memcpy: exactly MAX_COPY bytes
        // flz_maxcopy
        static void MaxCopy(byte[] dest, long destOffset, byte[] src, long secOffset)
        {
            RcArrays.Copy(src, secOffset, dest, destOffset, MAX_COPY);
        }

        // flz_literals
        public static long Literals(long runs, byte[] src, long srcOffset, byte[] dest, long destOffset)
        {
            while (runs >= MAX_COPY)
            {
                dest[destOffset++] = MAX_COPY - 1;
                MaxCopy(dest, destOffset, src, srcOffset);
                srcOffset += MAX_COPY;
                destOffset += MAX_COPY;
                runs -= MAX_COPY;
            }

            if (runs > 0)
            {
                dest[destOffset++] = (byte)(runs - 1);
                SmallCopy(dest, destOffset, src, srcOffset, runs);
                destOffset += runs;
            }

            return destOffset;
        }

        // flz1_match
        public static long MatchLevel1(long len, long distance, byte[] output, long op)
        {
            --distance;
            if (len > MAX_LEN - 2)
            {
                while (len > MAX_LEN - 2)
                {
                    output[op++] = (byte)((7 << 5) + (distance >> 8));
                    output[op++] = (byte)(MAX_LEN - 2 - 7 - 2);
                    output[op++] = (byte)(distance & 255);
                    len -= MAX_LEN - 2;
                }
            }

            if (len < 7)
            {
                output[op++] = (byte)((len << 5) + (distance >> 8));
                output[op++] = (byte)(distance & 255);
            }
            else
            {
                output[op++] = (byte)((7 << 5) + (distance >> 8));
                output[op++] = (byte)(len - 7);
                output[op++] = (byte)((distance & 255));
            }

            return op;
        }

        // flz2_match
        public static long MatchLevel2(long len, long distance, byte[] output, long op)
        {
            --distance;
            if (distance < MAX_L2_DISTANCE)
            {
                if (len < 7)
                {
                    output[op++] = (byte)((len << 5) + (distance >> 8));
                    output[op++] = (byte)((distance & 255));
                }
                else
                {
                    output[op++] = (byte)((7 << 5) + (distance >> 8));
                    for (len -= 7; len >= 255; len -= 255)
                    {
                        output[op++] = 255;
                    }

                    output[op++] = (byte)(len);
                    output[op++] = (byte)((distance & 255));
                }
            }
            else
            {
                /* far away, but not yet in the another galaxy... */
                if (len < 7)
                {
                    distance -= MAX_L2_DISTANCE;
                    output[op++] = (byte)((len << 5) + 31);
                    output[op++] = (byte)(255);
                    output[op++] = (byte)(distance >> 8);
                    output[op++] = (byte)(distance & 255);
                }
                else
                {
                    distance -= MAX_L2_DISTANCE;
                    output[op++] = (7 << 5) + 31;
                    for (len -= 7; len >= 255; len -= 255)
                    {
                        output[op++] = 255;
                    }

                    output[op++] = (byte)(len);
                    output[op++] = (byte)(255);
                    output[op++] = (byte)(distance >> 8);
                    output[op++] = (byte)(distance & 255);
                }
            }

            return op;
        }

        // flz_cmp
        public static long MemCompare(byte[] p, long pOffset, byte[] q, long qOffset, long r)
        {
            long start = pOffset;

            if (4 <= r && ReadUInt32(p, pOffset) == ReadUInt32(q, qOffset))
            {
                pOffset += 4;
                qOffset += 4;
            }

            while (qOffset < r)
            {
                if (p[pOffset++] != q[qOffset++])
                    break;
            }

            return pOffset - start;
        }

        // fastlz_memmove
        public static void MemMove(byte[] dest, long destOffset, byte[] src, long srcOffset, long count)
        {
            if (dest.Length < destOffset + count)
            {
                throw new IndexOutOfRangeException($"{dest.Length} < {destOffset} + {count}");
            }

            if (src.Length < srcOffset + count)
            {
                throw new IndexOutOfRangeException($"{src.Length} < {srcOffset} + {count}");
            }

            for (long i = 0; i < count; ++i)
            {
                dest[destOffset + i] = src[srcOffset + i];
            }
        }

        public static long EstimateCompressedSize(long size)
        {
            long estimatedSize = (long)Math.Ceiling(size * 1.06f);
            return Math.Max(estimatedSize, 66);
        }
    }
}