在 2023/6/30 10:39, Kefeng Wang 写道:
整体的代码风格都刷新下,按照linux的要求来吧
正在修改。
On 2023/6/30 10:41, Tu Jinjiang wrote:
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I7H9IA CVE: NA
Add lz4k algorithm support for zram.
有没有原始作者信息和commit信息,要保留
我这边没有这些信息。
Signed-off-by: Nanyong Sun sunnanyong@huawei.com Signed-off-by: Tu Jinjiang tujinjiang@huawei.com
crypto/Kconfig | 8 + crypto/Makefile | 1 + crypto/lz4k.c | 97 ++++++ drivers/block/zram/zcomp.c | 3 + include/linux/lz4k.h | 383 +++++++++++++++++++++++ lib/Kconfig | 6 + lib/Makefile | 2 + lib/lz4k/Makefile | 2 + lib/lz4k/lz4k_decode.c | 308 +++++++++++++++++++ lib/lz4k/lz4k_encode.c | 539 +++++++++++++++++++++++++++++++++ lib/lz4k/lz4k_encode_private.h | 137 +++++++++ lib/lz4k/lz4k_private.h | 269 ++++++++++++++++ 12 files changed, 1755 insertions(+) create mode 100644 crypto/lz4k.c create mode 100644 include/linux/lz4k.h create mode 100644 lib/lz4k/Makefile create mode 100644 lib/lz4k/lz4k_decode.c create mode 100644 lib/lz4k/lz4k_encode.c create mode 100644 lib/lz4k/lz4k_encode_private.h create mode 100644 lib/lz4k/lz4k_private.h
diff --git a/crypto/Kconfig b/crypto/Kconfig index 64cb304f5103..35223cff7c8a 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1871,6 +1871,14 @@ config CRYPTO_LZ4HC help This is the LZ4 high compression mode algorithm. +config CRYPTO_LZ4K + tristate "LZ4K compression algorithm" + select CRYPTO_ALGAPI + select LZ4K_COMPRESS + select LZ4K_DECOMPRESS + help + This is the LZ4K algorithm.
config CRYPTO_ZSTD tristate "Zstd compression algorithm" select CRYPTO_ALGAPI diff --git a/crypto/Makefile b/crypto/Makefile index 9d1191f2b741..5c3b0a0839c5 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -161,6 +161,7 @@ obj-$(CONFIG_CRYPTO_AUTHENC) += authenc.o authencesn.o obj-$(CONFIG_CRYPTO_LZO) += lzo.o lzo-rle.o obj-$(CONFIG_CRYPTO_LZ4) += lz4.o obj-$(CONFIG_CRYPTO_LZ4HC) += lz4hc.o +obj-$(CONFIG_CRYPTO_LZ4K) += lz4k.o obj-$(CONFIG_CRYPTO_XXHASH) += xxhash_generic.o obj-$(CONFIG_CRYPTO_842) += 842.o obj-$(CONFIG_CRYPTO_RNG2) += rng.o diff --git a/crypto/lz4k.c b/crypto/lz4k.c new file mode 100644 index 000000000000..8daceab269ef --- /dev/null +++ b/crypto/lz4k.c @@ -0,0 +1,97 @@ +/*
- Copyright (c) Huawei Technologies Co., Ltd. 2012-2020. All rights
reserved.
- Description: LZ4K compression algorithm for ZRAM
- Author: Arkhipov Denis arkhipov.denis@huawei.com
- Create: 2020-03-25
- */
+#include <linux/init.h> +#include <linux/module.h> +#include <linux/crypto.h> +#include <linux/vmalloc.h> +#include <linux/lz4k.h>
+struct lz4k_ctx { + void *lz4k_comp_mem; +};
+static int lz4k_init(struct crypto_tfm *tfm) +{ + struct lz4k_ctx *ctx = crypto_tfm_ctx(tfm);
+ ctx->lz4k_comp_mem = vmalloc(lz4k_encode_state_bytes_min()); + if (!ctx->lz4k_comp_mem) + return -ENOMEM;
+ return 0; +}
+static void lz4k_exit(struct crypto_tfm *tfm) +{ + struct lz4k_ctx *ctx = crypto_tfm_ctx(tfm); + vfree(ctx->lz4k_comp_mem); +}
+static int lz4k_compress_crypto(struct crypto_tfm *tfm, const u8 *src, unsigned int slen, u8 *dst, unsigned int *dlen) +{ + struct lz4k_ctx *ctx = crypto_tfm_ctx(tfm); + int ret;
+ ret = lz4k_encode(ctx->lz4k_comp_mem, src, dst, slen, *dlen, 0);
去掉空行
+ if (ret < 0) { + return -EINVAL; + }
去掉括号
+ if (ret) + *dlen = ret;
+ return 0; +}
+static int lz4k_decompress_crypto(struct crypto_tfm *tfm, const u8 *src, unsigned int slen, u8 *dst, unsigned int *dlen) +{ + int ret;
+ ret = lz4k_decode(src, dst, slen, *dlen);
去空行
+ if (ret <= 0) + return -EINVAL;
加空行
+ *dlen = ret; + return 0; +}
+static struct crypto_alg alg_lz4k = { + .cra_name = "lz4k", + .cra_driver_name = "lz4k-generic", + .cra_flags = CRYPTO_ALG_TYPE_COMPRESS, + .cra_ctxsize = sizeof(struct lz4k_ctx), + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(alg_lz4k.cra_list), + .cra_init = lz4k_init, + .cra_exit = lz4k_exit, + .cra_u = { + .compress = { + .coa_compress = lz4k_compress_crypto, + .coa_decompress = lz4k_decompress_crypto + } + } +};
+static int __init lz4k_mod_init(void) +{ + return crypto_register_alg(&alg_lz4k); +}
+static void __exit lz4k_mod_fini(void) +{ + crypto_unregister_alg(&alg_lz4k); +}
+module_init(lz4k_mod_init); +module_exit(lz4k_mod_fini);
+MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("LZ4K Compression Algorithm"); +MODULE_ALIAS_CRYPTO("lz4k"); diff --git a/drivers/block/zram/zcomp.c b/drivers/block/zram/zcomp.c index b08650417bf0..28bda2035326 100644 --- a/drivers/block/zram/zcomp.c +++ b/drivers/block/zram/zcomp.c @@ -29,6 +29,9 @@ static const char * const backends[] = { #if IS_ENABLED(CONFIG_CRYPTO_ZSTD) "zstd", #endif +#if IS_ENABLED(CONFIG_CRYPTO_LZ4K) + "lz4k", +#endif }; static void zcomp_strm_free(struct zcomp_strm *zstrm) diff --git a/include/linux/lz4k.h b/include/linux/lz4k.h new file mode 100644 index 000000000000..6e73161b1840 --- /dev/null +++ b/include/linux/lz4k.h @@ -0,0 +1,383 @@ +/*
- Copyright (c) Huawei Technologies Co., Ltd. 2012-2020. All rights
reserved.
- Description: LZ4K compression algorithm
- Author: Aleksei Romanovskii aleksei.romanovskii@huawei.com
- Created: 2020-03-25
- */
+#ifndef _LZ4K_H +#define _LZ4K_H
+/* file lz4k.h + This file contains the platform-independent API of LZ-class + lossless codecs (compressors/decompressors) with complete + in-place documentation. The documentation is formatted + in accordance with DOXYGEN mark-up format. So, one can + generate proper documentation, e.g. in HTML format, using DOXYGEN.
+ Currently, LZ-class codecs, documented here, implement following + algorithms for lossless data compression/decompression: + \li "LZ HUAWEI" proprietary codec competing with LZ4 - lz4k_encode(), + lz4k_encode_delta(), lz4k_decode(), lz4k_decode_delta()
+ The LZ HUAWEI compressors accept any data as input and compress it + without loss to a smaller size if possible. + Compressed data produced by LZ HUAWEI compressor API lz4k_encode*(), + can be decompressed only by lz4k_decode() API documented below.\n + */
+/* + lz4k_status defines simple set of status values returned by Huawei APIs
- */
各种Huawei 都改成 lz4k之类的 算法本身不要保留 LZ HUAWEI之类的
+typedef enum { + LZ4K_STATUS_INCOMPRESSIBLE = 0, /* !< Return when data is incompressible */ + LZ4K_STATUS_FAILED = -1, /* !< Return on general failure */ + LZ4K_STATUS_READ_ERROR = -2, /* !< Return when data reading failed */ + LZ4K_STATUS_WRITE_ERROR = -3 /* !< Return when data writing failed */ +} lz4k_status;
+/* + LZ4K_Version() returns static unmutable string with algorithm version
- */
+const char *lz4k_version(void);
+/* + lz4k_encode_state_bytes_min() returns number of bytes for state parameter, + supplied to lz4k_encode(), lz4k_encode_delta(), + lz4k_update_delta_state(). + So, state should occupy at least lz4k_encode_state_bytes_min() for mentioned + functions to work correctly.
- */
+unsigned lz4k_encode_state_bytes_min(void);
下面的注释风格之类要改下;或者删掉一些无用的
+/* + lz4k_encode() encodes/compresses one input buffer at *in, places + result of encoding into one output buffer at *out if encoded data + size fits specified values of out_max and out_limit. + It returs size of encoded data in case of success or value<=0 otherwise. + The result of successful encoding is in HUAWEI proprietary format, that + is the encoded data can be decoded only by lz4k_decode().
+ \return + \li positive value\n + if encoding was successful. The value returned is the size of encoded + (compressed) data always <=out_max. + \li non-positive value\n + if in==0||in_max==0||out==0||out_max==0 or + if out_max is less than needed for encoded (compressed) data. + \li 0 value\n + if encoded data size >= out_limit
+ \param[in] state + !=0, pointer to state buffer used internally by the function. Size of + state in bytes should be at least lz4k_encode_state_bytes_min(). The content + of state buffer will be changed during encoding.
+ \param[in] in + !=0, pointer to the input buffer to encode (compress). The content of + the input buffer does not change during encoding.
+ \param[in] out + !=0, pointer to the output buffer where to place result of encoding + (compression). + If encoding is unsuccessful, e.g. out_max or out_limit are less than + needed for encoded data then content of out buffer may be arbitrary.
+ \param[in] in_max + !=0, size in bytes of the input buffer at *in
+ \param[in] out_max + !=0, size in bytes of the output buffer at *out
+ \param[in] out_limit + encoded data size soft limit in bytes. Due to performance reasons it is + not guaranteed that + lz4k_encode will always detect that resulting encoded data size is + bigger than out_limit. + Hovewer, when reaching out_limit is detected, lz4k_encode() returns + earlier and spares CPU cycles. Caller code should recheck result + returned by lz4k_encode() (value greater than 0) if it is really + less or equal than out_limit. + out_limit is ignored if it is equal to 0.
- */
+int lz4k_encode( + void *const state, + const void *const in, + void *out, + unsigned in_max, + unsigned out_max, + unsigned out_limit);
+/* + lz4k_encode_max_cr() encodes/compresses one input buffer at *in, places + result of encoding into one output buffer at *out if encoded data + size fits specified value of out_max. + It returs size of encoded data in case of success or value<=0 otherwise. + The result of successful encoding is in HUAWEI proprietary format, that + is the encoded data can be decoded only by lz4k_decode().
+ \return + \li positive value\n + if encoding was successful. The value returned is the size of encoded + (compressed) data always <=out_max. + \li non-positive value\n + if in==0||in_max==0||out==0||out_max==0 or + if out_max is less than needed for encoded (compressed) data.
+ \param[in] state + !=0, pointer to state buffer used internally by the function. Size of + state in bytes should be at least lz4k_encode_state_bytes_min(). The content + of state buffer will be changed during encoding.
+ \param[in] in + !=0, pointer to the input buffer to encode (compress). The content of + the input buffer does not change during encoding.
+ \param[in] out + !=0, pointer to the output buffer where to place result of encoding + (compression). + If encoding is unsuccessful, e.g. out_max is less than + needed for encoded data then content of out buffer may be arbitrary.
+ \param[in] in_max + !=0, size in bytes of the input buffer at *in
+ \param[in] out_max + !=0, size in bytes of the output buffer at *out
+ \param[in] out_limit + encoded data size soft limit in bytes. Due to performance reasons it is + not guaranteed that + lz4k_encode will always detect that resulting encoded data size is + bigger than out_limit. + Hovewer, when reaching out_limit is detected, lz4k_encode() returns + earlier and spares CPU cycles. Caller code should recheck result + returned by lz4k_encode() (value greater than 0) if it is really + less or equal than out_limit. + out_limit is ignored if it is equal to 0.
- */
+int lz4k_encode_max_cr( + void *const state, + const void *const in, + void *out, + unsigned in_max, + unsigned out_max, + unsigned out_limit);
+/* + lz4k_update_delta_state() fills/updates state (hash table) in the same way as + lz4k_encode does while encoding (compressing). + The state and its content can then be used by lz4k_encode_delta() + to encode (compress) data more efficiently. + By other words, effect of lz4k_update_delta_state() is the same as + lz4k_encode() with all encoded output discarded.
+ Example sequence of calls for lz4k_update_delta_state and + lz4k_encode_delta: + //dictionary (1st) block + int result0=lz4k_update_delta_state(state, in0, in0, in_max0); +//delta (2nd) block + int result1=lz4k_encode_delta(state, in0, in, out, in_max, + out_max);
+ \param[in] state + !=0, pointer to state buffer used internally by lz4k_encode*. + Size of state in bytes should be at least lz4k_encode_state_bytes_min(). + The content of state buffer is zeroed at the beginning of + lz4k_update_delta_state ONLY when in0==in. + The content of state buffer will be changed inside + lz4k_update_delta_state.
+ \param[in] in0 + !=0, pointer to the reference/dictionary input buffer that was used + as input to preceding call of lz4k_encode() or lz4k_update_delta_state() + to fill/update the state buffer. + The content of the reference/dictionary input buffer does not change + during encoding. + The in0 is needed for use-cases when there are several dictionary and + input blocks interleaved, e.g.
- <dictionaryA><inputA><dictionaryB><inputB>..., or
+ <dictionaryA><dictionaryB><inputAB>..., etc.
+ \param[in] in + !=0, pointer to the input buffer to fill/update state as if encoding + (compressing) this input. This input buffer is also called dictionary + input buffer. + The content of the input buffer does not change during encoding. + The two buffers - at in0 and at in - should be contiguous in memory. + That is, the last byte of buffer at in0 is located exactly before byte + at in.
+ \param[in] in_max + !=0, size in bytes of the input buffer at in.
- */
+int lz4k_update_delta_state( + void *const state, + const void *const in0, + const void *const in, + unsigned in_max);
+/* + lz4k_encode_delta() encodes (compresses) data from one input buffer + using one reference buffer as dictionary and places the result of + compression into one output buffer. + The result of successful compression is in HUAWEI proprietary format, so + that compressed data can be decompressed only by lz4k_decode_delta(). + Reference/dictionary buffer and input buffer should be contiguous in + memory.
+ Example sequence of calls for lz4k_update_delta_state and + lz4k_encode_delta: +//dictionary (1st) block + int result0=lz4k_update_delta_state(state, in0, in0, in_max0); +//delta (2nd) block + int result1=lz4k_encode_delta(state, in0, in, out, in_max, + out_max);
+ Example sequence of calls for lz4k_encode and lz4k_encode_delta: +//dictionary (1st) block + int result0=lz4k_encode(state, in0, out0, in_max0, out_max0); +//delta (2nd) block + int result1=lz4k_encode_delta(state, in0, in, out, in_max, + out_max);
+ \return + \li positive value\n + if encoding was successful. The value returned is the size of encoded + (compressed) data. + \li non-positive value\n + if state==0||in0==0||in==0||in_max==0||out==0||out_max==0 or + if out_max is less than needed for encoded (compressed) data.
+ \param[in] state + !=0, pointer to state buffer used internally by the function. Size of + state in bytes should be at least lz4k_encode_state_bytes_min(). For more + efficient encoding the state buffer may be filled/updated by calling + lz4k_update_delta_state() or lz4k_encode() before lz4k_encode_delta(). + The content of state buffer is zeroed at the beginning of + lz4k_encode_delta() ONLY when in0==in. + The content of state will be changed during encoding.
+ \param[in] in0 + !=0, pointer to the reference/dictionary input buffer that was used as + input to preceding call of lz4k_encode() or lz4k_update_delta_state() to + fill/update the state buffer. + The content of the reference/dictionary input buffer does not change + during encoding.
+ \param[in] in + !=0, pointer to the input buffer to encode (compress). The input buffer + is compressed using content of the reference/dictionary input buffer at + in0. The content of the input buffer does not change during encoding. + The two buffers - at *in0 and at *in - should be contiguous in memory. + That is, the last byte of buffer at *in0 is located exactly before byte + at *in.
+ \param[in] out + !=0, pointer to the output buffer where to place result of encoding + (compression). If compression is unsuccessful then content of out + buffer may be arbitrary.
+ \param[in] in_max + !=0, size in bytes of the input buffer at *in
+ \param[in] out_max + !=0, size in bytes of the output buffer at *out.
- */
+int lz4k_encode_delta( + void *const state, + const void *const in0, + const void *const in, + void *out, + unsigned in_max, + unsigned out_max);
+/* + lz4k_decode() decodes (decompresses) data from one input buffer and places + the result of decompression into one output buffer. The encoded data in input + buffer should be in HUAWEI proprietary format, produced by lz4k_encode() + or by lz4k_encode_delta().
+ \return + \li positive value\n + if decoding was successful. The value returned is the size of decoded + (decompressed) data. + \li non-positive value\n + if in==0||in_max==0||out==0||out_max==0 or + if out_max is less than needed for decoded (decompressed) data or + if input encoded data format is corrupted.
+ \param[in] in + !=0, pointer to the input buffer to decode (decompress). The content of + the input buffer does not change during decoding.
+ \param[in] out + !=0, pointer to the output buffer where to place result of decoding + (decompression). If decompression is unsuccessful then content of out + buffer may be arbitrary.
+ \param[in] in_max + !=0, size in bytes of the input buffer at in
+ \param[in] out_max + !=0, size in bytes of the output buffer at out
- */
+int lz4k_decode( + const void *const in, + void *const out, + unsigned in_max, + unsigned out_max);
+/* + lz4k_decode_delta() decodes (decompresses) data from one input buffer + and places the result of decompression into one output buffer. The + compressed data in input buffer should be in format, produced by + lz4k_encode_delta().
+ Example sequence of calls for lz4k_decode and lz4k_decode_delta: +//dictionary (1st) block + int result0=lz4k_decode(in0, out0, in_max0, out_max0); +//delta (2nd) block + int result1=lz4k_decode_delta(in, out0, out, in_max, out_max);
+ \return + \li positive value\n + if decoding was successful. The value returned is the size of decoded + (decompressed) data. + \li non-positive value\n + if in==0||in_max==0||out==0||out_max==0 or + if out_max is less than needed for decoded (decompressed) data or + if input data format is corrupted.
+ \param[in] in + !=0, pointer to the input buffer to decode (decompress). The content of + the input buffer does not change during decoding.
+ \param[in] out0 + !=0, pointer to the dictionary input buffer that was used as input to + lz4k_update_delta_state() to fill/update the state buffer. The content + of the dictionary input buffer does not change during decoding.
+ \param[in] out + !=0, pointer to the output buffer where to place result of decoding + (decompression). If decompression is unsuccessful then content of out + buffer may be arbitrary. + The two buffers - at *out0 and at *out - should be contiguous in memory. + That is, the last byte of buffer at *out0 is located exactly before byte + at *out.
+ \param[in] in_max + !=0, size in bytes of the input buffer at *in
+ \param[in] out_max + !=0, size in bytes of the output buffer at *out
- */
+int lz4k_decode_delta( + const void *in, + const void *const out0, + void *const out, + unsigned in_max, + unsigned out_max);
+#endif /* _LZ4K_H */ diff --git a/lib/Kconfig b/lib/Kconfig index 36326864249d..4bf1c2c21157 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -310,6 +310,12 @@ config LZ4HC_COMPRESS config LZ4_DECOMPRESS tristate +config LZ4K_COMPRESS + tristate
+config LZ4K_DECOMPRESS + tristate
config ZSTD_COMPRESS select XXHASH tristate diff --git a/lib/Makefile b/lib/Makefile index a803e1527c4b..bd0d3635ae46 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -187,6 +187,8 @@ obj-$(CONFIG_LZO_DECOMPRESS) += lzo/ obj-$(CONFIG_LZ4_COMPRESS) += lz4/ obj-$(CONFIG_LZ4HC_COMPRESS) += lz4/ obj-$(CONFIG_LZ4_DECOMPRESS) += lz4/ +obj-$(CONFIG_LZ4K_COMPRESS) += lz4k/ +obj-$(CONFIG_LZ4K_DECOMPRESS) += lz4k/ obj-$(CONFIG_ZSTD_COMPRESS) += zstd/ obj-$(CONFIG_ZSTD_DECOMPRESS) += zstd/ obj-$(CONFIG_XZ_DEC) += xz/ diff --git a/lib/lz4k/Makefile b/lib/lz4k/Makefile new file mode 100644 index 000000000000..6ea3578639d4 --- /dev/null +++ b/lib/lz4k/Makefile @@ -0,0 +1,2 @@ +obj-$(CONFIG_LZ4K_COMPRESS) += lz4k_encode.o +obj-$(CONFIG_LZ4K_DECOMPRESS) += lz4k_decode.o \ No newline at end of file diff --git a/lib/lz4k/lz4k_decode.c b/lib/lz4k/lz4k_decode.c new file mode 100644 index 000000000000..567b76b7bc51 --- /dev/null +++ b/lib/lz4k/lz4k_decode.c @@ -0,0 +1,308 @@ +/*
- Copyright (c) Huawei Technologies Co., Ltd. 2012-2020. All rights
reserved.
- Description: LZ4K compression algorithm
- Author: Aleksei Romanovskii aleksei.romanovskii@huawei.com
- Created: 2020-03-25
- */
+#if !defined(__KERNEL__) +#include "lz4k.h" +#else +#include <linux/lz4k.h> +#include <linux/module.h> +#endif
+#include "lz4k_private.h" /* types, etc */
+static const uint8_t *get_size( + uint_fast32_t *size, + const uint8_t *in_at, + const uint8_t *const in_end) +{ + uint_fast32_t u; + do { + if (unlikely(in_at >= in_end)) + return NULL; + *size += (u = *(const uint8_t*)in_at); + ++in_at; + } while (BYTE_MAX == u); + return in_at; +}
+static int end_of_block( + const uint_fast32_t nr_bytes_max, + const uint_fast32_t r_bytes_max, + const uint8_t *const in_at, + const uint8_t *const in_end, + const uint8_t *const out, + const uint8_t *const out_at) +{ + if (!nr_bytes_max) + return LZ4K_STATUS_FAILED; /* should be the last one in block */ + if (r_bytes_max != REPEAT_MIN) + return LZ4K_STATUS_FAILED; /* should be the last one in block */ + if (in_at != in_end) + return LZ4K_STATUS_FAILED; /* should be the last one in block */ + return (int)(out_at - out); +}
+enum { + NR_COPY_MIN = 16, + R_COPY_MIN = 16, + R_COPY_SAFE = R_COPY_MIN - 1, + R_COPY_SAFE_2X = (R_COPY_MIN << 1) - 1 +};
+static bool out_non_repeat( + const uint8_t **in_at, + uint8_t **out_at, + uint_fast32_t nr_bytes_max, + const uint8_t *const in_end, + const uint8_t *const out_end) +{ + const uint8_t *const in_copy_end = *in_at + nr_bytes_max; + uint8_t *const out_copy_end = *out_at + nr_bytes_max; + if (likely(nr_bytes_max <= NR_COPY_MIN)) { + if (likely(*in_at <= in_end - NR_COPY_MIN && + *out_at <= out_end - NR_COPY_MIN)) + m_copy(*out_at, *in_at, NR_COPY_MIN); + else if (in_copy_end <= in_end && out_copy_end <= out_end) + m_copy(*out_at, *in_at, nr_bytes_max); + else + return false; + } else { + if (likely(in_copy_end <= in_end - NR_COPY_MIN && + out_copy_end <= out_end - NR_COPY_MIN)) { + m_copy(*out_at, *in_at, NR_COPY_MIN); + copy_x_while_lt(*out_at + NR_COPY_MIN, + *in_at + NR_COPY_MIN, + out_copy_end, NR_COPY_MIN); + } else if (in_copy_end <= in_end && out_copy_end <= out_end) { + m_copy(*out_at, *in_at, nr_bytes_max); + } else { /* in_copy_end > in_end || out_copy_end > out_end */ + return false; + } + } + *in_at = in_copy_end; + *out_at = out_copy_end; + return true; +}
+static void out_repeat_overlap( + uint_fast32_t offset, + uint8_t *out_at, + const uint8_t *out_from, + const uint8_t *const out_copy_end) +{ /* (1 < offset < R_COPY_MIN/2) && out_copy_end + R_COPY_SAFE_2X <= out_end */ + enum { + COPY_MIN = R_COPY_MIN >> 1, + OFFSET_LIMIT = COPY_MIN >> 1 + }; + m_copy(out_at, out_from, COPY_MIN); + out_at += offset; + if (offset <= OFFSET_LIMIT) + offset <<= 1; + do { + m_copy(out_at, out_from, COPY_MIN); + out_at += offset; + if (offset <= OFFSET_LIMIT) + offset <<= 1; + } while (out_at - out_from < R_COPY_MIN); + while_lt_copy_2x_as_x2(out_at, out_from, out_copy_end, R_COPY_MIN); +}
+static bool out_repeat_slow( + uint_fast32_t r_bytes_max, + uint_fast32_t offset, + uint8_t *out_at, + const uint8_t *out_from, + const uint8_t *const out_copy_end, + const uint8_t *const out_end) +{ + if (offset > 1 && out_copy_end <= out_end - R_COPY_SAFE_2X) { + out_repeat_overlap(offset, out_at, out_from, out_copy_end); + } else { + if (unlikely(out_copy_end > out_end)) + return false; + if (offset == 1) { + m_set(out_at, *out_from, r_bytes_max); + } else { + do + *out_at++ = *out_from++; + while (out_at < out_copy_end); + } + } + return true; +}
+static int decode( + const uint8_t *const in, + const uint8_t *const out0, + uint8_t *const out, + const uint8_t *const in_end, + const uint8_t *const out_end, + const uint_fast32_t nr_log2, + const uint_fast32_t off_log2) +{ + const uint_fast32_t r_log2 = TAG_BITS_MAX - (off_log2 + nr_log2); + const uint8_t *in_at = in; + const uint8_t *const in_end_minus_x = in_end - TAG_BYTES_MAX; + uint8_t *out_at = out; + while (likely(in_at <= in_end_minus_x)) { + const uint_fast32_t utag = read4_at(in_at - 1) >> BYTE_BITS; + const uint_fast32_t offset = utag & mask(off_log2); + uint_fast32_t nr_bytes_max = utag >> (off_log2 + r_log2), + r_bytes_max = ((utag >> off_log2) & mask(r_log2)) + + REPEAT_MIN; + const uint8_t *out_from = 0; + uint8_t *out_copy_end = 0; + in_at += TAG_BYTES_MAX; + if (unlikely(nr_bytes_max == mask(nr_log2))) { + in_at = get_size(&nr_bytes_max, in_at, in_end); + if (in_at == NULL) + return LZ4K_STATUS_READ_ERROR; + } + if (!out_non_repeat(&in_at, &out_at, nr_bytes_max, in_end, out_end)) + return LZ4K_STATUS_FAILED; + if (unlikely(r_bytes_max == mask(r_log2) + REPEAT_MIN)) { + in_at = get_size(&r_bytes_max, in_at, in_end); + if (in_at == NULL) + return LZ4K_STATUS_READ_ERROR; + } + out_from = out_at - offset; + if (unlikely(out_from < out0)) + return LZ4K_STATUS_FAILED; + out_copy_end = out_at + r_bytes_max; + if (likely(offset >= R_COPY_MIN && + out_copy_end <= out_end - R_COPY_SAFE_2X)) { + copy_2x_as_x2_while_lt(out_at, out_from, out_copy_end, + R_COPY_MIN); + } else if (likely(offset >= (R_COPY_MIN >> 1) && + out_copy_end <= out_end - R_COPY_SAFE_2X)) { + m_copy(out_at, out_from, R_COPY_MIN); + out_at += offset; + while_lt_copy_x(out_at, out_from, out_copy_end, R_COPY_MIN); + /* faster than 2x */ + } else if (likely(offset > 0)) { + if (!out_repeat_slow(r_bytes_max, offset, out_at, out_from, + out_copy_end, out_end)) + return LZ4K_STATUS_FAILED; + } else { /* offset == 0: EOB, last literal */ + return end_of_block(nr_bytes_max, r_bytes_max, in_at, + in_end, out, out_at); + } + out_at = out_copy_end; + } + return in_at == in_end ? (int)(out_at - out) : LZ4K_STATUS_FAILED; +}
+static int decode4kb( + const uint8_t *const in, + const uint8_t *const out0, + uint8_t *const out, + const uint8_t *const in_end, + const uint8_t *const out_end) +{ + enum { + NR_LOG2 = 6 + }; + return decode(in, out0, out, in_end, out_end, NR_LOG2, BLOCK_4KB_LOG2); +}
+static int decode8kb( + const uint8_t *const in, + const uint8_t *const out0, + uint8_t *const out, + const uint8_t *const in_end, + const uint8_t *const out_end) +{ + enum { + NR_LOG2 = 5 + }; + return decode(in, out0, out, in_end, out_end, NR_LOG2, BLOCK_8KB_LOG2); +}
+static int decode16kb( + const uint8_t *const in, + const uint8_t *const out0, + uint8_t *const out, + const uint8_t *const in_end, + const uint8_t *const out_end) +{ + enum { + NR_LOG2 = 5 + }; + return decode(in, out0, out, in_end, out_end, NR_LOG2, BLOCK_16KB_LOG2); +}
+static int decode32kb( + const uint8_t *const in, + const uint8_t *const out0, + uint8_t *const out, + const uint8_t *const in_end, + const uint8_t *const out_end) +{ + enum { + NR_LOG2 = 4 + }; + return decode(in, out0, out, in_end, out_end, NR_LOG2, BLOCK_32KB_LOG2); +}
+static int decode64kb( + const uint8_t *const in, + const uint8_t *const out0, + uint8_t *const out, + const uint8_t *const in_end, + const uint8_t *const out_end) +{ + enum { + NR_LOG2 = 4 + }; + return decode(in, out0, out, in_end, out_end, NR_LOG2, BLOCK_64KB_LOG2); +}
+static inline const void *u8_inc(const uint8_t *a) +{ + return a+1; +}
+int lz4k_decode( + const void *in, + void *const out, + unsigned in_max, + unsigned out_max) +{ + /* ++use volatile pointers to prevent compiler optimizations */ + const uint8_t *volatile in_end = (const uint8_t*)in + in_max; + const uint8_t *volatile out_end = (uint8_t*)out + out_max; + uint8_t in_log2 = 0; + if (unlikely(in == NULL || out == NULL || in_max <= 4 || out_max <= 0)) + return LZ4K_STATUS_FAILED; + in_log2 = (uint8_t)(BLOCK_4KB_LOG2 + *(const uint8_t*)in); + /* invalid buffer size or pointer overflow */ + if (unlikely((const uint8_t*)in >= in_end || (uint8_t*)out >= out_end)) + return LZ4K_STATUS_FAILED; + /* -- */ + in = u8_inc((const uint8_t*)in); + --in_max; + if (in_log2 < BLOCK_8KB_LOG2) + return decode4kb((const uint8_t*)in, (uint8_t*)out, + (uint8_t*)out, in_end, out_end); + if (in_log2 == BLOCK_8KB_LOG2) + return decode8kb((const uint8_t*)in, (uint8_t*)out, + (uint8_t*)out, in_end, out_end); + if (in_log2 == BLOCK_16KB_LOG2) + return decode16kb((const uint8_t*)in, (uint8_t*)out, + (uint8_t*)out, in_end, out_end); + if (in_log2 == BLOCK_32KB_LOG2) + return decode32kb((const uint8_t*)in, (uint8_t*)out, + (uint8_t*)out, in_end, out_end); + if (in_log2 == BLOCK_64KB_LOG2) + return decode64kb((const uint8_t*)in, (uint8_t*)out, + (uint8_t*)out, in_end, out_end); + return LZ4K_STATUS_FAILED; +} +EXPORT_SYMBOL(lz4k_decode);
+MODULE_LICENSE("Dual BSD/GPL"); +MODULE_DESCRIPTION("LZ4K decoder"); diff --git a/lib/lz4k/lz4k_encode.c b/lib/lz4k/lz4k_encode.c new file mode 100644 index 000000000000..a425d3a0b827 --- /dev/null +++ b/lib/lz4k/lz4k_encode.c @@ -0,0 +1,539 @@ +/*
- Copyright (c) Huawei Technologies Co., Ltd. 2012-2020. All rights
reserved.
- Description: LZ4K compression algorithm
- Author: Aleksei Romanovskii aleksei.romanovskii@huawei.com
- Created: 2020-03-25
- */
+#if !defined(__KERNEL__) +#include "lz4k.h" +#else +#include <linux/lz4k.h> +#include <linux/module.h> +#endif
+#include "lz4k_private.h" +#include "lz4k_encode_private.h"
+static uint8_t *out_size_bytes(uint8_t *out_at, uint_fast32_t u) +{ + for (; unlikely(u >= BYTE_MAX); u -= BYTE_MAX) + *out_at++ = (uint8_t)BYTE_MAX; + *out_at++ = (uint8_t)u; + return out_at; +}
+static inline uint8_t *out_utag_then_bytes_left( + uint8_t *out_at, + uint_fast32_t utag, + uint_fast32_t bytes_left) +{ + m_copy(out_at, &utag, TAG_BYTES_MAX); + return out_size_bytes(out_at + TAG_BYTES_MAX, bytes_left); +}
+static int out_tail( + uint8_t *out_at, + uint8_t *const out_end, + const uint8_t *const out, + const uint8_t *const nr0, + const uint8_t *const in_end, + const uint_fast32_t nr_log2, + const uint_fast32_t off_log2, + bool check_out) +{ + const uint_fast32_t nr_mask = mask(nr_log2); + const uint_fast32_t r_log2 = TAG_BITS_MAX - (off_log2 + nr_log2); + const uint_fast32_t nr_bytes_max = u_32(in_end - nr0); + if (encoded_bytes_min(nr_log2, nr_bytes_max) > u_32(out_end - out_at)) + return check_out ? LZ4K_STATUS_WRITE_ERROR : + LZ4K_STATUS_INCOMPRESSIBLE; + if (nr_bytes_max < nr_mask) { + /* caller guarantees at least one nr-byte */ + uint_fast32_t utag = (nr_bytes_max << (off_log2 + r_log2)); + m_copy(out_at, &utag, TAG_BYTES_MAX); + out_at += TAG_BYTES_MAX; + } else { + uint_fast32_t bytes_left = nr_bytes_max - nr_mask; + uint_fast32_t utag = (nr_mask << (off_log2 + r_log2)); + out_at = out_utag_then_bytes_left(out_at, utag, bytes_left); + } + m_copy(out_at, nr0, nr_bytes_max); + return (int)(out_at + nr_bytes_max - out); +}
+int lz4k_out_tail( + uint8_t *out_at, + uint8_t *const out_end, + const uint8_t *const out, + const uint8_t *const nr0, + const uint8_t *const in_end, + const uint_fast32_t nr_log2, + const uint_fast32_t off_log2, + bool check_out) +{ + return out_tail(out_at, out_end, out, nr0, in_end, + nr_log2, off_log2, check_out); +}
+static uint8_t *out_non_repeat( + uint8_t *out_at, + uint8_t *const out_end, + uint_fast32_t utag, + const uint8_t *const nr0, + const uint8_t *const r, + const uint_fast32_t nr_log2, + const uint_fast32_t off_log2, + bool check_out) +{ + const uint_fast32_t nr_bytes_max = u_32(r - nr0); + const uint_fast32_t nr_mask = mask(nr_log2), + r_log2 = TAG_BITS_MAX - (off_log2 + nr_log2); + if (likely(nr_bytes_max < nr_mask)) { + if (unlikely(check_out && + TAG_BYTES_MAX + nr_bytes_max > u_32(out_end - out_at))) + return NULL; + utag |= (nr_bytes_max << (off_log2 + r_log2)); + m_copy(out_at, &utag, TAG_BYTES_MAX); + out_at += TAG_BYTES_MAX; + } else { + uint_fast32_t bytes_left = nr_bytes_max - nr_mask; + if (unlikely(check_out && + TAG_BYTES_MAX + size_bytes_count(bytes_left) + nr_bytes_max > + u_32(out_end - out_at))) + return NULL; + utag |= (nr_mask << (off_log2 + r_log2)); + out_at = out_utag_then_bytes_left(out_at, utag, bytes_left); + } + if (unlikely(check_out)) + m_copy(out_at, nr0, nr_bytes_max); + else + copy_x_while_total(out_at, nr0, nr_bytes_max, NR_COPY_MIN); + out_at += nr_bytes_max; + return out_at; +}
+uint8_t *lz4k_out_non_repeat( + uint8_t *out_at, + uint8_t *const out_end, + uint_fast32_t utag, + const uint8_t *const nr0, + const uint8_t *const r, + const uint_fast32_t nr_log2, + const uint_fast32_t off_log2, + bool check_out) +{ + return out_non_repeat(out_at, out_end, utag, nr0, r, + nr_log2, off_log2, check_out); +}
+static uint8_t *out_r_bytes_left( + uint8_t *out_at, + uint8_t *const out_end, + uint_fast32_t r_bytes_max, + const uint_fast32_t nr_log2, + const uint_fast32_t off_log2, + const bool check_out) /* =false when +out_max>=encoded_bytes_max(in_max), =true otherwise */ +{ + const uint_fast32_t r_mask = mask(TAG_BITS_MAX - (off_log2 + nr_log2)); + if (unlikely(r_bytes_max - REPEAT_MIN >= r_mask)) { + uint_fast32_t bytes_left = r_bytes_max - REPEAT_MIN - r_mask; + if (unlikely(check_out && + size_bytes_count(bytes_left) > u_32(out_end - out_at))) + return NULL; + out_at = out_size_bytes(out_at, bytes_left); + } + return out_at; /* SUCCESS: continue compression */ +}
+uint8_t *lz4k_out_r_bytes_left( + uint8_t *out_at, + uint8_t *const out_end, + uint_fast32_t r_bytes_max, + const uint_fast32_t nr_log2, + const uint_fast32_t off_log2, + const bool check_out) +{ + return out_r_bytes_left(out_at, out_end, r_bytes_max, + nr_log2, off_log2, check_out); +}
+static uint8_t *out_repeat( + uint8_t *out_at, + uint8_t *const out_end, + uint_fast32_t utag, + uint_fast32_t r_bytes_max, + const uint_fast32_t nr_log2, + const uint_fast32_t off_log2, + const bool check_out) /* =false when +out_max>=encoded_bytes_max(in_max), =true otherwise */ +{ + const uint_fast32_t r_mask = mask(TAG_BITS_MAX - (off_log2 + nr_log2)); + if (likely(r_bytes_max - REPEAT_MIN < r_mask)) { + if (unlikely(check_out && TAG_BYTES_MAX > u_32(out_end - out_at))) + return NULL; + utag |= ((r_bytes_max - REPEAT_MIN) << off_log2); + m_copy(out_at, &utag, TAG_BYTES_MAX); + out_at += TAG_BYTES_MAX; + } else { + uint_fast32_t bytes_left = r_bytes_max - REPEAT_MIN - r_mask; + if (unlikely(check_out && + TAG_BYTES_MAX + size_bytes_count(bytes_left) > + u_32(out_end - out_at))) + return NULL; + utag |= (r_mask << off_log2); + out_at = out_utag_then_bytes_left(out_at, utag, bytes_left); + } + return out_at; /* SUCCESS: continue compression */ +}
+uint8_t *lz4k_out_repeat( + uint8_t *out_at, + uint8_t *const out_end, + uint_fast32_t utag, + uint_fast32_t r_bytes_max, + const uint_fast32_t nr_log2, + const uint_fast32_t off_log2, + const bool check_out) +{ + return out_repeat(out_at, out_end, utag, r_bytes_max, + nr_log2, off_log2, check_out); +}
+static const uint8_t *repeat_end( + const uint8_t *q, + const uint8_t *r, + const uint8_t *const in_end_safe, + const uint8_t *const in_end) +{ + q += REPEAT_MIN; + r += REPEAT_MIN; + /* caller guarantees r+12<=in_end */ + do { + const uint64_t x = read8_at(q) ^ read8_at(r); + if (x) { + const uint16_t ctz = (uint16_t)__builtin_ctzl(x); + return r + (ctz >> BYTE_BITS_LOG2); + } + /* some bytes differ:+ count of trailing 0-bits/bytes */ + q += sizeof(uint64_t), r += sizeof(uint64_t); + } while (likely(r <= in_end_safe)); /* once, at input block end */ + do { + if (*q != *r) return r; + ++q; + ++r; + } while (r < in_end); + return r; +}
+const uint8_t *lz4k_repeat_end( + const uint8_t *q, + const uint8_t *r, + const uint8_t *const in_end_safe, + const uint8_t *const in_end) +{ + return repeat_end(q, r, in_end_safe, in_end); +}
+enum { + HT_BYTES_LOG2 = HT_LOG2 + 1 +};
+inline unsigned encode_state_bytes_min(void) +{ + unsigned bytes_total = (1U << HT_BYTES_LOG2); + return bytes_total; +}
+#if !defined(LZ4K_DELTA) && !defined(LZ4K_MAX_CR)
+unsigned lz4k_encode_state_bytes_min(void) +{ + return encode_state_bytes_min(); +} +EXPORT_SYMBOL(lz4k_encode_state_bytes_min);
+#endif /* !defined(LZ4K_DELTA) && !defined(LZ4K_MAX_CR) */
+/* CR increase order: +STEP, have OFFSETS, use _5b */ +/* *_6b to compete with LZ4 */ +static inline uint_fast32_t hash0_v(const uint64_t r, uint32_t shift) +{ + return hash64v_6b(r, shift); +}
+static inline uint_fast32_t hash0(const uint8_t *r, uint32_t shift) +{ + return hash64_6b(r, shift); +}
+/*
- Proof that 'r' increments are safe-NO pointer overflows are
possible:
- While using STEP_LOG2=5, step_start=1<<STEP_LOG2 == 32 we
increment s
- 32 times by 1, 32 times by 2, 32 times by 3, and so on:
- 32*1+32*2+32*3+...+32*31 == 32*SUM(1..31) == 32*((1+31)*15+16).
- So, we can safely increment s by at most 31 for input block size <=
- 1<<13 < 15872.
- More precisely, STEP_LIMIT == x for any input block calculated as
follows:
- 1<<off_log2 >= (1<<STEP_LOG2)*((x+1)(x-1)/2+x/2) ==>
- 1<<(off_log2-STEP_LOG2+1) >= x^2+x-1 ==>
- x^2+x-1-1<<(off_log2-STEP_LOG2+1) == 0, which is solved by standard
- method.
- To avoid overhead here conservative approximate value of x is
calculated
- as average of two nearest square roots, see STEP_LIMIT above.
- */
+enum { + STEP_LOG2 = 5 /* increase for better CR */ +};
+static int encode_any( + uint16_t *const ht0, + const uint8_t *const in0, + uint8_t *const out, + const uint8_t *const in_end, + uint8_t *const out_end, /* ==out_limit for !check_out */ + const uint_fast32_t nr_log2, + const uint_fast32_t off_log2, + const bool check_out) +{ /* caller guarantees off_log2 <=16 */ + uint8_t *out_at = out; + const uint8_t *const in_end_safe = in_end - NR_COPY_MIN; + const uint8_t *r = in0; + const uint8_t *nr0 = r++; + uint_fast32_t step = 1 << STEP_LOG2; + for (;;) { + uint_fast32_t utag = 0; + const uint8_t *r_end = 0; + uint_fast32_t r_bytes_max = 0; + const uint8_t *const q = hashed(in0, ht0, hash0(r, HT_LOG2), r); + if (!equal4(q, r)) { + r += (++step >> STEP_LOG2); + if (unlikely(r > in_end_safe)) + return out_tail(out_at, out_end, out, nr0, in_end, + nr_log2, off_log2, check_out); + continue; + } + utag = u_32(r - q); + r_end = repeat_end(q, r, in_end_safe, in_end); + r = repeat_start(q, r, nr0, in0); + r_bytes_max = u_32(r_end - r); + if (nr0 == r) { + out_at = out_repeat(out_at, out_end, utag, r_bytes_max, + nr_log2, off_log2, check_out); + } else { + update_utag(r_bytes_max, &utag, nr_log2, off_log2); + out_at = out_non_repeat(out_at, out_end, utag, nr0, r, + nr_log2, off_log2, check_out); + if (unlikely(check_out && out_at == NULL)) + return LZ4K_STATUS_WRITE_ERROR; + out_at = out_r_bytes_left(out_at, out_end, r_bytes_max, + nr_log2, off_log2, check_out); + } + if (unlikely(check_out && out_at == NULL)) + return LZ4K_STATUS_WRITE_ERROR; + nr0 = (r += r_bytes_max); + if (unlikely(r > in_end_safe)) + return r == in_end ? (int)(out_at - out) : + out_tail(out_at, out_end, out, r, in_end, + nr_log2, off_log2, check_out); + ht0[hash0(r - 1 - 1, HT_LOG2)] = (uint16_t)(r - 1 - 1 - in0); + step = 1 << STEP_LOG2; + } +}
+static int encode_fast( + uint16_t *const ht, + const uint8_t *const in, + uint8_t *const out, + const uint8_t *const in_end, + uint8_t *const out_end, /* ==out_limit for !check_out */ + const uint_fast32_t nr_log2, + const uint_fast32_t off_log2) +{ /* caller guarantees off_log2 <=16 */ + return encode_any(ht, in, out, in_end, out_end, nr_log2, off_log2, + false); /* !check_out */ +}
+static int encode_slow( + uint16_t *const ht, + const uint8_t *const in, + uint8_t *const out, + const uint8_t *const in_end, + uint8_t *const out_end, /* ==out_limit for !check_out */ + const uint_fast32_t nr_log2, + const uint_fast32_t off_log2) +{ /* caller guarantees off_log2 <=16 */ + return encode_any(ht, in, out, in_end, out_end, nr_log2, off_log2, + true); /* check_out */ +}
+static int encode4kb( + uint16_t *const state, + const uint8_t *const in, + uint8_t *out, + const uint_fast32_t in_max, + const uint_fast32_t out_max, + const uint_fast32_t out_limit) +{ + enum { + NR_LOG2 = 6 + }; + const int result = (encoded_bytes_max(NR_LOG2, in_max) > out_max) ? + encode_slow(state, in, out, in + in_max, out + out_max, + NR_LOG2, BLOCK_4KB_LOG2) : + encode_fast(state, in, out, in + in_max, out + out_limit, + NR_LOG2, BLOCK_4KB_LOG2); + return result <= 0 ? result : result + 1; /* +1 for in_log2 */ +}
+static int encode8kb( + uint16_t *const state, + const uint8_t *const in, + uint8_t *out, + const uint_fast32_t in_max, + const uint_fast32_t out_max, + const uint_fast32_t out_limit) +{ + enum { + NR_LOG2 = 5 + }; + const int result = (encoded_bytes_max(NR_LOG2, in_max) > out_max) ? + encode_slow(state, in, out, in + in_max, out + out_max, + NR_LOG2, BLOCK_8KB_LOG2) : + encode_fast(state, in, out, in + in_max, out + out_limit, + NR_LOG2, BLOCK_8KB_LOG2); + return result <= 0 ? result : result + 1; /* +1 for in_log2 */ +}
+static int encode16kb( + uint16_t *const state, + const uint8_t *const in, + uint8_t *out, + const uint_fast32_t in_max, + const uint_fast32_t out_max, + const uint_fast32_t out_limit) +{ + enum { + NR_LOG2 = 5 + }; + const int result = (encoded_bytes_max(NR_LOG2, in_max) > out_max) ? + encode_slow(state, in, out, in + in_max, out + out_max, + NR_LOG2, BLOCK_16KB_LOG2) : + encode_fast(state, in, out, in + in_max, out + out_limit, + NR_LOG2, BLOCK_16KB_LOG2); + return result <= 0 ? result : result + 1; /* +1 for in_log2 */ +}
+static int encode32kb( + uint16_t *const state, + const uint8_t *const in, + uint8_t *out, + const uint_fast32_t in_max, + const uint_fast32_t out_max, + const uint_fast32_t out_limit) +{ + enum { + NR_LOG2 = 4 + }; + const int result = (encoded_bytes_max(NR_LOG2, in_max) > out_max) ? + encode_slow(state, in, out, in + in_max, out + out_max, + NR_LOG2, BLOCK_32KB_LOG2) : + encode_fast(state, in, out, in + in_max, out + out_limit, + NR_LOG2, BLOCK_32KB_LOG2); + return result <= 0 ? result : result + 1; /* +1 for in_log2 */ +}
+static int encode64kb( + uint16_t *const state, + const uint8_t *const in, + uint8_t *out, + const uint_fast32_t in_max, + const uint_fast32_t out_max, + const uint_fast32_t out_limit) +{ + enum { + NR_LOG2 = 4 + }; + const int result = (encoded_bytes_max(NR_LOG2, in_max) > out_max) ? + encode_slow(state, in, out, in + in_max, out + out_max, + NR_LOG2, BLOCK_64KB_LOG2) : + encode_fast(state, in, out, in + in_max, out + out_limit, + NR_LOG2, BLOCK_64KB_LOG2); + return result <= 0 ? result : result + 1; /* +1 for in_log2 */ +}
+static int encode( + uint16_t *const state, + const uint8_t *const in, + uint8_t *out, + uint_fast32_t in_max, + uint_fast32_t out_max, + uint_fast32_t out_limit) +{ + const uint8_t in_log2 = (uint8_t)(most_significant_bit_of( + round_up_to_power_of2(in_max - REPEAT_MIN))); + m_set(state, 0, encode_state_bytes_min()); + *out = in_log2 > BLOCK_4KB_LOG2 ? (uint8_t)(in_log2 - BLOCK_4KB_LOG2) : 0; + ++out; + --out_max; + --out_limit; + if (in_log2 < BLOCK_8KB_LOG2) + return encode4kb(state, in, out, in_max, out_max, out_limit); + if (in_log2 == BLOCK_8KB_LOG2) + return encode8kb(state, in, out, in_max, out_max, out_limit); + if (in_log2 == BLOCK_16KB_LOG2) + return encode16kb(state, in, out, in_max, out_max, out_limit); + if (in_log2 == BLOCK_32KB_LOG2) + return encode32kb(state, in, out, in_max, out_max, out_limit); + if (in_log2 == BLOCK_64KB_LOG2) + return encode64kb(state, in, out, in_max, out_max, out_limit); + return LZ4K_STATUS_FAILED; +}
+int lz4k_encode( + void *const state, + const void *const in, + void *out, + unsigned in_max, + unsigned out_max, + unsigned out_limit) +{ + const unsigned gain_max = 64 > (in_max >> 6) ? 64 : (in_max >> 6); + const unsigned out_limit_min = in_max < out_max ? in_max : out_max; + const uint8_t *volatile in_end = (const uint8_t*)in + in_max; + const uint8_t *volatile out_end = (uint8_t*)out + out_max; + const void *volatile state_end = + (uint8_t*)state + encode_state_bytes_min(); + if (unlikely(state == NULL)) + return LZ4K_STATUS_FAILED; + if (unlikely(in == NULL || out == NULL)) + return LZ4K_STATUS_FAILED; + if (unlikely(in_max <= gain_max)) + return LZ4K_STATUS_INCOMPRESSIBLE; + if (unlikely(out_max <= gain_max)) /* need 1 byte for in_log2 */ + return LZ4K_STATUS_FAILED; + /* ++use volatile pointers to prevent compiler optimizations */ + if (unlikely((const uint8_t*)in >= in_end || (uint8_t*)out >= out_end)) + return LZ4K_STATUS_FAILED; + if (unlikely(state >= state_end)) + return LZ4K_STATUS_FAILED; /* pointer overflow */ + if (!out_limit || out_limit >= out_limit_min) + out_limit = out_limit_min - gain_max; + return encode((uint16_t*)state, (const uint8_t*)in, (uint8_t*)out, + in_max, out_max, out_limit); +} +EXPORT_SYMBOL(lz4k_encode);
+const char *lz4k_version(void) +{ + static const char *version = "2020.07.07"; + return version; +} +EXPORT_SYMBOL(lz4k_version);
+MODULE_LICENSE("Dual BSD/GPL"); +MODULE_DESCRIPTION("LZ4K encoder"); diff --git a/lib/lz4k/lz4k_encode_private.h b/lib/lz4k/lz4k_encode_private.h new file mode 100644 index 000000000000..eb5cd162468f --- /dev/null +++ b/lib/lz4k/lz4k_encode_private.h @@ -0,0 +1,137 @@ +/*
- Copyright (c) Huawei Technologies Co., Ltd. 2012-2020. All rights
reserved.
- Description: LZ4K compression algorithm
- Author: Aleksei Romanovskii aleksei.romanovskii@huawei.com
- Created: 2020-03-25
- */
+#ifndef _LZ4K_ENCODE_PRIVATE_H +#define _LZ4K_ENCODE_PRIVATE_H
+#include "lz4k_private.h"
+/* <nrSize bytes for whole block>+<1 terminating 0 byte> */ +static inline uint_fast32_t size_bytes_count(uint_fast32_t u) +{ + return (u + BYTE_MAX - 1) / BYTE_MAX; +}
+/* minimum encoded size for non-compressible data */ +static inline uint_fast32_t encoded_bytes_min( + uint_fast32_t nr_log2, + uint_fast32_t in_max) +{ + return in_max < mask(nr_log2) ? + TAG_BYTES_MAX + in_max : + TAG_BYTES_MAX + size_bytes_count(in_max - mask(nr_log2)) + in_max; +}
+enum { + NR_COPY_LOG2 = 4, + NR_COPY_MIN = 1 << NR_COPY_LOG2 +};
+static inline uint_fast32_t u_32(int64_t i) +{ + return (uint_fast32_t)i; +}
+/* maximum encoded size for non-comprressible data if "fast" encoder is used */ +static inline uint_fast32_t encoded_bytes_max( + uint_fast32_t nr_log2, + uint_fast32_t in_max) +{ + uint_fast32_t r = TAG_BYTES_MAX + (uint32_t)round_up_to_log2(in_max, NR_COPY_LOG2); + return in_max < mask(nr_log2) ? r : r + size_bytes_count(in_max
- mask(nr_log2));
+}
+enum { + HT_LOG2 = 12 +};
+/*
- Compressed data format (where {} means 0 or more occurrences, []
means
- optional):
- <24bits tag: (off_log2 rOffset| r_log2 rSize|nr_log2 nrSize)>
- {<nrSize byte>}[<nr bytes>]{<rSize byte>}
- <rSize byte> and <nrSize byte> bytes are terminated by byte != 255
- */
+static inline void update_utag( + uint_fast32_t r_bytes_max, + uint_fast32_t *utag, + const uint_fast32_t nr_log2, + const uint_fast32_t off_log2) +{ + const uint_fast32_t r_mask = mask(TAG_BITS_MAX - (off_log2 + nr_log2)); + *utag |= likely(r_bytes_max - REPEAT_MIN < r_mask) ? + ((r_bytes_max - REPEAT_MIN) << off_log2) : (r_mask << off_log2); +}
+static inline const uint8_t *hashed( + const uint8_t *const in0, + uint16_t *const ht, + uint_fast32_t h, + const uint8_t *r) +{ + const uint8_t *q = in0 + ht[h]; + ht[h] = (uint16_t)(r - in0); + return q; +}
+static inline const uint8_t *repeat_start( + const uint8_t *q, + const uint8_t *r, + const uint8_t *const nr0, + const uint8_t *const in0) +{ + for (; r > nr0 && likely(q > in0) && unlikely(q[-1] == r[-1]); --q, --r); + return r; +}
+int lz4k_out_tail( + uint8_t *out_at, + uint8_t *const out_end, + const uint8_t *const out, + const uint8_t *const nr0, + const uint8_t *const in_end, + const uint_fast32_t nr_log2, + const uint_fast32_t off_log2, + bool check_out);
+uint8_t *lz4k_out_non_repeat( + uint8_t *out_at, + uint8_t *const out_end, + uint_fast32_t utag, + const uint8_t *const nr0, + const uint8_t *const r, + const uint_fast32_t nr_log2, + const uint_fast32_t off_log2, + bool check_out);
+uint8_t *lz4k_out_r_bytes_left( + uint8_t *out_at, + uint8_t *const out_end, + uint_fast32_t r_bytes_max, + const uint_fast32_t nr_log2, + const uint_fast32_t off_log2, + const bool check_out);
+uint8_t *lz4k_out_repeat( + uint8_t *out_at, + uint8_t *const out_end, + uint_fast32_t utag, + uint_fast32_t r_bytes_max, + const uint_fast32_t nr_log2, + const uint_fast32_t off_log2, + const bool check_out);
+const uint8_t *lz4k_repeat_end( + const uint8_t *q, + const uint8_t *r, + const uint8_t *const in_end_safe, + const uint8_t *const in_end);
+#endif /* _LZ4K_ENCODE_PRIVATE_H */
diff --git a/lib/lz4k/lz4k_private.h b/lib/lz4k/lz4k_private.h new file mode 100644 index 000000000000..2a8f4b37dc74 --- /dev/null +++ b/lib/lz4k/lz4k_private.h @@ -0,0 +1,269 @@ +/*
- Copyright (c) Huawei Technologies Co., Ltd. 2012-2020. All rights
reserved.
- Description: LZ4K compression algorithm
- Author: Aleksei Romanovskii aleksei.romanovskii@huawei.com
- Created: 2020-03-25
- */
+#ifndef _LZ4K_PRIVATE_H +#define _LZ4K_PRIVATE_H
+#if !defined(__KERNEL__)
+#include "lz4k.h" +#include <stdint.h> /* uint*_t */ +#define __STDC_WANT_LIB_EXT1__ 1 +#include <string.h> /* memcpy() */
+#define likely(e) __builtin_expect(e, 1) +#define unlikely(e) __builtin_expect(e, 0)
+#else /* __KERNEL__ */
+#include <linux/lz4k.h> +#define __STDC_WANT_LIB_EXT1__ 1 +#include <linux/string.h> /* memcpy() */ +#include <linux/types.h> /* uint8_t, int8_t, uint16_t, int16_t, +uint32_t, int32_t, uint64_t, int64_t */ +#include <stddef.h>
+typedef uint64_t uint_fast32_t; +typedef int64_t int_fast32_t;
+#endif /* __KERNEL__ */
+#if defined(__GNUC__) && (__GNUC__>=4) +#define LZ4K_WITH_GCC_INTRINSICS +#endif
+#if !defined(__GNUC__) +#define __builtin_expect(e, v) (e) +#endif /* defined(__GNUC__) */
+enum { + BYTE_BITS = 8, + BYTE_BITS_LOG2 = 3, + BYTE_MAX = 255U, + REPEAT_MIN = 4, + TAG_BYTES_MAX = 3, + TAG_BITS_MAX = TAG_BYTES_MAX * 8, + BLOCK_4KB_LOG2 = 12, + BLOCK_8KB_LOG2 = 13, + BLOCK_16KB_LOG2 = 14, + BLOCK_32KB_LOG2 = 15, + BLOCK_64KB_LOG2 = 16 +};
+static inline uint32_t mask(uint_fast32_t log2) +{ + return (1U << log2) - 1U; +}
+static inline uint64_t mask64(uint_fast32_t log2) +{ + return (1ULL << log2) - 1ULL; +}
+#if defined LZ4K_WITH_GCC_INTRINSICS +static inline int most_significant_bit_of(uint64_t u) +{ + return (int)(__builtin_expect((u) == 0, false) ? + -1 : (int)(31 ^ (uint32_t)__builtin_clz((unsigned)(u)))); +} +#else /* #!defined LZ4K_WITH_GCC_INTRINSICS */ +#error undefined most_significant_bit_of(unsigned u) +#endif /* #if defined LZ4K_WITH_GCC_INTRINSICS */
+static inline uint64_t round_up_to_log2(uint64_t u, uint8_t log2) +{ + return (uint64_t)((u + mask64(log2)) & ~mask64(log2)); +}
+static inline uint64_t round_up_to_power_of2(uint64_t u) +{ + const int_fast32_t msb = most_significant_bit_of(u); + return round_up_to_log2(u, (uint8_t)msb); +}
+static inline void m_copy(void *dst, const void *src, size_t total) +{ +#if defined(__STDC_LIB_EXT1__) + (void)memcpy_s(dst, total, src, (total * 2) >> 1); /* *2 >> 1 to avoid bot errors */ +#else + (void)__builtin_memcpy(dst, src, total); +#endif +}
+static inline void m_set(void *dst, uint8_t value, size_t total) +{ +#if defined(__STDC_LIB_EXT1__) + (void)memset_s(dst, total, value, (total * 2) >> 1); /* *2 >> 1 to avoid bot errors */ +#else + (void)__builtin_memset(dst, value, total); +#endif +}
+static inline uint32_t read4_at(const void *p) +{ + uint32_t result; + m_copy(&result, p, sizeof(result)); + return result; +}
+static inline uint64_t read8_at(const void *p) +{ + uint64_t result; + m_copy(&result, p, sizeof(result)); + return result; +}
+static inline bool equal4(const uint8_t *const q, const uint8_t *const r) +{ + return read4_at(q) == read4_at(r); +}
+static inline bool equal3(const uint8_t *const q, const uint8_t *const r) +{ + return (read4_at(q) << BYTE_BITS) == (read4_at(r) << BYTE_BITS); +}
+static inline uint_fast32_t hash24v(const uint64_t r, uint32_t shift) +{ + const uint32_t m = 3266489917U; + return (((uint32_t)r << BYTE_BITS) * m) >> (32 - shift); +}
+static inline uint_fast32_t hash24(const uint8_t *r, uint32_t shift) +{ + return hash24v(read4_at(r), shift); +}
+static inline uint_fast32_t hash32v_2(const uint64_t r, uint32_t shift) +{ + const uint32_t m = 3266489917U; + return ((uint32_t)r * m) >> (32 - shift); +}
+static inline uint_fast32_t hash32_2(const uint8_t *r, uint32_t shift) +{ + return hash32v_2(read4_at(r), shift); +}
+static inline uint_fast32_t hash32v(const uint64_t r, uint32_t shift) +{ + const uint32_t m = 2654435761U; + return ((uint32_t)r * m) >> (32 - shift); +}
+static inline uint_fast32_t hash32(const uint8_t *r, uint32_t shift) +{ + return hash32v(read4_at(r), shift); +}
+static inline uint_fast32_t hash64v_5b(const uint64_t r, uint32_t shift) +{ + const uint64_t m = 889523592379ULL; + return (uint32_t)(((r << 24) * m) >> (64 - shift)); +}
+static inline uint_fast32_t hash64_5b(const uint8_t *r, uint32_t shift) +{ + return hash64v_5b(read8_at(r), shift); +}
+static inline uint_fast32_t hash64v_6b(const uint64_t r, uint32_t shift) +{ + const uint64_t m = 227718039650203ULL; + return (uint32_t)(((r << 16) * m) >> (64 - shift)); +}
+static inline uint_fast32_t hash64_6b(const uint8_t *r, uint32_t shift) +{ + return hash64v_6b(read8_at(r), shift); +}
+static inline uint_fast32_t hash64v_7b(const uint64_t r, uint32_t shift) +{ + const uint64_t m = 58295818150454627ULL; + return (uint32_t)(((r << 8) * m) >> (64 - shift)); +}
+static inline uint_fast32_t hash64_7b(const uint8_t *r, uint32_t shift) +{ + return hash64v_7b(read8_at(r), shift); +}
+static inline uint_fast32_t hash64v_8b(const uint64_t r, uint32_t shift) +{ + const uint64_t m = 2870177450012600261ULL; + return (uint32_t)((r * m) >> (64 - shift)); +}
+static inline uint_fast32_t hash64_8b(const uint8_t *r, uint32_t shift) +{ + return hash64v_8b(read8_at(r), shift); +}
+static inline void while_lt_copy_x( + uint8_t *dst, + const uint8_t *src, + const uint8_t *dst_end, + const size_t copy_min) +{ + for (; dst < dst_end; dst += copy_min, src += copy_min) + m_copy(dst, src, copy_min); +}
+static inline void copy_x_while_lt( + uint8_t *dst, + const uint8_t *src, + const uint8_t *dst_end, + const size_t copy_min) +{ + m_copy(dst, src, copy_min); + while (dst + copy_min < dst_end) + m_copy(dst += copy_min, src += copy_min, copy_min); +}
+static inline void copy_x_while_total( + uint8_t *dst, + const uint8_t *src, + size_t total, + const size_t copy_min) +{ + m_copy(dst, src, copy_min); + for (; total > copy_min; total-= copy_min) + m_copy(dst += copy_min, src += copy_min, copy_min); +}
+static inline void copy_2x( + uint8_t *dst, + const uint8_t *src, + const size_t copy_min) +{ + m_copy(dst, src, copy_min); + m_copy(dst + copy_min, src + copy_min, copy_min); +}
+static inline void copy_2x_as_x2_while_lt( + uint8_t *dst, + const uint8_t *src, + const uint8_t *dst_end, + const size_t copy_min) +{ + copy_2x(dst, src, copy_min); + while (dst + (copy_min << 1) < dst_end) + copy_2x(dst += (copy_min << 1), src += (copy_min << 1), copy_min); +}
+static inline void while_lt_copy_2x_as_x2( + uint8_t *dst, + const uint8_t *src, + const uint8_t *dst_end, + const size_t copy_min) +{ + for (; dst < dst_end; dst += (copy_min << 1), src += (copy_min << 1)) + copy_2x(dst, src, copy_min); +}
+#endif /* _LZ4K_PRIVATE_H */