Change log: v3: - update size_bytes_count() to make it compile sucessfully on 32-bit architecture. - fix some style problem.
v2: - fix some style problem.
Tu Jinjiang (2): crypto: add lz4k Cryptographic API mm/zram: Add lz4k support for zram
crypto/Kconfig | 8 + crypto/Makefile | 1 + crypto/lz4k.c | 100 ++++++ drivers/block/zram/zcomp.c | 3 + include/linux/lz4k.h | 384 +++++++++++++++++++++++ lib/Kconfig | 6 + lib/Makefile | 2 + lib/lz4k/Makefile | 2 + lib/lz4k/lz4k_decode.c | 314 +++++++++++++++++++ lib/lz4k/lz4k_encode.c | 554 +++++++++++++++++++++++++++++++++ lib/lz4k/lz4k_encode_private.h | 142 +++++++++ lib/lz4k/lz4k_private.h | 282 +++++++++++++++++ 12 files changed, 1798 insertions(+) create mode 100644 crypto/lz4k.c create mode 100644 include/linux/lz4k.h create mode 100644 lib/lz4k/Makefile create mode 100644 lib/lz4k/lz4k_decode.c create mode 100644 lib/lz4k/lz4k_encode.c create mode 100644 lib/lz4k/lz4k_encode_private.h create mode 100644 lib/lz4k/lz4k_private.h
反馈: 您发送到kernel@openeuler.org的补丁/补丁集,已成功转换为PR! PR链接地址: https://gitee.com/openeuler/kernel/pulls/1288 邮件列表地址:https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/U...
FeedBack: The patch(es) which you have sent to kernel@openeuler.org mailing list has been converted to a pull request successfully! Pull request link: https://gitee.com/openeuler/kernel/pulls/1288 Mailing list address: https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/U...
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I7H9IA CVE: NA
-------------------------------------------
Add support for lz4k compression algorithm. Author: Arkhipov Denis arkhipov.denis@huawei.com.
Signed-off-by: Nanyong Sun sunnanyong@huawei.com Signed-off-by: Tu Jinjiang tujinjiang@huawei.com --- crypto/Kconfig | 8 + crypto/Makefile | 1 + crypto/lz4k.c | 100 ++++++ include/linux/lz4k.h | 384 +++++++++++++++++++++++ lib/Kconfig | 6 + lib/Makefile | 2 + lib/lz4k/Makefile | 2 + lib/lz4k/lz4k_decode.c | 314 +++++++++++++++++++ lib/lz4k/lz4k_encode.c | 554 +++++++++++++++++++++++++++++++++ lib/lz4k/lz4k_encode_private.h | 152 +++++++++ lib/lz4k/lz4k_private.h | 282 +++++++++++++++++ 11 files changed, 1805 insertions(+) create mode 100644 crypto/lz4k.c create mode 100644 include/linux/lz4k.h create mode 100644 lib/lz4k/Makefile create mode 100644 lib/lz4k/lz4k_decode.c create mode 100644 lib/lz4k/lz4k_encode.c create mode 100644 lib/lz4k/lz4k_encode_private.h create mode 100644 lib/lz4k/lz4k_private.h
diff --git a/crypto/Kconfig b/crypto/Kconfig index 64cb304f5103..35223cff7c8a 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1871,6 +1871,14 @@ config CRYPTO_LZ4HC help This is the LZ4 high compression mode algorithm.
+config CRYPTO_LZ4K + tristate "LZ4K compression algorithm" + select CRYPTO_ALGAPI + select LZ4K_COMPRESS + select LZ4K_DECOMPRESS + help + This is the LZ4K algorithm. + config CRYPTO_ZSTD tristate "Zstd compression algorithm" select CRYPTO_ALGAPI diff --git a/crypto/Makefile b/crypto/Makefile index 9d1191f2b741..5c3b0a0839c5 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -161,6 +161,7 @@ obj-$(CONFIG_CRYPTO_AUTHENC) += authenc.o authencesn.o obj-$(CONFIG_CRYPTO_LZO) += lzo.o lzo-rle.o obj-$(CONFIG_CRYPTO_LZ4) += lz4.o obj-$(CONFIG_CRYPTO_LZ4HC) += lz4hc.o +obj-$(CONFIG_CRYPTO_LZ4K) += lz4k.o obj-$(CONFIG_CRYPTO_XXHASH) += xxhash_generic.o obj-$(CONFIG_CRYPTO_842) += 842.o obj-$(CONFIG_CRYPTO_RNG2) += rng.o diff --git a/crypto/lz4k.c b/crypto/lz4k.c new file mode 100644 index 000000000000..5f0308e3bb83 --- /dev/null +++ b/crypto/lz4k.c @@ -0,0 +1,100 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2012-2020. All rights reserved. + * Description: LZ4K compression algorithm for ZRAM + * Author: Arkhipov Denis arkhipov.denis@huawei.com + * Create: 2020-03-25 + */ + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/crypto.h> +#include <linux/vmalloc.h> +#include <linux/lz4k.h> + + +struct lz4k_ctx { + void *lz4k_comp_mem; +}; + +static int lz4k_init(struct crypto_tfm *tfm) +{ + struct lz4k_ctx *ctx = crypto_tfm_ctx(tfm); + + ctx->lz4k_comp_mem = vmalloc(lz4k_encode_state_bytes_min()); + if (!ctx->lz4k_comp_mem) + return -ENOMEM; + + return 0; +} + +static void lz4k_exit(struct crypto_tfm *tfm) +{ + struct lz4k_ctx *ctx = crypto_tfm_ctx(tfm); + + vfree(ctx->lz4k_comp_mem); +} + +static int lz4k_compress_crypto(struct crypto_tfm *tfm, const u8 *src, + unsigned int slen, u8 *dst, unsigned int *dlen) +{ + struct lz4k_ctx *ctx = crypto_tfm_ctx(tfm); + int ret; + + ret = lz4k_encode(ctx->lz4k_comp_mem, src, dst, slen, *dlen, 0); + + if (ret < 0) + return -EINVAL; + + if (ret) + *dlen = ret; + + return 0; +} + +static int lz4k_decompress_crypto(struct crypto_tfm *tfm, const u8 *src, + unsigned int slen, u8 *dst, unsigned int *dlen) +{ + int ret; + + ret = lz4k_decode(src, dst, slen, *dlen); + + if (ret <= 0) + return -EINVAL; + *dlen = ret; + return 0; +} + +static struct crypto_alg alg_lz4k = { + .cra_name = "lz4k", + .cra_driver_name = "lz4k-generic", + .cra_flags = CRYPTO_ALG_TYPE_COMPRESS, + .cra_ctxsize = sizeof(struct lz4k_ctx), + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(alg_lz4k.cra_list), + .cra_init = lz4k_init, + .cra_exit = lz4k_exit, + .cra_u = { + .compress = { + .coa_compress = lz4k_compress_crypto, + .coa_decompress = lz4k_decompress_crypto + } + } +}; + +static int __init lz4k_mod_init(void) +{ + return crypto_register_alg(&alg_lz4k); +} + +static void __exit lz4k_mod_fini(void) +{ + crypto_unregister_alg(&alg_lz4k); +} + +module_init(lz4k_mod_init); +module_exit(lz4k_mod_fini); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("LZ4K Compression Algorithm"); +MODULE_ALIAS_CRYPTO("lz4k"); diff --git a/include/linux/lz4k.h b/include/linux/lz4k.h new file mode 100644 index 000000000000..698dc3e0a522 --- /dev/null +++ b/include/linux/lz4k.h @@ -0,0 +1,384 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2012-2020. All rights reserved. + * Description: LZ4K compression algorithm + * Author: Aleksei Romanovskii aleksei.romanovskii@huawei.com + * Created: 2020-03-25 + */ + +#ifndef _LZ4K_H +#define _LZ4K_H + +/* file lz4k.h + * This file contains the platform-independent API of LZ-class + * lossless codecs (compressors/decompressors) with complete + * in-place documentation. The documentation is formatted + * in accordance with DOXYGEN mark-up format. So, one can + * generate proper documentation, e.g. in HTML format, using DOXYGEN. + * + * Currently, LZ-class codecs, documented here, implement following + * algorithms for lossless data compression/decompression: + * \li "LZ HUAWEI" proprietary codec competing with LZ4 - lz4k_encode(), + * lz4k_encode_delta(), lz4k_decode(), lz4k_decode_delta() + * + * The LZ HUAWEI compressors accept any data as input and compress it + * without loss to a smaller size if possible. + * Compressed data produced by LZ HUAWEI compressor API lz4k_encode*(), + * can be decompressed only by lz4k_decode() API documented below.\n + */ + +/* + * lz4k_status defines simple set of status values returned by Huawei APIs + */ +enum lz4k_status { + LZ4K_STATUS_INCOMPRESSIBLE = 0, /* !< Return when data is incompressible */ + LZ4K_STATUS_FAILED = -1, /* !< Return on general failure */ + LZ4K_STATUS_READ_ERROR = -2, /* !< Return when data reading failed */ + LZ4K_STATUS_WRITE_ERROR = -3 /* !< Return when data writing failed */ +}; + +/* + * LZ4K_Version() returns static unmutable string with algorithm version + */ +const char *lz4k_version(void); + +/* + * lz4k_encode_state_bytes_min() returns number of bytes for state parameter, + * supplied to lz4k_encode(), lz4k_encode_delta(), + * lz4k_update_delta_state(). + * So, state should occupy at least lz4k_encode_state_bytes_min() for mentioned + * functions to work correctly. + */ +unsigned int lz4k_encode_state_bytes_min(void); + +/* + * lz4k_encode() encodes/compresses one input buffer at *in, places + * result of encoding into one output buffer at *out if encoded data + * size fits specified values of out_max and out_limit. + * It returs size of encoded data in case of success or value<=0 otherwise. + * The result of successful encoding is in HUAWEI proprietary format, that + * is the encoded data can be decoded only by lz4k_decode(). + * + * \return + * \li positive value\n + * if encoding was successful. The value returned is the size of encoded + * (compressed) data always <=out_max. + * \li non-positive value\n + * if in==0||in_max==0||out==0||out_max==0 or + * if out_max is less than needed for encoded (compressed) data. + * \li 0 value\n + * if encoded data size >= out_limit + * + * \param[in] state + * !=0, pointer to state buffer used internally by the function. Size of + * state in bytes should be at least lz4k_encode_state_bytes_min(). The content + * of state buffer will be changed during encoding. + * + * \param[in] in + * !=0, pointer to the input buffer to encode (compress). The content of + * the input buffer does not change during encoding. + * + * \param[in] out + * !=0, pointer to the output buffer where to place result of encoding + * (compression). + * If encoding is unsuccessful, e.g. out_max or out_limit are less than + * needed for encoded data then content of out buffer may be arbitrary. + * + * \param[in] in_max + * !=0, size in bytes of the input buffer at *in + * + * \param[in] out_max + * !=0, size in bytes of the output buffer at *out + * + * \param[in] out_limit + * encoded data size soft limit in bytes. Due to performance reasons it is + * not guaranteed that + * lz4k_encode will always detect that resulting encoded data size is + * bigger than out_limit. + * Hovewer, when reaching out_limit is detected, lz4k_encode() returns + * earlier and spares CPU cycles. Caller code should recheck result + * returned by lz4k_encode() (value greater than 0) if it is really + * less or equal than out_limit. + * out_limit is ignored if it is equal to 0. + */ +int lz4k_encode( + void *const state, + const void *const in, + void *out, + unsigned int in_max, + unsigned int out_max, + unsigned int out_limit); + +/* + * lz4k_encode_max_cr() encodes/compresses one input buffer at *in, places + * result of encoding into one output buffer at *out if encoded data + * size fits specified value of out_max. + * It returs size of encoded data in case of success or value<=0 otherwise. + * The result of successful encoding is in HUAWEI proprietary format, that + * is the encoded data can be decoded only by lz4k_decode(). + * + * \return + * \li positive value\n + * if encoding was successful. The value returned is the size of encoded + * (compressed) data always <=out_max. + * \li non-positive value\n + * if in==0||in_max==0||out==0||out_max==0 or + * if out_max is less than needed for encoded (compressed) data. + * + * \param[in] state + * !=0, pointer to state buffer used internally by the function. Size of + * state in bytes should be at least lz4k_encode_state_bytes_min(). The content + * of state buffer will be changed during encoding. + * + * \param[in] in + * !=0, pointer to the input buffer to encode (compress). The content of + * the input buffer does not change during encoding. + * + * \param[in] out + * !=0, pointer to the output buffer where to place result of encoding + * (compression). + * If encoding is unsuccessful, e.g. out_max is less than + * needed for encoded data then content of out buffer may be arbitrary. + * + * \param[in] in_max + * !=0, size in bytes of the input buffer at *in + * + * \param[in] out_max + * !=0, size in bytes of the output buffer at *out + * + * \param[in] out_limit + * encoded data size soft limit in bytes. Due to performance reasons it is + * not guaranteed that + * lz4k_encode will always detect that resulting encoded data size is + * bigger than out_limit. + * Hovewer, when reaching out_limit is detected, lz4k_encode() returns + * earlier and spares CPU cycles. Caller code should recheck result + * returned by lz4k_encode() (value greater than 0) if it is really + * less or equal than out_limit. + * out_limit is ignored if it is equal to 0. + */ +int lz4k_encode_max_cr( + void *const state, + const void *const in, + void *out, + unsigned int in_max, + unsigned int out_max, + unsigned int out_limit); + +/* + * lz4k_update_delta_state() fills/updates state (hash table) in the same way as + * lz4k_encode does while encoding (compressing). + * The state and its content can then be used by lz4k_encode_delta() + * to encode (compress) data more efficiently. + * By other words, effect of lz4k_update_delta_state() is the same as + * lz4k_encode() with all encoded output discarded. + * + * Example sequence of calls for lz4k_update_delta_state and + * lz4k_encode_delta: + * //dictionary (1st) block + * int result0=lz4k_update_delta_state(state, in0, in0, in_max0); + * //delta (2nd) block + * int result1=lz4k_encode_delta(state, in0, in, out, in_max, + * out_max); + * + * \param[in] state + * !=0, pointer to state buffer used internally by lz4k_encode*. + * Size of state in bytes should be at least lz4k_encode_state_bytes_min(). + * The content of state buffer is zeroed at the beginning of + * lz4k_update_delta_state ONLY when in0==in. + * The content of state buffer will be changed inside + * lz4k_update_delta_state. + * + * \param[in] in0 + * !=0, pointer to the reference/dictionary input buffer that was used + * as input to preceding call of lz4k_encode() or lz4k_update_delta_state() + * to fill/update the state buffer. + * The content of the reference/dictionary input buffer does not change + * during encoding. + * The in0 is needed for use-cases when there are several dictionary and + * input blocks interleaved, e.g. + * <dictionaryA><inputA><dictionaryB><inputB>..., or + * <dictionaryA><dictionaryB><inputAB>..., etc. + * + * \param[in] in + * !=0, pointer to the input buffer to fill/update state as if encoding + * (compressing) this input. This input buffer is also called dictionary + * input buffer. + * The content of the input buffer does not change during encoding. + * The two buffers - at in0 and at in - should be contiguous in memory. + * That is, the last byte of buffer at in0 is located exactly before byte + * at in. + * + * \param[in] in_max + * !=0, size in bytes of the input buffer at in. + */ +int lz4k_update_delta_state( + void *const state, + const void *const in0, + const void *const in, + unsigned int in_max); + +/* + * lz4k_encode_delta() encodes (compresses) data from one input buffer + * using one reference buffer as dictionary and places the result of + * compression into one output buffer. + * The result of successful compression is in HUAWEI proprietary format, so + * that compressed data can be decompressed only by lz4k_decode_delta(). + * Reference/dictionary buffer and input buffer should be contiguous in + * memory. + * + * Example sequence of calls for lz4k_update_delta_state and + * lz4k_encode_delta: + * //dictionary (1st) block + * int result0=lz4k_update_delta_state(state, in0, in0, in_max0); + * //delta (2nd) block + * int result1=lz4k_encode_delta(state, in0, in, out, in_max, + * out_max); + * + * Example sequence of calls for lz4k_encode and lz4k_encode_delta: + * //dictionary (1st) block + * int result0=lz4k_encode(state, in0, out0, in_max0, out_max0); + * //delta (2nd) block + * int result1=lz4k_encode_delta(state, in0, in, out, in_max, + * out_max); + * + * \return + * \li positive value\n + * if encoding was successful. The value returned is the size of encoded + * (compressed) data. + * \li non-positive value\n + * if state==0||in0==0||in==0||in_max==0||out==0||out_max==0 or + * if out_max is less than needed for encoded (compressed) data. + * + * \param[in] state + * !=0, pointer to state buffer used internally by the function. Size of + * state in bytes should be at least lz4k_encode_state_bytes_min(). For more + * efficient encoding the state buffer may be filled/updated by calling + * lz4k_update_delta_state() or lz4k_encode() before lz4k_encode_delta(). + * The content of state buffer is zeroed at the beginning of + * lz4k_encode_delta() ONLY when in0==in. + * The content of state will be changed during encoding. + * + * \param[in] in0 + * !=0, pointer to the reference/dictionary input buffer that was used as + * input to preceding call of lz4k_encode() or lz4k_update_delta_state() to + * fill/update the state buffer. + * The content of the reference/dictionary input buffer does not change + * during encoding. + + * \param[in] in + * !=0, pointer to the input buffer to encode (compress). The input buffer + * is compressed using content of the reference/dictionary input buffer at + * in0. The content of the input buffer does not change during encoding. + * The two buffers - at *in0 and at *in - should be contiguous in memory. + * That is, the last byte of buffer at *in0 is located exactly before byte + * at *in. + + * \param[in] out + * !=0, pointer to the output buffer where to place result of encoding + * (compression). If compression is unsuccessful then content of out + * buffer may be arbitrary. + + * \param[in] in_max + * !=0, size in bytes of the input buffer at *in + + * \param[in] out_max + * !=0, size in bytes of the output buffer at *out. + */ +int lz4k_encode_delta( + void *const state, + const void *const in0, + const void *const in, + void *out, + unsigned int in_max, + unsigned int out_max); + +/* + * lz4k_decode() decodes (decompresses) data from one input buffer and places + * the result of decompression into one output buffer. The encoded data in input + * buffer should be in HUAWEI proprietary format, produced by lz4k_encode() + * or by lz4k_encode_delta(). + * + * \return + * \li positive value\n + * if decoding was successful. The value returned is the size of decoded + * (decompressed) data. + * \li non-positive value\n + * if in==0||in_max==0||out==0||out_max==0 or + * if out_max is less than needed for decoded (decompressed) data or + * if input encoded data format is corrupted. + * + * \param[in] in + * !=0, pointer to the input buffer to decode (decompress). The content of + * the input buffer does not change during decoding. + * + * \param[in] out + * !=0, pointer to the output buffer where to place result of decoding + * (decompression). If decompression is unsuccessful then content of out + * buffer may be arbitrary. + * + * \param[in] in_max + * !=0, size in bytes of the input buffer at in + * + * \param[in] out_max + * !=0, size in bytes of the output buffer at out + */ +int lz4k_decode( + const void *const in, + void *const out, + unsigned int in_max, + unsigned int out_max); + +/* + * lz4k_decode_delta() decodes (decompresses) data from one input buffer + * and places the result of decompression into one output buffer. The + * compressed data in input buffer should be in format, produced by + * lz4k_encode_delta(). + * + * Example sequence of calls for lz4k_decode and lz4k_decode_delta: + * //dictionary (1st) block + * int result0=lz4k_decode(in0, out0, in_max0, out_max0); + * //delta (2nd) block + * int result1=lz4k_decode_delta(in, out0, out, in_max, out_max); + * + * \return + * \li positive value\n + * if decoding was successful. The value returned is the size of decoded + * (decompressed) data. + * \li non-positive value\n + * if in==0||in_max==0||out==0||out_max==0 or + * if out_max is less than needed for decoded (decompressed) data or + * if input data format is corrupted. + * + * \param[in] in + * !=0, pointer to the input buffer to decode (decompress). The content of + * the input buffer does not change during decoding. + * + * \param[in] out0 + * !=0, pointer to the dictionary input buffer that was used as input to + * lz4k_update_delta_state() to fill/update the state buffer. The content + * of the dictionary input buffer does not change during decoding. + * + * \param[in] out + * !=0, pointer to the output buffer where to place result of decoding + * (decompression). If decompression is unsuccessful then content of out + * buffer may be arbitrary. + * The two buffers - at *out0 and at *out - should be contiguous in memory. + * That is, the last byte of buffer at *out0 is located exactly before byte + * at *out. + * + * \param[in] in_max + * !=0, size in bytes of the input buffer at *in + * + * \param[in] out_max + * !=0, size in bytes of the output buffer at *out + */ +int lz4k_decode_delta( + const void *in, + const void *const out0, + void *const out, + unsigned int in_max, + unsigned int out_max); + + +#endif /* _LZ4K_H */ diff --git a/lib/Kconfig b/lib/Kconfig index 36326864249d..4bf1c2c21157 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -310,6 +310,12 @@ config LZ4HC_COMPRESS config LZ4_DECOMPRESS tristate
+config LZ4K_COMPRESS + tristate + +config LZ4K_DECOMPRESS + tristate + config ZSTD_COMPRESS select XXHASH tristate diff --git a/lib/Makefile b/lib/Makefile index a803e1527c4b..bd0d3635ae46 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -187,6 +187,8 @@ obj-$(CONFIG_LZO_DECOMPRESS) += lzo/ obj-$(CONFIG_LZ4_COMPRESS) += lz4/ obj-$(CONFIG_LZ4HC_COMPRESS) += lz4/ obj-$(CONFIG_LZ4_DECOMPRESS) += lz4/ +obj-$(CONFIG_LZ4K_COMPRESS) += lz4k/ +obj-$(CONFIG_LZ4K_DECOMPRESS) += lz4k/ obj-$(CONFIG_ZSTD_COMPRESS) += zstd/ obj-$(CONFIG_ZSTD_DECOMPRESS) += zstd/ obj-$(CONFIG_XZ_DEC) += xz/ diff --git a/lib/lz4k/Makefile b/lib/lz4k/Makefile new file mode 100644 index 000000000000..6ea3578639d4 --- /dev/null +++ b/lib/lz4k/Makefile @@ -0,0 +1,2 @@ +obj-$(CONFIG_LZ4K_COMPRESS) += lz4k_encode.o +obj-$(CONFIG_LZ4K_DECOMPRESS) += lz4k_decode.o \ No newline at end of file diff --git a/lib/lz4k/lz4k_decode.c b/lib/lz4k/lz4k_decode.c new file mode 100644 index 000000000000..b15f3b8a5d62 --- /dev/null +++ b/lib/lz4k/lz4k_decode.c @@ -0,0 +1,314 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2012-2020. All rights reserved. + * Description: LZ4K compression algorithm + * Author: Aleksei Romanovskii aleksei.romanovskii@huawei.com + * Created: 2020-03-25 + */ + +#if !defined(__KERNEL__) +#include "lz4k.h" +#else +#include <linux/lz4k.h> +#include <linux/module.h> +#endif + +#include "lz4k_private.h" /* types, etc */ + +static const uint8_t *get_size( + uint_fast32_t *size, + const uint8_t *in_at, + const uint8_t *const in_end) +{ + uint_fast32_t u; + + do { + if (unlikely(in_at >= in_end)) + return NULL; + *size += (u = *(const uint8_t *)in_at); + ++in_at; + } while (u == BYTE_MAX); + return in_at; +} + +static int end_of_block( + const uint_fast32_t nr_bytes_max, + const uint_fast32_t r_bytes_max, + const uint8_t *const in_at, + const uint8_t *const in_end, + const uint8_t *const out, + const uint8_t *const out_at) +{ + if (!nr_bytes_max) + return LZ4K_STATUS_FAILED; /* should be the last one in block */ + if (r_bytes_max != REPEAT_MIN) + return LZ4K_STATUS_FAILED; /* should be the last one in block */ + if (in_at != in_end) + return LZ4K_STATUS_FAILED; /* should be the last one in block */ + return (int)(out_at - out); +} + +enum { + NR_COPY_MIN = 16, + R_COPY_MIN = 16, + R_COPY_SAFE = R_COPY_MIN - 1, + R_COPY_SAFE_2X = (R_COPY_MIN << 1) - 1 +}; + +static bool out_non_repeat( + const uint8_t **in_at, + uint8_t **out_at, + uint_fast32_t nr_bytes_max, + const uint8_t *const in_end, + const uint8_t *const out_end) +{ + const uint8_t *const in_copy_end = *in_at + nr_bytes_max; + uint8_t *const out_copy_end = *out_at + nr_bytes_max; + + if (likely(nr_bytes_max <= NR_COPY_MIN)) { + if (likely(*in_at <= in_end - NR_COPY_MIN && + *out_at <= out_end - NR_COPY_MIN)) + m_copy(*out_at, *in_at, NR_COPY_MIN); + else if (in_copy_end <= in_end && out_copy_end <= out_end) + m_copy(*out_at, *in_at, nr_bytes_max); + else + return false; + } else { + if (likely(in_copy_end <= in_end - NR_COPY_MIN && + out_copy_end <= out_end - NR_COPY_MIN)) { + m_copy(*out_at, *in_at, NR_COPY_MIN); + copy_x_while_lt(*out_at + NR_COPY_MIN, + *in_at + NR_COPY_MIN, + out_copy_end, NR_COPY_MIN); + } else if (in_copy_end <= in_end && out_copy_end <= out_end) { + m_copy(*out_at, *in_at, nr_bytes_max); + } else { /* in_copy_end > in_end || out_copy_end > out_end */ + return false; + } + } + *in_at = in_copy_end; + *out_at = out_copy_end; + return true; +} + +static void out_repeat_overlap( + uint_fast32_t offset, + uint8_t *out_at, + const uint8_t *out_from, + const uint8_t *const out_copy_end) +{ /* (1 < offset < R_COPY_MIN/2) && out_copy_end + R_COPY_SAFE_2X <= out_end */ + enum { + COPY_MIN = R_COPY_MIN >> 1, + OFFSET_LIMIT = COPY_MIN >> 1 + }; + m_copy(out_at, out_from, COPY_MIN); + out_at += offset; + if (offset <= OFFSET_LIMIT) + offset <<= 1; + do { + m_copy(out_at, out_from, COPY_MIN); + out_at += offset; + if (offset <= OFFSET_LIMIT) + offset <<= 1; + } while (out_at - out_from < R_COPY_MIN); + while_lt_copy_2x_as_x2(out_at, out_from, out_copy_end, R_COPY_MIN); +} + +static bool out_repeat_slow( + uint_fast32_t r_bytes_max, + uint_fast32_t offset, + uint8_t *out_at, + const uint8_t *out_from, + const uint8_t *const out_copy_end, + const uint8_t *const out_end) +{ + if (offset > 1 && out_copy_end <= out_end - R_COPY_SAFE_2X) { + out_repeat_overlap(offset, out_at, out_from, out_copy_end); + } else { + if (unlikely(out_copy_end > out_end)) + return false; + if (offset == 1) { + m_set(out_at, *out_from, r_bytes_max); + } else { + do + *out_at++ = *out_from++; + while (out_at < out_copy_end); + } + } + return true; +} + +static int decode( + const uint8_t *const in, + const uint8_t *const out0, + uint8_t *const out, + const uint8_t *const in_end, + const uint8_t *const out_end, + const uint_fast32_t nr_log2, + const uint_fast32_t off_log2) +{ + const uint_fast32_t r_log2 = TAG_BITS_MAX - (off_log2 + nr_log2); + const uint8_t *in_at = in; + const uint8_t *const in_end_minus_x = in_end - TAG_BYTES_MAX; + uint8_t *out_at = out; + + while (likely(in_at <= in_end_minus_x)) { + const uint_fast32_t utag = read4_at(in_at - 1) >> BYTE_BITS; + const uint_fast32_t offset = utag & mask(off_log2); + uint_fast32_t nr_bytes_max = utag >> (off_log2 + r_log2), + r_bytes_max = ((utag >> off_log2) & mask(r_log2)) + + REPEAT_MIN; + const uint8_t *out_from = 0; + uint8_t *out_copy_end = 0; + + in_at += TAG_BYTES_MAX; + if (unlikely(nr_bytes_max == mask(nr_log2))) { + in_at = get_size(&nr_bytes_max, in_at, in_end); + if (in_at == NULL) + return LZ4K_STATUS_READ_ERROR; + } + if (!out_non_repeat(&in_at, &out_at, nr_bytes_max, in_end, out_end)) + return LZ4K_STATUS_FAILED; + if (unlikely(r_bytes_max == mask(r_log2) + REPEAT_MIN)) { + in_at = get_size(&r_bytes_max, in_at, in_end); + if (in_at == NULL) + return LZ4K_STATUS_READ_ERROR; + } + out_from = out_at - offset; + if (unlikely(out_from < out0)) + return LZ4K_STATUS_FAILED; + out_copy_end = out_at + r_bytes_max; + if (likely(offset >= R_COPY_MIN && + out_copy_end <= out_end - R_COPY_SAFE_2X)) { + copy_2x_as_x2_while_lt(out_at, out_from, out_copy_end, + R_COPY_MIN); + } else if (likely(offset >= (R_COPY_MIN >> 1) && + out_copy_end <= out_end - R_COPY_SAFE_2X)) { + m_copy(out_at, out_from, R_COPY_MIN); + out_at += offset; + while_lt_copy_x(out_at, out_from, out_copy_end, R_COPY_MIN); + /* faster than 2x */ + } else if (likely(offset > 0)) { + if (!out_repeat_slow(r_bytes_max, offset, out_at, out_from, + out_copy_end, out_end)) + return LZ4K_STATUS_FAILED; + } else { /* offset == 0: EOB, last literal */ + return end_of_block(nr_bytes_max, r_bytes_max, in_at, + in_end, out, out_at); + } + out_at = out_copy_end; + } + return in_at == in_end ? (int)(out_at - out) : LZ4K_STATUS_FAILED; +} + +static int decode4kb( + const uint8_t *const in, + const uint8_t *const out0, + uint8_t *const out, + const uint8_t *const in_end, + const uint8_t *const out_end) +{ + enum { + NR_LOG2 = 6 + }; + return decode(in, out0, out, in_end, out_end, NR_LOG2, BLOCK_4KB_LOG2); +} + +static int decode8kb( + const uint8_t *const in, + const uint8_t *const out0, + uint8_t *const out, + const uint8_t *const in_end, + const uint8_t *const out_end) +{ + enum { + NR_LOG2 = 5 + }; + return decode(in, out0, out, in_end, out_end, NR_LOG2, BLOCK_8KB_LOG2); +} + +static int decode16kb( + const uint8_t *const in, + const uint8_t *const out0, + uint8_t *const out, + const uint8_t *const in_end, + const uint8_t *const out_end) +{ + enum { + NR_LOG2 = 5 + }; + return decode(in, out0, out, in_end, out_end, NR_LOG2, BLOCK_16KB_LOG2); +} + +static int decode32kb( + const uint8_t *const in, + const uint8_t *const out0, + uint8_t *const out, + const uint8_t *const in_end, + const uint8_t *const out_end) +{ + enum { + NR_LOG2 = 4 + }; + return decode(in, out0, out, in_end, out_end, NR_LOG2, BLOCK_32KB_LOG2); +} + +static int decode64kb( + const uint8_t *const in, + const uint8_t *const out0, + uint8_t *const out, + const uint8_t *const in_end, + const uint8_t *const out_end) +{ + enum { + NR_LOG2 = 4 + }; + return decode(in, out0, out, in_end, out_end, NR_LOG2, BLOCK_64KB_LOG2); +} + +static inline const void *u8_inc(const uint8_t *a) +{ + return a + 1; +} + +int lz4k_decode( + const void *in, + void *const out, + unsigned int in_max, + unsigned int out_max) +{ + /* ++use volatile pointers to prevent compiler optimizations */ + const uint8_t *volatile in_end = (const uint8_t *)in + in_max; + const uint8_t *volatile out_end = (uint8_t *)out + out_max; + uint8_t in_log2 = 0; + + if (unlikely(in == NULL || out == NULL || in_max <= 4 || out_max <= 0)) + return LZ4K_STATUS_FAILED; + in_log2 = (uint8_t)(BLOCK_4KB_LOG2 + *(const uint8_t *)in); + /* invalid buffer size or pointer overflow */ + if (unlikely((const uint8_t *)in >= in_end || (uint8_t *)out >= out_end)) + return LZ4K_STATUS_FAILED; + /* -- */ + in = u8_inc((const uint8_t *)in); + --in_max; + if (in_log2 < BLOCK_8KB_LOG2) + return decode4kb((const uint8_t *)in, (uint8_t *)out, + (uint8_t *)out, in_end, out_end); + if (in_log2 == BLOCK_8KB_LOG2) + return decode8kb((const uint8_t *)in, (uint8_t *)out, + (uint8_t *)out, in_end, out_end); + if (in_log2 == BLOCK_16KB_LOG2) + return decode16kb((const uint8_t *)in, (uint8_t *)out, + (uint8_t *)out, in_end, out_end); + if (in_log2 == BLOCK_32KB_LOG2) + return decode32kb((const uint8_t *)in, (uint8_t *)out, + (uint8_t *)out, in_end, out_end); + if (in_log2 == BLOCK_64KB_LOG2) + return decode64kb((const uint8_t *)in, (uint8_t *)out, + (uint8_t *)out, in_end, out_end); + return LZ4K_STATUS_FAILED; +} +EXPORT_SYMBOL(lz4k_decode); + +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_DESCRIPTION("LZ4K decoder"); diff --git a/lib/lz4k/lz4k_encode.c b/lib/lz4k/lz4k_encode.c new file mode 100644 index 000000000000..4ec2b1e13c7d --- /dev/null +++ b/lib/lz4k/lz4k_encode.c @@ -0,0 +1,554 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2012-2020. All rights reserved. + * Description: LZ4K compression algorithm + * Author: Aleksei Romanovskii aleksei.romanovskii@huawei.com + * Created: 2020-03-25 + */ + +#if !defined(__KERNEL__) +#include "lz4k.h" +#else +#include <linux/lz4k.h> +#include <linux/module.h> +#endif + +#include "lz4k_private.h" +#include "lz4k_encode_private.h" + +static uint8_t *out_size_bytes(uint8_t *out_at, uint_fast32_t u) +{ + for (; unlikely(u >= BYTE_MAX); u -= BYTE_MAX) + *out_at++ = (uint8_t)BYTE_MAX; + *out_at++ = (uint8_t)u; + return out_at; +} + +static inline uint8_t *out_utag_then_bytes_left( + uint8_t *out_at, + uint_fast32_t utag, + uint_fast32_t bytes_left) +{ + m_copy(out_at, &utag, TAG_BYTES_MAX); + return out_size_bytes(out_at + TAG_BYTES_MAX, bytes_left); +} + +static int out_tail( + uint8_t *out_at, + uint8_t *const out_end, + const uint8_t *const out, + const uint8_t *const nr0, + const uint8_t *const in_end, + const uint_fast32_t nr_log2, + const uint_fast32_t off_log2, + bool check_out) +{ + const uint_fast32_t nr_mask = mask(nr_log2); + const uint_fast32_t r_log2 = TAG_BITS_MAX - (off_log2 + nr_log2); + const uint_fast32_t nr_bytes_max = u_32(in_end - nr0); + uint_fast32_t utag, bytes_left; + + if (encoded_bytes_min(nr_log2, nr_bytes_max) > u_32(out_end - out_at)) + return check_out ? LZ4K_STATUS_WRITE_ERROR : + LZ4K_STATUS_INCOMPRESSIBLE; + if (nr_bytes_max < nr_mask) { + /* caller guarantees at least one nr-byte */ + utag = (nr_bytes_max << (off_log2 + r_log2)); + m_copy(out_at, &utag, TAG_BYTES_MAX); + out_at += TAG_BYTES_MAX; + } else { + bytes_left = nr_bytes_max - nr_mask; + utag = (nr_mask << (off_log2 + r_log2)); + out_at = out_utag_then_bytes_left(out_at, utag, bytes_left); + } + m_copy(out_at, nr0, nr_bytes_max); + return (int)(out_at + nr_bytes_max - out); +} + +int lz4k_out_tail( + uint8_t *out_at, + uint8_t *const out_end, + const uint8_t *const out, + const uint8_t *const nr0, + const uint8_t *const in_end, + const uint_fast32_t nr_log2, + const uint_fast32_t off_log2, + bool check_out) +{ + return out_tail(out_at, out_end, out, nr0, in_end, + nr_log2, off_log2, check_out); +} + +static uint8_t *out_non_repeat( + uint8_t *out_at, + uint8_t *const out_end, + uint_fast32_t utag, + const uint8_t *const nr0, + const uint8_t *const r, + const uint_fast32_t nr_log2, + const uint_fast32_t off_log2, + bool check_out) +{ + const uint_fast32_t nr_bytes_max = u_32(r - nr0); + const uint_fast32_t nr_mask = mask(nr_log2), + r_log2 = TAG_BITS_MAX - (off_log2 + nr_log2); + + if (likely(nr_bytes_max < nr_mask)) { + if (unlikely(check_out && + TAG_BYTES_MAX + nr_bytes_max > u_32(out_end - out_at))) + return NULL; + utag |= (nr_bytes_max << (off_log2 + r_log2)); + m_copy(out_at, &utag, TAG_BYTES_MAX); + out_at += TAG_BYTES_MAX; + } else { + uint_fast32_t bytes_left = nr_bytes_max - nr_mask; + + if (unlikely(check_out && + TAG_BYTES_MAX + size_bytes_count(bytes_left) + nr_bytes_max > + u_32(out_end - out_at))) + return NULL; + utag |= (nr_mask << (off_log2 + r_log2)); + out_at = out_utag_then_bytes_left(out_at, utag, bytes_left); + } + if (unlikely(check_out)) + m_copy(out_at, nr0, nr_bytes_max); + else + copy_x_while_total(out_at, nr0, nr_bytes_max, NR_COPY_MIN); + out_at += nr_bytes_max; + return out_at; +} + +uint8_t *lz4k_out_non_repeat( + uint8_t *out_at, + uint8_t *const out_end, + uint_fast32_t utag, + const uint8_t *const nr0, + const uint8_t *const r, + const uint_fast32_t nr_log2, + const uint_fast32_t off_log2, + bool check_out) +{ + return out_non_repeat(out_at, out_end, utag, nr0, r, + nr_log2, off_log2, check_out); +} + +static uint8_t *out_r_bytes_left( + uint8_t *out_at, + uint8_t *const out_end, + uint_fast32_t r_bytes_max, + const uint_fast32_t nr_log2, + const uint_fast32_t off_log2, + const bool check_out) + /* =false when out_max>=encoded_bytes_max(in_max), =true otherwise */ +{ + const uint_fast32_t r_mask = mask(TAG_BITS_MAX - (off_log2 + nr_log2)); + + if (unlikely(r_bytes_max - REPEAT_MIN >= r_mask)) { + uint_fast32_t bytes_left = r_bytes_max - REPEAT_MIN - r_mask; + + if (unlikely(check_out && + size_bytes_count(bytes_left) > u_32(out_end - out_at))) + return NULL; + out_at = out_size_bytes(out_at, bytes_left); + } + return out_at; /* SUCCESS: continue compression */ +} + +uint8_t *lz4k_out_r_bytes_left( + uint8_t *out_at, + uint8_t *const out_end, + uint_fast32_t r_bytes_max, + const uint_fast32_t nr_log2, + const uint_fast32_t off_log2, + const bool check_out) +{ + return out_r_bytes_left(out_at, out_end, r_bytes_max, + nr_log2, off_log2, check_out); +} + +static uint8_t *out_repeat( + uint8_t *out_at, + uint8_t *const out_end, + uint_fast32_t utag, + uint_fast32_t r_bytes_max, + const uint_fast32_t nr_log2, + const uint_fast32_t off_log2, + const bool check_out) + /* =false when out_max>=encoded_bytes_max(in_max), =true otherwise */ +{ + const uint_fast32_t r_mask = mask(TAG_BITS_MAX - (off_log2 + nr_log2)); + + if (likely(r_bytes_max - REPEAT_MIN < r_mask)) { + if (unlikely(check_out && TAG_BYTES_MAX > u_32(out_end - out_at))) + return NULL; + utag |= ((r_bytes_max - REPEAT_MIN) << off_log2); + m_copy(out_at, &utag, TAG_BYTES_MAX); + out_at += TAG_BYTES_MAX; + } else { + uint_fast32_t bytes_left = r_bytes_max - REPEAT_MIN - r_mask; + + if (unlikely(check_out && + TAG_BYTES_MAX + size_bytes_count(bytes_left) > + u_32(out_end - out_at))) + return NULL; + utag |= (r_mask << off_log2); + out_at = out_utag_then_bytes_left(out_at, utag, bytes_left); + } + return out_at; /* SUCCESS: continue compression */ +} + +uint8_t *lz4k_out_repeat( + uint8_t *out_at, + uint8_t *const out_end, + uint_fast32_t utag, + uint_fast32_t r_bytes_max, + const uint_fast32_t nr_log2, + const uint_fast32_t off_log2, + const bool check_out) +{ + return out_repeat(out_at, out_end, utag, r_bytes_max, + nr_log2, off_log2, check_out); +} + +static const uint8_t *repeat_end( + const uint8_t *q, + const uint8_t *r, + const uint8_t *const in_end_safe, + const uint8_t *const in_end) +{ + q += REPEAT_MIN; + r += REPEAT_MIN; + /* caller guarantees r+12<=in_end */ + do { + const uint64_t x = read8_at(q) ^ read8_at(r); + + if (x) { + const uint16_t ctz = (uint16_t)__builtin_ctzl(x); + + return r + (ctz >> BYTE_BITS_LOG2); + } + /* some bytes differ:+ count of trailing 0-bits/bytes */ + q += sizeof(uint64_t), r += sizeof(uint64_t); + } while (likely(r <= in_end_safe)); /* once, at input block end */ + do { + if (*q != *r) + return r; + ++q; + ++r; + } while (r < in_end); + return r; +} + +const uint8_t *lz4k_repeat_end( + const uint8_t *q, + const uint8_t *r, + const uint8_t *const in_end_safe, + const uint8_t *const in_end) +{ + return repeat_end(q, r, in_end_safe, in_end); +} + +enum { + HT_BYTES_LOG2 = HT_LOG2 + 1 +}; + +inline unsigned int encode_state_bytes_min(void) +{ + unsigned int bytes_total = (1U << HT_BYTES_LOG2); + return bytes_total; +} + +#if !defined(LZ4K_DELTA) && !defined(LZ4K_MAX_CR) + +unsigned int lz4k_encode_state_bytes_min(void) +{ + return encode_state_bytes_min(); +} +EXPORT_SYMBOL(lz4k_encode_state_bytes_min); + +#endif /* !defined(LZ4K_DELTA) && !defined(LZ4K_MAX_CR) */ + +/* CR increase order: +STEP, have OFFSETS, use _5b */ +/* *_6b to compete with LZ4 */ +static inline uint_fast32_t hash0_v(const uint64_t r, uint32_t shift) +{ + return hash64v_6b(r, shift); +} + +static inline uint_fast32_t hash0(const uint8_t *r, uint32_t shift) +{ + return hash64_6b(r, shift); +} + +/* + * Proof that 'r' increments are safe-NO pointer overflows are possible: + * + * While using STEP_LOG2=5, step_start=1<<STEP_LOG2 == 32 we increment s + * 32 times by 1, 32 times by 2, 32 times by 3, and so on: + * 32*1+32*2+32*3+...+32*31 == 32*SUM(1..31) == 32*((1+31)*15+16). + * So, we can safely increment s by at most 31 for input block size <= + * 1<<13 < 15872. + * + * More precisely, STEP_LIMIT == x for any input block calculated as follows: + * 1<<off_log2 >= (1<<STEP_LOG2)*((x+1)(x-1)/2+x/2) ==> + * 1<<(off_log2-STEP_LOG2+1) >= x^2+x-1 ==> + * x^2+x-1-1<<(off_log2-STEP_LOG2+1) == 0, which is solved by standard + * method. + * To avoid overhead here conservative approximate value of x is calculated + * as average of two nearest square roots, see STEP_LIMIT above. + */ + +enum { + STEP_LOG2 = 5 /* increase for better CR */ +}; + +static int encode_any( + uint16_t *const ht0, + const uint8_t *const in0, + uint8_t *const out, + const uint8_t *const in_end, + uint8_t *const out_end, /* ==out_limit for !check_out */ + const uint_fast32_t nr_log2, + const uint_fast32_t off_log2, + const bool check_out) +{ /* caller guarantees off_log2 <=16 */ + uint8_t *out_at = out; + const uint8_t *const in_end_safe = in_end - NR_COPY_MIN; + const uint8_t *r = in0; + const uint8_t *nr0 = r++; + uint_fast32_t step = 1 << STEP_LOG2; + + for (;;) { + uint_fast32_t utag = 0; + const uint8_t *r_end = 0; + uint_fast32_t r_bytes_max = 0; + const uint8_t *const q = hashed(in0, ht0, hash0(r, HT_LOG2), r); + + if (!equal4(q, r)) { + r += (++step >> STEP_LOG2); + if (unlikely(r > in_end_safe)) + return out_tail(out_at, out_end, out, nr0, in_end, + nr_log2, off_log2, check_out); + continue; + } + utag = u_32(r - q); + r_end = repeat_end(q, r, in_end_safe, in_end); + r = repeat_start(q, r, nr0, in0); + r_bytes_max = u_32(r_end - r); + if (nr0 == r) { + out_at = out_repeat(out_at, out_end, utag, r_bytes_max, + nr_log2, off_log2, check_out); + } else { + update_utag(r_bytes_max, &utag, nr_log2, off_log2); + out_at = out_non_repeat(out_at, out_end, utag, nr0, r, + nr_log2, off_log2, check_out); + if (unlikely(check_out && out_at == NULL)) + return LZ4K_STATUS_WRITE_ERROR; + out_at = out_r_bytes_left(out_at, out_end, r_bytes_max, + nr_log2, off_log2, check_out); + } + if (unlikely(check_out && out_at == NULL)) + return LZ4K_STATUS_WRITE_ERROR; + nr0 = (r += r_bytes_max); + if (unlikely(r > in_end_safe)) + return r == in_end ? (int)(out_at - out) : + out_tail(out_at, out_end, out, r, in_end, + nr_log2, off_log2, check_out); + ht0[hash0(r - 1 - 1, HT_LOG2)] = (uint16_t)(r - 1 - 1 - in0); + step = 1 << STEP_LOG2; + } +} + +static int encode_fast( + uint16_t *const ht, + const uint8_t *const in, + uint8_t *const out, + const uint8_t *const in_end, + uint8_t *const out_end, /* ==out_limit for !check_out */ + const uint_fast32_t nr_log2, + const uint_fast32_t off_log2) +{ /* caller guarantees off_log2 <=16 */ + return encode_any(ht, in, out, in_end, out_end, nr_log2, off_log2, + false); /* !check_out */ +} + +static int encode_slow( + uint16_t *const ht, + const uint8_t *const in, + uint8_t *const out, + const uint8_t *const in_end, + uint8_t *const out_end, /* ==out_limit for !check_out */ + const uint_fast32_t nr_log2, + const uint_fast32_t off_log2) +{ /* caller guarantees off_log2 <=16 */ + return encode_any(ht, in, out, in_end, out_end, nr_log2, off_log2, + true); /* check_out */ +} + +static int encode4kb( + uint16_t *const state, + const uint8_t *const in, + uint8_t *out, + const uint_fast32_t in_max, + const uint_fast32_t out_max, + const uint_fast32_t out_limit) +{ + enum { + NR_LOG2 = 6 + }; + const int result = (encoded_bytes_max(NR_LOG2, in_max) > out_max) ? + encode_slow(state, in, out, in + in_max, out + out_max, + NR_LOG2, BLOCK_4KB_LOG2) : + encode_fast(state, in, out, in + in_max, out + out_limit, + NR_LOG2, BLOCK_4KB_LOG2); + return result <= 0 ? result : result + 1; /* +1 for in_log2 */ +} + +static int encode8kb( + uint16_t *const state, + const uint8_t *const in, + uint8_t *out, + const uint_fast32_t in_max, + const uint_fast32_t out_max, + const uint_fast32_t out_limit) +{ + enum { + NR_LOG2 = 5 + }; + const int result = (encoded_bytes_max(NR_LOG2, in_max) > out_max) ? + encode_slow(state, in, out, in + in_max, out + out_max, + NR_LOG2, BLOCK_8KB_LOG2) : + encode_fast(state, in, out, in + in_max, out + out_limit, + NR_LOG2, BLOCK_8KB_LOG2); + return result <= 0 ? result : result + 1; /* +1 for in_log2 */ +} + +static int encode16kb( + uint16_t *const state, + const uint8_t *const in, + uint8_t *out, + const uint_fast32_t in_max, + const uint_fast32_t out_max, + const uint_fast32_t out_limit) +{ + enum { + NR_LOG2 = 5 + }; + const int result = (encoded_bytes_max(NR_LOG2, in_max) > out_max) ? + encode_slow(state, in, out, in + in_max, out + out_max, + NR_LOG2, BLOCK_16KB_LOG2) : + encode_fast(state, in, out, in + in_max, out + out_limit, + NR_LOG2, BLOCK_16KB_LOG2); + return result <= 0 ? result : result + 1; /* +1 for in_log2 */ +} + +static int encode32kb( + uint16_t *const state, + const uint8_t *const in, + uint8_t *out, + const uint_fast32_t in_max, + const uint_fast32_t out_max, + const uint_fast32_t out_limit) +{ + enum { + NR_LOG2 = 4 + }; + const int result = (encoded_bytes_max(NR_LOG2, in_max) > out_max) ? + encode_slow(state, in, out, in + in_max, out + out_max, + NR_LOG2, BLOCK_32KB_LOG2) : + encode_fast(state, in, out, in + in_max, out + out_limit, + NR_LOG2, BLOCK_32KB_LOG2); + return result <= 0 ? result : result + 1; /* +1 for in_log2 */ +} + +static int encode64kb( + uint16_t *const state, + const uint8_t *const in, + uint8_t *out, + const uint_fast32_t in_max, + const uint_fast32_t out_max, + const uint_fast32_t out_limit) +{ + enum { + NR_LOG2 = 4 + }; + const int result = (encoded_bytes_max(NR_LOG2, in_max) > out_max) ? + encode_slow(state, in, out, in + in_max, out + out_max, + NR_LOG2, BLOCK_64KB_LOG2) : + encode_fast(state, in, out, in + in_max, out + out_limit, + NR_LOG2, BLOCK_64KB_LOG2); + return result <= 0 ? result : result + 1; /* +1 for in_log2 */ +} + +static int encode( + uint16_t *const state, + const uint8_t *const in, + uint8_t *out, + uint_fast32_t in_max, + uint_fast32_t out_max, + uint_fast32_t out_limit) +{ + const uint8_t in_log2 = (uint8_t)(most_significant_bit_of( + round_up_to_power_of2(in_max - REPEAT_MIN))); + m_set(state, 0, encode_state_bytes_min()); + *out = in_log2 > BLOCK_4KB_LOG2 ? (uint8_t)(in_log2 - BLOCK_4KB_LOG2) : 0; + ++out; + --out_max; + --out_limit; + if (in_log2 < BLOCK_8KB_LOG2) + return encode4kb(state, in, out, in_max, out_max, out_limit); + if (in_log2 == BLOCK_8KB_LOG2) + return encode8kb(state, in, out, in_max, out_max, out_limit); + if (in_log2 == BLOCK_16KB_LOG2) + return encode16kb(state, in, out, in_max, out_max, out_limit); + if (in_log2 == BLOCK_32KB_LOG2) + return encode32kb(state, in, out, in_max, out_max, out_limit); + if (in_log2 == BLOCK_64KB_LOG2) + return encode64kb(state, in, out, in_max, out_max, out_limit); + return LZ4K_STATUS_FAILED; +} + +int lz4k_encode( + void *const state, + const void *const in, + void *out, + unsigned int in_max, + unsigned int out_max, + unsigned int out_limit) +{ + const unsigned int gain_max = 64 > (in_max >> 6) ? 64 : (in_max >> 6); + const unsigned int out_limit_min = in_max < out_max ? in_max : out_max; + const uint8_t *volatile in_end = (const uint8_t *)in + in_max; + const uint8_t *volatile out_end = (uint8_t *)out + out_max; + const void *volatile state_end = (uint8_t *)state + encode_state_bytes_min(); + + if (unlikely(state == NULL)) + return LZ4K_STATUS_FAILED; + if (unlikely(in == NULL || out == NULL)) + return LZ4K_STATUS_FAILED; + if (unlikely(in_max <= gain_max)) + return LZ4K_STATUS_INCOMPRESSIBLE; + if (unlikely(out_max <= gain_max)) /* need 1 byte for in_log2 */ + return LZ4K_STATUS_FAILED; + /* ++use volatile pointers to prevent compiler optimizations */ + if (unlikely((const uint8_t *)in >= in_end || (uint8_t *)out >= out_end)) + return LZ4K_STATUS_FAILED; + if (unlikely(state >= state_end)) + return LZ4K_STATUS_FAILED; /* pointer overflow */ + if (!out_limit || out_limit >= out_limit_min) + out_limit = out_limit_min - gain_max; + return encode((uint16_t *)state, (const uint8_t *)in, (uint8_t *)out, + in_max, out_max, out_limit); +} +EXPORT_SYMBOL(lz4k_encode); + +const char *lz4k_version(void) +{ + static const char *version = "2020.07.07"; + + return version; +} +EXPORT_SYMBOL(lz4k_version); + +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_DESCRIPTION("LZ4K encoder"); diff --git a/lib/lz4k/lz4k_encode_private.h b/lib/lz4k/lz4k_encode_private.h new file mode 100644 index 000000000000..a79616aba789 --- /dev/null +++ b/lib/lz4k/lz4k_encode_private.h @@ -0,0 +1,152 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2012-2020. All rights reserved. + * Description: LZ4K compression algorithm + * Author: Aleksei Romanovskii aleksei.romanovskii@huawei.com + * Created: 2020-03-25 + */ + +#ifndef _LZ4K_ENCODE_PRIVATE_H +#define _LZ4K_ENCODE_PRIVATE_H + +#include "lz4k_private.h" +#ifndef CONFIG_64BIT +#include <asm/div64.h> +#endif + +/* <nrSize bytes for whole block>+<1 terminating 0 byte> */ +static inline uint_fast32_t size_bytes_count(uint_fast32_t u) +{ +#ifdef CONFIG_64BIT + return (u + BYTE_MAX - 1) / BYTE_MAX; +#else + uint_fast32_t n = u + BYTE_MAX - 1; + + do_div(n, BYTE_MAX); + return n; +#endif +} + +/* minimum encoded size for non-compressible data */ +static inline uint_fast32_t encoded_bytes_min( + uint_fast32_t nr_log2, + uint_fast32_t in_max) +{ + return in_max < mask(nr_log2) ? + TAG_BYTES_MAX + in_max : + TAG_BYTES_MAX + size_bytes_count(in_max - mask(nr_log2)) + in_max; +} + +enum { + NR_COPY_LOG2 = 4, + NR_COPY_MIN = 1 << NR_COPY_LOG2 +}; + +static inline uint_fast32_t u_32(int64_t i) +{ + return (uint_fast32_t)i; +} + +/* maximum encoded size for non-comprressible data if "fast" encoder is used */ +static inline uint_fast32_t encoded_bytes_max( + uint_fast32_t nr_log2, + uint_fast32_t in_max) +{ + uint_fast32_t r = TAG_BYTES_MAX + (uint32_t)round_up_to_log2(in_max, NR_COPY_LOG2); + + return in_max < mask(nr_log2) ? r : r + size_bytes_count(in_max - mask(nr_log2)); +} + +enum { + HT_LOG2 = 12 +}; + +/* + * Compressed data format (where {} means 0 or more occurrences, [] means + * optional): + * <24bits tag: (off_log2 rOffset| r_log2 rSize|nr_log2 nrSize)> + * {<nrSize byte>}[<nr bytes>]{<rSize byte>} + * <rSize byte> and <nrSize byte> bytes are terminated by byte != 255 + * + */ + +static inline void update_utag( + uint_fast32_t r_bytes_max, + uint_fast32_t *utag, + const uint_fast32_t nr_log2, + const uint_fast32_t off_log2) +{ + const uint_fast32_t r_mask = mask(TAG_BITS_MAX - (off_log2 + nr_log2)); + + *utag |= likely(r_bytes_max - REPEAT_MIN < r_mask) ? + ((r_bytes_max - REPEAT_MIN) << off_log2) : (r_mask << off_log2); +} + +static inline const uint8_t *hashed( + const uint8_t *const in0, + uint16_t *const ht, + uint_fast32_t h, + const uint8_t *r) +{ + const uint8_t *q = in0 + ht[h]; + + ht[h] = (uint16_t)(r - in0); + return q; +} + +static inline const uint8_t *repeat_start( + const uint8_t *q, + const uint8_t *r, + const uint8_t *const nr0, + const uint8_t *const in0) +{ + for (; r > nr0 && likely(q > in0) && unlikely(q[-1] == r[-1]); --q, --r) + ; + return r; +} + +int lz4k_out_tail( + uint8_t *out_at, + uint8_t *const out_end, + const uint8_t *const out, + const uint8_t *const nr0, + const uint8_t *const in_end, + const uint_fast32_t nr_log2, + const uint_fast32_t off_log2, + bool check_out); + +uint8_t *lz4k_out_non_repeat( + uint8_t *out_at, + uint8_t *const out_end, + uint_fast32_t utag, + const uint8_t *const nr0, + const uint8_t *const r, + const uint_fast32_t nr_log2, + const uint_fast32_t off_log2, + bool check_out); + +uint8_t *lz4k_out_r_bytes_left( + uint8_t *out_at, + uint8_t *const out_end, + uint_fast32_t r_bytes_max, + const uint_fast32_t nr_log2, + const uint_fast32_t off_log2, + const bool check_out); + +uint8_t *lz4k_out_repeat( + uint8_t *out_at, + uint8_t *const out_end, + uint_fast32_t utag, + uint_fast32_t r_bytes_max, + const uint_fast32_t nr_log2, + const uint_fast32_t off_log2, + const bool check_out); + +const uint8_t *lz4k_repeat_end( + const uint8_t *q, + const uint8_t *r, + const uint8_t *const in_end_safe, + const uint8_t *const in_end); + +#endif /* _LZ4K_ENCODE_PRIVATE_H */ + diff --git a/lib/lz4k/lz4k_private.h b/lib/lz4k/lz4k_private.h new file mode 100644 index 000000000000..d9219d3591cc --- /dev/null +++ b/lib/lz4k/lz4k_private.h @@ -0,0 +1,282 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2012-2020. All rights reserved. + * Description: LZ4K compression algorithm + * Author: Aleksei Romanovskii aleksei.romanovskii@huawei.com + * Created: 2020-03-25 + */ + +#ifndef _LZ4K_PRIVATE_H +#define _LZ4K_PRIVATE_H + +#if !defined(__KERNEL__) + +#include "lz4k.h" +#include <stdint.h> /* uint*_t */ +#define __STDC_WANT_LIB_EXT1__ 1 +#include <string.h> /* memcpy() */ + +#define likely(e) __builtin_expect(e, 1) +#define unlikely(e) __builtin_expect(e, 0) + +#else /* __KERNEL__ */ + +#include <linux/lz4k.h> +#define __STDC_WANT_LIB_EXT1__ 1 +#include <linux/string.h> /* memcpy() */ +/* uint8_t, int8_t, uint16_t, int16_t, + * uint32_t, int32_t, uint64_t, int64_t + */ +#include <linux/types.h> +#include <stddef.h> + +typedef uint64_t uint_fast32_t; +typedef int64_t int_fast32_t; + +#endif /* __KERNEL__ */ + +#if defined(__GNUC__) && (__GNUC__ >= 4) +#define LZ4K_WITH_GCC_INTRINSICS +#endif + +#if !defined(__GNUC__) +#define __builtin_expect(e, v) (e) +#endif /* defined(__GNUC__) */ + +enum { + BYTE_BITS = 8, + BYTE_BITS_LOG2 = 3, + BYTE_MAX = 255U, + REPEAT_MIN = 4, + TAG_BYTES_MAX = 3, + TAG_BITS_MAX = TAG_BYTES_MAX * 8, + BLOCK_4KB_LOG2 = 12, + BLOCK_8KB_LOG2 = 13, + BLOCK_16KB_LOG2 = 14, + BLOCK_32KB_LOG2 = 15, + BLOCK_64KB_LOG2 = 16 +}; + +static inline uint32_t mask(uint_fast32_t log2) +{ + return (1U << log2) - 1U; +} + +static inline uint64_t mask64(uint_fast32_t log2) +{ + return (1ULL << log2) - 1ULL; +} + +#if defined LZ4K_WITH_GCC_INTRINSICS +static inline int most_significant_bit_of(uint64_t u) +{ + return (int)(__builtin_expect((u) == 0, false) ? + -1 : (int)(31 ^ (uint32_t)__builtin_clz((unsigned int)(u)))); +} +#else /* #!defined LZ4K_WITH_GCC_INTRINSICS */ +#error undefined most_significant_bit_of(unsigned int u) +#endif /* #if defined LZ4K_WITH_GCC_INTRINSICS */ + +static inline uint64_t round_up_to_log2(uint64_t u, uint8_t log2) +{ + return (uint64_t)((u + mask64(log2)) & ~mask64(log2)); +} + +static inline uint64_t round_up_to_power_of2(uint64_t u) +{ + const int_fast32_t msb = most_significant_bit_of(u); + + return round_up_to_log2(u, (uint8_t)msb); +} + +static inline void m_copy(void *dst, const void *src, size_t total) +{ +#if defined(__STDC_LIB_EXT1__) + (void)memcpy_s(dst, total, src, (total * 2) >> 1); /* *2 >> 1 to avoid bot errors */ +#else + (void)__builtin_memcpy(dst, src, total); +#endif +} + +static inline void m_set(void *dst, uint8_t value, size_t total) +{ +#if defined(__STDC_LIB_EXT1__) + (void)memset_s(dst, total, value, (total * 2) >> 1); /* *2 >> 1 to avoid bot errors */ +#else + (void)__builtin_memset(dst, value, total); +#endif +} + +static inline uint32_t read4_at(const void *p) +{ + uint32_t result; + + m_copy(&result, p, sizeof(result)); + return result; +} + +static inline uint64_t read8_at(const void *p) +{ + uint64_t result; + + m_copy(&result, p, sizeof(result)); + return result; +} + +static inline bool equal4(const uint8_t *const q, const uint8_t *const r) +{ + return read4_at(q) == read4_at(r); +} + +static inline bool equal3(const uint8_t *const q, const uint8_t *const r) +{ + return (read4_at(q) << BYTE_BITS) == (read4_at(r) << BYTE_BITS); +} + +static inline uint_fast32_t hash24v(const uint64_t r, uint32_t shift) +{ + const uint32_t m = 3266489917U; + + return (((uint32_t)r << BYTE_BITS) * m) >> (32 - shift); +} + +static inline uint_fast32_t hash24(const uint8_t *r, uint32_t shift) +{ + return hash24v(read4_at(r), shift); +} + +static inline uint_fast32_t hash32v_2(const uint64_t r, uint32_t shift) +{ + const uint32_t m = 3266489917U; + + return ((uint32_t)r * m) >> (32 - shift); +} + +static inline uint_fast32_t hash32_2(const uint8_t *r, uint32_t shift) +{ + return hash32v_2(read4_at(r), shift); +} + +static inline uint_fast32_t hash32v(const uint64_t r, uint32_t shift) +{ + const uint32_t m = 2654435761U; + + return ((uint32_t)r * m) >> (32 - shift); +} + +static inline uint_fast32_t hash32(const uint8_t *r, uint32_t shift) +{ + return hash32v(read4_at(r), shift); +} + +static inline uint_fast32_t hash64v_5b(const uint64_t r, uint32_t shift) +{ + const uint64_t m = 889523592379ULL; + + return (uint32_t)(((r << 24) * m) >> (64 - shift)); +} + +static inline uint_fast32_t hash64_5b(const uint8_t *r, uint32_t shift) +{ + return hash64v_5b(read8_at(r), shift); +} + +static inline uint_fast32_t hash64v_6b(const uint64_t r, uint32_t shift) +{ + const uint64_t m = 227718039650203ULL; + + return (uint32_t)(((r << 16) * m) >> (64 - shift)); +} + +static inline uint_fast32_t hash64_6b(const uint8_t *r, uint32_t shift) +{ + return hash64v_6b(read8_at(r), shift); +} + +static inline uint_fast32_t hash64v_7b(const uint64_t r, uint32_t shift) +{ + const uint64_t m = 58295818150454627ULL; + + return (uint32_t)(((r << 8) * m) >> (64 - shift)); +} + +static inline uint_fast32_t hash64_7b(const uint8_t *r, uint32_t shift) +{ + return hash64v_7b(read8_at(r), shift); +} + +static inline uint_fast32_t hash64v_8b(const uint64_t r, uint32_t shift) +{ + const uint64_t m = 2870177450012600261ULL; + + return (uint32_t)((r * m) >> (64 - shift)); +} + +static inline uint_fast32_t hash64_8b(const uint8_t *r, uint32_t shift) +{ + return hash64v_8b(read8_at(r), shift); +} + +static inline void while_lt_copy_x( + uint8_t *dst, + const uint8_t *src, + const uint8_t *dst_end, + const size_t copy_min) +{ + for (; dst < dst_end; dst += copy_min, src += copy_min) + m_copy(dst, src, copy_min); +} + +static inline void copy_x_while_lt( + uint8_t *dst, + const uint8_t *src, + const uint8_t *dst_end, + const size_t copy_min) +{ + m_copy(dst, src, copy_min); + while (dst + copy_min < dst_end) + m_copy(dst += copy_min, src += copy_min, copy_min); +} + +static inline void copy_x_while_total( + uint8_t *dst, + const uint8_t *src, + size_t total, + const size_t copy_min) +{ + m_copy(dst, src, copy_min); + for (; total > copy_min; total -= copy_min) + m_copy(dst += copy_min, src += copy_min, copy_min); +} + +static inline void copy_2x( + uint8_t *dst, + const uint8_t *src, + const size_t copy_min) +{ + m_copy(dst, src, copy_min); + m_copy(dst + copy_min, src + copy_min, copy_min); +} + +static inline void copy_2x_as_x2_while_lt( + uint8_t *dst, + const uint8_t *src, + const uint8_t *dst_end, + const size_t copy_min) +{ + copy_2x(dst, src, copy_min); + while (dst + (copy_min << 1) < dst_end) + copy_2x(dst += (copy_min << 1), src += (copy_min << 1), copy_min); +} + +static inline void while_lt_copy_2x_as_x2( + uint8_t *dst, + const uint8_t *src, + const uint8_t *dst_end, + const size_t copy_min) +{ + for (; dst < dst_end; dst += (copy_min << 1), src += (copy_min << 1)) + copy_2x(dst, src, copy_min); +} + +#endif /* _LZ4K_PRIVATE_H */
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I7H9IA CVE: NA
-------------------------------------------
Add lz4k support for zram.
Signed-off-by: Nanyong Sun sunnanyong@huawei.com Signed-off-by: Tu Jinjiang tujinjiang@huawei.com --- drivers/block/zram/zcomp.c | 3 +++ 1 file changed, 3 insertions(+)
diff --git a/drivers/block/zram/zcomp.c b/drivers/block/zram/zcomp.c index b08650417bf0..28bda2035326 100644 --- a/drivers/block/zram/zcomp.c +++ b/drivers/block/zram/zcomp.c @@ -29,6 +29,9 @@ static const char * const backends[] = { #if IS_ENABLED(CONFIG_CRYPTO_ZSTD) "zstd", #endif +#if IS_ENABLED(CONFIG_CRYPTO_LZ4K) + "lz4k", +#endif };
static void zcomp_strm_free(struct zcomp_strm *zstrm)